1 /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15
16 /* Describe, check and repair of MARIA tables */
17
18 /*
19 About checksum calculation.
20
21 There are two types of checksums. Table checksum and row checksum.
22
Row checksum is an additional byte at the end of dynamic length
records. It must be calculated if the table is configured for it.
Otherwise it must not be used. The variable
MYISAM_SHARE::calc_checksum determines if row checksums are used.
MI_INFO::checksum is used as temporary storage during row handling.
For parallel repair we must ensure that only one thread can use this
variable. There is no problem on the write side as this is done by one
thread only. But when checking a record after read this could go
wrong. But since all threads read through a common read buffer, it is
sufficient if only one thread checks it.
33
Table checksum is an eight byte value in the header of the index file.
It can be calculated even if row checksums are not used. The variable
MI_CHECK::glob_crc is calculated over all records.
MI_SORT_PARAM::calc_checksum determines if this should be done. This
variable is not part of MI_CHECK because it must be set per thread for
parallel repair. The global glob_crc must be changed by one thread
only. And it is sufficient to calculate the checksum only once.
41 */
42
43 #include "ma_ftdefs.h"
44 #include "ma_rt_index.h"
45 #include "ma_blockrec.h"
46 #include "trnman.h"
47 #include "ma_key_recover.h"
48 #include <my_check_opt.h>
49
50 #include <stdarg.h>
51 #include <my_getopt.h>
52 #ifdef HAVE_SYS_VADVISE_H
53 #include <sys/vadvise.h>
54 #endif
55
56 /* Functions defined in this file */
57
58 static int check_k_link(HA_CHECK *param, MARIA_HA *info, my_off_t next_link);
59 static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
60 MARIA_PAGE *page, ha_rows *keys,
61 ha_checksum *key_checksum, uint level);
62 static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo);
63 static ha_checksum calc_checksum(ha_rows count);
64 static int writekeys(MARIA_SORT_PARAM *sort_param);
65 static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
66 MARIA_KEYDEF *keyinfo,
67 my_off_t pagepos, File new_file);
68 static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
69 static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
70 static int sort_get_next_record(MARIA_SORT_PARAM *sort_param);
71 static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
72 const void *b);
73 static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
74 const uchar *a);
75 static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a);
76 static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo, const uchar *key);
77 static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
78 reg1 SORT_KEY_BLOCKS *key_block,
79 const uchar *key, my_off_t prev_block);
80 static int sort_delete_record(MARIA_SORT_PARAM *sort_param);
81 /*static int _ma_flush_pending_blocks(HA_CHECK *param);*/
82 static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
83 uint buffer_length);
84 static ha_checksum maria_byte_checksum(const uchar *buf, uint length);
85 static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share);
86 static void restore_data_file_type(MARIA_SHARE *share);
87 static void change_data_file_descriptor(MARIA_HA *info, File new_file);
88 static void unuse_data_file_descriptor(MARIA_HA *info);
89 static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
90 MARIA_HA *info, uchar *record);
91 static void copy_data_file_state(MARIA_STATE_INFO *to,
92 MARIA_STATE_INFO *from);
93 static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
94 my_off_t position);
95 static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file);
96 static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
97 MARIA_HA *info);
98 static TrID max_trid_in_system(void);
99 static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid);
100 void retry_if_quick(MARIA_SORT_PARAM *param, int error);
101 static void print_bitmap_description(MARIA_SHARE *share,
102 pgcache_page_no_t page,
103 uchar *buff);
104
105
106 /* Initialize check param with default values */
107
maria_chk_init(HA_CHECK * param)108 void maria_chk_init(HA_CHECK *param)
109 {
110 bzero((uchar*) param,sizeof(*param));
111 param->opt_follow_links=1;
112 param->keys_in_use= ~(ulonglong) 0;
113 param->search_after_block=HA_OFFSET_ERROR;
114 param->auto_increment_value= 0;
115 param->use_buffers= PAGE_BUFFER_INIT;
116 param->read_buffer_length=READ_BUFFER_INIT;
117 param->write_buffer_length=READ_BUFFER_INIT;
118 param->sort_buffer_length=SORT_BUFFER_INIT;
119 param->sort_key_blocks=BUFFERS_WHEN_SORTING;
120 param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
121 param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL);
122 param->start_check_pos=0;
123 param->max_record_length= LONGLONG_MAX;
124 param->pagecache_block_size= KEY_CACHE_BLOCK_SIZE;
125 param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
126 param->max_stage= 1;
127 }
128
129
130 /* Initialize check param and maria handler for check of table */
131
maria_chk_init_for_check(HA_CHECK * param,MARIA_HA * info)132 void maria_chk_init_for_check(HA_CHECK *param, MARIA_HA *info)
133 {
134 param->not_visible_rows_found= 0;
135 param->max_found_trid= 0;
136
137 /*
138 Set up transaction handler so that we can see all rows. When rows is read
139 we will check the found id against param->max_tried
140 */
141 if (!info->s->base.born_transactional)
142 {
143 /*
144 There are no trids. Howver we want to set max_trid to make test of
145 create_trid simpler.
146 */
147 param->max_trid= ~(TrID) 0;
148 }
149 else if (param->max_trid == 0 || param->max_trid == ~(TrID) 0)
150 {
151 if (!ma_control_file_inited())
152 param->max_trid= 0; /* Give warning for first trid found */
153 else
154 param->max_trid= max_trid_in_system();
155 }
156
157 maria_ignore_trids(info);
158 }
159
160
161 /* Check the status flags for the table */
162
maria_chk_status(HA_CHECK * param,MARIA_HA * info)163 int maria_chk_status(HA_CHECK *param, MARIA_HA *info)
164 {
165 MARIA_SHARE *share= info->s;
166
167 if (maria_is_crashed_on_repair(info))
168 _ma_check_print_warning(param,
169 "Table is marked as crashed and last repair failed");
170 else if (maria_in_repair(info))
171 _ma_check_print_warning(param,
172 "Last repair was aborted before finishing");
173 else if (maria_is_crashed(info))
174 _ma_check_print_warning(param,
175 "Table is marked as crashed");
176 if (share->state.open_count != (uint) (share->global_changed ? 1 : 0))
177 {
178 /* Don't count this as a real warning, as check can correct this ! */
179 my_bool save=param->warning_printed;
180 _ma_check_print_warning(param,
181 share->state.open_count==1 ?
182 "%d client is using or hasn't closed the table properly" :
183 "%d clients are using or haven't closed the table properly",
184 share->state.open_count);
185 /* If this will be fixed by the check, forget the warning */
186 if (param->testflag & T_UPDATE_STATE)
187 param->warning_printed=save;
188 }
189 if (share->state.create_trid > param->max_trid)
190 {
191 param->wrong_trd_printed= 1; /* Force should run zerofill */
192 _ma_check_print_warning(param,
193 "Table create_trd (%llu) > current max_transaction id (%llu). Table needs to be repaired or zerofilled to be usable",
194 share->state.create_trid, param->max_trid);
195 return 1;
196 }
197 return 0;
198 }
199
200 /*
201 Check delete links in row data
202 */
203
maria_chk_del(HA_CHECK * param,register MARIA_HA * info,ulonglong test_flag)204 int maria_chk_del(HA_CHECK *param, register MARIA_HA *info,
205 ulonglong test_flag)
206 {
207 MARIA_SHARE *share= info->s;
208 reg2 ha_rows i;
209 uint delete_link_length;
210 my_off_t empty,next_link,UNINIT_VAR(old_link);
211 char buff[22],buff2[22];
212 DBUG_ENTER("maria_chk_del");
213
214 param->record_checksum=0;
215
216 if (share->data_file_type == BLOCK_RECORD)
217 DBUG_RETURN(0); /* No delete links here */
218
219 delete_link_length=((share->options & HA_OPTION_PACK_RECORD) ? 20 :
220 share->rec_reflength+1);
221
222 if (!(test_flag & T_SILENT))
223 puts("- check record delete-chain");
224
225 next_link=share->state.dellink;
226 if (share->state.state.del == 0)
227 {
228 if (test_flag & T_VERBOSE)
229 {
230 puts("No recordlinks");
231 }
232 }
233 else
234 {
235 if (test_flag & T_VERBOSE)
236 printf("Recordlinks: ");
237 empty=0;
238 for (i= share->state.state.del ; i > 0L && next_link != HA_OFFSET_ERROR ; i--)
239 {
240 if (_ma_killed_ptr(param))
241 DBUG_RETURN(1);
242 if (test_flag & T_VERBOSE)
243 printf(" %9s",llstr(next_link,buff));
244 if (next_link >= share->state.state.data_file_length)
245 goto wrong;
246 if (mysql_file_pread(info->dfile.file, (uchar*) buff, delete_link_length,
247 next_link,MYF(MY_NABP)))
248 {
249 if (test_flag & T_VERBOSE) puts("");
250 _ma_check_print_error(param,"Can't read delete-link at filepos: %s",
251 llstr(next_link,buff));
252 DBUG_RETURN(1);
253 }
254 if (*buff != '\0')
255 {
256 if (test_flag & T_VERBOSE) puts("");
257 _ma_check_print_error(param,"Record at pos: %s is not remove-marked",
258 llstr(next_link,buff));
259 goto wrong;
260 }
261 if (share->options & HA_OPTION_PACK_RECORD)
262 {
263 my_off_t prev_link=mi_sizekorr(buff+12);
264 if (empty && prev_link != old_link)
265 {
266 if (test_flag & T_VERBOSE) puts("");
267 _ma_check_print_error(param,
268 "Deleted block at %s doesn't point back at previous delete link",
269 llstr(next_link,buff2));
270 goto wrong;
271 }
272 old_link=next_link;
273 next_link=mi_sizekorr(buff+4);
274 empty+=mi_uint3korr(buff+1);
275 }
276 else
277 {
278 param->record_checksum+=(ha_checksum) next_link;
279 next_link= _ma_rec_pos(share, (uchar *) buff + 1);
280 empty+=share->base.pack_reclength;
281 }
282 }
283 if (share->state.state.del && (test_flag & T_VERBOSE))
284 puts("\n");
285 if (empty != share->state.state.empty)
286 {
287 _ma_check_print_warning(param,
288 "Found %s deleted space in delete link chain. Should be %s",
289 llstr(empty,buff2),
290 llstr(share->state.state.empty,buff));
291 }
292 if (next_link != HA_OFFSET_ERROR)
293 {
294 _ma_check_print_error(param,
295 "Found more than the expected %s deleted rows in delete link chain",
296 llstr(share->state.state.del, buff));
297 goto wrong;
298 }
299 if (i != 0)
300 {
301 _ma_check_print_error(param,
302 "Found %s deleted rows in delete link chain. Should be %s",
303 llstr(share->state.state.del - i, buff2),
304 llstr(share->state.state.del, buff));
305 goto wrong;
306 }
307 }
308 DBUG_RETURN(0);
309
310 wrong:
311 param->testflag|=T_RETRY_WITHOUT_QUICK;
312 if (test_flag & T_VERBOSE)
313 puts("");
314 _ma_check_print_error(param,"record delete-link-chain corrupted");
315 DBUG_RETURN(1);
316 } /* maria_chk_del */
317
318
319 /* Check delete links in index file */
320
check_k_link(HA_CHECK * param,register MARIA_HA * info,my_off_t next_link)321 static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
322 my_off_t next_link)
323 {
324 MARIA_SHARE *share= info->s;
325 uint block_size= share->block_size;
326 ha_rows records;
327 char llbuff[21], llbuff2[21];
328 uchar *buff;
329 DBUG_ENTER("check_k_link");
330
331 if (next_link == HA_OFFSET_ERROR)
332 DBUG_RETURN(0); /* Avoid printing empty line */
333
334 records= (ha_rows) (share->state.state.key_file_length / block_size);
335 while (next_link != HA_OFFSET_ERROR && records > 0)
336 {
337 if (_ma_killed_ptr(param))
338 DBUG_RETURN(1);
339 if (param->testflag & T_VERBOSE)
340 printf("%16s",llstr(next_link,llbuff));
341
342 /* Key blocks must lay within the key file length entirely. */
343 if (next_link + block_size > share->state.state.key_file_length)
344 {
345 /* purecov: begin tested */
346 _ma_check_print_error(param, "Invalid key block position: %s "
347 "key block size: %u file_length: %s",
348 llstr(next_link, llbuff), block_size,
349 llstr(share->state.state.key_file_length, llbuff2));
350 DBUG_RETURN(1);
351 /* purecov: end */
352 }
353
354 /* Key blocks must be aligned at block_size */
355 if (next_link & (block_size -1))
356 {
357 /* purecov: begin tested */
358 _ma_check_print_error(param, "Mis-aligned key block: %s "
359 "minimum key block length: %u",
360 llstr(next_link, llbuff),
361 block_size);
362 DBUG_RETURN(1);
363 /* purecov: end */
364 }
365
366 DBUG_ASSERT(share->pagecache->block_size == block_size);
367 if (!(buff= pagecache_read(share->pagecache,
368 &share->kfile,
369 (pgcache_page_no_t) (next_link / block_size),
370 DFLT_INIT_HITS,
371 info->buff, PAGECACHE_READ_UNKNOWN_PAGE,
372 PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
373 {
374 /* purecov: begin tested */
375 _ma_check_print_error(param, "key cache read error for block: %s",
376 llstr(next_link,llbuff));
377 DBUG_RETURN(1);
378 /* purecov: end */
379 }
380 if (_ma_get_keynr(info->s, buff) != MARIA_DELETE_KEY_NR)
381 _ma_check_print_error(param, "Page at %s is not delete marked",
382 llstr(next_link, llbuff));
383
384 next_link= mi_sizekorr(buff + share->keypage_header);
385 records--;
386 param->key_file_blocks+=block_size;
387 }
388 if (param->testflag & T_VERBOSE)
389 {
390 if (next_link != HA_OFFSET_ERROR)
391 printf("%16s\n",llstr(next_link,llbuff));
392 else
393 puts("");
394 }
395 DBUG_RETURN (next_link != HA_OFFSET_ERROR);
396 } /* check_k_link */
397
398
399 /* Check sizes of files */
400
maria_chk_size(HA_CHECK * param,register MARIA_HA * info)401 int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
402 {
403 MARIA_SHARE *share= info->s;
404 int error;
405 register my_off_t skr,size;
406 char buff[22],buff2[22];
407 DBUG_ENTER("maria_chk_size");
408
409 if (!(param->testflag & T_SILENT))
410 puts("- check file-size");
411
412 /*
413 The following is needed if called externally (not from maria_chk).
414 To get a correct physical size we need to flush them.
415 */
416 if ((error= _ma_flush_table_files(info,
417 MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
418 FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE)))
419 _ma_check_print_error(param, "Failed to flush data or index file");
420
421 size= mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END, MYF(MY_THREADSAFE));
422 if ((skr=(my_off_t) share->state.state.key_file_length) != size)
423 {
424 /* Don't give error if file generated by maria_pack */
425 if (skr > size && maria_is_any_key_active(share->state.key_map))
426 {
427 error=1;
428 _ma_check_print_error(param,
429 "Size of indexfile is: %-8s Expected: %s",
430 llstr(size,buff), llstr(skr,buff2));
431 share->state.state.key_file_length= size;
432 }
433 else if (!(param->testflag & T_VERY_SILENT))
434 _ma_check_print_warning(param,
435 "Size of indexfile is: %-8s Expected: %s",
436 llstr(size,buff), llstr(skr,buff2));
437 }
438 if (size > share->base.max_key_file_length)
439 {
440 _ma_check_print_warning(param,
441 "Size of indexfile is: %-8s which is bigger than max indexfile size: %s",
442 ullstr(size,buff),
443 ullstr(share->base.max_key_file_length, buff2));
444 }
445 else if (!(param->testflag & T_VERY_SILENT) &&
446 ! (share->options & HA_OPTION_COMPRESS_RECORD) &&
447 ulonglong2double(share->state.state.key_file_length) >
448 ulonglong2double(share->base.margin_key_file_length)*0.9)
449 _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used",
450 llstr(share->state.state.key_file_length,buff),
451 llstr(share->base.max_key_file_length,buff));
452
453 size= mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
454 skr=(my_off_t) share->state.state.data_file_length;
455 if (share->options & HA_OPTION_COMPRESS_RECORD)
456 skr+= MEMMAP_EXTRA_MARGIN;
457 #ifdef USE_RELOC
458 if (share->data_file_type == STATIC_RECORD &&
459 skr < (my_off_t) share->base.reloc*share->base.min_pack_length)
460 skr=(my_off_t) share->base.reloc*share->base.min_pack_length;
461 #endif
462 if (skr != size)
463 {
464 share->state.state.data_file_length=size; /* Skip other errors */
465 if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN)
466 {
467 error=1;
468 _ma_check_print_error(param,"Size of datafile is: %-9s Expected: %s",
469 llstr(size,buff), llstr(skr,buff2));
470 param->testflag|=T_RETRY_WITHOUT_QUICK;
471 }
472 else
473 {
474 _ma_check_print_warning(param,
475 "Size of datafile is: %-9s Expected: %s",
476 llstr(size,buff), llstr(skr,buff2));
477 }
478 }
479 if (size > share->base.max_data_file_length)
480 {
481 _ma_check_print_warning(param,
482 "Size of datafile is: %-8s which is bigger than max datafile size: %s",
483 ullstr(size,buff),
484 ullstr(share->base.max_data_file_length, buff2));
485 } else if (!(param->testflag & T_VERY_SILENT) &&
486 !(share->options & HA_OPTION_COMPRESS_RECORD) &&
487 ulonglong2double(share->state.state.data_file_length) >
488 (ulonglong2double(share->base.max_data_file_length)*0.9))
489 _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used",
490 llstr(share->state.state.data_file_length,buff),
491 llstr(share->base.max_data_file_length,buff2));
492 DBUG_RETURN(error);
493 } /* maria_chk_size */
494
495
496 /* Check keys */
497
maria_chk_key(HA_CHECK * param,register MARIA_HA * info)498 int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
499 {
500 uint key,found_keys=0,full_text_keys=0,result=0;
501 ha_rows keys;
502 ha_checksum old_record_checksum,init_checksum;
503 my_off_t all_keydata,all_totaldata,key_totlength,length;
504 double *rec_per_key_part;
505 MARIA_SHARE *share= info->s;
506 MARIA_KEYDEF *keyinfo;
507 char buff[22],buff2[22];
508 MARIA_PAGE page;
509 DBUG_ENTER("maria_chk_key");
510
511 if (!(param->testflag & T_SILENT))
512 puts("- check key delete-chain");
513
514 param->key_file_blocks=share->base.keystart;
515 if (check_k_link(param, info, share->state.key_del))
516 {
517 if (param->testflag & T_VERBOSE) puts("");
518 _ma_check_print_error(param,"key delete-link-chain corrupted");
519 DBUG_RETURN(-1);
520 }
521
522 if (!(param->testflag & T_SILENT))
523 puts("- check index reference");
524
525 all_keydata=all_totaldata=key_totlength=0;
526 init_checksum=param->record_checksum;
527 old_record_checksum=0;
528 if (share->data_file_type == STATIC_RECORD)
529 old_record_checksum= (calc_checksum(share->state.state.records +
530 share->state.state.del-1) *
531 share->base.pack_reclength);
532 rec_per_key_part= param->new_rec_per_key_part;
533 for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
534 rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++)
535 {
536 param->key_crc[key]=0;
537 if (! maria_is_key_active(share->state.key_map, key))
538 {
539 /* Remember old statistics for key */
540 memcpy((char*) rec_per_key_part,
541 (char*) (share->state.rec_per_key_part +
542 (uint) (rec_per_key_part - param->new_rec_per_key_part)),
543 keyinfo->keysegs*sizeof(*rec_per_key_part));
544 continue;
545 }
546 found_keys++;
547 _ma_report_progress(param, key, share->base.keys);
548
549 param->record_checksum=init_checksum;
550
551 bzero((char*) ¶m->unique_count,sizeof(param->unique_count));
552 bzero((char*) ¶m->notnull_count,sizeof(param->notnull_count));
553
554 if ((!(param->testflag & T_SILENT)))
555 printf ("- check data record references index: %d\n",key+1);
556 if (keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL))
557 full_text_keys++;
558 if (share->state.key_root[key] == HA_OFFSET_ERROR)
559 {
560 if (share->state.state.records != 0 && !(keyinfo->flag & HA_FULLTEXT))
561 _ma_check_print_error(param, "Key tree %u is empty", key + 1);
562 goto do_stat;
563 }
564 if (_ma_fetch_keypage(&page, info, keyinfo, share->state.key_root[key],
565 PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
566 info->buff, 0))
567 {
568 report_keypage_fault(param, info, share->state.key_root[key]);
569 if (!(param->testflag & T_INFO))
570 DBUG_RETURN(-1);
571 result= -1;
572 continue;
573 }
574 param->key_file_blocks+=keyinfo->block_length;
575 keys=0;
576 param->keydata=param->totaldata=0;
577 param->key_blocks=0;
578 param->max_level=0;
579 if (chk_index(param, info,keyinfo, &page, &keys, param->key_crc+key,1))
580 DBUG_RETURN(-1);
581 if (!(keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL | HA_RTREE_INDEX)))
582 {
583 if (keys != share->state.state.records)
584 {
585 _ma_check_print_error(param,"Found %s keys of %s",llstr(keys,buff),
586 llstr(share->state.state.records,buff2));
587 if (!(param->testflag & T_INFO))
588 DBUG_RETURN(-1);
589 result= -1;
590 continue;
591 }
592 if ((found_keys - full_text_keys == 1 &&
593 !(share->data_file_type == STATIC_RECORD)) ||
594 (param->testflag & T_DONT_CHECK_CHECKSUM))
595 old_record_checksum= param->record_checksum;
596 else if (old_record_checksum != param->record_checksum)
597 {
598 if (key)
599 _ma_check_print_error(param,
600 "Key %u doesn't point at same records as "
601 "key 1",
602 key+1);
603 else
604 _ma_check_print_error(param,"Key 1 doesn't point at all records");
605 if (!(param->testflag & T_INFO))
606 DBUG_RETURN(-1);
607 result= -1;
608 continue;
609 }
610 }
611 if ((uint) share->base.auto_key -1 == key)
612 {
613 /* Check that auto_increment key is bigger than max key value */
614 ulonglong auto_increment;
615 const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
616 info->lastinx=key;
617 _ma_read_key_record(info, info->rec_buff, 0);
618 auto_increment=
619 ma_retrieve_auto_increment(info->rec_buff + keyseg->start,
620 keyseg->type);
621 if (auto_increment > share->state.auto_increment)
622 {
623 _ma_check_print_warning(param, "Auto-increment value: %s is smaller "
624 "than max used value: %s",
625 llstr(share->state.auto_increment,buff2),
626 llstr(auto_increment, buff));
627 }
628 if (param->testflag & T_AUTO_INC)
629 {
630 set_if_bigger(share->state.auto_increment,
631 auto_increment);
632 set_if_bigger(share->state.auto_increment,
633 param->auto_increment_value);
634 }
635
636 /* Check that there isn't a row with auto_increment = 0 in the table */
637 maria_extra(info,HA_EXTRA_KEYREAD,0);
638 bzero(info->lastkey_buff, keyinfo->seg->length);
639 if (!maria_rkey(info, info->rec_buff, key,
640 info->lastkey_buff,
641 (key_part_map) 1, HA_READ_KEY_EXACT))
642 {
643 /* Don't count this as a real warning, as maria_chk can't correct it */
644 my_bool save=param->warning_printed;
645 _ma_check_print_warning(param, "Found row where the auto_increment "
646 "column has the value 0");
647 param->warning_printed=save;
648 }
649 maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
650 }
651
652 length=(my_off_t) isam_key_length(info,keyinfo)*keys + param->key_blocks*2;
653 if (param->testflag & T_INFO && param->totaldata != 0L && keys != 0L)
654 printf("Key: %2d: Keyblocks used: %3d%% Packed: %4d%% Max levels: %2d\n",
655 key+1,
656 (int) (my_off_t2double(param->keydata)*100.0/my_off_t2double(param->totaldata)),
657 (int) ((my_off_t2double(length) - my_off_t2double(param->keydata))*100.0/
658 my_off_t2double(length)),
659 param->max_level);
660 all_keydata+=param->keydata; all_totaldata+=param->totaldata; key_totlength+=length;
661
662 do_stat:
663 if (param->testflag & T_STATISTICS)
664 maria_update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
665 param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
666 param->notnull_count: NULL,
667 (ulonglong)share->state.state.records);
668 }
669 if (param->testflag & T_INFO)
670 {
671 if (all_totaldata != 0L && found_keys > 0)
672 printf("Total: Keyblocks used: %3d%% Packed: %4d%%\n\n",
673 (int) (my_off_t2double(all_keydata)*100.0/
674 my_off_t2double(all_totaldata)),
675 (int) ((my_off_t2double(key_totlength) -
676 my_off_t2double(all_keydata))*100.0/
677 my_off_t2double(key_totlength)));
678 else if (all_totaldata != 0L && maria_is_any_key_active(share->state.key_map))
679 puts("");
680 }
681 if (param->key_file_blocks != share->state.state.key_file_length &&
682 share->state.key_map == ~(ulonglong) 0)
683 _ma_check_print_warning(param, "Some data are unreferenced in keyfile");
684 if (found_keys != full_text_keys)
685 param->record_checksum=old_record_checksum-init_checksum; /* Remove delete links */
686 else
687 param->record_checksum=0;
688 DBUG_RETURN(result);
689 } /* maria_chk_key */
690
691
692
/*
  Validate the position of a key page, read it and check it.

  SYNOPSIS
    chk_index_down()
    param         INOUT  Check descriptor; key_file_blocks is increased by
                         the key block length when the page has been read
    info          IN     Open table
    keyinfo       IN     Key definition the page belongs to
    page          IN     Position of the page in the index file
    buff          IN     Buffer handed to _ma_fetch_keypage() for the page
    keys          INOUT  Passed on to chk_index()
    key_checksum  INOUT  Passed on to chk_index()
    level         IN     Passed on to chk_index()

  DESCRIPTION
    A page that ends after the remembered key file length is always
    reported. If it still fits in the real file, the remembered length is
    corrected and the check goes on.

  RETURN
    0   Ok
    1   Bad page position, read failure or chk_index() failure
*/

static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
                          my_off_t page, uchar *buff, ha_rows *keys,
                          ha_checksum *key_checksum, uint level)
{
  MARIA_SHARE *share= info->s;
  MARIA_PAGE child_page;
  char pos_str[22], len_str[22];
  DBUG_ENTER("chk_index_down");

  /* Key blocks must lay within the key file length entirely. */
  if (page + keyinfo->block_length > share->state.state.key_file_length)
  {
    /* purecov: begin tested */
    /* Give it a chance to fit in the real file size. */
    my_off_t real_length= mysql_file_seek(share->kfile.file, 0L,
                                          MY_SEEK_END, MYF(MY_THREADSAFE));

    _ma_check_print_error(param, "Invalid key block position: %s "
                          "key block size: %u file_length: %s",
                          llstr(page, pos_str), keyinfo->block_length,
                          llstr(share->state.state.key_file_length,
                                len_str));
    if (page + keyinfo->block_length > real_length)
      DBUG_RETURN(1);

    /* Fix the remembered key file length. */
    share->state.state.key_file_length=
      (real_length & ~ (my_off_t) (keyinfo->block_length - 1));
    /* purecov: end */
  }

  /* Key blocks must be aligned at block length */
  if (page & (share->block_size -1))
  {
    /* purecov: begin tested */
    _ma_check_print_error(param, "Mis-aligned key block: %s "
                          "key block length: %u",
                          llstr(page, pos_str), share->block_size);
    DBUG_RETURN(1);
    /* purecov: end */
  }

  if (_ma_fetch_keypage(&child_page, info, keyinfo, page,
                        PAGECACHE_LOCK_LEFT_UNLOCKED,
                        DFLT_INIT_HITS, buff, 0))
  {
    report_keypage_fault(param, info, page);
    DBUG_RETURN(1);
  }

  param->key_file_blocks+= keyinfo->block_length;

  /* Keep the call outside DBUG_RETURN() */
  if (chk_index(param, info, keyinfo, &child_page, keys, key_checksum,
                level))
    DBUG_RETURN(1);

  DBUG_RETURN(0);
}
752
753
754 /*
755 "Ignore NULLs" statistics collection method: process first index tuple.
756
757 SYNOPSIS
758 maria_collect_stats_nonulls_first()
759 keyseg IN Array of key part descriptions
760 notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
761 tuples that don't contain NULLs)
762 key IN Key values tuple
763
764 DESCRIPTION
765 Process the first index tuple - find out which prefix tuples don't
766 contain NULLs, and update the array of notnull counters accordingly.
767 */
768
769 static
maria_collect_stats_nonulls_first(HA_KEYSEG * keyseg,ulonglong * notnull,const uchar * key)770 void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
771 const uchar *key)
772 {
773 size_t first_null, kp;
774 first_null= ha_find_null(keyseg, key) - keyseg;
775 /*
776 All prefix tuples that don't include keypart_{first_null} are not-null
777 tuples (and all others aren't), increment counters for them.
778 */
779 for (kp= 0; kp < first_null; kp++)
780 notnull[kp]++;
781 }
782
783
784 /*
785 "Ignore NULLs" statistics collection method: process next index tuple.
786
787 SYNOPSIS
788 maria_collect_stats_nonulls_next()
789 keyseg IN Array of key part descriptions
790 notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
791 tuples that don't contain NULLs)
792 prev_key IN Previous key values tuple
793 last_key IN Next key values tuple
794
795 DESCRIPTION
796 Process the next index tuple:
797 1. Find out which prefix tuples of last_key don't contain NULLs, and
798 update the array of notnull counters accordingly.
799 2. Find the first keypart number where the prev_key and last_key tuples
800 are different(A), or last_key has NULL value(B), and return it, so the
801 caller can count number of unique tuples for each key prefix. We don't
802 need (B) to be counted, and that is compensated back in
803 maria_update_key_parts().
804
805 RETURN
806 1 + number of first keypart where values differ or last_key tuple has NULL
807 */
808
809 static
maria_collect_stats_nonulls_next(HA_KEYSEG * keyseg,ulonglong * notnull,const uchar * prev_key,const uchar * last_key)810 int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
811 const uchar *prev_key,
812 const uchar *last_key)
813 {
814 uint diffs[2];
815 size_t first_null_seg, kp;
816 HA_KEYSEG *seg;
817
818 /*
819 Find the first keypart where values are different or either of them is
820 NULL. We get results in diffs array:
821 diffs[0]= 1 + number of first different keypart
822 diffs[1]=offset: (last_key + diffs[1]) points to first value in
823 last_key that is NULL or different from corresponding
824 value in prev_key.
825 */
826 ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY,
827 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs);
828 seg= keyseg + diffs[0] - 1;
829
830 /* Find first NULL in last_key */
831 first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg;
832 for (kp= 0; kp < first_null_seg; kp++)
833 notnull[kp]++;
834
835 /*
836 Return 1+ number of first key part where values differ. Don't care if
837 these were NULLs and not .... We compensate for that in
838 maria_update_key_parts.
839 */
840 return diffs[0];
841 }
842
843
/*
  Check if index is ok

  SYNOPSIS
    chk_index()
    param         Check parameter; its statistics counters are updated
    info          Maria handler
    keyinfo       Definition of the key that is checked
    anc_page      Key page to check
    keys          In/out: number of keys found so far
    key_checksum  In/out: running checksum over the data part of all keys
    level         Depth of anc_page; only used to maintain param->max_level

  NOTES
    For node pages (anc_page->node != 0) chk_index_down() is called for
    every child pointer before the key that follows the pointer is checked.
    Spatial and RTree indexes are not checked at all.

  RETURN
    0   ok (or index type is not checked)
    1   Error found, or the check was killed
    -1  Could not allocate the temporary key block buffer
*/

static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
                     MARIA_PAGE *anc_page, ha_rows *keys,
                     ha_checksum *key_checksum, uint level)
{
  int flag;
  uint comp_flag, page_flag, nod_flag;
  uchar *temp_buff, *keypos, *old_keypos, *endpos;
  my_off_t next_page,record;
  MARIA_SHARE *share= info->s;
  char llbuff[22];
  uint diff_pos[2];
  uchar tmp_key_buff[MARIA_MAX_KEY_BUFF];
  MARIA_KEY tmp_key;
  DBUG_ENTER("chk_index");
  DBUG_DUMP("buff", anc_page->buff, anc_page->size);

  /* TODO: implement appropriate check for RTree keys */
  if (keyinfo->flag & (HA_SPATIAL | HA_RTREE_INDEX))
    DBUG_RETURN(0);

  /* Scratch buffer of one key block; it is passed on to chk_index_down() */
  if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length)))
  {
    _ma_check_print_error(param,"Not enough memory for keyblock");
    DBUG_RETURN(-1);
  }

  if (keyinfo->flag & HA_NOSAME)
  {
    /* Not real duplicates */
    comp_flag=SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT;
  }
  else
    comp_flag=SEARCH_SAME;                      /* Keys in positionorder */

  page_flag= anc_page->flag;
  nod_flag= anc_page->node;
  /* First child pointer (if any) starts directly after the page header */
  old_keypos= anc_page->buff + share->keypage_header;
  keypos= old_keypos + nod_flag;
  endpos= anc_page->buff + anc_page->size;

  /* Collect space usage statistics for this page */
  param->keydata+= anc_page->size;
  param->totaldata+= keyinfo->block_length;     /* INFO */
  param->key_blocks++;
  if (level > param->max_level)
    param->max_level=level;

  /* The two checks below only report the problem; checking continues */
  if (_ma_get_keynr(share, anc_page->buff) != keyinfo->key_nr)
    _ma_check_print_error(param, "Page at %s is not marked for index %u",
                          llstr(anc_page->pos, llbuff),
                          (uint) keyinfo->key_nr);
  if ((page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
      !share->base.born_transactional)
  {
    _ma_check_print_error(param,
                          "Page at %s is marked with HAS_TRANSID even if "
                          "table is not transactional",
                          llstr(anc_page->pos, llbuff));
  }

  if (anc_page->size > share->max_index_block_size)
  {
    _ma_check_print_error(param,
                          "Page at %s has impossible (too big) pagelength",
                          llstr(anc_page->pos, llbuff));
    goto err;
  }

  info->last_key.keyinfo= tmp_key.keyinfo= keyinfo;
  info->lastinx= ~0;                            /* Safety */
  tmp_key.data= tmp_key_buff;
  /*
    Loop over all keys on the page. After each iteration the key just
    checked (tmp_key) is copied to info->last_key so that the next key can
    be compared against its predecessor.
  */
  for ( ;; _ma_copy_key(&info->last_key, &tmp_key))
  {
    if (nod_flag)
    {
      /* Node page: check the child page that keypos points to first */
      if (_ma_killed_ptr(param))
        goto err;
      next_page= _ma_kpos(nod_flag,keypos);
      if (chk_index_down(param,info,keyinfo,next_page,
                         temp_buff,keys,key_checksum,level+1))
      {
        DBUG_DUMP("page_data", old_keypos, (uint) (keypos - old_keypos));
        goto err;
      }
    }
    old_keypos=keypos;
    /* Stop at end of page or when get_key() can't return another key */
    if (keypos >= endpos ||
        !(*keyinfo->get_key)(&tmp_key, page_flag, nod_flag, &keypos))
      break;
    if (keypos > endpos)
    {
      _ma_check_print_error(param,
                            "Page length and length of keys don't match at "
                            "page: %s",
                            llstr(anc_page->pos,llbuff));
      goto err;
    }
    if (share->data_file_type == BLOCK_RECORD &&
        !(page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
        key_has_transid(tmp_key.data + tmp_key.data_length +
                        share->rec_reflength-1))
    {
      _ma_check_print_error(param,
                            "Found key marked for transid on page that is not "
                            "marked for transid at: %s",
                            llstr(anc_page->pos,llbuff));
      goto err;
    }

    /*
      (*keys)++ is 0 only for the very first key, which has no predecessor
      to compare with. For all other keys a compare result >= 0 is
      reported as an error: either a duplicate or a key in wrong position.
    */
    if ((*keys)++ &&
        (flag=ha_key_cmp(keyinfo->seg, info->last_key.data, tmp_key.data,
                         tmp_key.data_length + tmp_key.ref_length,
                         (comp_flag | SEARCH_INSERT | (tmp_key.flag >> 1) |
                          info->last_key.flag), diff_pos)) >=0)
    {
      DBUG_DUMP_KEY("old", &info->last_key);
      DBUG_DUMP_KEY("new", &tmp_key);
      DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));

      if ((comp_flag & SEARCH_FIND) && flag == 0)
        _ma_check_print_error(param,"Found duplicated key at page %s",
                              llstr(anc_page->pos,llbuff));
      else
        _ma_check_print_error(param,"Key in wrong position at page %s",
                              llstr(anc_page->pos,llbuff));
      goto err;
    }

    if (param->testflag & T_STATISTICS)
    {
      if (*keys != 1L)                          /* not first_key */
      {
        /*
          diff_pos[0] is used as 1-based index into unique_count. For
          stats methods other than the two below it still holds the value
          set by the order comparison above.
        */
        if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
          ha_key_cmp(keyinfo->seg, info->last_key.data,
                     tmp_key.data, tmp_key.data_length,
                     SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
                     diff_pos);
        else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
        {
          diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg,
                                                        param->notnull_count,
                                                        info->last_key.data,
                                                        tmp_key.data);
        }
        param->unique_count[diff_pos[0]-1]++;
      }
      else
      {
        if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
          maria_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
                                            tmp_key.data);
      }
    }
    (*key_checksum)+= maria_byte_checksum(tmp_key.data, tmp_key.data_length);
    record= _ma_row_pos_from_key(&tmp_key);

    if (keyinfo->flag & HA_FULLTEXT)            /* special handling for ft2 */
    {
      uint off;
      int subkeys;
      get_key_full_length_rdonly(off, tmp_key.data);
      subkeys= ft_sintXkorr(tmp_key.data + off);
      if (subkeys < 0)
      {
        /*
          Negative value: 'record' is checked as the root of a second level
          tree (using share->ft2_keyinfo) and -subkeys must match the
          number of keys found in that tree.
        */
        ha_rows tmp_keys=0;
        share->ft2_keyinfo.key_nr= keyinfo->key_nr;
        if (chk_index_down(param,info,&share->ft2_keyinfo,record,
                           temp_buff,&tmp_keys,key_checksum,1))
          goto err;
        if (tmp_keys + subkeys)
        {
          _ma_check_print_error(param,
                                "Number of words in the 2nd level tree "
                                "does not match the number in the header. "
                                "Parent word in on the page %s, offset %u",
                                llstr(anc_page->pos,llbuff),
                                (uint) (old_keypos - anc_page->buff));
          goto err;
        }
        /* The parent key itself was already counted by (*keys)++ above */
        (*keys)+=tmp_keys-1;
        continue;
      }
      /* fall through */
    }
    /*
      Verify that the row position is inside the data file:
      - formats other than BLOCK_RECORD / NO_RECORD: compared directly
        against the data file length
      - BLOCK_RECORD: page number * min_block_length is compared against
        the data file length
      - NO_RECORD: position must be 0
    */
    if ((share->data_file_type != BLOCK_RECORD &&
         share->data_file_type != NO_RECORD &&
         record >= share->state.state.data_file_length) ||
        (share->data_file_type == BLOCK_RECORD &&
         ma_recordpos_to_page(record) * share->base.min_block_length >=
         share->state.state.data_file_length) ||
        (share->data_file_type == NO_RECORD && record != 0))
    {
#ifndef DBUG_OFF
      char llbuff2[22], llbuff3[22];
#endif
      _ma_check_print_error(param,
                            "Found key at page %s that points to record "
                            "outside datafile",
                            llstr(anc_page->pos,llbuff));
      DBUG_PRINT("test",("page: %s record: %s filelength: %s",
                         llstr(anc_page->pos,llbuff),llstr(record,llbuff2),
                         llstr(share->state.state.data_file_length,llbuff3)));
      DBUG_DUMP_KEY("key", &tmp_key);
      DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));
      goto err;
    }
    param->record_checksum+= (ha_checksum) record;
  }
  /* The keys must fill the page exactly up to the stored page size */
  if (keypos != endpos)
  {
    _ma_check_print_error(param,
                          "Keyblock size at page %s is not correct. "
                          "Block length: %u key length: %u",
                          llstr(anc_page->pos, llbuff), anc_page->size,
                          (uint) (keypos - anc_page->buff));
    goto err;
  }
  my_afree(temp_buff);
  DBUG_RETURN(0);
 err:
  my_afree(temp_buff);
  DBUG_RETURN(1);
} /* chk_index */
1068
1069
1070 /* Calculate a checksum of 1+2+3+4...N = N*(N+1)/2 without overflow */
1071
calc_checksum(ha_rows count)1072 static ha_checksum calc_checksum(ha_rows count)
1073 {
1074 ulonglong sum,a,b;
1075 DBUG_ENTER("calc_checksum");
1076
1077 sum=0;
1078 a=count; b=count+1;
1079 if (a & 1)
1080 b>>=1;
1081 else
1082 a>>=1;
1083 while (b)
1084 {
1085 if (b & 1)
1086 sum+=a;
1087 a<<=1; b>>=1;
1088 }
1089 DBUG_PRINT("exit",("sum: %lx",(ulong) sum));
1090 DBUG_RETURN((ha_checksum) sum);
1091 } /* calc_checksum */
1092
1093
1094 /* Calc length of key in normal isam */
1095
isam_key_length(MARIA_HA * info,register MARIA_KEYDEF * keyinfo)1096 static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo)
1097 {
1098 uint length;
1099 HA_KEYSEG *keyseg;
1100 DBUG_ENTER("isam_key_length");
1101
1102 length= info->s->rec_reflength;
1103 for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++)
1104 length+= keyseg->length;
1105
1106 DBUG_PRINT("exit",("length: %d",length));
1107 DBUG_RETURN(length);
1108 } /* key_length */
1109
1110
1111
/*
  Convert a record position to text

  For BLOCK_RECORD tables the position is written as "page:row" in
  decimal; for all other formats it is written with llstr().
  The result is stored in buff.
*/

static void record_pos_to_txt(MARIA_HA *info, my_off_t recpos,
                              char *buff)
{
  my_off_t page_no;
  uint dir_entry;
  char *next;

  if (info->s->data_file_type != BLOCK_RECORD)
  {
    llstr(recpos, buff);
    return;
  }

  page_no= ma_recordpos_to_page(recpos);
  dir_entry= ma_recordpos_to_dir_entry(recpos);
  next= longlong10_to_str(page_no, buff, 10);
  *next++= ':';
  longlong10_to_str(dir_entry, next, 10);
}
1126
1127
1128 /*
1129 Check that keys in records exist in index tree
1130
1131 SYNOPSIS
1132 check_keys_in_record()
1133 param Check paramenter
1134 info Maria handler
1135 extend Type of check (extended or normal)
1136 start_recpos Position to row
1137 record Record buffer
1138
1139 NOTES
1140 This function also calculates record checksum & number of rows
1141 */
1142
check_keys_in_record(HA_CHECK * param,MARIA_HA * info,int extend,my_off_t start_recpos,uchar * record)1143 static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend,
1144 my_off_t start_recpos, uchar *record)
1145 {
1146 MARIA_SHARE *share= info->s;
1147 MARIA_KEYDEF *keyinfo;
1148 char llbuff[22+4];
1149 uint keynr;
1150
1151 param->tmp_record_checksum+= (ha_checksum) start_recpos;
1152 param->records++;
1153 if (param->records % WRITE_COUNT == 0)
1154 {
1155 if (param->testflag & T_WRITE_LOOP)
1156 {
1157 printf("%s\r", llstr(param->records, llbuff));
1158 fflush(stdout);
1159 }
1160 _ma_report_progress(param, param->records, share->state.state.records);
1161 }
1162
1163 /* Check if keys match the record */
1164 for (keynr=0, keyinfo= share->keyinfo; keynr < share->base.keys;
1165 keynr++, keyinfo++)
1166 {
1167 if (maria_is_key_active(share->state.key_map, keynr))
1168 {
1169 MARIA_KEY key;
1170 if (!(keyinfo->flag & HA_FULLTEXT))
1171 {
1172 (*keyinfo->make_key)(info, &key, keynr, info->lastkey_buff, record,
1173 start_recpos, 0);
1174 info->last_key.keyinfo= key.keyinfo;
1175 if (extend)
1176 {
1177 /* We don't need to lock the key tree here as we don't allow
1178 concurrent threads when running maria_chk
1179 */
1180 int search_result=
1181 #ifdef HAVE_RTREE_KEYS
1182 (keyinfo->flag & (HA_SPATIAL | HA_RTREE_INDEX)) ?
1183 maria_rtree_find_first(info, &key, MBR_EQUAL | MBR_DATA) :
1184 #endif
1185 _ma_search(info, &key, SEARCH_SAME, share->state.key_root[keynr]);
1186 if (search_result)
1187 {
1188 record_pos_to_txt(info, start_recpos, llbuff);
1189 _ma_check_print_error(param,
1190 "Record at: %14s "
1191 "Can't find key for index: %2d",
1192 llbuff, keynr+1);
1193 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1194 return -1;
1195 }
1196 }
1197 else
1198 param->tmp_key_crc[keynr]+=
1199 maria_byte_checksum(key.data, key.data_length);
1200 }
1201 }
1202 }
1203 return 0;
1204 }
1205
1206
1207 /*
1208 Functions to loop through all rows and check if they are ok
1209
1210 NOTES
1211 One function for each record format
1212
1213 RESULT
1214 0 ok
1215 -1 Interrupted by user
1216 1 Error
1217 */
1218
/*
  Check all rows in a data file with fixed length records

  Reads the data file sequentially through param->read_cache, one record
  of share->base.pack_reclength bytes at a time. Records whose first byte
  is 0 are counted as deleted. For all other records the checksum is added
  to param->glob_crc and the keys are checked with check_keys_in_record().

  Returns 0 if ok, -1 if interrupted and 1 on error.
*/

static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend,
                               uchar *record)
{
  MARIA_SHARE *share= info->s;
  my_off_t start_recpos, pos;
  char llbuff[22];

  pos= 0;
  while (pos < share->state.state.data_file_length)
  {
    if (_ma_killed_ptr(param))
      return -1;
    if (my_b_read(&param->read_cache, record,
                  share->base.pack_reclength))
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: "
                            "%s",
                            my_errno, llstr(pos, llbuff));
      return 1;
    }
    start_recpos= pos;
    pos+= share->base.pack_reclength;
    param->splits++;
    if (*record == '\0')
    {
      param->del_blocks++;
      param->del_length+= share->base.pack_reclength;
      continue;                                 /* Record removed */
    }
    param->glob_crc+= _ma_static_checksum(info,record);
    param->used+= share->base.pack_reclength;
    if (check_keys_in_record(param, info, extend, start_recpos, record))
      return 1;
  }
  return 0;
}
1256
1257
/*
  Check all rows in a data file with dynamic length records

  The file is scanned from position 0. For every record all its blocks
  are followed through block_info.next_filepos and the data parts are
  collected in info->rec_buff. The collected record is then unpacked,
  optionally verified with _ma_rec_check(), and its keys are checked with
  check_keys_in_record(). Deleted blocks and blocks reported with
  BLOCK_SYNC_ERROR for the first block of a record are skipped.

  Returns 0 if ok, -1 if interrupted and 1 on error.
*/

static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
                                uchar *record)
{
  MARIA_BLOCK_INFO block_info;
  MARIA_SHARE *share= info->s;
  my_off_t UNINIT_VAR(start_recpos), start_block, pos;
  uchar *UNINIT_VAR(to);
  ulong UNINIT_VAR(left_length);
  uint b_type;
  char llbuff[22],llbuff2[22],llbuff3[22];
  DBUG_ENTER("check_dynamic_record");

  pos= 0;
  while (pos < share->state.state.data_file_length)
  {
    my_bool got_error= 0;
    int flag;                   /* Number of blocks handled for this record */
    if (_ma_killed_ptr(param))
      DBUG_RETURN(-1);

    flag= block_info.second_read=0;
    block_info.next_filepos=pos;
    /* Loop over all blocks that belong to the record starting at pos */
    do
    {
      if (_ma_read_cache(info, &param->read_cache, block_info.header,
                         (start_block=block_info.next_filepos),
                         sizeof(block_info.header),
                         (flag ? 0 : READING_NEXT) | READING_HEADER))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at "
                              "position: %s",
                              my_errno, llstr(start_block, llbuff));
        DBUG_RETURN(1);
      }

      if (start_block & (MARIA_DYN_ALIGN_SIZE-1))
      {
        _ma_check_print_error(param,"Wrong aligned block at %s",
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
      }
      b_type= _ma_get_block_info(info, &block_info,-1,start_block);
      if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
                    BLOCK_FATAL_ERROR))
      {
        if (b_type & BLOCK_SYNC_ERROR)
        {
          /* Only accepted for the first block of a record */
          if (flag)
          {
            _ma_check_print_error(param,"Unexpected byte: %d at link: %s",
                                  (int) block_info.header[0],
                                  llstr(start_block,llbuff));
            DBUG_RETURN(1);
          }
          pos=block_info.filepos+block_info.block_len;
          goto next;
        }
        if (b_type & BLOCK_DELETED)
        {
          if (block_info.block_len < share->base.min_block_length)
          {
            _ma_check_print_error(param,
                                  "Deleted block with impossible length %lu "
                                  "at %s",
                                  block_info.block_len,llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          /* Both delete links must be unset or point inside the data file */
          if ((block_info.next_filepos != HA_OFFSET_ERROR &&
               block_info.next_filepos >= share->state.state.data_file_length) ||
              (block_info.prev_filepos != HA_OFFSET_ERROR &&
               block_info.prev_filepos >= share->state.state.data_file_length))
          {
            _ma_check_print_error(param,"Delete link points outside datafile "
                                  "at %s",
                                  llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          param->del_blocks++;
          param->del_length+= block_info.block_len;
          param->splits++;
          pos= block_info.filepos+block_info.block_len;
          goto next;
        }
        _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s",
                              block_info.header[0],block_info.header[1],
                              block_info.header[2],
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
      }
      if (share->state.state.data_file_length < block_info.filepos+
          block_info.block_len)
      {
        _ma_check_print_error(param,
                              "Recordlink that points outside datafile at %s",
                              llstr(pos,llbuff));
        got_error=1;
        break;
      }
      param->splits++;
      if (!flag++)                              /* First block */
      {
        start_recpos=pos;
        /* The scan continues directly after the first block of the record */
        pos=block_info.filepos+block_info.block_len;
        if (block_info.rec_len > (uint) share->base.max_pack_length)
        {
          my_errno= HA_ERR_WRONG_IN_RECORD;
          _ma_check_print_error(param,"Found too long record (%lu) at %s",
                                (ulong) block_info.rec_len,
                                llstr(start_recpos,llbuff));
          got_error=1;
          break;
        }
        if (share->base.blobs)
        {
          if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
                               block_info.rec_len +
                               share->base.extra_rec_buff_size))

          {
            _ma_check_print_error(param,
                                  "Not enough memory (%lu) for blob at %s",
                                  (ulong) block_info.rec_len,
                                  llstr(start_recpos,llbuff));
            got_error=1;
            break;
          }
        }
        to= info->rec_buff;
        left_length= block_info.rec_len;
      }
      if (left_length < block_info.data_len)
      {
        _ma_check_print_error(param,"Found too long record (%lu) at %s",
                              (ulong) block_info.data_len,
                              llstr(start_recpos,llbuff));
        got_error=1;
        break;
      }
      /* Append the data part of this block to the collected record */
      if (_ma_read_cache(info, &param->read_cache, to, block_info.filepos,
                         (uint) block_info.data_len,
                         flag == 1 ? READING_NEXT : 0))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at "
                              "position: %s", my_errno,
                              llstr(block_info.filepos, llbuff));

        DBUG_RETURN(1);
      }
      to+=block_info.data_len;
      param->link_used+= block_info.filepos-start_block;
      param->used+= block_info.filepos - start_block + block_info.data_len;
      param->empty+= block_info.block_len-block_info.data_len;
      left_length-= block_info.data_len;
      if (left_length)
      {
        /* More data is expected, so this must not be the last block */
        if (b_type & BLOCK_LAST)
        {
          _ma_check_print_error(param,
                                "Wrong record length %s of %s at %s",
                                llstr(block_info.rec_len-left_length,llbuff),
                                llstr(block_info.rec_len, llbuff2),
                                llstr(start_recpos,llbuff3));
          got_error=1;
          break;
        }
        if (share->state.state.data_file_length < block_info.next_filepos)
        {
          _ma_check_print_error(param,
                                "Found next-recordlink that points outside "
                                "datafile at %s",
                                llstr(block_info.filepos,llbuff));
          got_error=1;
          break;
        }
      }
    } while (left_length);

    if (! got_error)
    {
      if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) ==
          MY_FILE_ERROR)
      {
        _ma_check_print_error(param,"Found wrong record at %s",
                              llstr(start_recpos,llbuff));
        got_error=1;
      }
      else
      {
        ha_checksum checksum= 0;
        if (share->calc_checksum)
          checksum= (*share->calc_checksum)(info, record);

        if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE))
        {
          if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len,
                            MY_TEST(share->calc_checksum), checksum))
          {
            _ma_check_print_error(param,"Found wrong packed record at %s",
                                  llstr(start_recpos,llbuff));
            got_error= 1;
          }
        }
        param->glob_crc+= checksum;
      }

      if (! got_error)
      {
        if (check_keys_in_record(param, info, extend, start_recpos, record))
          DBUG_RETURN(1);
      }
      else
      {
        if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
          DBUG_RETURN(1);
      }
    }
    else if (!flag)
    {
      /* Error before the first block was accepted; step over the block */
      pos= block_info.filepos+block_info.block_len;
    }
next:;
  }
  DBUG_RETURN(0);
}
1482
1483
/*
  Check all rows in a data file with compressed records

  The scan starts after the pack header (share->pack.header_length). For
  every record the block info is read, the record length is compared with
  share->min_pack_length / share->max_pack_length, the record is unpacked
  and its keys are checked with check_keys_in_record().

  Returns 0 if ok, -1 if interrupted and 1 on error.
*/

static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend,
                                   uchar *record)
{
  MARIA_BLOCK_INFO block_info;
  MARIA_SHARE *share= info->s;
  my_off_t start_recpos, pos;
  char llbuff[22];
  my_bool got_error= 0;
  DBUG_ENTER("check_compressed_record");

  pos= share->pack.header_length;               /* Skip header */
  while (pos < share->state.state.data_file_length)
  {
    if (_ma_killed_ptr(param))
      DBUG_RETURN(-1);

    if (_ma_read_cache(info, &param->read_cache, block_info.header, pos,
                       share->pack.ref_length, READING_NEXT))
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: "
                            "%s",
                            my_errno, llstr(pos, llbuff));
      DBUG_RETURN(1);
    }

    start_recpos= pos;
    param->splits++;
    _ma_pack_get_block_info(info, &info->bit_buff, &block_info,
                            &info->rec_buff, &info->rec_buff_size, -1,
                            start_recpos);
    /* Position of the next record; also used when this one is skipped */
    pos=block_info.filepos+block_info.rec_len;
    if (block_info.rec_len < (uint) share->min_pack_length ||
        block_info.rec_len > (uint) share->max_pack_length)
    {
      _ma_check_print_error(param,
                            "Found block with wrong recordlength: %lu at %s",
                            block_info.rec_len, llstr(start_recpos,llbuff));
      got_error=1;
      goto end;
    }
    if (_ma_read_cache(info, &param->read_cache, info->rec_buff,
                       block_info.filepos, block_info.rec_len, READING_NEXT))
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: "
                            "%s",
                            my_errno, llstr(block_info.filepos, llbuff));
      DBUG_RETURN(1);
    }
    if (_ma_pack_rec_unpack(info, &info->bit_buff, record,
                            info->rec_buff, block_info.rec_len))
    {
      _ma_check_print_error(param,"Found wrong record at %s",
                            llstr(start_recpos,llbuff));
      got_error=1;
      goto end;
    }
    param->glob_crc+= (*share->calc_checksum)(info,record);
    param->link_used+= (block_info.filepos - start_recpos);
    param->used+= (pos-start_recpos);

end:
    if (! got_error)
    {
      if (check_keys_in_record(param, info, extend, start_recpos, record))
        DBUG_RETURN(1);
    }
    else
    {
      got_error= 0;                             /* Reset for next loop */
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        DBUG_RETURN(1);
    }
  }
  DBUG_RETURN(0);
}
1561
1562
1563 /*
1564 Check if layout on head or tail page is ok
1565
1566 NOTES
1567 This is for rows-in-block format.
1568 */
1569
check_page_layout(HA_CHECK * param,MARIA_HA * info,my_off_t page_pos,uchar * page,uint row_count,uint head_empty,uint * real_rows_found,uint * free_slots_found)1570 static int check_page_layout(HA_CHECK *param, MARIA_HA *info,
1571 my_off_t page_pos, uchar *page,
1572 uint row_count, uint head_empty,
1573 uint *real_rows_found, uint *free_slots_found)
1574 {
1575 uint empty, last_row_end, row, first_dir_entry, free_entry, block_size;
1576 uint free_entries, prev_free_entry;
1577 uchar *dir_entry;
1578 char llbuff[22];
1579 my_bool error_in_free_list= 0;
1580 DBUG_ENTER("check_page_layout");
1581
1582 block_size= info->s->block_size;
1583 empty= 0;
1584 last_row_end= PAGE_HEADER_SIZE(info->s);
1585 *real_rows_found= 0;
1586
1587 /* Check free directory list */
1588 free_entry= (uint) page[DIR_FREE_OFFSET];
1589 free_entries= 0;
1590 prev_free_entry= END_OF_DIR_FREE_LIST;
1591 while (free_entry != END_OF_DIR_FREE_LIST)
1592 {
1593 uchar *dir;
1594 if (free_entry > row_count)
1595 {
1596 _ma_check_print_error(param,
1597 "Page %9s: Directory free entry points outside "
1598 "directory",
1599 llstr(page_pos, llbuff));
1600 error_in_free_list= 1;
1601 break;
1602 }
1603 dir= dir_entry_pos(page, block_size, free_entry);
1604 if (uint2korr(dir) != 0)
1605 {
1606 _ma_check_print_error(param,
1607 "Page %9s: Directory free entry points to "
1608 "not deleted entry",
1609 llstr(page_pos, llbuff));
1610 error_in_free_list= 1;
1611 break;
1612 }
1613 if (dir[2] != prev_free_entry)
1614 {
1615 _ma_check_print_error(param,
1616 "Page %9s: Directory free list back pointer "
1617 "points to wrong entry",
1618 llstr(page_pos, llbuff));
1619 error_in_free_list= 1;
1620 break;
1621 }
1622 prev_free_entry= free_entry;
1623 free_entry= dir[3];
1624 free_entries++;
1625 }
1626 *free_slots_found= free_entries;
1627
1628 /* Check directry */
1629 dir_entry= page+ block_size - PAGE_SUFFIX_SIZE;
1630 first_dir_entry= (block_size - row_count * DIR_ENTRY_SIZE -
1631 PAGE_SUFFIX_SIZE);
1632 for (row= 0 ; row < row_count ; row++)
1633 {
1634 uint pos, length;
1635 dir_entry-= DIR_ENTRY_SIZE;
1636 pos= uint2korr(dir_entry);
1637 if (!pos)
1638 {
1639 free_entries--;
1640 if (row == row_count -1)
1641 {
1642 _ma_check_print_error(param,
1643 "Page %9s: First entry in directory is 0",
1644 llstr(page_pos, llbuff));
1645 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1646 DBUG_RETURN(1);
1647 }
1648 continue; /* Deleted row */
1649 }
1650 (*real_rows_found)++;
1651 length= uint2korr(dir_entry+2);
1652 param->used+= length;
1653 if (pos < last_row_end)
1654 {
1655 _ma_check_print_error(param,
1656 "Page %9s: Row %3u overlapps with previous row",
1657 llstr(page_pos, llbuff), row);
1658 DBUG_RETURN(1);
1659 }
1660 empty+= (pos - last_row_end);
1661 last_row_end= pos + length;
1662 if (last_row_end > first_dir_entry)
1663 {
1664 _ma_check_print_error(param,
1665 "Page %9s: Row %3u overlapps with directory",
1666 llstr(page_pos, llbuff), row);
1667 DBUG_RETURN(1);
1668 }
1669 }
1670 empty+= (first_dir_entry - last_row_end);
1671
1672 if (empty != head_empty)
1673 {
1674 _ma_check_print_error(param,
1675 "Page %9s: Wrong empty size. Stored: %5u "
1676 "Actual: %5u",
1677 llstr(page_pos, llbuff), head_empty, empty);
1678 param->err_count++;
1679 }
1680 if (free_entries != 0 && !error_in_free_list)
1681 {
1682 _ma_check_print_error(param,
1683 "Page %9s: Directory free link don't include "
1684 "all free entries",
1685 llstr(page_pos, llbuff));
1686 param->err_count++;
1687 }
1688 DBUG_RETURN(param->err_count &&
1689 (param->err_count >= MAXERR || !(param->testflag & T_VERBOSE)));
1690 }
1691
1692
1693 /*
1694 Check all rows on head page
1695
1696 NOTES
1697 This is for rows-in-block format.
1698
1699 Before this, we have already called check_page_layout(), so
1700 we know the block is logicaly correct (even if the rows may not be that)
1701
1702 RETURN
1703 0 ok
1704 1 error
1705 */
1706
1707
check_head_page(HA_CHECK * param,MARIA_HA * info,uchar * record,int extend,my_off_t page_pos,uchar * page_buff,uint row_count)1708 static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, uchar *record,
1709 int extend, my_off_t page_pos, uchar *page_buff,
1710 uint row_count)
1711 {
1712 MARIA_SHARE *share= info->s;
1713 uchar *dir_entry;
1714 uint row;
1715 char llbuff[22], llbuff2[22];
1716 ulonglong page= page_pos / share->block_size;
1717 DBUG_ENTER("check_head_page");
1718
1719 dir_entry= page_buff+ share->block_size - PAGE_SUFFIX_SIZE;
1720 for (row= 0 ; row < row_count ; row++)
1721 {
1722 uint pos, length, flag;
1723 dir_entry-= DIR_ENTRY_SIZE;
1724 pos= uint2korr(dir_entry);
1725 if (!pos)
1726 continue;
1727 length= uint2korr(dir_entry+2);
1728 if (length < share->base.min_block_length)
1729 {
1730 _ma_check_print_error(param,
1731 "Page %9s: Row %3u is too short "
1732 "(%d of min %d bytes)",
1733 llstr(page, llbuff), row, length,
1734 (uint) share->base.min_block_length);
1735 DBUG_RETURN(1);
1736 }
1737 flag= (uint) (uchar) page_buff[pos];
1738 if (flag & ~(ROW_FLAG_ALL))
1739 _ma_check_print_error(param,
1740 "Page %9s: Row %3u has wrong flag: %u",
1741 llstr(page, llbuff), row, flag);
1742
1743 DBUG_PRINT("info", ("rowid: %s page: %lu row: %u",
1744 llstr(ma_recordpos(page, row), llbuff),
1745 (ulong) page, row));
1746 info->cur_row.trid= 0;
1747 if (_ma_read_block_record2(info, record, page_buff+pos,
1748 page_buff+pos+length))
1749 {
1750 _ma_check_print_error(param,
1751 "Page %9s: Row %3d is crashed",
1752 llstr(page, llbuff), row);
1753 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1754 DBUG_RETURN(1);
1755 continue;
1756 }
1757 set_if_bigger(param->max_found_trid, info->cur_row.trid);
1758 if (info->cur_row.trid > param->max_trid)
1759 _ma_check_print_not_visible_error(param, info->cur_row.trid);
1760
1761 if (share->calc_checksum)
1762 {
1763 ha_checksum checksum= (*share->calc_checksum)(info, record);
1764 if (info->cur_row.checksum != (checksum & 255))
1765 _ma_check_print_error(param, "Page %9s: Row %3d has wrong checksum",
1766 llstr(page, llbuff), row);
1767 param->glob_crc+= checksum;
1768 }
1769 if (info->cur_row.extents_count)
1770 {
1771 uchar *extents= info->cur_row.extents;
1772 uint i;
1773 /* Check that bitmap has the right marker for the found extents */
1774 for (i= 0 ; i < info->cur_row.extents_count ; i++)
1775 {
1776 pgcache_page_no_t extent_page;
1777 uint page_count, page_type;
1778 extent_page= uint5korr(extents);
1779 page_count= uint2korr(extents+5) & ~START_EXTENT_BIT;
1780 extents+= ROW_EXTENT_SIZE;
1781 page_type= BLOB_PAGE;
1782 if (page_count & TAIL_BIT)
1783 {
1784 page_count= 1;
1785 page_type= TAIL_PAGE;
1786 }
1787 /*
1788 TODO OPTIMIZE:
1789 Check the whole extent with one test and only do the loop if
1790 something is wrong (for exact error reporting)
1791 */
1792 for ( ; page_count--; extent_page++)
1793 {
1794 uint bitmap_pattern;
1795 if (_ma_check_if_right_bitmap_type(info, page_type, extent_page,
1796 &bitmap_pattern))
1797 {
1798 _ma_check_print_error(param,
1799 "Page %9s: Row: %3d has an extent with "
1800 "wrong information in bitmap: "
1801 "Page: %9s Page_type: %d Bitmap: %d",
1802 llstr(page, llbuff), row,
1803 llstr(extent_page, llbuff2),
1804 page_type, bitmap_pattern);
1805 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1806 DBUG_RETURN(1);
1807 }
1808 }
1809 }
1810 }
1811 param->full_page_count+= info->cur_row.full_page_count;
1812 param->tail_count+= info->cur_row.tail_count;
1813 if (check_keys_in_record(param, info, extend,
1814 ma_recordpos(page, row), record))
1815 DBUG_RETURN(1);
1816 }
1817 DBUG_RETURN(0);
1818 }
1819
1820
1821 /*
1822 Check if rows-in-block data file is consistent
1823 */
1824
check_block_record(HA_CHECK * param,MARIA_HA * info,int extend,uchar * record)1825 static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
1826 uchar *record)
1827 {
1828 MARIA_SHARE *share= info->s;
1829 my_off_t pos;
1830 pgcache_page_no_t page;
1831 uchar *page_buff, *bitmap_buff, *data;
1832 char llbuff[22], llbuff2[22];
1833 uint block_size= share->block_size;
1834 ha_rows full_page_count, tail_count;
1835 my_bool UNINIT_VAR(full_dir), now_transactional;
1836 uint offset_page, offset, free_count;
1837
1838 if (_ma_scan_init_block_record(info))
1839 {
1840 _ma_check_print_error(param, "got error %d when initializing scan",
1841 my_errno);
1842 return 1;
1843 }
1844
1845 now_transactional= info->s->now_transactional;
1846 info->s->now_transactional= 0; /* Don't log changes */
1847
1848 bitmap_buff= info->scan.bitmap_buff;
1849 page_buff= info->scan.page_buff;
1850 full_page_count= tail_count= 0;
1851 param->full_page_count= param->tail_count= 0;
1852 param->used= param->link_used= 0;
1853 param->splits= share->state.state.data_file_length / block_size;
1854
1855 for (pos= 0, page= 0;
1856 pos < share->state.state.data_file_length;
1857 pos+= block_size, page++)
1858 {
1859 uint UNINIT_VAR(row_count), real_row_count, UNINIT_VAR(empty_space),
1860 page_type, bitmap_pattern;
1861 uint bitmap_for_page;
1862
1863 if (_ma_killed_ptr(param))
1864 {
1865 _ma_scan_end_block_record(info);
1866 info->s->now_transactional= now_transactional;
1867 return -1; /* Interrupted */
1868 }
1869 if ((page % share->bitmap.pages_covered) == 0)
1870 {
1871 /* Bitmap page */
1872 if (pagecache_read(share->pagecache,
1873 &info->s->bitmap.file,
1874 page, 1,
1875 bitmap_buff,
1876 PAGECACHE_PLAIN_PAGE,
1877 PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
1878 {
1879 _ma_check_print_error(param,
1880 "Page %9s: Got error: %d when reading datafile",
1881 llstr(page, llbuff), my_errno);
1882 goto err;
1883 }
1884 param->used+= block_size;
1885 param->link_used+= block_size;
1886 if (param->verbose > 2)
1887 print_bitmap_description(share, page, bitmap_buff);
1888 continue;
1889 }
1890 /* Skip pages marked as empty in bitmap */
1891 offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
1892 offset= offset_page & 7;
1893 data= bitmap_buff + offset_page / 8;
1894 bitmap_pattern= uint2korr(data);
1895 if (!(bitmap_for_page= ((bitmap_pattern >> offset) & 7)))
1896 {
1897 param->empty+= block_size;
1898 param->del_blocks++;
1899 continue;
1900 }
1901
1902 if (pagecache_read(share->pagecache,
1903 &info->dfile,
1904 page, 1,
1905 page_buff,
1906 share->page_type,
1907 PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
1908 {
1909 _ma_check_print_error(param,
1910 "Page %9s: Got error: %d when reading datafile",
1911 llstr(page, llbuff), my_errno);
1912 goto err;
1913 }
1914 page_type= page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK;
1915 if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE)
1916 {
1917 _ma_check_print_error(param,
1918 "Page: %9s Found wrong page type %d. Bitmap: %d '%s'",
1919 llstr(page, llbuff), page_type,
1920 bitmap_for_page, bits_to_txt[bitmap_for_page]);
1921 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1922 goto err;
1923 continue;
1924 }
1925 switch ((enum en_page_type) page_type) {
1926 case UNALLOCATED_PAGE:
1927 case MAX_PAGE_TYPE:
1928 default:
1929 DBUG_ASSERT(0); /* Impossible */
1930 break;
1931 case HEAD_PAGE:
1932 row_count= page_buff[DIR_COUNT_OFFSET];
1933 empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
1934 param->used+= block_size - empty_space;
1935 param->link_used+= (PAGE_HEADER_SIZE(info->s) + PAGE_SUFFIX_SIZE +
1936 row_count * DIR_ENTRY_SIZE);
1937 if (empty_space < share->bitmap.sizes[3])
1938 param->lost+= empty_space;
1939 if (check_page_layout(param, info, pos, page_buff, row_count,
1940 empty_space, &real_row_count, &free_count))
1941 goto err;
1942 full_dir= (row_count == MAX_ROWS_PER_PAGE &&
1943 page_buff[DIR_FREE_OFFSET] == END_OF_DIR_FREE_LIST);
1944 break;
1945 case TAIL_PAGE:
1946 row_count= page_buff[DIR_COUNT_OFFSET];
1947 empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
1948 param->used+= block_size - empty_space;
1949 param->link_used+= (PAGE_HEADER_SIZE(info->s) + PAGE_SUFFIX_SIZE +
1950 row_count * DIR_ENTRY_SIZE);
1951 if (empty_space < share->bitmap.sizes[6])
1952 param->lost+= empty_space;
1953 if (check_page_layout(param, info, pos, page_buff, row_count,
1954 empty_space, &real_row_count, &free_count))
1955 goto err;
1956 full_dir= (row_count - free_count >= MAX_ROWS_PER_PAGE -
1957 share->base.blobs);
1958 break;
1959 case BLOB_PAGE:
1960 full_page_count++;
1961 full_dir= 0;
1962 empty_space= block_size; /* for error reporting */
1963 param->link_used+= FULL_PAGE_HEADER_SIZE(info->s);
1964 param->used+= block_size;
1965 break;
1966 }
1967 if (_ma_check_bitmap_data(info, page_type,
1968 full_dir ? 0 : empty_space,
1969 bitmap_for_page))
1970 {
1971 _ma_check_print_error(param,
1972 "Page %9s: Wrong data in bitmap. Page_type: "
1973 "%d full: %d empty_space: %u Bitmap-bits: %d "
1974 "'%s'",
1975 llstr(page, llbuff), page_type, full_dir,
1976 empty_space, bitmap_for_page,
1977 bits_to_txt[bitmap_for_page]);
1978 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1979 goto err;
1980 }
1981 if ((enum en_page_type) page_type == BLOB_PAGE)
1982 continue;
1983 param->empty+= empty_space;
1984 if ((enum en_page_type) page_type == TAIL_PAGE)
1985 {
1986 tail_count+= real_row_count;
1987 continue;
1988 }
1989 if (check_head_page(param, info, record, extend, pos, page_buff,
1990 row_count))
1991 goto err;
1992 }
1993
1994 /* Verify that rest of bitmap is zero */
1995
1996 if (page % share->bitmap.pages_covered)
1997 {
1998 /* Not at end of bitmap */
1999 uint bitmap_pattern;
2000 uint byte_offset;
2001
2002 offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
2003 offset= offset_page & 7;
2004 byte_offset= offset_page / 8;
2005 data= bitmap_buff + byte_offset;
2006 bitmap_pattern= uint2korr(data);
2007 if (byte_offset + 1 == share->bitmap.max_total_size)
2008 {
2009 /* On last byte of bitmap; Remove possible checksum */
2010 bitmap_pattern&= 0xff;
2011 }
2012 if (((bitmap_pattern >> offset)) ||
2013 (byte_offset + 2 < share->bitmap.max_total_size &&
2014 _ma_check_if_zero(data+2, share->bitmap.max_total_size -
2015 byte_offset - 2)))
2016 {
2017 ulonglong bitmap_page;
2018 bitmap_page= page / share->bitmap.pages_covered;
2019 bitmap_page*= share->bitmap.pages_covered;
2020
2021 _ma_check_print_error(param,
2022 "Bitmap at page %s has pages reserved outside of "
2023 "data file length",
2024 llstr(bitmap_page, llbuff));
2025 DBUG_EXECUTE("bitmap", _ma_print_bitmap(&share->bitmap, bitmap_buff,
2026 bitmap_page););
2027 }
2028 }
2029
2030 _ma_scan_end_block_record(info);
2031
2032 if (full_page_count != param->full_page_count)
2033 _ma_check_print_error(param, "Full page count read through records was %s "
2034 "but we found %s pages while scanning table",
2035 llstr(param->full_page_count, llbuff),
2036 llstr(full_page_count, llbuff2));
2037 if (tail_count != param->tail_count)
2038 _ma_check_print_error(param, "Tail count read through records was %s but "
2039 "we found %s tails while scanning table",
2040 llstr(param->tail_count, llbuff),
2041 llstr(tail_count, llbuff2));
2042
2043 info->s->now_transactional= now_transactional;
2044 return param->error_printed != 0;
2045
2046 err:
2047 _ma_scan_end_block_record(info);
2048 info->s->now_transactional= now_transactional;
2049 return 1;
2050 }
2051
2052
2053 /* Check that record-link is ok */
2054
maria_chk_data_link(HA_CHECK * param,MARIA_HA * info,my_bool extend)2055 int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend)
2056 {
2057 MARIA_SHARE *share= info->s;
2058 int error;
2059 uchar *record;
2060 char llbuff[22],llbuff2[22],llbuff3[22];
2061 DBUG_ENTER("maria_chk_data_link");
2062
2063 if (!(param->testflag & T_SILENT))
2064 {
2065 if (extend)
2066 puts("- check records and index references");
2067 else
2068 puts("- check record links");
2069 }
2070
2071 if (!(record= (uchar*) my_malloc(share->base.default_rec_buff_size, MYF(0))))
2072 {
2073 _ma_check_print_error(param,"Not enough memory for record");
2074 DBUG_RETURN(-1);
2075 }
2076 param->records= param->del_blocks= 0;
2077 param->used= param->link_used= param->splits= param->del_length= 0;
2078 param->lost= 0;
2079 param->tmp_record_checksum= param->glob_crc= 0;
2080 param->err_count= 0;
2081
2082 error= 0;
2083 param->empty= share->pack.header_length;
2084
2085 bzero((char*) param->tmp_key_crc,
2086 share->base.keys * sizeof(param->tmp_key_crc[0]));
2087
2088 info->in_check_table= 1; /* Don't assert on checksum errors */
2089
2090 switch (share->data_file_type) {
2091 case BLOCK_RECORD:
2092 error= check_block_record(param, info, extend, record);
2093 break;
2094 case STATIC_RECORD:
2095 error= check_static_record(param, info, extend, record);
2096 break;
2097 case DYNAMIC_RECORD:
2098 error= check_dynamic_record(param, info, extend, record);
2099 break;
2100 case COMPRESSED_RECORD:
2101 error= check_compressed_record(param, info, extend, record);
2102 break;
2103 case NO_RECORD:
2104 param->records= share->state.state.records;
2105 param->record_checksum= 0;
2106 extend= 1; /* No row checksums */
2107 /* no data, nothing to do */
2108 break;
2109 } /* switch */
2110
2111 info->in_check_table= 0;
2112
2113 if (error)
2114 goto err;
2115
2116 if (param->testflag & T_WRITE_LOOP)
2117 {
2118 fputs(" \r",stdout);
2119 fflush(stdout);
2120 }
2121 if (param->records != share->state.state.records)
2122 {
2123 _ma_check_print_error(param,
2124 "Record-count is not ok; found %-10s Should be: %s",
2125 llstr(param->records,llbuff),
2126 llstr(share->state.state.records,llbuff2));
2127 error=1;
2128 }
2129 if (param->record_checksum &&
2130 param->record_checksum != param->tmp_record_checksum)
2131 {
2132 _ma_check_print_error(param,
2133 "Key pointers and record positions doesn't match");
2134 error=1;
2135 }
2136 if (param->glob_crc != share->state.state.checksum &&
2137 (share->options &
2138 (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)))
2139 {
2140 _ma_check_print_warning(param,
2141 "Record checksum is not the same as checksum "
2142 "stored in the index file");
2143 error=1;
2144 }
2145 if (!extend)
2146 {
2147 uint key;
2148 for (key=0 ; key < share->base.keys; key++)
2149 {
2150 if (param->tmp_key_crc[key] != param->key_crc[key] &&
2151 !(share->keyinfo[key].flag &
2152 (HA_FULLTEXT | HA_SPATIAL | HA_RTREE_INDEX)))
2153 {
2154 _ma_check_print_error(param,"Checksum for key: %2d doesn't match "
2155 "checksum for records",
2156 key+1);
2157 error=1;
2158 }
2159 }
2160 }
2161
2162 if (param->del_length != share->state.state.empty)
2163 {
2164 _ma_check_print_warning(param,
2165 "Found %s deleted space. Should be %s",
2166 llstr(param->del_length,llbuff2),
2167 llstr(share->state.state.empty,llbuff));
2168 }
2169 /* Skip following checks for BLOCK RECORD as they don't make any sence */
2170 if (share->data_file_type != BLOCK_RECORD)
2171 {
2172 if (param->used + param->empty + param->del_length !=
2173 share->state.state.data_file_length)
2174 {
2175 _ma_check_print_warning(param,
2176 "Found %s record data and %s unused data and %s "
2177 "deleted data",
2178 llstr(param->used, llbuff),
2179 llstr(param->empty,llbuff2),
2180 llstr(param->del_length,llbuff3));
2181 _ma_check_print_warning(param,
2182 "Total %s Should be: %s",
2183 llstr((param->used+param->empty +
2184 param->del_length), llbuff),
2185 llstr(share->state.state.data_file_length,
2186 llbuff2));
2187 }
2188 if (param->del_blocks != share->state.state.del)
2189 {
2190 _ma_check_print_warning(param,
2191 "Found %10s deleted blocks. Should be: %s",
2192 llstr(param->del_blocks,llbuff),
2193 llstr(share->state.state.del,llbuff2));
2194 }
2195 if (param->splits != share->state.split)
2196 {
2197 _ma_check_print_warning(param,
2198 "Found %10s parts. Should be: %s",
2199 llstr(param->splits, llbuff),
2200 llstr(share->state.split,llbuff2));
2201 }
2202 }
2203 if (param->testflag & T_INFO)
2204 {
2205 if (param->warning_printed || param->error_printed)
2206 puts("");
2207 if (param->used != 0 && ! param->error_printed)
2208 {
2209 if (param->records)
2210 {
2211 printf("Records:%18s M.recordlength:%9lu Packed:%14.0f%%\n",
2212 llstr(param->records,llbuff),
2213 (long)((param->used - param->link_used)/param->records),
2214 (share->base.blobs ? 0.0 :
2215 (ulonglong2double((ulonglong) share->base.reclength *
2216 param->records)-
2217 my_off_t2double(param->used))/
2218 ulonglong2double((ulonglong) share->base.reclength *
2219 param->records)*100.0));
2220 printf("Recordspace used:%9.0f%% Empty space:%12d%% "
2221 "Blocks/Record: %6.2f\n",
2222 (ulonglong2double(param->used - param->link_used)/
2223 ulonglong2double(param->used-param->link_used+param->empty) *
2224 100.0),
2225 (!param->records ? 100 :
2226 (int) (ulonglong2double(param->del_length+param->empty)/
2227 my_off_t2double(param->used)*100.0)),
2228 ulonglong2double(param->splits - param->del_blocks) /
2229 param->records);
2230 }
2231 else
2232 printf("Records:%18s\n", "0");
2233 }
2234 printf("Record blocks:%12s Delete blocks:%10s\n",
2235 llstr(param->splits - param->del_blocks, llbuff),
2236 llstr(param->del_blocks, llbuff2));
2237 printf("Record data: %12s Deleted data: %10s\n",
2238 llstr(param->used - param->link_used,llbuff),
2239 llstr(param->del_length, llbuff2));
2240 printf("Empty space: %12s Linkdata: %10s\n",
2241 llstr(param->empty, llbuff),llstr(param->link_used, llbuff2));
2242 if (share->data_file_type == BLOCK_RECORD)
2243 {
2244 printf("Full pages: %12s Tail count: %12s\n",
2245 llstr(param->full_page_count, llbuff),
2246 llstr(param->tail_count, llbuff2));
2247 printf("Lost space: %12s\n", llstr(param->lost, llbuff));
2248 if (param->max_found_trid)
2249 {
2250 printf("Max trans. id: %11s\n",
2251 llstr(param->max_found_trid, llbuff));
2252 }
2253 }
2254 }
2255 my_free(record);
2256 DBUG_RETURN (error);
2257
2258 err:
2259 my_free(record);
2260 param->testflag|=T_RETRY_WITHOUT_QUICK;
2261 DBUG_RETURN(1);
2262 } /* maria_chk_data_link */
2263
2264
2265 /**
2266 Prepares a table for a repair or index sort: flushes pages, records durably
2267 in the table that it is undergoing the operation (if that op crashes, that
2268 info will serve for Recovery and the user).
2269
2270 If we start overwriting the index file, and crash then, old REDOs will
2271 be tried and fail. To prevent that, we bump skip_redo_lsn, and thus we have
2272 to flush and sync pages so that old REDOs can be skipped.
2273 If this is not a bulk insert, which Recovery can handle gracefully (by
2274 truncating files, see UNDO_BULK_INSERT) we also mark the table
2275 crashed-on-repair, so that user knows it has to re-repair. If bulk insert we
2276 shouldn't mark it crashed-on-repair, because if we did this, the UNDO phase
2277 would skip the table (UNDO_BULK_INSERT would not be applied),
2278 and maria_chk would not improve that.
2279 If this is an OPTIMIZE which merely sorts index, we need to do the same
2280 too: old REDOs should not apply to the new index file.
2281 Only the flush is needed when in maria_chk which is not crash-safe.
2282
2283 @param info table
2284 @param param repair parameters
2285 @param discard_index if index pages can be thrown away
2286 */
2287
protect_against_repair_crash(MARIA_HA * info,const HA_CHECK * param,my_bool discard_index)2288 static my_bool protect_against_repair_crash(MARIA_HA *info,
2289 const HA_CHECK *param,
2290 my_bool discard_index)
2291 {
2292 MARIA_SHARE *share= info->s;
2293
2294 /*
2295 There are other than recovery-related reasons to do the writes below:
2296 - the physical size of the data file is sometimes used during repair: we
2297 need to flush to have it exact
2298 - we flush the state because maria_open(HA_OPEN_COPY) will want to read
2299 it from disk.
2300 */
2301 if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
2302 FLUSH_FORCE_WRITE,
2303 discard_index ? FLUSH_IGNORE_CHANGED :
2304 FLUSH_FORCE_WRITE) ||
2305 (share->changed &&
2306 _ma_state_info_write(share,
2307 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
2308 MA_STATE_INFO_WRITE_FULL_INFO |
2309 MA_STATE_INFO_WRITE_LOCK)))
2310 return TRUE;
2311 /* In maria_chk this is not needed: */
2312 if (maria_multi_threaded && share->base.born_transactional)
2313 {
2314 if ((param->testflag & T_NO_CREATE_RENAME_LSN) == 0)
2315 {
2316 /* this can be true only for a transactional table */
2317 maria_mark_in_repair(info);
2318 if (_ma_state_info_write(share,
2319 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
2320 MA_STATE_INFO_WRITE_LOCK))
2321 return TRUE;
2322 }
2323 if (translog_status == TRANSLOG_OK &&
2324 _ma_update_state_lsns(share, translog_get_horizon(),
2325 share->state.create_trid, FALSE, FALSE))
2326 return TRUE;
2327 if (_ma_sync_table_files(info))
2328 return TRUE;
2329 }
2330 return FALSE;
2331 }
2332
2333
2334 /**
2335 @brief Initialize variables for repair
2336 */
2337
initialize_variables_for_repair(HA_CHECK * param,MARIA_SORT_INFO * sort_info,MARIA_SORT_PARAM * sort_param,MARIA_HA * info,my_bool rep_quick,MARIA_SHARE * org_share)2338 static int initialize_variables_for_repair(HA_CHECK *param,
2339 MARIA_SORT_INFO *sort_info,
2340 MARIA_SORT_PARAM *sort_param,
2341 MARIA_HA *info,
2342 my_bool rep_quick,
2343 MARIA_SHARE *org_share)
2344 {
2345 MARIA_SHARE *share= info->s;
2346
2347 /*
2348 We have to clear these variables first, as the cleanup-in-case-of-error
2349 handling may touch these.
2350 */
2351 bzero((char*) sort_info, sizeof(*sort_info));
2352 bzero((char*) sort_param, sizeof(*sort_param));
2353 bzero(&info->rec_cache, sizeof(info->rec_cache));
2354
2355 if (share->data_file_type == NO_RECORD)
2356 {
2357 _ma_check_print_error(param,
2358 "Can't repair tables with record type NO_DATA");
2359 return 1;
2360 }
2361
2362 /* Make a copy to allow us to restore state and check how state changed */
2363 memcpy(org_share, share, sizeof(*share));
2364
2365 /* Repair code relies on share->state.state so we have to update it here */
2366 if (share->lock.update_status)
2367 (*share->lock.update_status)(info);
2368
2369 param->testflag|= T_REP; /* for easy checking */
2370 if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
2371 param->testflag|= T_CALC_CHECKSUM;
2372 param->glob_crc= 0;
2373 if (rep_quick)
2374 param->testflag|= T_QUICK;
2375 else
2376 param->testflag&= ~T_QUICK;
2377 param->org_key_map= share->state.key_map;
2378
2379 /*
2380 Clear check variables set by repair. This is needed to allow one to run
2381 several repair's in a row with same param
2382 */
2383 param->retry_repair= 0;
2384 param->warning_printed= 0;
2385 param->error_printed= 0;
2386 param->wrong_trd_printed= 0;
2387
2388 sort_param->sort_info= sort_info;
2389 sort_param->fix_datafile= ! rep_quick;
2390 sort_param->calc_checksum= MY_TEST(param->testflag & T_CALC_CHECKSUM);
2391 sort_info->info= sort_info->new_info= info;
2392 sort_info->param= param;
2393 set_data_file_type(sort_info, info->s);
2394 sort_info->org_data_file_type= share->data_file_type;
2395
2396 info->rec_cache.file= info->dfile.file;
2397 info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
2398
2399 if (protect_against_repair_crash(info, param,
2400 !MY_TEST(param->testflag &
2401 T_CREATE_MISSING_KEYS)))
2402 return 1;
2403
2404 /* calculate max_records */
2405 sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
2406 param->max_progress= sort_info->filelength;
2407 if ((param->testflag & T_CREATE_MISSING_KEYS) ||
2408 sort_info->org_data_file_type == COMPRESSED_RECORD)
2409 sort_info->max_records= share->state.state.records;
2410 else
2411 {
2412 ulong rec_length;
2413 rec_length= MY_MAX(share->base.min_pack_length,
2414 share->base.min_block_length);
2415 sort_info->max_records= (ha_rows) (sort_info->filelength / rec_length);
2416 }
2417
2418 /* Set up transaction handler so that we can see all rows */
2419 if (param->max_trid == 0)
2420 {
2421 if (!ma_control_file_inited())
2422 param->max_trid= 0; /* Give warning for first trid found */
2423 else
2424 param->max_trid= max_trid_in_system();
2425 }
2426 maria_ignore_trids(info);
2427 /* Don't write transid's during repair */
2428 maria_versioning(info, 0);
2429 /* remember original number of rows */
2430 *info->state= info->s->state.state;
2431 return 0;
2432 }
2433
2434
2435 /*
2436 During initialize_variables_for_repair and related functions we set some
2437 variables to values that makes sence during repair.
2438 This function restores these values to their original values so that we can
2439 use the handler in MariaDB without having to close and open the table.
2440 */
2441
restore_table_state_after_repair(MARIA_HA * info,MARIA_SHARE * org_share)2442 static void restore_table_state_after_repair(MARIA_HA *info,
2443 MARIA_SHARE *org_share)
2444 {
2445 maria_versioning(info, info->s->have_versioning);
2446 info->s->lock_key_trees= org_share->lock_key_trees;
2447 DBUG_ASSERT(!info->s->have_versioning || info->s->lock_key_trees);
2448 }
2449
2450
2451 /**
2452 @brief Drop all indexes
2453
2454 @param[in] param check parameters
2455 @param[in] info MARIA_HA handle
2456 @param[in] force if to force drop all indexes
2457
2458 @return status
2459 @retval 0 OK
2460 @retval != 0 Error
2461
2462 @note
2463 Once allocated, index blocks remain part of the key file forever.
2464 When indexes are disabled, no block is freed. When enabling indexes,
2465 no block is freed either. The new indexes are create from new
2466 blocks. (Bug #4692)
2467
2468 Before recreating formerly disabled indexes, the unused blocks
2469 must be freed. There are two options to do this:
2470 - Follow the tree of disabled indexes, add all blocks to the
2471 deleted blocks chain. Would require a lot of random I/O.
2472 - Drop all blocks by clearing all index root pointers and all
2473 delete chain pointers and resetting key_file_length to the end
2474 of the index file header. This requires to recreate all indexes,
2475 even those that may still be intact.
2476 The second method is probably faster in most cases.
2477
2478 When disabling indexes, MySQL disables either all indexes or all
2479 non-unique indexes. When MySQL [re-]enables disabled indexes
2480 (T_CREATE_MISSING_KEYS), then we either have "lost" blocks in the
2481 index file, or there are no non-unique indexes. In the latter case,
2482 maria_repair*() would not be called as there would be no disabled
2483 indexes.
2484
2485 If there would be more unique indexes than disabled (non-unique)
2486 indexes, we could do the first method. But this is not implemented
2487 yet. By now we drop and recreate all indexes when repair is called.
2488
2489 However, there is an exception. Sometimes MySQL disables non-unique
2490 indexes when the table is empty (e.g. when copying a table in
2491 mysql_alter_table()). When enabling the non-unique indexes, they
2492 are still empty. So there is no index block that can be lost. This
2493 optimization is implemented in this function.
2494
2495 Note that in normal repair (T_CREATE_MISSING_KEYS not set) we
2496 recreate all enabled indexes unconditonally. We do not change the
2497 key_map. Otherwise we invert the key map temporarily (outside of
2498 this function) and recreate the then "seemingly" enabled indexes.
2499 When we cannot use the optimization, and drop all indexes, we
2500 pretend that all indexes were disabled. By the inversion, we will
2501 then recrate all indexes.
2502 */
2503
maria_drop_all_indexes(HA_CHECK * param,MARIA_HA * info,my_bool force)2504 static int maria_drop_all_indexes(HA_CHECK *param, MARIA_HA *info,
2505 my_bool force)
2506 {
2507 MARIA_SHARE *share= info->s;
2508 MARIA_STATE_INFO *state= &share->state;
2509 uint i;
2510 DBUG_ENTER("maria_drop_all_indexes");
2511
2512 /*
2513 If any of the disabled indexes has a key block assigned, we must
2514 drop and recreate all indexes to avoid losing index blocks.
2515
2516 If we want to recreate disabled indexes only _and_ all of these
2517 indexes are empty, we don't need to recreate the existing indexes.
2518 */
2519 if (!force && (param->testflag & T_CREATE_MISSING_KEYS))
2520 {
2521 DBUG_PRINT("repair", ("creating missing indexes"));
2522 for (i= 0; i < share->base.keys; i++)
2523 {
2524 DBUG_PRINT("repair", ("index #: %u key_root:%lld active: %d",
2525 i, state->key_root[i],
2526 maria_is_key_active(state->key_map, i)));
2527 if ((state->key_root[i] != HA_OFFSET_ERROR) &&
2528 !maria_is_key_active(state->key_map, i))
2529 {
2530 /*
2531 This index has at least one key block and it is disabled.
2532 We would lose its block(s) if would just recreate it.
2533 So we need to drop and recreate all indexes.
2534 */
2535 DBUG_PRINT("repair", ("nonempty and disabled: recreate all"));
2536 break;
2537 }
2538 }
2539 if (i >= share->base.keys)
2540 goto end;
2541
2542 /*
2543 We do now drop all indexes and declare them disabled. With the
2544 T_CREATE_MISSING_KEYS flag, maria_repair*() will recreate all
2545 disabled indexes and enable them.
2546 */
2547 maria_clear_all_keys_active(state->key_map);
2548 DBUG_PRINT("repair", ("declared all indexes disabled"));
2549 }
2550
2551 /* Clear index root block pointers. */
2552 for (i= 0; i < share->base.keys; i++)
2553 state->key_root[i]= HA_OFFSET_ERROR;
2554
2555 /* Drop the delete chain. */
2556 share->state.key_del= HA_OFFSET_ERROR;
2557
2558 /* Reset index file length to end of index file header. */
2559 share->state.state.key_file_length= share->base.keystart;
2560
2561 end:
2562 DBUG_RETURN(0);
2563 }
2564
2565
2566 /*
2567 Recover old table by reading each record and writing all keys
2568
2569 NOTES
2570 Save new datafile-name in temp_filename.
2571 We overwrite the index file as we go (writekeys() for example), so if we
2572 crash during this the table is unusable and user (or Recovery in the
2573 future) must repeat the REPAIR/OPTIMIZE operation. We could use a
2574 temporary index file in the future (drawback: more disk space).
2575
2576 IMPLEMENTATION (for hard repair with block format)
2577 - Create new, unrelated MARIA_HA of the table
2578 - Create new datafile and associate it with new handler
2579 - Reset all statistic information in new handler
2580 - Copy all data to new handler with normal write operations
2581 - Move state of new handler to old handler
2582 - Close new handler
2583 - Close data file in old handler
2584 - Rename old data file to new data file.
2585 - Reopen data file in old handler
2586 */
2587
maria_repair(HA_CHECK * param,register MARIA_HA * info,char * name,my_bool rep_quick)2588 int maria_repair(HA_CHECK *param, register MARIA_HA *info,
2589 char *name, my_bool rep_quick)
2590 {
2591 int error, got_error;
2592 ha_rows start_records,new_header_length;
2593 my_off_t del;
2594 File new_file;
2595 MARIA_SHARE *share= info->s;
2596 char llbuff[22],llbuff2[22];
2597 MARIA_SORT_INFO sort_info;
2598 MARIA_SORT_PARAM sort_param;
2599 my_bool block_record, scan_inited= 0, reenable_logging= 0;
2600 enum data_file_type org_data_file_type= share->data_file_type;
2601 myf sync_dir= ((share->now_transactional && !share->temporary) ?
2602 MY_SYNC_DIR : 0);
2603 MARIA_SHARE backup_share;
2604 DBUG_ENTER("maria_repair");
2605
2606 got_error= 1;
2607 new_file= -1;
2608 start_records= share->state.state.records;
2609 if (!(param->testflag & T_SILENT))
2610 {
2611 printf("- recovering (with keycache) Aria-table '%s'\n",name);
2612 printf("Data records: %s\n", llstr(start_records, llbuff));
2613 }
2614
2615 if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
2616 rep_quick, &backup_share))
2617 goto err;
2618
2619 if ((reenable_logging= share->now_transactional))
2620 _ma_tmp_disable_logging_for_table(info, 0);
2621
2622 sort_param.current_filepos= sort_param.filepos= new_header_length=
2623 ((param->testflag & T_UNPACK) ? 0L : share->pack.header_length);
2624
2625 if (!rep_quick)
2626 {
2627 /* Get real path for data file */
2628 if ((new_file= mysql_file_create(key_file_tmp,
2629 fn_format(param->temp_filename,
2630 share->data_file_name.str, "",
2631 DATA_TMP_EXT, 2+4),
2632 0,param->tmpfile_createflag,
2633 MYF(0))) < 0)
2634 {
2635 _ma_check_print_error(param,"Can't create new tempfile: '%s'",
2636 param->temp_filename);
2637 goto err;
2638 }
2639 if (new_header_length &&
2640 maria_filecopy(param, new_file, info->dfile.file, 0L,
2641 new_header_length, "datafile-header"))
2642 goto err;
2643 share->state.dellink= HA_OFFSET_ERROR;
2644 info->rec_cache.file= new_file; /* For sort_delete_record */
2645 if (share->data_file_type == BLOCK_RECORD ||
2646 (param->testflag & T_UNPACK))
2647 {
2648 if (create_new_data_handle(&sort_param, new_file))
2649 goto err;
2650 sort_info.new_info->rec_cache.file= new_file;
2651 }
2652 }
2653
2654 block_record= sort_info.new_info->s->data_file_type == BLOCK_RECORD;
2655
2656 if (org_data_file_type != BLOCK_RECORD)
2657 {
2658 /* We need a read buffer to read rows in big blocks */
2659 if (init_io_cache(¶m->read_cache, info->dfile.file,
2660 (uint) param->read_buffer_length,
2661 READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
2662 goto err;
2663 }
2664 if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
2665 {
2666 /* When writing to not block records, we need a write buffer */
2667 if (!rep_quick)
2668 {
2669 if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
2670 (uint) param->write_buffer_length,
2671 WRITE_CACHE, new_header_length, 1,
2672 MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
2673 goto err;
2674 sort_info.new_info->opt_flag|=WRITE_CACHE_USED;
2675 }
2676 }
2677 else if (block_record)
2678 {
2679 scan_inited= 1;
2680 if (maria_scan_init(sort_info.info))
2681 goto err;
2682 }
2683
2684 if (!(sort_param.record=
2685 (uchar *) my_malloc((uint)
2686 share->base.default_rec_buff_size, MYF(0))) ||
2687 _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
2688 share->base.default_rec_buff_size))
2689 {
2690 _ma_check_print_error(param, "Not enough memory for extra record");
2691 goto err;
2692 }
2693
2694 sort_param.read_cache=param->read_cache;
2695 sort_param.pos=sort_param.max_pos=share->pack.header_length;
2696 param->read_cache.end_of_file= sort_info.filelength;
2697 sort_param.master=1;
2698 sort_info.max_records= ~(ha_rows) 0;
2699
2700 del= share->state.state.del;
2701 share->state.state.records= share->state.state.del= share->state.split= 0;
2702 share->state.state.empty= 0;
2703
2704 if (param->testflag & T_CREATE_MISSING_KEYS)
2705 maria_set_all_keys_active(share->state.key_map, share->base.keys);
2706 maria_drop_all_indexes(param, info, TRUE);
2707
2708 maria_lock_memory(param); /* Everything is alloced */
2709
2710 sort_param.sort_info->info->in_check_table= 1;
2711 /* Re-create all keys, which are set in key_map. */
2712 while (!(error=sort_get_next_record(&sort_param)))
2713 {
2714 if (block_record && _ma_sort_write_record(&sort_param))
2715 goto err;
2716
2717 if (writekeys(&sort_param))
2718 {
2719 if (my_errno != HA_ERR_FOUND_DUPP_KEY)
2720 goto err;
2721 DBUG_DUMP("record", sort_param.record,
2722 share->base.default_rec_buff_size);
2723 _ma_check_print_warning(param,
2724 "Duplicate key %2d for record at %10s against "
2725 "new record at %10s",
2726 info->errkey+1,
2727 llstr(sort_param.current_filepos, llbuff),
2728 llstr(info->dup_key_pos,llbuff2));
2729 if (param->testflag & T_VERBOSE)
2730 {
2731 MARIA_KEY tmp_key;
2732 MARIA_KEYDEF *keyinfo= share->keyinfo + info->errkey;
2733 (*keyinfo->make_key)(info, &tmp_key, (uint) info->errkey,
2734 info->lastkey_buff,
2735 sort_param.record, 0L, 0);
2736 _ma_print_key(stdout, &tmp_key);
2737 }
2738 sort_info.dupp++;
2739 if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
2740 {
2741 param->testflag|=T_RETRY_WITHOUT_QUICK;
2742 param->error_printed=1;
2743 goto err;
2744 }
2745 /* purecov: begin tested */
2746 if (block_record)
2747 {
2748 sort_info.new_info->s->state.state.records--;
2749 if ((*sort_info.new_info->s->write_record_abort)(sort_info.new_info))
2750 {
2751 _ma_check_print_error(param,"Couldn't delete duplicate row");
2752 goto err;
2753 }
2754 }
2755 /* purecov: end */
2756 continue;
2757 }
2758 if (!block_record)
2759 {
2760 if (_ma_sort_write_record(&sort_param))
2761 goto err;
2762 /* Filepos is pointer to where next row will be stored */
2763 sort_param.current_filepos= sort_param.filepos;
2764 }
2765 }
2766 if (error > 0 || maria_write_data_suffix(&sort_info, !rep_quick) ||
2767 flush_io_cache(&sort_info.new_info->rec_cache) ||
2768 param->read_cache.error < 0)
2769 goto err;
2770
2771 if (param->testflag & T_WRITE_LOOP)
2772 {
2773 fputs(" \r",stdout); fflush(stdout);
2774 }
2775 if (mysql_file_chsize(share->kfile.file,
2776 share->state.state.key_file_length, 0, MYF(0)))
2777 {
2778 _ma_check_print_warning(param,
2779 "Can't change size of indexfile, error: %d",
2780 my_errno);
2781 goto err;
2782 }
2783
2784 if (rep_quick && del+sort_info.dupp != share->state.state.del)
2785 {
2786 _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
2787 "Found wrong number of deleted records");
2788 _ma_check_print_error(param,"Run recovery again without -q");
2789 param->retry_repair=1;
2790 param->testflag|=T_RETRY_WITHOUT_QUICK;
2791 goto err;
2792 }
2793
2794 if (param->testflag & T_SAFE_REPAIR)
2795 {
2796 /* Don't repair if we loosed more than one row */
2797 if (sort_info.new_info->s->state.state.records+1 < start_records)
2798 {
2799 share->state.state.records= start_records;
2800 goto err;
2801 }
2802 }
2803
2804 end_io_cache(&sort_info.new_info->rec_cache);
2805 info->opt_flag&= ~WRITE_CACHE_USED;
2806
2807 /*
2808 As we have read the data file (sort_get_next_record()) we may have
2809 cached, non-changed blocks of it in the page cache. We must throw them
2810 away as we are going to close their descriptor ('new_file'). We also want
2811 to flush any index block, so that it is ready for the upcoming sync.
2812 */
2813 if (_ma_flush_table_files_before_swap(param, info))
2814 goto err;
2815
2816 if (!rep_quick)
2817 {
2818 sort_info.new_info->s->state.state.data_file_length= sort_param.filepos;
2819 if (sort_info.new_info != sort_info.info)
2820 {
2821 MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
2822 if (maria_close(sort_info.new_info))
2823 {
2824 _ma_check_print_error(param, "Got error %d on close", my_errno);
2825 goto err;
2826 }
2827 copy_data_file_state(&share->state, &save_state);
2828 new_file= -1;
2829 sort_info.new_info= info;
2830 }
2831 share->state.version=(ulong) time((time_t*) 0); /* Force reopen */
2832
2833 /* Replace the actual file with the temporary file */
2834 if (new_file >= 0)
2835 mysql_file_close(new_file, MYF(MY_WME));
2836 new_file= -1;
2837 change_data_file_descriptor(info, -1);
2838 if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
2839 DATA_TMP_EXT, param->backup_time,
2840 (param->testflag & T_BACKUP_DATA ?
2841 MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
2842 sync_dir) ||
2843 _ma_open_datafile(info, share))
2844 {
2845 goto err;
2846 }
2847 }
2848 else
2849 {
2850 share->state.state.data_file_length= sort_param.max_pos;
2851 }
2852 if (param->testflag & T_CALC_CHECKSUM)
2853 share->state.state.checksum= param->glob_crc;
2854
2855 if (!(param->testflag & T_SILENT))
2856 {
2857 if (start_records != share->state.state.records)
2858 printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
2859 }
2860 if (sort_info.dupp)
2861 _ma_check_print_warning(param,
2862 "%s records have been removed",
2863 llstr(sort_info.dupp,llbuff));
2864
2865 got_error= 0;
2866 /* If invoked by external program that uses thr_lock */
2867 if (&share->state.state != info->state)
2868 *info->state= *info->state_start= share->state.state;
2869
2870 err:
2871 if (scan_inited)
2872 maria_scan_end(sort_info.info);
2873 _ma_reset_state(info);
2874
2875 end_io_cache(¶m->read_cache);
2876 if (sort_info.new_info)
2877 {
2878 end_io_cache(&sort_info.new_info->rec_cache);
2879 sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
2880 }
2881 info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
2882
2883 sort_param.sort_info->info->in_check_table= 0;
2884 /* this below could fail, shouldn't we detect error? */
2885 if (got_error)
2886 {
2887 if (! param->error_printed)
2888 _ma_check_print_error(param,"%d for record at pos %s",my_errno,
2889 llstr(sort_param.start_recpos,llbuff));
2890 (void)_ma_flush_table_files_before_swap(param, info);
2891 if (sort_info.new_info && sort_info.new_info != sort_info.info)
2892 {
2893 unuse_data_file_descriptor(sort_info.new_info);
2894 maria_close(sort_info.new_info);
2895 }
2896 if (new_file >= 0)
2897 {
2898 mysql_file_close(new_file,MYF(0));
2899 mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
2900 }
2901 maria_mark_crashed_on_repair(info);
2902 }
2903 /* If caller had disabled logging it's not up to us to re-enable it */
2904 if (reenable_logging)
2905 _ma_reenable_logging_for_table(info, FALSE);
2906 restore_table_state_after_repair(info, &backup_share);
2907
2908 my_free(sort_param.rec_buff);
2909 my_free(sort_param.record);
2910 my_free(sort_info.buff);
2911 if (!got_error && (param->testflag & T_UNPACK))
2912 restore_data_file_type(share);
2913 share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES |
2914 STATE_NOT_ANALYZED | STATE_NOT_ZEROFILLED);
2915 if (!rep_quick)
2916 share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_MOVABLE);
2917 DBUG_RETURN(got_error);
2918 }
2919
2920
2921 /* Uppdate keyfile when doing repair */
2922
writekeys(MARIA_SORT_PARAM * sort_param)2923 static int writekeys(MARIA_SORT_PARAM *sort_param)
2924 {
2925 uint i;
2926 MARIA_HA *info= sort_param->sort_info->info;
2927 MARIA_SHARE *share= info->s;
2928 uchar *record= sort_param->record;
2929 uchar *key_buff;
2930 my_off_t filepos= sort_param->current_filepos;
2931 MARIA_KEY key;
2932 DBUG_ENTER("writekeys");
2933
2934 key_buff= info->lastkey_buff+share->base.max_key_length;
2935
2936 for (i=0 ; i < share->base.keys ; i++)
2937 {
2938 if (maria_is_key_active(share->state.key_map, i))
2939 {
2940 if (share->keyinfo[i].flag & HA_FULLTEXT )
2941 {
2942 if (_ma_ft_add(info, i, key_buff, record, filepos))
2943 goto err;
2944 }
2945 else
2946 {
2947 if (!(*share->keyinfo[i].make_key)(info, &key, i, key_buff, record,
2948 filepos, 0))
2949 goto err;
2950 if ((*share->keyinfo[i].ck_insert)(info, &key))
2951 goto err;
2952 }
2953 }
2954 }
2955 DBUG_RETURN(0);
2956
2957 err:
2958 if (my_errno == HA_ERR_FOUND_DUPP_KEY)
2959 {
2960 info->errkey=(int) i; /* This key was found */
2961 while ( i-- > 0 )
2962 {
2963 if (maria_is_key_active(share->state.key_map, i))
2964 {
2965 if (share->keyinfo[i].flag & HA_FULLTEXT)
2966 {
2967 if (_ma_ft_del(info,i,key_buff,record,filepos))
2968 break;
2969 }
2970 else
2971 {
2972 (*share->keyinfo[i].make_key)(info, &key, i, key_buff, record,
2973 filepos, 0);
2974 if (_ma_ck_delete(info, &key))
2975 break;
2976 }
2977 }
2978 }
2979 }
2980 /* Remove checksum that was added to glob_crc in sort_get_next_record */
2981 if (sort_param->calc_checksum)
2982 sort_param->sort_info->param->glob_crc-= info->cur_row.checksum;
2983 DBUG_PRINT("error",("errno: %d",my_errno));
2984 DBUG_RETURN(-1);
2985 } /* writekeys */
2986
2987
2988 /* Change all key-pointers that points to a records */
2989
maria_movepoint(register MARIA_HA * info,uchar * record,MARIA_RECORD_POS oldpos,MARIA_RECORD_POS newpos,uint prot_key)2990 int maria_movepoint(register MARIA_HA *info, uchar *record,
2991 MARIA_RECORD_POS oldpos, MARIA_RECORD_POS newpos,
2992 uint prot_key)
2993 {
2994 uint i;
2995 uchar *key_buff;
2996 MARIA_SHARE *share= info->s;
2997 MARIA_PAGE page;
2998 DBUG_ENTER("maria_movepoint");
2999
3000 key_buff= info->lastkey_buff + share->base.max_key_length;
3001 for (i=0 ; i < share->base.keys; i++)
3002 {
3003 if (i != prot_key && maria_is_key_active(share->state.key_map, i))
3004 {
3005 MARIA_KEY key;
3006 (*share->keyinfo[i].make_key)(info, &key, i, key_buff, record, oldpos,
3007 0);
3008 if (key.keyinfo->flag & HA_NOSAME)
3009 { /* Change pointer direct */
3010 MARIA_KEYDEF *keyinfo;
3011 keyinfo=share->keyinfo+i;
3012 if (_ma_search(info, &key, (uint32) (SEARCH_SAME | SEARCH_SAVE_BUFF),
3013 share->state.key_root[i]))
3014 DBUG_RETURN(-1);
3015 _ma_page_setup(&page, info, keyinfo, info->last_keypage,
3016 info->keyread_buff);
3017
3018 _ma_dpointer(share, info->int_keypos - page.node -
3019 share->rec_reflength,newpos);
3020
3021 if (_ma_write_keypage(&page, PAGECACHE_LOCK_LEFT_UNLOCKED,
3022 DFLT_INIT_HITS))
3023 DBUG_RETURN(-1);
3024 }
3025 else
3026 { /* Change old key to new */
3027 if (_ma_ck_delete(info, &key))
3028 DBUG_RETURN(-1);
3029 (*share->keyinfo[i].make_key)(info, &key, i, key_buff, record, newpos,
3030 0);
3031 if (_ma_ck_write(info, &key))
3032 DBUG_RETURN(-1);
3033 }
3034 }
3035 }
3036 DBUG_RETURN(0);
3037 } /* maria_movepoint */
3038
3039
3040 /* Tell system that we want all memory for our cache */
3041
maria_lock_memory(HA_CHECK * param)3042 void maria_lock_memory(HA_CHECK *param __attribute__((unused)))
3043 {
3044 #ifdef SUN_OS /* Key-cacheing thrases on sun 4.1 */
3045 if (param->opt_maria_lock_memory)
3046 {
3047 int success = mlockall(MCL_CURRENT); /* or plock(DATLOCK); */
3048 if (geteuid() == 0 && success != 0)
3049 _ma_check_print_warning(param,
3050 "Failed to lock memory. errno %d",my_errno);
3051 }
3052 #endif
3053 } /* maria_lock_memory */
3054
3055
3056 /**
3057 Flush all changed blocks to disk.
3058
3059 We release blocks as it's unlikely that they would all be needed soon.
3060 This function needs to be called before swapping data or index files or
3061 syncing them.
3062
3063 @param param description of the repair operation
3064 @param info table
3065 */
3066
_ma_flush_table_files_before_swap(HA_CHECK * param,MARIA_HA * info)3067 static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
3068 MARIA_HA *info)
3069 {
3070 DBUG_ENTER("_ma_flush_table_files_before_swap");
3071 if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
3072 FLUSH_RELEASE, FLUSH_RELEASE))
3073 {
3074 _ma_check_print_error(param, "%d when trying to write buffers", my_errno);
3075 DBUG_RETURN(TRUE);
3076 }
3077 DBUG_RETURN(FALSE);
3078 }
3079
3080
3081 /* Sort index for more efficent reads */
3082
maria_sort_index(HA_CHECK * param,register MARIA_HA * info,char * name)3083 int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name)
3084 {
3085 reg2 uint key;
3086 reg1 MARIA_KEYDEF *keyinfo;
3087 File new_file;
3088 my_off_t index_pos[HA_MAX_POSSIBLE_KEY];
3089 uint r_locks,w_locks;
3090 int old_lock;
3091 MARIA_SHARE *share= info->s;
3092 MARIA_STATE_INFO old_state;
3093 myf sync_dir= ((share->now_transactional && !share->temporary) ?
3094 MY_SYNC_DIR : 0);
3095 DBUG_ENTER("maria_sort_index");
3096
3097 /* cannot sort index files with R-tree indexes */
3098 for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
3099 key++,keyinfo++)
3100 if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
3101 DBUG_RETURN(0);
3102
3103 if (!(param->testflag & T_SILENT))
3104 printf("- Sorting index for Aria-table '%s'\n",name);
3105
3106 if (protect_against_repair_crash(info, param, FALSE))
3107 DBUG_RETURN(1);
3108
3109 /* Get real path for index file */
3110 fn_format(param->temp_filename,name,"", MARIA_NAME_IEXT,2+4+32);
3111 if ((new_file=mysql_file_create(key_file_kfile, fn_format(param->temp_filename,param->temp_filename,
3112 "", INDEX_TMP_EXT,2+4),
3113 0, param->tmpfile_createflag, MYF(0))) < 0)
3114 {
3115 _ma_check_print_error(param,"Can't create new tempfile: '%s'",
3116 param->temp_filename);
3117 DBUG_RETURN(-1);
3118 }
3119 if (maria_filecopy(param, new_file, share->kfile.file, 0L,
3120 (ulong) share->base.keystart, "headerblock"))
3121 goto err;
3122
3123 param->new_file_pos=share->base.keystart;
3124 for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
3125 key++,keyinfo++)
3126 {
3127 if (maria_is_key_active(share->state.key_map, key) &&
3128 share->state.key_root[key] != HA_OFFSET_ERROR)
3129 {
3130 index_pos[key]=param->new_file_pos; /* Write first block here */
3131 if (sort_one_index(param,info,keyinfo,share->state.key_root[key],
3132 new_file))
3133 goto err;
3134 }
3135 else
3136 index_pos[key]= HA_OFFSET_ERROR; /* No blocks */
3137 }
3138
3139 /* Flush key cache for this file if we are calling this outside maria_chk */
3140 flush_pagecache_blocks(share->pagecache, &share->kfile,
3141 FLUSH_IGNORE_CHANGED);
3142
3143 share->state.version=(ulong) time((time_t*) 0);
3144 old_state= share->state; /* save state if not stored */
3145 r_locks= share->r_locks;
3146 w_locks= share->w_locks;
3147 old_lock= info->lock_type;
3148
3149 /* Put same locks as old file */
3150 share->r_locks= share->w_locks= share->tot_locks= 0;
3151 (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE);
3152 mysql_mutex_lock(&share->intern_lock);
3153 mysql_file_close(share->kfile.file, MYF(MY_WME));
3154 share->kfile.file = -1;
3155 mysql_mutex_unlock(&share->intern_lock);
3156 mysql_file_close(new_file, MYF(MY_WME));
3157 if (maria_change_to_newfile(share->index_file_name.str, MARIA_NAME_IEXT,
3158 INDEX_TMP_EXT, 0, sync_dir) ||
3159 _ma_open_keyfile(share))
3160 goto err2;
3161 info->lock_type= F_UNLCK; /* Force maria_readinfo to lock */
3162 _ma_readinfo(info,F_WRLCK,0); /* Will lock the table */
3163 info->lock_type= old_lock;
3164 share->r_locks= r_locks;
3165 share->w_locks= w_locks;
3166 share->tot_locks= r_locks+w_locks;
3167 share->state= old_state; /* Restore old state */
3168
3169 share->state.state.key_file_length=param->new_file_pos;
3170 info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
3171 for (key=0 ; key < share->base.keys ; key++)
3172 share->state.key_root[key]=index_pos[key];
3173 share->state.key_del= HA_OFFSET_ERROR;
3174
3175 share->state.changed&= ~STATE_NOT_SORTED_PAGES;
3176 DBUG_EXECUTE_IF("maria_flush_whole_log",
3177 {
3178 DBUG_PRINT("maria_flush_whole_log", ("now"));
3179 translog_flush(translog_get_horizon());
3180 });
3181 DBUG_EXECUTE_IF("maria_crash_sort_index",
3182 {
3183 DBUG_PRINT("maria_crash_sort_index", ("now"));
3184 DBUG_SUICIDE();
3185 });
3186 DBUG_RETURN(0);
3187
3188 err:
3189 mysql_file_close(new_file, MYF(MY_WME));
3190 err2:
3191 mysql_file_delete(key_file_tmp, param->temp_filename,MYF(MY_WME));
3192 DBUG_RETURN(-1);
3193 } /* maria_sort_index */
3194
3195
3196 /**
3197 @brief write a page directly to index file
3198
3199 */
3200
write_page(MARIA_SHARE * share,File file,uchar * buff,uint block_size,my_off_t pos,int myf_rw)3201 static int write_page(MARIA_SHARE *share, File file,
3202 uchar *buff, uint block_size,
3203 my_off_t pos, int myf_rw)
3204 {
3205 int res;
3206 PAGECACHE_IO_HOOK_ARGS args;
3207 args.page= buff;
3208 args.pageno= (pgcache_page_no_t) (pos / share->block_size);
3209 args.data= (uchar*) share;
3210 args.crypt_buf= NULL;
3211 (* share->kfile.pre_write_hook)(&args);
3212 res= (int)my_pwrite(file, args.page, block_size, pos, myf_rw);
3213 (* share->kfile.post_write_hook)(res, &args);
3214 return res;
3215 }
3216
3217
3218 /* Sort index blocks recursive using one index */
3219
sort_one_index(HA_CHECK * param,MARIA_HA * info,MARIA_KEYDEF * keyinfo,my_off_t pagepos,File new_file)3220 static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
3221 MARIA_KEYDEF *keyinfo,
3222 my_off_t pagepos, File new_file)
3223 {
3224 uint length,nod_flag;
3225 uchar *buff,*keypos,*endpos;
3226 my_off_t new_page_pos,next_page;
3227 MARIA_SHARE *share= info->s;
3228 MARIA_KEY key;
3229 MARIA_PAGE page;
3230 DBUG_ENTER("sort_one_index");
3231
3232 /* cannot walk over R-tree indices */
3233 DBUG_ASSERT(keyinfo->key_alg != HA_KEY_ALG_RTREE);
3234 new_page_pos=param->new_file_pos;
3235 param->new_file_pos+=keyinfo->block_length;
3236 key.keyinfo= keyinfo;
3237
3238 if (!(buff= (uchar*) my_alloca((uint) keyinfo->block_length +
3239 keyinfo->maxlength +
3240 MARIA_INDEX_OVERHEAD_SIZE)))
3241 {
3242 _ma_check_print_error(param,"Not enough memory for key block");
3243 DBUG_RETURN(-1);
3244 }
3245 key.data= buff + keyinfo->block_length;
3246
3247 if (_ma_fetch_keypage(&page, info, keyinfo, pagepos,
3248 PAGECACHE_LOCK_LEFT_UNLOCKED,
3249 DFLT_INIT_HITS, buff, 0))
3250 {
3251 report_keypage_fault(param, info, pagepos);
3252 goto err;
3253 }
3254
3255 if ((nod_flag= page.node) || keyinfo->flag & HA_FULLTEXT)
3256 {
3257 keypos= page.buff + share->keypage_header + nod_flag;
3258 endpos= page.buff + page.size;
3259
3260 for ( ;; )
3261 {
3262 if (nod_flag)
3263 {
3264 next_page= _ma_kpos(nod_flag,keypos);
3265 /* Save new pos */
3266 _ma_kpointer(info,keypos-nod_flag,param->new_file_pos);
3267 if (sort_one_index(param,info,keyinfo,next_page,new_file))
3268 {
3269 DBUG_PRINT("error",
3270 ("From page: %ld, keyoffset: %lu used_length: %d",
3271 (ulong) pagepos, (ulong) (keypos - buff),
3272 (int) page.size));
3273 DBUG_DUMP("buff", page.buff, page.size);
3274 goto err;
3275 }
3276 }
3277 if (keypos >= endpos ||
3278 !(*keyinfo->get_key)(&key, page.flag, nod_flag, &keypos))
3279 break;
3280 DBUG_ASSERT(keypos <= endpos);
3281 if (keyinfo->flag & HA_FULLTEXT)
3282 {
3283 uint off;
3284 int subkeys;
3285 get_key_full_length_rdonly(off, key.data);
3286 subkeys= ft_sintXkorr(key.data + off);
3287 if (subkeys < 0)
3288 {
3289 next_page= _ma_row_pos_from_key(&key);
3290 _ma_dpointer(share, keypos - nod_flag - share->rec_reflength,
3291 param->new_file_pos); /* Save new pos */
3292 if (sort_one_index(param,info,&share->ft2_keyinfo,
3293 next_page,new_file))
3294 goto err;
3295 }
3296 }
3297 }
3298 }
3299
3300 /* Fill block with zero and write it to the new index file */
3301 length= page.size;
3302 bzero(buff+length,keyinfo->block_length-length);
3303 if (write_page(share, new_file, buff, keyinfo->block_length,
3304 new_page_pos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
3305 {
3306 _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno);
3307 goto err;
3308 }
3309 my_afree(buff);
3310 DBUG_RETURN(0);
3311 err:
3312 my_afree(buff);
3313 DBUG_RETURN(1);
3314 } /* sort_one_index */
3315
3316
3317 /**
3318 @brief Fill empty space in index file with zeroes
3319
3320 @return
3321 @retval 0 Ok
3322 @retval 1 Error
3323 */
3324
maria_zerofill_index(HA_CHECK * param,MARIA_HA * info,const char * name)3325 static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info,
3326 const char *name)
3327 {
3328 MARIA_SHARE *share= info->s;
3329 MARIA_PINNED_PAGE page_link;
3330 char llbuff[21];
3331 uchar *buff;
3332 pgcache_page_no_t page;
3333 my_off_t pos;
3334 my_off_t key_file_length= share->state.state.key_file_length;
3335 uint block_size= share->block_size;
3336 my_bool zero_lsn= (share->base.born_transactional &&
3337 !(param->testflag & T_ZEROFILL_KEEP_LSN));
3338 int error= 1;
3339 DBUG_ENTER("maria_zerofill_index");
3340
3341 if (!(param->testflag & T_SILENT))
3342 printf("- Zerofilling index for Aria-table '%s'\n",name);
3343
3344 /* Go through the index file */
3345 for (pos= share->base.keystart, page= (ulonglong) (pos / block_size);
3346 pos < key_file_length;
3347 pos+= block_size, page++)
3348 {
3349 uint length;
3350 if (!(buff= pagecache_read(share->pagecache,
3351 &share->kfile, page,
3352 DFLT_INIT_HITS, 0,
3353 PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
3354 &page_link.link)))
3355 {
3356 pagecache_unlock_by_link(share->pagecache, page_link.link,
3357 PAGECACHE_LOCK_WRITE_UNLOCK,
3358 PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3359 LSN_IMPOSSIBLE, 0, FALSE);
3360 _ma_check_print_error(param,
3361 "Page %9s: Got error %d when reading index file",
3362 llstr(pos, llbuff), my_errno);
3363 goto end;
3364 }
3365 if (zero_lsn)
3366 bzero(buff, LSN_SIZE);
3367
3368 if (share->base.born_transactional)
3369 {
3370 uint keynr= _ma_get_keynr(share, buff);
3371 if (keynr < share->base.keys)
3372 {
3373 MARIA_PAGE page;
3374 DBUG_ASSERT(keynr < share->base.keys);
3375
3376 _ma_page_setup(&page, info, share->keyinfo + keynr, pos, buff);
3377 if (_ma_compact_keypage(&page, ~(TrID) 0))
3378 {
3379 _ma_check_print_error(param,
3380 "Page %9s: Got error %d when reading index "
3381 "file",
3382 llstr(pos, llbuff), my_errno);
3383 goto end;
3384 }
3385 }
3386 }
3387
3388 length= _ma_get_page_used(share, buff);
3389 DBUG_ASSERT(length <= block_size);
3390 if (length < block_size)
3391 bzero(buff + length, block_size - length);
3392 pagecache_unlock_by_link(share->pagecache, page_link.link,
3393 PAGECACHE_LOCK_WRITE_UNLOCK,
3394 PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3395 LSN_IMPOSSIBLE, 1, FALSE);
3396 }
3397 error= 0; /* ok */
3398
3399 end:
3400 if (flush_pagecache_blocks(share->pagecache, &share->kfile,
3401 FLUSH_FORCE_WRITE))
3402 DBUG_RETURN(1);
3403 DBUG_RETURN(error);
3404 }
3405
3406
3407 /**
3408 @brief Fill empty space in data file with zeroes
3409
3410 @todo
3411 Zerofill all pages marked in bitmap as empty and change them to
3412 be of type UNALLOCATED_PAGE
3413
3414 @return
3415 @retval 0 Ok
3416 @retval 1 Error
3417 */
3418
maria_zerofill_data(HA_CHECK * param,MARIA_HA * info,const char * name)3419 static my_bool maria_zerofill_data(HA_CHECK *param, MARIA_HA *info,
3420 const char *name)
3421 {
3422 MARIA_SHARE *share= info->s;
3423 MARIA_PINNED_PAGE page_link;
3424 char llbuff[21];
3425 my_off_t pos;
3426 pgcache_page_no_t page;
3427 uint block_size= share->block_size;
3428 MARIA_FILE_BITMAP *bitmap= &share->bitmap;
3429 my_bool zero_lsn= !(param->testflag & T_ZEROFILL_KEEP_LSN), error;
3430 DBUG_ENTER("maria_zerofill_data");
3431
3432 /* This works only with BLOCK_RECORD files */
3433 if (share->data_file_type != BLOCK_RECORD)
3434 DBUG_RETURN(0);
3435
3436 if (!(param->testflag & T_SILENT))
3437 printf("- Zerofilling data for Aria-table '%s'\n",name);
3438
3439 /* Go through the record file */
3440 for (page= 1, pos= block_size;
3441 pos < share->state.state.data_file_length;
3442 pos+= block_size, page++)
3443 {
3444 uchar *buff;
3445 enum en_page_type page_type;
3446
3447 /* Ignore bitmap pages */
3448 if ((page % share->bitmap.pages_covered) == 0)
3449 continue;
3450 if (!(buff= pagecache_read(share->pagecache,
3451 &info->dfile,
3452 page, 1, 0,
3453 PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
3454 &page_link.link)))
3455 {
3456 _ma_check_print_error(param,
3457 "Page %9s: Got error: %d when reading datafile",
3458 llstr(pos, llbuff), my_errno);
3459 goto err;
3460 }
3461 page_type= (enum en_page_type) (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK);
3462 switch (page_type) {
3463 case UNALLOCATED_PAGE:
3464 if (zero_lsn)
3465 bzero(buff, block_size);
3466 else
3467 bzero(buff + LSN_SIZE, block_size - LSN_SIZE);
3468 break;
3469 case BLOB_PAGE:
3470 if (_ma_bitmap_get_page_bits(info, bitmap, page) == 0)
3471 {
3472 /* Unallocated page */
3473 if (zero_lsn)
3474 bzero(buff, block_size);
3475 else
3476 bzero(buff + LSN_SIZE, block_size - LSN_SIZE);
3477 }
3478 else
3479 if (zero_lsn)
3480 bzero(buff, LSN_SIZE);
3481 break;
3482 case HEAD_PAGE:
3483 case TAIL_PAGE:
3484 {
3485 uint max_entry= (uint) buff[DIR_COUNT_OFFSET];
3486 uint offset, dir_start, empty_space;
3487 uchar *dir;
3488
3489 if (zero_lsn)
3490 bzero(buff, LSN_SIZE);
3491 if (max_entry != 0)
3492 {
3493 my_bool is_head_page= (page_type == HEAD_PAGE);
3494 dir= dir_entry_pos(buff, block_size, max_entry - 1);
3495 _ma_compact_block_page(share,
3496 buff, max_entry -1, 0,
3497 is_head_page ? ~(TrID) 0 : 0,
3498 is_head_page ?
3499 share->base.min_block_length : 0);
3500
3501 /* compactation may have increased free space */
3502 empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
3503 if (!enough_free_entries_on_page(share, buff))
3504 empty_space= 0; /* Page is full */
3505 if (_ma_bitmap_set(info, page, is_head_page,
3506 empty_space))
3507 goto err;
3508
3509 /* Zerofill the not used part */
3510 offset= uint2korr(dir) + uint2korr(dir+2);
3511 dir_start= (uint) (dir - buff);
3512 DBUG_ASSERT(dir_start >= offset);
3513 if (dir_start > offset)
3514 bzero(buff + offset, dir_start - offset);
3515 }
3516 break;
3517 }
3518 default:
3519 _ma_check_print_error(param,
3520 "Page %9s: Found unrecognizable block of type %d",
3521 llstr(pos, llbuff), page_type);
3522 goto err;
3523 }
3524 pagecache_unlock_by_link(share->pagecache, page_link.link,
3525 PAGECACHE_LOCK_WRITE_UNLOCK,
3526 PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3527 LSN_IMPOSSIBLE, 1, FALSE);
3528 }
3529 error= _ma_bitmap_flush(share);
3530 if (flush_pagecache_blocks(share->pagecache, &info->dfile,
3531 FLUSH_FORCE_WRITE))
3532 error= 1;
3533 DBUG_RETURN(error);
3534
3535 err:
3536 pagecache_unlock_by_link(share->pagecache, page_link.link,
3537 PAGECACHE_LOCK_WRITE_UNLOCK,
3538 PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3539 LSN_IMPOSSIBLE, 0, FALSE);
3540 /* flush what was changed so far */
3541 (void) _ma_bitmap_flush(share);
3542 (void) flush_pagecache_blocks(share->pagecache, &info->dfile,
3543 FLUSH_FORCE_WRITE);
3544
3545 DBUG_RETURN(1);
3546 }
3547
3548
3549 /**
3550 @brief Fill empty space in index and data files with zeroes
3551
3552 @return
3553 @retval 0 Ok
3554 @retval 1 Error
3555 */
3556
maria_zerofill(HA_CHECK * param,MARIA_HA * info,const char * name)3557 int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name)
3558 {
3559 my_bool error, reenable_logging,
3560 zero_lsn= !(param->testflag & T_ZEROFILL_KEEP_LSN);
3561 MARIA_SHARE *share= info->s;
3562 DBUG_ENTER("maria_zerofill");
3563 if ((reenable_logging= share->now_transactional))
3564 _ma_tmp_disable_logging_for_table(info, 0);
3565 if (!(error= (maria_zerofill_index(param, info, name) ||
3566 maria_zerofill_data(param, info, name) ||
3567 _ma_set_uuid(info->s, 0))))
3568 {
3569 /*
3570 Mark that we have done zerofill of data and index. If we zeroed pages'
3571 LSN, table is movable.
3572 */
3573 share->state.changed&= ~STATE_NOT_ZEROFILLED;
3574 if (zero_lsn)
3575 {
3576 share->state.changed&= ~(STATE_NOT_MOVABLE | STATE_MOVED);
3577 /* Table should get new LSNs */
3578 share->state.create_rename_lsn= share->state.is_of_horizon=
3579 share->state.skip_redo_lsn= LSN_NEEDS_NEW_STATE_LSNS;
3580 }
3581 /* Ensure state is later flushed to disk, if within maria_chk */
3582 info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
3583
3584 /*
3585 Reset create_trid to make file comparable and to ensure that new
3586 trid's in the file starts from 0.
3587 */
3588 share->state.create_trid= 0;
3589 }
3590 if (reenable_logging)
3591 _ma_reenable_logging_for_table(info, FALSE);
3592 DBUG_RETURN(error);
3593 }
3594
3595
3596 /*
3597 Let temporary file replace old file.
3598 This assumes that the new file was created in the same
3599 directory as given by realpath(filename).
3600 This will ensure that any symlinks that are used will still work.
3601 Copy stats from old file to new file, deletes orignal and
3602 changes new file name to old file name
3603 */
3604
maria_change_to_newfile(const char * filename,const char * old_ext,const char * new_ext,time_t backup_time,myf MyFlags)3605 int maria_change_to_newfile(const char * filename, const char * old_ext,
3606 const char * new_ext, time_t backup_time,
3607 myf MyFlags)
3608 {
3609 char old_filename[FN_REFLEN],new_filename[FN_REFLEN];
3610 /* Get real path to filename */
3611 (void) fn_format(old_filename,filename,"",old_ext,2+4+32);
3612 return my_redel(old_filename,
3613 fn_format(new_filename,old_filename,"",new_ext,2+4),
3614 backup_time,
3615 MYF(MY_WME | MY_LINK_WARNING | MyFlags));
3616 } /* maria_change_to_newfile */
3617
3618
3619 /* Copy a block between two files */
3620
maria_filecopy(HA_CHECK * param,File to,File from,my_off_t start,my_off_t length,const char * type)3621 int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start,
3622 my_off_t length, const char *type)
3623 {
3624 uchar tmp_buff[IO_SIZE], *buff;
3625 ulong buff_length;
3626 DBUG_ENTER("maria_filecopy");
3627
3628 buff_length=(ulong) MY_MIN(param->write_buffer_length,length);
3629 if (!(buff=my_malloc(buff_length,MYF(0))))
3630 {
3631 buff=tmp_buff; buff_length=IO_SIZE;
3632 }
3633
3634 mysql_file_seek(from, start, MY_SEEK_SET,MYF(0));
3635 while (length > buff_length)
3636 {
3637 if (mysql_file_read(from, buff, buff_length, MYF(MY_NABP)) ||
3638 mysql_file_write(to, buff, buff_length, param->myf_rw))
3639 goto err;
3640 length-= buff_length;
3641 }
3642 if (mysql_file_read(from, buff, (size_t) length,MYF(MY_NABP)) ||
3643 mysql_file_write(to, buff, (size_t) length,param->myf_rw))
3644 goto err;
3645 if (buff != tmp_buff)
3646 my_free(buff);
3647 DBUG_RETURN(0);
3648 err:
3649 if (buff != tmp_buff)
3650 my_free(buff);
3651 _ma_check_print_error(param,"Can't copy %s to tempfile, error %d",
3652 type,my_errno);
3653 DBUG_RETURN(1);
3654 }
3655
3656
3657 /*
3658 Repair table or given index using sorting
3659
3660 SYNOPSIS
3661 maria_repair_by_sort()
3662 param Repair parameters
3663 info MARIA handler to repair
3664 name Name of table (for warnings)
3665 rep_quick set to <> 0 if we should not change data file
3666
3667 RESULT
3668 0 ok
3669 <>0 Error
3670 */
3671
maria_repair_by_sort(HA_CHECK * param,register MARIA_HA * info,const char * name,my_bool rep_quick)3672 int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
3673 const char * name, my_bool rep_quick)
3674 {
3675 int got_error;
3676 uint i, keys_to_repair;
3677 ha_rows start_records;
3678 my_off_t new_header_length, org_header_length, del;
3679 File new_file;
3680 MARIA_SORT_PARAM sort_param;
3681 MARIA_SHARE *share= info->s;
3682 HA_KEYSEG *keyseg;
3683 double *rec_per_key_part;
3684 char llbuff[22];
3685 MARIA_SORT_INFO sort_info;
3686 ulonglong UNINIT_VAR(key_map);
3687 myf sync_dir= ((share->now_transactional && !share->temporary) ?
3688 MY_SYNC_DIR : 0);
3689 my_bool scan_inited= 0, reenable_logging= 0;
3690 MARIA_SHARE backup_share;
3691 DBUG_ENTER("maria_repair_by_sort");
3692
3693 got_error= 1;
3694 new_file= -1;
3695 start_records= share->state.state.records;
3696 if (!(param->testflag & T_SILENT))
3697 {
3698 printf("- recovering (with sort) Aria-table '%s'\n",name);
3699 printf("Data records: %s\n", llstr(start_records,llbuff));
3700 }
3701
3702 if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
3703 rep_quick, &backup_share))
3704 goto err;
3705
3706 if ((reenable_logging= share->now_transactional))
3707 _ma_tmp_disable_logging_for_table(info, 0);
3708
3709 org_header_length= share->pack.header_length;
3710 new_header_length= (param->testflag & T_UNPACK) ? 0 : org_header_length;
3711 sort_param.filepos= new_header_length;
3712
3713 if (!rep_quick)
3714 {
3715 /* Get real path for data file */
3716 if ((new_file=mysql_file_create(key_file_tmp,
3717 fn_format(param->temp_filename,
3718 share->data_file_name.str, "",
3719 DATA_TMP_EXT, 2+4),
3720 0,param->tmpfile_createflag,
3721 MYF(0))) < 0)
3722 {
3723 _ma_check_print_error(param,"Can't create new tempfile: '%s'",
3724 param->temp_filename);
3725 goto err;
3726 }
3727 if (new_header_length &&
3728 maria_filecopy(param, new_file, info->dfile.file, 0L,
3729 new_header_length, "datafile-header"))
3730 goto err;
3731
3732 share->state.dellink= HA_OFFSET_ERROR;
3733 info->rec_cache.file= new_file; /* For sort_delete_record */
3734 if (share->data_file_type == BLOCK_RECORD ||
3735 (param->testflag & T_UNPACK))
3736 {
3737 if (create_new_data_handle(&sort_param, new_file))
3738 goto err;
3739 sort_info.new_info->rec_cache.file= new_file;
3740 }
3741 }
3742
3743 if (!(sort_info.key_block=
3744 alloc_key_blocks(param,
3745 (uint) param->sort_key_blocks,
3746 share->base.max_key_block_length)))
3747 goto err;
3748 sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
3749
3750 if (share->data_file_type != BLOCK_RECORD)
3751 {
3752 /* We need a read buffer to read rows in big blocks */
3753 if (init_io_cache(¶m->read_cache, info->dfile.file,
3754 (uint) param->read_buffer_length,
3755 READ_CACHE, org_header_length, 1, MYF(MY_WME)))
3756 goto err;
3757 }
3758 if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
3759 {
3760 /* When writing to not block records, we need a write buffer */
3761 if (!rep_quick)
3762 {
3763 if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
3764 (uint) param->write_buffer_length,
3765 WRITE_CACHE, new_header_length, 1,
3766 MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
3767 goto err;
3768 sort_info.new_info->opt_flag|= WRITE_CACHE_USED;
3769 }
3770 }
3771
3772 if (!(sort_param.record=
3773 (uchar*) my_malloc((size_t) share->base.default_rec_buff_size,
3774 MYF(0))) ||
3775 _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
3776 share->base.default_rec_buff_size))
3777 {
3778 _ma_check_print_error(param, "Not enough memory for extra record");
3779 goto err;
3780 }
3781
3782 /* Optionally drop indexes and optionally modify the key_map */
3783 maria_drop_all_indexes(param, info, FALSE);
3784 key_map= share->state.key_map;
3785 if (param->testflag & T_CREATE_MISSING_KEYS)
3786 {
3787 /* Invert the copied key_map to recreate all disabled indexes. */
3788 key_map= ~key_map;
3789 }
3790
3791 param->read_cache.end_of_file= sort_info.filelength;
3792 sort_param.wordlist=NULL;
3793 init_alloc_root(&sort_param.wordroot, "sort", FTPARSER_MEMROOT_ALLOC_SIZE, 0,
3794 MYF(param->malloc_flags));
3795
3796 sort_param.key_cmp=sort_key_cmp;
3797 sort_param.lock_in_memory=maria_lock_memory;
3798 sort_param.tmpdir=param->tmpdir;
3799 sort_param.master =1;
3800
3801 del=share->state.state.del;
3802
3803 /* Calculate number of keys to repair */
3804 keys_to_repair= 0;
3805 for (sort_param.key=0 ; sort_param.key < share->base.keys ;
3806 sort_param.key++)
3807 {
3808 if (maria_is_key_active(key_map, sort_param.key))
3809 keys_to_repair++;
3810 }
3811 /* For each key we scan and merge sort the keys */
3812 param->max_stage= keys_to_repair*2;
3813
3814 rec_per_key_part= param->new_rec_per_key_part;
3815 for (sort_param.key=0 ; sort_param.key < share->base.keys ;
3816 rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++)
3817 {
3818 sort_param.keyinfo=share->keyinfo+sort_param.key;
3819 /*
3820 Skip this index if it is marked disabled in the copied
3821 (and possibly inverted) key_map.
3822 */
3823 if (! maria_is_key_active(key_map, sort_param.key))
3824 {
3825 /* Remember old statistics for key */
3826 memcpy((char*) rec_per_key_part,
3827 (char*) (share->state.rec_per_key_part +
3828 (uint) (rec_per_key_part - param->new_rec_per_key_part)),
3829 sort_param.keyinfo->keysegs*sizeof(*rec_per_key_part));
3830 DBUG_PRINT("repair", ("skipping seemingly disabled index #: %u",
3831 sort_param.key));
3832 continue;
3833 }
3834
3835 if ((!(param->testflag & T_SILENT)))
3836 printf ("- Fixing index %d\n",sort_param.key+1);
3837
3838 sort_param.read_cache=param->read_cache;
3839 sort_param.seg=sort_param.keyinfo->seg;
3840 sort_param.max_pos= sort_param.pos= org_header_length;
3841 keyseg=sort_param.seg;
3842 bzero((char*) sort_param.unique,sizeof(sort_param.unique));
3843 sort_param.key_length=share->rec_reflength;
3844 for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++)
3845 {
3846 sort_param.key_length+=keyseg[i].length;
3847 if (keyseg[i].flag & HA_SPACE_PACK)
3848 sort_param.key_length+=get_pack_length(keyseg[i].length);
3849 if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
3850 sort_param.key_length+= 2 + MY_TEST(keyseg[i].length >= 127);
3851 if (keyseg[i].flag & HA_NULL_PART)
3852 sort_param.key_length++;
3853 }
3854 share->state.state.records=share->state.state.del=share->state.split=0;
3855 share->state.state.empty=0;
3856
3857 if (sort_param.keyinfo->flag & HA_FULLTEXT)
3858 {
3859 uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
3860 sort_param.keyinfo->seg->charset->mbmaxlen;
3861 sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
3862 /*
3863 fulltext indexes may have much more entries than the
3864 number of rows in the table. We estimate the number here.
3865
3866 Note, built-in parser is always nr. 0 - see ftparser_call_initializer()
3867 */
3868 if (sort_param.keyinfo->ftkey_nr == 0)
3869 {
3870 /*
3871 for built-in parser the number of generated index entries
3872 cannot be larger than the size of the data file divided
3873 by the minimal word's length
3874 */
3875 sort_info.max_records=
3876 (ha_rows) (sort_info.filelength/ft_min_word_len+1);
3877 }
3878 else
3879 {
3880 /*
3881 for external plugin parser we cannot tell anything at all :(
3882 so, we'll use all the sort memory and start from ~10 buffpeks.
3883 (see _ma_create_index_by_sort)
3884 */
3885 sort_info.max_records=
3886 10*param->sort_buffer_length/sort_param.key_length;
3887 }
3888
3889 sort_param.key_read= sort_maria_ft_key_read;
3890 sort_param.key_write= sort_maria_ft_key_write;
3891 }
3892 else
3893 {
3894 sort_param.key_read= sort_key_read;
3895 sort_param.key_write= sort_key_write;
3896 }
3897
3898 if (sort_info.new_info->s->data_file_type == BLOCK_RECORD)
3899 {
3900 scan_inited= 1;
3901 if (maria_scan_init(sort_info.info))
3902 goto err;
3903 }
3904 if (_ma_create_index_by_sort(&sort_param,
3905 (my_bool) (!(param->testflag & T_VERBOSE)),
3906 (size_t) param->sort_buffer_length))
3907 {
3908 if ((param->testflag & T_CREATE_UNIQUE_BY_SORT) && sort_param.sort_info->dupp)
3909 share->state.dupp_key= sort_param.key;
3910 else
3911 param->retry_repair= 1;
3912 _ma_check_print_error(param, "Create index by sort failed");
3913 goto err;
3914 }
3915 DBUG_EXECUTE_IF("maria_flush_whole_log",
3916 {
3917 DBUG_PRINT("maria_flush_whole_log", ("now"));
3918 translog_flush(translog_get_horizon());
3919 });
3920 DBUG_EXECUTE_IF("maria_crash_create_index_by_sort",
3921 {
3922 DBUG_PRINT("maria_crash_create_index_by_sort", ("now"));
3923 DBUG_SUICIDE();
3924 });
3925 if (scan_inited)
3926 {
3927 scan_inited= 0;
3928 maria_scan_end(sort_info.info);
3929 }
3930
3931 /* No need to calculate checksum again. */
3932 sort_param.calc_checksum= 0;
3933 free_root(&sort_param.wordroot, MYF(0));
3934
3935 /* Set for next loop */
3936 sort_info.max_records= (ha_rows) sort_info.new_info->s->state.state.records;
3937 param->stage++; /* Next stage */
3938 param->progress= 0;
3939
3940 if (param->testflag & T_STATISTICS)
3941 maria_update_key_parts(sort_param.keyinfo, rec_per_key_part,
3942 sort_param.unique,
3943 (param->stats_method ==
3944 MI_STATS_METHOD_IGNORE_NULLS ?
3945 sort_param.notnull : NULL),
3946 (ulonglong) share->state.state.records);
3947 maria_set_key_active(share->state.key_map, sort_param.key);
3948 DBUG_PRINT("repair", ("set enabled index #: %u", sort_param.key));
3949
3950 if (_ma_flush_table_files_before_swap(param, info))
3951 goto err;
3952
3953 if (sort_param.fix_datafile)
3954 {
3955 param->read_cache.end_of_file=sort_param.filepos;
3956 if (maria_write_data_suffix(&sort_info,1) ||
3957 end_io_cache(&sort_info.new_info->rec_cache))
3958 {
3959 _ma_check_print_error(param, "Got error when flushing row cache");
3960 goto err;
3961 }
3962 sort_info.new_info->opt_flag&= ~WRITE_CACHE_USED;
3963
3964 if (param->testflag & T_SAFE_REPAIR)
3965 {
3966 /* Don't repair if we loosed more than one row */
3967 if (sort_info.new_info->s->state.state.records+1 < start_records)
3968 {
3969 _ma_check_print_error(param,
3970 "Rows lost (Found %lu of %lu); Aborting "
3971 "because safe repair was requested",
3972 (ulong) sort_info.new_info->s->
3973 state.state.records,
3974 (ulong) start_records);
3975 share->state.state.records=start_records;
3976 goto err;
3977 }
3978 }
3979
3980 sort_info.new_info->s->state.state.data_file_length= sort_param.filepos;
3981 if (sort_info.new_info != sort_info.info)
3982 {
3983 MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
3984 if (maria_close(sort_info.new_info))
3985 {
3986 _ma_check_print_error(param, "Got error %d on close", my_errno);
3987 goto err;
3988 }
3989 copy_data_file_state(&share->state, &save_state);
3990 new_file= -1;
3991 sort_info.new_info= info;
3992 info->rec_cache.file= info->dfile.file;
3993 }
3994
3995 share->state.version=(ulong) time((time_t*) 0); /* Force reopen */
3996
3997 /* Replace the actual file with the temporary file */
3998 if (new_file >= 0)
3999 {
4000 mysql_file_close(new_file, MYF(MY_WME));
4001 new_file= -1;
4002 }
4003 change_data_file_descriptor(info, -1);
4004 if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
4005 DATA_TMP_EXT, param->backup_time,
4006 (param->testflag & T_BACKUP_DATA ?
4007 MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
4008 sync_dir) ||
4009 _ma_open_datafile(info, share))
4010 {
4011 _ma_check_print_error(param, "Couldn't change to new data file");
4012 goto err;
4013 }
4014 if (param->testflag & T_UNPACK)
4015 restore_data_file_type(share);
4016
4017 org_header_length= share->pack.header_length;
4018 sort_info.org_data_file_type= share->data_file_type;
4019 sort_info.filelength= share->state.state.data_file_length;
4020 sort_param.fix_datafile=0;
4021
4022 /* Offsets are now in proportion to the new file length */
4023 param->max_progress= sort_info.filelength;
4024
4025 }
4026 else
4027 share->state.state.data_file_length=sort_param.max_pos;
4028
4029 param->read_cache.file= info->dfile.file; /* re-init read cache */
4030 if (share->data_file_type != BLOCK_RECORD)
4031 reinit_io_cache(¶m->read_cache, READ_CACHE,
4032 share->pack.header_length, 1, 1);
4033 }
4034
4035 if (param->testflag & T_WRITE_LOOP)
4036 {
4037 fputs(" \r",stdout);
4038 fflush(stdout);
4039 }
4040
4041 if (rep_quick && del+sort_info.dupp != share->state.state.del)
4042 {
4043 _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
4044 "Found wrong number of deleted records");
4045 _ma_check_print_error(param,"Run recovery again without -q");
4046 got_error=1;
4047 param->retry_repair=1;
4048 param->testflag|=T_RETRY_WITHOUT_QUICK;
4049 goto err;
4050 }
4051
4052 if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
4053 {
4054 my_off_t skr= share->state.state.data_file_length +
4055 ((sort_info.org_data_file_type == COMPRESSED_RECORD) ?
4056 MEMMAP_EXTRA_MARGIN : 0);
4057 #ifdef USE_RELOC
4058 if (sort_info.org_data_file_type == STATIC_RECORD &&
4059 skr < share->base.reloc*share->base.min_pack_length)
4060 skr=share->base.reloc*share->base.min_pack_length;
4061 #endif
4062 if (skr != sort_info.filelength)
4063 if (mysql_file_chsize(info->dfile.file, skr, 0, MYF(0)))
4064 _ma_check_print_warning(param,
4065 "Can't change size of datafile, error: %d",
4066 my_errno);
4067 }
4068
4069 if (param->testflag & T_CALC_CHECKSUM)
4070 share->state.state.checksum=param->glob_crc;
4071
4072 if (mysql_file_chsize(share->kfile.file,
4073 share->state.state.key_file_length, 0, MYF(0)))
4074 _ma_check_print_warning(param,
4075 "Can't change size of indexfile, error: %d",
4076 my_errno);
4077
4078 if (!(param->testflag & T_SILENT))
4079 {
4080 if (start_records != share->state.state.records)
4081 printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
4082 }
4083 if (sort_info.dupp)
4084 _ma_check_print_warning(param,
4085 "%s records have been removed",
4086 llstr(sort_info.dupp,llbuff));
4087 got_error=0;
4088 /* If invoked by external program that uses thr_lock */
4089 if (&share->state.state != info->state)
4090 *info->state= *info->state_start= share->state.state;
4091
4092 err:
4093 if (scan_inited)
4094 maria_scan_end(sort_info.info);
4095 _ma_reset_state(info);
4096
4097 if (sort_info.new_info)
4098 {
4099 end_io_cache(&sort_info.new_info->rec_cache);
4100 sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4101 }
4102 end_io_cache(¶m->read_cache);
4103 info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4104 if (got_error)
4105 {
4106 if (! param->error_printed)
4107 _ma_check_print_error(param,"%d when fixing table",my_errno);
4108 (void)_ma_flush_table_files_before_swap(param, info);
4109 if (sort_info.new_info && sort_info.new_info != sort_info.info)
4110 {
4111 unuse_data_file_descriptor(sort_info.new_info);
4112 maria_close(sort_info.new_info);
4113 }
4114 if (new_file >= 0)
4115 {
4116 mysql_file_close(new_file, MYF(0));
4117 mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
4118 }
4119 maria_mark_crashed_on_repair(info);
4120 }
4121 else
4122 {
4123 if (key_map == share->state.key_map)
4124 share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
4125 /*
4126 Now that we have flushed and forced everything, we can bump
4127 create_rename_lsn:
4128 */
4129 DBUG_EXECUTE_IF("maria_flush_whole_log",
4130 {
4131 DBUG_PRINT("maria_flush_whole_log", ("now"));
4132 translog_flush(translog_get_horizon());
4133 });
4134 DBUG_EXECUTE_IF("maria_crash_repair",
4135 {
4136 DBUG_PRINT("maria_crash_repair", ("now"));
4137 DBUG_SUICIDE();
4138 });
4139 }
4140 share->state.changed|= STATE_NOT_SORTED_PAGES;
4141 if (!rep_quick)
4142 share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
4143 STATE_NOT_MOVABLE);
4144
4145 /* If caller had disabled logging it's not up to us to re-enable it */
4146 if (reenable_logging)
4147 _ma_reenable_logging_for_table(info, FALSE);
4148 restore_table_state_after_repair(info, &backup_share);
4149
4150 my_free(sort_param.rec_buff);
4151 my_free(sort_param.record);
4152 my_free(sort_info.key_block);
4153 my_free(sort_info.ft_buf);
4154 my_free(sort_info.buff);
4155 DBUG_RETURN(got_error);
4156 }
4157
4158
4159 /*
4160 Threaded repair of table using sorting
4161
4162 SYNOPSIS
4163 maria_repair_parallel()
4164 param Repair parameters
4165 info MARIA handler to repair
4166 name Name of table (for warnings)
4167 rep_quick set to <> 0 if we should not change data file
4168
4169 DESCRIPTION
4170 Same as maria_repair_by_sort but do it multithreaded
4171 Each key is handled by a separate thread.
4172 TODO: make a number of threads a parameter
4173
4174 In parallel repair we use one thread per index. There are two modes:
4175
4176 Quick
4177
4178 Only the indexes are rebuilt. All threads share a read buffer.
4179 Every thread that needs fresh data in the buffer enters the shared
4180 cache lock. The last thread joining the lock reads the buffer from
4181 the data file and wakes all other threads.
4182
4183 Non-quick
4184
4185 The data file is rebuilt and all indexes are rebuilt to point to
4186 the new record positions. One thread is the master thread. It
4187 reads from the old data file and writes to the new data file. It
4188 also creates one of the indexes. The other threads read from a
4189 buffer which is filled by the master. If they need fresh data,
4190 they enter the shared cache lock. If the masters write buffer is
4191 full, it flushes it to the new data file and enters the shared
4192 cache lock too. When all threads joined in the lock, the master
4193 copies its write buffer to the read buffer for the other threads
4194 and wakes them.
4195
4196 RESULT
4197 0 ok
4198 <>0 Error
4199 */
4200
maria_repair_parallel(HA_CHECK * param,register MARIA_HA * info,const char * name,my_bool rep_quick)4201 int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
4202 const char * name, my_bool rep_quick)
4203 {
4204 int got_error;
4205 uint i,key, istep;
4206 ha_rows start_records;
4207 my_off_t new_header_length,del;
4208 File new_file;
4209 MARIA_SORT_PARAM *sort_param=0, tmp_sort_param;
4210 MARIA_SHARE *share= info->s;
4211 double *rec_per_key_part;
4212 HA_KEYSEG *keyseg;
4213 char llbuff[22];
4214 IO_CACHE new_data_cache; /* For non-quick repair. */
4215 IO_CACHE_SHARE io_share;
4216 MARIA_SORT_INFO sort_info;
4217 MARIA_SHARE backup_share;
4218 ulonglong UNINIT_VAR(key_map);
4219 pthread_attr_t thr_attr;
4220 myf sync_dir= ((share->now_transactional && !share->temporary) ?
4221 MY_SYNC_DIR : 0);
4222 my_bool reenable_logging= 0;
4223 DBUG_ENTER("maria_repair_parallel");
4224
4225 got_error= 1;
4226 new_file= -1;
4227 start_records= share->state.state.records;
4228 if (!(param->testflag & T_SILENT))
4229 {
4230 printf("- parallel recovering (with sort) Aria-table '%s'\n",name);
4231 printf("Data records: %s\n", llstr(start_records, llbuff));
4232 }
4233
4234 bzero(&new_data_cache, sizeof(new_data_cache));
4235 if (initialize_variables_for_repair(param, &sort_info, &tmp_sort_param, info,
4236 rep_quick, &backup_share))
4237 goto err;
4238
4239 if ((reenable_logging= share->now_transactional))
4240 _ma_tmp_disable_logging_for_table(info, 0);
4241
4242 new_header_length= ((param->testflag & T_UNPACK) ? 0 :
4243 share->pack.header_length);
4244
4245 /*
4246 Quick repair (not touching data file, rebuilding indexes):
4247 {
4248 Read cache is (HA_CHECK *param)->read_cache using info->dfile.file.
4249 }
4250
4251 Non-quick repair (rebuilding data file and indexes):
4252 {
4253 Master thread:
4254
4255 Read cache is (HA_CHECK *param)->read_cache using info->dfile.file.
4256 Write cache is (MARIA_INFO *info)->rec_cache using new_file.
4257
4258 Slave threads:
4259
4260 Read cache is new_data_cache synced to master rec_cache.
4261
4262 The final assignment of the filedescriptor for rec_cache is done
4263 after the cache creation.
4264
4265 Don't check file size on new_data_cache, as the resulting file size
4266 is not known yet.
4267
4268 As rec_cache and new_data_cache are synced, write_buffer_length is
4269 used for the read cache 'new_data_cache'. Both start at the same
4270 position 'new_header_length'.
4271 }
4272 */
4273 DBUG_PRINT("info", ("is quick repair: %d", (int) rep_quick));
4274 if (!rep_quick)
4275 my_b_clear(&new_data_cache);
4276
4277 /* Initialize pthread structures before goto err. */
4278 mysql_mutex_init(key_SORT_INFO_mutex, &sort_info.mutex, MY_MUTEX_INIT_FAST);
4279 mysql_cond_init(key_SORT_INFO_cond, &sort_info.cond, 0);
4280
4281 if (!(sort_info.key_block=
4282 alloc_key_blocks(param, (uint) param->sort_key_blocks,
4283 share->base.max_key_block_length)))
4284 goto err;
4285
4286 if (init_io_cache(¶m->read_cache, info->dfile.file,
4287 (uint) param->read_buffer_length,
4288 READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
4289 goto err;
4290
4291 sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
4292 info->opt_flag|=WRITE_CACHE_USED;
4293 info->rec_cache.file= info->dfile.file; /* for sort_delete_record */
4294
4295 if (!rep_quick)
4296 {
4297 /* Get real path for data file */
4298 if ((new_file= mysql_file_create(key_file_tmp,
4299 fn_format(param->temp_filename,
4300 share->data_file_name.str, "",
4301 DATA_TMP_EXT,
4302 2+4),
4303 0,param->tmpfile_createflag,
4304 MYF(0))) < 0)
4305 {
4306 _ma_check_print_error(param,"Can't create new tempfile: '%s'",
4307 param->temp_filename);
4308 goto err;
4309 }
4310 if (new_header_length &&
4311 maria_filecopy(param, new_file, info->dfile.file,0L,new_header_length,
4312 "datafile-header"))
4313 goto err;
4314 if (param->testflag & T_UNPACK)
4315 restore_data_file_type(share);
4316 share->state.dellink= HA_OFFSET_ERROR;
4317
4318 if (init_io_cache(&new_data_cache, -1,
4319 (uint) param->write_buffer_length,
4320 READ_CACHE, new_header_length, 1,
4321 MYF(MY_WME | MY_DONT_CHECK_FILESIZE)))
4322 goto err;
4323
4324 if (init_io_cache(&info->rec_cache, new_file,
4325 (uint) param->write_buffer_length,
4326 WRITE_CACHE, new_header_length, 1,
4327 MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
4328 goto err;
4329
4330 }
4331
4332 /* Optionally drop indexes and optionally modify the key_map. */
4333 maria_drop_all_indexes(param, info, FALSE);
4334 key_map= share->state.key_map;
4335 if (param->testflag & T_CREATE_MISSING_KEYS)
4336 {
4337 /* Invert the copied key_map to recreate all disabled indexes. */
4338 key_map= ~key_map;
4339 }
4340
4341 param->read_cache.end_of_file= sort_info.filelength;
4342
4343 /*
4344 +1 below is required hack for parallel repair mode.
4345 The share->state.state.records value, that is compared later
4346 to sort_info.max_records and cannot exceed it, is
4347 increased in sort_key_write. In maria_repair_by_sort, sort_key_write
4348 is called after sort_key_read, where the comparison is performed,
4349 but in parallel mode master thread can call sort_key_write
4350 before some other repair thread calls sort_key_read.
4351 Furthermore I'm not even sure +1 would be enough.
4352 May be sort_info.max_records shold be always set to max value in
4353 parallel mode.
4354 */
4355 sort_info.max_records++;
4356
4357 del=share->state.state.del;
4358
4359 if (!(sort_param=(MARIA_SORT_PARAM *)
4360 my_malloc((uint) share->base.keys *
4361 (sizeof(MARIA_SORT_PARAM) + share->base.pack_reclength),
4362 MYF(MY_ZEROFILL))))
4363 {
4364 _ma_check_print_error(param,"Not enough memory for key!");
4365 goto err;
4366 }
4367 #ifdef USING_SECOND_APPROACH
4368 uint total_key_length=0;
4369 #endif
4370 rec_per_key_part= param->new_rec_per_key_part;
4371 share->state.state.records=share->state.state.del=share->state.split=0;
4372 share->state.state.empty=0;
4373
4374 for (i=key=0, istep=1 ; key < share->base.keys ;
4375 rec_per_key_part+=sort_param[i].keyinfo->keysegs, i+=istep, key++)
4376 {
4377 sort_param[i].key=key;
4378 sort_param[i].keyinfo=share->keyinfo+key;
4379 sort_param[i].seg=sort_param[i].keyinfo->seg;
4380 /*
4381 Skip this index if it is marked disabled in the copied
4382 (and possibly inverted) key_map.
4383 */
4384 if (! maria_is_key_active(key_map, key))
4385 {
4386 /* Remember old statistics for key */
4387 memcpy((char*) rec_per_key_part,
4388 (char*) (share->state.rec_per_key_part+
4389 (uint) (rec_per_key_part - param->new_rec_per_key_part)),
4390 sort_param[i].keyinfo->keysegs*sizeof(*rec_per_key_part));
4391 istep=0;
4392 continue;
4393 }
4394 istep=1;
4395 if ((!(param->testflag & T_SILENT)))
4396 printf ("- Fixing index %d\n",key+1);
4397 if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
4398 {
4399 sort_param[i].key_read=sort_maria_ft_key_read;
4400 sort_param[i].key_write=sort_maria_ft_key_write;
4401 }
4402 else
4403 {
4404 sort_param[i].key_read=sort_key_read;
4405 sort_param[i].key_write=sort_key_write;
4406 }
4407 sort_param[i].key_cmp=sort_key_cmp;
4408 sort_param[i].lock_in_memory=maria_lock_memory;
4409 sort_param[i].tmpdir=param->tmpdir;
4410 sort_param[i].sort_info=&sort_info;
4411 sort_param[i].master=0;
4412 sort_param[i].fix_datafile=0;
4413 sort_param[i].calc_checksum= 0;
4414
4415 sort_param[i].filepos=new_header_length;
4416 sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length;
4417
4418 sort_param[i].record= (((uchar *)(sort_param+share->base.keys))+
4419 (share->base.pack_reclength * i));
4420 if (_ma_alloc_buffer(&sort_param[i].rec_buff, &sort_param[i].rec_buff_size,
4421 share->base.default_rec_buff_size))
4422 {
4423 _ma_check_print_error(param,"Not enough memory!");
4424 goto err;
4425 }
4426 sort_param[i].key_length=share->rec_reflength;
4427 for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END;
4428 keyseg++)
4429 {
4430 sort_param[i].key_length+=keyseg->length;
4431 if (keyseg->flag & HA_SPACE_PACK)
4432 sort_param[i].key_length+=get_pack_length(keyseg->length);
4433 if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
4434 sort_param[i].key_length+= 2 + MY_TEST(keyseg->length >= 127);
4435 if (keyseg->flag & HA_NULL_PART)
4436 sort_param[i].key_length++;
4437 }
4438 #ifdef USING_SECOND_APPROACH
4439 total_key_length+=sort_param[i].key_length;
4440 #endif
4441
4442 if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
4443 {
4444 uint ft_max_word_len_for_sort=
4445 (FT_MAX_WORD_LEN_FOR_SORT *
4446 sort_param[i].keyinfo->seg->charset->mbmaxlen);
4447 sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
4448 init_alloc_root(&sort_param[i].wordroot, "sort",
4449 FTPARSER_MEMROOT_ALLOC_SIZE, 0,
4450 MYF(param->malloc_flags));
4451 }
4452 }
4453 sort_info.total_keys=i;
4454 sort_param[0].master= 1;
4455 sort_param[0].fix_datafile= ! rep_quick;
4456 sort_param[0].calc_checksum= MY_TEST(param->testflag & T_CALC_CHECKSUM);
4457
4458 if (!maria_ftparser_alloc_param(info))
4459 goto err;
4460
4461 sort_info.got_error=0;
4462 mysql_mutex_lock(&sort_info.mutex);
4463
4464 /*
4465 Initialize the I/O cache share for use with the read caches and, in
4466 case of non-quick repair, the write cache. When all threads join on
4467 the cache lock, the writer copies the write cache contents to the
4468 read caches.
4469 */
4470 if (i > 1)
4471 {
4472 if (rep_quick)
4473 init_io_cache_share(¶m->read_cache, &io_share, NULL, i);
4474 else
4475 init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i);
4476 }
4477 else
4478 io_share.total_threads= 0; /* share not used */
4479
4480 (void) pthread_attr_init(&thr_attr);
4481 (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
4482
4483 for (i=0 ; i < sort_info.total_keys ; i++)
4484 {
4485 /*
4486 Copy the properly initialized IO_CACHE structure so that every
4487 thread has its own copy. In quick mode param->read_cache is shared
4488 for use by all threads. In non-quick mode all threads but the
4489 first copy the shared new_data_cache, which is synchronized to the
4490 write cache of the first thread. The first thread copies
4491 param->read_cache, which is not shared.
4492 */
4493 sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache :
4494 new_data_cache);
4495 DBUG_PRINT("io_cache_share", ("thread: %u read_cache: %p",
4496 i, &sort_param[i].read_cache));
4497
4498 /*
4499 two approaches: the same amount of memory for each thread
4500 or the memory for the same number of keys for each thread...
4501 In the second one all the threads will fill their sort_buffers
4502 (and call write_keys) at the same time, putting more stress on i/o.
4503 */
4504 sort_param[i].sortbuff_size=
4505 #ifndef USING_SECOND_APPROACH
4506 param->sort_buffer_length/sort_info.total_keys;
4507 #else
4508 param->sort_buffer_length*sort_param[i].key_length/total_key_length;
4509 #endif
4510 if (mysql_thread_create(key_thread_find_all_keys,
4511 &sort_param[i].thr, &thr_attr,
4512 _ma_thr_find_all_keys, (void *) (sort_param+i)))
4513 {
4514 _ma_check_print_error(param,"Cannot start a repair thread");
4515 /* Cleanup: Detach from the share. Avoid others to be blocked. */
4516 if (io_share.total_threads)
4517 remove_io_thread(&sort_param[i].read_cache);
4518 DBUG_PRINT("error", ("Cannot start a repair thread"));
4519 sort_info.got_error=1;
4520 }
4521 else
4522 sort_info.threads_running++;
4523 }
4524 (void) pthread_attr_destroy(&thr_attr);
4525
4526 /* waiting for all threads to finish */
4527 while (sort_info.threads_running)
4528 mysql_cond_wait(&sort_info.cond, &sort_info.mutex);
4529 mysql_mutex_unlock(&sort_info.mutex);
4530
4531 if ((got_error= _ma_thr_write_keys(sort_param)))
4532 {
4533 param->retry_repair=1;
4534 goto err;
4535 }
4536 got_error=1; /* Assume the following may go wrong */
4537
4538 if (_ma_flush_table_files_before_swap(param, info))
4539 goto err;
4540
4541 if (sort_param[0].fix_datafile)
4542 {
4543 /*
4544 Append some nulls to the end of a memory mapped file. Destroy the
4545 write cache. The master thread did already detach from the share
4546 by remove_io_thread() in sort.c:thr_find_all_keys().
4547 */
4548 if (maria_write_data_suffix(&sort_info,1) ||
4549 end_io_cache(&info->rec_cache))
4550 goto err;
4551 if (param->testflag & T_SAFE_REPAIR)
4552 {
4553 /* Don't repair if we loosed more than one row */
4554 if (sort_info.new_info->s->state.state.records+1 < start_records)
4555 {
4556 _ma_check_print_error(param,
4557 "Rows lost (Found %lu of %lu); Aborting "
4558 "because safe repair was requested",
4559 (ulong) share->state.state.records,
4560 (ulong) start_records);
4561 share->state.state.records=start_records;
4562 goto err;
4563 }
4564 }
4565 share->state.state.data_file_length= sort_param->filepos;
4566 /* Only whole records */
4567 share->state.version= (ulong) time((time_t*) 0);
4568 /*
4569 Exchange the data file descriptor of the table, so that we use the
4570 new file from now on.
4571 */
4572 mysql_file_close(info->dfile.file, MYF(0));
4573 info->dfile.file= new_file;
4574 share->pack.header_length=(ulong) new_header_length;
4575 }
4576 else
4577 share->state.state.data_file_length=sort_param->max_pos;
4578
4579 if (rep_quick && del+sort_info.dupp != share->state.state.del)
4580 {
4581 _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
4582 "Found wrong number of deleted records");
4583 _ma_check_print_error(param,"Run recovery again without -q");
4584 param->retry_repair=1;
4585 param->testflag|=T_RETRY_WITHOUT_QUICK;
4586 goto err;
4587 }
4588
4589 if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
4590 {
4591 my_off_t skr= share->state.state.data_file_length +
4592 ((sort_info.org_data_file_type == COMPRESSED_RECORD) ?
4593 MEMMAP_EXTRA_MARGIN : 0);
4594 #ifdef USE_RELOC
4595 if (sort_info.org_data_file_type == STATIC_RECORD &&
4596 skr < share->base.reloc*share->base.min_pack_length)
4597 skr=share->base.reloc*share->base.min_pack_length;
4598 #endif
4599 if (skr != sort_info.filelength)
4600 if (mysql_file_chsize(info->dfile.file, skr, 0, MYF(0)))
4601 _ma_check_print_warning(param,
4602 "Can't change size of datafile, error: %d",
4603 my_errno);
4604 }
4605 if (param->testflag & T_CALC_CHECKSUM)
4606 share->state.state.checksum=param->glob_crc;
4607
4608 if (mysql_file_chsize(share->kfile.file,
4609 share->state.state.key_file_length, 0, MYF(0)))
4610 _ma_check_print_warning(param,
4611 "Can't change size of indexfile, error: %d",
4612 my_errno);
4613
4614 if (!(param->testflag & T_SILENT))
4615 {
4616 if (start_records != share->state.state.records)
4617 printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
4618 }
4619 if (sort_info.dupp)
4620 _ma_check_print_warning(param,
4621 "%s records have been removed",
4622 llstr(sort_info.dupp,llbuff));
4623 got_error=0;
4624 /* If invoked by external program that uses thr_lock */
4625 if (&share->state.state != info->state)
4626 *info->state= *info->state_start= share->state.state;
4627
4628 err:
4629 _ma_reset_state(info);
4630
4631 /*
4632 Destroy the write cache. The master thread did already detach from
4633 the share by remove_io_thread() or it was not yet started (if the
4634 error happend before creating the thread).
4635 */
4636 if (sort_info.new_info)
4637 {
4638 end_io_cache(&sort_info.new_info->rec_cache);
4639 sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4640 }
4641 end_io_cache(¶m->read_cache);
4642 info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4643 /*
4644 Destroy the new data cache in case of non-quick repair. All slave
4645 threads did either detach from the share by remove_io_thread()
4646 already or they were not yet started (if the error happend before
4647 creating the threads).
4648 */
4649 if (!rep_quick && my_b_inited(&new_data_cache))
4650 end_io_cache(&new_data_cache);
4651 if (!got_error)
4652 {
4653 /* Replace the actual file with the temporary file */
4654 if (new_file >= 0)
4655 {
4656 mysql_file_close(new_file,MYF(0));
4657 info->dfile.file= new_file= -1;
4658 if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
4659 DATA_TMP_EXT, param->backup_time,
4660 MYF((param->testflag & T_BACKUP_DATA ?
4661 MY_REDEL_MAKE_BACKUP : 0) |
4662 sync_dir)) ||
4663 _ma_open_datafile(info,share))
4664 got_error=1;
4665 }
4666 }
4667 if (got_error)
4668 {
4669 if (! param->error_printed)
4670 _ma_check_print_error(param,"%d when fixing table",my_errno);
4671 (void)_ma_flush_table_files_before_swap(param, info);
4672 if (new_file >= 0)
4673 {
4674 mysql_file_close(new_file,MYF(0));
4675 mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
4676 if (info->dfile.file == new_file)
4677 info->dfile.file= -1;
4678 }
4679 maria_mark_crashed_on_repair(info);
4680 }
4681 else if (key_map == share->state.key_map)
4682 share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
4683 share->state.changed|= STATE_NOT_SORTED_PAGES;
4684 if (!rep_quick)
4685 share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
4686 STATE_NOT_MOVABLE);
4687
4688 mysql_cond_destroy (&sort_info.cond);
4689 mysql_mutex_destroy(&sort_info.mutex);
4690
4691 /* If caller had disabled logging it's not up to us to re-enable it */
4692 if (reenable_logging)
4693 _ma_reenable_logging_for_table(info, FALSE);
4694 restore_table_state_after_repair(info, &backup_share);
4695
4696 my_free(sort_info.ft_buf);
4697 my_free(sort_info.key_block);
4698 my_free(sort_param);
4699 my_free(sort_info.buff);
4700 if (!got_error && (param->testflag & T_UNPACK))
4701 restore_data_file_type(share);
4702 DBUG_RETURN(got_error);
4703 }
4704
4705 /* Read next record and return next key */
4706
sort_key_read(MARIA_SORT_PARAM * sort_param,uchar * key)4707 static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
4708 {
4709 int error;
4710 MARIA_SORT_INFO *sort_info= sort_param->sort_info;
4711 MARIA_HA *info= sort_info->info;
4712 MARIA_KEY int_key;
4713 DBUG_ENTER("sort_key_read");
4714
4715 if ((error=sort_get_next_record(sort_param)))
4716 DBUG_RETURN(error);
4717 if (info->s->state.state.records == sort_info->max_records)
4718 {
4719 _ma_check_print_error(sort_info->param,
4720 "Key %d - Found too many records; Can't continue",
4721 sort_param->key+1);
4722 DBUG_RETURN(1);
4723 }
4724 if (_ma_sort_write_record(sort_param))
4725 DBUG_RETURN(1);
4726
4727 (*info->s->keyinfo[sort_param->key].make_key)(info, &int_key,
4728 sort_param->key, key,
4729 sort_param->record,
4730 sort_param->current_filepos,
4731 0);
4732 sort_param->real_key_length= int_key.data_length + int_key.ref_length;
4733 #ifdef HAVE_valgrind
4734 bzero(key+sort_param->real_key_length,
4735 (sort_param->key_length-sort_param->real_key_length));
4736 #endif
4737 DBUG_RETURN(0);
4738 } /* sort_key_read */
4739
4740
/*
  Return the next fulltext key for the sort.

  One record yields a list of words; each call turns one word into a key.
  When no word list is pending, records are read (and passed to
  _ma_sort_write_record()) until one is found whose parsed word list is
  not empty. sort_param->wordlist / sort_param->wordptr remember the list
  and the next word to use between calls.

  RETURN
    0	 ok, 'key' and sort_param->real_key_length are set
    1	 parsing the record failed
    else the non-zero value returned by sort_get_next_record() or
         _ma_sort_write_record()
*/

static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
{
  int rc;
  MARIA_HA *handler= sort_param->sort_info->info;
  FT_WORD *word= 0;
  MARIA_KEY made_key;
  DBUG_ENTER("sort_maria_ft_key_read");

  if (sort_param->wordlist)
  {
    /* Words from the current record are still pending */
    word= (FT_WORD*) (sort_param->wordptr);
  }
  else
  {
    /* Fetch records until one produces at least one word */
    do
    {
      free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
      rc= sort_get_next_record(sort_param);
      if (rc)
        DBUG_RETURN(rc);
      rc= _ma_sort_write_record(sort_param);
      if (rc)
        DBUG_RETURN(rc);
      word= _ma_ft_parserecord(handler, sort_param->key, sort_param->record,
                               &sort_param->wordroot);
      if (!word)
        DBUG_RETURN(1);
    } while (!word->pos);
    sort_param->wordptr= sort_param->wordlist= word;
  }

  /* Build the key for the current word, then step to the following one */
  _ma_ft_make_key(handler, &made_key, sort_param->key, key, word,
                  sort_param->current_filepos);
  word++;
  sort_param->real_key_length= made_key.data_length + made_key.ref_length;

#ifdef HAVE_valgrind
  if (sort_param->real_key_length < sort_param->key_length)
    bzero(key + sort_param->real_key_length,
          (sort_param->key_length - sort_param->real_key_length));
#endif

  if (word->pos)
    sort_param->wordptr= (void*) word;
  else
  {
    /* That was the last word of this record; release the word list */
    free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
    sort_param->wordlist= 0;
  }

  /* Every failure has already returned above */
  DBUG_RETURN(0);
} /* sort_maria_ft_key_read */
4793
4794
4795 /*
4796 Read next record from file using parameters in sort_info.
4797
4798 SYNOPSIS
4799 sort_get_next_record()
4800 sort_param Information about and for the sort process
4801
4802 NOTES
4803 Dynamic Records With Non-Quick Parallel Repair
4804
4805 For non-quick parallel repair we use a synchronized read/write
4806 cache. This means that one thread is the master who fixes the data
4807 file by reading each record from the old data file and writing it
4808 to the new data file. By doing this the records in the new data
4809 file are written contiguously. Whenever the write buffer is full,
4810 it is copied to the read buffer. The slaves read from the read
4811 buffer, which is not associated with a file. Thus read_cache.file
4812 is -1. When using _mi_read_cache(), the slaves must always set
4813 flag to READING_NEXT so that the function never tries to read from
4814 file. This is safe because the records are contiguous. There is no
4815 need to read outside the cache. This condition is evaluated in the
4816 variable 'parallel_flag' for quick reference. read_cache.file must
4817 be >= 0 in every other case.
4818
4819 RETURN
4820 -1 end of file
4821 0 ok
4822 sort_param->current_filepos points to record position.
4823 sort_param->record contains record
4824 sort_param->max_pos contains position to last byte read
4825 > 0 error
4826 */
4827
sort_get_next_record(MARIA_SORT_PARAM * sort_param)4828 static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
4829 {
4830 int searching;
4831 int parallel_flag;
4832 uint found_record,b_type,left_length;
4833 my_off_t pos;
4834 MARIA_BLOCK_INFO block_info;
4835 MARIA_SORT_INFO *sort_info=sort_param->sort_info;
4836 HA_CHECK *param=sort_info->param;
4837 MARIA_HA *info=sort_info->info;
4838 MARIA_SHARE *share= info->s;
4839 char llbuff[22],llbuff2[22];
4840 DBUG_ENTER("sort_get_next_record");
4841
/* Return an error (1) at once if _ma_killed_ptr() reports a kill request */
4842 if (_ma_killed_ptr(param))
4843 DBUG_RETURN(1);
/*
  Report progress each time the per-call counter reaches WRITE_COUNT,
  then restart the counter.
*/
4844 if (param->progress_counter++ >= WRITE_COUNT)
4845 {
4846 param->progress_counter= 0;
4847 _ma_report_progress(param, param->progress, param->max_progress);
4848 }
4849
/* How the next row is located depends on the row format of the old file */
4850 switch (sort_info->org_data_file_type) {
4851 case BLOCK_RECORD:
4852 {
4853 for (;;)
4854 {
4855 int flag;
4856 /*
4857 Assume table is transactional and it had LSN pages in the
4858 cache. Repair has flushed them, left data pages stay in
4859 cache, and disabled transactionality (so share's current page
4860 type is PLAIN); page cache would assert if it finds a cached LSN page
4861 while _ma_scan_block_record() requested a PLAIN page. So we use
4862 UNKNOWN.
4863 */
4864 enum pagecache_page_type save_page_type= share->page_type;
4865 share->page_type= PAGECACHE_READ_UNKNOWN_PAGE;
4866 if (info != sort_info->new_info)
4867 {
4868 /* Safe scanning */
4869 flag= _ma_safe_scan_block_record(sort_info, info,
4870 sort_param->record);
4871 }
4872 else
4873 {
4874 /*
4875 Scan on clean table.
4876 It requires a reliable data_file_length so we set it.
4877 */
4878 share->state.state.data_file_length= sort_info->filelength;
4879 info->cur_row.trid= 0;
4880 flag= _ma_scan_block_record(info, sort_param->record,
4881 info->cur_row.nextpos, 1);
4882 set_if_bigger(param->max_found_trid, info->cur_row.trid);
4883 if (info->cur_row.trid > param->max_trid)
4884 {
4885 _ma_check_print_not_visible_error(param, info->cur_row.trid);
4886 flag= HA_ERR_ROW_NOT_VISIBLE;
4887 }
4888 }
4889 param->progress= (ma_recordpos_to_page(info->cur_row.lastpos)*
4890 share->block_size);
4891
/* Restore the page type that was overridden for the scan above */
4892 share->page_type= save_page_type;
4893 if (!flag)
4894 {
4895 if (sort_param->calc_checksum)
4896 {
4897 ha_checksum checksum;
4898 checksum= (*share->calc_check_checksum)(info, sort_param->record);
4899 if (share->calc_checksum &&
4900 info->cur_row.checksum != (checksum & 255))
4901 {
4902 if (param->testflag & T_VERBOSE)
4903 {
4904 record_pos_to_txt(info, info->cur_row.lastpos, llbuff);
4905 _ma_check_print_info(param,
4906 "Found record with wrong checksum at %s",
4907 llbuff);
4908 }
/* Stored checksum byte does not match: drop this row, scan for the next */
4909 continue;
4910 }
4911 info->cur_row.checksum= checksum;
4912 param->glob_crc+= checksum;
4913 }
4914 sort_param->start_recpos= sort_param->current_filepos=
4915 info->cur_row.lastpos;
4916 DBUG_RETURN(0);
4917 }
4918 if (flag == HA_ERR_END_OF_FILE)
4919 {
4920 sort_param->max_pos= share->state.state.data_file_length;
4921 DBUG_RETURN(-1);
4922 }
4923 /* Retry only if wrong record, not if disk error */
4924 if (flag != HA_ERR_WRONG_IN_RECORD && flag != HA_ERR_WRONG_CRC)
4925 {
4926 retry_if_quick(sort_param, flag);
4927 DBUG_RETURN(flag);
4928 }
4929 }
4930 break; /* Impossible */
4931 }
4932 case STATIC_RECORD:
/*
  Fixed-size rows: read pack_reclength bytes at a time and return the
  first row whose first byte is non-zero. Rows with a zero first byte
  are skipped (and counted as deleted when the master is not rewriting
  the data file).
*/
4933 for (;;)
4934 {
4935 if (my_b_read(&sort_param->read_cache,sort_param->record,
4936 share->base.pack_reclength))
4937 {
4938 if (sort_param->read_cache.error)
4939 param->out_flag |= O_DATA_LOST;
4940 retry_if_quick(sort_param, my_errno);
4941 DBUG_RETURN(-1);
4942 }
4943 sort_param->start_recpos=sort_param->pos;
4944 param->progress= sort_param->pos;
4945 if (!sort_param->fix_datafile)
4946 {
4947 sort_param->current_filepos= sort_param->pos;
4948 if (sort_param->master)
4949 share->state.split++;
4950 }
4951 sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength);
4952 if (*sort_param->record)
4953 {
4954 if (sort_param->calc_checksum)
4955 param->glob_crc+= (info->cur_row.checksum=
4956 _ma_static_checksum(info,sort_param->record));
4957 DBUG_RETURN(0);
4958 }
4959 if (!sort_param->fix_datafile && sort_param->master)
4960 {
4961 share->state.state.del++;
4962 share->state.state.empty+=share->base.pack_reclength;
4963 }
4964 }
4965 case DYNAMIC_RECORD:
4966 {
/*
  A row is collected block by block: the first block gives the total
  length (rec_len -> left_length) and each block links to the next via
  next_filepos. The inner do-loop runs until left_length reaches 0;
  any inconsistency jumps to try_next, which restarts the search.
*/
4967 uchar *UNINIT_VAR(to);
4968 ha_checksum checksum= 0;
4969
4970 pos=sort_param->pos;
4971 param->progress= pos;
4972 searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND));
4973 parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
4974 for (;;)
4975 {
4976 found_record=block_info.second_read= 0;
4977 left_length=1;
4978 if (searching)
4979 {
4980 pos=MY_ALIGN(pos,MARIA_DYN_ALIGN_SIZE);
4981 param->testflag|=T_RETRY_WITHOUT_QUICK;
4982 sort_param->start_recpos=pos;
4983 }
4984 do
4985 {
4986 if (pos > sort_param->max_pos)
4987 sort_param->max_pos=pos;
4988 if (pos & (MARIA_DYN_ALIGN_SIZE-1))
4989 {
4990 if ((param->testflag & T_VERBOSE) || searching == 0)
4991 _ma_check_print_info(param,"Wrong aligned block at %s",
4992 llstr(pos,llbuff));
4993 if (searching)
4994 goto try_next;
4995 }
4996 if (found_record && pos == param->search_after_block)
4997 _ma_check_print_info(param,"Block: %s used by record at %s",
4998 llstr(param->search_after_block,llbuff),
4999 llstr(sort_param->start_recpos,llbuff2));
5000 if (_ma_read_cache(info, &sort_param->read_cache,
5001 block_info.header, pos,
5002 MARIA_BLOCK_INFO_HEADER_LENGTH,
5003 (! found_record ? READING_NEXT : 0) |
5004 parallel_flag | READING_HEADER))
5005 {
5006 if (found_record)
5007 {
5008 _ma_check_print_info(param,
5009 "Can't read whole record at %s (errno: %d)",
5010 llstr(sort_param->start_recpos,llbuff),errno);
5011 goto try_next;
5012 }
/* Could not read even the first header of a row: report end of file */
5013 DBUG_RETURN(-1);
5014 }
5015 if (searching && ! sort_param->fix_datafile)
5016 {
5017 param->error_printed=1;
5018 param->retry_repair=1;
5019 param->testflag|=T_RETRY_WITHOUT_QUICK;
5020 my_errno= HA_ERR_WRONG_IN_RECORD;
5021 DBUG_RETURN(1); /* Something wrong with data */
5022 }
5023 b_type= _ma_get_block_info(info, &block_info,-1,pos);
5024 if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) ||
5025 ((b_type & BLOCK_FIRST) &&
5026 (block_info.rec_len < (uint) share->base.min_pack_length ||
5027 block_info.rec_len > (uint) share->base.max_pack_length)))
5028 {
5029 uint i;
5030 if (param->testflag & T_VERBOSE || searching == 0)
5031 _ma_check_print_info(param,
5032 "Wrong bytesec: %3d-%3d-%3d at %10s; Skipped",
5033 block_info.header[0],block_info.header[1],
5034 block_info.header[2],llstr(pos,llbuff));
5035 if (found_record)
5036 goto try_next;
5037 block_info.second_read=0;
5038 searching=1;
5039 /* Search after block in read header string */
5040 for (i=MARIA_DYN_ALIGN_SIZE ;
5041 i < MARIA_BLOCK_INFO_HEADER_LENGTH ;
5042 i+= MARIA_DYN_ALIGN_SIZE)
5043 if (block_info.header[i] >= 1 &&
5044 block_info.header[i] <= MARIA_MAX_DYN_HEADER_BYTE)
5045 break;
5046 pos+=(ulong) i;
5047 sort_param->start_recpos=pos;
5048 continue;
5049 }
5050 if (b_type & BLOCK_DELETED)
5051 {
5052 my_bool error=0;
5053 if (block_info.block_len+ (uint) (block_info.filepos-pos) <
5054 share->base.min_block_length)
5055 {
5056 if (!searching)
5057 _ma_check_print_info(param,
5058 "Deleted block with impossible length %lu "
5059 "at %s",
5060 block_info.block_len,llstr(pos,llbuff));
5061 error=1;
5062 }
5063 else
5064 {
5065 if ((block_info.next_filepos != HA_OFFSET_ERROR &&
5066 block_info.next_filepos >=
5067 share->state.state.data_file_length) ||
5068 (block_info.prev_filepos != HA_OFFSET_ERROR &&
5069 block_info.prev_filepos >=
5070 share->state.state.data_file_length))
5071 {
5072 if (!searching)
5073 _ma_check_print_info(param,
5074 "Delete link points outside datafile at "
5075 "%s",
5076 llstr(pos,llbuff));
5077 error=1;
5078 }
5079 }
5080 if (error)
5081 {
5082 if (found_record)
5083 goto try_next;
5084 searching=1;
5085 pos+= MARIA_DYN_ALIGN_SIZE;
5086 sort_param->start_recpos=pos;
5087 block_info.second_read=0;
5088 continue;
5089 }
5090 }
5091 else
5092 {
5093 if (block_info.block_len+ (uint) (block_info.filepos-pos) <
5094 share->base.min_block_length ||
5095 block_info.block_len > (uint) share->base.max_pack_length+
5096 MARIA_SPLIT_LENGTH)
5097 {
5098 if (!searching)
5099 _ma_check_print_info(param,
5100 "Found block with impossible length %lu "
5101 "at %s; Skipped",
5102 block_info.block_len+
5103 (uint) (block_info.filepos-pos),
5104 llstr(pos,llbuff));
5105 if (found_record)
5106 goto try_next;
5107 searching=1;
5108 pos+= MARIA_DYN_ALIGN_SIZE;
5109 sort_param->start_recpos=pos;
5110 block_info.second_read=0;
5111 continue;
5112 }
5113 }
5114 if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
5115 {
5116 if (!sort_param->fix_datafile && sort_param->master &&
5117 (b_type & BLOCK_DELETED))
5118 {
5119 share->state.state.empty+=block_info.block_len;
5120 share->state.state.del++;
5121 share->state.split++;
5122 }
5123 if (found_record)
5124 goto try_next;
5125 if (searching)
5126 {
5127 pos+=MARIA_DYN_ALIGN_SIZE;
5128 sort_param->start_recpos=pos;
5129 }
5130 else
5131 pos=block_info.filepos+block_info.block_len;
5132 block_info.second_read=0;
5133 continue;
5134 }
5135
5136 if (!sort_param->fix_datafile && sort_param->master)
5137 share->state.split++;
/* First block of a row: remember where it starts and how long the row is */
5138 if (! found_record++)
5139 {
5140 sort_param->find_length=left_length=block_info.rec_len;
5141 sort_param->start_recpos=pos;
5142 if (!sort_param->fix_datafile)
5143 sort_param->current_filepos= sort_param->start_recpos;
5144 if (sort_param->fix_datafile && (param->testflag & T_EXTEND))
5145 sort_param->pos=block_info.filepos+1;
5146 else
5147 sort_param->pos=block_info.filepos+block_info.block_len;
5148 if (share->base.blobs)
5149 {
5150 if (_ma_alloc_buffer(&sort_param->rec_buff,
5151 &sort_param->rec_buff_size,
5152 block_info.rec_len +
5153 share->base.extra_rec_buff_size))
5154
5155 {
5156 if (param->max_record_length >= block_info.rec_len)
5157 {
5158 _ma_check_print_error(param,"Not enough memory for blob at %s "
5159 "(need %lu)",
5160 llstr(sort_param->start_recpos,llbuff),
5161 (ulong) block_info.rec_len);
5162 DBUG_RETURN(1);
5163 }
5164 else
5165 {
5166 _ma_check_print_info(param,"Not enough memory for blob at %s "
5167 "(need %lu); Row skipped",
5168 llstr(sort_param->start_recpos,llbuff),
5169 (ulong) block_info.rec_len);
5170 goto try_next;
5171 }
5172 }
5173 }
5174 to= sort_param->rec_buff;
5175 }
5176 if (left_length < block_info.data_len || ! block_info.data_len)
5177 {
5178 _ma_check_print_info(param,
5179 "Found block with too small length at %s; "
5180 "Skipped",
5181 llstr(sort_param->start_recpos,llbuff));
5182 goto try_next;
5183 }
5184 if (block_info.filepos + block_info.data_len >
5185 sort_param->read_cache.end_of_file)
5186 {
5187 _ma_check_print_info(param,
5188 "Found block that points outside data file "
5189 "at %s",
5190 llstr(sort_param->start_recpos,llbuff));
5191 goto try_next;
5192 }
5193 /*
5194 Copy information that is already read. Avoid accessing data
5195 below the cache start. This could happen if the header
5196 stretched over the end of the previous buffer contents.
5197 */
5198 {
5199 uint header_len= (uint) (block_info.filepos - pos);
5200 uint prefetch_len= (MARIA_BLOCK_INFO_HEADER_LENGTH - header_len);
5201
5202 if (prefetch_len > block_info.data_len)
5203 prefetch_len= block_info.data_len;
5204 if (prefetch_len)
5205 {
5206 memcpy(to, block_info.header + header_len, prefetch_len);
5207 block_info.filepos+= prefetch_len;
5208 block_info.data_len-= prefetch_len;
5209 left_length-= prefetch_len;
5210 to+= prefetch_len;
5211 }
5212 }
/* Read whatever part of this block was not already in the header buffer */
5213 if (block_info.data_len &&
5214 _ma_read_cache(info, &sort_param->read_cache,to,block_info.filepos,
5215 block_info.data_len,
5216 (found_record == 1 ? READING_NEXT : 0) |
5217 parallel_flag))
5218 {
5219 _ma_check_print_info(param,
5220 "Read error for block at: %s (error: %d); "
5221 "Skipped",
5222 llstr(block_info.filepos,llbuff),my_errno);
5223 goto try_next;
5224 }
5225 left_length-=block_info.data_len;
5226 to+=block_info.data_len;
5227 pos=block_info.next_filepos;
5228 if (pos == HA_OFFSET_ERROR && left_length)
5229 {
5230 _ma_check_print_info(param,
5231 "Wrong block with wrong total length "
5232 "starting at %s",
5233 llstr(sort_param->start_recpos,llbuff));
5234 goto try_next;
5235 }
5236 if (pos + MARIA_BLOCK_INFO_HEADER_LENGTH >
5237 sort_param->read_cache.end_of_file)
5238 {
5239 _ma_check_print_info(param,
5240 "Found link that points at %s (outside data "
5241 "file) at %s",
5242 llstr(pos,llbuff2),
5243 llstr(sort_param->start_recpos,llbuff));
5244 goto try_next;
5245 }
5246 } while (left_length);
5247
/* All blocks collected in rec_buff: unpack into sort_param->record */
5248 if (_ma_rec_unpack(info,sort_param->record,sort_param->rec_buff,
5249 sort_param->find_length) != MY_FILE_ERROR)
5250 {
5251 if (sort_param->read_cache.error < 0)
5252 DBUG_RETURN(1);
5253 if (sort_param->calc_checksum)
5254 checksum= (share->calc_check_checksum)(info, sort_param->record);
5255 if ((param->testflag & (T_EXTEND | T_REP)) || searching)
5256 {
5257 if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff,
5258 sort_param->find_length,
5259 (param->testflag & T_QUICK) &&
5260 sort_param->calc_checksum &&
5261 MY_TEST(share->calc_checksum), checksum))
5262 {
5263 _ma_check_print_info(param,"Found wrong packed record at %s",
5264 llstr(sort_param->start_recpos,llbuff));
5265 goto try_next;
5266 }
5267 }
5268 if (sort_param->calc_checksum)
5269 param->glob_crc+= checksum;
5270 DBUG_RETURN(0);
5271 }
5272 if (!searching)
5273 _ma_check_print_info(param,"Key %d - Found wrong stored record at %s",
5274 sort_param->key+1,
5275 llstr(sort_param->start_recpos,llbuff));
/*
  Give up on the current start position: resume in searching mode one
  alignment unit (MARIA_DYN_ALIGN_SIZE) further on.
*/
5276 try_next:
5277 pos=(sort_param->start_recpos+=MARIA_DYN_ALIGN_SIZE);
5278 searching=1;
5279 }
5280 }
5281 case COMPRESSED_RECORD:
/*
  Packed rows: the first pass reads at sort_param->pos; every 'continue'
  below retries one byte further on with searching set.
*/
5282 param->progress= sort_param->pos;
5283 for (searching=0 ;; searching=1, sort_param->pos++)
5284 {
5285 if (_ma_read_cache(info, &sort_param->read_cache, block_info.header,
5286 sort_param->pos,
5287 share->pack.ref_length,READING_NEXT))
5288 DBUG_RETURN(-1);
5289 if (searching && ! sort_param->fix_datafile)
5290 {
5291 param->error_printed=1;
5292 param->retry_repair=1;
5293 param->testflag|=T_RETRY_WITHOUT_QUICK;
5294 my_errno= HA_ERR_WRONG_IN_RECORD;
5295 DBUG_RETURN(1); /* Something wrong with data */
5296 }
5297 sort_param->start_recpos=sort_param->pos;
5298 if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info,
5299 &sort_param->rec_buff,
5300 &sort_param->rec_buff_size, -1,
5301 sort_param->pos))
5302 DBUG_RETURN(-1);
5303 if (!block_info.rec_len &&
5304 sort_param->pos + MEMMAP_EXTRA_MARGIN ==
5305 sort_param->read_cache.end_of_file)
5306 DBUG_RETURN(-1);
5307 if (block_info.rec_len < (uint) share->min_pack_length ||
5308 block_info.rec_len > (uint) share->max_pack_length)
5309 {
5310 if (! searching)
5311 _ma_check_print_info(param,
5312 "Found block with wrong recordlength: %lu "
5313 "at %s\n",
5314 block_info.rec_len,
5315 llstr(sort_param->pos,llbuff));
5316 continue;
5317 }
5318 if (_ma_read_cache(info, &sort_param->read_cache, sort_param->rec_buff,
5319 block_info.filepos, block_info.rec_len,
5320 READING_NEXT))
5321 {
5322 if (! searching)
5323 _ma_check_print_info(param,"Couldn't read whole record from %s",
5324 llstr(sort_param->pos,llbuff));
5325 continue;
5326 }
5327 #ifdef HAVE_valgrind
5328 bzero(sort_param->rec_buff + block_info.rec_len,
5329 share->base.extra_rec_buff_size);
5330 #endif
5331 if (_ma_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record,
5332 sort_param->rec_buff, block_info.rec_len))
5333 {
5334 if (! searching)
5335 _ma_check_print_info(param,"Found wrong record at %s",
5336 llstr(sort_param->pos,llbuff));
5337 continue;
5338 }
5339 if (!sort_param->fix_datafile)
5340 {
5341 sort_param->current_filepos= sort_param->pos;
5342 if (sort_param->master)
5343 share->state.split++;
5344 }
5345 sort_param->max_pos= (sort_param->pos=block_info.filepos+
5346 block_info.rec_len);
/* Remember the packed length; _ma_sort_write_record() reads it back */
5347 info->packed_length=block_info.rec_len;
5348
5349 if (sort_param->calc_checksum)
5350 {
5351 info->cur_row.checksum= (*share->calc_check_checksum)(info,
5352 sort_param->
5353 record);
5354 param->glob_crc+= info->cur_row.checksum;
5355 }
5356 DBUG_RETURN(0);
5357 }
5358 case NO_RECORD:
5359 DBUG_RETURN(1); /* Impossible */
5360 }
5361 DBUG_RETURN(1); /* Impossible */
5362 }
5363
5364
5365 /**
5366 @brief Write record to new file.
5367
5368 @fn _ma_sort_write_record()
5369 @param sort_param Sort parameters.
5370
5371 @note
5372 This is only called by a master thread if parallel repair is used.
5373
5374 @return
5375 @retval 0 OK
5376 sort_param->current_filepos points to inserted record for
5377 block_records and to the place for the next record for
5378 other row types.
5379 sort_param->filepos points to end of file
5380 @retval 1 Error
5381 */
5382
_ma_sort_write_record(MARIA_SORT_PARAM * sort_param)5383 int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
5384 {
5385 int flag;
5386 uint length;
5387 ulong block_length,reclength;
5388 uchar *from;
5389 uchar block_buff[8];
5390 MARIA_SORT_INFO *sort_info=sort_param->sort_info;
5391 HA_CHECK *param= sort_info->param;
5392 MARIA_HA *info= sort_info->new_info;
5393 MARIA_SHARE *share= info->s;
5394 DBUG_ENTER("_ma_sort_write_record");
5395
5396 if (sort_param->fix_datafile)
5397 {
5398 sort_param->current_filepos= sort_param->filepos;
5399 switch (sort_info->new_data_file_type) {
5400 case BLOCK_RECORD:
5401 if ((sort_param->current_filepos=
5402 (*share->write_record_init)(info, sort_param->record)) ==
5403 HA_OFFSET_ERROR)
5404 {
5405 _ma_check_print_error(param, "%d when writing to datafile", my_errno);
5406 DBUG_RETURN(1);
5407 }
5408 /* Pointer to end of file */
5409 sort_param->filepos= share->state.state.data_file_length;
5410 break;
5411 case STATIC_RECORD:
5412 if (my_b_write(&info->rec_cache,sort_param->record,
5413 share->base.pack_reclength))
5414 {
5415 _ma_check_print_error(param,"%d when writing to datafile",my_errno);
5416 DBUG_RETURN(1);
5417 }
5418 sort_param->filepos+=share->base.pack_reclength;
5419 share->state.split++;
5420 break;
5421 case DYNAMIC_RECORD:
5422 if (! info->blobs)
5423 from=sort_param->rec_buff;
5424 else
5425 {
5426 /* must be sure that local buffer is big enough */
5427 reclength=share->base.pack_reclength+
5428 _ma_calc_total_blob_length(info,sort_param->record)+
5429 ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
5430 MARIA_DYN_DELETE_BLOCK_HEADER;
5431 if (sort_info->buff_length < reclength)
5432 {
5433 if (!(sort_info->buff=my_realloc(sort_info->buff, (uint) reclength,
5434 MYF(MY_FREE_ON_ERROR |
5435 MY_ALLOW_ZERO_PTR))))
5436 DBUG_RETURN(1);
5437 sort_info->buff_length=reclength;
5438 }
5439 from= (uchar *) sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER);
5440 }
5441 /* We can use info->checksum here as only one thread calls this */
5442 info->cur_row.checksum= (*share->calc_check_checksum)(info,
5443 sort_param->
5444 record);
5445 if (!(reclength= _ma_rec_pack(info,from,sort_param->record)))
5446 {
5447 _ma_check_print_error(param,"Got error %d when packing record",
5448 my_errno);
5449 DBUG_RETURN(1);
5450 }
5451 flag=0;
5452
5453 do
5454 {
5455 block_length= reclength + 3 + MY_TEST(reclength >= (65520 - 3));
5456 if (block_length < share->base.min_block_length)
5457 block_length=share->base.min_block_length;
5458 info->update|=HA_STATE_WRITE_AT_END;
5459 block_length=MY_ALIGN(block_length,MARIA_DYN_ALIGN_SIZE);
5460 if (block_length > MARIA_MAX_BLOCK_LENGTH)
5461 block_length=MARIA_MAX_BLOCK_LENGTH;
5462 if (_ma_write_part_record(info,0L,block_length,
5463 sort_param->filepos+block_length,
5464 &from,&reclength,&flag))
5465 {
5466 _ma_check_print_error(param,"%d when writing to datafile",my_errno);
5467 DBUG_RETURN(1);
5468 }
5469 sort_param->filepos+=block_length;
5470 share->state.split++;
5471 } while (reclength);
5472 break;
5473 case COMPRESSED_RECORD:
5474 reclength=info->packed_length;
5475 length= _ma_save_pack_length((uint) share->pack.version, block_buff,
5476 reclength);
5477 if (share->base.blobs)
5478 length+= _ma_save_pack_length((uint) share->pack.version,
5479 block_buff + length, info->blob_length);
5480 if (my_b_write(&info->rec_cache,block_buff,length) ||
5481 my_b_write(&info->rec_cache, sort_param->rec_buff, reclength))
5482 {
5483 _ma_check_print_error(param,"%d when writing to datafile",my_errno);
5484 DBUG_RETURN(1);
5485 }
5486 sort_param->filepos+=reclength+length;
5487 share->state.split++;
5488 break;
5489 case NO_RECORD:
5490 DBUG_RETURN(1); /* Impossible */
5491 }
5492 }
5493 if (sort_param->master)
5494 {
5495 share->state.state.records++;
5496 if ((param->testflag & T_WRITE_LOOP) &&
5497 (share->state.state.records % WRITE_COUNT) == 0)
5498 {
5499 char llbuff[22];
5500 printf("%s\r", llstr(share->state.state.records,llbuff));
5501 fflush(stdout);
5502 }
5503 }
5504 DBUG_RETURN(0);
5505 } /* _ma_sort_write_record */
5506
5507
5508 /* Compare two keys from _ma_create_index_by_sort */
5509
sort_key_cmp(MARIA_SORT_PARAM * sort_param,const void * a,const void * b)5510 static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
5511 const void *b)
5512 {
5513 uint not_used[2];
5514 return (ha_key_cmp(sort_param->seg, *((uchar* const *) a),
5515 *((uchar* const *) b),
5516 USE_WHOLE_KEY, SEARCH_SAME, not_used));
5517 } /* sort_key_cmp */
5518
5519
/*
  Write one key, delivered by the sort, to the key blocks.

  The key is compared with the last key stored in the first key block to
  update the per-key-part 'unique' statistics (according to
  param->stats_method) and to detect duplicates. A duplicate on a
  HA_NOSAME key is reported and handed to sort_delete_record(); any other
  key is passed on to sort_insert_key().

  RETURN
    Result of sort_delete_record() / sort_insert_key(), or 1 if (in debug
    builds) the keys turn out not to be sorted.
*/

static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a)
{
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  HA_CHECK *param= sort_info->param;
  char new_pos_txt[22], prev_pos_txt[22];
  uint diff_pos[2];
  int cmp;

  if (!sort_info->key_block->inited)
  {
    /* Very first key: there is no previous key to compare with */
    cmp= -1;
    if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
      maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
                                        a);
  }
  else
  {
    uchar *prev_key= sort_info->key_block->lastkey;

    cmp= ha_key_cmp(sort_param->seg, prev_key, a, USE_WHOLE_KEY,
                    SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT,
                    diff_pos);
    /* Depending on the statistics method, recompute the differing part */
    if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
    {
      ha_key_cmp(sort_param->seg, prev_key, a, USE_WHOLE_KEY,
                 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos);
    }
    else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
    {
      diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg,
                                                    sort_param->notnull,
                                                    prev_key, a);
    }
    sort_param->unique[diff_pos[0]-1]++;
  }

  if (cmp == 0 && (sort_param->keyinfo->flag & HA_NOSAME))
  {
    my_off_t prev_pos;

    DBUG_EXECUTE("key", _ma_print_keydata(DBUG_FILE, sort_param->seg, a,
                                          USE_WHOLE_KEY););
    sort_info->dupp++;
    sort_info->info->cur_row.lastpos= get_record_for_key(sort_param->keyinfo,
                                                         a);
    if ((param->testflag & (T_CREATE_UNIQUE_BY_SORT | T_SUPPRESS_ERR_HANDLING))
        == T_CREATE_UNIQUE_BY_SORT)
      param->testflag|= T_SUPPRESS_ERR_HANDLING;

    prev_pos= get_record_for_key(sort_param->keyinfo,
                                 sort_info->key_block->lastkey);
    _ma_check_print_warning(param,
                            "Duplicate key %2u for record at %10s against "
                            "record at %10s",
                            sort_param->key + 1,
                            llstr(sort_info->info->cur_row.lastpos,
                                  new_pos_txt),
                            llstr(prev_pos, prev_pos_txt));
    param->testflag|= T_RETRY_WITHOUT_QUICK;
    if (param->testflag & T_VERBOSE)
      _ma_print_keydata(stdout, sort_param->seg, a, USE_WHOLE_KEY);
    return sort_delete_record(sort_param);
  }
#ifndef DBUG_OFF
  if (cmp > 0)
  {
    _ma_check_print_error(param,
                          "Internal error: Keys are not in order from sort");
    return 1;
  }
#endif
  return sort_insert_key(sort_param, sort_info->key_block, a,
                         HA_OFFSET_ERROR);
} /* sort_key_write */
5589
5590
/*
  Flush the full-text buffer collected for the current word.

  While ft_buf->buf is set, the values for the word are still buffered
  after ft_buf->lastkey: each of them is copied in turn into the value
  slot of lastkey and the resulting key is inserted into the first-level
  tree.

  Otherwise the values have already gone to a second-level tree: its
  pending blocks are flushed, lastkey gets the negated value count and
  the root of that tree as its value, sort_info/sort_param are switched
  back to the first-level tree and lastkey is inserted there.

  RETURN
    0      ok
    != 0   error from sort_insert_key() / _ma_flush_pending_blocks()
*/

int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param)
{
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  SORT_FT_BUF *ft_buf= sort_info->ft_buf;
  SORT_KEY_BLOCKS *first_block= sort_info->key_block;
  MARIA_SHARE *share= sort_info->info->s;
  uint val_len= share->ft2_keyinfo.keylength;
  uint val_off;
  uchar *value_pos;
  int error;

  get_key_full_length_rdonly(val_off, ft_buf->lastkey);
  value_pos= ft_buf->lastkey + val_off;

  if (ft_buf->buf)
  {
    /* flushing first-level tree */
    uchar *next_value= value_pos + val_len;

    error= sort_insert_key(sort_param, first_block, ft_buf->lastkey,
                           HA_OFFSET_ERROR);
    while (!error && next_value < ft_buf->buf)
    {
      memcpy(value_pos, next_value, val_len);
      error= sort_insert_key(sort_param, first_block, ft_buf->lastkey,
                             HA_OFFSET_ERROR);
      next_value+= val_len;
    }
    return error;
  }

  /* flushing second-level tree keyblocks */
  error= _ma_flush_pending_blocks(sort_param);

  /* updating lastkey with second-level tree info */
  ft_intXstore(ft_buf->lastkey+val_off, -ft_buf->count);
  _ma_dpointer(sort_info->info->s, ft_buf->lastkey+val_off+HA_FT_WLEN,
               share->state.key_root[sort_param->key]);

  /* restoring first level tree data in sort_info/sort_param */
  sort_info->key_block= (sort_info->key_block_end -
                         sort_info->param->sort_key_blocks);
  sort_param->keyinfo= share->keyinfo + sort_param->key;
  share->state.key_root[sort_param->key]= HA_OFFSET_ERROR;

  /* writing lastkey in first-level tree */
  if (error)
    return error;
  return sort_insert_key(sort_param, sort_info->key_block,
                         ft_buf->lastkey, HA_OFFSET_ERROR);
}
5635
5636
/*
  Write one full-text key delivered by the sort.

  Keys for the same word are collected in sort_info->ft_buf. On the first
  call the buffer is allocated, provided a two-level tree can be used;
  if not, key writing is switched permanently to sort_key_write().

  For a key with the same word as the buffered one, only the value part
  is kept: in the buffer while there is room, or in a second-level tree
  once the buffer has been converted. A key with a new word flushes the
  buffer and starts a new one.

  RETURN
    0      ok
    != 0   error
*/

static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
                                   const uchar *a)
{
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  MARIA_SHARE *share= sort_info->info->s;
  SORT_KEY_BLOCKS *key_block= sort_info->key_block;
  SORT_FT_BUF *ft_buf= sort_info->ft_buf;
  uint a_len, val_off, val_len, error;

  val_len= HA_FT_WLEN + share->rec_reflength;
  get_key_full_length_rdonly(a_len, a);

  if (!ft_buf)
  {
    /*
      use two-level tree only if key_reflength fits in rec_reflength place
      and row format is NOT static - for _ma_dpointer not to garble offsets
    */
    if ((share->base.key_reflength <= share->rec_reflength) &&
        (share->options &
         (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)))
      ft_buf= (SORT_FT_BUF *) my_malloc(sort_param->keyinfo->block_length +
                                        sizeof(SORT_FT_BUF), MYF(MY_WME));

    if (!ft_buf)
    {
      /* No buffer available: use the plain key writer from now on */
      sort_param->key_write= sort_key_write;
      return sort_key_write(sort_param, a);
    }
    sort_info->ft_buf= ft_buf;
    /* Continue below with the buffer initialisation */
  }
  else
  {
    get_key_full_length_rdonly(val_off, ft_buf->lastkey);

    if (ha_compare_text(sort_param->seg->charset,
                        a+1, a_len-1,
                        ft_buf->lastkey+1, val_off-1, 0) == 0)
    {
      uchar *value;

      if (!ft_buf->buf)                         /* store in second-level tree */
      {
        ft_buf->count++;
        return sort_insert_key(sort_param, key_block,
                               a + a_len, HA_OFFSET_ERROR);
      }

      /* storing the key in the buffer. */
      memcpy(ft_buf->buf, (const char *) a + a_len, val_len);
      ft_buf->buf+= val_len;
      if (ft_buf->buf < ft_buf->end)
        return 0;

      /* converting to two-level tree */
      value= ft_buf->lastkey + val_off;

      while (key_block->inited)
        key_block++;
      sort_info->key_block= key_block;
      sort_param->keyinfo= &share->ft2_keyinfo;
      ft_buf->count= (uint)(ft_buf->buf - value)/val_len;

      /* flushing buffer to second-level tree */
      error= 0;
      while (!error && value < ft_buf->buf)
      {
        error= sort_insert_key(sort_param, key_block, value, HA_OFFSET_ERROR);
        value+= val_len;
      }
      ft_buf->buf= 0;
      return error;
    }

    /* flushing buffer */
    if ((error= _ma_sort_ft_buf_flush(sort_param)))
      return error;
  }

  /* Start a new buffer holding this key followed by its first value */
  a_len+= val_len;
  memcpy(ft_buf->lastkey, a, a_len);
  ft_buf->buf= ft_buf->lastkey + a_len;
  /*
    32 is just a safety margin here
    (at least MY_MAX(val_len, sizeof(nod_flag)) should be there).
    May be better performance could be achieved if we'd put
      (sort_info->keyinfo->block_length-32)/XXX
      instead.
        TODO: benchmark the best value for XXX.
  */
  ft_buf->end= ft_buf->lastkey + (sort_param->keyinfo->block_length-32);
  return 0;
} /* sort_maria_ft_key_write */
5725
5726
5727 /* get pointer to record from a key */
5728
get_record_for_key(MARIA_KEYDEF * keyinfo,const uchar * key_data)5729 static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo,
5730 const uchar *key_data)
5731 {
5732 MARIA_KEY key;
5733 key.keyinfo= keyinfo;
5734 key.data= (uchar*) key_data;
5735 key.data_length= (_ma_keylength(keyinfo, key_data) -
5736 keyinfo->share->rec_reflength);
5737 return _ma_row_pos_from_key(&key);
5738 } /* get_record_for_key */
5739
5740
5741 /* Insert a key in sort-key-blocks */
5742
sort_insert_key(MARIA_SORT_PARAM * sort_param,register SORT_KEY_BLOCKS * key_block,const uchar * key,my_off_t prev_block)5743 static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
5744 register SORT_KEY_BLOCKS *key_block,
5745 const uchar *key,
5746 my_off_t prev_block)
5747 {
5748 uint a_length,t_length,nod_flag;
5749 my_off_t filepos;
5750 uchar *anc_buff,*lastkey;
5751 MARIA_KEY_PARAM s_temp;
5752 MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
5753 MARIA_SORT_INFO *sort_info= sort_param->sort_info;
5754 HA_CHECK *param=sort_info->param;
5755 MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
5756 MARIA_KEY tmp_key;
5757 MARIA_HA *info= sort_info->info;
5758 MARIA_SHARE *share= info->s;
5759 DBUG_ENTER("sort_insert_key");
5760
5761 anc_buff= key_block->buff;
5762 lastkey=key_block->lastkey;
5763 nod_flag= (key_block == sort_info->key_block ? 0 :
5764 share->base.key_reflength);
5765
5766 if (!key_block->inited)
5767 {
5768 key_block->inited=1;
5769 if (key_block == sort_info->key_block_end)
5770 {
5771 _ma_check_print_error(param,
5772 "To many key-block-levels; "
5773 "Try increasing sort_key_blocks");
5774 DBUG_RETURN(1);
5775 }
5776 a_length= share->keypage_header + nod_flag;
5777 key_block->end_pos= anc_buff + share->keypage_header;
5778 bzero(anc_buff, share->keypage_header);
5779 _ma_store_keynr(share, anc_buff, sort_param->keyinfo->key_nr);
5780 lastkey=0; /* No previous key in block */
5781 }
5782 else
5783 a_length= _ma_get_page_used(share, anc_buff);
5784
5785 /* Save pointer to previous block */
5786 if (nod_flag)
5787 {
5788 _ma_store_keypage_flag(share, anc_buff, KEYPAGE_FLAG_ISNOD);
5789 _ma_kpointer(info,key_block->end_pos,prev_block);
5790 }
5791
5792 tmp_key.keyinfo= keyinfo;
5793 tmp_key.data= (uchar*) key;
5794 tmp_key.data_length= _ma_keylength(keyinfo, key) - share->rec_reflength;
5795 tmp_key.ref_length= share->rec_reflength;
5796
5797 t_length= (*keyinfo->pack_key)(&tmp_key, nod_flag,
5798 (uchar*) 0, lastkey, lastkey, &s_temp);
5799 (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp);
5800 a_length+=t_length;
5801 _ma_store_page_used(share, anc_buff, a_length);
5802 key_block->end_pos+=t_length;
5803 if (a_length <= share->max_index_block_size)
5804 {
5805 MARIA_KEY tmp_key2;
5806 tmp_key2.data= key_block->lastkey;
5807 _ma_copy_key(&tmp_key2, &tmp_key);
5808 key_block->last_length=a_length-t_length;
5809 DBUG_RETURN(0);
5810 }
5811
5812 /* Fill block with end-zero and write filled block */
5813 _ma_store_page_used(share, anc_buff, key_block->last_length);
5814 bzero(anc_buff+key_block->last_length,
5815 keyinfo->block_length- key_block->last_length);
5816 if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) == HA_OFFSET_ERROR)
5817 DBUG_RETURN(1);
5818 _ma_fast_unlock_key_del(info);
5819
5820 /* If we read the page from the key cache, we have to write it back to it */
5821 if (page_link->changed)
5822 {
5823 MARIA_PAGE page;
5824 pop_dynamic(&info->pinned_pages);
5825 _ma_page_setup(&page, info, keyinfo, filepos, anc_buff);
5826 if (_ma_write_keypage(&page, PAGECACHE_LOCK_WRITE_UNLOCK, DFLT_INIT_HITS))
5827 DBUG_RETURN(1);
5828 }
5829 else
5830 {
5831 if (write_page(share, share->kfile.file, anc_buff,
5832 keyinfo->block_length, filepos, param->myf_rw))
5833 DBUG_RETURN(1);
5834 }
5835 DBUG_DUMP("buff", anc_buff, _ma_get_page_used(share, anc_buff));
5836
5837 /* Write separator-key to block in next level */
5838 if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos))
5839 DBUG_RETURN(1);
5840
5841 /* clear old block and write new key in it */
5842 key_block->inited=0;
5843 DBUG_RETURN(sort_insert_key(sort_param, key_block,key,prev_block));
5844 } /* sort_insert_key */
5845
5846
5847 /* Delete record when we found a duplicated key */
5848
/*
  Remove the row at sort_info->info->cur_row.lastpos from the table that is
  being rebuilt, together with the keys already written for it.

  The row is read through the new data handle (sort_info->new_info), every
  key with index < sort_info->current_key is deleted through the original
  handle (sort_info->info), and finally the row itself is deleted.

  While this runs, row_info->dfile.file is temporarily replaced with the
  record cache's file descriptor; it is restored on every exit path taken
  after the swap.

  RETURN
    0   ok
    #   error (1 for the failures detected here, otherwise the value
        returned by the delete_record method)
*/

static int sort_delete_record(MARIA_SORT_PARAM *sort_param)
{
  uint i;
  int old_file,error;
  uchar *key;
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  HA_CHECK *param=sort_info->param;
  MARIA_HA *row_info= sort_info->new_info, *key_info= sort_info->info;
  DBUG_ENTER("sort_delete_record");

  /* -q given without force-uniqueness: we are not allowed to drop rows */
  if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
  {
    _ma_check_print_error(param,
                          "Quick-recover aborted; Run recovery without switch "
                          "-q or with switch -qq");
    DBUG_RETURN(1);
  }
  if (key_info->s->options & HA_OPTION_COMPRESS_RECORD)
  {
    _ma_check_print_error(param,
                          "Recover aborted; Can't run standard recovery on "
                          "compressed tables with errors in data-file. "
                          "Use 'aria_chk --safe-recover' to fix it");
    DBUG_RETURN(1);
  }

  old_file= row_info->dfile.file;
  /* This only affects static and dynamic row formats */
  row_info->dfile.file= row_info->rec_cache.file;
  if (flush_io_cache(&row_info->rec_cache))
    goto err;                           /* Restores dfile.file */

  /* Scratch key buffer located max_key_length bytes into lastkey_buff */
  key= key_info->lastkey_buff + key_info->s->base.max_key_length;
  if ((error=(*row_info->s->read_record)(row_info, sort_param->record,
                                         key_info->cur_row.lastpos)) &&
      error != HA_ERR_RECORD_DELETED)
  {
    _ma_check_print_error(param,"Can't read record to be removed");
    goto err;
  }
  row_info->cur_row.lastpos= key_info->cur_row.lastpos;

  /* Delete the keys of all indexes handled before the current one */
  for (i=0 ; i < sort_info->current_key ; i++)
  {
    MARIA_KEY tmp_key;
    (*key_info->s->keyinfo[i].make_key)(key_info, &tmp_key, i, key,
                                        sort_param->record,
                                        key_info->cur_row.lastpos, 0);
    if (_ma_ck_delete(key_info, &tmp_key))
    {
      _ma_check_print_error(param,
                            "Can't delete key %d from record to be removed",
                            i+1);
      goto err;
    }
  }
  /* The row no longer contributes to the table checksum */
  if (sort_param->calc_checksum)
    param->glob_crc-=(*key_info->s->calc_check_checksum)(key_info,
                                                         sort_param->record);
  error= (*row_info->s->delete_record)(row_info, sort_param->record);
  if (error)
    _ma_check_print_error(param,"Got error %d when deleting record",
                          my_errno);
  row_info->dfile.file= old_file;       /* restore actual value */
  /*
    NOTE(review): the record count is decremented even when delete_record
    reported an error; kept as in the original code.
  */
  row_info->s->state.state.records--;
  DBUG_RETURN(error);

err:
  row_info->dfile.file= old_file;       /* restore actual value */
  DBUG_RETURN(1);
} /* sort_delete_record */
5918
5919
5920 /* Fix all pending blocks and flush everything to disk */
5921
/*
  Write out every key block that is still marked as 'inited' in
  sort_info->key_block, walking the array from its first element until the
  first block that is not inited.

  Each block is zero-padded up to keyinfo->block_length and written to a
  page obtained from _ma_new().  From the second block on, the position of
  the previously written page is stored at the block's end_pos first.
  The position of the last written page (or HA_OFFSET_ERROR if no block was
  pending) becomes the root of the key.

  RETURN
    0   ok
    1   error
*/

int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param)
{
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  MARIA_HA *info= sort_info->info;
  MARIA_KEYDEF *keyinfo= sort_param->keyinfo;
  myf myf_rw= sort_info->param->myf_rw;
  MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
  SORT_KEY_BLOCKS *block= sort_info->key_block;
  my_off_t last_pos= HA_OFFSET_ERROR;           /* if empty file */
  my_bool has_child= 0;
  int result= 1;
  DBUG_ENTER("_ma_flush_pending_blocks");

  while (block->inited)
  {
    uint used;

    block->inited= 0;
    used= _ma_get_page_used(info->s, block->buff);
    /* Blocks above the first one point down to the page written before */
    if (has_child)
      _ma_kpointer(info, block->end_pos, last_pos);
    bzero(block->buff + used, keyinfo->block_length - used);

    last_pos= _ma_new(info, DFLT_INIT_HITS, &page_link);
    if (last_pos == HA_OFFSET_ERROR)
      goto end;

    if (!page_link->changed)
    {
      if (write_page(info->s, info->s->kfile.file, block->buff,
                     keyinfo->block_length, last_pos, myf_rw))
        goto end;
    }
    else
    {
      /* If we read the page from the key cache, we have to write it back */
      MARIA_PAGE page;
      pop_dynamic(&info->pinned_pages);

      _ma_page_setup(&page, info, keyinfo, last_pos, block->buff);
      if (_ma_write_keypage(&page, PAGECACHE_LOCK_WRITE_UNLOCK,
                            DFLT_INIT_HITS))
        goto end;
    }
    DBUG_DUMP("buff", block->buff, used);
    has_child= 1;
    block++;
  }
  /* Last is root for tree */
  info->s->state.key_root[sort_param->key]= last_pos;
  result= 0;

end:
  _ma_fast_unlock_key_del(info);
  DBUG_RETURN(result);
} /* _ma_flush_pending_blocks */
5975
5976 /* alloc space and pointers for key_blocks */
5977
alloc_key_blocks(HA_CHECK * param,uint blocks,uint buffer_length)5978 static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
5979 uint buffer_length)
5980 {
5981 reg1 uint i;
5982 SORT_KEY_BLOCKS *block;
5983 DBUG_ENTER("alloc_key_blocks");
5984
5985 if (!(block= (SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+
5986 buffer_length+IO_SIZE)*blocks,
5987 MYF(0))))
5988 {
5989 _ma_check_print_error(param,"Not enough memory for sort-key-blocks");
5990 return(0);
5991 }
5992 for (i=0 ; i < blocks ; i++)
5993 {
5994 block[i].inited=0;
5995 block[i].buff= (uchar*) (block+blocks)+(buffer_length+IO_SIZE)*i;
5996 }
5997 DBUG_RETURN(block);
5998 } /* alloc_key_blocks */
5999
6000
6001 /* Check if file is almost full */
6002
/*
  Tell whether the index file or the data file has grown past the
  configured maximum, comparing 9/10 of the current end-of-file position
  (integer arithmetic) against base.max_key_file_length and
  base.max_data_file_length respectively.

  Tables with HA_OPTION_COMPRESS_RECORD set are never reported as full.

  RETURN
    0   not almost full
    1   almost full
*/

int maria_test_if_almost_full(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;
  my_off_t end_pos;

  if (share->options & HA_OPTION_COMPRESS_RECORD)
    return 0;

  /* Index file first; the data file is only examined if this passes */
  end_pos= mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END,
                           MYF(MY_THREADSAFE));
  if (end_pos / 10 * 9 > (my_off_t) share->base.max_key_file_length)
    return 1;

  end_pos= mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
  return end_pos / 10 * 9 > (my_off_t) share->base.max_data_file_length;
}
6015
6016
6017 /* Recreate table with bigger more alloced record-data */
6018
/*
  Re-create a table through maria_create() (called with HA_DONT_TOUCH_DATA)
  from the definitions of the currently open table, then reopen it and
  carry over the old row statistics.

  SYNOPSIS
    maria_recreate_table()
    param       Check parameters (testflag, language, max_data_file_length)
    org_info    IN:  open handle of the table to recreate
                OUT: handle returned by maria_open() for the new table
    filename    Name passed to maria_create() / maria_open()

  NOTES
    The original handle is closed with maria_close() before maria_create()
    is called.  If maria_create() fails, *org_info is left untouched and
    therefore still refers to the closed handle; if maria_open() fails,
    *org_info is NULL.

  RETURN
    0   ok
    1   error
*/

int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
{
  int error;
  MARIA_HA info;
  MARIA_SHARE share;
  MARIA_KEYDEF *keyinfo,*key,*key_end;
  HA_KEYSEG *keysegs,*keyseg;
  MARIA_COLUMNDEF *columndef,*column,*end;
  MARIA_UNIQUEDEF *uniquedef,*u_ptr,*u_end;
  MARIA_STATUS_INFO status_info;
  uint unpack,key_parts,keyseg_count;
  ha_rows max_records;
  ulonglong file_length,tmp_length;
  MARIA_CREATE_INFO create_info;
  DBUG_ENTER("maria_recreate_table");

  if ((!(param->testflag & T_SILENT)))
    printf("Recreating table '%s'\n", param->isam_file_name);

  error=1;                                      /* Default error */
  /* Work on private copies as the original handle is closed below */
  info= **org_info;
  status_info= (*org_info)->state[0];
  info.state= &status_info;
  share= *(*org_info)->s;
  unpack= ((share.data_file_type == COMPRESSED_RECORD) &&
           (param->testflag & T_UNPACK));
  if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF) *
                                          share.base.keys)))
    DBUG_RETURN(1);                             /* Out of memory is an error */
  memcpy((uchar*) keyinfo,(uchar*) share.keyinfo,
         (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys));

  key_parts= share.base.all_key_parts;
  /*
    Number of segments copied from share.keyparts further down; the
    allocation must cover all of them.
  */
  keyseg_count= key_parts + share.base.keys + share.state.header.uniques;
  if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG) * keyseg_count)))
  {
    my_afree(keyinfo);
    DBUG_RETURN(1);
  }
  if (!(columndef=(MARIA_COLUMNDEF*)
        my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1))))
  {
    my_afree(keyinfo);
    my_afree(keysegs);
    DBUG_RETURN(1);
  }
  if (!(uniquedef=(MARIA_UNIQUEDEF*)
        my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1))))
  {
    my_afree(columndef);
    my_afree(keyinfo);
    my_afree(keysegs);
    DBUG_RETURN(1);
  }

  /* Copy the column definitions in their original order */
  for (column= share.columndef, end= share.columndef+share.base.fields;
       column != end ;
       column++)
    columndef[column->column_nr]= *column;

  /* Change the new key to point at the saved key segments */
  memcpy((uchar*) keysegs,(uchar*) share.keyparts,
         (size_t) (sizeof(HA_KEYSEG) * keyseg_count));
  keyseg=keysegs;
  for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++)
  {
    key->seg=keyseg;
    /* The segment list of a key ends with an entry whose type is 0 */
    for (; keyseg->type ; keyseg++)
    {
      if (param->language)
        keyseg->language=param->language;       /* change language */
    }
    keyseg++;                                   /* Skip end pointer */
  }

  /*
    Copy the unique definitions and change them to point at the new key
    segments
  */
  memcpy((uchar*) uniquedef,(uchar*) share.uniqueinfo,
         (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques)));
  for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques;
       u_ptr != u_end ; u_ptr++)
  {
    u_ptr->seg=keyseg;
    keyseg+=u_ptr->keysegs+1;
  }

  file_length=(ulonglong) mysql_file_seek(info.dfile.file, 0L, MY_SEEK_END,
                                          MYF(0));
  if (share.options & HA_OPTION_COMPRESS_RECORD)
    share.base.records=max_records=info.state->records;
  else if (share.base.min_pack_length)
    max_records=(ha_rows) (file_length / share.base.min_pack_length);
  else
    max_records=0;
  share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD;

  /*
    Use the biggest of: the configured maximum, the current length + 10%
    and the old maximum data file length
  */
  tmp_length= file_length+file_length/10;
  set_if_bigger(file_length,param->max_data_file_length);
  set_if_bigger(file_length,tmp_length);
  set_if_bigger(file_length,(ulonglong) share.base.max_data_file_length);

  maria_close(*org_info);

  bzero((char*) &create_info,sizeof(create_info));
  create_info.max_rows=MY_MAX(max_records,share.base.records);
  create_info.reloc_rows=share.base.reloc;
  create_info.old_options=(share.options |
                           (unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0));

  create_info.data_file_length=file_length;
  create_info.auto_increment=share.state.auto_increment;
  create_info.language = (param->language ? param->language :
                          share.base.language);
  create_info.key_file_length=  status_info.key_file_length;
  create_info.org_data_file_type= ((enum data_file_type)
                                   share.state.header.org_data_file_type);

  /*
    Allow for creating an auto_increment key. This has an effect only if
    an auto_increment key exists in the original table.
  */
  create_info.with_auto_increment= TRUE;
  create_info.null_bytes= share.base.null_bytes;
  create_info.transactional= share.base.born_transactional;

  /*
    We don't have to handle symlinks here because we are using
    HA_DONT_TOUCH_DATA
  */
  if (maria_create(filename, share.data_file_type,
                   share.base.keys - share.state.header.uniques,
                   keyinfo, share.base.fields, columndef,
                   share.state.header.uniques, uniquedef,
                   &create_info,
                   HA_DONT_TOUCH_DATA))
  {
    _ma_check_print_error(param,
                          "Got error %d when trying to recreate indexfile",
                          my_errno);
    goto end;
  }
  *org_info= maria_open(filename,O_RDWR,
                        (HA_OPEN_FOR_REPAIR |
                         ((param->testflag & T_WAIT_FOREVER) ?
                          HA_OPEN_WAIT_IF_LOCKED :
                          (param->testflag & T_DESCRIPT) ?
                          HA_OPEN_IGNORE_IF_LOCKED :
                          HA_OPEN_ABORT_IF_LOCKED)));
  if (!*org_info)
  {
    _ma_check_print_error(param,
                          "Got error %d when trying to open re-created "
                          "indexfile", my_errno);
    goto end;
  }
  /* We are modifying */
  (*org_info)->s->options&= ~HA_OPTION_READ_ONLY_DATA;
  _ma_readinfo(*org_info,F_WRLCK,0);
  /* Carry the statistics of the old table over to the new one */
  (*org_info)->s->state.state.records= info.state->records;
  if (share.state.create_time)
    (*org_info)->s->state.create_time=share.state.create_time;
#ifdef MARIA_EXTERNAL_LOCKING
  (*org_info)->s->state.unique= (*org_info)->this_unique= share.state.unique;
#endif
  (*org_info)->s->state.state.checksum= info.state->checksum;
  (*org_info)->s->state.state.del= info.state->del;
  (*org_info)->s->state.dellink= share.state.dellink;
  (*org_info)->s->state.state.empty= info.state->empty;
  (*org_info)->s->state.state.data_file_length= info.state->data_file_length;
  *(*org_info)->state= (*org_info)->s->state.state;
  if (maria_update_state_info(param,*org_info,UPDATE_TIME | UPDATE_STAT |
                              UPDATE_OPEN_COUNT))
    goto end;
  error=0;
end:
  my_afree(uniquedef);
  my_afree(keyinfo);
  my_afree(columndef);
  my_afree(keysegs);
  DBUG_RETURN(error);
}
6203
6204
6205 /* Write suffix to data file if needed */
6206
/*
  Append MEMMAP_EXTRA_MARGIN zero bytes to the record cache of the new data
  file when the table uses COMPRESSED_RECORD format and fix_datafile is
  set, and extend param->read_cache.end_of_file by the same amount.
  In all other cases nothing is written.

  RETURN
    0   ok
    1   write error (an error has been printed)
*/

int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile)
{
  MARIA_HA *info= sort_info->new_info;
  uchar zero_pad[MEMMAP_EXTRA_MARGIN];

  if (!fix_datafile || info->s->data_file_type != COMPRESSED_RECORD)
    return 0;                                   /* No suffix needed */

  bzero(zero_pad, sizeof(zero_pad));
  if (my_b_write(&info->rec_cache, zero_pad, sizeof(zero_pad)))
  {
    _ma_check_print_error(sort_info->param,
                          "%d when writing to datafile",my_errno);
    return 1;
  }
  sort_info->param->read_cache.end_of_file+= sizeof(zero_pad);
  return 0;
}
6225
6226
6227 /* Update state and maria_chk time of indexfile */
6228
/*
  Update the in-memory state of a table according to the 'update' flags
  and write it out.

  UPDATE_OPEN_COUNT   reset open_count and global_changed, mark share as
                      changed
  UPDATE_STAT         copy param->new_rec_per_key_part into the state (only
                      if the table has records); STATE_NOT_ANALYZED is set
                      again if any copied value is zero
  UPDATE_TIME         set check_time to now, and create_time too if unset
  UPDATE_STAT, UPDATE_SORT, UPDATE_TIME, UPDATE_AUTO_INC
                      any of these triggers _ma_state_info_write()

  Finally _ma_writeinfo() is called with the share's lock counters
  temporarily zeroed.

  RETURN
    0   ok
    1   error (an error has been printed)
*/

int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update)
{
  MARIA_SHARE *share= info->s;
  uint saved_r_locks, saved_w_locks;
  int write_error;
  DBUG_ENTER("maria_update_state_info");

  if (update & UPDATE_OPEN_COUNT)
  {
    share->state.open_count= 0;
    share->global_changed= 0;
    share->changed= 1;
  }

  if (update & UPDATE_STAT)
  {
    uint part;
    uint part_count= mi_uint2korr(share->state.header.key_parts);

    share->state.records_at_analyze= share->state.state.records;
    share->state.changed&= ~STATE_NOT_ANALYZED;
    if (share->state.state.records)
    {
      for (part= 0; part < part_count; part++)
      {
        share->state.rec_per_key_part[part]=
          param->new_rec_per_key_part[part];
        if (!share->state.rec_per_key_part[part])
          share->state.changed|= STATE_NOT_ANALYZED;
      }
    }
  }

  if (update & (UPDATE_STAT | UPDATE_SORT | UPDATE_TIME | UPDATE_AUTO_INC))
  {
    if (update & UPDATE_TIME)
    {
      share->state.check_time= time((time_t*) 0);
      if (!share->state.create_time)
        share->state.create_time= share->state.check_time;
    }
    if (_ma_state_info_write(share,
                             MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
                             MA_STATE_INFO_WRITE_FULL_INFO))
      goto err;
  }

  /* Force update of status: run _ma_writeinfo() with zeroed lock counters */
  saved_r_locks= share->r_locks;
  saved_w_locks= share->w_locks;
  share->r_locks= share->w_locks= share->tot_locks= 0;
  write_error= _ma_writeinfo(info, WRITEINFO_NO_UNLOCK);
  share->r_locks= saved_r_locks;
  share->w_locks= saved_w_locks;
  share->tot_locks= saved_r_locks + saved_w_locks;
  if (!write_error)
    DBUG_RETURN(0);

err:
  _ma_check_print_error(param,"%d when updating keyfile",my_errno);
  DBUG_RETURN(1);
}
6282
/*
  Update auto increment value for a table
  When setting the 'repair_only' flag we only want to change the
  old auto_increment value if it's wrong (smaller than some given key).
  The reason is that we shouldn't change the auto_increment value
  for a table without good reason when only doing a repair; if the
  user has inserted and deleted rows, the auto_increment value
  may be bigger than the biggest current row and this is ok.

  If repair_only is not set, the value is additionally raised to
  param->auto_increment_value when that is bigger than the biggest key
  (and set to it outright when the index is empty).
*/
6295
void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
                                   my_bool repair_only)
{
  MARIA_SHARE *share= info->s;
  uchar *row_buff;
  DBUG_ENTER("update_auto_increment_key");

  /* Nothing to do without an active auto increment key */
  if (!share->base.auto_key ||
      ! maria_is_key_active(share->state.key_map, share->base.auto_key - 1))
  {
    if (!(param->testflag & T_VERY_SILENT))
      _ma_check_print_info(param,
                           "Table: %s doesn't have an auto increment key\n",
                           param->isam_file_name);
    DBUG_VOID_RETURN;
  }
  if (!(param->testflag & (T_SILENT | T_REP)))
    printf("Updating Aria file: %s\n", param->isam_file_name);

  /*
    We have to use an allocated buffer instead of info->rec_buff as
    _ma_put_key_in_record() may use info->rec_buff
  */
  row_buff= (uchar*) my_malloc((size_t) share->base.default_rec_buff_size,
                               MYF(0));
  if (!row_buff)
  {
    _ma_check_print_error(param,"Not enough memory for extra record");
    DBUG_VOID_RETURN;
  }

  maria_extra(info,HA_EXTRA_KEYREAD,0);
  if (!maria_rlast(info, row_buff, share->base.auto_key-1))
  {
    /* Found the biggest key; never let the counter be smaller than it */
    const HA_KEYSEG *seg= share->keyinfo[share->base.auto_key-1].seg;
    ulonglong last_value=
      ma_retrieve_auto_increment(row_buff + seg->start, seg->type);
    set_if_bigger(share->state.auto_increment, last_value);
    if (!repair_only)
      set_if_bigger(share->state.auto_increment, param->auto_increment_value);
  }
  else if (my_errno != HA_ERR_END_OF_FILE)
  {
    /* Real read error: clean up and give up */
    maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
    my_free(row_buff);
    _ma_check_print_error(param,"%d when reading last record",my_errno);
    DBUG_VOID_RETURN;
  }
  else if (!repair_only)
  {
    /* Index is empty */
    share->state.auto_increment= param->auto_increment_value;
  }

  maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
  my_free(row_buff);
  maria_update_state_info(param, info, UPDATE_AUTO_INC);
  DBUG_VOID_RETURN;
}
6353
6354
6355 /*
6356 Update statistics for each part of an index
6357
6358 SYNOPSIS
6359 maria_update_key_parts()
6360 keyinfo IN Index information (only key->keysegs used)
6361 rec_per_key_part OUT Store statistics here
6362 unique IN Array of (#distinct tuples)
6363 notnull_tuples IN Array of (#tuples), or NULL
6364 records Number of records in the table
6365
6366 DESCRIPTION
    This function is called to produce index statistics values from the
    unique and notnull_tuples arrays after these arrays have been filled by
    a sequential index scan (the scan is done in two places: chk_index() and
    sort_key_write()).
6371
6372 This function handles all 3 index statistics collection methods.
6373
6374 Unique is an array:
6375 unique[0]= (#different values of {keypart1}) - 1
6376 unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1
6377 ...
6378
6379 For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too:
6380 notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL)
6381 notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all
6382 keypart{i} are not NULL)
6383 ...
6384 For all other statistics collection methods notnull_tuples==NULL.
6385
6386 Output is an array:
6387 rec_per_key_part[k] =
6388 = E(#records in the table such that keypart_1=c_1 AND ... AND
6389 keypart_k=c_k for arbitrary constants c_1 ... c_k)
6390
     = {assuming that values have uniform distribution and index contains all
        tuples from the domain (or that {c_1, ..., c_k} tuple is chosen from
        index tuples)}
6394
6395 = #tuples-in-the-index / #distinct-tuples-in-the-index.
6396
6397 The #tuples-in-the-index and #distinct-tuples-in-the-index have different
6398 meaning depending on which statistics collection method is used:
6399
6400 MI_STATS_METHOD_* how are nulls compared? which tuples are counted?
6401 NULLS_EQUAL NULL == NULL all tuples in table
6402 NULLS_NOT_EQUAL NULL != NULL all tuples in table
6403 IGNORE_NULLS n/a tuples that don't have NULLs
6404 */
6405
void maria_update_key_parts(MARIA_KEYDEF *keyinfo, double *rec_per_key_part,
                            ulonglong *unique, ulonglong *notnull,
                            ulonglong records)
{
  ulonglong distinct_sum= 0;            /* Running sum of unique[0..part] */
  uint part;

  for (part= 0; part < keyinfo->keysegs; part++)
  {
    ulonglong tuples= records;
    ulonglong distinct;
    double estimate;

    distinct_sum+= unique[part];
    distinct= distinct_sum + 1;
    if (notnull)
    {
      tuples= notnull[part];
      /*
        #(unique_tuples not counting tuples with NULLs) =
          #(unique_tuples counting tuples with NULLs as different) -
          #(tuples with NULLs)
      */
      distinct-= (records - notnull[part]);
    }

    if (!distinct)
      estimate= 1;
    else if (!distinct_sum)
      estimate= ulonglong2double(tuples);       /* 1 unique tuple */
    else
      estimate= ulonglong2double(tuples) / ulonglong2double(distinct);

    /*
      for some weird keys (e.g. FULLTEXT) the estimate can be <1 here.
      let's ensure it is not
    */
    set_if_bigger(estimate,1);

    rec_per_key_part[part]= estimate;
  }
}
6445
6446
maria_byte_checksum(const uchar * buf,uint length)6447 static ha_checksum maria_byte_checksum(const uchar *buf, uint length)
6448 {
6449 ha_checksum crc;
6450 const uchar *end=buf+length;
6451 for (crc=0; buf != end; buf++)
6452 crc=((crc << 1) + *buf) +
6453 MY_TEST(crc & (((ha_checksum) 1) << (8 * sizeof(ha_checksum) - 1)));
6454 return crc;
6455 }
6456
maria_too_big_key_for_sort(MARIA_KEYDEF * key,ha_rows rows)6457 static my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows)
6458 {
6459 uint key_maxlength=key->maxlength;
6460 if (key->flag & HA_FULLTEXT)
6461 {
6462 uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
6463 key->seg->charset->mbmaxlen;
6464 key_maxlength+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
6465 }
6466 return (key->flag & HA_SPATIAL) ||
6467 (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) &&
6468 ((ulonglong) rows * key_maxlength >
6469 (ulonglong) maria_max_temp_length));
6470 }
6471
6472 /*
6473 Deactivate all indexes that can be recreated fast.
6474 These include packed keys on which sorting will use more temporary
6475 space than the max allowed file length or for which the unpacked keys
6476 will take much more space than packed keys.
6477 Note that 'rows' may be zero for the case when we don't know how many
6478 rows we will put into the file.
6479 */
6480
void maria_disable_indexes_for_rebuild(MARIA_HA *info, ha_rows rows,
                                       my_bool all_keys)
{
  MARIA_SHARE *share= info->s;
  uint key_nr;

  DBUG_ASSERT(share->state.state.records == 0 &&
              (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES));

  for (key_nr= 0; key_nr < share->base.keys; key_nr++)
  {
    MARIA_KEYDEF *key= share->keyinfo + key_nr;

    /* Index types that are never disabled here */
    if (key->flag & (HA_SPATIAL | HA_AUTO_KEY | HA_RTREE_INDEX))
      continue;
    if (maria_too_big_key_for_sort(key, rows))
      continue;
    /* Keep the auto increment key active */
    if (share->base.auto_key == key_nr + 1)
      continue;
    /* Unique keys are only disabled when all_keys is given */
    if (!all_keys && (key->flag & HA_NOSAME))
      continue;

    maria_clear_key_active(share->state.key_map, key_nr);
    info->update|= HA_STATE_CHANGED;
    info->create_unique_index_by_sort= all_keys;
  }
}
6502
6503
6504 /*
6505 Return TRUE if we can use repair by sorting
6506 One can set the force argument to force to use sorting
6507 even if the temporary file would be quite big!
6508 */
6509
my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows,
                               ulonglong key_map, my_bool force)
{
  MARIA_SHARE *share= info->s;
  uint key_nr;

  /*
    maria_repair_by_sort only works if we have at least one key. If we don't
    have any keys, we should use the normal repair.
  */
  if (! maria_is_any_key_active(key_map))
    return FALSE;                               /* Can't use sort */

  /* With 'force' the size of the individual keys is not examined */
  if (force)
    return TRUE;

  for (key_nr= 0; key_nr < share->base.keys; key_nr++)
  {
    if (maria_too_big_key_for_sort(share->keyinfo + key_nr, rows))
      return FALSE;
  }
  return TRUE;
}
6530
6531
6532 /**
6533 @brief Create a new handle for manipulation the new record file
6534
6535 @note
6536 It's ok for Recovery to have two MARIA_SHARE on the same index file
6537 because the one we create here is not transactional
6538 */
6539
create_new_data_handle(MARIA_SORT_PARAM * param,File new_file)6540 static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file)
6541 {
6542
6543 MARIA_SORT_INFO *sort_info= param->sort_info;
6544 MARIA_HA *info= sort_info->info;
6545 MARIA_HA *new_info;
6546 DBUG_ENTER("create_new_data_handle");
6547
6548 if (!(sort_info->new_info= maria_open(info->s->open_file_name.str, O_RDWR,
6549 HA_OPEN_COPY | HA_OPEN_FOR_REPAIR |
6550 HA_OPEN_INTERNAL_TABLE)))
6551 DBUG_RETURN(1);
6552
6553 new_info= sort_info->new_info;
6554 _ma_bitmap_set_pagecache_callbacks(&new_info->s->bitmap.file,
6555 new_info->s);
6556 _ma_set_data_pagecache_callbacks(&new_info->dfile, new_info->s);
6557 change_data_file_descriptor(new_info, new_file);
6558 maria_lock_database(new_info, F_EXTRA_LCK);
6559 if ((sort_info->param->testflag & T_UNPACK) &&
6560 info->s->data_file_type == COMPRESSED_RECORD)
6561 {
6562 (*new_info->s->once_end)(new_info->s);
6563 (*new_info->s->end)(new_info);
6564 restore_data_file_type(new_info->s);
6565 _ma_setup_functions(new_info->s);
6566 if ((*new_info->s->once_init)(new_info->s, new_file) ||
6567 (*new_info->s->init)(new_info))
6568 DBUG_RETURN(1);
6569 }
6570 _ma_reset_status(new_info);
6571 if (_ma_initialize_data_file(new_info->s, new_file))
6572 DBUG_RETURN(1);
6573
6574 /* Take into account any bitmap page created above: */
6575 param->filepos= new_info->s->state.state.data_file_length;
6576
6577 /* Use new virtual functions for key generation */
6578 info->s->keypos_to_recpos= new_info->s->keypos_to_recpos;
6579 info->s->recpos_to_keypos= new_info->s->recpos_to_keypos;
6580 DBUG_RETURN(0);
6581 }
6582
6583
6584 static void
set_data_file_type(MARIA_SORT_INFO * sort_info,MARIA_SHARE * share)6585 set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share)
6586 {
6587 if ((sort_info->new_data_file_type=share->data_file_type) ==
6588 COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK)
6589 {
6590 MARIA_SHARE tmp;
6591 sort_info->new_data_file_type= share->state.header.org_data_file_type;
6592 /* Set delete_function for sort_delete_record() */
6593 tmp= *share;
6594 tmp.state.header.data_file_type= tmp.state.header.org_data_file_type;
6595 tmp.options= ~HA_OPTION_COMPRESS_RECORD;
6596 _ma_setup_functions(&tmp);
6597 share->delete_record=tmp.delete_record;
6598 }
6599 }
6600
/*
  Switch a share from compressed format back to its original data file
  type: clear HA_OPTION_COMPRESS_RECORD (also in the stored header
  options), set both data file type fields to org_data_file_type, reset
  pack.header_length and take the keypos/recpos conversion functions that
  _ma_setup_functions() selects for the new type.
*/

static void restore_data_file_type(MARIA_SHARE *share)
{
  MARIA_SHARE scratch;

  share->pack.header_length= 0;

  share->options&= ~HA_OPTION_COMPRESS_RECORD;
  mi_int2store(share->state.header.options,share->options);

  share->state.header.data_file_type=
    share->state.header.org_data_file_type;
  share->data_file_type= share->state.header.data_file_type;

  /*
    Use new virtual functions for key generation; they are looked up on a
    copy so that no other function pointer of the share is touched
  */
  scratch= *share;
  _ma_setup_functions(&scratch);
  share->recpos_to_keypos= scratch.recpos_to_keypos;
  share->keypos_to_recpos= scratch.keypos_to_recpos;
}
6617
6618
/*
  Close the handler's current data file and make both the handler and the
  share's bitmap use 'new_file' instead; the bitmap cache is reset.
*/

static void change_data_file_descriptor(MARIA_HA *info, File new_file)
{
  MARIA_SHARE *share= info->s;

  mysql_file_close(info->dfile.file, MYF(MY_WME));
  share->bitmap.file.file= new_file;
  info->dfile.file= new_file;
  _ma_bitmap_reset_cache(share);
}
6625
6626
6627 /**
6628 @brief Mark the data file to not be used
6629
6630 @note
6631 This is used in repair when we want to ensure the handler will not
6632 write anything to the data file anymore
6633 */
6634
static void unuse_data_file_descriptor(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;

  /* The result of the flush (FLUSH_IGNORE_CHANGED) is deliberately ignored */
  (void) flush_pagecache_blocks(share->pagecache,
                                &share->bitmap.file,
                                FLUSH_IGNORE_CHANGED);
  /* -1 marks the descriptor as unusable in both handler and bitmap */
  share->bitmap.file.file= -1;
  info->dfile.file= -1;
  _ma_bitmap_reset_cache(share);
}
6643
6644
6645 /*
6646 Copy all states that has to do with the data file
6647
6648 NOTES
6649 This is done to copy the state from the data file generated from
6650 repair to the original handler
6651 */
6652
static void copy_data_file_state(MARIA_STATE_INFO *to,
                                 MARIA_STATE_INFO *from)
{
  /* Members of the state info itself */
  to->first_bitmap_with_space= from->first_bitmap_with_space;
  to->dellink=                 from->dellink;
  to->split=                   from->split;

  /* Members of the embedded 'state' status struct */
  to->state.data_file_length=  from->state.data_file_length;
  to->state.empty=             from->state.empty;
  to->state.del=               from->state.del;
  to->state.records=           from->state.records;
}
6664
6665
6666 /*
6667 Read 'safely' next record while scanning table.
6668
6669 SYNOPSIS
6670 _ma_safe_scan_block_record()
6671 info Maria handler
6672 record Store found here
6673
6674 NOTES
6675 - One must have called mi_scan() before this
6676
6677 Differences compared to _ma_scan_block_records() are:
6678 - We read all blocks, not only blocks marked by the bitmap to be safe
6679 - In case of errors, next read will read next record.
6680 - More sanity checks
6681
6682 RETURN
6683 0 ok
6684 HA_ERR_END_OF_FILE End of file
6685 # error number
6686 */
6687
6688
static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
                                      MARIA_HA *info, uchar *record)
{
  MARIA_SHARE *share= info->s;
  MARIA_RECORD_POS row_nr= info->cur_row.nextpos;
  pgcache_page_no_t page_no= sort_info->page;
  char numbuf[22];
  DBUG_ENTER("_ma_safe_scan_block_record");

  for (;;)
  {
    /* Find next row in current page */
    if (likely(row_nr < info->scan.number_of_rows))
    {
      uint row_offset, row_length;
      uchar *row_start, *row_end;

      /* Skip directory entries with offset 0 */
      for (;;)
      {
        row_offset= uint2korr(info->scan.dir);
        if (row_offset)
          break;
        info->scan.dir-= DIR_ENTRY_SIZE;
        row_nr++;
        if (info->scan.dir < info->scan.dir_end)
        {
          /* Ran past the end of the directory; give up on this page */
          _ma_check_print_info(sort_info->param,
                               "Wrong directory on page %s",
                               llstr(page_no, numbuf));
          goto read_next_page;
        }
      }

      /* found row */
      info->cur_row.lastpos= info->scan.row_base_page + row_nr;
      info->cur_row.nextpos= row_nr + 1;
      row_start= info->scan.page_buff + row_offset;
      row_length= uint2korr(info->scan.dir + 2);
      row_end= row_start + row_length;
      info->scan.dir-= DIR_ENTRY_SIZE;          /* Point to previous row */

      if (row_end <= info->scan.dir_end &&
          row_offset >= PAGE_HEADER_SIZE(info->s) &&
          row_length >= share->base.min_block_length)
      {
        DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos));
        DBUG_RETURN(_ma_read_block_record2(info, record, row_start,
                                           row_end));
      }

      /* Entry failed the sanity checks: report it and try the next one */
      _ma_check_print_info(sort_info->param,
                           "Wrong directory entry %3u at page %s",
                           (uint) row_nr, llstr(page_no, numbuf));
      row_nr++;
      continue;
    }

read_next_page:
    /* Read until we find next head page */
    for (;;)
    {
      uint page_type;

      sort_info->page++;                        /* In case of errors */
      page_no++;
      if (!(page_no % share->bitmap.pages_covered))
      {
        /* Skip bitmap */
        page_no++;
        sort_info->page++;
      }
      if ((my_off_t) (page_no + 1) * share->block_size >
          sort_info->filelength)
        DBUG_RETURN(HA_ERR_END_OF_FILE);

      if (!(pagecache_read(share->pagecache,
                           &info->dfile,
                           page_no, 0, info->scan.page_buff,
                           PAGECACHE_READ_UNKNOWN_PAGE,
                           PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
      {
        if (my_errno != HA_ERR_WRONG_CRC)
          DBUG_RETURN(my_errno);
        /*
          Don't give errors for zero filled blocks. These can
          sometimes be found at end of a bitmap when we wrote a big
          record last that was moved to the next bitmap.
        */
        if (_ma_check_bitmap_data(info, UNALLOCATED_PAGE, 0,
                                  _ma_bitmap_get_page_bits(info,
                                                           &share->bitmap,
                                                           page_no)))
        {
          _ma_check_print_info(sort_info->param,
                               "Wrong CRC on datapage at %s",
                               llstr(page_no, numbuf));
        }
        continue;                               /* Try next page */
      }

      page_type= (info->scan.page_buff[PAGE_TYPE_OFFSET] &
                  PAGE_TYPE_MASK);
      if (page_type == HEAD_PAGE)
      {
        info->scan.number_of_rows=
          (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET];
        if (info->scan.number_of_rows != 0)
          break;                                /* Usable head page */
        _ma_check_print_info(sort_info->param,
                             "Wrong head page at page %s",
                             llstr(page_no, numbuf));
      }
      else if (page_type >= MAX_PAGE_TYPE)
      {
        _ma_check_print_info(sort_info->param,
                             "Found wrong page type: %d at page %s",
                             page_type, llstr(page_no, numbuf));
      }
    }

    /* New head page: the directory is scanned from its first entry down */
    info->scan.dir= (info->scan.page_buff + share->block_size -
                     PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE);
    info->scan.dir_end= (info->scan.dir -
                         (info->scan.number_of_rows - 1) *
                         DIR_ENTRY_SIZE);
    info->scan.row_base_page= ma_recordpos(page_no, 0);
    row_nr= 0;
  }
}
6815
6816
6817 /**
6818 @brief Writes a LOGREC_REPAIR_TABLE record and updates create_rename_lsn
6819 if needed (so that maria_read_log does not redo the repair).
6820
6821 @param param description of the REPAIR operation
6822 @param info table
6823
6824 @return Operation status
6825 @retval 0 ok
6826 @retval 1 error (disk problem)
6827 */
6828
write_log_record_for_repair(const HA_CHECK * param,MARIA_HA * info)6829 my_bool write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info)
6830 {
6831 MARIA_SHARE *share= info->s;
6832 /* in case this is maria_chk or recovery... */
6833 if (translog_status == TRANSLOG_OK && !maria_in_recovery &&
6834 share->base.born_transactional)
6835 {
6836 my_bool save_now_transactional= share->now_transactional;
6837
6838 /*
6839 For now this record is only informative. It could serve when applying
6840 logs to a backup, but that needs more thought. Assume table became
6841 corrupted. It is repaired, then some writes happen to it.
6842 Later we restore an old backup, and want to apply this REDO_REPAIR_TABLE
6843 record. For it to give the same result as originally, the table should
6844 be corrupted the same way, so applying previous REDOs should produce the
6845 same corruption; that's really not guaranteed (different execution paths
6846 in execution of REDOs vs runtime code so not same bugs hit, temporary
6847 hardware issues not repeatable etc). Corruption may not be repeatable.
6848 A reasonable solution is to execute the REDO_REPAIR_TABLE record and
6849 check if the checksum of the resulting table matches what it was at the
6850 end of the original repair (should be stored in log record); or execute
6851 the REDO_REPAIR_TABLE if the checksum of the table-before-repair matches
6852 was it was at the start of the original repair (should be stored in log
6853 record).
6854 */
6855 LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6856 uchar log_data[FILEID_STORE_SIZE + 8 + 8];
6857 LSN lsn;
6858
6859 /*
6860 testflag gives an idea of what REPAIR did (in particular T_QUICK
6861 or not: did it touch the data file or not?).
6862 */
6863 int8store(log_data + FILEID_STORE_SIZE, param->testflag);
6864 /* org_key_map is used when recreating index after a load data infile */
6865 int8store(log_data + FILEID_STORE_SIZE + 8, param->org_key_map);
6866
6867 log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
6868 log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6869
6870 share->now_transactional= 1;
6871 if (unlikely(translog_write_record(&lsn, LOGREC_REDO_REPAIR_TABLE,
6872 &dummy_transaction_object, info,
6873 (translog_size_t) sizeof(log_data),
6874 sizeof(log_array)/sizeof(log_array[0]),
6875 log_array, log_data, NULL) ||
6876 translog_flush(lsn)))
6877 return TRUE;
6878 /*
6879 The table's existence was made durable earlier (MY_SYNC_DIR passed to
6880 maria_change_to_newfile()). All pages have been flushed, state too, we
6881 need to force it to disk. Old REDOs should not be applied to the table,
6882 which is already enforced as skip_redos_lsn was increased in
6883 protect_against_repair_crash(). But if this is an explicit repair,
6884 even UNDO phase should ignore this table: create_rename_lsn should be
6885 increased, and this also serves for the REDO_REPAIR to be ignored by
6886 maria_read_log.
6887 The fully correct order would be: sync data and index file, remove crash
6888 mark and update LSNs then write state and sync index file. But at this
6889 point state (without crash mark) is already written.
6890 */
6891 if ((!(param->testflag & T_NO_CREATE_RENAME_LSN) &&
6892 _ma_update_state_lsns(share, lsn, share->state.create_trid, FALSE,
6893 FALSE)) ||
6894 _ma_sync_table_files(info))
6895 return TRUE;
6896 share->now_transactional= save_now_transactional;
6897 }
6898 return FALSE;
6899 }
6900
6901
6902 /**
6903 Writes an UNDO record which if executed in UNDO phase, will empty the
6904 table. Such record is thus logged only in certain cases of bulk insert
6905 (table needs to be empty etc).
6906 */
write_log_record_for_bulk_insert(MARIA_HA * info)6907 my_bool write_log_record_for_bulk_insert(MARIA_HA *info)
6908 {
6909 LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6910 uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE];
6911 LSN lsn;
6912 lsn_store(log_data, info->trn->undo_lsn);
6913 log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
6914 log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6915 return translog_write_record(&lsn, LOGREC_UNDO_BULK_INSERT,
6916 info->trn, info,
6917 (translog_size_t)
6918 log_array[TRANSLOG_INTERNAL_PARTS +
6919 0].length,
6920 TRANSLOG_INTERNAL_PARTS + 1, log_array,
6921 log_data + LSN_STORE_SIZE, NULL) ||
6922 translog_flush(lsn); /* WAL */
6923 }
6924
6925
6926 /* Give error message why reading of key page failed */
6927
report_keypage_fault(HA_CHECK * param,MARIA_HA * info,my_off_t position)6928 static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
6929 my_off_t position)
6930 {
6931 char buff[11];
6932 uint32 block_size= info->s->block_size;
6933
6934 if (my_errno == HA_ERR_CRASHED)
6935 _ma_check_print_error(param,
6936 "Wrong base information on indexpage at page: %s",
6937 llstr(position / block_size, buff));
6938 else
6939 _ma_check_print_error(param,
6940 "Can't read indexpage from page: %s, "
6941 "error: %d",
6942 llstr(position / block_size, buff), my_errno);
6943 }
6944
6945
6946 /**
6947 When we want to check a table, we verify that the transaction ids of rows
6948 and keys are not bigger than the biggest id generated by Maria so far, which
6949 is returned by the function below.
6950
6951 @note If control file is not open, 0 may be returned; to not confuse
6952 this with a valid max trid of 0, the caller should notice that it failed to
6953 open the control file (ma_control_file_inited() can serve for that).
6954 */
6955
max_trid_in_system(void)6956 static TrID max_trid_in_system(void)
6957 {
6958 TrID id= trnman_get_max_trid(); /* 0 if transac manager not initialized */
6959 /* 'id' may be far bigger, if last shutdown is old */
6960 return MY_MAX(id, max_trid_in_control_file);
6961 }
6962
6963
/**
  @brief Reports a row whose transaction id should not be visible.

  param->not_visible_rows_found is incremented on every call, but a message
  is printed for the first occurrence only: a warning if no control file is
  in use, otherwise an error that also shows param->max_trid.

  @param  param        check descriptor (counter, max_trid, message sink)
  @param  used_trid    transaction id found in the row
*/
static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid)
{
  char trid_buff[22], max_trid_buff[22];

  /* Count every hit; only the first one is reported */
  if (param->not_visible_rows_found++)
    return;

  if (ma_control_file_inited())
  {
    _ma_check_print_error(param,
                          "Found row with transaction id %s when max "
                          "transaction id according to aria_control_file "
                          "is %s",
                          llstr(used_trid, trid_buff),
                          llstr(param->max_trid, max_trid_buff));
    return;
  }

  _ma_check_print_warning(param,
                          "Found row with transaction id %s but no "
                          "aria_control_file was used or specified. "
                          "The table may be corrupted",
                          llstr(used_trid, trid_buff));
}
6988
6989
6990 /**
6991 Mark that we can retry normal repair if we used quick repair
6992
6993 We shouldn't do this in case of disk error as in this case we are likely
6994 to loose much more than expected.
6995 */
6996
retry_if_quick(MARIA_SORT_PARAM * sort_param,int error)6997 void retry_if_quick(MARIA_SORT_PARAM *sort_param, int error)
6998 {
6999 HA_CHECK *param=sort_param->sort_info->param;
7000
7001 if (!sort_param->fix_datafile && error >= HA_ERR_FIRST)
7002 {
7003 param->retry_repair=1;
7004 param->testflag|=T_RETRY_WITHOUT_QUICK;
7005 }
7006 }
7007
7008 /* Print information about bitmap page */
7009
print_bitmap_description(MARIA_SHARE * share,pgcache_page_no_t page,uchar * bitmap_data)7010 static void print_bitmap_description(MARIA_SHARE *share,
7011 pgcache_page_no_t page,
7012 uchar *bitmap_data)
7013 {
7014 char *tmp= my_malloc(MAX_BITMAP_INFO_LENGTH, MYF(MY_WME));
7015 if (!tmp)
7016 return;
7017 _ma_get_bitmap_description(&share->bitmap, bitmap_data, page, tmp);
7018 printf("Bitmap page %lu\n%s", (ulong) page, tmp);
7019 my_free(tmp);
7020 }
7021