1 /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; version 2 of the License.
6
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
11
12 You should have received a copy of the GNU General Public License
13 along with this program; if not, write to the Free Software
14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15
16 /* Describe, check and repair of MARIA tables */
17
18 /*
19 About checksum calculation.
20
21 There are two types of checksums. Table checksum and row checksum.
22
23 Row checksum is an additional uchar at the end of dynamic length
24 records. It must be calculated if the table is configured for them.
25 Otherwise they must not be used. The variable
26 MYISAM_SHARE::calc_checksum determines if row checksums are used.
27 MI_INFO::checksum is used as temporary storage during row handling.
28 For parallel repair we must assure that only one thread can use this
29 variable. There is no problem on the write side as this is done by one
30 thread only. But when checking a record after read this could go
31 wrong. But since all threads read through a common read buffer, it is
32 sufficient if only one thread checks it.
33
34 Table checksum is an eight uchar value in the header of the index file.
35 It can be calculated even if row checksums are not used. The variable
36 MI_CHECK::glob_crc is calculated over all records.
37 MI_SORT_PARAM::calc_checksum determines if this should be done. This
38 variable is not part of MI_CHECK because it must be set per thread for
39 parallel repair. The global glob_crc must be changed by one thread
40 only. And it is sufficient to calculate the checksum once only.
41 */
42
43 #include "ma_ftdefs.h"
44 #include "ma_rt_index.h"
45 #include "ma_blockrec.h"
46 #include "trnman.h"
47 #include "ma_key_recover.h"
48 #include <my_check_opt.h>
49
50 #include <stdarg.h>
51 #include <my_getopt.h>
52 #ifdef HAVE_SYS_VADVISE_H
53 #include <sys/vadvise.h>
54 #endif
55
56 /* Functions defined in this file */
57
58 static int check_k_link(HA_CHECK *param, MARIA_HA *info, my_off_t next_link);
59 static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
60 MARIA_PAGE *page, ha_rows *keys,
61 ha_checksum *key_checksum, uint level);
62 static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo);
63 static ha_checksum calc_checksum(ha_rows count);
64 static int writekeys(MARIA_SORT_PARAM *sort_param);
65 static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
66 MARIA_KEYDEF *keyinfo,
67 my_off_t pagepos, File new_file);
68 static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
69 static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
70 static int sort_get_next_record(MARIA_SORT_PARAM *sort_param);
71 static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
72 const void *b);
73 static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
74 const uchar *a);
75 static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a);
76 static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo, const uchar *key);
77 static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
78 reg1 SORT_KEY_BLOCKS *key_block,
79 const uchar *key, my_off_t prev_block);
80 static int sort_delete_record(MARIA_SORT_PARAM *sort_param);
81 /*static int _ma_flush_pending_blocks(HA_CHECK *param);*/
82 static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
83 uint buffer_length);
84 static ha_checksum maria_byte_checksum(const uchar *buf, uint length);
85 static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share);
86 static void restore_data_file_type(MARIA_SHARE *share);
87 static void change_data_file_descriptor(MARIA_HA *info, File new_file);
88 static void unuse_data_file_descriptor(MARIA_HA *info);
89 static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
90 MARIA_HA *info, uchar *record);
91 static void copy_data_file_state(MARIA_STATE_INFO *to,
92 MARIA_STATE_INFO *from);
93 static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
94 my_off_t position);
95 static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file);
96 static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
97 MARIA_HA *info);
98 static TrID max_trid_in_system(void);
99 static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid);
100 void retry_if_quick(MARIA_SORT_PARAM *param, int error);
101 static void print_bitmap_description(MARIA_SHARE *share,
102 pgcache_page_no_t page,
103 uchar *buff);
104
105
106 /* Initialize check param with default values */
107
maria_chk_init(HA_CHECK * param)108 void maria_chk_init(HA_CHECK *param)
109 {
110 bzero((uchar*) param,sizeof(*param));
111 param->opt_follow_links=1;
112 param->keys_in_use= ~(ulonglong) 0;
113 param->search_after_block=HA_OFFSET_ERROR;
114 param->auto_increment_value= 0;
115 param->use_buffers= PAGE_BUFFER_INIT;
116 param->read_buffer_length=READ_BUFFER_INIT;
117 param->write_buffer_length=READ_BUFFER_INIT;
118 param->sort_buffer_length=SORT_BUFFER_INIT;
119 param->sort_key_blocks=BUFFERS_WHEN_SORTING;
120 param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
121 param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL);
122 param->start_check_pos=0;
123 param->max_record_length= LONGLONG_MAX;
124 param->pagecache_block_size= KEY_CACHE_BLOCK_SIZE;
125 param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
126 param->max_stage= 1;
127 }
128
129
130 /* Initialize check param and maria handler for check of table */
131
maria_chk_init_for_check(HA_CHECK * param,MARIA_HA * info)132 void maria_chk_init_for_check(HA_CHECK *param, MARIA_HA *info)
133 {
134 param->not_visible_rows_found= 0;
135 param->max_found_trid= 0;
136
137 /*
138 Set up transaction handler so that we can see all rows. When rows is read
139 we will check the found id against param->max_tried
140 */
141 if (!info->s->base.born_transactional)
142 {
143 /*
144 There are no trids. Howver we want to set max_trid to make test of
145 create_trid simpler.
146 */
147 param->max_trid= ~(TrID) 0;
148 }
149 else if (param->max_trid == 0 || param->max_trid == ~(TrID) 0)
150 {
151 if (!ma_control_file_inited())
152 param->max_trid= 0; /* Give warning for first trid found */
153 else
154 param->max_trid= max_trid_in_system();
155 }
156
157 maria_ignore_trids(info);
158 }
159
160
161 /* Check the status flags for the table */
162
maria_chk_status(HA_CHECK * param,MARIA_HA * info)163 int maria_chk_status(HA_CHECK *param, MARIA_HA *info)
164 {
165 MARIA_SHARE *share= info->s;
166
167 /* Protection for HA_EXTRA_FLUSH */
168 mysql_mutex_lock(&share->intern_lock);
169
170 if (maria_is_crashed_on_repair(info))
171 _ma_check_print_warning(param,
172 "Table is marked as crashed and last repair failed");
173 else if (maria_in_repair(info))
174 _ma_check_print_warning(param,
175 "Last repair was aborted before finishing");
176 else if (maria_is_crashed(info))
177 _ma_check_print_warning(param,
178 "Table is marked as crashed");
179 if (share->state.open_count != (uint) (share->global_changed ? 1 : 0))
180 {
181 /* Don't count this as a real warning, as check can correct this ! */
182 my_bool save=param->warning_printed;
183 _ma_check_print_warning(param,
184 share->state.open_count==1 ?
185 "%d client is using or hasn't closed the table properly" :
186 "%d clients are using or haven't closed the table properly",
187 share->state.open_count);
188 /* If this will be fixed by the check, forget the warning */
189 if (param->testflag & T_UPDATE_STATE)
190 param->warning_printed=save;
191 }
192
193 mysql_mutex_unlock(&share->intern_lock);
194
195 if (share->state.create_trid > param->max_trid)
196 {
197 param->wrong_trd_printed= 1; /* Force should run zerofill */
198 _ma_check_print_warning(param,
199 "Table create_trd (%llu) > current max_transaction id (%llu). Table needs to be repaired or zerofilled to be usable",
200 share->state.create_trid, param->max_trid);
201 return 1;
202 }
203 return 0;
204 }
205
206 /*
207 Check delete links in row data
208 */
209
maria_chk_del(HA_CHECK * param,register MARIA_HA * info,ulonglong test_flag)210 int maria_chk_del(HA_CHECK *param, register MARIA_HA *info,
211 ulonglong test_flag)
212 {
213 MARIA_SHARE *share= info->s;
214 reg2 ha_rows i;
215 uint delete_link_length;
216 my_off_t empty,next_link,UNINIT_VAR(old_link);
217 char buff[22],buff2[22];
218 DBUG_ENTER("maria_chk_del");
219
220 param->record_checksum=0;
221
222 if (share->data_file_type == BLOCK_RECORD)
223 DBUG_RETURN(0); /* No delete links here */
224
225 delete_link_length=((share->options & HA_OPTION_PACK_RECORD) ? 20 :
226 share->rec_reflength+1);
227
228 if (!(test_flag & T_SILENT))
229 puts("- check record delete-chain");
230
231 next_link=share->state.dellink;
232 if (share->state.state.del == 0)
233 {
234 if (test_flag & T_VERBOSE)
235 {
236 puts("No recordlinks");
237 }
238 }
239 else
240 {
241 if (test_flag & T_VERBOSE)
242 printf("Recordlinks: ");
243 empty=0;
244 for (i= share->state.state.del ; i > 0L && next_link != HA_OFFSET_ERROR ; i--)
245 {
246 if (_ma_killed_ptr(param))
247 DBUG_RETURN(1);
248 if (test_flag & T_VERBOSE)
249 printf(" %9s",llstr(next_link,buff));
250 if (next_link >= share->state.state.data_file_length)
251 goto wrong;
252 if (mysql_file_pread(info->dfile.file, (uchar*) buff, delete_link_length,
253 next_link,MYF(MY_NABP)))
254 {
255 if (test_flag & T_VERBOSE) puts("");
256 _ma_check_print_error(param,"Can't read delete-link at filepos: %s",
257 llstr(next_link,buff));
258 DBUG_RETURN(1);
259 }
260 if (*buff != '\0')
261 {
262 if (test_flag & T_VERBOSE) puts("");
263 _ma_check_print_error(param,"Record at pos: %s is not remove-marked",
264 llstr(next_link,buff));
265 goto wrong;
266 }
267 if (share->options & HA_OPTION_PACK_RECORD)
268 {
269 my_off_t prev_link=mi_sizekorr(buff+12);
270 if (empty && prev_link != old_link)
271 {
272 if (test_flag & T_VERBOSE) puts("");
273 _ma_check_print_error(param,
274 "Deleted block at %s doesn't point back at previous delete link",
275 llstr(next_link,buff2));
276 goto wrong;
277 }
278 old_link=next_link;
279 next_link=mi_sizekorr(buff+4);
280 empty+=mi_uint3korr(buff+1);
281 }
282 else
283 {
284 param->record_checksum+=(ha_checksum) next_link;
285 next_link= _ma_rec_pos(share, (uchar *) buff + 1);
286 empty+=share->base.pack_reclength;
287 }
288 }
289 if (share->state.state.del && (test_flag & T_VERBOSE))
290 puts("\n");
291 if (empty != share->state.state.empty)
292 {
293 _ma_check_print_warning(param,
294 "Found %s deleted space in delete link chain. Should be %s",
295 llstr(empty,buff2),
296 llstr(share->state.state.empty,buff));
297 }
298 if (next_link != HA_OFFSET_ERROR)
299 {
300 _ma_check_print_error(param,
301 "Found more than the expected %s deleted rows in delete link chain",
302 llstr(share->state.state.del, buff));
303 goto wrong;
304 }
305 if (i != 0)
306 {
307 _ma_check_print_error(param,
308 "Found %s deleted rows in delete link chain. Should be %s",
309 llstr(share->state.state.del - i, buff2),
310 llstr(share->state.state.del, buff));
311 goto wrong;
312 }
313 }
314 DBUG_RETURN(0);
315
316 wrong:
317 param->testflag|=T_RETRY_WITHOUT_QUICK;
318 if (test_flag & T_VERBOSE)
319 puts("");
320 _ma_check_print_error(param,"record delete-link-chain corrupted");
321 DBUG_RETURN(1);
322 } /* maria_chk_del */
323
324
325 /* Check delete links in index file */
326
check_k_link(HA_CHECK * param,register MARIA_HA * info,my_off_t next_link)327 static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
328 my_off_t next_link)
329 {
330 MARIA_SHARE *share= info->s;
331 uint block_size= share->block_size;
332 ha_rows records;
333 char llbuff[21], llbuff2[21];
334 uchar *buff;
335 DBUG_ENTER("check_k_link");
336
337 if (next_link == HA_OFFSET_ERROR)
338 DBUG_RETURN(0); /* Avoid printing empty line */
339
340 records= (ha_rows) (share->state.state.key_file_length / block_size);
341 while (next_link != HA_OFFSET_ERROR && records > 0)
342 {
343 if (_ma_killed_ptr(param))
344 DBUG_RETURN(1);
345 if (param->testflag & T_VERBOSE)
346 printf("%16s",llstr(next_link,llbuff));
347
348 /* Key blocks must lay within the key file length entirely. */
349 if (next_link + block_size > share->state.state.key_file_length)
350 {
351 /* purecov: begin tested */
352 _ma_check_print_error(param, "Invalid key block position: %s "
353 "key block size: %u file_length: %s",
354 llstr(next_link, llbuff), block_size,
355 llstr(share->state.state.key_file_length, llbuff2));
356 DBUG_RETURN(1);
357 /* purecov: end */
358 }
359
360 /* Key blocks must be aligned at block_size */
361 if (next_link & (block_size -1))
362 {
363 /* purecov: begin tested */
364 _ma_check_print_error(param, "Mis-aligned key block: %s "
365 "minimum key block length: %u",
366 llstr(next_link, llbuff),
367 block_size);
368 DBUG_RETURN(1);
369 /* purecov: end */
370 }
371
372 DBUG_ASSERT(share->pagecache->block_size == block_size);
373 if (!(buff= pagecache_read(share->pagecache,
374 &share->kfile,
375 (pgcache_page_no_t) (next_link / block_size),
376 DFLT_INIT_HITS,
377 info->buff, PAGECACHE_READ_UNKNOWN_PAGE,
378 PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
379 {
380 /* purecov: begin tested */
381 _ma_check_print_error(param, "key cache read error for block: %s",
382 llstr(next_link,llbuff));
383 DBUG_RETURN(1);
384 /* purecov: end */
385 }
386 if (_ma_get_keynr(info->s, buff) != MARIA_DELETE_KEY_NR)
387 _ma_check_print_error(param, "Page at %s is not delete marked",
388 llstr(next_link, llbuff));
389
390 next_link= mi_sizekorr(buff + share->keypage_header);
391 records--;
392 param->key_file_blocks+=block_size;
393 }
394 if (param->testflag & T_VERBOSE)
395 {
396 if (next_link != HA_OFFSET_ERROR)
397 printf("%16s\n",llstr(next_link,llbuff));
398 else
399 puts("");
400 }
401 DBUG_RETURN (next_link != HA_OFFSET_ERROR);
402 } /* check_k_link */
403
404
405 /* Check sizes of files */
406
maria_chk_size(HA_CHECK * param,register MARIA_HA * info)407 int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
408 {
409 MARIA_SHARE *share= info->s;
410 int error;
411 register my_off_t skr,size;
412 char buff[22],buff2[22];
413 DBUG_ENTER("maria_chk_size");
414
415 if (!(param->testflag & T_SILENT))
416 puts("- check file-size");
417
418 /*
419 The following is needed if called externally (not from maria_chk).
420 To get a correct physical size we need to flush them.
421 */
422 if ((error= _ma_flush_table_files(info,
423 MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
424 FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE)))
425 _ma_check_print_error(param, "Failed to flush data or index file");
426
427 size= mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END, MYF(MY_THREADSAFE));
428 if ((skr=(my_off_t) share->state.state.key_file_length) != size)
429 {
430 /* Don't give error if file generated by maria_pack */
431 if (skr > size && maria_is_any_key_active(share->state.key_map))
432 {
433 error=1;
434 _ma_check_print_error(param,
435 "Size of indexfile is: %-8s Expected: %s",
436 llstr(size,buff), llstr(skr,buff2));
437 share->state.state.key_file_length= size;
438 }
439 else if (!(param->testflag & T_VERY_SILENT))
440 _ma_check_print_warning(param,
441 "Size of indexfile is: %-8s Expected: %s",
442 llstr(size,buff), llstr(skr,buff2));
443 }
444 if (size > share->base.max_key_file_length)
445 {
446 _ma_check_print_warning(param,
447 "Size of indexfile is: %-8s which is bigger than max indexfile size: %s",
448 ullstr(size,buff),
449 ullstr(share->base.max_key_file_length, buff2));
450 }
451 else if (!(param->testflag & T_VERY_SILENT) &&
452 ! (share->options & HA_OPTION_COMPRESS_RECORD) &&
453 ulonglong2double(share->state.state.key_file_length) >
454 ulonglong2double(share->base.margin_key_file_length)*0.9)
455 _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used",
456 llstr(share->state.state.key_file_length,buff),
457 llstr(share->base.max_key_file_length,buff));
458
459 size= mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
460 skr=(my_off_t) share->state.state.data_file_length;
461 if (share->options & HA_OPTION_COMPRESS_RECORD)
462 skr+= MEMMAP_EXTRA_MARGIN;
463 #ifdef USE_RELOC
464 if (share->data_file_type == STATIC_RECORD &&
465 skr < (my_off_t) share->base.reloc*share->base.min_pack_length)
466 skr=(my_off_t) share->base.reloc*share->base.min_pack_length;
467 #endif
468 if (skr != size)
469 {
470 share->state.state.data_file_length=size; /* Skip other errors */
471 if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN)
472 {
473 error=1;
474 _ma_check_print_error(param,"Size of datafile is: %-9s Expected: %s",
475 llstr(size,buff), llstr(skr,buff2));
476 param->testflag|=T_RETRY_WITHOUT_QUICK;
477 }
478 else
479 {
480 _ma_check_print_warning(param,
481 "Size of datafile is: %-9s Expected: %s",
482 llstr(size,buff), llstr(skr,buff2));
483 }
484 }
485 if (size > share->base.max_data_file_length)
486 {
487 _ma_check_print_warning(param,
488 "Size of datafile is: %-8s which is bigger than max datafile size: %s",
489 ullstr(size,buff),
490 ullstr(share->base.max_data_file_length, buff2));
491 } else if (!(param->testflag & T_VERY_SILENT) &&
492 !(share->options & HA_OPTION_COMPRESS_RECORD) &&
493 ulonglong2double(share->state.state.data_file_length) >
494 (ulonglong2double(share->base.max_data_file_length)*0.9))
495 _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used",
496 llstr(share->state.state.data_file_length,buff),
497 llstr(share->base.max_data_file_length,buff2));
498 DBUG_RETURN(error);
499 } /* maria_chk_size */
500
501
502 /* Check keys */
503
maria_chk_key(HA_CHECK * param,register MARIA_HA * info)504 int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
505 {
506 uint key,found_keys=0,full_text_keys=0,result=0;
507 ha_rows keys;
508 ha_checksum old_record_checksum,init_checksum;
509 my_off_t all_keydata,all_totaldata,key_totlength,length;
510 double *rec_per_key_part;
511 MARIA_SHARE *share= info->s;
512 MARIA_KEYDEF *keyinfo;
513 char buff[22],buff2[22];
514 MARIA_PAGE page;
515 DBUG_ENTER("maria_chk_key");
516
517 if (!(param->testflag & T_SILENT))
518 puts("- check key delete-chain");
519
520 param->key_file_blocks=share->base.keystart;
521 if (check_k_link(param, info, share->state.key_del))
522 {
523 if (param->testflag & T_VERBOSE) puts("");
524 _ma_check_print_error(param,"key delete-link-chain corrupted");
525 DBUG_RETURN(-1);
526 }
527
528 if (!(param->testflag & T_SILENT))
529 puts("- check index reference");
530
531 all_keydata=all_totaldata=key_totlength=0;
532 init_checksum=param->record_checksum;
533 old_record_checksum=0;
534 if (share->data_file_type == STATIC_RECORD)
535 old_record_checksum= (calc_checksum(share->state.state.records +
536 share->state.state.del-1) *
537 share->base.pack_reclength);
538 rec_per_key_part= param->new_rec_per_key_part;
539 for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
540 rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++)
541 {
542 param->key_crc[key]=0;
543 if (! maria_is_key_active(share->state.key_map, key))
544 {
545 /* Remember old statistics for key */
546 memcpy((char*) rec_per_key_part,
547 (char*) (share->state.rec_per_key_part +
548 (uint) (rec_per_key_part - param->new_rec_per_key_part)),
549 keyinfo->keysegs*sizeof(*rec_per_key_part));
550 continue;
551 }
552 found_keys++;
553 _ma_report_progress(param, key, share->base.keys);
554
555 param->record_checksum=init_checksum;
556
557 bzero((char*) ¶m->unique_count,sizeof(param->unique_count));
558 bzero((char*) ¶m->notnull_count,sizeof(param->notnull_count));
559
560 if ((!(param->testflag & T_SILENT)))
561 printf ("- check data record references index: %d\n",key+1);
562 if (keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL))
563 full_text_keys++;
564 if (share->state.key_root[key] == HA_OFFSET_ERROR)
565 {
566 if (share->state.state.records != 0 && !(keyinfo->flag & HA_FULLTEXT))
567 _ma_check_print_error(param, "Key tree %u is empty", key + 1);
568 goto do_stat;
569 }
570 if (_ma_fetch_keypage(&page, info, keyinfo, share->state.key_root[key],
571 PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
572 info->buff, 0))
573 {
574 report_keypage_fault(param, info, share->state.key_root[key]);
575 if (!(param->testflag & T_INFO))
576 DBUG_RETURN(-1);
577 result= -1;
578 continue;
579 }
580 param->key_file_blocks+=keyinfo->block_length;
581 keys=0;
582 param->keydata=param->totaldata=0;
583 param->key_blocks=0;
584 param->max_level=0;
585 if (chk_index(param, info,keyinfo, &page, &keys, param->key_crc+key,1))
586 DBUG_RETURN(-1);
587 if (!(keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL | HA_RTREE_INDEX)))
588 {
589 if (keys != share->state.state.records)
590 {
591 _ma_check_print_error(param,"Found %s keys of %s",llstr(keys,buff),
592 llstr(share->state.state.records,buff2));
593 if (!(param->testflag & T_INFO))
594 DBUG_RETURN(-1);
595 result= -1;
596 continue;
597 }
598 if ((found_keys - full_text_keys == 1 &&
599 !(share->data_file_type == STATIC_RECORD)) ||
600 (param->testflag & T_DONT_CHECK_CHECKSUM))
601 old_record_checksum= param->record_checksum;
602 else if (old_record_checksum != param->record_checksum)
603 {
604 if (key)
605 _ma_check_print_error(param,
606 "Key %u doesn't point at same records as "
607 "key 1",
608 key+1);
609 else
610 _ma_check_print_error(param,"Key 1 doesn't point at all records");
611 if (!(param->testflag & T_INFO))
612 DBUG_RETURN(-1);
613 result= -1;
614 continue;
615 }
616 }
617 if ((uint) share->base.auto_key -1 == key)
618 {
619 /* Check that auto_increment key is bigger than max key value */
620 ulonglong auto_increment;
621 const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
622 info->lastinx=key;
623 _ma_read_key_record(info, info->rec_buff, 0);
624 auto_increment=
625 ma_retrieve_auto_increment(info->rec_buff + keyseg->start,
626 keyseg->type);
627 if (auto_increment > share->state.auto_increment)
628 {
629 _ma_check_print_warning(param, "Auto-increment value: %s is smaller "
630 "than max used value: %s",
631 llstr(share->state.auto_increment,buff2),
632 llstr(auto_increment, buff));
633 }
634 if (param->testflag & T_AUTO_INC)
635 {
636 set_if_bigger(share->state.auto_increment,
637 auto_increment);
638 set_if_bigger(share->state.auto_increment,
639 param->auto_increment_value);
640 }
641
642 /* Check that there isn't a row with auto_increment = 0 in the table */
643 maria_extra(info,HA_EXTRA_KEYREAD,0);
644 bzero(info->lastkey_buff, keyinfo->seg->length);
645 if (!maria_rkey(info, info->rec_buff, key,
646 info->lastkey_buff,
647 (key_part_map) 1, HA_READ_KEY_EXACT))
648 {
649 /* Don't count this as a real warning, as maria_chk can't correct it */
650 my_bool save=param->warning_printed;
651 _ma_check_print_warning(param, "Found row where the auto_increment "
652 "column has the value 0");
653 param->warning_printed=save;
654 }
655 maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
656 }
657
658 length=(my_off_t) isam_key_length(info,keyinfo)*keys + param->key_blocks*2;
659 if (param->testflag & T_INFO && param->totaldata != 0L && keys != 0L)
660 printf("Key: %2d: Keyblocks used: %3d%% Packed: %4d%% Max levels: %2d\n",
661 key+1,
662 (int) (my_off_t2double(param->keydata)*100.0/my_off_t2double(param->totaldata)),
663 (int) ((my_off_t2double(length) - my_off_t2double(param->keydata))*100.0/
664 my_off_t2double(length)),
665 param->max_level);
666 all_keydata+=param->keydata; all_totaldata+=param->totaldata; key_totlength+=length;
667
668 do_stat:
669 if (param->testflag & T_STATISTICS)
670 maria_update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
671 param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
672 param->notnull_count: NULL,
673 (ulonglong)share->state.state.records);
674 }
675 if (param->testflag & T_INFO)
676 {
677 if (all_totaldata != 0L && found_keys > 0)
678 printf("Total: Keyblocks used: %3d%% Packed: %4d%%\n\n",
679 (int) (my_off_t2double(all_keydata)*100.0/
680 my_off_t2double(all_totaldata)),
681 (int) ((my_off_t2double(key_totlength) -
682 my_off_t2double(all_keydata))*100.0/
683 my_off_t2double(key_totlength)));
684 else if (all_totaldata != 0L && maria_is_any_key_active(share->state.key_map))
685 puts("");
686 }
687 if (param->key_file_blocks != share->state.state.key_file_length &&
688 share->state.key_map == ~(ulonglong) 0)
689 _ma_check_print_warning(param, "Some data are unreferenced in keyfile");
690 if (found_keys != full_text_keys)
691 param->record_checksum=old_record_checksum-init_checksum; /* Remove delete links */
692 else
693 param->record_checksum=0;
694 DBUG_RETURN(result);
695 } /* maria_chk_key */
696
697
698
/*
  Validate the position of a child key page, read it and check it.

  SYNOPSIS
    chk_index_down()
    param         INOUT  Check descriptor; key_file_blocks grows by the
                         key block length when the page could be read.
    info          IN     Handler of the table to check.
    keyinfo       IN     Definition of the index the page belongs to.
    page          IN     File position of the page to check.
    buff          IN     Buffer the page is read into.
    keys          INOUT  Passed on to chk_index().
    key_checksum  INOUT  Passed on to chk_index().
    level         IN     Passed on to chk_index().

  DESCRIPTION
    If the page lies beyond the remembered key file length but still
    inside the physical file, an error is reported and the remembered
    length is corrected to the physical size rounded down to a multiple
    of the key block length; the check then continues.

  RETURN
    0   Page and everything checked below it is ok
    1   Position is invalid, page could not be read, or chk_index() failed
*/

static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
                          my_off_t page, uchar *buff, ha_rows *keys,
                          ha_checksum *key_checksum, uint level)
{
  MARIA_SHARE *share= info->s;
  MARIA_PAGE child_page;
  char pos_str[22], len_str[22];
  DBUG_ENTER("chk_index_down");

  /* Key blocks must lay within the key file length entirely. */
  if (page + keyinfo->block_length > share->state.state.key_file_length)
  {
    /* purecov: begin tested */
    /* Give it a chance to fit in the real file size. */
    my_off_t physical_length= mysql_file_seek(share->kfile.file, 0L,
                                              MY_SEEK_END,
                                              MYF(MY_THREADSAFE));
    _ma_check_print_error(param, "Invalid key block position: %s "
                          "key block size: %u file_length: %s",
                          llstr(page, pos_str), keyinfo->block_length,
                          llstr(share->state.state.key_file_length,
                                len_str));
    if (page + keyinfo->block_length > physical_length)
      DBUG_RETURN(1);
    /* Fix the remembered key file length. */
    share->state.state.key_file_length=
      (physical_length & ~ (my_off_t) (keyinfo->block_length - 1));
    /* purecov: end */
  }

  /* Key blocks must be aligned at block length */
  if (page & (share->block_size -1))
  {
    /* purecov: begin tested */
    _ma_check_print_error(param, "Mis-aligned key block: %s "
                          "key block length: %u",
                          llstr(page, pos_str), share->block_size);
    DBUG_RETURN(1);
    /* purecov: end */
  }

  if (_ma_fetch_keypage(&child_page, info, keyinfo, page,
                        PAGECACHE_LOCK_LEFT_UNLOCKED,
                        DFLT_INIT_HITS, buff, 0))
  {
    /* purecov: begin tested */
    report_keypage_fault(param, info, page);
    DBUG_RETURN(1);
    /* purecov: end */
  }

  param->key_file_blocks+= keyinfo->block_length;

  /* Any failure from chk_index() is reported to the caller as 1 */
  if (chk_index(param, info, keyinfo, &child_page, keys, key_checksum, level))
    DBUG_RETURN(1);

  DBUG_RETURN(0);
}
758
759
760 /*
761 "Ignore NULLs" statistics collection method: process first index tuple.
762
763 SYNOPSIS
764 maria_collect_stats_nonulls_first()
765 keyseg IN Array of key part descriptions
766 notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
767 tuples that don't contain NULLs)
768 key IN Key values tuple
769
770 DESCRIPTION
771 Process the first index tuple - find out which prefix tuples don't
772 contain NULLs, and update the array of notnull counters accordingly.
773 */
774
775 static
maria_collect_stats_nonulls_first(HA_KEYSEG * keyseg,ulonglong * notnull,const uchar * key)776 void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
777 const uchar *key)
778 {
779 size_t first_null, kp;
780 first_null= ha_find_null(keyseg, key) - keyseg;
781 /*
782 All prefix tuples that don't include keypart_{first_null} are not-null
783 tuples (and all others aren't), increment counters for them.
784 */
785 for (kp= 0; kp < first_null; kp++)
786 notnull[kp]++;
787 }
788
789
790 /*
791 "Ignore NULLs" statistics collection method: process next index tuple.
792
793 SYNOPSIS
794 maria_collect_stats_nonulls_next()
795 keyseg IN Array of key part descriptions
796 notnull INOUT Array, notnull[i] = (number of {keypart1...keypart_i}
797 tuples that don't contain NULLs)
798 prev_key IN Previous key values tuple
799 last_key IN Next key values tuple
800
801 DESCRIPTION
802 Process the next index tuple:
803 1. Find out which prefix tuples of last_key don't contain NULLs, and
804 update the array of notnull counters accordingly.
805 2. Find the first keypart number where the prev_key and last_key tuples
806 are different(A), or last_key has NULL value(B), and return it, so the
807 caller can count number of unique tuples for each key prefix. We don't
808 need (B) to be counted, and that is compensated back in
809 maria_update_key_parts().
810
811 RETURN
812 1 + number of first keypart where values differ or last_key tuple has NULL
813 */
814
815 static
maria_collect_stats_nonulls_next(HA_KEYSEG * keyseg,ulonglong * notnull,const uchar * prev_key,const uchar * last_key)816 int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
817 const uchar *prev_key,
818 const uchar *last_key)
819 {
820 uint diffs[2];
821 size_t first_null_seg, kp;
822 HA_KEYSEG *seg;
823
824 /*
825 Find the first keypart where values are different or either of them is
826 NULL. We get results in diffs array:
827 diffs[0]= 1 + number of first different keypart
828 diffs[1]=offset: (last_key + diffs[1]) points to first value in
829 last_key that is NULL or different from corresponding
830 value in prev_key.
831 */
832 ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY,
833 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs);
834 seg= keyseg + diffs[0] - 1;
835
836 /* Find first NULL in last_key */
837 first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg;
838 for (kp= 0; kp < first_null_seg; kp++)
839 notnull[kp]++;
840
841 /*
842 Return 1+ number of first key part where values differ. Don't care if
843 these were NULLs and not .... We compensate for that in
844 maria_update_key_parts.
845 */
846 return diffs[0];
847 }
848
849
850 /* Check if index is ok */
851
/*
  Check one index page and, through chk_index_down(), the pages below it.

  SYNOPSIS
    chk_index()
    param         Check state; counters and statistics are updated here
    info          Maria handler
    keyinfo       Definition of the index that is checked
    anc_page      Page to check
    keys          In/out: running number of keys found
    key_checksum  In/out: running checksum over the key data
    level         Depth of anc_page; used to maintain param->max_level

  RETURN
    0   ok (also returned at once for spatial/RTree indexes, which are
        not checked here)
    1   Error found (already reported) or the check was killed
    -1  Could not allocate the buffer used for child pages
*/

static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
                     MARIA_PAGE *anc_page, ha_rows *keys,
                     ha_checksum *key_checksum, uint level)
{
  int cmp_res;
  int result= 1;                                /* Changed to 0 on success */
  uint search_flags, page_flag, nod_flag;
  uchar *child_buff, *key_cursor, *key_start, *page_end;
  my_off_t child_pos, row_pos;
  MARIA_SHARE *share= info->s;
  char pos_txt[22];
  uint diff_pos[2];
  uchar key_storage[MARIA_MAX_KEY_BUFF];
  MARIA_KEY cur_key;
  DBUG_ENTER("chk_index");
  DBUG_DUMP("buff", anc_page->buff, anc_page->size);

  /* TODO: implement appropriate check for RTree keys */
  if (keyinfo->flag & (HA_SPATIAL | HA_RTREE_INDEX))
    DBUG_RETURN(0);

  child_buff= (uchar*) my_alloca((uint) keyinfo->block_length);
  if (!child_buff)
  {
    _ma_check_print_error(param,"Not enough memory for keyblock");
    DBUG_RETURN(-1);
  }

  /*
    Unique keys: compare so that two equal keys are reported (not real
    duplicates allowed). Other keys: keys are in position order.
  */
  search_flags= ((keyinfo->flag & HA_NOSAME) ?
                 (SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT) :
                 SEARCH_SAME);

  page_flag=  anc_page->flag;
  nod_flag=   anc_page->node;
  key_start=  anc_page->buff + share->keypage_header;
  key_cursor= key_start + nod_flag;
  page_end=   anc_page->buff + anc_page->size;

  /* Statistics (INFO) */
  param->keydata+= anc_page->size;
  param->totaldata+= keyinfo->block_length;
  param->key_blocks++;
  if (param->max_level < level)
    param->max_level= level;

  if (_ma_get_keynr(share, anc_page->buff) != keyinfo->key_nr)
    _ma_check_print_error(param, "Page at %s is not marked for index %u",
                          llstr(anc_page->pos, pos_txt),
                          (uint) keyinfo->key_nr);
  if ((page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
      !share->base.born_transactional)
  {
    _ma_check_print_error(param,
                          "Page at %s is marked with HAS_TRANSID even if "
                          "table is not transactional",
                          llstr(anc_page->pos, pos_txt));
  }

  if (anc_page->size > share->max_index_block_size)
  {
    _ma_check_print_error(param,
                          "Page at %s has impossible (too big) pagelength",
                          llstr(anc_page->pos, pos_txt));
    goto end;
  }

  cur_key.keyinfo= keyinfo;
  info->last_key.keyinfo= keyinfo;
  info->lastinx= ~0;                            /* Safety */
  cur_key.data= key_storage;

  /*
    The increment expression saves the key just handled in info->last_key.
    It is therefore also run when an iteration ends with 'continue'.
  */
  for ( ;; _ma_copy_key(&info->last_key, &cur_key))
  {
    my_bool outside_file;

    if (nod_flag)
    {
      /* Node page: check the child page in front of the next key */
      if (_ma_killed_ptr(param))
        goto end;
      child_pos= _ma_kpos(nod_flag, key_cursor);
      if (chk_index_down(param, info, keyinfo, child_pos,
                         child_buff, keys, key_checksum, level+1))
      {
        DBUG_DUMP("page_data", key_start, (uint) (key_cursor - key_start));
        goto end;
      }
    }
    key_start= key_cursor;
    if (key_cursor >= page_end ||
        !(*keyinfo->get_key)(&cur_key, page_flag, nod_flag, &key_cursor))
      break;
    if (key_cursor > page_end)
    {
      _ma_check_print_error(param,
                            "Page length and length of keys don't match at "
                            "page: %s",
                            llstr(anc_page->pos, pos_txt));
      goto end;
    }
    if (share->data_file_type == BLOCK_RECORD &&
        !(page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
        key_has_transid(cur_key.data + cur_key.data_length +
                        share->rec_reflength-1))
    {
      _ma_check_print_error(param,
                            "Found key marked for transid on page that is not "
                            "marked for transid at: %s",
                            llstr(anc_page->pos, pos_txt));
      goto end;
    }

    /* Every key except the first one is compared with its predecessor */
    if ((*keys)++ &&
        (cmp_res= ha_key_cmp(keyinfo->seg, info->last_key.data, cur_key.data,
                             cur_key.data_length + cur_key.ref_length,
                             (search_flags | SEARCH_INSERT |
                              (cur_key.flag >> 1) | info->last_key.flag),
                             diff_pos)) >= 0)
    {
      DBUG_DUMP_KEY("old", &info->last_key);
      DBUG_DUMP_KEY("new", &cur_key);
      DBUG_DUMP("new_in_page", key_start, (uint) (key_cursor - key_start));

      if (cmp_res != 0 || !(search_flags & SEARCH_FIND))
        _ma_check_print_error(param,"Key in wrong position at page %s",
                              llstr(anc_page->pos, pos_txt));
      else
        _ma_check_print_error(param,"Found duplicated key at page %s",
                              llstr(anc_page->pos, pos_txt));
      goto end;
    }

    if (param->testflag & T_STATISTICS)
    {
      if (*keys == 1L)                          /* first key */
      {
        if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
          maria_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
                                            cur_key.data);
      }
      else
      {
        /*
          For the remaining stats method diff_pos[] from the ordering
          comparison above is used as is.
        */
        if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
          ha_key_cmp(keyinfo->seg, info->last_key.data,
                     cur_key.data, cur_key.data_length,
                     SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
                     diff_pos);
        else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
        {
          diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg,
                                                        param->notnull_count,
                                                        info->last_key.data,
                                                        cur_key.data);
        }
        param->unique_count[diff_pos[0]-1]++;
      }
    }
    (*key_checksum)+= maria_byte_checksum(cur_key.data, cur_key.data_length);
    row_pos= _ma_row_pos_from_key(&cur_key);

    if (keyinfo->flag & HA_FULLTEXT)            /* special handling for ft2 */
    {
      uint off;
      int subkeys;
      get_key_full_length_rdonly(off, cur_key.data);
      subkeys= ft_sintXkorr(cur_key.data + off);
      if (subkeys < 0)
      {
        /* The key refers to a 2nd level tree; check that tree instead */
        ha_rows tmp_keys= 0;
        share->ft2_keyinfo.key_nr= keyinfo->key_nr;
        if (chk_index_down(param, info, &share->ft2_keyinfo, row_pos,
                           child_buff, &tmp_keys, key_checksum, 1))
          goto end;
        if (tmp_keys + subkeys)
        {
          _ma_check_print_error(param,
                                "Number of words in the 2nd level tree "
                                "does not match the number in the header. "
                                "Parent word in on the page %s, offset %u",
                                llstr(anc_page->pos, pos_txt),
                                (uint) (key_start - anc_page->buff));
          goto end;
        }
        (*keys)+= tmp_keys-1;
        continue;
      }
      /* fall through */
    }

    /* Check that the key points at something inside the data file */
    if (share->data_file_type == BLOCK_RECORD)
      outside_file= (ma_recordpos_to_page(row_pos) *
                     share->base.min_block_length >=
                     share->state.state.data_file_length);
    else if (share->data_file_type == NO_RECORD)
      outside_file= (row_pos != 0);
    else
      outside_file= (row_pos >= share->state.state.data_file_length);

    if (outside_file)
    {
#ifndef DBUG_OFF
      char llbuff2[22], llbuff3[22];
#endif
      _ma_check_print_error(param,
                            "Found key at page %s that points to record "
                            "outside datafile",
                            llstr(anc_page->pos, pos_txt));
      DBUG_PRINT("test",("page: %s record: %s filelength: %s",
                         llstr(anc_page->pos, pos_txt),llstr(row_pos,llbuff2),
                         llstr(share->state.state.data_file_length,llbuff3)));
      DBUG_DUMP_KEY("key", &cur_key);
      DBUG_DUMP("new_in_page", key_start, (uint) (key_cursor - key_start));
      goto end;
    }
    param->record_checksum+= (ha_checksum) row_pos;
  }

  if (key_cursor != page_end)
  {
    _ma_check_print_error(param,
                          "Keyblock size at page %s is not correct. "
                          "Block length: %u key length: %u",
                          llstr(anc_page->pos, pos_txt), anc_page->size,
                          (uint) (key_cursor - anc_page->buff));
    goto end;
  }
  result= 0;

end:
  my_afree(child_buff);
  DBUG_RETURN(result);
} /* chk_index */
1074
1075
1076 /* Calculate a checksum of 1+2+3+4...N = N*(N+1)/2 without overflow */
1077
calc_checksum(ha_rows count)1078 static ha_checksum calc_checksum(ha_rows count)
1079 {
1080 ulonglong sum,a,b;
1081 DBUG_ENTER("calc_checksum");
1082
1083 sum=0;
1084 a=count; b=count+1;
1085 if (a & 1)
1086 b>>=1;
1087 else
1088 a>>=1;
1089 while (b)
1090 {
1091 if (b & 1)
1092 sum+=a;
1093 a<<=1; b>>=1;
1094 }
1095 DBUG_PRINT("exit",("sum: %lx",(ulong) sum));
1096 DBUG_RETURN((ha_checksum) sum);
1097 } /* calc_checksum */
1098
1099
1100 /* Calc length of key in normal isam */
1101
isam_key_length(MARIA_HA * info,register MARIA_KEYDEF * keyinfo)1102 static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo)
1103 {
1104 uint length;
1105 HA_KEYSEG *keyseg;
1106 DBUG_ENTER("isam_key_length");
1107
1108 length= info->s->rec_reflength;
1109 for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++)
1110 length+= keyseg->length;
1111
1112 DBUG_PRINT("exit",("length: %d",length));
1113 DBUG_RETURN(length);
1114 } /* key_length */
1115
1116
1117
/*
  Format a record position as text in 'buff'.

  For BLOCK_RECORD tables the result is "page:dir_entry"; for all other
  record formats it is the position printed as one number with llstr().
  The caller supplies a buffer big enough for the text.
*/

static void record_pos_to_txt(MARIA_HA *info, my_off_t recpos,
                              char *buff)
{
  my_off_t page_no;
  uint dir_entry;
  char *next;

  if (info->s->data_file_type != BLOCK_RECORD)
  {
    llstr(recpos, buff);
    return;
  }

  page_no= ma_recordpos_to_page(recpos);
  dir_entry= ma_recordpos_to_dir_entry(recpos);
  next= longlong10_to_str(page_no, buff, 10);
  *next++= ':';
  longlong10_to_str(dir_entry, next, 10);
}
1132
1133
1134 /*
1135 Check that keys in records exist in index tree
1136
1137 SYNOPSIS
1138 check_keys_in_record()
1139 param Check parameter
1140 info Maria handler
1141 extend Type of check (extended or normal)
1142 start_recpos Position to row
1143 record Record buffer
1144
1145 NOTES
1146 This function also calculates record checksum & number of rows
1147 */
1148
/*
  Account for one found row and check its keys.

  The row position is added to param->tmp_record_checksum and
  param->records is incremented; progress is reported every WRITE_COUNT
  rows. Then, for every active key that is not a fulltext key, the key
  is built from the record and either
    - looked up in the index (when 'extend' is set), or
    - added to the per-key checksum param->tmp_key_crc[] (otherwise).

  Returns 0 normally and -1 when a key was not found and either too many
  errors have been counted or T_VERBOSE is not set.
*/

static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend,
                                my_off_t start_recpos, uchar *record)
{
  MARIA_SHARE *share= info->s;
  MARIA_KEYDEF *keydef= share->keyinfo;
  char pos_txt[22+4];
  uint idx;

  param->tmp_record_checksum+= (ha_checksum) start_recpos;
  param->records++;
  if (!(param->records % WRITE_COUNT))
  {
    if (param->testflag & T_WRITE_LOOP)
    {
      printf("%s\r", llstr(param->records, pos_txt));
      fflush(stdout);
    }
    _ma_report_progress(param, param->records, share->state.state.records);
  }

  /* Check if keys match the record */
  for (idx= 0; idx < share->base.keys; idx++, keydef++)
  {
    MARIA_KEY key;
    int not_found;

    if (!(maria_is_key_active(share->state.key_map, idx)))
      continue;
    if (keydef->flag & HA_FULLTEXT)
      continue;

    (*keydef->make_key)(info, &key, idx, info->lastkey_buff, record,
                        start_recpos, 0);
    info->last_key.keyinfo= key.keyinfo;

    if (!extend)
    {
      param->tmp_key_crc[idx]+=
        maria_byte_checksum(key.data, key.data_length);
      continue;
    }

    /*
      We don't need to lock the key tree here as we don't allow
      concurrent threads when running maria_chk
    */
#ifdef HAVE_RTREE_KEYS
    if (keydef->flag & (HA_SPATIAL | HA_RTREE_INDEX))
      not_found= maria_rtree_find_first(info, &key, MBR_EQUAL | MBR_DATA);
    else
#endif
      not_found= _ma_search(info, &key, SEARCH_SAME,
                            share->state.key_root[idx]);

    if (not_found)
    {
      record_pos_to_txt(info, start_recpos, pos_txt);
      _ma_check_print_error(param,
                            "Record at: %14s "
                            "Can't find key for index: %2d",
                            pos_txt, idx+1);
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        return -1;
    }
  }
  return 0;
}
1211
1212
1213 /*
1214 Functions to loop through all rows and check if they are ok
1215
1216 NOTES
1217 One function for each record format
1218
1219 RESULT
1220 0 ok
1221 -1 Interrupted by user
1222 1 Error
1223 */
1224
/*
  Check all rows in a data file with fixed length records.

  Records of share->base.pack_reclength bytes are read sequentially
  through param->read_cache. A record whose first byte is 0 is counted
  as deleted; every other record is added to the table checksum and
  handed to check_keys_in_record().

  Returns 0 if ok, -1 if the check was killed, 1 on read error or when
  check_keys_in_record() reports a failure.
*/

static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend,
                               uchar *record)
{
  MARIA_SHARE *share= info->s;
  my_off_t start_recpos, pos;
  char llbuff[22];

  pos= 0;
  while (pos < share->state.state.data_file_length)
  {
    if (_ma_killed_ptr(param))
      return -1;
    if (my_b_read(&param->read_cache, record,
                  share->base.pack_reclength))
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: "
                            "%s",
                            my_errno, llstr(pos, llbuff));
      return 1;
    }
    start_recpos= pos;
    pos+= share->base.pack_reclength;
    param->splits++;
    if (*record == '\0')
    {
      param->del_blocks++;
      param->del_length+= share->base.pack_reclength;
      continue;                                 /* Record removed */
    }
    param->glob_crc+= _ma_static_checksum(info,record);
    param->used+= share->base.pack_reclength;
    if (check_keys_in_record(param, info, extend, start_recpos, record))
      return 1;
  }
  return 0;
}
1262
1263
/*
  Check all rows in a data file with dynamic (variable length) records.

  The file is scanned block by block. Deleted blocks are validated and
  counted. For a used block the chain of linked blocks is followed and
  the parts are collected in info->rec_buff until block_info.rec_len
  bytes have been read; the row is then unpacked, optionally verified
  with _ma_rec_check(), added to the table checksum and handed to
  check_keys_in_record().

  Returns 0 if ok, -1 if the check was killed, 1 on error. Errors that
  only affect one row set 'got_error'; the scan then continues with the
  next block only if T_VERBOSE is set and not too many errors have been
  counted.
*/

static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
                                uchar *record)
{
  MARIA_BLOCK_INFO block_info;
  MARIA_SHARE *share= info->s;
  my_off_t UNINIT_VAR(start_recpos), start_block, pos;
  uchar *UNINIT_VAR(to);
  ulong UNINIT_VAR(left_length);
  uint b_type;
  char llbuff[22],llbuff2[22],llbuff3[22];
  myf myflag= MY_WME | (share->temporary ? MY_THREAD_SPECIFIC : 0);
  DBUG_ENTER("check_dynamic_record");

  pos= 0;
  while (pos < share->state.state.data_file_length)
  {
    my_bool got_error= 0;
    int flag;                           /* Number of blocks read for row */
    if (_ma_killed_ptr(param))
      DBUG_RETURN(-1);

    flag= block_info.second_read=0;
    block_info.next_filepos=pos;
    do
    {
      /* Read the header of the next block in the chain */
      if (_ma_read_cache(info, &param->read_cache, block_info.header,
                         (start_block=block_info.next_filepos),
                         sizeof(block_info.header),
                         (flag ? 0 : READING_NEXT) | READING_HEADER))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at "
                              "position: %s",
                              my_errno, llstr(start_block, llbuff));
        DBUG_RETURN(1);
      }

      if (start_block & (MARIA_DYN_ALIGN_SIZE-1))
      {
        _ma_check_print_error(param,"Wrong aligned block at %s",
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
      }
      b_type= _ma_get_block_info(info, &block_info,-1,start_block);
      if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
                    BLOCK_FATAL_ERROR))
      {
        if (b_type & BLOCK_SYNC_ERROR)
        {
          if (flag)
          {
            _ma_check_print_error(param,"Unexpected byte: %d at link: %s",
                                  (int) block_info.header[0],
                                  llstr(start_block,llbuff));
            DBUG_RETURN(1);
          }
          /* Only acceptable as first block; skip it */
          pos=block_info.filepos+block_info.block_len;
          goto next;
        }
        if (b_type & BLOCK_DELETED)
        {
          if (block_info.block_len < share->base.min_block_length)
          {
            _ma_check_print_error(param,
                                  "Deleted block with impossible length %lu "
                                  "at %s",
                                  block_info.block_len,llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          if ((block_info.next_filepos != HA_OFFSET_ERROR &&
               block_info.next_filepos >= share->state.state.data_file_length) ||
              (block_info.prev_filepos != HA_OFFSET_ERROR &&
               block_info.prev_filepos >= share->state.state.data_file_length))
          {
            _ma_check_print_error(param,"Delete link points outside datafile "
                                  "at %s",
                                  llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          param->del_blocks++;
          param->del_length+= block_info.block_len;
          param->splits++;
          pos= block_info.filepos+block_info.block_len;
          goto next;
        }
        _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s",
                              block_info.header[0],block_info.header[1],
                              block_info.header[2],
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
      }
      if (share->state.state.data_file_length < block_info.filepos+
          block_info.block_len)
      {
        _ma_check_print_error(param,
                              "Recordlink that points outside datafile at %s",
                              llstr(pos,llbuff));
        got_error=1;
        break;
      }
      param->splits++;
      if (!flag++)                              /* First block */
      {
        start_recpos=pos;
        pos=block_info.filepos+block_info.block_len;
        if (block_info.rec_len > (uint) share->base.max_pack_length)
        {
          my_errno= HA_ERR_WRONG_IN_RECORD;
          _ma_check_print_error(param,"Found too long record (%lu) at %s",
                                (ulong) block_info.rec_len,
                                llstr(start_recpos,llbuff));
          got_error=1;
          break;
        }
        if (share->base.blobs)
        {
          if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
                               block_info.rec_len +
                               share->base.extra_rec_buff_size, myflag))

          {
            _ma_check_print_error(param,
                                  "Not enough memory (%lu) for blob at %s",
                                  (ulong) block_info.rec_len,
                                  llstr(start_recpos,llbuff));
            got_error=1;
            break;
          }
        }
        to= info->rec_buff;
        left_length= block_info.rec_len;
      }
      if (left_length < block_info.data_len)
      {
        _ma_check_print_error(param,"Found too long record (%lu) at %s",
                              (ulong) block_info.data_len,
                              llstr(start_recpos,llbuff));
        got_error=1;
        break;
      }
      /* Append the data part of this block to the collected row */
      if (_ma_read_cache(info, &param->read_cache, to, block_info.filepos,
                         (uint) block_info.data_len,
                         flag == 1 ? READING_NEXT : 0))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at "
                              "position: %s", my_errno,
                              llstr(block_info.filepos, llbuff));

        DBUG_RETURN(1);
      }
      to+=block_info.data_len;
      param->link_used+= block_info.filepos-start_block;
      param->used+= block_info.filepos - start_block + block_info.data_len;
      param->empty+= block_info.block_len-block_info.data_len;
      left_length-= block_info.data_len;
      if (left_length)
      {
        if (b_type & BLOCK_LAST)
        {
          _ma_check_print_error(param,
                                "Wrong record length %s of %s at %s",
                                llstr(block_info.rec_len-left_length,llbuff),
                                llstr(block_info.rec_len, llbuff2),
                                llstr(start_recpos,llbuff3));
          got_error=1;
          break;
        }
        if (share->state.state.data_file_length < block_info.next_filepos)
        {
          _ma_check_print_error(param,
                                "Found next-recordlink that points outside "
                                "datafile at %s",
                                llstr(block_info.filepos,llbuff));
          got_error=1;
          break;
        }
      }
    } while (left_length);

    if (! got_error)
    {
      if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) ==
          MY_FILE_ERROR)
      {
        _ma_check_print_error(param,"Found wrong record at %s",
                              llstr(start_recpos,llbuff));
        got_error=1;
      }
      else
      {
        ha_checksum checksum= 0;
        if (share->calc_checksum)
          checksum= (*share->calc_checksum)(info, record);

        if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE))
        {
          if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len,
                            MY_TEST(share->calc_checksum), checksum))
          {
            _ma_check_print_error(param,"Found wrong packed record at %s",
                                  llstr(start_recpos,llbuff));
            got_error= 1;
          }
        }
        param->glob_crc+= checksum;
      }

      if (! got_error)
      {
        if (check_keys_in_record(param, info, extend, start_recpos, record))
          DBUG_RETURN(1);
      }
      else
      {
        if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
          DBUG_RETURN(1);
      }
    }
    else if (!flag)
      pos= block_info.filepos+block_info.block_len;
next:;
  }
  DBUG_RETURN(0);
}
1489
1490
/*
  Check all rows in a compressed (packed) data file.

  The scan starts after the pack header. For each row the block header
  is read and decoded, the row length is checked against
  share->min_pack_length / share->max_pack_length, the packed row is
  read and unpacked, added to the table checksum and handed to
  check_keys_in_record().

  Returns 0 if ok, -1 if the check was killed, 1 on error. After an
  error that only affects one row the scan continues only if T_VERBOSE
  is set and not too many errors have been counted.
*/

static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend,
                                   uchar *record)
{
  MARIA_BLOCK_INFO block_info;
  MARIA_SHARE *share= info->s;
  my_off_t start_recpos, pos;
  char llbuff[22];
  my_bool got_error= 0;
  DBUG_ENTER("check_compressed_record");

  pos= share->pack.header_length;               /* Skip header */
  while (pos < share->state.state.data_file_length)
  {
    if (_ma_killed_ptr(param))
      DBUG_RETURN(-1);

    if (_ma_read_cache(info, &param->read_cache, block_info.header, pos,
                       share->pack.ref_length, READING_NEXT))
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: "
                            "%s",
                            my_errno, llstr(pos, llbuff));
      DBUG_RETURN(1);
    }

    start_recpos= pos;
    param->splits++;
    _ma_pack_get_block_info(info, &info->bit_buff, &block_info,
                            &info->rec_buff, &info->rec_buff_size, -1,
                            start_recpos);
    pos=block_info.filepos+block_info.rec_len;
    if (block_info.rec_len < (uint) share->min_pack_length ||
        block_info.rec_len > (uint) share->max_pack_length)
    {
      _ma_check_print_error(param,
                            "Found block with wrong recordlength: %lu at %s",
                            block_info.rec_len, llstr(start_recpos,llbuff));
      got_error=1;
      goto end;
    }
    if (_ma_read_cache(info, &param->read_cache, info->rec_buff,
                       block_info.filepos, block_info.rec_len, READING_NEXT))
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: "
                            "%s",
                            my_errno, llstr(block_info.filepos, llbuff));
      DBUG_RETURN(1);
    }
    if (_ma_pack_rec_unpack(info, &info->bit_buff, record,
                            info->rec_buff, block_info.rec_len))
    {
      _ma_check_print_error(param,"Found wrong record at %s",
                            llstr(start_recpos,llbuff));
      got_error=1;
      goto end;
    }
    param->glob_crc+= (*share->calc_checksum)(info,record);
    param->link_used+= (block_info.filepos - start_recpos);
    param->used+= (pos-start_recpos);

end:
    if (! got_error)
    {
      if (check_keys_in_record(param, info, extend, start_recpos, record))
        DBUG_RETURN(1);
    }
    else
    {
      got_error= 0;                             /* Reset for next loop */
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        DBUG_RETURN(1);
    }
  }
  DBUG_RETURN(0);
}
1568
1569
1570 /*
1571 Check if layout on head or tail page is ok
1572
1573 NOTES
1574 This is for rows-in-block format.
1575 */
1576
/*
  Check the row directory of a head or tail page.

  SYNOPSIS
    check_page_layout()
    param             Check state; param->used and param->err_count are
                      updated
    info              Maria handler
    page_pos          Position of page (only used in error messages)
    page              Page buffer
    row_count         Number of entries in the page directory
    head_empty        Empty space as stored in the page header
    real_rows_found   Out: number of directory entries that are in use
    free_slots_found  Out: number of entries found in the free list

  NOTES
    First the list of free directory entries is walked, checking that
    every entry is inside the directory, is marked as deleted and has a
    correct back pointer. Then the directory is walked to check that rows
    don't overlap each other or the directory and to sum up the empty
    space, which is compared with head_empty.

  RETURN
    0  ok, or errors found but the check should continue
    1  Error; the check should stop
*/

static int check_page_layout(HA_CHECK *param, MARIA_HA *info,
                             my_off_t page_pos, uchar *page,
                             uint row_count, uint head_empty,
                             uint *real_rows_found, uint *free_slots_found)
{
  uint empty, last_row_end, row, first_dir_entry, free_entry, block_size;
  uint free_entries, prev_free_entry;
  uchar *dir_entry;
  char llbuff[22];
  my_bool error_in_free_list= 0;
  DBUG_ENTER("check_page_layout");

  block_size= info->s->block_size;
  empty= 0;
  last_row_end= PAGE_HEADER_SIZE(info->s);
  *real_rows_found= 0;

  /* Check free directory list */
  free_entry= (uint) page[DIR_FREE_OFFSET];
  free_entries= 0;
  prev_free_entry= END_OF_DIR_FREE_LIST;
  while (free_entry != END_OF_DIR_FREE_LIST)
  {
    uchar *dir;
    /*
      Directory entries are numbered 0 .. row_count-1, so an entry number
      equal to row_count is already outside the directory.
    */
    if (free_entry >= row_count)
    {
      _ma_check_print_error(param,
                            "Page %9s: Directory free entry points outside "
                            "directory",
                            llstr(page_pos, llbuff));
      error_in_free_list= 1;
      break;
    }
    dir= dir_entry_pos(page, block_size, free_entry);
    if (uint2korr(dir) != 0)
    {
      _ma_check_print_error(param,
                            "Page %9s: Directory free entry points to "
                            "not deleted entry",
                            llstr(page_pos, llbuff));
      error_in_free_list= 1;
      break;
    }
    if (dir[2] != prev_free_entry)
    {
      _ma_check_print_error(param,
                            "Page %9s: Directory free list back pointer "
                            "points to wrong entry",
                            llstr(page_pos, llbuff));
      error_in_free_list= 1;
      break;
    }
    prev_free_entry= free_entry;
    free_entry= dir[3];
    free_entries++;
  }
  *free_slots_found= free_entries;

  /* Check directory */
  dir_entry= page+ block_size - PAGE_SUFFIX_SIZE;
  first_dir_entry= (block_size - row_count * DIR_ENTRY_SIZE -
                    PAGE_SUFFIX_SIZE);
  for (row= 0 ; row < row_count ; row++)
  {
    uint pos, length;
    dir_entry-= DIR_ENTRY_SIZE;
    pos= uint2korr(dir_entry);
    if (!pos)
    {
      /*
        Deleted entry. Count down so that free_entries ends at 0 when the
        free list contained exactly the deleted entries.
      */
      free_entries--;
      if (row == row_count -1)
      {
        _ma_check_print_error(param,
                              "Page %9s: First entry in directory is 0",
                              llstr(page_pos, llbuff));
        if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
          DBUG_RETURN(1);
      }
      continue;                                 /* Deleted row */
    }
    (*real_rows_found)++;
    length= uint2korr(dir_entry+2);
    param->used+= length;
    if (pos < last_row_end)
    {
      _ma_check_print_error(param,
                            "Page %9s: Row %3u overlapps with previous row",
                            llstr(page_pos, llbuff), row);
      DBUG_RETURN(1);
    }
    empty+= (pos - last_row_end);
    last_row_end= pos + length;
    if (last_row_end > first_dir_entry)
    {
      _ma_check_print_error(param,
                            "Page %9s: Row %3u overlapps with directory",
                            llstr(page_pos, llbuff), row);
      DBUG_RETURN(1);
    }
  }
  empty+= (first_dir_entry - last_row_end);

  if (empty != head_empty)
  {
    _ma_check_print_error(param,
                          "Page %9s: Wrong empty size. Stored: %5u "
                          "Actual: %5u",
                          llstr(page_pos, llbuff), head_empty, empty);
    param->err_count++;
  }
  if (free_entries != 0 && !error_in_free_list)
  {
    _ma_check_print_error(param,
                          "Page %9s: Directory free link don't include "
                          "all free entries",
                          llstr(page_pos, llbuff));
    param->err_count++;
  }
  DBUG_RETURN(param->err_count &&
              (param->err_count >= MAXERR || !(param->testflag & T_VERBOSE)));
}
1698
1699
1700 /*
1701 Check all rows on head page
1702
1703 NOTES
1704 This is for rows-in-block format.
1705
1706 Before this, we have already called check_page_layout(), so
1707 we know the block is logically correct (even if the rows may not be that)
1708
1709 RETURN
1710 0 ok
1711 1 error
1712 */
1713
1714
check_head_page(HA_CHECK * param,MARIA_HA * info,uchar * record,int extend,my_off_t page_pos,uchar * page_buff,uint row_count)1715 static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, uchar *record,
1716 int extend, my_off_t page_pos, uchar *page_buff,
1717 uint row_count)
1718 {
1719 MARIA_SHARE *share= info->s;
1720 uchar *dir_entry;
1721 uint row;
1722 char llbuff[22], llbuff2[22];
1723 ulonglong page= page_pos / share->block_size;
1724 DBUG_ENTER("check_head_page");
1725
1726 dir_entry= page_buff+ share->block_size - PAGE_SUFFIX_SIZE;
1727 for (row= 0 ; row < row_count ; row++)
1728 {
1729 uint pos, length, flag;
1730 dir_entry-= DIR_ENTRY_SIZE;
1731 pos= uint2korr(dir_entry);
1732 if (!pos)
1733 continue;
1734 length= uint2korr(dir_entry+2);
1735 if (length < share->base.min_block_length)
1736 {
1737 _ma_check_print_error(param,
1738 "Page %9s: Row %3u is too short "
1739 "(%d of min %d bytes)",
1740 llstr(page, llbuff), row, length,
1741 (uint) share->base.min_block_length);
1742 DBUG_RETURN(1);
1743 }
1744 flag= (uint) (uchar) page_buff[pos];
1745 if (flag & ~(ROW_FLAG_ALL))
1746 _ma_check_print_error(param,
1747 "Page %9s: Row %3u has wrong flag: %u",
1748 llstr(page, llbuff), row, flag);
1749
1750 DBUG_PRINT("info", ("rowid: %s page: %lu row: %u",
1751 llstr(ma_recordpos(page, row), llbuff),
1752 (ulong) page, row));
1753 info->cur_row.trid= 0;
1754 if (_ma_read_block_record2(info, record, page_buff+pos,
1755 page_buff+pos+length))
1756 {
1757 _ma_check_print_error(param,
1758 "Page %9s: Row %3d is crashed",
1759 llstr(page, llbuff), row);
1760 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1761 DBUG_RETURN(1);
1762 continue;
1763 }
1764 set_if_bigger(param->max_found_trid, info->cur_row.trid);
1765 if (info->cur_row.trid > param->max_trid)
1766 _ma_check_print_not_visible_error(param, info->cur_row.trid);
1767
1768 if (share->calc_checksum)
1769 {
1770 ha_checksum checksum= (*share->calc_checksum)(info, record);
1771 if (info->cur_row.checksum != (checksum & 255))
1772 _ma_check_print_error(param, "Page %9s: Row %3d has wrong checksum",
1773 llstr(page, llbuff), row);
1774 param->glob_crc+= checksum;
1775 }
1776 if (info->cur_row.extents_count)
1777 {
1778 uchar *extents= info->cur_row.extents;
1779 uint i;
1780 /* Check that bitmap has the right marker for the found extents */
1781 for (i= 0 ; i < info->cur_row.extents_count ; i++)
1782 {
1783 pgcache_page_no_t extent_page;
1784 uint page_count, page_type;
1785 extent_page= uint5korr(extents);
1786 page_count= uint2korr(extents+5) & ~START_EXTENT_BIT;
1787 extents+= ROW_EXTENT_SIZE;
1788 page_type= BLOB_PAGE;
1789 if (page_count & TAIL_BIT)
1790 {
1791 page_count= 1;
1792 page_type= TAIL_PAGE;
1793 }
1794 /*
1795 TODO OPTIMIZE:
1796 Check the whole extent with one test and only do the loop if
1797 something is wrong (for exact error reporting)
1798 */
1799 for ( ; page_count--; extent_page++)
1800 {
1801 uint bitmap_pattern;
1802 if (_ma_check_if_right_bitmap_type(info, page_type, extent_page,
1803 &bitmap_pattern))
1804 {
1805 _ma_check_print_error(param,
1806 "Page %9s: Row: %3d has an extent with "
1807 "wrong information in bitmap: "
1808 "Page: %9s Page_type: %d Bitmap: %d",
1809 llstr(page, llbuff), row,
1810 llstr(extent_page, llbuff2),
1811 page_type, bitmap_pattern);
1812 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1813 DBUG_RETURN(1);
1814 }
1815 }
1816 }
1817 }
1818 param->full_page_count+= info->cur_row.full_page_count;
1819 param->tail_count+= info->cur_row.tail_count;
1820 if (check_keys_in_record(param, info, extend,
1821 ma_recordpos(page, row), record))
1822 DBUG_RETURN(1);
1823 }
1824 DBUG_RETURN(0);
1825 }
1826
1827
1828 /*
1829 Check if rows-in-block data file is consistent
1830 */
1831
check_block_record(HA_CHECK * param,MARIA_HA * info,int extend,uchar * record)1832 static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
1833 uchar *record)
1834 {
1835 MARIA_SHARE *share= info->s;
1836 my_off_t pos;
1837 pgcache_page_no_t page;
1838 uchar *page_buff, *bitmap_buff, *data;
1839 char llbuff[22], llbuff2[22];
1840 uint block_size= share->block_size;
1841 ha_rows full_page_count, tail_count;
1842 my_bool UNINIT_VAR(full_dir), now_transactional;
1843 uint offset_page, offset, free_count;
1844
1845 if (_ma_scan_init_block_record(info))
1846 {
1847 _ma_check_print_error(param, "got error %d when initializing scan",
1848 my_errno);
1849 return 1;
1850 }
1851
1852 now_transactional= info->s->now_transactional;
1853 info->s->now_transactional= 0; /* Don't log changes */
1854
1855 bitmap_buff= info->scan.bitmap_buff;
1856 page_buff= info->scan.page_buff;
1857 full_page_count= tail_count= 0;
1858 param->full_page_count= param->tail_count= 0;
1859 param->used= param->link_used= 0;
1860 param->splits= share->state.state.data_file_length / block_size;
1861
1862 for (pos= 0, page= 0;
1863 pos < share->state.state.data_file_length;
1864 pos+= block_size, page++)
1865 {
1866 uint UNINIT_VAR(row_count), real_row_count, UNINIT_VAR(empty_space),
1867 page_type, bitmap_pattern;
1868 uint bitmap_for_page;
1869
1870 if (_ma_killed_ptr(param))
1871 {
1872 _ma_scan_end_block_record(info);
1873 info->s->now_transactional= now_transactional;
1874 return -1; /* Interrupted */
1875 }
1876 if ((page % share->bitmap.pages_covered) == 0)
1877 {
1878 /* Bitmap page */
1879 if (pagecache_read(share->pagecache,
1880 &info->s->bitmap.file,
1881 page, 1,
1882 bitmap_buff,
1883 PAGECACHE_PLAIN_PAGE,
1884 PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
1885 {
1886 _ma_check_print_error(param,
1887 "Page %9s: Got error: %d when reading datafile",
1888 llstr(page, llbuff), my_errno);
1889 goto err;
1890 }
1891 param->used+= block_size;
1892 param->link_used+= block_size;
1893 if (param->verbose > 2)
1894 print_bitmap_description(share, page, bitmap_buff);
1895 continue;
1896 }
1897 /* Skip pages marked as empty in bitmap */
1898 offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
1899 offset= offset_page & 7;
1900 data= bitmap_buff + offset_page / 8;
1901 bitmap_pattern= uint2korr(data);
1902 if (!(bitmap_for_page= ((bitmap_pattern >> offset) & 7)))
1903 {
1904 param->empty+= block_size;
1905 param->del_blocks++;
1906 continue;
1907 }
1908
1909 if (pagecache_read(share->pagecache,
1910 &info->dfile,
1911 page, 1,
1912 page_buff,
1913 share->page_type,
1914 PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
1915 {
1916 _ma_check_print_error(param,
1917 "Page %9s: Got error: %d when reading datafile",
1918 llstr(page, llbuff), my_errno);
1919 goto err;
1920 }
1921 page_type= page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK;
1922 if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE)
1923 {
1924 _ma_check_print_error(param,
1925 "Page: %9s Found wrong page type %d. Bitmap: %d '%s'",
1926 llstr(page, llbuff), page_type,
1927 bitmap_for_page, bits_to_txt[bitmap_for_page]);
1928 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1929 goto err;
1930 continue;
1931 }
1932 switch ((enum en_page_type) page_type) {
1933 case UNALLOCATED_PAGE:
1934 case MAX_PAGE_TYPE:
1935 default:
1936 DBUG_ASSERT(0); /* Impossible */
1937 break;
1938 case HEAD_PAGE:
1939 row_count= page_buff[DIR_COUNT_OFFSET];
1940 empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
1941 param->used+= block_size - empty_space;
1942 param->link_used+= (PAGE_HEADER_SIZE(info->s) + PAGE_SUFFIX_SIZE +
1943 row_count * DIR_ENTRY_SIZE);
1944 if (empty_space < share->bitmap.sizes[3])
1945 param->lost+= empty_space;
1946 if (check_page_layout(param, info, pos, page_buff, row_count,
1947 empty_space, &real_row_count, &free_count))
1948 goto err;
1949 full_dir= (row_count == MAX_ROWS_PER_PAGE &&
1950 page_buff[DIR_FREE_OFFSET] == END_OF_DIR_FREE_LIST);
1951 break;
1952 case TAIL_PAGE:
1953 row_count= page_buff[DIR_COUNT_OFFSET];
1954 empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
1955 param->used+= block_size - empty_space;
1956 param->link_used+= (PAGE_HEADER_SIZE(info->s) + PAGE_SUFFIX_SIZE +
1957 row_count * DIR_ENTRY_SIZE);
1958 if (empty_space < share->bitmap.sizes[6])
1959 param->lost+= empty_space;
1960 if (check_page_layout(param, info, pos, page_buff, row_count,
1961 empty_space, &real_row_count, &free_count))
1962 goto err;
1963 full_dir= (row_count - free_count >= MAX_ROWS_PER_PAGE -
1964 share->base.blobs);
1965 break;
1966 case BLOB_PAGE:
1967 full_page_count++;
1968 full_dir= 0;
1969 empty_space= block_size; /* for error reporting */
1970 param->link_used+= FULL_PAGE_HEADER_SIZE(info->s);
1971 param->used+= block_size;
1972 break;
1973 }
1974 if (_ma_check_bitmap_data(info, page_type,
1975 full_dir ? 0 : empty_space,
1976 bitmap_for_page))
1977 {
1978 _ma_check_print_error(param,
1979 "Page %9s: Wrong data in bitmap. Page_type: "
1980 "%d full: %d empty_space: %u Bitmap-bits: %d "
1981 "'%s'",
1982 llstr(page, llbuff), page_type, full_dir,
1983 empty_space, bitmap_for_page,
1984 bits_to_txt[bitmap_for_page]);
1985 if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1986 goto err;
1987 }
1988 if ((enum en_page_type) page_type == BLOB_PAGE)
1989 continue;
1990 param->empty+= empty_space;
1991 if ((enum en_page_type) page_type == TAIL_PAGE)
1992 {
1993 tail_count+= real_row_count;
1994 continue;
1995 }
1996 if (check_head_page(param, info, record, extend, pos, page_buff,
1997 row_count))
1998 goto err;
1999 }
2000
2001 /* Verify that rest of bitmap is zero */
2002
2003 if (page % share->bitmap.pages_covered)
2004 {
2005 /* Not at end of bitmap */
2006 uint bitmap_pattern;
2007 uint byte_offset;
2008
2009 offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
2010 offset= offset_page & 7;
2011 byte_offset= offset_page / 8;
2012 data= bitmap_buff + byte_offset;
2013 bitmap_pattern= uint2korr(data);
2014 if (byte_offset + 1 == share->bitmap.max_total_size)
2015 {
2016 /* On last byte of bitmap; Remove possible checksum */
2017 bitmap_pattern&= 0xff;
2018 }
2019 if (((bitmap_pattern >> offset)) ||
2020 (byte_offset + 2 < share->bitmap.max_total_size &&
2021 _ma_check_if_zero(data+2, share->bitmap.max_total_size -
2022 byte_offset - 2)))
2023 {
2024 ulonglong bitmap_page;
2025 bitmap_page= page / share->bitmap.pages_covered;
2026 bitmap_page*= share->bitmap.pages_covered;
2027
2028 _ma_check_print_error(param,
2029 "Bitmap at page %s has pages reserved outside of "
2030 "data file length",
2031 llstr(bitmap_page, llbuff));
2032 DBUG_EXECUTE("bitmap", _ma_print_bitmap(&share->bitmap, bitmap_buff,
2033 bitmap_page););
2034 }
2035 }
2036
2037 _ma_scan_end_block_record(info);
2038
2039 if (full_page_count != param->full_page_count)
2040 _ma_check_print_error(param, "Full page count read through records was %s "
2041 "but we found %s pages while scanning table",
2042 llstr(param->full_page_count, llbuff),
2043 llstr(full_page_count, llbuff2));
2044 if (tail_count != param->tail_count)
2045 _ma_check_print_error(param, "Tail count read through records was %s but "
2046 "we found %s tails while scanning table",
2047 llstr(param->tail_count, llbuff),
2048 llstr(tail_count, llbuff2));
2049
2050 info->s->now_transactional= now_transactional;
2051 return param->error_printed != 0;
2052
2053 err:
2054 _ma_scan_end_block_record(info);
2055 info->s->now_transactional= now_transactional;
2056 return 1;
2057 }
2058
2059
2060 /* Check that record-link is ok */
2061
maria_chk_data_link(HA_CHECK * param,MARIA_HA * info,my_bool extend)2062 int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend)
2063 {
2064 MARIA_SHARE *share= info->s;
2065 int error;
2066 uchar *record;
2067 char llbuff[22],llbuff2[22],llbuff3[22];
2068 DBUG_ENTER("maria_chk_data_link");
2069
2070 if (!(param->testflag & T_SILENT))
2071 {
2072 if (extend)
2073 puts("- check records and index references");
2074 else
2075 puts("- check record links");
2076 }
2077
2078 if (!(record= (uchar*) my_malloc(share->base.default_rec_buff_size, MYF(0))))
2079 {
2080 _ma_check_print_error(param,"Not enough memory for record");
2081 DBUG_RETURN(-1);
2082 }
2083 param->records= param->del_blocks= 0;
2084 param->used= param->link_used= param->splits= param->del_length= 0;
2085 param->lost= 0;
2086 param->tmp_record_checksum= param->glob_crc= 0;
2087 param->err_count= 0;
2088
2089 error= 0;
2090 param->empty= share->pack.header_length;
2091
2092 bzero((char*) param->tmp_key_crc,
2093 share->base.keys * sizeof(param->tmp_key_crc[0]));
2094
2095 info->in_check_table= 1; /* Don't assert on checksum errors */
2096
2097 switch (share->data_file_type) {
2098 case BLOCK_RECORD:
2099 error= check_block_record(param, info, extend, record);
2100 break;
2101 case STATIC_RECORD:
2102 error= check_static_record(param, info, extend, record);
2103 break;
2104 case DYNAMIC_RECORD:
2105 error= check_dynamic_record(param, info, extend, record);
2106 break;
2107 case COMPRESSED_RECORD:
2108 error= check_compressed_record(param, info, extend, record);
2109 break;
2110 case NO_RECORD:
2111 param->records= share->state.state.records;
2112 param->record_checksum= 0;
2113 extend= 1; /* No row checksums */
2114 /* no data, nothing to do */
2115 break;
2116 } /* switch */
2117
2118 info->in_check_table= 0;
2119
2120 if (error)
2121 goto err;
2122
2123 if (param->testflag & T_WRITE_LOOP)
2124 {
2125 fputs(" \r",stdout);
2126 fflush(stdout);
2127 }
2128 if (param->records != share->state.state.records)
2129 {
2130 _ma_check_print_error(param,
2131 "Record-count is not ok; found %-10s Should be: %s",
2132 llstr(param->records,llbuff),
2133 llstr(share->state.state.records,llbuff2));
2134 error=1;
2135 }
2136 if (param->record_checksum &&
2137 param->record_checksum != param->tmp_record_checksum)
2138 {
2139 _ma_check_print_error(param,
2140 "Key pointers and record positions doesn't match");
2141 error=1;
2142 }
2143 if (param->glob_crc != share->state.state.checksum &&
2144 (share->options &
2145 (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)))
2146 {
2147 _ma_check_print_warning(param,
2148 "Record checksum is not the same as checksum "
2149 "stored in the index file");
2150 error=1;
2151 }
2152 if (!extend)
2153 {
2154 uint key;
2155 for (key=0 ; key < share->base.keys; key++)
2156 {
2157 if (param->tmp_key_crc[key] != param->key_crc[key] &&
2158 !(share->keyinfo[key].flag &
2159 (HA_FULLTEXT | HA_SPATIAL | HA_RTREE_INDEX)))
2160 {
2161 _ma_check_print_error(param,"Checksum for key: %2d doesn't match "
2162 "checksum for records",
2163 key+1);
2164 error=1;
2165 }
2166 }
2167 }
2168
2169 if (param->del_length != share->state.state.empty)
2170 {
2171 _ma_check_print_warning(param,
2172 "Found %s deleted space. Should be %s",
2173 llstr(param->del_length,llbuff2),
2174 llstr(share->state.state.empty,llbuff));
2175 }
2176 /* Skip following checks for BLOCK RECORD as they don't make any sence */
2177 if (share->data_file_type != BLOCK_RECORD)
2178 {
2179 if (param->used + param->empty + param->del_length !=
2180 share->state.state.data_file_length)
2181 {
2182 _ma_check_print_warning(param,
2183 "Found %s record data and %s unused data and %s "
2184 "deleted data",
2185 llstr(param->used, llbuff),
2186 llstr(param->empty,llbuff2),
2187 llstr(param->del_length,llbuff3));
2188 _ma_check_print_warning(param,
2189 "Total %s Should be: %s",
2190 llstr((param->used+param->empty +
2191 param->del_length), llbuff),
2192 llstr(share->state.state.data_file_length,
2193 llbuff2));
2194 }
2195 if (param->del_blocks != share->state.state.del)
2196 {
2197 _ma_check_print_warning(param,
2198 "Found %10s deleted blocks. Should be: %s",
2199 llstr(param->del_blocks,llbuff),
2200 llstr(share->state.state.del,llbuff2));
2201 }
2202 if (param->splits != share->state.split)
2203 {
2204 _ma_check_print_warning(param,
2205 "Found %10s parts. Should be: %s",
2206 llstr(param->splits, llbuff),
2207 llstr(share->state.split,llbuff2));
2208 }
2209 }
2210 if (param->testflag & T_INFO)
2211 {
2212 if (param->warning_printed || param->error_printed)
2213 puts("");
2214 if (param->used != 0 && ! param->error_printed)
2215 {
2216 if (param->records)
2217 {
2218 printf("Records:%18s M.recordlength:%9lu Packed:%14.0f%%\n",
2219 llstr(param->records,llbuff),
2220 (long)((param->used - param->link_used)/param->records),
2221 (share->base.blobs ? 0.0 :
2222 (ulonglong2double((ulonglong) share->base.reclength *
2223 param->records)-
2224 my_off_t2double(param->used))/
2225 ulonglong2double((ulonglong) share->base.reclength *
2226 param->records)*100.0));
2227 printf("Recordspace used:%9.0f%% Empty space:%12d%% "
2228 "Blocks/Record: %6.2f\n",
2229 (ulonglong2double(param->used - param->link_used)/
2230 ulonglong2double(param->used-param->link_used+param->empty) *
2231 100.0),
2232 (!param->records ? 100 :
2233 (int) (ulonglong2double(param->del_length+param->empty)/
2234 my_off_t2double(param->used)*100.0)),
2235 ulonglong2double(param->splits - param->del_blocks) /
2236 param->records);
2237 }
2238 else
2239 printf("Records:%18s\n", "0");
2240 }
2241 printf("Record blocks:%12s Delete blocks:%10s\n",
2242 llstr(param->splits - param->del_blocks, llbuff),
2243 llstr(param->del_blocks, llbuff2));
2244 printf("Record data: %12s Deleted data: %10s\n",
2245 llstr(param->used - param->link_used,llbuff),
2246 llstr(param->del_length, llbuff2));
2247 printf("Empty space: %12s Linkdata: %10s\n",
2248 llstr(param->empty, llbuff),llstr(param->link_used, llbuff2));
2249 if (share->data_file_type == BLOCK_RECORD)
2250 {
2251 printf("Full pages: %12s Tail count: %12s\n",
2252 llstr(param->full_page_count, llbuff),
2253 llstr(param->tail_count, llbuff2));
2254 printf("Lost space: %12s\n", llstr(param->lost, llbuff));
2255 if (param->max_found_trid)
2256 {
2257 printf("Max trans. id: %11s\n",
2258 llstr(param->max_found_trid, llbuff));
2259 }
2260 }
2261 }
2262 my_free(record);
2263 DBUG_RETURN (error);
2264
2265 err:
2266 my_free(record);
2267 param->testflag|=T_RETRY_WITHOUT_QUICK;
2268 DBUG_RETURN(1);
2269 } /* maria_chk_data_link */
2270
2271
2272 /**
2273 Prepares a table for a repair or index sort: flushes pages, records durably
2274 in the table that it is undergoing the operation (if that op crashes, that
2275 info will serve for Recovery and the user).
2276
2277 If we start overwriting the index file, and crash then, old REDOs will
2278 be tried and fail. To prevent that, we bump skip_redo_lsn, and thus we have
2279 to flush and sync pages so that old REDOs can be skipped.
2280 If this is not a bulk insert, which Recovery can handle gracefully (by
2281 truncating files, see UNDO_BULK_INSERT) we also mark the table
2282 crashed-on-repair, so that user knows it has to re-repair. If bulk insert we
2283 shouldn't mark it crashed-on-repair, because if we did this, the UNDO phase
2284 would skip the table (UNDO_BULK_INSERT would not be applied),
2285 and maria_chk would not improve that.
2286 If this is an OPTIMIZE which merely sorts index, we need to do the same
2287 too: old REDOs should not apply to the new index file.
2288 Only the flush is needed when in maria_chk which is not crash-safe.
2289
2290 @param info table
2291 @param param repair parameters
2292 @param discard_index if index pages can be thrown away
2293 */
2294
protect_against_repair_crash(MARIA_HA * info,const HA_CHECK * param,my_bool discard_index)2295 static my_bool protect_against_repair_crash(MARIA_HA *info,
2296 const HA_CHECK *param,
2297 my_bool discard_index)
2298 {
2299 MARIA_SHARE *share= info->s;
2300
2301 /*
2302 There are other than recovery-related reasons to do the writes below:
2303 - the physical size of the data file is sometimes used during repair: we
2304 need to flush to have it exact
2305 - we flush the state because maria_open(HA_OPEN_COPY) will want to read
2306 it from disk.
2307 */
2308 if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
2309 FLUSH_FORCE_WRITE,
2310 discard_index ? FLUSH_IGNORE_CHANGED :
2311 FLUSH_FORCE_WRITE) ||
2312 (share->changed &&
2313 _ma_state_info_write(share,
2314 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
2315 MA_STATE_INFO_WRITE_FULL_INFO |
2316 MA_STATE_INFO_WRITE_LOCK)))
2317 return TRUE;
2318 /* In maria_chk this is not needed: */
2319 if (maria_multi_threaded && share->base.born_transactional)
2320 {
2321 if ((param->testflag & T_NO_CREATE_RENAME_LSN) == 0)
2322 {
2323 /* this can be true only for a transactional table */
2324 maria_mark_in_repair(info);
2325 if (_ma_state_info_write(share,
2326 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
2327 MA_STATE_INFO_WRITE_LOCK))
2328 return TRUE;
2329 }
2330 if (translog_status == TRANSLOG_OK &&
2331 _ma_update_state_lsns(share, translog_get_horizon(),
2332 share->state.create_trid, FALSE, FALSE))
2333 return TRUE;
2334 if (_ma_sync_table_files(info))
2335 return TRUE;
2336 }
2337 return FALSE;
2338 }
2339
2340
2341 /**
2342 @brief Initialize variables for repair
2343 */
2344
initialize_variables_for_repair(HA_CHECK * param,MARIA_SORT_INFO * sort_info,MARIA_SORT_PARAM * sort_param,MARIA_HA * info,my_bool rep_quick,MARIA_SHARE * org_share)2345 static int initialize_variables_for_repair(HA_CHECK *param,
2346 MARIA_SORT_INFO *sort_info,
2347 MARIA_SORT_PARAM *sort_param,
2348 MARIA_HA *info,
2349 my_bool rep_quick,
2350 MARIA_SHARE *org_share)
2351 {
2352 MARIA_SHARE *share= info->s;
2353
2354 /*
2355 We have to clear these variables first, as the cleanup-in-case-of-error
2356 handling may touch these.
2357 */
2358 bzero((char*) sort_info, sizeof(*sort_info));
2359 bzero((char*) sort_param, sizeof(*sort_param));
2360 bzero(&info->rec_cache, sizeof(info->rec_cache));
2361
2362 if (share->data_file_type == NO_RECORD)
2363 {
2364 _ma_check_print_error(param,
2365 "Can't repair tables with record type NO_DATA");
2366 return 1;
2367 }
2368
2369 /* Make a copy to allow us to restore state and check how state changed */
2370 memcpy(org_share, share, sizeof(*share));
2371
2372 /* Repair code relies on share->state.state so we have to update it here */
2373 if (share->lock.update_status)
2374 (*share->lock.update_status)(info);
2375
2376 param->testflag|= T_REP; /* for easy checking */
2377 if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
2378 param->testflag|= T_CALC_CHECKSUM;
2379 param->glob_crc= 0;
2380 if (rep_quick)
2381 param->testflag|= T_QUICK;
2382 else
2383 param->testflag&= ~T_QUICK;
2384 param->org_key_map= share->state.key_map;
2385
2386 /*
2387 Clear check variables set by repair. This is needed to allow one to run
2388 several repair's in a row with same param
2389 */
2390 param->retry_repair= 0;
2391 param->warning_printed= 0;
2392 param->error_printed= 0;
2393 param->wrong_trd_printed= 0;
2394
2395 sort_param->sort_info= sort_info;
2396 sort_param->fix_datafile= ! rep_quick;
2397 sort_param->calc_checksum= MY_TEST(param->testflag & T_CALC_CHECKSUM);
2398 sort_info->info= sort_info->new_info= info;
2399 sort_info->param= param;
2400 set_data_file_type(sort_info, info->s);
2401 sort_info->org_data_file_type= share->data_file_type;
2402
2403 info->rec_cache.file= info->dfile.file;
2404 info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
2405
2406 if (protect_against_repair_crash(info, param,
2407 !MY_TEST(param->testflag &
2408 T_CREATE_MISSING_KEYS)))
2409 return 1;
2410
2411 /* calculate max_records */
2412 sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
2413 param->max_progress= sort_info->filelength;
2414 if ((param->testflag & T_CREATE_MISSING_KEYS) ||
2415 sort_info->org_data_file_type == COMPRESSED_RECORD)
2416 sort_info->max_records= share->state.state.records;
2417 else
2418 {
2419 ulong rec_length;
2420 rec_length= MY_MAX(share->base.min_pack_length,
2421 share->base.min_block_length);
2422 sort_info->max_records= (ha_rows) (sort_info->filelength / rec_length);
2423 }
2424
2425 /* Set up transaction handler so that we can see all rows */
2426 if (param->max_trid == 0)
2427 {
2428 if (!ma_control_file_inited())
2429 param->max_trid= 0; /* Give warning for first trid found */
2430 else
2431 param->max_trid= max_trid_in_system();
2432 }
2433 maria_ignore_trids(info);
2434 /* Don't write transid's during repair */
2435 maria_versioning(info, 0);
2436 /* remember original number of rows */
2437 *info->state= info->s->state.state;
2438 return 0;
2439 }
2440
2441
2442 /*
2443 During initialize_variables_for_repair and related functions we set some
  variables to values that make sense during repair.
2445 This function restores these values to their original values so that we can
2446 use the handler in MariaDB without having to close and open the table.
2447 */
2448
static void restore_table_state_after_repair(MARIA_HA *info,
                                             MARIA_SHARE *org_share)
{
  MARIA_SHARE *share= info->s;

  /* Repair ran with versioning off; go back to what the table supports */
  maria_versioning(info, share->have_versioning);
  /* Key tree locking is taken from the share copy saved before the repair */
  share->lock_key_trees= org_share->lock_key_trees;
  DBUG_ASSERT(share->lock_key_trees || !share->have_versioning);
}
2456
2457
2458 /**
2459 @brief Drop all indexes
2460
2461 @param[in] param check parameters
2462 @param[in] info MARIA_HA handle
2463 @param[in] force if to force drop all indexes
2464
2465 @return status
2466 @retval 0 OK
2467 @retval != 0 Error
2468
2469 @note
2470 Once allocated, index blocks remain part of the key file forever.
2471 When indexes are disabled, no block is freed. When enabling indexes,
    no block is freed either. The new indexes are created from new
2473 blocks. (Bug #4692)
2474
2475 Before recreating formerly disabled indexes, the unused blocks
2476 must be freed. There are two options to do this:
2477 - Follow the tree of disabled indexes, add all blocks to the
2478 deleted blocks chain. Would require a lot of random I/O.
2479 - Drop all blocks by clearing all index root pointers and all
2480 delete chain pointers and resetting key_file_length to the end
2481 of the index file header. This requires to recreate all indexes,
2482 even those that may still be intact.
2483 The second method is probably faster in most cases.
2484
2485 When disabling indexes, MySQL disables either all indexes or all
2486 non-unique indexes. When MySQL [re-]enables disabled indexes
2487 (T_CREATE_MISSING_KEYS), then we either have "lost" blocks in the
2488 index file, or there are no non-unique indexes. In the latter case,
2489 maria_repair*() would not be called as there would be no disabled
2490 indexes.
2491
2492 If there would be more unique indexes than disabled (non-unique)
2493 indexes, we could do the first method. But this is not implemented
2494 yet. By now we drop and recreate all indexes when repair is called.
2495
2496 However, there is an exception. Sometimes MySQL disables non-unique
2497 indexes when the table is empty (e.g. when copying a table in
2498 mysql_alter_table()). When enabling the non-unique indexes, they
2499 are still empty. So there is no index block that can be lost. This
2500 optimization is implemented in this function.
2501
2502 Note that in normal repair (T_CREATE_MISSING_KEYS not set) we
    recreate all enabled indexes unconditionally. We do not change the
2504 key_map. Otherwise we invert the key map temporarily (outside of
2505 this function) and recreate the then "seemingly" enabled indexes.
2506 When we cannot use the optimization, and drop all indexes, we
2507 pretend that all indexes were disabled. By the inversion, we will
    then recreate all indexes.
2509 */
2510
static int maria_drop_all_indexes(HA_CHECK *param, MARIA_HA *info,
                                  my_bool force)
{
  MARIA_SHARE *share= info->s;
  MARIA_STATE_INFO *state= &share->state;
  uint keynr;
  DBUG_ENTER("maria_drop_all_indexes");

  /*
    When only the disabled indexes are to be recreated and none of them
    owns a key block, the existing indexes can stay as they are. As soon
    as one disabled index has a key block, everything must be dropped and
    recreated so that its blocks are not lost.
  */
  if (!force && (param->testflag & T_CREATE_MISSING_KEYS))
  {
    my_bool must_recreate_all= FALSE;

    DBUG_PRINT("repair", ("creating missing indexes"));
    for (keynr= 0; keynr < share->base.keys; keynr++)
    {
      DBUG_PRINT("repair", ("index #: %u  key_root:%lld  active: %d",
                            keynr, state->key_root[keynr],
                            maria_is_key_active(state->key_map, keynr)));
      if (!maria_is_key_active(state->key_map, keynr) &&
          state->key_root[keynr] != HA_OFFSET_ERROR)
      {
        /*
          A disabled index with at least one key block: just recreating
          it would lose the block(s), so all indexes have to go.
        */
        DBUG_PRINT("repair", ("nonempty and disabled: recreate all"));
        must_recreate_all= TRUE;
        break;
      }
    }
    if (!must_recreate_all)
      DBUG_RETURN(0);

    /*
      Drop all indexes and declare them disabled. With the
      T_CREATE_MISSING_KEYS flag, maria_repair*() will recreate all
      disabled indexes and enable them.
    */
    maria_clear_all_keys_active(state->key_map);
    DBUG_PRINT("repair", ("declared all indexes disabled"));
  }

  /* Flush obsolete index data from key cache */
  _ma_flush_table_files(info, MARIA_FLUSH_INDEX,
                        FLUSH_IGNORE_CHANGED, FLUSH_IGNORE_CHANGED);

  /* Forget every index root block */
  for (keynr= 0; keynr < share->base.keys; keynr++)
    state->key_root[keynr]= HA_OFFSET_ERROR;

  /* Forget the chain of deleted key blocks */
  state->key_del= HA_OFFSET_ERROR;

  /* The index file now logically ends right after its header */
  state->state.key_file_length= share->base.keystart;

  DBUG_RETURN(0);
}
2574
2575
2576 /*
2577 Recover old table by reading each record and writing all keys
2578
2579 NOTES
2580 Save new datafile-name in temp_filename.
2581 We overwrite the index file as we go (writekeys() for example), so if we
2582 crash during this the table is unusable and user (or Recovery in the
2583 future) must repeat the REPAIR/OPTIMIZE operation. We could use a
2584 temporary index file in the future (drawback: more disk space).
2585
2586 IMPLEMENTATION (for hard repair with block format)
2587 - Create new, unrelated MARIA_HA of the table
2588 - Create new datafile and associate it with new handler
2589 - Reset all statistic information in new handler
2590 - Copy all data to new handler with normal write operations
2591 - Move state of new handler to old handler
2592 - Close new handler
2593 - Close data file in old handler
2594 - Rename old data file to new data file.
2595 - Reopen data file in old handler
2596 */
2597
maria_repair(HA_CHECK * param,register MARIA_HA * info,char * name,my_bool rep_quick)2598 int maria_repair(HA_CHECK *param, register MARIA_HA *info,
2599 char *name, my_bool rep_quick)
2600 {
2601 int error, got_error;
2602 ha_rows start_records,new_header_length;
2603 my_off_t del;
2604 File new_file;
2605 MARIA_SHARE *share= info->s;
2606 char llbuff[22],llbuff2[22];
2607 MARIA_SORT_INFO sort_info;
2608 MARIA_SORT_PARAM sort_param;
2609 my_bool block_record, scan_inited= 0, reenable_logging= 0;
2610 enum data_file_type org_data_file_type= share->data_file_type;
2611 myf sync_dir= ((share->now_transactional && !share->temporary) ?
2612 MY_SYNC_DIR : 0);
2613 MARIA_SHARE backup_share;
2614 DBUG_ENTER("maria_repair");
2615
2616 got_error= 1;
2617 new_file= -1;
2618 start_records= share->state.state.records;
2619 if (!(param->testflag & T_SILENT))
2620 {
2621 printf("- recovering (with keycache) Aria-table '%s'\n",name);
2622 printf("Data records: %s\n", llstr(start_records, llbuff));
2623 }
2624
2625 if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
2626 rep_quick, &backup_share))
2627 goto err;
2628
2629 if ((reenable_logging= share->now_transactional))
2630 _ma_tmp_disable_logging_for_table(info, 0);
2631
2632 sort_param.current_filepos= sort_param.filepos= new_header_length=
2633 ((param->testflag & T_UNPACK) ? 0L : share->pack.header_length);
2634
2635 if (!rep_quick)
2636 {
2637 /* Get real path for data file */
2638 if ((new_file= mysql_file_create(key_file_tmp,
2639 fn_format(param->temp_filename,
2640 share->data_file_name.str, "",
2641 DATA_TMP_EXT, 2+4),
2642 0,param->tmpfile_createflag,
2643 MYF(0))) < 0)
2644 {
2645 _ma_check_print_error(param,"Can't create new tempfile: '%s'",
2646 param->temp_filename);
2647 goto err;
2648 }
2649 if (new_header_length &&
2650 maria_filecopy(param, new_file, info->dfile.file, 0L,
2651 new_header_length, "datafile-header"))
2652 goto err;
2653 share->state.dellink= HA_OFFSET_ERROR;
2654 info->rec_cache.file= new_file; /* For sort_delete_record */
2655 if (share->data_file_type == BLOCK_RECORD ||
2656 (param->testflag & T_UNPACK))
2657 {
2658 if (create_new_data_handle(&sort_param, new_file))
2659 goto err;
2660 sort_info.new_info->rec_cache.file= new_file;
2661 }
2662 }
2663
2664 block_record= sort_info.new_info->s->data_file_type == BLOCK_RECORD;
2665
2666 if (org_data_file_type != BLOCK_RECORD)
2667 {
2668 /* We need a read buffer to read rows in big blocks */
2669 if (init_io_cache(¶m->read_cache, info->dfile.file,
2670 (uint) param->read_buffer_length,
2671 READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
2672 goto err;
2673 }
2674 if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
2675 {
2676 /* When writing to not block records, we need a write buffer */
2677 if (!rep_quick)
2678 {
2679 if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
2680 (uint) param->write_buffer_length,
2681 WRITE_CACHE, new_header_length, 1,
2682 MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
2683 goto err;
2684 sort_info.new_info->opt_flag|=WRITE_CACHE_USED;
2685 }
2686 }
2687 else if (block_record)
2688 {
2689 scan_inited= 1;
2690 if (maria_scan_init(sort_info.info))
2691 goto err;
2692 }
2693
2694 if (!(sort_param.record=
2695 (uchar *) my_malloc((uint)
2696 share->base.default_rec_buff_size, MYF(0))) ||
2697 _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
2698 share->base.default_rec_buff_size, MYF(0)))
2699 {
2700 _ma_check_print_error(param, "Not enough memory for extra record");
2701 goto err;
2702 }
2703
2704 sort_param.read_cache=param->read_cache;
2705 sort_param.pos=sort_param.max_pos=share->pack.header_length;
2706 param->read_cache.end_of_file= sort_info.filelength;
2707 sort_param.master=1;
2708 sort_info.max_records= ~(ha_rows) 0;
2709
2710 del= share->state.state.del;
2711 share->state.state.records= share->state.state.del= share->state.split= 0;
2712 share->state.state.empty= 0;
2713
2714 if (param->testflag & T_CREATE_MISSING_KEYS)
2715 maria_set_all_keys_active(share->state.key_map, share->base.keys);
2716 maria_drop_all_indexes(param, info, TRUE);
2717
2718 maria_lock_memory(param); /* Everything is alloced */
2719
2720 sort_param.sort_info->info->in_check_table= 1;
2721 /* Re-create all keys, which are set in key_map. */
2722 while (!(error=sort_get_next_record(&sort_param)))
2723 {
2724 if (block_record && _ma_sort_write_record(&sort_param))
2725 goto err;
2726
2727 if (writekeys(&sort_param))
2728 {
2729 if (my_errno != HA_ERR_FOUND_DUPP_KEY)
2730 goto err;
2731 DBUG_DUMP("record", sort_param.record,
2732 share->base.default_rec_buff_size);
2733 _ma_check_print_warning(param,
2734 "Duplicate key %2d for record at %10s against "
2735 "new record at %10s",
2736 info->errkey+1,
2737 llstr(sort_param.current_filepos, llbuff),
2738 llstr(info->dup_key_pos,llbuff2));
2739 if (param->testflag & T_VERBOSE)
2740 {
2741 MARIA_KEY tmp_key;
2742 MARIA_KEYDEF *keyinfo= share->keyinfo + info->errkey;
2743 (*keyinfo->make_key)(info, &tmp_key, (uint) info->errkey,
2744 info->lastkey_buff,
2745 sort_param.record, 0L, 0);
2746 _ma_print_key(stdout, &tmp_key);
2747 }
2748 sort_info.dupp++;
2749 if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
2750 {
2751 param->testflag|=T_RETRY_WITHOUT_QUICK;
2752 param->error_printed=1;
2753 goto err;
2754 }
2755 /* purecov: begin tested */
2756 if (block_record)
2757 {
2758 sort_info.new_info->s->state.state.records--;
2759 if ((*sort_info.new_info->s->write_record_abort)(sort_info.new_info))
2760 {
2761 _ma_check_print_error(param,"Couldn't delete duplicate row");
2762 goto err;
2763 }
2764 }
2765 /* purecov: end */
2766 continue;
2767 }
2768 if (!block_record)
2769 {
2770 if (_ma_sort_write_record(&sort_param))
2771 goto err;
2772 /* Filepos is pointer to where next row will be stored */
2773 sort_param.current_filepos= sort_param.filepos;
2774 }
2775 }
2776 if (error > 0 || maria_write_data_suffix(&sort_info, !rep_quick) ||
2777 flush_io_cache(&sort_info.new_info->rec_cache) ||
2778 param->read_cache.error < 0)
2779 goto err;
2780
2781 if (param->testflag & T_WRITE_LOOP)
2782 {
2783 fputs(" \r",stdout); fflush(stdout);
2784 }
2785 if (mysql_file_chsize(share->kfile.file,
2786 share->state.state.key_file_length, 0, MYF(0)))
2787 {
2788 _ma_check_print_warning(param,
2789 "Can't change size of indexfile, error: %d",
2790 my_errno);
2791 goto err;
2792 }
2793
2794 if (rep_quick && del+sort_info.dupp != share->state.state.del)
2795 {
2796 _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
2797 "Found wrong number of deleted records");
2798 _ma_check_print_error(param,"Run recovery again without -q");
2799 param->retry_repair=1;
2800 param->testflag|=T_RETRY_WITHOUT_QUICK;
2801 goto err;
2802 }
2803
2804 if (param->testflag & T_SAFE_REPAIR)
2805 {
2806 /* Don't repair if we loosed more than one row */
2807 if (sort_info.new_info->s->state.state.records+1 < start_records)
2808 {
2809 share->state.state.records= start_records;
2810 goto err;
2811 }
2812 }
2813
2814 end_io_cache(&sort_info.new_info->rec_cache);
2815 info->opt_flag&= ~WRITE_CACHE_USED;
2816
2817 /*
2818 As we have read the data file (sort_get_next_record()) we may have
2819 cached, non-changed blocks of it in the page cache. We must throw them
2820 away as we are going to close their descriptor ('new_file'). We also want
2821 to flush any index block, so that it is ready for the upcoming sync.
2822 */
2823 if (_ma_flush_table_files_before_swap(param, info))
2824 goto err;
2825
2826 if (!rep_quick)
2827 {
2828 sort_info.new_info->s->state.state.data_file_length= sort_param.filepos;
2829 if (sort_info.new_info != sort_info.info)
2830 {
2831 MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
2832 if (maria_close(sort_info.new_info))
2833 {
2834 _ma_check_print_error(param, "Got error %d on close", my_errno);
2835 goto err;
2836 }
2837 copy_data_file_state(&share->state, &save_state);
2838 new_file= -1;
2839 sort_info.new_info= info;
2840 }
2841 share->state.version=(ulong) time((time_t*) 0); /* Force reopen */
2842
2843 /* Replace the actual file with the temporary file */
2844 if (new_file >= 0)
2845 mysql_file_close(new_file, MYF(MY_WME));
2846 new_file= -1;
2847 change_data_file_descriptor(info, -1);
2848 if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
2849 DATA_TMP_EXT, param->backup_time,
2850 (param->testflag & T_BACKUP_DATA ?
2851 MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
2852 sync_dir) ||
2853 _ma_open_datafile(info, share))
2854 {
2855 goto err;
2856 }
2857 }
2858 else
2859 {
2860 share->state.state.data_file_length= sort_param.max_pos;
2861 }
2862 if (param->testflag & T_CALC_CHECKSUM)
2863 share->state.state.checksum= param->glob_crc;
2864
2865 if (!(param->testflag & T_SILENT))
2866 {
2867 if (start_records != share->state.state.records)
2868 printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
2869 }
2870 if (sort_info.dupp)
2871 _ma_check_print_warning(param,
2872 "%s records have been removed",
2873 llstr(sort_info.dupp,llbuff));
2874
2875 got_error= 0;
2876 /* If invoked by external program that uses thr_lock */
2877 if (&share->state.state != info->state)
2878 *info->state= *info->state_start= share->state.state;
2879
2880 err:
2881 if (scan_inited)
2882 maria_scan_end(sort_info.info);
2883 _ma_reset_state(info);
2884
2885 end_io_cache(¶m->read_cache);
2886 if (sort_info.new_info)
2887 {
2888 end_io_cache(&sort_info.new_info->rec_cache);
2889 sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
2890 }
2891 info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
2892
2893 sort_param.sort_info->info->in_check_table= 0;
2894 /* this below could fail, shouldn't we detect error? */
2895 if (got_error)
2896 {
2897 if (! param->error_printed)
2898 _ma_check_print_error(param,"%d for record at pos %s",my_errno,
2899 llstr(sort_param.start_recpos,llbuff));
2900 (void)_ma_flush_table_files_before_swap(param, info);
2901 if (sort_info.new_info && sort_info.new_info != sort_info.info)
2902 {
2903 unuse_data_file_descriptor(sort_info.new_info);
2904 maria_close(sort_info.new_info);
2905 }
2906 if (new_file >= 0)
2907 {
2908 mysql_file_close(new_file,MYF(0));
2909 mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
2910 }
2911 maria_mark_crashed_on_repair(info);
2912 }
2913 /* If caller had disabled logging it's not up to us to re-enable it */
2914 if (reenable_logging)
2915 _ma_reenable_logging_for_table(info, FALSE);
2916 restore_table_state_after_repair(info, &backup_share);
2917
2918 my_free(sort_param.rec_buff);
2919 my_free(sort_param.record);
2920 my_free(sort_info.buff);
2921 if (!got_error && (param->testflag & T_UNPACK))
2922 restore_data_file_type(share);
2923 share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES |
2924 STATE_NOT_ANALYZED | STATE_NOT_ZEROFILLED);
2925 if (!rep_quick)
2926 share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_MOVABLE);
2927 DBUG_RETURN(got_error);
2928 }
2929
2930
/* Update keyfile when doing repair */
2932
/*
  Insert the keys of the row in sort_param->record (stored at
  sort_param->current_filepos) into every active index.

  Full-text indexes are handled by _ma_ft_add(); all other indexes use the
  make_key and ck_insert callbacks of the key definition.

  Returns 0 if all keys were inserted and -1 on failure.  When the failure
  is HA_ERR_FOUND_DUPP_KEY, info->errkey is set to the index that failed and
  the keys already inserted for this row are removed again.
*/
static int writekeys(MARIA_SORT_PARAM *sort_param)
{
  MARIA_HA *info= sort_param->sort_info->info;
  MARIA_SHARE *share= info->s;
  uchar *row= sort_param->record;
  my_off_t row_pos= sort_param->current_filepos;
  /* Keys are built in the area that follows the first max_key_length bytes */
  uchar *scratch= info->lastkey_buff + share->base.max_key_length;
  MARIA_KEY key;
  uint keynr;
  DBUG_ENTER("writekeys");

  for (keynr= 0; keynr < share->base.keys; keynr++)
  {
    if (!maria_is_key_active(share->state.key_map, keynr))
      continue;

    if (share->keyinfo[keynr].flag & HA_FULLTEXT)
    {
      if (_ma_ft_add(info, keynr, scratch, row, row_pos))
        goto err;
      continue;
    }

    /* make_key signals failure with a zero return, ck_insert with non-zero */
    if (!(*share->keyinfo[keynr].make_key)(info, &key, keynr, scratch, row,
                                           row_pos, 0) ||
        (*share->keyinfo[keynr].ck_insert)(info, &key))
      goto err;
  }
  DBUG_RETURN(0);

err:
  if (my_errno == HA_ERR_FOUND_DUPP_KEY)
  {
    info->errkey= (int) keynr;                  /* This key was found */
    /* Walk back over the indexes handled before the failing one */
    while (keynr-- > 0)
    {
      if (!maria_is_key_active(share->state.key_map, keynr))
        continue;

      if (share->keyinfo[keynr].flag & HA_FULLTEXT)
      {
        if (_ma_ft_del(info, keynr, scratch, row, row_pos))
          break;
        continue;
      }

      (*share->keyinfo[keynr].make_key)(info, &key, keynr, scratch, row,
                                        row_pos, 0);
      if (_ma_ck_delete(info, &key))
        break;
    }
  }
  /* Remove checksum that was added to glob_crc in sort_get_next_record */
  if (sort_param->calc_checksum)
    sort_param->sort_info->param->glob_crc-= info->cur_row.checksum;
  DBUG_PRINT("error",("errno: %d",my_errno));
  DBUG_RETURN(-1);
} /* writekeys */
2996
2997
/* Change all key pointers that point to a record */
2999
/*
  Make the index entries of a row refer to a new row position.

  info      Table handler
  record    The row whose keys are rebuilt
  oldpos    Row position currently stored in the indexes
  newpos    Row position to store instead
  prot_key  Index number that must be left untouched

  For indexes flagged HA_NOSAME the key is looked up and the row pointer on
  the found key page is overwritten.  For other indexes the old key is
  deleted and a key built with newpos is written.

  Returns 0 on success, -1 if a search, page write, delete or write fails.
*/
int maria_movepoint(register MARIA_HA *info, uchar *record,
                    MARIA_RECORD_POS oldpos, MARIA_RECORD_POS newpos,
                    uint prot_key)
{
  MARIA_SHARE *share= info->s;
  uchar *scratch= info->lastkey_buff + share->base.max_key_length;
  MARIA_PAGE page;
  uint keynr;
  DBUG_ENTER("maria_movepoint");

  for (keynr= 0; keynr < share->base.keys; keynr++)
  {
    MARIA_KEY key;
    MARIA_KEYDEF *keydef;

    if (keynr == prot_key ||
        !maria_is_key_active(share->state.key_map, keynr))
      continue;

    keydef= share->keyinfo + keynr;
    (*keydef->make_key)(info, &key, keynr, scratch, record, oldpos, 0);

    if (!(key.keyinfo->flag & HA_NOSAME))
    {
      /* Change old key to new */
      if (_ma_ck_delete(info, &key))
        DBUG_RETURN(-1);
      (*keydef->make_key)(info, &key, keynr, scratch, record, newpos, 0);
      if (_ma_ck_write(info, &key))
        DBUG_RETURN(-1);
      continue;
    }

    /* Change pointer direct */
    if (_ma_search(info, &key, (uint32) (SEARCH_SAME | SEARCH_SAVE_BUFF),
                   share->state.key_root[keynr]))
      DBUG_RETURN(-1);
    _ma_page_setup(&page, info, keydef, info->last_keypage,
                   info->keyread_buff);

    /* The row pointer sits just before the node pointer of the found key */
    _ma_dpointer(share,
                 info->int_keypos - page.node - share->rec_reflength,
                 newpos);

    if (_ma_write_keypage(&page, PAGECACHE_LOCK_LEFT_UNLOCKED,
                          DFLT_INIT_HITS))
      DBUG_RETURN(-1);
  }
  DBUG_RETURN(0);
} /* maria_movepoint */
3048
3049
3050 /* Tell system that we want all memory for our cache */
3051
/*
  Ask the operating system to lock the process memory.

  Only compiled in when SUN_OS is defined; otherwise this is a no-op and
  param is unused.  With SUN_OS, mlockall() is attempted only when
  param->opt_maria_lock_memory is set, and a warning is printed if it fails
  while running with effective uid 0.
*/
void maria_lock_memory(HA_CHECK *param __attribute__((unused)))
{
#ifdef SUN_OS                           /* Key caching thrashes on sun 4.1 */
  int lock_result;

  if (!param->opt_maria_lock_memory)
    return;

  lock_result= mlockall(MCL_CURRENT);   /* or plock(DATLOCK); */
  if (geteuid() == 0 && lock_result != 0)
    _ma_check_print_warning(param,
                            "Failed to lock memory. errno %d",my_errno);
#endif
} /* maria_lock_memory */
3064
3065
3066 /**
3067 Flush all changed blocks to disk.
3068
3069 We release blocks as it's unlikely that they would all be needed soon.
3070 This function needs to be called before swapping data or index files or
3071 syncing them.
3072
3073 @param param description of the repair operation
3074 @param info table
3075 */
3076
_ma_flush_table_files_before_swap(HA_CHECK * param,MARIA_HA * info)3077 static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
3078 MARIA_HA *info)
3079 {
3080 DBUG_ENTER("_ma_flush_table_files_before_swap");
3081 if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
3082 FLUSH_RELEASE, FLUSH_RELEASE))
3083 {
3084 _ma_check_print_error(param, "%d when trying to write buffers", my_errno);
3085 DBUG_RETURN(TRUE);
3086 }
3087 DBUG_RETURN(FALSE);
3088 }
3089
3090
/* Sort index for more efficient reads */
3092
/*
  Rewrite the index file into a temporary file, one index at a time, and let
  the temporary file replace the original index file.

  param   Check parameters; temp_filename, new_file_pos, testflag and
          tmpfile_createflag are used here
  info    Table handler
  name    Table name; used for the progress message and to build the name
          of the temporary file

  Returns 0 on success, and also when the table has an R-tree index, in
  which case nothing is done.  Returns 1 if protect_against_repair_crash()
  fails and -1 on any other error.
*/
int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name)
{
  reg2 uint key;
  reg1 MARIA_KEYDEF *keyinfo;
  File new_file;
  my_off_t index_pos[HA_MAX_POSSIBLE_KEY];     /* New root page of each index */
  uint r_locks,w_locks;
  int old_lock;
  MARIA_SHARE *share= info->s;
  MARIA_STATE_INFO old_state;
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
                 MY_SYNC_DIR : 0);
  DBUG_ENTER("maria_sort_index");

  /* cannot sort index files with R-tree indexes */
  for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
       key++,keyinfo++)
    if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
      DBUG_RETURN(0);

  if (!(param->testflag & T_SILENT))
    printf("- Sorting index for Aria-table '%s'\n",name);

  if (protect_against_repair_crash(info, param, FALSE))
    DBUG_RETURN(1);

  /* Get real path for index file */
  fn_format(param->temp_filename,name,"", MARIA_NAME_IEXT,2+4+32);
  /* Same path with the temporary extension; this is the file we write to */
  if ((new_file=mysql_file_create(key_file_kfile, fn_format(param->temp_filename,param->temp_filename,
                                                            "", INDEX_TMP_EXT,2+4),
                                  0, param->tmpfile_createflag, MYF(0))) < 0)
  {
    _ma_check_print_error(param,"Can't create new tempfile: '%s'",
                          param->temp_filename);
    DBUG_RETURN(-1);
  }
  /* Copy everything before base.keystart unchanged */
  if (maria_filecopy(param, new_file, share->kfile.file, 0L,
                     (ulong) share->base.keystart, "headerblock"))
    goto err;

  /* sort_one_index() advances new_file_pos for every page it places */
  param->new_file_pos=share->base.keystart;
  for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
       key++,keyinfo++)
  {
    if (maria_is_key_active(share->state.key_map, key) &&
        share->state.key_root[key] != HA_OFFSET_ERROR)
    {
      index_pos[key]=param->new_file_pos;       /* Write first block here */
      if (sort_one_index(param,info,keyinfo,share->state.key_root[key],
                         new_file))
        goto err;
    }
    else
      index_pos[key]= HA_OFFSET_ERROR;          /* No blocks */
  }

  /* Flush key cache for this file if we are calling this outside maria_chk */
  flush_pagecache_blocks(share->pagecache, &share->kfile,
                         FLUSH_IGNORE_CHANGED);

  share->state.version=(ulong) time((time_t*) 0);
  old_state= share->state;                      /* save state if not stored */
  r_locks= share->r_locks;
  w_locks= share->w_locks;
  old_lock= info->lock_type;

  /* Put same locks as old file */
  share->r_locks= share->w_locks= share->tot_locks= 0;
  (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE);
  mysql_mutex_lock(&share->intern_lock);
  mysql_file_close(share->kfile.file, MYF(MY_WME));
  share->kfile.file = -1;
  mysql_mutex_unlock(&share->intern_lock);
  mysql_file_close(new_file, MYF(MY_WME));
  /*
    NOTE(review): if the rename or reopen below fails we leave through err2
    with the lock counters zeroed above and, when the rename itself failed,
    with kfile.file still -1; nothing on that path restores them.  Confirm
    that callers cope with this.
  */
  if (maria_change_to_newfile(share->index_file_name.str, MARIA_NAME_IEXT,
                              INDEX_TMP_EXT, 0, sync_dir) ||
      _ma_open_keyfile(share))
    goto err2;
  info->lock_type= F_UNLCK;                     /* Force maria_readinfo to lock */
  _ma_readinfo(info,F_WRLCK,0);                 /* Will lock the table */
  info->lock_type= old_lock;
  share->r_locks= r_locks;
  share->w_locks= w_locks;
  share->tot_locks= r_locks+w_locks;
  share->state= old_state;                      /* Restore old state */

  /* Publish the new layout: file length and the root page of every index */
  share->state.state.key_file_length=param->new_file_pos;
  info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
  for (key=0 ; key < share->base.keys ; key++)
    share->state.key_root[key]=index_pos[key];
  share->state.key_del= HA_OFFSET_ERROR;

  share->state.changed&= ~STATE_NOT_SORTED_PAGES;
  DBUG_EXECUTE_IF("maria_flush_whole_log",
                  {
                    DBUG_PRINT("maria_flush_whole_log", ("now"));
                    translog_flush(translog_get_horizon());
                  });
  DBUG_EXECUTE_IF("maria_crash_sort_index",
                  {
                    DBUG_PRINT("maria_crash_sort_index", ("now"));
                    DBUG_SUICIDE();
                  });
  DBUG_RETURN(0);

err:
  /* Temporary file is still open here; err2 is used once it has been closed */
  mysql_file_close(new_file, MYF(MY_WME));
err2:
  mysql_file_delete(key_file_tmp, param->temp_filename,MYF(MY_WME));
  DBUG_RETURN(-1);
} /* maria_sort_index */
3204
3205
3206 /**
3207 @brief write a page directly to index file
3208
3209 */
3210
static int write_page(MARIA_SHARE *share, File file,
                      uchar *buff, uint block_size,
                      my_off_t pos, int myf_rw)
{
  PAGECACHE_IO_HOOK_ARGS hook_args;
  int write_result;

  /* Describe the page for the index file's write hooks */
  hook_args.data= (uchar*) share;
  hook_args.crypt_buf= NULL;
  hook_args.page= buff;
  hook_args.pageno= (pgcache_page_no_t) (pos / share->block_size);

  /*
    The hook receives the argument struct by address and the write uses
    hook_args.page afterwards.  Its return value is not examined here.
  */
  (* share->kfile.pre_write_hook)(&hook_args);
  write_result= (int) my_pwrite(file, hook_args.page, block_size, pos,
                                myf_rw);
  (* share->kfile.post_write_hook)(write_result, &hook_args);

  return write_result;
}
3226
3227
/* Sort index blocks recursively using one index */
3229
/*
  Copy one index page, and recursively every page reachable from it, to
  new_file.

  param     Check parameters; new_file_pos is the next free position in
            new_file and is advanced by keyinfo->block_length per page
  info      Table handler
  keyinfo   Definition of the index the page belongs to
  pagepos   Position of the page in the current index file
  new_file  File the rearranged pages are written to

  A position in the new file is reserved for this page before its children
  are visited.  Pointers to child pages (and, for full-text keys with a
  negative subkey count, the pointer stored in the key) are overwritten in
  the in-memory copy with the position the child will get, so that the page
  written at the end refers to the new layout.

  Returns 0 on success, -1 if the page buffer could not be allocated and 1
  on any other error.
*/
static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
                          my_off_t pagepos, File new_file)
{
  uint length,nod_flag;
  uchar *buff,*keypos,*endpos;
  my_off_t new_page_pos,next_page;
  MARIA_SHARE *share= info->s;
  MARIA_KEY key;
  MARIA_PAGE page;
  DBUG_ENTER("sort_one_index");

  /* cannot walk over R-tree indices */
  DBUG_ASSERT(keyinfo->key_alg != HA_KEY_ALG_RTREE);
  /* Reserve the slot for this page before any child page gets one */
  new_page_pos=param->new_file_pos;
  param->new_file_pos+=keyinfo->block_length;
  key.keyinfo= keyinfo;

  /* One buffer: the page itself followed by room for one unpacked key */
  if (!(buff= (uchar*) my_alloca((uint) keyinfo->block_length +
                                 keyinfo->maxlength +
                                 MARIA_INDEX_OVERHEAD_SIZE)))
  {
    _ma_check_print_error(param,"Not enough memory for key block");
    DBUG_RETURN(-1);
  }
  key.data= buff + keyinfo->block_length;

  if (_ma_fetch_keypage(&page, info, keyinfo, pagepos,
                        PAGECACHE_LOCK_LEFT_UNLOCKED,
                        DFLT_INIT_HITS, buff, 0))
  {
    report_keypage_fault(param, info, pagepos);
    goto err;
  }

  /* Only node pages and full-text pages can contain pointers to fix up */
  if ((nod_flag= page.node) || keyinfo->flag & HA_FULLTEXT)
  {
    keypos= page.buff + share->keypage_header + nod_flag;
    endpos= page.buff + page.size;

    for ( ;; )
    {
      if (nod_flag)
      {
        next_page= _ma_kpos(nod_flag,keypos);
        /* Save new pos */
        _ma_kpointer(info,keypos-nod_flag,param->new_file_pos);
        if (sort_one_index(param,info,keyinfo,next_page,new_file))
        {
          DBUG_PRINT("error",
                     ("From page: %ld, keyoffset: %lu  used_length: %d",
                      (ulong) pagepos, (ulong) (keypos - buff),
                      (int) page.size));
          DBUG_DUMP("buff", page.buff, page.size);
          goto err;
        }
      }
      /*
        The child pointer is handled before this test, so the pointer that
        follows the last key on a node page is processed too.
      */
      if (keypos >= endpos ||
          !(*keyinfo->get_key)(&key, page.flag, nod_flag, &keypos))
        break;
      DBUG_ASSERT(keypos <= endpos);
      if (keyinfo->flag & HA_FULLTEXT)
      {
        uint off;
        int subkeys;
        get_key_full_length_rdonly(off, key.data);
        subkeys= ft_sintXkorr(key.data + off);
        if (subkeys < 0)
        {
          /* The key's row position is the root of a tree of ft2_keyinfo keys */
          next_page= _ma_row_pos_from_key(&key);
          _ma_dpointer(share, keypos - nod_flag - share->rec_reflength,
                       param->new_file_pos); /* Save new pos */
          if (sort_one_index(param,info,&share->ft2_keyinfo,
                             next_page,new_file))
            goto err;
        }
      }
    }
  }

  /* Fill block with zero and write it to the new index file */
  length= page.size;
  bzero(buff+length,keyinfo->block_length-length);
  if (write_page(share, new_file, buff, keyinfo->block_length,
                 new_page_pos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
  {
    _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno);
    goto err;
  }
  my_afree(buff);
  DBUG_RETURN(0);
err:
  my_afree(buff);
  DBUG_RETURN(1);
} /* sort_one_index */
3325
3326
3327 /**
3328 @brief Fill empty space in index file with zeroes
3329
3330 @return
3331 @retval 0 Ok
3332 @retval 1 Error
3333 */
3334
maria_zerofill_index(HA_CHECK * param,MARIA_HA * info,const char * name)3335 static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info,
3336 const char *name)
3337 {
3338 MARIA_SHARE *share= info->s;
3339 MARIA_PINNED_PAGE page_link;
3340 char llbuff[21];
3341 uchar *buff;
3342 pgcache_page_no_t page;
3343 my_off_t pos;
3344 my_off_t key_file_length= share->state.state.key_file_length;
3345 uint block_size= share->block_size;
3346 my_bool zero_lsn= (share->base.born_transactional &&
3347 !(param->testflag & T_ZEROFILL_KEEP_LSN));
3348 int error= 1;
3349 DBUG_ENTER("maria_zerofill_index");
3350
3351 if (!(param->testflag & T_SILENT))
3352 printf("- Zerofilling index for Aria-table '%s'\n",name);
3353
3354 /* Go through the index file */
3355 for (pos= share->base.keystart, page= (ulonglong) (pos / block_size);
3356 pos < key_file_length;
3357 pos+= block_size, page++)
3358 {
3359 uint length;
3360 if (!(buff= pagecache_read(share->pagecache,
3361 &share->kfile, page,
3362 DFLT_INIT_HITS, 0,
3363 PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
3364 &page_link.link)))
3365 {
3366 pagecache_unlock_by_link(share->pagecache, page_link.link,
3367 PAGECACHE_LOCK_WRITE_UNLOCK,
3368 PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3369 LSN_IMPOSSIBLE, 0, FALSE);
3370 _ma_check_print_error(param,
3371 "Page %9s: Got error %d when reading index file",
3372 llstr(pos, llbuff), my_errno);
3373 goto end;
3374 }
3375 if (zero_lsn)
3376 bzero(buff, LSN_SIZE);
3377
3378 if (share->base.born_transactional)
3379 {
3380 uint keynr= _ma_get_keynr(share, buff);
3381 if (keynr < share->base.keys)
3382 {
3383 MARIA_PAGE page;
3384 DBUG_ASSERT(keynr < share->base.keys);
3385
3386 _ma_page_setup(&page, info, share->keyinfo + keynr, pos, buff);
3387 if (_ma_compact_keypage(&page, ~(TrID) 0))
3388 {
3389 _ma_check_print_error(param,
3390 "Page %9s: Got error %d when reading index "
3391 "file",
3392 llstr(pos, llbuff), my_errno);
3393 goto end;
3394 }
3395 }
3396 }
3397
3398 length= _ma_get_page_used(share, buff);
3399 DBUG_ASSERT(length <= block_size);
3400 if (length < block_size)
3401 bzero(buff + length, block_size - length);
3402 pagecache_unlock_by_link(share->pagecache, page_link.link,
3403 PAGECACHE_LOCK_WRITE_UNLOCK,
3404 PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3405 LSN_IMPOSSIBLE, 1, FALSE);
3406 }
3407 error= 0; /* ok */
3408
3409 end:
3410 if (flush_pagecache_blocks(share->pagecache, &share->kfile,
3411 FLUSH_FORCE_WRITE))
3412 DBUG_RETURN(1);
3413 DBUG_RETURN(error);
3414 }
3415
3416
3417 /**
3418 @brief Fill empty space in data file with zeroes
3419
3420 @todo
3421 Zerofill all pages marked in bitmap as empty and change them to
3422 be of type UNALLOCATED_PAGE
3423
3424 @return
3425 @retval 0 Ok
3426 @retval 1 Error
3427 */
3428
static my_bool maria_zerofill_data(HA_CHECK *param, MARIA_HA *info,
                                   const char *name)
{
  MARIA_SHARE *share= info->s;
  MARIA_PINNED_PAGE page_link;
  char llbuff[21];
  my_off_t pos;
  pgcache_page_no_t page;
  uint block_size= share->block_size;
  MARIA_FILE_BITMAP *bitmap= &share->bitmap;
  /* LSNs are cleared unless the caller asked to keep them */
  my_bool zero_lsn= !(param->testflag & T_ZEROFILL_KEEP_LSN), error;
  DBUG_ENTER("maria_zerofill_data");

  /* This works only with BLOCK_RECORD files */
  if (share->data_file_type != BLOCK_RECORD)
    DBUG_RETURN(0);

  if (!(param->testflag & T_SILENT))
    printf("- Zerofilling data for Aria-table '%s'\n",name);

  /* Go through the record file */
  /* Starts at page 1; page 0 would match the bitmap-page test below anyway */
  for (page= 1, pos= block_size;
       pos < share->state.state.data_file_length;
       pos+= block_size, page++)
  {
    uchar *buff;
    enum en_page_type page_type;

    /* Ignore bitmap pages */
    if ((page % share->bitmap.pages_covered) == 0)
      continue;
    /* The page is returned write-locked; it is released below or at 'err' */
    if (!(buff= pagecache_read(share->pagecache,
                               &info->dfile,
                               page, 1, 0,
                               PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
                               &page_link.link)))
    {
      _ma_check_print_error(param,
                            "Page %9s:  Got error: %d when reading datafile",
                            llstr(pos, llbuff), my_errno);
      goto err;
    }
    page_type= (enum en_page_type) (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK);
    switch (page_type) {
    case UNALLOCATED_PAGE:
      /* Clear the whole page, keeping the LSN bytes if requested */
      if (zero_lsn)
        bzero(buff, block_size);
      else
        bzero(buff + LSN_SIZE, block_size - LSN_SIZE);
      break;
    case BLOB_PAGE:
      /* Bitmap bits of 0 mean the page is not in use; treat as unallocated */
      if (_ma_bitmap_get_page_bits(info, bitmap, page) == 0)
      {
        /* Unallocated page */
        if (zero_lsn)
          bzero(buff, block_size);
        else
          bzero(buff + LSN_SIZE, block_size - LSN_SIZE);
      }
      else
        if (zero_lsn)
          bzero(buff, LSN_SIZE);
      break;
    case HEAD_PAGE:
    case TAIL_PAGE:
    {
      uint max_entry= (uint) buff[DIR_COUNT_OFFSET];
      uint offset, dir_start, empty_space;
      uchar *dir;

      if (zero_lsn)
        bzero(buff, LSN_SIZE);
      /* A page without directory entries gets no further treatment */
      if (max_entry != 0)
      {
        my_bool is_head_page= (page_type == HEAD_PAGE);
        dir= dir_entry_pos(buff, block_size, max_entry - 1);
        _ma_compact_block_page(share,
                               buff, max_entry -1, 0,
                               is_head_page ? ~(TrID) 0 : 0,
                               is_head_page ?
                               share->base.min_block_length : 0);

        /* compactation may have increased free space */
        empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
        if (!enough_free_entries_on_page(share, buff))
          empty_space= 0;                         /* Page is full */
        if (_ma_bitmap_set(info, page, is_head_page,
                           empty_space))
          goto err;

        /* Zerofill the not used part */
        /* From the end of the last directory entry's data up to 'dir' */
        offset= uint2korr(dir) + uint2korr(dir+2);
        dir_start= (uint) (dir - buff);
        DBUG_ASSERT(dir_start >= offset);
        if (dir_start > offset)
          bzero(buff + offset, dir_start - offset);
      }
      break;
    }
    default:
      _ma_check_print_error(param,
                            "Page %9s:  Found unrecognizable block of type %d",
                            llstr(pos, llbuff), page_type);
      goto err;
    }
    /* Release the page with the 'changed' argument set */
    pagecache_unlock_by_link(share->pagecache, page_link.link,
                             PAGECACHE_LOCK_WRITE_UNLOCK,
                             PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
                             LSN_IMPOSSIBLE, 1, FALSE);
  }
  /* Success path: the result is 1 if either flush fails */
  error= _ma_bitmap_flush(share);
  if (flush_pagecache_blocks(share->pagecache, &info->dfile,
                             FLUSH_FORCE_WRITE))
    error= 1;
  DBUG_RETURN(error);

err:
  /* Release the page that was being processed, without marking it changed */
  pagecache_unlock_by_link(share->pagecache, page_link.link,
                           PAGECACHE_LOCK_WRITE_UNLOCK,
                           PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
                           LSN_IMPOSSIBLE, 0, FALSE);
  /* flush what was changed so far */
  (void) _ma_bitmap_flush(share);
  (void) flush_pagecache_blocks(share->pagecache, &info->dfile,
                                FLUSH_FORCE_WRITE);

  DBUG_RETURN(1);
}
3557
3558
3559 /**
3560 @brief Fill empty space in index and data files with zeroes
3561
3562 @return
3563 @retval 0 Ok
3564 @retval 1 Error
3565 */
3566
int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name)
{
  MARIA_SHARE *share= info->s;
  my_bool zero_lsn= ((param->testflag & T_ZEROFILL_KEEP_LSN) == 0);
  my_bool logging_was_on, failed;
  DBUG_ENTER("maria_zerofill");

  /* Remember the logging state before it is switched off below */
  logging_was_on= share->now_transactional;
  if (logging_was_on)
    _ma_tmp_disable_logging_for_table(info, 0);

  /* Index first, then data, then the uuid; stop at the first failure */
  failed= 1;
  if (!maria_zerofill_index(param, info, name) &&
      !maria_zerofill_data(param, info, name) &&
      !_ma_set_uuid(info->s, 0))
  {
    failed= 0;

    /*
      Mark that we have done zerofill of data and index. If we zeroed pages'
      LSN, table is movable.
    */
    share->state.changed&= ~STATE_NOT_ZEROFILLED;
    if (zero_lsn)
    {
      share->state.changed&= ~(STATE_NOT_MOVABLE | STATE_MOVED);
      /* Table should get new LSNs */
      share->state.create_rename_lsn= share->state.is_of_horizon=
        share->state.skip_redo_lsn= LSN_NEEDS_NEW_STATE_LSNS;
    }

    /* Ensure state is later flushed to disk, if within maria_chk */
    info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);

    /*
      Reset create_trid to make file comparable and to ensure that new
      trid's in the file starts from 0.
    */
    share->state.create_trid= 0;
  }

  if (logging_was_on)
    _ma_reenable_logging_for_table(info, FALSE);
  DBUG_RETURN(failed);
}
3604
3605
3606 /*
3607 Let temporary file replace old file.
3608 This assumes that the new file was created in the same
3609 directory as given by realpath(filename).
3610 This will ensure that any symlinks that are used will still work.
  Copies stats from the old file to the new file, deletes the original and
  renames the new file to the old file name
3613 */
3614
int maria_change_to_newfile(const char * filename, const char * old_ext,
                            const char * new_ext, time_t backup_time,
                            myf MyFlags)
{
  char current_name[FN_REFLEN], replacement_name[FN_REFLEN];
  char *replacement;

  /* Get real path to filename */
  (void) fn_format(current_name, filename, "", old_ext, 2+4+32);

  /* Same path, with the extension of the file that takes its place */
  replacement= fn_format(replacement_name, current_name, "", new_ext, 2+4);

  return my_redel(current_name, replacement, backup_time,
                  MYF(MY_WME | MY_LINK_WARNING | MyFlags));
} /* maria_change_to_newfile */
3627
3628
3629 /* Copy a block between two files */
3630
/*
  Copy 'length' bytes, starting at offset 'start' of file 'from', to the
  current position of file 'to'.

  param   Check parameters; write_buffer_length bounds the copy buffer and
          myf_rw holds the flags used for writing
  type    Short description of what is copied, used in the error message

  Returns 0 on success.  On a read or write error, a message is printed and
  1 is returned.
*/
int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start,
                   my_off_t length, const char *type)
{
  uchar stack_buff[IO_SIZE], *copy_buff;
  ulong chunk_max;
  size_t chunk;
  my_bool last_chunk, failed= 0;
  DBUG_ENTER("maria_filecopy");

  /* Prefer a heap buffer; fall back to the small stack buffer */
  chunk_max= (ulong) MY_MIN(param->write_buffer_length,length);
  copy_buff= my_malloc(chunk_max,MYF(0));
  if (!copy_buff)
  {
    copy_buff= stack_buff;
    chunk_max= IO_SIZE;
  }

  mysql_file_seek(from, start, MY_SEEK_SET,MYF(0));

  /* Full chunks while more than chunk_max remains, then one final piece */
  do
  {
    last_chunk= (length <= chunk_max);
    chunk= last_chunk ? (size_t) length : (size_t) chunk_max;
    if (mysql_file_read(from, copy_buff, chunk, MYF(MY_NABP)) ||
        mysql_file_write(to, copy_buff, chunk, param->myf_rw))
    {
      failed= 1;
      break;
    }
    length-= chunk;
  } while (!last_chunk);

  if (copy_buff != stack_buff)
    my_free(copy_buff);
  if (failed)
  {
    _ma_check_print_error(param,"Can't copy %s to tempfile, error %d",
                          type,my_errno);
    DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}
3665
3666
3667 /*
3668 Repair table or given index using sorting
3669
3670 SYNOPSIS
3671 maria_repair_by_sort()
3672 param Repair parameters
3673 info MARIA handler to repair
3674 name Name of table (for warnings)
3675 rep_quick set to <> 0 if we should not change data file
3676
3677 RESULT
3678 0 ok
3679 <>0 Error
3680 */
3681
maria_repair_by_sort(HA_CHECK * param,register MARIA_HA * info,const char * name,my_bool rep_quick)3682 int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
3683 const char * name, my_bool rep_quick)
3684 {
3685 int got_error;
3686 uint i, keys_to_repair;
3687 ha_rows start_records;
3688 my_off_t new_header_length, org_header_length, del;
3689 File new_file;
3690 MARIA_SORT_PARAM sort_param;
3691 MARIA_SHARE *share= info->s;
3692 HA_KEYSEG *keyseg;
3693 double *rec_per_key_part;
3694 char llbuff[22];
3695 MARIA_SORT_INFO sort_info;
3696 ulonglong UNINIT_VAR(key_map);
3697 myf sync_dir= ((share->now_transactional && !share->temporary) ?
3698 MY_SYNC_DIR : 0);
3699 my_bool scan_inited= 0, reenable_logging= 0;
3700 MARIA_SHARE backup_share;
3701 DBUG_ENTER("maria_repair_by_sort");
3702
3703 got_error= 1;
3704 new_file= -1;
3705 start_records= share->state.state.records;
3706 if (!(param->testflag & T_SILENT))
3707 {
3708 printf("- recovering (with sort) Aria-table '%s'\n",name);
3709 printf("Data records: %s\n", llstr(start_records,llbuff));
3710 }
3711
3712 if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
3713 rep_quick, &backup_share))
3714 goto err;
3715
3716 if ((reenable_logging= share->now_transactional))
3717 _ma_tmp_disable_logging_for_table(info, 0);
3718
3719 org_header_length= share->pack.header_length;
3720 new_header_length= (param->testflag & T_UNPACK) ? 0 : org_header_length;
3721 sort_param.filepos= new_header_length;
3722
3723 if (!rep_quick)
3724 {
3725 /* Get real path for data file */
3726 if ((new_file=mysql_file_create(key_file_tmp,
3727 fn_format(param->temp_filename,
3728 share->data_file_name.str, "",
3729 DATA_TMP_EXT, 2+4),
3730 0,param->tmpfile_createflag,
3731 MYF(0))) < 0)
3732 {
3733 _ma_check_print_error(param,"Can't create new tempfile: '%s'",
3734 param->temp_filename);
3735 goto err;
3736 }
3737 if (new_header_length &&
3738 maria_filecopy(param, new_file, info->dfile.file, 0L,
3739 new_header_length, "datafile-header"))
3740 goto err;
3741
3742 share->state.dellink= HA_OFFSET_ERROR;
3743 info->rec_cache.file= new_file; /* For sort_delete_record */
3744 if (share->data_file_type == BLOCK_RECORD ||
3745 (param->testflag & T_UNPACK))
3746 {
3747 if (create_new_data_handle(&sort_param, new_file))
3748 goto err;
3749 sort_info.new_info->rec_cache.file= new_file;
3750 }
3751 }
3752
3753 if (!(sort_info.key_block=
3754 alloc_key_blocks(param,
3755 (uint) param->sort_key_blocks,
3756 share->base.max_key_block_length)))
3757 goto err;
3758 sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
3759
3760 if (share->data_file_type != BLOCK_RECORD)
3761 {
3762 /* We need a read buffer to read rows in big blocks */
3763 if (init_io_cache(¶m->read_cache, info->dfile.file,
3764 (uint) param->read_buffer_length,
3765 READ_CACHE, org_header_length, 1, MYF(MY_WME)))
3766 goto err;
3767 }
3768 if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
3769 {
3770 /* When writing to not block records, we need a write buffer */
3771 if (!rep_quick)
3772 {
3773 if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
3774 (uint) param->write_buffer_length,
3775 WRITE_CACHE, new_header_length, 1,
3776 MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
3777 goto err;
3778 sort_info.new_info->opt_flag|= WRITE_CACHE_USED;
3779 }
3780 }
3781
3782 if (!(sort_param.record=
3783 (uchar*) my_malloc((size_t) share->base.default_rec_buff_size,
3784 MYF(0))) ||
3785 _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
3786 share->base.default_rec_buff_size, MYF(0)))
3787 {
3788 _ma_check_print_error(param, "Not enough memory for extra record");
3789 goto err;
3790 }
3791
3792 /* Optionally drop indexes and optionally modify the key_map */
3793 maria_drop_all_indexes(param, info, FALSE);
3794 key_map= share->state.key_map;
3795 if (param->testflag & T_CREATE_MISSING_KEYS)
3796 {
3797 /* Invert the copied key_map to recreate all disabled indexes. */
3798 key_map= ~key_map;
3799 }
3800
3801 param->read_cache.end_of_file= sort_info.filelength;
3802 sort_param.wordlist=NULL;
3803 init_alloc_root(&sort_param.wordroot, "sort", FTPARSER_MEMROOT_ALLOC_SIZE, 0,
3804 MYF(param->malloc_flags));
3805
3806 sort_param.key_cmp=sort_key_cmp;
3807 sort_param.lock_in_memory=maria_lock_memory;
3808 sort_param.tmpdir=param->tmpdir;
3809 sort_param.master =1;
3810
3811 del=share->state.state.del;
3812
3813 /* Calculate number of keys to repair */
3814 keys_to_repair= 0;
3815 for (sort_param.key=0 ; sort_param.key < share->base.keys ;
3816 sort_param.key++)
3817 {
3818 if (maria_is_key_active(key_map, sort_param.key))
3819 keys_to_repair++;
3820 }
3821 /* For each key we scan and merge sort the keys */
3822 param->max_stage= keys_to_repair*2;
3823
3824 rec_per_key_part= param->new_rec_per_key_part;
3825 for (sort_param.key=0 ; sort_param.key < share->base.keys ;
3826 rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++)
3827 {
3828 sort_param.keyinfo=share->keyinfo+sort_param.key;
3829 /*
3830 Skip this index if it is marked disabled in the copied
3831 (and possibly inverted) key_map.
3832 */
3833 if (! maria_is_key_active(key_map, sort_param.key))
3834 {
3835 /* Remember old statistics for key */
3836 memcpy((char*) rec_per_key_part,
3837 (char*) (share->state.rec_per_key_part +
3838 (uint) (rec_per_key_part - param->new_rec_per_key_part)),
3839 sort_param.keyinfo->keysegs*sizeof(*rec_per_key_part));
3840 DBUG_PRINT("repair", ("skipping seemingly disabled index #: %u",
3841 sort_param.key));
3842 continue;
3843 }
3844
3845 if ((!(param->testflag & T_SILENT)))
3846 printf ("- Fixing index %d\n",sort_param.key+1);
3847
3848 sort_param.read_cache=param->read_cache;
3849 sort_param.seg=sort_param.keyinfo->seg;
3850 sort_param.max_pos= sort_param.pos= org_header_length;
3851 keyseg=sort_param.seg;
3852 bzero((char*) sort_param.unique,sizeof(sort_param.unique));
3853 sort_param.key_length=share->rec_reflength;
3854 for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++)
3855 {
3856 sort_param.key_length+=keyseg[i].length;
3857 if (keyseg[i].flag & HA_SPACE_PACK)
3858 sort_param.key_length+=get_pack_length(keyseg[i].length);
3859 if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
3860 sort_param.key_length+= 2 + MY_TEST(keyseg[i].length >= 127);
3861 if (keyseg[i].flag & HA_NULL_PART)
3862 sort_param.key_length++;
3863 }
3864 share->state.state.records=share->state.state.del=share->state.split=0;
3865 share->state.state.empty=0;
3866
3867 if (sort_param.keyinfo->flag & HA_FULLTEXT)
3868 {
3869 uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
3870 sort_param.keyinfo->seg->charset->mbmaxlen;
3871 sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
3872 /*
3873 fulltext indexes may have much more entries than the
3874 number of rows in the table. We estimate the number here.
3875
3876 Note, built-in parser is always nr. 0 - see ftparser_call_initializer()
3877 */
3878 if (sort_param.keyinfo->ftkey_nr == 0)
3879 {
3880 /*
3881 for built-in parser the number of generated index entries
3882 cannot be larger than the size of the data file divided
3883 by the minimal word's length
3884 */
3885 sort_info.max_records=
3886 (ha_rows) (sort_info.filelength/ft_min_word_len+1);
3887 }
3888 else
3889 {
3890 /*
3891 for external plugin parser we cannot tell anything at all :(
3892 so, we'll use all the sort memory and start from ~10 buffpeks.
3893 (see _ma_create_index_by_sort)
3894 */
3895 sort_info.max_records=
3896 10*param->sort_buffer_length/sort_param.key_length;
3897 }
3898
3899 sort_param.key_read= sort_maria_ft_key_read;
3900 sort_param.key_write= sort_maria_ft_key_write;
3901 }
3902 else
3903 {
3904 sort_param.key_read= sort_key_read;
3905 sort_param.key_write= sort_key_write;
3906 }
3907
3908 if (sort_info.new_info->s->data_file_type == BLOCK_RECORD)
3909 {
3910 scan_inited= 1;
3911 if (maria_scan_init(sort_info.info))
3912 goto err;
3913 }
3914 if (_ma_create_index_by_sort(&sort_param,
3915 (my_bool) (!(param->testflag & T_VERBOSE)),
3916 (size_t) param->sort_buffer_length))
3917 {
3918 if ((param->testflag & T_CREATE_UNIQUE_BY_SORT) && sort_param.sort_info->dupp)
3919 share->state.dupp_key= sort_param.key;
3920 else
3921 param->retry_repair= 1;
3922 _ma_check_print_error(param, "Create index by sort failed");
3923 goto err;
3924 }
3925 DBUG_EXECUTE_IF("maria_flush_whole_log",
3926 {
3927 DBUG_PRINT("maria_flush_whole_log", ("now"));
3928 translog_flush(translog_get_horizon());
3929 });
3930 DBUG_EXECUTE_IF("maria_crash_create_index_by_sort",
3931 {
3932 DBUG_PRINT("maria_crash_create_index_by_sort", ("now"));
3933 DBUG_SUICIDE();
3934 });
3935 if (scan_inited)
3936 {
3937 scan_inited= 0;
3938 maria_scan_end(sort_info.info);
3939 }
3940
3941 /* No need to calculate checksum again. */
3942 sort_param.calc_checksum= 0;
3943 free_root(&sort_param.wordroot, MYF(0));
3944
3945 /* Set for next loop */
3946 sort_info.max_records= (ha_rows) sort_info.new_info->s->state.state.records;
3947 param->stage++; /* Next stage */
3948 param->progress= 0;
3949
3950 if (param->testflag & T_STATISTICS)
3951 maria_update_key_parts(sort_param.keyinfo, rec_per_key_part,
3952 sort_param.unique,
3953 (param->stats_method ==
3954 MI_STATS_METHOD_IGNORE_NULLS ?
3955 sort_param.notnull : NULL),
3956 (ulonglong) share->state.state.records);
3957 maria_set_key_active(share->state.key_map, sort_param.key);
3958 DBUG_PRINT("repair", ("set enabled index #: %u", sort_param.key));
3959
3960 if (_ma_flush_table_files_before_swap(param, info))
3961 goto err;
3962
3963 if (sort_param.fix_datafile)
3964 {
3965 param->read_cache.end_of_file=sort_param.filepos;
3966 if (maria_write_data_suffix(&sort_info,1) ||
3967 end_io_cache(&sort_info.new_info->rec_cache))
3968 {
3969 _ma_check_print_error(param, "Got error when flushing row cache");
3970 goto err;
3971 }
3972 sort_info.new_info->opt_flag&= ~WRITE_CACHE_USED;
3973
3974 if (param->testflag & T_SAFE_REPAIR)
3975 {
3976 /* Don't repair if we loosed more than one row */
3977 if (sort_info.new_info->s->state.state.records+1 < start_records)
3978 {
3979 _ma_check_print_error(param,
3980 "Rows lost (Found %lu of %lu); Aborting "
3981 "because safe repair was requested",
3982 (ulong) sort_info.new_info->s->
3983 state.state.records,
3984 (ulong) start_records);
3985 share->state.state.records=start_records;
3986 goto err;
3987 }
3988 }
3989
3990 sort_info.new_info->s->state.state.data_file_length= sort_param.filepos;
3991 if (sort_info.new_info != sort_info.info)
3992 {
3993 MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
3994 if (maria_close(sort_info.new_info))
3995 {
3996 _ma_check_print_error(param, "Got error %d on close", my_errno);
3997 goto err;
3998 }
3999 copy_data_file_state(&share->state, &save_state);
4000 new_file= -1;
4001 sort_info.new_info= info;
4002 info->rec_cache.file= info->dfile.file;
4003 }
4004
4005 share->state.version=(ulong) time((time_t*) 0); /* Force reopen */
4006
4007 /* Replace the actual file with the temporary file */
4008 if (new_file >= 0)
4009 {
4010 mysql_file_close(new_file, MYF(MY_WME));
4011 new_file= -1;
4012 }
4013 change_data_file_descriptor(info, -1);
4014 if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
4015 DATA_TMP_EXT, param->backup_time,
4016 (param->testflag & T_BACKUP_DATA ?
4017 MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
4018 sync_dir) ||
4019 _ma_open_datafile(info, share))
4020 {
4021 _ma_check_print_error(param, "Couldn't change to new data file");
4022 goto err;
4023 }
4024 if (param->testflag & T_UNPACK)
4025 restore_data_file_type(share);
4026
4027 org_header_length= share->pack.header_length;
4028 sort_info.org_data_file_type= share->data_file_type;
4029 sort_info.filelength= share->state.state.data_file_length;
4030 sort_param.fix_datafile=0;
4031
4032 /* Offsets are now in proportion to the new file length */
4033 param->max_progress= sort_info.filelength;
4034
4035 }
4036 else
4037 share->state.state.data_file_length=sort_param.max_pos;
4038
4039 param->read_cache.file= info->dfile.file; /* re-init read cache */
4040 if (share->data_file_type != BLOCK_RECORD)
4041 reinit_io_cache(¶m->read_cache, READ_CACHE,
4042 share->pack.header_length, 1, 1);
4043 }
4044
4045 if (param->testflag & T_WRITE_LOOP)
4046 {
4047 fputs(" \r",stdout);
4048 fflush(stdout);
4049 }
4050
4051 if (rep_quick && del+sort_info.dupp != share->state.state.del)
4052 {
4053 _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
4054 "Found wrong number of deleted records");
4055 _ma_check_print_error(param,"Run recovery again without -q");
4056 got_error=1;
4057 param->retry_repair=1;
4058 param->testflag|=T_RETRY_WITHOUT_QUICK;
4059 goto err;
4060 }
4061
4062 if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
4063 {
4064 my_off_t skr= share->state.state.data_file_length +
4065 ((sort_info.org_data_file_type == COMPRESSED_RECORD) ?
4066 MEMMAP_EXTRA_MARGIN : 0);
4067 #ifdef USE_RELOC
4068 if (sort_info.org_data_file_type == STATIC_RECORD &&
4069 skr < share->base.reloc*share->base.min_pack_length)
4070 skr=share->base.reloc*share->base.min_pack_length;
4071 #endif
4072 if (skr != sort_info.filelength)
4073 if (mysql_file_chsize(info->dfile.file, skr, 0, MYF(0)))
4074 _ma_check_print_warning(param,
4075 "Can't change size of datafile, error: %d",
4076 my_errno);
4077 }
4078
4079 if (param->testflag & T_CALC_CHECKSUM)
4080 share->state.state.checksum=param->glob_crc;
4081
4082 if (mysql_file_chsize(share->kfile.file,
4083 share->state.state.key_file_length, 0, MYF(0)))
4084 _ma_check_print_warning(param,
4085 "Can't change size of indexfile, error: %d",
4086 my_errno);
4087
4088 if (!(param->testflag & T_SILENT))
4089 {
4090 if (start_records != share->state.state.records)
4091 printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
4092 }
4093 if (sort_info.dupp)
4094 _ma_check_print_warning(param,
4095 "%s records have been removed",
4096 llstr(sort_info.dupp,llbuff));
4097 got_error=0;
4098 /* If invoked by external program that uses thr_lock */
4099 if (&share->state.state != info->state)
4100 *info->state= *info->state_start= share->state.state;
4101
4102 err:
4103 if (scan_inited)
4104 maria_scan_end(sort_info.info);
4105 _ma_reset_state(info);
4106
4107 if (sort_info.new_info)
4108 {
4109 end_io_cache(&sort_info.new_info->rec_cache);
4110 sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4111 }
4112 end_io_cache(¶m->read_cache);
4113 info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4114 if (got_error)
4115 {
4116 if (! param->error_printed)
4117 _ma_check_print_error(param,"%d when fixing table",my_errno);
4118 (void)_ma_flush_table_files_before_swap(param, info);
4119 if (sort_info.new_info && sort_info.new_info != sort_info.info)
4120 {
4121 unuse_data_file_descriptor(sort_info.new_info);
4122 maria_close(sort_info.new_info);
4123 }
4124 if (new_file >= 0)
4125 {
4126 mysql_file_close(new_file, MYF(0));
4127 mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
4128 }
4129 maria_mark_crashed_on_repair(info);
4130 }
4131 else
4132 {
4133 if (key_map == share->state.key_map)
4134 share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
4135 /*
4136 Now that we have flushed and forced everything, we can bump
4137 create_rename_lsn:
4138 */
4139 DBUG_EXECUTE_IF("maria_flush_whole_log",
4140 {
4141 DBUG_PRINT("maria_flush_whole_log", ("now"));
4142 translog_flush(translog_get_horizon());
4143 });
4144 DBUG_EXECUTE_IF("maria_crash_repair",
4145 {
4146 DBUG_PRINT("maria_crash_repair", ("now"));
4147 DBUG_SUICIDE();
4148 });
4149 }
4150 share->state.changed|= STATE_NOT_SORTED_PAGES;
4151 if (!rep_quick)
4152 share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
4153 STATE_NOT_MOVABLE);
4154
4155 /* If caller had disabled logging it's not up to us to re-enable it */
4156 if (reenable_logging)
4157 _ma_reenable_logging_for_table(info, FALSE);
4158 restore_table_state_after_repair(info, &backup_share);
4159
4160 my_free(sort_param.rec_buff);
4161 my_free(sort_param.record);
4162 my_free(sort_info.key_block);
4163 my_free(sort_info.ft_buf);
4164 my_free(sort_info.buff);
4165 DBUG_RETURN(got_error);
4166 }
4167
4168
4169 /*
4170 Threaded repair of table using sorting
4171
4172 SYNOPSIS
4173 maria_repair_parallel()
4174 param Repair parameters
4175 info MARIA handler to repair
4176 name Name of table (for warnings)
4177 rep_quick set to <> 0 if we should not change data file
4178
4179 DESCRIPTION
4180 Same as maria_repair_by_sort but do it multithreaded
4181 Each key is handled by a separate thread.
4182 TODO: make a number of threads a parameter
4183
4184 In parallel repair we use one thread per index. There are two modes:
4185
4186 Quick
4187
4188 Only the indexes are rebuilt. All threads share a read buffer.
4189 Every thread that needs fresh data in the buffer enters the shared
4190 cache lock. The last thread joining the lock reads the buffer from
4191 the data file and wakes all other threads.
4192
4193 Non-quick
4194
4195 The data file is rebuilt and all indexes are rebuilt to point to
4196 the new record positions. One thread is the master thread. It
4197 reads from the old data file and writes to the new data file. It
4198 also creates one of the indexes. The other threads read from a
4199 buffer which is filled by the master. If they need fresh data,
    they enter the shared cache lock. If the master's write buffer is
    full, it flushes it to the new data file and enters the shared
4202 cache lock too. When all threads joined in the lock, the master
4203 copies its write buffer to the read buffer for the other threads
4204 and wakes them.
4205
4206 RESULT
4207 0 ok
4208 <>0 Error
4209 */
4210
int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
                          const char * name, my_bool rep_quick)
{
  int got_error;
  uint i,key, istep;
  ha_rows start_records;
  my_off_t new_header_length,del;
  File new_file;
  MARIA_SORT_PARAM *sort_param=0, tmp_sort_param;
  MARIA_SHARE *share= info->s;
  double *rec_per_key_part;
  HA_KEYSEG *keyseg;
  char llbuff[22];
  IO_CACHE new_data_cache; /* For non-quick repair. */
  IO_CACHE_SHARE io_share;
  MARIA_SORT_INFO sort_info;
  MARIA_SHARE backup_share;
  ulonglong UNINIT_VAR(key_map);
  pthread_attr_t thr_attr;
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
                 MY_SYNC_DIR : 0);
  my_bool reenable_logging= 0;
  /*
    Set once sort_info.mutex and sort_info.cond have been initialized, so
    that the cleanup code at 'err' only destroys them if they exist.
  */
  my_bool sync_objects_inited= 0;
  DBUG_ENTER("maria_repair_parallel");

  /* Assume failure until the whole repair has succeeded */
  got_error= 1;
  new_file= -1;
  start_records= share->state.state.records;
  if (!(param->testflag & T_SILENT))
  {
    printf("- parallel recovering (with sort) Aria-table '%s'\n",name);
    printf("Data records: %s\n", llstr(start_records, llbuff));
  }

  /* Zero the cache so that the cleanup code can safely inspect it */
  bzero(&new_data_cache, sizeof(new_data_cache));
  /*
    NOTE(review): this 'goto err' is taken before the mutex/condition are
    initialized; the cleanup code relies on sync_objects_inited for that.
    The cleanup code also reads sort_info and backup_share - presumably
    initialize_variables_for_repair() leaves them in a usable state even
    when it fails; confirm.
  */
  if (initialize_variables_for_repair(param, &sort_info, &tmp_sort_param, info,
                                      rep_quick, &backup_share))
    goto err;

  if ((reenable_logging= share->now_transactional))
    _ma_tmp_disable_logging_for_table(info, 0);

  /* When unpacking, the new data file is written without a pack header */
  new_header_length= ((param->testflag & T_UNPACK) ? 0 :
                      share->pack.header_length);

  /*
    Quick repair (not touching data file, rebuilding indexes):
    {
      Read cache is (HA_CHECK *param)->read_cache using info->dfile.file.
    }

    Non-quick repair (rebuilding data file and indexes):
    {
      Master thread:

        Read cache is (HA_CHECK *param)->read_cache using info->dfile.file.
        Write cache is (MARIA_INFO *info)->rec_cache using new_file.

      Slave threads:

        Read cache is new_data_cache synced to master rec_cache.

      The final assignment of the filedescriptor for rec_cache is done
      after the cache creation.

      Don't check file size on new_data_cache, as the resulting file size
      is not known yet.

      As rec_cache and new_data_cache are synced, write_buffer_length is
      used for the read cache 'new_data_cache'. Both start at the same
      position 'new_header_length'.
    }
  */
  DBUG_PRINT("info", ("is quick repair: %d", (int) rep_quick));
  if (!rep_quick)
    my_b_clear(&new_data_cache);

  /*
    Initialize the pthread structures before any of the following
    'goto err', and remember that they now have to be destroyed.
  */
  mysql_mutex_init(key_SORT_INFO_mutex, &sort_info.mutex, MY_MUTEX_INIT_FAST);
  mysql_cond_init(key_SORT_INFO_cond, &sort_info.cond, 0);
  sync_objects_inited= 1;

  if (!(sort_info.key_block=
        alloc_key_blocks(param, (uint) param->sort_key_blocks,
                         share->base.max_key_block_length)))
    goto err;

  if (init_io_cache(&param->read_cache, info->dfile.file,
                    (uint) param->read_buffer_length,
                    READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
    goto err;

  sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
  info->opt_flag|=WRITE_CACHE_USED;
  info->rec_cache.file= info->dfile.file;        /* for sort_delete_record */

  if (!rep_quick)
  {
    /* Get real path for data file */
    if ((new_file= mysql_file_create(key_file_tmp,
                                     fn_format(param->temp_filename,
                                               share->data_file_name.str, "",
                                               DATA_TMP_EXT,
                                               2+4),
                                     0,param->tmpfile_createflag,
                                     MYF(0))) < 0)
    {
      _ma_check_print_error(param,"Can't create new tempfile: '%s'",
                            param->temp_filename);
      goto err;
    }
    /* Copy the existing header (if any is kept) to the new data file */
    if (new_header_length &&
        maria_filecopy(param, new_file, info->dfile.file,0L,new_header_length,
                       "datafile-header"))
      goto err;
    if (param->testflag & T_UNPACK)
      restore_data_file_type(share);
    share->state.dellink= HA_OFFSET_ERROR;

    /* Read cache for the slave threads; it has no file of its own (-1) */
    if (init_io_cache(&new_data_cache, -1,
                      (uint) param->write_buffer_length,
                      READ_CACHE, new_header_length, 1,
                      MYF(MY_WME | MY_DONT_CHECK_FILESIZE)))
      goto err;

    /* Write cache of the master thread, writing to the new data file */
    if (init_io_cache(&info->rec_cache, new_file,
                      (uint) param->write_buffer_length,
                      WRITE_CACHE, new_header_length, 1,
                      MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
      goto err;

  }

  /* Optionally drop indexes and optionally modify the key_map. */
  maria_drop_all_indexes(param, info, FALSE);
  key_map= share->state.key_map;
  if (param->testflag & T_CREATE_MISSING_KEYS)
  {
    /* Invert the copied key_map to recreate all disabled indexes. */
    key_map= ~key_map;
  }

  param->read_cache.end_of_file= sort_info.filelength;

  /*
    +1 below is required hack for parallel repair mode.
    The share->state.state.records value, that is compared later
    to sort_info.max_records and cannot exceed it, is
    increased in sort_key_write. In maria_repair_by_sort, sort_key_write
    is called after sort_key_read, where the comparison is performed,
    but in parallel mode master thread can call sort_key_write
    before some other repair thread calls sort_key_read.
    Furthermore I'm not even sure +1 would be enough.
    May be sort_info.max_records should be always set to max value in
    parallel mode.
  */
  sort_info.max_records++;

  /* Remember the deleted-row count for the quick repair check below */
  del=share->state.state.del;

  /*
    One allocation holds share->base.keys MARIA_SORT_PARAM structures,
    followed by one record buffer of pack_reclength bytes per key.
  */
  if (!(sort_param=(MARIA_SORT_PARAM *)
        my_malloc((uint) share->base.keys *
                  (sizeof(MARIA_SORT_PARAM) + share->base.pack_reclength),
                  MYF(MY_ZEROFILL))))
  {
    _ma_check_print_error(param,"Not enough memory for key!");
    goto err;
  }
#ifdef USING_SECOND_APPROACH
  uint total_key_length=0;
#endif
  rec_per_key_part= param->new_rec_per_key_part;
  share->state.state.records=share->state.state.del=share->state.split=0;
  share->state.state.empty=0;

  /*
    'key' walks over all keys of the table while 'i' only advances for
    keys that are going to be rebuilt: istep is 0 for a skipped key, so
    its sort_param slot is reused for the next key.
  */
  for (i=key=0, istep=1 ; key < share->base.keys ;
       rec_per_key_part+=sort_param[i].keyinfo->keysegs, i+=istep, key++)
  {
    sort_param[i].key=key;
    sort_param[i].keyinfo=share->keyinfo+key;
    sort_param[i].seg=sort_param[i].keyinfo->seg;
    /*
      Skip this index if it is marked disabled in the copied
      (and possibly inverted) key_map.
    */
    if (! maria_is_key_active(key_map, key))
    {
      /* Remember old statistics for key */
      memcpy((char*) rec_per_key_part,
             (char*) (share->state.rec_per_key_part+
                      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
             sort_param[i].keyinfo->keysegs*sizeof(*rec_per_key_part));
      istep=0;
      continue;
    }
    istep=1;
    if ((!(param->testflag & T_SILENT)))
      printf ("- Fixing index %d\n",key+1);
    if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
    {
      sort_param[i].key_read=sort_maria_ft_key_read;
      sort_param[i].key_write=sort_maria_ft_key_write;
    }
    else
    {
      sort_param[i].key_read=sort_key_read;
      sort_param[i].key_write=sort_key_write;
    }
    sort_param[i].key_cmp=sort_key_cmp;
    sort_param[i].lock_in_memory=maria_lock_memory;
    sort_param[i].tmpdir=param->tmpdir;
    sort_param[i].sort_info=&sort_info;
    /* Master related settings are given to slot 0 after the loop */
    sort_param[i].master=0;
    sort_param[i].fix_datafile=0;
    sort_param[i].calc_checksum= 0;

    sort_param[i].filepos=new_header_length;
    sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length;

    /* Record buffer i lives directly behind the array of sort params */
    sort_param[i].record= (((uchar *)(sort_param+share->base.keys))+
                           (share->base.pack_reclength * i));
    if (_ma_alloc_buffer(&sort_param[i].rec_buff, &sort_param[i].rec_buff_size,
                         share->base.default_rec_buff_size, MYF(0)))
    {
      _ma_check_print_error(param,"Not enough memory!");
      goto err;
    }
    /* Sort key length: row reference plus the length of every segment */
    sort_param[i].key_length=share->rec_reflength;
    for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END;
         keyseg++)
    {
      sort_param[i].key_length+=keyseg->length;
      if (keyseg->flag & HA_SPACE_PACK)
        sort_param[i].key_length+=get_pack_length(keyseg->length);
      if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
        sort_param[i].key_length+= 2 + MY_TEST(keyseg->length >= 127);
      if (keyseg->flag & HA_NULL_PART)
        sort_param[i].key_length++;
    }
#ifdef USING_SECOND_APPROACH
    total_key_length+=sort_param[i].key_length;
#endif

    if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
    {
      uint ft_max_word_len_for_sort=
        (FT_MAX_WORD_LEN_FOR_SORT *
         sort_param[i].keyinfo->seg->charset->mbmaxlen);
      sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
      init_alloc_root(&sort_param[i].wordroot, "sort",
                      FTPARSER_MEMROOT_ALLOC_SIZE, 0,
                      MYF(param->malloc_flags));
    }
  }
  /* i is now the number of keys to rebuild, one thread for each */
  sort_info.total_keys=i;
  /* The first slot is the master: only it may fix data file and checksum */
  sort_param[0].master= 1;
  sort_param[0].fix_datafile= ! rep_quick;
  sort_param[0].calc_checksum= MY_TEST(param->testflag & T_CALC_CHECKSUM);

  if (!maria_ftparser_alloc_param(info))
    goto err;

  sort_info.got_error=0;
  /*
    The mutex is held while the threads are created and is only released
    by the mysql_cond_wait() below.
  */
  mysql_mutex_lock(&sort_info.mutex);

  /*
    Initialize the I/O cache share for use with the read caches and, in
    case of non-quick repair, the write cache. When all threads join on
    the cache lock, the writer copies the write cache contents to the
    read caches.
  */
  if (i > 1)
  {
    if (rep_quick)
      init_io_cache_share(&param->read_cache, &io_share, NULL, i);
    else
      init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i);
  }
  else
    io_share.total_threads= 0; /* share not used */

  (void) pthread_attr_init(&thr_attr);
  (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);

  for (i=0 ; i < sort_info.total_keys ; i++)
  {
    /*
      Copy the properly initialized IO_CACHE structure so that every
      thread has its own copy. In quick mode param->read_cache is shared
      for use by all threads. In non-quick mode all threads but the
      first copy the shared new_data_cache, which is synchronized to the
      write cache of the first thread. The first thread copies
      param->read_cache, which is not shared.
    */
    sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache :
                               new_data_cache);
    DBUG_PRINT("io_cache_share", ("thread: %u read_cache: %p",
                                  i, &sort_param[i].read_cache));

    /*
      two approaches: the same amount of memory for each thread
      or the memory for the same number of keys for each thread...
      In the second one all the threads will fill their sort_buffers
      (and call write_keys) at the same time, putting more stress on i/o.
    */
    sort_param[i].sortbuff_size=
#ifndef USING_SECOND_APPROACH
      param->sort_buffer_length/sort_info.total_keys;
#else
      param->sort_buffer_length*sort_param[i].key_length/total_key_length;
#endif
    if (mysql_thread_create(key_thread_find_all_keys,
                            &sort_param[i].thr, &thr_attr,
                            _ma_thr_find_all_keys, (void *) (sort_param+i)))
    {
      _ma_check_print_error(param,"Cannot start a repair thread");
      /* Cleanup: Detach from the share. Avoid others to be blocked. */
      if (io_share.total_threads)
        remove_io_thread(&sort_param[i].read_cache);
      DBUG_PRINT("error", ("Cannot start a repair thread"));
      sort_info.got_error=1;
    }
    else
      sort_info.threads_running++;
  }
  (void) pthread_attr_destroy(&thr_attr);

  /* waiting for all threads to finish */
  while (sort_info.threads_running)
    mysql_cond_wait(&sort_info.cond, &sort_info.mutex);
  mysql_mutex_unlock(&sort_info.mutex);

  if ((got_error= _ma_thr_write_keys(sort_param)))
  {
    param->retry_repair=1;
    goto err;
  }
  got_error=1;                                /* Assume the following may go wrong */

  if (_ma_flush_table_files_before_swap(param, info))
    goto err;

  if (sort_param[0].fix_datafile)
  {
    /*
      Append some nulls to the end of a memory mapped file. Destroy the
      write cache. The master thread did already detach from the share
      by remove_io_thread() in sort.c:thr_find_all_keys().
    */
    if (maria_write_data_suffix(&sort_info,1) ||
        end_io_cache(&info->rec_cache))
      goto err;
    if (param->testflag & T_SAFE_REPAIR)
    {
      /* Don't repair if we lost more than one row */
      if (sort_info.new_info->s->state.state.records+1 < start_records)
      {
        _ma_check_print_error(param,
                              "Rows lost (Found %lu of %lu); Aborting "
                              "because safe repair was requested",
                              (ulong) share->state.state.records,
                              (ulong) start_records);
        share->state.state.records=start_records;
        goto err;
      }
    }
    share->state.state.data_file_length= sort_param->filepos;
    /* Only whole records */
    share->state.version= (ulong) time((time_t*) 0);
    /*
      Exchange the data file descriptor of the table, so that we use the
      new file from now on.
    */
    mysql_file_close(info->dfile.file, MYF(0));
    info->dfile.file= new_file;
    share->pack.header_length=(ulong) new_header_length;
  }
  else
    share->state.state.data_file_length=sort_param->max_pos;

  /*
    A quick repair does not rewrite the data file, so the number of
    deleted rows found (plus removed duplicates) must match the count
    saved in 'del' before the repair; otherwise a full repair is needed.
  */
  if (rep_quick && del+sort_info.dupp != share->state.state.del)
  {
    _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
                          "Found wrong number of deleted records");
    _ma_check_print_error(param,"Run recovery again without -q");
    param->retry_repair=1;
    param->testflag|=T_RETRY_WITHOUT_QUICK;
    goto err;
  }

  if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
  {
    my_off_t skr= share->state.state.data_file_length +
      ((sort_info.org_data_file_type == COMPRESSED_RECORD) ?
       MEMMAP_EXTRA_MARGIN : 0);
#ifdef USE_RELOC
    if (sort_info.org_data_file_type == STATIC_RECORD &&
        skr < share->base.reloc*share->base.min_pack_length)
      skr=share->base.reloc*share->base.min_pack_length;
#endif
    /* Resize the data file if its expected length differs from the old one */
    if (skr != sort_info.filelength)
      if (mysql_file_chsize(info->dfile.file, skr, 0, MYF(0)))
        _ma_check_print_warning(param,
                                "Can't change size of datafile,  error: %d",
                                my_errno);
  }
  if (param->testflag & T_CALC_CHECKSUM)
    share->state.state.checksum=param->glob_crc;

  /* A failed resize of the index file is only reported as a warning */
  if (mysql_file_chsize(share->kfile.file,
                        share->state.state.key_file_length, 0, MYF(0)))
    _ma_check_print_warning(param,
                            "Can't change size of indexfile, error: %d",
                            my_errno);

  if (!(param->testflag & T_SILENT))
  {
    if (start_records != share->state.state.records)
      printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
  }
  if (sort_info.dupp)
    _ma_check_print_warning(param,
                            "%s records have been removed",
                            llstr(sort_info.dupp,llbuff));
  got_error=0;
  /* If invoked by external program that uses thr_lock */
  if (&share->state.state != info->state)
    *info->state= *info->state_start= share->state.state;

err:
  /* Common cleanup for both the success and the failure path */
  _ma_reset_state(info);

  /*
    Destroy the write cache. The master thread did already detach from
    the share by remove_io_thread() or it was not yet started (if the
    error happened before creating the thread).
  */
  if (sort_info.new_info)
  {
    end_io_cache(&sort_info.new_info->rec_cache);
    sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
  }
  end_io_cache(&param->read_cache);
  info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
  /*
    Destroy the new data cache in case of non-quick repair. All slave
    threads did either detach from the share by remove_io_thread()
    already or they were not yet started (if the error happened before
    creating the threads).
  */
  if (!rep_quick && my_b_inited(&new_data_cache))
    end_io_cache(&new_data_cache);
  if (!got_error)
  {
    /* Replace the actual file with the temporary file */
    if (new_file >= 0)
    {
      mysql_file_close(new_file,MYF(0));
      info->dfile.file= new_file= -1;
      if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
                                  DATA_TMP_EXT, param->backup_time,
                                  MYF((param->testflag & T_BACKUP_DATA ?
                                       MY_REDEL_MAKE_BACKUP : 0) |
                                      sync_dir)) ||
          _ma_open_datafile(info,share))
        got_error=1;
    }
  }
  if (got_error)
  {
    if (! param->error_printed)
      _ma_check_print_error(param,"%d when fixing table",my_errno);
    (void)_ma_flush_table_files_before_swap(param, info);
    /* Remove the unfinished temporary data file */
    if (new_file >= 0)
    {
      mysql_file_close(new_file,MYF(0));
      mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
      if (info->dfile.file == new_file)
        info->dfile.file= -1;
    }
    maria_mark_crashed_on_repair(info);
  }
  else if (key_map == share->state.key_map)
    share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
  share->state.changed|= STATE_NOT_SORTED_PAGES;
  if (!rep_quick)
    share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
                             STATE_NOT_MOVABLE);

  /*
    Only destroy the synchronization objects if they were initialized;
    the first 'goto err' above is taken before that point.
  */
  if (sync_objects_inited)
  {
    mysql_cond_destroy (&sort_info.cond);
    mysql_mutex_destroy(&sort_info.mutex);
  }

  /* If caller had disabled logging it's not up to us to re-enable it */
  if (reenable_logging)
    _ma_reenable_logging_for_table(info, FALSE);
  restore_table_state_after_repair(info, &backup_share);

  my_free(sort_info.ft_buf);
  my_free(sort_info.key_block);
  my_free(sort_param);
  my_free(sort_info.buff);
  if (!got_error && (param->testflag & T_UNPACK))
    restore_data_file_type(share);
  DBUG_RETURN(got_error);
}
4714
4715 /* Read next record and return next key */
4716
sort_key_read(MARIA_SORT_PARAM * sort_param,uchar * key)4717 static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
4718 {
4719 int error;
4720 MARIA_SORT_INFO *sort_info= sort_param->sort_info;
4721 MARIA_HA *info= sort_info->info;
4722 MARIA_KEY int_key;
4723 DBUG_ENTER("sort_key_read");
4724
4725 if ((error=sort_get_next_record(sort_param)))
4726 DBUG_RETURN(error);
4727 if (info->s->state.state.records == sort_info->max_records)
4728 {
4729 _ma_check_print_error(sort_info->param,
4730 "Key %d - Found too many records; Can't continue",
4731 sort_param->key+1);
4732 DBUG_RETURN(1);
4733 }
4734 if (_ma_sort_write_record(sort_param))
4735 DBUG_RETURN(1);
4736
4737 (*info->s->keyinfo[sort_param->key].make_key)(info, &int_key,
4738 sort_param->key, key,
4739 sort_param->record,
4740 sort_param->current_filepos,
4741 0);
4742 sort_param->real_key_length= int_key.data_length + int_key.ref_length;
4743 #ifdef HAVE_valgrind
4744 bzero(key+sort_param->real_key_length,
4745 (sort_param->key_length-sort_param->real_key_length));
4746 #endif
4747 DBUG_RETURN(0);
4748 } /* sort_key_read */
4749
4750
/*
  Return the next fulltext key for the sort

  SYNOPSIS
    sort_maria_ft_key_read()
    sort_param          Information about and for the sort process
    key                 Buffer in which the key for the next word is built

  DESCRIPTION
    One record can give many keys, so the word list of the current
    record is kept in sort_param->wordlist and the next word to use in
    sort_param->wordptr. When there is no pending word list, records
    are read (and written with _ma_sort_write_record()) until
    _ma_ft_parserecord() returns a list whose first entry has a
    non-zero 'pos'. Presumably an entry with pos == 0 marks the end of
    the list - confirm against the fulltext parser.

  RETURN
    0     ok
    <>0   Value returned by sort_get_next_record() or
          _ma_sort_write_record() if one of them failed, or 1 if the
          record could not be parsed
*/

static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
{
  int rc;
  MARIA_SORT_INFO *sinfo= sort_param->sort_info;
  MARIA_HA *handler= sinfo->info;
  FT_WORD *word= 0;
  MARIA_KEY ft_key;
  DBUG_ENTER("sort_maria_ft_key_read");

  if (sort_param->wordlist)
  {
    /* Continue with the pending words of the current record */
    rc= 0;
    word= (FT_WORD*) (sort_param->wordptr);
  }
  else
  {
    /* Fetch records until one of them gives at least one word */
    do
    {
      free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
      rc= sort_get_next_record(sort_param);
      if (rc)
      {
        DBUG_RETURN(rc);
      }
      rc= _ma_sort_write_record(sort_param);
      if (rc)
      {
        DBUG_RETURN(rc);
      }
      word= _ma_ft_parserecord(handler, sort_param->key, sort_param->record,
                               &sort_param->wordroot);
      if (!word)
      {
        DBUG_RETURN(1);
      }
    } while (!word->pos);
    sort_param->wordptr= sort_param->wordlist= word;
  }

  /* Build the key for the current word, then step to the next one */
  _ma_ft_make_key(handler, &ft_key, sort_param->key, key, word,
                  sort_param->current_filepos);
  word++;
  sort_param->real_key_length= ft_key.data_length + ft_key.ref_length;

#ifdef HAVE_valgrind
  if (sort_param->key_length > sort_param->real_key_length)
  {
    bzero(key + sort_param->real_key_length,
          (sort_param->key_length - sort_param->real_key_length));
  }
#endif
  if (word->pos)
  {
    /* More words left; remember where to continue on the next call */
    sort_param->wordptr= (void*) word;
  }
  else
  {
    /* List is used up; the next call has to read a new record */
    free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
    sort_param->wordlist= 0;
  }

  DBUG_RETURN(rc);
} /* sort_maria_ft_key_read */
4803
4804
4805 /*
4806 Read next record from file using parameters in sort_info.
4807
4808 SYNOPSIS
4809 sort_get_next_record()
4810 sort_param Information about and for the sort process
4811
4812 NOTES
4813 Dynamic Records With Non-Quick Parallel Repair
4814
4815 For non-quick parallel repair we use a synchronized read/write
4816 cache. This means that one thread is the master who fixes the data
4817 file by reading each record from the old data file and writing it
4818 to the new data file. By doing this the records in the new data
4819 file are written contiguously. Whenever the write buffer is full,
4820 it is copied to the read buffer. The slaves read from the read
4821 buffer, which is not associated with a file. Thus read_cache.file
4822 is -1. When using _mi_read_cache(), the slaves must always set
4823 flag to READING_NEXT so that the function never tries to read from
4824 file. This is safe because the records are contiguous. There is no
4825 need to read outside the cache. This condition is evaluated in the
4826 variable 'parallel_flag' for quick reference. read_cache.file must
4827 be >= 0 in every other case.
4828
4829 RETURN
4830 -1 end of file
4831 0 ok
4832 sort_param->current_filepos points to record position.
4833 sort_param->record contains record
4834 sort_param->max_pos contains position to last byte read
4835 > 0 error
4836 */
4837
sort_get_next_record(MARIA_SORT_PARAM * sort_param)4838 static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
4839 {
4840 int searching;
4841 int parallel_flag;
4842 uint found_record,b_type,left_length;
4843 my_off_t pos;
4844 MARIA_BLOCK_INFO block_info;
4845 MARIA_SORT_INFO *sort_info=sort_param->sort_info;
4846 HA_CHECK *param=sort_info->param;
4847 MARIA_HA *info=sort_info->info;
4848 MARIA_SHARE *share= info->s;
4849 char llbuff[22],llbuff2[22];
4850 DBUG_ENTER("sort_get_next_record");
4851
4852 if (_ma_killed_ptr(param))
4853 DBUG_RETURN(1);
4854 if (param->progress_counter++ >= WRITE_COUNT)
4855 {
4856 param->progress_counter= 0;
4857 _ma_report_progress(param, param->progress, param->max_progress);
4858 }
4859
4860 switch (sort_info->org_data_file_type) {
4861 case BLOCK_RECORD:
4862 {
4863 for (;;)
4864 {
4865 int flag;
4866 /*
4867 Assume table is transactional and it had LSN pages in the
4868 cache. Repair has flushed them, left data pages stay in
4869 cache, and disabled transactionality (so share's current page
4870 type is PLAIN); page cache would assert if it finds a cached LSN page
4871 while _ma_scan_block_record() requested a PLAIN page. So we use
4872 UNKNOWN.
4873 */
4874 enum pagecache_page_type save_page_type= share->page_type;
4875 share->page_type= PAGECACHE_READ_UNKNOWN_PAGE;
4876 if (info != sort_info->new_info)
4877 {
4878 /* Safe scanning */
4879 flag= _ma_safe_scan_block_record(sort_info, info,
4880 sort_param->record);
4881 }
4882 else
4883 {
4884 /*
4885 Scan on clean table.
4886 It requires a reliable data_file_length so we set it.
4887 */
4888 share->state.state.data_file_length= sort_info->filelength;
4889 info->cur_row.trid= 0;
4890 flag= _ma_scan_block_record(info, sort_param->record,
4891 info->cur_row.nextpos, 1);
4892 set_if_bigger(param->max_found_trid, info->cur_row.trid);
4893 if (info->cur_row.trid > param->max_trid)
4894 {
4895 _ma_check_print_not_visible_error(param, info->cur_row.trid);
4896 flag= HA_ERR_ROW_NOT_VISIBLE;
4897 }
4898 }
4899 param->progress= (ma_recordpos_to_page(info->cur_row.lastpos)*
4900 share->block_size);
4901
4902 share->page_type= save_page_type;
4903 if (!flag)
4904 {
4905 if (sort_param->calc_checksum)
4906 {
4907 ha_checksum checksum;
4908 checksum= (*share->calc_check_checksum)(info, sort_param->record);
4909 if (share->calc_checksum &&
4910 info->cur_row.checksum != (checksum & 255))
4911 {
4912 if (param->testflag & T_VERBOSE)
4913 {
4914 record_pos_to_txt(info, info->cur_row.lastpos, llbuff);
4915 _ma_check_print_info(param,
4916 "Found record with wrong checksum at %s",
4917 llbuff);
4918 }
4919 continue;
4920 }
4921 info->cur_row.checksum= checksum;
4922 param->glob_crc+= checksum;
4923 }
4924 sort_param->start_recpos= sort_param->current_filepos=
4925 info->cur_row.lastpos;
4926 DBUG_RETURN(0);
4927 }
4928 if (flag == HA_ERR_END_OF_FILE)
4929 {
4930 sort_param->max_pos= share->state.state.data_file_length;
4931 DBUG_RETURN(-1);
4932 }
4933 /* Retry only if wrong record, not if disk error */
4934 if (flag != HA_ERR_WRONG_IN_RECORD && flag != HA_ERR_WRONG_CRC &&
4935 flag != HA_ERR_DECRYPTION_FAILED)
4936 {
4937 retry_if_quick(sort_param, flag);
4938 DBUG_RETURN(flag);
4939 }
4940 }
4941 break; /* Impossible */
4942 }
4943 case STATIC_RECORD:
4944 for (;;)
4945 {
4946 if (my_b_read(&sort_param->read_cache,sort_param->record,
4947 share->base.pack_reclength))
4948 {
4949 if (sort_param->read_cache.error)
4950 param->out_flag |= O_DATA_LOST;
4951 retry_if_quick(sort_param, my_errno);
4952 DBUG_RETURN(-1);
4953 }
4954 sort_param->start_recpos=sort_param->pos;
4955 param->progress= sort_param->pos;
4956 if (!sort_param->fix_datafile)
4957 {
4958 sort_param->current_filepos= sort_param->pos;
4959 if (sort_param->master)
4960 share->state.split++;
4961 }
4962 sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength);
4963 if (*sort_param->record)
4964 {
4965 if (sort_param->calc_checksum)
4966 param->glob_crc+= (info->cur_row.checksum=
4967 _ma_static_checksum(info,sort_param->record));
4968 DBUG_RETURN(0);
4969 }
4970 if (!sort_param->fix_datafile && sort_param->master)
4971 {
4972 share->state.state.del++;
4973 share->state.state.empty+=share->base.pack_reclength;
4974 }
4975 }
4976 case DYNAMIC_RECORD:
4977 {
4978 uchar *UNINIT_VAR(to);
4979 ha_checksum checksum= 0;
4980
4981 pos=sort_param->pos;
4982 param->progress= pos;
4983 searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND));
4984 parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
4985 for (;;)
4986 {
4987 found_record=block_info.second_read= 0;
4988 left_length=1;
4989 if (searching)
4990 {
4991 pos=MY_ALIGN(pos,MARIA_DYN_ALIGN_SIZE);
4992 param->testflag|=T_RETRY_WITHOUT_QUICK;
4993 sort_param->start_recpos=pos;
4994 }
4995 do
4996 {
4997 if (pos > sort_param->max_pos)
4998 sort_param->max_pos=pos;
4999 if (pos & (MARIA_DYN_ALIGN_SIZE-1))
5000 {
5001 if ((param->testflag & T_VERBOSE) || searching == 0)
5002 _ma_check_print_info(param,"Wrong aligned block at %s",
5003 llstr(pos,llbuff));
5004 if (searching)
5005 goto try_next;
5006 }
5007 if (found_record && pos == param->search_after_block)
5008 _ma_check_print_info(param,"Block: %s used by record at %s",
5009 llstr(param->search_after_block,llbuff),
5010 llstr(sort_param->start_recpos,llbuff2));
5011 if (_ma_read_cache(info, &sort_param->read_cache,
5012 block_info.header, pos,
5013 MARIA_BLOCK_INFO_HEADER_LENGTH,
5014 (! found_record ? READING_NEXT : 0) |
5015 parallel_flag | READING_HEADER))
5016 {
5017 if (found_record)
5018 {
5019 _ma_check_print_info(param,
5020 "Can't read whole record at %s (errno: %d)",
5021 llstr(sort_param->start_recpos,llbuff),errno);
5022 goto try_next;
5023 }
5024 DBUG_RETURN(-1);
5025 }
5026 if (searching && ! sort_param->fix_datafile)
5027 {
5028 param->error_printed=1;
5029 param->retry_repair=1;
5030 param->testflag|=T_RETRY_WITHOUT_QUICK;
5031 my_errno= HA_ERR_WRONG_IN_RECORD;
5032 DBUG_RETURN(1); /* Something wrong with data */
5033 }
5034 b_type= _ma_get_block_info(info, &block_info,-1,pos);
5035 if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) ||
5036 ((b_type & BLOCK_FIRST) &&
5037 (block_info.rec_len < (uint) share->base.min_pack_length ||
5038 block_info.rec_len > (uint) share->base.max_pack_length)))
5039 {
5040 uint i;
5041 if (param->testflag & T_VERBOSE || searching == 0)
5042 _ma_check_print_info(param,
5043 "Wrong bytesec: %3d-%3d-%3d at %10s; Skipped",
5044 block_info.header[0],block_info.header[1],
5045 block_info.header[2],llstr(pos,llbuff));
5046 if (found_record)
5047 goto try_next;
5048 block_info.second_read=0;
5049 searching=1;
5050 /* Search after block in read header string */
5051 for (i=MARIA_DYN_ALIGN_SIZE ;
5052 i < MARIA_BLOCK_INFO_HEADER_LENGTH ;
5053 i+= MARIA_DYN_ALIGN_SIZE)
5054 if (block_info.header[i] >= 1 &&
5055 block_info.header[i] <= MARIA_MAX_DYN_HEADER_BYTE)
5056 break;
5057 pos+=(ulong) i;
5058 sort_param->start_recpos=pos;
5059 continue;
5060 }
5061 if (b_type & BLOCK_DELETED)
5062 {
5063 my_bool error=0;
5064 if (block_info.block_len+ (uint) (block_info.filepos-pos) <
5065 share->base.min_block_length)
5066 {
5067 if (!searching)
5068 _ma_check_print_info(param,
5069 "Deleted block with impossible length %lu "
5070 "at %s",
5071 block_info.block_len,llstr(pos,llbuff));
5072 error=1;
5073 }
5074 else
5075 {
5076 if ((block_info.next_filepos != HA_OFFSET_ERROR &&
5077 block_info.next_filepos >=
5078 share->state.state.data_file_length) ||
5079 (block_info.prev_filepos != HA_OFFSET_ERROR &&
5080 block_info.prev_filepos >=
5081 share->state.state.data_file_length))
5082 {
5083 if (!searching)
5084 _ma_check_print_info(param,
5085 "Delete link points outside datafile at "
5086 "%s",
5087 llstr(pos,llbuff));
5088 error=1;
5089 }
5090 }
5091 if (error)
5092 {
5093 if (found_record)
5094 goto try_next;
5095 searching=1;
5096 pos+= MARIA_DYN_ALIGN_SIZE;
5097 sort_param->start_recpos=pos;
5098 block_info.second_read=0;
5099 continue;
5100 }
5101 }
5102 else
5103 {
5104 if (block_info.block_len+ (uint) (block_info.filepos-pos) <
5105 share->base.min_block_length ||
5106 block_info.block_len > (uint) share->base.max_pack_length+
5107 MARIA_SPLIT_LENGTH)
5108 {
5109 if (!searching)
5110 _ma_check_print_info(param,
5111 "Found block with impossible length %lu "
5112 "at %s; Skipped",
5113 block_info.block_len+
5114 (uint) (block_info.filepos-pos),
5115 llstr(pos,llbuff));
5116 if (found_record)
5117 goto try_next;
5118 searching=1;
5119 pos+= MARIA_DYN_ALIGN_SIZE;
5120 sort_param->start_recpos=pos;
5121 block_info.second_read=0;
5122 continue;
5123 }
5124 }
5125 if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
5126 {
5127 if (!sort_param->fix_datafile && sort_param->master &&
5128 (b_type & BLOCK_DELETED))
5129 {
5130 share->state.state.empty+=block_info.block_len;
5131 share->state.state.del++;
5132 share->state.split++;
5133 }
5134 if (found_record)
5135 goto try_next;
5136 if (searching)
5137 {
5138 pos+=MARIA_DYN_ALIGN_SIZE;
5139 sort_param->start_recpos=pos;
5140 }
5141 else
5142 pos=block_info.filepos+block_info.block_len;
5143 block_info.second_read=0;
5144 continue;
5145 }
5146
5147 if (!sort_param->fix_datafile && sort_param->master)
5148 share->state.split++;
5149 if (! found_record++)
5150 {
5151 sort_param->find_length=left_length=block_info.rec_len;
5152 sort_param->start_recpos=pos;
5153 if (!sort_param->fix_datafile)
5154 sort_param->current_filepos= sort_param->start_recpos;
5155 if (sort_param->fix_datafile && (param->testflag & T_EXTEND))
5156 sort_param->pos=block_info.filepos+1;
5157 else
5158 sort_param->pos=block_info.filepos+block_info.block_len;
5159 if (share->base.blobs)
5160 {
5161 if (_ma_alloc_buffer(&sort_param->rec_buff,
5162 &sort_param->rec_buff_size,
5163 block_info.rec_len +
5164 share->base.extra_rec_buff_size, MYF(0)))
5165
5166 {
5167 if (param->max_record_length >= block_info.rec_len)
5168 {
5169 _ma_check_print_error(param,"Not enough memory for blob at %s "
5170 "(need %lu)",
5171 llstr(sort_param->start_recpos,llbuff),
5172 (ulong) block_info.rec_len);
5173 DBUG_RETURN(1);
5174 }
5175 else
5176 {
5177 _ma_check_print_info(param,"Not enough memory for blob at %s "
5178 "(need %lu); Row skipped",
5179 llstr(sort_param->start_recpos,llbuff),
5180 (ulong) block_info.rec_len);
5181 goto try_next;
5182 }
5183 }
5184 }
5185 to= sort_param->rec_buff;
5186 }
5187 if (left_length < block_info.data_len || ! block_info.data_len)
5188 {
5189 _ma_check_print_info(param,
5190 "Found block with too small length at %s; "
5191 "Skipped",
5192 llstr(sort_param->start_recpos,llbuff));
5193 goto try_next;
5194 }
5195 if (block_info.filepos + block_info.data_len >
5196 sort_param->read_cache.end_of_file)
5197 {
5198 _ma_check_print_info(param,
5199 "Found block that points outside data file "
5200 "at %s",
5201 llstr(sort_param->start_recpos,llbuff));
5202 goto try_next;
5203 }
5204 /*
5205 Copy information that is already read. Avoid accessing data
5206 below the cache start. This could happen if the header
5207 streched over the end of the previous buffer contents.
5208 */
5209 {
5210 uint header_len= (uint) (block_info.filepos - pos);
5211 uint prefetch_len= (MARIA_BLOCK_INFO_HEADER_LENGTH - header_len);
5212
5213 if (prefetch_len > block_info.data_len)
5214 prefetch_len= block_info.data_len;
5215 if (prefetch_len)
5216 {
5217 memcpy(to, block_info.header + header_len, prefetch_len);
5218 block_info.filepos+= prefetch_len;
5219 block_info.data_len-= prefetch_len;
5220 left_length-= prefetch_len;
5221 to+= prefetch_len;
5222 }
5223 }
5224 if (block_info.data_len &&
5225 _ma_read_cache(info, &sort_param->read_cache,to,block_info.filepos,
5226 block_info.data_len,
5227 (found_record == 1 ? READING_NEXT : 0) |
5228 parallel_flag))
5229 {
5230 _ma_check_print_info(param,
5231 "Read error for block at: %s (error: %d); "
5232 "Skipped",
5233 llstr(block_info.filepos,llbuff),my_errno);
5234 goto try_next;
5235 }
5236 left_length-=block_info.data_len;
5237 to+=block_info.data_len;
5238 pos=block_info.next_filepos;
5239 if (pos == HA_OFFSET_ERROR && left_length)
5240 {
5241 _ma_check_print_info(param,
5242 "Wrong block with wrong total length "
5243 "starting at %s",
5244 llstr(sort_param->start_recpos,llbuff));
5245 goto try_next;
5246 }
5247 if (pos + MARIA_BLOCK_INFO_HEADER_LENGTH >
5248 sort_param->read_cache.end_of_file)
5249 {
5250 _ma_check_print_info(param,
5251 "Found link that points at %s (outside data "
5252 "file) at %s",
5253 llstr(pos,llbuff2),
5254 llstr(sort_param->start_recpos,llbuff));
5255 goto try_next;
5256 }
5257 } while (left_length);
5258
5259 if (_ma_rec_unpack(info,sort_param->record,sort_param->rec_buff,
5260 sort_param->find_length) != MY_FILE_ERROR)
5261 {
5262 if (sort_param->read_cache.error < 0)
5263 DBUG_RETURN(1);
5264 if (sort_param->calc_checksum)
5265 checksum= (share->calc_check_checksum)(info, sort_param->record);
5266 if ((param->testflag & (T_EXTEND | T_REP)) || searching)
5267 {
5268 if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff,
5269 sort_param->find_length,
5270 (param->testflag & T_QUICK) &&
5271 sort_param->calc_checksum &&
5272 MY_TEST(share->calc_checksum), checksum))
5273 {
5274 _ma_check_print_info(param,"Found wrong packed record at %s",
5275 llstr(sort_param->start_recpos,llbuff));
5276 goto try_next;
5277 }
5278 }
5279 if (sort_param->calc_checksum)
5280 param->glob_crc+= checksum;
5281 DBUG_RETURN(0);
5282 }
5283 if (!searching)
5284 _ma_check_print_info(param,"Key %d - Found wrong stored record at %s",
5285 sort_param->key+1,
5286 llstr(sort_param->start_recpos,llbuff));
5287 try_next:
5288 pos=(sort_param->start_recpos+=MARIA_DYN_ALIGN_SIZE);
5289 searching=1;
5290 }
5291 }
5292 case COMPRESSED_RECORD:
5293 param->progress= sort_param->pos;
5294 for (searching=0 ;; searching=1, sort_param->pos++)
5295 {
5296 if (_ma_read_cache(info, &sort_param->read_cache, block_info.header,
5297 sort_param->pos,
5298 share->pack.ref_length,READING_NEXT))
5299 DBUG_RETURN(-1);
5300 if (searching && ! sort_param->fix_datafile)
5301 {
5302 param->error_printed=1;
5303 param->retry_repair=1;
5304 param->testflag|=T_RETRY_WITHOUT_QUICK;
5305 my_errno= HA_ERR_WRONG_IN_RECORD;
5306 DBUG_RETURN(1); /* Something wrong with data */
5307 }
5308 sort_param->start_recpos=sort_param->pos;
5309 if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info,
5310 &sort_param->rec_buff,
5311 &sort_param->rec_buff_size, -1,
5312 sort_param->pos))
5313 DBUG_RETURN(-1);
5314 if (!block_info.rec_len &&
5315 sort_param->pos + MEMMAP_EXTRA_MARGIN ==
5316 sort_param->read_cache.end_of_file)
5317 DBUG_RETURN(-1);
5318 if (block_info.rec_len < (uint) share->min_pack_length ||
5319 block_info.rec_len > (uint) share->max_pack_length)
5320 {
5321 if (! searching)
5322 _ma_check_print_info(param,
5323 "Found block with wrong recordlength: %lu "
5324 "at %s\n",
5325 block_info.rec_len,
5326 llstr(sort_param->pos,llbuff));
5327 continue;
5328 }
5329 if (_ma_read_cache(info, &sort_param->read_cache, sort_param->rec_buff,
5330 block_info.filepos, block_info.rec_len,
5331 READING_NEXT))
5332 {
5333 if (! searching)
5334 _ma_check_print_info(param,"Couldn't read whole record from %s",
5335 llstr(sort_param->pos,llbuff));
5336 continue;
5337 }
5338 #ifdef HAVE_valgrind
5339 bzero(sort_param->rec_buff + block_info.rec_len,
5340 share->base.extra_rec_buff_size);
5341 #endif
5342 if (_ma_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record,
5343 sort_param->rec_buff, block_info.rec_len))
5344 {
5345 if (! searching)
5346 _ma_check_print_info(param,"Found wrong record at %s",
5347 llstr(sort_param->pos,llbuff));
5348 continue;
5349 }
5350 if (!sort_param->fix_datafile)
5351 {
5352 sort_param->current_filepos= sort_param->pos;
5353 if (sort_param->master)
5354 share->state.split++;
5355 }
5356 sort_param->max_pos= (sort_param->pos=block_info.filepos+
5357 block_info.rec_len);
5358 info->packed_length=block_info.rec_len;
5359
5360 if (sort_param->calc_checksum)
5361 {
5362 info->cur_row.checksum= (*share->calc_check_checksum)(info,
5363 sort_param->
5364 record);
5365 param->glob_crc+= info->cur_row.checksum;
5366 }
5367 DBUG_RETURN(0);
5368 }
5369 case NO_RECORD:
5370 DBUG_RETURN(1); /* Impossible */
5371 }
5372 DBUG_RETURN(1); /* Impossible */
5373 }
5374
5375
5376 /**
5377 @brief Write record to new file.
5378
5379 @fn _ma_sort_write_record()
5380 @param sort_param Sort parameters.
5381
5382 @note
5383 This is only called by a master thread if parallel repair is used.
5384
5385 @return
5386 @retval 0 OK
5387 sort_param->current_filepos points to inserted record for
5388 block_records and to the place for the next record for
5389 other row types.
5390 sort_param->filepos points to end of file
5391 @retval 1 Error
5392 */
5393
_ma_sort_write_record(MARIA_SORT_PARAM * sort_param)5394 int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
5395 {
5396 int flag;
5397 uint length;
5398 ulong block_length,reclength;
5399 uchar *from;
5400 uchar block_buff[8];
5401 MARIA_SORT_INFO *sort_info=sort_param->sort_info;
5402 HA_CHECK *param= sort_info->param;
5403 MARIA_HA *info= sort_info->new_info;
5404 MARIA_SHARE *share= info->s;
5405 DBUG_ENTER("_ma_sort_write_record");
5406
5407 if (sort_param->fix_datafile)
5408 {
5409 sort_param->current_filepos= sort_param->filepos;
5410 switch (sort_info->new_data_file_type) {
5411 case BLOCK_RECORD:
5412 if ((sort_param->current_filepos=
5413 (*share->write_record_init)(info, sort_param->record)) ==
5414 HA_OFFSET_ERROR)
5415 {
5416 _ma_check_print_error(param, "%d when writing to datafile", my_errno);
5417 DBUG_RETURN(1);
5418 }
5419 /* Pointer to end of file */
5420 sort_param->filepos= share->state.state.data_file_length;
5421 break;
5422 case STATIC_RECORD:
5423 if (my_b_write(&info->rec_cache,sort_param->record,
5424 share->base.pack_reclength))
5425 {
5426 _ma_check_print_error(param,"%d when writing to datafile",my_errno);
5427 DBUG_RETURN(1);
5428 }
5429 sort_param->filepos+=share->base.pack_reclength;
5430 share->state.split++;
5431 break;
5432 case DYNAMIC_RECORD:
5433 if (! info->blobs)
5434 from=sort_param->rec_buff;
5435 else
5436 {
5437 /* must be sure that local buffer is big enough */
5438 reclength=share->base.pack_reclength+
5439 _ma_calc_total_blob_length(info,sort_param->record)+
5440 ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
5441 MARIA_DYN_DELETE_BLOCK_HEADER;
5442 if (sort_info->buff_length < reclength)
5443 {
5444 if (!(sort_info->buff=my_realloc(sort_info->buff, (uint) reclength,
5445 MYF(MY_FREE_ON_ERROR |
5446 MY_ALLOW_ZERO_PTR))))
5447 DBUG_RETURN(1);
5448 sort_info->buff_length=reclength;
5449 }
5450 from= (uchar *) sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER);
5451 }
5452 /* We can use info->checksum here as only one thread calls this */
5453 info->cur_row.checksum= (*share->calc_check_checksum)(info,
5454 sort_param->
5455 record);
5456 if (!(reclength= _ma_rec_pack(info,from,sort_param->record)))
5457 {
5458 _ma_check_print_error(param,"Got error %d when packing record",
5459 my_errno);
5460 DBUG_RETURN(1);
5461 }
5462 flag=0;
5463
5464 do
5465 {
5466 block_length= reclength + 3 + MY_TEST(reclength >= (65520 - 3));
5467 if (block_length < share->base.min_block_length)
5468 block_length=share->base.min_block_length;
5469 info->update|=HA_STATE_WRITE_AT_END;
5470 block_length=MY_ALIGN(block_length,MARIA_DYN_ALIGN_SIZE);
5471 if (block_length > MARIA_MAX_BLOCK_LENGTH)
5472 block_length=MARIA_MAX_BLOCK_LENGTH;
5473 if (_ma_write_part_record(info,0L,block_length,
5474 sort_param->filepos+block_length,
5475 &from,&reclength,&flag))
5476 {
5477 _ma_check_print_error(param,"%d when writing to datafile",my_errno);
5478 DBUG_RETURN(1);
5479 }
5480 sort_param->filepos+=block_length;
5481 share->state.split++;
5482 } while (reclength);
5483 break;
5484 case COMPRESSED_RECORD:
5485 reclength=info->packed_length;
5486 length= _ma_save_pack_length((uint) share->pack.version, block_buff,
5487 reclength);
5488 if (share->base.blobs)
5489 length+= _ma_save_pack_length((uint) share->pack.version,
5490 block_buff + length, info->blob_length);
5491 if (my_b_write(&info->rec_cache,block_buff,length) ||
5492 my_b_write(&info->rec_cache, sort_param->rec_buff, reclength))
5493 {
5494 _ma_check_print_error(param,"%d when writing to datafile",my_errno);
5495 DBUG_RETURN(1);
5496 }
5497 sort_param->filepos+=reclength+length;
5498 share->state.split++;
5499 break;
5500 case NO_RECORD:
5501 DBUG_RETURN(1); /* Impossible */
5502 }
5503 }
5504 if (sort_param->master)
5505 {
5506 share->state.state.records++;
5507 if ((param->testflag & T_WRITE_LOOP) &&
5508 (share->state.state.records % WRITE_COUNT) == 0)
5509 {
5510 char llbuff[22];
5511 printf("%s\r", llstr(share->state.state.records,llbuff));
5512 fflush(stdout);
5513 }
5514 }
5515 DBUG_RETURN(0);
5516 } /* _ma_sort_write_record */
5517
5518
5519 /* Compare two keys from _ma_create_index_by_sort */
5520
sort_key_cmp(MARIA_SORT_PARAM * sort_param,const void * a,const void * b)5521 static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
5522 const void *b)
5523 {
5524 uint not_used[2];
5525 return (ha_key_cmp(sort_param->seg, *((uchar* const *) a),
5526 *((uchar* const *) b),
5527 USE_WHOLE_KEY, SEARCH_SAME, not_used));
5528 } /* sort_key_cmp */
5529
5530
/*
  Write one sorted key to the index being built.

  The key is compared with the last key stored in the first key block
  (if that block is initialised) to maintain the per-keypart 'unique'
  statistics according to param->stats_method. A key equal to the
  previous one on a HA_NOSAME index is reported as a duplicate and
  handed to sort_delete_record(); otherwise the key is inserted with
  sort_insert_key().

  Returns the result of sort_delete_record() or sort_insert_key(), or 1
  (debug builds only) when the keys arrive out of order.
*/

static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a)
{
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  HA_CHECK *param= sort_info->param;
  uint diff_pos[2];
  int cmp;

  if (!sort_info->key_block->inited)
  {
    /* No previous key to compare against */
    cmp= -1;
    if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
      maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
                                        a);
  }
  else
  {
    uchar *prev_key= sort_info->key_block->lastkey;

    cmp= ha_key_cmp(sort_param->seg, prev_key, a, USE_WHOLE_KEY,
                    SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT,
                    diff_pos);
    if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
    {
      /* Called again only to refresh diff_pos; cmp is kept */
      ha_key_cmp(sort_param->seg, prev_key, a, USE_WHOLE_KEY,
                 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos);
    }
    else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
    {
      diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg,
                                                    sort_param->notnull,
                                                    prev_key,
                                                    a);
    }
    sort_param->unique[diff_pos[0]-1]++;
  }

  if (cmp == 0 && (sort_param->keyinfo->flag & HA_NOSAME))
  {
    char llbuff[22], llbuff2[22];

    DBUG_EXECUTE("key", _ma_print_keydata(DBUG_FILE, sort_param->seg, a,
                                          USE_WHOLE_KEY););
    sort_info->dupp++;
    sort_info->info->cur_row.lastpos= get_record_for_key(sort_param->keyinfo,
                                                         a);
    /* Set T_SUPPRESS_ERR_HANDLING if only T_CREATE_UNIQUE_BY_SORT is set */
    if ((param->testflag & (T_CREATE_UNIQUE_BY_SORT | T_SUPPRESS_ERR_HANDLING))
        == T_CREATE_UNIQUE_BY_SORT)
      param->testflag|= T_SUPPRESS_ERR_HANDLING;
    _ma_check_print_warning(param,
                            "Duplicate key %2u for record at %10s against "
                            "record at %10s",
                            sort_param->key + 1,
                            llstr(sort_info->info->cur_row.lastpos, llbuff),
                            llstr(get_record_for_key(sort_param->keyinfo,
                                                     sort_info->key_block->
                                                     lastkey),
                                  llbuff2));
    param->testflag|= T_RETRY_WITHOUT_QUICK;
    if (sort_info->param->testflag & T_VERBOSE)
      _ma_print_keydata(stdout, sort_param->seg, a, USE_WHOLE_KEY);
    return sort_delete_record(sort_param);
  }

#ifndef DBUG_OFF
  if (cmp > 0)
  {
    _ma_check_print_error(param,
                          "Internal error: Keys are not in order from sort");
    return 1;
  }
#endif

  return sort_insert_key(sort_param, sort_info->key_block,
                         a, HA_OFFSET_ERROR);
} /* sort_key_write */
5600
5601
/*
  Flush the fulltext key buffer (sort_info->ft_buf).

  If the buffer still holds values (ft_buf->buf is set), the key in
  lastkey is inserted once per buffered value. Otherwise the values
  went to a second-level tree: its pending blocks are flushed, lastkey
  is updated to refer to that tree, the first-level state in
  sort_info/sort_param is restored, and lastkey is inserted there.

  Returns 0 on success, otherwise the first error seen.
*/

int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param)
{
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  SORT_KEY_BLOCKS *key_block= sort_info->key_block;
  MARIA_SHARE *share= sort_info->info->s;
  SORT_FT_BUF *maria_ft_buf= sort_info->ft_buf;
  uint val_off, val_len;
  uchar *value_pos, *next_value;
  int error;

  val_len= share->ft2_keyinfo.keylength;
  get_key_full_length_rdonly(val_off, maria_ft_buf->lastkey);
  value_pos= maria_ft_buf->lastkey + val_off;

  if (maria_ft_buf->buf)
  {
    /* flushing first-level tree */
    error= sort_insert_key(sort_param, key_block, maria_ft_buf->lastkey,
                           HA_OFFSET_ERROR);
    next_value= value_pos + val_len;
    while (!error && next_value < maria_ft_buf->buf)
    {
      /* Move the next buffered value into the key's value slot */
      memcpy(value_pos, next_value, val_len);
      error= sort_insert_key(sort_param, key_block, maria_ft_buf->lastkey,
                             HA_OFFSET_ERROR);
      next_value+= val_len;
    }
    return error;
  }

  /* flushing second-level tree keyblocks */
  error= _ma_flush_pending_blocks(sort_param);

  /* updating lastkey with second-level tree info */
  ft_intXstore(maria_ft_buf->lastkey+val_off, -maria_ft_buf->count);
  _ma_dpointer(sort_info->info->s, maria_ft_buf->lastkey+val_off+HA_FT_WLEN,
               share->state.key_root[sort_param->key]);

  /* restoring first level tree data in sort_info/sort_param */
  sort_info->key_block= (sort_info->key_block_end -
                         sort_info->param->sort_key_blocks);
  sort_param->keyinfo= share->keyinfo + sort_param->key;
  share->state.key_root[sort_param->key]= HA_OFFSET_ERROR;

  if (error)
    return error;

  /* writing lastkey in first-level tree */
  return sort_insert_key(sort_param, sort_info->key_block,
                         maria_ft_buf->lastkey, HA_OFFSET_ERROR);
}
5646
5647
/*
  Write one sorted fulltext key.

  On the first call a SORT_FT_BUF is allocated if a two-level tree can
  be used; if not (or if the allocation fails) the function installs
  sort_key_write() as sort_param->key_write and delegates to it.

  Later calls compare the word in 'a' with the word in ft_buf->lastkey:
    - same word: the value after the word is appended to the buffer, or
      inserted into the second-level tree if the buffer has already been
      converted; a full buffer is converted to a second-level tree.
    - new word: the buffer is flushed with _ma_sort_ft_buf_flush() and
      restarted with 'a'.

  Returns 0 on success, otherwise the error from the insert/flush call.
*/

static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
                                   const uchar *a)
{
  uint a_len, val_off, val_len, error;
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  SORT_FT_BUF *ft_buf= sort_info->ft_buf;
  SORT_KEY_BLOCKS *key_block= sort_info->key_block;
  MARIA_SHARE *share= sort_info->info->s;

  val_len= HA_FT_WLEN + share->rec_reflength;
  get_key_full_length_rdonly(a_len, a);

  if (ft_buf)
  {
    get_key_full_length_rdonly(val_off, ft_buf->lastkey);

    if (ha_compare_text(sort_param->seg->charset,
                        a+1, a_len-1,
                        ft_buf->lastkey+1, val_off-1, 0) == 0)
    {
      uchar *value;

      if (!ft_buf->buf)
      {
        /* store in second-level tree */
        ft_buf->count++;
        return sort_insert_key(sort_param, key_block,
                               a + a_len, HA_OFFSET_ERROR);
      }

      /* storing the key in the buffer. */
      memcpy (ft_buf->buf, (const char *)a+a_len, val_len);
      ft_buf->buf+= val_len;
      if (ft_buf->buf < ft_buf->end)
        return 0;

      /* converting to two-level tree */
      value= ft_buf->lastkey + val_off;

      /* Use the first key block that is not yet initialised */
      while (key_block->inited)
        key_block++;
      sort_info->key_block= key_block;
      sort_param->keyinfo= &share->ft2_keyinfo;
      ft_buf->count= (uint)(ft_buf->buf - value)/val_len;

      /* flushing buffer to second-level tree */
      error= 0;
      while (!error && value < ft_buf->buf)
      {
        error= sort_insert_key(sort_param, key_block, value, HA_OFFSET_ERROR);
        value+= val_len;
      }
      ft_buf->buf= 0;
      return error;
    }

    /* flushing buffer */
    if ((error= _ma_sort_ft_buf_flush(sort_param)))
      return error;
  }
  else
  {
    /*
      use two-level tree only if key_reflength fits in rec_reflength place
      and row format is NOT static - for _ma_dpointer not to garble offsets
    */
    if ((share->base.key_reflength <=
         share->rec_reflength) &&
        (share->options &
         (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)))
      ft_buf= (SORT_FT_BUF *)my_malloc(sort_param->keyinfo->block_length +
                                       sizeof(SORT_FT_BUF), MYF(MY_WME));

    if (!ft_buf)
    {
      /* No buffer: use the plain key writer from now on */
      sort_param->key_write= sort_key_write;
      return sort_key_write(sort_param, a);
    }
    sort_info->ft_buf= ft_buf;
  }

  /* (Re)start the buffer with 'a' as its last key */
  a_len+= val_len;
  memcpy(ft_buf->lastkey, a, a_len);
  ft_buf->buf= ft_buf->lastkey + a_len;
  /*
    32 is just a safety margin here
    (at least MY_MAX(val_len, sizeof(nod_flag)) should be there).
    May be better performance could be achieved if we'd put
      (sort_info->keyinfo->block_length-32)/XXX
      instead.
        TODO: benchmark the best value for XXX.
  */
  ft_buf->end= ft_buf->lastkey + (sort_param->keyinfo->block_length-32);
  return 0;
} /* sort_maria_ft_key_write */
5736
5737
5738 /* get pointer to record from a key */
5739
get_record_for_key(MARIA_KEYDEF * keyinfo,const uchar * key_data)5740 static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo,
5741 const uchar *key_data)
5742 {
5743 MARIA_KEY key;
5744 key.keyinfo= keyinfo;
5745 key.data= (uchar*) key_data;
5746 key.data_length= (_ma_keylength(keyinfo, key_data) -
5747 keyinfo->share->rec_reflength);
5748 return _ma_row_pos_from_key(&key);
5749 } /* get_record_for_key */
5750
5751
5752 /* Insert a key in sort-key-blocks */
5753
sort_insert_key(MARIA_SORT_PARAM * sort_param,register SORT_KEY_BLOCKS * key_block,const uchar * key,my_off_t prev_block)5754 static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
5755 register SORT_KEY_BLOCKS *key_block,
5756 const uchar *key,
5757 my_off_t prev_block)
5758 {
5759 uint a_length,t_length,nod_flag;
5760 my_off_t filepos;
5761 uchar *anc_buff,*lastkey;
5762 MARIA_KEY_PARAM s_temp;
5763 MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
5764 MARIA_SORT_INFO *sort_info= sort_param->sort_info;
5765 HA_CHECK *param=sort_info->param;
5766 MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
5767 MARIA_KEY tmp_key;
5768 MARIA_HA *info= sort_info->info;
5769 MARIA_SHARE *share= info->s;
5770 DBUG_ENTER("sort_insert_key");
5771
5772 anc_buff= key_block->buff;
5773 lastkey=key_block->lastkey;
5774 nod_flag= (key_block == sort_info->key_block ? 0 :
5775 share->base.key_reflength);
5776
5777 if (!key_block->inited)
5778 {
5779 key_block->inited=1;
5780 if (key_block == sort_info->key_block_end)
5781 {
5782 _ma_check_print_error(param,
5783 "To many key-block-levels; "
5784 "Try increasing sort_key_blocks");
5785 DBUG_RETURN(1);
5786 }
5787 a_length= share->keypage_header + nod_flag;
5788 key_block->end_pos= anc_buff + share->keypage_header;
5789 bzero(anc_buff, share->keypage_header);
5790 _ma_store_keynr(share, anc_buff, sort_param->keyinfo->key_nr);
5791 lastkey=0; /* No previous key in block */
5792 }
5793 else
5794 a_length= _ma_get_page_used(share, anc_buff);
5795
5796 /* Save pointer to previous block */
5797 if (nod_flag)
5798 {
5799 _ma_store_keypage_flag(share, anc_buff, KEYPAGE_FLAG_ISNOD);
5800 _ma_kpointer(info,key_block->end_pos,prev_block);
5801 }
5802
5803 tmp_key.keyinfo= keyinfo;
5804 tmp_key.data= (uchar*) key;
5805 tmp_key.data_length= _ma_keylength(keyinfo, key) - share->rec_reflength;
5806 tmp_key.ref_length= share->rec_reflength;
5807
5808 t_length= (*keyinfo->pack_key)(&tmp_key, nod_flag,
5809 (uchar*) 0, lastkey, lastkey, &s_temp);
5810 (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp);
5811 a_length+=t_length;
5812 _ma_store_page_used(share, anc_buff, a_length);
5813 key_block->end_pos+=t_length;
5814 if (a_length <= share->max_index_block_size)
5815 {
5816 MARIA_KEY tmp_key2;
5817 tmp_key2.data= key_block->lastkey;
5818 _ma_copy_key(&tmp_key2, &tmp_key);
5819 key_block->last_length=a_length-t_length;
5820 DBUG_RETURN(0);
5821 }
5822
5823 /* Fill block with end-zero and write filled block */
5824 _ma_store_page_used(share, anc_buff, key_block->last_length);
5825 bzero(anc_buff+key_block->last_length,
5826 keyinfo->block_length- key_block->last_length);
5827 if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) == HA_OFFSET_ERROR)
5828 DBUG_RETURN(1);
5829 _ma_fast_unlock_key_del(info);
5830
5831 /* If we read the page from the key cache, we have to write it back to it */
5832 if (page_link->changed)
5833 {
5834 MARIA_PAGE page;
5835 pop_dynamic(&info->pinned_pages);
5836 _ma_page_setup(&page, info, keyinfo, filepos, anc_buff);
5837 if (_ma_write_keypage(&page, PAGECACHE_LOCK_WRITE_UNLOCK, DFLT_INIT_HITS))
5838 DBUG_RETURN(1);
5839 }
5840 else
5841 {
5842 if (write_page(share, share->kfile.file, anc_buff,
5843 keyinfo->block_length, filepos, param->myf_rw))
5844 DBUG_RETURN(1);
5845 }
5846 DBUG_DUMP("buff", anc_buff, _ma_get_page_used(share, anc_buff));
5847
5848 /* Write separator-key to block in next level */
5849 if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos))
5850 DBUG_RETURN(1);
5851
5852 /* clear old block and write new key in it */
5853 key_block->inited=0;
5854 DBUG_RETURN(sort_insert_key(sort_param, key_block,key,prev_block));
5855 } /* sort_insert_key */
5856
5857
5858 /* Delete record when we found a duplicated key */
5859
sort_delete_record(MARIA_SORT_PARAM * sort_param)5860 static int sort_delete_record(MARIA_SORT_PARAM *sort_param)
5861 {
5862 uint i;
5863 int old_file,error;
5864 uchar *key;
5865 MARIA_SORT_INFO *sort_info=sort_param->sort_info;
5866 HA_CHECK *param=sort_info->param;
5867 MARIA_HA *row_info= sort_info->new_info, *key_info= sort_info->info;
5868 DBUG_ENTER("sort_delete_record");
5869
5870 if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
5871 {
5872 _ma_check_print_error(param,
5873 "Quick-recover aborted; Run recovery without switch "
5874 "-q or with switch -qq");
5875 DBUG_RETURN(1);
5876 }
5877 if (key_info->s->options & HA_OPTION_COMPRESS_RECORD)
5878 {
5879 _ma_check_print_error(param,
5880 "Recover aborted; Can't run standard recovery on "
5881 "compressed tables with errors in data-file. "
5882 "Use 'aria_chk --safe-recover' to fix it");
5883 DBUG_RETURN(1);
5884 }
5885
5886 old_file= row_info->dfile.file;
5887 /* This only affects static and dynamic row formats */
5888 row_info->dfile.file= row_info->rec_cache.file;
5889 if (flush_io_cache(&row_info->rec_cache))
5890 DBUG_RETURN(1);
5891
5892 key= key_info->lastkey_buff + key_info->s->base.max_key_length;
5893 if ((error=(*row_info->s->read_record)(row_info, sort_param->record,
5894 key_info->cur_row.lastpos)) &&
5895 error != HA_ERR_RECORD_DELETED)
5896 {
5897 _ma_check_print_error(param,"Can't read record to be removed");
5898 row_info->dfile.file= old_file;
5899 DBUG_RETURN(1);
5900 }
5901 row_info->cur_row.lastpos= key_info->cur_row.lastpos;
5902
5903 for (i=0 ; i < sort_info->current_key ; i++)
5904 {
5905 MARIA_KEY tmp_key;
5906 (*key_info->s->keyinfo[i].make_key)(key_info, &tmp_key, i, key,
5907 sort_param->record,
5908 key_info->cur_row.lastpos, 0);
5909 if (_ma_ck_delete(key_info, &tmp_key))
5910 {
5911 _ma_check_print_error(param,
5912 "Can't delete key %d from record to be removed",
5913 i+1);
5914 row_info->dfile.file= old_file;
5915 DBUG_RETURN(1);
5916 }
5917 }
5918 if (sort_param->calc_checksum)
5919 param->glob_crc-=(*key_info->s->calc_check_checksum)(key_info,
5920 sort_param->record);
5921 error= (*row_info->s->delete_record)(row_info, sort_param->record);
5922 if (error)
5923 _ma_check_print_error(param,"Got error %d when deleting record",
5924 my_errno);
5925 row_info->dfile.file= old_file; /* restore actual value */
5926 row_info->s->state.state.records--;
5927 DBUG_RETURN(error);
5928 } /* sort_delete_record */
5929
5930
5931 /* Fix all pending blocks and flush everything to disk */
5932
_ma_flush_pending_blocks(MARIA_SORT_PARAM * sort_param)5933 int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param)
5934 {
5935 uint nod_flag,length;
5936 my_off_t filepos;
5937 SORT_KEY_BLOCKS *key_block;
5938 MARIA_SORT_INFO *sort_info= sort_param->sort_info;
5939 myf myf_rw=sort_info->param->myf_rw;
5940 MARIA_HA *info=sort_info->info;
5941 MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
5942 MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
5943 DBUG_ENTER("_ma_flush_pending_blocks");
5944
5945 filepos= HA_OFFSET_ERROR; /* if empty file */
5946 nod_flag=0;
5947 for (key_block=sort_info->key_block ; key_block->inited ; key_block++)
5948 {
5949 key_block->inited=0;
5950 length= _ma_get_page_used(info->s, key_block->buff);
5951 if (nod_flag)
5952 _ma_kpointer(info,key_block->end_pos,filepos);
5953 bzero(key_block->buff+length, keyinfo->block_length-length);
5954 if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
5955 HA_OFFSET_ERROR)
5956 goto err;
5957
5958 /* If we read the page from the key cache, we have to write it back */
5959 if (page_link->changed)
5960 {
5961 MARIA_PAGE page;
5962 pop_dynamic(&info->pinned_pages);
5963
5964 _ma_page_setup(&page, info, keyinfo, filepos, key_block->buff);
5965 if (_ma_write_keypage(&page, PAGECACHE_LOCK_WRITE_UNLOCK,
5966 DFLT_INIT_HITS))
5967 goto err;
5968 }
5969 else
5970 {
5971 if (write_page(info->s, info->s->kfile.file, key_block->buff,
5972 keyinfo->block_length, filepos, myf_rw))
5973 goto err;
5974 }
5975 DBUG_DUMP("buff",key_block->buff,length);
5976 nod_flag=1;
5977 }
5978 info->s->state.key_root[sort_param->key]=filepos; /* Last is root for tree */
5979 _ma_fast_unlock_key_del(info);
5980 DBUG_RETURN(0);
5981
5982 err:
5983 _ma_fast_unlock_key_del(info);
5984 DBUG_RETURN(1);
5985 } /* _ma_flush_pending_blocks */
5986
5987 /* alloc space and pointers for key_blocks */
5988
alloc_key_blocks(HA_CHECK * param,uint blocks,uint buffer_length)5989 static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
5990 uint buffer_length)
5991 {
5992 reg1 uint i;
5993 SORT_KEY_BLOCKS *block;
5994 DBUG_ENTER("alloc_key_blocks");
5995
5996 if (!(block= (SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+
5997 buffer_length+IO_SIZE)*blocks,
5998 MYF(0))))
5999 {
6000 _ma_check_print_error(param,"Not enough memory for sort-key-blocks");
6001 return(0);
6002 }
6003 for (i=0 ; i < blocks ; i++)
6004 {
6005 block[i].inited=0;
6006 block[i].buff= (uchar*) (block+blocks)+(buffer_length+IO_SIZE)*i;
6007 }
6008 DBUG_RETURN(block);
6009 } /* alloc_key_blocks */
6010
6011
6012 /* Check if file is almost full */
6013
maria_test_if_almost_full(MARIA_HA * info)6014 int maria_test_if_almost_full(MARIA_HA *info)
6015 {
6016 MARIA_SHARE *share= info->s;
6017
6018 if (share->options & HA_OPTION_COMPRESS_RECORD)
6019 return 0;
6020 return mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END,
6021 MYF(MY_THREADSAFE))/10*9 >
6022 (my_off_t) share->base.max_key_file_length ||
6023 mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)) / 10 * 9 >
6024 (my_off_t) share->base.max_data_file_length;
6025 }
6026
6027
6028 /* Recreate table with bigger more alloced record-data */
6029
maria_recreate_table(HA_CHECK * param,MARIA_HA ** org_info,char * filename)6030 int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
6031 {
6032 int error;
6033 MARIA_HA info;
6034 MARIA_SHARE share;
6035 MARIA_KEYDEF *keyinfo,*key,*key_end;
6036 HA_KEYSEG *keysegs,*keyseg;
6037 MARIA_COLUMNDEF *columndef,*column,*end;
6038 MARIA_UNIQUEDEF *uniquedef,*u_ptr,*u_end;
6039 MARIA_STATUS_INFO status_info;
6040 uint unpack,key_parts;
6041 ha_rows max_records;
6042 ulonglong file_length,tmp_length;
6043 MARIA_CREATE_INFO create_info;
6044 DBUG_ENTER("maria_recreate_table");
6045
6046 if ((!(param->testflag & T_SILENT)))
6047 printf("Recreating table '%s'\n", param->isam_file_name);
6048
6049 error=1; /* Default error */
6050 info= **org_info;
6051 status_info= (*org_info)->state[0];
6052 info.state= &status_info;
6053 share= *(*org_info)->s;
6054 unpack= ((share.data_file_type == COMPRESSED_RECORD) &&
6055 (param->testflag & T_UNPACK));
6056 if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF) *
6057 share.base.keys)))
6058 DBUG_RETURN(0);
6059 memcpy((uchar*) keyinfo,(uchar*) share.keyinfo,
6060 (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys));
6061
6062 key_parts= share.base.all_key_parts;
6063 if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG)*
6064 (key_parts+share.base.keys))))
6065 {
6066 my_afree(keyinfo);
6067 DBUG_RETURN(1);
6068 }
6069 if (!(columndef=(MARIA_COLUMNDEF*)
6070 my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1))))
6071 {
6072 my_afree(keyinfo);
6073 my_afree(keysegs);
6074 DBUG_RETURN(1);
6075 }
6076 if (!(uniquedef=(MARIA_UNIQUEDEF*)
6077 my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1))))
6078 {
6079 my_afree(columndef);
6080 my_afree(keyinfo);
6081 my_afree(keysegs);
6082 DBUG_RETURN(1);
6083 }
6084
6085 /* Copy the column definitions in their original order */
6086 for (column= share.columndef, end= share.columndef+share.base.fields;
6087 column != end ;
6088 column++)
6089 columndef[column->column_nr]= *column;
6090
6091 /* Change the new key to point at the saved key segments */
6092 memcpy((uchar*) keysegs,(uchar*) share.keyparts,
6093 (size_t) (sizeof(HA_KEYSEG)*(key_parts+share.base.keys+
6094 share.state.header.uniques)));
6095 keyseg=keysegs;
6096 for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++)
6097 {
6098 key->seg=keyseg;
6099 for (; keyseg->type ; keyseg++)
6100 {
6101 if (param->language)
6102 keyseg->language=param->language; /* change language */
6103 }
6104 keyseg++; /* Skip end pointer */
6105 }
6106
6107 /*
6108 Copy the unique definitions and change them to point at the new key
6109 segments
6110 */
6111 memcpy((uchar*) uniquedef,(uchar*) share.uniqueinfo,
6112 (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques)));
6113 for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques;
6114 u_ptr != u_end ; u_ptr++)
6115 {
6116 u_ptr->seg=keyseg;
6117 keyseg+=u_ptr->keysegs+1;
6118 }
6119
6120 file_length=(ulonglong) mysql_file_seek(info.dfile.file, 0L, MY_SEEK_END, MYF(0));
6121 if (share.options & HA_OPTION_COMPRESS_RECORD)
6122 share.base.records=max_records=info.state->records;
6123 else if (share.base.min_pack_length)
6124 max_records=(ha_rows) (file_length / share.base.min_pack_length);
6125 else
6126 max_records=0;
6127 share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD;
6128
6129 tmp_length= file_length+file_length/10;
6130 set_if_bigger(file_length,param->max_data_file_length);
6131 set_if_bigger(file_length,tmp_length);
6132 set_if_bigger(file_length,(ulonglong) share.base.max_data_file_length);
6133
6134 maria_close(*org_info);
6135
6136 bzero((char*) &create_info,sizeof(create_info));
6137 create_info.max_rows=MY_MAX(max_records,share.base.records);
6138 create_info.reloc_rows=share.base.reloc;
6139 create_info.old_options=(share.options |
6140 (unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0));
6141
6142 create_info.data_file_length=file_length;
6143 create_info.auto_increment=share.state.auto_increment;
6144 create_info.language = (param->language ? param->language :
6145 share.base.language);
6146 create_info.key_file_length= status_info.key_file_length;
6147 create_info.org_data_file_type= ((enum data_file_type)
6148 share.state.header.org_data_file_type);
6149
6150 /*
6151 Allow for creating an auto_increment key. This has an effect only if
6152 an auto_increment key exists in the original table.
6153 */
6154 create_info.with_auto_increment= TRUE;
6155 create_info.null_bytes= share.base.null_bytes;
6156 create_info.transactional= share.base.born_transactional;
6157
6158 /*
6159 We don't have to handle symlinks here because we are using
6160 HA_DONT_TOUCH_DATA
6161 */
6162 if (maria_create(filename, share.data_file_type,
6163 share.base.keys - share.state.header.uniques,
6164 keyinfo, share.base.fields, columndef,
6165 share.state.header.uniques, uniquedef,
6166 &create_info,
6167 HA_DONT_TOUCH_DATA))
6168 {
6169 _ma_check_print_error(param,
6170 "Got error %d when trying to recreate indexfile",
6171 my_errno);
6172 goto end;
6173 }
6174 *org_info= maria_open(filename,O_RDWR,
6175 (HA_OPEN_FOR_REPAIR |
6176 ((param->testflag & T_WAIT_FOREVER) ?
6177 HA_OPEN_WAIT_IF_LOCKED :
6178 (param->testflag & T_DESCRIPT) ?
6179 HA_OPEN_IGNORE_IF_LOCKED :
6180 HA_OPEN_ABORT_IF_LOCKED)));
6181 if (!*org_info)
6182 {
6183 _ma_check_print_error(param,
6184 "Got error %d when trying to open re-created "
6185 "indexfile", my_errno);
6186 goto end;
6187 }
6188 /* We are modifing */
6189 (*org_info)->s->options&= ~HA_OPTION_READ_ONLY_DATA;
6190 _ma_readinfo(*org_info,F_WRLCK,0);
6191 (*org_info)->s->state.state.records= info.state->records;
6192 if (share.state.create_time)
6193 (*org_info)->s->state.create_time=share.state.create_time;
6194 #ifdef MARIA_EXTERNAL_LOCKING
6195 (*org_info)->s->state.unique= (*org_info)->this_unique= share.state.unique;
6196 #endif
6197 (*org_info)->s->state.state.checksum= info.state->checksum;
6198 (*org_info)->s->state.state.del= info.state->del;
6199 (*org_info)->s->state.dellink= share.state.dellink;
6200 (*org_info)->s->state.state.empty= info.state->empty;
6201 (*org_info)->s->state.state.data_file_length= info.state->data_file_length;
6202 *(*org_info)->state= (*org_info)->s->state.state;
6203 if (maria_update_state_info(param,*org_info,UPDATE_TIME | UPDATE_STAT |
6204 UPDATE_OPEN_COUNT))
6205 goto end;
6206 error=0;
6207 end:
6208 my_afree(uniquedef);
6209 my_afree(keyinfo);
6210 my_afree(columndef);
6211 my_afree(keysegs);
6212 DBUG_RETURN(error);
6213 }
6214
6215
6216 /* Write suffix to data file if needed */
6217
maria_write_data_suffix(MARIA_SORT_INFO * sort_info,my_bool fix_datafile)6218 int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile)
6219 {
6220 MARIA_HA *info=sort_info->new_info;
6221
6222 if (info->s->data_file_type == COMPRESSED_RECORD && fix_datafile)
6223 {
6224 uchar buff[MEMMAP_EXTRA_MARGIN];
6225 bzero(buff,sizeof(buff));
6226 if (my_b_write(&info->rec_cache,buff,sizeof(buff)))
6227 {
6228 _ma_check_print_error(sort_info->param,
6229 "%d when writing to datafile",my_errno);
6230 return 1;
6231 }
6232 sort_info->param->read_cache.end_of_file+=sizeof(buff);
6233 }
6234 return 0;
6235 }
6236
6237
6238 /* Update state and maria_chk time of indexfile */
6239
maria_update_state_info(HA_CHECK * param,MARIA_HA * info,uint update)6240 int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update)
6241 {
6242 MARIA_SHARE *share= info->s;
6243 DBUG_ENTER("maria_update_state_info");
6244
6245 if (update & UPDATE_OPEN_COUNT)
6246 {
6247 share->state.open_count=0;
6248 share->global_changed=0;
6249 share->changed= 1;
6250 }
6251 if (update & UPDATE_STAT)
6252 {
6253 uint i, key_parts= mi_uint2korr(share->state.header.key_parts);
6254 share->state.records_at_analyze= share->state.state.records;
6255 share->state.changed&= ~STATE_NOT_ANALYZED;
6256 if (share->state.state.records)
6257 {
6258 for (i=0; i<key_parts; i++)
6259 {
6260 if (!(share->state.rec_per_key_part[i]=param->new_rec_per_key_part[i]))
6261 share->state.changed|= STATE_NOT_ANALYZED;
6262 }
6263 }
6264 }
6265 if (update & (UPDATE_STAT | UPDATE_SORT | UPDATE_TIME | UPDATE_AUTO_INC))
6266 {
6267 if (update & UPDATE_TIME)
6268 {
6269 share->state.check_time= time((time_t*) 0);
6270 if (!share->state.create_time)
6271 share->state.create_time= share->state.check_time;
6272 }
6273 if (_ma_state_info_write(share,
6274 MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
6275 MA_STATE_INFO_WRITE_FULL_INFO))
6276 goto err;
6277 }
6278 { /* Force update of status */
6279 int error;
6280 uint r_locks=share->r_locks,w_locks=share->w_locks;
6281 share->r_locks= share->w_locks= share->tot_locks= 0;
6282 error= _ma_writeinfo(info,WRITEINFO_NO_UNLOCK);
6283 share->r_locks=r_locks;
6284 share->w_locks=w_locks;
6285 share->tot_locks=r_locks+w_locks;
6286 if (!error)
6287 DBUG_RETURN(0);
6288 }
6289 err:
6290 _ma_check_print_error(param,"%d when updating keyfile",my_errno);
6291 DBUG_RETURN(1);
6292 }
6293
6294 /*
6295 Update auto increment value for a table
6296 When setting the 'repair_only' flag we only want to change the
6297 old auto_increment value if its wrong (smaller than some given key).
6298 The reason is that we shouldn't change the auto_increment value
6299 for a table without good reason when only doing a repair; If the
6300 user have inserted and deleted rows, the auto_increment value
6301 may be bigger than the biggest current row and this is ok.
6302
6303 If repair_only is not set, we will update the flag to the value in
6304 param->auto_increment is bigger than the biggest key.
6305 */
6306
_ma_update_auto_increment_key(HA_CHECK * param,MARIA_HA * info,my_bool repair_only)6307 void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
6308 my_bool repair_only)
6309 {
6310 MARIA_SHARE *share= info->s;
6311 uchar *record;
6312 DBUG_ENTER("update_auto_increment_key");
6313
6314 if (!share->base.auto_key ||
6315 ! maria_is_key_active(share->state.key_map, share->base.auto_key - 1))
6316 {
6317 if (!(param->testflag & T_VERY_SILENT))
6318 _ma_check_print_info(param,
6319 "Table: %s doesn't have an auto increment key\n",
6320 param->isam_file_name);
6321 DBUG_VOID_RETURN;
6322 }
6323 if (!(param->testflag & T_SILENT) &&
6324 !(param->testflag & T_REP))
6325 printf("Updating Aria file: %s\n", param->isam_file_name);
6326 /*
6327 We have to use an allocated buffer instead of info->rec_buff as
6328 _ma_put_key_in_record() may use info->rec_buff
6329 */
6330 if (!(record= (uchar*) my_malloc((size_t) share->base.default_rec_buff_size,
6331 MYF(0))))
6332 {
6333 _ma_check_print_error(param,"Not enough memory for extra record");
6334 DBUG_VOID_RETURN;
6335 }
6336
6337 maria_extra(info,HA_EXTRA_KEYREAD,0);
6338 if (maria_rlast(info, record, share->base.auto_key-1))
6339 {
6340 if (my_errno != HA_ERR_END_OF_FILE)
6341 {
6342 maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
6343 my_free(record);
6344 _ma_check_print_error(param,"%d when reading last record",my_errno);
6345 DBUG_VOID_RETURN;
6346 }
6347 if (!repair_only)
6348 share->state.auto_increment=param->auto_increment_value;
6349 }
6350 else
6351 {
6352 const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
6353 ulonglong auto_increment=
6354 ma_retrieve_auto_increment(record + keyseg->start, keyseg->type);
6355 set_if_bigger(share->state.auto_increment,auto_increment);
6356 if (!repair_only)
6357 set_if_bigger(share->state.auto_increment, param->auto_increment_value);
6358 }
6359 maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
6360 my_free(record);
6361 maria_update_state_info(param, info, UPDATE_AUTO_INC);
6362 DBUG_VOID_RETURN;
6363 }
6364
6365
6366 /*
6367 Update statistics for each part of an index
6368
6369 SYNOPSIS
6370 maria_update_key_parts()
6371 keyinfo IN Index information (only key->keysegs used)
6372 rec_per_key_part OUT Store statistics here
6373 unique IN Array of (#distinct tuples)
6374 notnull_tuples IN Array of (#tuples), or NULL
6375 records Number of records in the table
6376
6377 DESCRIPTION
6378 This function is called produce index statistics values from unique and
6379 notnull_tuples arrays after these arrays were produced with sequential
6380 index scan (the scan is done in two places: chk_index() and
6381 sort_key_write()).
6382
6383 This function handles all 3 index statistics collection methods.
6384
6385 Unique is an array:
6386 unique[0]= (#different values of {keypart1}) - 1
6387 unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1
6388 ...
6389
6390 For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too:
6391 notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL)
6392 notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all
6393 keypart{i} are not NULL)
6394 ...
6395 For all other statistics collection methods notnull_tuples==NULL.
6396
6397 Output is an array:
6398 rec_per_key_part[k] =
6399 = E(#records in the table such that keypart_1=c_1 AND ... AND
6400 keypart_k=c_k for arbitrary constants c_1 ... c_k)
6401
6402 = {assuming that values have uniform distribution and index contains all
6403 tuples from the domain (or that {c_1, ..., c_k} tuple is choosen from
6404 index tuples}
6405
6406 = #tuples-in-the-index / #distinct-tuples-in-the-index.
6407
6408 The #tuples-in-the-index and #distinct-tuples-in-the-index have different
6409 meaning depending on which statistics collection method is used:
6410
6411 MI_STATS_METHOD_* how are nulls compared? which tuples are counted?
6412 NULLS_EQUAL NULL == NULL all tuples in table
6413 NULLS_NOT_EQUAL NULL != NULL all tuples in table
6414 IGNORE_NULLS n/a tuples that don't have NULLs
6415 */
6416
maria_update_key_parts(MARIA_KEYDEF * keyinfo,double * rec_per_key_part,ulonglong * unique,ulonglong * notnull,ulonglong records)6417 void maria_update_key_parts(MARIA_KEYDEF *keyinfo, double *rec_per_key_part,
6418 ulonglong *unique, ulonglong *notnull,
6419 ulonglong records)
6420 {
6421 ulonglong count=0, unique_tuples;
6422 ulonglong tuples= records;
6423 uint parts;
6424 double tmp;
6425 for (parts=0 ; parts < keyinfo->keysegs ; parts++)
6426 {
6427 count+=unique[parts];
6428 unique_tuples= count + 1;
6429 if (notnull)
6430 {
6431 tuples= notnull[parts];
6432 /*
6433 #(unique_tuples not counting tuples with NULLs) =
6434 #(unique_tuples counting tuples with NULLs as different) -
6435 #(tuples with NULLs)
6436 */
6437 unique_tuples -= (records - notnull[parts]);
6438 }
6439
6440 if (unique_tuples == 0)
6441 tmp= 1;
6442 else if (count == 0)
6443 tmp= ulonglong2double(tuples); /* 1 unique tuple */
6444 else
6445 tmp= ulonglong2double(tuples) / ulonglong2double(unique_tuples);
6446
6447 /*
6448 for some weird keys (e.g. FULLTEXT) tmp can be <1 here.
6449 let's ensure it is not
6450 */
6451 set_if_bigger(tmp,1);
6452
6453 *rec_per_key_part++= tmp;
6454 }
6455 }
6456
6457
maria_byte_checksum(const uchar * buf,uint length)6458 static ha_checksum maria_byte_checksum(const uchar *buf, uint length)
6459 {
6460 ha_checksum crc;
6461 const uchar *end=buf+length;
6462 for (crc=0; buf != end; buf++)
6463 crc=((crc << 1) + *buf) +
6464 MY_TEST(crc & (((ha_checksum) 1) << (8 * sizeof(ha_checksum) - 1)));
6465 return crc;
6466 }
6467
maria_too_big_key_for_sort(MARIA_KEYDEF * key,ha_rows rows)6468 my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows)
6469 {
6470 uint key_maxlength=key->maxlength;
6471 if (key->flag & HA_FULLTEXT)
6472 {
6473 uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
6474 key->seg->charset->mbmaxlen;
6475 key_maxlength+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
6476 }
6477 return (key->flag & HA_SPATIAL) ||
6478 (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) &&
6479 ((ulonglong) rows * key_maxlength >
6480 (ulonglong) maria_max_temp_length));
6481 }
6482
6483 /*
6484 Return TRUE if we can use repair by sorting
6485 One can set the force argument to force to use sorting
6486 even if the temporary file would be quite big!
6487 */
6488
maria_test_if_sort_rep(MARIA_HA * info,ha_rows rows,ulonglong key_map,my_bool force)6489 my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows,
6490 ulonglong key_map, my_bool force)
6491 {
6492 MARIA_SHARE *share= info->s;
6493 MARIA_KEYDEF *key=share->keyinfo;
6494 uint i;
6495
6496 /*
6497 maria_repair_by_sort only works if we have at least one key. If we don't
6498 have any keys, we should use the normal repair.
6499 */
6500 if (! maria_is_any_key_active(key_map))
6501 return FALSE; /* Can't use sort */
6502 for (i=0 ; i < share->base.keys ; i++,key++)
6503 {
6504 if (!force && maria_too_big_key_for_sort(key,rows))
6505 return FALSE;
6506 }
6507 return TRUE;
6508 }
6509
6510
6511 /**
6512 @brief Create a new handle for manipulation the new record file
6513
6514 @note
6515 It's ok for Recovery to have two MARIA_SHARE on the same index file
6516 because the one we create here is not transactional
6517 */
6518
create_new_data_handle(MARIA_SORT_PARAM * param,File new_file)6519 static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file)
6520 {
6521
6522 MARIA_SORT_INFO *sort_info= param->sort_info;
6523 MARIA_HA *info= sort_info->info;
6524 MARIA_HA *new_info;
6525 DBUG_ENTER("create_new_data_handle");
6526
6527 if (!(sort_info->new_info= maria_open(info->s->open_file_name.str, O_RDWR,
6528 HA_OPEN_COPY | HA_OPEN_FOR_REPAIR |
6529 HA_OPEN_INTERNAL_TABLE)))
6530 DBUG_RETURN(1);
6531
6532 new_info= sort_info->new_info;
6533 _ma_bitmap_set_pagecache_callbacks(&new_info->s->bitmap.file,
6534 new_info->s);
6535 _ma_set_data_pagecache_callbacks(&new_info->dfile, new_info->s);
6536 change_data_file_descriptor(new_info, new_file);
6537 maria_lock_database(new_info, F_EXTRA_LCK);
6538 if ((sort_info->param->testflag & T_UNPACK) &&
6539 info->s->data_file_type == COMPRESSED_RECORD)
6540 {
6541 (*new_info->s->once_end)(new_info->s);
6542 (*new_info->s->end)(new_info);
6543 restore_data_file_type(new_info->s);
6544 _ma_setup_functions(new_info->s);
6545 if ((*new_info->s->once_init)(new_info->s, new_file) ||
6546 (*new_info->s->init)(new_info))
6547 DBUG_RETURN(1);
6548 }
6549 _ma_reset_status(new_info);
6550 if (_ma_initialize_data_file(new_info->s, new_file))
6551 DBUG_RETURN(1);
6552
6553 /* Take into account any bitmap page created above: */
6554 param->filepos= new_info->s->state.state.data_file_length;
6555
6556 /* Use new virtual functions for key generation */
6557 info->s->keypos_to_recpos= new_info->s->keypos_to_recpos;
6558 info->s->recpos_to_keypos= new_info->s->recpos_to_keypos;
6559 DBUG_RETURN(0);
6560 }
6561
6562
6563 static void
set_data_file_type(MARIA_SORT_INFO * sort_info,MARIA_SHARE * share)6564 set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share)
6565 {
6566 if ((sort_info->new_data_file_type=share->data_file_type) ==
6567 COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK)
6568 {
6569 MARIA_SHARE tmp;
6570 sort_info->new_data_file_type= share->state.header.org_data_file_type;
6571 /* Set delete_function for sort_delete_record() */
6572 tmp= *share;
6573 tmp.state.header.data_file_type= tmp.state.header.org_data_file_type;
6574 tmp.options= ~HA_OPTION_COMPRESS_RECORD;
6575 _ma_setup_functions(&tmp);
6576 share->delete_record=tmp.delete_record;
6577 }
6578 }
6579
/*
  Change a share back to its original data file type.

  HA_OPTION_COMPRESS_RECORD is removed from the options (also in the state
  header), the data file type is set to the original data file type from
  the state header and pack.header_length is reset. The keypos_to_recpos
  and recpos_to_keypos functions are then taken from a temporary copy of
  the share on which _ma_setup_functions() has been run; no other function
  pointer of the share is changed here.
*/

static void restore_data_file_type(MARIA_SHARE *share)
{
  MARIA_SHARE scratch;

  share->options&= ~HA_OPTION_COMPRESS_RECORD;
  mi_int2store(share->state.header.options,share->options);

  share->state.header.data_file_type= share->state.header.org_data_file_type;
  share->data_file_type= share->state.header.data_file_type;
  share->pack.header_length= 0;

  /* Use new virtual functions for key generation */
  scratch= *share;
  _ma_setup_functions(&scratch);
  share->keypos_to_recpos= scratch.keypos_to_recpos;
  share->recpos_to_keypos= scratch.recpos_to_keypos;
}
6596
6597
/*
  Switch a handler to another data file.

  The current data file descriptor is closed, new_file is stored as
  descriptor for both the handler's data file and the share's bitmap file
  and the bitmap cache is reset.
*/

static void change_data_file_descriptor(MARIA_HA *info, File new_file)
{
  mysql_file_close(info->dfile.file, MYF(MY_WME));

  info->s->bitmap.file.file= new_file;
  info->dfile.file= new_file;

  _ma_bitmap_reset_cache(info->s);
}
6604
6605
6606 /**
6607 @brief Mark the data file to not be used
6608
6609 @note
6610 This is used in repair when we want to ensure the handler will not
6611 write anything to the data file anymore
6612 */
6613
unuse_data_file_descriptor(MARIA_HA * info)6614 static void unuse_data_file_descriptor(MARIA_HA *info)
6615 {
6616 (void) flush_pagecache_blocks(info->s->pagecache,
6617 &info->s->bitmap.file,
6618 FLUSH_IGNORE_CHANGED);
6619 info->dfile.file= info->s->bitmap.file.file= -1;
6620 _ma_bitmap_reset_cache(info->s);
6621 }
6622
6623
6624 /*
6625 Copy all states that has to do with the data file
6626
6627 NOTES
6628 This is done to copy the state from the data file generated from
6629 repair to the original handler
6630 */
6631
copy_data_file_state(MARIA_STATE_INFO * to,MARIA_STATE_INFO * from)6632 static void copy_data_file_state(MARIA_STATE_INFO *to,
6633 MARIA_STATE_INFO *from)
6634 {
6635 to->state.records= from->state.records;
6636 to->state.del= from->state.del;
6637 to->state.empty= from->state.empty;
6638 to->state.data_file_length= from->state.data_file_length;
6639 to->split= from->split;
6640 to->dellink= from->dellink;
6641 to->first_bitmap_with_space= from->first_bitmap_with_space;
6642 }
6643
6644
6645 /*
6646 Read 'safely' next record while scanning table.
6647
6648 SYNOPSIS
6649 _ma_safe_scan_block_record()
6650 info Maria handler
6651 record Store found here
6652
6653 NOTES
6654 - One must have called mi_scan() before this
6655
6656 Differences compared to _ma_scan_block_records() are:
6657 - We read all blocks, not only blocks marked by the bitmap to be safe
6658 - In case of errors, next read will read next record.
6659 - More sanity checks
6660
6661 RETURN
6662 0 ok
6663 HA_ERR_END_OF_FILE End of file
6664 # error number
6665 */
6666
6667
_ma_safe_scan_block_record(MARIA_SORT_INFO * sort_info,MARIA_HA * info,uchar * record)6668 static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
6669 MARIA_HA *info, uchar *record)
6670 {
6671 MARIA_SHARE *share= info->s;
6672 MARIA_RECORD_POS record_pos= info->cur_row.nextpos;
6673 pgcache_page_no_t page= sort_info->page;
6674 DBUG_ENTER("_ma_safe_scan_block_record");
6675
6676 for (;;)
6677 {
6678 /* Find next row in current page */
6679 if (likely(record_pos < info->scan.number_of_rows))
6680 {
6681 uint length, offset;
6682 uchar *data, *end_of_data;
6683 char llbuff[22];
6684
6685 while (!(offset= uint2korr(info->scan.dir)))
6686 {
6687 info->scan.dir-= DIR_ENTRY_SIZE;
6688 record_pos++;
6689 if (info->scan.dir < info->scan.dir_end)
6690 {
6691 _ma_check_print_info(sort_info->param,
6692 "Wrong directory on page %s",
6693 llstr(page, llbuff));
6694 goto read_next_page;
6695 }
6696 }
6697 /* found row */
6698 info->cur_row.lastpos= info->scan.row_base_page + record_pos;
6699 info->cur_row.nextpos= record_pos + 1;
6700 data= info->scan.page_buff + offset;
6701 length= uint2korr(info->scan.dir + 2);
6702 end_of_data= data + length;
6703 info->scan.dir-= DIR_ENTRY_SIZE; /* Point to previous row */
6704
6705 if (end_of_data > info->scan.dir_end ||
6706 offset < PAGE_HEADER_SIZE(info->s) ||
6707 length < share->base.min_block_length)
6708 {
6709 _ma_check_print_info(sort_info->param,
6710 "Wrong directory entry %3u at page %s",
6711 (uint) record_pos, llstr(page, llbuff));
6712 record_pos++;
6713 continue;
6714 }
6715 else
6716 {
6717 DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos));
6718 DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data));
6719 }
6720 }
6721
6722 read_next_page:
6723 /* Read until we find next head page */
6724 for (;;)
6725 {
6726 uint page_type;
6727 char llbuff[22];
6728
6729 sort_info->page++; /* In case of errors */
6730 page++;
6731 if (!(page % share->bitmap.pages_covered))
6732 {
6733 /* Skip bitmap */
6734 page++;
6735 sort_info->page++;
6736 }
6737 if ((my_off_t) (page + 1) * share->block_size > sort_info->filelength)
6738 DBUG_RETURN(HA_ERR_END_OF_FILE);
6739 if (!(pagecache_read(share->pagecache,
6740 &info->dfile,
6741 page, 0, info->scan.page_buff,
6742 PAGECACHE_READ_UNKNOWN_PAGE,
6743 PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
6744 {
6745 if (my_errno == HA_ERR_WRONG_CRC ||
6746 my_errno == HA_ERR_DECRYPTION_FAILED)
6747 {
6748 /*
6749 Don't give errors for zero filled blocks. These can
6750 sometimes be found at end of a bitmap when we wrote a big
6751 record last that was moved to the next bitmap.
6752 */
6753 if (_ma_check_bitmap_data(info, UNALLOCATED_PAGE, 0,
6754 _ma_bitmap_get_page_bits(info,
6755 &share->bitmap,
6756 page)))
6757 {
6758 _ma_check_print_info(sort_info->param,
6759 "Wrong CRC on datapage at %s",
6760 llstr(page, llbuff));
6761 }
6762 continue;
6763 }
6764 DBUG_RETURN(my_errno);
6765 }
6766 page_type= (info->scan.page_buff[PAGE_TYPE_OFFSET] &
6767 PAGE_TYPE_MASK);
6768 if (page_type == HEAD_PAGE)
6769 {
6770 if ((info->scan.number_of_rows=
6771 (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) != 0)
6772 break;
6773 _ma_check_print_info(sort_info->param,
6774 "Wrong head page at page %s",
6775 llstr(page, llbuff));
6776 }
6777 else if (page_type >= MAX_PAGE_TYPE)
6778 {
6779 _ma_check_print_info(sort_info->param,
6780 "Found wrong page type: %d at page %s",
6781 page_type, llstr(page, llbuff));
6782 }
6783 }
6784
6785 /* New head page */
6786 info->scan.dir= (info->scan.page_buff + share->block_size -
6787 PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE);
6788 info->scan.dir_end= (info->scan.dir -
6789 (info->scan.number_of_rows - 1) *
6790 DIR_ENTRY_SIZE);
6791 info->scan.row_base_page= ma_recordpos(page, 0);
6792 record_pos= 0;
6793 }
6794 }
6795
6796
6797 /**
6798 @brief Writes a LOGREC_REPAIR_TABLE record and updates create_rename_lsn
6799 if needed (so that maria_read_log does not redo the repair).
6800
6801 @param param description of the REPAIR operation
6802 @param info table
6803
6804 @return Operation status
6805 @retval 0 ok
6806 @retval 1 error (disk problem)
6807 */
6808
write_log_record_for_repair(const HA_CHECK * param,MARIA_HA * info)6809 my_bool write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info)
6810 {
6811 MARIA_SHARE *share= info->s;
6812 /* in case this is maria_chk or recovery... */
6813 if (translog_status == TRANSLOG_OK && !maria_in_recovery &&
6814 share->base.born_transactional)
6815 {
6816 my_bool save_now_transactional= share->now_transactional;
6817
6818 /*
6819 For now this record is only informative. It could serve when applying
6820 logs to a backup, but that needs more thought. Assume table became
6821 corrupted. It is repaired, then some writes happen to it.
6822 Later we restore an old backup, and want to apply this REDO_REPAIR_TABLE
6823 record. For it to give the same result as originally, the table should
6824 be corrupted the same way, so applying previous REDOs should produce the
6825 same corruption; that's really not guaranteed (different execution paths
6826 in execution of REDOs vs runtime code so not same bugs hit, temporary
6827 hardware issues not repeatable etc). Corruption may not be repeatable.
6828 A reasonable solution is to execute the REDO_REPAIR_TABLE record and
6829 check if the checksum of the resulting table matches what it was at the
6830 end of the original repair (should be stored in log record); or execute
6831 the REDO_REPAIR_TABLE if the checksum of the table-before-repair matches
6832 was it was at the start of the original repair (should be stored in log
6833 record).
6834 */
6835 LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6836 uchar log_data[FILEID_STORE_SIZE + 8 + 8];
6837 LSN lsn;
6838
6839 /*
6840 testflag gives an idea of what REPAIR did (in particular T_QUICK
6841 or not: did it touch the data file or not?).
6842 */
6843 int8store(log_data + FILEID_STORE_SIZE, param->testflag);
6844 /* org_key_map is used when recreating index after a load data infile */
6845 int8store(log_data + FILEID_STORE_SIZE + 8, param->org_key_map);
6846
6847 log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
6848 log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6849
6850 share->now_transactional= 1;
6851 if (unlikely(translog_write_record(&lsn, LOGREC_REDO_REPAIR_TABLE,
6852 &dummy_transaction_object, info,
6853 (translog_size_t) sizeof(log_data),
6854 sizeof(log_array)/sizeof(log_array[0]),
6855 log_array, log_data, NULL) ||
6856 translog_flush(lsn)))
6857 return TRUE;
6858 /*
6859 The table's existence was made durable earlier (MY_SYNC_DIR passed to
6860 maria_change_to_newfile()). All pages have been flushed, state too, we
6861 need to force it to disk. Old REDOs should not be applied to the table,
6862 which is already enforced as skip_redos_lsn was increased in
6863 protect_against_repair_crash(). But if this is an explicit repair,
6864 even UNDO phase should ignore this table: create_rename_lsn should be
6865 increased, and this also serves for the REDO_REPAIR to be ignored by
6866 maria_read_log.
6867 The fully correct order would be: sync data and index file, remove crash
6868 mark and update LSNs then write state and sync index file. But at this
6869 point state (without crash mark) is already written.
6870 */
6871 if ((!(param->testflag & T_NO_CREATE_RENAME_LSN) &&
6872 _ma_update_state_lsns(share, lsn, share->state.create_trid, FALSE,
6873 FALSE)) ||
6874 _ma_sync_table_files(info))
6875 return TRUE;
6876 share->now_transactional= save_now_transactional;
6877 }
6878 return FALSE;
6879 }
6880
6881
6882 /**
6883 Writes an UNDO record which if executed in UNDO phase, will empty the
6884 table. Such record is thus logged only in certain cases of bulk insert
6885 (table needs to be empty etc).
6886 */
write_log_record_for_bulk_insert(MARIA_HA * info)6887 my_bool write_log_record_for_bulk_insert(MARIA_HA *info)
6888 {
6889 LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6890 uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE];
6891 LSN lsn;
6892 lsn_store(log_data, info->trn->undo_lsn);
6893 log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
6894 log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6895 return translog_write_record(&lsn, LOGREC_UNDO_BULK_INSERT,
6896 info->trn, info,
6897 (translog_size_t)
6898 log_array[TRANSLOG_INTERNAL_PARTS +
6899 0].length,
6900 TRANSLOG_INTERNAL_PARTS + 1, log_array,
6901 log_data + LSN_STORE_SIZE, NULL) ||
6902 translog_flush(lsn); /* WAL */
6903 }
6904
6905
6906 /* Give error message why reading of key page failed */
6907
/**
  @param param     check descriptor the error is reported through
  @param info      table handler; only its block size is used here
  @param position  byte offset of the index page that could not be read

  The message is chosen from the current value of my_errno.
*/
static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
                                 my_off_t position)
{
  /*
    llstr() formats a 64-bit value: up to 20 characters plus the
    terminating NUL. The position may come from a damaged table, so the
    page number cannot be assumed to be small.
  */
  char buff[22];
  uint32 block_size= info->s->block_size;

  if (my_errno == HA_ERR_CRASHED)
    _ma_check_print_error(param,
                          "Wrong base information on indexpage at page: %s",
                          llstr(position / block_size, buff));
  else
    _ma_check_print_error(param,
                          "Can't read indexpage from page: %s, "
                          "error: %d",
                          llstr(position / block_size, buff), my_errno);
}
6924
6925
6926 /**
6927 When we want to check a table, we verify that the transaction ids of rows
6928 and keys are not bigger than the biggest id generated by Maria so far, which
6929 is returned by the function below.
6930
6931 @note If control file is not open, 0 may be returned; to not confuse
6932 this with a valid max trid of 0, the caller should notice that it failed to
6933 open the control file (ma_control_file_inited() can serve for that).
6934 */
6935
max_trid_in_system(void)6936 static TrID max_trid_in_system(void)
6937 {
6938 TrID id= trnman_get_max_trid(); /* 0 if transac manager not initialized */
6939 /* 'id' may be far bigger, if last shutdown is old */
6940 return MY_MAX(id, max_trid_in_control_file);
6941 }
6942
6943
/**
  Report a row whose transaction id is not visible.

  Every call increments param->not_visible_rows_found, but a message is
  printed for the first call only.

  @param param      check descriptor (counter and message output)
  @param used_trid  transaction id found in the row
*/
static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid)
{
  char trid_buff[22], max_trid_buff[22];

  /* Already reported once: just count this occurrence */
  if (param->not_visible_rows_found++)
    return;

  if (ma_control_file_inited())
  {
    _ma_check_print_error(param,
                          "Found row with transaction id %s when max "
                          "transaction id according to aria_control_file "
                          "is %s",
                          llstr(used_trid, trid_buff),
                          llstr(param->max_trid, max_trid_buff));
    return;
  }

  /* Without a control file there is no max trid to compare with: warn only */
  _ma_check_print_warning(param,
                          "Found row with transaction id %s but no "
                          "aria_control_file was used or specified. "
                          "The table may be corrupted",
                          llstr(used_trid, trid_buff));
}
6968
6969
6970 /**
6971 Mark that we can retry normal repair if we used quick repair
6972
  We shouldn't do this in case of disk error as in this case we are likely
  to lose much more than expected.
6975 */
6976
retry_if_quick(MARIA_SORT_PARAM * sort_param,int error)6977 void retry_if_quick(MARIA_SORT_PARAM *sort_param, int error)
6978 {
6979 HA_CHECK *param=sort_param->sort_info->param;
6980
6981 if (!sort_param->fix_datafile && error >= HA_ERR_FIRST)
6982 {
6983 param->retry_repair=1;
6984 param->testflag|=T_RETRY_WITHOUT_QUICK;
6985 }
6986 }
6987
6988 /* Print information about bitmap page */
6989
static void print_bitmap_description(MARIA_SHARE *share,
                                     pgcache_page_no_t page,
                                     uchar *bitmap_data)
{
  char *description;

  /* On allocation failure nothing is printed here (MY_WME is passed) */
  if ((description= my_malloc(MAX_BITMAP_INFO_LENGTH, MYF(MY_WME))))
  {
    _ma_get_bitmap_description(&share->bitmap, bitmap_data, page,
                               description);
    printf("Bitmap page %lu\n%s", (ulong) page, description);
    my_free(description);
  }
}
7001