1 /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; version 2 of the License.
6 
7    This program is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU General Public License for more details.
11 
12    You should have received a copy of the GNU General Public License
13    along with this program; if not, write to the Free Software
14    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15 
16 /* Describe, check and repair of MARIA tables */
17 
18 /*
19   About checksum calculation.
20 
21   There are two types of checksums. Table checksum and row checksum.
22 
23   Row checksum is an additional uchar at the end of dynamic length
24   records. It must be calculated if the table is configured for them.
25   Otherwise they must not be used. The variable
26   MYISAM_SHARE::calc_checksum determines if row checksums are used.
27   MI_INFO::checksum is used as temporary storage during row handling.
28   For parallel repair we must assure that only one thread can use this
29   variable. There is no problem on the write side as this is done by one
30   thread only. But when checking a record after read this could go
31   wrong. But since all threads read through a common read buffer, it is
32   sufficient if only one thread checks it.
33 
34   Table checksum is an eight uchar value in the header of the index file.
35   It can be calculated even if row checksums are not used. The variable
36   MI_CHECK::glob_crc is calculated over all records.
37   MI_SORT_PARAM::calc_checksum determines if this should be done. This
38   variable is not part of MI_CHECK because it must be set per thread for
39   parallel repair. The global glob_crc must be changed by one thread
40   only. And it is sufficient to calculate the checksum once only.
41 */
42 
43 #include "ma_ftdefs.h"
44 #include "ma_rt_index.h"
45 #include "ma_blockrec.h"
46 #include "trnman.h"
47 #include "ma_key_recover.h"
48 #include <my_check_opt.h>
49 
50 #include <stdarg.h>
51 #include <my_getopt.h>
52 #ifdef HAVE_SYS_VADVISE_H
53 #include <sys/vadvise.h>
54 #endif
55 
56 /* Functions defined in this file */
57 
58 static int check_k_link(HA_CHECK *param, MARIA_HA *info, my_off_t next_link);
59 static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
60 		     MARIA_PAGE *page, ha_rows *keys,
61 		     ha_checksum *key_checksum, uint level);
62 static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo);
63 static ha_checksum calc_checksum(ha_rows count);
64 static int writekeys(MARIA_SORT_PARAM *sort_param);
65 static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
66                           MARIA_KEYDEF *keyinfo,
67 			  my_off_t pagepos, File new_file);
68 static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
69 static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
70 static int sort_get_next_record(MARIA_SORT_PARAM *sort_param);
71 static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
72                         const void *b);
73 static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
74                                    const uchar *a);
75 static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a);
76 static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo, const uchar *key);
77 static int sort_insert_key(MARIA_SORT_PARAM  *sort_param,
78                            reg1 SORT_KEY_BLOCKS *key_block,
79 			   const uchar *key, my_off_t prev_block);
80 static int sort_delete_record(MARIA_SORT_PARAM *sort_param);
81 /*static int _ma_flush_pending_blocks(HA_CHECK *param);*/
82 static SORT_KEY_BLOCKS	*alloc_key_blocks(HA_CHECK *param, uint blocks,
83 					  uint buffer_length);
84 static ha_checksum maria_byte_checksum(const uchar *buf, uint length);
85 static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share);
86 static void restore_data_file_type(MARIA_SHARE *share);
87 static void change_data_file_descriptor(MARIA_HA *info, File new_file);
88 static void unuse_data_file_descriptor(MARIA_HA *info);
89 static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
90                                       MARIA_HA *info, uchar *record);
91 static void copy_data_file_state(MARIA_STATE_INFO *to,
92                                  MARIA_STATE_INFO *from);
93 static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
94                                  my_off_t position);
95 static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file);
96 static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
97                                                  MARIA_HA *info);
98 static TrID max_trid_in_system(void);
99 static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid);
100 void retry_if_quick(MARIA_SORT_PARAM *param, int error);
101 static void print_bitmap_description(MARIA_SHARE *share,
102                                      pgcache_page_no_t page,
103                                      uchar *buff);
104 
105 
106 /* Initialize check param with default values */
107 
maria_chk_init(HA_CHECK * param)108 void maria_chk_init(HA_CHECK *param)
109 {
110   bzero((uchar*) param,sizeof(*param));
111   param->opt_follow_links=1;
112   param->keys_in_use= ~(ulonglong) 0;
113   param->search_after_block=HA_OFFSET_ERROR;
114   param->auto_increment_value= 0;
115   param->use_buffers= PAGE_BUFFER_INIT;
116   param->read_buffer_length=READ_BUFFER_INIT;
117   param->write_buffer_length=READ_BUFFER_INIT;
118   param->sort_buffer_length=SORT_BUFFER_INIT;
119   param->sort_key_blocks=BUFFERS_WHEN_SORTING;
120   param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
121   param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL);
122   param->start_check_pos=0;
123   param->max_record_length= LONGLONG_MAX;
124   param->pagecache_block_size= KEY_CACHE_BLOCK_SIZE;
125   param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
126   param->max_stage= 1;
127 }
128 
129 
130 /* Initialize check param and maria handler for check of table */
131 
maria_chk_init_for_check(HA_CHECK * param,MARIA_HA * info)132 void maria_chk_init_for_check(HA_CHECK *param, MARIA_HA *info)
133 {
134   param->not_visible_rows_found= 0;
135   param->max_found_trid= 0;
136 
137   /*
138     Set up transaction handler so that we can see all rows. When rows is read
139     we will check the found id against param->max_tried
140   */
141   if (!info->s->base.born_transactional)
142   {
143     /*
144       There are no trids. Howver we want to set max_trid to make test of
145       create_trid simpler.
146     */
147     param->max_trid= ~(TrID) 0;
148   }
149   else if (param->max_trid == 0 || param->max_trid == ~(TrID) 0)
150   {
151     if (!ma_control_file_inited())
152       param->max_trid= 0;      /* Give warning for first trid found */
153     else
154       param->max_trid= max_trid_in_system();
155   }
156 
157   maria_ignore_trids(info);
158 }
159 
160 
161 	/* Check the status flags for the table */
162 
maria_chk_status(HA_CHECK * param,MARIA_HA * info)163 int maria_chk_status(HA_CHECK *param, MARIA_HA *info)
164 {
165   MARIA_SHARE *share= info->s;
166 
167   if (maria_is_crashed_on_repair(info))
168     _ma_check_print_warning(param,
169 			   "Table is marked as crashed and last repair failed");
170   else if (maria_in_repair(info))
171     _ma_check_print_warning(param,
172                             "Last repair was aborted before finishing");
173   else if (maria_is_crashed(info))
174     _ma_check_print_warning(param,
175 			   "Table is marked as crashed");
176   if (share->state.open_count != (uint) (share->global_changed ? 1 : 0))
177   {
178     /* Don't count this as a real warning, as check can correct this ! */
179     my_bool save=param->warning_printed;
180     _ma_check_print_warning(param,
181 			   share->state.open_count==1 ?
182 			   "%d client is using or hasn't closed the table properly" :
183 			   "%d clients are using or haven't closed the table properly",
184 			   share->state.open_count);
185     /* If this will be fixed by the check, forget the warning */
186     if (param->testflag & T_UPDATE_STATE)
187       param->warning_printed=save;
188   }
189   if (share->state.create_trid > param->max_trid)
190   {
191     param->wrong_trd_printed= 1;       /* Force should run zerofill */
192     _ma_check_print_warning(param,
193                             "Table create_trd (%llu) > current max_transaction id (%llu).  Table needs to be repaired or zerofilled to be usable",
194                             share->state.create_trid, param->max_trid);
195     return 1;
196   }
197   return 0;
198 }
199 
200 /*
201   Check delete links in row data
202 */
203 
maria_chk_del(HA_CHECK * param,register MARIA_HA * info,ulonglong test_flag)204 int maria_chk_del(HA_CHECK *param, register MARIA_HA *info,
205                   ulonglong test_flag)
206 {
207   MARIA_SHARE *share= info->s;
208   reg2 ha_rows i;
209   uint delete_link_length;
210   my_off_t empty,next_link,UNINIT_VAR(old_link);
211   char buff[22],buff2[22];
212   DBUG_ENTER("maria_chk_del");
213 
214   param->record_checksum=0;
215 
216   if (share->data_file_type == BLOCK_RECORD)
217     DBUG_RETURN(0);                             /* No delete links here */
218 
219   delete_link_length=((share->options & HA_OPTION_PACK_RECORD) ? 20 :
220 		      share->rec_reflength+1);
221 
222   if (!(test_flag & T_SILENT))
223     puts("- check record delete-chain");
224 
225   next_link=share->state.dellink;
226   if (share->state.state.del == 0)
227   {
228     if (test_flag & T_VERBOSE)
229     {
230       puts("No recordlinks");
231     }
232   }
233   else
234   {
235     if (test_flag & T_VERBOSE)
236       printf("Recordlinks:    ");
237     empty=0;
238     for (i= share->state.state.del ; i > 0L && next_link != HA_OFFSET_ERROR ; i--)
239     {
240       if (_ma_killed_ptr(param))
241         DBUG_RETURN(1);
242       if (test_flag & T_VERBOSE)
243 	printf(" %9s",llstr(next_link,buff));
244       if (next_link >= share->state.state.data_file_length)
245 	goto wrong;
246       if (mysql_file_pread(info->dfile.file, (uchar*) buff, delete_link_length,
247 		   next_link,MYF(MY_NABP)))
248       {
249 	if (test_flag & T_VERBOSE) puts("");
250 	_ma_check_print_error(param,"Can't read delete-link at filepos: %s",
251                               llstr(next_link,buff));
252 	DBUG_RETURN(1);
253       }
254       if (*buff != '\0')
255       {
256 	if (test_flag & T_VERBOSE) puts("");
257 	_ma_check_print_error(param,"Record at pos: %s is not remove-marked",
258                               llstr(next_link,buff));
259 	goto wrong;
260       }
261       if (share->options & HA_OPTION_PACK_RECORD)
262       {
263 	my_off_t prev_link=mi_sizekorr(buff+12);
264 	if (empty && prev_link != old_link)
265 	{
266 	  if (test_flag & T_VERBOSE) puts("");
267 	  _ma_check_print_error(param,
268                                 "Deleted block at %s doesn't point back at previous delete link",
269                                 llstr(next_link,buff2));
270 	  goto wrong;
271 	}
272 	old_link=next_link;
273 	next_link=mi_sizekorr(buff+4);
274 	empty+=mi_uint3korr(buff+1);
275       }
276       else
277       {
278 	param->record_checksum+=(ha_checksum) next_link;
279 	next_link= _ma_rec_pos(share, (uchar *) buff + 1);
280 	empty+=share->base.pack_reclength;
281       }
282     }
283     if (share->state.state.del && (test_flag & T_VERBOSE))
284       puts("\n");
285     if (empty != share->state.state.empty)
286     {
287       _ma_check_print_warning(param,
288                               "Found %s deleted space in delete link chain. Should be %s",
289                               llstr(empty,buff2),
290                               llstr(share->state.state.empty,buff));
291     }
292     if (next_link != HA_OFFSET_ERROR)
293     {
294       _ma_check_print_error(param,
295                             "Found more than the expected %s deleted rows in delete link chain",
296                             llstr(share->state.state.del, buff));
297       goto wrong;
298     }
299     if (i != 0)
300     {
301       _ma_check_print_error(param,
302                             "Found %s deleted rows in delete link chain. Should be %s",
303                             llstr(share->state.state.del - i, buff2),
304                             llstr(share->state.state.del, buff));
305       goto wrong;
306     }
307   }
308   DBUG_RETURN(0);
309 
310 wrong:
311   param->testflag|=T_RETRY_WITHOUT_QUICK;
312   if (test_flag & T_VERBOSE)
313     puts("");
314   _ma_check_print_error(param,"record delete-link-chain corrupted");
315   DBUG_RETURN(1);
316 } /* maria_chk_del */
317 
318 
319 /* Check delete links in index file */
320 
check_k_link(HA_CHECK * param,register MARIA_HA * info,my_off_t next_link)321 static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
322                         my_off_t next_link)
323 {
324   MARIA_SHARE *share= info->s;
325   uint block_size= share->block_size;
326   ha_rows records;
327   char llbuff[21], llbuff2[21];
328   uchar *buff;
329   DBUG_ENTER("check_k_link");
330 
331   if (next_link == HA_OFFSET_ERROR)
332     DBUG_RETURN(0);                             /* Avoid printing empty line */
333 
334   records= (ha_rows) (share->state.state.key_file_length / block_size);
335   while (next_link != HA_OFFSET_ERROR && records > 0)
336   {
337     if (_ma_killed_ptr(param))
338       DBUG_RETURN(1);
339     if (param->testflag & T_VERBOSE)
340       printf("%16s",llstr(next_link,llbuff));
341 
342     /* Key blocks must lay within the key file length entirely. */
343     if (next_link + block_size > share->state.state.key_file_length)
344     {
345       /* purecov: begin tested */
346       _ma_check_print_error(param, "Invalid key block position: %s  "
347                             "key block size: %u  file_length: %s",
348                             llstr(next_link, llbuff), block_size,
349                             llstr(share->state.state.key_file_length, llbuff2));
350       DBUG_RETURN(1);
351       /* purecov: end */
352     }
353 
354     /* Key blocks must be aligned at block_size */
355     if (next_link & (block_size -1))
356     {
357       /* purecov: begin tested */
358       _ma_check_print_error(param, "Mis-aligned key block: %s  "
359                             "minimum key block length: %u",
360                             llstr(next_link, llbuff),
361                             block_size);
362       DBUG_RETURN(1);
363       /* purecov: end */
364     }
365 
366     DBUG_ASSERT(share->pagecache->block_size == block_size);
367     if (!(buff= pagecache_read(share->pagecache,
368                                &share->kfile,
369                                (pgcache_page_no_t) (next_link / block_size),
370                                DFLT_INIT_HITS,
371                                info->buff, PAGECACHE_READ_UNKNOWN_PAGE,
372                                PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
373     {
374       /* purecov: begin tested */
375       _ma_check_print_error(param, "key cache read error for block: %s",
376                             llstr(next_link,llbuff));
377       DBUG_RETURN(1);
378       /* purecov: end */
379     }
380     if (_ma_get_keynr(info->s, buff) != MARIA_DELETE_KEY_NR)
381       _ma_check_print_error(param, "Page at %s is not delete marked",
382                             llstr(next_link, llbuff));
383 
384     next_link= mi_sizekorr(buff + share->keypage_header);
385     records--;
386     param->key_file_blocks+=block_size;
387   }
388   if (param->testflag & T_VERBOSE)
389   {
390     if (next_link != HA_OFFSET_ERROR)
391       printf("%16s\n",llstr(next_link,llbuff));
392     else
393       puts("");
394   }
395   DBUG_RETURN (next_link != HA_OFFSET_ERROR);
396 } /* check_k_link */
397 
398 
399 	/* Check sizes of files */
400 
maria_chk_size(HA_CHECK * param,register MARIA_HA * info)401 int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
402 {
403   MARIA_SHARE *share= info->s;
404   int error;
405   register my_off_t skr,size;
406   char buff[22],buff2[22];
407   DBUG_ENTER("maria_chk_size");
408 
409   if (!(param->testflag & T_SILENT))
410     puts("- check file-size");
411 
412   /*
413     The following is needed if called externally (not from maria_chk).
414     To get a correct physical size we need to flush them.
415   */
416   if ((error= _ma_flush_table_files(info,
417                                     MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
418                                     FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE)))
419     _ma_check_print_error(param, "Failed to flush data or index file");
420 
421   size= mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END, MYF(MY_THREADSAFE));
422   if ((skr=(my_off_t) share->state.state.key_file_length) != size)
423   {
424     /* Don't give error if file generated by maria_pack */
425     if (skr > size && maria_is_any_key_active(share->state.key_map))
426     {
427       error=1;
428       _ma_check_print_error(param,
429 			   "Size of indexfile is: %-8s         Expected: %s",
430 			   llstr(size,buff), llstr(skr,buff2));
431       share->state.state.key_file_length= size;
432     }
433     else if (!(param->testflag & T_VERY_SILENT))
434       _ma_check_print_warning(param,
435 			     "Size of indexfile is: %-8s       Expected: %s",
436 			     llstr(size,buff), llstr(skr,buff2));
437   }
438   if (size > share->base.max_key_file_length)
439   {
440     _ma_check_print_warning(param,
441                             "Size of indexfile is: %-8s which is bigger than max indexfile size: %s",
442                             ullstr(size,buff),
443                             ullstr(share->base.max_key_file_length, buff2));
444   }
445   else if (!(param->testflag & T_VERY_SILENT) &&
446            ! (share->options & HA_OPTION_COMPRESS_RECORD) &&
447            ulonglong2double(share->state.state.key_file_length) >
448            ulonglong2double(share->base.margin_key_file_length)*0.9)
449     _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used",
450                             llstr(share->state.state.key_file_length,buff),
451                             llstr(share->base.max_key_file_length,buff));
452 
453   size= mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
454   skr=(my_off_t) share->state.state.data_file_length;
455   if (share->options & HA_OPTION_COMPRESS_RECORD)
456     skr+= MEMMAP_EXTRA_MARGIN;
457 #ifdef USE_RELOC
458   if (share->data_file_type == STATIC_RECORD &&
459       skr < (my_off_t) share->base.reloc*share->base.min_pack_length)
460     skr=(my_off_t) share->base.reloc*share->base.min_pack_length;
461 #endif
462   if (skr != size)
463   {
464     share->state.state.data_file_length=size;	/* Skip other errors */
465     if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN)
466     {
467       error=1;
468       _ma_check_print_error(param,"Size of datafile is: %-9s         Expected: %s",
469 		    llstr(size,buff), llstr(skr,buff2));
470       param->testflag|=T_RETRY_WITHOUT_QUICK;
471     }
472     else
473     {
474       _ma_check_print_warning(param,
475                               "Size of datafile is: %-9s       Expected: %s",
476                               llstr(size,buff), llstr(skr,buff2));
477     }
478   }
479   if (size > share->base.max_data_file_length)
480   {
481     _ma_check_print_warning(param,
482                             "Size of datafile is: %-8s which is bigger than max datafile size: %s",
483                             ullstr(size,buff),
484                             ullstr(share->base.max_data_file_length, buff2));
485   } else if (!(param->testflag & T_VERY_SILENT) &&
486              !(share->options & HA_OPTION_COMPRESS_RECORD) &&
487              ulonglong2double(share->state.state.data_file_length) >
488              (ulonglong2double(share->base.max_data_file_length)*0.9))
489     _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used",
490                             llstr(share->state.state.data_file_length,buff),
491                             llstr(share->base.max_data_file_length,buff2));
492   DBUG_RETURN(error);
493 } /* maria_chk_size */
494 
495 
496 /* Check keys */
497 
maria_chk_key(HA_CHECK * param,register MARIA_HA * info)498 int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
499 {
500   uint key,found_keys=0,full_text_keys=0,result=0;
501   ha_rows keys;
502   ha_checksum old_record_checksum,init_checksum;
503   my_off_t all_keydata,all_totaldata,key_totlength,length;
504   double  *rec_per_key_part;
505   MARIA_SHARE *share= info->s;
506   MARIA_KEYDEF *keyinfo;
507   char buff[22],buff2[22];
508   MARIA_PAGE page;
509   DBUG_ENTER("maria_chk_key");
510 
511   if (!(param->testflag & T_SILENT))
512     puts("- check key delete-chain");
513 
514   param->key_file_blocks=share->base.keystart;
515   if (check_k_link(param, info, share->state.key_del))
516   {
517     if (param->testflag & T_VERBOSE) puts("");
518     _ma_check_print_error(param,"key delete-link-chain corrupted");
519     DBUG_RETURN(-1);
520   }
521 
522   if (!(param->testflag & T_SILENT))
523     puts("- check index reference");
524 
525   all_keydata=all_totaldata=key_totlength=0;
526   init_checksum=param->record_checksum;
527   old_record_checksum=0;
528   if (share->data_file_type == STATIC_RECORD)
529     old_record_checksum= (calc_checksum(share->state.state.records +
530                                         share->state.state.del-1) *
531                           share->base.pack_reclength);
532   rec_per_key_part= param->new_rec_per_key_part;
533   for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
534        rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++)
535   {
536     param->key_crc[key]=0;
537     if (! maria_is_key_active(share->state.key_map, key))
538     {
539       /* Remember old statistics for key */
540       memcpy((char*) rec_per_key_part,
541 	     (char*) (share->state.rec_per_key_part +
542 		      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
543 	     keyinfo->keysegs*sizeof(*rec_per_key_part));
544       continue;
545     }
546     found_keys++;
547     _ma_report_progress(param, key, share->base.keys);
548 
549     param->record_checksum=init_checksum;
550 
551     bzero((char*) &param->unique_count,sizeof(param->unique_count));
552     bzero((char*) &param->notnull_count,sizeof(param->notnull_count));
553 
554     if ((!(param->testflag & T_SILENT)))
555       printf ("- check data record references index: %d\n",key+1);
556     if (keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL))
557       full_text_keys++;
558     if (share->state.key_root[key] == HA_OFFSET_ERROR)
559     {
560       if (share->state.state.records != 0 && !(keyinfo->flag & HA_FULLTEXT))
561         _ma_check_print_error(param, "Key tree %u is empty", key + 1);
562       goto do_stat;
563     }
564     if (_ma_fetch_keypage(&page, info, keyinfo, share->state.key_root[key],
565                           PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
566                           info->buff, 0))
567     {
568       report_keypage_fault(param, info, share->state.key_root[key]);
569       if (!(param->testflag & T_INFO))
570 	DBUG_RETURN(-1);
571       result= -1;
572       continue;
573     }
574     param->key_file_blocks+=keyinfo->block_length;
575     keys=0;
576     param->keydata=param->totaldata=0;
577     param->key_blocks=0;
578     param->max_level=0;
579     if (chk_index(param, info,keyinfo, &page, &keys, param->key_crc+key,1))
580       DBUG_RETURN(-1);
581     if (!(keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL | HA_RTREE_INDEX)))
582     {
583       if (keys != share->state.state.records)
584       {
585 	_ma_check_print_error(param,"Found %s keys of %s",llstr(keys,buff),
586 		    llstr(share->state.state.records,buff2));
587 	if (!(param->testflag & T_INFO))
588 	DBUG_RETURN(-1);
589 	result= -1;
590 	continue;
591       }
592       if ((found_keys - full_text_keys == 1 &&
593            !(share->data_file_type == STATIC_RECORD)) ||
594           (param->testflag & T_DONT_CHECK_CHECKSUM))
595 	old_record_checksum= param->record_checksum;
596       else if (old_record_checksum != param->record_checksum)
597       {
598 	if (key)
599 	  _ma_check_print_error(param,
600                                 "Key %u doesn't point at same records as "
601                                 "key 1",
602 		      key+1);
603 	else
604 	  _ma_check_print_error(param,"Key 1 doesn't point at all records");
605 	if (!(param->testflag & T_INFO))
606 	  DBUG_RETURN(-1);
607 	result= -1;
608 	continue;
609       }
610     }
611     if ((uint) share->base.auto_key -1 == key)
612     {
613       /* Check that auto_increment key is bigger than max key value */
614       ulonglong auto_increment;
615       const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
616       info->lastinx=key;
617       _ma_read_key_record(info, info->rec_buff, 0);
618       auto_increment=
619         ma_retrieve_auto_increment(info->rec_buff + keyseg->start,
620                                    keyseg->type);
621       if (auto_increment > share->state.auto_increment)
622       {
623 	_ma_check_print_warning(param, "Auto-increment value: %s is smaller "
624                                 "than max used value: %s",
625                                 llstr(share->state.auto_increment,buff2),
626                                 llstr(auto_increment, buff));
627       }
628       if (param->testflag & T_AUTO_INC)
629       {
630         set_if_bigger(share->state.auto_increment,
631                       auto_increment);
632         set_if_bigger(share->state.auto_increment,
633                       param->auto_increment_value);
634       }
635 
636       /* Check that there isn't a row with auto_increment = 0 in the table */
637       maria_extra(info,HA_EXTRA_KEYREAD,0);
638       bzero(info->lastkey_buff, keyinfo->seg->length);
639       if (!maria_rkey(info, info->rec_buff, key,
640                       info->lastkey_buff,
641                       (key_part_map) 1, HA_READ_KEY_EXACT))
642       {
643 	/* Don't count this as a real warning, as maria_chk can't correct it */
644 	my_bool save=param->warning_printed;
645 	_ma_check_print_warning(param, "Found row where the auto_increment "
646                                 "column has the value 0");
647 	param->warning_printed=save;
648       }
649       maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
650     }
651 
652     length=(my_off_t) isam_key_length(info,keyinfo)*keys + param->key_blocks*2;
653     if (param->testflag & T_INFO && param->totaldata != 0L && keys != 0L)
654       printf("Key: %2d:  Keyblocks used: %3d%%  Packed: %4d%%  Max levels: %2d\n",
655 	     key+1,
656 	     (int) (my_off_t2double(param->keydata)*100.0/my_off_t2double(param->totaldata)),
657 	     (int) ((my_off_t2double(length) - my_off_t2double(param->keydata))*100.0/
658 		    my_off_t2double(length)),
659 	     param->max_level);
660     all_keydata+=param->keydata; all_totaldata+=param->totaldata; key_totlength+=length;
661 
662 do_stat:
663     if (param->testflag & T_STATISTICS)
664       maria_update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
665                        param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
666                        param->notnull_count: NULL,
667                        (ulonglong)share->state.state.records);
668   }
669   if (param->testflag & T_INFO)
670   {
671     if (all_totaldata != 0L && found_keys > 0)
672       printf("Total:    Keyblocks used: %3d%%  Packed: %4d%%\n\n",
673 	     (int) (my_off_t2double(all_keydata)*100.0/
674 		    my_off_t2double(all_totaldata)),
675 	     (int) ((my_off_t2double(key_totlength) -
676 		     my_off_t2double(all_keydata))*100.0/
677 		     my_off_t2double(key_totlength)));
678     else if (all_totaldata != 0L && maria_is_any_key_active(share->state.key_map))
679       puts("");
680   }
681   if (param->key_file_blocks != share->state.state.key_file_length &&
682       share->state.key_map == ~(ulonglong) 0)
683     _ma_check_print_warning(param, "Some data are unreferenced in keyfile");
684   if (found_keys != full_text_keys)
685     param->record_checksum=old_record_checksum-init_checksum;	/* Remove delete links */
686   else
687     param->record_checksum=0;
688   DBUG_RETURN(result);
689 } /* maria_chk_key */
690 
691 
692 
/*
  Validate the position of a key page, read it and check it

  SYNOPSIS
    chk_index_down()
      param         INOUT  Check descriptor; key_file_blocks grows with
                           keyinfo->block_length when the page was read
      info          IN     Table handler
      keyinfo       IN     Key the page belongs to
      page          IN     Position of the page in the index file
      buff          IN     Buffer handed to _ma_fetch_keypage()
      keys          INOUT  Passed on to chk_index()
      key_checksum  INOUT  Passed on to chk_index()
      level         IN     Passed on to chk_index()

  RETURN
    0  ok
    1  page is outside the file, mis-aligned or unreadable, or
       chk_index() reported an error
*/

static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
                          my_off_t page, uchar *buff, ha_rows *keys,
                          ha_checksum *key_checksum, uint level)
{
  char pos_str[22], len_str[22];
  MARIA_SHARE *share= info->s;
  MARIA_PAGE ma_page;
  DBUG_ENTER("chk_index_down");

  /* Key blocks must lay within the key file length entirely. */
  if (page + keyinfo->block_length > share->state.state.key_file_length)
  {
    /* purecov: begin tested */
    /* Give it a chance to fit in the real file size. */
    my_off_t real_length= mysql_file_seek(share->kfile.file, 0L,
                                          MY_SEEK_END, MYF(MY_THREADSAFE));
    _ma_check_print_error(param, "Invalid key block position: %s  "
                          "key block size: %u  file_length: %s",
                          llstr(page, pos_str), keyinfo->block_length,
                          llstr(share->state.state.key_file_length,
                                len_str));
    if (page + keyinfo->block_length > real_length)
      DBUG_RETURN(1);

    /* Fix the remembered key file length. */
    share->state.state.key_file_length=
      (real_length & ~ (my_off_t) (keyinfo->block_length - 1));
    /* purecov: end */
  }

  /* Key blocks must be aligned at block length */
  if (page & (share->block_size - 1))
  {
    /* purecov: begin tested */
    _ma_check_print_error(param, "Mis-aligned key block: %s  "
                          "key block length: %u",
                          llstr(page, pos_str), share->block_size);
    DBUG_RETURN(1);
    /* purecov: end */
  }

  if (_ma_fetch_keypage(&ma_page, info, keyinfo, page,
                        PAGECACHE_LOCK_LEFT_UNLOCKED,
                        DFLT_INIT_HITS, buff, 0))
  {
    /* purecov: begin tested */
    report_keypage_fault(param, info, page);
    DBUG_RETURN(1);
    /* purecov: end */
  }

  param->key_file_blocks+= keyinfo->block_length;
  if (chk_index(param, info, keyinfo, &ma_page, keys, key_checksum, level))
    DBUG_RETURN(1);                             /* purecov: tested */

  DBUG_RETURN(0);
}
752 
753 
754 /*
755   "Ignore NULLs" statistics collection method: process first index tuple.
756 
757   SYNOPSIS
758     maria_collect_stats_nonulls_first()
759       keyseg   IN     Array of key part descriptions
760       notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_i}
761                                            tuples that don't contain NULLs)
762       key      IN     Key values tuple
763 
764   DESCRIPTION
765     Process the first index tuple - find out which prefix tuples don't
766     contain NULLs, and update the array of notnull counters accordingly.
767 */
768 
769 static
maria_collect_stats_nonulls_first(HA_KEYSEG * keyseg,ulonglong * notnull,const uchar * key)770 void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
771                                        const uchar *key)
772 {
773   size_t first_null, kp;
774   first_null= ha_find_null(keyseg, key) - keyseg;
775   /*
776     All prefix tuples that don't include keypart_{first_null} are not-null
777     tuples (and all others aren't), increment counters for them.
778   */
779   for (kp= 0; kp < first_null; kp++)
780     notnull[kp]++;
781 }
782 
783 
784 /*
785   "Ignore NULLs" statistics collection method: process next index tuple.
786 
787   SYNOPSIS
788     maria_collect_stats_nonulls_next()
789       keyseg   IN     Array of key part descriptions
790       notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_i}
791                                            tuples that don't contain NULLs)
792       prev_key IN     Previous key values tuple
793       last_key IN     Next key values tuple
794 
795   DESCRIPTION
796     Process the next index tuple:
797     1. Find out which prefix tuples of last_key don't contain NULLs, and
798        update the array of notnull counters accordingly.
799     2. Find the first keypart number where the prev_key and last_key tuples
800        are different(A), or last_key has NULL value(B), and return it, so the
801        caller can count number of unique tuples for each key prefix. We don't
802        need (B) to be counted, and that is compensated back in
803        maria_update_key_parts().
804 
805   RETURN
806     1 + number of first keypart where values differ or last_key tuple has NULL
807 */
808 
809 static
maria_collect_stats_nonulls_next(HA_KEYSEG * keyseg,ulonglong * notnull,const uchar * prev_key,const uchar * last_key)810 int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
811                                      const uchar *prev_key,
812                                      const uchar *last_key)
813 {
814   uint diffs[2];
815   size_t first_null_seg, kp;
816   HA_KEYSEG *seg;
817 
818   /*
819      Find the first keypart where values are different or either of them is
820      NULL. We get results in diffs array:
821      diffs[0]= 1 + number of first different keypart
822      diffs[1]=offset: (last_key + diffs[1]) points to first value in
823                       last_key that is NULL or different from corresponding
824                       value in prev_key.
825   */
826   ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY,
827              SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs);
828   seg= keyseg + diffs[0] - 1;
829 
830   /* Find first NULL in last_key */
831   first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg;
832   for (kp= 0; kp < first_null_seg; kp++)
833     notnull[kp]++;
834 
835   /*
836     Return 1+ number of first key part where values differ. Don't care if
837     these were NULLs and not .... We compensate for that in
838     maria_update_key_parts.
839   */
840   return diffs[0];
841 }
842 
843 
/*
  Check if index is ok

  SYNOPSIS
    chk_index()
      param         Check parameters; counters and statistics are updated
      info          Maria handler
      keyinfo       Definition of the index being checked
      anc_page      Key page to check
      keys          INOUT  Running count of keys found so far
      key_checksum  INOUT  Running sum of maria_byte_checksum() of key data
      level         Depth of anc_page; used to maintain param->max_level

  DESCRIPTION
    Walks over all keys on anc_page. When the page has a non-zero node
    flag, the page found through _ma_kpos() at the current key position is
    checked with chk_index_down() before the key at that position is read.
    For every key the function checks that it fits inside the page, that
    its transid marking agrees with the page flag, that it sorts after the
    previous key, and that its row position is inside the data file.
    Optionally (T_STATISTICS) it collects key distribution statistics.

  RETURN
    0   ok (also returned, without any check, for HA_SPATIAL and
        HA_RTREE_INDEX keys)
    1   Error found (and reported) or the check was killed
    -1  Could not allocate the temporary key block buffer
*/

static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
		     MARIA_PAGE *anc_page, ha_rows *keys,
		     ha_checksum *key_checksum, uint level)
{
  int flag;
  uint comp_flag, page_flag, nod_flag;
  uchar *temp_buff, *keypos, *old_keypos, *endpos;
  my_off_t next_page,record;
  MARIA_SHARE *share= info->s;
  char llbuff[22];
  uint diff_pos[2];
  uchar tmp_key_buff[MARIA_MAX_KEY_BUFF];
  MARIA_KEY tmp_key;
  DBUG_ENTER("chk_index");
  DBUG_DUMP("buff", anc_page->buff, anc_page->size);

  /* TODO: implement appropriate check for RTree keys */
  if (keyinfo->flag & (HA_SPATIAL | HA_RTREE_INDEX))
    DBUG_RETURN(0);

  /* Buffer passed on to chk_index_down() for the pages checked from here */
  if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length)))
  {
    _ma_check_print_error(param,"Not enough memory for keyblock");
    DBUG_RETURN(-1);
  }

  if (keyinfo->flag & HA_NOSAME)
  {
    /* Not real duplicates */
    comp_flag=SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT;
  }
  else
    comp_flag=SEARCH_SAME;			/* Keys in positionorder */

  /*
    old_keypos always points to the start of the entry being processed,
    keypos to the key itself (after the nod_flag bytes at the start of the
    page) and endpos to the first byte after the used part of the page.
  */
  page_flag=  anc_page->flag;
  nod_flag=   anc_page->node;
  old_keypos= anc_page->buff + share->keypage_header;
  keypos=     old_keypos + nod_flag;
  endpos=     anc_page->buff + anc_page->size;

  param->keydata+=   anc_page->size;
  param->totaldata+= keyinfo->block_length;	/* INFO */
  param->key_blocks++;
  if (level > param->max_level)
    param->max_level=level;

  /* The two checks below only report the problem; the check continues */
  if (_ma_get_keynr(share, anc_page->buff) != keyinfo->key_nr)
    _ma_check_print_error(param, "Page at %s is not marked for index %u",
                          llstr(anc_page->pos, llbuff),
                          (uint) keyinfo->key_nr);
  if ((page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
      !share->base.born_transactional)
  {
    _ma_check_print_error(param,
                          "Page at %s is marked with HAS_TRANSID even if "
                          "table is not transactional",
                          llstr(anc_page->pos, llbuff));
  }

  if (anc_page->size > share->max_index_block_size)
  {
    _ma_check_print_error(param,
                          "Page at %s has impossible (too big) pagelength",
                          llstr(anc_page->pos, llbuff));
    goto err;
  }

  info->last_key.keyinfo= tmp_key.keyinfo= keyinfo;
  info->lastinx= ~0;                            /* Safety */
  tmp_key.data= tmp_key_buff;
  /*
    tmp_key holds the key just read; the loop increment copies it to
    info->last_key so that the next key can be compared with it. Note that
    'continue' below also runs this copy.
  */
  for ( ;; _ma_copy_key(&info->last_key, &tmp_key))
  {
    if (nod_flag)
    {
      if (_ma_killed_ptr(param))
        goto err;
      /* Check the page referenced at keypos before reading the key there */
      next_page= _ma_kpos(nod_flag,keypos);
      if (chk_index_down(param,info,keyinfo,next_page,
                         temp_buff,keys,key_checksum,level+1))
      {
        DBUG_DUMP("page_data", old_keypos, (uint) (keypos - old_keypos));
	goto err;
      }
    }
    old_keypos=keypos;
    /* Stop at end of page or when get_key() can't deliver a key */
    if (keypos >= endpos ||
	!(*keyinfo->get_key)(&tmp_key, page_flag, nod_flag, &keypos))
      break;
    if (keypos > endpos)
    {
      _ma_check_print_error(param,
                            "Page length and length of keys don't match at "
                            "page: %s",
                            llstr(anc_page->pos,llbuff));
      goto err;
    }
    if (share->data_file_type == BLOCK_RECORD &&
        !(page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
        key_has_transid(tmp_key.data + tmp_key.data_length +
                        share->rec_reflength-1))
    {
      _ma_check_print_error(param,
                            "Found key marked for transid on page that is not "
                            "marked for transid at: %s",
                            llstr(anc_page->pos,llbuff));
      goto err;
    }

    /*
      Count the key. For all but the first key counted (*keys was 0 before
      the increment) compare it with the previous key; a result >= 0 means
      the new key does not sort strictly after the previous one.
    */
    if ((*keys)++ &&
	(flag=ha_key_cmp(keyinfo->seg, info->last_key.data, tmp_key.data,
                         tmp_key.data_length + tmp_key.ref_length,
                         (comp_flag | SEARCH_INSERT | (tmp_key.flag >> 1) |
                          info->last_key.flag), diff_pos)) >=0)
    {
      DBUG_DUMP_KEY("old", &info->last_key);
      DBUG_DUMP_KEY("new", &tmp_key);
      DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));

      if ((comp_flag & SEARCH_FIND) && flag == 0)
	_ma_check_print_error(param,"Found duplicated key at page %s",
                              llstr(anc_page->pos,llbuff));
      else
	_ma_check_print_error(param,"Key in wrong position at page %s",
                              llstr(anc_page->pos,llbuff));
      goto err;
    }

    if (param->testflag & T_STATISTICS)
    {
      if (*keys != 1L)				/* not first_key */
      {
        /*
          diff_pos[0] is 1 + number of the first key part that differs
          from the previous key. For stats methods other than the two
          handled here, the value set by the ordering comparison above is
          used.
        */
        if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
          ha_key_cmp(keyinfo->seg, info->last_key.data,
                     tmp_key.data, tmp_key.data_length,
                     SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
                     diff_pos);
        else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
        {
          diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg,
                                                        param->notnull_count,
                                                        info->last_key.data,
                                                        tmp_key.data);
        }
	param->unique_count[diff_pos[0]-1]++;
      }
      else
      {
        if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
          maria_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
                                            tmp_key.data);
      }
    }
    (*key_checksum)+= maria_byte_checksum(tmp_key.data, tmp_key.data_length);
    record= _ma_row_pos_from_key(&tmp_key);

    if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */
    {
      uint off;
      int  subkeys;
      get_key_full_length_rdonly(off, tmp_key.data);
      subkeys= ft_sintXkorr(tmp_key.data + off);
      if (subkeys < 0)
      {
        /*
          A negative value means that 'record' is the root page of a second
          level tree; -subkeys must equal the number of keys found in it.
        */
        ha_rows tmp_keys=0;
        share->ft2_keyinfo.key_nr= keyinfo->key_nr;
        if (chk_index_down(param,info,&share->ft2_keyinfo,record,
                           temp_buff,&tmp_keys,key_checksum,1))
          goto err;
        if (tmp_keys + subkeys)
        {
          _ma_check_print_error(param,
                               "Number of words in the 2nd level tree "
                               "does not match the number in the header. "
                               "Parent word in on the page %s, offset %u",
                               llstr(anc_page->pos,llbuff),
                                (uint) (old_keypos - anc_page->buff));
          goto err;
        }
        /* The parent key was already counted once above, hence the -1 */
        (*keys)+=tmp_keys-1;
        continue;
      }
      /* fall through */
    }
    /*
      The row position must be inside the data file. How this is tested
      depends on the record format; for NO_RECORD the position must be 0.
    */
    if ((share->data_file_type != BLOCK_RECORD &&
         share->data_file_type != NO_RECORD &&
         record >= share->state.state.data_file_length) ||
        (share->data_file_type == BLOCK_RECORD &&
         ma_recordpos_to_page(record) * share->base.min_block_length >=
         share->state.state.data_file_length) ||
        (share->data_file_type == NO_RECORD && record != 0))
    {
#ifndef DBUG_OFF
      char llbuff2[22], llbuff3[22];
#endif
      _ma_check_print_error(param,
                            "Found key at page %s that points to record "
                            "outside datafile",
                            llstr(anc_page->pos,llbuff));
      DBUG_PRINT("test",("page: %s  record: %s  filelength: %s",
			 llstr(anc_page->pos,llbuff),llstr(record,llbuff2),
			 llstr(share->state.state.data_file_length,llbuff3)));
      DBUG_DUMP_KEY("key", &tmp_key);
      DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));
      goto err;
    }
    param->record_checksum+= (ha_checksum) record;
  }
  /* After the last key we must be exactly at the end of the used data */
  if (keypos != endpos)
  {
    _ma_check_print_error(param,
                          "Keyblock size at page %s is not correct. "
                          "Block length: %u  key length: %u",
                          llstr(anc_page->pos, llbuff), anc_page->size,
                          (uint) (keypos - anc_page->buff));
    goto err;
  }
  my_afree(temp_buff);
  DBUG_RETURN(0);
 err:
  my_afree(temp_buff);
  DBUG_RETURN(1);
} /* chk_index */
1068 
1069 
1070 	/* Calculate a checksum of 1+2+3+4...N = N*(N+1)/2 without overflow */
1071 
calc_checksum(ha_rows count)1072 static ha_checksum calc_checksum(ha_rows count)
1073 {
1074   ulonglong sum,a,b;
1075   DBUG_ENTER("calc_checksum");
1076 
1077   sum=0;
1078   a=count; b=count+1;
1079   if (a & 1)
1080     b>>=1;
1081   else
1082     a>>=1;
1083   while (b)
1084   {
1085     if (b & 1)
1086       sum+=a;
1087     a<<=1; b>>=1;
1088   }
1089   DBUG_PRINT("exit",("sum: %lx",(ulong) sum));
1090   DBUG_RETURN((ha_checksum) sum);
1091 } /* calc_checksum */
1092 
1093 
1094 	/* Calc length of key in normal isam */
1095 
isam_key_length(MARIA_HA * info,register MARIA_KEYDEF * keyinfo)1096 static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo)
1097 {
1098   uint length;
1099   HA_KEYSEG *keyseg;
1100   DBUG_ENTER("isam_key_length");
1101 
1102   length= info->s->rec_reflength;
1103   for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++)
1104     length+= keyseg->length;
1105 
1106   DBUG_PRINT("exit",("length: %d",length));
1107   DBUG_RETURN(length);
1108 } /* key_length */
1109 
1110 
1111 
/*
  Convert a record position to text in buff.

  For BLOCK_RECORD tables the position is written as "page:row", where
  page and row come from ma_recordpos_to_page() and
  ma_recordpos_to_dir_entry(). For all other formats the position is
  written as a plain number with llstr().
*/

static void record_pos_to_txt(MARIA_HA *info, my_off_t recpos,
                              char *buff)
{
  my_off_t page_no;
  uint dir_entry;
  char *to;

  if (info->s->data_file_type != BLOCK_RECORD)
  {
    llstr(recpos, buff);
    return;
  }

  page_no=   ma_recordpos_to_page(recpos);
  dir_entry= ma_recordpos_to_dir_entry(recpos);

  /* longlong10_to_str() returns the position after the written digits */
  to= longlong10_to_str(page_no, buff, 10);
  *to++= ':';
  longlong10_to_str(dir_entry, to, 10);
}
1126 
1127 
1128 /*
1129   Check that keys in records exist in index tree
1130 
1131   SYNOPSIS
1132   check_keys_in_record()
  param		Check parameter
1134   info		Maria handler
1135   extend	Type of check (extended or normal)
1136   start_recpos	Position to row
1137   record	Record buffer
1138 
1139   NOTES
1140     This function also calculates record checksum & number of rows
1141 */
1142 
static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend,
                                my_off_t start_recpos, uchar *record)
{
  MARIA_SHARE *share= info->s;
  MARIA_KEYDEF *keyinfo;
  char llbuff[22+4];
  uint keynr;

  /* Account for the row and report progress every WRITE_COUNT rows */
  param->tmp_record_checksum+= (ha_checksum) start_recpos;
  param->records++;
  if (param->records % WRITE_COUNT == 0)
  {
    if (param->testflag & T_WRITE_LOOP)
    {
      printf("%s\r", llstr(param->records, llbuff));
      fflush(stdout);
    }
    _ma_report_progress(param, param->records, share->state.state.records);
  }

  /*
    Build the key of every active, non-fulltext index from the record.
    In extended mode the key is looked up in the index tree; otherwise
    only the per-index key checksum is accumulated.
  */
  for (keynr= 0, keyinfo= share->keyinfo ;
       keynr < share->base.keys ;
       keynr++, keyinfo++)
  {
    MARIA_KEY key;
    int search_result;

    if (!(maria_is_key_active(share->state.key_map, keynr)))
      continue;
    if (keyinfo->flag & HA_FULLTEXT)
      continue;

    (*keyinfo->make_key)(info, &key, keynr, info->lastkey_buff, record,
                         start_recpos, 0);
    info->last_key.keyinfo= key.keyinfo;

    if (!extend)
    {
      param->tmp_key_crc[keynr]+=
        maria_byte_checksum(key.data, key.data_length);
      continue;
    }

    /*
      We don't need to lock the key tree here as we don't allow
      concurrent threads when running maria_chk
    */
#ifdef HAVE_RTREE_KEYS
    if (keyinfo->flag & (HA_SPATIAL | HA_RTREE_INDEX))
      search_result= maria_rtree_find_first(info, &key,
                                            MBR_EQUAL | MBR_DATA);
    else
#endif
      search_result= _ma_search(info, &key, SEARCH_SAME,
                                share->state.key_root[keynr]);

    if (!search_result)
      continue;                                 /* Key was found */

    record_pos_to_txt(info, start_recpos, llbuff);
    _ma_check_print_error(param,
                          "Record at: %14s  "
                          "Can't find key for index: %2d",
                          llbuff, keynr+1);
    /* Only continue after an error in verbose mode, up to MAXERR errors */
    if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
      return -1;
  }
  return 0;
}
1205 
1206 
1207 /*
1208   Functions to loop through all rows and check if they are ok
1209 
1210   NOTES
1211     One function for each record format
1212 
1213   RESULT
1214     0  ok
1215     -1 Interrupted by user
1216     1  Error
1217 */
1218 
/*
  Check all rows of a table with fixed length records.

  The data file is read through param->read_cache in steps of
  share->base.pack_reclength. A row whose first byte is 0 is counted as
  deleted; every other row is added to the checksum and usage counters
  and then passed to check_keys_in_record().
*/

static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend,
                               uchar *record)
{
  MARIA_SHARE *share= info->s;
  my_off_t row_start, next_pos;
  char llbuff[22];

  for (next_pos= 0 ; next_pos < share->state.state.data_file_length ; )
  {
    if (_ma_killed_ptr(param))
      return -1;

    if (my_b_read(&param->read_cache, record,
                  share->base.pack_reclength))
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: "
                            "%s",
                            my_errno, llstr(next_pos, llbuff));
      return 1;
    }

    row_start= next_pos;
    next_pos+= share->base.pack_reclength;
    param->splits++;

    if (*record != '\0')
    {
      /* Row in use */
      param->glob_crc+= _ma_static_checksum(info,record);
      param->used+= share->base.pack_reclength;
      if (check_keys_in_record(param, info, extend, row_start, record))
        return 1;
    }
    else
    {
      /* Record removed */
      param->del_blocks++;
      param->del_length+= share->base.pack_reclength;
    }
  }
  return 0;
}
1256 
1257 
/*
  Check all rows of a table with dynamic length records.

  The data file is scanned from position 0 up to data_file_length. Each
  iteration of the outer loop handles what starts at 'pos': a deleted
  block, a block that is skipped because of a sync error, or a record. A
  record is collected block by block into info->rec_buff (the inner
  do-while loop follows block_info.next_filepos until rec_len bytes have
  been read), then unpacked, optionally verified with _ma_rec_check()
  and finally passed to check_keys_in_record().

  Returns 0 if ok, -1 if the check was killed and 1 on error.
*/

static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
                                uchar *record)
{
  MARIA_BLOCK_INFO block_info;
  MARIA_SHARE *share= info->s;
  my_off_t UNINIT_VAR(start_recpos), start_block, pos;
  uchar *UNINIT_VAR(to);
  ulong UNINIT_VAR(left_length);
  uint	b_type;
  char llbuff[22],llbuff2[22],llbuff3[22];
  DBUG_ENTER("check_dynamic_record");

  pos= 0;
  while (pos < share->state.state.data_file_length)
  {
    my_bool got_error= 0;
    int flag;
    if (_ma_killed_ptr(param))
      DBUG_RETURN(-1);

    /* flag counts the blocks accepted so far for the current record */
    flag= block_info.second_read=0;
    block_info.next_filepos=pos;
    do
    {
      /* Read the header of the next block in the chain */
      if (_ma_read_cache(info, &param->read_cache, block_info.header,
                         (start_block=block_info.next_filepos),
                         sizeof(block_info.header),
                         (flag ? 0 : READING_NEXT) | READING_HEADER))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at "
                              "position: %s",
                              my_errno, llstr(start_block, llbuff));
        DBUG_RETURN(1);
      }

      if (start_block & (MARIA_DYN_ALIGN_SIZE-1))
      {
        _ma_check_print_error(param,"Wrong aligned block at %s",
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
      }
      b_type= _ma_get_block_info(info, &block_info,-1,start_block);
      if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
                    BLOCK_FATAL_ERROR))
      {
        if (b_type & BLOCK_SYNC_ERROR)
        {
          /*
            A sync error on a continuation block is fatal. On the first
            block of the scan position it is not reported; the block is
            just skipped.
          */
          if (flag)
          {
            _ma_check_print_error(param,"Unexpected byte: %d at link: %s",
                                  (int) block_info.header[0],
                                  llstr(start_block,llbuff));
            DBUG_RETURN(1);
          }
          pos=block_info.filepos+block_info.block_len;
          goto next;
        }
        if (b_type & BLOCK_DELETED)
        {
          /* Validate the deleted block and its links, then skip it */
          if (block_info.block_len < share->base.min_block_length)
          {
            _ma_check_print_error(param,
                                  "Deleted block with impossible length %lu "
                                  "at %s",
                                  block_info.block_len,llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          if ((block_info.next_filepos != HA_OFFSET_ERROR &&
               block_info.next_filepos >= share->state.state.data_file_length) ||
              (block_info.prev_filepos != HA_OFFSET_ERROR &&
               block_info.prev_filepos >= share->state.state.data_file_length))
          {
            _ma_check_print_error(param,"Delete link points outside datafile "
                                  "at %s",
                                  llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          param->del_blocks++;
          param->del_length+= block_info.block_len;
          param->splits++;
          pos= block_info.filepos+block_info.block_len;
          goto next;
        }
        /* Any other error bit: the block header can't be interpreted */
        _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s",
                              block_info.header[0],block_info.header[1],
                              block_info.header[2],
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
      }
      if (share->state.state.data_file_length < block_info.filepos+
          block_info.block_len)
      {
        _ma_check_print_error(param,
                              "Recordlink that points outside datafile at %s",
                              llstr(pos,llbuff));
        got_error=1;
        break;
      }
      param->splits++;
      if (!flag++)				/* First block */
      {
        /*
          Only the first block moves the scan position; continuation
          blocks are reached through next_filepos and leave pos untouched.
        */
        start_recpos=pos;
        pos=block_info.filepos+block_info.block_len;
        if (block_info.rec_len > (uint) share->base.max_pack_length)
        {
          my_errno= HA_ERR_WRONG_IN_RECORD;
          _ma_check_print_error(param,"Found too long record (%lu) at %s",
                                (ulong) block_info.rec_len,
                                llstr(start_recpos,llbuff));
          got_error=1;
          break;
        }
        /* Tables with blobs: make sure rec_buff can hold the whole record */
        if (share->base.blobs)
        {
          if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
                               block_info.rec_len +
                               share->base.extra_rec_buff_size))

          {
            _ma_check_print_error(param,
                                  "Not enough memory (%lu) for blob at %s",
                                  (ulong) block_info.rec_len,
                                  llstr(start_recpos,llbuff));
            got_error=1;
            break;
          }
        }
        to= info->rec_buff;
        left_length= block_info.rec_len;
      }
      /* The block may not contain more data than is left of the record */
      if (left_length < block_info.data_len)
      {
        _ma_check_print_error(param,"Found too long record (%lu) at %s",
                              (ulong) block_info.data_len,
                              llstr(start_recpos,llbuff));
        got_error=1;
        break;
      }
      /* Append the data part of the block to the record buffer */
      if (_ma_read_cache(info, &param->read_cache, to, block_info.filepos,
                         (uint) block_info.data_len,
                         flag == 1 ? READING_NEXT : 0))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at "
                              "position: %s", my_errno,
                              llstr(block_info.filepos, llbuff));

        DBUG_RETURN(1);
      }
      to+=block_info.data_len;
      param->link_used+= block_info.filepos-start_block;
      param->used+= block_info.filepos - start_block + block_info.data_len;
      param->empty+= block_info.block_len-block_info.data_len;
      left_length-= block_info.data_len;
      if (left_length)
      {
        /* More data expected: there must be a valid link to a next block */
        if (b_type & BLOCK_LAST)
        {
          _ma_check_print_error(param,
                                "Wrong record length %s of %s at %s",
                                llstr(block_info.rec_len-left_length,llbuff),
                                llstr(block_info.rec_len, llbuff2),
                                llstr(start_recpos,llbuff3));
          got_error=1;
          break;
        }
        if (share->state.state.data_file_length < block_info.next_filepos)
        {
          _ma_check_print_error(param,
                                "Found next-recordlink that points outside "
                                "datafile at %s",
                                llstr(block_info.filepos,llbuff));
          got_error=1;
          break;
        }
      }
    } while (left_length);

    if (! got_error)
    {
      /* The whole record is in rec_buff; unpack and verify it */
      if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) ==
          MY_FILE_ERROR)
      {
        _ma_check_print_error(param,"Found wrong record at %s",
                              llstr(start_recpos,llbuff));
        got_error=1;
      }
      else
      {
        ha_checksum checksum= 0;
        if (share->calc_checksum)
          checksum= (*share->calc_checksum)(info, record);

        if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE))
        {
          if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len,
                            MY_TEST(share->calc_checksum), checksum))
          {
            _ma_check_print_error(param,"Found wrong packed record at %s",
                                  llstr(start_recpos,llbuff));
            got_error= 1;
          }
        }
        param->glob_crc+= checksum;
      }

      if (! got_error)
      {
        if (check_keys_in_record(param, info, extend, start_recpos, record))
          DBUG_RETURN(1);
      }
      else
      {
        /* Only continue after an error in verbose mode, up to MAXERR errors */
        if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
          DBUG_RETURN(1);
      }
    }
    else if (!flag)
    {
      /*
        The error was found before the first block was accepted, so pos has
        not been moved yet; step over the block so the scan can go on. Note
        that this path neither increments err_count nor returns.
      */
      pos= block_info.filepos+block_info.block_len;
    }
next:;
  }
  DBUG_RETURN(0);
}
1482 
1483 
/*
  Check all rows of a compressed (packed) table.

  Scanning starts after the pack header. For every row the block info is
  read, the row length is validated against the min/max pack lengths of
  the share, and the row is read and unpacked before being passed to
  check_keys_in_record().

  Returns 0 if ok, -1 if the check was killed and 1 on error.
*/

static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend,
                                   uchar *record)
{
  MARIA_BLOCK_INFO block_info;
  MARIA_SHARE *share= info->s;
  my_off_t start_recpos, pos;
  char llbuff[22];
  DBUG_ENTER("check_compressed_record");

  for (pos= share->pack.header_length ;             /* Skip header */
       pos < share->state.state.data_file_length ; )
  {
    my_bool row_ok= 1;

    if (_ma_killed_ptr(param))
      DBUG_RETURN(-1);

    if (_ma_read_cache(info, &param->read_cache, block_info.header, pos,
                       share->pack.ref_length, READING_NEXT))
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: "
                            "%s",
                            my_errno, llstr(pos, llbuff));
      DBUG_RETURN(1);
    }

    start_recpos= pos;
    param->splits++;
    _ma_pack_get_block_info(info, &info->bit_buff, &block_info,
                            &info->rec_buff, &info->rec_buff_size, -1,
                            start_recpos);
    /* The next row starts directly after the data of this one */
    pos= block_info.filepos + block_info.rec_len;

    if (block_info.rec_len < (uint) share->min_pack_length ||
        block_info.rec_len > (uint) share->max_pack_length)
    {
      _ma_check_print_error(param,
                            "Found block with wrong recordlength: %lu at %s",
                            block_info.rec_len, llstr(start_recpos,llbuff));
      row_ok= 0;
    }
    else
    {
      if (_ma_read_cache(info, &param->read_cache, info->rec_buff,
                         block_info.filepos, block_info.rec_len,
                         READING_NEXT))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at "
                              "position: %s",
                              my_errno, llstr(block_info.filepos, llbuff));
        DBUG_RETURN(1);
      }
      if (_ma_pack_rec_unpack(info, &info->bit_buff, record,
                              info->rec_buff, block_info.rec_len))
      {
        _ma_check_print_error(param,"Found wrong record at %s",
                              llstr(start_recpos,llbuff));
        row_ok= 0;
      }
      else
      {
        param->glob_crc+= (*share->calc_checksum)(info,record);
        param->link_used+= (block_info.filepos - start_recpos);
        param->used+= (pos-start_recpos);
      }
    }

    if (row_ok)
    {
      if (check_keys_in_record(param, info, extend, start_recpos, record))
        DBUG_RETURN(1);
    }
    else
    {
      /* Only continue after an error in verbose mode, up to MAXERR errors */
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        DBUG_RETURN(1);
    }
  }
  DBUG_RETURN(0);
}
1561 
1562 
1563 /*
1564   Check if layout on head or tail page is ok
1565 
1566   NOTES
1567     This is for rows-in-block format.
1568 */
1569 
check_page_layout(HA_CHECK * param,MARIA_HA * info,my_off_t page_pos,uchar * page,uint row_count,uint head_empty,uint * real_rows_found,uint * free_slots_found)1570 static int check_page_layout(HA_CHECK *param, MARIA_HA *info,
1571                              my_off_t page_pos, uchar *page,
1572                              uint row_count, uint head_empty,
1573                              uint *real_rows_found, uint *free_slots_found)
1574 {
1575   uint empty, last_row_end, row, first_dir_entry, free_entry, block_size;
1576   uint free_entries, prev_free_entry;
1577   uchar *dir_entry;
1578   char llbuff[22];
1579   my_bool error_in_free_list= 0;
1580   DBUG_ENTER("check_page_layout");
1581 
1582   block_size= info->s->block_size;
1583   empty= 0;
1584   last_row_end= PAGE_HEADER_SIZE(info->s);
1585   *real_rows_found= 0;
1586 
1587   /* Check free directory list */
1588   free_entry= (uint) page[DIR_FREE_OFFSET];
1589   free_entries= 0;
1590   prev_free_entry= END_OF_DIR_FREE_LIST;
1591   while (free_entry != END_OF_DIR_FREE_LIST)
1592   {
1593     uchar *dir;
1594     if (free_entry > row_count)
1595     {
1596       _ma_check_print_error(param,
1597                             "Page %9s:  Directory free entry points outside "
1598                             "directory",
1599                             llstr(page_pos, llbuff));
1600       error_in_free_list= 1;
1601       break;
1602     }
1603     dir= dir_entry_pos(page, block_size, free_entry);
1604     if (uint2korr(dir) != 0)
1605     {
1606       _ma_check_print_error(param,
1607                             "Page %9s:  Directory free entry points to "
1608                             "not deleted entry",
1609                             llstr(page_pos, llbuff));
1610       error_in_free_list= 1;
1611       break;
1612     }
1613     if (dir[2] != prev_free_entry)
1614     {
1615       _ma_check_print_error(param,
1616                             "Page %9s:  Directory free list back pointer "
1617                             "points to wrong entry",
1618                             llstr(page_pos, llbuff));
1619       error_in_free_list= 1;
1620       break;
1621     }
1622     prev_free_entry= free_entry;
1623     free_entry= dir[3];
1624     free_entries++;
1625   }
1626   *free_slots_found= free_entries;
1627 
1628   /* Check directry */
1629   dir_entry= page+ block_size - PAGE_SUFFIX_SIZE;
1630   first_dir_entry= (block_size - row_count * DIR_ENTRY_SIZE -
1631                     PAGE_SUFFIX_SIZE);
1632   for (row= 0 ; row < row_count ; row++)
1633   {
1634     uint pos, length;
1635     dir_entry-= DIR_ENTRY_SIZE;
1636     pos= uint2korr(dir_entry);
1637     if (!pos)
1638     {
1639       free_entries--;
1640       if (row == row_count -1)
1641       {
1642         _ma_check_print_error(param,
1643                               "Page %9s:  First entry in directory is 0",
1644                               llstr(page_pos, llbuff));
1645         if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1646           DBUG_RETURN(1);
1647       }
1648       continue;                                 /* Deleted row */
1649     }
1650     (*real_rows_found)++;
1651     length= uint2korr(dir_entry+2);
1652     param->used+= length;
1653     if (pos < last_row_end)
1654     {
1655       _ma_check_print_error(param,
1656                             "Page %9s:  Row %3u overlapps with previous row",
1657                             llstr(page_pos, llbuff), row);
1658       DBUG_RETURN(1);
1659     }
1660     empty+= (pos - last_row_end);
1661     last_row_end= pos + length;
1662     if (last_row_end > first_dir_entry)
1663     {
1664       _ma_check_print_error(param,
1665                             "Page %9s:  Row %3u overlapps with directory",
1666                             llstr(page_pos, llbuff), row);
1667       DBUG_RETURN(1);
1668     }
1669   }
1670   empty+= (first_dir_entry - last_row_end);
1671 
1672   if (empty != head_empty)
1673   {
1674     _ma_check_print_error(param,
1675                           "Page %9s:  Wrong empty size.  Stored: %5u  "
1676                           "Actual: %5u",
1677                           llstr(page_pos, llbuff), head_empty, empty);
1678     param->err_count++;
1679   }
1680   if (free_entries != 0 && !error_in_free_list)
1681   {
1682     _ma_check_print_error(param,
1683                           "Page %9s:  Directory free link don't include "
1684                           "all free entries",
1685                           llstr(page_pos, llbuff));
1686     param->err_count++;
1687   }
1688   DBUG_RETURN(param->err_count &&
1689               (param->err_count >= MAXERR || !(param->testflag & T_VERBOSE)));
1690 }
1691 
1692 
1693 /*
1694   Check all rows on head page
1695 
1696   NOTES
1697     This is for rows-in-block format.
1698 
1699     Before this, we have already called check_page_layout(), so
1700     we know the block is logicaly correct (even if the rows may not be that)
1701 
1702   RETURN
1703    0  ok
1704    1  error
1705 */
1706 
1707 
check_head_page(HA_CHECK * param,MARIA_HA * info,uchar * record,int extend,my_off_t page_pos,uchar * page_buff,uint row_count)1708 static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, uchar *record,
1709                                int extend, my_off_t page_pos, uchar *page_buff,
1710                                uint row_count)
1711 {
1712   MARIA_SHARE *share= info->s;
1713   uchar *dir_entry;
1714   uint row;
1715   char llbuff[22], llbuff2[22];
1716   ulonglong page= page_pos / share->block_size;
1717   DBUG_ENTER("check_head_page");
1718 
1719   dir_entry= page_buff+ share->block_size - PAGE_SUFFIX_SIZE;
1720   for (row= 0 ; row < row_count ; row++)
1721   {
1722     uint pos, length, flag;
1723     dir_entry-= DIR_ENTRY_SIZE;
1724     pos= uint2korr(dir_entry);
1725     if (!pos)
1726       continue;
1727     length= uint2korr(dir_entry+2);
1728     if (length < share->base.min_block_length)
1729     {
1730       _ma_check_print_error(param,
1731                             "Page %9s:  Row %3u is too short "
1732                             "(%d of min %d bytes)",
1733                             llstr(page, llbuff), row, length,
1734                             (uint) share->base.min_block_length);
1735       DBUG_RETURN(1);
1736     }
1737     flag= (uint) (uchar) page_buff[pos];
1738     if (flag & ~(ROW_FLAG_ALL))
1739       _ma_check_print_error(param,
1740                             "Page %9s: Row %3u has wrong flag: %u",
1741                             llstr(page, llbuff), row, flag);
1742 
1743     DBUG_PRINT("info", ("rowid: %s  page: %lu  row: %u",
1744                         llstr(ma_recordpos(page, row), llbuff),
1745                         (ulong) page, row));
1746     info->cur_row.trid= 0;
1747     if (_ma_read_block_record2(info, record, page_buff+pos,
1748                                page_buff+pos+length))
1749     {
1750       _ma_check_print_error(param,
1751                             "Page %9s:  Row %3d is crashed",
1752                             llstr(page, llbuff), row);
1753       if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1754         DBUG_RETURN(1);
1755       continue;
1756     }
1757     set_if_bigger(param->max_found_trid, info->cur_row.trid);
1758     if (info->cur_row.trid > param->max_trid)
1759       _ma_check_print_not_visible_error(param, info->cur_row.trid);
1760 
1761     if (share->calc_checksum)
1762     {
1763       ha_checksum checksum= (*share->calc_checksum)(info, record);
1764       if (info->cur_row.checksum != (checksum & 255))
1765         _ma_check_print_error(param, "Page %9s:  Row %3d has wrong checksum",
1766                               llstr(page, llbuff), row);
1767       param->glob_crc+= checksum;
1768     }
1769     if (info->cur_row.extents_count)
1770     {
1771       uchar *extents= info->cur_row.extents;
1772       uint i;
1773       /* Check that bitmap has the right marker for the found extents */
1774       for (i= 0 ; i < info->cur_row.extents_count ; i++)
1775       {
1776         pgcache_page_no_t extent_page;
1777         uint page_count, page_type;
1778         extent_page= uint5korr(extents);
1779         page_count=  uint2korr(extents+5) & ~START_EXTENT_BIT;
1780         extents+=    ROW_EXTENT_SIZE;
1781         page_type=   BLOB_PAGE;
1782         if (page_count & TAIL_BIT)
1783         {
1784           page_count= 1;
1785           page_type= TAIL_PAGE;
1786         }
1787         /*
1788           TODO OPTIMIZE:
1789           Check the whole extent with one test and only do the loop if
1790           something is wrong (for exact error reporting)
1791         */
1792         for ( ; page_count--; extent_page++)
1793         {
1794           uint bitmap_pattern;
1795           if (_ma_check_if_right_bitmap_type(info, page_type, extent_page,
1796                                              &bitmap_pattern))
1797           {
1798             _ma_check_print_error(param,
1799                                   "Page %9s:  Row: %3d has an extent with "
1800                                   "wrong information in bitmap:  "
1801                                   "Page: %9s  Page_type: %d  Bitmap: %d",
1802                                   llstr(page, llbuff), row,
1803                                   llstr(extent_page, llbuff2),
1804                                   page_type, bitmap_pattern);
1805             if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1806               DBUG_RETURN(1);
1807           }
1808         }
1809       }
1810     }
1811     param->full_page_count+= info->cur_row.full_page_count;
1812     param->tail_count+= info->cur_row.tail_count;
1813     if (check_keys_in_record(param, info, extend,
1814                              ma_recordpos(page, row), record))
1815       DBUG_RETURN(1);
1816   }
1817   DBUG_RETURN(0);
1818 }
1819 
1820 
1821 /*
1822   Check if rows-in-block data file is consistent
1823 */
1824 
check_block_record(HA_CHECK * param,MARIA_HA * info,int extend,uchar * record)1825 static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
1826                               uchar *record)
1827 {
1828   MARIA_SHARE *share= info->s;
1829   my_off_t pos;
1830   pgcache_page_no_t page;
1831   uchar *page_buff, *bitmap_buff, *data;
1832   char llbuff[22], llbuff2[22];
1833   uint block_size= share->block_size;
1834   ha_rows full_page_count, tail_count;
1835   my_bool UNINIT_VAR(full_dir), now_transactional;
1836   uint offset_page, offset, free_count;
1837 
1838   if (_ma_scan_init_block_record(info))
1839   {
1840     _ma_check_print_error(param, "got error %d when initializing scan",
1841                           my_errno);
1842     return 1;
1843   }
1844 
1845   now_transactional= info->s->now_transactional;
1846   info->s->now_transactional= 0;                /* Don't log changes */
1847 
1848   bitmap_buff= info->scan.bitmap_buff;
1849   page_buff= info->scan.page_buff;
1850   full_page_count= tail_count= 0;
1851   param->full_page_count= param->tail_count= 0;
1852   param->used= param->link_used= 0;
1853   param->splits= share->state.state.data_file_length / block_size;
1854 
1855   for (pos= 0, page= 0;
1856        pos < share->state.state.data_file_length;
1857        pos+= block_size, page++)
1858   {
1859     uint UNINIT_VAR(row_count), real_row_count, UNINIT_VAR(empty_space),
1860          page_type, bitmap_pattern;
1861     uint bitmap_for_page;
1862 
1863     if (_ma_killed_ptr(param))
1864     {
1865       _ma_scan_end_block_record(info);
1866       info->s->now_transactional= now_transactional;
1867       return -1;                                /* Interrupted */
1868     }
1869     if ((page % share->bitmap.pages_covered) == 0)
1870     {
1871       /* Bitmap page */
1872       if (pagecache_read(share->pagecache,
1873                          &info->s->bitmap.file,
1874                          page, 1,
1875                          bitmap_buff,
1876                          PAGECACHE_PLAIN_PAGE,
1877                          PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
1878       {
1879         _ma_check_print_error(param,
1880                               "Page %9s:  Got error: %d when reading datafile",
1881                               llstr(page, llbuff), my_errno);
1882         goto err;
1883       }
1884       param->used+= block_size;
1885       param->link_used+= block_size;
1886       if (param->verbose > 2)
1887         print_bitmap_description(share, page, bitmap_buff);
1888       continue;
1889     }
1890     /* Skip pages marked as empty in bitmap */
1891     offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
1892     offset= offset_page & 7;
1893     data= bitmap_buff + offset_page / 8;
1894     bitmap_pattern= uint2korr(data);
1895     if (!(bitmap_for_page= ((bitmap_pattern >> offset) & 7)))
1896     {
1897       param->empty+= block_size;
1898       param->del_blocks++;
1899       continue;
1900     }
1901 
1902     if (pagecache_read(share->pagecache,
1903                        &info->dfile,
1904                        page, 1,
1905                        page_buff,
1906                        share->page_type,
1907                        PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
1908     {
1909       _ma_check_print_error(param,
1910                             "Page %9s:  Got error: %d when reading datafile",
1911                             llstr(page, llbuff), my_errno);
1912       goto err;
1913     }
1914     page_type= page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK;
1915     if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE)
1916     {
1917       _ma_check_print_error(param,
1918                             "Page: %9s  Found wrong page type %d. Bitmap: %d '%s'",
1919                             llstr(page, llbuff), page_type,
1920                             bitmap_for_page, bits_to_txt[bitmap_for_page]);
1921       if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1922         goto err;
1923       continue;
1924     }
1925     switch ((enum en_page_type) page_type) {
1926     case UNALLOCATED_PAGE:
1927     case MAX_PAGE_TYPE:
1928     default:
1929       DBUG_ASSERT(0);                           /* Impossible */
1930       break;
1931     case HEAD_PAGE:
1932       row_count= page_buff[DIR_COUNT_OFFSET];
1933       empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
1934       param->used+= block_size - empty_space;
1935       param->link_used+= (PAGE_HEADER_SIZE(info->s) + PAGE_SUFFIX_SIZE +
1936                           row_count * DIR_ENTRY_SIZE);
1937       if (empty_space < share->bitmap.sizes[3])
1938         param->lost+= empty_space;
1939       if (check_page_layout(param, info, pos, page_buff, row_count,
1940                             empty_space, &real_row_count, &free_count))
1941         goto err;
1942       full_dir= (row_count == MAX_ROWS_PER_PAGE &&
1943                  page_buff[DIR_FREE_OFFSET] == END_OF_DIR_FREE_LIST);
1944       break;
1945     case TAIL_PAGE:
1946       row_count= page_buff[DIR_COUNT_OFFSET];
1947       empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
1948       param->used+= block_size - empty_space;
1949       param->link_used+= (PAGE_HEADER_SIZE(info->s) + PAGE_SUFFIX_SIZE +
1950                           row_count * DIR_ENTRY_SIZE);
1951       if (empty_space < share->bitmap.sizes[6])
1952         param->lost+= empty_space;
1953       if (check_page_layout(param, info, pos, page_buff, row_count,
1954                             empty_space, &real_row_count, &free_count))
1955         goto err;
1956       full_dir= (row_count - free_count >= MAX_ROWS_PER_PAGE -
1957                  share->base.blobs);
1958       break;
1959     case BLOB_PAGE:
1960       full_page_count++;
1961       full_dir= 0;
1962       empty_space= block_size;                  /* for error reporting */
1963       param->link_used+= FULL_PAGE_HEADER_SIZE(info->s);
1964       param->used+= block_size;
1965       break;
1966     }
1967     if (_ma_check_bitmap_data(info, page_type,
1968                               full_dir ? 0 : empty_space,
1969                               bitmap_for_page))
1970     {
1971         _ma_check_print_error(param,
1972                               "Page %9s:  Wrong data in bitmap.  Page_type: "
1973                               "%d  full: %d  empty_space: %u  Bitmap-bits: %d "
1974                               "'%s'",
1975                               llstr(page, llbuff), page_type, full_dir,
1976                               empty_space, bitmap_for_page,
1977                               bits_to_txt[bitmap_for_page]);
1978       if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1979         goto err;
1980     }
1981     if ((enum en_page_type) page_type == BLOB_PAGE)
1982       continue;
1983     param->empty+= empty_space;
1984     if ((enum en_page_type) page_type == TAIL_PAGE)
1985     {
1986       tail_count+= real_row_count;
1987       continue;
1988     }
1989     if (check_head_page(param, info, record, extend, pos, page_buff,
1990                         row_count))
1991       goto err;
1992   }
1993 
1994   /* Verify that rest of bitmap is zero */
1995 
1996   if (page % share->bitmap.pages_covered)
1997   {
1998     /* Not at end of bitmap */
1999     uint bitmap_pattern;
2000     uint byte_offset;
2001 
2002     offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
2003     offset= offset_page & 7;
2004     byte_offset= offset_page / 8;
2005     data= bitmap_buff + byte_offset;
2006     bitmap_pattern= uint2korr(data);
2007     if (byte_offset + 1 == share->bitmap.max_total_size)
2008     {
2009       /* On last byte of bitmap; Remove possible checksum */
2010       bitmap_pattern&= 0xff;
2011     }
2012     if (((bitmap_pattern >> offset)) ||
2013         (byte_offset + 2 < share->bitmap.max_total_size &&
2014          _ma_check_if_zero(data+2, share->bitmap.max_total_size -
2015                            byte_offset - 2)))
2016     {
2017       ulonglong bitmap_page;
2018       bitmap_page= page / share->bitmap.pages_covered;
2019       bitmap_page*= share->bitmap.pages_covered;
2020 
2021       _ma_check_print_error(param,
2022                             "Bitmap at page %s has pages reserved outside of "
2023                             "data file length",
2024                             llstr(bitmap_page, llbuff));
2025       DBUG_EXECUTE("bitmap", _ma_print_bitmap(&share->bitmap, bitmap_buff,
2026                                               bitmap_page););
2027     }
2028   }
2029 
2030   _ma_scan_end_block_record(info);
2031 
2032   if (full_page_count != param->full_page_count)
2033     _ma_check_print_error(param, "Full page count read through records was %s "
2034                           "but we found %s pages while scanning table",
2035                           llstr(param->full_page_count, llbuff),
2036                           llstr(full_page_count, llbuff2));
2037   if (tail_count != param->tail_count)
2038     _ma_check_print_error(param, "Tail count read through records was %s but "
2039                           "we found %s tails while scanning table",
2040                           llstr(param->tail_count, llbuff),
2041                           llstr(tail_count, llbuff2));
2042 
2043   info->s->now_transactional= now_transactional;
2044   return param->error_printed != 0;
2045 
2046 err:
2047   _ma_scan_end_block_record(info);
2048   info->s->now_transactional= now_transactional;
2049   return 1;
2050 }
2051 
2052 
2053 /* Check that record-link is ok */
2054 
maria_chk_data_link(HA_CHECK * param,MARIA_HA * info,my_bool extend)2055 int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend)
2056 {
2057   MARIA_SHARE *share= info->s;
2058   int	error;
2059   uchar *record;
2060   char llbuff[22],llbuff2[22],llbuff3[22];
2061   DBUG_ENTER("maria_chk_data_link");
2062 
2063   if (!(param->testflag & T_SILENT))
2064   {
2065     if (extend)
2066       puts("- check records and index references");
2067     else
2068       puts("- check record links");
2069   }
2070 
2071   if (!(record= (uchar*) my_malloc(share->base.default_rec_buff_size, MYF(0))))
2072   {
2073     _ma_check_print_error(param,"Not enough memory for record");
2074     DBUG_RETURN(-1);
2075   }
2076   param->records= param->del_blocks= 0;
2077   param->used= param->link_used= param->splits= param->del_length= 0;
2078   param->lost= 0;
2079   param->tmp_record_checksum= param->glob_crc= 0;
2080   param->err_count= 0;
2081 
2082   error= 0;
2083   param->empty= share->pack.header_length;
2084 
2085   bzero((char*) param->tmp_key_crc,
2086         share->base.keys * sizeof(param->tmp_key_crc[0]));
2087 
2088   info->in_check_table= 1;       /* Don't assert on checksum errors */
2089 
2090   switch (share->data_file_type) {
2091   case BLOCK_RECORD:
2092     error= check_block_record(param, info, extend, record);
2093     break;
2094   case STATIC_RECORD:
2095     error= check_static_record(param, info, extend, record);
2096     break;
2097   case DYNAMIC_RECORD:
2098     error= check_dynamic_record(param, info, extend, record);
2099     break;
2100   case COMPRESSED_RECORD:
2101     error= check_compressed_record(param, info, extend, record);
2102     break;
2103   case NO_RECORD:
2104     param->records= share->state.state.records;
2105     param->record_checksum= 0;
2106     extend= 1;                                  /* No row checksums */
2107     /* no data, nothing to do */
2108     break;
2109   } /* switch */
2110 
2111   info->in_check_table= 0;
2112 
2113   if (error)
2114     goto err;
2115 
2116   if (param->testflag & T_WRITE_LOOP)
2117   {
2118     fputs("          \r",stdout);
2119     fflush(stdout);
2120   }
2121   if (param->records != share->state.state.records)
2122   {
2123     _ma_check_print_error(param,
2124                           "Record-count is not ok; found %-10s  Should be: %s",
2125                           llstr(param->records,llbuff),
2126                           llstr(share->state.state.records,llbuff2));
2127     error=1;
2128   }
2129   if (param->record_checksum &&
2130 	   param->record_checksum != param->tmp_record_checksum)
2131   {
2132     _ma_check_print_error(param,
2133                           "Key pointers and record positions doesn't match");
2134     error=1;
2135   }
2136   if (param->glob_crc != share->state.state.checksum &&
2137       (share->options &
2138        (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)))
2139   {
2140     _ma_check_print_warning(param,
2141                             "Record checksum is not the same as checksum "
2142                             "stored in the index file");
2143     error=1;
2144   }
2145   if (!extend)
2146   {
2147     uint key;
2148     for (key=0 ; key < share->base.keys;  key++)
2149     {
2150       if (param->tmp_key_crc[key] != param->key_crc[key] &&
2151           !(share->keyinfo[key].flag &
2152             (HA_FULLTEXT | HA_SPATIAL | HA_RTREE_INDEX)))
2153       {
2154 	_ma_check_print_error(param,"Checksum for key: %2d doesn't match "
2155                               "checksum for records",
2156                               key+1);
2157 	error=1;
2158       }
2159     }
2160   }
2161 
2162   if (param->del_length != share->state.state.empty)
2163   {
2164     _ma_check_print_warning(param,
2165                             "Found %s deleted space.   Should be %s",
2166                             llstr(param->del_length,llbuff2),
2167                             llstr(share->state.state.empty,llbuff));
2168   }
2169   /* Skip following checks for BLOCK RECORD as they don't make any sence */
2170   if (share->data_file_type != BLOCK_RECORD)
2171   {
2172     if (param->used + param->empty + param->del_length !=
2173         share->state.state.data_file_length)
2174     {
2175       _ma_check_print_warning(param,
2176                               "Found %s record data and %s unused data and %s "
2177                               "deleted data",
2178                               llstr(param->used, llbuff),
2179                               llstr(param->empty,llbuff2),
2180                               llstr(param->del_length,llbuff3));
2181       _ma_check_print_warning(param,
2182                               "Total %s   Should be: %s",
2183                               llstr((param->used+param->empty +
2184                                      param->del_length), llbuff),
2185                               llstr(share->state.state.data_file_length,
2186                                     llbuff2));
2187     }
2188     if (param->del_blocks != share->state.state.del)
2189     {
2190       _ma_check_print_warning(param,
2191                               "Found %10s deleted blocks.  Should be: %s",
2192                               llstr(param->del_blocks,llbuff),
2193                               llstr(share->state.state.del,llbuff2));
2194     }
2195     if (param->splits != share->state.split)
2196     {
2197       _ma_check_print_warning(param,
2198                               "Found %10s parts.  Should be: %s",
2199                               llstr(param->splits, llbuff),
2200                               llstr(share->state.split,llbuff2));
2201     }
2202   }
2203   if (param->testflag & T_INFO)
2204   {
2205     if (param->warning_printed || param->error_printed)
2206       puts("");
2207     if (param->used != 0 && ! param->error_printed)
2208     {
2209       if (param->records)
2210       {
2211         printf("Records:%18s    M.recordlength:%9lu   Packed:%14.0f%%\n",
2212                llstr(param->records,llbuff),
2213                (long)((param->used - param->link_used)/param->records),
2214                (share->base.blobs ? 0.0 :
2215                 (ulonglong2double((ulonglong) share->base.reclength *
2216                                   param->records)-
2217                  my_off_t2double(param->used))/
2218                 ulonglong2double((ulonglong) share->base.reclength *
2219                                  param->records)*100.0));
2220         printf("Recordspace used:%9.0f%%   Empty space:%12d%%  "
2221                "Blocks/Record: %6.2f\n",
2222                (ulonglong2double(param->used - param->link_used)/
2223                 ulonglong2double(param->used-param->link_used+param->empty) *
2224                 100.0),
2225                (!param->records ? 100 :
2226                 (int) (ulonglong2double(param->del_length+param->empty)/
2227                        my_off_t2double(param->used)*100.0)),
2228                ulonglong2double(param->splits - param->del_blocks) /
2229                param->records);
2230       }
2231       else
2232         printf("Records:%18s\n", "0");
2233     }
2234     printf("Record blocks:%12s    Delete blocks:%10s\n",
2235            llstr(param->splits - param->del_blocks, llbuff),
2236            llstr(param->del_blocks, llbuff2));
2237     printf("Record data:  %12s    Deleted data: %10s\n",
2238            llstr(param->used - param->link_used,llbuff),
2239            llstr(param->del_length, llbuff2));
2240     printf("Empty space:  %12s    Linkdata:     %10s\n",
2241            llstr(param->empty, llbuff),llstr(param->link_used, llbuff2));
2242     if (share->data_file_type == BLOCK_RECORD)
2243     {
2244       printf("Full pages:   %12s    Tail count: %12s\n",
2245              llstr(param->full_page_count, llbuff),
2246              llstr(param->tail_count, llbuff2));
2247       printf("Lost space:   %12s\n", llstr(param->lost, llbuff));
2248       if (param->max_found_trid)
2249       {
2250         printf("Max trans. id: %11s\n",
2251                llstr(param->max_found_trid, llbuff));
2252       }
2253     }
2254   }
2255   my_free(record);
2256   DBUG_RETURN (error);
2257 
2258 err:
2259   my_free(record);
2260   param->testflag|=T_RETRY_WITHOUT_QUICK;
2261   DBUG_RETURN(1);
2262 } /* maria_chk_data_link */
2263 
2264 
2265 /**
2266   Prepares a table for a repair or index sort: flushes pages, records durably
2267   in the table that it is undergoing the operation (if that op crashes, that
2268   info will serve for Recovery and the user).
2269 
2270   If we start overwriting the index file, and crash then, old REDOs will
2271   be tried and fail. To prevent that, we bump skip_redo_lsn, and thus we have
2272   to flush and sync pages so that old REDOs can be skipped.
2273   If this is not a bulk insert, which Recovery can handle gracefully (by
2274   truncating files, see UNDO_BULK_INSERT) we also mark the table
2275   crashed-on-repair, so that user knows it has to re-repair. If bulk insert we
2276   shouldn't mark it crashed-on-repair, because if we did this, the UNDO phase
2277   would skip the table (UNDO_BULK_INSERT would not be applied),
2278   and maria_chk would not improve that.
2279   If this is an OPTIMIZE which merely sorts index, we need to do the same
2280   too: old REDOs should not apply to the new index file.
2281   Only the flush is needed when in maria_chk which is not crash-safe.
2282 
2283   @param  info             table
2284   @param  param            repair parameters
2285   @param  discard_index    if index pages can be thrown away
2286 */
2287 
protect_against_repair_crash(MARIA_HA * info,const HA_CHECK * param,my_bool discard_index)2288 static my_bool protect_against_repair_crash(MARIA_HA *info,
2289                                             const HA_CHECK *param,
2290                                             my_bool discard_index)
2291 {
2292   MARIA_SHARE *share= info->s;
2293 
2294   /*
2295     There are other than recovery-related reasons to do the writes below:
2296     - the physical size of the data file is sometimes used during repair: we
2297     need to flush to have it exact
2298     - we flush the state because maria_open(HA_OPEN_COPY) will want to read
2299     it from disk.
2300   */
2301   if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
2302                             FLUSH_FORCE_WRITE,
2303                             discard_index ? FLUSH_IGNORE_CHANGED :
2304                             FLUSH_FORCE_WRITE) ||
2305       (share->changed &&
2306        _ma_state_info_write(share,
2307                             MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
2308                             MA_STATE_INFO_WRITE_FULL_INFO |
2309                             MA_STATE_INFO_WRITE_LOCK)))
2310     return TRUE;
2311   /* In maria_chk this is not needed: */
2312   if (maria_multi_threaded && share->base.born_transactional)
2313   {
2314     if ((param->testflag & T_NO_CREATE_RENAME_LSN) == 0)
2315     {
2316       /* this can be true only for a transactional table */
2317       maria_mark_in_repair(info);
2318       if (_ma_state_info_write(share,
2319                                MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
2320                                MA_STATE_INFO_WRITE_LOCK))
2321         return TRUE;
2322     }
2323     if (translog_status == TRANSLOG_OK &&
2324         _ma_update_state_lsns(share, translog_get_horizon(),
2325                               share->state.create_trid, FALSE, FALSE))
2326       return TRUE;
2327     if (_ma_sync_table_files(info))
2328       return TRUE;
2329   }
2330   return FALSE;
2331 }
2332 
2333 
2334 /**
2335    @brief Initialize variables for repair
2336 */
2337 
initialize_variables_for_repair(HA_CHECK * param,MARIA_SORT_INFO * sort_info,MARIA_SORT_PARAM * sort_param,MARIA_HA * info,my_bool rep_quick,MARIA_SHARE * org_share)2338 static int initialize_variables_for_repair(HA_CHECK *param,
2339                                            MARIA_SORT_INFO *sort_info,
2340                                            MARIA_SORT_PARAM *sort_param,
2341                                            MARIA_HA *info,
2342                                            my_bool rep_quick,
2343                                            MARIA_SHARE *org_share)
2344 {
2345   MARIA_SHARE *share= info->s;
2346 
2347   /*
2348     We have to clear these variables first, as the cleanup-in-case-of-error
2349     handling may touch these.
2350   */
2351   bzero((char*) sort_info,  sizeof(*sort_info));
2352   bzero((char*) sort_param, sizeof(*sort_param));
2353   bzero(&info->rec_cache, sizeof(info->rec_cache));
2354 
2355   if (share->data_file_type == NO_RECORD)
2356   {
2357     _ma_check_print_error(param,
2358                           "Can't repair tables with record type NO_DATA");
2359     return 1;
2360   }
2361 
2362   /* Make a copy to allow us to restore state and check how state changed */
2363   memcpy(org_share, share, sizeof(*share));
2364 
2365   /* Repair code relies on share->state.state so we have to update it here */
2366   if (share->lock.update_status)
2367     (*share->lock.update_status)(info);
2368 
2369   param->testflag|= T_REP;                     /* for easy checking */
2370   if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
2371     param->testflag|= T_CALC_CHECKSUM;
2372   param->glob_crc= 0;
2373   if (rep_quick)
2374     param->testflag|= T_QUICK;
2375   else
2376     param->testflag&= ~T_QUICK;
2377   param->org_key_map= share->state.key_map;
2378 
2379   /*
2380     Clear check variables set by repair. This is needed to allow one to run
2381     several repair's in a row with same param
2382   */
2383   param->retry_repair= 0;
2384   param->warning_printed= 0;
2385   param->error_printed= 0;
2386   param->wrong_trd_printed= 0;
2387 
2388   sort_param->sort_info= sort_info;
2389   sort_param->fix_datafile= ! rep_quick;
2390   sort_param->calc_checksum= MY_TEST(param->testflag & T_CALC_CHECKSUM);
2391   sort_info->info= sort_info->new_info= info;
2392   sort_info->param= param;
2393   set_data_file_type(sort_info, info->s);
2394   sort_info->org_data_file_type= share->data_file_type;
2395 
2396   info->rec_cache.file= info->dfile.file;
2397   info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
2398 
2399   if (protect_against_repair_crash(info, param,
2400                                    !MY_TEST(param->testflag &
2401                                             T_CREATE_MISSING_KEYS)))
2402     return 1;
2403 
2404   /* calculate max_records */
2405   sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
2406   param->max_progress= sort_info->filelength;
2407   if ((param->testflag & T_CREATE_MISSING_KEYS) ||
2408       sort_info->org_data_file_type == COMPRESSED_RECORD)
2409     sort_info->max_records= share->state.state.records;
2410   else
2411   {
2412     ulong rec_length;
2413     rec_length= MY_MAX(share->base.min_pack_length,
2414                     share->base.min_block_length);
2415     sort_info->max_records= (ha_rows) (sort_info->filelength / rec_length);
2416   }
2417 
2418   /* Set up transaction handler so that we can see all rows */
2419   if (param->max_trid == 0)
2420   {
2421     if (!ma_control_file_inited())
2422       param->max_trid= 0;      /* Give warning for first trid found */
2423     else
2424       param->max_trid= max_trid_in_system();
2425   }
2426   maria_ignore_trids(info);
2427   /* Don't write transid's during repair */
2428   maria_versioning(info, 0);
2429   /* remember original number of rows */
2430   *info->state= info->s->state.state;
2431   return 0;
2432 }
2433 
2434 
2435 /*
2436   During initialize_variables_for_repair and related functions we set some
2437   variables to values that makes sence during repair.
2438   This function restores these values to their original values so that we can
2439   use the handler in MariaDB without having to close and open the table.
2440 */
2441 
restore_table_state_after_repair(MARIA_HA * info,MARIA_SHARE * org_share)2442 static void restore_table_state_after_repair(MARIA_HA *info,
2443                                              MARIA_SHARE *org_share)
2444 {
2445   maria_versioning(info, info->s->have_versioning);
2446   info->s->lock_key_trees= org_share->lock_key_trees;
2447   DBUG_ASSERT(!info->s->have_versioning || info->s->lock_key_trees);
2448 }
2449 
2450 
2451 /**
2452   @brief Drop all indexes
2453 
2454   @param[in]    param           check parameters
2455   @param[in]    info            MARIA_HA handle
2456   @param[in]    force           if to force drop all indexes
2457 
2458   @return       status
2459     @retval     0               OK
2460     @retval     != 0            Error
2461 
2462   @note
2463     Once allocated, index blocks remain part of the key file forever.
2464     When indexes are disabled, no block is freed. When enabling indexes,
2465     no block is freed either. The new indexes are create from new
2466     blocks. (Bug #4692)
2467 
2468     Before recreating formerly disabled indexes, the unused blocks
2469     must be freed. There are two options to do this:
2470     - Follow the tree of disabled indexes, add all blocks to the
2471       deleted blocks chain. Would require a lot of random I/O.
2472     - Drop all blocks by clearing all index root pointers and all
2473       delete chain pointers and resetting key_file_length to the end
2474       of the index file header. This requires to recreate all indexes,
2475       even those that may still be intact.
2476     The second method is probably faster in most cases.
2477 
2478     When disabling indexes, MySQL disables either all indexes or all
2479     non-unique indexes. When MySQL [re-]enables disabled indexes
2480     (T_CREATE_MISSING_KEYS), then we either have "lost" blocks in the
2481     index file, or there are no non-unique indexes. In the latter case,
2482     maria_repair*() would not be called as there would be no disabled
2483     indexes.
2484 
2485     If there would be more unique indexes than disabled (non-unique)
2486     indexes, we could do the first method. But this is not implemented
2487     yet. By now we drop and recreate all indexes when repair is called.
2488 
2489     However, there is an exception. Sometimes MySQL disables non-unique
2490     indexes when the table is empty (e.g. when copying a table in
2491     mysql_alter_table()). When enabling the non-unique indexes, they
2492     are still empty. So there is no index block that can be lost. This
2493     optimization is implemented in this function.
2494 
2495     Note that in normal repair (T_CREATE_MISSING_KEYS not set) we
2496     recreate all enabled indexes unconditonally. We do not change the
2497     key_map. Otherwise we invert the key map temporarily (outside of
2498     this function) and recreate the then "seemingly" enabled indexes.
2499     When we cannot use the optimization, and drop all indexes, we
2500     pretend that all indexes were disabled. By the inversion, we will
2501     then recrate all indexes.
2502 */
2503 
maria_drop_all_indexes(HA_CHECK * param,MARIA_HA * info,my_bool force)2504 static int maria_drop_all_indexes(HA_CHECK *param, MARIA_HA *info,
2505                                   my_bool force)
2506 {
2507   MARIA_SHARE *share= info->s;
2508   MARIA_STATE_INFO *state= &share->state;
2509   uint i;
2510   DBUG_ENTER("maria_drop_all_indexes");
2511 
2512   /*
2513     If any of the disabled indexes has a key block assigned, we must
2514     drop and recreate all indexes to avoid losing index blocks.
2515 
2516     If we want to recreate disabled indexes only _and_ all of these
2517     indexes are empty, we don't need to recreate the existing indexes.
2518   */
2519   if (!force && (param->testflag & T_CREATE_MISSING_KEYS))
2520   {
2521     DBUG_PRINT("repair", ("creating missing indexes"));
2522     for (i= 0; i < share->base.keys; i++)
2523     {
2524       DBUG_PRINT("repair", ("index #: %u  key_root:%lld  active: %d",
2525                             i, state->key_root[i],
2526                             maria_is_key_active(state->key_map, i)));
2527       if ((state->key_root[i] != HA_OFFSET_ERROR) &&
2528           !maria_is_key_active(state->key_map, i))
2529       {
2530         /*
2531           This index has at least one key block and it is disabled.
2532           We would lose its block(s) if would just recreate it.
2533           So we need to drop and recreate all indexes.
2534         */
2535         DBUG_PRINT("repair", ("nonempty and disabled: recreate all"));
2536         break;
2537       }
2538     }
2539     if (i >= share->base.keys)
2540       goto end;
2541 
2542     /*
2543       We do now drop all indexes and declare them disabled. With the
2544       T_CREATE_MISSING_KEYS flag, maria_repair*() will recreate all
2545       disabled indexes and enable them.
2546     */
2547     maria_clear_all_keys_active(state->key_map);
2548     DBUG_PRINT("repair", ("declared all indexes disabled"));
2549   }
2550 
2551   /* Clear index root block pointers. */
2552   for (i= 0; i < share->base.keys; i++)
2553     state->key_root[i]= HA_OFFSET_ERROR;
2554 
2555   /* Drop the delete chain. */
2556   share->state.key_del=  HA_OFFSET_ERROR;
2557 
2558   /* Reset index file length to end of index file header. */
2559   share->state.state.key_file_length= share->base.keystart;
2560 
2561 end:
2562   DBUG_RETURN(0);
2563 }
2564 
2565 
2566 /*
2567   Recover old table by reading each record and writing all keys
2568 
2569   NOTES
2570     Save new datafile-name in temp_filename.
2571     We overwrite the index file as we go (writekeys() for example), so if we
2572     crash during this the table is unusable and user (or Recovery in the
2573     future) must repeat the REPAIR/OPTIMIZE operation. We could use a
2574     temporary index file in the future (drawback: more disk space).
2575 
2576   IMPLEMENTATION (for hard repair with block format)
2577    - Create new, unrelated MARIA_HA of the table
2578    - Create new datafile and associate it with new handler
2579    - Reset all statistic information in new handler
2580    - Copy all data to new handler with normal write operations
2581    - Move state of new handler to old handler
2582    - Close new handler
2583    - Close data file in old handler
2584    - Rename old data file to new data file.
2585    - Reopen data file in old handler
2586 */
2587 
maria_repair(HA_CHECK * param,register MARIA_HA * info,char * name,my_bool rep_quick)2588 int maria_repair(HA_CHECK *param, register MARIA_HA *info,
2589                  char *name, my_bool rep_quick)
2590 {
2591   int error, got_error;
2592   ha_rows start_records,new_header_length;
2593   my_off_t del;
2594   File new_file;
2595   MARIA_SHARE *share= info->s;
2596   char llbuff[22],llbuff2[22];
2597   MARIA_SORT_INFO sort_info;
2598   MARIA_SORT_PARAM sort_param;
2599   my_bool block_record, scan_inited= 0, reenable_logging= 0;
2600   enum data_file_type org_data_file_type= share->data_file_type;
2601   myf sync_dir= ((share->now_transactional && !share->temporary) ?
2602                  MY_SYNC_DIR : 0);
2603   MARIA_SHARE backup_share;
2604   DBUG_ENTER("maria_repair");
2605 
2606   got_error= 1;
2607   new_file= -1;
2608   start_records= share->state.state.records;
2609   if (!(param->testflag & T_SILENT))
2610   {
2611     printf("- recovering (with keycache) Aria-table '%s'\n",name);
2612     printf("Data records: %s\n", llstr(start_records, llbuff));
2613   }
2614 
2615   if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
2616                                       rep_quick, &backup_share))
2617     goto err;
2618 
2619   if ((reenable_logging= share->now_transactional))
2620     _ma_tmp_disable_logging_for_table(info, 0);
2621 
2622   sort_param.current_filepos= sort_param.filepos= new_header_length=
2623     ((param->testflag & T_UNPACK) ? 0L : share->pack.header_length);
2624 
2625   if (!rep_quick)
2626   {
2627     /* Get real path for data file */
2628     if ((new_file= mysql_file_create(key_file_tmp,
2629                                      fn_format(param->temp_filename,
2630                                                share->data_file_name.str, "",
2631                                                DATA_TMP_EXT, 2+4),
2632                                      0,param->tmpfile_createflag,
2633                                      MYF(0))) < 0)
2634     {
2635       _ma_check_print_error(param,"Can't create new tempfile: '%s'",
2636 			   param->temp_filename);
2637       goto err;
2638     }
2639     if (new_header_length &&
2640         maria_filecopy(param, new_file, info->dfile.file, 0L,
2641                        new_header_length, "datafile-header"))
2642       goto err;
2643     share->state.dellink= HA_OFFSET_ERROR;
2644     info->rec_cache.file= new_file;             /* For sort_delete_record */
2645     if (share->data_file_type == BLOCK_RECORD ||
2646         (param->testflag & T_UNPACK))
2647     {
2648       if (create_new_data_handle(&sort_param, new_file))
2649         goto err;
2650       sort_info.new_info->rec_cache.file= new_file;
2651     }
2652   }
2653 
2654   block_record= sort_info.new_info->s->data_file_type == BLOCK_RECORD;
2655 
2656   if (org_data_file_type != BLOCK_RECORD)
2657   {
2658     /* We need a read buffer to read rows in big blocks */
2659     if (init_io_cache(&param->read_cache, info->dfile.file,
2660                       (uint) param->read_buffer_length,
2661                       READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
2662       goto err;
2663   }
2664   if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
2665   {
2666     /* When writing to not block records, we need a write buffer */
2667     if (!rep_quick)
2668     {
2669       if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
2670                         (uint) param->write_buffer_length,
2671                         WRITE_CACHE, new_header_length, 1,
2672                         MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
2673         goto err;
2674       sort_info.new_info->opt_flag|=WRITE_CACHE_USED;
2675     }
2676   }
2677   else if (block_record)
2678   {
2679     scan_inited= 1;
2680     if (maria_scan_init(sort_info.info))
2681       goto err;
2682   }
2683 
2684   if (!(sort_param.record=
2685         (uchar *) my_malloc((uint)
2686                             share->base.default_rec_buff_size, MYF(0))) ||
2687       _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
2688                        share->base.default_rec_buff_size))
2689   {
2690     _ma_check_print_error(param, "Not enough memory for extra record");
2691     goto err;
2692   }
2693 
2694   sort_param.read_cache=param->read_cache;
2695   sort_param.pos=sort_param.max_pos=share->pack.header_length;
2696   param->read_cache.end_of_file= sort_info.filelength;
2697   sort_param.master=1;
2698   sort_info.max_records= ~(ha_rows) 0;
2699 
2700   del= share->state.state.del;
2701   share->state.state.records= share->state.state.del= share->state.split= 0;
2702   share->state.state.empty= 0;
2703 
2704   if (param->testflag & T_CREATE_MISSING_KEYS)
2705     maria_set_all_keys_active(share->state.key_map, share->base.keys);
2706   maria_drop_all_indexes(param, info, TRUE);
2707 
2708   maria_lock_memory(param);			/* Everything is alloced */
2709 
2710   sort_param.sort_info->info->in_check_table= 1;
2711   /* Re-create all keys, which are set in key_map. */
2712   while (!(error=sort_get_next_record(&sort_param)))
2713   {
2714     if (block_record && _ma_sort_write_record(&sort_param))
2715       goto err;
2716 
2717     if (writekeys(&sort_param))
2718     {
2719       if (my_errno != HA_ERR_FOUND_DUPP_KEY)
2720 	goto err;
2721       DBUG_DUMP("record", sort_param.record,
2722                 share->base.default_rec_buff_size);
2723       _ma_check_print_warning(param,
2724                               "Duplicate key %2d for record at %10s against "
2725                               "new record at %10s",
2726                               info->errkey+1,
2727                               llstr(sort_param.current_filepos, llbuff),
2728                               llstr(info->dup_key_pos,llbuff2));
2729       if (param->testflag & T_VERBOSE)
2730       {
2731         MARIA_KEY tmp_key;
2732         MARIA_KEYDEF *keyinfo= share->keyinfo + info->errkey;
2733 	(*keyinfo->make_key)(info, &tmp_key, (uint) info->errkey,
2734                              info->lastkey_buff,
2735                              sort_param.record, 0L, 0);
2736         _ma_print_key(stdout, &tmp_key);
2737       }
2738       sort_info.dupp++;
2739       if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
2740       {
2741         param->testflag|=T_RETRY_WITHOUT_QUICK;
2742 	param->error_printed=1;
2743 	goto err;
2744       }
2745       /* purecov: begin tested */
2746       if (block_record)
2747       {
2748         sort_info.new_info->s->state.state.records--;
2749         if ((*sort_info.new_info->s->write_record_abort)(sort_info.new_info))
2750         {
2751           _ma_check_print_error(param,"Couldn't delete duplicate row");
2752           goto err;
2753         }
2754       }
2755       /* purecov: end */
2756       continue;
2757     }
2758     if (!block_record)
2759     {
2760       if (_ma_sort_write_record(&sort_param))
2761         goto err;
2762       /* Filepos is pointer to where next row will be stored */
2763       sort_param.current_filepos= sort_param.filepos;
2764     }
2765   }
2766   if (error > 0 || maria_write_data_suffix(&sort_info, !rep_quick) ||
2767       flush_io_cache(&sort_info.new_info->rec_cache) ||
2768       param->read_cache.error < 0)
2769     goto err;
2770 
2771   if (param->testflag & T_WRITE_LOOP)
2772   {
2773     fputs("          \r",stdout); fflush(stdout);
2774   }
2775   if (mysql_file_chsize(share->kfile.file,
2776                         share->state.state.key_file_length, 0, MYF(0)))
2777   {
2778     _ma_check_print_warning(param,
2779 			   "Can't change size of indexfile, error: %d",
2780 			   my_errno);
2781     goto err;
2782   }
2783 
2784   if (rep_quick && del+sort_info.dupp != share->state.state.del)
2785   {
2786     _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
2787                           "Found wrong number of deleted records");
2788     _ma_check_print_error(param,"Run recovery again without -q");
2789     param->retry_repair=1;
2790     param->testflag|=T_RETRY_WITHOUT_QUICK;
2791     goto err;
2792   }
2793 
2794   if (param->testflag & T_SAFE_REPAIR)
2795   {
2796     /* Don't repair if we loosed more than one row */
2797     if (sort_info.new_info->s->state.state.records+1 < start_records)
2798     {
2799       share->state.state.records= start_records;
2800       goto err;
2801     }
2802   }
2803 
2804   end_io_cache(&sort_info.new_info->rec_cache);
2805   info->opt_flag&= ~WRITE_CACHE_USED;
2806 
2807   /*
2808     As we have read the data file (sort_get_next_record()) we may have
2809     cached, non-changed blocks of it in the page cache. We must throw them
2810     away as we are going to close their descriptor ('new_file'). We also want
2811     to flush any index block, so that it is ready for the upcoming sync.
2812   */
2813   if (_ma_flush_table_files_before_swap(param, info))
2814     goto err;
2815 
2816   if (!rep_quick)
2817   {
2818     sort_info.new_info->s->state.state.data_file_length= sort_param.filepos;
2819     if (sort_info.new_info != sort_info.info)
2820     {
2821       MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
2822       if (maria_close(sort_info.new_info))
2823       {
2824         _ma_check_print_error(param, "Got error %d on close", my_errno);
2825         goto err;
2826       }
2827       copy_data_file_state(&share->state, &save_state);
2828       new_file= -1;
2829       sort_info.new_info= info;
2830     }
2831     share->state.version=(ulong) time((time_t*) 0);	/* Force reopen */
2832 
2833     /* Replace the actual file with the temporary file */
2834     if (new_file >= 0)
2835       mysql_file_close(new_file, MYF(MY_WME));
2836     new_file= -1;
2837     change_data_file_descriptor(info, -1);
2838     if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
2839                                 DATA_TMP_EXT, param->backup_time,
2840                                 (param->testflag & T_BACKUP_DATA ?
2841                                  MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
2842                                 sync_dir) ||
2843         _ma_open_datafile(info, share))
2844     {
2845       goto err;
2846     }
2847   }
2848   else
2849   {
2850     share->state.state.data_file_length= sort_param.max_pos;
2851   }
2852   if (param->testflag & T_CALC_CHECKSUM)
2853     share->state.state.checksum= param->glob_crc;
2854 
2855   if (!(param->testflag & T_SILENT))
2856   {
2857     if (start_records != share->state.state.records)
2858       printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
2859   }
2860   if (sort_info.dupp)
2861     _ma_check_print_warning(param,
2862                             "%s records have been removed",
2863                             llstr(sort_info.dupp,llbuff));
2864 
2865   got_error= 0;
2866   /* If invoked by external program that uses thr_lock */
2867   if (&share->state.state != info->state)
2868     *info->state= *info->state_start= share->state.state;
2869 
2870 err:
2871   if (scan_inited)
2872     maria_scan_end(sort_info.info);
2873   _ma_reset_state(info);
2874 
2875   end_io_cache(&param->read_cache);
2876   if (sort_info.new_info)
2877   {
2878     end_io_cache(&sort_info.new_info->rec_cache);
2879     sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
2880   }
2881   info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
2882 
2883   sort_param.sort_info->info->in_check_table= 0;
2884   /* this below could fail, shouldn't we detect error? */
2885   if (got_error)
2886   {
2887     if (! param->error_printed)
2888       _ma_check_print_error(param,"%d for record at pos %s",my_errno,
2889 		  llstr(sort_param.start_recpos,llbuff));
2890     (void)_ma_flush_table_files_before_swap(param, info);
2891     if (sort_info.new_info && sort_info.new_info != sort_info.info)
2892     {
2893       unuse_data_file_descriptor(sort_info.new_info);
2894       maria_close(sort_info.new_info);
2895     }
2896     if (new_file >= 0)
2897     {
2898       mysql_file_close(new_file,MYF(0));
2899       mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
2900     }
2901     maria_mark_crashed_on_repair(info);
2902   }
2903   /* If caller had disabled logging it's not up to us to re-enable it */
2904   if (reenable_logging)
2905     _ma_reenable_logging_for_table(info, FALSE);
2906   restore_table_state_after_repair(info, &backup_share);
2907 
2908   my_free(sort_param.rec_buff);
2909   my_free(sort_param.record);
2910   my_free(sort_info.buff);
2911   if (!got_error && (param->testflag & T_UNPACK))
2912     restore_data_file_type(share);
2913   share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES |
2914 			  STATE_NOT_ANALYZED | STATE_NOT_ZEROFILLED);
2915   if (!rep_quick)
2916     share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_MOVABLE);
2917   DBUG_RETURN(got_error);
2918 }
2919 
2920 
2921 /* Uppdate keyfile when doing repair */
2922 
writekeys(MARIA_SORT_PARAM * sort_param)2923 static int writekeys(MARIA_SORT_PARAM *sort_param)
2924 {
2925   uint i;
2926   MARIA_HA *info=     sort_param->sort_info->info;
2927   MARIA_SHARE *share= info->s;
2928   uchar *record=    sort_param->record;
2929   uchar *key_buff;
2930   my_off_t filepos=   sort_param->current_filepos;
2931   MARIA_KEY key;
2932   DBUG_ENTER("writekeys");
2933 
2934   key_buff= info->lastkey_buff+share->base.max_key_length;
2935 
2936   for (i=0 ; i < share->base.keys ; i++)
2937   {
2938     if (maria_is_key_active(share->state.key_map, i))
2939     {
2940       if (share->keyinfo[i].flag & HA_FULLTEXT )
2941       {
2942         if (_ma_ft_add(info, i, key_buff, record, filepos))
2943 	  goto err;
2944       }
2945       else
2946       {
2947 	if (!(*share->keyinfo[i].make_key)(info, &key, i, key_buff, record,
2948                                          filepos, 0))
2949           goto err;
2950 	if ((*share->keyinfo[i].ck_insert)(info, &key))
2951 	  goto err;
2952       }
2953     }
2954   }
2955   DBUG_RETURN(0);
2956 
2957  err:
2958   if (my_errno == HA_ERR_FOUND_DUPP_KEY)
2959   {
2960     info->errkey=(int) i;			/* This key was found */
2961     while ( i-- > 0 )
2962     {
2963       if (maria_is_key_active(share->state.key_map, i))
2964       {
2965 	if (share->keyinfo[i].flag & HA_FULLTEXT)
2966         {
2967           if (_ma_ft_del(info,i,key_buff,record,filepos))
2968 	    break;
2969         }
2970         else
2971 	{
2972 	  (*share->keyinfo[i].make_key)(info, &key, i, key_buff, record,
2973                                         filepos, 0);
2974 	  if (_ma_ck_delete(info, &key))
2975 	    break;
2976 	}
2977       }
2978     }
2979   }
2980   /* Remove checksum that was added to glob_crc in sort_get_next_record */
2981   if (sort_param->calc_checksum)
2982     sort_param->sort_info->param->glob_crc-= info->cur_row.checksum;
2983   DBUG_PRINT("error",("errno: %d",my_errno));
2984   DBUG_RETURN(-1);
2985 } /* writekeys */
2986 
2987 
2988 	/* Change all key-pointers that points to a records */
2989 
maria_movepoint(register MARIA_HA * info,uchar * record,MARIA_RECORD_POS oldpos,MARIA_RECORD_POS newpos,uint prot_key)2990 int maria_movepoint(register MARIA_HA *info, uchar *record,
2991                     MARIA_RECORD_POS oldpos, MARIA_RECORD_POS newpos,
2992                     uint prot_key)
2993 {
2994   uint i;
2995   uchar *key_buff;
2996   MARIA_SHARE *share= info->s;
2997   MARIA_PAGE page;
2998   DBUG_ENTER("maria_movepoint");
2999 
3000   key_buff= info->lastkey_buff + share->base.max_key_length;
3001   for (i=0 ; i < share->base.keys; i++)
3002   {
3003     if (i != prot_key && maria_is_key_active(share->state.key_map, i))
3004     {
3005       MARIA_KEY key;
3006       (*share->keyinfo[i].make_key)(info, &key, i, key_buff, record, oldpos,
3007                                     0);
3008       if (key.keyinfo->flag & HA_NOSAME)
3009       {					/* Change pointer direct */
3010 	MARIA_KEYDEF *keyinfo;
3011 	keyinfo=share->keyinfo+i;
3012 	if (_ma_search(info, &key, (uint32) (SEARCH_SAME | SEARCH_SAVE_BUFF),
3013 		       share->state.key_root[i]))
3014 	  DBUG_RETURN(-1);
3015         _ma_page_setup(&page, info, keyinfo, info->last_keypage,
3016                        info->keyread_buff);
3017 
3018 	_ma_dpointer(share, info->int_keypos - page.node -
3019 		     share->rec_reflength,newpos);
3020 
3021 	if (_ma_write_keypage(&page, PAGECACHE_LOCK_LEFT_UNLOCKED,
3022                               DFLT_INIT_HITS))
3023 	  DBUG_RETURN(-1);
3024       }
3025       else
3026       {					/* Change old key to new */
3027 	if (_ma_ck_delete(info, &key))
3028 	  DBUG_RETURN(-1);
3029 	(*share->keyinfo[i].make_key)(info, &key, i, key_buff, record, newpos,
3030                                       0);
3031 	if (_ma_ck_write(info, &key))
3032 	  DBUG_RETURN(-1);
3033       }
3034     }
3035   }
3036   DBUG_RETURN(0);
3037 } /* maria_movepoint */
3038 
3039 
3040 	/* Tell system that we want all memory for our cache */
3041 
maria_lock_memory(HA_CHECK * param)3042 void maria_lock_memory(HA_CHECK *param __attribute__((unused)))
3043 {
3044 #ifdef SUN_OS				/* Key-cacheing thrases on sun 4.1 */
3045   if (param->opt_maria_lock_memory)
3046   {
3047     int success = mlockall(MCL_CURRENT);	/* or plock(DATLOCK); */
3048     if (geteuid() == 0 && success != 0)
3049       _ma_check_print_warning(param,
3050 			     "Failed to lock memory. errno %d",my_errno);
3051   }
3052 #endif
3053 } /* maria_lock_memory */
3054 
3055 
3056 /**
3057    Flush all changed blocks to disk.
3058 
3059    We release blocks as it's unlikely that they would all be needed soon.
3060    This function needs to be called before swapping data or index files or
3061    syncing them.
3062 
3063    @param  param           description of the repair operation
3064    @param  info            table
3065 */
3066 
_ma_flush_table_files_before_swap(HA_CHECK * param,MARIA_HA * info)3067 static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
3068                                                  MARIA_HA *info)
3069 {
3070   DBUG_ENTER("_ma_flush_table_files_before_swap");
3071   if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
3072                             FLUSH_RELEASE, FLUSH_RELEASE))
3073   {
3074     _ma_check_print_error(param, "%d when trying to write buffers", my_errno);
3075     DBUG_RETURN(TRUE);
3076   }
3077   DBUG_RETURN(FALSE);
3078 }
3079 
3080 
3081 	/* Sort index for more efficent reads */
3082 
maria_sort_index(HA_CHECK * param,register MARIA_HA * info,char * name)3083 int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name)
3084 {
3085   reg2 uint key;
3086   reg1 MARIA_KEYDEF *keyinfo;
3087   File new_file;
3088   my_off_t index_pos[HA_MAX_POSSIBLE_KEY];
3089   uint r_locks,w_locks;
3090   int old_lock;
3091   MARIA_SHARE *share= info->s;
3092   MARIA_STATE_INFO old_state;
3093   myf sync_dir= ((share->now_transactional && !share->temporary) ?
3094                  MY_SYNC_DIR : 0);
3095   DBUG_ENTER("maria_sort_index");
3096 
3097   /* cannot sort index files with R-tree indexes */
3098   for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
3099        key++,keyinfo++)
3100     if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
3101       DBUG_RETURN(0);
3102 
3103   if (!(param->testflag & T_SILENT))
3104     printf("- Sorting index for Aria-table '%s'\n",name);
3105 
3106   if (protect_against_repair_crash(info, param, FALSE))
3107     DBUG_RETURN(1);
3108 
3109   /* Get real path for index file */
3110   fn_format(param->temp_filename,name,"", MARIA_NAME_IEXT,2+4+32);
3111   if ((new_file=mysql_file_create(key_file_kfile, fn_format(param->temp_filename,param->temp_filename,
3112 				    "", INDEX_TMP_EXT,2+4),
3113                                   0, param->tmpfile_createflag, MYF(0))) < 0)
3114   {
3115     _ma_check_print_error(param,"Can't create new tempfile: '%s'",
3116 			 param->temp_filename);
3117     DBUG_RETURN(-1);
3118   }
3119   if (maria_filecopy(param, new_file, share->kfile.file, 0L,
3120                      (ulong) share->base.keystart, "headerblock"))
3121     goto err;
3122 
3123   param->new_file_pos=share->base.keystart;
3124   for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
3125        key++,keyinfo++)
3126   {
3127     if (maria_is_key_active(share->state.key_map, key) &&
3128         share->state.key_root[key] != HA_OFFSET_ERROR)
3129     {
3130       index_pos[key]=param->new_file_pos;	/* Write first block here */
3131       if (sort_one_index(param,info,keyinfo,share->state.key_root[key],
3132 			 new_file))
3133 	goto err;
3134     }
3135     else
3136       index_pos[key]= HA_OFFSET_ERROR;		/* No blocks */
3137   }
3138 
3139   /* Flush key cache for this file if we are calling this outside maria_chk */
3140   flush_pagecache_blocks(share->pagecache, &share->kfile,
3141                          FLUSH_IGNORE_CHANGED);
3142 
3143   share->state.version=(ulong) time((time_t*) 0);
3144   old_state= share->state;			/* save state if not stored */
3145   r_locks=   share->r_locks;
3146   w_locks=   share->w_locks;
3147   old_lock=  info->lock_type;
3148 
3149 	/* Put same locks as old file */
3150   share->r_locks= share->w_locks= share->tot_locks= 0;
3151   (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE);
3152   mysql_mutex_lock(&share->intern_lock);
3153   mysql_file_close(share->kfile.file, MYF(MY_WME));
3154   share->kfile.file = -1;
3155   mysql_mutex_unlock(&share->intern_lock);
3156   mysql_file_close(new_file, MYF(MY_WME));
3157   if (maria_change_to_newfile(share->index_file_name.str, MARIA_NAME_IEXT,
3158                               INDEX_TMP_EXT, 0, sync_dir) ||
3159       _ma_open_keyfile(share))
3160     goto err2;
3161   info->lock_type= F_UNLCK;			/* Force maria_readinfo to lock */
3162   _ma_readinfo(info,F_WRLCK,0);			/* Will lock the table */
3163   info->lock_type=  old_lock;
3164   share->r_locks=   r_locks;
3165   share->w_locks=   w_locks;
3166   share->tot_locks= r_locks+w_locks;
3167   share->state=     old_state;			/* Restore old state */
3168 
3169   share->state.state.key_file_length=param->new_file_pos;
3170   info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
3171   for (key=0 ; key < share->base.keys ; key++)
3172     share->state.key_root[key]=index_pos[key];
3173   share->state.key_del=  HA_OFFSET_ERROR;
3174 
3175   share->state.changed&= ~STATE_NOT_SORTED_PAGES;
3176   DBUG_EXECUTE_IF("maria_flush_whole_log",
3177                   {
3178                     DBUG_PRINT("maria_flush_whole_log", ("now"));
3179                     translog_flush(translog_get_horizon());
3180                   });
3181   DBUG_EXECUTE_IF("maria_crash_sort_index",
3182                   {
3183                     DBUG_PRINT("maria_crash_sort_index", ("now"));
3184                     DBUG_SUICIDE();
3185                   });
3186   DBUG_RETURN(0);
3187 
3188 err:
3189   mysql_file_close(new_file, MYF(MY_WME));
3190 err2:
3191   mysql_file_delete(key_file_tmp, param->temp_filename,MYF(MY_WME));
3192   DBUG_RETURN(-1);
3193 } /* maria_sort_index */
3194 
3195 
3196 /**
3197   @brief write a page directly to index file
3198 
3199 */
3200 
write_page(MARIA_SHARE * share,File file,uchar * buff,uint block_size,my_off_t pos,int myf_rw)3201 static int write_page(MARIA_SHARE *share, File file,
3202                       uchar *buff, uint block_size,
3203                       my_off_t pos, int myf_rw)
3204 {
3205   int res;
3206   PAGECACHE_IO_HOOK_ARGS args;
3207   args.page= buff;
3208   args.pageno= (pgcache_page_no_t) (pos / share->block_size);
3209   args.data= (uchar*) share;
3210   args.crypt_buf= NULL;
3211   (* share->kfile.pre_write_hook)(&args);
3212   res= (int)my_pwrite(file, args.page, block_size, pos, myf_rw);
3213   (* share->kfile.post_write_hook)(res, &args);
3214   return res;
3215 }
3216 
3217 
/*
  Sort index blocks recursive using one index

  Copies the index page at 'pagepos', and every page reachable from it,
  into 'new_file'. The position of each copied page is taken from
  param->new_file_pos, which is advanced by keyinfo->block_length for every
  page. Pointers stored in a page are rewritten to the new position of the
  page they refer to before the page itself is written.

  param      Check parameters; new_file_pos is read and advanced
  info       Aria handler
  keyinfo    Definition of the key the page belongs to (must not be R-tree)
  pagepos    Position of the page in the current index file
  new_file   File descriptor of the new index file

  Returns 0 on success, 1 on read/write/recursion errors and -1 if the
  page buffer could not be allocated.
*/

static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
			  my_off_t pagepos, File new_file)
{
  uint length,nod_flag;
  uchar *buff,*keypos,*endpos;
  my_off_t new_page_pos,next_page;
  MARIA_SHARE *share= info->s;
  MARIA_KEY key;
  MARIA_PAGE page;
  DBUG_ENTER("sort_one_index");

  /* cannot walk over R-tree indices */
  DBUG_ASSERT(keyinfo->key_alg != HA_KEY_ALG_RTREE);
  /* Reserve the slot for this page before any child takes a position */
  new_page_pos=param->new_file_pos;
  param->new_file_pos+=keyinfo->block_length;
  key.keyinfo= keyinfo;

  /* One buffer: the page image followed by room for one unpacked key */
  if (!(buff= (uchar*) my_alloca((uint) keyinfo->block_length +
                                 keyinfo->maxlength +
                                 MARIA_INDEX_OVERHEAD_SIZE)))
  {
    _ma_check_print_error(param,"Not enough memory for key block");
    DBUG_RETURN(-1);
  }
  key.data= buff + keyinfo->block_length;

  if (_ma_fetch_keypage(&page, info, keyinfo, pagepos,
                        PAGECACHE_LOCK_LEFT_UNLOCKED,
                        DFLT_INIT_HITS, buff, 0))
  {
    report_keypage_fault(param, info, pagepos);
    goto err;
  }

  /* Only node pages and fulltext pages are walked key by key */
  if ((nod_flag= page.node) || keyinfo->flag & HA_FULLTEXT)
  {
    keypos= page.buff + share->keypage_header + nod_flag;
    endpos= page.buff + page.size;

    /*
      The child pointer is handled before the end-of-page test, so the
      pointer that follows the last key is processed too.
    */
    for ( ;; )
    {
      if (nod_flag)
      {
	next_page= _ma_kpos(nod_flag,keypos);
        /*
          Save new pos: the child takes param->new_file_pos as its own
          position at the start of the recursive call below.
        */
	_ma_kpointer(info,keypos-nod_flag,param->new_file_pos);
	if (sort_one_index(param,info,keyinfo,next_page,new_file))
	{
	  DBUG_PRINT("error",
		     ("From page: %ld, keyoffset: %lu  used_length: %d",
		      (ulong) pagepos, (ulong) (keypos - buff),
		      (int) page.size));
	  DBUG_DUMP("buff", page.buff, page.size);
	  goto err;
	}
      }
      /* Stop at end of page or when the next key cannot be unpacked */
      if (keypos >= endpos ||
	  !(*keyinfo->get_key)(&key, page.flag, nod_flag, &keypos))
	break;
      DBUG_ASSERT(keypos <= endpos);
      if (keyinfo->flag & HA_FULLTEXT)
      {
        uint off;
        int  subkeys;
        get_key_full_length_rdonly(off, key.data);
        subkeys= ft_sintXkorr(key.data + off);
        /*
          NOTE(review): a negative counter appears to mark an entry whose
          row-position field refers to another index page instead of a
          row; that page is copied using share->ft2_keyinfo - confirm.
        */
        if (subkeys < 0)
        {
          next_page= _ma_row_pos_from_key(&key);
          _ma_dpointer(share, keypos - nod_flag - share->rec_reflength,
                       param->new_file_pos); /* Save new pos */
          if (sort_one_index(param,info,&share->ft2_keyinfo,
                             next_page,new_file))
            goto err;
        }
      }
    }
  }

  /* Fill block with zero and write it to the new index file */
  length= page.size;
  bzero(buff+length,keyinfo->block_length-length);
  if (write_page(share, new_file, buff, keyinfo->block_length,
                 new_page_pos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
  {
    _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno);
    goto err;
  }
  my_afree(buff);
  DBUG_RETURN(0);
err:
  my_afree(buff);
  DBUG_RETURN(1);
} /* sort_one_index */
3315 
3316 
3317 /**
3318    @brief Fill empty space in index file with zeroes
3319 
3320    @return
3321    @retval 0  Ok
3322    @retval 1  Error
3323 */
3324 
maria_zerofill_index(HA_CHECK * param,MARIA_HA * info,const char * name)3325 static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info,
3326                                     const char *name)
3327 {
3328   MARIA_SHARE *share= info->s;
3329   MARIA_PINNED_PAGE page_link;
3330   char llbuff[21];
3331   uchar *buff;
3332   pgcache_page_no_t page;
3333   my_off_t pos;
3334   my_off_t key_file_length= share->state.state.key_file_length;
3335   uint block_size= share->block_size;
3336   my_bool zero_lsn= (share->base.born_transactional &&
3337                      !(param->testflag & T_ZEROFILL_KEEP_LSN));
3338   int error= 1;
3339   DBUG_ENTER("maria_zerofill_index");
3340 
3341   if (!(param->testflag & T_SILENT))
3342     printf("- Zerofilling index for Aria-table '%s'\n",name);
3343 
3344   /* Go through the index file */
3345   for (pos= share->base.keystart, page= (ulonglong) (pos / block_size);
3346        pos < key_file_length;
3347        pos+= block_size, page++)
3348   {
3349     uint length;
3350     if (!(buff= pagecache_read(share->pagecache,
3351                                &share->kfile, page,
3352                                DFLT_INIT_HITS, 0,
3353                                PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
3354                                &page_link.link)))
3355     {
3356       pagecache_unlock_by_link(share->pagecache, page_link.link,
3357                                PAGECACHE_LOCK_WRITE_UNLOCK,
3358                                PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3359                                LSN_IMPOSSIBLE, 0, FALSE);
3360       _ma_check_print_error(param,
3361                             "Page %9s: Got error %d when reading index file",
3362                             llstr(pos, llbuff), my_errno);
3363       goto end;
3364     }
3365     if (zero_lsn)
3366       bzero(buff, LSN_SIZE);
3367 
3368     if (share->base.born_transactional)
3369     {
3370       uint keynr= _ma_get_keynr(share, buff);
3371       if (keynr < share->base.keys)
3372       {
3373         MARIA_PAGE page;
3374         DBUG_ASSERT(keynr < share->base.keys);
3375 
3376         _ma_page_setup(&page, info, share->keyinfo + keynr, pos, buff);
3377         if (_ma_compact_keypage(&page, ~(TrID) 0))
3378         {
3379           _ma_check_print_error(param,
3380                                 "Page %9s: Got error %d when reading index "
3381                                 "file",
3382                                 llstr(pos, llbuff), my_errno);
3383           goto end;
3384         }
3385       }
3386     }
3387 
3388     length= _ma_get_page_used(share, buff);
3389     DBUG_ASSERT(length <= block_size);
3390     if (length < block_size)
3391       bzero(buff + length, block_size - length);
3392     pagecache_unlock_by_link(share->pagecache, page_link.link,
3393                              PAGECACHE_LOCK_WRITE_UNLOCK,
3394                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3395                              LSN_IMPOSSIBLE, 1, FALSE);
3396   }
3397   error= 0;                                     /* ok */
3398 
3399 end:
3400   if (flush_pagecache_blocks(share->pagecache, &share->kfile,
3401                              FLUSH_FORCE_WRITE))
3402     DBUG_RETURN(1);
3403   DBUG_RETURN(error);
3404 }
3405 
3406 
3407 /**
3408    @brief Fill empty space in data file with zeroes
3409 
3410    @todo
3411    Zerofill all pages marked in bitmap as empty and change them to
3412    be of type UNALLOCATED_PAGE
3413 
3414    @return
3415    @retval 0  Ok
3416    @retval 1  Error
3417 */
3418 
maria_zerofill_data(HA_CHECK * param,MARIA_HA * info,const char * name)3419 static my_bool maria_zerofill_data(HA_CHECK *param, MARIA_HA *info,
3420                                    const char *name)
3421 {
3422   MARIA_SHARE *share= info->s;
3423   MARIA_PINNED_PAGE page_link;
3424   char llbuff[21];
3425   my_off_t pos;
3426   pgcache_page_no_t page;
3427   uint block_size= share->block_size;
3428   MARIA_FILE_BITMAP *bitmap= &share->bitmap;
3429   my_bool zero_lsn= !(param->testflag & T_ZEROFILL_KEEP_LSN), error;
3430   DBUG_ENTER("maria_zerofill_data");
3431 
3432   /* This works only with BLOCK_RECORD files */
3433   if (share->data_file_type != BLOCK_RECORD)
3434     DBUG_RETURN(0);
3435 
3436   if (!(param->testflag & T_SILENT))
3437     printf("- Zerofilling data  for Aria-table '%s'\n",name);
3438 
3439   /* Go through the record file */
3440   for (page= 1, pos= block_size;
3441        pos < share->state.state.data_file_length;
3442        pos+= block_size, page++)
3443   {
3444     uchar *buff;
3445     enum en_page_type page_type;
3446 
3447     /* Ignore bitmap pages */
3448     if ((page % share->bitmap.pages_covered) == 0)
3449       continue;
3450     if (!(buff= pagecache_read(share->pagecache,
3451                                &info->dfile,
3452                                page, 1, 0,
3453                                PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
3454                                &page_link.link)))
3455     {
3456       _ma_check_print_error(param,
3457                             "Page %9s:  Got error: %d when reading datafile",
3458                             llstr(pos, llbuff), my_errno);
3459       goto err;
3460     }
3461     page_type= (enum en_page_type) (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK);
3462     switch (page_type) {
3463     case UNALLOCATED_PAGE:
3464       if (zero_lsn)
3465         bzero(buff, block_size);
3466       else
3467         bzero(buff + LSN_SIZE, block_size - LSN_SIZE);
3468       break;
3469     case BLOB_PAGE:
3470       if (_ma_bitmap_get_page_bits(info, bitmap, page) == 0)
3471       {
3472         /* Unallocated page */
3473         if (zero_lsn)
3474           bzero(buff, block_size);
3475         else
3476           bzero(buff + LSN_SIZE, block_size - LSN_SIZE);
3477       }
3478       else
3479         if (zero_lsn)
3480           bzero(buff, LSN_SIZE);
3481       break;
3482     case HEAD_PAGE:
3483     case TAIL_PAGE:
3484     {
3485       uint max_entry= (uint) buff[DIR_COUNT_OFFSET];
3486       uint offset, dir_start, empty_space;
3487       uchar *dir;
3488 
3489       if (zero_lsn)
3490         bzero(buff, LSN_SIZE);
3491       if (max_entry != 0)
3492       {
3493         my_bool is_head_page= (page_type == HEAD_PAGE);
3494         dir= dir_entry_pos(buff, block_size, max_entry - 1);
3495         _ma_compact_block_page(share,
3496                                buff, max_entry -1, 0,
3497                                is_head_page ? ~(TrID) 0 : 0,
3498                                is_head_page ?
3499                                share->base.min_block_length : 0);
3500 
3501         /* compactation may have increased free space */
3502         empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
3503         if (!enough_free_entries_on_page(share, buff))
3504           empty_space= 0;                         /* Page is full */
3505         if (_ma_bitmap_set(info, page, is_head_page,
3506                            empty_space))
3507           goto err;
3508 
3509         /* Zerofill the not used part */
3510         offset= uint2korr(dir) + uint2korr(dir+2);
3511         dir_start= (uint) (dir - buff);
3512         DBUG_ASSERT(dir_start >= offset);
3513         if (dir_start > offset)
3514           bzero(buff + offset, dir_start - offset);
3515       }
3516       break;
3517     }
3518     default:
3519       _ma_check_print_error(param,
3520                             "Page %9s:  Found unrecognizable block of type %d",
3521                             llstr(pos, llbuff), page_type);
3522       goto err;
3523     }
3524     pagecache_unlock_by_link(share->pagecache, page_link.link,
3525                              PAGECACHE_LOCK_WRITE_UNLOCK,
3526                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3527                              LSN_IMPOSSIBLE, 1, FALSE);
3528   }
3529   error= _ma_bitmap_flush(share);
3530   if (flush_pagecache_blocks(share->pagecache, &info->dfile,
3531                              FLUSH_FORCE_WRITE))
3532     error= 1;
3533   DBUG_RETURN(error);
3534 
3535 err:
3536   pagecache_unlock_by_link(share->pagecache, page_link.link,
3537                            PAGECACHE_LOCK_WRITE_UNLOCK,
3538                            PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3539                            LSN_IMPOSSIBLE, 0, FALSE);
3540   /* flush what was changed so far */
3541   (void) _ma_bitmap_flush(share);
3542   (void) flush_pagecache_blocks(share->pagecache, &info->dfile,
3543                                 FLUSH_FORCE_WRITE);
3544 
3545   DBUG_RETURN(1);
3546 }
3547 
3548 
3549 /**
3550    @brief Fill empty space in index and data files with zeroes
3551 
3552    @return
3553    @retval 0  Ok
3554    @retval 1  Error
3555 */
3556 
maria_zerofill(HA_CHECK * param,MARIA_HA * info,const char * name)3557 int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name)
3558 {
3559   my_bool error, reenable_logging,
3560     zero_lsn= !(param->testflag & T_ZEROFILL_KEEP_LSN);
3561   MARIA_SHARE *share= info->s;
3562   DBUG_ENTER("maria_zerofill");
3563   if ((reenable_logging= share->now_transactional))
3564     _ma_tmp_disable_logging_for_table(info, 0);
3565   if (!(error= (maria_zerofill_index(param, info, name) ||
3566                 maria_zerofill_data(param, info, name) ||
3567                 _ma_set_uuid(info->s, 0))))
3568   {
3569     /*
3570       Mark that we have done zerofill of data and index. If we zeroed pages'
3571       LSN, table is movable.
3572     */
3573     share->state.changed&= ~STATE_NOT_ZEROFILLED;
3574     if (zero_lsn)
3575     {
3576       share->state.changed&= ~(STATE_NOT_MOVABLE | STATE_MOVED);
3577       /* Table should get new LSNs */
3578       share->state.create_rename_lsn= share->state.is_of_horizon=
3579         share->state.skip_redo_lsn= LSN_NEEDS_NEW_STATE_LSNS;
3580     }
3581     /* Ensure state is later flushed to disk, if within maria_chk */
3582     info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
3583 
3584     /*
3585       Reset create_trid to make file comparable and to ensure that new
3586       trid's in the file starts from 0.
3587     */
3588     share->state.create_trid= 0;
3589   }
3590   if (reenable_logging)
3591     _ma_reenable_logging_for_table(info, FALSE);
3592   DBUG_RETURN(error);
3593 }
3594 
3595 
3596 /*
3597   Let temporary file replace old file.
3598   This assumes that the new file was created in the same
3599   directory as given by realpath(filename).
3600   This will ensure that any symlinks that are used will still work.
3601   Copy stats from old file to new file, deletes orignal and
3602   changes new file name to old file name
3603 */
3604 
maria_change_to_newfile(const char * filename,const char * old_ext,const char * new_ext,time_t backup_time,myf MyFlags)3605 int maria_change_to_newfile(const char * filename, const char * old_ext,
3606                             const char * new_ext, time_t backup_time,
3607                             myf MyFlags)
3608 {
3609   char old_filename[FN_REFLEN],new_filename[FN_REFLEN];
3610   /* Get real path to filename */
3611   (void) fn_format(old_filename,filename,"",old_ext,2+4+32);
3612   return my_redel(old_filename,
3613 		  fn_format(new_filename,old_filename,"",new_ext,2+4),
3614                   backup_time,
3615 		  MYF(MY_WME | MY_LINK_WARNING | MyFlags));
3616 } /* maria_change_to_newfile */
3617 
3618 
3619 /* Copy a block between two files */
3620 
maria_filecopy(HA_CHECK * param,File to,File from,my_off_t start,my_off_t length,const char * type)3621 int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start,
3622                    my_off_t length, const char *type)
3623 {
3624   uchar tmp_buff[IO_SIZE], *buff;
3625   ulong buff_length;
3626   DBUG_ENTER("maria_filecopy");
3627 
3628   buff_length=(ulong) MY_MIN(param->write_buffer_length,length);
3629   if (!(buff=my_malloc(buff_length,MYF(0))))
3630   {
3631     buff=tmp_buff; buff_length=IO_SIZE;
3632   }
3633 
3634   mysql_file_seek(from, start, MY_SEEK_SET,MYF(0));
3635   while (length > buff_length)
3636   {
3637     if (mysql_file_read(from, buff, buff_length, MYF(MY_NABP)) ||
3638 	mysql_file_write(to,  buff, buff_length, param->myf_rw))
3639       goto err;
3640     length-= buff_length;
3641   }
3642   if (mysql_file_read(from, buff, (size_t) length,MYF(MY_NABP)) ||
3643       mysql_file_write(to,  buff, (size_t) length,param->myf_rw))
3644     goto err;
3645   if (buff != tmp_buff)
3646     my_free(buff);
3647   DBUG_RETURN(0);
3648 err:
3649   if (buff != tmp_buff)
3650     my_free(buff);
3651   _ma_check_print_error(param,"Can't copy %s to tempfile, error %d",
3652 		       type,my_errno);
3653   DBUG_RETURN(1);
3654 }
3655 
3656 
3657 /*
3658   Repair table or given index using sorting
3659 
3660   SYNOPSIS
3661     maria_repair_by_sort()
3662     param		Repair parameters
3663     info		MARIA handler to repair
3664     name		Name of table (for warnings)
3665     rep_quick		set to <> 0 if we should not change data file
3666 
3667   RESULT
3668     0	ok
3669     <>0	Error
3670 */
3671 
maria_repair_by_sort(HA_CHECK * param,register MARIA_HA * info,const char * name,my_bool rep_quick)3672 int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
3673                          const char * name, my_bool rep_quick)
3674 {
3675   int got_error;
3676   uint i, keys_to_repair;
3677   ha_rows start_records;
3678   my_off_t new_header_length, org_header_length, del;
3679   File new_file;
3680   MARIA_SORT_PARAM sort_param;
3681   MARIA_SHARE *share= info->s;
3682   HA_KEYSEG *keyseg;
3683   double  *rec_per_key_part;
3684   char llbuff[22];
3685   MARIA_SORT_INFO sort_info;
3686   ulonglong UNINIT_VAR(key_map);
3687   myf sync_dir= ((share->now_transactional && !share->temporary) ?
3688                  MY_SYNC_DIR : 0);
3689   my_bool scan_inited= 0, reenable_logging= 0;
3690   MARIA_SHARE backup_share;
3691   DBUG_ENTER("maria_repair_by_sort");
3692 
3693   got_error= 1;
3694   new_file= -1;
3695   start_records= share->state.state.records;
3696   if (!(param->testflag & T_SILENT))
3697   {
3698     printf("- recovering (with sort) Aria-table '%s'\n",name);
3699     printf("Data records: %s\n", llstr(start_records,llbuff));
3700   }
3701 
3702   if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
3703                                       rep_quick, &backup_share))
3704     goto err;
3705 
3706   if ((reenable_logging= share->now_transactional))
3707     _ma_tmp_disable_logging_for_table(info, 0);
3708 
3709   org_header_length= share->pack.header_length;
3710   new_header_length= (param->testflag & T_UNPACK) ? 0 : org_header_length;
3711   sort_param.filepos= new_header_length;
3712 
3713   if (!rep_quick)
3714   {
3715     /* Get real path for data file */
3716     if ((new_file=mysql_file_create(key_file_tmp,
3717                                     fn_format(param->temp_filename,
3718                                               share->data_file_name.str, "",
3719                                               DATA_TMP_EXT, 2+4),
3720                                     0,param->tmpfile_createflag,
3721                                     MYF(0))) < 0)
3722     {
3723       _ma_check_print_error(param,"Can't create new tempfile: '%s'",
3724 			   param->temp_filename);
3725       goto err;
3726     }
3727     if (new_header_length &&
3728         maria_filecopy(param, new_file, info->dfile.file, 0L,
3729                        new_header_length, "datafile-header"))
3730       goto err;
3731 
3732     share->state.dellink= HA_OFFSET_ERROR;
3733     info->rec_cache.file= new_file;             /* For sort_delete_record */
3734     if (share->data_file_type == BLOCK_RECORD ||
3735         (param->testflag & T_UNPACK))
3736     {
3737       if (create_new_data_handle(&sort_param, new_file))
3738         goto err;
3739       sort_info.new_info->rec_cache.file= new_file;
3740     }
3741   }
3742 
3743   if (!(sort_info.key_block=
3744 	alloc_key_blocks(param,
3745 			 (uint) param->sort_key_blocks,
3746 			 share->base.max_key_block_length)))
3747     goto err;
3748   sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
3749 
3750   if (share->data_file_type != BLOCK_RECORD)
3751   {
3752     /* We need a read buffer to read rows in big blocks */
3753     if (init_io_cache(&param->read_cache, info->dfile.file,
3754                       (uint) param->read_buffer_length,
3755                       READ_CACHE, org_header_length, 1, MYF(MY_WME)))
3756       goto err;
3757   }
3758   if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
3759   {
3760     /* When writing to not block records, we need a write buffer */
3761     if (!rep_quick)
3762     {
3763       if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
3764                         (uint) param->write_buffer_length,
3765                         WRITE_CACHE, new_header_length, 1,
3766                         MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
3767         goto err;
3768       sort_info.new_info->opt_flag|= WRITE_CACHE_USED;
3769     }
3770   }
3771 
3772   if (!(sort_param.record=
3773         (uchar*) my_malloc((size_t) share->base.default_rec_buff_size,
3774                            MYF(0))) ||
3775       _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
3776                        share->base.default_rec_buff_size))
3777   {
3778     _ma_check_print_error(param, "Not enough memory for extra record");
3779     goto err;
3780   }
3781 
3782   /* Optionally drop indexes and optionally modify the key_map */
3783   maria_drop_all_indexes(param, info, FALSE);
3784   key_map= share->state.key_map;
3785   if (param->testflag & T_CREATE_MISSING_KEYS)
3786   {
3787     /* Invert the copied key_map to recreate all disabled indexes. */
3788     key_map= ~key_map;
3789   }
3790 
3791   param->read_cache.end_of_file= sort_info.filelength;
3792   sort_param.wordlist=NULL;
3793   init_alloc_root(&sort_param.wordroot, "sort", FTPARSER_MEMROOT_ALLOC_SIZE, 0,
3794                   MYF(param->malloc_flags));
3795 
3796   sort_param.key_cmp=sort_key_cmp;
3797   sort_param.lock_in_memory=maria_lock_memory;
3798   sort_param.tmpdir=param->tmpdir;
3799   sort_param.master =1;
3800 
3801   del=share->state.state.del;
3802 
3803   /* Calculate number of keys to repair */
3804   keys_to_repair= 0;
3805   for (sort_param.key=0 ; sort_param.key < share->base.keys ;
3806        sort_param.key++)
3807   {
3808     if (maria_is_key_active(key_map, sort_param.key))
3809       keys_to_repair++;
3810   }
3811   /* For each key we scan and merge sort the keys */
3812   param->max_stage= keys_to_repair*2;
3813 
3814   rec_per_key_part= param->new_rec_per_key_part;
3815   for (sort_param.key=0 ; sort_param.key < share->base.keys ;
3816        rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++)
3817   {
3818     sort_param.keyinfo=share->keyinfo+sort_param.key;
3819     /*
3820       Skip this index if it is marked disabled in the copied
3821       (and possibly inverted) key_map.
3822     */
3823     if (! maria_is_key_active(key_map, sort_param.key))
3824     {
3825       /* Remember old statistics for key */
3826       memcpy((char*) rec_per_key_part,
3827 	     (char*) (share->state.rec_per_key_part +
3828 		      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
3829 	     sort_param.keyinfo->keysegs*sizeof(*rec_per_key_part));
3830       DBUG_PRINT("repair", ("skipping seemingly disabled index #: %u",
3831                             sort_param.key));
3832       continue;
3833     }
3834 
3835     if ((!(param->testflag & T_SILENT)))
3836       printf ("- Fixing index %d\n",sort_param.key+1);
3837 
3838     sort_param.read_cache=param->read_cache;
3839     sort_param.seg=sort_param.keyinfo->seg;
3840     sort_param.max_pos= sort_param.pos= org_header_length;
3841     keyseg=sort_param.seg;
3842     bzero((char*) sort_param.unique,sizeof(sort_param.unique));
3843     sort_param.key_length=share->rec_reflength;
3844     for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++)
3845     {
3846       sort_param.key_length+=keyseg[i].length;
3847       if (keyseg[i].flag & HA_SPACE_PACK)
3848 	sort_param.key_length+=get_pack_length(keyseg[i].length);
3849       if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
3850         sort_param.key_length+= 2 + MY_TEST(keyseg[i].length >= 127);
3851       if (keyseg[i].flag & HA_NULL_PART)
3852 	sort_param.key_length++;
3853     }
3854     share->state.state.records=share->state.state.del=share->state.split=0;
3855     share->state.state.empty=0;
3856 
3857     if (sort_param.keyinfo->flag & HA_FULLTEXT)
3858     {
3859       uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
3860                                     sort_param.keyinfo->seg->charset->mbmaxlen;
3861       sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
3862       /*
3863         fulltext indexes may have much more entries than the
3864         number of rows in the table. We estimate the number here.
3865 
3866         Note, built-in parser is always nr. 0 - see ftparser_call_initializer()
3867       */
3868       if (sort_param.keyinfo->ftkey_nr == 0)
3869       {
3870         /*
3871           for built-in parser the number of generated index entries
3872           cannot be larger than the size of the data file divided
3873           by the minimal word's length
3874         */
3875         sort_info.max_records=
3876           (ha_rows) (sort_info.filelength/ft_min_word_len+1);
3877       }
3878       else
3879       {
3880         /*
3881           for external plugin parser we cannot tell anything at all :(
3882           so, we'll use all the sort memory and start from ~10 buffpeks.
3883           (see _ma_create_index_by_sort)
3884         */
3885         sort_info.max_records=
3886           10*param->sort_buffer_length/sort_param.key_length;
3887       }
3888 
3889       sort_param.key_read=  sort_maria_ft_key_read;
3890       sort_param.key_write= sort_maria_ft_key_write;
3891     }
3892     else
3893     {
3894       sort_param.key_read=  sort_key_read;
3895       sort_param.key_write= sort_key_write;
3896     }
3897 
3898     if (sort_info.new_info->s->data_file_type == BLOCK_RECORD)
3899     {
3900       scan_inited= 1;
3901       if (maria_scan_init(sort_info.info))
3902         goto err;
3903     }
3904     if (_ma_create_index_by_sort(&sort_param,
3905                                  (my_bool) (!(param->testflag & T_VERBOSE)),
3906                                  (size_t) param->sort_buffer_length))
3907     {
3908       if ((param->testflag & T_CREATE_UNIQUE_BY_SORT) && sort_param.sort_info->dupp)
3909         share->state.dupp_key= sort_param.key;
3910       else
3911         param->retry_repair= 1;
3912       _ma_check_print_error(param, "Create index by sort failed");
3913       goto err;
3914     }
3915     DBUG_EXECUTE_IF("maria_flush_whole_log",
3916                     {
3917                       DBUG_PRINT("maria_flush_whole_log", ("now"));
3918                       translog_flush(translog_get_horizon());
3919                     });
3920     DBUG_EXECUTE_IF("maria_crash_create_index_by_sort",
3921                     {
3922                       DBUG_PRINT("maria_crash_create_index_by_sort", ("now"));
3923                       DBUG_SUICIDE();
3924                     });
3925     if (scan_inited)
3926     {
3927       scan_inited= 0;
3928       maria_scan_end(sort_info.info);
3929     }
3930 
3931     /* No need to calculate checksum again. */
3932     sort_param.calc_checksum= 0;
3933     free_root(&sort_param.wordroot, MYF(0));
3934 
3935     /* Set for next loop */
3936     sort_info.max_records= (ha_rows) sort_info.new_info->s->state.state.records;
3937     param->stage++;                             /* Next stage */
3938     param->progress= 0;
3939 
3940     if (param->testflag & T_STATISTICS)
3941       maria_update_key_parts(sort_param.keyinfo, rec_per_key_part,
3942                              sort_param.unique,
3943                              (param->stats_method ==
3944                               MI_STATS_METHOD_IGNORE_NULLS ?
3945                               sort_param.notnull : NULL),
3946                              (ulonglong) share->state.state.records);
3947     maria_set_key_active(share->state.key_map, sort_param.key);
3948     DBUG_PRINT("repair", ("set enabled index #: %u", sort_param.key));
3949 
3950     if (_ma_flush_table_files_before_swap(param, info))
3951       goto err;
3952 
3953     if (sort_param.fix_datafile)
3954     {
3955       param->read_cache.end_of_file=sort_param.filepos;
3956       if (maria_write_data_suffix(&sort_info,1) ||
3957           end_io_cache(&sort_info.new_info->rec_cache))
3958       {
3959         _ma_check_print_error(param, "Got error when flushing row cache");
3960 	goto err;
3961       }
3962       sort_info.new_info->opt_flag&= ~WRITE_CACHE_USED;
3963 
3964       if (param->testflag & T_SAFE_REPAIR)
3965       {
	/* Don't repair if we lost more than one row */
3967         if (sort_info.new_info->s->state.state.records+1 < start_records)
3968 	{
3969           _ma_check_print_error(param,
3970                                 "Rows lost (Found %lu of %lu); Aborting "
3971                                 "because safe repair was requested",
3972                                 (ulong) sort_info.new_info->s->
3973                                 state.state.records,
3974                                 (ulong) start_records);
3975           share->state.state.records=start_records;
3976 	  goto err;
3977 	}
3978       }
3979 
3980       sort_info.new_info->s->state.state.data_file_length= sort_param.filepos;
3981       if (sort_info.new_info != sort_info.info)
3982       {
3983         MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
3984         if (maria_close(sort_info.new_info))
3985         {
3986           _ma_check_print_error(param, "Got error %d on close", my_errno);
3987           goto err;
3988         }
3989         copy_data_file_state(&share->state, &save_state);
3990         new_file= -1;
3991         sort_info.new_info= info;
3992         info->rec_cache.file= info->dfile.file;
3993       }
3994 
3995       share->state.version=(ulong) time((time_t*) 0);	/* Force reopen */
3996 
3997       /* Replace the actual file with the temporary file */
3998       if (new_file >= 0)
3999       {
4000         mysql_file_close(new_file, MYF(MY_WME));
4001         new_file= -1;
4002       }
4003       change_data_file_descriptor(info, -1);
4004       if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
4005                                   DATA_TMP_EXT, param->backup_time,
4006                                   (param->testflag & T_BACKUP_DATA ?
4007                                    MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
4008                                   sync_dir) ||
4009           _ma_open_datafile(info, share))
4010       {
4011         _ma_check_print_error(param, "Couldn't change to new data file");
4012         goto err;
4013       }
4014       if (param->testflag & T_UNPACK)
4015         restore_data_file_type(share);
4016 
4017       org_header_length= share->pack.header_length;
4018       sort_info.org_data_file_type= share->data_file_type;
4019       sort_info.filelength= share->state.state.data_file_length;
4020       sort_param.fix_datafile=0;
4021 
4022       /* Offsets are now in proportion to the new file length */
4023       param->max_progress= sort_info.filelength;
4024 
4025     }
4026     else
4027       share->state.state.data_file_length=sort_param.max_pos;
4028 
4029     param->read_cache.file= info->dfile.file;	/* re-init read cache */
4030     if (share->data_file_type != BLOCK_RECORD)
4031       reinit_io_cache(&param->read_cache, READ_CACHE,
4032                       share->pack.header_length, 1, 1);
4033   }
4034 
4035   if (param->testflag & T_WRITE_LOOP)
4036   {
4037     fputs("          \r",stdout);
4038     fflush(stdout);
4039   }
4040 
4041   if (rep_quick && del+sort_info.dupp != share->state.state.del)
4042   {
4043     _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
4044                           "Found wrong number of deleted records");
4045     _ma_check_print_error(param,"Run recovery again without -q");
4046     got_error=1;
4047     param->retry_repair=1;
4048     param->testflag|=T_RETRY_WITHOUT_QUICK;
4049     goto err;
4050   }
4051 
4052   if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
4053   {
4054     my_off_t skr= share->state.state.data_file_length +
4055                    ((sort_info.org_data_file_type == COMPRESSED_RECORD) ?
4056                    MEMMAP_EXTRA_MARGIN : 0);
4057 #ifdef USE_RELOC
4058     if (sort_info.org_data_file_type == STATIC_RECORD &&
4059 	skr < share->base.reloc*share->base.min_pack_length)
4060       skr=share->base.reloc*share->base.min_pack_length;
4061 #endif
4062     if (skr != sort_info.filelength)
4063       if (mysql_file_chsize(info->dfile.file, skr, 0, MYF(0)))
4064 	_ma_check_print_warning(param,
4065 			       "Can't change size of datafile,  error: %d",
4066 			       my_errno);
4067   }
4068 
4069   if (param->testflag & T_CALC_CHECKSUM)
4070     share->state.state.checksum=param->glob_crc;
4071 
4072   if (mysql_file_chsize(share->kfile.file,
4073                         share->state.state.key_file_length, 0, MYF(0)))
4074     _ma_check_print_warning(param,
4075 			   "Can't change size of indexfile, error: %d",
4076 			   my_errno);
4077 
4078   if (!(param->testflag & T_SILENT))
4079   {
4080     if (start_records != share->state.state.records)
4081       printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
4082   }
4083   if (sort_info.dupp)
4084     _ma_check_print_warning(param,
4085                             "%s records have been removed",
4086                             llstr(sort_info.dupp,llbuff));
4087   got_error=0;
4088   /* If invoked by external program that uses thr_lock */
4089   if (&share->state.state != info->state)
4090     *info->state= *info->state_start= share->state.state;
4091 
4092 err:
4093   if (scan_inited)
4094     maria_scan_end(sort_info.info);
4095   _ma_reset_state(info);
4096 
4097   if (sort_info.new_info)
4098   {
4099     end_io_cache(&sort_info.new_info->rec_cache);
4100     sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4101   }
4102   end_io_cache(&param->read_cache);
4103   info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4104   if (got_error)
4105   {
4106     if (! param->error_printed)
4107       _ma_check_print_error(param,"%d when fixing table",my_errno);
4108     (void)_ma_flush_table_files_before_swap(param, info);
4109     if (sort_info.new_info && sort_info.new_info != sort_info.info)
4110     {
4111       unuse_data_file_descriptor(sort_info.new_info);
4112       maria_close(sort_info.new_info);
4113     }
4114     if (new_file >= 0)
4115     {
4116       mysql_file_close(new_file, MYF(0));
4117       mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
4118     }
4119     maria_mark_crashed_on_repair(info);
4120   }
4121   else
4122   {
4123     if (key_map == share->state.key_map)
4124       share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
4125     /*
4126       Now that we have flushed and forced everything, we can bump
4127       create_rename_lsn:
4128     */
4129     DBUG_EXECUTE_IF("maria_flush_whole_log",
4130                     {
4131                       DBUG_PRINT("maria_flush_whole_log", ("now"));
4132                       translog_flush(translog_get_horizon());
4133                     });
4134     DBUG_EXECUTE_IF("maria_crash_repair",
4135                     {
4136                       DBUG_PRINT("maria_crash_repair", ("now"));
4137                       DBUG_SUICIDE();
4138                     });
4139   }
4140   share->state.changed|= STATE_NOT_SORTED_PAGES;
4141   if (!rep_quick)
4142     share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
4143                              STATE_NOT_MOVABLE);
4144 
4145   /* If caller had disabled logging it's not up to us to re-enable it */
4146   if (reenable_logging)
4147     _ma_reenable_logging_for_table(info, FALSE);
4148   restore_table_state_after_repair(info, &backup_share);
4149 
4150   my_free(sort_param.rec_buff);
4151   my_free(sort_param.record);
4152   my_free(sort_info.key_block);
4153   my_free(sort_info.ft_buf);
4154   my_free(sort_info.buff);
4155   DBUG_RETURN(got_error);
4156 }
4157 
4158 
4159 /*
4160   Threaded repair of table using sorting
4161 
4162   SYNOPSIS
4163     maria_repair_parallel()
4164     param		Repair parameters
4165     info		MARIA handler to repair
4166     name		Name of table (for warnings)
4167     rep_quick		set to <> 0 if we should not change data file
4168 
4169   DESCRIPTION
4170     Same as maria_repair_by_sort but do it multithreaded
4171     Each key is handled by a separate thread.
    TODO: make the number of threads a parameter
4173 
4174     In parallel repair we use one thread per index. There are two modes:
4175 
4176     Quick
4177 
4178       Only the indexes are rebuilt. All threads share a read buffer.
4179       Every thread that needs fresh data in the buffer enters the shared
4180       cache lock. The last thread joining the lock reads the buffer from
4181       the data file and wakes all other threads.
4182 
4183     Non-quick
4184 
4185       The data file is rebuilt and all indexes are rebuilt to point to
4186       the new record positions. One thread is the master thread. It
4187       reads from the old data file and writes to the new data file. It
4188       also creates one of the indexes. The other threads read from a
4189       buffer which is filled by the master. If they need fresh data,
      they enter the shared cache lock. If the master's write buffer is
4191       full, it flushes it to the new data file and enters the shared
4192       cache lock too. When all threads joined in the lock, the master
4193       copies its write buffer to the read buffer for the other threads
4194       and wakes them.
4195 
4196   RESULT
4197     0	ok
4198     <>0	Error
4199 */
4200 
int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
                          const char * name, my_bool rep_quick)
{
  int got_error;
  uint i,key, istep;
  ha_rows start_records;
  my_off_t new_header_length,del;
  File new_file;
  MARIA_SORT_PARAM *sort_param=0, tmp_sort_param;
  MARIA_SHARE *share= info->s;
  double  *rec_per_key_part;
  HA_KEYSEG *keyseg;
  char llbuff[22];
  IO_CACHE new_data_cache; /* For non-quick repair. */
  IO_CACHE_SHARE io_share;
  MARIA_SORT_INFO sort_info;
  MARIA_SHARE backup_share;
  ulonglong UNINIT_VAR(key_map);
  pthread_attr_t thr_attr;
  myf sync_dir= ((share->now_transactional && !share->temporary) ?
                 MY_SYNC_DIR : 0);
  my_bool reenable_logging= 0;
  /*
    Set once sort_info.mutex and sort_info.cond have been initialized.
    The first 'goto err' below is taken before that point, so the
    cleanup code must not destroy them unconditionally.
  */
  my_bool sync_inited= 0;
  DBUG_ENTER("maria_repair_parallel");

  got_error= 1;
  new_file= -1;
  start_records= share->state.state.records;
  if (!(param->testflag & T_SILENT))
  {
    printf("- parallel recovering (with sort) Aria-table '%s'\n",name);
    printf("Data records: %s\n", llstr(start_records, llbuff));
  }

  bzero(&new_data_cache, sizeof(new_data_cache));
  if (initialize_variables_for_repair(param, &sort_info, &tmp_sort_param, info,
                                      rep_quick, &backup_share))
    goto err;

  if ((reenable_logging= share->now_transactional))
    _ma_tmp_disable_logging_for_table(info, 0);

  new_header_length= ((param->testflag & T_UNPACK) ? 0 :
                      share->pack.header_length);

  /*
    Quick repair (not touching data file, rebuilding indexes):
    {
      Read cache is (HA_CHECK *param)->read_cache using info->dfile.file.
    }

    Non-quick repair (rebuilding data file and indexes):
    {
      Master thread:

        Read  cache is (HA_CHECK *param)->read_cache using info->dfile.file.
        Write cache is (MARIA_INFO *info)->rec_cache using new_file.

      Slave threads:

        Read cache is new_data_cache synced to master rec_cache.

      The final assignment of the filedescriptor for rec_cache is done
      after the cache creation.

      Don't check file size on new_data_cache, as the resulting file size
      is not known yet.

      As rec_cache and new_data_cache are synced, write_buffer_length is
      used for the read cache 'new_data_cache'. Both start at the same
      position 'new_header_length'.
    }
  */
  DBUG_PRINT("info", ("is quick repair: %d", (int) rep_quick));
  if (!rep_quick)
    my_b_clear(&new_data_cache);

  /*
    Initialize pthread structures before any further goto err, and
    remember that they now have to be destroyed on the way out.
  */
  mysql_mutex_init(key_SORT_INFO_mutex, &sort_info.mutex, MY_MUTEX_INIT_FAST);
  mysql_cond_init(key_SORT_INFO_cond, &sort_info.cond, 0);
  sync_inited= 1;

  if (!(sort_info.key_block=
        alloc_key_blocks(param, (uint) param->sort_key_blocks,
                         share->base.max_key_block_length)))
    goto err;

  if (init_io_cache(&param->read_cache, info->dfile.file,
                    (uint) param->read_buffer_length,
                    READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
    goto err;

  sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
  info->opt_flag|=WRITE_CACHE_USED;
  info->rec_cache.file= info->dfile.file;         /* for sort_delete_record */

  if (!rep_quick)
  {
    /* Get real path for data file */
    if ((new_file= mysql_file_create(key_file_tmp,
                                     fn_format(param->temp_filename,
                                               share->data_file_name.str, "",
                                               DATA_TMP_EXT,
                                               2+4),
                                     0,param->tmpfile_createflag,
                                     MYF(0))) < 0)
    {
      _ma_check_print_error(param,"Can't create new tempfile: '%s'",
                            param->temp_filename);
      goto err;
    }
    if (new_header_length &&
        maria_filecopy(param, new_file, info->dfile.file,0L,new_header_length,
                       "datafile-header"))
      goto err;
    if (param->testflag & T_UNPACK)
      restore_data_file_type(share);
    share->state.dellink= HA_OFFSET_ERROR;

    if (init_io_cache(&new_data_cache, -1,
                        (uint) param->write_buffer_length,
                        READ_CACHE, new_header_length, 1,
                        MYF(MY_WME | MY_DONT_CHECK_FILESIZE)))
      goto err;

    if (init_io_cache(&info->rec_cache, new_file,
                        (uint) param->write_buffer_length,
                        WRITE_CACHE, new_header_length, 1,
                        MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
      goto err;

  }

  /* Optionally drop indexes and optionally modify the key_map. */
  maria_drop_all_indexes(param, info, FALSE);
  key_map= share->state.key_map;
  if (param->testflag & T_CREATE_MISSING_KEYS)
  {
    /* Invert the copied key_map to recreate all disabled indexes. */
    key_map= ~key_map;
  }

  param->read_cache.end_of_file= sort_info.filelength;

  /*
    +1 below is required hack for parallel repair mode.
    The share->state.state.records value, that is compared later
    to sort_info.max_records and cannot exceed it, is
    increased in sort_key_write. In maria_repair_by_sort, sort_key_write
    is called after sort_key_read, where the comparison is performed,
    but in parallel mode master thread can call sort_key_write
    before some other repair thread calls sort_key_read.
    Furthermore I'm not even sure +1 would be enough.
    Maybe sort_info.max_records should always be set to max value in
    parallel mode.
  */
  sort_info.max_records++;

  del=share->state.state.del;

  /*
    One allocation holds the array of per-key sort parameters followed by
    one record buffer (pack_reclength bytes) per key.
  */
  if (!(sort_param=(MARIA_SORT_PARAM *)
        my_malloc((uint) share->base.keys *
                  (sizeof(MARIA_SORT_PARAM) + share->base.pack_reclength),
                  MYF(MY_ZEROFILL))))
  {
    _ma_check_print_error(param,"Not enough memory for key!");
    goto err;
  }
#ifdef USING_SECOND_APPROACH
  uint total_key_length=0;
#endif
  rec_per_key_part= param->new_rec_per_key_part;
  share->state.state.records=share->state.state.del=share->state.split=0;
  share->state.state.empty=0;

  /*
    'key' walks over all keys of the table, 'i' only advances for keys
    that are going to be rebuilt (istep is 0 for skipped keys), so
    sort_param[0..i-1] ends up densely filled with the active keys.
  */
  for (i=key=0, istep=1 ; key < share->base.keys ;
       rec_per_key_part+=sort_param[i].keyinfo->keysegs, i+=istep, key++)
  {
    sort_param[i].key=key;
    sort_param[i].keyinfo=share->keyinfo+key;
    sort_param[i].seg=sort_param[i].keyinfo->seg;
    /*
      Skip this index if it is marked disabled in the copied
      (and possibly inverted) key_map.
    */
    if (! maria_is_key_active(key_map, key))
    {
      /* Remember old statistics for key */
      memcpy((char*) rec_per_key_part,
             (char*) (share->state.rec_per_key_part+
                      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
             sort_param[i].keyinfo->keysegs*sizeof(*rec_per_key_part));
      istep=0;
      continue;
    }
    istep=1;
    if ((!(param->testflag & T_SILENT)))
      printf ("- Fixing index %d\n",key+1);
    if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
    {
      sort_param[i].key_read=sort_maria_ft_key_read;
      sort_param[i].key_write=sort_maria_ft_key_write;
    }
    else
    {
      sort_param[i].key_read=sort_key_read;
      sort_param[i].key_write=sort_key_write;
    }
    sort_param[i].key_cmp=sort_key_cmp;
    sort_param[i].lock_in_memory=maria_lock_memory;
    sort_param[i].tmpdir=param->tmpdir;
    sort_param[i].sort_info=&sort_info;
    sort_param[i].master=0;
    sort_param[i].fix_datafile=0;
    sort_param[i].calc_checksum= 0;

    sort_param[i].filepos=new_header_length;
    sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length;

    /* Record buffers live directly behind the sort_param array */
    sort_param[i].record= (((uchar *)(sort_param+share->base.keys))+
                          (share->base.pack_reclength * i));
    if (_ma_alloc_buffer(&sort_param[i].rec_buff, &sort_param[i].rec_buff_size,
                         share->base.default_rec_buff_size))
    {
      _ma_check_print_error(param,"Not enough memory!");
      goto err;
    }
    /* Sum up the maximum length of a key for this index */
    sort_param[i].key_length=share->rec_reflength;
    for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END;
         keyseg++)
    {
      sort_param[i].key_length+=keyseg->length;
      if (keyseg->flag & HA_SPACE_PACK)
        sort_param[i].key_length+=get_pack_length(keyseg->length);
      if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
        sort_param[i].key_length+= 2 + MY_TEST(keyseg->length >= 127);
      if (keyseg->flag & HA_NULL_PART)
        sort_param[i].key_length++;
    }
#ifdef USING_SECOND_APPROACH
    total_key_length+=sort_param[i].key_length;
#endif

    if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
    {
      uint ft_max_word_len_for_sort=
        (FT_MAX_WORD_LEN_FOR_SORT *
         sort_param[i].keyinfo->seg->charset->mbmaxlen);
      sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
      init_alloc_root(&sort_param[i].wordroot, "sort",
                      FTPARSER_MEMROOT_ALLOC_SIZE, 0,
                      MYF(param->malloc_flags));
    }
  }
  sort_info.total_keys=i;
  /* The first active key is handled by the master thread */
  sort_param[0].master= 1;
  sort_param[0].fix_datafile= ! rep_quick;
  sort_param[0].calc_checksum= MY_TEST(param->testflag & T_CALC_CHECKSUM);

  if (!maria_ftparser_alloc_param(info))
    goto err;

  sort_info.got_error=0;
  mysql_mutex_lock(&sort_info.mutex);

  /*
    Initialize the I/O cache share for use with the read caches and, in
    case of non-quick repair, the write cache. When all threads join on
    the cache lock, the writer copies the write cache contents to the
    read caches.
  */
  if (i > 1)
  {
    if (rep_quick)
      init_io_cache_share(&param->read_cache, &io_share, NULL, i);
    else
      init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i);
  }
  else
    io_share.total_threads= 0; /* share not used */

  (void) pthread_attr_init(&thr_attr);
  (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);

  for (i=0 ; i < sort_info.total_keys ; i++)
  {
    /*
      Copy the properly initialized IO_CACHE structure so that every
      thread has its own copy. In quick mode param->read_cache is shared
      for use by all threads. In non-quick mode all threads but the
      first copy the shared new_data_cache, which is synchronized to the
      write cache of the first thread. The first thread copies
      param->read_cache, which is not shared.
    */
    sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache :
                               new_data_cache);
    DBUG_PRINT("io_cache_share", ("thread: %u  read_cache: %p",
                                  i, &sort_param[i].read_cache));

    /*
      two approaches: the same amount of memory for each thread
      or the memory for the same number of keys for each thread...
      In the second one all the threads will fill their sort_buffers
      (and call write_keys) at the same time, putting more stress on i/o.
    */
    sort_param[i].sortbuff_size=
#ifndef USING_SECOND_APPROACH
      param->sort_buffer_length/sort_info.total_keys;
#else
      param->sort_buffer_length*sort_param[i].key_length/total_key_length;
#endif
    if (mysql_thread_create(key_thread_find_all_keys,
                            &sort_param[i].thr, &thr_attr,
                            _ma_thr_find_all_keys, (void *) (sort_param+i)))
    {
      _ma_check_print_error(param,"Cannot start a repair thread");
      /* Cleanup: Detach from the share. Avoid others to be blocked. */
      if (io_share.total_threads)
        remove_io_thread(&sort_param[i].read_cache);
      DBUG_PRINT("error", ("Cannot start a repair thread"));
      sort_info.got_error=1;
    }
    else
      sort_info.threads_running++;
  }
  (void) pthread_attr_destroy(&thr_attr);

  /* waiting for all threads to finish */
  while (sort_info.threads_running)
    mysql_cond_wait(&sort_info.cond, &sort_info.mutex);
  mysql_mutex_unlock(&sort_info.mutex);

  if ((got_error= _ma_thr_write_keys(sort_param)))
  {
    param->retry_repair=1;
    goto err;
  }
  got_error=1;				/* Assume the following may go wrong */

  if (_ma_flush_table_files_before_swap(param, info))
    goto err;

  if (sort_param[0].fix_datafile)
  {
    /*
      Append some nulls to the end of a memory mapped file. Destroy the
      write cache. The master thread did already detach from the share
      by remove_io_thread() in sort.c:thr_find_all_keys().
    */
    if (maria_write_data_suffix(&sort_info,1) ||
        end_io_cache(&info->rec_cache))
      goto err;
    if (param->testflag & T_SAFE_REPAIR)
    {
      /* Don't repair if we lost more than one row */
      if (sort_info.new_info->s->state.state.records+1 < start_records)
      {
        _ma_check_print_error(param,
                              "Rows lost (Found %lu of %lu); Aborting "
                              "because safe repair was requested",
                              (ulong) share->state.state.records,
                              (ulong) start_records);
        share->state.state.records=start_records;
        goto err;
      }
    }
    share->state.state.data_file_length= sort_param->filepos;
    /* Only whole records */
    share->state.version= (ulong) time((time_t*) 0);
    /*
      Exchange the data file descriptor of the table, so that we use the
      new file from now on.
     */
    mysql_file_close(info->dfile.file, MYF(0));
    info->dfile.file= new_file;
    share->pack.header_length=(ulong) new_header_length;
  }
  else
    share->state.state.data_file_length=sort_param->max_pos;

  if (rep_quick && del+sort_info.dupp != share->state.state.del)
  {
    _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
                          "Found wrong number of deleted records");
    _ma_check_print_error(param,"Run recovery again without -q");
    param->retry_repair=1;
    param->testflag|=T_RETRY_WITHOUT_QUICK;
    goto err;
  }

  if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
  {
    my_off_t skr= share->state.state.data_file_length +
                   ((sort_info.org_data_file_type == COMPRESSED_RECORD) ?
                   MEMMAP_EXTRA_MARGIN : 0);
#ifdef USE_RELOC
    if (sort_info.org_data_file_type == STATIC_RECORD &&
        skr < share->base.reloc*share->base.min_pack_length)
      skr=share->base.reloc*share->base.min_pack_length;
#endif
    if (skr != sort_info.filelength)
      if (mysql_file_chsize(info->dfile.file, skr, 0, MYF(0)))
        _ma_check_print_warning(param,
                               "Can't change size of datafile,  error: %d",
                               my_errno);
  }
  if (param->testflag & T_CALC_CHECKSUM)
    share->state.state.checksum=param->glob_crc;

  if (mysql_file_chsize(share->kfile.file,
                        share->state.state.key_file_length, 0, MYF(0)))
    _ma_check_print_warning(param,
                           "Can't change size of indexfile, error: %d",
                            my_errno);

  if (!(param->testflag & T_SILENT))
  {
    if (start_records != share->state.state.records)
      printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
  }
  if (sort_info.dupp)
    _ma_check_print_warning(param,
                            "%s records have been removed",
                            llstr(sort_info.dupp,llbuff));
  got_error=0;
  /* If invoked by external program that uses thr_lock */
  if (&share->state.state != info->state)
    *info->state= *info->state_start= share->state.state;

err:
  _ma_reset_state(info);

  /*
    Destroy the write cache. The master thread did already detach from
    the share by remove_io_thread() or it was not yet started (if the
    error happened before creating the thread).
  */
  if (sort_info.new_info)
  {
    end_io_cache(&sort_info.new_info->rec_cache);
    sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
  }
  end_io_cache(&param->read_cache);
  info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
  /*
    Destroy the new data cache in case of non-quick repair. All slave
    threads did either detach from the share by remove_io_thread()
    already or they were not yet started (if the error happened before
    creating the threads).
  */
  if (!rep_quick && my_b_inited(&new_data_cache))
    end_io_cache(&new_data_cache);
  if (!got_error)
  {
    /* Replace the actual file with the temporary file */
    if (new_file >= 0)
    {
      mysql_file_close(new_file,MYF(0));
      info->dfile.file= new_file= -1;
      if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
                                  DATA_TMP_EXT, param->backup_time,
                                  MYF((param->testflag & T_BACKUP_DATA ?
                                       MY_REDEL_MAKE_BACKUP : 0) |
                                      sync_dir)) ||
          _ma_open_datafile(info,share))
        got_error=1;
    }
  }
  if (got_error)
  {
    if (! param->error_printed)
      _ma_check_print_error(param,"%d when fixing table",my_errno);
    (void)_ma_flush_table_files_before_swap(param, info);
    if (new_file >= 0)
    {
      mysql_file_close(new_file,MYF(0));
      mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
      if (info->dfile.file == new_file)
        info->dfile.file= -1;
    }
    maria_mark_crashed_on_repair(info);
  }
  else if (key_map == share->state.key_map)
    share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
  share->state.changed|= STATE_NOT_SORTED_PAGES;
  if (!rep_quick)
    share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
                             STATE_NOT_MOVABLE);

  /*
    Only tear down the synchronization objects if we got far enough to
    create them; destroying a mutex or condition that was never
    initialized is not allowed.
  */
  if (sync_inited)
  {
    mysql_cond_destroy (&sort_info.cond);
    mysql_mutex_destroy(&sort_info.mutex);
  }

  /* If caller had disabled logging it's not up to us to re-enable it */
  if (reenable_logging)
    _ma_reenable_logging_for_table(info, FALSE);
  restore_table_state_after_repair(info, &backup_share);

  my_free(sort_info.ft_buf);
  my_free(sort_info.key_block);
  my_free(sort_param);
  my_free(sort_info.buff);
  if (!got_error && (param->testflag & T_UNPACK))
    restore_data_file_type(share);
  DBUG_RETURN(got_error);
}
4704 
4705 	/* Read next record and return next key */
4706 
static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
{
  MARIA_SORT_INFO *sinfo= sort_param->sort_info;
  MARIA_HA *handler= sinfo->info;
  MARIA_KEY built_key;
  int res;
  DBUG_ENTER("sort_key_read");

  /* Fetch the next row; a non-zero result is passed on unchanged */
  res= sort_get_next_record(sort_param);
  if (res)
    DBUG_RETURN(res);

  /* Refuse to continue once the row count has reached the allowed limit */
  if (handler->s->state.state.records == sinfo->max_records)
  {
    _ma_check_print_error(sinfo->param,
                          "Key %d - Found too many records; Can't continue",
                          sort_param->key+1);
    DBUG_RETURN(1);
  }

  if (_ma_sort_write_record(sort_param))
    DBUG_RETURN(1);

  /* Build the key for the current row into the caller supplied buffer */
  (*handler->s->keyinfo[sort_param->key].make_key)(handler, &built_key,
                                                   sort_param->key, key,
                                                   sort_param->record,
                                                   sort_param->current_filepos,
                                                   0);
  sort_param->real_key_length= built_key.data_length + built_key.ref_length;
#ifdef HAVE_valgrind
  /* Clear the unused tail of the key buffer */
  bzero(key+sort_param->real_key_length,
        (sort_param->key_length-sort_param->real_key_length));
#endif
  DBUG_RETURN(0);
} /* sort_key_read */
4739 
4740 
static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
{
  MARIA_HA *handler= sort_param->sort_info->info;
  FT_WORD *word= 0;
  MARIA_KEY built_key;
  int res= 0;
  DBUG_ENTER("sort_maria_ft_key_read");

  if (sort_param->wordlist)
  {
    /* Words from the current row are still pending; continue with them */
    word= (FT_WORD*)(sort_param->wordptr);
  }
  else
  {
    /* Read rows until one of them yields a non-empty word list */
    do
    {
      free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
      if ((res= sort_get_next_record(sort_param)))
        DBUG_RETURN(res);
      if ((res= _ma_sort_write_record(sort_param)))
        DBUG_RETURN(res);
      word= _ma_ft_parserecord(handler, sort_param->key, sort_param->record,
                               &sort_param->wordroot);
      if (!word)
        DBUG_RETURN(1);
    } while (!word->pos);
    sort_param->wordptr=sort_param->wordlist=word;
  }

  /* Make a key from the current word, then step to the following one */
  _ma_ft_make_key(handler, &built_key, sort_param->key, key, word,
                  sort_param->current_filepos);
  word++;
  sort_param->real_key_length= built_key.data_length + built_key.ref_length;

#ifdef HAVE_valgrind
  if (sort_param->key_length > sort_param->real_key_length)
    bzero(key+sort_param->real_key_length,
          (sort_param->key_length-sort_param->real_key_length));
#endif

  if (word->pos)
  {
    /* More words left in this row; remember where to continue */
    sort_param->wordptr=(void*)word;
  }
  else
  {
    /* Word list exhausted; the next call has to read a new row */
    free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
    sort_param->wordlist=0;
  }

  DBUG_RETURN(res);
} /* sort_maria_ft_key_read */
4793 
4794 
4795 /*
4796   Read next record from file using parameters in sort_info.
4797 
4798   SYNOPSIS
4799     sort_get_next_record()
4800       sort_param                Information about and for the sort process
4801 
4802   NOTES
4803     Dynamic Records With Non-Quick Parallel Repair
4804 
4805     For non-quick parallel repair we use a synchronized read/write
4806     cache. This means that one thread is the master who fixes the data
4807     file by reading each record from the old data file and writing it
4808     to the new data file. By doing this the records in the new data
4809     file are written contiguously. Whenever the write buffer is full,
4810     it is copied to the read buffer. The slaves read from the read
4811     buffer, which is not associated with a file. Thus read_cache.file
    is -1. When using _ma_read_cache(), the slaves must always set
4813     flag to READING_NEXT so that the function never tries to read from
4814     file. This is safe because the records are contiguous. There is no
4815     need to read outside the cache. This condition is evaluated in the
4816     variable 'parallel_flag' for quick reference. read_cache.file must
4817     be >= 0 in every other case.
4818 
4819   RETURN
4820     -1          end of file
4821     0           ok
4822                 sort_param->current_filepos points to record position.
4823                 sort_param->record contains record
4824                 sort_param->max_pos contains position to last byte read
4825     > 0         error
4826 */
4827 
static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
{
  int searching;
  int parallel_flag;
  uint found_record,b_type,left_length;
  my_off_t pos;
  MARIA_BLOCK_INFO block_info;
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  HA_CHECK *param=sort_info->param;
  MARIA_HA *info=sort_info->info;
  MARIA_SHARE *share= info->s;
  char llbuff[22],llbuff2[22];
  DBUG_ENTER("sort_get_next_record");

  /* Return an error at once if _ma_killed_ptr() reports non-zero */
  if (_ma_killed_ptr(param))
    DBUG_RETURN(1);
  /*
    Progress is only reported when the call counter reaches WRITE_COUNT;
    the counter is then restarted.
  */
  if (param->progress_counter++ >= WRITE_COUNT)
  {
    param->progress_counter= 0;
    _ma_report_progress(param, param->progress, param->max_progress);
  }

  /* The way a row is fetched depends on the row format of the original file */
  switch (sort_info->org_data_file_type) {
  case BLOCK_RECORD:
  {
    /*
      Loop until a usable row is found, end of file is reached or an error
      that is not "wrong record"/"wrong CRC" happens.
    */
    for (;;)
    {
      int flag;
      /*
        Assume table is transactional and it had LSN pages in the
        cache. Repair has flushed them, left data pages stay in
        cache, and disabled transactionality (so share's current page
        type is PLAIN); page cache would assert if it finds a cached LSN page
        while _ma_scan_block_record() requested a PLAIN page. So we use
        UNKNOWN.
      */
      enum pagecache_page_type save_page_type= share->page_type;
      share->page_type= PAGECACHE_READ_UNKNOWN_PAGE;
      if (info != sort_info->new_info)
      {
        /* Safe scanning */
        flag= _ma_safe_scan_block_record(sort_info, info,
                                         sort_param->record);
      }
      else
      {
        /*
          Scan on clean table.
          It requires a reliable data_file_length so we set it.
        */
        share->state.state.data_file_length= sort_info->filelength;
        info->cur_row.trid= 0;
        flag= _ma_scan_block_record(info, sort_param->record,
                                    info->cur_row.nextpos, 1);
        /* Track the highest transaction id seen among the scanned rows */
        set_if_bigger(param->max_found_trid, info->cur_row.trid);
        /* A row whose trid exceeds param->max_trid is reported as an error */
        if (info->cur_row.trid > param->max_trid)
        {
          _ma_check_print_not_visible_error(param, info->cur_row.trid);
          flag= HA_ERR_ROW_NOT_VISIBLE;
        }
      }
      /* Progress is expressed as the byte offset of the page of the last row */
      param->progress= (ma_recordpos_to_page(info->cur_row.lastpos)*
                        share->block_size);

      /* Restore the page type that was overridden for the scan */
      share->page_type= save_page_type;
      if (!flag)
      {
	if (sort_param->calc_checksum)
        {
          ha_checksum checksum;
          checksum= (*share->calc_check_checksum)(info, sort_param->record);
          /*
            When the table stores row checksums, only the low 8 bits of the
            computed checksum are compared with the stored one. A row that
            does not match is skipped and the scan goes on to the next row.
          */
          if (share->calc_checksum &&
              info->cur_row.checksum != (checksum & 255))
          {
            if (param->testflag & T_VERBOSE)
            {
              record_pos_to_txt(info, info->cur_row.lastpos, llbuff);
              _ma_check_print_info(param,
                                   "Found record with wrong checksum at %s",
                                   llbuff);
            }
            continue;
          }
          info->cur_row.checksum= checksum;
	  param->glob_crc+= checksum;
        }
        sort_param->start_recpos= sort_param->current_filepos=
          info->cur_row.lastpos;
        DBUG_RETURN(0);
      }
      if (flag == HA_ERR_END_OF_FILE)
      {
        sort_param->max_pos= share->state.state.data_file_length;
        DBUG_RETURN(-1);
      }
      /* Retry only if wrong record, not if disk error */
      if (flag != HA_ERR_WRONG_IN_RECORD && flag != HA_ERR_WRONG_CRC)
      {
        retry_if_quick(sort_param, flag);
        DBUG_RETURN(flag);
      }
    }
    break;                                      /* Impossible */
  }
  case STATIC_RECORD:
    /*
      Read fixed-size rows one after another until a row whose first byte
      is non-zero is found. The loop is only left through DBUG_RETURN.
    */
    for (;;)
    {
      if (my_b_read(&sort_param->read_cache,sort_param->record,
		    share->base.pack_reclength))
      {
	if (sort_param->read_cache.error)
	  param->out_flag |= O_DATA_LOST;
        retry_if_quick(sort_param, my_errno);
	DBUG_RETURN(-1);
      }
      sort_param->start_recpos=sort_param->pos;
      param->progress= sort_param->pos;
      if (!sort_param->fix_datafile)
      {
	sort_param->current_filepos= sort_param->pos;
        if (sort_param->master)
	  share->state.split++;
      }
      /* Advance to the next row slot; max_pos follows the read position */
      sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength);
      if (*sort_param->record)
      {
	if (sort_param->calc_checksum)
	  param->glob_crc+= (info->cur_row.checksum=
			     _ma_static_checksum(info,sort_param->record));
	DBUG_RETURN(0);
      }
      /*
        First byte is zero: the slot is accounted for as a deleted row
        (only by the master, and only when the data file is not rewritten).
      */
      if (!sort_param->fix_datafile && sort_param->master)
      {
	share->state.state.del++;
	share->state.state.empty+=share->base.pack_reclength;
      }
    }
    /* Not reached: the loop above has no break */
  case DYNAMIC_RECORD:
  {
    uchar *UNINIT_VAR(to);
    ha_checksum checksum= 0;

    pos=sort_param->pos;
    param->progress= pos;
    /*
      'searching' is non-zero while we are hunting for a plausible block
      header instead of following known-good block positions. It starts set
      when the data file is rewritten with T_EXTEND, and is set later as soon
      as a bad block is met.
    */
    searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND));
    /* See NOTES in the function comment: no file behind the read cache */
    parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
    for (;;)
    {
      found_record=block_info.second_read= 0;
      /*
        Non-zero so that a 'continue' inside the do-loop below (which is only
        executed while found_record is still 0) re-enters the loop.
      */
      left_length=1;
      if (searching)
      {
	pos=MY_ALIGN(pos,MARIA_DYN_ALIGN_SIZE);
        param->testflag|=T_RETRY_WITHOUT_QUICK;
	sort_param->start_recpos=pos;
      }
      /* Each iteration handles one block; a row may span several blocks */
      do
      {
	if (pos > sort_param->max_pos)
	  sort_param->max_pos=pos;
	if (pos & (MARIA_DYN_ALIGN_SIZE-1))
	{
	  if ((param->testflag & T_VERBOSE) || searching == 0)
	    _ma_check_print_info(param,"Wrong aligned block at %s",
				llstr(pos,llbuff));
	  if (searching)
	    goto try_next;
	}
	if (found_record && pos == param->search_after_block)
	  _ma_check_print_info(param,"Block: %s used by record at %s",
		     llstr(param->search_after_block,llbuff),
		     llstr(sort_param->start_recpos,llbuff2));
	/* Read the block header at 'pos' */
	if (_ma_read_cache(info, &sort_param->read_cache,
                           block_info.header, pos,
			   MARIA_BLOCK_INFO_HEADER_LENGTH,
			   (! found_record ? READING_NEXT : 0) |
			   parallel_flag | READING_HEADER))
	{
	  if (found_record)
	  {
	    /*
	      NOTE(review): this message prints errno, while the data-block
	      read error further down prints my_errno - confirm which one
	      _ma_read_cache() is expected to set.
	    */
	    _ma_check_print_info(param,
				"Can't read whole record at %s (errno: %d)",
				llstr(sort_param->start_recpos,llbuff),errno);
	    goto try_next;
	  }
	  /* Could not read a first header: treated as end of file */
	  DBUG_RETURN(-1);
	}
	/*
	  We are searching (something was wrong) but the data file is not
	  being rewritten: request a retry without quick and fail.
	*/
	if (searching && ! sort_param->fix_datafile)
	{
	  param->error_printed=1;
          param->retry_repair=1;
          param->testflag|=T_RETRY_WITHOUT_QUICK;
          my_errno= HA_ERR_WRONG_IN_RECORD;
	  DBUG_RETURN(1);	/* Something wrong with data */
	}
	b_type= _ma_get_block_info(info, &block_info,-1,pos);
	/*
	  Reject blocks flagged as erroneous and first blocks whose record
	  length is outside [min_pack_length, max_pack_length].
	*/
	if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) ||
	   ((b_type & BLOCK_FIRST) &&
	     (block_info.rec_len < (uint) share->base.min_pack_length ||
	      block_info.rec_len > (uint) share->base.max_pack_length)))
	{
	  uint i;
	  if (param->testflag & T_VERBOSE || searching == 0)
	    _ma_check_print_info(param,
				"Wrong bytesec: %3d-%3d-%3d at %10s; Skipped",
		       block_info.header[0],block_info.header[1],
		       block_info.header[2],llstr(pos,llbuff));
	  if (found_record)
	    goto try_next;
	  block_info.second_read=0;
	  searching=1;
	  /* Search after block in read header string */
	  for (i=MARIA_DYN_ALIGN_SIZE ;
	       i < MARIA_BLOCK_INFO_HEADER_LENGTH ;
	       i+= MARIA_DYN_ALIGN_SIZE)
	    if (block_info.header[i] >= 1 &&
		block_info.header[i] <= MARIA_MAX_DYN_HEADER_BYTE)
	      break;
	  pos+=(ulong) i;
	  sort_param->start_recpos=pos;
	  continue;
	}
	if (b_type & BLOCK_DELETED)
	{
	  /* Sanity checks for a deleted block: length and delete links */
	  my_bool error=0;
	  if (block_info.block_len+ (uint) (block_info.filepos-pos) <
	      share->base.min_block_length)
	  {
	    if (!searching)
	      _ma_check_print_info(param,
                                   "Deleted block with impossible length %lu "
                                   "at %s",
                                   block_info.block_len,llstr(pos,llbuff));
	    error=1;
	  }
	  else
	  {
	    if ((block_info.next_filepos != HA_OFFSET_ERROR &&
		 block_info.next_filepos >=
		 share->state.state.data_file_length) ||
		(block_info.prev_filepos != HA_OFFSET_ERROR &&
		 block_info.prev_filepos >=
                 share->state.state.data_file_length))
	    {
	      if (!searching)
		_ma_check_print_info(param,
				    "Delete link points outside datafile at "
                                     "%s",
                                     llstr(pos,llbuff));
	      error=1;
	    }
	  }
	  if (error)
	  {
	    if (found_record)
	      goto try_next;
	    searching=1;
	    pos+= MARIA_DYN_ALIGN_SIZE;
	    sort_param->start_recpos=pos;
	    block_info.second_read=0;
	    continue;
	  }
	}
	else
	{
	  /* Sanity check of the length of a block that is not deleted */
	  if (block_info.block_len+ (uint) (block_info.filepos-pos) <
	      share->base.min_block_length ||
	      block_info.block_len > (uint) share->base.max_pack_length+
	      MARIA_SPLIT_LENGTH)
	  {
	    if (!searching)
	      _ma_check_print_info(param,
                                   "Found block with impossible length %lu "
                                   "at %s; Skipped",
                                   block_info.block_len+
                                   (uint) (block_info.filepos-pos),
                                   llstr(pos,llbuff));
	    if (found_record)
	      goto try_next;
	    searching=1;
	    pos+= MARIA_DYN_ALIGN_SIZE;
	    sort_param->start_recpos=pos;
	    block_info.second_read=0;
	    continue;
	  }
	}
	/* Deleted blocks and blocks with sync error hold no row data: skip */
	if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
	{
          if (!sort_param->fix_datafile && sort_param->master &&
              (b_type & BLOCK_DELETED))
	  {
	    share->state.state.empty+=block_info.block_len;
	    share->state.state.del++;
	    share->state.split++;
	  }
	  if (found_record)
	    goto try_next;
	  if (searching)
	  {
	    pos+=MARIA_DYN_ALIGN_SIZE;
	    sort_param->start_recpos=pos;
	  }
	  else
	    pos=block_info.filepos+block_info.block_len;
	  block_info.second_read=0;
	  continue;
	}

	if (!sort_param->fix_datafile && sort_param->master)
	  share->state.split++;
	/*
	  First block of a row: remember the row length and start position,
	  and decide where the next call will continue reading.
	*/
	if (! found_record++)
	{
	  sort_param->find_length=left_length=block_info.rec_len;
	  sort_param->start_recpos=pos;
	  if (!sort_param->fix_datafile)
	    sort_param->current_filepos= sort_param->start_recpos;
	  /*
	    With T_EXTEND while rewriting the data file the next scan starts
	    at block_info.filepos+1; otherwise directly after this block.
	  */
	  if (sort_param->fix_datafile && (param->testflag & T_EXTEND))
	    sort_param->pos=block_info.filepos+1;
	  else
	    sort_param->pos=block_info.filepos+block_info.block_len;
	  if (share->base.blobs)
	  {
	    if (_ma_alloc_buffer(&sort_param->rec_buff,
                                 &sort_param->rec_buff_size,
                                 block_info.rec_len +
                                 share->base.extra_rec_buff_size))

	    {
	      /*
	        Allocation failed. It is a hard error if the row is within
	        param->max_record_length; bigger rows are only skipped.
	      */
	      if (param->max_record_length >= block_info.rec_len)
	      {
		_ma_check_print_error(param,"Not enough memory for blob at %s "
                                      "(need %lu)",
				     llstr(sort_param->start_recpos,llbuff),
				     (ulong) block_info.rec_len);
		DBUG_RETURN(1);
	      }
	      else
	      {
		_ma_check_print_info(param,"Not enough memory for blob at %s "
                                     "(need %lu); Row skipped",
				    llstr(sort_param->start_recpos,llbuff),
				    (ulong) block_info.rec_len);
		goto try_next;
	      }
	    }
	  }
          /* Row data is collected from the start of rec_buff */
          to= sort_param->rec_buff;
	}
	if (left_length < block_info.data_len || ! block_info.data_len)
	{
	  _ma_check_print_info(param,
			      "Found block with too small length at %s; "
                               "Skipped",
                               llstr(sort_param->start_recpos,llbuff));
	  goto try_next;
	}
	if (block_info.filepos + block_info.data_len >
	    sort_param->read_cache.end_of_file)
	{
	  _ma_check_print_info(param,
			      "Found block that points outside data file "
                               "at %s",
                               llstr(sort_param->start_recpos,llbuff));
	  goto try_next;
	}
        /*
          Copy information that is already read. Avoid accessing data
          below the cache start. This could happen if the header
          stretched over the end of the previous buffer contents.
        */
        {
          uint header_len= (uint) (block_info.filepos - pos);
          uint prefetch_len= (MARIA_BLOCK_INFO_HEADER_LENGTH - header_len);

          if (prefetch_len > block_info.data_len)
            prefetch_len= block_info.data_len;
          if (prefetch_len)
          {
            memcpy(to, block_info.header + header_len, prefetch_len);
            block_info.filepos+= prefetch_len;
            block_info.data_len-= prefetch_len;
            left_length-= prefetch_len;
            to+= prefetch_len;
          }
        }
        /* Read what remains of this block's data, if anything */
        if (block_info.data_len &&
            _ma_read_cache(info, &sort_param->read_cache,to,block_info.filepos,
                           block_info.data_len,
                           (found_record == 1 ? READING_NEXT : 0) |
                           parallel_flag))
	{
	  _ma_check_print_info(param,
			      "Read error for block at: %s (error: %d); "
                               "Skipped",
			      llstr(block_info.filepos,llbuff),my_errno);
	  goto try_next;
	}
	left_length-=block_info.data_len;
	to+=block_info.data_len;
	/* Follow the link to the next block of the row */
	pos=block_info.next_filepos;
	if (pos == HA_OFFSET_ERROR && left_length)
	{
	  _ma_check_print_info(param,
                               "Wrong block with wrong total length "
                               "starting at %s",
			      llstr(sort_param->start_recpos,llbuff));
	  goto try_next;
	}
	if (pos + MARIA_BLOCK_INFO_HEADER_LENGTH >
            sort_param->read_cache.end_of_file)
	{
	  _ma_check_print_info(param,
                               "Found link that points at %s (outside data "
                               "file) at %s",
			      llstr(pos,llbuff2),
			      llstr(sort_param->start_recpos,llbuff));
	  goto try_next;
	}
      } while (left_length);

      /* All blocks are collected in rec_buff; unpack them into the record */
      if (_ma_rec_unpack(info,sort_param->record,sort_param->rec_buff,
			 sort_param->find_length) != MY_FILE_ERROR)
      {
	if (sort_param->read_cache.error < 0)
	  DBUG_RETURN(1);
	if (sort_param->calc_checksum)
	  checksum= (share->calc_check_checksum)(info, sort_param->record);
	/* Extra verification of the row when extended/repairing or searching */
	if ((param->testflag & (T_EXTEND | T_REP)) || searching)
	{
	  if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff,
                            sort_param->find_length,
                            (param->testflag & T_QUICK) &&
                            sort_param->calc_checksum &&
                            MY_TEST(share->calc_checksum), checksum))
	  {
	    _ma_check_print_info(param,"Found wrong packed record at %s",
				llstr(sort_param->start_recpos,llbuff));
	    goto try_next;
	  }
	}
	if (sort_param->calc_checksum)
	  param->glob_crc+= checksum;
	DBUG_RETURN(0);
      }
      if (!searching)
        _ma_check_print_info(param,"Key %d - Found wrong stored record at %s",
                            sort_param->key+1,
                            llstr(sort_param->start_recpos,llbuff));
    try_next:
      /*
        Give up on the row that started at start_recpos and resume in search
        mode MARIA_DYN_ALIGN_SIZE bytes further on.
      */
      pos=(sort_param->start_recpos+=MARIA_DYN_ALIGN_SIZE);
      searching=1;
    }
  }
  /* Not reached: the loop above is only left through DBUG_RETURN */
  case COMPRESSED_RECORD:
    param->progress= sort_param->pos;
    /*
      Every 'continue' below moves the read position one byte forward and
      turns on searching. The loop is only left through DBUG_RETURN.
    */
    for (searching=0 ;; searching=1, sort_param->pos++)
    {
      if (_ma_read_cache(info, &sort_param->read_cache, block_info.header,
			 sort_param->pos,
			 share->pack.ref_length,READING_NEXT))
	DBUG_RETURN(-1);
      if (searching && ! sort_param->fix_datafile)
      {
	param->error_printed=1;
        param->retry_repair=1;
        param->testflag|=T_RETRY_WITHOUT_QUICK;
        my_errno= HA_ERR_WRONG_IN_RECORD;
	DBUG_RETURN(1);		/* Something wrong with data */
      }
      sort_param->start_recpos=sort_param->pos;
      if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info,
                                  &sort_param->rec_buff,
                                  &sort_param->rec_buff_size, -1,
                                  sort_param->pos))
	DBUG_RETURN(-1);
      /* A zero length row exactly MEMMAP_EXTRA_MARGIN before EOF ends the scan */
      if (!block_info.rec_len &&
	  sort_param->pos + MEMMAP_EXTRA_MARGIN ==
	  sort_param->read_cache.end_of_file)
	DBUG_RETURN(-1);
      if (block_info.rec_len < (uint) share->min_pack_length ||
	  block_info.rec_len > (uint) share->max_pack_length)
      {
	/*
	  NOTE(review): this is the only message in this function that ends
	  with "\n" - confirm whether the extra newline is intended.
	*/
	if (! searching)
	  _ma_check_print_info(param,
                               "Found block with wrong recordlength: %lu "
                               "at %s\n",
                               block_info.rec_len,
                               llstr(sort_param->pos,llbuff));
	continue;
      }
      if (_ma_read_cache(info, &sort_param->read_cache, sort_param->rec_buff,
			 block_info.filepos, block_info.rec_len,
			 READING_NEXT))
      {
	if (! searching)
	  _ma_check_print_info(param,"Couldn't read whole record from %s",
			      llstr(sort_param->pos,llbuff));
	continue;
      }
#ifdef HAVE_valgrind
      bzero(sort_param->rec_buff + block_info.rec_len,
            share->base.extra_rec_buff_size);
#endif
      if (_ma_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record,
                              sort_param->rec_buff, block_info.rec_len))
      {
	if (! searching)
	  _ma_check_print_info(param,"Found wrong record at %s",
			      llstr(sort_param->pos,llbuff));
	continue;
      }
      if (!sort_param->fix_datafile)
      {
	sort_param->current_filepos= sort_param->pos;
        if (sort_param->master)
	  share->state.split++;
      }
      /* Next read starts directly after this row */
      sort_param->max_pos= (sort_param->pos=block_info.filepos+
                            block_info.rec_len);
      info->packed_length=block_info.rec_len;

      if (sort_param->calc_checksum)
      {
        info->cur_row.checksum= (*share->calc_check_checksum)(info,
                                                                sort_param->
                                                                record);
	param->glob_crc+= info->cur_row.checksum;
      }
      DBUG_RETURN(0);
    }
  case NO_RECORD:
    DBUG_RETURN(1);                             /* Impossible */
  }
  DBUG_RETURN(1);                               /* Impossible */
}
5363 
5364 
5365 /**
5366    @brief Write record to new file.
5367 
5368    @fn    _ma_sort_write_record()
5369    @param sort_param                Sort parameters.
5370 
5371    @note
5372    This is only called by a master thread if parallel repair is used.
5373 
5374    @return
5375    @retval  0   OK
5376                 sort_param->current_filepos points to inserted record for
5377                 block_records and to the place for the next record for
5378                 other row types.
5379                 sort_param->filepos points to end of file
5380   @retval   1   Error
5381 */
5382 
int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
{
  int flag;
  uint length;
  ulong block_length,reclength;
  uchar *from;
  uchar block_buff[8];
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  HA_CHECK *param= sort_info->param;
  MARIA_HA *info= sort_info->new_info;
  MARIA_SHARE *share= info->s;
  DBUG_ENTER("_ma_sort_write_record");

  /*
    The row is only written when the data file is being rebuilt; otherwise
    just the record counter below is maintained.
  */
  if (sort_param->fix_datafile)
  {
    sort_param->current_filepos= sort_param->filepos;
    /* How the row is stored depends on the row format of the new file */
    switch (sort_info->new_data_file_type) {
    case BLOCK_RECORD:
      /* write_record_init() returns the position of the row or an error */
      if ((sort_param->current_filepos=
           (*share->write_record_init)(info, sort_param->record)) ==
          HA_OFFSET_ERROR)
      {
        _ma_check_print_error(param, "%d when writing to datafile", my_errno);
        DBUG_RETURN(1);
      }
      /* Pointer to end of file */
      sort_param->filepos= share->state.state.data_file_length;
      break;
    case STATIC_RECORD:
      if (my_b_write(&info->rec_cache,sort_param->record,
                     share->base.pack_reclength))
      {
        _ma_check_print_error(param,"%d when writing to datafile",my_errno);
        DBUG_RETURN(1);
      }
      sort_param->filepos+=share->base.pack_reclength;
      share->state.split++;
      break;
    case DYNAMIC_RECORD:
      if (! info->blobs)
        from=sort_param->rec_buff;
      else
      {
        /* must be sure that local buffer is big enough */
        reclength=share->base.pack_reclength+
          _ma_calc_total_blob_length(info,sort_param->record)+
          ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
          MARIA_DYN_DELETE_BLOCK_HEADER;
        if (sort_info->buff_length < reclength)
        {
          /*
            Pass the full length to my_realloc(). Casting it to uint would
            silently allocate too small a buffer for rows above 4G while
            buff_length below would claim the buffer is big enough.
          */
          if (!(sort_info->buff=my_realloc(sort_info->buff,
                                           (size_t) reclength,
                                           MYF(MY_FREE_ON_ERROR |
                                               MY_ALLOW_ZERO_PTR))))
          {
            /* MY_FREE_ON_ERROR released the old buffer; forget its size */
            sort_info->buff_length= 0;
            DBUG_RETURN(1);
          }
          sort_info->buff_length=reclength;
        }
        /* Leave room for the block header in front of the packed row */
        from= (uchar *) sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER);
      }
      /* We can use info->checksum here as only one thread calls this */
      info->cur_row.checksum= (*share->calc_check_checksum)(info,
                                                              sort_param->
                                                              record);
      if (!(reclength= _ma_rec_pack(info,from,sort_param->record)))
      {
        _ma_check_print_error(param,"Got error %d when packing record",
                              my_errno);
        DBUG_RETURN(1);
      }
      flag=0;

      /*
        Write the packed row as one or more blocks.
        _ma_write_part_record() advances 'from' and decreases 'reclength';
        loop until nothing is left.
      */
      do
      {
        /* Data length + 3 header bytes, + 1 more for long rows */
        block_length= reclength + 3 + MY_TEST(reclength >= (65520 - 3));
        if (block_length < share->base.min_block_length)
          block_length=share->base.min_block_length;
        info->update|=HA_STATE_WRITE_AT_END;
        block_length=MY_ALIGN(block_length,MARIA_DYN_ALIGN_SIZE);
        if (block_length > MARIA_MAX_BLOCK_LENGTH)
          block_length=MARIA_MAX_BLOCK_LENGTH;
        if (_ma_write_part_record(info,0L,block_length,
                                  sort_param->filepos+block_length,
                                  &from,&reclength,&flag))
        {
          _ma_check_print_error(param,"%d when writing to datafile",my_errno);
          DBUG_RETURN(1);
        }
        sort_param->filepos+=block_length;
        share->state.split++;
      } while (reclength);
      break;
    case COMPRESSED_RECORD:
      /* The row is already packed in rec_buff; prefix it with its length(s) */
      reclength=info->packed_length;
      length= _ma_save_pack_length((uint) share->pack.version, block_buff,
                                   reclength);
      if (share->base.blobs)
        length+= _ma_save_pack_length((uint) share->pack.version,
                                      block_buff + length, info->blob_length);
      if (my_b_write(&info->rec_cache,block_buff,length) ||
          my_b_write(&info->rec_cache, sort_param->rec_buff, reclength))
      {
        _ma_check_print_error(param,"%d when writing to datafile",my_errno);
        DBUG_RETURN(1);
      }
      sort_param->filepos+=reclength+length;
      share->state.split++;
      break;
    case NO_RECORD:
      DBUG_RETURN(1);                           /* Impossible */
    }
  }
  /* Only the master counts rows (and shows the T_WRITE_LOOP counter) */
  if (sort_param->master)
  {
    share->state.state.records++;
    if ((param->testflag & T_WRITE_LOOP) &&
        (share->state.state.records % WRITE_COUNT) == 0)
    {
      char llbuff[22];
      printf("%s\r", llstr(share->state.state.records,llbuff));
      fflush(stdout);
    }
  }
  DBUG_RETURN(0);
} /* _ma_sort_write_record */
5506 
5507 
5508 /* Compare two keys from _ma_create_index_by_sort */
5509 
static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
                        const void *b)
{
  /* a and b point at key pointers, not at the keys themselves */
  uchar *const *first_key= (uchar* const *) a;
  uchar *const *second_key= (uchar* const *) b;
  /* ha_key_cmp() wants a place for the difference position; it is ignored */
  uint ignored_diff_pos[2];

  return ha_key_cmp(sort_param->seg, *first_key, *second_key,
                    USE_WHOLE_KEY, SEARCH_SAME, ignored_diff_pos);
} /* sort_key_cmp */
5518 
5519 
/*
  Store one key, handed over by the sort, in the index under construction.

  The key is compared with the previously stored key (if any) to update the
  key statistics in sort_param->unique[]. A key that compares equal to the
  previous one on an index flagged HA_NOSAME is reported as a duplicate and
  its row is passed to sort_delete_record() instead of being inserted.

  Returns the result of sort_delete_record() or sort_insert_key(); in debug
  builds 1 is returned if the keys arrive out of order.
*/

static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a)
{
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  HA_CHECK *param= sort_info->param;
  uint diff_pos[2];
  int cmp_result= -1;                   /* Value used for the very first key */

  if (!sort_info->key_block->inited)
  {
    /* Nothing to compare with yet */
    if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
      maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
                                        a);
  }
  else
  {
    uchar *prev_key= sort_info->key_block->lastkey;

    cmp_result= ha_key_cmp(sort_param->seg, prev_key, a, USE_WHOLE_KEY,
                           SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT,
                           diff_pos);
    /* Some statistics methods need diff_pos[0] computed in another way */
    switch (param->stats_method) {
    case MI_STATS_METHOD_NULLS_NOT_EQUAL:
      ha_key_cmp(sort_param->seg, prev_key, a, USE_WHOLE_KEY,
                 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos);
      break;
    case MI_STATS_METHOD_IGNORE_NULLS:
      diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg,
                                                    sort_param->notnull,
                                                    prev_key, a);
      break;
    default:
      break;
    }
    sort_param->unique[diff_pos[0]-1]++;
  }

  if (cmp_result == 0 && (sort_param->keyinfo->flag & HA_NOSAME))
  {
    /* Duplicate on a unique index: warn and get rid of the row */
    char new_pos_buff[22], old_pos_buff[22];

    DBUG_EXECUTE("key", _ma_print_keydata(DBUG_FILE, sort_param->seg, a,
                                          USE_WHOLE_KEY););
    sort_info->dupp++;
    sort_info->info->cur_row.lastpos=
      get_record_for_key(sort_param->keyinfo, a);
    if ((param->testflag & (T_CREATE_UNIQUE_BY_SORT | T_SUPPRESS_ERR_HANDLING))
        == T_CREATE_UNIQUE_BY_SORT)
      param->testflag|= T_SUPPRESS_ERR_HANDLING;
    _ma_check_print_warning(param,
                            "Duplicate key %2u for record at %10s against "
                            "record at %10s",
                            sort_param->key + 1,
                            llstr(sort_info->info->cur_row.lastpos,
                                  new_pos_buff),
                            llstr(get_record_for_key(sort_param->keyinfo,
                                                     sort_info->key_block->
                                                     lastkey),
                                  old_pos_buff));
    param->testflag|= T_RETRY_WITHOUT_QUICK;
    if (param->testflag & T_VERBOSE)
      _ma_print_keydata(stdout, sort_param->seg, a, USE_WHOLE_KEY);
    return sort_delete_record(sort_param);
  }

#ifndef DBUG_OFF
  /* The sort must deliver keys in ascending order */
  if (cmp_result > 0)
  {
    _ma_check_print_error(param,
                          "Internal error: Keys are not in order from sort");
    return 1;
  }
#endif
  return sort_insert_key(sort_param, sort_info->key_block, a,
                         HA_OFFSET_ERROR);
} /* sort_key_write */
5589 
5590 
/*
  Flush what is buffered in sort_info->ft_buf to the index.

  If ft_buf->buf is set, the key in ft_buf->lastkey is inserted once for
  each value stored between the end of lastkey's first value and
  ft_buf->buf. Otherwise the pending key blocks are flushed, the value part
  of lastkey is rewritten from ft_buf->count and the key root, sort_info and
  sort_param are pointed back at the key given by sort_param->key, and
  lastkey is inserted there.

  Returns 0 on success, otherwise the first error met.
*/

int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param)
{
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  SORT_KEY_BLOCKS *start_key_block= sort_info->key_block;
  MARIA_SHARE *share= sort_info->info->s;
  SORT_FT_BUF *ft_buf= sort_info->ft_buf;
  uint val_len= share->ft2_keyinfo.keylength;
  uint val_off;
  uchar *first_val;
  int error;

  /* The value part starts right after the key part of lastkey */
  get_key_full_length_rdonly(val_off, ft_buf->lastkey);
  first_val= ft_buf->lastkey + val_off;

  if (ft_buf->buf)
  {
    /* flushing first-level tree */
    uchar *next_val= first_val + val_len;

    error= sort_insert_key(sort_param, start_key_block, ft_buf->lastkey,
                           HA_OFFSET_ERROR);
    while (!error && next_val < ft_buf->buf)
    {
      /* Put the next stored value into lastkey and insert the key again */
      memcpy(first_val, next_val, val_len);
      error= sort_insert_key(sort_param, start_key_block, ft_buf->lastkey,
                             HA_OFFSET_ERROR);
      next_val+= val_len;
    }
    return error;
  }

  /* flushing second-level tree keyblocks */
  error= _ma_flush_pending_blocks(sort_param);

  /* updating lastkey with second-level tree info */
  ft_intXstore(ft_buf->lastkey + val_off, -ft_buf->count);
  _ma_dpointer(sort_info->info->s, ft_buf->lastkey + val_off + HA_FT_WLEN,
               share->state.key_root[sort_param->key]);

  /* restoring first level tree data in sort_info/sort_param */
  sort_info->key_block= (sort_info->key_block_end -
                         sort_info->param->sort_key_blocks);
  sort_param->keyinfo= share->keyinfo + sort_param->key;
  share->state.key_root[sort_param->key]= HA_OFFSET_ERROR;

  /* writing lastkey in first-level tree */
  if (error)
    return error;
  return sort_insert_key(sort_param, sort_info->key_block, ft_buf->lastkey,
                         HA_OFFSET_ERROR);
}
5635 
5636 
/*
  Key-write function for fulltext keys during sort.

  'a' is a key whose leading part has the length computed by
  get_key_full_length_rdonly() (a_len) and which is followed by val_len
  (HA_FT_WLEN + rec_reflength) bytes.

  Consecutive keys whose leading part compares equal (ha_compare_text())
  to the one saved in ft_buf->lastkey are collected: first in the memory
  buffer ft_buf->buf, and once that buffer reaches ft_buf->end they are
  moved to a second-level tree built with share->ft2_keyinfo. A key with
  a different leading part flushes what was collected
  (_ma_sort_ft_buf_flush()) and starts a new collection.

  If no SORT_FT_BUF can be used (see the condition below, or my_malloc()
  failure), sort_param->key_write is switched to sort_key_write() and
  this and all following keys are handled there.

  Returns 0 on success, non-zero on error.
*/
static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
                                   const uchar *a)
{
  uint a_len, val_off, val_len, error;
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  SORT_FT_BUF *ft_buf= sort_info->ft_buf;
  SORT_KEY_BLOCKS *key_block= sort_info->key_block;
  MARIA_SHARE *share= sort_info->info->s;

  /* Size of the part that follows the leading (word) part of a key */
  val_len=HA_FT_WLEN+share->rec_reflength;
  get_key_full_length_rdonly(a_len, a);

  if (!ft_buf)
  {
    /*
      First call: no buffer yet.
      use two-level tree only if key_reflength fits in rec_reflength place
      and row format is NOT static - for _ma_dpointer not to garble offsets
     */
    if ((share->base.key_reflength <=
         share->rec_reflength) &&
        (share->options &
          (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)))
      ft_buf= (SORT_FT_BUF *)my_malloc(sort_param->keyinfo->block_length +
                                       sizeof(SORT_FT_BUF), MYF(MY_WME));

    if (!ft_buf)
    {
      /* Not allowed or out of memory: fall back to plain key writes */
      sort_param->key_write=sort_key_write;
      return sort_key_write(sort_param, a);
    }
    sort_info->ft_buf= ft_buf;
    goto word_init_ft_buf;              /* no need to duplicate the code */
  }
  /* val_off: length of the leading part of the saved key */
  get_key_full_length_rdonly(val_off, ft_buf->lastkey);

  /*
    Compare the leading parts, skipping the first byte of each
    (presumably the length byte - confirm against the key format).
  */
  if (ha_compare_text(sort_param->seg->charset,
                      a+1,a_len-1,
                      ft_buf->lastkey+1,val_off-1, 0)==0)
  {
    uchar *p;
    if (!ft_buf->buf)                   /* store in second-level tree */
    {
      ft_buf->count++;
      return sort_insert_key(sort_param,key_block,
                             a + a_len, HA_OFFSET_ERROR);
    }

    /* storing the key in the buffer. */
    memcpy (ft_buf->buf, (const char *)a+a_len, val_len);
    ft_buf->buf+=val_len;
    if (ft_buf->buf < ft_buf->end)
      return 0;

    /* Buffer is full: converting to two-level tree */
    p=ft_buf->lastkey+val_off;

    /* The second-level tree uses the first key block that is not in use */
    while (key_block->inited)
      key_block++;
    sort_info->key_block=key_block;
    sort_param->keyinfo= &share->ft2_keyinfo;
    /* Number of val_len sized entries collected so far */
    ft_buf->count=(uint)(ft_buf->buf - p)/val_len;

    /* flushing buffer to second-level tree */
    for (error=0; !error && p < ft_buf->buf; p+= val_len)
      error=sort_insert_key(sort_param,key_block,p,HA_OFFSET_ERROR);
    /* buf == 0 marks that entries now go directly to the second level */
    ft_buf->buf=0;
    return error;
  }

  /* Different leading part: flushing buffer */
  if ((error=_ma_sort_ft_buf_flush(sort_param)))
    return error;

word_init_ft_buf:
  /* Save the whole key (leading part + val_len bytes) as the new lastkey */
  a_len+=val_len;
  memcpy(ft_buf->lastkey, a, a_len);
  ft_buf->buf=ft_buf->lastkey+a_len;
  /*
    32 is just a safety margin here
    (at least MY_MAX(val_len, sizeof(nod_flag)) should be there).
    May be better performance could be achieved if we'd put
      (sort_info->keyinfo->block_length-32)/XXX
      instead.
        TODO: benchmark the best value for XXX.
  */
  ft_buf->end= ft_buf->lastkey+ (sort_param->keyinfo->block_length-32);
  return 0;
} /* sort_maria_ft_key_write */
5725 
5726 
5727 /* get pointer to record from a key */
5728 
get_record_for_key(MARIA_KEYDEF * keyinfo,const uchar * key_data)5729 static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo,
5730 				   const uchar *key_data)
5731 {
5732   MARIA_KEY key;
5733   key.keyinfo= keyinfo;
5734   key.data= (uchar*) key_data;
5735   key.data_length= (_ma_keylength(keyinfo, key_data) -
5736                     keyinfo->share->rec_reflength);
5737   return _ma_row_pos_from_key(&key);
5738 } /* get_record_for_key */
5739 
5740 
/*
  Insert a key in sort-key-blocks

  SYNOPSIS
    sort_insert_key()
      sort_param   Sort parameters; sort_param->keyinfo is the key definition
      key_block    Key block (tree level) to add the key to
      key          Key to insert
      prev_block   Page address stored in front of the key when key_block
                   is not the lowest level (nod_flag != 0)

  DESCRIPTION
    The key is packed relative to the previous key of the block and
    appended to the in-memory page of key_block. If the page would become
    bigger than share->max_index_block_size, the page as it was before
    this key is written to a newly allocated index page, the last key of
    that page is inserted in the next level (key_block+1) together with the
    address of the written page, and the key is inserted again into the
    now empty block.

  RETURN
    0  ok
    1  error
*/

static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
			   register SORT_KEY_BLOCKS *key_block,
                           const uchar *key,
			   my_off_t prev_block)
{
  uint a_length,t_length,nod_flag;
  my_off_t filepos;
  uchar *anc_buff,*lastkey;
  MARIA_KEY_PARAM s_temp;
  MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  HA_CHECK *param=sort_info->param;
  MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
  MARIA_KEY tmp_key;
  MARIA_HA *info= sort_info->info;
  MARIA_SHARE *share= info->s;
  DBUG_ENTER("sort_insert_key");

  anc_buff= key_block->buff;
  lastkey=key_block->lastkey;
  /* Only blocks above the first level carry pointers to child pages */
  nod_flag= (key_block == sort_info->key_block ? 0 :
	     share->base.key_reflength);

  if (!key_block->inited)
  {
    /* First key in this block: set up an empty page */
    /*
      NOTE(review): key_block is read and marked as inited before it is
      compared with key_block_end; if key_block_end points past the last
      allocated block this touches memory outside the block array -
      confirm against the allocation in the caller.
    */
    key_block->inited=1;
    if (key_block == sort_info->key_block_end)
    {
      _ma_check_print_error(param,
                            "To many key-block-levels; "
                            "Try increasing sort_key_blocks");
      DBUG_RETURN(1);
    }
    a_length= share->keypage_header + nod_flag;
    key_block->end_pos= anc_buff + share->keypage_header;
    bzero(anc_buff, share->keypage_header);
    _ma_store_keynr(share, anc_buff, sort_param->keyinfo->key_nr);
    lastkey=0;					/* No previous key in block */
  }
  else
    a_length= _ma_get_page_used(share, anc_buff);

	/* Save pointer to previous block */
  if (nod_flag)
  {
    _ma_store_keypage_flag(share, anc_buff, KEYPAGE_FLAG_ISNOD);
    _ma_kpointer(info,key_block->end_pos,prev_block);
  }

  /* Describe the key without its row reference */
  tmp_key.keyinfo= keyinfo;
  tmp_key.data= (uchar*) key;
  tmp_key.data_length= _ma_keylength(keyinfo, key) - share->rec_reflength;
  tmp_key.ref_length=  share->rec_reflength;

  /* Pack the key relative to the previous key and append it to the page */
  t_length= (*keyinfo->pack_key)(&tmp_key, nod_flag,
                                 (uchar*) 0, lastkey, lastkey, &s_temp);
  (*keyinfo->store_key)(keyinfo, key_block->end_pos+nod_flag,&s_temp);
  a_length+=t_length;
  _ma_store_page_used(share, anc_buff, a_length);
  key_block->end_pos+=t_length;
  if (a_length <= share->max_index_block_size)
  {
    /* Key fits: remember it as last key and the page length before it */
    MARIA_KEY tmp_key2;
    tmp_key2.data= key_block->lastkey;
    _ma_copy_key(&tmp_key2, &tmp_key);
    key_block->last_length=a_length-t_length;
    DBUG_RETURN(0);
  }

  /*
    Page overflow. Cut the page back to its length before this key,
    fill block with end-zero and write filled block
  */
  _ma_store_page_used(share, anc_buff, key_block->last_length);
  bzero(anc_buff+key_block->last_length,
	keyinfo->block_length- key_block->last_length);
  if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) == HA_OFFSET_ERROR)
    DBUG_RETURN(1);
  _ma_fast_unlock_key_del(info);

  /* If we read the page from the key cache, we have to write it back to it */
  if (page_link->changed)
  {
    MARIA_PAGE page;
    pop_dynamic(&info->pinned_pages);
    _ma_page_setup(&page, info, keyinfo, filepos, anc_buff);
    if (_ma_write_keypage(&page, PAGECACHE_LOCK_WRITE_UNLOCK, DFLT_INIT_HITS))
      DBUG_RETURN(1);
  }
  else
  {
    if (write_page(share, share->kfile.file, anc_buff,
                   keyinfo->block_length, filepos, param->myf_rw))
      DBUG_RETURN(1);
  }
  DBUG_DUMP("buff", anc_buff, _ma_get_page_used(share, anc_buff));

	/* Write separator-key to block in next level */
  if (sort_insert_key(sort_param,key_block+1,key_block->lastkey,filepos))
    DBUG_RETURN(1);

	/* clear old block and write new key in it */
  key_block->inited=0;
  DBUG_RETURN(sort_insert_key(sort_param, key_block,key,prev_block));
} /* sort_insert_key */
5845 
5846 
5847 /* Delete record when we found a duplicated key */
5848 
sort_delete_record(MARIA_SORT_PARAM * sort_param)5849 static int sort_delete_record(MARIA_SORT_PARAM *sort_param)
5850 {
5851   uint i;
5852   int old_file,error;
5853   uchar *key;
5854   MARIA_SORT_INFO *sort_info=sort_param->sort_info;
5855   HA_CHECK *param=sort_info->param;
5856   MARIA_HA *row_info= sort_info->new_info, *key_info= sort_info->info;
5857   DBUG_ENTER("sort_delete_record");
5858 
5859   if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
5860   {
5861     _ma_check_print_error(param,
5862 			 "Quick-recover aborted; Run recovery without switch "
5863                           "-q or with switch -qq");
5864     DBUG_RETURN(1);
5865   }
5866   if (key_info->s->options & HA_OPTION_COMPRESS_RECORD)
5867   {
5868     _ma_check_print_error(param,
5869                           "Recover aborted; Can't run standard recovery on "
5870                           "compressed tables with errors in data-file. "
5871                           "Use 'aria_chk --safe-recover' to fix it");
5872     DBUG_RETURN(1);
5873   }
5874 
5875   old_file= row_info->dfile.file;
5876   /* This only affects static and dynamic row formats */
5877   row_info->dfile.file= row_info->rec_cache.file;
5878   if (flush_io_cache(&row_info->rec_cache))
5879     DBUG_RETURN(1);
5880 
5881   key= key_info->lastkey_buff + key_info->s->base.max_key_length;
5882   if ((error=(*row_info->s->read_record)(row_info, sort_param->record,
5883                                          key_info->cur_row.lastpos)) &&
5884 	error != HA_ERR_RECORD_DELETED)
5885   {
5886     _ma_check_print_error(param,"Can't read record to be removed");
5887     row_info->dfile.file= old_file;
5888     DBUG_RETURN(1);
5889   }
5890   row_info->cur_row.lastpos= key_info->cur_row.lastpos;
5891 
5892   for (i=0 ; i < sort_info->current_key ; i++)
5893   {
5894     MARIA_KEY tmp_key;
5895     (*key_info->s->keyinfo[i].make_key)(key_info, &tmp_key, i, key,
5896                                         sort_param->record,
5897                                         key_info->cur_row.lastpos, 0);
5898     if (_ma_ck_delete(key_info, &tmp_key))
5899     {
5900       _ma_check_print_error(param,
5901                             "Can't delete key %d from record to be removed",
5902                             i+1);
5903       row_info->dfile.file= old_file;
5904       DBUG_RETURN(1);
5905     }
5906   }
5907   if (sort_param->calc_checksum)
5908     param->glob_crc-=(*key_info->s->calc_check_checksum)(key_info,
5909                                                          sort_param->record);
5910   error= (*row_info->s->delete_record)(row_info, sort_param->record);
5911   if (error)
5912     _ma_check_print_error(param,"Got error %d when deleting record",
5913                           my_errno);
5914   row_info->dfile.file= old_file;           /* restore actual value */
5915   row_info->s->state.state.records--;
5916   DBUG_RETURN(error);
5917 } /* sort_delete_record */
5918 
5919 
5920 /* Fix all pending blocks and flush everything to disk */
5921 
_ma_flush_pending_blocks(MARIA_SORT_PARAM * sort_param)5922 int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param)
5923 {
5924   uint nod_flag,length;
5925   my_off_t filepos;
5926   SORT_KEY_BLOCKS *key_block;
5927   MARIA_SORT_INFO *sort_info= sort_param->sort_info;
5928   myf myf_rw=sort_info->param->myf_rw;
5929   MARIA_HA *info=sort_info->info;
5930   MARIA_KEYDEF *keyinfo=sort_param->keyinfo;
5931   MARIA_PINNED_PAGE tmp_page_link, *page_link= &tmp_page_link;
5932   DBUG_ENTER("_ma_flush_pending_blocks");
5933 
5934   filepos= HA_OFFSET_ERROR;			/* if empty file */
5935   nod_flag=0;
5936   for (key_block=sort_info->key_block ; key_block->inited ; key_block++)
5937   {
5938     key_block->inited=0;
5939     length= _ma_get_page_used(info->s, key_block->buff);
5940     if (nod_flag)
5941       _ma_kpointer(info,key_block->end_pos,filepos);
5942     bzero(key_block->buff+length, keyinfo->block_length-length);
5943     if ((filepos= _ma_new(info, DFLT_INIT_HITS, &page_link)) ==
5944         HA_OFFSET_ERROR)
5945       goto err;
5946 
5947     /* If we read the page from the key cache, we have to write it back */
5948     if (page_link->changed)
5949     {
5950       MARIA_PAGE page;
5951       pop_dynamic(&info->pinned_pages);
5952 
5953       _ma_page_setup(&page, info, keyinfo, filepos, key_block->buff);
5954       if (_ma_write_keypage(&page, PAGECACHE_LOCK_WRITE_UNLOCK,
5955                             DFLT_INIT_HITS))
5956 	goto err;
5957     }
5958     else
5959     {
5960       if (write_page(info->s, info->s->kfile.file, key_block->buff,
5961                      keyinfo->block_length, filepos, myf_rw))
5962         goto err;
5963     }
5964     DBUG_DUMP("buff",key_block->buff,length);
5965     nod_flag=1;
5966   }
5967   info->s->state.key_root[sort_param->key]=filepos; /* Last is root for tree */
5968   _ma_fast_unlock_key_del(info);
5969   DBUG_RETURN(0);
5970 
5971 err:
5972   _ma_fast_unlock_key_del(info);
5973   DBUG_RETURN(1);
5974 } /* _ma_flush_pending_blocks */
5975 
5976 	/* alloc space and pointers for key_blocks */
5977 
alloc_key_blocks(HA_CHECK * param,uint blocks,uint buffer_length)5978 static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
5979                                          uint buffer_length)
5980 {
5981   reg1 uint i;
5982   SORT_KEY_BLOCKS *block;
5983   DBUG_ENTER("alloc_key_blocks");
5984 
5985   if (!(block= (SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+
5986                                              buffer_length+IO_SIZE)*blocks,
5987                                             MYF(0))))
5988   {
5989     _ma_check_print_error(param,"Not enough memory for sort-key-blocks");
5990     return(0);
5991   }
5992   for (i=0 ; i < blocks ; i++)
5993   {
5994     block[i].inited=0;
5995     block[i].buff= (uchar*) (block+blocks)+(buffer_length+IO_SIZE)*i;
5996   }
5997   DBUG_RETURN(block);
5998 } /* alloc_key_blocks */
5999 
6000 
6001 	/* Check if file is almost full */
6002 
maria_test_if_almost_full(MARIA_HA * info)6003 int maria_test_if_almost_full(MARIA_HA *info)
6004 {
6005   MARIA_SHARE *share= info->s;
6006 
6007   if (share->options & HA_OPTION_COMPRESS_RECORD)
6008     return 0;
6009   return mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END,
6010                  MYF(MY_THREADSAFE))/10*9 >
6011     (my_off_t) share->base.max_key_file_length ||
6012     mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)) / 10 * 9 >
6013     (my_off_t) share->base.max_data_file_length;
6014 }
6015 
6016 
6017 /* Recreate table with bigger more alloced record-data */
6018 
maria_recreate_table(HA_CHECK * param,MARIA_HA ** org_info,char * filename)6019 int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
6020 {
6021   int error;
6022   MARIA_HA info;
6023   MARIA_SHARE share;
6024   MARIA_KEYDEF *keyinfo,*key,*key_end;
6025   HA_KEYSEG *keysegs,*keyseg;
6026   MARIA_COLUMNDEF *columndef,*column,*end;
6027   MARIA_UNIQUEDEF *uniquedef,*u_ptr,*u_end;
6028   MARIA_STATUS_INFO status_info;
6029   uint unpack,key_parts;
6030   ha_rows max_records;
6031   ulonglong file_length,tmp_length;
6032   MARIA_CREATE_INFO create_info;
6033   DBUG_ENTER("maria_recreate_table");
6034 
6035   if ((!(param->testflag & T_SILENT)))
6036     printf("Recreating table '%s'\n", param->isam_file_name);
6037 
6038   error=1;					/* Default error */
6039   info= **org_info;
6040   status_info= (*org_info)->state[0];
6041   info.state= &status_info;
6042   share= *(*org_info)->s;
6043   unpack= ((share.data_file_type == COMPRESSED_RECORD) &&
6044            (param->testflag & T_UNPACK));
6045   if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF) *
6046                                           share.base.keys)))
6047     DBUG_RETURN(0);
6048   memcpy((uchar*) keyinfo,(uchar*) share.keyinfo,
6049 	 (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys));
6050 
6051   key_parts= share.base.all_key_parts;
6052   if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG)*
6053 				       (key_parts+share.base.keys))))
6054   {
6055     my_afree(keyinfo);
6056     DBUG_RETURN(1);
6057   }
6058   if (!(columndef=(MARIA_COLUMNDEF*)
6059 	my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1))))
6060   {
6061     my_afree(keyinfo);
6062     my_afree(keysegs);
6063     DBUG_RETURN(1);
6064   }
6065   if (!(uniquedef=(MARIA_UNIQUEDEF*)
6066 	my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1))))
6067   {
6068     my_afree(columndef);
6069     my_afree(keyinfo);
6070     my_afree(keysegs);
6071     DBUG_RETURN(1);
6072   }
6073 
6074   /* Copy the column definitions in their original order */
6075   for (column= share.columndef, end= share.columndef+share.base.fields;
6076        column != end ;
6077        column++)
6078     columndef[column->column_nr]= *column;
6079 
6080   /* Change the new key to point at the saved key segments */
6081   memcpy((uchar*) keysegs,(uchar*) share.keyparts,
6082 	 (size_t) (sizeof(HA_KEYSEG)*(key_parts+share.base.keys+
6083 				      share.state.header.uniques)));
6084   keyseg=keysegs;
6085   for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++)
6086   {
6087     key->seg=keyseg;
6088     for (; keyseg->type ; keyseg++)
6089     {
6090       if (param->language)
6091 	keyseg->language=param->language;	/* change language */
6092     }
6093     keyseg++;					/* Skip end pointer */
6094   }
6095 
6096   /*
6097     Copy the unique definitions and change them to point at the new key
6098     segments
6099   */
6100   memcpy((uchar*) uniquedef,(uchar*) share.uniqueinfo,
6101 	 (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques)));
6102   for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques;
6103        u_ptr != u_end ; u_ptr++)
6104   {
6105     u_ptr->seg=keyseg;
6106     keyseg+=u_ptr->keysegs+1;
6107   }
6108 
6109   file_length=(ulonglong) mysql_file_seek(info.dfile.file, 0L, MY_SEEK_END, MYF(0));
6110   if (share.options & HA_OPTION_COMPRESS_RECORD)
6111     share.base.records=max_records=info.state->records;
6112   else if (share.base.min_pack_length)
6113     max_records=(ha_rows) (file_length / share.base.min_pack_length);
6114   else
6115     max_records=0;
6116   share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD;
6117 
6118   tmp_length= file_length+file_length/10;
6119   set_if_bigger(file_length,param->max_data_file_length);
6120   set_if_bigger(file_length,tmp_length);
6121   set_if_bigger(file_length,(ulonglong) share.base.max_data_file_length);
6122 
6123   maria_close(*org_info);
6124 
6125   bzero((char*) &create_info,sizeof(create_info));
6126   create_info.max_rows=MY_MAX(max_records,share.base.records);
6127   create_info.reloc_rows=share.base.reloc;
6128   create_info.old_options=(share.options |
6129 			   (unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0));
6130 
6131   create_info.data_file_length=file_length;
6132   create_info.auto_increment=share.state.auto_increment;
6133   create_info.language = (param->language ? param->language :
6134 			  share.base.language);
6135   create_info.key_file_length=  status_info.key_file_length;
6136   create_info.org_data_file_type= ((enum data_file_type)
6137                                    share.state.header.org_data_file_type);
6138 
6139   /*
6140     Allow for creating an auto_increment key. This has an effect only if
6141     an auto_increment key exists in the original table.
6142   */
6143   create_info.with_auto_increment= TRUE;
6144   create_info.null_bytes= share.base.null_bytes;
6145   create_info.transactional= share.base.born_transactional;
6146 
6147   /*
6148     We don't have to handle symlinks here because we are using
6149     HA_DONT_TOUCH_DATA
6150   */
6151   if (maria_create(filename, share.data_file_type,
6152                    share.base.keys - share.state.header.uniques,
6153                    keyinfo, share.base.fields, columndef,
6154                    share.state.header.uniques, uniquedef,
6155                    &create_info,
6156                    HA_DONT_TOUCH_DATA))
6157   {
6158     _ma_check_print_error(param,
6159                           "Got error %d when trying to recreate indexfile",
6160                           my_errno);
6161     goto end;
6162   }
6163   *org_info= maria_open(filename,O_RDWR,
6164                         (HA_OPEN_FOR_REPAIR |
6165                          ((param->testflag & T_WAIT_FOREVER) ?
6166                           HA_OPEN_WAIT_IF_LOCKED :
6167                           (param->testflag & T_DESCRIPT) ?
6168                           HA_OPEN_IGNORE_IF_LOCKED :
6169                           HA_OPEN_ABORT_IF_LOCKED)));
6170   if (!*org_info)
6171   {
6172     _ma_check_print_error(param,
6173                           "Got error %d when trying to open re-created "
6174                           "indexfile", my_errno);
6175     goto end;
6176   }
6177   /* We are modifing */
6178   (*org_info)->s->options&= ~HA_OPTION_READ_ONLY_DATA;
6179   _ma_readinfo(*org_info,F_WRLCK,0);
6180   (*org_info)->s->state.state.records= info.state->records;
6181   if (share.state.create_time)
6182     (*org_info)->s->state.create_time=share.state.create_time;
6183 #ifdef MARIA_EXTERNAL_LOCKING
6184   (*org_info)->s->state.unique= (*org_info)->this_unique= share.state.unique;
6185 #endif
6186   (*org_info)->s->state.state.checksum= info.state->checksum;
6187   (*org_info)->s->state.state.del= info.state->del;
6188   (*org_info)->s->state.dellink= share.state.dellink;
6189   (*org_info)->s->state.state.empty= info.state->empty;
6190   (*org_info)->s->state.state.data_file_length= info.state->data_file_length;
6191   *(*org_info)->state= (*org_info)->s->state.state;
6192   if (maria_update_state_info(param,*org_info,UPDATE_TIME | UPDATE_STAT |
6193                               UPDATE_OPEN_COUNT))
6194     goto end;
6195   error=0;
6196 end:
6197   my_afree(uniquedef);
6198   my_afree(keyinfo);
6199   my_afree(columndef);
6200   my_afree(keysegs);
6201   DBUG_RETURN(error);
6202 }
6203 
6204 
6205 /* Write suffix to data file if needed */
6206 
maria_write_data_suffix(MARIA_SORT_INFO * sort_info,my_bool fix_datafile)6207 int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile)
6208 {
6209   MARIA_HA *info=sort_info->new_info;
6210 
6211   if (info->s->data_file_type == COMPRESSED_RECORD && fix_datafile)
6212   {
6213     uchar buff[MEMMAP_EXTRA_MARGIN];
6214     bzero(buff,sizeof(buff));
6215     if (my_b_write(&info->rec_cache,buff,sizeof(buff)))
6216     {
6217       _ma_check_print_error(sort_info->param,
6218 			   "%d when writing to datafile",my_errno);
6219       return 1;
6220     }
6221     sort_info->param->read_cache.end_of_file+=sizeof(buff);
6222   }
6223   return 0;
6224 }
6225 
6226 
6227 /* Update state and maria_chk time of indexfile */
6228 
maria_update_state_info(HA_CHECK * param,MARIA_HA * info,uint update)6229 int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update)
6230 {
6231   MARIA_SHARE *share= info->s;
6232   DBUG_ENTER("maria_update_state_info");
6233 
6234   if (update & UPDATE_OPEN_COUNT)
6235   {
6236     share->state.open_count=0;
6237     share->global_changed=0;
6238     share->changed= 1;
6239   }
6240   if (update & UPDATE_STAT)
6241   {
6242     uint i, key_parts= mi_uint2korr(share->state.header.key_parts);
6243     share->state.records_at_analyze= share->state.state.records;
6244     share->state.changed&= ~STATE_NOT_ANALYZED;
6245     if (share->state.state.records)
6246     {
6247       for (i=0; i<key_parts; i++)
6248       {
6249         if (!(share->state.rec_per_key_part[i]=param->new_rec_per_key_part[i]))
6250           share->state.changed|= STATE_NOT_ANALYZED;
6251       }
6252     }
6253   }
6254   if (update & (UPDATE_STAT | UPDATE_SORT | UPDATE_TIME | UPDATE_AUTO_INC))
6255   {
6256     if (update & UPDATE_TIME)
6257     {
6258       share->state.check_time= time((time_t*) 0);
6259       if (!share->state.create_time)
6260 	share->state.create_time= share->state.check_time;
6261     }
6262     if (_ma_state_info_write(share,
6263                              MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
6264                              MA_STATE_INFO_WRITE_FULL_INFO))
6265       goto err;
6266   }
6267   {						/* Force update of status */
6268     int error;
6269     uint r_locks=share->r_locks,w_locks=share->w_locks;
6270     share->r_locks= share->w_locks= share->tot_locks= 0;
6271     error= _ma_writeinfo(info,WRITEINFO_NO_UNLOCK);
6272     share->r_locks=r_locks;
6273     share->w_locks=w_locks;
6274     share->tot_locks=r_locks+w_locks;
6275     if (!error)
6276       DBUG_RETURN(0);
6277   }
6278 err:
6279   _ma_check_print_error(param,"%d when updating keyfile",my_errno);
6280   DBUG_RETURN(1);
6281 }
6282 
6283 /*
6284   Update auto increment value for a table
  When setting the 'repair_only' flag we only want to change the
  old auto_increment value if it's wrong (smaller than some given key).
  The reason is that we shouldn't change the auto_increment value
  for a table without good reason when only doing a repair; if the
  user has inserted and deleted rows, the auto_increment value
  may be bigger than the biggest current row and this is ok.

  If repair_only is not set, we will also raise the value to
  param->auto_increment_value if that is bigger than the biggest key.
6294 */
6295 
void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
                                   my_bool repair_only)
{
  MARIA_SHARE *share= info->s;
  uchar *row_buff;
  uint auto_idx;
  DBUG_ENTER("update_auto_increment_key");

  /*
    Reads the last key of the auto increment index and makes sure that
    share->state.auto_increment is not smaller than it; the state is then
    written with maria_update_state_info(). Nothing is done if the table
    has no active auto increment key.
  */
  if (!share->base.auto_key ||
      ! maria_is_key_active(share->state.key_map, share->base.auto_key - 1))
  {
    if (!(param->testflag & T_VERY_SILENT))
      _ma_check_print_info(param,
                           "Table: %s doesn't have an auto increment key\n",
                           param->isam_file_name);
    DBUG_VOID_RETURN;
  }
  auto_idx= share->base.auto_key - 1;

  if (!(param->testflag & (T_SILENT | T_REP)))
    printf("Updating Aria file: %s\n", param->isam_file_name);

  /*
    We have to use an allocated buffer instead of info->rec_buff as
    _ma_put_key_in_record() may use info->rec_buff
  */
  row_buff= (uchar*) my_malloc((size_t) share->base.default_rec_buff_size,
                               MYF(0));
  if (!row_buff)
  {
    _ma_check_print_error(param,"Not enough memory for extra record");
    DBUG_VOID_RETURN;
  }

  maria_extra(info,HA_EXTRA_KEYREAD,0);
  if (!maria_rlast(info, row_buff, auto_idx))
  {
    /* Got the last key: never let the counter be below its value */
    const HA_KEYSEG *keyseg= share->keyinfo[auto_idx].seg;
    ulonglong last_value=
      ma_retrieve_auto_increment(row_buff + keyseg->start, keyseg->type);

    set_if_bigger(share->state.auto_increment, last_value);
    if (!repair_only)
      set_if_bigger(share->state.auto_increment, param->auto_increment_value);
  }
  else if (my_errno == HA_ERR_END_OF_FILE)
  {
    /* Index is empty */
    if (!repair_only)
      share->state.auto_increment= param->auto_increment_value;
  }
  else
  {
    maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
    my_free(row_buff);
    _ma_check_print_error(param,"%d when reading last record",my_errno);
    DBUG_VOID_RETURN;
  }

  maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
  my_free(row_buff);
  maria_update_state_info(param, info, UPDATE_AUTO_INC);
  DBUG_VOID_RETURN;
}
6353 
6354 
6355 /*
6356   Update statistics for each part of an index
6357 
6358   SYNOPSIS
6359     maria_update_key_parts()
6360       keyinfo           IN  Index information (only key->keysegs used)
6361       rec_per_key_part  OUT Store statistics here
6362       unique            IN  Array of (#distinct tuples)
6363       notnull_tuples    IN  Array of (#tuples), or NULL
6364       records               Number of records in the table
6365 
6366   DESCRIPTION
    This function is called to produce index statistics values from the
    unique and notnull_tuples arrays after these arrays were produced with
    a sequential index scan (the scan is done in two places: chk_index()
    and sort_key_write()).
6371 
6372     This function handles all 3 index statistics collection methods.
6373 
6374     Unique is an array:
6375       unique[0]= (#different values of {keypart1}) - 1
6376       unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1
6377       ...
6378 
6379     For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too:
6380       notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL)
6381       notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all
6382                           keypart{i} are not NULL)
6383       ...
6384     For all other statistics collection methods notnull_tuples==NULL.
6385 
6386     Output is an array:
6387     rec_per_key_part[k] =
6388      = E(#records in the table such that keypart_1=c_1 AND ... AND
6389          keypart_k=c_k for arbitrary constants c_1 ... c_k)
6390 
     = {assuming that values have uniform distribution and index contains all
        tuples from the domain (or that {c_1, ..., c_k} tuple is chosen from
        index tuples)}
6394 
6395      = #tuples-in-the-index / #distinct-tuples-in-the-index.
6396 
6397     The #tuples-in-the-index and #distinct-tuples-in-the-index have different
6398     meaning depending on which statistics collection method is used:
6399 
6400     MI_STATS_METHOD_*  how are nulls compared?  which tuples are counted?
6401      NULLS_EQUAL            NULL == NULL           all tuples in table
6402      NULLS_NOT_EQUAL        NULL != NULL           all tuples in table
6403      IGNORE_NULLS               n/a             tuples that don't have NULLs
6404 */
6405 
void maria_update_key_parts(MARIA_KEYDEF *keyinfo, double *rec_per_key_part,
                      ulonglong *unique, ulonglong *notnull,
                      ulonglong records)
{
  ulonglong distinct_sum= 0;            /* Running sum of unique[0..idx] */
  uint idx;

  for (idx= 0; idx < keyinfo->keysegs; idx++)
  {
    ulonglong rows= records;
    ulonglong distinct;
    double estimate;

    distinct_sum+= unique[idx];
    distinct= distinct_sum + 1;
    if (notnull)
    {
      rows= notnull[idx];
      /*
        #(unique_tuples not counting tuples with NULLs) =
          #(unique_tuples counting tuples with NULLs as different) -
          #(tuples with NULLs)
      */
      distinct-= records - notnull[idx];
    }

    if (distinct == 0)
      estimate= 1;
    else if (distinct_sum == 0)
      estimate= ulonglong2double(rows);         /* 1 unique tuple */
    else
      estimate= ulonglong2double(rows) / ulonglong2double(distinct);

    /*
      For some weird keys (e.g. FULLTEXT) the estimate can be < 1 here;
      never store less than 1.
    */
    set_if_bigger(estimate, 1);

    rec_per_key_part[idx]= estimate;
  }
}
6445 
6446 
maria_byte_checksum(const uchar * buf,uint length)6447 static ha_checksum maria_byte_checksum(const uchar *buf, uint length)
6448 {
6449   ha_checksum crc;
6450   const uchar *end=buf+length;
6451   for (crc=0; buf != end; buf++)
6452     crc=((crc << 1) + *buf) +
6453       MY_TEST(crc & (((ha_checksum) 1) << (8 * sizeof(ha_checksum) - 1)));
6454   return crc;
6455 }
6456 
maria_too_big_key_for_sort(MARIA_KEYDEF * key,ha_rows rows)6457 static my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows)
6458 {
6459   uint key_maxlength=key->maxlength;
6460   if (key->flag & HA_FULLTEXT)
6461   {
6462     uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
6463                                   key->seg->charset->mbmaxlen;
6464     key_maxlength+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
6465   }
6466   return (key->flag & HA_SPATIAL) ||
6467           (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) &&
6468 	  ((ulonglong) rows * key_maxlength >
6469 	   (ulonglong) maria_max_temp_length));
6470 }
6471 
6472 /*
6473   Deactivate all indexes that can be recreated fast.
6474   These include packed keys on which sorting will use more temporary
6475   space than the max allowed file length or for which the unpacked keys
6476   will take much more space than packed keys.
6477   Note that 'rows' may be zero for the case when we don't know how many
6478   rows we will put into the file.
6479  */
6480 
maria_disable_indexes_for_rebuild(MARIA_HA * info,ha_rows rows,my_bool all_keys)6481 void maria_disable_indexes_for_rebuild(MARIA_HA *info, ha_rows rows,
6482                                     my_bool all_keys)
6483 {
6484   MARIA_SHARE *share= info->s;
6485   MARIA_KEYDEF    *key=share->keyinfo;
6486   uint          i;
6487 
6488   DBUG_ASSERT(share->state.state.records == 0 &&
6489               (!rows || rows >= MARIA_MIN_ROWS_TO_DISABLE_INDEXES));
6490   for (i=0 ; i < share->base.keys ; i++,key++)
6491   {
6492     if (!(key->flag & (HA_SPATIAL | HA_AUTO_KEY | HA_RTREE_INDEX)) &&
6493         ! maria_too_big_key_for_sort(key,rows) && share->base.auto_key != i+1 &&
6494         (all_keys || !(key->flag & HA_NOSAME)))
6495     {
6496       maria_clear_key_active(share->state.key_map, i);
6497       info->update|= HA_STATE_CHANGED;
6498       info->create_unique_index_by_sort= all_keys;
6499     }
6500   }
6501 }
6502 
6503 
6504 /*
6505   Return TRUE if we can use repair by sorting
6506   One can set the force argument to force to use sorting
6507   even if the temporary file would be quite big!
6508 */
6509 
maria_test_if_sort_rep(MARIA_HA * info,ha_rows rows,ulonglong key_map,my_bool force)6510 my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows,
6511                                ulonglong key_map, my_bool force)
6512 {
6513   MARIA_SHARE *share= info->s;
6514   MARIA_KEYDEF *key=share->keyinfo;
6515   uint i;
6516 
6517   /*
6518     maria_repair_by_sort only works if we have at least one key. If we don't
6519     have any keys, we should use the normal repair.
6520   */
6521   if (! maria_is_any_key_active(key_map))
6522     return FALSE;				/* Can't use sort */
6523   for (i=0 ; i < share->base.keys ; i++,key++)
6524   {
6525     if (!force && maria_too_big_key_for_sort(key,rows))
6526       return FALSE;
6527   }
6528   return TRUE;
6529 }
6530 
6531 
6532 /**
6533    @brief Create a new handle for manipulation the new record file
6534 
6535    @note
6536    It's ok for Recovery to have two MARIA_SHARE on the same index file
6537    because the one we create here is not transactional
6538 */
6539 
create_new_data_handle(MARIA_SORT_PARAM * param,File new_file)6540 static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file)
6541 {
6542 
6543   MARIA_SORT_INFO *sort_info= param->sort_info;
6544   MARIA_HA *info= sort_info->info;
6545   MARIA_HA *new_info;
6546   DBUG_ENTER("create_new_data_handle");
6547 
6548   if (!(sort_info->new_info= maria_open(info->s->open_file_name.str, O_RDWR,
6549                                         HA_OPEN_COPY | HA_OPEN_FOR_REPAIR |
6550                                         HA_OPEN_INTERNAL_TABLE)))
6551     DBUG_RETURN(1);
6552 
6553   new_info= sort_info->new_info;
6554   _ma_bitmap_set_pagecache_callbacks(&new_info->s->bitmap.file,
6555                                      new_info->s);
6556   _ma_set_data_pagecache_callbacks(&new_info->dfile, new_info->s);
6557   change_data_file_descriptor(new_info, new_file);
6558   maria_lock_database(new_info, F_EXTRA_LCK);
6559   if ((sort_info->param->testflag & T_UNPACK) &&
6560       info->s->data_file_type == COMPRESSED_RECORD)
6561   {
6562     (*new_info->s->once_end)(new_info->s);
6563     (*new_info->s->end)(new_info);
6564     restore_data_file_type(new_info->s);
6565     _ma_setup_functions(new_info->s);
6566     if ((*new_info->s->once_init)(new_info->s, new_file) ||
6567         (*new_info->s->init)(new_info))
6568       DBUG_RETURN(1);
6569   }
6570   _ma_reset_status(new_info);
6571   if (_ma_initialize_data_file(new_info->s, new_file))
6572     DBUG_RETURN(1);
6573 
6574   /* Take into account any bitmap page created above: */
6575   param->filepos= new_info->s->state.state.data_file_length;
6576 
6577   /* Use new virtual functions for key generation */
6578   info->s->keypos_to_recpos= new_info->s->keypos_to_recpos;
6579   info->s->recpos_to_keypos= new_info->s->recpos_to_keypos;
6580   DBUG_RETURN(0);
6581 }
6582 
6583 
6584 static void
set_data_file_type(MARIA_SORT_INFO * sort_info,MARIA_SHARE * share)6585 set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share)
6586 {
6587   if ((sort_info->new_data_file_type=share->data_file_type) ==
6588       COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK)
6589   {
6590     MARIA_SHARE tmp;
6591     sort_info->new_data_file_type= share->state.header.org_data_file_type;
6592     /* Set delete_function for sort_delete_record() */
6593     tmp= *share;
6594     tmp.state.header.data_file_type= tmp.state.header.org_data_file_type;
6595     tmp.options= ~HA_OPTION_COMPRESS_RECORD;
6596     _ma_setup_functions(&tmp);
6597     share->delete_record=tmp.delete_record;
6598   }
6599 }
6600 
static void restore_data_file_type(MARIA_SHARE *share)
{
  /*
    Make the share describe a non-compressed table again: clear the
    compress option, go back to the original data file type stored in the
    state header and take over the key position functions that belong to
    that type.
  */
  MARIA_SHARE probe;

  share->options&= ~HA_OPTION_COMPRESS_RECORD;
  mi_int2store(share->state.header.options,share->options);

  share->data_file_type= share->state.header.data_file_type=
    share->state.header.org_data_file_type;
  share->pack.header_length= 0;

  /*
    Use new virtual functions for key generation. They are looked up on a
    copy so that no other function pointer of the share is changed.
  */
  probe= *share;
  _ma_setup_functions(&probe);
  share->keypos_to_recpos= probe.keypos_to_recpos;
  share->recpos_to_keypos= probe.recpos_to_keypos;
}
6617 
6618 
static void change_data_file_descriptor(MARIA_HA *info, File new_file)
{
  /*
    Close the current data file of the handler and let both the handler
    and the bitmap of its share use 'new_file' instead.
  */
  MARIA_SHARE *share= info->s;

  mysql_file_close(info->dfile.file, MYF(MY_WME));
  share->bitmap.file.file= new_file;
  info->dfile.file= new_file;
  _ma_bitmap_reset_cache(share);
}
6625 
6626 
6627 /**
6628    @brief Mark the data file to not be used
6629 
6630    @note
6631    This is used in repair when we want to ensure the handler will not
6632    write anything to the data file anymore
6633 */
6634 
unuse_data_file_descriptor(MARIA_HA * info)6635 static void unuse_data_file_descriptor(MARIA_HA *info)
6636 {
6637   (void) flush_pagecache_blocks(info->s->pagecache,
6638                                 &info->s->bitmap.file,
6639                                 FLUSH_IGNORE_CHANGED);
6640   info->dfile.file= info->s->bitmap.file.file= -1;
6641   _ma_bitmap_reset_cache(info->s);
6642 }
6643 
6644 
6645 /*
6646   Copy all states that has to do with the data file
6647 
6648   NOTES
6649     This is done to copy the state from the data file generated from
6650     repair to the original handler
6651 */
6652 
copy_data_file_state(MARIA_STATE_INFO * to,MARIA_STATE_INFO * from)6653 static void copy_data_file_state(MARIA_STATE_INFO *to,
6654                                  MARIA_STATE_INFO *from)
6655 {
6656   to->state.records=           from->state.records;
6657   to->state.del=               from->state.del;
6658   to->state.empty=             from->state.empty;
6659   to->state.data_file_length=  from->state.data_file_length;
6660   to->split=                   from->split;
6661   to->dellink=		       from->dellink;
6662   to->first_bitmap_with_space= from->first_bitmap_with_space;
6663 }
6664 
6665 
/*
  Read 'safely' next record while scanning table.

  SYNOPSIS
    _ma_safe_scan_block_record()
    sort_info           Repair state. sort_info->page is the page the scan
                        is at (advanced here), sort_info->filelength limits
                        the scan and sort_info->param is used for messages
    info                Maria handler
    record              Store found here

  NOTES
    - One must have called mi_scan() before this

    Differences compared to  _ma_scan_block_records() are:
    - We read all blocks, not only blocks marked by the bitmap to be safe
    - In case of errors, next read will read next record.
    - More sanity checks

  RETURN
    0   ok
    HA_ERR_END_OF_FILE  End of file
    #   error number
*/


static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
                                      MARIA_HA *info, uchar *record)
{
  MARIA_SHARE *share= info->s;
  /* Continue in the row directory where the previous call stopped */
  MARIA_RECORD_POS record_pos= info->cur_row.nextpos;
  pgcache_page_no_t page= sort_info->page;
  DBUG_ENTER("_ma_safe_scan_block_record");

  for (;;)
  {
    /* Find next row in current page */
    if (likely(record_pos < info->scan.number_of_rows))
    {
      uint length, offset;
      uchar *data, *end_of_data;
      char llbuff[22];

      /*
        Skip directory entries with offset 0. The directory is walked
        towards lower addresses; passing dir_end means the directory does
        not match number_of_rows and the rest of the page is given up.
      */
      while (!(offset= uint2korr(info->scan.dir)))
      {
        info->scan.dir-= DIR_ENTRY_SIZE;
        record_pos++;
        if (info->scan.dir < info->scan.dir_end)
        {
          _ma_check_print_info(sort_info->param,
                               "Wrong directory on page %s",
                               llstr(page, llbuff));
          goto read_next_page;
        }
      }
      /* found row */
      info->cur_row.lastpos= info->scan.row_base_page + record_pos;
      info->cur_row.nextpos= record_pos + 1;
      data= info->scan.page_buff + offset;
      /* The row length is stored 2 bytes into the directory entry */
      length= uint2korr(info->scan.dir + 2);
      end_of_data= data + length;
      info->scan.dir-= DIR_ENTRY_SIZE;          /* Point to previous row */

      /*
        Sanity checks: the row must not reach past dir_end, must not start
        inside the page header and must be at least min_block_length long.
        A bad entry is reported and skipped; the scan goes on with the next
        entry.
      */
      if (end_of_data > info->scan.dir_end ||
          offset < PAGE_HEADER_SIZE(info->s) ||
          length < share->base.min_block_length)
      {
        _ma_check_print_info(sort_info->param,
                             "Wrong directory entry %3u at page %s",
                             (uint) record_pos, llstr(page, llbuff));
        record_pos++;
        continue;
      }
      else
      {
        DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos));
        DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data));
      }
    }

read_next_page:
    /* Read until we find next head page */
    for (;;)
    {
      uint page_type;
      char llbuff[22];

      /* sort_info->page is advanced together with the local 'page' */
      sort_info->page++;                        /* In case of errors */
      page++;
      if (!(page % share->bitmap.pages_covered))
      {
        /* Skip bitmap */
        page++;
        sort_info->page++;
      }
      /* Stop when the whole page would not fit within filelength */
      if ((my_off_t) (page + 1) * share->block_size > sort_info->filelength)
        DBUG_RETURN(HA_ERR_END_OF_FILE);
      if (!(pagecache_read(share->pagecache,
                           &info->dfile,
                           page, 0, info->scan.page_buff,
                           PAGECACHE_READ_UNKNOWN_PAGE,
                           PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
      {
        /* A page with wrong CRC is skipped; other read errors are returned */
        if (my_errno == HA_ERR_WRONG_CRC)
        {
          /*
            Don't give errors for zero filled blocks. These can
            sometimes be found at end of a bitmap when we wrote a big
            record last that was moved to the next bitmap.
          */
          if (_ma_check_bitmap_data(info, UNALLOCATED_PAGE, 0,
                                    _ma_bitmap_get_page_bits(info,
                                                             &share->bitmap,
                                                             page)))
          {
            _ma_check_print_info(sort_info->param,
                                 "Wrong CRC on datapage at %s",
                                 llstr(page, llbuff));
          }
          continue;
        }
        DBUG_RETURN(my_errno);
      }
      page_type= (info->scan.page_buff[PAGE_TYPE_OFFSET] &
                  PAGE_TYPE_MASK);
      if (page_type == HEAD_PAGE)
      {
        /* Only a head page with a non empty row directory ends the search */
        if ((info->scan.number_of_rows=
             (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) != 0)
          break;
        _ma_check_print_info(sort_info->param,
                             "Wrong head page at page %s",
                             llstr(page, llbuff));
      }
      else if (page_type >= MAX_PAGE_TYPE)
      {
        _ma_check_print_info(sort_info->param,
                             "Found wrong page type: %d at page %s",
                             page_type, llstr(page, llbuff));
      }
      /* Other page types below MAX_PAGE_TYPE are skipped without message */
    }

    /*
      New head page: 'dir' is set to the directory entry just before the
      page suffix and 'dir_end' to the entry (number_of_rows - 1) entries
      below it.
    */
    info->scan.dir= (info->scan.page_buff + share->block_size -
                     PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE);
    info->scan.dir_end= (info->scan.dir -
                         (info->scan.number_of_rows - 1) *
                         DIR_ENTRY_SIZE);
    info->scan.row_base_page= ma_recordpos(page, 0);
    record_pos= 0;
  }
}
6815 
6816 
6817 /**
6818    @brief Writes a LOGREC_REPAIR_TABLE record and updates create_rename_lsn
6819    if needed (so that maria_read_log does not redo the repair).
6820 
6821    @param  param            description of the REPAIR operation
6822    @param  info             table
6823 
6824    @return Operation status
6825      @retval 0      ok
6826      @retval 1      error (disk problem)
6827 */
6828 
write_log_record_for_repair(const HA_CHECK * param,MARIA_HA * info)6829 my_bool write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info)
6830 {
6831   MARIA_SHARE *share= info->s;
6832   /* in case this is maria_chk or recovery... */
6833   if (translog_status == TRANSLOG_OK && !maria_in_recovery &&
6834       share->base.born_transactional)
6835   {
6836     my_bool save_now_transactional= share->now_transactional;
6837 
6838     /*
6839       For now this record is only informative. It could serve when applying
6840       logs to a backup, but that needs more thought. Assume table became
6841       corrupted. It is repaired, then some writes happen to it.
6842       Later we restore an old backup, and want to apply this REDO_REPAIR_TABLE
6843       record. For it to give the same result as originally, the table should
6844       be corrupted the same way, so applying previous REDOs should produce the
6845       same corruption; that's really not guaranteed (different execution paths
6846       in execution of REDOs vs runtime code so not same bugs hit, temporary
6847       hardware issues not repeatable etc). Corruption may not be repeatable.
6848       A reasonable solution is to execute the REDO_REPAIR_TABLE record and
6849       check if the checksum of the resulting table matches what it was at the
6850       end of the original repair (should be stored in log record); or execute
6851       the REDO_REPAIR_TABLE if the checksum of the table-before-repair matches
6852       was it was at the start of the original repair (should be stored in log
6853       record).
6854     */
6855     LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6856     uchar log_data[FILEID_STORE_SIZE + 8 + 8];
6857     LSN lsn;
6858 
6859     /*
6860       testflag gives an idea of what REPAIR did (in particular T_QUICK
6861       or not: did it touch the data file or not?).
6862     */
6863     int8store(log_data + FILEID_STORE_SIZE, param->testflag);
6864     /* org_key_map is used when recreating index after a load data infile */
6865     int8store(log_data + FILEID_STORE_SIZE + 8, param->org_key_map);
6866 
6867     log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
6868     log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6869 
6870     share->now_transactional= 1;
6871     if (unlikely(translog_write_record(&lsn, LOGREC_REDO_REPAIR_TABLE,
6872                                        &dummy_transaction_object, info,
6873                                        (translog_size_t) sizeof(log_data),
6874                                        sizeof(log_array)/sizeof(log_array[0]),
6875                                        log_array, log_data, NULL) ||
6876                  translog_flush(lsn)))
6877       return TRUE;
6878     /*
6879       The table's existence was made durable earlier (MY_SYNC_DIR passed to
6880       maria_change_to_newfile()). All pages have been flushed, state too, we
6881       need to force it to disk. Old REDOs should not be applied to the table,
6882       which is already enforced as skip_redos_lsn was increased in
6883       protect_against_repair_crash(). But if this is an explicit repair,
6884       even UNDO phase should ignore this table: create_rename_lsn should be
6885       increased, and this also serves for the REDO_REPAIR to be ignored by
6886       maria_read_log.
6887       The fully correct order would be: sync data and index file, remove crash
6888       mark and update LSNs then write state and sync index file. But at this
6889       point state (without crash mark) is already written.
6890     */
6891     if ((!(param->testflag & T_NO_CREATE_RENAME_LSN) &&
6892          _ma_update_state_lsns(share, lsn, share->state.create_trid, FALSE,
6893                                FALSE)) ||
6894         _ma_sync_table_files(info))
6895       return TRUE;
6896     share->now_transactional= save_now_transactional;
6897   }
6898   return FALSE;
6899 }
6900 
6901 
6902 /**
6903   Writes an UNDO record which if executed in UNDO phase, will empty the
6904   table. Such record is thus logged only in certain cases of bulk insert
6905   (table needs to be empty etc).
6906 */
write_log_record_for_bulk_insert(MARIA_HA * info)6907 my_bool write_log_record_for_bulk_insert(MARIA_HA *info)
6908 {
6909   LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6910   uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE];
6911   LSN lsn;
6912   lsn_store(log_data, info->trn->undo_lsn);
6913   log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
6914   log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6915   return translog_write_record(&lsn, LOGREC_UNDO_BULK_INSERT,
6916                                info->trn, info,
6917                                (translog_size_t)
6918                                log_array[TRANSLOG_INTERNAL_PARTS +
6919                                          0].length,
6920                                TRANSLOG_INTERNAL_PARTS + 1, log_array,
6921                                log_data + LSN_STORE_SIZE, NULL) ||
6922     translog_flush(lsn); /* WAL */
6923 }
6924 
6925 
6926 /* Give error message why reading of key page failed */
6927 
report_keypage_fault(HA_CHECK * param,MARIA_HA * info,my_off_t position)6928 static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
6929                                  my_off_t position)
6930 {
6931   char buff[11];
6932   uint32 block_size= info->s->block_size;
6933 
6934   if (my_errno == HA_ERR_CRASHED)
6935     _ma_check_print_error(param,
6936                           "Wrong base information on indexpage at page: %s",
6937                           llstr(position / block_size, buff));
6938   else
6939     _ma_check_print_error(param,
6940                           "Can't read indexpage from page: %s, "
6941                           "error: %d",
6942                           llstr(position / block_size, buff), my_errno);
6943 }
6944 
6945 
6946 /**
6947   When we want to check a table, we verify that the transaction ids of rows
6948   and keys are not bigger than the biggest id generated by Maria so far, which
6949   is returned by the function below.
6950 
6951   @note If control file is not open, 0 may be returned; to not confuse
6952   this with a valid max trid of 0, the caller should notice that it failed to
6953   open the control file (ma_control_file_inited() can serve for that).
6954 */
6955 
max_trid_in_system(void)6956 static TrID max_trid_in_system(void)
6957 {
6958   TrID id= trnman_get_max_trid(); /* 0 if transac manager not initialized */
6959   /* 'id' may be far bigger, if last shutdown is old */
6960   return MY_MAX(id, max_trid_in_control_file);
6961 }
6962 
6963 
static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid)
{
  /*
    Count a row with a too big transaction id in
    param->not_visible_rows_found and print a message for the first one
    only: a warning if no control file is in use, otherwise an error that
    also shows param->max_trid.
  */
  char trid_buff[22], max_trid_buff[22];

  if (param->not_visible_rows_found++)
    return;                                     /* Already reported */

  if (ma_control_file_inited())
  {
    _ma_check_print_error(param,
                          "Found row with transaction id %s when max "
                          "transaction id according to aria_control_file "
                          "is %s",
                          llstr(used_trid, trid_buff),
                          llstr(param->max_trid, max_trid_buff));
    return;
  }
  _ma_check_print_warning(param,
                          "Found row with transaction id %s but no "
                          "aria_control_file was used or specified.  "
                          "The table may be corrupted",
                          llstr(used_trid, trid_buff));
}
6988 
6989 
6990 /**
6991   Mark that we can retry normal repair if we used quick repair
6992 
6993   We shouldn't do this in case of disk error as in this case we are likely
6994   to loose much more than expected.
6995 */
6996 
retry_if_quick(MARIA_SORT_PARAM * sort_param,int error)6997 void retry_if_quick(MARIA_SORT_PARAM *sort_param, int error)
6998 {
6999   HA_CHECK *param=sort_param->sort_info->param;
7000 
7001   if (!sort_param->fix_datafile && error >= HA_ERR_FIRST)
7002   {
7003     param->retry_repair=1;
7004     param->testflag|=T_RETRY_WITHOUT_QUICK;
7005   }
7006 }
7007 
7008 /* Print information about bitmap page */
7009 
print_bitmap_description(MARIA_SHARE * share,pgcache_page_no_t page,uchar * bitmap_data)7010 static void print_bitmap_description(MARIA_SHARE *share,
7011                                      pgcache_page_no_t page,
7012                                      uchar *bitmap_data)
7013 {
7014   char *tmp= my_malloc(MAX_BITMAP_INFO_LENGTH, MYF(MY_WME));
7015   if (!tmp)
7016     return;
7017   _ma_get_bitmap_description(&share->bitmap, bitmap_data, page, tmp);
7018   printf("Bitmap page %lu\n%s", (ulong) page, tmp);
7019   my_free(tmp);
7020 }
7021