1 /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; version 2 of the License.
6 
7    This program is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU General Public License for more details.
11 
12    You should have received a copy of the GNU General Public License
13    along with this program; if not, write to the Free Software
14    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15 
/* Describe, check and repair MARIA tables */
17 
18 /*
19   About checksum calculation.
20 
21   There are two types of checksums. Table checksum and row checksum.
22 
23   Row checksum is an additional uchar at the end of dynamic length
24   records. It must be calculated if the table is configured for them.
25   Otherwise they must not be used. The variable
26   MYISAM_SHARE::calc_checksum determines if row checksums are used.
27   MI_INFO::checksum is used as temporary storage during row handling.
28   For parallel repair we must assure that only one thread can use this
29   variable. There is no problem on the write side as this is done by one
30   thread only. But when checking a record after read this could go
31   wrong. But since all threads read through a common read buffer, it is
32   sufficient if only one thread checks it.
33 
34   Table checksum is an eight uchar value in the header of the index file.
35   It can be calculated even if row checksums are not used. The variable
36   MI_CHECK::glob_crc is calculated over all records.
37   MI_SORT_PARAM::calc_checksum determines if this should be done. This
38   variable is not part of MI_CHECK because it must be set per thread for
39   parallel repair. The global glob_crc must be changed by one thread
40   only. And it is sufficient to calculate the checksum once only.
41 */
42 
43 #include "ma_ftdefs.h"
44 #include "ma_rt_index.h"
45 #include "ma_blockrec.h"
46 #include "trnman.h"
47 #include "ma_key_recover.h"
48 #include <my_check_opt.h>
49 
50 #include <stdarg.h>
51 #include <my_getopt.h>
52 #ifdef HAVE_SYS_VADVISE_H
53 #include <sys/vadvise.h>
54 #endif
55 
56 /* Functions defined in this file */
57 
58 static int check_k_link(HA_CHECK *param, MARIA_HA *info, my_off_t next_link);
59 static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
60 		     MARIA_PAGE *page, ha_rows *keys,
61 		     ha_checksum *key_checksum, uint level);
62 static uint isam_key_length(MARIA_HA *info,MARIA_KEYDEF *keyinfo);
63 static ha_checksum calc_checksum(ha_rows count);
64 static int writekeys(MARIA_SORT_PARAM *sort_param);
65 static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
66                           MARIA_KEYDEF *keyinfo,
67 			  my_off_t pagepos, File new_file);
68 static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
69 static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key);
70 static int sort_get_next_record(MARIA_SORT_PARAM *sort_param);
71 static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
72                         const void *b);
73 static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
74                                    const uchar *a);
75 static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a);
76 static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo, const uchar *key);
77 static int sort_insert_key(MARIA_SORT_PARAM  *sort_param,
78                            reg1 SORT_KEY_BLOCKS *key_block,
79 			   const uchar *key, my_off_t prev_block);
80 static int sort_delete_record(MARIA_SORT_PARAM *sort_param);
81 /*static int _ma_flush_pending_blocks(HA_CHECK *param);*/
82 static SORT_KEY_BLOCKS	*alloc_key_blocks(HA_CHECK *param, uint blocks,
83 					  uint buffer_length);
84 static ha_checksum maria_byte_checksum(const uchar *buf, uint length);
85 static void set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share);
86 static void restore_data_file_type(MARIA_SHARE *share);
87 static void change_data_file_descriptor(MARIA_HA *info, File new_file);
88 static void unuse_data_file_descriptor(MARIA_HA *info);
89 static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
90                                       MARIA_HA *info, uchar *record);
91 static void copy_data_file_state(MARIA_STATE_INFO *to,
92                                  MARIA_STATE_INFO *from);
93 static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
94                                  my_off_t position);
95 static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file);
96 static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
97                                                  MARIA_HA *info);
98 static TrID max_trid_in_system(void);
99 static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid);
100 void retry_if_quick(MARIA_SORT_PARAM *param, int error);
101 static void print_bitmap_description(MARIA_SHARE *share,
102                                      pgcache_page_no_t page,
103                                      uchar *buff);
104 
105 
106 /* Initialize check param with default values */
107 
maria_chk_init(HA_CHECK * param)108 void maria_chk_init(HA_CHECK *param)
109 {
110   bzero((uchar*) param,sizeof(*param));
111   param->opt_follow_links=1;
112   param->keys_in_use= ~(ulonglong) 0;
113   param->search_after_block=HA_OFFSET_ERROR;
114   param->auto_increment_value= 0;
115   param->use_buffers= PAGE_BUFFER_INIT;
116   param->read_buffer_length=READ_BUFFER_INIT;
117   param->write_buffer_length=READ_BUFFER_INIT;
118   param->sort_buffer_length=SORT_BUFFER_INIT;
119   param->sort_key_blocks=BUFFERS_WHEN_SORTING;
120   param->tmpfile_createflag=O_RDWR | O_TRUNC | O_EXCL;
121   param->myf_rw=MYF(MY_NABP | MY_WME | MY_WAIT_IF_FULL);
122   param->start_check_pos=0;
123   param->max_record_length= LONGLONG_MAX;
124   param->pagecache_block_size= KEY_CACHE_BLOCK_SIZE;
125   param->stats_method= MI_STATS_METHOD_NULLS_NOT_EQUAL;
126   param->max_stage= 1;
127 }
128 
129 
130 /* Initialize check param and maria handler for check of table */
131 
maria_chk_init_for_check(HA_CHECK * param,MARIA_HA * info)132 void maria_chk_init_for_check(HA_CHECK *param, MARIA_HA *info)
133 {
134   param->not_visible_rows_found= 0;
135   param->max_found_trid= 0;
136 
137   /*
138     Set up transaction handler so that we can see all rows. When rows is read
139     we will check the found id against param->max_tried
140   */
141   if (!info->s->base.born_transactional)
142   {
143     /*
144       There are no trids. Howver we want to set max_trid to make test of
145       create_trid simpler.
146     */
147     param->max_trid= ~(TrID) 0;
148   }
149   else if (param->max_trid == 0 || param->max_trid == ~(TrID) 0)
150   {
151     if (!ma_control_file_inited())
152       param->max_trid= 0;      /* Give warning for first trid found */
153     else
154       param->max_trid= max_trid_in_system();
155   }
156 
157   maria_ignore_trids(info);
158 }
159 
160 
161 	/* Check the status flags for the table */
162 
maria_chk_status(HA_CHECK * param,MARIA_HA * info)163 int maria_chk_status(HA_CHECK *param, MARIA_HA *info)
164 {
165   MARIA_SHARE *share= info->s;
166 
167   /* Protection for HA_EXTRA_FLUSH */
168   mysql_mutex_lock(&share->intern_lock);
169 
170   if (maria_is_crashed_on_repair(info))
171     _ma_check_print_warning(param,
172 			   "Table is marked as crashed and last repair failed");
173   else if (maria_in_repair(info))
174     _ma_check_print_warning(param,
175                             "Last repair was aborted before finishing");
176   else if (maria_is_crashed(info))
177     _ma_check_print_warning(param,
178 			   "Table is marked as crashed");
179   if (share->state.open_count != (uint) (share->global_changed ? 1 : 0))
180   {
181     /* Don't count this as a real warning, as check can correct this ! */
182     my_bool save=param->warning_printed;
183     _ma_check_print_warning(param,
184 			   share->state.open_count==1 ?
185 			   "%d client is using or hasn't closed the table properly" :
186 			   "%d clients are using or haven't closed the table properly",
187 			   share->state.open_count);
188     /* If this will be fixed by the check, forget the warning */
189     if (param->testflag & T_UPDATE_STATE)
190       param->warning_printed=save;
191   }
192 
193   mysql_mutex_unlock(&share->intern_lock);
194 
195   if (share->state.create_trid > param->max_trid)
196   {
197     param->wrong_trd_printed= 1;       /* Force should run zerofill */
198     _ma_check_print_warning(param,
199                             "Table create_trd (%llu) > current max_transaction id (%llu).  Table needs to be repaired or zerofilled to be usable",
200                             share->state.create_trid, param->max_trid);
201     return 1;
202   }
203   return 0;
204 }
205 
206 /*
207   Check delete links in row data
208 */
209 
maria_chk_del(HA_CHECK * param,register MARIA_HA * info,ulonglong test_flag)210 int maria_chk_del(HA_CHECK *param, register MARIA_HA *info,
211                   ulonglong test_flag)
212 {
213   MARIA_SHARE *share= info->s;
214   reg2 ha_rows i;
215   uint delete_link_length;
216   my_off_t empty,next_link,UNINIT_VAR(old_link);
217   char buff[22],buff2[22];
218   DBUG_ENTER("maria_chk_del");
219 
220   param->record_checksum=0;
221 
222   if (share->data_file_type == BLOCK_RECORD)
223     DBUG_RETURN(0);                             /* No delete links here */
224 
225   delete_link_length=((share->options & HA_OPTION_PACK_RECORD) ? 20 :
226 		      share->rec_reflength+1);
227 
228   if (!(test_flag & T_SILENT))
229     puts("- check record delete-chain");
230 
231   next_link=share->state.dellink;
232   if (share->state.state.del == 0)
233   {
234     if (test_flag & T_VERBOSE)
235     {
236       puts("No recordlinks");
237     }
238   }
239   else
240   {
241     if (test_flag & T_VERBOSE)
242       printf("Recordlinks:    ");
243     empty=0;
244     for (i= share->state.state.del ; i > 0L && next_link != HA_OFFSET_ERROR ; i--)
245     {
246       if (_ma_killed_ptr(param))
247         DBUG_RETURN(1);
248       if (test_flag & T_VERBOSE)
249 	printf(" %9s",llstr(next_link,buff));
250       if (next_link >= share->state.state.data_file_length)
251 	goto wrong;
252       if (mysql_file_pread(info->dfile.file, (uchar*) buff, delete_link_length,
253 		   next_link,MYF(MY_NABP)))
254       {
255 	if (test_flag & T_VERBOSE) puts("");
256 	_ma_check_print_error(param,"Can't read delete-link at filepos: %s",
257                               llstr(next_link,buff));
258 	DBUG_RETURN(1);
259       }
260       if (*buff != '\0')
261       {
262 	if (test_flag & T_VERBOSE) puts("");
263 	_ma_check_print_error(param,"Record at pos: %s is not remove-marked",
264                               llstr(next_link,buff));
265 	goto wrong;
266       }
267       if (share->options & HA_OPTION_PACK_RECORD)
268       {
269 	my_off_t prev_link=mi_sizekorr(buff+12);
270 	if (empty && prev_link != old_link)
271 	{
272 	  if (test_flag & T_VERBOSE) puts("");
273 	  _ma_check_print_error(param,
274                                 "Deleted block at %s doesn't point back at previous delete link",
275                                 llstr(next_link,buff2));
276 	  goto wrong;
277 	}
278 	old_link=next_link;
279 	next_link=mi_sizekorr(buff+4);
280 	empty+=mi_uint3korr(buff+1);
281       }
282       else
283       {
284 	param->record_checksum+=(ha_checksum) next_link;
285 	next_link= _ma_rec_pos(share, (uchar *) buff + 1);
286 	empty+=share->base.pack_reclength;
287       }
288     }
289     if (share->state.state.del && (test_flag & T_VERBOSE))
290       puts("\n");
291     if (empty != share->state.state.empty)
292     {
293       _ma_check_print_warning(param,
294                               "Found %s deleted space in delete link chain. Should be %s",
295                               llstr(empty,buff2),
296                               llstr(share->state.state.empty,buff));
297     }
298     if (next_link != HA_OFFSET_ERROR)
299     {
300       _ma_check_print_error(param,
301                             "Found more than the expected %s deleted rows in delete link chain",
302                             llstr(share->state.state.del, buff));
303       goto wrong;
304     }
305     if (i != 0)
306     {
307       _ma_check_print_error(param,
308                             "Found %s deleted rows in delete link chain. Should be %s",
309                             llstr(share->state.state.del - i, buff2),
310                             llstr(share->state.state.del, buff));
311       goto wrong;
312     }
313   }
314   DBUG_RETURN(0);
315 
316 wrong:
317   param->testflag|=T_RETRY_WITHOUT_QUICK;
318   if (test_flag & T_VERBOSE)
319     puts("");
320   _ma_check_print_error(param,"record delete-link-chain corrupted");
321   DBUG_RETURN(1);
322 } /* maria_chk_del */
323 
324 
325 /* Check delete links in index file */
326 
check_k_link(HA_CHECK * param,register MARIA_HA * info,my_off_t next_link)327 static int check_k_link(HA_CHECK *param, register MARIA_HA *info,
328                         my_off_t next_link)
329 {
330   MARIA_SHARE *share= info->s;
331   uint block_size= share->block_size;
332   ha_rows records;
333   char llbuff[21], llbuff2[21];
334   uchar *buff;
335   DBUG_ENTER("check_k_link");
336 
337   if (next_link == HA_OFFSET_ERROR)
338     DBUG_RETURN(0);                             /* Avoid printing empty line */
339 
340   records= (ha_rows) (share->state.state.key_file_length / block_size);
341   while (next_link != HA_OFFSET_ERROR && records > 0)
342   {
343     if (_ma_killed_ptr(param))
344       DBUG_RETURN(1);
345     if (param->testflag & T_VERBOSE)
346       printf("%16s",llstr(next_link,llbuff));
347 
348     /* Key blocks must lay within the key file length entirely. */
349     if (next_link + block_size > share->state.state.key_file_length)
350     {
351       /* purecov: begin tested */
352       _ma_check_print_error(param, "Invalid key block position: %s  "
353                             "key block size: %u  file_length: %s",
354                             llstr(next_link, llbuff), block_size,
355                             llstr(share->state.state.key_file_length, llbuff2));
356       DBUG_RETURN(1);
357       /* purecov: end */
358     }
359 
360     /* Key blocks must be aligned at block_size */
361     if (next_link & (block_size -1))
362     {
363       /* purecov: begin tested */
364       _ma_check_print_error(param, "Mis-aligned key block: %s  "
365                             "minimum key block length: %u",
366                             llstr(next_link, llbuff),
367                             block_size);
368       DBUG_RETURN(1);
369       /* purecov: end */
370     }
371 
372     DBUG_ASSERT(share->pagecache->block_size == block_size);
373     if (!(buff= pagecache_read(share->pagecache,
374                                &share->kfile,
375                                (pgcache_page_no_t) (next_link / block_size),
376                                DFLT_INIT_HITS,
377                                info->buff, PAGECACHE_READ_UNKNOWN_PAGE,
378                                PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
379     {
380       /* purecov: begin tested */
381       _ma_check_print_error(param, "key cache read error for block: %s",
382                             llstr(next_link,llbuff));
383       DBUG_RETURN(1);
384       /* purecov: end */
385     }
386     if (_ma_get_keynr(info->s, buff) != MARIA_DELETE_KEY_NR)
387       _ma_check_print_error(param, "Page at %s is not delete marked",
388                             llstr(next_link, llbuff));
389 
390     next_link= mi_sizekorr(buff + share->keypage_header);
391     records--;
392     param->key_file_blocks+=block_size;
393   }
394   if (param->testflag & T_VERBOSE)
395   {
396     if (next_link != HA_OFFSET_ERROR)
397       printf("%16s\n",llstr(next_link,llbuff));
398     else
399       puts("");
400   }
401   DBUG_RETURN (next_link != HA_OFFSET_ERROR);
402 } /* check_k_link */
403 
404 
405 	/* Check sizes of files */
406 
maria_chk_size(HA_CHECK * param,register MARIA_HA * info)407 int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
408 {
409   MARIA_SHARE *share= info->s;
410   int error;
411   register my_off_t skr,size;
412   char buff[22],buff2[22];
413   DBUG_ENTER("maria_chk_size");
414 
415   if (!(param->testflag & T_SILENT))
416     puts("- check file-size");
417 
418   /*
419     The following is needed if called externally (not from maria_chk).
420     To get a correct physical size we need to flush them.
421   */
422   if ((error= _ma_flush_table_files(info,
423                                     MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
424                                     FLUSH_FORCE_WRITE, FLUSH_FORCE_WRITE)))
425     _ma_check_print_error(param, "Failed to flush data or index file");
426 
427   size= mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END, MYF(MY_THREADSAFE));
428   if ((skr=(my_off_t) share->state.state.key_file_length) != size)
429   {
430     /* Don't give error if file generated by maria_pack */
431     if (skr > size && maria_is_any_key_active(share->state.key_map))
432     {
433       error=1;
434       _ma_check_print_error(param,
435 			   "Size of indexfile is: %-8s         Expected: %s",
436 			   llstr(size,buff), llstr(skr,buff2));
437       share->state.state.key_file_length= size;
438     }
439     else if (!(param->testflag & T_VERY_SILENT))
440       _ma_check_print_warning(param,
441 			     "Size of indexfile is: %-8s       Expected: %s",
442 			     llstr(size,buff), llstr(skr,buff2));
443   }
444   if (size > share->base.max_key_file_length)
445   {
446     _ma_check_print_warning(param,
447                             "Size of indexfile is: %-8s which is bigger than max indexfile size: %s",
448                             ullstr(size,buff),
449                             ullstr(share->base.max_key_file_length, buff2));
450   }
451   else if (!(param->testflag & T_VERY_SILENT) &&
452            ! (share->options & HA_OPTION_COMPRESS_RECORD) &&
453            ulonglong2double(share->state.state.key_file_length) >
454            ulonglong2double(share->base.margin_key_file_length)*0.9)
455     _ma_check_print_warning(param,"Keyfile is almost full, %10s of %10s used",
456                             llstr(share->state.state.key_file_length,buff),
457                             llstr(share->base.max_key_file_length,buff));
458 
459   size= mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
460   skr=(my_off_t) share->state.state.data_file_length;
461   if (share->options & HA_OPTION_COMPRESS_RECORD)
462     skr+= MEMMAP_EXTRA_MARGIN;
463 #ifdef USE_RELOC
464   if (share->data_file_type == STATIC_RECORD &&
465       skr < (my_off_t) share->base.reloc*share->base.min_pack_length)
466     skr=(my_off_t) share->base.reloc*share->base.min_pack_length;
467 #endif
468   if (skr != size)
469   {
470     share->state.state.data_file_length=size;	/* Skip other errors */
471     if (skr > size && skr != size + MEMMAP_EXTRA_MARGIN)
472     {
473       error=1;
474       _ma_check_print_error(param,"Size of datafile is: %-9s         Expected: %s",
475 		    llstr(size,buff), llstr(skr,buff2));
476       param->testflag|=T_RETRY_WITHOUT_QUICK;
477     }
478     else
479     {
480       _ma_check_print_warning(param,
481                               "Size of datafile is: %-9s       Expected: %s",
482                               llstr(size,buff), llstr(skr,buff2));
483     }
484   }
485   if (size > share->base.max_data_file_length)
486   {
487     _ma_check_print_warning(param,
488                             "Size of datafile is: %-8s which is bigger than max datafile size: %s",
489                             ullstr(size,buff),
490                             ullstr(share->base.max_data_file_length, buff2));
491   } else if (!(param->testflag & T_VERY_SILENT) &&
492              !(share->options & HA_OPTION_COMPRESS_RECORD) &&
493              ulonglong2double(share->state.state.data_file_length) >
494              (ulonglong2double(share->base.max_data_file_length)*0.9))
495     _ma_check_print_warning(param, "Datafile is almost full, %10s of %10s used",
496                             llstr(share->state.state.data_file_length,buff),
497                             llstr(share->base.max_data_file_length,buff2));
498   DBUG_RETURN(error);
499 } /* maria_chk_size */
500 
501 
502 /* Check keys */
503 
maria_chk_key(HA_CHECK * param,register MARIA_HA * info)504 int maria_chk_key(HA_CHECK *param, register MARIA_HA *info)
505 {
506   uint key,found_keys=0,full_text_keys=0,result=0;
507   ha_rows keys;
508   ha_checksum old_record_checksum,init_checksum;
509   my_off_t all_keydata,all_totaldata,key_totlength,length;
510   double  *rec_per_key_part;
511   MARIA_SHARE *share= info->s;
512   MARIA_KEYDEF *keyinfo;
513   char buff[22],buff2[22];
514   MARIA_PAGE page;
515   DBUG_ENTER("maria_chk_key");
516 
517   if (!(param->testflag & T_SILENT))
518     puts("- check key delete-chain");
519 
520   param->key_file_blocks=share->base.keystart;
521   if (check_k_link(param, info, share->state.key_del))
522   {
523     if (param->testflag & T_VERBOSE) puts("");
524     _ma_check_print_error(param,"key delete-link-chain corrupted");
525     DBUG_RETURN(-1);
526   }
527 
528   if (!(param->testflag & T_SILENT))
529     puts("- check index reference");
530 
531   all_keydata=all_totaldata=key_totlength=0;
532   init_checksum=param->record_checksum;
533   old_record_checksum=0;
534   if (share->data_file_type == STATIC_RECORD)
535     old_record_checksum= (calc_checksum(share->state.state.records +
536                                         share->state.state.del-1) *
537                           share->base.pack_reclength);
538   rec_per_key_part= param->new_rec_per_key_part;
539   for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
540        rec_per_key_part+=keyinfo->keysegs, key++, keyinfo++)
541   {
542     param->key_crc[key]=0;
543     if (! maria_is_key_active(share->state.key_map, key))
544     {
545       /* Remember old statistics for key */
546       memcpy((char*) rec_per_key_part,
547 	     (char*) (share->state.rec_per_key_part +
548 		      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
549 	     keyinfo->keysegs*sizeof(*rec_per_key_part));
550       continue;
551     }
552     found_keys++;
553     _ma_report_progress(param, key, share->base.keys);
554 
555     param->record_checksum=init_checksum;
556 
557     bzero((char*) &param->unique_count,sizeof(param->unique_count));
558     bzero((char*) &param->notnull_count,sizeof(param->notnull_count));
559 
560     if ((!(param->testflag & T_SILENT)))
561       printf ("- check data record references index: %d\n",key+1);
562     if (keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL))
563       full_text_keys++;
564     if (share->state.key_root[key] == HA_OFFSET_ERROR)
565     {
566       if (share->state.state.records != 0 && !(keyinfo->flag & HA_FULLTEXT))
567         _ma_check_print_error(param, "Key tree %u is empty", key + 1);
568       goto do_stat;
569     }
570     if (_ma_fetch_keypage(&page, info, keyinfo, share->state.key_root[key],
571                           PAGECACHE_LOCK_LEFT_UNLOCKED, DFLT_INIT_HITS,
572                           info->buff, 0))
573     {
574       report_keypage_fault(param, info, share->state.key_root[key]);
575       if (!(param->testflag & T_INFO))
576 	DBUG_RETURN(-1);
577       result= -1;
578       continue;
579     }
580     param->key_file_blocks+=keyinfo->block_length;
581     keys=0;
582     param->keydata=param->totaldata=0;
583     param->key_blocks=0;
584     param->max_level=0;
585     if (chk_index(param, info,keyinfo, &page, &keys, param->key_crc+key,1))
586       DBUG_RETURN(-1);
587     if (!(keyinfo->flag & (HA_FULLTEXT | HA_SPATIAL | HA_RTREE_INDEX)))
588     {
589       if (keys != share->state.state.records)
590       {
591 	_ma_check_print_error(param,"Found %s keys of %s",llstr(keys,buff),
592 		    llstr(share->state.state.records,buff2));
593 	if (!(param->testflag & T_INFO))
594 	DBUG_RETURN(-1);
595 	result= -1;
596 	continue;
597       }
598       if ((found_keys - full_text_keys == 1 &&
599            !(share->data_file_type == STATIC_RECORD)) ||
600           (param->testflag & T_DONT_CHECK_CHECKSUM))
601 	old_record_checksum= param->record_checksum;
602       else if (old_record_checksum != param->record_checksum)
603       {
604 	if (key)
605 	  _ma_check_print_error(param,
606                                 "Key %u doesn't point at same records as "
607                                 "key 1",
608 		      key+1);
609 	else
610 	  _ma_check_print_error(param,"Key 1 doesn't point at all records");
611 	if (!(param->testflag & T_INFO))
612 	  DBUG_RETURN(-1);
613 	result= -1;
614 	continue;
615       }
616     }
617     if ((uint) share->base.auto_key -1 == key)
618     {
619       /* Check that auto_increment key is bigger than max key value */
620       ulonglong auto_increment;
621       const HA_KEYSEG *keyseg= share->keyinfo[share->base.auto_key-1].seg;
622       info->lastinx=key;
623       _ma_read_key_record(info, info->rec_buff, 0);
624       auto_increment=
625         ma_retrieve_auto_increment(info->rec_buff + keyseg->start,
626                                    keyseg->type);
627       if (auto_increment > share->state.auto_increment)
628       {
629 	_ma_check_print_warning(param, "Auto-increment value: %s is smaller "
630                                 "than max used value: %s",
631                                 llstr(share->state.auto_increment,buff2),
632                                 llstr(auto_increment, buff));
633       }
634       if (param->testflag & T_AUTO_INC)
635       {
636         set_if_bigger(share->state.auto_increment,
637                       auto_increment);
638         set_if_bigger(share->state.auto_increment,
639                       param->auto_increment_value);
640       }
641 
642       /* Check that there isn't a row with auto_increment = 0 in the table */
643       maria_extra(info,HA_EXTRA_KEYREAD,0);
644       bzero(info->lastkey_buff, keyinfo->seg->length);
645       if (!maria_rkey(info, info->rec_buff, key,
646                       info->lastkey_buff,
647                       (key_part_map) 1, HA_READ_KEY_EXACT))
648       {
649 	/* Don't count this as a real warning, as maria_chk can't correct it */
650 	my_bool save=param->warning_printed;
651 	_ma_check_print_warning(param, "Found row where the auto_increment "
652                                 "column has the value 0");
653 	param->warning_printed=save;
654       }
655       maria_extra(info,HA_EXTRA_NO_KEYREAD,0);
656     }
657 
658     length=(my_off_t) isam_key_length(info,keyinfo)*keys + param->key_blocks*2;
659     if (param->testflag & T_INFO && param->totaldata != 0L && keys != 0L)
660       printf("Key: %2d:  Keyblocks used: %3d%%  Packed: %4d%%  Max levels: %2d\n",
661 	     key+1,
662 	     (int) (my_off_t2double(param->keydata)*100.0/my_off_t2double(param->totaldata)),
663 	     (int) ((my_off_t2double(length) - my_off_t2double(param->keydata))*100.0/
664 		    my_off_t2double(length)),
665 	     param->max_level);
666     all_keydata+=param->keydata; all_totaldata+=param->totaldata; key_totlength+=length;
667 
668 do_stat:
669     if (param->testflag & T_STATISTICS)
670       maria_update_key_parts(keyinfo, rec_per_key_part, param->unique_count,
671                        param->stats_method == MI_STATS_METHOD_IGNORE_NULLS?
672                        param->notnull_count: NULL,
673                        (ulonglong)share->state.state.records);
674   }
675   if (param->testflag & T_INFO)
676   {
677     if (all_totaldata != 0L && found_keys > 0)
678       printf("Total:    Keyblocks used: %3d%%  Packed: %4d%%\n\n",
679 	     (int) (my_off_t2double(all_keydata)*100.0/
680 		    my_off_t2double(all_totaldata)),
681 	     (int) ((my_off_t2double(key_totlength) -
682 		     my_off_t2double(all_keydata))*100.0/
683 		     my_off_t2double(key_totlength)));
684     else if (all_totaldata != 0L && maria_is_any_key_active(share->state.key_map))
685       puts("");
686   }
687   if (param->key_file_blocks != share->state.state.key_file_length &&
688       share->state.key_map == ~(ulonglong) 0)
689     _ma_check_print_warning(param, "Some data are unreferenced in keyfile");
690   if (found_keys != full_text_keys)
691     param->record_checksum=old_record_checksum-init_checksum;	/* Remove delete links */
692   else
693     param->record_checksum=0;
694   DBUG_RETURN(result);
695 } /* maria_chk_key */
696 
697 
698 
/*
  Validate the position of a key page, read it and check it with chk_index()

  SYNOPSIS
    chk_index_down()
      param         INOUT  Check parameters; key_file_blocks is increased
                           by the block length if the page could be read
      info          IN     Handler for the table
      keyinfo       IN     Key the page belongs to
      page          IN     Position of the page in the index file
      buff          IN     Buffer given to _ma_fetch_keypage() for the page
      keys          INOUT  Passed on to chk_index()
      key_checksum  INOUT  Passed on to chk_index()
      level         IN     Passed on to chk_index()

  NOTES
    If the page is outside of the remembered key file length, an error is
    always printed. When the page still fits within the real file size the
    remembered length is corrected and the check goes on.

  RETURN
    0  ok
    1  Wrong page position, read error or error from chk_index()
*/

static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
                          my_off_t page, uchar *buff, ha_rows *keys,
                          ha_checksum *key_checksum, uint level)
{
  MARIA_SHARE *share= info->s;
  MARIA_PAGE child_page;
  char pos_str[22], len_str[22];
  DBUG_ENTER("chk_index_down");

  /* Key blocks must lay within the key file length entirely. */
  if (page + keyinfo->block_length > share->state.state.key_file_length)
  {
    /* purecov: begin tested */
    /* Give it a chance to fit in the real file size. */
    my_off_t real_length= mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END,
                                          MYF(MY_THREADSAFE));
    _ma_check_print_error(param, "Invalid key block position: %s  "
                          "key block size: %u  file_length: %s",
                          llstr(page, pos_str), keyinfo->block_length,
                          llstr(share->state.state.key_file_length, len_str));
    if (page + keyinfo->block_length > real_length)
      DBUG_RETURN(1);
    /* Fix the remembered key file length. */
    share->state.state.key_file_length=
      (real_length & ~ (my_off_t) (keyinfo->block_length - 1));
    /* purecov: end */
  }

  /* Key blocks must be aligned at block length */
  if (page & (share->block_size - 1))
  {
    /* purecov: begin tested */
    _ma_check_print_error(param, "Mis-aligned key block: %s  "
                          "key block length: %u",
                          llstr(page, pos_str), share->block_size);
    DBUG_RETURN(1);
    /* purecov: end */
  }

  if (_ma_fetch_keypage(&child_page, info, keyinfo, page,
                        PAGECACHE_LOCK_LEFT_UNLOCKED,
                        DFLT_INIT_HITS, buff, 0))
  {
    report_keypage_fault(param, info, page);
    DBUG_RETURN(1);
  }

  param->key_file_blocks+= keyinfo->block_length;
  if (chk_index(param, info, keyinfo, &child_page, keys, key_checksum, level))
    DBUG_RETURN(1);

  DBUG_RETURN(0);
}
758 
759 
760 /*
761   "Ignore NULLs" statistics collection method: process first index tuple.
762 
763   SYNOPSIS
764     maria_collect_stats_nonulls_first()
765       keyseg   IN     Array of key part descriptions
766       notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_i}
767                                            tuples that don't contain NULLs)
768       key      IN     Key values tuple
769 
770   DESCRIPTION
771     Process the first index tuple - find out which prefix tuples don't
772     contain NULLs, and update the array of notnull counters accordingly.
773 */
774 
775 static
maria_collect_stats_nonulls_first(HA_KEYSEG * keyseg,ulonglong * notnull,const uchar * key)776 void maria_collect_stats_nonulls_first(HA_KEYSEG *keyseg, ulonglong *notnull,
777                                        const uchar *key)
778 {
779   size_t first_null, kp;
780   first_null= ha_find_null(keyseg, key) - keyseg;
781   /*
782     All prefix tuples that don't include keypart_{first_null} are not-null
783     tuples (and all others aren't), increment counters for them.
784   */
785   for (kp= 0; kp < first_null; kp++)
786     notnull[kp]++;
787 }
788 
789 
790 /*
791   "Ignore NULLs" statistics collection method: process next index tuple.
792 
793   SYNOPSIS
794     maria_collect_stats_nonulls_next()
795       keyseg   IN     Array of key part descriptions
796       notnull  INOUT  Array, notnull[i] = (number of {keypart1...keypart_i}
797                                            tuples that don't contain NULLs)
798       prev_key IN     Previous key values tuple
799       last_key IN     Next key values tuple
800 
801   DESCRIPTION
802     Process the next index tuple:
803     1. Find out which prefix tuples of last_key don't contain NULLs, and
804        update the array of notnull counters accordingly.
805     2. Find the first keypart number where the prev_key and last_key tuples
806        are different(A), or last_key has NULL value(B), and return it, so the
807        caller can count number of unique tuples for each key prefix. We don't
808        need (B) to be counted, and that is compensated back in
809        maria_update_key_parts().
810 
811   RETURN
812     1 + number of first keypart where values differ or last_key tuple has NULL
813 */
814 
815 static
maria_collect_stats_nonulls_next(HA_KEYSEG * keyseg,ulonglong * notnull,const uchar * prev_key,const uchar * last_key)816 int maria_collect_stats_nonulls_next(HA_KEYSEG *keyseg, ulonglong *notnull,
817                                      const uchar *prev_key,
818                                      const uchar *last_key)
819 {
820   uint diffs[2];
821   size_t first_null_seg, kp;
822   HA_KEYSEG *seg;
823 
824   /*
825      Find the first keypart where values are different or either of them is
826      NULL. We get results in diffs array:
827      diffs[0]= 1 + number of first different keypart
828      diffs[1]=offset: (last_key + diffs[1]) points to first value in
829                       last_key that is NULL or different from corresponding
830                       value in prev_key.
831   */
832   ha_key_cmp(keyseg, prev_key, last_key, USE_WHOLE_KEY,
833              SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diffs);
834   seg= keyseg + diffs[0] - 1;
835 
836   /* Find first NULL in last_key */
837   first_null_seg= ha_find_null(seg, last_key + diffs[1]) - keyseg;
838   for (kp= 0; kp < first_null_seg; kp++)
839     notnull[kp]++;
840 
841   /*
842     Return 1+ number of first key part where values differ. Don't care if
843     these were NULLs and not .... We compensate for that in
844     maria_update_key_parts.
845   */
846   return diffs[0];
847 }
848 
849 
/*
  Check if index is ok

  SYNOPSIS
    chk_index()
      param         Check handler; counters in it are updated and errors
                    are reported through it
      info          Maria handler
      keyinfo       Definition of the index that is checked
      anc_page      Index page to check
      keys          INOUT  Number of keys found so far
      key_checksum  INOUT  Sum of maria_byte_checksum() of the keys found
      level         Depth of anc_page; used to maintain param->max_level

  DESCRIPTION
    Walks through all keys on anc_page. For a node page the child page in
    front of every key (and after the last key) is checked through
    chk_index_down(), which calls back into this function. Every key is
    verified against the previous key (order / duplicates) and its row
    position is verified against the data file length.

  RETURN
    0   ok. Also returned at once for HA_SPATIAL / HA_RTREE_INDEX keys,
        which are not checked here
    1   Error found (reported with _ma_check_print_error()) or
        _ma_killed_ptr() told us to stop
    -1  Could not allocate the buffer for child pages
*/

static int chk_index(HA_CHECK *param, MARIA_HA *info, MARIA_KEYDEF *keyinfo,
		     MARIA_PAGE *anc_page, ha_rows *keys,
		     ha_checksum *key_checksum, uint level)
{
  int flag;
  uint comp_flag, page_flag, nod_flag;
  uchar *temp_buff, *keypos, *old_keypos, *endpos;
  my_off_t next_page,record;
  MARIA_SHARE *share= info->s;
  char llbuff[22];
  uint diff_pos[2];
  uchar tmp_key_buff[MARIA_MAX_KEY_BUFF];
  MARIA_KEY tmp_key;
  DBUG_ENTER("chk_index");
  DBUG_DUMP("buff", anc_page->buff, anc_page->size);

  /* TODO: implement appropriate check for RTree keys */
  if (keyinfo->flag & (HA_SPATIAL | HA_RTREE_INDEX))
    DBUG_RETURN(0);

  /* temp_buff is handed to chk_index_down() as page buffer for child pages */
  if (!(temp_buff=(uchar*) my_alloca((uint) keyinfo->block_length)))
  {
    _ma_check_print_error(param,"Not enough memory for keyblock");
    DBUG_RETURN(-1);
  }

  /*
    Select compare flags. With SEARCH_FIND set, two keys that compare as
    equal are reported below as "duplicated key".
  */
  if (keyinfo->flag & HA_NOSAME)
  {
    /* Not real duplicates */
    comp_flag=SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT;
  }
  else
    comp_flag=SEARCH_SAME;			/* Keys in positionorder */

  /*
    Set up scan of the page:
    old_keypos  first byte after the page header
    keypos      old_keypos + nod_flag
    endpos      first byte after the used part of the page
  */
  page_flag=  anc_page->flag;
  nod_flag=   anc_page->node;
  old_keypos= anc_page->buff + share->keypage_header;
  keypos=     old_keypos + nod_flag;
  endpos=     anc_page->buff + anc_page->size;

  /* Update statistics about the index file */
  param->keydata+=   anc_page->size;
  param->totaldata+= keyinfo->block_length;	/* INFO */
  param->key_blocks++;
  if (level > param->max_level)
    param->max_level=level;

  /* The two following problems are reported, but the check continues */
  if (_ma_get_keynr(share, anc_page->buff) != keyinfo->key_nr)
    _ma_check_print_error(param, "Page at %s is not marked for index %u",
                          llstr(anc_page->pos, llbuff),
                          (uint) keyinfo->key_nr);
  if ((page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
      !share->base.born_transactional)
  {
    _ma_check_print_error(param,
                          "Page at %s is marked with HAS_TRANSID even if "
                          "table is not transactional",
                          llstr(anc_page->pos, llbuff));
  }

  if (anc_page->size > share->max_index_block_size)
  {
    _ma_check_print_error(param,
                          "Page at %s has impossible (too big) pagelength",
                          llstr(anc_page->pos, llbuff));
    goto err;
  }

  info->last_key.keyinfo= tmp_key.keyinfo= keyinfo;
  info->lastinx= ~0;                            /* Safety */
  tmp_key.data= tmp_key_buff;
  /*
    One turn per key. At the end of every completed turn (also after
    'continue') the key in tmp_key is copied to info->last_key, so that the
    next key can be compared with its predecessor.
  */
  for ( ;; _ma_copy_key(&info->last_key, &tmp_key))
  {
    if (nod_flag)
    {
      /* Node page: check the child page referred to at keypos first */
      if (_ma_killed_ptr(param))
        goto err;
      next_page= _ma_kpos(nod_flag,keypos);
      if (chk_index_down(param,info,keyinfo,next_page,
                         temp_buff,keys,key_checksum,level+1))
      {
        DBUG_DUMP("page_data", old_keypos, (uint) (keypos - old_keypos));
	goto err;
      }
    }
    old_keypos=keypos;
    /* Stop at end of page or when no more keys can be unpacked */
    if (keypos >= endpos ||
	!(*keyinfo->get_key)(&tmp_key, page_flag, nod_flag, &keypos))
      break;
    if (keypos > endpos)
    {
      _ma_check_print_error(param,
                            "Page length and length of keys don't match at "
                            "page: %s",
                            llstr(anc_page->pos,llbuff));
      goto err;
    }
    if (share->data_file_type == BLOCK_RECORD &&
        !(page_flag & KEYPAGE_FLAG_HAS_TRANSID) &&
        key_has_transid(tmp_key.data + tmp_key.data_length +
                        share->rec_reflength-1))
    {
      _ma_check_print_error(param,
                            "Found key marked for transid on page that is not "
                            "marked for transid at: %s",
                            llstr(anc_page->pos,llbuff));
      goto err;
    }

    /*
      Count the key and, except for the very first key (*keys was 0),
      compare it with the previous key. A compare result >= 0 is an error.
      The comparison also fills in diff_pos[].
    */
    if ((*keys)++ &&
	(flag=ha_key_cmp(keyinfo->seg, info->last_key.data, tmp_key.data,
                         tmp_key.data_length + tmp_key.ref_length,
                         (comp_flag | SEARCH_INSERT | (tmp_key.flag >> 1) |
                          info->last_key.flag), diff_pos)) >=0)
    {
      DBUG_DUMP_KEY("old", &info->last_key);
      DBUG_DUMP_KEY("new", &tmp_key);
      DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));

      if ((comp_flag & SEARCH_FIND) && flag == 0)
	_ma_check_print_error(param,"Found duplicated key at page %s",
                              llstr(anc_page->pos,llbuff));
      else
	_ma_check_print_error(param,"Key in wrong position at page %s",
                              llstr(anc_page->pos,llbuff));
      goto err;
    }

    if (param->testflag & T_STATISTICS)
    {
      if (*keys != 1L)				/* not first_key */
      {
        /*
          diff_pos[0] is recalculated for the two stats methods below. For
          any other method the value from the order comparison above is
          used as it is.
        */
        if (param->stats_method == MI_STATS_METHOD_NULLS_NOT_EQUAL)
          ha_key_cmp(keyinfo->seg, info->last_key.data,
                     tmp_key.data, tmp_key.data_length,
                     SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL,
                     diff_pos);
        else if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
        {
          diff_pos[0]= maria_collect_stats_nonulls_next(keyinfo->seg,
                                                        param->notnull_count,
                                                        info->last_key.data,
                                                        tmp_key.data);
        }
	param->unique_count[diff_pos[0]-1]++;
      }
      else
      {
        if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
          maria_collect_stats_nonulls_first(keyinfo->seg, param->notnull_count,
                                            tmp_key.data);
      }
    }
    (*key_checksum)+= maria_byte_checksum(tmp_key.data, tmp_key.data_length);
    record= _ma_row_pos_from_key(&tmp_key);

    if (keyinfo->flag & HA_FULLTEXT) /* special handling for ft2 */
    {
      uint off;
      int  subkeys;
      get_key_full_length_rdonly(off, tmp_key.data);
      subkeys= ft_sintXkorr(tmp_key.data + off);
      if (subkeys < 0)
      {
        /*
          The key refers to a 2nd level tree: 'record' is used as the
          position of its root page and -subkeys is the number of keys the
          tree is expected to contain.
        */
        ha_rows tmp_keys=0;
        share->ft2_keyinfo.key_nr= keyinfo->key_nr;
        if (chk_index_down(param,info,&share->ft2_keyinfo,record,
                           temp_buff,&tmp_keys,key_checksum,1))
          goto err;
        if (tmp_keys + subkeys)
        {
          _ma_check_print_error(param,
                               "Number of words in the 2nd level tree "
                               "does not match the number in the header. "
                               "Parent word in on the page %s, offset %u",
                               llstr(anc_page->pos,llbuff),
                                (uint) (old_keypos - anc_page->buff));
          goto err;
        }
        /*
          The parent key was counted by (*keys)++ above; replace that count
          with the number of keys found in the 2nd level tree. The row
          position check below is skipped for this key.
        */
        (*keys)+=tmp_keys-1;
        continue;
      }
      /* fall through */
    }
    /* The row position must be inside the data file (0 for NO_RECORD) */
    if ((share->data_file_type != BLOCK_RECORD &&
         share->data_file_type != NO_RECORD &&
         record >= share->state.state.data_file_length) ||
        (share->data_file_type == BLOCK_RECORD &&
         ma_recordpos_to_page(record) * share->base.min_block_length >=
         share->state.state.data_file_length) ||
        (share->data_file_type == NO_RECORD && record != 0))
    {
#ifndef DBUG_OFF
      char llbuff2[22], llbuff3[22];
#endif
      _ma_check_print_error(param,
                            "Found key at page %s that points to record "
                            "outside datafile",
                            llstr(anc_page->pos,llbuff));
      DBUG_PRINT("test",("page: %s  record: %s  filelength: %s",
			 llstr(anc_page->pos,llbuff),llstr(record,llbuff2),
			 llstr(share->state.state.data_file_length,llbuff3)));
      DBUG_DUMP_KEY("key", &tmp_key);
      DBUG_DUMP("new_in_page", old_keypos, (uint) (keypos-old_keypos));
      goto err;
    }
    param->record_checksum+= (ha_checksum) record;
  }
  /* The keys must end exactly at the end of the used part of the page */
  if (keypos != endpos)
  {
    _ma_check_print_error(param,
                          "Keyblock size at page %s is not correct. "
                          "Block length: %u  key length: %u",
                          llstr(anc_page->pos, llbuff), anc_page->size,
                          (uint) (keypos - anc_page->buff));
    goto err;
  }
  my_afree(temp_buff);
  DBUG_RETURN(0);
 err:
  my_afree(temp_buff);
  DBUG_RETURN(1);
} /* chk_index */
1074 
1075 
1076 	/* Calculate a checksum of 1+2+3+4...N = N*(N+1)/2 without overflow */
1077 
calc_checksum(ha_rows count)1078 static ha_checksum calc_checksum(ha_rows count)
1079 {
1080   ulonglong sum,a,b;
1081   DBUG_ENTER("calc_checksum");
1082 
1083   sum=0;
1084   a=count; b=count+1;
1085   if (a & 1)
1086     b>>=1;
1087   else
1088     a>>=1;
1089   while (b)
1090   {
1091     if (b & 1)
1092       sum+=a;
1093     a<<=1; b>>=1;
1094   }
1095   DBUG_PRINT("exit",("sum: %lx",(ulong) sum));
1096   DBUG_RETURN((ha_checksum) sum);
1097 } /* calc_checksum */
1098 
1099 
1100 	/* Calc length of key in normal isam */
1101 
isam_key_length(MARIA_HA * info,register MARIA_KEYDEF * keyinfo)1102 static uint isam_key_length(MARIA_HA *info, register MARIA_KEYDEF *keyinfo)
1103 {
1104   uint length;
1105   HA_KEYSEG *keyseg;
1106   DBUG_ENTER("isam_key_length");
1107 
1108   length= info->s->rec_reflength;
1109   for (keyseg=keyinfo->seg ; keyseg->type ; keyseg++)
1110     length+= keyseg->length;
1111 
1112   DBUG_PRINT("exit",("length: %d",length));
1113   DBUG_RETURN(length);
1114 } /* key_length */
1115 
1116 
1117 
/*
  Convert a row position to text

  SYNOPSIS
    record_pos_to_txt()
      info    Maria handler
      recpos  Row position
      buff    OUT  Buffer for the text; must be provided by the caller

  DESCRIPTION
    For BLOCK_RECORD tables the position is written as "page:row", where
    page and row are extracted with ma_recordpos_to_page() and
    ma_recordpos_to_dir_entry(). For all other row formats the position is
    written as one number with llstr().
*/

static void record_pos_to_txt(MARIA_HA *info, my_off_t recpos,
                              char *buff)
{
  my_off_t page_no;
  uint dir_entry;
  char *text_end;

  if (info->s->data_file_type != BLOCK_RECORD)
  {
    llstr(recpos, buff);
    return;
  }

  page_no= ma_recordpos_to_page(recpos);
  dir_entry= ma_recordpos_to_dir_entry(recpos);

  /* longlong10_to_str() returns where to continue writing */
  text_end= longlong10_to_str(page_no, buff, 10);
  *text_end++= ':';
  longlong10_to_str(dir_entry, text_end, 10);
}
1132 
1133 
1134 /*
1135   Check that keys in records exist in index tree
1136 
1137   SYNOPSIS
1138   check_keys_in_record()
1139   param		Check paramenter
1140   info		Maria handler
1141   extend	Type of check (extended or normal)
1142   start_recpos	Position to row
1143   record	Record buffer
1144 
1145   NOTES
1146     This function also calculates record checksum & number of rows
1147 */
1148 
check_keys_in_record(HA_CHECK * param,MARIA_HA * info,int extend,my_off_t start_recpos,uchar * record)1149 static int check_keys_in_record(HA_CHECK *param, MARIA_HA *info, int extend,
1150                                 my_off_t start_recpos, uchar *record)
1151 {
1152   MARIA_SHARE *share= info->s;
1153   MARIA_KEYDEF *keyinfo;
1154   char llbuff[22+4];
1155   uint keynr;
1156 
1157   param->tmp_record_checksum+= (ha_checksum) start_recpos;
1158   param->records++;
1159   if (param->records % WRITE_COUNT == 0)
1160   {
1161     if (param->testflag & T_WRITE_LOOP)
1162     {
1163       printf("%s\r", llstr(param->records, llbuff));
1164       fflush(stdout);
1165     }
1166     _ma_report_progress(param, param->records, share->state.state.records);
1167   }
1168 
1169   /* Check if keys match the record */
1170   for (keynr=0, keyinfo= share->keyinfo; keynr < share->base.keys;
1171        keynr++, keyinfo++)
1172   {
1173     if (maria_is_key_active(share->state.key_map, keynr))
1174     {
1175       MARIA_KEY key;
1176       if (!(keyinfo->flag & HA_FULLTEXT))
1177       {
1178         (*keyinfo->make_key)(info, &key, keynr, info->lastkey_buff, record,
1179                              start_recpos, 0);
1180         info->last_key.keyinfo= key.keyinfo;
1181         if (extend)
1182         {
1183           /* We don't need to lock the key tree here as we don't allow
1184              concurrent threads when running maria_chk
1185           */
1186           int search_result=
1187 #ifdef HAVE_RTREE_KEYS
1188             (keyinfo->flag & (HA_SPATIAL | HA_RTREE_INDEX)) ?
1189             maria_rtree_find_first(info, &key, MBR_EQUAL | MBR_DATA) :
1190 #endif
1191             _ma_search(info, &key, SEARCH_SAME, share->state.key_root[keynr]);
1192           if (search_result)
1193           {
1194             record_pos_to_txt(info, start_recpos, llbuff);
1195             _ma_check_print_error(param,
1196                                   "Record at: %14s  "
1197                                   "Can't find key for index: %2d",
1198                                   llbuff, keynr+1);
1199             if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1200               return -1;
1201           }
1202         }
1203         else
1204           param->tmp_key_crc[keynr]+=
1205             maria_byte_checksum(key.data, key.data_length);
1206       }
1207     }
1208   }
1209   return 0;
1210 }
1211 
1212 
1213 /*
1214   Functions to loop through all rows and check if they are ok
1215 
1216   NOTES
1217     One function for each record format
1218 
1219   RESULT
1220     0  ok
1221     -1 Interrupted by user
1222     1  Error
1223 */
1224 
/*
  Check all rows in a table with fixed length rows

  The data file is read through param->read_cache in steps of
  share->base.pack_reclength bytes. A row whose first byte is 0 is counted
  as deleted; for all other rows the checksum is added to param->glob_crc
  and the keys are checked with check_keys_in_record().

  Returns 0 if ok, -1 if interrupted and 1 on error.
*/

static int check_static_record(HA_CHECK *param, MARIA_HA *info, int extend,
                               uchar *record)
{
  MARIA_SHARE *share= info->s;
  my_off_t row_pos;
  char llbuff[22];

  for (row_pos= 0;
       row_pos < share->state.state.data_file_length;
       row_pos+= share->base.pack_reclength)
  {
    if (_ma_killed_ptr(param))
      return -1;

    if (my_b_read(&param->read_cache, record,
                  share->base.pack_reclength))
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: "
                            "%s",
                            my_errno, llstr(row_pos, llbuff));
      return 1;
    }

    param->splits++;
    if (*record == '\0')
    {
      /* Record removed */
      param->del_blocks++;
      param->del_length+= share->base.pack_reclength;
    }
    else
    {
      param->glob_crc+= _ma_static_checksum(info,record);
      param->used+= share->base.pack_reclength;
      /* Any failure from the key check is reported as a plain error */
      if (check_keys_in_record(param, info, extend, row_pos, record))
        return 1;
    }
  }
  return 0;
}
1262 
1263 
/*
  Check all rows in a table where a row is stored as a chain of blocks

  The data file is scanned from position 0. For every row the blocks are
  followed through block_info.next_filepos and the data parts are collected
  in info->rec_buff. The complete row is then unpacked into 'record',
  optionally verified with _ma_rec_check(), and its keys are checked with
  check_keys_in_record(). Deleted blocks are only counted.

  Returns 0 if ok, -1 if interrupted and 1 on error. An error in one row
  only stops the scan if more than MAXERR errors have been found or
  T_VERBOSE is not set; read errors and damaged block headers always stop
  the scan.
*/

static int check_dynamic_record(HA_CHECK *param, MARIA_HA *info, int extend,
                                uchar *record)
{
  MARIA_BLOCK_INFO block_info;
  MARIA_SHARE *share= info->s;
  /* The UNINIT_VAR variables get their values when the first block is read */
  my_off_t UNINIT_VAR(start_recpos), start_block, pos;
  uchar *UNINIT_VAR(to);
  ulong UNINIT_VAR(left_length);
  uint	b_type;
  char llbuff[22],llbuff2[22],llbuff3[22];
  myf myflag= MY_WME | (share->temporary ? MY_THREAD_SPECIFIC : 0);
  DBUG_ENTER("check_dynamic_record");

  pos= 0;
  /* One turn per row (or per skipped block) */
  while (pos < share->state.state.data_file_length)
  {
    my_bool got_error= 0;
    /* Number of blocks accepted for the current row; 0 = on first block */
    int flag;
    if (_ma_killed_ptr(param))
      DBUG_RETURN(-1);

    flag= block_info.second_read=0;
    block_info.next_filepos=pos;
    /* One turn per block of the row, until all of rec_len has been read */
    do
    {
      if (_ma_read_cache(info, &param->read_cache, block_info.header,
                         (start_block=block_info.next_filepos),
                         sizeof(block_info.header),
                         (flag ? 0 : READING_NEXT) | READING_HEADER))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at "
                              "position: %s",
                              my_errno, llstr(start_block, llbuff));
        DBUG_RETURN(1);
      }

      if (start_block & (MARIA_DYN_ALIGN_SIZE-1))
      {
        _ma_check_print_error(param,"Wrong aligned block at %s",
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
      }
      b_type= _ma_get_block_info(info, &block_info,-1,start_block);
      if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
                    BLOCK_FATAL_ERROR))
      {
        if (b_type & BLOCK_SYNC_ERROR)
        {
          /* Fatal inside a chain; at the start of a row the block is skipped */
          if (flag)
          {
            _ma_check_print_error(param,"Unexpected byte: %d at link: %s",
                                  (int) block_info.header[0],
                                  llstr(start_block,llbuff));
            DBUG_RETURN(1);
          }
          pos=block_info.filepos+block_info.block_len;
          goto next;
        }
        if (b_type & BLOCK_DELETED)
        {
          /* Verify length and links of the deleted block, then count it */
          if (block_info.block_len < share->base.min_block_length)
          {
            _ma_check_print_error(param,
                                  "Deleted block with impossible length %lu "
                                  "at %s",
                                  block_info.block_len,llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          if ((block_info.next_filepos != HA_OFFSET_ERROR &&
               block_info.next_filepos >= share->state.state.data_file_length) ||
              (block_info.prev_filepos != HA_OFFSET_ERROR &&
               block_info.prev_filepos >= share->state.state.data_file_length))
          {
            _ma_check_print_error(param,"Delete link points outside datafile "
                                  "at %s",
                                  llstr(pos,llbuff));
            DBUG_RETURN(1);
          }
          param->del_blocks++;
          param->del_length+= block_info.block_len;
          param->splits++;
          pos= block_info.filepos+block_info.block_len;
          goto next;
        }
        /* Any other error bit: the block header can't be used */
        _ma_check_print_error(param,"Wrong bytesec: %d-%d-%d at linkstart: %s",
                              block_info.header[0],block_info.header[1],
                              block_info.header[2],
                              llstr(start_block,llbuff));
        DBUG_RETURN(1);
      }
      if (share->state.state.data_file_length < block_info.filepos+
          block_info.block_len)
      {
        _ma_check_print_error(param,
                              "Recordlink that points outside datafile at %s",
                              llstr(pos,llbuff));
        got_error=1;
        break;
      }
      param->splits++;
      if (!flag++)				/* First block */
      {
        /* Remember start of row; the scan continues after this block */
        start_recpos=pos;
        pos=block_info.filepos+block_info.block_len;
        if (block_info.rec_len > (uint) share->base.max_pack_length)
        {
          my_errno= HA_ERR_WRONG_IN_RECORD;
          _ma_check_print_error(param,"Found too long record (%lu) at %s",
                                (ulong) block_info.rec_len,
                                llstr(start_recpos,llbuff));
          got_error=1;
          break;
        }
        if (share->base.blobs)
        {
          /* Make sure info->rec_buff can hold the whole packed row */
          if (_ma_alloc_buffer(&info->rec_buff, &info->rec_buff_size,
                               block_info.rec_len +
                               share->base.extra_rec_buff_size, myflag))

          {
            _ma_check_print_error(param,
                                  "Not enough memory (%lu) for blob at %s",
                                  (ulong) block_info.rec_len,
                                  llstr(start_recpos,llbuff));
            got_error=1;
            break;
          }
        }
        to= info->rec_buff;
        left_length= block_info.rec_len;
      }
      /* The block must not hold more data than what is left of the row */
      if (left_length < block_info.data_len)
      {
        _ma_check_print_error(param,"Found too long record (%lu) at %s",
                              (ulong) block_info.data_len,
                              llstr(start_recpos,llbuff));
        got_error=1;
        break;
      }
      /* Append the data part of the block to the row in info->rec_buff */
      if (_ma_read_cache(info, &param->read_cache, to, block_info.filepos,
                         (uint) block_info.data_len,
                         flag == 1 ? READING_NEXT : 0))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at "
                              "position: %s", my_errno,
                              llstr(block_info.filepos, llbuff));

        DBUG_RETURN(1);
      }
      to+=block_info.data_len;
      param->link_used+= block_info.filepos-start_block;
      param->used+= block_info.filepos - start_block + block_info.data_len;
      param->empty+= block_info.block_len-block_info.data_len;
      left_length-= block_info.data_len;
      if (left_length)
      {
        /* More data is expected; there must be a valid next block */
        if (b_type & BLOCK_LAST)
        {
          _ma_check_print_error(param,
                                "Wrong record length %s of %s at %s",
                                llstr(block_info.rec_len-left_length,llbuff),
                                llstr(block_info.rec_len, llbuff2),
                                llstr(start_recpos,llbuff3));
          got_error=1;
          break;
        }
        if (share->state.state.data_file_length < block_info.next_filepos)
        {
          _ma_check_print_error(param,
                                "Found next-recordlink that points outside "
                                "datafile at %s",
                                llstr(block_info.filepos,llbuff));
          got_error=1;
          break;
        }
      }
    } while (left_length);

    if (! got_error)
    {
      /* The whole row is in info->rec_buff; unpack and verify it */
      if (_ma_rec_unpack(info,record,info->rec_buff,block_info.rec_len) ==
          MY_FILE_ERROR)
      {
        _ma_check_print_error(param,"Found wrong record at %s",
                              llstr(start_recpos,llbuff));
        got_error=1;
      }
      else
      {
        /* The row checksum is only calculated if the table has checksums */
        ha_checksum checksum= 0;
        if (share->calc_checksum)
          checksum= (*share->calc_checksum)(info, record);

        if (param->testflag & (T_EXTEND | T_MEDIUM | T_VERBOSE))
        {
          if (_ma_rec_check(info,record, info->rec_buff,block_info.rec_len,
                            MY_TEST(share->calc_checksum), checksum))
          {
            _ma_check_print_error(param,"Found wrong packed record at %s",
                                  llstr(start_recpos,llbuff));
            got_error= 1;
          }
        }
        param->glob_crc+= checksum;
      }

      if (! got_error)
      {
        if (check_keys_in_record(param, info, extend, start_recpos, record))
          DBUG_RETURN(1);
      }
      else
      {
        /* Continue with the next row only for a verbose check */
        if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
          DBUG_RETURN(1);
      }
    }
    else if (!flag)
    {
      /*
        The error was found before pos was moved past the first block;
        move it now so that the scan does not stay on the same block.
      */
      pos= block_info.filepos+block_info.block_len;
    }
next:;
  }
  DBUG_RETURN(0);
}
1489 
1490 
/*
  Check all rows in a compressed table

  The scan starts after the pack header (share->pack.header_length). For
  every row the row header is read, the row length is verified against
  share->min_pack_length / share->max_pack_length, and the row is read and
  unpacked into 'record'. Good rows are added to the global checksum and
  their keys are checked with check_keys_in_record().

  Returns 0 if ok, -1 if interrupted and 1 on error. A bad row only stops
  the scan if more than MAXERR errors have been found or T_VERBOSE is not
  set; read errors always stop the scan.
*/

static int check_compressed_record(HA_CHECK *param, MARIA_HA *info, int extend,
                                   uchar *record)
{
  MARIA_BLOCK_INFO block_info;
  MARIA_SHARE *share= info->s;
  my_off_t row_start, next_row;
  char llbuff[22];
  DBUG_ENTER("check_compressed_record");

  for (next_row= share->pack.header_length;     /* Skip header */
       next_row < share->state.state.data_file_length; )
  {
    my_bool row_is_bad;

    if (_ma_killed_ptr(param))
      DBUG_RETURN(-1);

    if (_ma_read_cache(info, &param->read_cache, block_info.header, next_row,
                       share->pack.ref_length, READING_NEXT))
    {
      _ma_check_print_error(param,
                            "got error: %d when reading datafile at position: "
                            "%s",
                            my_errno, llstr(next_row, llbuff));
      DBUG_RETURN(1);
    }

    row_start= next_row;
    param->splits++;
    _ma_pack_get_block_info(info, &info->bit_buff, &block_info,
                            &info->rec_buff, &info->rec_buff_size, -1,
                            row_start);
    /* The next row starts directly after the data of this one */
    next_row= block_info.filepos+block_info.rec_len;

    row_is_bad= (block_info.rec_len < (uint) share->min_pack_length ||
                 block_info.rec_len > (uint) share->max_pack_length);
    if (row_is_bad)
    {
      _ma_check_print_error(param,
                            "Found block with wrong recordlength: %lu at %s",
                            block_info.rec_len, llstr(row_start,llbuff));
    }
    else
    {
      if (_ma_read_cache(info, &param->read_cache, info->rec_buff,
                         block_info.filepos, block_info.rec_len, READING_NEXT))
      {
        _ma_check_print_error(param,
                              "got error: %d when reading datafile at position: "
                              "%s",
                              my_errno, llstr(block_info.filepos, llbuff));
        DBUG_RETURN(1);
      }
      if (_ma_pack_rec_unpack(info, &info->bit_buff, record,
                              info->rec_buff, block_info.rec_len))
      {
        _ma_check_print_error(param,"Found wrong record at %s",
                              llstr(row_start,llbuff));
        row_is_bad= 1;
      }
      else
      {
        param->glob_crc+= (*share->calc_checksum)(info,record);
        param->link_used+= (block_info.filepos - row_start);
        param->used+= (next_row-row_start);
      }
    }

    if (row_is_bad)
    {
      /* Continue with the next row only for a verbose check */
      if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
        DBUG_RETURN(1);
    }
    else if (check_keys_in_record(param, info, extend, row_start, record))
      DBUG_RETURN(1);
  }
  DBUG_RETURN(0);
}
1568 
1569 
1570 /*
1571   Check if layout on head or tail page is ok
1572 
1573   NOTES
1574     This is for rows-in-block format.
1575 */
1576 
check_page_layout(HA_CHECK * param,MARIA_HA * info,my_off_t page_pos,uchar * page,uint row_count,uint head_empty,uint * real_rows_found,uint * free_slots_found)1577 static int check_page_layout(HA_CHECK *param, MARIA_HA *info,
1578                              my_off_t page_pos, uchar *page,
1579                              uint row_count, uint head_empty,
1580                              uint *real_rows_found, uint *free_slots_found)
1581 {
1582   uint empty, last_row_end, row, first_dir_entry, free_entry, block_size;
1583   uint free_entries, prev_free_entry;
1584   uchar *dir_entry;
1585   char llbuff[22];
1586   my_bool error_in_free_list= 0;
1587   DBUG_ENTER("check_page_layout");
1588 
1589   block_size= info->s->block_size;
1590   empty= 0;
1591   last_row_end= PAGE_HEADER_SIZE(info->s);
1592   *real_rows_found= 0;
1593 
1594   /* Check free directory list */
1595   free_entry= (uint) page[DIR_FREE_OFFSET];
1596   free_entries= 0;
1597   prev_free_entry= END_OF_DIR_FREE_LIST;
1598   while (free_entry != END_OF_DIR_FREE_LIST)
1599   {
1600     uchar *dir;
1601     if (free_entry > row_count)
1602     {
1603       _ma_check_print_error(param,
1604                             "Page %9s:  Directory free entry points outside "
1605                             "directory",
1606                             llstr(page_pos, llbuff));
1607       error_in_free_list= 1;
1608       break;
1609     }
1610     dir= dir_entry_pos(page, block_size, free_entry);
1611     if (uint2korr(dir) != 0)
1612     {
1613       _ma_check_print_error(param,
1614                             "Page %9s:  Directory free entry points to "
1615                             "not deleted entry",
1616                             llstr(page_pos, llbuff));
1617       error_in_free_list= 1;
1618       break;
1619     }
1620     if (dir[2] != prev_free_entry)
1621     {
1622       _ma_check_print_error(param,
1623                             "Page %9s:  Directory free list back pointer "
1624                             "points to wrong entry",
1625                             llstr(page_pos, llbuff));
1626       error_in_free_list= 1;
1627       break;
1628     }
1629     prev_free_entry= free_entry;
1630     free_entry= dir[3];
1631     free_entries++;
1632   }
1633   *free_slots_found= free_entries;
1634 
1635   /* Check directry */
1636   dir_entry= page+ block_size - PAGE_SUFFIX_SIZE;
1637   first_dir_entry= (block_size - row_count * DIR_ENTRY_SIZE -
1638                     PAGE_SUFFIX_SIZE);
1639   for (row= 0 ; row < row_count ; row++)
1640   {
1641     uint pos, length;
1642     dir_entry-= DIR_ENTRY_SIZE;
1643     pos= uint2korr(dir_entry);
1644     if (!pos)
1645     {
1646       free_entries--;
1647       if (row == row_count -1)
1648       {
1649         _ma_check_print_error(param,
1650                               "Page %9s:  First entry in directory is 0",
1651                               llstr(page_pos, llbuff));
1652         if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1653           DBUG_RETURN(1);
1654       }
1655       continue;                                 /* Deleted row */
1656     }
1657     (*real_rows_found)++;
1658     length= uint2korr(dir_entry+2);
1659     param->used+= length;
1660     if (pos < last_row_end)
1661     {
1662       _ma_check_print_error(param,
1663                             "Page %9s:  Row %3u overlapps with previous row",
1664                             llstr(page_pos, llbuff), row);
1665       DBUG_RETURN(1);
1666     }
1667     empty+= (pos - last_row_end);
1668     last_row_end= pos + length;
1669     if (last_row_end > first_dir_entry)
1670     {
1671       _ma_check_print_error(param,
1672                             "Page %9s:  Row %3u overlapps with directory",
1673                             llstr(page_pos, llbuff), row);
1674       DBUG_RETURN(1);
1675     }
1676   }
1677   empty+= (first_dir_entry - last_row_end);
1678 
1679   if (empty != head_empty)
1680   {
1681     _ma_check_print_error(param,
1682                           "Page %9s:  Wrong empty size.  Stored: %5u  "
1683                           "Actual: %5u",
1684                           llstr(page_pos, llbuff), head_empty, empty);
1685     param->err_count++;
1686   }
1687   if (free_entries != 0 && !error_in_free_list)
1688   {
1689     _ma_check_print_error(param,
1690                           "Page %9s:  Directory free link don't include "
1691                           "all free entries",
1692                           llstr(page_pos, llbuff));
1693     param->err_count++;
1694   }
1695   DBUG_RETURN(param->err_count &&
1696               (param->err_count >= MAXERR || !(param->testflag & T_VERBOSE)));
1697 }
1698 
1699 
1700 /*
1701   Check all rows on head page
1702 
1703   NOTES
1704     This is for rows-in-block format.
1705 
1706     Before this, we have already called check_page_layout(), so
1707     we know the block is logicaly correct (even if the rows may not be that)
1708 
1709   RETURN
1710    0  ok
1711    1  error
1712 */
1713 
1714 
check_head_page(HA_CHECK * param,MARIA_HA * info,uchar * record,int extend,my_off_t page_pos,uchar * page_buff,uint row_count)1715 static my_bool check_head_page(HA_CHECK *param, MARIA_HA *info, uchar *record,
1716                                int extend, my_off_t page_pos, uchar *page_buff,
1717                                uint row_count)
1718 {
1719   MARIA_SHARE *share= info->s;
1720   uchar *dir_entry;
1721   uint row;
1722   char llbuff[22], llbuff2[22];
1723   ulonglong page= page_pos / share->block_size;
1724   DBUG_ENTER("check_head_page");
1725 
1726   dir_entry= page_buff+ share->block_size - PAGE_SUFFIX_SIZE;
1727   for (row= 0 ; row < row_count ; row++)
1728   {
1729     uint pos, length, flag;
1730     dir_entry-= DIR_ENTRY_SIZE;
1731     pos= uint2korr(dir_entry);
1732     if (!pos)
1733       continue;
1734     length= uint2korr(dir_entry+2);
1735     if (length < share->base.min_block_length)
1736     {
1737       _ma_check_print_error(param,
1738                             "Page %9s:  Row %3u is too short "
1739                             "(%d of min %d bytes)",
1740                             llstr(page, llbuff), row, length,
1741                             (uint) share->base.min_block_length);
1742       DBUG_RETURN(1);
1743     }
1744     flag= (uint) (uchar) page_buff[pos];
1745     if (flag & ~(ROW_FLAG_ALL))
1746       _ma_check_print_error(param,
1747                             "Page %9s: Row %3u has wrong flag: %u",
1748                             llstr(page, llbuff), row, flag);
1749 
1750     DBUG_PRINT("info", ("rowid: %s  page: %lu  row: %u",
1751                         llstr(ma_recordpos(page, row), llbuff),
1752                         (ulong) page, row));
1753     info->cur_row.trid= 0;
1754     if (_ma_read_block_record2(info, record, page_buff+pos,
1755                                page_buff+pos+length))
1756     {
1757       _ma_check_print_error(param,
1758                             "Page %9s:  Row %3d is crashed",
1759                             llstr(page, llbuff), row);
1760       if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1761         DBUG_RETURN(1);
1762       continue;
1763     }
1764     set_if_bigger(param->max_found_trid, info->cur_row.trid);
1765     if (info->cur_row.trid > param->max_trid)
1766       _ma_check_print_not_visible_error(param, info->cur_row.trid);
1767 
1768     if (share->calc_checksum)
1769     {
1770       ha_checksum checksum= (*share->calc_checksum)(info, record);
1771       if (info->cur_row.checksum != (checksum & 255))
1772         _ma_check_print_error(param, "Page %9s:  Row %3d has wrong checksum",
1773                               llstr(page, llbuff), row);
1774       param->glob_crc+= checksum;
1775     }
1776     if (info->cur_row.extents_count)
1777     {
1778       uchar *extents= info->cur_row.extents;
1779       uint i;
1780       /* Check that bitmap has the right marker for the found extents */
1781       for (i= 0 ; i < info->cur_row.extents_count ; i++)
1782       {
1783         pgcache_page_no_t extent_page;
1784         uint page_count, page_type;
1785         extent_page= uint5korr(extents);
1786         page_count=  uint2korr(extents+5) & ~START_EXTENT_BIT;
1787         extents+=    ROW_EXTENT_SIZE;
1788         page_type=   BLOB_PAGE;
1789         if (page_count & TAIL_BIT)
1790         {
1791           page_count= 1;
1792           page_type= TAIL_PAGE;
1793         }
1794         /*
1795           TODO OPTIMIZE:
1796           Check the whole extent with one test and only do the loop if
1797           something is wrong (for exact error reporting)
1798         */
1799         for ( ; page_count--; extent_page++)
1800         {
1801           uint bitmap_pattern;
1802           if (_ma_check_if_right_bitmap_type(info, page_type, extent_page,
1803                                              &bitmap_pattern))
1804           {
1805             _ma_check_print_error(param,
1806                                   "Page %9s:  Row: %3d has an extent with "
1807                                   "wrong information in bitmap:  "
1808                                   "Page: %9s  Page_type: %d  Bitmap: %d",
1809                                   llstr(page, llbuff), row,
1810                                   llstr(extent_page, llbuff2),
1811                                   page_type, bitmap_pattern);
1812             if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1813               DBUG_RETURN(1);
1814           }
1815         }
1816       }
1817     }
1818     param->full_page_count+= info->cur_row.full_page_count;
1819     param->tail_count+= info->cur_row.tail_count;
1820     if (check_keys_in_record(param, info, extend,
1821                              ma_recordpos(page, row), record))
1822       DBUG_RETURN(1);
1823   }
1824   DBUG_RETURN(0);
1825 }
1826 
1827 
1828 /*
1829   Check if rows-in-block data file is consistent
1830 */
1831 
check_block_record(HA_CHECK * param,MARIA_HA * info,int extend,uchar * record)1832 static int check_block_record(HA_CHECK *param, MARIA_HA *info, int extend,
1833                               uchar *record)
1834 {
1835   MARIA_SHARE *share= info->s;
1836   my_off_t pos;
1837   pgcache_page_no_t page;
1838   uchar *page_buff, *bitmap_buff, *data;
1839   char llbuff[22], llbuff2[22];
1840   uint block_size= share->block_size;
1841   ha_rows full_page_count, tail_count;
1842   my_bool UNINIT_VAR(full_dir), now_transactional;
1843   uint offset_page, offset, free_count;
1844 
1845   if (_ma_scan_init_block_record(info))
1846   {
1847     _ma_check_print_error(param, "got error %d when initializing scan",
1848                           my_errno);
1849     return 1;
1850   }
1851 
1852   now_transactional= info->s->now_transactional;
1853   info->s->now_transactional= 0;                /* Don't log changes */
1854 
1855   bitmap_buff= info->scan.bitmap_buff;
1856   page_buff= info->scan.page_buff;
1857   full_page_count= tail_count= 0;
1858   param->full_page_count= param->tail_count= 0;
1859   param->used= param->link_used= 0;
1860   param->splits= share->state.state.data_file_length / block_size;
1861 
1862   for (pos= 0, page= 0;
1863        pos < share->state.state.data_file_length;
1864        pos+= block_size, page++)
1865   {
1866     uint UNINIT_VAR(row_count), real_row_count, UNINIT_VAR(empty_space),
1867          page_type, bitmap_pattern;
1868     uint bitmap_for_page;
1869 
1870     if (_ma_killed_ptr(param))
1871     {
1872       _ma_scan_end_block_record(info);
1873       info->s->now_transactional= now_transactional;
1874       return -1;                                /* Interrupted */
1875     }
1876     if ((page % share->bitmap.pages_covered) == 0)
1877     {
1878       /* Bitmap page */
1879       if (pagecache_read(share->pagecache,
1880                          &info->s->bitmap.file,
1881                          page, 1,
1882                          bitmap_buff,
1883                          PAGECACHE_PLAIN_PAGE,
1884                          PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
1885       {
1886         _ma_check_print_error(param,
1887                               "Page %9s:  Got error: %d when reading datafile",
1888                               llstr(page, llbuff), my_errno);
1889         goto err;
1890       }
1891       param->used+= block_size;
1892       param->link_used+= block_size;
1893       if (param->verbose > 2)
1894         print_bitmap_description(share, page, bitmap_buff);
1895       continue;
1896     }
1897     /* Skip pages marked as empty in bitmap */
1898     offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
1899     offset= offset_page & 7;
1900     data= bitmap_buff + offset_page / 8;
1901     bitmap_pattern= uint2korr(data);
1902     if (!(bitmap_for_page= ((bitmap_pattern >> offset) & 7)))
1903     {
1904       param->empty+= block_size;
1905       param->del_blocks++;
1906       continue;
1907     }
1908 
1909     if (pagecache_read(share->pagecache,
1910                        &info->dfile,
1911                        page, 1,
1912                        page_buff,
1913                        share->page_type,
1914                        PAGECACHE_LOCK_LEFT_UNLOCKED, 0) == 0)
1915     {
1916       _ma_check_print_error(param,
1917                             "Page %9s:  Got error: %d when reading datafile",
1918                             llstr(page, llbuff), my_errno);
1919       goto err;
1920     }
1921     page_type= page_buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK;
1922     if (page_type == UNALLOCATED_PAGE || page_type >= MAX_PAGE_TYPE)
1923     {
1924       _ma_check_print_error(param,
1925                             "Page: %9s  Found wrong page type %d. Bitmap: %d '%s'",
1926                             llstr(page, llbuff), page_type,
1927                             bitmap_for_page, bits_to_txt[bitmap_for_page]);
1928       if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1929         goto err;
1930       continue;
1931     }
1932     switch ((enum en_page_type) page_type) {
1933     case UNALLOCATED_PAGE:
1934     case MAX_PAGE_TYPE:
1935     default:
1936       DBUG_ASSERT(0);                           /* Impossible */
1937       break;
1938     case HEAD_PAGE:
1939       row_count= page_buff[DIR_COUNT_OFFSET];
1940       empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
1941       param->used+= block_size - empty_space;
1942       param->link_used+= (PAGE_HEADER_SIZE(info->s) + PAGE_SUFFIX_SIZE +
1943                           row_count * DIR_ENTRY_SIZE);
1944       if (empty_space < share->bitmap.sizes[3])
1945         param->lost+= empty_space;
1946       if (check_page_layout(param, info, pos, page_buff, row_count,
1947                             empty_space, &real_row_count, &free_count))
1948         goto err;
1949       full_dir= (row_count == MAX_ROWS_PER_PAGE &&
1950                  page_buff[DIR_FREE_OFFSET] == END_OF_DIR_FREE_LIST);
1951       break;
1952     case TAIL_PAGE:
1953       row_count= page_buff[DIR_COUNT_OFFSET];
1954       empty_space= uint2korr(page_buff + EMPTY_SPACE_OFFSET);
1955       param->used+= block_size - empty_space;
1956       param->link_used+= (PAGE_HEADER_SIZE(info->s) + PAGE_SUFFIX_SIZE +
1957                           row_count * DIR_ENTRY_SIZE);
1958       if (empty_space < share->bitmap.sizes[6])
1959         param->lost+= empty_space;
1960       if (check_page_layout(param, info, pos, page_buff, row_count,
1961                             empty_space, &real_row_count, &free_count))
1962         goto err;
1963       full_dir= (row_count - free_count >= MAX_ROWS_PER_PAGE -
1964                  share->base.blobs);
1965       break;
1966     case BLOB_PAGE:
1967       full_page_count++;
1968       full_dir= 0;
1969       empty_space= block_size;                  /* for error reporting */
1970       param->link_used+= FULL_PAGE_HEADER_SIZE(info->s);
1971       param->used+= block_size;
1972       break;
1973     }
1974     if (_ma_check_bitmap_data(info, page_type,
1975                               full_dir ? 0 : empty_space,
1976                               bitmap_for_page))
1977     {
1978         _ma_check_print_error(param,
1979                               "Page %9s:  Wrong data in bitmap.  Page_type: "
1980                               "%d  full: %d  empty_space: %u  Bitmap-bits: %d "
1981                               "'%s'",
1982                               llstr(page, llbuff), page_type, full_dir,
1983                               empty_space, bitmap_for_page,
1984                               bits_to_txt[bitmap_for_page]);
1985       if (param->err_count++ > MAXERR || !(param->testflag & T_VERBOSE))
1986         goto err;
1987     }
1988     if ((enum en_page_type) page_type == BLOB_PAGE)
1989       continue;
1990     param->empty+= empty_space;
1991     if ((enum en_page_type) page_type == TAIL_PAGE)
1992     {
1993       tail_count+= real_row_count;
1994       continue;
1995     }
1996     if (check_head_page(param, info, record, extend, pos, page_buff,
1997                         row_count))
1998       goto err;
1999   }
2000 
2001   /* Verify that rest of bitmap is zero */
2002 
2003   if (page % share->bitmap.pages_covered)
2004   {
2005     /* Not at end of bitmap */
2006     uint bitmap_pattern;
2007     uint byte_offset;
2008 
2009     offset_page= (uint) ((page % share->bitmap.pages_covered) -1) * 3;
2010     offset= offset_page & 7;
2011     byte_offset= offset_page / 8;
2012     data= bitmap_buff + byte_offset;
2013     bitmap_pattern= uint2korr(data);
2014     if (byte_offset + 1 == share->bitmap.max_total_size)
2015     {
2016       /* On last byte of bitmap; Remove possible checksum */
2017       bitmap_pattern&= 0xff;
2018     }
2019     if (((bitmap_pattern >> offset)) ||
2020         (byte_offset + 2 < share->bitmap.max_total_size &&
2021          _ma_check_if_zero(data+2, share->bitmap.max_total_size -
2022                            byte_offset - 2)))
2023     {
2024       ulonglong bitmap_page;
2025       bitmap_page= page / share->bitmap.pages_covered;
2026       bitmap_page*= share->bitmap.pages_covered;
2027 
2028       _ma_check_print_error(param,
2029                             "Bitmap at page %s has pages reserved outside of "
2030                             "data file length",
2031                             llstr(bitmap_page, llbuff));
2032       DBUG_EXECUTE("bitmap", _ma_print_bitmap(&share->bitmap, bitmap_buff,
2033                                               bitmap_page););
2034     }
2035   }
2036 
2037   _ma_scan_end_block_record(info);
2038 
2039   if (full_page_count != param->full_page_count)
2040     _ma_check_print_error(param, "Full page count read through records was %s "
2041                           "but we found %s pages while scanning table",
2042                           llstr(param->full_page_count, llbuff),
2043                           llstr(full_page_count, llbuff2));
2044   if (tail_count != param->tail_count)
2045     _ma_check_print_error(param, "Tail count read through records was %s but "
2046                           "we found %s tails while scanning table",
2047                           llstr(param->tail_count, llbuff),
2048                           llstr(tail_count, llbuff2));
2049 
2050   info->s->now_transactional= now_transactional;
2051   return param->error_printed != 0;
2052 
2053 err:
2054   _ma_scan_end_block_record(info);
2055   info->s->now_transactional= now_transactional;
2056   return 1;
2057 }
2058 
2059 
2060 /* Check that record-link is ok */
2061 
maria_chk_data_link(HA_CHECK * param,MARIA_HA * info,my_bool extend)2062 int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend)
2063 {
2064   MARIA_SHARE *share= info->s;
2065   int	error;
2066   uchar *record;
2067   char llbuff[22],llbuff2[22],llbuff3[22];
2068   DBUG_ENTER("maria_chk_data_link");
2069 
2070   if (!(param->testflag & T_SILENT))
2071   {
2072     if (extend)
2073       puts("- check records and index references");
2074     else
2075       puts("- check record links");
2076   }
2077 
2078   if (!(record= (uchar*) my_malloc(share->base.default_rec_buff_size, MYF(0))))
2079   {
2080     _ma_check_print_error(param,"Not enough memory for record");
2081     DBUG_RETURN(-1);
2082   }
2083   param->records= param->del_blocks= 0;
2084   param->used= param->link_used= param->splits= param->del_length= 0;
2085   param->lost= 0;
2086   param->tmp_record_checksum= param->glob_crc= 0;
2087   param->err_count= 0;
2088 
2089   error= 0;
2090   param->empty= share->pack.header_length;
2091 
2092   bzero((char*) param->tmp_key_crc,
2093         share->base.keys * sizeof(param->tmp_key_crc[0]));
2094 
2095   info->in_check_table= 1;       /* Don't assert on checksum errors */
2096 
2097   switch (share->data_file_type) {
2098   case BLOCK_RECORD:
2099     error= check_block_record(param, info, extend, record);
2100     break;
2101   case STATIC_RECORD:
2102     error= check_static_record(param, info, extend, record);
2103     break;
2104   case DYNAMIC_RECORD:
2105     error= check_dynamic_record(param, info, extend, record);
2106     break;
2107   case COMPRESSED_RECORD:
2108     error= check_compressed_record(param, info, extend, record);
2109     break;
2110   case NO_RECORD:
2111     param->records= share->state.state.records;
2112     param->record_checksum= 0;
2113     extend= 1;                                  /* No row checksums */
2114     /* no data, nothing to do */
2115     break;
2116   } /* switch */
2117 
2118   info->in_check_table= 0;
2119 
2120   if (error)
2121     goto err;
2122 
2123   if (param->testflag & T_WRITE_LOOP)
2124   {
2125     fputs("          \r",stdout);
2126     fflush(stdout);
2127   }
2128   if (param->records != share->state.state.records)
2129   {
2130     _ma_check_print_error(param,
2131                           "Record-count is not ok; found %-10s  Should be: %s",
2132                           llstr(param->records,llbuff),
2133                           llstr(share->state.state.records,llbuff2));
2134     error=1;
2135   }
2136   if (param->record_checksum &&
2137 	   param->record_checksum != param->tmp_record_checksum)
2138   {
2139     _ma_check_print_error(param,
2140                           "Key pointers and record positions doesn't match");
2141     error=1;
2142   }
2143   if (param->glob_crc != share->state.state.checksum &&
2144       (share->options &
2145        (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)))
2146   {
2147     _ma_check_print_warning(param,
2148                             "Record checksum is not the same as checksum "
2149                             "stored in the index file");
2150     error=1;
2151   }
2152   if (!extend)
2153   {
2154     uint key;
2155     for (key=0 ; key < share->base.keys;  key++)
2156     {
2157       if (param->tmp_key_crc[key] != param->key_crc[key] &&
2158           !(share->keyinfo[key].flag &
2159             (HA_FULLTEXT | HA_SPATIAL | HA_RTREE_INDEX)))
2160       {
2161 	_ma_check_print_error(param,"Checksum for key: %2d doesn't match "
2162                               "checksum for records",
2163                               key+1);
2164 	error=1;
2165       }
2166     }
2167   }
2168 
2169   if (param->del_length != share->state.state.empty)
2170   {
2171     _ma_check_print_warning(param,
2172                             "Found %s deleted space.   Should be %s",
2173                             llstr(param->del_length,llbuff2),
2174                             llstr(share->state.state.empty,llbuff));
2175   }
2176   /* Skip following checks for BLOCK RECORD as they don't make any sence */
2177   if (share->data_file_type != BLOCK_RECORD)
2178   {
2179     if (param->used + param->empty + param->del_length !=
2180         share->state.state.data_file_length)
2181     {
2182       _ma_check_print_warning(param,
2183                               "Found %s record data and %s unused data and %s "
2184                               "deleted data",
2185                               llstr(param->used, llbuff),
2186                               llstr(param->empty,llbuff2),
2187                               llstr(param->del_length,llbuff3));
2188       _ma_check_print_warning(param,
2189                               "Total %s   Should be: %s",
2190                               llstr((param->used+param->empty +
2191                                      param->del_length), llbuff),
2192                               llstr(share->state.state.data_file_length,
2193                                     llbuff2));
2194     }
2195     if (param->del_blocks != share->state.state.del)
2196     {
2197       _ma_check_print_warning(param,
2198                               "Found %10s deleted blocks.  Should be: %s",
2199                               llstr(param->del_blocks,llbuff),
2200                               llstr(share->state.state.del,llbuff2));
2201     }
2202     if (param->splits != share->state.split)
2203     {
2204       _ma_check_print_warning(param,
2205                               "Found %10s parts.  Should be: %s",
2206                               llstr(param->splits, llbuff),
2207                               llstr(share->state.split,llbuff2));
2208     }
2209   }
2210   if (param->testflag & T_INFO)
2211   {
2212     if (param->warning_printed || param->error_printed)
2213       puts("");
2214     if (param->used != 0 && ! param->error_printed)
2215     {
2216       if (param->records)
2217       {
2218         printf("Records:%18s    M.recordlength:%9lu   Packed:%14.0f%%\n",
2219                llstr(param->records,llbuff),
2220                (long)((param->used - param->link_used)/param->records),
2221                (share->base.blobs ? 0.0 :
2222                 (ulonglong2double((ulonglong) share->base.reclength *
2223                                   param->records)-
2224                  my_off_t2double(param->used))/
2225                 ulonglong2double((ulonglong) share->base.reclength *
2226                                  param->records)*100.0));
2227         printf("Recordspace used:%9.0f%%   Empty space:%12d%%  "
2228                "Blocks/Record: %6.2f\n",
2229                (ulonglong2double(param->used - param->link_used)/
2230                 ulonglong2double(param->used-param->link_used+param->empty) *
2231                 100.0),
2232                (!param->records ? 100 :
2233                 (int) (ulonglong2double(param->del_length+param->empty)/
2234                        my_off_t2double(param->used)*100.0)),
2235                ulonglong2double(param->splits - param->del_blocks) /
2236                param->records);
2237       }
2238       else
2239         printf("Records:%18s\n", "0");
2240     }
2241     printf("Record blocks:%12s    Delete blocks:%10s\n",
2242            llstr(param->splits - param->del_blocks, llbuff),
2243            llstr(param->del_blocks, llbuff2));
2244     printf("Record data:  %12s    Deleted data: %10s\n",
2245            llstr(param->used - param->link_used,llbuff),
2246            llstr(param->del_length, llbuff2));
2247     printf("Empty space:  %12s    Linkdata:     %10s\n",
2248            llstr(param->empty, llbuff),llstr(param->link_used, llbuff2));
2249     if (share->data_file_type == BLOCK_RECORD)
2250     {
2251       printf("Full pages:   %12s    Tail count: %12s\n",
2252              llstr(param->full_page_count, llbuff),
2253              llstr(param->tail_count, llbuff2));
2254       printf("Lost space:   %12s\n", llstr(param->lost, llbuff));
2255       if (param->max_found_trid)
2256       {
2257         printf("Max trans. id: %11s\n",
2258                llstr(param->max_found_trid, llbuff));
2259       }
2260     }
2261   }
2262   my_free(record);
2263   DBUG_RETURN (error);
2264 
2265 err:
2266   my_free(record);
2267   param->testflag|=T_RETRY_WITHOUT_QUICK;
2268   DBUG_RETURN(1);
2269 } /* maria_chk_data_link */
2270 
2271 
2272 /**
2273   Prepares a table for a repair or index sort: flushes pages, records durably
2274   in the table that it is undergoing the operation (if that op crashes, that
2275   info will serve for Recovery and the user).
2276 
2277   If we start overwriting the index file, and crash then, old REDOs will
2278   be tried and fail. To prevent that, we bump skip_redo_lsn, and thus we have
2279   to flush and sync pages so that old REDOs can be skipped.
2280   If this is not a bulk insert, which Recovery can handle gracefully (by
2281   truncating files, see UNDO_BULK_INSERT) we also mark the table
2282   crashed-on-repair, so that user knows it has to re-repair. If bulk insert we
2283   shouldn't mark it crashed-on-repair, because if we did this, the UNDO phase
2284   would skip the table (UNDO_BULK_INSERT would not be applied),
2285   and maria_chk would not improve that.
2286   If this is an OPTIMIZE which merely sorts index, we need to do the same
2287   too: old REDOs should not apply to the new index file.
2288   Only the flush is needed when in maria_chk which is not crash-safe.
2289 
2290   @param  info             table
2291   @param  param            repair parameters
2292   @param  discard_index    if index pages can be thrown away
2293 */
2294 
protect_against_repair_crash(MARIA_HA * info,const HA_CHECK * param,my_bool discard_index)2295 static my_bool protect_against_repair_crash(MARIA_HA *info,
2296                                             const HA_CHECK *param,
2297                                             my_bool discard_index)
2298 {
2299   MARIA_SHARE *share= info->s;
2300 
2301   /*
2302     There are other than recovery-related reasons to do the writes below:
2303     - the physical size of the data file is sometimes used during repair: we
2304     need to flush to have it exact
2305     - we flush the state because maria_open(HA_OPEN_COPY) will want to read
2306     it from disk.
2307   */
2308   if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
2309                             FLUSH_FORCE_WRITE,
2310                             discard_index ? FLUSH_IGNORE_CHANGED :
2311                             FLUSH_FORCE_WRITE) ||
2312       (share->changed &&
2313        _ma_state_info_write(share,
2314                             MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
2315                             MA_STATE_INFO_WRITE_FULL_INFO |
2316                             MA_STATE_INFO_WRITE_LOCK)))
2317     return TRUE;
2318   /* In maria_chk this is not needed: */
2319   if (maria_multi_threaded && share->base.born_transactional)
2320   {
2321     if ((param->testflag & T_NO_CREATE_RENAME_LSN) == 0)
2322     {
2323       /* this can be true only for a transactional table */
2324       maria_mark_in_repair(info);
2325       if (_ma_state_info_write(share,
2326                                MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
2327                                MA_STATE_INFO_WRITE_LOCK))
2328         return TRUE;
2329     }
2330     if (translog_status == TRANSLOG_OK &&
2331         _ma_update_state_lsns(share, translog_get_horizon(),
2332                               share->state.create_trid, FALSE, FALSE))
2333       return TRUE;
2334     if (_ma_sync_table_files(info))
2335       return TRUE;
2336   }
2337   return FALSE;
2338 }
2339 
2340 
2341 /**
2342    @brief Initialize variables for repair
2343 */
2344 
initialize_variables_for_repair(HA_CHECK * param,MARIA_SORT_INFO * sort_info,MARIA_SORT_PARAM * sort_param,MARIA_HA * info,my_bool rep_quick,MARIA_SHARE * org_share)2345 static int initialize_variables_for_repair(HA_CHECK *param,
2346                                            MARIA_SORT_INFO *sort_info,
2347                                            MARIA_SORT_PARAM *sort_param,
2348                                            MARIA_HA *info,
2349                                            my_bool rep_quick,
2350                                            MARIA_SHARE *org_share)
2351 {
2352   MARIA_SHARE *share= info->s;
2353 
2354   /*
2355     We have to clear these variables first, as the cleanup-in-case-of-error
2356     handling may touch these.
2357   */
2358   bzero((char*) sort_info,  sizeof(*sort_info));
2359   bzero((char*) sort_param, sizeof(*sort_param));
2360   bzero(&info->rec_cache, sizeof(info->rec_cache));
2361 
2362   if (share->data_file_type == NO_RECORD)
2363   {
2364     _ma_check_print_error(param,
2365                           "Can't repair tables with record type NO_DATA");
2366     return 1;
2367   }
2368 
2369   /* Make a copy to allow us to restore state and check how state changed */
2370   memcpy(org_share, share, sizeof(*share));
2371 
2372   /* Repair code relies on share->state.state so we have to update it here */
2373   if (share->lock.update_status)
2374     (*share->lock.update_status)(info);
2375 
2376   param->testflag|= T_REP;                     /* for easy checking */
2377   if (share->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD))
2378     param->testflag|= T_CALC_CHECKSUM;
2379   param->glob_crc= 0;
2380   if (rep_quick)
2381     param->testflag|= T_QUICK;
2382   else
2383     param->testflag&= ~T_QUICK;
2384   param->org_key_map= share->state.key_map;
2385 
2386   /*
2387     Clear check variables set by repair. This is needed to allow one to run
2388     several repair's in a row with same param
2389   */
2390   param->retry_repair= 0;
2391   param->warning_printed= 0;
2392   param->error_printed= 0;
2393   param->wrong_trd_printed= 0;
2394 
2395   sort_param->sort_info= sort_info;
2396   sort_param->fix_datafile= ! rep_quick;
2397   sort_param->calc_checksum= MY_TEST(param->testflag & T_CALC_CHECKSUM);
2398   sort_info->info= sort_info->new_info= info;
2399   sort_info->param= param;
2400   set_data_file_type(sort_info, info->s);
2401   sort_info->org_data_file_type= share->data_file_type;
2402 
2403   info->rec_cache.file= info->dfile.file;
2404   info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
2405 
2406   if (protect_against_repair_crash(info, param,
2407                                    !MY_TEST(param->testflag &
2408                                             T_CREATE_MISSING_KEYS)))
2409     return 1;
2410 
2411   /* calculate max_records */
2412   sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
2413   param->max_progress= sort_info->filelength;
2414   if ((param->testflag & T_CREATE_MISSING_KEYS) ||
2415       sort_info->org_data_file_type == COMPRESSED_RECORD)
2416     sort_info->max_records= share->state.state.records;
2417   else
2418   {
2419     ulong rec_length;
2420     rec_length= MY_MAX(share->base.min_pack_length,
2421                     share->base.min_block_length);
2422     sort_info->max_records= (ha_rows) (sort_info->filelength / rec_length);
2423   }
2424 
2425   /* Set up transaction handler so that we can see all rows */
2426   if (param->max_trid == 0)
2427   {
2428     if (!ma_control_file_inited())
2429       param->max_trid= 0;      /* Give warning for first trid found */
2430     else
2431       param->max_trid= max_trid_in_system();
2432   }
2433   maria_ignore_trids(info);
2434   /* Don't write transid's during repair */
2435   maria_versioning(info, 0);
2436   /* remember original number of rows */
2437   *info->state= info->s->state.state;
2438   return 0;
2439 }
2440 
2441 
2442 /*
2443   During initialize_variables_for_repair and related functions we set some
2444   variables to values that makes sence during repair.
2445   This function restores these values to their original values so that we can
2446   use the handler in MariaDB without having to close and open the table.
2447 */
2448 
restore_table_state_after_repair(MARIA_HA * info,MARIA_SHARE * org_share)2449 static void restore_table_state_after_repair(MARIA_HA *info,
2450                                              MARIA_SHARE *org_share)
2451 {
2452   maria_versioning(info, info->s->have_versioning);
2453   info->s->lock_key_trees= org_share->lock_key_trees;
2454   DBUG_ASSERT(!info->s->have_versioning || info->s->lock_key_trees);
2455 }
2456 
2457 
2458 /**
2459   @brief Drop all indexes
2460 
2461   @param[in]    param           check parameters
2462   @param[in]    info            MARIA_HA handle
2463   @param[in]    force           if to force drop all indexes
2464 
2465   @return       status
2466     @retval     0               OK
2467     @retval     != 0            Error
2468 
2469   @note
2470     Once allocated, index blocks remain part of the key file forever.
2471     When indexes are disabled, no block is freed. When enabling indexes,
2472     no block is freed either. The new indexes are create from new
2473     blocks. (Bug #4692)
2474 
2475     Before recreating formerly disabled indexes, the unused blocks
2476     must be freed. There are two options to do this:
2477     - Follow the tree of disabled indexes, add all blocks to the
2478       deleted blocks chain. Would require a lot of random I/O.
2479     - Drop all blocks by clearing all index root pointers and all
2480       delete chain pointers and resetting key_file_length to the end
2481       of the index file header. This requires to recreate all indexes,
2482       even those that may still be intact.
2483     The second method is probably faster in most cases.
2484 
2485     When disabling indexes, MySQL disables either all indexes or all
2486     non-unique indexes. When MySQL [re-]enables disabled indexes
2487     (T_CREATE_MISSING_KEYS), then we either have "lost" blocks in the
2488     index file, or there are no non-unique indexes. In the latter case,
2489     maria_repair*() would not be called as there would be no disabled
2490     indexes.
2491 
2492     If there would be more unique indexes than disabled (non-unique)
2493     indexes, we could do the first method. But this is not implemented
2494     yet. By now we drop and recreate all indexes when repair is called.
2495 
2496     However, there is an exception. Sometimes MySQL disables non-unique
2497     indexes when the table is empty (e.g. when copying a table in
2498     mysql_alter_table()). When enabling the non-unique indexes, they
2499     are still empty. So there is no index block that can be lost. This
2500     optimization is implemented in this function.
2501 
2502     Note that in normal repair (T_CREATE_MISSING_KEYS not set) we
2503     recreate all enabled indexes unconditonally. We do not change the
2504     key_map. Otherwise we invert the key map temporarily (outside of
2505     this function) and recreate the then "seemingly" enabled indexes.
2506     When we cannot use the optimization, and drop all indexes, we
2507     pretend that all indexes were disabled. By the inversion, we will
2508     then recrate all indexes.
2509 */
2510 
maria_drop_all_indexes(HA_CHECK * param,MARIA_HA * info,my_bool force)2511 static int maria_drop_all_indexes(HA_CHECK *param, MARIA_HA *info,
2512                                   my_bool force)
2513 {
2514   MARIA_SHARE *share= info->s;
2515   MARIA_STATE_INFO *state= &share->state;
2516   uint i;
2517   DBUG_ENTER("maria_drop_all_indexes");
2518 
2519   /*
2520     If any of the disabled indexes has a key block assigned, we must
2521     drop and recreate all indexes to avoid losing index blocks.
2522 
2523     If we want to recreate disabled indexes only _and_ all of these
2524     indexes are empty, we don't need to recreate the existing indexes.
2525   */
2526   if (!force && (param->testflag & T_CREATE_MISSING_KEYS))
2527   {
2528     DBUG_PRINT("repair", ("creating missing indexes"));
2529     for (i= 0; i < share->base.keys; i++)
2530     {
2531       DBUG_PRINT("repair", ("index #: %u  key_root:%lld  active: %d",
2532                             i, state->key_root[i],
2533                             maria_is_key_active(state->key_map, i)));
2534       if ((state->key_root[i] != HA_OFFSET_ERROR) &&
2535           !maria_is_key_active(state->key_map, i))
2536       {
2537         /*
2538           This index has at least one key block and it is disabled.
2539           We would lose its block(s) if would just recreate it.
2540           So we need to drop and recreate all indexes.
2541         */
2542         DBUG_PRINT("repair", ("nonempty and disabled: recreate all"));
2543         break;
2544       }
2545     }
2546     if (i >= share->base.keys)
2547       goto end;
2548 
2549     /*
2550       We do now drop all indexes and declare them disabled. With the
2551       T_CREATE_MISSING_KEYS flag, maria_repair*() will recreate all
2552       disabled indexes and enable them.
2553     */
2554     maria_clear_all_keys_active(state->key_map);
2555     DBUG_PRINT("repair", ("declared all indexes disabled"));
2556   }
2557 
2558   /* Flush obsolete index data from key cache */
2559   _ma_flush_table_files(info, MARIA_FLUSH_INDEX,
2560                         FLUSH_IGNORE_CHANGED, FLUSH_IGNORE_CHANGED);
2561   /* Clear index root block pointers. */
2562   for (i= 0; i < share->base.keys; i++)
2563     state->key_root[i]= HA_OFFSET_ERROR;
2564 
2565   /* Drop the delete chain. */
2566   share->state.key_del=  HA_OFFSET_ERROR;
2567 
2568   /* Reset index file length to end of index file header. */
2569   share->state.state.key_file_length= share->base.keystart;
2570 
2571 end:
2572   DBUG_RETURN(0);
2573 }
2574 
2575 
2576 /*
2577   Recover old table by reading each record and writing all keys
2578 
2579   NOTES
2580     Save new datafile-name in temp_filename.
2581     We overwrite the index file as we go (writekeys() for example), so if we
2582     crash during this the table is unusable and user (or Recovery in the
2583     future) must repeat the REPAIR/OPTIMIZE operation. We could use a
2584     temporary index file in the future (drawback: more disk space).
2585 
2586   IMPLEMENTATION (for hard repair with block format)
2587    - Create new, unrelated MARIA_HA of the table
2588    - Create new datafile and associate it with new handler
2589    - Reset all statistic information in new handler
2590    - Copy all data to new handler with normal write operations
2591    - Move state of new handler to old handler
2592    - Close new handler
2593    - Close data file in old handler
2594    - Rename old data file to new data file.
2595    - Reopen data file in old handler
2596 */
2597 
maria_repair(HA_CHECK * param,register MARIA_HA * info,char * name,my_bool rep_quick)2598 int maria_repair(HA_CHECK *param, register MARIA_HA *info,
2599                  char *name, my_bool rep_quick)
2600 {
2601   int error, got_error;
2602   ha_rows start_records,new_header_length;
2603   my_off_t del;
2604   File new_file;
2605   MARIA_SHARE *share= info->s;
2606   char llbuff[22],llbuff2[22];
2607   MARIA_SORT_INFO sort_info;
2608   MARIA_SORT_PARAM sort_param;
2609   my_bool block_record, scan_inited= 0, reenable_logging= 0;
2610   enum data_file_type org_data_file_type= share->data_file_type;
2611   myf sync_dir= ((share->now_transactional && !share->temporary) ?
2612                  MY_SYNC_DIR : 0);
2613   MARIA_SHARE backup_share;
2614   DBUG_ENTER("maria_repair");
2615 
2616   got_error= 1;
2617   new_file= -1;
2618   start_records= share->state.state.records;
2619   if (!(param->testflag & T_SILENT))
2620   {
2621     printf("- recovering (with keycache) Aria-table '%s'\n",name);
2622     printf("Data records: %s\n", llstr(start_records, llbuff));
2623   }
2624 
2625   if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
2626                                       rep_quick, &backup_share))
2627     goto err;
2628 
2629   if ((reenable_logging= share->now_transactional))
2630     _ma_tmp_disable_logging_for_table(info, 0);
2631 
2632   sort_param.current_filepos= sort_param.filepos= new_header_length=
2633     ((param->testflag & T_UNPACK) ? 0L : share->pack.header_length);
2634 
2635   if (!rep_quick)
2636   {
2637     /* Get real path for data file */
2638     if ((new_file= mysql_file_create(key_file_tmp,
2639                                      fn_format(param->temp_filename,
2640                                                share->data_file_name.str, "",
2641                                                DATA_TMP_EXT, 2+4),
2642                                      0,param->tmpfile_createflag,
2643                                      MYF(0))) < 0)
2644     {
2645       _ma_check_print_error(param,"Can't create new tempfile: '%s'",
2646 			   param->temp_filename);
2647       goto err;
2648     }
2649     if (new_header_length &&
2650         maria_filecopy(param, new_file, info->dfile.file, 0L,
2651                        new_header_length, "datafile-header"))
2652       goto err;
2653     share->state.dellink= HA_OFFSET_ERROR;
2654     info->rec_cache.file= new_file;             /* For sort_delete_record */
2655     if (share->data_file_type == BLOCK_RECORD ||
2656         (param->testflag & T_UNPACK))
2657     {
2658       if (create_new_data_handle(&sort_param, new_file))
2659         goto err;
2660       sort_info.new_info->rec_cache.file= new_file;
2661     }
2662   }
2663 
2664   block_record= sort_info.new_info->s->data_file_type == BLOCK_RECORD;
2665 
2666   if (org_data_file_type != BLOCK_RECORD)
2667   {
2668     /* We need a read buffer to read rows in big blocks */
2669     if (init_io_cache(&param->read_cache, info->dfile.file,
2670                       (uint) param->read_buffer_length,
2671                       READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
2672       goto err;
2673   }
2674   if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
2675   {
2676     /* When writing to not block records, we need a write buffer */
2677     if (!rep_quick)
2678     {
2679       if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
2680                         (uint) param->write_buffer_length,
2681                         WRITE_CACHE, new_header_length, 1,
2682                         MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
2683         goto err;
2684       sort_info.new_info->opt_flag|=WRITE_CACHE_USED;
2685     }
2686   }
2687   else if (block_record)
2688   {
2689     scan_inited= 1;
2690     if (maria_scan_init(sort_info.info))
2691       goto err;
2692   }
2693 
2694   if (!(sort_param.record=
2695         (uchar *) my_malloc((uint)
2696                             share->base.default_rec_buff_size, MYF(0))) ||
2697       _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
2698                        share->base.default_rec_buff_size, MYF(0)))
2699   {
2700     _ma_check_print_error(param, "Not enough memory for extra record");
2701     goto err;
2702   }
2703 
2704   sort_param.read_cache=param->read_cache;
2705   sort_param.pos=sort_param.max_pos=share->pack.header_length;
2706   param->read_cache.end_of_file= sort_info.filelength;
2707   sort_param.master=1;
2708   sort_info.max_records= ~(ha_rows) 0;
2709 
2710   del= share->state.state.del;
2711   share->state.state.records= share->state.state.del= share->state.split= 0;
2712   share->state.state.empty= 0;
2713 
2714   if (param->testflag & T_CREATE_MISSING_KEYS)
2715     maria_set_all_keys_active(share->state.key_map, share->base.keys);
2716   maria_drop_all_indexes(param, info, TRUE);
2717 
2718   maria_lock_memory(param);			/* Everything is alloced */
2719 
2720   sort_param.sort_info->info->in_check_table= 1;
2721   /* Re-create all keys, which are set in key_map. */
2722   while (!(error=sort_get_next_record(&sort_param)))
2723   {
2724     if (block_record && _ma_sort_write_record(&sort_param))
2725       goto err;
2726 
2727     if (writekeys(&sort_param))
2728     {
2729       if (my_errno != HA_ERR_FOUND_DUPP_KEY)
2730 	goto err;
2731       DBUG_DUMP("record", sort_param.record,
2732                 share->base.default_rec_buff_size);
2733       _ma_check_print_warning(param,
2734                               "Duplicate key %2d for record at %10s against "
2735                               "new record at %10s",
2736                               info->errkey+1,
2737                               llstr(sort_param.current_filepos, llbuff),
2738                               llstr(info->dup_key_pos,llbuff2));
2739       if (param->testflag & T_VERBOSE)
2740       {
2741         MARIA_KEY tmp_key;
2742         MARIA_KEYDEF *keyinfo= share->keyinfo + info->errkey;
2743 	(*keyinfo->make_key)(info, &tmp_key, (uint) info->errkey,
2744                              info->lastkey_buff,
2745                              sort_param.record, 0L, 0);
2746         _ma_print_key(stdout, &tmp_key);
2747       }
2748       sort_info.dupp++;
2749       if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
2750       {
2751         param->testflag|=T_RETRY_WITHOUT_QUICK;
2752 	param->error_printed=1;
2753 	goto err;
2754       }
2755       /* purecov: begin tested */
2756       if (block_record)
2757       {
2758         sort_info.new_info->s->state.state.records--;
2759         if ((*sort_info.new_info->s->write_record_abort)(sort_info.new_info))
2760         {
2761           _ma_check_print_error(param,"Couldn't delete duplicate row");
2762           goto err;
2763         }
2764       }
2765       /* purecov: end */
2766       continue;
2767     }
2768     if (!block_record)
2769     {
2770       if (_ma_sort_write_record(&sort_param))
2771         goto err;
2772       /* Filepos is pointer to where next row will be stored */
2773       sort_param.current_filepos= sort_param.filepos;
2774     }
2775   }
2776   if (error > 0 || maria_write_data_suffix(&sort_info, !rep_quick) ||
2777       flush_io_cache(&sort_info.new_info->rec_cache) ||
2778       param->read_cache.error < 0)
2779     goto err;
2780 
2781   if (param->testflag & T_WRITE_LOOP)
2782   {
2783     fputs("          \r",stdout); fflush(stdout);
2784   }
2785   if (mysql_file_chsize(share->kfile.file,
2786                         share->state.state.key_file_length, 0, MYF(0)))
2787   {
2788     _ma_check_print_warning(param,
2789 			   "Can't change size of indexfile, error: %d",
2790 			   my_errno);
2791     goto err;
2792   }
2793 
2794   if (rep_quick && del+sort_info.dupp != share->state.state.del)
2795   {
2796     _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
2797                           "Found wrong number of deleted records");
2798     _ma_check_print_error(param,"Run recovery again without -q");
2799     param->retry_repair=1;
2800     param->testflag|=T_RETRY_WITHOUT_QUICK;
2801     goto err;
2802   }
2803 
2804   if (param->testflag & T_SAFE_REPAIR)
2805   {
2806     /* Don't repair if we loosed more than one row */
2807     if (sort_info.new_info->s->state.state.records+1 < start_records)
2808     {
2809       share->state.state.records= start_records;
2810       goto err;
2811     }
2812   }
2813 
2814   end_io_cache(&sort_info.new_info->rec_cache);
2815   info->opt_flag&= ~WRITE_CACHE_USED;
2816 
2817   /*
2818     As we have read the data file (sort_get_next_record()) we may have
2819     cached, non-changed blocks of it in the page cache. We must throw them
2820     away as we are going to close their descriptor ('new_file'). We also want
2821     to flush any index block, so that it is ready for the upcoming sync.
2822   */
2823   if (_ma_flush_table_files_before_swap(param, info))
2824     goto err;
2825 
2826   if (!rep_quick)
2827   {
2828     sort_info.new_info->s->state.state.data_file_length= sort_param.filepos;
2829     if (sort_info.new_info != sort_info.info)
2830     {
2831       MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
2832       if (maria_close(sort_info.new_info))
2833       {
2834         _ma_check_print_error(param, "Got error %d on close", my_errno);
2835         goto err;
2836       }
2837       copy_data_file_state(&share->state, &save_state);
2838       new_file= -1;
2839       sort_info.new_info= info;
2840     }
2841     share->state.version=(ulong) time((time_t*) 0);	/* Force reopen */
2842 
2843     /* Replace the actual file with the temporary file */
2844     if (new_file >= 0)
2845       mysql_file_close(new_file, MYF(MY_WME));
2846     new_file= -1;
2847     change_data_file_descriptor(info, -1);
2848     if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
2849                                 DATA_TMP_EXT, param->backup_time,
2850                                 (param->testflag & T_BACKUP_DATA ?
2851                                  MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
2852                                 sync_dir) ||
2853         _ma_open_datafile(info, share))
2854     {
2855       goto err;
2856     }
2857   }
2858   else
2859   {
2860     share->state.state.data_file_length= sort_param.max_pos;
2861   }
2862   if (param->testflag & T_CALC_CHECKSUM)
2863     share->state.state.checksum= param->glob_crc;
2864 
2865   if (!(param->testflag & T_SILENT))
2866   {
2867     if (start_records != share->state.state.records)
2868       printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
2869   }
2870   if (sort_info.dupp)
2871     _ma_check_print_warning(param,
2872                             "%s records have been removed",
2873                             llstr(sort_info.dupp,llbuff));
2874 
2875   got_error= 0;
2876   /* If invoked by external program that uses thr_lock */
2877   if (&share->state.state != info->state)
2878     *info->state= *info->state_start= share->state.state;
2879 
2880 err:
2881   if (scan_inited)
2882     maria_scan_end(sort_info.info);
2883   _ma_reset_state(info);
2884 
2885   end_io_cache(&param->read_cache);
2886   if (sort_info.new_info)
2887   {
2888     end_io_cache(&sort_info.new_info->rec_cache);
2889     sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
2890   }
2891   info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
2892 
2893   sort_param.sort_info->info->in_check_table= 0;
2894   /* this below could fail, shouldn't we detect error? */
2895   if (got_error)
2896   {
2897     if (! param->error_printed)
2898       _ma_check_print_error(param,"%d for record at pos %s",my_errno,
2899 		  llstr(sort_param.start_recpos,llbuff));
2900     (void)_ma_flush_table_files_before_swap(param, info);
2901     if (sort_info.new_info && sort_info.new_info != sort_info.info)
2902     {
2903       unuse_data_file_descriptor(sort_info.new_info);
2904       maria_close(sort_info.new_info);
2905     }
2906     if (new_file >= 0)
2907     {
2908       mysql_file_close(new_file,MYF(0));
2909       mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
2910     }
2911     maria_mark_crashed_on_repair(info);
2912   }
2913   /* If caller had disabled logging it's not up to us to re-enable it */
2914   if (reenable_logging)
2915     _ma_reenable_logging_for_table(info, FALSE);
2916   restore_table_state_after_repair(info, &backup_share);
2917 
2918   my_free(sort_param.rec_buff);
2919   my_free(sort_param.record);
2920   my_free(sort_info.buff);
2921   if (!got_error && (param->testflag & T_UNPACK))
2922     restore_data_file_type(share);
2923   share->state.changed|= (STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES |
2924 			  STATE_NOT_ANALYZED | STATE_NOT_ZEROFILLED);
2925   if (!rep_quick)
2926     share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_MOVABLE);
2927   DBUG_RETURN(got_error);
2928 }
2929 
2930 
2931 /* Uppdate keyfile when doing repair */
2932 
writekeys(MARIA_SORT_PARAM * sort_param)2933 static int writekeys(MARIA_SORT_PARAM *sort_param)
2934 {
2935   uint i;
2936   MARIA_HA *info=     sort_param->sort_info->info;
2937   MARIA_SHARE *share= info->s;
2938   uchar *record=    sort_param->record;
2939   uchar *key_buff;
2940   my_off_t filepos=   sort_param->current_filepos;
2941   MARIA_KEY key;
2942   DBUG_ENTER("writekeys");
2943 
2944   key_buff= info->lastkey_buff+share->base.max_key_length;
2945 
2946   for (i=0 ; i < share->base.keys ; i++)
2947   {
2948     if (maria_is_key_active(share->state.key_map, i))
2949     {
2950       if (share->keyinfo[i].flag & HA_FULLTEXT )
2951       {
2952         if (_ma_ft_add(info, i, key_buff, record, filepos))
2953 	  goto err;
2954       }
2955       else
2956       {
2957 	if (!(*share->keyinfo[i].make_key)(info, &key, i, key_buff, record,
2958                                          filepos, 0))
2959           goto err;
2960 	if ((*share->keyinfo[i].ck_insert)(info, &key))
2961 	  goto err;
2962       }
2963     }
2964   }
2965   DBUG_RETURN(0);
2966 
2967  err:
2968   if (my_errno == HA_ERR_FOUND_DUPP_KEY)
2969   {
2970     info->errkey=(int) i;			/* This key was found */
2971     while ( i-- > 0 )
2972     {
2973       if (maria_is_key_active(share->state.key_map, i))
2974       {
2975 	if (share->keyinfo[i].flag & HA_FULLTEXT)
2976         {
2977           if (_ma_ft_del(info,i,key_buff,record,filepos))
2978 	    break;
2979         }
2980         else
2981 	{
2982 	  (*share->keyinfo[i].make_key)(info, &key, i, key_buff, record,
2983                                         filepos, 0);
2984 	  if (_ma_ck_delete(info, &key))
2985 	    break;
2986 	}
2987       }
2988     }
2989   }
2990   /* Remove checksum that was added to glob_crc in sort_get_next_record */
2991   if (sort_param->calc_checksum)
2992     sort_param->sort_info->param->glob_crc-= info->cur_row.checksum;
2993   DBUG_PRINT("error",("errno: %d",my_errno));
2994   DBUG_RETURN(-1);
2995 } /* writekeys */
2996 
2997 
2998 	/* Change all key-pointers that points to a records */
2999 
maria_movepoint(register MARIA_HA * info,uchar * record,MARIA_RECORD_POS oldpos,MARIA_RECORD_POS newpos,uint prot_key)3000 int maria_movepoint(register MARIA_HA *info, uchar *record,
3001                     MARIA_RECORD_POS oldpos, MARIA_RECORD_POS newpos,
3002                     uint prot_key)
3003 {
3004   uint i;
3005   uchar *key_buff;
3006   MARIA_SHARE *share= info->s;
3007   MARIA_PAGE page;
3008   DBUG_ENTER("maria_movepoint");
3009 
3010   key_buff= info->lastkey_buff + share->base.max_key_length;
3011   for (i=0 ; i < share->base.keys; i++)
3012   {
3013     if (i != prot_key && maria_is_key_active(share->state.key_map, i))
3014     {
3015       MARIA_KEY key;
3016       (*share->keyinfo[i].make_key)(info, &key, i, key_buff, record, oldpos,
3017                                     0);
3018       if (key.keyinfo->flag & HA_NOSAME)
3019       {					/* Change pointer direct */
3020 	MARIA_KEYDEF *keyinfo;
3021 	keyinfo=share->keyinfo+i;
3022 	if (_ma_search(info, &key, (uint32) (SEARCH_SAME | SEARCH_SAVE_BUFF),
3023 		       share->state.key_root[i]))
3024 	  DBUG_RETURN(-1);
3025         _ma_page_setup(&page, info, keyinfo, info->last_keypage,
3026                        info->keyread_buff);
3027 
3028 	_ma_dpointer(share, info->int_keypos - page.node -
3029 		     share->rec_reflength,newpos);
3030 
3031 	if (_ma_write_keypage(&page, PAGECACHE_LOCK_LEFT_UNLOCKED,
3032                               DFLT_INIT_HITS))
3033 	  DBUG_RETURN(-1);
3034       }
3035       else
3036       {					/* Change old key to new */
3037 	if (_ma_ck_delete(info, &key))
3038 	  DBUG_RETURN(-1);
3039 	(*share->keyinfo[i].make_key)(info, &key, i, key_buff, record, newpos,
3040                                       0);
3041 	if (_ma_ck_write(info, &key))
3042 	  DBUG_RETURN(-1);
3043       }
3044     }
3045   }
3046   DBUG_RETURN(0);
3047 } /* maria_movepoint */
3048 
3049 
3050 	/* Tell system that we want all memory for our cache */
3051 
maria_lock_memory(HA_CHECK * param)3052 void maria_lock_memory(HA_CHECK *param __attribute__((unused)))
3053 {
3054 #ifdef SUN_OS				/* Key-cacheing thrases on sun 4.1 */
3055   if (param->opt_maria_lock_memory)
3056   {
3057     int success = mlockall(MCL_CURRENT);	/* or plock(DATLOCK); */
3058     if (geteuid() == 0 && success != 0)
3059       _ma_check_print_warning(param,
3060 			     "Failed to lock memory. errno %d",my_errno);
3061   }
3062 #endif
3063 } /* maria_lock_memory */
3064 
3065 
3066 /**
3067    Flush all changed blocks to disk.
3068 
3069    We release blocks as it's unlikely that they would all be needed soon.
3070    This function needs to be called before swapping data or index files or
3071    syncing them.
3072 
3073    @param  param           description of the repair operation
3074    @param  info            table
3075 */
3076 
_ma_flush_table_files_before_swap(HA_CHECK * param,MARIA_HA * info)3077 static my_bool _ma_flush_table_files_before_swap(HA_CHECK *param,
3078                                                  MARIA_HA *info)
3079 {
3080   DBUG_ENTER("_ma_flush_table_files_before_swap");
3081   if (_ma_flush_table_files(info, MARIA_FLUSH_DATA | MARIA_FLUSH_INDEX,
3082                             FLUSH_RELEASE, FLUSH_RELEASE))
3083   {
3084     _ma_check_print_error(param, "%d when trying to write buffers", my_errno);
3085     DBUG_RETURN(TRUE);
3086   }
3087   DBUG_RETURN(FALSE);
3088 }
3089 
3090 
3091 	/* Sort index for more efficent reads */
3092 
maria_sort_index(HA_CHECK * param,register MARIA_HA * info,char * name)3093 int maria_sort_index(HA_CHECK *param, register MARIA_HA *info, char *name)
3094 {
3095   reg2 uint key;
3096   reg1 MARIA_KEYDEF *keyinfo;
3097   File new_file;
3098   my_off_t index_pos[HA_MAX_POSSIBLE_KEY];
3099   uint r_locks,w_locks;
3100   int old_lock;
3101   MARIA_SHARE *share= info->s;
3102   MARIA_STATE_INFO old_state;
3103   myf sync_dir= ((share->now_transactional && !share->temporary) ?
3104                  MY_SYNC_DIR : 0);
3105   DBUG_ENTER("maria_sort_index");
3106 
3107   /* cannot sort index files with R-tree indexes */
3108   for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
3109        key++,keyinfo++)
3110     if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
3111       DBUG_RETURN(0);
3112 
3113   if (!(param->testflag & T_SILENT))
3114     printf("- Sorting index for Aria-table '%s'\n",name);
3115 
3116   if (protect_against_repair_crash(info, param, FALSE))
3117     DBUG_RETURN(1);
3118 
3119   /* Get real path for index file */
3120   fn_format(param->temp_filename,name,"", MARIA_NAME_IEXT,2+4+32);
3121   if ((new_file=mysql_file_create(key_file_kfile, fn_format(param->temp_filename,param->temp_filename,
3122 				    "", INDEX_TMP_EXT,2+4),
3123                                   0, param->tmpfile_createflag, MYF(0))) < 0)
3124   {
3125     _ma_check_print_error(param,"Can't create new tempfile: '%s'",
3126 			 param->temp_filename);
3127     DBUG_RETURN(-1);
3128   }
3129   if (maria_filecopy(param, new_file, share->kfile.file, 0L,
3130                      (ulong) share->base.keystart, "headerblock"))
3131     goto err;
3132 
3133   param->new_file_pos=share->base.keystart;
3134   for (key= 0,keyinfo= &share->keyinfo[0]; key < share->base.keys ;
3135        key++,keyinfo++)
3136   {
3137     if (maria_is_key_active(share->state.key_map, key) &&
3138         share->state.key_root[key] != HA_OFFSET_ERROR)
3139     {
3140       index_pos[key]=param->new_file_pos;	/* Write first block here */
3141       if (sort_one_index(param,info,keyinfo,share->state.key_root[key],
3142 			 new_file))
3143 	goto err;
3144     }
3145     else
3146       index_pos[key]= HA_OFFSET_ERROR;		/* No blocks */
3147   }
3148 
3149   /* Flush key cache for this file if we are calling this outside maria_chk */
3150   flush_pagecache_blocks(share->pagecache, &share->kfile,
3151                          FLUSH_IGNORE_CHANGED);
3152 
3153   share->state.version=(ulong) time((time_t*) 0);
3154   old_state= share->state;			/* save state if not stored */
3155   r_locks=   share->r_locks;
3156   w_locks=   share->w_locks;
3157   old_lock=  info->lock_type;
3158 
3159 	/* Put same locks as old file */
3160   share->r_locks= share->w_locks= share->tot_locks= 0;
3161   (void) _ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE);
3162   mysql_mutex_lock(&share->intern_lock);
3163   mysql_file_close(share->kfile.file, MYF(MY_WME));
3164   share->kfile.file = -1;
3165   mysql_mutex_unlock(&share->intern_lock);
3166   mysql_file_close(new_file, MYF(MY_WME));
3167   if (maria_change_to_newfile(share->index_file_name.str, MARIA_NAME_IEXT,
3168                               INDEX_TMP_EXT, 0, sync_dir) ||
3169       _ma_open_keyfile(share))
3170     goto err2;
3171   info->lock_type= F_UNLCK;			/* Force maria_readinfo to lock */
3172   _ma_readinfo(info,F_WRLCK,0);			/* Will lock the table */
3173   info->lock_type=  old_lock;
3174   share->r_locks=   r_locks;
3175   share->w_locks=   w_locks;
3176   share->tot_locks= r_locks+w_locks;
3177   share->state=     old_state;			/* Restore old state */
3178 
3179   share->state.state.key_file_length=param->new_file_pos;
3180   info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
3181   for (key=0 ; key < share->base.keys ; key++)
3182     share->state.key_root[key]=index_pos[key];
3183   share->state.key_del=  HA_OFFSET_ERROR;
3184 
3185   share->state.changed&= ~STATE_NOT_SORTED_PAGES;
3186   DBUG_EXECUTE_IF("maria_flush_whole_log",
3187                   {
3188                     DBUG_PRINT("maria_flush_whole_log", ("now"));
3189                     translog_flush(translog_get_horizon());
3190                   });
3191   DBUG_EXECUTE_IF("maria_crash_sort_index",
3192                   {
3193                     DBUG_PRINT("maria_crash_sort_index", ("now"));
3194                     DBUG_SUICIDE();
3195                   });
3196   DBUG_RETURN(0);
3197 
3198 err:
3199   mysql_file_close(new_file, MYF(MY_WME));
3200 err2:
3201   mysql_file_delete(key_file_tmp, param->temp_filename,MYF(MY_WME));
3202   DBUG_RETURN(-1);
3203 } /* maria_sort_index */
3204 
3205 
3206 /**
3207   @brief write a page directly to index file
3208 
3209 */
3210 
write_page(MARIA_SHARE * share,File file,uchar * buff,uint block_size,my_off_t pos,int myf_rw)3211 static int write_page(MARIA_SHARE *share, File file,
3212                       uchar *buff, uint block_size,
3213                       my_off_t pos, int myf_rw)
3214 {
3215   int res;
3216   PAGECACHE_IO_HOOK_ARGS args;
3217   args.page= buff;
3218   args.pageno= (pgcache_page_no_t) (pos / share->block_size);
3219   args.data= (uchar*) share;
3220   args.crypt_buf= NULL;
3221   (* share->kfile.pre_write_hook)(&args);
3222   res= (int)my_pwrite(file, args.page, block_size, pos, myf_rw);
3223   (* share->kfile.post_write_hook)(res, &args);
3224   return res;
3225 }
3226 
3227 
/*
  Sort index blocks recursively using one index

  Copies the key page at pagepos, and every page reachable from it, to
  new_file. A page is given the next free position in the new file
  (param->new_file_pos) before its children are visited; the child
  pointers stored in the page are rewritten to the children's new
  positions and only then is the page itself written.

  SYNOPSIS
    sort_one_index()
    param	Check parameters; new_file_pos is advanced by
		keyinfo->block_length for every page copied
    info	MARIA handler
    keyinfo	Definition of the key the page belongs to
    pagepos	Position of the page to copy in the old index file
    new_file	File the copied pages are written to

  RESULT
    0	ok
    1	Error when reading, walking or writing a page
    -1	Could not allocate the page buffer
*/

static int sort_one_index(HA_CHECK *param, MARIA_HA *info,
                          MARIA_KEYDEF *keyinfo,
			  my_off_t pagepos, File new_file)
{
  uint length,nod_flag;
  uchar *buff,*keypos,*endpos;
  my_off_t new_page_pos,next_page;
  MARIA_SHARE *share= info->s;
  MARIA_KEY key;
  MARIA_PAGE page;
  DBUG_ENTER("sort_one_index");

  /* cannot walk over R-tree indices */
  DBUG_ASSERT(keyinfo->key_alg != HA_KEY_ALG_RTREE);
  /*
    Reserve this page's slot in the new file before looking at any child,
    so that children are placed after their parent.
  */
  new_page_pos=param->new_file_pos;
  param->new_file_pos+=keyinfo->block_length;
  key.keyinfo= keyinfo;

  /* Room for the page itself followed by one key (see key.data below) */
  if (!(buff= (uchar*) my_alloca((uint) keyinfo->block_length +
                                 keyinfo->maxlength +
                                 MARIA_INDEX_OVERHEAD_SIZE)))
  {
    _ma_check_print_error(param,"Not enough memory for key block");
    DBUG_RETURN(-1);
  }
  key.data= buff + keyinfo->block_length;

  if (_ma_fetch_keypage(&page, info, keyinfo, pagepos,
                        PAGECACHE_LOCK_LEFT_UNLOCKED,
                        DFLT_INIT_HITS, buff, 0))
  {
    report_keypage_fault(param, info, pagepos);
    goto err;
  }

  /*
    Only node pages and fulltext pages can refer to other pages; any other
    page is copied as it is.
  */
  if ((nod_flag= page.node) || keyinfo->flag & HA_FULLTEXT)
  {
    keypos= page.buff + share->keypage_header + nod_flag;
    endpos= page.buff + page.size;

    for ( ;; )
    {
      if (nod_flag)
      {
	next_page= _ma_kpos(nod_flag,keypos);
        /*
          Save new pos: the child reserves param->new_file_pos as the first
          thing it does, so that is where it will end up.
        */
	_ma_kpointer(info,keypos-nod_flag,param->new_file_pos);
	if (sort_one_index(param,info,keyinfo,next_page,new_file))
	{
	  DBUG_PRINT("error",
		     ("From page: %ld, keyoffset: %lu  used_length: %d",
		      (ulong) pagepos, (ulong) (keypos - buff),
		      (int) page.size));
	  DBUG_DUMP("buff", page.buff, page.size);
	  goto err;
	}
      }
      /* Stop at the end of the page or when no further key can be read */
      if (keypos >= endpos ||
	  !(*keyinfo->get_key)(&key, page.flag, nod_flag, &keypos))
	break;
      DBUG_ASSERT(keypos <= endpos);
      if (keyinfo->flag & HA_FULLTEXT)
      {
        uint off;
        int  subkeys;
        get_key_full_length_rdonly(off, key.data);
        subkeys= ft_sintXkorr(key.data + off);
        /*
          A negative count means the row position of this key is used as a
          page position: that page is copied using share->ft2_keyinfo
          (presumably the root of a second-level fulltext tree).
        */
        if (subkeys < 0)
        {
          next_page= _ma_row_pos_from_key(&key);
          _ma_dpointer(share, keypos - nod_flag - share->rec_reflength,
                       param->new_file_pos); /* Save new pos */
          if (sort_one_index(param,info,&share->ft2_keyinfo,
                             next_page,new_file))
            goto err;
        }
      }
    }
  }

  /* Fill block with zero and write it to the new index file */
  length= page.size;
  bzero(buff+length,keyinfo->block_length-length);
  if (write_page(share, new_file, buff, keyinfo->block_length,
                 new_page_pos, MYF(MY_NABP | MY_WAIT_IF_FULL)))
  {
    _ma_check_print_error(param,"Can't write indexblock, error: %d",my_errno);
    goto err;
  }
  my_afree(buff);
  DBUG_RETURN(0);
err:
  my_afree(buff);
  DBUG_RETURN(1);
} /* sort_one_index */
3325 
3326 
3327 /**
3328    @brief Fill empty space in index file with zeroes
3329 
3330    @return
3331    @retval 0  Ok
3332    @retval 1  Error
3333 */
3334 
maria_zerofill_index(HA_CHECK * param,MARIA_HA * info,const char * name)3335 static my_bool maria_zerofill_index(HA_CHECK *param, MARIA_HA *info,
3336                                     const char *name)
3337 {
3338   MARIA_SHARE *share= info->s;
3339   MARIA_PINNED_PAGE page_link;
3340   char llbuff[21];
3341   uchar *buff;
3342   pgcache_page_no_t page;
3343   my_off_t pos;
3344   my_off_t key_file_length= share->state.state.key_file_length;
3345   uint block_size= share->block_size;
3346   my_bool zero_lsn= (share->base.born_transactional &&
3347                      !(param->testflag & T_ZEROFILL_KEEP_LSN));
3348   int error= 1;
3349   DBUG_ENTER("maria_zerofill_index");
3350 
3351   if (!(param->testflag & T_SILENT))
3352     printf("- Zerofilling index for Aria-table '%s'\n",name);
3353 
3354   /* Go through the index file */
3355   for (pos= share->base.keystart, page= (ulonglong) (pos / block_size);
3356        pos < key_file_length;
3357        pos+= block_size, page++)
3358   {
3359     uint length;
3360     if (!(buff= pagecache_read(share->pagecache,
3361                                &share->kfile, page,
3362                                DFLT_INIT_HITS, 0,
3363                                PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
3364                                &page_link.link)))
3365     {
3366       pagecache_unlock_by_link(share->pagecache, page_link.link,
3367                                PAGECACHE_LOCK_WRITE_UNLOCK,
3368                                PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3369                                LSN_IMPOSSIBLE, 0, FALSE);
3370       _ma_check_print_error(param,
3371                             "Page %9s: Got error %d when reading index file",
3372                             llstr(pos, llbuff), my_errno);
3373       goto end;
3374     }
3375     if (zero_lsn)
3376       bzero(buff, LSN_SIZE);
3377 
3378     if (share->base.born_transactional)
3379     {
3380       uint keynr= _ma_get_keynr(share, buff);
3381       if (keynr < share->base.keys)
3382       {
3383         MARIA_PAGE page;
3384         DBUG_ASSERT(keynr < share->base.keys);
3385 
3386         _ma_page_setup(&page, info, share->keyinfo + keynr, pos, buff);
3387         if (_ma_compact_keypage(&page, ~(TrID) 0))
3388         {
3389           _ma_check_print_error(param,
3390                                 "Page %9s: Got error %d when reading index "
3391                                 "file",
3392                                 llstr(pos, llbuff), my_errno);
3393           goto end;
3394         }
3395       }
3396     }
3397 
3398     length= _ma_get_page_used(share, buff);
3399     DBUG_ASSERT(length <= block_size);
3400     if (length < block_size)
3401       bzero(buff + length, block_size - length);
3402     pagecache_unlock_by_link(share->pagecache, page_link.link,
3403                              PAGECACHE_LOCK_WRITE_UNLOCK,
3404                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3405                              LSN_IMPOSSIBLE, 1, FALSE);
3406   }
3407   error= 0;                                     /* ok */
3408 
3409 end:
3410   if (flush_pagecache_blocks(share->pagecache, &share->kfile,
3411                              FLUSH_FORCE_WRITE))
3412     DBUG_RETURN(1);
3413   DBUG_RETURN(error);
3414 }
3415 
3416 
3417 /**
3418    @brief Fill empty space in data file with zeroes
3419 
3420    @todo
3421    Zerofill all pages marked in bitmap as empty and change them to
3422    be of type UNALLOCATED_PAGE
3423 
3424    @return
3425    @retval 0  Ok
3426    @retval 1  Error
3427 */
3428 
maria_zerofill_data(HA_CHECK * param,MARIA_HA * info,const char * name)3429 static my_bool maria_zerofill_data(HA_CHECK *param, MARIA_HA *info,
3430                                    const char *name)
3431 {
3432   MARIA_SHARE *share= info->s;
3433   MARIA_PINNED_PAGE page_link;
3434   char llbuff[21];
3435   my_off_t pos;
3436   pgcache_page_no_t page;
3437   uint block_size= share->block_size;
3438   MARIA_FILE_BITMAP *bitmap= &share->bitmap;
3439   my_bool zero_lsn= !(param->testflag & T_ZEROFILL_KEEP_LSN), error;
3440   DBUG_ENTER("maria_zerofill_data");
3441 
3442   /* This works only with BLOCK_RECORD files */
3443   if (share->data_file_type != BLOCK_RECORD)
3444     DBUG_RETURN(0);
3445 
3446   if (!(param->testflag & T_SILENT))
3447     printf("- Zerofilling data  for Aria-table '%s'\n",name);
3448 
3449   /* Go through the record file */
3450   for (page= 1, pos= block_size;
3451        pos < share->state.state.data_file_length;
3452        pos+= block_size, page++)
3453   {
3454     uchar *buff;
3455     enum en_page_type page_type;
3456 
3457     /* Ignore bitmap pages */
3458     if ((page % share->bitmap.pages_covered) == 0)
3459       continue;
3460     if (!(buff= pagecache_read(share->pagecache,
3461                                &info->dfile,
3462                                page, 1, 0,
3463                                PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE,
3464                                &page_link.link)))
3465     {
3466       _ma_check_print_error(param,
3467                             "Page %9s:  Got error: %d when reading datafile",
3468                             llstr(pos, llbuff), my_errno);
3469       goto err;
3470     }
3471     page_type= (enum en_page_type) (buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK);
3472     switch (page_type) {
3473     case UNALLOCATED_PAGE:
3474       if (zero_lsn)
3475         bzero(buff, block_size);
3476       else
3477         bzero(buff + LSN_SIZE, block_size - LSN_SIZE);
3478       break;
3479     case BLOB_PAGE:
3480       if (_ma_bitmap_get_page_bits(info, bitmap, page) == 0)
3481       {
3482         /* Unallocated page */
3483         if (zero_lsn)
3484           bzero(buff, block_size);
3485         else
3486           bzero(buff + LSN_SIZE, block_size - LSN_SIZE);
3487       }
3488       else
3489         if (zero_lsn)
3490           bzero(buff, LSN_SIZE);
3491       break;
3492     case HEAD_PAGE:
3493     case TAIL_PAGE:
3494     {
3495       uint max_entry= (uint) buff[DIR_COUNT_OFFSET];
3496       uint offset, dir_start, empty_space;
3497       uchar *dir;
3498 
3499       if (zero_lsn)
3500         bzero(buff, LSN_SIZE);
3501       if (max_entry != 0)
3502       {
3503         my_bool is_head_page= (page_type == HEAD_PAGE);
3504         dir= dir_entry_pos(buff, block_size, max_entry - 1);
3505         _ma_compact_block_page(share,
3506                                buff, max_entry -1, 0,
3507                                is_head_page ? ~(TrID) 0 : 0,
3508                                is_head_page ?
3509                                share->base.min_block_length : 0);
3510 
3511         /* compactation may have increased free space */
3512         empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET);
3513         if (!enough_free_entries_on_page(share, buff))
3514           empty_space= 0;                         /* Page is full */
3515         if (_ma_bitmap_set(info, page, is_head_page,
3516                            empty_space))
3517           goto err;
3518 
3519         /* Zerofill the not used part */
3520         offset= uint2korr(dir) + uint2korr(dir+2);
3521         dir_start= (uint) (dir - buff);
3522         DBUG_ASSERT(dir_start >= offset);
3523         if (dir_start > offset)
3524           bzero(buff + offset, dir_start - offset);
3525       }
3526       break;
3527     }
3528     default:
3529       _ma_check_print_error(param,
3530                             "Page %9s:  Found unrecognizable block of type %d",
3531                             llstr(pos, llbuff), page_type);
3532       goto err;
3533     }
3534     pagecache_unlock_by_link(share->pagecache, page_link.link,
3535                              PAGECACHE_LOCK_WRITE_UNLOCK,
3536                              PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3537                              LSN_IMPOSSIBLE, 1, FALSE);
3538   }
3539   error= _ma_bitmap_flush(share);
3540   if (flush_pagecache_blocks(share->pagecache, &info->dfile,
3541                              FLUSH_FORCE_WRITE))
3542     error= 1;
3543   DBUG_RETURN(error);
3544 
3545 err:
3546   pagecache_unlock_by_link(share->pagecache, page_link.link,
3547                            PAGECACHE_LOCK_WRITE_UNLOCK,
3548                            PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
3549                            LSN_IMPOSSIBLE, 0, FALSE);
3550   /* flush what was changed so far */
3551   (void) _ma_bitmap_flush(share);
3552   (void) flush_pagecache_blocks(share->pagecache, &info->dfile,
3553                                 FLUSH_FORCE_WRITE);
3554 
3555   DBUG_RETURN(1);
3556 }
3557 
3558 
3559 /**
3560    @brief Fill empty space in index and data files with zeroes
3561 
3562    @return
3563    @retval 0  Ok
3564    @retval 1  Error
3565 */
3566 
maria_zerofill(HA_CHECK * param,MARIA_HA * info,const char * name)3567 int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name)
3568 {
3569   my_bool error, reenable_logging,
3570     zero_lsn= !(param->testflag & T_ZEROFILL_KEEP_LSN);
3571   MARIA_SHARE *share= info->s;
3572   DBUG_ENTER("maria_zerofill");
3573   if ((reenable_logging= share->now_transactional))
3574     _ma_tmp_disable_logging_for_table(info, 0);
3575   if (!(error= (maria_zerofill_index(param, info, name) ||
3576                 maria_zerofill_data(param, info, name) ||
3577                 _ma_set_uuid(info->s, 0))))
3578   {
3579     /*
3580       Mark that we have done zerofill of data and index. If we zeroed pages'
3581       LSN, table is movable.
3582     */
3583     share->state.changed&= ~STATE_NOT_ZEROFILLED;
3584     if (zero_lsn)
3585     {
3586       share->state.changed&= ~(STATE_NOT_MOVABLE | STATE_MOVED);
3587       /* Table should get new LSNs */
3588       share->state.create_rename_lsn= share->state.is_of_horizon=
3589         share->state.skip_redo_lsn= LSN_NEEDS_NEW_STATE_LSNS;
3590     }
3591     /* Ensure state is later flushed to disk, if within maria_chk */
3592     info->update= (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED);
3593 
3594     /*
3595       Reset create_trid to make file comparable and to ensure that new
3596       trid's in the file starts from 0.
3597     */
3598     share->state.create_trid= 0;
3599   }
3600   if (reenable_logging)
3601     _ma_reenable_logging_for_table(info, FALSE);
3602   DBUG_RETURN(error);
3603 }
3604 
3605 
3606 /*
3607   Let temporary file replace old file.
3608   This assumes that the new file was created in the same
3609   directory as given by realpath(filename).
3610   This will ensure that any symlinks that are used will still work.
3611   Copy stats from old file to new file, deletes orignal and
3612   changes new file name to old file name
3613 */
3614 
maria_change_to_newfile(const char * filename,const char * old_ext,const char * new_ext,time_t backup_time,myf MyFlags)3615 int maria_change_to_newfile(const char * filename, const char * old_ext,
3616                             const char * new_ext, time_t backup_time,
3617                             myf MyFlags)
3618 {
3619   char old_filename[FN_REFLEN],new_filename[FN_REFLEN];
3620   /* Get real path to filename */
3621   (void) fn_format(old_filename,filename,"",old_ext,2+4+32);
3622   return my_redel(old_filename,
3623 		  fn_format(new_filename,old_filename,"",new_ext,2+4),
3624                   backup_time,
3625 		  MYF(MY_WME | MY_LINK_WARNING | MyFlags));
3626 } /* maria_change_to_newfile */
3627 
3628 
3629 /* Copy a block between two files */
3630 
maria_filecopy(HA_CHECK * param,File to,File from,my_off_t start,my_off_t length,const char * type)3631 int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start,
3632                    my_off_t length, const char *type)
3633 {
3634   uchar tmp_buff[IO_SIZE], *buff;
3635   ulong buff_length;
3636   DBUG_ENTER("maria_filecopy");
3637 
3638   buff_length=(ulong) MY_MIN(param->write_buffer_length,length);
3639   if (!(buff=my_malloc(buff_length,MYF(0))))
3640   {
3641     buff=tmp_buff; buff_length=IO_SIZE;
3642   }
3643 
3644   mysql_file_seek(from, start, MY_SEEK_SET,MYF(0));
3645   while (length > buff_length)
3646   {
3647     if (mysql_file_read(from, buff, buff_length, MYF(MY_NABP)) ||
3648 	mysql_file_write(to,  buff, buff_length, param->myf_rw))
3649       goto err;
3650     length-= buff_length;
3651   }
3652   if (mysql_file_read(from, buff, (size_t) length,MYF(MY_NABP)) ||
3653       mysql_file_write(to,  buff, (size_t) length,param->myf_rw))
3654     goto err;
3655   if (buff != tmp_buff)
3656     my_free(buff);
3657   DBUG_RETURN(0);
3658 err:
3659   if (buff != tmp_buff)
3660     my_free(buff);
3661   _ma_check_print_error(param,"Can't copy %s to tempfile, error %d",
3662 		       type,my_errno);
3663   DBUG_RETURN(1);
3664 }
3665 
3666 
3667 /*
3668   Repair table or given index using sorting
3669 
3670   SYNOPSIS
3671     maria_repair_by_sort()
3672     param		Repair parameters
3673     info		MARIA handler to repair
3674     name		Name of table (for warnings)
3675     rep_quick		set to <> 0 if we should not change data file
3676 
3677   RESULT
3678     0	ok
3679     <>0	Error
3680 */
3681 
maria_repair_by_sort(HA_CHECK * param,register MARIA_HA * info,const char * name,my_bool rep_quick)3682 int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
3683                          const char * name, my_bool rep_quick)
3684 {
3685   int got_error;
3686   uint i, keys_to_repair;
3687   ha_rows start_records;
3688   my_off_t new_header_length, org_header_length, del;
3689   File new_file;
3690   MARIA_SORT_PARAM sort_param;
3691   MARIA_SHARE *share= info->s;
3692   HA_KEYSEG *keyseg;
3693   double  *rec_per_key_part;
3694   char llbuff[22];
3695   MARIA_SORT_INFO sort_info;
3696   ulonglong UNINIT_VAR(key_map);
3697   myf sync_dir= ((share->now_transactional && !share->temporary) ?
3698                  MY_SYNC_DIR : 0);
3699   my_bool scan_inited= 0, reenable_logging= 0;
3700   MARIA_SHARE backup_share;
3701   DBUG_ENTER("maria_repair_by_sort");
3702 
3703   got_error= 1;
3704   new_file= -1;
3705   start_records= share->state.state.records;
3706   if (!(param->testflag & T_SILENT))
3707   {
3708     printf("- recovering (with sort) Aria-table '%s'\n",name);
3709     printf("Data records: %s\n", llstr(start_records,llbuff));
3710   }
3711 
3712   if (initialize_variables_for_repair(param, &sort_info, &sort_param, info,
3713                                       rep_quick, &backup_share))
3714     goto err;
3715 
3716   if ((reenable_logging= share->now_transactional))
3717     _ma_tmp_disable_logging_for_table(info, 0);
3718 
3719   org_header_length= share->pack.header_length;
3720   new_header_length= (param->testflag & T_UNPACK) ? 0 : org_header_length;
3721   sort_param.filepos= new_header_length;
3722 
3723   if (!rep_quick)
3724   {
3725     /* Get real path for data file */
3726     if ((new_file=mysql_file_create(key_file_tmp,
3727                                     fn_format(param->temp_filename,
3728                                               share->data_file_name.str, "",
3729                                               DATA_TMP_EXT, 2+4),
3730                                     0,param->tmpfile_createflag,
3731                                     MYF(0))) < 0)
3732     {
3733       _ma_check_print_error(param,"Can't create new tempfile: '%s'",
3734 			   param->temp_filename);
3735       goto err;
3736     }
3737     if (new_header_length &&
3738         maria_filecopy(param, new_file, info->dfile.file, 0L,
3739                        new_header_length, "datafile-header"))
3740       goto err;
3741 
3742     share->state.dellink= HA_OFFSET_ERROR;
3743     info->rec_cache.file= new_file;             /* For sort_delete_record */
3744     if (share->data_file_type == BLOCK_RECORD ||
3745         (param->testflag & T_UNPACK))
3746     {
3747       if (create_new_data_handle(&sort_param, new_file))
3748         goto err;
3749       sort_info.new_info->rec_cache.file= new_file;
3750     }
3751   }
3752 
3753   if (!(sort_info.key_block=
3754 	alloc_key_blocks(param,
3755 			 (uint) param->sort_key_blocks,
3756 			 share->base.max_key_block_length)))
3757     goto err;
3758   sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
3759 
3760   if (share->data_file_type != BLOCK_RECORD)
3761   {
3762     /* We need a read buffer to read rows in big blocks */
3763     if (init_io_cache(&param->read_cache, info->dfile.file,
3764                       (uint) param->read_buffer_length,
3765                       READ_CACHE, org_header_length, 1, MYF(MY_WME)))
3766       goto err;
3767   }
3768   if (sort_info.new_info->s->data_file_type != BLOCK_RECORD)
3769   {
3770     /* When writing to not block records, we need a write buffer */
3771     if (!rep_quick)
3772     {
3773       if (init_io_cache(&sort_info.new_info->rec_cache, new_file,
3774                         (uint) param->write_buffer_length,
3775                         WRITE_CACHE, new_header_length, 1,
3776                         MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
3777         goto err;
3778       sort_info.new_info->opt_flag|= WRITE_CACHE_USED;
3779     }
3780   }
3781 
3782   if (!(sort_param.record=
3783         (uchar*) my_malloc((size_t) share->base.default_rec_buff_size,
3784                            MYF(0))) ||
3785       _ma_alloc_buffer(&sort_param.rec_buff, &sort_param.rec_buff_size,
3786                        share->base.default_rec_buff_size, MYF(0)))
3787   {
3788     _ma_check_print_error(param, "Not enough memory for extra record");
3789     goto err;
3790   }
3791 
3792   /* Optionally drop indexes and optionally modify the key_map */
3793   maria_drop_all_indexes(param, info, FALSE);
3794   key_map= share->state.key_map;
3795   if (param->testflag & T_CREATE_MISSING_KEYS)
3796   {
3797     /* Invert the copied key_map to recreate all disabled indexes. */
3798     key_map= ~key_map;
3799   }
3800 
3801   param->read_cache.end_of_file= sort_info.filelength;
3802   sort_param.wordlist=NULL;
3803   init_alloc_root(&sort_param.wordroot, "sort", FTPARSER_MEMROOT_ALLOC_SIZE, 0,
3804                   MYF(param->malloc_flags));
3805 
3806   sort_param.key_cmp=sort_key_cmp;
3807   sort_param.lock_in_memory=maria_lock_memory;
3808   sort_param.tmpdir=param->tmpdir;
3809   sort_param.master =1;
3810 
3811   del=share->state.state.del;
3812 
3813   /* Calculate number of keys to repair */
3814   keys_to_repair= 0;
3815   for (sort_param.key=0 ; sort_param.key < share->base.keys ;
3816        sort_param.key++)
3817   {
3818     if (maria_is_key_active(key_map, sort_param.key))
3819       keys_to_repair++;
3820   }
3821   /* For each key we scan and merge sort the keys */
3822   param->max_stage= keys_to_repair*2;
3823 
3824   rec_per_key_part= param->new_rec_per_key_part;
3825   for (sort_param.key=0 ; sort_param.key < share->base.keys ;
3826        rec_per_key_part+=sort_param.keyinfo->keysegs, sort_param.key++)
3827   {
3828     sort_param.keyinfo=share->keyinfo+sort_param.key;
3829     /*
3830       Skip this index if it is marked disabled in the copied
3831       (and possibly inverted) key_map.
3832     */
3833     if (! maria_is_key_active(key_map, sort_param.key))
3834     {
3835       /* Remember old statistics for key */
3836       memcpy((char*) rec_per_key_part,
3837 	     (char*) (share->state.rec_per_key_part +
3838 		      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
3839 	     sort_param.keyinfo->keysegs*sizeof(*rec_per_key_part));
3840       DBUG_PRINT("repair", ("skipping seemingly disabled index #: %u",
3841                             sort_param.key));
3842       continue;
3843     }
3844 
3845     if ((!(param->testflag & T_SILENT)))
3846       printf ("- Fixing index %d\n",sort_param.key+1);
3847 
3848     sort_param.read_cache=param->read_cache;
3849     sort_param.seg=sort_param.keyinfo->seg;
3850     sort_param.max_pos= sort_param.pos= org_header_length;
3851     keyseg=sort_param.seg;
3852     bzero((char*) sort_param.unique,sizeof(sort_param.unique));
3853     sort_param.key_length=share->rec_reflength;
3854     for (i=0 ; keyseg[i].type != HA_KEYTYPE_END; i++)
3855     {
3856       sort_param.key_length+=keyseg[i].length;
3857       if (keyseg[i].flag & HA_SPACE_PACK)
3858 	sort_param.key_length+=get_pack_length(keyseg[i].length);
3859       if (keyseg[i].flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
3860         sort_param.key_length+= 2 + MY_TEST(keyseg[i].length >= 127);
3861       if (keyseg[i].flag & HA_NULL_PART)
3862 	sort_param.key_length++;
3863     }
3864     share->state.state.records=share->state.state.del=share->state.split=0;
3865     share->state.state.empty=0;
3866 
3867     if (sort_param.keyinfo->flag & HA_FULLTEXT)
3868     {
3869       uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
3870                                     sort_param.keyinfo->seg->charset->mbmaxlen;
3871       sort_param.key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
3872       /*
3873         fulltext indexes may have much more entries than the
3874         number of rows in the table. We estimate the number here.
3875 
3876         Note, built-in parser is always nr. 0 - see ftparser_call_initializer()
3877       */
3878       if (sort_param.keyinfo->ftkey_nr == 0)
3879       {
3880         /*
3881           for built-in parser the number of generated index entries
3882           cannot be larger than the size of the data file divided
3883           by the minimal word's length
3884         */
3885         sort_info.max_records=
3886           (ha_rows) (sort_info.filelength/ft_min_word_len+1);
3887       }
3888       else
3889       {
3890         /*
3891           for external plugin parser we cannot tell anything at all :(
3892           so, we'll use all the sort memory and start from ~10 buffpeks.
3893           (see _ma_create_index_by_sort)
3894         */
3895         sort_info.max_records=
3896           10*param->sort_buffer_length/sort_param.key_length;
3897       }
3898 
3899       sort_param.key_read=  sort_maria_ft_key_read;
3900       sort_param.key_write= sort_maria_ft_key_write;
3901     }
3902     else
3903     {
3904       sort_param.key_read=  sort_key_read;
3905       sort_param.key_write= sort_key_write;
3906     }
3907 
3908     if (sort_info.new_info->s->data_file_type == BLOCK_RECORD)
3909     {
3910       scan_inited= 1;
3911       if (maria_scan_init(sort_info.info))
3912         goto err;
3913     }
3914     if (_ma_create_index_by_sort(&sort_param,
3915                                  (my_bool) (!(param->testflag & T_VERBOSE)),
3916                                  (size_t) param->sort_buffer_length))
3917     {
3918       if ((param->testflag & T_CREATE_UNIQUE_BY_SORT) && sort_param.sort_info->dupp)
3919         share->state.dupp_key= sort_param.key;
3920       else
3921         param->retry_repair= 1;
3922       _ma_check_print_error(param, "Create index by sort failed");
3923       goto err;
3924     }
3925     DBUG_EXECUTE_IF("maria_flush_whole_log",
3926                     {
3927                       DBUG_PRINT("maria_flush_whole_log", ("now"));
3928                       translog_flush(translog_get_horizon());
3929                     });
3930     DBUG_EXECUTE_IF("maria_crash_create_index_by_sort",
3931                     {
3932                       DBUG_PRINT("maria_crash_create_index_by_sort", ("now"));
3933                       DBUG_SUICIDE();
3934                     });
3935     if (scan_inited)
3936     {
3937       scan_inited= 0;
3938       maria_scan_end(sort_info.info);
3939     }
3940 
3941     /* No need to calculate checksum again. */
3942     sort_param.calc_checksum= 0;
3943     free_root(&sort_param.wordroot, MYF(0));
3944 
3945     /* Set for next loop */
3946     sort_info.max_records= (ha_rows) sort_info.new_info->s->state.state.records;
3947     param->stage++;                             /* Next stage */
3948     param->progress= 0;
3949 
3950     if (param->testflag & T_STATISTICS)
3951       maria_update_key_parts(sort_param.keyinfo, rec_per_key_part,
3952                              sort_param.unique,
3953                              (param->stats_method ==
3954                               MI_STATS_METHOD_IGNORE_NULLS ?
3955                               sort_param.notnull : NULL),
3956                              (ulonglong) share->state.state.records);
3957     maria_set_key_active(share->state.key_map, sort_param.key);
3958     DBUG_PRINT("repair", ("set enabled index #: %u", sort_param.key));
3959 
3960     if (_ma_flush_table_files_before_swap(param, info))
3961       goto err;
3962 
3963     if (sort_param.fix_datafile)
3964     {
3965       param->read_cache.end_of_file=sort_param.filepos;
3966       if (maria_write_data_suffix(&sort_info,1) ||
3967           end_io_cache(&sort_info.new_info->rec_cache))
3968       {
3969         _ma_check_print_error(param, "Got error when flushing row cache");
3970 	goto err;
3971       }
3972       sort_info.new_info->opt_flag&= ~WRITE_CACHE_USED;
3973 
3974       if (param->testflag & T_SAFE_REPAIR)
3975       {
3976 	/* Don't repair if we loosed more than one row */
3977         if (sort_info.new_info->s->state.state.records+1 < start_records)
3978 	{
3979           _ma_check_print_error(param,
3980                                 "Rows lost (Found %lu of %lu); Aborting "
3981                                 "because safe repair was requested",
3982                                 (ulong) sort_info.new_info->s->
3983                                 state.state.records,
3984                                 (ulong) start_records);
3985           share->state.state.records=start_records;
3986 	  goto err;
3987 	}
3988       }
3989 
3990       sort_info.new_info->s->state.state.data_file_length= sort_param.filepos;
3991       if (sort_info.new_info != sort_info.info)
3992       {
3993         MARIA_STATE_INFO save_state= sort_info.new_info->s->state;
3994         if (maria_close(sort_info.new_info))
3995         {
3996           _ma_check_print_error(param, "Got error %d on close", my_errno);
3997           goto err;
3998         }
3999         copy_data_file_state(&share->state, &save_state);
4000         new_file= -1;
4001         sort_info.new_info= info;
4002         info->rec_cache.file= info->dfile.file;
4003       }
4004 
4005       share->state.version=(ulong) time((time_t*) 0);	/* Force reopen */
4006 
4007       /* Replace the actual file with the temporary file */
4008       if (new_file >= 0)
4009       {
4010         mysql_file_close(new_file, MYF(MY_WME));
4011         new_file= -1;
4012       }
4013       change_data_file_descriptor(info, -1);
4014       if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
4015                                   DATA_TMP_EXT, param->backup_time,
4016                                   (param->testflag & T_BACKUP_DATA ?
4017                                    MYF(MY_REDEL_MAKE_BACKUP): MYF(0)) |
4018                                   sync_dir) ||
4019           _ma_open_datafile(info, share))
4020       {
4021         _ma_check_print_error(param, "Couldn't change to new data file");
4022         goto err;
4023       }
4024       if (param->testflag & T_UNPACK)
4025         restore_data_file_type(share);
4026 
4027       org_header_length= share->pack.header_length;
4028       sort_info.org_data_file_type= share->data_file_type;
4029       sort_info.filelength= share->state.state.data_file_length;
4030       sort_param.fix_datafile=0;
4031 
4032       /* Offsets are now in proportion to the new file length */
4033       param->max_progress= sort_info.filelength;
4034 
4035     }
4036     else
4037       share->state.state.data_file_length=sort_param.max_pos;
4038 
4039     param->read_cache.file= info->dfile.file;	/* re-init read cache */
4040     if (share->data_file_type != BLOCK_RECORD)
4041       reinit_io_cache(&param->read_cache, READ_CACHE,
4042                       share->pack.header_length, 1, 1);
4043   }
4044 
4045   if (param->testflag & T_WRITE_LOOP)
4046   {
4047     fputs("          \r",stdout);
4048     fflush(stdout);
4049   }
4050 
4051   if (rep_quick && del+sort_info.dupp != share->state.state.del)
4052   {
4053     _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
4054                           "Found wrong number of deleted records");
4055     _ma_check_print_error(param,"Run recovery again without -q");
4056     got_error=1;
4057     param->retry_repair=1;
4058     param->testflag|=T_RETRY_WITHOUT_QUICK;
4059     goto err;
4060   }
4061 
4062   if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
4063   {
4064     my_off_t skr= share->state.state.data_file_length +
4065                    ((sort_info.org_data_file_type == COMPRESSED_RECORD) ?
4066                    MEMMAP_EXTRA_MARGIN : 0);
4067 #ifdef USE_RELOC
4068     if (sort_info.org_data_file_type == STATIC_RECORD &&
4069 	skr < share->base.reloc*share->base.min_pack_length)
4070       skr=share->base.reloc*share->base.min_pack_length;
4071 #endif
4072     if (skr != sort_info.filelength)
4073       if (mysql_file_chsize(info->dfile.file, skr, 0, MYF(0)))
4074 	_ma_check_print_warning(param,
4075 			       "Can't change size of datafile,  error: %d",
4076 			       my_errno);
4077   }
4078 
4079   if (param->testflag & T_CALC_CHECKSUM)
4080     share->state.state.checksum=param->glob_crc;
4081 
4082   if (mysql_file_chsize(share->kfile.file,
4083                         share->state.state.key_file_length, 0, MYF(0)))
4084     _ma_check_print_warning(param,
4085 			   "Can't change size of indexfile, error: %d",
4086 			   my_errno);
4087 
4088   if (!(param->testflag & T_SILENT))
4089   {
4090     if (start_records != share->state.state.records)
4091       printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
4092   }
4093   if (sort_info.dupp)
4094     _ma_check_print_warning(param,
4095                             "%s records have been removed",
4096                             llstr(sort_info.dupp,llbuff));
4097   got_error=0;
4098   /* If invoked by external program that uses thr_lock */
4099   if (&share->state.state != info->state)
4100     *info->state= *info->state_start= share->state.state;
4101 
4102 err:
4103   if (scan_inited)
4104     maria_scan_end(sort_info.info);
4105   _ma_reset_state(info);
4106 
4107   if (sort_info.new_info)
4108   {
4109     end_io_cache(&sort_info.new_info->rec_cache);
4110     sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4111   }
4112   end_io_cache(&param->read_cache);
4113   info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4114   if (got_error)
4115   {
4116     if (! param->error_printed)
4117       _ma_check_print_error(param,"%d when fixing table",my_errno);
4118     (void)_ma_flush_table_files_before_swap(param, info);
4119     if (sort_info.new_info && sort_info.new_info != sort_info.info)
4120     {
4121       unuse_data_file_descriptor(sort_info.new_info);
4122       maria_close(sort_info.new_info);
4123     }
4124     if (new_file >= 0)
4125     {
4126       mysql_file_close(new_file, MYF(0));
4127       mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
4128     }
4129     maria_mark_crashed_on_repair(info);
4130   }
4131   else
4132   {
4133     if (key_map == share->state.key_map)
4134       share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
4135     /*
4136       Now that we have flushed and forced everything, we can bump
4137       create_rename_lsn:
4138     */
4139     DBUG_EXECUTE_IF("maria_flush_whole_log",
4140                     {
4141                       DBUG_PRINT("maria_flush_whole_log", ("now"));
4142                       translog_flush(translog_get_horizon());
4143                     });
4144     DBUG_EXECUTE_IF("maria_crash_repair",
4145                     {
4146                       DBUG_PRINT("maria_crash_repair", ("now"));
4147                       DBUG_SUICIDE();
4148                     });
4149   }
4150   share->state.changed|= STATE_NOT_SORTED_PAGES;
4151   if (!rep_quick)
4152     share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
4153                              STATE_NOT_MOVABLE);
4154 
4155   /* If caller had disabled logging it's not up to us to re-enable it */
4156   if (reenable_logging)
4157     _ma_reenable_logging_for_table(info, FALSE);
4158   restore_table_state_after_repair(info, &backup_share);
4159 
4160   my_free(sort_param.rec_buff);
4161   my_free(sort_param.record);
4162   my_free(sort_info.key_block);
4163   my_free(sort_info.ft_buf);
4164   my_free(sort_info.buff);
4165   DBUG_RETURN(got_error);
4166 }
4167 
4168 
4169 /*
4170   Threaded repair of table using sorting
4171 
4172   SYNOPSIS
4173     maria_repair_parallel()
4174     param		Repair parameters
4175     info		MARIA handler to repair
4176     name		Name of table (for warnings)
4177     rep_quick		set to <> 0 if we should not change data file
4178 
4179   DESCRIPTION
4180     Same as maria_repair_by_sort but do it multithreaded
4181     Each key is handled by a separate thread.
4182     TODO: make a number of threads a parameter
4183 
4184     In parallel repair we use one thread per index. There are two modes:
4185 
4186     Quick
4187 
4188       Only the indexes are rebuilt. All threads share a read buffer.
4189       Every thread that needs fresh data in the buffer enters the shared
4190       cache lock. The last thread joining the lock reads the buffer from
4191       the data file and wakes all other threads.
4192 
4193     Non-quick
4194 
4195       The data file is rebuilt and all indexes are rebuilt to point to
4196       the new record positions. One thread is the master thread. It
4197       reads from the old data file and writes to the new data file. It
4198       also creates one of the indexes. The other threads read from a
4199       buffer which is filled by the master. If they need fresh data,
4200       they enter the shared cache lock. If the masters write buffer is
4201       full, it flushes it to the new data file and enters the shared
4202       cache lock too. When all threads joined in the lock, the master
4203       copies its write buffer to the read buffer for the other threads
4204       and wakes them.
4205 
4206   RESULT
4207     0	ok
4208     <>0	Error
4209 */
4210 
maria_repair_parallel(HA_CHECK * param,register MARIA_HA * info,const char * name,my_bool rep_quick)4211 int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
4212 			const char * name, my_bool rep_quick)
4213 {
4214   int got_error;
4215   uint i,key, istep;
4216   ha_rows start_records;
4217   my_off_t new_header_length,del;
4218   File new_file;
4219   MARIA_SORT_PARAM *sort_param=0, tmp_sort_param;
4220   MARIA_SHARE *share= info->s;
4221   double  *rec_per_key_part;
4222   HA_KEYSEG *keyseg;
4223   char llbuff[22];
4224   IO_CACHE new_data_cache; /* For non-quick repair. */
4225   IO_CACHE_SHARE io_share;
4226   MARIA_SORT_INFO sort_info;
4227   MARIA_SHARE backup_share;
4228   ulonglong UNINIT_VAR(key_map);
4229   pthread_attr_t thr_attr;
4230   myf sync_dir= ((share->now_transactional && !share->temporary) ?
4231                  MY_SYNC_DIR : 0);
4232   my_bool reenable_logging= 0;
4233   DBUG_ENTER("maria_repair_parallel");
4234 
4235   got_error= 1;
4236   new_file= -1;
4237   start_records= share->state.state.records;
4238   if (!(param->testflag & T_SILENT))
4239   {
4240     printf("- parallel recovering (with sort) Aria-table '%s'\n",name);
4241     printf("Data records: %s\n", llstr(start_records, llbuff));
4242   }
4243 
4244   bzero(&new_data_cache, sizeof(new_data_cache));
4245   if (initialize_variables_for_repair(param, &sort_info, &tmp_sort_param, info,
4246                                       rep_quick, &backup_share))
4247     goto err;
4248 
4249   if ((reenable_logging= share->now_transactional))
4250     _ma_tmp_disable_logging_for_table(info, 0);
4251 
4252   new_header_length= ((param->testflag & T_UNPACK) ? 0 :
4253                       share->pack.header_length);
4254 
4255   /*
4256     Quick repair (not touching data file, rebuilding indexes):
4257     {
4258       Read cache is (HA_CHECK *param)->read_cache using info->dfile.file.
4259     }
4260 
4261     Non-quick repair (rebuilding data file and indexes):
4262     {
4263       Master thread:
4264 
4265         Read  cache is (HA_CHECK *param)->read_cache using info->dfile.file.
4266         Write cache is (MARIA_INFO *info)->rec_cache using new_file.
4267 
4268       Slave threads:
4269 
4270         Read cache is new_data_cache synced to master rec_cache.
4271 
4272       The final assignment of the filedescriptor for rec_cache is done
4273       after the cache creation.
4274 
4275       Don't check file size on new_data_cache, as the resulting file size
4276       is not known yet.
4277 
4278       As rec_cache and new_data_cache are synced, write_buffer_length is
4279       used for the read cache 'new_data_cache'. Both start at the same
4280       position 'new_header_length'.
4281     }
4282   */
4283   DBUG_PRINT("info", ("is quick repair: %d", (int) rep_quick));
4284   if (!rep_quick)
4285     my_b_clear(&new_data_cache);
4286 
4287   /* Initialize pthread structures before goto err. */
4288   mysql_mutex_init(key_SORT_INFO_mutex, &sort_info.mutex, MY_MUTEX_INIT_FAST);
4289   mysql_cond_init(key_SORT_INFO_cond, &sort_info.cond, 0);
4290 
4291   if (!(sort_info.key_block=
4292 	alloc_key_blocks(param, (uint) param->sort_key_blocks,
4293 			 share->base.max_key_block_length)))
4294     goto err;
4295 
4296   if (init_io_cache(&param->read_cache, info->dfile.file,
4297                     (uint) param->read_buffer_length,
4298                     READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)))
4299     goto err;
4300 
4301   sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks;
4302   info->opt_flag|=WRITE_CACHE_USED;
4303   info->rec_cache.file= info->dfile.file;         /* for sort_delete_record */
4304 
4305   if (!rep_quick)
4306   {
4307     /* Get real path for data file */
4308     if ((new_file= mysql_file_create(key_file_tmp,
4309                                      fn_format(param->temp_filename,
4310                                                share->data_file_name.str, "",
4311                                                DATA_TMP_EXT,
4312                                                2+4),
4313                                      0,param->tmpfile_createflag,
4314                                      MYF(0))) < 0)
4315     {
4316       _ma_check_print_error(param,"Can't create new tempfile: '%s'",
4317 			   param->temp_filename);
4318       goto err;
4319     }
4320     if (new_header_length &&
4321         maria_filecopy(param, new_file, info->dfile.file,0L,new_header_length,
4322                        "datafile-header"))
4323       goto err;
4324     if (param->testflag & T_UNPACK)
4325       restore_data_file_type(share);
4326     share->state.dellink= HA_OFFSET_ERROR;
4327 
4328     if (init_io_cache(&new_data_cache, -1,
4329                         (uint) param->write_buffer_length,
4330                         READ_CACHE, new_header_length, 1,
4331                         MYF(MY_WME | MY_DONT_CHECK_FILESIZE)))
4332       goto err;
4333 
4334     if (init_io_cache(&info->rec_cache, new_file,
4335                         (uint) param->write_buffer_length,
4336                         WRITE_CACHE, new_header_length, 1,
4337                         MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))
4338       goto err;
4339 
4340   }
4341 
4342   /* Optionally drop indexes and optionally modify the key_map. */
4343   maria_drop_all_indexes(param, info, FALSE);
4344   key_map= share->state.key_map;
4345   if (param->testflag & T_CREATE_MISSING_KEYS)
4346   {
4347     /* Invert the copied key_map to recreate all disabled indexes. */
4348     key_map= ~key_map;
4349   }
4350 
4351   param->read_cache.end_of_file= sort_info.filelength;
4352 
4353   /*
4354     +1 below is required hack for parallel repair mode.
4355     The share->state.state.records value, that is compared later
4356     to sort_info.max_records and cannot exceed it, is
4357     increased in sort_key_write. In maria_repair_by_sort, sort_key_write
4358     is called after sort_key_read, where the comparison is performed,
4359     but in parallel mode master thread can call sort_key_write
4360     before some other repair thread calls sort_key_read.
4361     Furthermore I'm not even sure +1 would be enough.
4362     May be sort_info.max_records shold be always set to max value in
4363     parallel mode.
4364   */
4365   sort_info.max_records++;
4366 
4367   del=share->state.state.del;
4368 
4369   if (!(sort_param=(MARIA_SORT_PARAM *)
4370         my_malloc((uint) share->base.keys *
4371 		  (sizeof(MARIA_SORT_PARAM) + share->base.pack_reclength),
4372 		  MYF(MY_ZEROFILL))))
4373   {
4374     _ma_check_print_error(param,"Not enough memory for key!");
4375     goto err;
4376   }
4377 #ifdef USING_SECOND_APPROACH
4378   uint total_key_length=0;
4379 #endif
4380   rec_per_key_part= param->new_rec_per_key_part;
4381   share->state.state.records=share->state.state.del=share->state.split=0;
4382   share->state.state.empty=0;
4383 
4384   for (i=key=0, istep=1 ; key < share->base.keys ;
4385        rec_per_key_part+=sort_param[i].keyinfo->keysegs, i+=istep, key++)
4386   {
4387     sort_param[i].key=key;
4388     sort_param[i].keyinfo=share->keyinfo+key;
4389     sort_param[i].seg=sort_param[i].keyinfo->seg;
4390     /*
4391       Skip this index if it is marked disabled in the copied
4392       (and possibly inverted) key_map.
4393     */
4394     if (! maria_is_key_active(key_map, key))
4395     {
4396       /* Remember old statistics for key */
4397       memcpy((char*) rec_per_key_part,
4398 	     (char*) (share->state.rec_per_key_part+
4399 		      (uint) (rec_per_key_part - param->new_rec_per_key_part)),
4400 	     sort_param[i].keyinfo->keysegs*sizeof(*rec_per_key_part));
4401       istep=0;
4402       continue;
4403     }
4404     istep=1;
4405     if ((!(param->testflag & T_SILENT)))
4406       printf ("- Fixing index %d\n",key+1);
4407     if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
4408     {
4409       sort_param[i].key_read=sort_maria_ft_key_read;
4410       sort_param[i].key_write=sort_maria_ft_key_write;
4411     }
4412     else
4413     {
4414       sort_param[i].key_read=sort_key_read;
4415       sort_param[i].key_write=sort_key_write;
4416     }
4417     sort_param[i].key_cmp=sort_key_cmp;
4418     sort_param[i].lock_in_memory=maria_lock_memory;
4419     sort_param[i].tmpdir=param->tmpdir;
4420     sort_param[i].sort_info=&sort_info;
4421     sort_param[i].master=0;
4422     sort_param[i].fix_datafile=0;
4423     sort_param[i].calc_checksum= 0;
4424 
4425     sort_param[i].filepos=new_header_length;
4426     sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length;
4427 
4428     sort_param[i].record= (((uchar *)(sort_param+share->base.keys))+
4429                           (share->base.pack_reclength * i));
4430     if (_ma_alloc_buffer(&sort_param[i].rec_buff, &sort_param[i].rec_buff_size,
4431                          share->base.default_rec_buff_size, MYF(0)))
4432     {
4433       _ma_check_print_error(param,"Not enough memory!");
4434       goto err;
4435     }
4436     sort_param[i].key_length=share->rec_reflength;
4437     for (keyseg=sort_param[i].seg; keyseg->type != HA_KEYTYPE_END;
4438 	 keyseg++)
4439     {
4440       sort_param[i].key_length+=keyseg->length;
4441       if (keyseg->flag & HA_SPACE_PACK)
4442         sort_param[i].key_length+=get_pack_length(keyseg->length);
4443       if (keyseg->flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART))
4444         sort_param[i].key_length+= 2 + MY_TEST(keyseg->length >= 127);
4445       if (keyseg->flag & HA_NULL_PART)
4446         sort_param[i].key_length++;
4447     }
4448 #ifdef USING_SECOND_APPROACH
4449     total_key_length+=sort_param[i].key_length;
4450 #endif
4451 
4452     if (sort_param[i].keyinfo->flag & HA_FULLTEXT)
4453     {
4454       uint ft_max_word_len_for_sort=
4455         (FT_MAX_WORD_LEN_FOR_SORT *
4456          sort_param[i].keyinfo->seg->charset->mbmaxlen);
4457       sort_param[i].key_length+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
4458       init_alloc_root(&sort_param[i].wordroot, "sort",
4459                       FTPARSER_MEMROOT_ALLOC_SIZE, 0,
4460                       MYF(param->malloc_flags));
4461     }
4462   }
4463   sort_info.total_keys=i;
4464   sort_param[0].master= 1;
4465   sort_param[0].fix_datafile= ! rep_quick;
4466   sort_param[0].calc_checksum= MY_TEST(param->testflag & T_CALC_CHECKSUM);
4467 
4468   if (!maria_ftparser_alloc_param(info))
4469     goto err;
4470 
4471   sort_info.got_error=0;
4472   mysql_mutex_lock(&sort_info.mutex);
4473 
4474   /*
4475     Initialize the I/O cache share for use with the read caches and, in
4476     case of non-quick repair, the write cache. When all threads join on
4477     the cache lock, the writer copies the write cache contents to the
4478     read caches.
4479   */
4480   if (i > 1)
4481   {
4482     if (rep_quick)
4483       init_io_cache_share(&param->read_cache, &io_share, NULL, i);
4484     else
4485       init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i);
4486   }
4487   else
4488     io_share.total_threads= 0; /* share not used */
4489 
4490   (void) pthread_attr_init(&thr_attr);
4491   (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED);
4492 
4493   for (i=0 ; i < sort_info.total_keys ; i++)
4494   {
4495     /*
4496       Copy the properly initialized IO_CACHE structure so that every
4497       thread has its own copy. In quick mode param->read_cache is shared
4498       for use by all threads. In non-quick mode all threads but the
4499       first copy the shared new_data_cache, which is synchronized to the
4500       write cache of the first thread. The first thread copies
4501       param->read_cache, which is not shared.
4502     */
4503     sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache :
4504                                new_data_cache);
4505     DBUG_PRINT("io_cache_share", ("thread: %u  read_cache: %p",
4506                                   i, &sort_param[i].read_cache));
4507 
4508     /*
4509       two approaches: the same amount of memory for each thread
4510       or the memory for the same number of keys for each thread...
4511       In the second one all the threads will fill their sort_buffers
4512       (and call write_keys) at the same time, putting more stress on i/o.
4513     */
4514     sort_param[i].sortbuff_size=
4515 #ifndef USING_SECOND_APPROACH
4516       param->sort_buffer_length/sort_info.total_keys;
4517 #else
4518       param->sort_buffer_length*sort_param[i].key_length/total_key_length;
4519 #endif
4520     if (mysql_thread_create(key_thread_find_all_keys,
4521                             &sort_param[i].thr, &thr_attr,
4522 	                    _ma_thr_find_all_keys, (void *) (sort_param+i)))
4523     {
4524       _ma_check_print_error(param,"Cannot start a repair thread");
4525       /* Cleanup: Detach from the share. Avoid others to be blocked. */
4526       if (io_share.total_threads)
4527         remove_io_thread(&sort_param[i].read_cache);
4528       DBUG_PRINT("error", ("Cannot start a repair thread"));
4529       sort_info.got_error=1;
4530     }
4531     else
4532       sort_info.threads_running++;
4533   }
4534   (void) pthread_attr_destroy(&thr_attr);
4535 
4536   /* waiting for all threads to finish */
4537   while (sort_info.threads_running)
4538     mysql_cond_wait(&sort_info.cond, &sort_info.mutex);
4539   mysql_mutex_unlock(&sort_info.mutex);
4540 
4541   if ((got_error= _ma_thr_write_keys(sort_param)))
4542   {
4543     param->retry_repair=1;
4544     goto err;
4545   }
4546   got_error=1;				/* Assume the following may go wrong */
4547 
4548   if (_ma_flush_table_files_before_swap(param, info))
4549     goto err;
4550 
4551   if (sort_param[0].fix_datafile)
4552   {
4553     /*
4554       Append some nulls to the end of a memory mapped file. Destroy the
4555       write cache. The master thread did already detach from the share
4556       by remove_io_thread() in sort.c:thr_find_all_keys().
4557     */
4558     if (maria_write_data_suffix(&sort_info,1) ||
4559         end_io_cache(&info->rec_cache))
4560       goto err;
4561     if (param->testflag & T_SAFE_REPAIR)
4562     {
4563       /* Don't repair if we loosed more than one row */
4564       if (sort_info.new_info->s->state.state.records+1 < start_records)
4565       {
4566         _ma_check_print_error(param,
4567                               "Rows lost (Found %lu of %lu); Aborting "
4568                               "because safe repair was requested",
4569                               (ulong) share->state.state.records,
4570                               (ulong) start_records);
4571         share->state.state.records=start_records;
4572         goto err;
4573       }
4574     }
4575     share->state.state.data_file_length= sort_param->filepos;
4576     /* Only whole records */
4577     share->state.version= (ulong) time((time_t*) 0);
4578     /*
4579       Exchange the data file descriptor of the table, so that we use the
4580       new file from now on.
4581      */
4582     mysql_file_close(info->dfile.file, MYF(0));
4583     info->dfile.file= new_file;
4584     share->pack.header_length=(ulong) new_header_length;
4585   }
4586   else
4587     share->state.state.data_file_length=sort_param->max_pos;
4588 
4589   if (rep_quick && del+sort_info.dupp != share->state.state.del)
4590   {
4591     _ma_check_print_error(param,"Couldn't fix table with quick recovery: "
4592                           "Found wrong number of deleted records");
4593     _ma_check_print_error(param,"Run recovery again without -q");
4594     param->retry_repair=1;
4595     param->testflag|=T_RETRY_WITHOUT_QUICK;
4596     goto err;
4597   }
4598 
4599   if (rep_quick && (param->testflag & T_FORCE_UNIQUENESS))
4600   {
4601     my_off_t skr= share->state.state.data_file_length +
4602                    ((sort_info.org_data_file_type == COMPRESSED_RECORD) ?
4603                    MEMMAP_EXTRA_MARGIN : 0);
4604 #ifdef USE_RELOC
4605     if (sort_info.org_data_file_type == STATIC_RECORD &&
4606 	skr < share->base.reloc*share->base.min_pack_length)
4607       skr=share->base.reloc*share->base.min_pack_length;
4608 #endif
4609     if (skr != sort_info.filelength)
4610       if (mysql_file_chsize(info->dfile.file, skr, 0, MYF(0)))
4611 	_ma_check_print_warning(param,
4612 			       "Can't change size of datafile,  error: %d",
4613 			       my_errno);
4614   }
4615   if (param->testflag & T_CALC_CHECKSUM)
4616     share->state.state.checksum=param->glob_crc;
4617 
4618   if (mysql_file_chsize(share->kfile.file,
4619                         share->state.state.key_file_length, 0, MYF(0)))
4620     _ma_check_print_warning(param,
4621 			   "Can't change size of indexfile, error: %d",
4622                             my_errno);
4623 
4624   if (!(param->testflag & T_SILENT))
4625   {
4626     if (start_records != share->state.state.records)
4627       printf("Data records: %s\n", llstr(share->state.state.records,llbuff));
4628   }
4629   if (sort_info.dupp)
4630     _ma_check_print_warning(param,
4631                             "%s records have been removed",
4632                             llstr(sort_info.dupp,llbuff));
4633   got_error=0;
4634   /* If invoked by external program that uses thr_lock */
4635   if (&share->state.state != info->state)
4636     *info->state= *info->state_start= share->state.state;
4637 
4638 err:
4639   _ma_reset_state(info);
4640 
4641   /*
4642     Destroy the write cache. The master thread did already detach from
4643     the share by remove_io_thread() or it was not yet started (if the
4644     error happend before creating the thread).
4645   */
4646   if (sort_info.new_info)
4647   {
4648     end_io_cache(&sort_info.new_info->rec_cache);
4649     sort_info.new_info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4650   }
4651   end_io_cache(&param->read_cache);
4652   info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
4653   /*
4654     Destroy the new data cache in case of non-quick repair. All slave
4655     threads did either detach from the share by remove_io_thread()
4656     already or they were not yet started (if the error happend before
4657     creating the threads).
4658   */
4659   if (!rep_quick && my_b_inited(&new_data_cache))
4660     end_io_cache(&new_data_cache);
4661   if (!got_error)
4662   {
4663     /* Replace the actual file with the temporary file */
4664     if (new_file >= 0)
4665     {
4666       mysql_file_close(new_file,MYF(0));
4667       info->dfile.file= new_file= -1;
4668       if (maria_change_to_newfile(share->data_file_name.str, MARIA_NAME_DEXT,
4669                                   DATA_TMP_EXT, param->backup_time,
4670                                   MYF((param->testflag & T_BACKUP_DATA ?
4671                                        MY_REDEL_MAKE_BACKUP : 0) |
4672                                       sync_dir)) ||
4673 	  _ma_open_datafile(info,share))
4674 	got_error=1;
4675     }
4676   }
4677   if (got_error)
4678   {
4679     if (! param->error_printed)
4680       _ma_check_print_error(param,"%d when fixing table",my_errno);
4681     (void)_ma_flush_table_files_before_swap(param, info);
4682     if (new_file >= 0)
4683     {
4684       mysql_file_close(new_file,MYF(0));
4685       mysql_file_delete(key_file_tmp, param->temp_filename, MYF(MY_WME));
4686       if (info->dfile.file == new_file)
4687 	info->dfile.file= -1;
4688     }
4689     maria_mark_crashed_on_repair(info);
4690   }
4691   else if (key_map == share->state.key_map)
4692     share->state.changed&= ~STATE_NOT_OPTIMIZED_KEYS;
4693   share->state.changed|= STATE_NOT_SORTED_PAGES;
4694   if (!rep_quick)
4695     share->state.changed&= ~(STATE_NOT_OPTIMIZED_ROWS | STATE_NOT_ZEROFILLED |
4696                              STATE_NOT_MOVABLE);
4697 
4698   mysql_cond_destroy (&sort_info.cond);
4699   mysql_mutex_destroy(&sort_info.mutex);
4700 
4701   /* If caller had disabled logging it's not up to us to re-enable it */
4702   if (reenable_logging)
4703     _ma_reenable_logging_for_table(info, FALSE);
4704   restore_table_state_after_repair(info, &backup_share);
4705 
4706   my_free(sort_info.ft_buf);
4707   my_free(sort_info.key_block);
4708   my_free(sort_param);
4709   my_free(sort_info.buff);
4710   if (!got_error && (param->testflag & T_UNPACK))
4711     restore_data_file_type(share);
4712   DBUG_RETURN(got_error);
4713 }
4714 
4715 	/* Read next record and return next key */
4716 
/*
  Fetch the next row and build the sort key for it.

  SYNOPSIS
    sort_key_read()
    sort_param          Per-key sort state; sort_param->key selects the index
    key                 Buffer that receives the generated key

  DESCRIPTION
    Reads the next record with sort_get_next_record(), hands it to
    _ma_sort_write_record() and then creates the key for index
    sort_param->key through the make_key function of that index.
    sort_param->real_key_length is set to data length + reference length
    of the created key.

  RETURN
    0           ok, key was created
    1           too many records found, or _ma_sort_write_record() failed
    other       non-zero result of sort_get_next_record(), passed through
*/

static int sort_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
{
  MARIA_SORT_INFO *sinfo= sort_param->sort_info;
  MARIA_HA *handler= sinfo->info;
  MARIA_KEY built_key;
  int res;
  DBUG_ENTER("sort_key_read");

  res= sort_get_next_record(sort_param);
  if (res)
    DBUG_RETURN(res);

  /* Refuse to continue once the record count has reached the limit */
  if (handler->s->state.state.records == sinfo->max_records)
  {
    _ma_check_print_error(sinfo->param,
                          "Key %d - Found too many records; Can't continue",
                          sort_param->key+1);
    DBUG_RETURN(1);
  }

  if (_ma_sort_write_record(sort_param))
    DBUG_RETURN(1);

  (*handler->s->keyinfo[sort_param->key].make_key)(handler, &built_key,
                                                   sort_param->key, key,
                                                   sort_param->record,
                                                   sort_param->current_filepos,
                                                   0);
  sort_param->real_key_length= built_key.data_length + built_key.ref_length;

#ifdef HAVE_valgrind
  /* Clear the unused tail of the key buffer */
  bzero(key + sort_param->real_key_length,
        (sort_param->key_length - sort_param->real_key_length));
#endif
  DBUG_RETURN(0);
} /* sort_key_read */
4749 
4750 
/*
  Return the next fulltext key, reading a new row when needed.

  SYNOPSIS
    sort_maria_ft_key_read()
    sort_param          Per-key sort state; sort_param->key selects the index
    key                 Buffer that receives the generated key

  DESCRIPTION
    Each call produces the key for one word. While sort_param->wordlist
    is set, the next word of the current record (sort_param->wordptr) is
    used. Otherwise records are read, written with
    _ma_sort_write_record() and parsed with _ma_ft_parserecord() until a
    record gives a word list whose first entry has a non-zero 'pos'.
    When the entry following the used word has a zero 'pos', the word
    memory is released and wordlist is reset so that the next call reads
    a new record.

  RETURN
    0           ok, key was created
    1           _ma_ft_parserecord() failed
    other       non-zero result of sort_get_next_record() or
                _ma_sort_write_record(), passed through
*/

static int sort_maria_ft_key_read(MARIA_SORT_PARAM *sort_param, uchar *key)
{
  MARIA_SORT_INFO *sinfo= sort_param->sort_info;
  MARIA_HA *handler= sinfo->info;
  MARIA_KEY built_key;
  FT_WORD *word= 0;
  int res= 0;
  DBUG_ENTER("sort_maria_ft_key_read");

  if (sort_param->wordlist)
  {
    /* Words of the current record are still pending */
    word= (FT_WORD*) (sort_param->wordptr);
  }
  else
  {
    /* Read records until one of them gives at least one word */
    do
    {
      free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
      res= sort_get_next_record(sort_param);
      if (res)
        DBUG_RETURN(res);
      res= _ma_sort_write_record(sort_param);
      if (res)
        DBUG_RETURN(res);
      word= _ma_ft_parserecord(handler, sort_param->key, sort_param->record,
                               &sort_param->wordroot);
      if (!word)
        DBUG_RETURN(1);
    } while (!word->pos);
    sort_param->wordptr= sort_param->wordlist= word;
  }

  _ma_ft_make_key(handler, &built_key, sort_param->key, key, word,
                  sort_param->current_filepos);
  word++;                                       /* Step to the next word */
  sort_param->real_key_length= built_key.data_length + built_key.ref_length;

#ifdef HAVE_valgrind
  /* Clear the unused tail of the key buffer, if there is one */
  if (sort_param->key_length > sort_param->real_key_length)
    bzero(key + sort_param->real_key_length,
          (sort_param->key_length - sort_param->real_key_length));
#endif

  if (word->pos)
    sort_param->wordptr= (void*) word;
  else
  {
    /* That was the last word of this record */
    free_root(&sort_param->wordroot, MYF(MY_MARK_BLOCKS_FREE));
    sort_param->wordlist= 0;
  }

  DBUG_RETURN(res);
} /* sort_maria_ft_key_read */
4803 
4804 
4805 /*
4806   Read next record from file using parameters in sort_info.
4807 
4808   SYNOPSIS
4809     sort_get_next_record()
4810       sort_param                Information about and for the sort process
4811 
4812   NOTES
4813     Dynamic Records With Non-Quick Parallel Repair
4814 
4815     For non-quick parallel repair we use a synchronized read/write
4816     cache. This means that one thread is the master who fixes the data
4817     file by reading each record from the old data file and writing it
4818     to the new data file. By doing this the records in the new data
4819     file are written contiguously. Whenever the write buffer is full,
4820     it is copied to the read buffer. The slaves read from the read
4821     buffer, which is not associated with a file. Thus read_cache.file
    is -1. When using _ma_read_cache(), the slaves must always set
4823     flag to READING_NEXT so that the function never tries to read from
4824     file. This is safe because the records are contiguous. There is no
4825     need to read outside the cache. This condition is evaluated in the
4826     variable 'parallel_flag' for quick reference. read_cache.file must
4827     be >= 0 in every other case.
4828 
4829   RETURN
4830     -1          end of file
4831     0           ok
4832                 sort_param->current_filepos points to record position.
4833                 sort_param->record contains record
4834                 sort_param->max_pos contains position to last byte read
4835     > 0         error
4836 */
4837 
static int sort_get_next_record(MARIA_SORT_PARAM *sort_param)
{
  /*
    Fetch the next row of the table being repaired into sort_param->record.
    The way rows are located depends on sort_info->org_data_file_type; each
    case of the switch below is an endless loop that is left only through
    DBUG_RETURN(): 0 when a row was found, -1 at end of data, > 0 on error.
  */
  int searching;
  int parallel_flag;
  uint found_record,b_type,left_length;
  my_off_t pos;
  MARIA_BLOCK_INFO block_info;
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  HA_CHECK *param=sort_info->param;
  MARIA_HA *info=sort_info->info;
  MARIA_SHARE *share= info->s;
  char llbuff[22],llbuff2[22];
  DBUG_ENTER("sort_get_next_record");

  /* Return an error at once when _ma_killed_ptr() signals */
  if (_ma_killed_ptr(param))
    DBUG_RETURN(1);
  /* Report progress once per WRITE_COUNT calls */
  if (param->progress_counter++ >= WRITE_COUNT)
  {
    param->progress_counter= 0;
    _ma_report_progress(param, param->progress, param->max_progress);
  }

  switch (sort_info->org_data_file_type) {
  case BLOCK_RECORD:
  {
    for (;;)
    {
      int flag;
      /*
        Assume table is transactional and it had LSN pages in the
        cache. Repair has flushed them, left data pages stay in
        cache, and disabled transactionality (so share's current page
        type is PLAIN); page cache would assert if it finds a cached LSN page
        while _ma_scan_block_record() requested a PLAIN page. So we use
        UNKNOWN.
      */
      enum pagecache_page_type save_page_type= share->page_type;
      share->page_type= PAGECACHE_READ_UNKNOWN_PAGE;
      if (info != sort_info->new_info)
      {
        /* Safe scanning */
        flag= _ma_safe_scan_block_record(sort_info, info,
                                         sort_param->record);
      }
      else
      {
        /*
          Scan on clean table.
          It requires a reliable data_file_length so we set it.
        */
        share->state.state.data_file_length= sort_info->filelength;
        info->cur_row.trid= 0;
        flag= _ma_scan_block_record(info, sort_param->record,
                                    info->cur_row.nextpos, 1);
        set_if_bigger(param->max_found_trid, info->cur_row.trid);
        /* A row with a transaction id above max_trid is treated as an error */
        if (info->cur_row.trid > param->max_trid)
        {
          _ma_check_print_not_visible_error(param, info->cur_row.trid);
          flag= HA_ERR_ROW_NOT_VISIBLE;
        }
      }
      param->progress= (ma_recordpos_to_page(info->cur_row.lastpos)*
                        share->block_size);

      /* Restore the page type before any exit from this iteration */
      share->page_type= save_page_type;
      if (!flag)
      {
        if (sort_param->calc_checksum)
        {
          ha_checksum checksum;
          checksum= (*share->calc_check_checksum)(info, sort_param->record);
          /*
            Only the low 8 bits of the computed checksum are compared with
            the value found for the row; a mismatching row is skipped.
          */
          if (share->calc_checksum &&
              info->cur_row.checksum != (checksum & 255))
          {
            if (param->testflag & T_VERBOSE)
            {
              record_pos_to_txt(info, info->cur_row.lastpos, llbuff);
              _ma_check_print_info(param,
                                   "Found record with wrong checksum at %s",
                                   llbuff);
            }
            continue;
          }
          info->cur_row.checksum= checksum;
          param->glob_crc+= checksum;
        }
        sort_param->start_recpos= sort_param->current_filepos=
          info->cur_row.lastpos;
        DBUG_RETURN(0);
      }
      if (flag == HA_ERR_END_OF_FILE)
      {
        sort_param->max_pos= share->state.state.data_file_length;
        DBUG_RETURN(-1);
      }
      /* Retry only if wrong record, not if disk error */
      if (flag != HA_ERR_WRONG_IN_RECORD && flag != HA_ERR_WRONG_CRC &&
          flag != HA_ERR_DECRYPTION_FAILED)
      {
        retry_if_quick(sort_param, flag);
        DBUG_RETURN(flag);
      }
    }
    break;                                      /* Impossible */
  }
  case STATIC_RECORD:
    for (;;)
    {
      /* Rows have a fixed size of share->base.pack_reclength bytes */
      if (my_b_read(&sort_param->read_cache,sort_param->record,
                    share->base.pack_reclength))
      {
        if (sort_param->read_cache.error)
          param->out_flag |= O_DATA_LOST;
        retry_if_quick(sort_param, my_errno);
        DBUG_RETURN(-1);
      }
      sort_param->start_recpos=sort_param->pos;
      param->progress= sort_param->pos;
      if (!sort_param->fix_datafile)
      {
        sort_param->current_filepos= sort_param->pos;
        if (sort_param->master)
          share->state.split++;
      }
      sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength);
      /*
        A non-zero first byte means the row is returned; otherwise the slot
        is accounted as deleted space and the scan goes on.
      */
      if (*sort_param->record)
      {
        if (sort_param->calc_checksum)
          param->glob_crc+= (info->cur_row.checksum=
                             _ma_static_checksum(info,sort_param->record));
        DBUG_RETURN(0);
      }
      if (!sort_param->fix_datafile && sort_param->master)
      {
        share->state.state.del++;
        share->state.state.empty+=share->base.pack_reclength;
      }
    }
  case DYNAMIC_RECORD:
  {
    uchar *UNINIT_VAR(to);
    ha_checksum checksum= 0;

    pos=sort_param->pos;
    param->progress= pos;
    /*
      'searching' means that we probe every aligned position for something
      that looks like a block header instead of following block lengths.
    */
    searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND));
    /* No file behind the read cache: never let _ma_read_cache() hit a file */
    parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0;
    for (;;)
    {
      found_record=block_info.second_read= 0;
      left_length=1;
      if (searching)
      {
        pos=MY_ALIGN(pos,MARIA_DYN_ALIGN_SIZE);
        param->testflag|=T_RETRY_WITHOUT_QUICK;
        sort_param->start_recpos=pos;
      }
      /* One iteration per block; loops until the whole row is collected */
      do
      {
        if (pos > sort_param->max_pos)
          sort_param->max_pos=pos;
        if (pos & (MARIA_DYN_ALIGN_SIZE-1))
        {
          if ((param->testflag & T_VERBOSE) || searching == 0)
            _ma_check_print_info(param,"Wrong aligned block at %s",
                                llstr(pos,llbuff));
          if (searching)
            goto try_next;
        }
        if (found_record && pos == param->search_after_block)
          _ma_check_print_info(param,"Block: %s used by record at %s",
                     llstr(param->search_after_block,llbuff),
                     llstr(sort_param->start_recpos,llbuff2));
        if (_ma_read_cache(info, &sort_param->read_cache,
                           block_info.header, pos,
                           MARIA_BLOCK_INFO_HEADER_LENGTH,
                           (! found_record ? READING_NEXT : 0) |
                           parallel_flag | READING_HEADER))
        {
          if (found_record)
          {
            /*
              Print my_errno, the same variable the read-error message for
              row data further down prints (presumably it is the one set by
              _ma_read_cache(); the plain C errno may be stale here).
            */
            _ma_check_print_info(param,
                                "Can't read whole record at %s (errno: %d)",
                                llstr(sort_param->start_recpos,llbuff),
                                my_errno);
            goto try_next;
          }
          DBUG_RETURN(-1);
        }
        if (searching && ! sort_param->fix_datafile)
        {
          param->error_printed=1;
          param->retry_repair=1;
          param->testflag|=T_RETRY_WITHOUT_QUICK;
          my_errno= HA_ERR_WRONG_IN_RECORD;
          DBUG_RETURN(1);       /* Something wrong with data */
        }
        b_type= _ma_get_block_info(info, &block_info,-1,pos);
        if ((b_type & (BLOCK_ERROR | BLOCK_FATAL_ERROR)) ||
           ((b_type & BLOCK_FIRST) &&
             (block_info.rec_len < (uint) share->base.min_pack_length ||
              block_info.rec_len > (uint) share->base.max_pack_length)))
        {
          uint i;
          if (param->testflag & T_VERBOSE || searching == 0)
            _ma_check_print_info(param,
                                "Wrong bytesec: %3d-%3d-%3d at %10s; Skipped",
                       block_info.header[0],block_info.header[1],
                       block_info.header[2],llstr(pos,llbuff));
          if (found_record)
            goto try_next;
          block_info.second_read=0;
          searching=1;
          /* Search after block in read header string */
          for (i=MARIA_DYN_ALIGN_SIZE ;
               i < MARIA_BLOCK_INFO_HEADER_LENGTH ;
               i+= MARIA_DYN_ALIGN_SIZE)
            if (block_info.header[i] >= 1 &&
                block_info.header[i] <= MARIA_MAX_DYN_HEADER_BYTE)
              break;
          pos+=(ulong) i;
          sort_param->start_recpos=pos;
          continue;
        }
        if (b_type & BLOCK_DELETED)
        {
          /* Sanity check length and delete-chain links of a deleted block */
          my_bool error=0;
          if (block_info.block_len+ (uint) (block_info.filepos-pos) <
              share->base.min_block_length)
          {
            if (!searching)
              _ma_check_print_info(param,
                                   "Deleted block with impossible length %lu "
                                   "at %s",
                                   block_info.block_len,llstr(pos,llbuff));
            error=1;
          }
          else
          {
            if ((block_info.next_filepos != HA_OFFSET_ERROR &&
                 block_info.next_filepos >=
                 share->state.state.data_file_length) ||
                (block_info.prev_filepos != HA_OFFSET_ERROR &&
                 block_info.prev_filepos >=
                 share->state.state.data_file_length))
            {
              if (!searching)
                _ma_check_print_info(param,
                                    "Delete link points outside datafile at "
                                     "%s",
                                     llstr(pos,llbuff));
              error=1;
            }
          }
          if (error)
          {
            if (found_record)
              goto try_next;
            searching=1;
            pos+= MARIA_DYN_ALIGN_SIZE;
            sort_param->start_recpos=pos;
            block_info.second_read=0;
            continue;
          }
        }
        else
        {
          if (block_info.block_len+ (uint) (block_info.filepos-pos) <
              share->base.min_block_length ||
              block_info.block_len > (uint) share->base.max_pack_length+
              MARIA_SPLIT_LENGTH)
          {
            if (!searching)
              _ma_check_print_info(param,
                                   "Found block with impossible length %lu "
                                   "at %s; Skipped",
                                   block_info.block_len+
                                   (uint) (block_info.filepos-pos),
                                   llstr(pos,llbuff));
            if (found_record)
              goto try_next;
            searching=1;
            pos+= MARIA_DYN_ALIGN_SIZE;
            sort_param->start_recpos=pos;
            block_info.second_read=0;
            continue;
          }
        }
        if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
        {
          /* Only the master accounts deleted blocks, and only if not fixing */
          if (!sort_param->fix_datafile && sort_param->master &&
              (b_type & BLOCK_DELETED))
          {
            share->state.state.empty+=block_info.block_len;
            share->state.state.del++;
            share->state.split++;
          }
          if (found_record)
            goto try_next;
          if (searching)
          {
            pos+=MARIA_DYN_ALIGN_SIZE;
            sort_param->start_recpos=pos;
          }
          else
            pos=block_info.filepos+block_info.block_len;
          block_info.second_read=0;
          continue;
        }

        if (!sort_param->fix_datafile && sort_param->master)
          share->state.split++;
        /* First block of a row: remember where it starts and set up buffer */
        if (! found_record++)
        {
          sort_param->find_length=left_length=block_info.rec_len;
          sort_param->start_recpos=pos;
          if (!sort_param->fix_datafile)
            sort_param->current_filepos= sort_param->start_recpos;
          if (sort_param->fix_datafile && (param->testflag & T_EXTEND))
            sort_param->pos=block_info.filepos+1;
          else
            sort_param->pos=block_info.filepos+block_info.block_len;
          if (share->base.blobs)
          {
            if (_ma_alloc_buffer(&sort_param->rec_buff,
                                 &sort_param->rec_buff_size,
                                 block_info.rec_len +
                                 share->base.extra_rec_buff_size, MYF(0)))

            {
              if (param->max_record_length >= block_info.rec_len)
              {
                _ma_check_print_error(param,"Not enough memory for blob at %s "
                                      "(need %lu)",
                                     llstr(sort_param->start_recpos,llbuff),
                                     (ulong) block_info.rec_len);
                DBUG_RETURN(1);
              }
              else
              {
                _ma_check_print_info(param,"Not enough memory for blob at %s "
                                     "(need %lu); Row skipped",
                                    llstr(sort_param->start_recpos,llbuff),
                                    (ulong) block_info.rec_len);
                goto try_next;
              }
            }
          }
          to= sort_param->rec_buff;
        }
        if (left_length < block_info.data_len || ! block_info.data_len)
        {
          _ma_check_print_info(param,
                              "Found block with too small length at %s; "
                               "Skipped",
                               llstr(sort_param->start_recpos,llbuff));
          goto try_next;
        }
        if (block_info.filepos + block_info.data_len >
            sort_param->read_cache.end_of_file)
        {
          _ma_check_print_info(param,
                              "Found block that points outside data file "
                               "at %s",
                               llstr(sort_param->start_recpos,llbuff));
          goto try_next;
        }
        /*
          Copy information that is already read. Avoid accessing data
          below the cache start. This could happen if the header
          stretched over the end of the previous buffer contents.
        */
        {
          uint header_len= (uint) (block_info.filepos - pos);
          uint prefetch_len= (MARIA_BLOCK_INFO_HEADER_LENGTH - header_len);

          if (prefetch_len > block_info.data_len)
            prefetch_len= block_info.data_len;
          if (prefetch_len)
          {
            memcpy(to, block_info.header + header_len, prefetch_len);
            block_info.filepos+= prefetch_len;
            block_info.data_len-= prefetch_len;
            left_length-= prefetch_len;
            to+= prefetch_len;
          }
        }
        /* Read whatever part of the block data the header read did not cover */
        if (block_info.data_len &&
            _ma_read_cache(info, &sort_param->read_cache,to,block_info.filepos,
                           block_info.data_len,
                           (found_record == 1 ? READING_NEXT : 0) |
                           parallel_flag))
        {
          _ma_check_print_info(param,
                              "Read error for block at: %s (error: %d); "
                               "Skipped",
                              llstr(block_info.filepos,llbuff),my_errno);
          goto try_next;
        }
        left_length-=block_info.data_len;
        to+=block_info.data_len;
        pos=block_info.next_filepos;
        if (pos == HA_OFFSET_ERROR && left_length)
        {
          _ma_check_print_info(param,
                               "Wrong block with wrong total length "
                               "starting at %s",
                              llstr(sort_param->start_recpos,llbuff));
          goto try_next;
        }
        if (pos + MARIA_BLOCK_INFO_HEADER_LENGTH >
            sort_param->read_cache.end_of_file)
        {
          _ma_check_print_info(param,
                               "Found link that points at %s (outside data "
                               "file) at %s",
                              llstr(pos,llbuff2),
                              llstr(sort_param->start_recpos,llbuff));
          goto try_next;
        }
      } while (left_length);

      /* All blocks of the row are in rec_buff; unpack into the row buffer */
      if (_ma_rec_unpack(info,sort_param->record,sort_param->rec_buff,
                         sort_param->find_length) != MY_FILE_ERROR)
      {
        if (sort_param->read_cache.error < 0)
          DBUG_RETURN(1);
        if (sort_param->calc_checksum)
          checksum= (share->calc_check_checksum)(info, sort_param->record);
        if ((param->testflag & (T_EXTEND | T_REP)) || searching)
        {
          if (_ma_rec_check(info, sort_param->record, sort_param->rec_buff,
                            sort_param->find_length,
                            (param->testflag & T_QUICK) &&
                            sort_param->calc_checksum &&
                            MY_TEST(share->calc_checksum), checksum))
          {
            _ma_check_print_info(param,"Found wrong packed record at %s",
                                llstr(sort_param->start_recpos,llbuff));
            goto try_next;
          }
        }
        if (sort_param->calc_checksum)
          param->glob_crc+= checksum;
        DBUG_RETURN(0);
      }
      if (!searching)
        _ma_check_print_info(param,"Key %d - Found wrong stored record at %s",
                            sort_param->key+1,
                            llstr(sort_param->start_recpos,llbuff));
    try_next:
      /* Give up on this start position; probe the next aligned one */
      pos=(sort_param->start_recpos+=MARIA_DYN_ALIGN_SIZE);
      searching=1;
    }
  }
  case COMPRESSED_RECORD:
    param->progress= sort_param->pos;
    /* After the first failed attempt we advance one byte at a time */
    for (searching=0 ;; searching=1, sort_param->pos++)
    {
      if (_ma_read_cache(info, &sort_param->read_cache, block_info.header,
                         sort_param->pos,
                         share->pack.ref_length,READING_NEXT))
        DBUG_RETURN(-1);
      if (searching && ! sort_param->fix_datafile)
      {
        param->error_printed=1;
        param->retry_repair=1;
        param->testflag|=T_RETRY_WITHOUT_QUICK;
        my_errno= HA_ERR_WRONG_IN_RECORD;
        DBUG_RETURN(1);         /* Something wrong with data */
      }
      sort_param->start_recpos=sort_param->pos;
      if (_ma_pack_get_block_info(info, &sort_param->bit_buff, &block_info,
                                  &sort_param->rec_buff,
                                  &sort_param->rec_buff_size, -1,
                                  sort_param->pos))
        DBUG_RETURN(-1);
      /* Zero length exactly MEMMAP_EXTRA_MARGIN before the end: end of data */
      if (!block_info.rec_len &&
          sort_param->pos + MEMMAP_EXTRA_MARGIN ==
          sort_param->read_cache.end_of_file)
        DBUG_RETURN(-1);
      if (block_info.rec_len < (uint) share->min_pack_length ||
          block_info.rec_len > (uint) share->max_pack_length)
      {
        if (! searching)
          _ma_check_print_info(param,
                               "Found block with wrong recordlength: %lu "
                               "at %s\n",
                               block_info.rec_len,
                               llstr(sort_param->pos,llbuff));
        continue;
      }
      if (_ma_read_cache(info, &sort_param->read_cache, sort_param->rec_buff,
                         block_info.filepos, block_info.rec_len,
                         READING_NEXT))
      {
        if (! searching)
          _ma_check_print_info(param,"Couldn't read whole record from %s",
                              llstr(sort_param->pos,llbuff));
        continue;
      }
#ifdef HAVE_valgrind
      bzero(sort_param->rec_buff + block_info.rec_len,
            share->base.extra_rec_buff_size);
#endif
      if (_ma_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record,
                              sort_param->rec_buff, block_info.rec_len))
      {
        if (! searching)
          _ma_check_print_info(param,"Found wrong record at %s",
                              llstr(sort_param->pos,llbuff));
        continue;
      }
      if (!sort_param->fix_datafile)
      {
        sort_param->current_filepos= sort_param->pos;
        if (sort_param->master)
          share->state.split++;
      }
      sort_param->max_pos= (sort_param->pos=block_info.filepos+
                            block_info.rec_len);
      info->packed_length=block_info.rec_len;

      if (sort_param->calc_checksum)
      {
        info->cur_row.checksum= (*share->calc_check_checksum)(info,
                                                                sort_param->
                                                                record);
        param->glob_crc+= info->cur_row.checksum;
      }
      DBUG_RETURN(0);
    }
  case NO_RECORD:
    DBUG_RETURN(1);                             /* Impossible */
  }
  DBUG_RETURN(1);                               /* Impossible */
}
5374 
5375 
5376 /**
5377    @brief Write record to new file.
5378 
5379    @fn    _ma_sort_write_record()
5380    @param sort_param                Sort parameters.
5381 
5382    @note
5383    This is only called by a master thread if parallel repair is used.
5384 
5385    @return
5386    @retval  0   OK
5387                 sort_param->current_filepos points to inserted record for
5388                 block_records and to the place for the next record for
5389                 other row types.
5390                 sort_param->filepos points to end of file
5391   @retval   1   Error
5392 */
5393 
int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param)
{
  int flag;
  uint length;
  ulong block_length,reclength;
  uchar *from;
  /*
    Receives up to two pack-length prefixes (packed row length and, for
    tables with blobs, the blob length). Sized for 2 * 5 bytes.
    NOTE(review): 5 bytes is assumed to be the largest prefix
    _ma_save_pack_length() can emit - confirm against its definition.
  */
  uchar block_buff[10];
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  HA_CHECK *param= sort_info->param;
  MARIA_HA *info= sort_info->new_info;
  MARIA_SHARE *share= info->s;
  DBUG_ENTER("_ma_sort_write_record");

  /* The row is only physically written when the data file is rebuilt */
  if (sort_param->fix_datafile)
  {
    sort_param->current_filepos= sort_param->filepos;
    switch (sort_info->new_data_file_type) {
    case BLOCK_RECORD:
      if ((sort_param->current_filepos=
           (*share->write_record_init)(info, sort_param->record)) ==
          HA_OFFSET_ERROR)
      {
        _ma_check_print_error(param, "%d when writing to datafile", my_errno);
        DBUG_RETURN(1);
      }
      /* Pointer to end of file */
      sort_param->filepos= share->state.state.data_file_length;
      break;
    case STATIC_RECORD:
      if (my_b_write(&info->rec_cache,sort_param->record,
                     share->base.pack_reclength))
      {
        _ma_check_print_error(param,"%d when writing to datafile",my_errno);
        DBUG_RETURN(1);
      }
      sort_param->filepos+=share->base.pack_reclength;
      share->state.split++;
      break;
    case DYNAMIC_RECORD:
      if (! info->blobs)
        from=sort_param->rec_buff;
      else
      {
        /* must be sure that local buffer is big enough */
        reclength=share->base.pack_reclength+
          _ma_calc_total_blob_length(info,sort_param->record)+
          ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER)+MARIA_SPLIT_LENGTH+
          MARIA_DYN_DELETE_BLOCK_HEADER;
        if (sort_info->buff_length < reclength)
        {
          /*
            Pass the full size: truncating it to uint would allocate less
            than what is recorded in buff_length below.
          */
          if (!(sort_info->buff=my_realloc(sort_info->buff,
                                           (size_t) reclength,
                                           MYF(MY_FREE_ON_ERROR |
                                               MY_ALLOW_ZERO_PTR))))
          {
            /* Old buffer was freed (MY_FREE_ON_ERROR); forget its size too */
            sort_info->buff_length= 0;
            _ma_check_print_error(param,
                                  "Not enough memory for record buffer "
                                  "(need %lu)", reclength);
            DBUG_RETURN(1);
          }
          sort_info->buff_length=reclength;
        }
        from= (uchar *) sort_info->buff+ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER);
      }
      /* We can use info->cur_row.checksum here as only one thread calls this */
      info->cur_row.checksum= (*share->calc_check_checksum)(info,
                                                              sort_param->
                                                              record);
      if (!(reclength= _ma_rec_pack(info,from,sort_param->record)))
      {
        _ma_check_print_error(param,"Got error %d when packing record",
                              my_errno);
        DBUG_RETURN(1);
      }
      flag=0;

      /* Write the packed row as one or more blocks until nothing is left */
      do
      {
        block_length= reclength + 3 + MY_TEST(reclength >= (65520 - 3));
        if (block_length < share->base.min_block_length)
          block_length=share->base.min_block_length;
        info->update|=HA_STATE_WRITE_AT_END;
        block_length=MY_ALIGN(block_length,MARIA_DYN_ALIGN_SIZE);
        if (block_length > MARIA_MAX_BLOCK_LENGTH)
          block_length=MARIA_MAX_BLOCK_LENGTH;
        if (_ma_write_part_record(info,0L,block_length,
                                  sort_param->filepos+block_length,
                                  &from,&reclength,&flag))
        {
          _ma_check_print_error(param,"%d when writing to datafile",my_errno);
          DBUG_RETURN(1);
        }
        sort_param->filepos+=block_length;
        share->state.split++;
      } while (reclength);
      break;
    case COMPRESSED_RECORD:
      /* Row is already packed in rec_buff; emit length prefix(es) + data */
      reclength=info->packed_length;
      length= _ma_save_pack_length((uint) share->pack.version, block_buff,
                               reclength);
      if (share->base.blobs)
        length+= _ma_save_pack_length((uint) share->pack.version,
                                  block_buff + length, info->blob_length);
      if (my_b_write(&info->rec_cache,block_buff,length) ||
          my_b_write(&info->rec_cache, sort_param->rec_buff, reclength))
      {
        _ma_check_print_error(param,"%d when writing to datafile",my_errno);
        DBUG_RETURN(1);
      }
      sort_param->filepos+=reclength+length;
      share->state.split++;
      break;
    case NO_RECORD:
      DBUG_RETURN(1);                           /* Impossible */
    }
  }
  /* Only the master counts rows and prints the running row count */
  if (sort_param->master)
  {
    share->state.state.records++;
    if ((param->testflag & T_WRITE_LOOP) &&
        (share->state.state.records % WRITE_COUNT) == 0)
    {
      char llbuff[22];
      printf("%s\r", llstr(share->state.state.records,llbuff));
      fflush(stdout);
    }
  }
  DBUG_RETURN(0);
} /* _ma_sort_write_record */
5517 
5518 
5519 /* Compare two keys from _ma_create_index_by_sort */
5520 
static int sort_key_cmp(MARIA_SORT_PARAM *sort_param, const void *a,
                        const void *b)
{
  /*
    a and b each point at a slot that holds a key pointer, so they are
    dereferenced once to reach the two key images to compare.
  */
  uchar *const first_key=  *((uchar* const *) a);
  uchar *const second_key= *((uchar* const *) b);
  uint diff_pos_unused[2];              /* Output of ha_key_cmp(); ignored */
  int cmp_result;

  cmp_result= ha_key_cmp(sort_param->seg, first_key, second_key,
                         USE_WHOLE_KEY, SEARCH_SAME, diff_pos_unused);
  return cmp_result;
} /* sort_key_cmp */
5529 
5530 
static int sort_key_write(MARIA_SORT_PARAM *sort_param, const uchar *a)
{
  /*
    Take the next key 'a' delivered by the sort: update the key statistics
    against the previously written key, then either treat it as a duplicate
    (unique key equal to the previous one) or insert it into the key block.
  */
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  SORT_KEY_BLOCKS *first_block= sort_info->key_block;
  HA_CHECK *param= sort_info->param;
  char new_pos_buff[22], prev_pos_buff[22];
  uint diff_pos[2];
  int cmp;

  if (!first_block->inited)
  {
    /* Very first key: there is nothing to compare it with */
    cmp= -1;
    if (param->stats_method == MI_STATS_METHOD_IGNORE_NULLS)
      maria_collect_stats_nonulls_first(sort_param->seg, sort_param->notnull,
                                        a);
  }
  else
  {
    cmp= ha_key_cmp(sort_param->seg, first_block->lastkey, a,
                    USE_WHOLE_KEY,
                    SEARCH_FIND | SEARCH_UPDATE | SEARCH_INSERT,
                    diff_pos);
    /* Some statistics methods recompute where the two keys differ */
    switch (param->stats_method) {
    case MI_STATS_METHOD_NULLS_NOT_EQUAL:
      ha_key_cmp(sort_param->seg, first_block->lastkey, a,
                 USE_WHOLE_KEY,
                 SEARCH_FIND | SEARCH_NULL_ARE_NOT_EQUAL, diff_pos);
      break;
    case MI_STATS_METHOD_IGNORE_NULLS:
      diff_pos[0]= maria_collect_stats_nonulls_next(sort_param->seg,
                                                    sort_param->notnull,
                                                    first_block->lastkey,
                                                    a);
      break;
    default:
      break;
    }
    sort_param->unique[diff_pos[0]-1]++;
  }

  if (cmp == 0 && (sort_param->keyinfo->flag & HA_NOSAME))
  {
    char *new_pos_txt, *prev_pos_txt;

    DBUG_EXECUTE("key", _ma_print_keydata(DBUG_FILE, sort_param->seg, a,
                                          USE_WHOLE_KEY););
    sort_info->dupp++;
    sort_info->info->cur_row.lastpos= get_record_for_key(sort_param->keyinfo,
                                                         a);
    /* Must be decided before the warning below is printed */
    if ((param->testflag & (T_CREATE_UNIQUE_BY_SORT | T_SUPPRESS_ERR_HANDLING))
        == T_CREATE_UNIQUE_BY_SORT)
      param->testflag|= T_SUPPRESS_ERR_HANDLING;

    new_pos_txt= llstr(sort_info->info->cur_row.lastpos, new_pos_buff);
    prev_pos_txt= llstr(get_record_for_key(sort_param->keyinfo,
                                           first_block->lastkey),
                        prev_pos_buff);
    _ma_check_print_warning(param,
                            "Duplicate key %2u for record at %10s against "
                            "record at %10s",
                            sort_param->key + 1,
                            new_pos_txt,
                            prev_pos_txt);
    param->testflag|=T_RETRY_WITHOUT_QUICK;
    if (param->testflag & T_VERBOSE)
      _ma_print_keydata(stdout,sort_param->seg, a, USE_WHOLE_KEY);
    return (sort_delete_record(sort_param));
  }

#ifndef DBUG_OFF
  if (cmp > 0)
  {
    _ma_check_print_error(param,
                          "Internal error: Keys are not in order from sort");
    return(1);
  }
#endif
  return (sort_insert_key(sort_param, first_block,
                          a, HA_OFFSET_ERROR));
} /* sort_key_write */
5600 
5601 
int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param)
{
  /*
    Flush the full-text sort buffer. If ft_buf->buf is set, the buffered
    values are written as first-level keys; otherwise the pending
    second-level blocks are flushed and one first-level key referring to
    them is written. Returns 0 on success, else the first error met.
  */
  MARIA_SORT_INFO *sort_info= sort_param->sort_info;
  MARIA_SHARE *share= sort_info->info->s;
  SORT_FT_BUF *ft_buf= sort_info->ft_buf;
  SORT_KEY_BLOCKS *key_block= sort_info->key_block;
  uchar *value_slot, *next_value;
  uint val_off, val_len;
  int error;

  val_len= share->ft2_keyinfo.keylength;
  get_key_full_length_rdonly(val_off, ft_buf->lastkey);
  /* The value part of lastkey starts right after the word */
  value_slot= ft_buf->lastkey + val_off;

  if (!ft_buf->buf)
  {
    /* flushing second-level tree keyblocks */
    error= _ma_flush_pending_blocks(sort_param);
    /* updating lastkey with second-level tree info */
    ft_intXstore(value_slot, -ft_buf->count);
    _ma_dpointer(share, value_slot + HA_FT_WLEN,
                 share->state.key_root[sort_param->key]);
    /* restoring first level tree data in sort_info/sort_param */
    sort_info->key_block= (sort_info->key_block_end -
                           sort_info->param->sort_key_blocks);
    sort_param->keyinfo= share->keyinfo + sort_param->key;
    share->state.key_root[sort_param->key]= HA_OFFSET_ERROR;
    /* writing lastkey in first-level tree, unless the flush failed */
    if (error)
      return error;
    return sort_insert_key(sort_param, sort_info->key_block,
                           ft_buf->lastkey, HA_OFFSET_ERROR);
  }

  /* flushing first-level tree: lastkey as it is, then once per value */
  error= sort_insert_key(sort_param, key_block, ft_buf->lastkey,
                         HA_OFFSET_ERROR);
  next_value= value_slot + val_len;
  while (!error && next_value < ft_buf->buf)
  {
    /* Put the next buffered value into lastkey and write it again */
    memcpy(value_slot, next_value, val_len);
    error= sort_insert_key(sort_param, key_block, ft_buf->lastkey,
                           HA_OFFSET_ERROR);
    next_value+= val_len;
  }
  return error;
}
5646 
5647 
/*
  Key-write callback used when a full-text index is built by sorting.

  Keys that compare equal to the previously seen word are collected: their
  references are first appended to an in-memory buffer placed after the
  word (sort_info->ft_buf) and, once that buffer is full, moved to a
  second-level tree built with share->ft2_keyinfo.  A key with a different
  word flushes the previous word through _ma_sort_ft_buf_flush() and
  starts a new buffer.

  When no buffer is allocated (conditions not met or allocation failed)
  the callback is replaced by sort_key_write() for this and later keys.

  Returns 0 on success, non-zero on error.
*/
static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
                                   const uchar *a)
{
  MARIA_SORT_INFO *sinfo= sort_param->sort_info;
  MARIA_SHARE *share= sinfo->info->s;
  SORT_FT_BUF *ftb= sinfo->ft_buf;
  SORT_KEY_BLOCKS *block= sinfo->key_block;
  uint word_len, prev_word_len, ref_len, res;

  ref_len= HA_FT_WLEN + share->rec_reflength;
  get_key_full_length_rdonly(word_len, a);

  if (ftb)
  {
    get_key_full_length_rdonly(prev_word_len, ftb->lastkey);

    if (ha_compare_text(sort_param->seg->charset,
                        a + 1, word_len - 1,
                        ftb->lastkey + 1, prev_word_len - 1, 0) == 0)
    {
      uchar *ref;

      /* Same word as before */
      if (!ftb->buf)
      {
        /* Already converted: the reference goes to the second-level tree */
        ftb->count++;
        return sort_insert_key(sort_param, block, a + word_len,
                               HA_OFFSET_ERROR);
      }

      /* Append the reference to the in-memory buffer */
      memcpy (ftb->buf, (const char *) a + word_len, ref_len);
      ftb->buf+= ref_len;
      if (ftb->buf < ftb->end)
        return 0;

      /* Buffer is full: convert to a two-level tree */
      ref= ftb->lastkey + prev_word_len;

      /* Use the first key block that is not in use for the new tree */
      while (block->inited)
        block++;
      sinfo->key_block= block;
      sort_param->keyinfo= &share->ft2_keyinfo;
      ftb->count= (uint) (ftb->buf - ref) / ref_len;

      /* Move every buffered reference into the second-level tree */
      res= 0;
      while (!res && ref < ftb->buf)
      {
        res= sort_insert_key(sort_param, block, ref, HA_OFFSET_ERROR);
        ref+= ref_len;
      }
      ftb->buf= 0;
      return res;
    }

    /* Different word: write out everything collected for the old one */
    if ((res= _ma_sort_ft_buf_flush(sort_param)))
      return res;
  }
  else
  {
    /*
      use two-level tree only if key_reflength fits in rec_reflength place
      and row format is NOT static - for _ma_dpointer not to garble offsets
    */
    if (share->base.key_reflength <= share->rec_reflength &&
        (share->options &
         (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)))
      ftb= (SORT_FT_BUF *) my_malloc(sort_param->keyinfo->block_length +
                                     sizeof(SORT_FT_BUF), MYF(MY_WME));

    if (!ftb)
    {
      /* No buffer: fall back to the plain key writer from now on */
      sort_param->key_write= sort_key_write;
      return sort_key_write(sort_param, a);
    }
    sinfo->ft_buf= ftb;
  }

  /* Start collecting a new word: lastkey= word + its first reference */
  word_len+= ref_len;
  memcpy(ftb->lastkey, a, word_len);
  ftb->buf= ftb->lastkey + word_len;
  /*
    32 is just a safety margin here
    (at least MY_MAX(val_len, sizeof(nod_flag)) should be there).
    May be better performance could be achieved if we'd put
      (sort_info->keyinfo->block_length-32)/XXX
      instead.
        TODO: benchmark the best value for XXX.
  */
  ftb->end= ftb->lastkey + (sort_param->keyinfo->block_length - 32);
  return 0;
} /* sort_maria_ft_key_write */
5736 
5737 
5738 /* get pointer to record from a key */
5739 
get_record_for_key(MARIA_KEYDEF * keyinfo,const uchar * key_data)5740 static my_off_t get_record_for_key(MARIA_KEYDEF *keyinfo,
5741 				   const uchar *key_data)
5742 {
5743   MARIA_KEY key;
5744   key.keyinfo= keyinfo;
5745   key.data= (uchar*) key_data;
5746   key.data_length= (_ma_keylength(keyinfo, key_data) -
5747                     keyinfo->share->rec_reflength);
5748   return _ma_row_pos_from_key(&key);
5749 } /* get_record_for_key */
5750 
5751 
5752 /* Insert a key in sort-key-blocks */
5753 
/*
  Append a key to the page held in key_block.

  key_block   Block (tree level) to add the key to
  key         Key to add
  prev_block  Page position stored as child pointer when key_block is not
              the lowest level (sort_info->key_block)

  If the page still fits in share->max_index_block_size after adding the
  key, the key is remembered as key_block->lastkey and 0 is returned.
  Otherwise the page is cut back to its previous length, written to a new
  page obtained from _ma_new(), its last key is inserted into the next
  block (key_block+1) pointing at the written page, and the key is then
  inserted into the restarted block.

  Returns 0 on success, 1 on error.
*/
static int sort_insert_key(MARIA_SORT_PARAM *sort_param,
                           register SORT_KEY_BLOCKS *key_block,
                           const uchar *key,
                           my_off_t prev_block)
{
  MARIA_SORT_INFO *sinfo= sort_param->sort_info;
  MARIA_HA *info= sinfo->info;
  MARIA_SHARE *share= info->s;
  MARIA_KEYDEF *keyinfo= sort_param->keyinfo;
  HA_CHECK *param= sinfo->param;
  MARIA_PINNED_PAGE page_link_buf, *page_link= &page_link_buf;
  MARIA_KEY_PARAM pack_info;
  MARIA_KEY new_key;
  uchar *page_buf, *prev_key;
  uint ptr_len, used, packed_len;
  my_off_t page_pos;
  DBUG_ENTER("sort_insert_key");

  page_buf= key_block->buff;
  prev_key= key_block->lastkey;

  /* Only blocks above the lowest level carry child page pointers */
  ptr_len= 0;
  if (key_block != sinfo->key_block)
    ptr_len= share->base.key_reflength;

  if (key_block->inited)
    used= _ma_get_page_used(share, page_buf);
  else
  {
    /* First key in this block: set up an empty page */
    key_block->inited= 1;
    if (key_block == sinfo->key_block_end)
    {
      _ma_check_print_error(param,
                            "To many key-block-levels; "
                            "Try increasing sort_key_blocks");
      DBUG_RETURN(1);
    }
    used= share->keypage_header + ptr_len;
    key_block->end_pos= page_buf + share->keypage_header;
    bzero(page_buf, share->keypage_header);
    _ma_store_keynr(share, page_buf, sort_param->keyinfo->key_nr);
    prev_key= 0;                                /* No previous key in block */
  }

  /* Save pointer to previous block */
  if (ptr_len)
  {
    _ma_store_keypage_flag(share, page_buf, KEYPAGE_FLAG_ISNOD);
    _ma_kpointer(info, key_block->end_pos, prev_block);
  }

  new_key.keyinfo= keyinfo;
  new_key.data= (uchar*) key;
  new_key.data_length= _ma_keylength(keyinfo, key) - share->rec_reflength;
  new_key.ref_length= share->rec_reflength;

  /* Pack the key against the previous key on the page and store it */
  packed_len= (*keyinfo->pack_key)(&new_key, ptr_len,
                                   (uchar*) 0, prev_key, prev_key,
                                   &pack_info);
  (*keyinfo->store_key)(keyinfo, key_block->end_pos + ptr_len, &pack_info);
  used+= packed_len;
  _ma_store_page_used(share, page_buf, used);
  key_block->end_pos+= packed_len;

  if (used <= share->max_index_block_size)
  {
    /* Key fits: remember it and the page length before it was added */
    MARIA_KEY saved_key;
    saved_key.data= key_block->lastkey;
    _ma_copy_key(&saved_key, &new_key);
    key_block->last_length= used - packed_len;
    DBUG_RETURN(0);
  }

  /* Fill block with end-zero and write filled block */
  _ma_store_page_used(share, page_buf, key_block->last_length);
  bzero(page_buf + key_block->last_length,
        keyinfo->block_length - key_block->last_length);
  page_pos= _ma_new(info, DFLT_INIT_HITS, &page_link);
  if (page_pos == HA_OFFSET_ERROR)
    DBUG_RETURN(1);
  _ma_fast_unlock_key_del(info);

  if (!page_link->changed)
  {
    if (write_page(share, share->kfile.file, page_buf,
                   keyinfo->block_length, page_pos, param->myf_rw))
      DBUG_RETURN(1);
  }
  else
  {
    /* If we read the page from the key cache, we have to write it back to it */
    MARIA_PAGE page;
    pop_dynamic(&info->pinned_pages);
    _ma_page_setup(&page, info, keyinfo, page_pos, page_buf);
    if (_ma_write_keypage(&page, PAGECACHE_LOCK_WRITE_UNLOCK, DFLT_INIT_HITS))
      DBUG_RETURN(1);
  }
  DBUG_DUMP("buff", page_buf, _ma_get_page_used(share, page_buf));

  /* Write separator-key to block in next level */
  if (sort_insert_key(sort_param, key_block + 1, key_block->lastkey,
                      page_pos))
    DBUG_RETURN(1);

  /* clear old block and write new key in it */
  key_block->inited= 0;
  DBUG_RETURN(sort_insert_key(sort_param, key_block, key, prev_block));
} /* sort_insert_key */
5856 
5857 
5858 /* Delete record when we found a duplicated key */
5859 
/*
  Remove the row that produced a duplicate key during repair.

  The row at key_info->cur_row.lastpos is read through row_info, the keys
  already built for it (indexes 0 .. sort_info->current_key-1) are deleted
  from key_info, the row checksum is subtracted from param->glob_crc when
  checksums are calculated, and the row itself is deleted.

  While the row is accessed, row_info->dfile.file is temporarily replaced
  by the file of row_info->rec_cache; it is restored on every exit taken
  after the replacement.

  Returns 0 on success, non-zero on error.
*/
static int sort_delete_record(MARIA_SORT_PARAM *sort_param)
{
  uint i;
  int old_file,error;
  uchar *key;
  MARIA_SORT_INFO *sort_info=sort_param->sort_info;
  HA_CHECK *param=sort_info->param;
  MARIA_HA *row_info= sort_info->new_info, *key_info= sort_info->info;
  DBUG_ENTER("sort_delete_record");

  /* With -q (and without forced uniqueness) rows may not be removed */
  if ((param->testflag & (T_FORCE_UNIQUENESS|T_QUICK)) == T_QUICK)
  {
    _ma_check_print_error(param,
			 "Quick-recover aborted; Run recovery without switch "
                          "-q or with switch -qq");
    DBUG_RETURN(1);
  }
  if (key_info->s->options & HA_OPTION_COMPRESS_RECORD)
  {
    _ma_check_print_error(param,
                          "Recover aborted; Can't run standard recovery on "
                          "compressed tables with errors in data-file. "
                          "Use 'aria_chk --safe-recover' to fix it");
    DBUG_RETURN(1);
  }

  old_file= row_info->dfile.file;
  /* This only affects static and dynamic row formats */
  row_info->dfile.file= row_info->rec_cache.file;
  if (flush_io_cache(&row_info->rec_cache))
  {
    /* Restore the descriptor here too, as the other error exits do */
    row_info->dfile.file= old_file;
    DBUG_RETURN(1);
  }

  /* Scratch area for the generated keys, placed after the last-key area */
  key= key_info->lastkey_buff + key_info->s->base.max_key_length;
  if ((error=(*row_info->s->read_record)(row_info, sort_param->record,
                                         key_info->cur_row.lastpos)) &&
	error != HA_ERR_RECORD_DELETED)
  {
    _ma_check_print_error(param,"Can't read record to be removed");
    row_info->dfile.file= old_file;
    DBUG_RETURN(1);
  }
  row_info->cur_row.lastpos= key_info->cur_row.lastpos;

  /* Delete the keys of this row from the indexes before the current one */
  for (i=0 ; i < sort_info->current_key ; i++)
  {
    MARIA_KEY tmp_key;
    (*key_info->s->keyinfo[i].make_key)(key_info, &tmp_key, i, key,
                                        sort_param->record,
                                        key_info->cur_row.lastpos, 0);
    if (_ma_ck_delete(key_info, &tmp_key))
    {
      _ma_check_print_error(param,
                            "Can't delete key %d from record to be removed",
                            i+1);
      row_info->dfile.file= old_file;
      DBUG_RETURN(1);
    }
  }
  if (sort_param->calc_checksum)
    param->glob_crc-=(*key_info->s->calc_check_checksum)(key_info,
                                                         sort_param->record);
  error= (*row_info->s->delete_record)(row_info, sort_param->record);
  if (error)
    _ma_check_print_error(param,"Got error %d when deleting record",
                          my_errno);
  row_info->dfile.file= old_file;           /* restore actual value */
  row_info->s->state.state.records--;
  DBUG_RETURN(error);
} /* sort_delete_record */
5929 
5930 
5931 /* Fix all pending blocks and flush everything to disk */
5932 
/*
  Write every initialised key block, starting at sort_info->key_block, to
  the index file.

  Each block is zero-padded to keyinfo->block_length and written to a new
  page from _ma_new().  Every block after the first gets a pointer to the
  page written just before it.  The position of the last page written
  becomes the root for sort_param->key (HA_OFFSET_ERROR if no block was
  in use).

  Returns 0 on success, 1 on error.
*/
int _ma_flush_pending_blocks(MARIA_SORT_PARAM *sort_param)
{
  MARIA_SORT_INFO *sinfo= sort_param->sort_info;
  MARIA_HA *info= sinfo->info;
  MARIA_KEYDEF *keyinfo= sort_param->keyinfo;
  myf rw_flags= sinfo->param->myf_rw;
  MARIA_PINNED_PAGE page_link_buf, *page_link= &page_link_buf;
  SORT_KEY_BLOCKS *block;
  my_off_t page_pos= HA_OFFSET_ERROR;           /* if empty file */
  uint has_child= 0, used;
  int result= 1;
  DBUG_ENTER("_ma_flush_pending_blocks");

  for (block= sinfo->key_block; block->inited; block++)
  {
    block->inited= 0;
    used= _ma_get_page_used(info->s, block->buff);
    /* Point at the page written for the previous block */
    if (has_child)
      _ma_kpointer(info, block->end_pos, page_pos);
    bzero(block->buff + used, keyinfo->block_length - used);

    page_pos= _ma_new(info, DFLT_INIT_HITS, &page_link);
    if (page_pos == HA_OFFSET_ERROR)
      goto done;

    if (!page_link->changed)
    {
      if (write_page(info->s, info->s->kfile.file, block->buff,
                     keyinfo->block_length, page_pos, rw_flags))
        goto done;
    }
    else
    {
      /* If we read the page from the key cache, we have to write it back */
      MARIA_PAGE page;
      pop_dynamic(&info->pinned_pages);

      _ma_page_setup(&page, info, keyinfo, page_pos, block->buff);
      if (_ma_write_keypage(&page, PAGECACHE_LOCK_WRITE_UNLOCK,
                            DFLT_INIT_HITS))
        goto done;
    }
    DBUG_DUMP("buff", block->buff, used);
    has_child= 1;
  }
  /* Last is root for tree */
  info->s->state.key_root[sort_param->key]= page_pos;
  result= 0;

done:
  _ma_fast_unlock_key_del(info);
  DBUG_RETURN(result);
} /* _ma_flush_pending_blocks */
5986 
5987 	/* alloc space and pointers for key_blocks */
5988 
alloc_key_blocks(HA_CHECK * param,uint blocks,uint buffer_length)5989 static SORT_KEY_BLOCKS *alloc_key_blocks(HA_CHECK *param, uint blocks,
5990                                          uint buffer_length)
5991 {
5992   reg1 uint i;
5993   SORT_KEY_BLOCKS *block;
5994   DBUG_ENTER("alloc_key_blocks");
5995 
5996   if (!(block= (SORT_KEY_BLOCKS*) my_malloc((sizeof(SORT_KEY_BLOCKS)+
5997                                              buffer_length+IO_SIZE)*blocks,
5998                                             MYF(0))))
5999   {
6000     _ma_check_print_error(param,"Not enough memory for sort-key-blocks");
6001     return(0);
6002   }
6003   for (i=0 ; i < blocks ; i++)
6004   {
6005     block[i].inited=0;
6006     block[i].buff= (uchar*) (block+blocks)+(buffer_length+IO_SIZE)*i;
6007   }
6008   DBUG_RETURN(block);
6009 } /* alloc_key_blocks */
6010 
6011 
6012 	/* Check if file is almost full */
6013 
/*
  Return 1 if 9/10 of the current size of the index file or of the data
  file exceeds the corresponding maximum file length, else 0.

  Always returns 0 for tables with HA_OPTION_COMPRESS_RECORD set.
  The data file is only examined if the index file test is false.
*/
int maria_test_if_almost_full(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;

  if (share->options & HA_OPTION_COMPRESS_RECORD)
    return 0;

  /* Seeking to the end gives the current length of the index file */
  if (mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END,
                      MYF(MY_THREADSAFE)) / 10 * 9 >
      (my_off_t) share->base.max_key_file_length)
    return 1;

  return (mysql_file_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0)) / 10 * 9 >
          (my_off_t) share->base.max_data_file_length);
}
6026 
6027 
/* Recreate the table with more space allocated for record data */
6029 
/*
  Re-create the index file of a table from the definitions of the open
  table, keeping the data file (HA_DONT_TOUCH_DATA).

  param     Check parameters (flags, language, max data file length)
  org_info  In: handler of the open table; it is closed here.
            Out: handler of the re-created and re-opened table.  After
            maria_close() has run, a failure leaves *org_info pointing
            at the closed handler (maria_create() failed) or set to
            NULL (maria_open() failed).
  filename  Name passed to maria_create() and maria_open()

  Key, key segment, column and unique definitions are copied to temporary
  arrays before the table is closed; row counts, checksum and similar
  state are copied into the new table afterwards.

  Returns 0 on success, 1 on error.
*/
int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
{
  int error;
  MARIA_HA info;
  MARIA_SHARE share;
  MARIA_KEYDEF *keyinfo,*key,*key_end;
  HA_KEYSEG *keysegs,*keyseg;
  MARIA_COLUMNDEF *columndef,*column,*end;
  MARIA_UNIQUEDEF *uniquedef,*u_ptr,*u_end;
  MARIA_STATUS_INFO status_info;
  uint unpack,key_parts,keyseg_count;
  ha_rows max_records;
  ulonglong file_length,tmp_length;
  MARIA_CREATE_INFO create_info;
  DBUG_ENTER("maria_recreate_table");

  if ((!(param->testflag & T_SILENT)))
    printf("Recreating table '%s'\n", param->isam_file_name);

  error=1;					/* Default error */
  /* Work on private copies; the original handler is closed further down */
  info= **org_info;
  status_info= (*org_info)->state[0];
  info.state= &status_info;
  share= *(*org_info)->s;
  unpack= ((share.data_file_type == COMPRESSED_RECORD) &&
           (param->testflag & T_UNPACK));
  if (!(keyinfo=(MARIA_KEYDEF*) my_alloca(sizeof(MARIA_KEYDEF) *
                                          share.base.keys)))
    DBUG_RETURN(1);                             /* Out of memory is an error */
  memcpy((uchar*) keyinfo,(uchar*) share.keyinfo,
	 (size_t) (sizeof(MARIA_KEYDEF)*share.base.keys));

  key_parts= share.base.all_key_parts;
  /*
    Number of key segments copied below; the buffer must hold all of them.
    NOTE(review): the unique definitions further down step past their own
    segments as well; confirm that those are covered by this count.
  */
  keyseg_count= key_parts + share.base.keys + share.state.header.uniques;
  if (!(keysegs=(HA_KEYSEG*) my_alloca(sizeof(HA_KEYSEG)*keyseg_count)))
  {
    my_afree(keyinfo);
    DBUG_RETURN(1);
  }
  if (!(columndef=(MARIA_COLUMNDEF*)
	my_alloca(sizeof(MARIA_COLUMNDEF)*(share.base.fields+1))))
  {
    my_afree(keyinfo);
    my_afree(keysegs);
    DBUG_RETURN(1);
  }
  if (!(uniquedef=(MARIA_UNIQUEDEF*)
	my_alloca(sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques+1))))
  {
    my_afree(columndef);
    my_afree(keyinfo);
    my_afree(keysegs);
    DBUG_RETURN(1);
  }

  /* Copy the column definitions in their original order */
  for (column= share.columndef, end= share.columndef+share.base.fields;
       column != end ;
       column++)
    columndef[column->column_nr]= *column;

  /* Change the new key to point at the saved key segments */
  memcpy((uchar*) keysegs,(uchar*) share.keyparts,
	 (size_t) (sizeof(HA_KEYSEG)*keyseg_count));
  keyseg=keysegs;
  for (key=keyinfo,key_end=keyinfo+share.base.keys; key != key_end ; key++)
  {
    key->seg=keyseg;
    for (; keyseg->type ; keyseg++)
    {
      if (param->language)
	keyseg->language=param->language;	/* change language */
    }
    keyseg++;					/* Skip end pointer */
  }

  /*
    Copy the unique definitions and change them to point at the new key
    segments
  */
  memcpy((uchar*) uniquedef,(uchar*) share.uniqueinfo,
	 (size_t) (sizeof(MARIA_UNIQUEDEF)*(share.state.header.uniques)));
  for (u_ptr=uniquedef,u_end=uniquedef+share.state.header.uniques;
       u_ptr != u_end ; u_ptr++)
  {
    u_ptr->seg=keyseg;
    keyseg+=u_ptr->keysegs+1;
  }

  /* Estimate the row count and the data file length for the new table */
  file_length=(ulonglong) mysql_file_seek(info.dfile.file, 0L, MY_SEEK_END, MYF(0));
  if (share.options & HA_OPTION_COMPRESS_RECORD)
    share.base.records=max_records=info.state->records;
  else if (share.base.min_pack_length)
    max_records=(ha_rows) (file_length / share.base.min_pack_length);
  else
    max_records=0;
  share.options&= ~HA_OPTION_TEMP_COMPRESS_RECORD;

  /* Use the biggest of: current length + 10 %, requested max, old max */
  tmp_length= file_length+file_length/10;
  set_if_bigger(file_length,param->max_data_file_length);
  set_if_bigger(file_length,tmp_length);
  set_if_bigger(file_length,(ulonglong) share.base.max_data_file_length);

  maria_close(*org_info);

  bzero((char*) &create_info,sizeof(create_info));
  create_info.max_rows=MY_MAX(max_records,share.base.records);
  create_info.reloc_rows=share.base.reloc;
  create_info.old_options=(share.options |
			   (unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0));

  create_info.data_file_length=file_length;
  create_info.auto_increment=share.state.auto_increment;
  create_info.language = (param->language ? param->language :
			  share.base.language);
  create_info.key_file_length=  status_info.key_file_length;
  create_info.org_data_file_type= ((enum data_file_type)
                                   share.state.header.org_data_file_type);

  /*
    Allow for creating an auto_increment key. This has an effect only if
    an auto_increment key exists in the original table.
  */
  create_info.with_auto_increment= TRUE;
  create_info.null_bytes= share.base.null_bytes;
  create_info.transactional= share.base.born_transactional;

  /*
    We don't have to handle symlinks here because we are using
    HA_DONT_TOUCH_DATA
  */
  if (maria_create(filename, share.data_file_type,
                   share.base.keys - share.state.header.uniques,
                   keyinfo, share.base.fields, columndef,
                   share.state.header.uniques, uniquedef,
                   &create_info,
                   HA_DONT_TOUCH_DATA))
  {
    _ma_check_print_error(param,
                          "Got error %d when trying to recreate indexfile",
                          my_errno);
    goto end;
  }
  *org_info= maria_open(filename,O_RDWR,
                        (HA_OPEN_FOR_REPAIR |
                         ((param->testflag & T_WAIT_FOREVER) ?
                          HA_OPEN_WAIT_IF_LOCKED :
                          (param->testflag & T_DESCRIPT) ?
                          HA_OPEN_IGNORE_IF_LOCKED :
                          HA_OPEN_ABORT_IF_LOCKED)));
  if (!*org_info)
  {
    _ma_check_print_error(param,
                          "Got error %d when trying to open re-created "
                          "indexfile", my_errno);
    goto end;
  }
  /* We are modifying */
  (*org_info)->s->options&= ~HA_OPTION_READ_ONLY_DATA;
  _ma_readinfo(*org_info,F_WRLCK,0);
  /* Carry the state of the old table over to the new one */
  (*org_info)->s->state.state.records= info.state->records;
  if (share.state.create_time)
    (*org_info)->s->state.create_time=share.state.create_time;
#ifdef MARIA_EXTERNAL_LOCKING
  (*org_info)->s->state.unique= (*org_info)->this_unique= share.state.unique;
#endif
  (*org_info)->s->state.state.checksum= info.state->checksum;
  (*org_info)->s->state.state.del= info.state->del;
  (*org_info)->s->state.dellink= share.state.dellink;
  (*org_info)->s->state.state.empty= info.state->empty;
  (*org_info)->s->state.state.data_file_length= info.state->data_file_length;
  *(*org_info)->state= (*org_info)->s->state.state;
  if (maria_update_state_info(param,*org_info,UPDATE_TIME | UPDATE_STAT |
                              UPDATE_OPEN_COUNT))
    goto end;
  error=0;
end:
  my_afree(uniquedef);
  my_afree(keyinfo);
  my_afree(columndef);
  my_afree(keysegs);
  DBUG_RETURN(error);
}
6214 
6215 
6216 /* Write suffix to data file if needed */
6217 
/*
  Append MEMMAP_EXTRA_MARGIN zero bytes to the record cache of the new
  data file when the table uses COMPRESSED_RECORD and fix_datafile is set.
  In that case param->read_cache.end_of_file is advanced by the same
  amount.

  Returns 0 on success (or when nothing had to be written), 1 if the
  write failed.
*/
int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile)
{
  MARIA_HA *new_info= sort_info->new_info;
  uchar zero_tail[MEMMAP_EXTRA_MARGIN];

  /* Only compressed data files that are being rewritten get the suffix */
  if (new_info->s->data_file_type != COMPRESSED_RECORD || !fix_datafile)
    return 0;

  bzero(zero_tail, sizeof(zero_tail));
  if (my_b_write(&new_info->rec_cache, zero_tail, sizeof(zero_tail)))
  {
    _ma_check_print_error(sort_info->param,
                          "%d when writing to datafile", my_errno);
    return 1;
  }
  sort_info->param->read_cache.end_of_file+= sizeof(zero_tail);
  return 0;
}
6236 
6237 
6238 /* Update state and maria_chk time of indexfile */
6239 
/*
  Update the state of the index file according to the bits in 'update'.

  UPDATE_OPEN_COUNT  Reset open_count and global_changed, mark the share
                     as changed
  UPDATE_STAT        Copy param->new_rec_per_key_part into the state (only
                     if the table has rows) and maintain STATE_NOT_ANALYZED
  UPDATE_TIME        Set check_time (and create_time if it was not set)

  With UPDATE_STAT, UPDATE_SORT, UPDATE_TIME or UPDATE_AUTO_INC the state
  is written with _ma_state_info_write().  Finally _ma_writeinfo() is
  called with the lock counters temporarily set to zero.

  Returns 0 on success, 1 on error (after reporting it through param).
*/
int maria_update_state_info(HA_CHECK *param, MARIA_HA *info,uint update)
{
  MARIA_SHARE *share= info->s;
  int failed= 0;
  DBUG_ENTER("maria_update_state_info");

  if (update & UPDATE_OPEN_COUNT)
  {
    share->state.open_count= 0;
    share->global_changed= 0;
    share->changed= 1;
  }

  if (update & UPDATE_STAT)
  {
    uint part;
    uint part_count= mi_uint2korr(share->state.header.key_parts);

    share->state.records_at_analyze= share->state.state.records;
    share->state.changed&= ~STATE_NOT_ANALYZED;
    if (share->state.state.records)
    {
      for (part= 0; part < part_count; part++)
      {
        share->state.rec_per_key_part[part]=
          param->new_rec_per_key_part[part];
        /* A zero statistic means this key part still needs an analyze */
        if (!share->state.rec_per_key_part[part])
          share->state.changed|= STATE_NOT_ANALYZED;
      }
    }
  }

  if (update & (UPDATE_STAT | UPDATE_SORT | UPDATE_TIME | UPDATE_AUTO_INC))
  {
    if (update & UPDATE_TIME)
    {
      share->state.check_time= time((time_t*) 0);
      if (!share->state.create_time)
        share->state.create_time= share->state.check_time;
    }
    if (_ma_state_info_write(share,
                             MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET |
                             MA_STATE_INFO_WRITE_FULL_INFO))
      failed= 1;
  }

  if (!failed)
  {
    /* Force update of status */
    uint saved_r_locks= share->r_locks;
    uint saved_w_locks= share->w_locks;
    int write_error;

    share->r_locks= share->w_locks= share->tot_locks= 0;
    write_error= _ma_writeinfo(info, WRITEINFO_NO_UNLOCK);
    share->r_locks= saved_r_locks;
    share->w_locks= saved_w_locks;
    share->tot_locks= saved_r_locks + saved_w_locks;
    if (write_error)
      failed= 1;
  }

  if (!failed)
    DBUG_RETURN(0);

  _ma_check_print_error(param,"%d when updating keyfile",my_errno);
  DBUG_RETURN(1);
}
6293 
/*
  Update auto increment value for a table
  When setting the 'repair_only' flag we only want to change the
  old auto_increment value if it's wrong (smaller than some given key).
  The reason is that we shouldn't change the auto_increment value
  for a table without good reason when only doing a repair; if the
  user has inserted and deleted rows, the auto_increment value
  may be bigger than the biggest current row and this is ok.

  If repair_only is not set, the value is additionally raised to
  param->auto_increment_value when that is bigger; if the index is empty
  it is set to param->auto_increment_value.
*/
6306 
void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
                                   my_bool repair_only)
{
  MARIA_SHARE *share= info->s;
  uchar *row_buf;
  DBUG_ENTER("update_auto_increment_key");

  /* Nothing to do without an active auto increment key */
  if (!share->base.auto_key ||
      !maria_is_key_active(share->state.key_map, share->base.auto_key - 1))
  {
    if (!(param->testflag & T_VERY_SILENT))
      _ma_check_print_info(param,
                           "Table: %s doesn't have an auto increment key\n",
                           param->isam_file_name);
    DBUG_VOID_RETURN;
  }

  if (!(param->testflag & (T_SILENT | T_REP)))
    printf("Updating Aria file: %s\n", param->isam_file_name);

  /*
    We have to use an allocated buffer instead of info->rec_buff as
    _ma_put_key_in_record() may use info->rec_buff
  */
  row_buf= (uchar*) my_malloc((size_t) share->base.default_rec_buff_size,
                              MYF(0));
  if (!row_buf)
  {
    _ma_check_print_error(param,"Not enough memory for extra record");
    DBUG_VOID_RETURN;
  }

  /* Read the last entry of the auto increment index, key data only */
  maria_extra(info, HA_EXTRA_KEYREAD, 0);
  if (maria_rlast(info, row_buf, share->base.auto_key - 1) == 0)
  {
    const HA_KEYSEG *seg= share->keyinfo[share->base.auto_key - 1].seg;
    ulonglong biggest_key=
      ma_retrieve_auto_increment(row_buf + seg->start, seg->type);

    /* Never leave the stored value below the biggest key */
    set_if_bigger(share->state.auto_increment, biggest_key);
    if (!repair_only)
      set_if_bigger(share->state.auto_increment, param->auto_increment_value);
  }
  else if (my_errno == HA_ERR_END_OF_FILE)
  {
    /* Empty index */
    if (!repair_only)
      share->state.auto_increment= param->auto_increment_value;
  }
  else
  {
    maria_extra(info, HA_EXTRA_NO_KEYREAD, 0);
    my_free(row_buf);
    _ma_check_print_error(param,"%d when reading last record",my_errno);
    DBUG_VOID_RETURN;
  }

  maria_extra(info, HA_EXTRA_NO_KEYREAD, 0);
  my_free(row_buf);
  maria_update_state_info(param, info, UPDATE_AUTO_INC);
  DBUG_VOID_RETURN;
}
6364 
6365 
6366 /*
6367   Update statistics for each part of an index
6368 
6369   SYNOPSIS
6370     maria_update_key_parts()
6371       keyinfo           IN  Index information (only key->keysegs used)
6372       rec_per_key_part  OUT Store statistics here
6373       unique            IN  Array of (#distinct tuples)
6374       notnull_tuples    IN  Array of (#tuples), or NULL
6375       records               Number of records in the table
6376 
6377   DESCRIPTION
    This function is called to produce index statistics values from unique and
6379     notnull_tuples arrays after these arrays were produced with sequential
6380     index scan (the scan is done in two places: chk_index() and
6381     sort_key_write()).
6382 
6383     This function handles all 3 index statistics collection methods.
6384 
6385     Unique is an array:
6386       unique[0]= (#different values of {keypart1}) - 1
6387       unique[1]= (#different values of {keypart1,keypart2} tuple)-unique[0]-1
6388       ...
6389 
6390     For MI_STATS_METHOD_IGNORE_NULLS method, notnull_tuples is an array too:
6391       notnull_tuples[0]= (#of {keypart1} tuples such that keypart1 is not NULL)
6392       notnull_tuples[1]= (#of {keypart1,keypart2} tuples such that all
6393                           keypart{i} are not NULL)
6394       ...
6395     For all other statistics collection methods notnull_tuples==NULL.
6396 
6397     Output is an array:
6398     rec_per_key_part[k] =
6399      = E(#records in the table such that keypart_1=c_1 AND ... AND
6400          keypart_k=c_k for arbitrary constants c_1 ... c_k)
6401 
6402      = {assuming that values have uniform distribution and index contains all
        tuples from the domain (or that {c_1, ..., c_k} tuple is chosen from
6404         index tuples}
6405 
6406      = #tuples-in-the-index / #distinct-tuples-in-the-index.
6407 
6408     The #tuples-in-the-index and #distinct-tuples-in-the-index have different
6409     meaning depending on which statistics collection method is used:
6410 
6411     MI_STATS_METHOD_*  how are nulls compared?  which tuples are counted?
6412      NULLS_EQUAL            NULL == NULL           all tuples in table
6413      NULLS_NOT_EQUAL        NULL != NULL           all tuples in table
6414      IGNORE_NULLS               n/a             tuples that don't have NULLs
6415 */
6416 
void maria_update_key_parts(MARIA_KEYDEF *keyinfo, double *rec_per_key_part,
                      ulonglong *unique, ulonglong *notnull,
                      ulonglong records)
{
  ulonglong distinct_sum= 0;
  ulonglong row_count= records;
  uint part;

  for (part= 0; part < keyinfo->keysegs; part++)
  {
    ulonglong distinct;
    double estimate;

    /* unique[] holds increments, so accumulate them per key prefix */
    distinct_sum+= unique[part];
    distinct= distinct_sum + 1;
    if (notnull)
    {
      row_count= notnull[part];
      /*
        #(unique_tuples not counting tuples with NULLs) =
          #(unique_tuples counting tuples with NULLs as different) -
          #(tuples with NULLs)
      */
      distinct-= (records - notnull[part]);
    }

    if (distinct == 0)
      estimate= 1;
    else if (distinct_sum == 0)
      estimate= ulonglong2double(row_count);    /* 1 unique tuple */
    else
      estimate= ulonglong2double(row_count) / ulonglong2double(distinct);

    /*
      for some weird keys (e.g. FULLTEXT) the estimate can be <1 here.
      let's ensure it is not
    */
    if (estimate < 1)
      estimate= 1;

    rec_per_key_part[part]= estimate;
  }
}
6456 
6457 
maria_byte_checksum(const uchar * buf,uint length)6458 static ha_checksum maria_byte_checksum(const uchar *buf, uint length)
6459 {
6460   ha_checksum crc;
6461   const uchar *end=buf+length;
6462   for (crc=0; buf != end; buf++)
6463     crc=((crc << 1) + *buf) +
6464       MY_TEST(crc & (((ha_checksum) 1) << (8 * sizeof(ha_checksum) - 1)));
6465   return crc;
6466 }
6467 
maria_too_big_key_for_sort(MARIA_KEYDEF * key,ha_rows rows)6468 my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows)
6469 {
6470   uint key_maxlength=key->maxlength;
6471   if (key->flag & HA_FULLTEXT)
6472   {
6473     uint ft_max_word_len_for_sort=FT_MAX_WORD_LEN_FOR_SORT*
6474                                   key->seg->charset->mbmaxlen;
6475     key_maxlength+=ft_max_word_len_for_sort-HA_FT_MAXBYTELEN;
6476   }
6477   return (key->flag & HA_SPATIAL) ||
6478           (key->flag & (HA_BINARY_PACK_KEY | HA_VAR_LENGTH_KEY | HA_FULLTEXT) &&
6479 	  ((ulonglong) rows * key_maxlength >
6480 	   (ulonglong) maria_max_temp_length));
6481 }
6482 
6483 /*
6484   Return TRUE if we can use repair by sorting
6485   One can set the force argument to force to use sorting
6486   even if the temporary file would be quite big!
6487 */
6488 
maria_test_if_sort_rep(MARIA_HA * info,ha_rows rows,ulonglong key_map,my_bool force)6489 my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows,
6490                                ulonglong key_map, my_bool force)
6491 {
6492   MARIA_SHARE *share= info->s;
6493   MARIA_KEYDEF *key=share->keyinfo;
6494   uint i;
6495 
6496   /*
6497     maria_repair_by_sort only works if we have at least one key. If we don't
6498     have any keys, we should use the normal repair.
6499   */
6500   if (! maria_is_any_key_active(key_map))
6501     return FALSE;				/* Can't use sort */
6502   for (i=0 ; i < share->base.keys ; i++,key++)
6503   {
6504     if (!force && maria_too_big_key_for_sort(key,rows))
6505       return FALSE;
6506   }
6507   return TRUE;
6508 }
6509 
6510 
6511 /**
6512    @brief Create a new handle for manipulation the new record file
6513 
6514    @note
6515    It's ok for Recovery to have two MARIA_SHARE on the same index file
6516    because the one we create here is not transactional
6517 */
6518 
create_new_data_handle(MARIA_SORT_PARAM * param,File new_file)6519 static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file)
6520 {
6521 
6522   MARIA_SORT_INFO *sort_info= param->sort_info;
6523   MARIA_HA *info= sort_info->info;
6524   MARIA_HA *new_info;
6525   DBUG_ENTER("create_new_data_handle");
6526 
6527   if (!(sort_info->new_info= maria_open(info->s->open_file_name.str, O_RDWR,
6528                                         HA_OPEN_COPY | HA_OPEN_FOR_REPAIR |
6529                                         HA_OPEN_INTERNAL_TABLE)))
6530     DBUG_RETURN(1);
6531 
6532   new_info= sort_info->new_info;
6533   _ma_bitmap_set_pagecache_callbacks(&new_info->s->bitmap.file,
6534                                      new_info->s);
6535   _ma_set_data_pagecache_callbacks(&new_info->dfile, new_info->s);
6536   change_data_file_descriptor(new_info, new_file);
6537   maria_lock_database(new_info, F_EXTRA_LCK);
6538   if ((sort_info->param->testflag & T_UNPACK) &&
6539       info->s->data_file_type == COMPRESSED_RECORD)
6540   {
6541     (*new_info->s->once_end)(new_info->s);
6542     (*new_info->s->end)(new_info);
6543     restore_data_file_type(new_info->s);
6544     _ma_setup_functions(new_info->s);
6545     if ((*new_info->s->once_init)(new_info->s, new_file) ||
6546         (*new_info->s->init)(new_info))
6547       DBUG_RETURN(1);
6548   }
6549   _ma_reset_status(new_info);
6550   if (_ma_initialize_data_file(new_info->s, new_file))
6551     DBUG_RETURN(1);
6552 
6553   /* Take into account any bitmap page created above: */
6554   param->filepos= new_info->s->state.state.data_file_length;
6555 
6556   /* Use new virtual functions for key generation */
6557   info->s->keypos_to_recpos= new_info->s->keypos_to_recpos;
6558   info->s->recpos_to_keypos= new_info->s->recpos_to_keypos;
6559   DBUG_RETURN(0);
6560 }
6561 
6562 
6563 static void
set_data_file_type(MARIA_SORT_INFO * sort_info,MARIA_SHARE * share)6564 set_data_file_type(MARIA_SORT_INFO *sort_info, MARIA_SHARE *share)
6565 {
6566   if ((sort_info->new_data_file_type=share->data_file_type) ==
6567       COMPRESSED_RECORD && sort_info->param->testflag & T_UNPACK)
6568   {
6569     MARIA_SHARE tmp;
6570     sort_info->new_data_file_type= share->state.header.org_data_file_type;
6571     /* Set delete_function for sort_delete_record() */
6572     tmp= *share;
6573     tmp.state.header.data_file_type= tmp.state.header.org_data_file_type;
6574     tmp.options= ~HA_OPTION_COMPRESS_RECORD;
6575     _ma_setup_functions(&tmp);
6576     share->delete_record=tmp.delete_record;
6577   }
6578 }
6579 
/*
  Switch a share from compressed records back to its original data file
  type: clears HA_OPTION_COMPRESS_RECORD (in memory and in the state
  header), restores the data file type from org_data_file_type, zeroes the
  pack header length and refreshes the keypos/recpos conversion functions.
*/
static void restore_data_file_type(MARIA_SHARE *share)
{
  MARIA_SHARE scratch;

  /* Drop the compression flag and mirror the options into the header */
  share->options&= ~HA_OPTION_COMPRESS_RECORD;
  mi_int2store(share->state.header.options, share->options);

  /* Go back to the original record format */
  share->state.header.data_file_type=
    share->state.header.org_data_file_type;
  share->data_file_type= share->state.header.data_file_type;
  share->pack.header_length= 0;

  /*
    Use new virtual functions for key generation. They are picked up from
    a scratch copy so that no other function pointer in 'share' changes.
  */
  scratch= *share;
  _ma_setup_functions(&scratch);
  share->recpos_to_keypos= scratch.recpos_to_keypos;
  share->keypos_to_recpos= scratch.keypos_to_recpos;
}
6596 
6597 
/*
  Close the data file currently used by the handler and make both the
  handler and its share's bitmap use 'new_file' instead.
  The result of the close is not checked.
*/
static void change_data_file_descriptor(MARIA_HA *info, File new_file)
{
  MARIA_SHARE *share= info->s;

  mysql_file_close(info->dfile.file, MYF(MY_WME));
  share->bitmap.file.file= new_file;
  info->dfile.file= new_file;
  _ma_bitmap_reset_cache(share);
}
6604 
6605 
6606 /**
6607    @brief Mark the data file to not be used
6608 
6609    @note
6610    This is used in repair when we want to ensure the handler will not
6611    write anything to the data file anymore
6612 */
6613 
unuse_data_file_descriptor(MARIA_HA * info)6614 static void unuse_data_file_descriptor(MARIA_HA *info)
6615 {
6616   (void) flush_pagecache_blocks(info->s->pagecache,
6617                                 &info->s->bitmap.file,
6618                                 FLUSH_IGNORE_CHANGED);
6619   info->dfile.file= info->s->bitmap.file.file= -1;
6620   _ma_bitmap_reset_cache(info->s);
6621 }
6622 
6623 
6624 /*
6625   Copy all states that has to do with the data file
6626 
6627   NOTES
6628     This is done to copy the state from the data file generated from
6629     repair to the original handler
6630 */
6631 
copy_data_file_state(MARIA_STATE_INFO * to,MARIA_STATE_INFO * from)6632 static void copy_data_file_state(MARIA_STATE_INFO *to,
6633                                  MARIA_STATE_INFO *from)
6634 {
6635   to->state.records=           from->state.records;
6636   to->state.del=               from->state.del;
6637   to->state.empty=             from->state.empty;
6638   to->state.data_file_length=  from->state.data_file_length;
6639   to->split=                   from->split;
6640   to->dellink=		       from->dellink;
6641   to->first_bitmap_with_space= from->first_bitmap_with_space;
6642 }
6643 
6644 
/*
  Read 'safely' next record while scanning table.

  SYNOPSIS
    _ma_safe_scan_block_record()
    sort_info           Repair state. sort_info->page is the page being
                        scanned (advanced here), sort_info->filelength
                        bounds the scan and sort_info->param receives the
                        informational messages.
    info                Maria handler
    record              Store found here

  NOTES
    - A table scan must have been started before this. (The original
      comment names mi_scan(); presumably the Maria scan-init function is
      meant -- TODO confirm.)

    Differences compared to  _ma_scan_block_records() are:
    - We read all blocks, not only blocks marked by the bitmap to be safe
    - In case of errors, next read will read next record.
    - More sanity checks

    The scan position is kept in info->cur_row.nextpos (next directory
    entry on the current page), info->scan.* (page buffer and directory
    pointers) and sort_info->page.

  RETURN
    0   ok
    HA_ERR_END_OF_FILE  End of file
    #   error number (my_errno of a failed page read, or the result of
        _ma_read_block_record2())
*/


static int _ma_safe_scan_block_record(MARIA_SORT_INFO *sort_info,
                                      MARIA_HA *info, uchar *record)
{
  MARIA_SHARE *share= info->s;
  /* Continue after the row returned by the previous call */
  MARIA_RECORD_POS record_pos= info->cur_row.nextpos;
  pgcache_page_no_t page= sort_info->page;
  DBUG_ENTER("_ma_safe_scan_block_record");

  for (;;)
  {
    /* Find next row in current page */
    if (likely(record_pos < info->scan.number_of_rows))
    {
      uint length, offset;
      uchar *data, *end_of_data;
      char llbuff[22];

      /*
        Skip directory entries with offset 0. The directory is walked
        towards lower addresses; running past dir_end means the directory
        is broken and the rest of the page is given up.
      */
      while (!(offset= uint2korr(info->scan.dir)))
      {
        info->scan.dir-= DIR_ENTRY_SIZE;
        record_pos++;
        if (info->scan.dir < info->scan.dir_end)
        {
          _ma_check_print_info(sort_info->param,
                               "Wrong directory on page %s",
                               llstr(page, llbuff));
          goto read_next_page;
        }
      }
      /* found row */
      info->cur_row.lastpos= info->scan.row_base_page + record_pos;
      info->cur_row.nextpos= record_pos + 1;
      data= info->scan.page_buff + offset;
      /* The entry holds the row offset followed by its length at +2 */
      length= uint2korr(info->scan.dir + 2);
      end_of_data= data + length;
      info->scan.dir-= DIR_ENTRY_SIZE;          /* Point to previous row */

      /*
        Sanity checks: the row must end before the directory, start after
        the page header and be at least min_block_length long.
      */
      if (end_of_data > info->scan.dir_end ||
          offset < PAGE_HEADER_SIZE(info->s) ||
          length < share->base.min_block_length)
      {
        _ma_check_print_info(sort_info->param,
                             "Wrong directory entry %3u at page %s",
                             (uint) record_pos, llstr(page, llbuff));
        /* Ignore the bad entry and try the next one on this page */
        record_pos++;
        continue;
      }
      else
      {
        DBUG_PRINT("info", ("rowid: %lu", (ulong) info->cur_row.lastpos));
        DBUG_RETURN(_ma_read_block_record2(info, record, data, end_of_data));
      }
    }

read_next_page:
    /* Read until we find next head page */
    for (;;)
    {
      uint page_type;
      char llbuff[22];

      /*
        sort_info->page is advanced together with the local copy so that a
        later call resumes after this page even if we return with an error.
      */
      sort_info->page++;                        /* In case of errors */
      page++;
      if (!(page % share->bitmap.pages_covered))
      {
        /* Skip bitmap */
        page++;
        sort_info->page++;
      }
      /* Stop when the page would not fit completely inside the file */
      if ((my_off_t) (page + 1) * share->block_size > sort_info->filelength)
        DBUG_RETURN(HA_ERR_END_OF_FILE);
      if (!(pagecache_read(share->pagecache,
                           &info->dfile,
                           page, 0, info->scan.page_buff,
                           PAGECACHE_READ_UNKNOWN_PAGE,
                           PAGECACHE_LOCK_LEFT_UNLOCKED, 0)))
      {
        /* CRC and decryption failures skip the page; others abort */
        if (my_errno == HA_ERR_WRONG_CRC ||
            my_errno == HA_ERR_DECRYPTION_FAILED)
        {
          /*
            Don't give errors for zero filled blocks. These can
            sometimes be found at end of a bitmap when we wrote a big
            record last that was moved to the next bitmap.
          */
          if (_ma_check_bitmap_data(info, UNALLOCATED_PAGE, 0,
                                    _ma_bitmap_get_page_bits(info,
                                                             &share->bitmap,
                                                             page)))
          {
            _ma_check_print_info(sort_info->param,
                                 "Wrong CRC on datapage at %s",
                                 llstr(page, llbuff));
          }
          continue;
        }
        DBUG_RETURN(my_errno);
      }
      page_type= (info->scan.page_buff[PAGE_TYPE_OFFSET] &
                  PAGE_TYPE_MASK);
      if (page_type == HEAD_PAGE)
      {
        /* A head page is usable only if its directory has entries */
        if ((info->scan.number_of_rows=
             (uint) (uchar) info->scan.page_buff[DIR_COUNT_OFFSET]) != 0)
          break;
        _ma_check_print_info(sort_info->param,
                             "Wrong head page at page %s",
                             llstr(page, llbuff));
      }
      else if (page_type >= MAX_PAGE_TYPE)
      {
        _ma_check_print_info(sort_info->param,
                             "Found wrong page type: %d at page %s",
                             page_type, llstr(page, llbuff));
      }
      /* Pages of any other type are skipped silently */
    }

    /* New head page */
    /* dir starts at the entry just before the page suffix ... */
    info->scan.dir= (info->scan.page_buff + share->block_size -
                     PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE);
    /* ... and dir_end is number_of_rows - 1 entries below it */
    info->scan.dir_end= (info->scan.dir -
                         (info->scan.number_of_rows - 1) *
                         DIR_ENTRY_SIZE);
    info->scan.row_base_page= ma_recordpos(page, 0);
    record_pos= 0;
  }
}
6795 
6796 
6797 /**
6798    @brief Writes a LOGREC_REPAIR_TABLE record and updates create_rename_lsn
6799    if needed (so that maria_read_log does not redo the repair).
6800 
6801    @param  param            description of the REPAIR operation
6802    @param  info             table
6803 
6804    @return Operation status
6805      @retval 0      ok
6806      @retval 1      error (disk problem)
6807 */
6808 
write_log_record_for_repair(const HA_CHECK * param,MARIA_HA * info)6809 my_bool write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info)
6810 {
6811   MARIA_SHARE *share= info->s;
6812   /* in case this is maria_chk or recovery... */
6813   if (translog_status == TRANSLOG_OK && !maria_in_recovery &&
6814       share->base.born_transactional)
6815   {
6816     my_bool save_now_transactional= share->now_transactional;
6817 
6818     /*
6819       For now this record is only informative. It could serve when applying
6820       logs to a backup, but that needs more thought. Assume table became
6821       corrupted. It is repaired, then some writes happen to it.
6822       Later we restore an old backup, and want to apply this REDO_REPAIR_TABLE
6823       record. For it to give the same result as originally, the table should
6824       be corrupted the same way, so applying previous REDOs should produce the
6825       same corruption; that's really not guaranteed (different execution paths
6826       in execution of REDOs vs runtime code so not same bugs hit, temporary
6827       hardware issues not repeatable etc). Corruption may not be repeatable.
6828       A reasonable solution is to execute the REDO_REPAIR_TABLE record and
6829       check if the checksum of the resulting table matches what it was at the
6830       end of the original repair (should be stored in log record); or execute
6831       the REDO_REPAIR_TABLE if the checksum of the table-before-repair matches
6832       was it was at the start of the original repair (should be stored in log
6833       record).
6834     */
6835     LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6836     uchar log_data[FILEID_STORE_SIZE + 8 + 8];
6837     LSN lsn;
6838 
6839     /*
6840       testflag gives an idea of what REPAIR did (in particular T_QUICK
6841       or not: did it touch the data file or not?).
6842     */
6843     int8store(log_data + FILEID_STORE_SIZE, param->testflag);
6844     /* org_key_map is used when recreating index after a load data infile */
6845     int8store(log_data + FILEID_STORE_SIZE + 8, param->org_key_map);
6846 
6847     log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
6848     log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6849 
6850     share->now_transactional= 1;
6851     if (unlikely(translog_write_record(&lsn, LOGREC_REDO_REPAIR_TABLE,
6852                                        &dummy_transaction_object, info,
6853                                        (translog_size_t) sizeof(log_data),
6854                                        sizeof(log_array)/sizeof(log_array[0]),
6855                                        log_array, log_data, NULL) ||
6856                  translog_flush(lsn)))
6857       return TRUE;
6858     /*
6859       The table's existence was made durable earlier (MY_SYNC_DIR passed to
6860       maria_change_to_newfile()). All pages have been flushed, state too, we
6861       need to force it to disk. Old REDOs should not be applied to the table,
6862       which is already enforced as skip_redos_lsn was increased in
6863       protect_against_repair_crash(). But if this is an explicit repair,
6864       even UNDO phase should ignore this table: create_rename_lsn should be
6865       increased, and this also serves for the REDO_REPAIR to be ignored by
6866       maria_read_log.
6867       The fully correct order would be: sync data and index file, remove crash
6868       mark and update LSNs then write state and sync index file. But at this
6869       point state (without crash mark) is already written.
6870     */
6871     if ((!(param->testflag & T_NO_CREATE_RENAME_LSN) &&
6872          _ma_update_state_lsns(share, lsn, share->state.create_trid, FALSE,
6873                                FALSE)) ||
6874         _ma_sync_table_files(info))
6875       return TRUE;
6876     share->now_transactional= save_now_transactional;
6877   }
6878   return FALSE;
6879 }
6880 
6881 
6882 /**
6883   Writes an UNDO record which if executed in UNDO phase, will empty the
6884   table. Such record is thus logged only in certain cases of bulk insert
6885   (table needs to be empty etc).
6886 */
write_log_record_for_bulk_insert(MARIA_HA * info)6887 my_bool write_log_record_for_bulk_insert(MARIA_HA *info)
6888 {
6889   LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6890   uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE];
6891   LSN lsn;
6892   lsn_store(log_data, info->trn->undo_lsn);
6893   log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
6894   log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6895   return translog_write_record(&lsn, LOGREC_UNDO_BULK_INSERT,
6896                                info->trn, info,
6897                                (translog_size_t)
6898                                log_array[TRANSLOG_INTERNAL_PARTS +
6899                                          0].length,
6900                                TRANSLOG_INTERNAL_PARTS + 1, log_array,
6901                                log_data + LSN_STORE_SIZE, NULL) ||
6902     translog_flush(lsn); /* WAL */
6903 }
6904 
6905 
6906 /* Give error message why reading of key page failed */
6907 
report_keypage_fault(HA_CHECK * param,MARIA_HA * info,my_off_t position)6908 static void report_keypage_fault(HA_CHECK *param, MARIA_HA *info,
6909                                  my_off_t position)
6910 {
6911   char buff[11];
6912   uint32 block_size= info->s->block_size;
6913 
6914   if (my_errno == HA_ERR_CRASHED)
6915     _ma_check_print_error(param,
6916                           "Wrong base information on indexpage at page: %s",
6917                           llstr(position / block_size, buff));
6918   else
6919     _ma_check_print_error(param,
6920                           "Can't read indexpage from page: %s, "
6921                           "error: %d",
6922                           llstr(position / block_size, buff), my_errno);
6923 }
6924 
6925 
6926 /**
6927   When we want to check a table, we verify that the transaction ids of rows
6928   and keys are not bigger than the biggest id generated by Maria so far, which
6929   is returned by the function below.
6930 
6931   @note If control file is not open, 0 may be returned; to not confuse
6932   this with a valid max trid of 0, the caller should notice that it failed to
6933   open the control file (ma_control_file_inited() can serve for that).
6934 */
6935 
max_trid_in_system(void)6936 static TrID max_trid_in_system(void)
6937 {
6938   TrID id= trnman_get_max_trid(); /* 0 if transac manager not initialized */
6939   /* 'id' may be far bigger, if last shutdown is old */
6940   return MY_MAX(id, max_trid_in_control_file);
6941 }
6942 
6943 
/*
  Report a row whose transaction id should not be visible.

  Every call increments param->not_visible_rows_found, but a message is
  printed for the first one only: a warning if no control file is in use,
  otherwise an error quoting param->max_trid.
*/
static void _ma_check_print_not_visible_error(HA_CHECK *param, TrID used_trid)
{
  char trid_buff[22], max_trid_buff[22];

  /* Only the first occurrence is reported */
  if (param->not_visible_rows_found++)
    return;

  if (ma_control_file_inited())
  {
    _ma_check_print_error(param,
                          "Found row with transaction id %s when max "
                          "transaction id according to aria_control_file "
                          "is %s",
                          llstr(used_trid, trid_buff),
                          llstr(param->max_trid, max_trid_buff));
    return;
  }

  /* Without a control file there is no max trid to compare against */
  _ma_check_print_warning(param,
                          "Found row with transaction id %s but no "
                          "aria_control_file was used or specified.  "
                          "The table may be corrupted",
                          llstr(used_trid, trid_buff));
}
6968 
6969 
6970 /**
6971   Mark that we can retry normal repair if we used quick repair
6972 
6973   We shouldn't do this in case of disk error as in this case we are likely
6974   to loose much more than expected.
6975 */
6976 
retry_if_quick(MARIA_SORT_PARAM * sort_param,int error)6977 void retry_if_quick(MARIA_SORT_PARAM *sort_param, int error)
6978 {
6979   HA_CHECK *param=sort_param->sort_info->param;
6980 
6981   if (!sort_param->fix_datafile && error >= HA_ERR_FIRST)
6982   {
6983     param->retry_repair=1;
6984     param->testflag|=T_RETRY_WITHOUT_QUICK;
6985   }
6986 }
6987 
6988 /* Print information about bitmap page */
6989 
print_bitmap_description(MARIA_SHARE * share,pgcache_page_no_t page,uchar * bitmap_data)6990 static void print_bitmap_description(MARIA_SHARE *share,
6991                                      pgcache_page_no_t page,
6992                                      uchar *bitmap_data)
6993 {
6994   char *tmp= my_malloc(MAX_BITMAP_INFO_LENGTH, MYF(MY_WME));
6995   if (!tmp)
6996     return;
6997   _ma_get_bitmap_description(&share->bitmap, bitmap_data, page, tmp);
6998   printf("Bitmap page %lu\n%s", (ulong) page, tmp);
6999   my_free(tmp);
7000 }
7001