1 /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2    Copyright (c) 2009, 2019, MariaDB Corporation.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16 
17 /* open an Aria table */
18 
19 #include "ma_fulltext.h"
20 #include "ma_sp_defs.h"
21 #include "ma_rt_index.h"
22 #include "ma_blockrec.h"
23 #include "trnman.h"
24 #include "ma_trnman.h"
25 #include <m_ctype.h>
26 #include "ma_crypt.h"
27 
28 #if defined(MSDOS) || defined(__WIN__)
29 #ifdef __WIN__
30 #include <fcntl.h>
31 #else
32 #include <process.h>			/* Prototype for getpid */
33 #endif
34 #endif
35 
36 static void setup_key_functions(MARIA_KEYDEF *keyinfo);
37 static my_bool maria_scan_init_dummy(MARIA_HA *info);
38 static void maria_scan_end_dummy(MARIA_HA *info);
39 static my_bool maria_once_init_dummy(MARIA_SHARE *, File);
40 static my_bool maria_once_end_dummy(MARIA_SHARE *);
41 static uchar *_ma_base_info_read(uchar *ptr, MARIA_BASE_INFO *base);
42 static uchar *_ma_state_info_read(uchar *, MARIA_STATE_INFO *, myf);
43 
/*
  Copy 'size' bytes from 'pos' into 'to' and then advance 'pos' by 'size'.

  'pos' must be a modifiable pointer lvalue. 'pos' and 'size' are each
  evaluated twice, so neither may have side effects.
  Every argument is parenthesized so that an expression argument (for
  example "array + 1" as destination) is cast and used as a whole.
  The expansion is kept as a plain brace block for compatibility with
  existing call sites.
*/
#define get_next_element(to,pos,size) \
  { memcpy((char*) (to), (pos), (size_t) (size)); (pos)+= (size); }
46 
47 
/*
  Abort header parsing if a read position has run past the end of the
  buffer.

  When pos > end_pos, _ma_set_fatal_error() is called for 'share' with
  HA_ERR_CRASHED and control jumps to the label 'err', which must exist in
  the function that uses this macro.

  The body is wrapped in do { } while (0) so that "disk_pos_assert(...);"
  is exactly one statement and is safe inside an unbraced if/else.
*/
#define disk_pos_assert(share, pos, end_pos)     \
do                                               \
{                                                \
  if ((pos) > (end_pos))                         \
  {                                              \
    _ma_set_fatal_error(share, HA_ERR_CRASHED);  \
    goto err;                                    \
  }                                              \
} while (0)
54 
55 
56 /******************************************************************************
57 ** Return the shared struct if the table is already open.
58 ** In MySQL the server will handle version issues.
59 ******************************************************************************/
60 
_ma_test_if_reopen(const char * filename)61 MARIA_HA *_ma_test_if_reopen(const char *filename)
62 {
63   LIST *pos;
64 
65   for (pos=maria_open_list ; pos ; pos=pos->next)
66   {
67     MARIA_HA *info=(MARIA_HA*) pos->data;
68     MARIA_SHARE *share= info->s;
69     if (!strcmp(share->unique_file_name.str,filename) && share->last_version)
70       return info;
71   }
72   return 0;
73 }
74 
75 
76 /*
77   Open a new instance of an already opened Maria table
78 
79   SYNOPSIS
80     maria_clone_internal()
81     share	Share of already open table
82     mode	Mode of table (O_RDONLY | O_RDWR)
83     data_file   Filedescriptor of data file to use < 0 if one should open
84 	        open it.
85     internal_table <> 0 if this is an internal temporary table
86 
87  RETURN
88     #   Maria handler
89     0   Error
90 */
91 
92 
maria_clone_internal(MARIA_SHARE * share,int mode,File data_file,uint internal_table)93 static MARIA_HA *maria_clone_internal(MARIA_SHARE *share,
94                                       int mode, File data_file,
95                                       uint internal_table)
96 {
97   int save_errno;
98   uint errpos;
99   MARIA_HA info,*m_info;
100   my_bitmap_map *changed_fields_bitmap;
101   myf flag= MY_WME | (share->temporary ? MY_THREAD_SPECIFIC : 0);
102   DBUG_ENTER("maria_clone_internal");
103 
104   errpos= 0;
105   bzero((uchar*) &info,sizeof(info));
106 
107   if (mode == O_RDWR && share->mode == O_RDONLY)
108   {
109     my_errno=EACCES;				/* Can't open in write mode */
110     goto err;
111   }
112   if (data_file >= 0)
113     info.dfile.file= data_file;
114   else if (_ma_open_datafile(&info, share))
115     goto err;
116   errpos= 5;
117 
118   /* alloc and set up private structure parts */
119   if (!my_multi_malloc(flag,
120 		       &m_info,sizeof(MARIA_HA),
121 		       &info.blobs,sizeof(MARIA_BLOB)*share->base.blobs,
122 		       &info.buff,(share->base.max_key_block_length*2+
123 				   share->base.max_key_length),
124 		       &info.lastkey_buff,share->base.max_key_length*2+1,
125 		       &info.first_mbr_key, share->base.max_key_length,
126 		       &info.maria_rtree_recursion_state,
127                        share->have_rtree ? 1024 : 0,
128                        &changed_fields_bitmap,
129                        bitmap_buffer_size(share->base.fields),
130 		       NullS))
131     goto err;
132   errpos= 6;
133 
134   memcpy(info.blobs,share->blobs,sizeof(MARIA_BLOB)*share->base.blobs);
135   info.lastkey_buff2= info.lastkey_buff + share->base.max_key_length;
136   info.last_key.data= info.lastkey_buff;
137 
138   info.s=share;
139   info.cur_row.lastpos= HA_OFFSET_ERROR;
140   /* Impossible first index to force initialization in _ma_check_index() */
141   info.lastinx= ~0;
142   info.update= (short) (HA_STATE_NEXT_FOUND+HA_STATE_PREV_FOUND);
143   info.opt_flag=READ_CHECK_USED;
144   info.this_unique= (ulong) info.dfile.file; /* Uniq number in process */
145 #ifdef MARIA_EXTERNAL_LOCKING
146   if (share->data_file_type == COMPRESSED_RECORD)
147     info.this_unique= share->state.unique;
148   info.this_loop=0;				/* Update counter */
149   info.last_unique= share->state.unique;
150   info.last_loop=   share->state.update_count;
151 #endif
152   info.errkey= -1;
153   info.page_changed=1;
154   info.keyread_buff= info.buff + share->base.max_key_block_length;
155 
156   info.lock_type= F_UNLCK;
157   if (share->options & HA_OPTION_TMP_TABLE)
158     info.lock_type= F_WRLCK;
159 
160   _ma_set_data_pagecache_callbacks(&info.dfile, share);
161   my_bitmap_init(&info.changed_fields, changed_fields_bitmap,
162                  share->base.fields, 0);
163   if ((*share->init)(&info))
164     goto err;
165 
166   /* The following should be big enough for all pinning purposes */
167   if (my_init_dynamic_array(&info.pinned_pages, sizeof(MARIA_PINNED_PAGE),
168                             MY_MAX(share->base.blobs*2 + 4,
169                             MARIA_MAX_TREE_LEVELS*3), 16, flag))
170     goto err;
171 
172 
173   mysql_mutex_lock(&share->intern_lock);
174   info.read_record= share->read_record;
175   share->reopen++;
176   share->write_flag=MYF(MY_NABP | MY_WAIT_IF_FULL);
177   if (share->options & HA_OPTION_READ_ONLY_DATA)
178   {
179     info.lock_type=F_RDLCK;
180     share->r_locks++;
181     share->tot_locks++;
182   }
183   if ((share->options & HA_OPTION_DELAY_KEY_WRITE) &&
184       maria_delay_key_write)
185     share->delay_key_write=1;
186 
187   if (!share->base.born_transactional)   /* For transactional ones ... */
188   {
189     /* ... force crash if no trn given */
190     _ma_set_tmp_trn_for_table(&info, &dummy_transaction_object);
191     info.state= &share->state.state;	/* Change global values by default */
192   }
193   else
194   {
195     info.state=  &share->state.common;
196     *info.state= share->state.state;            /* Initial values */
197   }
198   info.state_start= info.state;                 /* Initial values */
199 
200   mysql_mutex_unlock(&share->intern_lock);
201 
202   /* Allocate buffer for one record */
203   /* prerequisites: info->rec_buffer == 0 && info->rec_buff_size == 0 */
204   if (_ma_alloc_buffer(&info.rec_buff, &info.rec_buff_size,
205                        share->base.default_rec_buff_size, flag))
206     goto err;
207 
208   bzero(info.rec_buff, share->base.default_rec_buff_size);
209 
210   *m_info=info;
211   thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info);
212 
213   if (share->options & HA_OPTION_TMP_TABLE)
214     m_info->lock.type= TL_WRITE;
215 
216   if (!internal_table)
217   {
218     m_info->open_list.data= m_info->share_list.data= (void*) m_info;
219     maria_open_list=  list_add(maria_open_list,  &m_info->open_list);
220     share->open_list= list_add(share->open_list, &m_info->share_list);
221   }
222   else
223   {
224     /* We don't need to mark internal temporary tables as changed on disk */
225     share->internal_table= 1;
226     share->global_changed= 1;
227   }
228   DBUG_RETURN(m_info);
229 
230 err:
231   DBUG_PRINT("error", ("error: %d", my_errno));
232   save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE;
233   if ((save_errno == HA_ERR_CRASHED) ||
234       (save_errno == HA_ERR_CRASHED_ON_USAGE) ||
235       (save_errno == HA_ERR_CRASHED_ON_REPAIR))
236     _ma_report_error(save_errno, &share->open_file_name);
237   switch (errpos) {
238   case 6:
239     (*share->end)(&info);
240     delete_dynamic(&info.pinned_pages);
241     my_free(m_info);
242     /* fall through */
243   case 5:
244     if (data_file < 0)
245       mysql_file_close(info.dfile.file, MYF(0));
246     break;
247   }
248   my_errno=save_errno;
249   DBUG_RETURN (NULL);
250 } /* maria_clone_internal */
251 
252 
253 /******************************************************************************
254   open a MARIA table
255 
256   See my_base.h for the handle_locking argument
257   if handle_locking and HA_OPEN_ABORT_IF_CRASHED then abort if the table
258   is marked crashed or if we are not using locking and the table doesn't
259   have an open count of 0.
260 ******************************************************************************/
261 
maria_open(const char * name,int mode,uint open_flags)262 MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
263 {
264   int kfile,open_mode,save_errno;
265   uint i,j,len,errpos,head_length,base_pos,keys, realpath_err,
266     key_parts,base_key_parts,unique_key_parts,fulltext_keys,uniques;
267   uint internal_table= MY_TEST(open_flags & HA_OPEN_INTERNAL_TABLE);
268   myf common_flag= open_flags & HA_OPEN_TMP_TABLE ? MY_THREAD_SPECIFIC : 0;
269   uint file_version;
270   size_t info_length;
271   char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
272        data_name[FN_REFLEN];
273   uchar *UNINIT_VAR(disk_cache), *disk_pos, *end_pos;
274   MARIA_HA info, *UNINIT_VAR(m_info), *old_info;
275   MARIA_SHARE share_buff,*share;
276   double *rec_per_key_part;
277   ulong  *nulls_per_key_part;
278   my_off_t key_root[HA_MAX_POSSIBLE_KEY];
279   ulonglong max_key_file_length, max_data_file_length;
280   my_bool versioning= 1;
281   File data_file= -1;
282   DBUG_ENTER("maria_open");
283 
284   kfile= -1;
285   errpos= 0;
286   head_length=sizeof(share_buff.state.header);
287   bzero((uchar*) &info,sizeof(info));
288 
289   realpath_err= my_realpath(name_buff, fn_format(org_name, name, "",
290                                                  MARIA_NAME_IEXT,
291                                                  MY_UNPACK_FILENAME),MYF(0));
292   if (realpath_err > 0) /* File not found, no point in looking further. */
293   {
294     DBUG_RETURN(NULL);
295   }
296 
297   if (my_is_symlink(org_name) &&
298       (realpath_err || mysys_test_invalid_symlink(name_buff)))
299   {
300     my_errno= HA_WRONG_CREATE_OPTION;
301     DBUG_RETURN(0);
302   }
303 
304   old_info= 0;
305   if (!internal_table)
306     mysql_mutex_lock(&THR_LOCK_maria);
307   if ((open_flags & HA_OPEN_COPY) ||
308       (internal_table || !(old_info=_ma_test_if_reopen(name_buff))))
309   {
310     share= &share_buff;
311     bzero((uchar*) &share_buff,sizeof(share_buff));
312     share_buff.state.key_root=key_root;
313     share_buff.pagecache= multi_pagecache_search((uchar*) name_buff,
314 						 (uint) strlen(name_buff),
315                                                  maria_pagecache);
316 
317     DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_open",
318                     if (strstr(name, "/t1"))
319                     {
320                       my_errno= HA_ERR_CRASHED;
321                       goto err;
322                     });
323     DEBUG_SYNC_C("mi_open_kfile");
324     if ((kfile=mysql_file_open(key_file_kfile, name_buff,
325                                (open_mode=O_RDWR) | O_SHARE | O_NOFOLLOW | O_CLOEXEC,
326                                MYF(common_flag | MY_NOSYMLINKS))) < 0)
327     {
328       if ((errno != EROFS && errno != EACCES) ||
329 	  mode != O_RDONLY ||
330 	  (kfile=mysql_file_open(key_file_kfile, name_buff,
331                                  (open_mode=O_RDONLY) | O_SHARE | O_NOFOLLOW | O_CLOEXEC,
332                                  MYF(common_flag | MY_NOSYMLINKS))) < 0)
333 	goto err;
334     }
335     share->mode=open_mode;
336     errpos= 1;
337     if (mysql_file_pread(kfile,share->state.header.file_version, head_length,
338                          0, MYF(MY_NABP)))
339     {
340       my_errno= HA_ERR_NOT_A_TABLE;
341       goto err;
342     }
343     if (memcmp(share->state.header.file_version, maria_file_magic, 4))
344     {
345       DBUG_PRINT("error",("Wrong header in %s",name_buff));
346       DBUG_DUMP("error_dump", share->state.header.file_version,
347 		head_length);
348       my_errno=HA_ERR_NOT_A_TABLE;
349       goto err;
350     }
351     share->options= mi_uint2korr(share->state.header.options);
352     if (share->options &
353 	~(HA_OPTION_PACK_RECORD | HA_OPTION_PACK_KEYS |
354 	  HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA |
355 	  HA_OPTION_TEMP_COMPRESS_RECORD | HA_OPTION_CHECKSUM |
356           HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE |
357           HA_OPTION_RELIES_ON_SQL_LAYER | HA_OPTION_NULL_FIELDS |
358           HA_OPTION_PAGE_CHECKSUM))
359     {
360       DBUG_PRINT("error",("wrong options: 0x%lx", share->options));
361       my_errno=HA_ERR_NEW_FILE;
362       goto err;
363     }
364     if ((share->options & HA_OPTION_RELIES_ON_SQL_LAYER) &&
365         ! (open_flags & HA_OPEN_FROM_SQL_LAYER))
366     {
367       DBUG_PRINT("error", ("table cannot be opened from non-sql layer"));
368       my_errno= HA_ERR_UNSUPPORTED;
369       goto err;
370     }
371     /* Don't call realpath() if the name can't be a link */
372     if (!strcmp(name_buff, org_name) ||
373         my_readlink(index_name, org_name, MYF(0)) == -1)
374       (void) strmov(index_name, org_name);
375     *strrchr(org_name, FN_EXTCHAR)= '\0';
376     (void) fn_format(data_name,org_name,"",MARIA_NAME_DEXT,
377                      MY_APPEND_EXT|MY_UNPACK_FILENAME);
378     if (my_is_symlink(data_name))
379     {
380       if (my_realpath(data_name, data_name, MYF(0)))
381         goto err;
382       if (mysys_test_invalid_symlink(data_name))
383       {
384         my_errno= HA_WRONG_CREATE_OPTION;
385         goto err;
386       }
387       share->mode|= O_NOFOLLOW; /* all symlinks are resolved by realpath() */
388     }
389 
390     info_length=mi_uint2korr(share->state.header.header_length);
391     base_pos= mi_uint2korr(share->state.header.base_pos);
392 
393     /*
394       Allocate space for header information and for data that is too
395       big to keep on stack
396     */
397     if (!(disk_cache= my_malloc(info_length+128, MYF(MY_WME | common_flag))))
398     {
399       my_errno=ENOMEM;
400       goto err;
401     }
402 
403     end_pos=disk_cache+info_length;
404     errpos= 3;
405     if (mysql_file_pread(kfile, disk_cache, info_length, 0L, MYF(MY_NABP)))
406     {
407       _ma_set_fatal_error(share, HA_ERR_CRASHED);
408       goto err;
409     }
410     len=mi_uint2korr(share->state.header.state_info_length);
411     keys=    (uint) share->state.header.keys;
412     uniques= (uint) share->state.header.uniques;
413     fulltext_keys= (uint) share->state.header.fulltext_keys;
414     base_key_parts= key_parts= mi_uint2korr(share->state.header.key_parts);
415     unique_key_parts= mi_uint2korr(share->state.header.unique_key_parts);
416     if (len != MARIA_STATE_INFO_SIZE)
417     {
418       DBUG_PRINT("warning",
419 		 ("saved_state_info_length: %d  state_info_length: %d",
420 		  len,MARIA_STATE_INFO_SIZE));
421     }
422     share->state_diff_length=len-MARIA_STATE_INFO_SIZE;
423 
424     if (!_ma_state_info_read(disk_cache, &share->state, common_flag))
425       goto err;
426     len= mi_uint2korr(share->state.header.base_info_length);
427     if (len != MARIA_BASE_INFO_SIZE)
428     {
429       DBUG_PRINT("warning",("saved_base_info_length: %d  base_info_length: %d",
430 			    len,MARIA_BASE_INFO_SIZE));
431     }
432     disk_pos= _ma_base_info_read(disk_cache + base_pos, &share->base);
433     /*
434       Check if old version of Aria file. Version 0 has language
435       stored in header.not_used
436     */
437     file_version= (share->state.header.not_used == 0);
438     if (file_version == 0)
439       share->base.language= share->state.header.not_used;
440 
441     share->state.state_length=base_pos;
442     /* For newly opened tables we reset the error-has-been-printed flag */
443     share->state.changed&= ~STATE_CRASHED_PRINTED;
444     share->state.org_changed= share->state.changed;
445 
446     if (!(open_flags & HA_OPEN_FOR_REPAIR) &&
447 	((share->state.changed & STATE_CRASHED_FLAGS) ||
448 	 ((open_flags & HA_OPEN_ABORT_IF_CRASHED) &&
449 	  (my_disable_locking && share->state.open_count))))
450     {
451       DBUG_PRINT("error",("Table is marked as crashed. open_flags: %u  "
452                           "changed: %u  open_count: %u  !locking: %d",
453                           open_flags, share->state.changed,
454                           share->state.open_count, my_disable_locking));
455       my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ?
456 		HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE);
457       goto err;
458     }
459     if (share->state.open_count)
460       share->open_count_not_zero_on_open= 1;
461 
462     /*
463       A transactional table is not usable on this system if:
464       - share->state.create_trid > trnman_get_max_trid()
        - Critical as trids are stored relative to create_trid.
466       - uuid is different
467 
468         STATE_NOT_MOVABLE is reset when a table is zerofilled
469         (has no LSN's and no trids)
470 
471       We can ignore testing uuid if STATE_NOT_MOVABLE is not set, as in this
472       case the uuid will be set in _ma_mark_file_changed().
473     */
474     if (share->base.born_transactional &&
475         ((share->state.create_trid > trnman_get_max_trid() &&
476          !maria_in_recovery) ||
477          ((share->state.changed & STATE_NOT_MOVABLE) &&
478           ((!(open_flags & HA_OPEN_IGNORE_MOVED_STATE) &&
479             memcmp(share->base.uuid, maria_uuid, MY_UUID_SIZE))))))
480     {
481       DBUG_PRINT("warning", ("table is moved from another system.  uuid_diff: %d  create_trid: %lu  max_trid: %lu",
482                             memcmp(share->base.uuid, maria_uuid,
483                                    MY_UUID_SIZE) != 0,
484                              (ulong) share->state.create_trid,
485                              (ulong) trnman_get_max_trid()));
486       if (open_flags & HA_OPEN_FOR_REPAIR)
487         share->state.changed|= STATE_MOVED;
488       else
489       {
490         my_errno= HA_ERR_OLD_FILE;
491         goto err;
492       }
493     }
494 
495     /* sanity check */
496     if (share->base.keystart > 65535 || share->base.rec_reflength > 8)
497     {
498       _ma_set_fatal_error(share, HA_ERR_CRASHED);
499       goto err;
500     }
501 
502     key_parts+=fulltext_keys*FT_SEGS;
503     if (share->base.max_key_length > _ma_max_key_length() ||
504         keys > MARIA_MAX_KEY || key_parts > MARIA_MAX_KEY * HA_MAX_KEY_SEG)
505     {
506       DBUG_PRINT("error",("Wrong key info:  Max_key_length: %d  keys: %d  key_parts: %d", share->base.max_key_length, keys, key_parts));
507       my_errno=HA_ERR_UNSUPPORTED;
508       goto err;
509     }
510 
511     /* Ensure we have space in the key buffer for transaction id's */
512     if (share->base.born_transactional)
513       share->base.max_key_length= ALIGN_SIZE(share->base.max_key_length +
514                                              MARIA_MAX_PACK_TRANSID_SIZE);
515 
516     /*
517       If page cache is not initialized, then assume we will create the
518       page_cache after the table is opened!
519       This is only used by maria_check to allow it to check/repair tables
520       with different block sizes.
521     */
522     if (share->base.block_size != maria_block_size &&
523         share_buff.pagecache->inited != 0)
524     {
525       DBUG_PRINT("error", ("Wrong block size %u; Expected %u",
526                            (uint) share->base.block_size,
527                            (uint) maria_block_size));
528       my_errno=HA_ERR_UNSUPPORTED;
529       my_printf_error(my_errno, "Wrong block size %u; Expected %u",
530                       MYF(0),
531                       (uint) share->base.block_size,
532                       (uint) maria_block_size);
533       goto err;
534     }
535 
536     /* Correct max_file_length based on length of sizeof(off_t) */
537     max_data_file_length=
538       (share->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ?
539       (((ulonglong) 1 << (share->base.rec_reflength*8))-1) :
540       (_ma_safe_mul(share->base.pack_reclength,
541 		   (ulonglong) 1 << (share->base.rec_reflength*8))-1);
542 
543     max_key_file_length=
544       _ma_safe_mul(share->base.block_size,
545 		  ((ulonglong) 1 << (share->base.key_reflength*8))-1);
546 #if SIZEOF_OFF_T == 4
547     set_if_smaller(max_data_file_length, INT_MAX32);
548     set_if_smaller(max_key_file_length, INT_MAX32);
549 #endif
550     /* For internal temporary tables, max_data_file_length is already set */
551     if (!internal_table || !share->base.max_data_file_length)
552       share->base.max_data_file_length=(my_off_t) max_data_file_length;
553     DBUG_ASSERT(share->base.max_data_file_length);
554     share->base.max_key_file_length=(my_off_t) max_key_file_length;
555 
556     if (share->options & HA_OPTION_COMPRESS_RECORD)
557       share->base.max_key_length+=2;	/* For safety */
558     /* Add space for node pointer */
559     share->base.max_key_length+= share->base.key_reflength;
560 
561     share->unique_file_name.length= strlen(name_buff);
562     share->index_file_name.length=  strlen(index_name);
563     share->data_file_name.length=   strlen(data_name);
564     share->open_file_name.length=   strlen(name);
565     if (!my_multi_malloc(MYF(MY_WME | common_flag),
566 			 &share,sizeof(*share),
567 			 &rec_per_key_part, sizeof(double) * key_parts,
568                          &nulls_per_key_part, sizeof(long)* key_parts,
569 			 &share->keyinfo,keys*sizeof(MARIA_KEYDEF),
570 			 &share->uniqueinfo,uniques*sizeof(MARIA_UNIQUEDEF),
571 			 &share->keyparts,
572 			 (key_parts+unique_key_parts+keys+uniques) *
573 			 sizeof(HA_KEYSEG),
574 			 &share->columndef,
575 			 (share->base.fields+1)*sizeof(MARIA_COLUMNDEF),
576                          &share->column_nr, share->base.fields*sizeof(uint16),
577 			 &share->blobs,sizeof(MARIA_BLOB)*share->base.blobs,
578 			 &share->unique_file_name.str,
579 			 share->unique_file_name.length+1,
580 			 &share->index_file_name.str,
581                          share->index_file_name.length+1,
582 			 &share->data_file_name.str,
583                          share->data_file_name.length+1,
584                          &share->open_file_name.str,
585                          share->open_file_name.length+1,
586 			 &share->state.key_root,keys*sizeof(my_off_t),
587 			 &share->mmap_lock,sizeof(mysql_rwlock_t),
588 			 NullS))
589       goto err;
590     errpos= 4;
591 
592     *share= share_buff;
593     share->state.rec_per_key_part=   rec_per_key_part;
594     share->state.nulls_per_key_part= nulls_per_key_part;
595 
596     memcpy((char*) rec_per_key_part,
597 	   (char*) share_buff.state.rec_per_key_part,
598            sizeof(double)*base_key_parts);
599     memcpy((char*) nulls_per_key_part,
600 	   (char*) share_buff.state.nulls_per_key_part,
601            sizeof(long)*base_key_parts);
602     memcpy((char*) share->state.key_root,
603 	   (char*) key_root, sizeof(my_off_t)*keys);
604     strmov(share->unique_file_name.str, name_buff);
605     strmov(share->index_file_name.str, index_name);
606     strmov(share->data_file_name.str,  data_name);
607     strmov(share->open_file_name.str,  name);
608 
609     share->block_size= share->base.block_size;   /* Convenience */
610     share->max_index_block_size= share->block_size - KEYPAGE_CHECKSUM_SIZE;
611     share->keypage_header= ((share->base.born_transactional ?
612                              LSN_STORE_SIZE + TRANSID_SIZE :
613                              0) + KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE +
614                             KEYPAGE_USED_SIZE);
615 
616     if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED))
617     {
618       share->keypage_header+= ma_crypt_get_index_page_header_space(share);
619     }
620 
621     {
622       HA_KEYSEG *pos=share->keyparts;
623       uint32 ftkey_nr= 1;
624       for (i=0 ; i < keys ; i++)
625       {
626         MARIA_KEYDEF *keyinfo= &share->keyinfo[i];
627         keyinfo->share= share;
628 	disk_pos=_ma_keydef_read(disk_pos, keyinfo);
629         keyinfo->key_nr= i;
630 
631         /* See ma_delete.cc::underflow() */
632         if (!(keyinfo->flag & (HA_BINARY_PACK_KEY | HA_PACK_KEY)))
633           keyinfo->underflow_block_length= keyinfo->block_length/3;
634         else
635         {
636           /* Packed key, ensure we don't get overflow in underflow() */
637           keyinfo->underflow_block_length=
638             MY_MAX((int) (share->max_index_block_size - keyinfo->maxlength * 3),
639                 (int) (share->keypage_header + share->base.key_reflength));
640           set_if_smaller(keyinfo->underflow_block_length,
641                          keyinfo->block_length/3);
642         }
643 
644         disk_pos_assert(share,
645                         disk_pos + keyinfo->keysegs * HA_KEYSEG_SIZE,
646  			end_pos);
647         if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
648           share->have_rtree= 1;
649 	keyinfo->seg=pos;
650 	for (j=0 ; j < keyinfo->keysegs; j++,pos++)
651 	{
652 	  disk_pos=_ma_keyseg_read(disk_pos, pos);
653 	  if (pos->type == HA_KEYTYPE_TEXT ||
654               pos->type == HA_KEYTYPE_VARTEXT1 ||
655               pos->type == HA_KEYTYPE_VARTEXT2)
656 	  {
657 	    if (!pos->language)
658 	      pos->charset=default_charset_info;
659 	    else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME))))
660 	    {
661 	      my_errno=HA_ERR_UNKNOWN_CHARSET;
662 	      goto err;
663 	    }
664 	  }
665 	  else if (pos->type == HA_KEYTYPE_BINARY)
666 	    pos->charset= &my_charset_bin;
667 	}
668 	if (keyinfo->flag & HA_SPATIAL)
669 	{
670 #ifdef HAVE_SPATIAL
671 	  uint sp_segs=SPDIMS*2;
672 	  keyinfo->seg=pos-sp_segs;
673 	  keyinfo->keysegs--;
674           versioning= 0;
675 #else
676 	  my_errno=HA_ERR_UNSUPPORTED;
677 	  goto err;
678 #endif
679 	}
680         else if (keyinfo->flag & HA_FULLTEXT)
681 	{
682           versioning= 0;
683           DBUG_ASSERT(fulltext_keys);
684           {
685             uint k;
686             keyinfo->seg=pos;
687             for (k=0; k < FT_SEGS; k++)
688             {
689               *pos= ft_keysegs[k];
690               pos[0].language= pos[-1].language;
691               if (!(pos[0].charset= pos[-1].charset))
692               {
693                 _ma_set_fatal_error(share, HA_ERR_CRASHED);
694                 goto err;
695               }
696               pos++;
697             }
698           }
699           if (!share->ft2_keyinfo.seg)
700           {
701             memcpy(&share->ft2_keyinfo, keyinfo, sizeof(MARIA_KEYDEF));
702             share->ft2_keyinfo.keysegs=1;
703             share->ft2_keyinfo.flag=0;
704             share->ft2_keyinfo.keylength=
705             share->ft2_keyinfo.minlength=
706             share->ft2_keyinfo.maxlength=HA_FT_WLEN+share->base.rec_reflength;
707             share->ft2_keyinfo.seg=pos-1;
708             share->ft2_keyinfo.end=pos;
709             setup_key_functions(& share->ft2_keyinfo);
710           }
711           keyinfo->ftkey_nr= ftkey_nr++;
712 	}
713         setup_key_functions(keyinfo);
714 	keyinfo->end=pos;
715 	pos->type=HA_KEYTYPE_END;			/* End */
716 	pos->length=share->base.rec_reflength;
717 	pos->null_bit=0;
718 	pos->flag=0;					/* For purify */
719 	pos++;
720       }
721       for (i=0 ; i < uniques ; i++)
722       {
723 	disk_pos=_ma_uniquedef_read(disk_pos, &share->uniqueinfo[i]);
724         disk_pos_assert(share,
725                         disk_pos + share->uniqueinfo[i].keysegs *
726 			HA_KEYSEG_SIZE, end_pos);
727 	share->uniqueinfo[i].seg=pos;
728 	for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++)
729 	{
730 	  disk_pos=_ma_keyseg_read(disk_pos, pos);
731 	  if (pos->type == HA_KEYTYPE_TEXT ||
732               pos->type == HA_KEYTYPE_VARTEXT1 ||
733               pos->type == HA_KEYTYPE_VARTEXT2)
734 	  {
735 	    if (!pos->language)
736 	      pos->charset=default_charset_info;
737 	    else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME))))
738 	    {
739 	      my_errno=HA_ERR_UNKNOWN_CHARSET;
740 	      goto err;
741 	    }
742 	  }
743 	}
744 	share->uniqueinfo[i].end=pos;
745 	pos->type=HA_KEYTYPE_END;			/* End */
746 	pos->null_bit=0;
747 	pos->flag=0;
748 	pos++;
749       }
750       share->ftkeys= ftkey_nr;
751     }
752     share->data_file_type= share->state.header.data_file_type;
753     share->base_length= (BASE_ROW_HEADER_SIZE +
754                          share->base.is_nulls_extended +
755                          share->base.null_bytes +
756                          share->base.pack_bytes +
757                          MY_TEST(share->options & HA_OPTION_CHECKSUM));
758     share->kfile.file= kfile;
759 
760     if (open_flags & HA_OPEN_COPY)
761     {
762       /*
763         this instance will be a temporary one used just to create a data
764         file for REPAIR. Don't do logging. This base information will not go
765         to disk.
766       */
767       share->base.born_transactional= FALSE;
768     }
769     if (share->base.born_transactional)
770     {
771       share->page_type= PAGECACHE_LSN_PAGE;
772       if (share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS)
773       {
774         /*
775           Was repaired with maria_chk, maybe later maria_pack-ed. Some sort of
776           import into the server. It starts its existence (from the point of
777           view of the server, including server's recovery) now.
778         */
779         if (((open_flags & HA_OPEN_FROM_SQL_LAYER) &&
780              (share->state.changed & STATE_NOT_MOVABLE)) || maria_in_recovery)
781           _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE,
782                                     trnman_get_min_safe_trid(), TRUE, TRUE);
783       }
784       else if ((!LSN_VALID(share->state.create_rename_lsn) ||
785                 !LSN_VALID(share->state.is_of_horizon) ||
786                 (cmp_translog_addr(share->state.create_rename_lsn,
787                                    share->state.is_of_horizon) > 0) ||
788                 !LSN_VALID(share->state.skip_redo_lsn) ||
789                 (cmp_translog_addr(share->state.create_rename_lsn,
790                                    share->state.skip_redo_lsn) > 0)))
791       {
792         if (!(open_flags & HA_OPEN_FOR_REPAIR))
793         {
794           /*
795             If in Recovery, it will not work. If LSN is invalid and not
796             LSN_NEEDS_NEW_STATE_LSNS, header must be corrupted.
797             In both cases, must repair.
798           */
799           my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ?
800                     HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE);
801           goto err;
802         }
803         else
804         {
805           /*
806             Open in repair mode. Ensure that we mark the table crashed, so
807             that we run auto_repair on it
808           */
809           maria_mark_crashed_share(share);
810         }
811       }
812       else if (!(open_flags & HA_OPEN_FOR_REPAIR))
813       {
814         /* create_rename_lsn != LSN_NEEDS_NEW_STATE_LSNS */
815         share->state.changed|= STATE_NOT_MOVABLE;
816       }
817     }
818     else
819       share->page_type= PAGECACHE_PLAIN_PAGE;
820     share->now_transactional= share->base.born_transactional;
821 
822     /* Use pack_reclength as we don't want to modify base.pack_recklength */
823     if (share->state.header.org_data_file_type == DYNAMIC_RECORD)
824     {
825       /* add bits used to pack data to pack_reclength for faster allocation */
826       share->base.pack_reclength+= share->base.pack_bytes;
827       share->base.extra_rec_buff_size=
828         (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER) + MARIA_SPLIT_LENGTH +
829          MARIA_REC_BUFF_OFFSET);
830     }
831     if (share->data_file_type == COMPRESSED_RECORD)
832     {
833       /* Need some extra bytes for decode_bytes */
834       share->base.extra_rec_buff_size+= 7;
835     }
836     share->base.default_rec_buff_size= MY_MAX(share->base.pack_reclength +
837                                            share->base.extra_rec_buff_size,
838                                            share->base.max_key_length);
839 
840     disk_pos_assert(share,
841                     disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE,
842                     end_pos);
843     for (i= j= 0 ; i < share->base.fields ; i++)
844     {
845       disk_pos=_ma_columndef_read(disk_pos,&share->columndef[i]);
846       share->columndef[i].pack_type=0;
847       share->columndef[i].huff_tree=0;
848       if (share->columndef[i].type == FIELD_BLOB)
849       {
850 	share->blobs[j].pack_length=
851 	  share->columndef[i].length-portable_sizeof_char_ptr;
852 	share->blobs[j].offset= share->columndef[i].offset;
853 	j++;
854       }
855       if (share->columndef[i].type == FIELD_VARCHAR)
856         share->has_varchar_fields= 1;
857       if (share->columndef[i].null_bit)
858         share->has_null_fields= 1;
859     }
860     share->columndef[i].type= FIELD_LAST;	/* End marker */
861     disk_pos= _ma_column_nr_read(disk_pos, share->column_nr,
862                                  share->base.fields);
863 
864     if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED))
865     {
866       if (!(disk_pos= ma_crypt_read(share, disk_pos)))
867         goto err;
868     }
869 
870     if ((share->data_file_type == BLOCK_RECORD ||
871          share->data_file_type == COMPRESSED_RECORD))
872     {
873       if (_ma_open_datafile(&info, share))
874         goto err;
875       data_file= info.dfile.file;
876     }
877     errpos= 5;
878 
879     if (open_flags & HA_OPEN_DELAY_KEY_WRITE)
880       share->options|= HA_OPTION_DELAY_KEY_WRITE;
881     if (mode == O_RDONLY)
882       share->options|= HA_OPTION_READ_ONLY_DATA;
883     share->is_log_table= FALSE;
884 
885     if (open_flags & HA_OPEN_TMP_TABLE || share->options & HA_OPTION_TMP_TABLE)
886     {
887       common_flag|= MY_THREAD_SPECIFIC;
888       share->options|= HA_OPTION_TMP_TABLE;
889       share->temporary= share->delay_key_write= 1;
890       share->write_flag=MYF(MY_NABP);
891       share->w_locks++;			/* We don't have to update status */
892       share->tot_locks++;
893     }
894 
895     _ma_set_index_pagecache_callbacks(&share->kfile, share);
896     share->this_process=(ulong) getpid();
897 #ifdef MARIA_EXTERNAL_LOCKING
898     share->last_process= share->state.process;
899 #endif
900     share->base.key_parts=key_parts;
901     share->base.all_key_parts=key_parts+unique_key_parts;
902     if (!(share->last_version=share->state.version))
903       share->last_version=1;			/* Safety */
904     share->rec_reflength=share->base.rec_reflength; /* May be changed */
905     share->base.margin_key_file_length=(share->base.max_key_file_length -
906 					(keys ? MARIA_INDEX_BLOCK_MARGIN *
907 					 share->block_size * keys : 0));
908     my_free(disk_cache);
909     my_free(share_buff.state.rec_per_key_part);
910     disk_cache= 0;
911     share_buff.state.rec_per_key_part= 0;
912 
913     _ma_setup_functions(share);
914     max_data_file_length= share->base.max_data_file_length;
915     if ((*share->once_init)(share, info.dfile.file))
916       goto err;
917     if (internal_table)
918       set_if_smaller(share->base.max_data_file_length,
919                      max_data_file_length);
920     if (share->now_transactional)
921     {
922       /* Setup initial state that is visible for all */
923       MARIA_STATE_HISTORY_CLOSED *history;
924       if ((history= (MARIA_STATE_HISTORY_CLOSED *)
925            my_hash_search(&maria_stored_state,
926                        (uchar*) &share->state.create_rename_lsn, 0)))
927       {
928         /*
929           Move history from hash to share. This is safe to do as we
930           know we are the only one that is using the share.
931         */
932         share->state_history=
933           _ma_remove_not_visible_states(history->state_history, 0, 0);
934         history->state_history= 0;
935         (void) my_hash_delete(&maria_stored_state, (uchar*) history);
936         DBUG_PRINT("info", ("Reading state history.  trid: %lu  records: %lld",
937                             (ulong) share->state_history->trid,
938                             share->state_history->state.records));
939       }
940       else
941       {
942         /* Table is not part of any active transaction; Create new history */
943         if (!(share->state_history= (MARIA_STATE_HISTORY *)
944               my_malloc(sizeof(*share->state_history), MYF(MY_WME))))
945           goto err;
946         share->state_history->trid= 0;          /* Visible by all */
947         share->state_history->state= share->state.state;
948         share->state_history->next= 0;
949       }
950     }
951     thr_lock_init(&share->lock);
952     mysql_mutex_init(key_SHARE_intern_lock,
953                      &share->intern_lock, MY_MUTEX_INIT_FAST);
954     mysql_mutex_init(key_SHARE_key_del_lock,
955                      &share->key_del_lock, MY_MUTEX_INIT_FAST);
956     mysql_cond_init(key_SHARE_key_del_cond, &share->key_del_cond, 0);
957     mysql_mutex_init(key_SHARE_close_lock,
958                      &share->close_lock, MY_MUTEX_INIT_FAST);
959     for (i=0; i<keys; i++)
960       mysql_rwlock_init(key_KEYINFO_root_lock,
961                         &share->keyinfo[i].root_lock);
962     mysql_rwlock_init(key_SHARE_mmap_lock, &share->mmap_lock);
963 
964     share->row_is_visible=   _ma_row_visible_always;
965     share->lock.get_status=  _ma_reset_update_flag;
966     share->lock.start_trans= _ma_start_trans;
967 
968     if (!thr_lock_inited)
969     {
970       /* Probably a single threaded program; Don't use concurrent inserts */
971       maria_concurrent_insert=0;
972     }
973     else if (maria_concurrent_insert)
974     {
975       share->non_transactional_concurrent_insert=
976 	((share->options & (HA_OPTION_READ_ONLY_DATA | HA_OPTION_TMP_TABLE |
977                             HA_OPTION_COMPRESS_RECORD |
978                             HA_OPTION_TEMP_COMPRESS_RECORD)) ||
979 	 (open_flags & HA_OPEN_TMP_TABLE) ||
980          share->data_file_type == BLOCK_RECORD ||
981 	 share->have_rtree) ? 0 : 1;
982       if (share->non_transactional_concurrent_insert ||
983           (!share->temporary && share->now_transactional && versioning))
984       {
985         share->lock_key_trees= 1;
986         if (share->data_file_type == BLOCK_RECORD)
987         {
988           DBUG_ASSERT(share->now_transactional);
989           share->have_versioning= 1;
990           share->row_is_visible=     _ma_row_visible_transactional_table;
991           share->lock.get_status=    _ma_block_get_status;
992           share->lock.check_status=  _ma_block_check_status;
993           share->lock.start_trans=   _ma_block_start_trans;
994           /*
995             We can for the moment only allow multiple concurrent inserts
996             only if there is no auto-increment key.  To lift this restriction
997             we have to:
998             - Extend statement base replication to support auto-increment
999             intervalls.
1000             - Fix that we allocate auto-increment in intervals and that
1001               it's properly reset if the interval was not used
1002           */
1003           share->lock.allow_multiple_concurrent_insert=
1004             share->base.auto_key == 0;
1005           share->lock_restore_status= 0;
1006         }
1007         else
1008         {
1009           share->row_is_visible=      _ma_row_visible_non_transactional_table;
1010           share->lock.get_status=     _ma_get_status;
1011           share->lock.copy_status=    _ma_copy_status;
1012           share->lock.update_status=  _ma_update_status;
1013           share->lock.restore_status= _ma_restore_status;
1014           share->lock.check_status=   _ma_check_status;
1015           share->lock_restore_status= _ma_restore_status;
1016         }
1017       }
1018       else if (share->now_transactional)
1019       {
1020         DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
1021         share->lock.start_trans=    _ma_block_start_trans_no_versioning;
1022       }
1023     }
1024 #ifdef SAFE_MUTEX
1025     if (share->data_file_type == BLOCK_RECORD)
1026     {
1027       /*
1028         We must have internal_lock before bitmap_lock because we call
1029         _ma_flush_table_files() with internal_lock locked.
1030       */
1031       mysql_mutex_lock(&share->intern_lock);
1032       mysql_mutex_lock(&share->bitmap.bitmap_lock);
1033       mysql_mutex_unlock(&share->bitmap.bitmap_lock);
1034       mysql_mutex_unlock(&share->intern_lock);
1035     }
1036 #endif
1037     /*
1038       Memory mapping can only be requested after initializing intern_lock.
1039     */
1040     if (open_flags & HA_OPEN_MMAP)
1041     {
1042       info.s= share;
1043       maria_extra(&info, HA_EXTRA_MMAP, 0);
1044     }
1045   }
1046   else
1047   {
1048     share= old_info->s;
1049     if (share->data_file_type == BLOCK_RECORD)
1050       data_file= share->bitmap.file.file;       /* Only opened once */
1051   }
1052 
1053   if (!(m_info= maria_clone_internal(share, mode, data_file,
1054                                      internal_table)))
1055     goto err;
1056 
1057   if (maria_is_crashed(m_info))
1058     DBUG_PRINT("warning", ("table is crashed: changed: %u",
1059                            share->state.changed));
1060 
1061   if (!internal_table)
1062     mysql_mutex_unlock(&THR_LOCK_maria);
1063 
1064   m_info->open_flags= open_flags;
1065   DBUG_PRINT("exit", ("table: %p  name: %s",m_info, name));
1066   DBUG_RETURN(m_info);
1067 
1068 err:
1069   DBUG_PRINT("error", ("error: %d  errpos: %d", my_errno, errpos));
1070   save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE;
1071   if ((save_errno == HA_ERR_CRASHED) ||
1072       (save_errno == HA_ERR_CRASHED_ON_USAGE) ||
1073       (save_errno == HA_ERR_CRASHED_ON_REPAIR))
1074   {
1075     LEX_STRING tmp_name;
1076     tmp_name.str= (char*) name;
1077     tmp_name.length= strlen(name);
1078     _ma_report_error(save_errno, &tmp_name);
1079   }
1080   switch (errpos) {
1081   case 5:
1082     if (data_file >= 0)
1083       mysql_file_close(data_file, MYF(0));
1084     if (old_info)
1085       break;					/* Don't remove open table */
1086     (*share->once_end)(share);
1087     /* fall through */
1088   case 4:
1089     ma_crypt_free(share);
1090     my_free(share);
1091     /* fall through */
1092   case 3:
1093     my_free(disk_cache);
1094     my_free(share_buff.state.rec_per_key_part);
1095     /* fall through */
1096   case 1:
1097     mysql_file_close(kfile,MYF(0));
1098     /* fall through */
1099   case 0:
1100   default:
1101     break;
1102   }
1103   if (!internal_table)
1104     mysql_mutex_unlock(&THR_LOCK_maria);
1105   my_errno= save_errno;
1106   DBUG_RETURN (NULL);
1107 } /* maria_open */
1108 
1109 
1110 /*
1111   Reallocate a buffer, if the current buffer is not large enough
1112 */
1113 
_ma_alloc_buffer(uchar ** old_addr,size_t * old_size,size_t new_size,myf flag)1114 my_bool _ma_alloc_buffer(uchar **old_addr, size_t *old_size,
1115                          size_t new_size, myf flag)
1116 {
1117   if (*old_size < new_size)
1118   {
1119     uchar *addr;
1120     if (!(addr= (uchar*) my_realloc(*old_addr, new_size,
1121                                     MYF(MY_ALLOW_ZERO_PTR | flag))))
1122       return 1;
1123     *old_addr= addr;
1124     *old_size= new_size;
1125   }
1126   return 0;
1127 }
1128 
1129 
/*
  Multiply two unsigned values, returning the largest ulonglong value
  instead of wrapping around.

  Note that a == 0 also gives the largest value (not 0); this also keeps
  the division below from dividing by zero.
*/

ulonglong _ma_safe_mul(ulonglong a, ulonglong b)
{
  const ulonglong saturated= ~ (ulonglong) 0;	/* All bits set */

  if (a == 0)
    return saturated;
  if (b > saturated / a)                        /* a*b would not fit */
    return saturated;
  return a * b;
}
1138 
1139 	/* Set up functions in structs */
1140 
_ma_setup_functions(register MARIA_SHARE * share)1141 void _ma_setup_functions(register MARIA_SHARE *share)
1142 {
1143   share->once_init=          maria_once_init_dummy;
1144   share->once_end=           maria_once_end_dummy;
1145   share->init=      	     maria_scan_init_dummy;
1146   share->end=       	     maria_scan_end_dummy;
1147   share->scan_init=          maria_scan_init_dummy;/* Compat. dummy function */
1148   share->scan_end=           maria_scan_end_dummy;/* Compat. dummy function */
1149   share->scan_remember_pos=  _ma_def_scan_remember_pos;
1150   share->scan_restore_pos=   _ma_def_scan_restore_pos;
1151 
1152   share->write_record_init=  _ma_write_init_default;
1153   share->write_record_abort= _ma_write_abort_default;
1154   share->keypos_to_recpos=   _ma_transparent_recpos;
1155   share->recpos_to_keypos=   _ma_transparent_recpos;
1156 
1157   switch (share->data_file_type) {
1158   case COMPRESSED_RECORD:
1159     share->read_record= _ma_read_pack_record;
1160     share->scan= _ma_read_rnd_pack_record;
1161     share->once_init= _ma_once_init_pack_row;
1162     share->once_end=  _ma_once_end_pack_row;
1163     /*
1164       Calculate checksum according to data in the original, not compressed,
1165       row.
1166     */
1167     if (share->state.header.org_data_file_type == STATIC_RECORD &&
1168         ! (share->options & HA_OPTION_NULL_FIELDS))
1169       share->calc_checksum= _ma_static_checksum;
1170     else
1171       share->calc_checksum= _ma_checksum;
1172     share->calc_write_checksum= share->calc_checksum;
1173     break;
1174   case DYNAMIC_RECORD:
1175     share->read_record= _ma_read_dynamic_record;
1176     share->scan= _ma_read_rnd_dynamic_record;
1177     share->delete_record= _ma_delete_dynamic_record;
1178     share->compare_record= _ma_cmp_dynamic_record;
1179     share->compare_unique= _ma_cmp_dynamic_unique;
1180     share->calc_checksum= share->calc_write_checksum= _ma_checksum;
1181     if (share->base.blobs)
1182     {
1183       share->update_record= _ma_update_blob_record;
1184       share->write_record= _ma_write_blob_record;
1185     }
1186     else
1187     {
1188       share->write_record= _ma_write_dynamic_record;
1189       share->update_record= _ma_update_dynamic_record;
1190     }
1191     break;
1192   case STATIC_RECORD:
1193     share->read_record=      _ma_read_static_record;
1194     share->scan=             _ma_read_rnd_static_record;
1195     share->delete_record=    _ma_delete_static_record;
1196     share->compare_record=   _ma_cmp_static_record;
1197     share->update_record=    _ma_update_static_record;
1198     share->write_record=     _ma_write_static_record;
1199     share->compare_unique=   _ma_cmp_static_unique;
1200     share->keypos_to_recpos= _ma_static_keypos_to_recpos;
1201     share->recpos_to_keypos= _ma_static_recpos_to_keypos;
1202     if (share->state.header.org_data_file_type == STATIC_RECORD &&
1203         ! (share->options & HA_OPTION_NULL_FIELDS))
1204       share->calc_checksum= _ma_static_checksum;
1205     else
1206       share->calc_checksum= _ma_checksum;
1207     break;
1208   case NO_RECORD:
1209     share->read_record=      _ma_read_no_record;
1210     share->scan=             _ma_read_rnd_no_record;
1211     share->delete_record=    _ma_delete_no_record;
1212     share->update_record=    _ma_update_no_record;
1213     share->write_record=     _ma_write_no_record;
1214     share->recpos_to_keypos= _ma_no_keypos_to_recpos;
1215     share->keypos_to_recpos= _ma_no_keypos_to_recpos;
1216 
1217     /* Abort if following functions are called */
1218     share->compare_record=   0;
1219     share->compare_unique=   0;
1220     share->calc_checksum= 0;
1221     break;
1222   case BLOCK_RECORD:
1223     share->once_init= _ma_once_init_block_record;
1224     share->once_end=  _ma_once_end_block_record;
1225     share->init=      _ma_init_block_record;
1226     share->end=       _ma_end_block_record;
1227     share->write_record_init= _ma_write_init_block_record;
1228     share->write_record_abort= _ma_write_abort_block_record;
1229     share->scan_init=   _ma_scan_init_block_record;
1230     share->scan_end=    _ma_scan_end_block_record;
1231     share->scan=        _ma_scan_block_record;
1232     share->scan_remember_pos=  _ma_scan_remember_block_record;
1233     share->scan_restore_pos=   _ma_scan_restore_block_record;
1234     share->read_record= _ma_read_block_record;
1235     share->delete_record= _ma_delete_block_record;
1236     share->compare_record= _ma_compare_block_record;
1237     share->update_record= _ma_update_block_record;
1238     share->write_record=  _ma_write_block_record;
1239     share->compare_unique= _ma_cmp_block_unique;
1240     share->calc_checksum= _ma_checksum;
1241     share->keypos_to_recpos= _ma_transaction_keypos_to_recpos;
1242     share->recpos_to_keypos= _ma_transaction_recpos_to_keypos;
1243 
1244     /*
1245       write_block_record() will calculate the checksum; Tell maria_write()
1246       that it doesn't have to do this.
1247     */
1248     share->calc_write_checksum= 0;
1249     break;
1250   }
1251   share->file_read= _ma_nommap_pread;
1252   share->file_write= _ma_nommap_pwrite;
1253   share->calc_check_checksum= share->calc_checksum;
1254 
1255   if (!(share->options & HA_OPTION_CHECKSUM) &&
1256       share->data_file_type != COMPRESSED_RECORD)
1257     share->calc_checksum= share->calc_write_checksum= 0;
1258   return;
1259 }
1260 
1261 
setup_key_functions(register MARIA_KEYDEF * keyinfo)1262 static void setup_key_functions(register MARIA_KEYDEF *keyinfo)
1263 {
1264   if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
1265   {
1266 #ifdef HAVE_RTREE_KEYS
1267     keyinfo->ck_insert = maria_rtree_insert;
1268     keyinfo->ck_delete = maria_rtree_delete;
1269 #else
1270     DBUG_ASSERT(0); /* maria_open should check it never happens */
1271 #endif
1272   }
1273   else
1274   {
1275     keyinfo->ck_insert = _ma_ck_write;
1276     keyinfo->ck_delete = _ma_ck_delete;
1277   }
1278   if (keyinfo->flag & HA_SPATIAL)
1279     keyinfo->make_key= _ma_sp_make_key;
1280   else
1281     keyinfo->make_key= _ma_make_key;
1282 
1283   if (keyinfo->flag & HA_BINARY_PACK_KEY)
1284   {						/* Simple prefix compression */
1285     keyinfo->bin_search= _ma_seq_search;
1286     keyinfo->get_key= _ma_get_binary_pack_key;
1287     keyinfo->skip_key= _ma_skip_binary_pack_key;
1288     keyinfo->pack_key= _ma_calc_bin_pack_key_length;
1289     keyinfo->store_key= _ma_store_bin_pack_key;
1290   }
1291   else if (keyinfo->flag & HA_VAR_LENGTH_KEY)
1292   {
1293     keyinfo->get_key=  _ma_get_pack_key;
1294     keyinfo->skip_key= _ma_skip_pack_key;
1295     if (keyinfo->seg[0].flag & HA_PACK_KEY)
1296     {						/* Prefix compression */
1297       /*
1298         _ma_prefix_search() compares end-space against ASCII blank (' ').
1299         It cannot be used for character sets, that do not encode the
1300         blank character like ASCII does. UCS2 is an example. All
1301         character sets with a fixed width > 1 or a mimimum width > 1
1302         cannot represent blank like ASCII does. In these cases we have
1303         to use _ma_seq_search() for the search.
1304       */
1305       if (!keyinfo->seg->charset || use_strnxfrm(keyinfo->seg->charset) ||
1306           (keyinfo->seg->flag & HA_NULL_PART) ||
1307           keyinfo->seg->charset->mbminlen > 1)
1308         keyinfo->bin_search= _ma_seq_search;
1309       else
1310         keyinfo->bin_search= _ma_prefix_search;
1311       keyinfo->pack_key= _ma_calc_var_pack_key_length;
1312       keyinfo->store_key= _ma_store_var_pack_key;
1313     }
1314     else
1315     {
1316       keyinfo->bin_search= _ma_seq_search;
1317       keyinfo->pack_key= _ma_calc_var_key_length; /* Variable length key */
1318       keyinfo->store_key= _ma_store_static_key;
1319     }
1320   }
1321   else
1322   {
1323     keyinfo->bin_search= _ma_bin_search;
1324     keyinfo->get_key= _ma_get_static_key;
1325     keyinfo->skip_key= _ma_skip_static_key;
1326     keyinfo->pack_key= _ma_calc_static_key_length;
1327     keyinfo->store_key= _ma_store_static_key;
1328   }
1329 
1330   /* set keyinfo->write_comp_flag */
1331   if (keyinfo->flag & HA_SORT_ALLOWS_SAME)
1332     keyinfo->write_comp_flag=SEARCH_BIGGER; /* Put after same key */
1333   else if (keyinfo->flag & ( HA_NOSAME | HA_FULLTEXT))
1334   {
1335     keyinfo->write_comp_flag= SEARCH_FIND | SEARCH_UPDATE; /* No duplicates */
1336     if (keyinfo->flag & HA_NULL_ARE_EQUAL)
1337       keyinfo->write_comp_flag|= SEARCH_NULL_ARE_EQUAL;
1338   }
1339   else
1340     keyinfo->write_comp_flag= SEARCH_SAME; /* Keys in rec-pos order */
1341   keyinfo->write_comp_flag|= SEARCH_INSERT;
1342   return;
1343 }
1344 
1345 
1346 /**
1347    @brief Function to save and store the header in the index file (.MAI)
1348 
1349    Operates under MARIA_SHARE::intern_lock if requested.
1350    Sets MARIA_SHARE::MARIA_STATE_INFO::is_of_horizon if transactional table.
1351    Then calls _ma_state_info_write_sub().
1352 
1353    @param  share           table
1354    @param  pWrite          bitmap: if 1 (MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET)
1355                            is set my_pwrite() is used otherwise my_write();
1356                            if 2 (MA_STATE_INFO_WRITE_FULL_INFO) is set, info
1357                            about keys is written (should only be needed
1358                            after ALTER TABLE ENABLE/DISABLE KEYS, and
1359                            REPAIR/OPTIMIZE); if 4 (MA_STATE_INFO_WRITE_LOCK)
1360                            is set, MARIA_SHARE::intern_lock is taken.
1361 
1362    @return Operation status
1363      @retval 0      OK
1364      @retval 1      Error
1365 */
1366 
uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite)
{
  const my_bool take_lock= (pWrite & MA_STATE_INFO_WRITE_LOCK) != 0;
  uint write_error;

  /* Nothing is written for a table opened with read-only data */
  if (share->options & HA_OPTION_READ_ONLY_DATA)
    return 0;

  if (take_lock)
    mysql_mutex_lock(&share->intern_lock);
  else if (maria_multi_threaded && !share->temporary)
    mysql_mutex_assert_owner(&share->intern_lock);

  /*
    In a recovery, we want to set is_of_horizon to the LSN of the last
    record executed by Recovery, not the current EOF of the log (which
    is too new). Recovery does it by itself, so it is only set here
    when we are not in recovery.
  */
  if (share->base.born_transactional && translog_status == TRANSLOG_OK &&
      !maria_in_recovery)
  {
    share->state.is_of_horizon= translog_get_horizon();
    DBUG_PRINT("info", ("is_of_horizon set to LSN " LSN_FMT "",
                        LSN_IN_PARTS(share->state.is_of_horizon)));
  }

  write_error= _ma_state_info_write_sub(share->kfile.file, &share->state,
                                        pWrite);

  if (take_lock)
    mysql_mutex_unlock(&share->intern_lock);

  /* If open_count != 0 we have to write the state again at close */
  share->changed= (share->state.open_count != 0);
  return write_error;
}
1396 
1397 
1398 /**
   @brief Function to save and store the header in the index file (.MAI).
1400 
1401    Shortcut to use instead of _ma_state_info_write() when appropriate.
1402 
1403    @param  file            descriptor of the index file to write
1404    @param  state           state information to write to the file
1405    @param  pWrite          bitmap: if 1 (MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET)
1406                            is set my_pwrite() is used otherwise my_write();
1407                            if 2 (MA_STATE_INFO_WRITE_FULL_INFO) is set, info
1408                            about keys is written (should only be needed
1409                            after ALTER TABLE ENABLE/DISABLE KEYS, and
1410                            REPAIR/OPTIMIZE).
1411 
1412    @notes
1413      For transactional multiuser tables, this function is called
1414      with intern_lock & translog_lock or when the last thread who
1415      is using the table is closing it.
1416      Because of the translog_lock we don't need to have a lock on
1417      key_del_lock.
1418 
1419    @return Operation status
1420      @retval 0      OK
1421      @retval 1      Error
1422 */
1423 
uint _ma_state_info_write_sub(File file, MARIA_STATE_INFO *state, uint pWrite)
{
  /* The whole state is assembled here and written with one call at the end */
  uchar  buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
  uchar *ptr=buff;
  uint	i, keys= (uint) state->header.keys;
  size_t res;
  DBUG_ENTER("_ma_state_info_write_sub");
  DBUG_PRINT("info", ("Records: %lld", state->state.records));

  /* The header is copied as it is in memory */
  memcpy(ptr,&state->header,sizeof(state->header));
  ptr+=sizeof(state->header);

  /* open_count must be first because of _ma_mark_file_changed ! */
  mi_int2store(ptr,state->open_count);			ptr+= 2;
  /* changed must be second, because of _ma_mark_file_crashed */
  mi_int2store(ptr,state->changed);			ptr+= 2;

  /*
    If you change the offset of these LSNs, note that some functions do a
    direct write of them without going through this function.
  */
  lsn_store(ptr, state->create_rename_lsn);		ptr+= LSN_STORE_SIZE;
  lsn_store(ptr, state->is_of_horizon);			ptr+= LSN_STORE_SIZE;
  lsn_store(ptr, state->skip_redo_lsn);			ptr+= LSN_STORE_SIZE;
  mi_rowstore(ptr,state->state.records);		ptr+= 8;
  mi_rowstore(ptr,state->state.del);			ptr+= 8;
  mi_rowstore(ptr,state->split);			ptr+= 8;
  mi_sizestore(ptr,state->dellink);			ptr+= 8;
  mi_sizestore(ptr,state->first_bitmap_with_space);	ptr+= 8;
  mi_sizestore(ptr,state->state.key_file_length);	ptr+= 8;
  mi_sizestore(ptr,state->state.data_file_length);	ptr+= 8;
  mi_sizestore(ptr,state->state.empty);			ptr+= 8;
  mi_sizestore(ptr,state->state.key_empty);		ptr+= 8;
  mi_int8store(ptr,state->auto_increment);		ptr+= 8;
  mi_int8store(ptr,(ulonglong) state->state.checksum);	ptr+= 8;
  mi_int8store(ptr,state->create_trid);			ptr+= 8;
  mi_int4store(ptr,state->status);			ptr+= 4;
  mi_int4store(ptr,state->update_count);		ptr+= 4;
  *ptr++= state->sortkey;
  *ptr++= 0;                                    /* Reserved */
  /*
    NOTE(review): the state_diff_length bytes skipped here are never
    initialised in buff but are included in the write below. Confirm that
    state_diff_length is 0 for every file this is used on, or that the
    content of these bytes is irrelevant.
  */
  ptr+=	state->state_diff_length;

  /* One root page position per key, followed by the key_del link */
  for (i=0; i < keys; i++)
  {
    mi_sizestore(ptr,state->key_root[i]);		ptr+= 8;
  }
  mi_sizestore(ptr,state->key_del);	        	ptr+= 8;
  /*
    The part below is only added to the buffer (and thus to the file) when
    MA_STATE_INFO_WRITE_FULL_INFO is set; _ma_state_info_read() decodes the
    same fields in the same order.
  */
  if (pWrite & MA_STATE_INFO_WRITE_FULL_INFO)	/* From maria_chk */
  {
    uint key_parts= mi_uint2korr(state->header.key_parts);
    mi_int4store(ptr,state->sec_index_changed); 	ptr+= 4;
    mi_int4store(ptr,state->sec_index_used);		ptr+= 4;
    mi_int4store(ptr,state->version);			ptr+= 4;
    mi_int8store(ptr,state->key_map);			ptr+= 8;
    mi_int8store(ptr,(ulonglong) state->create_time);	ptr+= 8;
    mi_int8store(ptr,(ulonglong) state->recover_time);	ptr+= 8;
    mi_int8store(ptr,(ulonglong) state->check_time);	ptr+= 8;
    mi_sizestore(ptr, state->records_at_analyze);	ptr+= 8;
    /* reserve place for some information per key */
    bzero(ptr, keys*4); 				ptr+= keys*4;
    for (i=0 ; i < key_parts ; i++)
    {
      float8store(ptr, state->rec_per_key_part[i]);  	ptr+= 8;
      mi_int4store(ptr, state->nulls_per_key_part[i]);  ptr+= 4;
    }
  }

  /*
    Write the (ptr-buff) bytes that were filled in: at offset 0 with pwrite
    if MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET is set, otherwise with a plain
    write on the file.
  */
  res= (pWrite & MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET) ?
    mysql_file_pwrite(file, buff, (size_t) (ptr-buff), 0L,
              MYF(MY_NABP | MY_THREADSAFE)) :
    mysql_file_write(file,  buff, (size_t) (ptr-buff),
             MYF(MY_NABP));
  /* Any non-zero result from the write call is reported as 1 */
  DBUG_RETURN(res != 0);
}
1498 
1499 
/**
   @brief Decode a state from a memory buffer into a MARIA_STATE_INFO

   The fields are decoded in the order _ma_state_info_write_sub() stores
   them. The part that the writer only stores when
   MA_STATE_INFO_WRITE_FULL_INFO is set is always decoded here.

   @param  ptr             buffer holding the stored state
   @param  state           state to fill in. If state->rec_per_key_part is
                           not set, rec_per_key_part and nulls_per_key_part
                           are allocated here with my_multi_malloc()
   @param  flag            extra flags for my_multi_malloc() (OR'ed with
                           MY_WME)

   @return Pointer to the first byte after the decoded state
     @retval 0      The allocation failed
*/

static uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state, myf flag)
{
  uint i,keys,key_parts;
  DBUG_ENTER("_ma_state_info_read");

  /* The header tells how many keys and key parts follow */
  memcpy(&state->header,ptr, sizeof(state->header));
  ptr+= sizeof(state->header);
  keys= (uint) state->header.keys;
  key_parts= mi_uint2korr(state->header.key_parts);

  /* Allocate memory for key parts if not already done */
  if (!state->rec_per_key_part &&
      !my_multi_malloc(MYF(MY_WME | flag),
                       &state->rec_per_key_part,
                       sizeof(*state->rec_per_key_part) * key_parts,
                       &state->nulls_per_key_part,
                       sizeof(*state->nulls_per_key_part) * key_parts,
                       NullS))
    DBUG_RETURN(0);

  state->open_count = mi_uint2korr(ptr);		ptr+= 2;
  state->changed= mi_uint2korr(ptr);			ptr+= 2;
  state->create_rename_lsn= lsn_korr(ptr);		ptr+= LSN_STORE_SIZE;
  state->is_of_horizon= lsn_korr(ptr);			ptr+= LSN_STORE_SIZE;
  state->skip_redo_lsn= lsn_korr(ptr);			ptr+= LSN_STORE_SIZE;
  state->state.records= mi_rowkorr(ptr);		ptr+= 8;
  state->state.del = mi_rowkorr(ptr);			ptr+= 8;
  state->split	= mi_rowkorr(ptr);			ptr+= 8;
  state->dellink= mi_sizekorr(ptr);			ptr+= 8;
  state->first_bitmap_with_space= mi_sizekorr(ptr);	ptr+= 8;
  state->state.key_file_length = mi_sizekorr(ptr);	ptr+= 8;
  state->state.data_file_length= mi_sizekorr(ptr);	ptr+= 8;
  state->state.empty	= mi_sizekorr(ptr);		ptr+= 8;
  state->state.key_empty= mi_sizekorr(ptr);		ptr+= 8;
  state->auto_increment=mi_uint8korr(ptr);		ptr+= 8;
  state->state.checksum=(ha_checksum) mi_uint8korr(ptr);ptr+= 8;
  state->create_trid= mi_uint8korr(ptr);		ptr+= 8;
  state->status = mi_uint4korr(ptr);			ptr+= 4;
  state->update_count=mi_uint4korr(ptr);		ptr+= 4;
  state->sortkey= 					(uint) *ptr++;
  ptr++;                                                /* reserved */

  /* Skip the same number of bytes that the writer skips at this position */
  ptr+= state->state_diff_length;

  /* One root page position per key, followed by the key_del link */
  for (i=0; i < keys; i++)
  {
    state->key_root[i]= mi_sizekorr(ptr);		ptr+= 8;
  }
  state->key_del= mi_sizekorr(ptr);			ptr+= 8;
  state->sec_index_changed = mi_uint4korr(ptr); 	ptr+= 4;
  state->sec_index_used =    mi_uint4korr(ptr); 	ptr+= 4;
  state->version     = mi_uint4korr(ptr);		ptr+= 4;
  state->key_map     = mi_uint8korr(ptr);		ptr+= 8;
  state->create_time = (time_t) mi_sizekorr(ptr);	ptr+= 8;
  state->recover_time =(time_t) mi_sizekorr(ptr);	ptr+= 8;
  state->check_time =  (time_t) mi_sizekorr(ptr);	ptr+= 8;
  state->records_at_analyze=    mi_sizekorr(ptr);	ptr+= 8;
  ptr+= keys * 4;                               /* Skip reserved bytes */
  /* Per key part: an 8 byte rec_per_key value and a 4 byte nulls value */
  for (i=0 ; i < key_parts ; i++)
  {
    float8get(state->rec_per_key_part[i], ptr);		ptr+= 8;
    state->nulls_per_key_part[i]= mi_uint4korr(ptr);	ptr+= 4;
  }

  DBUG_PRINT("info", ("Records: %lld", state->state.records));
  DBUG_RETURN(ptr);
}
1567 
1568 
1569 /**
1570    @brief Fills the state by reading its copy on disk.
1571 
1572    Should not be called for transactional tables, as their state on disk is
1573    rarely current and so is often misleading for a reader.
1574    Does nothing in single user mode.
1575 
1576    @param  file            file to read from
1577    @param  state           state which will be filled
1578 */
1579 
_ma_state_info_read_dsk(File file,MARIA_STATE_INFO * state)1580 uint _ma_state_info_read_dsk(File file __attribute__((unused)),
1581                              MARIA_STATE_INFO *state __attribute__((unused)))
1582 {
1583 #ifdef MARIA_EXTERNAL_LOCKING
1584   uchar	buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
1585 
1586   /* trick to detect transactional tables */
1587   DBUG_ASSERT(state->create_rename_lsn == LSN_IMPOSSIBLE);
1588   if (!maria_single_user)
1589   {
1590     if (mysql_file_pread(file, buff, state->state_length, 0L, MYF(MY_NABP)))
1591       return 1;
1592     _ma_state_info_read(buff, state);
1593   }
1594 #endif
1595   return 0;
1596 }
1597 
1598 
1599 /****************************************************************************
1600 **  store and read of MARIA_BASE_INFO
1601 ****************************************************************************/
1602 
_ma_base_info_write(File file,MARIA_BASE_INFO * base)1603 uint _ma_base_info_write(File file, MARIA_BASE_INFO *base)
1604 {
1605   uchar buff[MARIA_BASE_INFO_SIZE], *ptr=buff;
1606 
1607   bmove(ptr, maria_uuid, MY_UUID_SIZE);
1608   ptr+= MY_UUID_SIZE;
1609   mi_sizestore(ptr,base->keystart);			ptr+= 8;
1610   mi_sizestore(ptr,base->max_data_file_length);		ptr+= 8;
1611   mi_sizestore(ptr,base->max_key_file_length);		ptr+= 8;
1612   mi_rowstore(ptr,base->records);			ptr+= 8;
1613   mi_rowstore(ptr,base->reloc);				ptr+= 8;
1614   mi_int4store(ptr,base->mean_row_length);		ptr+= 4;
1615   mi_int4store(ptr,base->reclength);			ptr+= 4;
1616   mi_int4store(ptr,base->pack_reclength);		ptr+= 4;
1617   mi_int4store(ptr,base->min_pack_length);		ptr+= 4;
1618   mi_int4store(ptr,base->max_pack_length);		ptr+= 4;
1619   mi_int4store(ptr,base->min_block_length);		ptr+= 4;
1620   mi_int2store(ptr,base->fields);			ptr+= 2;
1621   mi_int2store(ptr,base->fixed_not_null_fields);	ptr+= 2;
1622   mi_int2store(ptr,base->fixed_not_null_fields_length);	ptr+= 2;
1623   mi_int2store(ptr,base->max_field_lengths);		ptr+= 2;
1624   mi_int2store(ptr,base->pack_fields);			ptr+= 2;
1625   mi_int2store(ptr,base->extra_options)			ptr+= 2;
1626   mi_int2store(ptr,base->null_bytes);                   ptr+= 2;
1627   mi_int2store(ptr,base->original_null_bytes);	        ptr+= 2;
1628   mi_int2store(ptr,base->field_offsets);	        ptr+= 2;
1629   mi_int2store(ptr,base->language);		        ptr+= 2;
1630   mi_int2store(ptr,base->block_size);	        	ptr+= 2;
1631   *ptr++= base->rec_reflength;
1632   *ptr++= base->key_reflength;
1633   *ptr++= base->keys;
1634   *ptr++= base->auto_key;
1635   *ptr++= base->born_transactional;
1636   *ptr++= 0;                                    /* Reserved */
1637   mi_int2store(ptr,base->pack_bytes);			ptr+= 2;
1638   mi_int2store(ptr,base->blobs);			ptr+= 2;
1639   mi_int2store(ptr,base->max_key_block_length);		ptr+= 2;
1640   mi_int2store(ptr,base->max_key_length);		ptr+= 2;
1641   mi_int2store(ptr,base->extra_alloc_bytes);		ptr+= 2;
1642   *ptr++= base->extra_alloc_procent;
1643   bzero(ptr,16);					ptr+= 16; /* extra */
1644   DBUG_ASSERT((ptr - buff) == MARIA_BASE_INFO_SIZE);
1645   return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
1646 }
1647 
1648 
_ma_base_info_read(uchar * ptr,MARIA_BASE_INFO * base)1649 static uchar *_ma_base_info_read(uchar *ptr, MARIA_BASE_INFO *base)
1650 {
1651   bmove(base->uuid, ptr, MY_UUID_SIZE);                 ptr+= MY_UUID_SIZE;
1652   base->keystart= mi_sizekorr(ptr);			ptr+= 8;
1653   base->max_data_file_length= mi_sizekorr(ptr); 	ptr+= 8;
1654   base->max_key_file_length= mi_sizekorr(ptr);		ptr+= 8;
1655   base->records=  (ha_rows) mi_sizekorr(ptr);		ptr+= 8;
1656   base->reloc= (ha_rows) mi_sizekorr(ptr);		ptr+= 8;
1657   base->mean_row_length= mi_uint4korr(ptr);		ptr+= 4;
1658   base->reclength= mi_uint4korr(ptr);			ptr+= 4;
1659   base->pack_reclength= mi_uint4korr(ptr);		ptr+= 4;
1660   base->min_pack_length= mi_uint4korr(ptr);		ptr+= 4;
1661   base->max_pack_length= mi_uint4korr(ptr);		ptr+= 4;
1662   base->min_block_length= mi_uint4korr(ptr);		ptr+= 4;
1663   base->fields= mi_uint2korr(ptr);			ptr+= 2;
1664   base->fixed_not_null_fields= mi_uint2korr(ptr);       ptr+= 2;
1665   base->fixed_not_null_fields_length= mi_uint2korr(ptr);ptr+= 2;
1666   base->max_field_lengths= mi_uint2korr(ptr);	        ptr+= 2;
1667   base->pack_fields= mi_uint2korr(ptr);			ptr+= 2;
1668   base->extra_options= mi_uint2korr(ptr);		ptr+= 2;
1669   base->null_bytes= mi_uint2korr(ptr);			ptr+= 2;
1670   base->original_null_bytes= mi_uint2korr(ptr);		ptr+= 2;
1671   base->field_offsets= mi_uint2korr(ptr);		ptr+= 2;
1672   base->language= mi_uint2korr(ptr);		        ptr+= 2;
1673   base->block_size= mi_uint2korr(ptr);			ptr+= 2;
1674 
1675   base->rec_reflength= *ptr++;
1676   base->key_reflength= *ptr++;
1677   base->keys=	       *ptr++;
1678   base->auto_key=      *ptr++;
1679   base->born_transactional= *ptr++;
1680   ptr++;
1681   base->pack_bytes= mi_uint2korr(ptr);			ptr+= 2;
1682   base->blobs= mi_uint2korr(ptr);			ptr+= 2;
1683   base->max_key_block_length= mi_uint2korr(ptr);	ptr+= 2;
1684   base->max_key_length= mi_uint2korr(ptr);		ptr+= 2;
1685   base->extra_alloc_bytes= mi_uint2korr(ptr);		ptr+= 2;
1686   base->extra_alloc_procent= *ptr++;
1687   ptr+= 16;
1688   return ptr;
1689 }
1690 
1691 /*--------------------------------------------------------------------------
1692   maria_keydef
1693 ---------------------------------------------------------------------------*/
1694 
_ma_keydef_write(File file,MARIA_KEYDEF * keydef)1695 my_bool _ma_keydef_write(File file, MARIA_KEYDEF *keydef)
1696 {
1697   uchar buff[MARIA_KEYDEF_SIZE];
1698   uchar *ptr=buff;
1699 
1700   *ptr++= (uchar) keydef->keysegs;
1701   *ptr++= keydef->key_alg;			/* Rtree or Btree */
1702   mi_int2store(ptr,keydef->flag);		ptr+= 2;
1703   mi_int2store(ptr,keydef->block_length);	ptr+= 2;
1704   mi_int2store(ptr,keydef->keylength);		ptr+= 2;
1705   mi_int2store(ptr,keydef->minlength);		ptr+= 2;
1706   mi_int2store(ptr,keydef->maxlength);		ptr+= 2;
1707   return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
1708 }
1709 
/*
  Decode a key definition image at 'ptr' into 'keydef'.

  Reads the fields in the order _ma_keydef_write() stores them and then
  initializes the members that have no stored counterpart.
  Returns the position just past the consumed bytes.
*/
uchar *_ma_keydef_read(uchar *ptr, MARIA_KEYDEF *keydef)
{
  /* Two single-byte fields */
  keydef->keysegs= (uint) *ptr++;
  keydef->key_alg= *ptr++;                      /* Rtree or Btree */

  /* Five 2-byte fields */
  keydef->flag= mi_uint2korr(ptr);
  ptr+= 2;
  keydef->block_length= mi_uint2korr(ptr);
  ptr+= 2;
  keydef->keylength= mi_uint2korr(ptr);
  ptr+= 2;
  keydef->minlength= mi_uint2korr(ptr);
  ptr+= 2;
  keydef->maxlength= mi_uint2korr(ptr);
  ptr+= 2;

  /* Members that are not part of the stored image */
  keydef->version= 0;                           /* Not saved */
  keydef->parser= &ft_default_parser;
  keydef->ftkey_nr= 0;

  return ptr;
}
1725 
1726 /***************************************************************************
1727 **  maria_keyseg
1728 ***************************************************************************/
1729 
_ma_keyseg_write(File file,const HA_KEYSEG * keyseg)1730 my_bool _ma_keyseg_write(File file, const HA_KEYSEG *keyseg)
1731 {
1732   uchar buff[HA_KEYSEG_SIZE];
1733   uchar *ptr=buff;
1734   ulong pos;
1735 
1736   *ptr++= keyseg->type;
1737   *ptr++= keyseg->language & 0xFF; /* Collation ID, low byte */
1738   *ptr++= keyseg->null_bit;
1739   *ptr++= keyseg->bit_start;
1740   *ptr++= keyseg->language >> 8; /* Collation ID, high byte */
1741   *ptr++= keyseg->bit_length;
1742   mi_int2store(ptr,keyseg->flag);	ptr+= 2;
1743   mi_int2store(ptr,keyseg->length);	ptr+= 2;
1744   mi_int4store(ptr,keyseg->start);	ptr+= 4;
1745   pos= keyseg->null_bit ? keyseg->null_pos : keyseg->bit_pos;
1746   mi_int4store(ptr, pos);
1747   ptr+=4;
1748 
1749   return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
1750 }
1751 
1752 
/*
  Decode one key segment image at 'ptr' into 'keyseg'.

  Mirrors _ma_keyseg_write(): language is assembled from two separated
  bytes, and the trailing 4-byte slot is interpreted as null_pos or
  bit_pos depending on null_bit.  charset is left as 0 for the caller.
  Returns the position just past the consumed bytes.
*/
uchar *_ma_keyseg_read(uchar *ptr, HA_KEYSEG *keyseg)
{
  /* Single-byte fields; language arrives as low byte, then high byte */
  keyseg->type= *ptr++;
  keyseg->language= *ptr++;
  keyseg->null_bit= *ptr++;
  keyseg->bit_start= *ptr++;
  keyseg->language+= ((uint16) (*ptr++)) << 8;
  keyseg->bit_length= *ptr++;

  keyseg->flag= mi_uint2korr(ptr);
  ptr+= 2;
  keyseg->length= mi_uint2korr(ptr);
  ptr+= 2;
  keyseg->start= mi_uint4korr(ptr);
  ptr+= 4;
  keyseg->null_pos= mi_uint4korr(ptr);
  ptr+= 4;

  keyseg->charset= 0;                   /* Will be filled in later */

  if (!keyseg->null_bit)
  {
    /* No null bit: the stored slot held bit_pos, so null_pos is reset */
    keyseg->bit_pos= (uint16) keyseg->null_pos;
    keyseg->null_pos= 0;
  }
  else
  {
    /* bit_pos is derived: null_pos, plus one when null_bit equals 7 */
    keyseg->bit_pos= (uint16) (keyseg->null_pos + (keyseg->null_bit == 7));
  }
  return ptr;
}
1775 
1776 /*--------------------------------------------------------------------------
1777   maria_uniquedef
1778 ---------------------------------------------------------------------------*/
1779 
_ma_uniquedef_write(File file,MARIA_UNIQUEDEF * def)1780 my_bool _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *def)
1781 {
1782   uchar buff[MARIA_UNIQUEDEF_SIZE];
1783   uchar *ptr=buff;
1784 
1785   mi_int2store(ptr,def->keysegs);		ptr+=2;
1786   *ptr++=  (uchar) def->key;
1787   *ptr++ = (uchar) def->null_are_equal;
1788 
1789   return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
1790 }
1791 
/*
  Decode a unique definition image at 'ptr' into 'def'.

  Consumes 4 bytes: keysegs (2 bytes), key (1 byte) and null_are_equal
  (1 byte).  Returns the position just past them.
*/
uchar *_ma_uniquedef_read(uchar *ptr, MARIA_UNIQUEDEF *def)
{
  def->keysegs= mi_uint2korr(ptr);
  ptr+= 2;
  def->key= *ptr++;
  def->null_are_equal= *ptr++;
  return ptr;
}
1799 
1800 /***************************************************************************
1801 **  MARIA_COLUMNDEF
1802 ***************************************************************************/
1803 
_ma_columndef_write(File file,MARIA_COLUMNDEF * columndef)1804 my_bool _ma_columndef_write(File file, MARIA_COLUMNDEF *columndef)
1805 {
1806   uchar buff[MARIA_COLUMNDEF_SIZE];
1807   uchar *ptr=buff;
1808   uint low_offset=  (uint) (columndef->offset & 0xffff);
1809   uint high_offset= (uint) (columndef->offset >> 16);
1810 
1811   mi_int2store(ptr,(ulong) columndef->column_nr); ptr+= 2;
1812   mi_int2store(ptr, low_offset);		  ptr+= 2;
1813   mi_int2store(ptr,columndef->type);		  ptr+= 2;
1814   mi_int2store(ptr,columndef->length);		  ptr+= 2;
1815   mi_int2store(ptr,columndef->fill_length);	  ptr+= 2;
1816   mi_int2store(ptr,columndef->null_pos);	  ptr+= 2;
1817   mi_int2store(ptr,columndef->empty_pos);	  ptr+= 2;
1818 
1819   (*ptr++)= columndef->null_bit;
1820   (*ptr++)= columndef->empty_bit;
1821   mi_int2store(ptr, high_offset);                 ptr+= 2;
1822   ptr[0]= ptr[1]= 0;                              ptr+= 2;  /* For future */
1823   return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
1824 }
1825 
/*
  Decode a column definition image at 'ptr' into 'columndef'.

  The offset is rebuilt from the low 16 bits (stored second) and the
  high 16 bits (stored after the bit masks).  The last two bytes of the
  image are skipped.  Returns the position just past the image.
*/
uchar *_ma_columndef_read(uchar *ptr, MARIA_COLUMNDEF *columndef)
{
  uint offset_hi;

  columndef->column_nr= mi_uint2korr(ptr);
  ptr+= 2;
  columndef->offset= mi_uint2korr(ptr);         /* Low half only, so far */
  ptr+= 2;
  columndef->type= mi_sint2korr(ptr);
  ptr+= 2;
  columndef->length= mi_uint2korr(ptr);
  ptr+= 2;
  columndef->fill_length= mi_uint2korr(ptr);
  ptr+= 2;
  columndef->null_pos= mi_uint2korr(ptr);
  ptr+= 2;
  columndef->empty_pos= mi_uint2korr(ptr);
  ptr+= 2;

  columndef->null_bit= (uint8) *ptr++;
  columndef->empty_bit= (uint8) *ptr++;

  /* Merge in the high half of the offset */
  offset_hi= mi_uint2korr(ptr);
  ptr+= 2;
  columndef->offset|= ((ulong) offset_hi << 16);

  /* Skip the two trailing bytes */
  return ptr + 2;
}
1843 
_ma_column_nr_write(File file,uint16 * offsets,uint columns)1844 my_bool _ma_column_nr_write(File file, uint16 *offsets, uint columns)
1845 {
1846   uchar *buff, *ptr, *end;
1847   size_t size= columns*2;
1848   my_bool res;
1849 
1850   if (!(buff= (uchar*) my_alloca(size)))
1851     return 1;
1852   for (ptr= buff, end= ptr + size; ptr < end ; ptr+= 2, offsets++)
1853     int2store(ptr, *offsets);
1854   res= mysql_file_write(file, buff, size, MYF(MY_NABP)) != 0;
1855   my_afree(buff);
1856   return res;
1857 }
1858 
1859 
/*
  Read 'columns' 16-bit values from 'ptr' into 'offsets' using
  uint2korr().  Returns the position just past the columns*2 bytes read.
*/
uchar *_ma_column_nr_read(uchar *ptr, uint16 *offsets, uint columns)
{
  size_t size= columns*2;
  uchar *end= ptr + size;

  while (ptr < end)
  {
    *offsets= uint2korr(ptr);
    ptr+= 2;
    offsets++;
  }
  return ptr;
}
1868 
1869 /**
1870    @brief Set callbacks for data pages
1871 
1872    @note
1873    We don't use pagecache_file_init here, as we want to keep the
1874    code readable
1875 */
1876 
void _ma_set_data_pagecache_callbacks(PAGECACHE_FILE *file,
                                      MARIA_SHARE *share)
{
  /* Start from null hooks, then install the ones common to all tables */
  pagecache_file_set_null_hooks(file);
  file->callback_data= (uchar*) share;
  file->post_write_hook= maria_page_write_failure;
  file->flush_log_callback= &maria_flush_log_for_page_none; /* Do nothing */

  if (!share->temporary)
  {
    file->post_read_hook= &maria_page_crc_check_data;

    /* The write hook depends on whether page checksums are enabled */
    if (!(share->options & HA_OPTION_PAGE_CHECKSUM))
      file->pre_write_hook= &maria_page_filler_set_normal;
    else
      file->pre_write_hook= &maria_page_crc_set_normal;

    /* Only currently transactional tables get a real log flush callback */
    if (share->now_transactional)
      file->flush_log_callback= maria_flush_log_for_page;
  }
  else
  {
    /* Temporary tables use the "none" variants of the read/write hooks */
    file->post_read_hook= &maria_page_crc_check_none;
    file->pre_write_hook= &maria_page_filler_set_none;
  }

  /* Runs last, after the hooks above are in place, for encrypted tables */
  if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED))
    ma_crypt_set_data_pagecache_callbacks(file, share);
}
1906 
1907 
1908 /**
1909    @brief Set callbacks for index pages
1910 
1911    @note
1912    We don't use pagecache_file_init here, as we want to keep the
1913    code readable
1914 */
1915 
void _ma_set_index_pagecache_callbacks(PAGECACHE_FILE *file,
                                       MARIA_SHARE *share)
{
  /* Start from null hooks, then install the ones common to all tables */
  pagecache_file_set_null_hooks(file);
  file->callback_data= (uchar*) share;
  file->post_write_hook= maria_page_write_failure;
  file->flush_log_callback= &maria_flush_log_for_page_none; /* Do nothing */

  if (!share->temporary)
  {
    file->post_read_hook= &maria_page_crc_check_index;

    /* The write hook depends on whether page checksums are enabled */
    if (!(share->options & HA_OPTION_PAGE_CHECKSUM))
      file->pre_write_hook= &maria_page_filler_set_normal;
    else
      file->pre_write_hook= &maria_page_crc_set_index;

    /* Only currently transactional tables get a real log flush callback */
    if (share->now_transactional)
      file->flush_log_callback= maria_flush_log_for_page;
  }
  else
  {
    /* Temporary tables use the "none" variants of the read/write hooks */
    file->post_read_hook= &maria_page_crc_check_none;
    file->pre_write_hook= &maria_page_filler_set_none;
  }

  /* Runs last, after the hooks above are in place, for encrypted tables */
  if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED))
    ma_crypt_set_index_pagecache_callbacks(file, share);
}
1946 
1947 
1948 /**************************************************************************
1949  Open data file
1950   We can't use dup() here as the data file descriptors need to have different
1951   active seek-positions.
1952 *************************************************************************/
1953 
int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share)
{
  File fd;
  myf flags= MY_WME;

  /* Refuse symlinks only when the share was opened with O_NOFOLLOW */
  if (share->mode & O_NOFOLLOW)
    flags|= MY_NOSYMLINKS;
  if (share->temporary)
    flags|= MY_THREAD_SPECIFIC;

  DEBUG_SYNC_C("mi_open_datafile");

  fd= mysql_file_open(key_file_dfile, share->data_file_name.str,
                      share->mode | O_SHARE | O_CLOEXEC, MYF(flags));

  /* The same descriptor is stored in the bitmap file and in the handler */
  share->bitmap.file.file= fd;
  info->dfile.file= fd;

  /* 0 on success, 1 when the open failed */
  return fd < 0;
}
1965 
1966 
/*
  Open the index file named by share->unique_file_name and store the
  descriptor in share->kfile.file.

  Returns 0 on success, 1 if the stored descriptor is negative.
*/
int _ma_open_keyfile(MARIA_SHARE *share)
{
  File fd;

  /*
    Modifications to share->kfile should be under intern_lock to protect
    against a concurrent checkpoint.
  */
  mysql_mutex_lock(&share->intern_lock);
  fd= mysql_file_open(key_file_kfile, share->unique_file_name.str,
                      share->mode | O_SHARE | O_NOFOLLOW | O_CLOEXEC,
                      MYF(MY_WME | MY_NOSYMLINKS));
  share->kfile.file= fd;
  mysql_mutex_unlock(&share->intern_lock);

  return (share->kfile.file < 0);
}
1981 
1982 
1983 /*
1984   Disable all indexes.
1985 
1986   SYNOPSIS
1987     maria_disable_indexes()
1988     info        A pointer to the MARIA storage engine MARIA_HA struct.
1989 
1990   DESCRIPTION
1991     Disable all indexes.
1992 
1993   RETURN
1994     0  ok
1995 */
1996 
int maria_disable_indexes(MARIA_HA *info)
{
  /* Clears the key map kept in the share's state; always returns 0 */
  maria_clear_all_keys_active(info->s->state.key_map);
  return 0;
}
2004 
2005 
2006 /*
2007   Enable all indexes
2008 
2009   SYNOPSIS
2010     maria_enable_indexes()
2011     info        A pointer to the MARIA storage engine MARIA_HA struct.
2012 
2013   DESCRIPTION
    Enable all indexes. The indexes might have been disabled
    by maria_disable_indexes() before.
2016     The function works only if both data and indexes are empty,
2017     otherwise a repair is required.
2018     To be sure, call handler::delete_all_rows() before.
2019 
2020   RETURN
2021     0  ok
2022     HA_ERR_CRASHED data or index is non-empty.
2023 */
2024 
int maria_enable_indexes(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;
  DBUG_ENTER("maria_enable_indexes");

  /*
    Keys are only activated when the data file has its "empty" length
    (one block for BLOCK_RECORD, otherwise 0) and the key file ends at
    base.keystart.
  */
  if ((share->state.state.data_file_length ==
       (share->data_file_type == BLOCK_RECORD ? share->block_size : 0)) &&
      (share->state.state.key_file_length == share->base.keystart))
  {
    maria_set_all_keys_active(share->state.key_map, share->base.keys);
    DBUG_RETURN(0);
  }

  /* Any other length marks the table as crashed */
  DBUG_PRINT("error", ("data_file_length: %lu  key_file_length: %lu",
                       (ulong) share->state.state.data_file_length,
                       (ulong) share->state.state.key_file_length));
  _ma_set_fatal_error(share, HA_ERR_CRASHED);
  DBUG_RETURN(HA_ERR_CRASHED);
}
2045 
2046 
2047 /*
2048   Test if indexes are disabled.
2049 
2050   SYNOPSIS
2051     maria_indexes_are_disabled()
2052     info        A pointer to the MARIA storage engine MARIA_HA struct.
2053 
2054   DESCRIPTION
2055     Test if indexes are disabled.
2056 
2057   RETURN
2058     0  indexes are not disabled
2059     1  all indexes are disabled
2060     2  non-unique indexes are disabled
2061 */
2062 
int maria_indexes_are_disabled(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;

  /*
    No keys or all are enabled. keys is the number of keys. Left shifted
    gives us only one bit set. When decreased by one, gives us all bits
    up to this one set and it gets unset.
  */
  if (!share->base.keys ||
      (maria_is_all_keys_active(share->state.key_map, share->base.keys)))
    return 0;

  /*
    All are disabled: this is the case when NO key is active.  The test
    used to be inverted, returning 1 when some key was still active and
    2 when none was, contrary to the documented return values.
  */
  if (!maria_is_any_key_active(share->state.key_map))
    return 1;

  /*
    We have keys. Some enabled, some disabled.
    Don't check for any non-unique disabled but return directly 2
  */
  return 2;
}
2086 
2087 
maria_scan_init_dummy(MARIA_HA * info)2088 static my_bool maria_scan_init_dummy(MARIA_HA *info __attribute__((unused)))
2089 {
2090   return 0;
2091 }
2092 
/* No-op scan-end routine: ignores 'info' and does nothing */
static void
maria_scan_end_dummy(MARIA_HA *info __attribute__((unused)))
{
  return;
}
2096 
maria_once_init_dummy(MARIA_SHARE * share,File dfile)2097 static my_bool maria_once_init_dummy(MARIA_SHARE *share
2098                                      __attribute__((unused)),
2099                                      File dfile __attribute__((unused)))
2100 {
2101   return 0;
2102 }
2103 
maria_once_end_dummy(MARIA_SHARE * share)2104 static my_bool maria_once_end_dummy(MARIA_SHARE *share __attribute__((unused)))
2105 {
2106   return 0;
2107 }
2108