1 /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2 Copyright (c) 2009, 2021, MariaDB Corporation Ab
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16
17 /* open an Aria table */
18
19 #include "ma_fulltext.h"
20 #include "ma_sp_defs.h"
21 #include "ma_rt_index.h"
22 #include "ma_blockrec.h"
23 #include "trnman.h"
24 #include "ma_trnman.h"
25 #include <m_ctype.h>
26 #include "ma_crypt.h"
27 #include "s3_func.h"
28
29 #if defined(MSDOS) || defined(__WIN__)
30 #ifdef __WIN__
31 #include <fcntl.h>
32 #else
33 #include <process.h> /* Prototype for getpid */
34 #endif
35 #endif
36
/*
  Forward declarations of file-local (static) helpers that are used in
  this file before their definitions appear.
*/
static void setup_key_functions(MARIA_KEYDEF *keyinfo);
static my_bool maria_scan_init_dummy(MARIA_HA *info);
static void maria_scan_end_dummy(MARIA_HA *info);
static my_bool maria_once_init_dummy(MARIA_SHARE *, File);
static my_bool maria_once_end_dummy(MARIA_SHARE *);
static uchar *_ma_state_info_read(uchar *, MARIA_STATE_INFO *, myf);
43
/*
  Copy 'size' bytes from 'pos' into 'to' and then advance 'pos' by 'size'.

  The macro expands to a braced block (not a do { } while (0) statement),
  and 'pos' and 'size' are each evaluated twice, so pass only simple
  lvalues / side-effect free expressions.
*/
#define get_next_element(to,pos,size) { memcpy((char*) to,pos,(size_t) size); \
					pos+=size;}
46
47
/*
  Bounds check for positions inside a header buffer.

  If 'pos' is greater than 'end_pos', call
  _ma_set_fatal_error(share, HA_ERR_CRASHED) and jump to the label 'err'.

  The enclosing function must therefore define an 'err' label.  The macro
  expands to a bare 'if' statement and its arguments are not parenthesized,
  so use it only as a stand-alone statement (never directly in front of an
  'else') and pass simple expressions.
*/
#define disk_pos_assert(share, pos, end_pos)     \
if (pos > end_pos)             \
{                              \
  _ma_set_fatal_error(share, HA_ERR_CRASHED);                 \
  goto err;                    \
}
54
55
56 /******************************************************************************
57 ** Return the shared struct if the table is already open.
58 ** In MySQL the server will handle version issues.
59 ******************************************************************************/
60
_ma_test_if_reopen(const char * filename)61 MARIA_HA *_ma_test_if_reopen(const char *filename)
62 {
63 LIST *pos;
64
65 for (pos=maria_open_list ; pos ; pos=pos->next)
66 {
67 MARIA_HA *info=(MARIA_HA*) pos->data;
68 MARIA_SHARE *share= info->s;
69 if (!strcmp(share->unique_file_name.str,filename) && share->last_version)
70 return info;
71 }
72 return 0;
73 }
74
75
76 /*
77 Open a new instance of an already opened Maria table
78
79 SYNOPSIS
80 maria_clone_internal()
81 share Share of already open table
82 mode Mode of table (O_RDONLY | O_RDWR)
83 data_file Filedescriptor of data file to use < 0 if one should open
84 open it.
85 internal_table <> 0 if this is an internal temporary table
86
87 RETURN
88 # Maria handler
89 0 Error
90 */
91
maria_clone_internal(MARIA_SHARE * share,int mode,File data_file,uint internal_table,struct ms3_st * s3)92 static MARIA_HA *maria_clone_internal(MARIA_SHARE *share,
93 int mode, File data_file,
94 uint internal_table,
95 struct ms3_st *s3)
96 {
97 int save_errno;
98 uint errpos;
99 MARIA_HA info,*m_info;
100 my_bitmap_map *changed_fields_bitmap;
101 myf flag= MY_WME | (share->temporary ? MY_THREAD_SPECIFIC : 0);
102 DBUG_ENTER("maria_clone_internal");
103
104 errpos= 0;
105 bzero((uchar*) &info,sizeof(info));
106
107 if (mode == O_RDWR && share->mode == O_RDONLY)
108 {
109 my_errno=EACCES; /* Can't open in write mode */
110 goto err;
111 }
112 if (data_file >= 0)
113 info.dfile.file= data_file;
114 else if (_ma_open_datafile(&info, share))
115 goto err;
116 errpos= 5;
117
118 /* alloc and set up private structure parts */
119 if (!my_multi_malloc(PSI_INSTRUMENT_ME, flag,
120 &m_info,sizeof(MARIA_HA),
121 &info.blobs,sizeof(MARIA_BLOB)*share->base.blobs,
122 &info.buff,(share->base.max_key_block_length*2+
123 share->base.max_key_length),
124 &info.lastkey_buff,share->base.max_key_length*2+1,
125 &info.first_mbr_key, share->base.max_key_length,
126 &info.maria_rtree_recursion_state,
127 share->have_rtree ? 1024 : 0,
128 &changed_fields_bitmap,
129 bitmap_buffer_size(share->base.fields),
130 NullS))
131 goto err;
132 errpos= 6;
133
134 info.s3= s3;
135 memcpy(info.blobs,share->blobs,sizeof(MARIA_BLOB)*share->base.blobs);
136 info.lastkey_buff2= info.lastkey_buff + share->base.max_key_length;
137 info.last_key.data= info.lastkey_buff;
138
139 info.s=share;
140 info.cur_row.lastpos= HA_OFFSET_ERROR;
141 /* Impossible first index to force initialization in _ma_check_index() */
142 info.lastinx= ~0;
143 info.update= (short) (HA_STATE_NEXT_FOUND+HA_STATE_PREV_FOUND);
144 info.opt_flag=READ_CHECK_USED;
145 info.this_unique= (ulong) info.dfile.file; /* Uniq number in process */
146 #ifdef MARIA_EXTERNAL_LOCKING
147 if (share->data_file_type == COMPRESSED_RECORD)
148 info.this_unique= share->state.unique;
149 info.this_loop=0; /* Update counter */
150 info.last_unique= share->state.unique;
151 info.last_loop= share->state.update_count;
152 #endif
153 info.errkey= -1;
154 info.page_changed= 1;
155 info.autocommit= 1;
156 info.keyread_buff= info.buff + share->base.max_key_block_length;
157
158 info.lock_type= F_UNLCK;
159 if (share->options & HA_OPTION_TMP_TABLE)
160 info.lock_type= F_WRLCK;
161
162 _ma_set_data_pagecache_callbacks(&info.dfile, share);
163 my_bitmap_init(&info.changed_fields, changed_fields_bitmap,
164 share->base.fields, 0);
165 if ((*share->init)(&info))
166 goto err;
167
168 /* The following should be big enough for all pinning purposes */
169 if (my_init_dynamic_array(PSI_INSTRUMENT_ME, &info.pinned_pages,
170 sizeof(MARIA_PINNED_PAGE),
171 MY_MAX(share->base.blobs*2 + 4,
172 MARIA_MAX_TREE_LEVELS*3), 16, flag))
173 goto err;
174
175
176 mysql_mutex_lock(&share->intern_lock);
177 info.read_record= share->read_record;
178 share->reopen++;
179 share->write_flag=MYF(MY_NABP | MY_WAIT_IF_FULL);
180 if (share->options & HA_OPTION_READ_ONLY_DATA)
181 {
182 info.lock_type=F_RDLCK;
183 share->r_locks++;
184 share->tot_locks++;
185 }
186 if ((share->options & HA_OPTION_DELAY_KEY_WRITE) &&
187 maria_delay_key_write)
188 share->delay_key_write=1;
189
190 if (!share->now_transactional) /* If not transctional table */
191 {
192 /* Pagecache requires access to info->trn->rec_lsn */
193 _ma_set_tmp_trn_for_table(&info, &dummy_transaction_object);
194 info.state= &share->state.state; /* Change global values by default */
195 }
196 else
197 {
198 info.state= &share->state.common;
199 *info.state= share->state.state; /* Initial values */
200 }
201 info.state_start= info.state; /* Initial values */
202
203 mysql_mutex_unlock(&share->intern_lock);
204
205 /* Allocate buffer for one record */
206 /* prerequisites: info->rec_buffer == 0 && info->rec_buff_size == 0 */
207 if (_ma_alloc_buffer(&info.rec_buff, &info.rec_buff_size,
208 share->base.default_rec_buff_size, flag))
209 goto err;
210
211 bzero(info.rec_buff, share->base.default_rec_buff_size);
212
213 *m_info=info;
214 thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info);
215
216 if (share->options & HA_OPTION_TMP_TABLE)
217 m_info->lock.type= TL_WRITE;
218
219 if (!internal_table)
220 {
221 m_info->open_list.data= m_info->share_list.data= (void*) m_info;
222 maria_open_list= list_add(maria_open_list, &m_info->open_list);
223 share->open_list= list_add(share->open_list, &m_info->share_list);
224 }
225 else
226 {
227 /* We don't need to mark internal temporary tables as changed on disk */
228 share->internal_table= 1;
229 share->global_changed= 1;
230 }
231 DBUG_RETURN(m_info);
232
233 err:
234 DBUG_PRINT("error", ("error: %d", my_errno));
235 save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE;
236 if ((save_errno == HA_ERR_CRASHED) ||
237 (save_errno == HA_ERR_CRASHED_ON_USAGE) ||
238 (save_errno == HA_ERR_CRASHED_ON_REPAIR))
239 _ma_report_error(save_errno, &share->open_file_name);
240 switch (errpos) {
241 case 6:
242 (*share->end)(&info);
243 delete_dynamic(&info.pinned_pages);
244 my_free(m_info->s3);
245 my_free(m_info);
246 /* fall through */
247 case 5:
248 if (data_file < 0)
249 mysql_file_close(info.dfile.file, MYF(0));
250 break;
251 }
252 my_errno=save_errno;
253 DBUG_RETURN (NULL);
254 } /* maria_clone_internal */
255
256
257 /******************************************************************************
258 open a MARIA table
259
260 See my_base.h for the handle_locking argument
261 if handle_locking and HA_OPEN_ABORT_IF_CRASHED then abort if the table
262 is marked crashed or if we are not using locking and the table doesn't
263 have an open count of 0.
264 ******************************************************************************/
265
maria_open(const char * name,int mode,uint open_flags,S3_INFO * s3)266 MARIA_HA *maria_open(const char *name, int mode, uint open_flags,
267 S3_INFO *s3)
268 {
269 int open_mode= 0,save_errno;
270 uint i,j,len,errpos,head_length,base_pos,keys, realpath_err,
271 key_parts,base_key_parts,unique_key_parts,fulltext_keys,uniques;
272 uint internal_table= MY_TEST(open_flags & HA_OPEN_INTERNAL_TABLE);
273 myf common_flag= open_flags & HA_OPEN_TMP_TABLE ? MY_THREAD_SPECIFIC : 0;
274 uint file_version;
275 size_t info_length;
276 char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
277 data_name[FN_REFLEN];
278 uchar *UNINIT_VAR(disk_cache), *disk_pos, *end_pos;
279 MARIA_HA info, *UNINIT_VAR(m_info), *old_info;
280 MARIA_SHARE share_buff,*share;
281 double *rec_per_key_part;
282 ulong *nulls_per_key_part;
283 my_off_t key_root[HA_MAX_POSSIBLE_KEY];
284 ulonglong max_key_file_length, max_data_file_length;
285 my_bool versioning= 1, born_transactional;
286 File data_file= -1, kfile= -1;
287 struct ms3_st *s3_client= 0;
288 S3_INFO *share_s3= 0;
289 S3_BLOCK index_header;
290 DBUG_ENTER("maria_open");
291
292 errpos= 0;
293 head_length=sizeof(share_buff.state.header);
294 bzero((uchar*) &info,sizeof(info));
295 bzero((uchar*) &index_header, sizeof(index_header));
296
297 #ifndef WITH_S3_STORAGE_ENGINE
298 DBUG_ASSERT(!s3);
299 #endif /* WITH_S3_STORAGE_ENGINE */
300
301 if (!s3)
302 {
303 realpath_err= my_realpath(name_buff, fn_format(org_name, name, "",
304 MARIA_NAME_IEXT,
305 MY_UNPACK_FILENAME),MYF(0));
306 if (realpath_err > 0) /* File not found, no point in looking further. */
307 {
308 DBUG_RETURN(NULL);
309 }
310
311 if (my_is_symlink(org_name) &&
312 (realpath_err || mysys_test_invalid_symlink(name_buff)))
313 {
314 my_errno= HA_WRONG_CREATE_OPTION;
315 DBUG_RETURN(0);
316 }
317 }
318 #ifdef WITH_S3_STORAGE_ENGINE
319 else
320 {
321 strmake(name_buff, name, sizeof(name_buff)-1); /* test_if_reopen() */
322 if (!(s3_client= s3f.open_connection(s3)))
323 {
324 internal_table= 1; /* Avoid unlock on error */
325 goto err;
326 }
327 }
328 #endif /* WITH_S3_STORAGE_ENGINE */
329
330 old_info= 0;
331 if (!internal_table)
332 mysql_mutex_lock(&THR_LOCK_maria);
333 if ((open_flags & HA_OPEN_COPY) ||
334 (internal_table || !(old_info=_ma_test_if_reopen(name_buff))))
335 {
336 share= &share_buff;
337 bzero((uchar*) &share_buff,sizeof(share_buff));
338 share_buff.state.key_root=key_root;
339 share_buff.pagecache= multi_pagecache_search((uchar*) name_buff,
340 (uint) strlen(name_buff),
341 maria_pagecache);
342
343 if (!s3)
344 {
345 DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_open",
346 if (strstr(name, "/t1"))
347 {
348 my_errno= HA_ERR_CRASHED;
349 goto err;
350 });
351 DEBUG_SYNC_C("mi_open_kfile");
352 if ((kfile=mysql_file_open(key_file_kfile, name_buff,
353 (open_mode=O_RDWR) | O_SHARE | O_NOFOLLOW | O_CLOEXEC,
354 MYF(common_flag | MY_NOSYMLINKS))) < 0)
355 {
356 if ((errno != EROFS && errno != EACCES) ||
357 mode != O_RDONLY ||
358 (kfile=mysql_file_open(key_file_kfile, name_buff,
359 (open_mode=O_RDONLY) | O_SHARE | O_NOFOLLOW | O_CLOEXEC,
360 MYF(common_flag | MY_NOSYMLINKS))) < 0)
361 goto err;
362 }
363 errpos= 1;
364 if (mysql_file_pread(kfile,share->state.header.file_version, head_length,
365 0, MYF(MY_NABP)))
366 {
367 my_errno= HA_ERR_NOT_A_TABLE;
368 goto err;
369 }
370 }
371 #ifdef WITH_S3_STORAGE_ENGINE
372 else
373 {
374 open_mode= mode;
375 errpos= 1;
376 if (s3f.set_database_and_table_from_path(s3, name_buff))
377 {
378 my_printf_error(HA_ERR_NO_SUCH_TABLE,
379 "Can't find database and path from %s", MYF(0),
380 name_buff);
381 my_errno= HA_ERR_NO_SUCH_TABLE;
382 goto err;
383 }
384 if (!(share_s3= share->s3_path= s3f.info_copy(s3)))
385 goto err; /* EiOM */
386
387 /* Check if table has changed in S3 */
388 if (s3f.check_frm_version(s3_client, share_s3) == 1)
389 {
390 my_errno= HA_ERR_TABLE_DEF_CHANGED;
391 goto err;
392 }
393
394 if (s3f.read_index_header(s3_client, share_s3, &index_header))
395 goto err;
396 if (index_header.length < head_length)
397 {
398 my_errno=HA_ERR_NOT_A_TABLE;
399 goto err;
400 }
401 memcpy(share->state.header.file_version, index_header.str,
402 head_length);
403 kfile= s3f.unique_file_number();
404 }
405 #endif /* WITH_S3_STORAGE_ENGINE */
406
407 share->mode=open_mode;
408 if (memcmp(share->state.header.file_version, maria_file_magic, 4))
409 {
410 DBUG_PRINT("error",("Wrong header in %s",name_buff));
411 DBUG_DUMP("error_dump", share->state.header.file_version,
412 head_length);
413 my_errno=HA_ERR_NOT_A_TABLE;
414 goto err;
415 }
416 share->options= mi_uint2korr(share->state.header.options);
417 if (share->options &
418 ~(HA_OPTION_PACK_RECORD | HA_OPTION_PACK_KEYS |
419 HA_OPTION_COMPRESS_RECORD | HA_OPTION_READ_ONLY_DATA |
420 HA_OPTION_TEMP_COMPRESS_RECORD | HA_OPTION_CHECKSUM |
421 HA_OPTION_TMP_TABLE | HA_OPTION_DELAY_KEY_WRITE |
422 HA_OPTION_RELIES_ON_SQL_LAYER | HA_OPTION_NULL_FIELDS |
423 HA_OPTION_PAGE_CHECKSUM))
424 {
425 DBUG_PRINT("error",("wrong options: 0x%lx", share->options));
426 my_errno=HA_ERR_NEW_FILE;
427 goto err;
428 }
429 if ((share->options & HA_OPTION_RELIES_ON_SQL_LAYER) &&
430 ! (open_flags & HA_OPEN_FROM_SQL_LAYER))
431 {
432 DBUG_PRINT("error", ("table cannot be opened from non-sql layer"));
433 my_errno= HA_ERR_UNSUPPORTED;
434 goto err;
435 }
436 if (!s3)
437 {
438 /* Don't call realpath() if the name can't be a link */
439 if (!strcmp(name_buff, org_name) ||
440 my_readlink(index_name, org_name, MYF(0)) == -1)
441 (void) strmov(index_name, org_name);
442 *strrchr(org_name, FN_EXTCHAR)= '\0';
443 (void) fn_format(data_name,org_name,"",MARIA_NAME_DEXT,
444 MY_APPEND_EXT|MY_UNPACK_FILENAME);
445 if (my_is_symlink(data_name))
446 {
447 if (my_realpath(data_name, data_name, MYF(0)))
448 goto err;
449 if (mysys_test_invalid_symlink(data_name))
450 {
451 my_errno= HA_WRONG_CREATE_OPTION;
452 goto err;
453 }
454 share->mode|= O_NOFOLLOW; /* all symlinks are resolved by realpath() */
455 }
456 }
457 else
458 {
459 /* Don't show DIRECTORY in show create table */
460 index_name[0]= data_name[0]= 0;
461 }
462
463 info_length=mi_uint2korr(share->state.header.header_length);
464 base_pos= mi_uint2korr(share->state.header.base_pos);
465
466 /*
467 Allocate space for header information and for data that is too
468 big to keep on stack
469 */
470 if (!(disk_cache= my_malloc(PSI_INSTRUMENT_ME, info_length+128,
471 MYF(MY_WME | common_flag))))
472 {
473 my_errno=ENOMEM;
474 goto err;
475 }
476
477 end_pos=disk_cache+info_length;
478 errpos= 3;
479 if (!s3)
480 {
481 if (mysql_file_pread(kfile, disk_cache, info_length, 0L, MYF(MY_NABP)))
482 {
483 _ma_set_fatal_error(share, HA_ERR_CRASHED);
484 goto err;
485 }
486 }
487 #ifdef WITH_S3_STORAGE_ENGINE
488 else
489 {
490 if (index_header.length < info_length)
491 {
492 my_errno=HA_ERR_NOT_A_TABLE;
493 goto err;
494 }
495 memcpy(disk_cache, index_header.str, info_length);
496 }
497 #endif /* WITH_S3_STORAGE_ENGINE */
498
499 len=mi_uint2korr(share->state.header.state_info_length);
500 keys= (uint) share->state.header.keys;
501 uniques= (uint) share->state.header.uniques;
502 fulltext_keys= (uint) share->state.header.fulltext_keys;
503 base_key_parts= key_parts= mi_uint2korr(share->state.header.key_parts);
504 unique_key_parts= mi_uint2korr(share->state.header.unique_key_parts);
505 if (len != MARIA_STATE_INFO_SIZE)
506 {
507 DBUG_PRINT("warning",
508 ("saved_state_info_length: %d state_info_length: %d",
509 len,MARIA_STATE_INFO_SIZE));
510 }
511 share->state_diff_length=len-MARIA_STATE_INFO_SIZE;
512
513 if (!_ma_state_info_read(disk_cache, &share->state, common_flag))
514 goto err;
515 len= mi_uint2korr(share->state.header.base_info_length);
516 if (len != MARIA_BASE_INFO_SIZE)
517 {
518 DBUG_PRINT("warning",("saved_base_info_length: %d base_info_length: %d",
519 len,MARIA_BASE_INFO_SIZE));
520 }
521 disk_pos= _ma_base_info_read(disk_cache + base_pos, &share->base);
522 /*
523 Check if old version of Aria file. Version 0 has language
524 stored in header.not_used
525 */
526 file_version= (share->state.header.not_used == 0);
527 if (file_version == 0)
528 share->base.language= share->state.header.not_used;
529 born_transactional= share->base.born_transactional;
530
531 share->state.state_length=base_pos;
532 /* For newly opened tables we reset the error-has-been-printed flag */
533 share->state.changed&= ~STATE_CRASHED_PRINTED;
534 share->state.org_changed= share->state.changed;
535
536 if (!(open_flags & HA_OPEN_FOR_REPAIR) &&
537 ((share->state.changed & STATE_CRASHED_FLAGS) ||
538 ((open_flags & HA_OPEN_ABORT_IF_CRASHED) &&
539 (my_disable_locking && share->state.open_count))))
540 {
541 DBUG_PRINT("error",("Table is marked as crashed. open_flags: %u "
542 "changed: %u open_count: %u !locking: %d",
543 open_flags, share->state.changed,
544 share->state.open_count, my_disable_locking));
545 my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ?
546 HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE);
547 goto err;
548 }
549 if (share->state.open_count)
550 share->open_count_not_zero_on_open= 1;
551
552 /*
553 A transactional table is not usable on this system if:
554 - share->state.create_trid > trnman_get_max_trid()
        - Critical, as trids are stored relative to create_trid.
556 - uuid is different
557
558 STATE_NOT_MOVABLE is reset when a table is zerofilled
559 (has no LSN's and no trids)
560
561 We can ignore testing uuid if STATE_NOT_MOVABLE is not set, as in this
562 case the uuid will be set in _ma_mark_file_changed().
563 */
564 if (born_transactional &&
565 ((share->state.create_trid > trnman_get_max_trid() &&
566 !maria_in_recovery) ||
567 ((share->state.changed & STATE_NOT_MOVABLE) &&
568 ((!(open_flags & HA_OPEN_IGNORE_MOVED_STATE) &&
569 memcmp(share->base.uuid, maria_uuid, MY_UUID_SIZE)))) ||
570 ((share->state.changed & (STATE_MOVED | STATE_NOT_ZEROFILLED)) ==
571 (STATE_MOVED | STATE_NOT_ZEROFILLED))))
572 {
573 DBUG_PRINT("warning", ("table is moved from another system. uuid_diff: %d create_trid: %lu max_trid: %lu moved: %d",
574 memcmp(share->base.uuid, maria_uuid,
575 MY_UUID_SIZE) != 0,
576 (ulong) share->state.create_trid,
577 (ulong) trnman_get_max_trid(),
578 MY_TEST((share->state.changed & STATE_MOVED))));
579 if (open_flags & HA_OPEN_FOR_REPAIR)
580 share->state.changed|= STATE_MOVED;
581 else
582 {
583 my_errno= HA_ERR_OLD_FILE;
584 goto err;
585 }
586 }
587
588 /* sanity check */
589 if (share->base.keystart > 65535 || share->base.rec_reflength > 8)
590 {
591 _ma_set_fatal_error(share, HA_ERR_CRASHED);
592 goto err;
593 }
594
595 key_parts+=fulltext_keys*FT_SEGS;
596 if (share->base.max_key_length > _ma_max_key_length() ||
597 keys > MARIA_MAX_KEY || key_parts > MARIA_MAX_KEY * HA_MAX_KEY_SEG)
598 {
599 DBUG_PRINT("error",("Wrong key info: Max_key_length: %d keys: %d key_parts: %d", share->base.max_key_length, keys, key_parts));
600 my_errno=HA_ERR_UNSUPPORTED;
601 goto err;
602 }
603
604 /* Ensure we have space in the key buffer for transaction id's */
605 if (born_transactional)
606 share->base.max_key_length= ALIGN_SIZE(share->base.max_key_length +
607 MARIA_MAX_PACK_TRANSID_SIZE);
608
609 /*
610 If page cache is not initialized, then assume we will create the
611 page_cache after the table is opened!
612 This is only used by maria_check to allow it to check/repair tables
613 with different block sizes.
614 */
615 if (share->base.block_size != maria_block_size &&
616 share_buff.pagecache->inited != 0)
617 {
618 DBUG_PRINT("error", ("Wrong block size %u; Expected %u",
619 (uint) share->base.block_size,
620 (uint) maria_block_size));
621 my_errno=HA_ERR_UNSUPPORTED;
622 my_printf_error(my_errno, "Wrong block size %u; Expected %u",
623 MYF(0),
624 (uint) share->base.block_size,
625 (uint) maria_block_size);
626 goto err;
627 }
628
629 /* Correct max_file_length based on length of sizeof(off_t) */
630 max_data_file_length=
631 (share->options & (HA_OPTION_PACK_RECORD | HA_OPTION_COMPRESS_RECORD)) ?
632 (((ulonglong) 1 << (share->base.rec_reflength*8))-1) :
633 (_ma_safe_mul(share->base.pack_reclength,
634 (ulonglong) 1 << (share->base.rec_reflength*8))-1);
635
636 max_key_file_length=
637 _ma_safe_mul(share->base.block_size,
638 ((ulonglong) 1 << (share->base.key_reflength*8))-1);
639 #if SIZEOF_OFF_T == 4
640 set_if_smaller(max_data_file_length, INT_MAX32);
641 set_if_smaller(max_key_file_length, INT_MAX32);
642 #endif
643 /* For internal temporary tables, max_data_file_length is already set */
644 if (!internal_table || !share->base.max_data_file_length)
645 share->base.max_data_file_length=(my_off_t) max_data_file_length;
646 DBUG_ASSERT(share->base.max_data_file_length);
647 share->base.max_key_file_length=(my_off_t) max_key_file_length;
648
649 if (share->options & HA_OPTION_COMPRESS_RECORD)
650 share->base.max_key_length+=2; /* For safety */
651 /* Add space for node pointer */
652 share->base.max_key_length+= share->base.key_reflength;
653
654 share->unique_file_name.length= strlen(name_buff);
655 share->index_file_name.length= strlen(index_name);
656 share->data_file_name.length= strlen(data_name);
657 share->open_file_name.length= strlen(name);
658 if (!my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME | common_flag),
659 &share,sizeof(*share),
660 &rec_per_key_part, sizeof(double) * key_parts,
661 &nulls_per_key_part, sizeof(long)* key_parts,
662 &share->keyinfo,keys*sizeof(MARIA_KEYDEF),
663 &share->uniqueinfo,uniques*sizeof(MARIA_UNIQUEDEF),
664 &share->keyparts,
665 (key_parts+unique_key_parts+keys+uniques) *
666 sizeof(HA_KEYSEG),
667 &share->columndef,
668 (share->base.fields+1)*sizeof(MARIA_COLUMNDEF),
669 &share->column_nr, share->base.fields*sizeof(uint16),
670 &share->blobs,sizeof(MARIA_BLOB)*share->base.blobs,
671 &share->unique_file_name.str,
672 share->unique_file_name.length+1,
673 &share->index_file_name.str,
674 share->index_file_name.length+1,
675 &share->data_file_name.str,
676 share->data_file_name.length+1,
677 &share->open_file_name.str,
678 share->open_file_name.length+1,
679 &share->state.key_root,keys*sizeof(my_off_t),
680 &share->mmap_lock,sizeof(mysql_rwlock_t),
681 NullS))
682 goto err;
683 errpos= 4;
684
685 *share= share_buff;
686 share->state.rec_per_key_part= rec_per_key_part;
687 share->state.nulls_per_key_part= nulls_per_key_part;
688
689 memcpy((char*) rec_per_key_part,
690 (char*) share_buff.state.rec_per_key_part,
691 sizeof(double)*base_key_parts);
692 memcpy((char*) nulls_per_key_part,
693 (char*) share_buff.state.nulls_per_key_part,
694 sizeof(long)*base_key_parts);
695 memcpy((char*) share->state.key_root,
696 (char*) key_root, sizeof(my_off_t)*keys);
697 strmov(share->unique_file_name.str, name_buff);
698 strmov(share->index_file_name.str, index_name);
699 strmov(share->data_file_name.str, data_name);
700 strmov(share->open_file_name.str, name);
701
702 share->block_size= share->base.block_size; /* Convenience */
703 share->max_index_block_size= share->block_size - KEYPAGE_CHECKSUM_SIZE;
704 share->keypage_header= ((born_transactional ?
705 LSN_STORE_SIZE + TRANSID_SIZE :
706 0) + KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE +
707 KEYPAGE_USED_SIZE);
708
709 if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED))
710 {
711 share->keypage_header+= ma_crypt_get_index_page_header_space(share);
712 }
713
714 {
715 HA_KEYSEG *pos=share->keyparts;
716 uint32 ftkey_nr= 1;
717 for (i=0 ; i < keys ; i++)
718 {
719 MARIA_KEYDEF *keyinfo= &share->keyinfo[i];
720 keyinfo->share= share;
721 disk_pos=_ma_keydef_read(disk_pos, keyinfo);
722 keyinfo->key_nr= i;
723
724 /* Calculate length to store a key + nod flag and transaction info */
725 keyinfo->max_store_length= (keyinfo->maxlength +
726 share->base.key_reflength);
727 if (born_transactional)
728 keyinfo->max_store_length+= MARIA_INDEX_OVERHEAD_SIZE;
729
730 /* See ma_delete.cc::underflow() */
731 if (!(keyinfo->flag & (HA_BINARY_PACK_KEY | HA_PACK_KEY)))
732 keyinfo->underflow_block_length= keyinfo->block_length/3;
733 else
734 {
735 /* Packed key, ensure we don't get overflow in underflow() */
736 keyinfo->underflow_block_length=
737 MY_MAX((int) (share->max_index_block_size - keyinfo->maxlength * 3),
738 (int) (share->keypage_header + share->base.key_reflength));
739 set_if_smaller(keyinfo->underflow_block_length,
740 keyinfo->block_length/3);
741 }
742
743 disk_pos_assert(share,
744 disk_pos + keyinfo->keysegs * HA_KEYSEG_SIZE,
745 end_pos);
746 if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
747 share->have_rtree= 1;
748 keyinfo->seg=pos;
749 for (j=0 ; j < keyinfo->keysegs; j++,pos++)
750 {
751 disk_pos=_ma_keyseg_read(disk_pos, pos);
752 if (pos->type == HA_KEYTYPE_TEXT ||
753 pos->type == HA_KEYTYPE_VARTEXT1 ||
754 pos->type == HA_KEYTYPE_VARTEXT2)
755 {
756 if (!pos->language)
757 pos->charset=default_charset_info;
758 else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME))))
759 {
760 my_errno=HA_ERR_UNKNOWN_CHARSET;
761 goto err;
762 }
763 }
764 else if (pos->type == HA_KEYTYPE_BINARY)
765 pos->charset= &my_charset_bin;
766 }
767 if (keyinfo->flag & HA_SPATIAL)
768 {
769 #ifdef HAVE_SPATIAL
770 uint sp_segs=SPDIMS*2;
771 keyinfo->seg=pos-sp_segs;
772 keyinfo->keysegs--;
773 versioning= 0;
774 #else
775 my_errno=HA_ERR_UNSUPPORTED;
776 goto err;
777 #endif
778 }
779 else if (keyinfo->flag & HA_FULLTEXT)
780 {
781 versioning= 0;
782 DBUG_ASSERT(fulltext_keys);
783 {
784 uint k;
785 keyinfo->seg=pos;
786 for (k=0; k < FT_SEGS; k++)
787 {
788 *pos= ft_keysegs[k];
789 pos[0].language= pos[-1].language;
790 if (!(pos[0].charset= pos[-1].charset))
791 {
792 _ma_set_fatal_error(share, HA_ERR_CRASHED);
793 goto err;
794 }
795 pos++;
796 }
797 }
798 if (!share->ft2_keyinfo.seg)
799 {
800 memcpy(&share->ft2_keyinfo, keyinfo, sizeof(MARIA_KEYDEF));
801 share->ft2_keyinfo.keysegs=1;
802 share->ft2_keyinfo.flag=0;
803 share->ft2_keyinfo.keylength=
804 share->ft2_keyinfo.minlength=
805 share->ft2_keyinfo.maxlength=HA_FT_WLEN+share->base.rec_reflength;
806 share->ft2_keyinfo.seg=pos-1;
807 share->ft2_keyinfo.end=pos;
808 setup_key_functions(& share->ft2_keyinfo);
809 }
810 keyinfo->ftkey_nr= ftkey_nr++;
811 }
812 setup_key_functions(keyinfo);
813 keyinfo->end=pos;
814 pos->type=HA_KEYTYPE_END; /* End */
815 pos->length=share->base.rec_reflength;
816 pos->null_bit=0;
817 pos->flag=0; /* For purify */
818 pos++;
819 }
820 for (i=0 ; i < uniques ; i++)
821 {
822 disk_pos=_ma_uniquedef_read(disk_pos, &share->uniqueinfo[i]);
823 disk_pos_assert(share,
824 disk_pos + share->uniqueinfo[i].keysegs *
825 HA_KEYSEG_SIZE, end_pos);
826 share->uniqueinfo[i].seg=pos;
827 for (j=0 ; j < share->uniqueinfo[i].keysegs; j++,pos++)
828 {
829 disk_pos=_ma_keyseg_read(disk_pos, pos);
830 if (pos->type == HA_KEYTYPE_TEXT ||
831 pos->type == HA_KEYTYPE_VARTEXT1 ||
832 pos->type == HA_KEYTYPE_VARTEXT2)
833 {
834 if (!pos->language)
835 pos->charset=default_charset_info;
836 else if (!(pos->charset= get_charset(pos->language, MYF(MY_WME))))
837 {
838 my_errno=HA_ERR_UNKNOWN_CHARSET;
839 goto err;
840 }
841 }
842 }
843 share->uniqueinfo[i].end=pos;
844 pos->type=HA_KEYTYPE_END; /* End */
845 pos->null_bit=0;
846 pos->flag=0;
847 pos++;
848 }
849 share->ftkeys= ftkey_nr;
850 }
851 share->data_file_type= share->state.header.data_file_type;
852 share->base_length= (BASE_ROW_HEADER_SIZE +
853 share->base.is_nulls_extended +
854 share->base.null_bytes +
855 share->base.pack_bytes +
856 MY_TEST(share->options & HA_OPTION_CHECKSUM));
857 share->kfile.file= kfile;
858
859 if (open_flags & HA_OPEN_COPY)
860 {
861 /*
862 this instance will be a temporary one used just to create a data
863 file for REPAIR. Don't do logging. This base information will not go
864 to disk.
865 */
866 born_transactional= FALSE;
867 }
868 if (born_transactional)
869 {
870 share->page_type= PAGECACHE_LSN_PAGE;
871 if (share->state.create_rename_lsn == LSN_NEEDS_NEW_STATE_LSNS)
872 {
873 /*
874 Was repaired with maria_chk, maybe later maria_pack-ed. Some sort of
875 import into the server. It starts its existence (from the point of
876 view of the server, including server's recovery) now.
877 */
878 if (((open_flags & HA_OPEN_FROM_SQL_LAYER) &&
879 (share->state.changed & STATE_NOT_MOVABLE)) || maria_in_recovery)
880 _ma_update_state_lsns_sub(share, LSN_IMPOSSIBLE,
881 trnman_get_min_safe_trid(), TRUE, TRUE);
882 }
883 else if ((!LSN_VALID(share->state.create_rename_lsn) ||
884 !LSN_VALID(share->state.is_of_horizon) ||
885 (cmp_translog_addr(share->state.create_rename_lsn,
886 share->state.is_of_horizon) > 0) ||
887 !LSN_VALID(share->state.skip_redo_lsn) ||
888 (cmp_translog_addr(share->state.create_rename_lsn,
889 share->state.skip_redo_lsn) > 0)))
890 {
891 if (!(open_flags & HA_OPEN_FOR_REPAIR))
892 {
893 /*
894 If in Recovery, it will not work. If LSN is invalid and not
895 LSN_NEEDS_NEW_STATE_LSNS, header must be corrupted.
896 In both cases, must repair.
897 */
898 my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ?
899 HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE);
900 goto err;
901 }
902 else
903 {
904 /*
905 Open in repair mode. Ensure that we mark the table crashed, so
906 that we run auto_repair on it
907 */
908 maria_mark_crashed_share(share);
909 }
910 }
911 else if (!(open_flags & HA_OPEN_FOR_REPAIR))
912 {
913 /* create_rename_lsn != LSN_NEEDS_NEW_STATE_LSNS */
914 share->state.changed|= STATE_NOT_MOVABLE;
915 }
916 }
917 else
918 share->page_type= PAGECACHE_PLAIN_PAGE;
919 share->now_transactional= born_transactional;
920
921 /* Use pack_reclength as we don't want to modify base.pack_recklength */
922 if (share->state.header.org_data_file_type == DYNAMIC_RECORD)
923 {
924 /* add bits used to pack data to pack_reclength for faster allocation */
925 share->base.pack_reclength+= share->base.pack_bytes;
926 share->base.extra_rec_buff_size=
927 (ALIGN_SIZE(MARIA_MAX_DYN_BLOCK_HEADER) + MARIA_SPLIT_LENGTH +
928 MARIA_REC_BUFF_OFFSET);
929 }
930 if (share->data_file_type == COMPRESSED_RECORD)
931 {
932 /* Need some extra bytes for decode_bytes */
933 share->base.extra_rec_buff_size+= 7;
934 }
935 share->base.default_rec_buff_size= MY_MAX(share->base.pack_reclength +
936 share->base.extra_rec_buff_size,
937 share->base.max_key_length);
938
939 disk_pos_assert(share,
940 disk_pos + share->base.fields *MARIA_COLUMNDEF_SIZE,
941 end_pos);
942 for (i= j= 0 ; i < share->base.fields ; i++)
943 {
944 disk_pos=_ma_columndef_read(disk_pos,&share->columndef[i]);
945 share->columndef[i].pack_type=0;
946 share->columndef[i].huff_tree=0;
947 if (share->columndef[i].type == FIELD_BLOB)
948 {
949 share->blobs[j].pack_length=
950 share->columndef[i].length-portable_sizeof_char_ptr;
951 share->blobs[j].offset= share->columndef[i].offset;
952 j++;
953 }
954 if (share->columndef[i].type == FIELD_VARCHAR)
955 share->has_varchar_fields= 1;
956 if (share->columndef[i].null_bit)
957 share->has_null_fields= 1;
958 }
959 share->columndef[i].type= FIELD_LAST; /* End marker */
960 disk_pos= _ma_column_nr_read(disk_pos, share->column_nr,
961 share->base.fields);
962
963 if (MY_TEST(share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED))
964 {
965 if (!(disk_pos= ma_crypt_read(share, disk_pos)))
966 goto err;
967 }
968
969 if ((share->data_file_type == BLOCK_RECORD ||
970 share->data_file_type == COMPRESSED_RECORD))
971 {
972 if (!s3)
973 {
974 if (_ma_open_datafile(&info, share))
975 goto err;
976 data_file= info.dfile.file;
977 }
978 #ifdef WITH_S3_STORAGE_ENGINE
979 else
980 data_file= info.dfile.file= s3f.unique_file_number();
981 #endif /* WITH_S3_STORAGE_ENGINE */
982 }
983 errpos= 5;
984
985 if (open_flags & HA_OPEN_DELAY_KEY_WRITE)
986 share->options|= HA_OPTION_DELAY_KEY_WRITE;
987 if (mode == O_RDONLY)
988 share->options|= HA_OPTION_READ_ONLY_DATA;
989 share->is_log_table= FALSE;
990
991 if (open_flags & HA_OPEN_TMP_TABLE || share->options & HA_OPTION_TMP_TABLE)
992 {
993 common_flag|= MY_THREAD_SPECIFIC;
994 share->options|= HA_OPTION_TMP_TABLE;
995 share->temporary= share->delay_key_write= 1;
996 share->write_flag=MYF(MY_NABP);
997 share->w_locks++; /* We don't have to update status */
998 share->tot_locks++;
999 }
1000
1001 _ma_set_index_pagecache_callbacks(&share->kfile, share);
1002 share->this_process=(ulong) getpid();
1003 #ifdef MARIA_EXTERNAL_LOCKING
1004 share->last_process= share->state.process;
1005 #endif
1006 share->base.key_parts=key_parts;
1007 share->base.all_key_parts=key_parts+unique_key_parts;
1008 if (!(share->last_version=share->state.version))
1009 share->last_version=1; /* Safety */
1010 share->rec_reflength=share->base.rec_reflength; /* May be changed */
1011 share->base.margin_key_file_length=(share->base.max_key_file_length -
1012 (keys ? MARIA_INDEX_BLOCK_MARGIN *
1013 share->block_size * keys : 0));
1014 my_free(disk_cache);
1015 my_free(share_buff.state.rec_per_key_part);
1016 disk_cache= 0;
1017 share_buff.state.rec_per_key_part= 0;
1018
1019 _ma_setup_functions(share);
1020 max_data_file_length= share->base.max_data_file_length;
1021 if ((*share->once_init)(share, info.dfile.file))
1022 goto err;
1023 errpos= 6;
1024 if (internal_table)
1025 set_if_smaller(share->base.max_data_file_length,
1026 max_data_file_length);
1027 if (share->now_transactional)
1028 {
1029 /* Setup initial state that is visible for all */
1030 MARIA_STATE_HISTORY_CLOSED *history;
1031 if ((history= (MARIA_STATE_HISTORY_CLOSED *)
1032 my_hash_search(&maria_stored_state,
1033 (uchar*) &share->state.create_rename_lsn, 0)))
1034 {
1035 /*
1036 Move history from hash to share. This is safe to do as we
1037 know we are the only one that is using the share.
1038 */
1039 share->state_history=
1040 _ma_remove_not_visible_states(history->state_history, 0, 0);
1041 history->state_history= 0;
1042 (void) my_hash_delete(&maria_stored_state, (uchar*) history);
1043 DBUG_PRINT("info", ("Reading state history. trid: %lu records: %lld",
1044 (ulong) share->state_history->trid,
1045 share->state_history->state.records));
1046 }
1047 else
1048 {
1049 /* Table is not part of any active transaction; Create new history */
1050 if (!(share->state_history= (MARIA_STATE_HISTORY *)
1051 my_malloc(PSI_INSTRUMENT_ME, sizeof(*share->state_history),
1052 MYF(MY_WME))))
1053 goto err;
1054 share->state_history->trid= 0; /* Visible by all */
1055 share->state_history->state= share->state.state;
1056 share->state_history->next= 0;
1057 }
1058 }
1059 errpos= 7;
1060 thr_lock_init(&share->lock);
1061 mysql_mutex_init(key_SHARE_intern_lock,
1062 &share->intern_lock, MY_MUTEX_INIT_FAST);
1063 mysql_mutex_init(key_SHARE_key_del_lock,
1064 &share->key_del_lock, MY_MUTEX_INIT_FAST);
1065 mysql_cond_init(key_SHARE_key_del_cond, &share->key_del_cond, 0);
1066 mysql_mutex_init(key_SHARE_close_lock,
1067 &share->close_lock, MY_MUTEX_INIT_FAST);
1068 for (i=0; i<keys; i++)
1069 mysql_rwlock_init(key_KEYINFO_root_lock,
1070 &share->keyinfo[i].root_lock);
1071 mysql_rwlock_init(key_SHARE_mmap_lock, &share->mmap_lock);
1072
1073 share->row_is_visible= _ma_row_visible_always;
1074 share->lock.get_status= _ma_reset_update_flag;
1075 share->lock.start_trans= _ma_start_trans;
1076
1077 if (!thr_lock_inited)
1078 {
1079 /* Probably a single threaded program; Don't use concurrent inserts */
1080 maria_concurrent_insert=0;
1081 }
1082 else if (maria_concurrent_insert)
1083 {
1084 share->non_transactional_concurrent_insert=
1085 ((share->options & (HA_OPTION_READ_ONLY_DATA | HA_OPTION_TMP_TABLE |
1086 HA_OPTION_COMPRESS_RECORD |
1087 HA_OPTION_TEMP_COMPRESS_RECORD)) ||
1088 (open_flags & HA_OPEN_TMP_TABLE) ||
1089 share->data_file_type == BLOCK_RECORD ||
1090 share->have_rtree) ? 0 : 1;
1091 if (share->non_transactional_concurrent_insert ||
1092 (!share->temporary && share->now_transactional && versioning))
1093 {
1094 share->lock_key_trees= 1;
1095 if (share->data_file_type == BLOCK_RECORD)
1096 {
1097 DBUG_ASSERT(share->now_transactional);
1098 share->have_versioning= 1;
1099 share->row_is_visible= _ma_row_visible_transactional_table;
1100 share->lock.get_status= _ma_block_get_status;
1101 share->lock.check_status= _ma_block_check_status;
1102 share->lock.start_trans= _ma_block_start_trans;
1103 /*
1104 We can for the moment only allow multiple concurrent inserts
1105 only if there is no auto-increment key. To lift this restriction
1106 we have to:
1107 - Extend statement base replication to support auto-increment
1108 intervalls.
1109 - Fix that we allocate auto-increment in intervals and that
1110 it's properly reset if the interval was not used
1111 */
1112 share->lock.allow_multiple_concurrent_insert=
1113 share->base.auto_key == 0;
1114 share->lock_restore_status= 0;
1115 }
1116 else
1117 {
1118 share->row_is_visible= _ma_row_visible_non_transactional_table;
1119 share->lock.get_status= _ma_get_status;
1120 share->lock.copy_status= _ma_copy_status;
1121 share->lock.update_status= _ma_update_status;
1122 share->lock.restore_status= _ma_restore_status;
1123 share->lock.check_status= _ma_check_status;
1124 share->lock_restore_status= _ma_restore_status;
1125 }
1126 }
1127 else if (share->now_transactional)
1128 {
1129 DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
1130 share->lock.start_trans= _ma_block_start_trans_no_versioning;
1131 }
1132 }
1133 #ifdef SAFE_MUTEX
1134 if (share->data_file_type == BLOCK_RECORD)
1135 {
1136 /*
1137 We must have internal_lock before bitmap_lock because we call
1138 _ma_flush_table_files() with internal_lock locked.
1139 */
1140 mysql_mutex_lock(&share->intern_lock);
1141 mysql_mutex_lock(&share->bitmap.bitmap_lock);
1142 mysql_mutex_unlock(&share->bitmap.bitmap_lock);
1143 mysql_mutex_unlock(&share->intern_lock);
1144 }
1145 #endif
1146 /*
1147 Memory mapping can only be requested after initializing intern_lock.
1148 */
1149 if (open_flags & HA_OPEN_MMAP)
1150 {
1151 info.s= share;
1152 maria_extra(&info, HA_EXTRA_MMAP, 0);
1153 }
1154 #ifdef WITH_S3_STORAGE_ENGINE
1155 if (s3_client)
1156 {
1157 size_t block_size= share->base.s3_block_size;
1158 s3f.set_option(s3_client, MS3_OPT_BUFFER_CHUNK_SIZE, &block_size);
1159 }
1160 #endif /* WITH_S3_STORAGE_ENGINE */
1161 }
1162 else
1163 {
1164 share= old_info->s;
1165 if (share->data_file_type == BLOCK_RECORD)
1166 data_file= share->bitmap.file.file; /* Only opened once */
1167 }
1168
1169 #ifdef WITH_S3_STORAGE_ENGINE
1170 if (index_header.alloc_ptr)
1171 s3f.free(&index_header);
1172 #endif /* WITH_S3_STORAGE_ENGINE */
1173
1174 if (!(m_info= maria_clone_internal(share, mode, data_file,
1175 internal_table, s3_client)))
1176 goto err;
1177
1178 if (maria_is_crashed(m_info))
1179 DBUG_PRINT("warning", ("table is crashed: changed: %u",
1180 share->state.changed));
1181
1182 if (!internal_table)
1183 mysql_mutex_unlock(&THR_LOCK_maria);
1184
1185 m_info->open_flags= open_flags;
1186 m_info->stack_end_ptr= &my_thread_var->stack_ends_here;
1187 DBUG_PRINT("exit", ("table: %p name: %s",m_info, name));
1188 DBUG_RETURN(m_info);
1189
1190 err:
1191 DBUG_PRINT("error", ("error: %d errpos: %d", my_errno, errpos));
1192 save_errno=my_errno ? my_errno : HA_ERR_END_OF_FILE;
1193 if ((save_errno == HA_ERR_CRASHED) ||
1194 (save_errno == HA_ERR_CRASHED_ON_USAGE) ||
1195 (save_errno == HA_ERR_CRASHED_ON_REPAIR))
1196 {
1197 LEX_STRING tmp_name;
1198 tmp_name.str= (char*) name;
1199 tmp_name.length= strlen(name);
1200 _ma_report_error(save_errno, &tmp_name);
1201 }
1202 switch (errpos) {
1203 case 7:
1204 thr_lock_delete(&share->lock);
1205 /* fall through */
1206 case 6:
1207 /* Avoid mutex test in _ma_bitmap_end() */
1208 share->internal_table= 1;
1209 (*share->once_end)(share);
1210 /* fall through */
1211 case 5:
1212 if (data_file >= 0 && !s3_client)
1213 mysql_file_close(data_file, MYF(0));
1214 if (old_info)
1215 break; /* Don't remove open table */
1216 /* fall through */
1217 case 4:
1218 ma_crypt_free(share);
1219 my_free(share);
1220 /* fall through */
1221 case 3:
1222 my_free(disk_cache);
1223 my_free(share_buff.state.rec_per_key_part);
1224 /* fall through */
1225 case 1:
1226 if (!s3)
1227 mysql_file_close(kfile,MYF(0));
1228 my_free(share_s3);
1229 /* fall through */
1230 case 0:
1231 default:
1232 break;
1233 }
1234 #ifdef WITH_S3_STORAGE_ENGINE
1235 if (s3_client)
1236 s3f.deinit(s3_client);
1237 if (index_header.alloc_ptr)
1238 s3f.free(&index_header);
1239 #endif /* WITH_S3_STORAGE_ENGINE */
1240 if (!internal_table)
1241 mysql_mutex_unlock(&THR_LOCK_maria);
1242 my_errno= save_errno;
1243 DBUG_RETURN (NULL);
1244 } /* maria_open */
1245
1246
1247 /*
1248 Reallocate a buffer, if the current buffer is not large enough
1249 */
1250
_ma_alloc_buffer(uchar ** old_addr,size_t * old_size,size_t new_size,myf flag)1251 my_bool _ma_alloc_buffer(uchar **old_addr, size_t *old_size,
1252 size_t new_size, myf flag)
1253 {
1254 if (*old_size < new_size)
1255 {
1256 uchar *addr;
1257 if (!(addr= (uchar*) my_realloc(PSI_INSTRUMENT_ME, *old_addr, new_size,
1258 MYF(MY_ALLOW_ZERO_PTR | flag))))
1259 return 1;
1260 *old_addr= addr;
1261 *old_size= new_size;
1262 }
1263 return 0;
1264 }
1265
1266
/*
  Multiply two unsigned values, saturating instead of wrapping around.

  @return a*b when the product fits in an ulonglong, otherwise the largest
          ulonglong value.  Note that a == 0 also returns the largest
          value, not 0.
*/
ulonglong _ma_safe_mul(ulonglong a, ulonglong b)
{
  const ulonglong saturated= ~ (ulonglong) 0;   /* All bits set */

  if (a == 0)
    return saturated;
  /* a*b would overflow exactly when b is bigger than saturated / a */
  if (b > saturated / a)
    return saturated;
  return a * b;
}
1275
1276 /* Set up functions in structs */
1277
_ma_setup_functions(register MARIA_SHARE * share)1278 void _ma_setup_functions(register MARIA_SHARE *share)
1279 {
1280 share->once_init= maria_once_init_dummy;
1281 share->once_end= maria_once_end_dummy;
1282 share->init= maria_scan_init_dummy;
1283 share->end= maria_scan_end_dummy;
1284 share->scan_init= maria_scan_init_dummy;/* Compat. dummy function */
1285 share->scan_end= maria_scan_end_dummy;/* Compat. dummy function */
1286 share->scan_remember_pos= _ma_def_scan_remember_pos;
1287 share->scan_restore_pos= _ma_def_scan_restore_pos;
1288
1289 share->write_record_init= _ma_write_init_default;
1290 share->write_record_abort= _ma_write_abort_default;
1291 share->keypos_to_recpos= _ma_transparent_recpos;
1292 share->recpos_to_keypos= _ma_transparent_recpos;
1293
1294 switch (share->data_file_type) {
1295 case COMPRESSED_RECORD:
1296 share->read_record= _ma_read_pack_record;
1297 share->scan= _ma_read_rnd_pack_record;
1298 share->once_init= _ma_once_init_pack_row;
1299 share->once_end= _ma_once_end_pack_row;
1300 /*
1301 Calculate checksum according to data in the original, not compressed,
1302 row.
1303 */
1304 if (share->state.header.org_data_file_type == STATIC_RECORD &&
1305 ! (share->options & HA_OPTION_NULL_FIELDS))
1306 share->calc_checksum= _ma_static_checksum;
1307 else
1308 share->calc_checksum= _ma_checksum;
1309 share->calc_write_checksum= share->calc_checksum;
1310 break;
1311 case DYNAMIC_RECORD:
1312 share->read_record= _ma_read_dynamic_record;
1313 share->scan= _ma_read_rnd_dynamic_record;
1314 share->delete_record= _ma_delete_dynamic_record;
1315 share->compare_record= _ma_cmp_dynamic_record;
1316 share->compare_unique= _ma_cmp_dynamic_unique;
1317 share->calc_checksum= share->calc_write_checksum= _ma_checksum;
1318 if (share->base.blobs)
1319 {
1320 share->update_record= _ma_update_blob_record;
1321 share->write_record= _ma_write_blob_record;
1322 }
1323 else
1324 {
1325 share->write_record= _ma_write_dynamic_record;
1326 share->update_record= _ma_update_dynamic_record;
1327 }
1328 break;
1329 case STATIC_RECORD:
1330 share->read_record= _ma_read_static_record;
1331 share->scan= _ma_read_rnd_static_record;
1332 share->delete_record= _ma_delete_static_record;
1333 share->compare_record= _ma_cmp_static_record;
1334 share->update_record= _ma_update_static_record;
1335 share->write_record= _ma_write_static_record;
1336 share->compare_unique= _ma_cmp_static_unique;
1337 share->keypos_to_recpos= _ma_static_keypos_to_recpos;
1338 share->recpos_to_keypos= _ma_static_recpos_to_keypos;
1339 if (share->state.header.org_data_file_type == STATIC_RECORD &&
1340 ! (share->options & HA_OPTION_NULL_FIELDS))
1341 share->calc_checksum= _ma_static_checksum;
1342 else
1343 share->calc_checksum= _ma_checksum;
1344 break;
1345 case NO_RECORD:
1346 share->read_record= _ma_read_no_record;
1347 share->scan= _ma_read_rnd_no_record;
1348 share->delete_record= _ma_delete_no_record;
1349 share->update_record= _ma_update_no_record;
1350 share->write_record= _ma_write_no_record;
1351 share->recpos_to_keypos= _ma_no_keypos_to_recpos;
1352 share->keypos_to_recpos= _ma_no_keypos_to_recpos;
1353
1354 /* Abort if following functions are called */
1355 share->compare_record= 0;
1356 share->compare_unique= 0;
1357 share->calc_checksum= 0;
1358 break;
1359 case BLOCK_RECORD:
1360 share->once_init= _ma_once_init_block_record;
1361 share->once_end= _ma_once_end_block_record;
1362 share->init= _ma_init_block_record;
1363 share->end= _ma_end_block_record;
1364 share->write_record_init= _ma_write_init_block_record;
1365 share->write_record_abort= _ma_write_abort_block_record;
1366 share->scan_init= _ma_scan_init_block_record;
1367 share->scan_end= _ma_scan_end_block_record;
1368 share->scan= _ma_scan_block_record;
1369 share->scan_remember_pos= _ma_scan_remember_block_record;
1370 share->scan_restore_pos= _ma_scan_restore_block_record;
1371 share->read_record= _ma_read_block_record;
1372 share->delete_record= _ma_delete_block_record;
1373 share->compare_record= _ma_compare_block_record;
1374 share->update_record= _ma_update_block_record;
1375 share->write_record= _ma_write_block_record;
1376 share->compare_unique= _ma_cmp_block_unique;
1377 share->calc_checksum= _ma_checksum;
1378 share->keypos_to_recpos= _ma_transaction_keypos_to_recpos;
1379 share->recpos_to_keypos= _ma_transaction_recpos_to_keypos;
1380
1381 /*
1382 write_block_record() will calculate the checksum; Tell maria_write()
1383 that it doesn't have to do this.
1384 */
1385 share->calc_write_checksum= 0;
1386 break;
1387 }
1388 share->file_read= _ma_nommap_pread;
1389 share->file_write= _ma_nommap_pwrite;
1390 share->calc_check_checksum= share->calc_checksum;
1391
1392 if (!(share->options & HA_OPTION_CHECKSUM) &&
1393 share->data_file_type != COMPRESSED_RECORD)
1394 share->calc_checksum= share->calc_write_checksum= 0;
1395 return;
1396 }
1397
1398
setup_key_functions(register MARIA_KEYDEF * keyinfo)1399 static void setup_key_functions(register MARIA_KEYDEF *keyinfo)
1400 {
1401 if (keyinfo->key_alg == HA_KEY_ALG_RTREE)
1402 {
1403 #ifdef HAVE_RTREE_KEYS
1404 keyinfo->ck_insert = maria_rtree_insert;
1405 keyinfo->ck_delete = maria_rtree_delete;
1406 #else
1407 DBUG_ASSERT(0); /* maria_open should check it never happens */
1408 #endif
1409 }
1410 else
1411 {
1412 keyinfo->ck_insert = _ma_ck_write;
1413 keyinfo->ck_delete = _ma_ck_delete;
1414 }
1415 if (keyinfo->flag & HA_SPATIAL)
1416 keyinfo->make_key= _ma_sp_make_key;
1417 else
1418 keyinfo->make_key= _ma_make_key;
1419
1420 if (keyinfo->flag & HA_BINARY_PACK_KEY)
1421 { /* Simple prefix compression */
1422 keyinfo->bin_search= _ma_seq_search;
1423 keyinfo->get_key= _ma_get_binary_pack_key;
1424 keyinfo->skip_key= _ma_skip_binary_pack_key;
1425 keyinfo->pack_key= _ma_calc_bin_pack_key_length;
1426 keyinfo->store_key= _ma_store_bin_pack_key;
1427 }
1428 else if (keyinfo->flag & HA_VAR_LENGTH_KEY)
1429 {
1430 keyinfo->get_key= _ma_get_pack_key;
1431 keyinfo->skip_key= _ma_skip_pack_key;
1432 if (keyinfo->seg[0].flag & HA_PACK_KEY)
1433 { /* Prefix compression */
1434 /*
1435 _ma_prefix_search() compares end-space against ASCII blank (' ').
1436 It cannot be used for character sets, that do not encode the
1437 blank character like ASCII does. UCS2 is an example. All
1438 character sets with a fixed width > 1 or a mimimum width > 1
1439 cannot represent blank like ASCII does. In these cases we have
1440 to use _ma_seq_search() for the search.
1441 */
1442 if (!keyinfo->seg->charset || use_strnxfrm(keyinfo->seg->charset) ||
1443 (keyinfo->seg->flag & HA_NULL_PART) ||
1444 keyinfo->seg->charset->mbminlen > 1)
1445 keyinfo->bin_search= _ma_seq_search;
1446 else
1447 keyinfo->bin_search= _ma_prefix_search;
1448 keyinfo->pack_key= _ma_calc_var_pack_key_length;
1449 keyinfo->store_key= _ma_store_var_pack_key;
1450 }
1451 else
1452 {
1453 keyinfo->bin_search= _ma_seq_search;
1454 keyinfo->pack_key= _ma_calc_var_key_length; /* Variable length key */
1455 keyinfo->store_key= _ma_store_static_key;
1456 }
1457 }
1458 else
1459 {
1460 keyinfo->bin_search= _ma_bin_search;
1461 keyinfo->get_key= _ma_get_static_key;
1462 keyinfo->skip_key= _ma_skip_static_key;
1463 keyinfo->pack_key= _ma_calc_static_key_length;
1464 keyinfo->store_key= _ma_store_static_key;
1465 }
1466
1467 /* set keyinfo->write_comp_flag */
1468 if (keyinfo->flag & HA_SORT_ALLOWS_SAME)
1469 keyinfo->write_comp_flag=SEARCH_BIGGER; /* Put after same key */
1470 else if (keyinfo->flag & ( HA_NOSAME | HA_FULLTEXT))
1471 {
1472 keyinfo->write_comp_flag= SEARCH_FIND | SEARCH_UPDATE; /* No duplicates */
1473 if (keyinfo->flag & HA_NULL_ARE_EQUAL)
1474 keyinfo->write_comp_flag|= SEARCH_NULL_ARE_EQUAL;
1475 }
1476 else
1477 keyinfo->write_comp_flag= SEARCH_SAME; /* Keys in rec-pos order */
1478 keyinfo->write_comp_flag|= SEARCH_INSERT;
1479 return;
1480 }
1481
1482
1483 /**
1484 @brief Function to save and store the header in the index file (.MAI)
1485
1486 Operates under MARIA_SHARE::intern_lock if requested.
1487 Sets MARIA_SHARE::MARIA_STATE_INFO::is_of_horizon if transactional table.
1488 Then calls _ma_state_info_write_sub().
1489
1490 @param share table
1491 @param pWrite bitmap: if 1 (MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET)
1492 is set my_pwrite() is used otherwise my_write();
1493 if 2 (MA_STATE_INFO_WRITE_FULL_INFO) is set, info
1494 about keys is written (should only be needed
1495 after ALTER TABLE ENABLE/DISABLE KEYS, and
1496 REPAIR/OPTIMIZE); if 4 (MA_STATE_INFO_WRITE_LOCK)
1497 is set, MARIA_SHARE::intern_lock is taken.
1498
1499 @return Operation status
1500 @retval 0 OK
1501 @retval 1 Error
1502 */
1503
uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite)
{
  uint write_failed;
  const my_bool take_lock= (pWrite & MA_STATE_INFO_WRITE_LOCK) != 0;

  /* Nothing is ever written for tables opened with read only data */
  if (share->options & HA_OPTION_READ_ONLY_DATA)
    return 0;

  if (take_lock)
    mysql_mutex_lock(&share->intern_lock);
  else if (maria_multi_threaded && !share->temporary)
  {
    /* The caller did not ask us to lock, so it must hold the lock itself */
    mysql_mutex_assert_owner(&share->intern_lock);
  }

  if (share->base.born_transactional && translog_status == TRANSLOG_OK &&
      !maria_in_recovery)
  {
    /*
      In a recovery, we want to set is_of_horizon to the LSN of the last
      record executed by Recovery, not the current EOF of the log (which
      is too new). Recovery does it by itself.
    */
    share->state.is_of_horizon= translog_get_horizon();
    DBUG_PRINT("info", ("is_of_horizon set to LSN " LSN_FMT "",
                        LSN_IN_PARTS(share->state.is_of_horizon)));
  }

  write_failed= _ma_state_info_write_sub(share->kfile.file, &share->state,
                                         pWrite);

  if (take_lock)
    mysql_mutex_unlock(&share->intern_lock);

  /* If open_count != 0 we have to write the state again at close */
  share->changed= share->state.open_count != 0;
  return write_failed;
}
1533
1534
1535 /**
  @brief Function to save and store the header in the index file (.MAI).
1537
1538 Shortcut to use instead of _ma_state_info_write() when appropriate.
1539
1540 @param file descriptor of the index file to write
1541 @param state state information to write to the file
1542 @param pWrite bitmap: if 1 (MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET)
1543 is set my_pwrite() is used otherwise my_write();
1544 if 2 (MA_STATE_INFO_WRITE_FULL_INFO) is set, info
1545 about keys is written (should only be needed
1546 after ALTER TABLE ENABLE/DISABLE KEYS, and
1547 REPAIR/OPTIMIZE).
1548
1549 @notes
1550 For transactional multiuser tables, this function is called
1551 with intern_lock & translog_lock or when the last thread who
1552 is using the table is closing it.
1553 Because of the translog_lock we don't need to have a lock on
1554 key_del_lock.
1555
1556 @return Operation status
1557 @retval 0 OK
1558 @retval 1 Error
1559 */
1560
uint _ma_state_info_write_sub(File file, MARIA_STATE_INFO *state, uint pWrite)
{
  uchar  image[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
  uchar *pos= image;
  const uint key_count= (uint) state->header.keys;
  uint   idx;
  size_t image_length, write_error;
  DBUG_ENTER("_ma_state_info_write_sub");
  DBUG_PRINT("info", ("Records: %lld", state->state.records));

  /* The header is stored as a raw copy of the in-memory struct */
  memcpy(pos, &state->header, sizeof(state->header));
  pos+= sizeof(state->header);

  /* open_count must be first because of _ma_mark_file_changed ! */
  mi_int2store(pos, state->open_count);
  pos+= 2;
  /* changed must be second, because of _ma_mark_file_crashed */
  mi_int2store(pos, state->changed);
  pos+= 2;

  /*
    If you change the offset of these LSNs, note that some functions do a
    direct write of them without going through this function.
  */
  lsn_store(pos, state->create_rename_lsn);
  pos+= LSN_STORE_SIZE;
  lsn_store(pos, state->is_of_horizon);
  pos+= LSN_STORE_SIZE;
  lsn_store(pos, state->skip_redo_lsn);
  pos+= LSN_STORE_SIZE;

  mi_rowstore(pos, state->state.records);
  pos+= 8;
  mi_rowstore(pos, state->state.del);
  pos+= 8;
  mi_rowstore(pos, state->split);
  pos+= 8;
  mi_sizestore(pos, state->dellink);
  pos+= 8;
  mi_sizestore(pos, state->first_bitmap_with_space);
  pos+= 8;
  mi_sizestore(pos, state->state.key_file_length);
  pos+= 8;
  mi_sizestore(pos, state->state.data_file_length);
  pos+= 8;
  mi_sizestore(pos, state->state.empty);
  pos+= 8;
  mi_sizestore(pos, state->state.key_empty);
  pos+= 8;
  mi_int8store(pos, state->auto_increment);
  pos+= 8;
  mi_int8store(pos, (ulonglong) state->state.checksum);
  pos+= 8;
  mi_int8store(pos, state->create_trid);
  pos+= 8;
  mi_int4store(pos, state->status);
  pos+= 4;
  mi_int4store(pos, state->update_count);
  pos+= 4;
  *pos++= state->sortkey;
  *pos++= 0;                                    /* Reserved */

  /*
    NOTE(review): these bytes are skipped without being stored to, so when
    state_diff_length is not 0 whatever the stack buffer contains is what
    gets written for them - confirm that this is intended.
  */
  pos+= state->state_diff_length;

  /* One root page position per key, followed by the key delete link */
  for (idx= 0; idx < key_count; idx++)
  {
    mi_sizestore(pos, state->key_root[idx]);
    pos+= 8;
  }
  mi_sizestore(pos, state->key_del);
  pos+= 8;

  if (pWrite & MA_STATE_INFO_WRITE_FULL_INFO)   /* From maria_chk */
  {
    const uint part_count= mi_uint2korr(state->header.key_parts);

    mi_int4store(pos, state->sec_index_changed);
    pos+= 4;
    mi_int4store(pos, state->sec_index_used);
    pos+= 4;
    mi_int4store(pos, state->version);
    pos+= 4;
    mi_int8store(pos, state->key_map);
    pos+= 8;
    mi_int8store(pos, (ulonglong) state->create_time);
    pos+= 8;
    mi_int8store(pos, (ulonglong) state->recover_time);
    pos+= 8;
    mi_int8store(pos, (ulonglong) state->check_time);
    pos+= 8;
    mi_sizestore(pos, state->records_at_analyze);
    pos+= 8;

    /* reserve place for some information per key */
    bzero(pos, key_count * 4);
    pos+= key_count * 4;

    /* Per key part statistics: 8 byte float followed by 4 byte counter */
    for (idx= 0; idx < part_count; idx++)
    {
      float8store(pos, state->rec_per_key_part[idx]);
      pos+= 8;
      mi_int4store(pos, state->nulls_per_key_part[idx]);
      pos+= 4;
    }
  }

  /* Positional write at offset 0 if requested, else a plain write */
  image_length= (size_t) (pos - image);
  if (pWrite & MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET)
    write_error= mysql_file_pwrite(file, image, image_length, 0L,
                                   MYF(MY_NABP | MY_THREADSAFE));
  else
    write_error= mysql_file_write(file, image, image_length, MYF(MY_NABP));
  DBUG_RETURN(write_error != 0);
}
1635
1636
_ma_state_info_read(uchar * ptr,MARIA_STATE_INFO * state,myf flag)1637 static uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state, myf flag)
1638 {
1639 uint i,keys,key_parts;
1640 DBUG_ENTER("_ma_state_info_read");
1641
1642 memcpy(&state->header,ptr, sizeof(state->header));
1643 ptr+= sizeof(state->header);
1644 keys= (uint) state->header.keys;
1645 key_parts= mi_uint2korr(state->header.key_parts);
1646
1647 /* Allocate memory for key parts if not already done */
1648 if (!state->rec_per_key_part &&
1649 !my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME | flag),
1650 &state->rec_per_key_part,
1651 sizeof(*state->rec_per_key_part) * key_parts,
1652 &state->nulls_per_key_part,
1653 sizeof(*state->nulls_per_key_part) * key_parts,
1654 NullS))
1655 DBUG_RETURN(0);
1656
1657 state->open_count = mi_uint2korr(ptr); ptr+= 2;
1658 state->changed= mi_uint2korr(ptr); ptr+= 2;
1659 state->create_rename_lsn= lsn_korr(ptr); ptr+= LSN_STORE_SIZE;
1660 state->is_of_horizon= lsn_korr(ptr); ptr+= LSN_STORE_SIZE;
1661 state->skip_redo_lsn= lsn_korr(ptr); ptr+= LSN_STORE_SIZE;
1662 state->state.records= mi_rowkorr(ptr); ptr+= 8;
1663 state->state.del = mi_rowkorr(ptr); ptr+= 8;
1664 state->split = mi_rowkorr(ptr); ptr+= 8;
1665 state->dellink= mi_sizekorr(ptr); ptr+= 8;
1666 state->first_bitmap_with_space= mi_sizekorr(ptr); ptr+= 8;
1667 state->state.key_file_length = mi_sizekorr(ptr); ptr+= 8;
1668 state->state.data_file_length= mi_sizekorr(ptr); ptr+= 8;
1669 state->state.empty = mi_sizekorr(ptr); ptr+= 8;
1670 state->state.key_empty= mi_sizekorr(ptr); ptr+= 8;
1671 state->auto_increment=mi_uint8korr(ptr); ptr+= 8;
1672 state->state.checksum=(ha_checksum) mi_uint8korr(ptr);ptr+= 8;
1673 state->create_trid= mi_uint8korr(ptr); ptr+= 8;
1674 state->status = mi_uint4korr(ptr); ptr+= 4;
1675 state->update_count=mi_uint4korr(ptr); ptr+= 4;
1676 state->sortkey= (uint) *ptr++;
1677 ptr++; /* reserved */
1678
1679 ptr+= state->state_diff_length;
1680
1681 for (i=0; i < keys; i++)
1682 {
1683 state->key_root[i]= mi_sizekorr(ptr); ptr+= 8;
1684 }
1685 state->key_del= mi_sizekorr(ptr); ptr+= 8;
1686 state->sec_index_changed = mi_uint4korr(ptr); ptr+= 4;
1687 state->sec_index_used = mi_uint4korr(ptr); ptr+= 4;
1688 state->version = mi_uint4korr(ptr); ptr+= 4;
1689 state->key_map = mi_uint8korr(ptr); ptr+= 8;
1690 state->create_time = (time_t) mi_sizekorr(ptr); ptr+= 8;
1691 state->recover_time =(time_t) mi_sizekorr(ptr); ptr+= 8;
1692 state->check_time = (time_t) mi_sizekorr(ptr); ptr+= 8;
1693 state->records_at_analyze= mi_sizekorr(ptr); ptr+= 8;
1694 ptr+= keys * 4; /* Skip reserved bytes */
1695 for (i=0 ; i < key_parts ; i++)
1696 {
1697 float8get(state->rec_per_key_part[i], ptr); ptr+= 8;
1698 state->nulls_per_key_part[i]= mi_uint4korr(ptr); ptr+= 4;
1699 }
1700
1701 DBUG_PRINT("info", ("Records: %lld", state->state.records));
1702 DBUG_RETURN(ptr);
1703 }
1704
1705
1706 /**
1707 @brief Fills the state by reading its copy on disk.
1708
1709 Should not be called for transactional tables, as their state on disk is
1710 rarely current and so is often misleading for a reader.
1711 Does nothing in single user mode.
1712
1713 @param file file to read from
1714 @param state state which will be filled
1715 */
1716
_ma_state_info_read_dsk(File file,MARIA_STATE_INFO * state)1717 uint _ma_state_info_read_dsk(File file __attribute__((unused)),
1718 MARIA_STATE_INFO *state __attribute__((unused)))
1719 {
1720 #ifdef MARIA_EXTERNAL_LOCKING
1721 uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
1722
1723 /* trick to detect transactional tables */
1724 DBUG_ASSERT(state->create_rename_lsn == LSN_IMPOSSIBLE);
1725 if (!maria_single_user)
1726 {
1727 if (mysql_file_pread(file, buff, state->state_length, 0L, MYF(MY_NABP)))
1728 return 1;
1729 _ma_state_info_read(buff, state);
1730 }
1731 #endif
1732 return 0;
1733 }
1734
1735
1736 /****************************************************************************
1737 ** store MARIA_BASE_INFO
1738 ****************************************************************************/
1739
_ma_base_info_write(File file,MARIA_BASE_INFO * base)1740 uint _ma_base_info_write(File file, MARIA_BASE_INFO *base)
1741 {
1742 uchar buff[MARIA_BASE_INFO_SIZE], *ptr=buff;
1743
1744 bmove(ptr, maria_uuid, MY_UUID_SIZE);
1745 ptr+= MY_UUID_SIZE;
1746 mi_sizestore(ptr,base->keystart); ptr+= 8;
1747 mi_sizestore(ptr,base->max_data_file_length); ptr+= 8;
1748 mi_sizestore(ptr,base->max_key_file_length); ptr+= 8;
1749 mi_rowstore(ptr,base->records); ptr+= 8;
1750 mi_rowstore(ptr,base->reloc); ptr+= 8;
1751 mi_int4store(ptr,base->mean_row_length); ptr+= 4;
1752 mi_int4store(ptr,base->reclength); ptr+= 4;
1753 mi_int4store(ptr,base->pack_reclength); ptr+= 4;
1754 mi_int4store(ptr,base->min_pack_length); ptr+= 4;
1755 mi_int4store(ptr,base->max_pack_length); ptr+= 4;
1756 mi_int4store(ptr,base->min_block_length); ptr+= 4;
1757 mi_int2store(ptr,base->fields); ptr+= 2;
1758 mi_int2store(ptr,base->fixed_not_null_fields); ptr+= 2;
1759 mi_int2store(ptr,base->fixed_not_null_fields_length); ptr+= 2;
1760 mi_int2store(ptr,base->max_field_lengths); ptr+= 2;
1761 mi_int2store(ptr,base->pack_fields); ptr+= 2;
1762 mi_int2store(ptr,base->extra_options) ptr+= 2;
1763 mi_int2store(ptr,base->null_bytes); ptr+= 2;
1764 mi_int2store(ptr,base->original_null_bytes); ptr+= 2;
1765 mi_int2store(ptr,base->field_offsets); ptr+= 2;
1766 mi_int2store(ptr,base->language); ptr+= 2;
1767 mi_int2store(ptr,base->block_size); ptr+= 2;
1768 *ptr++= base->rec_reflength;
1769 *ptr++= base->key_reflength;
1770 *ptr++= base->keys;
1771 *ptr++= base->auto_key;
1772 *ptr++= base->born_transactional;
1773 *ptr++= base->compression_algorithm;
1774 mi_int2store(ptr,base->pack_bytes); ptr+= 2;
1775 mi_int2store(ptr,base->blobs); ptr+= 2;
1776 mi_int2store(ptr,base->max_key_block_length); ptr+= 2;
1777 mi_int2store(ptr,base->max_key_length); ptr+= 2;
1778 mi_int2store(ptr,base->extra_alloc_bytes); ptr+= 2;
1779 *ptr++= base->extra_alloc_procent;
1780 mi_int3store(ptr, base->s3_block_size); ptr+= 3;
1781 bzero(ptr,13); ptr+= 13; /* extra */
1782 DBUG_ASSERT((ptr - buff) == MARIA_BASE_INFO_SIZE);
1783 return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
1784 }
1785
1786
1787 /*--------------------------------------------------------------------------
1788 maria_keydef
1789 ---------------------------------------------------------------------------*/
1790
_ma_keydef_write(File file,MARIA_KEYDEF * keydef)1791 my_bool _ma_keydef_write(File file, MARIA_KEYDEF *keydef)
1792 {
1793 uchar buff[MARIA_KEYDEF_SIZE];
1794 uchar *ptr=buff;
1795
1796 *ptr++= (uchar) keydef->keysegs;
1797 *ptr++= keydef->key_alg; /* Rtree or Btree */
1798 mi_int2store(ptr,keydef->flag); ptr+= 2;
1799 mi_int2store(ptr,keydef->block_length); ptr+= 2;
1800 mi_int2store(ptr,keydef->keylength); ptr+= 2;
1801 mi_int2store(ptr,keydef->minlength); ptr+= 2;
1802 mi_int2store(ptr,keydef->maxlength); ptr+= 2;
1803 return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
1804 }
1805
/*
  Unpack one key definition from the image at 'ptr'.

  The members that are not part of the image are reset: version and
  ftkey_nr to 0 and parser to the default fulltext parser.

  @return pointer to the first byte after the consumed image
*/
uchar *_ma_keydef_read(uchar *ptr, MARIA_KEYDEF *keydef)
{
  /* Two leading single byte fields */
  keydef->keysegs= (uint) ptr[0];
  keydef->key_alg= ptr[1];                      /* Rtree or Btree */
  ptr+= 2;

  /* Five 2 byte fields */
  keydef->flag= mi_uint2korr(ptr);
  ptr+= 2;
  keydef->block_length= mi_uint2korr(ptr);
  ptr+= 2;
  keydef->keylength= mi_uint2korr(ptr);
  ptr+= 2;
  keydef->minlength= mi_uint2korr(ptr);
  ptr+= 2;
  keydef->maxlength= mi_uint2korr(ptr);
  ptr+= 2;

  /* Not saved on disk */
  keydef->version=  0;
  keydef->parser=   &ft_default_parser;
  keydef->ftkey_nr= 0;
  return ptr;
}
1821
1822 /***************************************************************************
1823 ** maria_keyseg
1824 ***************************************************************************/
1825
_ma_keyseg_write(File file,const HA_KEYSEG * keyseg)1826 my_bool _ma_keyseg_write(File file, const HA_KEYSEG *keyseg)
1827 {
1828 uchar buff[HA_KEYSEG_SIZE];
1829 uchar *ptr=buff;
1830 ulong pos;
1831
1832 *ptr++= keyseg->type;
1833 *ptr++= keyseg->language & 0xFF; /* Collation ID, low byte */
1834 *ptr++= keyseg->null_bit;
1835 *ptr++= keyseg->bit_start;
1836 *ptr++= keyseg->language >> 8; /* Collation ID, high byte */
1837 *ptr++= keyseg->bit_length;
1838 mi_int2store(ptr,keyseg->flag); ptr+= 2;
1839 mi_int2store(ptr,keyseg->length); ptr+= 2;
1840 mi_int4store(ptr,keyseg->start); ptr+= 4;
1841 pos= keyseg->null_bit ? keyseg->null_pos : keyseg->bit_pos;
1842 mi_int4store(ptr, pos);
1843 ptr+=4;
1844
1845 return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
1846 }
1847
1848
/*
  Fill 'keyseg' from the stored key segment image at 'ptr'.

  The language (collation id) is assembled from two separate bytes of
  the image.  The last four-byte field is first read into null_pos:
    - if null_bit is non-zero, bit_pos becomes
      null_pos + (null_bit == 7);
    - otherwise the value is moved to bit_pos and null_pos is set to 0.
  charset is set to 0 here.

  Returns a pointer to the first byte after the 18 bytes consumed.
*/

uchar *_ma_keyseg_read(uchar *ptr, HA_KEYSEG *keyseg)
{
  keyseg->type=       ptr[0];
  keyseg->language=   ptr[1] + (((uint16) ptr[4]) << 8);
  keyseg->null_bit=   ptr[2];
  keyseg->bit_start=  ptr[3];
  keyseg->bit_length= ptr[5];
  keyseg->flag=       mi_uint2korr(ptr + 6);
  keyseg->length=     mi_uint2korr(ptr + 8);
  keyseg->start=      mi_uint4korr(ptr + 10);
  keyseg->null_pos=   mi_uint4korr(ptr + 14);
  keyseg->charset=    0;                        /* Will be filled in later */

  if (!keyseg->null_bit)
  {
    /* No null bit: the stored position is the bit position */
    keyseg->bit_pos=  (uint16) keyseg->null_pos;
    keyseg->null_pos= 0;
  }
  else
    keyseg->bit_pos=  (uint16) (keyseg->null_pos + (keyseg->null_bit == 7));

  return ptr + 18;
}
1871
1872 /*--------------------------------------------------------------------------
1873 maria_uniquedef
1874 ---------------------------------------------------------------------------*/
1875
_ma_uniquedef_write(File file,MARIA_UNIQUEDEF * def)1876 my_bool _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *def)
1877 {
1878 uchar buff[MARIA_UNIQUEDEF_SIZE];
1879 uchar *ptr=buff;
1880
1881 mi_int2store(ptr,def->keysegs); ptr+=2;
1882 *ptr++= (uchar) def->key;
1883 *ptr++ = (uchar) def->null_are_equal;
1884
1885 return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
1886 }
1887
/*
  Fill 'def' from the stored unique definition image at 'ptr'.

  Reads keysegs (two bytes, mi_uint2korr()), key (one byte) and
  null_are_equal (one byte).

  Returns a pointer to the first byte after the 4 bytes consumed.
*/

uchar *_ma_uniquedef_read(uchar *ptr, MARIA_UNIQUEDEF *def)
{
  def->keysegs= mi_uint2korr(ptr);
  ptr+= 2;
  def->key= *ptr++;
  def->null_are_equal= *ptr++;
  return ptr;
}
1895
1896 /***************************************************************************
1897 ** MARIA_COLUMNDEF
1898 ***************************************************************************/
1899
_ma_columndef_write(File file,MARIA_COLUMNDEF * columndef)1900 my_bool _ma_columndef_write(File file, MARIA_COLUMNDEF *columndef)
1901 {
1902 uchar buff[MARIA_COLUMNDEF_SIZE];
1903 uchar *ptr=buff;
1904 uint low_offset= (uint) (columndef->offset & 0xffff);
1905 uint high_offset= (uint) (columndef->offset >> 16);
1906
1907 mi_int2store(ptr,(ulong) columndef->column_nr); ptr+= 2;
1908 mi_int2store(ptr, low_offset); ptr+= 2;
1909 mi_int2store(ptr,columndef->type); ptr+= 2;
1910 mi_int2store(ptr,columndef->length); ptr+= 2;
1911 mi_int2store(ptr,columndef->fill_length); ptr+= 2;
1912 mi_int2store(ptr,columndef->null_pos); ptr+= 2;
1913 mi_int2store(ptr,columndef->empty_pos); ptr+= 2;
1914
1915 (*ptr++)= columndef->null_bit;
1916 (*ptr++)= columndef->empty_bit;
1917 mi_int2store(ptr, high_offset); ptr+= 2;
1918 ptr[0]= ptr[1]= 0; ptr+= 2; /* For future */
1919 return mysql_file_write(file, buff, (size_t) (ptr-buff), MYF(MY_NABP)) != 0;
1920 }
1921
/*
  Fill 'columndef' from the stored column definition image at 'ptr'.

  The offset is rebuilt from two two-byte fields: the one following
  column_nr supplies the low 16 bits and the one following empty_bit is
  shifted left by 16 and or-ed in.  type is read as a signed value
  (mi_sint2korr()); the other two-byte fields are read unsigned.  The
  last two bytes of the image are skipped.

  Returns a pointer to the first byte after the 20 bytes consumed.
*/

uchar *_ma_columndef_read(uchar *ptr, MARIA_COLUMNDEF *columndef)
{
  uint offset_low=  mi_uint2korr(ptr + 2);
  uint offset_high= mi_uint2korr(ptr + 16);

  columndef->column_nr=   mi_uint2korr(ptr);
  columndef->offset=      offset_low | ((ulong) offset_high << 16);
  columndef->type=        mi_sint2korr(ptr + 4);
  columndef->length=      mi_uint2korr(ptr + 6);
  columndef->fill_length= mi_uint2korr(ptr + 8);
  columndef->null_pos=    mi_uint2korr(ptr + 10);
  columndef->empty_pos=   mi_uint2korr(ptr + 12);
  columndef->null_bit=    (uint8) ptr[14];
  columndef->empty_bit=   (uint8) ptr[15];

  /* 18 bytes of fields plus 2 bytes that are not interpreted */
  return ptr + 20;
}
1939
_ma_column_nr_write(File file,uint16 * offsets,uint columns)1940 my_bool _ma_column_nr_write(File file, uint16 *offsets, uint columns)
1941 {
1942 uchar *buff, *ptr, *end;
1943 size_t size= columns*2;
1944 my_bool res;
1945
1946 if (!(buff= (uchar*) my_alloca(size)))
1947 return 1;
1948 for (ptr= buff, end= ptr + size; ptr < end ; ptr+= 2, offsets++)
1949 int2store(ptr, *offsets);
1950 res= mysql_file_write(file, buff, size, MYF(MY_NABP)) != 0;
1951 my_afree(buff);
1952 return res;
1953 }
1954
1955
/*
  Read an array of two-byte values from the buffer at 'ptr' into
  'offsets', using uint2korr() for each element.  columns*2 bytes are
  consumed.

  Returns a pointer to the first byte after the consumed data.
*/

uchar *_ma_column_nr_read(uchar *ptr, uint16 *offsets, uint columns)
{
  size_t size= columns*2;
  uchar *stop= ptr + size;

  while (ptr < stop)
  {
    *offsets= uint2korr(ptr);
    offsets++;
    ptr+= 2;
  }
  return ptr;
}
1964
1965 /**
1966 @brief Set callbacks for data pages
1967
1968 @note
1969 We don't use pagecache_file_init here, as we want to keep the
1970 code readable
1971 */
1972
void _ma_set_data_pagecache_callbacks(PAGECACHE_FILE *file,
                                      MARIA_SHARE *share)
{
  /* Start from the null hooks, then install the ones that apply */
  pagecache_file_set_null_hooks(file);
  file->callback_data= (uchar*) share;
  file->post_write_hook= maria_page_write_failure;

  if (!share->temporary)
  {
    file->post_read_hook= &maria_page_crc_check_data;

    /* The write hook depends on whether page checksums are enabled */
    if (!(share->options & HA_OPTION_PAGE_CHECKSUM))
      file->pre_write_hook= &maria_page_filler_set_normal;
    else
      file->pre_write_hook= &maria_page_crc_set_normal;

    /* Only a share that is currently transactional flushes the log */
    if (share->now_transactional)
      file->flush_log_callback= maria_flush_log_for_page;
    else
      file->flush_log_callback= &maria_flush_log_for_page_none;
  }
  else
  {
    /* share->temporary is set: use the "none" variants of all hooks */
    file->post_read_hook= &maria_page_crc_check_none;
    file->pre_write_hook= &maria_page_filler_set_none;
    file->flush_log_callback= &maria_flush_log_for_page_none;
  }

  /* Encrypted table: hand the file over to the crypt layer as well */
  if (share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED)
    ma_crypt_set_data_pagecache_callbacks(file, share);
}
2002
2003
2004 /**
2005 @brief Set callbacks for index pages
2006
2007 @note
2008 We don't use pagecache_file_init here, as we want to keep the
2009 code readable
2010 */
2011
void _ma_set_index_pagecache_callbacks(PAGECACHE_FILE *file,
                                       MARIA_SHARE *share)
{
  /* Start from the null hooks, then install the ones that apply */
  pagecache_file_set_null_hooks(file);
  file->callback_data= (uchar*) share;
  file->post_write_hook= maria_page_write_failure;

  if (!share->temporary)
  {
    file->post_read_hook= &maria_page_crc_check_index;

    /* The write hook depends on whether page checksums are enabled */
    if (!(share->options & HA_OPTION_PAGE_CHECKSUM))
      file->pre_write_hook= &maria_page_filler_set_normal;
    else
      file->pre_write_hook= &maria_page_crc_set_index;

    /* Only a share that is currently transactional flushes the log */
    if (share->now_transactional)
      file->flush_log_callback= maria_flush_log_for_page;
    else
      file->flush_log_callback= &maria_flush_log_for_page_none;
  }
  else
  {
    /* share->temporary is set: use the "none" variants of all hooks */
    file->post_read_hook= &maria_page_crc_check_none;
    file->pre_write_hook= &maria_page_filler_set_none;
    file->flush_log_callback= &maria_flush_log_for_page_none;
  }

  /* Encrypted table: hand the file over to the crypt layer as well */
  if (share->base.extra_options & MA_EXTRA_OPTIONS_ENCRYPTED)
    ma_crypt_set_index_pagecache_callbacks(file, share);
}
2042
2043
2044 /**************************************************************************
2045 Open data file
2046 We can't use dup() here as the data file descriptors need to have different
2047 active seek-positions.
2048 *************************************************************************/
2049
int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share)
{
  File fd;
  myf open_flags= MY_WME;

  /* Refuse symlinks when the share was opened with O_NOFOLLOW */
  if (share->mode & O_NOFOLLOW)
    open_flags|= MY_NOSYMLINKS;
  if (share->temporary)
    open_flags|= MY_THREAD_SPECIFIC;

  DEBUG_SYNC_C("mi_open_datafile");
  fd= mysql_file_open(key_file_dfile, share->data_file_name.str,
                      share->mode | O_SHARE | O_CLOEXEC, open_flags);

  /* The same descriptor is recorded for the bitmap and for the handler */
  share->bitmap.file.file= fd;
  info->dfile.file= fd;

  /* 0 if a valid descriptor was obtained, 1 otherwise */
  return fd < 0;
}
2061
2062
/*
  Open the index file named by share->unique_file_name and store the
  descriptor in share->kfile.file.  The file is opened with O_NOFOLLOW
  and MY_NOSYMLINKS.

  Returns 0 if share->kfile.file holds a non-negative descriptor
  afterwards, 1 otherwise.
*/

int _ma_open_keyfile(MARIA_SHARE *share)
{
  int open_mode;

  /*
    share->kfile may only be changed while holding intern_lock, as a
    checkpoint may be running concurrently.
  */
  mysql_mutex_lock(&share->intern_lock);
  open_mode= share->mode | O_SHARE | O_NOFOLLOW | O_CLOEXEC;
  share->kfile.file= mysql_file_open(key_file_kfile,
                                     share->unique_file_name.str,
                                     open_mode,
                                     MYF(MY_WME | MY_NOSYMLINKS));
  mysql_mutex_unlock(&share->intern_lock);

  return share->kfile.file < 0 ? 1 : 0;
}
2077
2078
2079 /*
2080 Disable all indexes.
2081
2082 SYNOPSIS
2083 maria_disable_indexes()
2084 info A pointer to the MARIA storage engine MARIA_HA struct.
2085
2086 DESCRIPTION
2087 Disable all indexes.
2088
2089 RETURN
2090 0 ok
2091 */
2092
int maria_disable_indexes(MARIA_HA *info)
{
  /* Clear the key map held in the state of the handler's share */
  maria_clear_all_keys_active(info->s->state.key_map);
  return 0;
}
2100
2101
2102 /*
2103 Enable all indexes
2104
2105 SYNOPSIS
2106 maria_enable_indexes()
2107 info A pointer to the MARIA storage engine MARIA_HA struct.
2108
2109 DESCRIPTION
2110 Enable all indexes. The indexes might have been disabled
2111 by maria_disable_index() before.
2112 The function works only if both data and indexes are empty,
2113 otherwise a repair is required.
2114 To be sure, call handler::delete_all_rows() before.
2115
2116 RETURN
2117 0 ok
2118 HA_ERR_CRASHED data or index is non-empty.
2119 */
2120
maria_enable_indexes(MARIA_HA * info)2121 int maria_enable_indexes(MARIA_HA *info)
2122 {
2123 int error= 0;
2124 MARIA_SHARE *share= info->s;
2125 DBUG_ENTER("maria_enable_indexes");
2126
2127 if ((share->state.state.data_file_length !=
2128 (share->data_file_type == BLOCK_RECORD ? share->block_size : 0)) ||
2129 (share->state.state.key_file_length != share->base.keystart))
2130 {
2131 DBUG_PRINT("error", ("data_file_length: %lu key_file_length: %lu",
2132 (ulong) share->state.state.data_file_length,
2133 (ulong) share->state.state.key_file_length));
2134 _ma_set_fatal_error(share, HA_ERR_CRASHED);
2135 error= HA_ERR_CRASHED;
2136 }
2137 else
2138 maria_set_all_keys_active(share->state.key_map, share->base.keys);
2139 DBUG_RETURN(error);
2140 }
2141
2142
2143 /*
2144 Test if indexes are disabled.
2145
2146 SYNOPSIS
2147 maria_indexes_are_disabled()
2148 info A pointer to the MARIA storage engine MARIA_HA struct.
2149
2150 DESCRIPTION
2151 Test if indexes are disabled.
2152
2153 RETURN
2154 0 indexes are not disabled
2155 1 all indexes are disabled
2156 2 non-unique indexes are disabled
2157 */
2158
maria_indexes_are_disabled(MARIA_HA * info)2159 int maria_indexes_are_disabled(MARIA_HA *info)
2160 {
2161 MARIA_SHARE *share= info->s;
2162
2163 /*
2164 No keys or all are enabled. keys is the number of keys. Left shifted
2165 gives us only one bit set. When decreased by one, gives us all all bits
2166 up to this one set and it gets unset.
2167 */
2168 if (!share->base.keys ||
2169 (maria_is_all_keys_active(share->state.key_map, share->base.keys)))
2170 return 0;
2171
2172 /* All are disabled */
2173 if (maria_is_any_key_active(share->state.key_map))
2174 return 1;
2175
2176 /*
2177 We have keys. Some enabled, some disabled.
2178 Don't check for any non-unique disabled but return directly 2
2179 */
2180 return 2;
2181 }
2182
2183
maria_scan_init_dummy(MARIA_HA * info)2184 static my_bool maria_scan_init_dummy(MARIA_HA *info __attribute__((unused)))
2185 {
2186 return 0;
2187 }
2188
/* No-op scan-end routine: ignores 'info' and does nothing. */

static void
maria_scan_end_dummy(MARIA_HA *info __attribute__((unused)))
{
  /* Intentionally empty */
}
2192
maria_once_init_dummy(MARIA_SHARE * share,File dfile)2193 static my_bool maria_once_init_dummy(MARIA_SHARE *share
2194 __attribute__((unused)),
2195 File dfile __attribute__((unused)))
2196 {
2197 return 0;
2198 }
2199
maria_once_end_dummy(MARIA_SHARE * share)2200 static my_bool maria_once_end_dummy(MARIA_SHARE *share __attribute__((unused)))
2201 {
2202 return 0;
2203 }
2204