1 /* Copyright (C) 2006 MySQL AB & MySQL Finland AB & TCX DataKonsult AB
2 Copyright (c) 2009, 2020, MariaDB Corporation Ab
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16
17 /* This file is included by all internal maria files */
18
19 #ifndef MARIA_DEF_INCLUDED
20 #define MARIA_DEF_INCLUDED
21
22 #include <my_global.h>
23
24 #ifdef EMBEDDED_LIBRARY
25 #undef WITH_S3_STORAGE_ENGINE
26 #endif
27
28 #include "maria.h" /* Structs & some defines */
29 #include "ma_pagecache.h"
30 #include <myisampack.h> /* packing of keys */
31 #include <my_tree.h>
32 #include <my_bitmap.h>
33 #include <my_pthread.h>
34 #include <thr_lock.h>
35 #include <hash.h>
36 #include "ma_loghandler.h"
37 #include "ma_control_file.h"
38 #include "ma_state.h"
39 #include <waiting_threads.h>
40 #include <mysql/psi/mysql_file.h>
41
42 #define MARIA_CANNOT_ROLLBACK
43
44 C_MODE_START
45
/*
  Cap the number of keys at HA_MAX_POSSIBLE_KEY; see myisamchk.h for details.
*/

#if MAX_INDEXES <= HA_MAX_POSSIBLE_KEY
#define MARIA_MAX_KEY           MAX_INDEXES         /* Max allowed keys */
#else
#define MARIA_MAX_KEY           HA_MAX_POSSIBLE_KEY /* Max allowed keys */
#endif
55
#define MARIA_NAME_IEXT         ".MAI"
#define MARIA_NAME_DEXT         ".MAD"
/*
  Max extra space to use when sorting keys (2G).
  Written with 'long long' constants: where 'long' is 32 bits the product
  2*1024L*1024L*1024L does not fit in a signed long, which is undefined
  behavior.
*/
#define MARIA_MAX_TEMP_LENGTH   (2*1024LL*1024LL*1024LL)
/* Possible values for maria_block_size (must be power of 2) */
#define MARIA_KEY_BLOCK_LENGTH          8192    /* default key block length */
#define MARIA_MIN_KEY_BLOCK_LENGTH      1024    /* Min key block length */
#define MARIA_MAX_KEY_BLOCK_LENGTH      32768   /* Max key block length */
/* Minimal page cache when we only want to be able to scan a table */
#define MARIA_MIN_PAGE_CACHE_SIZE       (8192L*16L)
66
/*
  Key map helpers.

  In the following macros '_keyno_' is 0 .. keys-1.
  If there can be more keys than bits in the key_map, the highest bit
  is for all upper keys. They cannot be switched individually.
  This means that clearing of high keys is ignored, setting one high key
  sets all high keys.

  Every macro that assigns to its first argument is wrapped in an outer
  pair of parentheses, so that the expansion is a single expression and
  operators next to the macro call cannot bind into the assigned value.
*/
#define MARIA_KEYMAP_BITS      (8 * SIZEOF_LONG_LONG)
#define MARIA_KEYMAP_HIGH_MASK (1ULL << (MARIA_KEYMAP_BITS - 1))
/* Mask with the lowest '_keys_' bits set (all bits if '_keys_' >= map size) */
#define maria_get_mask_all_keys_active(_keys_) \
                            (((_keys_) < MARIA_KEYMAP_BITS) ? \
                             ((1ULL << (_keys_)) - 1ULL) : \
                             (~ 0ULL))
#if MARIA_MAX_KEY > MARIA_KEYMAP_BITS
/* More keys than bits: keys >= MARIA_KEYMAP_BITS share the highest bit */
#define maria_is_key_active(_keymap_,_keyno_) \
                            (((_keyno_) < MARIA_KEYMAP_BITS) ? \
                             MY_TEST((_keymap_) & (1ULL << (_keyno_))) : \
                             MY_TEST((_keymap_) & MARIA_KEYMAP_HIGH_MASK))
#define maria_set_key_active(_keymap_,_keyno_) \
                            ((_keymap_)|= (((_keyno_) < MARIA_KEYMAP_BITS) ? \
                                           (1ULL << (_keyno_)) : \
                                           MARIA_KEYMAP_HIGH_MASK))
#define maria_clear_key_active(_keymap_,_keyno_) \
                            ((_keymap_)&= (((_keyno_) < MARIA_KEYMAP_BITS) ? \
                                           (~ (1ULL << (_keyno_))) : \
                                           (~ (0ULL)) /*ignore*/ ))
#else
/* Every key has its own bit */
#define maria_is_key_active(_keymap_,_keyno_) \
                            MY_TEST((_keymap_) & (1ULL << (_keyno_)))
#define maria_set_key_active(_keymap_,_keyno_) \
                            ((_keymap_)|= (1ULL << (_keyno_)))
#define maria_clear_key_active(_keymap_,_keyno_) \
                            ((_keymap_)&= (~ (1ULL << (_keyno_))))
#endif
#define maria_is_any_key_active(_keymap_) \
                            MY_TEST((_keymap_))
#define maria_is_all_keys_active(_keymap_,_keys_) \
                            ((_keymap_) == maria_get_mask_all_keys_active(_keys_))
#define maria_set_all_keys_active(_keymap_,_keys_) \
                            ((_keymap_)= maria_get_mask_all_keys_active(_keys_))
#define maria_clear_all_keys_active(_keymap_) \
                            ((_keymap_)= 0)
#define maria_intersect_keys_active(_to_,_from_) \
                            ((_to_)&= (_from_))
#define maria_is_any_intersect_keys_active(_keymap1_,_keys_,_keymap2_) \
                            ((_keymap1_) & (_keymap2_) & \
                             maria_get_mask_all_keys_active(_keys_))
#define maria_copy_keys_active(_to_,_maxkeys_,_from_) \
                            ((_to_)= (maria_get_mask_all_keys_active(_maxkeys_) & \
                                      (_from_)))
117
118 /* Param to/from maria_info */
119
120 typedef struct st_maria_info
121 {
122 ha_rows records; /* Records in database */
123 ha_rows deleted; /* Deleted records in database */
124 MARIA_RECORD_POS recpos; /* Pos for last used record */
125 MARIA_RECORD_POS newrecpos; /* Pos if we write new record */
126 MARIA_RECORD_POS dup_key_pos; /* Position to record with dup key */
127 my_off_t data_file_length; /* Length of data file */
128 my_off_t max_data_file_length, index_file_length;
129 my_off_t max_index_file_length, delete_length;
130 ulonglong auto_increment;
131 ulonglong key_map; /* Which keys are used */
132 time_t create_time; /* When table was created */
133 time_t check_time;
134 time_t update_time;
135 ulong record_offset;
136 double *rec_per_key; /* for sql optimizing */
137 ulong reclength; /* Recordlength */
138 ulong mean_reclength; /* Mean recordlength (if packed) */
139 char *data_file_name, *index_file_name;
140 enum data_file_type data_file_type;
141 uint keys; /* Number of keys in use */
142 uint options; /* HA_OPTION_... used */
143 uint reflength;
144 int errkey, /* With key was dupplicated on err */
145 sortkey; /* clustered by this key */
146 File filenr; /* (uniq) filenr for datafile */
147 } MARIA_INFO;
148
149 struct st_maria_share;
150 struct st_maria_handler; /* For referense */
151 struct st_maria_keydef;
152
153 typedef struct st_maria_key /* Internal info about a key */
154 {
155 uchar *data; /* Data for key */
156 struct st_maria_keydef *keyinfo; /* Definition for key */
157 uint data_length; /* Length of key data */
158 uint ref_length; /* record ref + transid */
159 uint32 flag; /* 0 or SEARCH_PART_KEY */
160 } MARIA_KEY;
161
162 typedef struct st_maria_decode_tree /* Decode huff-table */
163 {
164 uint16 *table;
165 uint quick_table_bits;
166 uchar *intervalls;
167 } MARIA_DECODE_TREE;
168
169
170 typedef struct s3_info S3_INFO;
171
172 extern ulong maria_block_size, maria_checkpoint_frequency;
173 extern ulong maria_concurrent_insert;
174 extern my_bool maria_flush, maria_single_user, maria_page_checksums;
175 extern my_off_t maria_max_temp_length;
176 extern ulong maria_bulk_insert_tree_size, maria_data_pointer_size;
177 extern MY_TMPDIR *maria_tmpdir;
178 extern my_bool maria_encrypt_tables;
179
180 /*
181 This is used to check if a symlink points into the mysql data home,
182 which is normally forbidden as it can be used to get access to
183 not privileged data
184 */
185 extern int (*maria_test_invalid_symlink)(const char *filename);
186
187 /* Prototypes for maria-functions */
188
189 extern int maria_init(void);
190 extern void maria_end(void);
191 extern my_bool maria_upgrade(void);
192 extern int maria_close(MARIA_HA *file);
193 extern int maria_delete(MARIA_HA *file, const uchar *buff);
194 extern MARIA_HA *maria_open(const char *name, int mode,
195 uint wait_if_locked, S3_INFO *s3);
196 extern int maria_panic(enum ha_panic_function function);
197 extern int maria_rfirst(MARIA_HA *file, uchar *buf, int inx);
198 extern int maria_rkey(MARIA_HA *file, uchar *buf, int inx,
199 const uchar *key, key_part_map keypart_map,
200 enum ha_rkey_function search_flag);
201 extern int maria_rlast(MARIA_HA *file, uchar *buf, int inx);
202 extern int maria_rnext(MARIA_HA *file, uchar *buf, int inx);
203 extern int maria_rnext_same(MARIA_HA *info, uchar *buf);
204 extern int maria_rprev(MARIA_HA *file, uchar *buf, int inx);
205 extern int maria_rrnd(MARIA_HA *file, uchar *buf,
206 MARIA_RECORD_POS pos);
207 extern int maria_scan_init(MARIA_HA *file);
208 extern int maria_scan(MARIA_HA *file, uchar *buf);
209 extern void maria_scan_end(MARIA_HA *file);
210 extern int maria_rsame(MARIA_HA *file, uchar *record, int inx);
211 extern int maria_rsame_with_pos(MARIA_HA *file, uchar *record,
212 int inx, MARIA_RECORD_POS pos);
213 extern int maria_update(MARIA_HA *file, const uchar *old,
214 const uchar *new_record);
215 extern int maria_write(MARIA_HA *file, const uchar *buff);
216 extern MARIA_RECORD_POS maria_position(MARIA_HA *file);
217 extern int maria_status(MARIA_HA *info, MARIA_INFO *x, uint flag);
218 extern int maria_lock_database(MARIA_HA *file, int lock_type);
219 extern int maria_delete_table(const char *name);
220 extern int maria_rename(const char *from, const char *to);
221 extern int maria_extra(MARIA_HA *file,
222 enum ha_extra_function function, void *extra_arg);
223 extern int maria_reset(MARIA_HA *file);
224 extern ha_rows maria_records_in_range(MARIA_HA *info, int inx,
225 const key_range *min_key,
226 const key_range *max_key,
227 page_range *page);
228 extern int maria_is_changed(MARIA_HA *info);
229 extern int maria_delete_all_rows(MARIA_HA *info);
230 extern uint maria_get_pointer_length(ulonglong file_length, uint def);
231 extern int maria_commit(MARIA_HA *info);
232 extern int maria_begin(MARIA_HA *info);
233 extern void maria_disable_logging(MARIA_HA *info);
234 extern void maria_enable_logging(MARIA_HA *info);
235
/* Recovery option bits */
#define HA_RECOVER_NONE         0       /* No automatic recover */
#define HA_RECOVER_DEFAULT      1       /* Automatic recover active */
#define HA_RECOVER_BACKUP       2       /* Make a backup file on recover */
#define HA_RECOVER_FORCE        4       /* Recover even if we lose rows */
#define HA_RECOVER_QUICK        8       /* Don't check rows in data file */

/* All recovery bits combined */
#define HA_RECOVER_ANY \
  (HA_RECOVER_DEFAULT | HA_RECOVER_BACKUP | HA_RECOVER_FORCE | \
   HA_RECOVER_QUICK)
243
/* Values passed to mysql_mariachk_table */

#define MARIA_CHK_REPAIR        1       /* equivalent to mariachk -r */
#define MARIA_CHK_VERIFY        2       /* Verify, run repair if failure */
248
249 typedef uint maria_bit_type;
250
251 typedef struct st_maria_bit_buff
252 { /* Used for packing of record */
253 maria_bit_type current_byte;
254 uint bits;
255 uchar *pos, *end, *blob_pos, *blob_end;
256 uint error;
257 } MARIA_BIT_BUFF;
258
259 /* functions in maria_check */
260 void maria_chk_init(HA_CHECK *param);
261 void maria_chk_init_for_check(HA_CHECK *param, MARIA_HA *info);
262 int maria_chk_status(HA_CHECK *param, MARIA_HA *info);
263 int maria_chk_del(HA_CHECK *param, MARIA_HA *info, ulonglong test_flag);
264 int maria_chk_size(HA_CHECK *param, MARIA_HA *info);
265 int maria_chk_key(HA_CHECK *param, MARIA_HA *info);
266 int maria_chk_data_link(HA_CHECK *param, MARIA_HA *info, my_bool extend);
267 int maria_repair(HA_CHECK *param, MARIA_HA *info, char * name, my_bool);
268 int maria_sort_index(HA_CHECK *param, MARIA_HA *info, char * name);
269 int maria_zerofill(HA_CHECK *param, MARIA_HA *info, const char *name);
270 int maria_repair_by_sort(HA_CHECK *param, MARIA_HA *info,
271 const char *name, my_bool rep_quick);
272 int maria_repair_parallel(HA_CHECK *param, MARIA_HA *info,
273 const char *name, my_bool rep_quick);
274 int maria_change_to_newfile(const char *filename, const char *old_ext,
275 const char *new_ext, time_t backup_time,
276 myf myflags);
277 void maria_lock_memory(HA_CHECK *param);
278 int maria_update_state_info(HA_CHECK *param, MARIA_HA *info, uint update);
279 void maria_update_key_parts(MARIA_KEYDEF *keyinfo, double *rec_per_key_part,
280 ulonglong *unique, ulonglong *notnull,
281 ulonglong records);
282 int maria_filecopy(HA_CHECK *param, File to, File from, my_off_t start,
283 my_off_t length, const char *type);
284 int maria_movepoint(MARIA_HA *info, uchar *record, my_off_t oldpos,
285 my_off_t newpos, uint prot_key);
286 int maria_test_if_almost_full(MARIA_HA *info);
287 int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename);
288 int maria_disable_indexes(MARIA_HA *info);
289 int maria_enable_indexes(MARIA_HA *info);
290 int maria_indexes_are_disabled(MARIA_HA *info);
291 void maria_disable_indexes_for_rebuild(MARIA_HA *info, ha_rows rows,
292 my_bool all_keys);
293 my_bool maria_test_if_sort_rep(MARIA_HA *info, ha_rows rows, ulonglong key_map,
294 my_bool force);
295
296 int maria_init_bulk_insert(MARIA_HA *info, size_t cache_size, ha_rows rows);
297 void maria_flush_bulk_insert(MARIA_HA *info, uint inx);
298 int maria_end_bulk_insert(MARIA_HA *info, my_bool abort);
299 int maria_preload(MARIA_HA *info, ulonglong key_map, my_bool ignore_leaves);
300 void maria_ignore_trids(MARIA_HA *info);
301 my_bool maria_too_big_key_for_sort(MARIA_KEYDEF *key, ha_rows rows);
302
303 /* fulltext functions */
304 FT_INFO *maria_ft_init_search(uint,void *, uint, uchar *, size_t,
305 CHARSET_INFO *, uchar *);
306
307 /* 'Almost-internal' Maria functions */
308
309 void _ma_update_auto_increment_key(HA_CHECK *param, MARIA_HA *info,
310 my_bool repair);
311
312
/* Do extra sanity checking */
#define SANITY_CHECKS 1
#if defined(EXTRA_DEBUG)
#define EXTRA_DEBUG_KEY_CHANGES
#endif
/*
  The following define can be used when one has problems with redo logging.
  Setting it will log the full key page, which can be compared with the
  redo-changed key page. This will however make the aria log files MUCH
  bigger.
*/
#ifdef EXTRA_ARIA_DEBUG
#define EXTRA_STORE_FULL_PAGE_IN_KEY_CHANGES
#endif
/* For testing recovery */
#if defined(TO_BE_REMOVED)
#define IDENTICAL_PAGES_AFTER_RECOVERY 1
#endif
330
#define MAX_NONMAPPED_INSERTS           1000
#define MARIA_MAX_TREE_LEVELS           32
#define MARIA_MAX_RECORD_ON_STACK       16384

/* Flag for maria_open(), specific for maria_pack */
#define HA_OPEN_IGNORE_MOVED_STATE      (1U << 30)
337
338 typedef struct st_sort_key_blocks MA_SORT_KEY_BLOCKS;
339 typedef struct st_sort_ftbuf MA_SORT_FT_BUF;
340
341 extern PAGECACHE maria_pagecache_var, *maria_pagecache;
342 int maria_assign_to_pagecache(MARIA_HA *info, ulonglong key_map,
343 PAGECACHE *key_cache);
344 void maria_change_pagecache(PAGECACHE *old_key_cache,
345 PAGECACHE *new_key_cache);
346
347 typedef struct st_maria_sort_info
348 {
349 /* sync things */
350 mysql_mutex_t mutex;
351 mysql_cond_t cond;
352 MARIA_HA *info, *new_info;
353 HA_CHECK *param;
354 char *buff;
355 MA_SORT_KEY_BLOCKS *key_block, *key_block_end;
356 MA_SORT_FT_BUF *ft_buf;
357 my_off_t filelength, dupp, buff_length;
358 pgcache_page_no_t page;
359 ha_rows max_records;
360 uint current_key, total_keys;
361 volatile uint got_error;
362 uint threads_running;
363 myf myf_rw;
364 enum data_file_type new_data_file_type, org_data_file_type;
365 } MARIA_SORT_INFO;
366
367 typedef struct st_maria_sort_param
368 {
369 pthread_t thr;
370 IO_CACHE read_cache, tempfile, tempfile_for_exceptions;
371 DYNAMIC_ARRAY buffpek;
372 MARIA_BIT_BUFF bit_buff; /* For parallel repair of packrec. */
373
374 MARIA_KEYDEF *keyinfo;
375 MARIA_SORT_INFO *sort_info;
376 HA_KEYSEG *seg;
377 uchar **sort_keys;
378 uchar *rec_buff;
379 void *wordlist, *wordptr;
380 MEM_ROOT wordroot;
381 uchar *record;
382 MY_TMPDIR *tmpdir;
383
384 /*
385 The next two are used to collect statistics, see maria_update_key_parts for
386 description.
387 */
388 ulonglong unique[HA_MAX_KEY_SEG+1];
389 ulonglong notnull[HA_MAX_KEY_SEG+1];
390 ulonglong sortbuff_size;
391
392 MARIA_RECORD_POS pos,max_pos,filepos,start_recpos, current_filepos;
393 uint key, key_length,real_key_length;
394 uint maxbuffers, keys, find_length, sort_keys_length;
395 my_bool fix_datafile, master;
396 my_bool calc_checksum; /* calculate table checksum */
397 size_t rec_buff_size;
398
399 int (*key_cmp)(struct st_maria_sort_param *, const void *, const void *);
400 int (*key_read)(struct st_maria_sort_param *, uchar *);
401 int (*key_write)(struct st_maria_sort_param *, const uchar *);
402 void (*lock_in_memory)(HA_CHECK *);
403 int (*write_keys)(struct st_maria_sort_param *, uchar **,
404 ulonglong , struct st_buffpek *, IO_CACHE *);
405 my_off_t (*read_to_buffer)(IO_CACHE *,struct st_buffpek *, uint);
406 int (*write_key)(struct st_maria_sort_param *, IO_CACHE *,uchar *,
407 uint, ulonglong);
408 } MARIA_SORT_PARAM;
409
410 int maria_write_data_suffix(MARIA_SORT_INFO *sort_info, my_bool fix_datafile);
411
412 struct st_transaction;
413
#define CRC_SIZE 4

/* Remove the my_write mapping from my_nosys; we need test-if-disk full */
#undef my_write
418
419 typedef struct st_maria_state_info
420 {
421 struct
422 { /* Fileheader (24 bytes) */
423 uchar file_version[4];
424 uchar options[2];
425 uchar header_length[2];
426 uchar state_info_length[2];
427 uchar base_info_length[2];
428 uchar base_pos[2];
429 uchar key_parts[2]; /* Key parts */
430 uchar unique_key_parts[2]; /* Key parts + unique parts */
431 uchar keys; /* number of keys in file */
432 uchar uniques; /* number of UNIQUE definitions */
433 uchar not_used; /* Language for indexes */
434 uchar fulltext_keys;
435 uchar data_file_type;
436 /* Used by mariapack to store the original data_file_type */
437 uchar org_data_file_type;
438 } header;
439
440 MARIA_STATUS_INFO state;
441 /* maria_ha->state points here for crash-safe but not versioned tables */
442 MARIA_STATUS_INFO common;
443 /* State for a versioned table that is temporary non versioned */
444 MARIA_STATUS_INFO no_logging;
445 ha_rows split; /* number of split blocks */
446 my_off_t dellink; /* Link to next removed block */
447 pgcache_page_no_t first_bitmap_with_space;
448 ulonglong auto_increment;
449 TrID create_trid; /* Minum trid for file */
450 TrID last_change_trn; /* selfdescriptive */
451 ulong update_count; /* Updated for each write lock */
452 ulong status;
453 double *rec_per_key_part;
454 ulong *nulls_per_key_part;
455 ha_checksum checksum; /* Table checksum */
456 my_off_t *key_root; /* Start of key trees */
457 my_off_t key_del; /* delete links for index pages */
458 my_off_t records_at_analyze; /* Rows when calculating rec_per_key */
459
460 ulong sec_index_changed; /* Updated when new sec_index */
461 ulong sec_index_used; /* which extra index are in use */
462 ulonglong key_map; /* Which keys are in use */
463 ulong version; /* timestamp of create */
464 time_t create_time; /* Time when created database */
465 time_t recover_time; /* Time for last recover */
466 time_t check_time; /* Time for last check */
467 uint sortkey; /* sorted by this key (not used) */
468 uint open_count;
469 uint changed; /* Changed since maria_chk */
470 uint org_changed; /* Changed since open */
471 /**
472 Birthday of the table: no record in the log before this LSN should ever
473 be applied to the table. Updated when created, renamed, explicitly
474 repaired (REPAIR|OPTIMIZE TABLE, ALTER TABLE ENABLE KEYS, maria_chk).
475 */
476 LSN create_rename_lsn;
477 /** @brief Log horizon when state was last updated on disk */
478 TRANSLOG_ADDRESS is_of_horizon;
479 /**
480 REDO phase should ignore any record before this LSN. UNDO phase
481 shouldn't, this is the difference with create_rename_lsn.
482 skip_redo_lsn >= create_rename_lsn.
483 The distinction is for these cases:
484 - after a repair at end of bulk insert (enabling indices), REDO phase
485 should skip the table but UNDO phase should not, so only skip_redo_lsn is
486 increased, not create_rename_lsn
487 - if one table is corrupted and so recovery fails, user may repair the
488 table with maria_chk and let recovery restart: that recovery should then
489 skip the repaired table even in the UNDO phase, so create_rename_lsn is
490 increased.
491 */
492 LSN skip_redo_lsn;
493 /* LSN when we wrote file id to the log */
494 LSN logrec_file_id;
495
496 uint8 dupp_key; /* Lastly processed index with */
497 /* violated uniqueness constraint */
498
499 /* the following isn't saved on disk */
500 uint state_diff_length; /* Should be 0 */
501 uint state_length; /* Length of state header in file */
502 ulong *key_info;
503 } MARIA_STATE_INFO;
504
505
/* Number of bytes written by _ma_state_info_write_sub() */
#define MARIA_STATE_INFO_SIZE   \
  (24 + 2 + LSN_STORE_SIZE*3 + 4 + 11*8 + 4*4 + 8 + 3*4 + 5*8)
#define MARIA_FILE_OPEN_COUNT_OFFSET            0
#define MARIA_FILE_CHANGED_OFFSET               2
#define MARIA_FILE_CREATE_RENAME_LSN_OFFSET     4
#define MARIA_FILE_CREATE_TRID_OFFSET           (4 + LSN_STORE_SIZE*3 + 11*8)

#define MARIA_MAX_KEY_LENGTH    2000
#define MARIA_MAX_KEY_BUFF      \
  (MARIA_MAX_KEY_LENGTH+HA_MAX_KEY_SEG*6+8+8 + MARIA_MAX_PACK_TRANSID_SIZE)
#define MARIA_MAX_POSSIBLE_KEY_BUFF     (MARIA_MAX_KEY_LENGTH + 24+ 6+6)
#define MARIA_STATE_KEY_SIZE            (8 + 4)
#define MARIA_STATE_KEYBLOCK_SIZE       8
#define MARIA_STATE_KEYSEG_SIZE         12
#define MARIA_STATE_EXTRA_SIZE  \
  (MARIA_MAX_KEY*MARIA_STATE_KEY_SIZE + \
   MARIA_MAX_KEY*HA_MAX_KEY_SEG*MARIA_STATE_KEYSEG_SIZE)
#define MARIA_KEYDEF_SIZE               (2+ 5*2)
#define MARIA_UNIQUEDEF_SIZE            (2+1+1)
#define HA_KEYSEG_SIZE                  (6+ 2*2 + 4*2)
#define MARIA_COLUMNDEF_SIZE            (2*7+1+1+4)
#define MARIA_BASE_INFO_SIZE    \
  (MY_UUID_SIZE + 5*8 + 6*4 + 11*2 + 6 + 5*2 + 1 + 16)
/* Safety margin for .MAI tables */
#define MARIA_INDEX_BLOCK_MARGIN        16
/* Node pointer */
#define MARIA_MAX_POINTER_LENGTH        7
/* Internal management bytes needed to store 2 transid/key on an index page */
#define MARIA_MAX_PACK_TRANSID_SIZE     (TRANSID_SIZE+1)
#define MARIA_TRANSID_PACK_OFFSET       (256- TRANSID_SIZE - 1)
#define MARIA_MIN_TRANSID_PACK_OFFSET   \
  (MARIA_TRANSID_PACK_OFFSET-TRANSID_SIZE)
#define MARIA_INDEX_OVERHEAD_SIZE       \
  (MARIA_MAX_PACK_TRANSID_SIZE * 2 + MARIA_MAX_POINTER_LENGTH)
/* keynr for deleted blocks */
#define MARIA_DELETE_KEY_NR             255
536
/* Bits for extra options */
#define MA_EXTRA_OPTIONS_ENCRYPTED      (1 << 0)
#define MA_EXTRA_OPTIONS_INSERT_ORDER   (1 << 1)
540
541 #include "ma_check.h"
542
543 /*
544 Basic information of the Maria table. This is stored on disk
545 and not changed (unless we do DLL changes).
546 */
547
548 typedef struct st_ma_base_info
549 {
550 my_off_t keystart; /* Start of keys */
551 my_off_t max_data_file_length;
552 my_off_t max_key_file_length;
553 my_off_t margin_key_file_length;
554 ha_rows records, reloc; /* Create information */
555 ulong mean_row_length; /* Create information */
556 ulong reclength; /* length of unpacked record */
557 ulong pack_reclength; /* Length of full packed rec */
558 ulong min_pack_length;
559 ulong max_pack_length; /* Max possibly length of packed rec */
560 ulong min_block_length;
561 ulong s3_block_size; /* Block length for S3 files */
562 uint fields; /* fields in table */
563 uint fixed_not_null_fields;
564 uint fixed_not_null_fields_length;
565 uint max_field_lengths;
566 uint pack_fields; /* packed fields in table */
567 uint varlength_fields; /* char/varchar/blobs */
568 /* Number of bytes in the index used to refer to a row (2-8) */
569 uint rec_reflength;
570 /* Number of bytes in the index used to refer to another index page (2-8) */
571 uint key_reflength; /* = 2-8 */
572 uint keys; /* same as in state.header */
573 uint auto_key; /* Which key-1 is a auto key */
574 uint blobs; /* Number of blobs */
575 /* Length of packed bits (when table was created first time) */
576 uint pack_bytes;
577 /* Length of null bits (when table was created first time) */
578 uint original_null_bytes;
579 uint null_bytes; /* Null bytes in record */
580 uint field_offsets; /* Number of field offsets */
581 uint max_key_block_length; /* Max block length */
582 uint max_key_length; /* Max key length */
583 /* Extra allocation when using dynamic record format */
584 uint extra_alloc_bytes;
585 uint extra_alloc_procent;
586 uint is_nulls_extended; /* 1 if new null bytes */
587 uint default_row_flag; /* 0 or ROW_FLAG_NULLS_EXTENDED */
588 uint block_size;
589 /* Size of initial record buffer */
590 uint default_rec_buff_size;
591 /* Extra number of bytes the row format require in the record buffer */
592 uint extra_rec_buff_size;
593 /* Tuning flags that can be ignored by older Maria versions */
594 uint extra_options;
595 /* default language, not really used but displayed by maria_chk */
596 uint language;
597 /* Compression library used. 0 for no compression */
598 uint compression_algorithm;
599
600 /* The following are from the header */
601 uint key_parts, all_key_parts;
602 uchar uuid[MY_UUID_SIZE];
603 /**
604 @brief If false, we disable logging, versioning, transaction etc. Observe
605 difference with MARIA_SHARE::now_transactional
606 */
607 my_bool born_transactional;
608 } MARIA_BASE_INFO;
609
610 uchar *_ma_base_info_read(uchar *ptr, MARIA_BASE_INFO *base);
611
612 /* Structs used intern in database */
613
614 typedef struct st_maria_blob /* Info of record */
615 {
616 ulong offset; /* Offset to blob in record */
617 uint pack_length; /* Type of packed length */
618 ulong length; /* Calc:ed for each record */
619 } MARIA_BLOB;
620
621
622 typedef struct st_maria_pack
623 {
624 ulong header_length;
625 uint ref_length;
626 uchar version;
627 } MARIA_PACK;
628
629 typedef struct st_maria_file_bitmap
630 {
631 struct st_maria_share *share;
632 uchar *map;
633 pgcache_page_no_t page; /* Page number for current bitmap */
634 pgcache_page_no_t last_bitmap_page; /* Last possible bitmap page */
635 my_bool changed; /* 1 if page needs to be written */
636 my_bool changed_not_flushed; /* 1 if some bitmap is not flushed */
637 my_bool return_first_match; /* Shortcut find_head() */
638 uint used_size; /* Size of bitmap head that is not 0 */
639 uint full_head_size; /* Where to start search for head */
640 uint full_tail_size; /* Where to start search for tail */
641 uint flush_all_requested; /**< If _ma_bitmap_flush_all waiting */
642 uint waiting_for_flush_all_requested; /* If someone is waiting for above */
643 uint non_flushable; /**< 0 if bitmap and log are in sync */
644 uint waiting_for_non_flushable; /* If someone is waiting for above */
645 PAGECACHE_FILE file; /* datafile where bitmap is stored */
646
647 mysql_mutex_t bitmap_lock;
648 mysql_cond_t bitmap_cond; /**< When bitmap becomes flushable */
649 /* Constants, allocated when initiating bitmaps */
650 uint sizes[8]; /* Size per bit combination */
651 uint total_size; /* Total usable size of bitmap page */
652 uint max_total_size; /* Max value for total_size */
653 uint last_total_size; /* Size of bitmap on last_bitmap_page */
654 uint block_size; /* Block size of file */
655 ulong pages_covered; /* Pages covered by bitmap + 1 */
656 DYNAMIC_ARRAY pinned_pages; /**< not-yet-flushable bitmap pages */
657 } MARIA_FILE_BITMAP;
658
/* Checkpoint flag bits */
#define MARIA_CHECKPOINT_LOOKS_AT_ME    (1 << 0)
#define MARIA_CHECKPOINT_SHOULD_FREE_ME (1 << 1)
#define MARIA_CHECKPOINT_SEEN_IN_LOOP   (1 << 2)
662
/* Forward declarations of types defined elsewhere */
struct ms3_st;
typedef struct st_maria_crypt_data MARIA_CRYPT_DATA;
665
666 typedef struct st_maria_share
667 { /* Shared between opens */
668 MARIA_STATE_INFO state;
669 MARIA_STATE_INFO checkpoint_state; /* Copy of saved state by checkpoint */
670 MARIA_BASE_INFO base;
671 MARIA_STATE_HISTORY *state_history;
672 MARIA_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */
673 MARIA_KEYDEF *keyinfo; /* Key definitions */
674 MARIA_UNIQUEDEF *uniqueinfo; /* unique definitions */
675 HA_KEYSEG *keyparts; /* key part info */
676 MARIA_COLUMNDEF *columndef; /* Pointer to column information */
677 MARIA_PACK pack; /* Data about packed records */
678 MARIA_BLOB *blobs; /* Pointer to blobs */
679 uint16 *column_nr; /* Original column order */
680 LEX_STRING unique_file_name; /* realpath() of index file */
681 LEX_STRING data_file_name; /* Resolved path names from symlinks */
682 LEX_STRING index_file_name;
683 LEX_STRING open_file_name; /* parameter to open filename */
684 uchar *file_map; /* mem-map of file if possible */
685 LIST *open_list; /* Tables open with this share */
686 PAGECACHE *pagecache; /* ref to the current key cache */
687 MARIA_DECODE_TREE *decode_trees;
688 /*
689 Previous auto-increment value. Used to verify if we can restore the
690 auto-increment counter if we have to abort an insert (duplicate key).
691 */
692 ulonglong last_auto_increment;
693 uint16 *decode_tables;
694 uint16 id; /**< 2-byte id by which log records refer to the table */
695 /* Called the first time the table instance is opened */
696 my_bool (*once_init)(struct st_maria_share *, File);
697 /* Called when the last instance of the table is closed */
698 my_bool (*once_end)(struct st_maria_share *);
699 /* Is called for every open of the table */
700 my_bool (*init)(MARIA_HA *);
701 /* Is called for every close of the table */
702 void (*end)(MARIA_HA *);
703 /* Called when we want to read a record from a specific position */
704 int (*read_record)(MARIA_HA *, uchar *, MARIA_RECORD_POS);
705 /* Initialize a scan */
706 my_bool (*scan_init)(MARIA_HA *);
707 /* Read next record while scanning */
708 int (*scan)(MARIA_HA *, uchar *, MARIA_RECORD_POS, my_bool);
709 /* End scan */
710 void (*scan_end)(MARIA_HA *);
711 int (*scan_remember_pos)(MARIA_HA *, MARIA_RECORD_POS*);
712 int (*scan_restore_pos)(MARIA_HA *, MARIA_RECORD_POS);
713 /* Pre-write of row (some handlers may do the actual write here) */
714 MARIA_RECORD_POS (*write_record_init)(MARIA_HA *, const uchar *);
715 /* Write record (or accept write_record_init) */
716 my_bool (*write_record)(MARIA_HA *, const uchar *);
717 /* Called when write failed */
718 my_bool (*write_record_abort)(MARIA_HA *);
719 my_bool (*update_record)(MARIA_HA *, MARIA_RECORD_POS,
720 const uchar *, const uchar *);
721 my_bool (*delete_record)(MARIA_HA *, const uchar *record);
722 my_bool (*compare_record)(MARIA_HA *, const uchar *);
723 /* calculate checksum for a row */
724 ha_checksum(*calc_checksum)(MARIA_HA *, const uchar *);
725 /*
726 Calculate checksum for a row during write. May be 0 if we calculate
727 the checksum in write_record_init()
728 */
729 ha_checksum(*calc_write_checksum)(MARIA_HA *, const uchar *);
730 /* calculate checksum for a row during check table */
731 ha_checksum(*calc_check_checksum)(MARIA_HA *, const uchar *);
732 /* Compare a row in memory with a row on disk */
733 my_bool (*compare_unique)(MARIA_HA *, MARIA_UNIQUEDEF *,
734 const uchar *record, MARIA_RECORD_POS pos);
735 my_off_t (*keypos_to_recpos)(struct st_maria_share *share, my_off_t pos);
736 my_off_t (*recpos_to_keypos)(struct st_maria_share *share, my_off_t pos);
737 my_bool (*row_is_visible)(MARIA_HA *);
738
739 /* Mapings to read/write the data file */
740 size_t (*file_read)(MARIA_HA *, uchar *, size_t, my_off_t, myf);
741 size_t (*file_write)(MARIA_HA *, const uchar *, size_t, my_off_t, myf);
742 /* query cache invalidator for merged tables */
743 invalidator_by_filename invalidator;
744 /* query cache invalidator for changing state */
745 invalidator_by_filename chst_invalidator;
746 my_off_t key_del_current; /* delete links for index pages */
747 ulong this_process; /* processid */
748 ulong last_process; /* For table-change-check */
749 ulong last_version; /* Version on start */
750 ulong options; /* Options used */
751 ulong min_pack_length; /* These are used by packed data */
752 ulong max_pack_length;
753 ulong state_diff_length;
754 uint rec_reflength; /* rec_reflength in use now */
755 uint keypage_header;
756 uint32 ftkeys; /* Number of distinct full-text keys
757 + 1 */
758 PAGECACHE_FILE kfile; /* Shared keyfile */
759 S3_INFO *s3_path; /* Connection and path in s3 */
760 File data_file; /* Shared data file */
761 int mode; /* mode of file on open */
762 uint reopen; /* How many times opened */
763 uint in_trans; /* Number of references by trn */
764 uint w_locks, r_locks, tot_locks; /* Number of read/write locks */
765 uint block_size; /* block_size of keyfile & data file*/
766 uint max_index_block_size; /* block_size - end_of_page_info */
767 /* Fixed length part of a packed row in BLOCK_RECORD format */
768 uint base_length;
769 myf write_flag;
770 enum data_file_type data_file_type;
771 enum pagecache_page_type page_type; /* value depending transactional */
772 /**
773 if Checkpoint looking at table; protected by close_lock or THR_LOCK_maria
774 */
775 uint8 in_checkpoint;
776 my_bool temporary;
777 /* Below flag is needed to make log tables work with concurrent insert */
778 my_bool is_log_table;
779 my_bool has_null_fields;
780 my_bool has_varchar_fields; /* If table has varchar fields */
781 /*
782 Set to 1 if open_count was wrong at open. Set to avoid asserts for
783 wrong open count on close.
784 */
785 my_bool open_count_not_zero_on_open;
786
787 my_bool changed, /* If changed since lock */
788 global_changed, /* If changed since open */
789 not_flushed;
790 my_bool internal_table; /* Internal tmp table */
791 my_bool lock_key_trees; /* If we have to lock trees on read */
792 my_bool non_transactional_concurrent_insert;
793 my_bool delay_key_write;
794 my_bool have_rtree;
795 /**
796 @brief if the table is transactional right now. It may have been created
797 transactional (base.born_transactional==TRUE) but with transactionality
798 (logging) temporarily disabled (now_transactional==FALSE). The opposite
799 (FALSE, TRUE) is impossible.
800 */
801 my_bool now_transactional;
802 my_bool have_versioning;
803 my_bool key_del_used; /* != 0 if key_del is locked */
804 my_bool deleting; /* we are going to delete this table */
805 my_bool redo_error_given; /* Used during recovery */
806 my_bool silence_encryption_errors; /* Used during recovery */
807 THR_LOCK lock;
808 void (*lock_restore_status)(void *);
809 /**
810 Protects kfile, dfile, most members of the state, state disk writes,
811 versioning information (like in_trans, state_history).
812 @todo find the exhaustive list.
813 */
814 mysql_mutex_t intern_lock;
815 mysql_mutex_t key_del_lock;
816 mysql_cond_t key_del_cond;
817 /**
818 _Always_ held while closing table; prevents checkpoint from looking at
819 structures freed during closure (like bitmap). If you need close_lock and
820 intern_lock, lock them in this order.
821 */
822 mysql_mutex_t close_lock;
823 my_off_t mmaped_length;
824 uint nonmmaped_inserts; /* counter of writing in
825 non-mmaped area */
826 MARIA_FILE_BITMAP bitmap;
827 mysql_rwlock_t mmap_lock;
828 LSN lsn_of_file_id; /**< LSN of its last LOGREC_FILE_ID */
829
830 /**
831 Crypt data
832 */
833 uint crypt_page_header_space;
834 MARIA_CRYPT_DATA *crypt_data;
835
836 /**
837 Keep of track of last insert page, used to implement insert order
838 */
839 uint last_insert_page;
840 pgcache_page_no_t last_insert_bitmap;
841 } MARIA_SHARE;
842
843
/* Byte type of the buffer that MARIA_ROW.extents points to */
typedef uchar MARIA_BITMAP_BUFFER;
845
/*
  One element of the array that MARIA_BITMAP_BLOCKS.block points to:
  a start page plus a page count and some bookkeeping.
*/
typedef struct st_maria_bitmap_block
{
  pgcache_page_no_t page;                       /* Page number */
  /* Number of continuous pages. TAIL_BIT is set if this is a tail page */
  uint page_count;
  uint empty_space;                             /* Set for head and tail pages */
  /*
    Number of BLOCKS for block-region (holds all non-blob-fields or one blob)
  */
  uint sub_blocks;
  /* Set to non-zero in write_record() if this block was actually used */
  uint8 used;
  /* NOTE(review): presumably the bitmap value before the change -- confirm */
  uint8 org_bitmap_value;
} MARIA_BITMAP_BLOCK;
860
861
/* A set of MARIA_BITMAP_BLOCK entries plus flags about skipped pages */
typedef struct st_maria_bitmap_blocks
{
  MARIA_BITMAP_BLOCK *block;
  /* NOTE(review): presumably the number of entries in 'block' -- confirm */
  uint count;
  my_bool tail_page_skipped;            /* If some tail pages were not used */
  my_bool page_skipped;                 /* If some full pages were not used */
} MARIA_BITMAP_BLOCKS;
869
870
871 /* Data about the currently read row */
872 typedef struct st_maria_row
873 {
874 MARIA_BITMAP_BLOCKS insert_blocks;
875 MARIA_BITMAP_BUFFER *extents;
876 MARIA_RECORD_POS lastpos, nextpos;
877 MARIA_RECORD_POS *tail_positions;
878 ha_checksum checksum;
879 LSN orig_undo_lsn; /* Lsn at start of row insert */
880 TrID trid; /* Transaction id for current row */
881 uchar *empty_bits, *field_lengths;
882 uint *null_field_lengths; /* All null field lengths */
883 ulong *blob_lengths; /* Length for each blob */
884 ulong min_length, normal_length, char_length, varchar_length;
885 ulong blob_length, total_length;
886 size_t extents_buffer_length; /* Size of 'extents' buffer */
887 uint head_length, header_length;
888 uint field_lengths_length; /* Length of data in field_lengths */
889 uint extents_count; /* number of extents in 'extents' */
890 uint full_page_count, tail_count; /* For maria_chk */
891 uint space_on_head_page;
892 } MARIA_ROW;
893
894 /* Data to scan row in blocked format */
895 typedef struct st_maria_block_scan
896 {
897 uchar *bitmap_buff, *bitmap_pos, *bitmap_end, *page_buff;
898 uchar *dir, *dir_end;
899 pgcache_page_no_t bitmap_page, max_page;
900 ulonglong bits;
901 uint number_of_rows, bit_pos;
902 MARIA_RECORD_POS row_base_page;
903 ulonglong row_changes;
904 } MARIA_BLOCK_SCAN;
905
906
907 struct st_maria_handler
908 {
909 MARIA_SHARE *s; /* Shared between open:s */
910 struct st_ma_transaction *trn; /* Pointer to active transaction */
911 struct st_maria_handler *trn_next,**trn_prev;
912 MARIA_STATUS_INFO *state, state_save;
913 MARIA_STATUS_INFO *state_start; /* State at start of transaction */
914 MARIA_USED_TABLES *used_tables;
915 struct ms3_st *s3;
916 void **stack_end_ptr;
917 MARIA_ROW cur_row; /* The active row that we just read */
918 MARIA_ROW new_row; /* Storage for a row during update */
919 MARIA_KEY last_key; /* Last found key */
920 MARIA_BLOCK_SCAN scan, *scan_save;
921 MARIA_BLOB *blobs; /* Pointer to blobs */
922 MARIA_BIT_BUFF bit_buff;
923 DYNAMIC_ARRAY bitmap_blocks;
924 DYNAMIC_ARRAY pinned_pages;
925 /* accumulate indexfile changes between write's */
926 TREE *bulk_insert;
927 LEX_CUSTRING *log_row_parts; /* For logging */
928 DYNAMIC_ARRAY *ft1_to_ft2; /* used only in ft1->ft2 conversion */
929 MEM_ROOT ft_memroot; /* used by the parser */
930 MYSQL_FTPARSER_PARAM *ftparser_param; /* share info between init/deinit */
931 void *external_ref; /* For MariaDB TABLE */
932 uchar *buff; /* page buffer */
933 uchar *keyread_buff; /* Buffer for last key read */
934 uchar *lastkey_buff; /* Last used search key */
935 uchar *lastkey_buff2;
936 uchar *first_mbr_key; /* Searhed spatial key */
937 uchar *rec_buff; /* Temp buffer for recordpack */
938 uchar *blob_buff; /* Temp buffer for blobs */
939 uchar *int_keypos; /* Save position for next/previous */
940 uchar *int_maxpos; /* -""- */
941 uint keypos_offset; /* Tmp storage for offset int_keypos */
942 uint maxpos_offset; /* Tmp storage for offset int_maxpos */
943 uchar *update_field_data; /* Used by update in rows-in-block */
944 uint int_nod_flag; /* -""- */
945 uint32 int_keytree_version; /* -""- */
946 int (*read_record)(MARIA_HA *, uchar*, MARIA_RECORD_POS);
947 invalidator_by_filename invalidator; /* query cache invalidator */
948 ulonglong last_auto_increment; /* auto value at start of statement */
949 ulonglong row_changes; /* Incremented for each change */
950 ulonglong start_row_changes; /* Row changes since start trans */
951 ulong this_unique; /* uniq filenumber or thread */
952 ulong last_unique; /* last unique number */
953 ulong this_loop; /* counter for this open */
954 ulong last_loop; /* last used counter */
955 MARIA_RECORD_POS save_lastpos;
956 MARIA_RECORD_POS dup_key_pos;
957 TrID dup_key_trid;
958 my_off_t pos; /* Intern variable */
959 my_off_t last_keypage; /* Last key page read */
960 my_off_t last_search_keypage; /* Last keypage when searching */
961
962 /*
963 QQ: the folloing two xxx_length fields should be removed,
964 as they are not compatible with parallel repair
965 */
966 ulong packed_length, blob_length; /* Length of found, packed record */
967 size_t rec_buff_size, blob_buff_size;
968 PAGECACHE_FILE dfile; /* The datafile */
969 IO_CACHE rec_cache; /* When cacheing records */
970 LIST open_list;
971 LIST share_list;
972 MY_BITMAP changed_fields;
973 ulong row_base_length; /* Length of row header */
974 uint row_flag; /* Flag to store in row header */
975 uint opt_flag; /* Optim. for space/speed */
976 uint open_flags; /* Flags used in open() */
977 uint update; /* If file changed since open */
978 int lastinx; /* Last used index */
979 uint last_rkey_length; /* Last length in maria_rkey() */
980 uint *last_rtree_keypos; /* Last key positions for rtrees */
981 uint bulk_insert_ref_length; /* Lenght of row ref during bi */
982 uint non_flushable_state;
983 enum ha_rkey_function last_key_func; /* CONTAIN, OVERLAP, etc */
984 uint save_lastkey_data_length;
985 uint save_lastkey_ref_length;
986 uint pack_key_length; /* For MARIA_MRG */
987 myf lock_wait; /* is 0 or MY_SHORT_WAIT */
988 int errkey; /* Got last error on this key */
989 int lock_type; /* How database was locked */
990 int tmp_lock_type; /* When locked by readinfo */
991 uint data_changed; /* Somebody has changed data */
992 uint save_update; /* When using KEY_READ */
993 int save_lastinx;
994 uint preload_buff_size; /* When preloading indexes */
995 uint16 last_used_keyseg; /* For MARIAMRG */
996 uint8 key_del_used; /* != 0 if key_del is used */
997 my_bool was_locked; /* Was locked in panic */
998 my_bool intern_lock_locked; /* locked in ma_extra() */
999 my_bool append_insert_at_end; /* Set if concurrent insert */
1000 my_bool quick_mode;
1001 my_bool in_check_table; /* We are running check tables */
1002 /* Marker if key_del_changed */
1003 /* If info->keyread_buff can't be used for rnext */
1004 my_bool page_changed;
1005 /* If info->keyread_buff has to be re-read for rnext */
1006 my_bool keyread_buff_used;
1007 my_bool once_flags; /* For MARIA_MRG */
1008 /* For bulk insert enable/disable transactions control */
1009 my_bool switched_transactional;
1010 /* If transaction will autocommit */
1011 my_bool autocommit;
1012 #ifdef _WIN32
1013 my_bool owned_by_merge; /* This Maria table is part of a merge union */
1014 #endif
1015 THR_LOCK_DATA lock;
1016 uchar *maria_rtree_recursion_state; /* For RTREE */
1017 uchar length_buff[5]; /* temp buff to store blob lengths */
1018 int maria_rtree_recursion_depth;
1019
1020 my_bool create_unique_index_by_sort;
1021 index_cond_func_t index_cond_func; /* Index condition function */
1022 void *index_cond_func_arg; /* parameter for the func */
1023 };
1024
/* Table options for the Aria and S3 storage engine */

struct ha_table_option_struct
{
  /* NOTE(review): presumably the block size used for S3 storage -- confirm */
  ulonglong s3_block_size;
  /* NOTE(review): presumably an id selecting the compression -- confirm */
  uint compression_algorithm;
};
1032
/* Some defines used by maria-functions */

#define USE_WHOLE_KEY   0xFFFF          /* Use whole key in _search() */
#define F_EXTRA_LCK     (-1)

/* Bits in opt_flag */
#define MEMMAP_USED       (1U << 5)
#define REMEMBER_OLD_POS  (1U << 6)

#define WRITEINFO_UPDATE_KEYFILE  (1U << 0)
#define WRITEINFO_NO_UNLOCK       (1U << 1)

/* Bits in once_flags */
#define USE_PACKED_KEYS           (1U << 0)
#define RRND_PRESERVE_LASTINX     (1U << 1)
1048
/* Bits in state.changed */

#define STATE_CHANGED             (1U << 0)
#define STATE_CRASHED             (1U << 1)
#define STATE_CRASHED_ON_REPAIR   (1U << 2)
#define STATE_NOT_ANALYZED        (1U << 3)
#define STATE_NOT_OPTIMIZED_KEYS  (1U << 4)
#define STATE_NOT_SORTED_PAGES    (1U << 5)
#define STATE_NOT_OPTIMIZED_ROWS  (1U << 6)
#define STATE_NOT_ZEROFILLED      (1U << 7)
#define STATE_NOT_MOVABLE         (1U << 8)
#define STATE_MOVED               (1U << 9)   /* Set if base->uuid != maria_uuid */
#define STATE_IN_REPAIR           (1U << 10)  /* We are running repair on table */
#define STATE_CRASHED_PRINTED     (1U << 11)
#define STATE_DATA_FILE_FULL      (1U << 12)

/* All bits that mark the table as crashed in some way */
#define STATE_CRASHED_FLAGS \
  (STATE_CRASHED | STATE_CRASHED_ON_REPAIR | STATE_CRASHED_PRINTED)

/* Options to maria_read_cache */

#define READING_NEXT    (1U << 0)
#define READING_HEADER  (1U << 1)
1071
/*
  Key page header layout.

  The accessors below find their field by counting backwards from
  share->keypage_header: the used-size field is the last KEYPAGE_USED_SIZE
  bytes of the header, the flag byte sits right before it, the key number
  byte before that and the key version before that.  The transaction id is
  stored right after the LSN at the start of the page.

  Every macro parenthesizes its page-pointer argument, so an expression such
  as 'buff + offset' can be passed safely.
*/

/* Number of bytes on key pages to indicate used size */
#define KEYPAGE_USED_SIZE  2U
#define KEYPAGE_KEYID_SIZE 1U
#define KEYPAGE_FLAG_SIZE  1U
#define KEYPAGE_KEY_VERSION_SIZE 4U /* encryption */
#define KEYPAGE_CHECKSUM_SIZE 4U
#define MAX_KEYPAGE_HEADER_SIZE (LSN_STORE_SIZE + KEYPAGE_USED_SIZE + \
                                 KEYPAGE_KEYID_SIZE + KEYPAGE_FLAG_SIZE + \
                                 TRANSID_SIZE + KEYPAGE_KEY_VERSION_SIZE)
#define KEYPAGE_FLAG_ISNOD 1U
#define KEYPAGE_FLAG_HAS_TRANSID 2U

/* Read / write the 2 byte used-size field */
#define _ma_get_page_used(share,x) \
  ((uint) mi_uint2korr((x) + (share)->keypage_header - KEYPAGE_USED_SIZE))
#define _ma_store_page_used(share,x,y) \
  mi_int2store((x) + (share)->keypage_header - KEYPAGE_USED_SIZE, (y))

/* The page flag byte; the expansion is an lvalue, as it always was */
#define _ma_get_keypage_flag(share,x) \
  (x)[(share)->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_FLAG_SIZE]

/* share->base.key_reflength if KEYPAGE_FLAG_ISNOD is set on the page, else 0 */
#define _ma_test_if_nod(share,x) \
  ((_ma_get_keypage_flag(share,x) & KEYPAGE_FLAG_ISNOD) ? (share)->base.key_reflength : 0)

/* Read / write the key number byte */
#define _ma_store_keynr(share, x, nr) \
  ((x)[(share)->keypage_header - KEYPAGE_KEYID_SIZE - KEYPAGE_FLAG_SIZE - \
       KEYPAGE_USED_SIZE]= (nr))
#define _ma_get_keynr(share, x) \
  ((uchar) (x)[(share)->keypage_header - KEYPAGE_KEYID_SIZE - \
               KEYPAGE_FLAG_SIZE - KEYPAGE_USED_SIZE])

/* Read / write the transaction id stored after the LSN */
#define _ma_store_transid(buff, transid) \
  transid_store((buff) + LSN_STORE_SIZE, (transid))
#define _ma_korr_transid(buff) \
  transid_korr((buff) + LSN_STORE_SIZE)

#define _ma_store_keypage_flag(share,x,flag) \
  ((x)[(share)->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_FLAG_SIZE]= (flag))

/* Set KEYPAGE_FLAG_HAS_TRANSID in page->flag and copy the flag to the page */
#define _ma_mark_page_with_transid(share, page) \
  do { (page)->flag|= KEYPAGE_FLAG_HAS_TRANSID;                        \
    (page)->buff[(share)->keypage_header - KEYPAGE_USED_SIZE - KEYPAGE_FLAG_SIZE]= (page)->flag; } while (0)

/* Address of the 4 byte key version field */
#define KEYPAGE_KEY_VERSION(share, x) ((x) + \
                                       (share)->keypage_header -        \
                                       (KEYPAGE_USED_SIZE +             \
                                        KEYPAGE_FLAG_SIZE +             \
                                        KEYPAGE_KEYID_SIZE +            \
                                        KEYPAGE_KEY_VERSION_SIZE))

#define _ma_get_key_version(share,x) \
  ((uint) uint4korr(KEYPAGE_KEY_VERSION((share), (x))))

#define _ma_store_key_version(share,x,kv) \
  int4store(KEYPAGE_KEY_VERSION((share), (x)), (kv))
1115
/*
  Currently just an alias for int4store().
  NOTE(review): the name suggests it is meant for destinations known to be
  aligned -- confirm against the callers.

  TODO: write int4store_aligned as *((uint32 *) (T))= (uint32) (A) for
  architectures where it is possible
*/
#define int4store_aligned(A,B) int4store((A),(B))
1121
/*
  Set and test the crash / repair bits in state.changed.

  maria_mark_crashed_share() takes a share pointer; all other macros take a
  handler and reach the share through (x)->s.  The two repair variants also
  set HA_STATE_CHANGED in the handler's 'update' field.
*/
#define maria_mark_crashed_share(x)                                     \
  do                                                                    \
  {                                                                     \
    (x)->state.changed|= STATE_CRASHED;                                 \
    DBUG_PRINT("error", ("Marked table crashed"));                      \
  } while (0)

#define maria_mark_crashed(x) maria_mark_crashed_share((x)->s)

#define maria_mark_crashed_on_repair(x)                                 \
  do                                                                    \
  {                                                                     \
    (x)->s->state.changed|= (STATE_CRASHED | STATE_CRASHED_ON_REPAIR);  \
    (x)->update|= HA_STATE_CHANGED;                                     \
    DBUG_PRINT("error", ("Marked table crashed on repair"));            \
  } while (0)

#define maria_mark_in_repair(x)                                         \
  do                                                                    \
  {                                                                     \
    (x)->s->state.changed|= (STATE_CRASHED | STATE_IN_REPAIR);          \
    (x)->update|= HA_STATE_CHANGED;                                     \
    DBUG_PRINT("error", ("Marked table crashed for repair"));           \
  } while (0)

#define maria_is_crashed(x) \
  ((x)->s->state.changed & STATE_CRASHED)
#define maria_is_crashed_on_repair(x) \
  ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR)
#define maria_in_repair(x) \
  ((x)->s->state.changed & STATE_IN_REPAIR)
1142
#ifdef EXTRA_DEBUG
/**
  Brings additional information in certain debug builds and in standalone
  (non-ha_maria) programs, to help debugging.  Not done in ha_maria, to not
  spam the user (some messages can be produced many times per statement, or
  even wrongly during some repair operations).
*/
#define maria_print_error(SHARE, ERRNO)                         \
  do                                                            \
  {                                                             \
    if (!maria_in_ha_maria)                                     \
      _ma_report_error((ERRNO), &(SHARE)->index_file_name);     \
  } while (0)
#else
/* Without EXTRA_DEBUG the call expands to an empty statement */
#define maria_print_error(SHARE, ERRNO) do { } while (0)
#endif

/* Dump the data part and the reference part of a key with DBUG_DUMP */
#define DBUG_DUMP_KEY(name, key) \
  DBUG_DUMP(name, (key)->data, (key)->ref_length + (key)->data_length)
1158
/*
  Macros to store and read the length of space packed keys, VARCHAR or BLOB
  keys.  A length below 255 takes one byte; otherwise a 255 marker byte is
  followed by a two byte length.

  These expand to brace blocks and evaluate their arguments more than once.
*/

#define store_key_length(key,length)                            \
{                                                               \
  if ((length) >= 255)                                          \
  {                                                             \
    *(key)= 255;                                                \
    mi_int2store((key) + 1, (length));                          \
  }                                                             \
  else                                                          \
  {                                                             \
    *(key)= (length);                                           \
  }                                                             \
}

/* Sets length to stored length + length bytes and moves key past them */
#define get_key_full_length(length,key)                         \
{                                                               \
  if (*(const uchar*) (key) == 255)                             \
  {                                                             \
    length= mi_uint2korr((key) + 1) + 3;                        \
    (key)+= 3;                                                  \
  }                                                             \
  else                                                          \
    length= ((uint) *(const uchar*) ((key)++)) + 1;             \
}

/* As get_key_full_length(), but leaves key untouched */
#define get_key_full_length_rdonly(length,key)                  \
{                                                               \
  if (*(const uchar*) (key) == 255)                             \
  {                                                             \
    length= mi_uint2korr((key) + 1) + 3;                        \
  }                                                             \
  else                                                          \
    length= ((uint) *(const uchar*) ((key))) + 1;               \
}

#define _ma_max_key_length() \
  ((maria_block_size - MAX_KEYPAGE_HEADER_SIZE)/3 - MARIA_INDEX_OVERHEAD_SIZE)
/* Number of bytes store_key_length() uses for the given length */
#define get_pack_length(length) ((length) < 255 ? 1 : 3)
#define _ma_have_versioning(info) ((info)->row_flag & ROW_FLAG_TRANSID)
1185
/* Block and header sizes for dynamic (record-block) rows */
#define MARIA_MIN_BLOCK_LENGTH          20      /* Because of delete-link */
/* Don't use too small record-blocks */
#define MARIA_EXTEND_BLOCK_LENGTH       20
#define MARIA_SPLIT_LENGTH              (2*(MARIA_EXTEND_BLOCK_LENGTH+4))
/* Max prefix of record-block */
#define MARIA_MAX_DYN_BLOCK_HEADER      20
#define MARIA_BLOCK_INFO_HEADER_LENGTH  20
#define MARIA_DYN_DELETE_BLOCK_HEADER   20      /* Length of delete-block-header */
#define MARIA_DYN_MAX_BLOCK_LENGTH      ((1L << 24)-4L)
#define MARIA_DYN_MAX_ROW_LENGTH \
  (MARIA_DYN_MAX_BLOCK_LENGTH - MARIA_SPLIT_LENGTH)
#define MARIA_DYN_ALIGN_SIZE            4       /* Align blocks on this */
#define MARIA_MAX_DYN_HEADER_BYTE       13      /* Max header uchar for dynamic rows */
/* Largest 24 bit length that is a multiple of MARIA_DYN_ALIGN_SIZE */
#define MARIA_MAX_BLOCK_LENGTH \
  ((~ (ulong) (MARIA_DYN_ALIGN_SIZE-1)) & (((ulong) 1 << 24)-1))
#define MARIA_REC_BUFF_OFFSET \
  ALIGN_SIZE(MARIA_DYN_DELETE_BLOCK_HEADER+sizeof(uint32))

#define MEMMAP_EXTRA_MARGIN     7       /* Write this as a suffix for file */

/* Bits in field->pack_type */
#define PACK_TYPE_SELECTED      (1U << 0)
#define PACK_TYPE_SPACE_FIELDS  (1U << 1)
#define PACK_TYPE_ZERO_FILL     (1U << 2)
#define MARIA_FOUND_WRONG_KEY   32768U  /* Impossible value from ha_key_cmp */

/*
  Rounds the space for four keys (each with data and key pointer) plus one
  key pointer and two bytes up to the next whole multiple of block_size.
*/
#define MARIA_BLOCK_SIZE(key_length,data_pointer,key_pointer,block_size) \
  (((((key_length)+(data_pointer)+(key_pointer))*4+(key_pointer)+2)/     \
    (block_size)+1)*(block_size))
#define MARIA_MAX_KEYPTR_SIZE   5       /* For calculating block lengths */

/* Marker for impossible delete link */
#define IMPOSSIBLE_PAGE_NO 0xFFFFFFFFFFLL

/* The UNIQUE check is done with a hashed long key */

#define MARIA_UNIQUE_HASH_TYPE  HA_KEYTYPE_ULONG_INT
#define maria_unique_store(A,B) mi_int4store((A),(B))
1218
1219 extern mysql_mutex_t THR_LOCK_maria;
1220 #ifdef DONT_USE_RW_LOCKS
1221 #define mysql_rwlock_wrlock(A) {}
1222 #define mysql_rwlock_rdlock(A) {}
1223 #define mysql_rwlock_unlock(A) {}
1224 #endif
1225
/*
  Some tuning parameters.
  NOTE(review): the MARIA_MIN_ROWS_TO_* values look like row-count
  thresholds for enabling the named optimization -- confirm at the use sites.
*/
#define MARIA_MIN_KEYBLOCK_LENGTH 50 /* When to split delete blocks */
#define MARIA_MIN_SIZE_BULK_INSERT_TREE 16384U /* this is per key */
#define MARIA_MIN_ROWS_TO_USE_BULK_INSERT 100
#define MARIA_MIN_ROWS_TO_DISABLE_INDEXES 100
#define MARIA_MIN_ROWS_TO_USE_WRITE_CACHE 10
/* Keep a small buffer for tables only using small blobs */
#define MARIA_SMALL_BLOB_BUFFER 1024U
#define MARIA_MAX_CONTROL_FILE_LOCK_RETRY 30 /* Retry this many times */
1235
1236 /* Some extern variables */
1237 extern LIST *maria_open_list;
1238 extern uchar maria_file_magic[], maria_pack_file_magic[];
1239 extern uchar maria_uuid[MY_UUID_SIZE];
1240 extern uint32 maria_read_vec[], maria_readnext_vec[];
1241 extern uint maria_quick_table_bits;
1242 extern const char *maria_data_root;
1243 extern uchar maria_zero_string[];
1244 extern my_bool maria_inited, maria_in_ha_maria, maria_recovery_changed_data;
1245 extern my_bool maria_recovery_verbose, maria_checkpoint_disabled;
1246 extern my_bool maria_assert_if_crashed_table, aria_readonly;
1247 extern ulong maria_checkpoint_min_log_activity;
1248 extern HASH maria_stored_state;
1249 extern int (*maria_create_trn_hook)(MARIA_HA *);
1250 extern my_bool (*ma_killed)(MARIA_HA *);
1251
/*
  Instrumentation keys, declared only when the PSI interface is compiled in.
  They are grouped by the kind of object: mutexes, condition variables,
  rwlocks, threads and files.
*/
#ifdef HAVE_PSI_INTERFACE
/* Mutex keys */
extern PSI_mutex_key key_SHARE_BITMAP_lock, key_SORT_INFO_mutex,
                     key_THR_LOCK_maria, key_TRANSLOG_BUFFER_mutex,
                     key_LOCK_soft_sync,
                     key_TRANSLOG_DESCRIPTOR_dirty_buffer_mask_lock,
                     key_TRANSLOG_DESCRIPTOR_sent_to_disk_lock,
                     key_TRANSLOG_DESCRIPTOR_log_flush_lock,
                     key_TRANSLOG_DESCRIPTOR_file_header_lock,
                     key_TRANSLOG_DESCRIPTOR_unfinished_files_lock,
                     key_TRANSLOG_DESCRIPTOR_purger_lock,
                     key_SHARE_intern_lock, key_SHARE_key_del_lock,
                     key_SHARE_close_lock,
                     key_SERVICE_THREAD_CONTROL_lock,
                     key_PAGECACHE_cache_lock;

extern PSI_mutex_key key_CRYPT_DATA_lock;

/* Condition variable keys */
extern PSI_cond_key key_SHARE_key_del_cond, key_SERVICE_THREAD_CONTROL_cond,
                    key_SORT_INFO_cond, key_SHARE_BITMAP_cond,
                    key_COND_soft_sync, key_TRANSLOG_BUFFER_waiting_filling_buffer,
                    key_TRANSLOG_BUFFER_prev_sent_to_disk_cond,
                    key_TRANSLOG_DESCRIPTOR_log_flush_cond,
                    key_TRANSLOG_DESCRIPTOR_new_goal_cond;

/* Read/write lock keys */
extern PSI_rwlock_key key_KEYINFO_root_lock, key_SHARE_mmap_lock,
                      key_TRANSLOG_DESCRIPTOR_open_files_lock;

/* Thread keys */
extern PSI_thread_key key_thread_checkpoint, key_thread_find_all_keys,
                      key_thread_soft_sync;

/* File keys */
extern PSI_file_key key_file_translog, key_file_kfile, key_file_dfile,
                    key_file_control, key_file_tmp;

#endif

/* Note that PSI_stage_info globals must always be declared. */
extern PSI_stage_info stage_waiting_for_a_resource;
1289
1290 /* This is used by _ma_calc_xxx_key_length och _ma_store_key */
1291 typedef struct st_maria_s_param
1292 {
1293 const uchar *key;
1294 uchar *prev_key, *next_key_pos;
1295 uchar *key_pos; /* For balance page */
1296 uint ref_length, key_length, n_ref_length;
1297 uint n_length, totlength, part_of_prev_key, prev_length, pack_marker;
1298 uint changed_length;
1299 int move_length; /* For balance_page */
1300 my_bool store_not_null;
1301 } MARIA_KEY_PARAM;
1302
1303
1304 /* Used to store reference to pinned page */
1305 typedef struct st_pinned_page
1306 {
1307 PAGECACHE_BLOCK_LINK *link;
1308 enum pagecache_page_lock unlock, write_lock;
1309 my_bool changed;
1310 } MARIA_PINNED_PAGE;
1311
1312
/*
  Keeps all information about a page and related to a page.
  'link_offset' is the index used by page_mark_changed() to find this page
  in info->pinned_pages.
*/
typedef struct st_maria_page
{
  MARIA_HA *info;
  const MARIA_KEYDEF *keyinfo;
  uchar *buff;                          /* Data for page */
  my_off_t pos;                         /* Disk address to page */
  uint size;                            /* Size of data on page */
  uint org_size;                        /* Size of page at read or after log */
  uint node;                            /* 0 or share->base.key_reflength */
  uint flag;                            /* Page flag */
  uint link_offset;
} MARIA_PAGE;
1326
1327
/* Prototypes for intern functions */

/* Row functions whose names end in _dynamic_record / _blob_record */
extern int _ma_read_dynamic_record(MARIA_HA *, uchar *, MARIA_RECORD_POS);
extern int _ma_read_rnd_dynamic_record(MARIA_HA *, uchar *, MARIA_RECORD_POS,
                                       my_bool);
extern my_bool _ma_write_dynamic_record(MARIA_HA *, const uchar *);
extern my_bool _ma_update_dynamic_record(MARIA_HA *, MARIA_RECORD_POS,
                                         const uchar *, const uchar *);
extern my_bool _ma_delete_dynamic_record(MARIA_HA *info, const uchar *record);
extern my_bool _ma_cmp_dynamic_record(MARIA_HA *info, const uchar *record);
extern my_bool _ma_write_blob_record(MARIA_HA *, const uchar *);
extern my_bool _ma_update_blob_record(MARIA_HA *, MARIA_RECORD_POS,
                                      const uchar *, const uchar *);
/* Row functions whose names end in _static_record */
extern int _ma_read_static_record(MARIA_HA *info, uchar *, MARIA_RECORD_POS);
extern int _ma_read_rnd_static_record(MARIA_HA *, uchar *, MARIA_RECORD_POS,
                                      my_bool);
extern my_bool _ma_write_static_record(MARIA_HA *, const uchar *);
extern my_bool _ma_update_static_record(MARIA_HA *, MARIA_RECORD_POS,
                                        const uchar *, const uchar *);
extern my_bool _ma_delete_static_record(MARIA_HA *info, const uchar *record);
extern my_bool _ma_cmp_static_record(MARIA_HA *info, const uchar *record);

/* Row functions whose names end in _no_record */
extern my_bool _ma_write_no_record(MARIA_HA *info, const uchar *record);
extern my_bool _ma_update_no_record(MARIA_HA *info, MARIA_RECORD_POS pos,
                                    const uchar *oldrec, const uchar *record);
extern my_bool _ma_delete_no_record(MARIA_HA *info, const uchar *record);
extern int _ma_read_no_record(MARIA_HA *info, uchar *record,
                              MARIA_RECORD_POS pos);
extern int _ma_read_rnd_no_record(MARIA_HA *info, uchar *buf,
                                  MARIA_RECORD_POS filepos,
                                  my_bool skip_deleted_blocks);
my_off_t _ma_no_keypos_to_recpos(MARIA_SHARE *share, my_off_t pos);
1359
/* Key write, insert and page split functions */
extern my_bool _ma_ck_write(MARIA_HA *info, MARIA_KEY *key);
extern my_bool _ma_enlarge_root(MARIA_HA *info, MARIA_KEY *key,
                                MARIA_RECORD_POS *root);
int _ma_insert(MARIA_HA *info, MARIA_KEY *key,
               MARIA_PAGE *anc_page, uchar *key_pos, uchar *key_buff,
               MARIA_PAGE *father_page, uchar *father_key_pos,
               my_bool insert_last);
extern my_bool _ma_ck_real_write_btree(MARIA_HA *info, MARIA_KEY *key,
                                       MARIA_RECORD_POS *root, uint32 comp_flag);
extern int _ma_split_page(MARIA_HA *info, MARIA_KEY *key,
                          MARIA_PAGE *split_page,
                          uint org_split_length,
                          uchar *inserted_key_pos, uint changed_length,
                          int move_length,
                          uchar *key_buff, my_bool insert_last_key);
extern uchar *_ma_find_half_pos(MARIA_KEY *key, MARIA_PAGE *page,
                                uchar ** after_key);
/*
  Key length calculators, one per key storage format.  They all fill in a
  MARIA_KEY_PARAM, which the _ma_store_*_key() functions below take.
*/
extern int _ma_calc_static_key_length(const MARIA_KEY *key, uint nod_flag,
                                      uchar *key_pos, uchar *org_key,
                                      uchar *key_buff,
                                      MARIA_KEY_PARAM *s_temp);
extern int _ma_calc_var_key_length(const MARIA_KEY *key, uint nod_flag,
                                   uchar *key_pos, uchar *org_key,
                                   uchar *key_buff,
                                   MARIA_KEY_PARAM *s_temp);
extern int _ma_calc_var_pack_key_length(const MARIA_KEY *key,
                                        uint nod_flag, uchar *next_key,
                                        uchar *org_key, uchar *prev_key,
                                        MARIA_KEY_PARAM *s_temp);
extern int _ma_calc_bin_pack_key_length(const MARIA_KEY *key,
                                        uint nod_flag, uchar *next_key,
                                        uchar *org_key, uchar *prev_key,
                                        MARIA_KEY_PARAM *s_temp);
extern void _ma_store_static_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
                                 MARIA_KEY_PARAM *s_temp);
extern void _ma_store_var_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
                                   MARIA_KEY_PARAM *s_temp);
#ifdef NOT_USED
extern void _ma_store_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
                               MARIA_KEY_PARAM *s_temp);
#endif
extern void _ma_store_bin_pack_key(MARIA_KEYDEF *keyinfo, uchar *key_pos,
                                   MARIA_KEY_PARAM *s_temp);
1403
/* Key delete */
extern my_bool _ma_ck_delete(MARIA_HA *info, MARIA_KEY *key);
extern my_bool _ma_ck_real_delete(MARIA_HA *info, MARIA_KEY *key,
                                  my_off_t *root);
/* Table info, change marking and error state */
extern int _ma_readinfo(MARIA_HA *info, int lock_flag, int check_keybuffer);
extern int _ma_writeinfo(MARIA_HA *info, uint options);
extern int _ma_test_if_changed(MARIA_HA *info);
extern int _ma_mark_file_changed(MARIA_SHARE *info);
extern int _ma_mark_file_changed_now(MARIA_SHARE *info);
extern void _ma_mark_file_crashed(MARIA_SHARE *share);
void _ma_set_fatal_error(MARIA_SHARE *share, int error);
extern my_bool _ma_set_uuid(MARIA_SHARE *info, my_bool reset_uuid);
extern my_bool _ma_check_if_zero(uchar *pos, size_t size);
extern int _ma_decrement_open_count(MARIA_HA *info, my_bool lock_table);
extern int _ma_check_index(MARIA_HA *info, int inx);
/*
  Key search.
  NOTE(review): _ma_seq_search() declares comp_flag as uint while
  _ma_bin_search() and _ma_prefix_search() use uint32 -- confirm that this
  difference is intended.
*/
extern int _ma_search(MARIA_HA *info, MARIA_KEY *key, uint32 nextflag,
                      my_off_t pos);
extern int _ma_bin_search(const MARIA_KEY *key, const MARIA_PAGE *page,
                          uint32 comp_flag, uchar **ret_pos, uchar *buff,
                          my_bool *was_last_key);
extern int _ma_seq_search(const MARIA_KEY *key, const MARIA_PAGE *page,
                          uint comp_flag, uchar ** ret_pos, uchar *buff,
                          my_bool *was_last_key);
extern int _ma_prefix_search(const MARIA_KEY *key, const MARIA_PAGE *page,
                             uint32 comp_flag, uchar ** ret_pos, uchar *buff,
                             my_bool *was_last_key);
/* Page and row pointers stored in keys */
extern my_off_t _ma_kpos(uint nod_flag, const uchar *after_key);
extern void _ma_kpointer(MARIA_HA *info, uchar *buff, my_off_t pos);
MARIA_RECORD_POS _ma_row_pos_from_key(const MARIA_KEY *key);
TrID _ma_trid_from_key(const MARIA_KEY *key);
extern MARIA_RECORD_POS _ma_rec_pos(MARIA_SHARE *share, uchar *ptr);
extern void _ma_dpointer(MARIA_SHARE *share, uchar *buff,
                         MARIA_RECORD_POS pos);
/* Get / skip one key on a page, one pair per key storage format */
extern uint _ma_get_static_key(MARIA_KEY *key, uint page_flag, uint nod_flag,
                               uchar **page);
extern uchar *_ma_skip_static_key(MARIA_KEY *key, uint page_flag,
                                  uint nod_flag, uchar *page);
extern uint _ma_get_pack_key(MARIA_KEY *key, uint page_flag, uint nod_flag,
                             uchar **page);
extern uchar *_ma_skip_pack_key(MARIA_KEY *key, uint page_flag,
                                uint nod_flag, uchar *page);
extern uint _ma_get_binary_pack_key(MARIA_KEY *key, uint page_flag,
                                    uint nod_flag, uchar **page_pos);
uchar *_ma_skip_binary_pack_key(MARIA_KEY *key, uint page_flag,
                                uint nod_flag, uchar *page);
extern uchar *_ma_get_last_key(MARIA_KEY *key, MARIA_PAGE *page,
                               uchar *endpos);
extern uchar *_ma_get_key(MARIA_KEY *key, MARIA_PAGE *page, uchar *keypos);
extern uint _ma_keylength(MARIA_KEYDEF *keyinfo, const uchar *key);
extern uint _ma_keylength_part(MARIA_KEYDEF *keyinfo, const uchar *key,
                               HA_KEYSEG *end);
extern int _ma_search_next(MARIA_HA *info, MARIA_KEY *key,
                           uint32 nextflag, my_off_t pos);
extern int _ma_search_first(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
                            my_off_t pos);
extern int _ma_search_last(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
                           my_off_t pos);
/*
  Conversions between key positions and record positions; they have the
  signature of the keypos_to_recpos / recpos_to_keypos pointers in the share.
*/
extern my_off_t _ma_static_keypos_to_recpos(MARIA_SHARE *share, my_off_t pos);
extern my_off_t _ma_static_recpos_to_keypos(MARIA_SHARE *share, my_off_t pos);
extern my_off_t _ma_transparent_recpos(MARIA_SHARE *share, my_off_t pos);
extern my_off_t _ma_transaction_keypos_to_recpos(MARIA_SHARE *, my_off_t pos);
extern my_off_t _ma_transaction_recpos_to_keypos(MARIA_SHARE *, my_off_t pos);
1465
/* Key page handling: set up, fetch, write, dispose, allocate and compact */
extern void _ma_page_setup(MARIA_PAGE *page, MARIA_HA *info,
                           const MARIA_KEYDEF *keyinfo, my_off_t pos,
                           uchar *buff);
extern my_bool _ma_fetch_keypage(MARIA_PAGE *page, MARIA_HA *info,
                                 const MARIA_KEYDEF *keyinfo,
                                 my_off_t pos, enum pagecache_page_lock lock,
                                 int level, uchar *buff,
                                 my_bool return_buffer);
extern my_bool _ma_write_keypage(MARIA_PAGE *page,
                                 enum pagecache_page_lock lock, int level);
extern int _ma_dispose(MARIA_HA *info, my_off_t pos, my_bool page_not_read);
extern my_off_t _ma_new(MARIA_HA *info, int level,
                        MARIA_PINNED_PAGE **page_link);
extern my_bool _ma_compact_keypage(MARIA_PAGE *page, TrID min_read_from);
/* Packed transaction id storage */
extern uint transid_store_packed(MARIA_HA *info, uchar *to, ulonglong trid);
extern ulonglong transid_get_packed(MARIA_SHARE *share, const uchar *from);
/*
  1 if data[0] is below MARIA_MIN_TRANSID_PACK_OFFSET, otherwise
  data[0] - (MARIA_TRANSID_PACK_OFFSET - 1).
  NOTE(review): presumably the number of bytes the packed transid
  occupies -- confirm.
*/
#define transid_packed_length(data) \
  ((data)[0] < MARIA_MIN_TRANSID_PACK_OFFSET ? 1 : \
   (uint) ((uchar) (data)[0]) - (MARIA_TRANSID_PACK_OFFSET - 1))
/* Tests the lowest bit of the first byte of key */
#define key_has_transid(key) (*(key) & 1)

/*
  The three page_* helpers below each expand to ONE statement without a
  trailing semicolon, so they are safe under an unbraced if / else.
  Invoke them with a terminating ';' like a function call.
*/

/* Flag the pinned page that page->link_offset refers to as changed */
#define page_mark_changed(info, page) \
  (dynamic_element(&(info)->pinned_pages, (page)->link_offset, \
                   MARIA_PINNED_PAGE*)->changed= 1)
/* Write page->size into the used-size field of the page buffer */
#define page_store_size(share, page) \
  do \
  { \
    _ma_store_page_used((share), (page)->buff, (page)->size); \
  } while (0)
/* Write both page->flag and page->size into the page buffer */
#define page_store_info(share, page) \
  do \
  { \
    _ma_store_keypage_flag((share), (page)->buff, (page)->flag); \
    _ma_store_page_used((share), (page)->buff, (page)->size); \
  } while (0)
#ifdef IDENTICAL_PAGES_AFTER_RECOVERY
void page_cleanup(MARIA_SHARE *share, MARIA_PAGE *page);
#else
#define page_cleanup(A,B) do { } while (0)
#endif
1500
/* Building and copying keys */
extern MARIA_KEY *_ma_make_key(MARIA_HA *info, MARIA_KEY *int_key, uint keynr,
                               uchar *key, const uchar *record,
                               MARIA_RECORD_POS filepos, ulonglong trid);
extern MARIA_KEY *_ma_pack_key(MARIA_HA *info, MARIA_KEY *int_key,
                               uint keynr, uchar *key,
                               const uchar *old, key_part_map keypart_map,
                               HA_KEYSEG ** last_used_keyseg);
extern void _ma_copy_key(MARIA_KEY *to, const MARIA_KEY *from);
/* Record reading, buffers and record checking */
extern int _ma_read_key_record(MARIA_HA *info, uchar *buf, MARIA_RECORD_POS);
extern my_bool _ma_read_cache(MARIA_HA *, IO_CACHE *info, uchar *buff,
                              MARIA_RECORD_POS pos, size_t length,
                              uint re_read_if_possibly);
extern ulonglong ma_retrieve_auto_increment(const uchar *key, uint8 key_type);
extern my_bool _ma_alloc_buffer(uchar **old_addr, size_t *old_size,
                                size_t new_size, myf flag);
extern size_t _ma_rec_unpack(MARIA_HA *info, uchar *to, uchar *from,
                             size_t reclength);
extern my_bool _ma_rec_check(MARIA_HA *info, const uchar *record,
                             uchar *packpos, ulong packed_length,
                             my_bool with_checkum, ha_checksum checksum);
extern int _ma_write_part_record(MARIA_HA *info, my_off_t filepos,
                                 ulong length, my_off_t next_filepos,
                                 uchar ** record, ulong *reclength,
                                 int *flag);
/* Printing keys to a stream */
extern void _ma_print_key(FILE *stream, MARIA_KEY *key);
extern void _ma_print_keydata(FILE *stream, HA_KEYSEG *keyseg,
                              const uchar *key, uint length);
/* Functions whose names refer to packed rows (pack_row / pack_record) */
extern my_bool _ma_once_init_pack_row(MARIA_SHARE *share, File dfile);
extern my_bool _ma_once_end_pack_row(MARIA_SHARE *share);
extern int _ma_read_pack_record(MARIA_HA *info, uchar *buf,
                                MARIA_RECORD_POS filepos);
extern int _ma_read_rnd_pack_record(MARIA_HA *, uchar *, MARIA_RECORD_POS,
                                    my_bool);
extern int _ma_pack_rec_unpack(MARIA_HA *info, MARIA_BIT_BUFF *bit_buff,
                               uchar *to, uchar *from, ulong reclength);
extern ulonglong _ma_safe_mul(ulonglong a, ulonglong b);
extern int _ma_ft_update(MARIA_HA *info, uint keynr, uchar *keybuf,
                         const uchar *oldrec, const uchar *newrec,
                         my_off_t pos);
1540
1541 /*
1542 Parameter to _ma_get_block_info
1543 The dynamic row header is read into this struct. For an explanation of
1544 the fields, look at the function _ma_get_block_info().
1545 */
1546
1547 typedef struct st_maria_block_info
1548 {
1549 uchar header[MARIA_BLOCK_INFO_HEADER_LENGTH];
1550 ulong rec_len;
1551 ulong data_len;
1552 ulong block_len;
1553 ulong blob_len;
1554 MARIA_RECORD_POS filepos;
1555 MARIA_RECORD_POS next_filepos;
1556 MARIA_RECORD_POS prev_filepos;
1557 uint second_read;
1558 uint offset;
1559 } MARIA_BLOCK_INFO;
1560
1561
/* Bits in the return value of _ma_get_block_info(); one bit per flag */

#define BLOCK_FIRST        (1U << 0)
#define BLOCK_LAST         (1U << 1)
#define BLOCK_DELETED      (1U << 2)
#define BLOCK_ERROR        (1U << 3)    /* Wrong data */
#define BLOCK_SYNC_ERROR   (1U << 4)    /* Right data at wrong place */
#define BLOCK_FATAL_ERROR  (1U << 5)    /* hardware-error */
1570
/* Miscellaneous limits and temporary file extensions */
#define NEED_MEM              ((uint) 10 * 4 * (IO_SIZE + 32) + 32) /* Needed for recursion */
#define MAXERR                20
#define BUFFERS_WHEN_SORTING  16        /* Alloc for sort-key-tree */
#define WRITE_COUNT           MY_HOW_OFTEN_TO_WRITE
#define INDEX_TMP_EXT         ".TMM"
#define DATA_TMP_EXT          ".TMD"
1577
/* UPDATE_* flags; each one occupies its own bit so they can be or-ed */
#define UPDATE_TIME        (1U << 0)
#define UPDATE_STAT        (1U << 1)
#define UPDATE_SORT        (1U << 2)
#define UPDATE_AUTO_INC    (1U << 3)
#define UPDATE_OPEN_COUNT  (1U << 4)
1583
/*
  Initial buffer sizes.
  We use MY_ALIGN_DOWN here mainly to ensure that we get stable values for
  mysqld --help
*/
#define PAGE_BUFFER_INIT \
  MY_ALIGN_DOWN(256L*1024L*1024L - MALLOC_OVERHEAD, 8192)
#define READ_BUFFER_INIT \
  MY_ALIGN_DOWN(256L*1024L - MALLOC_OVERHEAD, 1024)
#define SORT_BUFFER_INIT \
  MY_ALIGN_DOWN(256L*1024L*1024L - MALLOC_OVERHEAD, 1024)
#define MIN_SORT_BUFFER 4096U
1589
/*
  Call _ma_writeinfo((INFO), 0), discarding its result, when
  (INFO)->s->tot_locks is zero.

  The if is enclosed in do { } while (0) so that an 'else' written after
  the macro invocation binds to the caller's own if and not to the if
  hidden inside the macro.  Invoke it with a terminating semicolon.
*/
#define fast_ma_writeinfo(INFO) \
  do { \
    if (!(INFO)->s->tot_locks) \
      (void) _ma_writeinfo((INFO),0); \
  } while (0)
/*
  Non-zero when (INFO)->lock_type == F_UNLCK and
  _ma_readinfo((INFO), F_RDLCK, 1) returns non-zero.  _ma_readinfo() is not
  called when the lock type differs (short-circuit &&).

  The whole expansion is parenthesized so that operators applied to the
  macro by the caller (for example '!') act on the complete result.
*/
#define fast_ma_readinfo(INFO) \
  (((INFO)->lock_type == F_UNLCK) && _ma_readinfo((INFO),F_RDLCK,1))
1592
1593 extern uint _ma_get_block_info(MARIA_HA *, MARIA_BLOCK_INFO *, File, my_off_t);
1594 extern uint _ma_rec_pack(MARIA_HA *info, uchar *to, const uchar *from);
1595 extern uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff,
1596 MARIA_BLOCK_INFO *info, uchar **rec_buff_p,
1597 size_t *rec_buff_size,
1598 File file, my_off_t filepos);
1599 extern void _ma_store_blob_length(uchar *pos, uint pack_length, uint length);
1600 extern void _ma_report_error(int errcode, const LEX_STRING *file_name);
1601 extern my_bool _ma_memmap_file(MARIA_HA *info);
1602 extern void _ma_unmap_file(MARIA_HA *info);
1603 extern uint _ma_save_pack_length(uint version, uchar * block_buff,
1604 ulong length);
1605 extern uint _ma_calc_pack_length(uint version, ulong length);
1606 extern ulong _ma_calc_blob_length(uint length, const uchar *pos);
1607 extern size_t _ma_mmap_pread(MARIA_HA *info, uchar *Buffer,
1608 size_t Count, my_off_t offset, myf MyFlags);
1609 extern size_t _ma_mmap_pwrite(MARIA_HA *info, const uchar *Buffer,
1610 size_t Count, my_off_t offset, myf MyFlags);
1611 extern size_t _ma_nommap_pread(MARIA_HA *info, uchar *Buffer,
1612 size_t Count, my_off_t offset, myf MyFlags);
1613 extern size_t _ma_nommap_pwrite(MARIA_HA *info, const uchar *Buffer,
1614 size_t Count, my_off_t offset, myf MyFlags);
1615
/*
  MA_STATE_INFO_WRITE_* flags, one bit each.
  NOTE(review): presumably these are the bits accepted in the 'pWrite'
  argument of _ma_state_info_write() -- confirm against its definition.
*/
/* my_pwrite instead of my_write used */
#define MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET  (1 << 0)
/* info should be written */
#define MA_STATE_INFO_WRITE_FULL_INFO         (1 << 1)
/* intern_lock taking is needed */
#define MA_STATE_INFO_WRITE_LOCK              (1 << 2)
1622 uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite)__attribute__((visibility("default"))) ;
1623 uint _ma_state_info_write_sub(File file, MARIA_STATE_INFO *state, uint pWrite);
1624 uint _ma_state_info_read_dsk(File file, MARIA_STATE_INFO *state);
1625 uint _ma_base_info_write(File file, MARIA_BASE_INFO *base);
1626 my_bool _ma_keyseg_write(File file, const HA_KEYSEG *keyseg);
1627 uchar *_ma_keyseg_read(uchar *ptr, HA_KEYSEG *keyseg);
1628 my_bool _ma_keydef_write(File file, MARIA_KEYDEF *keydef);
1629 uchar *_ma_keydef_read(uchar *ptr, MARIA_KEYDEF *keydef);
1630 my_bool _ma_uniquedef_write(File file, MARIA_UNIQUEDEF *keydef);
1631 uchar *_ma_uniquedef_read(uchar *ptr, MARIA_UNIQUEDEF *keydef);
1632 my_bool _ma_columndef_write(File file, MARIA_COLUMNDEF *columndef);
1633 uchar *_ma_columndef_read(uchar *ptr, MARIA_COLUMNDEF *columndef);
1634 my_bool _ma_column_nr_write(File file, uint16 *offsets, uint columns);
1635 uchar *_ma_column_nr_read(uchar *ptr, uint16 *offsets, uint columns);
1636 ulong _ma_calc_total_blob_length(MARIA_HA *info, const uchar *record);
1637 ha_checksum _ma_checksum(MARIA_HA *info, const uchar *buf);
1638 ha_checksum _ma_static_checksum(MARIA_HA *info, const uchar *buf);
1639 my_bool _ma_check_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
1640 const uchar *record, ha_checksum unique_hash,
1641 MARIA_RECORD_POS pos);
1642 ha_checksum _ma_unique_hash(MARIA_UNIQUEDEF *def, const uchar *buf);
1643 my_bool _ma_cmp_static_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
1644 const uchar *record, MARIA_RECORD_POS pos);
1645 my_bool _ma_cmp_dynamic_unique(MARIA_HA *info, MARIA_UNIQUEDEF *def,
1646 const uchar *record, MARIA_RECORD_POS pos);
1647 my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const uchar *a, const uchar *b,
1648 my_bool null_are_equal);
1649 void _ma_reset_status(MARIA_HA *maria);
1650 int _ma_def_scan_remember_pos(MARIA_HA *info, MARIA_RECORD_POS *lastpos);
1651 int _ma_def_scan_restore_pos(MARIA_HA *info, MARIA_RECORD_POS lastpos);
1652
1653 #include "ma_commit.h"
1654
1655 extern MARIA_HA *_ma_test_if_reopen(const char *filename);
1656 my_bool _ma_check_table_is_closed(const char *name, const char *where);
1657 int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share);
1658 int _ma_open_keyfile(MARIA_SHARE *share);
1659 void _ma_setup_functions(MARIA_SHARE *share);
1660 my_bool _ma_dynmap_file(MARIA_HA *info, my_off_t size);
1661 void _ma_remap_file(MARIA_HA *info, my_off_t size);
1662
1663 MARIA_RECORD_POS _ma_write_init_default(MARIA_HA *info, const uchar *record);
1664 my_bool _ma_write_abort_default(MARIA_HA *info);
1665 int maria_delete_table_files(const char *name, my_bool temporary,
1666 myf flags)__attribute__((visibility("default"))) ;
1667
1668
/*
  HA_RTREE_INDEX cannot be put in my_base.h as it clashes with HA_SPATIAL.
  It was introduced for the Aria engine and is only used there, so it can
  safely stay here, where it is only visible to Aria.
*/
#define HA_RTREE_INDEX  (1 << 14)       /* For RTREE search */
1675
/* MARIA_FLUSH_* flags; separate bits, so both can be given at once */
#define MARIA_FLUSH_DATA   (1 << 0)
#define MARIA_FLUSH_INDEX  (1 << 1)
1678 int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index,
1679 enum flush_type flush_type_for_data,
1680 enum flush_type flush_type_for_index);
1681 /*
1682 Functions needed by _ma_check (are overridden in MySQL/ha_maria.cc).
1683 See ma_check_standalone.h .
1684 */
1685 int _ma_killed_ptr(HA_CHECK *param);
1686 void _ma_report_progress(HA_CHECK *param, ulonglong progress,
1687 ulonglong max_progress);
1688 void _ma_check_print_error(HA_CHECK *param, const char *fmt, ...)
1689 ATTRIBUTE_FORMAT(printf, 2, 3);
1690 void _ma_check_print_warning(HA_CHECK *param, const char *fmt, ...)
1691 ATTRIBUTE_FORMAT(printf, 2, 3);
1692 void _ma_check_print_info(HA_CHECK *param, const char *fmt, ...)
1693 ATTRIBUTE_FORMAT(printf, 2, 3);
1694 my_bool write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info);
1695
1696 int _ma_flush_pending_blocks(MARIA_SORT_PARAM *param);
1697 int _ma_sort_ft_buf_flush(MARIA_SORT_PARAM *sort_param);
1698 int _ma_thr_write_keys(MARIA_SORT_PARAM *sort_param);
1699 pthread_handler_t _ma_thr_find_all_keys(void *arg);
1700
1701 int _ma_sort_write_record(MARIA_SORT_PARAM *sort_param);
1702 int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages,
1703 size_t);
1704 int _ma_sync_table_files(const MARIA_HA *info);
1705 int _ma_initialize_data_file(MARIA_SHARE *share, File dfile);
1706 int _ma_update_state_lsns(MARIA_SHARE *share,
1707 LSN lsn, TrID create_trid, my_bool do_sync,
1708 my_bool update_create_rename_lsn);
1709 int _ma_update_state_lsns_sub(MARIA_SHARE *share, LSN lsn,
1710 TrID create_trid, my_bool do_sync,
1711 my_bool update_create_rename_lsn);
1712 void _ma_set_data_pagecache_callbacks(PAGECACHE_FILE *file,
1713 MARIA_SHARE *share);
1714 void _ma_set_index_pagecache_callbacks(PAGECACHE_FILE *file,
1715 MARIA_SHARE *share);
1716 void _ma_tmp_disable_logging_for_table(MARIA_HA *info,
1717 my_bool log_incomplete);
1718 my_bool _ma_reenable_logging_for_table(MARIA_HA *info, my_bool flush_pages);
1719 my_bool write_log_record_for_bulk_insert(MARIA_HA *info);
1720 void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn);
1721
/*
  Special 32-bit values, one for normal pages and one for bitmap pages.
  NOTE(review): presumably these are passed as 'no_crc_val' to
  maria_page_crc_check() -- confirm against the callers.
*/
#define MARIA_NO_CRC_NORMAL_PAGE  0xFFFFFFFF
#define MARIA_NO_CRC_BITMAP_PAGE  0xFFFFFFFE
1724 extern my_bool maria_page_crc_set_index(PAGECACHE_IO_HOOK_ARGS *args);
1725 extern my_bool maria_page_crc_set_normal(PAGECACHE_IO_HOOK_ARGS *args);
1726 extern my_bool maria_page_crc_check_bitmap(int, PAGECACHE_IO_HOOK_ARGS *args);
1727 extern my_bool maria_page_crc_check_data(int, PAGECACHE_IO_HOOK_ARGS *args);
1728 extern my_bool maria_page_crc_check_index(int, PAGECACHE_IO_HOOK_ARGS *args);
1729 extern my_bool maria_page_crc_check_none(int, PAGECACHE_IO_HOOK_ARGS *args);
1730 extern my_bool maria_page_crc_check(uchar *page, pgcache_page_no_t page_no,
1731 MARIA_SHARE *share, uint32 no_crc_val,
1732 int data_length);
1733 extern my_bool maria_page_filler_set_bitmap(PAGECACHE_IO_HOOK_ARGS *args);
1734 extern my_bool maria_page_filler_set_normal(PAGECACHE_IO_HOOK_ARGS *args);
1735 extern my_bool maria_page_filler_set_none(PAGECACHE_IO_HOOK_ARGS *args);
1736 extern void maria_page_write_failure(int error, PAGECACHE_IO_HOOK_ARGS *args);
1737 extern my_bool maria_flush_log_for_page(PAGECACHE_IO_HOOK_ARGS *args);
1738 extern my_bool maria_flush_log_for_page_none(PAGECACHE_IO_HOOK_ARGS *args);
1739
1740 extern PAGECACHE *maria_log_pagecache;
1741 extern void ma_set_index_cond_func(MARIA_HA *info, index_cond_func_t func,
1742 void *func_arg);
1743 check_result_t ma_check_index_cond(MARIA_HA *info, uint keynr, uchar *record);
1744
1745 extern my_bool ma_yield_and_check_if_killed(MARIA_HA *info, int inx);
1746 extern my_bool ma_killed_standalone(MARIA_HA *);
1747
1748 extern uint _ma_file_callback_to_id(void *callback_data);
1749 extern void free_maria_share(MARIA_SHARE *share);
1750
/*
  Call _ma_unmap_file(info) when info->s->file_map is set.

  Without HAVE_MMAP the body is empty and 'info' is not referenced, which
  is why the parameter carries the 'unused' attribute.
*/
static inline void unmap_file(MARIA_HA *info __attribute__((unused)))
{
#ifdef HAVE_MMAP
  if (!info->s->file_map)
    return;                                     /* Nothing is mapped */
  _ma_unmap_file(info);
#endif
}
1758
/*
  Decrement share->in_trans.

  When the counter reaches zero the share is handed to free_maria_share();
  otherwise share->intern_lock is unlocked.

  NOTE(review): presumably the caller holds share->intern_lock on entry and
  free_maria_share() takes care of the lock on the zero path -- confirm
  against free_maria_share().
*/
static inline void decrement_share_in_trans(MARIA_SHARE *share)
{
  /* Internal tables don't have transactions */
  DBUG_ASSERT(!share->internal_table);

  --share->in_trans;
  if (share->in_trans == 0)
  {
    free_maria_share(share);
    return;
  }
  mysql_mutex_unlock(&share->intern_lock);
}
1768 C_MODE_END
1769 #endif
1770