1 /* Copyright (C) 2007 MySQL AB & Sanja Belkin 2 3 This program is free software; you can redistribute it and/or modify 4 it under the terms of the GNU General Public License as published by 5 the Free Software Foundation; version 2 of the License. 6 7 This program is distributed in the hope that it will be useful, 8 but WITHOUT ANY WARRANTY; without even the implied warranty of 9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 GNU General Public License for more details. 11 12 You should have received a copy of the GNU General Public License 13 along with this program; if not, write to the Free Software 14 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ 15 16 #ifndef _ma_loghandler_h 17 #define _ma_loghandler_h 18 19 #define MB (1024UL*1024) 20 21 /* transaction log default cache size (TODO: make it global variable) */ 22 #define TRANSLOG_PAGECACHE_SIZE (2*MB) 23 /* transaction log default file size */ 24 #define TRANSLOG_FILE_SIZE (1024U*MB) 25 /* minimum possible transaction log size */ 26 #define TRANSLOG_MIN_FILE_SIZE (8*MB) 27 /* transaction log default flags (TODO: make it global variable) */ 28 #define TRANSLOG_DEFAULT_FLAGS 0 29 30 /* 31 Transaction log flags. 32 33 We allow all kind protections to be switched on together for people who 34 really unsure in their hardware/OS. 35 */ 36 #define TRANSLOG_PAGE_CRC 1U 37 #define TRANSLOG_SECTOR_PROTECTION (1U<<1) 38 #define TRANSLOG_RECORD_CRC (1U<<2) 39 #define TRANSLOG_FLAGS_NUM ((TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | \ 40 TRANSLOG_RECORD_CRC) + 1) 41 42 #define RECHEADER_READ_ERROR -1 43 #define RECHEADER_READ_EOF -2 44 45 /* 46 Page size in transaction log 47 It should be Power of 2 and multiple of DISK_DRIVE_SECTOR_SIZE 48 (DISK_DRIVE_SECTOR_SIZE * 2^N) 49 */ 50 #define TRANSLOG_PAGE_SIZE (8U*1024) 51 52 #include "ma_loghandler_lsn.h" 53 #include "trnman_public.h" 54 55 /* short transaction ID type */ 56 typedef uint16 SHORT_TRANSACTION_ID; 57 58 struct st_maria_handler; 59 60 /* Changing one of the "SIZE" below will break backward-compatibility! */ 61 /* Length of CRC at end of pages */ 62 #define ROW_EXTENT_PAGE_SIZE 5 63 #define ROW_EXTENT_COUNT_SIZE 2 64 /* Size of file id in logs */ 65 #define FILEID_STORE_SIZE 2 66 /* Size of page reference in log */ 67 #define PAGE_STORE_SIZE ROW_EXTENT_PAGE_SIZE 68 /* Size of page ranges in log */ 69 #define PAGERANGE_STORE_SIZE ROW_EXTENT_COUNT_SIZE 70 #define DIRPOS_STORE_SIZE 1 71 #define CLR_TYPE_STORE_SIZE 1 72 /* If table has live checksum we store its changes in UNDOs */ 73 #define HA_CHECKSUM_STORE_SIZE 4 74 #define KEY_NR_STORE_SIZE 1 75 #define PAGE_LENGTH_STORE_SIZE 2 76 77 /* Store methods to match the above sizes */ 78 #define fileid_store(T,A) int2store(T,A) 79 #define page_store(T,A) int5store(T,((ulonglong)(A))) 80 #define dirpos_store(T,A) ((*(uchar*) (T)) = A) 81 #define pagerange_store(T,A) int2store(T,A) 82 #define clr_type_store(T,A) ((*(uchar*) (T)) = A) 83 #define key_nr_store(T, A) ((*(uchar*) (T)) = A) 84 #define ha_checksum_store(T,A) int4store(T,A) 85 #define fileid_korr(P) uint2korr(P) 86 #define page_korr(P) uint5korr(P) 87 #define dirpos_korr(P) (*(const uchar *) (P)) 88 #define pagerange_korr(P) uint2korr(P) 89 #define clr_type_korr(P) (*(const uchar *) (P)) 90 #define key_nr_korr(P) (*(const uchar *) (P)) 91 #define ha_checksum_korr(P) uint4korr(P) 92 93 /* 94 Length of disk drive sector size (we assume that writing it 95 to disk is an atomic operation) 96 */ 97 #define DISK_DRIVE_SECTOR_SIZE 512U 98 99 /* position reserved in an array of parts of a log record */ 100 #define TRANSLOG_INTERNAL_PARTS 2 101 102 /* types of records in the transaction log */ 103 /* TODO: Set numbers for these when we have all entries figured out */ 104 105 enum translog_record_type 106 { 107 LOGREC_RESERVED_FOR_CHUNKS23= 0, 108 LOGREC_REDO_INSERT_ROW_HEAD, 109 LOGREC_REDO_INSERT_ROW_TAIL, 110 LOGREC_REDO_NEW_ROW_HEAD, 111 LOGREC_REDO_NEW_ROW_TAIL, 112 LOGREC_REDO_INSERT_ROW_BLOBS, 113 LOGREC_REDO_PURGE_ROW_HEAD, 114 LOGREC_REDO_PURGE_ROW_TAIL, 115 LOGREC_REDO_FREE_BLOCKS, 116 LOGREC_REDO_FREE_HEAD_OR_TAIL, 117 LOGREC_REDO_DELETE_ROW, /* unused */ 118 LOGREC_REDO_UPDATE_ROW_HEAD, /* unused */ 119 LOGREC_REDO_INDEX, 120 LOGREC_REDO_INDEX_NEW_PAGE, 121 LOGREC_REDO_INDEX_FREE_PAGE, 122 LOGREC_REDO_UNDELETE_ROW, 123 LOGREC_CLR_END, 124 LOGREC_PURGE_END, 125 LOGREC_UNDO_ROW_INSERT, 126 LOGREC_UNDO_ROW_DELETE, 127 LOGREC_UNDO_ROW_UPDATE, 128 LOGREC_UNDO_KEY_INSERT, 129 LOGREC_UNDO_KEY_INSERT_WITH_ROOT, 130 LOGREC_UNDO_KEY_DELETE, 131 LOGREC_UNDO_KEY_DELETE_WITH_ROOT, 132 LOGREC_PREPARE, 133 LOGREC_PREPARE_WITH_UNDO_PURGE, 134 LOGREC_COMMIT, 135 LOGREC_COMMIT_WITH_UNDO_PURGE, 136 LOGREC_CHECKPOINT, 137 LOGREC_REDO_CREATE_TABLE, 138 LOGREC_REDO_RENAME_TABLE, 139 LOGREC_REDO_DROP_TABLE, 140 LOGREC_REDO_DELETE_ALL, 141 LOGREC_REDO_REPAIR_TABLE, 142 LOGREC_FILE_ID, 143 LOGREC_LONG_TRANSACTION_ID, 144 LOGREC_INCOMPLETE_LOG, 145 LOGREC_INCOMPLETE_GROUP, 146 LOGREC_UNDO_BULK_INSERT, 147 LOGREC_REDO_BITMAP_NEW_PAGE, 148 LOGREC_IMPORTED_TABLE, 149 LOGREC_DEBUG_INFO, 150 LOGREC_FIRST_FREE, 151 LOGREC_RESERVED_FUTURE_EXTENSION= 63 152 }; 153 #define LOGREC_NUMBER_OF_TYPES 64 /* Maximum, can't be extended */ 154 155 /* Type of operations in LOGREC_REDO_INDEX */ 156 157 enum en_key_op 158 { 159 KEY_OP_NONE, /* Not used */ 160 KEY_OP_OFFSET, /* Set current position */ 161 KEY_OP_SHIFT, /* Shift up/or down at current position */ 162 KEY_OP_CHANGE, /* Change data at current position */ 163 KEY_OP_ADD_PREFIX, /* Insert data at start of page */ 164 KEY_OP_DEL_PREFIX, /* Delete data at start of page */ 165 KEY_OP_ADD_SUFFIX, /* Insert data at end of page */ 166 KEY_OP_DEL_SUFFIX, /* Delete data at end of page */ 167 KEY_OP_CHECK, /* For debugging; CRC of used part of page */ 168 KEY_OP_MULTI_COPY, /* List of memcpy()s with fixed-len sources in page */ 169 KEY_OP_SET_PAGEFLAG, /* Set pageflag from next byte */ 170 KEY_OP_COMPACT_PAGE, /* Compact key page */ 171 KEY_OP_MAX_PAGELENGTH, /* Set page to max page length */ 172 KEY_OP_DEBUG, /* Entry for storing what triggered redo_index */ 173 KEY_OP_DEBUG_2 /* Entry for pagelengths */ 174 }; 175 176 enum en_key_debug 177 { 178 KEY_OP_DEBUG_RTREE_COMBINE, /* 0 */ 179 KEY_OP_DEBUG_RTREE_SPLIT, /* 1 */ 180 KEY_OP_DEBUG_RTREE_SET_KEY, /* 2 */ 181 KEY_OP_DEBUG_FATHER_CHANGED_1, /* 3 */ 182 KEY_OP_DEBUG_FATHER_CHANGED_2, /* 4 */ 183 KEY_OP_DEBUG_LOG_SPLIT, /* 5 */ 184 KEY_OP_DEBUG_LOG_ADD_1, /* 6 */ 185 KEY_OP_DEBUG_LOG_ADD_2, /* 7 */ 186 KEY_OP_DEBUG_LOG_ADD_3, /* 8 */ 187 KEY_OP_DEBUG_LOG_ADD_4, /* 9 */ 188 KEY_OP_DEBUG_LOG_PREFIX_1, /* 10 */ 189 KEY_OP_DEBUG_LOG_PREFIX_2, /* 11 */ 190 KEY_OP_DEBUG_LOG_PREFIX_3, /* 12 */ 191 KEY_OP_DEBUG_LOG_PREFIX_4, /* 13 */ 192 KEY_OP_DEBUG_LOG_PREFIX_5, /* 14 */ 193 KEY_OP_DEBUG_LOG_DEL_CHANGE_1, /* 15 */ 194 KEY_OP_DEBUG_LOG_DEL_CHANGE_2, /* 16 */ 195 KEY_OP_DEBUG_LOG_DEL_CHANGE_3, /* 17 */ 196 KEY_OP_DEBUG_LOG_DEL_CHANGE_RT, /* 18 */ 197 KEY_OP_DEBUG_LOG_DEL_PREFIX, /* 19 */ 198 KEY_OP_DEBUG_LOG_MIDDLE /* 20 */ 199 }; 200 201 202 enum translog_debug_info_type 203 { 204 LOGREC_DEBUG_INFO_QUERY 205 }; 206 207 /* Size of log file; One log file is restricted to 4G */ 208 typedef uint32 translog_size_t; 209 210 #define TRANSLOG_RECORD_HEADER_MAX_SIZE 1024U 211 212 typedef struct st_translog_group_descriptor 213 { 214 TRANSLOG_ADDRESS addr; 215 uint8 num; 216 } TRANSLOG_GROUP; 217 218 219 typedef struct st_translog_header_buffer 220 { 221 /* LSN of the read record */ 222 LSN lsn; 223 /* array of groups descriptors, can be used only if groups_no > 0 */ 224 TRANSLOG_GROUP *groups; 225 /* short transaction ID or 0 if it has no sense for the record */ 226 SHORT_TRANSACTION_ID short_trid; 227 /* 228 The Record length in buffer (including read header, but excluding 229 hidden part of record (type, short TrID, length) 230 */ 231 translog_size_t record_length; 232 /* 233 Buffer for write decoded header of the record (depend on the record 234 type) 235 */ 236 uchar header[TRANSLOG_RECORD_HEADER_MAX_SIZE]; 237 /* number of groups listed in */ 238 uint groups_no; 239 /* in multi-group number of chunk0 pages (valid only if groups_no > 0) */ 240 uint chunk0_pages; 241 /* type of the read record */ 242 enum translog_record_type type; 243 /* chunk 0 data address (valid only if groups_no > 0) */ 244 TRANSLOG_ADDRESS chunk0_data_addr; 245 /* 246 Real compressed LSN(s) size economy (<number of LSN(s)>*7 - <real_size>) 247 */ 248 int16 compressed_LSN_economy; 249 /* short transaction ID or 0 if it has no sense for the record */ 250 uint16 non_header_data_start_offset; 251 /* non read body data length in this first chunk */ 252 uint16 non_header_data_len; 253 /* chunk 0 data size (valid only if groups_no > 0) */ 254 uint16 chunk0_data_len; 255 } TRANSLOG_HEADER_BUFFER; 256 257 258 typedef struct st_translog_scanner_data 259 { 260 uchar buffer[TRANSLOG_PAGE_SIZE]; /* buffer for page content */ 261 TRANSLOG_ADDRESS page_addr; /* current page address */ 262 /* end of the log which we saw last time */ 263 TRANSLOG_ADDRESS horizon; 264 TRANSLOG_ADDRESS last_file_page; /* Last page on in this file */ 265 uchar *page; /* page content pointer */ 266 /* direct link on the current page or NULL if not supported/requested */ 267 PAGECACHE_BLOCK_LINK *direct_link; 268 /* offset of the chunk in the page */ 269 translog_size_t page_offset; 270 /* set horizon only once at init */ 271 my_bool fixed_horizon; 272 /* try to get direct link on the page if it is possible */ 273 my_bool use_direct_link; 274 } TRANSLOG_SCANNER_DATA; 275 276 277 typedef struct st_translog_reader_data 278 { 279 TRANSLOG_HEADER_BUFFER header; /* Header */ 280 TRANSLOG_SCANNER_DATA scanner; /* chunks scanner */ 281 translog_size_t body_offset; /* current chunk body offset */ 282 /* data offset from the record beginning */ 283 translog_size_t current_offset; 284 /* number of bytes read in header */ 285 uint16 read_header; 286 uint16 chunk_size; /* current chunk size */ 287 uint current_group; /* current group */ 288 uint current_chunk; /* current chunk in the group */ 289 my_bool eor; /* end of the record */ 290 } TRANSLOG_READER_DATA; 291 292 C_MODE_START 293 294 /* Records types for unittests */ 295 #define LOGREC_FIXED_RECORD_0LSN_EXAMPLE 1 296 #define LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE 2 297 #define LOGREC_FIXED_RECORD_1LSN_EXAMPLE 3 298 #define LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE 4 299 #define LOGREC_FIXED_RECORD_2LSN_EXAMPLE 5 300 #define LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE 6 301 302 extern void translog_example_table_init(); 303 extern void translog_table_init(); 304 #define translog_init(D,M,V,I,C,F,R) \ 305 translog_init_with_table(D,M,V,I,C,F,R,&translog_table_init,0) 306 extern my_bool translog_init_with_table(const char *directory, 307 uint32 log_file_max_size, 308 uint32 server_version, 309 uint32 server_id, 310 PAGECACHE *pagecache, 311 uint flags, 312 my_bool readonly, 313 void (*init_table_func)(), 314 my_bool no_error); 315 #ifndef DBUG_OFF 316 void check_translog_description_table(int num); 317 #endif 318 319 extern my_bool 320 translog_write_record(LSN *lsn, enum translog_record_type type, TRN *trn, 321 MARIA_HA *tbl_info, 322 translog_size_t rec_len, uint part_no, 323 LEX_CUSTRING *parts_data, uchar *store_share_id, 324 void *hook_arg); 325 326 extern void translog_destroy(); 327 328 extern int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff); 329 330 extern void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff); 331 332 extern translog_size_t translog_read_record(LSN lsn, 333 translog_size_t offset, 334 translog_size_t length, 335 uchar *buffer, 336 struct st_translog_reader_data 337 *data); 338 339 extern my_bool translog_flush(TRANSLOG_ADDRESS lsn); 340 341 extern my_bool translog_scanner_init(LSN lsn, 342 my_bool fixed_horizon, 343 struct st_translog_scanner_data *scanner, 344 my_bool use_direct_link); 345 extern void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner); 346 347 extern int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner, 348 TRANSLOG_HEADER_BUFFER *buff); 349 extern LSN translog_get_file_max_lsn_stored(uint32 file); 350 extern my_bool translog_purge(TRANSLOG_ADDRESS low); 351 extern my_bool translog_is_file(uint file_no); 352 extern void translog_lock(); 353 extern void translog_unlock(); 354 extern void translog_lock_handler_assert_owner(); 355 extern TRANSLOG_ADDRESS translog_get_horizon(); 356 extern TRANSLOG_ADDRESS translog_get_horizon_no_lock(); 357 extern int translog_assign_id_to_share(struct st_maria_handler *tbl_info, 358 TRN *trn); 359 extern void translog_deassign_id_from_share(struct st_maria_share *share); 360 extern void 361 translog_assign_id_to_share_from_recovery(struct st_maria_share *share, 362 uint16 id); 363 extern my_bool translog_walk_filenames(const char *directory, 364 my_bool (*callback)(const char *, 365 const char *)); 366 extern void dump_page(uchar *buffer, File handler); 367 extern my_bool translog_log_debug_info(TRN *trn, 368 enum translog_debug_info_type type, 369 uchar *info, size_t length); 370 extern void translog_disable_purge(void); 371 extern void translog_enable_purge(void); 372 373 enum enum_translog_status 374 { 375 TRANSLOG_UNINITED, /* no initialization done or error during initialization */ 376 TRANSLOG_OK, /* transaction log is functioning */ 377 TRANSLOG_READONLY, /* read only mode due to write errors */ 378 TRANSLOG_SHUTDOWN /* going to shutdown the loghandler */ 379 }; 380 extern enum enum_translog_status translog_status; 381 extern ulonglong translog_syncs; /* Number of sync()s */ 382 383 void translog_soft_sync(my_bool mode); 384 void translog_hard_group_commit(my_bool mode); 385 int translog_soft_sync_start(void); 386 void translog_soft_sync_end(void); 387 void translog_sync(); 388 void translog_set_group_commit_interval(uint32 interval); 389 extern void check_skipped_lsn(MARIA_HA *info, LSN lsn, my_bool index_file, 390 pgcache_page_no_t page); 391 392 /* 393 all the rest added because of recovery; should we make 394 ma_loghandler_for_recovery.h ? 395 */ 396 397 /* 398 Information from transaction log file header 399 */ 400 401 typedef struct st_loghandler_file_info 402 { 403 /* 404 LSN_IMPOSSIBLE for current file (not finished file). 405 Maximum LSN of the record which parts stored in the 406 file. 407 */ 408 LSN max_lsn; 409 ulonglong timestamp; /* Time stamp */ 410 ulong maria_version; /* Version of maria loghandler */ 411 ulong mysql_version; /* Version of mysql server */ 412 ulong server_id; /* Server ID */ 413 ulong page_size; /* Loghandler page size */ 414 ulong file_number; /* Number of the file (from the file header) */ 415 } LOGHANDLER_FILE_INFO; 416 417 #define SHARE_ID_MAX 65535 /* array's size */ 418 419 extern void translog_fill_overhead_table(); 420 extern void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc, 421 uchar *page_buff); 422 extern LSN translog_first_lsn_in_log(); 423 extern LSN translog_first_theoretical_lsn(); 424 extern LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon); 425 extern my_bool translog_purge_at_flush(); 426 extern uint32 translog_get_first_file(TRANSLOG_ADDRESS horizon); 427 extern uint32 translog_get_first_needed_file(); 428 extern char *translog_filename_by_fileno(uint32 file_no, char *path); 429 extern void translog_set_file_size(uint32 size); 430 431 /* record parts descriptor */ 432 struct st_translog_parts 433 { 434 /* full record length */ 435 translog_size_t record_length; 436 /* full record length with chunk headers */ 437 translog_size_t total_record_length; 438 /* current part index */ 439 uint current; 440 /* total number of elements in parts */ 441 uint elements; 442 /* array of parts */ 443 LEX_CUSTRING *parts; 444 }; 445 446 typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type, 447 TRN *trn, 448 struct st_maria_handler *tbl_info, 449 void *hook_arg); 450 451 typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type, 452 TRN *trn, 453 struct st_maria_handler *tbl_info, 454 LSN *lsn, void *hook_arg); 455 456 typedef uint16(*read_rec_hook) (enum translog_record_type type, 457 uint16 read_length, uchar *read_buff, 458 uchar *decoded_buff); 459 460 461 /* record classes */ 462 enum record_class 463 { 464 LOGRECTYPE_NOT_ALLOWED, 465 LOGRECTYPE_VARIABLE_LENGTH, 466 LOGRECTYPE_PSEUDOFIXEDLENGTH, 467 LOGRECTYPE_FIXEDLENGTH 468 }; 469 470 enum enum_record_in_group { 471 LOGREC_NOT_LAST_IN_GROUP= 0, LOGREC_LAST_IN_GROUP, LOGREC_IS_GROUP_ITSELF 472 }; 473 474 /* 475 Descriptor of log record type 476 */ 477 typedef struct st_log_record_type_descriptor 478 { 479 /* internal class of the record */ 480 enum record_class rclass; 481 /* 482 length for fixed-size record, pseudo-fixed record 483 length with uncompressed LSNs 484 */ 485 uint16 fixed_length; 486 /* how much record body (belonged to headers too) read with headers */ 487 uint16 read_header_len; 488 /* HOOK for writing the record called before lock */ 489 prewrite_rec_hook prewrite_hook; 490 /* HOOK for writing the record called when LSN is known, inside lock */ 491 inwrite_rec_hook inwrite_hook; 492 /* HOOK for reading headers */ 493 read_rec_hook read_hook; 494 /* 495 For pseudo fixed records number of compressed LSNs followed by 496 system header 497 */ 498 int16 compressed_LSN; 499 /* the rest is for maria_read_log & Recovery */ 500 /** @brief for debug error messages or "maria_read_log" command-line tool */ 501 const char *name; 502 enum enum_record_in_group record_in_group; 503 /* a function to execute when we see the record during the REDO phase */ 504 int (*record_execute_in_redo_phase)(const TRANSLOG_HEADER_BUFFER *); 505 /* a function to execute when we see the record during the UNDO phase */ 506 int (*record_execute_in_undo_phase)(const TRANSLOG_HEADER_BUFFER *, TRN *); 507 } LOG_DESC; 508 509 extern LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES]; 510 511 typedef enum 512 { 513 TRANSLOG_GCOMMIT_NONE, 514 TRANSLOG_GCOMMIT_HARD, 515 TRANSLOG_GCOMMIT_SOFT 516 } enum_maria_group_commit; 517 extern ulong maria_group_commit; 518 extern ulong maria_group_commit_interval; 519 typedef enum 520 { 521 TRANSLOG_PURGE_IMMIDIATE, 522 TRANSLOG_PURGE_EXTERNAL, 523 TRANSLOG_PURGE_ONDEMAND 524 } enum_maria_translog_purge_type; 525 extern ulong log_purge_type; 526 extern ulong log_file_size; 527 extern uint log_purge_disabled; /* For backup */ 528 529 typedef enum 530 { 531 TRANSLOG_SYNC_DIR_NEVER, 532 TRANSLOG_SYNC_DIR_NEWFILE, 533 TRANSLOG_SYNC_DIR_ALWAYS 534 } enum_maria_sync_log_dir; 535 extern ulong sync_log_dir; 536 537 C_MODE_END 538 #endif 539