1 /* Copyright (C) 2007 MySQL AB & Sanja Belkin
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; version 2 of the License.
6 
7    This program is distributed in the hope that it will be useful,
8    but WITHOUT ANY WARRANTY; without even the implied warranty of
9    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10    GNU General Public License for more details.
11 
12    You should have received a copy of the GNU General Public License
13    along with this program; if not, write to the Free Software
14    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
15 
16 #ifndef _ma_loghandler_h
17 #define _ma_loghandler_h
18 
/* One megabyte, as an unsigned long constant */
#define MB (1024UL * 1024UL)

/* Default size of the transaction log cache (TODO: make it a global variable) */
#define TRANSLOG_PAGECACHE_SIZE (2 * MB)
/* Default size of one transaction log file */
#define TRANSLOG_FILE_SIZE (1024U * MB)
/* Smallest transaction log size that is accepted */
#define TRANSLOG_MIN_FILE_SIZE (8 * MB)
/* Default transaction log flags (TODO: make it a global variable) */
#define TRANSLOG_DEFAULT_FLAGS 0
29 
/*
  Transaction log flags (one bit each).

  All kinds of protection may be switched on together, for people who
  really do not trust their hardware/OS.
*/
#define TRANSLOG_PAGE_CRC              (1U << 0)
#define TRANSLOG_SECTOR_PROTECTION     (1U << 1)
#define TRANSLOG_RECORD_CRC            (1U << 2)
/* (all flags OR-ed together) + 1, i.e. the number of flag combinations */
#define TRANSLOG_FLAGS_NUM \
  ((TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION | TRANSLOG_RECORD_CRC) + 1)
41 
/*
  Negative status codes for reading a record header.
  NOTE(review): presumably returned by the translog_read_*record_header()
  functions, which return int -- confirm against the implementation.
*/
#define RECHEADER_READ_ERROR (-1)
#define RECHEADER_READ_EOF   (-2)
44 
/*
  Size of one page in the transaction log.
  Must be a power of 2 and a multiple of DISK_DRIVE_SECTOR_SIZE,
  i.e. DISK_DRIVE_SECTOR_SIZE * 2^N.
*/
#define TRANSLOG_PAGE_SIZE (8U * 1024)
51 
52 #include "ma_loghandler_lsn.h"
53 #include "trnman_public.h"
54 
55 /* short transaction ID type */
56 typedef uint16 SHORT_TRANSACTION_ID;
57 
58 struct st_maria_handler;
59 
/*
  Sizes of values as stored in the log; the store/korr macros in this
  header are written to match them.
  Changing one of the "SIZE" below will break backward-compatibility!
*/

/*
  NOTE(review): these two previously sat under the comment "Length of CRC at
  end of pages", which does not match their names.  Presumably they are the
  sizes of the page and count parts of a row extent -- confirm.
*/
#define ROW_EXTENT_PAGE_SIZE    5
#define ROW_EXTENT_COUNT_SIZE   2

/* Size of file id in logs */
#define FILEID_STORE_SIZE       2
/* Size of page reference in log */
#define PAGE_STORE_SIZE         ROW_EXTENT_PAGE_SIZE
/* Size of page ranges in log */
#define PAGERANGE_STORE_SIZE    ROW_EXTENT_COUNT_SIZE
#define DIRPOS_STORE_SIZE       1
#define CLR_TYPE_STORE_SIZE     1
/* If table has live checksum we store its changes in UNDOs */
#define HA_CHECKSUM_STORE_SIZE  4
#define KEY_NR_STORE_SIZE       1
#define PAGE_LENGTH_STORE_SIZE  2
76 
/*
  Store (write) and korr (read) helpers matching the *_STORE_SIZE values.
  T is the destination buffer, A the value to store, P the buffer to read.
  Each pair is kept together so the two directions are easy to compare.
*/

/* file id: 2 bytes */
#define fileid_store(T,A)       int2store(T, A)
#define fileid_korr(P)          uint2korr(P)

/* page reference: 5 bytes; the value is widened to ulonglong first */
#define page_store(T,A)         int5store(T, ((ulonglong) (A)))
#define page_korr(P)            uint5korr(P)

/* page range: 2 bytes */
#define pagerange_store(T,A)    int2store(T, A)
#define pagerange_korr(P)       uint2korr(P)

/* live checksum: 4 bytes */
#define ha_checksum_store(T,A)  int4store(T, A)
#define ha_checksum_korr(P)     uint4korr(P)

/* single-byte values: stored/read directly through a uchar pointer */
#define dirpos_store(T,A)       ((*(uchar *) (T)) = (A))
#define dirpos_korr(P)          (*(const uchar *) (P))
#define clr_type_store(T,A)     ((*(uchar *) (T)) = (A))
#define clr_type_korr(P)        (*(const uchar *) (P))
#define key_nr_store(T,A)       ((*(uchar *) (T)) = (A))
#define key_nr_korr(P)          (*(const uchar *) (P))
92 
/*
  Size of a disk drive sector.  Writing one sector to disk is assumed
  to be an atomic operation.
*/
#define DISK_DRIVE_SECTOR_SIZE 512U

/* Number of positions reserved in the array of parts of a log record */
#define TRANSLOG_INTERNAL_PARTS 2
101 
/*
  Types of records in the transaction log.

  Every value is spelled out; the numbers are exactly the ones the
  implicit enum numbering produced before.  LOGREC_FIRST_FREE is left
  implicit on purpose so that it always follows the last real type.
  NOTE(review): presumably these numbers are what gets written into the
  log, so existing values must not be changed -- confirm.
*/

enum translog_record_type
{
  LOGREC_RESERVED_FOR_CHUNKS23=      0,
  LOGREC_REDO_INSERT_ROW_HEAD=       1,
  LOGREC_REDO_INSERT_ROW_TAIL=       2,
  LOGREC_REDO_NEW_ROW_HEAD=          3,
  LOGREC_REDO_NEW_ROW_TAIL=          4,
  LOGREC_REDO_INSERT_ROW_BLOBS=      5,
  LOGREC_REDO_PURGE_ROW_HEAD=        6,
  LOGREC_REDO_PURGE_ROW_TAIL=        7,
  LOGREC_REDO_FREE_BLOCKS=           8,
  LOGREC_REDO_FREE_HEAD_OR_TAIL=     9,
  LOGREC_REDO_DELETE_ROW=            10,  /* unused */
  LOGREC_REDO_UPDATE_ROW_HEAD=       11,  /* unused */
  LOGREC_REDO_INDEX=                 12,
  LOGREC_REDO_INDEX_NEW_PAGE=        13,
  LOGREC_REDO_INDEX_FREE_PAGE=       14,
  LOGREC_REDO_UNDELETE_ROW=          15,
  LOGREC_CLR_END=                    16,
  LOGREC_PURGE_END=                  17,
  LOGREC_UNDO_ROW_INSERT=            18,
  LOGREC_UNDO_ROW_DELETE=            19,
  LOGREC_UNDO_ROW_UPDATE=            20,
  LOGREC_UNDO_KEY_INSERT=            21,
  LOGREC_UNDO_KEY_INSERT_WITH_ROOT=  22,
  LOGREC_UNDO_KEY_DELETE=            23,
  LOGREC_UNDO_KEY_DELETE_WITH_ROOT=  24,
  LOGREC_PREPARE=                    25,
  LOGREC_PREPARE_WITH_UNDO_PURGE=    26,
  LOGREC_COMMIT=                     27,
  LOGREC_COMMIT_WITH_UNDO_PURGE=     28,
  LOGREC_CHECKPOINT=                 29,
  LOGREC_REDO_CREATE_TABLE=          30,
  LOGREC_REDO_RENAME_TABLE=          31,
  LOGREC_REDO_DROP_TABLE=            32,
  LOGREC_REDO_DELETE_ALL=            33,
  LOGREC_REDO_REPAIR_TABLE=          34,
  LOGREC_FILE_ID=                    35,
  LOGREC_LONG_TRANSACTION_ID=        36,
  LOGREC_INCOMPLETE_LOG=             37,
  LOGREC_INCOMPLETE_GROUP=           38,
  LOGREC_UNDO_BULK_INSERT=           39,
  LOGREC_REDO_BITMAP_NEW_PAGE=       40,
  LOGREC_IMPORTED_TABLE=             41,
  LOGREC_DEBUG_INFO=                 42,
  LOGREC_FIRST_FREE,                      /* one past the last type in use */
  LOGREC_RESERVED_FUTURE_EXTENSION=  63
};
/* Maximum number of record types, can't be extended */
#define LOGREC_NUMBER_OF_TYPES 64
154 
/*
  Type of operations in LOGREC_REDO_INDEX.
  Values are written out explicitly; they equal the previous implicit ones.
*/

enum en_key_op
{
  KEY_OP_NONE=            0,   /* Not used */
  KEY_OP_OFFSET=          1,   /* Set current position */
  KEY_OP_SHIFT=           2,   /* Shift up/or down at current position */
  KEY_OP_CHANGE=          3,   /* Change data at current position */
  KEY_OP_ADD_PREFIX=      4,   /* Insert data at start of page */
  KEY_OP_DEL_PREFIX=      5,   /* Delete data at start of page */
  KEY_OP_ADD_SUFFIX=      6,   /* Insert data at end of page */
  KEY_OP_DEL_SUFFIX=      7,   /* Delete data at end of page */
  KEY_OP_CHECK=           8,   /* For debugging; CRC of used part of page */
  KEY_OP_MULTI_COPY=      9,   /* List of memcpy()s with fixed-len sources in page */
  KEY_OP_SET_PAGEFLAG=    10,  /* Set pageflag from next byte */
  KEY_OP_COMPACT_PAGE=    11,  /* Compact key page */
  KEY_OP_MAX_PAGELENGTH=  12,  /* Set page to max page length */
  KEY_OP_DEBUG=           13,  /* Entry for storing what triggered redo_index */
  KEY_OP_DEBUG_2=         14   /* Entry for pagelengths */
};
175 
/*
  Debug markers.  The numbers that used to be kept in trailing comments
  are now the actual initializers; the values are unchanged.
  NOTE(review): presumably stored with KEY_OP_DEBUG entries -- confirm.
*/
enum en_key_debug
{
  KEY_OP_DEBUG_RTREE_COMBINE=      0,
  KEY_OP_DEBUG_RTREE_SPLIT=        1,
  KEY_OP_DEBUG_RTREE_SET_KEY=      2,
  KEY_OP_DEBUG_FATHER_CHANGED_1=   3,
  KEY_OP_DEBUG_FATHER_CHANGED_2=   4,
  KEY_OP_DEBUG_LOG_SPLIT=          5,
  KEY_OP_DEBUG_LOG_ADD_1=          6,
  KEY_OP_DEBUG_LOG_ADD_2=          7,
  KEY_OP_DEBUG_LOG_ADD_3=          8,
  KEY_OP_DEBUG_LOG_ADD_4=          9,
  KEY_OP_DEBUG_LOG_PREFIX_1=       10,
  KEY_OP_DEBUG_LOG_PREFIX_2=       11,
  KEY_OP_DEBUG_LOG_PREFIX_3=       12,
  KEY_OP_DEBUG_LOG_PREFIX_4=       13,
  KEY_OP_DEBUG_LOG_PREFIX_5=       14,
  KEY_OP_DEBUG_LOG_DEL_CHANGE_1=   15,
  KEY_OP_DEBUG_LOG_DEL_CHANGE_2=   16,
  KEY_OP_DEBUG_LOG_DEL_CHANGE_3=   17,
  KEY_OP_DEBUG_LOG_DEL_CHANGE_RT=  18,
  KEY_OP_DEBUG_LOG_DEL_PREFIX=     19,
  KEY_OP_DEBUG_LOG_MIDDLE=         20
};
200 
201 
/* Kinds of debug information accepted by translog_log_debug_info() */
enum translog_debug_info_type
{
  LOGREC_DEBUG_INFO_QUERY= 0
};
206 
207 /* Size of log file; One log file is restricted to 4G */
208 typedef uint32 translog_size_t;
209 
210 #define TRANSLOG_RECORD_HEADER_MAX_SIZE 1024U
211 
212 typedef struct st_translog_group_descriptor
213 {
214   TRANSLOG_ADDRESS addr;
215   uint8 num;
216 } TRANSLOG_GROUP;
217 
218 
219 typedef struct st_translog_header_buffer
220 {
221   /* LSN of the read record */
222   LSN lsn;
223   /* array of groups descriptors, can be used only if groups_no > 0 */
224   TRANSLOG_GROUP *groups;
225   /* short transaction ID or 0 if it has no sense for the record */
226   SHORT_TRANSACTION_ID short_trid;
227   /*
228      The Record length in buffer (including read header, but excluding
229      hidden part of record (type, short TrID, length)
230   */
231   translog_size_t record_length;
232   /*
233      Buffer for write decoded header of the record (depend on the record
234      type)
235   */
236   uchar header[TRANSLOG_RECORD_HEADER_MAX_SIZE];
237   /* number of groups listed in  */
238   uint groups_no;
239   /* in multi-group number of chunk0 pages (valid only if groups_no > 0) */
240   uint chunk0_pages;
241   /* type of the read record */
242   enum translog_record_type type;
243   /* chunk 0 data address (valid only if groups_no > 0) */
244   TRANSLOG_ADDRESS chunk0_data_addr;
245    /*
246      Real compressed LSN(s) size economy (<number of LSN(s)>*7 - <real_size>)
247   */
248   int16 compressed_LSN_economy;
249   /* short transaction ID or 0 if it has no sense for the record */
250   uint16 non_header_data_start_offset;
251   /* non read body data length in this first chunk */
252   uint16 non_header_data_len;
253   /* chunk 0 data size (valid only if groups_no > 0) */
254   uint16 chunk0_data_len;
255 } TRANSLOG_HEADER_BUFFER;
256 
257 
258 typedef struct st_translog_scanner_data
259 {
260   uchar buffer[TRANSLOG_PAGE_SIZE];             /* buffer for page content */
261   TRANSLOG_ADDRESS page_addr;                  /* current page address */
262   /* end of the log which we saw last time */
263   TRANSLOG_ADDRESS horizon;
264   TRANSLOG_ADDRESS last_file_page;             /* Last page on in this file */
265   uchar *page;                                  /* page content pointer */
266   /* direct link on the current page or NULL if not supported/requested */
267   PAGECACHE_BLOCK_LINK *direct_link;
268   /* offset of the chunk in the page */
269   translog_size_t page_offset;
270   /* set horizon only once at init */
271   my_bool fixed_horizon;
272   /* try to get direct link on the page if it is possible */
273   my_bool use_direct_link;
274 } TRANSLOG_SCANNER_DATA;
275 
276 
277 typedef struct st_translog_reader_data
278 {
279   TRANSLOG_HEADER_BUFFER header;                /* Header */
280   TRANSLOG_SCANNER_DATA scanner;                /* chunks scanner */
281   translog_size_t body_offset;                  /* current chunk body offset */
282   /* data offset from the record beginning */
283   translog_size_t current_offset;
284   /* number of bytes read in header */
285   uint16 read_header;
286   uint16 chunk_size;                            /* current chunk size */
287   uint current_group;                           /* current group */
288   uint current_chunk;                           /* current chunk in the group */
289   my_bool eor;                                  /* end of the record */
290 } TRANSLOG_READER_DATA;
291 
292 C_MODE_START
293 
/*
  Record types for unit tests: a fixed and a variable length example
  for 0, 1 and 2 LSNs each.
*/
#define LOGREC_FIXED_RECORD_0LSN_EXAMPLE     1
#define LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE  2
#define LOGREC_FIXED_RECORD_1LSN_EXAMPLE     3
#define LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE  4
#define LOGREC_FIXED_RECORD_2LSN_EXAMPLE     5
#define LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE  6
301 
302 extern void translog_example_table_init();
303 extern void translog_table_init();
304 #define translog_init(D,M,V,I,C,F,R) \
305   translog_init_with_table(D,M,V,I,C,F,R,&translog_table_init,0)
306 extern my_bool translog_init_with_table(const char *directory,
307                                         uint32 log_file_max_size,
308                                         uint32 server_version,
309                                         uint32 server_id,
310                                         PAGECACHE *pagecache,
311                                         uint flags,
312                                         my_bool readonly,
313                                         void (*init_table_func)(),
314                                         my_bool no_error);
315 #ifndef DBUG_OFF
316 void check_translog_description_table(int num);
317 #endif
318 
319 extern my_bool
320 translog_write_record(LSN *lsn, enum translog_record_type type, TRN *trn,
321                       MARIA_HA *tbl_info,
322                       translog_size_t rec_len, uint part_no,
323                       LEX_CUSTRING *parts_data, uchar *store_share_id,
324                       void *hook_arg);
325 
326 extern void translog_destroy();
327 
328 extern int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff);
329 
330 extern void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff);
331 
332 extern translog_size_t translog_read_record(LSN lsn,
333 					    translog_size_t offset,
334 					    translog_size_t length,
335 					    uchar *buffer,
336 					    struct st_translog_reader_data
337 					    *data);
338 
339 extern my_bool translog_flush(TRANSLOG_ADDRESS lsn);
340 
341 extern my_bool translog_scanner_init(LSN lsn,
342 				     my_bool fixed_horizon,
343 				     struct st_translog_scanner_data *scanner,
344                                      my_bool use_direct_link);
345 extern void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner);
346 
347 extern int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
348                                             TRANSLOG_HEADER_BUFFER *buff);
349 extern LSN translog_get_file_max_lsn_stored(uint32 file);
350 extern my_bool translog_purge(TRANSLOG_ADDRESS low);
351 extern my_bool translog_is_file(uint file_no);
352 extern void translog_lock();
353 extern void translog_unlock();
354 extern void translog_lock_handler_assert_owner();
355 extern TRANSLOG_ADDRESS translog_get_horizon();
356 extern TRANSLOG_ADDRESS translog_get_horizon_no_lock();
357 extern int translog_assign_id_to_share(struct st_maria_handler *tbl_info,
358                                        TRN *trn);
359 extern void translog_deassign_id_from_share(struct st_maria_share *share);
360 extern void
361 translog_assign_id_to_share_from_recovery(struct st_maria_share *share,
362                                           uint16 id);
363 extern my_bool translog_walk_filenames(const char *directory,
364                                        my_bool (*callback)(const char *,
365                                                            const char *));
366 extern void dump_page(uchar *buffer, File handler);
367 extern my_bool translog_log_debug_info(TRN *trn,
368                                        enum translog_debug_info_type type,
369                                        uchar *info, size_t length);
370 extern void translog_disable_purge(void);
371 extern void translog_enable_purge(void);
372 
/* State of the log handler; values are the same as with implicit numbering */
enum enum_translog_status
{
  /* no initialization done or error during initialization */
  TRANSLOG_UNINITED=  0,
  /* transaction log is functioning */
  TRANSLOG_OK=        1,
  /* read only mode due to write errors */
  TRANSLOG_READONLY=  2,
  /* going to shutdown the loghandler */
  TRANSLOG_SHUTDOWN=  3
};
380 extern enum enum_translog_status translog_status;
381 extern ulonglong translog_syncs; /* Number of sync()s */
382 
383 void translog_soft_sync(my_bool mode);
384 void translog_hard_group_commit(my_bool mode);
385 int translog_soft_sync_start(void);
386 void  translog_soft_sync_end(void);
387 void translog_sync();
388 void translog_set_group_commit_interval(uint32 interval);
389 extern void check_skipped_lsn(MARIA_HA *info, LSN lsn, my_bool index_file,
390                               pgcache_page_no_t page);
391 
392 /*
393   all the rest added because of recovery; should we make
394   ma_loghandler_for_recovery.h ?
395 */
396 
397 /*
398   Information from transaction log file header
399 */
400 
401 typedef struct st_loghandler_file_info
402 {
403   /*
404     LSN_IMPOSSIBLE for current file (not finished file).
405     Maximum LSN of the record which parts stored in the
406     file.
407   */
408   LSN max_lsn;
409   ulonglong timestamp;   /* Time stamp */
410   ulong maria_version;   /* Version of maria loghandler */
411   ulong mysql_version;   /* Version of mysql server */
412   ulong server_id;       /* Server ID */
413   ulong page_size;       /* Loghandler page size */
414   ulong file_number;     /* Number of the file (from the file header) */
415 } LOGHANDLER_FILE_INFO;
416 
/*
  Described as an array's size.
  NOTE(review): presumably also the largest share id, as it is the largest
  value that fits in the uint16 'id' of
  translog_assign_id_to_share_from_recovery() -- confirm.
*/
#define SHARE_ID_MAX 65535
418 
419 extern void translog_fill_overhead_table();
420 extern void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc,
421                                            uchar *page_buff);
422 extern LSN translog_first_lsn_in_log();
423 extern LSN translog_first_theoretical_lsn();
424 extern LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon);
425 extern my_bool translog_purge_at_flush();
426 extern uint32 translog_get_first_file(TRANSLOG_ADDRESS horizon);
427 extern uint32 translog_get_first_needed_file();
428 extern char *translog_filename_by_fileno(uint32 file_no, char *path);
429 extern void translog_set_file_size(uint32 size);
430 
431 /* record parts descriptor */
432 struct st_translog_parts
433 {
434   /* full record length */
435   translog_size_t record_length;
436   /* full record length with chunk headers */
437   translog_size_t total_record_length;
438   /* current part index */
439   uint current;
440   /* total number of elements in parts */
441   uint elements;
442   /* array of parts */
443   LEX_CUSTRING *parts;
444 };
445 
446 typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type,
447                                      TRN *trn,
448                                      struct st_maria_handler *tbl_info,
449                                      void *hook_arg);
450 
451 typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type,
452                                     TRN *trn,
453                                     struct st_maria_handler *tbl_info,
454                                     LSN *lsn, void *hook_arg);
455 
456 typedef uint16(*read_rec_hook) (enum translog_record_type type,
457                                 uint16 read_length, uchar *read_buff,
458                                 uchar *decoded_buff);
459 
460 
/* Record classes; values are written out and equal the implicit ones */
enum record_class
{
  LOGRECTYPE_NOT_ALLOWED=        0,
  LOGRECTYPE_VARIABLE_LENGTH=    1,
  LOGRECTYPE_PSEUDOFIXEDLENGTH=  2,
  LOGRECTYPE_FIXEDLENGTH=        3
};

/* Position of a record type relative to a group of records */
enum enum_record_in_group
{
  LOGREC_NOT_LAST_IN_GROUP=  0,
  LOGREC_LAST_IN_GROUP=      1,
  LOGREC_IS_GROUP_ITSELF=    2
};
473 
474 /*
475   Descriptor of log record type
476 */
477 typedef struct st_log_record_type_descriptor
478 {
479   /* internal class of the record */
480   enum record_class rclass;
481   /*
482     length for fixed-size record, pseudo-fixed record
483     length with uncompressed LSNs
484   */
485   uint16 fixed_length;
486   /* how much record body (belonged to headers too) read with headers */
487   uint16 read_header_len;
488   /* HOOK for writing the record called before lock */
489   prewrite_rec_hook prewrite_hook;
490   /* HOOK for writing the record called when LSN is known, inside lock */
491   inwrite_rec_hook inwrite_hook;
492   /* HOOK for reading headers */
493   read_rec_hook read_hook;
494   /*
495     For pseudo fixed records number of compressed LSNs followed by
496     system header
497   */
498   int16 compressed_LSN;
499   /*  the rest is for maria_read_log & Recovery */
500   /** @brief for debug error messages or "maria_read_log" command-line tool */
501   const char *name;
502   enum enum_record_in_group record_in_group;
503   /* a function to execute when we see the record during the REDO phase */
504   int (*record_execute_in_redo_phase)(const TRANSLOG_HEADER_BUFFER *);
505   /* a function to execute when we see the record during the UNDO phase */
506   int (*record_execute_in_undo_phase)(const TRANSLOG_HEADER_BUFFER *, TRN *);
507 } LOG_DESC;
508 
509 extern LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES];
510 
511 typedef enum
512 {
513   TRANSLOG_GCOMMIT_NONE,
514   TRANSLOG_GCOMMIT_HARD,
515   TRANSLOG_GCOMMIT_SOFT
516 } enum_maria_group_commit;
517 extern ulong maria_group_commit;
518 extern ulong maria_group_commit_interval;
519 typedef enum
520 {
521   TRANSLOG_PURGE_IMMIDIATE,
522   TRANSLOG_PURGE_EXTERNAL,
523   TRANSLOG_PURGE_ONDEMAND
524 } enum_maria_translog_purge_type;
525 extern ulong log_purge_type;
526 extern ulong log_file_size;
527 extern uint  log_purge_disabled;                /* For backup */
528 
529 typedef enum
530 {
531   TRANSLOG_SYNC_DIR_NEVER,
532   TRANSLOG_SYNC_DIR_NEWFILE,
533   TRANSLOG_SYNC_DIR_ALWAYS
534 } enum_maria_sync_log_dir;
535 extern ulong sync_log_dir;
536 
537 C_MODE_END
538 #endif
539