1 /* Copyright (C) 2007 MySQL AB & Sanja Belkin. 2010 Monty Program Ab.
2 Copyright (c) 2020, MariaDB Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16
17 #include "maria_def.h"
18 #include "trnman.h"
19 #include "ma_blockrec.h" /* for some constants and in-write hooks */
20 #include "ma_key_recover.h" /* For some in-write hooks */
21 #include "ma_checkpoint.h"
22 #include "ma_servicethread.h"
23 #include "ma_recovery.h"
24 #include "ma_loghandler_lsn.h"
25 #include "ma_recovery_util.h"
26
/*
  On Windows, neither my_open() nor mysql_file_sync() work for directories.
  There is also no need to flush filesystem changes there, i.e. to sync()
  directories. So on Windows sync_dir() expands to the constant 0 and does
  not evaluate its arguments; elsewhere it forwards both arguments to
  mysql_file_sync().
*/
#ifdef __WIN__
#define sync_dir(A,B) 0
#else
#define sync_dir(A,B) mysql_file_sync(A,B)
#endif
37
38 /**
39 @file
40 @brief Module which writes and reads to a transaction log
41 */
42
/* 0xFF can never be a valid first byte of a chunk */
#define TRANSLOG_FILLER 0xFF

/* Number of opened log files in the pagecache (should be at least 2) */
#define OPENED_FILES_NUM 3
#define CACHED_FILES_NUM 5
#define CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT 7
/*
  The hash headers are pulled in only when CACHED_FILES_NUM is above the
  direct search limit; with the values defined above (5 and 7) this branch
  is not compiled.
*/
#if CACHED_FILES_NUM > CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT
#include <hash.h>
#include <m_ctype.h>
#endif
54
/**
  @brief Mutex handed to soft_sync_control below.

  NOTE(review): the original comment said "protects checkpoint_in_progress";
  that wording mentions checkpoints while the object is named for soft sync,
  so it looks inherited from another module -- confirm what it guards here.
*/
static mysql_mutex_t LOCK_soft_sync;
/**
  @brief Condition handed to soft_sync_control below.

  NOTE(review): originally described as "for killing the background
  checkpoint thread"; presumably used to stop the soft sync service thread
  -- confirm.
*/
static mysql_cond_t COND_soft_sync;
/**
  @brief Control structure of a background service thread; it is wired to
  LOCK_soft_sync and COND_soft_sync.

  NOTE(review): originally described as belonging to the "checkpoint
  background thread"; judging by the names it serves soft sync -- confirm.
*/
static MA_SERVICE_THREAD_CONTROL soft_sync_control=
  {0, FALSE, FALSE, &LOCK_soft_sync, &COND_soft_sync};

/* NOTE(review): presumably non-zero disables purging of log files -- confirm */
uint log_purge_disabled= 0;
64
65
/* Transaction log file descriptor */
typedef struct st_translog_file
{
  /* Number of the log file */
  uint32 number;
  /* Page cache file handler of the log file */
  PAGECACHE_FILE handler;
  /* NOTE(review): presumably set when the file went through recovery -- confirm */
  my_bool was_recovered;
  /* NOTE(review): presumably TRUE when the file content is synced -- confirm */
  my_bool is_sync;
} TRANSLOG_FILE;
74
/* Record buffer size in bytes (should be TRANSLOG_PAGE_SIZE * n) */
#define TRANSLOG_WRITE_BUFFER (1024*1024)
/*
  pagecache_read/write/inject() use bmove512() on their buffers so those must
  be long-aligned, which we guarantee by using the type below: the ulonglong
  member exists only to force the alignment of the union, the page data
  lives in 'buffer'.
*/
typedef union
{
  ulonglong dummy;
  uchar buffer[TRANSLOG_PAGE_SIZE];
} TRANSLOG_PAGE_SIZE_BUFF;
86
/* Number of slots in TRUNSLOG_USED_BUFFERS.buff */
#define MAX_TRUNSLOG_USED_BUFFERS 3

/*
  Fixed-size list of log buffers together with two indexes into it.
  Both indexes are reset to 0 by used_buffs_init().
*/
typedef struct
{
  struct st_translog_buffer *buff[MAX_TRUNSLOG_USED_BUFFERS];
  /* NOTE(review): presumably the next slot of 'buff' to fill -- confirm */
  uint8 wrt_ptr;
  /* NOTE(review): presumably the next slot of 'buff' to unlock -- confirm */
  uint8 unlck_ptr;
} TRUNSLOG_USED_BUFFERS;
95
96 static void
used_buffs_init(TRUNSLOG_USED_BUFFERS * buffs)97 used_buffs_init(TRUNSLOG_USED_BUFFERS *buffs)
98 {
99 buffs->unlck_ptr= buffs->wrt_ptr= 0;
100 }
101
/*
  Forward declarations of the remaining TRUNSLOG_USED_BUFFERS helpers; their
  definitions are not in this part of the file.
  NOTE(review): judging by the names they append a buffer to the list,
  record that a buffer can be unlocked, and unlock what is pending --
  confirm against the definitions.
*/
static void
used_buffs_add(TRUNSLOG_USED_BUFFERS *buffs,
               struct st_translog_buffer *buff);

static void
used_buffs_register_unlock(TRUNSLOG_USED_BUFFERS *buffs,
                           struct st_translog_buffer *buff);

static void
used_buffs_urgent_unlock(TRUNSLOG_USED_BUFFERS *buffs);
112
/* Minimum chunk length */
#define TRANSLOG_MIN_CHUNK 3
/*
  Number of buffers used by the loghandler

  Should be at least 4, because one thread can block up to 2 buffers in
  normal circumstances (less than half of one and the full other one, or
  the just switched one and the other). But if we meet the end of the file
  in the middle and have to switch buffers it will be 3, + 1 buffer for
  flushing/writing.
  We have a bigger number here for higher concurrency and to make division
  faster.

  The number should be a power of 2 to be fast.
*/
#define TRANSLOG_BUFFERS_NO 8
/* Number of bytes (+ header) which can be unused on first page in sequence */
#define TRANSLOG_MINCHUNK_CONTENT 1
/* Version of the log file */
#define TRANSLOG_VERSION_ID 10000 /* 1.00.00 */

#define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */

/* Maximum length of compressed LSNs (the worst case of whole LSN storing) */
#define COMPRESSED_LSN_MAX_STORE_SIZE (2 + LSN_STORE_SIZE)
#define MAX_NUMBER_OF_LSNS_PER_RECORD 2


/*
  Max LSN calculation for a buffer: the buffer's own last_lsn, or the
  prev_last_lsn it carries when its last_lsn is LSN_IMPOSSIBLE.
  Note that the argument is evaluated more than once.
*/
#define BUFFER_MAX_LSN(B) \
  ((B)->last_lsn == LSN_IMPOSSIBLE ? (B)->prev_last_lsn : (B)->last_lsn)
143
/* Log write buffer descriptor */
struct st_translog_buffer
{
  /*
    Cache for the current log. Comes first to be aligned for bmove512() in
    pagecache_inject()
  */
  uchar buffer[TRANSLOG_WRITE_BUFFER];
  /*
    Maximum LSN of the records which end in this buffer (or LSN_IMPOSSIBLE
    if no record ends here); see BUFFER_MAX_LSN()
  */
  LSN last_lsn;
  /* last_lsn of the previous buffer or LSN_IMPOSSIBLE if it is the very first one */
  LSN prev_last_lsn;
  /* Offset of this buffer in the file */
  TRANSLOG_ADDRESS offset;
  /*
    Offset of the next buffer in the file (it is not always offset + size,
    in case of flush by LSN it can be offset + size - TRANSLOG_PAGE_SIZE)
  */
  TRANSLOG_ADDRESS next_buffer_offset;
  /* Offset of the previous buffer, used to detect that its flush finished */
  TRANSLOG_ADDRESS prev_buffer_offset;
  /*
    If the buffer was forced to close, this saves the value of its horizon,
    otherwise it is LSN_IMPOSSIBLE
  */
  TRANSLOG_ADDRESS pre_force_close_horizon;
  /*
    How much is written (or will be written when copy_to_buffer_in_progress
    becomes 0) to this buffer
  */
  translog_size_t size;
  /*
    When moving from one log buffer to another, we write the last of the
    previous buffer to file and then move to start using the new log
    buffer. In the case of a partly filled last page, this page is not moved
    to the start of the new buffer but instead we set 'skipped_data'
    to tell us how much data at the beginning of the buffer is not
    relevant.
  */
  uint skipped_data;
  /* File handler for this buffer */
  TRANSLOG_FILE *file;
  /* Threads which are waiting for buffer filling/freeing */
  mysql_cond_t waiting_filling_buffer;
  /*
    Number of records which are being copied into the buffer.

    Controlled via translog_buffer_increase_writers() and
    translog_buffer_decrease_writers().

    1 Simple case: translog_force_current_buffer_to_finish, both are called
      in the same procedure.

    2 Simple case: translog_write_variable_record_1group:
      translog_advance_pointer() increases the writers of the buffer and
      translog_buffer_decrease_writers() decreases them.

    Usual case:
     1) translog_advance_pointer (i.e. reserve place for future writing)
        increases the writers for all buffers where place is reserved.
        Simplest case: all the space is reserved in one buffer.
        Complex case: end of the first buffer, all of the second buffer,
        beginning of the third buffer.
     2) When we finish writing, translog_chaser_page_next() will be
        called and unlocks the buffer by decreasing the number of writers.
  */
  uint copy_to_buffer_in_progress;
  /* List of threads waiting for the buffer to become ready */
  struct st_my_thread_var *waiting_flush;
  /*
    If true then the previous buffer overlaps with this one (due to a flush
    of the loghandler, the last page of that buffer is the same as the first
    page of this buffer) and has to be written first (because it contains
    the old content of the page which is present in both buffers)
  */
  my_bool overlay;
  uint buffer_no;
  /*
    Lock for the buffer.

    The current buffer also locks the whole handler (if one wants to lock
    the handler one should lock the current buffer).

    Buffers are locked only in one direction (with overflow and beginning
    from the first buffer). If we keep a lock on buffer N we can lock only
    buffer N+1 (never N-1).

    One thread does not lock more than 2 buffers at a time, so to make a
    dead lock N threads (where N equals the number of buffers) would each
    have to take one buffer and try to lock the next. But that is impossible
    because there are only 2 cases when a thread takes 2 buffers: 1) one
    thread finishes the current buffer (where the horizon is) and starts the
    next (to which the horizon moves). 2) flush starts from the buffer after
    the current (oldest) one and goes till the current one, crabbing along
    the buffer sequence. And there is only one flush at a time
    (they are serialised).

    Because of the above and the number of buffers being 5 we can't get a
    dead lock (it is impossible to get all 5 buffers locked simultaneously).
    NOTE(review): the text says 5 buffers while TRANSLOG_BUFFERS_NO is
    defined as 8; the argument presumably still holds -- confirm.
  */
  mysql_mutex_t mutex;
  /*
    Some thread is going to close the buffer and it should be
    done only by that thread
  */
  my_bool is_closing_buffer;
  /*
    Version of the buffer; increases every time the buffer is flushed.
    Together with file and offset it allows detecting buffer changes
  */
  uint8 ver;

  /*
    When the previous buffer is sent to disk it sets its address here to
    allow detecting when that is done
    (we have to keep it in this buffer to lock buffers only in one direction).
  */
  TRANSLOG_ADDRESS prev_sent_to_disk;
  mysql_cond_t prev_sent_to_disk_cond;
};
266
267
/* Position inside a log buffer (the descriptor keeps one for the horizon) */
struct st_buffer_cursor
{
  TRUNSLOG_USED_BUFFERS buffs;
  /* Pointer into the buffer */
  uchar *ptr;
  /* Current buffer */
  struct st_translog_buffer *buffer;
  /* How many bytes we wrote on the current page */
  uint16 current_page_fill;
  /*
    How many times we wrote the page to disk during the flushing process
    (for sector protection).
  */
  uint16 write_counter;
  /* Previous write offset */
  uint16 previous_offset;
  /* Number of the current buffer */
  uint8 buffer_no;
  /*
    True if it is just filling the buffer after advancing the pointer to
    the horizon.
  */
  my_bool chaser;
  /*
    Is the current page of the cursor already finished (sector protection
    should be applied if it is needed)
  */
  my_bool protected;
};
297
298
/*
  One bit per log buffer (see dirty_buffer_mask below); uint8 offers 8 bits
  and TRANSLOG_BUFFERS_NO is 8.
*/
typedef uint8 dirty_buffer_mask_t;

/* State of the whole log handler; the single instance is log_descriptor */
struct st_translog_descriptor
{
  /* *** Parameters of the log handler *** */

  /* Page cache for the log reads */
  PAGECACHE *pagecache;
  uint flags;
  /* File open flags */
  uint open_flags;
  /* Max size of one log file (for new logs creation) */
  uint32 log_file_max_size;
  uint32 server_version;
  /* Server ID (used for replication) */
  uint32 server_id;
  /* Loghandler's buffer capacity in case of chunk 2 filling */
  uint32 buffer_capacity_chunk_2;
  /*
    Half of the buffer capacity in case of chunk 2 filling,
    used to decide whether we write a record in one group or many.
    It is stored in a variable just to avoid a division every
    time we need it.
  */
  uint32 half_buffer_capacity_chunk_2;
  /* Page overhead calculated by flags (whether CRC is enabled, etc) */
  uint16 page_overhead;
  /*
    Page capacity ("useful load") calculated by flags
    (TRANSLOG_PAGE_SIZE - page_overhead-1)
  */
  uint16 page_capacity_chunk_2;
  /* Path to the directory where we store the log files */
  char directory[FN_REFLEN];

  /* *** Current state of the log handler *** */
  /* List of opened files */
  DYNAMIC_ARRAY open_files;
  /* Min/max number of file in the array */
  uint32 max_file, min_file;
  /* The opened files list guard */
  mysql_rwlock_t open_files_lock;

  /*
    File descriptor of the directory where we store log files, kept for
    syncing it.
  */
  File directory_fd;
  /* Buffers for log writing */
  struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO];
  /* Mask where 1 in position N means that buffer N is not flushed */
  dirty_buffer_mask_t dirty_buffer_mask;
  /* Protection of the above variable */
  mysql_mutex_t dirty_buffer_mask_lock;
  /*
     horizon - visible end of the log (here it is the absolute end of the
     log: the position where the next chunk can start)
  */
  TRANSLOG_ADDRESS horizon;
  /* Horizon buffer cursor */
  struct st_buffer_cursor bc;
  /* Maximum LSN of the current (not finished) file */
  LSN max_lsn;

  /*
    Last flushed LSN (protected by log_flush_lock).
    Pointers in the log are ordered like this:
    last_lsn_checked <= flushed <= sent_to_disk <= in_buffers_only <=
    max_lsn <= horizon
  */
  LSN flushed;
  /* Last LSN sent to the disk (but maybe not written yet) */
  LSN sent_to_disk;
  /* Horizon from which the log started after initialization */
  TRANSLOG_ADDRESS log_start;
  TRANSLOG_ADDRESS previous_flush_horizon;
  /* Everything after this address is not sent to disk yet */
  TRANSLOG_ADDRESS in_buffers_only;
  /* Protection of sent_to_disk and in_buffers_only */
  mysql_mutex_t sent_to_disk_lock;
  /*
    Protects 'flushed' (see above) and serializes flushes (will
    be removed in v1.5)
  */
  mysql_mutex_t log_flush_lock;
  mysql_cond_t log_flush_cond;
  mysql_cond_t new_goal_cond;

  /* Protects changing of headers of finished files (max_lsn) */
  mysql_mutex_t file_header_lock;

  /*
    Sorted array (with protection) of files where we started the writing
    process and so we can't give the last LSN yet
  */
  mysql_mutex_t unfinished_files_lock;
  DYNAMIC_ARRAY unfinished_files;

  /*
    Minimum number of the still needed file, calculated during the last
    translog_purge call
  */
  uint32 min_need_file;
  /* Purger data: minimum file in the log (or 0 if unknown) */
  uint32 min_file_number;
  /* Protects the purger from concurrent calls, and its data */
  mysql_mutex_t purger_lock;
  /* Last low water mark checked */
  LSN last_lsn_checked;
  /**
    Must be set to 0 under loghandler lock every time a new LSN
    is generated.
  */
  my_bool is_everything_flushed;
  /* True when a flush pass is in progress */
  my_bool flush_in_progress;
  /* The flush number (used to distinguish two flushes going one after another) */
  volatile int flush_no;
  /* Next flush pass variables */
  TRANSLOG_ADDRESS next_pass_max_lsn;
  pthread_t max_lsn_requester;
};
421
/* The only instance of the log handler state */
static struct st_translog_descriptor log_descriptor;

ulong log_purge_type= TRANSLOG_PURGE_IMMIDIATE;
ulong log_file_size= TRANSLOG_FILE_SIZE;
/* Mode of sync() of the log files directory */
ulong sync_log_dir= TRANSLOG_SYNC_DIR_NEWFILE;
ulong maria_group_commit= TRANSLOG_GCOMMIT_NONE;
ulong maria_group_commit_interval= 0;

/*
  Marker for end of log: END_OF_LOG is the address of a private byte.
  Note that the macro body is not parenthesized.
*/
static uchar end_of_log= 0;
#define END_OF_LOG &end_of_log
/**
  Switch for "soft" sync (no real sync() but periodical sync by service
  thread)
*/
static volatile my_bool soft_sync= FALSE;
/**
  Switch for "hard" group commit mode
*/
static volatile my_bool hard_group_commit= FALSE;
/**
  Interval of file numbers which have to be sync()ed
*/
static uint32 soft_sync_min= 0;
static uint32 soft_sync_max= 0;
static uint32 soft_need_sync= 1;
/**
  Stores the interval in microseconds
*/
static uint32 group_commit_wait= 0;

enum enum_translog_status translog_status= TRANSLOG_UNINITED;
ulonglong translog_syncs= 0; /* Number of sync()s */

/* Time of the last flush */
static ulonglong flush_start= 0;

/* Chunk types (stored in the two highest bits, see TRANSLOG_CHUNK_TYPE) */
#define TRANSLOG_CHUNK_LSN   0x00      /* 0 chunk refer as LSN (head or tail) */
#define TRANSLOG_CHUNK_FIXED (1 << 6)  /* 1 (pseudo)fixed record (also LSN) */
#define TRANSLOG_CHUNK_NOHDR (2 << 6)  /* 2 no head chunk (till page end) */
#define TRANSLOG_CHUNK_LNGTH (3 << 6)  /* 3 chunk with chunk length */
#define TRANSLOG_CHUNK_TYPE  (3 << 6)  /* Mask to get chunk type */
#define TRANSLOG_REC_TYPE    0x3F      /* Mask to get record type */
#define TRANSLOG_CHUNK_0_CONT 0x3F     /* the type to mark chunk 0 continue */

/* Compressed (relative) LSN constants */
#define TRANSLOG_CLSN_LEN_BITS 0xC0    /* Mask to get compressed LSN length */


/* An array that maps the id of a MARIA_SHARE to this MARIA_SHARE */
static MARIA_SHARE **id_to_share= NULL;

static my_bool translog_page_validator(int res, PAGECACHE_IO_HOOK_ARGS *args);

static my_bool translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner);
static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected);
LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon);


/*
  Table of log record type descriptors, indexed by record type.
  It is filled in by translog_table_init().
*/

LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES];
488
489
490 #ifndef DBUG_OFF
491
492 #define translog_buffer_lock_assert_owner(B) \
493 mysql_mutex_assert_owner(&(B)->mutex)
494 #define translog_lock_assert_owner() \
495 mysql_mutex_assert_owner(&log_descriptor.bc.buffer->mutex)
translog_lock_handler_assert_owner()496 void translog_lock_handler_assert_owner()
497 {
498 translog_lock_assert_owner();
499 }
500
501 /**
502 @brief check the description table validity
503
504 @param num how many records should be filled
505 */
506
507 static uint max_allowed_translog_type= 0;
508
check_translog_description_table(int num)509 void check_translog_description_table(int num)
510 {
511 int i;
512 DBUG_ENTER("check_translog_description_table");
513 DBUG_PRINT("enter", ("last record: %d", num));
514 DBUG_ASSERT(num > 0);
515 /* last is reserved for extending the table */
516 DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1);
517 DBUG_ASSERT(log_record_type_descriptor[0].rclass == LOGRECTYPE_NOT_ALLOWED);
518 max_allowed_translog_type= num;
519
520 for (i= 0; i <= num; i++)
521 {
522 DBUG_PRINT("info",
523 ("record type: %d class: %d fixed: %u header: %u LSNs: %u "
524 "name: %s",
525 i, log_record_type_descriptor[i].rclass,
526 (uint)log_record_type_descriptor[i].fixed_length,
527 (uint)log_record_type_descriptor[i].read_header_len,
528 (uint)log_record_type_descriptor[i].compressed_LSN,
529 log_record_type_descriptor[i].name));
530 switch (log_record_type_descriptor[i].rclass) {
531 case LOGRECTYPE_NOT_ALLOWED:
532 DBUG_ASSERT(i == 0);
533 break;
534 case LOGRECTYPE_VARIABLE_LENGTH:
535 DBUG_ASSERT(log_record_type_descriptor[i].fixed_length == 0);
536 DBUG_ASSERT((log_record_type_descriptor[i].compressed_LSN == 0) ||
537 ((log_record_type_descriptor[i].compressed_LSN == 1) &&
538 (log_record_type_descriptor[i].read_header_len >=
539 LSN_STORE_SIZE)) ||
540 ((log_record_type_descriptor[i].compressed_LSN == 2) &&
541 (log_record_type_descriptor[i].read_header_len >=
542 LSN_STORE_SIZE * 2)));
543 break;
544 case LOGRECTYPE_PSEUDOFIXEDLENGTH:
545 DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
546 log_record_type_descriptor[i].read_header_len);
547 DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN > 0);
548 DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN <= 2);
549 break;
550 case LOGRECTYPE_FIXEDLENGTH:
551 DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
552 log_record_type_descriptor[i].read_header_len);
553 DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN == 0);
554 break;
555 default:
556 DBUG_ASSERT(0);
557 }
558 }
559 for (i= num + 1; i < LOGREC_NUMBER_OF_TYPES; i++)
560 {
561 DBUG_ASSERT(log_record_type_descriptor[i].rclass ==
562 LOGRECTYPE_NOT_ALLOWED);
563 }
564 DBUG_VOID_RETURN;
565 }
566 #else
/*
  Non-debug build: the ownership assertions do nothing and do not evaluate
  their argument. They expand to "do { } while (0)" rather than "{}" so that
  the call followed by ';' forms exactly one statement and can be used as
  the unbraced body of an if that has an else branch.
*/
#define translog_buffer_lock_assert_owner(B) do { } while (0)
#define translog_lock_assert_owner() do { } while (0)
569 #endif
570
/*
  Templates for the entries of log_record_type_descriptor[]; they are copied
  into the table by translog_table_init().

  NOTE(review): LOG_DESC is declared outside this file. Judging by the
  checks in check_translog_description_table(), the initializers start with
  rclass, fixed_length and read_header_len, and the 7th and 8th values are
  compressed_LSN and name -- confirm against the LOG_DESC declaration.
*/
static LOG_DESC INIT_LOGREC_RESERVED_FOR_CHUNKS23=
{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0,
 "reserved", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL };

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_HEAD=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_insert_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_TAIL=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_insert_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_NEW_ROW_HEAD=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_new_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_NEW_ROW_TAIL=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_new_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS=
{LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_insert_row_blobs", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_HEAD=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_purge_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_TAIL=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_purge_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_FREE_BLOCKS=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_free_blocks", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_free_head_or_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

/* Not yet used; for when we have versioning */
static LOG_DESC INIT_LOGREC_REDO_DELETE_ROW=
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
 "redo_delete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

/** @todo RECOVERY BUG unused, remove? */
static LOG_DESC INIT_LOGREC_REDO_UPDATE_ROW_HEAD=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
 "redo_update_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INDEX=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
 "redo_index", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INDEX_NEW_PAGE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_index_new_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INDEX_FREE_PAGE=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_index_free_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW=
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
 "redo_undelete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_CLR_END=
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE +
 CLR_TYPE_STORE_SIZE, NULL, write_hook_for_clr_end, NULL, 1,
 "clr_end", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_PURGE_END=
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
 "purge_end", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_undo_row_insert, NULL, 1,
 "undo_row_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_undo_row_delete, NULL, 1,
 "undo_row_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_undo_row_update, NULL, 1,
 "undo_row_update", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
 NULL, write_hook_for_undo_key_insert, NULL, 1,
 "undo_key_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};

/* This will never be in the log, only in the clr */
static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_undo_key, NULL, 1,
 "undo_key_insert_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
 NULL, write_hook_for_undo_key_delete, NULL, 1,
 "undo_key_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_undo_key_delete, NULL, 1,
 "undo_key_delete_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_PREPARE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "prepare", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE, NULL, NULL, NULL, 1,
 "prepare_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_COMMIT=
{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL,
 write_hook_for_commit, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
 NULL};

static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE=
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_commit, NULL, 1,
 "commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_CHECKPOINT=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "checkpoint", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_CREATE_TABLE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 1 + 2, NULL, NULL, NULL, 0,
 "redo_create_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_RENAME_TABLE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "redo_rename_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "redo_drop_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
 NULL, write_hook_for_redo_delete_all, NULL, 0,
 "redo_delete_all", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + 8 + 8, FILEID_STORE_SIZE + 8 + 8,
 NULL, NULL, NULL, 0,
 "redo_repair_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_FILE_ID=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 2, NULL, write_hook_for_file_id, NULL, 0,
 "file_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID=
{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0,
 "long_transaction_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_INCOMPLETE_LOG=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
 NULL, NULL, NULL, 0,
 "incomplete_log", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_INCOMPLETE_GROUP=
{LOGRECTYPE_FIXEDLENGTH, 0, 0,
 NULL, NULL, NULL, 0,
 "incomplete_group", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_BULK_INSERT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE,
 NULL, write_hook_for_undo_bulk_insert, NULL, 1,
 "undo_bulk_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};

/* Note: the name string of this entry differs from its identifier */
static LOG_DESC INIT_LOGREC_REDO_BITMAP_NEW_PAGE=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 NULL, NULL, NULL, 0,
 "redo_create_bitmap", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_IMPORTED_TABLE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "imported_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_DEBUG_INFO=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "info", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

/* NOTE(review): presumably the my_write() flags used for log writes -- confirm */
const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL;
795
translog_table_init()796 void translog_table_init()
797 {
798 int i;
799 log_record_type_descriptor[LOGREC_RESERVED_FOR_CHUNKS23]=
800 INIT_LOGREC_RESERVED_FOR_CHUNKS23;
801 log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_HEAD]=
802 INIT_LOGREC_REDO_INSERT_ROW_HEAD;
803 log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_TAIL]=
804 INIT_LOGREC_REDO_INSERT_ROW_TAIL;
805 log_record_type_descriptor[LOGREC_REDO_NEW_ROW_HEAD]=
806 INIT_LOGREC_REDO_NEW_ROW_HEAD;
807 log_record_type_descriptor[LOGREC_REDO_NEW_ROW_TAIL]=
808 INIT_LOGREC_REDO_NEW_ROW_TAIL;
809 log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_BLOBS]=
810 INIT_LOGREC_REDO_INSERT_ROW_BLOBS;
811 log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_HEAD]=
812 INIT_LOGREC_REDO_PURGE_ROW_HEAD;
813 log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_TAIL]=
814 INIT_LOGREC_REDO_PURGE_ROW_TAIL;
815 log_record_type_descriptor[LOGREC_REDO_FREE_BLOCKS]=
816 INIT_LOGREC_REDO_FREE_BLOCKS;
817 log_record_type_descriptor[LOGREC_REDO_FREE_HEAD_OR_TAIL]=
818 INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL;
819 log_record_type_descriptor[LOGREC_REDO_DELETE_ROW]=
820 INIT_LOGREC_REDO_DELETE_ROW;
821 log_record_type_descriptor[LOGREC_REDO_UPDATE_ROW_HEAD]=
822 INIT_LOGREC_REDO_UPDATE_ROW_HEAD;
823 log_record_type_descriptor[LOGREC_REDO_INDEX]=
824 INIT_LOGREC_REDO_INDEX;
825 log_record_type_descriptor[LOGREC_REDO_INDEX_NEW_PAGE]=
826 INIT_LOGREC_REDO_INDEX_NEW_PAGE;
827 log_record_type_descriptor[LOGREC_REDO_INDEX_FREE_PAGE]=
828 INIT_LOGREC_REDO_INDEX_FREE_PAGE;
829 log_record_type_descriptor[LOGREC_REDO_UNDELETE_ROW]=
830 INIT_LOGREC_REDO_UNDELETE_ROW;
831 log_record_type_descriptor[LOGREC_CLR_END]=
832 INIT_LOGREC_CLR_END;
833 log_record_type_descriptor[LOGREC_PURGE_END]=
834 INIT_LOGREC_PURGE_END;
835 log_record_type_descriptor[LOGREC_UNDO_ROW_INSERT]=
836 INIT_LOGREC_UNDO_ROW_INSERT;
837 log_record_type_descriptor[LOGREC_UNDO_ROW_DELETE]=
838 INIT_LOGREC_UNDO_ROW_DELETE;
839 log_record_type_descriptor[LOGREC_UNDO_ROW_UPDATE]=
840 INIT_LOGREC_UNDO_ROW_UPDATE;
841 log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT]=
842 INIT_LOGREC_UNDO_KEY_INSERT;
843 log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT_WITH_ROOT]=
844 INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT;
845 log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE]=
846 INIT_LOGREC_UNDO_KEY_DELETE;
847 log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE_WITH_ROOT]=
848 INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT;
849 log_record_type_descriptor[LOGREC_PREPARE]=
850 INIT_LOGREC_PREPARE;
851 log_record_type_descriptor[LOGREC_PREPARE_WITH_UNDO_PURGE]=
852 INIT_LOGREC_PREPARE_WITH_UNDO_PURGE;
853 log_record_type_descriptor[LOGREC_COMMIT]=
854 INIT_LOGREC_COMMIT;
855 log_record_type_descriptor[LOGREC_COMMIT_WITH_UNDO_PURGE]=
856 INIT_LOGREC_COMMIT_WITH_UNDO_PURGE;
857 log_record_type_descriptor[LOGREC_CHECKPOINT]=
858 INIT_LOGREC_CHECKPOINT;
859 log_record_type_descriptor[LOGREC_REDO_CREATE_TABLE]=
860 INIT_LOGREC_REDO_CREATE_TABLE;
861 log_record_type_descriptor[LOGREC_REDO_RENAME_TABLE]=
862 INIT_LOGREC_REDO_RENAME_TABLE;
863 log_record_type_descriptor[LOGREC_REDO_DROP_TABLE]=
864 INIT_LOGREC_REDO_DROP_TABLE;
865 log_record_type_descriptor[LOGREC_REDO_DELETE_ALL]=
866 INIT_LOGREC_REDO_DELETE_ALL;
867 log_record_type_descriptor[LOGREC_REDO_REPAIR_TABLE]=
868 INIT_LOGREC_REDO_REPAIR_TABLE;
869 log_record_type_descriptor[LOGREC_FILE_ID]=
870 INIT_LOGREC_FILE_ID;
871 log_record_type_descriptor[LOGREC_LONG_TRANSACTION_ID]=
872 INIT_LOGREC_LONG_TRANSACTION_ID;
873 log_record_type_descriptor[LOGREC_INCOMPLETE_LOG]=
874 INIT_LOGREC_INCOMPLETE_LOG;
875 log_record_type_descriptor[LOGREC_INCOMPLETE_GROUP]=
876 INIT_LOGREC_INCOMPLETE_GROUP;
877 log_record_type_descriptor[LOGREC_UNDO_BULK_INSERT]=
878 INIT_LOGREC_UNDO_BULK_INSERT;
879 log_record_type_descriptor[LOGREC_REDO_BITMAP_NEW_PAGE]=
880 INIT_LOGREC_REDO_BITMAP_NEW_PAGE;
881 log_record_type_descriptor[LOGREC_IMPORTED_TABLE]=
882 INIT_LOGREC_IMPORTED_TABLE;
883 log_record_type_descriptor[LOGREC_DEBUG_INFO]=
884 INIT_LOGREC_DEBUG_INFO;
885
886 for (i= LOGREC_FIRST_FREE; i < LOGREC_NUMBER_OF_TYPES; i++)
887 log_record_type_descriptor[i].rclass= LOGRECTYPE_NOT_ALLOWED;
888 #ifndef DBUG_OFF
889 check_translog_description_table(LOGREC_FIRST_FREE -1);
890 #endif
891 }
892
893
894 /* all possible flags page overheads */
895 static uint page_overhead[TRANSLOG_FLAGS_NUM];
896
897 typedef struct st_translog_validator_data
898 {
899 TRANSLOG_ADDRESS *addr;
900 my_bool was_recovered;
901 } TRANSLOG_VALIDATOR_DATA;
902
903
904 /*
905 Check cursor/buffer consistence
906
907 SYNOPSIS
908 translog_check_cursor
909 cursor cursor which will be checked
910 */
911
translog_check_cursor(struct st_buffer_cursor * cursor)912 static void translog_check_cursor(struct st_buffer_cursor *cursor
913 __attribute__((unused)))
914 {
915 DBUG_ASSERT(cursor->chaser ||
916 ((ulong) (cursor->ptr - cursor->buffer->buffer) ==
917 cursor->buffer->size));
918 DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
919 DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE ==
920 cursor->current_page_fill % TRANSLOG_PAGE_SIZE);
921 DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
922 }
923
924
925 /**
926 @brief switch the loghandler in read only mode in case of write error
927 */
928
translog_stop_writing()929 void translog_stop_writing()
930 {
931 DBUG_ENTER("translog_stop_writing");
932 DBUG_PRINT("error", ("errno: %d my_errno: %d", errno, my_errno));
933 translog_status= (translog_status == TRANSLOG_SHUTDOWN ?
934 TRANSLOG_UNINITED :
935 TRANSLOG_READONLY);
936 log_descriptor.is_everything_flushed= 1;
937 log_descriptor.open_flags= O_BINARY | O_RDONLY;
938 DBUG_ASSERT(0);
939 DBUG_VOID_RETURN;
940 }
941
942
943 /*
944 @brief Get file name of the log by log number
945
946 @param file_no Number of the log we want to open
947 @param path Pointer to buffer where file name will be
948 stored (must be FN_REFLEN bytes at least)
949
950 @return pointer to path
951 */
952
translog_filename_by_fileno(uint32 file_no,char * path)953 char *translog_filename_by_fileno(uint32 file_no, char *path)
954 {
955 char buff[11], *end;
956 uint length;
957 DBUG_ENTER("translog_filename_by_fileno");
958 DBUG_ASSERT(file_no <= 0xfffffff);
959
960 /* log_descriptor.directory is already formated */
961 end= strxmov(path, log_descriptor.directory, "aria_log.0000000", NullS);
962 length= (uint) (int10_to_str(file_no, buff, 10) - buff);
963 strmov(end - length +1, buff);
964
965 DBUG_PRINT("info", ("Path: '%s' path: %p", path, path));
966 DBUG_RETURN(path);
967 }
968
969
970 /**
971 @brief Create log file with given number without cache
972
973 @param file_no Number of the log we want to open
974
975 retval -1 error
976 retval # file descriptor number
977 */
978
create_logfile_by_number_no_cache(uint32 file_no)979 static File create_logfile_by_number_no_cache(uint32 file_no)
980 {
981 File file;
982 char path[FN_REFLEN];
983 DBUG_ENTER("create_logfile_by_number_no_cache");
984
985 if (translog_status != TRANSLOG_OK)
986 DBUG_RETURN(-1);
987
988 /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
989 if ((file= mysql_file_create(key_file_translog,
990 translog_filename_by_fileno(file_no, path),
991 0, O_BINARY | O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0)
992 {
993 DBUG_PRINT("error", ("Error %d during creating file '%s'", errno, path));
994 translog_stop_writing();
995 DBUG_RETURN(-1);
996 }
997 if (sync_log_dir >= TRANSLOG_SYNC_DIR_NEWFILE &&
998 sync_dir(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD)))
999 {
1000 DBUG_PRINT("error", ("Error %d during syncing directory '%s'",
1001 errno, log_descriptor.directory));
1002 mysql_file_close(file, MYF(0));
1003 translog_stop_writing();
1004 DBUG_RETURN(-1);
1005 }
1006 DBUG_PRINT("info", ("File: '%s' handler: %d", path, file));
1007 DBUG_RETURN(file);
1008 }
1009
1010 /**
1011 @brief Open (not create) log file with given number without cache
1012
1013 @param file_no Number of the log we want to open
1014
1015 retval -1 error
1016 retval # file descriptor number
1017 */
1018
open_logfile_by_number_no_cache(uint32 file_no)1019 static File open_logfile_by_number_no_cache(uint32 file_no)
1020 {
1021 File file;
1022 char path[FN_REFLEN];
1023 DBUG_ENTER("open_logfile_by_number_no_cache");
1024
1025 /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
1026 /* TODO: use mysql_file_create() */
1027 if ((file= mysql_file_open(key_file_translog,
1028 translog_filename_by_fileno(file_no, path),
1029 log_descriptor.open_flags | O_CLOEXEC,
1030 MYF(MY_WME))) < 0)
1031 {
1032 DBUG_PRINT("error", ("Error %d during opening file '%s'", errno, path));
1033 DBUG_RETURN(-1);
1034 }
1035 DBUG_PRINT("info", ("File: '%s' handler: %d", path, file));
1036 DBUG_RETURN(file);
1037 }
1038
1039
1040 /**
1041 @brief get file descriptor by given number using cache
1042
1043 @param file_no Number of the log we want to open
1044
1045 retval # file descriptor
1046 retval NULL file is not opened
1047 */
1048
get_logfile_by_number(uint32 file_no)1049 static TRANSLOG_FILE *get_logfile_by_number(uint32 file_no)
1050 {
1051 TRANSLOG_FILE *file;
1052 DBUG_ENTER("get_logfile_by_number");
1053 mysql_rwlock_rdlock(&log_descriptor.open_files_lock);
1054 if (log_descriptor.max_file - file_no >=
1055 log_descriptor.open_files.elements)
1056 {
1057 DBUG_PRINT("info", ("File #%u is not opened", file_no));
1058 mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1059 DBUG_RETURN(NULL);
1060 }
1061 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
1062 log_descriptor.open_files.elements);
1063 DBUG_ASSERT(log_descriptor.max_file >= file_no);
1064 DBUG_ASSERT(log_descriptor.min_file <= file_no);
1065
1066 file= *dynamic_element(&log_descriptor.open_files,
1067 log_descriptor.max_file - file_no, TRANSLOG_FILE **);
1068 mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1069 DBUG_PRINT("info", ("File %p File no: %u, File handler: %d",
1070 file, file_no,
1071 (file ? file->handler.file : -1)));
1072 DBUG_ASSERT(!file || file->number == file_no);
1073 DBUG_RETURN(file);
1074 }
1075
1076
1077 /**
1078 @brief get current file descriptor
1079
1080 retval # file descriptor
1081 */
1082
get_current_logfile()1083 static TRANSLOG_FILE *get_current_logfile()
1084 {
1085 TRANSLOG_FILE *file;
1086 DBUG_ENTER("get_current_logfile");
1087 mysql_rwlock_rdlock(&log_descriptor.open_files_lock);
1088 DBUG_PRINT("info", ("max_file: %lu min_file: %lu open_files: %lu",
1089 (ulong) log_descriptor.max_file,
1090 (ulong) log_descriptor.min_file,
1091 (ulong) log_descriptor.open_files.elements));
1092 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
1093 log_descriptor.open_files.elements);
1094 file= *dynamic_element(&log_descriptor.open_files, 0, TRANSLOG_FILE **);
1095 mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1096 DBUG_RETURN(file);
1097 }
1098
1099 uchar maria_trans_file_magic[]=
1100 { (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A',
1101 'L', 'O', 'G' };
1102 #define LOG_HEADER_DATA_SIZE (sizeof(maria_trans_file_magic) + \
1103 8 + 4 + 4 + 4 + 2 + 3 + \
1104 LSN_STORE_SIZE)
1105
1106
1107 /*
1108 Write log file page header in the just opened new log file
1109
1110 SYNOPSIS
1111 translog_write_file_header();
1112
1113 NOTES
1114 First page is just a marker page; We don't store any real log data in it.
1115
1116 RETURN
1117 0 OK
1118 1 ERROR
1119 */
1120
translog_write_file_header()1121 static my_bool translog_write_file_header()
1122 {
1123 TRANSLOG_FILE *file;
1124 ulonglong timestamp;
1125 uchar page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff;
1126 my_bool rc;
1127 DBUG_ENTER("translog_write_file_header");
1128
1129 /* file tag */
1130 memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic));
1131 page+= sizeof(maria_trans_file_magic);
1132 /* timestamp */
1133 timestamp= my_hrtime().val;
1134 int8store(page, timestamp);
1135 page+= 8;
1136 /* maria version */
1137 int4store(page, TRANSLOG_VERSION_ID);
1138 page+= 4;
1139 /* mysql version (MYSQL_VERSION_ID) */
1140 int4store(page, log_descriptor.server_version);
1141 page+= 4;
1142 /* server ID */
1143 int4store(page, log_descriptor.server_id);
1144 page+= 4;
1145 /* loghandler page_size */
1146 int2store(page, TRANSLOG_PAGE_SIZE - 1);
1147 page+= 2;
1148 /* file number */
1149 int3store(page, LSN_FILE_NO(log_descriptor.horizon));
1150 page+= 3;
1151 lsn_store(page, LSN_IMPOSSIBLE);
1152 page+= LSN_STORE_SIZE;
1153 memset(page, TRANSLOG_FILLER, sizeof(page_buff) - (page- page_buff));
1154
1155 file= get_current_logfile();
1156 rc= my_pwrite(file->handler.file, page_buff, sizeof(page_buff), 0,
1157 log_write_flags) != 0;
1158 /*
1159 Dropping the flag in such way can make false alarm: signalling than the
1160 file in not sync when it is sync, but the situation is quite rare and
1161 protections with mutexes give much more overhead to the whole engine
1162 */
1163 file->is_sync= 0;
1164 DBUG_RETURN(rc);
1165 }
1166
1167 /*
1168 @brief write the new LSN on the given file header
1169
1170 @param file The file descriptor
1171 @param lsn That LSN which should be written
1172
1173 @retval 0 OK
1174 @retval 1 Error
1175 */
1176
translog_max_lsn_to_header(File file,LSN lsn)1177 static my_bool translog_max_lsn_to_header(File file, LSN lsn)
1178 {
1179 uchar lsn_buff[LSN_STORE_SIZE];
1180 my_bool rc;
1181 DBUG_ENTER("translog_max_lsn_to_header");
1182 DBUG_PRINT("enter", ("File descriptor: %ld "
1183 "lsn: " LSN_FMT,
1184 (long) file,
1185 LSN_IN_PARTS(lsn)));
1186
1187 lsn_store(lsn_buff, lsn);
1188
1189 rc= (my_pwrite(file, lsn_buff,
1190 LSN_STORE_SIZE,
1191 (LOG_HEADER_DATA_SIZE - LSN_STORE_SIZE),
1192 log_write_flags) != 0 ||
1193 mysql_file_sync(file, MYF(MY_WME)) != 0);
1194 /*
1195 We should not increase counter in case of error above, but it is so
1196 unlikely that we can ignore this case
1197 */
1198 translog_syncs++;
1199 DBUG_RETURN(rc);
1200 }
1201
1202
1203 /*
1204 @brief Extract hander file information from loghandler file page
1205
1206 @param desc header information descriptor to be filled with information
1207 @param page_buff buffer with the page content
1208 */
1209
translog_interpret_file_header(LOGHANDLER_FILE_INFO * desc,uchar * page_buff)1210 void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc,
1211 uchar *page_buff)
1212 {
1213 uchar *ptr;
1214
1215 ptr= page_buff + sizeof(maria_trans_file_magic);
1216 desc->timestamp= uint8korr(ptr);
1217 ptr+= 8;
1218 desc->maria_version= uint4korr(ptr);
1219 ptr+= 4;
1220 desc->mysql_version= uint4korr(ptr);
1221 ptr+= 4;
1222 desc->server_id= uint4korr(ptr + 4);
1223 ptr+= 4;
1224 desc->page_size= uint2korr(ptr) + 1;
1225 ptr+= 2;
1226 desc->file_number= uint3korr(ptr);
1227 ptr+=3;
1228 desc->max_lsn= lsn_korr(ptr);
1229 }
1230
1231
1232 /*
1233 @brief Read hander file information from loghandler file
1234
1235 @param desc header information descriptor to be filled with information
1236 @param file file descriptor to read
1237
1238 @retval 0 OK
1239 @retval 1 Error
1240 */
1241
translog_read_file_header(LOGHANDLER_FILE_INFO * desc,File file)1242 my_bool translog_read_file_header(LOGHANDLER_FILE_INFO *desc, File file)
1243 {
1244 uchar page_buff[LOG_HEADER_DATA_SIZE];
1245 DBUG_ENTER("translog_read_file_header");
1246
1247 if (mysql_file_pread(file, page_buff,
1248 sizeof(page_buff), 0, MYF(MY_FNABP | MY_WME)))
1249 {
1250 DBUG_PRINT("info", ("log read fail error: %d", my_errno));
1251 DBUG_RETURN(1);
1252 }
1253 translog_interpret_file_header(desc, page_buff);
1254 DBUG_PRINT("info", ("timestamp: %llu aria ver: %lu mysql ver: %lu "
1255 "server id %lu page size %lu file number %lu "
1256 "max lsn: " LSN_FMT,
1257 (ulonglong) desc->timestamp,
1258 (ulong) desc->maria_version,
1259 (ulong) desc->mysql_version,
1260 (ulong) desc->server_id,
1261 desc->page_size, (ulong) desc->file_number,
1262 LSN_IN_PARTS(desc->max_lsn)));
1263 DBUG_RETURN(0);
1264 }
1265
1266
1267 /*
1268 @brief set the lsn to the files from_file - to_file if it is greater
1269 then written in the file
1270
1271 @param from_file first file number (min)
1272 @param to_file last file number (max)
1273 @param lsn the lsn for writing
1274 @param is_locked true if current thread locked the log handler
1275
1276 @retval 0 OK
1277 @retval 1 Error
1278 */
1279
translog_set_lsn_for_files(uint32 from_file,uint32 to_file,LSN lsn,my_bool is_locked)1280 static my_bool translog_set_lsn_for_files(uint32 from_file, uint32 to_file,
1281 LSN lsn, my_bool is_locked)
1282 {
1283 uint32 file;
1284 DBUG_ENTER("translog_set_lsn_for_files");
1285 DBUG_PRINT("enter", ("From: %lu to: %lu lsn: " LSN_FMT " locked: %d",
1286 (ulong) from_file, (ulong) to_file,
1287 LSN_IN_PARTS(lsn),
1288 is_locked));
1289 DBUG_ASSERT(from_file <= to_file);
1290 DBUG_ASSERT(from_file > 0); /* we have not file 0 */
1291
1292 /* Checks the current file (not finished yet file) */
1293 if (!is_locked)
1294 translog_lock();
1295 if (to_file == (uint32) LSN_FILE_NO(log_descriptor.horizon))
1296 {
1297 if (likely(cmp_translog_addr(lsn, log_descriptor.max_lsn) > 0))
1298 log_descriptor.max_lsn= lsn;
1299 to_file--;
1300 }
1301 if (!is_locked)
1302 translog_unlock();
1303
1304 /* Checks finished files if they are */
1305 mysql_mutex_lock(&log_descriptor.file_header_lock);
1306 for (file= from_file; file <= to_file; file++)
1307 {
1308 LOGHANDLER_FILE_INFO info;
1309 File fd;
1310
1311 fd= open_logfile_by_number_no_cache(file);
1312 if ((fd < 0) ||
1313 ((translog_read_file_header(&info, fd) ||
1314 (cmp_translog_addr(lsn, info.max_lsn) > 0 &&
1315 translog_max_lsn_to_header(fd, lsn))) |
1316 mysql_file_close(fd, MYF(MY_WME))))
1317 {
1318 translog_stop_writing();
1319 mysql_mutex_unlock(&log_descriptor.file_header_lock);
1320 DBUG_RETURN(1);
1321 }
1322 }
1323 mysql_mutex_unlock(&log_descriptor.file_header_lock);
1324
1325 DBUG_RETURN(0);
1326 }
1327
1328
1329 /* descriptor of file in unfinished_files */
1330 struct st_file_counter
1331 {
1332 uint32 file; /* file number */
1333 uint32 counter; /* counter for started writes */
1334 };
1335
1336
1337 /*
1338 @brief mark file "in progress" (for multi-group records)
1339
1340 @param file log file number
1341 */
1342
translog_mark_file_unfinished(uint32 file)1343 static void translog_mark_file_unfinished(uint32 file)
1344 {
1345 int place, i;
1346 struct st_file_counter fc, *fc_ptr;
1347
1348 DBUG_ENTER("translog_mark_file_unfinished");
1349 DBUG_PRINT("enter", ("file: %lu", (ulong) file));
1350
1351 fc.file= file; fc.counter= 1;
1352 mysql_mutex_lock(&log_descriptor.unfinished_files_lock);
1353
1354 if (log_descriptor.unfinished_files.elements == 0)
1355 {
1356 insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
1357 DBUG_PRINT("info", ("The first element inserted"));
1358 goto end;
1359 }
1360
1361 for (place= log_descriptor.unfinished_files.elements - 1;
1362 place >= 0;
1363 place--)
1364 {
1365 fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
1366 place, struct st_file_counter *);
1367 if (fc_ptr->file <= file)
1368 break;
1369 }
1370
1371 if (place >= 0 && fc_ptr->file == file)
1372 {
1373 fc_ptr->counter++;
1374 DBUG_PRINT("info", ("counter increased"));
1375 goto end;
1376 }
1377
1378 if (place == (int)log_descriptor.unfinished_files.elements)
1379 {
1380 insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
1381 DBUG_PRINT("info", ("The last element inserted"));
1382 goto end;
1383 }
1384 /* shift and assign new element */
1385 insert_dynamic(&log_descriptor.unfinished_files,
1386 (uchar*)
1387 dynamic_element(&log_descriptor.unfinished_files,
1388 log_descriptor.unfinished_files.elements- 1,
1389 struct st_file_counter *));
1390 for(i= log_descriptor.unfinished_files.elements - 1; i > place; i--)
1391 {
1392 /* we do not use set_dynamic() to avoid unneeded checks */
1393 memcpy(dynamic_element(&log_descriptor.unfinished_files,
1394 i, struct st_file_counter *),
1395 dynamic_element(&log_descriptor.unfinished_files,
1396 i + 1, struct st_file_counter *),
1397 sizeof(struct st_file_counter));
1398 }
1399 memcpy(dynamic_element(&log_descriptor.unfinished_files,
1400 place + 1, struct st_file_counter *),
1401 &fc, sizeof(struct st_file_counter));
1402 end:
1403 mysql_mutex_unlock(&log_descriptor.unfinished_files_lock);
1404 DBUG_VOID_RETURN;
1405 }
1406
1407
1408 /*
1409 @brief remove file mark "in progress" (for multi-group records)
1410
1411 @param file log file number
1412 */
1413
translog_mark_file_finished(uint32 file)1414 static void translog_mark_file_finished(uint32 file)
1415 {
1416 int i;
1417 struct st_file_counter *UNINIT_VAR(fc_ptr);
1418 DBUG_ENTER("translog_mark_file_finished");
1419 DBUG_PRINT("enter", ("file: %lu", (ulong) file));
1420
1421 mysql_mutex_lock(&log_descriptor.unfinished_files_lock);
1422
1423 DBUG_ASSERT(log_descriptor.unfinished_files.elements > 0);
1424 for (i= 0;
1425 i < (int) log_descriptor.unfinished_files.elements;
1426 i++)
1427 {
1428 fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
1429 i, struct st_file_counter *);
1430 if (fc_ptr->file == file)
1431 {
1432 break;
1433 }
1434 }
1435 DBUG_ASSERT(i < (int) log_descriptor.unfinished_files.elements);
1436
1437 if (! --fc_ptr->counter)
1438 delete_dynamic_element(&log_descriptor.unfinished_files, i);
1439 mysql_mutex_unlock(&log_descriptor.unfinished_files_lock);
1440 DBUG_VOID_RETURN;
1441 }
1442
1443
1444 /*
1445 @brief get max LSN of the record which parts stored in this file
1446
1447 @param file file number
1448
1449 @return requested LSN or LSN_IMPOSSIBLE/LSN_ERROR
1450 @retval LSN_IMPOSSIBLE File is still not finished
1451 @retval LSN_ERROR Error opening file
1452 @retval # LSN of the record which parts stored in this file
1453 */
1454
translog_get_file_max_lsn_stored(uint32 file)1455 LSN translog_get_file_max_lsn_stored(uint32 file)
1456 {
1457 uint32 limit= FILENO_IMPOSSIBLE;
1458 DBUG_ENTER("translog_get_file_max_lsn_stored");
1459 DBUG_PRINT("enter", ("file: %lu", (ulong)file));
1460 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
1461 translog_status == TRANSLOG_READONLY);
1462
1463 mysql_mutex_lock(&log_descriptor.unfinished_files_lock);
1464
1465 /* find file with minimum file number "in progress" */
1466 if (log_descriptor.unfinished_files.elements > 0)
1467 {
1468 struct st_file_counter *fc_ptr;
1469 fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
1470 0, struct st_file_counter *);
1471 limit= fc_ptr->file; /* minimal file number "in progress" */
1472 }
1473 mysql_mutex_unlock(&log_descriptor.unfinished_files_lock);
1474
1475 /*
1476 if there is no "in progress file" then unfinished file is in progress
1477 for sure
1478 */
1479 if (limit == FILENO_IMPOSSIBLE)
1480 {
1481 TRANSLOG_ADDRESS horizon= translog_get_horizon();
1482 limit= LSN_FILE_NO(horizon);
1483 }
1484
1485 if (file >= limit)
1486 {
1487 DBUG_PRINT("info", ("The file in in progress"));
1488 DBUG_RETURN(LSN_IMPOSSIBLE);
1489 }
1490
1491 {
1492 LOGHANDLER_FILE_INFO info;
1493 File fd;
1494
1495 fd= open_logfile_by_number_no_cache(file);
1496 if(fd < 0)
1497 {
1498 DBUG_PRINT("error", ("Can't open file"));
1499 DBUG_RETURN(LSN_ERROR);
1500 }
1501
1502 if (translog_read_file_header(&info, fd))
1503 {
1504 DBUG_PRINT("error", ("Can't read file header"));
1505 info.max_lsn= LSN_ERROR;
1506 }
1507
1508 if (mysql_file_close(fd, MYF(MY_WME)))
1509 {
1510 DBUG_PRINT("error", ("Can't close file"));
1511 info.max_lsn= LSN_ERROR;
1512 }
1513
1514 DBUG_PRINT("info", ("Max lsn: " LSN_FMT, LSN_IN_PARTS(info.max_lsn)));
1515 DBUG_RETURN(info.max_lsn);
1516 }
1517 }
1518
1519 /*
1520 Initialize transaction log file buffer
1521
1522 SYNOPSIS
1523 translog_buffer_init()
1524 buffer The buffer to initialize
1525 num Number of this buffer
1526
1527 RETURN
1528 0 OK
1529 1 Error
1530 */
1531
translog_buffer_init(struct st_translog_buffer * buffer,int num)1532 static my_bool translog_buffer_init(struct st_translog_buffer *buffer, int num)
1533 {
1534 DBUG_ENTER("translog_buffer_init");
1535 buffer->pre_force_close_horizon=
1536 buffer->prev_last_lsn= buffer->last_lsn=
1537 LSN_IMPOSSIBLE;
1538 DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0 buffer: %p",
1539 buffer));
1540
1541 buffer->buffer_no= (uint8) num;
1542 /* This Buffer File */
1543 buffer->file= NULL;
1544 buffer->overlay= 0;
1545 /* cache for current log */
1546 memset(buffer->buffer, TRANSLOG_FILLER, TRANSLOG_WRITE_BUFFER);
1547 /* Buffer size */
1548 buffer->size= 0;
1549 buffer->skipped_data= 0;
1550 /* cond of thread which is waiting for buffer filling */
1551 if (mysql_cond_init(key_TRANSLOG_BUFFER_waiting_filling_buffer,
1552 &buffer->waiting_filling_buffer, 0))
1553 DBUG_RETURN(1);
1554 /* Number of records which are in copy progress */
1555 buffer->copy_to_buffer_in_progress= 0;
1556 /* list of waiting buffer ready threads */
1557 buffer->waiting_flush= 0;
1558 /*
1559 Buffers locked by the following mutex. As far as buffers create logical
1560 circle (after last buffer goes first) it trigger false alarm of deadlock
1561 detect system, so we remove check of deadlock for this buffers. Indeed
1562 all mutex locks concentrated around current buffer except flushing
1563 thread (but it is only one thread). One thread can't take more then
1564 2 buffer locks at once. So deadlock is impossible here.
1565
1566 To prevent false alarm of dead lock detection we switch dead lock
1567 detection for one buffer in the middle of the buffers chain. Excluding
1568 only one of eight buffers from deadlock detection hardly can hide other
1569 possible problems which include this mutexes.
1570 */
1571
1572 if (mysql_mutex_init(key_TRANSLOG_BUFFER_mutex,
1573 &buffer->mutex, MY_MUTEX_INIT_FAST) ||
1574 mysql_cond_init(key_TRANSLOG_BUFFER_prev_sent_to_disk_cond,
1575 &buffer->prev_sent_to_disk_cond, 0))
1576 DBUG_RETURN(1);
1577 mysql_mutex_setflags(&buffer->mutex, MYF_NO_DEADLOCK_DETECTION);
1578 buffer->is_closing_buffer= 0;
1579 buffer->prev_sent_to_disk= LSN_IMPOSSIBLE;
1580 buffer->prev_buffer_offset= LSN_IMPOSSIBLE;
1581 buffer->ver= 0;
1582 DBUG_RETURN(0);
1583 }
1584
1585
1586 /*
1587 @brief close transaction log file by descriptor
1588
1589 @param file pagegecache file descriptor reference
1590
1591 @return Operation status
1592 @retval 0 OK
1593 @retval 1 Error
1594 */
1595
translog_close_log_file(TRANSLOG_FILE * file)1596 static my_bool translog_close_log_file(TRANSLOG_FILE *file)
1597 {
1598 int rc= 0;
1599 flush_pagecache_blocks(log_descriptor.pagecache, &file->handler,
1600 FLUSH_RELEASE);
1601 /*
1602 Sync file when we close it
1603 TODO: sync only we have changed the log
1604 */
1605 if (!file->is_sync)
1606 {
1607 rc= mysql_file_sync(file->handler.file, MYF(MY_WME));
1608 translog_syncs++;
1609 }
1610 rc|= mysql_file_close(file->handler.file, MYF(MY_WME));
1611 my_free(file);
1612 return MY_TEST(rc);
1613 }
1614
1615
1616 /**
1617 @brief Initializes TRANSLOG_FILE structure
1618
1619 @param file reference on the file to initialize
1620 @param number file number
1621 @param is_sync is file synced on disk
1622 */
1623
translog_file_init(TRANSLOG_FILE * file,uint32 number,my_bool is_sync)1624 static void translog_file_init(TRANSLOG_FILE *file, uint32 number,
1625 my_bool is_sync)
1626 {
1627 pagecache_file_set_null_hooks(&file->handler);
1628 file->handler.post_read_hook= translog_page_validator;
1629 file->handler.flush_log_callback= maria_flush_log_for_page_none;
1630 file->handler.callback_data= (uchar*)file;
1631
1632 file->number= number;
1633 file->was_recovered= 0;
1634 file->is_sync= is_sync;
1635 }
1636
1637
1638 /**
1639 @brief Create and fill header of new file.
1640
1641 @note the caller must call it right after it has increased
1642 log_descriptor.horizon to the new file
1643 (log_descriptor.horizon+= LSN_ONE_FILE)
1644
1645
1646 @retval 0 OK
1647 @retval 1 Error
1648 */
1649
translog_create_new_file()1650 static my_bool translog_create_new_file()
1651 {
1652 TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(PSI_INSTRUMENT_ME, sizeof(TRANSLOG_FILE),
1653 MYF(0));
1654
1655 TRANSLOG_FILE *old= get_current_logfile();
1656 uint32 file_no= LSN_FILE_NO(log_descriptor.horizon);
1657 DBUG_ENTER("translog_create_new_file");
1658
1659 if (file == NULL)
1660 goto error;
1661
1662 /*
1663 Writes max_lsn to the file header before finishing it (there is no need
1664 to lock file header buffer because it is still unfinished file, so only
1665 one thread can finish the file and nobody interested of LSN of current
1666 (unfinished) file, because no one can purge it).
1667 */
1668 if (translog_max_lsn_to_header(old->handler.file, log_descriptor.max_lsn))
1669 goto error;
1670
1671 mysql_rwlock_wrlock(&log_descriptor.open_files_lock);
1672 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
1673 log_descriptor.open_files.elements);
1674 DBUG_ASSERT(file_no == log_descriptor.max_file + 1);
1675 if (allocate_dynamic(&log_descriptor.open_files,
1676 log_descriptor.max_file - log_descriptor.min_file + 2))
1677 goto error_lock;
1678
1679 /* this call just expand the array */
1680 if (insert_dynamic(&log_descriptor.open_files, (uchar*)&file))
1681 goto error_lock;
1682
1683 if ((file->handler.file= create_logfile_by_number_no_cache(file_no)) == -1)
1684 goto error_lock;
1685 translog_file_init(file, file_no, 0);
1686
1687 log_descriptor.max_file++;
1688 {
1689 char *start= (char*) dynamic_element(&log_descriptor.open_files, 0,
1690 TRANSLOG_FILE**);
1691 memmove(start + sizeof(TRANSLOG_FILE*), start,
1692 sizeof(TRANSLOG_FILE*) *
1693 (log_descriptor.max_file - log_descriptor.min_file + 1 - 1));
1694 }
1695 /* can't fail we because we expanded array */
1696 set_dynamic(&log_descriptor.open_files, (uchar*)&file, 0);
1697 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
1698 log_descriptor.open_files.elements);
1699 mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1700
1701 DBUG_PRINT("info", ("file_no: %lu", (ulong)file_no));
1702
1703 if (translog_write_file_header())
1704 goto error;
1705
1706 if (ma_control_file_write_and_force(last_checkpoint_lsn, file_no,
1707 max_trid_in_control_file,
1708 recovery_failures))
1709 goto error;
1710
1711 DBUG_RETURN(0);
1712
1713 error_lock:
1714 mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1715 error:
1716 translog_stop_writing();
1717 my_free(file);
1718 DBUG_RETURN(1);
1719 }
1720
1721
1722 /**
1723 @brief Locks the loghandler buffer.
1724
1725 @param buffer This buffer which should be locked
1726
1727 @note See comment before buffer 'mutex' variable.
1728
1729 @retval 0 OK
1730 @retval 1 Error
1731 */
1732
translog_buffer_lock(struct st_translog_buffer * buffer)1733 static void translog_buffer_lock(struct st_translog_buffer *buffer)
1734 {
1735 DBUG_ENTER("translog_buffer_lock");
1736 DBUG_PRINT("enter",
1737 ("Lock buffer #%u: %p", buffer->buffer_no,
1738 buffer));
1739 mysql_mutex_lock(&buffer->mutex);
1740 DBUG_VOID_RETURN;
1741 }
1742
1743
1744 /*
1745 Unlock the loghandler buffer
1746
1747 SYNOPSIS
1748 translog_buffer_unlock()
1749 buffer This buffer which should be unlocked
1750 */
1751
translog_buffer_unlock(struct st_translog_buffer * buffer)1752 static void translog_buffer_unlock(struct st_translog_buffer *buffer)
1753 {
1754 DBUG_ENTER("translog_buffer_unlock");
1755 DBUG_PRINT("enter", ("Unlock buffer... #%u (%p)",
1756 (uint) buffer->buffer_no, buffer));
1757
1758 mysql_mutex_unlock(&buffer->mutex);
1759 DBUG_VOID_RETURN;
1760 }
1761
1762
1763 /*
1764 Write a header on the page
1765
1766 SYNOPSIS
1767 translog_new_page_header()
1768 horizon Where to write the page
1769 cursor Where to write the page
1770
1771 NOTE
1772 - space for page header should be checked before
1773 */
1774
1775 static uchar translog_sector_random;
1776
translog_new_page_header(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor)1777 static void translog_new_page_header(TRANSLOG_ADDRESS *horizon,
1778 struct st_buffer_cursor *cursor)
1779 {
1780 uchar *ptr;
1781
1782 DBUG_ENTER("translog_new_page_header");
1783 DBUG_ASSERT(cursor->ptr);
1784
1785 cursor->protected= 0;
1786
1787 ptr= cursor->ptr;
1788 /* Page number */
1789 int3store(ptr, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE);
1790 ptr+= 3;
1791 /* File number */
1792 int3store(ptr, LSN_FILE_NO(*horizon));
1793 ptr+= 3;
1794 DBUG_ASSERT(TRANSLOG_PAGE_FLAGS == (ptr - cursor->ptr));
1795 cursor->ptr[TRANSLOG_PAGE_FLAGS]= (uchar) log_descriptor.flags;
1796 ptr++;
1797 if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
1798 {
1799 #ifndef DBUG_OFF
1800 DBUG_PRINT("info", ("write 0x11223344 CRC to " LSN_FMT,
1801 LSN_IN_PARTS(*horizon)));
1802 /* This will be overwritten by real CRC; This is just for debugging */
1803 int4store(ptr, 0x11223344);
1804 #endif
1805 /* CRC will be put when page is finished */
1806 ptr+= CRC_SIZE;
1807 }
1808 if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
1809 {
1810 /*
1811 translog_sector_randmo works like "random" values producer because
1812 it is enough to have such "random" for this purpose and it will
1813 not interfere with higher level pseudo random value generator
1814 */
1815 ptr[0]= translog_sector_random++;
1816 ptr+= TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
1817 }
1818 {
1819 size_t len= (ptr - cursor->ptr);
1820 (*horizon)+= len; /* increasing the offset part of the address */
1821 cursor->current_page_fill= (uint16)len;
1822 if (!cursor->chaser)
1823 cursor->buffer->size+= (translog_size_t)len;
1824 }
1825 cursor->ptr= ptr;
1826 DBUG_PRINT("info", ("NewP buffer #%u: %p chaser: %d Size: %lu (%lu) "
1827 "Horizon: " LSN_FMT,
1828 (uint) cursor->buffer->buffer_no, cursor->buffer,
1829 cursor->chaser, (ulong) cursor->buffer->size,
1830 (ulong) (cursor->ptr - cursor->buffer->buffer),
1831 LSN_IN_PARTS(*horizon)));
1832 translog_check_cursor(cursor);
1833 DBUG_VOID_RETURN;
1834 }
1835
1836
1837 /*
1838 Put sector protection on the page image
1839
1840 SYNOPSIS
1841 translog_put_sector_protection()
1842 page reference on the page content
1843 cursor cursor of the buffer
1844
1845 NOTES
1846 We put a sector protection on all following sectors on the page,
1847 except the first sector that is protected by page header.
1848 */
1849
translog_put_sector_protection(uchar * page,struct st_buffer_cursor * cursor)1850 static void translog_put_sector_protection(uchar *page,
1851 struct st_buffer_cursor *cursor)
1852 {
1853 uchar *table= page + log_descriptor.page_overhead -
1854 TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
1855 uint i, offset;
1856 uint16 last_protected_sector= ((cursor->previous_offset - 1) /
1857 DISK_DRIVE_SECTOR_SIZE);
1858 uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE;
1859 uint8 value= table[0] + cursor->write_counter;
1860 DBUG_ENTER("translog_put_sector_protection");
1861
1862 if (start_sector == 0)
1863 {
1864 /* First sector is protected by file & page numbers in the page header. */
1865 start_sector= 1;
1866 }
1867
1868 DBUG_PRINT("enter", ("Write counter:%u value:%u offset:%u, "
1869 "last protected:%u start sector:%u",
1870 (uint) cursor->write_counter,
1871 (uint) value,
1872 (uint) cursor->previous_offset,
1873 (uint) last_protected_sector, (uint) start_sector));
1874 if (last_protected_sector == start_sector)
1875 {
1876 i= last_protected_sector;
1877 offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE;
1878 /* restore data, because we modified sector which was protected */
1879 if (offset < cursor->previous_offset)
1880 page[offset]= table[i];
1881 }
1882 for (i= start_sector, offset= start_sector * DISK_DRIVE_SECTOR_SIZE;
1883 i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
1884 i++, (offset+= DISK_DRIVE_SECTOR_SIZE))
1885 {
1886 DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x",
1887 i, offset, (uint) page[offset]));
1888 table[i]= page[offset];
1889 page[offset]= value;
1890 DBUG_PRINT("info", ("sector:%u offset:%u data 0x%x",
1891 i, offset, (uint) page[offset]));
1892 }
1893 DBUG_VOID_RETURN;
1894 }
1895
1896
1897 /*
1898 Calculate CRC32 of given area
1899
1900 SYNOPSIS
1901 translog_crc()
1902 area Pointer of the area beginning
1903 length The Area length
1904
1905 RETURN
1906 CRC32
1907 */
1908
translog_crc(uchar * area,uint length)1909 static uint32 translog_crc(uchar *area, uint length)
1910 {
1911 DBUG_ENTER("translog_crc");
1912 DBUG_RETURN(my_checksum(0L, area, length));
1913 }
1914
1915
1916 /*
1917 Finish current page with zeros
1918
1919 SYNOPSIS
1920 translog_finish_page()
1921 horizon \ horizon & buffer pointers
1922 cursor /
1923 */
1924
translog_finish_page(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor)1925 static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
1926 struct st_buffer_cursor *cursor)
1927 {
1928 uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_fill;
1929 uchar *page= cursor->ptr - cursor->current_page_fill;
1930 DBUG_ENTER("translog_finish_page");
1931 DBUG_PRINT("enter", ("Buffer: #%u %p "
1932 "Buffer addr: " LSN_FMT " "
1933 "Page addr: " LSN_FMT " "
1934 "size:%u (%u) Pg:%u left:%u",
1935 (uint) cursor->buffer_no, cursor->buffer,
1936 LSN_IN_PARTS(cursor->buffer->offset),
1937 (uint)LSN_FILE_NO(*horizon),
1938 (uint)(LSN_OFFSET(*horizon) -
1939 cursor->current_page_fill),
1940 (uint) cursor->buffer->size,
1941 (uint) (cursor->ptr -cursor->buffer->buffer),
1942 (uint) cursor->current_page_fill, (uint) left));
1943 DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset)
1944 || translog_status == TRANSLOG_UNINITED);
1945 if ((LSN_FILE_NO(*horizon) != LSN_FILE_NO(cursor->buffer->offset)))
1946 DBUG_VOID_RETURN; // everything wrong do not write to awoid more problems
1947 translog_check_cursor(cursor);
1948 if (cursor->protected)
1949 {
1950 DBUG_PRINT("info", ("Already protected and finished"));
1951 DBUG_VOID_RETURN;
1952 }
1953 cursor->protected= 1;
1954
1955 DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
1956 if (left != 0)
1957 {
1958 DBUG_PRINT("info", ("left: %u", (uint) left));
1959 memset(cursor->ptr, TRANSLOG_FILLER, left);
1960 cursor->ptr+= left;
1961 (*horizon)+= left; /* offset increasing */
1962 if (!cursor->chaser)
1963 cursor->buffer->size+= left;
1964 /* We are finishing the page so reset the counter */
1965 cursor->current_page_fill= 0;
1966 DBUG_PRINT("info", ("Finish Page buffer #%u: %p "
1967 "chaser: %d Size: %lu (%lu)",
1968 (uint) cursor->buffer->buffer_no,
1969 cursor->buffer, cursor->chaser,
1970 (ulong) cursor->buffer->size,
1971 (ulong) (cursor->ptr - cursor->buffer->buffer)));
1972 translog_check_cursor(cursor);
1973 }
1974 /*
1975 When we are finishing the page other thread might not finish the page
1976 header yet (in case if we started from the middle of the page) so we
1977 have to read log_descriptor.flags but not the flags from the page.
1978 */
1979 if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
1980 {
1981 translog_put_sector_protection(page, cursor);
1982 DBUG_PRINT("info", ("drop write_counter"));
1983 cursor->write_counter= 0;
1984 cursor->previous_offset= 0;
1985 }
1986 if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
1987 {
1988 uint32 crc= translog_crc(page + log_descriptor.page_overhead,
1989 TRANSLOG_PAGE_SIZE -
1990 log_descriptor.page_overhead);
1991 DBUG_PRINT("info", ("CRC: %lx", (ulong) crc));
1992 /* We have page number, file number and flag before crc */
1993 int4store(page + 3 + 3 + 1, crc);
1994 }
1995 DBUG_VOID_RETURN;
1996 }
1997
1998
1999 /*
2000 @brief Wait until all threads have finished closing this buffer.
2001
2002 @param buffer This buffer should be check
2003 */
2004
translog_wait_for_closing(struct st_translog_buffer * buffer)2005 static void translog_wait_for_closing(struct st_translog_buffer *buffer)
2006 {
2007 DBUG_ENTER("translog_wait_for_closing");
2008 DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u "
2009 "is closing %u File: %d size: %lu",
2010 (uint) buffer->buffer_no, buffer,
2011 (uint) buffer->copy_to_buffer_in_progress,
2012 (uint) buffer->is_closing_buffer,
2013 (buffer->file ? buffer->file->handler.file : -1),
2014 (ulong) buffer->size));
2015 translog_buffer_lock_assert_owner(buffer);
2016
2017 while (buffer->is_closing_buffer)
2018 {
2019 DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
2020 (uint) buffer->buffer_no, buffer));
2021 DBUG_ASSERT(buffer->file != NULL);
2022 mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
2023 DBUG_PRINT("info", ("wait for writers done buffer: #%u %p",
2024 (uint) buffer->buffer_no, buffer));
2025 }
2026
2027 DBUG_VOID_RETURN;
2028 }
2029
2030
2031 /*
2032 @brief Wait until all threads have finished filling this buffer.
2033
2034 @param buffer This buffer should be check
2035 */
2036
translog_wait_for_writers(struct st_translog_buffer * buffer)2037 static void translog_wait_for_writers(struct st_translog_buffer *buffer)
2038 {
2039 DBUG_ENTER("translog_wait_for_writers");
2040 DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u "
2041 "is closing %u File: %d size: %lu",
2042 (uint) buffer->buffer_no, buffer,
2043 (uint) buffer->copy_to_buffer_in_progress,
2044 (uint) buffer->is_closing_buffer,
2045 (buffer->file ? buffer->file->handler.file : -1),
2046 (ulong) buffer->size));
2047 translog_buffer_lock_assert_owner(buffer);
2048
2049 while (buffer->copy_to_buffer_in_progress)
2050 {
2051 DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
2052 (uint) buffer->buffer_no, buffer));
2053 DBUG_ASSERT(buffer->file != NULL);
2054 mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
2055 DBUG_PRINT("info", ("wait for writers done buffer: #%u %p",
2056 (uint) buffer->buffer_no, buffer));
2057 }
2058
2059 DBUG_VOID_RETURN;
2060 }
2061
2062
2063 /*
2064
2065 Wait for buffer to become free
2066
2067 SYNOPSIS
2068 translog_wait_for_buffer_free()
2069 buffer The buffer we are waiting for
2070
2071 NOTE
2072 - this buffer should be locked
2073 */
2074
translog_wait_for_buffer_free(struct st_translog_buffer * buffer)2075 static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
2076 {
2077 TRANSLOG_ADDRESS offset= buffer->offset;
2078 TRANSLOG_FILE *file= buffer->file;
2079 uint8 ver= buffer->ver;
2080 DBUG_ENTER("translog_wait_for_buffer_free");
2081 DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u "
2082 "is closing %u File: %d size: %lu",
2083 (uint) buffer->buffer_no, buffer,
2084 (uint) buffer->copy_to_buffer_in_progress,
2085 (uint) buffer->is_closing_buffer,
2086 (buffer->file ? buffer->file->handler.file : -1),
2087 (ulong) buffer->size));
2088
2089 translog_wait_for_writers(buffer);
2090
2091 if (offset != buffer->offset || file != buffer->file || ver != buffer->ver)
2092 DBUG_VOID_RETURN; /* the buffer if already freed */
2093
2094 while (buffer->file != NULL)
2095 {
2096 DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
2097 (uint) buffer->buffer_no, buffer));
2098 mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
2099 DBUG_PRINT("info", ("wait for writers done. buffer: #%u %p",
2100 (uint) buffer->buffer_no, buffer));
2101 }
2102 DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0);
2103 DBUG_VOID_RETURN;
2104 }
2105
2106
2107 /*
2108 Initialize the cursor for a buffer
2109
2110 SYNOPSIS
2111 translog_cursor_init()
2112 buffer The buffer
2113 cursor It's cursor
2114 buffer_no Number of buffer
2115 */
2116
translog_cursor_init(struct st_buffer_cursor * cursor,struct st_translog_buffer * buffer,uint8 buffer_no)2117 static void translog_cursor_init(struct st_buffer_cursor *cursor,
2118 struct st_translog_buffer *buffer,
2119 uint8 buffer_no)
2120 {
2121 DBUG_ENTER("translog_cursor_init");
2122 cursor->ptr= buffer->buffer;
2123 cursor->buffer= buffer;
2124 cursor->buffer_no= buffer_no;
2125 cursor->current_page_fill= 0;
2126 cursor->chaser= (cursor != &log_descriptor.bc);
2127 cursor->write_counter= 0;
2128 cursor->previous_offset= 0;
2129 cursor->protected= 0;
2130 DBUG_VOID_RETURN;
2131 }
2132
2133
2134 /*
2135 @brief Initialize buffer for the current file, and a cursor for this buffer.
2136
2137 @param buffer The buffer
2138 @param cursor It's cursor
2139 @param buffer_no Number of buffer
2140 */
2141
translog_start_buffer(struct st_translog_buffer * buffer,struct st_buffer_cursor * cursor,uint buffer_no)2142 static void translog_start_buffer(struct st_translog_buffer *buffer,
2143 struct st_buffer_cursor *cursor,
2144 uint buffer_no)
2145 {
2146 DBUG_ENTER("translog_start_buffer");
2147 DBUG_PRINT("enter",
2148 ("Assign buffer: #%u (%p) offset: 0x%x(%u)",
2149 (uint) buffer->buffer_no, buffer,
2150 (uint) LSN_OFFSET(log_descriptor.horizon),
2151 (uint) LSN_OFFSET(log_descriptor.horizon)));
2152 DBUG_ASSERT(buffer_no == buffer->buffer_no);
2153 buffer->pre_force_close_horizon=
2154 buffer->prev_last_lsn= buffer->last_lsn= LSN_IMPOSSIBLE;
2155 DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0 buffer: %p",
2156 buffer));
2157 buffer->offset= log_descriptor.horizon;
2158 buffer->next_buffer_offset= LSN_IMPOSSIBLE;
2159 buffer->file= get_current_logfile();
2160 buffer->overlay= 0;
2161 buffer->size= 0;
2162 buffer->skipped_data= 0;
2163 translog_cursor_init(cursor, buffer, buffer_no);
2164 DBUG_PRINT("info", ("file: #%ld (%d) init cursor #%u: %p "
2165 "chaser: %d Size: %lu (%lu)",
2166 (long) (buffer->file ? buffer->file->number : 0),
2167 (buffer->file ? buffer->file->handler.file : -1),
2168 (uint) cursor->buffer->buffer_no, cursor->buffer,
2169 cursor->chaser, (ulong) cursor->buffer->size,
2170 (ulong) (cursor->ptr - cursor->buffer->buffer)));
2171 translog_check_cursor(cursor);
2172 mysql_mutex_lock(&log_descriptor.dirty_buffer_mask_lock);
2173 log_descriptor.dirty_buffer_mask|= (1 << buffer->buffer_no);
2174 mysql_mutex_unlock(&log_descriptor.dirty_buffer_mask_lock);
2175
2176 DBUG_VOID_RETURN;
2177 }
2178
2179
2180 /*
2181 @brief Switch to the next buffer in a chain.
2182
2183 @param horizon \ Pointers on current position in file and buffer
2184 @param cursor /
2185 @param new_file Also start new file
2186
2187 @note
2188 - loghandler should be locked
2189 - after return new and old buffer still are locked
2190
2191 @retval 0 OK
2192 @retval 1 Error
2193 */
2194
translog_buffer_next(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor,my_bool new_file)2195 static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon,
2196 struct st_buffer_cursor *cursor,
2197 my_bool new_file)
2198 {
2199 uint old_buffer_no= cursor->buffer_no;
2200 uint new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
2201 struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no;
2202 my_bool chasing= cursor->chaser;
2203 DBUG_ENTER("translog_buffer_next");
2204
2205 DBUG_PRINT("info", ("horizon: " LSN_FMT " chasing: %d",
2206 LSN_IN_PARTS(log_descriptor.horizon), chasing));
2207
2208 DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0);
2209
2210 translog_finish_page(horizon, cursor);
2211
2212 if (!chasing)
2213 {
2214 translog_buffer_lock(new_buffer);
2215 #ifndef DBUG_OFF
2216 {
2217 TRANSLOG_ADDRESS offset= new_buffer->offset;
2218 TRANSLOG_FILE *file= new_buffer->file;
2219 uint8 ver= new_buffer->ver;
2220 translog_lock_assert_owner();
2221 #endif
2222 translog_wait_for_buffer_free(new_buffer);
2223 #ifndef DBUG_OFF
2224 /* We keep the handler locked so nobody can start this new buffer */
2225 DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL &&
2226 (file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver);
2227 }
2228 #endif
2229 }
2230 else
2231 DBUG_ASSERT(new_buffer->file != NULL);
2232
2233 if (new_file)
2234 {
2235 /* move the horizon to the next file and its header page */
2236 (*horizon)+= LSN_ONE_FILE;
2237 (*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE);
2238 if (!chasing && translog_create_new_file())
2239 {
2240 DBUG_RETURN(1);
2241 }
2242 }
2243
2244 /* prepare next page */
2245 if (chasing)
2246 translog_cursor_init(cursor, new_buffer, new_buffer_no);
2247 else
2248 {
2249 translog_lock_assert_owner();
2250 translog_start_buffer(new_buffer, cursor, new_buffer_no);
2251 new_buffer->prev_buffer_offset=
2252 log_descriptor.buffers[old_buffer_no].offset;
2253 new_buffer->prev_last_lsn=
2254 BUFFER_MAX_LSN(log_descriptor.buffers + old_buffer_no);
2255 }
2256 log_descriptor.buffers[old_buffer_no].next_buffer_offset= new_buffer->offset;
2257 DBUG_PRINT("info", ("prev_last_lsn set to " LSN_FMT " buffer:%p",
2258 LSN_IN_PARTS(new_buffer->prev_last_lsn),
2259 new_buffer));
2260 translog_new_page_header(horizon, cursor);
2261 DBUG_RETURN(0);
2262 }
2263
2264
2265 /*
2266 Sets max LSN sent to file, and address from which data is only in the buffer
2267
2268 SYNOPSIS
2269 translog_set_sent_to_disk()
2270 buffer buffer which we have sent to disk
2271
2272 TODO: use atomic operations if possible (64bit architectures?)
2273 */
2274
translog_set_sent_to_disk(struct st_translog_buffer * buffer)2275 static void translog_set_sent_to_disk(struct st_translog_buffer *buffer)
2276 {
2277 LSN lsn= buffer->last_lsn;
2278 TRANSLOG_ADDRESS in_buffers= buffer->next_buffer_offset;
2279
2280 DBUG_ENTER("translog_set_sent_to_disk");
2281 mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
2282 DBUG_PRINT("enter", ("lsn: " LSN_FMT " in_buffers: " LSN_FMT " "
2283 "in_buffers_only: " LSN_FMT " start: " LSN_FMT " "
2284 "sent_to_disk: " LSN_FMT,
2285 LSN_IN_PARTS(lsn),
2286 LSN_IN_PARTS(in_buffers),
2287 LSN_IN_PARTS(log_descriptor.log_start),
2288 LSN_IN_PARTS(log_descriptor.in_buffers_only),
2289 LSN_IN_PARTS(log_descriptor.sent_to_disk)));
2290 /*
2291 We write sequentially (first part of following assert) but we rewrite
2292 the same page in case we started mysql and shut it down immediately
2293 (second part of the following assert)
2294 */
2295 DBUG_ASSERT(cmp_translog_addr(lsn, log_descriptor.sent_to_disk) >= 0 ||
2296 cmp_translog_addr(lsn, log_descriptor.log_start) < 0);
2297 log_descriptor.sent_to_disk= lsn;
2298 /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
2299 if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
2300 {
2301 log_descriptor.in_buffers_only= in_buffers;
2302 DBUG_PRINT("info", ("set new in_buffers_only"));
2303 }
2304 mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2305 DBUG_VOID_RETURN;
2306 }
2307
2308
2309 /*
2310 Sets address from which data is only in the buffer
2311
2312 SYNOPSIS
2313 translog_set_only_in_buffers()
2314 lsn LSN to assign
2315 in_buffers to assign to in_buffers_only
2316 */
2317
translog_set_only_in_buffers(TRANSLOG_ADDRESS in_buffers)2318 static void translog_set_only_in_buffers(TRANSLOG_ADDRESS in_buffers)
2319 {
2320 DBUG_ENTER("translog_set_only_in_buffers");
2321 mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
2322 DBUG_PRINT("enter", ("in_buffers: " LSN_FMT " "
2323 "in_buffers_only: " LSN_FMT,
2324 LSN_IN_PARTS(in_buffers),
2325 LSN_IN_PARTS(log_descriptor.in_buffers_only)));
2326 /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
2327 if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
2328 {
2329 if (translog_status != TRANSLOG_OK)
2330 goto end;
2331 log_descriptor.in_buffers_only= in_buffers;
2332 DBUG_PRINT("info", ("set new in_buffers_only"));
2333 }
2334 end:
2335 mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2336 DBUG_VOID_RETURN;
2337 }
2338
2339
2340 /*
2341 Gets address from which data is only in the buffer
2342
2343 SYNOPSIS
2344 translog_only_in_buffers()
2345
2346 RETURN
2347 address from which data is only in the buffer
2348 */
2349
translog_only_in_buffers()2350 static TRANSLOG_ADDRESS translog_only_in_buffers()
2351 {
2352 register TRANSLOG_ADDRESS addr;
2353 DBUG_ENTER("translog_only_in_buffers");
2354 mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
2355 addr= log_descriptor.in_buffers_only;
2356 mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2357 DBUG_RETURN(addr);
2358 }
2359
2360
2361 /*
2362 Get max LSN sent to file
2363
2364 SYNOPSIS
2365 translog_get_sent_to_disk()
2366
2367 RETURN
2368 max LSN send to file
2369 */
2370
translog_get_sent_to_disk()2371 static LSN translog_get_sent_to_disk()
2372 {
2373 register LSN lsn;
2374 DBUG_ENTER("translog_get_sent_to_disk");
2375 mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
2376 lsn= log_descriptor.sent_to_disk;
2377 DBUG_PRINT("info", ("sent to disk up to " LSN_FMT, LSN_IN_PARTS(lsn)));
2378 mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2379 DBUG_RETURN(lsn);
2380 }
2381
2382
2383 /*
2384 Get first chunk address on the given page
2385
2386 SYNOPSIS
2387 translog_get_first_chunk_offset()
2388 page The page where to find first chunk
2389
2390 RETURN
2391 first chunk offset
2392 */
2393
translog_get_first_chunk_offset(uchar * page)2394 static my_bool translog_get_first_chunk_offset(uchar *page)
2395 {
2396 DBUG_ENTER("translog_get_first_chunk_offset");
2397 DBUG_ASSERT(page[TRANSLOG_PAGE_FLAGS] < TRANSLOG_FLAGS_NUM);
2398 DBUG_RETURN(page_overhead[page[TRANSLOG_PAGE_FLAGS]]);
2399 }
2400
2401
2402 /*
2403 Write coded length of record
2404
2405 SYNOPSIS
2406 translog_write_variable_record_1group_code_len
2407 dst Destination buffer pointer
2408 length Length which should be coded
2409 header_len Calculated total header length
2410 */
2411
2412 static void
translog_write_variable_record_1group_code_len(uchar * dst,translog_size_t length,uint16 header_len)2413 translog_write_variable_record_1group_code_len(uchar *dst,
2414 translog_size_t length,
2415 uint16 header_len)
2416 {
2417 switch (header_len) {
2418 case 6: /* (5 + 1) */
2419 DBUG_ASSERT(length <= 250);
2420 *dst= (uint8) length;
2421 return;
2422 case 8: /* (5 + 3) */
2423 DBUG_ASSERT(length <= 0xFFFF);
2424 *dst= 251;
2425 int2store(dst + 1, length);
2426 return;
2427 case 9: /* (5 + 4) */
2428 DBUG_ASSERT(length <= (ulong) 0xFFFFFF);
2429 *dst= 252;
2430 int3store(dst + 1, length);
2431 return;
2432 case 10: /* (5 + 5) */
2433 *dst= 253;
2434 int4store(dst + 1, length);
2435 return;
2436 default:
2437 DBUG_ASSERT(0);
2438 }
2439 return;
2440 }
2441
2442
2443 /*
2444 Decode record data length and advance given pointer to the next field
2445
2446 SYNOPSIS
2447 translog_variable_record_1group_decode_len()
2448 src The pointer to the pointer to the length beginning
2449
2450 RETURN
2451 decoded length
2452 */
2453
translog_variable_record_1group_decode_len(uchar ** src)2454 static translog_size_t translog_variable_record_1group_decode_len(uchar **src)
2455 {
2456 uint8 first= (uint8) (**src);
2457 switch (first) {
2458 case 251:
2459 (*src)+= 3;
2460 return (uint2korr((*src) - 2));
2461 case 252:
2462 (*src)+= 4;
2463 return (uint3korr((*src) - 3));
2464 case 253:
2465 (*src)+= 5;
2466 return (uint4korr((*src) - 4));
2467 case 254:
2468 case 255:
2469 DBUG_ASSERT(0); /* reserved for future use */
2470 return (0);
2471 default:
2472 (*src)++;
2473 return (first);
2474 }
2475 }
2476
2477
2478 /*
2479 Get total length of this chunk (not only body)
2480
2481 SYNOPSIS
2482 translog_get_total_chunk_length()
2483 page The page where chunk placed
2484 offset Offset of the chunk on this place
2485
2486 RETURN
2487 total length of the chunk
2488 */
2489
translog_get_total_chunk_length(uchar * page,uint16 offset)2490 static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset)
2491 {
2492 DBUG_ENTER("translog_get_total_chunk_length");
2493 switch (page[offset] & TRANSLOG_CHUNK_TYPE) {
2494 case TRANSLOG_CHUNK_LSN:
2495 {
2496 /* 0 chunk referred as LSN (head or tail) */
2497 translog_size_t rec_len;
2498 uchar *start= page + offset;
2499 uchar *ptr= start + 1 + 2; /* chunk type and short trid */
2500 uint16 chunk_len, header_len, page_rest;
2501 DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
2502 rec_len= translog_variable_record_1group_decode_len(&ptr);
2503 chunk_len= uint2korr(ptr);
2504 header_len= (uint16) (ptr -start) + 2;
2505 DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u",
2506 (ulong) rec_len, (uint) chunk_len, (uint) header_len));
2507 if (chunk_len)
2508 {
2509 DBUG_PRINT("info", ("chunk len: %u + %u = %u",
2510 (uint) header_len, (uint) chunk_len,
2511 (uint) (chunk_len + header_len)));
2512 DBUG_RETURN(chunk_len + header_len);
2513 }
2514 page_rest= TRANSLOG_PAGE_SIZE - offset;
2515 DBUG_PRINT("info", ("page_rest %u", (uint) page_rest));
2516 if (rec_len + header_len < page_rest)
2517 DBUG_RETURN(rec_len + header_len);
2518 DBUG_RETURN(page_rest);
2519 }
2520 case TRANSLOG_CHUNK_FIXED:
2521 {
2522 uchar *ptr;
2523 uint type= page[offset] & TRANSLOG_REC_TYPE;
2524 uint length;
2525 int i;
2526 /* 1 (pseudo)fixed record (also LSN) */
2527 DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED"));
2528 DBUG_ASSERT(log_record_type_descriptor[type].rclass ==
2529 LOGRECTYPE_FIXEDLENGTH ||
2530 log_record_type_descriptor[type].rclass ==
2531 LOGRECTYPE_PSEUDOFIXEDLENGTH);
2532 if (log_record_type_descriptor[type].rclass == LOGRECTYPE_FIXEDLENGTH)
2533 {
2534 DBUG_PRINT("info",
2535 ("Fixed length: %u",
2536 (uint) (log_record_type_descriptor[type].fixed_length + 3)));
2537 DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3);
2538 }
2539
2540 ptr= page + offset + 3; /* first compressed LSN */
2541 length= log_record_type_descriptor[type].fixed_length + 3;
2542 for (i= 0; i < log_record_type_descriptor[type].compressed_LSN; i++)
2543 {
2544 /* first 2 bits is length - 2 */
2545 uint len= (((uint8) (*ptr)) >> 6) + 2;
2546 if (ptr[0] == 0 && ((uint8) ptr[1]) == 1)
2547 len+= LSN_STORE_SIZE; /* case of full LSN storing */
2548 ptr+= len;
2549 /* subtract saved bytes */
2550 length-= (LSN_STORE_SIZE - len);
2551 }
2552 DBUG_PRINT("info", ("Pseudo-fixed length: %u", length));
2553 DBUG_RETURN(length);
2554 }
2555 case TRANSLOG_CHUNK_NOHDR:
2556 /* 2 no header chunk (till page end) */
2557 DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR length: %u",
2558 (uint) (TRANSLOG_PAGE_SIZE - offset)));
2559 DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset);
2560 case TRANSLOG_CHUNK_LNGTH: /* 3 chunk with chunk length */
2561 DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH"));
2562 DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3);
2563 DBUG_PRINT("info", ("length: %u", uint2korr(page + offset + 1) + 3));
2564 DBUG_RETURN(uint2korr(page + offset + 1) + 3);
2565 default:
2566 DBUG_ASSERT(0);
2567 DBUG_RETURN(0);
2568 }
2569 }
2570
2571 /*
2572 @brief Waits previous buffer flush finish
2573
2574 @param buffer buffer for check
2575
2576 @retval 0 previous buffer flushed and this thread have to flush this one
2577 @retval 1 previous buffer flushed and this buffer flushed by other thread too
2578 */
2579
translog_prev_buffer_flush_wait(struct st_translog_buffer * buffer)2580 my_bool translog_prev_buffer_flush_wait(struct st_translog_buffer *buffer)
2581 {
2582 TRANSLOG_ADDRESS offset= buffer->offset;
2583 TRANSLOG_FILE *file= buffer->file;
2584 uint8 ver= buffer->ver;
2585 DBUG_ENTER("translog_prev_buffer_flush_wait");
2586 DBUG_PRINT("enter", ("buffer: %p #%u offset: " LSN_FMT " "
2587 "prev sent: " LSN_FMT " prev offset: " LSN_FMT,
2588 buffer, (uint) buffer->buffer_no,
2589 LSN_IN_PARTS(buffer->offset),
2590 LSN_IN_PARTS(buffer->prev_sent_to_disk),
2591 LSN_IN_PARTS(buffer->prev_buffer_offset)));
2592 translog_buffer_lock_assert_owner(buffer);
2593 if (buffer->prev_buffer_offset != buffer->prev_sent_to_disk)
2594 {
2595 do {
2596 mysql_cond_wait(&buffer->prev_sent_to_disk_cond, &buffer->mutex);
2597 if (buffer->file != file || buffer->offset != offset ||
2598 buffer->ver != ver)
2599 DBUG_RETURN(1); /* some the thread flushed the buffer already */
2600 } while(buffer->prev_buffer_offset != buffer->prev_sent_to_disk);
2601 }
2602 DBUG_RETURN(0);
2603 }
2604
2605
2606 /*
2607 Flush given buffer
2608
2609 SYNOPSIS
2610 translog_buffer_flush()
2611 buffer This buffer should be flushed
2612
2613 RETURN
2614 0 OK
2615 1 Error
2616 */
2617
translog_buffer_flush(struct st_translog_buffer * buffer)2618 static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
2619 {
2620 uint32 i, pg;
2621 TRANSLOG_ADDRESS offset= buffer->offset;
2622 TRANSLOG_FILE *file= buffer->file;
2623 uint8 ver= buffer->ver;
2624 uint skipped_data;
2625 DBUG_ENTER("translog_buffer_flush");
2626 DBUG_PRINT("enter",
2627 ("Buffer: #%u %p file: %d offset: " LSN_FMT " size: %lu",
2628 (uint) buffer->buffer_no, buffer,
2629 buffer->file->handler.file,
2630 LSN_IN_PARTS(buffer->offset),
2631 (ulong) buffer->size));
2632 translog_buffer_lock_assert_owner(buffer);
2633
2634 if (buffer->file == NULL)
2635 DBUG_RETURN(0);
2636
2637 translog_wait_for_writers(buffer);
2638
2639 if (buffer->file != file || buffer->offset != offset || buffer->ver != ver)
2640 DBUG_RETURN(0); /* some the thread flushed the buffer already */
2641
2642 if (buffer->is_closing_buffer)
2643 {
2644 /* some other flush in progress */
2645 translog_wait_for_closing(buffer);
2646 if (buffer->file != file || buffer->offset != offset || buffer->ver != ver)
2647 DBUG_RETURN(0); /* some the thread flushed the buffer already */
2648 }
2649
2650 if (buffer->overlay && translog_prev_buffer_flush_wait(buffer))
2651 DBUG_RETURN(0); /* some the thread flushed the buffer already */
2652
2653 /*
2654 Send page by page in the pagecache what we are going to write on the
2655 disk
2656 */
2657 file= buffer->file;
2658 skipped_data= buffer->skipped_data;
2659 DBUG_ASSERT(skipped_data < TRANSLOG_PAGE_SIZE);
2660 for (i= 0, pg= LSN_OFFSET(buffer->offset) / TRANSLOG_PAGE_SIZE;
2661 i < buffer->size;
2662 i+= TRANSLOG_PAGE_SIZE, pg++)
2663 {
2664 #ifndef DBUG_OFF
2665 TRANSLOG_ADDRESS addr= (buffer->offset + i);
2666 #endif
2667 DBUG_PRINT("info", ("send log form %lu till %lu address: " LSN_FMT " "
2668 "page #: %lu buffer size: %lu buffer: %p",
2669 (ulong) i, (ulong) (i + TRANSLOG_PAGE_SIZE),
2670 LSN_IN_PARTS(addr), (ulong) pg, (ulong) buffer->size,
2671 buffer));
2672 DBUG_ASSERT(log_descriptor.pagecache->block_size == TRANSLOG_PAGE_SIZE);
2673 DBUG_ASSERT(i + TRANSLOG_PAGE_SIZE <= buffer->size);
2674 if (translog_status != TRANSLOG_OK && translog_status != TRANSLOG_SHUTDOWN)
2675 DBUG_RETURN(1);
2676 if (pagecache_write_part(log_descriptor.pagecache,
2677 &file->handler, pg, 3,
2678 buffer->buffer + i,
2679 PAGECACHE_PLAIN_PAGE,
2680 PAGECACHE_LOCK_LEFT_UNLOCKED,
2681 PAGECACHE_PIN_LEFT_UNPINNED,
2682 PAGECACHE_WRITE_DONE, 0,
2683 LSN_IMPOSSIBLE,
2684 skipped_data,
2685 TRANSLOG_PAGE_SIZE - skipped_data))
2686 {
2687 DBUG_PRINT("error",
2688 ("Can't write page " LSN_FMT " to pagecache, error: %d",
2689 buffer->file->number,
2690 (uint)(LSN_OFFSET(buffer->offset)+ i),
2691 my_errno));
2692 translog_stop_writing();
2693 DBUG_RETURN(1);
2694 }
2695 skipped_data= 0;
2696 }
2697 file->is_sync= 0;
2698 if (my_pwrite(file->handler.file, buffer->buffer + buffer->skipped_data,
2699 buffer->size - buffer->skipped_data,
2700 LSN_OFFSET(buffer->offset) + buffer->skipped_data,
2701 log_write_flags))
2702 {
2703 DBUG_PRINT("error", ("Can't write buffer " LSN_FMT " size %lu "
2704 "to the disk (%d)",
2705 (uint) file->handler.file,
2706 (uint) LSN_OFFSET(buffer->offset),
2707 (ulong) buffer->size, errno));
2708 translog_stop_writing();
2709 DBUG_RETURN(1);
2710 }
2711 /*
2712 Dropping the flag in such way can make false alarm: signalling than the
2713 file in not sync when it is sync, but the situation is quite rare and
2714 protections with mutexes give much more overhead to the whole engine
2715 */
2716 file->is_sync= 0;
2717
2718 if (LSN_OFFSET(buffer->last_lsn) != 0) /* if buffer->last_lsn is set */
2719 {
2720 if (translog_prev_buffer_flush_wait(buffer))
2721 DBUG_RETURN(0); /* some the thread flushed the buffer already */
2722 translog_set_sent_to_disk(buffer);
2723 }
2724 else
2725 translog_set_only_in_buffers(buffer->next_buffer_offset);
2726
2727 /* say to next buffer that we are finished */
2728 {
2729 struct st_translog_buffer *next_buffer=
2730 log_descriptor.buffers + ((buffer->buffer_no + 1) % TRANSLOG_BUFFERS_NO);
2731 if (likely(translog_status == TRANSLOG_OK)){
2732 translog_buffer_lock(next_buffer);
2733 next_buffer->prev_sent_to_disk= buffer->offset;
2734 translog_buffer_unlock(next_buffer);
2735 mysql_cond_broadcast(&next_buffer->prev_sent_to_disk_cond);
2736 }
2737 else
2738 {
2739 /*
2740 It is shutdown =>
2741 1) there is only one thread
2742 2) mutexes of other buffers can be destroyed => we can't use them
2743 */
2744 next_buffer->prev_sent_to_disk= buffer->offset;
2745 }
2746 }
2747 /* Free buffer */
2748 buffer->file= NULL;
2749 buffer->overlay= 0;
2750 buffer->ver++;
2751 mysql_mutex_lock(&log_descriptor.dirty_buffer_mask_lock);
2752 log_descriptor.dirty_buffer_mask&= ~(1 << buffer->buffer_no);
2753 mysql_mutex_unlock(&log_descriptor.dirty_buffer_mask_lock);
2754 mysql_cond_broadcast(&buffer->waiting_filling_buffer);
2755 DBUG_RETURN(0);
2756 }
2757
2758
2759 /*
2760 Recover page with sector protection (wipe out failed chunks)
2761
2762 SYNOPSYS
2763 translog_recover_page_up_to_sector()
2764 page reference on the page
2765 offset offset of failed sector
2766
2767 RETURN
2768 0 OK
2769 1 Error
2770 */
2771
translog_recover_page_up_to_sector(uchar * page,uint16 offset)2772 static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset)
2773 {
2774 uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end;
2775 DBUG_ENTER("translog_recover_page_up_to_sector");
2776 DBUG_PRINT("enter", ("offset: %u first chunk: %u",
2777 (uint) offset, (uint) chunk_offset));
2778
2779 while (chunk_offset < offset && page[chunk_offset] != TRANSLOG_FILLER)
2780 {
2781 uint16 chunk_length;
2782 if ((chunk_length=
2783 translog_get_total_chunk_length(page, chunk_offset)) == 0)
2784 {
2785 DBUG_PRINT("error", ("cant get chunk length (offset %u)",
2786 (uint) chunk_offset));
2787 DBUG_RETURN(1);
2788 }
2789 DBUG_PRINT("info", ("chunk: offset: %u length %u",
2790 (uint) chunk_offset, (uint) chunk_length));
2791 if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE)
2792 {
2793 DBUG_PRINT("error", ("damaged chunk (offset %u) in trusted area",
2794 (uint) chunk_offset));
2795 DBUG_RETURN(1);
2796 }
2797 chunk_offset+= chunk_length;
2798 }
2799
2800 valid_chunk_end= chunk_offset;
2801 /* end of trusted area - sector parsing */
2802 while (page[chunk_offset] != TRANSLOG_FILLER)
2803 {
2804 uint16 chunk_length;
2805 if ((chunk_length=
2806 translog_get_total_chunk_length(page, chunk_offset)) == 0)
2807 break;
2808
2809 DBUG_PRINT("info", ("chunk: offset: %u length %u",
2810 (uint) chunk_offset, (uint) chunk_length));
2811 if (((ulong) chunk_offset) + ((ulong) chunk_length) >
2812 (uint) (offset + DISK_DRIVE_SECTOR_SIZE))
2813 break;
2814
2815 chunk_offset+= chunk_length;
2816 valid_chunk_end= chunk_offset;
2817 }
2818 DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end));
2819
2820 memset(page + valid_chunk_end, TRANSLOG_FILLER,
2821 TRANSLOG_PAGE_SIZE - valid_chunk_end);
2822
2823 DBUG_RETURN(0);
2824 }
2825
2826
2827 /**
2828 @brief Checks and removes sector protection.
2829
2830 @param page reference on the page content.
2831 @param file transaction log descriptor.
2832
2833 @retvat 0 OK
2834 @retval 1 Error
2835 */
2836
2837 static my_bool
translog_check_sector_protection(uchar * page,TRANSLOG_FILE * file)2838 translog_check_sector_protection(uchar *page, TRANSLOG_FILE *file)
2839 {
2840 uint i, offset;
2841 uchar *table= page + page_overhead[page[TRANSLOG_PAGE_FLAGS]] -
2842 TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
2843 uint8 current= table[0];
2844 DBUG_ENTER("translog_check_sector_protection");
2845
2846 for (i= 1, offset= DISK_DRIVE_SECTOR_SIZE;
2847 i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
2848 i++, offset+= DISK_DRIVE_SECTOR_SIZE)
2849 {
2850 /*
2851 TODO: add chunk counting for "suspecting" sectors (difference is
2852 more than 1-2), if difference more then present chunks then it is
2853 the problem.
2854 */
2855 uint8 test= page[offset];
2856 DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx "
2857 "read: 0x%x stored: 0x%x%x",
2858 i, offset, (ulong) current,
2859 (uint) uint2korr(page + offset), (uint) table[i],
2860 (uint) table[i + 1]));
2861 /*
2862 3 is minimal possible record length. So we can have "distance"
2863 between 2 sectors value more then DISK_DRIVE_SECTOR_SIZE / 3
2864 only if it is old value, i.e. the sector was not written.
2865 */
2866 if (((test < current) &&
2867 ((uint)(0xFFL - current + test) > DISK_DRIVE_SECTOR_SIZE / 3)) ||
2868 ((test >= current) &&
2869 ((uint)(test - current) > DISK_DRIVE_SECTOR_SIZE / 3)))
2870 {
2871 if (translog_recover_page_up_to_sector(page, offset))
2872 DBUG_RETURN(1);
2873 file->was_recovered= 1;
2874 DBUG_RETURN(0);
2875 }
2876
2877 /* Restore value on the page */
2878 page[offset]= table[i];
2879 current= test;
2880 DBUG_PRINT("info", ("sector: #%u offset: %u current: %lx "
2881 "read: 0x%x stored: 0x%x",
2882 i, offset, (ulong) current,
2883 (uint) page[offset], (uint) table[i]));
2884 }
2885 DBUG_RETURN(0);
2886 }
2887
2888
2889 /**
2890 @brief Log page validator (read callback)
2891
2892 @param page The page data to check
2893 @param page_no The page number (<offset>/<page length>)
2894 @param data_ptr Read callback data pointer (pointer to TRANSLOG_FILE)
2895
2896 @todo: add turning loghandler to read-only mode after merging with
2897 that patch.
2898
2899 @retval 0 OK
2900 @retval 1 Error
2901 */
2902
translog_page_validator(int res,PAGECACHE_IO_HOOK_ARGS * args)2903 static my_bool translog_page_validator(int res, PAGECACHE_IO_HOOK_ARGS *args)
2904 {
2905 uchar *page= args->page;
2906 pgcache_page_no_t page_no= args->pageno;
2907 uint this_page_page_overhead;
2908 uint flags;
2909 uchar *page_pos;
2910 TRANSLOG_FILE *data= (TRANSLOG_FILE *) args->data;
2911 #ifndef DBUG_OFF
2912 pgcache_page_no_t offset= page_no * TRANSLOG_PAGE_SIZE;
2913 #endif
2914 DBUG_ENTER("translog_page_validator");
2915
2916 data->was_recovered= 0;
2917
2918 if (res)
2919 {
2920 DBUG_RETURN(1);
2921 }
2922
2923 if ((pgcache_page_no_t) uint3korr(page) != page_no ||
2924 (uint32) uint3korr(page + 3) != data->number)
2925 {
2926 DBUG_PRINT("error", ("Page " LSN_FMT ": "
2927 "page address written in the page is incorrect: "
2928 "File %lu instead of %lu or page %lu instead of %lu",
2929 (uint)data->number, (uint)offset,
2930 (ulong) uint3korr(page + 3), (ulong) data->number,
2931 (ulong) uint3korr(page),
2932 (ulong) page_no));
2933 DBUG_RETURN(1);
2934 }
2935 flags= (uint)(page[TRANSLOG_PAGE_FLAGS]);
2936 this_page_page_overhead= page_overhead[flags];
2937 if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
2938 TRANSLOG_RECORD_CRC))
2939 {
2940 DBUG_PRINT("error", ("Page " LSN_FMT ": "
2941 "Garbage in the page flags field detected : %x",
2942 (uint) data->number, (uint) offset,
2943 (uint) flags));
2944 DBUG_RETURN(1);
2945 }
2946 page_pos= page + (3 + 3 + 1);
2947 if (flags & TRANSLOG_PAGE_CRC)
2948 {
2949 uint32 crc= translog_crc(page + this_page_page_overhead,
2950 TRANSLOG_PAGE_SIZE -
2951 this_page_page_overhead);
2952 if (crc != uint4korr(page_pos))
2953 {
2954 DBUG_PRINT("error", ("Page " LSN_FMT ": "
2955 "CRC mismatch: calculated: %lx on the page %lx",
2956 (uint) data->number, (uint) offset,
2957 (ulong) crc, (ulong) uint4korr(page_pos)));
2958 DBUG_RETURN(1);
2959 }
2960 page_pos+= CRC_SIZE; /* Skip crc */
2961 }
2962 if (flags & TRANSLOG_SECTOR_PROTECTION &&
2963 translog_check_sector_protection(page, data))
2964 {
2965 DBUG_RETURN(1);
2966 }
2967 DBUG_RETURN(0);
2968 }
2969
2970
2971 /**
2972 @brief Locks the loghandler.
2973 */
2974
translog_lock()2975 void translog_lock()
2976 {
2977 uint8 current_buffer;
2978 DBUG_ENTER("translog_lock");
2979
2980 /*
2981 Locking the loghandler mean locking current buffer, but it can change
2982 during locking, so we should check it
2983 */
2984 for (;;)
2985 {
2986 /*
2987 log_descriptor.bc.buffer_no is only one byte so its reading is
2988 an atomic operation
2989 */
2990 current_buffer= log_descriptor.bc.buffer_no;
2991 translog_buffer_lock(log_descriptor.buffers + current_buffer);
2992 if (log_descriptor.bc.buffer_no == current_buffer)
2993 break;
2994 translog_buffer_unlock(log_descriptor.buffers + current_buffer);
2995 }
2996 DBUG_VOID_RETURN;
2997 }
2998
2999
3000 /*
3001 Unlock the loghandler
3002
3003 SYNOPSIS
3004 translog_unlock()
3005
3006 RETURN
3007 0 OK
3008 1 Error
3009 */
3010
translog_unlock()3011 void translog_unlock()
3012 {
3013 translog_buffer_unlock(log_descriptor.bc.buffer);
3014 }
3015
3016
3017 /**
3018 @brief Get log page by file number and offset of the beginning of the page
3019
3020 @param data validator data, which contains the page address
3021 @param buffer buffer for page placing
3022 (might not be used in some cache implementations)
3023 @param direct_link if it is not NULL then caller can accept direct
3024 link to the page cache
3025
3026 @retval NULL Error
3027 @retval # pointer to the page cache which should be used to read this page
3028 */
3029
translog_get_page(TRANSLOG_VALIDATOR_DATA * data,uchar * buffer,PAGECACHE_BLOCK_LINK ** direct_link)3030 static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer,
3031 PAGECACHE_BLOCK_LINK **direct_link)
3032 {
3033 TRANSLOG_ADDRESS addr= *(data->addr), in_buffers;
3034 uint32 file_no= LSN_FILE_NO(addr);
3035 TRANSLOG_FILE *file;
3036 DBUG_ENTER("translog_get_page");
3037 DBUG_PRINT("enter", ("File: %u Offset: %u(0x%x)",
3038 file_no,
3039 (uint) LSN_OFFSET(addr),
3040 (uint) LSN_OFFSET(addr)));
3041
3042 /* it is really page address */
3043 DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0);
3044 if (direct_link)
3045 *direct_link= NULL;
3046
3047 restart:
3048
3049 in_buffers= translog_only_in_buffers();
3050 DBUG_PRINT("info", ("in_buffers: " LSN_FMT,
3051 LSN_IN_PARTS(in_buffers)));
3052 if (in_buffers != LSN_IMPOSSIBLE &&
3053 cmp_translog_addr(addr, in_buffers) >= 0)
3054 {
3055 translog_lock();
3056 DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
3057 /* recheck with locked loghandler */
3058 in_buffers= translog_only_in_buffers();
3059 if (cmp_translog_addr(addr, in_buffers) >= 0)
3060 {
3061 uint16 buffer_no= log_descriptor.bc.buffer_no;
3062 #ifdef DBUG_ASSERT_EXISTS
3063 uint16 buffer_start= buffer_no;
3064 #endif
3065 struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
3066 struct st_translog_buffer *curr_buffer= log_descriptor.bc.buffer;
3067 for (;;)
3068 {
3069 /*
3070 if the page is in the buffer and it is the last version of the
3071 page (in case of division the page by buffer flush)
3072 */
3073 if (curr_buffer->file != NULL &&
3074 cmp_translog_addr(addr, curr_buffer->offset) >= 0 &&
3075 cmp_translog_addr(addr,
3076 (curr_buffer->next_buffer_offset ?
3077 curr_buffer->next_buffer_offset:
3078 curr_buffer->offset + curr_buffer->size)) < 0)
3079 {
3080 TRANSLOG_ADDRESS offset= curr_buffer->offset;
3081 TRANSLOG_FILE *fl= curr_buffer->file;
3082 uchar *from, *table= NULL;
3083 int is_last_unfinished_page;
3084 uint last_protected_sector= 0;
3085 uint skipped_data= curr_buffer->skipped_data;
3086 TRANSLOG_FILE file_copy;
3087 uint8 ver= curr_buffer->ver;
3088 translog_wait_for_writers(curr_buffer);
3089 if (offset != curr_buffer->offset || fl != curr_buffer->file ||
3090 ver != curr_buffer->ver)
3091 {
3092 DBUG_ASSERT(buffer_unlock == curr_buffer);
3093 translog_buffer_unlock(buffer_unlock);
3094 goto restart;
3095 }
3096 DBUG_ASSERT(LSN_FILE_NO(addr) == LSN_FILE_NO(curr_buffer->offset));
3097 from= curr_buffer->buffer + (addr - curr_buffer->offset);
3098 if (skipped_data && addr == curr_buffer->offset)
3099 {
3100 /*
3101 We read page part of which is not present in buffer,
3102 so we should read absent part from file (page cache actually)
3103 */
3104 file= get_logfile_by_number(file_no);
3105 DBUG_ASSERT(file != NULL);
3106 /*
3107 it's ok to not lock the page because:
3108 - The log handler has it's own page cache.
3109 - There is only one thread that can access the log
3110 cache at a time
3111 */
3112 if (!(buffer= pagecache_read(log_descriptor.pagecache,
3113 &file->handler,
3114 LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
3115 3, buffer,
3116 PAGECACHE_PLAIN_PAGE,
3117 PAGECACHE_LOCK_LEFT_UNLOCKED,
3118 NULL)))
3119 DBUG_RETURN(NULL);
3120 }
3121 else
3122 skipped_data= 0; /* Read after skipped in buffer data */
3123 /*
3124 Now we have correct data in buffer up to 'skipped_data'. The
3125 following memcpy() will move the data from the internal buffer
3126 that was not yet on disk.
3127 */
3128 memcpy(buffer + skipped_data, from + skipped_data,
3129 TRANSLOG_PAGE_SIZE - skipped_data);
3130 /*
3131 We can use copy then in translog_page_validator() because it
3132 do not put it permanently somewhere.
3133 We have to use copy because after releasing log lock we can't
3134 guaranty that the file still be present (in real life it will be
3135 present but theoretically possible that it will be released
3136 already from last files cache);
3137 */
3138 file_copy= *(curr_buffer->file);
3139 file_copy.handler.callback_data= (uchar*) &file_copy;
3140 is_last_unfinished_page= ((log_descriptor.bc.buffer ==
3141 curr_buffer) &&
3142 (log_descriptor.bc.ptr >= from) &&
3143 (log_descriptor.bc.ptr <
3144 from + TRANSLOG_PAGE_SIZE));
3145 if (is_last_unfinished_page &&
3146 (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION))
3147 {
3148 last_protected_sector= ((log_descriptor.bc.previous_offset - 1) /
3149 DISK_DRIVE_SECTOR_SIZE);
3150 table= buffer + log_descriptor.page_overhead -
3151 TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
3152 }
3153
3154 DBUG_ASSERT(buffer_unlock == curr_buffer);
3155 translog_buffer_unlock(buffer_unlock);
3156 if (is_last_unfinished_page)
3157 {
3158 uint i;
3159 /*
3160 This is last unfinished page => we should not check CRC and
3161 remove only that protection which already installed (no need
3162 to check it)
3163
3164 We do not check the flag of sector protection, because if
3165 (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) is
3166 not set then last_protected_sector will be 0 so following loop
3167 will be never executed
3168 */
3169 DBUG_PRINT("info", ("This is last unfinished page, "
3170 "last protected sector %u",
3171 last_protected_sector));
3172 for (i= 1; i <= last_protected_sector; i++)
3173 {
3174 uint offset= i * DISK_DRIVE_SECTOR_SIZE;
3175 DBUG_PRINT("info", ("Sector %u: 0x%02x <- 0x%02x",
3176 i, buffer[offset],
3177 table[i]));
3178 buffer[offset]= table[i];
3179 }
3180 }
3181 else
3182 {
3183 /*
3184 This IF should be true because we use in-memory data which
3185 supposed to be correct.
3186 */
3187 PAGECACHE_IO_HOOK_ARGS args;
3188 args.page= buffer;
3189 args.pageno= LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE;
3190 args.data= (uchar*) &file_copy;
3191 if (translog_page_validator(0, &args))
3192 {
3193 DBUG_ASSERT(0);
3194 buffer= NULL;
3195 }
3196 }
3197 DBUG_RETURN(buffer);
3198 }
3199 buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
3200 curr_buffer= log_descriptor.buffers + buffer_no;
3201 translog_buffer_lock(curr_buffer);
3202 translog_buffer_unlock(buffer_unlock);
3203 buffer_unlock= curr_buffer;
3204 /* we can't make a full circle */
3205 DBUG_ASSERT(buffer_start != buffer_no);
3206 }
3207 }
3208 translog_unlock();
3209 }
3210 file= get_logfile_by_number(file_no);
3211 DBUG_ASSERT(file != NULL);
3212 buffer= pagecache_read(log_descriptor.pagecache, &file->handler,
3213 LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
3214 3, (direct_link ? NULL : buffer),
3215 PAGECACHE_PLAIN_PAGE,
3216 (direct_link ?
3217 PAGECACHE_LOCK_READ :
3218 PAGECACHE_LOCK_LEFT_UNLOCKED),
3219 direct_link);
3220 DBUG_PRINT("info", ("Direct link is assigned to : %p * %p",
3221 direct_link,
3222 (direct_link ? *direct_link : NULL)));
3223 data->was_recovered= file->was_recovered;
3224 DBUG_RETURN(buffer);
3225 }
3226
3227
3228 /**
3229 @brief free direct log page link
3230
3231 @param direct_link the direct log page link to be freed
3232
3233 */
3234
translog_free_link(PAGECACHE_BLOCK_LINK * direct_link)3235 static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link)
3236 {
3237 DBUG_ENTER("translog_free_link");
3238 DBUG_PRINT("info", ("Direct link: %p",
3239 direct_link));
3240 if (direct_link)
3241 pagecache_unlock_by_link(log_descriptor.pagecache, direct_link,
3242 PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN,
3243 LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0, FALSE);
3244 DBUG_VOID_RETURN;
3245 }
3246
3247
3248 /**
3249 @brief Finds last full page of the given log file.
3250
3251 @param addr address structure to fill with data, which contain
3252 file number of the log file
3253 @param last_page_ok Result of the check whether last page OK.
3254 (for now only we check only that file length
3255 divisible on page length).
3256 @param no_errors suppress messages about non-critical errors
3257
3258 @retval 0 OK
3259 @retval 1 Error
3260 */
3261
translog_get_last_page_addr(TRANSLOG_ADDRESS * addr,my_bool * last_page_ok,my_bool no_errors)3262 static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr,
3263 my_bool *last_page_ok,
3264 my_bool no_errors)
3265 {
3266 char path[FN_REFLEN];
3267 uint32 rec_offset;
3268 my_off_t file_size;
3269 uint32 file_no= LSN_FILE_NO(*addr);
3270 TRANSLOG_FILE *file;
3271 #ifndef DBUG_OFF
3272 char buff[21];
3273 #endif
3274 DBUG_ENTER("translog_get_last_page_addr");
3275
3276 if (likely((file= get_logfile_by_number(file_no)) != NULL))
3277 {
3278 /*
3279 This function used only during initialization of loghandler or in
3280 scanner (which mean we need read that part of the log), so the
3281 requested log file have to be opened and can't be freed after
3282 returning pointer on it (file_size).
3283 */
3284 file_size= mysql_file_seek(file->handler.file, 0, SEEK_END, MYF(0));
3285 }
3286 else
3287 {
3288 /*
3289 This branch is used only during very early initialization
3290 when files are not opened.
3291 */
3292 File fd;
3293 if ((fd= mysql_file_open(key_file_translog,
3294 translog_filename_by_fileno(file_no, path),
3295 O_RDONLY | O_CLOEXEC, (no_errors ? MYF(0) : MYF(MY_WME)))) < 0)
3296 {
3297 my_errno= errno;
3298 DBUG_PRINT("error", ("Error %d during opening file #%d",
3299 errno, file_no));
3300 DBUG_RETURN(1);
3301 }
3302 file_size= mysql_file_seek(fd, 0, SEEK_END, MYF(0));
3303 mysql_file_close(fd, MYF(0));
3304 }
3305 DBUG_PRINT("info", ("File size: %s", llstr(file_size, buff)));
3306 if (file_size == MY_FILEPOS_ERROR)
3307 DBUG_RETURN(1);
3308 DBUG_ASSERT(file_size < 0xffffffffULL);
3309 if (((uint32)file_size) > TRANSLOG_PAGE_SIZE)
3310 {
3311 rec_offset= (((((uint32)file_size) / TRANSLOG_PAGE_SIZE) - 1) *
3312 TRANSLOG_PAGE_SIZE);
3313 *last_page_ok= (((uint32)file_size) == rec_offset + TRANSLOG_PAGE_SIZE);
3314 }
3315 else
3316 {
3317 *last_page_ok= 0;
3318 rec_offset= 0;
3319 }
3320 *addr= MAKE_LSN(file_no, rec_offset);
3321 DBUG_PRINT("info", ("Last page: 0x%lx ok: %d", (ulong) rec_offset,
3322 *last_page_ok));
3323 DBUG_RETURN(0);
3324 }
3325
3326
3327 /**
3328 @brief Get number bytes for record length storing
3329
3330 @param length Record length which will be encoded
3331
3332 @return 1,3,4,5 - number of bytes to store given length
3333 */
3334
translog_variable_record_length_bytes(translog_size_t length)3335 static uint translog_variable_record_length_bytes(translog_size_t length)
3336 {
3337 if (length < 250)
3338 return 1;
3339 if (length < 0xFFFF)
3340 return 3;
3341 if (length < (ulong) 0xFFFFFF)
3342 return 4;
3343 return 5;
3344 }
3345
3346
3347 /**
3348 @brief Gets header of this chunk.
3349
3350 @param chunk The pointer to the chunk beginning
3351
3352 @retval # total length of the chunk
3353 @retval 0 Error
3354 */
3355
translog_get_chunk_header_length(uchar * chunk)3356 static uint16 translog_get_chunk_header_length(uchar *chunk)
3357 {
3358 DBUG_ENTER("translog_get_chunk_header_length");
3359 switch (*chunk & TRANSLOG_CHUNK_TYPE) {
3360 case TRANSLOG_CHUNK_LSN:
3361 {
3362 /* 0 chunk referred as LSN (head or tail) */
3363 translog_size_t rec_len __attribute__((unused));
3364 uchar *start= chunk;
3365 uchar *ptr= start + 1 + 2;
3366 uint16 chunk_len, header_len;
3367 DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
3368 rec_len= translog_variable_record_1group_decode_len(&ptr);
3369 chunk_len= uint2korr(ptr);
3370 header_len= (uint16) (ptr - start) +2;
3371 DBUG_PRINT("info", ("rec len: %lu chunk len: %u header len: %u",
3372 (ulong) rec_len, (uint) chunk_len, (uint) header_len));
3373 if (chunk_len)
3374 {
3375 /* TODO: fine header end */
3376 /*
3377 The last chunk of multi-group record can be base for it header
3378 calculation (we skip to the first group to read the header) so if we
3379 stuck here something is wrong.
3380 */
3381 DBUG_ASSERT(0);
3382 DBUG_RETURN(0); /* Keep compiler happy */
3383 }
3384 DBUG_RETURN(header_len);
3385 }
3386 case TRANSLOG_CHUNK_FIXED:
3387 {
3388 /* 1 (pseudo)fixed record (also LSN) */
3389 DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3"));
3390 DBUG_RETURN(3);
3391 }
3392 case TRANSLOG_CHUNK_NOHDR:
3393 /* 2 no header chunk (till page end) */
3394 DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1"));
3395 DBUG_RETURN(1);
3396 break;
3397 case TRANSLOG_CHUNK_LNGTH:
3398 /* 3 chunk with chunk length */
3399 DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3"));
3400 DBUG_RETURN(3);
3401 break;
3402 }
3403 DBUG_ASSERT(0);
3404 DBUG_RETURN(0); /* Keep compiler happy */
3405 }
3406
3407
3408 /**
3409 @brief Truncate the log to the given address. Used during the startup if the
3410 end of log if corrupted.
3411
3412 @param addr new horizon
3413
3414 @retval 0 OK
3415 @retval 1 Error
3416 */
3417
translog_truncate_log(TRANSLOG_ADDRESS addr)3418 static my_bool translog_truncate_log(TRANSLOG_ADDRESS addr)
3419 {
3420 uchar *page;
3421 TRANSLOG_ADDRESS current_page;
3422 uint32 next_page_offset, page_rest;
3423 uint32 i;
3424 File fd;
3425 int rc;
3426 TRANSLOG_VALIDATOR_DATA data;
3427 char path[FN_REFLEN];
3428 uchar page_buff[TRANSLOG_PAGE_SIZE];
3429 DBUG_ENTER("translog_truncate_log");
3430 /* TODO: write warning to the client */
3431 DBUG_PRINT("warning", ("removing all records from " LSN_FMT " "
3432 "till " LSN_FMT,
3433 LSN_IN_PARTS(addr),
3434 LSN_IN_PARTS(log_descriptor.horizon)));
3435 DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
3436 /* remove files between the address and horizon */
3437 for (i= LSN_FILE_NO(addr) + 1; i <= LSN_FILE_NO(log_descriptor.horizon); i++)
3438 if (mysql_file_delete(key_file_translog,
3439 translog_filename_by_fileno(i, path), MYF(MY_WME)))
3440 {
3441 translog_unlock();
3442 DBUG_RETURN(1);
3443 }
3444
3445 /* truncate the last file up to the last page */
3446 next_page_offset= LSN_OFFSET(addr);
3447 next_page_offset= (next_page_offset -
3448 ((next_page_offset - 1) % TRANSLOG_PAGE_SIZE + 1) +
3449 TRANSLOG_PAGE_SIZE);
3450 page_rest= next_page_offset - LSN_OFFSET(addr);
3451 memset(page_buff, TRANSLOG_FILLER, page_rest);
3452 rc= ((fd= open_logfile_by_number_no_cache(LSN_FILE_NO(addr))) < 0 ||
3453 ((mysql_file_chsize(fd, next_page_offset, TRANSLOG_FILLER, MYF(MY_WME)) ||
3454 (page_rest && my_pwrite(fd, page_buff, page_rest, LSN_OFFSET(addr),
3455 log_write_flags)) ||
3456 mysql_file_sync(fd, MYF(MY_WME)))));
3457 translog_syncs++;
3458 rc|= (fd > 0 && mysql_file_close(fd, MYF(MY_WME)));
3459 if (sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS)
3460 {
3461 rc|= sync_dir(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD));
3462 translog_syncs++;
3463 }
3464 if (rc)
3465 DBUG_RETURN(1);
3466
3467 /* fix the horizon */
3468 log_descriptor.horizon= addr;
3469 /* fix the buffer data */
3470 current_page= MAKE_LSN(LSN_FILE_NO(addr), (next_page_offset -
3471 TRANSLOG_PAGE_SIZE));
3472 data.addr= ¤t_page;
3473 if ((page= translog_get_page(&data, log_descriptor.buffers->buffer, NULL)) ==
3474 NULL)
3475 DBUG_RETURN(1);
3476 if (page != log_descriptor.buffers->buffer)
3477 memcpy(log_descriptor.buffers->buffer, page, TRANSLOG_PAGE_SIZE);
3478 log_descriptor.bc.buffer->offset= current_page;
3479 log_descriptor.bc.buffer->size= LSN_OFFSET(addr) - LSN_OFFSET(current_page);
3480 log_descriptor.bc.ptr=
3481 log_descriptor.buffers->buffer + log_descriptor.bc.buffer->size;
3482 log_descriptor.bc.current_page_fill= log_descriptor.bc.buffer->size;
3483 DBUG_RETURN(0);
3484 }
3485
3486
3487 /**
3488 Applies function 'callback' to all files (in a directory) which
3489 name looks like a log's name (aria_log.[0-9]{7}).
3490 If 'callback' returns TRUE this interrupts the walk and returns
3491 TRUE. Otherwise FALSE is returned after processing all log files.
3492 It cannot just use log_descriptor.directory because that may not yet have
3493 been initialized.
3494
3495 @param directory directory to scan
3496 @param callback function to apply; is passed directory and base
3497 name of found file
3498 */
3499
translog_walk_filenames(const char * directory,my_bool (* callback)(const char *,const char *))3500 my_bool translog_walk_filenames(const char *directory,
3501 my_bool (*callback)(const char *,
3502 const char *))
3503 {
3504 MY_DIR *dirp;
3505 uint i;
3506 my_bool rc= FALSE;
3507
3508 /* Finds and removes transaction log files */
3509 if (!(dirp = my_dir(directory, MYF(MY_DONT_SORT))))
3510 return FALSE;
3511
3512 for (i= 0; i < dirp->number_of_files; i++)
3513 {
3514 char *file= dirp->dir_entry[i].name;
3515 if (strncmp(file, "aria_log.", 10) == 0 &&
3516 file[10] >= '0' && file[10] <= '9' &&
3517 file[11] >= '0' && file[11] <= '9' &&
3518 file[12] >= '0' && file[12] <= '9' &&
3519 file[13] >= '0' && file[13] <= '9' &&
3520 file[14] >= '0' && file[14] <= '9' &&
3521 file[15] >= '0' && file[15] <= '9' &&
3522 file[16] >= '0' && file[16] <= '9' &&
3523 file[17] >= '0' && file[17] <= '9' &&
3524 file[18] == '\0' && (*callback)(directory, file))
3525 {
3526 rc= TRUE;
3527 break;
3528 }
3529 }
3530 my_dirend(dirp);
3531 return rc;
3532 }
3533
3534
3535 /**
3536 @brief Fills table of dependence length of page header from page flags
3537 */
3538
translog_fill_overhead_table()3539 void translog_fill_overhead_table()
3540 {
3541 uint i;
3542 for (i= 0; i < TRANSLOG_FLAGS_NUM; i++)
3543 {
3544 page_overhead[i]= 7;
3545 if (i & TRANSLOG_PAGE_CRC)
3546 page_overhead[i]+= CRC_SIZE;
3547 if (i & TRANSLOG_SECTOR_PROTECTION)
3548 page_overhead[i]+= TRANSLOG_PAGE_SIZE /
3549 DISK_DRIVE_SECTOR_SIZE;
3550 }
3551 }
3552
3553
3554 /**
3555 Callback to find first log in directory.
3556 */
3557
translog_callback_search_first(const char * directory,const char * filename)3558 static my_bool translog_callback_search_first(const char *directory
3559 __attribute__((unused)),
3560 const char *filename
3561 __attribute__((unused)))
3562 {
3563 return TRUE;
3564 }
3565
3566
3567 /**
3568 @brief Checks that chunk is LSN one
3569
3570 @param type type of the chunk
3571
3572 @retval 1 the chunk is LNS
3573 @retval 0 the chunk is not LSN
3574 */
3575
translog_is_LSN_chunk(uchar type)3576 static my_bool translog_is_LSN_chunk(uchar type)
3577 {
3578 DBUG_ENTER("translog_is_LSN_chunk");
3579 DBUG_PRINT("info", ("byte: %x chunk type: %u record type: %u",
3580 type, type >> 6, type & TRANSLOG_REC_TYPE));
3581 DBUG_RETURN(((type & TRANSLOG_CHUNK_TYPE) == TRANSLOG_CHUNK_FIXED) ||
3582 (((type & TRANSLOG_CHUNK_TYPE) == TRANSLOG_CHUNK_LSN) &&
3583 ((type & TRANSLOG_REC_TYPE)) != TRANSLOG_CHUNK_0_CONT));
3584 }
3585
3586
/**
  @brief Initialize transaction log

  @param directory          Directory where log files are put
  @param log_file_max_size  maximum size of one log file (used when new logs
                            are created)
  @param server_version     version of MySQL server (MYSQL_VERSION_ID)
  @param server_id          server ID (replication & Co)
  @param pagecache          Page cache for the log reads
  @param flags              flags (TRANSLOG_PAGE_CRC,
                            TRANSLOG_SECTOR_PROTECTION, TRANSLOG_RECORD_CRC)
  @param readonly           Put transaction log in read-only mode
  @param init_table_func    function to initialize record descriptors table
  @param no_errors          suppress messages about non-critical errors

  @todo
    Free used resources in case of error.

  @retval 0 OK
  @retval 1 Error
*/
3607
translog_init_with_table(const char * directory,uint32 log_file_max_size,uint32 server_version,uint32 server_id,PAGECACHE * pagecache,uint flags,my_bool readonly,void (* init_table_func)(),my_bool no_errors)3608 my_bool translog_init_with_table(const char *directory,
3609 uint32 log_file_max_size,
3610 uint32 server_version,
3611 uint32 server_id, PAGECACHE *pagecache,
3612 uint flags, my_bool readonly,
3613 void (*init_table_func)(),
3614 my_bool no_errors)
3615 {
3616 int i;
3617 int old_log_was_recovered= 0, logs_found= 0;
3618 uint old_flags= flags;
3619 uint32 start_file_num= 1;
3620 TRANSLOG_ADDRESS UNINIT_VAR(sure_page), last_page, last_valid_page,
3621 checkpoint_lsn;
3622 my_bool version_changed= 0;
3623 DBUG_ENTER("translog_init_with_table");
3624
3625 translog_syncs= 0;
3626 flush_start= 0;
3627 id_to_share= NULL;
3628 log_purge_disabled= 0;
3629
3630 log_descriptor.directory_fd= -1;
3631 log_descriptor.is_everything_flushed= 1;
3632 log_descriptor.flush_in_progress= 0;
3633 log_descriptor.flush_no= 0;
3634 log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
3635
3636 /* Normally in Aria this this calls translog_table_init() */
3637 (*init_table_func)();
3638 compile_time_assert(sizeof(log_descriptor.dirty_buffer_mask) * 8 >=
3639 TRANSLOG_BUFFERS_NO);
3640 log_descriptor.dirty_buffer_mask= 0;
3641 if (readonly)
3642 log_descriptor.open_flags= O_BINARY | O_RDONLY;
3643 else
3644 log_descriptor.open_flags= O_BINARY | O_RDWR;
3645 if (mysql_mutex_init(key_TRANSLOG_BUFFER_mutex,
3646 &log_descriptor.sent_to_disk_lock, MY_MUTEX_INIT_FAST) ||
3647 mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_file_header_lock,
3648 &log_descriptor.file_header_lock, MY_MUTEX_INIT_FAST) ||
3649 mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_unfinished_files_lock,
3650 &log_descriptor.unfinished_files_lock, MY_MUTEX_INIT_FAST) ||
3651 mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_purger_lock,
3652 &log_descriptor.purger_lock, MY_MUTEX_INIT_FAST) ||
3653 mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_log_flush_lock,
3654 &log_descriptor.log_flush_lock, MY_MUTEX_INIT_FAST) ||
3655 mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_dirty_buffer_mask_lock,
3656 &log_descriptor.dirty_buffer_mask_lock, MY_MUTEX_INIT_FAST) ||
3657 mysql_cond_init(key_TRANSLOG_DESCRIPTOR_log_flush_cond,
3658 &log_descriptor.log_flush_cond, 0) ||
3659 mysql_cond_init(key_TRANSLOG_DESCRIPTOR_new_goal_cond,
3660 &log_descriptor.new_goal_cond, 0) ||
3661 mysql_rwlock_init(key_TRANSLOG_DESCRIPTOR_open_files_lock,
3662 &log_descriptor.open_files_lock) ||
3663 my_init_dynamic_array(PSI_INSTRUMENT_ME, &log_descriptor.open_files,
3664 sizeof(TRANSLOG_FILE*), 10, 10, MYF(0)) ||
3665 my_init_dynamic_array(PSI_INSTRUMENT_ME, &log_descriptor.unfinished_files,
3666 sizeof(struct st_file_counter),
3667 10, 10, MYF(0)))
3668 goto err;
3669 log_descriptor.min_need_file= 0;
3670 log_descriptor.min_file_number= 0;
3671 log_descriptor.last_lsn_checked= LSN_IMPOSSIBLE;
3672
3673 /* Directory to store files */
3674 unpack_dirname(log_descriptor.directory, directory);
3675 #ifndef __WIN__
3676 if ((log_descriptor.directory_fd= my_open(log_descriptor.directory,
3677 O_RDONLY, MYF(MY_WME))) < 0)
3678 {
3679 my_errno= errno;
3680 DBUG_PRINT("error", ("Error %d during opening directory '%s'",
3681 errno, log_descriptor.directory));
3682 goto err;
3683 }
3684 #endif
3685 log_descriptor.in_buffers_only= LSN_IMPOSSIBLE;
3686 DBUG_ASSERT(log_file_max_size % TRANSLOG_PAGE_SIZE == 0 &&
3687 log_file_max_size >= TRANSLOG_MIN_FILE_SIZE);
3688 /* max size of one log size (for new logs creation) */
3689 log_file_size= log_descriptor.log_file_max_size=
3690 log_file_max_size;
3691 /* server version */
3692 log_descriptor.server_version= server_version;
3693 /* server ID */
3694 log_descriptor.server_id= server_id;
3695 /* Page cache for the log reads */
3696 log_descriptor.pagecache= pagecache;
3697 /* Flags */
3698 DBUG_ASSERT((flags &
3699 ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
3700 TRANSLOG_RECORD_CRC)) == 0);
3701 log_descriptor.flags= flags;
3702 translog_fill_overhead_table();
3703 log_descriptor.page_overhead= page_overhead[flags];
3704 log_descriptor.page_capacity_chunk_2=
3705 TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1;
3706 compile_time_assert(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0);
3707 log_descriptor.buffer_capacity_chunk_2=
3708 (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) *
3709 log_descriptor.page_capacity_chunk_2;
3710 log_descriptor.half_buffer_capacity_chunk_2=
3711 log_descriptor.buffer_capacity_chunk_2 / 2;
3712 DBUG_PRINT("info",
3713 ("Overhead: %u pc2: %u bc2: %u, bc2/2: %u",
3714 log_descriptor.page_overhead,
3715 log_descriptor.page_capacity_chunk_2,
3716 log_descriptor.buffer_capacity_chunk_2,
3717 log_descriptor.half_buffer_capacity_chunk_2));
3718
3719 /* Just to init it somehow (hack for bootstrap)*/
3720 {
3721 TRANSLOG_FILE *file= 0;
3722 log_descriptor.min_file = log_descriptor.max_file= 1;
3723 insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
3724 translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
3725 pop_dynamic(&log_descriptor.open_files);
3726 }
3727
3728 /* Buffers for log writing */
3729 for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
3730 {
3731 if (translog_buffer_init(log_descriptor.buffers + i, i))
3732 goto err;
3733 DBUG_PRINT("info", ("translog_buffer buffer #%u:%p",
3734 i, log_descriptor.buffers + i));
3735 }
3736
3737 /*
3738 last_logno and last_checkpoint_lsn were set in
3739 ma_control_file_create_or_open()
3740 */
3741 logs_found= (last_logno != FILENO_IMPOSSIBLE);
3742
3743 translog_status= (readonly ? TRANSLOG_READONLY : TRANSLOG_OK);
3744 checkpoint_lsn= last_checkpoint_lsn;
3745
3746 if (logs_found)
3747 {
3748 my_bool pageok;
3749 DBUG_PRINT("info", ("log found..."));
3750 /*
3751 TODO: scan directory for aria_log.XXXXXXXX files and find
3752 highest XXXXXXXX & set logs_found
3753 TODO: check that last checkpoint within present log addresses space
3754
3755 find the log end
3756 */
3757 if (LSN_FILE_NO(last_checkpoint_lsn) == FILENO_IMPOSSIBLE)
3758 {
3759 DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0);
3760 /* only last log needs to be checked */
3761 sure_page= MAKE_LSN(last_logno, TRANSLOG_PAGE_SIZE);
3762 }
3763 else
3764 {
3765 sure_page= last_checkpoint_lsn;
3766 DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0);
3767 sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE;
3768 }
3769 /* Set horizon to the beginning of the last file first */
3770 log_descriptor.horizon= last_page= MAKE_LSN(last_logno, 0);
3771 if (translog_get_last_page_addr(&last_page, &pageok, no_errors))
3772 {
3773 if (!translog_walk_filenames(log_descriptor.directory,
3774 &translog_callback_search_first))
3775 {
3776 /*
3777 Files was deleted, just start from the next log number, so that
3778 existing tables are in the past.
3779 */
3780 start_file_num= last_logno + 1;
3781 checkpoint_lsn= LSN_IMPOSSIBLE; /* no log so no checkpoint */
3782 logs_found= 0;
3783 }
3784 else
3785 goto err;
3786 }
3787 else if (LSN_OFFSET(last_page) == 0)
3788 {
3789 if (LSN_FILE_NO(last_page) == 1)
3790 {
3791 logs_found= 0; /* file #1 has no pages */
3792 DBUG_PRINT("info", ("log found. But is is empty => no log assumed"));
3793 }
3794 else
3795 {
3796 last_page-= LSN_ONE_FILE;
3797 if (translog_get_last_page_addr(&last_page, &pageok, 0))
3798 goto err;
3799 }
3800 }
3801 if (logs_found)
3802 {
3803 uint32 i;
3804 log_descriptor.min_file= translog_first_file(log_descriptor.horizon, 1);
3805 log_descriptor.max_file= last_logno;
3806 /* Open all files */
3807 if (allocate_dynamic(&log_descriptor.open_files,
3808 log_descriptor.max_file -
3809 log_descriptor.min_file + 1))
3810 goto err;
3811 for (i = log_descriptor.max_file; i >= log_descriptor.min_file; i--)
3812 {
3813 /*
3814 We can't allocate all file together because they will be freed
3815 one by one
3816 */
3817 TRANSLOG_FILE *file= (TRANSLOG_FILE *)my_malloc(PSI_INSTRUMENT_ME, sizeof(TRANSLOG_FILE),
3818 MYF(0));
3819
3820 compile_time_assert(MY_FILEPOS_ERROR > 0xffffffffULL);
3821 if (file == NULL ||
3822 (file->handler.file=
3823 open_logfile_by_number_no_cache(i)) < 0 ||
3824 mysql_file_seek(file->handler.file, 0, SEEK_END, MYF(0)) >=
3825 0xffffffffULL)
3826 {
3827 int j;
3828 for (j= i - log_descriptor.min_file - 1; j > 0; j--)
3829 {
3830 TRANSLOG_FILE *el=
3831 *dynamic_element(&log_descriptor.open_files, j,
3832 TRANSLOG_FILE **);
3833 mysql_file_close(el->handler.file, MYF(MY_WME));
3834 my_free(el);
3835 }
3836 if (file)
3837 {
3838 free(file);
3839 goto err;
3840 }
3841 else
3842 goto err;
3843 }
3844 translog_file_init(file, i, 1);
3845 /* we allocated space so it can't fail */
3846 insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
3847 }
3848 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
3849 log_descriptor.open_files.elements);
3850 }
3851 }
3852 else if (readonly)
3853 {
3854 /* There is no logs and there is read-only mode => nothing to read */
3855 DBUG_PRINT("error", ("No logs and read-only mode"));
3856 goto err;
3857 }
3858
3859 if (logs_found)
3860 {
3861 TRANSLOG_ADDRESS current_page= sure_page;
3862 my_bool pageok;
3863
3864 DBUG_PRINT("info", ("The log is really present"));
3865 if (sure_page > last_page)
3866 {
3867 my_printf_error(HA_ERR_GENERIC, "Aria engine: log data error\n"
3868 "last_log_page: " LSN_FMT " is less than\n"
3869 "checkpoint page: " LSN_FMT, MYF(0),
3870 LSN_IN_PARTS(last_page), LSN_IN_PARTS(sure_page));
3871 goto err;
3872 }
3873
3874 /* TODO: check page size */
3875
3876 last_valid_page= LSN_IMPOSSIBLE;
3877 /*
3878 Scans and validate pages. We need it to show "outside" only for sure
3879 valid part of the log. If the log was damaged then fixed we have to
3880 cut off damaged part before some other process start write something
3881 in the log.
3882 */
3883 do
3884 {
3885 TRANSLOG_ADDRESS current_file_last_page;
3886 current_file_last_page= current_page;
3887 if (translog_get_last_page_addr(¤t_file_last_page, &pageok, 0))
3888 goto err;
3889 if (!pageok)
3890 {
3891 DBUG_PRINT("error", ("File %lu have no complete last page",
3892 (ulong) LSN_FILE_NO(current_file_last_page)));
3893 old_log_was_recovered= 1;
3894 /* This file is not written till the end so it should be last */
3895 last_page= current_file_last_page;
3896 /* TODO: issue warning */
3897 }
3898 do
3899 {
3900 TRANSLOG_VALIDATOR_DATA data;
3901 TRANSLOG_PAGE_SIZE_BUFF psize_buff;
3902 uchar *page;
3903 data.addr= ¤t_page;
3904 if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL)
3905 goto err;
3906 if (data.was_recovered)
3907 {
3908 DBUG_PRINT("error", ("file no: %lu (%d) "
3909 "rec_offset: 0x%lx (%lu) (%d)",
3910 (ulong) LSN_FILE_NO(current_page),
3911 (uint3korr(page + 3) !=
3912 LSN_FILE_NO(current_page)),
3913 (ulong) LSN_OFFSET(current_page),
3914 (ulong) (LSN_OFFSET(current_page) /
3915 TRANSLOG_PAGE_SIZE),
3916 (uint3korr(page) !=
3917 LSN_OFFSET(current_page) /
3918 TRANSLOG_PAGE_SIZE)));
3919 old_log_was_recovered= 1;
3920 break;
3921 }
3922 old_flags= page[TRANSLOG_PAGE_FLAGS];
3923 last_valid_page= current_page;
3924 current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */
3925 } while (current_page <= current_file_last_page);
3926 current_page+= LSN_ONE_FILE;
3927 current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE);
3928 } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) &&
3929 !old_log_was_recovered);
3930 if (last_valid_page == LSN_IMPOSSIBLE)
3931 {
3932 /* Panic!!! Even page which should be valid is invalid */
3933 /* TODO: issue error */
3934 goto err;
3935 }
3936 DBUG_PRINT("info", ("Last valid page is in file: %lu "
3937 "offset: %lu (0x%lx) "
3938 "Logs found: %d was recovered: %d "
3939 "flags match: %d",
3940 (ulong) LSN_FILE_NO(last_valid_page),
3941 (ulong) LSN_OFFSET(last_valid_page),
3942 (ulong) LSN_OFFSET(last_valid_page),
3943 logs_found, old_log_was_recovered,
3944 (old_flags == flags)));
3945
3946 /* TODO: check server ID */
3947 if (logs_found && !old_log_was_recovered && old_flags == flags)
3948 {
3949 TRANSLOG_VALIDATOR_DATA data;
3950 TRANSLOG_PAGE_SIZE_BUFF psize_buff;
3951 uchar *page;
3952 uint16 chunk_offset;
3953 data.addr= &last_valid_page;
3954 /* continue old log */
3955 DBUG_ASSERT(LSN_FILE_NO(last_valid_page)==
3956 LSN_FILE_NO(log_descriptor.horizon));
3957 if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL ||
3958 (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
3959 goto err;
3960
3961 /* Puts filled part of old page in the buffer */
3962 log_descriptor.horizon= last_valid_page;
3963 translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
3964 /*
3965 Free space if filled with TRANSLOG_FILLER and first uchar of
3966 real chunk can't be TRANSLOG_FILLER
3967 */
3968 while (chunk_offset < TRANSLOG_PAGE_SIZE &&
3969 page[chunk_offset] != TRANSLOG_FILLER)
3970 {
3971 uint16 chunk_length;
3972 if ((chunk_length=
3973 translog_get_total_chunk_length(page, chunk_offset)) == 0)
3974 goto err;
3975 DBUG_PRINT("info", ("chunk: offset: %u length: %u",
3976 (uint) chunk_offset, (uint) chunk_length));
3977 chunk_offset+= chunk_length;
3978
3979 /* chunk can't cross the page border */
3980 DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE);
3981 }
3982 memcpy(log_descriptor.buffers->buffer, page, chunk_offset);
3983 log_descriptor.bc.buffer->size+= chunk_offset;
3984 log_descriptor.bc.ptr+= chunk_offset;
3985 log_descriptor.bc.current_page_fill= chunk_offset;
3986 log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
3987 (chunk_offset +
3988 LSN_OFFSET(last_valid_page)));
3989 DBUG_PRINT("info", ("Move Page #%u: %p chaser: %d Size: %lu (%lu)",
3990 (uint) log_descriptor.bc.buffer_no,
3991 log_descriptor.bc.buffer,
3992 log_descriptor.bc.chaser,
3993 (ulong) log_descriptor.bc.buffer->size,
3994 (ulong) (log_descriptor.bc.ptr - log_descriptor.bc.
3995 buffer->buffer)));
3996 translog_check_cursor(&log_descriptor.bc);
3997 }
3998 if (!old_log_was_recovered && old_flags == flags)
3999 {
4000 LOGHANDLER_FILE_INFO info;
4001
4002 /*
4003 Accessing &log_descriptor.open_files without mutex is safe
4004 because it is initialization
4005 */
4006 if (translog_read_file_header(&info,
4007 (*dynamic_element(&log_descriptor.
4008 open_files,
4009 0, TRANSLOG_FILE **))->
4010 handler.file))
4011 goto err;
4012 version_changed= (info.maria_version != TRANSLOG_VERSION_ID);
4013 }
4014 }
4015 DBUG_PRINT("info", ("Logs found: %d was recovered: %d",
4016 logs_found, old_log_was_recovered));
4017 if (!logs_found)
4018 {
4019 TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(PSI_INSTRUMENT_ME,
4020 sizeof(TRANSLOG_FILE), MYF(MY_WME));
4021 DBUG_PRINT("info", ("The log is not found => we will create new log"));
4022 if (file == NULL)
4023 goto err;
4024 /* Start new log system from scratch */
4025 log_descriptor.horizon= MAKE_LSN(start_file_num,
4026 TRANSLOG_PAGE_SIZE); /* header page */
4027 translog_file_init(file, start_file_num, 0);
4028 if (insert_dynamic(&log_descriptor.open_files, (uchar*)&file))
4029 {
4030 my_free(file);
4031 goto err;
4032 }
4033 if ((file->handler.file=
4034 create_logfile_by_number_no_cache(start_file_num)) == -1)
4035 goto err;
4036 log_descriptor.min_file= log_descriptor.max_file= start_file_num;
4037 if (translog_write_file_header())
4038 goto err;
4039 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
4040 log_descriptor.open_files.elements);
4041
4042 if (ma_control_file_write_and_force(checkpoint_lsn, start_file_num,
4043 max_trid_in_control_file,
4044 recovery_failures))
4045 goto err;
4046 /* assign buffer 0 */
4047 translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
4048 translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
4049 }
4050 else if ((old_log_was_recovered || old_flags != flags || version_changed) &&
4051 !readonly)
4052 {
4053 /* leave the damaged file untouched */
4054 log_descriptor.horizon+= LSN_ONE_FILE;
4055 /* header page */
4056 log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
4057 TRANSLOG_PAGE_SIZE);
4058 if (translog_create_new_file())
4059 goto err;
4060 /*
4061 Buffer system left untouched after recovery => we should init it
4062 (starting from buffer 0)
4063 */
4064 translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
4065 translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
4066 }
4067
4068 /* all LSNs that are on disk are flushed */
4069 log_descriptor.log_start= log_descriptor.sent_to_disk=
4070 log_descriptor.flushed= log_descriptor.horizon;
4071 log_descriptor.in_buffers_only= log_descriptor.bc.buffer->offset;
4072 log_descriptor.max_lsn= LSN_IMPOSSIBLE; /* set to 0 */
4073 /*
4074 Now 'flushed' is set to 'horizon' value, but 'horizon' is (potentially)
4075 address of the next LSN and we want indicate that all LSNs that are
4076 already on the disk are flushed so we need decrease horizon on 1 (we are
4077 sure that there is no LSN on the disk which is greater then 'flushed'
4078 and there will not be LSN created that is equal or less then the value
4079 of the 'flushed').
4080 */
4081 log_descriptor.flushed--; /* offset decreased */
4082 log_descriptor.sent_to_disk--; /* offset decreased */
4083 /*
4084 Log records will refer to a MARIA_SHARE by a unique 2-byte id; set up
4085 structures for generating 2-byte ids:
4086 */
4087 id_to_share= (MARIA_SHARE **) my_malloc(PSI_INSTRUMENT_ME, SHARE_ID_MAX * sizeof(MARIA_SHARE*),
4088 MYF(MY_WME | MY_ZEROFILL));
4089 if (unlikely(!id_to_share))
4090 goto err;
4091 id_to_share--; /* min id is 1 */
4092
4093 /* Check the last LSN record integrity */
4094 if (logs_found)
4095 {
4096 TRANSLOG_SCANNER_DATA scanner;
4097 TRANSLOG_ADDRESS page_addr;
4098 LSN last_lsn= LSN_IMPOSSIBLE;
4099 /*
4100 take very last page address and try to find LSN record on it
4101 if it fail take address of previous page and so on
4102 */
4103 page_addr= (log_descriptor.horizon -
4104 ((log_descriptor.horizon - 1) % TRANSLOG_PAGE_SIZE + 1));
4105 if (translog_scanner_init(page_addr, 1, &scanner, 1))
4106 goto err;
4107 scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
4108 for (;;)
4109 {
4110 uint chunk_1byte;
4111 chunk_1byte= scanner.page[scanner.page_offset];
4112 while (!translog_is_LSN_chunk(chunk_1byte) &&
4113 scanner.page != END_OF_LOG &&
4114 scanner.page[scanner.page_offset] != TRANSLOG_FILLER &&
4115 scanner.page_addr == page_addr)
4116 {
4117 if (translog_get_next_chunk(&scanner))
4118 {
4119 translog_destroy_scanner(&scanner);
4120 goto err;
4121 }
4122 if (scanner.page != END_OF_LOG)
4123 chunk_1byte= scanner.page[scanner.page_offset];
4124 }
4125 if (translog_is_LSN_chunk(chunk_1byte))
4126 {
4127 last_lsn= scanner.page_addr + scanner.page_offset;
4128 if (translog_get_next_chunk(&scanner))
4129 {
4130 translog_destroy_scanner(&scanner);
4131 goto err;
4132 }
4133 if (scanner.page == END_OF_LOG)
4134 break; /* it was the last record */
4135 chunk_1byte= scanner.page[scanner.page_offset];
4136 continue; /* try to find other record on this page */
4137 }
4138
4139 if (last_lsn != LSN_IMPOSSIBLE)
4140 break; /* there is no more records on the page */
4141
4142 /* We have to make step back */
4143 if (unlikely(LSN_OFFSET(page_addr) == TRANSLOG_PAGE_SIZE))
4144 {
4145 uint32 file_no= LSN_FILE_NO(page_addr);
4146 my_bool last_page_ok;
4147 /* it is beginning of the current file */
4148 if (unlikely(file_no == 1))
4149 {
4150 /*
4151 It is beginning of the log => there is no LSNs in the log =>
4152 There is no harm in leaving it "as-is".
4153 */
4154 log_descriptor.previous_flush_horizon= log_descriptor.horizon;
4155 DBUG_PRINT("info", ("previous_flush_horizon: " LSN_FMT,
4156 LSN_IN_PARTS(log_descriptor.
4157 previous_flush_horizon)));
4158 DBUG_RETURN(0);
4159 }
4160 file_no--;
4161 page_addr= MAKE_LSN(file_no, TRANSLOG_PAGE_SIZE);
4162 translog_get_last_page_addr(&page_addr, &last_page_ok, 0);
4163 /* page should be OK as it is not the last file */
4164 DBUG_ASSERT(last_page_ok);
4165 }
4166 else
4167 {
4168 page_addr-= TRANSLOG_PAGE_SIZE;
4169 }
4170 translog_destroy_scanner(&scanner);
4171 if (translog_scanner_init(page_addr, 1, &scanner, 1))
4172 goto err;
4173 scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
4174 }
4175 translog_destroy_scanner(&scanner);
4176
4177 /* Now scanner points to the last LSN chunk, lets check it */
4178 {
4179 TRANSLOG_HEADER_BUFFER rec;
4180 translog_size_t rec_len;
4181 int len;
4182 uchar buffer[1];
4183 DBUG_PRINT("info", ("going to check the last found record " LSN_FMT,
4184 LSN_IN_PARTS(last_lsn)));
4185
4186 len=
4187 translog_read_record_header(last_lsn, &rec);
4188 if (unlikely (len == RECHEADER_READ_ERROR ||
4189 len == RECHEADER_READ_EOF))
4190 {
4191 DBUG_PRINT("error", ("unexpected end of log or record during "
4192 "reading record header: " LSN_FMT " len: %d",
4193 LSN_IN_PARTS(last_lsn), len));
4194 if (readonly)
4195 log_descriptor.log_start= log_descriptor.horizon= last_lsn;
4196 else if (translog_truncate_log(last_lsn))
4197 {
4198 translog_free_record_header(&rec);
4199 goto err;
4200 }
4201 }
4202 else
4203 {
4204 DBUG_ASSERT(last_lsn == rec.lsn);
4205 if (likely(rec.record_length != 0))
4206 {
4207 /*
4208 Reading the last byte of record will trigger scanning all
4209 record chunks for now
4210 */
4211 rec_len= translog_read_record(rec.lsn, rec.record_length - 1, 1,
4212 buffer, NULL);
4213 if (rec_len != 1)
4214 {
4215 DBUG_PRINT("error", ("unexpected end of log or record during "
4216 "reading record body: " LSN_FMT " len: %d",
4217 LSN_IN_PARTS(rec.lsn),
4218 len));
4219 if (readonly)
4220 log_descriptor.log_start= log_descriptor.horizon= last_lsn;
4221
4222 else if (translog_truncate_log(last_lsn))
4223 {
4224 translog_free_record_header(&rec);
4225 goto err;
4226 }
4227 }
4228 }
4229 }
4230 translog_free_record_header(&rec);
4231 }
4232 }
4233 log_descriptor.previous_flush_horizon= log_descriptor.horizon;
4234 DBUG_PRINT("info", ("previous_flush_horizon: " LSN_FMT,
4235 LSN_IN_PARTS(log_descriptor.previous_flush_horizon)));
4236 DBUG_RETURN(0);
4237 err:
4238 ma_message_no_user(0, "log initialization failed");
4239 DBUG_RETURN(1);
4240 }
4241
4242
4243 /*
4244 @brief Free transaction log file buffer.
4245
4246 @param buffer_no The buffer to free
4247 */
4248
translog_buffer_destroy(struct st_translog_buffer * buffer)4249 static void translog_buffer_destroy(struct st_translog_buffer *buffer)
4250 {
4251 DBUG_ENTER("translog_buffer_destroy");
4252 DBUG_PRINT("enter",
4253 ("Buffer #%u: %p file: %d offset: " LSN_FMT " size: %lu",
4254 (uint) buffer->buffer_no, buffer,
4255 (buffer->file ? buffer->file->handler.file : -1),
4256 LSN_IN_PARTS(buffer->offset),
4257 (ulong) buffer->size));
4258 if (buffer->file != NULL)
4259 {
4260 /*
4261 We ignore errors here, because we can't do something about it
4262 (it is shutting down)
4263
4264 We also have to take the locks even if there can't be any other
4265 threads running, because translog_buffer_flush()
4266 requires that we have the buffer locked.
4267 */
4268 translog_buffer_lock(buffer);
4269 translog_buffer_flush(buffer);
4270 translog_buffer_unlock(buffer);
4271 }
4272 DBUG_PRINT("info", ("Destroy mutex: %p", &buffer->mutex));
4273 mysql_mutex_destroy(&buffer->mutex);
4274 mysql_cond_destroy(&buffer->waiting_filling_buffer);
4275 DBUG_VOID_RETURN;
4276 }
4277
4278
4279 /*
4280 Free log handler resources
4281
4282 SYNOPSIS
4283 translog_destroy()
4284 */
4285
translog_destroy()4286 void translog_destroy()
4287 {
4288 TRANSLOG_FILE **file;
4289 uint i;
4290 uint8 current_buffer;
4291 DBUG_ENTER("translog_destroy");
4292
4293 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
4294 translog_status == TRANSLOG_READONLY);
4295 translog_lock();
4296 current_buffer= log_descriptor.bc.buffer_no;
4297 translog_status= (translog_status == TRANSLOG_READONLY ?
4298 TRANSLOG_UNINITED :
4299 TRANSLOG_SHUTDOWN);
4300 if (log_descriptor.bc.buffer->file != NULL)
4301 translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc);
4302 translog_unlock();
4303
4304 for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
4305 {
4306 struct st_translog_buffer *buffer= (log_descriptor.buffers +
4307 ((i + current_buffer + 1) %
4308 TRANSLOG_BUFFERS_NO));
4309 translog_buffer_destroy(buffer);
4310 }
4311 translog_status= TRANSLOG_UNINITED;
4312
4313 /* close files */
4314 while ((file= (TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files)))
4315 translog_close_log_file(*file);
4316 mysql_mutex_destroy(&log_descriptor.sent_to_disk_lock);
4317 mysql_mutex_destroy(&log_descriptor.file_header_lock);
4318 mysql_mutex_destroy(&log_descriptor.unfinished_files_lock);
4319 mysql_mutex_destroy(&log_descriptor.purger_lock);
4320 mysql_mutex_destroy(&log_descriptor.log_flush_lock);
4321 mysql_mutex_destroy(&log_descriptor.dirty_buffer_mask_lock);
4322 mysql_cond_destroy(&log_descriptor.log_flush_cond);
4323 mysql_cond_destroy(&log_descriptor.new_goal_cond);
4324 mysql_rwlock_destroy(&log_descriptor.open_files_lock);
4325 delete_dynamic(&log_descriptor.open_files);
4326 delete_dynamic(&log_descriptor.unfinished_files);
4327
4328 if (log_descriptor.directory_fd >= 0)
4329 mysql_file_close(log_descriptor.directory_fd, MYF(MY_WME));
4330 if (id_to_share != NULL)
4331 my_free(id_to_share + 1);
4332 DBUG_VOID_RETURN;
4333 }
4334
4335
4336 /*
4337 @brief Starts new page.
4338
4339 @param horizon \ Position in file and buffer where we are
4340 @param cursor /
4341 @param prev_buffer Buffer which should be flushed will be assigned here.
4342 This is always set (to NULL if nothing to flush).
4343
4344 @note We do not want to flush the buffer immediately because we want to
4345 let caller of this function first advance 'horizon' pointer and unlock the
4346 loghandler and only then flush the log which can take some time.
4347
4348 @retval 0 OK
4349 @retval 1 Error
4350 */
4351
translog_page_next(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor,struct st_translog_buffer ** prev_buffer)4352 static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon,
4353 struct st_buffer_cursor *cursor,
4354 struct st_translog_buffer **prev_buffer)
4355 {
4356 struct st_translog_buffer *buffer= cursor->buffer;
4357 DBUG_ENTER("translog_page_next");
4358
4359 *prev_buffer= NULL;
4360 if ((cursor->ptr + TRANSLOG_PAGE_SIZE >
4361 cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER) ||
4362 (LSN_OFFSET(*horizon) >
4363 log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE))
4364 {
4365 DBUG_PRINT("info", ("Switch to next buffer Buffer Size: %lu (%lu) => %d "
4366 "File size: %lu max: %lu => %d",
4367 (ulong) cursor->buffer->size,
4368 (ulong) (cursor->ptr - cursor->buffer->buffer),
4369 (cursor->ptr + TRANSLOG_PAGE_SIZE >
4370 cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER),
4371 (ulong) LSN_OFFSET(*horizon),
4372 (ulong) log_descriptor.log_file_max_size,
4373 (LSN_OFFSET(*horizon) >
4374 (log_descriptor.log_file_max_size -
4375 TRANSLOG_PAGE_SIZE))));
4376 if (translog_buffer_next(horizon, cursor,
4377 LSN_OFFSET(*horizon) >
4378 (log_descriptor.log_file_max_size -
4379 TRANSLOG_PAGE_SIZE)))
4380 DBUG_RETURN(1);
4381 *prev_buffer= buffer;
4382 DBUG_PRINT("info", ("Buffer #%u (%p): have to be flushed",
4383 (uint) buffer->buffer_no, buffer));
4384 }
4385 else
4386 {
4387 DBUG_PRINT("info", ("Use the same buffer #%u (%p): "
4388 "Buffer Size: %lu (%lu)",
4389 (uint) buffer->buffer_no,
4390 buffer,
4391 (ulong) cursor->buffer->size,
4392 (ulong) (cursor->ptr - cursor->buffer->buffer)));
4393 translog_finish_page(horizon, cursor);
4394 translog_new_page_header(horizon, cursor);
4395 }
4396 DBUG_RETURN(0);
4397 }
4398
4399
4400 /*
4401 Write data of given length to the current page
4402
4403 SYNOPSIS
4404 translog_write_data_on_page()
4405 horizon \ Pointers on file and buffer
4406 cursor /
4407 length IN length of the chunk
4408 buffer buffer with data
4409
4410 RETURN
4411 0 OK
4412 1 Error
4413 */
4414
translog_write_data_on_page(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor,translog_size_t length,uchar * buffer)4415 static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon,
4416 struct st_buffer_cursor *cursor,
4417 translog_size_t length,
4418 uchar *buffer)
4419 {
4420 DBUG_ENTER("translog_write_data_on_page");
4421 DBUG_PRINT("enter", ("Chunk length: %lu Page size %u",
4422 (ulong) length, (uint) cursor->current_page_fill));
4423 DBUG_ASSERT(length > 0);
4424 DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
4425 DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
4426 TRANSLOG_WRITE_BUFFER);
4427
4428 memcpy(cursor->ptr, buffer, length);
4429 cursor->ptr+= length;
4430 (*horizon)+= length; /* adds offset */
4431 cursor->current_page_fill+= length;
4432 if (!cursor->chaser)
4433 cursor->buffer->size+= length;
4434 DBUG_PRINT("info", ("Write data buffer #%u: %p "
4435 "chaser: %d Size: %lu (%lu)",
4436 (uint) cursor->buffer->buffer_no, cursor->buffer,
4437 cursor->chaser, (ulong) cursor->buffer->size,
4438 (ulong) (cursor->ptr - cursor->buffer->buffer)));
4439 translog_check_cursor(cursor);
4440
4441 DBUG_RETURN(0);
4442 }
4443
4444
4445 /*
4446 Write data from parts of given length to the current page
4447
4448 SYNOPSIS
4449 translog_write_parts_on_page()
4450 horizon \ Pointers on file and buffer
4451 cursor /
4452 length IN length of the chunk
4453 parts IN/OUT chunk source
4454
4455 RETURN
4456 0 OK
4457 1 Error
4458 */
4459
translog_write_parts_on_page(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor,translog_size_t length,struct st_translog_parts * parts)4460 static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
4461 struct st_buffer_cursor *cursor,
4462 translog_size_t length,
4463 struct st_translog_parts *parts)
4464 {
4465 translog_size_t left= length;
4466 uint cur= (uint) parts->current;
4467 DBUG_ENTER("translog_write_parts_on_page");
4468 DBUG_PRINT("enter", ("Chunk length: %lu parts: %u of %u. Page size: %u "
4469 "Buffer size: %lu (%lu)",
4470 (ulong) length,
4471 (uint) (cur + 1), (uint) parts->elements,
4472 (uint) cursor->current_page_fill,
4473 (ulong) cursor->buffer->size,
4474 (ulong) (cursor->ptr - cursor->buffer->buffer)));
4475 DBUG_ASSERT(length > 0);
4476 DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
4477 DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
4478 TRANSLOG_WRITE_BUFFER);
4479
4480 do
4481 {
4482 translog_size_t len;
4483 LEX_CUSTRING *part;
4484 const uchar *buff;
4485
4486 DBUG_ASSERT(cur < parts->elements);
4487 part= parts->parts + cur;
4488 buff= part->str;
4489 DBUG_PRINT("info", ("Part: %u Length: %lu left: %lu buff: %p",
4490 (uint) (cur + 1), (ulong) part->length, (ulong) left,
4491 buff));
4492
4493 if (part->length > left)
4494 {
4495 /* we should write less then the current part */
4496 len= left;
4497 part->length-= len;
4498 part->str+= len;
4499 DBUG_PRINT("info", ("Set new part: %u Length: %lu",
4500 (uint) (cur + 1), (ulong) part->length));
4501 }
4502 else
4503 {
4504 len= (translog_size_t) part->length;
4505 cur++;
4506 DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len));
4507 }
4508 DBUG_PRINT("info", ("copy: %p <- %p %u",
4509 cursor->ptr, buff, len));
4510 if (likely(len))
4511 {
4512 memcpy(cursor->ptr, buff, len);
4513 left-= len;
4514 cursor->ptr+= len;
4515 }
4516 } while (left);
4517
4518 DBUG_PRINT("info", ("Horizon: " LSN_FMT " Length %u(0x%x)",
4519 LSN_IN_PARTS(*horizon),
4520 length, length));
4521 parts->current= cur;
4522 (*horizon)+= length; /* offset increasing */
4523 cursor->current_page_fill+= length;
4524 if (!cursor->chaser)
4525 cursor->buffer->size+= length;
4526 /*
4527 We do not not updating parts->total_record_length here because it is
4528 need only before writing record to have total length
4529 */
4530 DBUG_PRINT("info", ("Write parts buffer #%u: %p "
4531 "chaser: %d Size: %lu (%lu) "
4532 "Horizon: " LSN_FMT " buff offset: 0x%x",
4533 (uint) cursor->buffer->buffer_no, cursor->buffer,
4534 cursor->chaser, (ulong) cursor->buffer->size,
4535 (ulong) (cursor->ptr - cursor->buffer->buffer),
4536 LSN_IN_PARTS(*horizon),
4537 (uint) (LSN_OFFSET(cursor->buffer->offset) +
4538 cursor->buffer->size)));
4539 translog_check_cursor(cursor);
4540
4541 DBUG_RETURN(0);
4542 }
4543
4544
4545 /*
4546 Put 1 group chunk type 0 header into parts array
4547
4548 SYNOPSIS
4549 translog_write_variable_record_1group_header()
4550 parts Descriptor of record source parts
4551 type The log record type
4552 short_trid Short transaction ID or 0 if it has no sense
4553 header_length Calculated header length of chunk type 0
4554 chunk0_header Buffer for the chunk header writing
4555 */
4556
4557 static void
translog_write_variable_record_1group_header(struct st_translog_parts * parts,enum translog_record_type type,SHORT_TRANSACTION_ID short_trid,uint16 header_length,uchar * chunk0_header)4558 translog_write_variable_record_1group_header(struct st_translog_parts *parts,
4559 enum translog_record_type type,
4560 SHORT_TRANSACTION_ID short_trid,
4561 uint16 header_length,
4562 uchar *chunk0_header)
4563 {
4564 LEX_CUSTRING *part;
4565 DBUG_ASSERT(parts->current != 0); /* first part is left for header */
4566 part= parts->parts + (--parts->current);
4567 parts->total_record_length+= (translog_size_t) (part->length= header_length);
4568 part->str= chunk0_header;
4569 /* puts chunk type */
4570 *chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
4571 int2store(chunk0_header + 1, short_trid);
4572 /* puts record length */
4573 translog_write_variable_record_1group_code_len(chunk0_header + 3,
4574 parts->record_length,
4575 header_length);
4576 /* puts 0 as chunk length which indicate 1 group record */
4577 int2store(chunk0_header + header_length - 2, 0);
4578 }
4579
4580
4581 /*
4582 Increase number of writers for this buffer
4583
4584 SYNOPSIS
4585 translog_buffer_increase_writers()
4586 buffer target buffer
4587 */
4588
4589 static inline void
translog_buffer_increase_writers(struct st_translog_buffer * buffer)4590 translog_buffer_increase_writers(struct st_translog_buffer *buffer)
4591 {
4592 DBUG_ENTER("translog_buffer_increase_writers");
4593 translog_buffer_lock_assert_owner(buffer);
4594 buffer->copy_to_buffer_in_progress++;
4595 DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u %p progress: %d",
4596 (uint) buffer->buffer_no, buffer,
4597 buffer->copy_to_buffer_in_progress));
4598 DBUG_VOID_RETURN;
4599 }
4600
4601
4602 /*
4603 Decrease number of writers for this buffer
4604
4605 SYNOPSIS
4606 translog_buffer_decrease_writers()
4607 buffer target buffer
4608 */
4609
translog_buffer_decrease_writers(struct st_translog_buffer * buffer)4610 static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer)
4611 {
4612 DBUG_ENTER("translog_buffer_decrease_writers");
4613 translog_buffer_lock_assert_owner(buffer);
4614 buffer->copy_to_buffer_in_progress--;
4615 DBUG_PRINT("info",
4616 ("copy_to_buffer_in_progress. Buffer #%u %p progress: %d",
4617 (uint) buffer->buffer_no, buffer,
4618 buffer->copy_to_buffer_in_progress));
4619 if (buffer->copy_to_buffer_in_progress == 0)
4620 mysql_cond_broadcast(&buffer->waiting_filling_buffer);
4621 DBUG_VOID_RETURN;
4622 }
4623
4624
4625 /**
4626 @brief Skip to the next page for chaser (thread which advanced horizon
4627 pointer and now feeling the buffer)
4628
4629 @param horizon \ Pointers on file position and buffer
4630 @param cursor /
4631
4632 @retval 1 OK
4633 @retval 0 Error
4634 */
4635
translog_chaser_page_next(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor)4636 static my_bool translog_chaser_page_next(TRANSLOG_ADDRESS *horizon,
4637 struct st_buffer_cursor *cursor)
4638 {
4639 struct st_translog_buffer *buffer_to_flush;
4640 my_bool rc;
4641 DBUG_ENTER("translog_chaser_page_next");
4642 DBUG_ASSERT(cursor->chaser);
4643 rc= translog_page_next(horizon, cursor, &buffer_to_flush);
4644 if (buffer_to_flush != NULL)
4645 {
4646 translog_buffer_lock(buffer_to_flush);
4647 translog_buffer_decrease_writers(buffer_to_flush);
4648 used_buffs_register_unlock(&cursor->buffs, buffer_to_flush);
4649 if (!rc)
4650 rc= translog_buffer_flush(buffer_to_flush);
4651 translog_buffer_unlock(buffer_to_flush);
4652 }
4653 DBUG_RETURN(rc);
4654 }
4655
4656 /*
4657 Put chunk 2 from new page beginning
4658
4659 SYNOPSIS
4660 translog_write_variable_record_chunk2_page()
4661 parts Descriptor of record source parts
4662 horizon \ Pointers on file position and buffer
4663 cursor /
4664
4665 RETURN
4666 0 OK
4667 1 Error
4668 */
4669
4670 static my_bool
translog_write_variable_record_chunk2_page(struct st_translog_parts * parts,TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor)4671 translog_write_variable_record_chunk2_page(struct st_translog_parts *parts,
4672 TRANSLOG_ADDRESS *horizon,
4673 struct st_buffer_cursor *cursor)
4674 {
4675 uchar chunk2_header[1];
4676 DBUG_ENTER("translog_write_variable_record_chunk2_page");
4677 chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
4678
4679 if (translog_chaser_page_next(horizon, cursor))
4680 DBUG_RETURN(1);
4681
4682 /* Puts chunk type */
4683 translog_write_data_on_page(horizon, cursor, 1, chunk2_header);
4684 /* Puts chunk body */
4685 translog_write_parts_on_page(horizon, cursor,
4686 log_descriptor.page_capacity_chunk_2, parts);
4687 DBUG_RETURN(0);
4688 }
4689
4690
4691 /*
4692 Put chunk 3 of requested length in the buffer from new page beginning
4693
4694 SYNOPSIS
4695 translog_write_variable_record_chunk3_page()
4696 parts Descriptor of record source parts
4697 length Length of this chunk
4698 horizon \ Pointers on file position and buffer
4699 cursor /
4700
4701 RETURN
4702 0 OK
4703 1 Error
4704 */
4705
4706 static my_bool
translog_write_variable_record_chunk3_page(struct st_translog_parts * parts,uint16 length,TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor)4707 translog_write_variable_record_chunk3_page(struct st_translog_parts *parts,
4708 uint16 length,
4709 TRANSLOG_ADDRESS *horizon,
4710 struct st_buffer_cursor *cursor)
4711 {
4712 LEX_CUSTRING *part;
4713 uchar chunk3_header[1 + 2];
4714 DBUG_ENTER("translog_write_variable_record_chunk3_page");
4715
4716 if (translog_chaser_page_next(horizon, cursor))
4717 DBUG_RETURN(1);
4718
4719 if (length == 0)
4720 {
4721 /* It was call to write page header only (no data for chunk 3) */
4722 DBUG_PRINT("info", ("It is a call to make page header only"));
4723 DBUG_RETURN(0);
4724 }
4725
4726 DBUG_ASSERT(parts->current != 0); /* first part is left for header */
4727 part= parts->parts + (--parts->current);
4728 parts->total_record_length+= (translog_size_t) (part->length= 1 + 2);
4729 part->str= chunk3_header;
4730 /* Puts chunk type */
4731 *chunk3_header= (uchar) (TRANSLOG_CHUNK_LNGTH);
4732 /* Puts chunk length */
4733 int2store(chunk3_header + 1, length);
4734
4735 translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts);
4736 DBUG_RETURN(0);
4737 }
4738
4739 /*
4740 Move log pointer (horizon) on given number pages starting from next page,
4741 and given offset on the last page
4742
4743 SYNOPSIS
4744 translog_advance_pointer()
4745 pages Number of full pages starting from the next one
4746 last_page_data Plus this data on the last page
4747
4748 RETURN
4749 0 OK
4750 1 Error
4751 */
4752
translog_advance_pointer(int pages,uint16 last_page_data,TRUNSLOG_USED_BUFFERS * buffs)4753 static my_bool translog_advance_pointer(int pages, uint16 last_page_data,
4754 TRUNSLOG_USED_BUFFERS *buffs)
4755 {
4756 translog_size_t last_page_offset= (log_descriptor.page_overhead +
4757 last_page_data);
4758 translog_size_t offset= (TRANSLOG_PAGE_SIZE -
4759 log_descriptor.bc.current_page_fill +
4760 pages * TRANSLOG_PAGE_SIZE + last_page_offset);
4761 translog_size_t buffer_end_offset, file_end_offset, min_offset;
4762 DBUG_ENTER("translog_advance_pointer");
4763 DBUG_PRINT("enter", ("Pointer: " LSN_FMT " + %u + %u pages + %u + %u",
4764 LSN_IN_PARTS(log_descriptor.horizon),
4765 (uint) (TRANSLOG_PAGE_SIZE -
4766 log_descriptor.bc.current_page_fill),
4767 pages, (uint) log_descriptor.page_overhead,
4768 (uint) last_page_data));
4769 translog_lock_assert_owner();
4770
4771 used_buffs_init(buffs);
4772
4773 if (pages == -1)
4774 {
4775 /*
4776 It is special case when we advance the pointer on the same page.
4777 It can happened when we write last part of multi-group record.
4778 */
4779 DBUG_ASSERT(last_page_data + log_descriptor.bc.current_page_fill <=
4780 TRANSLOG_PAGE_SIZE);
4781 offset= last_page_data;
4782 last_page_offset= log_descriptor.bc.current_page_fill + last_page_data;
4783 goto end;
4784 }
4785 DBUG_PRINT("info", ("last_page_offset %lu", (ulong) last_page_offset));
4786 DBUG_ASSERT(last_page_offset <= TRANSLOG_PAGE_SIZE);
4787
4788 /*
4789 The loop will be executed 1-3 times. Usually we advance the
4790 pointer to fill only the current buffer (if we have more then 1/2 of
4791 buffer free or 2 buffers (rest of current and all next). In case of
4792 really huge record end where we write last group with "table of
4793 content" of all groups and ignore buffer borders we can occupy
4794 3 buffers.
4795 */
4796 for (;;)
4797 {
4798 uint8 new_buffer_no;
4799 struct st_translog_buffer *new_buffer;
4800 struct st_translog_buffer *old_buffer;
4801 buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size;
4802 if (likely(log_descriptor.log_file_max_size >=
4803 LSN_OFFSET(log_descriptor.horizon)))
4804 file_end_offset= (log_descriptor.log_file_max_size -
4805 LSN_OFFSET(log_descriptor.horizon));
4806 else
4807 {
4808 /*
4809 We already have written more then current file limit allow,
4810 So we will finish this page and start new file
4811 */
4812 file_end_offset= (TRANSLOG_PAGE_SIZE -
4813 log_descriptor.bc.current_page_fill);
4814 }
4815 DBUG_PRINT("info", ("offset: %u buffer_end_offs: %u, "
4816 "file_end_offs: %u",
4817 offset, buffer_end_offset,
4818 file_end_offset));
4819 DBUG_PRINT("info", ("Buff #%u %u (%p) offset 0x%x + size 0x%x = "
4820 "0x%x (0x%x)",
4821 log_descriptor.bc.buffer->buffer_no,
4822 log_descriptor.bc.buffer_no,
4823 log_descriptor.bc.buffer,
4824 (uint) LSN_OFFSET(log_descriptor.bc.buffer->offset),
4825 log_descriptor.bc.buffer->size,
4826 (uint) (LSN_OFFSET(log_descriptor.bc.buffer->offset) +
4827 log_descriptor.bc.buffer->size),
4828 (uint) LSN_OFFSET(log_descriptor.horizon)));
4829 DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) +
4830 log_descriptor.bc.buffer->size ==
4831 LSN_OFFSET(log_descriptor.horizon));
4832
4833 if (offset <= buffer_end_offset && offset <= file_end_offset)
4834 break;
4835 old_buffer= log_descriptor.bc.buffer;
4836 new_buffer_no= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
4837 new_buffer= log_descriptor.buffers + new_buffer_no;
4838
4839 translog_buffer_lock(new_buffer);
4840 #ifndef DBUG_OFF
4841 {
4842 TRANSLOG_ADDRESS offset= new_buffer->offset;
4843 TRANSLOG_FILE *file= new_buffer->file;
4844 uint8 ver= new_buffer->ver;
4845 translog_lock_assert_owner();
4846 #endif
4847 translog_wait_for_buffer_free(new_buffer);
4848 #ifndef DBUG_OFF
4849 /* We keep the handler locked so nobody can start this new buffer */
4850 DBUG_ASSERT((offset == new_buffer->offset && new_buffer->file == NULL &&
4851 (file == NULL ? ver : (uint8)(ver + 1)) ==
4852 new_buffer->ver) ||
4853 translog_status == TRANSLOG_READONLY);
4854 }
4855 #endif
4856
4857 min_offset= MY_MIN(buffer_end_offset, file_end_offset);
4858 /* TODO: check is it ptr or size enough */
4859 log_descriptor.bc.buffer->size+= min_offset;
4860 log_descriptor.bc.ptr+= min_offset;
4861 DBUG_PRINT("info", ("NewP buffer #%u: %p chaser: %d Size: %lu (%lu)",
4862 (uint) log_descriptor.bc.buffer->buffer_no,
4863 log_descriptor.bc.buffer,
4864 log_descriptor.bc.chaser,
4865 (ulong) log_descriptor.bc.buffer->size,
4866 (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
4867 buffer->buffer)));
4868 DBUG_ASSERT((ulong) (log_descriptor.bc.ptr -
4869 log_descriptor.bc.buffer->buffer) ==
4870 log_descriptor.bc.buffer->size);
4871 DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
4872 log_descriptor.bc.buffer_no);
4873 translog_buffer_increase_writers(log_descriptor.bc.buffer);
4874 // register for case of error
4875 used_buffs_add(buffs, log_descriptor.bc.buffer);
4876
4877 if (file_end_offset <= buffer_end_offset)
4878 {
4879 log_descriptor.horizon+= LSN_ONE_FILE;
4880 log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
4881 TRANSLOG_PAGE_SIZE);
4882 DBUG_PRINT("info", ("New file: %lu",
4883 (ulong) LSN_FILE_NO(log_descriptor.horizon)));
4884 if (translog_create_new_file())
4885 {
4886 struct st_translog_buffer *ob= log_descriptor.bc.buffer;
4887 translog_buffer_unlock(ob);
4888 used_buffs_urgent_unlock(buffs);
4889 translog_buffer_lock(ob);
4890 DBUG_RETURN(1);
4891 }
4892 }
4893 else
4894 {
4895 DBUG_PRINT("info", ("The same file"));
4896 log_descriptor.horizon+= min_offset; /* offset increasing */
4897 }
4898 translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
4899 old_buffer->next_buffer_offset= new_buffer->offset;
4900 new_buffer->prev_buffer_offset= old_buffer->offset;
4901 translog_buffer_unlock(old_buffer);
4902 offset-= min_offset;
4903 }
4904 DBUG_PRINT("info", ("drop write_counter"));
4905 log_descriptor.bc.write_counter= 0;
4906 log_descriptor.bc.previous_offset= 0;
4907 end:
4908 log_descriptor.bc.ptr+= offset;
4909 log_descriptor.bc.buffer->size+= offset;
4910 translog_buffer_increase_writers(log_descriptor.bc.buffer);
4911 used_buffs_add(buffs, log_descriptor.bc.buffer);
4912 log_descriptor.horizon+= offset; /* offset increasing */
4913 log_descriptor.bc.current_page_fill= last_page_offset;
4914 DBUG_PRINT("info", ("NewP buffer #%u: %p chaser: %d Size: %lu (%lu) "
4915 "offset: %u last page: %u",
4916 (uint) log_descriptor.bc.buffer->buffer_no,
4917 log_descriptor.bc.buffer,
4918 log_descriptor.bc.chaser,
4919 (ulong) log_descriptor.bc.buffer->size,
4920 (ulong) (log_descriptor.bc.ptr -
4921 log_descriptor.bc.buffer->
4922 buffer), (uint) offset,
4923 (uint) last_page_offset));
4924 DBUG_PRINT("info",
4925 ("pointer moved to: " LSN_FMT,
4926 LSN_IN_PARTS(log_descriptor.horizon)));
4927 translog_check_cursor(&log_descriptor.bc);
4928 log_descriptor.bc.protected= 0;
4929 DBUG_RETURN(0);
4930 }
4931
4932 static void
used_buffs_add(TRUNSLOG_USED_BUFFERS * buffs,struct st_translog_buffer * buff)4933 used_buffs_add(TRUNSLOG_USED_BUFFERS *buffs,
4934 struct st_translog_buffer *buff)
4935 {
4936 DBUG_ENTER("used_buffs_add");
4937 DBUG_PRINT("enter", ("ADD buffs: %p unlk %u (%p) wrt_ptr: %u (%p)"
4938 " buff %p (%u)",
4939 buffs,
4940 buffs->wrt_ptr, buffs->buff[buffs->wrt_ptr],
4941 buffs->unlck_ptr, buffs->buff[buffs->unlck_ptr],
4942 buff, buff->buffer_no));
4943 DBUG_ASSERT(buffs->wrt_ptr < MAX_TRUNSLOG_USED_BUFFERS);
4944 buffs->buff[buffs->wrt_ptr++]= buff;
4945 DBUG_VOID_RETURN;
4946 }
4947
4948 static void
used_buffs_register_unlock(TRUNSLOG_USED_BUFFERS * buffs,struct st_translog_buffer * buff)4949 used_buffs_register_unlock(TRUNSLOG_USED_BUFFERS *buffs,
4950 struct st_translog_buffer *buff
4951 __attribute__((unused)) )
4952 {
4953 DBUG_ENTER("used_buffs_register_unlock");
4954 DBUG_PRINT("enter", ("SUB buffs: %p unlk %u (%p) wrt_ptr: %u (%p)"
4955 " buff %p (%u)",
4956 buffs,
4957 buffs->wrt_ptr, buffs->buff[buffs->wrt_ptr],
4958 buffs->unlck_ptr, buffs->buff[buffs->unlck_ptr],
4959 buff, buff->buffer_no));
4960 DBUG_ASSERT(buffs->buff[buffs->unlck_ptr] == buff);
4961 buffs->unlck_ptr++;
4962 DBUG_VOID_RETURN;
4963 }
/**
  @brief Error path helper: stops writing to the log and drops the writer
  registration of every buffer of the list that is still registered.

  @param buffs  List of used buffers; it is re-initialized before return
*/

static void used_buffs_urgent_unlock(TRUNSLOG_USED_BUFFERS *buffs)
{
  uint idx;
  DBUG_ENTER("used_buffs_urgent_unlock");

  /* translog_stop_writing() is called under the loghandler lock */
  translog_lock();
  translog_stop_writing();
  translog_unlock();

  idx= buffs->unlck_ptr;
  while (idx < buffs->wrt_ptr)
  {
    struct st_translog_buffer *used= buffs->buff[idx];
    translog_buffer_lock(used);
    translog_buffer_decrease_writers(used);
    translog_buffer_unlock(used);
    buffs->buff[idx]= NULL;
    idx++;
  }
  used_buffs_init(buffs);
  DBUG_VOID_RETURN;
}
4982
4983 /*
4984 Get page rest
4985
4986 SYNOPSIS
4987 translog_get_current_page_rest()
4988
4989 NOTE loghandler should be locked
4990
4991 RETURN
4992 number of bytes left on the current page
4993 */
4994
translog_get_current_page_rest()4995 static uint translog_get_current_page_rest()
4996 {
4997 return (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill);
4998 }
4999
5000
5001 /*
5002 Get buffer rest in full pages
5003
5004 SYNOPSIS
5005 translog_get_current_buffer_rest()
5006
5007 NOTE loghandler should be locked
5008
5009 RETURN
5010 number of full pages left on the current buffer
5011 */
5012
translog_get_current_buffer_rest()5013 static uint translog_get_current_buffer_rest()
5014 {
5015 return (uint)((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER -
5016 log_descriptor.bc.ptr) /
5017 TRANSLOG_PAGE_SIZE);
5018 }
5019
5020 /*
5021 Calculate possible group size without first (current) page
5022
5023 SYNOPSIS
5024 translog_get_current_group_size()
5025
5026 NOTE loghandler should be locked
5027
5028 RETURN
5029 group size without first (current) page
5030 */
5031
translog_get_current_group_size()5032 static translog_size_t translog_get_current_group_size()
5033 {
5034 /* buffer rest in full pages */
5035 translog_size_t buffer_rest= translog_get_current_buffer_rest();
5036 DBUG_ENTER("translog_get_current_group_size");
5037 DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest));
5038
5039 buffer_rest*= log_descriptor.page_capacity_chunk_2;
5040 /* in case of only half of buffer free we can write this and next buffer */
5041 if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2)
5042 {
5043 DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu",
5044 (ulong) buffer_rest,
5045 (ulong) log_descriptor.buffer_capacity_chunk_2));
5046 buffer_rest+= log_descriptor.buffer_capacity_chunk_2;
5047 }
5048
5049 DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest));
5050
5051 DBUG_RETURN(buffer_rest);
5052 }
5053
5054
/**
  @brief Stores a newly generated LSN and notes that the log now has
  something which is not flushed.

  @param lsn    Where to store the LSN
  @param value  The LSN value

  @note The caller must own the loghandler lock.
*/

static inline void set_lsn(LSN *lsn, LSN value)
{
  DBUG_ENTER("set_lsn");
  translog_lock_assert_owner();
  DBUG_PRINT("info", ("new LSN appeared: " LSN_FMT, LSN_IN_PARTS(value)));
  lsn[0]= value;
  /* a new LSN means that not everything is flushed in the log any more */
  log_descriptor.is_everything_flushed= 0;
  DBUG_VOID_RETURN;
}
5065
5066
5067 /**
5068 @brief Write variable record in 1 group.
5069
5070 @param lsn LSN of the record will be written here
5071 @param type the log record type
5072 @param short_trid Short transaction ID or 0 if it has no sense
5073 @param parts Descriptor of record source parts
5074 @param buffer_to_flush Buffer which have to be flushed if it is not 0
5075 @param header_length Calculated header length of chunk type 0
5076 @param trn Transaction structure pointer for hooks by
5077 record log type, for short_id
5078 @param hook_arg Argument which will be passed to pre-write and
5079 in-write hooks of this record.
5080
5081 @note
5082 We must have a translog_lock() when entering this function
5083 We must have buffer_to_flush locked (if not null)
5084
5085 @return Operation status
5086 @retval 0 OK
5087 @retval 1 Error
5088 */
5089
5090 static my_bool
translog_write_variable_record_1group(LSN * lsn,enum translog_record_type type,MARIA_HA * tbl_info,SHORT_TRANSACTION_ID short_trid,struct st_translog_parts * parts,struct st_translog_buffer * buffer_to_flush,uint16 header_length,TRN * trn,void * hook_arg)5091 translog_write_variable_record_1group(LSN *lsn,
5092 enum translog_record_type type,
5093 MARIA_HA *tbl_info,
5094 SHORT_TRANSACTION_ID short_trid,
5095 struct st_translog_parts *parts,
5096 struct st_translog_buffer
5097 *buffer_to_flush, uint16 header_length,
5098 TRN *trn, void *hook_arg)
5099 {
5100 TRANSLOG_ADDRESS horizon;
5101 struct st_buffer_cursor cursor;
5102 int rc= 0;
5103 uint i;
5104 translog_size_t record_rest, full_pages, first_page;
5105 uint additional_chunk3_page= 0;
5106 uchar chunk0_header[1 + 2 + 5 + 2];
5107 DBUG_ENTER("translog_write_variable_record_1group");
5108 translog_lock_assert_owner();
5109 if (buffer_to_flush)
5110 translog_buffer_lock_assert_owner(buffer_to_flush);
5111
5112 set_lsn(lsn, horizon= log_descriptor.horizon);
5113 if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
5114 *lsn, TRUE) ||
5115 (log_record_type_descriptor[type].inwrite_hook &&
5116 (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
5117 lsn, hook_arg)))
5118 {
5119 translog_unlock();
5120 if (buffer_to_flush != NULL)
5121 {
5122 translog_buffer_flush(buffer_to_flush);
5123 translog_buffer_unlock(buffer_to_flush);
5124 }
5125 DBUG_RETURN(1);
5126 }
5127 cursor= log_descriptor.bc;
5128 cursor.chaser= 1;
5129
5130 /* Advance pointer to be able unlock the loghandler */
5131 first_page= translog_get_current_page_rest();
5132 record_rest= parts->record_length - (first_page - header_length);
5133 full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
5134 record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
5135
5136 if (record_rest + 1 == log_descriptor.page_capacity_chunk_2)
5137 {
5138 DBUG_PRINT("info", ("2 chunks type 3 is needed"));
5139 /* We will write 2 chunks type 3 at the end of this group */
5140 additional_chunk3_page= 1;
5141 record_rest= 1;
5142 }
5143
5144 DBUG_PRINT("info", ("first_page: %u (%u) full_pages: %u (%lu) "
5145 "additional: %u (%u) rest %u = %u",
5146 first_page, first_page - header_length,
5147 full_pages,
5148 (ulong) full_pages *
5149 log_descriptor.page_capacity_chunk_2,
5150 additional_chunk3_page,
5151 additional_chunk3_page *
5152 (log_descriptor.page_capacity_chunk_2 - 1),
5153 record_rest, parts->record_length));
5154 /* record_rest + 3 is chunk type 3 overhead + record_rest */
5155 rc= translog_advance_pointer((int)(full_pages + additional_chunk3_page),
5156 (record_rest ? record_rest + 3 : 0),
5157 &cursor.buffs);
5158 log_descriptor.bc.buffer->last_lsn= *lsn;
5159 DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p",
5160 LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
5161 log_descriptor.bc.buffer));
5162
5163 translog_unlock();
5164
5165 /*
5166 Check if we switched buffer and need process it (current buffer is
5167 unlocked already => we will not delay other threads
5168 */
5169 if (buffer_to_flush != NULL)
5170 {
5171 if (!rc)
5172 rc= translog_buffer_flush(buffer_to_flush);
5173 translog_buffer_unlock(buffer_to_flush);
5174 }
5175 if (rc)
5176 {
5177 //translog_advance_pointer decreased writers so it is OK
5178 DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
5179 DBUG_RETURN(1);
5180 }
5181
5182 translog_write_variable_record_1group_header(parts, type, short_trid,
5183 header_length, chunk0_header);
5184
5185 /* fill the pages */
5186 translog_write_parts_on_page(&horizon, &cursor, first_page, parts);
5187
5188 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
5189 LSN_IN_PARTS(log_descriptor.horizon),
5190 LSN_IN_PARTS(horizon)));
5191
5192 for (i= 0; i < full_pages; i++)
5193 {
5194 if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
5195 goto error;
5196
5197 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
5198 LSN_IN_PARTS(log_descriptor.horizon),
5199 LSN_IN_PARTS(horizon)));
5200 }
5201
5202 if (additional_chunk3_page)
5203 {
5204 if (translog_write_variable_record_chunk3_page(parts,
5205 log_descriptor.
5206 page_capacity_chunk_2 - 2,
5207 &horizon, &cursor))
5208 goto error;
5209 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
5210 LSN_IN_PARTS(log_descriptor.horizon),
5211 LSN_IN_PARTS(horizon)));
5212 DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE);
5213 }
5214
5215 if (translog_write_variable_record_chunk3_page(parts,
5216 record_rest,
5217 &horizon, &cursor))
5218 goto error;
5219 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
5220 (uint) LSN_FILE_NO(log_descriptor.horizon),
5221 (uint) LSN_OFFSET(log_descriptor.horizon),
5222 (uint) LSN_FILE_NO(horizon),
5223 (uint) LSN_OFFSET(horizon)));
5224
5225 translog_buffer_lock(cursor.buffer);
5226 translog_buffer_decrease_writers(cursor.buffer);
5227 used_buffs_register_unlock(&cursor.buffs, cursor.buffer);
5228 translog_buffer_unlock(cursor.buffer);
5229 DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
5230 DBUG_RETURN(0);
5231 error:
5232 used_buffs_urgent_unlock(&cursor.buffs);
5233 DBUG_RETURN(1);
5234 }
5235
5236
5237 /**
5238 @brief Write variable record in 1 chunk.
5239
5240 @param lsn LSN of the record will be written here
5241 @param type the log record type
5242 @param short_trid Short transaction ID or 0 if it has no sense
5243 @param parts Descriptor of record source parts
5244 @param buffer_to_flush Buffer which have to be flushed if it is not 0
5245 @param header_length Calculated header length of chunk type 0
5246 @param trn Transaction structure pointer for hooks by
5247 record log type, for short_id
5248 @param hook_arg Argument which will be passed to pre-write and
5249 in-write hooks of this record.
5250
5251 @note
5252 We must have a translog_lock() when entering this function
5253 We must have buffer_to_flush locked (if not null)
5254
5255 @return Operation status
5256 @retval 0 OK
5257 @retval 1 Error
5258 */
5259
5260 static my_bool
translog_write_variable_record_1chunk(LSN * lsn,enum translog_record_type type,MARIA_HA * tbl_info,SHORT_TRANSACTION_ID short_trid,struct st_translog_parts * parts,struct st_translog_buffer * buffer_to_flush,uint16 header_length,TRN * trn,void * hook_arg)5261 translog_write_variable_record_1chunk(LSN *lsn,
5262 enum translog_record_type type,
5263 MARIA_HA *tbl_info,
5264 SHORT_TRANSACTION_ID short_trid,
5265 struct st_translog_parts *parts,
5266 struct st_translog_buffer
5267 *buffer_to_flush, uint16 header_length,
5268 TRN *trn, void *hook_arg)
5269 {
5270 int rc;
5271 uchar chunk0_header[1 + 2 + 5 + 2];
5272 DBUG_ENTER("translog_write_variable_record_1chunk");
5273 translog_lock_assert_owner();
5274 if (buffer_to_flush)
5275 translog_buffer_lock_assert_owner(buffer_to_flush);
5276
5277 translog_write_variable_record_1group_header(parts, type, short_trid,
5278 header_length, chunk0_header);
5279 set_lsn(lsn, log_descriptor.horizon);
5280 if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
5281 *lsn, TRUE) ||
5282 (log_record_type_descriptor[type].inwrite_hook &&
5283 (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
5284 lsn, hook_arg)))
5285 {
5286 translog_unlock();
5287 rc= 1;
5288 goto err;
5289 }
5290
5291 rc= translog_write_parts_on_page(&log_descriptor.horizon,
5292 &log_descriptor.bc,
5293 parts->total_record_length, parts);
5294 log_descriptor.bc.buffer->last_lsn= *lsn;
5295 DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p",
5296 LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
5297 log_descriptor.bc.buffer));
5298 translog_unlock();
5299
5300 /*
5301 check if we switched buffer and need process it (current buffer is
5302 unlocked already => we will not delay other threads
5303 */
5304 err:
5305 if (buffer_to_flush != NULL)
5306 {
5307 if (!rc)
5308 rc= translog_buffer_flush(buffer_to_flush);
5309 translog_buffer_unlock(buffer_to_flush);
5310 }
5311
5312 DBUG_RETURN(rc);
5313 }
5314
5315
5316 /*
5317 @brief Calculates and write LSN difference (compressed LSN).
5318
5319 @param base_lsn LSN from which we calculate difference
5320 @param lsn LSN for codding
5321 @param dst Result will be written to dst[-pack_length] .. dst[-1]
5322
5323 @note To store an LSN in a compact way we will use the following compression:
5324 If a log record has LSN1, and it contains the LSN2 as a back reference,
5325 Instead of LSN2 we write LSN1-LSN2, encoded as:
5326 two bits the number N (see below)
5327 14 bits
5328 N bytes
5329 That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
5330 is stored in the first two bits.
5331
5332 @note function made to write the result in backward direction with no
5333 special sense or tricks both directions are equal in complicity
5334
5335 @retval # pointer on coded LSN
5336 */
5337
translog_put_LSN_diff(LSN base_lsn,LSN lsn,uchar * dst)5338 static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst)
5339 {
5340 uint64 diff;
5341 DBUG_ENTER("translog_put_LSN_diff");
5342 DBUG_PRINT("enter", ("Base: " LSN_FMT " val: " LSN_FMT " dst: %p",
5343 LSN_IN_PARTS(base_lsn), LSN_IN_PARTS(lsn),
5344 dst));
5345 DBUG_ASSERT(base_lsn > lsn);
5346 diff= base_lsn - lsn;
5347 DBUG_PRINT("info", ("Diff: 0x%llx", (ulonglong) diff));
5348 if (diff <= 0x3FFF)
5349 {
5350 dst-= 2;
5351 /*
5352 Note we store this high uchar first to ensure that first uchar has
5353 0 in the 3 upper bits.
5354 */
5355 dst[0]= (uchar)(diff >> 8);
5356 dst[1]= (uchar)(diff & 0xFF);
5357 }
5358 else if (diff <= 0x3FFFFFL)
5359 {
5360 dst-= 3;
5361 dst[0]= (uchar)(0x40 | (diff >> 16));
5362 int2store(dst + 1, diff & 0xFFFF);
5363 }
5364 else if (diff <= 0x3FFFFFFFL)
5365 {
5366 dst-= 4;
5367 dst[0]= (uchar)(0x80 | (diff >> 24));
5368 int3store(dst + 1, diff & 0xFFFFFFL);
5369 }
5370 else if (diff <= 0x3FFFFFFFFFLL)
5371
5372 {
5373 dst-= 5;
5374 dst[0]= (uchar)(0xC0 | (diff >> 32));
5375 int4store(dst + 1, diff & 0xFFFFFFFFL);
5376 }
5377 else
5378 {
5379 /*
5380 It is full LSN after special 1 diff (which is impossible
5381 in real life)
5382 */
5383 dst-= 2 + LSN_STORE_SIZE;
5384 dst[0]= 0;
5385 dst[1]= 1;
5386 lsn_store(dst + 2, lsn);
5387 }
5388 DBUG_PRINT("info", ("new dst: %p", dst));
5389 DBUG_RETURN(dst);
5390 }
5391
5392
5393 /*
5394 Get LSN from LSN-difference (compressed LSN)
5395
5396 SYNOPSIS
5397 translog_get_LSN_from_diff()
5398 base_lsn LSN from which we calculate difference
5399 src pointer to coded lsn
5400 dst pointer to buffer where to write 7byte LSN
5401
5402 NOTE:
5403 To store an LSN in a compact way we will use the following compression:
5404
5405 If a log record has LSN1, and it contains the lSN2 as a back reference,
5406 Instead of LSN2 we write LSN1-LSN2, encoded as:
5407
5408 two bits the number N (see below)
5409 14 bits
5410 N bytes
5411
5412 That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
5413 is stored in the first two bits.
5414
5415 RETURN
5416 pointer to buffer after decoded LSN
5417 */
5418
translog_get_LSN_from_diff(LSN base_lsn,uchar * src,uchar * dst)5419 static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst)
5420 {
5421 LSN lsn;
5422 uint32 diff;
5423 uint32 first_byte;
5424 uint32 file_no, rec_offset;
5425 uint8 code;
5426 DBUG_ENTER("translog_get_LSN_from_diff");
5427 DBUG_PRINT("enter", ("Base: " LSN_FMT " src:%p dst %p",
5428 LSN_IN_PARTS(base_lsn), src, dst));
5429 first_byte= *((uint8*) src);
5430 code= first_byte >> 6; /* Length is in 2 most significant bits */
5431 first_byte&= 0x3F;
5432 src++; /* Skip length + encode */
5433 file_no= LSN_FILE_NO(base_lsn); /* Assume relative */
5434 DBUG_PRINT("info", ("code: %u first byte: %lu",
5435 (uint) code, (ulong) first_byte));
5436 switch (code) {
5437 case 0:
5438 if (first_byte == 0 && *((uint8*)src) == 1)
5439 {
5440 /*
5441 It is full LSN after special 1 diff (which is impossible
5442 in real life)
5443 */
5444 memcpy(dst, src + 1, LSN_STORE_SIZE);
5445 DBUG_PRINT("info", ("Special case of full LSN, new src:%p",
5446 src + 1 + LSN_STORE_SIZE));
5447 DBUG_RETURN(src + 1 + LSN_STORE_SIZE);
5448 }
5449 rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) | *((uint8*)src));
5450 break;
5451 case 1:
5452 diff= uint2korr(src);
5453 rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) | diff);
5454 break;
5455 case 2:
5456 diff= uint3korr(src);
5457 rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) | diff);
5458 break;
5459 case 3:
5460 {
5461 ulonglong base_offset= LSN_OFFSET(base_lsn);
5462 diff= uint4korr(src);
5463 if (diff > LSN_OFFSET(base_lsn))
5464 {
5465 /* take 1 from file offset */
5466 first_byte++;
5467 base_offset+= 0x100000000LL;
5468 }
5469 file_no= LSN_FILE_NO(base_lsn) - first_byte;
5470 DBUG_ASSERT(base_offset - diff <= UINT_MAX);
5471 rec_offset= (uint32)(base_offset - diff);
5472 break;
5473 }
5474 default:
5475 DBUG_ASSERT(0);
5476 DBUG_RETURN(NULL);
5477 }
5478 lsn= MAKE_LSN(file_no, rec_offset);
5479 src+= code + 1;
5480 lsn_store(dst, lsn);
5481 DBUG_PRINT("info", ("new src:%p", src));
5482 DBUG_RETURN(src);
5483 }
5484
5485
5486 /**
5487 @brief Encodes relative LSNs listed in the parameters.
5488
5489 @param parts Parts list with encoded LSN(s)
5490 @param base_lsn LSN which is base for encoding
5491 @param lsns number of LSN(s) to encode
5492 @param compressed_LSNs buffer which can be used for storing compressed LSN(s)
5493 */
5494
translog_relative_LSN_encode(struct st_translog_parts * parts,LSN base_lsn,uint lsns,uchar * compressed_LSNs)5495 static void translog_relative_LSN_encode(struct st_translog_parts *parts,
5496 LSN base_lsn,
5497 uint lsns, uchar *compressed_LSNs)
5498 {
5499 LEX_CUSTRING *part;
5500 uint lsns_len= lsns * LSN_STORE_SIZE;
5501 uchar buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
5502 uchar *buffer= buffer_src;
5503 const uchar *cbuffer;
5504
5505 DBUG_ENTER("translog_relative_LSN_encode");
5506
5507 DBUG_ASSERT(parts->current != 0);
5508 part= parts->parts + parts->current;
5509
5510 /* collect all LSN(s) in one chunk if it (they) is (are) divided */
5511 if (part->length < lsns_len)
5512 {
5513 size_t copied= part->length;
5514 LEX_CUSTRING *next_part;
5515 DBUG_PRINT("info", ("Using buffer:%p", compressed_LSNs));
5516 memcpy(buffer, part->str, part->length);
5517 next_part= parts->parts + parts->current + 1;
5518 do
5519 {
5520 DBUG_ASSERT(next_part < parts->parts + parts->elements);
5521 if ((next_part->length + copied) < lsns_len)
5522 {
5523 memcpy(buffer + copied, next_part->str,
5524 next_part->length);
5525 copied+= next_part->length;
5526 next_part->length= 0; next_part->str= 0;
5527 /* delete_dynamic_element(&parts->parts, parts->current + 1); */
5528 next_part++;
5529 parts->current++;
5530 part= parts->parts + parts->current;
5531 }
5532 else
5533 {
5534 size_t len= lsns_len - copied;
5535 memcpy(buffer + copied, next_part->str, len);
5536 copied= lsns_len;
5537 next_part->str+= len;
5538 next_part->length-= len;
5539 }
5540 } while (copied < lsns_len);
5541 cbuffer= buffer;
5542 }
5543 else
5544 {
5545 cbuffer= part->str;
5546 part->str+= lsns_len;
5547 part->length-= lsns_len;
5548 parts->current--;
5549 part= parts->parts + parts->current;
5550 }
5551
5552 {
5553 /* Compress */
5554 LSN ref;
5555 int economy;
5556 const uchar *src_ptr;
5557 uchar *dst_ptr= compressed_LSNs + (MAX_NUMBER_OF_LSNS_PER_RECORD *
5558 COMPRESSED_LSN_MAX_STORE_SIZE);
5559 /*
5560 We write the result in backward direction with no special sense or
5561 tricks both directions are equal in complicity
5562 */
5563 for (src_ptr= cbuffer + lsns_len - LSN_STORE_SIZE;
5564 src_ptr >= (const uchar*)cbuffer;
5565 src_ptr-= LSN_STORE_SIZE)
5566 {
5567 ref= lsn_korr(src_ptr);
5568 dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr);
5569 }
5570 part->length= (size_t)((compressed_LSNs +
5571 (MAX_NUMBER_OF_LSNS_PER_RECORD *
5572 COMPRESSED_LSN_MAX_STORE_SIZE)) -
5573 dst_ptr);
5574 economy= lsns_len - (uint)part->length;
5575 parts->record_length-= economy;
5576 DBUG_PRINT("info", ("new length of LSNs: %lu economy: %d",
5577 (ulong)part->length, economy));
5578 parts->total_record_length-= economy;
5579 part->str= dst_ptr;
5580 }
5581 DBUG_VOID_RETURN;
5582 }
5583
5584
5585 /**
5586 @brief Write multi-group variable-size record.
5587
5588 @param lsn LSN of the record will be written here
5589 @param type the log record type
5590 @param short_trid Short transaction ID or 0 if it has no sense
5591 @param parts Descriptor of record source parts
5592 @param buffer_to_flush Buffer which have to be flushed if it is not 0
5593 @param header_length Header length calculated for 1 group
5594 @param buffer_rest Beginning from which we plan to write in full pages
5595 @param trn Transaction structure pointer for hooks by
5596 record log type, for short_id
5597 @param hook_arg Argument which will be passed to pre-write and
5598 in-write hooks of this record.
5599
5600 @note
5601 We must have a translog_lock() when entering this function
5602
5603 We must have buffer_to_flush locked (if not null)
5604 buffer_to_flush should *NOT* be locked when calling this function.
     (This note is here as this is different from most other
5606 translog_write...() functions which require the buffer to be locked)
5607
5608 @return Operation status
5609 @retval 0 OK
5610 @retval 1 Error
5611 */
5612
5613 static my_bool
translog_write_variable_record_mgroup(LSN * lsn,enum translog_record_type type,MARIA_HA * tbl_info,SHORT_TRANSACTION_ID short_trid,struct st_translog_parts * parts,struct st_translog_buffer * buffer_to_flush,uint16 header_length,translog_size_t buffer_rest,TRN * trn,void * hook_arg)5614 translog_write_variable_record_mgroup(LSN *lsn,
5615 enum translog_record_type type,
5616 MARIA_HA *tbl_info,
5617 SHORT_TRANSACTION_ID short_trid,
5618 struct st_translog_parts *parts,
5619 struct st_translog_buffer
5620 *buffer_to_flush,
5621 uint16 header_length,
5622 translog_size_t buffer_rest,
5623 TRN *trn, void *hook_arg)
5624 {
5625 TRANSLOG_ADDRESS horizon;
5626 struct st_buffer_cursor cursor;
5627 int rc= 0;
5628 uint i, chunk2_page, full_pages;
5629 uint curr_group= 0;
5630 translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1;
5631 translog_size_t done= 0;
5632 struct st_translog_group_descriptor group;
5633 DYNAMIC_ARRAY groups;
5634 uint16 chunk3_size;
5635 uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1;
5636 uint16 last_page_capacity;
5637 my_bool new_page_before_chunk0= 1, first_chunk0= 1;
5638 uchar chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1];
5639 uchar chunk2_header[1];
5640 uint header_fixed_part= header_length + 2;
5641 uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1);
5642 uint file_of_the_first_group;
5643 int pages_to_skip;
5644 struct st_translog_buffer *buffer_of_last_lsn;
5645 my_bool external_buffer_to_flush= TRUE;
5646 DBUG_ENTER("translog_write_variable_record_mgroup");
5647 translog_lock_assert_owner();
5648
5649 used_buffs_init(&cursor.buffs);
5650 chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
5651
5652 if (my_init_dynamic_array(PSI_INSTRUMENT_ME, &groups,
5653 sizeof(struct st_translog_group_descriptor),
5654 10, 10, MYF(0)))
5655 {
5656 translog_unlock();
5657 if (buffer_to_flush != NULL)
5658 {
5659 translog_buffer_flush(buffer_to_flush);
5660 translog_buffer_unlock(buffer_to_flush);
5661 }
5662 DBUG_PRINT("error", ("init array failed"));
5663 DBUG_RETURN(1);
5664 }
5665
5666 first_page= translog_get_current_page_rest();
5667 record_rest= parts->record_length - (first_page - 1);
5668 DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest));
5669
5670 if (record_rest < buffer_rest)
5671 {
5672 /*
5673 The record (group 1 type) is larger than the free space on the page
5674 - we need to split it in two. But when we split it in two, the first
5675 part is big enough to hold all the data of the record (because the
5676 header of the first part of the split is smaller than the header of
5677 the record as a whole when it takes only one chunk)
5678 */
5679 DBUG_PRINT("info", ("too many free space because changing header"));
5680 buffer_rest-= log_descriptor.page_capacity_chunk_2;
5681 DBUG_ASSERT(record_rest >= buffer_rest);
5682 }
5683
5684 file_of_the_first_group= LSN_FILE_NO(log_descriptor.horizon);
5685 translog_mark_file_unfinished(file_of_the_first_group);
5686 do
5687 {
5688 DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
5689 group.addr= horizon= log_descriptor.horizon;
5690 cursor= log_descriptor.bc;
5691 cursor.chaser= 1;
5692 if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255)
5693 {
5694 /* sizeof(uint8) == 256 is max number of chunk in multi-chunks group */
5695 full_pages= 255;
5696 buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2;
5697 }
5698 /*
5699 group chunks =
5700 full pages + first page (which actually can be full, too).
5701 But here we assign number of chunks - 1
5702 */
5703 group.num= full_pages;
5704 if (insert_dynamic(&groups, (uchar*) &group))
5705 {
5706 DBUG_PRINT("error", ("insert into array failed"));
5707 goto err_unlock;
5708 }
5709
5710 DBUG_PRINT("info", ("chunk: #%u first_page: %u (%u) "
5711 "full_pages: %lu (%lu) "
5712 "Left %lu",
5713 groups.elements,
5714 first_page, first_page - 1,
5715 (ulong) full_pages,
5716 (ulong) (full_pages *
5717 log_descriptor.page_capacity_chunk_2),
5718 (ulong)(parts->record_length - (first_page - 1 +
5719 buffer_rest) -
5720 done)));
5721 rc= translog_advance_pointer((int)full_pages, 0, &cursor.buffs);
5722
5723 translog_unlock();
5724
5725 if (buffer_to_flush != NULL)
5726 {
5727 if (!external_buffer_to_flush)
5728 translog_buffer_decrease_writers(buffer_to_flush);
5729 if (!rc)
5730 rc= translog_buffer_flush(buffer_to_flush);
5731 translog_buffer_unlock(buffer_to_flush);
5732 buffer_to_flush= NULL;
5733 }
5734 external_buffer_to_flush= FALSE;
5735
5736 if (rc)
5737 {
5738 DBUG_PRINT("error", ("flush of unlock buffer failed"));
5739 //translog_advance_pointer decreased writers so it is OK
5740 DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
5741 goto err;
5742 }
5743
5744 translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
5745 translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
5746 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT " "
5747 "Left %lu",
5748 LSN_IN_PARTS(log_descriptor.horizon),
5749 LSN_IN_PARTS(horizon),
5750 (ulong) (parts->record_length - (first_page - 1) -
5751 done)));
5752
5753 for (i= 0; i < full_pages; i++)
5754 {
5755 if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
5756 goto err;
5757
5758 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " "
5759 "local: " LSN_FMT " "
5760 "Left: %lu",
5761 LSN_IN_PARTS(log_descriptor.horizon),
5762 LSN_IN_PARTS(horizon),
5763 (ulong) (parts->record_length - (first_page - 1) -
5764 i * log_descriptor.page_capacity_chunk_2 -
5765 done)));
5766 }
5767
5768 done+= (first_page - 1 + buffer_rest);
5769
5770 if (translog_chaser_page_next(&horizon, &cursor))
5771 {
5772 DBUG_PRINT("error", ("flush of unlock buffer failed"));
5773 goto err;
5774 }
5775 translog_buffer_lock(cursor.buffer);
5776 translog_buffer_decrease_writers(cursor.buffer);
5777 used_buffs_register_unlock(&cursor.buffs, cursor.buffer);
5778 translog_buffer_unlock(cursor.buffer);
5779
5780 translog_lock();
5781
5782 /* Check that we have place for chunk type 2 */
5783 first_page= translog_get_current_page_rest();
5784 if (first_page <= 1)
5785 {
5786 if (translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
5787 &buffer_to_flush))
5788 goto err_unlock;
5789 first_page= translog_get_current_page_rest();
5790 }
5791 buffer_rest= translog_get_current_group_size();
5792
5793 if (buffer_to_flush)
5794 used_buffs_register_unlock(&cursor.buffs,
5795 buffer_to_flush); // will be unlocked
5796
5797 } while ((translog_size_t)(first_page + buffer_rest) <
5798 (translog_size_t)(parts->record_length - done));
5799
5800 group.addr= horizon= log_descriptor.horizon;
5801 cursor= log_descriptor.bc;
5802 cursor.chaser= 1;
5803 group.num= 0; /* 0 because it does not matter */
5804 if (insert_dynamic(&groups, (uchar*) &group))
5805 {
5806 DBUG_PRINT("error", ("insert into array failed"));
5807 goto err_unlock;
5808 }
5809 record_rest= parts->record_length - done;
5810 DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest));
5811 if (first_page > record_rest + 1)
5812 {
5813 /*
5814 We have not so much data to fill all first page
5815 (no speaking about full pages)
5816 so it will be:
5817 <chunk0 <data>>
5818 or
5819 <chunk0>...<chunk0><chunk0 <data>>
5820 or
5821 <chunk3 <data>><chunk0>...<chunk0><chunk0 <possible data of 1 byte>>
5822 */
5823 chunk2_page= full_pages= 0;
5824 last_page_capacity= first_page;
5825 pages_to_skip= -1;
5826 }
5827 else
5828 {
5829 /*
5830 We will have:
5831 <chunk2 <data>>...<chunk2 <data>><chunk0 <data>>
5832 or
5833 <chunk2 <data>>...<chunk2 <data>><chunk0>...<chunk0><chunk0 <data>>
5834 or
5835 <chunk3 <data>><chunk0>...<chunk0><chunk0 <possible data of 1 byte>>
5836 */
5837 chunk2_page= 1;
5838 record_rest-= (first_page - 1);
5839 pages_to_skip= full_pages=
5840 record_rest / log_descriptor.page_capacity_chunk_2;
5841 record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
5842 last_page_capacity= page_capacity;
5843 }
5844 chunk3_size= 0;
5845 chunk3_pages= 0;
5846 if (last_page_capacity > record_rest + 1 && record_rest != 0)
5847 {
5848 if (last_page_capacity >
5849 record_rest + header_fixed_part + groups.elements * (7 + 1))
5850 {
5851 /* 1 record of type 0 */
5852 chunk3_pages= 0;
5853 }
5854 else
5855 {
5856 pages_to_skip++;
5857 chunk3_pages= 1;
5858 if (record_rest + 2 == last_page_capacity)
5859 {
5860 chunk3_size= record_rest - 1;
5861 record_rest= 1;
5862 }
5863 else
5864 {
5865 chunk3_size= record_rest;
5866 record_rest= 0;
5867 }
5868 }
5869 }
5870 /*
5871 A first non-full page will hold type 0 chunk only if it fit in it with
5872 all its headers
5873 */
5874 while (page_capacity <
5875 record_rest + header_fixed_part +
5876 (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1))
5877 chunk0_pages++;
5878 DBUG_PRINT("info", ("chunk0_pages: %u groups %u groups per full page: %u "
5879 "Group on last page: %u",
5880 chunk0_pages, groups.elements,
5881 groups_per_page,
5882 (groups.elements -
5883 ((page_capacity - header_fixed_part) / (7 + 1)) *
5884 (chunk0_pages - 1))));
5885 DBUG_PRINT("info", ("first_page: %u chunk2: %u full_pages: %u (%lu) "
5886 "chunk3: %u (%u) rest: %u",
5887 first_page,
5888 chunk2_page, full_pages,
5889 (ulong) full_pages *
5890 log_descriptor.page_capacity_chunk_2,
5891 chunk3_pages, (uint) chunk3_size, (uint) record_rest));
5892
5893 DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
5894 rc= translog_advance_pointer(pages_to_skip + (int)(chunk0_pages - 1),
5895 record_rest + header_fixed_part +
5896 (groups.elements -
5897 ((page_capacity -
5898 header_fixed_part) / (7 + 1)) *
5899 (chunk0_pages - 1)) * (7 + 1),
5900 &cursor.buffs);
5901 buffer_of_last_lsn= log_descriptor.bc.buffer;
5902 translog_unlock();
5903
5904 if (buffer_to_flush != NULL)
5905 {
5906 DBUG_ASSERT(!external_buffer_to_flush);
5907 translog_buffer_decrease_writers(buffer_to_flush);
5908 if (!rc)
5909 rc= translog_buffer_flush(buffer_to_flush);
5910 translog_buffer_unlock(buffer_to_flush);
5911 buffer_to_flush= NULL;
5912 }
5913 if (rc)
5914 {
5915 DBUG_PRINT("error", ("flush of unlock buffer failed"));
5916 goto err;
5917 }
5918
5919 if (rc)
5920 goto err;
5921
5922 if (chunk2_page)
5923 {
5924 DBUG_PRINT("info", ("chunk 2 to finish first page"));
5925 translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
5926 translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
5927 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT " "
5928 "Left: %lu",
5929 LSN_IN_PARTS(log_descriptor.horizon),
5930 LSN_IN_PARTS(horizon),
5931 (ulong) (parts->record_length - (first_page - 1) -
5932 done)));
5933 }
5934 else if (chunk3_pages)
5935 {
5936 uchar chunk3_header[3];
5937 DBUG_PRINT("info", ("chunk 3"));
5938 DBUG_ASSERT(full_pages == 0);
5939 chunk3_pages= 0;
5940 chunk3_header[0]= TRANSLOG_CHUNK_LNGTH;
5941 int2store(chunk3_header + 1, chunk3_size);
5942 translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header);
5943 translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts);
5944 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT " "
5945 "Left: %lu",
5946 LSN_IN_PARTS(log_descriptor.horizon),
5947 LSN_IN_PARTS(horizon),
5948 (ulong) (parts->record_length - chunk3_size - done)));
5949 }
5950 else
5951 {
5952 DBUG_PRINT("info", ("no new_page_before_chunk0"));
5953 new_page_before_chunk0= 0;
5954 }
5955
5956 for (i= 0; i < full_pages; i++)
5957 {
5958 DBUG_ASSERT(chunk2_page != 0);
5959 if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
5960 goto err;
5961
5962 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT " "
5963 "Left: %lu",
5964 LSN_IN_PARTS(log_descriptor.horizon),
5965 LSN_IN_PARTS(horizon),
5966 (ulong) (parts->record_length - (first_page - 1) -
5967 i * log_descriptor.page_capacity_chunk_2 -
5968 done)));
5969 }
5970
5971 if (chunk3_pages &&
5972 translog_write_variable_record_chunk3_page(parts,
5973 chunk3_size,
5974 &horizon, &cursor))
5975 goto err;
5976 DBUG_PRINT("info", ("absolute horizon: " LSN_FMT " local: " LSN_FMT,
5977 LSN_IN_PARTS(log_descriptor.horizon),
5978 LSN_IN_PARTS(horizon)));
5979
5980 *chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
5981 int2store(chunk0_header + 1, short_trid);
5982 translog_write_variable_record_1group_code_len(chunk0_header + 3,
5983 parts->record_length,
5984 header_length);
5985 do
5986 {
5987 int limit;
5988 if (new_page_before_chunk0 &&
5989 translog_chaser_page_next(&horizon, &cursor))
5990 {
5991 DBUG_PRINT("error", ("flush of unlock buffer failed"));
5992 goto err;
5993 }
5994 new_page_before_chunk0= 1;
5995
5996 if (first_chunk0)
5997 {
5998 first_chunk0= 0;
5999
6000 /*
6001 We can drop "log_descriptor.is_everything_flushed" earlier when have
6002 lock on loghandler and assign initial value of "horizon" variable or
6003 before unlocking loghandler (because we will increase writers
6004 counter on the buffer and every thread which wanted flush the buffer
6005 will wait till we finish with it). But IMHO better here take short
6006 lock and do not bother other threads with waiting.
6007 */
6008 translog_lock();
6009 set_lsn(lsn, horizon);
6010 buffer_of_last_lsn->last_lsn= *lsn;
6011 DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p",
6012 LSN_IN_PARTS(buffer_of_last_lsn->last_lsn),
6013 buffer_of_last_lsn));
6014 if (log_record_type_descriptor[type].inwrite_hook &&
6015 (*log_record_type_descriptor[type].inwrite_hook) (type, trn,
6016 tbl_info,
6017 lsn, hook_arg))
6018 goto err_unlock;
6019 translog_unlock();
6020 }
6021
6022 /*
6023 A first non-full page will hold type 0 chunk only if it fit in it with
6024 all its headers => the fist page is full or number of groups less then
6025 possible number of full page.
6026 */
6027 limit= (groups_per_page < groups.elements - curr_group ?
6028 groups_per_page : groups.elements - curr_group);
6029 DBUG_PRINT("info", ("Groups: %u curr: %u limit: %u",
6030 (uint) groups.elements, (uint) curr_group,
6031 (uint) limit));
6032
6033 if (chunk0_pages == 1)
6034 {
6035 DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u",
6036 (uint) limit, (uint) record_rest,
6037 (uint) (2 + limit * (7 + 1) + record_rest)));
6038 int2store(chunk0_header + header_length - 2,
6039 2 + limit * (7 + 1) + record_rest);
6040 }
6041 else
6042 {
6043 DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u",
6044 (uint) limit, (uint) (2 + limit * (7 + 1))));
6045 int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1));
6046 }
6047 int2store(chunk0_header + header_length, groups.elements - curr_group);
6048 translog_write_data_on_page(&horizon, &cursor, header_fixed_part,
6049 chunk0_header);
6050 for (i= curr_group; i < limit + curr_group; i++)
6051 {
6052 struct st_translog_group_descriptor *grp_ptr;
6053 grp_ptr= dynamic_element(&groups, i,
6054 struct st_translog_group_descriptor *);
6055 lsn_store(group_desc, grp_ptr->addr);
6056 group_desc[7]= grp_ptr->num;
6057 translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc);
6058 }
6059
6060 if (chunk0_pages == 1 && record_rest != 0)
6061 translog_write_parts_on_page(&horizon, &cursor, record_rest, parts);
6062
6063 chunk0_pages--;
6064 curr_group+= limit;
6065 /* put special type to indicate that it is not LSN chunk */
6066 *chunk0_header= (uchar) (TRANSLOG_CHUNK_LSN | TRANSLOG_CHUNK_0_CONT);
6067 } while (chunk0_pages != 0);
6068 translog_buffer_lock(cursor.buffer);
6069 translog_buffer_decrease_writers(cursor.buffer);
6070 used_buffs_register_unlock(&cursor.buffs, cursor.buffer);
6071 translog_buffer_unlock(cursor.buffer);
6072 rc= 0;
6073 DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
6074
6075 if (translog_set_lsn_for_files(file_of_the_first_group, LSN_FILE_NO(*lsn),
6076 *lsn, FALSE))
6077 goto err;
6078
6079 translog_mark_file_finished(file_of_the_first_group);
6080
6081 delete_dynamic(&groups);
6082 DBUG_RETURN(0);
6083
6084 err_unlock:
6085
6086 translog_unlock();
6087
6088 err:
6089
6090 if (cursor.buffs.unlck_ptr != cursor.buffs.wrt_ptr)
6091 used_buffs_urgent_unlock(&cursor.buffs);
6092
6093 if (buffer_to_flush != NULL)
6094 {
6095 /* This is to prevent locking buffer forever in case of error */
6096 if (!external_buffer_to_flush)
6097 translog_buffer_decrease_writers(buffer_to_flush);
6098 if (!rc)
6099 rc= translog_buffer_flush(buffer_to_flush);
6100 translog_buffer_unlock(buffer_to_flush);
6101 buffer_to_flush= NULL;
6102 }
6103
6104
6105 translog_mark_file_finished(file_of_the_first_group);
6106
6107 delete_dynamic(&groups);
6108 DBUG_RETURN(1);
6109 }
6110
6111
6112 /**
6113 @brief Write the variable length log record.
6114
6115 @param lsn LSN of the record will be written here
6116 @param type the log record type
6117 @param short_trid Short transaction ID or 0 if it has no sense
6118 @param parts Descriptor of record source parts
6119 @param trn Transaction structure pointer for hooks by
6120 record log type, for short_id
6121 @param hook_arg Argument which will be passed to pre-write and
6122 in-write hooks of this record.
6123
6124 @return Operation status
6125 @retval 0 OK
6126 @retval 1 Error
6127 */
6128
translog_write_variable_record(LSN * lsn,enum translog_record_type type,MARIA_HA * tbl_info,SHORT_TRANSACTION_ID short_trid,struct st_translog_parts * parts,TRN * trn,void * hook_arg)6129 static my_bool translog_write_variable_record(LSN *lsn,
6130 enum translog_record_type type,
6131 MARIA_HA *tbl_info,
6132 SHORT_TRANSACTION_ID short_trid,
6133 struct st_translog_parts *parts,
6134 TRN *trn, void *hook_arg)
6135 {
6136 struct st_translog_buffer *buffer_to_flush= NULL;
6137 uint header_length1= 1 + 2 + 2 +
6138 translog_variable_record_length_bytes(parts->record_length);
6139 ulong buffer_rest;
6140 uint page_rest;
6141 /* Max number of such LSNs per record is 2 */
6142 uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
6143 COMPRESSED_LSN_MAX_STORE_SIZE];
6144 my_bool res;
6145 DBUG_ENTER("translog_write_variable_record");
6146
6147 translog_lock();
6148 DBUG_PRINT("info", ("horizon: " LSN_FMT,
6149 LSN_IN_PARTS(log_descriptor.horizon)));
6150 page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
6151 DBUG_PRINT("info", ("header length: %u page_rest: %u",
6152 header_length1, page_rest));
6153
6154 /*
6155 header and part which we should read have to fit in one chunk
6156 TODO: allow to divide readable header
6157 */
6158 if (page_rest <
6159 (header_length1 + log_record_type_descriptor[type].read_header_len))
6160 {
6161 DBUG_PRINT("info",
6162 ("Next page, size: %u header: %u + %u",
6163 log_descriptor.bc.current_page_fill,
6164 header_length1,
6165 log_record_type_descriptor[type].read_header_len));
6166 translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
6167 &buffer_to_flush);
6168 /* Chunk 2 header is 1 byte, so full page capacity will be one uchar more */
6169 page_rest= log_descriptor.page_capacity_chunk_2 + 1;
6170 DBUG_PRINT("info", ("page_rest: %u", page_rest));
6171 }
6172
6173 /*
6174 To minimize compressed size we will compress always relative to
6175 very first chunk address (log_descriptor.horizon for now)
6176 */
6177 if (log_record_type_descriptor[type].compressed_LSN > 0)
6178 {
6179 translog_relative_LSN_encode(parts, log_descriptor.horizon,
6180 log_record_type_descriptor[type].
6181 compressed_LSN, compressed_LSNs);
6182 /* recalculate header length after compression */
6183 header_length1= 1 + 2 + 2 +
6184 translog_variable_record_length_bytes(parts->record_length);
6185 DBUG_PRINT("info", ("after compressing LSN(s) header length: %u "
6186 "record length: %lu",
6187 header_length1, (ulong)parts->record_length));
6188 }
6189
6190 /* TODO: check space on current page for header + few bytes */
6191 if (page_rest >= parts->record_length + header_length1)
6192 {
6193 /* following function makes translog_unlock(); */
6194 res= translog_write_variable_record_1chunk(lsn, type, tbl_info,
6195 short_trid,
6196 parts, buffer_to_flush,
6197 header_length1, trn, hook_arg);
6198 DBUG_RETURN(res);
6199 }
6200
6201 buffer_rest= translog_get_current_group_size();
6202
6203 if (buffer_rest >= parts->record_length + header_length1 - page_rest)
6204 {
6205 /* following function makes translog_unlock(); */
6206 res= translog_write_variable_record_1group(lsn, type, tbl_info,
6207 short_trid,
6208 parts, buffer_to_flush,
6209 header_length1, trn, hook_arg);
6210 DBUG_RETURN(res);
6211 }
6212 /* following function makes translog_unlock(); */
6213 res= translog_write_variable_record_mgroup(lsn, type, tbl_info,
6214 short_trid,
6215 parts, buffer_to_flush,
6216 header_length1,
6217 buffer_rest, trn, hook_arg);
6218 DBUG_RETURN(res);
6219 }
6220
6221
6222 /**
6223 @brief Write the fixed and pseudo-fixed log record.
6224
6225 @param lsn LSN of the record will be written here
6226 @param type the log record type
6227 @param short_trid Short transaction ID or 0 if it has no sense
6228 @param parts Descriptor of record source parts
6229 @param trn Transaction structure pointer for hooks by
6230 record log type, for short_id
6231 @param hook_arg Argument which will be passed to pre-write and
6232 in-write hooks of this record.
6233
6234 @return Operation status
6235 @retval 0 OK
6236 @retval 1 Error
6237 */
6238
translog_write_fixed_record(LSN * lsn,enum translog_record_type type,MARIA_HA * tbl_info,SHORT_TRANSACTION_ID short_trid,struct st_translog_parts * parts,TRN * trn,void * hook_arg)6239 static my_bool translog_write_fixed_record(LSN *lsn,
6240 enum translog_record_type type,
6241 MARIA_HA *tbl_info,
6242 SHORT_TRANSACTION_ID short_trid,
6243 struct st_translog_parts *parts,
6244 TRN *trn, void *hook_arg)
6245 {
6246 struct st_translog_buffer *buffer_to_flush= NULL;
6247 uchar chunk1_header[1 + 2];
6248 /* Max number of such LSNs per record is 2 */
6249 uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
6250 COMPRESSED_LSN_MAX_STORE_SIZE];
6251 LEX_CUSTRING *part;
6252 int rc= 1;
6253 DBUG_ENTER("translog_write_fixed_record");
6254 DBUG_ASSERT((log_record_type_descriptor[type].rclass ==
6255 LOGRECTYPE_FIXEDLENGTH &&
6256 parts->record_length ==
6257 log_record_type_descriptor[type].fixed_length) ||
6258 (log_record_type_descriptor[type].rclass ==
6259 LOGRECTYPE_PSEUDOFIXEDLENGTH &&
6260 parts->record_length ==
6261 log_record_type_descriptor[type].fixed_length));
6262
6263 translog_lock();
6264 DBUG_PRINT("info", ("horizon: " LSN_FMT,
6265 LSN_IN_PARTS(log_descriptor.horizon)));
6266
6267 DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE);
6268 DBUG_PRINT("info",
6269 ("Page size: %u record: %u next cond: %d",
6270 log_descriptor.bc.current_page_fill,
6271 (parts->record_length +
6272 log_record_type_descriptor[type].compressed_LSN * 2 + 3),
6273 ((((uint) log_descriptor.bc.current_page_fill) +
6274 (parts->record_length +
6275 log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
6276 TRANSLOG_PAGE_SIZE)));
6277 /*
6278 check that there is enough place on current page.
6279 NOTE: compressing may increase page LSN size on two bytes for every LSN
6280 */
6281 if ((((uint) log_descriptor.bc.current_page_fill) +
6282 (parts->record_length +
6283 log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
6284 TRANSLOG_PAGE_SIZE)
6285 {
6286 DBUG_PRINT("info", ("Next page"));
6287 if (translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
6288 &buffer_to_flush))
6289 goto err; /* rc == 1 */
6290 if (buffer_to_flush)
6291 translog_buffer_lock_assert_owner(buffer_to_flush);
6292 }
6293
6294 set_lsn(lsn, log_descriptor.horizon);
6295 if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
6296 *lsn, TRUE) ||
6297 (log_record_type_descriptor[type].inwrite_hook &&
6298 (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
6299 lsn, hook_arg)))
6300 goto err;
6301
6302 /* compress LSNs */
6303 if (log_record_type_descriptor[type].rclass ==
6304 LOGRECTYPE_PSEUDOFIXEDLENGTH)
6305 {
6306 DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0);
6307 translog_relative_LSN_encode(parts, *lsn,
6308 log_record_type_descriptor[type].
6309 compressed_LSN, compressed_LSNs);
6310 }
6311
6312 /*
6313 Write the whole record at once (we know that there is enough place on
6314 the destination page)
6315 */
6316 DBUG_ASSERT(parts->current != 0); /* first part is left for header */
6317 part= parts->parts + (--parts->current);
6318 parts->total_record_length+= (translog_size_t) (part->length= 1 + 2);
6319 part->str= chunk1_header;
6320 *chunk1_header= (uchar) (type | TRANSLOG_CHUNK_FIXED);
6321 int2store(chunk1_header + 1, short_trid);
6322
6323 rc= translog_write_parts_on_page(&log_descriptor.horizon,
6324 &log_descriptor.bc,
6325 parts->total_record_length, parts);
6326
6327 log_descriptor.bc.buffer->last_lsn= *lsn;
6328 DBUG_PRINT("info", ("last_lsn set to " LSN_FMT " buffer: %p",
6329 LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
6330 log_descriptor.bc.buffer));
6331
6332 err:
6333 translog_unlock();
6334
6335 /*
6336 check if we switched buffer and need process it (current buffer is
6337 unlocked already => we will not delay other threads
6338 */
6339 if (buffer_to_flush != NULL)
6340 {
6341 if (!rc)
6342 rc= translog_buffer_flush(buffer_to_flush);
6343 translog_buffer_unlock(buffer_to_flush);
6344 }
6345
6346 DBUG_RETURN(rc);
6347 }
6348
6349
6350 /**
6351 @brief Writes the log record
6352
6353 If share has no 2-byte-id yet, gives an id to the share and logs
6354 LOGREC_FILE_ID. If transaction has not logged LOGREC_LONG_TRANSACTION_ID
6355 yet, logs it.
6356
6357 @param lsn LSN of the record will be written here
6358 @param type the log record type
6359 @param trn Transaction structure pointer for hooks by
6360 record log type, for short_id
6361 @param tbl_info MARIA_HA of table or NULL
6362 @param rec_len record length or 0 (count it)
6363 @param part_no number of parts or 0 (count it)
6364 @param parts_data zero ended (in case of number of parts is 0)
6365 array of LEX_STRINGs (parts), first
6366 TRANSLOG_INTERNAL_PARTS positions in the log
6367 should be unused (need for loghandler)
6368 @param store_share_id if tbl_info!=NULL then share's id will
6369 automatically be stored in the two first bytes
6370 pointed (so pointer is assumed to be !=NULL)
6371 @param hook_arg argument which will be passed to pre-write and
6372 in-write hooks of this record.
6373
6374 @return Operation status
6375 @retval 0 OK
6376 @retval 1 Error
6377 */
6378
translog_write_record(LSN * lsn,enum translog_record_type type,TRN * trn,MARIA_HA * tbl_info,translog_size_t rec_len,uint part_no,LEX_CUSTRING * parts_data,uchar * store_share_id,void * hook_arg)6379 my_bool translog_write_record(LSN *lsn,
6380 enum translog_record_type type,
6381 TRN *trn, MARIA_HA *tbl_info,
6382 translog_size_t rec_len,
6383 uint part_no,
6384 LEX_CUSTRING *parts_data,
6385 uchar *store_share_id,
6386 void *hook_arg)
6387 {
6388 struct st_translog_parts parts;
6389 LEX_CUSTRING *part;
6390 int rc;
6391 uint short_trid= trn->short_id;
6392 DBUG_ENTER("translog_write_record");
6393 DBUG_PRINT("enter", ("type: %u (%s) ShortTrID: %u rec_len: %lu",
6394 (uint) type, log_record_type_descriptor[type].name,
6395 (uint) short_trid, (ulong) rec_len));
6396 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6397 translog_status == TRANSLOG_READONLY);
6398 DBUG_ASSERT(type != 0);
6399 DBUG_SLOW_ASSERT((uint)type <= max_allowed_translog_type);
6400 if (unlikely(translog_status != TRANSLOG_OK))
6401 {
6402 DBUG_PRINT("error", ("Transaction log is write protected"));
6403 DBUG_RETURN(1);
6404 }
6405
6406 if (tbl_info && type != LOGREC_FILE_ID)
6407 {
6408 MARIA_SHARE *share= tbl_info->s;
6409 DBUG_ASSERT(share->now_transactional);
6410 if (unlikely(share->id == 0))
6411 {
6412 /*
6413 First log write for this MARIA_SHARE; give it a short id.
6414 When the lock manager is enabled and needs a short id, it should be
6415 assigned in the lock manager (because row locks will be taken before
6416 log records are written; for example SELECT FOR UPDATE takes locks but
6417 writes no log record.
6418 */
6419 if (unlikely(translog_assign_id_to_share(tbl_info, trn)))
6420 DBUG_RETURN(1);
6421 }
6422 fileid_store(store_share_id, share->id);
6423 }
6424 if (unlikely(!(trn->first_undo_lsn & TRANSACTION_LOGGED_LONG_ID)))
6425 {
6426 LSN dummy_lsn;
6427 LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6428 uchar log_data[6];
6429 DBUG_ASSERT(trn->undo_lsn == LSN_IMPOSSIBLE);
6430 int6store(log_data, trn->trid);
6431 log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
6432 log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6433 trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID; /* no recursion */
6434 if (unlikely(translog_write_record(&dummy_lsn, LOGREC_LONG_TRANSACTION_ID,
6435 trn, NULL, sizeof(log_data),
6436 sizeof(log_array)/sizeof(log_array[0]),
6437 log_array, NULL, NULL)))
6438 DBUG_RETURN(1);
6439 }
6440
6441 parts.parts= parts_data;
6442
6443 /* count parts if they are not counted by upper level */
6444 if (part_no == 0)
6445 {
6446 for (part_no= TRANSLOG_INTERNAL_PARTS;
6447 parts_data[part_no].length != 0;
6448 part_no++);
6449 }
6450 parts.elements= part_no;
6451 parts.current= TRANSLOG_INTERNAL_PARTS;
6452
6453 /* clear TRANSLOG_INTERNAL_PARTS */
6454 compile_time_assert(TRANSLOG_INTERNAL_PARTS != 0);
6455 parts_data[0].str= 0;
6456 parts_data[0].length= 0;
6457
6458 /* count length of the record */
6459 if (rec_len == 0)
6460 {
6461 for(part= parts_data + TRANSLOG_INTERNAL_PARTS;\
6462 part < parts_data + part_no;
6463 part++)
6464 {
6465 rec_len+= (translog_size_t) part->length;
6466 }
6467 }
6468 parts.record_length= rec_len;
6469
6470 #ifndef DBUG_OFF
6471 {
6472 uint i;
6473 size_t len= 0;
6474 #ifdef HAVE_valgrind
6475 ha_checksum checksum= 0;
6476 #endif
6477 for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++)
6478 {
6479 #ifdef HAVE_valgrind
6480 /* Find unitialized bytes early */
6481 checksum+= my_checksum(checksum, parts_data[i].str,
6482 parts_data[i].length);
6483 #endif
6484 len+= parts_data[i].length;
6485 }
6486 DBUG_ASSERT(len == rec_len);
6487 }
6488 #endif
6489 /*
6490 Start total_record_length from record_length then overhead will
6491 be add
6492 */
6493 parts.total_record_length= parts.record_length;
6494 DBUG_PRINT("info", ("record length: %lu", (ulong) parts.record_length));
6495
6496 /* process this parts */
6497 if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
6498 (*log_record_type_descriptor[type].prewrite_hook)(type, trn,
6499 tbl_info,
6500 hook_arg))))
6501 {
6502 switch (log_record_type_descriptor[type].rclass) {
6503 case LOGRECTYPE_VARIABLE_LENGTH:
6504 rc= translog_write_variable_record(lsn, type, tbl_info,
6505 short_trid, &parts, trn, hook_arg);
6506 break;
6507 case LOGRECTYPE_PSEUDOFIXEDLENGTH:
6508 case LOGRECTYPE_FIXEDLENGTH:
6509 rc= translog_write_fixed_record(lsn, type, tbl_info,
6510 short_trid, &parts, trn, hook_arg);
6511 break;
6512 case LOGRECTYPE_NOT_ALLOWED:
6513 default:
6514 DBUG_ASSERT(0);
6515 rc= 1;
6516 }
6517 }
6518
6519 DBUG_PRINT("info", ("LSN: " LSN_FMT, LSN_IN_PARTS(*lsn)));
6520 DBUG_RETURN(rc);
6521 }
6522
6523
6524 /*
6525 Decode compressed (relative) LSN(s)
6526
6527 SYNOPSIS
6528 translog_relative_lsn_decode()
6529 base_lsn LSN for encoding
6530 src Decode LSN(s) from here
6531 dst Put decoded LSNs here
6532 lsns number of LSN(s)
6533
6534 RETURN
6535 position in sources after decoded LSN(s)
6536 */
6537
translog_relative_LSN_decode(LSN base_lsn,uchar * src,uchar * dst,uint lsns)6538 static uchar *translog_relative_LSN_decode(LSN base_lsn,
6539 uchar *src, uchar *dst, uint lsns)
6540 {
6541 uint i;
6542 for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE)
6543 {
6544 src= translog_get_LSN_from_diff(base_lsn, src, dst);
6545 }
6546 return src;
6547 }
6548
6549 /**
6550 @brief Get header of fixed/pseudo length record and call hook for
6551 it processing
6552
6553 @param page Pointer to the buffer with page where LSN chunk is
6554 placed
6555 @param page_offset Offset of the first chunk in the page
6556 @param buff Buffer to be filled with header data
6557
6558 @return Length of header or operation status
6559 @retval # number of bytes in TRANSLOG_HEADER_BUFFER::header where
6560 stored decoded part of the header
6561 */
6562
translog_fixed_length_header(uchar * page,translog_size_t page_offset,TRANSLOG_HEADER_BUFFER * buff)6563 static int translog_fixed_length_header(uchar *page,
6564 translog_size_t page_offset,
6565 TRANSLOG_HEADER_BUFFER *buff)
6566 {
6567 struct st_log_record_type_descriptor *desc=
6568 log_record_type_descriptor + buff->type;
6569 uchar *src= page + page_offset + 3;
6570 uchar *dst= buff->header;
6571 uchar *start= src;
6572 int lsns= desc->compressed_LSN;
6573 uint length= desc->fixed_length;
6574 DBUG_ENTER("translog_fixed_length_header");
6575
6576 buff->record_length= length;
6577
6578 if (desc->rclass == LOGRECTYPE_PSEUDOFIXEDLENGTH)
6579 {
6580 DBUG_ASSERT(lsns > 0);
6581 src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns);
6582 lsns*= LSN_STORE_SIZE;
6583 dst+= lsns;
6584 length-= lsns;
6585 buff->compressed_LSN_economy= (lsns - (int) (src - start));
6586 }
6587 else
6588 buff->compressed_LSN_economy= 0;
6589
6590 memcpy(dst, src, length);
6591 buff->non_header_data_start_offset= (uint16) (page_offset +
6592 ((src + length) -
6593 (page + page_offset)));
6594 buff->non_header_data_len= 0;
6595 DBUG_RETURN(buff->record_length);
6596 }
6597
6598
6599 /*
6600 Free resources used by TRANSLOG_HEADER_BUFFER
6601
6602 SYNOPSIS
6603 translog_free_record_header();
6604 */
6605
translog_free_record_header(TRANSLOG_HEADER_BUFFER * buff)6606 void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff)
6607 {
6608 DBUG_ENTER("translog_free_record_header");
6609 if (buff->groups_no != 0)
6610 {
6611 my_free(buff->groups);
6612 buff->groups_no= 0;
6613 }
6614 DBUG_VOID_RETURN;
6615 }
6616
6617
6618 /**
6619 @brief Returns the current horizon at the end of the current log
6620
6621 @return Horizon
6622 @retval LSN_ERROR error
6623 @retvar # Horizon
6624 */
6625
translog_get_horizon()6626 TRANSLOG_ADDRESS translog_get_horizon()
6627 {
6628 TRANSLOG_ADDRESS res;
6629 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6630 translog_status == TRANSLOG_READONLY);
6631 translog_lock();
6632 res= log_descriptor.horizon;
6633 translog_unlock();
6634 return res;
6635 }
6636
6637
6638 /**
6639 @brief Returns the current horizon at the end of the current log, caller is
6640 assumed to already hold the lock
6641
6642 @return Horizon
6643 @retval LSN_ERROR error
6644 @retvar # Horizon
6645 */
6646
translog_get_horizon_no_lock()6647 TRANSLOG_ADDRESS translog_get_horizon_no_lock()
6648 {
6649 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6650 translog_status == TRANSLOG_READONLY);
6651 translog_lock_assert_owner();
6652 return log_descriptor.horizon;
6653 }
6654
6655
6656 /*
6657 Set last page in the scanner data structure
6658
6659 SYNOPSIS
6660 translog_scanner_set_last_page()
6661 scanner Information about current chunk during scanning
6662
6663 RETURN
6664 0 OK
6665 1 Error
6666 */
6667
translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA * scanner)6668 static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA *scanner)
6669 {
6670 my_bool page_ok;
6671 if (LSN_FILE_NO(scanner->page_addr) == LSN_FILE_NO(scanner->horizon))
6672 {
6673 /* It is last file => we can easy find last page address by horizon */
6674 uint pagegrest= LSN_OFFSET(scanner->horizon) % TRANSLOG_PAGE_SIZE;
6675 scanner->last_file_page= (scanner->horizon -
6676 (pagegrest ? pagegrest : TRANSLOG_PAGE_SIZE));
6677 return (0);
6678 }
6679 scanner->last_file_page= scanner->page_addr;
6680 return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok, 0));
6681 }
6682
6683
6684 /**
6685 @brief Get page from page cache according to requested method
6686
6687 @param scanner The scanner data
6688
6689 @return operation status
6690 @retval 0 OK
6691 @retval 1 Error
6692 */
6693
6694 static my_bool
translog_scanner_get_page(TRANSLOG_SCANNER_DATA * scanner)6695 translog_scanner_get_page(TRANSLOG_SCANNER_DATA *scanner)
6696 {
6697 TRANSLOG_VALIDATOR_DATA data;
6698 DBUG_ENTER("translog_scanner_get_page");
6699 data.addr= &scanner->page_addr;
6700 data.was_recovered= 0;
6701 DBUG_RETURN((scanner->page=
6702 translog_get_page(&data, scanner->buffer,
6703 (scanner->use_direct_link ?
6704 &scanner->direct_link :
6705 NULL))) ==
6706 NULL);
6707 }
6708
6709
6710 /**
6711 @brief Initialize reader scanner.
6712
6713 @param lsn LSN with which it have to be inited
6714 @param fixed_horizon true if it is OK do not read records which was written
6715 after scanning beginning
6716 @param scanner scanner which have to be inited
6717 @param use_direct prefer using direct lings from page handler
6718 where it is possible.
6719
6720 @note If direct link was used translog_destroy_scanner should be
6721 called after it using
6722
6723 @return status of the operation
6724 @retval 0 OK
6725 @retval 1 Error
6726 */
6727
translog_scanner_init(LSN lsn,my_bool fixed_horizon,TRANSLOG_SCANNER_DATA * scanner,my_bool use_direct)6728 my_bool translog_scanner_init(LSN lsn,
6729 my_bool fixed_horizon,
6730 TRANSLOG_SCANNER_DATA *scanner,
6731 my_bool use_direct)
6732 {
6733 DBUG_ENTER("translog_scanner_init");
6734 DBUG_PRINT("enter", ("Scanner: %p LSN: " LSN_FMT,
6735 scanner, LSN_IN_PARTS(lsn)));
6736 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6737 translog_status == TRANSLOG_READONLY);
6738
6739 scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
6740
6741 scanner->fixed_horizon= fixed_horizon;
6742 scanner->use_direct_link= use_direct;
6743 scanner->direct_link= NULL;
6744
6745 scanner->horizon= translog_get_horizon();
6746 DBUG_PRINT("info", ("horizon: " LSN_FMT, LSN_IN_PARTS(scanner->horizon)));
6747
6748 /* lsn < horizon */
6749 DBUG_ASSERT(lsn <= scanner->horizon);
6750
6751 scanner->page_addr= lsn;
6752 scanner->page_addr-= scanner->page_offset; /*decrease offset */
6753
6754 if (translog_scanner_set_last_page(scanner))
6755 DBUG_RETURN(1);
6756
6757 if (translog_scanner_get_page(scanner))
6758 DBUG_RETURN(1);
6759 DBUG_RETURN(0);
6760 }
6761
6762
6763 /**
6764 @brief Destroy scanner object;
6765
6766 @param scanner The scanner object to destroy
6767 */
6768
translog_destroy_scanner(TRANSLOG_SCANNER_DATA * scanner)6769 void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner)
6770 {
6771 DBUG_ENTER("translog_destroy_scanner");
6772 DBUG_PRINT("enter", ("Scanner: %p", scanner));
6773 translog_free_link(scanner->direct_link);
6774 DBUG_VOID_RETURN;
6775 }
6776
6777
6778 /*
6779 Checks End of the Log
6780
6781 SYNOPSIS
6782 translog_scanner_eol()
6783 scanner Information about current chunk during scanning
6784
6785 RETURN
6786 1 End of the Log
6787 0 OK
6788 */
6789
translog_scanner_eol(TRANSLOG_SCANNER_DATA * scanner)6790 static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner)
6791 {
6792 DBUG_ENTER("translog_scanner_eol");
6793 DBUG_PRINT("enter",
6794 ("Horizon: " LSN_FMT " Current: (%u, 0x%x+0x%x=0x%x)",
6795 LSN_IN_PARTS(scanner->horizon),
6796 LSN_IN_PARTS(scanner->page_addr),
6797 (uint) scanner->page_offset,
6798 (uint) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset)));
6799 if (scanner->horizon > (scanner->page_addr +
6800 scanner->page_offset))
6801 {
6802 DBUG_PRINT("info", ("Horizon is not reached"));
6803 DBUG_RETURN(0);
6804 }
6805 if (scanner->fixed_horizon)
6806 {
6807 DBUG_PRINT("info", ("Horizon is fixed and reached"));
6808 DBUG_RETURN(1);
6809 }
6810 scanner->horizon= translog_get_horizon();
6811 DBUG_PRINT("info",
6812 ("Horizon is re-read, EOL: %d",
6813 scanner->horizon <= (scanner->page_addr +
6814 scanner->page_offset)));
6815 DBUG_RETURN(scanner->horizon <= (scanner->page_addr +
6816 scanner->page_offset));
6817 }
6818
6819
6820 /**
6821 @brief Cheks End of the Page
6822
6823 @param scanner Information about current chunk during scanning
6824
6825 @retval 1 End of the Page
6826 @retval 0 OK
6827 */
6828
translog_scanner_eop(TRANSLOG_SCANNER_DATA * scanner)6829 static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner)
6830 {
6831 DBUG_ENTER("translog_scanner_eop");
6832 DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE ||
6833 scanner->page[scanner->page_offset] == TRANSLOG_FILLER);
6834 }
6835
6836
6837 /**
6838 @brief Checks End of the File (i.e. we are scanning last page, which do not
6839 mean end of this page)
6840
6841 @param scanner Information about current chunk during scanning
6842
6843 @retval 1 End of the File
6844 @retval 0 OK
6845 */
6846
translog_scanner_eof(TRANSLOG_SCANNER_DATA * scanner)6847 static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner)
6848 {
6849 DBUG_ENTER("translog_scanner_eof");
6850 DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) ==
6851 LSN_FILE_NO(scanner->last_file_page));
6852 DBUG_PRINT("enter", ("curr Page: 0x%lx last page: 0x%lx "
6853 "normal EOF: %d",
6854 (ulong) LSN_OFFSET(scanner->page_addr),
6855 (ulong) LSN_OFFSET(scanner->last_file_page),
6856 LSN_OFFSET(scanner->page_addr) ==
6857 LSN_OFFSET(scanner->last_file_page)));
6858 /*
6859 TODO: detect damaged file EOF,
6860 TODO: issue warning if damaged file EOF detected
6861 */
6862 DBUG_RETURN(scanner->page_addr ==
6863 scanner->last_file_page);
6864 }
6865
6866 /*
6867 Move scanner to the next chunk
6868
6869 SYNOPSIS
6870 translog_get_next_chunk()
6871 scanner Information about current chunk during scanning
6872
6873 RETURN
6874 0 OK
6875 1 Error
6876 */
6877
6878 static my_bool
translog_get_next_chunk(TRANSLOG_SCANNER_DATA * scanner)6879 translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner)
6880 {
6881 uint16 len;
6882 DBUG_ENTER("translog_get_next_chunk");
6883
6884 if (translog_scanner_eop(scanner))
6885 len= TRANSLOG_PAGE_SIZE - scanner->page_offset;
6886 else if ((len= translog_get_total_chunk_length(scanner->page,
6887 scanner->page_offset)) == 0)
6888 DBUG_RETURN(1);
6889 scanner->page_offset+= len;
6890
6891 if (translog_scanner_eol(scanner))
6892 {
6893 scanner->page= END_OF_LOG;
6894 scanner->page_offset= 0;
6895 DBUG_RETURN(0);
6896 }
6897 if (translog_scanner_eop(scanner))
6898 {
6899 /* before reading next page we should unpin current one if it was pinned */
6900 translog_free_link(scanner->direct_link);
6901 if (translog_scanner_eof(scanner))
6902 {
6903 DBUG_PRINT("info", ("horizon: " LSN_FMT " pageaddr: " LSN_FMT,
6904 LSN_IN_PARTS(scanner->horizon),
6905 LSN_IN_PARTS(scanner->page_addr)));
6906 /* if it is log end it have to be caught before */
6907 DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) >
6908 LSN_FILE_NO(scanner->page_addr));
6909 scanner->page_addr+= LSN_ONE_FILE;
6910 scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr,
6911 TRANSLOG_PAGE_SIZE);
6912 if (translog_scanner_set_last_page(scanner))
6913 DBUG_RETURN(1);
6914 }
6915 else
6916 {
6917 scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */
6918 }
6919
6920 if (translog_scanner_get_page(scanner))
6921 DBUG_RETURN(1);
6922
6923 scanner->page_offset= translog_get_first_chunk_offset(scanner->page);
6924 if (translog_scanner_eol(scanner))
6925 {
6926 scanner->page= END_OF_LOG;
6927 scanner->page_offset= 0;
6928 DBUG_RETURN(0);
6929 }
6930 DBUG_ASSERT(scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
6931 }
6932 DBUG_RETURN(0);
6933 }
6934
6935
6936 /**
6937 @brief Get header of variable length record and call hook for it processing
6938
6939 @param page Pointer to the buffer with page where LSN chunk is
6940 placed
6941 @param page_offset Offset of the first chunk in the page
6942 @param buff Buffer to be filled with header data
6943 @param scanner If present should be moved to the header page if
6944 it differ from LSN page
6945
6946 @return Length of header or operation status
6947 @retval RECHEADER_READ_ERROR error
6948 @retval RECHEADER_READ_EOF End of the log reached during the read
6949 @retval # number of bytes in
6950 TRANSLOG_HEADER_BUFFER::header where
6951 stored decoded part of the header
6952 */
6953
6954 static int
translog_variable_length_header(uchar * page,translog_size_t page_offset,TRANSLOG_HEADER_BUFFER * buff,TRANSLOG_SCANNER_DATA * scanner)6955 translog_variable_length_header(uchar *page, translog_size_t page_offset,
6956 TRANSLOG_HEADER_BUFFER *buff,
6957 TRANSLOG_SCANNER_DATA *scanner)
6958 {
6959 struct st_log_record_type_descriptor *desc= (log_record_type_descriptor +
6960 buff->type);
6961 uchar *src= page + page_offset + 1 + 2;
6962 uchar *dst= buff->header;
6963 LSN base_lsn;
6964 uint lsns= desc->compressed_LSN;
6965 uint16 chunk_len;
6966 uint16 length= desc->read_header_len;
6967 uint16 buffer_length= length;
6968 uint16 body_len;
6969 int rc;
6970 TRANSLOG_SCANNER_DATA internal_scanner;
6971 DBUG_ENTER("translog_variable_length_header");
6972
6973 buff->record_length= translog_variable_record_1group_decode_len(&src);
6974 chunk_len= uint2korr(src);
6975 DBUG_PRINT("info", ("rec len: %lu chunk len: %u length: %u bufflen: %u",
6976 (ulong) buff->record_length, (uint) chunk_len,
6977 (uint) length, (uint) buffer_length));
6978 if (chunk_len == 0)
6979 {
6980 uint16 page_rest;
6981 DBUG_PRINT("info", ("1 group"));
6982 src+= 2;
6983 page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
6984
6985 base_lsn= buff->lsn;
6986 body_len= MY_MIN(page_rest, buff->record_length);
6987 }
6988 else
6989 {
6990 uint grp_no, curr;
6991 uint header_to_skip;
6992 uint16 page_rest;
6993
6994 DBUG_PRINT("info", ("multi-group"));
6995 grp_no= buff->groups_no= uint2korr(src + 2);
6996 if (!(buff->groups=
6997 (TRANSLOG_GROUP*) my_malloc(PSI_INSTRUMENT_ME, sizeof(TRANSLOG_GROUP) * grp_no,
6998 MYF(0))))
6999 DBUG_RETURN(RECHEADER_READ_ERROR);
7000 DBUG_PRINT("info", ("Groups: %u", (uint) grp_no));
7001 src+= (2 + 2);
7002 page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
7003 curr= 0;
7004 header_to_skip= (uint) (src - (page + page_offset));
7005 buff->chunk0_pages= 0;
7006
7007 for (;;)
7008 {
7009 uint i, read_length= grp_no;
7010
7011 buff->chunk0_pages++;
7012 if (page_rest < grp_no * (7 + 1))
7013 read_length= page_rest / (7 + 1);
7014 DBUG_PRINT("info", ("Read chunk0 page#%u read: %u left: %u "
7015 "start from: %u",
7016 buff->chunk0_pages, read_length, grp_no, curr));
7017 for (i= 0; i < read_length; i++, curr++)
7018 {
7019 DBUG_ASSERT(curr < buff->groups_no);
7020 buff->groups[curr].addr= lsn_korr(src + i * (7 + 1));
7021 buff->groups[curr].num= src[i * (7 + 1) + 7];
7022 DBUG_PRINT("info", ("group #%u " LSN_FMT " chunks: %u",
7023 curr,
7024 LSN_IN_PARTS(buff->groups[curr].addr),
7025 (uint) buff->groups[curr].num));
7026 }
7027 grp_no-= read_length;
7028 if (grp_no == 0)
7029 {
7030 if (scanner)
7031 {
7032 buff->chunk0_data_addr= scanner->page_addr;
7033 /* offset increased */
7034 buff->chunk0_data_addr+= (page_offset + header_to_skip +
7035 read_length * (7 + 1));
7036 }
7037 else
7038 {
7039 buff->chunk0_data_addr= buff->lsn;
7040 /* offset increased */
7041 buff->chunk0_data_addr+= (header_to_skip + read_length * (7 + 1));
7042 }
7043 buff->chunk0_data_len= chunk_len - 2 - read_length * (7 + 1);
7044 DBUG_PRINT("info", ("Data address: " LSN_FMT " len: %u",
7045 LSN_IN_PARTS(buff->chunk0_data_addr),
7046 buff->chunk0_data_len));
7047 break;
7048 }
7049 if (scanner == NULL)
7050 {
7051 DBUG_PRINT("info", ("use internal scanner for header reading"));
7052 scanner= &internal_scanner;
7053 if (translog_scanner_init(buff->lsn, 1, scanner, 0))
7054 {
7055 rc= RECHEADER_READ_ERROR;
7056 goto exit_and_free;
7057 }
7058 }
7059 if (translog_get_next_chunk(scanner))
7060 {
7061 if (scanner == &internal_scanner)
7062 translog_destroy_scanner(scanner);
7063 rc= RECHEADER_READ_ERROR;
7064 goto exit_and_free;
7065 }
7066 if (scanner->page == END_OF_LOG)
7067 {
7068 if (scanner == &internal_scanner)
7069 translog_destroy_scanner(scanner);
7070 rc= RECHEADER_READ_EOF;
7071 goto exit_and_free;
7072 }
7073 page= scanner->page;
7074 page_offset= scanner->page_offset;
7075 src= page + page_offset + header_to_skip;
7076 chunk_len= uint2korr(src - 2 - 2);
7077 DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len));
7078 page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
7079 }
7080
7081 if (scanner == NULL)
7082 {
7083 DBUG_PRINT("info", ("use internal scanner"));
7084 scanner= &internal_scanner;
7085 }
7086 else
7087 {
7088 translog_destroy_scanner(scanner);
7089 }
7090 base_lsn= buff->groups[0].addr;
7091 translog_scanner_init(base_lsn, 1, scanner, scanner == &internal_scanner);
7092 /* first group chunk is always chunk type 2 */
7093 page= scanner->page;
7094 page_offset= scanner->page_offset;
7095 src= page + page_offset + 1;
7096 page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
7097 body_len= page_rest;
7098 if (scanner == &internal_scanner)
7099 translog_destroy_scanner(scanner);
7100 }
7101 if (lsns)
7102 {
7103 uchar *start= src;
7104 src= translog_relative_LSN_decode(base_lsn, src, dst, lsns);
7105 lsns*= LSN_STORE_SIZE;
7106 dst+= lsns;
7107 length-= lsns;
7108 buff->record_length+= (buff->compressed_LSN_economy=
7109 (int) (lsns - (src - start)));
7110 DBUG_PRINT("info", ("lsns: %u length: %u economy: %d new length: %lu",
7111 lsns / LSN_STORE_SIZE, (uint) length,
7112 (int) buff->compressed_LSN_economy,
7113 (ulong) buff->record_length));
7114 body_len-= (uint16) (src - start);
7115 }
7116 else
7117 buff->compressed_LSN_economy= 0;
7118
7119 DBUG_ASSERT(body_len >= length);
7120 body_len-= length;
7121 memcpy(dst, src, length);
7122 buff->non_header_data_start_offset= (uint16) (src + length - page);
7123 buff->non_header_data_len= body_len;
7124 DBUG_PRINT("info", ("non_header_data_start_offset: %u len: %u buffer: %u",
7125 buff->non_header_data_start_offset,
7126 buff->non_header_data_len, buffer_length));
7127 DBUG_RETURN(buffer_length);
7128
7129 exit_and_free:
7130 my_free(buff->groups);
7131 buff->groups_no= 0; /* prevent try to use of buff->groups */
7132 DBUG_RETURN(rc);
7133 }
7134
7135
7136 /**
7137 @brief Read record header from the given buffer
7138
7139 @param page page content buffer
7140 @param page_offset offset of the chunk in the page
7141 @param buff destination buffer
7142 @param scanner If this is set the scanner will be moved to the
7143 record header page (differ from LSN page in case of
7144 multi-group records)
7145
7146 @return Length of header or operation status
7147 @retval RECHEADER_READ_ERROR error
7148 @retval # number of bytes in
7149 TRANSLOG_HEADER_BUFFER::header where
7150 stored decoded part of the header
7151 */
7152
translog_read_record_header_from_buffer(uchar * page,uint16 page_offset,TRANSLOG_HEADER_BUFFER * buff,TRANSLOG_SCANNER_DATA * scanner)7153 int translog_read_record_header_from_buffer(uchar *page,
7154 uint16 page_offset,
7155 TRANSLOG_HEADER_BUFFER *buff,
7156 TRANSLOG_SCANNER_DATA *scanner)
7157 {
7158 translog_size_t res;
7159 DBUG_ENTER("translog_read_record_header_from_buffer");
7160 DBUG_PRINT("info", ("page byte: 0x%x offset: %u",
7161 (uint) page[page_offset], (uint) page_offset));
7162 DBUG_ASSERT(translog_is_LSN_chunk(page[page_offset]));
7163 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7164 translog_status == TRANSLOG_READONLY);
7165 buff->type= (page[page_offset] & TRANSLOG_REC_TYPE);
7166 buff->short_trid= uint2korr(page + page_offset + 1);
7167 DBUG_PRINT("info", ("Type %u, Short TrID %u, LSN " LSN_FMT,
7168 (uint) buff->type, (uint)buff->short_trid,
7169 LSN_IN_PARTS(buff->lsn)));
7170 /* Read required bytes from the header and call hook */
7171 switch (log_record_type_descriptor[buff->type].rclass) {
7172 case LOGRECTYPE_VARIABLE_LENGTH:
7173 res= translog_variable_length_header(page, page_offset, buff,
7174 scanner);
7175 break;
7176 case LOGRECTYPE_PSEUDOFIXEDLENGTH:
7177 case LOGRECTYPE_FIXEDLENGTH:
7178 res= translog_fixed_length_header(page, page_offset, buff);
7179 break;
7180 default:
7181 DBUG_ASSERT(0); /* we read some junk (got no LSN) */
7182 res= RECHEADER_READ_ERROR;
7183 }
7184 DBUG_RETURN(res);
7185 }
7186
7187
7188 /**
7189 @brief Read record header and some fixed part of a record (the part depend
7190 on record type).
7191
7192 @param lsn log record serial number (address of the record)
7193 @param buff log record header buffer
7194
7195 @note Some type of record can be read completely by this call
7196 @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
7197 LSN can be translated to absolute one), some fields can be added (like
7198 actual header length in the record if the header has variable length)
7199
7200 @return Length of header or operation status
7201 @retval RECHEADER_READ_ERROR error
7202 @retval # number of bytes in
7203 TRANSLOG_HEADER_BUFFER::header where
7204 stored decoded part of the header
7205 */
7206
translog_read_record_header(LSN lsn,TRANSLOG_HEADER_BUFFER * buff)7207 int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff)
7208 {
7209 TRANSLOG_PAGE_SIZE_BUFF psize_buff;
7210 uchar *page;
7211 translog_size_t res, page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
7212 PAGECACHE_BLOCK_LINK *direct_link;
7213 TRANSLOG_ADDRESS addr;
7214 TRANSLOG_VALIDATOR_DATA data;
7215 DBUG_ENTER("translog_read_record_header");
7216 DBUG_PRINT("enter", ("LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
7217 DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
7218 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7219 translog_status == TRANSLOG_READONLY);
7220
7221 buff->lsn= lsn;
7222 buff->groups_no= 0;
7223 data.addr= &addr;
7224 data.was_recovered= 0;
7225 addr= lsn;
7226 addr-= page_offset; /* offset decreasing */
7227 res= (!(page= translog_get_page(&data, psize_buff.buffer, &direct_link))) ?
7228 RECHEADER_READ_ERROR :
7229 translog_read_record_header_from_buffer(page, page_offset, buff, 0);
7230 translog_free_link(direct_link);
7231 DBUG_RETURN(res);
7232 }
7233
7234
7235 /**
7236 @brief Read record header and some fixed part of a record (the part depend
7237 on record type).
7238
7239 @param scan scanner position to read
7240 @param buff log record header buffer
7241 @param move_scanner request to move scanner to the header position
7242
7243 @note Some type of record can be read completely by this call
7244 @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
7245 LSN can be translated to absolute one), some fields can be added (like
7246 actual header length in the record if the header has variable length)
7247
7248 @return Length of header or operation status
7249 @retval RECHEADER_READ_ERROR error
7250 @retval # number of bytes in
7251 TRANSLOG_HEADER_BUFFER::header where stored
7252 decoded part of the header
7253 */
7254
translog_read_record_header_scan(TRANSLOG_SCANNER_DATA * scanner,TRANSLOG_HEADER_BUFFER * buff,my_bool move_scanner)7255 int translog_read_record_header_scan(TRANSLOG_SCANNER_DATA *scanner,
7256 TRANSLOG_HEADER_BUFFER *buff,
7257 my_bool move_scanner)
7258 {
7259 translog_size_t res;
7260 DBUG_ENTER("translog_read_record_header_scan");
7261 DBUG_PRINT("enter", ("Scanner: Cur: " LSN_FMT " Hrz: " LSN_FMT " "
7262 "Lst: " LSN_FMT " Offset: %u(%x) fixed %d",
7263 LSN_IN_PARTS(scanner->page_addr),
7264 LSN_IN_PARTS(scanner->horizon),
7265 LSN_IN_PARTS(scanner->last_file_page),
7266 (uint) scanner->page_offset,
7267 (uint) scanner->page_offset, scanner->fixed_horizon));
7268 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7269 translog_status == TRANSLOG_READONLY);
7270 buff->groups_no= 0;
7271 buff->lsn= scanner->page_addr;
7272 buff->lsn+= scanner->page_offset; /* offset increasing */
7273 res= translog_read_record_header_from_buffer(scanner->page,
7274 scanner->page_offset,
7275 buff,
7276 (move_scanner ?
7277 scanner : 0));
7278 DBUG_RETURN(res);
7279 }
7280
7281
7282 /**
7283 @brief Read record header and some fixed part of the next record (the part
7284 depend on record type).
7285
7286 @param scanner data for scanning if lsn is NULL scanner data
7287 will be used for continue scanning.
7288 The scanner can be NULL.
7289
7290 @param buff log record header buffer
7291
7292 @return Length of header or operation status
7293 @retval RECHEADER_READ_ERROR error
7294 @retval RECHEADER_READ_EOF EOF
7295 @retval # number of bytes in
7296 TRANSLOG_HEADER_BUFFER::header where
7297 stored decoded part of the header
7298 */
7299
translog_read_next_record_header(TRANSLOG_SCANNER_DATA * scanner,TRANSLOG_HEADER_BUFFER * buff)7300 int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
7301 TRANSLOG_HEADER_BUFFER *buff)
7302 {
7303 translog_size_t res;
7304
7305 DBUG_ENTER("translog_read_next_record_header");
7306 buff->groups_no= 0; /* to be sure that we will free it right */
7307 DBUG_PRINT("enter", ("scanner: %p", scanner));
7308 DBUG_PRINT("info", ("Scanner: Cur: " LSN_FMT " Hrz: " LSN_FMT " "
7309 "Lst: " LSN_FMT " Offset: %u(%x) fixed: %d",
7310 LSN_IN_PARTS(scanner->page_addr),
7311 LSN_IN_PARTS(scanner->horizon),
7312 LSN_IN_PARTS(scanner->last_file_page),
7313 (uint) scanner->page_offset,
7314 (uint) scanner->page_offset, scanner->fixed_horizon));
7315 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7316 translog_status == TRANSLOG_READONLY);
7317
7318 do
7319 {
7320 if (translog_get_next_chunk(scanner))
7321 DBUG_RETURN(RECHEADER_READ_ERROR);
7322 if (scanner->page == END_OF_LOG)
7323 {
7324 DBUG_PRINT("info", ("End of file from the scanner"));
7325 /* Last record was read */
7326 buff->lsn= LSN_IMPOSSIBLE;
7327 DBUG_RETURN(RECHEADER_READ_EOF);
7328 }
7329 DBUG_PRINT("info", ("Page: " LSN_FMT " offset: %lu byte: %x",
7330 LSN_IN_PARTS(scanner->page_addr),
7331 (ulong) scanner->page_offset,
7332 (uint) scanner->page[scanner->page_offset]));
7333 } while (!translog_is_LSN_chunk(scanner->page[scanner->page_offset]) &&
7334 scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
7335
7336 if (scanner->page[scanner->page_offset] == TRANSLOG_FILLER)
7337 {
7338 DBUG_PRINT("info", ("End of file"));
7339 /* Last record was read */
7340 buff->lsn= LSN_IMPOSSIBLE;
7341 /* Return 'end of log' marker */
7342 res= RECHEADER_READ_EOF;
7343 }
7344 else
7345 res= translog_read_record_header_scan(scanner, buff, 0);
7346 DBUG_RETURN(res);
7347 }
7348
7349
7350 /*
7351 Moves record data reader to the next chunk and fill the data reader
7352 information about that chunk.
7353
7354 SYNOPSIS
7355 translog_record_read_next_chunk()
7356 data data cursor
7357
7358 RETURN
7359 0 OK
7360 1 Error
7361 */
7362
translog_record_read_next_chunk(TRANSLOG_READER_DATA * data)7363 static my_bool translog_record_read_next_chunk(TRANSLOG_READER_DATA *data)
7364 {
7365 translog_size_t new_current_offset= data->current_offset + data->chunk_size;
7366 uint16 chunk_header_len, chunk_len;
7367 uint8 type;
7368 DBUG_ENTER("translog_record_read_next_chunk");
7369
7370 if (data->eor)
7371 {
7372 DBUG_PRINT("info", ("end of the record flag set"));
7373 DBUG_RETURN(1);
7374 }
7375
7376 if (data->header.groups_no &&
7377 data->header.groups_no - 1 != data->current_group &&
7378 data->header.groups[data->current_group].num == data->current_chunk)
7379 {
7380 /* Goto next group */
7381 data->current_group++;
7382 data->current_chunk= 0;
7383 DBUG_PRINT("info", ("skip to group: #%u", data->current_group));
7384 translog_destroy_scanner(&data->scanner);
7385 translog_scanner_init(data->header.groups[data->current_group].addr,
7386 1, &data->scanner, 1);
7387 }
7388 else
7389 {
7390 data->current_chunk++;
7391 if (translog_get_next_chunk(&data->scanner))
7392 DBUG_RETURN(1);
7393 if (data->scanner.page == END_OF_LOG)
7394 {
7395 /*
7396 Actually it should not happened, but we want to quit nicely in case
7397 of a truncated log
7398 */
7399 DBUG_RETURN(1);
7400 }
7401 }
7402 type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
7403
7404 if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no)
7405 {
7406 DBUG_PRINT("info",
7407 ("Last chunk: data len: %u offset: %u group: %u of %u",
7408 data->header.chunk0_data_len, data->scanner.page_offset,
7409 data->current_group, data->header.groups_no - 1));
7410 DBUG_ASSERT(data->header.groups_no - 1 == data->current_group);
7411 DBUG_ASSERT(data->header.lsn ==
7412 data->scanner.page_addr + data->scanner.page_offset);
7413 translog_destroy_scanner(&data->scanner);
7414 translog_scanner_init(data->header.chunk0_data_addr, 1, &data->scanner, 1);
7415 data->chunk_size= data->header.chunk0_data_len;
7416 data->body_offset= data->scanner.page_offset;
7417 data->current_offset= new_current_offset;
7418 data->eor= 1;
7419 DBUG_RETURN(0);
7420 }
7421
7422 if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED)
7423 {
7424 data->eor= 1;
7425 DBUG_RETURN(1); /* End of record */
7426 }
7427
7428 chunk_header_len=
7429 translog_get_chunk_header_length(data->scanner.page +
7430 data->scanner.page_offset);
7431 chunk_len= translog_get_total_chunk_length(data->scanner.page,
7432 data->scanner.page_offset);
7433 data->chunk_size= chunk_len - chunk_header_len;
7434 data->body_offset= data->scanner.page_offset + chunk_header_len;
7435 data->current_offset= new_current_offset;
7436 DBUG_PRINT("info", ("grp: %u chunk: %u body_offset: %u chunk_size: %u "
7437 "current_offset: %lu",
7438 (uint) data->current_group,
7439 (uint) data->current_chunk,
7440 (uint) data->body_offset,
7441 (uint) data->chunk_size, (ulong) data->current_offset));
7442 DBUG_RETURN(0);
7443 }
7444
7445
7446 /*
7447 Initialize record reader data from LSN
7448
7449 SYNOPSIS
7450 translog_init_reader_data()
7451 lsn reference to LSN we should start from
7452 data reader data to initialize
7453
7454 RETURN
7455 0 OK
7456 1 Error
7457 */
7458
translog_init_reader_data(LSN lsn,TRANSLOG_READER_DATA * data)7459 static my_bool translog_init_reader_data(LSN lsn,
7460 TRANSLOG_READER_DATA *data)
7461 {
7462 int read_header;
7463 DBUG_ENTER("translog_init_reader_data");
7464 if (translog_scanner_init(lsn, 1, &data->scanner, 1) ||
7465 ((read_header=
7466 translog_read_record_header_scan(&data->scanner, &data->header, 1))
7467 == RECHEADER_READ_ERROR))
7468 DBUG_RETURN(1);
7469 data->read_header= read_header;
7470 data->body_offset= data->header.non_header_data_start_offset;
7471 data->chunk_size= data->header.non_header_data_len;
7472 data->current_offset= data->read_header;
7473 data->current_group= 0;
7474 data->current_chunk= 0;
7475 data->eor= 0;
7476 DBUG_PRINT("info", ("read_header: %u "
7477 "body_offset: %u chunk_size: %u current_offset: %lu",
7478 (uint) data->read_header,
7479 (uint) data->body_offset,
7480 (uint) data->chunk_size, (ulong) data->current_offset));
7481 DBUG_RETURN(0);
7482 }
7483
7484
7485 /**
7486 @brief Destroy reader data object
7487 */
7488
translog_destroy_reader_data(TRANSLOG_READER_DATA * data)7489 static void translog_destroy_reader_data(TRANSLOG_READER_DATA *data)
7490 {
7491 translog_destroy_scanner(&data->scanner);
7492 translog_free_record_header(&data->header);
7493 }
7494
7495
7496 /*
7497 Read a part of the record.
7498
7499 SYNOPSIS
7500 translog_read_record_header()
7501 lsn log record serial number (address of the record)
7502 offset From the beginning of the record beginning (read
7503 by translog_read_record_header).
7504 length Length of record part which have to be read.
7505 buffer Buffer where to read the record part (have to be at
7506 least 'length' bytes length)
7507
7508 RETURN
7509 length of data actually read
7510 */
7511
translog_read_record(LSN lsn,translog_size_t offset,translog_size_t length,uchar * buffer,TRANSLOG_READER_DATA * data)7512 translog_size_t translog_read_record(LSN lsn,
7513 translog_size_t offset,
7514 translog_size_t length,
7515 uchar *buffer,
7516 TRANSLOG_READER_DATA *data)
7517 {
7518 translog_size_t requested_length= length;
7519 translog_size_t end= offset + length;
7520 TRANSLOG_READER_DATA internal_data;
7521 DBUG_ENTER("translog_read_record");
7522 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7523 translog_status == TRANSLOG_READONLY);
7524
7525 if (data == NULL)
7526 {
7527 DBUG_ASSERT(lsn != LSN_IMPOSSIBLE);
7528 data= &internal_data;
7529 }
7530 if (lsn ||
7531 (offset < data->current_offset &&
7532 !(offset < data->read_header && offset + length < data->read_header)))
7533 {
7534 if (translog_init_reader_data(lsn, data))
7535 DBUG_RETURN(0);
7536 }
7537 DBUG_PRINT("info", ("Offset: %lu length: %lu "
7538 "Scanner: Cur: " LSN_FMT " Hrz: " LSN_FMT " "
7539 "Lst: " LSN_FMT " Offset: %u(%x) fixed: %d",
7540 (ulong) offset, (ulong) length,
7541 LSN_IN_PARTS(data->scanner.page_addr),
7542 LSN_IN_PARTS(data->scanner.horizon),
7543 LSN_IN_PARTS(data->scanner.last_file_page),
7544 (uint) data->scanner.page_offset,
7545 (uint) data->scanner.page_offset,
7546 data->scanner.fixed_horizon));
7547 if (offset < data->read_header)
7548 {
7549 uint16 len= MY_MIN(data->read_header, end) - offset;
7550 DBUG_PRINT("info",
7551 ("enter header offset: %lu length: %lu",
7552 (ulong) offset, (ulong) length));
7553 memcpy(buffer, data->header.header + offset, len);
7554 length-= len;
7555 if (length == 0)
7556 {
7557 translog_destroy_reader_data(data);
7558 DBUG_RETURN(requested_length);
7559 }
7560 offset+= len;
7561 buffer+= len;
7562 DBUG_PRINT("info",
7563 ("len: %u offset: %lu curr: %lu length: %lu",
7564 len, (ulong) offset, (ulong) data->current_offset,
7565 (ulong) length));
7566 }
7567 /* TODO: find first page which we should read by offset */
7568
7569 /* read the record chunk by chunk */
7570 for(;;)
7571 {
7572 uint page_end= data->current_offset + data->chunk_size;
7573 DBUG_PRINT("info",
7574 ("enter body offset: %lu curr: %lu "
7575 "length: %lu page_end: %lu",
7576 (ulong) offset, (ulong) data->current_offset, (ulong) length,
7577 (ulong) page_end));
7578 if (offset < page_end)
7579 {
7580 uint len= page_end - offset;
7581 set_if_smaller(len, length); /* in case we read beyond record's end */
7582 DBUG_ASSERT(offset >= data->current_offset);
7583 memcpy(buffer,
7584 data->scanner.page + data->body_offset +
7585 (offset - data->current_offset), len);
7586 length-= len;
7587 if (length == 0)
7588 {
7589 translog_destroy_reader_data(data);
7590 DBUG_RETURN(requested_length);
7591 }
7592 offset+= len;
7593 buffer+= len;
7594 DBUG_PRINT("info",
7595 ("len: %u offset: %lu curr: %lu length: %lu",
7596 len, (ulong) offset, (ulong) data->current_offset,
7597 (ulong) length));
7598 }
7599 if (translog_record_read_next_chunk(data))
7600 {
7601 translog_destroy_reader_data(data);
7602 DBUG_RETURN(requested_length - length);
7603 }
7604 }
7605 }
7606
7607
/*
  @brief Force skipping to the next buffer

  Finishes the buffer the log cursor (log_descriptor.bc) currently points
  to and makes the next buffer in ring order (modulo TRANSLOG_BUFFERS_NO)
  the current one.

  If the current page is only partly filled (left != 0), the unused tail of
  the page is filled with TRANSLOG_FILLER in the old buffer, and the new
  buffer is started at the beginning of that same page (overlay= 1).
  The already written part of the page is then either copied into the new
  buffer (when CRC or sector protection is enabled) or marked as skipped
  via skipped_data.

  @note The caller must own the translog lock (checked with
  translog_lock_assert_owner()). This function locks and unlocks only the
  new buffer; it does not unlock the old one.

  @todo Do not copy old page content if all page protections are switched off
  (because we do not need calculate something or change old parts of the page)
*/

static void translog_force_current_buffer_to_finish()
{
  TRANSLOG_ADDRESS new_buff_beginning;
  uint16 old_buffer_no= log_descriptor.bc.buffer_no;
  uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
  struct st_translog_buffer *new_buffer= (log_descriptor.buffers +
                                          new_buffer_no);
  struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer;
  /* Start of the page which is currently being filled (in the old buffer) */
  uchar *data= log_descriptor.bc.ptr - log_descriptor.bc.current_page_fill;
  /* Number of still unused bytes on the current page */
  uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
  uint16 UNINIT_VAR(current_page_fill), write_counter, previous_offset;
  DBUG_ENTER("translog_force_current_buffer_to_finish");

  DBUG_PRINT("enter", ("Buffer #%u %p "
                       "Buffer addr: " LSN_FMT " "
                       "Page addr: " LSN_FMT " "
                       "size: %lu (%lu) Pg: %u left: %u in progress %u",
                       (uint) old_buffer_no,
                       old_buffer,
                       LSN_IN_PARTS(old_buffer->offset),
                       LSN_FILE_NO(log_descriptor.horizon),
                       (uint)(LSN_OFFSET(log_descriptor.horizon) -
                              log_descriptor.bc.current_page_fill),
                       (ulong) old_buffer->size,
                       (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
                                buffer->buffer),
                       (uint) log_descriptor.bc.current_page_fill,
                       (uint) left,
                       (uint) old_buffer->
                       copy_to_buffer_in_progress));
  translog_lock_assert_owner();
  /* By default the new buffer begins right after the old buffer's data */
  new_buff_beginning= old_buffer->offset;
  new_buff_beginning+= old_buffer->size; /* increase offset */

  DBUG_ASSERT(log_descriptor.bc.ptr !=NULL);
  DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) ==
              LSN_FILE_NO(old_buffer->offset) ||
              translog_status == TRANSLOG_READONLY );
  translog_check_cursor(&log_descriptor.bc);
  DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
  if (left)
  {
    /*
      TODO: if 'left' is so small that can't hold any other record
      then do not move the page
    */
    DBUG_PRINT("info", ("left: %u", (uint) left));

    /* Remember where the data ended before the filler is appended */
    old_buffer->pre_force_close_horizon=
      old_buffer->offset + old_buffer->size;
    /* decrease offset */
    new_buff_beginning-= log_descriptor.bc.current_page_fill;
    current_page_fill= log_descriptor.bc.current_page_fill;

    /* Pad the rest of the page in the old buffer and count it in its size */
    memset(log_descriptor.bc.ptr, TRANSLOG_FILLER, left);
    old_buffer->size+= left;
    DBUG_PRINT("info", ("Finish Page buffer #%u: %p "
                        "Size: %lu",
                        (uint) old_buffer->buffer_no,
                        old_buffer,
                        (ulong) old_buffer->size));
    DBUG_ASSERT(old_buffer->buffer_no ==
                log_descriptor.bc.buffer_no);
  }
  else
  {
    /* left == 0: the new buffer will start with a fresh page */
    log_descriptor.bc.current_page_fill= 0;
  }

  translog_buffer_lock(new_buffer);
#ifndef DBUG_OFF
  {
    /* Snapshot of the new buffer state, used by the assert after the wait */
    TRANSLOG_ADDRESS offset= new_buffer->offset;
    TRANSLOG_FILE *file= new_buffer->file;
    uint8 ver= new_buffer->ver;
    translog_lock_assert_owner();
#endif
    translog_wait_for_buffer_free(new_buffer);
#ifndef DBUG_OFF
    /* We keep the handler locked so nobody can start this new buffer */
    DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL &&
                (file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver);
  }
#endif

  /*
    Save the cursor's write_counter and previous_offset and put them back
    after translog_start_buffer() has re-pointed the cursor at the new
    buffer.
  */
  write_counter= log_descriptor.bc.write_counter;
  previous_offset= log_descriptor.bc.previous_offset;
  translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
  /* Fix buffer offset (which was incorrectly set to horizon) */
  log_descriptor.bc.buffer->offset= new_buff_beginning;
  log_descriptor.bc.write_counter= write_counter;
  log_descriptor.bc.previous_offset= previous_offset;
  new_buffer->prev_last_lsn= BUFFER_MAX_LSN(old_buffer);
  DBUG_PRINT("info", ("prev_last_lsn set to " LSN_FMT " buffer: %p",
                      LSN_IN_PARTS(new_buffer->prev_last_lsn),
                      new_buffer));

  /*
    Advances this log pointer, increases writers and let other threads to
    write to the log while we process old page content
  */
  if (left)
  {
    /*
      Continue the partly filled page in the new buffer: skip over the part
      which was already written and account for it in the buffer size.
    */
    log_descriptor.bc.ptr+= current_page_fill;
    log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill=
      current_page_fill;
    new_buffer->overlay= 1;
  }
  else
    translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
  /*
    Register as a writer of the new buffer; the matching
    translog_buffer_decrease_writers() is at the end of this function.
  */
  translog_buffer_increase_writers(new_buffer);
  translog_buffer_unlock(new_buffer);

  /*
    We have to wait until all writers finish before start changing the
    pages by applying protection and copying the page content in the
    new buffer.
  */
#ifndef DBUG_OFF
  {
    /* Snapshot of the old buffer state, used by the assert after the wait */
    TRANSLOG_ADDRESS offset= old_buffer->offset;
    TRANSLOG_FILE *file= old_buffer->file;
    uint8 ver= old_buffer->ver;
#endif
    /*
      Only one thread at a time can flush the log (a single buffer can be
      flushed by many threads, but the log flush where this function is
      used is done by one thread only), so no other thread can set
      is_closing_buffer.
    */
    DBUG_ASSERT(!old_buffer->is_closing_buffer);
    old_buffer->is_closing_buffer= 1; /* Other flushes will wait */
    DBUG_PRINT("enter", ("Buffer #%u %p is_closing_buffer set",
                         (uint) old_buffer->buffer_no, old_buffer));
    translog_wait_for_writers(old_buffer);
#ifndef DBUG_OFF
    /* We blocked flushing this buffer so the buffer should not changed */
    DBUG_ASSERT(offset == old_buffer->offset && file == old_buffer->file &&
                ver == old_buffer->ver);
  }
#endif

  if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
  {
    /* Protect the page in the old buffer, then update the cursor counters */
    translog_put_sector_protection(data, &log_descriptor.bc);
    if (left)
    {
      log_descriptor.bc.write_counter++;
      log_descriptor.bc.previous_offset= current_page_fill;
    }
    else
    {
      DBUG_PRINT("info", ("drop write_counter"));
      log_descriptor.bc.write_counter= 0;
      log_descriptor.bc.previous_offset= 0;
    }
  }

  if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
  {
    /*
      CRC is calculated over the page without its header
      (log_descriptor.page_overhead bytes) and stored at offset 3 + 3 + 1
      of the page.
    */
    uint32 crc= translog_crc(data + log_descriptor.page_overhead,
                             TRANSLOG_PAGE_SIZE -
                             log_descriptor.page_overhead);
    DBUG_PRINT("info", ("CRC: 0x%x", crc));
    int4store(data + 3 + 3 + 1, crc);
  }
  /* The old buffer is complete: let the waiting flushers continue */
  old_buffer->is_closing_buffer= 0;
  DBUG_PRINT("enter", ("Buffer #%u %p is_closing_buffer cleared",
                       (uint) old_buffer->buffer_no, old_buffer));
  mysql_cond_broadcast(&old_buffer->waiting_filling_buffer);

  if (left)
  {
    if (log_descriptor.flags &
        (TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION))
      /* Copy the already written beginning of the page into the new buffer */
      memcpy(new_buffer->buffer, data, current_page_fill);
    else
    {
      /*
        This page header does not change if we add more data to the page so
        we can not copy it and will not overwrite later
      */
      new_buffer->skipped_data= current_page_fill;
      TRASH_ALLOC(new_buffer->buffer, current_page_fill);
      DBUG_ASSERT(new_buffer->skipped_data < TRANSLOG_PAGE_SIZE);
    }
  }
  /* Link the two buffers to each other by their offsets */
  old_buffer->next_buffer_offset= new_buffer->offset;
  translog_buffer_lock(new_buffer);
  new_buffer->prev_buffer_offset= old_buffer->offset;
  /* Matches the translog_buffer_increase_writers() call above */
  translog_buffer_decrease_writers(new_buffer);
  translog_buffer_unlock(new_buffer);

  DBUG_VOID_RETURN;
}
7809
7810
7811 /**
7812 @brief Waits while given lsn will be flushed
7813
7814 @param lsn log record serial number up to which (inclusive)
7815 the log has to be flushed
7816 */
7817
translog_flush_wait_for_end(LSN lsn)7818 void translog_flush_wait_for_end(LSN lsn)
7819 {
7820 DBUG_ENTER("translog_flush_wait_for_end");
7821 DBUG_PRINT("enter", ("LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
7822 mysql_mutex_assert_owner(&log_descriptor.log_flush_lock);
7823 while (cmp_translog_addr(log_descriptor.flushed, lsn) < 0)
7824 mysql_cond_wait(&log_descriptor.log_flush_cond,
7825 &log_descriptor.log_flush_lock);
7826 DBUG_VOID_RETURN;
7827 }
7828
7829
7830 /**
7831 @brief Sets goal for the next flush pass and waits for this pass end.
7832
7833 @param lsn log record serial number up to which (inclusive)
7834 the log has to be flushed
7835 */
7836
translog_flush_set_new_goal_and_wait(TRANSLOG_ADDRESS lsn)7837 void translog_flush_set_new_goal_and_wait(TRANSLOG_ADDRESS lsn)
7838 {
7839 int flush_no= log_descriptor.flush_no;
7840 DBUG_ENTER("translog_flush_set_new_goal_and_wait");
7841 DBUG_PRINT("enter", ("LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
7842 mysql_mutex_assert_owner(&log_descriptor.log_flush_lock);
7843 if (cmp_translog_addr(lsn, log_descriptor.next_pass_max_lsn) > 0)
7844 {
7845 log_descriptor.next_pass_max_lsn= lsn;
7846 log_descriptor.max_lsn_requester= pthread_self();
7847 mysql_cond_broadcast(&log_descriptor.new_goal_cond);
7848 }
7849 while (flush_no == log_descriptor.flush_no)
7850 {
7851 mysql_cond_wait(&log_descriptor.log_flush_cond,
7852 &log_descriptor.log_flush_lock);
7853 }
7854 DBUG_VOID_RETURN;
7855 }
7856
7857
7858 /**
7859 @brief sync() range of files (inclusive) and directory (by request)
7860
7861 @param min min internal file number to flush
7862 @param max max internal file number to flush
7863 @param sync_dir need sync directory
7864
7865 return Operation status
7866 @retval 0 OK
7867 @retval 1 Error
7868 */
7869
translog_sync_files(uint32 min,uint32 max,my_bool sync_dir)7870 static my_bool translog_sync_files(uint32 min, uint32 max,
7871 my_bool sync_dir)
7872 {
7873 uint fn;
7874 my_bool rc= 0;
7875 ulonglong flush_interval;
7876 DBUG_ENTER("translog_sync_files");
7877 DBUG_PRINT("info", ("min: %lu max: %lu sync dir: %d",
7878 (ulong) min, (ulong) max, (int) sync_dir));
7879 DBUG_ASSERT(min <= max);
7880
7881 flush_interval= group_commit_wait;
7882 if (flush_interval)
7883 flush_start= microsecond_interval_timer();
7884 for (fn= min; fn <= max; fn++)
7885 {
7886 TRANSLOG_FILE *file= get_logfile_by_number(fn);
7887 DBUG_ASSERT(file != NULL);
7888 if (!file->is_sync)
7889 {
7890 if (mysql_file_sync(file->handler.file, MYF(MY_WME)))
7891 {
7892 rc= 1;
7893 translog_stop_writing();
7894 DBUG_RETURN(rc);
7895 }
7896 translog_syncs++;
7897 file->is_sync= 1;
7898 }
7899 }
7900
7901 if (sync_dir)
7902 {
7903 if (!(rc= sync_dir(log_descriptor.directory_fd,
7904 MYF(MY_WME | MY_IGNORE_BADFD))))
7905 translog_syncs++;
7906 }
7907
7908 DBUG_RETURN(rc);
7909 }
7910
7911
7912 /**
7913 check_skipped_lsn
7914
7915 Check if lsn skipped in redo is ok
7916 */
7917
check_skipped_lsn(MARIA_HA * info,LSN lsn,my_bool index_file,pgcache_page_no_t page)7918 void check_skipped_lsn(MARIA_HA *info, LSN lsn, my_bool index_file,
7919 pgcache_page_no_t page)
7920 {
7921 if (lsn <= log_descriptor.horizon)
7922 {
7923 DBUG_PRINT("info", ("Page is up to date, skipping redo"));
7924 }
7925 else
7926 {
7927 /* Give error, but don't flood the log */
7928 if (skipped_lsn_err_count++ < MAX_LSN_ERRORS &&
7929 ! info->s->redo_error_given++)
7930 {
7931 eprint(tracef, "Table %s has wrong LSN: " LSN_FMT " on page: %llu",
7932 (index_file ? info->s->data_file_name.str :
7933 info->s->index_file_name.str),
7934 LSN_IN_PARTS(lsn), (ulonglong) page);
7935 recovery_found_crashed_tables++;
7936 }
7937 }
7938 }
7939
7940
/*
  @brief Flushes buffers with LSNs in them less or equal address <lsn>

  @param lsn             address up to which all LSNs should be flushed,
                         can be reset to real last LSN address
  @param sent_to_disk    returns 'sent to disk' position
  @param flush_horizon   returns horizon of the flush

  @note About terminology see comment to translog_flush().

  @note The function is entered with the translog lock taken by the caller
  and releases it on every path: via translog_unlock(), or via
  translog_buffer_unlock() on the buffer which was current on entry after
  that buffer has been force-finished.
*/

void translog_flush_buffers(TRANSLOG_ADDRESS *lsn,
                            TRANSLOG_ADDRESS *sent_to_disk,
                            TRANSLOG_ADDRESS *flush_horizon)
{
  dirty_buffer_mask_t dirty_buffer_mask;
  uint i;
  uint8 UNINIT_VAR(last_buffer_no), start_buffer_no;
  DBUG_ENTER("translog_flush_buffers");

  /*
    We will recheck information when will lock buffers one by
    one so we can use unprotected read here (this is just for
    speed up buffers processing)
  */
  dirty_buffer_mask= log_descriptor.dirty_buffer_mask;
  DBUG_PRINT("info", ("Dirty buffer mask: %lx  current buffer: %u",
                      (ulong) dirty_buffer_mask,
                      (uint) log_descriptor.bc.buffer_no));
  /*
    Find the first buffer after the current one (in ring order) which is
    marked dirty in the mask snapshot. If there is none, the search stops
    at the current buffer itself.
  */
  for (i= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
       i != log_descriptor.bc.buffer_no && !(dirty_buffer_mask & (1 << i));
       i= (i + 1) % TRANSLOG_BUFFERS_NO) {}
  start_buffer_no= i;

  DBUG_PRINT("info",
             ("start from: %u  current: %u  prev last lsn: " LSN_FMT,
              (uint) start_buffer_no, (uint) log_descriptor.bc.buffer_no,
              LSN_IN_PARTS(log_descriptor.bc.buffer->prev_last_lsn)));

  /*
    if LSN up to which we have to flush bigger then maximum LSN of previous
    buffer and at least one LSN was saved in the current buffer (last_lsn !=
    LSN_IMPOSSIBLE) then we have to close the current buffer.
  */
  if (cmp_translog_addr(*lsn, log_descriptor.bc.buffer->prev_last_lsn) > 0 &&
      log_descriptor.bc.buffer->last_lsn != LSN_IMPOSSIBLE)
  {
    /*
      Keep a pointer to the buffer which is current now: the call below
      switches log_descriptor.bc to the next buffer, and it is this (old)
      buffer which has to be unlocked afterwards.
    */
    struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
    *lsn= log_descriptor.bc.buffer->last_lsn; /* fix lsn if it was horizon */
    DBUG_PRINT("info", ("LSN to flush fixed to last lsn: " LSN_FMT,
                        LSN_IN_PARTS(*lsn)));
    /* The current buffer is the last one to flush */
    last_buffer_no= log_descriptor.bc.buffer_no;
    log_descriptor.is_everything_flushed= 1;
    translog_force_current_buffer_to_finish();
    translog_buffer_unlock(buffer);
  }
  else
  {
    if (log_descriptor.bc.buffer->last_lsn == LSN_IMPOSSIBLE)
    {
      /*
        In this case both last_lsn & prev_last_lsn are LSN_IMPOSSIBLE
        otherwise it will go in the first IF because LSN_IMPOSSIBLE less
        then any real LSN and cmp_translog_addr(*lsn,
        log_descriptor.bc.buffer->prev_last_lsn) will be TRUE
      */
      DBUG_ASSERT(log_descriptor.bc.buffer->prev_last_lsn ==
                  LSN_IMPOSSIBLE);
      DBUG_PRINT("info", ("There is no LSNs yet generated => do nothing"));
      /* NOTE: *sent_to_disk and *flush_horizon are left untouched here */
      translog_unlock();
      DBUG_VOID_RETURN;
    }

    DBUG_ASSERT(log_descriptor.bc.buffer->prev_last_lsn != LSN_IMPOSSIBLE);
    /* fix lsn if it was horizon */
    *lsn= log_descriptor.bc.buffer->prev_last_lsn;
    DBUG_PRINT("info", ("LSN to flush fixed to prev last lsn: " LSN_FMT,
                        LSN_IN_PARTS(*lsn)));
    /* The buffer before the current one (in ring order) is the last one */
    last_buffer_no= ((log_descriptor.bc.buffer_no + TRANSLOG_BUFFERS_NO -1) %
                     TRANSLOG_BUFFERS_NO);
    translog_unlock();
  }
  /* flush buffers */
  *sent_to_disk= translog_get_sent_to_disk();
  if (cmp_translog_addr(*lsn, *sent_to_disk) > 0)
  {

    DBUG_PRINT("info", ("Start buffer #: %u  last buffer #: %u",
                        (uint) start_buffer_no, (uint) last_buffer_no));
    /*
      Turn last_buffer_no into the stop mark of the loop below: the first
      buffer number which must not be processed any more.
    */
    last_buffer_no= (last_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
    i= start_buffer_no;
    do
    {
      struct st_translog_buffer *buffer= log_descriptor.buffers + i;
      translog_buffer_lock(buffer);
      DBUG_PRINT("info", ("Check buffer: %p  #: %u  "
                          "prev last LSN: " LSN_FMT "  "
                          "last LSN: " LSN_FMT "  status: %s",
                          buffer,
                          (uint) i,
                          LSN_IN_PARTS(buffer->prev_last_lsn),
                          LSN_IN_PARTS(buffer->last_lsn),
                          (buffer->file ?
                           "dirty" : "closed")));
      /*
        Recheck under the buffer lock: flush only a buffer which is still
        in use (file != NULL) and whose prev_last_lsn does not exceed the
        requested LSN.
      */
      if (buffer->prev_last_lsn <= *lsn &&
          buffer->file != NULL)
      {
        DBUG_ASSERT(*flush_horizon <= buffer->offset + buffer->size);
        /*
          Use the horizon saved before a forced close (if there is one)
          instead of the end of the buffer.
        */
        *flush_horizon= (buffer->pre_force_close_horizon != LSN_IMPOSSIBLE ?
                         buffer->pre_force_close_horizon :
                         buffer->offset + buffer->size);
        /* pre_force_close_horizon is reset during new buffer start */
        DBUG_PRINT("info", ("flush_horizon: " LSN_FMT,
                            LSN_IN_PARTS(*flush_horizon)));
        DBUG_ASSERT(*flush_horizon <= log_descriptor.horizon);

        translog_buffer_flush(buffer);
      }
      translog_buffer_unlock(buffer);
      i= (i + 1) % TRANSLOG_BUFFERS_NO;
    } while (i != last_buffer_no);
    /* Re-read the position after the buffers were sent to disk */
    *sent_to_disk= translog_get_sent_to_disk();
  }

  DBUG_VOID_RETURN;
}
8067
8068 /**
8069 @brief Flush the log up to given LSN (included)
8070
8071 @param lsn log record serial number up to which (inclusive)
8072 the log has to be flushed
8073
8074 @return Operation status
8075 @retval 0 OK
8076 @retval 1 Error
8077
8078 @note
8079
8080 - Non group commit logic: Commits made in passes. Thread which started
8081 flush first is performing actual flush, other threads sets new goal (LSN)
8082 of the next pass (if it is maximum) and waits for the pass end or just
8083 wait for the pass end.
8084
8085 - If hard group commit enabled and rate set to zero:
8086 The first thread sends all changed buffers to disk. This is repeated
8087 as long as there are new LSNs added. The process can not loop
8088 forever because we have limited number of threads and they will wait
8089 for the data to be synced.
8090 Pseudo code:
8091
8092 do
8093 send changed buffers to disk
8094 while new_goal
8095 sync
8096
8097 - If hard group commit switched ON and less than rate microseconds has
8098 passed from last sync, then after buffers have been sent to disk
8099 wait until rate microseconds has passed since last sync, do sync and return.
8100 This ensures that if we call sync infrequently we don't do any waits.
8101
8102 - If soft group commit enabled everything works as with 'non group commit'
8103 but the thread doesn't do any real sync(). If rate is not zero the
8104 sync() will be performed by a service thread with the given rate
8105 when needed (new LSN appears).
8106
8107 @note Terminology:
8108 'sent to disk' means written to disk but not sync()ed,
8109 'flushed' mean sent to disk and synced().
8110 */
8111
translog_flush(TRANSLOG_ADDRESS lsn)8112 my_bool translog_flush(TRANSLOG_ADDRESS lsn)
8113 {
8114 struct timespec abstime;
8115 ulonglong UNINIT_VAR(flush_interval);
8116 ulonglong time_spent;
8117 LSN sent_to_disk= LSN_IMPOSSIBLE;
8118 TRANSLOG_ADDRESS flush_horizon;
8119 my_bool rc= 0;
8120 my_bool hgroup_commit_at_start;
8121 DBUG_ENTER("translog_flush");
8122 DBUG_PRINT("enter", ("Flush up to LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
8123 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8124 translog_status == TRANSLOG_READONLY);
8125
8126 mysql_mutex_lock(&log_descriptor.log_flush_lock);
8127 DBUG_PRINT("info", ("Everything is flushed up to " LSN_FMT,
8128 LSN_IN_PARTS(log_descriptor.flushed)));
8129 if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0)
8130 {
8131 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
8132 DBUG_RETURN(0);
8133 }
8134 if (log_descriptor.flush_in_progress)
8135 {
8136 translog_lock();
8137 /* fix lsn if it was horizon */
8138 if (cmp_translog_addr(lsn, log_descriptor.bc.buffer->last_lsn) > 0)
8139 lsn= BUFFER_MAX_LSN(log_descriptor.bc.buffer);
8140 translog_unlock();
8141 translog_flush_set_new_goal_and_wait(lsn);
8142 if (!pthread_equal(log_descriptor.max_lsn_requester, pthread_self()))
8143 {
8144 /*
8145 translog_flush_wait_for_end() release log_flush_lock while is
8146 waiting then acquire it again
8147 */
8148 translog_flush_wait_for_end(lsn);
8149 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
8150 DBUG_RETURN(0);
8151 }
8152 log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
8153 }
8154 log_descriptor.flush_in_progress= 1;
8155 flush_horizon= log_descriptor.previous_flush_horizon;
8156 DBUG_PRINT("info", ("flush_in_progress is set, flush_horizon: " LSN_FMT,
8157 LSN_IN_PARTS(flush_horizon)));
8158 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
8159
8160 hgroup_commit_at_start= hard_group_commit;
8161 if (hgroup_commit_at_start)
8162 flush_interval= group_commit_wait;
8163
8164 translog_lock();
8165 if (log_descriptor.is_everything_flushed)
8166 {
8167 DBUG_PRINT("info", ("everything is flushed"));
8168 translog_unlock();
8169 mysql_mutex_lock(&log_descriptor.log_flush_lock);
8170 goto out;
8171 }
8172
8173 for (;;)
8174 {
8175 /* Following function flushes buffers and makes translog_unlock() */
8176 translog_flush_buffers(&lsn, &sent_to_disk, &flush_horizon);
8177
8178 if (!hgroup_commit_at_start)
8179 break; /* flush pass is ended */
8180
8181 retest:
8182 /*
8183 We do not check time here because mysql_mutex_lock rarely takes
8184 a lot of time so we can sacrifice a bit precision to performance
8185 (taking into account that microsecond_interval_timer() might be
8186 expensive call).
8187 */
8188 if (flush_interval == 0)
8189 break; /* flush pass is ended */
8190
8191 mysql_mutex_lock(&log_descriptor.log_flush_lock);
8192 if (log_descriptor.next_pass_max_lsn == LSN_IMPOSSIBLE)
8193 {
8194 if (flush_interval == 0 ||
8195 (time_spent= (microsecond_interval_timer() - flush_start)) >=
8196 flush_interval)
8197 {
8198 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
8199 break;
8200 }
8201 DBUG_PRINT("info", ("flush waits: %llu interval: %llu spent: %llu",
8202 flush_interval - time_spent,
8203 flush_interval, time_spent));
8204 /* wait time or next goal */
8205 set_timespec_nsec(abstime, flush_interval - time_spent);
8206 mysql_cond_timedwait(&log_descriptor.new_goal_cond,
8207 &log_descriptor.log_flush_lock,
8208 &abstime);
8209 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
8210 DBUG_PRINT("info", ("retest conditions"));
8211 goto retest;
8212 }
8213
8214 /* take next goal */
8215 lsn= log_descriptor.next_pass_max_lsn;
8216 log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
8217 /* prevent other thread from continue */
8218 log_descriptor.max_lsn_requester= pthread_self();
8219 DBUG_PRINT("info", ("flush took next goal: " LSN_FMT,
8220 LSN_IN_PARTS(lsn)));
8221 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
8222
8223 /* next flush pass */
8224 DBUG_PRINT("info", ("next flush pass"));
8225 translog_lock();
8226 }
8227
8228 /*
8229 sync() files from previous flush till current one
8230 */
8231 if (!soft_sync || hgroup_commit_at_start)
8232 {
8233 if ((rc=
8234 translog_sync_files(LSN_FILE_NO(log_descriptor.flushed),
8235 LSN_FILE_NO(lsn),
8236 sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS &&
8237 (LSN_FILE_NO(log_descriptor.
8238 previous_flush_horizon) !=
8239 LSN_FILE_NO(flush_horizon) ||
8240 (LSN_OFFSET(log_descriptor.
8241 previous_flush_horizon) /
8242 TRANSLOG_PAGE_SIZE) !=
8243 (LSN_OFFSET(flush_horizon) /
8244 TRANSLOG_PAGE_SIZE)))))
8245 {
8246 sent_to_disk= LSN_IMPOSSIBLE;
8247 mysql_mutex_lock(&log_descriptor.log_flush_lock);
8248 goto out;
8249 }
8250 /* keep values for soft sync() and forced sync() actual */
8251 {
8252 uint32 fileno= LSN_FILE_NO(lsn);
8253 soft_sync_min= fileno;
8254 soft_sync_max= fileno;
8255 }
8256 }
8257 else
8258 {
8259 soft_sync_max= LSN_FILE_NO(lsn);
8260 soft_need_sync= 1;
8261 }
8262
8263 DBUG_ASSERT(flush_horizon <= log_descriptor.horizon);
8264
8265 mysql_mutex_lock(&log_descriptor.log_flush_lock);
8266 log_descriptor.previous_flush_horizon= flush_horizon;
8267 out:
8268 if (sent_to_disk != LSN_IMPOSSIBLE)
8269 log_descriptor.flushed= sent_to_disk;
8270 log_descriptor.flush_in_progress= 0;
8271 log_descriptor.flush_no++;
8272 DBUG_PRINT("info", ("flush_in_progress is dropped"));
8273 mysql_mutex_unlock(&log_descriptor.log_flush_lock);
8274 mysql_cond_broadcast(&log_descriptor.log_flush_cond);
8275 DBUG_RETURN(rc);
8276 }
8277
8278
8279 /**
8280 @brief Gives a 2-byte-id to MARIA_SHARE and logs this fact
8281
8282 If a MARIA_SHARE does not yet have a 2-byte-id (unique over all currently
8283 open MARIA_SHAREs), give it one and record this assignment in the log
8284 (LOGREC_FILE_ID log record).
8285
8286 @param tbl_info table
8287 @param trn calling transaction
8288
8289 @return Operation status
8290 @retval 0 OK
8291 @retval 1 Error
8292
8293 @note Can be called even if share already has an id (then will do nothing)
8294 */
8295
translog_assign_id_to_share(MARIA_HA * tbl_info,TRN * trn)8296 int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
8297 {
8298 uint16 id;
8299 MARIA_SHARE *share= tbl_info->s;
8300 /*
8301 If you give an id to a non-BLOCK_RECORD table, you also need to release
8302 this id somewhere. Then you can change the assertion.
8303 */
8304 DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
8305 /* re-check under mutex to avoid having 2 ids for the same share */
8306 mysql_mutex_lock(&share->intern_lock);
8307 if (unlikely(share->id == 0))
8308 {
8309 LSN lsn;
8310 LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
8311 uchar log_data[FILEID_STORE_SIZE];
8312 /* Inspired by set_short_trid() of trnman.c */
8313 uint i= share->kfile.file % SHARE_ID_MAX + 1;
8314 id= 0;
8315 do
8316 {
8317 for ( ; i <= SHARE_ID_MAX ; i++) /* the range is [1..SHARE_ID_MAX] */
8318 {
8319 void *tmp= NULL;
8320 if (id_to_share[i] == NULL &&
8321 my_atomic_casptr((void **)&id_to_share[i], &tmp, share))
8322 {
8323 id= (uint16) i;
8324 break;
8325 }
8326 }
8327 i= 1; /* scan the whole array */
8328 } while (id == 0);
8329 DBUG_PRINT("info", ("id_to_share: %p -> %u", share, id));
8330 fileid_store(log_data, id);
8331 log_array[TRANSLOG_INTERNAL_PARTS + 0].str= log_data;
8332 log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
8333 /*
8334 open_file_name is an unresolved name (symlinks are not resolved, datadir
8335 is not realpath-ed, etc) which is good: the log can be moved to another
8336 directory and continue working.
8337 */
8338 log_array[TRANSLOG_INTERNAL_PARTS + 1].str=
8339 (uchar *)share->open_file_name.str;
8340 log_array[TRANSLOG_INTERNAL_PARTS + 1].length=
8341 share->open_file_name.length + 1;
8342 /*
8343 We can't unlock share->intern_lock before the log entry is written to
8344 ensure no one uses the id before it's logged.
8345 */
8346 if (unlikely(translog_write_record(&lsn, LOGREC_FILE_ID, trn, tbl_info,
8347 (translog_size_t)
8348 (sizeof(log_data) +
8349 log_array[TRANSLOG_INTERNAL_PARTS +
8350 1].length),
8351 sizeof(log_array)/sizeof(log_array[0]),
8352 log_array, NULL, NULL)))
8353 {
8354 mysql_mutex_unlock(&share->intern_lock);
8355 return 1;
8356 }
8357 /*
8358 Now when translog record is done, we can set share->id.
8359 If we set it before, then translog_write_record may pick up the id
8360 before it's written to the log.
8361 */
8362 share->id= id;
8363 share->state.logrec_file_id= lsn;
8364 }
8365 mysql_mutex_unlock(&share->intern_lock);
8366 return 0;
8367 }
8368
8369
8370 /**
8371 @brief Recycles a MARIA_SHARE's short id.
8372
8373 @param share table
8374
8375 @note Must be called only if share has an id (i.e. id != 0)
8376 */
8377
translog_deassign_id_from_share(MARIA_SHARE * share)8378 void translog_deassign_id_from_share(MARIA_SHARE *share)
8379 {
8380 DBUG_PRINT("info", ("id_to_share: %p id %u -> 0",
8381 share, share->id));
8382 /*
8383 We don't need any mutex as we are called only when closing the last
8384 instance of the table or at the end of REPAIR: no writes can be
8385 happening. But a Checkpoint may be reading share->id, so we require this
8386 mutex:
8387 */
8388 mysql_mutex_assert_owner(&share->intern_lock);
8389 my_atomic_storeptr((void **)&id_to_share[share->id], 0);
8390 share->id= 0;
8391 /* useless but safety: */
8392 share->lsn_of_file_id= LSN_IMPOSSIBLE;
8393 }
8394
8395
/**
  @brief Gives the share the id found in the log during recovery

  Stores id in share->id and registers the share in id_to_share[].
  No log record is written.

  @param  share          table (must not have an id yet)
  @param  id             id to assign (its slot must be free)

  @note Only for single-threaded recovery (see the first assertion).
*/

void translog_assign_id_to_share_from_recovery(MARIA_SHARE *share,
                                               uint16 id)
{
  DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
  DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
  DBUG_ASSERT(share->id == 0);
  DBUG_ASSERT(id_to_share[id] == NULL);
  share->id= id;
  id_to_share[share->id]= share;
}
8405
8406
8407 /**
8408 @brief check if such log file exists
8409
8410 @param file_no number of the file to test
8411
8412 @retval 0 no such file
8413 @retval 1 there is file with such number
8414 */
8415
translog_is_file(uint file_no)8416 my_bool translog_is_file(uint file_no)
8417 {
8418 MY_STAT stat_buff;
8419 char path[FN_REFLEN];
8420 return (MY_TEST(mysql_file_stat(key_file_translog,
8421 translog_filename_by_fileno(file_no, path),
8422 &stat_buff, MYF(0))));
8423 }
8424
8425
8426 /**
8427 @brief returns minimum log file number
8428
8429 @param horizon the end of the log
8430 @param is_protected true if it is under purge_log protection
8431
8432 @retval minimum file number
8433 @retval 0 no files found
8434 */
8435
translog_first_file(TRANSLOG_ADDRESS horizon,int is_protected)8436 static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected)
8437 {
8438 uint min_file= 0, max_file;
8439 DBUG_ENTER("translog_first_file");
8440 if (!is_protected)
8441 mysql_mutex_lock(&log_descriptor.purger_lock);
8442 if (log_descriptor.min_file_number &&
8443 translog_is_file(log_descriptor.min_file_number))
8444 {
8445 DBUG_PRINT("info", ("cached %lu",
8446 (ulong) log_descriptor.min_file_number));
8447 if (!is_protected)
8448 mysql_mutex_unlock(&log_descriptor.purger_lock);
8449 DBUG_RETURN(log_descriptor.min_file_number);
8450 }
8451
8452 max_file= LSN_FILE_NO(horizon);
8453
8454 /* binary search for last file */
8455 while (min_file != max_file && min_file != (max_file - 1))
8456 {
8457 uint test= (min_file + max_file) / 2;
8458 DBUG_PRINT("info", ("min_file: %u test: %u max_file: %u",
8459 min_file, test, max_file));
8460 if (test == max_file)
8461 test--;
8462 if (translog_is_file(test))
8463 max_file= test;
8464 else
8465 min_file= test;
8466 }
8467 log_descriptor.min_file_number= max_file;
8468 if (!is_protected)
8469 mysql_mutex_unlock(&log_descriptor.purger_lock);
8470 DBUG_PRINT("info", ("first file :%lu", (ulong) max_file));
8471 DBUG_ASSERT(max_file >= 1);
8472 DBUG_RETURN(max_file);
8473 }
8474
8475
8476 /**
8477 @brief returns the most close LSN higher the given chunk address
8478
8479 @param addr the chunk address to start from
8480 @param horizon the horizon if it is known or LSN_IMPOSSIBLE
8481
8482 @retval LSN_ERROR Error
8483 @retval LSN_IMPOSSIBLE no LSNs after the address
8484 @retval # LSN of the most close LSN higher the given chunk address
8485 */
8486
translog_next_LSN(TRANSLOG_ADDRESS addr,TRANSLOG_ADDRESS horizon)8487 LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon)
8488 {
8489 TRANSLOG_SCANNER_DATA scanner;
8490 LSN result;
8491 DBUG_ENTER("translog_next_LSN");
8492
8493 if (horizon == LSN_IMPOSSIBLE)
8494 horizon= translog_get_horizon();
8495
8496 if (addr == horizon)
8497 DBUG_RETURN(LSN_IMPOSSIBLE);
8498
8499 translog_scanner_init(addr, 0, &scanner, 1);
8500 /*
8501 addr can point not to a chunk beginning but page end so next
8502 page beginning.
8503 */
8504 if (addr % TRANSLOG_PAGE_SIZE == 0)
8505 {
8506 /*
8507 We are emulating the page end which cased such horizon value to
8508 trigger translog_scanner_eop().
8509
8510 We can't just increase addr on page header overhead because it
8511 can be file end so we allow translog_get_next_chunk() to skip
8512 to the next page in correct way
8513 */
8514 scanner.page_addr-= TRANSLOG_PAGE_SIZE;
8515 scanner.page_offset= TRANSLOG_PAGE_SIZE;
8516 #ifndef DBUG_OFF
8517 scanner.page= NULL; /* prevent using incorrect page content */
8518 #endif
8519 }
8520 /* addr can point not to a chunk beginning but to a page end */
8521 if (translog_scanner_eop(&scanner))
8522 {
8523 if (translog_get_next_chunk(&scanner))
8524 {
8525 result= LSN_ERROR;
8526 goto out;
8527 }
8528 if (scanner.page == END_OF_LOG)
8529 {
8530 result= LSN_IMPOSSIBLE;
8531 goto out;
8532 }
8533 }
8534
8535 while (!translog_is_LSN_chunk(scanner.page[scanner.page_offset]) &&
8536 scanner.page[scanner.page_offset] != TRANSLOG_FILLER)
8537 {
8538 if (translog_get_next_chunk(&scanner))
8539 {
8540 result= LSN_ERROR;
8541 goto out;
8542 }
8543 if (scanner.page == END_OF_LOG)
8544 {
8545 result= LSN_IMPOSSIBLE;
8546 goto out;
8547 }
8548 }
8549
8550 if (scanner.page[scanner.page_offset] == TRANSLOG_FILLER)
8551 result= LSN_IMPOSSIBLE; /* reached page filler */
8552 else
8553 result= scanner.page_addr + scanner.page_offset;
8554 out:
8555 translog_destroy_scanner(&scanner);
8556 DBUG_RETURN(result);
8557 }
8558
8559
8560 /**
8561 @brief returns the LSN of the first record starting in this log
8562
8563 @retval LSN_ERROR Error
8564 @retval LSN_IMPOSSIBLE no log or the log is empty
8565 @retval # LSN of the first record
8566 */
8567
translog_first_lsn_in_log()8568 LSN translog_first_lsn_in_log()
8569 {
8570 TRANSLOG_ADDRESS addr, horizon= translog_get_horizon();
8571 TRANSLOG_VALIDATOR_DATA data;
8572 uint file;
8573 uint16 chunk_offset;
8574 uchar *page;
8575 DBUG_ENTER("translog_first_lsn_in_log");
8576 DBUG_PRINT("info", ("Horizon: " LSN_FMT, LSN_IN_PARTS(horizon)));
8577 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8578 translog_status == TRANSLOG_READONLY);
8579
8580 if (!(file= translog_first_file(horizon, 0)))
8581 {
8582 /* log has no records yet */
8583 DBUG_RETURN(LSN_IMPOSSIBLE);
8584 }
8585
8586 addr= MAKE_LSN(file, TRANSLOG_PAGE_SIZE); /* the first page of the file */
8587 data.addr= &addr;
8588 {
8589 TRANSLOG_PAGE_SIZE_BUFF psize_buff;
8590 if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL ||
8591 (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
8592 DBUG_RETURN(LSN_ERROR);
8593 }
8594 addr+= chunk_offset;
8595
8596 DBUG_RETURN(translog_next_LSN(addr, horizon));
8597 }
8598
8599
8600 /**
8601 @brief Returns theoretical first LSN if first log is present
8602
8603 @retval LSN_ERROR Error
8604 @retval LSN_IMPOSSIBLE no log
8605 @retval # LSN of the first record
8606 */
8607
translog_first_theoretical_lsn()8608 LSN translog_first_theoretical_lsn()
8609 {
8610 TRANSLOG_ADDRESS addr= translog_get_horizon();
8611 TRANSLOG_PAGE_SIZE_BUFF psize_buff;
8612 uchar *page;
8613 TRANSLOG_VALIDATOR_DATA data;
8614 DBUG_ENTER("translog_first_theoretical_lsn");
8615 DBUG_PRINT("info", ("Horizon: " LSN_FMT, LSN_IN_PARTS(addr)));
8616 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8617 translog_status == TRANSLOG_READONLY);
8618
8619 if (!translog_is_file(1))
8620 DBUG_RETURN(LSN_IMPOSSIBLE);
8621 if (addr == MAKE_LSN(1, TRANSLOG_PAGE_SIZE))
8622 {
8623 /* log has no records yet */
8624 DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
8625 log_descriptor.page_overhead));
8626 }
8627
8628 addr= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* the first page of the file */
8629 data.addr= &addr;
8630 if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL)
8631 DBUG_RETURN(LSN_ERROR);
8632
8633 DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
8634 page_overhead[page[TRANSLOG_PAGE_FLAGS]]));
8635 }
8636
8637
8638 /**
8639 @brief Checks given low water mark and purge files if it is need
8640
8641 @param low the last (minimum) address which is need
8642
8643 @retval 0 OK
8644 @retval 1 Error
8645 */
8646
translog_purge(TRANSLOG_ADDRESS low)8647 my_bool translog_purge(TRANSLOG_ADDRESS low)
8648 {
8649 uint32 last_need_file= LSN_FILE_NO(low);
8650 uint32 min_unsync;
8651 int soft;
8652 TRANSLOG_ADDRESS horizon= translog_get_horizon();
8653 int rc= 0;
8654 DBUG_ENTER("translog_purge");
8655 DBUG_PRINT("enter", ("low: " LSN_FMT, LSN_IN_PARTS(low)));
8656 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8657 translog_status == TRANSLOG_READONLY);
8658
8659 soft= soft_sync;
8660 min_unsync= soft_sync_min;
8661 DBUG_PRINT("info", ("min_unsync: %lu", (ulong) min_unsync));
8662 if (soft && min_unsync < last_need_file)
8663 {
8664 last_need_file= min_unsync;
8665 DBUG_PRINT("info", ("last_need_file set to %lu", (ulong)last_need_file));
8666 }
8667
8668 mysql_mutex_lock(&log_descriptor.purger_lock);
8669 DBUG_PRINT("info", ("last_lsn_checked file: %lu:",
8670 (ulong) log_descriptor.last_lsn_checked));
8671 if (LSN_FILE_NO(log_descriptor.last_lsn_checked) < last_need_file)
8672 {
8673 uint32 i;
8674 uint32 min_file= translog_first_file(horizon, 1);
8675 DBUG_ASSERT(min_file != 0); /* log is already started */
8676 DBUG_PRINT("info", ("min_file: %lu:",(ulong) min_file));
8677 for(i= min_file; i < last_need_file && rc == 0; i++)
8678 {
8679 LSN lsn= translog_get_file_max_lsn_stored(i);
8680 if (lsn == LSN_IMPOSSIBLE)
8681 break; /* files are still in writing */
8682 if (lsn == LSN_ERROR)
8683 {
8684 rc= 1;
8685 break;
8686 }
8687 if (cmp_translog_addr(lsn, low) >= 0)
8688 break;
8689
8690 DBUG_PRINT("info", ("purge file %lu", (ulong) i));
8691
8692 /* remove file descriptor from the cache */
8693 /*
8694 log_descriptor.min_file can be changed only here during execution
8695 and the function is serialized, so we can access it without problems
8696 */
8697 if (i >= log_descriptor.min_file)
8698 {
8699 TRANSLOG_FILE *file;
8700 mysql_rwlock_wrlock(&log_descriptor.open_files_lock);
8701 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
8702 log_descriptor.open_files.elements);
8703 DBUG_ASSERT(log_descriptor.min_file == i);
8704 file= *((TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files));
8705 DBUG_PRINT("info", ("Files : %d", log_descriptor.open_files.elements));
8706 DBUG_ASSERT(i == file->number);
8707 log_descriptor.min_file++;
8708 DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
8709 log_descriptor.open_files.elements);
8710 mysql_rwlock_unlock(&log_descriptor.open_files_lock);
8711 translog_close_log_file(file);
8712 }
8713 if (log_purge_type == TRANSLOG_PURGE_IMMIDIATE && ! log_purge_disabled)
8714 {
8715 char path[FN_REFLEN], *file_name;
8716 file_name= translog_filename_by_fileno(i, path);
8717 rc= MY_TEST(mysql_file_delete(key_file_translog,
8718 file_name, MYF(MY_WME)));
8719 }
8720 }
8721 if (unlikely(rc == 1))
8722 log_descriptor.min_need_file= 0; /* impossible value */
8723 else
8724 log_descriptor.min_need_file= i;
8725 }
8726
8727 mysql_mutex_unlock(&log_descriptor.purger_lock);
8728 DBUG_RETURN(rc);
8729 }
8730
8731
8732 /**
8733 @brief Purges files by stored min need file in case of
8734 "ondemend" purge type
8735
8736 @note This function do real work only if it is "ondemend" purge type
8737 and translog_purge() was called at least once and last time without
8738 errors
8739
8740 @retval 0 OK
8741 @retval 1 Error
8742 */
8743
translog_purge_at_flush()8744 my_bool translog_purge_at_flush()
8745 {
8746 uint32 i, min_file;
8747 int rc= 0;
8748 DBUG_ENTER("translog_purge_at_flush");
8749 DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8750 translog_status == TRANSLOG_READONLY);
8751
8752 if (unlikely(translog_status == TRANSLOG_READONLY))
8753 {
8754 DBUG_PRINT("info", ("The log is read only => exit"));
8755 DBUG_RETURN(0);
8756 }
8757
8758 if (log_purge_type != TRANSLOG_PURGE_ONDEMAND)
8759 {
8760 DBUG_PRINT("info", ("It is not \"at_flush\" => exit"));
8761 DBUG_RETURN(0);
8762 }
8763
8764 mysql_mutex_lock(&log_descriptor.purger_lock);
8765
8766 if (unlikely(log_descriptor.min_need_file == 0 || log_purge_disabled))
8767 {
8768 DBUG_PRINT("info", ("No info about min need file => exit"));
8769 mysql_mutex_unlock(&log_descriptor.purger_lock);
8770 DBUG_RETURN(0);
8771 }
8772
8773 min_file= translog_first_file(translog_get_horizon(), 1);
8774 DBUG_ASSERT(min_file != 0); /* log is already started */
8775 for(i= min_file; i < log_descriptor.min_need_file && rc == 0; i++)
8776 {
8777 char path[FN_REFLEN], *file_name;
8778 DBUG_PRINT("info", ("purge file %lu\n", (ulong) i));
8779 file_name= translog_filename_by_fileno(i, path);
8780 rc= MY_TEST(mysql_file_delete(key_file_translog,
8781 file_name, MYF(MY_WME)));
8782 }
8783
8784 mysql_mutex_unlock(&log_descriptor.purger_lock);
8785 DBUG_RETURN(rc);
8786 }
8787
8788
/**
  @brief Gets the minimum log file number

  Thin public wrapper around translog_first_file(), called with 0 as its
  second argument.
  NOTE(review): that argument looks like the "is_protected" flag, i.e. the
  caller does not hold log_descriptor.purger_lock - confirm against
  translog_first_file().

  @param horizon         the end of the log

  @retval minimum file number
  @retval 0 no files found
*/

uint32 translog_get_first_file(TRANSLOG_ADDRESS horizon)
{
  return translog_first_file(horizon, 0);
}
8802
8803
8804 /**
8805 @brief Gets min file number which is needed
8806
8807 @retval minimum file number
8808 @retval 0 unknown
8809 */
8810
translog_get_first_needed_file()8811 uint32 translog_get_first_needed_file()
8812 {
8813 uint32 file_no;
8814 mysql_mutex_lock(&log_descriptor.purger_lock);
8815 file_no= log_descriptor.min_need_file;
8816 mysql_mutex_unlock(&log_descriptor.purger_lock);
8817 return file_no;
8818 }
8819
8820
8821 /**
8822 @brief Gets transaction log file size
8823
8824 @return transaction log file size
8825 */
8826
translog_get_file_size()8827 uint32 translog_get_file_size()
8828 {
8829 uint32 res;
8830 translog_lock();
8831 res= log_descriptor.log_file_max_size;
8832 translog_unlock();
8833 return (res);
8834 }
8835
8836
8837 /**
8838 @brief Sets transaction log file size
8839
8840 @return Returns actually set transaction log size
8841 */
8842
translog_set_file_size(uint32 size)8843 void translog_set_file_size(uint32 size)
8844 {
8845 struct st_translog_buffer *old_buffer= NULL;
8846 DBUG_ENTER("translog_set_file_size");
8847 translog_lock();
8848 DBUG_PRINT("enter", ("Size: %lu", (ulong) size));
8849 DBUG_ASSERT(size % TRANSLOG_PAGE_SIZE == 0);
8850 DBUG_ASSERT(size >= TRANSLOG_MIN_FILE_SIZE);
8851 log_descriptor.log_file_max_size= size;
8852 /* if current file longer then finish it*/
8853 if (LSN_OFFSET(log_descriptor.horizon) >= log_descriptor.log_file_max_size)
8854 {
8855 old_buffer= log_descriptor.bc.buffer;
8856 translog_buffer_next(&log_descriptor.horizon, &log_descriptor.bc, 1);
8857 translog_buffer_unlock(old_buffer);
8858 }
8859 translog_unlock();
8860 if (old_buffer)
8861 {
8862 translog_buffer_lock(old_buffer);
8863 translog_buffer_flush(old_buffer);
8864 translog_buffer_unlock(old_buffer);
8865 }
8866 DBUG_VOID_RETURN;
8867 }
8868
8869
8870 /**
8871 Write debug information to log if we EXTRA_DEBUG is enabled
8872 */
8873
translog_log_debug_info(TRN * trn,enum translog_debug_info_type type,uchar * info,size_t length)8874 my_bool translog_log_debug_info(TRN *trn __attribute__((unused)),
8875 enum translog_debug_info_type type
8876 __attribute__((unused)),
8877 uchar *info __attribute__((unused)),
8878 size_t length __attribute__((unused)))
8879 {
8880 #ifdef EXTRA_DEBUG
8881 LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
8882 uchar debug_type;
8883 LSN lsn;
8884
8885 if (!trn)
8886 {
8887 /*
8888 We can't log the current transaction because we don't have
8889 an active transaction. Use a temporary transaction object instead
8890 */
8891 trn= &dummy_transaction_object;
8892 }
8893 debug_type= (uchar) type;
8894 log_array[TRANSLOG_INTERNAL_PARTS + 0].str= &debug_type;
8895 log_array[TRANSLOG_INTERNAL_PARTS + 0].length= 1;
8896 log_array[TRANSLOG_INTERNAL_PARTS + 1].str= info;
8897 log_array[TRANSLOG_INTERNAL_PARTS + 1].length= length;
8898 return translog_write_record(&lsn, LOGREC_DEBUG_INFO,
8899 trn, NULL,
8900 (translog_size_t) (1+ length),
8901 sizeof(log_array)/sizeof(log_array[0]),
8902 log_array, NULL, NULL);
8903 #else
8904 return 0;
8905 #endif
8906 }
8907
8908
8909
/**
  @brief Sets soft sync mode

  Only stores the requested mode in the soft_sync flag; the syncing service
  thread is neither started nor stopped here (see
  translog_soft_sync_start() and translog_soft_sync_end()).

  @param mode            TRUE to switch soft sync on, FALSE to switch it off
*/

void translog_soft_sync(my_bool mode)
{
  soft_sync= mode;
}
8920
8921
/**
  @brief Sets hard group commit

  Only stores the requested mode in the hard_group_commit flag.

  @param mode            TRUE to switch hard group commit on, FALSE to
                         switch it off
*/

void translog_hard_group_commit(my_bool mode)
{
  hard_group_commit= mode;
}
8932
8933
8934 /**
8935 @brief forced log sync (used when we are switching modes)
8936 */
8937
translog_sync()8938 void translog_sync()
8939 {
8940 uint32 max= get_current_logfile()->number;
8941 uint32 min;
8942 DBUG_ENTER("ma_translog_sync");
8943
8944 min= soft_sync_min;
8945 if (!min)
8946 min= max;
8947
8948 translog_sync_files(min, max, sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS);
8949
8950 DBUG_VOID_RETURN;
8951 }
8952
8953
/**
  @brief Sets the rate for group commit

  Stores the interval in group_commit_wait and traces the stored value.

  @param interval        interval to set.
                         NOTE(review): ma_soft_sync_background() compares
                         group_commit_wait with differences of
                         microsecond_interval_timer(), so the unit is
                         presumably microseconds - confirm.

  @note We use this function with an additional variable because we have
  to restart the service thread with the new value, which can't be done
  inside the routine that changes the variable
  (update_maria_group_commit_interval)
*/

void translog_set_group_commit_interval(uint32 interval)
{
  DBUG_ENTER("translog_set_group_commit_interval");
  group_commit_wait= interval;
  DBUG_PRINT("info", ("wait: %llu",
                      (ulonglong)group_commit_wait));
  DBUG_VOID_RETURN;
}
8972
8973
8974 /**
8975 @brief syncing service thread
8976 */
8977
8978 static pthread_handler_t
ma_soft_sync_background(void * arg)8979 ma_soft_sync_background( void *arg __attribute__((unused)))
8980 {
8981
8982 my_thread_init();
8983 {
8984 DBUG_ENTER("ma_soft_sync_background");
8985 for(;;)
8986 {
8987 ulonglong prev_loop= microsecond_interval_timer();
8988 ulonglong time, sleep;
8989 uint32 min, max, sync_request;
8990 min= soft_sync_min;
8991 max= soft_sync_max;
8992 sync_request= soft_need_sync;
8993 soft_sync_min= max;
8994 soft_need_sync= 0;
8995
8996 sleep= group_commit_wait;
8997 if (sync_request)
8998 translog_sync_files(min, max, FALSE);
8999 time= microsecond_interval_timer() - prev_loop;
9000 if (time > sleep)
9001 sleep= 0;
9002 else
9003 sleep-= time;
9004 if (my_service_thread_sleep(&soft_sync_control, sleep))
9005 break;
9006 }
9007 my_thread_end();
9008 DBUG_RETURN(0);
9009 }
9010 }
9011
9012
9013 /**
9014 @brief Starts syncing thread
9015 */
9016
translog_soft_sync_start(void)9017 int translog_soft_sync_start(void)
9018 {
9019 int res= 0;
9020 uint32 min, max;
9021 DBUG_ENTER("translog_soft_sync_start");
9022
9023 /* check and init variables */
9024 min= soft_sync_min;
9025 max= soft_sync_max;
9026 if (!max)
9027 soft_sync_max= max= get_current_logfile()->number;
9028 if (!min)
9029 soft_sync_min= max;
9030 soft_need_sync= 1;
9031
9032 if (!(res= ma_service_thread_control_init(&soft_sync_control)))
9033 if ((res= mysql_thread_create(key_thread_soft_sync,
9034 &soft_sync_control.thread, NULL,
9035 ma_soft_sync_background, NULL)))
9036 soft_sync_control.killed= TRUE;
9037 DBUG_RETURN(res);
9038 }
9039
9040
9041 /**
9042 @brief Stops syncing thread
9043 */
9044
translog_soft_sync_end(void)9045 void translog_soft_sync_end(void)
9046 {
9047 DBUG_ENTER("translog_soft_sync_end");
9048 if (soft_sync_control.inited)
9049 {
9050 ma_service_thread_control_end(&soft_sync_control);
9051 }
9052 DBUG_VOID_RETURN;
9053 }
9054
9055
9056 /**
9057 @brief Dump information about file header page.
9058 */
9059
dump_header_page(uchar * buff)9060 static void dump_header_page(uchar *buff)
9061 {
9062 LOGHANDLER_FILE_INFO desc;
9063 char strbuff[21];
9064 translog_interpret_file_header(&desc, buff);
9065 printf(" This can be header page:\n"
9066 " Timestamp: %s\n"
9067 " Aria log version: %lu\n"
9068 " Server version: %lu\n"
9069 " Server id %lu\n"
9070 " Page size %lu\n",
9071 llstr(desc.timestamp, strbuff),
9072 desc.maria_version,
9073 desc.mysql_version,
9074 desc.server_id,
9075 desc.page_size);
9076 if (desc.page_size != TRANSLOG_PAGE_SIZE)
9077 printf(" WARNING: page size is not equal compiled in one %lu!!!\n",
9078 (ulong) TRANSLOG_PAGE_SIZE);
9079 printf(" File number %lu\n"
9080 " Max lsn: " LSN_FMT "\n",
9081 desc.file_number,
9082 LSN_IN_PARTS(desc.max_lsn));
9083 }
9084
/*
  Printable names of record classes. dump_chunk() indexes this table with
  the rclass field of a log_record_type_descriptor entry, so the order of
  the entries has to follow the numeric values of the record class
  constants.
*/
static const char *record_class_string[]=
{
  [0]= "LOGRECTYPE_NOT_ALLOWED",
  [1]= "LOGRECTYPE_VARIABLE_LENGTH",
  [2]= "LOGRECTYPE_PSEUDOFIXEDLENGTH",
  [3]= "LOGRECTYPE_FIXEDLENGTH"
};
9092
9093
9094 /**
9095 @brief dump information about transaction log chunk
9096
9097 @param buffer reference to the whole page
9098 @param ptr pointer to the chunk
9099
9100 @reval # reference to the next chunk
9101 @retval NULL can't interpret data
9102 */
9103
dump_chunk(uchar * buffer,uchar * ptr)9104 static uchar *dump_chunk(uchar *buffer, uchar *ptr)
9105 {
9106 uint length;
9107 if (*ptr == TRANSLOG_FILLER)
9108 {
9109 printf(" Filler till the page end\n");
9110 for (; ptr < buffer + TRANSLOG_PAGE_SIZE; ptr++)
9111 {
9112 if (*ptr != TRANSLOG_FILLER)
9113 {
9114 printf(" WARNING: non filler character met before page end "
9115 "(page + 0x%04x: 0x%02x) (stop interpretation)!!!",
9116 (uint) (ptr - buffer), (uint) ptr[0]);
9117 return NULL;
9118 }
9119 }
9120 return ptr;
9121 }
9122 if (*ptr == 0 || *ptr == 0xFF)
9123 {
9124 printf(" WARNING: chunk can't start from 0x0 "
9125 "(stop interpretation)!!!\n");
9126 return NULL;
9127 }
9128 switch (ptr[0] & TRANSLOG_CHUNK_TYPE) {
9129 case TRANSLOG_CHUNK_LSN:
9130 printf(" LSN chunk type 0 (variable length)\n");
9131 if (likely((ptr[0] & TRANSLOG_REC_TYPE) != TRANSLOG_CHUNK_0_CONT))
9132 {
9133 printf(" Record type %u: %s record class %s compressed LSNs: %u\n",
9134 ptr[0] & TRANSLOG_REC_TYPE,
9135 (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name ?
9136 log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name :
9137 "NULL"),
9138 record_class_string[log_record_type_descriptor[ptr[0] &
9139 TRANSLOG_REC_TYPE].
9140 rclass],
9141 log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].
9142 compressed_LSN);
9143 if (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
9144 LOGRECTYPE_VARIABLE_LENGTH)
9145 {
9146 printf(" WARNING: this record class here can't be used "
9147 "(stop interpretation)!!!\n");
9148 break;
9149 }
9150 }
9151 else
9152 printf(" Continuation of previous chunk 0 header \n");
9153 printf(" Short transaction id: %u\n", (uint) uint2korr(ptr + 1));
9154 {
9155 uchar *hdr_ptr= ptr + 1 + 2; /* chunk type and short trid */
9156 uint16 chunk_len;
9157 printf (" Record length: %lu\n",
9158 (ulong) translog_variable_record_1group_decode_len(&hdr_ptr));
9159 chunk_len= uint2korr(hdr_ptr);
9160 if (chunk_len == 0)
9161 printf (" It is 1 group record (chunk length == 0)\n");
9162 else
9163 {
9164 uint16 groups, i;
9165
9166 printf (" Chunk length %u\n", (uint) chunk_len);
9167 groups= uint2korr(hdr_ptr + 2);
9168 hdr_ptr+= 4;
9169 printf (" Number of groups left to the end %u:\n", (uint) groups);
9170 for(i= 0;
9171 i < groups && hdr_ptr < buffer + TRANSLOG_PAGE_SIZE;
9172 i++, hdr_ptr+= LSN_STORE_SIZE + 1)
9173 {
9174 TRANSLOG_ADDRESS gpr_addr= lsn_korr(hdr_ptr);
9175 uint pages= hdr_ptr[LSN_STORE_SIZE];
9176 printf (" Group +#%u: " LSN_FMT " pages: %u\n",
9177 (uint) i, LSN_IN_PARTS(gpr_addr), pages);
9178 }
9179 }
9180 }
9181 break;
9182 case TRANSLOG_CHUNK_FIXED:
9183 printf(" LSN chunk type 1 (fixed size)\n");
9184 printf(" Record type %u: %s record class %s compressed LSNs: %u\n",
9185 ptr[0] & TRANSLOG_REC_TYPE,
9186 (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name ?
9187 log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name :
9188 "NULL"),
9189 record_class_string[log_record_type_descriptor[ptr[0] &
9190 TRANSLOG_REC_TYPE].
9191 rclass],
9192 log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].
9193 compressed_LSN);
9194 if (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
9195 LOGRECTYPE_PSEUDOFIXEDLENGTH &&
9196 log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
9197 LOGRECTYPE_FIXEDLENGTH)
9198 {
9199 printf(" WARNING: this record class here can't be used "
9200 "(stop interpretation)!!!\n");
9201 }
9202 printf(" Short transaction id: %u\n", (uint) uint2korr(ptr + 1));
9203 break;
9204 case TRANSLOG_CHUNK_NOHDR:
9205 printf(" No header chunk type 2(till the end of the page)\n");
9206 if (ptr[0] & TRANSLOG_REC_TYPE)
9207 {
9208 printf(" WARNING: chunk header content record type: 0x%02x "
9209 "(dtop interpretation)!!!",
9210 (uint) ptr[0]);
9211 return NULL;
9212 }
9213 break;
9214 case TRANSLOG_CHUNK_LNGTH:
9215 printf(" Chunk with length type 3\n");
9216 if (ptr[0] & TRANSLOG_REC_TYPE)
9217 {
9218 printf(" WARNING: chunk header content record type: 0x%02x "
9219 "(dtop interpretation)!!!",
9220 (uint) ptr[0]);
9221 return NULL;
9222 }
9223 break;
9224 }
9225 {
9226 intptr offset= ptr - buffer;
9227 DBUG_ASSERT(offset <= UINT_MAX16);
9228 length= translog_get_total_chunk_length(buffer, (uint16)offset);
9229 }
9230 printf(" Length %u\n", length);
9231 ptr+= length;
9232 return ptr;
9233 }
9234
9235
9236 /**
9237 @brief Dump information about page with data.
9238 */
9239
dump_datapage(uchar * buffer,File handler)9240 static void dump_datapage(uchar *buffer, File handler)
9241 {
9242 uchar *ptr;
9243 ulong offset;
9244 uint32 page, file;
9245 uint header_len;
9246 printf(" Page: %ld File number: %ld\n",
9247 (ulong) (page= uint3korr(buffer)),
9248 (ulong) (file= uint3korr(buffer + 3)));
9249 if (page == 0)
9250 printf(" WARNING: page == 0!!!\n");
9251 if (file == 0)
9252 printf(" WARNING: file == 0!!!\n");
9253 offset= page * TRANSLOG_PAGE_SIZE;
9254 printf(" Flags (0x%x):\n", (uint) buffer[TRANSLOG_PAGE_FLAGS]);
9255 if (buffer[TRANSLOG_PAGE_FLAGS])
9256 {
9257 if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_PAGE_CRC)
9258 printf(" Page CRC\n");
9259 if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
9260 printf(" Sector protection\n");
9261 if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_RECORD_CRC)
9262 printf(" Record CRC (WARNING: not yet implemented!!!)\n");
9263 if (buffer[TRANSLOG_PAGE_FLAGS] & ~(TRANSLOG_PAGE_CRC |
9264 TRANSLOG_SECTOR_PROTECTION |
9265 TRANSLOG_RECORD_CRC))
9266 {
9267 printf(" WARNING: unknown flags (stop interpretation)!!!\n");
9268 return;
9269 }
9270 }
9271 else
9272 printf(" No flags\n");
9273 printf(" Page header length: %u\n",
9274 (header_len= page_overhead[buffer[TRANSLOG_PAGE_FLAGS]]));
9275 if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_RECORD_CRC)
9276 {
9277 uint32 crc= uint4korr(buffer + TRANSLOG_PAGE_FLAGS + 1);
9278 uint32 ccrc;
9279 printf (" Page CRC 0x%04lx\n", (ulong) crc);
9280 ccrc= translog_crc(buffer + header_len, TRANSLOG_PAGE_SIZE - header_len);
9281 if (crc != ccrc)
9282 printf(" WARNING: calculated CRC: 0x%04lx!!!\n", (ulong) ccrc);
9283 }
9284 if (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION)
9285 {
9286 TRANSLOG_FILE tfile;
9287 {
9288 uchar *table= buffer + header_len -
9289 TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
9290 uint i;
9291 printf(" Sector protection current value: 0x%02x\n", (uint) table[0]);
9292 for (i= 1; i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE; i++)
9293 {
9294 printf(" Sector protection in sector: 0x%02x saved value 0x%02x\n",
9295 (uint)buffer[i * DISK_DRIVE_SECTOR_SIZE],
9296 (uint)table[i]);
9297 }
9298 }
9299 tfile.number= file;
9300 bzero(&tfile.handler, sizeof(tfile.handler));
9301 tfile.handler.file= handler;
9302 tfile.was_recovered= 0;
9303 tfile.is_sync= 1;
9304 if (translog_check_sector_protection(buffer, &tfile))
9305 printf(" WARNING: sector protection found problems!!!\n");
9306 }
9307 ptr= buffer + header_len;
9308 while (ptr && ptr < buffer + TRANSLOG_PAGE_SIZE)
9309 {
9310 printf(" Chunk %d %lld:\n",
9311 file,((longlong) (ptr - buffer)+ offset));
9312 ptr= dump_chunk(buffer, ptr);
9313 }
9314 }
9315
9316
9317 /**
9318 @brief Dump information about page.
9319 */
9320
dump_page(uchar * buffer,File handler)9321 void dump_page(uchar *buffer, File handler)
9322 {
9323 if (strncmp((char*)maria_trans_file_magic, (char*)buffer,
9324 sizeof(maria_trans_file_magic)) == 0)
9325 {
9326 dump_header_page(buffer);
9327 }
9328 dump_datapage(buffer, handler);
9329 }
9330
9331
9332 /*
9333 Handle backup calls
9334 */
9335
/**
  @brief Disables purging of log files

  Increments log_purge_disabled under log_descriptor.purger_lock. While the
  counter is non-zero, translog_purge() and translog_purge_at_flush() do
  not delete log files. Because it is a counter, calls are presumably meant
  to be paired with translog_enable_purge().
*/

void translog_disable_purge()
{
  mysql_mutex_lock(&log_descriptor.purger_lock);
  log_purge_disabled++;
  mysql_mutex_unlock(&log_descriptor.purger_lock);
}
9342
/**
  @brief Re-enables purging of log files

  Decrements log_purge_disabled under log_descriptor.purger_lock.
  NOTE(review): the counter is not checked for being non-zero before the
  decrement; callers presumably must have called translog_disable_purge()
  first - confirm.
*/

void translog_enable_purge()
{
  mysql_mutex_lock(&log_descriptor.purger_lock);
  log_purge_disabled--;
  mysql_mutex_unlock(&log_descriptor.purger_lock);
}
9349