1 /* Copyright (C) 2007 MySQL AB & Sanja Belkin. 2010 Monty Program Ab.
2    Copyright (c) 2020, MariaDB Corporation.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16 
17 #include "maria_def.h"
18 #include "trnman.h"
19 #include "ma_blockrec.h" /* for some constants and in-write hooks */
20 #include "ma_key_recover.h" /* For some in-write hooks */
21 #include "ma_checkpoint.h"
22 #include "ma_servicethread.h"
23 #include "ma_recovery.h"
24 #include "ma_loghandler_lsn.h"
25 #include "ma_recovery_util.h"
26 
/*
  On Windows, neither my_open() nor mysql_file_sync() works for directories.
  There is also no need to flush filesystem changes, i.e. to sync()
  directories.
*/
32 #ifdef __WIN__
33 #define sync_dir(A,B) 0
34 #else
35 #define sync_dir(A,B) mysql_file_sync(A,B)
36 #endif
37 
38 /**
39    @file
40    @brief Module which writes and reads to a transaction log
41 */
42 
/* 0xFF can never be a valid first byte of a chunk */
#define TRANSLOG_FILLER 0xFF

/* number of opened log files in the pagecache (should be at least 2) */
#define OPENED_FILES_NUM 3
#define CACHED_FILES_NUM 5
#define CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT 7
/*
  The hash headers are included only when CACHED_FILES_NUM is larger than
  CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT, which is not the case with the
  values defined above.
*/
#if CACHED_FILES_NUM > CACHED_FILES_NUM_DIRECT_SEARCH_LIMIT
#include <hash.h>
#include <m_ctype.h>
#endif
54 
/**
  @brief mutex handed to soft_sync_control

  NOTE(review): the previous wording ("protects checkpoint_in_progress")
  looks copied from the checkpoint module - confirm what it guards here.
*/
static mysql_mutex_t LOCK_soft_sync;
/**
  @brief condition variable handed to soft_sync_control

  NOTE(review): previously described as "for killing the background
  checkpoint thread"; presumably it serves the soft-sync thread - confirm.
*/
static mysql_cond_t  COND_soft_sync;
/**
  @brief control structure for the soft-sync background thread
  (TODO confirm: the previous wording said "checkpoint background thread")
*/
static MA_SERVICE_THREAD_CONTROL soft_sync_control=
  {0, FALSE, FALSE, &LOCK_soft_sync, &COND_soft_sync};
62 
63 uint log_purge_disabled= 0;
64 
65 
/*
  Transaction log file descriptor.

  NOTE(review): the per-field notes below are inferred from the field names
  and types only - confirm against the code that fills the structure.
*/
typedef struct st_translog_file
{
  /* number of the log file */
  uint32 number;
  /* page cache file object for this log file */
  PAGECACHE_FILE handler;
  /* presumably set when the file went through recovery - TODO confirm */
  my_bool was_recovered;
  /* presumably set when the file has been synced to disk - TODO confirm */
  my_bool is_sync;
} TRANSLOG_FILE;
74 
/* records buffer size (should be TRANSLOG_PAGE_SIZE * n) */
#define TRANSLOG_WRITE_BUFFER (1024*1024)
/*
  pagecache_read/write/inject() use bmove512() on their buffers, so those
  must be long-aligned, which we guarantee by using the type below:
*/
typedef union
{
  /* present only to force the alignment described above */
  ulonglong dummy;
  /* room for one log page (TRANSLOG_PAGE_SIZE bytes) */
  uchar buffer[TRANSLOG_PAGE_SIZE];
} TRANSLOG_PAGE_SIZE_BUFF;
86 
/* Capacity of the buff[] array in TRUNSLOG_USED_BUFFERS */
#define MAX_TRUNSLOG_USED_BUFFERS 3

/*
  Fixed-size list of pointers to log buffers.

  NOTE(review): wrt_ptr and unlck_ptr look like the "next write" and
  "next unlock" positions in buff[] - confirm against used_buffs_add(),
  used_buffs_register_unlock() and used_buffs_urgent_unlock().
*/
typedef struct
{
 struct st_translog_buffer *buff[MAX_TRUNSLOG_USED_BUFFERS];
 uint8 wrt_ptr;
 uint8 unlck_ptr;
} TRUNSLOG_USED_BUFFERS;
95 
96 static void
used_buffs_init(TRUNSLOG_USED_BUFFERS * buffs)97 used_buffs_init(TRUNSLOG_USED_BUFFERS *buffs)
98 {
99   buffs->unlck_ptr= buffs->wrt_ptr= 0;
100 }
101 
102 static void
103 used_buffs_add(TRUNSLOG_USED_BUFFERS *buffs,
104                 struct st_translog_buffer *buff);
105 
106 static void
107 used_buffs_register_unlock(TRUNSLOG_USED_BUFFERS *buffs,
108                            struct st_translog_buffer *buff);
109 
110 static void
111 used_buffs_urgent_unlock(TRUNSLOG_USED_BUFFERS *buffs);
112 
/* min chunk length */
#define TRANSLOG_MIN_CHUNK 3
/*
  Number of buffers used by loghandler

  Should be at least 4, because one thread can block up to 2 buffers in
  normal circumstances (less than half of one and the full other, or just
  switched one and the other). But if we meet the end of the file in the
  middle and have to switch buffer, it will be 3.  + 1 buffer for
  flushing/writing.
  We have a bigger number here for higher concurrency and to make division
  faster.

  The number should be a power of 2 to be fast.
*/
#define TRANSLOG_BUFFERS_NO 8
/* number of bytes (+ header) which can be unused on first page in sequence */
#define TRANSLOG_MINCHUNK_CONTENT 1
/* version of log file */
#define TRANSLOG_VERSION_ID 10000               /* 1.00.00 */

#define TRANSLOG_PAGE_FLAGS 6 /* transaction log page flags offset */

/* Maximum length of compressed LSNs (the worst case of whole LSN storing) */
#define COMPRESSED_LSN_MAX_STORE_SIZE (2 + LSN_STORE_SIZE)
#define MAX_NUMBER_OF_LSNS_PER_RECORD 2


/*
  Max LSN calculation for a buffer: its own last_lsn, or prev_last_lsn when
  last_lsn is LSN_IMPOSSIBLE.
  B is evaluated more than once, so pass an expression without side effects.
*/
#define BUFFER_MAX_LSN(B)  \
  ((B)->last_lsn == LSN_IMPOSSIBLE ? (B)->prev_last_lsn : (B)->last_lsn)
143 
/*
  Log write buffer descriptor.

  log_descriptor holds TRANSLOG_BUFFERS_NO of these; each one carries its
  own mutex and condition variables.
*/
struct st_translog_buffer
{
  /*
    Cache for current log. Comes first to be aligned for bmove512() in
    pagecache_inject()
  */
  uchar buffer[TRANSLOG_WRITE_BUFFER];
  /*
    Maximum LSN of the records which end in this buffer (or LSN_IMPOSSIBLE
    if no LSN ends here)
  */
  LSN last_lsn;
  /*
    last_lsn of the previous buffer, or LSN_IMPOSSIBLE if this is the very
    first one
  */
  LSN prev_last_lsn;
  /* Offset of this buffer in the file */
  TRANSLOG_ADDRESS offset;
  /*
    Next buffer offset in the file (it is not always offset + size,
    in case of flush by LSN it can be offset + size - TRANSLOG_PAGE_SIZE)
  */
  TRANSLOG_ADDRESS next_buffer_offset;
  /* Previous buffer offset, used to detect when its flush has finished */
  TRANSLOG_ADDRESS prev_buffer_offset;
  /*
    If the buffer was forced to close, this saves the value of its horizon;
    otherwise LSN_IMPOSSIBLE
  */
  TRANSLOG_ADDRESS pre_force_close_horizon;
  /*
     How much is written (or will be written when copy_to_buffer_in_progress
     becomes 0) to this buffer
  */
  translog_size_t size;
  /*
     When moving from one log buffer to another, we write the last of the
     previous buffer to file and then move to start using the new log
     buffer.  In the case of a partly filled last page, this page is not
     moved to the start of the new buffer; instead we set 'skipped_data'
     to tell how much data at the beginning of the buffer is not relevant.
  */
  uint skipped_data;
  /* File handler for this buffer */
  TRANSLOG_FILE *file;
  /* Threads which are waiting for buffer filling/freeing */
  mysql_cond_t waiting_filling_buffer;
  /*
    Number of records which are in copy progress.

    Controlled via translog_buffer_increase_writers() and
    translog_buffer_decrease_writers().

    1 Simple case: translog_force_current_buffer_to_finish both called in
    the same procedure.

    2 Simple case: translog_write_variable_record_1group:
    translog_advance_pointer() increases the writers of the buffer and
    translog_buffer_decrease_writers() decreases them.

    Usual case:
     1) translog_advance_pointer (i.e. reserve place for future writing)
     increases writers for all buffers where place is reserved.
       Simplest case: all the space is reserved in one buffer
       complex case: end of the first buffer, all of the second buffer,
       beginning of the third buffer.
     2) When we finish writing, translog_chaser_page_next() will be
     called and unlock the buffer by decreasing the number of writers.
  */
  uint copy_to_buffer_in_progress;
  /* list of threads waiting for the buffer to become ready */
  struct st_my_thread_var *waiting_flush;
  /*
    If true, the previous buffer overlaps with this one (due to a flush of
    the loghandler, the last page of that buffer is the same as the first
    page of this buffer) and has to be written first (because it contains
    the old content of the page which is present in both buffers)
  */
  my_bool overlay;
  /*
    Number of this buffer (presumably its index in log_descriptor.buffers[]
    - TODO confirm)
  */
  uint buffer_no;
  /*
    Lock for the buffer.

    The current buffer also locks the whole handler (if one wants to lock
    the handler one should lock the current buffer).

    Buffers are locked only in one direction (with overflow and beginning
    from the first buffer). If we keep a lock on buffer N we can lock only
    buffer N+1 (never N-1).

    One thread does not lock more than 2 buffers at a time, so to get a
    deadlock N threads (where N equals the number of buffers) would each
    have to take one buffer and try to lock the next. But this is
    impossible because there are only 2 cases when a thread takes 2
    buffers: 1) one thread finishes the current buffer (where the horizon
    is) and starts the next (to which the horizon moves).  2) flush starts
    from the buffer after the current (the oldest) and goes till the
    current, crabbing along the buffer sequence. And there is only one
    flush at a time (they are serialised).

    NOTE(review): the original text concluded "number of buffers equal 5 ...
    impossible to get all 5 buffers locked simultaneously", but
    TRANSLOG_BUFFERS_NO is 8 in this file; the argument is about all
    TRANSLOG_BUFFERS_NO buffers being locked at once.
  */
  mysql_mutex_t mutex;
  /*
    Some thread is going to close the buffer and it should be
    done only by that thread
  */
  my_bool is_closing_buffer;
  /*
    Version of the buffer; increases every time the buffer is flushed.
    Together with file and offset it allows detecting buffer changes
  */
  uint8 ver;

  /*
    When the previous buffer is sent to disk it sets its address here to
    allow detecting when that is done
    (we have to keep it in this buffer to lock buffers only in one direction).
  */
  TRANSLOG_ADDRESS prev_sent_to_disk;
  /* condition variable paired with prev_sent_to_disk */
  mysql_cond_t prev_sent_to_disk_cond;
};
266 
267 
/*
  Cursor over the log write buffers: a position (ptr) inside one
  st_translog_buffer plus per-page bookkeeping.  log_descriptor.bc is the
  cursor that follows the horizon.
*/
struct st_buffer_cursor
{
  /*
    list of buffers used through this cursor (see TRUNSLOG_USED_BUFFERS;
    exact usage to be confirmed against the used_buffs_*() helpers)
  */
  TRUNSLOG_USED_BUFFERS buffs;
  /* pointer into the buffer */
  uchar *ptr;
  /* current buffer */
  struct st_translog_buffer *buffer;
  /* How many bytes we wrote on the current page */
  uint16 current_page_fill;
  /*
    How many times we wrote the page to the disk during the flushing process
    (for sector protection).
  */
  uint16 write_counter;
  /* previous write offset */
  uint16 previous_offset;
  /* Number of the current buffer */
  uint8 buffer_no;
  /*
    True if the cursor is just filling the buffer after the pointer was
    advanced to the horizon.
  */
  my_bool chaser;
  /*
    Is the current page of the cursor already finished (sector protection
    should be applied if it is needed)
  */
  my_bool protected;
};
297 
298 
/*
  Bit mask with one bit per log buffer (see dirty_buffer_mask below);
  uint8 provides the 8 bits needed for TRANSLOG_BUFFERS_NO (8) buffers.
*/
typedef uint8 dirty_buffer_mask_t;

/*
  Complete state of the log handler: configuration parameters followed by
  the current run-time state.  The single instance is log_descriptor.
*/
struct st_translog_descriptor
{
  /* *** Parameters of the log handler *** */

  /* Page cache for the log reads */
  PAGECACHE *pagecache;
  /*
    log handler flags; page_overhead below is calculated from them
    (whether CRC is enabled, etc)
  */
  uint flags;
  /* File open flags */
  uint open_flags;
  /* max size of one log file (for new logs creation) */
  uint32 log_file_max_size;
  uint32 server_version;
  /* server ID (used for replication) */
  uint32 server_id;
  /* Loghandler's buffer capacity in case of chunk 2 filling */
  uint32 buffer_capacity_chunk_2;
  /*
    Half of the buffer capacity in case of chunk 2 filling,
    used to decide whether we write a record in one group or many.
    It is stored in a variable just to avoid a division every
    time we need it.
  */
  uint32 half_buffer_capacity_chunk_2;
  /* Page overhead calculated by flags (whether CRC is enabled, etc) */
  uint16 page_overhead;
  /*
    Page capacity ("useful load") calculated by flags
    (TRANSLOG_PAGE_SIZE - page_overhead-1)
  */
  uint16 page_capacity_chunk_2;
  /* Path to the directory where we store the log files */
  char directory[FN_REFLEN];

  /* *** Current state of the log handler *** */
  /* list of opened files */
  DYNAMIC_ARRAY open_files;
  /* min/max number of file in the array */
  uint32 max_file, min_file;
  /* the opened files list guard */
  mysql_rwlock_t open_files_lock;

  /*
    File descriptor of the directory where we store log files, kept for
    syncing it.
  */
  File directory_fd;
  /* buffers for log writing */
  struct st_translog_buffer buffers[TRANSLOG_BUFFERS_NO];
  /* Mask where 1 in position N means that buffer N is not flushed */
  dirty_buffer_mask_t dirty_buffer_mask;
  /* Protection of the above variable */
  mysql_mutex_t dirty_buffer_mask_lock;
  /*
     horizon - visible end of the log (here it is the absolute end of the
     log: the position where the next chunk can start)
  */
  TRANSLOG_ADDRESS horizon;
  /* horizon buffer cursor */
  struct st_buffer_cursor bc;
  /* maximum LSN of the current (not finished) file */
  LSN max_lsn;

  /*
    Last flushed LSN (protected by log_flush_lock).
    Pointers in the log are ordered like this:
    last_lsn_checked <= flushed <= sent_to_disk <= in_buffers_only <=
    max_lsn <= horizon
  */
  LSN flushed;
  /* Last LSN sent to the disk (but maybe not written yet) */
  LSN sent_to_disk;
  /* Horizon from which log started after initialization */
  TRANSLOG_ADDRESS log_start;
  /* presumably the horizon at the previous flush - TODO confirm */
  TRANSLOG_ADDRESS previous_flush_horizon;
  /* Everything after this address is not sent to disk yet */
  TRANSLOG_ADDRESS in_buffers_only;
  /* protection of sent_to_disk and in_buffers_only */
  mysql_mutex_t sent_to_disk_lock;
  /*
    Protects flushed (see above) and serializes flushes (will
    be removed in v1.5)
  */
  mysql_mutex_t log_flush_lock;
  mysql_cond_t log_flush_cond;
  mysql_cond_t new_goal_cond;

  /* Protects changing of headers of finished files (max_lsn) */
  mysql_mutex_t file_header_lock;

  /*
    Sorted array (with protection) of files where we started the writing
    process and so we can't give the last LSN yet
  */
  mysql_mutex_t unfinished_files_lock;
  DYNAMIC_ARRAY unfinished_files;

  /*
    minimum number of the still needed file, calculated during the last
    translog_purge call
  */
  uint32 min_need_file;
  /* Purger data: minimum file in the log (or 0 if unknown) */
  uint32 min_file_number;
  /* Protects the purger from concurrent calls, and its data */
  mysql_mutex_t purger_lock;
  /* last low water mark checked */
  LSN last_lsn_checked;
  /**
    Must be set to 0 under loghandler lock every time a new LSN
    is generated.
  */
  my_bool is_everything_flushed;
  /* True when a flush pass is in progress */
  my_bool flush_in_progress;
  /* The flush number (used to distinguish two flushes going one by one) */
  volatile int flush_no;
  /* Next flush pass variables */
  TRANSLOG_ADDRESS next_pass_max_lsn;
  pthread_t max_lsn_requester;
};
421 
422 static struct st_translog_descriptor log_descriptor;
423 
/*
  Tunables of the log handler with their initial values.
  NOTE(review): these are non-static and presumably set from outside this
  file - confirm against their users.
*/
ulong log_purge_type= TRANSLOG_PURGE_IMMIDIATE;
ulong log_file_size= TRANSLOG_FILE_SIZE;
/* sync() of log files directory mode */
ulong sync_log_dir= TRANSLOG_SYNC_DIR_NEWFILE;
ulong maria_group_commit= TRANSLOG_GCOMMIT_NONE;
ulong maria_group_commit_interval= 0;

/*
  Marker for end of log: END_OF_LOG is the address of a dedicated static
  byte, so it cannot be equal to the address of any other object.
*/
static uchar end_of_log= 0;
#define END_OF_LOG &end_of_log
/**
  Switch for "soft" sync (no real sync() but periodical sync by service
  thread)
*/
static volatile my_bool soft_sync= FALSE;
/**
  Switch for "hard" group commit mode
*/
static volatile my_bool hard_group_commit= FALSE;
/**
  Interval of file numbers which have to be sync()ed
*/
static uint32 soft_sync_min= 0;
static uint32 soft_sync_max= 0;
static uint32 soft_need_sync= 1;
/**
  stores interval in microseconds
*/
static uint32 group_commit_wait= 0;

/* Status of the log handler; TRANSLOG_UNINITED until it is changed */
enum enum_translog_status translog_status= TRANSLOG_UNINITED;
ulonglong translog_syncs= 0; /* Number of sync()s */

/* time of last flush */
static ulonglong flush_start= 0;
459 
/*
  chunk types

  As the masks show, the chunk type occupies the two high bits (0xC0) and
  the record type the six low bits (0x3F) of the same byte.
*/
#define TRANSLOG_CHUNK_LSN   0x00      /* 0 chunk referred to as LSN (head or tail) */
#define TRANSLOG_CHUNK_FIXED (1 << 6)  /* 1 (pseudo)fixed record (also LSN) */
#define TRANSLOG_CHUNK_NOHDR (2 << 6)  /* 2 no head chunk (till page end) */
#define TRANSLOG_CHUNK_LNGTH (3 << 6)  /* 3 chunk with chunk length */
#define TRANSLOG_CHUNK_TYPE  (3 << 6)  /* Mask to get chunk type */
#define TRANSLOG_REC_TYPE    0x3F      /* Mask to get record type */
#define TRANSLOG_CHUNK_0_CONT 0x3F     /* the type to mark chunk 0 continue */

/* compressed (relative) LSN constants */
#define TRANSLOG_CLSN_LEN_BITS 0xC0    /* Mask to get compressed LSN length */
471 
472 
473 /* an array that maps id of a MARIA_SHARE to this MARIA_SHARE */
474 static MARIA_SHARE **id_to_share= NULL;
475 
476 static my_bool translog_page_validator(int res, PAGECACHE_IO_HOOK_ARGS *args);
477 
478 static my_bool translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner);
479 static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected);
480 LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon);
481 
482 
483 /*
484   Initialize log_record_type_descriptors
485 */
486 
487 LOG_DESC log_record_type_descriptor[LOGREC_NUMBER_OF_TYPES];
488 
489 
490 #ifndef DBUG_OFF
491 
492 #define translog_buffer_lock_assert_owner(B) \
493   mysql_mutex_assert_owner(&(B)->mutex)
494 #define translog_lock_assert_owner() \
495   mysql_mutex_assert_owner(&log_descriptor.bc.buffer->mutex)
/**
  @brief Asserts that the caller owns the loghandler lock

  Function form of translog_lock_assert_owner(): checks ownership of the
  mutex of the buffer the horizon cursor (log_descriptor.bc) points to.
  Compiled only in debug builds (it is inside #ifndef DBUG_OFF).

  Declared with an explicit (void) parameter list so that the definition
  is a real prototype and calls with arguments are rejected by the compiler.
*/

void translog_lock_handler_assert_owner(void)
{
  translog_lock_assert_owner();
}
500 
501 /**
502   @brief check the description table validity
503 
504   @param num             how many records should be filled
505 */
506 
507 static uint max_allowed_translog_type= 0;
508 
check_translog_description_table(int num)509 void check_translog_description_table(int num)
510 {
511   int i;
512   DBUG_ENTER("check_translog_description_table");
513   DBUG_PRINT("enter", ("last record: %d", num));
514   DBUG_ASSERT(num > 0);
515   /* last is reserved for extending the table */
516   DBUG_ASSERT(num < LOGREC_NUMBER_OF_TYPES - 1);
517   DBUG_ASSERT(log_record_type_descriptor[0].rclass == LOGRECTYPE_NOT_ALLOWED);
518   max_allowed_translog_type= num;
519 
520   for (i= 0; i <= num; i++)
521   {
522     DBUG_PRINT("info",
523                ("record type: %d  class: %d  fixed: %u  header: %u  LSNs: %u  "
524                 "name: %s",
525                 i, log_record_type_descriptor[i].rclass,
526                 (uint)log_record_type_descriptor[i].fixed_length,
527                 (uint)log_record_type_descriptor[i].read_header_len,
528                 (uint)log_record_type_descriptor[i].compressed_LSN,
529                 log_record_type_descriptor[i].name));
530     switch (log_record_type_descriptor[i].rclass) {
531     case LOGRECTYPE_NOT_ALLOWED:
532       DBUG_ASSERT(i == 0);
533       break;
534     case LOGRECTYPE_VARIABLE_LENGTH:
535       DBUG_ASSERT(log_record_type_descriptor[i].fixed_length == 0);
536       DBUG_ASSERT((log_record_type_descriptor[i].compressed_LSN == 0) ||
537                   ((log_record_type_descriptor[i].compressed_LSN == 1) &&
538                    (log_record_type_descriptor[i].read_header_len >=
539                     LSN_STORE_SIZE)) ||
540                   ((log_record_type_descriptor[i].compressed_LSN == 2) &&
541                    (log_record_type_descriptor[i].read_header_len >=
542                     LSN_STORE_SIZE * 2)));
543       break;
544     case LOGRECTYPE_PSEUDOFIXEDLENGTH:
545       DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
546                   log_record_type_descriptor[i].read_header_len);
547       DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN > 0);
548       DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN <= 2);
549       break;
550     case LOGRECTYPE_FIXEDLENGTH:
551       DBUG_ASSERT(log_record_type_descriptor[i].fixed_length ==
552                   log_record_type_descriptor[i].read_header_len);
553       DBUG_ASSERT(log_record_type_descriptor[i].compressed_LSN == 0);
554       break;
555     default:
556       DBUG_ASSERT(0);
557     }
558   }
559   for (i= num + 1; i < LOGREC_NUMBER_OF_TYPES; i++)
560   {
561     DBUG_ASSERT(log_record_type_descriptor[i].rclass ==
562                 LOGRECTYPE_NOT_ALLOWED);
563   }
564   DBUG_VOID_RETURN;
565 }
566 #else
/*
  Non-debug builds: the lock ownership assertions expand to nothing.
  do {} while (0) is used instead of a bare {} so that each macro is a
  single statement which needs its trailing ';' and therefore stays valid
  inside an unbraced if/else.
*/
#define translog_buffer_lock_assert_owner(B) do {} while (0)
#define translog_lock_assert_owner() do {} while (0)
569 #endif
570 
/*
  Descriptors for the reserved record type and the REDO record types;
  translog_table_init() copies each of them into the matching slot of
  log_record_type_descriptor[].

  NOTE(review): LOG_DESC is declared outside this file, so the initializers
  are positional.  Judging by the fields examined in
  check_translog_description_table(), the values appear to be, in order:
  record class, fixed length, read header length, (pointer), write hook,
  (pointer), number of compressed LSNs, name, group flag, (two pointers) -
  confirm against the LOG_DESC declaration before editing.
*/
static LOG_DESC INIT_LOGREC_RESERVED_FOR_CHUNKS23=
{LOGRECTYPE_NOT_ALLOWED, 0, 0, NULL, NULL, NULL, 0,
 "reserved", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL };

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_HEAD=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_insert_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_TAIL=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_insert_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_NEW_ROW_HEAD=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_new_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_NEW_ROW_TAIL=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_new_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INSERT_ROW_BLOBS=
{LOGRECTYPE_VARIABLE_LENGTH, 0, FILEID_STORE_SIZE, NULL,
 write_hook_for_redo, NULL, 0,
 "redo_insert_row_blobs", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_HEAD=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_purge_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_PURGE_ROW_TAIL=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_purge_row_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_FREE_BLOCKS=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGERANGE_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_free_blocks", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL=
{LOGRECTYPE_FIXEDLENGTH,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_free_head_or_tail", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

/* not yet used; for when we have versioning */
static LOG_DESC INIT_LOGREC_REDO_DELETE_ROW=
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
 "redo_delete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

/** @todo RECOVERY BUG unused, remove? */
static LOG_DESC INIT_LOGREC_REDO_UPDATE_ROW_HEAD=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
 "redo_update_row_head", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INDEX=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 9, NULL, write_hook_for_redo, NULL, 0,
 "redo_index", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INDEX_NEW_PAGE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2 + KEY_NR_STORE_SIZE + 1,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_index_new_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_INDEX_FREE_PAGE=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 NULL, write_hook_for_redo, NULL, 0,
 "redo_index_free_page", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW=
{LOGRECTYPE_FIXEDLENGTH, 16, 16, NULL, write_hook_for_redo, NULL, 0,
 "redo_undelete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL};
660 
/*
  Descriptors for the CLR_END, PURGE_END and UNDO record types;
  translog_table_init() copies each of them into the matching slot of
  log_record_type_descriptor[].

  All of them are flagged LOGREC_LAST_IN_GROUP and carry the value 1 in the
  position that appears to be the number of compressed LSNs (NOTE(review):
  the initializers are positional and LOG_DESC is declared elsewhere -
  confirm the field order against its declaration).
*/
static LOG_DESC INIT_LOGREC_CLR_END=
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE +
 CLR_TYPE_STORE_SIZE, NULL, write_hook_for_clr_end, NULL, 1,
 "clr_end", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_PURGE_END=
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
 "purge_end", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_undo_row_insert, NULL, 1,
 "undo_row_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_undo_row_delete, NULL, 1,
 "undo_row_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE,
 NULL, write_hook_for_undo_row_update, NULL, 1,
 "undo_row_update", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
 NULL, write_hook_for_undo_key_insert, NULL, 1,
 "undo_key_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};

/* This will never be in the log, only in the clr */
static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_undo_key, NULL, 1,
 "undo_key_insert_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE,
 NULL, write_hook_for_undo_key_delete, NULL, 1,
 "undo_key_delete", LOGREC_LAST_IN_GROUP, NULL, NULL};

/* Same header layout as UNDO_KEY_DELETE plus one PAGE_STORE_SIZE field */
static LOG_DESC INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE + KEY_NR_STORE_SIZE + PAGE_STORE_SIZE,
 NULL, write_hook_for_undo_key_delete, NULL, 1,
 "undo_key_delete_with_root", LOGREC_LAST_IN_GROUP, NULL, NULL};
712 
/*
  Descriptors for the remaining record types (prepare/commit, checkpoint,
  table-level operations, file/transaction ids, incomplete-log markers,
  bulk insert undo, bitmap page, imported table and debug info).  Except
  for UNDO_BULK_INSERT (LOGREC_LAST_IN_GROUP) they are all flagged
  LOGREC_IS_GROUP_ITSELF.

  They are presumably installed into log_record_type_descriptor[] by
  translog_table_init() like the descriptors defined before them.

  NOTE(review): the initializers are positional and LOG_DESC is declared
  elsewhere - confirm the field order against its declaration.
*/
static LOG_DESC INIT_LOGREC_PREPARE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "prepare", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_PREPARE_WITH_UNDO_PURGE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE, NULL, NULL, NULL, 1,
 "prepare_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_COMMIT=
{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL,
 write_hook_for_commit, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
 NULL};

static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE=
{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_commit, NULL, 1,
 "commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_CHECKPOINT=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "checkpoint", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_CREATE_TABLE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 1 + 2, NULL, NULL, NULL, 0,
"redo_create_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_RENAME_TABLE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "redo_rename_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_DROP_TABLE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "redo_drop_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_DELETE_ALL=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
 NULL, write_hook_for_redo_delete_all, NULL, 0,
 "redo_delete_all", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_REDO_REPAIR_TABLE=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + 8 + 8, FILEID_STORE_SIZE + 8 + 8,
 NULL, NULL, NULL, 0,
 "redo_repair_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_FILE_ID=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 2, NULL, write_hook_for_file_id, NULL, 0,
 "file_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID=
{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0,
 "long_transaction_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_INCOMPLETE_LOG=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
 NULL, NULL, NULL, 0,
 "incomplete_log", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_INCOMPLETE_GROUP=
{LOGRECTYPE_FIXEDLENGTH, 0, 0,
 NULL, NULL, NULL, 0,
 "incomplete_group", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_UNDO_BULK_INSERT=
{LOGRECTYPE_VARIABLE_LENGTH, 0,
 LSN_STORE_SIZE + FILEID_STORE_SIZE,
 NULL, write_hook_for_undo_bulk_insert, NULL, 1,
 "undo_bulk_insert", LOGREC_LAST_IN_GROUP, NULL, NULL};

/*
  NOTE(review): the name string is "redo_create_bitmap" although the
  identifier says REDO_BITMAP_NEW_PAGE.
*/
static LOG_DESC INIT_LOGREC_REDO_BITMAP_NEW_PAGE=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 FILEID_STORE_SIZE + PAGE_STORE_SIZE * 2,
 NULL, NULL, NULL, 0,
 "redo_create_bitmap", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

static LOG_DESC INIT_LOGREC_IMPORTED_TABLE=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "imported_table", LOGREC_IS_GROUP_ITSELF, NULL, NULL};

/* The name string of this descriptor is just "info" */
static LOG_DESC INIT_LOGREC_DEBUG_INFO=
{LOGRECTYPE_VARIABLE_LENGTH, 0, 0, NULL, NULL, NULL, 0,
 "info", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
793 
794 const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL;
795 
translog_table_init()796 void translog_table_init()
797 {
798   int i;
799   log_record_type_descriptor[LOGREC_RESERVED_FOR_CHUNKS23]=
800     INIT_LOGREC_RESERVED_FOR_CHUNKS23;
801   log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_HEAD]=
802     INIT_LOGREC_REDO_INSERT_ROW_HEAD;
803   log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_TAIL]=
804     INIT_LOGREC_REDO_INSERT_ROW_TAIL;
805   log_record_type_descriptor[LOGREC_REDO_NEW_ROW_HEAD]=
806     INIT_LOGREC_REDO_NEW_ROW_HEAD;
807   log_record_type_descriptor[LOGREC_REDO_NEW_ROW_TAIL]=
808     INIT_LOGREC_REDO_NEW_ROW_TAIL;
809   log_record_type_descriptor[LOGREC_REDO_INSERT_ROW_BLOBS]=
810     INIT_LOGREC_REDO_INSERT_ROW_BLOBS;
811   log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_HEAD]=
812     INIT_LOGREC_REDO_PURGE_ROW_HEAD;
813   log_record_type_descriptor[LOGREC_REDO_PURGE_ROW_TAIL]=
814     INIT_LOGREC_REDO_PURGE_ROW_TAIL;
815   log_record_type_descriptor[LOGREC_REDO_FREE_BLOCKS]=
816     INIT_LOGREC_REDO_FREE_BLOCKS;
817   log_record_type_descriptor[LOGREC_REDO_FREE_HEAD_OR_TAIL]=
818     INIT_LOGREC_REDO_FREE_HEAD_OR_TAIL;
819   log_record_type_descriptor[LOGREC_REDO_DELETE_ROW]=
820     INIT_LOGREC_REDO_DELETE_ROW;
821   log_record_type_descriptor[LOGREC_REDO_UPDATE_ROW_HEAD]=
822     INIT_LOGREC_REDO_UPDATE_ROW_HEAD;
823   log_record_type_descriptor[LOGREC_REDO_INDEX]=
824     INIT_LOGREC_REDO_INDEX;
825   log_record_type_descriptor[LOGREC_REDO_INDEX_NEW_PAGE]=
826     INIT_LOGREC_REDO_INDEX_NEW_PAGE;
827   log_record_type_descriptor[LOGREC_REDO_INDEX_FREE_PAGE]=
828     INIT_LOGREC_REDO_INDEX_FREE_PAGE;
829   log_record_type_descriptor[LOGREC_REDO_UNDELETE_ROW]=
830     INIT_LOGREC_REDO_UNDELETE_ROW;
831   log_record_type_descriptor[LOGREC_CLR_END]=
832     INIT_LOGREC_CLR_END;
833   log_record_type_descriptor[LOGREC_PURGE_END]=
834     INIT_LOGREC_PURGE_END;
835   log_record_type_descriptor[LOGREC_UNDO_ROW_INSERT]=
836     INIT_LOGREC_UNDO_ROW_INSERT;
837   log_record_type_descriptor[LOGREC_UNDO_ROW_DELETE]=
838     INIT_LOGREC_UNDO_ROW_DELETE;
839   log_record_type_descriptor[LOGREC_UNDO_ROW_UPDATE]=
840     INIT_LOGREC_UNDO_ROW_UPDATE;
841   log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT]=
842     INIT_LOGREC_UNDO_KEY_INSERT;
843   log_record_type_descriptor[LOGREC_UNDO_KEY_INSERT_WITH_ROOT]=
844     INIT_LOGREC_UNDO_KEY_INSERT_WITH_ROOT;
845   log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE]=
846     INIT_LOGREC_UNDO_KEY_DELETE;
847   log_record_type_descriptor[LOGREC_UNDO_KEY_DELETE_WITH_ROOT]=
848     INIT_LOGREC_UNDO_KEY_DELETE_WITH_ROOT;
849   log_record_type_descriptor[LOGREC_PREPARE]=
850     INIT_LOGREC_PREPARE;
851   log_record_type_descriptor[LOGREC_PREPARE_WITH_UNDO_PURGE]=
852     INIT_LOGREC_PREPARE_WITH_UNDO_PURGE;
853   log_record_type_descriptor[LOGREC_COMMIT]=
854     INIT_LOGREC_COMMIT;
855   log_record_type_descriptor[LOGREC_COMMIT_WITH_UNDO_PURGE]=
856     INIT_LOGREC_COMMIT_WITH_UNDO_PURGE;
857   log_record_type_descriptor[LOGREC_CHECKPOINT]=
858     INIT_LOGREC_CHECKPOINT;
859   log_record_type_descriptor[LOGREC_REDO_CREATE_TABLE]=
860     INIT_LOGREC_REDO_CREATE_TABLE;
861   log_record_type_descriptor[LOGREC_REDO_RENAME_TABLE]=
862     INIT_LOGREC_REDO_RENAME_TABLE;
863   log_record_type_descriptor[LOGREC_REDO_DROP_TABLE]=
864     INIT_LOGREC_REDO_DROP_TABLE;
865   log_record_type_descriptor[LOGREC_REDO_DELETE_ALL]=
866     INIT_LOGREC_REDO_DELETE_ALL;
867   log_record_type_descriptor[LOGREC_REDO_REPAIR_TABLE]=
868     INIT_LOGREC_REDO_REPAIR_TABLE;
869   log_record_type_descriptor[LOGREC_FILE_ID]=
870     INIT_LOGREC_FILE_ID;
871   log_record_type_descriptor[LOGREC_LONG_TRANSACTION_ID]=
872     INIT_LOGREC_LONG_TRANSACTION_ID;
873   log_record_type_descriptor[LOGREC_INCOMPLETE_LOG]=
874     INIT_LOGREC_INCOMPLETE_LOG;
875   log_record_type_descriptor[LOGREC_INCOMPLETE_GROUP]=
876     INIT_LOGREC_INCOMPLETE_GROUP;
877   log_record_type_descriptor[LOGREC_UNDO_BULK_INSERT]=
878     INIT_LOGREC_UNDO_BULK_INSERT;
879   log_record_type_descriptor[LOGREC_REDO_BITMAP_NEW_PAGE]=
880     INIT_LOGREC_REDO_BITMAP_NEW_PAGE;
881   log_record_type_descriptor[LOGREC_IMPORTED_TABLE]=
882     INIT_LOGREC_IMPORTED_TABLE;
883   log_record_type_descriptor[LOGREC_DEBUG_INFO]=
884     INIT_LOGREC_DEBUG_INFO;
885 
886   for (i= LOGREC_FIRST_FREE; i < LOGREC_NUMBER_OF_TYPES; i++)
887     log_record_type_descriptor[i].rclass= LOGRECTYPE_NOT_ALLOWED;
888 #ifndef DBUG_OFF
889   check_translog_description_table(LOGREC_FIRST_FREE -1);
890 #endif
891 }
892 
893 
/*
  Page overheads, one slot per possible page-flags value
  (TRANSLOG_FLAGS_NUM slots).
  NOTE(review): the array is filled outside this part of the file;
  presumably it is indexed by the page flags - confirm at the fill site.
*/
static uint page_overhead[TRANSLOG_FLAGS_NUM];

/*
  NOTE(review): judging by the name this is context data for a log page
  validator; its users are not in this part of the file - confirm.
*/
typedef struct st_translog_validator_data
{
  TRANSLOG_ADDRESS *addr;  /* pointer to a log address (owner not visible here) */
  my_bool was_recovered;   /* boolean flag; exact meaning set by its users */
} TRANSLOG_VALIDATOR_DATA;
902 
903 
/*
  Check cursor/buffer consistency

  SYNOPSIS
    translog_check_cursor
    cursor               cursor which will be checked

  NOTES
    The body consists of DBUG_ASSERT() checks only, which is why the
    parameter is marked unused. The checks are:
      - unless cursor->chaser is set, the distance from the start of the
        buffer to cursor->ptr equals cursor->buffer->size;
      - the cursor and its buffer agree on the buffer number;
      - the position of cursor->ptr inside its page matches
        cursor->current_page_fill (both taken modulo TRANSLOG_PAGE_SIZE);
      - cursor->current_page_fill does not exceed TRANSLOG_PAGE_SIZE.
*/

static void translog_check_cursor(struct st_buffer_cursor *cursor
                                 __attribute__((unused)))
{
  /* a non-chaser cursor must stand exactly at the end of the buffer data */
  DBUG_ASSERT(cursor->chaser ||
              ((ulong) (cursor->ptr - cursor->buffer->buffer) ==
               cursor->buffer->size));
  DBUG_ASSERT(cursor->buffer->buffer_no == cursor->buffer_no);
  /* offset inside the page must agree with the recorded page fill */
  DBUG_ASSERT((cursor->ptr -cursor->buffer->buffer) %TRANSLOG_PAGE_SIZE ==
              cursor->current_page_fill % TRANSLOG_PAGE_SIZE);
  DBUG_ASSERT(cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
}
923 
924 
925 /**
926   @brief switch the loghandler in read only mode in case of write error
927 */
928 
translog_stop_writing()929 void translog_stop_writing()
930 {
931   DBUG_ENTER("translog_stop_writing");
932   DBUG_PRINT("error", ("errno: %d   my_errno: %d", errno, my_errno));
933   translog_status= (translog_status == TRANSLOG_SHUTDOWN ?
934                     TRANSLOG_UNINITED :
935                     TRANSLOG_READONLY);
936   log_descriptor.is_everything_flushed= 1;
937   log_descriptor.open_flags= O_BINARY | O_RDONLY;
938   DBUG_ASSERT(0);
939   DBUG_VOID_RETURN;
940 }
941 
942 
943 /*
944   @brief Get file name of the log by log number
945 
946   @param file_no         Number of the log we want to open
947   @param path            Pointer to buffer where file name will be
948                          stored (must be FN_REFLEN bytes at least)
949 
950   @return pointer to path
951 */
952 
translog_filename_by_fileno(uint32 file_no,char * path)953 char *translog_filename_by_fileno(uint32 file_no, char *path)
954 {
955   char buff[11], *end;
956   uint length;
957   DBUG_ENTER("translog_filename_by_fileno");
958   DBUG_ASSERT(file_no <= 0xfffffff);
959 
960   /* log_descriptor.directory is already formated */
961   end= strxmov(path, log_descriptor.directory, "aria_log.0000000", NullS);
962   length= (uint) (int10_to_str(file_no, buff, 10) - buff);
963   strmov(end - length +1, buff);
964 
965   DBUG_PRINT("info", ("Path: '%s'  path: %p", path, path));
966   DBUG_RETURN(path);
967 }
968 
969 
970 /**
971   @brief Create log file with given number without cache
972 
973   @param file_no         Number of the log we want to open
974 
975   retval -1  error
976   retval # file descriptor number
977 */
978 
create_logfile_by_number_no_cache(uint32 file_no)979 static File create_logfile_by_number_no_cache(uint32 file_no)
980 {
981   File file;
982   char path[FN_REFLEN];
983   DBUG_ENTER("create_logfile_by_number_no_cache");
984 
985   if (translog_status != TRANSLOG_OK)
986      DBUG_RETURN(-1);
987 
988   /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
989   if ((file= mysql_file_create(key_file_translog,
990                                translog_filename_by_fileno(file_no, path),
991                                0, O_BINARY | O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0)
992   {
993     DBUG_PRINT("error", ("Error %d during creating file '%s'", errno, path));
994     translog_stop_writing();
995     DBUG_RETURN(-1);
996   }
997   if (sync_log_dir >= TRANSLOG_SYNC_DIR_NEWFILE &&
998       sync_dir(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD)))
999   {
1000     DBUG_PRINT("error", ("Error %d during syncing directory '%s'",
1001                          errno, log_descriptor.directory));
1002     mysql_file_close(file, MYF(0));
1003     translog_stop_writing();
1004     DBUG_RETURN(-1);
1005   }
1006   DBUG_PRINT("info", ("File: '%s'  handler: %d", path, file));
1007   DBUG_RETURN(file);
1008 }
1009 
1010 /**
1011   @brief Open (not create) log file with given number without cache
1012 
1013   @param file_no         Number of the log we want to open
1014 
1015   retval -1  error
1016   retval # file descriptor number
1017 */
1018 
open_logfile_by_number_no_cache(uint32 file_no)1019 static File open_logfile_by_number_no_cache(uint32 file_no)
1020 {
1021   File file;
1022   char path[FN_REFLEN];
1023   DBUG_ENTER("open_logfile_by_number_no_cache");
1024 
1025   /* TODO: add O_DIRECT to open flags (when buffer is aligned) */
1026   /* TODO: use mysql_file_create() */
1027   if ((file= mysql_file_open(key_file_translog,
1028                              translog_filename_by_fileno(file_no, path),
1029                              log_descriptor.open_flags | O_CLOEXEC,
1030                              MYF(MY_WME))) < 0)
1031   {
1032     DBUG_PRINT("error", ("Error %d during opening file '%s'", errno, path));
1033     DBUG_RETURN(-1);
1034   }
1035   DBUG_PRINT("info", ("File: '%s'  handler: %d", path, file));
1036   DBUG_RETURN(file);
1037 }
1038 
1039 
1040 /**
1041   @brief get file descriptor by given number using cache
1042 
1043   @param file_no         Number of the log we want to open
1044 
1045   retval # file descriptor
1046   retval NULL file is not opened
1047 */
1048 
get_logfile_by_number(uint32 file_no)1049 static TRANSLOG_FILE *get_logfile_by_number(uint32 file_no)
1050 {
1051   TRANSLOG_FILE *file;
1052   DBUG_ENTER("get_logfile_by_number");
1053   mysql_rwlock_rdlock(&log_descriptor.open_files_lock);
1054   if (log_descriptor.max_file - file_no >=
1055       log_descriptor.open_files.elements)
1056   {
1057     DBUG_PRINT("info", ("File #%u is not opened", file_no));
1058     mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1059     DBUG_RETURN(NULL);
1060   }
1061   DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
1062               log_descriptor.open_files.elements);
1063   DBUG_ASSERT(log_descriptor.max_file >= file_no);
1064   DBUG_ASSERT(log_descriptor.min_file <= file_no);
1065 
1066   file= *dynamic_element(&log_descriptor.open_files,
1067                          log_descriptor.max_file - file_no, TRANSLOG_FILE **);
1068   mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1069   DBUG_PRINT("info", ("File %p File no: %u, File handler: %d",
1070                        file, file_no,
1071                       (file ? file->handler.file : -1)));
1072   DBUG_ASSERT(!file || file->number == file_no);
1073   DBUG_RETURN(file);
1074 }
1075 
1076 
1077 /**
1078   @brief get current file descriptor
1079 
1080   retval # file descriptor
1081 */
1082 
get_current_logfile()1083 static TRANSLOG_FILE *get_current_logfile()
1084 {
1085   TRANSLOG_FILE *file;
1086   DBUG_ENTER("get_current_logfile");
1087   mysql_rwlock_rdlock(&log_descriptor.open_files_lock);
1088   DBUG_PRINT("info", ("max_file: %lu  min_file: %lu  open_files: %lu",
1089                       (ulong) log_descriptor.max_file,
1090                       (ulong) log_descriptor.min_file,
1091                       (ulong) log_descriptor.open_files.elements));
1092   DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
1093               log_descriptor.open_files.elements);
1094   file= *dynamic_element(&log_descriptor.open_files, 0, TRANSLOG_FILE **);
1095   mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1096   DBUG_RETURN(file);
1097 }
1098 
/* Magic byte sequence stored at the very beginning of every log file */
uchar	maria_trans_file_magic[]=
{ (uchar) 254, (uchar) 254, (uchar) 11, '\001', 'M', 'A', 'R', 'I', 'A',
 'L', 'O', 'G' };
/*
  Size of the meaningful part of the log file header, in the order
  translog_write_file_header() stores it: magic, timestamp (8),
  Aria log version (4), server version (4), server id (4),
  page size - 1 (2), file number (3) and the max LSN (LSN_STORE_SIZE).
*/
#define LOG_HEADER_DATA_SIZE (sizeof(maria_trans_file_magic) + \
                              8 + 4 + 4 + 4 + 2 + 3 + \
                              LSN_STORE_SIZE)
1105 
1106 
1107 /*
1108   Write log file page header in the just opened new log file
1109 
1110   SYNOPSIS
1111     translog_write_file_header();
1112 
1113    NOTES
1114     First page is just a marker page; We don't store any real log data in it.
1115 
1116   RETURN
1117     0 OK
1118     1 ERROR
1119 */
1120 
translog_write_file_header()1121 static my_bool translog_write_file_header()
1122 {
1123   TRANSLOG_FILE *file;
1124   ulonglong timestamp;
1125   uchar page_buff[TRANSLOG_PAGE_SIZE], *page= page_buff;
1126   my_bool rc;
1127   DBUG_ENTER("translog_write_file_header");
1128 
1129   /* file tag */
1130   memcpy(page, maria_trans_file_magic, sizeof(maria_trans_file_magic));
1131   page+= sizeof(maria_trans_file_magic);
1132   /* timestamp */
1133   timestamp= my_hrtime().val;
1134   int8store(page, timestamp);
1135   page+= 8;
1136   /* maria version */
1137   int4store(page, TRANSLOG_VERSION_ID);
1138   page+= 4;
1139   /* mysql version (MYSQL_VERSION_ID) */
1140   int4store(page, log_descriptor.server_version);
1141   page+= 4;
1142   /* server ID */
1143   int4store(page, log_descriptor.server_id);
1144   page+= 4;
1145   /* loghandler page_size */
1146   int2store(page, TRANSLOG_PAGE_SIZE - 1);
1147   page+= 2;
1148   /* file number */
1149   int3store(page, LSN_FILE_NO(log_descriptor.horizon));
1150   page+= 3;
1151   lsn_store(page, LSN_IMPOSSIBLE);
1152   page+= LSN_STORE_SIZE;
1153   memset(page, TRANSLOG_FILLER, sizeof(page_buff) - (page- page_buff));
1154 
1155   file= get_current_logfile();
1156   rc= my_pwrite(file->handler.file, page_buff, sizeof(page_buff), 0,
1157                 log_write_flags) != 0;
1158   /*
1159     Dropping the flag in such way can make false alarm: signalling than the
1160     file in not sync when it is sync, but the situation is quite rare and
1161     protections with mutexes give much more overhead to the whole engine
1162   */
1163   file->is_sync= 0;
1164   DBUG_RETURN(rc);
1165 }
1166 
1167 /*
1168   @brief write the new LSN on the given file header
1169 
1170   @param file            The file descriptor
1171   @param lsn             That LSN which should be written
1172 
1173   @retval 0 OK
1174   @retval 1 Error
1175 */
1176 
translog_max_lsn_to_header(File file,LSN lsn)1177 static my_bool translog_max_lsn_to_header(File file, LSN lsn)
1178 {
1179   uchar lsn_buff[LSN_STORE_SIZE];
1180   my_bool rc;
1181   DBUG_ENTER("translog_max_lsn_to_header");
1182   DBUG_PRINT("enter", ("File descriptor: %ld  "
1183                        "lsn: " LSN_FMT,
1184                        (long) file,
1185                        LSN_IN_PARTS(lsn)));
1186 
1187   lsn_store(lsn_buff, lsn);
1188 
1189   rc= (my_pwrite(file, lsn_buff,
1190                  LSN_STORE_SIZE,
1191                  (LOG_HEADER_DATA_SIZE - LSN_STORE_SIZE),
1192                  log_write_flags) != 0 ||
1193        mysql_file_sync(file, MYF(MY_WME)) != 0);
1194   /*
1195     We should not increase counter in case of error above, but it is so
1196     unlikely that we can ignore this case
1197   */
1198   translog_syncs++;
1199   DBUG_RETURN(rc);
1200 }
1201 
1202 
1203 /*
1204   @brief Extract hander file information from loghandler file page
1205 
1206   @param desc header information descriptor to be filled with information
1207   @param page_buff buffer with the page content
1208 */
1209 
translog_interpret_file_header(LOGHANDLER_FILE_INFO * desc,uchar * page_buff)1210 void translog_interpret_file_header(LOGHANDLER_FILE_INFO *desc,
1211                                     uchar *page_buff)
1212 {
1213   uchar *ptr;
1214 
1215   ptr= page_buff + sizeof(maria_trans_file_magic);
1216   desc->timestamp= uint8korr(ptr);
1217   ptr+= 8;
1218   desc->maria_version= uint4korr(ptr);
1219   ptr+= 4;
1220   desc->mysql_version= uint4korr(ptr);
1221   ptr+= 4;
1222   desc->server_id= uint4korr(ptr + 4);
1223   ptr+= 4;
1224   desc->page_size= uint2korr(ptr) + 1;
1225   ptr+= 2;
1226   desc->file_number= uint3korr(ptr);
1227   ptr+=3;
1228   desc->max_lsn= lsn_korr(ptr);
1229 }
1230 
1231 
1232 /*
1233   @brief Read hander file information from loghandler file
1234 
1235   @param desc header information descriptor to be filled with information
1236   @param file file descriptor to read
1237 
1238   @retval 0 OK
1239   @retval 1 Error
1240 */
1241 
translog_read_file_header(LOGHANDLER_FILE_INFO * desc,File file)1242 my_bool translog_read_file_header(LOGHANDLER_FILE_INFO *desc, File file)
1243 {
1244   uchar page_buff[LOG_HEADER_DATA_SIZE];
1245   DBUG_ENTER("translog_read_file_header");
1246 
1247   if (mysql_file_pread(file, page_buff,
1248                sizeof(page_buff), 0, MYF(MY_FNABP | MY_WME)))
1249   {
1250     DBUG_PRINT("info", ("log read fail error: %d", my_errno));
1251     DBUG_RETURN(1);
1252   }
1253   translog_interpret_file_header(desc, page_buff);
1254   DBUG_PRINT("info", ("timestamp: %llu  aria ver: %lu mysql ver: %lu  "
1255                       "server id %lu page size %lu file number %lu  "
1256                       "max lsn: " LSN_FMT,
1257                       (ulonglong) desc->timestamp,
1258                       (ulong) desc->maria_version,
1259                       (ulong) desc->mysql_version,
1260                       (ulong) desc->server_id,
1261                       desc->page_size, (ulong) desc->file_number,
1262                       LSN_IN_PARTS(desc->max_lsn)));
1263   DBUG_RETURN(0);
1264 }
1265 
1266 
1267 /*
1268   @brief set the lsn to the files from_file - to_file if it is greater
1269   then written in the file
1270 
1271   @param from_file       first file number (min)
1272   @param to_file         last file number (max)
1273   @param lsn             the lsn for writing
1274   @param is_locked       true if current thread locked the log handler
1275 
1276   @retval 0 OK
1277   @retval 1 Error
1278 */
1279 
translog_set_lsn_for_files(uint32 from_file,uint32 to_file,LSN lsn,my_bool is_locked)1280 static my_bool translog_set_lsn_for_files(uint32 from_file, uint32 to_file,
1281                                           LSN lsn, my_bool is_locked)
1282 {
1283   uint32 file;
1284   DBUG_ENTER("translog_set_lsn_for_files");
1285   DBUG_PRINT("enter", ("From: %lu  to: %lu  lsn: " LSN_FMT "  locked: %d",
1286                        (ulong) from_file, (ulong) to_file,
1287                        LSN_IN_PARTS(lsn),
1288                        is_locked));
1289   DBUG_ASSERT(from_file <= to_file);
1290   DBUG_ASSERT(from_file > 0); /* we have not file 0 */
1291 
1292   /* Checks the current file (not finished yet file) */
1293   if (!is_locked)
1294     translog_lock();
1295   if (to_file == (uint32) LSN_FILE_NO(log_descriptor.horizon))
1296   {
1297     if (likely(cmp_translog_addr(lsn, log_descriptor.max_lsn) > 0))
1298       log_descriptor.max_lsn= lsn;
1299     to_file--;
1300   }
1301   if (!is_locked)
1302     translog_unlock();
1303 
1304   /* Checks finished files if they are */
1305   mysql_mutex_lock(&log_descriptor.file_header_lock);
1306   for (file= from_file; file <= to_file; file++)
1307   {
1308     LOGHANDLER_FILE_INFO info;
1309     File fd;
1310 
1311     fd= open_logfile_by_number_no_cache(file);
1312     if ((fd < 0) ||
1313         ((translog_read_file_header(&info, fd) ||
1314           (cmp_translog_addr(lsn, info.max_lsn) > 0 &&
1315            translog_max_lsn_to_header(fd, lsn))) |
1316           mysql_file_close(fd, MYF(MY_WME))))
1317     {
1318       translog_stop_writing();
1319       mysql_mutex_unlock(&log_descriptor.file_header_lock);
1320       DBUG_RETURN(1);
1321     }
1322   }
1323   mysql_mutex_unlock(&log_descriptor.file_header_lock);
1324 
1325   DBUG_RETURN(0);
1326 }
1327 
1328 
/*
  Descriptor of a file in log_descriptor.unfinished_files: a log file
  number together with the number of started but not yet finished writes.
  Entries are added/incremented by translog_mark_file_unfinished() and
  decremented/removed by translog_mark_file_finished().
*/
struct st_file_counter
{
  uint32 file;            /* file number */
  uint32 counter;         /* counter for started writes */
};
1335 
1336 
1337 /*
1338   @brief mark file "in progress" (for multi-group records)
1339 
1340   @param file            log file number
1341 */
1342 
translog_mark_file_unfinished(uint32 file)1343 static void translog_mark_file_unfinished(uint32 file)
1344 {
1345   int place, i;
1346   struct st_file_counter fc, *fc_ptr;
1347 
1348   DBUG_ENTER("translog_mark_file_unfinished");
1349   DBUG_PRINT("enter", ("file: %lu", (ulong) file));
1350 
1351   fc.file= file; fc.counter= 1;
1352   mysql_mutex_lock(&log_descriptor.unfinished_files_lock);
1353 
1354   if (log_descriptor.unfinished_files.elements == 0)
1355   {
1356     insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
1357     DBUG_PRINT("info", ("The first element inserted"));
1358     goto end;
1359   }
1360 
1361   for (place= log_descriptor.unfinished_files.elements - 1;
1362        place >= 0;
1363        place--)
1364   {
1365     fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
1366                             place, struct st_file_counter *);
1367     if (fc_ptr->file <= file)
1368       break;
1369   }
1370 
1371   if (place >= 0 && fc_ptr->file == file)
1372   {
1373      fc_ptr->counter++;
1374      DBUG_PRINT("info", ("counter increased"));
1375      goto end;
1376   }
1377 
1378   if (place == (int)log_descriptor.unfinished_files.elements)
1379   {
1380     insert_dynamic(&log_descriptor.unfinished_files, (uchar*) &fc);
1381     DBUG_PRINT("info", ("The last element inserted"));
1382     goto end;
1383   }
1384   /* shift and assign new element */
1385   insert_dynamic(&log_descriptor.unfinished_files,
1386                  (uchar*)
1387                  dynamic_element(&log_descriptor.unfinished_files,
1388                                  log_descriptor.unfinished_files.elements- 1,
1389                                  struct st_file_counter *));
1390   for(i= log_descriptor.unfinished_files.elements - 1; i > place; i--)
1391   {
1392     /* we do not use set_dynamic() to avoid unneeded checks */
1393     memcpy(dynamic_element(&log_descriptor.unfinished_files,
1394                            i, struct st_file_counter *),
1395            dynamic_element(&log_descriptor.unfinished_files,
1396                            i + 1, struct st_file_counter *),
1397            sizeof(struct st_file_counter));
1398   }
1399   memcpy(dynamic_element(&log_descriptor.unfinished_files,
1400                          place + 1, struct st_file_counter *),
1401          &fc, sizeof(struct st_file_counter));
1402 end:
1403   mysql_mutex_unlock(&log_descriptor.unfinished_files_lock);
1404   DBUG_VOID_RETURN;
1405 }
1406 
1407 
1408 /*
1409   @brief remove file mark "in progress" (for multi-group records)
1410 
1411   @param file            log file number
1412 */
1413 
translog_mark_file_finished(uint32 file)1414 static void translog_mark_file_finished(uint32 file)
1415 {
1416   int i;
1417   struct st_file_counter *UNINIT_VAR(fc_ptr);
1418   DBUG_ENTER("translog_mark_file_finished");
1419   DBUG_PRINT("enter", ("file: %lu", (ulong) file));
1420 
1421   mysql_mutex_lock(&log_descriptor.unfinished_files_lock);
1422 
1423   DBUG_ASSERT(log_descriptor.unfinished_files.elements > 0);
1424   for (i= 0;
1425        i < (int) log_descriptor.unfinished_files.elements;
1426        i++)
1427   {
1428     fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
1429                             i, struct st_file_counter *);
1430     if (fc_ptr->file == file)
1431     {
1432       break;
1433     }
1434   }
1435   DBUG_ASSERT(i < (int) log_descriptor.unfinished_files.elements);
1436 
1437   if (! --fc_ptr->counter)
1438     delete_dynamic_element(&log_descriptor.unfinished_files, i);
1439   mysql_mutex_unlock(&log_descriptor.unfinished_files_lock);
1440   DBUG_VOID_RETURN;
1441 }
1442 
1443 
1444 /*
1445   @brief get max LSN of the record which parts stored in this file
1446 
1447   @param file            file number
1448 
1449   @return requested LSN or LSN_IMPOSSIBLE/LSN_ERROR
1450     @retval LSN_IMPOSSIBLE File is still not finished
1451     @retval LSN_ERROR Error opening file
1452     @retval # LSN of the record which parts stored in this file
1453 */
1454 
translog_get_file_max_lsn_stored(uint32 file)1455 LSN translog_get_file_max_lsn_stored(uint32 file)
1456 {
1457   uint32 limit= FILENO_IMPOSSIBLE;
1458   DBUG_ENTER("translog_get_file_max_lsn_stored");
1459   DBUG_PRINT("enter", ("file: %lu", (ulong)file));
1460   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
1461               translog_status == TRANSLOG_READONLY);
1462 
1463   mysql_mutex_lock(&log_descriptor.unfinished_files_lock);
1464 
1465   /* find file with minimum file number "in progress" */
1466   if (log_descriptor.unfinished_files.elements > 0)
1467   {
1468     struct st_file_counter *fc_ptr;
1469     fc_ptr= dynamic_element(&log_descriptor.unfinished_files,
1470                             0, struct st_file_counter *);
1471     limit= fc_ptr->file; /* minimal file number "in progress" */
1472   }
1473   mysql_mutex_unlock(&log_descriptor.unfinished_files_lock);
1474 
1475   /*
1476     if there is no "in progress file" then unfinished file is in progress
1477     for sure
1478   */
1479   if (limit == FILENO_IMPOSSIBLE)
1480   {
1481     TRANSLOG_ADDRESS horizon= translog_get_horizon();
1482     limit= LSN_FILE_NO(horizon);
1483   }
1484 
1485   if (file >= limit)
1486   {
1487     DBUG_PRINT("info", ("The file in in progress"));
1488     DBUG_RETURN(LSN_IMPOSSIBLE);
1489   }
1490 
1491   {
1492     LOGHANDLER_FILE_INFO info;
1493     File fd;
1494 
1495     fd= open_logfile_by_number_no_cache(file);
1496     if(fd < 0)
1497     {
1498       DBUG_PRINT("error", ("Can't open file"));
1499       DBUG_RETURN(LSN_ERROR);
1500     }
1501 
1502     if (translog_read_file_header(&info, fd))
1503     {
1504       DBUG_PRINT("error", ("Can't read file header"));
1505       info.max_lsn= LSN_ERROR;
1506     }
1507 
1508     if (mysql_file_close(fd, MYF(MY_WME)))
1509     {
1510       DBUG_PRINT("error", ("Can't close file"));
1511       info.max_lsn= LSN_ERROR;
1512     }
1513 
1514     DBUG_PRINT("info", ("Max lsn: " LSN_FMT, LSN_IN_PARTS(info.max_lsn)));
1515     DBUG_RETURN(info.max_lsn);
1516   }
1517 }
1518 
1519 /*
1520   Initialize transaction log file buffer
1521 
1522   SYNOPSIS
1523     translog_buffer_init()
1524     buffer               The buffer to initialize
1525     num                  Number of this buffer
1526 
1527   RETURN
1528     0  OK
1529     1  Error
1530 */
1531 
translog_buffer_init(struct st_translog_buffer * buffer,int num)1532 static my_bool translog_buffer_init(struct st_translog_buffer *buffer, int num)
1533 {
1534   DBUG_ENTER("translog_buffer_init");
1535   buffer->pre_force_close_horizon=
1536     buffer->prev_last_lsn= buffer->last_lsn=
1537     LSN_IMPOSSIBLE;
1538   DBUG_PRINT("info", ("last_lsn  and prev_last_lsn set to 0  buffer: %p",
1539                       buffer));
1540 
1541   buffer->buffer_no= (uint8) num;
1542   /* This Buffer File */
1543   buffer->file= NULL;
1544   buffer->overlay= 0;
1545   /* cache for current log */
1546   memset(buffer->buffer, TRANSLOG_FILLER, TRANSLOG_WRITE_BUFFER);
1547   /* Buffer size */
1548   buffer->size= 0;
1549   buffer->skipped_data= 0;
1550   /* cond of thread which is waiting for buffer filling */
1551   if (mysql_cond_init(key_TRANSLOG_BUFFER_waiting_filling_buffer,
1552                       &buffer->waiting_filling_buffer, 0))
1553     DBUG_RETURN(1);
1554   /* Number of records which are in copy progress */
1555   buffer->copy_to_buffer_in_progress= 0;
1556   /* list of waiting buffer ready threads */
1557   buffer->waiting_flush= 0;
1558   /*
1559     Buffers locked by the following mutex. As far as buffers create logical
1560     circle (after last buffer goes first) it trigger false alarm of deadlock
1561     detect system, so we remove check of deadlock for this buffers. Indeed
1562     all mutex locks concentrated around current buffer except flushing
1563     thread (but it is only one thread). One thread can't take more then
1564     2 buffer locks at once. So deadlock is impossible here.
1565 
1566     To prevent false alarm of dead lock detection we switch dead lock
1567     detection for one buffer in the middle of the buffers chain. Excluding
1568     only one of eight buffers from deadlock detection hardly can hide other
1569     possible problems which include this mutexes.
1570   */
1571 
1572   if (mysql_mutex_init(key_TRANSLOG_BUFFER_mutex,
1573                        &buffer->mutex, MY_MUTEX_INIT_FAST) ||
1574       mysql_cond_init(key_TRANSLOG_BUFFER_prev_sent_to_disk_cond,
1575                       &buffer->prev_sent_to_disk_cond, 0))
1576     DBUG_RETURN(1);
1577   mysql_mutex_setflags(&buffer->mutex, MYF_NO_DEADLOCK_DETECTION);
1578   buffer->is_closing_buffer= 0;
1579   buffer->prev_sent_to_disk= LSN_IMPOSSIBLE;
1580   buffer->prev_buffer_offset= LSN_IMPOSSIBLE;
1581   buffer->ver= 0;
1582   DBUG_RETURN(0);
1583 }
1584 
1585 
1586 /*
1587   @brief close transaction log file by descriptor
1588 
1589   @param file            pagegecache file descriptor reference
1590 
1591   @return Operation status
1592     @retval 0  OK
1593     @retval 1  Error
1594 */
1595 
translog_close_log_file(TRANSLOG_FILE * file)1596 static my_bool translog_close_log_file(TRANSLOG_FILE *file)
1597 {
1598   int rc= 0;
1599   flush_pagecache_blocks(log_descriptor.pagecache, &file->handler,
1600                          FLUSH_RELEASE);
1601   /*
1602     Sync file when we close it
1603     TODO: sync only we have changed the log
1604   */
1605   if (!file->is_sync)
1606   {
1607     rc= mysql_file_sync(file->handler.file, MYF(MY_WME));
1608     translog_syncs++;
1609   }
1610   rc|= mysql_file_close(file->handler.file, MYF(MY_WME));
1611   my_free(file);
1612   return MY_TEST(rc);
1613 }
1614 
1615 
1616 /**
1617   @brief Initializes TRANSLOG_FILE structure
1618 
1619   @param file            reference on the file to initialize
1620   @param number          file number
1621   @param is_sync         is file synced on disk
1622 */
1623 
translog_file_init(TRANSLOG_FILE * file,uint32 number,my_bool is_sync)1624 static void translog_file_init(TRANSLOG_FILE *file, uint32 number,
1625                                my_bool is_sync)
1626 {
1627   pagecache_file_set_null_hooks(&file->handler);
1628   file->handler.post_read_hook= translog_page_validator;
1629   file->handler.flush_log_callback= maria_flush_log_for_page_none;
1630   file->handler.callback_data= (uchar*)file;
1631 
1632   file->number= number;
1633   file->was_recovered= 0;
1634   file->is_sync= is_sync;
1635 }
1636 
1637 
1638 /**
1639   @brief Create and fill header of new file.
1640 
1641   @note the caller must call it right after it has increased
1642    log_descriptor.horizon to the new file
1643    (log_descriptor.horizon+= LSN_ONE_FILE)
1644 
1645 
1646   @retval 0 OK
1647   @retval 1 Error
1648 */
1649 
translog_create_new_file()1650 static my_bool translog_create_new_file()
1651 {
1652   TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(PSI_INSTRUMENT_ME, sizeof(TRANSLOG_FILE),
1653                                                  MYF(0));
1654 
1655   TRANSLOG_FILE *old= get_current_logfile();
1656   uint32 file_no= LSN_FILE_NO(log_descriptor.horizon);
1657   DBUG_ENTER("translog_create_new_file");
1658 
1659   if (file == NULL)
1660     goto error;
1661 
1662   /*
1663     Writes max_lsn to the file header before finishing it (there is no need
1664     to lock file header buffer because it is still unfinished file, so only
1665     one thread can finish the file and nobody interested of LSN of current
1666     (unfinished) file, because no one can purge it).
1667   */
1668   if (translog_max_lsn_to_header(old->handler.file, log_descriptor.max_lsn))
1669     goto error;
1670 
1671   mysql_rwlock_wrlock(&log_descriptor.open_files_lock);
1672   DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
1673               log_descriptor.open_files.elements);
1674   DBUG_ASSERT(file_no == log_descriptor.max_file + 1);
1675   if (allocate_dynamic(&log_descriptor.open_files,
1676                        log_descriptor.max_file - log_descriptor.min_file + 2))
1677     goto error_lock;
1678 
1679   /* this call just expand the array */
1680   if (insert_dynamic(&log_descriptor.open_files, (uchar*)&file))
1681     goto error_lock;
1682 
1683   if ((file->handler.file= create_logfile_by_number_no_cache(file_no)) == -1)
1684     goto error_lock;
1685   translog_file_init(file, file_no, 0);
1686 
1687   log_descriptor.max_file++;
1688   {
1689     char *start= (char*) dynamic_element(&log_descriptor.open_files, 0,
1690                                          TRANSLOG_FILE**);
1691     memmove(start + sizeof(TRANSLOG_FILE*), start,
1692             sizeof(TRANSLOG_FILE*) *
1693             (log_descriptor.max_file - log_descriptor.min_file + 1 - 1));
1694   }
1695   /* can't fail we because we expanded array */
1696   set_dynamic(&log_descriptor.open_files, (uchar*)&file, 0);
1697   DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
1698               log_descriptor.open_files.elements);
1699   mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1700 
1701   DBUG_PRINT("info", ("file_no: %lu", (ulong)file_no));
1702 
1703   if (translog_write_file_header())
1704     goto error;
1705 
1706   if (ma_control_file_write_and_force(last_checkpoint_lsn, file_no,
1707                                       max_trid_in_control_file,
1708                                       recovery_failures))
1709     goto error;
1710 
1711   DBUG_RETURN(0);
1712 
1713 error_lock:
1714   mysql_rwlock_unlock(&log_descriptor.open_files_lock);
1715 error:
1716   translog_stop_writing();
1717   my_free(file);
1718   DBUG_RETURN(1);
1719 }
1720 
1721 
1722 /**
1723   @brief Locks the loghandler buffer.
1724 
1725   @param buffer          This buffer which should be locked
1726 
1727   @note See comment before buffer 'mutex' variable.
1728 
1729   @retval 0 OK
1730   @retval 1 Error
1731 */
1732 
translog_buffer_lock(struct st_translog_buffer * buffer)1733 static void translog_buffer_lock(struct st_translog_buffer *buffer)
1734 {
1735   DBUG_ENTER("translog_buffer_lock");
1736   DBUG_PRINT("enter",
1737              ("Lock buffer #%u: %p", buffer->buffer_no,
1738               buffer));
1739   mysql_mutex_lock(&buffer->mutex);
1740   DBUG_VOID_RETURN;
1741 }
1742 
1743 
1744 /*
1745   Unlock the loghandler buffer
1746 
1747   SYNOPSIS
1748     translog_buffer_unlock()
1749     buffer               This buffer which should be unlocked
1750 */
1751 
translog_buffer_unlock(struct st_translog_buffer * buffer)1752 static void translog_buffer_unlock(struct st_translog_buffer *buffer)
1753 {
1754   DBUG_ENTER("translog_buffer_unlock");
1755   DBUG_PRINT("enter", ("Unlock buffer... #%u (%p)",
1756                        (uint) buffer->buffer_no, buffer));
1757 
1758   mysql_mutex_unlock(&buffer->mutex);
1759   DBUG_VOID_RETURN;
1760 }
1761 
1762 
1763 /*
1764   Write a header on the page
1765 
1766   SYNOPSIS
1767     translog_new_page_header()
1768     horizon              Where to write the page
1769     cursor               Where to write the page
1770 
1771   NOTE
1772     - space for page header should be checked before
1773 */
1774 
1775 static uchar translog_sector_random;
1776 
translog_new_page_header(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor)1777 static void translog_new_page_header(TRANSLOG_ADDRESS *horizon,
1778                                      struct st_buffer_cursor *cursor)
1779 {
1780   uchar *ptr;
1781 
1782   DBUG_ENTER("translog_new_page_header");
1783   DBUG_ASSERT(cursor->ptr);
1784 
1785   cursor->protected= 0;
1786 
1787   ptr= cursor->ptr;
1788   /* Page number */
1789   int3store(ptr, LSN_OFFSET(*horizon) / TRANSLOG_PAGE_SIZE);
1790   ptr+= 3;
1791   /* File number */
1792   int3store(ptr, LSN_FILE_NO(*horizon));
1793   ptr+= 3;
1794   DBUG_ASSERT(TRANSLOG_PAGE_FLAGS == (ptr - cursor->ptr));
1795   cursor->ptr[TRANSLOG_PAGE_FLAGS]= (uchar) log_descriptor.flags;
1796   ptr++;
1797   if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
1798   {
1799 #ifndef DBUG_OFF
1800     DBUG_PRINT("info", ("write  0x11223344 CRC to " LSN_FMT,
1801                         LSN_IN_PARTS(*horizon)));
1802     /* This will be overwritten by real CRC; This is just for debugging */
1803     int4store(ptr, 0x11223344);
1804 #endif
1805     /* CRC will be put when page is finished */
1806     ptr+= CRC_SIZE;
1807   }
1808   if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
1809   {
1810     /*
1811       translog_sector_randmo works like "random" values producer because
1812       it is enough to have such "random" for this purpose and it will
1813       not interfere with higher level pseudo random value generator
1814     */
1815     ptr[0]= translog_sector_random++;
1816     ptr+= TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
1817   }
1818   {
1819     size_t len= (ptr - cursor->ptr);
1820     (*horizon)+= len; /* increasing the offset part of the address */
1821     cursor->current_page_fill= (uint16)len;
1822     if (!cursor->chaser)
1823       cursor->buffer->size+= (translog_size_t)len;
1824   }
1825   cursor->ptr= ptr;
1826   DBUG_PRINT("info", ("NewP buffer #%u: %p  chaser: %d  Size: %lu (%lu)  "
1827                       "Horizon: " LSN_FMT,
1828                       (uint) cursor->buffer->buffer_no, cursor->buffer,
1829                       cursor->chaser, (ulong) cursor->buffer->size,
1830                       (ulong) (cursor->ptr - cursor->buffer->buffer),
1831                       LSN_IN_PARTS(*horizon)));
1832   translog_check_cursor(cursor);
1833   DBUG_VOID_RETURN;
1834 }
1835 
1836 
1837 /*
1838   Put sector protection on the page image
1839 
1840   SYNOPSIS
1841     translog_put_sector_protection()
1842     page                 reference on the page content
1843     cursor               cursor of the buffer
1844 
1845   NOTES
1846     We put a sector protection on all following sectors on the page,
1847     except the first sector that is protected by page header.
1848 */
1849 
translog_put_sector_protection(uchar * page,struct st_buffer_cursor * cursor)1850 static void translog_put_sector_protection(uchar *page,
1851                                            struct st_buffer_cursor *cursor)
1852 {
1853   uchar *table= page + log_descriptor.page_overhead -
1854     TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
1855   uint i, offset;
1856   uint16 last_protected_sector= ((cursor->previous_offset - 1) /
1857                                  DISK_DRIVE_SECTOR_SIZE);
1858   uint16 start_sector= cursor->previous_offset / DISK_DRIVE_SECTOR_SIZE;
1859   uint8 value= table[0] + cursor->write_counter;
1860   DBUG_ENTER("translog_put_sector_protection");
1861 
1862   if (start_sector == 0)
1863   {
1864     /* First sector is protected by file & page numbers in the page header. */
1865     start_sector= 1;
1866   }
1867 
1868   DBUG_PRINT("enter", ("Write counter:%u  value:%u  offset:%u, "
1869                        "last protected:%u  start sector:%u",
1870                        (uint) cursor->write_counter,
1871                        (uint) value,
1872                        (uint) cursor->previous_offset,
1873                        (uint) last_protected_sector, (uint) start_sector));
1874   if (last_protected_sector == start_sector)
1875   {
1876     i= last_protected_sector;
1877     offset= last_protected_sector * DISK_DRIVE_SECTOR_SIZE;
1878     /* restore data, because we modified sector which was protected */
1879     if (offset < cursor->previous_offset)
1880       page[offset]= table[i];
1881   }
1882   for (i= start_sector, offset= start_sector * DISK_DRIVE_SECTOR_SIZE;
1883        i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
1884        i++, (offset+= DISK_DRIVE_SECTOR_SIZE))
1885   {
1886     DBUG_PRINT("info", ("sector:%u  offset:%u  data 0x%x",
1887                         i, offset, (uint) page[offset]));
1888     table[i]= page[offset];
1889     page[offset]= value;
1890     DBUG_PRINT("info", ("sector:%u  offset:%u  data 0x%x",
1891                         i, offset, (uint) page[offset]));
1892   }
1893   DBUG_VOID_RETURN;
1894 }
1895 
1896 
1897 /*
1898   Calculate CRC32 of given area
1899 
1900   SYNOPSIS
1901     translog_crc()
1902     area                 Pointer of the area beginning
1903     length               The Area length
1904 
1905   RETURN
1906     CRC32
1907 */
1908 
translog_crc(uchar * area,uint length)1909 static uint32 translog_crc(uchar *area, uint length)
1910 {
1911   DBUG_ENTER("translog_crc");
1912   DBUG_RETURN(my_checksum(0L, area, length));
1913 }
1914 
1915 
1916 /*
1917   Finish current page with zeros
1918 
1919   SYNOPSIS
1920     translog_finish_page()
1921     horizon              \ horizon & buffer pointers
1922     cursor               /
1923 */
1924 
translog_finish_page(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor)1925 static void translog_finish_page(TRANSLOG_ADDRESS *horizon,
1926                                  struct st_buffer_cursor *cursor)
1927 {
1928   uint16 left= TRANSLOG_PAGE_SIZE - cursor->current_page_fill;
1929   uchar *page= cursor->ptr - cursor->current_page_fill;
1930   DBUG_ENTER("translog_finish_page");
1931   DBUG_PRINT("enter", ("Buffer: #%u %p  "
1932                        "Buffer addr: " LSN_FMT "  "
1933                        "Page addr: " LSN_FMT "  "
1934                        "size:%u (%u)  Pg:%u  left:%u",
1935                        (uint) cursor->buffer_no, cursor->buffer,
1936                        LSN_IN_PARTS(cursor->buffer->offset),
1937                        (uint)LSN_FILE_NO(*horizon),
1938                        (uint)(LSN_OFFSET(*horizon) -
1939                                 cursor->current_page_fill),
1940                        (uint) cursor->buffer->size,
1941                        (uint) (cursor->ptr -cursor->buffer->buffer),
1942                        (uint) cursor->current_page_fill, (uint) left));
1943   DBUG_ASSERT(LSN_FILE_NO(*horizon) == LSN_FILE_NO(cursor->buffer->offset)
1944               || translog_status == TRANSLOG_UNINITED);
1945   if ((LSN_FILE_NO(*horizon) != LSN_FILE_NO(cursor->buffer->offset)))
1946     DBUG_VOID_RETURN; // everything wrong do not write to awoid more problems
1947   translog_check_cursor(cursor);
1948   if (cursor->protected)
1949   {
1950     DBUG_PRINT("info", ("Already protected and finished"));
1951     DBUG_VOID_RETURN;
1952   }
1953   cursor->protected= 1;
1954 
1955   DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
1956   if (left != 0)
1957   {
1958     DBUG_PRINT("info", ("left: %u", (uint) left));
1959     memset(cursor->ptr, TRANSLOG_FILLER, left);
1960     cursor->ptr+= left;
1961     (*horizon)+= left; /* offset increasing */
1962     if (!cursor->chaser)
1963       cursor->buffer->size+= left;
1964     /* We are finishing the page so reset the counter */
1965     cursor->current_page_fill= 0;
1966     DBUG_PRINT("info", ("Finish Page buffer #%u: %p "
1967                         "chaser: %d  Size: %lu (%lu)",
1968                         (uint) cursor->buffer->buffer_no,
1969                         cursor->buffer, cursor->chaser,
1970                         (ulong) cursor->buffer->size,
1971                         (ulong) (cursor->ptr - cursor->buffer->buffer)));
1972     translog_check_cursor(cursor);
1973   }
1974   /*
1975     When we are finishing the page other thread might not finish the page
1976     header yet (in case if we started from the middle of the page) so we
1977     have to read log_descriptor.flags but not the flags from the page.
1978   */
1979   if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
1980   {
1981     translog_put_sector_protection(page, cursor);
1982     DBUG_PRINT("info", ("drop write_counter"));
1983     cursor->write_counter= 0;
1984     cursor->previous_offset= 0;
1985   }
1986   if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
1987   {
1988     uint32 crc= translog_crc(page + log_descriptor.page_overhead,
1989                              TRANSLOG_PAGE_SIZE -
1990                              log_descriptor.page_overhead);
1991     DBUG_PRINT("info", ("CRC: %lx", (ulong) crc));
1992     /* We have page number, file number and flag before crc */
1993     int4store(page + 3 + 3 + 1, crc);
1994   }
1995   DBUG_VOID_RETURN;
1996 }
1997 
1998 
1999 /*
2000   @brief Wait until all threads have finished closing this buffer.
2001 
2002   @param buffer          This buffer should be check
2003 */
2004 
translog_wait_for_closing(struct st_translog_buffer * buffer)2005 static void translog_wait_for_closing(struct st_translog_buffer *buffer)
2006 {
2007   DBUG_ENTER("translog_wait_for_closing");
2008   DBUG_PRINT("enter", ("Buffer #%u %p  copies in progress: %u  "
2009                        "is closing %u  File: %d  size: %lu",
2010                        (uint) buffer->buffer_no, buffer,
2011                        (uint) buffer->copy_to_buffer_in_progress,
2012                        (uint) buffer->is_closing_buffer,
2013                        (buffer->file ? buffer->file->handler.file : -1),
2014                        (ulong) buffer->size));
2015   translog_buffer_lock_assert_owner(buffer);
2016 
2017   while (buffer->is_closing_buffer)
2018   {
2019     DBUG_PRINT("info", ("wait for writers... buffer: #%u %p",
2020                         (uint) buffer->buffer_no, buffer));
2021     DBUG_ASSERT(buffer->file != NULL);
2022     mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
2023     DBUG_PRINT("info", ("wait for writers done buffer: #%u %p",
2024                         (uint) buffer->buffer_no, buffer));
2025   }
2026 
2027   DBUG_VOID_RETURN;
2028 }
2029 
2030 
2031 /*
2032   @brief Wait until all threads have finished filling this buffer.
2033 
2034   @param buffer          This buffer should be check
2035 */
2036 
translog_wait_for_writers(struct st_translog_buffer * buffer)2037 static void translog_wait_for_writers(struct st_translog_buffer *buffer)
2038 {
2039   DBUG_ENTER("translog_wait_for_writers");
2040   DBUG_PRINT("enter", ("Buffer #%u %p copies in progress: %u  "
2041                        "is closing %u  File: %d  size: %lu",
2042                        (uint) buffer->buffer_no, buffer,
2043                        (uint) buffer->copy_to_buffer_in_progress,
2044                        (uint) buffer->is_closing_buffer,
2045                        (buffer->file ? buffer->file->handler.file : -1),
2046                        (ulong) buffer->size));
2047   translog_buffer_lock_assert_owner(buffer);
2048 
2049   while (buffer->copy_to_buffer_in_progress)
2050   {
2051     DBUG_PRINT("info", ("wait for writers... buffer: #%u  %p",
2052                         (uint) buffer->buffer_no, buffer));
2053     DBUG_ASSERT(buffer->file != NULL);
2054     mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
2055     DBUG_PRINT("info", ("wait for writers done buffer: #%u  %p",
2056                         (uint) buffer->buffer_no, buffer));
2057   }
2058 
2059   DBUG_VOID_RETURN;
2060 }
2061 
2062 
2063 /*
2064 
2065   Wait for buffer to become free
2066 
2067   SYNOPSIS
2068     translog_wait_for_buffer_free()
2069     buffer               The buffer we are waiting for
2070 
2071   NOTE
2072     - this buffer should be locked
2073 */
2074 
translog_wait_for_buffer_free(struct st_translog_buffer * buffer)2075 static void translog_wait_for_buffer_free(struct st_translog_buffer *buffer)
2076 {
2077   TRANSLOG_ADDRESS offset= buffer->offset;
2078   TRANSLOG_FILE *file= buffer->file;
2079   uint8 ver= buffer->ver;
2080   DBUG_ENTER("translog_wait_for_buffer_free");
2081   DBUG_PRINT("enter", ("Buffer #%u %p  copies in progress: %u  "
2082                        "is closing %u  File: %d  size: %lu",
2083                        (uint) buffer->buffer_no, buffer,
2084                        (uint) buffer->copy_to_buffer_in_progress,
2085                        (uint) buffer->is_closing_buffer,
2086                        (buffer->file ? buffer->file->handler.file : -1),
2087                        (ulong) buffer->size));
2088 
2089   translog_wait_for_writers(buffer);
2090 
2091   if (offset != buffer->offset || file != buffer->file || ver != buffer->ver)
2092     DBUG_VOID_RETURN; /* the buffer if already freed */
2093 
2094   while (buffer->file != NULL)
2095   {
2096     DBUG_PRINT("info", ("wait for writers... buffer: #%u  %p",
2097                         (uint) buffer->buffer_no, buffer));
2098     mysql_cond_wait(&buffer->waiting_filling_buffer, &buffer->mutex);
2099     DBUG_PRINT("info", ("wait for writers done. buffer: #%u  %p",
2100                         (uint) buffer->buffer_no, buffer));
2101   }
2102   DBUG_ASSERT(buffer->copy_to_buffer_in_progress == 0);
2103   DBUG_VOID_RETURN;
2104 }
2105 
2106 
2107 /*
2108   Initialize the cursor for a buffer
2109 
2110   SYNOPSIS
2111     translog_cursor_init()
2112     buffer               The buffer
2113     cursor               It's cursor
2114     buffer_no            Number of buffer
2115 */
2116 
translog_cursor_init(struct st_buffer_cursor * cursor,struct st_translog_buffer * buffer,uint8 buffer_no)2117 static void translog_cursor_init(struct st_buffer_cursor *cursor,
2118                                  struct st_translog_buffer *buffer,
2119                                  uint8 buffer_no)
2120 {
2121   DBUG_ENTER("translog_cursor_init");
2122   cursor->ptr= buffer->buffer;
2123   cursor->buffer= buffer;
2124   cursor->buffer_no= buffer_no;
2125   cursor->current_page_fill= 0;
2126   cursor->chaser= (cursor != &log_descriptor.bc);
2127   cursor->write_counter= 0;
2128   cursor->previous_offset= 0;
2129   cursor->protected= 0;
2130   DBUG_VOID_RETURN;
2131 }
2132 
2133 
2134 /*
2135   @brief Initialize buffer for the current file, and a cursor for this buffer.
2136 
2137   @param buffer          The buffer
2138   @param cursor          It's cursor
2139   @param buffer_no       Number of buffer
2140 */
2141 
translog_start_buffer(struct st_translog_buffer * buffer,struct st_buffer_cursor * cursor,uint buffer_no)2142 static void translog_start_buffer(struct st_translog_buffer *buffer,
2143                                   struct st_buffer_cursor *cursor,
2144                                   uint buffer_no)
2145 {
2146   DBUG_ENTER("translog_start_buffer");
2147   DBUG_PRINT("enter",
2148              ("Assign buffer: #%u (%p) offset: 0x%x(%u)",
2149               (uint) buffer->buffer_no, buffer,
2150               (uint) LSN_OFFSET(log_descriptor.horizon),
2151               (uint) LSN_OFFSET(log_descriptor.horizon)));
2152   DBUG_ASSERT(buffer_no == buffer->buffer_no);
2153   buffer->pre_force_close_horizon=
2154     buffer->prev_last_lsn= buffer->last_lsn= LSN_IMPOSSIBLE;
2155   DBUG_PRINT("info", ("last_lsn and prev_last_lsn set to 0  buffer: %p",
2156                       buffer));
2157   buffer->offset= log_descriptor.horizon;
2158   buffer->next_buffer_offset= LSN_IMPOSSIBLE;
2159   buffer->file= get_current_logfile();
2160   buffer->overlay= 0;
2161   buffer->size= 0;
2162   buffer->skipped_data= 0;
2163   translog_cursor_init(cursor, buffer, buffer_no);
2164   DBUG_PRINT("info", ("file: #%ld (%d)  init cursor #%u: %p "
2165                       "chaser: %d  Size: %lu (%lu)",
2166                       (long) (buffer->file ? buffer->file->number : 0),
2167                       (buffer->file ? buffer->file->handler.file : -1),
2168                       (uint) cursor->buffer->buffer_no, cursor->buffer,
2169                       cursor->chaser, (ulong) cursor->buffer->size,
2170                       (ulong) (cursor->ptr - cursor->buffer->buffer)));
2171   translog_check_cursor(cursor);
2172   mysql_mutex_lock(&log_descriptor.dirty_buffer_mask_lock);
2173   log_descriptor.dirty_buffer_mask|= (1 << buffer->buffer_no);
2174   mysql_mutex_unlock(&log_descriptor.dirty_buffer_mask_lock);
2175 
2176   DBUG_VOID_RETURN;
2177 }
2178 
2179 
2180 /*
2181   @brief Switch to the next buffer in a chain.
2182 
2183   @param horizon         \ Pointers on current position in file and buffer
2184   @param cursor          /
2185   @param new_file        Also start new file
2186 
2187   @note
2188    - loghandler should be locked
2189    - after return new and old buffer still are locked
2190 
2191   @retval 0 OK
2192   @retval 1 Error
2193 */
2194 
translog_buffer_next(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor,my_bool new_file)2195 static my_bool translog_buffer_next(TRANSLOG_ADDRESS *horizon,
2196                                     struct st_buffer_cursor *cursor,
2197                                     my_bool new_file)
2198 {
2199   uint old_buffer_no= cursor->buffer_no;
2200   uint new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
2201   struct st_translog_buffer *new_buffer= log_descriptor.buffers + new_buffer_no;
2202   my_bool chasing= cursor->chaser;
2203   DBUG_ENTER("translog_buffer_next");
2204 
2205   DBUG_PRINT("info", ("horizon: " LSN_FMT "  chasing: %d",
2206                       LSN_IN_PARTS(log_descriptor.horizon), chasing));
2207 
2208   DBUG_ASSERT(cmp_translog_addr(log_descriptor.horizon, *horizon) >= 0);
2209 
2210   translog_finish_page(horizon, cursor);
2211 
2212   if (!chasing)
2213   {
2214     translog_buffer_lock(new_buffer);
2215 #ifndef DBUG_OFF
2216     {
2217       TRANSLOG_ADDRESS offset= new_buffer->offset;
2218       TRANSLOG_FILE *file= new_buffer->file;
2219       uint8 ver= new_buffer->ver;
2220       translog_lock_assert_owner();
2221 #endif
2222       translog_wait_for_buffer_free(new_buffer);
2223 #ifndef DBUG_OFF
2224       /* We keep the handler locked so nobody can start this new buffer */
2225       DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL &&
2226                   (file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver);
2227     }
2228 #endif
2229   }
2230   else
2231     DBUG_ASSERT(new_buffer->file != NULL);
2232 
2233   if (new_file)
2234   {
2235     /* move the horizon to the next file and its header page */
2236     (*horizon)+= LSN_ONE_FILE;
2237     (*horizon)= LSN_REPLACE_OFFSET(*horizon, TRANSLOG_PAGE_SIZE);
2238     if (!chasing && translog_create_new_file())
2239     {
2240       DBUG_RETURN(1);
2241     }
2242   }
2243 
2244   /* prepare next page */
2245   if (chasing)
2246     translog_cursor_init(cursor, new_buffer, new_buffer_no);
2247   else
2248   {
2249     translog_lock_assert_owner();
2250     translog_start_buffer(new_buffer, cursor, new_buffer_no);
2251     new_buffer->prev_buffer_offset=
2252       log_descriptor.buffers[old_buffer_no].offset;
2253     new_buffer->prev_last_lsn=
2254       BUFFER_MAX_LSN(log_descriptor.buffers + old_buffer_no);
2255   }
2256   log_descriptor.buffers[old_buffer_no].next_buffer_offset= new_buffer->offset;
2257   DBUG_PRINT("info", ("prev_last_lsn set to " LSN_FMT "  buffer:%p",
2258                       LSN_IN_PARTS(new_buffer->prev_last_lsn),
2259                       new_buffer));
2260   translog_new_page_header(horizon, cursor);
2261   DBUG_RETURN(0);
2262 }
2263 
2264 
2265 /*
2266   Sets max LSN sent to file, and address from which data is only in the buffer
2267 
2268   SYNOPSIS
2269     translog_set_sent_to_disk()
2270     buffer               buffer which we have sent to disk
2271 
2272   TODO: use atomic operations if possible (64bit architectures?)
2273 */
2274 
translog_set_sent_to_disk(struct st_translog_buffer * buffer)2275 static void translog_set_sent_to_disk(struct st_translog_buffer *buffer)
2276 {
2277   LSN lsn= buffer->last_lsn;
2278   TRANSLOG_ADDRESS in_buffers= buffer->next_buffer_offset;
2279 
2280   DBUG_ENTER("translog_set_sent_to_disk");
2281   mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
2282   DBUG_PRINT("enter", ("lsn: " LSN_FMT " in_buffers: " LSN_FMT "  "
2283                        "in_buffers_only: " LSN_FMT "  start: " LSN_FMT "  "
2284                        "sent_to_disk: " LSN_FMT,
2285                        LSN_IN_PARTS(lsn),
2286                        LSN_IN_PARTS(in_buffers),
2287                        LSN_IN_PARTS(log_descriptor.log_start),
2288                        LSN_IN_PARTS(log_descriptor.in_buffers_only),
2289                        LSN_IN_PARTS(log_descriptor.sent_to_disk)));
2290   /*
2291     We write sequentially (first part of following assert) but we rewrite
2292     the same page in case we started mysql and shut it down immediately
2293     (second part of the following assert)
2294   */
2295   DBUG_ASSERT(cmp_translog_addr(lsn, log_descriptor.sent_to_disk) >= 0 ||
2296               cmp_translog_addr(lsn, log_descriptor.log_start) < 0);
2297   log_descriptor.sent_to_disk= lsn;
2298   /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
2299   if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
2300   {
2301     log_descriptor.in_buffers_only= in_buffers;
2302     DBUG_PRINT("info", ("set new in_buffers_only"));
2303   }
2304   mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2305   DBUG_VOID_RETURN;
2306 }
2307 
2308 
2309 /*
2310   Sets address from which data is only in the buffer
2311 
2312   SYNOPSIS
2313     translog_set_only_in_buffers()
2314     lsn                  LSN to assign
2315     in_buffers           to assign to in_buffers_only
2316 */
2317 
translog_set_only_in_buffers(TRANSLOG_ADDRESS in_buffers)2318 static void translog_set_only_in_buffers(TRANSLOG_ADDRESS in_buffers)
2319 {
2320   DBUG_ENTER("translog_set_only_in_buffers");
2321   mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
2322   DBUG_PRINT("enter", ("in_buffers: " LSN_FMT "  "
2323                        "in_buffers_only: " LSN_FMT,
2324                        LSN_IN_PARTS(in_buffers),
2325                        LSN_IN_PARTS(log_descriptor.in_buffers_only)));
2326   /* LSN_IMPOSSIBLE == 0 => it will work for very first time */
2327   if (cmp_translog_addr(in_buffers, log_descriptor.in_buffers_only) > 0)
2328   {
2329     if (translog_status != TRANSLOG_OK)
2330       goto end;
2331     log_descriptor.in_buffers_only= in_buffers;
2332     DBUG_PRINT("info", ("set new in_buffers_only"));
2333   }
2334 end:
2335   mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2336   DBUG_VOID_RETURN;
2337 }
2338 
2339 
2340 /*
2341   Gets address from which data is only in the buffer
2342 
2343   SYNOPSIS
2344     translog_only_in_buffers()
2345 
2346   RETURN
2347     address from which data is only in the buffer
2348 */
2349 
translog_only_in_buffers()2350 static TRANSLOG_ADDRESS translog_only_in_buffers()
2351 {
2352   register TRANSLOG_ADDRESS addr;
2353   DBUG_ENTER("translog_only_in_buffers");
2354   mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
2355   addr= log_descriptor.in_buffers_only;
2356   mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2357   DBUG_RETURN(addr);
2358 }
2359 
2360 
2361 /*
2362   Get max LSN sent to file
2363 
2364   SYNOPSIS
2365     translog_get_sent_to_disk()
2366 
2367   RETURN
2368     max LSN send to file
2369 */
2370 
translog_get_sent_to_disk()2371 static LSN translog_get_sent_to_disk()
2372 {
2373   register LSN lsn;
2374   DBUG_ENTER("translog_get_sent_to_disk");
2375   mysql_mutex_lock(&log_descriptor.sent_to_disk_lock);
2376   lsn= log_descriptor.sent_to_disk;
2377   DBUG_PRINT("info", ("sent to disk up to " LSN_FMT, LSN_IN_PARTS(lsn)));
2378   mysql_mutex_unlock(&log_descriptor.sent_to_disk_lock);
2379   DBUG_RETURN(lsn);
2380 }
2381 
2382 
2383 /*
2384   Get first chunk address on the given page
2385 
2386   SYNOPSIS
2387     translog_get_first_chunk_offset()
2388     page                 The page where to find first chunk
2389 
2390   RETURN
2391     first chunk offset
2392 */
2393 
translog_get_first_chunk_offset(uchar * page)2394 static my_bool translog_get_first_chunk_offset(uchar *page)
2395 {
2396   DBUG_ENTER("translog_get_first_chunk_offset");
2397   DBUG_ASSERT(page[TRANSLOG_PAGE_FLAGS] < TRANSLOG_FLAGS_NUM);
2398   DBUG_RETURN(page_overhead[page[TRANSLOG_PAGE_FLAGS]]);
2399 }
2400 
2401 
2402 /*
2403   Write coded length of record
2404 
2405   SYNOPSIS
2406     translog_write_variable_record_1group_code_len
2407     dst                  Destination buffer pointer
2408     length               Length which should be coded
2409     header_len           Calculated total header length
2410 */
2411 
2412 static void
translog_write_variable_record_1group_code_len(uchar * dst,translog_size_t length,uint16 header_len)2413 translog_write_variable_record_1group_code_len(uchar *dst,
2414                                                translog_size_t length,
2415                                                uint16 header_len)
2416 {
2417   switch (header_len) {
2418   case 6:                                      /* (5 + 1) */
2419     DBUG_ASSERT(length <= 250);
2420     *dst= (uint8) length;
2421     return;
2422   case 8:                                      /* (5 + 3) */
2423     DBUG_ASSERT(length <= 0xFFFF);
2424     *dst= 251;
2425     int2store(dst + 1, length);
2426     return;
2427   case 9:                                      /* (5 + 4) */
2428     DBUG_ASSERT(length <= (ulong) 0xFFFFFF);
2429     *dst= 252;
2430     int3store(dst + 1, length);
2431     return;
2432   case 10:                                     /* (5 + 5) */
2433     *dst= 253;
2434     int4store(dst + 1, length);
2435     return;
2436   default:
2437     DBUG_ASSERT(0);
2438   }
2439   return;
2440 }
2441 
2442 
2443 /*
2444   Decode record data length and advance given pointer to the next field
2445 
2446   SYNOPSIS
2447     translog_variable_record_1group_decode_len()
2448     src                  The pointer to the pointer to the length beginning
2449 
2450   RETURN
2451     decoded length
2452 */
2453 
translog_variable_record_1group_decode_len(uchar ** src)2454 static translog_size_t translog_variable_record_1group_decode_len(uchar **src)
2455 {
2456   uint8 first= (uint8) (**src);
2457   switch (first) {
2458   case 251:
2459     (*src)+= 3;
2460     return (uint2korr((*src) - 2));
2461   case 252:
2462     (*src)+= 4;
2463     return (uint3korr((*src) - 3));
2464   case 253:
2465     (*src)+= 5;
2466     return (uint4korr((*src) - 4));
2467   case 254:
2468   case 255:
2469     DBUG_ASSERT(0);                             /* reserved for future use */
2470     return (0);
2471   default:
2472     (*src)++;
2473     return (first);
2474   }
2475 }
2476 
2477 
2478 /*
2479   Get total length of this chunk (not only body)
2480 
2481   SYNOPSIS
2482     translog_get_total_chunk_length()
2483     page                 The page where chunk placed
2484     offset               Offset of the chunk on this place
2485 
2486   RETURN
2487     total length of the chunk
2488 */
2489 
static uint16 translog_get_total_chunk_length(uchar *page, uint16 offset)
{
  uchar *chunk= page + offset;          /* first byte of the chunk */
  DBUG_ENTER("translog_get_total_chunk_length");
  switch (chunk[0] & TRANSLOG_CHUNK_TYPE) {
  case TRANSLOG_CHUNK_LSN:
  {
    /* 0 chunk referred as LSN (head or tail) */
    translog_size_t record_len;
    uchar *pos= chunk + 1 + 2;          /* skip chunk type and short trid */
    uint16 stored_len, hdr_len, rest_of_page;
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
    record_len= translog_variable_record_1group_decode_len(&pos);
    stored_len= uint2korr(pos);
    /* header = everything up to 'pos' plus the 2 bytes of chunk length */
    hdr_len= (uint16) (pos - chunk) + 2;
    DBUG_PRINT("info", ("rec len: %lu  chunk len: %u  header len: %u",
                        (ulong) record_len, (uint) stored_len,
                        (uint) hdr_len));
    if (stored_len)
    {
      /* Explicit chunk length is stored: use it */
      DBUG_PRINT("info", ("chunk len: %u + %u = %u",
                          (uint) hdr_len, (uint) stored_len,
                          (uint) (stored_len + hdr_len)));
      DBUG_RETURN(stored_len + hdr_len);
    }
    /*
      No chunk length stored: the chunk is the whole record if it fits
      in the rest of the page, otherwise it is the rest of the page.
    */
    rest_of_page= TRANSLOG_PAGE_SIZE - offset;
    DBUG_PRINT("info", ("page_rest %u", (uint) rest_of_page));
    if (record_len + hdr_len >= rest_of_page)
      DBUG_RETURN(rest_of_page);
    DBUG_RETURN(record_len + hdr_len);
  }
  case TRANSLOG_CHUNK_FIXED:
  {
    /* 1 (pseudo)fixed record (also LSN) */
    uint type= chunk[0] & TRANSLOG_REC_TYPE;
    uchar *lsn_pos;
    uint total;
    int lsn_no;
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED"));
    DBUG_ASSERT(log_record_type_descriptor[type].rclass ==
                LOGRECTYPE_FIXEDLENGTH ||
                log_record_type_descriptor[type].rclass ==
                LOGRECTYPE_PSEUDOFIXEDLENGTH);
    if (log_record_type_descriptor[type].rclass == LOGRECTYPE_FIXEDLENGTH)
    {
      DBUG_PRINT("info",
                 ("Fixed length: %u",
                  (uint) (log_record_type_descriptor[type].fixed_length + 3)));
      DBUG_RETURN(log_record_type_descriptor[type].fixed_length + 3);
    }

    /*
      Pseudo-fixed record: start from the uncompressed length and correct
      it by the real stored size of every compressed LSN.
    */
    lsn_pos= chunk + 3;                 /* first compressed LSN */
    total= log_record_type_descriptor[type].fixed_length + 3;
    lsn_no= 0;
    while (lsn_no < log_record_type_descriptor[type].compressed_LSN)
    {
      /* first 2 bits is length - 2 */
      uint stored= (((uint8) lsn_pos[0]) >> 6) + 2;
      if (lsn_pos[0] == 0 && ((uint8) lsn_pos[1]) == 1)
        stored+= LSN_STORE_SIZE;        /* case of full LSN storing */
      lsn_pos+= stored;
      /* account for the bytes saved (or added) against a full LSN */
      total+= stored - LSN_STORE_SIZE;
      lsn_no++;
    }
    DBUG_PRINT("info", ("Pseudo-fixed length: %u", total));
    DBUG_RETURN(total);
  }
  case TRANSLOG_CHUNK_NOHDR:
    /* 2 no header chunk (till page end) */
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR  length: %u",
                        (uint) (TRANSLOG_PAGE_SIZE - offset)));
    DBUG_RETURN(TRANSLOG_PAGE_SIZE - offset);
  case TRANSLOG_CHUNK_LNGTH:
    /* 3 chunk with chunk length: 1 type byte + 2 length bytes + data */
    DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH"));
    DBUG_ASSERT(TRANSLOG_PAGE_SIZE - offset >= 3);
    DBUG_PRINT("info", ("length: %u", uint2korr(chunk + 1) + 3));
    DBUG_RETURN(uint2korr(chunk + 1) + 3);
  default:
    DBUG_ASSERT(0);
    DBUG_RETURN(0);
  }
}
2570 
2571 /*
2572   @brief Waits previous buffer flush finish
2573 
2574   @param buffer          buffer for check
2575 
2576   @retval 0 previous buffer flushed and this thread have to flush this one
2577   @retval 1 previous buffer flushed and this buffer flushed by other thread too
2578 */
2579 
translog_prev_buffer_flush_wait(struct st_translog_buffer * buffer)2580 my_bool translog_prev_buffer_flush_wait(struct st_translog_buffer *buffer)
2581 {
2582   TRANSLOG_ADDRESS offset= buffer->offset;
2583   TRANSLOG_FILE *file= buffer->file;
2584   uint8 ver= buffer->ver;
2585   DBUG_ENTER("translog_prev_buffer_flush_wait");
2586   DBUG_PRINT("enter", ("buffer: %p  #%u  offset: " LSN_FMT "  "
2587                        "prev sent: " LSN_FMT " prev offset: " LSN_FMT,
2588                        buffer, (uint) buffer->buffer_no,
2589                        LSN_IN_PARTS(buffer->offset),
2590                        LSN_IN_PARTS(buffer->prev_sent_to_disk),
2591                        LSN_IN_PARTS(buffer->prev_buffer_offset)));
2592   translog_buffer_lock_assert_owner(buffer);
2593   if (buffer->prev_buffer_offset != buffer->prev_sent_to_disk)
2594   {
2595     do {
2596       mysql_cond_wait(&buffer->prev_sent_to_disk_cond, &buffer->mutex);
2597       if (buffer->file != file || buffer->offset != offset ||
2598           buffer->ver != ver)
2599         DBUG_RETURN(1); /* some the thread flushed the buffer already */
2600     } while(buffer->prev_buffer_offset != buffer->prev_sent_to_disk);
2601   }
2602   DBUG_RETURN(0);
2603 }
2604 
2605 
2606 /*
2607   Flush given buffer
2608 
2609   SYNOPSIS
2610     translog_buffer_flush()
2611     buffer               This buffer should be flushed
2612 
2613   RETURN
2614     0  OK
2615     1  Error
2616 */
2617 
translog_buffer_flush(struct st_translog_buffer * buffer)2618 static my_bool translog_buffer_flush(struct st_translog_buffer *buffer)
2619 {
2620   uint32 i, pg;
2621   TRANSLOG_ADDRESS offset= buffer->offset;
2622   TRANSLOG_FILE *file= buffer->file;
2623   uint8 ver= buffer->ver;
2624   uint skipped_data;
2625   DBUG_ENTER("translog_buffer_flush");
2626   DBUG_PRINT("enter",
2627              ("Buffer: #%u %p file: %d  offset: " LSN_FMT "  size: %lu",
2628               (uint) buffer->buffer_no, buffer,
2629               buffer->file->handler.file,
2630               LSN_IN_PARTS(buffer->offset),
2631               (ulong) buffer->size));
2632   translog_buffer_lock_assert_owner(buffer);
2633 
2634   if (buffer->file == NULL)
2635     DBUG_RETURN(0);
2636 
2637   translog_wait_for_writers(buffer);
2638 
2639   if (buffer->file != file || buffer->offset != offset || buffer->ver != ver)
2640     DBUG_RETURN(0); /* some the thread flushed the buffer already */
2641 
2642   if (buffer->is_closing_buffer)
2643   {
2644     /* some other flush in progress */
2645     translog_wait_for_closing(buffer);
2646     if (buffer->file != file || buffer->offset != offset || buffer->ver != ver)
2647       DBUG_RETURN(0); /* some the thread flushed the buffer already */
2648   }
2649 
2650   if (buffer->overlay && translog_prev_buffer_flush_wait(buffer))
2651     DBUG_RETURN(0); /* some the thread flushed the buffer already */
2652 
2653   /*
2654     Send page by page in the pagecache what we are going to write on the
2655     disk
2656   */
2657   file= buffer->file;
2658   skipped_data= buffer->skipped_data;
2659   DBUG_ASSERT(skipped_data < TRANSLOG_PAGE_SIZE);
2660   for (i= 0, pg= LSN_OFFSET(buffer->offset) / TRANSLOG_PAGE_SIZE;
2661        i < buffer->size;
2662        i+= TRANSLOG_PAGE_SIZE, pg++)
2663   {
2664 #ifndef DBUG_OFF
2665     TRANSLOG_ADDRESS addr= (buffer->offset + i);
2666 #endif
2667     DBUG_PRINT("info", ("send log form %lu till %lu  address: " LSN_FMT "  "
2668                         "page #: %lu  buffer size: %lu  buffer: %p",
2669                         (ulong) i, (ulong) (i + TRANSLOG_PAGE_SIZE),
2670                         LSN_IN_PARTS(addr), (ulong) pg, (ulong) buffer->size,
2671                         buffer));
2672     DBUG_ASSERT(log_descriptor.pagecache->block_size == TRANSLOG_PAGE_SIZE);
2673     DBUG_ASSERT(i + TRANSLOG_PAGE_SIZE <= buffer->size);
2674     if (translog_status != TRANSLOG_OK && translog_status != TRANSLOG_SHUTDOWN)
2675       DBUG_RETURN(1);
2676     if (pagecache_write_part(log_descriptor.pagecache,
2677                         &file->handler, pg, 3,
2678                         buffer->buffer + i,
2679                         PAGECACHE_PLAIN_PAGE,
2680                         PAGECACHE_LOCK_LEFT_UNLOCKED,
2681                         PAGECACHE_PIN_LEFT_UNPINNED,
2682                         PAGECACHE_WRITE_DONE, 0,
2683                         LSN_IMPOSSIBLE,
2684                         skipped_data,
2685                         TRANSLOG_PAGE_SIZE - skipped_data))
2686     {
2687       DBUG_PRINT("error",
2688                  ("Can't write page " LSN_FMT " to pagecache, error: %d",
2689                   buffer->file->number,
2690                   (uint)(LSN_OFFSET(buffer->offset)+ i),
2691                   my_errno));
2692       translog_stop_writing();
2693       DBUG_RETURN(1);
2694     }
2695     skipped_data= 0;
2696   }
2697   file->is_sync= 0;
2698   if (my_pwrite(file->handler.file, buffer->buffer + buffer->skipped_data,
2699                 buffer->size - buffer->skipped_data,
2700                 LSN_OFFSET(buffer->offset) + buffer->skipped_data,
2701                 log_write_flags))
2702   {
2703     DBUG_PRINT("error", ("Can't write buffer " LSN_FMT " size %lu "
2704                          "to the disk (%d)",
2705                          (uint) file->handler.file,
2706                          (uint) LSN_OFFSET(buffer->offset),
2707                          (ulong) buffer->size, errno));
2708     translog_stop_writing();
2709     DBUG_RETURN(1);
2710   }
2711   /*
2712     Dropping the flag in such way can make false alarm: signalling than the
2713     file in not sync when it is sync, but the situation is quite rare and
2714     protections with mutexes give much more overhead to the whole engine
2715   */
2716   file->is_sync= 0;
2717 
2718   if (LSN_OFFSET(buffer->last_lsn) != 0)    /* if buffer->last_lsn is set */
2719   {
2720     if (translog_prev_buffer_flush_wait(buffer))
2721       DBUG_RETURN(0); /* some the thread flushed the buffer already */
2722     translog_set_sent_to_disk(buffer);
2723   }
2724   else
2725     translog_set_only_in_buffers(buffer->next_buffer_offset);
2726 
2727   /* say to next buffer that we are finished */
2728   {
2729     struct st_translog_buffer *next_buffer=
2730       log_descriptor.buffers + ((buffer->buffer_no + 1) % TRANSLOG_BUFFERS_NO);
2731     if (likely(translog_status == TRANSLOG_OK)){
2732       translog_buffer_lock(next_buffer);
2733       next_buffer->prev_sent_to_disk= buffer->offset;
2734       translog_buffer_unlock(next_buffer);
2735       mysql_cond_broadcast(&next_buffer->prev_sent_to_disk_cond);
2736     }
2737     else
2738     {
2739       /*
2740         It is shutdown =>
2741           1) there is only one thread
2742           2) mutexes of other buffers can be destroyed => we can't use them
2743       */
2744       next_buffer->prev_sent_to_disk= buffer->offset;
2745     }
2746   }
2747   /* Free buffer */
2748   buffer->file= NULL;
2749   buffer->overlay= 0;
2750   buffer->ver++;
2751   mysql_mutex_lock(&log_descriptor.dirty_buffer_mask_lock);
2752   log_descriptor.dirty_buffer_mask&= ~(1 << buffer->buffer_no);
2753   mysql_mutex_unlock(&log_descriptor.dirty_buffer_mask_lock);
2754   mysql_cond_broadcast(&buffer->waiting_filling_buffer);
2755   DBUG_RETURN(0);
2756 }
2757 
2758 
2759 /*
2760   Recover page with sector protection (wipe out failed chunks)
2761 
2762   SYNOPSYS
2763     translog_recover_page_up_to_sector()
2764     page                 reference on the page
2765     offset               offset of failed sector
2766 
2767   RETURN
2768     0  OK
2769     1  Error
2770 */
2771 
translog_recover_page_up_to_sector(uchar * page,uint16 offset)2772 static my_bool translog_recover_page_up_to_sector(uchar *page, uint16 offset)
2773 {
2774   uint16 chunk_offset= translog_get_first_chunk_offset(page), valid_chunk_end;
2775   DBUG_ENTER("translog_recover_page_up_to_sector");
2776   DBUG_PRINT("enter", ("offset: %u  first chunk: %u",
2777                        (uint) offset, (uint) chunk_offset));
2778 
2779   while (chunk_offset < offset && page[chunk_offset] != TRANSLOG_FILLER)
2780   {
2781     uint16 chunk_length;
2782     if ((chunk_length=
2783          translog_get_total_chunk_length(page, chunk_offset)) == 0)
2784     {
2785       DBUG_PRINT("error", ("cant get chunk length (offset %u)",
2786                            (uint) chunk_offset));
2787       DBUG_RETURN(1);
2788     }
2789     DBUG_PRINT("info", ("chunk: offset: %u  length %u",
2790                         (uint) chunk_offset, (uint) chunk_length));
2791     if (((ulong) chunk_offset) + ((ulong) chunk_length) > TRANSLOG_PAGE_SIZE)
2792     {
2793       DBUG_PRINT("error", ("damaged chunk (offset %u) in trusted area",
2794                            (uint) chunk_offset));
2795       DBUG_RETURN(1);
2796     }
2797     chunk_offset+= chunk_length;
2798   }
2799 
2800   valid_chunk_end= chunk_offset;
2801   /* end of trusted area - sector parsing */
2802   while (page[chunk_offset] != TRANSLOG_FILLER)
2803   {
2804     uint16 chunk_length;
2805     if ((chunk_length=
2806          translog_get_total_chunk_length(page, chunk_offset)) == 0)
2807       break;
2808 
2809     DBUG_PRINT("info", ("chunk: offset: %u  length %u",
2810                         (uint) chunk_offset, (uint) chunk_length));
2811     if (((ulong) chunk_offset) + ((ulong) chunk_length) >
2812         (uint) (offset + DISK_DRIVE_SECTOR_SIZE))
2813       break;
2814 
2815     chunk_offset+= chunk_length;
2816     valid_chunk_end= chunk_offset;
2817   }
2818   DBUG_PRINT("info", ("valid chunk end offset: %u", (uint) valid_chunk_end));
2819 
2820   memset(page + valid_chunk_end, TRANSLOG_FILLER,
2821          TRANSLOG_PAGE_SIZE - valid_chunk_end);
2822 
2823   DBUG_RETURN(0);
2824 }
2825 
2826 
2827 /**
2828   @brief Checks and removes sector protection.
2829 
2830   @param page            reference on the page content.
2831   @param file            transaction log descriptor.
2832 
2833   @retvat 0 OK
2834   @retval 1 Error
2835 */
2836 
2837 static my_bool
translog_check_sector_protection(uchar * page,TRANSLOG_FILE * file)2838 translog_check_sector_protection(uchar *page, TRANSLOG_FILE *file)
2839 {
2840   uint i, offset;
2841   uchar *table= page + page_overhead[page[TRANSLOG_PAGE_FLAGS]] -
2842     TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
2843   uint8 current= table[0];
2844   DBUG_ENTER("translog_check_sector_protection");
2845 
2846   for (i= 1, offset= DISK_DRIVE_SECTOR_SIZE;
2847        i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
2848        i++, offset+= DISK_DRIVE_SECTOR_SIZE)
2849   {
2850     /*
2851       TODO: add chunk counting for "suspecting" sectors (difference is
2852       more than 1-2), if difference more then present chunks then it is
2853       the problem.
2854     */
2855     uint8 test= page[offset];
2856     DBUG_PRINT("info", ("sector: #%u  offset: %u  current: %lx "
2857                         "read: 0x%x  stored: 0x%x%x",
2858                         i, offset, (ulong) current,
2859                         (uint) uint2korr(page + offset), (uint) table[i],
2860                         (uint) table[i + 1]));
2861     /*
2862       3 is minimal possible record length. So we can have "distance"
2863       between 2 sectors value more then DISK_DRIVE_SECTOR_SIZE / 3
2864       only if it is old value, i.e. the sector was not written.
2865     */
2866     if (((test < current) &&
2867          ((uint)(0xFFL - current + test) > DISK_DRIVE_SECTOR_SIZE / 3)) ||
2868         ((test >= current) &&
2869          ((uint)(test - current) > DISK_DRIVE_SECTOR_SIZE / 3)))
2870     {
2871       if (translog_recover_page_up_to_sector(page, offset))
2872         DBUG_RETURN(1);
2873       file->was_recovered= 1;
2874       DBUG_RETURN(0);
2875     }
2876 
2877     /* Restore value on the page */
2878     page[offset]= table[i];
2879     current= test;
2880     DBUG_PRINT("info", ("sector: #%u  offset: %u  current: %lx  "
2881                         "read: 0x%x  stored: 0x%x",
2882                         i, offset, (ulong) current,
2883                         (uint) page[offset], (uint) table[i]));
2884   }
2885   DBUG_RETURN(0);
2886 }
2887 
2888 
2889 /**
2890   @brief Log page validator (read callback)
2891 
2892   @param page            The page data to check
2893   @param page_no         The page number (<offset>/<page length>)
2894   @param data_ptr        Read callback data pointer (pointer to TRANSLOG_FILE)
2895 
2896   @todo: add turning loghandler to read-only mode after merging with
2897   that patch.
2898 
2899   @retval 0 OK
2900   @retval 1 Error
2901 */
2902 
translog_page_validator(int res,PAGECACHE_IO_HOOK_ARGS * args)2903 static my_bool translog_page_validator(int res, PAGECACHE_IO_HOOK_ARGS *args)
2904 {
2905   uchar *page= args->page;
2906   pgcache_page_no_t page_no= args->pageno;
2907   uint this_page_page_overhead;
2908   uint flags;
2909   uchar *page_pos;
2910   TRANSLOG_FILE *data= (TRANSLOG_FILE *) args->data;
2911 #ifndef DBUG_OFF
2912   pgcache_page_no_t offset= page_no * TRANSLOG_PAGE_SIZE;
2913 #endif
2914   DBUG_ENTER("translog_page_validator");
2915 
2916   data->was_recovered= 0;
2917 
2918   if (res)
2919   {
2920     DBUG_RETURN(1);
2921   }
2922 
2923   if ((pgcache_page_no_t) uint3korr(page) != page_no ||
2924       (uint32) uint3korr(page + 3) != data->number)
2925   {
2926     DBUG_PRINT("error", ("Page " LSN_FMT ": "
2927                          "page address written in the page is incorrect: "
2928                          "File %lu instead of %lu or page %lu instead of %lu",
2929                          (uint)data->number, (uint)offset,
2930                          (ulong) uint3korr(page + 3), (ulong) data->number,
2931                          (ulong) uint3korr(page),
2932                          (ulong) page_no));
2933     DBUG_RETURN(1);
2934   }
2935   flags= (uint)(page[TRANSLOG_PAGE_FLAGS]);
2936   this_page_page_overhead= page_overhead[flags];
2937   if (flags & ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
2938                 TRANSLOG_RECORD_CRC))
2939   {
2940     DBUG_PRINT("error", ("Page " LSN_FMT ": "
2941                          "Garbage in the page flags field detected : %x",
2942                          (uint) data->number, (uint) offset,
2943                          (uint) flags));
2944     DBUG_RETURN(1);
2945   }
2946   page_pos= page + (3 + 3 + 1);
2947   if (flags & TRANSLOG_PAGE_CRC)
2948   {
2949     uint32 crc= translog_crc(page + this_page_page_overhead,
2950                              TRANSLOG_PAGE_SIZE -
2951                              this_page_page_overhead);
2952     if (crc != uint4korr(page_pos))
2953     {
2954       DBUG_PRINT("error", ("Page " LSN_FMT ": "
2955                            "CRC mismatch: calculated: %lx on the page %lx",
2956                            (uint) data->number, (uint) offset,
2957                            (ulong) crc, (ulong) uint4korr(page_pos)));
2958       DBUG_RETURN(1);
2959     }
2960     page_pos+= CRC_SIZE;                      /* Skip crc */
2961   }
2962   if (flags & TRANSLOG_SECTOR_PROTECTION &&
2963       translog_check_sector_protection(page, data))
2964   {
2965     DBUG_RETURN(1);
2966   }
2967   DBUG_RETURN(0);
2968 }
2969 
2970 
2971 /**
2972   @brief Locks the loghandler.
2973 */
2974 
translog_lock()2975 void translog_lock()
2976 {
2977   uint8 current_buffer;
2978   DBUG_ENTER("translog_lock");
2979 
2980   /*
2981      Locking the loghandler mean locking current buffer, but it can change
2982      during locking, so we should check it
2983   */
2984   for (;;)
2985   {
2986     /*
2987       log_descriptor.bc.buffer_no is only one byte so its reading is
2988       an atomic operation
2989     */
2990     current_buffer= log_descriptor.bc.buffer_no;
2991     translog_buffer_lock(log_descriptor.buffers + current_buffer);
2992     if (log_descriptor.bc.buffer_no == current_buffer)
2993       break;
2994     translog_buffer_unlock(log_descriptor.buffers + current_buffer);
2995   }
2996   DBUG_VOID_RETURN;
2997 }
2998 
2999 
3000 /*
3001   Unlock the loghandler
3002 
3003   SYNOPSIS
3004     translog_unlock()
3005 
3006   RETURN
3007     0  OK
3008     1  Error
3009 */
3010 
translog_unlock()3011 void translog_unlock()
3012 {
3013   translog_buffer_unlock(log_descriptor.bc.buffer);
3014 }
3015 
3016 
3017 /**
3018   @brief Get log page by file number and offset of the beginning of the page
3019 
3020   @param data            validator data, which contains the page address
3021   @param buffer          buffer for page placing
3022                          (might not be used in some cache implementations)
3023   @param direct_link     if it is not NULL then caller can accept direct
3024                          link to the page cache
3025 
3026   @retval NULL Error
3027   @retval #    pointer to the page cache which should be used to read this page
3028 */
3029 
translog_get_page(TRANSLOG_VALIDATOR_DATA * data,uchar * buffer,PAGECACHE_BLOCK_LINK ** direct_link)3030 static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer,
3031                                 PAGECACHE_BLOCK_LINK **direct_link)
3032 {
3033   TRANSLOG_ADDRESS addr= *(data->addr), in_buffers;
3034   uint32 file_no= LSN_FILE_NO(addr);
3035   TRANSLOG_FILE *file;
3036   DBUG_ENTER("translog_get_page");
3037   DBUG_PRINT("enter", ("File: %u  Offset: %u(0x%x)",
3038                        file_no,
3039                        (uint) LSN_OFFSET(addr),
3040                        (uint) LSN_OFFSET(addr)));
3041 
3042   /* it is really page address */
3043   DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0);
3044   if (direct_link)
3045     *direct_link= NULL;
3046 
3047 restart:
3048 
3049   in_buffers= translog_only_in_buffers();
3050   DBUG_PRINT("info", ("in_buffers: " LSN_FMT,
3051                       LSN_IN_PARTS(in_buffers)));
3052   if (in_buffers != LSN_IMPOSSIBLE &&
3053       cmp_translog_addr(addr, in_buffers) >= 0)
3054   {
3055     translog_lock();
3056     DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
3057     /* recheck with locked loghandler */
3058     in_buffers= translog_only_in_buffers();
3059     if (cmp_translog_addr(addr, in_buffers) >= 0)
3060     {
3061       uint16 buffer_no= log_descriptor.bc.buffer_no;
3062 #ifdef DBUG_ASSERT_EXISTS
3063       uint16 buffer_start= buffer_no;
3064 #endif
3065       struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer;
3066       struct st_translog_buffer *curr_buffer= log_descriptor.bc.buffer;
3067       for (;;)
3068       {
3069         /*
3070           if the page is in the buffer and it is the last version of the
3071           page (in case of division the page by buffer flush)
3072         */
3073         if (curr_buffer->file != NULL &&
3074             cmp_translog_addr(addr, curr_buffer->offset) >= 0 &&
3075             cmp_translog_addr(addr,
3076                               (curr_buffer->next_buffer_offset ?
3077                                curr_buffer->next_buffer_offset:
3078                                curr_buffer->offset + curr_buffer->size)) < 0)
3079         {
3080           TRANSLOG_ADDRESS offset= curr_buffer->offset;
3081           TRANSLOG_FILE *fl= curr_buffer->file;
3082           uchar *from, *table= NULL;
3083           int is_last_unfinished_page;
3084           uint last_protected_sector= 0;
3085           uint skipped_data= curr_buffer->skipped_data;
3086           TRANSLOG_FILE file_copy;
3087           uint8 ver= curr_buffer->ver;
3088           translog_wait_for_writers(curr_buffer);
3089           if (offset != curr_buffer->offset || fl != curr_buffer->file ||
3090               ver != curr_buffer->ver)
3091           {
3092             DBUG_ASSERT(buffer_unlock == curr_buffer);
3093             translog_buffer_unlock(buffer_unlock);
3094             goto restart;
3095           }
3096           DBUG_ASSERT(LSN_FILE_NO(addr) ==  LSN_FILE_NO(curr_buffer->offset));
3097           from= curr_buffer->buffer + (addr - curr_buffer->offset);
3098           if (skipped_data && addr == curr_buffer->offset)
3099           {
3100             /*
3101               We read page part of which is not present in buffer,
3102               so we should read absent part from file (page cache actually)
3103             */
3104             file= get_logfile_by_number(file_no);
3105             DBUG_ASSERT(file != NULL);
3106             /*
3107               it's ok to not lock the page because:
3108                 - The log handler has it's own page cache.
3109                 - There is only one thread that can access the log
3110                 cache at a time
3111             */
3112             if (!(buffer= pagecache_read(log_descriptor.pagecache,
3113                                          &file->handler,
3114                                          LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
3115                                          3, buffer,
3116                                          PAGECACHE_PLAIN_PAGE,
3117                                          PAGECACHE_LOCK_LEFT_UNLOCKED,
3118                                          NULL)))
3119               DBUG_RETURN(NULL);
3120           }
3121           else
3122             skipped_data= 0;  /* Read after skipped in buffer data */
3123           /*
3124             Now we have correct data in buffer up to 'skipped_data'. The
3125             following memcpy() will move the data from the internal buffer
3126             that was not yet on disk.
3127           */
3128           memcpy(buffer + skipped_data, from + skipped_data,
3129                  TRANSLOG_PAGE_SIZE - skipped_data);
3130           /*
3131             We can use copy then in translog_page_validator() because it
3132             do not put it permanently somewhere.
3133             We have to use copy because after releasing log lock we can't
3134             guaranty that the file still be present (in real life it will be
3135             present but theoretically possible that it will be released
3136             already from last files cache);
3137           */
3138           file_copy= *(curr_buffer->file);
3139           file_copy.handler.callback_data= (uchar*) &file_copy;
3140           is_last_unfinished_page= ((log_descriptor.bc.buffer ==
3141                                      curr_buffer) &&
3142                                     (log_descriptor.bc.ptr >= from) &&
3143                                     (log_descriptor.bc.ptr <
3144                                      from + TRANSLOG_PAGE_SIZE));
3145           if (is_last_unfinished_page &&
3146               (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION))
3147           {
3148             last_protected_sector= ((log_descriptor.bc.previous_offset - 1) /
3149                                     DISK_DRIVE_SECTOR_SIZE);
3150             table= buffer + log_descriptor.page_overhead -
3151               TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
3152           }
3153 
3154           DBUG_ASSERT(buffer_unlock == curr_buffer);
3155           translog_buffer_unlock(buffer_unlock);
3156           if (is_last_unfinished_page)
3157           {
3158             uint i;
3159             /*
3160               This is last unfinished page => we should not check CRC and
3161               remove only that protection which already installed (no need
3162               to check it)
3163 
3164               We do not check the flag of sector protection, because if
3165               (buffer[TRANSLOG_PAGE_FLAGS] & TRANSLOG_SECTOR_PROTECTION) is
3166               not set then last_protected_sector will be 0 so following loop
3167               will be never executed
3168             */
3169             DBUG_PRINT("info", ("This is last unfinished page, "
3170                                 "last protected sector %u",
3171                                 last_protected_sector));
3172             for (i= 1; i <= last_protected_sector; i++)
3173             {
3174               uint offset= i * DISK_DRIVE_SECTOR_SIZE;
3175               DBUG_PRINT("info", ("Sector %u: 0x%02x <- 0x%02x",
3176                                   i, buffer[offset],
3177                                   table[i]));
3178               buffer[offset]= table[i];
3179             }
3180           }
3181           else
3182           {
3183             /*
3184               This IF should be true because we use in-memory data which
3185               supposed to be correct.
3186             */
3187             PAGECACHE_IO_HOOK_ARGS args;
3188             args.page= buffer;
3189             args.pageno= LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE;
3190             args.data= (uchar*) &file_copy;
3191             if (translog_page_validator(0, &args))
3192             {
3193               DBUG_ASSERT(0);
3194               buffer= NULL;
3195             }
3196           }
3197           DBUG_RETURN(buffer);
3198         }
3199         buffer_no= (buffer_no + 1) % TRANSLOG_BUFFERS_NO;
3200         curr_buffer= log_descriptor.buffers + buffer_no;
3201         translog_buffer_lock(curr_buffer);
3202         translog_buffer_unlock(buffer_unlock);
3203         buffer_unlock= curr_buffer;
3204         /* we can't make a full circle */
3205         DBUG_ASSERT(buffer_start != buffer_no);
3206       }
3207     }
3208     translog_unlock();
3209   }
3210   file= get_logfile_by_number(file_no);
3211   DBUG_ASSERT(file != NULL);
3212   buffer= pagecache_read(log_descriptor.pagecache, &file->handler,
3213                          LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE,
3214                          3, (direct_link ? NULL : buffer),
3215                          PAGECACHE_PLAIN_PAGE,
3216                          (direct_link ?
3217                           PAGECACHE_LOCK_READ :
3218                           PAGECACHE_LOCK_LEFT_UNLOCKED),
3219                          direct_link);
3220   DBUG_PRINT("info", ("Direct link is assigned to : %p * %p",
3221                       direct_link,
3222                       (direct_link ? *direct_link : NULL)));
3223   data->was_recovered= file->was_recovered;
3224   DBUG_RETURN(buffer);
3225 }
3226 
3227 
3228 /**
3229   @brief free direct log page link
3230 
3231   @param direct_link the direct log page link to be freed
3232 
3233 */
3234 
translog_free_link(PAGECACHE_BLOCK_LINK * direct_link)3235 static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link)
3236 {
3237   DBUG_ENTER("translog_free_link");
3238   DBUG_PRINT("info", ("Direct link: %p",
3239                       direct_link));
3240   if (direct_link)
3241     pagecache_unlock_by_link(log_descriptor.pagecache, direct_link,
3242                              PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN,
3243                              LSN_IMPOSSIBLE, LSN_IMPOSSIBLE, 0, FALSE);
3244   DBUG_VOID_RETURN;
3245 }
3246 
3247 
3248 /**
3249   @brief Finds last full page of the given log file.
3250 
3251   @param addr            address structure to fill with data, which contain
3252                          file number of the log file
3253   @param last_page_ok    Result of the check whether last page OK.
3254                          (for now only we check only that file length
3255                          divisible on page length).
3256   @param no_errors       suppress messages about non-critical errors
3257 
3258   @retval 0 OK
3259   @retval 1 Error
3260 */
3261 
translog_get_last_page_addr(TRANSLOG_ADDRESS * addr,my_bool * last_page_ok,my_bool no_errors)3262 static my_bool translog_get_last_page_addr(TRANSLOG_ADDRESS *addr,
3263                                            my_bool *last_page_ok,
3264                                            my_bool no_errors)
3265 {
3266   char path[FN_REFLEN];
3267   uint32 rec_offset;
3268   my_off_t file_size;
3269   uint32 file_no= LSN_FILE_NO(*addr);
3270   TRANSLOG_FILE *file;
3271 #ifndef DBUG_OFF
3272   char buff[21];
3273 #endif
3274   DBUG_ENTER("translog_get_last_page_addr");
3275 
3276   if (likely((file= get_logfile_by_number(file_no)) != NULL))
3277   {
3278     /*
3279       This function used only during initialization of loghandler or in
3280       scanner (which mean we need read that part of the log), so the
3281       requested log file have to be opened and can't be freed after
3282       returning pointer on it (file_size).
3283     */
3284     file_size= mysql_file_seek(file->handler.file, 0, SEEK_END, MYF(0));
3285   }
3286   else
3287   {
3288     /*
3289       This branch is used only during very early initialization
3290       when files are not opened.
3291     */
3292     File fd;
3293     if ((fd= mysql_file_open(key_file_translog,
3294                              translog_filename_by_fileno(file_no, path),
3295                              O_RDONLY | O_CLOEXEC, (no_errors ? MYF(0) : MYF(MY_WME)))) < 0)
3296     {
3297       my_errno= errno;
3298       DBUG_PRINT("error", ("Error %d during opening file #%d",
3299                            errno, file_no));
3300       DBUG_RETURN(1);
3301     }
3302     file_size= mysql_file_seek(fd, 0, SEEK_END, MYF(0));
3303     mysql_file_close(fd, MYF(0));
3304   }
3305   DBUG_PRINT("info", ("File size: %s", llstr(file_size, buff)));
3306   if (file_size == MY_FILEPOS_ERROR)
3307     DBUG_RETURN(1);
3308   DBUG_ASSERT(file_size < 0xffffffffULL);
3309   if (((uint32)file_size) > TRANSLOG_PAGE_SIZE)
3310   {
3311     rec_offset= (((((uint32)file_size) / TRANSLOG_PAGE_SIZE) - 1) *
3312                        TRANSLOG_PAGE_SIZE);
3313     *last_page_ok= (((uint32)file_size) == rec_offset + TRANSLOG_PAGE_SIZE);
3314   }
3315   else
3316   {
3317     *last_page_ok= 0;
3318     rec_offset= 0;
3319   }
3320   *addr= MAKE_LSN(file_no, rec_offset);
3321   DBUG_PRINT("info", ("Last page: 0x%lx  ok: %d", (ulong) rec_offset,
3322                       *last_page_ok));
3323   DBUG_RETURN(0);
3324 }
3325 
3326 
3327 /**
  @brief Get the number of bytes needed to store a record length
3329 
3330   @param length          Record length which will be encoded
3331 
3332   @return 1,3,4,5 - number of bytes to store given length
3333 */
3334 
translog_variable_record_length_bytes(translog_size_t length)3335 static uint translog_variable_record_length_bytes(translog_size_t length)
3336 {
3337   if (length < 250)
3338     return 1;
3339   if (length < 0xFFFF)
3340     return 3;
3341   if (length < (ulong) 0xFFFFFF)
3342     return 4;
3343   return 5;
3344 }
3345 
3346 
3347 /**
3348   @brief Gets header of this chunk.
3349 
3350   @param chunk           The pointer to the chunk beginning
3351 
  @retval # length of the chunk header
3353   @retval 0 Error
3354 */
3355 
translog_get_chunk_header_length(uchar * chunk)3356 static uint16 translog_get_chunk_header_length(uchar *chunk)
3357 {
3358   DBUG_ENTER("translog_get_chunk_header_length");
3359   switch (*chunk & TRANSLOG_CHUNK_TYPE) {
3360   case TRANSLOG_CHUNK_LSN:
3361   {
3362     /* 0 chunk referred as LSN (head or tail) */
3363     translog_size_t rec_len __attribute__((unused));
3364     uchar *start= chunk;
3365     uchar *ptr= start + 1 + 2;
3366     uint16 chunk_len, header_len;
3367     DBUG_PRINT("info", ("TRANSLOG_CHUNK_LSN"));
3368     rec_len= translog_variable_record_1group_decode_len(&ptr);
3369     chunk_len= uint2korr(ptr);
3370     header_len= (uint16) (ptr - start) +2;
3371     DBUG_PRINT("info", ("rec len: %lu  chunk len: %u  header len: %u",
3372                         (ulong) rec_len, (uint) chunk_len, (uint) header_len));
3373     if (chunk_len)
3374     {
3375       /* TODO: fine header end */
3376       /*
3377         The last chunk of multi-group record can be base for it header
3378         calculation (we skip to the first group to read the header) so if we
3379         stuck here something is wrong.
3380       */
3381       DBUG_ASSERT(0);
3382       DBUG_RETURN(0);                               /* Keep compiler happy */
3383     }
3384     DBUG_RETURN(header_len);
3385   }
3386   case TRANSLOG_CHUNK_FIXED:
3387   {
3388     /* 1 (pseudo)fixed record (also LSN) */
3389     DBUG_PRINT("info", ("TRANSLOG_CHUNK_FIXED = 3"));
3390     DBUG_RETURN(3);
3391   }
3392   case TRANSLOG_CHUNK_NOHDR:
3393     /* 2 no header chunk (till page end) */
3394     DBUG_PRINT("info", ("TRANSLOG_CHUNK_NOHDR = 1"));
3395     DBUG_RETURN(1);
3396     break;
3397   case TRANSLOG_CHUNK_LNGTH:
3398     /* 3 chunk with chunk length */
3399     DBUG_PRINT("info", ("TRANSLOG_CHUNK_LNGTH = 3"));
3400     DBUG_RETURN(3);
3401     break;
3402   }
3403   DBUG_ASSERT(0);
3404   DBUG_RETURN(0);                               /* Keep compiler happy */
3405 }
3406 
3407 
3408 /**
  @brief Truncate the log to the given address. Used during startup if the
         end of the log is corrupted.
3411 
3412   @param addr            new horizon
3413 
3414   @retval 0 OK
3415   @retval 1 Error
3416 */
3417 
translog_truncate_log(TRANSLOG_ADDRESS addr)3418 static my_bool translog_truncate_log(TRANSLOG_ADDRESS addr)
3419 {
3420   uchar *page;
3421   TRANSLOG_ADDRESS current_page;
3422   uint32 next_page_offset, page_rest;
3423   uint32 i;
3424   File fd;
3425   int rc;
3426   TRANSLOG_VALIDATOR_DATA data;
3427   char path[FN_REFLEN];
3428   uchar page_buff[TRANSLOG_PAGE_SIZE];
3429   DBUG_ENTER("translog_truncate_log");
3430   /* TODO: write warning to the client */
3431   DBUG_PRINT("warning", ("removing all records from " LSN_FMT " "
3432                          "till " LSN_FMT,
3433                          LSN_IN_PARTS(addr),
3434                          LSN_IN_PARTS(log_descriptor.horizon)));
3435   DBUG_ASSERT(cmp_translog_addr(addr, log_descriptor.horizon) < 0);
3436   /* remove files between the address and horizon */
3437   for (i= LSN_FILE_NO(addr) + 1; i <= LSN_FILE_NO(log_descriptor.horizon); i++)
3438     if (mysql_file_delete(key_file_translog,
3439                           translog_filename_by_fileno(i, path),  MYF(MY_WME)))
3440     {
3441       translog_unlock();
3442       DBUG_RETURN(1);
3443     }
3444 
3445   /* truncate the last file up to the last page */
3446   next_page_offset= LSN_OFFSET(addr);
3447   next_page_offset= (next_page_offset -
3448                      ((next_page_offset - 1) % TRANSLOG_PAGE_SIZE + 1) +
3449                      TRANSLOG_PAGE_SIZE);
3450   page_rest= next_page_offset - LSN_OFFSET(addr);
3451   memset(page_buff, TRANSLOG_FILLER, page_rest);
3452   rc= ((fd= open_logfile_by_number_no_cache(LSN_FILE_NO(addr))) < 0 ||
3453        ((mysql_file_chsize(fd, next_page_offset, TRANSLOG_FILLER, MYF(MY_WME)) ||
3454          (page_rest && my_pwrite(fd, page_buff, page_rest, LSN_OFFSET(addr),
3455                                  log_write_flags)) ||
3456          mysql_file_sync(fd, MYF(MY_WME)))));
3457   translog_syncs++;
3458   rc|= (fd > 0 && mysql_file_close(fd, MYF(MY_WME)));
3459   if (sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS)
3460   {
3461     rc|= sync_dir(log_descriptor.directory_fd, MYF(MY_WME | MY_IGNORE_BADFD));
3462     translog_syncs++;
3463   }
3464   if (rc)
3465     DBUG_RETURN(1);
3466 
3467   /* fix the horizon */
3468   log_descriptor.horizon= addr;
3469   /* fix the buffer data */
3470   current_page= MAKE_LSN(LSN_FILE_NO(addr), (next_page_offset -
3471                                              TRANSLOG_PAGE_SIZE));
3472   data.addr= &current_page;
3473   if ((page= translog_get_page(&data, log_descriptor.buffers->buffer, NULL)) ==
3474       NULL)
3475     DBUG_RETURN(1);
3476   if (page != log_descriptor.buffers->buffer)
3477     memcpy(log_descriptor.buffers->buffer, page, TRANSLOG_PAGE_SIZE);
3478   log_descriptor.bc.buffer->offset= current_page;
3479   log_descriptor.bc.buffer->size= LSN_OFFSET(addr) - LSN_OFFSET(current_page);
3480   log_descriptor.bc.ptr=
3481     log_descriptor.buffers->buffer + log_descriptor.bc.buffer->size;
3482   log_descriptor.bc.current_page_fill= log_descriptor.bc.buffer->size;
3483   DBUG_RETURN(0);
3484 }
3485 
3486 
3487 /**
  Applies function 'callback' to all files (in a directory) whose
  name looks like a log file name (aria_log.[0-9]{8}).
3490   If 'callback' returns TRUE this interrupts the walk and returns
3491   TRUE. Otherwise FALSE is returned after processing all log files.
3492   It cannot just use log_descriptor.directory because that may not yet have
3493   been initialized.
3494 
3495   @param  directory        directory to scan
3496   @param  callback         function to apply; is passed directory and base
3497                            name of found file
3498 */
3499 
my_bool translog_walk_filenames(const char *directory,
                                my_bool (*callback)(const char *,
                                                    const char *))
{
  /* A log file name is this prefix followed by exactly 8 decimal digits */
  static const char prefix[]= "aria_log.";
  const size_t prefix_len= sizeof(prefix) - 1;  /* without the final '\0' */
  const size_t digits= 8;
  MY_DIR *dirp;
  uint i;
  my_bool rc= FALSE;

  /* An unreadable directory is reported the same way as "nothing found" */
  if (!(dirp= my_dir(directory, MYF(MY_DONT_SORT))))
    return FALSE;

  for (i= 0; i < dirp->number_of_files; i++)
  {
    const char *file= dirp->dir_entry[i].name;
    size_t pos;

    /*
      Compare the prefix only. Including the terminating '\0' of the
      literal in the comparison would reject every name that continues
      with digits.
    */
    if (strncmp(file, prefix, prefix_len) != 0)
      continue;

    /*
      Check the digits one by one. A premature '\0' is not a digit, so the
      loop stops there and never reads beyond the end of the name.
    */
    for (pos= prefix_len; pos < prefix_len + digits; pos++)
    {
      if (file[pos] < '0' || file[pos] > '9')
        break;
    }
    if (pos != prefix_len + digits || file[pos] != '\0')
      continue;                                 /* not a log file name */

    /* The callback asks to stop the walk by returning TRUE */
    if ((*callback)(directory, file))
    {
      rc= TRUE;
      break;
    }
  }
  my_dirend(dirp);
  return rc;
}
3533 
3534 
3535 /**
  @brief Fills the table that gives the page header length (overhead) for
         each combination of page flags
3537 */
3538 
translog_fill_overhead_table()3539 void translog_fill_overhead_table()
3540 {
3541   uint i;
3542   for (i= 0; i < TRANSLOG_FLAGS_NUM; i++)
3543   {
3544      page_overhead[i]= 7;
3545      if (i & TRANSLOG_PAGE_CRC)
3546        page_overhead[i]+= CRC_SIZE;
3547      if (i & TRANSLOG_SECTOR_PROTECTION)
3548        page_overhead[i]+= TRANSLOG_PAGE_SIZE /
3549                            DISK_DRIVE_SECTOR_SIZE;
3550   }
3551 }
3552 
3553 
3554 /**
3555   Callback to find first log in directory.
3556 */
3557 
static my_bool translog_callback_search_first(const char *directory
                                              __attribute__((unused)),
                                              const char *filename
                                              __attribute__((unused)))
{
  /*
    Both arguments are ignored: being called at all means a log file was
    found, and returning TRUE makes translog_walk_filenames() stop and
    return TRUE.
  */
  return TRUE;
}
3565 
3566 
3567 /**
3568   @brief Checks that chunk is LSN one
3569 
3570   @param type            type of the chunk
3571 
  @retval 1 the chunk is LSN
3573   @retval 0 the chunk is not LSN
3574 */
3575 
translog_is_LSN_chunk(uchar type)3576 static my_bool translog_is_LSN_chunk(uchar type)
3577 {
3578   DBUG_ENTER("translog_is_LSN_chunk");
3579   DBUG_PRINT("info", ("byte: %x  chunk type: %u  record type: %u",
3580                       type, type >> 6, type & TRANSLOG_REC_TYPE));
3581   DBUG_RETURN(((type & TRANSLOG_CHUNK_TYPE) == TRANSLOG_CHUNK_FIXED) ||
3582               (((type & TRANSLOG_CHUNK_TYPE) == TRANSLOG_CHUNK_LSN)  &&
3583                ((type & TRANSLOG_REC_TYPE)) != TRANSLOG_CHUNK_0_CONT));
3584 }
3585 
3586 
3587 /**
3588   @brief Initialize transaction log
3589 
3590   @param directory       Directory where log files are put
  @param log_file_max_size max size of one log file (used when creating new logs)
3592   @param server_version  version of MySQL server (MYSQL_VERSION_ID)
3593   @param server_id       server ID (replication & Co)
3594   @param pagecache       Page cache for the log reads
3595   @param flags           flags (TRANSLOG_PAGE_CRC, TRANSLOG_SECTOR_PROTECTION
3596                            TRANSLOG_RECORD_CRC)
3597   @param read_only       Put transaction log in read-only mode
3598   @param init_table_func function to initialize record descriptors table
3599   @param no_errors       suppress messages about non-critical errors
3600 
3601   @todo
3602     Free used resources in case of error.
3603 
3604   @retval 0 OK
3605   @retval 1 Error
3606 */
3607 
translog_init_with_table(const char * directory,uint32 log_file_max_size,uint32 server_version,uint32 server_id,PAGECACHE * pagecache,uint flags,my_bool readonly,void (* init_table_func)(),my_bool no_errors)3608 my_bool translog_init_with_table(const char *directory,
3609                                  uint32 log_file_max_size,
3610                                  uint32 server_version,
3611                                  uint32 server_id, PAGECACHE *pagecache,
3612                                  uint flags, my_bool readonly,
3613                                  void (*init_table_func)(),
3614                                  my_bool no_errors)
3615 {
3616   int i;
3617   int old_log_was_recovered= 0, logs_found= 0;
3618   uint old_flags= flags;
3619   uint32 start_file_num= 1;
3620   TRANSLOG_ADDRESS UNINIT_VAR(sure_page), last_page, last_valid_page,
3621     checkpoint_lsn;
3622   my_bool version_changed= 0;
3623   DBUG_ENTER("translog_init_with_table");
3624 
3625   translog_syncs= 0;
3626   flush_start= 0;
3627   id_to_share= NULL;
3628   log_purge_disabled= 0;
3629 
3630   log_descriptor.directory_fd= -1;
3631   log_descriptor.is_everything_flushed= 1;
3632   log_descriptor.flush_in_progress= 0;
3633   log_descriptor.flush_no= 0;
3634   log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
3635 
3636   /* Normally in Aria this this calls translog_table_init() */
3637   (*init_table_func)();
3638   compile_time_assert(sizeof(log_descriptor.dirty_buffer_mask) * 8 >=
3639                       TRANSLOG_BUFFERS_NO);
3640   log_descriptor.dirty_buffer_mask= 0;
3641   if (readonly)
3642     log_descriptor.open_flags= O_BINARY | O_RDONLY;
3643   else
3644     log_descriptor.open_flags= O_BINARY | O_RDWR;
3645   if (mysql_mutex_init(key_TRANSLOG_BUFFER_mutex,
3646                        &log_descriptor.sent_to_disk_lock, MY_MUTEX_INIT_FAST) ||
3647       mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_file_header_lock,
3648                        &log_descriptor.file_header_lock, MY_MUTEX_INIT_FAST) ||
3649       mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_unfinished_files_lock,
3650                        &log_descriptor.unfinished_files_lock, MY_MUTEX_INIT_FAST) ||
3651       mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_purger_lock,
3652                        &log_descriptor.purger_lock, MY_MUTEX_INIT_FAST) ||
3653       mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_log_flush_lock,
3654                        &log_descriptor.log_flush_lock, MY_MUTEX_INIT_FAST) ||
3655       mysql_mutex_init(key_TRANSLOG_DESCRIPTOR_dirty_buffer_mask_lock,
3656                        &log_descriptor.dirty_buffer_mask_lock, MY_MUTEX_INIT_FAST) ||
3657       mysql_cond_init(key_TRANSLOG_DESCRIPTOR_log_flush_cond,
3658                       &log_descriptor.log_flush_cond, 0) ||
3659       mysql_cond_init(key_TRANSLOG_DESCRIPTOR_new_goal_cond,
3660                       &log_descriptor.new_goal_cond, 0) ||
3661       mysql_rwlock_init(key_TRANSLOG_DESCRIPTOR_open_files_lock,
3662                         &log_descriptor.open_files_lock) ||
3663       my_init_dynamic_array(PSI_INSTRUMENT_ME, &log_descriptor.open_files,
3664                             sizeof(TRANSLOG_FILE*), 10, 10, MYF(0)) ||
3665       my_init_dynamic_array(PSI_INSTRUMENT_ME, &log_descriptor.unfinished_files,
3666                             sizeof(struct st_file_counter),
3667                             10, 10, MYF(0)))
3668     goto err;
3669   log_descriptor.min_need_file= 0;
3670   log_descriptor.min_file_number= 0;
3671   log_descriptor.last_lsn_checked= LSN_IMPOSSIBLE;
3672 
3673   /* Directory to store files */
3674   unpack_dirname(log_descriptor.directory, directory);
3675 #ifndef __WIN__
3676   if ((log_descriptor.directory_fd= my_open(log_descriptor.directory,
3677                                             O_RDONLY, MYF(MY_WME))) < 0)
3678   {
3679     my_errno= errno;
3680     DBUG_PRINT("error", ("Error %d during opening directory '%s'",
3681                          errno, log_descriptor.directory));
3682     goto err;
3683   }
3684 #endif
3685   log_descriptor.in_buffers_only= LSN_IMPOSSIBLE;
3686   DBUG_ASSERT(log_file_max_size % TRANSLOG_PAGE_SIZE == 0 &&
3687               log_file_max_size >= TRANSLOG_MIN_FILE_SIZE);
3688   /* max size of one log size (for new logs creation) */
3689   log_file_size= log_descriptor.log_file_max_size=
3690     log_file_max_size;
3691   /* server version */
3692   log_descriptor.server_version= server_version;
3693   /* server ID */
3694   log_descriptor.server_id= server_id;
3695   /* Page cache for the log reads */
3696   log_descriptor.pagecache= pagecache;
3697   /* Flags */
3698   DBUG_ASSERT((flags &
3699                ~(TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION |
3700                  TRANSLOG_RECORD_CRC)) == 0);
3701   log_descriptor.flags= flags;
3702   translog_fill_overhead_table();
3703   log_descriptor.page_overhead= page_overhead[flags];
3704   log_descriptor.page_capacity_chunk_2=
3705     TRANSLOG_PAGE_SIZE - log_descriptor.page_overhead - 1;
3706   compile_time_assert(TRANSLOG_WRITE_BUFFER % TRANSLOG_PAGE_SIZE == 0);
3707   log_descriptor.buffer_capacity_chunk_2=
3708     (TRANSLOG_WRITE_BUFFER / TRANSLOG_PAGE_SIZE) *
3709     log_descriptor.page_capacity_chunk_2;
3710   log_descriptor.half_buffer_capacity_chunk_2=
3711     log_descriptor.buffer_capacity_chunk_2 / 2;
3712   DBUG_PRINT("info",
3713              ("Overhead: %u  pc2: %u  bc2: %u,  bc2/2: %u",
3714               log_descriptor.page_overhead,
3715               log_descriptor.page_capacity_chunk_2,
3716               log_descriptor.buffer_capacity_chunk_2,
3717               log_descriptor.half_buffer_capacity_chunk_2));
3718 
3719   /* Just to init it somehow (hack for bootstrap)*/
3720   {
3721     TRANSLOG_FILE *file= 0;
3722     log_descriptor.min_file = log_descriptor.max_file= 1;
3723     insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
3724     translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
3725     pop_dynamic(&log_descriptor.open_files);
3726   }
3727 
3728   /* Buffers for log writing */
3729   for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
3730   {
3731     if (translog_buffer_init(log_descriptor.buffers + i, i))
3732       goto err;
3733     DBUG_PRINT("info", ("translog_buffer buffer #%u:%p",
3734                         i, log_descriptor.buffers + i));
3735   }
3736 
3737   /*
3738     last_logno and last_checkpoint_lsn were set in
3739     ma_control_file_create_or_open()
3740   */
3741   logs_found= (last_logno != FILENO_IMPOSSIBLE);
3742 
3743   translog_status= (readonly ? TRANSLOG_READONLY : TRANSLOG_OK);
3744   checkpoint_lsn= last_checkpoint_lsn;
3745 
3746   if (logs_found)
3747   {
3748     my_bool pageok;
3749     DBUG_PRINT("info", ("log found..."));
3750     /*
3751       TODO: scan directory for aria_log.XXXXXXXX files and find
3752        highest XXXXXXXX & set logs_found
3753       TODO: check that last checkpoint within present log addresses space
3754 
3755       find the log end
3756     */
3757     if (LSN_FILE_NO(last_checkpoint_lsn) == FILENO_IMPOSSIBLE)
3758     {
3759       DBUG_ASSERT(LSN_OFFSET(last_checkpoint_lsn) == 0);
3760       /* only last log needs to be checked */
3761       sure_page= MAKE_LSN(last_logno, TRANSLOG_PAGE_SIZE);
3762     }
3763     else
3764     {
3765       sure_page= last_checkpoint_lsn;
3766       DBUG_ASSERT(LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE != 0);
3767       sure_page-= LSN_OFFSET(sure_page) % TRANSLOG_PAGE_SIZE;
3768     }
3769     /* Set horizon to the beginning of the last file first */
3770     log_descriptor.horizon= last_page= MAKE_LSN(last_logno, 0);
3771     if (translog_get_last_page_addr(&last_page, &pageok, no_errors))
3772     {
3773       if (!translog_walk_filenames(log_descriptor.directory,
3774                                    &translog_callback_search_first))
3775       {
3776         /*
3777           Files was deleted, just start from the next log number, so that
3778           existing tables are in the past.
3779         */
3780         start_file_num= last_logno + 1;
3781         checkpoint_lsn= LSN_IMPOSSIBLE; /* no log so no checkpoint */
3782         logs_found= 0;
3783       }
3784       else
3785         goto err;
3786     }
3787     else if (LSN_OFFSET(last_page) == 0)
3788     {
3789       if (LSN_FILE_NO(last_page) == 1)
3790       {
3791         logs_found= 0;                          /* file #1 has no pages */
3792         DBUG_PRINT("info", ("log found. But is is empty => no log assumed"));
3793       }
3794       else
3795       {
3796         last_page-= LSN_ONE_FILE;
3797         if (translog_get_last_page_addr(&last_page, &pageok, 0))
3798           goto err;
3799       }
3800     }
3801     if (logs_found)
3802     {
3803       uint32 i;
3804       log_descriptor.min_file= translog_first_file(log_descriptor.horizon, 1);
3805       log_descriptor.max_file= last_logno;
3806       /* Open all files */
3807       if (allocate_dynamic(&log_descriptor.open_files,
3808                            log_descriptor.max_file -
3809                            log_descriptor.min_file + 1))
3810         goto err;
3811       for (i = log_descriptor.max_file; i >= log_descriptor.min_file; i--)
3812       {
3813         /*
3814           We can't allocate all file together because they will be freed
3815           one by one
3816         */
3817         TRANSLOG_FILE *file= (TRANSLOG_FILE *)my_malloc(PSI_INSTRUMENT_ME, sizeof(TRANSLOG_FILE),
3818                                                         MYF(0));
3819 
3820         compile_time_assert(MY_FILEPOS_ERROR > 0xffffffffULL);
3821         if (file == NULL ||
3822             (file->handler.file=
3823              open_logfile_by_number_no_cache(i)) < 0 ||
3824             mysql_file_seek(file->handler.file, 0, SEEK_END, MYF(0)) >=
3825             0xffffffffULL)
3826         {
3827           int j;
3828           for (j= i - log_descriptor.min_file - 1; j > 0; j--)
3829           {
3830             TRANSLOG_FILE *el=
3831               *dynamic_element(&log_descriptor.open_files, j,
3832                                TRANSLOG_FILE **);
3833             mysql_file_close(el->handler.file, MYF(MY_WME));
3834             my_free(el);
3835           }
3836           if (file)
3837           {
3838             free(file);
3839             goto err;
3840           }
3841           else
3842             goto err;
3843         }
3844         translog_file_init(file, i, 1);
3845         /* we allocated space so it can't fail */
3846         insert_dynamic(&log_descriptor.open_files, (uchar *)&file);
3847       }
3848       DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
3849                   log_descriptor.open_files.elements);
3850     }
3851   }
3852   else if (readonly)
3853   {
3854     /* There is no logs and there is read-only mode => nothing to read */
3855     DBUG_PRINT("error", ("No logs and read-only mode"));
3856     goto err;
3857   }
3858 
3859   if (logs_found)
3860   {
3861     TRANSLOG_ADDRESS current_page= sure_page;
3862     my_bool pageok;
3863 
3864     DBUG_PRINT("info", ("The log is really present"));
3865     if (sure_page > last_page)
3866     {
3867       my_printf_error(HA_ERR_GENERIC, "Aria engine: log data error\n"
3868                       "last_log_page:   " LSN_FMT " is less than\n"
3869                       "checkpoint page: " LSN_FMT, MYF(0),
3870                       LSN_IN_PARTS(last_page), LSN_IN_PARTS(sure_page));
3871       goto err;
3872     }
3873 
3874     /* TODO: check page size */
3875 
3876     last_valid_page= LSN_IMPOSSIBLE;
3877     /*
3878       Scans and validate pages. We need it to show "outside" only for sure
3879       valid part of the log. If the log was damaged then fixed we have to
3880       cut off damaged part before some other process start write something
3881       in the log.
3882     */
3883     do
3884     {
3885       TRANSLOG_ADDRESS current_file_last_page;
3886       current_file_last_page= current_page;
3887       if (translog_get_last_page_addr(&current_file_last_page, &pageok, 0))
3888         goto err;
3889       if (!pageok)
3890       {
3891         DBUG_PRINT("error", ("File %lu have no complete last page",
3892                              (ulong) LSN_FILE_NO(current_file_last_page)));
3893         old_log_was_recovered= 1;
3894         /* This file is not written till the end so it should be last */
3895         last_page= current_file_last_page;
3896         /* TODO: issue warning */
3897       }
3898       do
3899       {
3900         TRANSLOG_VALIDATOR_DATA data;
3901         TRANSLOG_PAGE_SIZE_BUFF psize_buff;
3902         uchar *page;
3903         data.addr= &current_page;
3904         if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL)
3905           goto err;
3906         if (data.was_recovered)
3907         {
3908           DBUG_PRINT("error", ("file no: %lu (%d)  "
3909                                "rec_offset: 0x%lx (%lu) (%d)",
3910                                (ulong) LSN_FILE_NO(current_page),
3911                                (uint3korr(page + 3) !=
3912                                 LSN_FILE_NO(current_page)),
3913                                (ulong) LSN_OFFSET(current_page),
3914                                (ulong) (LSN_OFFSET(current_page) /
3915                                         TRANSLOG_PAGE_SIZE),
3916                                (uint3korr(page) !=
3917                                 LSN_OFFSET(current_page) /
3918                                 TRANSLOG_PAGE_SIZE)));
3919           old_log_was_recovered= 1;
3920           break;
3921         }
3922         old_flags= page[TRANSLOG_PAGE_FLAGS];
3923         last_valid_page= current_page;
3924         current_page+= TRANSLOG_PAGE_SIZE; /* increase offset */
3925       } while (current_page <= current_file_last_page);
3926       current_page+= LSN_ONE_FILE;
3927       current_page= LSN_REPLACE_OFFSET(current_page, TRANSLOG_PAGE_SIZE);
3928     } while (LSN_FILE_NO(current_page) <= LSN_FILE_NO(last_page) &&
3929              !old_log_was_recovered);
3930     if (last_valid_page == LSN_IMPOSSIBLE)
3931     {
3932       /* Panic!!! Even page which should be valid is invalid */
3933       /* TODO: issue error */
3934       goto err;
3935     }
3936     DBUG_PRINT("info", ("Last valid page is in file: %lu  "
3937                         "offset: %lu (0x%lx)  "
3938                         "Logs found: %d  was recovered: %d  "
3939                         "flags match: %d",
3940                         (ulong) LSN_FILE_NO(last_valid_page),
3941                         (ulong) LSN_OFFSET(last_valid_page),
3942                         (ulong) LSN_OFFSET(last_valid_page),
3943                         logs_found, old_log_was_recovered,
3944                         (old_flags == flags)));
3945 
3946     /* TODO: check server ID */
3947     if (logs_found && !old_log_was_recovered && old_flags == flags)
3948     {
3949       TRANSLOG_VALIDATOR_DATA data;
3950       TRANSLOG_PAGE_SIZE_BUFF psize_buff;
3951       uchar *page;
3952       uint16 chunk_offset;
3953       data.addr= &last_valid_page;
3954       /* continue old log */
3955       DBUG_ASSERT(LSN_FILE_NO(last_valid_page)==
3956                   LSN_FILE_NO(log_descriptor.horizon));
3957       if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL ||
3958           (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
3959         goto err;
3960 
3961       /* Puts filled part of old page in the buffer */
3962       log_descriptor.horizon= last_valid_page;
3963       translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
3964       /*
3965          Free space if filled with TRANSLOG_FILLER and first uchar of
3966          real chunk can't be TRANSLOG_FILLER
3967       */
3968       while (chunk_offset < TRANSLOG_PAGE_SIZE &&
3969              page[chunk_offset] != TRANSLOG_FILLER)
3970       {
3971         uint16 chunk_length;
3972         if ((chunk_length=
3973              translog_get_total_chunk_length(page, chunk_offset)) == 0)
3974           goto err;
3975         DBUG_PRINT("info", ("chunk: offset: %u  length: %u",
3976                             (uint) chunk_offset, (uint) chunk_length));
3977         chunk_offset+= chunk_length;
3978 
3979         /* chunk can't cross the page border */
3980         DBUG_ASSERT(chunk_offset <= TRANSLOG_PAGE_SIZE);
3981       }
3982       memcpy(log_descriptor.buffers->buffer, page, chunk_offset);
3983       log_descriptor.bc.buffer->size+= chunk_offset;
3984       log_descriptor.bc.ptr+= chunk_offset;
3985       log_descriptor.bc.current_page_fill= chunk_offset;
3986       log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
3987                                                  (chunk_offset +
3988                                                   LSN_OFFSET(last_valid_page)));
3989       DBUG_PRINT("info", ("Move Page #%u: %p  chaser: %d  Size: %lu (%lu)",
3990                           (uint) log_descriptor.bc.buffer_no,
3991                           log_descriptor.bc.buffer,
3992                           log_descriptor.bc.chaser,
3993                           (ulong) log_descriptor.bc.buffer->size,
3994                           (ulong) (log_descriptor.bc.ptr - log_descriptor.bc.
3995                                    buffer->buffer)));
3996       translog_check_cursor(&log_descriptor.bc);
3997     }
3998     if (!old_log_was_recovered && old_flags == flags)
3999     {
4000       LOGHANDLER_FILE_INFO info;
4001 
4002       /*
4003         Accessing &log_descriptor.open_files without mutex is safe
4004         because it is initialization
4005       */
4006       if (translog_read_file_header(&info,
4007                                     (*dynamic_element(&log_descriptor.
4008                                                       open_files,
4009                                                       0, TRANSLOG_FILE **))->
4010                                     handler.file))
4011         goto err;
4012       version_changed= (info.maria_version != TRANSLOG_VERSION_ID);
4013     }
4014   }
4015   DBUG_PRINT("info", ("Logs found: %d  was recovered: %d",
4016                       logs_found, old_log_was_recovered));
4017   if (!logs_found)
4018   {
4019     TRANSLOG_FILE *file= (TRANSLOG_FILE*)my_malloc(PSI_INSTRUMENT_ME,
4020                                            sizeof(TRANSLOG_FILE), MYF(MY_WME));
4021     DBUG_PRINT("info", ("The log is not found => we will create new log"));
4022     if (file == NULL)
4023        goto err;
4024     /* Start new log system from scratch */
4025     log_descriptor.horizon= MAKE_LSN(start_file_num,
4026                                      TRANSLOG_PAGE_SIZE); /* header page */
4027     translog_file_init(file, start_file_num, 0);
4028     if (insert_dynamic(&log_descriptor.open_files, (uchar*)&file))
4029     {
4030       my_free(file);
4031       goto err;
4032     }
4033     if ((file->handler.file=
4034          create_logfile_by_number_no_cache(start_file_num)) == -1)
4035       goto err;
4036     log_descriptor.min_file= log_descriptor.max_file= start_file_num;
4037     if (translog_write_file_header())
4038       goto err;
4039     DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
4040                 log_descriptor.open_files.elements);
4041 
4042     if (ma_control_file_write_and_force(checkpoint_lsn, start_file_num,
4043                                         max_trid_in_control_file,
4044                                         recovery_failures))
4045       goto err;
4046     /* assign buffer 0 */
4047     translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
4048     translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
4049   }
4050   else if ((old_log_was_recovered || old_flags != flags || version_changed) &&
4051            !readonly)
4052   {
4053     /* leave the damaged file untouched */
4054     log_descriptor.horizon+= LSN_ONE_FILE;
4055     /* header page */
4056     log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
4057                                                TRANSLOG_PAGE_SIZE);
4058     if (translog_create_new_file())
4059       goto err;
4060     /*
4061       Buffer system left untouched after recovery => we should init it
4062       (starting from buffer 0)
4063     */
4064     translog_start_buffer(log_descriptor.buffers, &log_descriptor.bc, 0);
4065     translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
4066   }
4067 
4068   /* all LSNs that are on disk are flushed */
4069   log_descriptor.log_start= log_descriptor.sent_to_disk=
4070     log_descriptor.flushed= log_descriptor.horizon;
4071   log_descriptor.in_buffers_only= log_descriptor.bc.buffer->offset;
4072   log_descriptor.max_lsn= LSN_IMPOSSIBLE; /* set to 0 */
4073   /*
4074     Now 'flushed' is set to 'horizon' value, but 'horizon' is (potentially)
4075     address of the next LSN and we want indicate that all LSNs that are
4076     already on the disk are flushed so we need decrease horizon on 1 (we are
4077     sure that there is no LSN on the disk which is greater then 'flushed'
4078     and there will not be LSN created that is equal or less then the value
4079     of the 'flushed').
4080   */
4081   log_descriptor.flushed--; /* offset decreased */
4082   log_descriptor.sent_to_disk--; /* offset decreased */
4083   /*
4084     Log records will refer to a MARIA_SHARE by a unique 2-byte id; set up
4085     structures for generating 2-byte ids:
4086   */
4087   id_to_share= (MARIA_SHARE **) my_malloc(PSI_INSTRUMENT_ME, SHARE_ID_MAX * sizeof(MARIA_SHARE*),
4088                                           MYF(MY_WME | MY_ZEROFILL));
4089   if (unlikely(!id_to_share))
4090     goto err;
4091   id_to_share--; /* min id is 1 */
4092 
4093   /* Check the last LSN record integrity */
4094   if (logs_found)
4095   {
4096     TRANSLOG_SCANNER_DATA scanner;
4097     TRANSLOG_ADDRESS page_addr;
4098     LSN last_lsn= LSN_IMPOSSIBLE;
4099     /*
4100       take very last page address and try to find LSN record on it
4101       if it fail take address of previous page and so on
4102     */
4103     page_addr= (log_descriptor.horizon -
4104                 ((log_descriptor.horizon - 1) % TRANSLOG_PAGE_SIZE + 1));
4105     if (translog_scanner_init(page_addr, 1, &scanner, 1))
4106       goto err;
4107     scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
4108     for (;;)
4109     {
4110       uint chunk_1byte;
4111       chunk_1byte= scanner.page[scanner.page_offset];
4112       while (!translog_is_LSN_chunk(chunk_1byte) &&
4113              scanner.page != END_OF_LOG &&
4114              scanner.page[scanner.page_offset] != TRANSLOG_FILLER &&
4115              scanner.page_addr == page_addr)
4116       {
4117         if (translog_get_next_chunk(&scanner))
4118         {
4119           translog_destroy_scanner(&scanner);
4120           goto err;
4121         }
4122         if (scanner.page != END_OF_LOG)
4123           chunk_1byte= scanner.page[scanner.page_offset];
4124       }
4125       if (translog_is_LSN_chunk(chunk_1byte))
4126       {
4127         last_lsn= scanner.page_addr + scanner.page_offset;
4128         if (translog_get_next_chunk(&scanner))
4129         {
4130           translog_destroy_scanner(&scanner);
4131           goto err;
4132         }
4133         if (scanner.page == END_OF_LOG)
4134           break; /* it was the last record */
4135         chunk_1byte= scanner.page[scanner.page_offset];
4136         continue; /* try to find other record on this page */
4137       }
4138 
4139       if (last_lsn != LSN_IMPOSSIBLE)
4140         break; /* there is no more records on the page */
4141 
4142       /* We have to make step back */
4143       if (unlikely(LSN_OFFSET(page_addr) == TRANSLOG_PAGE_SIZE))
4144       {
4145         uint32 file_no= LSN_FILE_NO(page_addr);
4146         my_bool last_page_ok;
4147         /* it is beginning of the current file */
4148         if (unlikely(file_no == 1))
4149         {
4150           /*
4151             It is beginning of the log => there is no LSNs in the log =>
4152             There is no harm in leaving it "as-is".
4153           */
4154           log_descriptor.previous_flush_horizon= log_descriptor.horizon;
4155           DBUG_PRINT("info", ("previous_flush_horizon: " LSN_FMT,
4156                               LSN_IN_PARTS(log_descriptor.
4157                                            previous_flush_horizon)));
4158           DBUG_RETURN(0);
4159         }
4160         file_no--;
4161         page_addr= MAKE_LSN(file_no, TRANSLOG_PAGE_SIZE);
4162         translog_get_last_page_addr(&page_addr, &last_page_ok, 0);
4163         /* page should be OK as it is not the last file */
4164         DBUG_ASSERT(last_page_ok);
4165       }
4166       else
4167       {
4168          page_addr-= TRANSLOG_PAGE_SIZE;
4169       }
4170       translog_destroy_scanner(&scanner);
4171       if (translog_scanner_init(page_addr, 1, &scanner, 1))
4172         goto err;
4173       scanner.page_offset= page_overhead[scanner.page[TRANSLOG_PAGE_FLAGS]];
4174     }
4175     translog_destroy_scanner(&scanner);
4176 
4177     /* Now scanner points to the last LSN chunk, lets check it */
4178     {
4179       TRANSLOG_HEADER_BUFFER rec;
4180       translog_size_t rec_len;
4181       int len;
4182       uchar buffer[1];
4183       DBUG_PRINT("info", ("going to check the last found record " LSN_FMT,
4184                           LSN_IN_PARTS(last_lsn)));
4185 
4186       len=
4187         translog_read_record_header(last_lsn, &rec);
4188       if (unlikely (len == RECHEADER_READ_ERROR ||
4189                     len == RECHEADER_READ_EOF))
4190       {
4191         DBUG_PRINT("error", ("unexpected end of log or record during "
4192                              "reading record header: " LSN_FMT "  len: %d",
4193                              LSN_IN_PARTS(last_lsn), len));
4194         if (readonly)
4195           log_descriptor.log_start= log_descriptor.horizon= last_lsn;
4196         else if (translog_truncate_log(last_lsn))
4197         {
4198           translog_free_record_header(&rec);
4199           goto err;
4200         }
4201       }
4202       else
4203       {
4204         DBUG_ASSERT(last_lsn == rec.lsn);
4205         if (likely(rec.record_length != 0))
4206         {
4207           /*
4208             Reading the last byte of record will trigger scanning all
4209             record chunks for now
4210           */
4211           rec_len= translog_read_record(rec.lsn, rec.record_length - 1, 1,
4212                                         buffer, NULL);
4213           if (rec_len != 1)
4214           {
4215             DBUG_PRINT("error", ("unexpected end of log or record during "
4216                                  "reading record body: " LSN_FMT "  len: %d",
4217                                  LSN_IN_PARTS(rec.lsn),
4218                                  len));
4219             if (readonly)
4220               log_descriptor.log_start= log_descriptor.horizon= last_lsn;
4221 
4222             else if (translog_truncate_log(last_lsn))
4223             {
4224               translog_free_record_header(&rec);
4225               goto err;
4226             }
4227           }
4228         }
4229       }
4230       translog_free_record_header(&rec);
4231     }
4232   }
4233   log_descriptor.previous_flush_horizon= log_descriptor.horizon;
4234   DBUG_PRINT("info", ("previous_flush_horizon: " LSN_FMT,
4235                       LSN_IN_PARTS(log_descriptor.previous_flush_horizon)));
4236   DBUG_RETURN(0);
4237 err:
4238   ma_message_no_user(0, "log initialization failed");
4239   DBUG_RETURN(1);
4240 }
4241 
4242 
4243 /*
4244   @brief Free transaction log file buffer.
4245 
4246   @param buffer_no       The buffer to free
4247 */
4248 
translog_buffer_destroy(struct st_translog_buffer * buffer)4249 static void translog_buffer_destroy(struct st_translog_buffer *buffer)
4250 {
4251   DBUG_ENTER("translog_buffer_destroy");
4252   DBUG_PRINT("enter",
4253              ("Buffer #%u: %p  file: %d  offset: " LSN_FMT "  size: %lu",
4254               (uint) buffer->buffer_no, buffer,
4255               (buffer->file ? buffer->file->handler.file : -1),
4256               LSN_IN_PARTS(buffer->offset),
4257               (ulong) buffer->size));
4258   if (buffer->file != NULL)
4259   {
4260     /*
4261       We ignore errors here, because we can't do something about it
4262       (it is shutting down)
4263 
4264       We also have to take the locks even if there can't be any other
4265       threads running, because translog_buffer_flush()
4266       requires that we have the buffer locked.
4267     */
4268     translog_buffer_lock(buffer);
4269     translog_buffer_flush(buffer);
4270     translog_buffer_unlock(buffer);
4271   }
4272   DBUG_PRINT("info", ("Destroy mutex: %p",  &buffer->mutex));
4273   mysql_mutex_destroy(&buffer->mutex);
4274   mysql_cond_destroy(&buffer->waiting_filling_buffer);
4275   DBUG_VOID_RETURN;
4276 }
4277 
4278 
4279 /*
4280   Free log handler resources
4281 
4282   SYNOPSIS
4283     translog_destroy()
4284 */
4285 
translog_destroy()4286 void translog_destroy()
4287 {
4288   TRANSLOG_FILE **file;
4289   uint i;
4290   uint8 current_buffer;
4291   DBUG_ENTER("translog_destroy");
4292 
4293   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
4294               translog_status == TRANSLOG_READONLY);
4295   translog_lock();
4296   current_buffer= log_descriptor.bc.buffer_no;
4297   translog_status= (translog_status == TRANSLOG_READONLY ?
4298                     TRANSLOG_UNINITED :
4299                     TRANSLOG_SHUTDOWN);
4300   if (log_descriptor.bc.buffer->file != NULL)
4301     translog_finish_page(&log_descriptor.horizon, &log_descriptor.bc);
4302   translog_unlock();
4303 
4304   for (i= 0; i < TRANSLOG_BUFFERS_NO; i++)
4305   {
4306     struct st_translog_buffer *buffer= (log_descriptor.buffers +
4307                                         ((i + current_buffer + 1) %
4308                                          TRANSLOG_BUFFERS_NO));
4309     translog_buffer_destroy(buffer);
4310   }
4311   translog_status= TRANSLOG_UNINITED;
4312 
4313   /* close files */
4314   while ((file= (TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files)))
4315     translog_close_log_file(*file);
4316   mysql_mutex_destroy(&log_descriptor.sent_to_disk_lock);
4317   mysql_mutex_destroy(&log_descriptor.file_header_lock);
4318   mysql_mutex_destroy(&log_descriptor.unfinished_files_lock);
4319   mysql_mutex_destroy(&log_descriptor.purger_lock);
4320   mysql_mutex_destroy(&log_descriptor.log_flush_lock);
4321   mysql_mutex_destroy(&log_descriptor.dirty_buffer_mask_lock);
4322   mysql_cond_destroy(&log_descriptor.log_flush_cond);
4323   mysql_cond_destroy(&log_descriptor.new_goal_cond);
4324   mysql_rwlock_destroy(&log_descriptor.open_files_lock);
4325   delete_dynamic(&log_descriptor.open_files);
4326   delete_dynamic(&log_descriptor.unfinished_files);
4327 
4328   if (log_descriptor.directory_fd >= 0)
4329     mysql_file_close(log_descriptor.directory_fd, MYF(MY_WME));
4330   if (id_to_share != NULL)
4331     my_free(id_to_share + 1);
4332   DBUG_VOID_RETURN;
4333 }
4334 
4335 
4336 /*
4337   @brief Starts new page.
4338 
4339   @param horizon         \ Position in file and buffer where we are
4340   @param cursor          /
4341   @param prev_buffer     Buffer which should be flushed will be assigned here.
4342                          This is always set (to NULL if nothing to flush).
4343 
4344   @note We do not want to flush the buffer immediately because we want to
4345   let caller of this function first advance 'horizon' pointer and unlock the
4346   loghandler and only then flush the log which can take some time.
4347 
4348   @retval 0 OK
4349   @retval 1 Error
4350 */
4351 
translog_page_next(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor,struct st_translog_buffer ** prev_buffer)4352 static my_bool translog_page_next(TRANSLOG_ADDRESS *horizon,
4353                                   struct st_buffer_cursor *cursor,
4354                                   struct st_translog_buffer **prev_buffer)
4355 {
4356   struct st_translog_buffer *buffer= cursor->buffer;
4357   DBUG_ENTER("translog_page_next");
4358 
4359   *prev_buffer= NULL;
4360   if ((cursor->ptr + TRANSLOG_PAGE_SIZE >
4361        cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER) ||
4362       (LSN_OFFSET(*horizon) >
4363        log_descriptor.log_file_max_size - TRANSLOG_PAGE_SIZE))
4364   {
4365     DBUG_PRINT("info", ("Switch to next buffer  Buffer Size: %lu (%lu) => %d  "
4366                         "File size: %lu  max: %lu => %d",
4367                         (ulong) cursor->buffer->size,
4368                         (ulong) (cursor->ptr - cursor->buffer->buffer),
4369                         (cursor->ptr + TRANSLOG_PAGE_SIZE >
4370                          cursor->buffer->buffer + TRANSLOG_WRITE_BUFFER),
4371                         (ulong) LSN_OFFSET(*horizon),
4372                         (ulong) log_descriptor.log_file_max_size,
4373                         (LSN_OFFSET(*horizon) >
4374                          (log_descriptor.log_file_max_size -
4375                           TRANSLOG_PAGE_SIZE))));
4376     if (translog_buffer_next(horizon, cursor,
4377                              LSN_OFFSET(*horizon) >
4378                              (log_descriptor.log_file_max_size -
4379                               TRANSLOG_PAGE_SIZE)))
4380       DBUG_RETURN(1);
4381     *prev_buffer= buffer;
4382     DBUG_PRINT("info", ("Buffer #%u (%p): have to be flushed",
4383                         (uint) buffer->buffer_no, buffer));
4384   }
4385   else
4386   {
4387     DBUG_PRINT("info", ("Use the same buffer #%u (%p): "
4388                         "Buffer Size: %lu (%lu)",
4389                         (uint) buffer->buffer_no,
4390                         buffer,
4391                         (ulong) cursor->buffer->size,
4392                         (ulong) (cursor->ptr - cursor->buffer->buffer)));
4393     translog_finish_page(horizon, cursor);
4394     translog_new_page_header(horizon, cursor);
4395   }
4396   DBUG_RETURN(0);
4397 }
4398 
4399 
4400 /*
4401   Write data of given length to the current page
4402 
4403   SYNOPSIS
4404     translog_write_data_on_page()
4405     horizon              \ Pointers on file and buffer
4406     cursor               /
4407     length               IN     length of the chunk
4408     buffer               buffer with data
4409 
4410   RETURN
4411     0  OK
4412     1  Error
4413 */
4414 
translog_write_data_on_page(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor,translog_size_t length,uchar * buffer)4415 static my_bool translog_write_data_on_page(TRANSLOG_ADDRESS *horizon,
4416                                            struct st_buffer_cursor *cursor,
4417                                            translog_size_t length,
4418                                            uchar *buffer)
4419 {
4420   DBUG_ENTER("translog_write_data_on_page");
4421   DBUG_PRINT("enter", ("Chunk length: %lu  Page size %u",
4422                        (ulong) length, (uint) cursor->current_page_fill));
4423   DBUG_ASSERT(length > 0);
4424   DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
4425   DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
4426               TRANSLOG_WRITE_BUFFER);
4427 
4428   memcpy(cursor->ptr, buffer, length);
4429   cursor->ptr+= length;
4430   (*horizon)+= length; /* adds offset */
4431   cursor->current_page_fill+= length;
4432   if (!cursor->chaser)
4433     cursor->buffer->size+= length;
4434   DBUG_PRINT("info", ("Write data buffer #%u: %p  "
4435                       "chaser: %d  Size: %lu (%lu)",
4436                       (uint) cursor->buffer->buffer_no, cursor->buffer,
4437                       cursor->chaser, (ulong) cursor->buffer->size,
4438                       (ulong) (cursor->ptr - cursor->buffer->buffer)));
4439   translog_check_cursor(cursor);
4440 
4441   DBUG_RETURN(0);
4442 }
4443 
4444 
4445 /*
4446   Write data from parts of given length to the current page
4447 
4448   SYNOPSIS
4449     translog_write_parts_on_page()
4450     horizon              \ Pointers on file and buffer
4451     cursor               /
4452     length               IN     length of the chunk
4453     parts                IN/OUT chunk source
4454 
4455   RETURN
4456     0  OK
4457     1  Error
4458 */
4459 
translog_write_parts_on_page(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor,translog_size_t length,struct st_translog_parts * parts)4460 static my_bool translog_write_parts_on_page(TRANSLOG_ADDRESS *horizon,
4461                                             struct st_buffer_cursor *cursor,
4462                                             translog_size_t length,
4463                                             struct st_translog_parts *parts)
4464 {
4465   translog_size_t left= length;
4466   uint cur= (uint) parts->current;
4467   DBUG_ENTER("translog_write_parts_on_page");
4468   DBUG_PRINT("enter", ("Chunk length: %lu  parts: %u of %u. Page size: %u  "
4469                        "Buffer size: %lu (%lu)",
4470                        (ulong) length,
4471                        (uint) (cur + 1), (uint) parts->elements,
4472                        (uint) cursor->current_page_fill,
4473                        (ulong) cursor->buffer->size,
4474                        (ulong) (cursor->ptr - cursor->buffer->buffer)));
4475   DBUG_ASSERT(length > 0);
4476   DBUG_ASSERT(length + cursor->current_page_fill <= TRANSLOG_PAGE_SIZE);
4477   DBUG_ASSERT(length + cursor->ptr <= cursor->buffer->buffer +
4478               TRANSLOG_WRITE_BUFFER);
4479 
4480   do
4481   {
4482     translog_size_t len;
4483     LEX_CUSTRING *part;
4484     const uchar *buff;
4485 
4486     DBUG_ASSERT(cur < parts->elements);
4487     part= parts->parts + cur;
4488     buff= part->str;
4489     DBUG_PRINT("info", ("Part: %u  Length: %lu  left: %lu  buff: %p",
4490                         (uint) (cur + 1), (ulong) part->length, (ulong) left,
4491                         buff));
4492 
4493     if (part->length > left)
4494     {
4495       /* we should write less then the current part */
4496       len= left;
4497       part->length-= len;
4498       part->str+= len;
4499       DBUG_PRINT("info", ("Set new part: %u  Length: %lu",
4500                           (uint) (cur + 1), (ulong) part->length));
4501     }
4502     else
4503     {
4504       len= (translog_size_t) part->length;
4505       cur++;
4506       DBUG_PRINT("info", ("moved to next part (len: %lu)", (ulong) len));
4507     }
4508     DBUG_PRINT("info", ("copy: %p <- %p  %u",
4509                         cursor->ptr, buff, len));
4510     if (likely(len))
4511     {
4512       memcpy(cursor->ptr, buff, len);
4513       left-= len;
4514       cursor->ptr+= len;
4515     }
4516   } while (left);
4517 
4518   DBUG_PRINT("info", ("Horizon: " LSN_FMT "  Length %u(0x%x)",
4519                       LSN_IN_PARTS(*horizon),
4520                       length, length));
4521   parts->current= cur;
4522   (*horizon)+= length; /* offset increasing */
4523   cursor->current_page_fill+= length;
4524   if (!cursor->chaser)
4525     cursor->buffer->size+= length;
4526   /*
4527     We do not not updating parts->total_record_length here because it is
4528     need only before writing record to have total length
4529   */
4530   DBUG_PRINT("info", ("Write parts buffer #%u: %p  "
4531                       "chaser: %d  Size: %lu (%lu)  "
4532                       "Horizon: " LSN_FMT "  buff offset: 0x%x",
4533                       (uint) cursor->buffer->buffer_no, cursor->buffer,
4534                       cursor->chaser, (ulong) cursor->buffer->size,
4535                       (ulong) (cursor->ptr - cursor->buffer->buffer),
4536                       LSN_IN_PARTS(*horizon),
4537                       (uint) (LSN_OFFSET(cursor->buffer->offset) +
4538                                cursor->buffer->size)));
4539   translog_check_cursor(cursor);
4540 
4541   DBUG_RETURN(0);
4542 }
4543 
4544 
4545 /*
4546   Put 1 group chunk type 0 header into parts array
4547 
4548   SYNOPSIS
4549     translog_write_variable_record_1group_header()
4550     parts                Descriptor of record source parts
4551     type                 The log record type
4552     short_trid           Short transaction ID or 0 if it has no sense
4553     header_length        Calculated header length of chunk type 0
4554     chunk0_header        Buffer for the chunk header writing
4555 */
4556 
4557 static void
translog_write_variable_record_1group_header(struct st_translog_parts * parts,enum translog_record_type type,SHORT_TRANSACTION_ID short_trid,uint16 header_length,uchar * chunk0_header)4558 translog_write_variable_record_1group_header(struct st_translog_parts *parts,
4559                                              enum translog_record_type type,
4560                                              SHORT_TRANSACTION_ID short_trid,
4561                                              uint16 header_length,
4562                                              uchar *chunk0_header)
4563 {
4564   LEX_CUSTRING *part;
4565   DBUG_ASSERT(parts->current != 0);     /* first part is left for header */
4566   part= parts->parts + (--parts->current);
4567   parts->total_record_length+= (translog_size_t) (part->length= header_length);
4568   part->str= chunk0_header;
4569   /* puts chunk type */
4570   *chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
4571   int2store(chunk0_header + 1, short_trid);
4572   /* puts record length */
4573   translog_write_variable_record_1group_code_len(chunk0_header + 3,
4574                                                  parts->record_length,
4575                                                  header_length);
4576   /* puts 0 as chunk length which indicate 1 group record */
4577   int2store(chunk0_header + header_length - 2, 0);
4578 }
4579 
4580 
4581 /*
4582   Increase number of writers for this buffer
4583 
4584   SYNOPSIS
4585     translog_buffer_increase_writers()
4586     buffer               target buffer
4587 */
4588 
4589 static inline void
translog_buffer_increase_writers(struct st_translog_buffer * buffer)4590 translog_buffer_increase_writers(struct st_translog_buffer *buffer)
4591 {
4592   DBUG_ENTER("translog_buffer_increase_writers");
4593   translog_buffer_lock_assert_owner(buffer);
4594   buffer->copy_to_buffer_in_progress++;
4595   DBUG_PRINT("info", ("copy_to_buffer_in_progress. Buffer #%u  %p  progress: %d",
4596                       (uint) buffer->buffer_no, buffer,
4597                       buffer->copy_to_buffer_in_progress));
4598   DBUG_VOID_RETURN;
4599 }
4600 
4601 
4602 /*
4603   Decrease number of writers for this buffer
4604 
4605   SYNOPSIS
4606     translog_buffer_decrease_writers()
4607     buffer               target buffer
4608 */
4609 
translog_buffer_decrease_writers(struct st_translog_buffer * buffer)4610 static void translog_buffer_decrease_writers(struct st_translog_buffer *buffer)
4611 {
4612   DBUG_ENTER("translog_buffer_decrease_writers");
4613   translog_buffer_lock_assert_owner(buffer);
4614   buffer->copy_to_buffer_in_progress--;
4615   DBUG_PRINT("info",
4616              ("copy_to_buffer_in_progress. Buffer #%u  %p  progress: %d",
4617               (uint) buffer->buffer_no, buffer,
4618               buffer->copy_to_buffer_in_progress));
4619   if (buffer->copy_to_buffer_in_progress == 0)
4620     mysql_cond_broadcast(&buffer->waiting_filling_buffer);
4621   DBUG_VOID_RETURN;
4622 }
4623 
4624 
4625 /**
4626   @brief Skip to the next page for chaser (thread which advanced horizon
4627   pointer and now feeling the buffer)
4628 
4629   @param horizon         \ Pointers on file position and buffer
4630   @param cursor          /
4631 
4632   @retval 1 OK
4633   @retval 0 Error
4634 */
4635 
translog_chaser_page_next(TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor)4636 static my_bool translog_chaser_page_next(TRANSLOG_ADDRESS *horizon,
4637                                          struct st_buffer_cursor *cursor)
4638 {
4639   struct st_translog_buffer *buffer_to_flush;
4640   my_bool rc;
4641   DBUG_ENTER("translog_chaser_page_next");
4642   DBUG_ASSERT(cursor->chaser);
4643   rc= translog_page_next(horizon, cursor, &buffer_to_flush);
4644   if (buffer_to_flush != NULL)
4645   {
4646     translog_buffer_lock(buffer_to_flush);
4647     translog_buffer_decrease_writers(buffer_to_flush);
4648     used_buffs_register_unlock(&cursor->buffs, buffer_to_flush);
4649     if (!rc)
4650       rc= translog_buffer_flush(buffer_to_flush);
4651     translog_buffer_unlock(buffer_to_flush);
4652   }
4653   DBUG_RETURN(rc);
4654 }
4655 
4656 /*
4657   Put chunk 2 from new page beginning
4658 
4659   SYNOPSIS
4660     translog_write_variable_record_chunk2_page()
4661     parts                Descriptor of record source parts
4662     horizon              \ Pointers on file position and buffer
4663     cursor               /
4664 
4665   RETURN
4666     0  OK
4667     1  Error
4668 */
4669 
4670 static my_bool
translog_write_variable_record_chunk2_page(struct st_translog_parts * parts,TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor)4671 translog_write_variable_record_chunk2_page(struct st_translog_parts *parts,
4672                                            TRANSLOG_ADDRESS *horizon,
4673                                            struct st_buffer_cursor *cursor)
4674 {
4675   uchar chunk2_header[1];
4676   DBUG_ENTER("translog_write_variable_record_chunk2_page");
4677   chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
4678 
4679   if (translog_chaser_page_next(horizon, cursor))
4680     DBUG_RETURN(1);
4681 
4682   /* Puts chunk type */
4683   translog_write_data_on_page(horizon, cursor, 1, chunk2_header);
4684   /* Puts chunk body */
4685   translog_write_parts_on_page(horizon, cursor,
4686                                log_descriptor.page_capacity_chunk_2, parts);
4687   DBUG_RETURN(0);
4688 }
4689 
4690 
4691 /*
4692   Put chunk 3 of requested length in the buffer from new page beginning
4693 
4694   SYNOPSIS
4695     translog_write_variable_record_chunk3_page()
4696     parts                Descriptor of record source parts
4697     length               Length of this chunk
4698     horizon              \ Pointers on file position and buffer
4699     cursor               /
4700 
4701   RETURN
4702     0  OK
4703     1  Error
4704 */
4705 
4706 static my_bool
translog_write_variable_record_chunk3_page(struct st_translog_parts * parts,uint16 length,TRANSLOG_ADDRESS * horizon,struct st_buffer_cursor * cursor)4707 translog_write_variable_record_chunk3_page(struct st_translog_parts *parts,
4708                                            uint16 length,
4709                                            TRANSLOG_ADDRESS *horizon,
4710                                            struct st_buffer_cursor *cursor)
4711 {
4712   LEX_CUSTRING *part;
4713   uchar chunk3_header[1 + 2];
4714   DBUG_ENTER("translog_write_variable_record_chunk3_page");
4715 
4716   if (translog_chaser_page_next(horizon, cursor))
4717     DBUG_RETURN(1);
4718 
4719   if (length == 0)
4720   {
4721     /* It was call to write page header only (no data for chunk 3) */
4722     DBUG_PRINT("info", ("It is a call to make page header only"));
4723     DBUG_RETURN(0);
4724   }
4725 
4726   DBUG_ASSERT(parts->current != 0);       /* first part is left for header */
4727   part= parts->parts + (--parts->current);
4728   parts->total_record_length+= (translog_size_t) (part->length= 1 + 2);
4729   part->str= chunk3_header;
4730   /* Puts chunk type */
4731   *chunk3_header= (uchar) (TRANSLOG_CHUNK_LNGTH);
4732   /* Puts chunk length */
4733   int2store(chunk3_header + 1, length);
4734 
4735   translog_write_parts_on_page(horizon, cursor, length + 1 + 2, parts);
4736   DBUG_RETURN(0);
4737 }
4738 
4739 /*
4740   Move log pointer (horizon) on given number pages starting from next page,
4741   and given offset on the last page
4742 
4743   SYNOPSIS
4744     translog_advance_pointer()
4745     pages                Number of full pages starting from the next one
4746     last_page_data       Plus this data on the last page
4747 
4748   RETURN
4749     0  OK
4750     1  Error
4751 */
4752 
translog_advance_pointer(int pages,uint16 last_page_data,TRUNSLOG_USED_BUFFERS * buffs)4753 static my_bool translog_advance_pointer(int pages, uint16 last_page_data,
4754                                         TRUNSLOG_USED_BUFFERS *buffs)
4755 {
4756   translog_size_t last_page_offset= (log_descriptor.page_overhead +
4757                                      last_page_data);
4758   translog_size_t offset= (TRANSLOG_PAGE_SIZE -
4759                            log_descriptor.bc.current_page_fill +
4760                            pages * TRANSLOG_PAGE_SIZE + last_page_offset);
4761   translog_size_t buffer_end_offset, file_end_offset, min_offset;
4762   DBUG_ENTER("translog_advance_pointer");
4763   DBUG_PRINT("enter", ("Pointer:  " LSN_FMT " + %u + %u pages + %u + %u",
4764                        LSN_IN_PARTS(log_descriptor.horizon),
4765                        (uint) (TRANSLOG_PAGE_SIZE -
4766                                log_descriptor.bc.current_page_fill),
4767                        pages, (uint) log_descriptor.page_overhead,
4768                        (uint) last_page_data));
4769   translog_lock_assert_owner();
4770 
4771   used_buffs_init(buffs);
4772 
4773   if (pages == -1)
4774   {
4775     /*
4776       It is special case when we advance the pointer on the same page.
4777       It can happened when we write last part of multi-group record.
4778     */
4779     DBUG_ASSERT(last_page_data + log_descriptor.bc.current_page_fill <=
4780                 TRANSLOG_PAGE_SIZE);
4781     offset= last_page_data;
4782     last_page_offset= log_descriptor.bc.current_page_fill + last_page_data;
4783     goto end;
4784   }
4785   DBUG_PRINT("info", ("last_page_offset %lu", (ulong) last_page_offset));
4786   DBUG_ASSERT(last_page_offset <= TRANSLOG_PAGE_SIZE);
4787 
4788   /*
4789     The loop will be executed 1-3 times. Usually we advance the
4790     pointer to fill only the current buffer (if we have more then 1/2 of
4791     buffer free or 2 buffers (rest of current and all next). In case of
4792     really huge record end where we write last group with "table of
4793     content" of all groups and ignore buffer borders we can occupy
4794     3 buffers.
4795   */
4796   for (;;)
4797   {
4798     uint8 new_buffer_no;
4799     struct st_translog_buffer *new_buffer;
4800     struct st_translog_buffer *old_buffer;
4801     buffer_end_offset= TRANSLOG_WRITE_BUFFER - log_descriptor.bc.buffer->size;
4802     if (likely(log_descriptor.log_file_max_size >=
4803                LSN_OFFSET(log_descriptor.horizon)))
4804       file_end_offset= (log_descriptor.log_file_max_size -
4805                         LSN_OFFSET(log_descriptor.horizon));
4806     else
4807     {
4808       /*
4809         We already have written more then current file limit allow,
4810         So we will finish this page and start new file
4811       */
4812       file_end_offset= (TRANSLOG_PAGE_SIZE -
4813                         log_descriptor.bc.current_page_fill);
4814     }
4815     DBUG_PRINT("info", ("offset: %u  buffer_end_offs: %u, "
4816                         "file_end_offs:  %u",
4817                         offset, buffer_end_offset,
4818                         file_end_offset));
4819     DBUG_PRINT("info", ("Buff #%u %u (%p) offset 0x%x + size 0x%x = "
4820                         "0x%x (0x%x)",
4821                         log_descriptor.bc.buffer->buffer_no,
4822                         log_descriptor.bc.buffer_no,
4823                         log_descriptor.bc.buffer,
4824                         (uint) LSN_OFFSET(log_descriptor.bc.buffer->offset),
4825                         log_descriptor.bc.buffer->size,
4826                         (uint) (LSN_OFFSET(log_descriptor.bc.buffer->offset) +
4827                                  log_descriptor.bc.buffer->size),
4828                         (uint) LSN_OFFSET(log_descriptor.horizon)));
4829     DBUG_ASSERT(LSN_OFFSET(log_descriptor.bc.buffer->offset) +
4830                 log_descriptor.bc.buffer->size ==
4831                 LSN_OFFSET(log_descriptor.horizon));
4832 
4833     if (offset <= buffer_end_offset && offset <= file_end_offset)
4834       break;
4835     old_buffer= log_descriptor.bc.buffer;
4836     new_buffer_no= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
4837     new_buffer= log_descriptor.buffers + new_buffer_no;
4838 
4839     translog_buffer_lock(new_buffer);
4840 #ifndef DBUG_OFF
4841     {
4842       TRANSLOG_ADDRESS offset= new_buffer->offset;
4843       TRANSLOG_FILE *file= new_buffer->file;
4844       uint8 ver= new_buffer->ver;
4845       translog_lock_assert_owner();
4846 #endif
4847       translog_wait_for_buffer_free(new_buffer);
4848 #ifndef DBUG_OFF
4849       /* We keep the handler locked so nobody can start this new buffer */
4850       DBUG_ASSERT((offset == new_buffer->offset && new_buffer->file == NULL &&
4851                    (file == NULL ? ver : (uint8)(ver + 1)) ==
4852                     new_buffer->ver) ||
4853                    translog_status == TRANSLOG_READONLY);
4854     }
4855 #endif
4856 
4857     min_offset= MY_MIN(buffer_end_offset, file_end_offset);
4858     /* TODO: check is it ptr or size enough */
4859     log_descriptor.bc.buffer->size+= min_offset;
4860     log_descriptor.bc.ptr+= min_offset;
4861     DBUG_PRINT("info", ("NewP buffer #%u: %p  chaser: %d  Size: %lu (%lu)",
4862                         (uint) log_descriptor.bc.buffer->buffer_no,
4863                         log_descriptor.bc.buffer,
4864                         log_descriptor.bc.chaser,
4865                         (ulong) log_descriptor.bc.buffer->size,
4866                         (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
4867                                  buffer->buffer)));
4868     DBUG_ASSERT((ulong) (log_descriptor.bc.ptr -
4869                          log_descriptor.bc.buffer->buffer) ==
4870                 log_descriptor.bc.buffer->size);
4871     DBUG_ASSERT(log_descriptor.bc.buffer->buffer_no ==
4872                 log_descriptor.bc.buffer_no);
4873     translog_buffer_increase_writers(log_descriptor.bc.buffer);
4874     // register for case of error
4875     used_buffs_add(buffs, log_descriptor.bc.buffer);
4876 
4877     if (file_end_offset <= buffer_end_offset)
4878     {
4879       log_descriptor.horizon+= LSN_ONE_FILE;
4880       log_descriptor.horizon= LSN_REPLACE_OFFSET(log_descriptor.horizon,
4881                                                  TRANSLOG_PAGE_SIZE);
4882       DBUG_PRINT("info", ("New file: %lu",
4883                           (ulong) LSN_FILE_NO(log_descriptor.horizon)));
4884       if (translog_create_new_file())
4885       {
4886         struct st_translog_buffer *ob= log_descriptor.bc.buffer;
4887         translog_buffer_unlock(ob);
4888         used_buffs_urgent_unlock(buffs);
4889         translog_buffer_lock(ob);
4890         DBUG_RETURN(1);
4891       }
4892     }
4893     else
4894     {
4895       DBUG_PRINT("info", ("The same file"));
4896       log_descriptor.horizon+= min_offset; /* offset increasing */
4897     }
4898     translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
4899     old_buffer->next_buffer_offset= new_buffer->offset;
4900     new_buffer->prev_buffer_offset= old_buffer->offset;
4901     translog_buffer_unlock(old_buffer);
4902     offset-= min_offset;
4903   }
4904   DBUG_PRINT("info", ("drop write_counter"));
4905   log_descriptor.bc.write_counter= 0;
4906   log_descriptor.bc.previous_offset= 0;
4907 end:
4908   log_descriptor.bc.ptr+= offset;
4909   log_descriptor.bc.buffer->size+= offset;
4910   translog_buffer_increase_writers(log_descriptor.bc.buffer);
4911   used_buffs_add(buffs, log_descriptor.bc.buffer);
4912   log_descriptor.horizon+= offset; /* offset increasing */
4913   log_descriptor.bc.current_page_fill= last_page_offset;
4914   DBUG_PRINT("info", ("NewP buffer #%u: %p  chaser: %d  Size: %lu (%lu)  "
4915                       "offset: %u  last page: %u",
4916                       (uint) log_descriptor.bc.buffer->buffer_no,
4917                       log_descriptor.bc.buffer,
4918                       log_descriptor.bc.chaser,
4919                       (ulong) log_descriptor.bc.buffer->size,
4920                       (ulong) (log_descriptor.bc.ptr -
4921                                log_descriptor.bc.buffer->
4922                                buffer), (uint) offset,
4923                       (uint) last_page_offset));
4924   DBUG_PRINT("info",
4925              ("pointer moved to: " LSN_FMT,
4926               LSN_IN_PARTS(log_descriptor.horizon)));
4927   translog_check_cursor(&log_descriptor.bc);
4928   log_descriptor.bc.protected= 0;
4929   DBUG_RETURN(0);
4930 }
4931 
4932 static void
used_buffs_add(TRUNSLOG_USED_BUFFERS * buffs,struct st_translog_buffer * buff)4933 used_buffs_add(TRUNSLOG_USED_BUFFERS *buffs,
4934                 struct st_translog_buffer *buff)
4935 {
4936   DBUG_ENTER("used_buffs_add");
4937   DBUG_PRINT("enter", ("ADD buffs: %p unlk %u (%p)  wrt_ptr: %u (%p)"
4938                        "  buff %p (%u)",
4939                        buffs,
4940                        buffs->wrt_ptr, buffs->buff[buffs->wrt_ptr],
4941                        buffs->unlck_ptr, buffs->buff[buffs->unlck_ptr],
4942                        buff, buff->buffer_no));
4943   DBUG_ASSERT(buffs->wrt_ptr < MAX_TRUNSLOG_USED_BUFFERS);
4944   buffs->buff[buffs->wrt_ptr++]= buff;
4945   DBUG_VOID_RETURN;
4946 }
4947 
4948 static void
used_buffs_register_unlock(TRUNSLOG_USED_BUFFERS * buffs,struct st_translog_buffer * buff)4949 used_buffs_register_unlock(TRUNSLOG_USED_BUFFERS *buffs,
4950                            struct st_translog_buffer *buff
4951                            __attribute__((unused)) )
4952 {
4953   DBUG_ENTER("used_buffs_register_unlock");
4954   DBUG_PRINT("enter", ("SUB buffs: %p unlk %u (%p)  wrt_ptr: %u (%p)"
4955                        "  buff %p (%u)",
4956                        buffs,
4957                        buffs->wrt_ptr, buffs->buff[buffs->wrt_ptr],
4958                        buffs->unlck_ptr, buffs->buff[buffs->unlck_ptr],
4959                        buff, buff->buffer_no));
4960   DBUG_ASSERT(buffs->buff[buffs->unlck_ptr] == buff);
4961   buffs->unlck_ptr++;
4962   DBUG_VOID_RETURN;
4963 }
/**
   @brief Emergency release of every buffer still registered in the list.

   First stops writing to the log (done under the log handler lock). Then,
   for each entry from unlck_ptr up to wrt_ptr, decreases the writer count
   of the buffer under that buffer's own lock and clears the entry. Finally
   the list is re-initialized.

   @param  buffs           List of used buffers

   @note Takes the log handler lock and the buffer locks itself.
*/
static void used_buffs_urgent_unlock(TRUNSLOG_USED_BUFFERS *buffs)
{
  uint pos;
  DBUG_ENTER("used_buffs_urgent_unlock");

  translog_lock();
  translog_stop_writing();
  translog_unlock();

  pos= buffs->unlck_ptr;
  while (pos < buffs->wrt_ptr)
  {
    struct st_translog_buffer *pending= buffs->buff[pos];

    translog_buffer_lock(pending);
    translog_buffer_decrease_writers(pending);
    translog_buffer_unlock(pending);
    buffs->buff[pos]= NULL;
    pos++;
  }

  used_buffs_init(buffs);
  DBUG_VOID_RETURN;
}
4982 
4983 /*
4984   Get page rest
4985 
4986   SYNOPSIS
4987     translog_get_current_page_rest()
4988 
4989   NOTE loghandler should be locked
4990 
4991   RETURN
4992     number of bytes left on the current page
4993 */
4994 
translog_get_current_page_rest()4995 static uint translog_get_current_page_rest()
4996 {
4997   return (TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill);
4998 }
4999 
5000 
5001 /*
5002   Get buffer rest in full pages
5003 
5004   SYNOPSIS
5005      translog_get_current_buffer_rest()
5006 
5007   NOTE loghandler should be locked
5008 
5009   RETURN
5010     number of full pages left on the current buffer
5011 */
5012 
translog_get_current_buffer_rest()5013 static uint translog_get_current_buffer_rest()
5014 {
5015   return (uint)((log_descriptor.bc.buffer->buffer + TRANSLOG_WRITE_BUFFER -
5016            log_descriptor.bc.ptr) /
5017           TRANSLOG_PAGE_SIZE);
5018 }
5019 
5020 /*
5021   Calculate possible group size without first (current) page
5022 
5023   SYNOPSIS
5024     translog_get_current_group_size()
5025 
5026   NOTE loghandler should be locked
5027 
5028   RETURN
5029     group size without first (current) page
5030 */
5031 
translog_get_current_group_size()5032 static translog_size_t translog_get_current_group_size()
5033 {
5034   /* buffer rest in full pages */
5035   translog_size_t buffer_rest= translog_get_current_buffer_rest();
5036   DBUG_ENTER("translog_get_current_group_size");
5037   DBUG_PRINT("info", ("buffer_rest in pages: %u", buffer_rest));
5038 
5039   buffer_rest*= log_descriptor.page_capacity_chunk_2;
5040   /* in case of only half of buffer free we can write this and next buffer */
5041   if (buffer_rest < log_descriptor.half_buffer_capacity_chunk_2)
5042   {
5043     DBUG_PRINT("info", ("buffer_rest: %lu -> add %lu",
5044                         (ulong) buffer_rest,
5045                         (ulong) log_descriptor.buffer_capacity_chunk_2));
5046     buffer_rest+= log_descriptor.buffer_capacity_chunk_2;
5047   }
5048 
5049   DBUG_PRINT("info", ("buffer_rest: %lu", (ulong) buffer_rest));
5050 
5051   DBUG_RETURN(buffer_rest);
5052 }
5053 
5054 
/**
   @brief Store a newly generated LSN and note that the log is not fully
   flushed any more.

   @param  lsn             Where to store the LSN
   @param  value           The LSN to store

   @note loghandler should be locked (checked by an owner assertion)
*/

static inline void set_lsn(LSN *lsn, LSN value)
{
  DBUG_ENTER("set_lsn");
  translog_lock_assert_owner();

  *lsn= value;

  /* a fresh LSN implies there is log content which is not flushed yet */
  log_descriptor.is_everything_flushed= 0;

  DBUG_PRINT("info", ("new LSN appeared: " LSN_FMT, LSN_IN_PARTS(value)));
  DBUG_VOID_RETURN;
}
5065 
5066 
5067 /**
5068    @brief Write variable record in 1 group.
5069 
5070    @param  lsn             LSN of the record will be written here
5071    @param  type            the log record type
5072    @param  short_trid      Short transaction ID or 0 if it has no sense
5073    @param  parts           Descriptor of record source parts
5074    @param  buffer_to_flush Buffer which have to be flushed if it is not 0
5075    @param  header_length   Calculated header length of chunk type 0
5076    @param  trn             Transaction structure pointer for hooks by
5077                            record log type, for short_id
5078    @param  hook_arg        Argument which will be passed to pre-write and
5079                            in-write hooks of this record.
5080 
5081    @note
5082      We must have a translog_lock() when entering this function
5083      We must have buffer_to_flush locked (if not null)
5084 
5085    @return Operation status
5086      @retval 0      OK
5087      @retval 1      Error
5088 */
5089 
5090 static my_bool
translog_write_variable_record_1group(LSN * lsn,enum translog_record_type type,MARIA_HA * tbl_info,SHORT_TRANSACTION_ID short_trid,struct st_translog_parts * parts,struct st_translog_buffer * buffer_to_flush,uint16 header_length,TRN * trn,void * hook_arg)5091 translog_write_variable_record_1group(LSN *lsn,
5092                                       enum translog_record_type type,
5093                                       MARIA_HA *tbl_info,
5094                                       SHORT_TRANSACTION_ID short_trid,
5095                                       struct st_translog_parts *parts,
5096                                       struct st_translog_buffer
5097                                       *buffer_to_flush, uint16 header_length,
5098                                       TRN *trn, void *hook_arg)
5099 {
5100   TRANSLOG_ADDRESS horizon;
5101   struct st_buffer_cursor cursor;
5102   int rc= 0;
5103   uint i;
5104   translog_size_t record_rest, full_pages, first_page;
5105   uint additional_chunk3_page= 0;
5106   uchar chunk0_header[1 + 2 + 5 + 2];
5107   DBUG_ENTER("translog_write_variable_record_1group");
5108   translog_lock_assert_owner();
5109   if (buffer_to_flush)
5110     translog_buffer_lock_assert_owner(buffer_to_flush);
5111 
5112   set_lsn(lsn, horizon= log_descriptor.horizon);
5113   if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
5114                                  *lsn, TRUE) ||
5115       (log_record_type_descriptor[type].inwrite_hook &&
5116        (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
5117                                                         lsn, hook_arg)))
5118   {
5119     translog_unlock();
5120     if (buffer_to_flush != NULL)
5121     {
5122       translog_buffer_flush(buffer_to_flush);
5123       translog_buffer_unlock(buffer_to_flush);
5124     }
5125     DBUG_RETURN(1);
5126   }
5127   cursor= log_descriptor.bc;
5128   cursor.chaser= 1;
5129 
5130   /* Advance pointer to be able unlock the loghandler */
5131   first_page= translog_get_current_page_rest();
5132   record_rest= parts->record_length - (first_page - header_length);
5133   full_pages= record_rest / log_descriptor.page_capacity_chunk_2;
5134   record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
5135 
5136   if (record_rest + 1 == log_descriptor.page_capacity_chunk_2)
5137   {
5138     DBUG_PRINT("info", ("2 chunks type 3 is needed"));
5139     /* We will write 2 chunks type 3 at the end of this group */
5140     additional_chunk3_page= 1;
5141     record_rest= 1;
5142   }
5143 
5144   DBUG_PRINT("info", ("first_page: %u (%u)  full_pages: %u (%lu)  "
5145                       "additional: %u (%u)  rest %u = %u",
5146                       first_page, first_page - header_length,
5147                       full_pages,
5148                       (ulong) full_pages *
5149                       log_descriptor.page_capacity_chunk_2,
5150                       additional_chunk3_page,
5151                       additional_chunk3_page *
5152                       (log_descriptor.page_capacity_chunk_2 - 1),
5153                       record_rest, parts->record_length));
5154   /* record_rest + 3 is chunk type 3 overhead + record_rest */
5155   rc= translog_advance_pointer((int)(full_pages + additional_chunk3_page),
5156                                (record_rest ? record_rest + 3 : 0),
5157                                &cursor.buffs);
5158   log_descriptor.bc.buffer->last_lsn= *lsn;
5159   DBUG_PRINT("info", ("last_lsn set to " LSN_FMT "  buffer: %p",
5160                       LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
5161                       log_descriptor.bc.buffer));
5162 
5163   translog_unlock();
5164 
5165   /*
5166      Check if we switched buffer and need process it (current buffer is
5167      unlocked already => we will not delay other threads
5168   */
5169   if (buffer_to_flush != NULL)
5170   {
5171     if (!rc)
5172       rc= translog_buffer_flush(buffer_to_flush);
5173     translog_buffer_unlock(buffer_to_flush);
5174   }
5175   if (rc)
5176   {
5177     //translog_advance_pointer decreased writers so it is OK
5178     DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
5179     DBUG_RETURN(1);
5180   }
5181 
5182   translog_write_variable_record_1group_header(parts, type, short_trid,
5183                                                header_length, chunk0_header);
5184 
5185   /* fill the pages */
5186   translog_write_parts_on_page(&horizon, &cursor, first_page, parts);
5187 
5188   DBUG_PRINT("info", ("absolute horizon: " LSN_FMT "  local: " LSN_FMT,
5189                       LSN_IN_PARTS(log_descriptor.horizon),
5190                       LSN_IN_PARTS(horizon)));
5191 
5192   for (i= 0; i < full_pages; i++)
5193   {
5194     if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
5195       goto error;
5196 
5197     DBUG_PRINT("info", ("absolute horizon: " LSN_FMT "  local: " LSN_FMT,
5198                         LSN_IN_PARTS(log_descriptor.horizon),
5199                         LSN_IN_PARTS(horizon)));
5200   }
5201 
5202   if (additional_chunk3_page)
5203   {
5204     if (translog_write_variable_record_chunk3_page(parts,
5205                                                    log_descriptor.
5206                                                    page_capacity_chunk_2 - 2,
5207                                                    &horizon, &cursor))
5208       goto error;
5209     DBUG_PRINT("info", ("absolute horizon: " LSN_FMT "  local: " LSN_FMT,
5210                         LSN_IN_PARTS(log_descriptor.horizon),
5211                         LSN_IN_PARTS(horizon)));
5212     DBUG_ASSERT(cursor.current_page_fill == TRANSLOG_PAGE_SIZE);
5213   }
5214 
5215   if (translog_write_variable_record_chunk3_page(parts,
5216                                                  record_rest,
5217                                                  &horizon, &cursor))
5218     goto error;
5219   DBUG_PRINT("info", ("absolute horizon: " LSN_FMT "  local: " LSN_FMT,
5220                       (uint) LSN_FILE_NO(log_descriptor.horizon),
5221                       (uint) LSN_OFFSET(log_descriptor.horizon),
5222                       (uint) LSN_FILE_NO(horizon),
5223                       (uint) LSN_OFFSET(horizon)));
5224 
5225   translog_buffer_lock(cursor.buffer);
5226   translog_buffer_decrease_writers(cursor.buffer);
5227   used_buffs_register_unlock(&cursor.buffs, cursor.buffer);
5228   translog_buffer_unlock(cursor.buffer);
5229   DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
5230   DBUG_RETURN(0);
5231 error:
5232   used_buffs_urgent_unlock(&cursor.buffs);
5233   DBUG_RETURN(1);
5234 }
5235 
5236 
5237 /**
5238    @brief Write variable record in 1 chunk.
5239 
5240    @param  lsn             LSN of the record will be written here
5241    @param  type            the log record type
5242    @param  short_trid      Short transaction ID or 0 if it has no sense
5243    @param  parts           Descriptor of record source parts
5244    @param  buffer_to_flush Buffer which have to be flushed if it is not 0
5245    @param  header_length   Calculated header length of chunk type 0
5246    @param  trn             Transaction structure pointer for hooks by
5247                            record log type, for short_id
5248    @param  hook_arg        Argument which will be passed to pre-write and
5249                            in-write hooks of this record.
5250 
5251    @note
5252      We must have a translog_lock() when entering this function
5253      We must have buffer_to_flush locked (if not null)
5254 
5255    @return Operation status
5256      @retval 0      OK
5257      @retval 1      Error
5258 */
5259 
5260 static my_bool
translog_write_variable_record_1chunk(LSN * lsn,enum translog_record_type type,MARIA_HA * tbl_info,SHORT_TRANSACTION_ID short_trid,struct st_translog_parts * parts,struct st_translog_buffer * buffer_to_flush,uint16 header_length,TRN * trn,void * hook_arg)5261 translog_write_variable_record_1chunk(LSN *lsn,
5262                                       enum translog_record_type type,
5263                                       MARIA_HA *tbl_info,
5264                                       SHORT_TRANSACTION_ID short_trid,
5265                                       struct st_translog_parts *parts,
5266                                       struct st_translog_buffer
5267                                       *buffer_to_flush, uint16 header_length,
5268                                       TRN *trn, void *hook_arg)
5269 {
5270   int rc;
5271   uchar chunk0_header[1 + 2 + 5 + 2];
5272   DBUG_ENTER("translog_write_variable_record_1chunk");
5273   translog_lock_assert_owner();
5274   if (buffer_to_flush)
5275     translog_buffer_lock_assert_owner(buffer_to_flush);
5276 
5277   translog_write_variable_record_1group_header(parts, type, short_trid,
5278                                                header_length, chunk0_header);
5279   set_lsn(lsn, log_descriptor.horizon);
5280   if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
5281                                  *lsn, TRUE) ||
5282       (log_record_type_descriptor[type].inwrite_hook &&
5283        (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
5284                                                         lsn, hook_arg)))
5285   {
5286     translog_unlock();
5287     rc= 1;
5288     goto err;
5289   }
5290 
5291   rc= translog_write_parts_on_page(&log_descriptor.horizon,
5292                                    &log_descriptor.bc,
5293                                    parts->total_record_length, parts);
5294   log_descriptor.bc.buffer->last_lsn= *lsn;
5295   DBUG_PRINT("info", ("last_lsn set to " LSN_FMT "  buffer: %p",
5296                       LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
5297                       log_descriptor.bc.buffer));
5298   translog_unlock();
5299 
5300   /*
5301      check if we switched buffer and need process it (current buffer is
5302      unlocked already => we will not delay other threads
5303   */
5304 err:
5305   if (buffer_to_flush != NULL)
5306   {
5307     if (!rc)
5308       rc= translog_buffer_flush(buffer_to_flush);
5309     translog_buffer_unlock(buffer_to_flush);
5310   }
5311 
5312   DBUG_RETURN(rc);
5313 }
5314 
5315 
5316 /*
5317   @brief Calculates and write LSN difference (compressed LSN).
5318 
5319   @param base_lsn        LSN from which we calculate difference
5320   @param lsn             LSN for codding
5321   @param dst             Result will be written to dst[-pack_length] .. dst[-1]
5322 
5323   @note To store an LSN in a compact way we will use the following compression:
5324     If a log record has LSN1, and it contains the LSN2 as a back reference,
5325     Instead of LSN2 we write LSN1-LSN2, encoded as:
5326      two bits     the number N (see below)
5327      14 bits
5328      N bytes
5329      That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
5330      is stored in the first two bits.
5331 
5332   @note function made to write the result in backward direction with no
5333   special sense or tricks both directions are equal in complicity
5334 
5335   @retval #    pointer on coded LSN
5336 */
5337 
translog_put_LSN_diff(LSN base_lsn,LSN lsn,uchar * dst)5338 static uchar *translog_put_LSN_diff(LSN base_lsn, LSN lsn, uchar *dst)
5339 {
5340   uint64 diff;
5341   DBUG_ENTER("translog_put_LSN_diff");
5342   DBUG_PRINT("enter", ("Base: " LSN_FMT "  val: " LSN_FMT "  dst: %p",
5343                        LSN_IN_PARTS(base_lsn), LSN_IN_PARTS(lsn),
5344                        dst));
5345   DBUG_ASSERT(base_lsn > lsn);
5346   diff= base_lsn - lsn;
5347   DBUG_PRINT("info", ("Diff: 0x%llx", (ulonglong) diff));
5348   if (diff <= 0x3FFF)
5349   {
5350     dst-= 2;
5351     /*
5352       Note we store this high uchar first to ensure that first uchar has
5353       0 in the 3 upper bits.
5354     */
5355     dst[0]= (uchar)(diff >> 8);
5356     dst[1]= (uchar)(diff & 0xFF);
5357   }
5358   else if (diff <= 0x3FFFFFL)
5359   {
5360     dst-= 3;
5361     dst[0]= (uchar)(0x40 | (diff >> 16));
5362     int2store(dst + 1, diff & 0xFFFF);
5363   }
5364   else if (diff <= 0x3FFFFFFFL)
5365   {
5366     dst-= 4;
5367     dst[0]= (uchar)(0x80 | (diff >> 24));
5368     int3store(dst + 1, diff & 0xFFFFFFL);
5369   }
5370   else if (diff <= 0x3FFFFFFFFFLL)
5371 
5372   {
5373     dst-= 5;
5374     dst[0]= (uchar)(0xC0 | (diff >> 32));
5375     int4store(dst + 1, diff & 0xFFFFFFFFL);
5376   }
5377   else
5378   {
5379     /*
5380       It is full LSN after special 1 diff (which is impossible
5381       in real life)
5382     */
5383     dst-= 2 + LSN_STORE_SIZE;
5384     dst[0]= 0;
5385     dst[1]= 1;
5386     lsn_store(dst + 2, lsn);
5387   }
5388   DBUG_PRINT("info", ("new dst: %p", dst));
5389   DBUG_RETURN(dst);
5390 }
5391 
5392 
5393 /*
5394   Get LSN from LSN-difference (compressed LSN)
5395 
5396   SYNOPSIS
5397     translog_get_LSN_from_diff()
5398     base_lsn             LSN from which we calculate difference
5399     src                  pointer to coded lsn
5400     dst                  pointer to buffer where to write 7byte LSN
5401 
5402   NOTE:
5403     To store an LSN in a compact way we will use the following compression:
5404 
5405     If a log record has LSN1, and it contains the lSN2 as a back reference,
5406     Instead of LSN2 we write LSN1-LSN2, encoded as:
5407 
5408      two bits     the number N (see below)
5409      14 bits
5410      N bytes
5411 
5412     That is, LSN is encoded in 2..5 bytes, and the number of bytes minus 2
5413     is stored in the first two bits.
5414 
5415   RETURN
5416     pointer to buffer after decoded LSN
5417 */
5418 
translog_get_LSN_from_diff(LSN base_lsn,uchar * src,uchar * dst)5419 static uchar *translog_get_LSN_from_diff(LSN base_lsn, uchar *src, uchar *dst)
5420 {
5421   LSN lsn;
5422   uint32 diff;
5423   uint32 first_byte;
5424   uint32 file_no, rec_offset;
5425   uint8 code;
5426   DBUG_ENTER("translog_get_LSN_from_diff");
5427   DBUG_PRINT("enter", ("Base: " LSN_FMT "  src:%p  dst %p",
5428                        LSN_IN_PARTS(base_lsn), src, dst));
5429   first_byte= *((uint8*) src);
5430   code= first_byte >> 6; /* Length is in 2 most significant bits */
5431   first_byte&= 0x3F;
5432   src++;                                        /* Skip length + encode */
5433   file_no= LSN_FILE_NO(base_lsn);               /* Assume relative */
5434   DBUG_PRINT("info", ("code: %u  first byte: %lu",
5435                       (uint) code, (ulong) first_byte));
5436   switch (code) {
5437   case 0:
5438     if (first_byte == 0 && *((uint8*)src) == 1)
5439     {
5440       /*
5441         It is full LSN after special 1 diff (which is impossible
5442         in real life)
5443       */
5444       memcpy(dst, src + 1, LSN_STORE_SIZE);
5445       DBUG_PRINT("info", ("Special case of full LSN, new src:%p",
5446                           src + 1 + LSN_STORE_SIZE));
5447       DBUG_RETURN(src + 1 + LSN_STORE_SIZE);
5448     }
5449     rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 8) | *((uint8*)src));
5450     break;
5451   case 1:
5452     diff= uint2korr(src);
5453     rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 16) | diff);
5454     break;
5455   case 2:
5456     diff= uint3korr(src);
5457     rec_offset= LSN_OFFSET(base_lsn) - ((first_byte << 24) | diff);
5458     break;
5459   case 3:
5460   {
5461     ulonglong base_offset= LSN_OFFSET(base_lsn);
5462     diff= uint4korr(src);
5463     if (diff > LSN_OFFSET(base_lsn))
5464     {
5465       /* take 1 from file offset */
5466       first_byte++;
5467       base_offset+= 0x100000000LL;
5468     }
5469     file_no= LSN_FILE_NO(base_lsn) - first_byte;
5470     DBUG_ASSERT(base_offset - diff <= UINT_MAX);
5471     rec_offset= (uint32)(base_offset - diff);
5472     break;
5473   }
5474   default:
5475     DBUG_ASSERT(0);
5476     DBUG_RETURN(NULL);
5477   }
5478   lsn= MAKE_LSN(file_no, rec_offset);
5479   src+= code + 1;
5480   lsn_store(dst, lsn);
5481   DBUG_PRINT("info", ("new src:%p", src));
5482   DBUG_RETURN(src);
5483 }
5484 
5485 
5486 /**
5487   @brief Encodes relative LSNs listed in the parameters.
5488 
5489   @param parts           Parts list with encoded LSN(s)
5490   @param base_lsn        LSN which is base for encoding
5491   @param lsns            number of LSN(s) to encode
5492   @param compressed_LSNs buffer which can be used for storing compressed LSN(s)
5493 */
5494 
translog_relative_LSN_encode(struct st_translog_parts * parts,LSN base_lsn,uint lsns,uchar * compressed_LSNs)5495 static void  translog_relative_LSN_encode(struct st_translog_parts *parts,
5496                                           LSN base_lsn,
5497                                           uint lsns, uchar *compressed_LSNs)
5498 {
5499   LEX_CUSTRING *part;
5500   uint lsns_len= lsns * LSN_STORE_SIZE;
5501   uchar buffer_src[MAX_NUMBER_OF_LSNS_PER_RECORD * LSN_STORE_SIZE];
5502   uchar *buffer= buffer_src;
5503   const uchar *cbuffer;
5504 
5505   DBUG_ENTER("translog_relative_LSN_encode");
5506 
5507   DBUG_ASSERT(parts->current != 0);
5508   part= parts->parts + parts->current;
5509 
5510   /* collect all LSN(s) in one chunk if it (they) is (are) divided */
5511   if (part->length < lsns_len)
5512   {
5513     size_t copied= part->length;
5514     LEX_CUSTRING *next_part;
5515     DBUG_PRINT("info", ("Using buffer:%p", compressed_LSNs));
5516     memcpy(buffer, part->str, part->length);
5517     next_part= parts->parts + parts->current + 1;
5518     do
5519     {
5520       DBUG_ASSERT(next_part < parts->parts + parts->elements);
5521       if ((next_part->length + copied) < lsns_len)
5522       {
5523         memcpy(buffer + copied, next_part->str,
5524                next_part->length);
5525         copied+= next_part->length;
5526         next_part->length= 0; next_part->str= 0;
5527         /* delete_dynamic_element(&parts->parts, parts->current + 1); */
5528         next_part++;
5529         parts->current++;
5530         part= parts->parts + parts->current;
5531       }
5532       else
5533       {
5534         size_t len= lsns_len - copied;
5535         memcpy(buffer + copied, next_part->str, len);
5536         copied= lsns_len;
5537         next_part->str+= len;
5538         next_part->length-= len;
5539       }
5540     } while (copied < lsns_len);
5541     cbuffer= buffer;
5542   }
5543   else
5544   {
5545     cbuffer= part->str;
5546     part->str+= lsns_len;
5547     part->length-= lsns_len;
5548     parts->current--;
5549     part= parts->parts + parts->current;
5550   }
5551 
5552   {
5553     /* Compress */
5554     LSN ref;
5555     int economy;
5556     const uchar *src_ptr;
5557     uchar *dst_ptr= compressed_LSNs + (MAX_NUMBER_OF_LSNS_PER_RECORD *
5558                                       COMPRESSED_LSN_MAX_STORE_SIZE);
5559     /*
5560       We write the result in backward direction with no special sense or
5561       tricks both directions are equal in complicity
5562     */
5563     for (src_ptr= cbuffer + lsns_len - LSN_STORE_SIZE;
5564          src_ptr >= (const uchar*)cbuffer;
5565          src_ptr-= LSN_STORE_SIZE)
5566     {
5567       ref= lsn_korr(src_ptr);
5568       dst_ptr= translog_put_LSN_diff(base_lsn, ref, dst_ptr);
5569     }
5570     part->length= (size_t)((compressed_LSNs +
5571                           (MAX_NUMBER_OF_LSNS_PER_RECORD *
5572                            COMPRESSED_LSN_MAX_STORE_SIZE)) -
5573                          dst_ptr);
5574     economy= lsns_len - (uint)part->length;
5575     parts->record_length-= economy;
5576     DBUG_PRINT("info", ("new length of LSNs: %lu  economy: %d",
5577                         (ulong)part->length, economy));
5578     parts->total_record_length-= economy;
5579     part->str= dst_ptr;
5580   }
5581   DBUG_VOID_RETURN;
5582 }
5583 
5584 
5585 /**
5586    @brief Write multi-group variable-size record.
5587 
5588    @param  lsn             LSN of the record will be written here
5589    @param  type            the log record type
5590    @param  short_trid      Short transaction ID or 0 if it has no sense
5591    @param  parts           Descriptor of record source parts
5592    @param  buffer_to_flush Buffer which have to be flushed if it is not 0
5593    @param  header_length   Header length calculated for 1 group
5594    @param  buffer_rest     Beginning from which we plan to write in full pages
5595    @param  trn             Transaction structure pointer for hooks by
5596                            record log type, for short_id
5597    @param  hook_arg        Argument which will be passed to pre-write and
5598                            in-write hooks of this record.
5599 
5600    @note
5601      We must have a translog_lock() when entering this function
5602 
5603      We must have buffer_to_flush locked (if not null)
5604      buffer_to_flush should *NOT* be locked when calling this function.
     (This note is here as this is different from most other
     translog_write...() functions which require the buffer to be locked)
5607 
5608    @return Operation status
5609      @retval 0      OK
5610      @retval 1      Error
5611 */
5612 
5613 static my_bool
translog_write_variable_record_mgroup(LSN * lsn,enum translog_record_type type,MARIA_HA * tbl_info,SHORT_TRANSACTION_ID short_trid,struct st_translog_parts * parts,struct st_translog_buffer * buffer_to_flush,uint16 header_length,translog_size_t buffer_rest,TRN * trn,void * hook_arg)5614 translog_write_variable_record_mgroup(LSN *lsn,
5615                                       enum translog_record_type type,
5616                                       MARIA_HA *tbl_info,
5617                                       SHORT_TRANSACTION_ID short_trid,
5618                                       struct st_translog_parts *parts,
5619                                       struct st_translog_buffer
5620                                       *buffer_to_flush,
5621                                       uint16 header_length,
5622                                       translog_size_t buffer_rest,
5623                                       TRN *trn, void *hook_arg)
5624 {
5625   TRANSLOG_ADDRESS horizon;
5626   struct st_buffer_cursor cursor;
5627   int rc= 0;
5628   uint i, chunk2_page, full_pages;
5629   uint curr_group= 0;
5630   translog_size_t record_rest, first_page, chunk3_pages, chunk0_pages= 1;
5631   translog_size_t done= 0;
5632   struct st_translog_group_descriptor group;
5633   DYNAMIC_ARRAY groups;
5634   uint16 chunk3_size;
5635   uint16 page_capacity= log_descriptor.page_capacity_chunk_2 + 1;
5636   uint16 last_page_capacity;
5637   my_bool new_page_before_chunk0= 1, first_chunk0= 1;
5638   uchar chunk0_header[1 + 2 + 5 + 2 + 2], group_desc[7 + 1];
5639   uchar chunk2_header[1];
5640   uint header_fixed_part= header_length + 2;
5641   uint groups_per_page= (page_capacity - header_fixed_part) / (7 + 1);
5642   uint file_of_the_first_group;
5643   int pages_to_skip;
5644   struct st_translog_buffer *buffer_of_last_lsn;
5645   my_bool external_buffer_to_flush= TRUE;
5646   DBUG_ENTER("translog_write_variable_record_mgroup");
5647   translog_lock_assert_owner();
5648 
5649   used_buffs_init(&cursor.buffs);
5650   chunk2_header[0]= TRANSLOG_CHUNK_NOHDR;
5651 
5652   if (my_init_dynamic_array(PSI_INSTRUMENT_ME, &groups,
5653                             sizeof(struct st_translog_group_descriptor),
5654                             10, 10, MYF(0)))
5655   {
5656     translog_unlock();
5657     if (buffer_to_flush != NULL)
5658     {
5659       translog_buffer_flush(buffer_to_flush);
5660       translog_buffer_unlock(buffer_to_flush);
5661     }
5662     DBUG_PRINT("error", ("init array failed"));
5663     DBUG_RETURN(1);
5664   }
5665 
5666   first_page= translog_get_current_page_rest();
5667   record_rest= parts->record_length - (first_page - 1);
5668   DBUG_PRINT("info", ("Record Rest: %lu", (ulong) record_rest));
5669 
5670   if (record_rest < buffer_rest)
5671   {
5672     /*
5673       The record (group 1 type) is larger than the free space on the page
5674       - we need to split it in two. But when we split it in two, the first
5675       part is big enough to hold all the data of the record (because the
5676       header of the first part of the split is smaller than the header of
5677       the record as a whole when it takes only one chunk)
5678     */
5679     DBUG_PRINT("info", ("too many free space because changing header"));
5680     buffer_rest-= log_descriptor.page_capacity_chunk_2;
5681     DBUG_ASSERT(record_rest >= buffer_rest);
5682   }
5683 
5684   file_of_the_first_group= LSN_FILE_NO(log_descriptor.horizon);
5685   translog_mark_file_unfinished(file_of_the_first_group);
5686   do
5687   {
5688     DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
5689     group.addr= horizon= log_descriptor.horizon;
5690     cursor= log_descriptor.bc;
5691     cursor.chaser= 1;
5692     if ((full_pages= buffer_rest / log_descriptor.page_capacity_chunk_2) > 255)
5693     {
5694       /* sizeof(uint8) == 256 is max number of chunk in multi-chunks group */
5695       full_pages= 255;
5696       buffer_rest= full_pages * log_descriptor.page_capacity_chunk_2;
5697     }
5698     /*
5699        group chunks =
5700        full pages + first page (which actually can be full, too).
5701        But here we assign number of chunks - 1
5702     */
5703     group.num= full_pages;
5704     if (insert_dynamic(&groups, (uchar*) &group))
5705     {
5706       DBUG_PRINT("error", ("insert into array failed"));
5707       goto err_unlock;
5708     }
5709 
5710     DBUG_PRINT("info", ("chunk: #%u  first_page: %u (%u)  "
5711                         "full_pages: %lu (%lu)  "
5712                         "Left %lu",
5713                         groups.elements,
5714                         first_page, first_page - 1,
5715                         (ulong) full_pages,
5716                         (ulong) (full_pages *
5717                                  log_descriptor.page_capacity_chunk_2),
5718                         (ulong)(parts->record_length - (first_page - 1 +
5719                                                         buffer_rest) -
5720                                 done)));
5721     rc= translog_advance_pointer((int)full_pages, 0, &cursor.buffs);
5722 
5723     translog_unlock();
5724 
5725     if (buffer_to_flush != NULL)
5726     {
5727       if (!external_buffer_to_flush)
5728         translog_buffer_decrease_writers(buffer_to_flush);
5729       if (!rc)
5730         rc= translog_buffer_flush(buffer_to_flush);
5731       translog_buffer_unlock(buffer_to_flush);
5732       buffer_to_flush= NULL;
5733     }
5734     external_buffer_to_flush= FALSE;
5735 
5736     if (rc)
5737     {
5738       DBUG_PRINT("error", ("flush of unlock buffer failed"));
5739       //translog_advance_pointer decreased writers so it is OK
5740       DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
5741       goto err;
5742     }
5743 
5744     translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
5745     translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
5746     DBUG_PRINT("info", ("absolute horizon: " LSN_FMT "  local: " LSN_FMT "  "
5747                         "Left  %lu",
5748                         LSN_IN_PARTS(log_descriptor.horizon),
5749                         LSN_IN_PARTS(horizon),
5750                         (ulong) (parts->record_length - (first_page - 1) -
5751                                  done)));
5752 
5753     for (i= 0; i < full_pages; i++)
5754     {
5755       if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
5756         goto err;
5757 
5758       DBUG_PRINT("info", ("absolute horizon: " LSN_FMT "  "
5759                           "local: " LSN_FMT "  "
5760                           "Left: %lu",
5761                           LSN_IN_PARTS(log_descriptor.horizon),
5762                           LSN_IN_PARTS(horizon),
5763                           (ulong) (parts->record_length - (first_page - 1) -
5764                                    i * log_descriptor.page_capacity_chunk_2 -
5765                                    done)));
5766     }
5767 
5768     done+= (first_page - 1 + buffer_rest);
5769 
5770     if (translog_chaser_page_next(&horizon, &cursor))
5771     {
5772       DBUG_PRINT("error", ("flush of unlock buffer failed"));
5773       goto err;
5774     }
5775     translog_buffer_lock(cursor.buffer);
5776     translog_buffer_decrease_writers(cursor.buffer);
5777     used_buffs_register_unlock(&cursor.buffs, cursor.buffer);
5778     translog_buffer_unlock(cursor.buffer);
5779 
5780     translog_lock();
5781 
5782     /* Check that we have place for chunk type 2 */
5783     first_page= translog_get_current_page_rest();
5784     if (first_page <= 1)
5785     {
5786       if (translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
5787                              &buffer_to_flush))
5788         goto err_unlock;
5789       first_page= translog_get_current_page_rest();
5790     }
5791     buffer_rest= translog_get_current_group_size();
5792 
5793     if (buffer_to_flush)
5794       used_buffs_register_unlock(&cursor.buffs,
5795                                 buffer_to_flush); // will be unlocked
5796 
5797   } while ((translog_size_t)(first_page + buffer_rest) <
5798            (translog_size_t)(parts->record_length - done));
5799 
5800   group.addr= horizon= log_descriptor.horizon;
5801   cursor= log_descriptor.bc;
5802   cursor.chaser= 1;
5803   group.num= 0;                       /* 0 because it does not matter */
5804   if (insert_dynamic(&groups, (uchar*) &group))
5805   {
5806     DBUG_PRINT("error", ("insert into array failed"));
5807     goto err_unlock;
5808   }
5809   record_rest= parts->record_length - done;
5810   DBUG_PRINT("info", ("Record rest: %lu", (ulong) record_rest));
5811   if (first_page > record_rest + 1)
5812   {
5813     /*
5814       We have not so much data to fill all first page
5815       (no speaking about full pages)
5816       so it will be:
5817       <chunk0 <data>>
5818       or
5819       <chunk0>...<chunk0><chunk0 <data>>
5820       or
5821       <chunk3 <data>><chunk0>...<chunk0><chunk0 <possible data of 1 byte>>
5822     */
5823     chunk2_page= full_pages= 0;
5824     last_page_capacity= first_page;
5825     pages_to_skip= -1;
5826   }
5827   else
5828   {
5829     /*
5830       We will have:
5831       <chunk2 <data>>...<chunk2 <data>><chunk0 <data>>
5832       or
5833       <chunk2 <data>>...<chunk2 <data>><chunk0>...<chunk0><chunk0 <data>>
5834       or
5835       <chunk3 <data>><chunk0>...<chunk0><chunk0 <possible data of 1 byte>>
5836     */
5837     chunk2_page= 1;
5838     record_rest-= (first_page - 1);
5839     pages_to_skip= full_pages=
5840       record_rest / log_descriptor.page_capacity_chunk_2;
5841     record_rest= (record_rest % log_descriptor.page_capacity_chunk_2);
5842     last_page_capacity= page_capacity;
5843   }
5844   chunk3_size= 0;
5845   chunk3_pages= 0;
5846   if (last_page_capacity > record_rest + 1 && record_rest != 0)
5847   {
5848     if (last_page_capacity >
5849         record_rest + header_fixed_part + groups.elements * (7 + 1))
5850     {
5851       /* 1 record of type 0 */
5852       chunk3_pages= 0;
5853     }
5854     else
5855     {
5856       pages_to_skip++;
5857       chunk3_pages= 1;
5858       if (record_rest + 2 == last_page_capacity)
5859       {
5860         chunk3_size= record_rest - 1;
5861         record_rest= 1;
5862       }
5863       else
5864       {
5865         chunk3_size= record_rest;
5866         record_rest= 0;
5867       }
5868     }
5869   }
5870   /*
5871      A first non-full page will hold type 0 chunk only if it fit in it with
5872      all its headers
5873   */
5874   while (page_capacity <
5875          record_rest + header_fixed_part +
5876          (groups.elements - groups_per_page * (chunk0_pages - 1)) * (7 + 1))
5877     chunk0_pages++;
5878   DBUG_PRINT("info", ("chunk0_pages: %u  groups %u  groups per full page: %u  "
5879                       "Group on last page: %u",
5880                       chunk0_pages, groups.elements,
5881                       groups_per_page,
5882                       (groups.elements -
5883                        ((page_capacity - header_fixed_part) / (7 + 1)) *
5884                        (chunk0_pages - 1))));
5885   DBUG_PRINT("info", ("first_page: %u  chunk2: %u  full_pages: %u (%lu)  "
5886                       "chunk3: %u (%u)  rest: %u",
5887                       first_page,
5888                       chunk2_page, full_pages,
5889                       (ulong) full_pages *
5890                       log_descriptor.page_capacity_chunk_2,
5891                       chunk3_pages, (uint) chunk3_size, (uint) record_rest));
5892 
5893   DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
5894   rc= translog_advance_pointer(pages_to_skip + (int)(chunk0_pages - 1),
5895                                record_rest + header_fixed_part +
5896                                (groups.elements -
5897                                 ((page_capacity -
5898                                   header_fixed_part) / (7 + 1)) *
5899                                 (chunk0_pages - 1)) * (7 + 1),
5900                                 &cursor.buffs);
5901   buffer_of_last_lsn= log_descriptor.bc.buffer;
5902   translog_unlock();
5903 
5904   if (buffer_to_flush != NULL)
5905   {
5906     DBUG_ASSERT(!external_buffer_to_flush);
5907     translog_buffer_decrease_writers(buffer_to_flush);
5908     if (!rc)
5909       rc= translog_buffer_flush(buffer_to_flush);
5910     translog_buffer_unlock(buffer_to_flush);
5911     buffer_to_flush= NULL;
5912   }
5913   if (rc)
5914   {
5915     DBUG_PRINT("error", ("flush of unlock buffer failed"));
5916     goto err;
5917   }
5918 
5919   if (rc)
5920     goto err;
5921 
5922   if (chunk2_page)
5923   {
5924     DBUG_PRINT("info", ("chunk 2 to finish first page"));
5925     translog_write_data_on_page(&horizon, &cursor, 1, chunk2_header);
5926     translog_write_parts_on_page(&horizon, &cursor, first_page - 1, parts);
5927     DBUG_PRINT("info", ("absolute horizon: " LSN_FMT "  local: " LSN_FMT " "
5928                         "Left: %lu",
5929                         LSN_IN_PARTS(log_descriptor.horizon),
5930                         LSN_IN_PARTS(horizon),
5931                         (ulong) (parts->record_length - (first_page - 1) -
5932                                  done)));
5933   }
5934   else if (chunk3_pages)
5935   {
5936     uchar chunk3_header[3];
5937     DBUG_PRINT("info", ("chunk 3"));
5938     DBUG_ASSERT(full_pages == 0);
5939     chunk3_pages= 0;
5940     chunk3_header[0]= TRANSLOG_CHUNK_LNGTH;
5941     int2store(chunk3_header + 1, chunk3_size);
5942     translog_write_data_on_page(&horizon, &cursor, 3, chunk3_header);
5943     translog_write_parts_on_page(&horizon, &cursor, chunk3_size, parts);
5944     DBUG_PRINT("info", ("absolute horizon: " LSN_FMT "  local: " LSN_FMT " "
5945                         "Left: %lu",
5946                         LSN_IN_PARTS(log_descriptor.horizon),
5947                         LSN_IN_PARTS(horizon),
5948                         (ulong) (parts->record_length - chunk3_size - done)));
5949   }
5950   else
5951   {
5952     DBUG_PRINT("info", ("no new_page_before_chunk0"));
5953     new_page_before_chunk0= 0;
5954   }
5955 
5956   for (i= 0; i < full_pages; i++)
5957   {
5958     DBUG_ASSERT(chunk2_page != 0);
5959     if (translog_write_variable_record_chunk2_page(parts, &horizon, &cursor))
5960       goto err;
5961 
5962     DBUG_PRINT("info", ("absolute horizon: " LSN_FMT "  local: " LSN_FMT " "
5963                         "Left: %lu",
5964                         LSN_IN_PARTS(log_descriptor.horizon),
5965                         LSN_IN_PARTS(horizon),
5966                         (ulong) (parts->record_length - (first_page - 1) -
5967                                  i * log_descriptor.page_capacity_chunk_2 -
5968                                  done)));
5969   }
5970 
5971   if (chunk3_pages &&
5972       translog_write_variable_record_chunk3_page(parts,
5973                                                  chunk3_size,
5974                                                  &horizon, &cursor))
5975     goto err;
5976   DBUG_PRINT("info", ("absolute horizon: " LSN_FMT "  local: " LSN_FMT,
5977                       LSN_IN_PARTS(log_descriptor.horizon),
5978                       LSN_IN_PARTS(horizon)));
5979 
5980   *chunk0_header= (uchar) (type | TRANSLOG_CHUNK_LSN);
5981   int2store(chunk0_header + 1, short_trid);
5982   translog_write_variable_record_1group_code_len(chunk0_header + 3,
5983                                                  parts->record_length,
5984                                                  header_length);
5985   do
5986   {
5987     int limit;
5988     if (new_page_before_chunk0 &&
5989         translog_chaser_page_next(&horizon, &cursor))
5990     {
5991       DBUG_PRINT("error", ("flush of unlock buffer failed"));
5992       goto err;
5993     }
5994     new_page_before_chunk0= 1;
5995 
5996     if (first_chunk0)
5997     {
5998       first_chunk0= 0;
5999 
6000       /*
6001         We can drop "log_descriptor.is_everything_flushed" earlier when have
6002         lock on loghandler and assign initial value of "horizon" variable or
6003         before unlocking loghandler (because we will increase writers
6004         counter on the buffer and every thread which wanted flush the buffer
6005         will wait till we finish with it). But IMHO better here take short
6006         lock and do not bother other threads with waiting.
6007       */
6008       translog_lock();
6009       set_lsn(lsn, horizon);
6010       buffer_of_last_lsn->last_lsn= *lsn;
6011       DBUG_PRINT("info", ("last_lsn set to " LSN_FMT "  buffer: %p",
6012                           LSN_IN_PARTS(buffer_of_last_lsn->last_lsn),
6013                           buffer_of_last_lsn));
6014       if (log_record_type_descriptor[type].inwrite_hook &&
6015           (*log_record_type_descriptor[type].inwrite_hook) (type, trn,
6016                                                             tbl_info,
6017                                                             lsn, hook_arg))
6018         goto err_unlock;
6019       translog_unlock();
6020     }
6021 
6022     /*
6023        A first non-full page will hold type 0 chunk only if it fit in it with
6024        all its headers => the fist page is full or number of groups less then
6025        possible number of full page.
6026     */
6027     limit= (groups_per_page < groups.elements - curr_group ?
6028             groups_per_page : groups.elements - curr_group);
6029     DBUG_PRINT("info", ("Groups: %u  curr: %u  limit: %u",
6030                         (uint) groups.elements, (uint) curr_group,
6031                         (uint) limit));
6032 
6033     if (chunk0_pages == 1)
6034     {
6035       DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) + %u = %u",
6036                           (uint) limit, (uint) record_rest,
6037                           (uint) (2 + limit * (7 + 1) + record_rest)));
6038       int2store(chunk0_header + header_length - 2,
6039                 2 + limit * (7 + 1) + record_rest);
6040     }
6041     else
6042     {
6043       DBUG_PRINT("info", ("chunk_len: 2 + %u * (7+1) = %u",
6044                           (uint) limit, (uint) (2 + limit * (7 + 1))));
6045       int2store(chunk0_header + header_length - 2, 2 + limit * (7 + 1));
6046     }
6047     int2store(chunk0_header + header_length, groups.elements - curr_group);
6048     translog_write_data_on_page(&horizon, &cursor, header_fixed_part,
6049                                 chunk0_header);
6050     for (i= curr_group; i < limit + curr_group; i++)
6051     {
6052       struct st_translog_group_descriptor *grp_ptr;
6053       grp_ptr= dynamic_element(&groups, i,
6054                                struct st_translog_group_descriptor *);
6055       lsn_store(group_desc, grp_ptr->addr);
6056       group_desc[7]= grp_ptr->num;
6057       translog_write_data_on_page(&horizon, &cursor, (7 + 1), group_desc);
6058     }
6059 
6060     if (chunk0_pages == 1 && record_rest != 0)
6061       translog_write_parts_on_page(&horizon, &cursor, record_rest, parts);
6062 
6063     chunk0_pages--;
6064     curr_group+= limit;
6065     /* put special type to indicate that it is not LSN chunk */
6066     *chunk0_header= (uchar) (TRANSLOG_CHUNK_LSN | TRANSLOG_CHUNK_0_CONT);
6067   } while (chunk0_pages != 0);
6068   translog_buffer_lock(cursor.buffer);
6069   translog_buffer_decrease_writers(cursor.buffer);
6070   used_buffs_register_unlock(&cursor.buffs, cursor.buffer);
6071   translog_buffer_unlock(cursor.buffer);
6072   rc= 0;
6073   DBUG_ASSERT(cursor.buffs.unlck_ptr == cursor.buffs.wrt_ptr);
6074 
6075   if (translog_set_lsn_for_files(file_of_the_first_group, LSN_FILE_NO(*lsn),
6076                                  *lsn, FALSE))
6077     goto err;
6078 
6079   translog_mark_file_finished(file_of_the_first_group);
6080 
6081   delete_dynamic(&groups);
6082   DBUG_RETURN(0);
6083 
6084 err_unlock:
6085 
6086   translog_unlock();
6087 
6088 err:
6089 
6090   if (cursor.buffs.unlck_ptr != cursor.buffs.wrt_ptr)
6091     used_buffs_urgent_unlock(&cursor.buffs);
6092 
6093   if (buffer_to_flush != NULL)
6094   {
6095     /* This is to prevent locking buffer forever in case of error */
6096     if (!external_buffer_to_flush)
6097       translog_buffer_decrease_writers(buffer_to_flush);
6098     if (!rc)
6099       rc= translog_buffer_flush(buffer_to_flush);
6100     translog_buffer_unlock(buffer_to_flush);
6101     buffer_to_flush= NULL;
6102   }
6103 
6104 
6105   translog_mark_file_finished(file_of_the_first_group);
6106 
6107   delete_dynamic(&groups);
6108   DBUG_RETURN(1);
6109 }
6110 
6111 
6112 /**
6113    @brief Write the variable length log record.
6114 
6115    @param  lsn             LSN of the record will be written here
6116    @param  type            the log record type
6117    @param  short_trid      Short transaction ID or 0 if it has no sense
6118    @param  parts           Descriptor of record source parts
6119    @param  trn             Transaction structure pointer for hooks by
6120                            record log type, for short_id
6121    @param  hook_arg        Argument which will be passed to pre-write and
6122                            in-write hooks of this record.
6123 
6124    @return Operation status
6125      @retval 0      OK
6126      @retval 1      Error
6127 */
6128 
translog_write_variable_record(LSN * lsn,enum translog_record_type type,MARIA_HA * tbl_info,SHORT_TRANSACTION_ID short_trid,struct st_translog_parts * parts,TRN * trn,void * hook_arg)6129 static my_bool translog_write_variable_record(LSN *lsn,
6130                                               enum translog_record_type type,
6131                                               MARIA_HA *tbl_info,
6132                                               SHORT_TRANSACTION_ID short_trid,
6133                                               struct st_translog_parts *parts,
6134                                               TRN *trn, void *hook_arg)
6135 {
6136   struct st_translog_buffer *buffer_to_flush= NULL;
6137   uint header_length1= 1 + 2 + 2 +
6138     translog_variable_record_length_bytes(parts->record_length);
6139   ulong buffer_rest;
6140   uint page_rest;
6141   /* Max number of such LSNs per record is 2 */
6142   uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
6143     COMPRESSED_LSN_MAX_STORE_SIZE];
6144   my_bool res;
6145   DBUG_ENTER("translog_write_variable_record");
6146 
6147   translog_lock();
6148   DBUG_PRINT("info", ("horizon: " LSN_FMT,
6149                       LSN_IN_PARTS(log_descriptor.horizon)));
6150   page_rest= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
6151   DBUG_PRINT("info", ("header length: %u  page_rest: %u",
6152                       header_length1, page_rest));
6153 
6154   /*
6155     header and part which we should read have to fit in one chunk
6156     TODO: allow to divide readable header
6157   */
6158   if (page_rest <
6159       (header_length1 + log_record_type_descriptor[type].read_header_len))
6160   {
6161     DBUG_PRINT("info",
6162                ("Next page, size: %u  header: %u + %u",
6163                 log_descriptor.bc.current_page_fill,
6164                 header_length1,
6165                 log_record_type_descriptor[type].read_header_len));
6166     translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
6167                        &buffer_to_flush);
6168     /* Chunk 2 header is 1 byte, so full page capacity will be one uchar more */
6169     page_rest= log_descriptor.page_capacity_chunk_2 + 1;
6170     DBUG_PRINT("info", ("page_rest: %u", page_rest));
6171   }
6172 
6173   /*
6174      To minimize compressed size we will compress always relative to
6175      very first chunk address (log_descriptor.horizon for now)
6176   */
6177   if (log_record_type_descriptor[type].compressed_LSN > 0)
6178   {
6179     translog_relative_LSN_encode(parts, log_descriptor.horizon,
6180                                  log_record_type_descriptor[type].
6181                                  compressed_LSN, compressed_LSNs);
6182     /* recalculate header length after compression */
6183     header_length1= 1 + 2 + 2 +
6184       translog_variable_record_length_bytes(parts->record_length);
6185     DBUG_PRINT("info", ("after compressing LSN(s) header length: %u  "
6186                         "record length: %lu",
6187                         header_length1, (ulong)parts->record_length));
6188   }
6189 
6190   /* TODO: check space on current page for header + few bytes */
6191   if (page_rest >= parts->record_length + header_length1)
6192   {
6193     /* following function makes translog_unlock(); */
6194     res= translog_write_variable_record_1chunk(lsn, type, tbl_info,
6195                                                short_trid,
6196                                                parts, buffer_to_flush,
6197                                                header_length1, trn, hook_arg);
6198     DBUG_RETURN(res);
6199   }
6200 
6201   buffer_rest= translog_get_current_group_size();
6202 
6203   if (buffer_rest >= parts->record_length + header_length1 - page_rest)
6204   {
6205     /* following function makes translog_unlock(); */
6206     res= translog_write_variable_record_1group(lsn, type, tbl_info,
6207                                                short_trid,
6208                                                parts, buffer_to_flush,
6209                                                header_length1, trn, hook_arg);
6210     DBUG_RETURN(res);
6211   }
6212   /* following function makes translog_unlock(); */
6213   res= translog_write_variable_record_mgroup(lsn, type, tbl_info,
6214                                              short_trid,
6215                                              parts, buffer_to_flush,
6216                                              header_length1,
6217                                              buffer_rest, trn, hook_arg);
6218   DBUG_RETURN(res);
6219 }
6220 
6221 
6222 /**
6223    @brief Write the fixed and pseudo-fixed log record.
6224 
6225    @param  lsn             LSN of the record will be written here
6226    @param  type            the log record type
6227    @param  short_trid      Short transaction ID or 0 if it has no sense
6228    @param  parts           Descriptor of record source parts
6229    @param  trn             Transaction structure pointer for hooks by
6230                            record log type, for short_id
6231    @param  hook_arg        Argument which will be passed to pre-write and
6232                            in-write hooks of this record.
6233 
6234    @return Operation status
6235      @retval 0      OK
6236      @retval 1      Error
6237 */
6238 
translog_write_fixed_record(LSN * lsn,enum translog_record_type type,MARIA_HA * tbl_info,SHORT_TRANSACTION_ID short_trid,struct st_translog_parts * parts,TRN * trn,void * hook_arg)6239 static my_bool translog_write_fixed_record(LSN *lsn,
6240                                            enum translog_record_type type,
6241                                            MARIA_HA *tbl_info,
6242                                            SHORT_TRANSACTION_ID short_trid,
6243                                            struct st_translog_parts *parts,
6244                                            TRN *trn, void *hook_arg)
6245 {
6246   struct st_translog_buffer *buffer_to_flush= NULL;
6247   uchar chunk1_header[1 + 2];
6248   /* Max number of such LSNs per record is 2 */
6249   uchar compressed_LSNs[MAX_NUMBER_OF_LSNS_PER_RECORD *
6250     COMPRESSED_LSN_MAX_STORE_SIZE];
6251   LEX_CUSTRING *part;
6252   int rc= 1;
6253   DBUG_ENTER("translog_write_fixed_record");
6254   DBUG_ASSERT((log_record_type_descriptor[type].rclass ==
6255                LOGRECTYPE_FIXEDLENGTH &&
6256                parts->record_length ==
6257                log_record_type_descriptor[type].fixed_length) ||
6258               (log_record_type_descriptor[type].rclass ==
6259                LOGRECTYPE_PSEUDOFIXEDLENGTH &&
6260                parts->record_length ==
6261                log_record_type_descriptor[type].fixed_length));
6262 
6263   translog_lock();
6264   DBUG_PRINT("info", ("horizon: " LSN_FMT,
6265                       LSN_IN_PARTS(log_descriptor.horizon)));
6266 
6267   DBUG_ASSERT(log_descriptor.bc.current_page_fill <= TRANSLOG_PAGE_SIZE);
6268   DBUG_PRINT("info",
6269              ("Page size: %u  record: %u  next cond: %d",
6270               log_descriptor.bc.current_page_fill,
6271               (parts->record_length +
6272                log_record_type_descriptor[type].compressed_LSN * 2 + 3),
6273               ((((uint) log_descriptor.bc.current_page_fill) +
6274                 (parts->record_length +
6275                  log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
6276                TRANSLOG_PAGE_SIZE)));
6277   /*
6278     check that there is enough place on current page.
6279     NOTE: compressing may increase page LSN size on two bytes for every LSN
6280   */
6281   if ((((uint) log_descriptor.bc.current_page_fill) +
6282        (parts->record_length +
6283         log_record_type_descriptor[type].compressed_LSN * 2 + 3)) >
6284       TRANSLOG_PAGE_SIZE)
6285   {
6286     DBUG_PRINT("info", ("Next page"));
6287     if (translog_page_next(&log_descriptor.horizon, &log_descriptor.bc,
6288                            &buffer_to_flush))
6289       goto err;                                 /* rc == 1 */
6290     if (buffer_to_flush)
6291       translog_buffer_lock_assert_owner(buffer_to_flush);
6292   }
6293 
6294   set_lsn(lsn, log_descriptor.horizon);
6295   if (translog_set_lsn_for_files(LSN_FILE_NO(*lsn), LSN_FILE_NO(*lsn),
6296                              *lsn, TRUE) ||
6297       (log_record_type_descriptor[type].inwrite_hook &&
6298        (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info,
6299                                                         lsn, hook_arg)))
6300     goto err;
6301 
6302   /* compress LSNs */
6303   if (log_record_type_descriptor[type].rclass ==
6304       LOGRECTYPE_PSEUDOFIXEDLENGTH)
6305   {
6306     DBUG_ASSERT(log_record_type_descriptor[type].compressed_LSN > 0);
6307     translog_relative_LSN_encode(parts, *lsn,
6308                                  log_record_type_descriptor[type].
6309                                  compressed_LSN, compressed_LSNs);
6310   }
6311 
6312   /*
6313     Write the whole record at once (we know that there is enough place on
6314     the destination page)
6315   */
6316   DBUG_ASSERT(parts->current != 0);       /* first part is left for header */
6317   part= parts->parts + (--parts->current);
6318   parts->total_record_length+= (translog_size_t) (part->length= 1 + 2);
6319   part->str= chunk1_header;
6320   *chunk1_header= (uchar) (type | TRANSLOG_CHUNK_FIXED);
6321   int2store(chunk1_header + 1, short_trid);
6322 
6323   rc= translog_write_parts_on_page(&log_descriptor.horizon,
6324                                    &log_descriptor.bc,
6325                                    parts->total_record_length, parts);
6326 
6327   log_descriptor.bc.buffer->last_lsn= *lsn;
6328   DBUG_PRINT("info", ("last_lsn set to " LSN_FMT "  buffer: %p",
6329                       LSN_IN_PARTS(log_descriptor.bc.buffer->last_lsn),
6330                       log_descriptor.bc.buffer));
6331 
6332 err:
6333   translog_unlock();
6334 
6335   /*
6336     check if we switched buffer and need process it (current buffer is
6337     unlocked already => we will not delay other threads
6338   */
6339   if (buffer_to_flush != NULL)
6340   {
6341     if (!rc)
6342       rc= translog_buffer_flush(buffer_to_flush);
6343     translog_buffer_unlock(buffer_to_flush);
6344   }
6345 
6346   DBUG_RETURN(rc);
6347 }
6348 
6349 
6350 /**
6351    @brief Writes the log record
6352 
6353    If share has no 2-byte-id yet, gives an id to the share and logs
6354    LOGREC_FILE_ID. If transaction has not logged LOGREC_LONG_TRANSACTION_ID
6355    yet, logs it.
6356 
6357    @param  lsn             LSN of the record will be written here
6358    @param  type            the log record type
6359    @param  trn             Transaction structure pointer for hooks by
6360                            record log type, for short_id
6361    @param  tbl_info        MARIA_HA of table or NULL
6362    @param  rec_len         record length or 0 (count it)
6363    @param  part_no         number of parts or 0 (count it)
6364    @param  parts_data      zero ended (in case of number of parts is 0)
6365                            array of LEX_STRINGs (parts), first
6366                            TRANSLOG_INTERNAL_PARTS positions in the log
6367                            should be unused (need for loghandler)
6368    @param  store_share_id  if tbl_info!=NULL then share's id will
6369                            automatically be stored in the two first bytes
6370                            pointed (so pointer is assumed to be !=NULL)
6371    @param  hook_arg        argument which will be passed to pre-write and
6372                            in-write hooks of this record.
6373 
6374    @return Operation status
6375      @retval 0      OK
6376      @retval 1      Error
6377 */
6378 
translog_write_record(LSN * lsn,enum translog_record_type type,TRN * trn,MARIA_HA * tbl_info,translog_size_t rec_len,uint part_no,LEX_CUSTRING * parts_data,uchar * store_share_id,void * hook_arg)6379 my_bool translog_write_record(LSN *lsn,
6380                               enum translog_record_type type,
6381                               TRN *trn, MARIA_HA *tbl_info,
6382                               translog_size_t rec_len,
6383                               uint part_no,
6384                               LEX_CUSTRING *parts_data,
6385                               uchar *store_share_id,
6386                               void *hook_arg)
6387 {
6388   struct st_translog_parts parts;
6389   LEX_CUSTRING *part;
6390   int rc;
6391   uint short_trid= trn->short_id;
6392   DBUG_ENTER("translog_write_record");
6393   DBUG_PRINT("enter", ("type: %u (%s)  ShortTrID: %u  rec_len: %lu",
6394                        (uint) type, log_record_type_descriptor[type].name,
6395                        (uint) short_trid, (ulong) rec_len));
6396   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6397               translog_status == TRANSLOG_READONLY);
6398   DBUG_ASSERT(type != 0);
6399   DBUG_SLOW_ASSERT((uint)type <= max_allowed_translog_type);
6400   if (unlikely(translog_status != TRANSLOG_OK))
6401   {
6402     DBUG_PRINT("error", ("Transaction log is write protected"));
6403     DBUG_RETURN(1);
6404   }
6405 
6406   if (tbl_info && type != LOGREC_FILE_ID)
6407   {
6408     MARIA_SHARE *share= tbl_info->s;
6409     DBUG_ASSERT(share->now_transactional);
6410     if (unlikely(share->id == 0))
6411     {
6412       /*
6413         First log write for this MARIA_SHARE; give it a short id.
6414         When the lock manager is enabled and needs a short id, it should be
6415         assigned in the lock manager (because row locks will be taken before
6416         log records are written; for example SELECT FOR UPDATE takes locks but
6417         writes no log record.
6418       */
6419       if (unlikely(translog_assign_id_to_share(tbl_info, trn)))
6420         DBUG_RETURN(1);
6421     }
6422     fileid_store(store_share_id, share->id);
6423   }
6424   if (unlikely(!(trn->first_undo_lsn & TRANSACTION_LOGGED_LONG_ID)))
6425   {
6426     LSN dummy_lsn;
6427     LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 1];
6428     uchar log_data[6];
6429     DBUG_ASSERT(trn->undo_lsn == LSN_IMPOSSIBLE);
6430     int6store(log_data, trn->trid);
6431     log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
6432     log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
6433     trn->first_undo_lsn|= TRANSACTION_LOGGED_LONG_ID; /* no recursion */
6434     if (unlikely(translog_write_record(&dummy_lsn, LOGREC_LONG_TRANSACTION_ID,
6435                                        trn, NULL, sizeof(log_data),
6436                                        sizeof(log_array)/sizeof(log_array[0]),
6437                                        log_array, NULL, NULL)))
6438       DBUG_RETURN(1);
6439   }
6440 
6441   parts.parts= parts_data;
6442 
6443   /* count parts if they are not counted by upper level */
6444   if (part_no == 0)
6445   {
6446     for (part_no= TRANSLOG_INTERNAL_PARTS;
6447          parts_data[part_no].length != 0;
6448          part_no++);
6449   }
6450   parts.elements= part_no;
6451   parts.current= TRANSLOG_INTERNAL_PARTS;
6452 
6453   /* clear TRANSLOG_INTERNAL_PARTS */
6454   compile_time_assert(TRANSLOG_INTERNAL_PARTS != 0);
6455   parts_data[0].str= 0;
6456   parts_data[0].length= 0;
6457 
6458   /* count length of the record */
6459   if (rec_len == 0)
6460   {
6461     for(part= parts_data + TRANSLOG_INTERNAL_PARTS;\
6462         part < parts_data + part_no;
6463         part++)
6464     {
6465       rec_len+= (translog_size_t) part->length;
6466     }
6467   }
6468   parts.record_length= rec_len;
6469 
6470 #ifndef DBUG_OFF
6471   {
6472     uint i;
6473     size_t len= 0;
6474 #ifdef HAVE_valgrind
6475     ha_checksum checksum= 0;
6476 #endif
6477     for (i= TRANSLOG_INTERNAL_PARTS; i < part_no; i++)
6478     {
6479 #ifdef HAVE_valgrind
6480       /* Find unitialized bytes early */
6481       checksum+= my_checksum(checksum, parts_data[i].str,
6482                              parts_data[i].length);
6483 #endif
6484       len+= parts_data[i].length;
6485     }
6486     DBUG_ASSERT(len == rec_len);
6487   }
6488 #endif
6489   /*
6490     Start total_record_length from record_length then overhead will
6491     be add
6492   */
6493   parts.total_record_length= parts.record_length;
6494   DBUG_PRINT("info", ("record length: %lu", (ulong) parts.record_length));
6495 
6496   /* process this parts */
6497   if (!(rc= (log_record_type_descriptor[type].prewrite_hook &&
6498              (*log_record_type_descriptor[type].prewrite_hook)(type, trn,
6499                                                                tbl_info,
6500                                                                hook_arg))))
6501   {
6502     switch (log_record_type_descriptor[type].rclass) {
6503     case LOGRECTYPE_VARIABLE_LENGTH:
6504       rc= translog_write_variable_record(lsn, type, tbl_info,
6505                                          short_trid, &parts, trn, hook_arg);
6506       break;
6507     case LOGRECTYPE_PSEUDOFIXEDLENGTH:
6508     case LOGRECTYPE_FIXEDLENGTH:
6509       rc= translog_write_fixed_record(lsn, type, tbl_info,
6510                                       short_trid, &parts, trn, hook_arg);
6511       break;
6512     case LOGRECTYPE_NOT_ALLOWED:
6513     default:
6514       DBUG_ASSERT(0);
6515       rc= 1;
6516     }
6517   }
6518 
6519   DBUG_PRINT("info", ("LSN: " LSN_FMT, LSN_IN_PARTS(*lsn)));
6520   DBUG_RETURN(rc);
6521 }
6522 
6523 
6524 /*
6525   Decode compressed (relative) LSN(s)
6526 
6527   SYNOPSIS
6528    translog_relative_lsn_decode()
6529    base_lsn              LSN for encoding
6530    src                   Decode LSN(s) from here
6531    dst                   Put decoded LSNs here
6532    lsns                  number of LSN(s)
6533 
6534    RETURN
6535      position in sources after decoded LSN(s)
6536 */
6537 
translog_relative_LSN_decode(LSN base_lsn,uchar * src,uchar * dst,uint lsns)6538 static uchar *translog_relative_LSN_decode(LSN base_lsn,
6539                                           uchar *src, uchar *dst, uint lsns)
6540 {
6541   uint i;
6542   for (i= 0; i < lsns; i++, dst+= LSN_STORE_SIZE)
6543   {
6544     src= translog_get_LSN_from_diff(base_lsn, src, dst);
6545   }
6546   return src;
6547 }
6548 
6549 /**
6550    @brief Get header of fixed/pseudo length record and call hook for
6551    it processing
6552 
6553    @param page            Pointer to the buffer with page where LSN chunk is
6554                           placed
6555    @param page_offset     Offset of the first chunk in the page
6556    @param buff            Buffer to be filled with header data
6557 
6558    @return Length of header or operation status
6559      @retval #  number of bytes in TRANSLOG_HEADER_BUFFER::header where
6560                 stored decoded part of the header
6561 */
6562 
translog_fixed_length_header(uchar * page,translog_size_t page_offset,TRANSLOG_HEADER_BUFFER * buff)6563 static int translog_fixed_length_header(uchar *page,
6564                                         translog_size_t page_offset,
6565                                         TRANSLOG_HEADER_BUFFER *buff)
6566 {
6567   struct st_log_record_type_descriptor *desc=
6568     log_record_type_descriptor + buff->type;
6569   uchar *src= page + page_offset + 3;
6570   uchar *dst= buff->header;
6571   uchar *start= src;
6572   int lsns= desc->compressed_LSN;
6573   uint length= desc->fixed_length;
6574   DBUG_ENTER("translog_fixed_length_header");
6575 
6576   buff->record_length= length;
6577 
6578   if (desc->rclass == LOGRECTYPE_PSEUDOFIXEDLENGTH)
6579   {
6580     DBUG_ASSERT(lsns > 0);
6581     src= translog_relative_LSN_decode(buff->lsn, src, dst, lsns);
6582     lsns*= LSN_STORE_SIZE;
6583     dst+= lsns;
6584     length-= lsns;
6585     buff->compressed_LSN_economy= (lsns - (int) (src - start));
6586   }
6587   else
6588     buff->compressed_LSN_economy= 0;
6589 
6590   memcpy(dst, src, length);
6591   buff->non_header_data_start_offset= (uint16) (page_offset +
6592                                                 ((src + length) -
6593                                                  (page + page_offset)));
6594   buff->non_header_data_len= 0;
6595   DBUG_RETURN(buff->record_length);
6596 }
6597 
6598 
6599 /*
6600   Free resources used by TRANSLOG_HEADER_BUFFER
6601 
6602   SYNOPSIS
6603     translog_free_record_header();
6604 */
6605 
translog_free_record_header(TRANSLOG_HEADER_BUFFER * buff)6606 void translog_free_record_header(TRANSLOG_HEADER_BUFFER *buff)
6607 {
6608   DBUG_ENTER("translog_free_record_header");
6609   if (buff->groups_no != 0)
6610   {
6611     my_free(buff->groups);
6612     buff->groups_no= 0;
6613   }
6614   DBUG_VOID_RETURN;
6615 }
6616 
6617 
6618 /**
6619    @brief Returns the current horizon at the end of the current log
6620 
6621    @return Horizon
6622    @retval LSN_ERROR     error
6623    @retvar #             Horizon
6624 */
6625 
translog_get_horizon()6626 TRANSLOG_ADDRESS translog_get_horizon()
6627 {
6628   TRANSLOG_ADDRESS res;
6629   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6630               translog_status == TRANSLOG_READONLY);
6631   translog_lock();
6632   res= log_descriptor.horizon;
6633   translog_unlock();
6634   return res;
6635 }
6636 
6637 
6638 /**
6639    @brief Returns the current horizon at the end of the current log, caller is
6640    assumed to already hold the lock
6641 
6642    @return Horizon
6643    @retval LSN_ERROR     error
6644    @retvar #             Horizon
6645 */
6646 
translog_get_horizon_no_lock()6647 TRANSLOG_ADDRESS translog_get_horizon_no_lock()
6648 {
6649   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6650               translog_status == TRANSLOG_READONLY);
6651   translog_lock_assert_owner();
6652   return log_descriptor.horizon;
6653 }
6654 
6655 
6656 /*
6657   Set last page in the scanner data structure
6658 
6659   SYNOPSIS
6660     translog_scanner_set_last_page()
6661     scanner              Information about current chunk during scanning
6662 
6663   RETURN
6664     0  OK
6665     1  Error
6666 */
6667 
translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA * scanner)6668 static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA *scanner)
6669 {
6670   my_bool page_ok;
6671   if (LSN_FILE_NO(scanner->page_addr) == LSN_FILE_NO(scanner->horizon))
6672   {
6673     /* It is last file => we can easy find last page address by horizon */
6674     uint pagegrest= LSN_OFFSET(scanner->horizon) % TRANSLOG_PAGE_SIZE;
6675     scanner->last_file_page= (scanner->horizon -
6676                               (pagegrest ? pagegrest : TRANSLOG_PAGE_SIZE));
6677     return (0);
6678   }
6679   scanner->last_file_page= scanner->page_addr;
6680   return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok, 0));
6681 }
6682 
6683 
6684 /**
6685   @brief Get page from page cache according to requested method
6686 
6687   @param scanner         The scanner data
6688 
6689   @return operation status
6690   @retval 0 OK
6691   @retval 1 Error
6692 */
6693 
6694 static my_bool
translog_scanner_get_page(TRANSLOG_SCANNER_DATA * scanner)6695 translog_scanner_get_page(TRANSLOG_SCANNER_DATA *scanner)
6696 {
6697   TRANSLOG_VALIDATOR_DATA data;
6698   DBUG_ENTER("translog_scanner_get_page");
6699   data.addr= &scanner->page_addr;
6700   data.was_recovered= 0;
6701   DBUG_RETURN((scanner->page=
6702                translog_get_page(&data, scanner->buffer,
6703                                  (scanner->use_direct_link ?
6704                                   &scanner->direct_link :
6705                                   NULL))) ==
6706                NULL);
6707 }
6708 
6709 
6710 /**
6711   @brief Initialize reader scanner.
6712 
6713   @param lsn             LSN with which it have to be inited
6714   @param fixed_horizon   true if it is OK do not read records which was written
6715                          after scanning beginning
6716   @param scanner         scanner which have to be inited
6717   @param use_direct      prefer using direct lings from page handler
6718                          where it is possible.
6719 
6720   @note If direct link was used translog_destroy_scanner should be
6721         called after it using
6722 
6723   @return status of the operation
6724   @retval 0 OK
6725   @retval 1 Error
6726 */
6727 
translog_scanner_init(LSN lsn,my_bool fixed_horizon,TRANSLOG_SCANNER_DATA * scanner,my_bool use_direct)6728 my_bool translog_scanner_init(LSN lsn,
6729                               my_bool fixed_horizon,
6730                               TRANSLOG_SCANNER_DATA *scanner,
6731                               my_bool use_direct)
6732 {
6733   DBUG_ENTER("translog_scanner_init");
6734   DBUG_PRINT("enter", ("Scanner: %p  LSN: " LSN_FMT,
6735                        scanner, LSN_IN_PARTS(lsn)));
6736   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
6737               translog_status == TRANSLOG_READONLY);
6738 
6739   scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
6740 
6741   scanner->fixed_horizon= fixed_horizon;
6742   scanner->use_direct_link= use_direct;
6743   scanner->direct_link= NULL;
6744 
6745   scanner->horizon= translog_get_horizon();
6746   DBUG_PRINT("info", ("horizon: " LSN_FMT, LSN_IN_PARTS(scanner->horizon)));
6747 
6748   /* lsn < horizon */
6749   DBUG_ASSERT(lsn <= scanner->horizon);
6750 
6751   scanner->page_addr= lsn;
6752   scanner->page_addr-= scanner->page_offset; /*decrease offset */
6753 
6754   if (translog_scanner_set_last_page(scanner))
6755     DBUG_RETURN(1);
6756 
6757   if (translog_scanner_get_page(scanner))
6758     DBUG_RETURN(1);
6759   DBUG_RETURN(0);
6760 }
6761 
6762 
6763 /**
6764   @brief Destroy scanner object;
6765 
6766   @param scanner         The scanner object to destroy
6767 */
6768 
translog_destroy_scanner(TRANSLOG_SCANNER_DATA * scanner)6769 void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner)
6770 {
6771   DBUG_ENTER("translog_destroy_scanner");
6772   DBUG_PRINT("enter", ("Scanner: %p", scanner));
6773   translog_free_link(scanner->direct_link);
6774   DBUG_VOID_RETURN;
6775 }
6776 
6777 
6778 /*
6779   Checks End of the Log
6780 
6781   SYNOPSIS
6782     translog_scanner_eol()
6783     scanner              Information about current chunk during scanning
6784 
6785   RETURN
6786     1  End of the Log
6787     0  OK
6788 */
6789 
translog_scanner_eol(TRANSLOG_SCANNER_DATA * scanner)6790 static my_bool translog_scanner_eol(TRANSLOG_SCANNER_DATA *scanner)
6791 {
6792   DBUG_ENTER("translog_scanner_eol");
6793   DBUG_PRINT("enter",
6794              ("Horizon: " LSN_FMT "  Current: (%u, 0x%x+0x%x=0x%x)",
6795               LSN_IN_PARTS(scanner->horizon),
6796               LSN_IN_PARTS(scanner->page_addr),
6797               (uint) scanner->page_offset,
6798               (uint) (LSN_OFFSET(scanner->page_addr) + scanner->page_offset)));
6799   if (scanner->horizon > (scanner->page_addr +
6800                           scanner->page_offset))
6801   {
6802     DBUG_PRINT("info", ("Horizon is not reached"));
6803     DBUG_RETURN(0);
6804   }
6805   if (scanner->fixed_horizon)
6806   {
6807     DBUG_PRINT("info", ("Horizon is fixed and reached"));
6808     DBUG_RETURN(1);
6809   }
6810   scanner->horizon= translog_get_horizon();
6811   DBUG_PRINT("info",
6812              ("Horizon is re-read, EOL: %d",
6813               scanner->horizon <= (scanner->page_addr +
6814                                    scanner->page_offset)));
6815   DBUG_RETURN(scanner->horizon <= (scanner->page_addr +
6816                                    scanner->page_offset));
6817 }
6818 
6819 
6820 /**
6821   @brief Cheks End of the Page
6822 
6823   @param scanner         Information about current chunk during scanning
6824 
6825   @retval 1  End of the Page
6826   @retval 0  OK
6827 */
6828 
translog_scanner_eop(TRANSLOG_SCANNER_DATA * scanner)6829 static my_bool translog_scanner_eop(TRANSLOG_SCANNER_DATA *scanner)
6830 {
6831   DBUG_ENTER("translog_scanner_eop");
6832   DBUG_RETURN(scanner->page_offset >= TRANSLOG_PAGE_SIZE ||
6833               scanner->page[scanner->page_offset] == TRANSLOG_FILLER);
6834 }
6835 
6836 
6837 /**
6838   @brief Checks End of the File (i.e. we are scanning last page, which do not
6839     mean end of this page)
6840 
6841   @param scanner         Information about current chunk during scanning
6842 
6843   @retval 1 End of the File
6844   @retval 0 OK
6845 */
6846 
translog_scanner_eof(TRANSLOG_SCANNER_DATA * scanner)6847 static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner)
6848 {
6849   DBUG_ENTER("translog_scanner_eof");
6850   DBUG_ASSERT(LSN_FILE_NO(scanner->page_addr) ==
6851               LSN_FILE_NO(scanner->last_file_page));
6852   DBUG_PRINT("enter", ("curr Page: 0x%lx  last page: 0x%lx  "
6853                        "normal EOF: %d",
6854                        (ulong) LSN_OFFSET(scanner->page_addr),
6855                        (ulong) LSN_OFFSET(scanner->last_file_page),
6856                        LSN_OFFSET(scanner->page_addr) ==
6857                        LSN_OFFSET(scanner->last_file_page)));
6858   /*
6859      TODO: detect damaged file EOF,
6860      TODO: issue warning if damaged file EOF detected
6861   */
6862   DBUG_RETURN(scanner->page_addr ==
6863               scanner->last_file_page);
6864 }
6865 
6866 /*
6867   Move scanner to the next chunk
6868 
6869   SYNOPSIS
6870     translog_get_next_chunk()
6871     scanner              Information about current chunk during scanning
6872 
6873   RETURN
6874     0  OK
6875     1  Error
6876 */
6877 
6878 static my_bool
translog_get_next_chunk(TRANSLOG_SCANNER_DATA * scanner)6879 translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner)
6880 {
6881   uint16 len;
6882   DBUG_ENTER("translog_get_next_chunk");
6883 
6884   if (translog_scanner_eop(scanner))
6885     len= TRANSLOG_PAGE_SIZE - scanner->page_offset;
6886   else if ((len= translog_get_total_chunk_length(scanner->page,
6887                                                  scanner->page_offset)) == 0)
6888     DBUG_RETURN(1);
6889   scanner->page_offset+= len;
6890 
6891   if (translog_scanner_eol(scanner))
6892   {
6893     scanner->page= END_OF_LOG;
6894     scanner->page_offset= 0;
6895     DBUG_RETURN(0);
6896   }
6897   if (translog_scanner_eop(scanner))
6898   {
6899     /* before reading next page we should unpin current one if it was pinned */
6900     translog_free_link(scanner->direct_link);
6901     if (translog_scanner_eof(scanner))
6902     {
6903       DBUG_PRINT("info", ("horizon: " LSN_FMT "  pageaddr: " LSN_FMT,
6904                           LSN_IN_PARTS(scanner->horizon),
6905                           LSN_IN_PARTS(scanner->page_addr)));
6906       /* if it is log end it have to be caught before */
6907       DBUG_ASSERT(LSN_FILE_NO(scanner->horizon) >
6908                   LSN_FILE_NO(scanner->page_addr));
6909       scanner->page_addr+= LSN_ONE_FILE;
6910       scanner->page_addr= LSN_REPLACE_OFFSET(scanner->page_addr,
6911                                              TRANSLOG_PAGE_SIZE);
6912       if (translog_scanner_set_last_page(scanner))
6913         DBUG_RETURN(1);
6914     }
6915     else
6916     {
6917       scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */
6918     }
6919 
6920     if (translog_scanner_get_page(scanner))
6921       DBUG_RETURN(1);
6922 
6923     scanner->page_offset= translog_get_first_chunk_offset(scanner->page);
6924     if (translog_scanner_eol(scanner))
6925     {
6926       scanner->page= END_OF_LOG;
6927       scanner->page_offset= 0;
6928       DBUG_RETURN(0);
6929     }
6930     DBUG_ASSERT(scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
6931   }
6932   DBUG_RETURN(0);
6933 }
6934 
6935 
6936 /**
6937    @brief Get header of variable length record and call hook for it processing
6938 
6939    @param page            Pointer to the buffer with page where LSN chunk is
6940                           placed
6941    @param page_offset     Offset of the first chunk in the page
6942    @param buff            Buffer to be filled with header data
6943    @param scanner         If present should be moved to the header page if
6944                           it differ from LSN page
6945 
6946    @return                Length of header or operation status
6947      @retval RECHEADER_READ_ERROR  error
6948      @retval RECHEADER_READ_EOF    End of the log reached during the read
6949      @retval #                     number of bytes in
6950                                    TRANSLOG_HEADER_BUFFER::header where
6951                                    stored decoded part of the header
6952 */
6953 
6954 static int
translog_variable_length_header(uchar * page,translog_size_t page_offset,TRANSLOG_HEADER_BUFFER * buff,TRANSLOG_SCANNER_DATA * scanner)6955 translog_variable_length_header(uchar *page, translog_size_t page_offset,
6956                                 TRANSLOG_HEADER_BUFFER *buff,
6957                                 TRANSLOG_SCANNER_DATA *scanner)
6958 {
6959   struct st_log_record_type_descriptor *desc= (log_record_type_descriptor +
6960                                                buff->type);
6961   uchar *src= page + page_offset + 1 + 2;
6962   uchar *dst= buff->header;
6963   LSN base_lsn;
6964   uint lsns= desc->compressed_LSN;
6965   uint16 chunk_len;
6966   uint16 length= desc->read_header_len;
6967   uint16 buffer_length= length;
6968   uint16 body_len;
6969   int rc;
6970   TRANSLOG_SCANNER_DATA internal_scanner;
6971   DBUG_ENTER("translog_variable_length_header");
6972 
6973   buff->record_length= translog_variable_record_1group_decode_len(&src);
6974   chunk_len= uint2korr(src);
6975   DBUG_PRINT("info", ("rec len: %lu  chunk len: %u  length: %u  bufflen: %u",
6976                       (ulong) buff->record_length, (uint) chunk_len,
6977                       (uint) length, (uint) buffer_length));
6978   if (chunk_len == 0)
6979   {
6980     uint16 page_rest;
6981     DBUG_PRINT("info", ("1 group"));
6982     src+= 2;
6983     page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
6984 
6985     base_lsn= buff->lsn;
6986     body_len= MY_MIN(page_rest, buff->record_length);
6987   }
6988   else
6989   {
6990     uint grp_no, curr;
6991     uint header_to_skip;
6992     uint16 page_rest;
6993 
6994     DBUG_PRINT("info", ("multi-group"));
6995     grp_no= buff->groups_no= uint2korr(src + 2);
6996     if (!(buff->groups=
6997           (TRANSLOG_GROUP*) my_malloc(PSI_INSTRUMENT_ME, sizeof(TRANSLOG_GROUP) * grp_no,
6998                                       MYF(0))))
6999       DBUG_RETURN(RECHEADER_READ_ERROR);
7000     DBUG_PRINT("info", ("Groups: %u", (uint) grp_no));
7001     src+= (2 + 2);
7002     page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
7003     curr= 0;
7004     header_to_skip= (uint) (src - (page + page_offset));
7005     buff->chunk0_pages= 0;
7006 
7007     for (;;)
7008     {
7009       uint i, read_length= grp_no;
7010 
7011       buff->chunk0_pages++;
7012       if (page_rest < grp_no * (7 + 1))
7013         read_length= page_rest / (7 + 1);
7014       DBUG_PRINT("info", ("Read chunk0 page#%u  read: %u  left: %u  "
7015                           "start from: %u",
7016                           buff->chunk0_pages, read_length, grp_no, curr));
7017       for (i= 0; i < read_length; i++, curr++)
7018       {
7019         DBUG_ASSERT(curr < buff->groups_no);
7020         buff->groups[curr].addr= lsn_korr(src + i * (7 + 1));
7021         buff->groups[curr].num= src[i * (7 + 1) + 7];
7022         DBUG_PRINT("info", ("group #%u " LSN_FMT "  chunks: %u",
7023                             curr,
7024                             LSN_IN_PARTS(buff->groups[curr].addr),
7025                             (uint) buff->groups[curr].num));
7026       }
7027       grp_no-= read_length;
7028       if (grp_no == 0)
7029       {
7030         if (scanner)
7031         {
7032           buff->chunk0_data_addr= scanner->page_addr;
7033           /* offset increased */
7034           buff->chunk0_data_addr+= (page_offset + header_to_skip +
7035                                     read_length * (7 + 1));
7036         }
7037         else
7038         {
7039           buff->chunk0_data_addr= buff->lsn;
7040           /* offset increased */
7041           buff->chunk0_data_addr+= (header_to_skip + read_length * (7 + 1));
7042         }
7043         buff->chunk0_data_len= chunk_len - 2 - read_length * (7 + 1);
7044         DBUG_PRINT("info", ("Data address: " LSN_FMT "  len: %u",
7045                             LSN_IN_PARTS(buff->chunk0_data_addr),
7046                             buff->chunk0_data_len));
7047         break;
7048       }
7049       if (scanner == NULL)
7050       {
7051         DBUG_PRINT("info", ("use internal scanner for header reading"));
7052         scanner= &internal_scanner;
7053         if (translog_scanner_init(buff->lsn, 1, scanner, 0))
7054         {
7055           rc= RECHEADER_READ_ERROR;
7056           goto exit_and_free;
7057         }
7058       }
7059       if (translog_get_next_chunk(scanner))
7060       {
7061         if (scanner == &internal_scanner)
7062           translog_destroy_scanner(scanner);
7063         rc= RECHEADER_READ_ERROR;
7064         goto exit_and_free;
7065       }
7066       if (scanner->page == END_OF_LOG)
7067       {
7068         if (scanner == &internal_scanner)
7069           translog_destroy_scanner(scanner);
7070         rc= RECHEADER_READ_EOF;
7071         goto exit_and_free;
7072       }
7073       page= scanner->page;
7074       page_offset= scanner->page_offset;
7075       src= page + page_offset + header_to_skip;
7076       chunk_len= uint2korr(src - 2 - 2);
7077       DBUG_PRINT("info", ("Chunk len: %u", (uint) chunk_len));
7078       page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
7079     }
7080 
7081     if (scanner == NULL)
7082     {
7083       DBUG_PRINT("info", ("use internal scanner"));
7084       scanner= &internal_scanner;
7085     }
7086     else
7087     {
7088       translog_destroy_scanner(scanner);
7089     }
7090     base_lsn= buff->groups[0].addr;
7091     translog_scanner_init(base_lsn, 1, scanner, scanner == &internal_scanner);
7092     /* first group chunk is always chunk type 2 */
7093     page= scanner->page;
7094     page_offset= scanner->page_offset;
7095     src= page + page_offset + 1;
7096     page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
7097     body_len= page_rest;
7098     if (scanner == &internal_scanner)
7099       translog_destroy_scanner(scanner);
7100   }
7101   if (lsns)
7102   {
7103     uchar *start= src;
7104     src= translog_relative_LSN_decode(base_lsn, src, dst, lsns);
7105     lsns*= LSN_STORE_SIZE;
7106     dst+= lsns;
7107     length-= lsns;
7108     buff->record_length+= (buff->compressed_LSN_economy=
7109                            (int) (lsns - (src - start)));
7110     DBUG_PRINT("info", ("lsns: %u  length: %u  economy: %d  new length: %lu",
7111                         lsns / LSN_STORE_SIZE, (uint) length,
7112                         (int) buff->compressed_LSN_economy,
7113                         (ulong) buff->record_length));
7114     body_len-= (uint16) (src - start);
7115   }
7116   else
7117     buff->compressed_LSN_economy= 0;
7118 
7119   DBUG_ASSERT(body_len >= length);
7120   body_len-= length;
7121   memcpy(dst, src, length);
7122   buff->non_header_data_start_offset= (uint16) (src + length - page);
7123   buff->non_header_data_len= body_len;
7124   DBUG_PRINT("info", ("non_header_data_start_offset: %u  len: %u  buffer: %u",
7125                       buff->non_header_data_start_offset,
7126                       buff->non_header_data_len, buffer_length));
7127   DBUG_RETURN(buffer_length);
7128 
7129 exit_and_free:
7130   my_free(buff->groups);
7131   buff->groups_no= 0; /* prevent try to use of buff->groups */
7132   DBUG_RETURN(rc);
7133 }
7134 
7135 
7136 /**
7137    @brief Read record header from the given buffer
7138 
7139    @param page            page content buffer
7140    @param page_offset     offset of the chunk in the page
7141    @param buff            destination buffer
7142    @param scanner         If this is set the scanner will be moved to the
7143                           record header page (differ from LSN page in case of
7144                           multi-group records)
7145 
7146    @return Length of header or operation status
7147      @retval RECHEADER_READ_ERROR  error
7148      @retval #                     number of bytes in
7149                                    TRANSLOG_HEADER_BUFFER::header where
7150                                    stored decoded part of the header
7151 */
7152 
translog_read_record_header_from_buffer(uchar * page,uint16 page_offset,TRANSLOG_HEADER_BUFFER * buff,TRANSLOG_SCANNER_DATA * scanner)7153 int translog_read_record_header_from_buffer(uchar *page,
7154                                             uint16 page_offset,
7155                                             TRANSLOG_HEADER_BUFFER *buff,
7156                                             TRANSLOG_SCANNER_DATA *scanner)
7157 {
7158   translog_size_t res;
7159   DBUG_ENTER("translog_read_record_header_from_buffer");
7160   DBUG_PRINT("info", ("page byte: 0x%x  offset: %u",
7161                       (uint) page[page_offset], (uint) page_offset));
7162   DBUG_ASSERT(translog_is_LSN_chunk(page[page_offset]));
7163   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7164               translog_status == TRANSLOG_READONLY);
7165   buff->type= (page[page_offset] & TRANSLOG_REC_TYPE);
7166   buff->short_trid= uint2korr(page + page_offset + 1);
7167   DBUG_PRINT("info", ("Type %u, Short TrID %u, LSN " LSN_FMT,
7168                       (uint) buff->type, (uint)buff->short_trid,
7169                       LSN_IN_PARTS(buff->lsn)));
7170   /* Read required bytes from the header and call hook */
7171   switch (log_record_type_descriptor[buff->type].rclass) {
7172   case LOGRECTYPE_VARIABLE_LENGTH:
7173     res= translog_variable_length_header(page, page_offset, buff,
7174                                          scanner);
7175     break;
7176   case LOGRECTYPE_PSEUDOFIXEDLENGTH:
7177   case LOGRECTYPE_FIXEDLENGTH:
7178     res= translog_fixed_length_header(page, page_offset, buff);
7179     break;
7180   default:
7181     DBUG_ASSERT(0); /* we read some junk (got no LSN) */
7182     res= RECHEADER_READ_ERROR;
7183   }
7184   DBUG_RETURN(res);
7185 }
7186 
7187 
7188 /**
7189    @brief Read record header and some fixed part of a record (the part depend
7190    on record type).
7191 
7192    @param lsn             log record serial number (address of the record)
7193    @param buff            log record header buffer
7194 
7195    @note Some type of record can be read completely by this call
7196    @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
7197    LSN can be translated to absolute one), some fields can be added (like
7198    actual header length in the record if the header has variable length)
7199 
7200    @return Length of header or operation status
7201      @retval RECHEADER_READ_ERROR  error
7202      @retval #                     number of bytes in
7203                                    TRANSLOG_HEADER_BUFFER::header where
7204                                    stored decoded part of the header
7205 */
7206 
translog_read_record_header(LSN lsn,TRANSLOG_HEADER_BUFFER * buff)7207 int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff)
7208 {
7209   TRANSLOG_PAGE_SIZE_BUFF psize_buff;
7210   uchar *page;
7211   translog_size_t res, page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE;
7212   PAGECACHE_BLOCK_LINK *direct_link;
7213   TRANSLOG_ADDRESS addr;
7214   TRANSLOG_VALIDATOR_DATA data;
7215   DBUG_ENTER("translog_read_record_header");
7216   DBUG_PRINT("enter", ("LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
7217   DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
7218   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7219               translog_status == TRANSLOG_READONLY);
7220 
7221   buff->lsn= lsn;
7222   buff->groups_no= 0;
7223   data.addr= &addr;
7224   data.was_recovered= 0;
7225   addr= lsn;
7226   addr-= page_offset; /* offset decreasing */
7227   res= (!(page= translog_get_page(&data, psize_buff.buffer, &direct_link))) ?
7228     RECHEADER_READ_ERROR :
7229     translog_read_record_header_from_buffer(page, page_offset, buff, 0);
7230   translog_free_link(direct_link);
7231   DBUG_RETURN(res);
7232 }
7233 
7234 
7235 /**
7236    @brief Read record header and some fixed part of a record (the part depend
7237    on record type).
7238 
7239    @param scan            scanner position to read
7240    @param buff            log record header buffer
7241    @param move_scanner    request to move scanner to the header position
7242 
7243    @note Some type of record can be read completely by this call
7244    @note "Decoded" header stored in TRANSLOG_HEADER_BUFFER::header (relative
7245    LSN can be translated to absolute one), some fields can be added (like
7246    actual header length in the record if the header has variable length)
7247 
7248    @return Length of header or operation status
7249      @retval RECHEADER_READ_ERROR  error
7250      @retval #                     number of bytes in
7251                                    TRANSLOG_HEADER_BUFFER::header where stored
7252                                    decoded part of the header
7253 */
7254 
translog_read_record_header_scan(TRANSLOG_SCANNER_DATA * scanner,TRANSLOG_HEADER_BUFFER * buff,my_bool move_scanner)7255 int translog_read_record_header_scan(TRANSLOG_SCANNER_DATA *scanner,
7256                                      TRANSLOG_HEADER_BUFFER *buff,
7257                                      my_bool move_scanner)
7258 {
7259   translog_size_t res;
7260   DBUG_ENTER("translog_read_record_header_scan");
7261   DBUG_PRINT("enter", ("Scanner: Cur: " LSN_FMT "  Hrz: " LSN_FMT "  "
7262                        "Lst: " LSN_FMT "  Offset: %u(%x)  fixed %d",
7263                        LSN_IN_PARTS(scanner->page_addr),
7264                        LSN_IN_PARTS(scanner->horizon),
7265                        LSN_IN_PARTS(scanner->last_file_page),
7266                        (uint) scanner->page_offset,
7267                        (uint) scanner->page_offset, scanner->fixed_horizon));
7268   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7269               translog_status == TRANSLOG_READONLY);
7270   buff->groups_no= 0;
7271   buff->lsn= scanner->page_addr;
7272   buff->lsn+= scanner->page_offset; /* offset increasing */
7273   res= translog_read_record_header_from_buffer(scanner->page,
7274                                                scanner->page_offset,
7275                                                buff,
7276                                                (move_scanner ?
7277                                                 scanner : 0));
7278   DBUG_RETURN(res);
7279 }
7280 
7281 
7282 /**
7283    @brief Read record header and some fixed part of the next record (the part
7284    depend on record type).
7285 
7286    @param scanner         data for scanning if lsn is NULL scanner data
7287                           will be used for continue scanning.
7288                           The scanner can be NULL.
7289 
7290    @param buff            log record header buffer
7291 
7292    @return Length of header or operation status
7293      @retval RECHEADER_READ_ERROR  error
7294      @retval RECHEADER_READ_EOF    EOF
7295      @retval #                     number of bytes in
7296                                    TRANSLOG_HEADER_BUFFER::header where
7297                                    stored decoded part of the header
7298 */
7299 
translog_read_next_record_header(TRANSLOG_SCANNER_DATA * scanner,TRANSLOG_HEADER_BUFFER * buff)7300 int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner,
7301                                      TRANSLOG_HEADER_BUFFER *buff)
7302 {
7303   translog_size_t res;
7304 
7305   DBUG_ENTER("translog_read_next_record_header");
7306   buff->groups_no= 0;        /* to be sure that we will free it right */
7307   DBUG_PRINT("enter", ("scanner: %p", scanner));
7308   DBUG_PRINT("info", ("Scanner: Cur: " LSN_FMT "  Hrz: " LSN_FMT "  "
7309                       "Lst: " LSN_FMT "  Offset: %u(%x)  fixed: %d",
7310                       LSN_IN_PARTS(scanner->page_addr),
7311                       LSN_IN_PARTS(scanner->horizon),
7312                       LSN_IN_PARTS(scanner->last_file_page),
7313                       (uint) scanner->page_offset,
7314                       (uint) scanner->page_offset, scanner->fixed_horizon));
7315   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7316               translog_status == TRANSLOG_READONLY);
7317 
7318   do
7319   {
7320     if (translog_get_next_chunk(scanner))
7321       DBUG_RETURN(RECHEADER_READ_ERROR);
7322     if (scanner->page == END_OF_LOG)
7323     {
7324        DBUG_PRINT("info", ("End of file from the scanner"));
7325        /* Last record was read */
7326        buff->lsn= LSN_IMPOSSIBLE;
7327        DBUG_RETURN(RECHEADER_READ_EOF);
7328     }
7329     DBUG_PRINT("info", ("Page: " LSN_FMT "  offset: %lu  byte: %x",
7330                         LSN_IN_PARTS(scanner->page_addr),
7331                         (ulong) scanner->page_offset,
7332                         (uint) scanner->page[scanner->page_offset]));
7333   } while (!translog_is_LSN_chunk(scanner->page[scanner->page_offset]) &&
7334            scanner->page[scanner->page_offset] != TRANSLOG_FILLER);
7335 
7336   if (scanner->page[scanner->page_offset] == TRANSLOG_FILLER)
7337   {
7338     DBUG_PRINT("info", ("End of file"));
7339     /* Last record was read */
7340     buff->lsn= LSN_IMPOSSIBLE;
7341     /* Return 'end of log' marker */
7342     res= RECHEADER_READ_EOF;
7343   }
7344   else
7345     res= translog_read_record_header_scan(scanner, buff, 0);
7346   DBUG_RETURN(res);
7347 }
7348 
7349 
7350 /*
7351   Moves record data reader to the next chunk and fill the data reader
7352   information about that chunk.
7353 
7354   SYNOPSIS
7355     translog_record_read_next_chunk()
7356     data                 data cursor
7357 
7358   RETURN
7359     0  OK
7360     1  Error
7361 */
7362 
translog_record_read_next_chunk(TRANSLOG_READER_DATA * data)7363 static my_bool translog_record_read_next_chunk(TRANSLOG_READER_DATA *data)
7364 {
7365   translog_size_t new_current_offset= data->current_offset + data->chunk_size;
7366   uint16 chunk_header_len, chunk_len;
7367   uint8 type;
7368   DBUG_ENTER("translog_record_read_next_chunk");
7369 
7370   if (data->eor)
7371   {
7372     DBUG_PRINT("info", ("end of the record flag set"));
7373     DBUG_RETURN(1);
7374   }
7375 
7376   if (data->header.groups_no &&
7377       data->header.groups_no - 1 != data->current_group &&
7378       data->header.groups[data->current_group].num == data->current_chunk)
7379   {
7380     /* Goto next group */
7381     data->current_group++;
7382     data->current_chunk= 0;
7383     DBUG_PRINT("info", ("skip to group: #%u", data->current_group));
7384     translog_destroy_scanner(&data->scanner);
7385     translog_scanner_init(data->header.groups[data->current_group].addr,
7386                           1, &data->scanner, 1);
7387   }
7388   else
7389   {
7390     data->current_chunk++;
7391     if (translog_get_next_chunk(&data->scanner))
7392       DBUG_RETURN(1);
7393      if (data->scanner.page == END_OF_LOG)
7394      {
7395        /*
7396          Actually it should not happened, but we want to quit nicely in case
7397          of a truncated log
7398        */
7399        DBUG_RETURN(1);
7400      }
7401   }
7402   type= data->scanner.page[data->scanner.page_offset] & TRANSLOG_CHUNK_TYPE;
7403 
7404   if (type == TRANSLOG_CHUNK_LSN && data->header.groups_no)
7405   {
7406     DBUG_PRINT("info",
7407                ("Last chunk: data len: %u  offset: %u  group: %u of %u",
7408                 data->header.chunk0_data_len, data->scanner.page_offset,
7409                 data->current_group, data->header.groups_no - 1));
7410     DBUG_ASSERT(data->header.groups_no - 1 == data->current_group);
7411     DBUG_ASSERT(data->header.lsn ==
7412                 data->scanner.page_addr + data->scanner.page_offset);
7413     translog_destroy_scanner(&data->scanner);
7414     translog_scanner_init(data->header.chunk0_data_addr, 1, &data->scanner, 1);
7415     data->chunk_size= data->header.chunk0_data_len;
7416     data->body_offset= data->scanner.page_offset;
7417     data->current_offset= new_current_offset;
7418     data->eor= 1;
7419     DBUG_RETURN(0);
7420   }
7421 
7422   if (type == TRANSLOG_CHUNK_LSN || type == TRANSLOG_CHUNK_FIXED)
7423   {
7424     data->eor= 1;
7425     DBUG_RETURN(1);                             /* End of record */
7426   }
7427 
7428   chunk_header_len=
7429     translog_get_chunk_header_length(data->scanner.page +
7430                                      data->scanner.page_offset);
7431   chunk_len= translog_get_total_chunk_length(data->scanner.page,
7432                                              data->scanner.page_offset);
7433   data->chunk_size= chunk_len - chunk_header_len;
7434   data->body_offset= data->scanner.page_offset + chunk_header_len;
7435   data->current_offset= new_current_offset;
7436   DBUG_PRINT("info", ("grp: %u  chunk: %u  body_offset: %u  chunk_size: %u  "
7437                       "current_offset: %lu",
7438                       (uint) data->current_group,
7439                       (uint) data->current_chunk,
7440                       (uint) data->body_offset,
7441                       (uint) data->chunk_size, (ulong) data->current_offset));
7442   DBUG_RETURN(0);
7443 }
7444 
7445 
7446 /*
7447   Initialize record reader data from LSN
7448 
7449   SYNOPSIS
7450     translog_init_reader_data()
7451     lsn                  reference to LSN we should start from
7452     data                 reader data to initialize
7453 
7454   RETURN
7455     0  OK
7456     1  Error
7457 */
7458 
translog_init_reader_data(LSN lsn,TRANSLOG_READER_DATA * data)7459 static my_bool translog_init_reader_data(LSN lsn,
7460                                          TRANSLOG_READER_DATA *data)
7461 {
7462   int read_header;
7463   DBUG_ENTER("translog_init_reader_data");
7464   if (translog_scanner_init(lsn, 1, &data->scanner, 1) ||
7465       ((read_header=
7466         translog_read_record_header_scan(&data->scanner, &data->header, 1))
7467        == RECHEADER_READ_ERROR))
7468     DBUG_RETURN(1);
7469   data->read_header= read_header;
7470   data->body_offset= data->header.non_header_data_start_offset;
7471   data->chunk_size= data->header.non_header_data_len;
7472   data->current_offset= data->read_header;
7473   data->current_group= 0;
7474   data->current_chunk= 0;
7475   data->eor= 0;
7476   DBUG_PRINT("info", ("read_header: %u  "
7477                       "body_offset: %u  chunk_size: %u  current_offset: %lu",
7478                       (uint) data->read_header,
7479                       (uint) data->body_offset,
7480                       (uint) data->chunk_size, (ulong) data->current_offset));
7481   DBUG_RETURN(0);
7482 }
7483 
7484 
7485 /**
7486   @brief Destroy reader data object
7487 */
7488 
translog_destroy_reader_data(TRANSLOG_READER_DATA * data)7489 static void translog_destroy_reader_data(TRANSLOG_READER_DATA *data)
7490 {
7491   translog_destroy_scanner(&data->scanner);
7492   translog_free_record_header(&data->header);
7493 }
7494 
7495 
7496 /*
7497   Read a part of the record.
7498 
7499   SYNOPSIS
7500     translog_read_record_header()
7501     lsn                  log record serial number (address of the record)
7502     offset               From the beginning of the record beginning (read
7503                          by translog_read_record_header).
7504     length               Length of record part which have to be read.
7505     buffer               Buffer where to read the record part (have to be at
7506                          least 'length' bytes length)
7507 
7508   RETURN
7509     length of data actually read
7510 */
7511 
translog_read_record(LSN lsn,translog_size_t offset,translog_size_t length,uchar * buffer,TRANSLOG_READER_DATA * data)7512 translog_size_t translog_read_record(LSN lsn,
7513                                      translog_size_t offset,
7514                                      translog_size_t length,
7515                                      uchar *buffer,
7516                                      TRANSLOG_READER_DATA *data)
7517 {
7518   translog_size_t requested_length= length;
7519   translog_size_t end= offset + length;
7520   TRANSLOG_READER_DATA internal_data;
7521   DBUG_ENTER("translog_read_record");
7522   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
7523               translog_status == TRANSLOG_READONLY);
7524 
7525   if (data == NULL)
7526   {
7527     DBUG_ASSERT(lsn != LSN_IMPOSSIBLE);
7528     data= &internal_data;
7529   }
7530   if (lsn ||
7531       (offset < data->current_offset &&
7532        !(offset < data->read_header && offset + length < data->read_header)))
7533   {
7534     if (translog_init_reader_data(lsn, data))
7535       DBUG_RETURN(0);
7536   }
7537   DBUG_PRINT("info", ("Offset: %lu  length: %lu  "
7538                       "Scanner: Cur: " LSN_FMT "  Hrz: " LSN_FMT "  "
7539                       "Lst: " LSN_FMT "  Offset: %u(%x)  fixed: %d",
7540                       (ulong) offset, (ulong) length,
7541                       LSN_IN_PARTS(data->scanner.page_addr),
7542                       LSN_IN_PARTS(data->scanner.horizon),
7543                       LSN_IN_PARTS(data->scanner.last_file_page),
7544                       (uint) data->scanner.page_offset,
7545                       (uint) data->scanner.page_offset,
7546                       data->scanner.fixed_horizon));
7547   if (offset < data->read_header)
7548   {
7549     uint16 len= MY_MIN(data->read_header, end) - offset;
7550     DBUG_PRINT("info",
7551                ("enter header offset: %lu  length: %lu",
7552                 (ulong) offset, (ulong) length));
7553     memcpy(buffer, data->header.header + offset, len);
7554     length-= len;
7555     if (length == 0)
7556     {
7557       translog_destroy_reader_data(data);
7558       DBUG_RETURN(requested_length);
7559     }
7560     offset+= len;
7561     buffer+= len;
7562     DBUG_PRINT("info",
7563                ("len: %u  offset: %lu   curr: %lu  length: %lu",
7564                 len, (ulong) offset, (ulong) data->current_offset,
7565                 (ulong) length));
7566   }
7567   /* TODO: find first page which we should read by offset */
7568 
7569   /* read the record chunk by chunk */
7570   for(;;)
7571   {
7572     uint page_end= data->current_offset + data->chunk_size;
7573     DBUG_PRINT("info",
7574                ("enter body offset: %lu  curr: %lu  "
7575                 "length: %lu  page_end: %lu",
7576                 (ulong) offset, (ulong) data->current_offset, (ulong) length,
7577                 (ulong) page_end));
7578     if (offset < page_end)
7579     {
7580       uint len= page_end - offset;
7581       set_if_smaller(len, length); /* in case we read beyond record's end */
7582       DBUG_ASSERT(offset >= data->current_offset);
7583       memcpy(buffer,
7584               data->scanner.page + data->body_offset +
7585               (offset - data->current_offset), len);
7586       length-= len;
7587       if (length == 0)
7588       {
7589         translog_destroy_reader_data(data);
7590         DBUG_RETURN(requested_length);
7591       }
7592       offset+= len;
7593       buffer+= len;
7594       DBUG_PRINT("info",
7595                  ("len: %u  offset: %lu  curr: %lu  length: %lu",
7596                   len, (ulong) offset, (ulong) data->current_offset,
7597                   (ulong) length));
7598     }
7599     if (translog_record_read_next_chunk(data))
7600     {
7601       translog_destroy_reader_data(data);
7602       DBUG_RETURN(requested_length - length);
7603     }
7604   }
7605 }
7606 
7607 
/**
  @brief Force skipping to the next buffer

  Finishes the buffer the log cursor (log_descriptor.bc) currently writes
  to and starts the buffer with the next number (modulo
  TRANSLOG_BUFFERS_NO). If the current page is only partly filled, the rest
  of it is padded with TRANSLOG_FILLER in the old buffer and the new buffer
  starts as an "overlay" of that page: the new buffer begins at the start of
  the page and the already written part of the page is either copied to it
  or marked as skipped data.

  @note The caller must hold the log lock (translog_lock_assert_owner()).

  @todo Do not copy old page content if all page protections are switched off
  (because we do not need calculate something or change old parts of the page)
*/

static void translog_force_current_buffer_to_finish()
{
  TRANSLOG_ADDRESS new_buff_beginning;
  uint16 old_buffer_no= log_descriptor.bc.buffer_no;
  uint16 new_buffer_no= (old_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
  struct st_translog_buffer *new_buffer= (log_descriptor.buffers +
                                          new_buffer_no);
  struct st_translog_buffer *old_buffer= log_descriptor.bc.buffer;
  /* Beginning of the page which is currently being filled */
  uchar *data= log_descriptor.bc.ptr - log_descriptor.bc.current_page_fill;
  /* Number of bytes which are not yet used on the current page */
  uint16 left= TRANSLOG_PAGE_SIZE - log_descriptor.bc.current_page_fill;
  /* current_page_fill is assigned (and later used) only if 'left' != 0 */
  uint16 UNINIT_VAR(current_page_fill), write_counter, previous_offset;
  DBUG_ENTER("translog_force_current_buffer_to_finish");

  DBUG_PRINT("enter", ("Buffer #%u %p  "
                       "Buffer addr: " LSN_FMT "  "
                       "Page addr: " LSN_FMT "  "
                       "size: %lu (%lu)  Pg: %u  left: %u  in progress %u",
                       (uint) old_buffer_no,
                       old_buffer,
                       LSN_IN_PARTS(old_buffer->offset),
                       LSN_FILE_NO(log_descriptor.horizon),
                       (uint)(LSN_OFFSET(log_descriptor.horizon) -
                                log_descriptor.bc.current_page_fill),
                       (ulong) old_buffer->size,
                       (ulong) (log_descriptor.bc.ptr -log_descriptor.bc.
                                buffer->buffer),
                       (uint) log_descriptor.bc.current_page_fill,
                       (uint) left,
                       (uint) old_buffer->
                       copy_to_buffer_in_progress));
  translog_lock_assert_owner();
  /* By default the new buffer begins right after the end of the old one */
  new_buff_beginning= old_buffer->offset;
  new_buff_beginning+= old_buffer->size; /* increase offset */

  DBUG_ASSERT(log_descriptor.bc.ptr !=NULL);
  DBUG_ASSERT(LSN_FILE_NO(log_descriptor.horizon) ==
              LSN_FILE_NO(old_buffer->offset) ||
              translog_status == TRANSLOG_READONLY );
  translog_check_cursor(&log_descriptor.bc);
  DBUG_ASSERT(left < TRANSLOG_PAGE_SIZE);
  if (left)
  {
    /*
       TODO: if 'left' is so small that can't hold any other record
       then do not move the page
    */
    DBUG_PRINT("info", ("left: %u", (uint) left));

    /* Remember where the old buffer ended before it is padded */
    old_buffer->pre_force_close_horizon=
      old_buffer->offset + old_buffer->size;
    /* decrease offset: the new buffer begins at the start of this page */
    new_buff_beginning-= log_descriptor.bc.current_page_fill;
    current_page_fill= log_descriptor.bc.current_page_fill;

    /* Pad the rest of the page in the old buffer with the filler */
    memset(log_descriptor.bc.ptr, TRANSLOG_FILLER, left);
    old_buffer->size+= left;
    DBUG_PRINT("info", ("Finish Page buffer #%u: %p  "
                        "Size: %lu",
                        (uint) old_buffer->buffer_no,
                        old_buffer,
                        (ulong) old_buffer->size));
    DBUG_ASSERT(old_buffer->buffer_no ==
                log_descriptor.bc.buffer_no);
  }
  else
  {
    /* The page is full: the new buffer will start with a new page */
    log_descriptor.bc.current_page_fill= 0;
  }

  translog_buffer_lock(new_buffer);
#ifndef DBUG_OFF
  {
    /* Debug only: state of the new buffer before waiting, checked below */
    TRANSLOG_ADDRESS offset= new_buffer->offset;
    TRANSLOG_FILE *file= new_buffer->file;
    uint8 ver= new_buffer->ver;
    translog_lock_assert_owner();
#endif
    translog_wait_for_buffer_free(new_buffer);
#ifndef DBUG_OFF
    /* We keep the handler locked so nobody can start this new buffer */
    DBUG_ASSERT(offset == new_buffer->offset && new_buffer->file == NULL &&
                (file == NULL ? ver : (uint8)(ver + 1)) == new_buffer->ver);
  }
#endif

  /*
    Save the cursor values which are restored right after
    translog_start_buffer() below
  */
  write_counter= log_descriptor.bc.write_counter;
  previous_offset= log_descriptor.bc.previous_offset;
  translog_start_buffer(new_buffer, &log_descriptor.bc, new_buffer_no);
  /* Fix buffer offset (which was incorrectly set to horizon) */
  log_descriptor.bc.buffer->offset= new_buff_beginning;
  log_descriptor.bc.write_counter= write_counter;
  log_descriptor.bc.previous_offset= previous_offset;
  new_buffer->prev_last_lsn= BUFFER_MAX_LSN(old_buffer);
  DBUG_PRINT("info", ("prev_last_lsn set to " LSN_FMT "  buffer: %p",
                      LSN_IN_PARTS(new_buffer->prev_last_lsn),
                      new_buffer));

  /*
    Advance the log pointer, increase the number of writers and let other
    threads write to the log while we process the old page content
  */
  if (left)
  {
    /* Continue writing after the already filled part of the page */
    log_descriptor.bc.ptr+= current_page_fill;
    log_descriptor.bc.buffer->size= log_descriptor.bc.current_page_fill=
      current_page_fill;
    new_buffer->overlay= 1;
  }
  else
    translog_new_page_header(&log_descriptor.horizon, &log_descriptor.bc);
  translog_buffer_increase_writers(new_buffer);
  translog_buffer_unlock(new_buffer);

  /*
    We have to wait until all writers finish before we start changing the
    pages by applying protection and copying the page content to the
    new buffer.
  */
#ifndef DBUG_OFF
  {
    /* Debug only: state of the old buffer before waiting, checked below */
    TRANSLOG_ADDRESS offset= old_buffer->offset;
    TRANSLOG_FILE *file= old_buffer->file;
    uint8 ver= old_buffer->ver;
#endif
    /*
      Buffers can be flushed by many threads, but the log flush from which
      this function is called is done by only one thread at a time, so no
      other thread can set is_closing_buffer.
    */
    DBUG_ASSERT(!old_buffer->is_closing_buffer);
    old_buffer->is_closing_buffer= 1; /* Other flushes will wait */
    DBUG_PRINT("enter", ("Buffer #%u %p is_closing_buffer set",
                         (uint) old_buffer->buffer_no, old_buffer));
    translog_wait_for_writers(old_buffer);
#ifndef DBUG_OFF
    /* We blocked flushing of this buffer so it should not have changed */
    DBUG_ASSERT(offset == old_buffer->offset && file == old_buffer->file &&
                ver == old_buffer->ver);
  }
#endif

  if (log_descriptor.flags & TRANSLOG_SECTOR_PROTECTION)
  {
    translog_put_sector_protection(data, &log_descriptor.bc);
    if (left)
    {
      /* The same page continues in the new buffer */
      log_descriptor.bc.write_counter++;
      log_descriptor.bc.previous_offset= current_page_fill;
    }
    else
    {
      DBUG_PRINT("info", ("drop write_counter"));
      log_descriptor.bc.write_counter= 0;
      log_descriptor.bc.previous_offset= 0;
    }
  }

  if (log_descriptor.flags & TRANSLOG_PAGE_CRC)
  {
    /* CRC covers the page content which follows the page overhead */
    uint32 crc= translog_crc(data + log_descriptor.page_overhead,
                             TRANSLOG_PAGE_SIZE -
                             log_descriptor.page_overhead);
    DBUG_PRINT("info", ("CRC: 0x%x", crc));
    /* The CRC is stored at byte offset 3 + 3 + 1 of the page */
    int4store(data + 3 + 3 + 1, crc);
  }
  /* The old page is final now: wake up those waiting for the closing */
  old_buffer->is_closing_buffer= 0;
  DBUG_PRINT("enter", ("Buffer #%u %p  is_closing_buffer cleared",
                       (uint) old_buffer->buffer_no, old_buffer));
  mysql_cond_broadcast(&old_buffer->waiting_filling_buffer);

  if (left)
  {
    if (log_descriptor.flags &
        (TRANSLOG_PAGE_CRC | TRANSLOG_SECTOR_PROTECTION))
      /* The page beginning was changed above, so it has to be copied */
      memcpy(new_buffer->buffer, data, current_page_fill);
    else
    {
      /*
        This page header does not change if we add more data to the page so
        we can not copy it and will not overwrite later
      */
      new_buffer->skipped_data= current_page_fill;
      TRASH_ALLOC(new_buffer->buffer, current_page_fill);
      DBUG_ASSERT(new_buffer->skipped_data < TRANSLOG_PAGE_SIZE);
    }
  }
  /* Link the two buffers and release the writer taken on the new one */
  old_buffer->next_buffer_offset= new_buffer->offset;
  translog_buffer_lock(new_buffer);
  new_buffer->prev_buffer_offset= old_buffer->offset;
  translog_buffer_decrease_writers(new_buffer);
  translog_buffer_unlock(new_buffer);

  DBUG_VOID_RETURN;
}
7809 
7810 
7811 /**
7812   @brief Waits while given lsn will be flushed
7813 
7814   @param  lsn            log record serial number up to which (inclusive)
7815                          the log has to be flushed
7816 */
7817 
translog_flush_wait_for_end(LSN lsn)7818 void  translog_flush_wait_for_end(LSN lsn)
7819 {
7820   DBUG_ENTER("translog_flush_wait_for_end");
7821   DBUG_PRINT("enter", ("LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
7822   mysql_mutex_assert_owner(&log_descriptor.log_flush_lock);
7823   while (cmp_translog_addr(log_descriptor.flushed, lsn) < 0)
7824     mysql_cond_wait(&log_descriptor.log_flush_cond,
7825                       &log_descriptor.log_flush_lock);
7826   DBUG_VOID_RETURN;
7827 }
7828 
7829 
7830 /**
7831   @brief Sets goal for the next flush pass and waits for this pass end.
7832 
7833   @param  lsn            log record serial number up to which (inclusive)
7834                          the log has to be flushed
7835 */
7836 
translog_flush_set_new_goal_and_wait(TRANSLOG_ADDRESS lsn)7837 void translog_flush_set_new_goal_and_wait(TRANSLOG_ADDRESS lsn)
7838 {
7839   int flush_no= log_descriptor.flush_no;
7840   DBUG_ENTER("translog_flush_set_new_goal_and_wait");
7841   DBUG_PRINT("enter", ("LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
7842   mysql_mutex_assert_owner(&log_descriptor.log_flush_lock);
7843   if (cmp_translog_addr(lsn, log_descriptor.next_pass_max_lsn) > 0)
7844   {
7845     log_descriptor.next_pass_max_lsn= lsn;
7846     log_descriptor.max_lsn_requester= pthread_self();
7847     mysql_cond_broadcast(&log_descriptor.new_goal_cond);
7848   }
7849   while (flush_no == log_descriptor.flush_no)
7850   {
7851     mysql_cond_wait(&log_descriptor.log_flush_cond,
7852                       &log_descriptor.log_flush_lock);
7853   }
7854   DBUG_VOID_RETURN;
7855 }
7856 
7857 
7858 /**
7859   @brief sync() range of files (inclusive) and directory (by request)
7860 
7861   @param min             min internal file number to flush
7862   @param max             max internal file number to flush
7863   @param sync_dir        need sync directory
7864 
7865   return Operation status
7866     @retval 0      OK
7867     @retval 1      Error
7868 */
7869 
translog_sync_files(uint32 min,uint32 max,my_bool sync_dir)7870 static my_bool translog_sync_files(uint32 min, uint32 max,
7871                                    my_bool sync_dir)
7872 {
7873   uint fn;
7874   my_bool rc= 0;
7875   ulonglong flush_interval;
7876   DBUG_ENTER("translog_sync_files");
7877   DBUG_PRINT("info", ("min: %lu  max: %lu  sync dir: %d",
7878                       (ulong) min, (ulong) max, (int) sync_dir));
7879   DBUG_ASSERT(min <= max);
7880 
7881   flush_interval= group_commit_wait;
7882   if (flush_interval)
7883     flush_start= microsecond_interval_timer();
7884   for (fn= min; fn <= max; fn++)
7885   {
7886     TRANSLOG_FILE *file= get_logfile_by_number(fn);
7887     DBUG_ASSERT(file != NULL);
7888     if (!file->is_sync)
7889     {
7890       if (mysql_file_sync(file->handler.file, MYF(MY_WME)))
7891       {
7892         rc= 1;
7893         translog_stop_writing();
7894         DBUG_RETURN(rc);
7895       }
7896       translog_syncs++;
7897       file->is_sync= 1;
7898     }
7899   }
7900 
7901   if (sync_dir)
7902   {
7903     if (!(rc= sync_dir(log_descriptor.directory_fd,
7904                        MYF(MY_WME | MY_IGNORE_BADFD))))
7905       translog_syncs++;
7906   }
7907 
7908   DBUG_RETURN(rc);
7909 }
7910 
7911 
7912 /**
7913    check_skipped_lsn
7914 
7915    Check if lsn skipped in redo is ok
7916 */
7917 
check_skipped_lsn(MARIA_HA * info,LSN lsn,my_bool index_file,pgcache_page_no_t page)7918 void check_skipped_lsn(MARIA_HA *info, LSN lsn, my_bool index_file,
7919                        pgcache_page_no_t page)
7920 {
7921   if (lsn <= log_descriptor.horizon)
7922   {
7923     DBUG_PRINT("info", ("Page is up to date, skipping redo"));
7924   }
7925   else
7926   {
7927     /* Give error, but don't flood the log */
7928     if (skipped_lsn_err_count++ < MAX_LSN_ERRORS &&
7929         ! info->s->redo_error_given++)
7930     {
7931       eprint(tracef, "Table %s has wrong LSN: " LSN_FMT " on page: %llu",
7932              (index_file ? info->s->data_file_name.str :
7933               info->s->index_file_name.str),
7934              LSN_IN_PARTS(lsn), (ulonglong) page);
7935       recovery_found_crashed_tables++;
7936     }
7937   }
7938 }
7939 
7940 
7941 /*
7942   @brief Flushes buffers with LSNs in them less or equal address <lsn>
7943 
7944   @param lsn             address up to which all LSNs should be flushed,
7945                          can be reset to real last LSN address
7946   @parem sent_to_disk    returns 'sent to disk' position
7947   @param flush_horizon   returns horizon of the flush
7948 
7949   @note About terminology see comment to translog_flush().
7950 */
7951 
void translog_flush_buffers(TRANSLOG_ADDRESS *lsn,
                               TRANSLOG_ADDRESS *sent_to_disk,
                               TRANSLOG_ADDRESS *flush_horizon)
{
  /*
    Sends to disk the log buffers which hold data up to *lsn.

    The caller enters with the translog lock held (see the call in
    translog_flush()); every path through this function releases it.
    On return *lsn holds the LSN actually used as the flush target,
    *sent_to_disk the value of translog_get_sent_to_disk() and
    *flush_horizon the horizon of the last buffer passed to
    translog_buffer_flush(). If no LSN was generated yet the function
    returns early and leaves *sent_to_disk and *flush_horizon untouched.
  */
  dirty_buffer_mask_t dirty_buffer_mask;
  uint i;
  uint8 UNINIT_VAR(last_buffer_no), start_buffer_no;
  DBUG_ENTER("translog_flush_buffers");

  /*
    We will recheck information when will lock buffers one by
    one so we can use unprotected read here (this is just for
    speed up buffers processing)
  */
  dirty_buffer_mask= log_descriptor.dirty_buffer_mask;
  DBUG_PRINT("info", ("Dirty buffer mask: %lx  current buffer: %u",
                      (ulong) dirty_buffer_mask,
                      (uint) log_descriptor.bc.buffer_no));
  /*
    Walk the ring of buffers starting right after the current one and stop
    at the first buffer marked dirty in the mask; if none is marked the
    scan ends on the current buffer itself.
  */
  for (i= (log_descriptor.bc.buffer_no + 1) % TRANSLOG_BUFFERS_NO;
       i != log_descriptor.bc.buffer_no && !(dirty_buffer_mask & (1 << i));
       i= (i + 1) % TRANSLOG_BUFFERS_NO) {}
  start_buffer_no= i;

  DBUG_PRINT("info",
             ("start from: %u  current: %u  prev last lsn: " LSN_FMT,
              (uint) start_buffer_no, (uint) log_descriptor.bc.buffer_no,
              LSN_IN_PARTS(log_descriptor.bc.buffer->prev_last_lsn)));

  /*
    if LSN up to which we have to flush bigger then maximum LSN of previous
    buffer and at least one LSN was saved in the current buffer (last_lsn !=
    LSN_IMPOSSIBLE) then we have to close the current buffer.
  */
  if (cmp_translog_addr(*lsn, log_descriptor.bc.buffer->prev_last_lsn) > 0 &&
      log_descriptor.bc.buffer->last_lsn != LSN_IMPOSSIBLE)
  {
    /*
      Remember the current buffer before it is forced to finish: it is the
      one unlocked below.
      NOTE(review): presumably translog_force_current_buffer_to_finish()
      switches log_descriptor.bc to a new buffer, which is why the pointer
      is saved first - confirm against that function.
    */
    struct st_translog_buffer *buffer= log_descriptor.bc.buffer;
    *lsn= log_descriptor.bc.buffer->last_lsn; /* fix lsn if it was horizon */
    DBUG_PRINT("info", ("LSN to flush fixed to last lsn: " LSN_FMT,
                        LSN_IN_PARTS(*lsn)));
    last_buffer_no= log_descriptor.bc.buffer_no;
    log_descriptor.is_everything_flushed= 1;
    translog_force_current_buffer_to_finish();
    translog_buffer_unlock(buffer);
  }
  else
  {
    if (log_descriptor.bc.buffer->last_lsn == LSN_IMPOSSIBLE)
    {
      /*
        In this case both last_lsn & prev_last_lsn are LSN_IMPOSSIBLE
        otherwise it will go in the first IF because LSN_IMPOSSIBLE less
        then any real LSN and cmp_translog_addr(*lsn,
        log_descriptor.bc.buffer->prev_last_lsn) will be TRUE
      */
      DBUG_ASSERT(log_descriptor.bc.buffer->prev_last_lsn ==
                  LSN_IMPOSSIBLE);
      DBUG_PRINT("info", ("There is no LSNs yet generated => do nothing"));
      translog_unlock();
      DBUG_VOID_RETURN;
    }

    DBUG_ASSERT(log_descriptor.bc.buffer->prev_last_lsn != LSN_IMPOSSIBLE);
    /* fix lsn if it was horizon */
    *lsn= log_descriptor.bc.buffer->prev_last_lsn;
    DBUG_PRINT("info", ("LSN to flush fixed to prev last lsn: " LSN_FMT,
               LSN_IN_PARTS(*lsn)));
    /* the current buffer stays open: the last one to flush precedes it */
    last_buffer_no= ((log_descriptor.bc.buffer_no + TRANSLOG_BUFFERS_NO -1) %
                     TRANSLOG_BUFFERS_NO);
    translog_unlock();
  }
  /* flush buffers */
  *sent_to_disk= translog_get_sent_to_disk();
  if (cmp_translog_addr(*lsn, *sent_to_disk) > 0)
  {

    DBUG_PRINT("info", ("Start buffer #: %u  last buffer #: %u",
                        (uint) start_buffer_no, (uint) last_buffer_no));
    /*
      Turn last_buffer_no into an exclusive end index. The loop below is a
      do/while, so the start buffer is always examined at least once.
    */
    last_buffer_no= (last_buffer_no + 1) % TRANSLOG_BUFFERS_NO;
    i= start_buffer_no;
    do
    {
      struct st_translog_buffer *buffer= log_descriptor.buffers + i;
      translog_buffer_lock(buffer);
      DBUG_PRINT("info", ("Check buffer: %p  #: %u  "
                          "prev last LSN: " LSN_FMT "  "
                          "last LSN: " LSN_FMT "  status: %s",
                          buffer,
                          (uint) i,
                          LSN_IN_PARTS(buffer->prev_last_lsn),
                          LSN_IN_PARTS(buffer->last_lsn),
                          (buffer->file ?
                           "dirty" : "closed")));
      /*
        Recheck under the buffer lock (the dirty mask was read without
        protection): flush only buffers which still have a file attached
        and which start not later than the target LSN.
      */
      if (buffer->prev_last_lsn <= *lsn &&
          buffer->file != NULL)
      {
        DBUG_ASSERT(*flush_horizon <= buffer->offset + buffer->size);
        *flush_horizon= (buffer->pre_force_close_horizon != LSN_IMPOSSIBLE ?
                         buffer->pre_force_close_horizon :
                         buffer->offset + buffer->size);
        /* pre_force_close_horizon is reset during new buffer start */
        DBUG_PRINT("info", ("flush_horizon: " LSN_FMT,
                            LSN_IN_PARTS(*flush_horizon)));
        DBUG_ASSERT(*flush_horizon <= log_descriptor.horizon);

        translog_buffer_flush(buffer);
      }
      translog_buffer_unlock(buffer);
      i= (i + 1) % TRANSLOG_BUFFERS_NO;
    } while (i != last_buffer_no);
    /* re-read the position now that the buffers have been written */
    *sent_to_disk= translog_get_sent_to_disk();
  }

  DBUG_VOID_RETURN;
}
8067 
8068 /**
8069   @brief Flush the log up to given LSN (included)
8070 
8071   @param  lsn            log record serial number up to which (inclusive)
8072                          the log has to be flushed
8073 
8074   @return Operation status
8075     @retval 0      OK
8076     @retval 1      Error
8077 
8078   @note
8079 
  - Non group commit logic: Commits are made in passes. The thread which
  started the flush first performs the actual flush; other threads set a new
  goal (LSN) for the next pass (if it is the maximum) and wait for the pass
  end, or just wait for the pass end.
8084 
8085   - If hard group commit enabled and rate set to zero:
8086   The first thread sends all changed buffers to disk. This is repeated
8087   as long as there are new LSNs added. The process can not loop
8088   forever because we have limited number of threads and they will wait
8089   for the data to be synced.
8090   Pseudo code:
8091 
8092    do
8093      send changed buffers to disk
8094    while new_goal
8095    sync
8096 
8097   - If hard group commit switched ON and less than rate microseconds has
8098   passed from last sync, then after buffers have been sent to disk
8099   wait until rate microseconds has passed since last sync, do sync and return.
8100   This ensures that if we call sync infrequently we don't do any waits.
8101 
8102   - If soft group commit enabled everything works as with 'non group commit'
8103   but the thread doesn't do any real sync(). If rate is not zero the
8104   sync() will be performed by a service thread with the given rate
8105   when needed (new LSN appears).
8106 
8107   @note Terminology:
8108   'sent to disk' means written to disk but not sync()ed,
8109   'flushed' mean sent to disk and synced().
8110 */
8111 
my_bool translog_flush(TRANSLOG_ADDRESS lsn)
{
  /*
    Makes sure the log is flushed up to lsn (inclusive).

    Only one thread at a time runs a flush pass (flush_in_progress).
    A thread arriving while a pass is running registers its LSN as a goal
    for the next pass and either waits for the running pass or takes over
    the next one. Returns 0 on success and the result of
    translog_sync_files() if syncing fails.
  */
  struct timespec abstime;
  /* assigned only when hard group commit is on, and read only in that case */
  ulonglong UNINIT_VAR(flush_interval);
  ulonglong time_spent;
  LSN sent_to_disk= LSN_IMPOSSIBLE;
  TRANSLOG_ADDRESS flush_horizon;
  my_bool rc= 0;
  my_bool hgroup_commit_at_start;
  DBUG_ENTER("translog_flush");
  DBUG_PRINT("enter", ("Flush up to LSN: " LSN_FMT, LSN_IN_PARTS(lsn)));
  DBUG_ASSERT(translog_status == TRANSLOG_OK ||
              translog_status == TRANSLOG_READONLY);

  mysql_mutex_lock(&log_descriptor.log_flush_lock);
  DBUG_PRINT("info", ("Everything is flushed up to " LSN_FMT,
                      LSN_IN_PARTS(log_descriptor.flushed)));
  /* nothing to do if the requested LSN is already flushed */
  if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0)
  {
    mysql_mutex_unlock(&log_descriptor.log_flush_lock);
    DBUG_RETURN(0);
  }
  if (log_descriptor.flush_in_progress)
  {
    /* another thread is running a flush pass */
    translog_lock();
    /* fix lsn if it was horizon */
    if (cmp_translog_addr(lsn, log_descriptor.bc.buffer->last_lsn) > 0)
      lsn= BUFFER_MAX_LSN(log_descriptor.bc.buffer);
    translog_unlock();
    translog_flush_set_new_goal_and_wait(lsn);
    /*
      If this thread is not recorded as max_lsn_requester it only waits for
      the flush to end; otherwise it goes on and performs the pass itself.
    */
    if (!pthread_equal(log_descriptor.max_lsn_requester, pthread_self()))
    {
      /*
        translog_flush_wait_for_end() release log_flush_lock while is
        waiting then acquire it again
      */
      translog_flush_wait_for_end(lsn);
      mysql_mutex_unlock(&log_descriptor.log_flush_lock);
      DBUG_RETURN(0);
    }
    log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
  }
  log_descriptor.flush_in_progress= 1;
  flush_horizon= log_descriptor.previous_flush_horizon;
  DBUG_PRINT("info", ("flush_in_progress is set, flush_horizon: " LSN_FMT,
                      LSN_IN_PARTS(flush_horizon)));
  mysql_mutex_unlock(&log_descriptor.log_flush_lock);

  /* take a snapshot so the mode is the same for the whole call */
  hgroup_commit_at_start= hard_group_commit;
  if (hgroup_commit_at_start)
    flush_interval= group_commit_wait;

  translog_lock();
  if (log_descriptor.is_everything_flushed)
  {
    DBUG_PRINT("info", ("everything is flushed"));
    translog_unlock();
    /* the code at 'out' expects log_flush_lock to be held */
    mysql_mutex_lock(&log_descriptor.log_flush_lock);
    goto out;
  }

  for (;;)
  {
    /* Following function flushes buffers and makes translog_unlock() */
    translog_flush_buffers(&lsn, &sent_to_disk, &flush_horizon);

    if (!hgroup_commit_at_start)
      break;  /* flush pass is ended */

retest:
    /*
      We do not check time here because mysql_mutex_lock rarely takes
      a lot of time so we can sacrifice a bit precision to performance
      (taking into account that microsecond_interval_timer() might be
      expensive call).
    */
    if (flush_interval == 0)
      break;  /* flush pass is ended */

    mysql_mutex_lock(&log_descriptor.log_flush_lock);
    if (log_descriptor.next_pass_max_lsn == LSN_IMPOSSIBLE)
    {
      /*
        No new goal was registered. flush_start is not local to this
        function; the elapsed time is measured against it.
      */
      if (flush_interval == 0 ||
          (time_spent= (microsecond_interval_timer() - flush_start)) >=
          flush_interval)
      {
        mysql_mutex_unlock(&log_descriptor.log_flush_lock);
        break;
      }
      DBUG_PRINT("info", ("flush waits: %llu  interval: %llu  spent: %llu",
                          flush_interval - time_spent,
                          flush_interval, time_spent));
      /* wait time or next goal */
      /*
        NOTE(review): time_spent comes from microsecond_interval_timer()
        while set_timespec_nsec() by its name takes nanoseconds - confirm
        that the units of flush_interval match what is intended here.
      */
      set_timespec_nsec(abstime, flush_interval - time_spent);
      mysql_cond_timedwait(&log_descriptor.new_goal_cond,
                           &log_descriptor.log_flush_lock,
                           &abstime);
      mysql_mutex_unlock(&log_descriptor.log_flush_lock);
      DBUG_PRINT("info", ("retest conditions"));
      goto retest;
    }

    /* take next goal */
    lsn= log_descriptor.next_pass_max_lsn;
    log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE;
    /* prevent other thread from continue */
    log_descriptor.max_lsn_requester= pthread_self();
    DBUG_PRINT("info", ("flush took next goal: " LSN_FMT,
                        LSN_IN_PARTS(lsn)));
    mysql_mutex_unlock(&log_descriptor.log_flush_lock);

    /* next flush pass */
    DBUG_PRINT("info", ("next flush pass"));
    /* translog_flush_buffers() is entered with the translog lock held */
    translog_lock();
  }

  /*
    sync() files from previous flush till current one
  */
  if (!soft_sync || hgroup_commit_at_start)
  {
    /*
      The third argument asks for a directory sync as well: only when
      sync_log_dir requires it and the flush horizon moved to another file
      or another page since the previous flush.
    */
    if ((rc=
         translog_sync_files(LSN_FILE_NO(log_descriptor.flushed),
                             LSN_FILE_NO(lsn),
                             sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS &&
                             (LSN_FILE_NO(log_descriptor.
                                          previous_flush_horizon) !=
                              LSN_FILE_NO(flush_horizon) ||
                              (LSN_OFFSET(log_descriptor.
                                          previous_flush_horizon) /
                               TRANSLOG_PAGE_SIZE) !=
                              (LSN_OFFSET(flush_horizon) /
                               TRANSLOG_PAGE_SIZE)))))
    {
      /* sync failed: do not advance log_descriptor.flushed at 'out' */
      sent_to_disk= LSN_IMPOSSIBLE;
      mysql_mutex_lock(&log_descriptor.log_flush_lock);
      goto out;
    }
    /* keep values for soft sync() and forced sync() actual */
    {
      uint32 fileno= LSN_FILE_NO(lsn);
      soft_sync_min= fileno;
      soft_sync_max= fileno;
    }
  }
  else
  {
    /* soft sync: no sync here, only record that one is needed */
    soft_sync_max= LSN_FILE_NO(lsn);
    soft_need_sync= 1;
  }

  DBUG_ASSERT(flush_horizon <= log_descriptor.horizon);

  mysql_mutex_lock(&log_descriptor.log_flush_lock);
  log_descriptor.previous_flush_horizon= flush_horizon;
out:
  /* log_flush_lock is held on every path which reaches this label */
  if (sent_to_disk != LSN_IMPOSSIBLE)
    log_descriptor.flushed= sent_to_disk;
  log_descriptor.flush_in_progress= 0;
  log_descriptor.flush_no++;
  DBUG_PRINT("info", ("flush_in_progress is dropped"));
  mysql_mutex_unlock(&log_descriptor.log_flush_lock);
  mysql_cond_broadcast(&log_descriptor.log_flush_cond);
  DBUG_RETURN(rc);
}
8277 
8278 
8279 /**
8280    @brief Gives a 2-byte-id to MARIA_SHARE and logs this fact
8281 
8282    If a MARIA_SHARE does not yet have a 2-byte-id (unique over all currently
8283    open MARIA_SHAREs), give it one and record this assignment in the log
8284    (LOGREC_FILE_ID log record).
8285 
8286    @param  tbl_info        table
8287    @param  trn             calling transaction
8288 
8289    @return Operation status
8290      @retval 0      OK
8291      @retval 1      Error
8292 
8293    @note Can be called even if share already has an id (then will do nothing)
8294 */
8295 
int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn)
{
  uint16 id;
  MARIA_SHARE *share= tbl_info->s;
  /*
    If you give an id to a non-BLOCK_RECORD table, you also need to release
    this id somewhere. Then you can change the assertion.
  */
  DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
  /* re-check under mutex to avoid having 2 ids for the same share */
  mysql_mutex_lock(&share->intern_lock);
  if (unlikely(share->id == 0))
  {
    LSN lsn;
    LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
    uchar log_data[FILEID_STORE_SIZE];
    /* Inspired by set_short_trid() of trnman.c */
    uint i= share->kfile.file % SHARE_ID_MAX + 1;
    id= 0;
    /*
      Claim a free slot of id_to_share[] with a compare-and-swap. The first
      scan starts at a position derived from the file descriptor; if it
      finds nothing the scan is repeated from 1 until a slot is obtained.
    */
    do
    {
      for ( ; i <= SHARE_ID_MAX ; i++) /* the range is [1..SHARE_ID_MAX] */
      {
        void *tmp= NULL;
        if (id_to_share[i] == NULL &&
            my_atomic_casptr((void **)&id_to_share[i], &tmp, share))
        {
          id= (uint16) i;
          break;
        }
      }
      i= 1; /* scan the whole array */
    } while (id == 0);
    DBUG_PRINT("info", ("id_to_share: %p -> %u", share, id));
    /* The record body is the id followed by the file name with its '\0' */
    fileid_store(log_data, id);
    log_array[TRANSLOG_INTERNAL_PARTS + 0].str=    log_data;
    log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data);
    /*
      open_file_name is an unresolved name (symlinks are not resolved, datadir
      is not realpath-ed, etc) which is good: the log can be moved to another
      directory and continue working.
    */
    log_array[TRANSLOG_INTERNAL_PARTS + 1].str=
      (uchar *)share->open_file_name.str;
    log_array[TRANSLOG_INTERNAL_PARTS + 1].length=
      share->open_file_name.length + 1;
    /*
      We can't unlock share->intern_lock before the log entry is written to
      ensure no one uses the id before it's logged.
    */
    if (unlikely(translog_write_record(&lsn, LOGREC_FILE_ID, trn, tbl_info,
                                       (translog_size_t)
                                       (sizeof(log_data) +
                                        log_array[TRANSLOG_INTERNAL_PARTS +
                                                  1].length),
                                       sizeof(log_array)/sizeof(log_array[0]),
                                       log_array, NULL, NULL)))
    {
      /*
        The id was not stored in share->id, so nothing will ever deassign
        it. Give the slot back, otherwise it stays occupied and keeps
        pointing to this share.
      */
      my_atomic_storeptr((void **)&id_to_share[id], 0);
      mysql_mutex_unlock(&share->intern_lock);
      return 1;
    }
    /*
      Now when translog record is done, we can set share->id.
      If we set it before, then translog_write_record may pick up the id
      before it's written to the log.
    */
    share->id= id;
    share->state.logrec_file_id= lsn;
  }
  mysql_mutex_unlock(&share->intern_lock);
  return 0;
}
8368 
8369 
8370 /**
8371    @brief Recycles a MARIA_SHARE's short id.
8372 
8373    @param  share           table
8374 
8375    @note Must be called only if share has an id (i.e. id != 0)
8376 */
8377 
void translog_deassign_id_from_share(MARIA_SHARE *share)
{
  /* the id_to_share[] entry which currently belongs to this share */
  void **slot= (void **)&id_to_share[share->id];
  DBUG_PRINT("info", ("id_to_share: %p id %u -> 0",
                      share, share->id));
  /*
    Callers are closing the last instance of the table or finishing a
    REPAIR, so no writer needs to be locked out. A Checkpoint may still be
    reading share->id, which is why intern_lock must be held here.
  */
  mysql_mutex_assert_owner(&share->intern_lock);
  /* free the slot, then forget the id in the share */
  my_atomic_storeptr(slot, 0);
  share->id= 0;
  /* not strictly needed, done for safety */
  share->lsn_of_file_id= LSN_IMPOSSIBLE;
}
8394 
8395 
void translog_assign_id_to_share_from_recovery(MARIA_SHARE *share,
                                               uint16 id)
{
  /*
    Binds the given id to the share without any locking or logging.
    The assertions state the preconditions: single-threaded recovery,
    a BLOCK_RECORD table, a share without an id and a free slot.
  */
  DBUG_ASSERT(maria_in_recovery && !maria_multi_threaded);
  DBUG_ASSERT(share->data_file_type == BLOCK_RECORD);
  DBUG_ASSERT(share->id == 0);
  DBUG_ASSERT(id_to_share[id] == NULL);
  share->id= id;
  id_to_share[id]= share;
}
8405 
8406 
8407 /**
8408    @brief check if such log file exists
8409 
8410    @param file_no number of the file to test
8411 
8412    @retval 0 no such file
8413    @retval 1 there is file with such number
8414 */
8415 
translog_is_file(uint file_no)8416 my_bool translog_is_file(uint file_no)
8417 {
8418   MY_STAT stat_buff;
8419   char path[FN_REFLEN];
8420   return (MY_TEST(mysql_file_stat(key_file_translog,
8421                                   translog_filename_by_fileno(file_no, path),
8422                                   &stat_buff, MYF(0))));
8423 }
8424 
8425 
8426 /**
8427   @brief returns minimum log file number
8428 
8429   @param horizon         the end of the log
8430   @param is_protected    true if it is under purge_log protection
8431 
8432   @retval minimum file number
8433   @retval 0 no files found
8434 */
8435 
translog_first_file(TRANSLOG_ADDRESS horizon,int is_protected)8436 static uint32 translog_first_file(TRANSLOG_ADDRESS horizon, int is_protected)
8437 {
8438   uint min_file= 0, max_file;
8439   DBUG_ENTER("translog_first_file");
8440   if (!is_protected)
8441     mysql_mutex_lock(&log_descriptor.purger_lock);
8442   if (log_descriptor.min_file_number &&
8443       translog_is_file(log_descriptor.min_file_number))
8444   {
8445     DBUG_PRINT("info", ("cached %lu",
8446                         (ulong) log_descriptor.min_file_number));
8447     if (!is_protected)
8448       mysql_mutex_unlock(&log_descriptor.purger_lock);
8449     DBUG_RETURN(log_descriptor.min_file_number);
8450   }
8451 
8452   max_file= LSN_FILE_NO(horizon);
8453 
8454   /* binary search for last file */
8455   while (min_file != max_file && min_file != (max_file - 1))
8456   {
8457     uint test= (min_file + max_file) / 2;
8458     DBUG_PRINT("info", ("min_file: %u  test: %u  max_file: %u",
8459                         min_file, test, max_file));
8460     if (test == max_file)
8461       test--;
8462     if (translog_is_file(test))
8463       max_file= test;
8464     else
8465       min_file= test;
8466   }
8467   log_descriptor.min_file_number= max_file;
8468   if (!is_protected)
8469     mysql_mutex_unlock(&log_descriptor.purger_lock);
8470   DBUG_PRINT("info", ("first file :%lu", (ulong) max_file));
8471   DBUG_ASSERT(max_file >= 1);
8472   DBUG_RETURN(max_file);
8473 }
8474 
8475 
8476 /**
  @brief returns the closest LSN higher than the given chunk address

  @param addr the chunk address to start from
  @param horizon the horizon if it is known or LSN_IMPOSSIBLE

  @retval LSN_ERROR Error
  @retval LSN_IMPOSSIBLE no LSNs after the address
  @retval # the closest LSN higher than the given chunk address
8485 */
8486 
translog_next_LSN(TRANSLOG_ADDRESS addr,TRANSLOG_ADDRESS horizon)8487 LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon)
8488 {
8489   TRANSLOG_SCANNER_DATA scanner;
8490   LSN result;
8491   DBUG_ENTER("translog_next_LSN");
8492 
8493   if (horizon == LSN_IMPOSSIBLE)
8494     horizon= translog_get_horizon();
8495 
8496   if (addr == horizon)
8497     DBUG_RETURN(LSN_IMPOSSIBLE);
8498 
8499   translog_scanner_init(addr, 0, &scanner, 1);
8500   /*
8501     addr can point not to a chunk beginning but page end so next
8502     page beginning.
8503   */
8504   if (addr % TRANSLOG_PAGE_SIZE == 0)
8505   {
8506     /*
8507       We are emulating the page end which cased such horizon value to
8508       trigger translog_scanner_eop().
8509 
8510       We can't just increase addr on page header overhead because it
8511       can be file end so we allow translog_get_next_chunk() to skip
8512       to the next page in correct way
8513     */
8514     scanner.page_addr-= TRANSLOG_PAGE_SIZE;
8515     scanner.page_offset= TRANSLOG_PAGE_SIZE;
8516 #ifndef DBUG_OFF
8517     scanner.page= NULL; /* prevent using incorrect page content */
8518 #endif
8519   }
8520   /* addr can point not to a chunk beginning but to a page end */
8521   if (translog_scanner_eop(&scanner))
8522   {
8523     if (translog_get_next_chunk(&scanner))
8524     {
8525       result= LSN_ERROR;
8526       goto out;
8527     }
8528     if (scanner.page == END_OF_LOG)
8529     {
8530       result= LSN_IMPOSSIBLE;
8531       goto out;
8532     }
8533   }
8534 
8535   while (!translog_is_LSN_chunk(scanner.page[scanner.page_offset]) &&
8536          scanner.page[scanner.page_offset] != TRANSLOG_FILLER)
8537   {
8538     if (translog_get_next_chunk(&scanner))
8539     {
8540       result= LSN_ERROR;
8541       goto out;
8542     }
8543     if (scanner.page == END_OF_LOG)
8544     {
8545       result= LSN_IMPOSSIBLE;
8546       goto out;
8547     }
8548   }
8549 
8550   if (scanner.page[scanner.page_offset] == TRANSLOG_FILLER)
8551     result= LSN_IMPOSSIBLE; /* reached page filler */
8552   else
8553     result= scanner.page_addr + scanner.page_offset;
8554 out:
8555   translog_destroy_scanner(&scanner);
8556   DBUG_RETURN(result);
8557 }
8558 
8559 
8560 /**
8561    @brief returns the LSN of the first record starting in this log
8562 
8563    @retval LSN_ERROR Error
8564    @retval LSN_IMPOSSIBLE no log or the log is empty
8565    @retval # LSN of the first record
8566 */
8567 
translog_first_lsn_in_log()8568 LSN translog_first_lsn_in_log()
8569 {
8570   TRANSLOG_ADDRESS addr, horizon= translog_get_horizon();
8571   TRANSLOG_VALIDATOR_DATA data;
8572   uint file;
8573   uint16 chunk_offset;
8574   uchar *page;
8575   DBUG_ENTER("translog_first_lsn_in_log");
8576   DBUG_PRINT("info", ("Horizon: " LSN_FMT, LSN_IN_PARTS(horizon)));
8577   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8578               translog_status == TRANSLOG_READONLY);
8579 
8580   if (!(file= translog_first_file(horizon, 0)))
8581   {
8582     /* log has no records yet */
8583     DBUG_RETURN(LSN_IMPOSSIBLE);
8584   }
8585 
8586   addr= MAKE_LSN(file, TRANSLOG_PAGE_SIZE); /* the first page of the file */
8587   data.addr= &addr;
8588   {
8589     TRANSLOG_PAGE_SIZE_BUFF psize_buff;
8590     if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL ||
8591         (chunk_offset= translog_get_first_chunk_offset(page)) == 0)
8592       DBUG_RETURN(LSN_ERROR);
8593   }
8594   addr+= chunk_offset;
8595 
8596   DBUG_RETURN(translog_next_LSN(addr, horizon));
8597 }
8598 
8599 
8600 /**
8601    @brief Returns theoretical first LSN if first log is present
8602 
8603    @retval LSN_ERROR Error
8604    @retval LSN_IMPOSSIBLE no log
8605    @retval # LSN of the first record
8606 */
8607 
translog_first_theoretical_lsn()8608 LSN translog_first_theoretical_lsn()
8609 {
8610   TRANSLOG_ADDRESS addr= translog_get_horizon();
8611   TRANSLOG_PAGE_SIZE_BUFF psize_buff;
8612   uchar *page;
8613   TRANSLOG_VALIDATOR_DATA data;
8614   DBUG_ENTER("translog_first_theoretical_lsn");
8615   DBUG_PRINT("info", ("Horizon: " LSN_FMT, LSN_IN_PARTS(addr)));
8616   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8617               translog_status == TRANSLOG_READONLY);
8618 
8619   if (!translog_is_file(1))
8620     DBUG_RETURN(LSN_IMPOSSIBLE);
8621   if (addr == MAKE_LSN(1, TRANSLOG_PAGE_SIZE))
8622   {
8623     /* log has no records yet */
8624     DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
8625                          log_descriptor.page_overhead));
8626   }
8627 
8628   addr= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* the first page of the file */
8629   data.addr= &addr;
8630   if ((page= translog_get_page(&data, psize_buff.buffer, NULL)) == NULL)
8631     DBUG_RETURN(LSN_ERROR);
8632 
8633   DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE +
8634                        page_overhead[page[TRANSLOG_PAGE_FLAGS]]));
8635 }
8636 
8637 
8638 /**
  @brief Checks the given low water mark and purges files if needed

  @param low the last (minimum) address which is needed
8642 
8643   @retval 0 OK
8644   @retval 1 Error
8645 */
8646 
translog_purge(TRANSLOG_ADDRESS low)8647 my_bool translog_purge(TRANSLOG_ADDRESS low)
8648 {
8649   uint32 last_need_file= LSN_FILE_NO(low);
8650   uint32 min_unsync;
8651   int soft;
8652   TRANSLOG_ADDRESS horizon= translog_get_horizon();
8653   int rc= 0;
8654   DBUG_ENTER("translog_purge");
8655   DBUG_PRINT("enter", ("low: " LSN_FMT, LSN_IN_PARTS(low)));
8656   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8657               translog_status == TRANSLOG_READONLY);
8658 
8659   soft= soft_sync;
8660   min_unsync= soft_sync_min;
8661   DBUG_PRINT("info", ("min_unsync: %lu", (ulong) min_unsync));
8662   if (soft && min_unsync < last_need_file)
8663   {
8664     last_need_file= min_unsync;
8665     DBUG_PRINT("info", ("last_need_file set to %lu", (ulong)last_need_file));
8666   }
8667 
8668   mysql_mutex_lock(&log_descriptor.purger_lock);
8669   DBUG_PRINT("info", ("last_lsn_checked file: %lu:",
8670                       (ulong) log_descriptor.last_lsn_checked));
8671   if (LSN_FILE_NO(log_descriptor.last_lsn_checked) < last_need_file)
8672   {
8673     uint32 i;
8674     uint32 min_file= translog_first_file(horizon, 1);
8675     DBUG_ASSERT(min_file != 0); /* log is already started */
8676     DBUG_PRINT("info", ("min_file:  %lu:",(ulong) min_file));
8677     for(i= min_file; i < last_need_file && rc == 0; i++)
8678     {
8679       LSN lsn= translog_get_file_max_lsn_stored(i);
8680       if (lsn == LSN_IMPOSSIBLE)
8681         break;   /* files are still in writing */
8682       if (lsn == LSN_ERROR)
8683       {
8684         rc= 1;
8685         break;
8686       }
8687       if (cmp_translog_addr(lsn, low) >= 0)
8688         break;
8689 
8690       DBUG_PRINT("info", ("purge file %lu", (ulong) i));
8691 
8692       /* remove file descriptor from the cache */
8693       /*
8694         log_descriptor.min_file can be changed only here during execution
8695         and the function is serialized, so we can access it without problems
8696       */
8697       if (i >= log_descriptor.min_file)
8698       {
8699         TRANSLOG_FILE *file;
8700         mysql_rwlock_wrlock(&log_descriptor.open_files_lock);
8701         DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
8702                     log_descriptor.open_files.elements);
8703         DBUG_ASSERT(log_descriptor.min_file == i);
8704         file= *((TRANSLOG_FILE **)pop_dynamic(&log_descriptor.open_files));
8705         DBUG_PRINT("info", ("Files : %d", log_descriptor.open_files.elements));
8706         DBUG_ASSERT(i == file->number);
8707         log_descriptor.min_file++;
8708         DBUG_ASSERT(log_descriptor.max_file - log_descriptor.min_file + 1 ==
8709                     log_descriptor.open_files.elements);
8710         mysql_rwlock_unlock(&log_descriptor.open_files_lock);
8711         translog_close_log_file(file);
8712       }
8713       if (log_purge_type == TRANSLOG_PURGE_IMMIDIATE && ! log_purge_disabled)
8714       {
8715         char path[FN_REFLEN], *file_name;
8716         file_name= translog_filename_by_fileno(i, path);
8717         rc= MY_TEST(mysql_file_delete(key_file_translog,
8718                                       file_name, MYF(MY_WME)));
8719       }
8720     }
8721     if (unlikely(rc == 1))
8722       log_descriptor.min_need_file= 0; /* impossible value */
8723     else
8724       log_descriptor.min_need_file= i;
8725   }
8726 
8727   mysql_mutex_unlock(&log_descriptor.purger_lock);
8728   DBUG_RETURN(rc);
8729 }
8730 
8731 
8732 /**
  @brief Purges files by the stored min need file in case of
    "ondemand" purge type

  @note This function does real work only if the purge type is "ondemand",
    translog_purge() was called at least once and its last call finished
    without errors
8739 
8740   @retval 0 OK
8741   @retval 1 Error
8742 */
8743 
translog_purge_at_flush()8744 my_bool translog_purge_at_flush()
8745 {
8746   uint32 i, min_file;
8747   int rc= 0;
8748   DBUG_ENTER("translog_purge_at_flush");
8749   DBUG_ASSERT(translog_status == TRANSLOG_OK ||
8750               translog_status == TRANSLOG_READONLY);
8751 
8752   if (unlikely(translog_status == TRANSLOG_READONLY))
8753   {
8754     DBUG_PRINT("info", ("The log is read only => exit"));
8755     DBUG_RETURN(0);
8756   }
8757 
8758   if (log_purge_type != TRANSLOG_PURGE_ONDEMAND)
8759   {
8760     DBUG_PRINT("info", ("It is not \"at_flush\" => exit"));
8761     DBUG_RETURN(0);
8762   }
8763 
8764   mysql_mutex_lock(&log_descriptor.purger_lock);
8765 
8766   if (unlikely(log_descriptor.min_need_file == 0 || log_purge_disabled))
8767   {
8768     DBUG_PRINT("info", ("No info about min need file => exit"));
8769     mysql_mutex_unlock(&log_descriptor.purger_lock);
8770     DBUG_RETURN(0);
8771   }
8772 
8773   min_file= translog_first_file(translog_get_horizon(), 1);
8774   DBUG_ASSERT(min_file != 0); /* log is already started */
8775   for(i= min_file; i < log_descriptor.min_need_file && rc == 0; i++)
8776   {
8777     char path[FN_REFLEN], *file_name;
8778     DBUG_PRINT("info", ("purge file %lu\n", (ulong) i));
8779     file_name= translog_filename_by_fileno(i, path);
8780     rc= MY_TEST(mysql_file_delete(key_file_translog,
8781                                   file_name, MYF(MY_WME)));
8782   }
8783 
8784   mysql_mutex_unlock(&log_descriptor.purger_lock);
8785   DBUG_RETURN(rc);
8786 }
8787 
8788 
8789 /**
8790   @brief Gets min file number
8791 
8792   @param horizon         the end of the log
8793 
8794   @retval minimum file number
8795   @retval 0 no files found
8796 */
8797 
translog_get_first_file(TRANSLOG_ADDRESS horizon)8798 uint32 translog_get_first_file(TRANSLOG_ADDRESS horizon)
8799 {
8800   return translog_first_file(horizon, 0);
8801 }
8802 
8803 
8804 /**
8805   @brief Gets min file number which is needed
8806 
8807   @retval minimum file number
8808   @retval 0 unknown
8809 */
8810 
translog_get_first_needed_file()8811 uint32 translog_get_first_needed_file()
8812 {
8813   uint32 file_no;
8814   mysql_mutex_lock(&log_descriptor.purger_lock);
8815   file_no= log_descriptor.min_need_file;
8816   mysql_mutex_unlock(&log_descriptor.purger_lock);
8817   return file_no;
8818 }
8819 
8820 
8821 /**
8822   @brief Gets transaction log file size
8823 
8824   @return transaction log file size
8825 */
8826 
translog_get_file_size()8827 uint32 translog_get_file_size()
8828 {
8829   uint32 res;
8830   translog_lock();
8831   res= log_descriptor.log_file_max_size;
8832   translog_unlock();
8833   return (res);
8834 }
8835 
8836 
8837 /**
  @brief Sets transaction log file size

  @param size            new maximum size of a log file; has to be a
                         multiple of TRANSLOG_PAGE_SIZE and not less than
                         TRANSLOG_MIN_FILE_SIZE
8841 */
8842 
void translog_set_file_size(uint32 size)
{
  struct st_translog_buffer *finished;
  DBUG_ENTER("translog_set_file_size");
  translog_lock();
  DBUG_PRINT("enter", ("Size: %lu", (ulong) size));
  DBUG_ASSERT(size % TRANSLOG_PAGE_SIZE == 0);
  DBUG_ASSERT(size >= TRANSLOG_MIN_FILE_SIZE);
  log_descriptor.log_file_max_size= size;

  /* the current file still fits into the new limit: nothing more to do */
  if (LSN_OFFSET(log_descriptor.horizon) <  log_descriptor.log_file_max_size)
  {
    translog_unlock();
    DBUG_VOID_RETURN;
  }

  /*
    The current file is already at or over the new limit: move to the next
    buffer, then flush the buffer which was current until now. The flush
    is done after the translog lock is released.
  */
  finished= log_descriptor.bc.buffer;
  translog_buffer_next(&log_descriptor.horizon, &log_descriptor.bc, 1);
  translog_buffer_unlock(finished);
  translog_unlock();

  translog_buffer_lock(finished);
  translog_buffer_flush(finished);
  translog_buffer_unlock(finished);
  DBUG_VOID_RETURN;
}
8868 
8869 
/**
   Write debug information to the log if EXTRA_DEBUG is enabled
*/
8873 
translog_log_debug_info(TRN * trn,enum translog_debug_info_type type,uchar * info,size_t length)8874 my_bool translog_log_debug_info(TRN *trn __attribute__((unused)),
8875                                 enum translog_debug_info_type type
8876                                 __attribute__((unused)),
8877                                 uchar *info __attribute__((unused)),
8878                                 size_t length __attribute__((unused)))
8879 {
8880 #ifdef EXTRA_DEBUG
8881   LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS + 2];
8882   uchar debug_type;
8883   LSN lsn;
8884 
8885   if (!trn)
8886   {
8887     /*
8888       We can't log the current transaction because we don't have
8889       an active transaction. Use a temporary transaction object instead
8890     */
8891     trn= &dummy_transaction_object;
8892   }
8893   debug_type= (uchar) type;
8894   log_array[TRANSLOG_INTERNAL_PARTS + 0].str= &debug_type;
8895   log_array[TRANSLOG_INTERNAL_PARTS + 0].length= 1;
8896   log_array[TRANSLOG_INTERNAL_PARTS + 1].str= info;
8897   log_array[TRANSLOG_INTERNAL_PARTS + 1].length= length;
8898   return translog_write_record(&lsn, LOGREC_DEBUG_INFO,
8899                                trn, NULL,
8900                                (translog_size_t) (1+ length),
8901                                sizeof(log_array)/sizeof(log_array[0]),
8902                                log_array, NULL, NULL);
8903 #else
8904   return 0;
8905 #endif
8906 }
8907 
8908 
8909 
/**
  Sets soft sync mode

  @param mode            TRUE to switch soft sync on, FALSE to switch it off
*/
8915 
void translog_soft_sync(my_bool mode)
{
  /*
    Only the flag is recorded here; the sync thread is started and stopped
    separately by translog_soft_sync_start() / translog_soft_sync_end().
  */
  soft_sync= mode;
}
8920 
8921 
/**
  Sets hard group commit

  @param mode            TRUE to switch hard group commit on, FALSE to
                         switch it off
*/
8927 
void translog_hard_group_commit(my_bool mode)
{
  /* Only the flag is recorded here; no other action is taken */
  hard_group_commit= mode;
}
8932 
8933 
8934 /**
8935   @brief forced log sync (used when we are switching modes)
8936 */
8937 
translog_sync()8938 void translog_sync()
8939 {
8940   uint32 max= get_current_logfile()->number;
8941   uint32 min;
8942   DBUG_ENTER("ma_translog_sync");
8943 
8944   min= soft_sync_min;
8945   if (!min)
8946     min= max;
8947 
8948   translog_sync_files(min, max, sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS);
8949 
8950   DBUG_VOID_RETURN;
8951 }
8952 
8953 
/**
  @brief set rate for group commit

  @param interval            interval to set.

  @note This function is used together with an additional variable because
  the service thread has to be restarted with the new value, which cannot
  be done inside the variable update routine
  (update_maria_group_commit_interval)
*/
8963 
void translog_set_group_commit_interval(uint32 interval)
{
  DBUG_ENTER("translog_set_group_commit_interval");
  /* Only stores the new value; nothing is restarted or signalled here */
  group_commit_wait= interval;
  DBUG_PRINT("info", ("wait: %llu",
                      (ulonglong)group_commit_wait));
  DBUG_VOID_RETURN;
}
8972 
8973 
8974 /**
8975   @brief syncing service thread
8976 */
8977 
8978 static pthread_handler_t
ma_soft_sync_background(void * arg)8979 ma_soft_sync_background( void *arg __attribute__((unused)))
8980 {
8981 
8982   my_thread_init();
8983   {
8984     DBUG_ENTER("ma_soft_sync_background");
8985     for(;;)
8986     {
8987       ulonglong prev_loop= microsecond_interval_timer();
8988       ulonglong time, sleep;
8989       uint32 min, max, sync_request;
8990       min= soft_sync_min;
8991       max= soft_sync_max;
8992       sync_request= soft_need_sync;
8993       soft_sync_min= max;
8994       soft_need_sync= 0;
8995 
8996       sleep= group_commit_wait;
8997       if (sync_request)
8998         translog_sync_files(min, max, FALSE);
8999       time= microsecond_interval_timer() - prev_loop;
9000       if (time > sleep)
9001         sleep= 0;
9002       else
9003         sleep-= time;
9004       if (my_service_thread_sleep(&soft_sync_control, sleep))
9005         break;
9006     }
9007     my_thread_end();
9008     DBUG_RETURN(0);
9009   }
9010 }
9011 
9012 
9013 /**
9014   @brief Starts syncing thread
9015 */
9016 
translog_soft_sync_start(void)9017 int translog_soft_sync_start(void)
9018 {
9019   int res= 0;
9020   uint32 min, max;
9021   DBUG_ENTER("translog_soft_sync_start");
9022 
9023   /* check and init variables */
9024   min= soft_sync_min;
9025   max= soft_sync_max;
9026   if (!max)
9027     soft_sync_max= max= get_current_logfile()->number;
9028   if (!min)
9029     soft_sync_min= max;
9030   soft_need_sync= 1;
9031 
9032   if (!(res= ma_service_thread_control_init(&soft_sync_control)))
9033     if ((res= mysql_thread_create(key_thread_soft_sync,
9034                                   &soft_sync_control.thread, NULL,
9035                                   ma_soft_sync_background, NULL)))
9036       soft_sync_control.killed= TRUE;
9037   DBUG_RETURN(res);
9038 }
9039 
9040 
9041 /**
9042   @brief Stops syncing thread
9043 */
9044 
translog_soft_sync_end(void)9045 void  translog_soft_sync_end(void)
9046 {
9047   DBUG_ENTER("translog_soft_sync_end");
9048   if (soft_sync_control.inited)
9049   {
9050     ma_service_thread_control_end(&soft_sync_control);
9051   }
9052   DBUG_VOID_RETURN;
9053 }
9054 
9055 
9056 /**
9057   @brief Dump information about file header page.
9058 */
9059 
static void dump_header_page(uchar *buff)
{
  LOGHANDLER_FILE_INFO header;
  char timestamp_text[21];

  /* Decode the page as a log file header, then print field by field */
  translog_interpret_file_header(&header, buff);

  printf("  This can be header page:\n");
  printf("    Timestamp: %s\n", llstr(header.timestamp, timestamp_text));
  printf("    Aria log version: %lu\n", header.maria_version);
  printf("    Server version: %lu\n", header.mysql_version);
  printf("    Server id %lu\n", header.server_id);
  printf("    Page size %lu\n", header.page_size);

  /* A page size different from the compiled-in one is only reported */
  if (header.page_size != TRANSLOG_PAGE_SIZE)
    printf("      WARNING: page size is not equal compiled in one %lu!!!\n",
           (ulong) TRANSLOG_PAGE_SIZE);

  printf("    File number %lu\n", header.file_number);
  printf("    Max lsn: " LSN_FMT "\n", LSN_IN_PARTS(header.max_lsn));
}
9084 
/*
  Printable names of record classes; dump_chunk() indexes this table with
  the rclass member of a log record type descriptor.
  NOTE(review): the order presumably has to follow the values of the
  record class enum - confirm against its declaration.
*/
static const char *record_class_string[]=
{
  "LOGRECTYPE_NOT_ALLOWED",
  "LOGRECTYPE_VARIABLE_LENGTH",
  "LOGRECTYPE_PSEUDOFIXEDLENGTH",
  "LOGRECTYPE_FIXEDLENGTH"
};
9092 
9093 
/**
  @brief dump information about transaction log chunk

  @param buffer          reference to the whole page
  @param ptr             pointer to the chunk

  @retval # reference to the next chunk
  @retval NULL can't interpret data
*/
9103 
dump_chunk(uchar * buffer,uchar * ptr)9104 static uchar *dump_chunk(uchar *buffer, uchar *ptr)
9105 {
9106   uint length;
9107   if (*ptr == TRANSLOG_FILLER)
9108   {
9109     printf("  Filler till the page end\n");
9110     for (; ptr < buffer + TRANSLOG_PAGE_SIZE; ptr++)
9111     {
9112       if (*ptr != TRANSLOG_FILLER)
9113       {
9114         printf("    WARNING: non filler character met before page end "
9115                "(page + 0x%04x: 0x%02x) (stop interpretation)!!!",
9116                (uint) (ptr - buffer), (uint) ptr[0]);
9117         return NULL;
9118       }
9119     }
9120     return ptr;
9121   }
9122   if (*ptr == 0 || *ptr == 0xFF)
9123   {
9124     printf("    WARNING: chunk can't start from 0x0 "
9125            "(stop interpretation)!!!\n");
9126     return NULL;
9127   }
9128   switch (ptr[0] & TRANSLOG_CHUNK_TYPE) {
9129   case TRANSLOG_CHUNK_LSN:
9130     printf("    LSN chunk type 0 (variable length)\n");
9131     if (likely((ptr[0] & TRANSLOG_REC_TYPE) != TRANSLOG_CHUNK_0_CONT))
9132     {
9133       printf("      Record type %u: %s  record class %s compressed LSNs: %u\n",
9134              ptr[0] & TRANSLOG_REC_TYPE,
9135              (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name ?
9136               log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name :
9137               "NULL"),
9138              record_class_string[log_record_type_descriptor[ptr[0] &
9139                                                             TRANSLOG_REC_TYPE].
9140                                                             rclass],
9141              log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].
9142              compressed_LSN);
9143       if (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
9144           LOGRECTYPE_VARIABLE_LENGTH)
9145       {
9146         printf("        WARNING: this record class here can't be used "
9147                "(stop interpretation)!!!\n");
9148         break;
9149       }
9150     }
9151     else
9152       printf("      Continuation of previous chunk 0 header \n");
9153     printf("      Short transaction id: %u\n", (uint) uint2korr(ptr + 1));
9154     {
9155       uchar *hdr_ptr= ptr + 1 + 2; /* chunk type and short trid */
9156       uint16 chunk_len;
9157       printf ("      Record length: %lu\n",
9158               (ulong) translog_variable_record_1group_decode_len(&hdr_ptr));
9159       chunk_len= uint2korr(hdr_ptr);
9160       if (chunk_len == 0)
9161         printf ("      It is 1 group record (chunk length == 0)\n");
9162       else
9163       {
9164         uint16 groups, i;
9165 
9166         printf ("      Chunk length %u\n", (uint) chunk_len);
9167         groups= uint2korr(hdr_ptr + 2);
9168         hdr_ptr+= 4;
9169         printf ("      Number of groups left to the end %u:\n", (uint) groups);
9170         for(i= 0;
9171             i < groups && hdr_ptr < buffer + TRANSLOG_PAGE_SIZE;
9172             i++, hdr_ptr+= LSN_STORE_SIZE + 1)
9173         {
9174           TRANSLOG_ADDRESS gpr_addr= lsn_korr(hdr_ptr);
9175           uint pages= hdr_ptr[LSN_STORE_SIZE];
9176           printf ("        Group +#%u: " LSN_FMT "  pages: %u\n",
9177                   (uint) i, LSN_IN_PARTS(gpr_addr), pages);
9178         }
9179       }
9180     }
9181     break;
9182   case TRANSLOG_CHUNK_FIXED:
9183     printf("    LSN chunk type 1 (fixed size)\n");
9184     printf("      Record type %u: %s  record class %s compressed LSNs: %u\n",
9185            ptr[0] & TRANSLOG_REC_TYPE,
9186            (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name ?
9187             log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].name :
9188             "NULL"),
9189            record_class_string[log_record_type_descriptor[ptr[0] &
9190                                                           TRANSLOG_REC_TYPE].
9191                                                           rclass],
9192            log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].
9193            compressed_LSN);
9194     if (log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
9195         LOGRECTYPE_PSEUDOFIXEDLENGTH &&
9196         log_record_type_descriptor[ptr[0] & TRANSLOG_REC_TYPE].rclass !=
9197         LOGRECTYPE_FIXEDLENGTH)
9198     {
9199       printf("        WARNING: this record class here can't be used "
9200              "(stop interpretation)!!!\n");
9201     }
9202     printf("      Short transaction id: %u\n", (uint) uint2korr(ptr + 1));
9203     break;
9204   case TRANSLOG_CHUNK_NOHDR:
9205     printf("    No header chunk type 2(till the end of the page)\n");
9206     if (ptr[0] & TRANSLOG_REC_TYPE)
9207     {
9208       printf("      WARNING: chunk header content record type: 0x%02x "
9209              "(dtop interpretation)!!!",
9210              (uint) ptr[0]);
9211       return NULL;
9212     }
9213     break;
9214   case TRANSLOG_CHUNK_LNGTH:
9215     printf("    Chunk with length type 3\n");
9216     if (ptr[0] & TRANSLOG_REC_TYPE)
9217     {
9218       printf("      WARNING: chunk header content record type: 0x%02x "
9219              "(dtop interpretation)!!!",
9220              (uint) ptr[0]);
9221       return NULL;
9222     }
9223     break;
9224   }
9225   {
9226     intptr offset= ptr - buffer;
9227     DBUG_ASSERT(offset <= UINT_MAX16);
9228     length= translog_get_total_chunk_length(buffer, (uint16)offset);
9229   }
9230   printf("      Length %u\n", length);
9231   ptr+= length;
9232   return ptr;
9233 }
9234 
9235 
9236 /**
9237   @brief Dump information about page with data.
9238 */
9239 
/*
  Print the page header of 'buffer' (page and file number, flags, page
  CRC and sector protection when flagged) and then every chunk on the
  page. 'handler' is the file the page was read from; it is handed to
  translog_check_sector_protection().
*/
static void dump_datapage(uchar *buffer, File handler)
{
  uchar *ptr;
  ulonglong offset;
  uint32 page, file;
  uint header_len;
  uchar flags= buffer[TRANSLOG_PAGE_FLAGS];

  page= uint3korr(buffer);
  file= uint3korr(buffer + 3);
  printf("  Page: %lu  File number: %lu\n", (ulong) page, (ulong) file);
  if (page == 0)
    printf("    WARNING: page == 0!!!\n");
  if (file == 0)
    printf("    WARNING: file == 0!!!\n");
  /* Computed in 64 bits: a 3 byte page number times page size can overflow */
  offset= (ulonglong) page * TRANSLOG_PAGE_SIZE;
  printf("  Flags (0x%x):\n", (uint) flags);
  if (flags)
  {
    if (flags & TRANSLOG_PAGE_CRC)
      printf("    Page CRC\n");
    if (flags & TRANSLOG_SECTOR_PROTECTION)
      printf("    Sector protection\n");
    if (flags & TRANSLOG_RECORD_CRC)
      printf("    Record CRC (WARNING: not yet implemented!!!)\n");
    if (flags & ~(TRANSLOG_PAGE_CRC |
                  TRANSLOG_SECTOR_PROTECTION |
                  TRANSLOG_RECORD_CRC))
    {
      printf("    WARNING: unknown flags (stop interpretation)!!!\n");
      return;
    }
  }
  else
    printf("    No flags\n");
  printf("  Page header length: %u\n",
         (header_len= page_overhead[flags]));
  /*
    The page CRC is stored right after the flags byte; it is present and
    checked when TRANSLOG_PAGE_CRC (not TRANSLOG_RECORD_CRC) is set.
  */
  if (flags & TRANSLOG_PAGE_CRC)
  {
    uint32 crc= uint4korr(buffer + TRANSLOG_PAGE_FLAGS + 1);
    uint32 ccrc;
    printf ("  Page CRC 0x%04lx\n", (ulong) crc);
    ccrc= translog_crc(buffer + header_len, TRANSLOG_PAGE_SIZE - header_len);
    if (crc != ccrc)
      printf("    WARNING: calculated CRC: 0x%04lx!!!\n", (ulong) ccrc);
  }
  if (flags & TRANSLOG_SECTOR_PROTECTION)
  {
    TRANSLOG_FILE tfile;
    {
      /* The protection table is the last part of the page header */
      uchar *table= buffer + header_len -
        TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE;
      uint i;
      printf("    Sector protection current value: 0x%02x\n", (uint) table[0]);
      for (i= 1; i < TRANSLOG_PAGE_SIZE / DISK_DRIVE_SECTOR_SIZE; i++)
      {
         printf("    Sector protection in sector: 0x%02x  saved value 0x%02x\n",
                (uint)buffer[i * DISK_DRIVE_SECTOR_SIZE],
                (uint)table[i]);
      }
    }
    /* Minimal file descriptor, filled in just for the protection check */
    tfile.number= file;
    bzero(&tfile.handler, sizeof(tfile.handler));
    tfile.handler.file= handler;
    tfile.was_recovered= 0;
    tfile.is_sync= 1;
    if (translog_check_sector_protection(buffer, &tfile))
      printf("    WARNING: sector protection found problems!!!\n");
  }
  /* Walk the chunks until the page ends or dump_chunk() gives up (NULL) */
  ptr= buffer + header_len;
  while (ptr && ptr < buffer + TRANSLOG_PAGE_SIZE)
  {
    printf("  Chunk %u %llu:\n",
           (uint) file, (ulonglong) (ptr - buffer) + offset);
    ptr= dump_chunk(buffer, ptr);
  }
}
9315 
9316 
9317 /**
9318   @brief Dump information about page.
9319 */
9320 
void dump_page(uchar *buffer, File handler)
{
  /*
    The file magic is binary data, so compare it with memcmp(): unlike
    strncmp() it always compares all sizeof(maria_trans_file_magic) bytes
    and does not stop at a zero byte.
  */
  if (memcmp(maria_trans_file_magic, buffer,
             sizeof(maria_trans_file_magic)) == 0)
  {
    dump_header_page(buffer);
  }
  /*
    The data page dump is printed in any case, also for a page that
    looked like a file header.
  */
  dump_datapage(buffer, handler);
}
9330 
9331 
9332 /*
9333   Handle backup calls
9334 */
9335 
void translog_disable_purge()
{
  /* log_purge_disabled is a counter; it is changed under purger_lock */
  mysql_mutex_lock(&log_descriptor.purger_lock);
  log_purge_disabled++;
  mysql_mutex_unlock(&log_descriptor.purger_lock);
}
9342 
void translog_enable_purge()
{
  /*
    Decrements the counter under purger_lock. There is no check against
    going below zero here.
    NOTE(review): presumably every call has to be paired with an earlier
    translog_disable_purge() - confirm against the callers.
  */
  mysql_mutex_lock(&log_descriptor.purger_lock);
  log_purge_disabled--;
  mysql_mutex_unlock(&log_descriptor.purger_lock);
}
9349