1 /* Copyright (c) 2000, 2018, Oracle and/or its affiliates.
2 Copyright (c) 2009, 2021, MariaDB Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16
17
18 /**
19 @file
20
21 @brief
22 logging of commands
23
24 @todo
25 Abort logging when we get an error in reading or writing log files
26 */
27
28 #include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */
29 #include "sql_priv.h"
30 #include "log.h"
31 #include "sql_base.h" // open_log_table
32 #include "sql_repl.h"
33 #include "sql_delete.h" // mysql_truncate
34 #include "sql_parse.h" // command_name
35 #include "sql_time.h" // calc_time_from_sec, my_time_compare
36 #include "tztime.h" // my_tz_OFFSET0, struct Time_zone
37 #include "log_event.h" // Query_log_event
38 #include "rpl_filter.h"
39 #include "rpl_rli.h"
40 #include "sql_audit.h"
41 #include "mysqld.h"
42
43 #include <my_dir.h>
44 #include <m_ctype.h> // For test_if_number
45
46 #include <set_var.h> // for Sys_last_gtid_ptr
47
48 #ifdef _WIN32
49 #include "message.h"
50 #endif
51
52 #include "sql_plugin.h"
53 #include "debug_sync.h"
54 #include "sql_show.h"
55 #include "my_pthread.h"
56 #include "semisync_master.h"
57 #include "sp_rcontext.h"
58 #include "sp_head.h"
59
60 #include "wsrep_mysqld.h"
61 #ifdef WITH_WSREP
62 #include "wsrep_trans_observer.h"
63 #endif /* WITH_WSREP */
64
/* max size of the log message */
#define MAX_LOG_BUFFER_SIZE 1024
#define MAX_TIME_SIZE 32
/*
  All-bits-set my_off_t value, used as the "no position recorded" marker
  (e.g. the initial value of binlog_cache_data::before_stmt_pos).
*/
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)
/* Truncate cache log files bigger than this */
#define CACHE_FILE_TRUNC_SIZE 65536

/*
  Expands to the stringified name of flag F followed by a space when any
  bit of F is set in V, and to the empty string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
73
/* Global binlog handlerton pointer and the global LOGGER instance. */
handlerton *binlog_hton;
LOGGER logger;

const char *log_bin_index= 0;
const char *log_bin_basename= 0;

/* The global binary log object, constructed with &sync_binlog_period. */
MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period);

/* Forward declarations of file-local helpers and binlog handlerton hooks. */
static bool test_if_number(const char *str,
                           ulong *res, bool allow_wildcards);
static int binlog_init(void *p);
static int binlog_close_connection(handlerton *hton, THD *thd);
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
                                                      THD *thd);
static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
static int binlog_xa_recover_dummy(handlerton *hton, XID *xid_list, uint len);
static int binlog_commit_by_xid(handlerton *hton, XID *xid);
static int binlog_rollback_by_xid(handlerton *hton, XID *xid);
static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);
static int binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
                              Log_event *end_ev, bool all, bool using_stmt,
                              bool using_trx);

/* Message text (with its length) for binary log write failures. */
static const LEX_CSTRING write_error_msg=
{ STRING_WITH_LEN("error writing to the binary log") };

static my_bool opt_optimize_thread_scheduling= TRUE;
ulong binlog_checksum_options;
#ifndef DBUG_OFF
/* Only present in debug builds (DBUG_OFF not defined). */
ulong opt_binlog_dbug_fsync_sleep= 0;
#endif

mysql_mutex_t LOCK_prepare_ordered;
mysql_cond_t COND_prepare_ordered;
mysql_mutex_t LOCK_after_binlog_sync;
mysql_mutex_t LOCK_commit_ordered;

/*
  Storage for the values published through binlog_status_vars_detail[]
  below.
*/
static ulonglong binlog_status_var_num_commits;
static ulonglong binlog_status_var_num_group_commits;
static ulonglong binlog_status_group_commit_trigger_count;
static ulonglong binlog_status_group_commit_trigger_lock_wait;
static ulonglong binlog_status_group_commit_trigger_timeout;
static char binlog_snapshot_file[FN_REFLEN];
static ulonglong binlog_snapshot_position;

/*
  printf-style format for a fatal logging error; takes a string (%s) and
  an integer error code (%d).
*/
static const char *fatal_log_error=
  "Could not use %s for logging (error %d). "
  "Turning logging off for the whole duration of the MariaDB server process. "
  "To turn it on again: fix the cause, shutdown the MariaDB server and "
  "restart it.";
128
129
/*
  Status variable descriptors for the binlog counters and the snapshot
  file/position declared above. Each entry gives a name, the address of
  the backing variable and its SHOW_* type; the list ends with a NullS
  entry.
*/
static SHOW_VAR binlog_status_vars_detail[]=
{
  {"commits",
    (char *)&binlog_status_var_num_commits, SHOW_LONGLONG},
  {"group_commits",
    (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG},
  {"group_commit_trigger_count",
    (char *)&binlog_status_group_commit_trigger_count, SHOW_LONGLONG},
  {"group_commit_trigger_lock_wait",
    (char *)&binlog_status_group_commit_trigger_lock_wait, SHOW_LONGLONG},
  {"group_commit_trigger_timeout",
    (char *)&binlog_status_group_commit_trigger_timeout, SHOW_LONGLONG},
  {"snapshot_file",
    (char *)&binlog_snapshot_file, SHOW_CHAR},
  {"snapshot_position",
    (char *)&binlog_snapshot_position, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};
148
/*
  Variables for the binlog background thread.
  Protected by the MYSQL_BIN_LOG::LOCK_binlog_background_thread mutex.
*/
static bool binlog_background_thread_started= false;
static bool binlog_background_thread_stop= false;
static MYSQL_BIN_LOG::xid_count_per_binlog *
    binlog_background_thread_queue= NULL;

static bool start_binlog_background_thread();

/* File-scope GTID binlog state; initialized by setup_log_handling(). */
static rpl_binlog_state rpl_global_gtid_binlog_state;
161
/**
  Initialize the file-scope GTID binlog state object
  (rpl_global_gtid_binlog_state) by calling its init() method.
*/
void setup_log_handling()
{
  rpl_global_gtid_binlog_state.init();
}
166
167
168 /**
169 purge logs, master and slave sides both, related error code
170 converter.
171 Called from @c purge_error_message(), @c MYSQL_BIN_LOG::reset_logs()
172
173 @param res an internal to purging routines error code
174
175 @return the user level error code ER_*
176 */
purge_log_get_error_code(int res)177 uint purge_log_get_error_code(int res)
178 {
179 uint errcode= 0;
180
181 switch (res) {
182 case 0: break;
183 case LOG_INFO_EOF: errcode= ER_UNKNOWN_TARGET_BINLOG; break;
184 case LOG_INFO_IO: errcode= ER_IO_ERR_LOG_INDEX_READ; break;
185 case LOG_INFO_INVALID:errcode= ER_BINLOG_PURGE_PROHIBITED; break;
186 case LOG_INFO_SEEK: errcode= ER_FSEEK_FAIL; break;
187 case LOG_INFO_MEM: errcode= ER_OUT_OF_RESOURCES; break;
188 case LOG_INFO_FATAL: errcode= ER_BINLOG_PURGE_FATAL_ERR; break;
189 case LOG_INFO_IN_USE: errcode= ER_LOG_IN_USE; break;
190 case LOG_INFO_EMFILE: errcode= ER_BINLOG_PURGE_EMFILE; break;
191 default: errcode= ER_LOG_PURGE_UNKNOWN_ERR; break;
192 }
193
194 return errcode;
195 }
196
/**
  Silence all errors and warnings reported when performing a write
  to a log table.
  Errors and warnings are not reported to the client or SQL exception
  handlers, so that the presence of logging does not interfere and affect
  the logic of an application.

  The text of the most recently intercepted condition is kept in
  m_message and can be read back with message().
*/
class Silence_log_table_errors : public Internal_error_handler
{
  /* Text of the last intercepted condition; empty until one is seen. */
  char m_message[MYSQL_ERRMSG_SIZE];
public:
  Silence_log_table_errors()
  {
    m_message[0]= '\0';
  }

  virtual ~Silence_log_table_errors() {}

  /* Defined out of line below: records msg and returns TRUE. */
  virtual bool handle_condition(THD *thd,
                                uint sql_errno,
                                const char* sql_state,
                                Sql_condition::enum_warning_level *level,
                                const char* msg,
                                Sql_condition ** cond_hdl);
  /* Returns the stored message text (empty string if none was recorded). */
  const char *message() const { return m_message; }
};
223
/**
  Intercept a condition raised while writing to a log table.

  Only @c msg and @c cond_hdl are used; the remaining arguments are
  ignored. The message text is copied into m_message (replacing any
  previously stored text) and *cond_hdl is set to NULL.

  @param msg       text of the condition being raised
  @param cond_hdl  [out] set to NULL

  @return always TRUE (presumably "condition handled" in the
          Internal_error_handler contract -- confirm against that class)
*/
bool
Silence_log_table_errors::handle_condition(THD *,
                                           uint,
                                           const char*,
                                           Sql_condition::enum_warning_level*,
                                           const char* msg,
                                           Sql_condition ** cond_hdl)
{
  *cond_hdl= NULL;
  strmake_buf(m_message, msg);
  return TRUE;
}
236
/*
  Table of the three server log print functions, in the order
  information, warning, error.
  NOTE(review): presumably indexed by a log level value -- confirm the
  mapping against the callers.
*/
sql_print_message_func sql_print_message_handlers[3] =
{
  sql_print_information,
  sql_print_warning,
  sql_print_error
};
243
244
245 /**
246 Create the name of the log file
247
248 @param[OUT] out a pointer to a new allocated name will go there
249 @param[IN] log_ext The extension for the file (e.g .log)
250 @param[IN] once whether to use malloc_once or a normal malloc.
251 */
make_default_log_name(char ** out,const char * log_ext,bool once)252 void make_default_log_name(char **out, const char* log_ext, bool once)
253 {
254 char buff[FN_REFLEN+10];
255 fn_format(buff, opt_log_basename, "", log_ext, MYF(MY_REPLACE_EXT));
256 if (once)
257 *out= my_once_strdup(buff, MYF(MY_WME));
258 else
259 {
260 my_free(*out);
261 *out= my_strdup(PSI_INSTRUMENT_ME, buff, MYF(MY_WME));
262 }
263 }
264
265
/*
  Helper classes to store non-transactional and transactional data
  before copying it to the binary log.

  binlog_cache_data wraps one IO_CACHE (cache_log) together with the
  pending rows event, status flags, the position at the start of the
  current statement and an incident flag.
*/
class binlog_cache_data
{
public:
  /*
    Start with no pending event, no status flags, no recorded statement
    position and no incident. The cache size limit and the statistics
    pointers stay zero until set_binlog_cache_info() is called.
  */
  binlog_cache_data(): m_pending(0), status(0),
  before_stmt_pos(MY_OFF_T_UNDEF),
  incident(FALSE),
  saved_max_binlog_cache_size(0), ptr_binlog_cache_use(0),
  ptr_binlog_cache_disk_use(0)
  { }

  /* The cache is expected to be empty() by the time it is destroyed. */
  ~binlog_cache_data()
  {
    DBUG_ASSERT(empty());
    close_cached_file(&cache_log);
  }

  /*
    Return 1 if there is no relevant entries in the cache

    This is: there is no pending rows event, and either
    - Cache is empty (nothing has been written to it), or
    - There are no row or critical (DDL?) events in the cache

    The status test is needed to avoid writing entries with only
    a table map entry, which would crash in do_apply_event() on the slave
    as it assumes that there is always a row entry after a table map.
  */
  bool empty() const
  {
    return (pending() == NULL &&
            (my_b_write_tell(&cache_log) == 0 ||
             ((status & (LOGGED_ROW_EVENT | LOGGED_CRITICAL)) == 0)));
  }

  /* Returns the pending rows event, or NULL if there is none. */
  Rows_log_event *pending() const
  {
    return m_pending;
  }

  /* Replaces the pending rows event pointer; the old one is not deleted. */
  void set_pending(Rows_log_event *const pending_arg)
  {
    m_pending= pending_arg;
  }

  /* Raises the incident flag (see the comment on the 'incident' member). */
  void set_incident(void)
  {
    incident= TRUE;
  }

  bool has_incident(void)
  {
    return(incident);
  }

  /*
    Discard the cache contents and return to the initial state.

    Whether the cache was empty and whether the backing file should be
    shrunk are both sampled before truncate() runs, because truncate()
    changes the values they are computed from.
  */
  void reset()
  {
    bool cache_was_empty= empty();
    bool truncate_file= (cache_log.file != -1 &&
                         my_b_write_tell(&cache_log) > CACHE_FILE_TRUNC_SIZE);
    truncate(0,1);                              // Forget what's in cache
    if (!cache_was_empty)
      compute_statistics();
    if (truncate_file)
      my_chsize(cache_log.file, 0, 0, MYF(MY_WME));

    status= 0;
    incident= FALSE;
    before_stmt_pos= MY_OFF_T_UNDEF;
    DBUG_ASSERT(empty());
  }

  /* Current position in cache_log as reported by my_b_tell(). */
  my_off_t get_byte_position() const
  {
    return my_b_tell(&cache_log);
  }

  /* Position saved by set_prev_position(), or MY_OFF_T_UNDEF if none. */
  my_off_t get_prev_position()
  {
    return(before_stmt_pos);
  }

  void set_prev_position(my_off_t pos)
  {
    before_stmt_pos= pos;
  }

  /* Truncate the cache back to the saved statement start position. */
  void restore_prev_position()
  {
    truncate(before_stmt_pos);
  }

  /*
    Truncate the cache back to a savepoint position. If that position is
    before the saved statement start position, the latter is no longer
    meaningful and is reset to MY_OFF_T_UNDEF.
  */
  void restore_savepoint(my_off_t pos)
  {
    truncate(pos);
    if (pos < before_stmt_pos)
      before_stmt_pos= MY_OFF_T_UNDEF;
  }

  /*
    Record the cache size limit and the two statistics counters for this
    cache, and apply the limit to cache_log.end_of_file.
  */
  void set_binlog_cache_info(my_off_t param_max_binlog_cache_size,
                             ulong *param_ptr_binlog_cache_use,
                             ulong *param_ptr_binlog_cache_disk_use)
  {
    /*
      The assertions guarantee that the set_binlog_cache_info is
      called just once and information passed as parameters are
      never zero.

      This is done while calling the constructor binlog_cache_mngr.
      We cannot set information in the constructor binlog_cache_data
      because the space for binlog_cache_mngr is allocated through
      a placement new.

      In the future, we can refactor this and change it to avoid
      the set_binlog_cache_info.
    */
    DBUG_ASSERT(saved_max_binlog_cache_size == 0);
    DBUG_ASSERT(param_max_binlog_cache_size != 0);
    DBUG_ASSERT(ptr_binlog_cache_use == 0);
    DBUG_ASSERT(param_ptr_binlog_cache_use != 0);
    DBUG_ASSERT(ptr_binlog_cache_disk_use == 0);
    DBUG_ASSERT(param_ptr_binlog_cache_disk_use != 0);

    saved_max_binlog_cache_size= param_max_binlog_cache_size;
    ptr_binlog_cache_use= param_ptr_binlog_cache_use;
    ptr_binlog_cache_disk_use= param_ptr_binlog_cache_disk_use;
    cache_log.end_of_file= saved_max_binlog_cache_size;
  }

  /* OR the given flag into the status bit set. */
  void add_status(enum_logged_status status_arg)
  {
    status|= status_arg;
  }

  /*
    Cache to store data before copying it to the binary log.
  */
  IO_CACHE cache_log;

private:
  /*
    Pending binrows event. This event is the event where the rows are currently
    written.
  */
  Rows_log_event *m_pending;

  /*
    Bit flags for what has been written to cache. Used to
    discard logs without any data changes.
    see enum_logged_status;
  */
  uint32 status;

  /*
    Binlog position before the start of the current statement.
  */
  my_off_t before_stmt_pos;

  /*
    This indicates that some events did not get into the cache and most likely
    it is corrupted.
  */
  bool incident;

  /**
    This function computes binlog cache and disk usage.

    The cache-use counter is always incremented. The disk-use counter is
    updated only if the cache performed disk writes, after which the
    cache's disk_writes count is cleared.
  */
  void compute_statistics()
  {
    statistic_increment(*ptr_binlog_cache_use, &LOCK_status);
    if (cache_log.disk_writes != 0)
    {
#ifdef REAL_STATISTICS
      statistic_add(*ptr_binlog_cache_disk_use,
                    cache_log.disk_writes, &LOCK_status);
#else
      statistic_increment(*ptr_binlog_cache_disk_use, &LOCK_status);
#endif
      cache_log.disk_writes= 0;
    }
  }

  /*
    Stores the values of maximum size of the cache allowed when this cache
    is configured. This corresponds to either
    . max_binlog_cache_size or max_binlog_stmt_cache_size.
  */
  my_off_t saved_max_binlog_cache_size;

  /*
    Stores a pointer to the status variable that keeps track of the in-memory
    cache usage. This corresponds to either
    . binlog_cache_use or binlog_stmt_cache_use.
  */
  ulong *ptr_binlog_cache_use;

  /*
    Stores a pointer to the status variable that keeps track of the disk
    cache usage. This corresponds to either
    . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
  */
  ulong *ptr_binlog_cache_disk_use;

  /*
    It truncates the cache to a certain position. This includes deleting the
    pending event.

    The cache error flag is cleared first, and the size limit is
    re-applied to end_of_file after the cache has been reinitialized.
  */
  void truncate(my_off_t pos, bool reset_cache=0)
  {
    DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
    cache_log.error=0;
    if (pending())
    {
      delete pending();
      set_pending(0);
    }
    reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, reset_cache);
    cache_log.end_of_file= saved_max_binlog_cache_size;
  }

  /* Declared but not defined here: copying is not supported. */
  binlog_cache_data& operator=(const binlog_cache_data& info);
  binlog_cache_data(const binlog_cache_data& info);
};
492
493
/*
  Forward a logged-status flag to the attached binlog cache.
  Does nothing when no cache is attached (cache_data is NULL).
*/
void Log_event_writer::add_status(enum_logged_status status)
{
  if (likely(cache_data))
    cache_data->add_status(status);
}
499
set_incident()500 void Log_event_writer::set_incident()
501 {
502 cache_data->set_incident();
503 }
504
505
506 class binlog_cache_mngr {
507 public:
binlog_cache_mngr(my_off_t param_max_binlog_stmt_cache_size,my_off_t param_max_binlog_cache_size,ulong * param_ptr_binlog_stmt_cache_use,ulong * param_ptr_binlog_stmt_cache_disk_use,ulong * param_ptr_binlog_cache_use,ulong * param_ptr_binlog_cache_disk_use)508 binlog_cache_mngr(my_off_t param_max_binlog_stmt_cache_size,
509 my_off_t param_max_binlog_cache_size,
510 ulong *param_ptr_binlog_stmt_cache_use,
511 ulong *param_ptr_binlog_stmt_cache_disk_use,
512 ulong *param_ptr_binlog_cache_use,
513 ulong *param_ptr_binlog_cache_disk_use)
514 : last_commit_pos_offset(0), using_xa(FALSE), xa_xid(0)
515 {
516 stmt_cache.set_binlog_cache_info(param_max_binlog_stmt_cache_size,
517 param_ptr_binlog_stmt_cache_use,
518 param_ptr_binlog_stmt_cache_disk_use);
519 trx_cache.set_binlog_cache_info(param_max_binlog_cache_size,
520 param_ptr_binlog_cache_use,
521 param_ptr_binlog_cache_disk_use);
522 last_commit_pos_file[0]= 0;
523 }
524
reset(bool do_stmt,bool do_trx)525 void reset(bool do_stmt, bool do_trx)
526 {
527 if (do_stmt)
528 stmt_cache.reset();
529 if (do_trx)
530 {
531 trx_cache.reset();
532 using_xa= FALSE;
533 last_commit_pos_file[0]= 0;
534 last_commit_pos_offset= 0;
535 }
536 }
537
get_binlog_cache_data(bool is_transactional)538 binlog_cache_data* get_binlog_cache_data(bool is_transactional)
539 {
540 return (is_transactional ? &trx_cache : &stmt_cache);
541 }
542
get_binlog_cache_log(bool is_transactional)543 IO_CACHE* get_binlog_cache_log(bool is_transactional)
544 {
545 return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
546 }
547
548 binlog_cache_data stmt_cache;
549
550 binlog_cache_data trx_cache;
551
552 /*
553 Binlog position for current transaction.
554 For START TRANSACTION WITH CONSISTENT SNAPSHOT, this is the binlog
555 position corresponding to the snapshot taken. During (and after) commit,
556 this is set to the binlog position corresponding to just after the
557 commit (so storage engines can store it in their transaction log).
558 */
559 char last_commit_pos_file[FN_REFLEN];
560 my_off_t last_commit_pos_offset;
561
562 /*
563 Flag set true if this transaction is committed with log_xid() as part of
564 XA, false if not.
565 */
566 bool using_xa;
567 my_xid xa_xid;
568 bool need_unlog;
569 /*
570 Id of binlog that transaction was written to; only needed if need_unlog is
571 true.
572 */
573 ulong binlog_id;
574 /* Set if we get an error during commit that must be returned from unlog(). */
575 bool delayed_error;
576
577 private:
578
579 binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
580 binlog_cache_mngr(const binlog_cache_mngr& info);
581 };
582
is_log_table_enabled(uint log_table_type)583 bool LOGGER::is_log_table_enabled(uint log_table_type)
584 {
585 switch (log_table_type) {
586 case QUERY_LOG_SLOW:
587 return (table_log_handler != NULL) && global_system_variables.sql_log_slow
588 && (log_output_options & LOG_TABLE);
589 case QUERY_LOG_GENERAL:
590 return (table_log_handler != NULL) && opt_log
591 && (log_output_options & LOG_TABLE);
592 default:
593 DBUG_ASSERT(0);
594 return FALSE; /* make compiler happy */
595 }
596 }
597
598 /**
599 Check if a given table is opened log table
600
601 @param table Table to check
602 @param check_if_opened Only fail if it's a log table in use
603 @param error_msg String to put in error message if not ok.
604 No error message if 0
605 @return 0 ok
606 @return # Type of log file
607 */
608
check_if_log_table(const TABLE_LIST * table,bool check_if_opened,const char * error_msg)609 int check_if_log_table(const TABLE_LIST *table,
610 bool check_if_opened,
611 const char *error_msg)
612 {
613 int result= 0;
614 if (table->db.length == 5 &&
615 !my_strcasecmp(table_alias_charset, table->db.str, "mysql"))
616 {
617 const char *table_name= table->table_name.str;
618
619 if (table->table_name.length == 11 &&
620 !my_strcasecmp(table_alias_charset, table_name, "general_log"))
621 {
622 result= QUERY_LOG_GENERAL;
623 goto end;
624 }
625
626 if (table->table_name.length == 8 &&
627 !my_strcasecmp(table_alias_charset, table_name, "slow_log"))
628 {
629 result= QUERY_LOG_SLOW;
630 goto end;
631 }
632 }
633 return 0;
634
635 end:
636 if (!check_if_opened || logger.is_log_table_enabled(result))
637 {
638 if (error_msg)
639 my_error(ER_BAD_LOG_STATEMENT, MYF(0), error_msg);
640 return result;
641 }
642 return 0;
643 }
644
645
/* Nothing to set up: the constructor body is intentionally empty. */
Log_to_csv_event_handler::Log_to_csv_event_handler()
{
}
649
650
/* Nothing to release: the destructor body is intentionally empty. */
Log_to_csv_event_handler::~Log_to_csv_event_handler()
{
}
654
655
/* Mark the log tables as no longer initialized in the global logger. */
void Log_to_csv_event_handler::cleanup()
{
  logger.is_log_tables_initialized= FALSE;
}
660
661 /* log event handlers */
662
/**
  Log command to the general log table

  Log given command to the general log table (mysql.general_log).

  @param  thd               THD used to open the log table and write the row
  @param  event_time        command start timestamp
  @param  user_host         the pointer to the string with user@host info
  @param  user_host_len     length of the user_host string. this is computed
                            once and passed to all general log event handlers
  @param  thread_id_arg     Id of the thread, issued a query
  @param  command_type      the type of the command being logged
  @param  command_type_len  the length of the command_type string
  @param  sql_text          the very text of the query being executed
  @param  sql_text_len      the length of sql_text string
  @param  client_cs         character set used when storing the string
                            arguments into the table fields


  @return This function attempts to never call my_error(). This is
  necessary, because general logging happens already after a statement
  status has been sent to the client, so the client can not see the
  error anyway. Besides, the error is not related to the statement
  being executed and is internal, and thus should be handled
  internally (@todo: how?).
  If a write to the table has failed, the function reports a short
  error message with sql_print_error() (unless the thread has been
  killed). The failure is also indicated in the return value.

  @retval  FALSE   OK
  @retval  TRUE    error occurred
*/

bool Log_to_csv_event_handler::
  log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
              const char *command_type, size_t command_type_len,
              const char *sql_text, size_t sql_text_len,
              CHARSET_INFO *client_cs)
{
  TABLE_LIST table_list;
  TABLE *table;
  bool result= TRUE;
  /* Flags telling the cleanup code at 'err' what has to be undone. */
  bool need_close= FALSE;
  bool need_pop= FALSE;
  bool need_rnd_end= FALSE;
  uint field_index;
  Silence_log_table_errors error_handler;
  Open_tables_backup open_tables_backup;
  bool save_time_zone_used;
  DBUG_ENTER("log_general");

  /*
    CSV uses TIME_to_timestamp() internally if table needs to be repaired
    which will set thd->time_zone_used
  */
  save_time_zone_used= thd->time_zone_used;

  table_list.init_one_table(&MYSQL_SCHEMA_NAME, &GENERAL_LOG_NAME, 0,
                            TL_WRITE_CONCURRENT_INSERT);

  /*
    1) open_log_table generates an error if the
    table can not be opened or is corrupted.
    2) "INSERT INTO general_log" can generate warning sometimes.

    Suppress these warnings and errors, they can't be dealt with
    properly anyway.

    QQ: this problem needs to be studied in more detail.
    Comment out the 2 lines below and run "cast.test" to see what's
    happening.
  */
  thd->push_internal_handler(& error_handler);
  need_pop= TRUE;

  if (!(table= open_log_table(thd, &table_list, &open_tables_backup)))
    goto err;

  need_close= TRUE;

  if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
      table->file->ha_rnd_init_with_error(0))
    goto err;

  need_rnd_end= TRUE;

  /* Honor next number columns if present */
  table->next_number_field= table->found_next_number_field;

  /*
    NOTE: we do not call restore_record() here, as all fields are
    filled by the Logger (=> no need to load default ones).
  */

  /*
    table->field[0] (the event time) is set explicitly from event_time
    below rather than being left to its default value.
  */

  /* check that all columns exist */
  if (table->s->fields < 6)
    goto err;

  DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);

  table->field[0]->store_timestamp(
                  hrtime_to_my_time(event_time), hrtime_sec_part(event_time));

  /* do a write */
  if (table->field[1]->store(user_host, user_host_len, client_cs) ||
      table->field[2]->store((longlong) thread_id_arg, TRUE) ||
      table->field[3]->store((longlong) global_system_variables.server_id,
                             TRUE) ||
      table->field[4]->store(command_type, command_type_len, client_cs))
    goto err;

  /*
    A positive return value in store() means truncation.
    Still logging a message in the log in this case.
  */
  table->field[5]->flags|= FIELDFLAG_HEX_ESCAPE;
  if (table->field[5]->store(sql_text, sql_text_len, client_cs) < 0)
    goto err;

  /* mark all fields as not null */
  table->field[1]->set_notnull();
  table->field[2]->set_notnull();
  table->field[3]->set_notnull();
  table->field[4]->set_notnull();
  table->field[5]->set_notnull();

  /* Set any extra columns to their default values */
  for (field_index= 6 ; field_index < table->s->fields ; field_index++)
  {
    table->field[field_index]->set_default();
  }

  if (table->file->ha_write_row(table->record[0]))
    goto err;

  result= FALSE;

err:
  /* Reached on success too; 'result' is still TRUE only on failure. */
  if (result && !thd->killed)
    sql_print_error("Failed to write to mysql.general_log: %s",
                    error_handler.message());

  if (need_rnd_end)
  {
    table->file->ha_rnd_end();
    table->file->ha_release_auto_increment();
  }
  if (need_pop)
    thd->pop_internal_handler();
  if (need_close)
    close_log_table(thd, &open_tables_backup);

  thd->time_zone_used= save_time_zone_used;
  DBUG_RETURN(result);
}
819
820
/*
  Log a query to the slow log table

  SYNOPSIS
    log_slow()
    thd               THD of the query
    current_time      current timestamp
    user_host         the pointer to the string with user@host info
    user_host_len     length of the user_host string. this is computed once
                      and passed to all general log event handlers
    query_utime       Amount of time the query took to execute (in microseconds)
    lock_utime        Amount of time the query was locked (in microseconds)
    is_command        The flag, which determines, whether the sql_text is a
                      query or an administrator command (these are treated
                      differently by the old logging routines).
                      Not used by this handler.
    sql_text          the very text of the query or administrator command
                      processed
    sql_text_len      the length of sql_text string

  DESCRIPTION

   Log a query to the slow log table (mysql.slow_log). Errors raised while
   writing are intercepted by a Silence_log_table_errors handler; on
   failure a message is printed with sql_print_error() unless the thread
   has been killed.

  RETURN
    FALSE - OK
    TRUE - error occurred
*/

bool Log_to_csv_event_handler::
  log_slow(THD *thd, my_hrtime_t current_time,
           const char *user_host, size_t user_host_len,
           ulonglong query_utime, ulonglong lock_utime, bool is_command,
           const char *sql_text, size_t sql_text_len)
{
  TABLE_LIST table_list;
  TABLE *table;
  bool result= TRUE;
  /* Flags telling the cleanup code at 'err' what has to be undone. */
  bool need_close= FALSE;
  bool need_rnd_end= FALSE;
  Silence_log_table_errors error_handler;
  Open_tables_backup open_tables_backup;
  CHARSET_INFO *client_cs= thd->variables.character_set_client;
  bool save_time_zone_used;
  /*
    Split the microsecond durations into whole seconds (capped at
    TIME_MAX_VALUE_SECONDS) and the remaining microseconds.
  */
  ulong query_time= (ulong) MY_MIN(query_utime/1000000, TIME_MAX_VALUE_SECONDS);
  ulong lock_time=  (ulong) MY_MIN(lock_utime/1000000, TIME_MAX_VALUE_SECONDS);
  ulong query_time_micro= (ulong) (query_utime % 1000000);
  ulong lock_time_micro=  (ulong) (lock_utime % 1000000);
  DBUG_ENTER("Log_to_csv_event_handler::log_slow");

  thd->push_internal_handler(& error_handler);
  /*
    CSV uses TIME_to_timestamp() internally if table needs to be repaired
    which will set thd->time_zone_used
  */
  save_time_zone_used= thd->time_zone_used;

  table_list.init_one_table(&MYSQL_SCHEMA_NAME, &SLOW_LOG_NAME, 0,
                            TL_WRITE_CONCURRENT_INSERT);

  if (!(table= open_log_table(thd, &table_list, &open_tables_backup)))
    goto err;

  need_close= TRUE;

  if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
      table->file->ha_rnd_init_with_error(0))
    goto err;

  need_rnd_end= TRUE;

  /* Honor next number columns if present */
  table->next_number_field= table->found_next_number_field;

  restore_record(table, s->default_values);    // Get empty record

  /* check that all columns exist */
  if (table->s->fields < 13)
    goto err;

  /* store the time and user values */
  DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
  table->field[0]->store_timestamp(
             hrtime_to_my_time(current_time), hrtime_sec_part(current_time));
  if (table->field[1]->store(user_host, user_host_len, client_cs))
    goto err;

  /*
    A TIME field can not hold the full longlong range; query_time or
    lock_time may be truncated without warning here, if greater than
    839 hours (~35 days)
  */
  MYSQL_TIME t;
  t.neg= 0;

  /* fill in query_time field */
  calc_time_from_sec(&t, query_time, query_time_micro);
  if (table->field[2]->store_time(&t))
    goto err;
  /* lock_time */
  calc_time_from_sec(&t, lock_time, lock_time_micro);
  if (table->field[3]->store_time(&t))
    goto err;
  /* rows_sent */
  if (table->field[4]->store((longlong) thd->get_sent_row_count(), TRUE))
    goto err;
  /* rows_examined */
  if (table->field[5]->store((longlong) thd->get_examined_row_count(), TRUE))
    goto err;

  /* fill database field; left at its default when no database is selected */
  if (thd->db.str)
  {
    if (table->field[6]->store(thd->db.str, thd->db.length, client_cs))
      goto err;
    table->field[6]->set_notnull();
  }

  /* field[7] is only filled when the statement depended on the value */
  if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
  {
    if (table->
        field[7]->store((longlong)
                        thd->first_successful_insert_id_in_prev_stmt_for_binlog,
                        TRUE))
      goto err;
    table->field[7]->set_notnull();
  }

  /*
    Set value if we do an insert on autoincrement column. Note that for
    some engines (those for which get_auto_increment() does not leave a
    table lock until the statement ends), this is just the first value and
    the next ones used may not be contiguous to it.
  */
  if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
  {
    if (table->
        field[8]->store((longlong)
          thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(), TRUE))
      goto err;
    table->field[8]->set_notnull();
  }

  /* server id */
  if (table->field[9]->store((longlong)global_system_variables.server_id, TRUE))
    goto err;
  table->field[9]->set_notnull();

  /*
    Column sql_text.
    A positive return value in store() means truncation.
    Still logging a message in the log in this case.
  */
  if (table->field[10]->store(sql_text, sql_text_len, client_cs) < 0)
    goto err;

  /* thread id */
  if (table->field[11]->store((longlong) thd->thread_id, TRUE))
    goto err;

  /* Rows_affected; 0 unless the statement's diagnostics area is OK */
  if (table->field[12]->store(thd->get_stmt_da()->is_ok() ?
                              (longlong) thd->get_stmt_da()->affected_rows() :
                              0, TRUE))
    goto err;

  if (table->file->ha_write_row(table->record[0]))
    goto err;

  result= FALSE;

err:
  /* Reached on success too; 'result' is still TRUE only on failure. */
  thd->pop_internal_handler();

  if (result && !thd->killed)
    sql_print_error("Failed to write to mysql.slow_log: %s",
                    error_handler.message());

  if (need_rnd_end)
  {
    table->file->ha_rnd_end();
    table->file->ha_release_auto_increment();
  }
  if (need_close)
    close_log_table(thd, &open_tables_backup);
  thd->time_zone_used= save_time_zone_used;
  DBUG_RETURN(result);
}
1006
1007 int Log_to_csv_event_handler::
activate_log(THD * thd,uint log_table_type)1008 activate_log(THD *thd, uint log_table_type)
1009 {
1010 TABLE_LIST table_list;
1011 TABLE *table;
1012 LEX_CSTRING *UNINIT_VAR(log_name);
1013 int result;
1014 Open_tables_backup open_tables_backup;
1015
1016 DBUG_ENTER("Log_to_csv_event_handler::activate_log");
1017
1018 if (log_table_type == QUERY_LOG_GENERAL)
1019 {
1020 log_name= &GENERAL_LOG_NAME;
1021 }
1022 else
1023 {
1024 DBUG_ASSERT(log_table_type == QUERY_LOG_SLOW);
1025
1026 log_name= &SLOW_LOG_NAME;
1027 }
1028 table_list.init_one_table(&MYSQL_SCHEMA_NAME, log_name, 0, TL_WRITE_CONCURRENT_INSERT);
1029
1030 table= open_log_table(thd, &table_list, &open_tables_backup);
1031 if (table)
1032 {
1033 result= 0;
1034 close_log_table(thd, &open_tables_backup);
1035 }
1036 else
1037 result= 1;
1038
1039 DBUG_RETURN(result);
1040 }
1041
/**
  Error-log entry point of the table handler.

  There is no error log table, so this must never be reached: debug builds
  assert, other builds report FALSE without logging anything.
*/
bool Log_to_csv_event_handler::
  log_error(enum loglevel level, const char *format, va_list args)
{
  /* No log table is implemented */
  DBUG_ASSERT(0);
  return FALSE;
}
1049
/**
  Error-log entry point of the file handler.

  Forwards the message unchanged to vprint_msg_to_log() and returns its
  result.

  @param level   Significance of the message
  @param format  Format string of the message
  @param args    Arguments for the format string
*/
bool Log_to_file_event_handler::
  log_error(enum loglevel level, const char *format,
            va_list args)
{
  return vprint_msg_to_log(level, format, args);
}
1056
/**
  Initialize the pthread objects of both file logs owned by this handler
  (general log first, then slow log).
*/
void Log_to_file_event_handler::init_pthread_objects()
{
  mysql_log.init_pthread_objects();
  mysql_slow_log.init_pthread_objects();
}
1062
1063
1064 /** Wrapper around MYSQL_LOG::write() for slow log. */
1065
1066 bool Log_to_file_event_handler::
log_slow(THD * thd,my_hrtime_t current_time,const char * user_host,size_t user_host_len,ulonglong query_utime,ulonglong lock_utime,bool is_command,const char * sql_text,size_t sql_text_len)1067 log_slow(THD *thd, my_hrtime_t current_time,
1068 const char *user_host, size_t user_host_len,
1069 ulonglong query_utime, ulonglong lock_utime, bool is_command,
1070 const char *sql_text, size_t sql_text_len)
1071 {
1072 Silence_log_table_errors error_handler;
1073 thd->push_internal_handler(&error_handler);
1074 bool retval= mysql_slow_log.write(thd, hrtime_to_my_time(current_time),
1075 user_host, user_host_len,
1076 query_utime, lock_utime, is_command,
1077 sql_text, sql_text_len);
1078 thd->pop_internal_handler();
1079 return retval;
1080 }
1081
1082
1083 /**
1084 Wrapper around MYSQL_LOG::write() for general log. We need it since we
1085 want all log event handlers to have the same signature.
1086 */
1087
1088 bool Log_to_file_event_handler::
log_general(THD * thd,my_hrtime_t event_time,const char * user_host,size_t user_host_len,my_thread_id thread_id_arg,const char * command_type,size_t command_type_len,const char * sql_text,size_t sql_text_len,CHARSET_INFO * client_cs)1089 log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
1090 const char *command_type, size_t command_type_len,
1091 const char *sql_text, size_t sql_text_len,
1092 CHARSET_INFO *client_cs)
1093 {
1094 Silence_log_table_errors error_handler;
1095 thd->push_internal_handler(&error_handler);
1096 bool retval= mysql_log.write(hrtime_to_time(event_time), user_host,
1097 user_host_len,
1098 thread_id_arg, command_type, command_type_len,
1099 sql_text, sql_text_len);
1100 thd->pop_internal_handler();
1101 return retval;
1102 }
1103
1104
init()1105 bool Log_to_file_event_handler::init()
1106 {
1107 if (!is_initialized)
1108 {
1109 if (global_system_variables.sql_log_slow)
1110 mysql_slow_log.open_slow_log(opt_slow_logname);
1111
1112 if (opt_log)
1113 mysql_log.open_query_log(opt_logname);
1114
1115 is_initialized= TRUE;
1116 }
1117
1118 return FALSE;
1119 }
1120
1121
/**
  Run cleanup() on both file logs owned by this handler (general log
  first, then slow log).
*/
void Log_to_file_event_handler::cleanup()
{
  mysql_log.cleanup();
  mysql_slow_log.cleanup();
}
1127
/**
  Reopen the log files that are currently enabled.

  The general log is reopened when opt_log is set, the slow log when
  global_system_variables.sql_log_slow is set.  No lock is taken here.
*/
void Log_to_file_event_handler::flush()
{
  /* reopen log files */
  if (opt_log)
    mysql_log.reopen_file();
  if (global_system_variables.sql_log_slow)
    mysql_slow_log.reopen_file();
}
1136
1137 /*
1138 Log error with all enabled log event handlers
1139
1140 SYNOPSIS
1141 error_log_print()
1142
1143 level The level of the error significance: NOTE,
1144 WARNING or ERROR.
1145 format format string for the error message
1146 args list of arguments for the format string
1147
1148 RETURN
1149 FALSE - OK
1150 TRUE - error occurred
1151 */
1152
error_log_print(enum loglevel level,const char * format,va_list args)1153 bool LOGGER::error_log_print(enum loglevel level, const char *format,
1154 va_list args)
1155 {
1156 bool error= FALSE;
1157 Log_event_handler **current_handler;
1158 THD *thd= current_thd;
1159
1160 if (likely(thd))
1161 thd->error_printed_to_log= 1;
1162
1163 /* currently we don't need locking here as there is no error_log table */
1164 for (current_handler= error_log_handler_list ; *current_handler ;)
1165 error= (*current_handler++)->log_error(level, format, args) || error;
1166
1167 return error;
1168 }
1169
1170
/**
  First stage of logger shutdown.

  Destroys LOCK_logger, cleans up and deletes the table log handler, and
  runs cleanup() on the file log handler.  The file handler object itself
  is kept; cleanup_end() deletes it.
*/
void LOGGER::cleanup_base()
{
  DBUG_ASSERT(inited == 1);
  mysql_rwlock_destroy(&LOCK_logger);
  if (table_log_handler)
  {
    table_log_handler->cleanup();
    delete table_log_handler;
    table_log_handler= NULL;
  }
  if (file_log_handler)
    file_log_handler->cleanup();
}
1184
1185
cleanup_end()1186 void LOGGER::cleanup_end()
1187 {
1188 DBUG_ASSERT(inited == 1);
1189 if (file_log_handler)
1190 {
1191 delete file_log_handler;
1192 file_log_handler=NULL;
1193 }
1194 inited= 0;
1195 }
1196
1197
/**
  Perform basic log initialization: create file-based log handler and
  init error log.

  Marks the logger as initialized, creates the file log handler if it does
  not exist yet, routes the error log to that handler, and finally
  initializes the handler's pthread objects and LOCK_logger.
*/
void LOGGER::init_base()
{
  DBUG_ASSERT(inited == 0);
  inited= 1;

  /*
    Here we create file log handler. We don't do it for the table log handler
    here as it cannot be created so early. The reason is THD initialization,
    which depends on the system variables (parsed later).
  */
  if (!file_log_handler)
    file_log_handler= new Log_to_file_event_handler;

  /* by default we use traditional error log */
  init_error_log(LOG_FILE);

  file_log_handler->init_pthread_objects();
  mysql_rwlock_init(key_rwlock_LOCK_logger, &LOCK_logger);
}
1221
1222
init_log_tables()1223 void LOGGER::init_log_tables()
1224 {
1225 if (!table_log_handler)
1226 table_log_handler= new Log_to_csv_event_handler;
1227
1228 if (!is_log_tables_initialized &&
1229 !table_log_handler->init() && !file_log_handler->init())
1230 is_log_tables_initialized= TRUE;
1231 }
1232
1233
1234 /**
1235 Close and reopen the slow log (with locks).
1236
1237 @returns FALSE.
1238 */
flush_slow_log()1239 bool LOGGER::flush_slow_log()
1240 {
1241 /*
1242 Now we lock logger, as nobody should be able to use logging routines while
1243 log tables are closed
1244 */
1245 logger.lock_exclusive();
1246
1247 /* Reopen slow log file */
1248 if (global_system_variables.sql_log_slow)
1249 file_log_handler->get_mysql_slow_log()->reopen_file();
1250
1251 /* End of log flush */
1252 logger.unlock();
1253
1254 return 0;
1255 }
1256
1257
1258 /**
1259 Close and reopen the general log (with locks).
1260
1261 @returns FALSE.
1262 */
flush_general_log()1263 bool LOGGER::flush_general_log()
1264 {
1265 /*
1266 Now we lock logger, as nobody should be able to use logging routines while
1267 log tables are closed
1268 */
1269 logger.lock_exclusive();
1270
1271 /* Reopen general log file */
1272 if (opt_log)
1273 file_log_handler->get_mysql_log()->reopen_file();
1274
1275 /* End of log flush */
1276 logger.unlock();
1277
1278 return 0;
1279 }
1280
1281
1282 /*
1283 Log slow query with all enabled log event handlers
1284
1285 SYNOPSIS
1286 slow_log_print()
1287
1288 thd THD of the query being logged
1289 query The query being logged
1290 query_length The length of the query string
1291 current_utime Current time in microseconds (from undefined start)
1292
1293 RETURN
1294 FALSE OK
1295 TRUE error occurred
1296 */
1297
slow_log_print(THD * thd,const char * query,size_t query_length,ulonglong current_utime)1298 bool LOGGER::slow_log_print(THD *thd, const char *query, size_t query_length,
1299 ulonglong current_utime)
1300
1301 {
1302 bool error= FALSE;
1303 Log_event_handler **current_handler;
1304 bool is_command= FALSE;
1305 char user_host_buff[MAX_USER_HOST_SIZE + 1];
1306 Security_context *sctx= thd->security_ctx;
1307 uint user_host_len= 0;
1308 ulonglong query_utime, lock_utime;
1309
1310 DBUG_ASSERT(thd->enable_slow_log);
1311 /*
1312 Print the message to the buffer if we have slow log enabled
1313 */
1314
1315 if (*slow_log_handler_list)
1316 {
1317 /* do not log slow queries from replication threads */
1318 if (!thd->variables.sql_log_slow)
1319 return 0;
1320
1321 lock_shared();
1322 if (!global_system_variables.sql_log_slow)
1323 {
1324 unlock();
1325 return 0;
1326 }
1327
1328 /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
1329 user_host_len= (uint)(strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
1330 sctx->priv_user, "[",
1331 sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), "] @ ",
1332 sctx->host ? sctx->host : "", " [",
1333 sctx->ip ? sctx->ip : "", "]", NullS) -
1334 user_host_buff);
1335
1336 DBUG_ASSERT(thd->start_utime);
1337 DBUG_ASSERT(thd->start_time);
1338 query_utime= (current_utime - thd->start_utime);
1339 lock_utime= (thd->utime_after_lock - thd->start_utime);
1340 my_hrtime_t current_time= { hrtime_from_time(thd->start_time) +
1341 thd->start_time_sec_part + query_utime };
1342
1343 if (!query || thd->get_command() == COM_STMT_PREPARE)
1344 {
1345 is_command= TRUE;
1346 query= command_name[thd->get_command()].str;
1347 query_length= (uint)command_name[thd->get_command()].length;
1348 }
1349
1350 for (current_handler= slow_log_handler_list; *current_handler ;)
1351 error= (*current_handler++)->log_slow(thd, current_time,
1352 user_host_buff, user_host_len,
1353 query_utime, lock_utime, is_command,
1354 query, query_length) || error;
1355
1356 unlock();
1357 }
1358 return error;
1359 }
1360
general_log_write(THD * thd,enum enum_server_command command,const char * query,size_t query_length)1361 bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
1362 const char *query, size_t query_length)
1363 {
1364 bool error= FALSE;
1365 Log_event_handler **current_handler= general_log_handler_list;
1366 char user_host_buff[MAX_USER_HOST_SIZE + 1];
1367 uint user_host_len= 0;
1368 my_hrtime_t current_time;
1369
1370 DBUG_ASSERT(thd);
1371
1372 user_host_len= make_user_name(thd, user_host_buff);
1373
1374 current_time= my_hrtime();
1375
1376 mysql_audit_general_log(thd, hrtime_to_time(current_time),
1377 user_host_buff, user_host_len,
1378 command_name[(uint) command].str,
1379 (uint)command_name[(uint) command].length,
1380 query, (uint)query_length);
1381
1382 if (opt_log && log_command(thd, command))
1383 {
1384 lock_shared();
1385 while (*current_handler)
1386 error|= (*current_handler++)->
1387 log_general(thd, current_time, user_host_buff,
1388 user_host_len, thd->thread_id,
1389 command_name[(uint) command].str,
1390 command_name[(uint) command].length,
1391 query, query_length,
1392 thd->variables.character_set_client) || error;
1393 unlock();
1394 }
1395
1396 return error;
1397 }
1398
general_log_print(THD * thd,enum enum_server_command command,const char * format,va_list args)1399 bool LOGGER::general_log_print(THD *thd, enum enum_server_command command,
1400 const char *format, va_list args)
1401 {
1402 size_t message_buff_len= 0;
1403 char message_buff[MAX_LOG_BUFFER_SIZE];
1404
1405 /* prepare message */
1406 if (format)
1407 message_buff_len= my_vsnprintf(message_buff, sizeof(message_buff),
1408 format, args);
1409 else
1410 message_buff[0]= '\0';
1411
1412 return general_log_write(thd, command, message_buff, message_buff_len);
1413 }
1414
init_error_log(ulonglong error_log_printer)1415 void LOGGER::init_error_log(ulonglong error_log_printer)
1416 {
1417 if (error_log_printer & LOG_NONE)
1418 {
1419 error_log_handler_list[0]= 0;
1420 return;
1421 }
1422
1423 switch (error_log_printer) {
1424 case LOG_FILE:
1425 error_log_handler_list[0]= file_log_handler;
1426 error_log_handler_list[1]= 0;
1427 break;
1428 /* these two are disabled for now */
1429 case LOG_TABLE:
1430 DBUG_ASSERT(0);
1431 break;
1432 case LOG_TABLE|LOG_FILE:
1433 DBUG_ASSERT(0);
1434 break;
1435 }
1436 }
1437
init_slow_log(ulonglong slow_log_printer)1438 void LOGGER::init_slow_log(ulonglong slow_log_printer)
1439 {
1440 if (slow_log_printer & LOG_NONE)
1441 {
1442 slow_log_handler_list[0]= 0;
1443 return;
1444 }
1445
1446 switch (slow_log_printer) {
1447 case LOG_FILE:
1448 slow_log_handler_list[0]= file_log_handler;
1449 slow_log_handler_list[1]= 0;
1450 break;
1451 case LOG_TABLE:
1452 slow_log_handler_list[0]= table_log_handler;
1453 slow_log_handler_list[1]= 0;
1454 break;
1455 case LOG_TABLE|LOG_FILE:
1456 slow_log_handler_list[0]= file_log_handler;
1457 slow_log_handler_list[1]= table_log_handler;
1458 slow_log_handler_list[2]= 0;
1459 break;
1460 }
1461 }
1462
init_general_log(ulonglong general_log_printer)1463 void LOGGER::init_general_log(ulonglong general_log_printer)
1464 {
1465 if (general_log_printer & LOG_NONE)
1466 {
1467 general_log_handler_list[0]= 0;
1468 return;
1469 }
1470
1471 switch (general_log_printer) {
1472 case LOG_FILE:
1473 general_log_handler_list[0]= file_log_handler;
1474 general_log_handler_list[1]= 0;
1475 break;
1476 case LOG_TABLE:
1477 general_log_handler_list[0]= table_log_handler;
1478 general_log_handler_list[1]= 0;
1479 break;
1480 case LOG_TABLE|LOG_FILE:
1481 general_log_handler_list[0]= file_log_handler;
1482 general_log_handler_list[1]= table_log_handler;
1483 general_log_handler_list[2]= 0;
1484 break;
1485 }
1486 }
1487
1488
activate_log_handler(THD * thd,uint log_type)1489 bool LOGGER::activate_log_handler(THD* thd, uint log_type)
1490 {
1491 MYSQL_QUERY_LOG *file_log;
1492 bool res= FALSE;
1493 lock_exclusive();
1494 switch (log_type) {
1495 case QUERY_LOG_SLOW:
1496 if (!global_system_variables.sql_log_slow)
1497 {
1498 file_log= file_log_handler->get_mysql_slow_log();
1499
1500 file_log->open_slow_log(opt_slow_logname);
1501 if (table_log_handler->activate_log(thd, QUERY_LOG_SLOW))
1502 {
1503 /* Error printed by open table in activate_log() */
1504 res= TRUE;
1505 file_log->close(0);
1506 }
1507 else
1508 {
1509 init_slow_log(log_output_options);
1510 global_system_variables.sql_log_slow= TRUE;
1511 }
1512 }
1513 break;
1514 case QUERY_LOG_GENERAL:
1515 if (!opt_log)
1516 {
1517 file_log= file_log_handler->get_mysql_log();
1518
1519 file_log->open_query_log(opt_logname);
1520 if (table_log_handler->activate_log(thd, QUERY_LOG_GENERAL))
1521 {
1522 /* Error printed by open table in activate_log() */
1523 res= TRUE;
1524 file_log->close(0);
1525 }
1526 else
1527 {
1528 init_general_log(log_output_options);
1529 opt_log= TRUE;
1530 }
1531 }
1532 break;
1533 default:
1534 DBUG_ASSERT(0);
1535 }
1536 unlock();
1537 return res;
1538 }
1539
1540
deactivate_log_handler(THD * thd,uint log_type)1541 void LOGGER::deactivate_log_handler(THD *thd, uint log_type)
1542 {
1543 my_bool *tmp_opt= 0;
1544 MYSQL_LOG *UNINIT_VAR(file_log);
1545
1546 switch (log_type) {
1547 case QUERY_LOG_SLOW:
1548 tmp_opt= &global_system_variables.sql_log_slow;
1549 file_log= file_log_handler->get_mysql_slow_log();
1550 break;
1551 case QUERY_LOG_GENERAL:
1552 tmp_opt= &opt_log;
1553 file_log= file_log_handler->get_mysql_log();
1554 break;
1555 default:
1556 MY_ASSERT_UNREACHABLE();
1557 }
1558
1559 if (!(*tmp_opt))
1560 return;
1561
1562 lock_exclusive();
1563 file_log->close(0);
1564 *tmp_opt= FALSE;
1565 unlock();
1566 }
1567
1568
1569 /* the parameters are unused for the log tables */
init()1570 bool Log_to_csv_event_handler::init()
1571 {
1572 return 0;
1573 }
1574
set_handlers(ulonglong error_log_printer,ulonglong slow_log_printer,ulonglong general_log_printer)1575 int LOGGER::set_handlers(ulonglong error_log_printer,
1576 ulonglong slow_log_printer,
1577 ulonglong general_log_printer)
1578 {
1579 /* error log table is not supported yet */
1580 DBUG_ASSERT(error_log_printer < LOG_TABLE);
1581
1582 lock_exclusive();
1583
1584 if ((slow_log_printer & LOG_TABLE || general_log_printer & LOG_TABLE) &&
1585 !is_log_tables_initialized)
1586 {
1587 slow_log_printer= (slow_log_printer & ~LOG_TABLE) | LOG_FILE;
1588 general_log_printer= (general_log_printer & ~LOG_TABLE) | LOG_FILE;
1589
1590 sql_print_error("Failed to initialize log tables. "
1591 "Falling back to the old-fashioned logs");
1592 }
1593
1594 init_error_log(error_log_printer);
1595 init_slow_log(slow_log_printer);
1596 init_general_log(general_log_printer);
1597
1598 unlock();
1599
1600 return 0;
1601 }
1602
1603 /*
1604 Save position of binary log transaction cache.
1605
1606 SYNPOSIS
1607 binlog_trans_log_savepos()
1608
1609 thd The thread to take the binlog data from
1610 pos Pointer to variable where the position will be stored
1611
1612 DESCRIPTION
1613
1614 Save the current position in the binary log transaction cache into
1615 the variable pointed to by 'pos'
1616 */
1617
1618 static void
binlog_trans_log_savepos(THD * thd,my_off_t * pos)1619 binlog_trans_log_savepos(THD *thd, my_off_t *pos)
1620 {
1621 DBUG_ENTER("binlog_trans_log_savepos");
1622 DBUG_ASSERT(pos != NULL);
1623 binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
1624 DBUG_ASSERT((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open());
1625 *pos= cache_mngr->trx_cache.get_byte_position();
1626 DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos));
1627 DBUG_VOID_RETURN;
1628 }
1629
1630
1631 /*
1632 Truncate the binary log transaction cache.
1633
1634 SYNPOSIS
1635 binlog_trans_log_truncate()
1636
1637 thd The thread to take the binlog data from
1638 pos Position to truncate to
1639
1640 DESCRIPTION
1641
1642 Truncate the binary log to the given position. Will not change
1643 anything else.
1644
1645 */
1646 static void
binlog_trans_log_truncate(THD * thd,my_off_t pos)1647 binlog_trans_log_truncate(THD *thd, my_off_t pos)
1648 {
1649 DBUG_ENTER("binlog_trans_log_truncate");
1650 DBUG_PRINT("enter", ("pos: %lu", (ulong) pos));
1651
1652 DBUG_ASSERT(thd_get_ha_data(thd, binlog_hton) != NULL);
1653 /* Only true if binlog_trans_log_savepos() wasn't called before */
1654 DBUG_ASSERT(pos != ~(my_off_t) 0);
1655
1656 binlog_cache_mngr *const cache_mngr=
1657 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
1658 cache_mngr->trx_cache.restore_savepoint(pos);
1659 DBUG_VOID_RETURN;
1660 }
1661
1662
1663 /*
1664 this function is mostly a placeholder.
1665 conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
1666 should be moved here.
1667 */
1668
binlog_init(void * p)1669 int binlog_init(void *p)
1670 {
1671 binlog_hton= (handlerton *)p;
1672 binlog_hton->savepoint_offset= sizeof(my_off_t);
1673 binlog_hton->close_connection= binlog_close_connection;
1674 binlog_hton->savepoint_set= binlog_savepoint_set;
1675 binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
1676 binlog_hton->savepoint_rollback_can_release_mdl=
1677 binlog_savepoint_rollback_can_release_mdl;
1678 binlog_hton->commit= binlog_commit;
1679 binlog_hton->rollback= binlog_rollback;
1680 binlog_hton->drop_table= [](handlerton *, const char*) { return -1; };
1681 if (WSREP_ON || opt_bin_log)
1682 {
1683 binlog_hton->prepare= binlog_prepare;
1684 binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
1685 binlog_hton->commit_by_xid= binlog_commit_by_xid;
1686 binlog_hton->rollback_by_xid= binlog_rollback_by_xid;
1687 // recover needs to be set to make xa{commit,rollback}_handlerton effective
1688 binlog_hton->recover= binlog_xa_recover_dummy;
1689 }
1690 binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN | HTON_NO_ROLLBACK;
1691 return 0;
1692 }
1693
1694 #ifdef WITH_WSREP
1695 #include "wsrep_binlog.h"
1696 #endif /* WITH_WSREP */
/**
  Connection-close callback of the binlog handlerton (binlog_init() stores
  it in close_connection).

  Destroys and frees the binlog_cache_mngr kept as the connection's binlog
  ha_data.  Both caches are expected to be empty at this point (asserted
  in debug builds).  With WSREP, a non-empty transaction cache is first
  reported with warnings and its contents dumped.

  @param hton  Not used
  @param thd   The connection being closed

  @return always 0
*/
static int binlog_close_connection(handlerton *hton, THD *thd)
{
  DBUG_ENTER("binlog_close_connection");
  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
#ifdef WITH_WSREP
  /*
    Diagnostics only.  Note the condition tests the trx cache alone, so the
    stmt cache warning below is printed whenever the trx cache is non-empty.
    NOTE(review): buf is not released in this function -- confirm who owns
    the buffer filled in by wsrep_write_cache_buf().
  */
  if (WSREP(thd) && cache_mngr && !cache_mngr->trx_cache.empty()) {
    IO_CACHE* cache= cache_mngr->get_binlog_cache_log(true);
    uchar *buf;
    size_t len=0;
    wsrep_write_cache_buf(cache, &buf, &len);
    WSREP_WARN("binlog trx cache not empty (%zu bytes) @ connection close %lld",
               len, (longlong) thd->thread_id);
    if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);

    cache = cache_mngr->get_binlog_cache_log(false);
    wsrep_write_cache_buf(cache, &buf, &len);
    WSREP_WARN("binlog stmt cache not empty (%zu bytes) @ connection close %lld",
               len, (longlong) thd->thread_id);
    if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
  }
#endif /* WITH_WSREP */
  /*
    NOTE(review): cache_mngr is dereferenced unconditionally from here on,
    although the WSREP branch above tests it for NULL -- presumably the
    callback is only invoked when ha_data is set; confirm against the caller.
  */
  DBUG_ASSERT(cache_mngr->trx_cache.empty());
  DBUG_ASSERT(cache_mngr->stmt_cache.empty());
  /* Explicit destructor call followed by my_free(): destroy, then release */
  cache_mngr->~binlog_cache_mngr();
  my_free(cache_mngr);
  DBUG_RETURN(0);
}
1725
1726 /*
1727 This function flushes a cache upon commit/rollback.
1728
1729 SYNOPSIS
1730 binlog_flush_cache()
1731
1732 thd The thread whose transaction should be ended
1733 cache_mngr Pointer to the binlog_cache_mngr to use
1734 all True if the entire transaction should be ended, false if
1735 only the statement transaction should be ended.
1736 end_ev The end event to use (COMMIT, ROLLBACK, or commit XID)
1737 using_stmt True if the statement cache should be flushed
1738 using_trx True if the transaction cache should be flushed
1739
1740 DESCRIPTION
1741
1742 End the currently transaction or statement. The transaction can be either
1743 a real transaction or a statement transaction.
1744
1745 This can be to commit a transaction, with a COMMIT query event or an XA
1746 commit XID event. But it can also be to rollback a transaction with a
1747 ROLLBACK query event, used for rolling back transactions which also
1748 contain updates to non-transactional tables. Or it can be a flush of
1749 a statement cache.
1750 */
1751
1752 static int
binlog_flush_cache(THD * thd,binlog_cache_mngr * cache_mngr,Log_event * end_ev,bool all,bool using_stmt,bool using_trx)1753 binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
1754 Log_event *end_ev, bool all, bool using_stmt,
1755 bool using_trx)
1756 {
1757 int error= 0;
1758 DBUG_ENTER("binlog_flush_cache");
1759 DBUG_PRINT("enter", ("end_ev: %p", end_ev));
1760
1761 if ((using_stmt && !cache_mngr->stmt_cache.empty()) ||
1762 (using_trx && !cache_mngr->trx_cache.empty()) ||
1763 thd->transaction->xid_state.is_explicit_XA())
1764 {
1765 if (using_stmt && thd->binlog_flush_pending_rows_event(TRUE, FALSE))
1766 DBUG_RETURN(1);
1767 if (using_trx && thd->binlog_flush_pending_rows_event(TRUE, TRUE))
1768 DBUG_RETURN(1);
1769
1770 /*
1771 Doing a commit or a rollback including non-transactional tables,
1772 i.e., ending a transaction where we might write the transaction
1773 cache to the binary log.
1774
1775 We can always end the statement when ending a transaction since
1776 transactions are not allowed inside stored functions. If they
1777 were, we would have to ensure that we're not ending a statement
1778 inside a stored function.
1779 */
1780 error= mysql_bin_log.write_transaction_to_binlog(thd, cache_mngr,
1781 end_ev, all,
1782 using_stmt, using_trx);
1783 }
1784 else
1785 {
1786 /*
1787 This can happen in row-format binlog with something like
1788 BEGIN; INSERT INTO nontrans_table; INSERT IGNORE INTO trans_table;
1789 The nontrans_table is written directly into the binlog before commit,
1790 and if the trans_table is ignored there will be no rows to write when
1791 we get here.
1792
1793 So there is no work to do. Therefore, we will not increment any XID
1794 count, so we must not decrement any XID count in unlog().
1795 */
1796 cache_mngr->need_unlog= 0;
1797 }
1798 cache_mngr->reset(using_stmt, using_trx);
1799
1800 DBUG_ASSERT(!using_stmt || cache_mngr->stmt_cache.empty());
1801 DBUG_ASSERT(!using_trx || cache_mngr->trx_cache.empty());
1802 DBUG_RETURN(error);
1803 }
1804
1805
1806 /**
1807 This function flushes the stmt-cache upon commit.
1808
1809 @param thd The thread whose transaction should be flushed
1810 @param cache_mngr Pointer to the cache manager
1811
1812 @return
1813 nonzero if an error pops up when flushing the cache.
1814 */
1815 static inline int
binlog_commit_flush_stmt_cache(THD * thd,bool all,binlog_cache_mngr * cache_mngr)1816 binlog_commit_flush_stmt_cache(THD *thd, bool all,
1817 binlog_cache_mngr *cache_mngr)
1818 {
1819 DBUG_ENTER("binlog_commit_flush_stmt_cache");
1820 #ifdef WITH_WSREP
1821 if (thd->wsrep_mysql_replicated > 0)
1822 {
1823 DBUG_ASSERT(WSREP(thd));
1824 WSREP_DEBUG("avoiding binlog_commit_flush_trx_cache: %d",
1825 thd->wsrep_mysql_replicated);
1826 return 0;
1827 }
1828 #endif
1829
1830 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1831 FALSE, TRUE, TRUE, 0);
1832 DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE));
1833 }
1834
1835
serialize_with_xid(XID * xid,char * buf,const char * query,size_t q_len)1836 inline size_t serialize_with_xid(XID *xid, char *buf,
1837 const char *query, size_t q_len)
1838 {
1839 memcpy(buf, query, q_len);
1840
1841 return
1842 q_len + strlen(static_cast<event_xid_t*>(xid)->serialize(buf + q_len));
1843 }
1844
1845
1846 /**
1847 This function flushes the trx-cache upon commit.
1848
1849 @param thd The thread whose transaction should be flushed
1850 @param cache_mngr Pointer to the cache manager
1851
1852 @return
1853 nonzero if an error pops up when flushing the cache.
1854 */
1855 static inline int
binlog_commit_flush_trx_cache(THD * thd,bool all,binlog_cache_mngr * cache_mngr)1856 binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr)
1857 {
1858 DBUG_ENTER("binlog_commit_flush_trx_cache");
1859
1860 const char query[]= "XA COMMIT ";
1861 const size_t q_len= sizeof(query) - 1; // do not count trailing 0
1862 char buf[q_len + ser_buf_size]= "COMMIT";
1863 size_t buflen= sizeof("COMMIT") - 1;
1864
1865 if (thd->lex->sql_command == SQLCOM_XA_COMMIT &&
1866 thd->lex->xa_opt != XA_ONE_PHASE)
1867 {
1868 DBUG_ASSERT(thd->transaction->xid_state.is_explicit_XA());
1869 DBUG_ASSERT(thd->transaction->xid_state.get_state_code() ==
1870 XA_PREPARED);
1871
1872 buflen= serialize_with_xid(thd->transaction->xid_state.get_xid(),
1873 buf, query, q_len);
1874 }
1875 Query_log_event end_evt(thd, buf, buflen, TRUE, TRUE, TRUE, 0);
1876
1877 DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
1878 }
1879
1880
1881 /**
1882 This function flushes the trx-cache upon rollback.
1883
1884 @param thd The thread whose transaction should be flushed
1885 @param cache_mngr Pointer to the cache manager
1886
1887 @return
1888 nonzero if an error pops up when flushing the cache.
1889 */
1890 static inline int
binlog_rollback_flush_trx_cache(THD * thd,bool all,binlog_cache_mngr * cache_mngr)1891 binlog_rollback_flush_trx_cache(THD *thd, bool all,
1892 binlog_cache_mngr *cache_mngr)
1893 {
1894 const char query[]= "XA ROLLBACK ";
1895 const size_t q_len= sizeof(query) - 1; // do not count trailing 0
1896 char buf[q_len + ser_buf_size]= "ROLLBACK";
1897 size_t buflen= sizeof("ROLLBACK") - 1;
1898
1899 if (thd->transaction->xid_state.is_explicit_XA())
1900 {
1901 /* for not prepared use plain ROLLBACK */
1902 if (thd->transaction->xid_state.get_state_code() == XA_PREPARED)
1903 buflen= serialize_with_xid(thd->transaction->xid_state.get_xid(),
1904 buf, query, q_len);
1905 }
1906 Query_log_event end_evt(thd, buf, buflen, TRUE, TRUE, TRUE, 0);
1907
1908 return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
1909 }
1910
1911 /**
1912 This function flushes the trx-cache upon commit.
1913
1914 @param thd The thread whose transaction should be flushed
1915 @param cache_mngr Pointer to the cache manager
1916 @param xid Transaction Id
1917
1918 @return
1919 nonzero if an error pops up when flushing the cache.
1920 */
1921 static inline int
binlog_commit_flush_xid_caches(THD * thd,binlog_cache_mngr * cache_mngr,bool all,my_xid xid)1922 binlog_commit_flush_xid_caches(THD *thd, binlog_cache_mngr *cache_mngr,
1923 bool all, my_xid xid)
1924 {
1925 DBUG_ASSERT(xid); // replaced former treatment of ONE-PHASE XA
1926
1927 Xid_log_event end_evt(thd, xid, TRUE);
1928 return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
1929 }
1930
1931 /**
1932 This function truncates the transactional cache upon committing or rolling
1933 back either a transaction or a statement.
1934
1935 @param thd The thread whose transaction should be flushed
1936 @param cache_mngr Pointer to the cache data to be flushed
1937 @param all @c true means truncate the transaction, otherwise the
1938 statement must be truncated.
1939
1940 @return
1941 nonzero if an error pops up when truncating the transactional cache.
1942 */
1943 static int
binlog_truncate_trx_cache(THD * thd,binlog_cache_mngr * cache_mngr,bool all)1944 binlog_truncate_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all)
1945 {
1946 DBUG_ENTER("binlog_truncate_trx_cache");
1947 int error=0;
1948 /*
1949 This function handles transactional changes and as such this flag
1950 equals to true.
1951 */
1952 bool const is_transactional= TRUE;
1953
1954 DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1955 FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1956 FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1957 all ? "all" : "stmt"));
1958
1959 thd->binlog_remove_pending_rows_event(TRUE, is_transactional);
1960 /*
1961 If rolling back an entire transaction or a single statement not
1962 inside a transaction, we reset the transaction cache.
1963 */
1964 if (ending_trans(thd, all))
1965 {
1966 if (cache_mngr->trx_cache.has_incident())
1967 error= mysql_bin_log.write_incident(thd);
1968
1969 thd->reset_binlog_for_next_statement();
1970
1971 cache_mngr->reset(false, true);
1972 }
1973 /*
1974 If rolling back a statement in a transaction, we truncate the
1975 transaction cache to remove the statement.
1976 */
1977 else
1978 cache_mngr->trx_cache.restore_prev_position();
1979
1980 DBUG_ASSERT(thd->binlog_get_pending_rows_event(is_transactional) == NULL);
1981 DBUG_RETURN(error);
1982 }
1983
1984
is_preparing_xa(THD * thd)1985 inline bool is_preparing_xa(THD *thd)
1986 {
1987 return
1988 thd->transaction->xid_state.is_explicit_XA() &&
1989 thd->lex->sql_command == SQLCOM_XA_PREPARE;
1990 }
1991
1992
binlog_prepare(handlerton * hton,THD * thd,bool all)1993 static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1994 {
1995 /* Do nothing unless the transaction is a user XA. */
1996 return is_preparing_xa(thd) ? binlog_commit(NULL, thd, all) : 0;
1997 }
1998
1999
/**
  Placeholder recover callback of the binlog handlerton.

  binlog_init() installs it because a recover callback has to be set for
  the commit_by_xid/rollback_by_xid callbacks to take effect.  All
  arguments are ignored.

  @return always 0
*/
static int binlog_xa_recover_dummy(handlerton *hton __attribute__((unused)),
                                   XID *xid_list __attribute__((unused)),
                                   uint len __attribute__((unused)))
{
  /* Does nothing. */
  return 0;
}
2007
2008
binlog_commit_by_xid(handlerton * hton,XID * xid)2009 static int binlog_commit_by_xid(handlerton *hton, XID *xid)
2010 {
2011 THD *thd= current_thd;
2012
2013 (void) thd->binlog_setup_trx_data();
2014
2015 DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_COMMIT);
2016
2017 return binlog_commit(hton, thd, TRUE);
2018 }
2019
2020
binlog_rollback_by_xid(handlerton * hton,XID * xid)2021 static int binlog_rollback_by_xid(handlerton *hton, XID *xid)
2022 {
2023 THD *thd= current_thd;
2024
2025 (void) thd->binlog_setup_trx_data();
2026
2027 DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_ROLLBACK ||
2028 (thd->transaction->xid_state.get_state_code() == XA_ROLLBACK_ONLY));
2029 return binlog_rollback(hton, thd, TRUE);
2030 }
2031
2032
is_prepared_xa(THD * thd)2033 inline bool is_prepared_xa(THD *thd)
2034 {
2035 return thd->transaction->xid_state.is_explicit_XA() &&
2036 thd->transaction->xid_state.get_state_code() == XA_PREPARED;
2037 }
2038
2039
2040 /*
2041 We flush the cache wrapped in a beging/rollback if:
2042 . aborting a single or multi-statement transaction and;
2043 . the OPTION_KEEP_LOG is active or;
2044 . the format is STMT and a non-trans table was updated or;
2045 . the format is MIXED and a temporary non-trans table was
2046 updated or;
2047 . the format is MIXED, non-trans table was updated and
2048 aborting a single statement transaction;
2049 */
trans_cannot_safely_rollback(THD * thd,bool all)2050 static bool trans_cannot_safely_rollback(THD *thd, bool all)
2051 {
2052 DBUG_ASSERT(ending_trans(thd, all));
2053
2054 return ((thd->variables.option_bits & OPTION_KEEP_LOG) ||
2055 (trans_has_updated_non_trans_table(thd) &&
2056 thd->wsrep_binlog_format() == BINLOG_FORMAT_STMT) ||
2057 (thd->transaction->all.has_modified_non_trans_temp_table() &&
2058 thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED) ||
2059 (trans_has_updated_non_trans_table(thd) &&
2060 ending_single_stmt_trans(thd,all) &&
2061 thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED) ||
2062 is_prepared_xa(thd));
2063 }
2064
2065
/**
  Specific log flusher invoked through log_xa_prepare().

  Used by binlog_commit() when the current statement is XA PREPARE.
  It first writes an "XA END <serialized xid>" query event into a binlog
  cache and then flushes the caches with an XA_prepare_log_event as the
  terminating event.

  @param thd         The client thread.
  @param all         Forwarded unchanged to binlog_flush_cache().
  @param cache_mngr  The binlog cache manager of the thread.

  @return 1 if writing the XA END event failed, otherwise the result of
          binlog_flush_cache().
*/
static int binlog_commit_flush_xa_prepare(THD *thd, bool all,
                                          binlog_cache_mngr *cache_mngr)
{
  XID *xid= thd->transaction->xid_state.get_xid();
  {
    // todo assert wsrep_simulate || is_open()

    /*
      Log the XA END event first.
      We don't do that in trans_xa_end() as XA COMMIT ONE PHASE
      is logged as simple BEGIN/COMMIT so the XA END should
      not get to the log.
    */
    const char query[]= "XA END ";
    const size_t q_len= sizeof(query) - 1; // do not count trailing 0
    /* Room for the "XA END " prefix followed by the serialized XID. */
    char buf[q_len + ser_buf_size];
    size_t buflen;
    binlog_cache_data *cache_data;
    IO_CACHE *file;

    memcpy(buf, query, q_len);
    /* serialize() writes right after the prefix; its result is measured
       with strlen() to get the full statement length. */
    buflen= q_len +
            strlen(static_cast<event_xid_t*>(xid)->serialize(buf + q_len));
    /* NOTE(review): the "true" argument presumably selects the
       transactional cache -- confirm against get_binlog_cache_data(). */
    cache_data= cache_mngr->get_binlog_cache_data(true);
    file= &cache_data->cache_log;
    /* Temporarily present the statement as XA END while the event is
       constructed and written. */
    thd->lex->sql_command= SQLCOM_XA_END;
    Query_log_event xa_end(thd, buf, buflen, true, false, true, 0);
    /* NOTE(review): on this early return sql_command is left as
       SQLCOM_XA_END rather than restored to SQLCOM_XA_PREPARE -- confirm
       that callers do not depend on the original value after a failure. */
    if (mysql_bin_log.write_event(&xa_end, cache_data, file))
      return 1;
    thd->lex->sql_command= SQLCOM_XA_PREPARE;
  }

  cache_mngr->using_xa= FALSE;
  XA_prepare_log_event end_evt(thd, xid, FALSE);

  return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
}
2106
2107
/**
  This function is called once after each statement.

  It has the responsibility to flush the caches to the binary log on commits.

  The statement cache is flushed first if it is not empty. The transaction
  cache is flushed only when a transaction is ending; for XA PREPARE the
  dedicated XA flusher is used instead of the regular one.

  @param hton  The binlog handlerton.
  @param thd   The client thread that executes the transaction.
  @param all   This is @c true if this is a real transaction commit, and
               @c false otherwise.

  @return 0 on success (including the cases where there is nothing to do),
          otherwise the error reported by the cache flush functions.

  @see handlerton::commit
*/
static int binlog_commit(handlerton *hton, THD *thd, bool all)
{
  int error= 0;
  PSI_stage_info org_stage;
  DBUG_ENTER("binlog_commit");

  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);

  /* No binlog cache manager is attached to this thread: nothing to flush. */
  if (!cache_mngr)
  {
    DBUG_ASSERT(WSREP(thd) ||
                (thd->lex->sql_command != SQLCOM_XA_PREPARE &&
                 !(thd->lex->sql_command == SQLCOM_XA_COMMIT &&
                   thd->lex->xa_opt == XA_ONE_PHASE)));

    DBUG_RETURN(0);
  }
  /*
    This is true if we are doing an alter table that is replicated as
    CREATE TABLE ... SELECT
  */
  if (thd->variables.option_bits & OPTION_BIN_COMMIT_OFF)
    DBUG_RETURN(0);

  DBUG_PRINT("debug",
             ("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
              all,
              YESNO(thd->in_multi_stmt_transaction_mode()),
              YESNO(thd->transaction->all.modified_non_trans_table),
              YESNO(thd->transaction->stmt.modified_non_trans_table)));


  /* Remember the current stage so it can be restored before returning. */
  thd->backup_stage(&org_stage);
  THD_STAGE_INFO(thd, stage_binlog_write);
  if (!cache_mngr->stmt_cache.empty())
  {
    error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
  }

  if (cache_mngr->trx_cache.empty() &&
      thd->transaction->xid_state.get_state_code() != XA_PREPARED)
  {
    /*
      we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
    */
    cache_mngr->reset(false, true);
    THD_STAGE_INFO(thd, org_stage);
    DBUG_RETURN(error);
  }

  /*
    We commit the transaction if:
     - We are not in a transaction and committing a statement, or
     - We are in a transaction and a full transaction is committed.
    Otherwise, we accumulate the changes.
  */
  if (likely(!error) && ending_trans(thd, all))
  {
    error= is_preparing_xa(thd) ?
      binlog_commit_flush_xa_prepare(thd, all, cache_mngr) :
      binlog_commit_flush_trx_cache (thd, all, cache_mngr);
  }
  /*
    This is part of the stmt rollback.
  */
  if (!all)
    cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);

  THD_STAGE_INFO(thd, org_stage);
  DBUG_RETURN(error);
}
2192
/**
  This function is called when a transaction or a statement is rolled back.

  Depending on the state of the caches and of the transaction it writes an
  incident, flushes the statement cache, flushes the transaction cache
  wrapped in a rollback, or truncates the transaction cache.

  @param hton  The binlog handlerton.
  @param thd   The client thread that executes the transaction.
  @param all   This is @c true if this is a real transaction rollback, and
               @c false otherwise.

  @return 0 on success, non-zero if any of the write/flush/truncate steps
          reported an error.

  @see handlerton::rollback
*/
static int binlog_rollback(handlerton *hton, THD *thd, bool all)
{
  DBUG_ENTER("binlog_rollback");

  int error= 0;
  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);

  /* No binlog cache manager is attached to this thread: nothing to do. */
  if (!cache_mngr)
  {
    DBUG_ASSERT(WSREP(thd));
    DBUG_ASSERT(thd->lex->sql_command != SQLCOM_XA_ROLLBACK);

    DBUG_RETURN(0);
  }

  DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
                       YESNO(all),
                       YESNO(thd->transaction->all.modified_non_trans_table),
                       YESNO(thd->transaction->stmt.modified_non_trans_table)));

  /*
    If an incident event is set we do not flush the content of the statement
    cache because it may be corrupted.
  */
  if (cache_mngr->stmt_cache.has_incident())
  {
    error |= static_cast<int>(mysql_bin_log.write_incident(thd));
    cache_mngr->reset(true, false);
  }
  else if (!cache_mngr->stmt_cache.empty())
  {
    error |= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
  }

  if (cache_mngr->trx_cache.empty() &&
      thd->transaction->xid_state.get_state_code() != XA_PREPARED)
  {
    /*
      we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
    */
    cache_mngr->reset(false, true);
    thd->reset_binlog_for_next_statement();
    DBUG_RETURN(error);
  }
  if (!wsrep_emulate_bin_log && mysql_bin_log.check_write_error(thd))
  {
    /*
      "all == true" means that a "rollback statement" triggered the error and
      this function was called. However, this must not happen as a rollback
      is written directly to the binary log. And in auto-commit mode, a single
      statement that is rolled back has the flag all == false.
    */
    DBUG_ASSERT(!all);
    /*
      We reach this point if the effect of a statement did not properly get into
      a cache and need to be rolled back.
    */
    error |= binlog_truncate_trx_cache(thd, cache_mngr, all);
  }
  else if (likely(!error))
  {
    /* The transaction is ending but its cache content must reach the log. */
    if (ending_trans(thd, all) && trans_cannot_safely_rollback(thd, all))
      error= binlog_rollback_flush_trx_cache(thd, all, cache_mngr);
    /*
      Truncate the cache if:
        . aborting a single or multi-statement transaction or;
        . the current statement created or dropped a temporary table
          while having actual STATEMENT format;
        . the format is not STMT or no non-trans table was
          updated and;
        . the format is not MIXED or no temporary non-trans table
          was updated.
    */
    else if (ending_trans(thd, all) ||
             (!(thd->transaction->stmt.has_created_dropped_temp_table() &&
                !thd->is_current_stmt_binlog_format_row()) &&
              (!stmt_has_updated_non_trans_table(thd) ||
               thd->wsrep_binlog_format() != BINLOG_FORMAT_STMT) &&
              (!thd->transaction->stmt.has_modified_non_trans_temp_table() ||
               thd->wsrep_binlog_format() != BINLOG_FORMAT_MIXED)))
      error= binlog_truncate_trx_cache(thd, cache_mngr, all);
  }

  /*
    This is part of the stmt rollback.
  */
  if (!all)
    cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
  thd->reset_binlog_for_next_statement();

  DBUG_RETURN(error);
}
2296
2297
binlog_reset_cache(THD * thd)2298 void binlog_reset_cache(THD *thd)
2299 {
2300 binlog_cache_mngr *const cache_mngr= opt_bin_log ?
2301 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton) : 0;
2302 DBUG_ENTER("binlog_reset_cache");
2303 if (cache_mngr)
2304 {
2305 thd->binlog_remove_pending_rows_event(TRUE, TRUE);
2306 cache_mngr->reset(true, true);
2307 }
2308 DBUG_VOID_RETURN;
2309 }
2310
2311
set_write_error(THD * thd,bool is_transactional)2312 void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
2313 {
2314 DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
2315
2316 write_error= 1;
2317
2318 if (unlikely(check_write_error(thd)))
2319 DBUG_VOID_RETURN;
2320
2321 if (my_errno == EFBIG)
2322 {
2323 if (is_transactional)
2324 {
2325 my_message(ER_TRANS_CACHE_FULL, ER_THD(thd, ER_TRANS_CACHE_FULL), MYF(0));
2326 }
2327 else
2328 {
2329 my_message(ER_STMT_CACHE_FULL, ER_THD(thd, ER_STMT_CACHE_FULL), MYF(0));
2330 }
2331 }
2332 else
2333 {
2334 my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
2335 }
2336 #ifdef WITH_WSREP
2337 /* If wsrep transaction is active and binlog emulation is on,
2338 binlog write error may leave transaction without any registered
2339 htons. This makes wsrep rollback hooks to be skipped and the
2340 transaction will remain alive in wsrep world after rollback.
2341 Register binlog hton here to ensure that rollback happens in full. */
2342 if (WSREP_EMULATE_BINLOG(thd))
2343 {
2344 if (is_transactional)
2345 trans_register_ha(thd, TRUE, binlog_hton, 0);
2346 trans_register_ha(thd, FALSE, binlog_hton, 0);
2347 }
2348 #endif /* WITH_WSREP */
2349 DBUG_VOID_RETURN;
2350 }
2351
check_write_error(THD * thd)2352 bool MYSQL_BIN_LOG::check_write_error(THD *thd)
2353 {
2354 DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
2355
2356 bool checked= FALSE;
2357
2358 if (likely(!thd->is_error()))
2359 DBUG_RETURN(checked);
2360
2361 switch (thd->get_stmt_da()->sql_errno())
2362 {
2363 case ER_TRANS_CACHE_FULL:
2364 case ER_STMT_CACHE_FULL:
2365 case ER_ERROR_ON_WRITE:
2366 case ER_BINLOG_LOGGING_IMPOSSIBLE:
2367 checked= TRUE;
2368 break;
2369 }
2370
2371 DBUG_RETURN(checked);
2372 }
2373
2374
2375 /**
2376 @note
2377 How do we handle this (unlikely but legal) case:
2378 @verbatim
2379 [transaction] + [update to non-trans table] + [rollback to savepoint] ?
2380 @endverbatim
2381 The problem occurs when a savepoint is before the update to the
2382 non-transactional table. Then when there's a rollback to the savepoint, if we
2383 simply truncate the binlog cache, we lose the part of the binlog cache where
2384 the update is. If we want to not lose it, we need to write the SAVEPOINT
2385 command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
2386 is easy: it's just write at the end of the binlog cache, but the former
2387 should be *inserted* to the place where the user called SAVEPOINT. The
2388 solution is that when the user calls SAVEPOINT, we write it to the binlog
2389 cache (so no need to later insert it). As transactions are never intermixed
2390 in the binary log (i.e. they are serialized), we won't have conflicts with
2391 savepoint names when using mysqlbinlog or in the slave SQL thread.
2392 Then when ROLLBACK TO SAVEPOINT is called, if we updated some
2393 non-transactional table, we don't truncate the binlog cache but instead write
2394 ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
2395 will chop the SAVEPOINT command from the binlog cache, which is good as in
2396 that case there is no need to have it in the binlog).
2397 */
2398
binlog_savepoint_set(handlerton * hton,THD * thd,void * sv)2399 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
2400 {
2401 int error= 1;
2402 DBUG_ENTER("binlog_savepoint_set");
2403
2404 char buf[1024];
2405
2406 String log_query(buf, sizeof(buf), &my_charset_bin);
2407 if (log_query.copy(STRING_WITH_LEN("SAVEPOINT "), &my_charset_bin) ||
2408 append_identifier(thd, &log_query, &thd->lex->ident))
2409 DBUG_RETURN(1);
2410 int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
2411 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2412 TRUE, FALSE, TRUE, errcode);
2413 /*
2414 We cannot record the position before writing the statement
2415 because a rollback to a savepoint (.e.g. consider it "S") would
2416 prevent the savepoint statement (i.e. "SAVEPOINT S") from being
2417 written to the binary log despite the fact that the server could
2418 still issue other rollback statements to the same savepoint (i.e.
2419 "S").
2420 Given that the savepoint is valid until the server releases it,
2421 ie, until the transaction commits or it is released explicitly,
2422 we need to log it anyway so that we don't have "ROLLBACK TO S"
2423 or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
2424 log.
2425 */
2426 if (likely(!(error= mysql_bin_log.write(&qinfo))))
2427 binlog_trans_log_savepos(thd, (my_off_t*) sv);
2428
2429 DBUG_RETURN(error);
2430 }
2431
binlog_savepoint_rollback(handlerton * hton,THD * thd,void * sv)2432 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
2433 {
2434 DBUG_ENTER("binlog_savepoint_rollback");
2435
2436 /*
2437 Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
2438 non-transactional table. Otherwise, truncate the binlog cache starting
2439 from the SAVEPOINT command.
2440 */
2441 #ifdef WITH_WSREP
2442 /* for streaming replication, we must replicate savepoint rollback so that
2443 slaves can maintain SR transactions
2444 */
2445 if (unlikely(thd->wsrep_trx().is_streaming() ||
2446 (trans_has_updated_non_trans_table(thd)) ||
2447 (thd->variables.option_bits & OPTION_KEEP_LOG)))
2448 #else
2449 if (unlikely(trans_has_updated_non_trans_table(thd) ||
2450 (thd->variables.option_bits & OPTION_KEEP_LOG)))
2451 #endif /* WITH_WSREP */
2452 {
2453 char buf[1024];
2454 String log_query(buf, sizeof(buf), &my_charset_bin);
2455 if (log_query.copy(STRING_WITH_LEN("ROLLBACK TO "), &my_charset_bin) ||
2456 append_identifier(thd, &log_query, &thd->lex->ident))
2457 DBUG_RETURN(1);
2458 int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
2459 Query_log_event qinfo(thd, log_query.ptr(), log_query.length(),
2460 TRUE, FALSE, TRUE, errcode);
2461 DBUG_RETURN(mysql_bin_log.write(&qinfo));
2462 }
2463
2464 binlog_trans_log_truncate(thd, *(my_off_t*)sv);
2465
2466 /*
2467 When a SAVEPOINT is executed inside a stored function/trigger we force the
2468 pending event to be flushed with a STMT_END_F flag and reset binlog
2469 as well to ensure that following DMLs will have a clean state to start
2470 with. ROLLBACK inside a stored routine has to finalize possibly existing
2471 current row-based pending event with cleaning up table maps. That ensures
2472 that following DMLs will have a clean state to start with.
2473 */
2474 if (thd->in_sub_stmt)
2475 thd->reset_binlog_for_next_statement();
2476
2477 DBUG_RETURN(0);
2478 }
2479
2480
2481 /**
2482 Check whether binlog state allows to safely release MDL locks after
2483 rollback to savepoint.
2484
2485 @param hton The binlog handlerton.
2486 @param thd The client thread that executes the transaction.
2487
2488 @return true - It is safe to release MDL locks.
2489 false - If it is not.
2490 */
binlog_savepoint_rollback_can_release_mdl(handlerton * hton,THD * thd)2491 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
2492 THD *thd)
2493 {
2494 DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
2495 /*
2496 If we have not updated any non-transactional tables rollback
2497 to savepoint will simply truncate binlog cache starting from
2498 SAVEPOINT command. So it should be safe to release MDL acquired
2499 after SAVEPOINT command in this case.
2500 */
2501 DBUG_RETURN(!trans_cannot_safely_rollback(thd, true));
2502 }
2503
2504
check_binlog_magic(IO_CACHE * log,const char ** errmsg)2505 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
2506 {
2507 uchar magic[4];
2508 DBUG_ASSERT(my_b_tell(log) == 0);
2509
2510 if (my_b_read(log, magic, sizeof(magic)))
2511 {
2512 *errmsg = "I/O error reading the header from the binary log";
2513 sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
2514 log->error);
2515 return 1;
2516 }
2517 if (bcmp(magic, BINLOG_MAGIC, sizeof(magic)))
2518 {
2519 *errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL";
2520 return 1;
2521 }
2522 return 0;
2523 }
2524
2525
open_binlog(IO_CACHE * log,const char * log_file_name,const char ** errmsg)2526 File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
2527 {
2528 File file;
2529 DBUG_ENTER("open_binlog");
2530
2531 if ((file= mysql_file_open(key_file_binlog,
2532 log_file_name, O_RDONLY | O_BINARY | O_SHARE,
2533 MYF(MY_WME))) < 0)
2534 {
2535 sql_print_error("Failed to open log (file '%s', errno %d)",
2536 log_file_name, my_errno);
2537 *errmsg = "Could not open log file";
2538 goto err;
2539 }
2540 if (init_io_cache_ext(log, file, (size_t)binlog_file_cache_size, READ_CACHE,
2541 0, 0, MYF(MY_WME|MY_DONT_CHECK_FILESIZE), key_file_binlog_cache))
2542 {
2543 sql_print_error("Failed to create a cache on log (file '%s')",
2544 log_file_name);
2545 *errmsg = "Could not open log file";
2546 goto err;
2547 }
2548 if (check_binlog_magic(log,errmsg))
2549 goto err;
2550 DBUG_RETURN(file);
2551
2552 err:
2553 if (file >= 0)
2554 {
2555 mysql_file_close(file, MYF(0));
2556 end_io_cache(log);
2557 }
2558 DBUG_RETURN(-1);
2559 }
2560
#ifdef _WIN32
/* Non-zero once setup_windows_event_source() has been attempted. */
static int eventSource = 0;

/**
  Register this executable as the "MariaDB" event source of the Windows
  Application event log.

  The registration is attempted at most once per process. It is best
  effort: if the registry key cannot be created or opened (for example
  because of insufficient privileges), the function returns without
  touching the registry any further.
*/
static void setup_windows_event_source()
{
  HKEY hRegKey= NULL;
  TCHAR szPath[MAX_PATH];
  DWORD dwPathLen;
  DWORD dwTypes;

  if (eventSource)               // Ensure that we are only called once
    return;
  eventSource= 1;

  // Create the event source registry key
  if (RegCreateKey(HKEY_LOCAL_MACHINE,
                   "SYSTEM\\CurrentControlSet\\Services\\EventLog\\Application\\MariaDB",
                   &hRegKey) != ERROR_SUCCESS)
    return;                      // No valid key handle: nothing to set or close

  /* Name of the PE module that contains the message resource */
  dwPathLen= GetModuleFileName(NULL, szPath, MAX_PATH);
  if (dwPathLen > 0)
  {
    /* Guarantee termination even if the path had to be truncated. */
    szPath[MAX_PATH - 1]= 0;

    /* Register EventMessageFile */
    (void) RegSetValueEx(hRegKey, "EventMessageFile", 0, REG_EXPAND_SZ,
                         (PBYTE) szPath, (DWORD) (strlen(szPath) + 1));
  }

  /* Register supported event types */
  dwTypes= (EVENTLOG_ERROR_TYPE | EVENTLOG_WARNING_TYPE |
            EVENTLOG_INFORMATION_TYPE);
  (void) RegSetValueEx(hRegKey, "TypesSupported", 0, REG_DWORD,
                       (LPBYTE) &dwTypes, sizeof dwTypes);

  RegCloseKey(hRegKey);
}

#endif /* _WIN32 */
2597
2598
2599 /**
2600 Find a unique filename for 'filename.#'.
2601
2602 Set '#' to the number next to the maximum found in the most
2603 recent log file extension.
2604
2605 This function will return nonzero if: (i) the generated name
2606 exceeds FN_REFLEN; (ii) if the number of extensions is exhausted;
2607 or (iii) some other error happened while examining the filesystem.
2608
2609 @param name Base name of file
2610 @param min_log_number_to_use minimum log number to choose. Set by
2611 CHANGE MASTER .. TO
2612 @param last_used_log_number If 0, find log number based on files.
2613 If not 0, then use *last_used_log_number +1
2614 Will be update to new generated number
2615 @return
2616 0 ok
2617 nonzero if not possible to get unique filename.
2618 */
2619
find_uniq_filename(char * name,ulong min_log_number_to_use,ulong * last_used_log_number)2620 static int find_uniq_filename(char *name, ulong min_log_number_to_use,
2621 ulong *last_used_log_number)
2622 {
2623 uint i;
2624 char buff[FN_REFLEN], ext_buf[FN_REFLEN];
2625 struct st_my_dir *dir_info;
2626 struct fileinfo *file_info;
2627 ulong max_found= 0, next= 0, number= 0;
2628 size_t buf_length, length;
2629 char *start, *end;
2630 int error= 0;
2631 DBUG_ENTER("find_uniq_filename");
2632
2633 length= dirname_part(buff, name, &buf_length);
2634 start= name + length;
2635 end= strend(start);
2636
2637 *end='.';
2638 length= (size_t) (end - start + 1);
2639
2640 /* The following matches the code for my_dir () below */
2641 DBUG_EXECUTE_IF("error_unique_log_filename",
2642 {
2643 strmov(end,".1");
2644 DBUG_RETURN(1);
2645 });
2646
2647 if (*last_used_log_number)
2648 max_found= *last_used_log_number;
2649 else
2650 {
2651 if (unlikely(!(dir_info= my_dir(buff, MYF(MY_DONT_SORT)))))
2652 { // This shouldn't happen
2653 strmov(end,".1"); // use name+1
2654 DBUG_RETURN(1);
2655 }
2656 file_info= dir_info->dir_entry;
2657 max_found= min_log_number_to_use ? min_log_number_to_use-1 : 0;
2658 for (i= dir_info->number_of_files ; i-- ; file_info++)
2659 {
2660 if (strncmp(file_info->name, start, length) == 0 &&
2661 test_if_number(file_info->name+length, &number,0))
2662 {
2663 set_if_bigger(max_found, number);
2664 }
2665 }
2666 my_dirend(dir_info);
2667 }
2668
2669 /* check if reached the maximum possible extension number */
2670 if (max_found >= MAX_LOG_UNIQUE_FN_EXT)
2671 {
2672 sql_print_error("Log filename extension number exhausted: %06lu. \
2673 Please fix this by archiving old logs and \
2674 updating the index files.", max_found);
2675 error= 1;
2676 goto end;
2677 }
2678
2679 next= max_found + 1;
2680 if (sprintf(ext_buf, "%06lu", next)<0)
2681 {
2682 error= 1;
2683 goto end;
2684 }
2685 *end++='.';
2686
2687 /*
2688 Check if the generated extension size + the file name exceeds the
2689 buffer size used. If one did not check this, then the filename might be
2690 truncated, resulting in error.
2691 */
2692 if (((strlen(ext_buf) + (end - name)) >= FN_REFLEN))
2693 {
2694 sql_print_error("Log filename too large: %s%s (%zu). \
2695 Please fix this by archiving old logs and updating the \
2696 index files.", name, ext_buf, (strlen(ext_buf) + (end - name)));
2697 error= 1;
2698 goto end;
2699 }
2700
2701 if (sprintf(end, "%06lu", next)<0)
2702 {
2703 error= 1;
2704 goto end;
2705 }
2706 *last_used_log_number= next;
2707
2708 /* print warning if reaching the end of available extensions. */
2709 if ((next > (MAX_LOG_UNIQUE_FN_EXT - LOG_WARN_UNIQUE_FN_EXT_LEFT)))
2710 sql_print_warning("Next log extension: %lu. \
2711 Remaining log filename extensions: %lu. \
2712 Please consider archiving some logs.", next, (MAX_LOG_UNIQUE_FN_EXT - next));
2713
2714 end:
2715 DBUG_RETURN(error);
2716 }
2717
2718
init_and_set_log_file_name(const char * log_name,const char * new_name,ulong next_log_number,enum_log_type log_type_arg,enum cache_type io_cache_type_arg)2719 bool MYSQL_LOG::init_and_set_log_file_name(const char *log_name,
2720 const char *new_name,
2721 ulong next_log_number,
2722 enum_log_type log_type_arg,
2723 enum cache_type io_cache_type_arg)
2724 {
2725 log_type= log_type_arg;
2726 io_cache_type= io_cache_type_arg;
2727
2728 if (new_name)
2729 {
2730 strmov(log_file_name, new_name);
2731 }
2732 else if (!new_name && generate_new_name(log_file_name, log_name,
2733 next_log_number))
2734 return TRUE;
2735
2736 return FALSE;
2737 }
2738
2739
2740 /*
2741 Open a (new) log file.
2742
2743 SYNOPSIS
2744 open()
2745
2746 log_name The name of the log to open
2747 log_type_arg The type of the log. E.g. LOG_NORMAL
2748 new_name The new name for the logfile. This is only needed
2749 when the method is used to open the binlog file.
2750 io_cache_type_arg The type of the IO_CACHE to use for this log file
2751
2752 DESCRIPTION
2753 Open the logfile, init IO_CACHE and write startup messages
2754 (in case of general and slow query logs).
2755
2756 RETURN VALUES
2757 0 ok
2758 1 error
2759 */
2760
open(PSI_file_key log_file_key,const char * log_name,enum_log_type log_type_arg,const char * new_name,ulong next_log_number,enum cache_type io_cache_type_arg)2761 bool MYSQL_LOG::open(
2762 #ifdef HAVE_PSI_INTERFACE
2763 PSI_file_key log_file_key,
2764 #endif
2765 const char *log_name, enum_log_type log_type_arg,
2766 const char *new_name, ulong next_log_number,
2767 enum cache_type io_cache_type_arg)
2768 {
2769 char buff[FN_REFLEN];
2770 MY_STAT f_stat;
2771 File file= -1;
2772 my_off_t seek_offset;
2773 bool is_fifo = false;
2774 int open_flags= O_CREAT | O_BINARY | O_CLOEXEC;
2775 DBUG_ENTER("MYSQL_LOG::open");
2776 DBUG_PRINT("enter", ("log_type: %d", (int) log_type_arg));
2777
2778 write_error= 0;
2779
2780 if (!(name= my_strdup(key_memory_MYSQL_LOG_name, log_name, MYF(MY_WME))))
2781 {
2782 name= (char *)log_name; // for the error message
2783 goto err;
2784 }
2785
2786 /*
2787 log_type is LOG_UNKNOWN if we should not generate a new name
2788 This is only used when called from MYSQL_BINARY_LOG::open, which
2789 has already updated log_file_name.
2790 */
2791 if (log_type_arg != LOG_UNKNOWN &&
2792 init_and_set_log_file_name(name, new_name, next_log_number,
2793 log_type_arg, io_cache_type_arg))
2794 goto err;
2795
2796 is_fifo = my_stat(log_file_name, &f_stat, MYF(0)) &&
2797 MY_S_ISFIFO(f_stat.st_mode);
2798
2799 if (io_cache_type == SEQ_READ_APPEND)
2800 open_flags |= O_RDWR | O_APPEND;
2801 else
2802 open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
2803
2804 if (is_fifo)
2805 open_flags |= O_NONBLOCK;
2806
2807 db[0]= 0;
2808
2809 #ifdef HAVE_PSI_INTERFACE
2810 /* Keep the key for reopen */
2811 m_log_file_key= log_file_key;
2812 #endif
2813
2814 if ((file= mysql_file_open(log_file_key, log_file_name, open_flags,
2815 MYF(MY_WME))) < 0)
2816 goto err;
2817
2818 if (is_fifo)
2819 seek_offset= 0;
2820 else if ((seek_offset= mysql_file_tell(file, MYF(MY_WME))))
2821 goto err;
2822
2823 if (init_io_cache(&log_file, file, (log_type == LOG_NORMAL ? IO_SIZE :
2824 LOG_BIN_IO_SIZE),
2825 io_cache_type, seek_offset, 0,
2826 MYF(MY_WME | MY_NABP |
2827 ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
2828 goto err;
2829
2830 if (log_type == LOG_NORMAL)
2831 {
2832 char *end;
2833 size_t len=my_snprintf(buff, sizeof(buff), "%s, Version: %s (%s). "
2834 #ifdef EMBEDDED_LIBRARY
2835 "embedded library\n",
2836 my_progname, server_version, MYSQL_COMPILATION_COMMENT
2837 #elif defined(_WIN32)
2838 "started with:\nTCP Port: %d, Named Pipe: %s\n",
2839 my_progname, server_version, MYSQL_COMPILATION_COMMENT,
2840 mysqld_port, mysqld_unix_port
2841 #else
2842 "started with:\nTcp port: %d Unix socket: %s\n",
2843 my_progname, server_version, MYSQL_COMPILATION_COMMENT,
2844 mysqld_port, mysqld_unix_port
2845 #endif
2846 );
2847 end= strnmov(buff + len, "Time\t\t Id Command\tArgument\n",
2848 sizeof(buff) - len);
2849 if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) ||
2850 flush_io_cache(&log_file))
2851 goto err;
2852 }
2853
2854 log_state= LOG_OPENED;
2855 DBUG_RETURN(0);
2856
2857 err:
2858 sql_print_error(fatal_log_error, name, errno);
2859 if (file >= 0)
2860 mysql_file_close(file, MYF(0));
2861 end_io_cache(&log_file);
2862 my_free(name);
2863 name= NULL;
2864 log_state= LOG_CLOSED;
2865 DBUG_RETURN(1);
2866 }
2867
MYSQL_LOG()2868 MYSQL_LOG::MYSQL_LOG()
2869 : name(0), write_error(FALSE), inited(FALSE), log_type(LOG_UNKNOWN),
2870 log_state(LOG_CLOSED)
2871 {
2872 /*
2873 We don't want to initialize LOCK_Log here as such initialization depends on
2874 safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
2875 called only in main(). Doing initialization here would make it happen
2876 before main().
2877 */
2878 bzero((char*) &log_file, sizeof(log_file));
2879 }
2880
init_pthread_objects()2881 void MYSQL_LOG::init_pthread_objects()
2882 {
2883 DBUG_ASSERT(inited == 0);
2884 inited= 1;
2885 mysql_mutex_init(key_LOG_LOCK_log, &LOCK_log, MY_MUTEX_INIT_SLOW);
2886 }
2887
2888 /*
2889 Close the log file
2890
2891 SYNOPSIS
2892 close()
2893 exiting Bitmask. LOG_CLOSE_TO_BE_OPENED is used if we intend to call
2894 open at once after close. LOG_CLOSE_DELAYED_CLOSE is used for
2895 binlog rotation, to delay actual close of the old file until
2896 we have successfully created the new file.
2897
2898 NOTES
2899 One can do an open on the object at once after doing a close.
2900 The internal structures are not freed until cleanup() is called
2901 */
2902
close(uint exiting)2903 void MYSQL_LOG::close(uint exiting)
2904 { // One can't set log_type here!
2905 DBUG_ENTER("MYSQL_LOG::close");
2906 DBUG_PRINT("enter",("exiting: %d", (int) exiting));
2907 if (log_state == LOG_OPENED)
2908 {
2909 end_io_cache(&log_file);
2910
2911 if (log_type == LOG_BIN && mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error)
2912 {
2913 write_error= 1;
2914 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2915 }
2916
2917 if (!(exiting & LOG_CLOSE_DELAYED_CLOSE) &&
2918 mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error)
2919 {
2920 write_error= 1;
2921 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2922 }
2923 }
2924
2925 log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
2926 my_free(name);
2927 name= NULL;
2928 DBUG_VOID_RETURN;
2929 }
2930
2931 /** This is called only once. */
2932
cleanup()2933 void MYSQL_LOG::cleanup()
2934 {
2935 DBUG_ENTER("cleanup");
2936 if (inited)
2937 {
2938 inited= 0;
2939 mysql_mutex_destroy(&LOCK_log);
2940 close(0);
2941 }
2942 DBUG_VOID_RETURN;
2943 }
2944
2945
generate_new_name(char * new_name,const char * log_name,ulong next_log_number)2946 int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name,
2947 ulong next_log_number)
2948 {
2949 fn_format(new_name, log_name, mysql_data_home, "", 4);
2950 return 0;
2951 }
2952
generate_new_name(char * new_name,const char * log_name,ulong next_log_number)2953 int MYSQL_BIN_LOG::generate_new_name(char *new_name, const char *log_name,
2954 ulong next_log_number)
2955 {
2956 fn_format(new_name, log_name, mysql_data_home, "", 4);
2957 if (!fn_ext(log_name)[0])
2958 {
2959 if (DBUG_EVALUATE_IF("binlog_inject_new_name_error", TRUE, FALSE) ||
2960 unlikely(find_uniq_filename(new_name, next_log_number,
2961 &last_used_log_number)))
2962 {
2963 THD *thd= current_thd;
2964 if (unlikely(thd))
2965 my_error(ER_NO_UNIQUE_LOGFILE, MYF(ME_FATAL), log_name);
2966 sql_print_error(ER_DEFAULT(ER_NO_UNIQUE_LOGFILE), log_name);
2967 return 1;
2968 }
2969 }
2970 return 0;
2971 }
2972
2973
2974 /*
2975 Reopen the log file
2976
2977 SYNOPSIS
2978 reopen_file()
2979
2980 DESCRIPTION
2981 Reopen the log file. The method is used during FLUSH LOGS
2982 and locks LOCK_log mutex
2983 */
2984
2985
reopen_file()2986 void MYSQL_QUERY_LOG::reopen_file()
2987 {
2988 char *save_name;
2989 DBUG_ENTER("MYSQL_LOG::reopen_file");
2990
2991 mysql_mutex_lock(&LOCK_log);
2992 if (!is_open())
2993 {
2994 DBUG_PRINT("info",("log is closed"));
2995 mysql_mutex_unlock(&LOCK_log);
2996 DBUG_VOID_RETURN;
2997 }
2998
2999 save_name= name;
3000 name= 0; // Don't free name
3001 close(LOG_CLOSE_TO_BE_OPENED);
3002
3003 /*
3004 Note that at this point, log_state != LOG_CLOSED (important for is_open()).
3005 */
3006
3007 open(
3008 #ifdef HAVE_PSI_INTERFACE
3009 m_log_file_key,
3010 #endif
3011 save_name, log_type, 0, 0, io_cache_type);
3012 my_free(save_name);
3013
3014 mysql_mutex_unlock(&LOCK_log);
3015
3016 DBUG_VOID_RETURN;
3017 }
3018
3019
3020 /*
3021 Write a command to traditional general log file
3022
3023 SYNOPSIS
3024 write()
3025
3026 event_time command start timestamp
3027 user_host the pointer to the string with user@host info
3028 user_host_len length of the user_host string. this is computed once
3029 and passed to all general log event handlers
3030 thread_id Id of the thread, issued a query
3031 command_type the type of the command being logged
3032 command_type_len the length of the string above
3033 sql_text the very text of the query being executed
3034 sql_text_len the length of sql_text string
3035
3036 DESCRIPTION
3037
3038 Log given command to to normal (not rotable) log file
3039
3040 RETURN
3041 FASE - OK
3042 TRUE - error occurred
3043 */
3044
write(time_t event_time,const char * user_host,size_t user_host_len,my_thread_id thread_id_arg,const char * command_type,size_t command_type_len,const char * sql_text,size_t sql_text_len)3045 bool MYSQL_QUERY_LOG::write(time_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
3046 const char *command_type, size_t command_type_len,
3047 const char *sql_text, size_t sql_text_len)
3048 {
3049 char buff[32];
3050 char local_time_buff[MAX_TIME_SIZE];
3051 struct tm start;
3052 size_t time_buff_len= 0;
3053
3054 mysql_mutex_lock(&LOCK_log);
3055
3056 /* Test if someone closed between the is_open test and lock */
3057 if (is_open())
3058 {
3059 /* for testing output of timestamp and thread id */
3060 DBUG_EXECUTE_IF("reset_log_last_time", last_time= 0;);
3061
3062 /* Note that my_b_write() assumes it knows the length for this */
3063 if (event_time != last_time)
3064 {
3065 last_time= event_time;
3066
3067 localtime_r(&event_time, &start);
3068
3069 time_buff_len= my_snprintf(local_time_buff, MAX_TIME_SIZE,
3070 "%02d%02d%02d %2d:%02d:%02d\t",
3071 start.tm_year % 100, start.tm_mon + 1,
3072 start.tm_mday, start.tm_hour,
3073 start.tm_min, start.tm_sec);
3074
3075 if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len))
3076 goto err;
3077 }
3078 else
3079 if (my_b_write(&log_file, (uchar*) "\t\t" ,2) < 0)
3080 goto err;
3081
3082 /* command_type, thread_id */
3083 size_t length= my_snprintf(buff, 32, "%6llu ", thread_id_arg);
3084
3085 if (my_b_write(&log_file, (uchar*) buff, length))
3086 goto err;
3087
3088 if (my_b_write(&log_file, (uchar*) command_type, command_type_len))
3089 goto err;
3090
3091 if (my_b_write(&log_file, (uchar*) "\t", 1))
3092 goto err;
3093
3094 /* sql_text */
3095 if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len))
3096 goto err;
3097
3098 if (my_b_write(&log_file, (uchar*) "\n", 1) ||
3099 flush_io_cache(&log_file))
3100 goto err;
3101 }
3102
3103 mysql_mutex_unlock(&LOCK_log);
3104 return FALSE;
3105 err:
3106
3107 if (!write_error)
3108 {
3109 write_error= 1;
3110 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
3111 }
3112 mysql_mutex_unlock(&LOCK_log);
3113 return TRUE;
3114 }
3115
3116
3117 /*
3118 Log a query to the traditional slow log file
3119
3120 SYNOPSIS
3121 write()
3122
3123 thd THD of the query
3124 current_time current timestamp
3125 user_host the pointer to the string with user@host info
3126 user_host_len length of the user_host string. this is computed once
3127 and passed to all general log event handlers
3128 query_utime Amount of time the query took to execute (in microseconds)
3129 lock_utime Amount of time the query was locked (in microseconds)
3130 is_command The flag, which determines, whether the sql_text is a
3131 query or an administrator command.
3132 sql_text the very text of the query or administrator command
3133 processed
3134 sql_text_len the length of sql_text string
3135
3136 DESCRIPTION
3137
3138 Log a query to the slow log file.
3139
3140 RETURN
3141 FALSE - OK
3142 TRUE - error occurred
3143 */
3144
write(THD * thd,time_t current_time,const char * user_host,size_t user_host_len,ulonglong query_utime,ulonglong lock_utime,bool is_command,const char * sql_text,size_t sql_text_len)3145 bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
3146 const char *user_host, size_t user_host_len, ulonglong query_utime,
3147 ulonglong lock_utime, bool is_command,
3148 const char *sql_text, size_t sql_text_len)
3149 {
3150 bool error= 0;
3151 char llbuff[22];
3152 DBUG_ENTER("MYSQL_QUERY_LOG::write");
3153
3154 mysql_mutex_lock(&LOCK_log);
3155 if (is_open())
3156 { // Safety against reopen
3157 char buff[80], *end;
3158 char query_time_buff[22+7], lock_time_buff[22+7];
3159 size_t buff_len;
3160 end= buff;
3161
3162 if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
3163 {
3164 if (current_time != last_time)
3165 {
3166 last_time= current_time;
3167 struct tm start;
3168 localtime_r(¤t_time, &start);
3169
3170 buff_len= my_snprintf(buff, sizeof buff,
3171 "# Time: %02d%02d%02d %2d:%02d:%02d\n",
3172 start.tm_year % 100, start.tm_mon + 1,
3173 start.tm_mday, start.tm_hour,
3174 start.tm_min, start.tm_sec);
3175
3176 /* Note that my_b_write() assumes it knows the length for this */
3177 if (my_b_write(&log_file, (uchar*) buff, buff_len))
3178 goto err;
3179 }
3180 const uchar uh[]= "# User@Host: ";
3181 if (my_b_write(&log_file, uh, sizeof(uh) - 1) ||
3182 my_b_write(&log_file, (uchar*) user_host, user_host_len) ||
3183 my_b_write(&log_file, (uchar*) "\n", 1))
3184 goto err;
3185
3186 /* For slow query log */
3187 sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
3188 sprintf(lock_time_buff, "%.6f", ulonglong2double(lock_utime)/1000000.0);
3189 if (my_b_printf(&log_file,
3190 "# Thread_id: %lu Schema: %s QC_hit: %s\n"
3191 "# Query_time: %s Lock_time: %s Rows_sent: %lu Rows_examined: %lu\n"
3192 "# Rows_affected: %lu Bytes_sent: %lu\n",
3193 (ulong) thd->thread_id, thd->get_db(),
3194 ((thd->query_plan_flags & QPLAN_QC) ? "Yes" : "No"),
3195 query_time_buff, lock_time_buff,
3196 (ulong) thd->get_sent_row_count(),
3197 (ulong) thd->get_examined_row_count(),
3198 (ulong) thd->get_affected_rows(),
3199 (ulong) (thd->status_var.bytes_sent - thd->bytes_sent_old)))
3200 goto err;
3201
3202 if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN)
3203 && thd->tmp_tables_used &&
3204 my_b_printf(&log_file,
3205 "# Tmp_tables: %lu Tmp_disk_tables: %lu "
3206 "Tmp_table_sizes: %s\n",
3207 (ulong) thd->tmp_tables_used,
3208 (ulong) thd->tmp_tables_disk_used,
3209 llstr(thd->tmp_tables_size, llbuff)))
3210 goto err;
3211
3212 if (thd->spcont &&
3213 my_b_printf(&log_file, "# Stored_routine: %s\n",
3214 ErrConvDQName(thd->spcont->m_sp).ptr()))
3215 goto err;
3216
3217 if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN) &&
3218 (thd->query_plan_flags &
3219 (QPLAN_FULL_SCAN | QPLAN_FULL_JOIN | QPLAN_TMP_TABLE |
3220 QPLAN_TMP_DISK | QPLAN_FILESORT | QPLAN_FILESORT_DISK |
3221 QPLAN_FILESORT_PRIORITY_QUEUE)) &&
3222 my_b_printf(&log_file,
3223 "# Full_scan: %s Full_join: %s "
3224 "Tmp_table: %s Tmp_table_on_disk: %s\n"
3225 "# Filesort: %s Filesort_on_disk: %s Merge_passes: %lu "
3226 "Priority_queue: %s\n",
3227 ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"),
3228 ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"),
3229 (thd->tmp_tables_used ? "Yes" : "No"),
3230 (thd->tmp_tables_disk_used ? "Yes" : "No"),
3231 ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"),
3232 ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ?
3233 "Yes" : "No"),
3234 thd->query_plan_fsort_passes,
3235 ((thd->query_plan_flags & QPLAN_FILESORT_PRIORITY_QUEUE) ?
3236 "Yes" : "No")
3237 ))
3238 goto err;
3239 if (thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_EXPLAIN &&
3240 thd->lex->explain)
3241 {
3242 StringBuffer<128> buf;
3243 DBUG_ASSERT(!thd->free_list);
3244 if (!print_explain_for_slow_log(thd->lex, thd, &buf))
3245 if (my_b_printf(&log_file, "%s", buf.c_ptr_safe()))
3246 goto err;
3247 thd->free_items();
3248 }
3249 if (thd->db.str && strcmp(thd->db.str, db))
3250 { // Database changed
3251 if (my_b_printf(&log_file,"use %s;\n",thd->db.str))
3252 goto err;
3253 strmov(db,thd->db.str);
3254 }
3255 if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
3256 {
3257 end=strmov(end, ",last_insert_id=");
3258 end=longlong10_to_str((longlong)
3259 thd->first_successful_insert_id_in_prev_stmt_for_binlog,
3260 end, -10);
3261 }
3262 // Save value if we do an insert.
3263 if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
3264 {
3265 if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
3266 {
3267 end=strmov(end,",insert_id=");
3268 end=longlong10_to_str((longlong)
3269 thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(),
3270 end, -10);
3271 }
3272 }
3273
3274 /*
3275 This info used to show up randomly, depending on whether the query
3276 checked the query start time or not. now we always write current
3277 timestamp to the slow log
3278 */
3279 end= strmov(end, ",timestamp=");
3280 end= int10_to_str((long) current_time, end, 10);
3281
3282 if (end != buff)
3283 {
3284 *end++=';';
3285 *end='\n';
3286 if (my_b_write(&log_file, (uchar*) "SET ", 4) ||
3287 my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff)))
3288 goto err;
3289 }
3290 if (is_command)
3291 {
3292 end= strxmov(buff, "# administrator command: ", NullS);
3293 buff_len= (ulong) (end - buff);
3294 DBUG_EXECUTE_IF("simulate_slow_log_write_error",
3295 {DBUG_SET("+d,simulate_file_write_error");});
3296 if(my_b_write(&log_file, (uchar*) buff, buff_len))
3297 goto err;
3298 }
3299 if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) ||
3300 my_b_write(&log_file, (uchar*) ";\n",2) ||
3301 flush_io_cache(&log_file))
3302 goto err;
3303
3304 }
3305 }
3306 end:
3307 mysql_mutex_unlock(&LOCK_log);
3308 DBUG_RETURN(error);
3309
3310 err:
3311 error= 1;
3312 if (!write_error)
3313 {
3314 write_error= 1;
3315 sql_print_error(ER_THD(thd, ER_ERROR_ON_WRITE), name, errno);
3316 }
3317 goto end;
3318 }
3319
3320
3321 /**
3322 @todo
3323 The following should be using fn_format(); We just need to
3324 first change fn_format() to cut the file name if it's too long.
3325 */
generate_name(const char * log_name,const char * suffix,bool strip_ext,char * buff)3326 const char *MYSQL_LOG::generate_name(const char *log_name,
3327 const char *suffix,
3328 bool strip_ext, char *buff)
3329 {
3330 if (!log_name || !log_name[0])
3331 {
3332 strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
3333 return (const char *)
3334 fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
3335 }
3336 // get rid of extension if the log is binary to avoid problems
3337 if (strip_ext)
3338 {
3339 char *p= fn_ext(log_name);
3340 uint length= (uint) (p - log_name);
3341 strmake(buff, log_name, MY_MIN(length, FN_REFLEN-1));
3342 return (const char*)buff;
3343 }
3344 return log_name;
3345 }
3346
3347
/*
  Print some additional information about addition/removal of
  XID list entries.

  X is the format string (it must stay a bare string literal, it is
  handed unchanged to WSREP_DEBUG), Y is a pointer to the list entry
  whose binlog_name / binlog_name_len / binlog_id are printed.

  Both variants expand to a single do { } while (0) statement, so the
  macro can be used safely as the body of an if/else and always needs
  a terminating semicolon.

  TODO: Remove once MDEV-9510 is fixed.
*/
#ifdef WITH_WSREP
#define WSREP_XID_LIST_ENTRY(X, Y)                              \
  do                                                            \
  {                                                             \
    if (wsrep_debug)                                            \
    {                                                           \
      char buf[FN_REFLEN];                                      \
      strmake(buf, (Y)->binlog_name, (Y)->binlog_name_len);     \
      WSREP_DEBUG(X, buf, (Y)->binlog_id);                      \
    }                                                           \
  } while (0)
#else
#define WSREP_XID_LIST_ENTRY(X, Y) do { } while(0)
#endif
3364
MYSQL_BIN_LOG(uint * sync_period)3365 MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
3366 :reset_master_pending(0), mark_xid_done_waiting(0),
3367 bytes_written(0), last_used_log_number(0),
3368 file_id(1), open_count(1),
3369 group_commit_queue(0), group_commit_queue_busy(FALSE),
3370 num_commits(0), num_group_commits(0),
3371 group_commit_trigger_count(0), group_commit_trigger_timeout(0),
3372 group_commit_trigger_lock_wait(0),
3373 sync_period_ptr(sync_period), sync_counter(0),
3374 state_file_deleted(false), binlog_state_recover_done(false),
3375 is_relay_log(0), relay_signal_cnt(0),
3376 checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
3377 relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
3378 description_event_for_exec(0), description_event_for_queue(0),
3379 current_binlog_id(0), reset_master_count(0)
3380 {
3381 /*
3382 We don't want to initialize locks here as such initialization depends on
3383 safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
3384 called only in main(). Doing initialization here would make it happen
3385 before main().
3386 */
3387 index_file_name[0] = 0;
3388 bzero((char*) &index_file, sizeof(index_file));
3389 bzero((char*) &purge_index_file, sizeof(purge_index_file));
3390 }
3391
stop_background_thread()3392 void MYSQL_BIN_LOG::stop_background_thread()
3393 {
3394 if (binlog_background_thread_started)
3395 {
3396 mysql_mutex_lock(&LOCK_binlog_background_thread);
3397 binlog_background_thread_stop= true;
3398 mysql_cond_signal(&COND_binlog_background_thread);
3399 while (binlog_background_thread_stop)
3400 mysql_cond_wait(&COND_binlog_background_thread_end,
3401 &LOCK_binlog_background_thread);
3402 mysql_mutex_unlock(&LOCK_binlog_background_thread);
3403 binlog_background_thread_started= false;
3404 }
3405 }
3406
3407 /* this is called only once */
3408
cleanup()3409 void MYSQL_BIN_LOG::cleanup()
3410 {
3411 DBUG_ENTER("cleanup");
3412 if (inited)
3413 {
3414 xid_count_per_binlog *b;
3415
3416 /* Wait for the binlog background thread to stop. */
3417 if (!is_relay_log)
3418 stop_background_thread();
3419
3420 inited= 0;
3421 mysql_mutex_lock(&LOCK_log);
3422 close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
3423 mysql_mutex_unlock(&LOCK_log);
3424 delete description_event_for_queue;
3425 delete description_event_for_exec;
3426
3427 while ((b= binlog_xid_count_list.get()))
3428 {
3429 /*
3430 There should be no pending XIDs at shutdown, and only one entry (for
3431 the active binlog file) in the list.
3432 */
3433 DBUG_ASSERT(b->xid_count == 0);
3434 DBUG_ASSERT(!binlog_xid_count_list.head());
3435 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::cleanup(): Removing xid_list_entry "
3436 "for %s (%lu)", b);
3437 delete b;
3438 }
3439
3440 mysql_mutex_destroy(&LOCK_log);
3441 mysql_mutex_destroy(&LOCK_index);
3442 mysql_mutex_destroy(&LOCK_xid_list);
3443 mysql_mutex_destroy(&LOCK_binlog_background_thread);
3444 mysql_mutex_destroy(&LOCK_binlog_end_pos);
3445 mysql_cond_destroy(&COND_relay_log_updated);
3446 mysql_cond_destroy(&COND_bin_log_updated);
3447 mysql_cond_destroy(&COND_queue_busy);
3448 mysql_cond_destroy(&COND_xid_list);
3449 mysql_cond_destroy(&COND_binlog_background_thread);
3450 mysql_cond_destroy(&COND_binlog_background_thread_end);
3451 }
3452
3453 /*
3454 Free data for global binlog state.
3455 We can't do that automatically as we need to do this before
3456 safemalloc is shut down
3457 */
3458 if (!is_relay_log)
3459 rpl_global_gtid_binlog_state.free();
3460 DBUG_VOID_RETURN;
3461 }
3462
3463
3464 /* Init binlog-specific vars */
init(ulong max_size_arg)3465 void MYSQL_BIN_LOG::init(ulong max_size_arg)
3466 {
3467 DBUG_ENTER("MYSQL_BIN_LOG::init");
3468 max_size= max_size_arg;
3469 DBUG_PRINT("info",("max_size: %lu", max_size));
3470 DBUG_VOID_RETURN;
3471 }
3472
3473
init_pthread_objects()3474 void MYSQL_BIN_LOG::init_pthread_objects()
3475 {
3476 MYSQL_LOG::init_pthread_objects();
3477 mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
3478 mysql_mutex_setflags(&LOCK_index, MYF_NO_DEADLOCK_DETECTION);
3479 mysql_mutex_init(key_BINLOG_LOCK_xid_list,
3480 &LOCK_xid_list, MY_MUTEX_INIT_FAST);
3481 mysql_cond_init(m_key_relay_log_update, &COND_relay_log_updated, 0);
3482 mysql_cond_init(m_key_bin_log_update, &COND_bin_log_updated, 0);
3483 mysql_cond_init(m_key_COND_queue_busy, &COND_queue_busy, 0);
3484 mysql_cond_init(key_BINLOG_COND_xid_list, &COND_xid_list, 0);
3485
3486 mysql_mutex_init(key_BINLOG_LOCK_binlog_background_thread,
3487 &LOCK_binlog_background_thread, MY_MUTEX_INIT_FAST);
3488 mysql_cond_init(key_BINLOG_COND_binlog_background_thread,
3489 &COND_binlog_background_thread, 0);
3490 mysql_cond_init(key_BINLOG_COND_binlog_background_thread_end,
3491 &COND_binlog_background_thread_end, 0);
3492
3493 mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
3494 MY_MUTEX_INIT_SLOW);
3495 }
3496
3497
open_index_file(const char * index_file_name_arg,const char * log_name,bool need_mutex)3498 bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
3499 const char *log_name, bool need_mutex)
3500 {
3501 File index_file_nr= -1;
3502 DBUG_ASSERT(!my_b_inited(&index_file));
3503
3504 /*
3505 First open of this class instance
3506 Create an index file that will hold all file names uses for logging.
3507 Add new entries to the end of it.
3508 */
3509 myf opt= MY_UNPACK_FILENAME;
3510 if (!index_file_name_arg)
3511 {
3512 index_file_name_arg= log_name; // Use same basename for index file
3513 opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
3514 }
3515 fn_format(index_file_name, index_file_name_arg, mysql_data_home,
3516 ".index", opt);
3517 if ((index_file_nr= mysql_file_open(m_key_file_log_index,
3518 index_file_name,
3519 O_RDWR | O_CREAT | O_BINARY | O_CLOEXEC,
3520 MYF(MY_WME))) < 0 ||
3521 mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
3522 init_io_cache_ext(&index_file, index_file_nr,
3523 IO_SIZE, WRITE_CACHE,
3524 mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
3525 0, MYF(MY_WME | MY_WAIT_IF_FULL),
3526 m_key_file_log_index_cache) ||
3527 DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
3528 {
3529 /*
3530 TODO: all operations creating/deleting the index file or a log, should
3531 call my_sync_dir() or my_sync_dir_by_file() to be durable.
3532 TODO: file creation should be done with mysql_file_create()
3533 not mysql_file_open().
3534 */
3535 if (index_file_nr >= 0)
3536 mysql_file_close(index_file_nr, MYF(0));
3537 return TRUE;
3538 }
3539
3540 #ifdef HAVE_REPLICATION
3541 /*
3542 Sync the index by purging any binary log file that is not registered.
3543 In other words, either purge binary log files that were removed from
3544 the index but not purged from the file system due to a crash or purge
3545 any binary log file that was created but not register in the index
3546 due to a crash.
3547 */
3548
3549 if (set_purge_index_file_name(index_file_name_arg) ||
3550 open_purge_index_file(FALSE) ||
3551 purge_index_entry(NULL, NULL, need_mutex) ||
3552 close_purge_index_file() ||
3553 DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
3554 {
3555 sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
3556 "file.");
3557 return TRUE;
3558 }
3559 #endif
3560
3561 return FALSE;
3562 }
3563
3564
3565 /**
3566 Open a (new) binlog file.
3567
3568 - Open the log file and the index file. Register the new
3569 file name in it
3570 - When calling this when the file is in use, you must have a locks
3571 on LOCK_log and LOCK_index.
3572
3573 @retval
3574 0 ok
3575 @retval
3576 1 error
3577 */
3578
open(const char * log_name,const char * new_name,ulong next_log_number,enum cache_type io_cache_type_arg,ulong max_size_arg,bool null_created_arg,bool need_mutex)3579 bool MYSQL_BIN_LOG::open(const char *log_name,
3580 const char *new_name,
3581 ulong next_log_number,
3582 enum cache_type io_cache_type_arg,
3583 ulong max_size_arg,
3584 bool null_created_arg,
3585 bool need_mutex)
3586 {
3587 File file= -1;
3588 xid_count_per_binlog *new_xid_list_entry= NULL, *b;
3589 DBUG_ENTER("MYSQL_BIN_LOG::open");
3590
3591 mysql_mutex_assert_owner(&LOCK_log);
3592
3593 if (!is_relay_log)
3594 {
3595 if (!binlog_state_recover_done)
3596 {
3597 binlog_state_recover_done= true;
3598 if (do_binlog_recovery(opt_bin_logname, false))
3599 DBUG_RETURN(1);
3600 }
3601
3602 if (!binlog_background_thread_started &&
3603 start_binlog_background_thread())
3604 DBUG_RETURN(1);
3605 }
3606
3607 /* We need to calculate new log file name for purge to delete old */
3608 if (init_and_set_log_file_name(log_name, new_name, next_log_number,
3609 LOG_BIN, io_cache_type_arg))
3610 {
3611 sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
3612 if (!is_relay_log)
3613 goto err;
3614 DBUG_RETURN(1);
3615 }
3616
3617 #ifdef HAVE_REPLICATION
3618 if (open_purge_index_file(TRUE) ||
3619 register_create_index_entry(log_file_name) ||
3620 sync_purge_index_file() ||
3621 DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
3622 {
3623 /**
3624 TODO:
3625 Although this was introduced to appease valgrind when
3626 injecting emulated faults using
3627 fault_injection_registering_index it may be good to consider
3628 what actually happens when open_purge_index_file succeeds but
3629 register or sync fails.
3630
3631 Perhaps we might need the code below in MYSQL_LOG_BIN::cleanup
3632 for "real life" purposes as well?
3633 */
3634 DBUG_EXECUTE_IF("fault_injection_registering_index", {
3635 if (my_b_inited(&purge_index_file))
3636 {
3637 end_io_cache(&purge_index_file);
3638 my_close(purge_index_file.file, MYF(0));
3639 }
3640 });
3641
3642 sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
3643 DBUG_RETURN(1);
3644 }
3645 DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
3646 #endif
3647
3648 write_error= 0;
3649
3650 /* open the main log file */
3651 if (MYSQL_LOG::open(
3652 #ifdef HAVE_PSI_INTERFACE
3653 m_key_file_log,
3654 #endif
3655 log_name,
3656 LOG_UNKNOWN, /* Don't generate new name */
3657 0, 0, io_cache_type_arg))
3658 {
3659 #ifdef HAVE_REPLICATION
3660 close_purge_index_file();
3661 #endif
3662 DBUG_RETURN(1); /* all warnings issued */
3663 }
3664
3665 init(max_size_arg);
3666
3667 open_count++;
3668
3669 DBUG_ASSERT(log_type == LOG_BIN);
3670
3671 {
3672 bool write_file_name_to_index_file=0;
3673
3674 if (!my_b_filelength(&log_file))
3675 {
3676 /*
3677 The binary log file was empty (probably newly created)
3678 This is the normal case and happens when the user doesn't specify
3679 an extension for the binary log files.
3680 In this case we write a standard header to it.
3681 */
3682 if (my_b_safe_write(&log_file, BINLOG_MAGIC,
3683 BIN_LOG_HEADER_SIZE))
3684 goto err;
3685 bytes_written+= BIN_LOG_HEADER_SIZE;
3686 write_file_name_to_index_file= 1;
3687 }
3688
3689 {
3690 /*
3691 In 4.x we put Start event only in the first binlog. But from 5.0 we
3692 want a Start event even if this is not the very first binlog.
3693 */
3694 Format_description_log_event s(BINLOG_VERSION);
3695 /*
3696 don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
3697 as we won't be able to reset it later
3698 */
3699 if (io_cache_type == WRITE_CACHE)
3700 s.flags |= LOG_EVENT_BINLOG_IN_USE_F;
3701
3702 if (is_relay_log)
3703 {
3704 if (relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
3705 relay_log_checksum_alg=
3706 opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options
3707 : BINLOG_CHECKSUM_ALG_OFF;
3708 s.checksum_alg= relay_log_checksum_alg;
3709 s.set_relay_log_event();
3710 }
3711 else
3712 s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options;
3713
3714 crypto.scheme = 0;
3715 DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
3716 if (!s.is_valid())
3717 goto err;
3718 s.dont_set_created= null_created_arg;
3719 if (write_event(&s))
3720 goto err;
3721 bytes_written+= s.data_written;
3722
3723 if (encrypt_binlog)
3724 {
3725 uint key_version= encryption_key_get_latest_version(ENCRYPTION_KEY_SYSTEM_DATA);
3726 if (key_version == ENCRYPTION_KEY_VERSION_INVALID)
3727 {
3728 sql_print_error("Failed to enable encryption of binary logs");
3729 goto err;
3730 }
3731
3732 if (key_version != ENCRYPTION_KEY_NOT_ENCRYPTED)
3733 {
3734 if (my_random_bytes(crypto.nonce, sizeof(crypto.nonce)))
3735 goto err;
3736
3737 Start_encryption_log_event sele(1, key_version, crypto.nonce);
3738 sele.checksum_alg= s.checksum_alg;
3739 if (write_event(&sele))
3740 goto err;
3741
3742 // Start_encryption_log_event is written, enable the encryption
3743 if (crypto.init(sele.crypto_scheme, key_version))
3744 goto err;
3745 }
3746 }
3747
3748 if (!is_relay_log)
3749 {
3750 char buf[FN_REFLEN];
3751
3752 /*
3753 Output a Gtid_list_log_event at the start of the binlog file.
3754
3755 This is used to quickly determine which GTIDs are found in binlog
3756 files earlier than this one, and which are found in this (or later)
3757 binlogs.
3758
3759 The list gives a mapping from (domain_id, server_id) -> seq_no (so
3760 this means that there is at most one entry for every unique pair
3761 (domain_id, server_id) in the list). It indicates that this seq_no is
3762 the last one found in an earlier binlog file for this (domain_id,
3763 server_id) combination - so any higher seq_no should be search for
3764 from this binlog file, or a later one.
3765
3766 This allows to locate the binlog file containing a given GTID by
3767 scanning backwards, reading just the Gtid_list_log_event at the
3768 start of each file, and scanning only the relevant binlog file when
3769 found, not all binlog files.
3770
3771 The existence of a given entry (domain_id, server_id, seq_no)
3772 guarantees only that this seq_no will not be found in this or any
3773 later binlog file. It does not guarantee that it can be found it an
3774 earlier binlog file, for example the file may have been purged.
3775
3776 If there is no entry for a given (domain_id, server_id) pair, then
3777 it means that no such GTID exists in any earlier binlog. It is
3778 permissible to remove such pair from future Gtid_list_log_events
3779 if all previous binlog files containing such GTIDs have been purged
3780 (though such optimization is not performed at the time of this
3781 writing). So if there is no entry for given GTID it means that such
3782 GTID should be search for in this or later binlog file, same as if
3783 there had been an entry (domain_id, server_id, 0).
3784 */
3785
3786 Gtid_list_log_event gl_ev(&rpl_global_gtid_binlog_state, 0);
3787 if (write_event(&gl_ev))
3788 goto err;
3789
3790 /* Output a binlog checkpoint event at the start of the binlog file. */
3791
3792 /*
3793 Construct an entry in the binlog_xid_count_list for the new binlog
3794 file (we will not link it into the list until we know the new file
3795 is successfully created; otherwise we would have to remove it again
3796 if creation failed, which gets tricky since other threads may have
3797 seen the entry in the meantime - and we do not want to hold
3798 LOCK_xid_list for long periods of time).
3799
3800 Write the current binlog checkpoint into the log, so XA recovery will
3801 know from where to start recovery.
3802 */
3803 size_t off= dirname_length(log_file_name);
3804 uint len= static_cast<uint>(strlen(log_file_name) - off);
3805 new_xid_list_entry= new xid_count_per_binlog(log_file_name+off, len);
3806 if (!new_xid_list_entry)
3807 goto err;
3808
3809 /*
3810 Find the name for the Initial binlog checkpoint.
3811
3812 Normally this will just be the first entry, as we delete entries
3813 when their count drops to zero. But we scan the list to handle any
3814 corner case, eg. for the first binlog file opened after startup, the
3815 list will be empty.
3816 */
3817 mysql_mutex_lock(&LOCK_xid_list);
3818 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
3819 while ((b= it++) && b->xid_count == 0)
3820 ;
3821 mysql_mutex_unlock(&LOCK_xid_list);
3822 if (!b)
3823 b= new_xid_list_entry;
3824 if (b->binlog_name)
3825 strmake(buf, b->binlog_name, b->binlog_name_len);
3826 else
3827 goto err;
3828 Binlog_checkpoint_log_event ev(buf, len);
3829 DBUG_EXECUTE_IF("crash_before_write_checkpoint_event",
3830 flush_io_cache(&log_file);
3831 mysql_file_sync(log_file.file, MYF(MY_WME));
3832 DBUG_SUICIDE(););
3833 if (write_event(&ev))
3834 goto err;
3835 bytes_written+= ev.data_written;
3836 }
3837 }
3838 if (description_event_for_queue &&
3839 description_event_for_queue->binlog_version>=4)
3840 {
3841 /*
3842 This is a relay log written to by the I/O slave thread.
3843 Write the event so that others can later know the format of this relay
3844 log.
3845 Note that this event is very close to the original event from the
3846 master (it has binlog version of the master, event types of the
3847 master), so this is suitable to parse the next relay log's event. It
3848 has been produced by
3849 Format_description_log_event::Format_description_log_event(char* buf,).
3850 Why don't we want to write the description_event_for_queue if this
3851 event is for format<4 (3.23 or 4.x): this is because in that case, the
3852 description_event_for_queue describes the data received from the
3853 master, but not the data written to the relay log (*conversion*),
3854 which is in format 4 (slave's).
3855 */
3856 /*
3857 Set 'created' to 0, so that in next relay logs this event does not
3858 trigger cleaning actions on the slave in
3859 Format_description_log_event::apply_event_impl().
3860 */
3861 description_event_for_queue->created= 0;
3862 /* Don't set log_pos in event header */
3863 description_event_for_queue->set_artificial_event();
3864
3865 if (write_event(description_event_for_queue))
3866 goto err;
3867 bytes_written+= description_event_for_queue->data_written;
3868 }
3869 if (flush_io_cache(&log_file) ||
3870 mysql_file_sync(log_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
3871 goto err;
3872
3873 my_off_t offset= my_b_tell(&log_file);
3874
3875 if (!is_relay_log)
3876 {
3877 /* update binlog_end_pos so that it can be read by after sync hook */
3878 reset_binlog_end_pos(log_file_name, offset);
3879
3880 mysql_mutex_lock(&LOCK_commit_ordered);
3881 strmake_buf(last_commit_pos_file, log_file_name);
3882 last_commit_pos_offset= offset;
3883 mysql_mutex_unlock(&LOCK_commit_ordered);
3884 }
3885
3886 if (write_file_name_to_index_file)
3887 {
3888 #ifdef HAVE_REPLICATION
3889 #ifdef ENABLED_DEBUG_SYNC
3890 if (current_thd)
3891 DEBUG_SYNC(current_thd, "binlog_open_before_update_index");
3892 #endif
3893 DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
3894 #endif
3895
3896 DBUG_ASSERT(my_b_inited(&index_file) != 0);
3897 reinit_io_cache(&index_file, WRITE_CACHE,
3898 my_b_filelength(&index_file), 0, 0);
3899 /*
3900 As this is a new log file, we write the file name to the index
3901 file. As every time we write to the index file, we sync it.
3902 */
3903 if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
3904 my_b_write(&index_file, (uchar*) log_file_name,
3905 strlen(log_file_name)) ||
3906 my_b_write(&index_file, (uchar*) "\n", 1) ||
3907 flush_io_cache(&index_file) ||
3908 mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
3909 goto err;
3910
3911 #ifdef HAVE_REPLICATION
3912 DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
3913 #endif
3914 }
3915 }
3916
3917 if (!is_relay_log)
3918 {
3919 /*
3920 Now the file was created successfully, so we can link in the entry for
3921 the new binlog file in binlog_xid_count_list.
3922 */
3923 mysql_mutex_lock(&LOCK_xid_list);
3924 ++current_binlog_id;
3925 new_xid_list_entry->binlog_id= current_binlog_id;
3926 /* Remove any initial entries with no pending XIDs. */
3927 while ((b= binlog_xid_count_list.head()) && b->xid_count == 0)
3928 {
3929 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Removing xid_list_entry for "
3930 "%s (%lu)", b);
3931 delete binlog_xid_count_list.get();
3932 }
3933 mysql_cond_broadcast(&COND_xid_list);
3934 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Adding new xid_list_entry for "
3935 "%s (%lu)", new_xid_list_entry);
3936 binlog_xid_count_list.push_back(new_xid_list_entry);
3937 mysql_mutex_unlock(&LOCK_xid_list);
3938
3939 /*
3940 Now that we have synced a new binlog file with an initial Gtid_list
3941 event, it is safe to delete the binlog state file. We will write out
3942 a new, updated file at shutdown, and if we crash before we can recover
3943 the state from the newly written binlog file.
3944
3945 Since the state file will contain out-of-date data as soon as the first
3946 new GTID is binlogged, it is better to remove it, to avoid any risk of
3947 accidentally reading incorrect data later.
3948 */
3949 if (!state_file_deleted)
3950 {
3951 char buf[FN_REFLEN];
3952 fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
3953 MY_UNPACK_FILENAME);
3954 my_delete(buf, MY_SYNC_DIR);
3955 state_file_deleted= true;
3956 }
3957 }
3958
3959 log_state= LOG_OPENED;
3960
3961 #ifdef HAVE_REPLICATION
3962 close_purge_index_file();
3963 #endif
3964
3965 /* Notify the io thread that binlog is rotated to a new file */
3966 if (is_relay_log)
3967 signal_relay_log_update();
3968 else
3969 update_binlog_end_pos();
3970 DBUG_RETURN(0);
3971
3972 err:
3973 int tmp_errno= errno;
3974 #ifdef HAVE_REPLICATION
3975 if (is_inited_purge_index_file())
3976 purge_index_entry(NULL, NULL, need_mutex);
3977 close_purge_index_file();
3978 #endif
3979 sql_print_error(fatal_log_error, (name) ? name : log_name, tmp_errno);
3980 if (new_xid_list_entry)
3981 delete new_xid_list_entry;
3982 if (file >= 0)
3983 mysql_file_close(file, MYF(0));
3984 close(LOG_CLOSE_INDEX);
3985 DBUG_RETURN(1);
3986 }
3987
3988
get_current_log(LOG_INFO * linfo)3989 int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
3990 {
3991 mysql_mutex_lock(&LOCK_log);
3992 int ret = raw_get_current_log(linfo);
3993 mysql_mutex_unlock(&LOCK_log);
3994 return ret;
3995 }
3996
raw_get_current_log(LOG_INFO * linfo)3997 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
3998 {
3999 mysql_mutex_assert_owner(&LOCK_log);
4000 strmake_buf(linfo->log_file_name, log_file_name);
4001 linfo->pos = my_b_tell(&log_file);
4002 return 0;
4003 }
4004
/**
  Move the tail of an index file up to the beginning of the file.

  Everything from 'offset' to the end of the file is copied to the start of
  the file, which drops the first 'offset' bytes.

  We do the copy outside of the IO_CACHE as the cache buffers would just
  make things slower and more complicated.
  In most cases the copy loop should only do one read.

  @param index_file   File to move
  @param offset       Move everything from here to beginning

  @note
    File will be truncated to be 'offset' shorter or filled up with newlines

  @retval
    0	ok
  @retval
    1	error while seeking, reading, writing, resizing or syncing the file
*/

#ifdef HAVE_REPLICATION

static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
{
  int bytes_read;
  my_off_t init_offset= offset;
  File file= index_file->file;
  uchar io_buf[IO_SIZE*2];
  DBUG_ENTER("copy_up_file_and_fill");

  for (;; offset+= bytes_read)
  {
    /*
      A failed seek must abort the copy: reading or writing at whatever
      position the descriptor happens to be at would scramble the index.
    */
    if (mysql_file_seek(file, offset, MY_SEEK_SET, MYF(MY_WME)) ==
        MY_FILEPOS_ERROR)
      goto err;
    if ((bytes_read= (int) mysql_file_read(file, io_buf, sizeof(io_buf),
                                           MYF(MY_WME)))
        < 0)
      goto err;
    if (!bytes_read)
      break;					// end of file
    /* Write the chunk back 'init_offset' bytes earlier in the file */
    if (mysql_file_seek(file, offset-init_offset, MY_SEEK_SET, MYF(MY_WME)) ==
        MY_FILEPOS_ERROR)
      goto err;
    if (mysql_file_write(file, io_buf, bytes_read,
                         MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
      goto err;
  }
  /* The following will either truncate the file or fill the end with '\n' */
  if (mysql_file_chsize(file, offset - init_offset, '\n', MYF(MY_WME)) ||
      mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE)))
    goto err;

  /* Reset data in old index cache */
  reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
  DBUG_RETURN(0);

err:
  DBUG_RETURN(1);
}

#endif /* HAVE_REPLICATION */
4060
4061 /**
4062 Find the position in the log-index-file for the given log name.
4063
4064 @param linfo Store here the found log file name and position to
4065 the NEXT log file name in the index file.
4066 @param log_name Filename to find in the index file.
4067 Is a null pointer if we want to read the first entry
4068 @param need_lock Set this to 1 if the parent doesn't already have a
4069 lock on LOCK_index
4070
4071 @note
4072 On systems without the truncate function the file will end with one or
4073 more empty lines. These will be ignored when reading the file.
4074
4075 @retval
4076 0 ok
4077 @retval
4078 LOG_INFO_EOF End of log-index-file found
4079 @retval
4080 LOG_INFO_IO Got IO error while reading file
4081 */
4082
find_log_pos(LOG_INFO * linfo,const char * log_name,bool need_lock)4083 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
4084 bool need_lock)
4085 {
4086 int error= 0;
4087 char *full_fname= linfo->log_file_name;
4088 char full_log_name[FN_REFLEN], fname[FN_REFLEN];
4089 uint log_name_len= 0, fname_len= 0;
4090 DBUG_ENTER("find_log_pos");
4091 full_log_name[0]= full_fname[0]= 0;
4092
4093 /*
4094 Mutex needed because we need to make sure the file pointer does not
4095 move from under our feet
4096 */
4097 if (need_lock)
4098 mysql_mutex_lock(&LOCK_index);
4099 mysql_mutex_assert_owner(&LOCK_index);
4100
4101 // extend relative paths for log_name to be searched
4102 if (log_name)
4103 {
4104 if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
4105 {
4106 error= LOG_INFO_EOF;
4107 goto end;
4108 }
4109 }
4110
4111 log_name_len= log_name ? (uint) strlen(full_log_name) : 0;
4112 DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
4113 log_name ? log_name : "NULL", full_log_name));
4114
4115 /* As the file is flushed, we can't get an error here */
4116 (void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);
4117
4118 for (;;)
4119 {
4120 size_t length;
4121 my_off_t offset= my_b_tell(&index_file);
4122
4123 DBUG_EXECUTE_IF("simulate_find_log_pos_error",
4124 error= LOG_INFO_EOF; break;);
4125 /* If we get 0 or 1 characters, this is the end of the file */
4126 if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
4127 {
4128 /* Did not find the given entry; Return not found or error */
4129 error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
4130 break;
4131 }
4132 if (fname[length-1] != '\n')
4133 continue; // Not a log entry
4134 fname[length-1]= 0; // Remove end \n
4135
4136 // extend relative paths and match against full path
4137 if (normalize_binlog_name(full_fname, fname, is_relay_log))
4138 {
4139 error= LOG_INFO_EOF;
4140 break;
4141 }
4142 fname_len= (uint) strlen(full_fname);
4143
4144 // if the log entry matches, null string matching anything
4145 if (!log_name ||
4146 (log_name_len == fname_len &&
4147 !strncmp(full_fname, full_log_name, log_name_len)))
4148 {
4149 DBUG_PRINT("info", ("Found log file entry"));
4150 linfo->index_file_start_offset= offset;
4151 linfo->index_file_offset = my_b_tell(&index_file);
4152 break;
4153 }
4154 }
4155
4156 end:
4157 if (need_lock)
4158 mysql_mutex_unlock(&LOCK_index);
4159 DBUG_RETURN(error);
4160 }
4161
4162
4163 /**
4164 Find the position in the log-index-file for the given log name.
4165
4166 @param
4167 linfo Store here the next log file name and position to
4168 the file name after that.
4169 @param
4170 need_lock Set this to 1 if the parent doesn't already have a
4171 lock on LOCK_index
4172
4173 @note
4174 - Before calling this function, one has to call find_log_pos()
4175 to set up 'linfo'
4176 - Mutex needed because we need to make sure the file pointer does not move
4177 from under our feet
4178
4179 @retval
4180 0 ok
4181 @retval
4182 LOG_INFO_EOF End of log-index-file found
4183 @retval
4184 LOG_INFO_IO Got IO error while reading file
4185 */
4186
find_next_log(LOG_INFO * linfo,bool need_lock)4187 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
4188 {
4189 int error= 0;
4190 size_t length;
4191 char fname[FN_REFLEN];
4192 char *full_fname= linfo->log_file_name;
4193
4194 if (need_lock)
4195 mysql_mutex_lock(&LOCK_index);
4196 mysql_mutex_assert_owner(&LOCK_index);
4197
4198 /* As the file is flushed, we can't get an error here */
4199 (void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
4200 0);
4201
4202 linfo->index_file_start_offset= linfo->index_file_offset;
4203 if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
4204 {
4205 error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
4206 goto err;
4207 }
4208
4209 if (fname[0] != 0)
4210 {
4211 if(normalize_binlog_name(full_fname, fname, is_relay_log))
4212 {
4213 error= LOG_INFO_EOF;
4214 goto err;
4215 }
4216 length= strlen(full_fname);
4217 }
4218
4219 full_fname[length-1]= 0; // kill \n
4220 linfo->index_file_offset= my_b_tell(&index_file);
4221
4222 err:
4223 if (need_lock)
4224 mysql_mutex_unlock(&LOCK_index);
4225 return error;
4226 }
4227
4228
4229 /**
4230 Delete all logs referred to in the index file.
4231
4232 The new index file will only contain this file.
4233
4234 @param thd Thread id. This can be zero in case of resetting
4235 relay logs
4236 @param create_new_log 1 if we should start writing to a new log file
4237 @param next_log_number min number of next log file to use, if possible.
4238
4239 @note
4240 If not called from slave thread, write start event to new log
4241
4242 @retval
4243 0 ok
4244 @retval
4245 1 error
4246 */
4247
reset_logs(THD * thd,bool create_new_log,rpl_gtid * init_state,uint32 init_state_len,ulong next_log_number)4248 bool MYSQL_BIN_LOG::reset_logs(THD *thd, bool create_new_log,
4249 rpl_gtid *init_state, uint32 init_state_len,
4250 ulong next_log_number)
4251 {
4252 LOG_INFO linfo;
4253 bool error=0;
4254 int err;
4255 const char* save_name;
4256 DBUG_ENTER("reset_logs");
4257
4258 if (!is_relay_log)
4259 {
4260 if (init_state && !is_empty_state())
4261 {
4262 my_error(ER_BINLOG_MUST_BE_EMPTY, MYF(0));
4263 DBUG_RETURN(1);
4264 }
4265
4266 /*
4267 Mark that a RESET MASTER is in progress.
4268 This ensures that a binlog checkpoint will not try to write binlog
4269 checkpoint events, which would be useless (as we are deleting the binlog
4270 anyway) and could deadlock, as we are holding LOCK_log.
4271
4272 Wait for any mark_xid_done() calls that might be already running to
4273 complete (mark_xid_done_waiting counter to drop to zero); we need to
4274 do this before we take the LOCK_log to not deadlock.
4275 */
4276 mysql_mutex_lock(&LOCK_xid_list);
4277 reset_master_pending++;
4278 while (mark_xid_done_waiting > 0)
4279 mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4280 mysql_mutex_unlock(&LOCK_xid_list);
4281 }
4282
4283 DEBUG_SYNC_C_IF_THD(thd, "reset_logs_after_set_reset_master_pending");
4284 /*
4285 We need to get both locks to be sure that no one is trying to
4286 write to the index log file.
4287 */
4288 mysql_mutex_lock(&LOCK_log);
4289 mysql_mutex_lock(&LOCK_index);
4290
4291 if (!is_relay_log)
4292 {
4293 /*
4294 We are going to nuke all binary log files.
4295 Without binlog, we cannot XA recover prepared-but-not-committed
4296 transactions in engines. So force a commit checkpoint first.
4297
4298 Note that we take and immediately
4299 release LOCK_after_binlog_sync/LOCK_commit_ordered. This has
4300 the effect to ensure that any on-going group commit (in
4301 trx_group_commit_leader()) has completed before we request the checkpoint,
4302 due to the chaining of LOCK_log and LOCK_commit_ordered in that function.
4303 (We are holding LOCK_log, so no new group commit can start).
4304
4305 Without this, it is possible (though perhaps unlikely) that the RESET
4306 MASTER could run in-between the write to the binlog and the
4307 commit_ordered() in the engine of some transaction, and then a crash
4308 later would leave such transaction not recoverable.
4309 */
4310
4311 mysql_mutex_lock(&LOCK_after_binlog_sync);
4312 mysql_mutex_lock(&LOCK_commit_ordered);
4313 mysql_mutex_unlock(&LOCK_after_binlog_sync);
4314 mysql_mutex_unlock(&LOCK_commit_ordered);
4315
4316 mark_xids_active(current_binlog_id, 1);
4317 do_checkpoint_request(current_binlog_id);
4318
4319 /* Now wait for all checkpoint requests and pending unlog() to complete. */
4320 mysql_mutex_lock(&LOCK_xid_list);
4321 for (;;)
4322 {
4323 if (is_xidlist_idle_nolock())
4324 break;
4325 /*
4326 Wait until signalled that one more binlog dropped to zero, then check
4327 again.
4328 */
4329 mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4330 }
4331
4332 /*
4333 Now all XIDs are fully flushed to disk, and we are holding LOCK_log so
4334 no new ones will be written. So we can proceed to delete the logs.
4335 */
4336 mysql_mutex_unlock(&LOCK_xid_list);
4337 }
4338
4339 /* Save variables so that we can reopen the log */
4340 save_name=name;
4341 name=0; // Protect against free
4342 close(LOG_CLOSE_TO_BE_OPENED);
4343
4344 last_used_log_number= 0; // Reset log number cache
4345
4346 /*
4347 First delete all old log files and then update the index file.
4348 As we first delete the log files and do not use sort of logging,
4349 a crash may lead to an inconsistent state where the index has
4350 references to non-existent files.
4351
4352 We need to invert the steps and use the purge_index_file methods
4353 in order to make the operation safe.
4354 */
4355
4356 if ((err= find_log_pos(&linfo, NullS, 0)) != 0)
4357 {
4358 uint errcode= purge_log_get_error_code(err);
4359 sql_print_error("Failed to locate old binlog or relay log files");
4360 my_message(errcode, ER_THD_OR_DEFAULT(thd, errcode), MYF(0));
4361 error= 1;
4362 goto err;
4363 }
4364
4365 for (;;)
4366 {
4367 if (unlikely((error= my_delete(linfo.log_file_name, MYF(0)))))
4368 {
4369 if (my_errno == ENOENT)
4370 {
4371 if (thd)
4372 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4373 ER_LOG_PURGE_NO_FILE,
4374 ER_THD(thd, ER_LOG_PURGE_NO_FILE),
4375 linfo.log_file_name);
4376
4377 sql_print_information("Failed to delete file '%s'",
4378 linfo.log_file_name);
4379 my_errno= 0;
4380 error= 0;
4381 }
4382 else
4383 {
4384 if (thd)
4385 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4386 ER_BINLOG_PURGE_FATAL_ERR,
4387 "a problem with deleting %s; "
4388 "consider examining correspondence "
4389 "of your binlog index file "
4390 "to the actual binlog files",
4391 linfo.log_file_name);
4392 error= 1;
4393 goto err;
4394 }
4395 }
4396 if (find_next_log(&linfo, 0))
4397 break;
4398 }
4399
4400 if (!is_relay_log)
4401 {
4402 if (init_state)
4403 rpl_global_gtid_binlog_state.load(init_state, init_state_len);
4404 else
4405 rpl_global_gtid_binlog_state.reset();
4406 }
4407
4408 /* Start logging with a new file */
4409 close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED);
4410 // Reset (open will update)
4411 if (unlikely((error= my_delete(index_file_name, MYF(0)))))
4412 {
4413 if (my_errno == ENOENT)
4414 {
4415 if (thd)
4416 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4417 ER_LOG_PURGE_NO_FILE,
4418 ER_THD(thd, ER_LOG_PURGE_NO_FILE),
4419 index_file_name);
4420 sql_print_information("Failed to delete file '%s'",
4421 index_file_name);
4422 my_errno= 0;
4423 error= 0;
4424 }
4425 else
4426 {
4427 if (thd)
4428 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4429 ER_BINLOG_PURGE_FATAL_ERR,
4430 "a problem with deleting %s; "
4431 "consider examining correspondence "
4432 "of your binlog index file "
4433 "to the actual binlog files",
4434 index_file_name);
4435 error= 1;
4436 goto err;
4437 }
4438 }
4439 if (create_new_log && !open_index_file(index_file_name, 0, FALSE))
4440 if (unlikely((error= open(save_name, 0, next_log_number,
4441 io_cache_type, max_size, 0, FALSE))))
4442 goto err;
4443 my_free((void *) save_name);
4444
4445 err:
4446 if (error == 1)
4447 name= const_cast<char*>(save_name);
4448
4449 if (!is_relay_log)
4450 {
4451 xid_count_per_binlog *b;
4452 /*
4453 Remove all entries in the xid_count list except the last.
4454 Normally we will just be deleting all the entries that we waited for to
4455 drop to zero above. But if we fail during RESET MASTER for some reason
4456 then we will not have created any new log file, and we may keep the last
4457 of the old entries.
4458 */
4459 mysql_mutex_lock(&LOCK_xid_list);
4460 for (;;)
4461 {
4462 b= binlog_xid_count_list.head();
4463 DBUG_ASSERT(b /* List can never become empty. */);
4464 if (b->binlog_id == current_binlog_id)
4465 break;
4466 DBUG_ASSERT(b->xid_count == 0);
4467 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::reset_logs(): Removing "
4468 "xid_list_entry for %s (%lu)", b);
4469 delete binlog_xid_count_list.get();
4470 }
4471 mysql_cond_broadcast(&COND_xid_list);
4472 reset_master_pending--;
4473 reset_master_count++;
4474 mysql_mutex_unlock(&LOCK_xid_list);
4475 }
4476
4477 mysql_mutex_unlock(&LOCK_index);
4478 mysql_mutex_unlock(&LOCK_log);
4479 DBUG_RETURN(error);
4480 }
4481
4482
wait_for_last_checkpoint_event()4483 void MYSQL_BIN_LOG::wait_for_last_checkpoint_event()
4484 {
4485 mysql_mutex_lock(&LOCK_xid_list);
4486 for (;;)
4487 {
4488 if (binlog_xid_count_list.is_last(binlog_xid_count_list.head()))
4489 break;
4490 mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4491 }
4492 mysql_mutex_unlock(&LOCK_xid_list);
4493
4494 /*
4495 LOCK_xid_list and LOCK_log are chained, so the LOCK_log will only be
4496 obtained after mark_xid_done() has written the last checkpoint event.
4497 */
4498 mysql_mutex_lock(&LOCK_log);
4499 mysql_mutex_unlock(&LOCK_log);
4500 }
4501
4502
4503 /**
4504 Delete relay log files prior to rli->group_relay_log_name
4505 (i.e. all logs which are not involved in a non-finished group
4506 (transaction)), remove them from the index file and start on next
4507 relay log.
4508
4509 IMPLEMENTATION
4510
4511 - You must hold rli->data_lock before calling this function, since
4512 it writes group_relay_log_pos and similar fields of
4513 Relay_log_info.
4514 - Protects index file with LOCK_index
4515 - Delete relevant relay log files
4516 - Copy all file names after these ones to the front of the index file
    - If the OS has truncate, truncate the file, else fill it with '\n'
4518 - Read the next file name from the index file and store in rli->linfo
4519
4520 @param rli Relay log information
4521 @param included If false, all relay logs that are strictly before
4522 rli->group_relay_log_name are deleted ; if true, the
4523 latter is deleted too (i.e. all relay logs
4524 read by the SQL slave thread are deleted).
4525
4526 @note
4527 - This is only called from the slave SQL thread when it has read
4528 all commands from a relay log and want to switch to a new relay log.
4529 - When this happens, we can be in an active transaction as
4530 a transaction can span over two relay logs
4531 (although it is always written as a single block to the master's binary
4532 log, hence cannot span over two master's binary logs).
4533
4534 @retval
4535 0 ok
4536 @retval
4537 LOG_INFO_EOF End of log-index-file found
4538 @retval
4539 LOG_INFO_SEEK Could not allocate IO cache
4540 @retval
4541 LOG_INFO_IO Got IO error while reading file
4542 */
4543
4544 #ifdef HAVE_REPLICATION
4545
purge_first_log(Relay_log_info * rli,bool included)4546 int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
4547 {
4548 int error, errcode;
4549 char *to_purge_if_included= NULL;
4550 inuse_relaylog *ir;
4551 ulonglong log_space_reclaimed= 0;
4552 DBUG_ENTER("purge_first_log");
4553
4554 DBUG_ASSERT(is_open());
4555 DBUG_ASSERT(rli->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT);
4556 DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
4557
4558 mysql_mutex_assert_owner(&rli->data_lock);
4559
4560 mysql_mutex_lock(&LOCK_index);
4561
4562 ir= rli->inuse_relaylog_list;
4563 while (ir)
4564 {
4565 inuse_relaylog *next= ir->next;
4566 if (!ir->completed || ir->dequeued_count < ir->queued_count)
4567 {
4568 included= false;
4569 break;
4570 }
4571 if (!included && !strcmp(ir->name, rli->group_relay_log_name))
4572 break;
4573 if (!next)
4574 {
4575 rli->last_inuse_relaylog= NULL;
4576 included= 1;
4577 to_purge_if_included= my_strdup(key_memory_Relay_log_info_group_relay_log_name,
4578 ir->name, MYF(0));
4579 }
4580 rli->free_inuse_relaylog(ir);
4581 ir= next;
4582 }
4583 rli->inuse_relaylog_list= ir;
4584 if (ir)
4585 to_purge_if_included= my_strdup(key_memory_Relay_log_info_group_relay_log_name,
4586 ir->name, MYF(0));
4587
4588 /*
4589 Read the next log file name from the index file and pass it back to
4590 the caller.
4591 */
4592 if (unlikely((error=find_log_pos(&rli->linfo, rli->event_relay_log_name,
4593 0))) ||
4594 unlikely((error=find_next_log(&rli->linfo, 0))))
4595 {
4596 sql_print_error("next log error: %d offset: %llu log: %s included: %d",
4597 error, rli->linfo.index_file_offset,
4598 rli->event_relay_log_name, included);
4599 goto err;
4600 }
4601
4602 /*
4603 Reset rli's coordinates to the current log.
4604 */
4605 rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
4606 strmake_buf(rli->event_relay_log_name,rli->linfo.log_file_name);
4607
4608 /*
4609 If we removed the rli->group_relay_log_name file,
4610 we must update the rli->group* coordinates, otherwise do not touch it as the
4611 group's execution is not finished (e.g. COMMIT not executed)
4612 */
4613 if (included)
4614 {
4615 rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE;
4616 strmake_buf(rli->group_relay_log_name,rli->linfo.log_file_name);
4617 rli->notify_group_relay_log_name_update();
4618 }
4619
4620 /* Store where we are in the new file for the execution thread */
4621 if (rli->flush())
4622 error= LOG_INFO_IO;
4623
4624 DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););
4625
4626 rli->relay_log.purge_logs(to_purge_if_included, included,
4627 0, 0, &log_space_reclaimed);
4628
4629 mysql_mutex_lock(&rli->log_space_lock);
4630 rli->log_space_total-= log_space_reclaimed;
4631 mysql_cond_broadcast(&rli->log_space_cond);
4632 mysql_mutex_unlock(&rli->log_space_lock);
4633
4634 /*
4635 * Need to update the log pos because purge logs has been called
4636 * after fetching initially the log pos at the beginning of the method.
4637 */
4638 if ((errcode= find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)))
4639 {
4640 sql_print_error("next log error: %d offset: %llu log: %s included: %d",
4641 errcode, rli->linfo.index_file_offset,
4642 rli->group_relay_log_name, included);
4643 goto err;
4644 }
4645
4646 /* If included was passed, rli->linfo should be the first entry. */
4647 DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);
4648
4649 err:
4650 my_free(to_purge_if_included);
4651 mysql_mutex_unlock(&LOCK_index);
4652 DBUG_RETURN(error);
4653 }
4654
4655 /**
4656 Update log index_file.
4657 */
4658
update_log_index(LOG_INFO * log_info,bool need_update_threads)4659 int MYSQL_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
4660 {
4661 if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
4662 return LOG_INFO_IO;
4663
4664 // now update offsets in index file for running threads
4665 if (need_update_threads)
4666 adjust_linfo_offsets(log_info->index_file_start_offset);
4667 return 0;
4668 }
4669
4670 /**
4671 Remove all logs before the given log from disk and from the index file.
4672
4673 @param to_log Delete all log file name before this file.
4674 @param included If true, to_log is deleted too.
4675 @param need_mutex
4676 @param need_update_threads If we want to update the log coordinates of
4677 all threads. False for relay logs, true otherwise.
4678 @param reclaimeed_log_space If not null, increment this variable to
4679 the amount of log space freed
4680
4681 @note
4682 If any of the logs before the deleted one is in use,
4683 only purge logs up to this one.
4684
4685 @retval
4686 0 ok
4687 @retval
4688 LOG_INFO_EOF to_log not found
4689 LOG_INFO_EMFILE too many files opened
4690 LOG_INFO_FATAL if any other than ENOENT error from
4691 mysql_file_stat() or mysql_file_delete()
4692 */
4693
purge_logs(const char * to_log,bool included,bool need_mutex,bool need_update_threads,ulonglong * reclaimed_space)4694 int MYSQL_BIN_LOG::purge_logs(const char *to_log,
4695 bool included,
4696 bool need_mutex,
4697 bool need_update_threads,
4698 ulonglong *reclaimed_space)
4699 {
4700 int error= 0;
4701 bool exit_loop= 0;
4702 LOG_INFO log_info;
4703 THD *thd= current_thd;
4704 DBUG_ENTER("purge_logs");
4705 DBUG_PRINT("info",("to_log= %s",to_log));
4706
4707 if (need_mutex)
4708 mysql_mutex_lock(&LOCK_index);
4709 if (unlikely((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/))) )
4710 {
4711 sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
4712 "listed in the index.", to_log);
4713 goto err;
4714 }
4715
4716 if (unlikely((error= open_purge_index_file(TRUE))))
4717 {
4718 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
4719 goto err;
4720 }
4721
4722 /*
4723 File name exists in index file; delete until we find this file
4724 or a file that is used.
4725 */
4726 if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))))
4727 goto err;
4728 while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
4729 can_purge_log(log_info.log_file_name))
4730 {
4731 if (unlikely((error= register_purge_index_entry(log_info.log_file_name))))
4732 {
4733 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
4734 log_info.log_file_name);
4735 goto err;
4736 }
4737
4738 if (find_next_log(&log_info, 0) || exit_loop)
4739 break;
4740 }
4741
4742 DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););
4743
4744 if (unlikely((error= sync_purge_index_file())))
4745 {
4746 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
4747 goto err;
4748 }
4749
4750 /* We know how many files to delete. Update index file. */
4751 if (unlikely((error=update_log_index(&log_info, need_update_threads))))
4752 {
4753 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
4754 goto err;
4755 }
4756
4757 DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););
4758
4759 err:
4760 /* Read each entry from purge_index_file and delete the file. */
4761 if (is_inited_purge_index_file() &&
4762 (error= purge_index_entry(thd, reclaimed_space, FALSE)))
4763 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
4764 " that would be purged.");
4765 close_purge_index_file();
4766
4767 DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););
4768
4769 if (need_mutex)
4770 mysql_mutex_unlock(&LOCK_index);
4771 DBUG_RETURN(error);
4772 }
4773
set_purge_index_file_name(const char * base_file_name)4774 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
4775 {
4776 int error= 0;
4777 DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
4778 if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
4779 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
4780 MY_REPLACE_EXT)) == NULL)
4781 {
4782 error= 1;
4783 sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
4784 "file name.");
4785 }
4786 DBUG_RETURN(error);
4787 }
4788
open_purge_index_file(bool destroy)4789 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
4790 {
4791 int error= 0;
4792 File file= -1;
4793
4794 DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
4795
4796 if (destroy)
4797 close_purge_index_file();
4798
4799 if (!my_b_inited(&purge_index_file))
4800 {
4801 if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
4802 MYF(MY_WME))) < 0 ||
4803 init_io_cache(&purge_index_file, file, IO_SIZE,
4804 (destroy ? WRITE_CACHE : READ_CACHE),
4805 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
4806 {
4807 error= 1;
4808 sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
4809 " file.");
4810 }
4811 }
4812 DBUG_RETURN(error);
4813 }
4814
close_purge_index_file()4815 int MYSQL_BIN_LOG::close_purge_index_file()
4816 {
4817 int error= 0;
4818
4819 DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
4820
4821 if (my_b_inited(&purge_index_file))
4822 {
4823 end_io_cache(&purge_index_file);
4824 error= my_close(purge_index_file.file, MYF(0));
4825 }
4826 my_delete(purge_index_file_name, MYF(0));
4827 bzero((char*) &purge_index_file, sizeof(purge_index_file));
4828
4829 DBUG_RETURN(error);
4830 }
4831
is_inited_purge_index_file()4832 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
4833 {
4834 return my_b_inited(&purge_index_file);
4835 }
4836
sync_purge_index_file()4837 int MYSQL_BIN_LOG::sync_purge_index_file()
4838 {
4839 int error= 0;
4840 DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
4841
4842 if (unlikely((error= flush_io_cache(&purge_index_file))) ||
4843 unlikely((error= my_sync(purge_index_file.file,
4844 MYF(MY_WME | MY_SYNC_FILESIZE)))))
4845 DBUG_RETURN(error);
4846
4847 DBUG_RETURN(error);
4848 }
4849
register_purge_index_entry(const char * entry)4850 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
4851 {
4852 int error= 0;
4853 DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
4854
4855 if (unlikely((error=my_b_write(&purge_index_file, (const uchar*)entry,
4856 strlen(entry)))) ||
4857 unlikely((error=my_b_write(&purge_index_file, (const uchar*)"\n", 1))))
4858 DBUG_RETURN (error);
4859
4860 DBUG_RETURN(error);
4861 }
4862
register_create_index_entry(const char * entry)4863 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
4864 {
4865 DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
4866 DBUG_RETURN(register_purge_index_entry(entry));
4867 }
4868
purge_index_entry(THD * thd,ulonglong * reclaimed_space,bool need_mutex)4869 int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *reclaimed_space,
4870 bool need_mutex)
4871 {
4872 DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");
4873 MY_STAT s;
4874 int error= 0;
4875 LOG_INFO log_info;
4876 LOG_INFO check_log_info;
4877
4878 DBUG_ASSERT(my_b_inited(&purge_index_file));
4879
4880 if (unlikely((error= reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0,
4881 0))))
4882 {
4883 sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
4884 "for read");
4885 goto err;
4886 }
4887
4888 for (;;)
4889 {
4890 size_t length;
4891
4892 if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
4893 FN_REFLEN)) <= 1)
4894 {
4895 if (purge_index_file.error)
4896 {
4897 error= purge_index_file.error;
4898 sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
4899 "register file.", error);
4900 goto err;
4901 }
4902
4903 /* Reached EOF */
4904 break;
4905 }
4906
4907 /* Get rid of the trailing '\n' */
4908 log_info.log_file_name[length-1]= 0;
4909
4910 if (unlikely(!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s,
4911 MYF(0))))
4912 {
4913 if (my_errno == ENOENT)
4914 {
4915 /*
4916 It's not fatal if we can't stat a log file that does not exist;
4917 If we could not stat, we won't delete.
4918 */
4919 if (thd)
4920 {
4921 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4922 ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE),
4923 log_info.log_file_name);
4924 }
4925 sql_print_information("Failed to execute mysql_file_stat on file '%s'",
4926 log_info.log_file_name);
4927 my_errno= 0;
4928 }
4929 else
4930 {
4931 /*
4932 Other than ENOENT are fatal
4933 */
4934 if (thd)
4935 {
4936 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4937 ER_BINLOG_PURGE_FATAL_ERR,
4938 "a problem with getting info on being purged %s; "
4939 "consider examining correspondence "
4940 "of your binlog index file "
4941 "to the actual binlog files",
4942 log_info.log_file_name);
4943 }
4944 else
4945 {
4946 sql_print_information("Failed to delete log file '%s'; "
4947 "consider examining correspondence "
4948 "of your binlog index file "
4949 "to the actual binlog files",
4950 log_info.log_file_name);
4951 }
4952 error= LOG_INFO_FATAL;
4953 goto err;
4954 }
4955 }
4956 else
4957 {
4958 if (unlikely((error= find_log_pos(&check_log_info,
4959 log_info.log_file_name, need_mutex))))
4960 {
4961 if (error != LOG_INFO_EOF)
4962 {
4963 if (thd)
4964 {
4965 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4966 ER_BINLOG_PURGE_FATAL_ERR,
4967 "a problem with deleting %s and "
4968 "reading the binlog index file",
4969 log_info.log_file_name);
4970 }
4971 else
4972 {
4973 sql_print_information("Failed to delete file '%s' and "
4974 "read the binlog index file",
4975 log_info.log_file_name);
4976 }
4977 goto err;
4978 }
4979
4980 error= 0;
4981
4982 DBUG_PRINT("info",("purging %s",log_info.log_file_name));
4983 if (!my_delete(log_info.log_file_name, MYF(0)))
4984 {
4985 if (reclaimed_space)
4986 *reclaimed_space+= s.st_size;
4987 }
4988 else
4989 {
4990 if (my_errno == ENOENT)
4991 {
4992 if (thd)
4993 {
4994 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4995 ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE),
4996 log_info.log_file_name);
4997 }
4998 sql_print_information("Failed to delete file '%s'",
4999 log_info.log_file_name);
5000 my_errno= 0;
5001 }
5002 else
5003 {
5004 if (thd)
5005 {
5006 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
5007 ER_BINLOG_PURGE_FATAL_ERR,
5008 "a problem with deleting %s; "
5009 "consider examining correspondence "
5010 "of your binlog index file "
5011 "to the actual binlog files",
5012 log_info.log_file_name);
5013 }
5014 else
5015 {
5016 sql_print_information("Failed to delete file '%s'; "
5017 "consider examining correspondence "
5018 "of your binlog index file "
5019 "to the actual binlog files",
5020 log_info.log_file_name);
5021 }
5022 if (my_errno == EMFILE)
5023 {
5024 DBUG_PRINT("info",
5025 ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno));
5026 error= LOG_INFO_EMFILE;
5027 goto err;
5028 }
5029 error= LOG_INFO_FATAL;
5030 goto err;
5031 }
5032 }
5033 }
5034 }
5035 }
5036
5037 err:
5038 DBUG_RETURN(error);
5039 }
5040
5041 /**
5042 Remove all logs before the given file date from disk and from the
5043 index file.
5044
5045 @param thd Thread pointer
5046 @param purge_time Delete all log files before given date.
5047
5048 @note
5049 If any of the logs before the deleted one is in use,
5050 only purge logs up to this one.
5051
5052 @retval
5053 0 ok
5054 @retval
5055 LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
5056 LOG_INFO_FATAL if any other than ENOENT error from
5057 mysql_file_stat() or mysql_file_delete()
5058 */
5059
purge_logs_before_date(time_t purge_time)5060 int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
5061 {
5062 int error;
5063 char to_log[FN_REFLEN];
5064 LOG_INFO log_info;
5065 MY_STAT stat_area;
5066 THD *thd= current_thd;
5067 DBUG_ENTER("purge_logs_before_date");
5068
5069 mysql_mutex_lock(&LOCK_index);
5070 to_log[0]= 0;
5071
5072 if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))))
5073 goto err;
5074
5075 while (strcmp(log_file_name, log_info.log_file_name) &&
5076 can_purge_log(log_info.log_file_name))
5077 {
5078 if (!mysql_file_stat(m_key_file_log,
5079 log_info.log_file_name, &stat_area, MYF(0)))
5080 {
5081 if (my_errno == ENOENT)
5082 {
5083 /*
5084 It's not fatal if we can't stat a log file that does not exist.
5085 */
5086 my_errno= 0;
5087 }
5088 else
5089 {
5090 /*
5091 Other than ENOENT are fatal
5092 */
5093 if (thd)
5094 {
5095 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
5096 ER_BINLOG_PURGE_FATAL_ERR,
5097 "a problem with getting info on being purged %s; "
5098 "consider examining correspondence "
5099 "of your binlog index file "
5100 "to the actual binlog files",
5101 log_info.log_file_name);
5102 }
5103 else
5104 {
5105 sql_print_information("Failed to delete log file '%s'",
5106 log_info.log_file_name);
5107 }
5108 error= LOG_INFO_FATAL;
5109 goto err;
5110 }
5111 }
5112 else
5113 {
5114 if (stat_area.st_mtime < purge_time)
5115 strmake_buf(to_log, log_info.log_file_name);
5116 else
5117 break;
5118 }
5119 if (find_next_log(&log_info, 0))
5120 break;
5121 }
5122
5123 error= (to_log[0] ? purge_logs(to_log, 1, 0, 1, (ulonglong *) 0) : 0);
5124
5125 err:
5126 mysql_mutex_unlock(&LOCK_index);
5127 DBUG_RETURN(error);
5128 }
5129
5130
5131 bool
can_purge_log(const char * log_file_name_arg)5132 MYSQL_BIN_LOG::can_purge_log(const char *log_file_name_arg)
5133 {
5134 xid_count_per_binlog *b;
5135
5136 if (is_active(log_file_name_arg))
5137 return false;
5138 mysql_mutex_lock(&LOCK_xid_list);
5139 {
5140 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
5141 while ((b= it++) &&
5142 0 != strncmp(log_file_name_arg+dirname_length(log_file_name_arg),
5143 b->binlog_name, b->binlog_name_len))
5144 ;
5145 }
5146 mysql_mutex_unlock(&LOCK_xid_list);
5147 if (b)
5148 return false;
5149 return !log_in_use(log_file_name_arg);
5150 }
5151 #endif /* HAVE_REPLICATION */
5152
5153
5154 bool
is_xidlist_idle()5155 MYSQL_BIN_LOG::is_xidlist_idle()
5156 {
5157 bool res;
5158 mysql_mutex_lock(&LOCK_xid_list);
5159 res= is_xidlist_idle_nolock();
5160 mysql_mutex_unlock(&LOCK_xid_list);
5161 return res;
5162 }
5163
5164
5165 bool
is_xidlist_idle_nolock()5166 MYSQL_BIN_LOG::is_xidlist_idle_nolock()
5167 {
5168 xid_count_per_binlog *b;
5169
5170 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
5171 while ((b= it++))
5172 {
5173 if (b->xid_count > 0)
5174 return false;
5175 }
5176 return true;
5177 }
5178
5179 /**
5180 Create a new log file name.
5181
5182 @param buf buf of at least FN_REFLEN where new name is stored
5183
5184 @note
5185 If file name will be longer then FN_REFLEN it will be truncated
5186 */
5187
make_log_name(char * buf,const char * log_ident)5188 void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
5189 {
5190 size_t dir_len = dirname_length(log_file_name);
5191 if (dir_len >= FN_REFLEN)
5192 dir_len=FN_REFLEN-1;
5193 strnmov(buf, log_file_name, dir_len);
5194 strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
5195 }
5196
5197
5198 /**
5199 Check if we are writing/reading to the given log file.
5200 */
5201
is_active(const char * log_file_name_arg)5202 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
5203 {
5204 /**
5205 * there should/must be mysql_mutex_assert_owner(&LOCK_log) here...
5206 * but code violates this! (scary monsters and super creeps!)
5207 *
5208 * example stacktrace:
5209 * #8 MYSQL_BIN_LOG::is_active
5210 * #9 MYSQL_BIN_LOG::can_purge_log
5211 * #10 MYSQL_BIN_LOG::purge_logs
5212 * #11 MYSQL_BIN_LOG::purge_first_log
5213 * #12 next_event
5214 * #13 exec_relay_log_event
5215 *
5216 * I didn't investigate if this is ligit...(i.e if my comment is wrong)
5217 */
5218 return !strcmp(log_file_name, log_file_name_arg);
5219 }
5220
5221
5222 /*
5223 Wrappers around new_file_impl to avoid using argument
5224 to control locking. The argument 1) less readable 2) breaks
5225 incapsulation 3) allows external access to the class without
5226 a lock (which is not possible with private new_file_without_locking
5227 method).
5228
5229 @retval
5230 nonzero - error
5231 */
5232
new_file()5233 int MYSQL_BIN_LOG::new_file()
5234 {
5235 int res;
5236 mysql_mutex_lock(&LOCK_log);
5237 res= new_file_impl();
5238 mysql_mutex_unlock(&LOCK_log);
5239 return res;
5240 }
5241
5242 /*
5243 @retval
5244 nonzero - error
5245 */
new_file_without_locking()5246 int MYSQL_BIN_LOG::new_file_without_locking()
5247 {
5248 return new_file_impl();
5249 }
5250
5251
/**
  Start writing to a new log file or reopen the old file.

  The caller must own LOCK_log (asserted below).  LOCK_index is taken and
  released inside this function.  If the log is not open, nothing is done
  and 0 is returned.

  The sequence is: generate the next file name, write a Rotate event naming
  it into the current log, flush, close the current log (for a non-relay
  log the old file descriptor is kept open until the end), reopen the index
  file and open the new log.

  @retval
    nonzero - error

  @note
    The new file name is stored last in the index file
*/

int MYSQL_BIN_LOG::new_file_impl()
{
  int error= 0, close_on_error= FALSE;
  char new_name[FN_REFLEN], *new_name_ptr, *old_name, *file_to_open;
  uint close_flag;
  bool delay_close= false;
  File UNINIT_VAR(old_file);
  DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");

  DBUG_ASSERT(log_type == LOG_BIN);
  mysql_mutex_assert_owner(&LOCK_log);

  if (!is_open())
  {
    DBUG_PRINT("info",("log is closed"));
    DBUG_RETURN(error);
  }

  mysql_mutex_lock(&LOCK_index);

  /* Reuse old name if not binlog and not update log */
  new_name_ptr= name;

  /*
    If user hasn't specified an extension, generate a new log name
    We have to do this here and not in open as we want to store the
    new file name in the current binary log file.
  */
  if (unlikely((error= generate_new_name(new_name, name, 0))))
  {
#ifdef ENABLE_AND_FIX_HANG
    close_on_error= TRUE;
#endif
    /* Nothing was written or closed yet, so skip the log number rollback */
    goto end2;
  }
  new_name_ptr=new_name;

  {
    /*
      We log the whole file name for log file as the user may decide
      to change base names at some point.
    */
    Rotate_log_event r(new_name + dirname_length(new_name), 0, LOG_EVENT_OFFSET,
                       is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
    /*
      The current relay-log's closing Rotate event must have checksum
      value computed with an algorithm of the last relay-logged FD event.
    */
    if (is_relay_log)
      r.checksum_alg= relay_log_checksum_alg;
    DBUG_ASSERT(!is_relay_log ||
                relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
    /* The debug keyword below forces the failure branch for testing */
    if (DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event",
                         (error= close_on_error= TRUE), FALSE) ||
        (error= write_event(&r)))
    {
      DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno= 2;);
      close_on_error= TRUE;
      my_printf_error(ER_ERROR_ON_WRITE,
                      ER_THD_OR_DEFAULT(current_thd, ER_CANT_OPEN_FILE),
                      MYF(ME_FATAL), name, errno);
      goto end;
    }
    bytes_written+= r.data_written;
  }

  /*
    Update needs to be signalled even if there is no rotate event
    log rotation should give the waiting thread a signal to
    discover EOF and move on to the next log.
  */
  if (unlikely((error= flush_io_cache(&log_file))))
  {
    close_on_error= TRUE;
    goto end;
  }
  update_binlog_end_pos();

  /* Detach the old name so that close() does not free it; freed below */
  old_name=name;
  name=0;				// Don't free name
  close_flag= LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX;
  if (!is_relay_log)
  {
    /*
      We need to keep the old binlog file open (and marked as in-use) until
      the new one is fully created and synced to disk and index. Otherwise we
      leave a window where if we crash, there is no binlog file marked as
      crashed for server restart to detect the need for recovery.
    */
    old_file= log_file.file;
    close_flag|= LOG_CLOSE_DELAYED_CLOSE;
    delay_close= true;
  }
  close(close_flag);
  if (checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF)
  {
    DBUG_ASSERT(!is_relay_log);
    DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
    binlog_checksum_options= checksum_alg_reset;
  }
  /*
     Note that at this point, log_state != LOG_CLOSED
     (important for is_open()).
  */

  /*
     new_file() is only used for rotation (in FLUSH LOGS or because size >
     max_binlog_size or max_relay_log_size).
     If this is a binary log, the Format_description_log_event at the
     beginning of the new file should have created=0 (to distinguish with the
     Format_description_log_event written at server startup, which should
     trigger temp tables deletion on slaves.
  */

  /* reopen index binlog file, BUG#34582 */
  file_to_open= index_file_name;
  error= open_index_file(index_file_name, 0, FALSE);
  if (likely(!error))
  {
    /* reopen the binary log file. */
    file_to_open= new_name_ptr;
    error= open(old_name, new_name_ptr, 0, io_cache_type, max_size, 1, FALSE);
  }

  /* handle reopening errors; file_to_open names whichever open failed */
  if (unlikely(error))
  {
    my_error(ER_CANT_OPEN_FILE, MYF(ME_FATAL), file_to_open, error);
    close_on_error= TRUE;
  }

  my_free(old_name);

end:
  /* In case of errors, reuse the last generated log file name */
  if (unlikely(error))
  {
    DBUG_ASSERT(last_used_log_number > 0);
    last_used_log_number--;
  }

end2:
  /* Release the old binlog file that was kept open across the switch */
  if (delay_close)
  {
    clear_inuse_flag_when_closing(old_file);
    mysql_file_close(old_file, MYF(MY_WME));
  }

  if (unlikely(error && close_on_error)) /* rotate or reopen failed */
  {
    /*
      Close whatever was left opened.

      We are keeping the behavior as it exists today, ie,
      we disable logging and move on (see: BUG#51014).

      TODO: as part of WL#1790 consider other approaches:
       - kill mysql (safety);
       - try multiple locations for opening a log file;
       - switch server to protected/readonly mode
       - ...
    */
    close(LOG_CLOSE_INDEX);
    sql_print_error(fatal_log_error, new_name_ptr, errno);
  }

  mysql_mutex_unlock(&LOCK_index);

  DBUG_RETURN(error);
}
5434
write_event(Log_event * ev,binlog_cache_data * cache_data,IO_CACHE * file)5435 bool MYSQL_BIN_LOG::write_event(Log_event *ev, binlog_cache_data *cache_data,
5436 IO_CACHE *file)
5437 {
5438 Log_event_writer writer(file, 0, &crypto);
5439 if (crypto.scheme && file == &log_file)
5440 {
5441 writer.ctx= alloca(crypto.ctx_size);
5442 writer.set_encrypted_writer();
5443 }
5444 if (cache_data)
5445 cache_data->add_status(ev->logged_status());
5446 return writer.write(ev);
5447 }
5448
append(Log_event * ev)5449 bool MYSQL_BIN_LOG::append(Log_event *ev)
5450 {
5451 bool res;
5452 mysql_mutex_lock(&LOCK_log);
5453 res= append_no_lock(ev);
5454 mysql_mutex_unlock(&LOCK_log);
5455 return res;
5456 }
5457
5458
append_no_lock(Log_event * ev)5459 bool MYSQL_BIN_LOG::append_no_lock(Log_event* ev)
5460 {
5461 bool error = 0;
5462 DBUG_ENTER("MYSQL_BIN_LOG::append");
5463
5464 mysql_mutex_assert_owner(&LOCK_log);
5465 DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5466
5467 if (write_event(ev))
5468 {
5469 error=1;
5470 goto err;
5471 }
5472 bytes_written+= ev->data_written;
5473 DBUG_PRINT("info",("max_size: %lu",max_size));
5474 if (flush_and_sync(0))
5475 goto err;
5476 if (my_b_append_tell(&log_file) > max_size)
5477 error= new_file_without_locking();
5478 err:
5479 update_binlog_end_pos();
5480 DBUG_RETURN(error);
5481 }
5482
write_event_buffer(uchar * buf,uint len)5483 bool MYSQL_BIN_LOG::write_event_buffer(uchar* buf, uint len)
5484 {
5485 bool error= 1;
5486 uchar *ebuf= 0;
5487 DBUG_ENTER("MYSQL_BIN_LOG::write_event_buffer");
5488
5489 DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5490
5491 mysql_mutex_assert_owner(&LOCK_log);
5492
5493 if (crypto.scheme != 0)
5494 {
5495 DBUG_ASSERT(crypto.scheme == 1);
5496
5497 uint elen;
5498 uchar iv[BINLOG_IV_LENGTH];
5499
5500 ebuf= (uchar*)my_safe_alloca(len);
5501 if (!ebuf)
5502 goto err;
5503
5504 crypto.set_iv(iv, (uint32)my_b_append_tell(&log_file));
5505
5506 /*
5507 we want to encrypt everything, excluding the event length:
5508 massage the data before the encryption
5509 */
5510 memcpy(buf + EVENT_LEN_OFFSET, buf, 4);
5511
5512 if (encryption_crypt(buf + 4, len - 4,
5513 ebuf + 4, &elen,
5514 crypto.key, crypto.key_length, iv, sizeof(iv),
5515 ENCRYPTION_FLAG_ENCRYPT | ENCRYPTION_FLAG_NOPAD,
5516 ENCRYPTION_KEY_SYSTEM_DATA, crypto.key_version))
5517 goto err;
5518
5519 DBUG_ASSERT(elen == len - 4);
5520
5521 /* massage the data after the encryption */
5522 memcpy(ebuf, ebuf + EVENT_LEN_OFFSET, 4);
5523 int4store(ebuf + EVENT_LEN_OFFSET, len);
5524
5525 buf= ebuf;
5526 }
5527 if (my_b_append(&log_file, buf, len))
5528 goto err;
5529 bytes_written+= len;
5530
5531 error= 0;
5532 DBUG_PRINT("info",("max_size: %lu",max_size));
5533 if (flush_and_sync(0))
5534 goto err;
5535 if (my_b_append_tell(&log_file) > max_size)
5536 error= new_file_without_locking();
5537 err:
5538 my_safe_afree(ebuf, len);
5539 if (likely(!error))
5540 update_binlog_end_pos();
5541 DBUG_RETURN(error);
5542 }
5543
flush_and_sync(bool * synced)5544 bool MYSQL_BIN_LOG::flush_and_sync(bool *synced)
5545 {
5546 int err=0, fd=log_file.file;
5547 if (synced)
5548 *synced= 0;
5549 mysql_mutex_assert_owner(&LOCK_log);
5550 if (flush_io_cache(&log_file))
5551 return 1;
5552 uint sync_period= get_sync_period();
5553 if (sync_period && ++sync_counter >= sync_period)
5554 {
5555 sync_counter= 0;
5556 err= mysql_file_sync(fd, MYF(MY_WME|MY_SYNC_FILESIZE));
5557 if (synced)
5558 *synced= 1;
5559 #ifndef DBUG_OFF
5560 if (opt_binlog_dbug_fsync_sleep > 0)
5561 my_sleep(opt_binlog_dbug_fsync_sleep);
5562 #endif
5563 }
5564 return err;
5565 }
5566
start_union_events(THD * thd,query_id_t query_id_param)5567 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
5568 {
5569 DBUG_ASSERT(!thd->binlog_evt_union.do_union);
5570 thd->binlog_evt_union.do_union= TRUE;
5571 thd->binlog_evt_union.unioned_events= FALSE;
5572 thd->binlog_evt_union.unioned_events_trans= FALSE;
5573 thd->binlog_evt_union.first_query_id= query_id_param;
5574 }
5575
stop_union_events(THD * thd)5576 void MYSQL_BIN_LOG::stop_union_events(THD *thd)
5577 {
5578 DBUG_ASSERT(thd->binlog_evt_union.do_union);
5579 thd->binlog_evt_union.do_union= FALSE;
5580 }
5581
is_query_in_union(THD * thd,query_id_t query_id_param)5582 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
5583 {
5584 return (thd->binlog_evt_union.do_union &&
5585 query_id_param >= thd->binlog_evt_union.first_query_id);
5586 }
5587
5588 /**
5589 This function checks if a transactional table was updated by the
5590 current transaction.
5591
5592 @param thd The client thread that executed the current statement.
5593 @return
5594 @c true if a transactional table was updated, @c false otherwise.
5595 */
5596 bool
trans_has_updated_trans_table(const THD * thd)5597 trans_has_updated_trans_table(const THD* thd)
5598 {
5599 binlog_cache_mngr *const cache_mngr=
5600 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5601
5602 return (cache_mngr ? !cache_mngr->trx_cache.empty() : 0);
5603 }
5604
5605 /**
5606 This function checks if a transactional table was updated by the
5607 current statement.
5608
5609 @param thd The client thread that executed the current statement.
5610 @return
5611 @c true if a transactional table with rollback was updated,
5612 @c false otherwise.
5613 */
5614 bool
stmt_has_updated_trans_table(const THD * thd)5615 stmt_has_updated_trans_table(const THD *thd)
5616 {
5617 Ha_trx_info *ha_info;
5618
5619 for (ha_info= thd->transaction->stmt.ha_list; ha_info;
5620 ha_info= ha_info->next())
5621 {
5622 if (ha_info->is_trx_read_write() &&
5623 !(ha_info->ht()->flags & HTON_NO_ROLLBACK))
5624 return (TRUE);
5625 }
5626 return (FALSE);
5627 }
5628
5629 /**
5630 This function checks if either a trx-cache or a non-trx-cache should
5631 be used. If @c bin_log_direct_non_trans_update is active or the format
5632 is either MIXED or ROW, the cache to be used depends on the flag @c
5633 is_transactional.
5634
5635 On the other hand, if binlog_format is STMT or direct option is
5636 OFF, the trx-cache should be used if and only if the statement is
5637 transactional or the trx-cache is not empty. Otherwise, the
5638 non-trx-cache should be used.
5639
5640 @param thd The client thread.
5641 @param is_transactional The changes are related to a trx-table.
5642 @return
5643 @c true if a trx-cache should be used, @c false otherwise.
5644 */
use_trans_cache(const THD * thd,bool is_transactional)5645 bool use_trans_cache(const THD* thd, bool is_transactional)
5646 {
5647 if (is_transactional)
5648 return 1;
5649 binlog_cache_mngr *const cache_mngr=
5650 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5651
5652 return ((thd->is_current_stmt_binlog_format_row() ||
5653 thd->variables.binlog_direct_non_trans_update) ? 0 :
5654 !cache_mngr->trx_cache.empty());
5655 }
5656
5657 /**
5658 This function checks if a transaction, either a multi-statement
5659 or a single statement transaction is about to commit or not.
5660
5661 @param thd The client thread that executed the current statement.
5662 @param all Committing a transaction (i.e. TRUE) or a statement
5663 (i.e. FALSE).
5664 @return
5665 @c true if committing a transaction, otherwise @c false.
5666 */
ending_trans(THD * thd,const bool all)5667 bool ending_trans(THD* thd, const bool all)
5668 {
5669 return (all || ending_single_stmt_trans(thd, all));
5670 }
5671
5672 /**
5673 This function checks if a single statement transaction is about
5674 to commit or not.
5675
5676 @param thd The client thread that executed the current statement.
5677 @param all Committing a transaction (i.e. TRUE) or a statement
5678 (i.e. FALSE).
5679 @return
5680 @c true if committing a single statement transaction, otherwise
5681 @c false.
5682 */
ending_single_stmt_trans(THD * thd,const bool all)5683 bool ending_single_stmt_trans(THD* thd, const bool all)
5684 {
5685 return (!all && !thd->in_multi_stmt_transaction_mode());
5686 }
5687
5688 /**
5689 This function checks if a non-transactional table was updated by
5690 the current transaction.
5691
5692 @param thd The client thread that executed the current statement.
5693 @return
5694 @c true if a non-transactional table was updated, @c false
5695 otherwise.
5696 */
trans_has_updated_non_trans_table(const THD * thd)5697 bool trans_has_updated_non_trans_table(const THD* thd)
5698 {
5699 return (thd->transaction->all.modified_non_trans_table ||
5700 thd->transaction->stmt.modified_non_trans_table);
5701 }
5702
5703 /**
5704 This function checks if a non-transactional table was updated by the
5705 current statement.
5706
5707 @param thd The client thread that executed the current statement.
5708 @return
5709 @c true if a non-transactional table was updated, @c false otherwise.
5710 */
stmt_has_updated_non_trans_table(const THD * thd)5711 bool stmt_has_updated_non_trans_table(const THD* thd)
5712 {
5713 return (thd->transaction->stmt.modified_non_trans_table);
5714 }
5715
5716 /*
5717 These functions are placed in this file since they need access to
5718 binlog_hton, which has internal linkage.
5719 */
5720
binlog_setup_trx_data()5721 binlog_cache_mngr *THD::binlog_setup_trx_data()
5722 {
5723 DBUG_ENTER("THD::binlog_setup_trx_data");
5724 binlog_cache_mngr *cache_mngr=
5725 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5726
5727 if (cache_mngr)
5728 DBUG_RETURN(cache_mngr); // Already set up
5729
5730 cache_mngr= (binlog_cache_mngr*) my_malloc(key_memory_binlog_cache_mngr,
5731 sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
5732 if (!cache_mngr ||
5733 open_cached_file(&cache_mngr->stmt_cache.cache_log, mysql_tmpdir,
5734 LOG_PREFIX, (size_t)binlog_stmt_cache_size, MYF(MY_WME)) ||
5735 open_cached_file(&cache_mngr->trx_cache.cache_log, mysql_tmpdir,
5736 LOG_PREFIX, (size_t)binlog_cache_size, MYF(MY_WME)))
5737 {
5738 my_free(cache_mngr);
5739 DBUG_RETURN(0); // Didn't manage to set it up
5740 }
5741 thd_set_ha_data(this, binlog_hton, cache_mngr);
5742
5743 cache_mngr= new (cache_mngr)
5744 binlog_cache_mngr(max_binlog_stmt_cache_size,
5745 max_binlog_cache_size,
5746 &binlog_stmt_cache_use,
5747 &binlog_stmt_cache_disk_use,
5748 &binlog_cache_use,
5749 &binlog_cache_disk_use);
5750 DBUG_RETURN(cache_mngr);
5751 }
5752
5753 /*
5754 Function to start a statement and optionally a transaction for the
5755 binary log.
5756
5757 SYNOPSIS
5758 binlog_start_trans_and_stmt()
5759
5760 DESCRIPTION
5761
5762 This function does three things:
5763 - Start a transaction if not in autocommit mode or if a BEGIN
5764 statement has been seen.
5765
5766 - Start a statement transaction to allow us to truncate the cache.
5767
5768 - Save the current binlog position so that we can roll back the
5769 statement by truncating the cache.
5770
5771 We only update the saved position if the old one was undefined,
5772 the reason is that there are some cases (e.g., for CREATE-SELECT)
5773 where the position is saved twice (e.g., both in
5774 select_create::prepare() and binlog_write_table_map()) , but
5775 we should use the first. This means that calls to this function
5776 can be used to start the statement before the first table map
5777 event, to include some extra events.
5778 */
5779
5780 void
binlog_start_trans_and_stmt()5781 THD::binlog_start_trans_and_stmt()
5782 {
5783 binlog_cache_mngr *cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5784 DBUG_ENTER("binlog_start_trans_and_stmt");
5785 DBUG_PRINT("enter", ("cache_mngr: %p cache_mngr->trx_cache.get_prev_position(): %lu",
5786 cache_mngr,
5787 (cache_mngr ? (ulong) cache_mngr->trx_cache.get_prev_position() :
5788 (ulong) 0)));
5789
5790 if (cache_mngr == NULL ||
5791 cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
5792 {
5793 this->binlog_set_stmt_begin();
5794 bool mstmt_mode= in_multi_stmt_transaction_mode();
5795 #ifdef WITH_WSREP
5796 /*
5797 With wsrep binlog emulation we can skip the rest because the
5798 binlog cache will not be written into binlog. Note however that
5799 because of this the hton callbacks will not get called to clean
5800 up the cache, so this must be done explicitly when the transaction
5801 terminates.
5802 */
5803 if (WSREP_EMULATE_BINLOG_NNULL(this))
5804 {
5805 DBUG_VOID_RETURN;
5806 }
5807 /* If this event replicates through a master-slave then we need to
5808 inject manually GTID so it is preserved in the cluster. We are writing
5809 directly to WSREP buffer and not in IO cache because in case of IO cache
5810 GTID event will be duplicated in binlog.
5811 We have to do this only one time in mysql transaction.
5812 Since this function is called multiple times , We will check for
5813 ha_info->is_started().
5814 */
5815 Ha_trx_info *ha_info;
5816 ha_info= this->ha_data[binlog_hton->slot].ha_info + (mstmt_mode ? 1 : 0);
5817
5818 if (!ha_info->is_started() &&
5819 (this->variables.gtid_seq_no || this->variables.wsrep_gtid_seq_no) &&
5820 wsrep_on(this) &&
5821 (this->wsrep_cs().mode() == wsrep::client_state::m_local))
5822 {
5823 uchar *buf= 0;
5824 size_t len= 0;
5825 IO_CACHE tmp_io_cache;
5826 Log_event_writer writer(&tmp_io_cache, 0);
5827 if(!open_cached_file(&tmp_io_cache, mysql_tmpdir, TEMP_PREFIX,
5828 128, MYF(MY_WME)))
5829 {
5830 uint64 seqno= this->variables.gtid_seq_no;
5831 uint32 domain_id= this->variables.gtid_domain_id;
5832 uint32 server_id= this->variables.server_id;
5833 if (!this->variables.gtid_seq_no && this->variables.wsrep_gtid_seq_no)
5834 {
5835 seqno= this->variables.wsrep_gtid_seq_no;
5836 domain_id= wsrep_gtid_server.domain_id;
5837 server_id= wsrep_gtid_server.server_id;
5838 }
5839 Gtid_log_event gtid_event(this, seqno, domain_id, true,
5840 LOG_EVENT_SUPPRESS_USE_F, true, 0);
5841 gtid_event.server_id= server_id;
5842 writer.write(>id_event);
5843 wsrep_write_cache_buf(&tmp_io_cache, &buf, &len);
5844 if (len > 0) this->wsrep_cs().append_data(wsrep::const_buffer(buf, len));
5845 if (buf) my_free(buf);
5846 close_cached_file(&tmp_io_cache);
5847 }
5848 }
5849 #endif
5850 if (mstmt_mode)
5851 trans_register_ha(this, TRUE, binlog_hton, 0);
5852 trans_register_ha(this, FALSE, binlog_hton, 0);
5853 /*
5854 Mark statement transaction as read/write. We never start
5855 a binary log transaction and keep it read-only,
5856 therefore it's best to mark the transaction read/write just
5857 at the same time we start it.
5858 Not necessary to mark the normal transaction read/write
5859 since the statement-level flag will be propagated automatically
5860 inside ha_commit_trans.
5861 */
5862 ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
5863 }
5864 DBUG_VOID_RETURN;
5865 }
5866
binlog_set_stmt_begin()5867 void THD::binlog_set_stmt_begin() {
5868 binlog_cache_mngr *cache_mngr=
5869 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5870
5871 /*
5872 The call to binlog_trans_log_savepos() might create the cache_mngr
5873 structure, if it didn't exist before, so we save the position
5874 into an auto variable and then write it into the transaction
5875 data for the binary log (i.e., cache_mngr).
5876 */
5877 my_off_t pos= 0;
5878 binlog_trans_log_savepos(this, &pos);
5879 cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5880 cache_mngr->trx_cache.set_prev_position(pos);
5881 }
5882
5883 static int
binlog_start_consistent_snapshot(handlerton * hton,THD * thd)5884 binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
5885 {
5886 int err= 0;
5887 DBUG_ENTER("binlog_start_consistent_snapshot");
5888
5889 binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
5890
5891 /* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */
5892 mysql_mutex_assert_owner(&LOCK_commit_ordered);
5893 strmake_buf(cache_mngr->last_commit_pos_file, mysql_bin_log.last_commit_pos_file);
5894 cache_mngr->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset;
5895
5896 trans_register_ha(thd, TRUE, binlog_hton, 0);
5897
5898 DBUG_RETURN(err);
5899 }
5900
5901
5902 /**
5903 Prepare all tables that are updated for row logging
5904
5905 Annotate events and table maps are written by binlog_write_table_maps()
5906 */
5907
binlog_prepare_for_row_logging()5908 void THD::binlog_prepare_for_row_logging()
5909 {
5910 DBUG_ENTER("THD::binlog_prepare_for_row_logging");
5911 for (TABLE *table= open_tables ; table; table= table->next)
5912 {
5913 if (table->query_id == query_id && table->current_lock == F_WRLCK)
5914 table->file->prepare_for_row_logging();
5915 }
5916 DBUG_VOID_RETURN;
5917 }
5918
5919 /**
5920 Write annnotated row event (the query) if needed
5921 */
5922
binlog_write_annotated_row(Log_event_writer * writer)5923 bool THD::binlog_write_annotated_row(Log_event_writer *writer)
5924 {
5925 int error;
5926 DBUG_ENTER("THD::binlog_write_annotated_row");
5927
5928 if (!(IF_WSREP(!wsrep_fragments_certified_for_stmt(this), true) &&
5929 variables.binlog_annotate_row_events &&
5930 query_length()))
5931 DBUG_RETURN(0);
5932
5933 Annotate_rows_log_event anno(this, 0, false);
5934 if (unlikely((error= writer->write(&anno))))
5935 {
5936 if (my_errno == EFBIG)
5937 writer->set_incident();
5938 DBUG_RETURN(error);
5939 }
5940 DBUG_RETURN(0);
5941 }
5942
5943
5944 /**
5945 Write table map events for all tables that are using row logging.
5946 This includes all tables used by this statement, including tables
5947 used in triggers.
5948
5949 Also write annotate events and start transactions.
5950 This is using the "tables_with_row_logging" list prepared by
5951 THD::binlog_prepare_for_row_logging
5952 */
5953
binlog_write_table_maps()5954 bool THD::binlog_write_table_maps()
5955 {
5956 bool with_annotate;
5957 MYSQL_LOCK *locks[2], **locks_end= locks;
5958 DBUG_ENTER("THD::binlog_write_table_maps");
5959
5960 DBUG_ASSERT(!binlog_table_maps);
5961 DBUG_ASSERT(is_current_stmt_binlog_format_row());
5962
5963 /* Initialize cache_mngr once per statement */
5964 binlog_start_trans_and_stmt();
5965 with_annotate= 1; // Write annotate with first map
5966
5967 if ((*locks_end= extra_lock))
5968 locks_end++;
5969 if ((*locks_end= lock))
5970 locks_end++;
5971
5972 for (MYSQL_LOCK **cur_lock= locks ; cur_lock < locks_end ; cur_lock++)
5973 {
5974 TABLE **const end_ptr= (*cur_lock)->table + (*cur_lock)->table_count;
5975 for (TABLE **table_ptr= (*cur_lock)->table;
5976 table_ptr != end_ptr ;
5977 ++table_ptr)
5978 {
5979 TABLE *table= *table_ptr;
5980 bool restore= 0;
5981 /*
5982 We have to also write table maps for tables that have not yet been
5983 used, like for tables in after triggers
5984 */
5985 if (!table->file->row_logging &&
5986 table->query_id != query_id && table->current_lock == F_WRLCK)
5987 {
5988 if (table->file->prepare_for_row_logging())
5989 restore= 1;
5990 }
5991 if (table->file->row_logging)
5992 {
5993 if (binlog_write_table_map(table, with_annotate))
5994 DBUG_RETURN(1);
5995 with_annotate= 0;
5996 }
5997 if (restore)
5998 {
5999 /*
6000 Restore original setting so that it doesn't cause problem for the
6001 next statement
6002 */
6003 table->file->row_logging= table->file->row_logging_init= 0;
6004 }
6005 }
6006 }
6007 binlog_table_maps= 1; // Table maps written
6008 DBUG_RETURN(0);
6009 }
6010
6011
6012 /**
6013 This function writes a table map to the binary log.
6014 Note that in order to keep the signature uniform with related methods,
6015 we use a redundant parameter to indicate whether a transactional table
6016 was changed or not.
6017
6018 @param table a pointer to the table.
6019 @param with_annotate If true call binlog_write_annotated_row()
6020
6021 @return
6022 nonzero if an error pops up when writing the table map event.
6023 */
6024
binlog_write_table_map(TABLE * table,bool with_annotate)6025 bool THD::binlog_write_table_map(TABLE *table, bool with_annotate)
6026 {
6027 int error;
6028 bool is_transactional= table->file->row_logging_has_trans;
6029 DBUG_ENTER("THD::binlog_write_table_map");
6030 DBUG_PRINT("enter", ("table: %p (%s: #%lu)",
6031 table, table->s->table_name.str,
6032 table->s->table_map_id));
6033
6034 /* Pre-conditions */
6035 DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
6036
6037 /* Ensure that all events in a GTID group are in the same cache */
6038 if (variables.option_bits & OPTION_GTID_BEGIN)
6039 is_transactional= 1;
6040
6041 Table_map_log_event
6042 the_event(this, table, table->s->table_map_id, is_transactional);
6043
6044 binlog_cache_mngr *const cache_mngr=
6045 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
6046 binlog_cache_data *cache_data= (cache_mngr->
6047 get_binlog_cache_data(is_transactional));
6048 IO_CACHE *file= &cache_data->cache_log;
6049 Log_event_writer writer(file, cache_data);
6050
6051 if (with_annotate)
6052 if (binlog_write_annotated_row(&writer))
6053 DBUG_RETURN(1);
6054
6055 if (unlikely((error= writer.write(&the_event))))
6056 DBUG_RETURN(error);
6057
6058 DBUG_RETURN(0);
6059 }
6060
6061
6062 /**
6063 This function retrieves a pending row event from a cache which is
6064 specified through the parameter @c is_transactional. Respectively, when it
6065 is @c true, the pending event is returned from the transactional cache.
6066 Otherwise from the non-transactional cache.
6067
6068 @param is_transactional @c true indicates a transactional cache,
6069 otherwise @c false a non-transactional.
6070 @return
6071 The row event if any.
6072 */
6073 Rows_log_event*
binlog_get_pending_rows_event(bool is_transactional) const6074 THD::binlog_get_pending_rows_event(bool is_transactional) const
6075 {
6076 Rows_log_event* rows= NULL;
6077 binlog_cache_mngr *const cache_mngr=
6078 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
6079
6080 /*
6081 This is less than ideal, but here's the story: If there is no cache_mngr,
6082 prepare_pending_rows_event() has never been called (since the cache_mngr
6083 is set up there). In that case, we just return NULL.
6084 */
6085 if (cache_mngr)
6086 {
6087 binlog_cache_data *cache_data=
6088 cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional));
6089
6090 rows= cache_data->pending();
6091 }
6092 return (rows);
6093 }
6094
6095 /**
6096 This function stores a pending row event into a cache which is specified
6097 through the parameter @c is_transactional. Respectively, when it is @c
6098 true, the pending event is stored into the transactional cache. Otherwise
6099 into the non-transactional cache.
6100
6101 @param evt a pointer to the row event.
6102 @param is_transactional @c true indicates a transactional cache,
6103 otherwise @c false a non-transactional.
6104 */
6105 void
binlog_set_pending_rows_event(Rows_log_event * ev,bool is_transactional)6106 THD::binlog_set_pending_rows_event(Rows_log_event* ev, bool is_transactional)
6107 {
6108 binlog_cache_mngr *const cache_mngr= binlog_setup_trx_data();
6109
6110 DBUG_ASSERT(cache_mngr);
6111
6112 binlog_cache_data *cache_data=
6113 cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional));
6114
6115 cache_data->set_pending(ev);
6116 }
6117
6118
6119 /**
6120 This function removes the pending rows event, discarding any outstanding
6121 rows. If there is no pending rows event available, this is effectively a
6122 no-op.
6123
6124 @param thd a pointer to the user thread.
6125 @param is_transactional @c true indicates a transactional cache,
6126 otherwise @c false a non-transactional.
6127 */
6128 int
remove_pending_rows_event(THD * thd,bool is_transactional)6129 MYSQL_BIN_LOG::remove_pending_rows_event(THD *thd, bool is_transactional)
6130 {
6131 DBUG_ENTER("MYSQL_BIN_LOG::remove_pending_rows_event");
6132
6133 binlog_cache_mngr *const cache_mngr=
6134 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
6135
6136 DBUG_ASSERT(cache_mngr);
6137
6138 binlog_cache_data *cache_data=
6139 cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional));
6140
6141 if (Rows_log_event* pending= cache_data->pending())
6142 {
6143 delete pending;
6144 cache_data->set_pending(NULL);
6145 }
6146
6147 DBUG_RETURN(0);
6148 }
6149
6150 /*
6151 Moves the last bunch of rows from the pending Rows event to a cache (either
6152 transactional cache if is_transaction is @c true, or the non-transactional
6153 cache otherwise. Sets a new pending event.
6154
6155 @param thd a pointer to the user thread.
6156 @param evt a pointer to the row event.
6157 @param is_transactional @c true indicates a transactional cache,
6158 otherwise @c false a non-transactional.
6159 */
6160 int
flush_and_set_pending_rows_event(THD * thd,Rows_log_event * event,bool is_transactional)6161 MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
6162 Rows_log_event* event,
6163 bool is_transactional)
6164 {
6165 DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
6166 DBUG_ASSERT(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open());
6167 DBUG_PRINT("enter", ("event: %p", event));
6168
6169 binlog_cache_mngr *const cache_mngr=
6170 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
6171
6172 DBUG_ASSERT(cache_mngr);
6173
6174 binlog_cache_data *cache_data=
6175 cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional));
6176
6177 DBUG_PRINT("info", ("cache_mngr->pending(): %p", cache_data->pending()));
6178
6179 if (Rows_log_event* pending= cache_data->pending())
6180 {
6181 Log_event_writer writer(&cache_data->cache_log, cache_data);
6182
6183 /*
6184 Write pending event to the cache.
6185 */
6186 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
6187 {DBUG_SET("+d,simulate_file_write_error");});
6188 if (writer.write(pending))
6189 {
6190 set_write_error(thd, is_transactional);
6191 if (check_write_error(thd) && cache_data &&
6192 stmt_has_updated_non_trans_table(thd))
6193 cache_data->set_incident();
6194 delete pending;
6195 cache_data->set_pending(NULL);
6196 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
6197 {DBUG_SET("-d,simulate_file_write_error");});
6198 DBUG_RETURN(1);
6199 }
6200
6201 delete pending;
6202 }
6203
6204 thd->binlog_set_pending_rows_event(event, is_transactional);
6205
6206 DBUG_RETURN(0);
6207 }
6208
6209
6210 /* Generate a new global transaction ID, and write it to the binlog */
6211
6212 bool
write_gtid_event(THD * thd,bool standalone,bool is_transactional,uint64 commit_id)6213 MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone,
6214 bool is_transactional, uint64 commit_id)
6215 {
6216 rpl_gtid gtid;
6217 uint32 domain_id;
6218 uint32 local_server_id;
6219 uint64 seq_no;
6220 int err;
6221 DBUG_ENTER("write_gtid_event");
6222 DBUG_PRINT("enter", ("standalone: %d", standalone));
6223
6224 seq_no= thd->variables.gtid_seq_no;
6225 domain_id= thd->variables.gtid_domain_id;
6226 local_server_id= thd->variables.server_id;
6227
6228 DBUG_ASSERT(local_server_id != 0);
6229
6230 if (thd->variables.option_bits & OPTION_GTID_BEGIN)
6231 {
6232 DBUG_PRINT("error", ("OPTION_GTID_BEGIN is set. "
6233 "Master and slave will have different GTID values"));
6234 /* Reset the flag, as we will write out a GTID anyway */
6235 thd->variables.option_bits&= ~OPTION_GTID_BEGIN;
6236 }
6237
6238 /*
6239 Reset the session variable gtid_seq_no, to reduce the risk of accidentally
6240 producing a duplicate GTID.
6241 */
6242 thd->variables.gtid_seq_no= 0;
6243 if (seq_no != 0)
6244 {
6245 /* Use the specified sequence number. */
6246 gtid.domain_id= domain_id;
6247 gtid.server_id= local_server_id;
6248 gtid.seq_no= seq_no;
6249 err= rpl_global_gtid_binlog_state.update(>id, opt_gtid_strict_mode);
6250 if (err && thd->get_stmt_da()->sql_errno()==ER_GTID_STRICT_OUT_OF_ORDER)
6251 errno= ER_GTID_STRICT_OUT_OF_ORDER;
6252 }
6253 else
6254 {
6255 /* Allocate the next sequence number for the GTID. */
6256 err= rpl_global_gtid_binlog_state.update_with_next_gtid(domain_id,
6257 local_server_id, >id);
6258 seq_no= gtid.seq_no;
6259 }
6260 if (err)
6261 DBUG_RETURN(true);
6262
6263 thd->set_last_commit_gtid(gtid);
6264
6265 Gtid_log_event gtid_event(thd, seq_no, domain_id, standalone,
6266 LOG_EVENT_SUPPRESS_USE_F, is_transactional,
6267 commit_id);
6268
6269 /* Write the event to the binary log. */
6270 DBUG_ASSERT(this == &mysql_bin_log);
6271
6272 #ifdef WITH_WSREP
6273 if (wsrep_gtid_mode)
6274 {
6275 thd->variables.gtid_domain_id= global_system_variables.gtid_domain_id;
6276 thd->variables.server_id= global_system_variables.server_id;
6277 }
6278 #endif
6279
6280 if (write_event(>id_event))
6281 DBUG_RETURN(true);
6282 status_var_add(thd->status_var.binlog_bytes_written, gtid_event.data_written);
6283
6284 DBUG_RETURN(false);
6285 }
6286
6287
6288 int
write_state_to_file()6289 MYSQL_BIN_LOG::write_state_to_file()
6290 {
6291 File file_no;
6292 IO_CACHE cache;
6293 char buf[FN_REFLEN];
6294 int err;
6295 bool opened= false;
6296 bool log_inited= false;
6297
6298 fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
6299 MY_UNPACK_FILENAME);
6300 if ((file_no= mysql_file_open(key_file_binlog_state, buf,
6301 O_RDWR|O_CREAT|O_TRUNC|O_BINARY,
6302 MYF(MY_WME))) < 0)
6303 {
6304 err= 1;
6305 goto err;
6306 }
6307 opened= true;
6308 if ((err= init_io_cache(&cache, file_no, IO_SIZE, WRITE_CACHE, 0, 0,
6309 MYF(MY_WME|MY_WAIT_IF_FULL))))
6310 goto err;
6311 log_inited= true;
6312 if ((err= rpl_global_gtid_binlog_state.write_to_iocache(&cache)))
6313 goto err;
6314 log_inited= false;
6315 if ((err= end_io_cache(&cache)))
6316 goto err;
6317 if ((err= mysql_file_sync(file_no, MYF(MY_WME|MY_SYNC_FILESIZE))))
6318 goto err;
6319 goto end;
6320
6321 err:
6322 sql_print_error("Error writing binlog state to file '%s'.", buf);
6323 if (log_inited)
6324 end_io_cache(&cache);
6325 end:
6326 if (opened)
6327 mysql_file_close(file_no, MYF(0));
6328
6329 return err;
6330 }
6331
6332
6333 /*
6334 Initialize the binlog state from the master-bin.state file, at server startup.
6335
6336 Returns:
6337 0 for success.
6338 2 for when .state file did not exist.
6339 1 for other error.
6340 */
6341 int
read_state_from_file()6342 MYSQL_BIN_LOG::read_state_from_file()
6343 {
6344 File file_no;
6345 IO_CACHE cache;
6346 char buf[FN_REFLEN];
6347 int err;
6348 bool opened= false;
6349 bool log_inited= false;
6350
6351 fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
6352 MY_UNPACK_FILENAME);
6353 if ((file_no= mysql_file_open(key_file_binlog_state, buf,
6354 O_RDONLY|O_BINARY, MYF(0))) < 0)
6355 {
6356 if (my_errno != ENOENT)
6357 {
6358 err= 1;
6359 goto err;
6360 }
6361 else
6362 {
6363 /*
6364 If the state file does not exist, this is the first server startup
6365 with GTID enabled. So initialize to empty state.
6366 */
6367 rpl_global_gtid_binlog_state.reset();
6368 err= 2;
6369 goto end;
6370 }
6371 }
6372 opened= true;
6373 if ((err= init_io_cache(&cache, file_no, IO_SIZE, READ_CACHE, 0, 0,
6374 MYF(MY_WME|MY_WAIT_IF_FULL))))
6375 goto err;
6376 log_inited= true;
6377 if ((err= rpl_global_gtid_binlog_state.read_from_iocache(&cache)))
6378 goto err;
6379 goto end;
6380
6381 err:
6382 sql_print_error("Error reading binlog GTID state from file '%s'.", buf);
6383 end:
6384 if (log_inited)
6385 end_io_cache(&cache);
6386 if (opened)
6387 mysql_file_close(file_no, MYF(0));
6388
6389 return err;
6390 }
6391
6392
6393 int
get_most_recent_gtid_list(rpl_gtid ** list,uint32 * size)6394 MYSQL_BIN_LOG::get_most_recent_gtid_list(rpl_gtid **list, uint32 *size)
6395 {
6396 return rpl_global_gtid_binlog_state.get_most_recent_gtid_list(list, size);
6397 }
6398
6399
6400 bool
append_state_pos(String * str)6401 MYSQL_BIN_LOG::append_state_pos(String *str)
6402 {
6403 return rpl_global_gtid_binlog_state.append_pos(str);
6404 }
6405
6406
6407 bool
append_state(String * str)6408 MYSQL_BIN_LOG::append_state(String *str)
6409 {
6410 return rpl_global_gtid_binlog_state.append_state(str);
6411 }
6412
6413
6414 bool
is_empty_state()6415 MYSQL_BIN_LOG::is_empty_state()
6416 {
6417 return (rpl_global_gtid_binlog_state.count() == 0);
6418 }
6419
6420
6421 bool
find_in_binlog_state(uint32 domain_id,uint32 server_id_arg,rpl_gtid * out_gtid)6422 MYSQL_BIN_LOG::find_in_binlog_state(uint32 domain_id, uint32 server_id_arg,
6423 rpl_gtid *out_gtid)
6424 {
6425 rpl_gtid *gtid;
6426 if ((gtid= rpl_global_gtid_binlog_state.find(domain_id, server_id_arg)))
6427 *out_gtid= *gtid;
6428 return gtid != NULL;
6429 }
6430
6431
6432 bool
lookup_domain_in_binlog_state(uint32 domain_id,rpl_gtid * out_gtid)6433 MYSQL_BIN_LOG::lookup_domain_in_binlog_state(uint32 domain_id,
6434 rpl_gtid *out_gtid)
6435 {
6436 rpl_gtid *found_gtid;
6437
6438 if ((found_gtid= rpl_global_gtid_binlog_state.find_most_recent(domain_id)))
6439 {
6440 *out_gtid= *found_gtid;
6441 return true;
6442 }
6443
6444 return false;
6445 }
6446
6447
6448 int
bump_seq_no_counter_if_needed(uint32 domain_id,uint64 seq_no)6449 MYSQL_BIN_LOG::bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no)
6450 {
6451 return rpl_global_gtid_binlog_state.bump_seq_no_if_needed(domain_id, seq_no);
6452 }
6453
6454
6455 bool
check_strict_gtid_sequence(uint32 domain_id,uint32 server_id_arg,uint64 seq_no)6456 MYSQL_BIN_LOG::check_strict_gtid_sequence(uint32 domain_id,
6457 uint32 server_id_arg,
6458 uint64 seq_no)
6459 {
6460 return rpl_global_gtid_binlog_state.check_strict_sequence(domain_id,
6461 server_id_arg,
6462 seq_no);
6463 }
6464
6465
6466 /**
6467 Write an event to the binary log. If with_annotate != NULL and
6468 *with_annotate = TRUE write also Annotate_rows before the event
6469 (this should happen only if the event is a Table_map).
6470 */
6471
write(Log_event * event_info,my_bool * with_annotate)6472 bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
6473 {
6474 THD *thd= event_info->thd;
6475 bool error= 1;
6476 binlog_cache_data *cache_data= 0;
6477 bool is_trans_cache= FALSE;
6478 bool using_trans= event_info->use_trans_cache();
6479 bool direct= event_info->use_direct_logging();
6480 ulong UNINIT_VAR(prev_binlog_id);
6481 DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)");
6482
6483 /*
6484 When binary logging is not enabled (--log-bin=0), wsrep-patch partially
6485 enables it without opening the binlog file (MYSQL_BIN_LOG::open().
6486 So, avoid writing to binlog file.
6487 */
6488 if (direct &&
6489 (wsrep_emulate_bin_log ||
6490 (WSREP(thd) && !(thd->variables.option_bits & OPTION_BIN_LOG))))
6491 DBUG_RETURN(0);
6492
6493 if (thd->variables.option_bits &
6494 (OPTION_GTID_BEGIN | OPTION_BIN_COMMIT_OFF))
6495 {
6496 DBUG_PRINT("info", ("OPTION_GTID_BEGIN was set"));
6497 /* Wait for commit from binary log before we commit */
6498 direct= 0;
6499 using_trans= 1;
6500 /* Set cache_type to ensure we don't get checksums for this event */
6501 event_info->cache_type= Log_event::EVENT_TRANSACTIONAL_CACHE;
6502 }
6503
6504 if (thd->binlog_evt_union.do_union)
6505 {
6506 /*
6507 In Stored function; Remember that function call caused an update.
6508 We will log the function call to the binary log on function exit
6509 */
6510 thd->binlog_evt_union.unioned_events= TRUE;
6511 thd->binlog_evt_union.unioned_events_trans |= using_trans;
6512 DBUG_RETURN(0);
6513 }
6514
6515 /*
6516 We only end the statement if we are in a top-level statement. If
6517 we are inside a stored function, we do not end the statement since
6518 this will close all tables on the slave. But there can be a special case
6519 where we are inside a stored function/trigger and a SAVEPOINT is being
6520 set in side the stored function/trigger. This SAVEPOINT execution will
6521 force the pending event to be flushed without an STMT_END_F flag. This
6522 will result in a case where following DMLs will be considered as part of
6523 same statement and result in data loss on slave. Hence in this case we
6524 force the end_stmt to be true.
6525 */
6526 bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
6527 SQLCOM_SAVEPOINT) ? true :
6528 (thd->locked_tables_mode && thd->lex->requires_prelocking());
6529 if (thd->binlog_flush_pending_rows_event(end_stmt, using_trans))
6530 DBUG_RETURN(error);
6531
6532 /*
6533 In most cases this is only called if 'is_open()' is true; in fact this is
6534 mostly called if is_open() *was* true a few instructions before, but it
6535 could have changed since.
6536 */
6537 /* applier and replayer can skip writing binlog events */
6538 if ((WSREP_EMULATE_BINLOG(thd) &&
6539 IF_WSREP(thd->wsrep_cs().mode() == wsrep::client_state::m_local, 0)) || is_open())
6540 {
6541 my_off_t UNINIT_VAR(my_org_b_tell);
6542 #ifdef HAVE_REPLICATION
6543 /*
6544 In the future we need to add to the following if tests like
6545 "do the involved tables match (to be implemented)
6546 binlog_[wild_]{do|ignore}_table?" (WL#1049)"
6547 */
6548 const char *local_db= event_info->get_db();
6549
6550 bool option_bin_log_flag= (thd->variables.option_bits & OPTION_BIN_LOG);
6551
6552 /*
6553 Log all updates to binlog cache so that they can get replicated to other
6554 nodes. A check has been added to stop them from getting logged into
6555 binary log files.
6556 */
6557 if (WSREP(thd))
6558 option_bin_log_flag= true;
6559
6560 if ((!(option_bin_log_flag)) ||
6561 (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
6562 thd->lex->sql_command != SQLCOM_SAVEPOINT &&
6563 !binlog_filter->db_ok(local_db)))
6564 DBUG_RETURN(0);
6565 #endif /* HAVE_REPLICATION */
6566
6567 IO_CACHE *file= NULL;
6568
6569 if (direct)
6570 {
6571 /* We come here only for incident events */
6572 int res;
6573 uint64 commit_id= 0;
6574 MDL_request mdl_request;
6575 DBUG_PRINT("info", ("direct is set"));
6576 DBUG_ASSERT(!thd->backup_commit_lock);
6577
6578 MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
6579 MDL_EXPLICIT);
6580 if (thd->mdl_context.acquire_lock(&mdl_request,
6581 thd->variables.lock_wait_timeout))
6582 DBUG_RETURN(1);
6583 thd->backup_commit_lock= &mdl_request;
6584
6585 if ((res= thd->wait_for_prior_commit()))
6586 {
6587 if (mdl_request.ticket)
6588 thd->mdl_context.release_lock(mdl_request.ticket);
6589 thd->backup_commit_lock= 0;
6590 DBUG_RETURN(res);
6591 }
6592 file= &log_file;
6593 my_org_b_tell= my_b_tell(file);
6594 mysql_mutex_lock(&LOCK_log);
6595 prev_binlog_id= current_binlog_id;
6596 DBUG_EXECUTE_IF("binlog_force_commit_id",
6597 {
6598 const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") };
6599 bool null_value;
6600 user_var_entry *entry=
6601 (user_var_entry*) my_hash_search(&thd->user_vars,
6602 (uchar*) commit_name.str,
6603 commit_name.length);
6604 commit_id= entry->val_int(&null_value);
6605 });
6606 res= write_gtid_event(thd, true, using_trans, commit_id);
6607 if (mdl_request.ticket)
6608 thd->mdl_context.release_lock(mdl_request.ticket);
6609 thd->backup_commit_lock= 0;
6610 if (res)
6611 goto err;
6612 }
6613 else
6614 {
6615 binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
6616 if (!cache_mngr)
6617 goto err;
6618
6619 is_trans_cache= use_trans_cache(thd, using_trans);
6620 cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);
6621 file= &cache_data->cache_log;
6622
6623 if (thd->lex->stmt_accessed_non_trans_temp_table() && is_trans_cache)
6624 thd->transaction->stmt.mark_modified_non_trans_temp_table();
6625 thd->binlog_start_trans_and_stmt();
6626 }
6627 DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));
6628
6629 /*
6630 No check for auto events flag here - this write method should
6631 never be called if auto-events are enabled.
6632
6633 Write first log events which describe the 'run environment'
6634 of the SQL command. If row-based binlogging, Insert_id, Rand
6635 and other kind of "setting context" events are not needed.
6636 */
6637
6638 if (with_annotate && *with_annotate)
6639 {
6640 DBUG_ASSERT(event_info->get_type_code() == TABLE_MAP_EVENT);
6641 Annotate_rows_log_event anno(thd, using_trans, direct);
6642 /* Annotate event should be written not more than once */
6643 *with_annotate= 0;
6644 if (write_event(&anno, cache_data, file))
6645 goto err;
6646 }
6647
6648 {
6649 if (!thd->is_current_stmt_binlog_format_row())
6650 {
6651 if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
6652 {
6653 Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
6654 thd->first_successful_insert_id_in_prev_stmt_for_binlog,
6655 using_trans, direct);
6656 if (write_event(&e, cache_data, file))
6657 goto err;
6658 }
6659 if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
6660 {
6661 DBUG_PRINT("info",("number of auto_inc intervals: %u",
6662 thd->auto_inc_intervals_in_cur_stmt_for_binlog.
6663 nb_elements()));
6664 Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
6665 thd->auto_inc_intervals_in_cur_stmt_for_binlog.
6666 minimum(), using_trans, direct);
6667 if (write_event(&e, cache_data, file))
6668 goto err;
6669 }
6670 if (thd->rand_used)
6671 {
6672 Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
6673 using_trans, direct);
6674 if (write_event(&e, cache_data, file))
6675 goto err;
6676 }
6677 if (thd->user_var_events.elements)
6678 {
6679 for (uint i= 0; i < thd->user_var_events.elements; i++)
6680 {
6681 BINLOG_USER_VAR_EVENT *user_var_event;
6682 get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);
6683
6684 /* setting flags for user var log event */
6685 uchar flags= User_var_log_event::UNDEF_F;
6686 if (user_var_event->unsigned_flag)
6687 flags|= User_var_log_event::UNSIGNED_F;
6688
6689 User_var_log_event e(thd, user_var_event->user_var_event->name.str,
6690 user_var_event->user_var_event->name.length,
6691 user_var_event->value,
6692 user_var_event->length,
6693 user_var_event->type,
6694 user_var_event->charset_number,
6695 flags,
6696 using_trans,
6697 direct);
6698 if (write_event(&e, cache_data, file))
6699 goto err;
6700 }
6701 }
6702 }
6703 }
6704
6705 /*
6706 Write the event.
6707 */
6708 if (write_event(event_info, cache_data, file) ||
6709 DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
6710 goto err;
6711
6712 error= 0;
6713 err:
6714 if (direct)
6715 {
6716 my_off_t offset= my_b_tell(file);
6717 bool check_purge= false;
6718 DBUG_ASSERT(!is_relay_log);
6719
6720 if (likely(!error))
6721 {
6722 bool synced;
6723
6724 if ((error= flush_and_sync(&synced)))
6725 {
6726 }
6727 else
6728 {
6729 mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
6730 mysql_mutex_assert_owner(&LOCK_log);
6731 mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
6732 mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
6733 #ifdef HAVE_REPLICATION
6734 if (repl_semisync_master.report_binlog_update(thd, log_file_name,
6735 file->pos_in_file))
6736 {
6737 sql_print_error("Failed to run 'after_flush' hooks");
6738 error= 1;
6739 }
6740 else
6741 #endif
6742 {
6743 /*
6744 update binlog_end_pos so it can be read by dump thread
6745 note: must be _after_ the RUN_HOOK(after_flush) or else
6746 semi-sync might not have put the transaction into
6747 it's list before dump-thread tries to send it
6748 */
6749 update_binlog_end_pos(offset);
6750 if (unlikely((error= rotate(false, &check_purge))))
6751 check_purge= false;
6752 }
6753 }
6754 }
6755
6756 status_var_add(thd->status_var.binlog_bytes_written,
6757 offset - my_org_b_tell);
6758
6759 mysql_mutex_lock(&LOCK_after_binlog_sync);
6760 mysql_mutex_unlock(&LOCK_log);
6761
6762 mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
6763 mysql_mutex_assert_not_owner(&LOCK_log);
6764 mysql_mutex_assert_owner(&LOCK_after_binlog_sync);
6765 mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
6766 #ifdef HAVE_REPLICATION
6767 if (repl_semisync_master.wait_after_sync(log_file_name,
6768 file->pos_in_file))
6769 {
6770 error=1;
6771 /* error is already printed inside hook */
6772 }
6773 #endif
6774
6775 /*
6776 Take mutex to protect against a reader seeing partial writes of 64-bit
6777 offset on 32-bit CPUs.
6778 */
6779 mysql_mutex_lock(&LOCK_commit_ordered);
6780 mysql_mutex_unlock(&LOCK_after_binlog_sync);
6781 last_commit_pos_offset= offset;
6782 mysql_mutex_unlock(&LOCK_commit_ordered);
6783
6784 if (check_purge)
6785 checkpoint_and_purge(prev_binlog_id);
6786 }
6787
6788 if (unlikely(error))
6789 {
6790 set_write_error(thd, is_trans_cache);
6791 if (check_write_error(thd) && cache_data &&
6792 stmt_has_updated_non_trans_table(thd))
6793 cache_data->set_incident();
6794 }
6795 }
6796
6797 DBUG_RETURN(error);
6798 }
6799
6800
error_log_print(enum loglevel level,const char * format,va_list args)6801 int error_log_print(enum loglevel level, const char *format,
6802 va_list args)
6803 {
6804 return logger.error_log_print(level, format, args);
6805 }
6806
6807
slow_log_print(THD * thd,const char * query,uint query_length,ulonglong current_utime)6808 bool slow_log_print(THD *thd, const char *query, uint query_length,
6809 ulonglong current_utime)
6810 {
6811 return logger.slow_log_print(thd, query, query_length, current_utime);
6812 }
6813
6814
6815 /**
6816 Decide if we should log the command to general log
6817
6818 @retval
6819 FALSE No logging
6820 TRUE Ok to log
6821 */
6822
log_command(THD * thd,enum enum_server_command command)6823 bool LOGGER::log_command(THD *thd, enum enum_server_command command)
6824 {
6825 /*
6826 Log command if we have at least one log event handler enabled and want
6827 to log this king of commands
6828 */
6829 if (!(*general_log_handler_list && (what_to_log & (1L << (uint) command))))
6830 return FALSE;
6831
6832 /*
6833 If LOG_SLOW_DISABLE_SLAVE is set when slave thread starts, then
6834 OPTION_LOG_OFF is set.
6835 Only the super user can set this bit.
6836 */
6837 return !(thd->variables.option_bits & OPTION_LOG_OFF);
6838 }
6839
6840
general_log_print(THD * thd,enum enum_server_command command,const char * format,...)6841 bool general_log_print(THD *thd, enum enum_server_command command,
6842 const char *format, ...)
6843 {
6844 va_list args;
6845 uint error= 0;
6846
6847 /* Print the message to the buffer if we want to log this kind of commands */
6848 if (! logger.log_command(thd, command))
6849 return FALSE;
6850
6851 va_start(args, format);
6852 error= logger.general_log_print(thd, command, format, args);
6853 va_end(args);
6854
6855 return error;
6856 }
6857
general_log_write(THD * thd,enum enum_server_command command,const char * query,size_t query_length)6858 bool general_log_write(THD *thd, enum enum_server_command command,
6859 const char *query, size_t query_length)
6860 {
6861 /* Write the message to the log if we want to log this king of commands */
6862 if (logger.log_command(thd, command) || mysql_audit_general_enabled())
6863 return logger.general_log_write(thd, command, query, query_length);
6864
6865 return FALSE;
6866 }
6867
6868
6869 static void
binlog_checkpoint_callback(void * cookie)6870 binlog_checkpoint_callback(void *cookie)
6871 {
6872 MYSQL_BIN_LOG::xid_count_per_binlog *entry=
6873 (MYSQL_BIN_LOG::xid_count_per_binlog *)cookie;
6874 /*
6875 For every supporting engine, we increment the xid_count and issue a
6876 commit_checkpoint_request(). Then we can count when all
6877 commit_checkpoint_notify() callbacks have occurred, and then log a new
6878 binlog checkpoint event.
6879 */
6880 mysql_bin_log.mark_xids_active(entry->binlog_id, 1);
6881 }
6882
6883
6884 /*
6885 Request a commit checkpoint from each supporting engine.
6886 This must be called after each binlog rotate, and after LOCK_log has been
6887 released. The xid_count value in the xid_count_per_binlog entry was
6888 incremented by 1 and will be decremented in this function; this ensures
6889 that the entry will not go away early despite LOCK_log not being held.
6890 */
6891 void
do_checkpoint_request(ulong binlog_id)6892 MYSQL_BIN_LOG::do_checkpoint_request(ulong binlog_id)
6893 {
6894 xid_count_per_binlog *entry;
6895
6896 /*
6897 Find the binlog entry, and invoke commit_checkpoint_request() on it in
6898 each supporting storage engine.
6899 */
6900 mysql_mutex_lock(&LOCK_xid_list);
6901 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
6902 do {
6903 entry= it++;
6904 DBUG_ASSERT(entry /* binlog_id is always somewhere in the list. */);
6905 } while (entry->binlog_id != binlog_id);
6906 mysql_mutex_unlock(&LOCK_xid_list);
6907
6908 ha_commit_checkpoint_request(entry, binlog_checkpoint_callback);
6909 /*
6910 When we rotated the binlog, we incremented xid_count to make sure the
6911 entry would not go away until this point, where we have done all necessary
6912 commit_checkpoint_request() calls.
6913 So now we can (and must) decrease the count - when it reaches zero, we
6914 will know that both all pending unlog() and all pending
6915 commit_checkpoint_notify() calls are done, and we can log a new binlog
6916 checkpoint.
6917 */
6918 mark_xid_done(binlog_id, true);
6919 }
6920
6921
6922 /**
6923 The method executes rotation when LOCK_log is already acquired
6924 by the caller.
6925
6926 @param force_rotate caller can request the log rotation
6927 @param check_purge is set to true if rotation took place
6928
6929 @note
6930 Caller _must_ check the check_purge variable. If this is set, it means
6931 that the binlog was rotated, and caller _must_ ensure that
6932 do_checkpoint_request() is called later with the binlog_id of the rotated
6933 binlog file. The call to do_checkpoint_request() must happen after
6934 LOCK_log is released (which is why we cannot simply do it here).
6935 Usually, checkpoint_and_purge() is appropriate, as it will both handle
6936 the checkpointing and any needed purging of old logs.
6937
6938 @note
6939 If rotation fails, for instance the server was unable
6940 to create a new log file, we still try to write an
6941 incident event to the current log.
6942
6943 @retval
6944 nonzero - error in rotating routine.
6945 */
rotate(bool force_rotate,bool * check_purge)6946 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
6947 {
6948 int error= 0;
6949 DBUG_ENTER("MYSQL_BIN_LOG::rotate");
6950
6951 #ifdef WITH_WSREP
6952 if (WSREP_ON && wsrep_to_isolation)
6953 {
6954 *check_purge= false;
6955 WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d",
6956 wsrep_to_isolation);
6957 DBUG_RETURN(0);
6958 }
6959 #endif /* WITH_WSREP */
6960
6961 //todo: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log);
6962 *check_purge= false;
6963
6964 if (force_rotate || (my_b_tell(&log_file) >= (my_off_t) max_size))
6965 {
6966 ulong binlog_id= current_binlog_id;
6967 /*
6968 We rotate the binlog, so we need to start a commit checkpoint in all
6969 supporting engines - when it finishes, we can log a new binlog checkpoint
6970 event.
6971
6972 But we cannot start the checkpoint here - there could be a group commit
6973 still in progress which needs to be included in the checkpoint, and
6974 besides we do not want to do the (possibly expensive) checkpoint while
6975 LOCK_log is held.
6976
6977 On the other hand, we must be sure that the xid_count entry for the
6978 previous log does not go away until we start the checkpoint - which it
6979 could do as it is no longer the most recent. So we increment xid_count
6980 (to count the pending checkpoint request) - this will fix the entry in
6981 place until we decrement again in do_checkpoint_request().
6982 */
6983 mark_xids_active(binlog_id, 1);
6984
6985 if (unlikely((error= new_file_without_locking())))
6986 {
6987 /**
6988 Be conservative... There are possible lost events (eg,
6989 failing to log the Execute_load_query_log_event
6990 on a LOAD DATA while using a non-transactional
6991 table)!
6992
6993 We give it a shot and try to write an incident event anyway
6994 to the current log.
6995 */
6996 if (!write_incident_already_locked(current_thd))
6997 flush_and_sync(0);
6998
6999 /*
7000 We failed to rotate - so we have to decrement the xid_count back that
7001 we incremented before attempting the rotate.
7002 */
7003 mark_xid_done(binlog_id, false);
7004 }
7005 else
7006 *check_purge= true;
7007 }
7008 DBUG_RETURN(error);
7009 }
7010
7011 /**
7012 The method executes logs purging routine.
7013
7014 @retval
7015 nonzero - error in rotating routine.
7016 */
purge()7017 void MYSQL_BIN_LOG::purge()
7018 {
7019 mysql_mutex_assert_not_owner(&LOCK_log);
7020 #ifdef HAVE_REPLICATION
7021 if (expire_logs_days)
7022 {
7023 DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
7024 time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
7025 if (purge_time >= 0)
7026 {
7027 purge_logs_before_date(purge_time);
7028 }
7029 DEBUG_SYNC(current_thd, "after_purge_logs_before_date");
7030 }
7031 #endif
7032 }
7033
7034
/**
  Issue a checkpoint request for the given binlog and then run purge().

  @param binlog_id  id handed unchanged to do_checkpoint_request()
*/
void MYSQL_BIN_LOG::checkpoint_and_purge(ulong binlog_id)
{
  do_checkpoint_request(binlog_id);
  /* purge() asserts that LOCK_log is not held by the caller. */
  purge();
}
7040
7041
7042 /**
7043 Searches for the first (oldest) binlog file name in in the binlog index.
7044
7045 @param[in,out] buf_arg pointer to a buffer to hold found
7046 the first binary log file name
7047 @return NULL on success, otherwise error message
7048 */
get_first_binlog(char * buf_arg)7049 static const char* get_first_binlog(char* buf_arg)
7050 {
7051 IO_CACHE *index_file;
7052 size_t length;
7053 char fname[FN_REFLEN];
7054 const char* errmsg= NULL;
7055
7056 DBUG_ENTER("get_first_binlog");
7057
7058 DBUG_ASSERT(mysql_bin_log.is_open());
7059
7060 mysql_bin_log.lock_index();
7061
7062 index_file=mysql_bin_log.get_index_file();
7063 if (reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0))
7064 {
7065 errmsg= "failed to create a cache on binlog index";
7066 goto end;
7067 }
7068 /* The file ends with EOF or empty line */
7069 if ((length=my_b_gets(index_file, fname, sizeof(fname))) <= 1)
7070 {
7071 errmsg= "empty binlog index";
7072 goto end;
7073 }
7074 else
7075 {
7076 fname[length-1]= 0; // Remove end \n
7077 }
7078 if (normalize_binlog_name(buf_arg, fname, false))
7079 {
7080 errmsg= "could not normalize the first file name in the binlog index";
7081 goto end;
7082 }
7083 end:
7084 mysql_bin_log.unlock_index();
7085
7086 DBUG_RETURN(errmsg);
7087 }
7088
7089 /**
7090 Check weather the gtid binlog state can safely remove gtid
7091 domains passed as the argument. A safety condition is satisfied when
7092 there are no events from the being deleted domains in the currently existing
7093 binlog files. Upon successful check the supplied domains are removed
7094 from @@gtid_binlog_state. The caller is supposed to rotate binlog so that
7095 the active latest file won't have the deleted domains in its Gtid_list header.
7096
7097 @param domain_drop_lex gtid domain id sequence from lex.
7098 Passed as a pointer to dynamic array must be not empty
7099 unless pointer value NULL.
7100 @retval zero on success
7101 @retval > 0 ineffective call none from the *non* empty
7102 gtid domain sequence is deleted
7103 @retval < 0 on error
7104 */
do_delete_gtid_domain(DYNAMIC_ARRAY * domain_drop_lex)7105 static int do_delete_gtid_domain(DYNAMIC_ARRAY *domain_drop_lex)
7106 {
7107 int rc= 0;
7108 Gtid_list_log_event *glev= NULL;
7109 char buf[FN_REFLEN];
7110 File file;
7111 IO_CACHE cache;
7112 const char* errmsg= NULL;
7113 char errbuf[MYSQL_ERRMSG_SIZE]= {0};
7114
7115 if (!domain_drop_lex)
7116 return 0; // still "effective" having empty domain sequence to delete
7117
7118 DBUG_ASSERT(domain_drop_lex->elements > 0);
7119 mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
7120
7121 if ((errmsg= get_first_binlog(buf)) != NULL)
7122 goto end;
7123 bzero((char*) &cache, sizeof(cache));
7124 if ((file= open_binlog(&cache, buf, &errmsg)) == (File) -1)
7125 goto end;
7126 errmsg= get_gtid_list_event(&cache, &glev);
7127 end_io_cache(&cache);
7128 mysql_file_close(file, MYF(MY_WME));
7129
7130 DBUG_EXECUTE_IF("inject_binlog_delete_domain_init_error",
7131 errmsg= "injected error";);
7132 if (errmsg)
7133 goto end;
7134 errmsg= rpl_global_gtid_binlog_state.drop_domain(domain_drop_lex,
7135 glev, errbuf);
7136
7137 end:
7138 if (errmsg)
7139 {
7140 if (strlen(errmsg) > 0)
7141 {
7142 my_error(ER_BINLOG_CANT_DELETE_GTID_DOMAIN, MYF(0), errmsg);
7143 rc= -1;
7144 }
7145 else
7146 {
7147 rc= 1;
7148 }
7149 }
7150 delete glev;
7151
7152 return rc;
7153 }
7154
7155 /**
7156 The method is a shortcut of @c rotate() and @c purge().
7157 LOCK_log is acquired prior to rotate and is released after it.
7158
7159 @param force_rotate caller can request the log rotation
7160
7161 @retval
7162 nonzero - error in rotating routine.
7163 */
rotate_and_purge(bool force_rotate,DYNAMIC_ARRAY * domain_drop_lex)7164 int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
7165 DYNAMIC_ARRAY *domain_drop_lex)
7166 {
7167 int err_gtid=0, error= 0;
7168 ulong prev_binlog_id;
7169 DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
7170 bool check_purge= false;
7171
7172 mysql_mutex_lock(&LOCK_log);
7173
7174 DEBUG_SYNC(current_thd, "rotate_after_acquire_LOCK_log");
7175
7176 prev_binlog_id= current_binlog_id;
7177
7178 if ((err_gtid= do_delete_gtid_domain(domain_drop_lex)))
7179 {
7180 // inffective attempt to delete merely skips rotate and purge
7181 if (err_gtid < 0)
7182 error= 1; // otherwise error is propagated the user
7183 }
7184 else if (unlikely((error= rotate(force_rotate, &check_purge))))
7185 check_purge= false;
7186
7187 DEBUG_SYNC(current_thd, "rotate_after_rotate");
7188
7189 /*
7190 NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
7191 the mutex. Otherwise causes various deadlocks.
7192 Explicit binlog rotation must be synchronized with a concurrent
7193 binlog ordered commit, in particular not let binlog
7194 checkpoint notification request until early binlogged
7195 concurrent commits have has been completed.
7196 */
7197 mysql_mutex_lock(&LOCK_after_binlog_sync);
7198 mysql_mutex_unlock(&LOCK_log);
7199 mysql_mutex_lock(&LOCK_commit_ordered);
7200 mysql_mutex_unlock(&LOCK_after_binlog_sync);
7201 mysql_mutex_unlock(&LOCK_commit_ordered);
7202
7203 if (check_purge)
7204 checkpoint_and_purge(prev_binlog_id);
7205
7206 DBUG_RETURN(error);
7207 }
7208
next_file_id()7209 uint MYSQL_BIN_LOG::next_file_id()
7210 {
7211 uint res;
7212 mysql_mutex_lock(&LOCK_log);
7213 res = file_id++;
7214 mysql_mutex_unlock(&LOCK_log);
7215 return res;
7216 }
7217
7218 class CacheWriter: public Log_event_writer
7219 {
7220 public:
7221 size_t remains;
7222
CacheWriter(THD * thd_arg,IO_CACHE * file_arg,bool do_checksum,Binlog_crypt_data * cr)7223 CacheWriter(THD *thd_arg, IO_CACHE *file_arg, bool do_checksum,
7224 Binlog_crypt_data *cr)
7225 : Log_event_writer(file_arg, 0, cr), remains(0), thd(thd_arg),
7226 first(true)
7227 { checksum_len= do_checksum ? BINLOG_CHECKSUM_LEN : 0; }
7228
~CacheWriter()7229 ~CacheWriter()
7230 { status_var_add(thd->status_var.binlog_bytes_written, bytes_written); }
7231
write(uchar * pos,size_t len)7232 int write(uchar* pos, size_t len)
7233 {
7234 DBUG_ENTER("CacheWriter::write");
7235 if (first)
7236 write_header(pos, len);
7237 else
7238 write_data(pos, len);
7239
7240 remains -= len;
7241 if ((first= !remains))
7242 write_footer();
7243 DBUG_RETURN(0);
7244 }
7245 private:
7246 THD *thd;
7247 bool first;
7248 };
7249
/*
  Write the contents of a cache to the binary log.

  SYNOPSIS
    write_cache()
    thd      Current thread
    cache    Cache to write to the binary log

  DESCRIPTION
    Write the contents of the cache to the binary log. The cache will
    be reset as a READ_CACHE to be able to read the contents from it.

    While reading from the cache, the length and the end_log_pos of every
    event are fixed up before the event is written to the binlog: the
    position of the group in the log is added to end_log_pos and, when
    checksums are enabled (per @c binlog_checksum_options), the checksum
    length is added to both fields.

    The caller must hold LOCK_log.

  RETURN
    0                  All events were written
    ER_ERROR_ON_WRITE  The cache could not be re-initialised for reading,
                       or a write to the binary log failed
*/

int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
{
  DBUG_ENTER("MYSQL_BIN_LOG::write_cache");

  mysql_mutex_assert_owner(&LOCK_log);
  if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  /*
    length   - number of bytes in the current cache segment
    group    - offset in the binlog at which this cache starts
    carry    - number of bytes of a split event header saved in header[]
    hdr_offs - offset of the next event header within the current segment
  */
  size_t length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
  size_t val;
  /*
    Grows by writer.checksum_len for each event header processed; it is
    added to the end_log_pos of that event.
  */
  size_t end_log_pos_inc= 0;
  uchar header[LOG_EVENT_HEADER_LEN];
  CacheWriter writer(thd, &log_file, binlog_checksum_options, &crypto);

  if (crypto.scheme)
  {
    writer.ctx= alloca(crypto.ctx_size);
    writer.set_encrypted_writer();
  }
  // while there is just one alg the following must hold:
  DBUG_ASSERT(binlog_checksum_options == BINLOG_CHECKSUM_ALG_OFF ||
              binlog_checksum_options == BINLOG_CHECKSUM_ALG_CRC32);

  /*
    The events in the buffer have incorrect end_log_pos data
    (relative to beginning of group rather than absolute),
    so we'll recalculate them in situ so the binlog is always
    correct, even in the middle of a group. This is possible
    because we now know the start position of the group (the
    offset of this cache in the log, if you will); all we need
    to do is to find all event-headers, and add the position of
    the group to the end_log_pos of each event.  This is pretty
    straight forward, except that we read the cache in segments,
    so an event-header might end up on the cache-border and get
    split.
  */

  group= (size_t)my_b_tell(&log_file);
  hdr_offs= carry= 0;

  do
  {
    /*
      if we only got a partial header in the last iteration,
      get the other half now and process a full header.
    */
    if (unlikely(carry > 0))
    {
      DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);
      size_t tail= LOG_EVENT_HEADER_LEN - carry;

      /* assemble both halves */
      memcpy(&header[carry], (char *)cache->read_pos, tail);

      uint32 len= uint4korr(header + EVENT_LEN_OFFSET);
      writer.remains= len;

      /* fix end_log_pos */
      end_log_pos_inc += writer.checksum_len;
      val= uint4korr(header + LOG_POS_OFFSET) + group + end_log_pos_inc;
      int4store(header + LOG_POS_OFFSET, val);

      /* fix len */
      len+= writer.checksum_len;
      int4store(header + EVENT_LEN_OFFSET, len);

      if (writer.write(header, LOG_EVENT_HEADER_LEN))
        DBUG_RETURN(ER_ERROR_ON_WRITE);

      /* skip the second half of the header, it has just been written */
      cache->read_pos+= tail;
      length-= tail;
      carry= 0;

      /* next event header at ... */
      hdr_offs= len - LOG_EVENT_HEADER_LEN - writer.checksum_len;
    }

    /* if there is anything to write, process it. */

    if (likely(length > 0))
    {
      DBUG_EXECUTE_IF("fail_binlog_write_1",
                      errno= 28; DBUG_RETURN(ER_ERROR_ON_WRITE););
      /*
        process all event-headers in this (partial) cache.
        if next header is beyond current read-buffer,
        we'll get it later (though not necessarily in the
        very next iteration, just "eventually").
      */

      /* no header in this segment: all of it belongs to the current event */
      if (hdr_offs >= length)
      {
        if (writer.write(cache->read_pos, length))
          DBUG_RETURN(ER_ERROR_ON_WRITE);
      }

      while (hdr_offs < length)
      {
        /*
          finish off with remains of the last event that crawls
          from previous into the current buffer
        */
        if (writer.remains != 0)
        {
          if (writer.write(cache->read_pos, hdr_offs))
            DBUG_RETURN(ER_ERROR_ON_WRITE);
        }

        /*
          partial header only? save what we can get, process once
          we get the rest.
        */
        if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
        {
          carry= length - hdr_offs;
          memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
          /* shrinking length to hdr_offs ends the while loop */
          length= hdr_offs;
        }
        else
        {
          /* we've got a full event-header, and it came in one piece */
          uchar *ev= (uchar *)cache->read_pos + hdr_offs;
          uint ev_len= uint4korr(ev + EVENT_LEN_OFFSET); // netto len
          uchar *log_pos= ev + LOG_POS_OFFSET;

          end_log_pos_inc += writer.checksum_len;
          /* fix end_log_pos */
          val= uint4korr(log_pos) + group + end_log_pos_inc;
          int4store(log_pos, val);

          /* fix length */
          int4store(ev + EVENT_LEN_OFFSET, ev_len + writer.checksum_len);

          /* write as much of the event as this segment holds */
          writer.remains= ev_len;
          if (writer.write(ev, MY_MIN(ev_len, length - hdr_offs)))
            DBUG_RETURN(ER_ERROR_ON_WRITE);

          /* next event header at ... */
          hdr_offs += ev_len; // incr by the netto len

          DBUG_ASSERT(!writer.checksum_len || writer.remains == 0 || hdr_offs >= length);
        }
      }

      /*
        Adjust hdr_offs. Note that it may still point beyond the segment
        read in the next iteration; if the current event is very long,
        it may take a couple of read-iterations (and subsequent adjustments
        of hdr_offs) for it to point into the then-current segment.
        If we have a split header (carry != 0), hdr_offs will be set at the
        beginning of the next iteration, overwriting the value we set here:
      */
      hdr_offs -= length;
    }
  } while ((length= my_b_fill(cache)));

  DBUG_ASSERT(carry == 0);
  DBUG_ASSERT(!writer.checksum_len || writer.remains == 0);

  DBUG_RETURN(0);                               // All OK
}
7427
7428 /*
7429 Helper function to get the error code of the query to be binlogged.
7430 */
query_error_code(THD * thd,bool not_killed)7431 int query_error_code(THD *thd, bool not_killed)
7432 {
7433 int error;
7434
7435 if (not_killed || (killed_mask_hard(thd->killed) == KILL_BAD_DATA))
7436 {
7437 error= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0;
7438 if (!error)
7439 return error;
7440
7441 /* thd->get_get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
7442 ER_QUERY_INTERRUPTED, So here we need to make sure that error
7443 is not set to these errors when specified not_killed by the
7444 caller.
7445 */
7446 if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED ||
7447 error == ER_NEW_ABORTING_CONNECTION || error == ER_CONNECTION_KILLED)
7448 error= 0;
7449 }
7450 else
7451 {
7452 /* killed status for DELAYED INSERT thread should never be used */
7453 DBUG_ASSERT(!(thd->system_thread & SYSTEM_THREAD_DELAYED_INSERT));
7454 error= thd->killed_errno();
7455 }
7456
7457 return error;
7458 }
7459
7460
write_incident_already_locked(THD * thd)7461 bool MYSQL_BIN_LOG::write_incident_already_locked(THD *thd)
7462 {
7463 uint error= 0;
7464 DBUG_ENTER("MYSQL_BIN_LOG::write_incident_already_locked");
7465 Incident incident= INCIDENT_LOST_EVENTS;
7466 Incident_log_event ev(thd, incident, &write_error_msg);
7467
7468 if (likely(is_open()))
7469 {
7470 error= write_event(&ev);
7471 status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
7472 }
7473
7474 DBUG_RETURN(error);
7475 }
7476
7477
write_incident(THD * thd)7478 bool MYSQL_BIN_LOG::write_incident(THD *thd)
7479 {
7480 uint error= 0;
7481 my_off_t offset;
7482 bool check_purge= false;
7483 ulong prev_binlog_id;
7484 DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
7485
7486 mysql_mutex_lock(&LOCK_log);
7487 if (likely(is_open()))
7488 {
7489 prev_binlog_id= current_binlog_id;
7490 if (likely(!(error= write_incident_already_locked(thd))) &&
7491 likely(!(error= flush_and_sync(0))))
7492 {
7493 update_binlog_end_pos();
7494 if (unlikely((error= rotate(false, &check_purge))))
7495 check_purge= false;
7496 }
7497
7498 offset= my_b_tell(&log_file);
7499
7500 update_binlog_end_pos(offset);
7501
7502 /*
7503 Take mutex to protect against a reader seeing partial writes of 64-bit
7504 offset on 32-bit CPUs.
7505 */
7506 mysql_mutex_lock(&LOCK_commit_ordered);
7507 last_commit_pos_offset= offset;
7508 mysql_mutex_unlock(&LOCK_commit_ordered);
7509 mysql_mutex_unlock(&LOCK_log);
7510
7511 if (check_purge)
7512 checkpoint_and_purge(prev_binlog_id);
7513 }
7514 else
7515 {
7516 mysql_mutex_unlock(&LOCK_log);
7517 }
7518
7519 DBUG_RETURN(error);
7520 }
7521
7522 void
write_binlog_checkpoint_event_already_locked(const char * name_arg,uint len)7523 MYSQL_BIN_LOG::write_binlog_checkpoint_event_already_locked(const char *name_arg, uint len)
7524 {
7525 my_off_t offset;
7526 Binlog_checkpoint_log_event ev(name_arg, len);
7527 /*
7528 Note that we must sync the binlog checkpoint to disk.
7529 Otherwise a subsequent log purge could delete binlogs that XA recovery
7530 thinks are needed (even though they are not really).
7531 */
7532 if (!write_event(&ev) && !flush_and_sync(0))
7533 {
7534 update_binlog_end_pos();
7535 }
7536 else
7537 {
7538 /*
7539 If we fail to write the checkpoint event, something is probably really
7540 bad with the binlog. We complain in the error log.
7541
7542 Note that failure to write binlog checkpoint does not compromise the
7543 ability to do crash recovery - crash recovery will just have to scan a
7544 bit more of the binlog than strictly necessary.
7545 */
7546 sql_print_error("Failed to write binlog checkpoint event to binary log");
7547 }
7548
7549 offset= my_b_tell(&log_file);
7550
7551 update_binlog_end_pos(offset);
7552
7553 /*
7554 Take mutex to protect against a reader seeing partial writes of 64-bit
7555 offset on 32-bit CPUs.
7556 */
7557 mysql_mutex_lock(&LOCK_commit_ordered);
7558 last_commit_pos_offset= offset;
7559 mysql_mutex_unlock(&LOCK_commit_ordered);
7560 }
7561
7562
7563 /**
7564 Write a cached log entry to the binary log.
7565 - To support transaction over replication, we wrap the transaction
7566 with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
7567 We want to write a BEGIN/ROLLBACK block when a non-transactional table
7568 was updated in a transaction which was rolled back. This is to ensure
7569 that the same updates are run on the slave.
7570
7571 @param thd
7572 @param cache The cache to copy to the binlog
7573 @param commit_event The commit event to print after writing the
7574 contents of the cache.
7575 @param incident Defines if an incident event should be created to
7576 notify that some non-transactional changes did
7577 not get into the binlog.
7578
7579 @note
7580 We only come here if there is something in the cache.
7581 @note
7582 The thing in the cache is always a complete transaction.
7583 @note
7584 'cache' needs to be reinitialized after this functions returns.
7585 */
7586
7587 bool
write_transaction_to_binlog(THD * thd,binlog_cache_mngr * cache_mngr,Log_event * end_ev,bool all,bool using_stmt_cache,bool using_trx_cache)7588 MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
7589 binlog_cache_mngr *cache_mngr,
7590 Log_event *end_ev, bool all,
7591 bool using_stmt_cache,
7592 bool using_trx_cache)
7593 {
7594 group_commit_entry entry;
7595 Ha_trx_info *ha_info;
7596 DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
7597
7598 /*
7599 Control should not be allowed beyond this point in wsrep_emulate_bin_log
7600 mode. Also, do not write the cached updates to binlog if binary logging is
7601 disabled (log-bin/sql_log_bin).
7602 */
7603 if (wsrep_emulate_bin_log)
7604 {
7605 DBUG_RETURN(0);
7606 }
7607 else if (!(thd->variables.option_bits & OPTION_BIN_LOG))
7608 {
7609 cache_mngr->need_unlog= false;
7610 DBUG_RETURN(0);
7611 }
7612
7613 entry.thd= thd;
7614 entry.cache_mngr= cache_mngr;
7615 entry.error= 0;
7616 entry.all= all;
7617 entry.using_stmt_cache= using_stmt_cache;
7618 entry.using_trx_cache= using_trx_cache;
7619 entry.need_unlog= is_preparing_xa(thd);
7620 ha_info= all ? thd->transaction->all.ha_list : thd->transaction->stmt.ha_list;
7621
7622 for (; !entry.need_unlog && ha_info; ha_info= ha_info->next())
7623 {
7624 if (ha_info->is_started() && ha_info->ht() != binlog_hton &&
7625 !ha_info->ht()->commit_checkpoint_request)
7626 entry.need_unlog= true;
7627 break;
7628 }
7629
7630 entry.end_event= end_ev;
7631 if (cache_mngr->stmt_cache.has_incident() ||
7632 cache_mngr->trx_cache.has_incident())
7633 {
7634 Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, &write_error_msg);
7635 entry.incident_event= &inc_ev;
7636 DBUG_RETURN(write_transaction_to_binlog_events(&entry));
7637 }
7638 else
7639 {
7640 entry.incident_event= NULL;
7641 DBUG_RETURN(write_transaction_to_binlog_events(&entry));
7642 }
7643 }
7644
7645
7646 /*
7647 Put a transaction that is ready to commit in the group commit queue.
7648 The transaction is identified by the ENTRY object passed into this function.
7649
7650 To facilitate group commit for the binlog, we first queue up ourselves in
7651 this function. Then later the first thread to enter the queue waits for
7652 the LOCK_log mutex, and commits for everyone in the queue once it gets the
7653 lock. Any other threads in the queue just wait for the first one to finish
7654 the commit and wake them up. This way, all transactions in the queue get
7655 committed in a single disk operation.
7656
7657 The main work in this function is when the commit in one transaction has
7658 been marked to wait for the commit of another transaction to happen
7659 first. This is used to support in-order parallel replication, where
7660 transactions can execute out-of-order but need to be committed in-order with
7661 how they happened on the master. The waiting of one commit on another needs
7662 to be integrated with the group commit queue, to ensure that the waiting
7663 transaction can participate in the same group commit as the waited-for
7664 transaction.
7665
7666 So when we put a transaction in the queue, we check if there were other
7667 transactions already prepared to commit but just waiting for the first one
7668 to commit. If so, we add those to the queue as well, transitively for all
7669 waiters.
7670
7671 And if a transaction is marked to wait for a prior transaction, but that
7672 prior transaction is already queued for group commit, then we can queue the
7673 new transaction directly to participate in the group commit.
7674
7675 @retval < 0 Error
7676 @retval -2 WSREP error with commit ordering
7677 @retval -3 WSREP return code to mark the leader
7678 @retval > 0 If queued as the first entry in the queue (meaning this
7679 is the leader)
7680 @retval 0 Otherwise (queued as participant, leader handles the commit)
7681 */
7682
int
MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
{
  /*
    entry      - element of the temporary work list currently processed
    orig_queue - head of group_commit_queue before we added anything
    last       - final element of the temporary work list
    cur        - wait_for_commit of the entry currently processed
    wfc        - wait_for_commit of the calling thread (may be NULL)
  */
  group_commit_entry *entry, *orig_queue, *last;
  wait_for_commit *cur;
  wait_for_commit *wfc;
  /* Set when the backup commit lock was dropped and must be re-taken. */
  bool backup_lock_released= 0;
  int result= 0;
  THD *thd= orig_entry->thd;
  DBUG_ENTER("MYSQL_BIN_LOG::queue_for_group_commit");
  DBUG_ASSERT(thd == current_thd);

  /*
    Check if we need to wait for another transaction to commit before us.

    It is safe to do a quick check without lock first in the case where we do
    not have to wait. But if the quick check shows we need to wait, we must do
    another safe check under lock, to avoid the race where the other
    transaction wakes us up between the check and the wait.
  */
  wfc= orig_entry->thd->wait_for_commit_ptr;
  orig_entry->queued_by_other= false;
  if (wfc && wfc->waitee.load(std::memory_order_acquire))
  {
    wait_for_commit *loc_waitee;

    mysql_mutex_lock(&wfc->LOCK_wait_commit);
    /*
      Do an extra check here, this time safely under lock.

      If waitee->commit_started is set, it means that the transaction we need
      to wait for has already queued up for group commit. In this case it is
      safe for us to queue up immediately as well, increasing the opportunities
      for group commit. Because waitee has taken the LOCK_prepare_ordered
      before setting the flag, so there is no risk that we can queue ahead of
      it.
    */
    if ((loc_waitee= wfc->waitee.load(std::memory_order_relaxed)) &&
        !loc_waitee->commit_started)
    {
      PSI_stage_info old_stage;

      /*
        Release MDL_BACKUP_COMMIT LOCK while waiting for other threads to
        commit.
        This is needed to avoid deadlock between the other threads (which not
        yet have the MDL_BACKUP_COMMIT_LOCK) and any threads using
        BACKUP LOCK BLOCK_COMMIT.
      */
      if (thd->backup_commit_lock && thd->backup_commit_lock->ticket &&
          !backup_lock_released)
      {
        backup_lock_released= 1;
        thd->mdl_context.release_lock(thd->backup_commit_lock->ticket);
        thd->backup_commit_lock->ticket= 0;
      }

      /*
        By setting wfc->opaque_pointer to our own entry, we mark that we are
        ready to commit, but waiting for another transaction to commit before
        us.

        This other transaction may then take over the commit process for us to
        get us included in its own group commit. If this happens, the
        queued_by_other flag is set.

        Setting this flag may or may not be seen by the other thread, but we
        are safe in any case: The other thread will set queued_by_other under
        its LOCK_wait_commit, and we will not check queued_by_other until
        after we have been woken up.
      */
      wfc->opaque_pointer= orig_entry;
      DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior");
      orig_entry->thd->ENTER_COND(&wfc->COND_wait_commit,
                                  &wfc->LOCK_wait_commit,
                                  &stage_waiting_for_prior_transaction_to_commit,
                                  &old_stage);
      /* Wait until the waitee is cleared or this thread is killed. */
      while ((loc_waitee= wfc->waitee.load(std::memory_order_relaxed)) &&
              !orig_entry->thd->check_killed(1))
        mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
      wfc->opaque_pointer= NULL;
      DBUG_PRINT("info", ("After waiting for prior commit, queued_by_other=%d",
                 orig_entry->queued_by_other));

      if (loc_waitee)
      {
        /* Wait terminated due to kill. */
        mysql_mutex_lock(&loc_waitee->LOCK_wait_commit);
        if (loc_waitee->wakeup_subsequent_commits_running ||
            orig_entry->queued_by_other)
        {
          /* Our waitee is already waking us up, so ignore the kill. */
          mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
          do
          {
            mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
          } while (wfc->waitee.load(std::memory_order_relaxed));
        }
        else
        {
          /* We were killed, so remove us from the list of waitee. */
          wfc->remove_from_list(&loc_waitee->subsequent_commits_list);
          mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
          /*
            This is the thread clearing its own status, it is no longer on
            the list of waiters. So no memory barriers are needed here.
          */
          wfc->waitee.store(NULL, std::memory_order_relaxed);

          orig_entry->thd->EXIT_COND(&old_stage);
          /* Interrupted by kill. */
          DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior_killed");
          wfc->wakeup_error= orig_entry->thd->killed_errno();
          /* Fall back to a generic error if the kill state maps to none. */
          if (!wfc->wakeup_error)
            wfc->wakeup_error= ER_QUERY_INTERRUPTED;
          my_message(wfc->wakeup_error,
                     ER_THD(orig_entry->thd, wfc->wakeup_error), MYF(0));
          result= -1;
          goto end;
        }
      }
      orig_entry->thd->EXIT_COND(&old_stage);
    }
    else
      mysql_mutex_unlock(&wfc->LOCK_wait_commit);
  }
  /*
    If the transaction we were waiting for has already put us into the group
    commit queue (and possibly already done the entire binlog commit for us),
    then there is nothing else to do.
  */
  if (orig_entry->queued_by_other)
    goto end;

  /* The transaction we waited for reported an error: fail this one too. */
  if (wfc && wfc->wakeup_error)
  {
    my_error(ER_PRIOR_COMMIT_FAILED, MYF(0));
    result= -1;
    goto end;
  }

  /* Now enqueue ourselves in the group commit queue. */
  DEBUG_SYNC(orig_entry->thd, "commit_before_enqueue");
  orig_entry->thd->clear_wakeup_ready();
  mysql_mutex_lock(&LOCK_prepare_ordered);
  orig_queue= group_commit_queue;

  /*
    Iteratively process everything added to the queue, looking for waiters,
    and their waiters, and so on. If a waiter is ready to commit, we
    immediately add it to the queue, and mark it as queued_by_other.

    This would be natural to do with recursion, but we want to avoid
    potentially unbounded recursion blowing the C stack, so we use the list
    approach instead.

    We keep a list of the group_commit_entry of all the waiters that need to
    be processed. Initially this list contains only the entry passed into this
    function.

    We process entries in the list one by one. The element currently being
    processed is pointed to by `entry`, and the element at the end of the list
    is pointed to by `last` (we do not use NULL to terminate the list).

    As we process an entry, any waiters for that entry are added at the end of
    the list, to be processed in subsequent iterations. Then the entry is added
    to the group_commit_queue.  This continues until the list is exhausted,
    with all entries ever added eventually processed.

    The end result is a breadth-first traversal of the tree of waiters,
    re-using the `next' pointers of the group_commit_entry objects in place of
    extra stack space in a recursive traversal.

    The temporary list linked through these `next' pointers is not used by the
    caller or any other function; it only exists while doing the iterative
    tree traversal. After, all the processed entries are linked into the
    group_commit_queue.
  */

  cur= wfc;
  last= orig_entry;
  entry= orig_entry;
  for (;;)
  {
    group_commit_entry *next_entry;

    if (entry->cache_mngr->using_xa)
    {
      DEBUG_SYNC(entry->thd, "commit_before_prepare_ordered");
      run_prepare_ordered(entry->thd, entry->all);
      DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered");
    }

    if (cur)
    {
      /*
        Now that we have taken LOCK_prepare_ordered and will queue up in the
        group commit queue, it is safe for following transactions to queue
        themselves. We will grab here any transaction that is now ready to
        queue up, but after that, more transactions may become ready while the
        leader is waiting to start the group commit. So set the flag
        `commit_started', so that later transactions can still participate in
        the group commit..
      */
      cur->commit_started= true;

      /*
        Check if this transaction has other transaction waiting for it to
        commit.

        If so, process the waiting transactions, and their waiters and so on,
        transitively.
      */
      if (cur->subsequent_commits_list)
      {
        wait_for_commit *waiter, **waiter_ptr;

        mysql_mutex_lock(&cur->LOCK_wait_commit);
        /*
          Grab the list, now safely under lock, and process it if still
          non-empty.
        */
        waiter= cur->subsequent_commits_list;
        /*
          waiter_ptr is the link that points at `waiter`; assigning through
          it unlinks the waiter from the list.
        */
        waiter_ptr= &cur->subsequent_commits_list;
        while (waiter)
        {
          wait_for_commit *next_waiter= waiter->next_subsequent_commit;
          group_commit_entry *entry2=
            (group_commit_entry *)waiter->opaque_pointer;
          if (entry2)
          {
            /*
              This is another transaction ready to be written to the binary
              log. We can put it into the queue directly, without needing a
              separate context switch to the other thread. We just set a flag
              so that the other thread will know when it wakes up that it was
              already processed.

              So remove it from the list of our waiters, and instead put it at
              the end of the list to be processed in a subsequent iteration of
              the outer loop.
            */
            *waiter_ptr= next_waiter;
            entry2->queued_by_other= true;
            last->next= entry2;
            last= entry2;
            /*
              As a small optimisation, we do not actually need to set
              entry2->next to NULL, as we can use the pointer `last' to check
              for end-of-list.
            */
          }
          else
          {
            /*
              This transaction is not ready to participate in the group commit
              yet, so leave it in the waiter list. It might join the group
              commit later, if it completes soon enough to do so (it will see
              our wfc->commit_started flag set), or it might commit later in a
              later group commit.
            */
            waiter_ptr= &waiter->next_subsequent_commit;
          }
          waiter= next_waiter;
        }
        mysql_mutex_unlock(&cur->LOCK_wait_commit);
      }
    }

    /*
      Handle the heuristics that if another transaction is waiting for this
      transaction (or if it does so later), then we want to trigger group
      commit immediately, without waiting for the binlog_commit_wait_usec
      timeout to expire.
    */
    entry->thd->waiting_on_group_commit= true;

    /* Add the entry to the group commit queue. */
    /* Save the work-list link first: entry->next is about to be reused. */
    next_entry= entry->next;
    entry->next= group_commit_queue;
    group_commit_queue= entry;
    if (entry == last)
      break;
    /*
      Move to the next entry in the flattened list of waiting transactions
      that still need to be processed transitively.
    */
    entry= next_entry;
    DBUG_ASSERT(entry != NULL);
    cur= entry->thd->wait_for_commit_ptr;
  }

  /* We are the leader (1) if the queue was empty before we enqueued. */
  result= orig_queue == NULL;

#ifdef WITH_WSREP
  /*
    NOTE(review): after the loop above `entry` equals `last`, which is
    not orig_entry whenever waiters were appended to the work list;
    confirm that using `entry` rather than `orig_entry` here is intended.
  */
  if (wsrep_is_active(entry->thd) &&
      wsrep_run_commit_hook(entry->thd, entry->all))
  {
    /*  Release commit order here */
    if (wsrep_ordered_commit(entry->thd, entry->all))
      result= -2;

    /* return -3, if this is leader */
    if (orig_queue == NULL)
      result= -3;
  }
  else
    DBUG_ASSERT(result != -2 && result != -3);
#endif /* WITH_WSREP */

  /* Signal COND_prepare_ordered when we joined an already existing queue. */
  if (opt_binlog_commit_wait_count > 0 && orig_queue != NULL)
    mysql_cond_signal(&COND_prepare_ordered);
  mysql_mutex_unlock(&LOCK_prepare_ordered);
  DEBUG_SYNC(orig_entry->thd, "commit_after_release_LOCK_prepare_ordered");

  DBUG_PRINT("info", ("Queued for group commit as %s",
                      (orig_queue == NULL) ? "leader" : "participant"));

end:
  /*
    Re-take the backup commit lock that was released before waiting.
    NOTE(review): the return value of acquire_lock() is not checked here.
  */
  if (backup_lock_released)
    thd->mdl_context.acquire_lock(thd->backup_commit_lock,
                                  thd->variables.lock_wait_timeout);
  DBUG_RETURN(result);
}
8007
/**
  Queue a transaction for binlog group commit and wait until it has been
  processed.

  The entry is handed to queue_for_group_commit(). Depending on the result,
  this thread either runs trx_group_commit_leader() itself (leader), blocks
  in wait_for_wakeup_ready() (participant that was not queued by another
  transaction), or does no extra waiting here (entry->queued_by_other set).

  When opt_optimize_thread_scheduling is off, every thread of the group
  then takes its own turn under LOCK_commit_ordered: it calls
  run_commit_ordered() when applicable and wakes up the next entry; the last
  entry of the group clears group_commit_queue_busy and calls
  checkpoint_and_purge() if entry->check_purge is set.

  @param entry  The group commit entry of the calling thread.

  @return The result of entry->thd->wait_for_prior_commit() when
          entry->error is 0. true when queue_for_group_commit() returned a
          negative value (nothing is reported from here in that case), or,
          under WSREP, when the leader's wait_for_prior_commit() fails.
          1 when entry->error is set; the error is first reported with
          my_error()/my_printf_error().
*/
8008 bool
write_transaction_to_binlog_events(group_commit_entry * entry)8009 MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
8010 {
8011 int is_leader= queue_for_group_commit(entry);
8012 #ifdef WITH_WSREP
8013 /* commit order was released in queue_for_group_commit() call,
8014 here we check if wsrep_commit_ordered() failed or if we are leader */
8015 switch (is_leader)
8016 {
8017 case -2: /* wsrep_ordered_commit() has failed */
8018 DBUG_ASSERT(wsrep_is_active(entry->thd));
8019 DBUG_ASSERT(wsrep_run_commit_hook(entry->thd, entry->all));
8020 entry->thd->wakeup_subsequent_commits(1);
8021 return true;
8022 case -3: /* this is leader, wait for prior commit to
8023 complete. This establishes total order for group leaders
8024 */
8025 DBUG_ASSERT(wsrep_is_active(entry->thd));
8026 DBUG_ASSERT(wsrep_run_commit_hook(entry->thd, entry->all));
8027 if (entry->thd->wait_for_prior_commit())
8028 return true;
8029
8030 /* retain the correct is_leader value */
8031 is_leader= 1;
8032 break;
8033
8034 default: /* native MariaDB cases */
8035 break;
8036 }
8037 #endif /* WITH_WSREP */
8038
8039 /*
8040 The first in the queue handles group commit for all; the others just wait
8041 to be signalled when group commit is done.
8042 */
8043 if (is_leader < 0)
8044 return true; /* Error */
8045 else if (is_leader)
8046 trx_group_commit_leader(entry);
8047 else if (!entry->queued_by_other)
8048 {
8049 DEBUG_SYNC(entry->thd, "after_semisync_queue");
8050
8051 entry->thd->wait_for_wakeup_ready();
8052 }
8053 else
8054 {
8055 /*
8056 If we were queued by another prior commit, then we are woken up
8057 only when the leader has already completed the commit for us.
8058 So nothing to do here then.
8059 */
8060 }
8061
8062 if (!opt_optimize_thread_scheduling)
8063 {
8064 /* For the leader, trx_group_commit_leader() already took the lock. */
8065 if (!is_leader)
8066 mysql_mutex_lock(&LOCK_commit_ordered);
8067
8068 DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered");
8069 ++num_commits;
8070 if (entry->cache_mngr->using_xa && !entry->error)
8071 run_commit_ordered(entry->thd, entry->all);
8072
/*
  entry->next is read here, before LOCK_commit_ordered is released below.
  A NULL next means this entry is the last one of its group.
*/
8073 group_commit_entry *next= entry->next;
8074 if (!next)
8075 {
8076 group_commit_queue_busy= FALSE;
8077 mysql_cond_signal(&COND_queue_busy);
8078 DEBUG_SYNC(entry->thd, "commit_after_group_run_commit_ordered");
8079 }
8080 mysql_mutex_unlock(&LOCK_commit_ordered);
8081 entry->thd->wakeup_subsequent_commits(entry->error);
8082
8083 if (next)
8084 {
8085 /*
8086 Wake up the next thread in the group commit.
8087
8088 The next thread can be waiting in two different ways, depending on
8089 whether it put itself in the queue, or if it was put in queue by us
8090 because it had to wait for us to commit first.
8091
8092 So execute the appropriate wakeup, identified by the queued_by_other
8093 field.
8094 */
8095 if (next->queued_by_other)
8096 next->thd->wait_for_commit_ptr->wakeup(entry->error);
8097 else
8098 next->thd->signal_wakeup_ready();
8099 }
8100 else
8101 {
8102 /*
8103 If we rotated the binlog, and if we are using the unoptimized thread
8104 scheduling where every thread runs its own commit_ordered(), then we
8105 must do the commit checkpoint and log purge here, after all
8106 commit_ordered() calls have finished, and locks have been released.
8107 */
8108 if (entry->check_purge)
8109 checkpoint_and_purge(entry->binlog_id);
8110 }
8111
8112 }
8113
/*
  No binlog error recorded for this entry: the return value is whatever
  wait_for_prior_commit() reports.
*/
8114 if (likely(!entry->error))
8115 return entry->thd->wait_for_prior_commit();
8116
/* Error path: report entry->error from this (the owning) thread. */
8117 switch (entry->error)
8118 {
8119 case ER_ERROR_ON_WRITE:
8120 my_error(ER_ERROR_ON_WRITE, MYF(ME_ERROR_LOG), name, entry->commit_errno);
8121 break;
8122 case ER_ERROR_ON_READ:
8123 my_error(ER_ERROR_ON_READ, MYF(ME_ERROR_LOG),
8124 entry->error_cache->file_name, entry->commit_errno);
8125 break;
8126 default:
8127 /*
8128 There are not (and should not be) any errors thrown not covered above.
8129 But just in case one is added later without updating the above switch
8130 statement, include a catch-all.
8131 */
8132 my_printf_error(entry->error,
8133 "Error writing transaction to binary log: %d",
8134 MYF(ME_ERROR_LOG), entry->error);
8135 }
8136
8137 /*
8138 Since we return error, this transaction XID will not be committed, so
8139 we need to mark it as not needed for recovery (unlog() is not called
8140 for a transaction if log_xid() fails).
8141 */
8142 if (entry->cache_mngr->using_xa && entry->cache_mngr->xa_xid &&
8143 entry->cache_mngr->need_unlog)
8144 mark_xid_done(entry->cache_mngr->binlog_id, true);
8145
8146 return 1;
8147 }
8148
8149 /*
8150 Do binlog group commit as the lead thread.
8151
8152 This must be called when this statement/transaction is queued at the start of
8153 the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
8154 commit all the transactions in the queue (more may have entered while waiting
8155 for LOCK_log). After commit is done, all other threads in the queue will be
8156 signalled.
8157
  Stages, in the order they appear in the body:
    1. Take LOCK_log, then LOCK_prepare_ordered; optionally call
       wait_for_sufficient_commits(); detach group_commit_queue and reverse
       it so that the list is in queueing order with the leader first.
    2. If the log is open: write every entry with
       write_transaction_or_stmt(), call flush_and_sync(), report positions
       to semi-sync and call update_binlog_end_pos() on success, call
       mark_xids_active() when xid_count > 0, then rotate().
    3. Take LOCK_after_binlog_sync before releasing LOCK_log and run
       repl_semisync_master.wait_after_sync() for every entry without error.
    4. Take LOCK_commit_ordered before releasing LOCK_after_binlog_sync.
       With opt_optimize_thread_scheduling off, wait for
       group_commit_queue_busy to clear, set it, store check_purge and
       binlog_id in the last queue entry and return with LOCK_commit_ordered
       still held. Otherwise run run_commit_ordered() where applicable, wake
       every participant other than the leader, release LOCK_commit_ordered
       and call checkpoint_and_purge() if a purge check was requested.

  Failures are not returned: they are stored per entry in current->error,
  current->commit_errno and current->error_cache, or, for a failed rotate(),
  flagged in leader->cache_mngr->delayed_error.

  @param leader  Group commit entry of the calling thread; asserted to be the
                 first element of the reversed queue.
8158 */
8159 void
trx_group_commit_leader(group_commit_entry * leader)8160 MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
8161 {
8162 uint xid_count= 0;
8163 my_off_t UNINIT_VAR(commit_offset);
8164 group_commit_entry *current, *last_in_queue;
8165 group_commit_entry *queue= NULL;
8166 bool check_purge= false;
8167 ulong UNINIT_VAR(binlog_id);
8168 uint64 commit_id;
8169 DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
8170
8171 {
8172 DBUG_EXECUTE_IF("inject_binlog_commit_before_get_LOCK_log",
8173 DBUG_ASSERT(!debug_sync_set_action(leader->thd, STRING_WITH_LEN
8174 ("commit_before_get_LOCK_log SIGNAL waiting WAIT_FOR cont TIMEOUT 1")));
8175 );
8176 /*
8177 Lock the LOCK_log(), and once we get it, collect any additional writes
8178 that queued up while we were waiting.
8179 */
8180 DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_log");
8181 mysql_mutex_lock(&LOCK_log);
8182 DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log");
8183
8184 mysql_mutex_lock(&LOCK_prepare_ordered);
8185 if (opt_binlog_commit_wait_count)
8186 wait_for_sufficient_commits();
8187 /*
8188 Note that wait_for_sufficient_commits() may have released and
8189 re-acquired the LOCK_log and LOCK_prepare_ordered if it needed to wait.
8190 */
8191 current= group_commit_queue;
8192 group_commit_queue= NULL;
8193 mysql_mutex_unlock(&LOCK_prepare_ordered);
8194 binlog_id= current_binlog_id;
8195
8196 /* As the queue is in reverse order of entering, reverse it. */
/*
  The head of the detached (still reversed) list becomes the tail after the
  reversal, so remember it as last_in_queue.
*/
8197 last_in_queue= current;
8198 while (current)
8199 {
8200 group_commit_entry *next= current->next;
8201 /*
8202 Now that group commit is started, we can clear the flag; there is no
8203 longer any use in waiters on this commit trying to trigger it early.
8204 */
8205 current->thd->waiting_on_group_commit= false;
8206 current->next= queue;
8207 queue= current;
8208 current= next;
8209 }
8210 DBUG_ASSERT(leader == queue /* the leader should be first in queue */);
8211
8212 /* Now we have in queue the list of transactions to be committed in order. */
8213 }
8214
8215 DBUG_ASSERT(is_open());
8216 if (likely(is_open())) // Should always be true
8217 {
/* commit_id is 0 when the leader is the only entry in the group. */
8218 commit_id= (last_in_queue == leader ? 0 : (uint64)leader->thd->query_id);
8219 DBUG_EXECUTE_IF("binlog_force_commit_id",
8220 {
8221 const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") };
8222 bool null_value;
8223 user_var_entry *entry=
8224 (user_var_entry*) my_hash_search(&leader->thd->user_vars,
8225 (uchar*) commit_name.str,
8226 commit_name.length);
8227 commit_id= entry->val_int(&null_value);
8228 });
8229 /*
8230 Commit every transaction in the queue.
8231
8232 Note that we are doing this in a different thread than the one running
8233 the transaction! So we are limited in the operations we can do. In
8234 particular, we cannot call my_error() on behalf of a transaction, as
8235 that obtains the THD from thread local storage. Instead, we must set
8236 current->error and let the thread do the error reporting itself once
8237 we wake it up.
8238 */
8239 for (current= queue; current != NULL; current= current->next)
8240 {
8241 set_current_thd(current->thd);
8242 binlog_cache_mngr *cache_mngr= current->cache_mngr;
8243
8244 /*
8245 We already checked before that at least one cache is non-empty; if both
8246 are empty we would have skipped calling into here.
8247 */
8248 DBUG_ASSERT(!cache_mngr->stmt_cache.empty() ||
8249 !cache_mngr->trx_cache.empty() ||
8250 current->thd->transaction->xid_state.is_explicit_XA());
8251
/* On failure, errno is captured right away into the entry. */
8252 if (unlikely((current->error= write_transaction_or_stmt(current,
8253 commit_id))))
8254 current->commit_errno= errno;
8255
/* Record the binlog file name and write position reached after this entry. */
8256 strmake_buf(cache_mngr->last_commit_pos_file, log_file_name);
8257 commit_offset= my_b_write_tell(&log_file);
8258 cache_mngr->last_commit_pos_offset= commit_offset;
8259 if ((cache_mngr->using_xa && cache_mngr->xa_xid) || current->need_unlog)
8260 {
8261 /*
8262 If all storage engines support commit_checkpoint_request(), then we
8263 do not need to keep track of when this XID is durably committed.
8264 Instead we will just ask the storage engine to durably commit all its
8265 XIDs when we rotate a binlog file.
8266 */
8267 if (current->need_unlog)
8268 {
8269 xid_count++;
8270 cache_mngr->need_unlog= true;
8271 cache_mngr->binlog_id= binlog_id;
8272 }
8273 else
8274 cache_mngr->need_unlog= false;
8275
8276 cache_mngr->delayed_error= false;
8277 }
8278 }
/* Restore the leader's THD as the current one after the per-entry loop. */
8279 set_current_thd(leader->thd);
8280
8281 bool synced= 0;
8282 if (unlikely(flush_and_sync(&synced)))
8283 {
/* Flush/sync failed: flag every entry that has no error recorded yet. */
8284 for (current= queue; current != NULL; current= current->next)
8285 {
8286 if (!current->error)
8287 {
8288 current->error= ER_ERROR_ON_WRITE;
8289 current->commit_errno= errno;
8290 current->error_cache= NULL;
8291 }
8292 }
8293 }
8294 else
8295 {
8296 bool any_error= false;
8297
8298 mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
8299 mysql_mutex_assert_owner(&LOCK_log);
8300 mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
8301 mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
8302
8303 for (current= queue; current != NULL; current= current->next)
8304 {
8305 #ifdef HAVE_REPLICATION
8306 if (likely(!current->error) &&
8307 unlikely(repl_semisync_master.
8308 report_binlog_update(current->thd,
8309 current->cache_mngr->
8310 last_commit_pos_file,
8311 current->cache_mngr->
8312 last_commit_pos_offset)))
8313 {
8314 current->error= ER_ERROR_ON_WRITE;
8315 current->commit_errno= -1;
8316 current->error_cache= NULL;
8317 any_error= true;
8318 }
8319 #endif
8320 }
8321
8322 /*
8323 update binlog_end_pos so it can be read by dump thread
8324 Note: must be _after_ the RUN_HOOK(after_flush) or else
8325 semi-sync might not have put the transaction into
8326 its list before dump-thread tries to send it
8327 */
8328 update_binlog_end_pos(commit_offset);
8329
8330 if (unlikely(any_error))
8331 sql_print_error("Failed to run 'after_flush' hooks");
8332 }
8333
8334 /*
8335 If any commit_events are Xid_log_event, increase the number of pending
8336 XIDs in current binlog (it's decreased in ::unlog()). When the count in
8337 a (not active) binlog file reaches zero, we know that it is no longer
8338 needed in XA recovery, and we can log a new binlog checkpoint event.
8339 */
8340 if (xid_count > 0)
8341 {
8342 mark_xids_active(binlog_id, xid_count);
8343 }
8344
8345 if (rotate(false, &check_purge))
8346 {
8347 /*
8348 If we fail to rotate, which thread should get the error?
8349 We give the error to the leader, as any my_error() thrown inside
8350 rotate() will have been registered for the leader THD.
8351
8352 However we must not return error from here - that would cause
8353 ha_commit_trans() to abort and rollback the transaction, which would
8354 leave an inconsistent state with the transaction committed in the
8355 binlog but rolled back in the engine.
8356
8357 Instead set a flag so that we can return error later, from unlog(),
8358 when the transaction has been safely committed in the engine.
8359 */
8360 leader->cache_mngr->delayed_error= true;
8361 my_error(ER_ERROR_ON_WRITE, MYF(ME_ERROR_LOG), name, errno);
8362 check_purge= false;
8363 }
8364 /* In case of binlog rotate, update the correct current binlog offset. */
8365 commit_offset= my_b_write_tell(&log_file);
8366 }
8367
8368 DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_after_binlog_sync");
8369 mysql_mutex_lock(&LOCK_after_binlog_sync);
8370 /*
8371 We cannot unlock LOCK_log until we have locked LOCK_after_binlog_sync;
8372 otherwise scheduling could allow the next group commit to run ahead of us,
8373 messing up the order of commit_ordered() calls. But as soon as
8374 LOCK_after_binlog_sync is obtained, we can let the next group commit start.
8375 */
8376 mysql_mutex_unlock(&LOCK_log);
8377
8378 DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log");
8379
8380 /*
8381 Loop through threads and run the binlog_sync hook
8382 */
8383 {
8384 mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
8385 mysql_mutex_assert_not_owner(&LOCK_log);
8386 mysql_mutex_assert_owner(&LOCK_after_binlog_sync);
8387 mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
8388
/*
  `first' and `last' are only assigned inside this block and never read,
  which is why both carry the unused attribute.
*/
8389 bool first __attribute__((unused))= true;
8390 bool last __attribute__((unused));
8391 for (current= queue; current != NULL; current= current->next)
8392 {
8393 last= current->next == NULL;
8394 #ifdef HAVE_REPLICATION
8395 if (likely(!current->error))
8396 current->error=
8397 repl_semisync_master.wait_after_sync(current->cache_mngr->
8398 last_commit_pos_file,
8399 current->cache_mngr->
8400 last_commit_pos_offset);
8401 #endif
8402 first= false;
8403 }
8404 }
8405
8406 DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
8407
8408 mysql_mutex_lock(&LOCK_commit_ordered);
8409 DBUG_EXECUTE_IF("crash_before_engine_commit",
8410 {
8411 DBUG_SUICIDE();
8412 });
8413 last_commit_pos_offset= commit_offset;
8414
8415 /*
8416 Unlock LOCK_after_binlog_sync only *after* LOCK_commit_ordered has been
8417 acquired so that groups can not reorder for the different stages of
8418 the group commit procedure.
8419 */
8420 mysql_mutex_unlock(&LOCK_after_binlog_sync);
8421 DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_after_binlog_sync");
8422 ++num_group_commits;
8423
8424 if (!opt_optimize_thread_scheduling)
8425 {
8426 /*
8427 If we want to run commit_ordered() each in the transaction's own thread
8428 context, then we need to mark the queue reserved; we need to finish all
8429 threads in one group commit before the next group commit can be allowed
8430 to proceed, and we cannot unlock a simple pthreads mutex in a different
8431 thread from the one that locked it.
8432 */
8433
8434 while (group_commit_queue_busy)
8435 mysql_cond_wait(&COND_queue_busy, &LOCK_commit_ordered);
8436 group_commit_queue_busy= TRUE;
8437
8438 /*
8439 Set these so parent can run checkpoint_and_purge() in last thread.
8440 (When using optimized thread scheduling, we run checkpoint_and_purge()
8441 in this function, so parent does not need to and we need not set these
8442 values).
8443 */
8444 last_in_queue->check_purge= check_purge;
8445 last_in_queue->binlog_id= binlog_id;
8446
8447 /* Note that we return with LOCK_commit_ordered locked! */
8448 DBUG_VOID_RETURN;
8449 }
8450
8451 /*
8452 Wakeup each participant waiting for our group commit, first calling the
8453 commit_ordered() methods for any transactions doing 2-phase commit.
8454 */
8455 current= queue;
8456 while (current != NULL)
8457 {
8458 group_commit_entry *next;
8459
8460 DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered");
8461 ++num_commits;
8462 if (current->cache_mngr->using_xa && likely(!current->error) &&
8463 DBUG_EVALUATE_IF("skip_commit_ordered", 0, 1))
8464 run_commit_ordered(current->thd, current->all);
8465 current->thd->wakeup_subsequent_commits(current->error);
8466
8467 /*
8468 Careful not to access current->next after waking up the other thread! As
8469 it may change immediately after wakeup.
8470 */
8471 next= current->next;
8472 if (current != leader) // Don't wake up ourself
8473 {
8474 if (current->queued_by_other)
8475 current->thd->wait_for_commit_ptr->wakeup(current->error);
8476 else
8477 current->thd->signal_wakeup_ready();
8478 }
8479 current= next;
8480 }
8481 DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered");
8482 mysql_mutex_unlock(&LOCK_commit_ordered);
8483 DEBUG_SYNC(leader->thd, "commit_after_group_release_commit_ordered");
8484
8485 if (check_purge)
8486 checkpoint_and_purge(binlog_id);
8487
8488 DBUG_VOID_RETURN;
8489 }
8490
8491
8492 int
write_transaction_or_stmt(group_commit_entry * entry,uint64 commit_id)8493 MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
8494 uint64 commit_id)
8495 {
8496 binlog_cache_mngr *mngr= entry->cache_mngr;
8497 DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt");
8498
8499 if (write_gtid_event(entry->thd, is_prepared_xa(entry->thd),
8500 entry->using_trx_cache, commit_id))
8501 DBUG_RETURN(ER_ERROR_ON_WRITE);
8502
8503 if (entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
8504 write_cache(entry->thd, mngr->get_binlog_cache_log(FALSE)))
8505 {
8506 entry->error_cache= &mngr->stmt_cache.cache_log;
8507 DBUG_RETURN(ER_ERROR_ON_WRITE);
8508 }
8509
8510 if (entry->using_trx_cache && !mngr->trx_cache.empty())
8511 {
8512 DBUG_EXECUTE_IF("crash_before_writing_xid",
8513 {
8514 if ((write_cache(entry->thd,
8515 mngr->get_binlog_cache_log(TRUE))))
8516 DBUG_PRINT("info", ("error writing binlog cache"));
8517 else
8518 flush_and_sync(0);
8519
8520 DBUG_PRINT("info", ("crashing before writing xid"));
8521 DBUG_SUICIDE();
8522 });
8523
8524 if (write_cache(entry->thd, mngr->get_binlog_cache_log(TRUE)))
8525 {
8526 entry->error_cache= &mngr->trx_cache.cache_log;
8527 DBUG_RETURN(ER_ERROR_ON_WRITE);
8528 }
8529 }
8530
8531 DBUG_EXECUTE_IF("inject_error_writing_xid",
8532 {
8533 entry->error_cache= NULL;
8534 errno= 28;
8535 DBUG_RETURN(ER_ERROR_ON_WRITE);
8536 });
8537
8538 if (write_event(entry->end_event))
8539 {
8540 entry->error_cache= NULL;
8541 DBUG_RETURN(ER_ERROR_ON_WRITE);
8542 }
8543 status_var_add(entry->thd->status_var.binlog_bytes_written,
8544 entry->end_event->data_written);
8545
8546 if (entry->incident_event)
8547 {
8548 if (write_event(entry->incident_event))
8549 {
8550 entry->error_cache= NULL;
8551 DBUG_RETURN(ER_ERROR_ON_WRITE);
8552 }
8553 }
8554
8555 if (unlikely(mngr->get_binlog_cache_log(FALSE)->error))
8556 {
8557 entry->error_cache= &mngr->stmt_cache.cache_log;
8558 DBUG_RETURN(ER_ERROR_ON_WRITE);
8559 }
8560 if (unlikely(mngr->get_binlog_cache_log(TRUE)->error)) // Error on read
8561 {
8562 entry->error_cache= &mngr->trx_cache.cache_log;
8563 DBUG_RETURN(ER_ERROR_ON_WRITE);
8564 }
8565
8566 DBUG_RETURN(0);
8567 }
8568
8569
8570 /*
8571 Wait for sufficient commits to queue up for group commit, according to the
8572 values of binlog_commit_wait_count and binlog_commit_wait_usec.
8573
8574 Note that this function may release and re-acquire LOCK_log and
8575 LOCK_prepare_ordered if it needs to wait.
8576 */
8577
8578 void
wait_for_sufficient_commits()8579 MYSQL_BIN_LOG::wait_for_sufficient_commits()
8580 {
8581 size_t count;
8582 group_commit_entry *e;
8583 group_commit_entry *last_head;
8584 struct timespec wait_until;
8585
8586 mysql_mutex_assert_owner(&LOCK_log);
8587 mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8588
8589 for (e= last_head= group_commit_queue, count= 0; e; e= e->next)
8590 {
8591 if (++count >= opt_binlog_commit_wait_count)
8592 {
8593 group_commit_trigger_count++;
8594 return;
8595 }
8596 if (unlikely(e->thd->has_waiter))
8597 {
8598 group_commit_trigger_lock_wait++;
8599 return;
8600 }
8601 }
8602
8603 mysql_mutex_unlock(&LOCK_log);
8604 set_timespec_nsec(wait_until, (ulonglong)1000*opt_binlog_commit_wait_usec);
8605
8606 for (;;)
8607 {
8608 int err;
8609 group_commit_entry *head;
8610
8611 err= mysql_cond_timedwait(&COND_prepare_ordered, &LOCK_prepare_ordered,
8612 &wait_until);
8613 if (err == ETIMEDOUT)
8614 {
8615 group_commit_trigger_timeout++;
8616 break;
8617 }
8618 if (unlikely(last_head->thd->has_waiter))
8619 {
8620 group_commit_trigger_lock_wait++;
8621 break;
8622 }
8623 head= group_commit_queue;
8624 for (e= head; e && e != last_head; e= e->next)
8625 {
8626 ++count;
8627 if (unlikely(e->thd->has_waiter))
8628 {
8629 group_commit_trigger_lock_wait++;
8630 goto after_loop;
8631 }
8632 }
8633 if (count >= opt_binlog_commit_wait_count)
8634 {
8635 group_commit_trigger_count++;
8636 break;
8637 }
8638 last_head= head;
8639 }
8640 after_loop:
8641
8642 /*
8643 We must not wait for LOCK_log while holding LOCK_prepare_ordered.
8644 LOCK_log can be held for long periods (eg. we do I/O under it), while
8645 LOCK_prepare_ordered must only be held for short periods.
8646
8647 In addition, waiting for LOCK_log while holding LOCK_prepare_ordered would
8648 violate locking order of LOCK_log-before-LOCK_prepare_ordered. This could
8649 cause SAFEMUTEX warnings (even if it cannot actually deadlock with current
8650 code, as there can be at most one group commit leader thread at a time).
8651
8652 So release and re-acquire LOCK_prepare_ordered if we need to wait for the
8653 LOCK_log.
8654 */
8655 if (mysql_mutex_trylock(&LOCK_log))
8656 {
8657 mysql_mutex_unlock(&LOCK_prepare_ordered);
8658 mysql_mutex_lock(&LOCK_log);
8659 mysql_mutex_lock(&LOCK_prepare_ordered);
8660 }
8661 }
8662
8663
8664 void
binlog_trigger_immediate_group_commit()8665 MYSQL_BIN_LOG::binlog_trigger_immediate_group_commit()
8666 {
8667 group_commit_entry *head;
8668 mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8669 head= group_commit_queue;
8670 if (head)
8671 {
8672 head->thd->has_waiter= true;
8673 mysql_cond_signal(&COND_prepare_ordered);
8674 }
8675 }
8676
8677
8678 /*
8679 This function is called when a transaction T1 goes to wait for another
8680 transaction T2. It is used to cut short any binlog group commit delay from
8681 --binlog-commit-wait-count in the case where another transaction is stalled
8682 on the wait due to conflicting row locks.
8683
8684 If T2 is already ready to group commit, any waiting group commit will be
8685 signalled to proceed immediately. Otherwise, a flag will be set in T2, and
8686 when T2 later becomes ready, immediate group commit will be triggered.
8687 */
8688 void
binlog_report_wait_for(THD * thd1,THD * thd2)8689 binlog_report_wait_for(THD *thd1, THD *thd2)
8690 {
8691 if (opt_binlog_commit_wait_count == 0)
8692 return;
8693 mysql_mutex_lock(&LOCK_prepare_ordered);
8694 thd2->has_waiter= true;
8695 if (thd2->waiting_on_group_commit)
8696 mysql_bin_log.binlog_trigger_immediate_group_commit();
8697 mysql_mutex_unlock(&LOCK_prepare_ordered);
8698 }
8699
8700
8701 /**
8702 Wait until we get a signal that the relay log has been updated.
8703
8704 @param thd Thread variable
8705
8706 @note
8707 One must have a lock on LOCK_log before calling this function.
8708 This lock will be released before return! That's required by
8709 THD::enter_cond() (see NOTES in sql_class.h).
8710 */
8711
wait_for_update_relay_log(THD * thd)8712 void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
8713 {
8714 PSI_stage_info old_stage;
8715 DBUG_ENTER("wait_for_update_relay_log");
8716
8717 mysql_mutex_assert_owner(&LOCK_log);
8718 thd->ENTER_COND(&COND_relay_log_updated, &LOCK_log,
8719 &stage_slave_has_read_all_relay_log,
8720 &old_stage);
8721 mysql_cond_wait(&COND_relay_log_updated, &LOCK_log);
8722 thd->EXIT_COND(&old_stage);
8723 DBUG_VOID_RETURN;
8724 }
8725
8726 /**
8727 Wait until we get a signal that the binary log has been updated.
8728 Applies to master only.
8729
8730 NOTES
8731 @param[in] thd a THD struct
8732 @param[in] timeout a pointer to a timespec;
8733 NULL means to wait w/o timeout.
8734 @retval 0 if got signalled on update
8735 @retval non-0 if wait timeout elapsed
8736 @note
8737 LOCK_log must be taken before calling this function.
8738 LOCK_log is being released while the thread is waiting.
8739 LOCK_log is released by the caller.
8740 */
8741
wait_for_update_binlog_end_pos(THD * thd,struct timespec * timeout)8742 int MYSQL_BIN_LOG::wait_for_update_binlog_end_pos(THD* thd,
8743 struct timespec *timeout)
8744 {
8745 int ret= 0;
8746 DBUG_ENTER("wait_for_update_binlog_end_pos");
8747
8748 thd_wait_begin(thd, THD_WAIT_BINLOG);
8749 mysql_mutex_assert_owner(get_binlog_end_pos_lock());
8750 if (!timeout)
8751 mysql_cond_wait(&COND_bin_log_updated, get_binlog_end_pos_lock());
8752 else
8753 ret= mysql_cond_timedwait(&COND_bin_log_updated, get_binlog_end_pos_lock(),
8754 timeout);
8755 thd_wait_end(thd);
8756 DBUG_RETURN(ret);
8757 }
8758
8759
8760 /**
8761 Close the log file.
8762
8763 @param exiting Bitmask for one or more of the following bits:
8764 - LOG_CLOSE_INDEX : if we should close the index file
8765 - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
8766 at once after close.
8767 - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
8768 - LOG_CLOSE_DELAYED_CLOSE : do not yet close the file and clear the
8769 LOG_EVENT_BINLOG_IN_USE_F flag
8770
8771 @note
8772 One can do an open on the object at once after doing a close.
8773 The internal structures are not freed until cleanup() is called
8774 */
8775
close(uint exiting)8776 void MYSQL_BIN_LOG::close(uint exiting)
8777 { // One can't set log_type here!
8778 bool failed_to_save_state= false;
8779 DBUG_ENTER("MYSQL_BIN_LOG::close");
8780 DBUG_PRINT("enter",("exiting: %d", (int) exiting));
8781
8782 mysql_mutex_assert_owner(&LOCK_log);
8783
8784 if (log_state == LOG_OPENED)
8785 {
8786 DBUG_ASSERT(log_type == LOG_BIN);
8787 #ifdef HAVE_REPLICATION
8788 if (exiting & LOG_CLOSE_STOP_EVENT)
8789 {
8790 Stop_log_event s;
8791 // the checksumming rule for relay-log case is similar to Rotate
8792 s.checksum_alg= is_relay_log ? relay_log_checksum_alg
8793 : (enum_binlog_checksum_alg)binlog_checksum_options;
8794 DBUG_ASSERT(!is_relay_log ||
8795 relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
8796 write_event(&s);
8797 bytes_written+= s.data_written;
8798 flush_io_cache(&log_file);
8799 update_binlog_end_pos();
8800
8801 /*
8802 When we shut down server, write out the binlog state to a separate
8803 file so we do not have to scan an entire binlog file to recover it
8804 at next server start.
8805
8806 Note that this must be written and synced to disk before marking the
8807 last binlog file as "not crashed".
8808 */
8809 if (!is_relay_log && write_state_to_file())
8810 {
8811 sql_print_error("Failed to save binlog GTID state during shutdown. "
8812 "Binlog will be marked as crashed, so that crash "
8813 "recovery can recover the state at next server "
8814 "startup.");
8815 /*
8816 Leave binlog file marked as crashed, so we can recover state by
8817 scanning it now that we failed to write out the state properly.
8818 */
8819 failed_to_save_state= true;
8820 }
8821 }
8822 #endif /* HAVE_REPLICATION */
8823
8824 /* don't pwrite in a file opened with O_APPEND - it doesn't work */
8825 if (log_file.type == WRITE_CACHE && !(exiting & LOG_CLOSE_DELAYED_CLOSE))
8826 {
8827 my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
8828 if (!failed_to_save_state)
8829 clear_inuse_flag_when_closing(log_file.file);
8830 /*
8831 Restore position so that anything we have in the IO_cache is written
8832 to the correct position.
8833 We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
8834 original position on system that doesn't support pwrite().
8835 */
8836 mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
8837 }
8838
8839 /* this will cleanup IO_CACHE, sync and close the file */
8840 MYSQL_LOG::close(exiting);
8841 }
8842
8843 /*
8844 The following test is needed even if is_open() is not set, as we may have
8845 called a not complete close earlier and the index file is still open.
8846 */
8847
8848 if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
8849 {
8850 end_io_cache(&index_file);
8851 if (unlikely(mysql_file_close(index_file.file, MYF(0)) < 0) &&
8852 ! write_error)
8853 {
8854 write_error= 1;
8855 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), index_file_name, errno);
8856 }
8857 }
8858 log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
8859 my_free(name);
8860 name= NULL;
8861 DBUG_VOID_RETURN;
8862 }
8863
8864
8865 /*
8866 Clear the LOG_EVENT_BINLOG_IN_USE_F; this marks the binlog file as cleanly
8867 closed and not needing crash recovery.
8868 */
clear_inuse_flag_when_closing(File file)8869 void MYSQL_BIN_LOG::clear_inuse_flag_when_closing(File file)
8870 {
8871 my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
8872 uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F
8873 mysql_file_pwrite(file, &flags, 1, offset, MYF(0));
8874 }
8875
8876
set_max_size(ulong max_size_arg)8877 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
8878 {
8879 /*
8880 We need to take locks, otherwise this may happen:
8881 new_file() is called, calls open(old_max_size), then before open() starts,
8882 set_max_size() sets max_size to max_size_arg, then open() starts and
8883 uses the old_max_size argument, so max_size_arg has been overwritten and
8884 it's like if the SET command was never run.
8885 */
8886 DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
8887 mysql_mutex_lock(&LOCK_log);
8888 if (is_open())
8889 max_size= max_size_arg;
8890 mysql_mutex_unlock(&LOCK_log);
8891 DBUG_VOID_RETURN;
8892 }
8893
8894
8895 /**
8896 Check if a string is a valid number.
8897
8898 @param str String to test
8899 @param res Store value here
8900 @param allow_wildcards Set to 1 if we should ignore '%' and '_'
8901
8902 @note
8903 For the moment the allow_wildcards argument is not used
8904 Should be move to some other file.
8905
8906 @retval
8907 1 String is a number
8908 @retval
8909 0 String is not a number
8910 */
8911
test_if_number(const char * str,ulong * res,bool allow_wildcards)8912 static bool test_if_number(const char *str, ulong *res, bool allow_wildcards)
8913 {
8914 int flag;
8915 const char *start;
8916 DBUG_ENTER("test_if_number");
8917
8918 flag=0; start=str;
8919 while (*str++ == ' ') ;
8920 if (*--str == '-' || *str == '+')
8921 str++;
8922 while (my_isdigit(files_charset_info,*str) ||
8923 (allow_wildcards && (*str == wild_many || *str == wild_one)))
8924 {
8925 flag=1;
8926 str++;
8927 }
8928 if (*str == '.')
8929 {
8930 for (str++ ;
8931 my_isdigit(files_charset_info,*str) ||
8932 (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
8933 str++, flag=1) ;
8934 }
8935 if (*str != 0 || flag == 0)
8936 DBUG_RETURN(0);
8937 if (res)
8938 *res=atol(start);
8939 DBUG_RETURN(1); /* Number ok */
8940 } /* test_if_number */
8941
8942
/**
  Report a message together with the description of the last system error.

  On Windows the text for GetLastError() is obtained with FormatMessage();
  if that fails only the message itself is logged. Elsewhere strerror(errno)
  is appended, or perror() is used when strerror() is not available.

  @param message  Text to print in front of the system error description
*/
void sql_perror(const char *message)
{
#if defined(_WIN32)
  char *sys_text;
  const DWORD last_error= GetLastError();
  const DWORD text_len=
    FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
                  FORMAT_MESSAGE_IGNORE_INSERTS, NULL, last_error,
                  MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
                  (LPSTR)&sys_text, 0, NULL );
  if (text_len == 0)
  {
    /* No system text available: log the bare message. */
    sql_print_error("%s", message);
  }
  else
  {
    sql_print_error("%s: %s",message, sys_text);
    /* The buffer was allocated by FormatMessage() on our behalf. */
    LocalFree((HLOCAL)sys_text);
  }
#elif defined(HAVE_STRERROR)
  sql_print_error("%s: %s",message, strerror(errno));
#else
  perror(message);
#endif
}
8965
8966
8967 /*
8968 Change the file associated with two output streams. Used to
8969 redirect stdout and stderr to a file. The streams are reopened
8970 only for appending (writing at end of file).
8971 */
reopen_fstreams(const char * filename,FILE * outstream,FILE * errstream)8972 bool reopen_fstreams(const char *filename, FILE *outstream, FILE *errstream)
8973 {
8974 if ((outstream && !my_freopen(filename, "a", outstream)) ||
8975 (errstream && !my_freopen(filename, "a", errstream)))
8976 {
8977 my_error(ER_CANT_CREATE_FILE, MYF(0), filename, errno);
8978 return TRUE;
8979 }
8980
8981 /* The error stream must be unbuffered. */
8982 if (errstream)
8983 setbuf(errstream, NULL);
8984
8985 return FALSE;
8986 }
8987
8988
8989 /*
8990 Unfortunately, there seems to be no good way
8991 to restore the original streams upon failure.
8992 */
redirect_std_streams(const char * file)8993 static bool redirect_std_streams(const char *file)
8994 {
8995 if (reopen_fstreams(file, stdout, stderr))
8996 return TRUE;
8997
8998 setbuf(stderr, NULL);
8999 return FALSE;
9000 }
9001
9002
flush_error_log()9003 bool flush_error_log()
9004 {
9005 bool result= 0;
9006 if (opt_error_log)
9007 {
9008 mysql_mutex_lock(&LOCK_error_log);
9009 if (redirect_std_streams(log_error_file))
9010 result= 1;
9011 mysql_mutex_unlock(&LOCK_error_log);
9012 }
9013 return result;
9014 }
9015
9016 #ifdef _WIN32
9017 struct eventlog_source
9018 {
9019 HANDLE handle;
eventlog_sourceeventlog_source9020 eventlog_source()
9021 {
9022 setup_windows_event_source();
9023 handle = RegisterEventSource(NULL, "MariaDB");
9024 }
9025
~eventlog_sourceeventlog_source9026 ~eventlog_source()
9027 {
9028 if (handle)
9029 DeregisterEventSource(handle);
9030 }
9031 };
9032
9033 static eventlog_source eventlog;
9034
print_buffer_to_nt_eventlog(enum loglevel level,char * buff,size_t length,size_t buffLen)9035 static void print_buffer_to_nt_eventlog(enum loglevel level, char *buff,
9036 size_t length, size_t buffLen)
9037 {
9038 HANDLE event= eventlog.handle;
9039 char *buffptr= buff;
9040 DBUG_ENTER("print_buffer_to_nt_eventlog");
9041
9042 /* Add ending CR/LF's to string, overwrite last chars if necessary */
9043 strmov(buffptr+MY_MIN(length, buffLen-5), "\r\n\r\n");
9044
9045 if (event)
9046 {
9047 switch (level) {
9048 case ERROR_LEVEL:
9049 ReportEvent(event, EVENTLOG_ERROR_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
9050 (LPCSTR*)&buffptr, NULL);
9051 break;
9052 case WARNING_LEVEL:
9053 ReportEvent(event, EVENTLOG_WARNING_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
9054 (LPCSTR*) &buffptr, NULL);
9055 break;
9056 case INFORMATION_LEVEL:
9057 ReportEvent(event, EVENTLOG_INFORMATION_TYPE, 0, MSG_DEFAULT, NULL, 1,
9058 0, (LPCSTR*) &buffptr, NULL);
9059 break;
9060 }
9061 }
9062
9063 DBUG_VOID_RETURN;
9064 }
9065 #endif /* _WIN32 */
9066
9067
9068 #ifndef EMBEDDED_LIBRARY
print_buffer_to_file(enum loglevel level,const char * buffer,size_t length)9069 static void print_buffer_to_file(enum loglevel level, const char *buffer,
9070 size_t length)
9071 {
9072 time_t skr;
9073 struct tm tm_tmp;
9074 struct tm *start;
9075 THD *thd= 0;
9076 size_t tag_length= 0;
9077 char tag[NAME_LEN];
9078 DBUG_ENTER("print_buffer_to_file");
9079 DBUG_PRINT("enter",("buffer: %s", buffer));
9080
9081 if (mysqld_server_initialized && (thd= current_thd))
9082 {
9083 if (thd->connection_name.length)
9084 {
9085 /*
9086 Add tag for slaves so that the user can see from which connection
9087 the error originates.
9088 */
9089 tag_length= my_snprintf(tag, sizeof(tag),
9090 ER_THD(thd, ER_MASTER_LOG_PREFIX),
9091 (int) thd->connection_name.length,
9092 thd->connection_name.str);
9093 }
9094 }
9095
9096 mysql_mutex_lock(&LOCK_error_log);
9097
9098 skr= my_time(0);
9099 localtime_r(&skr, &tm_tmp);
9100 start=&tm_tmp;
9101
9102 fprintf(stderr, "%d-%02d-%02d %2d:%02d:%02d %lu [%s] %.*s%.*s\n",
9103 start->tm_year + 1900,
9104 start->tm_mon+1,
9105 start->tm_mday,
9106 start->tm_hour,
9107 start->tm_min,
9108 start->tm_sec,
9109 (unsigned long) (thd ? thd->thread_id : 0),
9110 (level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
9111 "Warning" : "Note"),
9112 (int) tag_length, tag,
9113 (int) length, buffer);
9114
9115 fflush(stderr);
9116
9117 mysql_mutex_unlock(&LOCK_error_log);
9118 DBUG_VOID_RETURN;
9119 }
9120
9121 /**
9122 Prints a printf style message to the error log and, under NT, to the
9123 Windows event log.
9124
9125 This function prints the message into a buffer and then sends that buffer
9126 to other functions to write that message to other logging sources.
9127
9128 @param level The level of the msg significance
9129 @param format Printf style format of message
9130 @param args va_list list of arguments for the message
9131
9132 @returns
9133 The function always returns 0. The return value is present in the
9134 signature to be compatible with other logging routines, which could
9135 return an error (e.g. logging to the log tables)
9136 */
vprint_msg_to_log(enum loglevel level,const char * format,va_list args)9137 int vprint_msg_to_log(enum loglevel level, const char *format, va_list args)
9138 {
9139 char buff[1024];
9140 size_t length;
9141 DBUG_ENTER("vprint_msg_to_log");
9142
9143 length= my_vsnprintf(buff, sizeof(buff), format, args);
9144 print_buffer_to_file(level, buff, length);
9145
9146 #ifdef _WIN32
9147 print_buffer_to_nt_eventlog(level, buff, length, sizeof(buff));
9148 #endif
9149
9150 DBUG_RETURN(0);
9151 }
9152 #endif /* EMBEDDED_LIBRARY */
9153
9154
sql_print_error(const char * format,...)9155 void sql_print_error(const char *format, ...)
9156 {
9157 va_list args;
9158 DBUG_ENTER("sql_print_error");
9159
9160 va_start(args, format);
9161 error_log_print(ERROR_LEVEL, format, args);
9162 va_end(args);
9163
9164 DBUG_VOID_RETURN;
9165 }
9166
9167
sql_print_warning(const char * format,...)9168 void sql_print_warning(const char *format, ...)
9169 {
9170 va_list args;
9171 DBUG_ENTER("sql_print_warning");
9172
9173 va_start(args, format);
9174 error_log_print(WARNING_LEVEL, format, args);
9175 va_end(args);
9176
9177 DBUG_VOID_RETURN;
9178 }
9179
9180
sql_print_information(const char * format,...)9181 void sql_print_information(const char *format, ...)
9182 {
9183 va_list args;
9184 DBUG_ENTER("sql_print_information");
9185
9186 va_start(args, format);
9187 sql_print_information_v(format, args);
9188 va_end(args);
9189
9190 DBUG_VOID_RETURN;
9191 }
9192
sql_print_information_v(const char * format,va_list ap)9193 void sql_print_information_v(const char *format, va_list ap)
9194 {
9195 if (disable_log_notes)
9196 return; // Skip notes during start/shutdown
9197
9198 error_log_print(INFORMATION_LEVEL, format, ap);
9199 }
9200
9201 void
run_prepare_ordered(THD * thd,bool all)9202 TC_LOG::run_prepare_ordered(THD *thd, bool all)
9203 {
9204 Ha_trx_info *ha_info=
9205 all ? thd->transaction->all.ha_list : thd->transaction->stmt.ha_list;
9206
9207 mysql_mutex_assert_owner(&LOCK_prepare_ordered);
9208 for (; ha_info; ha_info= ha_info->next())
9209 {
9210 handlerton *ht= ha_info->ht();
9211 if (!ht->prepare_ordered)
9212 continue;
9213 ht->prepare_ordered(ht, thd, all);
9214 }
9215 }
9216
9217
9218 void
run_commit_ordered(THD * thd,bool all)9219 TC_LOG::run_commit_ordered(THD *thd, bool all)
9220 {
9221 Ha_trx_info *ha_info=
9222 all ? thd->transaction->all.ha_list : thd->transaction->stmt.ha_list;
9223
9224 mysql_mutex_assert_owner(&LOCK_commit_ordered);
9225 for (; ha_info; ha_info= ha_info->next())
9226 {
9227 handlerton *ht= ha_info->ht();
9228 if (!ht->commit_ordered)
9229 continue;
9230 ht->commit_ordered(ht, thd, all);
9231 DEBUG_SYNC(thd, "commit_after_run_commit_ordered");
9232 }
9233 }
9234
9235
/**
  Log a transaction in the mmap based TC log and run the engines'
  prepare_ordered()/commit_ordered() hooks in a consistent order.

  When both hooks are requested, threads queue up (newest first) in
  commit_ordered_queue while holding LOCK_prepare_ordered. After logging,
  the thread that found the queue empty (the "leader") takes the whole
  queue, reverses it, and each thread then runs its commit_ordered() step
  and wakes up the next one.

  @param thd                   Connection whose transaction is committed
  @param xid                   XID to log; nothing is logged if it is zero
  @param all                   Passed on to run_prepare_ordered() and
                               run_commit_ordered()
  @param need_prepare_ordered  Run run_prepare_ordered()
  @param need_commit_ordered   Run run_commit_ordered() (only done when a
                               non-zero cookie was obtained)

  @return
    The cookie returned by log_one_transaction(); 0 if xid is zero, if
    logging failed, or if thd->wait_for_prior_commit() returned non-zero.
*/
int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all,
                               bool need_prepare_ordered,
                               bool need_commit_ordered)
{
  int cookie;
  struct commit_entry entry;
  /* Only assigned and read when both need_* flags are set. */
  bool UNINIT_VAR(is_group_commit_leader);

  if (need_prepare_ordered)
  {
    mysql_mutex_lock(&LOCK_prepare_ordered);
    run_prepare_ordered(thd, all);
    if (need_commit_ordered)
    {
      /*
        Must put us in queue so we can run_commit_ordered() in same sequence
        as we did run_prepare_ordered().
      */
      thd->clear_wakeup_ready();
      entry.thd= thd;
      /* Push this thread's (stack allocated) entry at the head of the queue. */
      commit_entry *previous_queue= commit_ordered_queue;
      entry.next= previous_queue;
      commit_ordered_queue= &entry;
      /* Whoever finds the queue empty becomes the leader of this group. */
      is_group_commit_leader= (previous_queue == NULL);
    }
    mysql_mutex_unlock(&LOCK_prepare_ordered);
  }

  /*
    NOTE(review): when both need_* flags are set, `entry` (a stack object)
    is already linked into commit_ordered_queue at this point. Confirm that
    returning here cannot leave a stale entry in the queue.
  */
  if (thd->wait_for_prior_commit())
    return 0;

  cookie= 0;
  if (xid)
    cookie= log_one_transaction(xid);

  if (need_commit_ordered)
  {
    if (need_prepare_ordered)
    {
      /*
        We did the run_prepare_ordered() serialised, then ran the log_xid() in
        parallel. Now we have to do run_commit_ordered() serialised in the
        same sequence as run_prepare_ordered().

        We do this starting from the head of the queue, each thread doing
        run_commit_ordered() and signalling the next in queue.
      */
      if (is_group_commit_leader)
      {
        /* The first in queue starts the ball rolling. */
        mysql_mutex_lock(&LOCK_prepare_ordered);
        /* Wait until the previous group has finished with its queue. */
        while (commit_ordered_queue_busy)
          mysql_cond_wait(&COND_queue_busy, &LOCK_prepare_ordered);
        commit_entry *queue= commit_ordered_queue;
        commit_ordered_queue= NULL;
        /*
          Mark the queue busy while we bounce it from one thread to the
          next.
        */
        commit_ordered_queue_busy= true;
        mysql_mutex_unlock(&LOCK_prepare_ordered);

        /* Reverse the queue list so we get correct order. */
        commit_entry *prev= NULL;
        while (queue)
        {
          commit_entry *next= queue->next;
          queue->next= prev;
          prev= queue;
          queue= next;
        }
        /* The leader was queued first, so it ends up at the new head. */
        DBUG_ASSERT(prev == &entry);
        DBUG_ASSERT(prev->thd == thd);
      }
      else
      {
        /* Not first in queue; just wait until previous thread wakes us up. */
        thd->wait_for_wakeup_ready();
      }
    }

    /* Only run commit_ordered() if log_xid was successful. */
    if (cookie)
    {
      mysql_mutex_lock(&LOCK_commit_ordered);
      run_commit_ordered(thd, all);
      mysql_mutex_unlock(&LOCK_commit_ordered);
    }

    if (need_prepare_ordered)
    {
      /* Hand over to the next thread, or release the queue if we are last. */
      commit_entry *next= entry.next;
      if (next)
      {
        next->thd->signal_wakeup_ready();
      }
      else
      {
        mysql_mutex_lock(&LOCK_prepare_ordered);
        commit_ordered_queue_busy= false;
        mysql_cond_signal(&COND_queue_busy);
        mysql_mutex_unlock(&LOCK_prepare_ordered);
      }
    }
  }

  return cookie;
}
9344
9345
9346 /********* transaction coordinator log for 2pc - mmap() based solution *******/
9347
9348 /*
9349 the log consists of a file, mapped to memory.
9350 file is divided into pages of tc_log_page_size size.
9351 (usable size of the first page is smaller because of the log header)
9352 there is a PAGE control structure for each page
9353 each page (or rather its PAGE control structure) can be in one of
9354 the three states - active, syncing, pool.
9355 there could be only one page in the active or syncing state,
9356 but many in pool - pool is a fifo queue.
9357 the usual lifecycle of a page is pool->active->syncing->pool.
9358 the "active" page is a page where new xid's are logged.
9359 the page stays active as long as the syncing slot is taken.
9360 the "syncing" page is being synced to disk. no new xid can be added to it.
9361 when the syncing is done the page is moved to a pool and an active page
9362 becomes "syncing".
9363
9364 the result of such an architecture is a natural "commit grouping" -
9365 If commits are coming faster than the system can sync, they do not
9366 stall. Instead, all commits that came since the last sync are
  logged to the same "active" page, and they are all synced with the next
  single sync. Thus, though individual commits are delayed, throughput
  is not decreasing.
9370
9371 when an xid is added to an active page, the thread of this xid waits
9372 for a page's condition until the page is synced. when syncing slot
  becomes vacant one of these waiters is awakened to take care of syncing.
9374 it syncs the page and signals all waiters that the page is synced.
9375 PAGE::waiters is used to count these waiters, and a page may never
9376 become active again until waiters==0 (that is all waiters from the
9377 previous sync have noticed that the sync was completed)
9378
9379 note, that the page becomes "dirty" and has to be synced only when a
9380 new xid is added into it. Removing a xid from a page does not make it
9381 dirty - we don't sync xid removals to disk.
9382 */
9383
/* Counter incremented every time TC_LOG_MMAP::overflow() has to wait. */
ulong tc_log_page_waits= 0;

#ifdef HAVE_MMAP

/*
  Size of the log header: the magic signature plus one byte, which
  TC_LOG_MMAP::open() sets to total_ha_2pc.
*/
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)

/* Signature stored at the very start of the log file. */
static const uchar tc_log_magic[]={(uchar) 254, 0x23, 0x05, 0x74};

/* Length given to a newly created log file by TC_LOG_MMAP::open(). */
ulong opt_tc_log_size;
/*
  tc_log_page_size is set from my_getpagesize() in TC_LOG_MMAP::open();
  the two *_pages_used counters are maintained when an empty page becomes
  active (get_active_from_pool()) and when a page becomes empty again
  (delete_entry()).
*/
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
9394
/**
  Open (or create) the memory mapped transaction coordinator log.

  If the log file already exists this is treated as a restart after a
  crash and recover() is run. Initialisation proceeds in stages recorded
  in `inited`, so that close() can undo exactly what was done so far.

  @param opt_name  Name of the log file; formatted relative to
                   mysql_data_home

  @retval 0  Success
  @retval 1  Failure; also returned when the file does not exist and
             using_heuristic_recover() reports that heuristic recovery
             was performed
*/
int TC_LOG_MMAP::open(const char *opt_name)
{
  uint i;
  bool crashed=FALSE;
  PAGE *pg;

  DBUG_ASSERT(total_ha_2pc > 1);
  DBUG_ASSERT(opt_name);
  DBUG_ASSERT(opt_name[0]);

  tc_log_page_size= my_getpagesize();

  fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
  if ((fd= mysql_file_open(key_file_tclog, logname, O_RDWR | O_CLOEXEC, MYF(0))) < 0)
  {
    /* Only a missing file is acceptable here: create a fresh log. */
    if (my_errno != ENOENT)
      goto err;
    if (using_heuristic_recover())
      return 1;
    if ((fd= mysql_file_create(key_file_tclog, logname, CREATE_MODE,
                               O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0)
      goto err;
    inited=1;
    file_length= opt_tc_log_size;
    if (mysql_file_chsize(fd, file_length, 0, MYF(MY_WME)))
      goto err;
  }
  else
  {
    /* The file exists: treat this as a restart after a crash. */
    inited= 1;
    crashed= TRUE;
    sql_print_information("Recovering after a crash using %s", opt_name);
    if (tc_heuristic_recover)
    {
      sql_print_error("Cannot perform automatic crash recovery when "
                      "--tc-heuristic-recover is used");
      goto err;
    }
    file_length= mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
    /* The existing file must consist of a whole number of pages. */
    if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
      goto err;
  }

  data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
                        MAP_NOSYNC|MAP_SHARED, fd, 0);
  if (data == MAP_FAILED)
  {
    my_errno=errno;
    goto err;
  }
  inited=2;

  npages=(uint)file_length/tc_log_page_size;
  if (npages < 3)             // to guarantee non-empty pool
    goto err;
  if (!(pages=(PAGE *)my_malloc(key_memory_TC_LOG_MMAP_pages,
                                npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
    goto err;
  inited=3;
  /* Set up one PAGE control structure per page, linked into one list. */
  for (pg=pages, i=0; i < npages; i++, pg++)
  {
    pg->next=pg+1;
    pg->waiters=0;
    pg->state=PS_POOL;
    mysql_mutex_init(key_PAGE_lock, &pg->lock, MY_MUTEX_INIT_FAST);
    mysql_cond_init(key_PAGE_cond, &pg->cond, 0);
    pg->ptr= pg->start=(my_xid *)(data + i*tc_log_page_size);
    pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
    pg->end=pg->start + pg->size;
  }
  /* The first page is smaller: it also holds the log header. */
  pages[0].size=pages[0].free=
                (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
  pages[0].start=pages[0].end-pages[0].size;
  pages[npages-1].next=0;
  inited=4;

  if (crashed && recover())
    goto err;

  /* Write the header: magic signature followed by total_ha_2pc. */
  memcpy(data, tc_log_magic, sizeof(tc_log_magic));
  data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc;
  my_msync(fd, data, tc_log_page_size, MS_SYNC);
  inited=5;

  mysql_mutex_init(key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_LOCK_active, &LOCK_active, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_LOCK_pool, &LOCK_pool, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_LOCK_pending_checkpoint, &LOCK_pending_checkpoint,
                   MY_MUTEX_INIT_FAST);
  mysql_cond_init(key_COND_active, &COND_active, 0);
  mysql_cond_init(key_COND_pool, &COND_pool, 0);
  mysql_cond_init(key_TC_LOG_MMAP_COND_queue_busy, &COND_queue_busy, 0);

  inited=6;

  /* Page 0 starts out active; all remaining pages form the pool. */
  syncing= 0;
  active=pages;
  DBUG_ASSERT(npages >= 2);
  pool=pages+1;
  pool_last_ptr= &((pages+npages-1)->next);
  commit_ordered_queue= NULL;
  commit_ordered_queue_busy= false;

  return 0;

err:
  /* close() releases whatever was set up, according to `inited`. */
  close();
  return 1;
}
9504
/**
  there is no active page, let's get one from the pool.

  Two strategies here:
    -# take the first from the pool
    -# if there're waiters - take the one with the most free space.

  If no suitable page is found, overflow() is called to wait on COND_pool
  and the search is repeated.

  The caller must hold LOCK_active. On return `active` points to the
  chosen page, which has been unlinked from the pool, and active->lock
  is held.

  @todo
    page merging. try to allocate adjacent page first,
    so that they can be flushed both in one sync
*/

void TC_LOG_MMAP::get_active_from_pool()
{
  PAGE **p, **best_p=0;
  int best_free;

  mysql_mutex_lock(&LOCK_pool);

  do
  {
    best_p= p= &pool;
    if ((*p)->waiters == 0 && (*p)->free > 0) // can the first page be used ?
      break;                                  // yes - take it.

    best_free=0;            // no - trying second strategy
    for (p=&(*p)->next; *p; p=&(*p)->next)
    {
      if ((*p)->waiters == 0 && (*p)->free > best_free)
      {
        best_free=(*p)->free;
        best_p=p;
      }
    }
  }
  while ((*best_p == 0 || best_free == 0) && overflow());

  mysql_mutex_assert_owner(&LOCK_active);
  active=*best_p;

  /* Unlink the page from the pool. */
  if (!(*best_p)->next)
    pool_last_ptr= best_p;    // the last page is removed: move the tail pointer
  *best_p=(*best_p)->next;
  mysql_mutex_unlock(&LOCK_pool);

  /* Not released here: log_one_transaction() unlocks it after storing the xid. */
  mysql_mutex_lock(&active->lock);
  if (active->free == active->size) // we've chosen an empty page
  {
    tc_log_cur_pages_used++;
    set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
  }
}
9558
9559 /**
9560 @todo
9561 perhaps, increase log size ?
9562 */
overflow()9563 int TC_LOG_MMAP::overflow()
9564 {
9565 /*
9566 simple overflow handling - just wait
9567 TODO perhaps, increase log size ?
9568 let's check the behaviour of tc_log_page_waits first
9569 */
9570 tc_log_page_waits++;
9571 mysql_cond_wait(&COND_pool, &LOCK_pool);
9572 return 1; // always return 1
9573 }
9574
/**
  Record that transaction XID is committed on the persistent storage.

    This function is called in the middle of two-phase commit:
    First all resources prepare the transaction, then tc_log->log() is called,
    then all resources commit the transaction, then tc_log->unlog() is called.

    All access to active page is serialized but it's not a problem, as
    we're assuming that fsync() will be a main bottleneck.
    That is, parallelizing writes to log pages we'll decrease number of
    threads waiting for a page, but then all these threads will be waiting
    for a fsync() anyway

   If tc_log == MYSQL_LOG then tc_log writes transaction to binlog and
   records XID in a special Xid_log_event.
   If tc_log = TC_LOG_MMAP then xid is written in a special memory-mapped
   log.

  @param xid  XID to store in the active page

  @retval
    0  - error
  @retval
    \# - otherwise, "cookie", a number that will be passed as an argument
    to unlog() call. tc_log can define it any way it wants,
    and use for whatever purposes. TC_LOG_MMAP sets it
    to the position in memory where xid was logged to.
*/

int TC_LOG_MMAP::log_one_transaction(my_xid xid)
{
  int err;
  PAGE *p;
  ulong cookie;

  mysql_mutex_lock(&LOCK_active);

  /*
    if the active page is full - just wait...
    frankly speaking, active->free here accessed outside of mutex
    protection, but it's safe, because it only means we may miss an
    unlog() for the active page, and we're not waiting for it here -
    unlog() does not signal COND_active.
  */
  while (unlikely(active && active->free == 0))
    mysql_cond_wait(&COND_active, &LOCK_active);

  /* no active page ? take one from the pool */
  if (active == 0)
    get_active_from_pool();     // returns with active->lock held
  else
    mysql_mutex_lock(&active->lock);

  p=active;

  /*
    p->free is always > 0 here because to decrease it one needs
    to take p->lock and before it one needs to take LOCK_active.
    But checked that active->free > 0 under LOCK_active and
    haven't release it ever since
  */

  /* searching for an empty slot */
  while (*p->ptr)
  {
    p->ptr++;
    DBUG_ASSERT(p->ptr < p->end);               // because p->free > 0
  }

  /* found! store xid there and mark the page dirty */
  cookie= (ulong)((uchar *)p->ptr - data);      // can never be zero
  *p->ptr++= xid;
  p->free--;
  p->state= PS_DIRTY;
  mysql_mutex_unlock(&p->lock);

  mysql_mutex_lock(&LOCK_sync);
  if (syncing)
  {                                          // somebody's syncing. let's wait
    mysql_mutex_unlock(&LOCK_active);
    mysql_mutex_lock(&p->lock);
    p->waiters++;
    /* Wait until the page has been synced or the syncing slot is free. */
    while (p->state == PS_DIRTY && syncing)
    {
      mysql_mutex_unlock(&p->lock);
      mysql_cond_wait(&p->cond, &LOCK_sync);
      mysql_mutex_lock(&p->lock);
    }
    p->waiters--;
    err= p->state == PS_ERROR;
    if (p->state != PS_DIRTY)                   // page was synced
    {
      mysql_mutex_unlock(&LOCK_sync);
      if (p->waiters == 0)
        mysql_cond_signal(&COND_pool);     // in case somebody's waiting
      mysql_mutex_unlock(&p->lock);
      goto done;                             // we're done
    }
    /* Still dirty and nobody is syncing: this thread syncs the page. */
    DBUG_ASSERT(!syncing);
    mysql_mutex_unlock(&p->lock);
    syncing = p;
    mysql_mutex_unlock(&LOCK_sync);

    mysql_mutex_lock(&LOCK_active);
    active=0;                                  // page is not active anymore
    mysql_cond_broadcast(&COND_active);
    mysql_mutex_unlock(&LOCK_active);
  }
  else
  {
    syncing = p;                               // place is vacant - take it
    mysql_mutex_unlock(&LOCK_sync);
    active = 0;                                // page is not active anymore
    mysql_cond_broadcast(&COND_active);
    mysql_mutex_unlock(&LOCK_active);
  }
  err= sync();

done:
  return err ? 0 : cookie;
}
9694
/**
  Flush the page in the `syncing` slot to disk with my_msync(), append it
  to the end of the pool, and free the `syncing` slot.

  The page state becomes PS_ERROR if my_msync() failed and PS_POOL
  otherwise. Threads waiting on the page's condition are woken up, and if
  there is an active page one waiter on it is signalled so that it can
  become the next syncer.

  @return The result of my_msync() (non-zero on failure)
*/
int TC_LOG_MMAP::sync()
{
  int err;

  DBUG_ASSERT(syncing != active);

  /*
    sit down and relax - this can take a while...
    note - no locks are held at this point
  */
  err= my_msync(fd, syncing->start, syncing->size * sizeof(my_xid), MS_SYNC);

  /* page is synced. let's move it to the pool */
  mysql_mutex_lock(&LOCK_pool);
  (*pool_last_ptr)=syncing;
  pool_last_ptr=&(syncing->next);
  syncing->next=0;
  syncing->state= err ? PS_ERROR : PS_POOL;
  mysql_cond_signal(&COND_pool);           // in case somebody's waiting
  mysql_mutex_unlock(&LOCK_pool);

  /* marking 'syncing' slot free */
  mysql_mutex_lock(&LOCK_sync);
  mysql_cond_broadcast(&syncing->cond);    // signal "sync done"
  syncing=0;
  /*
    we check the "active" pointer without LOCK_active. Still, it's safe -
    "active" can change from NULL to not NULL any time, but it
    will take LOCK_sync before waiting on active->cond. That is, it can never
    miss a signal.
    And "active" can change to NULL only by the syncing thread
    (the thread that will send a signal below)
  */
  if (active)
    mysql_cond_signal(&active->cond);      // wake up a new syncer
  mysql_mutex_unlock(&LOCK_sync);
  return err;
}
9733
9734 static void
mmap_do_checkpoint_callback(void * data)9735 mmap_do_checkpoint_callback(void *data)
9736 {
9737 TC_LOG_MMAP::pending_cookies *pending=
9738 static_cast<TC_LOG_MMAP::pending_cookies *>(data);
9739 ++pending->pending_count;
9740 }
9741
/**
  Queue a logged xid for removal from the log.

  The cookie is appended to the pending_checkpoint buffer. Once the buffer
  holds tc_log_page_size / sizeof(my_xid) cookies, a commit checkpoint is
  requested and the buffer is handed to commit_checkpoint_notify().

  @param cookie  Cookie returned by log_one_transaction() (offset of the
                 xid inside the mapped log)
  @param xid     The xid that was logged; only used in a debug assertion

  @retval 0  Success
  @retval 1  Out of memory while allocating the pending buffer
*/
int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
{
  pending_cookies *full_buffer= NULL;
  uint32 ncookies= tc_log_page_size / sizeof(my_xid);
  DBUG_ASSERT(*(my_xid *)(data+cookie) == xid);

  /*
    Do not delete the entry immediately, as there may be participating storage
    engines which implement commit_checkpoint_request(), and thus have not yet
    flushed the commit durably to disk.

    Instead put it in a queue - and periodically, we will request a checkpoint
    from all engines and delete a whole batch at once.
  */
  mysql_mutex_lock(&LOCK_pending_checkpoint);
  if (pending_checkpoint == NULL)
  {
    /* Allocate the structure plus room for ncookies - 1 additional cookies. */
    uint32 size= sizeof(*pending_checkpoint) + sizeof(ulong) * (ncookies - 1);
    if (!(pending_checkpoint=
          (pending_cookies *)my_malloc(PSI_INSTRUMENT_ME, size,
                                       MYF(MY_ZEROFILL))))
    {
      my_error(ER_OUTOFMEMORY, MYF(0), size);
      mysql_mutex_unlock(&LOCK_pending_checkpoint);
      return 1;
    }
  }

  pending_checkpoint->cookies[pending_checkpoint->count++]= cookie;
  if (pending_checkpoint->count == ncookies)
  {
    /* Buffer is full: detach it so that the next unlog() starts a new one. */
    full_buffer= pending_checkpoint;
    pending_checkpoint= NULL;
  }
  mysql_mutex_unlock(&LOCK_pending_checkpoint);

  if (full_buffer)
  {
    /*
      We do an extra increment and notify here - this ensures that
      things work also if there are no engines at all that support
      commit_checkpoint_request.
    */
    ++full_buffer->pending_count;
    ha_commit_checkpoint_request(full_buffer, mmap_do_checkpoint_callback);
    commit_checkpoint_notify(full_buffer);
  }
  return 0;
}
9791
9792
9793 void
commit_checkpoint_notify(void * cookie)9794 TC_LOG_MMAP::commit_checkpoint_notify(void *cookie)
9795 {
9796 uint count;
9797 pending_cookies *pending= static_cast<pending_cookies *>(cookie);
9798 mysql_mutex_lock(&LOCK_pending_checkpoint);
9799 DBUG_ASSERT(pending->pending_count > 0);
9800 count= --pending->pending_count;
9801 mysql_mutex_unlock(&LOCK_pending_checkpoint);
9802 if (count == 0)
9803 {
9804 uint i;
9805 for (i= 0; i < tc_log_page_size / sizeof(my_xid); ++i)
9806 delete_entry(pending->cookies[i]);
9807 my_free(pending);
9808 }
9809 }
9810
9811
9812 /**
9813 erase xid from the page, update page free space counters/pointers.
9814 cookie points directly to the memory where xid was logged.
9815 */
9816
delete_entry(ulong cookie)9817 int TC_LOG_MMAP::delete_entry(ulong cookie)
9818 {
9819 PAGE *p=pages+(cookie/tc_log_page_size);
9820 my_xid *x=(my_xid *)(data+cookie);
9821
9822 DBUG_ASSERT(x >= p->start);
9823 DBUG_ASSERT(x < p->end);
9824
9825 mysql_mutex_lock(&p->lock);
9826 *x=0;
9827 p->free++;
9828 DBUG_ASSERT(p->free <= p->size);
9829 set_if_smaller(p->ptr, x);
9830 if (p->free == p->size) // the page is completely empty
9831 statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
9832 if (p->waiters == 0) // the page is in pool and ready to rock
9833 mysql_cond_signal(&COND_pool); // ping ... for overflow()
9834 mysql_mutex_unlock(&p->lock);
9835 return 0;
9836 }
9837
close()9838 void TC_LOG_MMAP::close()
9839 {
9840 uint i;
9841 switch (inited) {
9842 case 6:
9843 mysql_mutex_destroy(&LOCK_sync);
9844 mysql_mutex_destroy(&LOCK_active);
9845 mysql_mutex_destroy(&LOCK_pool);
9846 mysql_mutex_destroy(&LOCK_pending_checkpoint);
9847 mysql_cond_destroy(&COND_pool);
9848 mysql_cond_destroy(&COND_active);
9849 mysql_cond_destroy(&COND_queue_busy);
9850 /* fall through */
9851 case 5:
9852 data[0]='A'; // garble the first (signature) byte, in case mysql_file_delete fails
9853 /* fall through */
9854 case 4:
9855 for (i=0; i < npages; i++)
9856 {
9857 if (pages[i].ptr == 0)
9858 break;
9859 mysql_mutex_destroy(&pages[i].lock);
9860 mysql_cond_destroy(&pages[i].cond);
9861 }
9862 /* fall through */
9863 case 3:
9864 my_free(pages);
9865 /* fall through */
9866 case 2:
9867 my_munmap((char*)data, (size_t)file_length);
9868 /* fall through */
9869 case 1:
9870 mysql_file_close(fd, MYF(0));
9871 }
9872 if (inited>=5) // cannot do in the switch because of Windows
9873 mysql_file_delete(key_file_tclog, logname, MYF(MY_WME));
9874 if (pending_checkpoint)
9875 my_free(pending_checkpoint);
9876 inited=0;
9877 }
9878
9879
recover()9880 int TC_LOG_MMAP::recover()
9881 {
9882 HASH xids;
9883 PAGE *p=pages, *end_p=pages+npages;
9884
9885 if (bcmp(data, tc_log_magic, sizeof(tc_log_magic)))
9886 {
9887 sql_print_error("Bad magic header in tc log");
9888 goto err1;
9889 }
9890
9891 /*
9892 the first byte after magic signature is set to current
9893 number of storage engines on startup
9894 */
9895 if (data[sizeof(tc_log_magic)] > total_ha_2pc)
9896 {
9897 sql_print_error("Recovery failed! You must enable "
9898 "all engines that were enabled at the moment of the crash");
9899 goto err1;
9900 }
9901
9902 if (my_hash_init(PSI_INSTRUMENT_ME, &xids, &my_charset_bin,
9903 tc_log_page_size/3, 0, sizeof(my_xid), 0, 0, MYF(0)))
9904 goto err1;
9905
9906 for ( ; p < end_p ; p++)
9907 {
9908 for (my_xid *x=p->start; x < p->end; x++)
9909 if (*x && my_hash_insert(&xids, (uchar *)x))
9910 goto err2; // OOM
9911 }
9912
9913 if (ha_recover(&xids))
9914 goto err2;
9915
9916 my_hash_free(&xids);
9917 bzero(data, (size_t)file_length);
9918 return 0;
9919
9920 err2:
9921 my_hash_free(&xids);
9922 err1:
9923 sql_print_error("Crash recovery failed. Either correct the problem "
9924 "(if it's, for example, out of memory error) and restart, "
9925 "or delete tc log and start mysqld with "
9926 "--tc-heuristic-recover={commit|rollback}");
9927 return 1;
9928 }
9929 #endif
9930
/*
  Transaction coordinator log objects: a TC_LOG pointer plus one instance
  each of the dummy and the mmap based implementation.
  NOTE(review): tc_log presumably points at whichever implementation is in
  use; the assignment is not in this part of the file - confirm.
*/
TC_LOG *tc_log;
TC_LOG_DUMMY tc_log_dummy;
TC_LOG_MMAP tc_log_mmap;
9934
9935 /**
9936 Perform heuristic recovery, if --tc-heuristic-recover was used.
9937
9938 @note
9939 no matter whether heuristic recovery was successful or not
9940 mysqld must exit. So, return value is the same in both cases.
9941
9942 @retval
9943 0 no heuristic recovery was requested
9944 @retval
9945 1 heuristic recovery was performed
9946 */
9947
using_heuristic_recover()9948 int TC_LOG::using_heuristic_recover()
9949 {
9950 if (!tc_heuristic_recover)
9951 return 0;
9952
9953 sql_print_information("Heuristic crash recovery mode");
9954 if (ha_recover(0))
9955 sql_print_error("Heuristic crash recovery failed");
9956 sql_print_information("Please restart mysqld without --tc-heuristic-recover");
9957 return 1;
9958 }
9959
/****** transaction coordinator log for 2pc - binlog() based solution ******/
/*
  TC_LOG_BINLOG is only an alias: the TC_LOG_BINLOG:: methods defined below
  are members of MYSQL_BIN_LOG.
*/
#define TC_LOG_BINLOG MYSQL_BIN_LOG
9962
open(const char * opt_name)9963 int TC_LOG_BINLOG::open(const char *opt_name)
9964 {
9965 int error= 1;
9966
9967 DBUG_ASSERT(total_ha_2pc > 1);
9968 DBUG_ASSERT(opt_name);
9969 DBUG_ASSERT(opt_name[0]);
9970
9971 if (!my_b_inited(&index_file))
9972 {
9973 /* There was a failure to open the index file, can't open the binlog */
9974 cleanup();
9975 return 1;
9976 }
9977
9978 if (using_heuristic_recover())
9979 {
9980 mysql_mutex_lock(&LOCK_log);
9981 /* generate a new binlog to mask a corrupted one */
9982 open(opt_name, 0, 0, WRITE_CACHE, max_binlog_size, 0, TRUE);
9983 mysql_mutex_unlock(&LOCK_log);
9984 cleanup();
9985 return 1;
9986 }
9987
9988 error= do_binlog_recovery(opt_name, true);
9989 binlog_state_recover_done= true;
9990 return error;
9991 }
9992
/**
  This is called on shutdown, after ha_panic.

  Intentionally empty: nothing is done for the binlog based TC log here.
*/
void TC_LOG_BINLOG::close()
{
}
9997
9998 /*
9999 Do a binlog log_xid() for a group of transactions, linked through
10000 thd->next_commit_ordered.
10001 */
10002 int
log_and_order(THD * thd,my_xid xid,bool all,bool need_prepare_ordered,bool need_commit_ordered)10003 TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
10004 bool need_prepare_ordered __attribute__((unused)),
10005 bool need_commit_ordered __attribute__((unused)))
10006 {
10007 int err;
10008 DBUG_ENTER("TC_LOG_BINLOG::log_and_order");
10009
10010 binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data();
10011 if (!cache_mngr)
10012 {
10013 WSREP_DEBUG("Skipping empty log_xid: %s", thd->query());
10014 DBUG_RETURN(0);
10015 }
10016
10017 cache_mngr->using_xa= TRUE;
10018 cache_mngr->xa_xid= xid;
10019 err= binlog_commit_flush_xid_caches(thd, cache_mngr, all, xid);
10020
10021 DEBUG_SYNC(thd, "binlog_after_log_and_order");
10022
10023 if (err)
10024 DBUG_RETURN(0);
10025
10026 bool need_unlog= cache_mngr->need_unlog;
10027 /*
10028 The transaction won't need the flag anymore.
10029 Todo/fixme: consider to move the statement into cache_mngr->reset()
10030 relocated to the current or later point.
10031 */
10032 cache_mngr->need_unlog= false;
10033 /*
10034 If using explicit user XA, we will not have XID. We must still return a
10035 non-zero cookie (as zero cookie signals error).
10036 */
10037 if (!xid || !need_unlog)
10038 DBUG_RETURN(BINLOG_COOKIE_DUMMY(cache_mngr->delayed_error));
10039
10040 DBUG_RETURN(BINLOG_COOKIE_MAKE(cache_mngr->binlog_id,
10041 cache_mngr->delayed_error));
10042 }
10043
10044 /*
10045 After an XID is logged, we need to hold on to the current binlog file until
10046 it is fully committed in the storage engine. The reason is that crash
10047 recovery only looks at the latest binlog, so we must make sure there are no
10048 outstanding prepared (but not committed) transactions before rotating the
10049 binlog.
10050
10051 To handle this, we keep a count of outstanding XIDs. This function is used
10052 to increase this count when committing one or more transactions to the
10053 binary log.
10054 */
10055 void
mark_xids_active(ulong binlog_id,uint xid_count)10056 TC_LOG_BINLOG::mark_xids_active(ulong binlog_id, uint xid_count)
10057 {
10058 xid_count_per_binlog *b;
10059
10060 DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
10061 DBUG_PRINT("info", ("binlog_id=%lu xid_count=%u", binlog_id, xid_count));
10062
10063 mysql_mutex_lock(&LOCK_xid_list);
10064 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
10065 while ((b= it++))
10066 {
10067 if (b->binlog_id == binlog_id)
10068 {
10069 b->xid_count += xid_count;
10070 break;
10071 }
10072 }
10073 /*
10074 As we do not delete elements until count reach zero, elements should always
10075 be found.
10076 */
10077 DBUG_ASSERT(b);
10078 mysql_mutex_unlock(&LOCK_xid_list);
10079 DBUG_VOID_RETURN;
10080 }
10081
10082 /*
10083 Once an XID is committed, it can no longer be needed during crash recovery,
10084 as it has been durably recorded on disk as "committed".
10085
10086 This function is called to mark an XID this way. It needs to decrease the
10087 count of pending XIDs in the corresponding binlog. When the count reaches
10088 zero (for an "old" binlog that is not the active one), that binlog file no
10089 longer need to be scanned during crash recovery, so we can log a new binlog
10090 checkpoint.
10091 */
10092 void
mark_xid_done(ulong binlog_id,bool write_checkpoint)10093 TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
10094 {
10095 xid_count_per_binlog *b;
10096 bool first;
10097 ulong current;
10098
10099 DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");
10100
10101 mysql_mutex_lock(&LOCK_xid_list);
10102 current= current_binlog_id;
10103 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
10104 first= true;
10105 while ((b= it++))
10106 {
10107 if (b->binlog_id == binlog_id)
10108 {
10109 --b->xid_count;
10110
10111 DBUG_ASSERT(b->xid_count >= 0); // catch unmatched (++) decrement
10112
10113 break;
10114 }
10115 first= false;
10116 }
10117 /* Binlog is always found, as we do not remove until count reaches 0 */
10118 DBUG_ASSERT(b);
10119 /*
10120 If a RESET MASTER is pending, we are about to remove all log files, and
10121 the RESET MASTER thread is waiting for all pending unlog() calls to
10122 complete while holding LOCK_log. In this case we should not log a binlog
10123 checkpoint event (it would be deleted immediately anyway and we would
10124 deadlock on LOCK_log) but just signal the thread.
10125 */
10126 if (unlikely(reset_master_pending))
10127 {
10128 mysql_cond_broadcast(&COND_xid_list);
10129 mysql_mutex_unlock(&LOCK_xid_list);
10130 DBUG_VOID_RETURN;
10131 }
10132
10133 if (likely(binlog_id == current) || b->xid_count != 0 || !first ||
10134 !write_checkpoint)
10135 {
10136 /* No new binlog checkpoint reached yet. */
10137 mysql_mutex_unlock(&LOCK_xid_list);
10138 DBUG_VOID_RETURN;
10139 }
10140
10141 /*
10142 Now log a binlog checkpoint for the first binlog file with a non-zero count.
10143
10144 Note that it is possible (though perhaps unlikely) that when count of
10145 binlog (N-2) drops to zero, binlog (N-1) is already at zero. So we may
10146 need to skip several entries before we find the one to log in the binlog
10147 checkpoint event.
10148
10149 We chain the locking of LOCK_xid_list and LOCK_log, so that we ensure that
10150 Binlog_checkpoint_events are logged in order. This simplifies recovery a
10151 bit, as it can just take the last binlog checkpoint in the log, rather
10152 than compare all found against each other to find the one pointing to the
10153 most recent binlog.
10154
10155 Note also that we need to first release LOCK_xid_list, then acquire
10156 LOCK_log, then re-aquire LOCK_xid_list. If we were to take LOCK_log while
10157 holding LOCK_xid_list, we might deadlock with other threads that take the
10158 locks in the opposite order.
10159 */
10160
10161 ++mark_xid_done_waiting;
10162 mysql_mutex_unlock(&LOCK_xid_list);
10163 mysql_mutex_lock(&LOCK_log);
10164 mysql_mutex_lock(&LOCK_xid_list);
10165 --mark_xid_done_waiting;
10166 mysql_cond_broadcast(&COND_xid_list);
10167 /* We need to reload current_binlog_id due to release/re-take of lock. */
10168 current= current_binlog_id;
10169
10170 for (;;)
10171 {
10172 /* Remove initial element(s) with zero count. */
10173 b= binlog_xid_count_list.head();
10174 /*
10175 We must not remove all elements in the list - the entry for the current
10176 binlog must be present always.
10177 */
10178 DBUG_ASSERT(b);
10179 if (b->binlog_id == current || b->xid_count > 0)
10180 break;
10181 WSREP_XID_LIST_ENTRY("TC_LOG_BINLOG::mark_xid_done(): Removing "
10182 "xid_list_entry for %s (%lu)", b);
10183 delete binlog_xid_count_list.get();
10184 }
10185
10186 mysql_mutex_unlock(&LOCK_xid_list);
10187 write_binlog_checkpoint_event_already_locked(b->binlog_name,
10188 b->binlog_name_len);
10189 mysql_mutex_unlock(&LOCK_log);
10190 DBUG_VOID_RETURN;
10191 }
10192
unlog(ulong cookie,my_xid xid)10193 int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
10194 {
10195 DBUG_ENTER("TC_LOG_BINLOG::unlog");
10196 if (!xid)
10197 DBUG_RETURN(0);
10198
10199 if (!BINLOG_COOKIE_IS_DUMMY(cookie))
10200 mark_xid_done(BINLOG_COOKIE_GET_ID(cookie), true);
10201 /*
10202 See comment in trx_group_commit_leader() - if rotate() gave a failure,
10203 we delay the return of error code to here.
10204 */
10205 DBUG_RETURN(BINLOG_COOKIE_GET_ERROR_FLAG(cookie));
10206 }
10207
write_empty_xa_prepare(THD * thd,binlog_cache_mngr * cache_mngr)10208 static bool write_empty_xa_prepare(THD *thd, binlog_cache_mngr *cache_mngr)
10209 {
10210 return binlog_commit_flush_xa_prepare(thd, true, cache_mngr);
10211 }
10212
unlog_xa_prepare(THD * thd,bool all)10213 int TC_LOG_BINLOG::unlog_xa_prepare(THD *thd, bool all)
10214 {
10215 DBUG_ASSERT(is_preparing_xa(thd));
10216
10217 binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data();
10218 int cookie= 0;
10219
10220 if (!cache_mngr->need_unlog)
10221 {
10222 Ha_trx_info *ha_info;
10223 uint rw_count= ha_count_rw_all(thd, &ha_info);
10224 bool rc= false;
10225
10226 /*
10227 This transaction has not been binlogged as indicated by need_unlog.
10228 Such exceptional cases include transactions with no effect to engines,
10229 e.g REPLACE that does not change the dat but still the Engine
10230 transaction branch claims to be rw, and few more.
10231 In all such cases an empty XA-prepare group of events is bin-logged.
10232 */
10233 if (rw_count > 0)
10234 {
10235 /* an empty XA-prepare event group is logged */
10236 rc= write_empty_xa_prepare(thd, cache_mngr); // normally gains need_unlog
10237 trans_register_ha(thd, true, binlog_hton, 0); // do it for future commmit
10238 }
10239 if (rw_count == 0 || !cache_mngr->need_unlog)
10240 return rc;
10241 }
10242
10243 cookie= BINLOG_COOKIE_MAKE(cache_mngr->binlog_id, cache_mngr->delayed_error);
10244 cache_mngr->need_unlog= false;
10245
10246 return unlog(cookie, 1);
10247 }
10248
10249
10250 void
commit_checkpoint_notify(void * cookie)10251 TC_LOG_BINLOG::commit_checkpoint_notify(void *cookie)
10252 {
10253 xid_count_per_binlog *entry= static_cast<xid_count_per_binlog *>(cookie);
10254 bool found_entry= false;
10255 mysql_mutex_lock(&LOCK_binlog_background_thread);
10256 /* count the same notification kind from different engines */
10257 for (xid_count_per_binlog *link= binlog_background_thread_queue;
10258 link && !found_entry; link= link->next_in_queue)
10259 {
10260 if ((found_entry= (entry == link)))
10261 entry->notify_count++;
10262 }
10263 if (!found_entry)
10264 {
10265 entry->next_in_queue= binlog_background_thread_queue;
10266 binlog_background_thread_queue= entry;
10267 }
10268 mysql_cond_signal(&COND_binlog_background_thread);
10269 mysql_mutex_unlock(&LOCK_binlog_background_thread);
10270 }
10271
10272 /*
10273 Binlog background thread.
10274
10275 This thread is used to log binlog checkpoints in the background, rather than
10276 in the context of random storage engine threads that happen to call
10277 commit_checkpoint_notify_ha() and may not like the delays while syncing
10278 binlog to disk or may not be setup with all my_thread_init() and other
10279 necessary stuff.
10280
10281 In the future, this thread could also be used to do log rotation in the
10282 background, which could eliminate all stalls around binlog rotations.
10283 */
10284 pthread_handler_t
binlog_background_thread(void * arg)10285 binlog_background_thread(void *arg __attribute__((unused)))
10286 {
10287 bool stop;
10288 MYSQL_BIN_LOG::xid_count_per_binlog *queue, *next;
10289 THD *thd;
10290 my_thread_init();
10291 DBUG_ENTER("binlog_background_thread");
10292
10293 thd= new THD(next_thread_id());
10294 thd->system_thread= SYSTEM_THREAD_BINLOG_BACKGROUND;
10295 thd->thread_stack= (char*) &thd; /* Set approximate stack start */
10296 thd->store_globals();
10297 thd->security_ctx->skip_grants();
10298 thd->set_command(COM_DAEMON);
10299
10300 /*
10301 Load the slave replication GTID state from the mysql.gtid_slave_pos
10302 table.
10303
10304 This is mostly so that we can start our seq_no counter from the highest
10305 seq_no seen by a slave. This way, we have a way to tell if a transaction
10306 logged by ourselves as master is newer or older than a replicated
10307 transaction.
10308 */
10309 #ifdef HAVE_REPLICATION
10310 if (rpl_load_gtid_slave_state(thd))
10311 sql_print_warning("Failed to load slave replication state from table "
10312 "%s.%s: %u: %s", "mysql",
10313 rpl_gtid_slave_state_table_name.str,
10314 thd->get_stmt_da()->sql_errno(),
10315 thd->get_stmt_da()->message());
10316 #endif
10317
10318 mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
10319 binlog_background_thread_started= true;
10320 mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
10321 mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);
10322
10323 for (;;)
10324 {
10325 /*
10326 Wait until there is something in the queue to process, or we are asked
10327 to shut down.
10328 */
10329 THD_STAGE_INFO(thd, stage_binlog_waiting_background_tasks);
10330 mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
10331 for (;;)
10332 {
10333 stop= binlog_background_thread_stop;
10334 queue= binlog_background_thread_queue;
10335 if (stop && !mysql_bin_log.is_xidlist_idle())
10336 {
10337 /*
10338 Delay stop until all pending binlog checkpoints have been processed.
10339 */
10340 stop= false;
10341 }
10342 if (stop || queue)
10343 break;
10344 mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread,
10345 &mysql_bin_log.LOCK_binlog_background_thread);
10346 }
10347 /* Grab the queue, if any. */
10348 binlog_background_thread_queue= NULL;
10349 mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);
10350
10351 /* Process any incoming commit_checkpoint_notify() calls. */
10352 DBUG_EXECUTE_IF("inject_binlog_background_thread_before_mark_xid_done",
10353 DBUG_ASSERT(!debug_sync_set_action(
10354 thd,
10355 STRING_WITH_LEN("binlog_background_thread_before_mark_xid_done "
10356 "SIGNAL injected_binlog_background_thread "
10357 "WAIT_FOR something_that_will_never_happen "
10358 "TIMEOUT 2")));
10359 );
10360 while (queue)
10361 {
10362 long count= queue->notify_count;
10363 THD_STAGE_INFO(thd, stage_binlog_processing_checkpoint_notify);
10364 DEBUG_SYNC(thd, "binlog_background_thread_before_mark_xid_done");
10365 /* Set the thread start time */
10366 thd->set_time();
10367 /* Grab next pointer first, as mark_xid_done() may free the element. */
10368 next= queue->next_in_queue;
10369 queue->notify_count= 0;
10370 for (long i= 0; i <= count; i++)
10371 mysql_bin_log.mark_xid_done(queue->binlog_id, true);
10372 queue= next;
10373
10374 DBUG_EXECUTE_IF("binlog_background_checkpoint_processed",
10375 DBUG_ASSERT(!debug_sync_set_action(
10376 thd,
10377 STRING_WITH_LEN("now SIGNAL binlog_background_checkpoint_processed")));
10378 );
10379 }
10380
10381 if (stop)
10382 break;
10383 }
10384
10385 THD_STAGE_INFO(thd, stage_binlog_stopping_background_thread);
10386
10387 /* No need to use mutex as thd is not linked into other threads */
10388 delete thd;
10389
10390 my_thread_end();
10391
10392 /* Signal that we are (almost) stopped. */
10393 mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
10394 binlog_background_thread_stop= false;
10395 mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
10396 mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);
10397
10398 DBUG_RETURN(0);
10399 }
10400
10401 #ifdef HAVE_PSI_INTERFACE
10402 static PSI_thread_key key_thread_binlog;
10403
10404 static PSI_thread_info all_binlog_threads[]=
10405 {
10406 { &key_thread_binlog, "binlog_background", PSI_FLAG_GLOBAL},
10407 };
10408 #endif /* HAVE_PSI_INTERFACE */
10409
10410 static bool
start_binlog_background_thread()10411 start_binlog_background_thread()
10412 {
10413 pthread_t th;
10414
10415 #ifdef HAVE_PSI_INTERFACE
10416 if (PSI_server)
10417 PSI_server->register_thread("sql", all_binlog_threads,
10418 array_elements(all_binlog_threads));
10419 #endif
10420
10421 if (mysql_thread_create(key_thread_binlog, &th, &connection_attrib,
10422 binlog_background_thread, NULL))
10423 return 1;
10424
10425 /*
10426 Wait for the thread to have started (so we know that the slave replication
10427 state is loaded and we have correct global_gtid_counter).
10428 */
10429 mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
10430 while (!binlog_background_thread_started)
10431 mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread_end,
10432 &mysql_bin_log.LOCK_binlog_background_thread);
10433 mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);
10434
10435 return 0;
10436 }
10437
10438
recover(LOG_INFO * linfo,const char * last_log_name,IO_CACHE * first_log,Format_description_log_event * fdle,bool do_xa)10439 int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
10440 IO_CACHE *first_log,
10441 Format_description_log_event *fdle, bool do_xa)
10442 {
10443 Log_event *ev= NULL;
10444 HASH xids;
10445 MEM_ROOT mem_root;
10446 char binlog_checkpoint_name[FN_REFLEN];
10447 bool binlog_checkpoint_found;
10448 bool first_round;
10449 IO_CACHE log;
10450 File file= -1;
10451 const char *errmsg;
10452 #ifdef HAVE_REPLICATION
10453 rpl_gtid last_gtid;
10454 bool last_gtid_standalone= false;
10455 bool last_gtid_valid= false;
10456 #endif
10457
10458 if (! fdle->is_valid() ||
10459 (do_xa && my_hash_init(key_memory_binlog_recover_exec, &xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
10460 sizeof(my_xid), 0, 0, MYF(0))))
10461 goto err1;
10462
10463 if (do_xa)
10464 init_alloc_root(key_memory_binlog_recover_exec, &mem_root,
10465 TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0));
10466
10467 fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
10468
10469 /*
10470 Scan the binlog for XIDs that need to be committed if still in the
10471 prepared stage.
10472
10473 Start with the latest binlog file, then continue with any other binlog
10474 files if the last found binlog checkpoint indicates it is needed.
10475 */
10476
10477 binlog_checkpoint_found= false;
10478 first_round= true;
10479 for (;;)
10480 {
10481 while ((ev= Log_event::read_log_event(first_round ? first_log : &log,
10482 fdle, opt_master_verify_checksum))
10483 && ev->is_valid())
10484 {
10485 enum Log_event_type typ= ev->get_type_code();
10486 switch (typ)
10487 {
10488 case XID_EVENT:
10489 {
10490 if (do_xa)
10491 {
10492 Xid_log_event *xev=(Xid_log_event *)ev;
10493 uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
10494 sizeof(xev->xid));
10495 if (!x || my_hash_insert(&xids, x))
10496 goto err2;
10497 }
10498 break;
10499 }
10500 case BINLOG_CHECKPOINT_EVENT:
10501 if (first_round && do_xa)
10502 {
10503 size_t dir_len;
10504 Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev;
10505 if (cev->binlog_file_len >= FN_REFLEN)
10506 sql_print_warning("Incorrect binlog checkpoint event with too "
10507 "long file name found.");
10508 else
10509 {
10510 /*
10511 Note that we cannot use make_log_name() here, as we have not yet
10512 initialised MYSQL_BIN_LOG::log_file_name.
10513 */
10514 dir_len= dirname_length(last_log_name);
10515 strmake(strnmov(binlog_checkpoint_name, last_log_name, dir_len),
10516 cev->binlog_file_name, FN_REFLEN - 1 - dir_len);
10517 binlog_checkpoint_found= true;
10518 }
10519 }
10520 break;
10521 case GTID_LIST_EVENT:
10522 if (first_round)
10523 {
10524 Gtid_list_log_event *glev= (Gtid_list_log_event *)ev;
10525
10526 /* Initialise the binlog state from the Gtid_list event. */
10527 if (rpl_global_gtid_binlog_state.load(glev->list, glev->count))
10528 goto err2;
10529 }
10530 break;
10531
10532 #ifdef HAVE_REPLICATION
10533 case GTID_EVENT:
10534 if (first_round)
10535 {
10536 Gtid_log_event *gev= (Gtid_log_event *)ev;
10537
10538 /* Update the binlog state with any GTID logged after Gtid_list. */
10539 last_gtid.domain_id= gev->domain_id;
10540 last_gtid.server_id= gev->server_id;
10541 last_gtid.seq_no= gev->seq_no;
10542 last_gtid_standalone=
10543 ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false);
10544 last_gtid_valid= true;
10545 }
10546 break;
10547 #endif
10548
10549 case START_ENCRYPTION_EVENT:
10550 {
10551 if (fdle->start_decryption((Start_encryption_log_event*) ev))
10552 goto err2;
10553 }
10554 break;
10555
10556 default:
10557 /* Nothing. */
10558 break;
10559 }
10560
10561 #ifdef HAVE_REPLICATION
10562 if (last_gtid_valid &&
10563 ((last_gtid_standalone && !ev->is_part_of_group(typ)) ||
10564 (!last_gtid_standalone &&
10565 (typ == XID_EVENT ||
10566 typ == XA_PREPARE_LOG_EVENT ||
10567 (LOG_EVENT_IS_QUERY(typ) &&
10568 (((Query_log_event *)ev)->is_commit() ||
10569 ((Query_log_event *)ev)->is_rollback()))))))
10570 {
10571 if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false))
10572 goto err2;
10573 last_gtid_valid= false;
10574 }
10575 #endif
10576
10577 delete ev;
10578 ev= NULL;
10579 }
10580
10581 if (!do_xa)
10582 break;
10583 /*
10584 If the last binlog checkpoint event points to an older log, we have to
10585 scan all logs from there also, to get all possible XIDs to recover.
10586
10587 If there was no binlog checkpoint event at all, this means the log was
10588 written by an older version of MariaDB (or MySQL) - these always have an
10589 (implicit) binlog checkpoint event at the start of the last binlog file.
10590 */
10591 if (first_round)
10592 {
10593 if (!binlog_checkpoint_found)
10594 break;
10595 first_round= false;
10596 DBUG_EXECUTE_IF("xa_recover_expect_master_bin_000004",
10597 if (0 != strcmp("./master-bin.000004", binlog_checkpoint_name) &&
10598 0 != strcmp(".\\master-bin.000004", binlog_checkpoint_name))
10599 DBUG_SUICIDE();
10600 );
10601 if (find_log_pos(linfo, binlog_checkpoint_name, 1))
10602 {
10603 sql_print_error("Binlog file '%s' not found in binlog index, needed "
10604 "for recovery. Aborting.", binlog_checkpoint_name);
10605 goto err2;
10606 }
10607 }
10608 else
10609 {
10610 end_io_cache(&log);
10611 mysql_file_close(file, MYF(MY_WME));
10612 file= -1;
10613 }
10614
10615 if (!strcmp(linfo->log_file_name, last_log_name))
10616 break; // No more files to do
10617 if ((file= open_binlog(&log, linfo->log_file_name, &errmsg)) < 0)
10618 {
10619 sql_print_error("%s", errmsg);
10620 goto err2;
10621 }
10622 /*
10623 We do not need to read the Format_description_log_event of other binlog
10624 files. It is not possible for a binlog checkpoint to span multiple
10625 binlog files written by different versions of the server. So we can use
10626 the first one read for reading from all binlog files.
10627 */
10628 if (find_next_log(linfo, 1))
10629 {
10630 sql_print_error("Error reading binlog files during recovery. Aborting.");
10631 goto err2;
10632 }
10633 fdle->reset_crypto();
10634 }
10635
10636 if (do_xa)
10637 {
10638 if (ha_recover(&xids))
10639 goto err2;
10640
10641 free_root(&mem_root, MYF(0));
10642 my_hash_free(&xids);
10643 }
10644 return 0;
10645
10646 err2:
10647 delete ev;
10648 if (file >= 0)
10649 {
10650 end_io_cache(&log);
10651 mysql_file_close(file, MYF(MY_WME));
10652 }
10653 if (do_xa)
10654 {
10655 free_root(&mem_root, MYF(0));
10656 my_hash_free(&xids);
10657 }
10658 err1:
10659 sql_print_error("Crash recovery failed. Either correct the problem "
10660 "(if it's, for example, out of memory error) and restart, "
10661 "or delete (or rename) binary log and start mysqld with "
10662 "--tc-heuristic-recover={commit|rollback}");
10663 return 1;
10664 }
10665
10666
10667 int
do_binlog_recovery(const char * opt_name,bool do_xa_recovery)10668 MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
10669 {
10670 LOG_INFO log_info;
10671 const char *errmsg;
10672 IO_CACHE log;
10673 File file;
10674 Log_event *ev= 0;
10675 Format_description_log_event fdle(BINLOG_VERSION);
10676 char log_name[FN_REFLEN];
10677 int error;
10678
10679 if (unlikely((error= find_log_pos(&log_info, NullS, 1))))
10680 {
10681 /*
10682 If there are no binlog files (LOG_INFO_EOF), then we still try to read
10683 the .state file to restore the binlog state. This allows to copy a server
10684 to provision a new one without copying the binlog files (except the
10685 master-bin.state file) and still preserve the correct binlog state.
10686 */
10687 if (error != LOG_INFO_EOF)
10688 sql_print_error("find_log_pos() failed (error: %d)", error);
10689 else
10690 {
10691 error= read_state_from_file();
10692 if (error == 2)
10693 {
10694 /*
10695 No binlog files and no binlog state is not an error (eg. just initial
10696 server start after fresh installation).
10697 */
10698 error= 0;
10699 }
10700 }
10701 return error;
10702 }
10703
10704 if (! fdle.is_valid())
10705 return 1;
10706
10707 do
10708 {
10709 strmake_buf(log_name, log_info.log_file_name);
10710 } while (!(error= find_next_log(&log_info, 1)));
10711
10712 if (error != LOG_INFO_EOF)
10713 {
10714 sql_print_error("find_log_pos() failed (error: %d)", error);
10715 return error;
10716 }
10717
10718 if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
10719 {
10720 sql_print_error("%s", errmsg);
10721 return 1;
10722 }
10723
10724 if ((ev= Log_event::read_log_event(&log, &fdle,
10725 opt_master_verify_checksum)) &&
10726 ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
10727 {
10728 if (ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
10729 {
10730 sql_print_information("Recovering after a crash using %s", opt_name);
10731 error= recover(&log_info, log_name, &log,
10732 (Format_description_log_event *)ev, do_xa_recovery);
10733 }
10734 else
10735 {
10736 error= read_state_from_file();
10737 if (unlikely(error == 2))
10738 {
10739 /*
10740 The binlog exists, but the .state file is missing. This is normal if
10741 this is the first master start after a major upgrade to 10.0 (with
10742 GTID support).
10743
10744 However, it could also be that the .state file was lost somehow, and
10745 in this case it could be a serious issue, as we would set the wrong
10746 binlog state in the next binlog file to be created, and GTID
10747 processing would be corrupted. A common way would be copying files
10748 from an old server to a new one and forgetting the .state file.
10749
10750 So in this case, we want to try to recover the binlog state by
10751 scanning the last binlog file (but we do not need any XA recovery).
10752
10753 ToDo: We could avoid one scan at first start after major upgrade, by
10754 detecting that there is no GTID_LIST event at the start of the
10755 binlog file, and stopping the scan in that case.
10756 */
10757 error= recover(&log_info, log_name, &log,
10758 (Format_description_log_event *)ev, false);
10759 }
10760 }
10761 }
10762
10763 delete ev;
10764 end_io_cache(&log);
10765 mysql_file_close(file, MYF(MY_WME));
10766
10767 return error;
10768 }
10769
10770
#ifdef INNODB_COMPATIBILITY_HOOKS
/*
  Get the current position of the MySQL binlog for transaction currently being
  committed.

  This is valid to call from within storage engine commit_ordered() and
  commit() methods only.

  Since it stores the position inside THD, it is safe to call without any
  locking.

  When the binlog is disabled or the session has no binlog cache manager,
  *out_file is set to NULL and *out_pos to 0.
*/
void
mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
{
  binlog_cache_mngr *mngr= NULL;
  if (opt_bin_log)
    mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);

  if (!mngr)
  {
    *out_file= NULL;
    *out_pos= 0;
    return;
  }

  *out_file= mngr->last_commit_pos_file;
  *out_pos= (ulonglong)(mngr->last_commit_pos_offset);
}
#endif /* INNODB_COMPATIBILITY_HOOKS */
10799
10800
10801 static void
binlog_checksum_update(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)10802 binlog_checksum_update(MYSQL_THD thd, struct st_mysql_sys_var *var,
10803 void *var_ptr, const void *save)
10804 {
10805 ulong value= *((ulong *)save);
10806 bool check_purge= false;
10807 ulong UNINIT_VAR(prev_binlog_id);
10808
10809 mysql_mutex_lock(mysql_bin_log.get_log_lock());
10810 if(mysql_bin_log.is_open())
10811 {
10812 prev_binlog_id= mysql_bin_log.current_binlog_id;
10813 if (binlog_checksum_options != value)
10814 mysql_bin_log.checksum_alg_reset= (enum_binlog_checksum_alg)value;
10815 if (mysql_bin_log.rotate(true, &check_purge))
10816 check_purge= false;
10817 }
10818 else
10819 {
10820 binlog_checksum_options= value;
10821 }
10822 DBUG_ASSERT(binlog_checksum_options == value);
10823 mysql_bin_log.checksum_alg_reset= BINLOG_CHECKSUM_ALG_UNDEF;
10824 mysql_mutex_unlock(mysql_bin_log.get_log_lock());
10825 if (check_purge)
10826 mysql_bin_log.checkpoint_and_purge(prev_binlog_id);
10827 }
10828
10829
show_binlog_vars(THD * thd,SHOW_VAR * var,void *,system_status_var * status_var,enum_var_type)10830 static int show_binlog_vars(THD *thd, SHOW_VAR *var, void *,
10831 system_status_var *status_var, enum_var_type)
10832 {
10833 mysql_bin_log.set_status_variables(thd);
10834 var->type= SHOW_ARRAY;
10835 var->value= (char *)&binlog_status_vars_detail;
10836 return 0;
10837 }
10838
10839 static SHOW_VAR binlog_status_vars_top[]= {
10840 {"Binlog", (char *) &show_binlog_vars, SHOW_FUNC},
10841 {NullS, NullS, SHOW_LONG}
10842 };
10843
10844 static MYSQL_SYSVAR_BOOL(
10845 optimize_thread_scheduling,
10846 opt_optimize_thread_scheduling,
10847 PLUGIN_VAR_READONLY,
10848 "Run fast part of group commit in a single thread, to optimize kernel "
10849 "thread scheduling. On by default. Disable to run each transaction in group "
10850 "commit in its own thread, which can be slower at very high concurrency. "
10851 "This option is mostly for testing one algorithm versus the other, and it "
10852 "should not normally be necessary to change it.",
10853 NULL,
10854 NULL,
10855 1);
10856
10857 static MYSQL_SYSVAR_ENUM(
10858 checksum,
10859 binlog_checksum_options,
10860 PLUGIN_VAR_RQCMDARG,
10861 "Type of BINLOG_CHECKSUM_ALG. Include checksum for "
10862 "log events in the binary log",
10863 NULL,
10864 binlog_checksum_update,
10865 BINLOG_CHECKSUM_ALG_CRC32,
10866 &binlog_checksum_typelib);
10867
10868 static struct st_mysql_sys_var *binlog_sys_vars[]=
10869 {
10870 MYSQL_SYSVAR(optimize_thread_scheduling),
10871 MYSQL_SYSVAR(checksum),
10872 NULL
10873 };
10874
10875
10876 /*
10877 Copy out the non-directory part of binlog position filename for the
10878 `binlog_snapshot_file' status variable, same way as it is done for
10879 SHOW BINLOG STATUS.
10880 */
10881 static void
set_binlog_snapshot_file(const char * src)10882 set_binlog_snapshot_file(const char *src)
10883 {
10884 size_t dir_len = dirname_length(src);
10885 strmake_buf(binlog_snapshot_file, src + dir_len);
10886 }
10887
10888 /*
10889 Copy out current values of status variables, for SHOW STATUS or
10890 information_schema.global_status.
10891
10892 This is called only under LOCK_all_status_vars, so we can fill in a static array.
10893 */
10894 void
set_status_variables(THD * thd)10895 TC_LOG_BINLOG::set_status_variables(THD *thd)
10896 {
10897 binlog_cache_mngr *cache_mngr;
10898
10899 if (thd && opt_bin_log)
10900 cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10901 else
10902 cache_mngr= 0;
10903
10904 bool have_snapshot= (cache_mngr && cache_mngr->last_commit_pos_file[0] != 0);
10905 mysql_mutex_lock(&LOCK_commit_ordered);
10906 binlog_status_var_num_commits= this->num_commits;
10907 binlog_status_var_num_group_commits= this->num_group_commits;
10908 if (!have_snapshot)
10909 {
10910 set_binlog_snapshot_file(last_commit_pos_file);
10911 binlog_snapshot_position= last_commit_pos_offset;
10912 }
10913 mysql_mutex_unlock(&LOCK_commit_ordered);
10914 mysql_mutex_lock(&LOCK_prepare_ordered);
10915 binlog_status_group_commit_trigger_count= this->group_commit_trigger_count;
10916 binlog_status_group_commit_trigger_timeout= this->group_commit_trigger_timeout;
10917 binlog_status_group_commit_trigger_lock_wait= this->group_commit_trigger_lock_wait;
10918 mysql_mutex_unlock(&LOCK_prepare_ordered);
10919
10920 if (have_snapshot)
10921 {
10922 set_binlog_snapshot_file(cache_mngr->last_commit_pos_file);
10923 binlog_snapshot_position= cache_mngr->last_commit_pos_offset;
10924 }
10925 }
10926
10927
10928 /*
10929 Find the Gtid_list_log_event at the start of a binlog.
10930
10931 NULL for ok, non-NULL error message for error.
10932
10933 If ok, then the event is returned in *out_gtid_list. This can be NULL if we
10934 get back to binlogs written by old server version without GTID support. If
10935 so, it means we have reached the point to start from, as no GTID events can
10936 exist in earlier binlogs.
10937 */
10938 const char *
get_gtid_list_event(IO_CACHE * cache,Gtid_list_log_event ** out_gtid_list)10939 get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list)
10940 {
10941 Format_description_log_event init_fdle(BINLOG_VERSION);
10942 Format_description_log_event *fdle;
10943 Log_event *ev;
10944 const char *errormsg = NULL;
10945
10946 *out_gtid_list= NULL;
10947
10948 if (!(ev= Log_event::read_log_event(cache, &init_fdle,
10949 opt_master_verify_checksum)) ||
10950 ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
10951 {
10952 if (ev)
10953 delete ev;
10954 return "Could not read format description log event while looking for "
10955 "GTID position in binlog";
10956 }
10957
10958 fdle= static_cast<Format_description_log_event *>(ev);
10959
10960 for (;;)
10961 {
10962 Log_event_type typ;
10963
10964 ev= Log_event::read_log_event(cache, fdle, opt_master_verify_checksum);
10965 if (!ev)
10966 {
10967 errormsg= "Could not read GTID list event while looking for GTID "
10968 "position in binlog";
10969 break;
10970 }
10971 typ= ev->get_type_code();
10972 if (typ == GTID_LIST_EVENT)
10973 break; /* Done, found it */
10974 if (typ == START_ENCRYPTION_EVENT)
10975 {
10976 if (fdle->start_decryption((Start_encryption_log_event*) ev))
10977 errormsg= "Could not set up decryption for binlog.";
10978 }
10979 delete ev;
10980 if (typ == ROTATE_EVENT || typ == STOP_EVENT ||
10981 typ == FORMAT_DESCRIPTION_EVENT || typ == START_ENCRYPTION_EVENT)
10982 continue; /* Continue looking */
10983
10984 /* We did not find any Gtid_list_log_event, must be old binlog. */
10985 ev= NULL;
10986 break;
10987 }
10988
10989 delete fdle;
10990 *out_gtid_list= static_cast<Gtid_list_log_event *>(ev);
10991 return errormsg;
10992 }
10993
10994
10995 struct st_mysql_storage_engine binlog_storage_engine=
10996 { MYSQL_HANDLERTON_INTERFACE_VERSION };
10997
maria_declare_plugin(binlog)10998 maria_declare_plugin(binlog)
10999 {
11000 MYSQL_STORAGE_ENGINE_PLUGIN,
11001 &binlog_storage_engine,
11002 "binlog",
11003 "MySQL AB",
11004 "This is a pseudo storage engine to represent the binlog in a transaction",
11005 PLUGIN_LICENSE_GPL,
11006 binlog_init, /* Plugin Init */
11007 NULL, /* Plugin Deinit */
11008 0x0100 /* 1.0 */,
11009 binlog_status_vars_top, /* status variables */
11010 binlog_sys_vars, /* system variables */
11011 "1.0", /* string version */
11012 MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
11013 }
11014 maria_declare_plugin_end;
11015
11016 #ifdef WITH_WSREP
11017 #include "wsrep_mysqld.h"
11018
wsrep_get_trans_cache(THD * thd)11019 IO_CACHE *wsrep_get_trans_cache(THD * thd)
11020 {
11021 DBUG_ASSERT(binlog_hton->slot != HA_SLOT_UNDEF);
11022 binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*)
11023 thd_get_ha_data(thd, binlog_hton);
11024 if (cache_mngr)
11025 return cache_mngr->get_binlog_cache_log(true);
11026
11027 WSREP_DEBUG("binlog cache not initialized, conn: %llu",
11028 thd->thread_id);
11029 return NULL;
11030 }
11031
wsrep_thd_binlog_trx_reset(THD * thd)11032 void wsrep_thd_binlog_trx_reset(THD * thd)
11033 {
11034 DBUG_ENTER("wsrep_thd_binlog_trx_reset");
11035 WSREP_DEBUG("wsrep_thd_binlog_reset");
11036 /*
11037 todo: fix autocommit select to not call the caller
11038 */
11039 binlog_cache_mngr *const cache_mngr=
11040 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
11041 if (cache_mngr)
11042 {
11043 cache_mngr->reset(false, true);
11044 if (!cache_mngr->stmt_cache.empty())
11045 {
11046 WSREP_DEBUG("pending events in stmt cache, sql: %s", thd->query());
11047 cache_mngr->stmt_cache.reset();
11048 }
11049 }
11050 thd->reset_binlog_for_next_statement();
11051 DBUG_VOID_RETURN;
11052 }
11053
wsrep_thd_binlog_stmt_rollback(THD * thd)11054 void wsrep_thd_binlog_stmt_rollback(THD * thd)
11055 {
11056 DBUG_ENTER("wsrep_thd_binlog_stmt_rollback");
11057 WSREP_DEBUG("wsrep_thd_binlog_stmt_rollback");
11058 binlog_cache_mngr *const cache_mngr=
11059 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
11060 if (cache_mngr)
11061 {
11062 thd->binlog_remove_pending_rows_event(TRUE, TRUE);
11063 cache_mngr->stmt_cache.reset();
11064 }
11065 DBUG_VOID_RETURN;
11066 }
11067
wsrep_register_binlog_handler(THD * thd,bool trx)11068 void wsrep_register_binlog_handler(THD *thd, bool trx)
11069 {
11070 DBUG_ENTER("register_binlog_handler");
11071 /*
11072 If this is the first call to this function while processing a statement,
11073 the transactional cache does not have a savepoint defined. So, in what
11074 follows:
11075 . an implicit savepoint is defined;
11076 . callbacks are registered;
11077 . binary log is set as read/write.
11078
11079 The savepoint allows for truncating the trx-cache transactional changes
11080 fail. Callbacks are necessary to flush caches upon committing or rolling
11081 back a statement or a transaction. However, notifications do not happen
11082 if the binary log is set as read/write.
11083 */
11084 binlog_cache_mngr *cache_mngr=
11085 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
11086 /* cache_mngr may be missing e.g. in mtr test ev51914.test */
11087 if (cache_mngr)
11088 {
11089 /*
11090 Set an implicit savepoint in order to be able to truncate a trx-cache.
11091 */
11092 if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
11093 {
11094 my_off_t pos= 0;
11095 binlog_trans_log_savepos(thd, &pos);
11096 cache_mngr->trx_cache.set_prev_position(pos);
11097 }
11098
11099 /*
11100 Set callbacks in order to be able to call commmit or rollback.
11101 */
11102 if (trx)
11103 trans_register_ha(thd, TRUE, binlog_hton, 0);
11104 trans_register_ha(thd, FALSE, binlog_hton, 0);
11105
11106 /*
11107 Set the binary log as read/write otherwise callbacks are not called.
11108 */
11109 thd->ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
11110 }
11111 DBUG_VOID_RETURN;
11112 }
11113
11114 #endif /* WITH_WSREP */
11115