1 /* Copyright (c) 2000, 2018, Oracle and/or its affiliates.
2 Copyright (c) 2009, 2020, MariaDB Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16
17
18 /**
19 @file
20
21 @brief
22 logging of commands
23
24 @todo
25 Abort logging when we get an error in reading or writing log files
26 */
27
28 #include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */
29 #include "sql_priv.h"
30 #include "log.h"
31 #include "sql_base.h" // open_log_table
32 #include "sql_repl.h"
33 #include "sql_delete.h" // mysql_truncate
34 #include "sql_parse.h" // command_name
35 #include "sql_time.h" // calc_time_from_sec, my_time_compare
36 #include "tztime.h" // my_tz_OFFSET0, struct Time_zone
37 #include "sql_acl.h" // SUPER_ACL
38 #include "log_event.h" // Query_log_event
39 #include "rpl_filter.h"
40 #include "rpl_rli.h"
41 #include "sql_audit.h"
42 #include "mysqld.h"
43
44 #include <my_dir.h>
45 #include <m_ctype.h> // For test_if_number
46
47 #include <set_var.h> // for Sys_last_gtid_ptr
48
49 #ifdef _WIN32
50 #include "message.h"
51 #endif
52
53 #include "sql_plugin.h"
54 #include "debug_sync.h"
55 #include "sql_show.h"
56 #include "my_pthread.h"
57 #include "semisync_master.h"
58 #include "sp_rcontext.h"
59 #include "sp_head.h"
60
61 #include "wsrep_mysqld.h"
62 #ifdef WITH_WSREP
63 #include "wsrep_trans_observer.h"
64 #endif /* WITH_WSREP */
65
/* max size of the log message */
#define MAX_LOG_BUFFER_SIZE 1024
#define MAX_TIME_SIZE 32
/* Sentinel offset: the bitwise complement of zero in my_off_t. */
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)
/* Truncate cache log files bigger than this */
#define CACHE_FILE_TRUNC_SIZE 65536

/*
  Expands to the stringified name of flag F followed by a space when
  (V & F) is non-zero, and to the empty string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
74
handlerton *binlog_hton;
LOGGER logger;

const char *log_bin_index= 0;
const char *log_bin_basename= 0;

MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period);

/* Forward declarations of file-local functions. */
static bool test_if_number(const char *str,
                           ulong *res, bool allow_wildcards);
static int binlog_init(void *p);
static int binlog_close_connection(handlerton *hton, THD *thd);
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
                                                      THD *thd);
static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);

static const LEX_CSTRING write_error_msg=
{ STRING_WITH_LEN("error writing to the binary log") };

static my_bool opt_optimize_thread_scheduling= TRUE;
ulong binlog_checksum_options;
#ifndef DBUG_OFF
/* Only present in debug builds (DBUG_OFF not defined). */
ulong opt_binlog_dbug_fsync_sleep= 0;
#endif

mysql_mutex_t LOCK_prepare_ordered;
mysql_cond_t COND_prepare_ordered;
mysql_mutex_t LOCK_after_binlog_sync;
mysql_mutex_t LOCK_commit_ordered;

/* Backing storage for the entries of binlog_status_vars_detail[]. */
static ulonglong binlog_status_var_num_commits;
static ulonglong binlog_status_var_num_group_commits;
static ulonglong binlog_status_group_commit_trigger_count;
static ulonglong binlog_status_group_commit_trigger_lock_wait;
static ulonglong binlog_status_group_commit_trigger_timeout;
static char binlog_snapshot_file[FN_REFLEN];
static ulonglong binlog_snapshot_position;

/*
  printf-style message template; takes a string (%s) and an
  integer error code (%d), in that order.
*/
static const char *fatal_log_error=
  "Could not use %s for logging (error %d). "
  "Turning logging off for the whole duration of the MariaDB server process. "
  "To turn it on again: fix the cause, shutdown the MariaDB server and "
  "restart it.";
123
124
/*
  Table of binlog status variables. Each entry pairs a variable name with
  the address of the file-local object holding its value and the type used
  to display it. The list is terminated by an entry whose name is NullS.
*/
static SHOW_VAR binlog_status_vars_detail[]=
{
  {"commits",
    (char *)&binlog_status_var_num_commits, SHOW_LONGLONG},
  {"group_commits",
    (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG},
  {"group_commit_trigger_count",
    (char *)&binlog_status_group_commit_trigger_count, SHOW_LONGLONG},
  {"group_commit_trigger_lock_wait",
    (char *)&binlog_status_group_commit_trigger_lock_wait, SHOW_LONGLONG},
  {"group_commit_trigger_timeout",
    (char *)&binlog_status_group_commit_trigger_timeout, SHOW_LONGLONG},
  {"snapshot_file",
    (char *)&binlog_snapshot_file, SHOW_CHAR},
  {"snapshot_position",
    (char *)&binlog_snapshot_position, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};
143
/*
  Variables for the binlog background thread.
  Protected by the MYSQL_BIN_LOG::LOCK_binlog_background_thread mutex.
*/
static bool binlog_background_thread_started= false;
static bool binlog_background_thread_stop= false;
static MYSQL_BIN_LOG::xid_count_per_binlog *
  binlog_background_thread_queue= NULL;

static bool start_binlog_background_thread();

/* Global GTID binlog state; initialized by setup_log_handling(). */
static rpl_binlog_state rpl_global_gtid_binlog_state;
156
setup_log_handling()157 void setup_log_handling()
158 {
159 rpl_global_gtid_binlog_state.init();
160 }
161
162
163 /**
164 purge logs, master and slave sides both, related error code
165 converter.
166 Called from @c purge_error_message(), @c MYSQL_BIN_LOG::reset_logs()
167
168 @param res an internal to purging routines error code
169
170 @return the user level error code ER_*
171 */
purge_log_get_error_code(int res)172 uint purge_log_get_error_code(int res)
173 {
174 uint errcode= 0;
175
176 switch (res) {
177 case 0: break;
178 case LOG_INFO_EOF: errcode= ER_UNKNOWN_TARGET_BINLOG; break;
179 case LOG_INFO_IO: errcode= ER_IO_ERR_LOG_INDEX_READ; break;
180 case LOG_INFO_INVALID:errcode= ER_BINLOG_PURGE_PROHIBITED; break;
181 case LOG_INFO_SEEK: errcode= ER_FSEEK_FAIL; break;
182 case LOG_INFO_MEM: errcode= ER_OUT_OF_RESOURCES; break;
183 case LOG_INFO_FATAL: errcode= ER_BINLOG_PURGE_FATAL_ERR; break;
184 case LOG_INFO_IN_USE: errcode= ER_LOG_IN_USE; break;
185 case LOG_INFO_EMFILE: errcode= ER_BINLOG_PURGE_EMFILE; break;
186 default: errcode= ER_LOG_PURGE_UNKNOWN_ERR; break;
187 }
188
189 return errcode;
190 }
191
192 /**
193 Silence all errors and warnings reported when performing a write
194 to a log table.
195 Errors and warnings are not reported to the client or SQL exception
196 handlers, so that the presence of logging does not interfere and affect
197 the logic of an application.
198 */
199 class Silence_log_table_errors : public Internal_error_handler
200 {
201 char m_message[MYSQL_ERRMSG_SIZE];
202 public:
Silence_log_table_errors()203 Silence_log_table_errors()
204 {
205 m_message[0]= '\0';
206 }
207
~Silence_log_table_errors()208 virtual ~Silence_log_table_errors() {}
209
210 virtual bool handle_condition(THD *thd,
211 uint sql_errno,
212 const char* sql_state,
213 Sql_condition::enum_warning_level *level,
214 const char* msg,
215 Sql_condition ** cond_hdl);
message() const216 const char *message() const { return m_message; }
217 };
218
219 bool
handle_condition(THD *,uint,const char *,Sql_condition::enum_warning_level *,const char * msg,Sql_condition ** cond_hdl)220 Silence_log_table_errors::handle_condition(THD *,
221 uint,
222 const char*,
223 Sql_condition::enum_warning_level*,
224 const char* msg,
225 Sql_condition ** cond_hdl)
226 {
227 *cond_hdl= NULL;
228 strmake_buf(m_message, msg);
229 return TRUE;
230 }
231
/*
  Printing functions in the order: information, warning, error.
  NOTE(review): presumably indexed by a severity level; confirm against
  the users of this table.
*/
sql_print_message_func sql_print_message_handlers[3] =
{
  sql_print_information,
  sql_print_warning,
  sql_print_error
};
238
239
240 /**
241 Create the name of the log file
242
243 @param[OUT] out a pointer to a new allocated name will go there
244 @param[IN] log_ext The extension for the file (e.g .log)
245 @param[IN] once whether to use malloc_once or a normal malloc.
246 */
make_default_log_name(char ** out,const char * log_ext,bool once)247 void make_default_log_name(char **out, const char* log_ext, bool once)
248 {
249 char buff[FN_REFLEN+10];
250 fn_format(buff, opt_log_basename, "", log_ext, MYF(MY_REPLACE_EXT));
251 if (once)
252 *out= my_once_strdup(buff, MYF(MY_WME));
253 else
254 {
255 my_free(*out);
256 *out= my_strdup(buff, MYF(MY_WME));
257 }
258 }
259
260
261 /*
262 Helper classes to store non-transactional and transactional data
263 before copying it to the binary log.
264 */
265 class binlog_cache_data
266 {
267 public:
binlog_cache_data()268 binlog_cache_data(): m_pending(0), status(0),
269 before_stmt_pos(MY_OFF_T_UNDEF),
270 incident(FALSE),
271 saved_max_binlog_cache_size(0), ptr_binlog_cache_use(0),
272 ptr_binlog_cache_disk_use(0)
273 { }
274
~binlog_cache_data()275 ~binlog_cache_data()
276 {
277 DBUG_ASSERT(empty());
278 close_cached_file(&cache_log);
279 }
280
281 /*
282 Return 1 if there is no relevant entries in the cache
283
284 This is:
285 - Cache is empty
286 - There are row or critical (DDL?) events in the cache
287
288 The status test is needed to avoid writing entries with only
289 a table map entry, which would crash in do_apply_event() on the slave
290 as it assumes that there is always a row entry after a table map.
291 */
empty() const292 bool empty() const
293 {
294 return (pending() == NULL &&
295 (my_b_write_tell(&cache_log) == 0 ||
296 ((status & (LOGGED_ROW_EVENT | LOGGED_CRITICAL)) == 0)));
297 }
298
pending() const299 Rows_log_event *pending() const
300 {
301 return m_pending;
302 }
303
set_pending(Rows_log_event * const pending_arg)304 void set_pending(Rows_log_event *const pending_arg)
305 {
306 m_pending= pending_arg;
307 }
308
set_incident(void)309 void set_incident(void)
310 {
311 incident= TRUE;
312 }
313
has_incident(void)314 bool has_incident(void)
315 {
316 return(incident);
317 }
318
reset()319 void reset()
320 {
321 bool cache_was_empty= empty();
322 bool truncate_file= (cache_log.file != -1 &&
323 my_b_write_tell(&cache_log) > CACHE_FILE_TRUNC_SIZE);
324 truncate(0,1); // Forget what's in cache
325 if (!cache_was_empty)
326 compute_statistics();
327 if (truncate_file)
328 my_chsize(cache_log.file, 0, 0, MYF(MY_WME));
329
330 status= 0;
331 incident= FALSE;
332 before_stmt_pos= MY_OFF_T_UNDEF;
333 DBUG_ASSERT(empty());
334 }
335
get_byte_position() const336 my_off_t get_byte_position() const
337 {
338 return my_b_tell(&cache_log);
339 }
340
get_prev_position()341 my_off_t get_prev_position()
342 {
343 return(before_stmt_pos);
344 }
345
set_prev_position(my_off_t pos)346 void set_prev_position(my_off_t pos)
347 {
348 before_stmt_pos= pos;
349 }
350
restore_prev_position()351 void restore_prev_position()
352 {
353 truncate(before_stmt_pos);
354 }
355
restore_savepoint(my_off_t pos)356 void restore_savepoint(my_off_t pos)
357 {
358 truncate(pos);
359 if (pos < before_stmt_pos)
360 before_stmt_pos= MY_OFF_T_UNDEF;
361 }
362
set_binlog_cache_info(my_off_t param_max_binlog_cache_size,ulong * param_ptr_binlog_cache_use,ulong * param_ptr_binlog_cache_disk_use)363 void set_binlog_cache_info(my_off_t param_max_binlog_cache_size,
364 ulong *param_ptr_binlog_cache_use,
365 ulong *param_ptr_binlog_cache_disk_use)
366 {
367 /*
368 The assertions guarantee that the set_binlog_cache_info is
369 called just once and information passed as parameters are
370 never zero.
371
372 This is done while calling the constructor binlog_cache_mngr.
373 We cannot set information in the constructor binlog_cache_data
374 because the space for binlog_cache_mngr is allocated through
375 a placement new.
376
377 In the future, we can refactor this and change it to avoid
378 the set_binlog_info.
379 */
380 DBUG_ASSERT(saved_max_binlog_cache_size == 0 &&
381 param_max_binlog_cache_size != 0 &&
382 ptr_binlog_cache_use == 0 &&
383 param_ptr_binlog_cache_use != 0 &&
384 ptr_binlog_cache_disk_use == 0 &&
385 param_ptr_binlog_cache_disk_use != 0);
386
387 saved_max_binlog_cache_size= param_max_binlog_cache_size;
388 ptr_binlog_cache_use= param_ptr_binlog_cache_use;
389 ptr_binlog_cache_disk_use= param_ptr_binlog_cache_disk_use;
390 cache_log.end_of_file= saved_max_binlog_cache_size;
391 }
392
add_status(enum_logged_status status_arg)393 void add_status(enum_logged_status status_arg)
394 {
395 status|= status_arg;
396 }
397
398 /*
399 Cache to store data before copying it to the binary log.
400 */
401 IO_CACHE cache_log;
402
403 private:
404 /*
405 Pending binrows event. This event is the event where the rows are currently
406 written.
407 */
408 Rows_log_event *m_pending;
409
410 /*
411 Bit flags for what has been writting to cache. Used to
412 discard logs without any data changes.
413 see enum_logged_status;
414 */
415 uint32 status;
416
417 /*
418 Binlog position before the start of the current statement.
419 */
420 my_off_t before_stmt_pos;
421
422 /*
423 This indicates that some events did not get into the cache and most likely
424 it is corrupted.
425 */
426 bool incident;
427
428 /**
429 This function computes binlog cache and disk usage.
430 */
compute_statistics()431 void compute_statistics()
432 {
433 statistic_increment(*ptr_binlog_cache_use, &LOCK_status);
434 if (cache_log.disk_writes != 0)
435 {
436 #ifdef REAL_STATISTICS
437 statistic_add(*ptr_binlog_cache_disk_use,
438 cache_log.disk_writes, &LOCK_status);
439 #else
440 statistic_increment(*ptr_binlog_cache_disk_use, &LOCK_status);
441 #endif
442 cache_log.disk_writes= 0;
443 }
444 }
445
446 /*
447 Stores the values of maximum size of the cache allowed when this cache
448 is configured. This corresponds to either
449 . max_binlog_cache_size or max_binlog_stmt_cache_size.
450 */
451 my_off_t saved_max_binlog_cache_size;
452
453 /*
454 Stores a pointer to the status variable that keeps track of the in-memory
455 cache usage. This corresponds to either
456 . binlog_cache_use or binlog_stmt_cache_use.
457 */
458 ulong *ptr_binlog_cache_use;
459
460 /*
461 Stores a pointer to the status variable that keeps track of the disk
462 cache usage. This corresponds to either
463 . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
464 */
465 ulong *ptr_binlog_cache_disk_use;
466
467 /*
468 It truncates the cache to a certain position. This includes deleting the
469 pending event.
470 */
truncate(my_off_t pos,bool reset_cache=0)471 void truncate(my_off_t pos, bool reset_cache=0)
472 {
473 DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
474 cache_log.error=0;
475 if (pending())
476 {
477 delete pending();
478 set_pending(0);
479 }
480 reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, reset_cache);
481 cache_log.end_of_file= saved_max_binlog_cache_size;
482 }
483
484 binlog_cache_data& operator=(const binlog_cache_data& info);
485 binlog_cache_data(const binlog_cache_data& info);
486 };
487
488
add_status(enum_logged_status status)489 void Log_event_writer::add_status(enum_logged_status status)
490 {
491 if (likely(cache_data))
492 cache_data->add_status(status);
493 }
494
495 class binlog_cache_mngr {
496 public:
binlog_cache_mngr(my_off_t param_max_binlog_stmt_cache_size,my_off_t param_max_binlog_cache_size,ulong * param_ptr_binlog_stmt_cache_use,ulong * param_ptr_binlog_stmt_cache_disk_use,ulong * param_ptr_binlog_cache_use,ulong * param_ptr_binlog_cache_disk_use)497 binlog_cache_mngr(my_off_t param_max_binlog_stmt_cache_size,
498 my_off_t param_max_binlog_cache_size,
499 ulong *param_ptr_binlog_stmt_cache_use,
500 ulong *param_ptr_binlog_stmt_cache_disk_use,
501 ulong *param_ptr_binlog_cache_use,
502 ulong *param_ptr_binlog_cache_disk_use)
503 : last_commit_pos_offset(0), using_xa(FALSE), xa_xid(0)
504 {
505 stmt_cache.set_binlog_cache_info(param_max_binlog_stmt_cache_size,
506 param_ptr_binlog_stmt_cache_use,
507 param_ptr_binlog_stmt_cache_disk_use);
508 trx_cache.set_binlog_cache_info(param_max_binlog_cache_size,
509 param_ptr_binlog_cache_use,
510 param_ptr_binlog_cache_disk_use);
511 last_commit_pos_file[0]= 0;
512 }
513
reset(bool do_stmt,bool do_trx)514 void reset(bool do_stmt, bool do_trx)
515 {
516 if (do_stmt)
517 stmt_cache.reset();
518 if (do_trx)
519 {
520 trx_cache.reset();
521 using_xa= FALSE;
522 last_commit_pos_file[0]= 0;
523 last_commit_pos_offset= 0;
524 }
525 }
526
get_binlog_cache_data(bool is_transactional)527 binlog_cache_data* get_binlog_cache_data(bool is_transactional)
528 {
529 return (is_transactional ? &trx_cache : &stmt_cache);
530 }
531
get_binlog_cache_log(bool is_transactional)532 IO_CACHE* get_binlog_cache_log(bool is_transactional)
533 {
534 return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
535 }
536
537 binlog_cache_data stmt_cache;
538
539 binlog_cache_data trx_cache;
540
541 /*
542 Binlog position for current transaction.
543 For START TRANSACTION WITH CONSISTENT SNAPSHOT, this is the binlog
544 position corresponding to the snapshot taken. During (and after) commit,
545 this is set to the binlog position corresponding to just after the
546 commit (so storage engines can store it in their transaction log).
547 */
548 char last_commit_pos_file[FN_REFLEN];
549 my_off_t last_commit_pos_offset;
550
551 /*
552 Flag set true if this transaction is committed with log_xid() as part of
553 XA, false if not.
554 */
555 bool using_xa;
556 my_xid xa_xid;
557 bool need_unlog;
558 /*
559 Id of binlog that transaction was written to; only needed if need_unlog is
560 true.
561 */
562 ulong binlog_id;
563 /* Set if we get an error during commit that must be returned from unlog(). */
564 bool delayed_error;
565
566 private:
567
568 binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
569 binlog_cache_mngr(const binlog_cache_mngr& info);
570 };
571
is_log_table_enabled(uint log_table_type)572 bool LOGGER::is_log_table_enabled(uint log_table_type)
573 {
574 switch (log_table_type) {
575 case QUERY_LOG_SLOW:
576 return (table_log_handler != NULL) && global_system_variables.sql_log_slow
577 && (log_output_options & LOG_TABLE);
578 case QUERY_LOG_GENERAL:
579 return (table_log_handler != NULL) && opt_log
580 && (log_output_options & LOG_TABLE);
581 default:
582 DBUG_ASSERT(0);
583 return FALSE; /* make compiler happy */
584 }
585 }
586
587 /**
588 Check if a given table is opened log table
589
590 @param table Table to check
591 @param check_if_opened Only fail if it's a log table in use
592 @param error_msg String to put in error message if not ok.
593 No error message if 0
594 @return 0 ok
595 @return # Type of log file
596 */
597
check_if_log_table(const TABLE_LIST * table,bool check_if_opened,const char * error_msg)598 int check_if_log_table(const TABLE_LIST *table,
599 bool check_if_opened,
600 const char *error_msg)
601 {
602 int result= 0;
603 if (table->db.length == 5 &&
604 !my_strcasecmp(table_alias_charset, table->db.str, "mysql"))
605 {
606 const char *table_name= table->table_name.str;
607
608 if (table->table_name.length == 11 &&
609 !my_strcasecmp(table_alias_charset, table_name, "general_log"))
610 {
611 result= QUERY_LOG_GENERAL;
612 goto end;
613 }
614
615 if (table->table_name.length == 8 &&
616 !my_strcasecmp(table_alias_charset, table_name, "slow_log"))
617 {
618 result= QUERY_LOG_SLOW;
619 goto end;
620 }
621 }
622 return 0;
623
624 end:
625 if (!check_if_opened || logger.is_log_table_enabled(result))
626 {
627 if (error_msg)
628 my_error(ER_BAD_LOG_STATEMENT, MYF(0), error_msg);
629 return result;
630 }
631 return 0;
632 }
633
634
Log_to_csv_event_handler()635 Log_to_csv_event_handler::Log_to_csv_event_handler()
636 {
637 }
638
639
~Log_to_csv_event_handler()640 Log_to_csv_event_handler::~Log_to_csv_event_handler()
641 {
642 }
643
644
cleanup()645 void Log_to_csv_event_handler::cleanup()
646 {
647 logger.is_log_tables_initialized= FALSE;
648 }
649
650 /* log event handlers */
651
652 /**
653 Log command to the general log table
654
655 Log given command to the general log table.
656
657 @param event_time command start timestamp
658 @param user_host the pointer to the string with user@host info
659 @param user_host_len length of the user_host string. this is computed
660 once and passed to all general log event handlers
661 @param thread_id Id of the thread, issued a query
662 @param command_type the type of the command being logged
663 @param command_type_len the length of the string above
664 @param sql_text the very text of the query being executed
665 @param sql_text_len the length of sql_text string
666
667
668 @return This function attempts to never call my_error(). This is
669 necessary, because general logging happens already after a statement
670 status has been sent to the client, so the client can not see the
671 error anyway. Besides, the error is not related to the statement
672 being executed and is internal, and thus should be handled
673 internally (@todo: how?).
674 If a write to the table has failed, the function attempts to
675 write to a short error message to the file. The failure is also
676 indicated in the return value.
677
678 @retval FALSE OK
679 @retval TRUE error occurred
680 */
681
682 bool Log_to_csv_event_handler::
log_general(THD * thd,my_hrtime_t event_time,const char * user_host,size_t user_host_len,my_thread_id thread_id_arg,const char * command_type,size_t command_type_len,const char * sql_text,size_t sql_text_len,CHARSET_INFO * client_cs)683 log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
684 const char *command_type, size_t command_type_len,
685 const char *sql_text, size_t sql_text_len,
686 CHARSET_INFO *client_cs)
687 {
688 TABLE_LIST table_list;
689 TABLE *table;
690 bool result= TRUE;
691 bool need_close= FALSE;
692 bool need_pop= FALSE;
693 bool need_rnd_end= FALSE;
694 uint field_index;
695 Silence_log_table_errors error_handler;
696 Open_tables_backup open_tables_backup;
697 ulonglong save_thd_options;
698 bool save_time_zone_used;
699 DBUG_ENTER("log_general");
700
701 /*
702 CSV uses TIME_to_timestamp() internally if table needs to be repaired
703 which will set thd->time_zone_used
704 */
705 save_time_zone_used= thd->time_zone_used;
706
707 save_thd_options= thd->variables.option_bits;
708 thd->variables.option_bits&= ~OPTION_BIN_LOG;
709
710 table_list.init_one_table(&MYSQL_SCHEMA_NAME, &GENERAL_LOG_NAME, 0,
711 TL_WRITE_CONCURRENT_INSERT);
712
713 /*
714 1) open_log_table generates an error of the
715 table can not be opened or is corrupted.
716 2) "INSERT INTO general_log" can generate warning sometimes.
717
718 Suppress these warnings and errors, they can't be dealt with
719 properly anyway.
720
721 QQ: this problem needs to be studied in more detail.
722 Comment this 2 lines and run "cast.test" to see what's happening.
723 */
724 thd->push_internal_handler(& error_handler);
725 need_pop= TRUE;
726
727 if (!(table= open_log_table(thd, &table_list, &open_tables_backup)))
728 goto err;
729
730 need_close= TRUE;
731
732 if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
733 table->file->ha_rnd_init_with_error(0))
734 goto err;
735
736 need_rnd_end= TRUE;
737
738 /* Honor next number columns if present */
739 table->next_number_field= table->found_next_number_field;
740
741 /*
742 NOTE: we do not call restore_record() here, as all fields are
743 filled by the Logger (=> no need to load default ones).
744 */
745
746 /*
747 We do not set a value for table->field[0], as it will use
748 default value (which is CURRENT_TIMESTAMP).
749 */
750
751 /* check that all columns exist */
752 if (table->s->fields < 6)
753 goto err;
754
755 DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
756
757 ((Field_timestamp*) table->field[0])->store_TIME(
758 hrtime_to_my_time(event_time), hrtime_sec_part(event_time));
759
760 /* do a write */
761 if (table->field[1]->store(user_host, user_host_len, client_cs) ||
762 table->field[2]->store((longlong) thread_id_arg, TRUE) ||
763 table->field[3]->store((longlong) global_system_variables.server_id,
764 TRUE) ||
765 table->field[4]->store(command_type, command_type_len, client_cs))
766 goto err;
767
768 /*
769 A positive return value in store() means truncation.
770 Still logging a message in the log in this case.
771 */
772 table->field[5]->flags|= FIELDFLAG_HEX_ESCAPE;
773 if (table->field[5]->store(sql_text, sql_text_len, client_cs) < 0)
774 goto err;
775
776 /* mark all fields as not null */
777 table->field[1]->set_notnull();
778 table->field[2]->set_notnull();
779 table->field[3]->set_notnull();
780 table->field[4]->set_notnull();
781 table->field[5]->set_notnull();
782
783 /* Set any extra columns to their default values */
784 for (field_index= 6 ; field_index < table->s->fields ; field_index++)
785 {
786 table->field[field_index]->set_default();
787 }
788
789 /* log table entries are not replicated */
790 if (table->file->ha_write_row(table->record[0]))
791 goto err;
792
793 result= FALSE;
794
795 err:
796 if (result && !thd->killed)
797 sql_print_error("Failed to write to mysql.general_log: %s",
798 error_handler.message());
799
800 if (need_rnd_end)
801 {
802 table->file->ha_rnd_end();
803 table->file->ha_release_auto_increment();
804 }
805 if (need_pop)
806 thd->pop_internal_handler();
807 if (need_close)
808 close_log_table(thd, &open_tables_backup);
809
810 thd->variables.option_bits= save_thd_options;
811 thd->time_zone_used= save_time_zone_used;
812 DBUG_RETURN(result);
813 }
814
815
816 /*
817 Log a query to the slow log table
818
819 SYNOPSIS
820 log_slow()
821 thd THD of the query
822 current_time current timestamp
823 user_host the pointer to the string with user@host info
824 user_host_len length of the user_host string. this is computed once
825 and passed to all general log event handlers
826 query_time Amount of time the query took to execute (in microseconds)
827 lock_time Amount of time the query was locked (in microseconds)
828 is_command The flag, which determines, whether the sql_text is a
829 query or an administrator command (these are treated
830 differently by the old logging routines)
831 sql_text the very text of the query or administrator command
832 processed
833 sql_text_len the length of sql_text string
834
835 DESCRIPTION
836
837 Log a query to the slow log table
838
839 RETURN
840 FALSE - OK
841 TRUE - error occurred
842 */
843
844 bool Log_to_csv_event_handler::
log_slow(THD * thd,my_hrtime_t current_time,const char * user_host,size_t user_host_len,ulonglong query_utime,ulonglong lock_utime,bool is_command,const char * sql_text,size_t sql_text_len)845 log_slow(THD *thd, my_hrtime_t current_time,
846 const char *user_host, size_t user_host_len,
847 ulonglong query_utime, ulonglong lock_utime, bool is_command,
848 const char *sql_text, size_t sql_text_len)
849 {
850 TABLE_LIST table_list;
851 TABLE *table;
852 bool result= TRUE;
853 bool need_close= FALSE;
854 bool need_rnd_end= FALSE;
855 Silence_log_table_errors error_handler;
856 Open_tables_backup open_tables_backup;
857 CHARSET_INFO *client_cs= thd->variables.character_set_client;
858 bool save_time_zone_used;
859 ulong query_time= (ulong) MY_MIN(query_utime/1000000, TIME_MAX_VALUE_SECONDS);
860 ulong lock_time= (ulong) MY_MIN(lock_utime/1000000, TIME_MAX_VALUE_SECONDS);
861 ulong query_time_micro= (ulong) (query_utime % 1000000);
862 ulong lock_time_micro= (ulong) (lock_utime % 1000000);
863
864 DBUG_ENTER("Log_to_csv_event_handler::log_slow");
865
866 thd->push_internal_handler(& error_handler);
867 /*
868 CSV uses TIME_to_timestamp() internally if table needs to be repaired
869 which will set thd->time_zone_used
870 */
871 save_time_zone_used= thd->time_zone_used;
872
873 table_list.init_one_table(&MYSQL_SCHEMA_NAME, &SLOW_LOG_NAME, 0,
874 TL_WRITE_CONCURRENT_INSERT);
875
876 if (!(table= open_log_table(thd, &table_list, &open_tables_backup)))
877 goto err;
878
879 need_close= TRUE;
880
881 if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
882 table->file->ha_rnd_init_with_error(0))
883 goto err;
884
885 need_rnd_end= TRUE;
886
887 /* Honor next number columns if present */
888 table->next_number_field= table->found_next_number_field;
889
890 restore_record(table, s->default_values); // Get empty record
891
892 /* check that all columns exist */
893 if (table->s->fields < 13)
894 goto err;
895
896 /* store the time and user values */
897 DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
898 ((Field_timestamp*) table->field[0])->store_TIME(
899 hrtime_to_my_time(current_time), hrtime_sec_part(current_time));
900 if (table->field[1]->store(user_host, user_host_len, client_cs))
901 goto err;
902
903 /*
904 A TIME field can not hold the full longlong range; query_time or
905 lock_time may be truncated without warning here, if greater than
906 839 hours (~35 days)
907 */
908 MYSQL_TIME t;
909 t.neg= 0;
910
911 /* fill in query_time field */
912 calc_time_from_sec(&t, query_time, query_time_micro);
913 if (table->field[2]->store_time(&t))
914 goto err;
915 /* lock_time */
916 calc_time_from_sec(&t, lock_time, lock_time_micro);
917 if (table->field[3]->store_time(&t))
918 goto err;
919 /* rows_sent */
920 if (table->field[4]->store((longlong) thd->get_sent_row_count(), TRUE))
921 goto err;
922 /* rows_examined */
923 if (table->field[5]->store((longlong) thd->get_examined_row_count(), TRUE))
924 goto err;
925
926 /* fill database field */
927 if (thd->db.str)
928 {
929 if (table->field[6]->store(thd->db.str, thd->db.length, client_cs))
930 goto err;
931 table->field[6]->set_notnull();
932 }
933
934 if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
935 {
936 if (table->
937 field[7]->store((longlong)
938 thd->first_successful_insert_id_in_prev_stmt_for_binlog,
939 TRUE))
940 goto err;
941 table->field[7]->set_notnull();
942 }
943
944 /*
945 Set value if we do an insert on autoincrement column. Note that for
946 some engines (those for which get_auto_increment() does not leave a
947 table lock until the statement ends), this is just the first value and
948 the next ones used may not be contiguous to it.
949 */
950 if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
951 {
952 if (table->
953 field[8]->store((longlong)
954 thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(), TRUE))
955 goto err;
956 table->field[8]->set_notnull();
957 }
958
959 if (table->field[9]->store((longlong)global_system_variables.server_id, TRUE))
960 goto err;
961 table->field[9]->set_notnull();
962
963 /*
964 Column sql_text.
965 A positive return value in store() means truncation.
966 Still logging a message in the log in this case.
967 */
968 if (table->field[10]->store(sql_text, sql_text_len, client_cs) < 0)
969 goto err;
970
971 if (table->field[11]->store((longlong) thd->thread_id, TRUE))
972 goto err;
973
974 /* Rows_affected */
975 if (table->field[12]->store(thd->get_stmt_da()->is_ok() ?
976 (longlong) thd->get_stmt_da()->affected_rows() :
977 0, TRUE))
978 goto err;
979
980 /* log table entries are not replicated */
981 if (table->file->ha_write_row(table->record[0]))
982 goto err;
983
984 result= FALSE;
985
986 err:
987 thd->pop_internal_handler();
988
989 if (result && !thd->killed)
990 sql_print_error("Failed to write to mysql.slow_log: %s",
991 error_handler.message());
992
993 if (need_rnd_end)
994 {
995 table->file->ha_rnd_end();
996 table->file->ha_release_auto_increment();
997 }
998 if (need_close)
999 close_log_table(thd, &open_tables_backup);
1000 thd->time_zone_used= save_time_zone_used;
1001 DBUG_RETURN(result);
1002 }
1003
1004 int Log_to_csv_event_handler::
activate_log(THD * thd,uint log_table_type)1005 activate_log(THD *thd, uint log_table_type)
1006 {
1007 TABLE_LIST table_list;
1008 TABLE *table;
1009 LEX_CSTRING *UNINIT_VAR(log_name);
1010 int result;
1011 Open_tables_backup open_tables_backup;
1012
1013 DBUG_ENTER("Log_to_csv_event_handler::activate_log");
1014
1015 if (log_table_type == QUERY_LOG_GENERAL)
1016 {
1017 log_name= &GENERAL_LOG_NAME;
1018 }
1019 else
1020 {
1021 DBUG_ASSERT(log_table_type == QUERY_LOG_SLOW);
1022
1023 log_name= &SLOW_LOG_NAME;
1024 }
1025 table_list.init_one_table(&MYSQL_SCHEMA_NAME, log_name, 0, TL_WRITE_CONCURRENT_INSERT);
1026
1027 table= open_log_table(thd, &table_list, &open_tables_backup);
1028 if (table)
1029 {
1030 result= 0;
1031 close_log_table(thd, &open_tables_backup);
1032 }
1033 else
1034 result= 1;
1035
1036 DBUG_RETURN(result);
1037 }
1038
1039 bool Log_to_csv_event_handler::
log_error(enum loglevel level,const char * format,va_list args)1040 log_error(enum loglevel level, const char *format, va_list args)
1041 {
1042 /* No log table is implemented */
1043 DBUG_ASSERT(0);
1044 return FALSE;
1045 }
1046
1047 bool Log_to_file_event_handler::
log_error(enum loglevel level,const char * format,va_list args)1048 log_error(enum loglevel level, const char *format,
1049 va_list args)
1050 {
1051 return vprint_msg_to_log(level, format, args);
1052 }
1053
init_pthread_objects()1054 void Log_to_file_event_handler::init_pthread_objects()
1055 {
1056 mysql_log.init_pthread_objects();
1057 mysql_slow_log.init_pthread_objects();
1058 }
1059
1060
1061 /** Wrapper around MYSQL_LOG::write() for slow log. */
1062
1063 bool Log_to_file_event_handler::
log_slow(THD * thd,my_hrtime_t current_time,const char * user_host,size_t user_host_len,ulonglong query_utime,ulonglong lock_utime,bool is_command,const char * sql_text,size_t sql_text_len)1064 log_slow(THD *thd, my_hrtime_t current_time,
1065 const char *user_host, size_t user_host_len,
1066 ulonglong query_utime, ulonglong lock_utime, bool is_command,
1067 const char *sql_text, size_t sql_text_len)
1068 {
1069 Silence_log_table_errors error_handler;
1070 thd->push_internal_handler(&error_handler);
1071 bool retval= mysql_slow_log.write(thd, hrtime_to_my_time(current_time),
1072 user_host, user_host_len,
1073 query_utime, lock_utime, is_command,
1074 sql_text, sql_text_len);
1075 thd->pop_internal_handler();
1076 return retval;
1077 }
1078
1079
1080 /**
1081 Wrapper around MYSQL_LOG::write() for general log. We need it since we
1082 want all log event handlers to have the same signature.
1083 */
1084
1085 bool Log_to_file_event_handler::
log_general(THD * thd,my_hrtime_t event_time,const char * user_host,size_t user_host_len,my_thread_id thread_id_arg,const char * command_type,size_t command_type_len,const char * sql_text,size_t sql_text_len,CHARSET_INFO * client_cs)1086 log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
1087 const char *command_type, size_t command_type_len,
1088 const char *sql_text, size_t sql_text_len,
1089 CHARSET_INFO *client_cs)
1090 {
1091 Silence_log_table_errors error_handler;
1092 thd->push_internal_handler(&error_handler);
1093 bool retval= mysql_log.write(hrtime_to_time(event_time), user_host,
1094 user_host_len,
1095 thread_id_arg, command_type, command_type_len,
1096 sql_text, sql_text_len);
1097 thd->pop_internal_handler();
1098 return retval;
1099 }
1100
1101
init()1102 bool Log_to_file_event_handler::init()
1103 {
1104 if (!is_initialized)
1105 {
1106 if (global_system_variables.sql_log_slow)
1107 mysql_slow_log.open_slow_log(opt_slow_logname);
1108
1109 if (opt_log)
1110 mysql_log.open_query_log(opt_logname);
1111
1112 is_initialized= TRUE;
1113 }
1114
1115 return FALSE;
1116 }
1117
1118
cleanup()1119 void Log_to_file_event_handler::cleanup()
1120 {
1121 mysql_log.cleanup();
1122 mysql_slow_log.cleanup();
1123 }
1124
flush()1125 void Log_to_file_event_handler::flush()
1126 {
1127 /* reopen log files */
1128 if (opt_log)
1129 mysql_log.reopen_file();
1130 if (global_system_variables.sql_log_slow)
1131 mysql_slow_log.reopen_file();
1132 }
1133
1134 /*
1135 Log error with all enabled log event handlers
1136
1137 SYNOPSIS
1138 error_log_print()
1139
1140 level The level of the error significance: NOTE,
1141 WARNING or ERROR.
1142 format format string for the error message
1143 args list of arguments for the format string
1144
1145 RETURN
1146 FALSE - OK
1147 TRUE - error occurred
1148 */
1149
error_log_print(enum loglevel level,const char * format,va_list args)1150 bool LOGGER::error_log_print(enum loglevel level, const char *format,
1151 va_list args)
1152 {
1153 bool error= FALSE;
1154 Log_event_handler **current_handler;
1155 THD *thd= current_thd;
1156
1157 if (likely(thd))
1158 thd->error_printed_to_log= 1;
1159
1160 /* currently we don't need locking here as there is no error_log table */
1161 for (current_handler= error_log_handler_list ; *current_handler ;)
1162 error= (*current_handler++)->log_error(level, format, args) || error;
1163
1164 return error;
1165 }
1166
1167
cleanup_base()1168 void LOGGER::cleanup_base()
1169 {
1170 DBUG_ASSERT(inited == 1);
1171 mysql_rwlock_destroy(&LOCK_logger);
1172 if (table_log_handler)
1173 {
1174 table_log_handler->cleanup();
1175 delete table_log_handler;
1176 table_log_handler= NULL;
1177 }
1178 if (file_log_handler)
1179 file_log_handler->cleanup();
1180 }
1181
1182
cleanup_end()1183 void LOGGER::cleanup_end()
1184 {
1185 DBUG_ASSERT(inited == 1);
1186 if (file_log_handler)
1187 {
1188 delete file_log_handler;
1189 file_log_handler=NULL;
1190 }
1191 inited= 0;
1192 }
1193
1194
1195 /**
1196 Perform basic log initialization: create file-based log handler and
1197 init error log.
1198 */
init_base()1199 void LOGGER::init_base()
1200 {
1201 DBUG_ASSERT(inited == 0);
1202 inited= 1;
1203
1204 /*
1205 Here we create file log handler. We don't do it for the table log handler
1206 here as it cannot be created so early. The reason is THD initialization,
1207 which depends on the system variables (parsed later).
1208 */
1209 if (!file_log_handler)
1210 file_log_handler= new Log_to_file_event_handler;
1211
1212 /* by default we use traditional error log */
1213 init_error_log(LOG_FILE);
1214
1215 file_log_handler->init_pthread_objects();
1216 mysql_rwlock_init(key_rwlock_LOCK_logger, &LOCK_logger);
1217 }
1218
1219
init_log_tables()1220 void LOGGER::init_log_tables()
1221 {
1222 if (!table_log_handler)
1223 table_log_handler= new Log_to_csv_event_handler;
1224
1225 if (!is_log_tables_initialized &&
1226 !table_log_handler->init() && !file_log_handler->init())
1227 is_log_tables_initialized= TRUE;
1228 }
1229
1230
1231 /**
1232 Close and reopen the slow log (with locks).
1233
1234 @returns FALSE.
1235 */
flush_slow_log()1236 bool LOGGER::flush_slow_log()
1237 {
1238 /*
1239 Now we lock logger, as nobody should be able to use logging routines while
1240 log tables are closed
1241 */
1242 logger.lock_exclusive();
1243
1244 /* Reopen slow log file */
1245 if (global_system_variables.sql_log_slow)
1246 file_log_handler->get_mysql_slow_log()->reopen_file();
1247
1248 /* End of log flush */
1249 logger.unlock();
1250
1251 return 0;
1252 }
1253
1254
1255 /**
1256 Close and reopen the general log (with locks).
1257
1258 @returns FALSE.
1259 */
flush_general_log()1260 bool LOGGER::flush_general_log()
1261 {
1262 /*
1263 Now we lock logger, as nobody should be able to use logging routines while
1264 log tables are closed
1265 */
1266 logger.lock_exclusive();
1267
1268 /* Reopen general log file */
1269 if (opt_log)
1270 file_log_handler->get_mysql_log()->reopen_file();
1271
1272 /* End of log flush */
1273 logger.unlock();
1274
1275 return 0;
1276 }
1277
1278
1279 /*
1280 Log slow query with all enabled log event handlers
1281
1282 SYNOPSIS
1283 slow_log_print()
1284
1285 thd THD of the query being logged
1286 query The query being logged
1287 query_length The length of the query string
1288 current_utime Current time in microseconds (from undefined start)
1289
1290 RETURN
1291 FALSE OK
1292 TRUE error occurred
1293 */
1294
slow_log_print(THD * thd,const char * query,size_t query_length,ulonglong current_utime)1295 bool LOGGER::slow_log_print(THD *thd, const char *query, size_t query_length,
1296 ulonglong current_utime)
1297
1298 {
1299 bool error= FALSE;
1300 Log_event_handler **current_handler;
1301 bool is_command= FALSE;
1302 char user_host_buff[MAX_USER_HOST_SIZE + 1];
1303 Security_context *sctx= thd->security_ctx;
1304 uint user_host_len= 0;
1305 ulonglong query_utime, lock_utime;
1306
1307 DBUG_ASSERT(thd->enable_slow_log);
1308 /*
1309 Print the message to the buffer if we have slow log enabled
1310 */
1311
1312 if (*slow_log_handler_list)
1313 {
1314 /* do not log slow queries from replication threads */
1315 if (!thd->variables.sql_log_slow)
1316 return 0;
1317
1318 lock_shared();
1319 if (!global_system_variables.sql_log_slow)
1320 {
1321 unlock();
1322 return 0;
1323 }
1324
1325 /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
1326 user_host_len= (uint)(strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
1327 sctx->priv_user, "[",
1328 sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), "] @ ",
1329 sctx->host ? sctx->host : "", " [",
1330 sctx->ip ? sctx->ip : "", "]", NullS) -
1331 user_host_buff);
1332
1333 DBUG_ASSERT(thd->start_utime);
1334 DBUG_ASSERT(thd->start_time);
1335 query_utime= (current_utime - thd->start_utime);
1336 lock_utime= (thd->utime_after_lock - thd->start_utime);
1337 my_hrtime_t current_time= { hrtime_from_time(thd->start_time) +
1338 thd->start_time_sec_part + query_utime };
1339
1340 if (!query)
1341 {
1342 is_command= TRUE;
1343 query= command_name[thd->get_command()].str;
1344 query_length= (uint)command_name[thd->get_command()].length;
1345 }
1346
1347 for (current_handler= slow_log_handler_list; *current_handler ;)
1348 error= (*current_handler++)->log_slow(thd, current_time,
1349 user_host_buff, user_host_len,
1350 query_utime, lock_utime, is_command,
1351 query, query_length) || error;
1352
1353 unlock();
1354 }
1355 return error;
1356 }
1357
general_log_write(THD * thd,enum enum_server_command command,const char * query,size_t query_length)1358 bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
1359 const char *query, size_t query_length)
1360 {
1361 bool error= FALSE;
1362 Log_event_handler **current_handler= general_log_handler_list;
1363 char user_host_buff[MAX_USER_HOST_SIZE + 1];
1364 uint user_host_len= 0;
1365 my_hrtime_t current_time;
1366
1367 DBUG_ASSERT(thd);
1368
1369 user_host_len= make_user_name(thd, user_host_buff);
1370
1371 current_time= my_hrtime();
1372
1373 mysql_audit_general_log(thd, hrtime_to_time(current_time),
1374 user_host_buff, user_host_len,
1375 command_name[(uint) command].str,
1376 (uint)command_name[(uint) command].length,
1377 query, (uint)query_length);
1378
1379 if (opt_log && log_command(thd, command))
1380 {
1381 lock_shared();
1382 while (*current_handler)
1383 error|= (*current_handler++)->
1384 log_general(thd, current_time, user_host_buff,
1385 user_host_len, thd->thread_id,
1386 command_name[(uint) command].str,
1387 command_name[(uint) command].length,
1388 query, query_length,
1389 thd->variables.character_set_client) || error;
1390 unlock();
1391 }
1392
1393 return error;
1394 }
1395
general_log_print(THD * thd,enum enum_server_command command,const char * format,va_list args)1396 bool LOGGER::general_log_print(THD *thd, enum enum_server_command command,
1397 const char *format, va_list args)
1398 {
1399 size_t message_buff_len= 0;
1400 char message_buff[MAX_LOG_BUFFER_SIZE];
1401
1402 /* prepare message */
1403 if (format)
1404 message_buff_len= my_vsnprintf(message_buff, sizeof(message_buff),
1405 format, args);
1406 else
1407 message_buff[0]= '\0';
1408
1409 return general_log_write(thd, command, message_buff, message_buff_len);
1410 }
1411
init_error_log(ulonglong error_log_printer)1412 void LOGGER::init_error_log(ulonglong error_log_printer)
1413 {
1414 if (error_log_printer & LOG_NONE)
1415 {
1416 error_log_handler_list[0]= 0;
1417 return;
1418 }
1419
1420 switch (error_log_printer) {
1421 case LOG_FILE:
1422 error_log_handler_list[0]= file_log_handler;
1423 error_log_handler_list[1]= 0;
1424 break;
1425 /* these two are disabled for now */
1426 case LOG_TABLE:
1427 DBUG_ASSERT(0);
1428 break;
1429 case LOG_TABLE|LOG_FILE:
1430 DBUG_ASSERT(0);
1431 break;
1432 }
1433 }
1434
init_slow_log(ulonglong slow_log_printer)1435 void LOGGER::init_slow_log(ulonglong slow_log_printer)
1436 {
1437 if (slow_log_printer & LOG_NONE)
1438 {
1439 slow_log_handler_list[0]= 0;
1440 return;
1441 }
1442
1443 switch (slow_log_printer) {
1444 case LOG_FILE:
1445 slow_log_handler_list[0]= file_log_handler;
1446 slow_log_handler_list[1]= 0;
1447 break;
1448 case LOG_TABLE:
1449 slow_log_handler_list[0]= table_log_handler;
1450 slow_log_handler_list[1]= 0;
1451 break;
1452 case LOG_TABLE|LOG_FILE:
1453 slow_log_handler_list[0]= file_log_handler;
1454 slow_log_handler_list[1]= table_log_handler;
1455 slow_log_handler_list[2]= 0;
1456 break;
1457 }
1458 }
1459
init_general_log(ulonglong general_log_printer)1460 void LOGGER::init_general_log(ulonglong general_log_printer)
1461 {
1462 if (general_log_printer & LOG_NONE)
1463 {
1464 general_log_handler_list[0]= 0;
1465 return;
1466 }
1467
1468 switch (general_log_printer) {
1469 case LOG_FILE:
1470 general_log_handler_list[0]= file_log_handler;
1471 general_log_handler_list[1]= 0;
1472 break;
1473 case LOG_TABLE:
1474 general_log_handler_list[0]= table_log_handler;
1475 general_log_handler_list[1]= 0;
1476 break;
1477 case LOG_TABLE|LOG_FILE:
1478 general_log_handler_list[0]= file_log_handler;
1479 general_log_handler_list[1]= table_log_handler;
1480 general_log_handler_list[2]= 0;
1481 break;
1482 }
1483 }
1484
1485
activate_log_handler(THD * thd,uint log_type)1486 bool LOGGER::activate_log_handler(THD* thd, uint log_type)
1487 {
1488 MYSQL_QUERY_LOG *file_log;
1489 bool res= FALSE;
1490 lock_exclusive();
1491 switch (log_type) {
1492 case QUERY_LOG_SLOW:
1493 if (!global_system_variables.sql_log_slow)
1494 {
1495 file_log= file_log_handler->get_mysql_slow_log();
1496
1497 file_log->open_slow_log(opt_slow_logname);
1498 if (table_log_handler->activate_log(thd, QUERY_LOG_SLOW))
1499 {
1500 /* Error printed by open table in activate_log() */
1501 res= TRUE;
1502 file_log->close(0);
1503 }
1504 else
1505 {
1506 init_slow_log(log_output_options);
1507 global_system_variables.sql_log_slow= TRUE;
1508 }
1509 }
1510 break;
1511 case QUERY_LOG_GENERAL:
1512 if (!opt_log)
1513 {
1514 file_log= file_log_handler->get_mysql_log();
1515
1516 file_log->open_query_log(opt_logname);
1517 if (table_log_handler->activate_log(thd, QUERY_LOG_GENERAL))
1518 {
1519 /* Error printed by open table in activate_log() */
1520 res= TRUE;
1521 file_log->close(0);
1522 }
1523 else
1524 {
1525 init_general_log(log_output_options);
1526 opt_log= TRUE;
1527 }
1528 }
1529 break;
1530 default:
1531 DBUG_ASSERT(0);
1532 }
1533 unlock();
1534 return res;
1535 }
1536
1537
deactivate_log_handler(THD * thd,uint log_type)1538 void LOGGER::deactivate_log_handler(THD *thd, uint log_type)
1539 {
1540 my_bool *tmp_opt= 0;
1541 MYSQL_LOG *UNINIT_VAR(file_log);
1542
1543 switch (log_type) {
1544 case QUERY_LOG_SLOW:
1545 tmp_opt= &global_system_variables.sql_log_slow;
1546 file_log= file_log_handler->get_mysql_slow_log();
1547 break;
1548 case QUERY_LOG_GENERAL:
1549 tmp_opt= &opt_log;
1550 file_log= file_log_handler->get_mysql_log();
1551 break;
1552 default:
1553 MY_ASSERT_UNREACHABLE();
1554 }
1555
1556 if (!(*tmp_opt))
1557 return;
1558
1559 lock_exclusive();
1560 file_log->close(0);
1561 *tmp_opt= FALSE;
1562 unlock();
1563 }
1564
1565
1566 /* the parameters are unused for the log tables */
init()1567 bool Log_to_csv_event_handler::init()
1568 {
1569 return 0;
1570 }
1571
set_handlers(ulonglong error_log_printer,ulonglong slow_log_printer,ulonglong general_log_printer)1572 int LOGGER::set_handlers(ulonglong error_log_printer,
1573 ulonglong slow_log_printer,
1574 ulonglong general_log_printer)
1575 {
1576 /* error log table is not supported yet */
1577 DBUG_ASSERT(error_log_printer < LOG_TABLE);
1578
1579 lock_exclusive();
1580
1581 if ((slow_log_printer & LOG_TABLE || general_log_printer & LOG_TABLE) &&
1582 !is_log_tables_initialized)
1583 {
1584 slow_log_printer= (slow_log_printer & ~LOG_TABLE) | LOG_FILE;
1585 general_log_printer= (general_log_printer & ~LOG_TABLE) | LOG_FILE;
1586
1587 sql_print_error("Failed to initialize log tables. "
1588 "Falling back to the old-fashioned logs");
1589 }
1590
1591 init_error_log(error_log_printer);
1592 init_slow_log(slow_log_printer);
1593 init_general_log(general_log_printer);
1594
1595 unlock();
1596
1597 return 0;
1598 }
1599
1600 /*
1601 Save position of binary log transaction cache.
1602
1603 SYNPOSIS
1604 binlog_trans_log_savepos()
1605
1606 thd The thread to take the binlog data from
1607 pos Pointer to variable where the position will be stored
1608
1609 DESCRIPTION
1610
1611 Save the current position in the binary log transaction cache into
1612 the variable pointed to by 'pos'
1613 */
1614
1615 static void
binlog_trans_log_savepos(THD * thd,my_off_t * pos)1616 binlog_trans_log_savepos(THD *thd, my_off_t *pos)
1617 {
1618 DBUG_ENTER("binlog_trans_log_savepos");
1619 DBUG_ASSERT(pos != NULL);
1620 binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
1621 DBUG_ASSERT((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open());
1622 *pos= cache_mngr->trx_cache.get_byte_position();
1623 DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos));
1624 DBUG_VOID_RETURN;
1625 }
1626
1627
1628 /*
1629 Truncate the binary log transaction cache.
1630
1631 SYNPOSIS
1632 binlog_trans_log_truncate()
1633
1634 thd The thread to take the binlog data from
1635 pos Position to truncate to
1636
1637 DESCRIPTION
1638
1639 Truncate the binary log to the given position. Will not change
1640 anything else.
1641
1642 */
1643 static void
binlog_trans_log_truncate(THD * thd,my_off_t pos)1644 binlog_trans_log_truncate(THD *thd, my_off_t pos)
1645 {
1646 DBUG_ENTER("binlog_trans_log_truncate");
1647 DBUG_PRINT("enter", ("pos: %lu", (ulong) pos));
1648
1649 DBUG_ASSERT(thd_get_ha_data(thd, binlog_hton) != NULL);
1650 /* Only true if binlog_trans_log_savepos() wasn't called before */
1651 DBUG_ASSERT(pos != ~(my_off_t) 0);
1652
1653 binlog_cache_mngr *const cache_mngr=
1654 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
1655 cache_mngr->trx_cache.restore_savepoint(pos);
1656 DBUG_VOID_RETURN;
1657 }
1658
1659
1660 /*
1661 this function is mostly a placeholder.
1662 conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
1663 should be moved here.
1664 */
1665
binlog_init(void * p)1666 int binlog_init(void *p)
1667 {
1668 binlog_hton= (handlerton *)p;
1669 binlog_hton->state= (WSREP_ON || opt_bin_log) ? SHOW_OPTION_YES
1670 : SHOW_OPTION_NO;
1671 binlog_hton->db_type=DB_TYPE_BINLOG;
1672 binlog_hton->savepoint_offset= sizeof(my_off_t);
1673 binlog_hton->close_connection= binlog_close_connection;
1674 binlog_hton->savepoint_set= binlog_savepoint_set;
1675 binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
1676 binlog_hton->savepoint_rollback_can_release_mdl=
1677 binlog_savepoint_rollback_can_release_mdl;
1678 binlog_hton->commit= binlog_commit;
1679 binlog_hton->rollback= binlog_rollback;
1680 binlog_hton->prepare= binlog_prepare;
1681 binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
1682 binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
1683 return 0;
1684 }
1685
1686 #ifdef WITH_WSREP
1687 #include "wsrep_binlog.h"
1688 #endif /* WITH_WSREP */
/**
  close_connection callback of the binlog handlerton.

  Detaches the binlog cache manager from the thread, destroys it and frees
  its memory.  In WSREP builds, a non-empty transaction cache is reported
  with warnings first and the cache contents are dumped.

  @param hton  Not used; the global binlog_hton is used instead
  @param thd   Thread whose connection is being closed

  @return always 0
*/
static int binlog_close_connection(handlerton *hton, THD *thd)
{
  DBUG_ENTER("binlog_close_connection");
  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
#ifdef WITH_WSREP
  /* Diagnostics only: entered when the trx cache still holds data */
  if (WSREP(thd) && cache_mngr && !cache_mngr->trx_cache.empty()) {
    /* presumably 'true' selects the transaction cache -- TODO confirm */
    IO_CACHE* cache= cache_mngr->get_binlog_cache_log(true);
    uchar *buf;
    size_t len=0;
    /*
      NOTE(review): buf is not released in this function; confirm who owns
      the buffer filled in by wsrep_write_cache_buf().
    */
    wsrep_write_cache_buf(cache, &buf, &len);
    WSREP_WARN("binlog trx cache not empty (%zu bytes) @ connection close %lld",
               len, (longlong) thd->thread_id);
    if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);

    /*
      The statement cache warning is printed whenever this branch runs, even
      though only the trx cache was tested for emptiness above.
    */
    cache = cache_mngr->get_binlog_cache_log(false);
    wsrep_write_cache_buf(cache, &buf, &len);
    WSREP_WARN("binlog stmt cache not empty (%zu bytes) @ connection close %lld",
               len, (longlong) thd->thread_id);
    if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
  }
#endif /* WITH_WSREP */
  /*
    NOTE(review): cache_mngr is dereferenced here without a NULL check,
    while the WSREP branch above tests it; presumably the callback is only
    invoked when binlog data is attached to the thread -- confirm.
  */
  DBUG_ASSERT(cache_mngr->trx_cache.empty() && cache_mngr->stmt_cache.empty());
  thd_set_ha_data(thd, binlog_hton, NULL);
  /* Explicit destructor call followed by my_free() of the storage */
  cache_mngr->~binlog_cache_mngr();
  my_free(cache_mngr);
  DBUG_RETURN(0);
}
1717
1718 /*
1719 This function flushes a cache upon commit/rollback.
1720
1721 SYNOPSIS
1722 binlog_flush_cache()
1723
1724 thd The thread whose transaction should be ended
1725 cache_mngr Pointer to the binlog_cache_mngr to use
1726 all True if the entire transaction should be ended, false if
1727 only the statement transaction should be ended.
1728 end_ev The end event to use (COMMIT, ROLLBACK, or commit XID)
1729 using_stmt True if the statement cache should be flushed
1730 using_trx True if the transaction cache should be flushed
1731
1732 DESCRIPTION
1733
1734 End the currently transaction or statement. The transaction can be either
1735 a real transaction or a statement transaction.
1736
1737 This can be to commit a transaction, with a COMMIT query event or an XA
1738 commit XID event. But it can also be to rollback a transaction with a
1739 ROLLBACK query event, used for rolling back transactions which also
1740 contain updates to non-transactional tables. Or it can be a flush of
1741 a statement cache.
1742 */
1743
1744 static int
binlog_flush_cache(THD * thd,binlog_cache_mngr * cache_mngr,Log_event * end_ev,bool all,bool using_stmt,bool using_trx)1745 binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
1746 Log_event *end_ev, bool all, bool using_stmt,
1747 bool using_trx)
1748 {
1749 int error= 0;
1750 DBUG_ENTER("binlog_flush_cache");
1751 DBUG_PRINT("enter", ("end_ev: %p", end_ev));
1752
1753 if ((using_stmt && !cache_mngr->stmt_cache.empty()) ||
1754 (using_trx && !cache_mngr->trx_cache.empty()))
1755 {
1756 if (using_stmt && thd->binlog_flush_pending_rows_event(TRUE, FALSE))
1757 DBUG_RETURN(1);
1758 if (using_trx && thd->binlog_flush_pending_rows_event(TRUE, TRUE))
1759 DBUG_RETURN(1);
1760
1761 /*
1762 Doing a commit or a rollback including non-transactional tables,
1763 i.e., ending a transaction where we might write the transaction
1764 cache to the binary log.
1765
1766 We can always end the statement when ending a transaction since
1767 transactions are not allowed inside stored functions. If they
1768 were, we would have to ensure that we're not ending a statement
1769 inside a stored function.
1770 */
1771 error= mysql_bin_log.write_transaction_to_binlog(thd, cache_mngr,
1772 end_ev, all,
1773 using_stmt, using_trx);
1774 }
1775 else
1776 {
1777 /*
1778 This can happen in row-format binlog with something like
1779 BEGIN; INSERT INTO nontrans_table; INSERT IGNORE INTO trans_table;
1780 The nontrans_table is written directly into the binlog before commit,
1781 and if the trans_table is ignored there will be no rows to write when
1782 we get here.
1783
1784 So there is no work to do. Therefore, we will not increment any XID
1785 count, so we must not decrement any XID count in unlog().
1786 */
1787 cache_mngr->need_unlog= 0;
1788 }
1789 cache_mngr->reset(using_stmt, using_trx);
1790
1791 DBUG_ASSERT((!using_stmt || cache_mngr->stmt_cache.empty()) &&
1792 (!using_trx || cache_mngr->trx_cache.empty()));
1793 DBUG_RETURN(error);
1794 }
1795
1796
1797 /**
1798 This function flushes the stmt-cache upon commit.
1799
1800 @param thd The thread whose transaction should be flushed
1801 @param cache_mngr Pointer to the cache manager
1802
1803 @return
1804 nonzero if an error pops up when flushing the cache.
1805 */
1806 static inline int
binlog_commit_flush_stmt_cache(THD * thd,bool all,binlog_cache_mngr * cache_mngr)1807 binlog_commit_flush_stmt_cache(THD *thd, bool all,
1808 binlog_cache_mngr *cache_mngr)
1809 {
1810 DBUG_ENTER("binlog_commit_flush_stmt_cache");
1811 #ifdef WITH_WSREP
1812 if (thd->wsrep_mysql_replicated > 0)
1813 {
1814 DBUG_ASSERT(WSREP(thd));
1815 WSREP_DEBUG("avoiding binlog_commit_flush_trx_cache: %d",
1816 thd->wsrep_mysql_replicated);
1817 return 0;
1818 }
1819 #endif
1820
1821 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1822 FALSE, TRUE, TRUE, 0);
1823 DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE));
1824 }
1825
1826 /**
1827 This function flushes the trx-cache upon commit.
1828
1829 @param thd The thread whose transaction should be flushed
1830 @param cache_mngr Pointer to the cache manager
1831
1832 @return
1833 nonzero if an error pops up when flushing the cache.
1834 */
1835 static inline int
binlog_commit_flush_trx_cache(THD * thd,bool all,binlog_cache_mngr * cache_mngr)1836 binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr)
1837 {
1838 DBUG_ENTER("binlog_commit_flush_trx_cache");
1839 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1840 TRUE, TRUE, TRUE, 0);
1841 DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
1842 }
1843
1844 /**
1845 This function flushes the trx-cache upon rollback.
1846
1847 @param thd The thread whose transaction should be flushed
1848 @param cache_mngr Pointer to the cache manager
1849
1850 @return
1851 nonzero if an error pops up when flushing the cache.
1852 */
1853 static inline int
binlog_rollback_flush_trx_cache(THD * thd,bool all,binlog_cache_mngr * cache_mngr)1854 binlog_rollback_flush_trx_cache(THD *thd, bool all,
1855 binlog_cache_mngr *cache_mngr)
1856 {
1857 Query_log_event end_evt(thd, STRING_WITH_LEN("ROLLBACK"),
1858 TRUE, TRUE, TRUE, 0);
1859 return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
1860 }
1861
1862 /**
1863 This function flushes the trx-cache upon commit.
1864
1865 @param thd The thread whose transaction should be flushed
1866 @param cache_mngr Pointer to the cache manager
1867 @param xid Transaction Id
1868
1869 @return
1870 nonzero if an error pops up when flushing the cache.
1871 */
1872 static inline int
binlog_commit_flush_xid_caches(THD * thd,binlog_cache_mngr * cache_mngr,bool all,my_xid xid)1873 binlog_commit_flush_xid_caches(THD *thd, binlog_cache_mngr *cache_mngr,
1874 bool all, my_xid xid)
1875 {
1876 if (xid)
1877 {
1878 Xid_log_event end_evt(thd, xid, TRUE);
1879 return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
1880 }
1881 else
1882 {
1883 /*
1884 Empty xid occurs in XA COMMIT ... ONE PHASE.
1885 In this case, we do not have a MySQL xid for the transaction, and the
1886 external XA transaction coordinator will have to handle recovery if
1887 needed. So we end the transaction with a plain COMMIT query event.
1888 */
1889 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1890 TRUE, TRUE, TRUE, 0);
1891 return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
1892 }
1893 }
1894
1895 /**
1896 This function truncates the transactional cache upon committing or rolling
1897 back either a transaction or a statement.
1898
1899 @param thd The thread whose transaction should be flushed
1900 @param cache_mngr Pointer to the cache data to be flushed
1901 @param all @c true means truncate the transaction, otherwise the
1902 statement must be truncated.
1903
1904 @return
1905 nonzero if an error pops up when truncating the transactional cache.
1906 */
1907 static int
binlog_truncate_trx_cache(THD * thd,binlog_cache_mngr * cache_mngr,bool all)1908 binlog_truncate_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all)
1909 {
1910 DBUG_ENTER("binlog_truncate_trx_cache");
1911 int error=0;
1912 /*
1913 This function handles transactional changes and as such this flag
1914 equals to true.
1915 */
1916 bool const is_transactional= TRUE;
1917
1918 DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1919 FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1920 FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1921 all ? "all" : "stmt"));
1922
1923 thd->binlog_remove_pending_rows_event(TRUE, is_transactional);
1924 /*
1925 If rolling back an entire transaction or a single statement not
1926 inside a transaction, we reset the transaction cache.
1927 */
1928 if (ending_trans(thd, all))
1929 {
1930 if (cache_mngr->trx_cache.has_incident())
1931 error= mysql_bin_log.write_incident(thd);
1932
1933 thd->clear_binlog_table_maps();
1934
1935 cache_mngr->reset(false, true);
1936 }
1937 /*
1938 If rolling back a statement in a transaction, we truncate the
1939 transaction cache to remove the statement.
1940 */
1941 else
1942 cache_mngr->trx_cache.restore_prev_position();
1943
1944 DBUG_ASSERT(thd->binlog_get_pending_rows_event(is_transactional) == NULL);
1945 DBUG_RETURN(error);
1946 }
1947
binlog_prepare(handlerton * hton,THD * thd,bool all)1948 static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1949 {
1950 /*
1951 do nothing.
1952 just pretend we can do 2pc, so that MySQL won't
1953 switch to 1pc.
1954 real work will be done in MYSQL_BIN_LOG::log_and_order()
1955 */
1956 return 0;
1957 }
1958
1959 /*
1960 We flush the cache wrapped in a beging/rollback if:
1961 . aborting a single or multi-statement transaction and;
1962 . the OPTION_KEEP_LOG is active or;
1963 . the format is STMT and a non-trans table was updated or;
1964 . the format is MIXED and a temporary non-trans table was
1965 updated or;
1966 . the format is MIXED, non-trans table was updated and
1967 aborting a single statement transaction;
1968 */
trans_cannot_safely_rollback(THD * thd,bool all)1969 static bool trans_cannot_safely_rollback(THD *thd, bool all)
1970 {
1971 DBUG_ASSERT(ending_trans(thd, all));
1972
1973 return ((thd->variables.option_bits & OPTION_KEEP_LOG) ||
1974 (trans_has_updated_non_trans_table(thd) &&
1975 thd->wsrep_binlog_format() == BINLOG_FORMAT_STMT) ||
1976 (thd->transaction.all.has_modified_non_trans_temp_table() &&
1977 thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED) ||
1978 (trans_has_updated_non_trans_table(thd) &&
1979 ending_single_stmt_trans(thd,all) &&
1980 thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED));
1981 }
1982
1983
1984 /**
1985 This function is called once after each statement.
1986
1987 It has the responsibility to flush the caches to the binary log on commits.
1988
1989 @param hton The binlog handlerton.
1990 @param thd The client thread that executes the transaction.
1991 @param all This is @c true if this is a real transaction commit, and
1992 @false otherwise.
1993
1994 @see handlerton::commit
1995 */
binlog_commit(handlerton * hton,THD * thd,bool all)1996 static int binlog_commit(handlerton *hton, THD *thd, bool all)
1997 {
1998 int error= 0;
1999 PSI_stage_info org_stage;
2000 DBUG_ENTER("binlog_commit");
2001
2002 binlog_cache_mngr *const cache_mngr=
2003 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
2004
2005 if (!cache_mngr)
2006 {
2007 DBUG_ASSERT(WSREP(thd));
2008 DBUG_RETURN(0);
2009 }
2010
2011 DBUG_PRINT("debug",
2012 ("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
2013 all,
2014 YESNO(thd->in_multi_stmt_transaction_mode()),
2015 YESNO(thd->transaction.all.modified_non_trans_table),
2016 YESNO(thd->transaction.stmt.modified_non_trans_table)));
2017
2018
2019 thd->backup_stage(&org_stage);
2020 THD_STAGE_INFO(thd, stage_binlog_write);
2021 if (!cache_mngr->stmt_cache.empty())
2022 {
2023 error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
2024 }
2025
2026 if (cache_mngr->trx_cache.empty())
2027 {
2028 /*
2029 we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
2030 */
2031 cache_mngr->reset(false, true);
2032 THD_STAGE_INFO(thd, org_stage);
2033 DBUG_RETURN(error);
2034 }
2035
2036 /*
2037 We commit the transaction if:
2038 - We are not in a transaction and committing a statement, or
2039 - We are in a transaction and a full transaction is committed.
2040 Otherwise, we accumulate the changes.
2041 */
2042 if (likely(!error) && ending_trans(thd, all))
2043 error= binlog_commit_flush_trx_cache(thd, all, cache_mngr);
2044
2045 /*
2046 This is part of the stmt rollback.
2047 */
2048 if (!all)
2049 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
2050
2051 THD_STAGE_INFO(thd, org_stage);
2052 DBUG_RETURN(error);
2053 }
2054
2055 /**
2056 This function is called when a transaction or a statement is rolled back.
2057
2058 @param hton The binlog handlerton.
2059 @param thd The client thread that executes the transaction.
2060 @param all This is @c true if this is a real transaction rollback, and
2061 @false otherwise.
2062
2063 @see handlerton::rollback
2064 */
static int binlog_rollback(handlerton *hton, THD *thd, bool all)
{
  DBUG_ENTER("binlog_rollback");
  int error= 0;
  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);

  /*
    No cache manager is attached to this connection, so there is nothing to
    flush or truncate. The assertion documents that this is only expected
    when WSREP(thd) holds.
  */
  if (!cache_mngr)
  {
    DBUG_ASSERT(WSREP(thd));
    DBUG_RETURN(0);
  }

  DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
                       YESNO(all),
                       YESNO(thd->transaction.all.modified_non_trans_table),
                       YESNO(thd->transaction.stmt.modified_non_trans_table)));

  /*
    If an incident event is set we do not flush the content of the statement
    cache because it may be corrupted.
  */
  if (cache_mngr->stmt_cache.has_incident())
  {
    error= mysql_bin_log.write_incident(thd);
    cache_mngr->reset(true, false);
  }
  else if (!cache_mngr->stmt_cache.empty())
  {
    error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
  }

  if (cache_mngr->trx_cache.empty())
  {
    /*
      we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
    */
    cache_mngr->reset(false, true);
    DBUG_RETURN(error);
  }
  /*
    From here on the transaction cache is known to be non-empty. The first
    branch handles a pending binlog write error (only when binlog emulation
    for wsrep is off); the second decides between flushing and truncating.
  */
  if (!wsrep_emulate_bin_log && mysql_bin_log.check_write_error(thd))
  {
    /*
      "all == true" means that a "rollback statement" triggered the error and
      this function was called. However, this must not happen as a rollback
      is written directly to the binary log. And in auto-commit mode, a single
      statement that is rolled back has the flag all == false.
    */
    DBUG_ASSERT(!all);
    /*
      We reach this point if the effect of a statement did not properly get into
      a cache and need to be rolled back.
    */
    error |= binlog_truncate_trx_cache(thd, cache_mngr, all);
  }
  else if (likely(!error))
  {
    /*
      The transaction is ending but cannot be rolled back safely: its cache
      content is flushed via binlog_rollback_flush_trx_cache() instead of
      being discarded.
    */
    if (ending_trans(thd, all) && trans_cannot_safely_rollback(thd, all))
      error= binlog_rollback_flush_trx_cache(thd, all, cache_mngr);
    /*
      Truncate the cache if:
        . aborting a single or multi-statement transaction or;
        . the current statement created or dropped a temporary table
          while having actual STATEMENT format;
        . the format is not STMT or no non-trans table was
          updated and;
        . the format is not MIXED or no temporary non-trans table
          was updated.
    */
    else if (ending_trans(thd, all) ||
             (!(thd->transaction.stmt.has_created_dropped_temp_table() &&
                !thd->is_current_stmt_binlog_format_row()) &&
              (!stmt_has_updated_non_trans_table(thd) ||
               thd->wsrep_binlog_format() != BINLOG_FORMAT_STMT) &&
              (!thd->transaction.stmt.has_modified_non_trans_temp_table() ||
               thd->wsrep_binlog_format() != BINLOG_FORMAT_MIXED)))
      error= binlog_truncate_trx_cache(thd, cache_mngr, all);
  }

  /*
    This is part of the stmt rollback.
  */
  if (!all)
    cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);

  DBUG_RETURN(error);
}
2152
2153
binlog_reset_cache(THD * thd)2154 void binlog_reset_cache(THD *thd)
2155 {
2156 binlog_cache_mngr *const cache_mngr= opt_bin_log ?
2157 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton) : 0;
2158 DBUG_ENTER("binlog_reset_cache");
2159 if (cache_mngr)
2160 {
2161 thd->binlog_remove_pending_rows_event(TRUE, TRUE);
2162 cache_mngr->reset(true, true);
2163 }
2164 DBUG_VOID_RETURN;
2165 }
2166
2167
set_write_error(THD * thd,bool is_transactional)2168 void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
2169 {
2170 DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
2171
2172 write_error= 1;
2173
2174 if (unlikely(check_write_error(thd)))
2175 DBUG_VOID_RETURN;
2176
2177 if (my_errno == EFBIG)
2178 {
2179 if (is_transactional)
2180 {
2181 my_message(ER_TRANS_CACHE_FULL, ER_THD(thd, ER_TRANS_CACHE_FULL), MYF(0));
2182 }
2183 else
2184 {
2185 my_message(ER_STMT_CACHE_FULL, ER_THD(thd, ER_STMT_CACHE_FULL), MYF(0));
2186 }
2187 }
2188 else
2189 {
2190 my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
2191 }
2192 #ifdef WITH_WSREP
2193 /* If wsrep transaction is active and binlog emulation is on,
2194 binlog write error may leave transaction without any registered
2195 htons. This makes wsrep rollback hooks to be skipped and the
2196 transaction will remain alive in wsrep world after rollback.
2197 Register binlog hton here to ensure that rollback happens in full. */
2198 if (WSREP_EMULATE_BINLOG(thd))
2199 {
2200 if (is_transactional)
2201 trans_register_ha(thd, TRUE, binlog_hton);
2202 trans_register_ha(thd, FALSE, binlog_hton);
2203 }
2204 #endif /* WITH_WSREP */
2205 DBUG_VOID_RETURN;
2206 }
2207
check_write_error(THD * thd)2208 bool MYSQL_BIN_LOG::check_write_error(THD *thd)
2209 {
2210 DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
2211
2212 bool checked= FALSE;
2213
2214 if (likely(!thd->is_error()))
2215 DBUG_RETURN(checked);
2216
2217 switch (thd->get_stmt_da()->sql_errno())
2218 {
2219 case ER_TRANS_CACHE_FULL:
2220 case ER_STMT_CACHE_FULL:
2221 case ER_ERROR_ON_WRITE:
2222 case ER_BINLOG_LOGGING_IMPOSSIBLE:
2223 checked= TRUE;
2224 break;
2225 }
2226
2227 DBUG_RETURN(checked);
2228 }
2229
2230
2231 /**
2232 @note
2233 How do we handle this (unlikely but legal) case:
2234 @verbatim
2235 [transaction] + [update to non-trans table] + [rollback to savepoint] ?
2236 @endverbatim
2237 The problem occurs when a savepoint is before the update to the
2238 non-transactional table. Then when there's a rollback to the savepoint, if we
2239 simply truncate the binlog cache, we lose the part of the binlog cache where
2240 the update is. If we want to not lose it, we need to write the SAVEPOINT
2241 command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
2242 is easy: it's just write at the end of the binlog cache, but the former
2243 should be *inserted* to the place where the user called SAVEPOINT. The
2244 solution is that when the user calls SAVEPOINT, we write it to the binlog
2245 cache (so no need to later insert it). As transactions are never intermixed
2246 in the binary log (i.e. they are serialized), we won't have conflicts with
2247 savepoint names when using mysqlbinlog or in the slave SQL thread.
2248 Then when ROLLBACK TO SAVEPOINT is called, if we updated some
2249 non-transactional table, we don't truncate the binlog cache but instead write
2250 ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
2251 will chop the SAVEPOINT command from the binlog cache, which is good as in
2252 that case there is no need to have it in the binlog).
2253 */
2254
binlog_savepoint_set(handlerton * hton,THD * thd,void * sv)2255 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
2256 {
2257 int error= 1;
2258 DBUG_ENTER("binlog_savepoint_set");
2259
2260 char buf[1024];
2261
2262 String log_query(buf, sizeof(buf), &my_charset_bin);
2263 if (log_query.copy(STRING_WITH_LEN("SAVEPOINT "), &my_charset_bin) ||
2264 append_identifier(thd, &log_query, &thd->lex->ident))
2265 DBUG_RETURN(1);
2266 int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
2267 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2268 TRUE, FALSE, TRUE, errcode);
2269 /*
2270 We cannot record the position before writing the statement
2271 because a rollback to a savepoint (.e.g. consider it "S") would
2272 prevent the savepoint statement (i.e. "SAVEPOINT S") from being
2273 written to the binary log despite the fact that the server could
2274 still issue other rollback statements to the same savepoint (i.e.
2275 "S").
2276 Given that the savepoint is valid until the server releases it,
2277 ie, until the transaction commits or it is released explicitly,
2278 we need to log it anyway so that we don't have "ROLLBACK TO S"
2279 or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
2280 log.
2281 */
2282 if (likely(!(error= mysql_bin_log.write(&qinfo))))
2283 binlog_trans_log_savepos(thd, (my_off_t*) sv);
2284
2285 DBUG_RETURN(error);
2286 }
2287
binlog_savepoint_rollback(handlerton * hton,THD * thd,void * sv)2288 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
2289 {
2290 DBUG_ENTER("binlog_savepoint_rollback");
2291
2292 /*
2293 Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
2294 non-transactional table. Otherwise, truncate the binlog cache starting
2295 from the SAVEPOINT command.
2296 */
2297 #ifdef WITH_WSREP
2298 /* for streaming replication, we must replicate savepoint rollback so that
2299 slaves can maintain SR transactions
2300 */
2301 if (unlikely(thd->wsrep_trx().is_streaming() ||
2302 (trans_has_updated_non_trans_table(thd)) ||
2303 (thd->variables.option_bits & OPTION_KEEP_LOG)))
2304 #else
2305 if (unlikely(trans_has_updated_non_trans_table(thd) ||
2306 (thd->variables.option_bits & OPTION_KEEP_LOG)))
2307 #endif /* WITH_WSREP */
2308 {
2309 char buf[1024];
2310 String log_query(buf, sizeof(buf), &my_charset_bin);
2311 if (log_query.copy(STRING_WITH_LEN("ROLLBACK TO "), &my_charset_bin) ||
2312 append_identifier(thd, &log_query, &thd->lex->ident))
2313 DBUG_RETURN(1);
2314 int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
2315 Query_log_event qinfo(thd, log_query.ptr(), log_query.length(),
2316 TRUE, FALSE, TRUE, errcode);
2317 DBUG_RETURN(mysql_bin_log.write(&qinfo));
2318 }
2319
2320 binlog_trans_log_truncate(thd, *(my_off_t*)sv);
2321
2322 /*
2323 When a SAVEPOINT is executed inside a stored function/trigger we force the
2324 pending event to be flushed with a STMT_END_F flag and clear the table maps
2325 as well to ensure that following DMLs will have a clean state to start
2326 with. ROLLBACK inside a stored routine has to finalize possibly existing
2327 current row-based pending event with cleaning up table maps. That ensures
2328 that following DMLs will have a clean state to start with.
2329 */
2330 if (thd->in_sub_stmt)
2331 thd->clear_binlog_table_maps();
2332
2333 DBUG_RETURN(0);
2334 }
2335
2336
2337 /**
2338 Check whether binlog state allows to safely release MDL locks after
2339 rollback to savepoint.
2340
2341 @param hton The binlog handlerton.
2342 @param thd The client thread that executes the transaction.
2343
2344 @return true - It is safe to release MDL locks.
2345 false - If it is not.
2346 */
binlog_savepoint_rollback_can_release_mdl(handlerton * hton,THD * thd)2347 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
2348 THD *thd)
2349 {
2350 DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
2351 /*
2352 If we have not updated any non-transactional tables rollback
2353 to savepoint will simply truncate binlog cache starting from
2354 SAVEPOINT command. So it should be safe to release MDL acquired
2355 after SAVEPOINT command in this case.
2356 */
2357 DBUG_RETURN(!trans_cannot_safely_rollback(thd, true));
2358 }
2359
2360
check_binlog_magic(IO_CACHE * log,const char ** errmsg)2361 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
2362 {
2363 uchar magic[4];
2364 DBUG_ASSERT(my_b_tell(log) == 0);
2365
2366 if (my_b_read(log, magic, sizeof(magic)))
2367 {
2368 *errmsg = "I/O error reading the header from the binary log";
2369 sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
2370 log->error);
2371 return 1;
2372 }
2373 if (bcmp(magic, BINLOG_MAGIC, sizeof(magic)))
2374 {
2375 *errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL";
2376 return 1;
2377 }
2378 return 0;
2379 }
2380
2381
open_binlog(IO_CACHE * log,const char * log_file_name,const char ** errmsg)2382 File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
2383 {
2384 File file;
2385 DBUG_ENTER("open_binlog");
2386
2387 if ((file= mysql_file_open(key_file_binlog,
2388 log_file_name, O_RDONLY | O_BINARY | O_SHARE,
2389 MYF(MY_WME))) < 0)
2390 {
2391 sql_print_error("Failed to open log (file '%s', errno %d)",
2392 log_file_name, my_errno);
2393 *errmsg = "Could not open log file";
2394 goto err;
2395 }
2396 if (init_io_cache(log, file, (size_t)binlog_file_cache_size, READ_CACHE, 0, 0,
2397 MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
2398 {
2399 sql_print_error("Failed to create a cache on log (file '%s')",
2400 log_file_name);
2401 *errmsg = "Could not open log file";
2402 goto err;
2403 }
2404 if (check_binlog_magic(log,errmsg))
2405 goto err;
2406 DBUG_RETURN(file);
2407
2408 err:
2409 if (file >= 0)
2410 {
2411 mysql_file_close(file, MYF(0));
2412 end_io_cache(log);
2413 }
2414 DBUG_RETURN(-1);
2415 }
2416
2417 #ifdef _WIN32
2418 static int eventSource = 0;
2419
setup_windows_event_source()2420 static void setup_windows_event_source()
2421 {
2422 HKEY hRegKey= NULL;
2423 DWORD dwError= 0;
2424 TCHAR szPath[MAX_PATH];
2425 DWORD dwTypes;
2426
2427 if (eventSource) // Ensure that we are only called once
2428 return;
2429 eventSource= 1;
2430
2431 // Create the event source registry key
2432 dwError= RegCreateKey(HKEY_LOCAL_MACHINE,
2433 "SYSTEM\\CurrentControlSet\\Services\\EventLog\\Application\\MariaDB",
2434 &hRegKey);
2435
2436 /* Name of the PE module that contains the message resource */
2437 GetModuleFileName(NULL, szPath, MAX_PATH);
2438
2439 /* Register EventMessageFile */
2440 dwError = RegSetValueEx(hRegKey, "EventMessageFile", 0, REG_EXPAND_SZ,
2441 (PBYTE) szPath, (DWORD) (strlen(szPath) + 1));
2442
2443 /* Register supported event types */
2444 dwTypes= (EVENTLOG_ERROR_TYPE | EVENTLOG_WARNING_TYPE |
2445 EVENTLOG_INFORMATION_TYPE);
2446 dwError= RegSetValueEx(hRegKey, "TypesSupported", 0, REG_DWORD,
2447 (LPBYTE) &dwTypes, sizeof dwTypes);
2448
2449 RegCloseKey(hRegKey);
2450 }
2451
2452 #endif /* _WIN32 */
2453
2454
2455 /**
2456 Find a unique filename for 'filename.#'.
2457
2458 Set '#' to the number next to the maximum found in the most
2459 recent log file extension.
2460
2461 This function will return nonzero if: (i) the generated name
2462 exceeds FN_REFLEN; (ii) if the number of extensions is exhausted;
2463 or (iii) some other error happened while examining the filesystem.
2464
2465 @param name Base name of file
2466 @param min_log_number_to_use minimum log number to choose. Set by
2467 CHANGE MASTER .. TO
  @param last_used_log_number  If 0, find the log number based on files.
                               If not 0, then use *last_used_log_number + 1.
                               Will be updated to the newly generated number.
2471 @return
2472 0 ok
2473 nonzero if not possible to get unique filename.
2474 */
2475
find_uniq_filename(char * name,ulong min_log_number_to_use,ulong * last_used_log_number)2476 static int find_uniq_filename(char *name, ulong min_log_number_to_use,
2477 ulong *last_used_log_number)
2478 {
2479 uint i;
2480 char buff[FN_REFLEN], ext_buf[FN_REFLEN];
2481 struct st_my_dir *dir_info;
2482 struct fileinfo *file_info;
2483 ulong max_found= 0, next= 0, number= 0;
2484 size_t buf_length, length;
2485 char *start, *end;
2486 int error= 0;
2487 DBUG_ENTER("find_uniq_filename");
2488
2489 length= dirname_part(buff, name, &buf_length);
2490 start= name + length;
2491 end= strend(start);
2492
2493 *end='.';
2494 length= (size_t) (end - start + 1);
2495
2496 /* The following matches the code for my_dir () below */
2497 DBUG_EXECUTE_IF("error_unique_log_filename",
2498 {
2499 strmov(end,".1");
2500 DBUG_RETURN(1);
2501 });
2502
2503 if (*last_used_log_number)
2504 max_found= *last_used_log_number;
2505 else
2506 {
2507 if (unlikely(!(dir_info= my_dir(buff, MYF(MY_DONT_SORT)))))
2508 { // This shouldn't happen
2509 strmov(end,".1"); // use name+1
2510 DBUG_RETURN(1);
2511 }
2512 file_info= dir_info->dir_entry;
2513 max_found= min_log_number_to_use ? min_log_number_to_use-1 : 0;
2514 for (i= dir_info->number_of_files ; i-- ; file_info++)
2515 {
2516 if (strncmp(file_info->name, start, length) == 0 &&
2517 test_if_number(file_info->name+length, &number,0))
2518 {
2519 set_if_bigger(max_found, number);
2520 }
2521 }
2522 my_dirend(dir_info);
2523 }
2524
2525 /* check if reached the maximum possible extension number */
2526 if (max_found >= MAX_LOG_UNIQUE_FN_EXT)
2527 {
2528 sql_print_error("Log filename extension number exhausted: %06lu. \
2529 Please fix this by archiving old logs and \
2530 updating the index files.", max_found);
2531 error= 1;
2532 goto end;
2533 }
2534
2535 next= max_found + 1;
2536 if (sprintf(ext_buf, "%06lu", next)<0)
2537 {
2538 error= 1;
2539 goto end;
2540 }
2541 *end++='.';
2542
2543 /*
2544 Check if the generated extension size + the file name exceeds the
2545 buffer size used. If one did not check this, then the filename might be
2546 truncated, resulting in error.
2547 */
2548 if (((strlen(ext_buf) + (end - name)) >= FN_REFLEN))
2549 {
2550 sql_print_error("Log filename too large: %s%s (%zu). \
2551 Please fix this by archiving old logs and updating the \
2552 index files.", name, ext_buf, (strlen(ext_buf) + (end - name)));
2553 error= 1;
2554 goto end;
2555 }
2556
2557 if (sprintf(end, "%06lu", next)<0)
2558 {
2559 error= 1;
2560 goto end;
2561 }
2562 *last_used_log_number= next;
2563
2564 /* print warning if reaching the end of available extensions. */
2565 if ((next > (MAX_LOG_UNIQUE_FN_EXT - LOG_WARN_UNIQUE_FN_EXT_LEFT)))
2566 sql_print_warning("Next log extension: %lu. \
2567 Remaining log filename extensions: %lu. \
2568 Please consider archiving some logs.", next, (MAX_LOG_UNIQUE_FN_EXT - next));
2569
2570 end:
2571 DBUG_RETURN(error);
2572 }
2573
2574
init(enum_log_type log_type_arg,enum cache_type io_cache_type_arg)2575 void MYSQL_LOG::init(enum_log_type log_type_arg,
2576 enum cache_type io_cache_type_arg)
2577 {
2578 DBUG_ENTER("MYSQL_LOG::init");
2579 log_type= log_type_arg;
2580 io_cache_type= io_cache_type_arg;
2581 DBUG_PRINT("info",("log_type: %d", log_type));
2582 DBUG_VOID_RETURN;
2583 }
2584
2585
init_and_set_log_file_name(const char * log_name,const char * new_name,ulong next_log_number,enum_log_type log_type_arg,enum cache_type io_cache_type_arg)2586 bool MYSQL_LOG::init_and_set_log_file_name(const char *log_name,
2587 const char *new_name,
2588 ulong next_log_number,
2589 enum_log_type log_type_arg,
2590 enum cache_type io_cache_type_arg)
2591 {
2592 init(log_type_arg, io_cache_type_arg);
2593
2594 if (new_name)
2595 {
2596 strmov(log_file_name, new_name);
2597 }
2598 else if (!new_name && generate_new_name(log_file_name, log_name,
2599 next_log_number))
2600 return TRUE;
2601
2602 return FALSE;
2603 }
2604
2605
2606 /*
2607 Open a (new) log file.
2608
2609 SYNOPSIS
2610 open()
2611
2612 log_name The name of the log to open
2613 log_type_arg The type of the log. E.g. LOG_NORMAL
2614 new_name The new name for the logfile. This is only needed
2615 when the method is used to open the binlog file.
2616 io_cache_type_arg The type of the IO_CACHE to use for this log file
2617
2618 DESCRIPTION
2619 Open the logfile, init IO_CACHE and write startup messages
2620 (in case of general and slow query logs).
2621
2622 RETURN VALUES
2623 0 ok
2624 1 error
2625 */
2626
open(PSI_file_key log_file_key,const char * log_name,enum_log_type log_type_arg,const char * new_name,ulong next_log_number,enum cache_type io_cache_type_arg)2627 bool MYSQL_LOG::open(
2628 #ifdef HAVE_PSI_INTERFACE
2629 PSI_file_key log_file_key,
2630 #endif
2631 const char *log_name, enum_log_type log_type_arg,
2632 const char *new_name, ulong next_log_number,
2633 enum cache_type io_cache_type_arg)
2634 {
2635 char buff[FN_REFLEN];
2636 MY_STAT f_stat;
2637 File file= -1;
2638 my_off_t seek_offset;
2639 bool is_fifo = false;
2640 int open_flags= O_CREAT | O_BINARY | O_CLOEXEC;
2641 DBUG_ENTER("MYSQL_LOG::open");
2642 DBUG_PRINT("enter", ("log_type: %d", (int) log_type_arg));
2643
2644 write_error= 0;
2645
2646 if (!(name= my_strdup(log_name, MYF(MY_WME))))
2647 {
2648 name= (char *)log_name; // for the error message
2649 goto err;
2650 }
2651
2652 /*
2653 log_type is LOG_UNKNOWN if we should not generate a new name
2654 This is only used when called from MYSQL_BINARY_LOG::open, which
2655 has already updated log_file_name.
2656 */
2657 if (log_type_arg != LOG_UNKNOWN &&
2658 init_and_set_log_file_name(name, new_name, next_log_number,
2659 log_type_arg, io_cache_type_arg))
2660 goto err;
2661
2662 is_fifo = my_stat(log_file_name, &f_stat, MYF(0)) &&
2663 MY_S_ISFIFO(f_stat.st_mode);
2664
2665 if (io_cache_type == SEQ_READ_APPEND)
2666 open_flags |= O_RDWR | O_APPEND;
2667 else
2668 open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
2669
2670 if (is_fifo)
2671 open_flags |= O_NONBLOCK;
2672
2673 db[0]= 0;
2674
2675 #ifdef HAVE_PSI_INTERFACE
2676 /* Keep the key for reopen */
2677 m_log_file_key= log_file_key;
2678 #endif
2679
2680 if ((file= mysql_file_open(log_file_key, log_file_name, open_flags,
2681 MYF(MY_WME))) < 0)
2682 goto err;
2683
2684 if (is_fifo)
2685 seek_offset= 0;
2686 else if ((seek_offset= mysql_file_tell(file, MYF(MY_WME))))
2687 goto err;
2688
2689 if (init_io_cache(&log_file, file, IO_SIZE, io_cache_type, seek_offset, 0,
2690 MYF(MY_WME | MY_NABP |
2691 ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
2692 goto err;
2693
2694 if (log_type == LOG_NORMAL)
2695 {
2696 char *end;
2697 size_t len=my_snprintf(buff, sizeof(buff), "%s, Version: %s (%s). "
2698 #ifdef EMBEDDED_LIBRARY
2699 "embedded library\n",
2700 my_progname, server_version, MYSQL_COMPILATION_COMMENT
2701 #elif defined(_WIN32)
2702 "started with:\nTCP Port: %d, Named Pipe: %s\n",
2703 my_progname, server_version, MYSQL_COMPILATION_COMMENT,
2704 mysqld_port, mysqld_unix_port
2705 #else
2706 "started with:\nTcp port: %d Unix socket: %s\n",
2707 my_progname, server_version, MYSQL_COMPILATION_COMMENT,
2708 mysqld_port, mysqld_unix_port
2709 #endif
2710 );
2711 end= strnmov(buff + len, "Time\t\t Id Command\tArgument\n",
2712 sizeof(buff) - len);
2713 if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) ||
2714 flush_io_cache(&log_file))
2715 goto err;
2716 }
2717
2718 log_state= LOG_OPENED;
2719 DBUG_RETURN(0);
2720
2721 err:
2722 sql_print_error(fatal_log_error, name, errno);
2723 if (file >= 0)
2724 mysql_file_close(file, MYF(0));
2725 end_io_cache(&log_file);
2726 my_free(name);
2727 name= NULL;
2728 log_state= LOG_CLOSED;
2729 DBUG_RETURN(1);
2730 }
2731
MYSQL_LOG()2732 MYSQL_LOG::MYSQL_LOG()
2733 : name(0), write_error(FALSE), inited(FALSE), log_type(LOG_UNKNOWN),
2734 log_state(LOG_CLOSED)
2735 {
2736 /*
2737 We don't want to initialize LOCK_Log here as such initialization depends on
2738 safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
2739 called only in main(). Doing initialization here would make it happen
2740 before main().
2741 */
2742 bzero((char*) &log_file, sizeof(log_file));
2743 }
2744
init_pthread_objects()2745 void MYSQL_LOG::init_pthread_objects()
2746 {
2747 DBUG_ASSERT(inited == 0);
2748 inited= 1;
2749 mysql_mutex_init(key_LOG_LOCK_log, &LOCK_log, MY_MUTEX_INIT_SLOW);
2750 }
2751
2752 /*
2753 Close the log file
2754
2755 SYNOPSIS
2756 close()
2757 exiting Bitmask. LOG_CLOSE_TO_BE_OPENED is used if we intend to call
2758 open at once after close. LOG_CLOSE_DELAYED_CLOSE is used for
2759 binlog rotation, to delay actual close of the old file until
2760 we have successfully created the new file.
2761
2762 NOTES
2763 One can do an open on the object at once after doing a close.
2764 The internal structures are not freed until cleanup() is called
2765 */
2766
close(uint exiting)2767 void MYSQL_LOG::close(uint exiting)
2768 { // One can't set log_type here!
2769 DBUG_ENTER("MYSQL_LOG::close");
2770 DBUG_PRINT("enter",("exiting: %d", (int) exiting));
2771 if (log_state == LOG_OPENED)
2772 {
2773 end_io_cache(&log_file);
2774
2775 if (log_type == LOG_BIN && mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error)
2776 {
2777 write_error= 1;
2778 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2779 }
2780
2781 if (!(exiting & LOG_CLOSE_DELAYED_CLOSE) &&
2782 mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error)
2783 {
2784 write_error= 1;
2785 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2786 }
2787 }
2788
2789 log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
2790 my_free(name);
2791 name= NULL;
2792 DBUG_VOID_RETURN;
2793 }
2794
2795 /** This is called only once. */
2796
cleanup()2797 void MYSQL_LOG::cleanup()
2798 {
2799 DBUG_ENTER("cleanup");
2800 if (inited)
2801 {
2802 inited= 0;
2803 mysql_mutex_destroy(&LOCK_log);
2804 close(0);
2805 }
2806 DBUG_VOID_RETURN;
2807 }
2808
2809
/**
  Build the file name to use for this log.

  Formats log_name relative to mysql_data_home into new_name without
  adding an extension. next_log_number is not used by this implementation.

  @param new_name         Output buffer for the generated name.
  @param log_name         Base log name.
  @param next_log_number  Unused here.

  @return always 0.
*/
int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name,
                                 ulong next_log_number)
{
  /* NOTE(review): flag value 4 is presumably MY_UNPACK_FILENAME - confirm. */
  fn_format(new_name, log_name, mysql_data_home, "", 4);
  return 0;
}
2816
generate_new_name(char * new_name,const char * log_name,ulong next_log_number)2817 int MYSQL_BIN_LOG::generate_new_name(char *new_name, const char *log_name,
2818 ulong next_log_number)
2819 {
2820 fn_format(new_name, log_name, mysql_data_home, "", 4);
2821 if (!fn_ext(log_name)[0])
2822 {
2823 if (DBUG_EVALUATE_IF("binlog_inject_new_name_error", TRUE, FALSE) ||
2824 unlikely(find_uniq_filename(new_name, next_log_number,
2825 &last_used_log_number)))
2826 {
2827 THD *thd= current_thd;
2828 if (unlikely(thd))
2829 my_error(ER_NO_UNIQUE_LOGFILE, MYF(ME_FATAL), log_name);
2830 sql_print_error(ER_DEFAULT(ER_NO_UNIQUE_LOGFILE), log_name);
2831 return 1;
2832 }
2833 }
2834 return 0;
2835 }
2836
2837
2838 /*
2839 Reopen the log file
2840
2841 SYNOPSIS
2842 reopen_file()
2843
2844 DESCRIPTION
2845 Reopen the log file. The method is used during FLUSH LOGS
2846 and locks LOCK_log mutex
2847 */
2848
2849
reopen_file()2850 void MYSQL_QUERY_LOG::reopen_file()
2851 {
2852 char *save_name;
2853 DBUG_ENTER("MYSQL_LOG::reopen_file");
2854
2855 mysql_mutex_lock(&LOCK_log);
2856 if (!is_open())
2857 {
2858 DBUG_PRINT("info",("log is closed"));
2859 mysql_mutex_unlock(&LOCK_log);
2860 DBUG_VOID_RETURN;
2861 }
2862
2863 save_name= name;
2864 name= 0; // Don't free name
2865 close(LOG_CLOSE_TO_BE_OPENED);
2866
2867 /*
2868 Note that at this point, log_state != LOG_CLOSED (important for is_open()).
2869 */
2870
2871 open(
2872 #ifdef HAVE_PSI_INTERFACE
2873 m_log_file_key,
2874 #endif
2875 save_name, log_type, 0, 0, io_cache_type);
2876 my_free(save_name);
2877
2878 mysql_mutex_unlock(&LOCK_log);
2879
2880 DBUG_VOID_RETURN;
2881 }
2882
2883
2884 /*
2885 Write a command to traditional general log file
2886
2887 SYNOPSIS
2888 write()
2889
2890 event_time command start timestamp
2891 user_host the pointer to the string with user@host info
2892 user_host_len length of the user_host string. this is computed once
2893 and passed to all general log event handlers
2894 thread_id Id of the thread, issued a query
2895 command_type the type of the command being logged
2896 command_type_len the length of the string above
2897 sql_text the very text of the query being executed
2898 sql_text_len the length of sql_text string
2899
  DESCRIPTION

   Log the given command to the normal (not rotatable) log file

  RETURN
    FALSE - OK
    TRUE - error occurred
2907 */
2908
write(time_t event_time,const char * user_host,size_t user_host_len,my_thread_id thread_id_arg,const char * command_type,size_t command_type_len,const char * sql_text,size_t sql_text_len)2909 bool MYSQL_QUERY_LOG::write(time_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
2910 const char *command_type, size_t command_type_len,
2911 const char *sql_text, size_t sql_text_len)
2912 {
2913 char buff[32];
2914 char local_time_buff[MAX_TIME_SIZE];
2915 struct tm start;
2916 size_t time_buff_len= 0;
2917
2918 mysql_mutex_lock(&LOCK_log);
2919
2920 /* Test if someone closed between the is_open test and lock */
2921 if (is_open())
2922 {
2923 /* for testing output of timestamp and thread id */
2924 DBUG_EXECUTE_IF("reset_log_last_time", last_time= 0;);
2925
2926 /* Note that my_b_write() assumes it knows the length for this */
2927 if (event_time != last_time)
2928 {
2929 last_time= event_time;
2930
2931 localtime_r(&event_time, &start);
2932
2933 time_buff_len= my_snprintf(local_time_buff, MAX_TIME_SIZE,
2934 "%02d%02d%02d %2d:%02d:%02d\t",
2935 start.tm_year % 100, start.tm_mon + 1,
2936 start.tm_mday, start.tm_hour,
2937 start.tm_min, start.tm_sec);
2938
2939 if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len))
2940 goto err;
2941 }
2942 else
2943 if (my_b_write(&log_file, (uchar*) "\t\t" ,2) < 0)
2944 goto err;
2945
2946 /* command_type, thread_id */
2947 size_t length= my_snprintf(buff, 32, "%6llu ", thread_id_arg);
2948
2949 if (my_b_write(&log_file, (uchar*) buff, length))
2950 goto err;
2951
2952 if (my_b_write(&log_file, (uchar*) command_type, command_type_len))
2953 goto err;
2954
2955 if (my_b_write(&log_file, (uchar*) "\t", 1))
2956 goto err;
2957
2958 /* sql_text */
2959 if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len))
2960 goto err;
2961
2962 if (my_b_write(&log_file, (uchar*) "\n", 1) ||
2963 flush_io_cache(&log_file))
2964 goto err;
2965 }
2966
2967 mysql_mutex_unlock(&LOCK_log);
2968 return FALSE;
2969 err:
2970
2971 if (!write_error)
2972 {
2973 write_error= 1;
2974 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2975 }
2976 mysql_mutex_unlock(&LOCK_log);
2977 return TRUE;
2978 }
2979
2980
2981 /*
2982 Log a query to the traditional slow log file
2983
2984 SYNOPSIS
2985 write()
2986
2987 thd THD of the query
2988 current_time current timestamp
2989 user_host the pointer to the string with user@host info
2990 user_host_len length of the user_host string. this is computed once
2991 and passed to all general log event handlers
2992 query_utime Amount of time the query took to execute (in microseconds)
2993 lock_utime Amount of time the query was locked (in microseconds)
2994 is_command The flag, which determines, whether the sql_text is a
2995 query or an administrator command.
2996 sql_text the very text of the query or administrator command
2997 processed
2998 sql_text_len the length of sql_text string
2999
3000 DESCRIPTION
3001
3002 Log a query to the slow log file.
3003
3004 RETURN
3005 FALSE - OK
3006 TRUE - error occurred
3007 */
3008
write(THD * thd,time_t current_time,const char * user_host,size_t user_host_len,ulonglong query_utime,ulonglong lock_utime,bool is_command,const char * sql_text,size_t sql_text_len)3009 bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
3010 const char *user_host, size_t user_host_len, ulonglong query_utime,
3011 ulonglong lock_utime, bool is_command,
3012 const char *sql_text, size_t sql_text_len)
3013 {
3014 bool error= 0;
3015 char llbuff[22];
3016 DBUG_ENTER("MYSQL_QUERY_LOG::write");
3017
3018 mysql_mutex_lock(&LOCK_log);
3019 if (is_open())
3020 { // Safety against reopen
3021 char buff[80], *end;
3022 char query_time_buff[22+7], lock_time_buff[22+7];
3023 size_t buff_len;
3024 end= buff;
3025
3026 if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
3027 {
3028 if (current_time != last_time)
3029 {
3030 last_time= current_time;
3031 struct tm start;
3032 localtime_r(¤t_time, &start);
3033
3034 buff_len= my_snprintf(buff, sizeof buff,
3035 "# Time: %02d%02d%02d %2d:%02d:%02d\n",
3036 start.tm_year % 100, start.tm_mon + 1,
3037 start.tm_mday, start.tm_hour,
3038 start.tm_min, start.tm_sec);
3039
3040 /* Note that my_b_write() assumes it knows the length for this */
3041 if (my_b_write(&log_file, (uchar*) buff, buff_len))
3042 goto err;
3043 }
3044 const uchar uh[]= "# User@Host: ";
3045 if (my_b_write(&log_file, uh, sizeof(uh) - 1) ||
3046 my_b_write(&log_file, (uchar*) user_host, user_host_len) ||
3047 my_b_write(&log_file, (uchar*) "\n", 1))
3048 goto err;
3049
3050 /* For slow query log */
3051 sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
3052 sprintf(lock_time_buff, "%.6f", ulonglong2double(lock_utime)/1000000.0);
3053 if (my_b_printf(&log_file,
3054 "# Thread_id: %lu Schema: %s QC_hit: %s\n"
3055 "# Query_time: %s Lock_time: %s Rows_sent: %lu Rows_examined: %lu\n"
3056 "# Rows_affected: %lu Bytes_sent: %lu\n",
3057 (ulong) thd->thread_id, thd->get_db(),
3058 ((thd->query_plan_flags & QPLAN_QC) ? "Yes" : "No"),
3059 query_time_buff, lock_time_buff,
3060 (ulong) thd->get_sent_row_count(),
3061 (ulong) thd->get_examined_row_count(),
3062 (ulong) thd->get_affected_rows(),
3063 (ulong) (thd->status_var.bytes_sent - thd->bytes_sent_old)))
3064 goto err;
3065
3066 if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN)
3067 && thd->tmp_tables_used &&
3068 my_b_printf(&log_file,
3069 "# Tmp_tables: %lu Tmp_disk_tables: %lu "
3070 "Tmp_table_sizes: %s\n",
3071 (ulong) thd->tmp_tables_used,
3072 (ulong) thd->tmp_tables_disk_used,
3073 llstr(thd->tmp_tables_size, llbuff)))
3074 goto err;
3075
3076 if (thd->spcont &&
3077 my_b_printf(&log_file, "# Stored_routine: %s\n",
3078 ErrConvDQName(thd->spcont->m_sp).ptr()))
3079 goto err;
3080
3081 if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN) &&
3082 (thd->query_plan_flags &
3083 (QPLAN_FULL_SCAN | QPLAN_FULL_JOIN | QPLAN_TMP_TABLE |
3084 QPLAN_TMP_DISK | QPLAN_FILESORT | QPLAN_FILESORT_DISK |
3085 QPLAN_FILESORT_PRIORITY_QUEUE)) &&
3086 my_b_printf(&log_file,
3087 "# Full_scan: %s Full_join: %s "
3088 "Tmp_table: %s Tmp_table_on_disk: %s\n"
3089 "# Filesort: %s Filesort_on_disk: %s Merge_passes: %lu "
3090 "Priority_queue: %s\n",
3091 ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"),
3092 ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"),
3093 (thd->tmp_tables_used ? "Yes" : "No"),
3094 (thd->tmp_tables_disk_used ? "Yes" : "No"),
3095 ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"),
3096 ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ?
3097 "Yes" : "No"),
3098 thd->query_plan_fsort_passes,
3099 ((thd->query_plan_flags & QPLAN_FILESORT_PRIORITY_QUEUE) ?
3100 "Yes" : "No")
3101 ))
3102 goto err;
3103 if (thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_EXPLAIN &&
3104 thd->lex->explain)
3105 {
3106 StringBuffer<128> buf;
3107 DBUG_ASSERT(!thd->free_list);
3108 if (!print_explain_for_slow_log(thd->lex, thd, &buf))
3109 if (my_b_printf(&log_file, "%s", buf.c_ptr_safe()))
3110 goto err;
3111 thd->free_items();
3112 }
3113 if (thd->db.str && strcmp(thd->db.str, db))
3114 { // Database changed
3115 if (my_b_printf(&log_file,"use %s;\n",thd->db.str))
3116 goto err;
3117 strmov(db,thd->db.str);
3118 }
3119 if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
3120 {
3121 end=strmov(end, ",last_insert_id=");
3122 end=longlong10_to_str((longlong)
3123 thd->first_successful_insert_id_in_prev_stmt_for_binlog,
3124 end, -10);
3125 }
3126 // Save value if we do an insert.
3127 if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
3128 {
3129 if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
3130 {
3131 end=strmov(end,",insert_id=");
3132 end=longlong10_to_str((longlong)
3133 thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(),
3134 end, -10);
3135 }
3136 }
3137
3138 /*
3139 This info used to show up randomly, depending on whether the query
3140 checked the query start time or not. now we always write current
3141 timestamp to the slow log
3142 */
3143 end= strmov(end, ",timestamp=");
3144 end= int10_to_str((long) current_time, end, 10);
3145
3146 if (end != buff)
3147 {
3148 *end++=';';
3149 *end='\n';
3150 if (my_b_write(&log_file, (uchar*) "SET ", 4) ||
3151 my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff)))
3152 goto err;
3153 }
3154 if (is_command)
3155 {
3156 end= strxmov(buff, "# administrator command: ", NullS);
3157 buff_len= (ulong) (end - buff);
3158 DBUG_EXECUTE_IF("simulate_slow_log_write_error",
3159 {DBUG_SET("+d,simulate_file_write_error");});
3160 if(my_b_write(&log_file, (uchar*) buff, buff_len))
3161 goto err;
3162 }
3163 if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) ||
3164 my_b_write(&log_file, (uchar*) ";\n",2) ||
3165 flush_io_cache(&log_file))
3166 goto err;
3167
3168 }
3169 }
3170 end:
3171 mysql_mutex_unlock(&LOCK_log);
3172 DBUG_RETURN(error);
3173
3174 err:
3175 error= 1;
3176 if (!write_error)
3177 {
3178 write_error= 1;
3179 sql_print_error(ER_THD(thd, ER_ERROR_ON_WRITE), name, errno);
3180 }
3181 goto end;
3182 }
3183
3184
3185 /**
3186 @todo
3187 The following should be using fn_format(); We just need to
3188 first change fn_format() to cut the file name if it's too long.
3189 */
generate_name(const char * log_name,const char * suffix,bool strip_ext,char * buff)3190 const char *MYSQL_LOG::generate_name(const char *log_name,
3191 const char *suffix,
3192 bool strip_ext, char *buff)
3193 {
3194 if (!log_name || !log_name[0])
3195 {
3196 strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
3197 return (const char *)
3198 fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
3199 }
3200 // get rid of extension if the log is binary to avoid problems
3201 if (strip_ext)
3202 {
3203 char *p= fn_ext(log_name);
3204 uint length= (uint) (p - log_name);
3205 strmake(buff, log_name, MY_MIN(length, FN_REFLEN-1));
3206 return (const char*)buff;
3207 }
3208 return log_name;
3209 }
3210
3211
/*
  Print some additional information about addition/removal of
  XID list entries.

  X is the format string handed to WSREP_DEBUG; it receives the entry's
  binlog name and binlog id as arguments. Y is a pointer to the list entry.

  Both variants expand to a single do { } while(0) statement, so the macro
  must be followed by ';' and is safe inside an unbraced if/else.

  TODO: Remove once MDEV-9510 is fixed.
*/
#ifdef WITH_WSREP
#define WSREP_XID_LIST_ENTRY(X, Y)                              \
  do                                                            \
  {                                                             \
    if (wsrep_debug)                                            \
    {                                                           \
      char buf[FN_REFLEN];                                      \
      strmake(buf, (Y)->binlog_name, (Y)->binlog_name_len);     \
      WSREP_DEBUG(X, buf, (Y)->binlog_id);                      \
    }                                                           \
  } while(0)
#else
#define WSREP_XID_LIST_ENTRY(X, Y) do { } while(0)
#endif
3228
MYSQL_BIN_LOG(uint * sync_period)3229 MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
3230 :reset_master_pending(0), mark_xid_done_waiting(0),
3231 bytes_written(0), last_used_log_number(0),
3232 file_id(1), open_count(1),
3233 group_commit_queue(0), group_commit_queue_busy(FALSE),
3234 num_commits(0), num_group_commits(0),
3235 group_commit_trigger_count(0), group_commit_trigger_timeout(0),
3236 group_commit_trigger_lock_wait(0),
3237 sync_period_ptr(sync_period), sync_counter(0),
3238 state_file_deleted(false), binlog_state_recover_done(false),
3239 is_relay_log(0), relay_signal_cnt(0),
3240 checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
3241 relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
3242 description_event_for_exec(0), description_event_for_queue(0),
3243 current_binlog_id(0), reset_master_count(0)
3244 {
3245 /*
3246 We don't want to initialize locks here as such initialization depends on
3247 safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
3248 called only in main(). Doing initialization here would make it happen
3249 before main().
3250 */
3251 index_file_name[0] = 0;
3252 bzero((char*) &index_file, sizeof(index_file));
3253 bzero((char*) &purge_index_file, sizeof(purge_index_file));
3254 }
3255
stop_background_thread()3256 void MYSQL_BIN_LOG::stop_background_thread()
3257 {
3258 if (binlog_background_thread_started)
3259 {
3260 mysql_mutex_lock(&LOCK_binlog_background_thread);
3261 binlog_background_thread_stop= true;
3262 mysql_cond_signal(&COND_binlog_background_thread);
3263 while (binlog_background_thread_stop)
3264 mysql_cond_wait(&COND_binlog_background_thread_end,
3265 &LOCK_binlog_background_thread);
3266 mysql_mutex_unlock(&LOCK_binlog_background_thread);
3267 binlog_background_thread_started= false;
3268 }
3269 }
3270
3271 /* this is called only once */
3272
cleanup()3273 void MYSQL_BIN_LOG::cleanup()
3274 {
3275 DBUG_ENTER("cleanup");
3276 if (inited)
3277 {
3278 xid_count_per_binlog *b;
3279
3280 /* Wait for the binlog background thread to stop. */
3281 if (!is_relay_log)
3282 stop_background_thread();
3283
3284 inited= 0;
3285 mysql_mutex_lock(&LOCK_log);
3286 close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
3287 mysql_mutex_unlock(&LOCK_log);
3288 delete description_event_for_queue;
3289 delete description_event_for_exec;
3290
3291 while ((b= binlog_xid_count_list.get()))
3292 {
3293 /*
3294 There should be no pending XIDs at shutdown, and only one entry (for
3295 the active binlog file) in the list.
3296 */
3297 DBUG_ASSERT(b->xid_count == 0);
3298 DBUG_ASSERT(!binlog_xid_count_list.head());
3299 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::cleanup(): Removing xid_list_entry "
3300 "for %s (%lu)", b);
3301 delete b;
3302 }
3303
3304 mysql_mutex_destroy(&LOCK_log);
3305 mysql_mutex_destroy(&LOCK_index);
3306 mysql_mutex_destroy(&LOCK_xid_list);
3307 mysql_mutex_destroy(&LOCK_binlog_background_thread);
3308 mysql_mutex_destroy(&LOCK_binlog_end_pos);
3309 mysql_cond_destroy(&COND_relay_log_updated);
3310 mysql_cond_destroy(&COND_bin_log_updated);
3311 mysql_cond_destroy(&COND_queue_busy);
3312 mysql_cond_destroy(&COND_xid_list);
3313 mysql_cond_destroy(&COND_binlog_background_thread);
3314 mysql_cond_destroy(&COND_binlog_background_thread_end);
3315 }
3316
3317 /*
3318 Free data for global binlog state.
3319 We can't do that automatically as we need to do this before
3320 safemalloc is shut down
3321 */
3322 if (!is_relay_log)
3323 rpl_global_gtid_binlog_state.free();
3324 DBUG_VOID_RETURN;
3325 }
3326
3327
3328 /* Init binlog-specific vars */
init(ulong max_size_arg)3329 void MYSQL_BIN_LOG::init(ulong max_size_arg)
3330 {
3331 DBUG_ENTER("MYSQL_BIN_LOG::init");
3332 max_size= max_size_arg;
3333 DBUG_PRINT("info",("max_size: %lu", max_size));
3334 DBUG_VOID_RETURN;
3335 }
3336
3337
init_pthread_objects()3338 void MYSQL_BIN_LOG::init_pthread_objects()
3339 {
3340 MYSQL_LOG::init_pthread_objects();
3341 mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
3342 mysql_mutex_setflags(&LOCK_index, MYF_NO_DEADLOCK_DETECTION);
3343 mysql_mutex_init(key_BINLOG_LOCK_xid_list,
3344 &LOCK_xid_list, MY_MUTEX_INIT_FAST);
3345 mysql_cond_init(m_key_relay_log_update, &COND_relay_log_updated, 0);
3346 mysql_cond_init(m_key_bin_log_update, &COND_bin_log_updated, 0);
3347 mysql_cond_init(m_key_COND_queue_busy, &COND_queue_busy, 0);
3348 mysql_cond_init(key_BINLOG_COND_xid_list, &COND_xid_list, 0);
3349
3350 mysql_mutex_init(key_BINLOG_LOCK_binlog_background_thread,
3351 &LOCK_binlog_background_thread, MY_MUTEX_INIT_FAST);
3352 mysql_cond_init(key_BINLOG_COND_binlog_background_thread,
3353 &COND_binlog_background_thread, 0);
3354 mysql_cond_init(key_BINLOG_COND_binlog_background_thread_end,
3355 &COND_binlog_background_thread_end, 0);
3356
3357 mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
3358 MY_MUTEX_INIT_SLOW);
3359 }
3360
3361
open_index_file(const char * index_file_name_arg,const char * log_name,bool need_mutex)3362 bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
3363 const char *log_name, bool need_mutex)
3364 {
3365 File index_file_nr= -1;
3366 DBUG_ASSERT(!my_b_inited(&index_file));
3367
3368 /*
3369 First open of this class instance
3370 Create an index file that will hold all file names uses for logging.
3371 Add new entries to the end of it.
3372 */
3373 myf opt= MY_UNPACK_FILENAME;
3374 if (!index_file_name_arg)
3375 {
3376 index_file_name_arg= log_name; // Use same basename for index file
3377 opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
3378 }
3379 fn_format(index_file_name, index_file_name_arg, mysql_data_home,
3380 ".index", opt);
3381 if ((index_file_nr= mysql_file_open(m_key_file_log_index,
3382 index_file_name,
3383 O_RDWR | O_CREAT | O_BINARY | O_CLOEXEC,
3384 MYF(MY_WME))) < 0 ||
3385 mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
3386 init_io_cache(&index_file, index_file_nr,
3387 IO_SIZE, WRITE_CACHE,
3388 mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
3389 0, MYF(MY_WME | MY_WAIT_IF_FULL)) ||
3390 DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
3391 {
3392 /*
3393 TODO: all operations creating/deleting the index file or a log, should
3394 call my_sync_dir() or my_sync_dir_by_file() to be durable.
3395 TODO: file creation should be done with mysql_file_create()
3396 not mysql_file_open().
3397 */
3398 if (index_file_nr >= 0)
3399 mysql_file_close(index_file_nr, MYF(0));
3400 return TRUE;
3401 }
3402
3403 #ifdef HAVE_REPLICATION
3404 /*
3405 Sync the index by purging any binary log file that is not registered.
3406 In other words, either purge binary log files that were removed from
3407 the index but not purged from the file system due to a crash or purge
3408 any binary log file that was created but not register in the index
3409 due to a crash.
3410 */
3411
3412 if (set_purge_index_file_name(index_file_name_arg) ||
3413 open_purge_index_file(FALSE) ||
3414 purge_index_entry(NULL, NULL, need_mutex) ||
3415 close_purge_index_file() ||
3416 DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
3417 {
3418 sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
3419 "file.");
3420 return TRUE;
3421 }
3422 #endif
3423
3424 return FALSE;
3425 }
3426
3427
/**
  Open a (new) binlog file.

  - Open the log file and the index file. Register the new
    file name in it
  - When calling this when the file is in use, you must hold locks
    on LOCK_log and LOCK_index.

  Order of operations:
    -# For a binary log (not relay log): run binlog recovery once and start
       the binlog background thread if it is not running.
    -# Compute the new log file name.
    -# (HAVE_REPLICATION) register the new name in the purge index file.
    -# Open the log file; if it is empty write the magic header.
    -# Write the Format_description event, optionally the
       Start_encryption event, and for a binary log the Gtid_list and
       Binlog_checkpoint events.
    -# Write description_event_for_queue if present and of version >= 4.
    -# Flush and sync the log, publish the end position, and append the
       file name to the index file if the file was newly created.
    -# For a binary log: link the new entry into binlog_xid_count_list and
       delete the binlog state file.

  @param log_name           log base name, passed to the name generation
                            and to MYSQL_LOG::open()
  @param log_type_arg       log type, passed to the name generation
  @param new_name           passed to init_and_set_log_file_name()
  @param next_log_number    passed to init_and_set_log_file_name()
  @param io_cache_type_arg  IO_CACHE type used for the log file
  @param max_size_arg       maximum log size, stored through init()
  @param null_created_arg   stored in the Format_description event's
                            dont_set_created
  @param need_mutex         forwarded to purge_index_entry() on the
                            error path

  @retval
    0	ok
  @retval
    1	error
*/

bool MYSQL_BIN_LOG::open(const char *log_name,
                         enum_log_type log_type_arg,
                         const char *new_name,
                         ulong next_log_number,
                         enum cache_type io_cache_type_arg,
                         ulong max_size_arg,
                         bool null_created_arg,
                         bool need_mutex)
{
  /* NOTE(review): 'file' is never assigned in this function - confirm */
  File file= -1;
  xid_count_per_binlog *new_xid_list_entry= NULL, *b;
  DBUG_ENTER("MYSQL_BIN_LOG::open");
  DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));

  mysql_mutex_assert_owner(&LOCK_log);

  if (!is_relay_log)
  {
    /* Recovery is attempted once; the flag is set before the attempt */
    if (!binlog_state_recover_done)
    {
      binlog_state_recover_done= true;
      if (do_binlog_recovery(opt_bin_logname, false))
        DBUG_RETURN(1);
    }

    if (!binlog_background_thread_started &&
        start_binlog_background_thread())
      DBUG_RETURN(1);
  }

  /* We need to calculate new log file name for purge to delete old */
  if (init_and_set_log_file_name(log_name, new_name, next_log_number,
                                 log_type_arg, io_cache_type_arg))
  {
    sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
    /* A binary log goes through the full error cleanup, a relay log not */
    if (!is_relay_log)
      goto err;
    DBUG_RETURN(1);
  }

#ifdef HAVE_REPLICATION
  if (open_purge_index_file(TRUE) ||
      register_create_index_entry(log_file_name) ||
      sync_purge_index_file() ||
      DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
  {
    /**
        TODO:
        Although this was introduced to appease valgrind when
        injecting emulated faults using
        fault_injection_registering_index it may be good to consider
        what actually happens when open_purge_index_file succeeds but
        register or sync fails.

        Perhaps we might need the code below in MYSQL_LOG_BIN::cleanup
        for "real life" purposes as well?
    */
    DBUG_EXECUTE_IF("fault_injection_registering_index", {
      if (my_b_inited(&purge_index_file))
      {
        end_io_cache(&purge_index_file);
        my_close(purge_index_file.file, MYF(0));
      }
    });

    sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
    DBUG_RETURN(1);
  }
  DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
#endif

  write_error= 0;

  /* open the main log file */
  if (MYSQL_LOG::open(
#ifdef HAVE_PSI_INTERFACE
                      m_key_file_log,
#endif
                      log_name,
                      LOG_UNKNOWN, /* Don't generate new name */
                      0, 0, io_cache_type_arg))
  {
#ifdef HAVE_REPLICATION
    close_purge_index_file();
#endif
    DBUG_RETURN(1);                            /* all warnings issued */
  }

  init(max_size_arg);

  open_count++;

  DBUG_ASSERT(log_type == LOG_BIN);

  {
    /* Set when the file is new and must be appended to the index file */
    bool write_file_name_to_index_file=0;

    if (!my_b_filelength(&log_file))
    {
      /*
	The binary log file was empty (probably newly created)
	This is the normal case and happens when the user doesn't specify
	an extension for the binary log files.
	In this case we write a standard header to it.
      */
      if (my_b_safe_write(&log_file, BINLOG_MAGIC,
			  BIN_LOG_HEADER_SIZE))
        goto err;
      bytes_written+= BIN_LOG_HEADER_SIZE;
      write_file_name_to_index_file= 1;
    }

    {
      /*
        In 4.x we put Start event only in the first binlog. But from 5.0 we
        want a Start event even if this is not the very first binlog.
      */
      Format_description_log_event s(BINLOG_VERSION);
      /*
        don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
        as we won't be able to reset it later
      */
      if (io_cache_type == WRITE_CACHE)
        s.flags |= LOG_EVENT_BINLOG_IN_USE_F;

      if (is_relay_log)
      {
        /* Pick the relay log checksum algorithm once, on first use */
        if (relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
          relay_log_checksum_alg=
            opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options
                                          : BINLOG_CHECKSUM_ALG_OFF;
        s.checksum_alg= relay_log_checksum_alg;
        s.set_relay_log_event();
      }
      else
        s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options;

      /* Encryption stays off until a Start_encryption event is written */
      crypto.scheme = 0;
      DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
      if (!s.is_valid())
        goto err;
      s.dont_set_created= null_created_arg;
      if (write_event(&s))
        goto err;
      bytes_written+= s.data_written;

      if (encrypt_binlog)
      {
        uint key_version= encryption_key_get_latest_version(ENCRYPTION_KEY_SYSTEM_DATA);
        if (key_version == ENCRYPTION_KEY_VERSION_INVALID)
        {
          sql_print_error("Failed to enable encryption of binary logs");
          goto err;
        }

        if (key_version != ENCRYPTION_KEY_NOT_ENCRYPTED)
        {
          if (my_random_bytes(crypto.nonce, sizeof(crypto.nonce)))
            goto err;

          Start_encryption_log_event sele(1, key_version, crypto.nonce);
          sele.checksum_alg= s.checksum_alg;
          if (write_event(&sele))
            goto err;

          // Start_encryption_log_event is written, enable the encryption
          if (crypto.init(sele.crypto_scheme, key_version))
            goto err;
        }
      }

      if (!is_relay_log)
      {
        char buf[FN_REFLEN];

        /*
          Output a Gtid_list_log_event at the start of the binlog file.

          This is used to quickly determine which GTIDs are found in binlog
          files earlier than this one, and which are found in this (or later)
          binlogs.

          The list gives a mapping from (domain_id, server_id) -> seq_no (so
          this means that there is at most one entry for every unique pair
          (domain_id, server_id) in the list). It indicates that this seq_no is
          the last one found in an earlier binlog file for this (domain_id,
          server_id) combination - so any higher seq_no should be search for
          from this binlog file, or a later one.

          This allows to locate the binlog file containing a given GTID by
          scanning backwards, reading just the Gtid_list_log_event at the
          start of each file, and scanning only the relevant binlog file when
          found, not all binlog files.

          The existence of a given entry (domain_id, server_id, seq_no)
          guarantees only that this seq_no will not be found in this or any
          later binlog file. It does not guarantee that it can be found it an
          earlier binlog file, for example the file may have been purged.

          If there is no entry for a given (domain_id, server_id) pair, then
          it means that no such GTID exists in any earlier binlog. It is
          permissible to remove such pair from future Gtid_list_log_events
          if all previous binlog files containing such GTIDs have been purged
          (though such optimization is not performed at the time of this
          writing). So if there is no entry for given GTID it means that such
          GTID should be search for in this or later binlog file, same as if
          there had been an entry (domain_id, server_id, 0).
        */

        Gtid_list_log_event gl_ev(&rpl_global_gtid_binlog_state, 0);
        if (write_event(&gl_ev))
          goto err;

        /* Output a binlog checkpoint event at the start of the binlog file. */

        /*
          Construct an entry in the binlog_xid_count_list for the new binlog
          file (we will not link it into the list until we know the new file
          is successfully created; otherwise we would have to remove it again
          if creation failed, which gets tricky since other threads may have
          seen the entry in the meantime - and we do not want to hold
          LOCK_xid_list for long periods of time).

          Write the current binlog checkpoint into the log, so XA recovery will
          know from where to start recovery.
        */
        /* off/len select the base name of log_file_name (directory cut) */
        size_t off= dirname_length(log_file_name);
        uint len= static_cast<uint>(strlen(log_file_name) - off);
        new_xid_list_entry= new xid_count_per_binlog(log_file_name+off, len);
        if (!new_xid_list_entry)
          goto err;

        /*
          Find the name for the Initial binlog checkpoint.

          Normally this will just be the first entry, as we delete entries
          when their count drops to zero. But we scan the list to handle any
          corner case, eg. for the first binlog file opened after startup, the
          list will be empty.
        */
        mysql_mutex_lock(&LOCK_xid_list);
        I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
        /* Stop at the first entry with pending XIDs, or at end of list */
        while ((b= it++) && b->xid_count == 0)
          ;
        mysql_mutex_unlock(&LOCK_xid_list);
        if (!b)
          b= new_xid_list_entry;
        if (b->binlog_name)
          strmake(buf, b->binlog_name, b->binlog_name_len);
        else
          goto err;
        /*
          NOTE(review): buf holds b's name, while 'len' is the length of the
          NEW file's base name; they differ whenever the two names have
          different lengths - confirm that b->binlog_name_len is not what
          is intended here.
        */
        Binlog_checkpoint_log_event ev(buf, len);
        DBUG_EXECUTE_IF("crash_before_write_checkpoint_event",
                        flush_io_cache(&log_file);
                        mysql_file_sync(log_file.file, MYF(MY_WME));
                        DBUG_SUICIDE(););
        if (write_event(&ev))
          goto err;
        bytes_written+= ev.data_written;
      }
    }
    if (description_event_for_queue &&
        description_event_for_queue->binlog_version>=4)
    {
      /*
        This is a relay log written to by the I/O slave thread.
        Write the event so that others can later know the format of this relay
        log.
        Note that this event is very close to the original event from the
        master (it has binlog version of the master, event types of the
        master), so this is suitable to parse the next relay log's event. It
        has been produced by
        Format_description_log_event::Format_description_log_event(char* buf,).
        Why don't we want to write the description_event_for_queue if this
        event is for format<4 (3.23 or 4.x): this is because in that case, the
        description_event_for_queue describes the data received from the
        master, but not the data written to the relay log (*conversion*),
        which is in format 4 (slave's).
      */
      /*
        Set 'created' to 0, so that in next relay logs this event does not
        trigger cleaning actions on the slave in
        Format_description_log_event::apply_event_impl().
      */
      description_event_for_queue->created= 0;
      /* Don't set log_pos in event header */
      description_event_for_queue->set_artificial_event();

      if (write_event(description_event_for_queue))
        goto err;
      bytes_written+= description_event_for_queue->data_written;
    }
    /* Make everything written so far durable before publishing it */
    if (flush_io_cache(&log_file) ||
        mysql_file_sync(log_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
      goto err;

    my_off_t offset= my_b_tell(&log_file);

    if (!is_relay_log)
    {
      /* update binlog_end_pos so that it can be read by after sync hook */
      reset_binlog_end_pos(log_file_name, offset);

      mysql_mutex_lock(&LOCK_commit_ordered);
      strmake_buf(last_commit_pos_file, log_file_name);
      last_commit_pos_offset= offset;
      mysql_mutex_unlock(&LOCK_commit_ordered);
    }

    if (write_file_name_to_index_file)
    {
#ifdef HAVE_REPLICATION
#ifdef ENABLED_DEBUG_SYNC
      if (current_thd)
        DEBUG_SYNC(current_thd, "binlog_open_before_update_index");
#endif
      DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
#endif

      DBUG_ASSERT(my_b_inited(&index_file) != 0);
      /* Position the index cache for writing at the end of the file */
      reinit_io_cache(&index_file, WRITE_CACHE,
                      my_b_filelength(&index_file), 0, 0);
      /*
        As this is a new log file, we write the file name to the index
        file. As every time we write to the index file, we sync it.
      */
      if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
          my_b_write(&index_file, (uchar*) log_file_name,
                     strlen(log_file_name)) ||
          my_b_write(&index_file, (uchar*) "\n", 1) ||
          flush_io_cache(&index_file) ||
          mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
        goto err;

#ifdef HAVE_REPLICATION
      DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
#endif
    }
  }

  if (!is_relay_log)
  {
    /*
      Now the file was created successfully, so we can link in the entry for
      the new binlog file in binlog_xid_count_list.
    */
    mysql_mutex_lock(&LOCK_xid_list);
    ++current_binlog_id;
    new_xid_list_entry->binlog_id= current_binlog_id;
    /* Remove any initial entries with no pending XIDs.  */
    while ((b= binlog_xid_count_list.head()) && b->xid_count == 0)
    {
      WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Removing xid_list_entry for "
                           "%s (%lu)", b);
      delete binlog_xid_count_list.get();
    }
    mysql_cond_broadcast(&COND_xid_list);
    WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Adding new xid_list_entry for "
                         "%s (%lu)", new_xid_list_entry);
    binlog_xid_count_list.push_back(new_xid_list_entry);
    mysql_mutex_unlock(&LOCK_xid_list);

    /*
      Now that we have synced a new binlog file with an initial Gtid_list
      event, it is safe to delete the binlog state file. We will write out
      a new, updated file at shutdown, and if we crash before we can recover
      the state from the newly written binlog file.

      Since the state file will contain out-of-date data as soon as the first
      new GTID is binlogged, it is better to remove it, to avoid any risk of
      accidentally reading incorrect data later.
    */
    if (!state_file_deleted)
    {
      char buf[FN_REFLEN];
      fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
                MY_UNPACK_FILENAME);
      my_delete(buf, MY_SYNC_DIR);
      state_file_deleted= true;
    }
  }

  log_state= LOG_OPENED;

#ifdef HAVE_REPLICATION
  close_purge_index_file();
#endif

  /* Notify the io thread that binlog is rotated to a new file */
  if (is_relay_log)
    signal_relay_log_update();
  else
    update_binlog_end_pos();
  DBUG_RETURN(0);

err:
  /* Save errno first: the cleanup calls below may overwrite it */
  int tmp_errno= errno;
#ifdef HAVE_REPLICATION
  if (is_inited_purge_index_file())
    purge_index_entry(NULL, NULL, need_mutex);
  close_purge_index_file();
#endif
  sql_print_error(fatal_log_error, (name) ? name : log_name, tmp_errno);
  /* The entry was never linked into the list, so it is still owned here */
  if (new_xid_list_entry)
    delete new_xid_list_entry;
  if (file >= 0)
    mysql_file_close(file, MYF(0));
  close(LOG_CLOSE_INDEX);
  DBUG_RETURN(1);
}
3852
3853
get_current_log(LOG_INFO * linfo)3854 int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
3855 {
3856 mysql_mutex_lock(&LOCK_log);
3857 int ret = raw_get_current_log(linfo);
3858 mysql_mutex_unlock(&LOCK_log);
3859 return ret;
3860 }
3861
raw_get_current_log(LOG_INFO * linfo)3862 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
3863 {
3864 mysql_mutex_assert_owner(&LOCK_log);
3865 strmake_buf(linfo->log_file_name, log_file_name);
3866 linfo->pos = my_b_tell(&log_file);
3867 return 0;
3868 }
3869
/**
  Move all data in a file name index file towards the beginning of the file.

  The copy is done outside of the IO_CACHE as the cache buffers would just
  make things slower and more complicated.
  In most cases the copy loop should only do one read.

  @param index_file			File to move
  @param offset			Move everything from here to beginning

  @note
    File will be truncated to be 'offset' shorter or filled up with newlines

  @retval
    0	ok
  @retval
    1	read, write, resize or sync of the file failed
*/

#ifdef HAVE_REPLICATION

static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
{
  const my_off_t shift= offset;                 // distance every byte moves
  File fd= index_file->file;
  uchar chunk[IO_SIZE*2];
  DBUG_ENTER("copy_up_file_and_fill");

  while (true)
  {
    /* Read the next chunk from its old position ... */
    mysql_file_seek(fd, offset, MY_SEEK_SET, MYF(0));
    int got= (int) mysql_file_read(fd, chunk, sizeof(chunk), MYF(MY_WME));
    if (got < 0)
      DBUG_RETURN(1);
    if (got == 0)
      break;                                    // end of file

    /* ... and write it back 'shift' bytes earlier */
    mysql_file_seek(fd, offset - shift, MY_SEEK_SET, MYF(0));
    if (mysql_file_write(fd, chunk, got,
                         MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
      DBUG_RETURN(1);

    offset+= got;
  }

  /* The following will either truncate the file or fill the end with '\n' */
  if (mysql_file_chsize(fd, offset - shift, '\n', MYF(MY_WME)))
    DBUG_RETURN(1);
  if (mysql_file_sync(fd, MYF(MY_WME|MY_SYNC_FILESIZE)))
    DBUG_RETURN(1);

  /* Reset data in old index cache */
  reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
  DBUG_RETURN(0);
}

#endif /* HAVE_REPLICATION */
3925
3926 /**
3927 Find the position in the log-index-file for the given log name.
3928
3929 @param linfo Store here the found log file name and position to
3930 the NEXT log file name in the index file.
3931 @param log_name Filename to find in the index file.
3932 Is a null pointer if we want to read the first entry
3933 @param need_lock Set this to 1 if the parent doesn't already have a
3934 lock on LOCK_index
3935
3936 @note
3937 On systems without the truncate function the file will end with one or
3938 more empty lines. These will be ignored when reading the file.
3939
3940 @retval
3941 0 ok
3942 @retval
3943 LOG_INFO_EOF End of log-index-file found
3944 @retval
3945 LOG_INFO_IO Got IO error while reading file
3946 */
3947
find_log_pos(LOG_INFO * linfo,const char * log_name,bool need_lock)3948 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
3949 bool need_lock)
3950 {
3951 int error= 0;
3952 char *full_fname= linfo->log_file_name;
3953 char full_log_name[FN_REFLEN], fname[FN_REFLEN];
3954 uint log_name_len= 0, fname_len= 0;
3955 DBUG_ENTER("find_log_pos");
3956 full_log_name[0]= full_fname[0]= 0;
3957
3958 /*
3959 Mutex needed because we need to make sure the file pointer does not
3960 move from under our feet
3961 */
3962 if (need_lock)
3963 mysql_mutex_lock(&LOCK_index);
3964 mysql_mutex_assert_owner(&LOCK_index);
3965
3966 // extend relative paths for log_name to be searched
3967 if (log_name)
3968 {
3969 if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
3970 {
3971 error= LOG_INFO_EOF;
3972 goto end;
3973 }
3974 }
3975
3976 log_name_len= log_name ? (uint) strlen(full_log_name) : 0;
3977 DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
3978 log_name ? log_name : "NULL", full_log_name));
3979
3980 /* As the file is flushed, we can't get an error here */
3981 (void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);
3982
3983 for (;;)
3984 {
3985 size_t length;
3986 my_off_t offset= my_b_tell(&index_file);
3987
3988 DBUG_EXECUTE_IF("simulate_find_log_pos_error",
3989 error= LOG_INFO_EOF; break;);
3990 /* If we get 0 or 1 characters, this is the end of the file */
3991 if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
3992 {
3993 /* Did not find the given entry; Return not found or error */
3994 error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
3995 break;
3996 }
3997 if (fname[length-1] != '\n')
3998 continue; // Not a log entry
3999 fname[length-1]= 0; // Remove end \n
4000
4001 // extend relative paths and match against full path
4002 if (normalize_binlog_name(full_fname, fname, is_relay_log))
4003 {
4004 error= LOG_INFO_EOF;
4005 break;
4006 }
4007 fname_len= (uint) strlen(full_fname);
4008
4009 // if the log entry matches, null string matching anything
4010 if (!log_name ||
4011 (log_name_len == fname_len &&
4012 !strncmp(full_fname, full_log_name, log_name_len)))
4013 {
4014 DBUG_PRINT("info", ("Found log file entry"));
4015 linfo->index_file_start_offset= offset;
4016 linfo->index_file_offset = my_b_tell(&index_file);
4017 break;
4018 }
4019 }
4020
4021 end:
4022 if (need_lock)
4023 mysql_mutex_unlock(&LOCK_index);
4024 DBUG_RETURN(error);
4025 }
4026
4027
4028 /**
4029 Find the position in the log-index-file for the given log name.
4030
4031 @param
4032 linfo Store here the next log file name and position to
4033 the file name after that.
4034 @param
4035 need_lock Set this to 1 if the parent doesn't already have a
4036 lock on LOCK_index
4037
4038 @note
4039 - Before calling this function, one has to call find_log_pos()
4040 to set up 'linfo'
4041 - Mutex needed because we need to make sure the file pointer does not move
4042 from under our feet
4043
4044 @retval
4045 0 ok
4046 @retval
4047 LOG_INFO_EOF End of log-index-file found
4048 @retval
4049 LOG_INFO_IO Got IO error while reading file
4050 */
4051
find_next_log(LOG_INFO * linfo,bool need_lock)4052 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
4053 {
4054 int error= 0;
4055 size_t length;
4056 char fname[FN_REFLEN];
4057 char *full_fname= linfo->log_file_name;
4058
4059 if (need_lock)
4060 mysql_mutex_lock(&LOCK_index);
4061 mysql_mutex_assert_owner(&LOCK_index);
4062
4063 /* As the file is flushed, we can't get an error here */
4064 (void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
4065 0);
4066
4067 linfo->index_file_start_offset= linfo->index_file_offset;
4068 if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
4069 {
4070 error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
4071 goto err;
4072 }
4073
4074 if (fname[0] != 0)
4075 {
4076 if(normalize_binlog_name(full_fname, fname, is_relay_log))
4077 {
4078 error= LOG_INFO_EOF;
4079 goto err;
4080 }
4081 length= strlen(full_fname);
4082 }
4083
4084 full_fname[length-1]= 0; // kill \n
4085 linfo->index_file_offset= my_b_tell(&index_file);
4086
4087 err:
4088 if (need_lock)
4089 mysql_mutex_unlock(&LOCK_index);
4090 return error;
4091 }
4092
4093
/**
  Delete all logs referred to in the index file.

  The new index file will only contain this file.

  @param thd                 Thread id. This can be zero in case of resetting
                             relay logs
  @param create_new_log      1 if we should start writing to a new log file
  @param init_state          Binlog only. If not NULL it is loaded into
                             rpl_global_gtid_binlog_state once the old log
                             files are deleted; in that case the call is
                             refused with ER_BINLOG_MUST_BE_EMPTY unless
                             is_empty_state() is true. If NULL the global
                             GTID binlog state is reset instead.
  @param init_state_len      Length passed to load() together with
                             init_state.
  @param next_log_number     min number of next log file to use, if possible.

  @note
    If not called from slave thread, write start event to new log

  @note
    LOCK_log and LOCK_index are taken (in that order) and held until the
    function returns.

  @retval
    0   ok
  @retval
    1   error
*/

bool MYSQL_BIN_LOG::reset_logs(THD *thd, bool create_new_log,
                               rpl_gtid *init_state, uint32 init_state_len,
                               ulong next_log_number)
{
  LOG_INFO linfo;
  bool error=0;
  int err;
  const char* save_name;
  DBUG_ENTER("reset_logs");

  if (!is_relay_log)
  {
    /* An initial GTID state may only be installed into an empty binlog */
    if (init_state && !is_empty_state())
    {
      my_error(ER_BINLOG_MUST_BE_EMPTY, MYF(0));
      DBUG_RETURN(1);
    }

    /*
      Mark that a RESET MASTER is in progress.
      This ensures that a binlog checkpoint will not try to write binlog
      checkpoint events, which would be useless (as we are deleting the binlog
      anyway) and could deadlock, as we are holding LOCK_log.

      Wait for any mark_xid_done() calls that might be already running to
      complete (mark_xid_done_waiting counter to drop to zero); we need to
      do this before we take the LOCK_log to not deadlock.
    */
    mysql_mutex_lock(&LOCK_xid_list);
    reset_master_pending++;
    while (mark_xid_done_waiting > 0)
      mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
    mysql_mutex_unlock(&LOCK_xid_list);
  }

  DEBUG_SYNC_C_IF_THD(thd, "reset_logs_after_set_reset_master_pending");
  /*
    We need to get both locks to be sure that no one is trying to
    write to the index log file.
  */
  mysql_mutex_lock(&LOCK_log);
  mysql_mutex_lock(&LOCK_index);

  if (!is_relay_log)
  {
    /*
      We are going to nuke all binary log files.
      Without binlog, we cannot XA recover prepared-but-not-committed
      transactions in engines. So force a commit checkpoint first.

      Note that we take and immediately
      release LOCK_after_binlog_sync/LOCK_commit_ordered. This has
      the effect to ensure that any on-going group commit (in
      trx_group_commit_leader()) has completed before we request the checkpoint,
      due to the chaining of LOCK_log and LOCK_commit_ordered in that function.
      (We are holding LOCK_log, so no new group commit can start).

      Without this, it is possible (though perhaps unlikely) that the RESET
      MASTER could run in-between the write to the binlog and the
      commit_ordered() in the engine of some transaction, and then a crash
      later would leave such transaction not recoverable.
    */

    mysql_mutex_lock(&LOCK_after_binlog_sync);
    mysql_mutex_lock(&LOCK_commit_ordered);
    mysql_mutex_unlock(&LOCK_after_binlog_sync);
    mysql_mutex_unlock(&LOCK_commit_ordered);

    mark_xids_active(current_binlog_id, 1);
    do_checkpoint_request(current_binlog_id);

    /* Now wait for all checkpoint requests and pending unlog() to complete. */
    mysql_mutex_lock(&LOCK_xid_list);
    for (;;)
    {
      if (is_xidlist_idle_nolock())
        break;
      /*
        Wait until signalled that one more binlog dropped to zero, then check
        again.
      */
      mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
    }

    /*
      Now all XIDs are fully flushed to disk, and we are holding LOCK_log so
      no new ones will be written. So we can proceed to delete the logs.
    */
    mysql_mutex_unlock(&LOCK_xid_list);
  }

  /* Save variables so that we can reopen the log */
  save_name=name;
  name=0;					// Protect against free
  close(LOG_CLOSE_TO_BE_OPENED);

  last_used_log_number= 0;                      // Reset log number cache

  /*
    First delete all old log files and then update the index file.
    As we first delete the log files and do not use sort of logging,
    a crash may lead to an inconsistent state where the index has
    references to non-existent files.

    We need to invert the steps and use the purge_index_file methods
    in order to make the operation safe.
  */

  /* Position linfo on the first entry of the index file */
  if ((err= find_log_pos(&linfo, NullS, 0)) != 0)
  {
    uint errcode= purge_log_get_error_code(err);
    sql_print_error("Failed to locate old binlog or relay log files");
    my_message(errcode, ER_THD_OR_DEFAULT(thd, errcode), MYF(0));
    error= 1;
    goto err;
  }

  /* Delete every file listed in the index, one entry per iteration */
  for (;;)
  {
    if (unlikely((error= my_delete(linfo.log_file_name, MYF(0)))))
    {
      if (my_errno == ENOENT)
      {
        /* A file that is already gone is only worth a warning */
        if (thd)
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_LOG_PURGE_NO_FILE,
                              ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                              linfo.log_file_name);

        sql_print_information("Failed to delete file '%s'",
                              linfo.log_file_name);
        my_errno= 0;
        error= 0;
      }
      else
      {
        /* Any other delete failure aborts the reset */
        if (thd)
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with deleting %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              linfo.log_file_name);
        error= 1;
        goto err;
      }
    }
    /* Any non-zero result (EOF or error) from find_next_log ends the loop */
    if (find_next_log(&linfo, 0))
      break;
  }

  if (!is_relay_log)
  {
    /* Install the requested initial GTID state, or start from an empty one */
    if (init_state)
      rpl_global_gtid_binlog_state.load(init_state, init_state_len);
    else
      rpl_global_gtid_binlog_state.reset();
  }

  /* Start logging with a new file */
  close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED);
  // Reset (open will update)
  if (unlikely((error= my_delete(index_file_name, MYF(0)))))
  {
    if (my_errno == ENOENT)
    {
      /* A missing index file is tolerated in the same way as a missing log */
      if (thd)
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_LOG_PURGE_NO_FILE,
                            ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                            index_file_name);
      sql_print_information("Failed to delete file '%s'",
                            index_file_name);
      my_errno= 0;
      error= 0;
    }
    else
    {
      if (thd)
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_BINLOG_PURGE_FATAL_ERR,
                            "a problem with deleting %s; "
                            "consider examining correspondence "
                            "of your binlog index file "
                            "to the actual binlog files",
                            index_file_name);
      error= 1;
      goto err;
    }
  }
  /*
    NOTE(review): when open_index_file() returns non-zero (presumably a
    failure), 'error' stays 0 and no new log is opened, so the function
    reports success; confirm that this is intended.
  */
  if (create_new_log && !open_index_file(index_file_name, 0, FALSE))
    if (unlikely((error= open(save_name, log_type, 0, next_log_number,
                              io_cache_type, max_size, 0, FALSE))))
      goto err;
  /* Reached only when nothing jumped to err: the saved name is released */
  my_free((void *) save_name);

err:
  /* On failure the saved name was not freed; give it back to 'name' */
  if (error == 1)
    name= const_cast<char*>(save_name);

  if (!is_relay_log)
  {
    xid_count_per_binlog *b;
    /*
      Remove all entries in the xid_count list except the last.
      Normally we will just be deleting all the entries that we waited for to
      drop to zero above.  But if we fail during RESET MASTER for some reason
      then we will not have created any new log file, and we may keep the last
      of the old entries.
    */
    mysql_mutex_lock(&LOCK_xid_list);
    for (;;)
    {
      b= binlog_xid_count_list.head();
      DBUG_ASSERT(b /* List can never become empty. */);
      if (b->binlog_id == current_binlog_id)
        break;
      DBUG_ASSERT(b->xid_count == 0);
      WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::reset_logs(): Removing "
                           "xid_list_entry for %s (%lu)", b);
      delete binlog_xid_count_list.get();
    }
    /* Wake up waiters on COND_xid_list and end the "reset pending" state */
    mysql_cond_broadcast(&COND_xid_list);
    reset_master_pending--;
    reset_master_count++;
    mysql_mutex_unlock(&LOCK_xid_list);
  }

  mysql_mutex_unlock(&LOCK_index);
  mysql_mutex_unlock(&LOCK_log);
  DBUG_RETURN(error);
}
4346
4347
wait_for_last_checkpoint_event()4348 void MYSQL_BIN_LOG::wait_for_last_checkpoint_event()
4349 {
4350 mysql_mutex_lock(&LOCK_xid_list);
4351 for (;;)
4352 {
4353 if (binlog_xid_count_list.is_last(binlog_xid_count_list.head()))
4354 break;
4355 mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4356 }
4357 mysql_mutex_unlock(&LOCK_xid_list);
4358
4359 /*
4360 LOCK_xid_list and LOCK_log are chained, so the LOCK_log will only be
4361 obtained after mark_xid_done() has written the last checkpoint event.
4362 */
4363 mysql_mutex_lock(&LOCK_log);
4364 mysql_mutex_unlock(&LOCK_log);
4365 }
4366
4367
4368 /**
4369 Delete relay log files prior to rli->group_relay_log_name
4370 (i.e. all logs which are not involved in a non-finished group
4371 (transaction)), remove them from the index file and start on next
4372 relay log.
4373
4374 IMPLEMENTATION
4375
4376 - You must hold rli->data_lock before calling this function, since
4377 it writes group_relay_log_pos and similar fields of
4378 Relay_log_info.
4379 - Protects index file with LOCK_index
4380 - Delete relevant relay log files
4381 - Copy all file names after these ones to the front of the index file
    - If the OS has truncate, truncate the file, else fill it with '\n'
4383 - Read the next file name from the index file and store in rli->linfo
4384
4385 @param rli Relay log information
4386 @param included If false, all relay logs that are strictly before
4387 rli->group_relay_log_name are deleted ; if true, the
4388 latter is deleted too (i.e. all relay logs
4389 read by the SQL slave thread are deleted).
4390
4391 @note
4392 - This is only called from the slave SQL thread when it has read
4393 all commands from a relay log and want to switch to a new relay log.
4394 - When this happens, we can be in an active transaction as
4395 a transaction can span over two relay logs
4396 (although it is always written as a single block to the master's binary
4397 log, hence cannot span over two master's binary logs).
4398
4399 @retval
4400 0 ok
4401 @retval
4402 LOG_INFO_EOF End of log-index-file found
4403 @retval
4404 LOG_INFO_SEEK Could not allocate IO cache
4405 @retval
4406 LOG_INFO_IO Got IO error while reading file
4407 */
4408
4409 #ifdef HAVE_REPLICATION
4410
/*
  Advance rli to the relay log that follows rli->event_relay_log_name and
  purge the relay logs that are no longer needed.

  The caller must hold rli->data_lock (asserted below); LOCK_index is taken
  here and held until the function returns.
*/
int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
{
  int error, errcode;
  char *to_purge_if_included= NULL;
  inuse_relaylog *ir;
  ulonglong log_space_reclaimed= 0;
  DBUG_ENTER("purge_first_log");

  DBUG_ASSERT(is_open());
  DBUG_ASSERT(rli->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT);
  DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));

  mysql_mutex_assert_owner(&rli->data_lock);

  mysql_mutex_lock(&LOCK_index);

  /*
    Walk the list of in-use relay logs from the front and free the entries
    that are completed and fully dequeued. The walk stops at the first entry
    that is still in use (which also clears 'included') or, when 'included'
    is false, at the entry named rli->group_relay_log_name.
  */
  ir= rli->inuse_relaylog_list;
  while (ir)
  {
    inuse_relaylog *next= ir->next;
    if (!ir->completed || ir->dequeued_count < ir->queued_count)
    {
      included= false;
      break;
    }
    if (!included && !strcmp(ir->name, rli->group_relay_log_name))
      break;
    if (!next)
    {
      /*
        The last list entry is about to be freed: remember its name as the
        purge target and force 'included' so that it is purged as well.
      */
      rli->last_inuse_relaylog= NULL;
      included= 1;
      to_purge_if_included= my_strdup(ir->name, MYF(0));
    }
    rli->free_inuse_relaylog(ir);
    ir= next;
  }
  rli->inuse_relaylog_list= ir;
  /* The walk stopped early: the first remaining entry is the purge target */
  if (ir)
    to_purge_if_included= my_strdup(ir->name, MYF(0));

  /*
    Read the next log file name from the index file and pass it back to
    the caller.
  */
  if (unlikely((error=find_log_pos(&rli->linfo, rli->event_relay_log_name,
                                   0))) ||
      unlikely((error=find_next_log(&rli->linfo, 0))))
  {
    sql_print_error("next log error: %d  offset: %llu  log: %s included: %d",
                    error, rli->linfo.index_file_offset,
                    rli->event_relay_log_name, included);
    goto err;
  }

  /*
    Reset rli's coordinates to the current log.
  */
  rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
  strmake_buf(rli->event_relay_log_name,rli->linfo.log_file_name);

  /*
    If we removed the rli->group_relay_log_name file,
    we must update the rli->group* coordinates, otherwise do not touch it as the
    group's execution is not finished (e.g. COMMIT not executed)
  */
  if (included)
  {
    rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE;
    strmake_buf(rli->group_relay_log_name,rli->linfo.log_file_name);
    rli->notify_group_relay_log_name_update();
  }

  /* Store where we are in the new file for the execution thread */
  if (rli->flush())
    error= LOG_INFO_IO;

  DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););

  /*
    LOCK_index is already held, so need_mutex is 0.
    NOTE(review): the return value of purge_logs() is ignored here, and
    to_purge_if_included is NULL if the in-use list was empty or my_strdup()
    failed; confirm that neither can matter in practice.
  */
  rli->relay_log.purge_logs(to_purge_if_included, included,
                            0, 0, &log_space_reclaimed);

  /* Account for the freed space and wake up waiters on log_space_cond */
  mysql_mutex_lock(&rli->log_space_lock);
  rli->log_space_total-= log_space_reclaimed;
  mysql_cond_broadcast(&rli->log_space_cond);
  mysql_mutex_unlock(&rli->log_space_lock);

  /*
   * Need to update the log pos because purge logs has been called
   * after fetching initially the log pos at the beginning of the method.
   *
   * NOTE(review): a failure here is stored in 'errcode', not 'error', so
   * the function can still return 0 after logging the problem; the message
   * also prints group_relay_log_name although event_relay_log_name was
   * searched. Confirm whether this is intended.
   */
  if ((errcode= find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)))
  {
    sql_print_error("next log error: %d  offset: %llu  log: %s included: %d",
                    errcode, rli->linfo.index_file_offset,
                    rli->group_relay_log_name, included);
    goto err;
  }

  /* If included was passed, rli->linfo should be the first entry. */
  DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);

err:
  my_free(to_purge_if_included);
  mysql_mutex_unlock(&LOCK_index);
  DBUG_RETURN(error);
}
4517
4518 /**
4519 Update log index_file.
4520 */
4521
update_log_index(LOG_INFO * log_info,bool need_update_threads)4522 int MYSQL_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
4523 {
4524 if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
4525 return LOG_INFO_IO;
4526
4527 // now update offsets in index file for running threads
4528 if (need_update_threads)
4529 adjust_linfo_offsets(log_info->index_file_start_offset);
4530 return 0;
4531 }
4532
4533 /**
4534 Remove all logs before the given log from disk and from the index file.
4535
4536 @param to_log Delete all log file name before this file.
4537 @param included If true, to_log is deleted too.
4538 @param need_mutex
4539 @param need_update_threads If we want to update the log coordinates of
4540 all threads. False for relay logs, true otherwise.
4541 @param reclaimeed_log_space If not null, increment this variable to
4542 the amount of log space freed
4543
4544 @note
4545 If any of the logs before the deleted one is in use,
4546 only purge logs up to this one.
4547
4548 @retval
4549 0 ok
4550 @retval
4551 LOG_INFO_EOF to_log not found
4552 LOG_INFO_EMFILE too many files opened
4553 LOG_INFO_FATAL if any other than ENOENT error from
4554 mysql_file_stat() or mysql_file_delete()
4555 */
4556
purge_logs(const char * to_log,bool included,bool need_mutex,bool need_update_threads,ulonglong * reclaimed_space)4557 int MYSQL_BIN_LOG::purge_logs(const char *to_log,
4558 bool included,
4559 bool need_mutex,
4560 bool need_update_threads,
4561 ulonglong *reclaimed_space)
4562 {
4563 int error= 0;
4564 bool exit_loop= 0;
4565 LOG_INFO log_info;
4566 THD *thd= current_thd;
4567 DBUG_ENTER("purge_logs");
4568 DBUG_PRINT("info",("to_log= %s",to_log));
4569
4570 if (need_mutex)
4571 mysql_mutex_lock(&LOCK_index);
4572 if (unlikely((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/))) )
4573 {
4574 sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
4575 "listed in the index.", to_log);
4576 goto err;
4577 }
4578
4579 if (unlikely((error= open_purge_index_file(TRUE))))
4580 {
4581 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
4582 goto err;
4583 }
4584
4585 /*
4586 File name exists in index file; delete until we find this file
4587 or a file that is used.
4588 */
4589 if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))))
4590 goto err;
4591 while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
4592 can_purge_log(log_info.log_file_name))
4593 {
4594 if (unlikely((error= register_purge_index_entry(log_info.log_file_name))))
4595 {
4596 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
4597 log_info.log_file_name);
4598 goto err;
4599 }
4600
4601 if (find_next_log(&log_info, 0) || exit_loop)
4602 break;
4603 }
4604
4605 DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););
4606
4607 if (unlikely((error= sync_purge_index_file())))
4608 {
4609 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
4610 goto err;
4611 }
4612
4613 /* We know how many files to delete. Update index file. */
4614 if (unlikely((error=update_log_index(&log_info, need_update_threads))))
4615 {
4616 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
4617 goto err;
4618 }
4619
4620 DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););
4621
4622 err:
4623 /* Read each entry from purge_index_file and delete the file. */
4624 if (is_inited_purge_index_file() &&
4625 (error= purge_index_entry(thd, reclaimed_space, FALSE)))
4626 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
4627 " that would be purged.");
4628 close_purge_index_file();
4629
4630 DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););
4631
4632 if (need_mutex)
4633 mysql_mutex_unlock(&LOCK_index);
4634 DBUG_RETURN(error);
4635 }
4636
set_purge_index_file_name(const char * base_file_name)4637 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
4638 {
4639 int error= 0;
4640 DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
4641 if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
4642 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
4643 MY_REPLACE_EXT)) == NULL)
4644 {
4645 error= 1;
4646 sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
4647 "file name.");
4648 }
4649 DBUG_RETURN(error);
4650 }
4651
open_purge_index_file(bool destroy)4652 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
4653 {
4654 int error= 0;
4655 File file= -1;
4656
4657 DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
4658
4659 if (destroy)
4660 close_purge_index_file();
4661
4662 if (!my_b_inited(&purge_index_file))
4663 {
4664 if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
4665 MYF(MY_WME))) < 0 ||
4666 init_io_cache(&purge_index_file, file, IO_SIZE,
4667 (destroy ? WRITE_CACHE : READ_CACHE),
4668 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
4669 {
4670 error= 1;
4671 sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
4672 " file.");
4673 }
4674 }
4675 DBUG_RETURN(error);
4676 }
4677
close_purge_index_file()4678 int MYSQL_BIN_LOG::close_purge_index_file()
4679 {
4680 int error= 0;
4681
4682 DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
4683
4684 if (my_b_inited(&purge_index_file))
4685 {
4686 end_io_cache(&purge_index_file);
4687 error= my_close(purge_index_file.file, MYF(0));
4688 }
4689 my_delete(purge_index_file_name, MYF(0));
4690 bzero((char*) &purge_index_file, sizeof(purge_index_file));
4691
4692 DBUG_RETURN(error);
4693 }
4694
is_inited_purge_index_file()4695 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
4696 {
4697 return my_b_inited(&purge_index_file);
4698 }
4699
sync_purge_index_file()4700 int MYSQL_BIN_LOG::sync_purge_index_file()
4701 {
4702 int error= 0;
4703 DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
4704
4705 if (unlikely((error= flush_io_cache(&purge_index_file))) ||
4706 unlikely((error= my_sync(purge_index_file.file,
4707 MYF(MY_WME | MY_SYNC_FILESIZE)))))
4708 DBUG_RETURN(error);
4709
4710 DBUG_RETURN(error);
4711 }
4712
register_purge_index_entry(const char * entry)4713 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
4714 {
4715 int error= 0;
4716 DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
4717
4718 if (unlikely((error=my_b_write(&purge_index_file, (const uchar*)entry,
4719 strlen(entry)))) ||
4720 unlikely((error=my_b_write(&purge_index_file, (const uchar*)"\n", 1))))
4721 DBUG_RETURN (error);
4722
4723 DBUG_RETURN(error);
4724 }
4725
register_create_index_entry(const char * entry)4726 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
4727 {
4728 DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
4729 DBUG_RETURN(register_purge_index_entry(entry));
4730 }
4731
/**
  Process the purge index ("register") file.

  Every file name read from the register file is stat'ed. A file that
  exists is deleted only if find_log_pos() reports LOG_INFO_EOF for it,
  i.e. it was not found in the log index file; if find_log_pos() finds it,
  the file is left alone.

  @param thd              If not NULL, problems are pushed as warnings to
                          this thread
  @param reclaimed_space  If not NULL, incremented by the st_size of every
                          file that was deleted
  @param need_mutex       Passed on to find_log_pos() as its need_lock
                          argument

  @retval
    0                 ok
  @retval
    LOG_INFO_EMFILE   my_delete() failed with EMFILE
  @retval
    LOG_INFO_FATAL    stat or delete failed with an error other than ENOENT
  @retval
    other             error from reinit_io_cache(), from reading the
                      register file, or from find_log_pos() (other than
                      LOG_INFO_EOF)
*/
int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *reclaimed_space,
                                     bool need_mutex)
{
  DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");
  MY_STAT s;
  int error= 0;
  LOG_INFO log_info;
  LOG_INFO check_log_info;

  DBUG_ASSERT(my_b_inited(&purge_index_file));

  /* Switch the register file to reading, starting at offset 0 */
  if (unlikely((error= reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0,
                                       0))))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
                    "for read");
    goto err;
  }

  for (;;)
  {
    size_t length;

    /* A result of 0 or 1 characters means end of file or a read error */
    if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
                          FN_REFLEN)) <= 1)
    {
      if (purge_index_file.error)
      {
        error= purge_index_file.error;
        sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
                        "register file.", error);
        goto err;
      }

      /* Reached EOF */
      break;
    }

    /* Get rid of the trailing '\n' */
    log_info.log_file_name[length-1]= 0;

    if (unlikely(!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s,
                                  MYF(0))))
    {
      if (my_errno == ENOENT)
      {
        /*
          It's not fatal if we can't stat a log file that does not exist;
          If we could not stat, we won't delete.
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                              log_info.log_file_name);
        }
        sql_print_information("Failed to execute mysql_file_stat on file '%s'",
			      log_info.log_file_name);
        my_errno= 0;
      }
      else
      {
        /*
          Other than ENOENT are fatal
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with getting info on being purged %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              log_info.log_file_name);
        }
        else
        {
          sql_print_information("Failed to delete log file '%s'; "
                                "consider examining correspondence "
                                "of your binlog index file "
                                "to the actual binlog files",
                                log_info.log_file_name);
        }
        error= LOG_INFO_FATAL;
        goto err;
      }
    }
    else
    {
      /*
        The file exists. It is only deleted when it is NOT found in the log
        index file (LOG_INFO_EOF); if find_log_pos() returns 0 the file is
        still listed and nothing is done for it.
      */
      if (unlikely((error= find_log_pos(&check_log_info,
                                        log_info.log_file_name, need_mutex))))
      {
        if (error != LOG_INFO_EOF)
        {
          /* The index file could not be read: give up with that error */
          if (thd)
          {
            push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                ER_BINLOG_PURGE_FATAL_ERR,
                                "a problem with deleting %s and "
                                "reading the binlog index file",
                                log_info.log_file_name);
          }
          else
          {
            sql_print_information("Failed to delete file '%s' and "
                                  "read the binlog index file",
                                  log_info.log_file_name);
          }
          goto err;
        }

        /* LOG_INFO_EOF is the expected outcome here, not an error */
        error= 0;

        DBUG_PRINT("info",("purging %s",log_info.log_file_name));
        if (!my_delete(log_info.log_file_name, MYF(0)))
        {
          /* Deleted: account for the size reported by the stat above */
          if (reclaimed_space)
            *reclaimed_space+= s.st_size;
        }
        else
        {
          if (my_errno == ENOENT)
          {
            /* The file vanished between stat and delete: warn and go on */
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                  ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                                  log_info.log_file_name);
            }
            sql_print_information("Failed to delete file '%s'",
                                  log_info.log_file_name);
            my_errno= 0;
          }
          else
          {
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                  ER_BINLOG_PURGE_FATAL_ERR,
                                  "a problem with deleting %s; "
                                  "consider examining correspondence "
                                  "of your binlog index file "
                                  "to the actual binlog files",
                                  log_info.log_file_name);
            }
            else
            {
              sql_print_information("Failed to delete file '%s'; "
                                    "consider examining correspondence "
                                    "of your binlog index file "
                                    "to the actual binlog files",
                                    log_info.log_file_name);
            }
            /* EMFILE gets its own return code; everything else is fatal */
            if (my_errno == EMFILE)
            {
              DBUG_PRINT("info",
                         ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno));
              error= LOG_INFO_EMFILE;
              goto err;
            }
            error= LOG_INFO_FATAL;
            goto err;
          }
        }
      }
    }
  }

err:
  DBUG_RETURN(error);
}
4903
4904 /**
4905 Remove all logs before the given file date from disk and from the
4906 index file.
4907
4908 @param thd Thread pointer
4909 @param purge_time Delete all log files before given date.
4910
4911 @note
4912 If any of the logs before the deleted one is in use,
4913 only purge logs up to this one.
4914
4915 @retval
4916 0 ok
4917 @retval
4918 LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
4919 LOG_INFO_FATAL if any other than ENOENT error from
4920 mysql_file_stat() or mysql_file_delete()
4921 */
4922
purge_logs_before_date(time_t purge_time)4923 int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
4924 {
4925 int error;
4926 char to_log[FN_REFLEN];
4927 LOG_INFO log_info;
4928 MY_STAT stat_area;
4929 THD *thd= current_thd;
4930 DBUG_ENTER("purge_logs_before_date");
4931
4932 mysql_mutex_lock(&LOCK_index);
4933 to_log[0]= 0;
4934
4935 if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))))
4936 goto err;
4937
4938 while (strcmp(log_file_name, log_info.log_file_name) &&
4939 can_purge_log(log_info.log_file_name))
4940 {
4941 if (!mysql_file_stat(m_key_file_log,
4942 log_info.log_file_name, &stat_area, MYF(0)))
4943 {
4944 if (my_errno == ENOENT)
4945 {
4946 /*
4947 It's not fatal if we can't stat a log file that does not exist.
4948 */
4949 my_errno= 0;
4950 }
4951 else
4952 {
4953 /*
4954 Other than ENOENT are fatal
4955 */
4956 if (thd)
4957 {
4958 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4959 ER_BINLOG_PURGE_FATAL_ERR,
4960 "a problem with getting info on being purged %s; "
4961 "consider examining correspondence "
4962 "of your binlog index file "
4963 "to the actual binlog files",
4964 log_info.log_file_name);
4965 }
4966 else
4967 {
4968 sql_print_information("Failed to delete log file '%s'",
4969 log_info.log_file_name);
4970 }
4971 error= LOG_INFO_FATAL;
4972 goto err;
4973 }
4974 }
4975 else
4976 {
4977 if (stat_area.st_mtime < purge_time)
4978 strmake_buf(to_log, log_info.log_file_name);
4979 else
4980 break;
4981 }
4982 if (find_next_log(&log_info, 0))
4983 break;
4984 }
4985
4986 error= (to_log[0] ? purge_logs(to_log, 1, 0, 1, (ulonglong *) 0) : 0);
4987
4988 err:
4989 mysql_mutex_unlock(&LOCK_index);
4990 DBUG_RETURN(error);
4991 }
4992
4993
4994 bool
can_purge_log(const char * log_file_name_arg)4995 MYSQL_BIN_LOG::can_purge_log(const char *log_file_name_arg)
4996 {
4997 xid_count_per_binlog *b;
4998
4999 if (is_active(log_file_name_arg))
5000 return false;
5001 mysql_mutex_lock(&LOCK_xid_list);
5002 {
5003 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
5004 while ((b= it++) &&
5005 0 != strncmp(log_file_name_arg+dirname_length(log_file_name_arg),
5006 b->binlog_name, b->binlog_name_len))
5007 ;
5008 }
5009 mysql_mutex_unlock(&LOCK_xid_list);
5010 if (b)
5011 return false;
5012 return !log_in_use(log_file_name_arg);
5013 }
5014 #endif /* HAVE_REPLICATION */
5015
5016
5017 bool
is_xidlist_idle()5018 MYSQL_BIN_LOG::is_xidlist_idle()
5019 {
5020 bool res;
5021 mysql_mutex_lock(&LOCK_xid_list);
5022 res= is_xidlist_idle_nolock();
5023 mysql_mutex_unlock(&LOCK_xid_list);
5024 return res;
5025 }
5026
5027
5028 bool
is_xidlist_idle_nolock()5029 MYSQL_BIN_LOG::is_xidlist_idle_nolock()
5030 {
5031 xid_count_per_binlog *b;
5032
5033 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
5034 while ((b= it++))
5035 {
5036 if (b->xid_count > 0)
5037 return false;
5038 }
5039 return true;
5040 }
5041
5042 #ifdef WITH_WSREP
5043 inline bool
is_gtid_cached_internal(IO_CACHE * file)5044 is_gtid_cached_internal(IO_CACHE *file)
5045 {
5046 uchar data[EVENT_TYPE_OFFSET+1];
5047 bool result= false;
5048 my_off_t write_pos= my_b_tell(file);
5049 if (reinit_io_cache(file, READ_CACHE, 0, 0, 0))
5050 return false;
5051 /*
5052 In the cache we have gtid event if , below condition is true,
5053 */
5054 my_b_read(file, data, sizeof(data));
5055 uint event_type= (uchar)data[EVENT_TYPE_OFFSET];
5056 if (event_type == GTID_LOG_EVENT)
5057 result= true;
5058 /*
5059 Cleanup , Why because we have not read the full buffer
5060 and this will cause next to next reinit_io_cache(called in write_cache)
5061 to make cache empty.
5062 */
5063 file->read_pos= file->read_end;
5064 if (reinit_io_cache(file, WRITE_CACHE, write_pos, 0, 0))
5065 return false;
5066 return result;
5067 }
5068 #endif
5069
5070 #ifdef WITH_WSREP
5071 inline bool
is_gtid_cached(THD * thd)5072 MYSQL_BIN_LOG::is_gtid_cached(THD *thd)
5073 {
5074 binlog_cache_mngr *mngr= (binlog_cache_mngr *) thd_get_ha_data(
5075 thd, binlog_hton);
5076 if (!mngr)
5077 return false;
5078 binlog_cache_data *cache_trans= mngr->get_binlog_cache_data(
5079 use_trans_cache(thd, true));
5080 binlog_cache_data *cache_stmt= mngr->get_binlog_cache_data(
5081 use_trans_cache(thd, false));
5082 if (cache_trans && !cache_trans->empty() &&
5083 is_gtid_cached_internal(&cache_trans->cache_log))
5084 return true;
5085 if (cache_stmt && !cache_stmt->empty() &&
5086 is_gtid_cached_internal(&cache_stmt->cache_log))
5087 return true;
5088 return false;
5089 }
5090 #endif
5091 /**
5092 Create a new log file name.
5093
5094 @param buf buf of at least FN_REFLEN where new name is stored
5095
5096 @note
    If the file name would be longer than FN_REFLEN, it will be truncated
5098 */
5099
make_log_name(char * buf,const char * log_ident)5100 void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
5101 {
5102 size_t dir_len = dirname_length(log_file_name);
5103 if (dir_len >= FN_REFLEN)
5104 dir_len=FN_REFLEN-1;
5105 strnmov(buf, log_file_name, dir_len);
5106 strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
5107 }
5108
5109
5110 /**
5111 Check if we are writing/reading to the given log file.
5112 */
5113
is_active(const char * log_file_name_arg)5114 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
5115 {
5116 /**
5117 * there should/must be mysql_mutex_assert_owner(&LOCK_log) here...
5118 * but code violates this! (scary monsters and super creeps!)
5119 *
5120 * example stacktrace:
5121 * #8 MYSQL_BIN_LOG::is_active
5122 * #9 MYSQL_BIN_LOG::can_purge_log
5123 * #10 MYSQL_BIN_LOG::purge_logs
5124 * #11 MYSQL_BIN_LOG::purge_first_log
5125 * #12 next_event
5126 * #13 exec_relay_log_event
5127 *
5128 * I didn't investigate if this is ligit...(i.e if my comment is wrong)
5129 */
5130 return !strcmp(log_file_name, log_file_name_arg);
5131 }
5132
5133
/*
  Wrappers around new_file_impl that avoid using an argument to control
  locking. Such an argument 1) is less readable, 2) breaks encapsulation,
  and 3) allows external access to the class without a lock (which is not
  possible with the private new_file_without_locking method).

  @retval
    nonzero - error
*/
5144
new_file()5145 int MYSQL_BIN_LOG::new_file()
5146 {
5147 int res;
5148 mysql_mutex_lock(&LOCK_log);
5149 res= new_file_impl();
5150 mysql_mutex_unlock(&LOCK_log);
5151 return res;
5152 }
5153
5154 /*
5155 @retval
5156 nonzero - error
5157 */
new_file_without_locking()5158 int MYSQL_BIN_LOG::new_file_without_locking()
5159 {
5160 return new_file_impl();
5161 }
5162
5163
/**
  Start writing to a new log file or reopen the old file.

  The caller must already hold LOCK_log (this is asserted); LOCK_index is
  acquired and released inside the function.

  @retval
    nonzero - error

  @note
    The new file name is stored last in the index file
*/
5175
int MYSQL_BIN_LOG::new_file_impl()
{
  /*
    Overview of the steps below:
      1. generate the next log file name;
      2. write a Rotate event naming it into the current log and flush;
      3. close the current log (for a non-relay log the old file
         descriptor is kept open until the new file has been opened);
      4. reopen the index file and open the new log;
      5. on failure, step back last_used_log_number and, if
         close_on_error is set, close the log and print fatal_log_error.
    Returns 0 on success or if the log is not open, nonzero on error.
  */
  int error= 0, close_on_error= FALSE;
  char new_name[FN_REFLEN], *new_name_ptr, *old_name, *file_to_open;
  uint close_flag;
  bool delay_close= false;
  /* Only assigned (and later used) when delay_close is set. */
  File UNINIT_VAR(old_file);
  DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");

  DBUG_ASSERT(log_type == LOG_BIN);
  mysql_mutex_assert_owner(&LOCK_log);

  /* Nothing to rotate when the log is closed; this is not an error. */
  if (!is_open())
  {
    DBUG_PRINT("info",("log is closed"));
    DBUG_RETURN(error);
  }

  mysql_mutex_lock(&LOCK_index);

  /*
    Start with the current name; it is replaced by the generated name
    below.  It is only used for the error message at the end if name
    generation fails.
  */
  new_name_ptr= name;

  /*
    If user hasn't specified an extension, generate a new log name
    We have to do this here and not in open as we want to store the
    new file name in the current binary log file.
  */
  if (unlikely((error= generate_new_name(new_name, name, 0))))
  {
#ifdef ENABLE_AND_FIX_HANG
    close_on_error= TRUE;
#endif
    /* Jump past 'end' so that last_used_log_number is not decremented. */
    goto end2;
  }
  new_name_ptr=new_name;

  /* Always true here, see the DBUG_ASSERT on log_type above. */
  if (log_type == LOG_BIN)
  {
    {
      /*
        We log the whole file name for log file as the user may decide
        to change base names at some point.
      */
      Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
                         is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
      /*
        The current relay-log's closing Rotate event must have checksum
        value computed with an algorithm of the last relay-logged FD event.
      */
      if (is_relay_log)
        r.checksum_alg= relay_log_checksum_alg;
      DBUG_ASSERT(!is_relay_log || relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
      /*
        The debug keyword simulates a failed write of the Rotate event
        (it sets both error and close_on_error, and errno=2 below).
      */
      if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event", (error=close_on_error=TRUE), FALSE) ||
         (error= write_event(&r)))
      {
        DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno=2;);
        close_on_error= TRUE;
        my_printf_error(ER_ERROR_ON_WRITE,
                        ER_THD_OR_DEFAULT(current_thd, ER_CANT_OPEN_FILE),
                        MYF(ME_FATAL), name, errno);
        goto end;
      }
      bytes_written += r.data_written;
    }
  }

  /*
    Update needs to be signalled even if there is no rotate event
    log rotation should give the waiting thread a signal to
    discover EOF and move on to the next log.
  */
  if (unlikely((error= flush_io_cache(&log_file))))
  {
    close_on_error= TRUE;
    goto end;
  }
  update_binlog_end_pos();

  /*
    Detach the name from the object before close(): the pointer is kept
    in old_name, passed to open() below and freed afterwards.
  */
  old_name=name;
  name=0;				// Don't free name
  close_flag= LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX;
  if (!is_relay_log)
  {
    /*
      We need to keep the old binlog file open (and marked as in-use) until
      the new one is fully created and synced to disk and index. Otherwise we
      leave a window where if we crash, there is no binlog file marked as
      crashed for server restart to detect the need for recovery.
    */
    old_file= log_file.file;
    close_flag|= LOG_CLOSE_DELAYED_CLOSE;
    delay_close= true;
  }
  close(close_flag);
  /* Apply a checksum algorithm change that was requested for the next file. */
  if (log_type == LOG_BIN && checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF)
  {
    DBUG_ASSERT(!is_relay_log);
    DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
    binlog_checksum_options= checksum_alg_reset;
  }
  /*
     Note that at this point, log_state != LOG_CLOSED
     (important for is_open()).
  */

  /*
     new_file() is only used for rotation (in FLUSH LOGS or because size >
     max_binlog_size or max_relay_log_size).
     If this is a binary log, the Format_description_log_event at the
     beginning of the new file should have created=0 (to distinguish with the
     Format_description_log_event written at server startup, which should
     trigger temp tables deletion on slaves.
  */

  /* reopen index binlog file, BUG#34582 */
  /* file_to_open tracks which file to name in the error message below. */
  file_to_open= index_file_name;
  error= open_index_file(index_file_name, 0, FALSE);
  if (likely(!error))
  {
    /* reopen the binary log file. */
    file_to_open= new_name_ptr;
    error= open(old_name, log_type, new_name_ptr, 0, io_cache_type,
                max_size, 1, FALSE);
  }

  /* handle reopening errors */
  if (unlikely(error))
  {
    my_error(ER_CANT_OPEN_FILE, MYF(ME_FATAL), file_to_open, error);
    close_on_error= TRUE;
  }

  my_free(old_name);

end:
  /* In case of errors, reuse the last generated log file name */
  if (unlikely(error))
  {
    DBUG_ASSERT(last_used_log_number > 0);
    last_used_log_number--;
  }

end2:
  /*
    The old binlog file was kept open across the reopen (see above);
    clear its in-use flag and close it now, whether or not the reopen
    succeeded.
  */
  if (delay_close)
  {
    clear_inuse_flag_when_closing(old_file);
    mysql_file_close(old_file, MYF(MY_WME));
  }

  if (unlikely(error && close_on_error)) /* rotate or reopen failed */
  {
    /* 
      Close whatever was left opened.

      We are keeping the behavior as it exists today, ie,
      we disable logging and move on (see: BUG#51014).

      TODO: as part of WL#1790 consider other approaches:
       - kill mysql (safety);
       - try multiple locations for opening a log file;
       - switch server to protected/readonly mode
       - ...
    */
    close(LOG_CLOSE_INDEX);
    sql_print_error(fatal_log_error, new_name_ptr, errno);
  }

  mysql_mutex_unlock(&LOCK_index);

  DBUG_RETURN(error);
}
5348
write_event(Log_event * ev,binlog_cache_data * cache_data,IO_CACHE * file)5349 bool MYSQL_BIN_LOG::write_event(Log_event *ev, binlog_cache_data *cache_data,
5350 IO_CACHE *file)
5351 {
5352 Log_event_writer writer(file, 0, &crypto);
5353 if (crypto.scheme && file == &log_file)
5354 writer.ctx= alloca(crypto.ctx_size);
5355 if (cache_data)
5356 cache_data->add_status(ev->logged_status());
5357 return writer.write(ev);
5358 }
5359
append(Log_event * ev)5360 bool MYSQL_BIN_LOG::append(Log_event *ev)
5361 {
5362 bool res;
5363 mysql_mutex_lock(&LOCK_log);
5364 res= append_no_lock(ev);
5365 mysql_mutex_unlock(&LOCK_log);
5366 return res;
5367 }
5368
5369
append_no_lock(Log_event * ev)5370 bool MYSQL_BIN_LOG::append_no_lock(Log_event* ev)
5371 {
5372 bool error = 0;
5373 DBUG_ENTER("MYSQL_BIN_LOG::append");
5374
5375 mysql_mutex_assert_owner(&LOCK_log);
5376 DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5377
5378 if (write_event(ev))
5379 {
5380 error=1;
5381 goto err;
5382 }
5383 bytes_written+= ev->data_written;
5384 DBUG_PRINT("info",("max_size: %lu",max_size));
5385 if (flush_and_sync(0))
5386 goto err;
5387 if (my_b_append_tell(&log_file) > max_size)
5388 error= new_file_without_locking();
5389 err:
5390 update_binlog_end_pos();
5391 DBUG_RETURN(error);
5392 }
5393
write_event_buffer(uchar * buf,uint len)5394 bool MYSQL_BIN_LOG::write_event_buffer(uchar* buf, uint len)
5395 {
5396 bool error= 1;
5397 uchar *ebuf= 0;
5398 DBUG_ENTER("MYSQL_BIN_LOG::write_event_buffer");
5399
5400 DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5401
5402 mysql_mutex_assert_owner(&LOCK_log);
5403
5404 if (crypto.scheme != 0)
5405 {
5406 DBUG_ASSERT(crypto.scheme == 1);
5407
5408 uint elen;
5409 uchar iv[BINLOG_IV_LENGTH];
5410
5411 ebuf= (uchar*)my_safe_alloca(len);
5412 if (!ebuf)
5413 goto err;
5414
5415 crypto.set_iv(iv, (uint32)my_b_append_tell(&log_file));
5416
5417 /*
5418 we want to encrypt everything, excluding the event length:
5419 massage the data before the encryption
5420 */
5421 memcpy(buf + EVENT_LEN_OFFSET, buf, 4);
5422
5423 if (encryption_crypt(buf + 4, len - 4,
5424 ebuf + 4, &elen,
5425 crypto.key, crypto.key_length, iv, sizeof(iv),
5426 ENCRYPTION_FLAG_ENCRYPT | ENCRYPTION_FLAG_NOPAD,
5427 ENCRYPTION_KEY_SYSTEM_DATA, crypto.key_version))
5428 goto err;
5429
5430 DBUG_ASSERT(elen == len - 4);
5431
5432 /* massage the data after the encryption */
5433 memcpy(ebuf, ebuf + EVENT_LEN_OFFSET, 4);
5434 int4store(ebuf + EVENT_LEN_OFFSET, len);
5435
5436 buf= ebuf;
5437 }
5438 if (my_b_append(&log_file, buf, len))
5439 goto err;
5440 bytes_written+= len;
5441
5442 error= 0;
5443 DBUG_PRINT("info",("max_size: %lu",max_size));
5444 if (flush_and_sync(0))
5445 goto err;
5446 if (my_b_append_tell(&log_file) > max_size)
5447 error= new_file_without_locking();
5448 err:
5449 my_safe_afree(ebuf, len);
5450 if (likely(!error))
5451 update_binlog_end_pos();
5452 DBUG_RETURN(error);
5453 }
5454
flush_and_sync(bool * synced)5455 bool MYSQL_BIN_LOG::flush_and_sync(bool *synced)
5456 {
5457 int err=0, fd=log_file.file;
5458 if (synced)
5459 *synced= 0;
5460 mysql_mutex_assert_owner(&LOCK_log);
5461 if (flush_io_cache(&log_file))
5462 return 1;
5463 uint sync_period= get_sync_period();
5464 if (sync_period && ++sync_counter >= sync_period)
5465 {
5466 sync_counter= 0;
5467 err= mysql_file_sync(fd, MYF(MY_WME|MY_SYNC_FILESIZE));
5468 if (synced)
5469 *synced= 1;
5470 #ifndef DBUG_OFF
5471 if (opt_binlog_dbug_fsync_sleep > 0)
5472 my_sleep(opt_binlog_dbug_fsync_sleep);
5473 #endif
5474 }
5475 return err;
5476 }
5477
start_union_events(THD * thd,query_id_t query_id_param)5478 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
5479 {
5480 DBUG_ASSERT(!thd->binlog_evt_union.do_union);
5481 thd->binlog_evt_union.do_union= TRUE;
5482 thd->binlog_evt_union.unioned_events= FALSE;
5483 thd->binlog_evt_union.unioned_events_trans= FALSE;
5484 thd->binlog_evt_union.first_query_id= query_id_param;
5485 }
5486
stop_union_events(THD * thd)5487 void MYSQL_BIN_LOG::stop_union_events(THD *thd)
5488 {
5489 DBUG_ASSERT(thd->binlog_evt_union.do_union);
5490 thd->binlog_evt_union.do_union= FALSE;
5491 }
5492
is_query_in_union(THD * thd,query_id_t query_id_param)5493 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
5494 {
5495 return (thd->binlog_evt_union.do_union &&
5496 query_id_param >= thd->binlog_evt_union.first_query_id);
5497 }
5498
5499 /**
5500 This function checks if a transactional table was updated by the
5501 current transaction.
5502
5503 @param thd The client thread that executed the current statement.
5504 @return
5505 @c true if a transactional table was updated, @c false otherwise.
5506 */
5507 bool
trans_has_updated_trans_table(const THD * thd)5508 trans_has_updated_trans_table(const THD* thd)
5509 {
5510 binlog_cache_mngr *const cache_mngr=
5511 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5512
5513 return (cache_mngr ? !cache_mngr->trx_cache.empty() : 0);
5514 }
5515
5516 /**
5517 This function checks if a transactional table was updated by the
5518 current statement.
5519
5520 @param thd The client thread that executed the current statement.
5521 @return
5522 @c true if a transactional table was updated, @c false otherwise.
5523 */
5524 bool
stmt_has_updated_trans_table(const THD * thd)5525 stmt_has_updated_trans_table(const THD *thd)
5526 {
5527 Ha_trx_info *ha_info;
5528
5529 for (ha_info= thd->transaction.stmt.ha_list; ha_info;
5530 ha_info= ha_info->next())
5531 {
5532 if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
5533 return (TRUE);
5534 }
5535 return (FALSE);
5536 }
5537
5538 /**
5539 This function checks if either a trx-cache or a non-trx-cache should
5540 be used. If @c bin_log_direct_non_trans_update is active or the format
5541 is either MIXED or ROW, the cache to be used depends on the flag @c
5542 is_transactional.
5543
5544 On the other hand, if binlog_format is STMT or direct option is
5545 OFF, the trx-cache should be used if and only if the statement is
5546 transactional or the trx-cache is not empty. Otherwise, the
5547 non-trx-cache should be used.
5548
5549 @param thd The client thread.
5550 @param is_transactional The changes are related to a trx-table.
5551 @return
5552 @c true if a trx-cache should be used, @c false otherwise.
5553 */
use_trans_cache(const THD * thd,bool is_transactional)5554 bool use_trans_cache(const THD* thd, bool is_transactional)
5555 {
5556 if (is_transactional)
5557 return 1;
5558 binlog_cache_mngr *const cache_mngr=
5559 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5560
5561 return ((thd->is_current_stmt_binlog_format_row() ||
5562 thd->variables.binlog_direct_non_trans_update) ? 0 :
5563 !cache_mngr->trx_cache.empty());
5564 }
5565
5566 /**
5567 This function checks if a transaction, either a multi-statement
5568 or a single statement transaction is about to commit or not.
5569
5570 @param thd The client thread that executed the current statement.
5571 @param all Committing a transaction (i.e. TRUE) or a statement
5572 (i.e. FALSE).
5573 @return
5574 @c true if committing a transaction, otherwise @c false.
5575 */
ending_trans(THD * thd,const bool all)5576 bool ending_trans(THD* thd, const bool all)
5577 {
5578 return (all || ending_single_stmt_trans(thd, all));
5579 }
5580
5581 /**
5582 This function checks if a single statement transaction is about
5583 to commit or not.
5584
5585 @param thd The client thread that executed the current statement.
5586 @param all Committing a transaction (i.e. TRUE) or a statement
5587 (i.e. FALSE).
5588 @return
5589 @c true if committing a single statement transaction, otherwise
5590 @c false.
5591 */
ending_single_stmt_trans(THD * thd,const bool all)5592 bool ending_single_stmt_trans(THD* thd, const bool all)
5593 {
5594 return (!all && !thd->in_multi_stmt_transaction_mode());
5595 }
5596
5597 /**
5598 This function checks if a non-transactional table was updated by
5599 the current transaction.
5600
5601 @param thd The client thread that executed the current statement.
5602 @return
5603 @c true if a non-transactional table was updated, @c false
5604 otherwise.
5605 */
trans_has_updated_non_trans_table(const THD * thd)5606 bool trans_has_updated_non_trans_table(const THD* thd)
5607 {
5608 return (thd->transaction.all.modified_non_trans_table ||
5609 thd->transaction.stmt.modified_non_trans_table);
5610 }
5611
5612 /**
5613 This function checks if a non-transactional table was updated by the
5614 current statement.
5615
5616 @param thd The client thread that executed the current statement.
5617 @return
5618 @c true if a non-transactional table was updated, @c false otherwise.
5619 */
stmt_has_updated_non_trans_table(const THD * thd)5620 bool stmt_has_updated_non_trans_table(const THD* thd)
5621 {
5622 return (thd->transaction.stmt.modified_non_trans_table);
5623 }
5624
5625 /*
5626 These functions are placed in this file since they need access to
5627 binlog_hton, which has internal linkage.
5628 */
5629
binlog_setup_trx_data()5630 binlog_cache_mngr *THD::binlog_setup_trx_data()
5631 {
5632 DBUG_ENTER("THD::binlog_setup_trx_data");
5633 binlog_cache_mngr *cache_mngr=
5634 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5635
5636 if (cache_mngr)
5637 DBUG_RETURN(cache_mngr); // Already set up
5638
5639 cache_mngr= (binlog_cache_mngr*) my_malloc(sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
5640 if (!cache_mngr ||
5641 open_cached_file(&cache_mngr->stmt_cache.cache_log, mysql_tmpdir,
5642 LOG_PREFIX, (size_t)binlog_stmt_cache_size, MYF(MY_WME)) ||
5643 open_cached_file(&cache_mngr->trx_cache.cache_log, mysql_tmpdir,
5644 LOG_PREFIX, (size_t)binlog_cache_size, MYF(MY_WME)))
5645 {
5646 my_free(cache_mngr);
5647 DBUG_RETURN(0); // Didn't manage to set it up
5648 }
5649 thd_set_ha_data(this, binlog_hton, cache_mngr);
5650
5651 cache_mngr= new (cache_mngr)
5652 binlog_cache_mngr(max_binlog_stmt_cache_size,
5653 max_binlog_cache_size,
5654 &binlog_stmt_cache_use,
5655 &binlog_stmt_cache_disk_use,
5656 &binlog_cache_use,
5657 &binlog_cache_disk_use);
5658 DBUG_RETURN(cache_mngr);
5659 }
5660
5661 /*
5662 Function to start a statement and optionally a transaction for the
5663 binary log.
5664
5665 SYNOPSIS
5666 binlog_start_trans_and_stmt()
5667
5668 DESCRIPTION
5669
5670 This function does three things:
5671 - Start a transaction if not in autocommit mode or if a BEGIN
5672 statement has been seen.
5673
5674 - Start a statement transaction to allow us to truncate the cache.
5675
5676 - Save the current binlog position so that we can roll back the
5677 statement by truncating the cache.
5678
5679 We only update the saved position if the old one was undefined,
5680 the reason is that there are some cases (e.g., for CREATE-SELECT)
5681 where the position is saved twice (e.g., both in
5682 select_create::prepare() and THD::binlog_write_table_map()) , but
5683 we should use the first. This means that calls to this function
5684 can be used to start the statement before the first table map
5685 event, to include some extra events.
5686 */
5687
5688 void
binlog_start_trans_and_stmt()5689 THD::binlog_start_trans_and_stmt()
5690 {
5691 binlog_cache_mngr *cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5692 DBUG_ENTER("binlog_start_trans_and_stmt");
5693 DBUG_PRINT("enter", ("cache_mngr: %p cache_mngr->trx_cache.get_prev_position(): %lu",
5694 cache_mngr,
5695 (cache_mngr ? (ulong) cache_mngr->trx_cache.get_prev_position() :
5696 (ulong) 0)));
5697
5698 if (cache_mngr == NULL ||
5699 cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
5700 {
5701 this->binlog_set_stmt_begin();
5702 bool mstmt_mode= in_multi_stmt_transaction_mode();
5703 #ifdef WITH_WSREP
5704 /*
5705 With wsrep binlog emulation we can skip the rest because the
5706 binlog cache will not be written into binlog. Note however that
5707 because of this the hton callbacks will not get called to clean
5708 up the cache, so this must be done explicitly when the transaction
5709 terminates.
5710 */
5711 if (WSREP_EMULATE_BINLOG_NNULL(this))
5712 {
5713 DBUG_VOID_RETURN;
5714 }
5715 /* Write Gtid
5716 Get domain id only when gtid mode is set
5717 If this event is replicate through a master then ,
5718 we will forward the same gtid another nodes
5719 We have to do this only one time in mysql transaction.
5720 Since this function is called multiple times , We will check for
5721 ha_info->is_started()
5722 */
5723 Ha_trx_info *ha_info;
5724 ha_info= this->ha_data[binlog_hton->slot].ha_info + (mstmt_mode ? 1 : 0);
5725
5726 if (!ha_info->is_started() && wsrep_gtid_mode
5727 && this->variables.gtid_seq_no)
5728 {
5729 binlog_cache_mngr *const cache_mngr=
5730 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5731 binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(1);
5732 IO_CACHE *file= &cache_data->cache_log;
5733 Log_event_writer writer(file, cache_data);
5734 Gtid_log_event gtid_event(this, this->variables.gtid_seq_no,
5735 this->variables.gtid_domain_id,
5736 true, LOG_EVENT_SUPPRESS_USE_F,
5737 true, 0);
5738 gtid_event.server_id= this->variables.server_id;
5739 writer.write(>id_event);
5740 }
5741 #endif
5742 if (mstmt_mode)
5743 trans_register_ha(this, TRUE, binlog_hton);
5744 trans_register_ha(this, FALSE, binlog_hton);
5745 /*
5746 Mark statement transaction as read/write. We never start
5747 a binary log transaction and keep it read-only,
5748 therefore it's best to mark the transaction read/write just
5749 at the same time we start it.
5750 Not necessary to mark the normal transaction read/write
5751 since the statement-level flag will be propagated automatically
5752 inside ha_commit_trans.
5753 */
5754 ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
5755 }
5756 DBUG_VOID_RETURN;
5757 }
5758
binlog_set_stmt_begin()5759 void THD::binlog_set_stmt_begin() {
5760 binlog_cache_mngr *cache_mngr=
5761 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5762
5763 /*
5764 The call to binlog_trans_log_savepos() might create the cache_mngr
5765 structure, if it didn't exist before, so we save the position
5766 into an auto variable and then write it into the transaction
5767 data for the binary log (i.e., cache_mngr).
5768 */
5769 my_off_t pos= 0;
5770 binlog_trans_log_savepos(this, &pos);
5771 cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5772 cache_mngr->trx_cache.set_prev_position(pos);
5773 }
5774
5775 static int
binlog_start_consistent_snapshot(handlerton * hton,THD * thd)5776 binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
5777 {
5778 int err= 0;
5779 DBUG_ENTER("binlog_start_consistent_snapshot");
5780
5781 binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
5782
5783 /* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */
5784 mysql_mutex_assert_owner(&LOCK_commit_ordered);
5785 strmake_buf(cache_mngr->last_commit_pos_file, mysql_bin_log.last_commit_pos_file);
5786 cache_mngr->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset;
5787
5788 trans_register_ha(thd, TRUE, hton);
5789
5790 DBUG_RETURN(err);
5791 }
5792
5793 /**
5794 This function writes a table map to the binary log.
5795 Note that in order to keep the signature uniform with related methods,
5796 we use a redundant parameter to indicate whether a transactional table
5797 was changed or not.
5798
5799 If with_annotate != NULL and
5800 *with_annotate = TRUE write also Annotate_rows before the table map.
5801
5802 @param table a pointer to the table.
5803 @param is_transactional @c true indicates a transactional table,
5804 otherwise @c false a non-transactional.
5805 @return
5806 nonzero if an error pops up when writing the table map event.
5807 */
binlog_write_table_map(TABLE * table,bool is_transactional,my_bool * with_annotate)5808 int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
5809 my_bool *with_annotate)
5810 {
5811 int error;
5812 DBUG_ENTER("THD::binlog_write_table_map");
5813 DBUG_PRINT("enter", ("table: %p (%s: #%lu)",
5814 table, table->s->table_name.str,
5815 table->s->table_map_id));
5816
5817 /* Ensure that all events in a GTID group are in the same cache */
5818 if (variables.option_bits & OPTION_GTID_BEGIN)
5819 is_transactional= 1;
5820
5821 /* Pre-conditions */
5822 DBUG_ASSERT(is_current_stmt_binlog_format_row());
5823 DBUG_ASSERT(WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open());
5824 DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
5825
5826 Table_map_log_event
5827 the_event(this, table, table->s->table_map_id, is_transactional);
5828
5829 if (binlog_table_maps == 0)
5830 binlog_start_trans_and_stmt();
5831
5832 binlog_cache_mngr *const cache_mngr=
5833 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5834 binlog_cache_data *cache_data= (cache_mngr->
5835 get_binlog_cache_data(is_transactional));
5836 IO_CACHE *file= &cache_data->cache_log;
5837 Log_event_writer writer(file, cache_data);
5838
5839 if (with_annotate && *with_annotate)
5840 {
5841 Annotate_rows_log_event anno(table->in_use, is_transactional, false);
5842 /* Annotate event should be written not more than once */
5843 *with_annotate= 0;
5844 if (unlikely((error= writer.write(&anno))))
5845 {
5846 if (my_errno == EFBIG)
5847 cache_data->set_incident();
5848 DBUG_RETURN(error);
5849 }
5850 }
5851 if (unlikely((error= writer.write(&the_event))))
5852 DBUG_RETURN(error);
5853
5854 binlog_table_maps++;
5855 DBUG_RETURN(0);
5856 }
5857
5858 /**
5859 This function retrieves a pending row event from a cache which is
5860 specified through the parameter @c is_transactional. Respectively, when it
5861 is @c true, the pending event is returned from the transactional cache.
5862 Otherwise from the non-transactional cache.
5863
5864 @param is_transactional @c true indicates a transactional cache,
5865 otherwise @c false a non-transactional.
5866 @return
5867 The row event if any.
5868 */
5869 Rows_log_event*
binlog_get_pending_rows_event(bool is_transactional) const5870 THD::binlog_get_pending_rows_event(bool is_transactional) const
5871 {
5872 Rows_log_event* rows= NULL;
5873 binlog_cache_mngr *const cache_mngr=
5874 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5875
5876 /*
5877 This is less than ideal, but here's the story: If there is no cache_mngr,
5878 prepare_pending_rows_event() has never been called (since the cache_mngr
5879 is set up there). In that case, we just return NULL.
5880 */
5881 if (cache_mngr)
5882 {
5883 binlog_cache_data *cache_data=
5884 cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional));
5885
5886 rows= cache_data->pending();
5887 }
5888 return (rows);
5889 }
5890
5891 /**
5892 This function stores a pending row event into a cache which is specified
5893 through the parameter @c is_transactional. Respectively, when it is @c
5894 true, the pending event is stored into the transactional cache. Otherwise
5895 into the non-transactional cache.
5896
5897 @param evt a pointer to the row event.
5898 @param is_transactional @c true indicates a transactional cache,
5899 otherwise @c false a non-transactional.
5900 */
5901 void
binlog_set_pending_rows_event(Rows_log_event * ev,bool is_transactional)5902 THD::binlog_set_pending_rows_event(Rows_log_event* ev, bool is_transactional)
5903 {
5904 binlog_cache_mngr *const cache_mngr= binlog_setup_trx_data();
5905
5906 DBUG_ASSERT(cache_mngr);
5907
5908 binlog_cache_data *cache_data=
5909 cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional));
5910
5911 cache_data->set_pending(ev);
5912 }
5913
5914
5915 /**
5916 This function removes the pending rows event, discarding any outstanding
5917 rows. If there is no pending rows event available, this is effectively a
5918 no-op.
5919
5920 @param thd a pointer to the user thread.
5921 @param is_transactional @c true indicates a transactional cache,
5922 otherwise @c false a non-transactional.
5923 */
5924 int
remove_pending_rows_event(THD * thd,bool is_transactional)5925 MYSQL_BIN_LOG::remove_pending_rows_event(THD *thd, bool is_transactional)
5926 {
5927 DBUG_ENTER("MYSQL_BIN_LOG::remove_pending_rows_event");
5928
5929 binlog_cache_mngr *const cache_mngr=
5930 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5931
5932 DBUG_ASSERT(cache_mngr);
5933
5934 binlog_cache_data *cache_data=
5935 cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional));
5936
5937 if (Rows_log_event* pending= cache_data->pending())
5938 {
5939 delete pending;
5940 cache_data->set_pending(NULL);
5941 }
5942
5943 DBUG_RETURN(0);
5944 }
5945
/*
  Moves the last bunch of rows from the pending Rows event to a cache
  (the transactional cache if is_transactional is @c true, or the
  non-transactional cache otherwise). Sets a new pending event.

  @param thd               a pointer to the user thread.
  @param event             a pointer to the row event.
  @param is_transactional  @c true indicates a transactional cache,
                           otherwise @c false a non-transactional.
*/
5956 int
flush_and_set_pending_rows_event(THD * thd,Rows_log_event * event,bool is_transactional)5957 MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
5958 Rows_log_event* event,
5959 bool is_transactional)
5960 {
5961 DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
5962 DBUG_ASSERT(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open());
5963 DBUG_PRINT("enter", ("event: %p", event));
5964
5965 binlog_cache_mngr *const cache_mngr=
5966 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5967
5968 DBUG_ASSERT(cache_mngr);
5969
5970 binlog_cache_data *cache_data=
5971 cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional));
5972
5973 DBUG_PRINT("info", ("cache_mngr->pending(): %p", cache_data->pending()));
5974
5975 if (Rows_log_event* pending= cache_data->pending())
5976 {
5977 Log_event_writer writer(&cache_data->cache_log, cache_data);
5978
5979 /*
5980 Write pending event to the cache.
5981 */
5982 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
5983 {DBUG_SET("+d,simulate_file_write_error");});
5984 if (writer.write(pending))
5985 {
5986 set_write_error(thd, is_transactional);
5987 if (check_write_error(thd) && cache_data &&
5988 stmt_has_updated_non_trans_table(thd))
5989 cache_data->set_incident();
5990 delete pending;
5991 cache_data->set_pending(NULL);
5992 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
5993 {DBUG_SET("-d,simulate_file_write_error");});
5994 DBUG_RETURN(1);
5995 }
5996
5997 delete pending;
5998 }
5999
6000 thd->binlog_set_pending_rows_event(event, is_transactional);
6001
6002 DBUG_RETURN(0);
6003 }
6004
6005
6006 /* Generate a new global transaction ID, and write it to the binlog */
6007
6008 bool
write_gtid_event(THD * thd,bool standalone,bool is_transactional,uint64 commit_id)6009 MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone,
6010 bool is_transactional, uint64 commit_id)
6011 {
6012 rpl_gtid gtid;
6013 uint32 domain_id;
6014 uint32 local_server_id;
6015 uint64 seq_no;
6016 int err;
6017 DBUG_ENTER("write_gtid_event");
6018 DBUG_PRINT("enter", ("standalone: %d", standalone));
6019
6020 #ifdef WITH_WSREP
6021 if (WSREP(thd) &&
6022 (wsrep_thd_trx_seqno(thd) > 0) &&
6023 wsrep_gtid_mode && !thd->variables.gtid_seq_no)
6024 {
6025 domain_id= wsrep_gtid_domain_id;
6026 } else {
6027 #endif /* WITH_WSREP */
6028 domain_id= thd->variables.gtid_domain_id;
6029 #ifdef WITH_WSREP
6030 }
6031 #endif /* WITH_WSREP */
6032 local_server_id= thd->variables.server_id;
6033 seq_no= thd->variables.gtid_seq_no;
6034
6035 DBUG_ASSERT(local_server_id != 0);
6036
6037 if (thd->variables.option_bits & OPTION_GTID_BEGIN)
6038 {
6039 DBUG_PRINT("error", ("OPTION_GTID_BEGIN is set. "
6040 "Master and slave will have different GTID values"));
6041 /* Reset the flag, as we will write out a GTID anyway */
6042 thd->variables.option_bits&= ~OPTION_GTID_BEGIN;
6043 }
6044
6045 /*
6046 Reset the session variable gtid_seq_no, to reduce the risk of accidentally
6047 producing a duplicate GTID.
6048 */
6049 thd->variables.gtid_seq_no= 0;
6050 if (seq_no != 0)
6051 {
6052 /* Use the specified sequence number. */
6053 gtid.domain_id= domain_id;
6054 gtid.server_id= local_server_id;
6055 gtid.seq_no= seq_no;
6056 err= rpl_global_gtid_binlog_state.update(>id, opt_gtid_strict_mode);
6057 if (err && thd->get_stmt_da()->sql_errno()==ER_GTID_STRICT_OUT_OF_ORDER)
6058 errno= ER_GTID_STRICT_OUT_OF_ORDER;
6059 }
6060 else
6061 {
6062 /* Allocate the next sequence number for the GTID. */
6063 err= rpl_global_gtid_binlog_state.update_with_next_gtid(domain_id,
6064 local_server_id, >id);
6065 seq_no= gtid.seq_no;
6066 }
6067 if (err)
6068 DBUG_RETURN(true);
6069
6070 thd->set_last_commit_gtid(gtid);
6071
6072 Gtid_log_event gtid_event(thd, seq_no, domain_id, standalone,
6073 LOG_EVENT_SUPPRESS_USE_F, is_transactional,
6074 commit_id);
6075
6076 /* Write the event to the binary log. */
6077 DBUG_ASSERT(this == &mysql_bin_log);
6078
6079 #ifdef WITH_WSREP
6080 if (wsrep_gtid_mode && is_gtid_cached(thd))
6081 DBUG_RETURN(false);
6082 #endif
6083
6084 if (write_event(>id_event))
6085 DBUG_RETURN(true);
6086 status_var_add(thd->status_var.binlog_bytes_written, gtid_event.data_written);
6087
6088 DBUG_RETURN(false);
6089 }
6090
6091
6092 int
write_state_to_file()6093 MYSQL_BIN_LOG::write_state_to_file()
6094 {
6095 File file_no;
6096 IO_CACHE cache;
6097 char buf[FN_REFLEN];
6098 int err;
6099 bool opened= false;
6100 bool log_inited= false;
6101
6102 fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
6103 MY_UNPACK_FILENAME);
6104 if ((file_no= mysql_file_open(key_file_binlog_state, buf,
6105 O_RDWR|O_CREAT|O_TRUNC|O_BINARY,
6106 MYF(MY_WME))) < 0)
6107 {
6108 err= 1;
6109 goto err;
6110 }
6111 opened= true;
6112 if ((err= init_io_cache(&cache, file_no, IO_SIZE, WRITE_CACHE, 0, 0,
6113 MYF(MY_WME|MY_WAIT_IF_FULL))))
6114 goto err;
6115 log_inited= true;
6116 if ((err= rpl_global_gtid_binlog_state.write_to_iocache(&cache)))
6117 goto err;
6118 log_inited= false;
6119 if ((err= end_io_cache(&cache)))
6120 goto err;
6121 if ((err= mysql_file_sync(file_no, MYF(MY_WME|MY_SYNC_FILESIZE))))
6122 goto err;
6123 goto end;
6124
6125 err:
6126 sql_print_error("Error writing binlog state to file '%s'.", buf);
6127 if (log_inited)
6128 end_io_cache(&cache);
6129 end:
6130 if (opened)
6131 mysql_file_close(file_no, MYF(0));
6132
6133 return err;
6134 }
6135
6136
6137 /*
6138 Initialize the binlog state from the master-bin.state file, at server startup.
6139
6140 Returns:
6141 0 for success.
6142 2 for when .state file did not exist.
6143 1 for other error.
6144 */
6145 int
read_state_from_file()6146 MYSQL_BIN_LOG::read_state_from_file()
6147 {
6148 File file_no;
6149 IO_CACHE cache;
6150 char buf[FN_REFLEN];
6151 int err;
6152 bool opened= false;
6153 bool log_inited= false;
6154
6155 fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
6156 MY_UNPACK_FILENAME);
6157 if ((file_no= mysql_file_open(key_file_binlog_state, buf,
6158 O_RDONLY|O_BINARY, MYF(0))) < 0)
6159 {
6160 if (my_errno != ENOENT)
6161 {
6162 err= 1;
6163 goto err;
6164 }
6165 else
6166 {
6167 /*
6168 If the state file does not exist, this is the first server startup
6169 with GTID enabled. So initialize to empty state.
6170 */
6171 rpl_global_gtid_binlog_state.reset();
6172 err= 2;
6173 goto end;
6174 }
6175 }
6176 opened= true;
6177 if ((err= init_io_cache(&cache, file_no, IO_SIZE, READ_CACHE, 0, 0,
6178 MYF(MY_WME|MY_WAIT_IF_FULL))))
6179 goto err;
6180 log_inited= true;
6181 if ((err= rpl_global_gtid_binlog_state.read_from_iocache(&cache)))
6182 goto err;
6183 goto end;
6184
6185 err:
6186 sql_print_error("Error reading binlog GTID state from file '%s'.", buf);
6187 end:
6188 if (log_inited)
6189 end_io_cache(&cache);
6190 if (opened)
6191 mysql_file_close(file_no, MYF(0));
6192
6193 return err;
6194 }
6195
6196
6197 int
get_most_recent_gtid_list(rpl_gtid ** list,uint32 * size)6198 MYSQL_BIN_LOG::get_most_recent_gtid_list(rpl_gtid **list, uint32 *size)
6199 {
6200 return rpl_global_gtid_binlog_state.get_most_recent_gtid_list(list, size);
6201 }
6202
6203
6204 bool
append_state_pos(String * str)6205 MYSQL_BIN_LOG::append_state_pos(String *str)
6206 {
6207 return rpl_global_gtid_binlog_state.append_pos(str);
6208 }
6209
6210
6211 bool
append_state(String * str)6212 MYSQL_BIN_LOG::append_state(String *str)
6213 {
6214 return rpl_global_gtid_binlog_state.append_state(str);
6215 }
6216
6217
6218 bool
is_empty_state()6219 MYSQL_BIN_LOG::is_empty_state()
6220 {
6221 return (rpl_global_gtid_binlog_state.count() == 0);
6222 }
6223
6224
6225 bool
find_in_binlog_state(uint32 domain_id,uint32 server_id_arg,rpl_gtid * out_gtid)6226 MYSQL_BIN_LOG::find_in_binlog_state(uint32 domain_id, uint32 server_id_arg,
6227 rpl_gtid *out_gtid)
6228 {
6229 rpl_gtid *gtid;
6230 if ((gtid= rpl_global_gtid_binlog_state.find(domain_id, server_id_arg)))
6231 *out_gtid= *gtid;
6232 return gtid != NULL;
6233 }
6234
6235
6236 bool
lookup_domain_in_binlog_state(uint32 domain_id,rpl_gtid * out_gtid)6237 MYSQL_BIN_LOG::lookup_domain_in_binlog_state(uint32 domain_id,
6238 rpl_gtid *out_gtid)
6239 {
6240 rpl_gtid *found_gtid;
6241
6242 if ((found_gtid= rpl_global_gtid_binlog_state.find_most_recent(domain_id)))
6243 {
6244 *out_gtid= *found_gtid;
6245 return true;
6246 }
6247
6248 return false;
6249 }
6250
6251
6252 int
bump_seq_no_counter_if_needed(uint32 domain_id,uint64 seq_no)6253 MYSQL_BIN_LOG::bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no)
6254 {
6255 return rpl_global_gtid_binlog_state.bump_seq_no_if_needed(domain_id, seq_no);
6256 }
6257
6258
6259 bool
check_strict_gtid_sequence(uint32 domain_id,uint32 server_id_arg,uint64 seq_no)6260 MYSQL_BIN_LOG::check_strict_gtid_sequence(uint32 domain_id,
6261 uint32 server_id_arg,
6262 uint64 seq_no)
6263 {
6264 return rpl_global_gtid_binlog_state.check_strict_sequence(domain_id,
6265 server_id_arg,
6266 seq_no);
6267 }
6268
6269
6270 /**
6271 Write an event to the binary log. If with_annotate != NULL and
6272 *with_annotate = TRUE write also Annotate_rows before the event
6273 (this should happen only if the event is a Table_map).
6274 */
6275
write(Log_event * event_info,my_bool * with_annotate)6276 bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
6277 {
6278 THD *thd= event_info->thd;
6279 bool error= 1;
6280 binlog_cache_data *cache_data= 0;
6281 bool is_trans_cache= FALSE;
6282 bool using_trans= event_info->use_trans_cache();
6283 bool direct= event_info->use_direct_logging();
6284 ulong UNINIT_VAR(prev_binlog_id);
6285 DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)");
6286
6287 /*
6288 When binary logging is not enabled (--log-bin=0), wsrep-patch partially
6289 enables it without opening the binlog file (MYSQL_BIN_LOG::open().
6290 So, avoid writing to binlog file.
6291 */
6292 if (direct &&
6293 (wsrep_emulate_bin_log ||
6294 (WSREP(thd) && !(thd->variables.option_bits & OPTION_BIN_LOG))))
6295 DBUG_RETURN(0);
6296
6297 if (thd->variables.option_bits & OPTION_GTID_BEGIN)
6298 {
6299 DBUG_PRINT("info", ("OPTION_GTID_BEGIN was set"));
6300 /* Wait for commit from binary log before we commit */
6301 direct= 0;
6302 using_trans= 1;
6303 }
6304
6305 if (thd->binlog_evt_union.do_union)
6306 {
6307 /*
6308 In Stored function; Remember that function call caused an update.
6309 We will log the function call to the binary log on function exit
6310 */
6311 thd->binlog_evt_union.unioned_events= TRUE;
6312 thd->binlog_evt_union.unioned_events_trans |= using_trans;
6313 DBUG_RETURN(0);
6314 }
6315
6316 /*
6317 We only end the statement if we are in a top-level statement. If
6318 we are inside a stored function, we do not end the statement since
6319 this will close all tables on the slave. But there can be a special case
6320 where we are inside a stored function/trigger and a SAVEPOINT is being
6321 set in side the stored function/trigger. This SAVEPOINT execution will
6322 force the pending event to be flushed without an STMT_END_F flag. This
6323 will result in a case where following DMLs will be considered as part of
6324 same statement and result in data loss on slave. Hence in this case we
6325 force the end_stmt to be true.
6326 */
6327 bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
6328 SQLCOM_SAVEPOINT) ? true :
6329 (thd->locked_tables_mode && thd->lex->requires_prelocking());
6330 if (thd->binlog_flush_pending_rows_event(end_stmt, using_trans))
6331 DBUG_RETURN(error);
6332
6333 /*
6334 In most cases this is only called if 'is_open()' is true; in fact this is
6335 mostly called if is_open() *was* true a few instructions before, but it
6336 could have changed since.
6337 */
6338 /* applier and replayer can skip writing binlog events */
6339 if ((WSREP_EMULATE_BINLOG(thd) &&
6340 IF_WSREP(thd->wsrep_cs().mode() == wsrep::client_state::m_local, 0)) || is_open())
6341 {
6342 my_off_t UNINIT_VAR(my_org_b_tell);
6343 #ifdef HAVE_REPLICATION
6344 /*
6345 In the future we need to add to the following if tests like
6346 "do the involved tables match (to be implemented)
6347 binlog_[wild_]{do|ignore}_table?" (WL#1049)"
6348 */
6349 const char *local_db= event_info->get_db();
6350
6351 bool option_bin_log_flag= (thd->variables.option_bits & OPTION_BIN_LOG);
6352
6353 /*
6354 Log all updates to binlog cache so that they can get replicated to other
6355 nodes. A check has been added to stop them from getting logged into
6356 binary log files.
6357 */
6358 if (WSREP(thd)) option_bin_log_flag= true;
6359
6360 if ((!(option_bin_log_flag)) ||
6361 (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
6362 thd->lex->sql_command != SQLCOM_SAVEPOINT &&
6363 !binlog_filter->db_ok(local_db)))
6364 DBUG_RETURN(0);
6365 #endif /* HAVE_REPLICATION */
6366
6367 IO_CACHE *file= NULL;
6368
6369 if (direct)
6370 {
6371 /* We come here only for incident events */
6372 int res;
6373 uint64 commit_id= 0;
6374 MDL_request mdl_request;
6375 DBUG_PRINT("info", ("direct is set"));
6376 DBUG_ASSERT(!thd->backup_commit_lock);
6377
6378 mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT);
6379 if (thd->mdl_context.acquire_lock(&mdl_request,
6380 thd->variables.lock_wait_timeout))
6381 DBUG_RETURN(1);
6382 thd->backup_commit_lock= &mdl_request;
6383
6384 if ((res= thd->wait_for_prior_commit()))
6385 {
6386 if (mdl_request.ticket)
6387 thd->mdl_context.release_lock(mdl_request.ticket);
6388 thd->backup_commit_lock= 0;
6389 DBUG_RETURN(res);
6390 }
6391 file= &log_file;
6392 my_org_b_tell= my_b_tell(file);
6393 mysql_mutex_lock(&LOCK_log);
6394 prev_binlog_id= current_binlog_id;
6395 DBUG_EXECUTE_IF("binlog_force_commit_id",
6396 {
6397 const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") };
6398 bool null_value;
6399 user_var_entry *entry=
6400 (user_var_entry*) my_hash_search(&thd->user_vars,
6401 (uchar*) commit_name.str,
6402 commit_name.length);
6403 commit_id= entry->val_int(&null_value);
6404 });
6405 res= write_gtid_event(thd, true, using_trans, commit_id);
6406 if (mdl_request.ticket)
6407 thd->mdl_context.release_lock(mdl_request.ticket);
6408 thd->backup_commit_lock= 0;
6409 if (res)
6410 goto err;
6411 }
6412 else
6413 {
6414 binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
6415 if (!cache_mngr)
6416 goto err;
6417
6418 is_trans_cache= use_trans_cache(thd, using_trans);
6419 cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);
6420 file= &cache_data->cache_log;
6421
6422 if (thd->lex->stmt_accessed_non_trans_temp_table() && is_trans_cache)
6423 thd->transaction.stmt.mark_modified_non_trans_temp_table();
6424 thd->binlog_start_trans_and_stmt();
6425 }
6426 DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));
6427
6428 /*
6429 No check for auto events flag here - this write method should
6430 never be called if auto-events are enabled.
6431
6432 Write first log events which describe the 'run environment'
6433 of the SQL command. If row-based binlogging, Insert_id, Rand
6434 and other kind of "setting context" events are not needed.
6435 */
6436
6437 if (with_annotate && *with_annotate)
6438 {
6439 DBUG_ASSERT(event_info->get_type_code() == TABLE_MAP_EVENT);
6440 Annotate_rows_log_event anno(thd, using_trans, direct);
6441 /* Annotate event should be written not more than once */
6442 *with_annotate= 0;
6443 if (write_event(&anno, cache_data, file))
6444 goto err;
6445 }
6446
6447 {
6448 if (!thd->is_current_stmt_binlog_format_row())
6449 {
6450 if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
6451 {
6452 Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
6453 thd->first_successful_insert_id_in_prev_stmt_for_binlog,
6454 using_trans, direct);
6455 if (write_event(&e, cache_data, file))
6456 goto err;
6457 }
6458 if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
6459 {
6460 DBUG_PRINT("info",("number of auto_inc intervals: %u",
6461 thd->auto_inc_intervals_in_cur_stmt_for_binlog.
6462 nb_elements()));
6463 Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
6464 thd->auto_inc_intervals_in_cur_stmt_for_binlog.
6465 minimum(), using_trans, direct);
6466 if (write_event(&e, cache_data, file))
6467 goto err;
6468 }
6469 if (thd->rand_used)
6470 {
6471 Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
6472 using_trans, direct);
6473 if (write_event(&e, cache_data, file))
6474 goto err;
6475 }
6476 if (thd->user_var_events.elements)
6477 {
6478 for (uint i= 0; i < thd->user_var_events.elements; i++)
6479 {
6480 BINLOG_USER_VAR_EVENT *user_var_event;
6481 get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);
6482
6483 /* setting flags for user var log event */
6484 uchar flags= User_var_log_event::UNDEF_F;
6485 if (user_var_event->unsigned_flag)
6486 flags|= User_var_log_event::UNSIGNED_F;
6487
6488 User_var_log_event e(thd, user_var_event->user_var_event->name.str,
6489 user_var_event->user_var_event->name.length,
6490 user_var_event->value,
6491 user_var_event->length,
6492 user_var_event->type,
6493 user_var_event->charset_number,
6494 flags,
6495 using_trans,
6496 direct);
6497 if (write_event(&e, cache_data, file))
6498 goto err;
6499 }
6500 }
6501 }
6502 }
6503
6504 /*
6505 Write the event.
6506 */
6507 if (write_event(event_info, cache_data, file) ||
6508 DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
6509 goto err;
6510
6511 error= 0;
6512 err:
6513 if (direct)
6514 {
6515 my_off_t offset= my_b_tell(file);
6516 bool check_purge= false;
6517 DBUG_ASSERT(!is_relay_log);
6518
6519 if (likely(!error))
6520 {
6521 bool synced;
6522
6523 if ((error= flush_and_sync(&synced)))
6524 {
6525 }
6526 else
6527 {
6528 mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
6529 mysql_mutex_assert_owner(&LOCK_log);
6530 mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
6531 mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
6532 #ifdef HAVE_REPLICATION
6533 if (repl_semisync_master.report_binlog_update(thd, log_file_name,
6534 file->pos_in_file))
6535 {
6536 sql_print_error("Failed to run 'after_flush' hooks");
6537 error= 1;
6538 }
6539 else
6540 #endif
6541 {
6542 /*
6543 update binlog_end_pos so it can be read by dump thread
6544 note: must be _after_ the RUN_HOOK(after_flush) or else
6545 semi-sync might not have put the transaction into
6546 it's list before dump-thread tries to send it
6547 */
6548 update_binlog_end_pos(offset);
6549 if (unlikely((error= rotate(false, &check_purge))))
6550 check_purge= false;
6551 }
6552 }
6553 }
6554
6555 status_var_add(thd->status_var.binlog_bytes_written,
6556 offset - my_org_b_tell);
6557
6558 mysql_mutex_lock(&LOCK_after_binlog_sync);
6559 mysql_mutex_unlock(&LOCK_log);
6560
6561 mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
6562 mysql_mutex_assert_not_owner(&LOCK_log);
6563 mysql_mutex_assert_owner(&LOCK_after_binlog_sync);
6564 mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
6565 #ifdef HAVE_REPLICATION
6566 if (repl_semisync_master.wait_after_sync(log_file_name,
6567 file->pos_in_file))
6568 {
6569 error=1;
6570 /* error is already printed inside hook */
6571 }
6572 #endif
6573
6574 /*
6575 Take mutex to protect against a reader seeing partial writes of 64-bit
6576 offset on 32-bit CPUs.
6577 */
6578 mysql_mutex_lock(&LOCK_commit_ordered);
6579 mysql_mutex_unlock(&LOCK_after_binlog_sync);
6580 last_commit_pos_offset= offset;
6581 mysql_mutex_unlock(&LOCK_commit_ordered);
6582
6583 if (check_purge)
6584 checkpoint_and_purge(prev_binlog_id);
6585 }
6586
6587 if (unlikely(error))
6588 {
6589 set_write_error(thd, is_trans_cache);
6590 if (check_write_error(thd) && cache_data &&
6591 stmt_has_updated_non_trans_table(thd))
6592 cache_data->set_incident();
6593 }
6594 }
6595
6596 DBUG_RETURN(error);
6597 }
6598
6599
error_log_print(enum loglevel level,const char * format,va_list args)6600 int error_log_print(enum loglevel level, const char *format,
6601 va_list args)
6602 {
6603 return logger.error_log_print(level, format, args);
6604 }
6605
6606
slow_log_print(THD * thd,const char * query,uint query_length,ulonglong current_utime)6607 bool slow_log_print(THD *thd, const char *query, uint query_length,
6608 ulonglong current_utime)
6609 {
6610 return logger.slow_log_print(thd, query, query_length, current_utime);
6611 }
6612
6613
6614 /**
6615 Decide if we should log the command to general log
6616
6617 @retval
6618 FALSE No logging
6619 TRUE Ok to log
6620 */
6621
log_command(THD * thd,enum enum_server_command command)6622 bool LOGGER::log_command(THD *thd, enum enum_server_command command)
6623 {
6624 /*
6625 Log command if we have at least one log event handler enabled and want
6626 to log this king of commands
6627 */
6628 if (!(*general_log_handler_list && (what_to_log & (1L << (uint) command))))
6629 return FALSE;
6630
6631 /*
6632 If LOG_SLOW_DISABLE_SLAVE is set when slave thread starts, then
6633 OPTION_LOG_OFF is set.
6634 Only the super user can set this bit.
6635 */
6636 return !(thd->variables.option_bits & OPTION_LOG_OFF);
6637 }
6638
6639
general_log_print(THD * thd,enum enum_server_command command,const char * format,...)6640 bool general_log_print(THD *thd, enum enum_server_command command,
6641 const char *format, ...)
6642 {
6643 va_list args;
6644 uint error= 0;
6645
6646 /* Print the message to the buffer if we want to log this kind of commands */
6647 if (! logger.log_command(thd, command))
6648 return FALSE;
6649
6650 va_start(args, format);
6651 error= logger.general_log_print(thd, command, format, args);
6652 va_end(args);
6653
6654 return error;
6655 }
6656
general_log_write(THD * thd,enum enum_server_command command,const char * query,size_t query_length)6657 bool general_log_write(THD *thd, enum enum_server_command command,
6658 const char *query, size_t query_length)
6659 {
6660 /* Write the message to the log if we want to log this king of commands */
6661 if (logger.log_command(thd, command) || mysql_audit_general_enabled())
6662 return logger.general_log_write(thd, command, query, query_length);
6663
6664 return FALSE;
6665 }
6666
6667
6668 static void
binlog_checkpoint_callback(void * cookie)6669 binlog_checkpoint_callback(void *cookie)
6670 {
6671 MYSQL_BIN_LOG::xid_count_per_binlog *entry=
6672 (MYSQL_BIN_LOG::xid_count_per_binlog *)cookie;
6673 /*
6674 For every supporting engine, we increment the xid_count and issue a
6675 commit_checkpoint_request(). Then we can count when all
6676 commit_checkpoint_notify() callbacks have occurred, and then log a new
6677 binlog checkpoint event.
6678 */
6679 mysql_bin_log.mark_xids_active(entry->binlog_id, 1);
6680 }
6681
6682
6683 /*
6684 Request a commit checkpoint from each supporting engine.
6685 This must be called after each binlog rotate, and after LOCK_log has been
6686 released. The xid_count value in the xid_count_per_binlog entry was
6687 incremented by 1 and will be decremented in this function; this ensures
6688 that the entry will not go away early despite LOCK_log not being held.
6689 */
6690 void
do_checkpoint_request(ulong binlog_id)6691 MYSQL_BIN_LOG::do_checkpoint_request(ulong binlog_id)
6692 {
6693 xid_count_per_binlog *entry;
6694
6695 /*
6696 Find the binlog entry, and invoke commit_checkpoint_request() on it in
6697 each supporting storage engine.
6698 */
6699 mysql_mutex_lock(&LOCK_xid_list);
6700 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
6701 do {
6702 entry= it++;
6703 DBUG_ASSERT(entry /* binlog_id is always somewhere in the list. */);
6704 } while (entry->binlog_id != binlog_id);
6705 mysql_mutex_unlock(&LOCK_xid_list);
6706
6707 ha_commit_checkpoint_request(entry, binlog_checkpoint_callback);
6708 /*
6709 When we rotated the binlog, we incremented xid_count to make sure the
6710 entry would not go away until this point, where we have done all necessary
6711 commit_checkpoint_request() calls.
6712 So now we can (and must) decrease the count - when it reaches zero, we
6713 will know that both all pending unlog() and all pending
6714 commit_checkpoint_notify() calls are done, and we can log a new binlog
6715 checkpoint.
6716 */
6717 mark_xid_done(binlog_id, true);
6718 }
6719
6720
6721 /**
6722 The method executes rotation when LOCK_log is already acquired
6723 by the caller.
6724
6725 @param force_rotate caller can request the log rotation
6726 @param check_purge is set to true if rotation took place
6727
6728 @note
6729 Caller _must_ check the check_purge variable. If this is set, it means
6730 that the binlog was rotated, and caller _must_ ensure that
6731 do_checkpoint_request() is called later with the binlog_id of the rotated
6732 binlog file. The call to do_checkpoint_request() must happen after
6733 LOCK_log is released (which is why we cannot simply do it here).
6734 Usually, checkpoint_and_purge() is appropriate, as it will both handle
6735 the checkpointing and any needed purging of old logs.
6736
6737 @note
6738 If rotation fails, for instance the server was unable
6739 to create a new log file, we still try to write an
6740 incident event to the current log.
6741
6742 @retval
6743 nonzero - error in rotating routine.
6744 */
rotate(bool force_rotate,bool * check_purge)6745 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
6746 {
6747 int error= 0;
6748 DBUG_ENTER("MYSQL_BIN_LOG::rotate");
6749
6750 #ifdef WITH_WSREP
6751 if (WSREP_ON && wsrep_to_isolation)
6752 {
6753 *check_purge= false;
6754 WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d",
6755 wsrep_to_isolation);
6756 DBUG_RETURN(0);
6757 }
6758 #endif /* WITH_WSREP */
6759
6760 //todo: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log);
6761 *check_purge= false;
6762
6763 if (force_rotate || (my_b_tell(&log_file) >= (my_off_t) max_size))
6764 {
6765 ulong binlog_id= current_binlog_id;
6766 /*
6767 We rotate the binlog, so we need to start a commit checkpoint in all
6768 supporting engines - when it finishes, we can log a new binlog checkpoint
6769 event.
6770
6771 But we cannot start the checkpoint here - there could be a group commit
6772 still in progress which needs to be included in the checkpoint, and
6773 besides we do not want to do the (possibly expensive) checkpoint while
6774 LOCK_log is held.
6775
6776 On the other hand, we must be sure that the xid_count entry for the
6777 previous log does not go away until we start the checkpoint - which it
6778 could do as it is no longer the most recent. So we increment xid_count
6779 (to count the pending checkpoint request) - this will fix the entry in
6780 place until we decrement again in do_checkpoint_request().
6781 */
6782 mark_xids_active(binlog_id, 1);
6783
6784 if (unlikely((error= new_file_without_locking())))
6785 {
6786 /**
6787 Be conservative... There are possible lost events (eg,
6788 failing to log the Execute_load_query_log_event
6789 on a LOAD DATA while using a non-transactional
6790 table)!
6791
6792 We give it a shot and try to write an incident event anyway
6793 to the current log.
6794 */
6795 if (!write_incident_already_locked(current_thd))
6796 flush_and_sync(0);
6797
6798 /*
6799 We failed to rotate - so we have to decrement the xid_count back that
6800 we incremented before attempting the rotate.
6801 */
6802 mark_xid_done(binlog_id, false);
6803 }
6804 else
6805 *check_purge= true;
6806 }
6807 DBUG_RETURN(error);
6808 }
6809
6810 /**
6811 The method executes logs purging routine.
6812
  @note
    The method returns nothing; the outcome of the purge is not reported.
6815 */
purge()6816 void MYSQL_BIN_LOG::purge()
6817 {
6818 mysql_mutex_assert_not_owner(&LOCK_log);
6819 #ifdef HAVE_REPLICATION
6820 if (expire_logs_days)
6821 {
6822 DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
6823 time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
6824 if (purge_time >= 0)
6825 {
6826 purge_logs_before_date(purge_time);
6827 }
6828 DEBUG_SYNC(current_thd, "after_purge_logs_before_date");
6829 }
6830 #endif
6831 }
6832
6833
checkpoint_and_purge(ulong binlog_id)6834 void MYSQL_BIN_LOG::checkpoint_and_purge(ulong binlog_id)
6835 {
6836 do_checkpoint_request(binlog_id);
6837 purge();
6838 }
6839
6840
6841 /**
  Searches for the first (oldest) binlog file name in the binlog index.
6843
  @param[in,out]  buf_arg  pointer to a buffer that receives the name of
                           the first binary log file
6846 @return NULL on success, otherwise error message
6847 */
get_first_binlog(char * buf_arg)6848 static const char* get_first_binlog(char* buf_arg)
6849 {
6850 IO_CACHE *index_file;
6851 size_t length;
6852 char fname[FN_REFLEN];
6853 const char* errmsg= NULL;
6854
6855 DBUG_ENTER("get_first_binlog");
6856
6857 DBUG_ASSERT(mysql_bin_log.is_open());
6858
6859 mysql_bin_log.lock_index();
6860
6861 index_file=mysql_bin_log.get_index_file();
6862 if (reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0))
6863 {
6864 errmsg= "failed to create a cache on binlog index";
6865 goto end;
6866 }
6867 /* The file ends with EOF or empty line */
6868 if ((length=my_b_gets(index_file, fname, sizeof(fname))) <= 1)
6869 {
6870 errmsg= "empty binlog index";
6871 goto end;
6872 }
6873 else
6874 {
6875 fname[length-1]= 0; // Remove end \n
6876 }
6877 if (normalize_binlog_name(buf_arg, fname, false))
6878 {
6879 errmsg= "could not normalize the first file name in the binlog index";
6880 goto end;
6881 }
6882 end:
6883 mysql_bin_log.unlock_index();
6884
6885 DBUG_RETURN(errmsg);
6886 }
6887
6888 /**
  Check whether the gtid binlog state can safely remove gtid
6890 domains passed as the argument. A safety condition is satisfied when
6891 there are no events from the being deleted domains in the currently existing
6892 binlog files. Upon successful check the supplied domains are removed
6893 from @@gtid_binlog_state. The caller is supposed to rotate binlog so that
6894 the active latest file won't have the deleted domains in its Gtid_list header.
6895
6896 @param domain_drop_lex gtid domain id sequence from lex.
6897 Passed as a pointer to dynamic array must be not empty
6898 unless pointer value NULL.
6899 @retval zero on success
6900 @retval > 0 ineffective call none from the *non* empty
6901 gtid domain sequence is deleted
6902 @retval < 0 on error
6903 */
do_delete_gtid_domain(DYNAMIC_ARRAY * domain_drop_lex)6904 static int do_delete_gtid_domain(DYNAMIC_ARRAY *domain_drop_lex)
6905 {
6906 int rc= 0;
6907 Gtid_list_log_event *glev= NULL;
6908 char buf[FN_REFLEN];
6909 File file;
6910 IO_CACHE cache;
6911 const char* errmsg= NULL;
6912 char errbuf[MYSQL_ERRMSG_SIZE]= {0};
6913
6914 if (!domain_drop_lex)
6915 return 0; // still "effective" having empty domain sequence to delete
6916
6917 DBUG_ASSERT(domain_drop_lex->elements > 0);
6918 mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
6919
6920 if ((errmsg= get_first_binlog(buf)) != NULL)
6921 goto end;
6922 bzero((char*) &cache, sizeof(cache));
6923 if ((file= open_binlog(&cache, buf, &errmsg)) == (File) -1)
6924 goto end;
6925 errmsg= get_gtid_list_event(&cache, &glev);
6926 end_io_cache(&cache);
6927 mysql_file_close(file, MYF(MY_WME));
6928
6929 DBUG_EXECUTE_IF("inject_binlog_delete_domain_init_error",
6930 errmsg= "injected error";);
6931 if (errmsg)
6932 goto end;
6933 errmsg= rpl_global_gtid_binlog_state.drop_domain(domain_drop_lex,
6934 glev, errbuf);
6935
6936 end:
6937 if (errmsg)
6938 {
6939 if (strlen(errmsg) > 0)
6940 {
6941 my_error(ER_BINLOG_CANT_DELETE_GTID_DOMAIN, MYF(0), errmsg);
6942 rc= -1;
6943 }
6944 else
6945 {
6946 rc= 1;
6947 }
6948 }
6949 delete glev;
6950
6951 return rc;
6952 }
6953
6954 /**
6955 The method is a shortcut of @c rotate() and @c purge().
6956 LOCK_log is acquired prior to rotate and is released after it.
6957
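  @param force_rotate     caller can request the log rotation
  @param domain_drop_lex  gtid domains to delete before rotating, handed to
                          do_delete_gtid_domain(); may be NULL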
6959
6960 @retval
6961 nonzero - error in rotating routine.
6962 */
rotate_and_purge(bool force_rotate,DYNAMIC_ARRAY * domain_drop_lex)6963 int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
6964 DYNAMIC_ARRAY *domain_drop_lex)
6965 {
6966 int err_gtid=0, error= 0;
6967 ulong prev_binlog_id;
6968 DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
6969 bool check_purge= false;
6970
6971 mysql_mutex_lock(&LOCK_log);
6972
6973 DEBUG_SYNC(current_thd, "rotate_after_acquire_LOCK_log");
6974
6975 prev_binlog_id= current_binlog_id;
6976
6977 if ((err_gtid= do_delete_gtid_domain(domain_drop_lex)))
6978 {
6979 // inffective attempt to delete merely skips rotate and purge
6980 if (err_gtid < 0)
6981 error= 1; // otherwise error is propagated the user
6982 }
6983 else if (unlikely((error= rotate(force_rotate, &check_purge))))
6984 check_purge= false;
6985
6986 DEBUG_SYNC(current_thd, "rotate_after_rotate");
6987
6988 /*
6989 NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
6990 the mutex. Otherwise causes various deadlocks.
6991 Explicit binlog rotation must be synchronized with a concurrent
6992 binlog ordered commit, in particular not let binlog
6993 checkpoint notification request until early binlogged
6994 concurrent commits have has been completed.
6995 */
6996 mysql_mutex_lock(&LOCK_after_binlog_sync);
6997 mysql_mutex_unlock(&LOCK_log);
6998 mysql_mutex_lock(&LOCK_commit_ordered);
6999 mysql_mutex_unlock(&LOCK_after_binlog_sync);
7000 mysql_mutex_unlock(&LOCK_commit_ordered);
7001
7002 if (check_purge)
7003 checkpoint_and_purge(prev_binlog_id);
7004
7005 DBUG_RETURN(error);
7006 }
7007
next_file_id()7008 uint MYSQL_BIN_LOG::next_file_id()
7009 {
7010 uint res;
7011 mysql_mutex_lock(&LOCK_log);
7012 res = file_id++;
7013 mysql_mutex_unlock(&LOCK_log);
7014 return res;
7015 }
7016
7017 class CacheWriter: public Log_event_writer
7018 {
7019 public:
7020 size_t remains;
7021
CacheWriter(THD * thd_arg,IO_CACHE * file_arg,bool do_checksum,Binlog_crypt_data * cr)7022 CacheWriter(THD *thd_arg, IO_CACHE *file_arg, bool do_checksum,
7023 Binlog_crypt_data *cr)
7024 : Log_event_writer(file_arg, 0, cr), remains(0), thd(thd_arg),
7025 first(true)
7026 { checksum_len= do_checksum ? BINLOG_CHECKSUM_LEN : 0; }
7027
~CacheWriter()7028 ~CacheWriter()
7029 { status_var_add(thd->status_var.binlog_bytes_written, bytes_written); }
7030
write(uchar * pos,size_t len)7031 int write(uchar* pos, size_t len)
7032 {
7033 DBUG_ENTER("CacheWriter::write");
7034 if (first)
7035 write_header(pos, len);
7036 else
7037 write_data(pos, len);
7038
7039 remains -= len;
7040 if ((first= !remains))
7041 write_footer();
7042 DBUG_RETURN(0);
7043 }
7044 private:
7045 THD *thd;
7046 bool first;
7047 };
7048
7049 /*
7050 Write the contents of a cache to the binary log.
7051
7052 SYNOPSIS
7053 write_cache()
7054 thd Current_thread
7055 cache Cache to write to the binary log
7056
7057 DESCRIPTION
7058 Write the contents of the cache to the binary log. The cache will
7059 be reset as a READ_CACHE to be able to read the contents from it.
7060
    Events are read from the transaction cache. When checksums are enabled
    (per @c binlog_checksum_options) room for a checksum is accounted for in
    each event, and the length and the end_log_pos of every event are fixed
    up before the event is written to the binary log.
7064 */
7065
int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
{
  DBUG_ENTER("MYSQL_BIN_LOG::write_cache");

  /* The caller must already hold LOCK_log. */
  mysql_mutex_assert_owner(&LOCK_log);
  /* Re-initialise the cache as a READ_CACHE so its contents can be read. */
  if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  /*
    length   - bytes of the current cache segment still to be processed
    group    - position in log_file at which this group of events starts
    carry    - number of bytes of a split event header saved in `header`
    hdr_offs - offset, relative to cache->read_pos, of the next event header
  */
  size_t length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
  size_t val;
  /* Grows by writer.checksum_len for every event header processed. */
  size_t end_log_pos_inc= 0;
  /* Scratch buffer used to reassemble a header split across two segments. */
  uchar header[LOG_EVENT_HEADER_LEN];
  CacheWriter writer(thd, &log_file, binlog_checksum_options, &crypto);

  if (crypto.scheme)
    writer.ctx= alloca(crypto.ctx_size);

  // while there is just one alg the following must hold:
  DBUG_ASSERT(binlog_checksum_options == BINLOG_CHECKSUM_ALG_OFF ||
              binlog_checksum_options == BINLOG_CHECKSUM_ALG_CRC32);

  /*
    The events in the buffer have incorrect end_log_pos data
    (relative to beginning of group rather than absolute),
    so we'll recalculate them in situ so the binlog is always
    correct, even in the middle of a group. This is possible
    because we now know the start position of the group (the
    offset of this cache in the log, if you will); all we need
    to do is to find all event-headers, and add the position of
    the group to the end_log_pos of each event.  This is pretty
    straight forward, except that we read the cache in segments,
    so an event-header might end up on the cache-border and get
    split.
  */

  group= (size_t)my_b_tell(&log_file);
  hdr_offs= carry= 0;

  do
  {
    /*
      if we only got a partial header in the last iteration,
      get the other half now and process a full header.
    */
    if (unlikely(carry > 0))
    {
      DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);
      /* Number of header bytes that are found in the new segment. */
      size_t tail= LOG_EVENT_HEADER_LEN - carry;

      /* assemble both halves */
      memcpy(&header[carry], (char *)cache->read_pos, tail);

      uint32 len= uint4korr(header + EVENT_LEN_OFFSET);
      /* The whole event, as stored in the cache, is still to be written. */
      writer.remains= len;

      /* fix end_log_pos */
      end_log_pos_inc += writer.checksum_len;
      val= uint4korr(header + LOG_POS_OFFSET) + group + end_log_pos_inc;
      int4store(header + LOG_POS_OFFSET, val);

      /* fix len */
      len+= writer.checksum_len;
      int4store(header + EVENT_LEN_OFFSET, len);

      if (writer.write(header, LOG_EVENT_HEADER_LEN))
        DBUG_RETURN(ER_ERROR_ON_WRITE);

      /* Skip the part of the header that was taken from this segment. */
      cache->read_pos+= tail;
      length-= tail;
      carry= 0;

      /*
        next event header at ...
        (len already includes checksum_len here, so take it off again to
        get the distance within the cache data)
      */
      hdr_offs= len - LOG_EVENT_HEADER_LEN - writer.checksum_len;
    }

    /* if there is anything to write, process it. */

    if (likely(length > 0))
    {
      DBUG_EXECUTE_IF("fail_binlog_write_1",
                      errno= 28; DBUG_RETURN(ER_ERROR_ON_WRITE););
      /*
        process all event-headers in this (partial) cache.
        if next header is beyond current read-buffer,
        we'll get it later (though not necessarily in the
        very next iteration, just "eventually").
      */

      /* No header in this segment: all of it belongs to the current event. */
      if (hdr_offs >= length)
      {
        if (writer.write(cache->read_pos, length))
          DBUG_RETURN(ER_ERROR_ON_WRITE);
      }

      while (hdr_offs < length)
      {
        /*
          finish off with remains of the last event that crawls
          from previous into the current buffer
        */
        if (writer.remains != 0)
        {
          if (writer.write(cache->read_pos, hdr_offs))
            DBUG_RETURN(ER_ERROR_ON_WRITE);
        }

        /*
          partial header only? save what we can get, process once
          we get the rest.
        */
        if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
        {
          carry= length - hdr_offs;
          memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
          /* Shrinking length to hdr_offs also terminates the while loop. */
          length= hdr_offs;
        }
        else
        {
          /* we've got a full event-header, and it came in one piece */
          uchar *ev= (uchar *)cache->read_pos + hdr_offs;
          uint ev_len= uint4korr(ev + EVENT_LEN_OFFSET); // netto len
          uchar *log_pos= ev + LOG_POS_OFFSET;

          end_log_pos_inc += writer.checksum_len;
          /* fix end_log_pos */
          val= uint4korr(log_pos) + group + end_log_pos_inc;
          int4store(log_pos, val);

          /* fix length */
          int4store(ev + EVENT_LEN_OFFSET, ev_len + writer.checksum_len);

          writer.remains= ev_len;
          /* Write the event, or as much of it as this segment holds. */
          if (writer.write(ev, MY_MIN(ev_len, length - hdr_offs)))
            DBUG_RETURN(ER_ERROR_ON_WRITE);

          /* next event header at ... */
          hdr_offs += ev_len; // incr by the netto len

          DBUG_ASSERT(!writer.checksum_len || writer.remains == 0 || hdr_offs >= length);
        }
      }

      /*
        Adjust hdr_offs. Note that it may still point beyond the segment
        read in the next iteration; if the current event is very long,
        it may take a couple of read-iterations (and subsequent adjustments
        of hdr_offs) for it to point into the then-current segment.
        If we have a split header (carry != 0), hdr_offs will be set at the
        beginning of the next iteration, overwriting the value we set here:
      */
      hdr_offs -= length;
    }
  } while ((length= my_b_fill(cache)));

  /* A split header or an unfinished event at this point would be a bug. */
  DBUG_ASSERT(carry == 0);
  DBUG_ASSERT(!writer.checksum_len || writer.remains == 0);

  DBUG_RETURN(0);                               // All OK
}
7224
7225 /*
7226 Helper function to get the error code of the query to be binlogged.
7227 */
query_error_code(THD * thd,bool not_killed)7228 int query_error_code(THD *thd, bool not_killed)
7229 {
7230 int error;
7231
7232 if (not_killed || (killed_mask_hard(thd->killed) == KILL_BAD_DATA))
7233 {
7234 error= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0;
7235 if (!error)
7236 return error;
7237
7238 /* thd->get_get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
7239 ER_QUERY_INTERRUPTED, So here we need to make sure that error
7240 is not set to these errors when specified not_killed by the
7241 caller.
7242 */
7243 if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED ||
7244 error == ER_NEW_ABORTING_CONNECTION || error == ER_CONNECTION_KILLED)
7245 error= 0;
7246 }
7247 else
7248 {
7249 /* killed status for DELAYED INSERT thread should never be used */
7250 DBUG_ASSERT(!(thd->system_thread & SYSTEM_THREAD_DELAYED_INSERT));
7251 error= thd->killed_errno();
7252 }
7253
7254 return error;
7255 }
7256
7257
write_incident_already_locked(THD * thd)7258 bool MYSQL_BIN_LOG::write_incident_already_locked(THD *thd)
7259 {
7260 uint error= 0;
7261 DBUG_ENTER("MYSQL_BIN_LOG::write_incident_already_locked");
7262 Incident incident= INCIDENT_LOST_EVENTS;
7263 Incident_log_event ev(thd, incident, &write_error_msg);
7264
7265 if (likely(is_open()))
7266 {
7267 error= write_event(&ev);
7268 status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
7269 }
7270
7271 DBUG_RETURN(error);
7272 }
7273
7274
write_incident(THD * thd)7275 bool MYSQL_BIN_LOG::write_incident(THD *thd)
7276 {
7277 uint error= 0;
7278 my_off_t offset;
7279 bool check_purge= false;
7280 ulong prev_binlog_id;
7281 DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
7282
7283 mysql_mutex_lock(&LOCK_log);
7284 if (likely(is_open()))
7285 {
7286 prev_binlog_id= current_binlog_id;
7287 if (likely(!(error= write_incident_already_locked(thd))) &&
7288 likely(!(error= flush_and_sync(0))))
7289 {
7290 update_binlog_end_pos();
7291 if (unlikely((error= rotate(false, &check_purge))))
7292 check_purge= false;
7293 }
7294
7295 offset= my_b_tell(&log_file);
7296
7297 update_binlog_end_pos(offset);
7298
7299 /*
7300 Take mutex to protect against a reader seeing partial writes of 64-bit
7301 offset on 32-bit CPUs.
7302 */
7303 mysql_mutex_lock(&LOCK_commit_ordered);
7304 last_commit_pos_offset= offset;
7305 mysql_mutex_unlock(&LOCK_commit_ordered);
7306 mysql_mutex_unlock(&LOCK_log);
7307
7308 if (check_purge)
7309 checkpoint_and_purge(prev_binlog_id);
7310 }
7311 else
7312 {
7313 mysql_mutex_unlock(&LOCK_log);
7314 }
7315
7316 DBUG_RETURN(error);
7317 }
7318
7319 void
write_binlog_checkpoint_event_already_locked(const char * name_arg,uint len)7320 MYSQL_BIN_LOG::write_binlog_checkpoint_event_already_locked(const char *name_arg, uint len)
7321 {
7322 my_off_t offset;
7323 Binlog_checkpoint_log_event ev(name_arg, len);
7324 /*
7325 Note that we must sync the binlog checkpoint to disk.
7326 Otherwise a subsequent log purge could delete binlogs that XA recovery
7327 thinks are needed (even though they are not really).
7328 */
7329 if (!write_event(&ev) && !flush_and_sync(0))
7330 {
7331 update_binlog_end_pos();
7332 }
7333 else
7334 {
7335 /*
7336 If we fail to write the checkpoint event, something is probably really
7337 bad with the binlog. We complain in the error log.
7338
7339 Note that failure to write binlog checkpoint does not compromise the
7340 ability to do crash recovery - crash recovery will just have to scan a
7341 bit more of the binlog than strictly necessary.
7342 */
7343 sql_print_error("Failed to write binlog checkpoint event to binary log");
7344 }
7345
7346 offset= my_b_tell(&log_file);
7347
7348 update_binlog_end_pos(offset);
7349
7350 /*
7351 Take mutex to protect against a reader seeing partial writes of 64-bit
7352 offset on 32-bit CPUs.
7353 */
7354 mysql_mutex_lock(&LOCK_commit_ordered);
7355 last_commit_pos_offset= offset;
7356 mysql_mutex_unlock(&LOCK_commit_ordered);
7357 }
7358
7359
7360 /**
7361 Write a cached log entry to the binary log.
7362 - To support transaction over replication, we wrap the transaction
7363 with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
7364 We want to write a BEGIN/ROLLBACK block when a non-transactional table
7365 was updated in a transaction which was rolled back. This is to ensure
7366 that the same updates are run on the slave.
7367
7368 @param thd
7369 @param cache The cache to copy to the binlog
7370 @param commit_event The commit event to print after writing the
7371 contents of the cache.
7372 @param incident Defines if an incident event should be created to
7373 notify that some non-transactional changes did
7374 not get into the binlog.
7375
7376 @note
7377 We only come here if there is something in the cache.
7378 @note
7379 The thing in the cache is always a complete transaction.
7380 @note
7381 'cache' needs to be reinitialized after this functions returns.
7382 */
7383
7384 bool
write_transaction_to_binlog(THD * thd,binlog_cache_mngr * cache_mngr,Log_event * end_ev,bool all,bool using_stmt_cache,bool using_trx_cache)7385 MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
7386 binlog_cache_mngr *cache_mngr,
7387 Log_event *end_ev, bool all,
7388 bool using_stmt_cache,
7389 bool using_trx_cache)
7390 {
7391 group_commit_entry entry;
7392 Ha_trx_info *ha_info;
7393 DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
7394
7395 /*
7396 Control should not be allowed beyond this point in wsrep_emulate_bin_log
7397 mode. Also, do not write the cached updates to binlog if binary logging is
7398 disabled (log-bin/sql_log_bin).
7399 */
7400 if (wsrep_emulate_bin_log)
7401 {
7402 DBUG_RETURN(0);
7403 }
7404 else if (!(thd->variables.option_bits & OPTION_BIN_LOG))
7405 {
7406 cache_mngr->need_unlog= false;
7407 DBUG_RETURN(0);
7408 }
7409
7410 entry.thd= thd;
7411 entry.cache_mngr= cache_mngr;
7412 entry.error= 0;
7413 entry.all= all;
7414 entry.using_stmt_cache= using_stmt_cache;
7415 entry.using_trx_cache= using_trx_cache;
7416 entry.need_unlog= false;
7417 ha_info= all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
7418
7419 for (; ha_info; ha_info= ha_info->next())
7420 {
7421 if (ha_info->is_started() && ha_info->ht() != binlog_hton &&
7422 !ha_info->ht()->commit_checkpoint_request)
7423 entry.need_unlog= true;
7424 break;
7425 }
7426
7427 entry.end_event= end_ev;
7428 if (cache_mngr->stmt_cache.has_incident() ||
7429 cache_mngr->trx_cache.has_incident())
7430 {
7431 Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, &write_error_msg);
7432 entry.incident_event= &inc_ev;
7433 DBUG_RETURN(write_transaction_to_binlog_events(&entry));
7434 }
7435 else
7436 {
7437 entry.incident_event= NULL;
7438 DBUG_RETURN(write_transaction_to_binlog_events(&entry));
7439 }
7440 }
7441
7442
7443 /*
7444 Put a transaction that is ready to commit in the group commit queue.
7445 The transaction is identified by the ENTRY object passed into this function.
7446
7447 To facilitate group commit for the binlog, we first queue up ourselves in
7448 this function. Then later the first thread to enter the queue waits for
7449 the LOCK_log mutex, and commits for everyone in the queue once it gets the
7450 lock. Any other threads in the queue just wait for the first one to finish
7451 the commit and wake them up. This way, all transactions in the queue get
7452 committed in a single disk operation.
7453
7454 The main work in this function is when the commit in one transaction has
7455 been marked to wait for the commit of another transaction to happen
7456 first. This is used to support in-order parallel replication, where
7457 transactions can execute out-of-order but need to be committed in-order with
7458 how they happened on the master. The waiting of one commit on another needs
7459 to be integrated with the group commit queue, to ensure that the waiting
7460 transaction can participate in the same group commit as the waited-for
7461 transaction.
7462
7463 So when we put a transaction in the queue, we check if there were other
7464 transactions already prepared to commit but just waiting for the first one
7465 to commit. If so, we add those to the queue as well, transitively for all
7466 waiters.
7467
7468 And if a transaction is marked to wait for a prior transaction, but that
7469 prior transaction is already queued for group commit, then we can queue the
7470 new transaction directly to participate in the group commit.
7471
7472 @retval < 0 Error
7473 @retval -2 WSREP error with commit ordering
7474 @retval -3 WSREP return code to mark the leader
7475 @retval > 0 If queued as the first entry in the queue (meaning this
7476 is the leader)
7477 @retval 0 Otherwise (queued as participant, leader handles the commit)
7478 */
7479
int
MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
{
  group_commit_entry *entry, *orig_queue, *last;
  wait_for_commit *cur;
  wait_for_commit *wfc;
  /* Set once the backup commit lock was released; re-acquired at `end`. */
  bool backup_lock_released= 0;
  int result= 0;
  THD *thd= orig_entry->thd;
  DBUG_ENTER("MYSQL_BIN_LOG::queue_for_group_commit");
  DBUG_ASSERT(thd == current_thd);

  /*
    Check if we need to wait for another transaction to commit before us.

    It is safe to do a quick check without lock first in the case where we do
    not have to wait. But if the quick check shows we need to wait, we must do
    another safe check under lock, to avoid the race where the other
    transaction wakes us up between the check and the wait.
  */
  wfc= orig_entry->thd->wait_for_commit_ptr;
  orig_entry->queued_by_other= false;
  if (wfc && wfc->waitee.load(std::memory_order_acquire))
  {
    wait_for_commit *loc_waitee;

    mysql_mutex_lock(&wfc->LOCK_wait_commit);
    /*
      Do an extra check here, this time safely under lock.

      If waitee->commit_started is set, it means that the transaction we need
      to wait for has already queued up for group commit. In this case it is
      safe for us to queue up immediately as well, increasing the
      opportunities for group commit. Because waitee has taken the
      LOCK_prepare_ordered before setting the flag, so there is no risk that
      we can queue ahead of it.
    */
    if ((loc_waitee= wfc->waitee.load(std::memory_order_relaxed)) &&
        !loc_waitee->commit_started)
    {
      PSI_stage_info old_stage;

      /*
        Release MDL_BACKUP_COMMIT LOCK while waiting for other threads to
        commit.
        This is needed to avoid deadlock between the other threads (which not
        yet have the MDL_BACKUP_COMMIT_LOCK) and any threads using
        BACKUP LOCK BLOCK_COMMIT.
      */
      if (thd->backup_commit_lock && thd->backup_commit_lock->ticket &&
          !backup_lock_released)
      {
        backup_lock_released= 1;
        thd->mdl_context.release_lock(thd->backup_commit_lock->ticket);
        thd->backup_commit_lock->ticket= 0;
      }

      /*
        By setting wfc->opaque_pointer to our own entry, we mark that we are
        ready to commit, but waiting for another transaction to commit before
        us.

        This other transaction may then take over the commit process for us to
        get us included in its own group commit. If this happens, the
        queued_by_other flag is set.

        Setting this flag may or may not be seen by the other thread, but we
        are safe in any case: The other thread will set queued_by_other under
        its LOCK_wait_commit, and we will not check queued_by_other until
        after we have been woken up.
      */
      wfc->opaque_pointer= orig_entry;
      DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior");
      orig_entry->thd->ENTER_COND(&wfc->COND_wait_commit,
                                  &wfc->LOCK_wait_commit,
                                  &stage_waiting_for_prior_transaction_to_commit,
                                  &old_stage);
      /* Wait until the waitee has been cleared or this thread is killed. */
      while ((loc_waitee= wfc->waitee.load(std::memory_order_relaxed)) &&
              !orig_entry->thd->check_killed(1))
        mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
      wfc->opaque_pointer= NULL;
      DBUG_PRINT("info", ("After waiting for prior commit, queued_by_other=%d",
                 orig_entry->queued_by_other));

      /* A waitee that is still set means the loop ended because of a kill. */
      if (loc_waitee)
      {
        /* Wait terminated due to kill. */
        mysql_mutex_lock(&loc_waitee->LOCK_wait_commit);
        if (loc_waitee->wakeup_subsequent_commits_running ||
            orig_entry->queued_by_other)
        {
          /* Our waitee is already waking us up, so ignore the kill. */
          mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
          do
          {
            mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
          } while (wfc->waitee.load(std::memory_order_relaxed));
        }
        else
        {
          /* We were killed, so remove us from the list of waitee. */
          wfc->remove_from_list(&loc_waitee->subsequent_commits_list);
          mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
          /*
            This is the thread clearing its own status, it is no longer on
            the list of waiters. So no memory barriers are needed here.
          */
          wfc->waitee.store(NULL, std::memory_order_relaxed);

          orig_entry->thd->EXIT_COND(&old_stage);
          /* Interrupted by kill. */
          DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior_killed");
          wfc->wakeup_error= orig_entry->thd->killed_errno();
          /* Fall back to a generic error if no kill error is available. */
          if (!wfc->wakeup_error)
            wfc->wakeup_error= ER_QUERY_INTERRUPTED;
          my_message(wfc->wakeup_error,
                     ER_THD(orig_entry->thd, wfc->wakeup_error), MYF(0));
          result= -1;
          goto end;
        }
      }
      orig_entry->thd->EXIT_COND(&old_stage);
    }
    else
      mysql_mutex_unlock(&wfc->LOCK_wait_commit);
  }
  /*
    If the transaction we were waiting for has already put us into the group
    commit queue (and possibly already done the entire binlog commit for us),
    then there is nothing else to do.
  */
  if (orig_entry->queued_by_other)
    goto end;

  /* The commit we depended on failed, so this one fails as well. */
  if (wfc && wfc->wakeup_error)
  {
    my_error(ER_PRIOR_COMMIT_FAILED, MYF(0));
    result= -1;
    goto end;
  }

  /* Now enqueue ourselves in the group commit queue. */
  DEBUG_SYNC(orig_entry->thd, "commit_before_enqueue");
  orig_entry->thd->clear_wakeup_ready();
  mysql_mutex_lock(&LOCK_prepare_ordered);
  /* Remember the queue head as it was before we add anything to it. */
  orig_queue= group_commit_queue;

  /*
    Iteratively process everything added to the queue, looking for waiters,
    and their waiters, and so on. If a waiter is ready to commit, we
    immediately add it to the queue, and mark it as queued_by_other.

    This would be natural to do with recursion, but we want to avoid
    potentially unbounded recursion blowing the C stack, so we use the list
    approach instead.

    We keep a list of the group_commit_entry of all the waiters that need to
    be processed. Initially this list contains only the entry passed into this
    function.

    We process entries in the list one by one. The element currently being
    processed is pointed to by `entry`, and the element at the end of the list
    is pointed to by `last` (we do not use NULL to terminate the list).

    As we process an entry, any waiters for that entry are added at the end of
    the list, to be processed in subsequent iterations. Then the entry is
    added to the group_commit_queue.  This continues until the list is
    exhausted, with all entries ever added eventually processed.

    The end result is a breadth-first traversal of the tree of waiters,
    re-using the `next' pointers of the group_commit_entry objects in place of
    extra stack space in a recursive traversal.

    The temporary list linked through these `next' pointers is not used by the
    caller or any other function; it only exists while doing the iterative
    tree traversal. After, all the processed entries are linked into the
    group_commit_queue.
  */

  cur= wfc;
  last= orig_entry;
  entry= orig_entry;
  for (;;)
  {
    group_commit_entry *next_entry;

    if (entry->cache_mngr->using_xa)
    {
      DEBUG_SYNC(entry->thd, "commit_before_prepare_ordered");
      run_prepare_ordered(entry->thd, entry->all);
      DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered");
    }

    if (cur)
    {
      /*
        Now that we have taken LOCK_prepare_ordered and will queue up in the
        group commit queue, it is safe for following transactions to queue
        themselves. We will grab here any transaction that is now ready to
        queue up, but after that, more transactions may become ready while the
        leader is waiting to start the group commit. So set the flag
        `commit_started', so that later transactions can still participate in
        the group commit.
      */
      cur->commit_started= true;

      /*
        Check if this transaction has other transaction waiting for it to
        commit.

        If so, process the waiting transactions, and their waiters and so on,
        transitively.
      */
      if (cur->subsequent_commits_list)
      {
        wait_for_commit *waiter, **waiter_ptr;

        mysql_mutex_lock(&cur->LOCK_wait_commit);
        /*
          Grab the list, now safely under lock, and process it if still
          non-empty.
        */
        waiter= cur->subsequent_commits_list;
        /*
          waiter_ptr points at the link that leads to the current waiter, so
          that the waiter can be unlinked by overwriting *waiter_ptr.
        */
        waiter_ptr= &cur->subsequent_commits_list;
        while (waiter)
        {
          wait_for_commit *next_waiter= waiter->next_subsequent_commit;
          group_commit_entry *entry2=
            (group_commit_entry *)waiter->opaque_pointer;
          if (entry2)
          {
            /*
              This is another transaction ready to be written to the binary
              log. We can put it into the queue directly, without needing a
              separate context switch to the other thread. We just set a flag
              so that the other thread will know when it wakes up that it was
              already processed.

              So remove it from the list of our waiters, and instead put it at
              the end of the list to be processed in a subsequent iteration of
              the outer loop.
            */
            *waiter_ptr= next_waiter;
            entry2->queued_by_other= true;
            last->next= entry2;
            last= entry2;
            /*
              As a small optimisation, we do not actually need to set
              entry2->next to NULL, as we can use the pointer `last' to check
              for end-of-list.
            */
          }
          else
          {
            /*
              This transaction is not ready to participate in the group commit
              yet, so leave it in the waiter list. It might join the group
              commit later, if it completes soon enough to do so (it will see
              our wfc->commit_started flag set), or it might commit later in a
              later group commit.
            */
            waiter_ptr= &waiter->next_subsequent_commit;
          }
          waiter= next_waiter;
        }
        mysql_mutex_unlock(&cur->LOCK_wait_commit);
      }
    }

    /*
      Handle the heuristics that if another transaction is waiting for this
      transaction (or if it does so later), then we want to trigger group
      commit immediately, without waiting for the binlog_commit_wait_usec
      timeout to expire.
    */
    entry->thd->waiting_on_group_commit= true;

    /*
      Add the entry to the group commit queue.
      entry->next is saved first, as it still links the temporary list of
      entries to process and is overwritten by the push onto the queue.
    */
    next_entry= entry->next;
    entry->next= group_commit_queue;
    group_commit_queue= entry;
    if (entry == last)
      break;
    /*
      Move to the next entry in the flattened list of waiting transactions
      that still need to be processed transitively.
    */
    entry= next_entry;
    DBUG_ASSERT(entry != NULL);
    cur= entry->thd->wait_for_commit_ptr;
  }

  /* We are the leader (result 1) if the queue was empty before we queued. */
  result= orig_queue == NULL;

#ifdef WITH_WSREP
  if (wsrep_is_active(entry->thd) &&
      wsrep_run_commit_hook(entry->thd, entry->all))
  {
    /*  Release commit order here */
    if (wsrep_ordered_commit(entry->thd, entry->all, wsrep_apply_error()))
      result= -2;

    /* return -3, if this is leader */
    if (orig_queue == NULL)
      result= -3;
  }
  else
    DBUG_ASSERT(result != -2 && result != -3);
#endif /* WITH_WSREP */

  /* Signalled only when we joined an already non-empty queue. */
  if (opt_binlog_commit_wait_count > 0 && orig_queue != NULL)
    mysql_cond_signal(&COND_prepare_ordered);
  mysql_mutex_unlock(&LOCK_prepare_ordered);
  DEBUG_SYNC(orig_entry->thd, "commit_after_release_LOCK_prepare_ordered");

  DBUG_PRINT("info", ("Queued for group commit as %s",
                      (orig_queue == NULL) ? "leader" : "participant"));

end:
  /* Take back the backup commit lock if it was released before waiting. */
  if (backup_lock_released)
    thd->mdl_context.acquire_lock(thd->backup_commit_lock,
                                  thd->variables.lock_wait_timeout);
  DBUG_RETURN(result);
}
7804
/*
  Queue a transaction for group commit and see the commit through.

  The entry is queued with queue_for_group_commit(). If this thread became
  the leader it runs trx_group_commit_leader(); otherwise, unless the entry
  was queued by another thread, it waits to be woken up. Afterwards any
  error recorded in the entry is reported.

  @param entry  Group commit entry describing the transaction

  @retval true   queueing failed, or entry->error was set (the error has
                 been reported)
  @retval other  the result of wait_for_prior_commit() when no error was
                 recorded in the entry
*/
bool
MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
{
  int is_leader= queue_for_group_commit(entry);
#ifdef WITH_WSREP
  /* commit order was released in queue_for_group_commit() call,
     here we check if wsrep_commit_ordered() failed or if we are leader */
  switch (is_leader)
  {
  case -2: /* wsrep_ordered_commit() has failed */
    DBUG_ASSERT(wsrep_is_active(entry->thd));
    DBUG_ASSERT(wsrep_run_commit_hook(entry->thd, entry->all));
    entry->thd->wakeup_subsequent_commits(1);
    return true;
  case -3: /* this is leader, wait for prior commit to
              complete. This establishes total order for group leaders
           */
    DBUG_ASSERT(wsrep_is_active(entry->thd));
    DBUG_ASSERT(wsrep_run_commit_hook(entry->thd, entry->all));
    if (entry->thd->wait_for_prior_commit())
      return true;

    /* retain the correct is_leader value */
    is_leader= 1;
    break;

  default: /* native MariaDB cases */
    break;
  }
#endif /* WITH_WSREP */

  /*
    The first in the queue handles group commit for all; the others just wait
    to be signalled when group commit is done.
  */
  if (is_leader < 0)
    return true;                                /* Error */
  else if (is_leader)
    trx_group_commit_leader(entry);
  else if (!entry->queued_by_other)
  {
    DEBUG_SYNC(entry->thd, "after_semisync_queue");

    entry->thd->wait_for_wakeup_ready();
  }
  else
  {
    /*
      If we were queued by another prior commit, then we are woken up
      only when the leader has already completed the commit for us.
      So nothing to do here then.
    */
  }

  /*
    Without optimized thread scheduling each thread runs its own
    commit_ordered() step and then wakes up the next entry in the queue.
  */
  if (!opt_optimize_thread_scheduling)
  {
    /* For the leader, trx_group_commit_leader() already took the lock. */
    if (!is_leader)
      mysql_mutex_lock(&LOCK_commit_ordered);

    DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered");
    ++num_commits;
    if (entry->cache_mngr->using_xa && !entry->error)
      run_commit_ordered(entry->thd, entry->all);

    group_commit_entry *next= entry->next;
    /* The last entry in the queue marks the queue as no longer busy. */
    if (!next)
    {
      group_commit_queue_busy= FALSE;
      mysql_cond_signal(&COND_queue_busy);
      DEBUG_SYNC(entry->thd, "commit_after_group_run_commit_ordered");
    }
    mysql_mutex_unlock(&LOCK_commit_ordered);
    entry->thd->wakeup_subsequent_commits(entry->error);

    if (next)
    {
      /*
        Wake up the next thread in the group commit.

        The next thread can be waiting in two different ways, depending on
        whether it put itself in the queue, or if it was put in queue by us
        because it had to wait for us to commit first.

        So execute the appropriate wakeup, identified by the queued_by_other
        field.
      */
      if (next->queued_by_other)
        next->thd->wait_for_commit_ptr->wakeup(entry->error);
      else
        next->thd->signal_wakeup_ready();
    }
    else
    {
      /*
        If we rotated the binlog, and if we are using the unoptimized thread
        scheduling where every thread runs its own commit_ordered(), then we
        must do the commit checkpoint and log purge here, after all
        commit_ordered() calls have finished, and locks have been released.
      */
      if (entry->check_purge)
        checkpoint_and_purge(entry->binlog_id);
    }

  }

  /* No error recorded: only the wait for the prior commit remains. */
  if (likely(!entry->error))
    return entry->thd->wait_for_prior_commit();

  /* Report the error recorded in the entry. */
  switch (entry->error)
  {
  case ER_ERROR_ON_WRITE:
    my_error(ER_ERROR_ON_WRITE, MYF(ME_ERROR_LOG), name, entry->commit_errno);
    break;
  case ER_ERROR_ON_READ:
    my_error(ER_ERROR_ON_READ, MYF(ME_ERROR_LOG),
             entry->error_cache->file_name, entry->commit_errno);
    break;
  default:
    /*
      There are not (and should not be) any errors thrown not covered above.
      But just in case one is added later without updating the above switch
      statement, include a catch-all.
    */
    my_printf_error(entry->error,
                    "Error writing transaction to binary log: %d",
                    MYF(ME_ERROR_LOG), entry->error);
  }

  /*
    Since we return error, this transaction XID will not be committed, so
    we need to mark it as not needed for recovery (unlog() is not called
    for a transaction if log_xid() fails).
  */
  if (entry->cache_mngr->using_xa && entry->cache_mngr->xa_xid &&
      entry->cache_mngr->need_unlog)
    mark_xid_done(entry->cache_mngr->binlog_id, true);

  return 1;
}
7945
/*
  Do binlog group commit as the lead thread.

  This must be called when this statement/transaction is queued at the start of
  the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
  commit all the transactions in the queue (more may have entered while waiting
  for LOCK_log). After commit is done, all other threads in the queue will be
  signalled.

  Stages, in the order they are executed below:

    1. Take LOCK_log, then (briefly) LOCK_prepare_ordered; optionally wait for
       more commits to queue up; detach group_commit_queue and reverse it so
       that entries are processed in the order they were queued.
    2. Still holding LOCK_log: write every entry to the binary log, call
       flush_and_sync(), report the new positions to semi-sync (only with
       HAVE_REPLICATION), publish the new binlog end position, and rotate().
    3. Take LOCK_after_binlog_sync, then release LOCK_log; run the semi-sync
       wait_after_sync() step for each entry without error (only with
       HAVE_REPLICATION).
    4. Take LOCK_commit_ordered, then release LOCK_after_binlog_sync; run
       commit_ordered() for each participant and wake it up.

  Each lock is acquired before the previous one is released, so that a later
  group commit cannot overtake this one between stages.

  @param leader  The queue entry of the calling thread; it is asserted to be
                 first in the (reversed) queue.

  @note
    When opt_optimize_thread_scheduling is off, this function returns early
    with LOCK_commit_ordered still locked and group_commit_queue_busy set;
    stage 4 is then not executed here.
*/
void
MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
{
  uint xid_count= 0;
  my_off_t UNINIT_VAR(commit_offset);
  group_commit_entry *current, *last_in_queue;
  group_commit_entry *queue= NULL;
  bool check_purge= false;
  ulong UNINIT_VAR(binlog_id);
  uint64 commit_id;
  DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");

  {
    DBUG_EXECUTE_IF("inject_binlog_commit_before_get_LOCK_log",
      DBUG_ASSERT(!debug_sync_set_action(leader->thd, STRING_WITH_LEN
        ("commit_before_get_LOCK_log SIGNAL waiting WAIT_FOR cont TIMEOUT 1")));
    );
    /*
      Lock the LOCK_log(), and once we get it, collect any additional writes
      that queued up while we were waiting.
    */
    DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_log");
    mysql_mutex_lock(&LOCK_log);
    DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log");

    mysql_mutex_lock(&LOCK_prepare_ordered);
    if (opt_binlog_commit_wait_count)
      wait_for_sufficient_commits();
    /*
      Note that wait_for_sufficient_commits() may have released and
      re-acquired the LOCK_log and LOCK_prepare_ordered if it needed to wait.
    */
    /* Detach the whole queue; entries queued from now on form a new group. */
    current= group_commit_queue;
    group_commit_queue= NULL;
    mysql_mutex_unlock(&LOCK_prepare_ordered);
    binlog_id= current_binlog_id;

    /* As the queue is in reverse order of entering, reverse it. */
    last_in_queue= current;
    while (current)
    {
      group_commit_entry *next= current->next;
      /*
        Now that group commit is started, we can clear the flag; there is no
        longer any use in waiters on this commit trying to trigger it early.
      */
      current->thd->waiting_on_group_commit= false;
      current->next= queue;
      queue= current;
      current= next;
    }
    DBUG_ASSERT(leader == queue /* the leader should be first in queue */);

    /* Now we have in queue the list of transactions to be committed in order. */
  }

  DBUG_ASSERT(is_open());
  if (likely(is_open()))                       // Should always be true
  {
    /*
      commit_id is 0 when the leader is the only entry in the group;
      otherwise it is taken from the leader's query_id.
    */
    commit_id= (last_in_queue == leader ? 0 : (uint64)leader->thd->query_id);
    DBUG_EXECUTE_IF("binlog_force_commit_id",
      {
        const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") };
        bool null_value;
        user_var_entry *entry=
          (user_var_entry*) my_hash_search(&leader->thd->user_vars,
                                           (uchar*) commit_name.str,
                                           commit_name.length);
        commit_id= entry->val_int(&null_value);
      });
    /*
      Commit every transaction in the queue.

      Note that we are doing this in a different thread than the one running
      the transaction! So we are limited in the operations we can do. In
      particular, we cannot call my_error() on behalf of a transaction, as
      that obtains the THD from thread local storage. Instead, we must set
      current->error and let the thread do the error reporting itself once
      we wake it up.
    */
    for (current= queue; current != NULL; current= current->next)
    {
      set_current_thd(current->thd);
      binlog_cache_mngr *cache_mngr= current->cache_mngr;

      /*
        We already checked before that at least one cache is non-empty; if both
        are empty we would have skipped calling into here.
      */
      DBUG_ASSERT(!cache_mngr->stmt_cache.empty() || !cache_mngr->trx_cache.empty());

      /* Save errno right away; later calls may overwrite it. */
      if (unlikely((current->error= write_transaction_or_stmt(current,
                                                              commit_id))))
        current->commit_errno= errno;

      /* Record the binlog file name and write position after this entry. */
      strmake_buf(cache_mngr->last_commit_pos_file, log_file_name);
      commit_offset= my_b_write_tell(&log_file);
      cache_mngr->last_commit_pos_offset= commit_offset;
      if (cache_mngr->using_xa && cache_mngr->xa_xid)
      {
        /*
          If all storage engines support commit_checkpoint_request(), then we
          do not need to keep track of when this XID is durably committed.
          Instead we will just ask the storage engine to durably commit all its
          XIDs when we rotate a binlog file.
        */
        if (current->need_unlog)
        {
          xid_count++;
          cache_mngr->need_unlog= true;
          cache_mngr->binlog_id= binlog_id;
        }
        else
          cache_mngr->need_unlog= false;

        cache_mngr->delayed_error= false;
      }
    }
    /* Restore the leader as the current THD of this thread. */
    set_current_thd(leader->thd);

    bool synced= 0;
    if (unlikely(flush_and_sync(&synced)))
    {
      /*
        The flush/sync failed: flag every entry that has no error yet as a
        write error. Entries that already failed keep their original error.
      */
      for (current= queue; current != NULL; current= current->next)
      {
        if (!current->error)
        {
          current->error= ER_ERROR_ON_WRITE;
          current->commit_errno= errno;
          current->error_cache= NULL;
        }
      }
    }
    else
    {
      bool any_error= false;

      mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
      mysql_mutex_assert_owner(&LOCK_log);
      mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
      mysql_mutex_assert_not_owner(&LOCK_commit_ordered);

      /* Report each successfully written entry's end position to semi-sync. */
      for (current= queue; current != NULL; current= current->next)
      {
#ifdef HAVE_REPLICATION
        if (likely(!current->error) &&
            unlikely(repl_semisync_master.
                     report_binlog_update(current->thd,
                                          current->cache_mngr->
                                          last_commit_pos_file,
                                          current->cache_mngr->
                                          last_commit_pos_offset)))
        {
          current->error= ER_ERROR_ON_WRITE;
          current->commit_errno= -1;
          current->error_cache= NULL;
          any_error= true;
        }
#endif
      }

      /*
        update binlog_end_pos so it can be read by dump thread
        Note: must be _after_ the RUN_HOOK(after_flush) or else
        semi-sync might not have put the transaction into
        its list before dump-thread tries to send it
      */
      update_binlog_end_pos(commit_offset);

      if (unlikely(any_error))
        sql_print_error("Failed to run 'after_flush' hooks");
    }

    /*
      If any commit_events are Xid_log_event, increase the number of pending
      XIDs in current binlog (it's decreased in ::unlog()). When the count in
      a (not active) binlog file reaches zero, we know that it is no longer
      needed in XA recovery, and we can log a new binlog checkpoint event.
    */
    if (xid_count > 0)
    {
      mark_xids_active(binlog_id, xid_count);
    }

    if (rotate(false, &check_purge))
    {
      /*
        If we fail to rotate, which thread should get the error?
        We give the error to the leader, as any my_error() thrown inside
        rotate() will have been registered for the leader THD.

        However we must not return error from here - that would cause
        ha_commit_trans() to abort and rollback the transaction, which would
        leave an inconsistent state with the transaction committed in the
        binlog but rolled back in the engine.

        Instead set a flag so that we can return error later, from unlog(),
        when the transaction has been safely committed in the engine.
      */
      leader->cache_mngr->delayed_error= true;
      my_error(ER_ERROR_ON_WRITE, MYF(ME_ERROR_LOG), name, errno);
      check_purge= false;
    }
    /* In case of binlog rotate, update the correct current binlog offset. */
    commit_offset= my_b_write_tell(&log_file);
  }

  DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_after_binlog_sync");
  mysql_mutex_lock(&LOCK_after_binlog_sync);
  /*
    We cannot unlock LOCK_log until we have locked LOCK_after_binlog_sync;
    otherwise scheduling could allow the next group commit to run ahead of us,
    messing up the order of commit_ordered() calls. But as soon as
    LOCK_after_binlog_sync is obtained, we can let the next group commit start.
  */
  mysql_mutex_unlock(&LOCK_log);

  DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log");

  /*
    Loop through threads and run the binlog_sync hook
  */
  {
    mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
    mysql_mutex_assert_not_owner(&LOCK_log);
    mysql_mutex_assert_owner(&LOCK_after_binlog_sync);
    mysql_mutex_assert_not_owner(&LOCK_commit_ordered);

    /*
      'first' and 'last' are maintained per iteration but are not read by
      anything else in this loop (hence the 'unused' attribute).
    */
    bool first __attribute__((unused))= true;
    bool last __attribute__((unused));
    for (current= queue; current != NULL; current= current->next)
    {
      last= current->next == NULL;
#ifdef HAVE_REPLICATION
      /* The result of wait_after_sync() becomes the entry's error code. */
      if (likely(!current->error))
        current->error=
          repl_semisync_master.wait_after_sync(current->cache_mngr->
                                               last_commit_pos_file,
                                               current->cache_mngr->
                                               last_commit_pos_offset);
#endif
      first= false;
    }
  }

  DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");

  mysql_mutex_lock(&LOCK_commit_ordered);
  DBUG_EXECUTE_IF("crash_before_engine_commit",
      {
        DBUG_SUICIDE();
      });
  last_commit_pos_offset= commit_offset;

  /*
    Unlock LOCK_after_binlog_sync only *after* LOCK_commit_ordered has been
    acquired so that groups can not reorder for the different stages of
    the group commit procedure.
  */
  mysql_mutex_unlock(&LOCK_after_binlog_sync);
  DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_after_binlog_sync");
  ++num_group_commits;

  if (!opt_optimize_thread_scheduling)
  {
    /*
      If we want to run commit_ordered() each in the transaction's own thread
      context, then we need to mark the queue reserved; we need to finish all
      threads in one group commit before the next group commit can be allowed
      to proceed, and we cannot unlock a simple pthreads mutex in a different
      thread from the one that locked it.
    */

    while (group_commit_queue_busy)
      mysql_cond_wait(&COND_queue_busy, &LOCK_commit_ordered);
    group_commit_queue_busy= TRUE;

    /*
      Set these so parent can run checkpoint_and_purge() in last thread.
      (When using optimized thread scheduling, we run checkpoint_and_purge()
      in this function, so parent does not need to and we need not set these
      values).
    */
    last_in_queue->check_purge= check_purge;
    last_in_queue->binlog_id= binlog_id;

    /* Note that we return with LOCK_commit_ordered locked! */
    DBUG_VOID_RETURN;
  }

  /*
    Wakeup each participant waiting for our group commit, first calling the
    commit_ordered() methods for any transactions doing 2-phase commit.
  */
  current= queue;
  while (current != NULL)
  {
    group_commit_entry *next;

    DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered");
    ++num_commits;
    if (current->cache_mngr->using_xa && likely(!current->error) &&
        DBUG_EVALUATE_IF("skip_commit_ordered", 0, 1))
      run_commit_ordered(current->thd, current->all);
    current->thd->wakeup_subsequent_commits(current->error);

    /*
      Careful not to access current->next after waking up the other thread! As
      it may change immediately after wakeup.
    */
    next= current->next;
    if (current != leader)                      // Don't wake up ourself
    {
      /* Two wakeup paths, selected by how the entry was queued. */
      if (current->queued_by_other)
        current->thd->wait_for_commit_ptr->wakeup(current->error);
      else
        current->thd->signal_wakeup_ready();
    }
    current= next;
  }
  DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered");
  mysql_mutex_unlock(&LOCK_commit_ordered);
  DEBUG_SYNC(leader->thd, "commit_after_group_release_commit_ordered");

  /* Deferred until here, after all locks have been released. */
  if (check_purge)
    checkpoint_and_purge(binlog_id);

  DBUG_VOID_RETURN;
}
8285
8286
8287 int
write_transaction_or_stmt(group_commit_entry * entry,uint64 commit_id)8288 MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
8289 uint64 commit_id)
8290 {
8291 binlog_cache_mngr *mngr= entry->cache_mngr;
8292 DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt");
8293
8294 if (write_gtid_event(entry->thd, false, entry->using_trx_cache, commit_id))
8295 DBUG_RETURN(ER_ERROR_ON_WRITE);
8296
8297 if (entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
8298 write_cache(entry->thd, mngr->get_binlog_cache_log(FALSE)))
8299 {
8300 entry->error_cache= &mngr->stmt_cache.cache_log;
8301 DBUG_RETURN(ER_ERROR_ON_WRITE);
8302 }
8303
8304 if (entry->using_trx_cache && !mngr->trx_cache.empty())
8305 {
8306 DBUG_EXECUTE_IF("crash_before_writing_xid",
8307 {
8308 if ((write_cache(entry->thd,
8309 mngr->get_binlog_cache_log(TRUE))))
8310 DBUG_PRINT("info", ("error writing binlog cache"));
8311 else
8312 flush_and_sync(0);
8313
8314 DBUG_PRINT("info", ("crashing before writing xid"));
8315 DBUG_SUICIDE();
8316 });
8317
8318 if (write_cache(entry->thd, mngr->get_binlog_cache_log(TRUE)))
8319 {
8320 entry->error_cache= &mngr->trx_cache.cache_log;
8321 DBUG_RETURN(ER_ERROR_ON_WRITE);
8322 }
8323 }
8324
8325 DBUG_EXECUTE_IF("inject_error_writing_xid",
8326 {
8327 entry->error_cache= NULL;
8328 errno= 28;
8329 DBUG_RETURN(ER_ERROR_ON_WRITE);
8330 });
8331
8332 if (write_event(entry->end_event))
8333 {
8334 entry->error_cache= NULL;
8335 DBUG_RETURN(ER_ERROR_ON_WRITE);
8336 }
8337 status_var_add(entry->thd->status_var.binlog_bytes_written,
8338 entry->end_event->data_written);
8339
8340 if (entry->incident_event)
8341 {
8342 if (write_event(entry->incident_event))
8343 {
8344 entry->error_cache= NULL;
8345 DBUG_RETURN(ER_ERROR_ON_WRITE);
8346 }
8347 }
8348
8349 if (unlikely(mngr->get_binlog_cache_log(FALSE)->error))
8350 {
8351 entry->error_cache= &mngr->stmt_cache.cache_log;
8352 DBUG_RETURN(ER_ERROR_ON_WRITE);
8353 }
8354 if (unlikely(mngr->get_binlog_cache_log(TRUE)->error)) // Error on read
8355 {
8356 entry->error_cache= &mngr->trx_cache.cache_log;
8357 DBUG_RETURN(ER_ERROR_ON_WRITE);
8358 }
8359
8360 DBUG_RETURN(0);
8361 }
8362
8363
8364 /*
8365 Wait for sufficient commits to queue up for group commit, according to the
8366 values of binlog_commit_wait_count and binlog_commit_wait_usec.
8367
8368 Note that this function may release and re-acquire LOCK_log and
8369 LOCK_prepare_ordered if it needs to wait.
8370 */
8371
8372 void
wait_for_sufficient_commits()8373 MYSQL_BIN_LOG::wait_for_sufficient_commits()
8374 {
8375 size_t count;
8376 group_commit_entry *e;
8377 group_commit_entry *last_head;
8378 struct timespec wait_until;
8379
8380 mysql_mutex_assert_owner(&LOCK_log);
8381 mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8382
8383 for (e= last_head= group_commit_queue, count= 0; e; e= e->next)
8384 {
8385 if (++count >= opt_binlog_commit_wait_count)
8386 {
8387 group_commit_trigger_count++;
8388 return;
8389 }
8390 if (unlikely(e->thd->has_waiter))
8391 {
8392 group_commit_trigger_lock_wait++;
8393 return;
8394 }
8395 }
8396
8397 mysql_mutex_unlock(&LOCK_log);
8398 set_timespec_nsec(wait_until, (ulonglong)1000*opt_binlog_commit_wait_usec);
8399
8400 for (;;)
8401 {
8402 int err;
8403 group_commit_entry *head;
8404
8405 err= mysql_cond_timedwait(&COND_prepare_ordered, &LOCK_prepare_ordered,
8406 &wait_until);
8407 if (err == ETIMEDOUT)
8408 {
8409 group_commit_trigger_timeout++;
8410 break;
8411 }
8412 if (unlikely(last_head->thd->has_waiter))
8413 {
8414 group_commit_trigger_lock_wait++;
8415 break;
8416 }
8417 head= group_commit_queue;
8418 for (e= head; e && e != last_head; e= e->next)
8419 {
8420 ++count;
8421 if (unlikely(e->thd->has_waiter))
8422 {
8423 group_commit_trigger_lock_wait++;
8424 goto after_loop;
8425 }
8426 }
8427 if (count >= opt_binlog_commit_wait_count)
8428 {
8429 group_commit_trigger_count++;
8430 break;
8431 }
8432 last_head= head;
8433 }
8434 after_loop:
8435
8436 /*
8437 We must not wait for LOCK_log while holding LOCK_prepare_ordered.
8438 LOCK_log can be held for long periods (eg. we do I/O under it), while
8439 LOCK_prepare_ordered must only be held for short periods.
8440
8441 In addition, waiting for LOCK_log while holding LOCK_prepare_ordered would
8442 violate locking order of LOCK_log-before-LOCK_prepare_ordered. This could
8443 cause SAFEMUTEX warnings (even if it cannot actually deadlock with current
8444 code, as there can be at most one group commit leader thread at a time).
8445
8446 So release and re-acquire LOCK_prepare_ordered if we need to wait for the
8447 LOCK_log.
8448 */
8449 if (mysql_mutex_trylock(&LOCK_log))
8450 {
8451 mysql_mutex_unlock(&LOCK_prepare_ordered);
8452 mysql_mutex_lock(&LOCK_log);
8453 mysql_mutex_lock(&LOCK_prepare_ordered);
8454 }
8455 }
8456
8457
8458 void
binlog_trigger_immediate_group_commit()8459 MYSQL_BIN_LOG::binlog_trigger_immediate_group_commit()
8460 {
8461 group_commit_entry *head;
8462 mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8463 head= group_commit_queue;
8464 if (head)
8465 {
8466 head->thd->has_waiter= true;
8467 mysql_cond_signal(&COND_prepare_ordered);
8468 }
8469 }
8470
8471
8472 /*
8473 This function is called when a transaction T1 goes to wait for another
8474 transaction T2. It is used to cut short any binlog group commit delay from
8475 --binlog-commit-wait-count in the case where another transaction is stalled
8476 on the wait due to conflicting row locks.
8477
8478 If T2 is already ready to group commit, any waiting group commit will be
8479 signalled to proceed immediately. Otherwise, a flag will be set in T2, and
8480 when T2 later becomes ready, immediate group commit will be triggered.
8481 */
8482 void
binlog_report_wait_for(THD * thd1,THD * thd2)8483 binlog_report_wait_for(THD *thd1, THD *thd2)
8484 {
8485 if (opt_binlog_commit_wait_count == 0)
8486 return;
8487 mysql_mutex_lock(&LOCK_prepare_ordered);
8488 thd2->has_waiter= true;
8489 if (thd2->waiting_on_group_commit)
8490 mysql_bin_log.binlog_trigger_immediate_group_commit();
8491 mysql_mutex_unlock(&LOCK_prepare_ordered);
8492 }
8493
8494
8495 /**
8496 Wait until we get a signal that the relay log has been updated.
8497
8498 @param thd Thread variable
8499
8500 @note
8501 One must have a lock on LOCK_log before calling this function.
8502 This lock will be released before return! That's required by
8503 THD::enter_cond() (see NOTES in sql_class.h).
8504 */
8505
wait_for_update_relay_log(THD * thd)8506 void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
8507 {
8508 PSI_stage_info old_stage;
8509 DBUG_ENTER("wait_for_update_relay_log");
8510
8511 mysql_mutex_assert_owner(&LOCK_log);
8512 thd->ENTER_COND(&COND_relay_log_updated, &LOCK_log,
8513 &stage_slave_has_read_all_relay_log,
8514 &old_stage);
8515 mysql_cond_wait(&COND_relay_log_updated, &LOCK_log);
8516 thd->EXIT_COND(&old_stage);
8517 DBUG_VOID_RETURN;
8518 }
8519
8520 /**
8521 Wait until we get a signal that the binary log has been updated.
8522 Applies to master only.
8523
8524 NOTES
8525 @param[in] thd a THD struct
8526 @param[in] timeout a pointer to a timespec;
8527 NULL means to wait w/o timeout.
8528 @retval 0 if got signalled on update
8529 @retval non-0 if wait timeout elapsed
8530 @note
8531 LOCK_log must be taken before calling this function.
8532 LOCK_log is being released while the thread is waiting.
8533 LOCK_log is released by the caller.
8534 */
8535
wait_for_update_binlog_end_pos(THD * thd,struct timespec * timeout)8536 int MYSQL_BIN_LOG::wait_for_update_binlog_end_pos(THD* thd,
8537 struct timespec *timeout)
8538 {
8539 int ret= 0;
8540 DBUG_ENTER("wait_for_update_binlog_end_pos");
8541
8542 thd_wait_begin(thd, THD_WAIT_BINLOG);
8543 mysql_mutex_assert_owner(get_binlog_end_pos_lock());
8544 if (!timeout)
8545 mysql_cond_wait(&COND_bin_log_updated, get_binlog_end_pos_lock());
8546 else
8547 ret= mysql_cond_timedwait(&COND_bin_log_updated, get_binlog_end_pos_lock(),
8548 timeout);
8549 thd_wait_end(thd);
8550 DBUG_RETURN(ret);
8551 }
8552
8553
8554 /**
8555 Close the log file.
8556
8557 @param exiting Bitmask for one or more of the following bits:
8558 - LOG_CLOSE_INDEX : if we should close the index file
8559 - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
8560 at once after close.
8561 - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
8562 - LOG_CLOSE_DELAYED_CLOSE : do not yet close the file and clear the
8563 LOG_EVENT_BINLOG_IN_USE_F flag
8564
8565 @note
8566 One can do an open on the object at once after doing a close.
8567 The internal structures are not freed until cleanup() is called
8568 */
8569
close(uint exiting)8570 void MYSQL_BIN_LOG::close(uint exiting)
8571 { // One can't set log_type here!
8572 bool failed_to_save_state= false;
8573 DBUG_ENTER("MYSQL_BIN_LOG::close");
8574 DBUG_PRINT("enter",("exiting: %d", (int) exiting));
8575
8576 mysql_mutex_assert_owner(&LOCK_log);
8577
8578 if (log_state == LOG_OPENED)
8579 {
8580 #ifdef HAVE_REPLICATION
8581 if (log_type == LOG_BIN &&
8582 (exiting & LOG_CLOSE_STOP_EVENT))
8583 {
8584 Stop_log_event s;
8585 // the checksumming rule for relay-log case is similar to Rotate
8586 s.checksum_alg= is_relay_log ? relay_log_checksum_alg
8587 : (enum_binlog_checksum_alg)binlog_checksum_options;
8588 DBUG_ASSERT(!is_relay_log ||
8589 relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
8590 write_event(&s);
8591 bytes_written+= s.data_written;
8592 flush_io_cache(&log_file);
8593 update_binlog_end_pos();
8594
8595 /*
8596 When we shut down server, write out the binlog state to a separate
8597 file so we do not have to scan an entire binlog file to recover it
8598 at next server start.
8599
8600 Note that this must be written and synced to disk before marking the
8601 last binlog file as "not crashed".
8602 */
8603 if (!is_relay_log && write_state_to_file())
8604 {
8605 sql_print_error("Failed to save binlog GTID state during shutdown. "
8606 "Binlog will be marked as crashed, so that crash "
8607 "recovery can recover the state at next server "
8608 "startup.");
8609 /*
8610 Leave binlog file marked as crashed, so we can recover state by
8611 scanning it now that we failed to write out the state properly.
8612 */
8613 failed_to_save_state= true;
8614 }
8615 }
8616 #endif /* HAVE_REPLICATION */
8617
8618 /* don't pwrite in a file opened with O_APPEND - it doesn't work */
8619 if (log_file.type == WRITE_CACHE && log_type == LOG_BIN
8620 && !(exiting & LOG_CLOSE_DELAYED_CLOSE))
8621 {
8622 my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
8623 if (!failed_to_save_state)
8624 clear_inuse_flag_when_closing(log_file.file);
8625 /*
8626 Restore position so that anything we have in the IO_cache is written
8627 to the correct position.
8628 We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
8629 original position on system that doesn't support pwrite().
8630 */
8631 mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
8632 }
8633
8634 /* this will cleanup IO_CACHE, sync and close the file */
8635 MYSQL_LOG::close(exiting);
8636 }
8637
8638 /*
8639 The following test is needed even if is_open() is not set, as we may have
8640 called a not complete close earlier and the index file is still open.
8641 */
8642
8643 if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
8644 {
8645 end_io_cache(&index_file);
8646 if (unlikely(mysql_file_close(index_file.file, MYF(0)) < 0) &&
8647 ! write_error)
8648 {
8649 write_error= 1;
8650 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), index_file_name, errno);
8651 }
8652 }
8653 log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
8654 my_free(name);
8655 name= NULL;
8656 DBUG_VOID_RETURN;
8657 }
8658
8659
8660 /*
8661 Clear the LOG_EVENT_BINLOG_IN_USE_F; this marks the binlog file as cleanly
8662 closed and not needing crash recovery.
8663 */
clear_inuse_flag_when_closing(File file)8664 void MYSQL_BIN_LOG::clear_inuse_flag_when_closing(File file)
8665 {
8666 my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
8667 uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F
8668 mysql_file_pwrite(file, &flags, 1, offset, MYF(0));
8669 }
8670
8671
set_max_size(ulong max_size_arg)8672 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
8673 {
8674 /*
8675 We need to take locks, otherwise this may happen:
8676 new_file() is called, calls open(old_max_size), then before open() starts,
8677 set_max_size() sets max_size to max_size_arg, then open() starts and
8678 uses the old_max_size argument, so max_size_arg has been overwritten and
8679 it's like if the SET command was never run.
8680 */
8681 DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
8682 mysql_mutex_lock(&LOCK_log);
8683 if (is_open())
8684 max_size= max_size_arg;
8685 mysql_mutex_unlock(&LOCK_log);
8686 DBUG_VOID_RETURN;
8687 }
8688
8689
8690 /**
8691 Check if a string is a valid number.
8692
8693 @param str String to test
8694 @param res Store value here
8695 @param allow_wildcards Set to 1 if we should ignore '%' and '_'
8696
8697 @note
8698 For the moment the allow_wildcards argument is not used
8699 Should be move to some other file.
8700
8701 @retval
8702 1 String is a number
8703 @retval
8704 0 String is not a number
8705 */
8706
test_if_number(const char * str,ulong * res,bool allow_wildcards)8707 static bool test_if_number(const char *str, ulong *res, bool allow_wildcards)
8708 {
8709 int flag;
8710 const char *start;
8711 DBUG_ENTER("test_if_number");
8712
8713 flag=0; start=str;
8714 while (*str++ == ' ') ;
8715 if (*--str == '-' || *str == '+')
8716 str++;
8717 while (my_isdigit(files_charset_info,*str) ||
8718 (allow_wildcards && (*str == wild_many || *str == wild_one)))
8719 {
8720 flag=1;
8721 str++;
8722 }
8723 if (*str == '.')
8724 {
8725 for (str++ ;
8726 my_isdigit(files_charset_info,*str) ||
8727 (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
8728 str++, flag=1) ;
8729 }
8730 if (*str != 0 || flag == 0)
8731 DBUG_RETURN(0);
8732 if (res)
8733 *res=atol(start);
8734 DBUG_RETURN(1); /* Number ok */
8735 } /* test_if_number */
8736
8737
/*
  Print 'message' to the error log, followed by a description of the last
  system error where one is available.

  On Windows the text for GetLastError() is obtained with FormatMessage();
  if that fails, only 'message' is printed. Elsewhere strerror(errno) is
  used when HAVE_STRERROR is defined, and perror() otherwise.
*/
void sql_perror(const char *message)
{
#if defined(_WIN32)
  char* sys_msg;
  DWORD last_err= GetLastError();
  DWORD msg_len=
    FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
                  FORMAT_MESSAGE_IGNORE_INSERTS, NULL, last_err,
                  MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&sys_msg,
                  0, NULL );
  if (msg_len > 0)
  {
    sql_print_error("%s: %s",message, sys_msg);
    /* The buffer was allocated by FormatMessage(). */
    LocalFree((HLOCAL)sys_msg);
  }
  else
    sql_print_error("%s", message);
#elif defined(HAVE_STRERROR)
  sql_print_error("%s: %s",message, strerror(errno));
#else
  perror(message);
#endif
}
8760
8761
8762 /*
8763 Change the file associated with two output streams. Used to
8764 redirect stdout and stderr to a file. The streams are reopened
8765 only for appending (writing at end of file).
8766 */
reopen_fstreams(const char * filename,FILE * outstream,FILE * errstream)8767 bool reopen_fstreams(const char *filename, FILE *outstream, FILE *errstream)
8768 {
8769 if ((outstream && !my_freopen(filename, "a", outstream)) ||
8770 (errstream && !my_freopen(filename, "a", errstream)))
8771 {
8772 my_error(ER_CANT_CREATE_FILE, MYF(0), filename, errno);
8773 return TRUE;
8774 }
8775
8776 /* The error stream must be unbuffered. */
8777 if (errstream)
8778 setbuf(errstream, NULL);
8779
8780 return FALSE;
8781 }
8782
8783
8784 /*
8785 Unfortunately, there seems to be no good way
8786 to restore the original streams upon failure.
8787 */
redirect_std_streams(const char * file)8788 static bool redirect_std_streams(const char *file)
8789 {
8790 if (reopen_fstreams(file, stdout, stderr))
8791 return TRUE;
8792
8793 setbuf(stderr, NULL);
8794 return FALSE;
8795 }
8796
8797
flush_error_log()8798 bool flush_error_log()
8799 {
8800 bool result= 0;
8801 if (opt_error_log)
8802 {
8803 mysql_mutex_lock(&LOCK_error_log);
8804 if (redirect_std_streams(log_error_file))
8805 result= 1;
8806 mysql_mutex_unlock(&LOCK_error_log);
8807 }
8808 return result;
8809 }
8810
8811 #ifdef _WIN32
8812 struct eventlog_source
8813 {
8814 HANDLE handle;
eventlog_sourceeventlog_source8815 eventlog_source()
8816 {
8817 setup_windows_event_source();
8818 handle = RegisterEventSource(NULL, "MariaDB");
8819 }
8820
~eventlog_sourceeventlog_source8821 ~eventlog_source()
8822 {
8823 if (handle)
8824 DeregisterEventSource(handle);
8825 }
8826 };
8827
8828 static eventlog_source eventlog;
8829
print_buffer_to_nt_eventlog(enum loglevel level,char * buff,size_t length,size_t buffLen)8830 static void print_buffer_to_nt_eventlog(enum loglevel level, char *buff,
8831 size_t length, size_t buffLen)
8832 {
8833 HANDLE event= eventlog.handle;
8834 char *buffptr= buff;
8835 DBUG_ENTER("print_buffer_to_nt_eventlog");
8836
8837 /* Add ending CR/LF's to string, overwrite last chars if necessary */
8838 strmov(buffptr+MY_MIN(length, buffLen-5), "\r\n\r\n");
8839
8840 if (event)
8841 {
8842 switch (level) {
8843 case ERROR_LEVEL:
8844 ReportEvent(event, EVENTLOG_ERROR_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
8845 (LPCSTR*)&buffptr, NULL);
8846 break;
8847 case WARNING_LEVEL:
8848 ReportEvent(event, EVENTLOG_WARNING_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
8849 (LPCSTR*) &buffptr, NULL);
8850 break;
8851 case INFORMATION_LEVEL:
8852 ReportEvent(event, EVENTLOG_INFORMATION_TYPE, 0, MSG_DEFAULT, NULL, 1,
8853 0, (LPCSTR*) &buffptr, NULL);
8854 break;
8855 }
8856 }
8857
8858 DBUG_VOID_RETURN;
8859 }
8860 #endif /* _WIN32 */
8861
8862
8863 #ifndef EMBEDDED_LIBRARY
print_buffer_to_file(enum loglevel level,const char * buffer,size_t length)8864 static void print_buffer_to_file(enum loglevel level, const char *buffer,
8865 size_t length)
8866 {
8867 time_t skr;
8868 struct tm tm_tmp;
8869 struct tm *start;
8870 THD *thd= 0;
8871 size_t tag_length= 0;
8872 char tag[NAME_LEN];
8873 DBUG_ENTER("print_buffer_to_file");
8874 DBUG_PRINT("enter",("buffer: %s", buffer));
8875
8876 if (mysqld_server_initialized && (thd= current_thd))
8877 {
8878 if (thd->connection_name.length)
8879 {
8880 /*
8881 Add tag for slaves so that the user can see from which connection
8882 the error originates.
8883 */
8884 tag_length= my_snprintf(tag, sizeof(tag),
8885 ER_THD(thd, ER_MASTER_LOG_PREFIX),
8886 (int) thd->connection_name.length,
8887 thd->connection_name.str);
8888 }
8889 }
8890
8891 mysql_mutex_lock(&LOCK_error_log);
8892
8893 skr= my_time(0);
8894 localtime_r(&skr, &tm_tmp);
8895 start=&tm_tmp;
8896
8897 fprintf(stderr, "%d-%02d-%02d %2d:%02d:%02d %lu [%s] %.*s%.*s\n",
8898 start->tm_year + 1900,
8899 start->tm_mon+1,
8900 start->tm_mday,
8901 start->tm_hour,
8902 start->tm_min,
8903 start->tm_sec,
8904 (unsigned long) (thd ? thd->thread_id : 0),
8905 (level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
8906 "Warning" : "Note"),
8907 (int) tag_length, tag,
8908 (int) length, buffer);
8909
8910 fflush(stderr);
8911
8912 mysql_mutex_unlock(&LOCK_error_log);
8913 DBUG_VOID_RETURN;
8914 }
8915
8916 /**
8917 Prints a printf style message to the error log and, under NT, to the
8918 Windows event log.
8919
8920 This function prints the message into a buffer and then sends that buffer
8921 to other functions to write that message to other logging sources.
8922
8923 @param level The level of the msg significance
8924 @param format Printf style format of message
8925 @param args va_list list of arguments for the message
8926
8927 @returns
8928 The function always returns 0. The return value is present in the
8929 signature to be compatible with other logging routines, which could
8930 return an error (e.g. logging to the log tables)
8931 */
vprint_msg_to_log(enum loglevel level,const char * format,va_list args)8932 int vprint_msg_to_log(enum loglevel level, const char *format, va_list args)
8933 {
8934 char buff[1024];
8935 size_t length;
8936 DBUG_ENTER("vprint_msg_to_log");
8937
8938 length= my_vsnprintf(buff, sizeof(buff), format, args);
8939 print_buffer_to_file(level, buff, length);
8940
8941 #ifdef _WIN32
8942 print_buffer_to_nt_eventlog(level, buff, length, sizeof(buff));
8943 #endif
8944
8945 DBUG_RETURN(0);
8946 }
8947 #endif /* EMBEDDED_LIBRARY */
8948
8949
sql_print_error(const char * format,...)8950 void sql_print_error(const char *format, ...)
8951 {
8952 va_list args;
8953 DBUG_ENTER("sql_print_error");
8954
8955 va_start(args, format);
8956 error_log_print(ERROR_LEVEL, format, args);
8957 va_end(args);
8958
8959 DBUG_VOID_RETURN;
8960 }
8961
8962
sql_print_warning(const char * format,...)8963 void sql_print_warning(const char *format, ...)
8964 {
8965 va_list args;
8966 DBUG_ENTER("sql_print_warning");
8967
8968 va_start(args, format);
8969 error_log_print(WARNING_LEVEL, format, args);
8970 va_end(args);
8971
8972 DBUG_VOID_RETURN;
8973 }
8974
8975
sql_print_information(const char * format,...)8976 void sql_print_information(const char *format, ...)
8977 {
8978 va_list args;
8979 DBUG_ENTER("sql_print_information");
8980
8981 va_start(args, format);
8982 sql_print_information_v(format, args);
8983 va_end(args);
8984
8985 DBUG_VOID_RETURN;
8986 }
8987
sql_print_information_v(const char * format,va_list ap)8988 void sql_print_information_v(const char *format, va_list ap)
8989 {
8990 if (disable_log_notes)
8991 return; // Skip notes during start/shutdown
8992
8993 error_log_print(INFORMATION_LEVEL, format, ap);
8994 }
8995
8996 void
run_prepare_ordered(THD * thd,bool all)8997 TC_LOG::run_prepare_ordered(THD *thd, bool all)
8998 {
8999 Ha_trx_info *ha_info=
9000 all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
9001
9002 mysql_mutex_assert_owner(&LOCK_prepare_ordered);
9003 for (; ha_info; ha_info= ha_info->next())
9004 {
9005 handlerton *ht= ha_info->ht();
9006 if (!ht->prepare_ordered)
9007 continue;
9008 ht->prepare_ordered(ht, thd, all);
9009 }
9010 }
9011
9012
9013 void
run_commit_ordered(THD * thd,bool all)9014 TC_LOG::run_commit_ordered(THD *thd, bool all)
9015 {
9016 Ha_trx_info *ha_info=
9017 all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
9018
9019 mysql_mutex_assert_owner(&LOCK_commit_ordered);
9020 for (; ha_info; ha_info= ha_info->next())
9021 {
9022 handlerton *ht= ha_info->ht();
9023 if (!ht->commit_ordered)
9024 continue;
9025 ht->commit_ordered(ht, thd, all);
9026 DEBUG_SYNC(thd, "commit_after_run_commit_ordered");
9027 }
9028 }
9029
9030
log_and_order(THD * thd,my_xid xid,bool all,bool need_prepare_ordered,bool need_commit_ordered)9031 int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all,
9032 bool need_prepare_ordered,
9033 bool need_commit_ordered)
9034 {
9035 int cookie;
9036 struct commit_entry entry;
9037 bool UNINIT_VAR(is_group_commit_leader);
9038
9039 if (need_prepare_ordered)
9040 {
9041 mysql_mutex_lock(&LOCK_prepare_ordered);
9042 run_prepare_ordered(thd, all);
9043 if (need_commit_ordered)
9044 {
9045 /*
9046 Must put us in queue so we can run_commit_ordered() in same sequence
9047 as we did run_prepare_ordered().
9048 */
9049 thd->clear_wakeup_ready();
9050 entry.thd= thd;
9051 commit_entry *previous_queue= commit_ordered_queue;
9052 entry.next= previous_queue;
9053 commit_ordered_queue= &entry;
9054 is_group_commit_leader= (previous_queue == NULL);
9055 }
9056 mysql_mutex_unlock(&LOCK_prepare_ordered);
9057 }
9058
9059 if (thd->wait_for_prior_commit())
9060 return 0;
9061
9062 cookie= 0;
9063 if (xid)
9064 cookie= log_one_transaction(xid);
9065
9066 if (need_commit_ordered)
9067 {
9068 if (need_prepare_ordered)
9069 {
9070 /*
9071 We did the run_prepare_ordered() serialised, then ran the log_xid() in
9072 parallel. Now we have to do run_commit_ordered() serialised in the
9073 same sequence as run_prepare_ordered().
9074
9075 We do this starting from the head of the queue, each thread doing
9076 run_commit_ordered() and signalling the next in queue.
9077 */
9078 if (is_group_commit_leader)
9079 {
9080 /* The first in queue starts the ball rolling. */
9081 mysql_mutex_lock(&LOCK_prepare_ordered);
9082 while (commit_ordered_queue_busy)
9083 mysql_cond_wait(&COND_queue_busy, &LOCK_prepare_ordered);
9084 commit_entry *queue= commit_ordered_queue;
9085 commit_ordered_queue= NULL;
9086 /*
9087 Mark the queue busy while we bounce it from one thread to the
9088 next.
9089 */
9090 commit_ordered_queue_busy= true;
9091 mysql_mutex_unlock(&LOCK_prepare_ordered);
9092
9093 /* Reverse the queue list so we get correct order. */
9094 commit_entry *prev= NULL;
9095 while (queue)
9096 {
9097 commit_entry *next= queue->next;
9098 queue->next= prev;
9099 prev= queue;
9100 queue= next;
9101 }
9102 DBUG_ASSERT(prev == &entry && prev->thd == thd);
9103 }
9104 else
9105 {
9106 /* Not first in queue; just wait until previous thread wakes us up. */
9107 thd->wait_for_wakeup_ready();
9108 }
9109 }
9110
9111 /* Only run commit_ordered() if log_xid was successful. */
9112 if (cookie)
9113 {
9114 mysql_mutex_lock(&LOCK_commit_ordered);
9115 run_commit_ordered(thd, all);
9116 mysql_mutex_unlock(&LOCK_commit_ordered);
9117 }
9118
9119 if (need_prepare_ordered)
9120 {
9121 commit_entry *next= entry.next;
9122 if (next)
9123 {
9124 next->thd->signal_wakeup_ready();
9125 }
9126 else
9127 {
9128 mysql_mutex_lock(&LOCK_prepare_ordered);
9129 commit_ordered_queue_busy= false;
9130 mysql_cond_signal(&COND_queue_busy);
9131 mysql_mutex_unlock(&LOCK_prepare_ordered);
9132 }
9133 }
9134 }
9135
9136 return cookie;
9137 }
9138
9139
9140 /********* transaction coordinator log for 2pc - mmap() based solution *******/
9141
9142 /*
9143 the log consists of a file, mapped to memory.
9144 file is divided into pages of tc_log_page_size size.
9145 (usable size of the first page is smaller because of the log header)
9146 there is a PAGE control structure for each page
9147 each page (or rather its PAGE control structure) can be in one of
9148 the three states - active, syncing, pool.
9149 there could be only one page in the active or syncing state,
9150 but many in pool - pool is a fifo queue.
9151 the usual lifecycle of a page is pool->active->syncing->pool.
9152 the "active" page is a page where new xid's are logged.
9153 the page stays active as long as the syncing slot is taken.
9154 the "syncing" page is being synced to disk. no new xid can be added to it.
9155 when the syncing is done the page is moved to a pool and an active page
9156 becomes "syncing".
9157
9158 the result of such an architecture is a natural "commit grouping" -
9159 If commits are coming faster than the system can sync, they do not
9160 stall. Instead, all commits that came since the last sync are
9161 logged to the same "active" page, and they all are synced with the next -
  one - sync. Thus, though individual commits are delayed, throughput
9163 is not decreasing.
9164
9165 when an xid is added to an active page, the thread of this xid waits
9166 for a page's condition until the page is synced. when syncing slot
  becomes vacant one of these waiters is awakened to take care of syncing.
9168 it syncs the page and signals all waiters that the page is synced.
9169 PAGE::waiters is used to count these waiters, and a page may never
9170 become active again until waiters==0 (that is all waiters from the
9171 previous sync have noticed that the sync was completed)
9172
9173 note, that the page becomes "dirty" and has to be synced only when a
9174 new xid is added into it. Removing a xid from a page does not make it
9175 dirty - we don't sync xid removals to disk.
9176 */
9177
/* Incremented by TC_LOG_MMAP::overflow() each time a thread has to wait. */
ulong tc_log_page_waits= 0;

#ifdef HAVE_MMAP

/*
  Size of the log header: the magic signature plus one more byte
  (open() stores total_ha_2pc in that byte).
*/
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)

/* Signature written at the start of the log by open(), checked by recover() */
static const uchar tc_log_magic[]={(uchar) 254, 0x23, 0x05, 0x74};

/* File length used by open() when a new log file is created */
ulong opt_tc_log_size;
/*
  tc_log_page_size is set from my_getpagesize() in open(); the two
  *_pages_used counters are maintained by get_active_from_pool() and
  delete_entry().
*/
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
9188
open(const char * opt_name)9189 int TC_LOG_MMAP::open(const char *opt_name)
9190 {
9191 uint i;
9192 bool crashed=FALSE;
9193 PAGE *pg;
9194
9195 DBUG_ASSERT(total_ha_2pc > 1);
9196 DBUG_ASSERT(opt_name && opt_name[0]);
9197
9198 tc_log_page_size= my_getpagesize();
9199
9200 fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
9201 if ((fd= mysql_file_open(key_file_tclog, logname, O_RDWR | O_CLOEXEC, MYF(0))) < 0)
9202 {
9203 if (my_errno != ENOENT)
9204 goto err;
9205 if (using_heuristic_recover())
9206 return 1;
9207 if ((fd= mysql_file_create(key_file_tclog, logname, CREATE_MODE,
9208 O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0)
9209 goto err;
9210 inited=1;
9211 file_length= opt_tc_log_size;
9212 if (mysql_file_chsize(fd, file_length, 0, MYF(MY_WME)))
9213 goto err;
9214 }
9215 else
9216 {
9217 inited= 1;
9218 crashed= TRUE;
9219 sql_print_information("Recovering after a crash using %s", opt_name);
9220 if (tc_heuristic_recover)
9221 {
9222 sql_print_error("Cannot perform automatic crash recovery when "
9223 "--tc-heuristic-recover is used");
9224 goto err;
9225 }
9226 file_length= mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
9227 if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
9228 goto err;
9229 }
9230
9231 data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
9232 MAP_NOSYNC|MAP_SHARED, fd, 0);
9233 if (data == MAP_FAILED)
9234 {
9235 my_errno=errno;
9236 goto err;
9237 }
9238 inited=2;
9239
9240 npages=(uint)file_length/tc_log_page_size;
9241 if (npages < 3) // to guarantee non-empty pool
9242 goto err;
9243 if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
9244 goto err;
9245 inited=3;
9246 for (pg=pages, i=0; i < npages; i++, pg++)
9247 {
9248 pg->next=pg+1;
9249 pg->waiters=0;
9250 pg->state=PS_POOL;
9251 mysql_mutex_init(key_PAGE_lock, &pg->lock, MY_MUTEX_INIT_FAST);
9252 mysql_cond_init(key_PAGE_cond, &pg->cond, 0);
9253 pg->ptr= pg->start=(my_xid *)(data + i*tc_log_page_size);
9254 pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
9255 pg->end=pg->start + pg->size;
9256 }
9257 pages[0].size=pages[0].free=
9258 (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
9259 pages[0].start=pages[0].end-pages[0].size;
9260 pages[npages-1].next=0;
9261 inited=4;
9262
9263 if (crashed && recover())
9264 goto err;
9265
9266 memcpy(data, tc_log_magic, sizeof(tc_log_magic));
9267 data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc;
9268 my_msync(fd, data, tc_log_page_size, MS_SYNC);
9269 inited=5;
9270
9271 mysql_mutex_init(key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
9272 mysql_mutex_init(key_LOCK_active, &LOCK_active, MY_MUTEX_INIT_FAST);
9273 mysql_mutex_init(key_LOCK_pool, &LOCK_pool, MY_MUTEX_INIT_FAST);
9274 mysql_mutex_init(key_LOCK_pending_checkpoint, &LOCK_pending_checkpoint,
9275 MY_MUTEX_INIT_FAST);
9276 mysql_cond_init(key_COND_active, &COND_active, 0);
9277 mysql_cond_init(key_COND_pool, &COND_pool, 0);
9278 mysql_cond_init(key_TC_LOG_MMAP_COND_queue_busy, &COND_queue_busy, 0);
9279
9280 inited=6;
9281
9282 syncing= 0;
9283 active=pages;
9284 DBUG_ASSERT(npages >= 2);
9285 pool=pages+1;
9286 pool_last_ptr= &((pages+npages-1)->next);
9287 commit_ordered_queue= NULL;
9288 commit_ordered_queue_busy= false;
9289
9290 return 0;
9291
9292 err:
9293 close();
9294 return 1;
9295 }
9296
/**
  there is no active page, let's get one from the pool.

  Two strategies here:
    -# take the first from the pool
    -# if there're waiters - take the one with the most free space.

  The caller must own LOCK_active (asserted below). On return the chosen
  page has been unlinked from the pool and stored in 'active', and its
  page lock is held - this function does not release it.

  @todo
    page merging. try to allocate adjacent page first,
    so that they can be flushed both in one sync
*/

void TC_LOG_MMAP::get_active_from_pool()
{
  /*
    p and best_p point to the link that refers to a page (either 'pool'
    or some page's 'next'), so the page can be unlinked through them.
  */
  PAGE **p, **best_p=0;
  int best_free;

  mysql_mutex_lock(&LOCK_pool);

  do
  {
    best_p= p= &pool;
    if ((*p)->waiters == 0 && (*p)->free > 0) // can the first page be used ?
      break;                                // yes - take it.

    best_free=0;            // no - trying second strategy
    for (p=&(*p)->next; *p; p=&(*p)->next)
    {
      if ((*p)->waiters == 0 && (*p)->free > best_free)
      {
        best_free=(*p)->free;
        best_p=p;
      }
    }
  }
  /*
    Nothing usable found: overflow() waits on COND_pool and always
    returns 1, so the scan is then repeated.
  */
  while ((*best_p == 0 || best_free == 0) && overflow());

  mysql_mutex_assert_owner(&LOCK_active);
  active=*best_p;

  /* Unlink the page from the pool. */
  if (!(*best_p)->next)
    pool_last_ptr= best_p;
  *best_p=(*best_p)->next;
  mysql_mutex_unlock(&LOCK_pool);

  /* Taken here and still held when we return. */
  mysql_mutex_lock(&active->lock);
  if (active->free == active->size) // we've chosen an empty page
  {
    tc_log_cur_pages_used++;
    set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
  }
}
9350
9351 /**
9352 @todo
9353 perhaps, increase log size ?
9354 */
overflow()9355 int TC_LOG_MMAP::overflow()
9356 {
9357 /*
9358 simple overflow handling - just wait
9359 TODO perhaps, increase log size ?
9360 let's check the behaviour of tc_log_page_waits first
9361 */
9362 tc_log_page_waits++;
9363 mysql_cond_wait(&COND_pool, &LOCK_pool);
9364 return 1; // always return 1
9365 }
9366
/**
  Record that transaction XID is committed on the persistent storage.

    This function is called in the middle of two-phase commit:
    First all resources prepare the transaction, then tc_log->log() is called,
    then all resources commit the transaction, then tc_log->unlog() is called.

    All access to active page is serialized but it's not a problem, as
    we're assuming that fsync() will be a main bottleneck.
    That is, parallelizing writes to log pages we'll decrease number of
    threads waiting for a page, but then all these threads will be waiting
    for a fsync() anyway

   If tc_log == MYSQL_LOG then tc_log writes transaction to binlog and
   records XID in a special Xid_log_event.
   If tc_log = TC_LOG_MMAP then xid is written in a special memory-mapped
   log.

  @param xid  the transaction id to store in the active page

  @retval
    0  - error
  @retval
    \# - otherwise, "cookie", a number that will be passed as an argument
    to unlog() call. tc_log can define it any way it wants,
    and use for whatever purposes. TC_LOG_MMAP sets it
    to the position in memory where xid was logged to.
*/

int TC_LOG_MMAP::log_one_transaction(my_xid xid)
{
  int err;
  PAGE *p;
  ulong cookie;

  mysql_mutex_lock(&LOCK_active);

  /*
    if the active page is full - just wait...
    frankly speaking, active->free here accessed outside of mutex
    protection, but it's safe, because it only means we may miss an
    unlog() for the active page, and we're not waiting for it here -
    unlog() does not signal COND_active.
  */
  while (unlikely(active && active->free == 0))
    mysql_cond_wait(&COND_active, &LOCK_active);

  /* no active page ? take one from the pool */
  if (active == 0)
    get_active_from_pool();   // returns with active->lock held
  else
    mysql_mutex_lock(&active->lock);

  p=active;

  /*
    p->free is always > 0 here because to decrease it one needs
    to take p->lock and before it one needs to take LOCK_active.
    But checked that active->free > 0 under LOCK_active and
    haven't release it ever since
  */

  /* searching for an empty slot */
  while (*p->ptr)
  {
    p->ptr++;
    DBUG_ASSERT(p->ptr < p->end);               // because p->free > 0
  }

  /* found! store xid there and mark the page dirty */
  cookie= (ulong)((uchar *)p->ptr - data);      // can never be zero
  *p->ptr++= xid;
  p->free--;
  p->state= PS_DIRTY;
  mysql_mutex_unlock(&p->lock);

  mysql_mutex_lock(&LOCK_sync);
  if (syncing)
  {                                          // somebody's syncing. let's wait
    mysql_mutex_unlock(&LOCK_active);
    mysql_mutex_lock(&p->lock);
    p->waiters++;
    /*
      The wait uses LOCK_sync as its mutex, so the page lock is dropped
      around the wait and re-taken afterwards.
    */
    while (p->state == PS_DIRTY && syncing)
    {
      mysql_mutex_unlock(&p->lock);
      mysql_cond_wait(&p->cond, &LOCK_sync);
      mysql_mutex_lock(&p->lock);
    }
    p->waiters--;
    err= p->state == PS_ERROR;
    if (p->state != PS_DIRTY)                   // page was synced
    {
      mysql_mutex_unlock(&LOCK_sync);
      if (p->waiters == 0)
        mysql_cond_signal(&COND_pool);     // in case somebody's waiting
      mysql_mutex_unlock(&p->lock);
      goto done;                             // we're done
    }
    /* Page is still dirty and the syncing slot is free: we do the sync. */
    DBUG_ASSERT(!syncing);
    mysql_mutex_unlock(&p->lock);
    syncing = p;
    mysql_mutex_unlock(&LOCK_sync);

    mysql_mutex_lock(&LOCK_active);
    active=0;                                  // page is not active anymore
    mysql_cond_broadcast(&COND_active);
    mysql_mutex_unlock(&LOCK_active);
  }
  else
  {
    syncing = p;                               // place is vacant - take it
    mysql_mutex_unlock(&LOCK_sync);
    active = 0;                                // page is not active anymore
    mysql_cond_broadcast(&COND_active);
    mysql_mutex_unlock(&LOCK_active);
  }
  err= sync();

done:
  /* A zero return value signals an error to the caller. */
  return err ? 0 : cookie;
}
9486
/**
  Sync the page in the 'syncing' slot to disk, put it back at the end of
  the pool and free the 'syncing' slot.

  The page state becomes PS_ERROR if my_msync() failed and PS_POOL
  otherwise. Threads waiting on the page's condition are woken up, and if
  there is an active page one of its waiters is signalled so that it can
  become the next syncer.

  @return the result of my_msync() (non-zero is treated as an error)
*/
int TC_LOG_MMAP::sync()
{
  int err;

  DBUG_ASSERT(syncing != active);

  /*
    sit down and relax - this can take a while...
    note - no locks are held at this point
  */
  err= my_msync(fd, syncing->start, syncing->size * sizeof(my_xid), MS_SYNC);

  /* page is synced. let's move it to the pool */
  mysql_mutex_lock(&LOCK_pool);
  (*pool_last_ptr)=syncing;
  pool_last_ptr=&(syncing->next);
  syncing->next=0;
  syncing->state= err ? PS_ERROR : PS_POOL;
  mysql_cond_signal(&COND_pool);           // in case somebody's waiting
  mysql_mutex_unlock(&LOCK_pool);

  /* marking 'syncing' slot free */
  mysql_mutex_lock(&LOCK_sync);
  mysql_cond_broadcast(&syncing->cond);    // signal "sync done"
  syncing=0;
  /*
    we check the "active" pointer without LOCK_active. Still, it's safe -
    "active" can change from NULL to not NULL any time, but it
    will take LOCK_sync before waiting on active->cond. That is, it can never
    miss a signal.
    And "active" can change to NULL only by the syncing thread
    (the thread that will send a signal below)
  */
  if (active)
    mysql_cond_signal(&active->cond);      // wake up a new syncer
  mysql_mutex_unlock(&LOCK_sync);
  return err;
}
9525
9526 static void
mmap_do_checkpoint_callback(void * data)9527 mmap_do_checkpoint_callback(void *data)
9528 {
9529 TC_LOG_MMAP::pending_cookies *pending=
9530 static_cast<TC_LOG_MMAP::pending_cookies *>(data);
9531 ++pending->pending_count;
9532 }
9533
unlog(ulong cookie,my_xid xid)9534 int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
9535 {
9536 pending_cookies *full_buffer= NULL;
9537 uint32 ncookies= tc_log_page_size / sizeof(my_xid);
9538 DBUG_ASSERT(*(my_xid *)(data+cookie) == xid);
9539
9540 /*
9541 Do not delete the entry immediately, as there may be participating storage
9542 engines which implement commit_checkpoint_request(), and thus have not yet
9543 flushed the commit durably to disk.
9544
9545 Instead put it in a queue - and periodically, we will request a checkpoint
9546 from all engines and delete a whole batch at once.
9547 */
9548 mysql_mutex_lock(&LOCK_pending_checkpoint);
9549 if (pending_checkpoint == NULL)
9550 {
9551 uint32 size= sizeof(*pending_checkpoint) + sizeof(ulong) * (ncookies - 1);
9552 if (!(pending_checkpoint=
9553 (pending_cookies *)my_malloc(size, MYF(MY_ZEROFILL))))
9554 {
9555 my_error(ER_OUTOFMEMORY, MYF(0), size);
9556 mysql_mutex_unlock(&LOCK_pending_checkpoint);
9557 return 1;
9558 }
9559 }
9560
9561 pending_checkpoint->cookies[pending_checkpoint->count++]= cookie;
9562 if (pending_checkpoint->count == ncookies)
9563 {
9564 full_buffer= pending_checkpoint;
9565 pending_checkpoint= NULL;
9566 }
9567 mysql_mutex_unlock(&LOCK_pending_checkpoint);
9568
9569 if (full_buffer)
9570 {
9571 /*
9572 We do an extra increment and notify here - this ensures that
9573 things work also if there are no engines at all that support
9574 commit_checkpoint_request.
9575 */
9576 ++full_buffer->pending_count;
9577 ha_commit_checkpoint_request(full_buffer, mmap_do_checkpoint_callback);
9578 commit_checkpoint_notify(full_buffer);
9579 }
9580 return 0;
9581 }
9582
9583
9584 void
commit_checkpoint_notify(void * cookie)9585 TC_LOG_MMAP::commit_checkpoint_notify(void *cookie)
9586 {
9587 uint count;
9588 pending_cookies *pending= static_cast<pending_cookies *>(cookie);
9589 mysql_mutex_lock(&LOCK_pending_checkpoint);
9590 DBUG_ASSERT(pending->pending_count > 0);
9591 count= --pending->pending_count;
9592 mysql_mutex_unlock(&LOCK_pending_checkpoint);
9593 if (count == 0)
9594 {
9595 uint i;
9596 for (i= 0; i < tc_log_page_size / sizeof(my_xid); ++i)
9597 delete_entry(pending->cookies[i]);
9598 my_free(pending);
9599 }
9600 }
9601
9602
9603 /**
9604 erase xid from the page, update page free space counters/pointers.
9605 cookie points directly to the memory where xid was logged.
9606 */
9607
delete_entry(ulong cookie)9608 int TC_LOG_MMAP::delete_entry(ulong cookie)
9609 {
9610 PAGE *p=pages+(cookie/tc_log_page_size);
9611 my_xid *x=(my_xid *)(data+cookie);
9612
9613 DBUG_ASSERT(x >= p->start && x < p->end);
9614
9615 mysql_mutex_lock(&p->lock);
9616 *x=0;
9617 p->free++;
9618 DBUG_ASSERT(p->free <= p->size);
9619 set_if_smaller(p->ptr, x);
9620 if (p->free == p->size) // the page is completely empty
9621 statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
9622 if (p->waiters == 0) // the page is in pool and ready to rock
9623 mysql_cond_signal(&COND_pool); // ping ... for overflow()
9624 mysql_mutex_unlock(&p->lock);
9625 return 0;
9626 }
9627
close()9628 void TC_LOG_MMAP::close()
9629 {
9630 uint i;
9631 switch (inited) {
9632 case 6:
9633 mysql_mutex_destroy(&LOCK_sync);
9634 mysql_mutex_destroy(&LOCK_active);
9635 mysql_mutex_destroy(&LOCK_pool);
9636 mysql_mutex_destroy(&LOCK_pending_checkpoint);
9637 mysql_cond_destroy(&COND_pool);
9638 mysql_cond_destroy(&COND_active);
9639 mysql_cond_destroy(&COND_queue_busy);
9640 /* fall through */
9641 case 5:
9642 data[0]='A'; // garble the first (signature) byte, in case mysql_file_delete fails
9643 /* fall through */
9644 case 4:
9645 for (i=0; i < npages; i++)
9646 {
9647 if (pages[i].ptr == 0)
9648 break;
9649 mysql_mutex_destroy(&pages[i].lock);
9650 mysql_cond_destroy(&pages[i].cond);
9651 }
9652 /* fall through */
9653 case 3:
9654 my_free(pages);
9655 /* fall through */
9656 case 2:
9657 my_munmap((char*)data, (size_t)file_length);
9658 /* fall through */
9659 case 1:
9660 mysql_file_close(fd, MYF(0));
9661 }
9662 if (inited>=5) // cannot do in the switch because of Windows
9663 mysql_file_delete(key_file_tclog, logname, MYF(MY_WME));
9664 if (pending_checkpoint)
9665 my_free(pending_checkpoint);
9666 inited=0;
9667 }
9668
9669
recover()9670 int TC_LOG_MMAP::recover()
9671 {
9672 HASH xids;
9673 PAGE *p=pages, *end_p=pages+npages;
9674
9675 if (bcmp(data, tc_log_magic, sizeof(tc_log_magic)))
9676 {
9677 sql_print_error("Bad magic header in tc log");
9678 goto err1;
9679 }
9680
9681 /*
9682 the first byte after magic signature is set to current
9683 number of storage engines on startup
9684 */
9685 if (data[sizeof(tc_log_magic)] > total_ha_2pc)
9686 {
9687 sql_print_error("Recovery failed! You must enable "
9688 "all engines that were enabled at the moment of the crash");
9689 goto err1;
9690 }
9691
9692 if (my_hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
9693 sizeof(my_xid), 0, 0, MYF(0)))
9694 goto err1;
9695
9696 for ( ; p < end_p ; p++)
9697 {
9698 for (my_xid *x=p->start; x < p->end; x++)
9699 if (*x && my_hash_insert(&xids, (uchar *)x))
9700 goto err2; // OOM
9701 }
9702
9703 if (ha_recover(&xids))
9704 goto err2;
9705
9706 my_hash_free(&xids);
9707 bzero(data, (size_t)file_length);
9708 return 0;
9709
9710 err2:
9711 my_hash_free(&xids);
9712 err1:
9713 sql_print_error("Crash recovery failed. Either correct the problem "
9714 "(if it's, for example, out of memory error) and restart, "
9715 "or delete tc log and start mysqld with "
9716 "--tc-heuristic-recover={commit|rollback}");
9717 return 1;
9718 }
9719 #endif
9720
/*
  Transaction coordinator log objects.
  NOTE(review): tc_log is presumably pointed at the coordinator in use
  (one of the instances below or the binlog) elsewhere - confirm in caller.
*/
TC_LOG *tc_log;
TC_LOG_DUMMY tc_log_dummy;
TC_LOG_MMAP tc_log_mmap;
9724
9725 /**
9726 Perform heuristic recovery, if --tc-heuristic-recover was used.
9727
9728 @note
9729 no matter whether heuristic recovery was successful or not
9730 mysqld must exit. So, return value is the same in both cases.
9731
9732 @retval
9733 0 no heuristic recovery was requested
9734 @retval
9735 1 heuristic recovery was performed
9736 */
9737
using_heuristic_recover()9738 int TC_LOG::using_heuristic_recover()
9739 {
9740 if (!tc_heuristic_recover)
9741 return 0;
9742
9743 sql_print_information("Heuristic crash recovery mode");
9744 if (ha_recover(0))
9745 sql_print_error("Heuristic crash recovery failed");
9746 sql_print_information("Please restart mysqld without --tc-heuristic-recover");
9747 return 1;
9748 }
9749
/****** transaction coordinator log for 2pc - binlog() based solution ******/
/*
  TC_LOG_BINLOG is only another name for MYSQL_BIN_LOG: methods defined
  as TC_LOG_BINLOG::xxx() are members of MYSQL_BIN_LOG.
*/
#define TC_LOG_BINLOG MYSQL_BIN_LOG
9752
open(const char * opt_name)9753 int TC_LOG_BINLOG::open(const char *opt_name)
9754 {
9755 int error= 1;
9756
9757 DBUG_ASSERT(total_ha_2pc > 1);
9758 DBUG_ASSERT(opt_name && opt_name[0]);
9759
9760 if (!my_b_inited(&index_file))
9761 {
9762 /* There was a failure to open the index file, can't open the binlog */
9763 cleanup();
9764 return 1;
9765 }
9766
9767 if (using_heuristic_recover())
9768 {
9769 mysql_mutex_lock(&LOCK_log);
9770 /* generate a new binlog to mask a corrupted one */
9771 open(opt_name, LOG_BIN, 0, 0, WRITE_CACHE, max_binlog_size, 0, TRUE);
9772 mysql_mutex_unlock(&LOCK_log);
9773 cleanup();
9774 return 1;
9775 }
9776
9777 error= do_binlog_recovery(opt_name, true);
9778 binlog_state_recover_done= true;
9779 return error;
9780 }
9781
/**
  This is called on shutdown, after ha_panic.

  Intentionally empty: there is nothing to do here for the binlog based
  transaction coordinator.
*/
void TC_LOG_BINLOG::close()
{
}
9786
9787 /*
9788 Do a binlog log_xid() for a group of transactions, linked through
9789 thd->next_commit_ordered.
9790 */
9791 int
log_and_order(THD * thd,my_xid xid,bool all,bool need_prepare_ordered,bool need_commit_ordered)9792 TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
9793 bool need_prepare_ordered __attribute__((unused)),
9794 bool need_commit_ordered __attribute__((unused)))
9795 {
9796 int err;
9797 DBUG_ENTER("TC_LOG_BINLOG::log_and_order");
9798
9799 binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data();
9800 if (!cache_mngr)
9801 {
9802 WSREP_DEBUG("Skipping empty log_xid: %s", thd->query());
9803 DBUG_RETURN(0);
9804 }
9805
9806 cache_mngr->using_xa= TRUE;
9807 cache_mngr->xa_xid= xid;
9808 err= binlog_commit_flush_xid_caches(thd, cache_mngr, all, xid);
9809
9810 DEBUG_SYNC(thd, "binlog_after_log_and_order");
9811
9812 if (err)
9813 DBUG_RETURN(0);
9814
9815 bool need_unlog= cache_mngr->need_unlog;
9816 /*
9817 The transaction won't need the flag anymore.
9818 Todo/fixme: consider to move the statement into cache_mngr->reset()
9819 relocated to the current or later point.
9820 */
9821 cache_mngr->need_unlog= false;
9822 /*
9823 If using explicit user XA, we will not have XID. We must still return a
9824 non-zero cookie (as zero cookie signals error).
9825 */
9826 if (!xid || !need_unlog)
9827 DBUG_RETURN(BINLOG_COOKIE_DUMMY(cache_mngr->delayed_error));
9828
9829 DBUG_RETURN(BINLOG_COOKIE_MAKE(cache_mngr->binlog_id,
9830 cache_mngr->delayed_error));
9831 }
9832
9833 /*
9834 After an XID is logged, we need to hold on to the current binlog file until
9835 it is fully committed in the storage engine. The reason is that crash
9836 recovery only looks at the latest binlog, so we must make sure there are no
9837 outstanding prepared (but not committed) transactions before rotating the
9838 binlog.
9839
9840 To handle this, we keep a count of outstanding XIDs. This function is used
9841 to increase this count when committing one or more transactions to the
9842 binary log.
9843 */
9844 void
mark_xids_active(ulong binlog_id,uint xid_count)9845 TC_LOG_BINLOG::mark_xids_active(ulong binlog_id, uint xid_count)
9846 {
9847 xid_count_per_binlog *b;
9848
9849 DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
9850 DBUG_PRINT("info", ("binlog_id=%lu xid_count=%u", binlog_id, xid_count));
9851
9852 mysql_mutex_lock(&LOCK_xid_list);
9853 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
9854 while ((b= it++))
9855 {
9856 if (b->binlog_id == binlog_id)
9857 {
9858 b->xid_count += xid_count;
9859 break;
9860 }
9861 }
9862 /*
9863 As we do not delete elements until count reach zero, elements should always
9864 be found.
9865 */
9866 DBUG_ASSERT(b);
9867 mysql_mutex_unlock(&LOCK_xid_list);
9868 DBUG_VOID_RETURN;
9869 }
9870
/*
  Once an XID is committed, it can no longer be needed during crash recovery,
  as it has been durably recorded on disk as "committed".

  This function is called to mark an XID this way. It needs to decrease the
  count of pending XIDs in the corresponding binlog. When the count reaches
  zero (for an "old" binlog that is not the active one), that binlog file no
  longer needs to be scanned during crash recovery, so we can log a new binlog
  checkpoint.

  binlog_id identifies the binlog whose count is decreased. If
  write_checkpoint is false, no binlog checkpoint event is written.
*/
void
TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
{
  xid_count_per_binlog *b;
  bool first;
  ulong current;

  DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");

  mysql_mutex_lock(&LOCK_xid_list);
  current= current_binlog_id;
  I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
  /* 'first' stays true only if the matching entry is the head of the list */
  first= true;
  while ((b= it++))
  {
    if (b->binlog_id == binlog_id)
    {
      --b->xid_count;

      DBUG_ASSERT(b->xid_count >= 0); // catch unmatched (++) decrement

      break;
    }
    first= false;
  }
  /* Binlog is always found, as we do not remove until count reaches 0 */
  DBUG_ASSERT(b);
  /*
    If a RESET MASTER is pending, we are about to remove all log files, and
    the RESET MASTER thread is waiting for all pending unlog() calls to
    complete while holding LOCK_log. In this case we should not log a binlog
    checkpoint event (it would be deleted immediately anyway and we would
    deadlock on LOCK_log) but just signal the thread.
  */
  if (unlikely(reset_master_pending))
  {
    mysql_cond_broadcast(&COND_xid_list);
    mysql_mutex_unlock(&LOCK_xid_list);
    DBUG_VOID_RETURN;
  }

  if (likely(binlog_id == current) || b->xid_count != 0 || !first ||
      !write_checkpoint)
  {
    /* No new binlog checkpoint reached yet. */
    mysql_mutex_unlock(&LOCK_xid_list);
    DBUG_VOID_RETURN;
  }

  /*
    Now log a binlog checkpoint for the first binlog file with a non-zero count.

    Note that it is possible (though perhaps unlikely) that when count of
    binlog (N-2) drops to zero, binlog (N-1) is already at zero. So we may
    need to skip several entries before we find the one to log in the binlog
    checkpoint event.

    We chain the locking of LOCK_xid_list and LOCK_log, so that we ensure that
    Binlog_checkpoint_events are logged in order. This simplifies recovery a
    bit, as it can just take the last binlog checkpoint in the log, rather
    than compare all found against each other to find the one pointing to the
    most recent binlog.

    Note also that we need to first release LOCK_xid_list, then acquire
    LOCK_log, then re-acquire LOCK_xid_list. If we were to take LOCK_log while
    holding LOCK_xid_list, we might deadlock with other threads that take the
    locks in the opposite order.
  */

  ++mark_xid_done_waiting;
  mysql_mutex_unlock(&LOCK_xid_list);
  mysql_mutex_lock(&LOCK_log);
  mysql_mutex_lock(&LOCK_xid_list);
  --mark_xid_done_waiting;
  mysql_cond_broadcast(&COND_xid_list);
  /* We need to reload current_binlog_id due to release/re-take of lock. */
  current= current_binlog_id;

  for (;;)
  {
    /* Remove initial element(s) with zero count. */
    b= binlog_xid_count_list.head();
    /*
      We must not remove all elements in the list - the entry for the current
      binlog must be present always.
    */
    DBUG_ASSERT(b);
    if (b->binlog_id == current || b->xid_count > 0)
      break;
    WSREP_XID_LIST_ENTRY("TC_LOG_BINLOG::mark_xid_done(): Removing "
                         "xid_list_entry for %s (%lu)", b);
    delete binlog_xid_count_list.get();
  }

  /* LOCK_log is kept until the checkpoint event has been written. */
  mysql_mutex_unlock(&LOCK_xid_list);
  write_binlog_checkpoint_event_already_locked(b->binlog_name,
                                               b->binlog_name_len);
  mysql_mutex_unlock(&LOCK_log);
  DBUG_VOID_RETURN;
}
9981
unlog(ulong cookie,my_xid xid)9982 int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
9983 {
9984 DBUG_ENTER("TC_LOG_BINLOG::unlog");
9985 if (!xid)
9986 DBUG_RETURN(0);
9987
9988 if (!BINLOG_COOKIE_IS_DUMMY(cookie))
9989 mark_xid_done(BINLOG_COOKIE_GET_ID(cookie), true);
9990 /*
9991 See comment in trx_group_commit_leader() - if rotate() gave a failure,
9992 we delay the return of error code to here.
9993 */
9994 DBUG_RETURN(BINLOG_COOKIE_GET_ERROR_FLAG(cookie));
9995 }
9996
9997 void
commit_checkpoint_notify(void * cookie)9998 TC_LOG_BINLOG::commit_checkpoint_notify(void *cookie)
9999 {
10000 xid_count_per_binlog *entry= static_cast<xid_count_per_binlog *>(cookie);
10001 bool found_entry= false;
10002 mysql_mutex_lock(&LOCK_binlog_background_thread);
10003 /* count the same notification kind from different engines */
10004 for (xid_count_per_binlog *link= binlog_background_thread_queue;
10005 link && !found_entry; link= link->next_in_queue)
10006 {
10007 if ((found_entry= (entry == link)))
10008 entry->notify_count++;
10009 }
10010 if (!found_entry)
10011 {
10012 entry->next_in_queue= binlog_background_thread_queue;
10013 binlog_background_thread_queue= entry;
10014 }
10015 mysql_cond_signal(&COND_binlog_background_thread);
10016 mysql_mutex_unlock(&LOCK_binlog_background_thread);
10017 }
10018
/*
  Binlog background thread.

  This thread is used to log binlog checkpoints in the background, rather than
  in the context of random storage engine threads that happen to call
  commit_checkpoint_notify_ha() and may not like the delays while syncing
  binlog to disk or may not be setup with all my_thread_init() and other
  necessary stuff.

  In the future, this thread could also be used to do log rotation in the
  background, which could eliminate all stalls around binlog rotations.

  Outline of the thread body:
   - create a THD for this thread and, with HAVE_REPLICATION, load the slave
     GTID state;
   - set binlog_background_thread_started and signal
     COND_binlog_background_thread_end, which
     start_binlog_background_thread() waits on;
   - loop: wait for queued notifications or a stop request, detach the whole
     queue while holding the mutex, then call mark_xid_done() for every
     detached entry without holding the mutex;
   - on exit, destroy the THD, clear binlog_background_thread_stop and signal
     COND_binlog_background_thread_end once more.

  @param arg  Unused.

  @return Always 0.
*/
pthread_handler_t
binlog_background_thread(void *arg __attribute__((unused)))
{
  bool stop;
  MYSQL_BIN_LOG::xid_count_per_binlog *queue, *next;
  THD *thd;
  my_thread_init();
  DBUG_ENTER("binlog_background_thread");

  thd= new THD(next_thread_id());
  thd->system_thread= SYSTEM_THREAD_BINLOG_BACKGROUND;
  thd->thread_stack= (char*) &thd;           /* Set approximate stack start */
  thd->store_globals();
  thd->security_ctx->skip_grants();
  thd->set_command(COM_DAEMON);

  /*
    Load the slave replication GTID state from the mysql.gtid_slave_pos
    table.

    This is mostly so that we can start our seq_no counter from the highest
    seq_no seen by a slave. This way, we have a way to tell if a transaction
    logged by ourselves as master is newer or older than a replicated
    transaction.
  */
#ifdef HAVE_REPLICATION
  /* A failure to load is only reported as a warning; the thread carries on. */
  if (rpl_load_gtid_slave_state(thd))
    sql_print_warning("Failed to load slave replication state from table "
                      "%s.%s: %u: %s", "mysql",
                      rpl_gtid_slave_state_table_name.str,
                      thd->get_stmt_da()->sql_errno(),
                      thd->get_stmt_da()->message());
#endif

  /* Tell start_binlog_background_thread() that startup has completed. */
  mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
  binlog_background_thread_started= true;
  mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
  mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

  for (;;)
  {
    /*
      Wait until there is something in the queue to process, or we are asked
      to shut down.
    */
    THD_STAGE_INFO(thd, stage_binlog_waiting_background_tasks);
    mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
    for (;;)
    {
      /* Re-read both shared values each time round, under the mutex. */
      stop= binlog_background_thread_stop;
      queue= binlog_background_thread_queue;
      if (stop && !mysql_bin_log.is_xidlist_idle())
      {
        /*
          Delay stop until all pending binlog checkpoints have been processed.
        */
        stop= false;
      }
      if (stop || queue)
        break;
      mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread,
                      &mysql_bin_log.LOCK_binlog_background_thread);
    }
    /* Grab the queue, if any. */
    binlog_background_thread_queue= NULL;
    mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

    /* Process any incoming commit_checkpoint_notify() calls. */
    DBUG_EXECUTE_IF("inject_binlog_background_thread_before_mark_xid_done",
      DBUG_ASSERT(!debug_sync_set_action(
        thd,
        STRING_WITH_LEN("binlog_background_thread_before_mark_xid_done "
                        "SIGNAL injected_binlog_background_thread "
                        "WAIT_FOR something_that_will_never_happen "
                        "TIMEOUT 2")));
      );
    while (queue)
    {
      long count= queue->notify_count;
      THD_STAGE_INFO(thd, stage_binlog_processing_checkpoint_notify);
      DEBUG_SYNC(thd, "binlog_background_thread_before_mark_xid_done");
      /* Set the thread start time */
      thd->set_time();
      /* Grab next pointer first, as mark_xid_done() may free the element. */
      next= queue->next_in_queue;
      queue->notify_count= 0;
      /*
        count + 1 calls in total: commit_checkpoint_notify() only increments
        notify_count for repeat notifications of an already queued entry, so
        the notification that queued the entry is not included in the count.
      */
      for (long i= 0; i <= count; i++)
        mysql_bin_log.mark_xid_done(queue->binlog_id, true);
      queue= next;

      DBUG_EXECUTE_IF("binlog_background_checkpoint_processed",
        DBUG_ASSERT(!debug_sync_set_action(
          thd,
          STRING_WITH_LEN("now SIGNAL binlog_background_checkpoint_processed")));
        );
    }

    /* The queue snapshot taken together with 'stop' has been drained. */
    if (stop)
      break;
  }

  THD_STAGE_INFO(thd, stage_binlog_stopping_background_thread);

  /* No need to use mutex as thd is not linked into other threads */
  delete thd;

  my_thread_end();

  /*
    Signal that we are (almost) stopped.
    NOTE(review): presumably whoever requested the stop waits on
    COND_binlog_background_thread_end for the flag to be cleared; the waiter
    is not visible from here.
  */
  mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
  binlog_background_thread_stop= false;
  mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
  mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

  DBUG_RETURN(0);
}
10147
#ifdef HAVE_PSI_INTERFACE
/* Instrumentation key passed to mysql_thread_create() for the binlog thread. */
static PSI_thread_key key_thread_binlog;

/*
  Thread instruments of this module; registered under the "sql" category by
  start_binlog_background_thread() when PSI_server is available.
*/
static PSI_thread_info all_binlog_threads[]=
{
  { &key_thread_binlog, "binlog_background", PSI_FLAG_GLOBAL},
};
#endif /* HAVE_PSI_INTERFACE */
10156
10157 static bool
start_binlog_background_thread()10158 start_binlog_background_thread()
10159 {
10160 pthread_t th;
10161
10162 #ifdef HAVE_PSI_INTERFACE
10163 if (PSI_server)
10164 PSI_server->register_thread("sql", all_binlog_threads,
10165 array_elements(all_binlog_threads));
10166 #endif
10167
10168 if (mysql_thread_create(key_thread_binlog, &th, &connection_attrib,
10169 binlog_background_thread, NULL))
10170 return 1;
10171
10172 /*
10173 Wait for the thread to have started (so we know that the slave replication
10174 state is loaded and we have correct global_gtid_counter).
10175 */
10176 mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
10177 while (!binlog_background_thread_started)
10178 mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread_end,
10179 &mysql_bin_log.LOCK_binlog_background_thread);
10180 mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);
10181
10182 return 0;
10183 }
10184
10185
/**
  Scan binlog file(s) after a crash to rebuild the GTID binlog state and,
  optionally, to collect the XIDs needed for XA recovery.

  The first round reads from first_log (the already opened last binlog).
  When do_xa is set and a binlog checkpoint event was seen in that file, the
  scan continues through the files listed in the index, starting at the
  checkpointed file, until last_log_name is reached; the collected XIDs are
  then passed to ha_recover().

  @param linfo          Index position object; repositioned with
                        find_log_pos()/find_next_log() while scanning.
  @param last_log_name  Name of the last binlog file; also supplies the
                        directory prefix for the checkpoint file name.
  @param first_log      Cache to read the first-round events from.
  @param fdle           Format description event used for reading events; its
                        LOG_EVENT_BINLOG_IN_USE_F flag is cleared here and its
                        crypto state is updated/reset during the scan.
  @param do_xa          When true, XIDs are gathered and ha_recover() is
                        called; when false only the first file is scanned.

  @return 0 on success, 1 on failure (an error is written to the error log).
*/
int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
                           IO_CACHE *first_log,
                           Format_description_log_event *fdle, bool do_xa)
{
  Log_event *ev= NULL;
  HASH xids;
  MEM_ROOT mem_root;
  char binlog_checkpoint_name[FN_REFLEN];
  bool binlog_checkpoint_found;
  bool first_round;
  IO_CACHE log;
  File file= -1;
  const char *errmsg;
#ifdef HAVE_REPLICATION
  rpl_gtid last_gtid;
  bool last_gtid_standalone= false;
  bool last_gtid_valid= false;
#endif

  /* The XID hash (and, below, its mem_root) only exist when do_xa is set. */
  if (! fdle->is_valid() ||
      (do_xa && my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
                             sizeof(my_xid), 0, 0, MYF(0))))
    goto err1;

  if (do_xa)
    init_alloc_root(&mem_root, "TC_LOG_BINLOG", TC_LOG_PAGE_SIZE,
                    TC_LOG_PAGE_SIZE, MYF(0));

  fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error

  /*
    Scan the binlog for XIDs that need to be committed if still in the
    prepared stage.

    Start with the latest binlog file, then continue with any other binlog
    files if the last found binlog checkpoint indicates it is needed.
  */

  binlog_checkpoint_found= false;
  first_round= true;
  for (;;)
  {
    /* Read events from the current file until one cannot be read/is invalid. */
    while ((ev= Log_event::read_log_event(first_round ? first_log : &log,
                                          fdle, opt_master_verify_checksum))
           && ev->is_valid())
    {
      enum Log_event_type typ= ev->get_type_code();
      switch (typ)
      {
      case XID_EVENT:
      {
        if (do_xa)
        {
          /* Copy the xid into mem_root so it outlives the event object. */
          Xid_log_event *xev=(Xid_log_event *)ev;
          uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
                                          sizeof(xev->xid));
          if (!x || my_hash_insert(&xids, x))
            goto err2;
        }
        break;
      }
      case BINLOG_CHECKPOINT_EVENT:
        /* Only checkpoints in the last binlog (first round) are of interest. */
        if (first_round && do_xa)
        {
          size_t dir_len;
          Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev;
          if (cev->binlog_file_len >= FN_REFLEN)
            sql_print_warning("Incorrect binlog checkpoint event with too "
                              "long file name found.");
          else
          {
            /*
              Note that we cannot use make_log_name() here, as we have not yet
              initialised MYSQL_BIN_LOG::log_file_name.

              Instead: copy the directory part of last_log_name, then append
              the checkpoint's file name, bounded by the buffer size. A later
              checkpoint event overwrites an earlier one.
            */
            dir_len= dirname_length(last_log_name);
            strmake(strnmov(binlog_checkpoint_name, last_log_name, dir_len),
                    cev->binlog_file_name, FN_REFLEN - 1 - dir_len);
            binlog_checkpoint_found= true;
          }
        }
        break;
      case GTID_LIST_EVENT:
        if (first_round)
        {
          Gtid_list_log_event *glev= (Gtid_list_log_event *)ev;

          /* Initialise the binlog state from the Gtid_list event. */
          if (rpl_global_gtid_binlog_state.load(glev->list, glev->count))
            goto err2;
        }
        break;

#ifdef HAVE_REPLICATION
      case GTID_EVENT:
        if (first_round)
        {
          Gtid_log_event *gev= (Gtid_log_event *)ev;

          /* Update the binlog state with any GTID logged after Gtid_list. */
          last_gtid.domain_id= gev->domain_id;
          last_gtid.server_id= gev->server_id;
          last_gtid.seq_no= gev->seq_no;
          last_gtid_standalone=
            ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false);
          last_gtid_valid= true;
        }
        break;
#endif

      case START_ENCRYPTION_EVENT:
        {
          if (fdle->start_decryption((Start_encryption_log_event*) ev))
            goto err2;
        }
        break;

      default:
        /* Nothing. */
        break;
      }

#ifdef HAVE_REPLICATION
      /*
        The remembered GTID is only added to the binlog state once the end
        of its event group is seen: for a standalone GTID, the first event
        that is not part of a group; otherwise an XID event or a query event
        that is a commit or rollback.
      */
      if (last_gtid_valid &&
          ((last_gtid_standalone && !ev->is_part_of_group(typ)) ||
           (!last_gtid_standalone &&
            (typ == XID_EVENT ||
             (LOG_EVENT_IS_QUERY(typ) &&
              (((Query_log_event *)ev)->is_commit() ||
               ((Query_log_event *)ev)->is_rollback()))))))
      {
        if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false))
          goto err2;
        last_gtid_valid= false;
      }
#endif

      delete ev;
      ev= NULL;
    }

    /* Without XA recovery only the first file is scanned. */
    if (!do_xa)
      break;
    /*
      If the last binlog checkpoint event points to an older log, we have to
      scan all logs from there also, to get all possible XIDs to recover.

      If there was no binlog checkpoint event at all, this means the log was
      written by an older version of MariaDB (or MySQL) - these always have an
      (implicit) binlog checkpoint event at the start of the last binlog file.
    */
    if (first_round)
    {
      if (!binlog_checkpoint_found)
        break;
      first_round= false;
      DBUG_EXECUTE_IF("xa_recover_expect_master_bin_000004",
          if (0 != strcmp("./master-bin.000004", binlog_checkpoint_name) &&
              0 != strcmp(".\\master-bin.000004", binlog_checkpoint_name))
            DBUG_SUICIDE();
        );
      /* Position the index at the checkpointed file. */
      if (find_log_pos(linfo, binlog_checkpoint_name, 1))
      {
        sql_print_error("Binlog file '%s' not found in binlog index, needed "
                        "for recovery. Aborting.", binlog_checkpoint_name);
        goto err2;
      }
    }
    else
    {
      /* Done with the file opened in the previous iteration. */
      end_io_cache(&log);
      mysql_file_close(file, MYF(MY_WME));
      file= -1;
    }

    if (!strcmp(linfo->log_file_name, last_log_name))
      break;                                    // No more files to do
    if ((file= open_binlog(&log, linfo->log_file_name, &errmsg)) < 0)
    {
      sql_print_error("%s", errmsg);
      goto err2;
    }
    /*
      We do not need to read the Format_description_log_event of other binlog
      files. It is not possible for a binlog checkpoint to span multiple
      binlog files written by different versions of the server. So we can use
      the first one read for reading from all binlog files.
    */
    /* Advance the index now, so linfo names the file for the next round. */
    if (find_next_log(linfo, 1))
    {
      sql_print_error("Error reading binlog files during recovery. Aborting.");
      goto err2;
    }
    fdle->reset_crypto();
  }

  if (do_xa)
  {
    if (ha_recover(&xids))
      goto err2;

    free_root(&mem_root, MYF(0));
    my_hash_free(&xids);
  }
  return 0;

  /* err2: release the current event, any open file and the XID containers. */
err2:
  delete ev;
  if (file >= 0)
  {
    end_io_cache(&log);
    mysql_file_close(file, MYF(MY_WME));
  }
  if (do_xa)
  {
    free_root(&mem_root, MYF(0));
    my_hash_free(&xids);
  }
  /* err1: nothing was set up yet; only report the failure. */
err1:
  sql_print_error("Crash recovery failed. Either correct the problem "
                  "(if it's, for example, out of memory error) and restart, "
                  "or delete (or rename) binary log and start mysqld with "
                  "--tc-heuristic-recover={commit|rollback}");
  return 1;
}
10411
10412
10413 int
do_binlog_recovery(const char * opt_name,bool do_xa_recovery)10414 MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
10415 {
10416 LOG_INFO log_info;
10417 const char *errmsg;
10418 IO_CACHE log;
10419 File file;
10420 Log_event *ev= 0;
10421 Format_description_log_event fdle(BINLOG_VERSION);
10422 char log_name[FN_REFLEN];
10423 int error;
10424
10425 if (unlikely((error= find_log_pos(&log_info, NullS, 1))))
10426 {
10427 /*
10428 If there are no binlog files (LOG_INFO_EOF), then we still try to read
10429 the .state file to restore the binlog state. This allows to copy a server
10430 to provision a new one without copying the binlog files (except the
10431 master-bin.state file) and still preserve the correct binlog state.
10432 */
10433 if (error != LOG_INFO_EOF)
10434 sql_print_error("find_log_pos() failed (error: %d)", error);
10435 else
10436 {
10437 error= read_state_from_file();
10438 if (error == 2)
10439 {
10440 /*
10441 No binlog files and no binlog state is not an error (eg. just initial
10442 server start after fresh installation).
10443 */
10444 error= 0;
10445 }
10446 }
10447 return error;
10448 }
10449
10450 if (! fdle.is_valid())
10451 return 1;
10452
10453 do
10454 {
10455 strmake_buf(log_name, log_info.log_file_name);
10456 } while (!(error= find_next_log(&log_info, 1)));
10457
10458 if (error != LOG_INFO_EOF)
10459 {
10460 sql_print_error("find_log_pos() failed (error: %d)", error);
10461 return error;
10462 }
10463
10464 if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
10465 {
10466 sql_print_error("%s", errmsg);
10467 return 1;
10468 }
10469
10470 if ((ev= Log_event::read_log_event(&log, &fdle,
10471 opt_master_verify_checksum)) &&
10472 ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
10473 {
10474 if (ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
10475 {
10476 sql_print_information("Recovering after a crash using %s", opt_name);
10477 error= recover(&log_info, log_name, &log,
10478 (Format_description_log_event *)ev, do_xa_recovery);
10479 }
10480 else
10481 {
10482 error= read_state_from_file();
10483 if (unlikely(error == 2))
10484 {
10485 /*
10486 The binlog exists, but the .state file is missing. This is normal if
10487 this is the first master start after a major upgrade to 10.0 (with
10488 GTID support).
10489
10490 However, it could also be that the .state file was lost somehow, and
10491 in this case it could be a serious issue, as we would set the wrong
10492 binlog state in the next binlog file to be created, and GTID
10493 processing would be corrupted. A common way would be copying files
10494 from an old server to a new one and forgetting the .state file.
10495
10496 So in this case, we want to try to recover the binlog state by
10497 scanning the last binlog file (but we do not need any XA recovery).
10498
10499 ToDo: We could avoid one scan at first start after major upgrade, by
10500 detecting that there is no GTID_LIST event at the start of the
10501 binlog file, and stopping the scan in that case.
10502 */
10503 error= recover(&log_info, log_name, &log,
10504 (Format_description_log_event *)ev, false);
10505 }
10506 }
10507 }
10508
10509 delete ev;
10510 end_io_cache(&log);
10511 mysql_file_close(file, MYF(MY_WME));
10512
10513 return error;
10514 }
10515
10516
10517 #ifdef INNODB_COMPATIBILITY_HOOKS
10518 /**
10519 Get the file name of the MySQL binlog.
10520 @return the name of the binlog file
10521 */
10522 extern "C"
mysql_bin_log_file_name(void)10523 const char* mysql_bin_log_file_name(void)
10524 {
10525 return mysql_bin_log.get_log_fname();
10526 }
10527 /**
10528 Get the current position of the MySQL binlog.
10529 @return byte offset from the beginning of the binlog
10530 */
10531 extern "C"
mysql_bin_log_file_pos(void)10532 ulonglong mysql_bin_log_file_pos(void)
10533 {
10534 return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file;
10535 }
10536 /*
10537 Get the current position of the MySQL binlog for transaction currently being
10538 committed.
10539
10540 This is valid to call from within storage engine commit_ordered() and
10541 commit() methods only.
10542
10543 Since it stores the position inside THD, it is safe to call without any
10544 locking.
10545 */
10546 void
mysql_bin_log_commit_pos(THD * thd,ulonglong * out_pos,const char ** out_file)10547 mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
10548 {
10549 binlog_cache_mngr *cache_mngr;
10550 if (opt_bin_log &&
10551 (cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton)))
10552 {
10553 *out_file= cache_mngr->last_commit_pos_file;
10554 *out_pos= (ulonglong)(cache_mngr->last_commit_pos_offset);
10555 }
10556 else
10557 {
10558 *out_file= NULL;
10559 *out_pos= 0;
10560 }
10561 }
10562 #endif /* INNODB_COMPATIBILITY_HOOKS */
10563
10564
10565 static void
binlog_checksum_update(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)10566 binlog_checksum_update(MYSQL_THD thd, struct st_mysql_sys_var *var,
10567 void *var_ptr, const void *save)
10568 {
10569 ulong value= *((ulong *)save);
10570 bool check_purge= false;
10571 ulong UNINIT_VAR(prev_binlog_id);
10572
10573 mysql_mutex_lock(mysql_bin_log.get_log_lock());
10574 if(mysql_bin_log.is_open())
10575 {
10576 prev_binlog_id= mysql_bin_log.current_binlog_id;
10577 if (binlog_checksum_options != value)
10578 mysql_bin_log.checksum_alg_reset= (enum_binlog_checksum_alg)value;
10579 if (mysql_bin_log.rotate(true, &check_purge))
10580 check_purge= false;
10581 }
10582 else
10583 {
10584 binlog_checksum_options= value;
10585 }
10586 DBUG_ASSERT(binlog_checksum_options == value);
10587 mysql_bin_log.checksum_alg_reset= BINLOG_CHECKSUM_ALG_UNDEF;
10588 mysql_mutex_unlock(mysql_bin_log.get_log_lock());
10589 if (check_purge)
10590 mysql_bin_log.checkpoint_and_purge(prev_binlog_id);
10591 }
10592
10593
show_binlog_vars(THD * thd,SHOW_VAR * var,void *,system_status_var * status_var,enum_var_type)10594 static int show_binlog_vars(THD *thd, SHOW_VAR *var, void *,
10595 system_status_var *status_var, enum_var_type)
10596 {
10597 mysql_bin_log.set_status_variables(thd);
10598 var->type= SHOW_ARRAY;
10599 var->value= (char *)&binlog_status_vars_detail;
10600 return 0;
10601 }
10602
/*
  Top-level status variable table of the binlog plugin: a single "Binlog"
  entry resolved through show_binlog_vars(), followed by the terminator row.
*/
static SHOW_VAR binlog_status_vars_top[]= {
  {"Binlog", (char *) &show_binlog_vars, SHOW_FUNC},
  {NullS, NullS, SHOW_LONG}
};
10607
/*
  Read-only boolean system variable backed by opt_optimize_thread_scheduling.
  NOTE(review): the trailing NULL, NULL, 1 arguments are presumably the
  check callback, update callback and default value of the sysvar macro -
  confirm against the plugin API header.
*/
static MYSQL_SYSVAR_BOOL(
  optimize_thread_scheduling,
  opt_optimize_thread_scheduling,
  PLUGIN_VAR_READONLY,
  "Run fast part of group commit in a single thread, to optimize kernel "
  "thread scheduling. On by default. Disable to run each transaction in group "
  "commit in its own thread, which can be slower at very high concurrency. "
  "This option is mostly for testing one algorithm versus the other, and it "
  "should not normally be necessary to change it.",
  NULL,
  NULL,
  1);
10620
/*
  Enum system variable backed by binlog_checksum_options, with values taken
  from binlog_checksum_typelib. Changes go through binlog_checksum_update();
  BINLOG_CHECKSUM_ALG_CRC32 is passed in the default-value position.
*/
static MYSQL_SYSVAR_ENUM(
  checksum,
  binlog_checksum_options,
  PLUGIN_VAR_RQCMDARG,
  "Type of BINLOG_CHECKSUM_ALG. Include checksum for "
  "log events in the binary log",
  NULL,
  binlog_checksum_update,
  BINLOG_CHECKSUM_ALG_CRC32,
  &binlog_checksum_typelib);
10631
/*
  NULL-terminated list of the system variables exported by the binlog
  plugin; referenced from the plugin declaration.
*/
static struct st_mysql_sys_var *binlog_sys_vars[]=
{
  MYSQL_SYSVAR(optimize_thread_scheduling),
  MYSQL_SYSVAR(checksum),
  NULL
};
10638
10639
10640 /*
10641 Copy out the non-directory part of binlog position filename for the
10642 `binlog_snapshot_file' status variable, same way as it is done for
10643 SHOW MASTER STATUS.
10644 */
10645 static void
set_binlog_snapshot_file(const char * src)10646 set_binlog_snapshot_file(const char *src)
10647 {
10648 size_t dir_len = dirname_length(src);
10649 strmake_buf(binlog_snapshot_file, src + dir_len);
10650 }
10651
10652 /*
10653 Copy out current values of status variables, for SHOW STATUS or
10654 information_schema.global_status.
10655
10656 This is called only under LOCK_all_status_vars, so we can fill in a static array.
10657 */
10658 void
set_status_variables(THD * thd)10659 TC_LOG_BINLOG::set_status_variables(THD *thd)
10660 {
10661 binlog_cache_mngr *cache_mngr;
10662
10663 if (thd && opt_bin_log)
10664 cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10665 else
10666 cache_mngr= 0;
10667
10668 bool have_snapshot= (cache_mngr && cache_mngr->last_commit_pos_file[0] != 0);
10669 mysql_mutex_lock(&LOCK_commit_ordered);
10670 binlog_status_var_num_commits= this->num_commits;
10671 binlog_status_var_num_group_commits= this->num_group_commits;
10672 if (!have_snapshot)
10673 {
10674 set_binlog_snapshot_file(last_commit_pos_file);
10675 binlog_snapshot_position= last_commit_pos_offset;
10676 }
10677 mysql_mutex_unlock(&LOCK_commit_ordered);
10678 mysql_mutex_lock(&LOCK_prepare_ordered);
10679 binlog_status_group_commit_trigger_count= this->group_commit_trigger_count;
10680 binlog_status_group_commit_trigger_timeout= this->group_commit_trigger_timeout;
10681 binlog_status_group_commit_trigger_lock_wait= this->group_commit_trigger_lock_wait;
10682 mysql_mutex_unlock(&LOCK_prepare_ordered);
10683
10684 if (have_snapshot)
10685 {
10686 set_binlog_snapshot_file(cache_mngr->last_commit_pos_file);
10687 binlog_snapshot_position= cache_mngr->last_commit_pos_offset;
10688 }
10689 }
10690
10691
10692 /*
10693 Find the Gtid_list_log_event at the start of a binlog.
10694
10695 NULL for ok, non-NULL error message for error.
10696
10697 If ok, then the event is returned in *out_gtid_list. This can be NULL if we
10698 get back to binlogs written by old server version without GTID support. If
10699 so, it means we have reached the point to start from, as no GTID events can
10700 exist in earlier binlogs.
10701 */
10702 const char *
get_gtid_list_event(IO_CACHE * cache,Gtid_list_log_event ** out_gtid_list)10703 get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list)
10704 {
10705 Format_description_log_event init_fdle(BINLOG_VERSION);
10706 Format_description_log_event *fdle;
10707 Log_event *ev;
10708 const char *errormsg = NULL;
10709
10710 *out_gtid_list= NULL;
10711
10712 if (!(ev= Log_event::read_log_event(cache, &init_fdle,
10713 opt_master_verify_checksum)) ||
10714 ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
10715 {
10716 if (ev)
10717 delete ev;
10718 return "Could not read format description log event while looking for "
10719 "GTID position in binlog";
10720 }
10721
10722 fdle= static_cast<Format_description_log_event *>(ev);
10723
10724 for (;;)
10725 {
10726 Log_event_type typ;
10727
10728 ev= Log_event::read_log_event(cache, fdle, opt_master_verify_checksum);
10729 if (!ev)
10730 {
10731 errormsg= "Could not read GTID list event while looking for GTID "
10732 "position in binlog";
10733 break;
10734 }
10735 typ= ev->get_type_code();
10736 if (typ == GTID_LIST_EVENT)
10737 break; /* Done, found it */
10738 if (typ == START_ENCRYPTION_EVENT)
10739 {
10740 if (fdle->start_decryption((Start_encryption_log_event*) ev))
10741 errormsg= "Could not set up decryption for binlog.";
10742 }
10743 delete ev;
10744 if (typ == ROTATE_EVENT || typ == STOP_EVENT ||
10745 typ == FORMAT_DESCRIPTION_EVENT || typ == START_ENCRYPTION_EVENT)
10746 continue; /* Continue looking */
10747
10748 /* We did not find any Gtid_list_log_event, must be old binlog. */
10749 ev= NULL;
10750 break;
10751 }
10752
10753 delete fdle;
10754 *out_gtid_list= static_cast<Gtid_list_log_event *>(ev);
10755 return errormsg;
10756 }
10757
10758
/* Storage engine descriptor handed to the plugin declaration below. */
struct st_mysql_storage_engine binlog_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };

/*
  Plugin declaration of the "binlog" pseudo storage engine. It ties together
  the init function (binlog_init), the status variable table
  (binlog_status_vars_top) and the system variable list (binlog_sys_vars).
*/
maria_declare_plugin(binlog)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &binlog_storage_engine,
  "binlog",
  "MySQL AB",
  "This is a pseudo storage engine to represent the binlog in a transaction",
  PLUGIN_LICENSE_GPL,
  binlog_init, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0100 /* 1.0 */,
  binlog_status_vars_top,     /* status variables                */
  binlog_sys_vars,            /* system variables                */
  "1.0",                      /* string version                  */
  MariaDB_PLUGIN_MATURITY_STABLE /* maturity                     */
}
maria_declare_plugin_end;
10779
10780 #ifdef WITH_WSREP
10781 #include "wsrep_mysqld.h"
10782
wsrep_get_trans_cache(THD * thd)10783 IO_CACHE *wsrep_get_trans_cache(THD * thd)
10784 {
10785 DBUG_ASSERT(binlog_hton->slot != HA_SLOT_UNDEF);
10786 binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*)
10787 thd_get_ha_data(thd, binlog_hton);
10788 if (cache_mngr)
10789 return cache_mngr->get_binlog_cache_log(true);
10790
10791 WSREP_DEBUG("binlog cache not initialized, conn: %llu",
10792 thd->thread_id);
10793 return NULL;
10794 }
10795
wsrep_thd_binlog_trx_reset(THD * thd)10796 void wsrep_thd_binlog_trx_reset(THD * thd)
10797 {
10798 DBUG_ENTER("wsrep_thd_binlog_trx_reset");
10799 WSREP_DEBUG("wsrep_thd_binlog_reset");
10800 /*
10801 todo: fix autocommit select to not call the caller
10802 */
10803 binlog_cache_mngr *const cache_mngr=
10804 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10805 if (cache_mngr)
10806 {
10807 cache_mngr->reset(false, true);
10808 if (!cache_mngr->stmt_cache.empty())
10809 {
10810 WSREP_DEBUG("pending events in stmt cache, sql: %s", thd->query());
10811 cache_mngr->stmt_cache.reset();
10812 }
10813 }
10814 thd->clear_binlog_table_maps();
10815 DBUG_VOID_RETURN;
10816 }
10817
wsrep_thd_binlog_stmt_rollback(THD * thd)10818 void wsrep_thd_binlog_stmt_rollback(THD * thd)
10819 {
10820 DBUG_ENTER("wsrep_thd_binlog_stmt_rollback");
10821 WSREP_DEBUG("wsrep_thd_binlog_stmt_rollback");
10822 binlog_cache_mngr *const cache_mngr=
10823 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10824 if (cache_mngr)
10825 {
10826 thd->binlog_remove_pending_rows_event(TRUE, TRUE);
10827 cache_mngr->stmt_cache.reset();
10828 }
10829 DBUG_VOID_RETURN;
10830 }
10831
wsrep_register_binlog_handler(THD * thd,bool trx)10832 void wsrep_register_binlog_handler(THD *thd, bool trx)
10833 {
10834 DBUG_ENTER("register_binlog_handler");
10835 /*
10836 If this is the first call to this function while processing a statement,
10837 the transactional cache does not have a savepoint defined. So, in what
10838 follows:
10839 . an implicit savepoint is defined;
10840 . callbacks are registered;
10841 . binary log is set as read/write.
10842
10843 The savepoint allows for truncating the trx-cache transactional changes
10844 fail. Callbacks are necessary to flush caches upon committing or rolling
10845 back a statement or a transaction. However, notifications do not happen
10846 if the binary log is set as read/write.
10847 */
10848 binlog_cache_mngr *cache_mngr=
10849 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10850 /* cache_mngr may be missing e.g. in mtr test ev51914.test */
10851 if (cache_mngr)
10852 {
10853 /*
10854 Set an implicit savepoint in order to be able to truncate a trx-cache.
10855 */
10856 if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
10857 {
10858 my_off_t pos= 0;
10859 binlog_trans_log_savepos(thd, &pos);
10860 cache_mngr->trx_cache.set_prev_position(pos);
10861 }
10862
10863 /*
10864 Set callbacks in order to be able to call commmit or rollback.
10865 */
10866 if (trx)
10867 trans_register_ha(thd, TRUE, binlog_hton);
10868 trans_register_ha(thd, FALSE, binlog_hton);
10869
10870 /*
10871 Set the binary log as read/write otherwise callbacks are not called.
10872 */
10873 thd->ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
10874 }
10875 DBUG_VOID_RETURN;
10876 }
10877
10878 #endif /* WITH_WSREP */
10879