1 /* Copyright (c) 2000, 2018, Oracle and/or its affiliates.
2 Copyright (c) 2009, 2020, MariaDB Corporation.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
16
17
18 /**
19 @file
20
21 @brief
22 logging of commands
23
24 @todo
25 Abort logging when we get an error in reading or writing log files
26 */
27
28 #include "mariadb.h" /* NO_EMBEDDED_ACCESS_CHECKS */
29 #include "sql_priv.h"
30 #include "log.h"
31 #include "sql_base.h" // open_log_table
32 #include "sql_repl.h"
33 #include "sql_delete.h" // mysql_truncate
34 #include "sql_parse.h" // command_name
35 #include "sql_time.h" // calc_time_from_sec, my_time_compare
36 #include "tztime.h" // my_tz_OFFSET0, struct Time_zone
37 #include "sql_acl.h" // SUPER_ACL
38 #include "log_event.h" // Query_log_event
39 #include "rpl_filter.h"
40 #include "rpl_rli.h"
41 #include "sql_audit.h"
42 #include "mysqld.h"
43
44 #include <my_dir.h>
45 #include <m_ctype.h> // For test_if_number
46
47 #include <set_var.h> // for Sys_last_gtid_ptr
48
49 #ifdef _WIN32
50 #include "message.h"
51 #endif
52
53 #include "sql_plugin.h"
54 #include "debug_sync.h"
55 #include "sql_show.h"
56 #include "my_pthread.h"
57 #include "semisync_master.h"
58 #include "wsrep_mysqld.h"
59 #include "sp_rcontext.h"
60 #include "sp_head.h"
61
/* max size of the log message */
#define MAX_LOG_BUFFER_SIZE 1024
/* NOTE(review): presumably the size of a formatted time buffer - confirm */
#define MAX_TIME_SIZE 32
/*
  All-ones my_off_t value. binlog_cache_data uses it as the "no position
  recorded" marker for before_stmt_pos.
*/
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)
/* Truncate cache log files bigger than this */
#define CACHE_FILE_TRUNC_SIZE 65536

/*
  Expands to the stringified flag name F followed by a space when any bit
  of F is set in V, and to the empty string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
70
71 handlerton *binlog_hton;
72 LOGGER logger;
73
74 const char *log_bin_index= 0;
75 const char *log_bin_basename= 0;
76
77 MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period);
78
79 static bool test_if_number(const char *str,
80 ulong *res, bool allow_wildcards);
81 static int binlog_init(void *p);
82 static int binlog_close_connection(handlerton *hton, THD *thd);
83 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
84 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
85 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
86 THD *thd);
87 static int binlog_commit(handlerton *hton, THD *thd, bool all);
88 static int binlog_rollback(handlerton *hton, THD *thd, bool all);
89 static int binlog_prepare(handlerton *hton, THD *thd, bool all);
90 static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);
91
92 static const LEX_CSTRING write_error_msg=
93 { STRING_WITH_LEN("error writing to the binary log") };
94
95 static my_bool opt_optimize_thread_scheduling= TRUE;
96 ulong binlog_checksum_options;
97 #ifndef DBUG_OFF
98 ulong opt_binlog_dbug_fsync_sleep= 0;
99 #endif
100
101 mysql_mutex_t LOCK_prepare_ordered;
102 mysql_cond_t COND_prepare_ordered;
103 mysql_mutex_t LOCK_after_binlog_sync;
104 mysql_mutex_t LOCK_commit_ordered;
105
106 static ulonglong binlog_status_var_num_commits;
107 static ulonglong binlog_status_var_num_group_commits;
isEmptyComputationSliceState108 static ulonglong binlog_status_group_commit_trigger_count;
109 static ulonglong binlog_status_group_commit_trigger_lock_wait;
110 static ulonglong binlog_status_group_commit_trigger_timeout;
111 static char binlog_snapshot_file[FN_REFLEN];
112 static ulonglong binlog_snapshot_position;
113
114 static const char *fatal_log_error=
115 "Could not use %s for logging (error %d). "
116 "Turning logging off for the whole duration of the MariaDB server process. "
117 "To turn it on again: fix the cause, shutdown the MariaDB server and "
118 "restart it.";
119
120
121 static SHOW_VAR binlog_status_vars_detail[]=
122 {
123 {"commits",
124 (char *)&binlog_status_var_num_commits, SHOW_LONGLONG},
125 {"group_commits",
126 (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG},
127 {"group_commit_trigger_count",
128 (char *)&binlog_status_group_commit_trigger_count, SHOW_LONGLONG},
129 {"group_commit_trigger_lock_wait",
130 (char *)&binlog_status_group_commit_trigger_lock_wait, SHOW_LONGLONG},
131 {"group_commit_trigger_timeout",
132 (char *)&binlog_status_group_commit_trigger_timeout, SHOW_LONGLONG},
133 {"snapshot_file",
134 (char *)&binlog_snapshot_file, SHOW_CHAR},
135 {"snapshot_position",
136 (char *)&binlog_snapshot_position, SHOW_LONGLONG},
137 {NullS, NullS, SHOW_LONG}
138 };
139
140 /*
141 Variables for the binlog background thread.
142 Protected by the MYSQL_BIN_LOG::LOCK_binlog_background_thread mutex.
143 */
144 static bool binlog_background_thread_started= false;
145 static bool binlog_background_thread_stop= false;
146 static MYSQL_BIN_LOG::xid_count_per_binlog *
147 binlog_background_thread_queue= NULL;
148
149 static bool start_binlog_background_thread();
150
151 static rpl_binlog_state rpl_global_gtid_binlog_state;
152
153 void setup_log_handling()
154 {
155 rpl_global_gtid_binlog_state.init();
156 }
157
158
159 /**
160 purge logs, master and slave sides both, related error code
161 converter.
162 Called from @c purge_error_message(), @c MYSQL_BIN_LOG::reset_logs()
163
164 @param res an internal to purging routines error code
165
166 @return the user level error code ER_*
167 */
168 uint purge_log_get_error_code(int res)
169 {
170 uint errcode= 0;
171
172 switch (res) {
173 case 0: break;
174 case LOG_INFO_EOF: errcode= ER_UNKNOWN_TARGET_BINLOG; break;
175 case LOG_INFO_IO: errcode= ER_IO_ERR_LOG_INDEX_READ; break;
176 case LOG_INFO_INVALID:errcode= ER_BINLOG_PURGE_PROHIBITED; break;
177 case LOG_INFO_SEEK: errcode= ER_FSEEK_FAIL; break;
178 case LOG_INFO_MEM: errcode= ER_OUT_OF_RESOURCES; break;
179 case LOG_INFO_FATAL: errcode= ER_BINLOG_PURGE_FATAL_ERR; break;
180 case LOG_INFO_IN_USE: errcode= ER_LOG_IN_USE; break;
181 case LOG_INFO_EMFILE: errcode= ER_BINLOG_PURGE_EMFILE; break;
182 default: errcode= ER_LOG_PURGE_UNKNOWN_ERR; break;
183 }
184
185 return errcode;
186 }
187
188 /**
189 Silence all errors and warnings reported when performing a write
190 to a log table.
191 Errors and warnings are not reported to the client or SQL exception
192 handlers, so that the presence of logging does not interfere and affect
193 the logic of an application.
194 */
195 class Silence_log_table_errors : public Internal_error_handler
196 {
197 char m_message[MYSQL_ERRMSG_SIZE];
198 public:
199 Silence_log_table_errors()
200 {
201 m_message[0]= '\0';
202 }
203
204 virtual ~Silence_log_table_errors() {}
205
206 virtual bool handle_condition(THD *thd,
207 uint sql_errno,
208 const char* sql_state,
209 Sql_condition::enum_warning_level *level,
210 const char* msg,
211 Sql_condition ** cond_hdl);
212 const char *message() const { return m_message; }
213 };
214
215 bool
216 Silence_log_table_errors::handle_condition(THD *,
217 uint,
218 const char*,
219 Sql_condition::enum_warning_level*,
220 const char* msg,
221 Sql_condition ** cond_hdl)
222 {
223 *cond_hdl= NULL;
224 strmake_buf(m_message, msg);
225 return TRUE;
226 }
227
228 sql_print_message_func sql_print_message_handlers[3] =
229 {
230 sql_print_information,
231 sql_print_warning,
232 sql_print_error
233 };
234
235
236 /**
237 Create the name of the log file
238
239 @param[OUT] out a pointer to a new allocated name will go there
240 @param[IN] log_ext The extension for the file (e.g .log)
241 @param[IN] once whether to use malloc_once or a normal malloc.
242 */
243 void make_default_log_name(char **out, const char* log_ext, bool once)
244 {
245 char buff[FN_REFLEN+10];
246 fn_format(buff, opt_log_basename, "", log_ext, MYF(MY_REPLACE_EXT));
247 if (once)
248 *out= my_once_strdup(buff, MYF(MY_WME));
MemRefRegionMemRefRegion249 else
250 {
251 my_free(*out);
252 *out= my_strdup(buff, MYF(MY_WME));
253 }
254 }
255
256
257 /*
258 Helper classes to store non-transactional and transactional data
259 before copying it to the binary log.
260 */
261 class binlog_cache_data
262 {
263 public:
264 binlog_cache_data(): m_pending(0), status(0),
265 before_stmt_pos(MY_OFF_T_UNDEF),
266 incident(FALSE),
267 saved_max_binlog_cache_size(0), ptr_binlog_cache_use(0),
268 ptr_binlog_cache_disk_use(0)
269 { }
270
271 ~binlog_cache_data()
272 {
273 DBUG_ASSERT(empty());
274 close_cached_file(&cache_log);
275 }
276
277 /*
278 Return 1 if there is no relevant entries in the cache
279
280 This is:
281 - Cache is empty
282 - There are row or critical (DDL?) events in the cache
283
284 The status test is needed to avoid writing entries with only
285 a table map entry, which would crash in do_apply_event() on the slave
286 as it assumes that there is always a row entry after a table map.
getConstraintsMemRefRegion287 */
288 bool empty() const
289 {
290 return (pending() == NULL &&
291 (my_b_write_tell(&cache_log) == 0 ||
292 ((status & (LOGGED_ROW_EVENT | LOGGED_CRITICAL)) == 0)));
293 }
294
295 Rows_log_event *pending() const
296 {
297 return m_pending;
298 }
299
300 void set_pending(Rows_log_event *const pending_arg)
301 {
302 m_pending= pending_arg;
303 }
304
305 void set_incident(void)
306 {
307 incident= TRUE;
308 }
309
310 bool has_incident(void)
311 {
312 return(incident);
313 }
314
315 void reset()
316 {
317 bool cache_was_empty= empty();
318 bool truncate_file= (cache_log.file != -1 &&
319 my_b_write_tell(&cache_log) > CACHE_FILE_TRUNC_SIZE);
320 truncate(0,1); // Forget what's in cache
321 if (!cache_was_empty)
322 compute_statistics();
323 if (truncate_file)
324 my_chsize(cache_log.file, 0, 0, MYF(MY_WME));
325
326 status= 0;
327 incident= FALSE;
328 before_stmt_pos= MY_OFF_T_UNDEF;
329 DBUG_ASSERT(empty());
330 }
331
332 my_off_t get_byte_position() const
333 {
334 return my_b_tell(&cache_log);
335 }
336
337 my_off_t get_prev_position()
338 {
339 return(before_stmt_pos);
340 }
341
342 void set_prev_position(my_off_t pos)
343 {
344 before_stmt_pos= pos;
345 }
346
347 void restore_prev_position()
348 {
349 truncate(before_stmt_pos);
350 }
351
352 void restore_savepoint(my_off_t pos)
353 {
354 truncate(pos);
355 if (pos < before_stmt_pos)
356 before_stmt_pos= MY_OFF_T_UNDEF;
357 }
358
359 void set_binlog_cache_info(my_off_t param_max_binlog_cache_size,
360 ulong *param_ptr_binlog_cache_use,
361 ulong *param_ptr_binlog_cache_disk_use)
362 {
363 /*
364 The assertions guarantee that the set_binlog_cache_info is
365 called just once and information passed as parameters are
366 never zero.
367
368 This is done while calling the constructor binlog_cache_mngr.
369 We cannot set information in the constructor binlog_cache_data
370 because the space for binlog_cache_mngr is allocated through
371 a placement new.
372
373 In the future, we can refactor this and change it to avoid
374 the set_binlog_info.
375 */
376 DBUG_ASSERT(saved_max_binlog_cache_size == 0 &&
377 param_max_binlog_cache_size != 0 &&
378 ptr_binlog_cache_use == 0 &&
379 param_ptr_binlog_cache_use != 0 &&
380 ptr_binlog_cache_disk_use == 0 &&
381 param_ptr_binlog_cache_disk_use != 0);
382
383 saved_max_binlog_cache_size= param_max_binlog_cache_size;
384 ptr_binlog_cache_use= param_ptr_binlog_cache_use;
385 ptr_binlog_cache_disk_use= param_ptr_binlog_cache_disk_use;
386 cache_log.end_of_file= saved_max_binlog_cache_size;
387 }
388
389 void add_status(enum_logged_status status_arg)
390 {
391 status|= status_arg;
392 }
393
394 /*
395 Cache to store data before copying it to the binary log.
396 */
397 IO_CACHE cache_log;
398
399 private:
400 /*
401 Pending binrows event. This event is the event where the rows are currently
402 written.
403 */
404 Rows_log_event *m_pending;
405
406 /*
407 Bit flags for what has been writting to cache. Used to
408 discard logs without any data changes.
409 see enum_logged_status;
410 */
411 uint32 status;
412
413 /*
414 Binlog position before the start of the current statement.
415 */
416 my_off_t before_stmt_pos;
417
418 /*
419 This indicates that some events did not get into the cache and most likely
420 it is corrupted.
421 */
422 bool incident;
423
424 /**
425 This function computes binlog cache and disk usage.
426 */
427 void compute_statistics()
428 {
429 statistic_increment(*ptr_binlog_cache_use, &LOCK_status);
430 if (cache_log.disk_writes != 0)
431 {
432 #ifdef REAL_STATISTICS
433 statistic_add(*ptr_binlog_cache_disk_use,
434 cache_log.disk_writes, &LOCK_status);
435 #else
436 statistic_increment(*ptr_binlog_cache_disk_use, &LOCK_status);
437 #endif
438 cache_log.disk_writes= 0;
439 }
440 }
441
442 /*
443 Stores the values of maximum size of the cache allowed when this cache
444 is configured. This corresponds to either
445 . max_binlog_cache_size or max_binlog_stmt_cache_size.
446 */
447 my_off_t saved_max_binlog_cache_size;
448
449 /*
450 Stores a pointer to the status variable that keeps track of the in-memory
451 cache usage. This corresponds to either
452 . binlog_cache_use or binlog_stmt_cache_use.
453 */
454 ulong *ptr_binlog_cache_use;
455
456 /*
457 Stores a pointer to the status variable that keeps track of the disk
458 cache usage. This corresponds to either
459 . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
460 */
461 ulong *ptr_binlog_cache_disk_use;
462
463 /*
464 It truncates the cache to a certain position. This includes deleting the
465 pending event.
466 */
467 void truncate(my_off_t pos, bool reset_cache=0)
468 {
469 DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
470 cache_log.error=0;
471 if (pending())
472 {
473 delete pending();
474 set_pending(0);
475 }
476 reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, reset_cache);
477 cache_log.end_of_file= saved_max_binlog_cache_size;
478 }
479
480 binlog_cache_data& operator=(const binlog_cache_data& info);
481 binlog_cache_data(const binlog_cache_data& info);
482 };
483
484
485 void Log_event_writer::add_status(enum_logged_status status)
486 {
487 if (likely(cache_data))
488 cache_data->add_status(status);
489 }
490
491 class binlog_cache_mngr {
492 public:
493 binlog_cache_mngr(my_off_t param_max_binlog_stmt_cache_size,
494 my_off_t param_max_binlog_cache_size,
495 ulong *param_ptr_binlog_stmt_cache_use,
496 ulong *param_ptr_binlog_stmt_cache_disk_use,
497 ulong *param_ptr_binlog_cache_use,
498 ulong *param_ptr_binlog_cache_disk_use)
499 : last_commit_pos_offset(0), using_xa(FALSE), xa_xid(0)
500 {
501 stmt_cache.set_binlog_cache_info(param_max_binlog_stmt_cache_size,
502 param_ptr_binlog_stmt_cache_use,
503 param_ptr_binlog_stmt_cache_disk_use);
504 trx_cache.set_binlog_cache_info(param_max_binlog_cache_size,
505 param_ptr_binlog_cache_use,
506 param_ptr_binlog_cache_disk_use);
507 last_commit_pos_file[0]= 0;
508 }
509
510 void reset(bool do_stmt, bool do_trx)
511 {
512 if (do_stmt)
513 stmt_cache.reset();
514 if (do_trx)
515 {
516 trx_cache.reset();
517 using_xa= FALSE;
518 last_commit_pos_file[0]= 0;
519 last_commit_pos_offset= 0;
520 }
521 }
522
523 binlog_cache_data* get_binlog_cache_data(bool is_transactional)
524 {
525 return (is_transactional ? &trx_cache : &stmt_cache);
526 }
527
528 IO_CACHE* get_binlog_cache_log(bool is_transactional)
529 {
530 return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
531 }
532
533 binlog_cache_data stmt_cache;
534
535 binlog_cache_data trx_cache;
536
537 /*
538 Binlog position for current transaction.
539 For START TRANSACTION WITH CONSISTENT SNAPSHOT, this is the binlog
540 position corresponding to the snapshot taken. During (and after) commit,
541 this is set to the binlog position corresponding to just after the
542 commit (so storage engines can store it in their transaction log).
543 */
544 char last_commit_pos_file[FN_REFLEN];
545 my_off_t last_commit_pos_offset;
546
547 /*
548 Flag set true if this transaction is committed with log_xid() as part of
549 XA, false if not.
550 */
551 bool using_xa;
552 my_xid xa_xid;
553 bool need_unlog;
554 /*
555 Id of binlog that transaction was written to; only needed if need_unlog is
556 true.
557 */
558 ulong binlog_id;
559 /* Set if we get an error during commit that must be returned from unlog(). */
560 bool delayed_error;
561
562 private:
563
564 binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
565 binlog_cache_mngr(const binlog_cache_mngr& info);
566 };
567
568 bool LOGGER::is_log_table_enabled(uint log_table_type)
569 {
570 switch (log_table_type) {
571 case QUERY_LOG_SLOW:
572 return (table_log_handler != NULL) && global_system_variables.sql_log_slow
573 && (log_output_options & LOG_TABLE);
574 case QUERY_LOG_GENERAL:
575 return (table_log_handler != NULL) && opt_log
576 && (log_output_options & LOG_TABLE);
577 default:
578 DBUG_ASSERT(0);
579 return FALSE; /* make compiler happy */
580 }
581 }
582
583 /**
584 Check if a given table is opened log table
585
586 @param table Table to check
587 @param check_if_opened Only fail if it's a log table in use
588 @param error_msg String to put in error message if not ok.
589 No error message if 0
590 @return 0 ok
591 @return # Type of log file
592 */
593
594 int check_if_log_table(const TABLE_LIST *table,
595 bool check_if_opened,
596 const char *error_msg)
597 {
598 int result= 0;
599 if (table->db.length == 5 &&
600 !my_strcasecmp(table_alias_charset, table->db.str, "mysql"))
601 {
602 const char *table_name= table->table_name.str;
603
604 if (table->table_name.length == 11 &&
605 !my_strcasecmp(table_alias_charset, table_name, "general_log"))
606 {
607 result= QUERY_LOG_GENERAL;
608 goto end;
609 }
610
611 if (table->table_name.length == 8 &&
612 !my_strcasecmp(table_alias_charset, table_name, "slow_log"))
613 {
614 result= QUERY_LOG_SLOW;
615 goto end;
616 }
617 }
618 return 0;
619
620 end:
621 if (!check_if_opened || logger.is_log_table_enabled(result))
622 {
623 if (error_msg)
624 my_error(ER_BAD_LOG_STATEMENT, MYF(0), error_msg);
625 return result;
626 }
627 return 0;
628 }
629
630
631 Log_to_csv_event_handler::Log_to_csv_event_handler()
632 {
633 }
634
635
636 Log_to_csv_event_handler::~Log_to_csv_event_handler()
637 {
638 }
639
640
641 void Log_to_csv_event_handler::cleanup()
642 {
643 logger.is_log_tables_initialized= FALSE;
644 }
645
646 /* log event handlers */
647
648 /**
649 Log command to the general log table
650
651 Log given command to the general log table.
652
653 @param event_time command start timestamp
654 @param user_host the pointer to the string with user@host info
655 @param user_host_len length of the user_host string. this is computed
656 once and passed to all general log event handlers
657 @param thread_id Id of the thread, issued a query
658 @param command_type the type of the command being logged
659 @param command_type_len the length of the string above
660 @param sql_text the very text of the query being executed
661 @param sql_text_len the length of sql_text string
662
663
664 @return This function attempts to never call my_error(). This is
665 necessary, because general logging happens already after a statement
666 status has been sent to the client, so the client can not see the
667 error anyway. Besides, the error is not related to the statement
668 being executed and is internal, and thus should be handled
669 internally (@todo: how?).
670 If a write to the table has failed, the function attempts to
671 write to a short error message to the file. The failure is also
672 indicated in the return value.
673
674 @retval FALSE OK
675 @retval TRUE error occurred
676 */
677
678 bool Log_to_csv_event_handler::
679 log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
680 const char *command_type, size_t command_type_len,
681 const char *sql_text, size_t sql_text_len,
682 CHARSET_INFO *client_cs)
683 {
684 TABLE_LIST table_list;
685 TABLE *table;
686 bool result= TRUE;
687 bool need_close= FALSE;
688 bool need_pop= FALSE;
689 bool need_rnd_end= FALSE;
690 uint field_index;
691 Silence_log_table_errors error_handler;
692 Open_tables_backup open_tables_backup;
693 ulonglong save_thd_options;
694 bool save_time_zone_used;
695 DBUG_ENTER("log_general");
696
697 /*
698 CSV uses TIME_to_timestamp() internally if table needs to be repaired
699 which will set thd->time_zone_used
700 */
701 save_time_zone_used= thd->time_zone_used;
702
703 save_thd_options= thd->variables.option_bits;
704 thd->variables.option_bits&= ~OPTION_BIN_LOG;
705
706 table_list.init_one_table(&MYSQL_SCHEMA_NAME, &GENERAL_LOG_NAME, 0,
707 TL_WRITE_CONCURRENT_INSERT);
708
709 /*
710 1) open_log_table generates an error of the
711 table can not be opened or is corrupted.
712 2) "INSERT INTO general_log" can generate warning sometimes.
713
714 Suppress these warnings and errors, they can't be dealt with
715 properly anyway.
716
717 QQ: this problem needs to be studied in more detail.
718 Comment this 2 lines and run "cast.test" to see what's happening.
719 */
720 thd->push_internal_handler(& error_handler);
721 need_pop= TRUE;
722
723 if (!(table= open_log_table(thd, &table_list, &open_tables_backup)))
724 goto err;
725
726 need_close= TRUE;
727
728 if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
729 table->file->ha_rnd_init_with_error(0))
730 goto err;
731
732 need_rnd_end= TRUE;
733
734 /* Honor next number columns if present */
735 table->next_number_field= table->found_next_number_field;
736
737 /*
738 NOTE: we do not call restore_record() here, as all fields are
739 filled by the Logger (=> no need to load default ones).
740 */
741
742 /*
743 We do not set a value for table->field[0], as it will use
744 default value (which is CURRENT_TIMESTAMP).
745 */
746
747 /* check that all columns exist */
748 if (table->s->fields < 6)
749 goto err;
750
751 DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
752
753 ((Field_timestamp*) table->field[0])->store_TIME(
754 hrtime_to_my_time(event_time), hrtime_sec_part(event_time));
755
756 /* do a write */
757 if (table->field[1]->store(user_host, user_host_len, client_cs) ||
758 table->field[2]->store((longlong) thread_id_arg, TRUE) ||
759 table->field[3]->store((longlong) global_system_variables.server_id,
760 TRUE) ||
761 table->field[4]->store(command_type, command_type_len, client_cs))
762 goto err;
763
764 /*
765 A positive return value in store() means truncation.
766 Still logging a message in the log in this case.
767 */
768 table->field[5]->flags|= FIELDFLAG_HEX_ESCAPE;
769 if (table->field[5]->store(sql_text, sql_text_len, client_cs) < 0)
770 goto err;
771
772 /* mark all fields as not null */
773 table->field[1]->set_notnull();
774 table->field[2]->set_notnull();
775 table->field[3]->set_notnull();
776 table->field[4]->set_notnull();
777 table->field[5]->set_notnull();
778
779 /* Set any extra columns to their default values */
780 for (field_index= 6 ; field_index < table->s->fields ; field_index++)
781 {
782 table->field[field_index]->set_default();
783 }
784
785 /* log table entries are not replicated */
786 if (table->file->ha_write_row(table->record[0]))
787 goto err;
788
789 result= FALSE;
790
791 err:
792 if (result && !thd->killed)
793 sql_print_error("Failed to write to mysql.general_log: %s",
794 error_handler.message());
795
796 if (need_rnd_end)
797 {
798 table->file->ha_rnd_end();
799 table->file->ha_release_auto_increment();
800 }
801 if (need_pop)
802 thd->pop_internal_handler();
803 if (need_close)
804 close_log_table(thd, &open_tables_backup);
805
806 thd->variables.option_bits= save_thd_options;
807 thd->time_zone_used= save_time_zone_used;
808 DBUG_RETURN(result);
809 }
810
811
812 /*
813 Log a query to the slow log table
814
815 SYNOPSIS
816 log_slow()
817 thd THD of the query
818 current_time current timestamp
819 user_host the pointer to the string with user@host info
820 user_host_len length of the user_host string. this is computed once
821 and passed to all general log event handlers
822 query_time Amount of time the query took to execute (in microseconds)
823 lock_time Amount of time the query was locked (in microseconds)
824 is_command The flag, which determines, whether the sql_text is a
825 query or an administrator command (these are treated
826 differently by the old logging routines)
827 sql_text the very text of the query or administrator command
828 processed
829 sql_text_len the length of sql_text string
830
831 DESCRIPTION
832
833 Log a query to the slow log table
834
835 RETURN
836 FALSE - OK
837 TRUE - error occurred
838 */
839
840 bool Log_to_csv_event_handler::
841 log_slow(THD *thd, my_hrtime_t current_time,
842 const char *user_host, size_t user_host_len,
843 ulonglong query_utime, ulonglong lock_utime, bool is_command,
844 const char *sql_text, size_t sql_text_len)
845 {
846 TABLE_LIST table_list;
847 TABLE *table;
848 bool result= TRUE;
849 bool need_close= FALSE;
850 bool need_rnd_end= FALSE;
851 Silence_log_table_errors error_handler;
852 Open_tables_backup open_tables_backup;
853 CHARSET_INFO *client_cs= thd->variables.character_set_client;
854 bool save_time_zone_used;
855 long query_time= (long) MY_MIN(query_utime/1000000, TIME_MAX_VALUE_SECONDS);
856 long lock_time= (long) MY_MIN(lock_utime/1000000, TIME_MAX_VALUE_SECONDS);
857 long query_time_micro= (long) (query_utime % 1000000);
858 long lock_time_micro= (long) (lock_utime % 1000000);
859
860 DBUG_ENTER("Log_to_csv_event_handler::log_slow");
861
862 thd->push_internal_handler(& error_handler);
863 /*
864 CSV uses TIME_to_timestamp() internally if table needs to be repaired
865 which will set thd->time_zone_used
866 */
867 save_time_zone_used= thd->time_zone_used;
868
869 table_list.init_one_table(&MYSQL_SCHEMA_NAME, &SLOW_LOG_NAME, 0,
870 TL_WRITE_CONCURRENT_INSERT);
871
872 if (!(table= open_log_table(thd, &table_list, &open_tables_backup)))
873 goto err;
874
875 need_close= TRUE;
876
877 if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
878 table->file->ha_rnd_init_with_error(0))
879 goto err;
880
881 need_rnd_end= TRUE;
882
883 /* Honor next number columns if present */
884 table->next_number_field= table->found_next_number_field;
885
886 restore_record(table, s->default_values); // Get empty record
887
888 /* check that all columns exist */
889 if (table->s->fields < 13)
890 goto err;
891
892 /* store the time and user values */
893 DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
894 ((Field_timestamp*) table->field[0])->store_TIME(
895 hrtime_to_my_time(current_time), hrtime_sec_part(current_time));
896 if (table->field[1]->store(user_host, user_host_len, client_cs))
897 goto err;
898
899 /*
900 A TIME field can not hold the full longlong range; query_time or
901 lock_time may be truncated without warning here, if greater than
902 839 hours (~35 days)
903 */
904 MYSQL_TIME t;
905 t.neg= 0;
906
907 /* fill in query_time field */
908 calc_time_from_sec(&t, query_time, query_time_micro);
909 if (table->field[2]->store_time(&t))
910 goto err;
911 /* lock_time */
912 calc_time_from_sec(&t, lock_time, lock_time_micro);
913 if (table->field[3]->store_time(&t))
914 goto err;
915 /* rows_sent */
916 if (table->field[4]->store((longlong) thd->get_sent_row_count(), TRUE))
917 goto err;
918 /* rows_examined */
919 if (table->field[5]->store((longlong) thd->get_examined_row_count(), TRUE))
920 goto err;
921
922 /* fill database field */
923 if (thd->db.str)
924 {
925 if (table->field[6]->store(thd->db.str, thd->db.length, client_cs))
926 goto err;
927 table->field[6]->set_notnull();
928 }
929
930 if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
931 {
932 if (table->
933 field[7]->store((longlong)
934 thd->first_successful_insert_id_in_prev_stmt_for_binlog,
935 TRUE))
936 goto err;
937 table->field[7]->set_notnull();
938 }
939
940 /*
941 Set value if we do an insert on autoincrement column. Note that for
942 some engines (those for which get_auto_increment() does not leave a
943 table lock until the statement ends), this is just the first value and
944 the next ones used may not be contiguous to it.
945 */
946 if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
947 {
948 if (table->
949 field[8]->store((longlong)
950 thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(), TRUE))
951 goto err;
952 table->field[8]->set_notnull();
953 }
954
955 if (table->field[9]->store((longlong)global_system_variables.server_id, TRUE))
956 goto err;
957 table->field[9]->set_notnull();
958
959 /*
960 Column sql_text.
961 A positive return value in store() means truncation.
962 Still logging a message in the log in this case.
963 */
964 if (table->field[10]->store(sql_text, sql_text_len, client_cs) < 0)
965 goto err;
966
967 if (table->field[11]->store((longlong) thd->thread_id, TRUE))
968 goto err;
969
970 /* Rows_affected */
971 if (table->field[12]->store(thd->get_stmt_da()->is_ok() ?
972 (longlong) thd->get_stmt_da()->affected_rows() :
973 0, TRUE))
974 goto err;
975
976 /* log table entries are not replicated */
977 if (table->file->ha_write_row(table->record[0]))
978 goto err;
979
980 result= FALSE;
981
982 err:
983 thd->pop_internal_handler();
984
985 if (result && !thd->killed)
986 sql_print_error("Failed to write to mysql.slow_log: %s",
987 error_handler.message());
988
989 if (need_rnd_end)
990 {
991 table->file->ha_rnd_end();
992 table->file->ha_release_auto_increment();
993 }
994 if (need_close)
995 close_log_table(thd, &open_tables_backup);
996 thd->time_zone_used= save_time_zone_used;
997 DBUG_RETURN(result);
998 }
999
1000 int Log_to_csv_event_handler::
1001 activate_log(THD *thd, uint log_table_type)
1002 {
1003 TABLE_LIST table_list;
1004 TABLE *table;
1005 LEX_CSTRING *UNINIT_VAR(log_name);
1006 int result;
1007 Open_tables_backup open_tables_backup;
1008
1009 DBUG_ENTER("Log_to_csv_event_handler::activate_log");
1010
1011 if (log_table_type == QUERY_LOG_GENERAL)
1012 {
1013 log_name= &GENERAL_LOG_NAME;
1014 }
1015 else
1016 {
1017 DBUG_ASSERT(log_table_type == QUERY_LOG_SLOW);
1018
1019 log_name= &SLOW_LOG_NAME;
1020 }
1021 table_list.init_one_table(&MYSQL_SCHEMA_NAME, log_name, 0, TL_WRITE_CONCURRENT_INSERT);
1022
1023 table= open_log_table(thd, &table_list, &open_tables_backup);
1024 if (table)
1025 {
1026 result= 0;
1027 close_log_table(thd, &open_tables_backup);
1028 }
1029 else
1030 result= 1;
1031
1032 DBUG_RETURN(result);
1033 }
1034
1035 bool Log_to_csv_event_handler::
1036 log_error(enum loglevel level, const char *format, va_list args)
1037 {
1038 /* No log table is implemented */
1039 DBUG_ASSERT(0);
1040 return FALSE;
1041 }
1042
1043 bool Log_to_file_event_handler::
1044 log_error(enum loglevel level, const char *format,
1045 va_list args)
1046 {
1047 return vprint_msg_to_log(level, format, args);
1048 }
1049
1050 void Log_to_file_event_handler::init_pthread_objects()
1051 {
1052 mysql_log.init_pthread_objects();
1053 mysql_slow_log.init_pthread_objects();
1054 }
1055
1056
1057 /** Wrapper around MYSQL_LOG::write() for slow log. */
1058
1059 bool Log_to_file_event_handler::
1060 log_slow(THD *thd, my_hrtime_t current_time,
1061 const char *user_host, size_t user_host_len,
1062 ulonglong query_utime, ulonglong lock_utime, bool is_command,
1063 const char *sql_text, size_t sql_text_len)
1064 {
1065 Silence_log_table_errors error_handler;
1066 thd->push_internal_handler(&error_handler);
1067 bool retval= mysql_slow_log.write(thd, hrtime_to_my_time(current_time),
1068 user_host, user_host_len,
1069 query_utime, lock_utime, is_command,
1070 sql_text, sql_text_len);
1071 thd->pop_internal_handler();
1072 return retval;
1073 }
1074
1075
1076 /**
1077 Wrapper around MYSQL_LOG::write() for general log. We need it since we
1078 want all log event handlers to have the same signature.
1079 */
1080
1081 bool Log_to_file_event_handler::
1082 log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
1083 const char *command_type, size_t command_type_len,
1084 const char *sql_text, size_t sql_text_len,
1085 CHARSET_INFO *client_cs)
1086 {
1087 Silence_log_table_errors error_handler;
1088 thd->push_internal_handler(&error_handler);
1089 bool retval= mysql_log.write(hrtime_to_time(event_time), user_host,
1090 user_host_len,
1091 thread_id_arg, command_type, command_type_len,
1092 sql_text, sql_text_len);
1093 thd->pop_internal_handler();
1094 return retval;
1095 }
1096
1097
1098 bool Log_to_file_event_handler::init()
1099 {
1100 if (!is_initialized)
1101 {
1102 if (global_system_variables.sql_log_slow)
1103 mysql_slow_log.open_slow_log(opt_slow_logname);
1104
1105 if (opt_log)
1106 mysql_log.open_query_log(opt_logname);
1107
1108 is_initialized= TRUE;
1109 }
1110
1111 return FALSE;
1112 }
1113
1114
1115 void Log_to_file_event_handler::cleanup()
1116 {
1117 mysql_log.cleanup();
1118 mysql_slow_log.cleanup();
1119 }
1120
1121 void Log_to_file_event_handler::flush()
1122 {
1123 /* reopen log files */
1124 if (opt_log)
1125 mysql_log.reopen_file();
1126 if (global_system_variables.sql_log_slow)
1127 mysql_slow_log.reopen_file();
1128 }
1129
1130 /*
1131 Log error with all enabled log event handlers
1132
1133 SYNOPSIS
1134 error_log_print()
1135
1136 level The level of the error significance: NOTE,
1137 WARNING or ERROR.
1138 format format string for the error message
1139 args list of arguments for the format string
1140
1141 RETURN
1142 FALSE - OK
1143 TRUE - error occurred
1144 */
1145
1146 bool LOGGER::error_log_print(enum loglevel level, const char *format,
1147 va_list args)
1148 {
1149 bool error= FALSE;
1150 Log_event_handler **current_handler;
1151
1152 /* currently we don't need locking here as there is no error_log table */
1153 for (current_handler= error_log_handler_list ; *current_handler ;)
1154 error= (*current_handler++)->log_error(level, format, args) || error;
1155
1156 return error;
1157 }
1158
1159
1160 void LOGGER::cleanup_base()
1161 {
1162 DBUG_ASSERT(inited == 1);
1163 mysql_rwlock_destroy(&LOCK_logger);
1164 if (table_log_handler)
1165 {
1166 table_log_handler->cleanup();
1167 delete table_log_handler;
1168 table_log_handler= NULL;
1169 }
1170 if (file_log_handler)
1171 file_log_handler->cleanup();
1172 }
1173
1174
1175 void LOGGER::cleanup_end()
1176 {
1177 DBUG_ASSERT(inited == 1);
1178 if (file_log_handler)
1179 {
1180 delete file_log_handler;
1181 file_log_handler=NULL;
1182 }
1183 inited= 0;
1184 }
1185
1186
1187 /**
1188 Perform basic log initialization: create file-based log handler and
1189 init error log.
1190 */
1191 void LOGGER::init_base()
1192 {
1193 DBUG_ASSERT(inited == 0);
1194 inited= 1;
1195
1196 /*
1197 Here we create file log handler. We don't do it for the table log handler
1198 here as it cannot be created so early. The reason is THD initialization,
1199 which depends on the system variables (parsed later).
1200 */
1201 if (!file_log_handler)
1202 file_log_handler= new Log_to_file_event_handler;
1203
1204 /* by default we use traditional error log */
1205 init_error_log(LOG_FILE);
1206
1207 file_log_handler->init_pthread_objects();
1208 mysql_rwlock_init(key_rwlock_LOCK_logger, &LOCK_logger);
1209 }
1210
1211
1212 void LOGGER::init_log_tables()
1213 {
1214 if (!table_log_handler)
1215 table_log_handler= new Log_to_csv_event_handler;
1216
1217 if (!is_log_tables_initialized &&
1218 !table_log_handler->init() && !file_log_handler->init())
1219 is_log_tables_initialized= TRUE;
1220 }
1221
1222
1223 /**
1224 Close and reopen the slow log (with locks).
1225
1226 @returns FALSE.
1227 */
1228 bool LOGGER::flush_slow_log()
1229 {
1230 /*
1231 Now we lock logger, as nobody should be able to use logging routines while
1232 log tables are closed
1233 */
1234 logger.lock_exclusive();
1235
1236 /* Reopen slow log file */
1237 if (global_system_variables.sql_log_slow)
1238 file_log_handler->get_mysql_slow_log()->reopen_file();
1239
1240 /* End of log flush */
1241 logger.unlock();
1242
1243 return 0;
1244 }
1245
1246
1247 /**
1248 Close and reopen the general log (with locks).
1249
1250 @returns FALSE.
1251 */
1252 bool LOGGER::flush_general_log()
1253 {
1254 /*
1255 Now we lock logger, as nobody should be able to use logging routines while
1256 log tables are closed
1257 */
1258 logger.lock_exclusive();
1259
1260 /* Reopen general log file */
1261 if (opt_log)
1262 file_log_handler->get_mysql_log()->reopen_file();
1263
1264 /* End of log flush */
1265 logger.unlock();
1266
1267 return 0;
1268 }
1269
1270
1271 /*
1272 Log slow query with all enabled log event handlers
1273
1274 SYNOPSIS
1275 slow_log_print()
1276
1277 thd THD of the query being logged
1278 query The query being logged
1279 query_length The length of the query string
1280 current_utime Current time in microseconds (from undefined start)
1281
1282 RETURN
1283 FALSE OK
1284 TRUE error occurred
1285 */
1286
1287 bool LOGGER::slow_log_print(THD *thd, const char *query, size_t query_length,
1288 ulonglong current_utime)
1289
1290 {
1291 bool error= FALSE;
1292 Log_event_handler **current_handler;
1293 bool is_command= FALSE;
1294 char user_host_buff[MAX_USER_HOST_SIZE + 1];
1295 Security_context *sctx= thd->security_ctx;
1296 uint user_host_len= 0;
1297 ulonglong query_utime, lock_utime;
1298
1299 DBUG_ASSERT(thd->enable_slow_log);
1300 /*
1301 Print the message to the buffer if we have slow log enabled
1302 */
1303
1304 if (*slow_log_handler_list)
1305 {
1306 /* do not log slow queries from replication threads */
1307 if (!thd->variables.sql_log_slow)
1308 return 0;
1309
1310 lock_shared();
1311 if (!global_system_variables.sql_log_slow)
1312 {
1313 unlock();
1314 return 0;
1315 }
1316
1317 /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
1318 user_host_len= (uint)(strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
1319 sctx->priv_user, "[",
1320 sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), "] @ ",
1321 sctx->host ? sctx->host : "", " [",
1322 sctx->ip ? sctx->ip : "", "]", NullS) -
1323 user_host_buff);
1324
1325 DBUG_ASSERT(thd->start_utime);
1326 DBUG_ASSERT(thd->start_time);
1327 query_utime= (current_utime - thd->start_utime);
1328 lock_utime= (thd->utime_after_lock - thd->start_utime);
1329 my_hrtime_t current_time= { hrtime_from_time(thd->start_time) +
1330 thd->start_time_sec_part + query_utime };
1331
1332 if (!query)
1333 {
1334 is_command= TRUE;
1335 query= command_name[thd->get_command()].str;
1336 query_length= (uint)command_name[thd->get_command()].length;
1337 }
1338
1339 for (current_handler= slow_log_handler_list; *current_handler ;)
1340 error= (*current_handler++)->log_slow(thd, current_time,
1341 user_host_buff, user_host_len,
1342 query_utime, lock_utime, is_command,
1343 query, query_length) || error;
1344
1345 unlock();
1346 }
1347 return error;
1348 }
1349
1350 bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
1351 const char *query, size_t query_length)
1352 {
1353 bool error= FALSE;
1354 Log_event_handler **current_handler= general_log_handler_list;
1355 char user_host_buff[MAX_USER_HOST_SIZE + 1];
1356 uint user_host_len= 0;
1357 my_hrtime_t current_time;
1358
1359 DBUG_ASSERT(thd);
1360
1361 user_host_len= make_user_name(thd, user_host_buff);
1362
1363 current_time= my_hrtime();
1364
1365 mysql_audit_general_log(thd, hrtime_to_time(current_time),
1366 user_host_buff, user_host_len,
1367 command_name[(uint) command].str,
1368 (uint)command_name[(uint) command].length,
1369 query, (uint)query_length);
1370
1371 if (opt_log && log_command(thd, command))
1372 {
1373 lock_shared();
1374 while (*current_handler)
1375 error|= (*current_handler++)->
1376 log_general(thd, current_time, user_host_buff,
1377 user_host_len, thd->thread_id,
1378 command_name[(uint) command].str,
1379 command_name[(uint) command].length,
1380 query, query_length,
1381 thd->variables.character_set_client) || error;
1382 unlock();
1383 }
1384
1385 return error;
1386 }
1387
1388 bool LOGGER::general_log_print(THD *thd, enum enum_server_command command,
1389 const char *format, va_list args)
1390 {
1391 size_t message_buff_len= 0;
1392 char message_buff[MAX_LOG_BUFFER_SIZE];
1393
1394 /* prepare message */
1395 if (format)
1396 message_buff_len= my_vsnprintf(message_buff, sizeof(message_buff),
1397 format, args);
1398 else
1399 message_buff[0]= '\0';
1400
1401 return general_log_write(thd, command, message_buff, message_buff_len);
1402 }
1403
1404 void LOGGER::init_error_log(ulonglong error_log_printer)
1405 {
1406 if (error_log_printer & LOG_NONE)
1407 {
1408 error_log_handler_list[0]= 0;
1409 return;
1410 }
1411
1412 switch (error_log_printer) {
1413 case LOG_FILE:
1414 error_log_handler_list[0]= file_log_handler;
1415 error_log_handler_list[1]= 0;
1416 break;
1417 /* these two are disabled for now */
1418 case LOG_TABLE:
1419 DBUG_ASSERT(0);
1420 break;
1421 case LOG_TABLE|LOG_FILE:
1422 DBUG_ASSERT(0);
1423 break;
1424 }
1425 }
1426
1427 void LOGGER::init_slow_log(ulonglong slow_log_printer)
1428 {
1429 if (slow_log_printer & LOG_NONE)
1430 {
1431 slow_log_handler_list[0]= 0;
1432 return;
1433 }
1434
1435 switch (slow_log_printer) {
1436 case LOG_FILE:
1437 slow_log_handler_list[0]= file_log_handler;
1438 slow_log_handler_list[1]= 0;
1439 break;
1440 case LOG_TABLE:
1441 slow_log_handler_list[0]= table_log_handler;
1442 slow_log_handler_list[1]= 0;
1443 break;
1444 case LOG_TABLE|LOG_FILE:
1445 slow_log_handler_list[0]= file_log_handler;
1446 slow_log_handler_list[1]= table_log_handler;
1447 slow_log_handler_list[2]= 0;
1448 break;
1449 }
1450 }
1451
1452 void LOGGER::init_general_log(ulonglong general_log_printer)
1453 {
1454 if (general_log_printer & LOG_NONE)
1455 {
1456 general_log_handler_list[0]= 0;
1457 return;
1458 }
1459
1460 switch (general_log_printer) {
1461 case LOG_FILE:
1462 general_log_handler_list[0]= file_log_handler;
1463 general_log_handler_list[1]= 0;
1464 break;
1465 case LOG_TABLE:
1466 general_log_handler_list[0]= table_log_handler;
1467 general_log_handler_list[1]= 0;
1468 break;
1469 case LOG_TABLE|LOG_FILE:
1470 general_log_handler_list[0]= file_log_handler;
1471 general_log_handler_list[1]= table_log_handler;
1472 general_log_handler_list[2]= 0;
1473 break;
1474 }
1475 }
1476
1477
1478 bool LOGGER::activate_log_handler(THD* thd, uint log_type)
1479 {
1480 MYSQL_QUERY_LOG *file_log;
1481 bool res= FALSE;
1482 lock_exclusive();
1483 switch (log_type) {
1484 case QUERY_LOG_SLOW:
1485 if (!global_system_variables.sql_log_slow)
1486 {
1487 file_log= file_log_handler->get_mysql_slow_log();
1488
1489 file_log->open_slow_log(opt_slow_logname);
1490 if (table_log_handler->activate_log(thd, QUERY_LOG_SLOW))
1491 {
1492 /* Error printed by open table in activate_log() */
1493 res= TRUE;
1494 file_log->close(0);
1495 }
1496 else
1497 {
1498 init_slow_log(log_output_options);
1499 global_system_variables.sql_log_slow= TRUE;
1500 }
1501 }
1502 break;
1503 case QUERY_LOG_GENERAL:
1504 if (!opt_log)
1505 {
1506 file_log= file_log_handler->get_mysql_log();
1507
1508 file_log->open_query_log(opt_logname);
1509 if (table_log_handler->activate_log(thd, QUERY_LOG_GENERAL))
1510 {
1511 /* Error printed by open table in activate_log() */
1512 res= TRUE;
1513 file_log->close(0);
1514 }
1515 else
1516 {
1517 init_general_log(log_output_options);
1518 opt_log= TRUE;
1519 }
1520 }
1521 break;
1522 default:
1523 DBUG_ASSERT(0);
1524 }
1525 unlock();
1526 return res;
1527 }
1528
1529
1530 void LOGGER::deactivate_log_handler(THD *thd, uint log_type)
1531 {
1532 my_bool *tmp_opt= 0;
1533 MYSQL_LOG *UNINIT_VAR(file_log);
1534
1535 switch (log_type) {
1536 case QUERY_LOG_SLOW:
1537 tmp_opt= &global_system_variables.sql_log_slow;
1538 file_log= file_log_handler->get_mysql_slow_log();
1539 break;
1540 case QUERY_LOG_GENERAL:
1541 tmp_opt= &opt_log;
1542 file_log= file_log_handler->get_mysql_log();
1543 break;
1544 default:
1545 MY_ASSERT_UNREACHABLE();
1546 }
1547
1548 if (!(*tmp_opt))
1549 return;
1550
1551 lock_exclusive();
1552 file_log->close(0);
1553 *tmp_opt= FALSE;
1554 unlock();
1555 }
1556
1557
1558 /* the parameters are unused for the log tables */
1559 bool Log_to_csv_event_handler::init()
1560 {
1561 return 0;
1562 }
1563
1564 int LOGGER::set_handlers(ulonglong error_log_printer,
1565 ulonglong slow_log_printer,
1566 ulonglong general_log_printer)
1567 {
1568 /* error log table is not supported yet */
1569 DBUG_ASSERT(error_log_printer < LOG_TABLE);
1570
1571 lock_exclusive();
1572
1573 if ((slow_log_printer & LOG_TABLE || general_log_printer & LOG_TABLE) &&
1574 !is_log_tables_initialized)
1575 {
1576 slow_log_printer= (slow_log_printer & ~LOG_TABLE) | LOG_FILE;
1577 general_log_printer= (general_log_printer & ~LOG_TABLE) | LOG_FILE;
1578
1579 sql_print_error("Failed to initialize log tables. "
1580 "Falling back to the old-fashioned logs");
1581 }
1582
1583 init_error_log(error_log_printer);
1584 init_slow_log(slow_log_printer);
1585 init_general_log(general_log_printer);
1586
1587 unlock();
1588
1589 return 0;
1590 }
1591
1592 /*
1593 Save position of binary log transaction cache.
1594
1595 SYNPOSIS
1596 binlog_trans_log_savepos()
1597
1598 thd The thread to take the binlog data from
1599 pos Pointer to variable where the position will be stored
1600
1601 DESCRIPTION
1602
1603 Save the current position in the binary log transaction cache into
1604 the variable pointed to by 'pos'
1605 */
1606
1607 static void
1608 binlog_trans_log_savepos(THD *thd, my_off_t *pos)
1609 {
1610 DBUG_ENTER("binlog_trans_log_savepos");
1611 DBUG_ASSERT(pos != NULL);
1612 binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
1613 DBUG_ASSERT((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open());
1614 *pos= cache_mngr->trx_cache.get_byte_position();
1615 DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos));
1616 DBUG_VOID_RETURN;
1617 }
1618
1619
1620 /*
1621 Truncate the binary log transaction cache.
1622
1623 SYNPOSIS
1624 binlog_trans_log_truncate()
1625
1626 thd The thread to take the binlog data from
1627 pos Position to truncate to
1628
1629 DESCRIPTION
1630
1631 Truncate the binary log to the given position. Will not change
1632 anything else.
1633
1634 */
1635 static void
1636 binlog_trans_log_truncate(THD *thd, my_off_t pos)
1637 {
1638 DBUG_ENTER("binlog_trans_log_truncate");
1639 DBUG_PRINT("enter", ("pos: %lu", (ulong) pos));
1640
1641 DBUG_ASSERT(thd_get_ha_data(thd, binlog_hton) != NULL);
1642 /* Only true if binlog_trans_log_savepos() wasn't called before */
1643 DBUG_ASSERT(pos != ~(my_off_t) 0);
1644
1645 binlog_cache_mngr *const cache_mngr=
1646 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
1647 cache_mngr->trx_cache.restore_savepoint(pos);
1648 DBUG_VOID_RETURN;
1649 }
1650
1651
1652 /*
1653 this function is mostly a placeholder.
1654 conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
1655 should be moved here.
1656 */
1657
1658 int binlog_init(void *p)
1659 {
1660 binlog_hton= (handlerton *)p;
1661 binlog_hton->state= (WSREP_ON || opt_bin_log) ? SHOW_OPTION_YES
1662 : SHOW_OPTION_NO;
1663 binlog_hton->db_type=DB_TYPE_BINLOG;
1664 binlog_hton->savepoint_offset= sizeof(my_off_t);
1665 binlog_hton->close_connection= binlog_close_connection;
1666 binlog_hton->savepoint_set= binlog_savepoint_set;
1667 binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
1668 binlog_hton->savepoint_rollback_can_release_mdl=
1669 binlog_savepoint_rollback_can_release_mdl;
1670 binlog_hton->commit= binlog_commit;
1671 binlog_hton->rollback= binlog_rollback;
1672 binlog_hton->prepare= binlog_prepare;
1673 binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
1674 binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
1675 return 0;
1676 }
1677
1678 #ifdef WITH_WSREP
1679 #include "wsrep_binlog.h"
1680 #endif /* WITH_WSREP */
1681 static int binlog_close_connection(handlerton *hton, THD *thd)
1682 {
1683 DBUG_ENTER("binlog_close_connection");
1684 binlog_cache_mngr *const cache_mngr=
1685 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
1686 #ifdef WITH_WSREP
1687 if (WSREP(thd) && cache_mngr && !cache_mngr->trx_cache.empty()) {
1688 IO_CACHE* cache= get_trans_log(thd);
1689 uchar *buf;
1690 size_t len=0;
1691 wsrep_write_cache_buf(cache, &buf, &len);
1692 WSREP_WARN("binlog trx cache not empty (%zu bytes) @ connection close %lld",
1693 len, (longlong) thd->thread_id);
1694 if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
1695
1696 cache = cache_mngr->get_binlog_cache_log(false);
1697 wsrep_write_cache_buf(cache, &buf, &len);
1698 WSREP_WARN("binlog stmt cache not empty (%zu bytes) @ connection close %lld",
1699 len, (longlong) thd->thread_id);
1700 if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
1701 }
1702 #endif /* WITH_WSREP */
1703 DBUG_ASSERT(cache_mngr->trx_cache.empty() && cache_mngr->stmt_cache.empty());
1704 thd_set_ha_data(thd, binlog_hton, NULL);
1705 cache_mngr->~binlog_cache_mngr();
1706 my_free(cache_mngr);
1707 DBUG_RETURN(0);
1708 }
1709
1710 /*
1711 This function flushes a cache upon commit/rollback.
1712
1713 SYNOPSIS
1714 binlog_flush_cache()
1715
1716 thd The thread whose transaction should be ended
1717 cache_mngr Pointer to the binlog_cache_mngr to use
1718 all True if the entire transaction should be ended, false if
1719 only the statement transaction should be ended.
1720 end_ev The end event to use (COMMIT, ROLLBACK, or commit XID)
1721 using_stmt True if the statement cache should be flushed
1722 using_trx True if the transaction cache should be flushed
1723
1724 DESCRIPTION
1725
1726 End the currently transaction or statement. The transaction can be either
1727 a real transaction or a statement transaction.
1728
1729 This can be to commit a transaction, with a COMMIT query event or an XA
1730 commit XID event. But it can also be to rollback a transaction with a
1731 ROLLBACK query event, used for rolling back transactions which also
1732 contain updates to non-transactional tables. Or it can be a flush of
1733 a statement cache.
1734 */
1735
1736 static int
1737 binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
1738 Log_event *end_ev, bool all, bool using_stmt,
1739 bool using_trx)
1740 {
1741 int error= 0;
1742 DBUG_ENTER("binlog_flush_cache");
1743 DBUG_PRINT("enter", ("end_ev: %p", end_ev));
1744
1745 if ((using_stmt && !cache_mngr->stmt_cache.empty()) ||
1746 (using_trx && !cache_mngr->trx_cache.empty()))
1747 {
1748 if (using_stmt && thd->binlog_flush_pending_rows_event(TRUE, FALSE))
1749 DBUG_RETURN(1);
1750 if (using_trx && thd->binlog_flush_pending_rows_event(TRUE, TRUE))
1751 DBUG_RETURN(1);
1752
1753 /*
1754 Doing a commit or a rollback including non-transactional tables,
1755 i.e., ending a transaction where we might write the transaction
1756 cache to the binary log.
1757
1758 We can always end the statement when ending a transaction since
1759 transactions are not allowed inside stored functions. If they
1760 were, we would have to ensure that we're not ending a statement
1761 inside a stored function.
1762 */
1763 error= mysql_bin_log.write_transaction_to_binlog(thd, cache_mngr,
1764 end_ev, all,
1765 using_stmt, using_trx);
1766 }
1767 else
1768 {
1769 /*
1770 This can happen in row-format binlog with something like
1771 BEGIN; INSERT INTO nontrans_table; INSERT IGNORE INTO trans_table;
1772 The nontrans_table is written directly into the binlog before commit,
1773 and if the trans_table is ignored there will be no rows to write when
1774 we get here.
1775
1776 So there is no work to do. Therefore, we will not increment any XID
1777 count, so we must not decrement any XID count in unlog().
1778 */
1779 cache_mngr->need_unlog= 0;
1780 }
1781 cache_mngr->reset(using_stmt, using_trx);
1782
1783 DBUG_ASSERT((!using_stmt || cache_mngr->stmt_cache.empty()) &&
1784 (!using_trx || cache_mngr->trx_cache.empty()));
1785 DBUG_RETURN(error);
1786 }
1787
1788
1789 /**
1790 This function flushes the stmt-cache upon commit.
1791
1792 @param thd The thread whose transaction should be flushed
1793 @param cache_mngr Pointer to the cache manager
1794
1795 @return
1796 nonzero if an error pops up when flushing the cache.
1797 */
1798 static inline int
1799 binlog_commit_flush_stmt_cache(THD *thd, bool all,
1800 binlog_cache_mngr *cache_mngr)
1801 {
1802 DBUG_ENTER("binlog_commit_flush_stmt_cache");
1803 #ifdef WITH_WSREP
1804 if (thd->wsrep_mysql_replicated > 0)
1805 {
1806 DBUG_ASSERT(WSREP(thd));
1807 WSREP_DEBUG("avoiding binlog_commit_flush_trx_cache: %d",
1808 thd->wsrep_mysql_replicated);
1809 return 0;
1810 }
1811 #endif
1812
1813 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1814 FALSE, TRUE, TRUE, 0);
1815 DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE));
1816 }
1817
1818 /**
1819 This function flushes the trx-cache upon commit.
1820
1821 @param thd The thread whose transaction should be flushed
1822 @param cache_mngr Pointer to the cache manager
1823
1824 @return
1825 nonzero if an error pops up when flushing the cache.
1826 */
1827 static inline int
1828 binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr)
1829 {
1830 DBUG_ENTER("binlog_commit_flush_trx_cache");
1831 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1832 TRUE, TRUE, TRUE, 0);
1833 DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
1834 }
1835
1836 /**
1837 This function flushes the trx-cache upon rollback.
1838
1839 @param thd The thread whose transaction should be flushed
1840 @param cache_mngr Pointer to the cache manager
1841
1842 @return
1843 nonzero if an error pops up when flushing the cache.
1844 */
1845 static inline int
1846 binlog_rollback_flush_trx_cache(THD *thd, bool all,
1847 binlog_cache_mngr *cache_mngr)
1848 {
1849 Query_log_event end_evt(thd, STRING_WITH_LEN("ROLLBACK"),
1850 TRUE, TRUE, TRUE, 0);
1851 return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
1852 }
1853
1854 /**
1855 This function flushes the trx-cache upon commit.
1856
1857 @param thd The thread whose transaction should be flushed
1858 @param cache_mngr Pointer to the cache manager
1859 @param xid Transaction Id
1860
1861 @return
1862 nonzero if an error pops up when flushing the cache.
1863 */
1864 static inline int
1865 binlog_commit_flush_xid_caches(THD *thd, binlog_cache_mngr *cache_mngr,
1866 bool all, my_xid xid)
1867 {
1868 if (xid)
1869 {
1870 Xid_log_event end_evt(thd, xid, TRUE);
1871 return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
1872 }
1873 else
1874 {
1875 /*
1876 Empty xid occurs in XA COMMIT ... ONE PHASE.
1877 In this case, we do not have a MySQL xid for the transaction, and the
1878 external XA transaction coordinator will have to handle recovery if
1879 needed. So we end the transaction with a plain COMMIT query event.
1880 */
1881 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1882 TRUE, TRUE, TRUE, 0);
1883 return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
1884 }
1885 }
1886
1887 /**
1888 This function truncates the transactional cache upon committing or rolling
1889 back either a transaction or a statement.
1890
1891 @param thd The thread whose transaction should be flushed
1892 @param cache_mngr Pointer to the cache data to be flushed
1893 @param all @c true means truncate the transaction, otherwise the
1894 statement must be truncated.
1895
1896 @return
1897 nonzero if an error pops up when truncating the transactional cache.
1898 */
1899 static int
1900 binlog_truncate_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all)
1901 {
1902 DBUG_ENTER("binlog_truncate_trx_cache");
1903 int error=0;
1904 /*
1905 This function handles transactional changes and as such this flag
1906 equals to true.
1907 */
1908 bool const is_transactional= TRUE;
1909
1910 DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1911 FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1912 FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1913 all ? "all" : "stmt"));
1914
1915 thd->binlog_remove_pending_rows_event(TRUE, is_transactional);
1916 /*
1917 If rolling back an entire transaction or a single statement not
1918 inside a transaction, we reset the transaction cache.
1919 */
1920 if (ending_trans(thd, all))
1921 {
1922 if (cache_mngr->trx_cache.has_incident())
1923 error= mysql_bin_log.write_incident(thd);
1924
1925 thd->clear_binlog_table_maps();
1926
1927 cache_mngr->reset(false, true);
1928 }
1929 /*
1930 If rolling back a statement in a transaction, we truncate the
1931 transaction cache to remove the statement.
1932 */
1933 else
1934 cache_mngr->trx_cache.restore_prev_position();
1935
1936 DBUG_ASSERT(thd->binlog_get_pending_rows_event(is_transactional) == NULL);
1937 DBUG_RETURN(error);
1938 }
1939
1940 static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1941 {
1942 /*
1943 do nothing.
1944 just pretend we can do 2pc, so that MySQL won't
1945 switch to 1pc.
1946 real work will be done in MYSQL_BIN_LOG::log_and_order()
1947 */
1948 return 0;
1949 }
1950
1951 /*
1952 We flush the cache wrapped in a beging/rollback if:
1953 . aborting a single or multi-statement transaction and;
1954 . the OPTION_KEEP_LOG is active or;
1955 . the format is STMT and a non-trans table was updated or;
1956 . the format is MIXED and a temporary non-trans table was
1957 updated or;
1958 . the format is MIXED, non-trans table was updated and
1959 aborting a single statement transaction;
1960 */
1961 static bool trans_cannot_safely_rollback(THD *thd, bool all)
1962 {
1963 DBUG_ASSERT(ending_trans(thd, all));
1964
1965 return ((thd->variables.option_bits & OPTION_KEEP_LOG) ||
1966 (trans_has_updated_non_trans_table(thd) &&
1967 thd->wsrep_binlog_format() == BINLOG_FORMAT_STMT) ||
1968 (thd->transaction.all.has_modified_non_trans_temp_table() &&
1969 thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED) ||
1970 (trans_has_updated_non_trans_table(thd) &&
1971 ending_single_stmt_trans(thd,all) &&
1972 thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED));
1973 }
1974
1975
1976 /**
1977 This function is called once after each statement.
1978
1979 It has the responsibility to flush the caches to the binary log on commits.
1980
1981 @param hton The binlog handlerton.
1982 @param thd The client thread that executes the transaction.
1983 @param all This is @c true if this is a real transaction commit, and
1984 @false otherwise.
1985
1986 @see handlerton::commit
1987 */
1988 static int binlog_commit(handlerton *hton, THD *thd, bool all)
1989 {
1990 int error= 0;
1991 PSI_stage_info org_stage;
1992 DBUG_ENTER("binlog_commit");
1993
1994 binlog_cache_mngr *const cache_mngr=
1995 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
1996
1997 if (!cache_mngr)
1998 {
1999 DBUG_ASSERT(WSREP(thd));
2000 DBUG_RETURN(0);
2001 }
2002
2003 DBUG_PRINT("debug",
2004 ("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
2005 all,
2006 YESNO(thd->in_multi_stmt_transaction_mode()),
2007 YESNO(thd->transaction.all.modified_non_trans_table),
2008 YESNO(thd->transaction.stmt.modified_non_trans_table)));
2009
2010
2011 thd->backup_stage(&org_stage);
2012 THD_STAGE_INFO(thd, stage_binlog_write);
2013 if (!cache_mngr->stmt_cache.empty())
2014 {
2015 error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
2016 }
2017
2018 if (cache_mngr->trx_cache.empty())
2019 {
2020 /*
2021 we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
2022 */
2023 cache_mngr->reset(false, true);
2024 THD_STAGE_INFO(thd, org_stage);
2025 DBUG_RETURN(error);
2026 }
2027
2028 /*
2029 We commit the transaction if:
2030 - We are not in a transaction and committing a statement, or
2031 - We are in a transaction and a full transaction is committed.
2032 Otherwise, we accumulate the changes.
2033 */
2034 if (likely(!error) && ending_trans(thd, all))
2035 error= binlog_commit_flush_trx_cache(thd, all, cache_mngr);
2036
2037 /*
2038 This is part of the stmt rollback.
2039 */
2040 if (!all)
2041 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
2042
2043 THD_STAGE_INFO(thd, org_stage);
2044 DBUG_RETURN(error);
2045 }
2046
2047 /**
2048 This function is called when a transaction or a statement is rolled back.
2049
2050 @param hton The binlog handlerton.
2051 @param thd The client thread that executes the transaction.
2052 @param all This is @c true if this is a real transaction rollback, and
2053 @false otherwise.
2054
2055 @see handlerton::rollback
2056 */
2057 static int binlog_rollback(handlerton *hton, THD *thd, bool all)
2058 {
2059 DBUG_ENTER("binlog_rollback");
2060 int error= 0;
2061 binlog_cache_mngr *const cache_mngr=
2062 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
2063
2064 if (!cache_mngr)
2065 {
2066 DBUG_ASSERT(WSREP(thd));
2067 DBUG_RETURN(0);
2068 }
2069
2070 DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
2071 YESNO(all),
2072 YESNO(thd->transaction.all.modified_non_trans_table),
2073 YESNO(thd->transaction.stmt.modified_non_trans_table)));
2074
2075 /*
2076 If an incident event is set we do not flush the content of the statement
2077 cache because it may be corrupted.
2078 */
2079 if (cache_mngr->stmt_cache.has_incident())
2080 {
2081 error= mysql_bin_log.write_incident(thd);
2082 cache_mngr->reset(true, false);
2083 }
2084 else if (!cache_mngr->stmt_cache.empty())
2085 {
2086 error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
2087 }
2088
2089 if (cache_mngr->trx_cache.empty())
2090 {
2091 /*
2092 we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
2093 */
2094 cache_mngr->reset(false, true);
2095 DBUG_RETURN(error);
2096 }
2097 if (!wsrep_emulate_bin_log && mysql_bin_log.check_write_error(thd))
2098 {
2099 /*
2100 "all == true" means that a "rollback statement" triggered the error and
2101 this function was called. However, this must not happen as a rollback
2102 is written directly to the binary log. And in auto-commit mode, a single
2103 statement that is rolled back has the flag all == false.
2104 */
2105 DBUG_ASSERT(!all);
2106 /*
2107 We reach this point if the effect of a statement did not properly get into
2108 a cache and need to be rolled back.
2109 */
2110 error |= binlog_truncate_trx_cache(thd, cache_mngr, all);
2111 }
2112 else if (likely(!error))
2113 {
2114 if (ending_trans(thd, all) && trans_cannot_safely_rollback(thd, all))
2115 error= binlog_rollback_flush_trx_cache(thd, all, cache_mngr);
2116 /*
2117 Truncate the cache if:
2118 . aborting a single or multi-statement transaction or;
2119 . the current statement created or dropped a temporary table
2120 while having actual STATEMENT format;
2121 . the format is not STMT or no non-trans table was
2122 updated and;
2123 . the format is not MIXED or no temporary non-trans table
2124 was updated.
2125 */
2126 else if (ending_trans(thd, all) ||
2127 (!(thd->transaction.stmt.has_created_dropped_temp_table() &&
2128 !thd->is_current_stmt_binlog_format_row()) &&
2129 (!stmt_has_updated_non_trans_table(thd) ||
2130 thd->wsrep_binlog_format() != BINLOG_FORMAT_STMT) &&
2131 (!thd->transaction.stmt.has_modified_non_trans_temp_table() ||
2132 thd->wsrep_binlog_format() != BINLOG_FORMAT_MIXED)))
2133 error= binlog_truncate_trx_cache(thd, cache_mngr, all);
2134 }
2135
2136 /*
2137 This is part of the stmt rollback.
2138 */
2139 if (!all)
2140 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
2141
2142 DBUG_RETURN(error);
2143 }
2144
2145
2146 void binlog_reset_cache(THD *thd)
2147 {
2148 binlog_cache_mngr *const cache_mngr= opt_bin_log ?
2149 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton) : 0;
2150 DBUG_ENTER("binlog_reset_cache");
2151 if (cache_mngr)
2152 {
2153 thd->binlog_remove_pending_rows_event(TRUE, TRUE);
2154 cache_mngr->reset(true, true);
2155 }
2156 DBUG_VOID_RETURN;
2157 }
2158
2159
2160 void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
2161 {
2162 DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
2163
2164 write_error= 1;
2165
2166 if (unlikely(check_write_error(thd)))
2167 DBUG_VOID_RETURN;
2168
2169 if (my_errno == EFBIG)
2170 {
2171 if (is_transactional)
2172 {
2173 my_message(ER_TRANS_CACHE_FULL, ER_THD(thd, ER_TRANS_CACHE_FULL), MYF(MY_WME));
2174 }
2175 else
2176 {
2177 my_message(ER_STMT_CACHE_FULL, ER_THD(thd, ER_STMT_CACHE_FULL), MYF(MY_WME));
2178 }
2179 }
2180 else
2181 {
2182 my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name, errno);
2183 }
2184
2185 DBUG_VOID_RETURN;
2186 }
2187
2188 bool MYSQL_BIN_LOG::check_write_error(THD *thd)
2189 {
2190 DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
2191
2192 bool checked= FALSE;
2193
2194 if (likely(!thd->is_error()))
2195 DBUG_RETURN(checked);
2196
2197 switch (thd->get_stmt_da()->sql_errno())
2198 {
2199 case ER_TRANS_CACHE_FULL:
2200 case ER_STMT_CACHE_FULL:
2201 case ER_ERROR_ON_WRITE:
2202 case ER_BINLOG_LOGGING_IMPOSSIBLE:
2203 checked= TRUE;
2204 break;
2205 }
2206
2207 DBUG_RETURN(checked);
2208 }
2209
2210
2211 /**
2212 @note
2213 How do we handle this (unlikely but legal) case:
2214 @verbatim
2215 [transaction] + [update to non-trans table] + [rollback to savepoint] ?
2216 @endverbatim
2217 The problem occurs when a savepoint is before the update to the
2218 non-transactional table. Then when there's a rollback to the savepoint, if we
2219 simply truncate the binlog cache, we lose the part of the binlog cache where
2220 the update is. If we want to not lose it, we need to write the SAVEPOINT
command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
is easy: it is simply written at the end of the binlog cache, but the former
should be *inserted* at the place where the user called SAVEPOINT. The
2224 solution is that when the user calls SAVEPOINT, we write it to the binlog
2225 cache (so no need to later insert it). As transactions are never intermixed
2226 in the binary log (i.e. they are serialized), we won't have conflicts with
2227 savepoint names when using mysqlbinlog or in the slave SQL thread.
2228 Then when ROLLBACK TO SAVEPOINT is called, if we updated some
2229 non-transactional table, we don't truncate the binlog cache but instead write
2230 ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
2231 will chop the SAVEPOINT command from the binlog cache, which is good as in
2232 that case there is no need to have it in the binlog).
2233 */
2234
2235 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
2236 {
2237 int error= 1;
2238 DBUG_ENTER("binlog_savepoint_set");
2239
2240 char buf[1024];
2241
2242 String log_query(buf, sizeof(buf), &my_charset_bin);
2243 if (log_query.copy(STRING_WITH_LEN("SAVEPOINT "), &my_charset_bin) ||
2244 append_identifier(thd, &log_query, &thd->lex->ident))
2245 DBUG_RETURN(1);
2246 int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
2247 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2248 TRUE, FALSE, TRUE, errcode);
2249 /*
2250 We cannot record the position before writing the statement
2251 because a rollback to a savepoint (.e.g. consider it "S") would
2252 prevent the savepoint statement (i.e. "SAVEPOINT S") from being
2253 written to the binary log despite the fact that the server could
2254 still issue other rollback statements to the same savepoint (i.e.
2255 "S").
2256 Given that the savepoint is valid until the server releases it,
2257 ie, until the transaction commits or it is released explicitly,
2258 we need to log it anyway so that we don't have "ROLLBACK TO S"
2259 or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
2260 log.
2261 */
2262 if (likely(!(error= mysql_bin_log.write(&qinfo))))
2263 binlog_trans_log_savepos(thd, (my_off_t*) sv);
2264
2265 DBUG_RETURN(error);
2266 }
2267
2268 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
2269 {
2270 DBUG_ENTER("binlog_savepoint_rollback");
2271
2272 /*
2273 Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
2274 non-transactional table. Otherwise, truncate the binlog cache starting
2275 from the SAVEPOINT command.
2276 */
2277 if (unlikely(trans_has_updated_non_trans_table(thd) ||
2278 (thd->variables.option_bits & OPTION_KEEP_LOG)))
2279 {
2280 char buf[1024];
2281 String log_query(buf, sizeof(buf), &my_charset_bin);
2282 if (log_query.copy(STRING_WITH_LEN("ROLLBACK TO "), &my_charset_bin) ||
2283 append_identifier(thd, &log_query, &thd->lex->ident))
2284 DBUG_RETURN(1);
2285 int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
2286 Query_log_event qinfo(thd, log_query.ptr(), log_query.length(),
2287 TRUE, FALSE, TRUE, errcode);
2288 DBUG_RETURN(mysql_bin_log.write(&qinfo));
2289 }
2290
2291 binlog_trans_log_truncate(thd, *(my_off_t*)sv);
2292
2293 /*
2294 When a SAVEPOINT is executed inside a stored function/trigger we force the
2295 pending event to be flushed with a STMT_END_F flag and clear the table maps
2296 as well to ensure that following DMLs will have a clean state to start
2297 with. ROLLBACK inside a stored routine has to finalize possibly existing
2298 current row-based pending event with cleaning up table maps. That ensures
2299 that following DMLs will have a clean state to start with.
2300 */
2301 if (thd->in_sub_stmt)
2302 thd->clear_binlog_table_maps();
2303
2304 DBUG_RETURN(0);
2305 }
2306
2307
2308 /**
2309 Check whether binlog state allows to safely release MDL locks after
2310 rollback to savepoint.
2311
2312 @param hton The binlog handlerton.
2313 @param thd The client thread that executes the transaction.
2314
2315 @return true - It is safe to release MDL locks.
2316 false - If it is not.
2317 */
2318 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
2319 THD *thd)
2320 {
2321 DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
2322 /*
2323 If we have not updated any non-transactional tables rollback
2324 to savepoint will simply truncate binlog cache starting from
2325 SAVEPOINT command. So it should be safe to release MDL acquired
2326 after SAVEPOINT command in this case.
2327 */
2328 DBUG_RETURN(!trans_cannot_safely_rollback(thd, true));
2329 }
2330
2331
2332 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
2333 {
2334 uchar magic[4];
2335 DBUG_ASSERT(my_b_tell(log) == 0);
2336
2337 if (my_b_read(log, magic, sizeof(magic)))
2338 {
2339 *errmsg = "I/O error reading the header from the binary log";
2340 sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
2341 log->error);
2342 return 1;
2343 }
2344 if (bcmp(magic, BINLOG_MAGIC, sizeof(magic)))
2345 {
2346 *errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL";
2347 return 1;
2348 }
2349 return 0;
2350 }
2351
2352
2353 File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
2354 {
2355 File file;
2356 DBUG_ENTER("open_binlog");
2357
2358 if ((file= mysql_file_open(key_file_binlog,
2359 log_file_name, O_RDONLY | O_BINARY | O_SHARE,
2360 MYF(MY_WME))) < 0)
2361 {
2362 sql_print_error("Failed to open log (file '%s', errno %d)",
2363 log_file_name, my_errno);
2364 *errmsg = "Could not open log file";
2365 goto err;
2366 }
2367 if (init_io_cache(log, file, (size_t)binlog_file_cache_size, READ_CACHE, 0, 0,
2368 MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
2369 {
2370 sql_print_error("Failed to create a cache on log (file '%s')",
2371 log_file_name);
2372 *errmsg = "Could not open log file";
2373 goto err;
2374 }
2375 if (check_binlog_magic(log,errmsg))
2376 goto err;
2377 DBUG_RETURN(file);
2378
2379 err:
2380 if (file >= 0)
2381 {
2382 mysql_file_close(file, MYF(0));
2383 end_io_cache(log);
2384 }
2385 DBUG_RETURN(-1);
2386 }
2387
#ifdef _WIN32
static int eventSource = 0;

/**
  Register this executable as the message source for the "MySQL" entry
  of the Windows Application event log.

  The registration is attempted only once per process. It is best
  effort: if the registry key cannot be created the function returns
  without touching the registry any further.
*/
static void setup_windows_event_source()
{
  HKEY hRegKey= NULL;
  TCHAR szPath[MAX_PATH];
  DWORD dwTypes;
  DWORD dwPathLen;

  if (eventSource)               // Ensure that we are only called once
    return;
  eventSource= 1;

  // Create the event source registry key
  if (RegCreateKey(HKEY_LOCAL_MACHINE,
                   "SYSTEM\\CurrentControlSet\\Services\\EventLog\\Application\\MySQL",
                   &hRegKey) != ERROR_SUCCESS)
    return;                      // no key handle: nothing to set or close

  /* Name of the PE module that contains the message resource */
  dwPathLen= GetModuleFileName(NULL, szPath, MAX_PATH);

  /*
    Register EventMessageFile. GetModuleFileName() returns 0 on failure
    and the buffer size when the path was truncated; in both cases szPath
    must not be used.
  */
  if (dwPathLen > 0 && dwPathLen < MAX_PATH)
    (void) RegSetValueEx(hRegKey, "EventMessageFile", 0, REG_EXPAND_SZ,
                         (PBYTE) szPath, (DWORD) (strlen(szPath) + 1));

  /* Register supported event types */
  dwTypes= (EVENTLOG_ERROR_TYPE | EVENTLOG_WARNING_TYPE |
            EVENTLOG_INFORMATION_TYPE);
  (void) RegSetValueEx(hRegKey, "TypesSupported", 0, REG_DWORD,
                       (LPBYTE) &dwTypes, sizeof dwTypes);

  RegCloseKey(hRegKey);
}

#endif /* _WIN32 */
2424
2425
2426 /**
2427 Find a unique filename for 'filename.#'.
2428
2429 Set '#' to the number next to the maximum found in the most
2430 recent log file extension.
2431
2432 This function will return nonzero if: (i) the generated name
2433 exceeds FN_REFLEN; (ii) if the number of extensions is exhausted;
2434 or (iii) some other error happened while examining the filesystem.
2435
2436 @return
2437 nonzero if not possible to get unique filename.
2438 */
2439
2440 static int find_uniq_filename(char *name, ulong next_log_number)
2441 {
2442 uint i;
2443 char buff[FN_REFLEN], ext_buf[FN_REFLEN];
2444 struct st_my_dir *dir_info;
2445 struct fileinfo *file_info;
2446 ulong max_found= 0, next= 0, number= 0;
2447 size_t buf_length, length;
2448 char *start, *end;
2449 int error= 0;
2450 DBUG_ENTER("find_uniq_filename");
2451
2452 length= dirname_part(buff, name, &buf_length);
2453 start= name + length;
2454 end= strend(start);
2455
2456 *end='.';
2457 length= (size_t) (end - start + 1);
2458
2459 if ((DBUG_EVALUATE_IF("error_unique_log_filename", 1,
2460 unlikely(!(dir_info= my_dir(buff,
2461 MYF(MY_DONT_SORT)))))))
2462 { // This shouldn't happen
2463 strmov(end,".1"); // use name+1
2464 DBUG_RETURN(1);
2465 }
2466 file_info= dir_info->dir_entry;
2467 max_found= next_log_number ? next_log_number-1 : 0;
2468 for (i= dir_info->number_of_files ; i-- ; file_info++)
2469 {
2470 if (strncmp(file_info->name, start, length) == 0 &&
2471 test_if_number(file_info->name+length, &number,0))
2472 {
2473 set_if_bigger(max_found, number);
2474 }
2475 }
2476 my_dirend(dir_info);
2477
2478 /* check if reached the maximum possible extension number */
2479 if (max_found >= MAX_LOG_UNIQUE_FN_EXT)
2480 {
2481 sql_print_error("Log filename extension number exhausted: %06lu. \
2482 Please fix this by archiving old logs and \
2483 updating the index files.", max_found);
2484 error= 1;
2485 goto end;
2486 }
2487
2488 next= max_found + 1;
2489 if (sprintf(ext_buf, "%06lu", next)<0)
2490 {
2491 error= 1;
2492 goto end;
2493 }
2494 *end++='.';
2495
2496 /*
2497 Check if the generated extension size + the file name exceeds the
2498 buffer size used. If one did not check this, then the filename might be
2499 truncated, resulting in error.
2500 */
2501 if (((strlen(ext_buf) + (end - name)) >= FN_REFLEN))
2502 {
2503 sql_print_error("Log filename too large: %s%s (%zu). \
2504 Please fix this by archiving old logs and updating the \
2505 index files.", name, ext_buf, (strlen(ext_buf) + (end - name)));
2506 error= 1;
2507 goto end;
2508 }
2509
2510 if (sprintf(end, "%06lu", next)<0)
2511 {
2512 error= 1;
2513 goto end;
2514 }
2515
2516 /* print warning if reaching the end of available extensions. */
2517 if ((next > (MAX_LOG_UNIQUE_FN_EXT - LOG_WARN_UNIQUE_FN_EXT_LEFT)))
2518 sql_print_warning("Next log extension: %lu. \
2519 Remaining log filename extensions: %lu. \
2520 Please consider archiving some logs.", next, (MAX_LOG_UNIQUE_FN_EXT - next));
2521
2522 end:
2523 DBUG_RETURN(error);
2524 }
2525
2526
2527 void MYSQL_LOG::init(enum_log_type log_type_arg,
2528 enum cache_type io_cache_type_arg)
2529 {
2530 DBUG_ENTER("MYSQL_LOG::init");
2531 log_type= log_type_arg;
2532 io_cache_type= io_cache_type_arg;
2533 DBUG_PRINT("info",("log_type: %d", log_type));
2534 DBUG_VOID_RETURN;
2535 }
2536
2537
2538 bool MYSQL_LOG::init_and_set_log_file_name(const char *log_name,
2539 const char *new_name,
2540 ulong next_log_number,
2541 enum_log_type log_type_arg,
2542 enum cache_type io_cache_type_arg)
2543 {
2544 init(log_type_arg, io_cache_type_arg);
2545
2546 if (new_name)
2547 {
2548 strmov(log_file_name, new_name);
2549 }
2550 else if (!new_name && generate_new_name(log_file_name, log_name,
2551 next_log_number))
2552 return TRUE;
2553
2554 return FALSE;
2555 }
2556
2557
2558 /*
2559 Open a (new) log file.
2560
2561 SYNOPSIS
2562 open()
2563
2564 log_name The name of the log to open
2565 log_type_arg The type of the log. E.g. LOG_NORMAL
2566 new_name The new name for the logfile. This is only needed
2567 when the method is used to open the binlog file.
2568 io_cache_type_arg The type of the IO_CACHE to use for this log file
2569
2570 DESCRIPTION
2571 Open the logfile, init IO_CACHE and write startup messages
2572 (in case of general and slow query logs).
2573
2574 RETURN VALUES
2575 0 ok
2576 1 error
2577 */
2578
2579 bool MYSQL_LOG::open(
2580 #ifdef HAVE_PSI_INTERFACE
2581 PSI_file_key log_file_key,
2582 #endif
2583 const char *log_name, enum_log_type log_type_arg,
2584 const char *new_name, ulong next_log_number,
2585 enum cache_type io_cache_type_arg)
2586 {
2587 char buff[FN_REFLEN];
2588 MY_STAT f_stat;
2589 File file= -1;
2590 my_off_t seek_offset;
2591 bool is_fifo = false;
2592 int open_flags= O_CREAT | O_BINARY | O_CLOEXEC;
2593 DBUG_ENTER("MYSQL_LOG::open");
2594 DBUG_PRINT("enter", ("log_type: %d", (int) log_type_arg));
2595
2596 write_error= 0;
2597
2598 if (!(name= my_strdup(log_name, MYF(MY_WME))))
2599 {
2600 name= (char *)log_name; // for the error message
2601 goto err;
2602 }
2603
2604 /*
2605 log_type is LOG_UNKNOWN if we should not generate a new name
2606 This is only used when called from MYSQL_BINARY_LOG::open, which
2607 has already updated log_file_name.
2608 */
2609 if (log_type_arg != LOG_UNKNOWN &&
2610 init_and_set_log_file_name(name, new_name, next_log_number,
2611 log_type_arg, io_cache_type_arg))
2612 goto err;
2613
2614 is_fifo = my_stat(log_file_name, &f_stat, MYF(0)) &&
2615 MY_S_ISFIFO(f_stat.st_mode);
2616
2617 if (io_cache_type == SEQ_READ_APPEND)
2618 open_flags |= O_RDWR | O_APPEND;
2619 else
2620 open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
2621
2622 if (is_fifo)
2623 open_flags |= O_NONBLOCK;
2624
2625 db[0]= 0;
2626
2627 #ifdef HAVE_PSI_INTERFACE
2628 /* Keep the key for reopen */
2629 m_log_file_key= log_file_key;
2630 #endif
2631
2632 if ((file= mysql_file_open(log_file_key, log_file_name, open_flags,
2633 MYF(MY_WME | ME_WAITTANG))) < 0)
2634 goto err;
2635
2636 if (is_fifo)
2637 seek_offset= 0;
2638 else if ((seek_offset= mysql_file_tell(file, MYF(MY_WME))))
2639 goto err;
2640
2641 if (init_io_cache(&log_file, file, IO_SIZE, io_cache_type, seek_offset, 0,
2642 MYF(MY_WME | MY_NABP |
2643 ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
2644 goto err;
2645
2646 if (log_type == LOG_NORMAL)
2647 {
2648 char *end;
2649 size_t len=my_snprintf(buff, sizeof(buff), "%s, Version: %s (%s). "
2650 #ifdef EMBEDDED_LIBRARY
2651 "embedded library\n",
2652 my_progname, server_version, MYSQL_COMPILATION_COMMENT
2653 #elif defined(_WIN32)
2654 "started with:\nTCP Port: %d, Named Pipe: %s\n",
2655 my_progname, server_version, MYSQL_COMPILATION_COMMENT,
2656 mysqld_port, mysqld_unix_port
2657 #else
2658 "started with:\nTcp port: %d Unix socket: %s\n",
2659 my_progname, server_version, MYSQL_COMPILATION_COMMENT,
2660 mysqld_port, mysqld_unix_port
2661 #endif
2662 );
2663 end= strnmov(buff + len, "Time\t\t Id Command\tArgument\n",
2664 sizeof(buff) - len);
2665 if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) ||
2666 flush_io_cache(&log_file))
2667 goto err;
2668 }
2669
2670 log_state= LOG_OPENED;
2671 DBUG_RETURN(0);
2672
2673 err:
2674 sql_print_error(fatal_log_error, name, errno);
2675 if (file >= 0)
2676 mysql_file_close(file, MYF(0));
2677 end_io_cache(&log_file);
2678 my_free(name);
2679 name= NULL;
2680 log_state= LOG_CLOSED;
2681 DBUG_RETURN(1);
2682 }
2683
2684 MYSQL_LOG::MYSQL_LOG()
2685 : name(0), write_error(FALSE), inited(FALSE), log_type(LOG_UNKNOWN),
2686 log_state(LOG_CLOSED)
2687 {
2688 /*
2689 We don't want to initialize LOCK_Log here as such initialization depends on
2690 safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
2691 called only in main(). Doing initialization here would make it happen
2692 before main().
2693 */
2694 bzero((char*) &log_file, sizeof(log_file));
2695 }
2696
2697 void MYSQL_LOG::init_pthread_objects()
2698 {
2699 DBUG_ASSERT(inited == 0);
2700 inited= 1;
2701 mysql_mutex_init(key_LOG_LOCK_log, &LOCK_log, MY_MUTEX_INIT_SLOW);
2702 }
2703
2704 /*
2705 Close the log file
2706
2707 SYNOPSIS
2708 close()
2709 exiting Bitmask. LOG_CLOSE_TO_BE_OPENED is used if we intend to call
2710 open at once after close. LOG_CLOSE_DELAYED_CLOSE is used for
2711 binlog rotation, to delay actual close of the old file until
2712 we have successfully created the new file.
2713
2714 NOTES
2715 One can do an open on the object at once after doing a close.
2716 The internal structures are not freed until cleanup() is called
2717 */
2718
2719 void MYSQL_LOG::close(uint exiting)
2720 { // One can't set log_type here!
2721 DBUG_ENTER("MYSQL_LOG::close");
2722 DBUG_PRINT("enter",("exiting: %d", (int) exiting));
2723 if (log_state == LOG_OPENED)
2724 {
2725 end_io_cache(&log_file);
2726
2727 if (log_type == LOG_BIN && mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error)
2728 {
2729 write_error= 1;
2730 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2731 }
2732
2733 if (!(exiting & LOG_CLOSE_DELAYED_CLOSE) &&
2734 mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error)
2735 {
2736 write_error= 1;
2737 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2738 }
2739 }
2740
2741 log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
2742 my_free(name);
2743 name= NULL;
2744 DBUG_VOID_RETURN;
2745 }
2746
2747 /** This is called only once. */
2748
2749 void MYSQL_LOG::cleanup()
2750 {
2751 DBUG_ENTER("cleanup");
2752 if (inited)
2753 {
2754 inited= 0;
2755 mysql_mutex_destroy(&LOCK_log);
2756 close(0);
2757 }
2758 DBUG_VOID_RETURN;
2759 }
2760
2761
2762 int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name,
2763 ulong next_log_number)
2764 {
2765 fn_format(new_name, log_name, mysql_data_home, "", 4);
2766 if (log_type == LOG_BIN)
2767 {
2768 if (!fn_ext(log_name)[0])
2769 {
2770 if (DBUG_EVALUATE_IF("binlog_inject_new_name_error", TRUE, FALSE) ||
2771 unlikely(find_uniq_filename(new_name, next_log_number)))
2772 {
2773 THD *thd= current_thd;
2774 if (unlikely(thd))
2775 my_error(ER_NO_UNIQUE_LOGFILE, MYF(ME_FATALERROR), log_name);
2776 sql_print_error(ER_DEFAULT(ER_NO_UNIQUE_LOGFILE), log_name);
2777 return 1;
2778 }
2779 }
2780 }
2781 return 0;
2782 }
2783
2784
2785 /*
2786 Reopen the log file
2787
2788 SYNOPSIS
2789 reopen_file()
2790
2791 DESCRIPTION
2792 Reopen the log file. The method is used during FLUSH LOGS
2793 and locks LOCK_log mutex
2794 */
2795
2796
2797 void MYSQL_QUERY_LOG::reopen_file()
2798 {
2799 char *save_name;
2800 DBUG_ENTER("MYSQL_LOG::reopen_file");
2801
2802 mysql_mutex_lock(&LOCK_log);
2803 if (!is_open())
2804 {
2805 DBUG_PRINT("info",("log is closed"));
2806 mysql_mutex_unlock(&LOCK_log);
2807 DBUG_VOID_RETURN;
2808 }
2809
2810 save_name= name;
2811 name= 0; // Don't free name
2812 close(LOG_CLOSE_TO_BE_OPENED);
2813
2814 /*
2815 Note that at this point, log_state != LOG_CLOSED (important for is_open()).
2816 */
2817
2818 open(
2819 #ifdef HAVE_PSI_INTERFACE
2820 m_log_file_key,
2821 #endif
2822 save_name, log_type, 0, 0, io_cache_type);
2823 my_free(save_name);
2824
2825 mysql_mutex_unlock(&LOCK_log);
2826
2827 DBUG_VOID_RETURN;
2828 }
2829
2830
2831 /*
2832 Write a command to traditional general log file
2833
2834 SYNOPSIS
2835 write()
2836
2837 event_time command start timestamp
2838 user_host the pointer to the string with user@host info
2839 user_host_len length of the user_host string. this is computed once
2840 and passed to all general log event handlers
2841 thread_id Id of the thread, issued a query
2842 command_type the type of the command being logged
2843 command_type_len the length of the string above
2844 sql_text the very text of the query being executed
2845 sql_text_len the length of sql_text string
2846
2847 DESCRIPTION
2848
2849 Log given command to to normal (not rotable) log file
2850
2851 RETURN
2852 FASE - OK
2853 TRUE - error occurred
2854 */
2855
2856 bool MYSQL_QUERY_LOG::write(time_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
2857 const char *command_type, size_t command_type_len,
2858 const char *sql_text, size_t sql_text_len)
2859 {
2860 char buff[32];
2861 char local_time_buff[MAX_TIME_SIZE];
2862 struct tm start;
2863 size_t time_buff_len= 0;
2864
2865 mysql_mutex_lock(&LOCK_log);
2866
2867 /* Test if someone closed between the is_open test and lock */
2868 if (is_open())
2869 {
2870 /* for testing output of timestamp and thread id */
2871 DBUG_EXECUTE_IF("reset_log_last_time", last_time= 0;);
2872
2873 /* Note that my_b_write() assumes it knows the length for this */
2874 if (event_time != last_time)
2875 {
2876 last_time= event_time;
2877
2878 localtime_r(&event_time, &start);
2879
2880 time_buff_len= my_snprintf(local_time_buff, MAX_TIME_SIZE,
2881 "%02d%02d%02d %2d:%02d:%02d\t",
2882 start.tm_year % 100, start.tm_mon + 1,
2883 start.tm_mday, start.tm_hour,
2884 start.tm_min, start.tm_sec);
2885
2886 if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len))
2887 goto err;
2888 }
2889 else
2890 if (my_b_write(&log_file, (uchar*) "\t\t" ,2) < 0)
2891 goto err;
2892
2893 /* command_type, thread_id */
2894 size_t length= my_snprintf(buff, 32, "%6llu ", thread_id_arg);
2895
2896 if (my_b_write(&log_file, (uchar*) buff, length))
2897 goto err;
2898
2899 if (my_b_write(&log_file, (uchar*) command_type, command_type_len))
2900 goto err;
2901
2902 if (my_b_write(&log_file, (uchar*) "\t", 1))
2903 goto err;
2904
2905 /* sql_text */
2906 if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len))
2907 goto err;
2908
2909 if (my_b_write(&log_file, (uchar*) "\n", 1) ||
2910 flush_io_cache(&log_file))
2911 goto err;
2912 }
2913
2914 mysql_mutex_unlock(&LOCK_log);
2915 return FALSE;
2916 err:
2917
2918 if (!write_error)
2919 {
2920 write_error= 1;
2921 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2922 }
2923 mysql_mutex_unlock(&LOCK_log);
2924 return TRUE;
2925 }
2926
2927
2928 /*
2929 Log a query to the traditional slow log file
2930
2931 SYNOPSIS
2932 write()
2933
2934 thd THD of the query
2935 current_time current timestamp
2936 user_host the pointer to the string with user@host info
2937 user_host_len length of the user_host string. this is computed once
2938 and passed to all general log event handlers
2939 query_utime Amount of time the query took to execute (in microseconds)
2940 lock_utime Amount of time the query was locked (in microseconds)
2941 is_command The flag, which determines, whether the sql_text is a
2942 query or an administrator command.
2943 sql_text the very text of the query or administrator command
2944 processed
2945 sql_text_len the length of sql_text string
2946
2947 DESCRIPTION
2948
2949 Log a query to the slow log file.
2950
2951 RETURN
2952 FALSE - OK
2953 TRUE - error occurred
2954 */
2955
2956 bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
2957 const char *user_host, size_t user_host_len, ulonglong query_utime,
2958 ulonglong lock_utime, bool is_command,
2959 const char *sql_text, size_t sql_text_len)
2960 {
2961 bool error= 0;
2962 char llbuff[22];
2963 DBUG_ENTER("MYSQL_QUERY_LOG::write");
2964
2965 mysql_mutex_lock(&LOCK_log);
2966 if (is_open())
2967 { // Safety against reopen
2968 char buff[80], *end;
2969 char query_time_buff[22+7], lock_time_buff[22+7];
2970 size_t buff_len;
2971 end= buff;
2972
2973 if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
2974 {
2975 if (current_time != last_time)
2976 {
2977 last_time= current_time;
2978 struct tm start;
2979 localtime_r(¤t_time, &start);
2980
2981 buff_len= my_snprintf(buff, sizeof buff,
2982 "# Time: %02d%02d%02d %2d:%02d:%02d\n",
2983 start.tm_year % 100, start.tm_mon + 1,
2984 start.tm_mday, start.tm_hour,
2985 start.tm_min, start.tm_sec);
2986
2987 /* Note that my_b_write() assumes it knows the length for this */
2988 if (my_b_write(&log_file, (uchar*) buff, buff_len))
2989 goto err;
2990 }
2991 const uchar uh[]= "# User@Host: ";
2992 if (my_b_write(&log_file, uh, sizeof(uh) - 1) ||
2993 my_b_write(&log_file, (uchar*) user_host, user_host_len) ||
2994 my_b_write(&log_file, (uchar*) "\n", 1))
2995 goto err;
2996
2997 /* For slow query log */
2998 sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
2999 sprintf(lock_time_buff, "%.6f", ulonglong2double(lock_utime)/1000000.0);
3000 if (my_b_printf(&log_file,
3001 "# Thread_id: %lu Schema: %s QC_hit: %s\n"
3002 "# Query_time: %s Lock_time: %s Rows_sent: %lu Rows_examined: %lu\n"
3003 "# Rows_affected: %lu Bytes_sent: %lu\n",
3004 (ulong) thd->thread_id, thd->get_db(),
3005 ((thd->query_plan_flags & QPLAN_QC) ? "Yes" : "No"),
3006 query_time_buff, lock_time_buff,
3007 (ulong) thd->get_sent_row_count(),
3008 (ulong) thd->get_examined_row_count(),
3009 (ulong) thd->get_affected_rows(),
3010 (ulong) (thd->status_var.bytes_sent - thd->bytes_sent_old)))
3011 goto err;
3012
3013 if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN)
3014 && thd->tmp_tables_used &&
3015 my_b_printf(&log_file,
3016 "# Tmp_tables: %lu Tmp_disk_tables: %lu "
3017 "Tmp_table_sizes: %s\n",
3018 (ulong) thd->tmp_tables_used,
3019 (ulong) thd->tmp_tables_disk_used,
3020 llstr(thd->tmp_tables_size, llbuff)))
3021 goto err;
3022
3023 if (thd->spcont &&
3024 my_b_printf(&log_file, "# Stored_routine: %s\n",
3025 ErrConvDQName(thd->spcont->m_sp).ptr()))
3026 goto err;
3027
3028 if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN) &&
3029 (thd->query_plan_flags &
3030 (QPLAN_FULL_SCAN | QPLAN_FULL_JOIN | QPLAN_TMP_TABLE |
3031 QPLAN_TMP_DISK | QPLAN_FILESORT | QPLAN_FILESORT_DISK |
3032 QPLAN_FILESORT_PRIORITY_QUEUE)) &&
3033 my_b_printf(&log_file,
3034 "# Full_scan: %s Full_join: %s "
3035 "Tmp_table: %s Tmp_table_on_disk: %s\n"
3036 "# Filesort: %s Filesort_on_disk: %s Merge_passes: %lu "
3037 "Priority_queue: %s\n",
3038 ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"),
3039 ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"),
3040 (thd->tmp_tables_used ? "Yes" : "No"),
3041 (thd->tmp_tables_disk_used ? "Yes" : "No"),
3042 ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"),
3043 ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ?
3044 "Yes" : "No"),
3045 thd->query_plan_fsort_passes,
3046 ((thd->query_plan_flags & QPLAN_FILESORT_PRIORITY_QUEUE) ?
3047 "Yes" : "No")
3048 ))
3049 goto err;
3050 if (thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_EXPLAIN &&
3051 thd->lex->explain)
3052 {
3053 StringBuffer<128> buf;
3054 DBUG_ASSERT(!thd->free_list);
3055 if (!print_explain_for_slow_log(thd->lex, thd, &buf))
3056 if (my_b_printf(&log_file, "%s", buf.c_ptr_safe()))
3057 goto err;
3058 thd->free_items();
3059 }
3060 if (thd->db.str && strcmp(thd->db.str, db))
3061 { // Database changed
3062 if (my_b_printf(&log_file,"use %s;\n",thd->db.str))
3063 goto err;
3064 strmov(db,thd->db.str);
3065 }
3066 if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
3067 {
3068 end=strmov(end, ",last_insert_id=");
3069 end=longlong10_to_str((longlong)
3070 thd->first_successful_insert_id_in_prev_stmt_for_binlog,
3071 end, -10);
3072 }
3073 // Save value if we do an insert.
3074 if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
3075 {
3076 if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
3077 {
3078 end=strmov(end,",insert_id=");
3079 end=longlong10_to_str((longlong)
3080 thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(),
3081 end, -10);
3082 }
3083 }
3084
3085 /*
3086 This info used to show up randomly, depending on whether the query
3087 checked the query start time or not. now we always write current
3088 timestamp to the slow log
3089 */
3090 end= strmov(end, ",timestamp=");
3091 end= int10_to_str((long) current_time, end, 10);
3092
3093 if (end != buff)
3094 {
3095 *end++=';';
3096 *end='\n';
3097 if (my_b_write(&log_file, (uchar*) "SET ", 4) ||
3098 my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff)))
3099 goto err;
3100 }
3101 if (is_command)
3102 {
3103 end= strxmov(buff, "# administrator command: ", NullS);
3104 buff_len= (ulong) (end - buff);
3105 DBUG_EXECUTE_IF("simulate_slow_log_write_error",
3106 {DBUG_SET("+d,simulate_file_write_error");});
3107 if(my_b_write(&log_file, (uchar*) buff, buff_len))
3108 goto err;
3109 }
3110 if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) ||
3111 my_b_write(&log_file, (uchar*) ";\n",2) ||
3112 flush_io_cache(&log_file))
3113 goto err;
3114
3115 }
3116 }
3117 end:
3118 mysql_mutex_unlock(&LOCK_log);
3119 DBUG_RETURN(error);
3120
3121 err:
3122 error= 1;
3123 if (!write_error)
3124 {
3125 write_error= 1;
3126 sql_print_error(ER_THD(thd, ER_ERROR_ON_WRITE), name, errno);
3127 }
3128 goto end;
3129 }
3130
3131
3132 /**
3133 @todo
3134 The following should be using fn_format(); We just need to
3135 first change fn_format() to cut the file name if it's too long.
3136 */
3137 const char *MYSQL_LOG::generate_name(const char *log_name,
3138 const char *suffix,
3139 bool strip_ext, char *buff)
3140 {
3141 if (!log_name || !log_name[0])
3142 {
3143 strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
3144 return (const char *)
3145 fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
3146 }
3147 // get rid of extension if the log is binary to avoid problems
3148 if (strip_ext)
3149 {
3150 char *p= fn_ext(log_name);
3151 uint length= (uint) (p - log_name);
3152 strmake(buff, log_name, MY_MIN(length, FN_REFLEN-1));
3153 return (const char*)buff;
3154 }
3155 return log_name;
3156 }
3157
3158
/*
  Print some additional information about addition/removal of
  XID list entries.

  X is a format string taking the binlog name (%s) and the binlog id;
  Y is a pointer to the list entry. Both variants expand to a single
  do { } while (0) statement, so the macro is safe in unbraced if/else
  bodies and must be followed by a semicolon.

  TODO: Remove once MDEV-9510 is fixed.
*/
#ifdef WITH_WSREP
#define WSREP_XID_LIST_ENTRY(X, Y)                              \
  do                                                            \
  {                                                             \
    if (wsrep_debug)                                            \
    {                                                           \
      char buf[FN_REFLEN];                                      \
      strmake(buf, (Y)->binlog_name, (Y)->binlog_name_len);     \
      WSREP_DEBUG(X, buf, (Y)->binlog_id);                      \
    }                                                           \
  } while (0)
#else
#define WSREP_XID_LIST_ENTRY(X, Y) do { } while(0)
#endif
3175
3176 MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
3177 :reset_master_pending(0), mark_xid_done_waiting(0),
3178 bytes_written(0), file_id(1), open_count(1),
3179 group_commit_queue(0), group_commit_queue_busy(FALSE),
3180 num_commits(0), num_group_commits(0),
3181 group_commit_trigger_count(0), group_commit_trigger_timeout(0),
3182 group_commit_trigger_lock_wait(0),
3183 sync_period_ptr(sync_period), sync_counter(0),
3184 state_file_deleted(false), binlog_state_recover_done(false),
3185 is_relay_log(0), relay_signal_cnt(0),
3186 checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
3187 relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
3188 description_event_for_exec(0), description_event_for_queue(0),
3189 current_binlog_id(0)
3190 {
3191 /*
3192 We don't want to initialize locks here as such initialization depends on
3193 safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
3194 called only in main(). Doing initialization here would make it happen
3195 before main().
3196 */
3197 index_file_name[0] = 0;
3198 bzero((char*) &index_file, sizeof(index_file));
3199 bzero((char*) &purge_index_file, sizeof(purge_index_file));
3200 }
3201
3202 void MYSQL_BIN_LOG::stop_background_thread()
3203 {
3204 if (binlog_background_thread_started)
3205 {
3206 mysql_mutex_lock(&LOCK_binlog_background_thread);
3207 binlog_background_thread_stop= true;
3208 mysql_cond_signal(&COND_binlog_background_thread);
3209 while (binlog_background_thread_stop)
3210 mysql_cond_wait(&COND_binlog_background_thread_end,
3211 &LOCK_binlog_background_thread);
3212 mysql_mutex_unlock(&LOCK_binlog_background_thread);
3213 binlog_background_thread_started= false;
3214 }
3215 }
3216
3217 /* this is called only once */
3218
3219 void MYSQL_BIN_LOG::cleanup()
3220 {
3221 DBUG_ENTER("cleanup");
3222 if (inited)
3223 {
3224 xid_count_per_binlog *b;
3225
3226 /* Wait for the binlog background thread to stop. */
3227 if (!is_relay_log)
3228 stop_background_thread();
3229
3230 inited= 0;
3231 mysql_mutex_lock(&LOCK_log);
3232 close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
3233 mysql_mutex_unlock(&LOCK_log);
3234 delete description_event_for_queue;
3235 delete description_event_for_exec;
3236
3237 while ((b= binlog_xid_count_list.get()))
3238 {
3239 /*
3240 There should be no pending XIDs at shutdown, and only one entry (for
3241 the active binlog file) in the list.
3242 */
3243 DBUG_ASSERT(b->xid_count == 0);
3244 DBUG_ASSERT(!binlog_xid_count_list.head());
3245 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::cleanup(): Removing xid_list_entry "
3246 "for %s (%lu)", b);
3247 delete b;
3248 }
3249
3250 mysql_mutex_destroy(&LOCK_log);
3251 mysql_mutex_destroy(&LOCK_index);
3252 mysql_mutex_destroy(&LOCK_xid_list);
3253 mysql_mutex_destroy(&LOCK_binlog_background_thread);
3254 mysql_mutex_destroy(&LOCK_binlog_end_pos);
3255 mysql_cond_destroy(&COND_relay_log_updated);
3256 mysql_cond_destroy(&COND_bin_log_updated);
3257 mysql_cond_destroy(&COND_queue_busy);
3258 mysql_cond_destroy(&COND_xid_list);
3259 mysql_cond_destroy(&COND_binlog_background_thread);
3260 mysql_cond_destroy(&COND_binlog_background_thread_end);
3261 }
3262
3263 /*
3264 Free data for global binlog state.
3265 We can't do that automatically as we need to do this before
3266 safemalloc is shut down
3267 */
3268 if (!is_relay_log)
3269 rpl_global_gtid_binlog_state.free();
3270 DBUG_VOID_RETURN;
3271 }
3272
3273
3274 /* Init binlog-specific vars */
3275 void MYSQL_BIN_LOG::init(ulong max_size_arg)
3276 {
3277 DBUG_ENTER("MYSQL_BIN_LOG::init");
3278 max_size= max_size_arg;
3279 DBUG_PRINT("info",("max_size: %lu", max_size));
3280 DBUG_VOID_RETURN;
3281 }
3282
3283
3284 void MYSQL_BIN_LOG::init_pthread_objects()
3285 {
3286 MYSQL_LOG::init_pthread_objects();
3287 mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
3288 mysql_mutex_setflags(&LOCK_index, MYF_NO_DEADLOCK_DETECTION);
3289 mysql_mutex_init(key_BINLOG_LOCK_xid_list,
3290 &LOCK_xid_list, MY_MUTEX_INIT_FAST);
3291 mysql_cond_init(m_key_relay_log_update, &COND_relay_log_updated, 0);
3292 mysql_cond_init(m_key_bin_log_update, &COND_bin_log_updated, 0);
3293 mysql_cond_init(m_key_COND_queue_busy, &COND_queue_busy, 0);
3294 mysql_cond_init(key_BINLOG_COND_xid_list, &COND_xid_list, 0);
3295
3296 mysql_mutex_init(key_BINLOG_LOCK_binlog_background_thread,
3297 &LOCK_binlog_background_thread, MY_MUTEX_INIT_FAST);
3298 mysql_cond_init(key_BINLOG_COND_binlog_background_thread,
3299 &COND_binlog_background_thread, 0);
3300 mysql_cond_init(key_BINLOG_COND_binlog_background_thread_end,
3301 &COND_binlog_background_thread_end, 0);
3302
3303 mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
3304 MY_MUTEX_INIT_SLOW);
3305 }
3306
3307
3308 bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
3309 const char *log_name, bool need_mutex)
3310 {
3311 File index_file_nr= -1;
3312 DBUG_ASSERT(!my_b_inited(&index_file));
3313
3314 /*
3315 First open of this class instance
3316 Create an index file that will hold all file names uses for logging.
3317 Add new entries to the end of it.
3318 */
3319 myf opt= MY_UNPACK_FILENAME;
3320 if (!index_file_name_arg)
3321 {
3322 index_file_name_arg= log_name; // Use same basename for index file
3323 opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
3324 }
3325 fn_format(index_file_name, index_file_name_arg, mysql_data_home,
3326 ".index", opt);
3327 if ((index_file_nr= mysql_file_open(m_key_file_log_index,
3328 index_file_name,
3329 O_RDWR | O_CREAT | O_BINARY | O_CLOEXEC,
3330 MYF(MY_WME))) < 0 ||
3331 mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
3332 init_io_cache(&index_file, index_file_nr,
3333 IO_SIZE, WRITE_CACHE,
3334 mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
3335 0, MYF(MY_WME | MY_WAIT_IF_FULL)) ||
3336 DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
3337 {
3338 /*
3339 TODO: all operations creating/deleting the index file or a log, should
3340 call my_sync_dir() or my_sync_dir_by_file() to be durable.
3341 TODO: file creation should be done with mysql_file_create()
3342 not mysql_file_open().
3343 */
3344 if (index_file_nr >= 0)
3345 mysql_file_close(index_file_nr, MYF(0));
3346 return TRUE;
3347 }
3348
3349 #ifdef HAVE_REPLICATION
3350 /*
3351 Sync the index by purging any binary log file that is not registered.
3352 In other words, either purge binary log files that were removed from
3353 the index but not purged from the file system due to a crash or purge
3354 any binary log file that was created but not register in the index
3355 due to a crash.
3356 */
3357
3358 if (set_purge_index_file_name(index_file_name_arg) ||
3359 open_purge_index_file(FALSE) ||
3360 purge_index_entry(NULL, NULL, need_mutex) ||
3361 close_purge_index_file() ||
3362 DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
3363 {
3364 sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
3365 "file.");
3366 return TRUE;
3367 }
3368 #endif
3369
3370 return FALSE;
3371 }
3372
3373
3374 /**
3375 Open a (new) binlog file.
3376
3377 - Open the log file and the index file. Register the new
3378 file name in it
3379 - When calling this when the file is in use, you must have a locks
3380 on LOCK_log and LOCK_index.
3381
3382 @retval
3383 0 ok
3384 @retval
3385 1 error
3386 */
3387
3388 bool MYSQL_BIN_LOG::open(const char *log_name,
3389 enum_log_type log_type_arg,
3390 const char *new_name,
3391 ulong next_log_number,
3392 enum cache_type io_cache_type_arg,
3393 ulong max_size_arg,
3394 bool null_created_arg,
3395 bool need_mutex)
3396 {
3397 File file= -1;
3398 xid_count_per_binlog *new_xid_list_entry= NULL, *b;
3399 DBUG_ENTER("MYSQL_BIN_LOG::open");
3400 DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));
3401
3402 mysql_mutex_assert_owner(&LOCK_log);
3403
3404 if (!is_relay_log)
3405 {
3406 if (!binlog_state_recover_done)
3407 {
3408 binlog_state_recover_done= true;
3409 if (do_binlog_recovery(opt_bin_logname, false))
3410 DBUG_RETURN(1);
3411 }
3412
3413 if (!binlog_background_thread_started &&
3414 start_binlog_background_thread())
3415 DBUG_RETURN(1);
3416 }
3417
3418 /* We need to calculate new log file name for purge to delete old */
3419 if (init_and_set_log_file_name(log_name, new_name, next_log_number,
3420 log_type_arg, io_cache_type_arg))
3421 {
3422 sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
3423 if (!is_relay_log)
3424 goto err;
3425 DBUG_RETURN(1);
3426 }
3427
3428 #ifdef HAVE_REPLICATION
3429 if (open_purge_index_file(TRUE) ||
3430 register_create_index_entry(log_file_name) ||
3431 sync_purge_index_file() ||
3432 DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
3433 {
3434 /**
3435 TODO:
3436 Although this was introduced to appease valgrind when
3437 injecting emulated faults using
3438 fault_injection_registering_index it may be good to consider
3439 what actually happens when open_purge_index_file succeeds but
3440 register or sync fails.
3441
3442 Perhaps we might need the code below in MYSQL_LOG_BIN::cleanup
3443 for "real life" purposes as well?
3444 */
3445 DBUG_EXECUTE_IF("fault_injection_registering_index", {
3446 if (my_b_inited(&purge_index_file))
3447 {
3448 end_io_cache(&purge_index_file);
3449 my_close(purge_index_file.file, MYF(0));
3450 }
3451 });
3452
3453 sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
3454 DBUG_RETURN(1);
3455 }
3456 DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
3457 #endif
3458
3459 write_error= 0;
3460
3461 /* open the main log file */
3462 if (MYSQL_LOG::open(
3463 #ifdef HAVE_PSI_INTERFACE
3464 m_key_file_log,
3465 #endif
3466 log_name,
3467 LOG_UNKNOWN, /* Don't generate new name */
3468 0, 0, io_cache_type_arg))
3469 {
3470 #ifdef HAVE_REPLICATION
3471 close_purge_index_file();
3472 #endif
3473 DBUG_RETURN(1); /* all warnings issued */
3474 }
3475
3476 init(max_size_arg);
3477
3478 open_count++;
3479
3480 DBUG_ASSERT(log_type == LOG_BIN);
3481
3482 {
3483 bool write_file_name_to_index_file=0;
3484
3485 if (!my_b_filelength(&log_file))
3486 {
3487 /*
3488 The binary log file was empty (probably newly created)
3489 This is the normal case and happens when the user doesn't specify
3490 an extension for the binary log files.
3491 In this case we write a standard header to it.
3492 */
3493 if (my_b_safe_write(&log_file, BINLOG_MAGIC,
3494 BIN_LOG_HEADER_SIZE))
3495 goto err;
3496 bytes_written+= BIN_LOG_HEADER_SIZE;
3497 write_file_name_to_index_file= 1;
3498 }
3499
3500 {
3501 /*
3502 In 4.x we put Start event only in the first binlog. But from 5.0 we
3503 want a Start event even if this is not the very first binlog.
3504 */
3505 Format_description_log_event s(BINLOG_VERSION);
3506 /*
3507 don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
3508 as we won't be able to reset it later
3509 */
3510 if (io_cache_type == WRITE_CACHE)
3511 s.flags |= LOG_EVENT_BINLOG_IN_USE_F;
3512
3513 if (is_relay_log)
3514 {
3515 if (relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
3516 relay_log_checksum_alg=
3517 opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options
3518 : BINLOG_CHECKSUM_ALG_OFF;
3519 s.checksum_alg= relay_log_checksum_alg;
3520 s.set_relay_log_event();
3521 }
3522 else
3523 s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options;
3524
3525 crypto.scheme = 0;
3526 DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
3527 if (!s.is_valid())
3528 goto err;
3529 s.dont_set_created= null_created_arg;
3530 if (write_event(&s))
3531 goto err;
3532 bytes_written+= s.data_written;
3533
3534 if (encrypt_binlog)
3535 {
3536 uint key_version= encryption_key_get_latest_version(ENCRYPTION_KEY_SYSTEM_DATA);
3537 if (key_version == ENCRYPTION_KEY_VERSION_INVALID)
3538 {
3539 sql_print_error("Failed to enable encryption of binary logs");
3540 goto err;
3541 }
3542
3543 if (key_version != ENCRYPTION_KEY_NOT_ENCRYPTED)
3544 {
3545 if (my_random_bytes(crypto.nonce, sizeof(crypto.nonce)))
3546 goto err;
3547
3548 Start_encryption_log_event sele(1, key_version, crypto.nonce);
3549 sele.checksum_alg= s.checksum_alg;
3550 if (write_event(&sele))
3551 goto err;
3552
3553 // Start_encryption_log_event is written, enable the encryption
3554 if (crypto.init(sele.crypto_scheme, key_version))
3555 goto err;
3556 }
3557 }
3558
3559 if (!is_relay_log)
3560 {
3561 char buf[FN_REFLEN];
3562
3563 /*
3564 Output a Gtid_list_log_event at the start of the binlog file.
3565
3566 This is used to quickly determine which GTIDs are found in binlog
3567 files earlier than this one, and which are found in this (or later)
3568 binlogs.
3569
3570 The list gives a mapping from (domain_id, server_id) -> seq_no (so
3571 this means that there is at most one entry for every unique pair
3572 (domain_id, server_id) in the list). It indicates that this seq_no is
3573 the last one found in an earlier binlog file for this (domain_id,
3574 server_id) combination - so any higher seq_no should be search for
3575 from this binlog file, or a later one.
3576
3577 This allows to locate the binlog file containing a given GTID by
3578 scanning backwards, reading just the Gtid_list_log_event at the
3579 start of each file, and scanning only the relevant binlog file when
3580 found, not all binlog files.
3581
3582 The existence of a given entry (domain_id, server_id, seq_no)
3583 guarantees only that this seq_no will not be found in this or any
3584 later binlog file. It does not guarantee that it can be found it an
3585 earlier binlog file, for example the file may have been purged.
3586
3587 If there is no entry for a given (domain_id, server_id) pair, then
3588 it means that no such GTID exists in any earlier binlog. It is
3589 permissible to remove such pair from future Gtid_list_log_events
3590 if all previous binlog files containing such GTIDs have been purged
3591 (though such optimization is not performed at the time of this
3592 writing). So if there is no entry for given GTID it means that such
3593 GTID should be search for in this or later binlog file, same as if
3594 there had been an entry (domain_id, server_id, 0).
3595 */
3596
3597 Gtid_list_log_event gl_ev(&rpl_global_gtid_binlog_state, 0);
3598 if (write_event(&gl_ev))
3599 goto err;
3600
3601 /* Output a binlog checkpoint event at the start of the binlog file. */
3602
3603 /*
3604 Construct an entry in the binlog_xid_count_list for the new binlog
3605 file (we will not link it into the list until we know the new file
3606 is successfully created; otherwise we would have to remove it again
3607 if creation failed, which gets tricky since other threads may have
3608 seen the entry in the meantime - and we do not want to hold
3609 LOCK_xid_list for long periods of time).
3610
3611 Write the current binlog checkpoint into the log, so XA recovery will
3612 know from where to start recovery.
3613 */
3614 size_t off= dirname_length(log_file_name);
3615 uint len= static_cast<uint>(strlen(log_file_name) - off);
3616 new_xid_list_entry= new xid_count_per_binlog(log_file_name+off, len);
3617 if (!new_xid_list_entry)
3618 goto err;
3619
3620 /*
3621 Find the name for the Initial binlog checkpoint.
3622
3623 Normally this will just be the first entry, as we delete entries
3624 when their count drops to zero. But we scan the list to handle any
3625 corner case, eg. for the first binlog file opened after startup, the
3626 list will be empty.
3627 */
3628 mysql_mutex_lock(&LOCK_xid_list);
3629 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
3630 while ((b= it++) && b->xid_count == 0)
3631 ;
3632 mysql_mutex_unlock(&LOCK_xid_list);
3633 if (!b)
3634 b= new_xid_list_entry;
3635 if (b->binlog_name)
3636 strmake(buf, b->binlog_name, b->binlog_name_len);
3637 else
3638 goto err;
3639 Binlog_checkpoint_log_event ev(buf, len);
3640 DBUG_EXECUTE_IF("crash_before_write_checkpoint_event",
3641 flush_io_cache(&log_file);
3642 mysql_file_sync(log_file.file, MYF(MY_WME));
3643 DBUG_SUICIDE(););
3644 if (write_event(&ev))
3645 goto err;
3646 bytes_written+= ev.data_written;
3647 }
3648 }
3649 if (description_event_for_queue &&
3650 description_event_for_queue->binlog_version>=4)
3651 {
3652 /*
3653 This is a relay log written to by the I/O slave thread.
3654 Write the event so that others can later know the format of this relay
3655 log.
3656 Note that this event is very close to the original event from the
3657 master (it has binlog version of the master, event types of the
3658 master), so this is suitable to parse the next relay log's event. It
3659 has been produced by
3660 Format_description_log_event::Format_description_log_event(char* buf,).
3661 Why don't we want to write the description_event_for_queue if this
3662 event is for format<4 (3.23 or 4.x): this is because in that case, the
3663 description_event_for_queue describes the data received from the
3664 master, but not the data written to the relay log (*conversion*),
3665 which is in format 4 (slave's).
3666 */
3667 /*
3668 Set 'created' to 0, so that in next relay logs this event does not
3669 trigger cleaning actions on the slave in
3670 Format_description_log_event::apply_event_impl().
3671 */
3672 description_event_for_queue->created= 0;
3673 /* Don't set log_pos in event header */
3674 description_event_for_queue->set_artificial_event();
3675
3676 if (write_event(description_event_for_queue))
3677 goto err;
3678 bytes_written+= description_event_for_queue->data_written;
3679 }
3680 if (flush_io_cache(&log_file) ||
3681 mysql_file_sync(log_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
3682 goto err;
3683
3684 my_off_t offset= my_b_tell(&log_file);
3685
3686 if (!is_relay_log)
3687 {
3688 /* update binlog_end_pos so that it can be read by after sync hook */
3689 reset_binlog_end_pos(log_file_name, offset);
3690
3691 mysql_mutex_lock(&LOCK_commit_ordered);
3692 strmake_buf(last_commit_pos_file, log_file_name);
3693 last_commit_pos_offset= offset;
3694 mysql_mutex_unlock(&LOCK_commit_ordered);
3695 }
3696
3697 if (write_file_name_to_index_file)
3698 {
3699 #ifdef HAVE_REPLICATION
3700 #ifdef ENABLED_DEBUG_SYNC
3701 if (current_thd)
3702 DEBUG_SYNC(current_thd, "binlog_open_before_update_index");
3703 #endif
3704 DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
3705 #endif
3706
3707 DBUG_ASSERT(my_b_inited(&index_file) != 0);
3708 reinit_io_cache(&index_file, WRITE_CACHE,
3709 my_b_filelength(&index_file), 0, 0);
3710 /*
3711 As this is a new log file, we write the file name to the index
3712 file. As every time we write to the index file, we sync it.
3713 */
3714 if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
3715 my_b_write(&index_file, (uchar*) log_file_name,
3716 strlen(log_file_name)) ||
3717 my_b_write(&index_file, (uchar*) "\n", 1) ||
3718 flush_io_cache(&index_file) ||
3719 mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
3720 goto err;
3721
3722 #ifdef HAVE_REPLICATION
3723 DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
3724 #endif
3725 }
3726 }
3727
3728 if (!is_relay_log)
3729 {
3730 /*
3731 Now the file was created successfully, so we can link in the entry for
3732 the new binlog file in binlog_xid_count_list.
3733 */
3734 mysql_mutex_lock(&LOCK_xid_list);
3735 ++current_binlog_id;
3736 new_xid_list_entry->binlog_id= current_binlog_id;
3737 /* Remove any initial entries with no pending XIDs. */
3738 while ((b= binlog_xid_count_list.head()) && b->xid_count == 0)
3739 {
3740 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Removing xid_list_entry for "
3741 "%s (%lu)", b);
3742 delete binlog_xid_count_list.get();
3743 }
3744 mysql_cond_broadcast(&COND_xid_list);
3745 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Adding new xid_list_entry for "
3746 "%s (%lu)", new_xid_list_entry);
3747 binlog_xid_count_list.push_back(new_xid_list_entry);
3748 mysql_mutex_unlock(&LOCK_xid_list);
3749
3750 /*
3751 Now that we have synced a new binlog file with an initial Gtid_list
3752 event, it is safe to delete the binlog state file. We will write out
3753 a new, updated file at shutdown, and if we crash before we can recover
3754 the state from the newly written binlog file.
3755
3756 Since the state file will contain out-of-date data as soon as the first
3757 new GTID is binlogged, it is better to remove it, to avoid any risk of
3758 accidentally reading incorrect data later.
3759 */
3760 if (!state_file_deleted)
3761 {
3762 char buf[FN_REFLEN];
3763 fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
3764 MY_UNPACK_FILENAME);
3765 my_delete(buf, MY_SYNC_DIR);
3766 state_file_deleted= true;
3767 }
3768 }
3769
3770 log_state= LOG_OPENED;
3771
3772 #ifdef HAVE_REPLICATION
3773 close_purge_index_file();
3774 #endif
3775
3776 /* Notify the io thread that binlog is rotated to a new file */
3777 if (is_relay_log)
3778 signal_relay_log_update();
3779 else
3780 update_binlog_end_pos();
3781 DBUG_RETURN(0);
3782
3783 err:
3784 int tmp_errno= errno;
3785 #ifdef HAVE_REPLICATION
3786 if (is_inited_purge_index_file())
3787 purge_index_entry(NULL, NULL, need_mutex);
3788 close_purge_index_file();
3789 #endif
3790 sql_print_error(fatal_log_error, (name) ? name : log_name, tmp_errno);
3791 if (new_xid_list_entry)
3792 delete new_xid_list_entry;
3793 if (file >= 0)
3794 mysql_file_close(file, MYF(0));
3795 close(LOG_CLOSE_INDEX);
3796 DBUG_RETURN(1);
3797 }
3798
3799
3800 int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
3801 {
3802 mysql_mutex_lock(&LOCK_log);
3803 int ret = raw_get_current_log(linfo);
3804 mysql_mutex_unlock(&LOCK_log);
3805 return ret;
3806 }
3807
3808 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
3809 {
3810 mysql_mutex_assert_owner(&LOCK_log);
3811 strmake_buf(linfo->log_file_name, log_file_name);
3812 linfo->pos = my_b_tell(&log_file);
3813 return 0;
3814 }
3815
/**
  Move all data up in a filename index file.

  Everything from 'offset' to the end of the file is copied to the
  beginning of the file, so that the first 'offset' bytes are dropped.

  We do the copy outside of the IO_CACHE as the cache buffers would just
  make things slower and more complicated.
  In most cases the copy loop should only do one read.

  @param index_file			File to move
  @param offset			Move everything from here to beginning

  @note
    File will be truncated to be 'offset' shorter or filled up with newlines

  @note
    A failed seek is treated as an error: continuing would read from or
    write to whatever position the file happens to be at and could damage
    the index file.

  @retval
    0	ok
  @retval
    1	error while seeking, reading, writing, resizing or syncing the file
*/

#ifdef HAVE_REPLICATION

static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
{
  int bytes_read;
  my_off_t init_offset= offset;
  File file= index_file->file;
  uchar io_buf[IO_SIZE*2];
  DBUG_ENTER("copy_up_file_and_fill");

  for (;; offset+= bytes_read)
  {
    /* Read the next chunk from its current place in the file ... */
    if (mysql_file_seek(file, offset, MY_SEEK_SET, MYF(MY_WME)) ==
        MY_FILEPOS_ERROR)
      goto err;
    if ((bytes_read= (int) mysql_file_read(file, io_buf, sizeof(io_buf),
                                           MYF(MY_WME)))
	< 0)
      goto err;
    if (!bytes_read)
      break;					// end of file
    /* ... and write it back 'init_offset' bytes closer to the start. */
    if (mysql_file_seek(file, offset-init_offset, MY_SEEK_SET, MYF(MY_WME)) ==
        MY_FILEPOS_ERROR)
      goto err;
    if (mysql_file_write(file, io_buf, bytes_read,
                         MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
      goto err;
  }
  /* The following will either truncate the file or fill the end with \n' */
  if (mysql_file_chsize(file, offset - init_offset, '\n', MYF(MY_WME)) ||
      mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE)))
    goto err;

  /* Reset data in old index cache */
  reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
  DBUG_RETURN(0);

err:
  DBUG_RETURN(1);
}

#endif /* HAVE_REPLICATION */
3871
3872 /**
3873 Find the position in the log-index-file for the given log name.
3874
3875 @param linfo Store here the found log file name and position to
3876 the NEXT log file name in the index file.
3877 @param log_name Filename to find in the index file.
3878 Is a null pointer if we want to read the first entry
3879 @param need_lock Set this to 1 if the parent doesn't already have a
3880 lock on LOCK_index
3881
3882 @note
3883 On systems without the truncate function the file will end with one or
3884 more empty lines. These will be ignored when reading the file.
3885
3886 @retval
3887 0 ok
3888 @retval
3889 LOG_INFO_EOF End of log-index-file found
3890 @retval
3891 LOG_INFO_IO Got IO error while reading file
3892 */
3893
3894 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
3895 bool need_lock)
3896 {
3897 int error= 0;
3898 char *full_fname= linfo->log_file_name;
3899 char full_log_name[FN_REFLEN], fname[FN_REFLEN];
3900 uint log_name_len= 0, fname_len= 0;
3901 DBUG_ENTER("find_log_pos");
3902 full_log_name[0]= full_fname[0]= 0;
3903
3904 /*
3905 Mutex needed because we need to make sure the file pointer does not
3906 move from under our feet
3907 */
3908 if (need_lock)
3909 mysql_mutex_lock(&LOCK_index);
3910 mysql_mutex_assert_owner(&LOCK_index);
3911
3912 // extend relative paths for log_name to be searched
3913 if (log_name)
3914 {
3915 if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
3916 {
3917 error= LOG_INFO_EOF;
3918 goto end;
3919 }
3920 }
3921
3922 log_name_len= log_name ? (uint) strlen(full_log_name) : 0;
3923 DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
3924 log_name ? log_name : "NULL", full_log_name));
3925
3926 /* As the file is flushed, we can't get an error here */
3927 (void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);
3928
3929 for (;;)
3930 {
3931 size_t length;
3932 my_off_t offset= my_b_tell(&index_file);
3933
3934 DBUG_EXECUTE_IF("simulate_find_log_pos_error",
3935 error= LOG_INFO_EOF; break;);
3936 /* If we get 0 or 1 characters, this is the end of the file */
3937 if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
3938 {
3939 /* Did not find the given entry; Return not found or error */
3940 error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
3941 break;
3942 }
3943 if (fname[length-1] != '\n')
3944 continue; // Not a log entry
3945 fname[length-1]= 0; // Remove end \n
3946
3947 // extend relative paths and match against full path
3948 if (normalize_binlog_name(full_fname, fname, is_relay_log))
3949 {
3950 error= LOG_INFO_EOF;
3951 break;
3952 }
3953 fname_len= (uint) strlen(full_fname);
3954
3955 // if the log entry matches, null string matching anything
3956 if (!log_name ||
3957 (log_name_len == fname_len &&
3958 !strncmp(full_fname, full_log_name, log_name_len)))
3959 {
3960 DBUG_PRINT("info", ("Found log file entry"));
3961 linfo->index_file_start_offset= offset;
3962 linfo->index_file_offset = my_b_tell(&index_file);
3963 break;
3964 }
3965 }
3966
3967 end:
3968 if (need_lock)
3969 mysql_mutex_unlock(&LOCK_index);
3970 DBUG_RETURN(error);
3971 }
3972
3973
3974 /**
3975 Find the position in the log-index-file for the given log name.
3976
3977 @param
3978 linfo Store here the next log file name and position to
3979 the file name after that.
3980 @param
3981 need_lock Set this to 1 if the parent doesn't already have a
3982 lock on LOCK_index
3983
3984 @note
3985 - Before calling this function, one has to call find_log_pos()
3986 to set up 'linfo'
3987 - Mutex needed because we need to make sure the file pointer does not move
3988 from under our feet
3989
3990 @retval
3991 0 ok
3992 @retval
3993 LOG_INFO_EOF End of log-index-file found
3994 @retval
3995 LOG_INFO_IO Got IO error while reading file
3996 */
3997
3998 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
3999 {
4000 int error= 0;
4001 size_t length;
4002 char fname[FN_REFLEN];
4003 char *full_fname= linfo->log_file_name;
4004
4005 if (need_lock)
4006 mysql_mutex_lock(&LOCK_index);
4007 mysql_mutex_assert_owner(&LOCK_index);
4008
4009 /* As the file is flushed, we can't get an error here */
4010 (void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
4011 0);
4012
4013 linfo->index_file_start_offset= linfo->index_file_offset;
4014 if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
4015 {
4016 error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
4017 goto err;
4018 }
4019
4020 if (fname[0] != 0)
4021 {
4022 if(normalize_binlog_name(full_fname, fname, is_relay_log))
4023 {
4024 error= LOG_INFO_EOF;
4025 goto err;
4026 }
4027 length= strlen(full_fname);
4028 }
4029
4030 full_fname[length-1]= 0; // kill \n
4031 linfo->index_file_offset= my_b_tell(&index_file);
4032
4033 err:
4034 if (need_lock)
4035 mysql_mutex_unlock(&LOCK_index);
4036 return error;
4037 }
4038
4039
4040 /**
4041 Delete all logs referred to in the index file.
4042
4043 The new index file will only contain this file.
4044
4045 @param thd Thread id. This can be zero in case of resetting
4046 relay logs
4047 @param create_new_log 1 if we should start writing to a new log file
4048 @param next_log_number min number of next log file to use, if possible.
4049
4050 @note
4051 If not called from slave thread, write start event to new log
4052
4053 @retval
4054 0 ok
4055 @retval
4056 1 error
4057 */
4058
4059 bool MYSQL_BIN_LOG::reset_logs(THD *thd, bool create_new_log,
4060 rpl_gtid *init_state, uint32 init_state_len,
4061 ulong next_log_number)
4062 {
4063 LOG_INFO linfo;
4064 bool error=0;
4065 int err;
4066 const char* save_name;
4067 DBUG_ENTER("reset_logs");
4068
4069 if (!is_relay_log)
4070 {
4071 if (init_state && !is_empty_state())
4072 {
4073 my_error(ER_BINLOG_MUST_BE_EMPTY, MYF(0));
4074 DBUG_RETURN(1);
4075 }
4076
4077 /*
4078 Mark that a RESET MASTER is in progress.
4079 This ensures that a binlog checkpoint will not try to write binlog
4080 checkpoint events, which would be useless (as we are deleting the binlog
4081 anyway) and could deadlock, as we are holding LOCK_log.
4082
4083 Wait for any mark_xid_done() calls that might be already running to
4084 complete (mark_xid_done_waiting counter to drop to zero); we need to
4085 do this before we take the LOCK_log to not deadlock.
4086 */
4087 mysql_mutex_lock(&LOCK_xid_list);
4088 reset_master_pending++;
4089 while (mark_xid_done_waiting > 0)
4090 mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4091 mysql_mutex_unlock(&LOCK_xid_list);
4092 }
4093
4094 DEBUG_SYNC_C_IF_THD(thd, "reset_logs_after_set_reset_master_pending");
4095 /*
4096 We need to get both locks to be sure that no one is trying to
4097 write to the index log file.
4098 */
4099 mysql_mutex_lock(&LOCK_log);
4100 mysql_mutex_lock(&LOCK_index);
4101
4102 if (!is_relay_log)
4103 {
4104 /*
4105 We are going to nuke all binary log files.
4106 Without binlog, we cannot XA recover prepared-but-not-committed
4107 transactions in engines. So force a commit checkpoint first.
4108
4109 Note that we take and immediately
4110 release LOCK_after_binlog_sync/LOCK_commit_ordered. This has
4111 the effect to ensure that any on-going group commit (in
4112 trx_group_commit_leader()) has completed before we request the checkpoint,
4113 due to the chaining of LOCK_log and LOCK_commit_ordered in that function.
4114 (We are holding LOCK_log, so no new group commit can start).
4115
4116 Without this, it is possible (though perhaps unlikely) that the RESET
4117 MASTER could run in-between the write to the binlog and the
4118 commit_ordered() in the engine of some transaction, and then a crash
4119 later would leave such transaction not recoverable.
4120 */
4121
4122 mysql_mutex_lock(&LOCK_after_binlog_sync);
4123 mysql_mutex_lock(&LOCK_commit_ordered);
4124 mysql_mutex_unlock(&LOCK_after_binlog_sync);
4125 mysql_mutex_unlock(&LOCK_commit_ordered);
4126
4127 mark_xids_active(current_binlog_id, 1);
4128 do_checkpoint_request(current_binlog_id);
4129
4130 /* Now wait for all checkpoint requests and pending unlog() to complete. */
4131 mysql_mutex_lock(&LOCK_xid_list);
4132 for (;;)
4133 {
4134 if (is_xidlist_idle_nolock())
4135 break;
4136 /*
4137 Wait until signalled that one more binlog dropped to zero, then check
4138 again.
4139 */
4140 mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4141 }
4142
4143 /*
4144 Now all XIDs are fully flushed to disk, and we are holding LOCK_log so
4145 no new ones will be written. So we can proceed to delete the logs.
4146 */
4147 mysql_mutex_unlock(&LOCK_xid_list);
4148 }
4149
4150 /* Save variables so that we can reopen the log */
4151 save_name=name;
4152 name=0; // Protect against free
4153 close(LOG_CLOSE_TO_BE_OPENED);
4154
4155 /*
4156 First delete all old log files and then update the index file.
4157 As we first delete the log files and do not use sort of logging,
4158 a crash may lead to an inconsistent state where the index has
4159 references to non-existent files.
4160
4161 We need to invert the steps and use the purge_index_file methods
4162 in order to make the operation safe.
4163 */
4164
4165 if ((err= find_log_pos(&linfo, NullS, 0)) != 0)
4166 {
4167 uint errcode= purge_log_get_error_code(err);
4168 sql_print_error("Failed to locate old binlog or relay log files");
4169 my_message(errcode, ER_THD_OR_DEFAULT(thd, errcode), MYF(0));
4170 error= 1;
4171 goto err;
4172 }
4173
4174 for (;;)
4175 {
4176 if (unlikely((error= my_delete(linfo.log_file_name, MYF(0)))))
4177 {
4178 if (my_errno == ENOENT)
4179 {
4180 if (thd)
4181 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4182 ER_LOG_PURGE_NO_FILE,
4183 ER_THD(thd, ER_LOG_PURGE_NO_FILE),
4184 linfo.log_file_name);
4185
4186 sql_print_information("Failed to delete file '%s'",
4187 linfo.log_file_name);
4188 my_errno= 0;
4189 error= 0;
4190 }
4191 else
4192 {
4193 if (thd)
4194 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4195 ER_BINLOG_PURGE_FATAL_ERR,
4196 "a problem with deleting %s; "
4197 "consider examining correspondence "
4198 "of your binlog index file "
4199 "to the actual binlog files",
4200 linfo.log_file_name);
4201 error= 1;
4202 goto err;
4203 }
4204 }
4205 if (find_next_log(&linfo, 0))
4206 break;
4207 }
4208
4209 if (!is_relay_log)
4210 {
4211 if (init_state)
4212 rpl_global_gtid_binlog_state.load(init_state, init_state_len);
4213 else
4214 rpl_global_gtid_binlog_state.reset();
4215 }
4216
4217 /* Start logging with a new file */
4218 close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED);
4219 // Reset (open will update)
4220 if (unlikely((error= my_delete(index_file_name, MYF(0)))))
4221 {
4222 if (my_errno == ENOENT)
4223 {
4224 if (thd)
4225 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4226 ER_LOG_PURGE_NO_FILE,
4227 ER_THD(thd, ER_LOG_PURGE_NO_FILE),
4228 index_file_name);
4229 sql_print_information("Failed to delete file '%s'",
4230 index_file_name);
4231 my_errno= 0;
4232 error= 0;
4233 }
4234 else
4235 {
4236 if (thd)
4237 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4238 ER_BINLOG_PURGE_FATAL_ERR,
4239 "a problem with deleting %s; "
4240 "consider examining correspondence "
4241 "of your binlog index file "
4242 "to the actual binlog files",
4243 index_file_name);
4244 error= 1;
4245 goto err;
4246 }
4247 }
4248 if (create_new_log && !open_index_file(index_file_name, 0, FALSE))
4249 if (unlikely((error= open(save_name, log_type, 0, next_log_number,
4250 io_cache_type, max_size, 0, FALSE))))
4251 goto err;
4252 my_free((void *) save_name);
4253
4254 err:
4255 if (error == 1)
4256 name= const_cast<char*>(save_name);
4257
4258 if (!is_relay_log)
4259 {
4260 xid_count_per_binlog *b;
4261 /*
4262 Remove all entries in the xid_count list except the last.
4263 Normally we will just be deleting all the entries that we waited for to
4264 drop to zero above. But if we fail during RESET MASTER for some reason
4265 then we will not have created any new log file, and we may keep the last
4266 of the old entries.
4267 */
4268 mysql_mutex_lock(&LOCK_xid_list);
4269 for (;;)
4270 {
4271 b= binlog_xid_count_list.head();
4272 DBUG_ASSERT(b /* List can never become empty. */);
4273 if (b->binlog_id == current_binlog_id)
4274 break;
4275 DBUG_ASSERT(b->xid_count == 0);
4276 WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::reset_logs(): Removing "
4277 "xid_list_entry for %s (%lu)", b);
4278 delete binlog_xid_count_list.get();
4279 }
4280 mysql_cond_broadcast(&COND_xid_list);
4281 reset_master_pending--;
4282 mysql_mutex_unlock(&LOCK_xid_list);
4283 }
4284
4285 mysql_mutex_unlock(&LOCK_index);
4286 mysql_mutex_unlock(&LOCK_log);
4287 DBUG_RETURN(error);
4288 }
4289
4290
4291 void MYSQL_BIN_LOG::wait_for_last_checkpoint_event()
4292 {
4293 mysql_mutex_lock(&LOCK_xid_list);
4294 for (;;)
4295 {
4296 if (binlog_xid_count_list.is_last(binlog_xid_count_list.head()))
4297 break;
4298 mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4299 }
4300 mysql_mutex_unlock(&LOCK_xid_list);
4301
4302 /*
4303 LOCK_xid_list and LOCK_log are chained, so the LOCK_log will only be
4304 obtained after mark_xid_done() has written the last checkpoint event.
4305 */
4306 mysql_mutex_lock(&LOCK_log);
4307 mysql_mutex_unlock(&LOCK_log);
4308 }
4309
4310
4311 /**
4312 Delete relay log files prior to rli->group_relay_log_name
4313 (i.e. all logs which are not involved in a non-finished group
4314 (transaction)), remove them from the index file and start on next
4315 relay log.
4316
4317 IMPLEMENTATION
4318
4319 - You must hold rli->data_lock before calling this function, since
4320 it writes group_relay_log_pos and similar fields of
4321 Relay_log_info.
4322 - Protects index file with LOCK_index
4323 - Delete relevant relay log files
4324 - Copy all file names after these ones to the front of the index file
    - If the OS has truncate, truncate the file, else fill it with '\n'
4326 - Read the next file name from the index file and store in rli->linfo
4327
4328 @param rli Relay log information
4329 @param included If false, all relay logs that are strictly before
4330 rli->group_relay_log_name are deleted ; if true, the
4331 latter is deleted too (i.e. all relay logs
4332 read by the SQL slave thread are deleted).
4333
4334 @note
    - This is only called from the slave SQL thread when it has read
      all commands from a relay log and wants to switch to a new relay log.
4337 - When this happens, we can be in an active transaction as
4338 a transaction can span over two relay logs
4339 (although it is always written as a single block to the master's binary
4340 log, hence cannot span over two master's binary logs).
4341
4342 @retval
4343 0 ok
4344 @retval
4345 LOG_INFO_EOF End of log-index-file found
4346 @retval
4347 LOG_INFO_SEEK Could not allocate IO cache
4348 @retval
4349 LOG_INFO_IO Got IO error while reading file
4350 */
4351
4352 #ifdef HAVE_REPLICATION
4353
4354 int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
4355 {
4356 int error, errcode;
4357 char *to_purge_if_included= NULL;
4358 inuse_relaylog *ir;
4359 ulonglong log_space_reclaimed= 0;
4360 DBUG_ENTER("purge_first_log");
4361
4362 DBUG_ASSERT(is_open());
4363 DBUG_ASSERT(rli->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT);
4364 DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
4365
4366 mysql_mutex_assert_owner(&rli->data_lock);
4367
4368 mysql_mutex_lock(&LOCK_index);
4369
4370 ir= rli->inuse_relaylog_list;
4371 while (ir)
4372 {
4373 inuse_relaylog *next= ir->next;
4374 if (!ir->completed || ir->dequeued_count < ir->queued_count)
4375 {
4376 included= false;
4377 break;
4378 }
4379 if (!included && !strcmp(ir->name, rli->group_relay_log_name))
4380 break;
4381 if (!next)
4382 {
4383 rli->last_inuse_relaylog= NULL;
4384 included= 1;
4385 to_purge_if_included= my_strdup(ir->name, MYF(0));
4386 }
4387 rli->free_inuse_relaylog(ir);
4388 ir= next;
4389 }
4390 rli->inuse_relaylog_list= ir;
4391 if (ir)
4392 to_purge_if_included= my_strdup(ir->name, MYF(0));
4393
4394 /*
4395 Read the next log file name from the index file and pass it back to
4396 the caller.
4397 */
4398 if (unlikely((error=find_log_pos(&rli->linfo, rli->event_relay_log_name,
4399 0))) ||
4400 unlikely((error=find_next_log(&rli->linfo, 0))))
4401 {
4402 sql_print_error("next log error: %d offset: %llu log: %s included: %d",
4403 error, rli->linfo.index_file_offset,
4404 rli->event_relay_log_name, included);
4405 goto err;
4406 }
4407
4408 /*
4409 Reset rli's coordinates to the current log.
4410 */
4411 rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
4412 strmake_buf(rli->event_relay_log_name,rli->linfo.log_file_name);
4413
4414 /*
4415 If we removed the rli->group_relay_log_name file,
4416 we must update the rli->group* coordinates, otherwise do not touch it as the
4417 group's execution is not finished (e.g. COMMIT not executed)
4418 */
4419 if (included)
4420 {
4421 rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE;
4422 strmake_buf(rli->group_relay_log_name,rli->linfo.log_file_name);
4423 rli->notify_group_relay_log_name_update();
4424 }
4425
4426 /* Store where we are in the new file for the execution thread */
4427 if (rli->flush())
4428 error= LOG_INFO_IO;
4429
4430 DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););
4431
4432 rli->relay_log.purge_logs(to_purge_if_included, included,
4433 0, 0, &log_space_reclaimed);
4434
4435 mysql_mutex_lock(&rli->log_space_lock);
4436 my_atomic_add64_explicit((volatile int64*)(&rli->log_space_total),
4437 (-(int64)log_space_reclaimed),
4438 MY_MEMORY_ORDER_RELAXED);
4439 mysql_cond_broadcast(&rli->log_space_cond);
4440 mysql_mutex_unlock(&rli->log_space_lock);
4441
4442 /*
4443 * Need to update the log pos because purge logs has been called
4444 * after fetching initially the log pos at the beginning of the method.
4445 */
4446 if ((errcode= find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)))
4447 {
4448 sql_print_error("next log error: %d offset: %llu log: %s included: %d",
4449 errcode, rli->linfo.index_file_offset,
4450 rli->group_relay_log_name, included);
4451 goto err;
4452 }
4453
4454 /* If included was passed, rli->linfo should be the first entry. */
4455 DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);
4456
4457 err:
4458 my_free(to_purge_if_included);
4459 mysql_mutex_unlock(&LOCK_index);
4460 DBUG_RETURN(error);
4461 }
4462
4463 /**
4464 Update log index_file.
4465 */
4466
4467 int MYSQL_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
4468 {
4469 if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
4470 return LOG_INFO_IO;
4471
4472 // now update offsets in index file for running threads
4473 if (need_update_threads)
4474 adjust_linfo_offsets(log_info->index_file_start_offset);
4475 return 0;
4476 }
4477
4478 /**
4479 Remove all logs before the given log from disk and from the index file.
4480
4481 @param to_log Delete all log file name before this file.
4482 @param included If true, to_log is deleted too.
4483 @param need_mutex
4484 @param need_update_threads If we want to update the log coordinates of
4485 all threads. False for relay logs, true otherwise.
4486 @param reclaimeed_log_space If not null, increment this variable to
4487 the amount of log space freed
4488
4489 @note
4490 If any of the logs before the deleted one is in use,
4491 only purge logs up to this one.
4492
4493 @retval
4494 0 ok
4495 @retval
4496 LOG_INFO_EOF to_log not found
4497 LOG_INFO_EMFILE too many files opened
4498 LOG_INFO_FATAL if any other than ENOENT error from
4499 mysql_file_stat() or mysql_file_delete()
4500 */
4501
4502 int MYSQL_BIN_LOG::purge_logs(const char *to_log,
4503 bool included,
4504 bool need_mutex,
4505 bool need_update_threads,
4506 ulonglong *reclaimed_space)
4507 {
4508 int error= 0;
4509 bool exit_loop= 0;
4510 LOG_INFO log_info;
4511 THD *thd= current_thd;
4512 DBUG_ENTER("purge_logs");
4513 DBUG_PRINT("info",("to_log= %s",to_log));
4514
4515 if (need_mutex)
4516 mysql_mutex_lock(&LOCK_index);
4517 if (unlikely((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/))) )
4518 {
4519 sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
4520 "listed in the index.", to_log);
4521 goto err;
4522 }
4523
4524 if (unlikely((error= open_purge_index_file(TRUE))))
4525 {
4526 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
4527 goto err;
4528 }
4529
4530 /*
4531 File name exists in index file; delete until we find this file
4532 or a file that is used.
4533 */
4534 if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))))
4535 goto err;
4536 while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
4537 can_purge_log(log_info.log_file_name))
4538 {
4539 if (unlikely((error= register_purge_index_entry(log_info.log_file_name))))
4540 {
4541 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
4542 log_info.log_file_name);
4543 goto err;
4544 }
4545
4546 if (find_next_log(&log_info, 0) || exit_loop)
4547 break;
4548 }
4549
4550 DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););
4551
4552 if (unlikely((error= sync_purge_index_file())))
4553 {
4554 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
4555 goto err;
4556 }
4557
4558 /* We know how many files to delete. Update index file. */
4559 if (unlikely((error=update_log_index(&log_info, need_update_threads))))
4560 {
4561 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
4562 goto err;
4563 }
4564
4565 DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););
4566
4567 err:
4568 /* Read each entry from purge_index_file and delete the file. */
4569 if (is_inited_purge_index_file() &&
4570 (error= purge_index_entry(thd, reclaimed_space, FALSE)))
4571 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
4572 " that would be purged.");
4573 close_purge_index_file();
4574
4575 DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););
4576
4577 if (need_mutex)
4578 mysql_mutex_unlock(&LOCK_index);
4579 DBUG_RETURN(error);
4580 }
4581
4582 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
4583 {
4584 int error= 0;
4585 DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
4586 if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
4587 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
4588 MY_REPLACE_EXT)) == NULL)
4589 {
4590 error= 1;
4591 sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
4592 "file name.");
4593 }
4594 DBUG_RETURN(error);
4595 }
4596
4597 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
4598 {
4599 int error= 0;
4600 File file= -1;
4601
4602 DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
4603
4604 if (destroy)
4605 close_purge_index_file();
4606
4607 if (!my_b_inited(&purge_index_file))
4608 {
4609 if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
4610 MYF(MY_WME | ME_WAITTANG))) < 0 ||
4611 init_io_cache(&purge_index_file, file, IO_SIZE,
4612 (destroy ? WRITE_CACHE : READ_CACHE),
4613 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
4614 {
4615 error= 1;
4616 sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
4617 " file.");
4618 }
4619 }
4620 DBUG_RETURN(error);
4621 }
4622
4623 int MYSQL_BIN_LOG::close_purge_index_file()
4624 {
4625 int error= 0;
4626
4627 DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
4628
4629 if (my_b_inited(&purge_index_file))
4630 {
4631 end_io_cache(&purge_index_file);
4632 error= my_close(purge_index_file.file, MYF(0));
4633 }
4634 my_delete(purge_index_file_name, MYF(0));
4635 bzero((char*) &purge_index_file, sizeof(purge_index_file));
4636
4637 DBUG_RETURN(error);
4638 }
4639
4640 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
4641 {
4642 return my_b_inited(&purge_index_file);
4643 }
4644
4645 int MYSQL_BIN_LOG::sync_purge_index_file()
4646 {
4647 int error= 0;
4648 DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
4649
4650 if (unlikely((error= flush_io_cache(&purge_index_file))) ||
4651 unlikely((error= my_sync(purge_index_file.file,
4652 MYF(MY_WME | MY_SYNC_FILESIZE)))))
4653 DBUG_RETURN(error);
4654
4655 DBUG_RETURN(error);
4656 }
4657
4658 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
4659 {
4660 int error= 0;
4661 DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
4662
4663 if (unlikely((error=my_b_write(&purge_index_file, (const uchar*)entry,
4664 strlen(entry)))) ||
4665 unlikely((error=my_b_write(&purge_index_file, (const uchar*)"\n", 1))))
4666 DBUG_RETURN (error);
4667
4668 DBUG_RETURN(error);
4669 }
4670
4671 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
4672 {
4673 DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
4674 DBUG_RETURN(register_purge_index_entry(entry));
4675 }
4676
/**
  Delete the files whose names are recorded in the purge register file.

  The register cache is switched to read mode and read line by line.  For
  every name:
    - the file is stat'ed; a missing file (ENOENT) only produces a warning,
      any other stat failure aborts with LOG_INFO_FATAL;
    - the name is looked up in the index file; if it is found there the
      file is left alone, if the lookup fails with anything but
      LOG_INFO_EOF that error is returned;
    - otherwise (LOG_INFO_EOF: not listed in the index) the file is deleted
      and its size is added to *reclaimed_space.

  @param thd              Thread to push warnings to; may be NULL, in which
                          case some messages go to the error log instead.
  @param reclaimed_space  If not NULL, incremented by the size of each file
                          that was deleted.
  @param need_mutex       Passed on to find_log_pos() as its locking argument.

  @retval 0                ok
  @retval LOG_INFO_EMFILE  my_delete() failed with EMFILE
  @retval LOG_INFO_FATAL   stat or delete failed with an error other than
                           ENOENT (and, for delete, other than EMFILE)
  @retval other            error from reinit_io_cache(), from reading the
                           register file, or from find_log_pos()
*/
int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *reclaimed_space,
                                     bool need_mutex)
{
  DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");
  MY_STAT s;
  int error= 0;
  LOG_INFO log_info;
  LOG_INFO check_log_info;

  DBUG_ASSERT(my_b_inited(&purge_index_file));

  /* Rewind the register cache and switch it to reading. */
  if (unlikely((error= reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0,
                                       0))))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
                    "for read");
    goto err;
  }

  for (;;)
  {
    size_t length;

    /* A result of 0 or 1 means no usable name was read: error or EOF. */
    if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
                          FN_REFLEN)) <= 1)
    {
      if (purge_index_file.error)
      {
        error= purge_index_file.error;
        sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
                        "register file.", error);
        goto err;
      }

      /* Reached EOF */
      break;
    }

    /* Get rid of the trailing '\n' */
    log_info.log_file_name[length-1]= 0;

    if (unlikely(!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s,
                                  MYF(0))))
    {
      if (my_errno == ENOENT)
      {
        /*
          It's not fatal if we can't stat a log file that does not exist;
          If we could not stat, we won't delete.
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                              log_info.log_file_name);
        }
        sql_print_information("Failed to execute mysql_file_stat on file '%s'",
                              log_info.log_file_name);
        my_errno= 0;
      }
      else
      {
        /*
          Other than ENOENT are fatal
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with getting info on being purged %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              log_info.log_file_name);
        }
        else
        {
          sql_print_information("Failed to delete log file '%s'; "
                                "consider examining correspondence "
                                "of your binlog index file "
                                "to the actual binlog files",
                                log_info.log_file_name);
        }
        error= LOG_INFO_FATAL;
        goto err;
      }
    }
    else
    {
      /*
        The file exists.  It is only deleted when the index lookup below
        fails with LOG_INFO_EOF, i.e. the name is not listed in the index;
        a successful lookup (return 0) skips this whole branch.
      */
      if (unlikely((error= find_log_pos(&check_log_info,
                                        log_info.log_file_name, need_mutex))))
      {
        if (error != LOG_INFO_EOF)
        {
          if (thd)
          {
            push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                ER_BINLOG_PURGE_FATAL_ERR,
                                "a problem with deleting %s and "
                                "reading the binlog index file",
                                log_info.log_file_name);
          }
          else
          {
            sql_print_information("Failed to delete file '%s' and "
                                  "read the binlog index file",
                                  log_info.log_file_name);
          }
          goto err;
        }

        /* LOG_INFO_EOF is the expected outcome here, not a failure. */
        error= 0;

        DBUG_PRINT("info",("purging %s",log_info.log_file_name));
        if (!my_delete(log_info.log_file_name, MYF(0)))
        {
          /* Deleted: account for the size obtained from the stat above. */
          if (reclaimed_space)
            *reclaimed_space+= s.st_size;
        }
        else
        {
          if (my_errno == ENOENT)
          {
            /* The file vanished between stat and delete: warn and go on. */
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                  ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                                  log_info.log_file_name);
            }
            sql_print_information("Failed to delete file '%s'",
                                  log_info.log_file_name);
            my_errno= 0;
          }
          else
          {
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                  ER_BINLOG_PURGE_FATAL_ERR,
                                  "a problem with deleting %s; "
                                  "consider examining correspondence "
                                  "of your binlog index file "
                                  "to the actual binlog files",
                                  log_info.log_file_name);
            }
            else
            {
              sql_print_information("Failed to delete file '%s'; "
                                    "consider examining correspondence "
                                    "of your binlog index file "
                                    "to the actual binlog files",
                                    log_info.log_file_name);
            }
            /*
              NOTE(review): my_errno is inspected after the reporting calls
              above; this presumably relies on them not modifying it - confirm.
            */
            if (my_errno == EMFILE)
            {
              DBUG_PRINT("info",
                         ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno));
              error= LOG_INFO_EMFILE;
              goto err;
            }
            error= LOG_INFO_FATAL;
            goto err;
          }
        }
      }
    }
  }

err:
  DBUG_RETURN(error);
}
4848
4849 /**
4850 Remove all logs before the given file date from disk and from the
4851 index file.
4852
4853 @param thd Thread pointer
4854 @param purge_time Delete all log files before given date.
4855
4856 @note
4857 If any of the logs before the deleted one is in use,
4858 only purge logs up to this one.
4859
4860 @retval
4861 0 ok
4862 @retval
4863 LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
4864 LOG_INFO_FATAL if any other than ENOENT error from
4865 mysql_file_stat() or mysql_file_delete()
4866 */
4867
4868 int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
4869 {
4870 int error;
4871 char to_log[FN_REFLEN];
4872 LOG_INFO log_info;
4873 MY_STAT stat_area;
4874 THD *thd= current_thd;
4875 DBUG_ENTER("purge_logs_before_date");
4876
4877 mysql_mutex_lock(&LOCK_index);
4878 to_log[0]= 0;
4879
4880 if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))))
4881 goto err;
4882
4883 while (strcmp(log_file_name, log_info.log_file_name) &&
4884 can_purge_log(log_info.log_file_name))
4885 {
4886 if (!mysql_file_stat(m_key_file_log,
4887 log_info.log_file_name, &stat_area, MYF(0)))
4888 {
4889 if (my_errno == ENOENT)
4890 {
4891 /*
4892 It's not fatal if we can't stat a log file that does not exist.
4893 */
4894 my_errno= 0;
4895 }
4896 else
4897 {
4898 /*
4899 Other than ENOENT are fatal
4900 */
4901 if (thd)
4902 {
4903 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4904 ER_BINLOG_PURGE_FATAL_ERR,
4905 "a problem with getting info on being purged %s; "
4906 "consider examining correspondence "
4907 "of your binlog index file "
4908 "to the actual binlog files",
4909 log_info.log_file_name);
4910 }
4911 else
4912 {
4913 sql_print_information("Failed to delete log file '%s'",
4914 log_info.log_file_name);
4915 }
4916 error= LOG_INFO_FATAL;
4917 goto err;
4918 }
4919 }
4920 else
4921 {
4922 if (stat_area.st_mtime < purge_time)
4923 strmake_buf(to_log, log_info.log_file_name);
4924 else
4925 break;
4926 }
4927 if (find_next_log(&log_info, 0))
4928 break;
4929 }
4930
4931 error= (to_log[0] ? purge_logs(to_log, 1, 0, 1, (ulonglong *) 0) : 0);
4932
4933 err:
4934 mysql_mutex_unlock(&LOCK_index);
4935 DBUG_RETURN(error);
4936 }
4937
4938
4939 bool
4940 MYSQL_BIN_LOG::can_purge_log(const char *log_file_name_arg)
4941 {
4942 xid_count_per_binlog *b;
4943
4944 if (is_active(log_file_name_arg))
4945 return false;
4946 mysql_mutex_lock(&LOCK_xid_list);
4947 {
4948 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
4949 while ((b= it++) &&
4950 0 != strncmp(log_file_name_arg+dirname_length(log_file_name_arg),
4951 b->binlog_name, b->binlog_name_len))
4952 ;
4953 }
4954 mysql_mutex_unlock(&LOCK_xid_list);
4955 if (b)
4956 return false;
4957 return !log_in_use(log_file_name_arg);
4958 }
4959 #endif /* HAVE_REPLICATION */
4960
4961
4962 bool
4963 MYSQL_BIN_LOG::is_xidlist_idle()
4964 {
4965 bool res;
4966 mysql_mutex_lock(&LOCK_xid_list);
4967 res= is_xidlist_idle_nolock();
4968 mysql_mutex_unlock(&LOCK_xid_list);
4969 return res;
4970 }
4971
4972
4973 bool
4974 MYSQL_BIN_LOG::is_xidlist_idle_nolock()
4975 {
4976 xid_count_per_binlog *b;
4977
4978 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
4979 while ((b= it++))
4980 {
4981 if (b->xid_count > 0)
4982 return false;
4983 }
4984 return true;
4985 }
4986
4987 #ifdef WITH_WSREP
4988 inline bool
4989 is_gtid_cached_internal(IO_CACHE *file)
4990 {
4991 uchar data[EVENT_TYPE_OFFSET+1];
4992 bool result= false;
4993 my_off_t write_pos= my_b_tell(file);
4994 if (reinit_io_cache(file, READ_CACHE, 0, 0, 0))
4995 return false;
4996 /*
4997 In the cache we have gtid event if , below condition is true,
4998 */
4999 my_b_read(file, data, sizeof(data));
5000 uint event_type= (uchar)data[EVENT_TYPE_OFFSET];
5001 if (event_type == GTID_LOG_EVENT)
5002 result= true;
5003 /*
5004 Cleanup , Why because we have not read the full buffer
5005 and this will cause next to next reinit_io_cache(called in write_cache)
5006 to make cache empty.
5007 */
5008 file->read_pos= file->read_end;
5009 if (reinit_io_cache(file, WRITE_CACHE, write_pos, 0, 0))
5010 return false;
5011 return result;
5012 }
5013 #endif
5014
5015 #ifdef WITH_WSREP
5016 inline bool
5017 MYSQL_BIN_LOG::is_gtid_cached(THD *thd)
5018 {
5019 binlog_cache_mngr *mngr= (binlog_cache_mngr *) thd_get_ha_data(
5020 thd, binlog_hton);
5021 if (!mngr)
5022 return false;
5023 binlog_cache_data *cache_trans= mngr->get_binlog_cache_data(
5024 use_trans_cache(thd, true));
5025 binlog_cache_data *cache_stmt= mngr->get_binlog_cache_data(
5026 use_trans_cache(thd, false));
5027 if (cache_trans && !cache_trans->empty() &&
5028 is_gtid_cached_internal(&cache_trans->cache_log))
5029 return true;
5030 if (cache_stmt && !cache_stmt->empty() &&
5031 is_gtid_cached_internal(&cache_stmt->cache_log))
5032 return true;
5033 return false;
5034 }
5035 #endif
5036 /**
5037 Create a new log file name.
5038
5039 @param buf buf of at least FN_REFLEN where new name is stored
5040
5041 @note
5042 If file name will be longer then FN_REFLEN it will be truncated
5043 */
5044
5045 void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
5046 {
5047 size_t dir_len = dirname_length(log_file_name);
5048 if (dir_len >= FN_REFLEN)
5049 dir_len=FN_REFLEN-1;
5050 strnmov(buf, log_file_name, dir_len);
5051 strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
5052 }
5053
5054
5055 /**
5056 Check if we are writing/reading to the given log file.
5057 */
5058
5059 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
5060 {
5061 /**
5062 * there should/must be mysql_mutex_assert_owner(&LOCK_log) here...
5063 * but code violates this! (scary monsters and super creeps!)
5064 *
5065 * example stacktrace:
5066 * #8 MYSQL_BIN_LOG::is_active
5067 * #9 MYSQL_BIN_LOG::can_purge_log
5068 * #10 MYSQL_BIN_LOG::purge_logs
5069 * #11 MYSQL_BIN_LOG::purge_first_log
5070 * #12 next_event
5071 * #13 exec_relay_log_event
5072 *
5073 * I didn't investigate if this is ligit...(i.e if my comment is wrong)
5074 */
5075 return !strcmp(log_file_name, log_file_name_arg);
5076 }
5077
5078
5079 /*
5080 Wrappers around new_file_impl to avoid using argument
5081 to control locking. The argument 1) less readable 2) breaks
5082 incapsulation 3) allows external access to the class without
5083 a lock (which is not possible with private new_file_without_locking
5084 method).
5085
5086 @retval
5087 nonzero - error
5088 */
5089
5090 int MYSQL_BIN_LOG::new_file()
5091 {
5092 return new_file_impl(1);
5093 }
5094
5095 /*
5096 @retval
5097 nonzero - error
5098 */
5099 int MYSQL_BIN_LOG::new_file_without_locking()
5100 {
5101 return new_file_impl(0);
5102 }
5103
5104
5105 /**
5106 Start writing to a new log file or reopen the old file.
5107
5108 @param need_lock Set to 1 if caller has not locked LOCK_log
5109
5110 @retval
5111 nonzero - error
5112
5113 @note
5114 The new file name is stored last in the index file
5115 */
5116
5117 int MYSQL_BIN_LOG::new_file_impl(bool need_lock)
5118 {
5119 int error= 0, close_on_error= FALSE;
5120 char new_name[FN_REFLEN], *new_name_ptr, *old_name, *file_to_open;
5121 uint close_flag;
5122 bool delay_close= false;
5123 File UNINIT_VAR(old_file);
5124 DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
5125
5126 if (need_lock)
5127 mysql_mutex_lock(&LOCK_log);
5128 mysql_mutex_assert_owner(&LOCK_log);
5129
5130 if (!is_open())
5131 {
5132 DBUG_PRINT("info",("log is closed"));
5133 mysql_mutex_unlock(&LOCK_log);
5134 DBUG_RETURN(error);
5135 }
5136
5137 mysql_mutex_lock(&LOCK_index);
5138
5139 /* Reuse old name if not binlog and not update log */
5140 new_name_ptr= name;
5141
5142 /*
5143 If user hasn't specified an extension, generate a new log name
5144 We have to do this here and not in open as we want to store the
5145 new file name in the current binary log file.
5146 */
5147 if (unlikely((error= generate_new_name(new_name, name, 0))))
5148 {
5149 #ifdef ENABLE_AND_FIX_HANG
5150 close_on_error= TRUE;
5151 #endif
5152 goto end;
5153 }
5154 new_name_ptr=new_name;
5155
5156 if (log_type == LOG_BIN)
5157 {
5158 {
5159 /*
5160 We log the whole file name for log file as the user may decide
5161 to change base names at some point.
5162 */
5163 Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
5164 is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
5165 /*
5166 The current relay-log's closing Rotate event must have checksum
5167 value computed with an algorithm of the last relay-logged FD event.
5168 */
5169 if (is_relay_log)
5170 r.checksum_alg= relay_log_checksum_alg;
5171 DBUG_ASSERT(!is_relay_log || relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
5172 if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event", (error=close_on_error=TRUE), FALSE) ||
5173 (error= write_event(&r)))
5174 {
5175 DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno=2;);
5176 close_on_error= TRUE;
5177 my_printf_error(ER_ERROR_ON_WRITE,
5178 ER_THD_OR_DEFAULT(current_thd, ER_CANT_OPEN_FILE),
5179 MYF(ME_FATALERROR), name, errno);
5180 goto end;
5181 }
5182 bytes_written += r.data_written;
5183 }
5184 }
5185
5186 /*
5187 Update needs to be signalled even if there is no rotate event
5188 log rotation should give the waiting thread a signal to
5189 discover EOF and move on to the next log.
5190 */
5191 if (unlikely((error= flush_io_cache(&log_file))))
5192 {
5193 close_on_error= TRUE;
5194 goto end;
5195 }
5196 update_binlog_end_pos();
5197
5198 old_name=name;
5199 name=0; // Don't free name
5200 close_flag= LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX;
5201 if (!is_relay_log)
5202 {
5203 /*
5204 We need to keep the old binlog file open (and marked as in-use) until
5205 the new one is fully created and synced to disk and index. Otherwise we
5206 leave a window where if we crash, there is no binlog file marked as
5207 crashed for server restart to detect the need for recovery.
5208 */
5209 old_file= log_file.file;
5210 close_flag|= LOG_CLOSE_DELAYED_CLOSE;
5211 delay_close= true;
5212 }
5213 close(close_flag);
5214 if (log_type == LOG_BIN && checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF)
5215 {
5216 DBUG_ASSERT(!is_relay_log);
5217 DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
5218 binlog_checksum_options= checksum_alg_reset;
5219 }
5220 /*
5221 Note that at this point, log_state != LOG_CLOSED
5222 (important for is_open()).
5223 */
5224
5225 /*
5226 new_file() is only used for rotation (in FLUSH LOGS or because size >
5227 max_binlog_size or max_relay_log_size).
5228 If this is a binary log, the Format_description_log_event at the
5229 beginning of the new file should have created=0 (to distinguish with the
5230 Format_description_log_event written at server startup, which should
5231 trigger temp tables deletion on slaves.
5232 */
5233
5234 /* reopen index binlog file, BUG#34582 */
5235 file_to_open= index_file_name;
5236 error= open_index_file(index_file_name, 0, FALSE);
5237 if (likely(!error))
5238 {
5239 /* reopen the binary log file. */
5240 file_to_open= new_name_ptr;
5241 error= open(old_name, log_type, new_name_ptr, 0, io_cache_type,
5242 max_size, 1, FALSE);
5243 }
5244
5245 /* handle reopening errors */
5246 if (unlikely(error))
5247 {
5248 my_error(ER_CANT_OPEN_FILE, MYF(ME_FATALERROR), file_to_open, error);
5249 close_on_error= TRUE;
5250 }
5251
5252 my_free(old_name);
5253
5254 end:
5255
5256 if (delay_close)
5257 {
5258 clear_inuse_flag_when_closing(old_file);
5259 mysql_file_close(old_file, MYF(MY_WME));
5260 }
5261
5262 if (unlikely(error && close_on_error)) /* rotate or reopen failed */
5263 {
5264 /*
5265 Close whatever was left opened.
5266
5267 We are keeping the behavior as it exists today, ie,
5268 we disable logging and move on (see: BUG#51014).
5269
5270 TODO: as part of WL#1790 consider other approaches:
5271 - kill mysql (safety);
5272 - try multiple locations for opening a log file;
5273 - switch server to protected/readonly mode
5274 - ...
5275 */
5276 close(LOG_CLOSE_INDEX);
5277 sql_print_error(fatal_log_error, new_name_ptr, errno);
5278 }
5279
5280 mysql_mutex_unlock(&LOCK_index);
5281 if (need_lock)
5282 mysql_mutex_unlock(&LOCK_log);
5283
5284 DBUG_RETURN(error);
5285 }
5286
5287 bool MYSQL_BIN_LOG::write_event(Log_event *ev, binlog_cache_data *cache_data,
5288 IO_CACHE *file)
5289 {
5290 Log_event_writer writer(file, 0, &crypto);
5291 if (crypto.scheme && file == &log_file)
5292 writer.ctx= alloca(crypto.ctx_size);
5293 if (cache_data)
5294 cache_data->add_status(ev->logged_status());
5295 return writer.write(ev);
5296 }
5297
5298 bool MYSQL_BIN_LOG::append(Log_event *ev)
5299 {
5300 bool res;
5301 mysql_mutex_lock(&LOCK_log);
5302 res= append_no_lock(ev);
5303 mysql_mutex_unlock(&LOCK_log);
5304 return res;
5305 }
5306
5307
5308 bool MYSQL_BIN_LOG::append_no_lock(Log_event* ev)
5309 {
5310 bool error = 0;
5311 DBUG_ENTER("MYSQL_BIN_LOG::append");
5312
5313 mysql_mutex_assert_owner(&LOCK_log);
5314 DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5315
5316 if (write_event(ev))
5317 {
5318 error=1;
5319 goto err;
5320 }
5321 bytes_written+= ev->data_written;
5322 DBUG_PRINT("info",("max_size: %lu",max_size));
5323 if (flush_and_sync(0))
5324 goto err;
5325 if (my_b_append_tell(&log_file) > max_size)
5326 error= new_file_without_locking();
5327 err:
5328 update_binlog_end_pos();
5329 DBUG_RETURN(error);
5330 }
5331
5332 bool MYSQL_BIN_LOG::write_event_buffer(uchar* buf, uint len)
5333 {
5334 bool error= 1;
5335 uchar *ebuf= 0;
5336 DBUG_ENTER("MYSQL_BIN_LOG::write_event_buffer");
5337
5338 DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5339
5340 mysql_mutex_assert_owner(&LOCK_log);
5341
5342 if (crypto.scheme != 0)
5343 {
5344 DBUG_ASSERT(crypto.scheme == 1);
5345
5346 uint elen;
5347 uchar iv[BINLOG_IV_LENGTH];
5348
5349 ebuf= (uchar*)my_safe_alloca(len);
5350 if (!ebuf)
5351 goto err;
5352
5353 crypto.set_iv(iv, (uint32)my_b_append_tell(&log_file));
5354
5355 /*
5356 we want to encrypt everything, excluding the event length:
5357 massage the data before the encryption
5358 */
5359 memcpy(buf + EVENT_LEN_OFFSET, buf, 4);
5360
5361 if (encryption_crypt(buf + 4, len - 4,
5362 ebuf + 4, &elen,
5363 crypto.key, crypto.key_length, iv, sizeof(iv),
5364 ENCRYPTION_FLAG_ENCRYPT | ENCRYPTION_FLAG_NOPAD,
5365 ENCRYPTION_KEY_SYSTEM_DATA, crypto.key_version))
5366 goto err;
5367
5368 DBUG_ASSERT(elen == len - 4);
5369
5370 /* massage the data after the encryption */
5371 memcpy(ebuf, ebuf + EVENT_LEN_OFFSET, 4);
5372 int4store(ebuf + EVENT_LEN_OFFSET, len);
5373
5374 buf= ebuf;
5375 }
5376 if (my_b_append(&log_file, buf, len))
5377 goto err;
5378 bytes_written+= len;
5379
5380 error= 0;
5381 DBUG_PRINT("info",("max_size: %lu",max_size));
5382 if (flush_and_sync(0))
5383 goto err;
5384 if (my_b_append_tell(&log_file) > max_size)
5385 error= new_file_without_locking();
5386 err:
5387 my_safe_afree(ebuf, len);
5388 if (likely(!error))
5389 update_binlog_end_pos();
5390 DBUG_RETURN(error);
5391 }
5392
5393 bool MYSQL_BIN_LOG::flush_and_sync(bool *synced)
5394 {
5395 int err=0, fd=log_file.file;
5396 if (synced)
5397 *synced= 0;
5398 mysql_mutex_assert_owner(&LOCK_log);
5399 if (flush_io_cache(&log_file))
5400 return 1;
5401 uint sync_period= get_sync_period();
5402 if (sync_period && ++sync_counter >= sync_period)
5403 {
5404 sync_counter= 0;
5405 err= mysql_file_sync(fd, MYF(MY_WME|MY_SYNC_FILESIZE));
5406 if (synced)
5407 *synced= 1;
5408 #ifndef DBUG_OFF
5409 if (opt_binlog_dbug_fsync_sleep > 0)
5410 my_sleep(opt_binlog_dbug_fsync_sleep);
5411 #endif
5412 }
5413 return err;
5414 }
5415
5416 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
5417 {
5418 DBUG_ASSERT(!thd->binlog_evt_union.do_union);
5419 thd->binlog_evt_union.do_union= TRUE;
5420 thd->binlog_evt_union.unioned_events= FALSE;
5421 thd->binlog_evt_union.unioned_events_trans= FALSE;
5422 thd->binlog_evt_union.first_query_id= query_id_param;
5423 }
5424
5425 void MYSQL_BIN_LOG::stop_union_events(THD *thd)
5426 {
5427 DBUG_ASSERT(thd->binlog_evt_union.do_union);
5428 thd->binlog_evt_union.do_union= FALSE;
5429 }
5430
5431 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
5432 {
5433 return (thd->binlog_evt_union.do_union &&
5434 query_id_param >= thd->binlog_evt_union.first_query_id);
5435 }
5436
5437 /**
5438 This function checks if a transactional table was updated by the
5439 current transaction.
5440
5441 @param thd The client thread that executed the current statement.
5442 @return
5443 @c true if a transactional table was updated, @c false otherwise.
5444 */
5445 bool
5446 trans_has_updated_trans_table(const THD* thd)
5447 {
5448 binlog_cache_mngr *const cache_mngr=
5449 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5450
5451 return (cache_mngr ? !cache_mngr->trx_cache.empty() : 0);
5452 }
5453
5454 /**
5455 This function checks if a transactional table was updated by the
5456 current statement.
5457
5458 @param thd The client thread that executed the current statement.
5459 @return
5460 @c true if a transactional table was updated, @c false otherwise.
5461 */
5462 bool
5463 stmt_has_updated_trans_table(const THD *thd)
5464 {
5465 Ha_trx_info *ha_info;
5466
5467 for (ha_info= thd->transaction.stmt.ha_list; ha_info;
5468 ha_info= ha_info->next())
5469 {
5470 if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
5471 return (TRUE);
5472 }
5473 return (FALSE);
5474 }
5475
5476 /**
5477 This function checks if either a trx-cache or a non-trx-cache should
5478 be used. If @c bin_log_direct_non_trans_update is active or the format
5479 is either MIXED or ROW, the cache to be used depends on the flag @c
5480 is_transactional.
5481
5482 On the other hand, if binlog_format is STMT or direct option is
5483 OFF, the trx-cache should be used if and only if the statement is
5484 transactional or the trx-cache is not empty. Otherwise, the
5485 non-trx-cache should be used.
5486
5487 @param thd The client thread.
5488 @param is_transactional The changes are related to a trx-table.
5489 @return
5490 @c true if a trx-cache should be used, @c false otherwise.
5491 */
5492 bool use_trans_cache(const THD* thd, bool is_transactional)
5493 {
5494 if (is_transactional)
5495 return 1;
5496 binlog_cache_mngr *const cache_mngr=
5497 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5498
5499 return ((thd->is_current_stmt_binlog_format_row() ||
5500 thd->variables.binlog_direct_non_trans_update) ? 0 :
5501 !cache_mngr->trx_cache.empty());
5502 }
5503
5504 /**
5505 This function checks if a transaction, either a multi-statement
5506 or a single statement transaction is about to commit or not.
5507
5508 @param thd The client thread that executed the current statement.
5509 @param all Committing a transaction (i.e. TRUE) or a statement
5510 (i.e. FALSE).
5511 @return
5512 @c true if committing a transaction, otherwise @c false.
5513 */
5514 bool ending_trans(THD* thd, const bool all)
5515 {
5516 return (all || ending_single_stmt_trans(thd, all));
5517 }
5518
5519 /**
5520 This function checks if a single statement transaction is about
5521 to commit or not.
5522
5523 @param thd The client thread that executed the current statement.
5524 @param all Committing a transaction (i.e. TRUE) or a statement
5525 (i.e. FALSE).
5526 @return
5527 @c true if committing a single statement transaction, otherwise
5528 @c false.
5529 */
5530 bool ending_single_stmt_trans(THD* thd, const bool all)
5531 {
5532 return (!all && !thd->in_multi_stmt_transaction_mode());
5533 }
5534
5535 /**
5536 This function checks if a non-transactional table was updated by
5537 the current transaction.
5538
5539 @param thd The client thread that executed the current statement.
5540 @return
5541 @c true if a non-transactional table was updated, @c false
5542 otherwise.
5543 */
5544 bool trans_has_updated_non_trans_table(const THD* thd)
5545 {
5546 return (thd->transaction.all.modified_non_trans_table ||
5547 thd->transaction.stmt.modified_non_trans_table);
5548 }
5549
5550 /**
5551 This function checks if a non-transactional table was updated by the
5552 current statement.
5553
5554 @param thd The client thread that executed the current statement.
5555 @return
5556 @c true if a non-transactional table was updated, @c false otherwise.
5557 */
5558 bool stmt_has_updated_non_trans_table(const THD* thd)
5559 {
5560 return (thd->transaction.stmt.modified_non_trans_table);
5561 }
5562
5563 /*
5564 These functions are placed in this file since they need access to
5565 binlog_hton, which has internal linkage.
5566 */
5567
5568 binlog_cache_mngr *THD::binlog_setup_trx_data()
5569 {
5570 DBUG_ENTER("THD::binlog_setup_trx_data");
5571 binlog_cache_mngr *cache_mngr=
5572 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5573
5574 if (cache_mngr)
5575 DBUG_RETURN(cache_mngr); // Already set up
5576
5577 cache_mngr= (binlog_cache_mngr*) my_malloc(sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
5578 if (!cache_mngr ||
5579 open_cached_file(&cache_mngr->stmt_cache.cache_log, mysql_tmpdir,
5580 LOG_PREFIX, (size_t)binlog_stmt_cache_size, MYF(MY_WME)) ||
5581 open_cached_file(&cache_mngr->trx_cache.cache_log, mysql_tmpdir,
5582 LOG_PREFIX, (size_t)binlog_cache_size, MYF(MY_WME)))
5583 {
5584 my_free(cache_mngr);
5585 DBUG_RETURN(0); // Didn't manage to set it up
5586 }
5587 thd_set_ha_data(this, binlog_hton, cache_mngr);
5588
5589 cache_mngr= new (cache_mngr)
5590 binlog_cache_mngr(max_binlog_stmt_cache_size,
5591 max_binlog_cache_size,
5592 &binlog_stmt_cache_use,
5593 &binlog_stmt_cache_disk_use,
5594 &binlog_cache_use,
5595 &binlog_cache_disk_use);
5596 DBUG_RETURN(cache_mngr);
5597 }
5598
5599 /*
5600 Function to start a statement and optionally a transaction for the
5601 binary log.
5602
5603 SYNOPSIS
5604 binlog_start_trans_and_stmt()
5605
5606 DESCRIPTION
5607
5608 This function does three things:
5609 - Start a transaction if not in autocommit mode or if a BEGIN
5610 statement has been seen.
5611
5612 - Start a statement transaction to allow us to truncate the cache.
5613
5614 - Save the current binlog position so that we can roll back the
5615 statement by truncating the cache.
5616
5617 We only update the saved position if the old one was undefined,
5618 the reason is that there are some cases (e.g., for CREATE-SELECT)
5619 where the position is saved twice (e.g., both in
5620 select_create::prepare() and THD::binlog_write_table_map()) , but
5621 we should use the first. This means that calls to this function
5622 can be used to start the statement before the first table map
5623 event, to include some extra events.
5624 */
5625
5626 void
5627 THD::binlog_start_trans_and_stmt()
5628 {
5629 binlog_cache_mngr *cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5630 DBUG_ENTER("binlog_start_trans_and_stmt");
5631 DBUG_PRINT("enter", ("cache_mngr: %p cache_mngr->trx_cache.get_prev_position(): %lu",
5632 cache_mngr,
5633 (cache_mngr ? (ulong) cache_mngr->trx_cache.get_prev_position() :
5634 (ulong) 0)));
5635
5636 if (cache_mngr == NULL ||
5637 cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
5638 {
5639 this->binlog_set_stmt_begin();
5640 bool mstmt_mode= in_multi_stmt_transaction_mode();
5641 #ifdef WITH_WSREP
5642 /* Write Gtid
5643 Get domain id only when gtid mode is set
5644 If this event is replicate through a master then ,
5645 we will forward the same gtid another nodes
5646 We have to do this only one time in mysql transaction.
5647 Since this function is called multiple times , We will check for
5648 ha_info->is_started()
5649 */
5650 Ha_trx_info *ha_info;
5651 ha_info= this->ha_data[binlog_hton->slot].ha_info + (mstmt_mode ? 1 : 0);
5652
5653 if (!ha_info->is_started() && wsrep_gtid_mode
5654 && this->variables.gtid_seq_no)
5655 {
5656 binlog_cache_mngr *const cache_mngr=
5657 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5658 binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(1);
5659 IO_CACHE *file= &cache_data->cache_log;
5660 Log_event_writer writer(file, cache_data);
5661 Gtid_log_event gtid_event(this, this->variables.gtid_seq_no,
5662 this->variables.gtid_domain_id,
5663 true, LOG_EVENT_SUPPRESS_USE_F,
5664 true, 0);
5665 gtid_event.server_id= this->variables.server_id;
5666 writer.write(>id_event);
5667 }
5668 #endif
5669 if (mstmt_mode)
5670 trans_register_ha(this, TRUE, binlog_hton);
5671 trans_register_ha(this, FALSE, binlog_hton);
5672 /*
5673 Mark statement transaction as read/write. We never start
5674 a binary log transaction and keep it read-only,
5675 therefore it's best to mark the transaction read/write just
5676 at the same time we start it.
5677 Not necessary to mark the normal transaction read/write
5678 since the statement-level flag will be propagated automatically
5679 inside ha_commit_trans.
5680 */
5681 ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
5682 }
5683 DBUG_VOID_RETURN;
5684 }
5685
5686 void THD::binlog_set_stmt_begin() {
5687 binlog_cache_mngr *cache_mngr=
5688 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5689
5690 /*
5691 The call to binlog_trans_log_savepos() might create the cache_mngr
5692 structure, if it didn't exist before, so we save the position
5693 into an auto variable and then write it into the transaction
5694 data for the binary log (i.e., cache_mngr).
5695 */
5696 my_off_t pos= 0;
5697 binlog_trans_log_savepos(this, &pos);
5698 cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5699 cache_mngr->trx_cache.set_prev_position(pos);
5700 }
5701
5702 static int
5703 binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
5704 {
5705 int err= 0;
5706 DBUG_ENTER("binlog_start_consistent_snapshot");
5707
5708 binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
5709
5710 /* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */
5711 mysql_mutex_assert_owner(&LOCK_commit_ordered);
5712 strmake_buf(cache_mngr->last_commit_pos_file, mysql_bin_log.last_commit_pos_file);
5713 cache_mngr->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset;
5714
5715 trans_register_ha(thd, TRUE, hton);
5716
5717 DBUG_RETURN(err);
5718 }
5719
5720 /**
5721 This function writes a table map to the binary log.
5722 Note that in order to keep the signature uniform with related methods,
5723 we use a redundant parameter to indicate whether a transactional table
5724 was changed or not.
5725
5726 If with_annotate != NULL and
5727 *with_annotate = TRUE write also Annotate_rows before the table map.
5728
5729 @param table a pointer to the table.
5730 @param is_transactional @c true indicates a transactional table,
5731 otherwise @c false a non-transactional.
5732 @return
5733 nonzero if an error pops up when writing the table map event.
5734 */
5735 int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
5736 my_bool *with_annotate)
5737 {
5738 int error;
5739 DBUG_ENTER("THD::binlog_write_table_map");
5740 DBUG_PRINT("enter", ("table: %p (%s: #%lu)",
5741 table, table->s->table_name.str,
5742 table->s->table_map_id));
5743
5744 /* Ensure that all events in a GTID group are in the same cache */
5745 if (variables.option_bits & OPTION_GTID_BEGIN)
5746 is_transactional= 1;
5747
5748 /* Pre-conditions */
5749 DBUG_ASSERT(is_current_stmt_binlog_format_row());
5750 DBUG_ASSERT(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open());
5751 DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
5752
5753 Table_map_log_event
5754 the_event(this, table, table->s->table_map_id, is_transactional);
5755
5756 if (binlog_table_maps == 0)
5757 binlog_start_trans_and_stmt();
5758
5759 binlog_cache_mngr *const cache_mngr=
5760 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5761 binlog_cache_data *cache_data= (cache_mngr->
5762 get_binlog_cache_data(is_transactional));
5763 IO_CACHE *file= &cache_data->cache_log;
5764 Log_event_writer writer(file, cache_data);
5765
5766 if (with_annotate && *with_annotate)
5767 {
5768 Annotate_rows_log_event anno(table->in_use, is_transactional, false);
5769 /* Annotate event should be written not more than once */
5770 *with_annotate= 0;
5771 if (unlikely((error= writer.write(&anno))))
5772 {
5773 if (my_errno == EFBIG)
5774 cache_data->set_incident();
5775 DBUG_RETURN(error);
5776 }
5777 }
5778 if (unlikely((error= writer.write(&the_event))))
5779 DBUG_RETURN(error);
5780
5781 binlog_table_maps++;
5782 DBUG_RETURN(0);
5783 }
5784
5785 /**
5786 This function retrieves a pending row event from a cache which is
5787 specified through the parameter @c is_transactional. Respectively, when it
5788 is @c true, the pending event is returned from the transactional cache.
5789 Otherwise from the non-transactional cache.
5790
5791 @param is_transactional @c true indicates a transactional cache,
5792 otherwise @c false a non-transactional.
5793 @return
5794 The row event if any.
5795 */
5796 Rows_log_event*
5797 THD::binlog_get_pending_rows_event(bool is_transactional) const
5798 {
5799 Rows_log_event* rows= NULL;
5800 binlog_cache_mngr *const cache_mngr=
5801 (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5802
5803 /*
5804 This is less than ideal, but here's the story: If there is no cache_mngr,
5805 prepare_pending_rows_event() has never been called (since the cache_mngr
5806 is set up there). In that case, we just return NULL.
5807 */
5808 if (cache_mngr)
5809 {
5810 binlog_cache_data *cache_data=
5811 cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional));
5812
5813 rows= cache_data->pending();
5814 }
5815 return (rows);
5816 }
5817
5818 /**
5819 This function stores a pending row event into a cache which is specified
5820 through the parameter @c is_transactional. Respectively, when it is @c
5821 true, the pending event is stored into the transactional cache. Otherwise
5822 into the non-transactional cache.
5823
5824 @param evt a pointer to the row event.
5825 @param is_transactional @c true indicates a transactional cache,
5826 otherwise @c false a non-transactional.
5827 */
5828 void
5829 THD::binlog_set_pending_rows_event(Rows_log_event* ev, bool is_transactional)
5830 {
5831 binlog_cache_mngr *const cache_mngr= binlog_setup_trx_data();
5832
5833 DBUG_ASSERT(cache_mngr);
5834
5835 binlog_cache_data *cache_data=
5836 cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional));
5837
5838 cache_data->set_pending(ev);
5839 }
5840
5841
5842 /**
5843 This function removes the pending rows event, discarding any outstanding
5844 rows. If there is no pending rows event available, this is effectively a
5845 no-op.
5846
5847 @param thd a pointer to the user thread.
5848 @param is_transactional @c true indicates a transactional cache,
5849 otherwise @c false a non-transactional.
5850 */
5851 int
5852 MYSQL_BIN_LOG::remove_pending_rows_event(THD *thd, bool is_transactional)
5853 {
5854 DBUG_ENTER("MYSQL_BIN_LOG::remove_pending_rows_event");
5855
5856 binlog_cache_mngr *const cache_mngr=
5857 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5858
5859 DBUG_ASSERT(cache_mngr);
5860
5861 binlog_cache_data *cache_data=
5862 cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional));
5863
5864 if (Rows_log_event* pending= cache_data->pending())
5865 {
5866 delete pending;
5867 cache_data->set_pending(NULL);
5868 }
5869
5870 DBUG_RETURN(0);
5871 }
5872
5873 /*
5874 Moves the last bunch of rows from the pending Rows event to a cache (either
5875 transactional cache if is_transaction is @c true, or the non-transactional
5876 cache otherwise. Sets a new pending event.
5877
5878 @param thd a pointer to the user thread.
5879 @param evt a pointer to the row event.
5880 @param is_transactional @c true indicates a transactional cache,
5881 otherwise @c false a non-transactional.
5882 */
5883 int
5884 MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
5885 Rows_log_event* event,
5886 bool is_transactional)
5887 {
5888 DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
5889 DBUG_ASSERT(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open());
5890 DBUG_PRINT("enter", ("event: %p", event));
5891
5892 binlog_cache_mngr *const cache_mngr=
5893 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5894
5895 DBUG_ASSERT(cache_mngr);
5896
5897 binlog_cache_data *cache_data=
5898 cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional));
5899
5900 DBUG_PRINT("info", ("cache_mngr->pending(): %p", cache_data->pending()));
5901
5902 if (Rows_log_event* pending= cache_data->pending())
5903 {
5904 Log_event_writer writer(&cache_data->cache_log, cache_data);
5905
5906 /*
5907 Write pending event to the cache.
5908 */
5909 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
5910 {DBUG_SET("+d,simulate_file_write_error");});
5911 if (writer.write(pending))
5912 {
5913 set_write_error(thd, is_transactional);
5914 if (check_write_error(thd) && cache_data &&
5915 stmt_has_updated_non_trans_table(thd))
5916 cache_data->set_incident();
5917 delete pending;
5918 cache_data->set_pending(NULL);
5919 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
5920 {DBUG_SET("-d,simulate_file_write_error");});
5921 DBUG_RETURN(1);
5922 }
5923
5924 delete pending;
5925 }
5926
5927 thd->binlog_set_pending_rows_event(event, is_transactional);
5928
5929 DBUG_RETURN(0);
5930 }
5931
5932
5933 /* Generate a new global transaction ID, and write it to the binlog */
5934
5935 bool
5936 MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone,
5937 bool is_transactional, uint64 commit_id)
5938 {
5939 rpl_gtid gtid;
5940 uint32 domain_id;
5941 uint32 local_server_id;
5942 uint64 seq_no;
5943 int err;
5944 DBUG_ENTER("write_gtid_event");
5945 DBUG_PRINT("enter", ("standalone: %d", standalone));
5946
5947 #ifdef WITH_WSREP
5948 if (WSREP(thd) && thd->wsrep_trx_meta.gtid.seqno != -1 && wsrep_gtid_mode && !thd->variables.gtid_seq_no)
5949 {
5950 domain_id= wsrep_gtid_domain_id;
5951 } else {
5952 #endif /* WITH_WSREP */
5953 domain_id= thd->variables.gtid_domain_id;
5954 #ifdef WITH_WSREP
5955 }
5956 #endif /* WITH_WSREP */
5957 local_server_id= thd->variables.server_id;
5958 seq_no= thd->variables.gtid_seq_no;
5959
5960 DBUG_ASSERT(local_server_id != 0);
5961
5962 if (thd->variables.option_bits & OPTION_GTID_BEGIN)
5963 {
5964 DBUG_PRINT("error", ("OPTION_GTID_BEGIN is set. "
5965 "Master and slave will have different GTID values"));
5966 /* Reset the flag, as we will write out a GTID anyway */
5967 thd->variables.option_bits&= ~OPTION_GTID_BEGIN;
5968 }
5969
5970 /*
5971 Reset the session variable gtid_seq_no, to reduce the risk of accidentally
5972 producing a duplicate GTID.
5973 */
5974 thd->variables.gtid_seq_no= 0;
5975 if (seq_no != 0)
5976 {
5977 /* Use the specified sequence number. */
5978 gtid.domain_id= domain_id;
5979 gtid.server_id= local_server_id;
5980 gtid.seq_no= seq_no;
5981 err= rpl_global_gtid_binlog_state.update(>id, opt_gtid_strict_mode);
5982 if (err && thd->get_stmt_da()->sql_errno()==ER_GTID_STRICT_OUT_OF_ORDER)
5983 errno= ER_GTID_STRICT_OUT_OF_ORDER;
5984 }
5985 else
5986 {
5987 /* Allocate the next sequence number for the GTID. */
5988 err= rpl_global_gtid_binlog_state.update_with_next_gtid(domain_id,
5989 local_server_id, >id);
5990 seq_no= gtid.seq_no;
5991 }
5992 if (err)
5993 DBUG_RETURN(true);
5994
5995 thd->set_last_commit_gtid(gtid);
5996
5997 Gtid_log_event gtid_event(thd, seq_no, domain_id, standalone,
5998 LOG_EVENT_SUPPRESS_USE_F, is_transactional,
5999 commit_id);
6000
6001 /* Write the event to the binary log. */
6002 DBUG_ASSERT(this == &mysql_bin_log);
6003
6004 #ifdef WITH_WSREP
6005 if (wsrep_gtid_mode && is_gtid_cached(thd))
6006 DBUG_RETURN(false);
6007 #endif
6008
6009 if (write_event(>id_event))
6010 DBUG_RETURN(true);
6011 status_var_add(thd->status_var.binlog_bytes_written, gtid_event.data_written);
6012
6013 DBUG_RETURN(false);
6014 }
6015
6016
6017 int
6018 MYSQL_BIN_LOG::write_state_to_file()
6019 {
6020 File file_no;
6021 IO_CACHE cache;
6022 char buf[FN_REFLEN];
6023 int err;
6024 bool opened= false;
6025 bool log_inited= false;
6026
6027 fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
6028 MY_UNPACK_FILENAME);
6029 if ((file_no= mysql_file_open(key_file_binlog_state, buf,
6030 O_RDWR|O_CREAT|O_TRUNC|O_BINARY,
6031 MYF(MY_WME))) < 0)
6032 {
6033 err= 1;
6034 goto err;
6035 }
6036 opened= true;
6037 if ((err= init_io_cache(&cache, file_no, IO_SIZE, WRITE_CACHE, 0, 0,
6038 MYF(MY_WME|MY_WAIT_IF_FULL))))
6039 goto err;
6040 log_inited= true;
6041 if ((err= rpl_global_gtid_binlog_state.write_to_iocache(&cache)))
6042 goto err;
6043 log_inited= false;
6044 if ((err= end_io_cache(&cache)))
6045 goto err;
6046 if ((err= mysql_file_sync(file_no, MYF(MY_WME|MY_SYNC_FILESIZE))))
6047 goto err;
6048 goto end;
6049
6050 err:
6051 sql_print_error("Error writing binlog state to file '%s'.\n", buf);
6052 if (log_inited)
6053 end_io_cache(&cache);
6054 end:
6055 if (opened)
6056 mysql_file_close(file_no, MYF(0));
6057
6058 return err;
6059 }
6060
6061
6062 /*
6063 Initialize the binlog state from the master-bin.state file, at server startup.
6064
6065 Returns:
6066 0 for success.
6067 2 for when .state file did not exist.
6068 1 for other error.
6069 */
6070 int
6071 MYSQL_BIN_LOG::read_state_from_file()
6072 {
6073 File file_no;
6074 IO_CACHE cache;
6075 char buf[FN_REFLEN];
6076 int err;
6077 bool opened= false;
6078 bool log_inited= false;
6079
6080 fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
6081 MY_UNPACK_FILENAME);
6082 if ((file_no= mysql_file_open(key_file_binlog_state, buf,
6083 O_RDONLY|O_BINARY, MYF(0))) < 0)
6084 {
6085 if (my_errno != ENOENT)
6086 {
6087 err= 1;
6088 goto err;
6089 }
6090 else
6091 {
6092 /*
6093 If the state file does not exist, this is the first server startup
6094 with GTID enabled. So initialize to empty state.
6095 */
6096 rpl_global_gtid_binlog_state.reset();
6097 err= 2;
6098 goto end;
6099 }
6100 }
6101 opened= true;
6102 if ((err= init_io_cache(&cache, file_no, IO_SIZE, READ_CACHE, 0, 0,
6103 MYF(MY_WME|MY_WAIT_IF_FULL))))
6104 goto err;
6105 log_inited= true;
6106 if ((err= rpl_global_gtid_binlog_state.read_from_iocache(&cache)))
6107 goto err;
6108 goto end;
6109
6110 err:
6111 sql_print_error("Error reading binlog GTID state from file '%s'.\n", buf);
6112 end:
6113 if (log_inited)
6114 end_io_cache(&cache);
6115 if (opened)
6116 mysql_file_close(file_no, MYF(0));
6117
6118 return err;
6119 }
6120
6121
6122 int
6123 MYSQL_BIN_LOG::get_most_recent_gtid_list(rpl_gtid **list, uint32 *size)
6124 {
6125 return rpl_global_gtid_binlog_state.get_most_recent_gtid_list(list, size);
6126 }
6127
6128
6129 bool
6130 MYSQL_BIN_LOG::append_state_pos(String *str)
6131 {
6132 return rpl_global_gtid_binlog_state.append_pos(str);
6133 }
6134
6135
6136 bool
6137 MYSQL_BIN_LOG::append_state(String *str)
6138 {
6139 return rpl_global_gtid_binlog_state.append_state(str);
6140 }
6141
6142
6143 bool
6144 MYSQL_BIN_LOG::is_empty_state()
6145 {
6146 return (rpl_global_gtid_binlog_state.count() == 0);
6147 }
6148
6149
6150 bool
6151 MYSQL_BIN_LOG::find_in_binlog_state(uint32 domain_id, uint32 server_id_arg,
6152 rpl_gtid *out_gtid)
6153 {
6154 rpl_gtid *gtid;
6155 if ((gtid= rpl_global_gtid_binlog_state.find(domain_id, server_id_arg)))
6156 *out_gtid= *gtid;
6157 return gtid != NULL;
6158 }
6159
6160
6161 bool
6162 MYSQL_BIN_LOG::lookup_domain_in_binlog_state(uint32 domain_id,
6163 rpl_gtid *out_gtid)
6164 {
6165 rpl_gtid *found_gtid;
6166
6167 if ((found_gtid= rpl_global_gtid_binlog_state.find_most_recent(domain_id)))
6168 {
6169 *out_gtid= *found_gtid;
6170 return true;
6171 }
6172
6173 return false;
6174 }
6175
6176
6177 int
6178 MYSQL_BIN_LOG::bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no)
6179 {
6180 return rpl_global_gtid_binlog_state.bump_seq_no_if_needed(domain_id, seq_no);
6181 }
6182
6183
6184 bool
6185 MYSQL_BIN_LOG::check_strict_gtid_sequence(uint32 domain_id,
6186 uint32 server_id_arg,
6187 uint64 seq_no)
6188 {
6189 return rpl_global_gtid_binlog_state.check_strict_sequence(domain_id,
6190 server_id_arg,
6191 seq_no);
6192 }
6193
6194
6195 /**
6196 Write an event to the binary log. If with_annotate != NULL and
6197 *with_annotate = TRUE write also Annotate_rows before the event
6198 (this should happen only if the event is a Table_map).
6199 */
6200
6201 bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
6202 {
6203 THD *thd= event_info->thd;
6204 bool error= 1;
6205 binlog_cache_data *cache_data= 0;
6206 bool is_trans_cache= FALSE;
6207 bool using_trans= event_info->use_trans_cache();
6208 bool direct= event_info->use_direct_logging();
6209 ulong UNINIT_VAR(prev_binlog_id);
6210 DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)");
6211
6212 /*
6213 When binary logging is not enabled (--log-bin=0), wsrep-patch partially
6214 enables it without opening the binlog file (MYSQL_BIN_LOG::open().
6215 So, avoid writing to binlog file.
6216 */
6217 if (direct &&
6218 (wsrep_emulate_bin_log ||
6219 (WSREP(thd) && !(thd->variables.option_bits & OPTION_BIN_LOG))))
6220 DBUG_RETURN(0);
6221
6222 if (thd->variables.option_bits & OPTION_GTID_BEGIN)
6223 {
6224 DBUG_PRINT("info", ("OPTION_GTID_BEGIN was set"));
6225 /* Wait for commit from binary log before we commit */
6226 direct= 0;
6227 using_trans= 1;
6228 }
6229
6230 if (thd->binlog_evt_union.do_union)
6231 {
6232 /*
6233 In Stored function; Remember that function call caused an update.
6234 We will log the function call to the binary log on function exit
6235 */
6236 thd->binlog_evt_union.unioned_events= TRUE;
6237 thd->binlog_evt_union.unioned_events_trans |= using_trans;
6238 DBUG_RETURN(0);
6239 }
6240
6241 /*
6242 We only end the statement if we are in a top-level statement. If
6243 we are inside a stored function, we do not end the statement since
6244 this will close all tables on the slave. But there can be a special case
6245 where we are inside a stored function/trigger and a SAVEPOINT is being
6246 set in side the stored function/trigger. This SAVEPOINT execution will
6247 force the pending event to be flushed without an STMT_END_F flag. This
6248 will result in a case where following DMLs will be considered as part of
6249 same statement and result in data loss on slave. Hence in this case we
6250 force the end_stmt to be true.
6251 */
6252 bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
6253 SQLCOM_SAVEPOINT) ? true :
6254 (thd->locked_tables_mode && thd->lex->requires_prelocking());
6255 if (thd->binlog_flush_pending_rows_event(end_stmt, using_trans))
6256 DBUG_RETURN(error);
6257
6258 /*
6259 In most cases this is only called if 'is_open()' is true; in fact this is
6260 mostly called if is_open() *was* true a few instructions before, but it
6261 could have changed since.
6262 */
6263 /* applier and replayer can skip writing binlog events */
6264 if ((WSREP_EMULATE_BINLOG(thd) &&
6265 IF_WSREP(thd->wsrep_exec_mode != REPL_RECV, 0)) || is_open())
6266 {
6267 my_off_t UNINIT_VAR(my_org_b_tell);
6268 #ifdef HAVE_REPLICATION
6269 /*
6270 In the future we need to add to the following if tests like
6271 "do the involved tables match (to be implemented)
6272 binlog_[wild_]{do|ignore}_table?" (WL#1049)"
6273 */
6274 const char *local_db= event_info->get_db();
6275
6276 bool option_bin_log_flag= (thd->variables.option_bits & OPTION_BIN_LOG);
6277
6278 /*
6279 Log all updates to binlog cache so that they can get replicated to other
6280 nodes. A check has been added to stop them from getting logged into
6281 binary log files.
6282 */
6283 if (WSREP(thd)) option_bin_log_flag= true;
6284
6285 if ((!(option_bin_log_flag)) ||
6286 (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
6287 thd->lex->sql_command != SQLCOM_SAVEPOINT &&
6288 !binlog_filter->db_ok(local_db)))
6289 DBUG_RETURN(0);
6290 #endif /* HAVE_REPLICATION */
6291
6292 IO_CACHE *file= NULL;
6293
6294 if (direct)
6295 {
6296 int res;
6297 uint64 commit_id= 0;
6298 DBUG_PRINT("info", ("direct is set"));
6299 if ((res= thd->wait_for_prior_commit()))
6300 DBUG_RETURN(res);
6301 file= &log_file;
6302 my_org_b_tell= my_b_tell(file);
6303 mysql_mutex_lock(&LOCK_log);
6304 prev_binlog_id= current_binlog_id;
6305 DBUG_EXECUTE_IF("binlog_force_commit_id",
6306 {
6307 const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") };
6308 bool null_value;
6309 user_var_entry *entry=
6310 (user_var_entry*) my_hash_search(&thd->user_vars,
6311 (uchar*) commit_name.str,
6312 commit_name.length);
6313 commit_id= entry->val_int(&null_value);
6314 });
6315 if (write_gtid_event(thd, true, using_trans, commit_id))
6316 goto err;
6317 }
6318 else
6319 {
6320 binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
6321 if (!cache_mngr)
6322 goto err;
6323
6324 is_trans_cache= use_trans_cache(thd, using_trans);
6325 cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);
6326 file= &cache_data->cache_log;
6327
6328 if (thd->lex->stmt_accessed_non_trans_temp_table() && is_trans_cache)
6329 thd->transaction.stmt.mark_modified_non_trans_temp_table();
6330 thd->binlog_start_trans_and_stmt();
6331 }
6332 DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));
6333
6334 /*
6335 No check for auto events flag here - this write method should
6336 never be called if auto-events are enabled.
6337
6338 Write first log events which describe the 'run environment'
6339 of the SQL command. If row-based binlogging, Insert_id, Rand
6340 and other kind of "setting context" events are not needed.
6341 */
6342
6343 if (with_annotate && *with_annotate)
6344 {
6345 DBUG_ASSERT(event_info->get_type_code() == TABLE_MAP_EVENT);
6346 Annotate_rows_log_event anno(thd, using_trans, direct);
6347 /* Annotate event should be written not more than once */
6348 *with_annotate= 0;
6349 if (write_event(&anno, cache_data, file))
6350 goto err;
6351 }
6352
6353 {
6354 if (!thd->is_current_stmt_binlog_format_row())
6355 {
6356 if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
6357 {
6358 Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
6359 thd->first_successful_insert_id_in_prev_stmt_for_binlog,
6360 using_trans, direct);
6361 if (write_event(&e, cache_data, file))
6362 goto err;
6363 }
6364 if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
6365 {
6366 DBUG_PRINT("info",("number of auto_inc intervals: %u",
6367 thd->auto_inc_intervals_in_cur_stmt_for_binlog.
6368 nb_elements()));
6369 Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
6370 thd->auto_inc_intervals_in_cur_stmt_for_binlog.
6371 minimum(), using_trans, direct);
6372 if (write_event(&e, cache_data, file))
6373 goto err;
6374 }
6375 if (thd->rand_used)
6376 {
6377 Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
6378 using_trans, direct);
6379 if (write_event(&e, cache_data, file))
6380 goto err;
6381 }
6382 if (thd->user_var_events.elements)
6383 {
6384 for (uint i= 0; i < thd->user_var_events.elements; i++)
6385 {
6386 BINLOG_USER_VAR_EVENT *user_var_event;
6387 get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);
6388
6389 /* setting flags for user var log event */
6390 uchar flags= User_var_log_event::UNDEF_F;
6391 if (user_var_event->unsigned_flag)
6392 flags|= User_var_log_event::UNSIGNED_F;
6393
6394 User_var_log_event e(thd, user_var_event->user_var_event->name.str,
6395 user_var_event->user_var_event->name.length,
6396 user_var_event->value,
6397 user_var_event->length,
6398 user_var_event->type,
6399 user_var_event->charset_number,
6400 flags,
6401 using_trans,
6402 direct);
6403 if (write_event(&e, cache_data, file))
6404 goto err;
6405 }
6406 }
6407 }
6408 }
6409
6410 /*
6411 Write the event.
6412 */
6413 if (write_event(event_info, cache_data, file) ||
6414 DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
6415 goto err;
6416
6417 error= 0;
6418 err:
6419 if (direct)
6420 {
6421 my_off_t offset= my_b_tell(file);
6422 bool check_purge= false;
6423 DBUG_ASSERT(!is_relay_log);
6424
6425 if (likely(!error))
6426 {
6427 bool synced;
6428
6429 if ((error= flush_and_sync(&synced)))
6430 {
6431 }
6432 else
6433 {
6434 mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
6435 mysql_mutex_assert_owner(&LOCK_log);
6436 mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
6437 mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
6438 #ifdef HAVE_REPLICATION
6439 if (repl_semisync_master.report_binlog_update(thd, log_file_name,
6440 file->pos_in_file))
6441 {
6442 sql_print_error("Failed to run 'after_flush' hooks");
6443 error= 1;
6444 }
6445 else
6446 #endif
6447 {
6448 /*
6449 update binlog_end_pos so it can be read by dump thread
6450 note: must be _after_ the RUN_HOOK(after_flush) or else
6451 semi-sync might not have put the transaction into
6452 it's list before dump-thread tries to send it
6453 */
6454 update_binlog_end_pos(offset);
6455 /*
6456 If a transaction with the LOAD DATA statement is divided
6457 into logical mini-transactions (of the 10K rows) and binlog
6458 is rotated, then the last portion of data may be lost due to
6459 wsrep handler re-registration at the boundary of the split.
6460 Since splitting of the LOAD DATA into mini-transactions is
6461 logical, we should not allow these mini-transactions to fall
6462 into separate binlogs. Therefore, it is necessary to prohibit
6463 the rotation of binlog in the middle of processing LOAD DATA:
6464 */
6465 #ifdef WITH_WSREP
6466 if (!thd->wsrep_split_flag)
6467 {
6468 #endif /* WITH_WSREP */
6469 if (unlikely((error= rotate(false, &check_purge))))
6470 check_purge= false;
6471 #ifdef WITH_WSREP
6472 }
6473 #endif /* WITH_WSREP */
6474 }
6475 }
6476 }
6477
6478 status_var_add(thd->status_var.binlog_bytes_written,
6479 offset - my_org_b_tell);
6480
6481 mysql_mutex_lock(&LOCK_after_binlog_sync);
6482 mysql_mutex_unlock(&LOCK_log);
6483
6484 mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
6485 mysql_mutex_assert_not_owner(&LOCK_log);
6486 mysql_mutex_assert_owner(&LOCK_after_binlog_sync);
6487 mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
6488 #ifdef HAVE_REPLICATION
6489 if (repl_semisync_master.wait_after_sync(log_file_name,
6490 file->pos_in_file))
6491 {
6492 error=1;
6493 /* error is already printed inside hook */
6494 }
6495 #endif
6496
6497 /*
6498 Take mutex to protect against a reader seeing partial writes of 64-bit
6499 offset on 32-bit CPUs.
6500 */
6501 mysql_mutex_lock(&LOCK_commit_ordered);
6502 mysql_mutex_unlock(&LOCK_after_binlog_sync);
6503 last_commit_pos_offset= offset;
6504 mysql_mutex_unlock(&LOCK_commit_ordered);
6505
6506 if (check_purge)
6507 checkpoint_and_purge(prev_binlog_id);
6508 }
6509
6510 if (unlikely(error))
6511 {
6512 set_write_error(thd, is_trans_cache);
6513 if (check_write_error(thd) && cache_data &&
6514 stmt_has_updated_non_trans_table(thd))
6515 cache_data->set_incident();
6516 }
6517 }
6518
6519 DBUG_RETURN(error);
6520 }
6521
6522
6523 int error_log_print(enum loglevel level, const char *format,
6524 va_list args)
6525 {
6526 return logger.error_log_print(level, format, args);
6527 }
6528
6529
6530 bool slow_log_print(THD *thd, const char *query, uint query_length,
6531 ulonglong current_utime)
6532 {
6533 return logger.slow_log_print(thd, query, query_length, current_utime);
6534 }
6535
6536
6537 /**
6538 Decide if we should log the command to general log
6539
6540 @retval
6541 FALSE No logging
6542 TRUE Ok to log
6543 */
6544
6545 bool LOGGER::log_command(THD *thd, enum enum_server_command command)
6546 {
6547 /*
6548 Log command if we have at least one log event handler enabled and want
6549 to log this king of commands
6550 */
6551 if (!(*general_log_handler_list && (what_to_log & (1L << (uint) command))))
6552 return FALSE;
6553
6554 /*
6555 If LOG_SLOW_DISABLE_SLAVE is set when slave thread starts, then
6556 OPTION_LOG_OFF is set.
6557 Only the super user can set this bit.
6558 */
6559 return !(thd->variables.option_bits & OPTION_LOG_OFF);
6560 }
6561
6562
6563 bool general_log_print(THD *thd, enum enum_server_command command,
6564 const char *format, ...)
6565 {
6566 va_list args;
6567 uint error= 0;
6568
6569 /* Print the message to the buffer if we want to log this kind of commands */
6570 if (! logger.log_command(thd, command))
6571 return FALSE;
6572
6573 va_start(args, format);
6574 error= logger.general_log_print(thd, command, format, args);
6575 va_end(args);
6576
6577 return error;
6578 }
6579
6580 bool general_log_write(THD *thd, enum enum_server_command command,
6581 const char *query, size_t query_length)
6582 {
6583 /* Write the message to the log if we want to log this king of commands */
6584 if (logger.log_command(thd, command) || mysql_audit_general_enabled())
6585 return logger.general_log_write(thd, command, query, query_length);
6586
6587 return FALSE;
6588 }
6589
6590
6591 static void
6592 binlog_checkpoint_callback(void *cookie)
6593 {
6594 MYSQL_BIN_LOG::xid_count_per_binlog *entry=
6595 (MYSQL_BIN_LOG::xid_count_per_binlog *)cookie;
6596 /*
6597 For every supporting engine, we increment the xid_count and issue a
6598 commit_checkpoint_request(). Then we can count when all
6599 commit_checkpoint_notify() callbacks have occurred, and then log a new
6600 binlog checkpoint event.
6601 */
6602 mysql_bin_log.mark_xids_active(entry->binlog_id, 1);
6603 }
6604
6605
6606 /*
6607 Request a commit checkpoint from each supporting engine.
6608 This must be called after each binlog rotate, and after LOCK_log has been
6609 released. The xid_count value in the xid_count_per_binlog entry was
6610 incremented by 1 and will be decremented in this function; this ensures
6611 that the entry will not go away early despite LOCK_log not being held.
6612 */
6613 void
6614 MYSQL_BIN_LOG::do_checkpoint_request(ulong binlog_id)
6615 {
6616 xid_count_per_binlog *entry;
6617
6618 /*
6619 Find the binlog entry, and invoke commit_checkpoint_request() on it in
6620 each supporting storage engine.
6621 */
6622 mysql_mutex_lock(&LOCK_xid_list);
6623 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
6624 do {
6625 entry= it++;
6626 DBUG_ASSERT(entry /* binlog_id is always somewhere in the list. */);
6627 } while (entry->binlog_id != binlog_id);
6628 mysql_mutex_unlock(&LOCK_xid_list);
6629
6630 ha_commit_checkpoint_request(entry, binlog_checkpoint_callback);
6631 /*
6632 When we rotated the binlog, we incremented xid_count to make sure the
6633 entry would not go away until this point, where we have done all necessary
6634 commit_checkpoint_request() calls.
6635 So now we can (and must) decrease the count - when it reaches zero, we
6636 will know that both all pending unlog() and all pending
6637 commit_checkpoint_notify() calls are done, and we can log a new binlog
6638 checkpoint.
6639 */
6640 mark_xid_done(binlog_id, true);
6641 }
6642
6643
6644 /**
6645 The method executes rotation when LOCK_log is already acquired
6646 by the caller.
6647
6648 @param force_rotate caller can request the log rotation
6649 @param check_purge is set to true if rotation took place
6650
6651 @note
6652 Caller _must_ check the check_purge variable. If this is set, it means
6653 that the binlog was rotated, and caller _must_ ensure that
6654 do_checkpoint_request() is called later with the binlog_id of the rotated
6655 binlog file. The call to do_checkpoint_request() must happen after
6656 LOCK_log is released (which is why we cannot simply do it here).
6657 Usually, checkpoint_and_purge() is appropriate, as it will both handle
6658 the checkpointing and any needed purging of old logs.
6659
6660 @note
6661 If rotation fails, for instance the server was unable
6662 to create a new log file, we still try to write an
6663 incident event to the current log.
6664
6665 @retval
6666 nonzero - error in rotating routine.
6667 */
6668 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
6669 {
6670 int error= 0;
6671 DBUG_ENTER("MYSQL_BIN_LOG::rotate");
6672
6673 #ifdef WITH_WSREP
6674 if (WSREP_ON && wsrep_to_isolation)
6675 {
6676 *check_purge= false;
6677 WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d",
6678 wsrep_to_isolation);
6679 DBUG_RETURN(0);
6680 }
6681 #endif /* WITH_WSREP */
6682
6683 //todo: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log);
6684 *check_purge= false;
6685
6686 if (force_rotate || (my_b_tell(&log_file) >= (my_off_t) max_size))
6687 {
6688 ulong binlog_id= current_binlog_id;
6689 /*
6690 We rotate the binlog, so we need to start a commit checkpoint in all
6691 supporting engines - when it finishes, we can log a new binlog checkpoint
6692 event.
6693
6694 But we cannot start the checkpoint here - there could be a group commit
6695 still in progress which needs to be included in the checkpoint, and
6696 besides we do not want to do the (possibly expensive) checkpoint while
6697 LOCK_log is held.
6698
6699 On the other hand, we must be sure that the xid_count entry for the
6700 previous log does not go away until we start the checkpoint - which it
6701 could do as it is no longer the most recent. So we increment xid_count
6702 (to count the pending checkpoint request) - this will fix the entry in
6703 place until we decrement again in do_checkpoint_request().
6704 */
6705 mark_xids_active(binlog_id, 1);
6706
6707 if (unlikely((error= new_file_without_locking())))
6708 {
6709 /**
6710 Be conservative... There are possible lost events (eg,
6711 failing to log the Execute_load_query_log_event
6712 on a LOAD DATA while using a non-transactional
6713 table)!
6714
6715 We give it a shot and try to write an incident event anyway
6716 to the current log.
6717 */
6718 if (!write_incident_already_locked(current_thd))
6719 flush_and_sync(0);
6720
6721 /*
6722 We failed to rotate - so we have to decrement the xid_count back that
6723 we incremented before attempting the rotate.
6724 */
6725 mark_xid_done(binlog_id, false);
6726 }
6727 else
6728 *check_purge= true;
6729 }
6730 DBUG_RETURN(error);
6731 }
6732
6733 /**
6734 The method executes logs purging routine.
6735
6736 @retval
6737 nonzero - error in rotating routine.
6738 */
6739 void MYSQL_BIN_LOG::purge()
6740 {
6741 mysql_mutex_assert_not_owner(&LOCK_log);
6742 #ifdef HAVE_REPLICATION
6743 if (expire_logs_days)
6744 {
6745 DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
6746 time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
6747 if (purge_time >= 0)
6748 {
6749 purge_logs_before_date(purge_time);
6750 }
6751 DEBUG_SYNC(current_thd, "after_purge_logs_before_date");
6752 }
6753 #endif
6754 }
6755
6756
6757 void MYSQL_BIN_LOG::checkpoint_and_purge(ulong binlog_id)
6758 {
6759 do_checkpoint_request(binlog_id);
6760 purge();
6761 }
6762
6763
6764 /**
6765 Searches for the first (oldest) binlog file name in in the binlog index.
6766
6767 @param[in,out] buf_arg pointer to a buffer to hold found
6768 the first binary log file name
6769 @return NULL on success, otherwise error message
6770 */
6771 static const char* get_first_binlog(char* buf_arg)
6772 {
6773 IO_CACHE *index_file;
6774 size_t length;
6775 char fname[FN_REFLEN];
6776 const char* errmsg= NULL;
6777
6778 DBUG_ENTER("get_first_binlog");
6779
6780 DBUG_ASSERT(mysql_bin_log.is_open());
6781
6782 mysql_bin_log.lock_index();
6783
6784 index_file=mysql_bin_log.get_index_file();
6785 if (reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0))
6786 {
6787 errmsg= "failed to create a cache on binlog index";
6788 goto end;
6789 }
6790 /* The file ends with EOF or empty line */
6791 if ((length=my_b_gets(index_file, fname, sizeof(fname))) <= 1)
6792 {
6793 errmsg= "empty binlog index";
6794 goto end;
6795 }
6796 else
6797 {
6798 fname[length-1]= 0; // Remove end \n
6799 }
6800 if (normalize_binlog_name(buf_arg, fname, false))
6801 {
6802 errmsg= "could not normalize the first file name in the binlog index";
6803 goto end;
6804 }
6805 end:
6806 mysql_bin_log.unlock_index();
6807
6808 DBUG_RETURN(errmsg);
6809 }
6810
6811 /**
6812 Check weather the gtid binlog state can safely remove gtid
6813 domains passed as the argument. A safety condition is satisfied when
6814 there are no events from the being deleted domains in the currently existing
6815 binlog files. Upon successful check the supplied domains are removed
6816 from @@gtid_binlog_state. The caller is supposed to rotate binlog so that
6817 the active latest file won't have the deleted domains in its Gtid_list header.
6818
6819 @param domain_drop_lex gtid domain id sequence from lex.
6820 Passed as a pointer to dynamic array must be not empty
6821 unless pointer value NULL.
6822 @retval zero on success
6823 @retval > 0 ineffective call none from the *non* empty
6824 gtid domain sequence is deleted
6825 @retval < 0 on error
6826 */
6827 static int do_delete_gtid_domain(DYNAMIC_ARRAY *domain_drop_lex)
6828 {
6829 int rc= 0;
6830 Gtid_list_log_event *glev= NULL;
6831 char buf[FN_REFLEN];
6832 File file;
6833 IO_CACHE cache;
6834 const char* errmsg= NULL;
6835 char errbuf[MYSQL_ERRMSG_SIZE]= {0};
6836
6837 if (!domain_drop_lex)
6838 return 0; // still "effective" having empty domain sequence to delete
6839
6840 DBUG_ASSERT(domain_drop_lex->elements > 0);
6841 mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
6842
6843 if ((errmsg= get_first_binlog(buf)) != NULL)
6844 goto end;
6845 bzero((char*) &cache, sizeof(cache));
6846 if ((file= open_binlog(&cache, buf, &errmsg)) == (File) -1)
6847 goto end;
6848 errmsg= get_gtid_list_event(&cache, &glev);
6849 end_io_cache(&cache);
6850 mysql_file_close(file, MYF(MY_WME));
6851
6852 DBUG_EXECUTE_IF("inject_binlog_delete_domain_init_error",
6853 errmsg= "injected error";);
6854 if (errmsg)
6855 goto end;
6856 errmsg= rpl_global_gtid_binlog_state.drop_domain(domain_drop_lex,
6857 glev, errbuf);
6858
6859 end:
6860 if (errmsg)
6861 {
6862 if (strlen(errmsg) > 0)
6863 {
6864 my_error(ER_BINLOG_CANT_DELETE_GTID_DOMAIN, MYF(0), errmsg);
6865 rc= -1;
6866 }
6867 else
6868 {
6869 rc= 1;
6870 }
6871 }
6872 delete glev;
6873
6874 return rc;
6875 }
6876
6877 /**
6878 The method is a shortcut of @c rotate() and @c purge().
6879 LOCK_log is acquired prior to rotate and is released after it.
6880
6881 @param force_rotate caller can request the log rotation
6882
6883 @retval
6884 nonzero - error in rotating routine.
6885 */
6886 int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
6887 DYNAMIC_ARRAY *domain_drop_lex)
6888 {
6889 int err_gtid=0, error= 0;
6890 ulong prev_binlog_id;
6891 DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
6892 bool check_purge= false;
6893
6894 mysql_mutex_lock(&LOCK_log);
6895
6896 DEBUG_SYNC(current_thd, "rotate_after_acquire_LOCK_log");
6897
6898 prev_binlog_id= current_binlog_id;
6899
6900 if ((err_gtid= do_delete_gtid_domain(domain_drop_lex)))
6901 {
6902 // inffective attempt to delete merely skips rotate and purge
6903 if (err_gtid < 0)
6904 error= 1; // otherwise error is propagated the user
6905 }
6906 else if (unlikely((error= rotate(force_rotate, &check_purge))))
6907 check_purge= false;
6908
6909 DEBUG_SYNC(current_thd, "rotate_after_rotate");
6910
6911 /*
6912 NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
6913 the mutex. Otherwise causes various deadlocks.
6914 Explicit binlog rotation must be synchronized with a concurrent
6915 binlog ordered commit, in particular not let binlog
6916 checkpoint notification request until early binlogged
6917 concurrent commits have has been completed.
6918 */
6919 mysql_mutex_lock(&LOCK_after_binlog_sync);
6920 mysql_mutex_unlock(&LOCK_log);
6921 mysql_mutex_lock(&LOCK_commit_ordered);
6922 mysql_mutex_unlock(&LOCK_after_binlog_sync);
6923 mysql_mutex_unlock(&LOCK_commit_ordered);
6924
6925 if (check_purge)
6926 checkpoint_and_purge(prev_binlog_id);
6927
6928 DBUG_RETURN(error);
6929 }
6930
6931 uint MYSQL_BIN_LOG::next_file_id()
6932 {
6933 uint res;
6934 mysql_mutex_lock(&LOCK_log);
6935 res = file_id++;
6936 mysql_mutex_unlock(&LOCK_log);
6937 return res;
6938 }
6939
6940 class CacheWriter: public Log_event_writer
6941 {
6942 public:
6943 size_t remains;
6944
6945 CacheWriter(THD *thd_arg, IO_CACHE *file_arg, bool do_checksum,
6946 Binlog_crypt_data *cr)
6947 : Log_event_writer(file_arg, 0, cr), remains(0), thd(thd_arg),
6948 first(true)
6949 { checksum_len= do_checksum ? BINLOG_CHECKSUM_LEN : 0; }
6950
6951 ~CacheWriter()
6952 { status_var_add(thd->status_var.binlog_bytes_written, bytes_written); }
6953
6954 int write(uchar* pos, size_t len)
6955 {
6956 DBUG_ENTER("CacheWriter::write");
6957 if (first)
6958 write_header(pos, len);
6959 else
6960 write_data(pos, len);
6961
6962 remains -= len;
6963 if ((first= !remains))
6964 write_footer();
6965 DBUG_RETURN(0);
6966 }
6967 private:
6968 THD *thd;
6969 bool first;
6970 };
6971
/*
  Write the contents of a cache to the binary log.

  SYNOPSIS
    write_cache()
    thd      Current_thread
    cache    Cache to write to the binary log

  DESCRIPTION
    Write the contents of the cache to the binary log. The cache will
    be reset as a READ_CACHE to be able to read the contents from it.

    While the events are read from the cache, a checksum is optionally
    added to each of them (per @c binlog_checksum_options), and the event
    length and end_log_pos of every event are fixed up before the event is
    written to the binary log.

    The caller must hold LOCK_log.

  RETURN VALUE
    0                  All events were written
    ER_ERROR_ON_WRITE  The cache could not be reinitialised for reading,
                       or writing an event failed
*/

int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
{
  DBUG_ENTER("MYSQL_BIN_LOG::write_cache");

  mysql_mutex_assert_owner(&LOCK_log);
  if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  /*
    length   - bytes available in the current read buffer of the cache
    group    - position in the binlog at which this cache starts
    carry    - bytes of a split event header saved from the previous buffer
    hdr_offs - offset of the next event header relative to the current buffer
  */
  size_t length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
  size_t val;
  size_t end_log_pos_inc= 0; // grows by checksum_len for each event header processed
  uchar header[LOG_EVENT_HEADER_LEN];
  CacheWriter writer(thd, &log_file, binlog_checksum_options, &crypto);

  if (crypto.scheme)
    writer.ctx= alloca(crypto.ctx_size);

  // while there is just one alg the following must hold:
  DBUG_ASSERT(binlog_checksum_options == BINLOG_CHECKSUM_ALG_OFF ||
              binlog_checksum_options == BINLOG_CHECKSUM_ALG_CRC32);

  /*
    The events in the buffer have incorrect end_log_pos data
    (relative to beginning of group rather than absolute),
    so we'll recalculate them in situ so the binlog is always
    correct, even in the middle of a group. This is possible
    because we now know the start position of the group (the
    offset of this cache in the log, if you will); all we need
    to do is to find all event-headers, and add the position of
    the group to the end_log_pos of each event.  This is pretty
    straight forward, except that we read the cache in segments,
    so an event-header might end up on the cache-border and get
    split.
  */

  group= (size_t)my_b_tell(&log_file);
  hdr_offs= carry= 0;

  do
  {
    /*
      if we only got a partial header in the last iteration,
      get the other half now and process a full header.
    */
    if (unlikely(carry > 0))
    {
      DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);
      /* number of header bytes still missing; taken from the new buffer */
      size_t tail= LOG_EVENT_HEADER_LEN - carry;

      /* assemble both halves */
      memcpy(&header[carry], (char *)cache->read_pos, tail);

      uint32 len= uint4korr(header + EVENT_LEN_OFFSET);
      /* length as stored in the cache, i.e. without the checksum */
      writer.remains= len;

      /* fix end_log_pos */
      end_log_pos_inc += writer.checksum_len;
      val= uint4korr(header + LOG_POS_OFFSET) + group + end_log_pos_inc;
      int4store(header + LOG_POS_OFFSET, val);

      /* fix len */
      len+= writer.checksum_len;
      int4store(header + EVENT_LEN_OFFSET, len);

      if (writer.write(header, LOG_EVENT_HEADER_LEN))
        DBUG_RETURN(ER_ERROR_ON_WRITE);

      /* skip the header bytes that were consumed from this buffer */
      cache->read_pos+= tail;
      length-= tail;
      carry= 0;

      /* next event header at ... */
      hdr_offs= len - LOG_EVENT_HEADER_LEN - writer.checksum_len;
    }

    /* if there is anything to write, process it. */

    if (likely(length > 0))
    {
      /* debug-only fault injection; 28 is presumably ENOSPC - TODO confirm */
      DBUG_EXECUTE_IF("fail_binlog_write_1",
                      errno= 28; DBUG_RETURN(ER_ERROR_ON_WRITE););
      /*
        process all event-headers in this (partial) cache.
        if next header is beyond current read-buffer,
        we'll get it later (though not necessarily in the
        very next iteration, just "eventually").
      */

      /* no header in this buffer: all of it belongs to the current event */
      if (hdr_offs >= length)
      {
        if (writer.write(cache->read_pos, length))
          DBUG_RETURN(ER_ERROR_ON_WRITE);
      }

      while (hdr_offs < length)
      {
        /*
          finish off with remains of the last event that crawls
          from previous into the current buffer
        */
        if (writer.remains != 0)
        {
          if (writer.write(cache->read_pos, hdr_offs))
            DBUG_RETURN(ER_ERROR_ON_WRITE);
        }

        /*
          partial header only? save what we can get, process once
          we get the rest.
        */
        if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
        {
          carry= length - hdr_offs;
          memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
          /* shrinking length to hdr_offs also ends the while loop */
          length= hdr_offs;
        }
        else
        {
          /* we've got a full event-header, and it came in one piece */
          uchar *ev= (uchar *)cache->read_pos + hdr_offs;
          uint ev_len= uint4korr(ev + EVENT_LEN_OFFSET); // netto len
          uchar *log_pos= ev + LOG_POS_OFFSET;

          end_log_pos_inc += writer.checksum_len;
          /* fix end_log_pos */
          val= uint4korr(log_pos) + group + end_log_pos_inc;
          int4store(log_pos, val);

          /* fix length */
          int4store(ev + EVENT_LEN_OFFSET, ev_len + writer.checksum_len);

          writer.remains= ev_len;
          /* write only the part of the event that lies in this buffer */
          if (writer.write(ev, MY_MIN(ev_len, length - hdr_offs)))
            DBUG_RETURN(ER_ERROR_ON_WRITE);

          /* next event header at ... */
          hdr_offs += ev_len; // incr by the netto len

          DBUG_ASSERT(!writer.checksum_len || writer.remains == 0 || hdr_offs >= length);
        }
      }

      /*
        Adjust hdr_offs. Note that it may still point beyond the segment
        read in the next iteration; if the current event is very long,
        it may take a couple of read-iterations (and subsequent adjustments
        of hdr_offs) for it to point into the then-current segment.
        If we have a split header (carry > 0), hdr_offs will be set at the
        beginning of the next iteration, overwriting the value we set here:
      */
      hdr_offs -= length;
    }
  } while ((length= my_b_fill(cache)));

  DBUG_ASSERT(carry == 0);
  DBUG_ASSERT(!writer.checksum_len || writer.remains == 0);

  DBUG_RETURN(0);                               // All OK
}
7147
7148 /*
7149 Helper function to get the error code of the query to be binlogged.
7150 */
7151 int query_error_code(THD *thd, bool not_killed)
7152 {
7153 int error;
7154
7155 if (not_killed || (killed_mask_hard(thd->killed) == KILL_BAD_DATA))
7156 {
7157 error= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0;
7158 if (!error)
7159 return error;
7160
7161 /* thd->get_get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
7162 ER_QUERY_INTERRUPTED, So here we need to make sure that error
7163 is not set to these errors when specified not_killed by the
7164 caller.
7165 */
7166 if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED ||
7167 error == ER_NEW_ABORTING_CONNECTION || error == ER_CONNECTION_KILLED)
7168 error= 0;
7169 }
7170 else
7171 {
7172 /* killed status for DELAYED INSERT thread should never be used */
7173 DBUG_ASSERT(!(thd->system_thread & SYSTEM_THREAD_DELAYED_INSERT));
7174 error= thd->killed_errno();
7175 }
7176
7177 return error;
7178 }
7179
7180
7181 bool MYSQL_BIN_LOG::write_incident_already_locked(THD *thd)
7182 {
7183 uint error= 0;
7184 DBUG_ENTER("MYSQL_BIN_LOG::write_incident_already_locked");
7185 Incident incident= INCIDENT_LOST_EVENTS;
7186 Incident_log_event ev(thd, incident, &write_error_msg);
7187
7188 if (likely(is_open()))
7189 {
7190 error= write_event(&ev);
7191 status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
7192 }
7193
7194 DBUG_RETURN(error);
7195 }
7196
7197
7198 bool MYSQL_BIN_LOG::write_incident(THD *thd)
7199 {
7200 uint error= 0;
7201 my_off_t offset;
7202 bool check_purge= false;
7203 ulong prev_binlog_id;
7204 DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
7205
7206 mysql_mutex_lock(&LOCK_log);
7207 if (likely(is_open()))
7208 {
7209 prev_binlog_id= current_binlog_id;
7210 if (likely(!(error= write_incident_already_locked(thd))) &&
7211 likely(!(error= flush_and_sync(0))))
7212 {
7213 update_binlog_end_pos();
7214 /*
7215 If a transaction with the LOAD DATA statement is divided
7216 into logical mini-transactions (of the 10K rows) and binlog
7217 is rotated, then the last portion of data may be lost due to
7218 wsrep handler re-registration at the boundary of the split.
7219 Since splitting of the LOAD DATA into mini-transactions is
7220 logical, we should not allow these mini-transactions to fall
7221 into separate binlogs. Therefore, it is necessary to prohibit
7222 the rotation of binlog in the middle of processing LOAD DATA:
7223 */
7224 #ifdef WITH_WSREP
7225 if (!thd->wsrep_split_flag)
7226 {
7227 #endif /* WITH_WSREP */
7228 if (unlikely((error= rotate(false, &check_purge))))
7229 check_purge= false;
7230 #ifdef WITH_WSREP
7231 }
7232 #endif /* WITH_WSREP */
7233 }
7234
7235 offset= my_b_tell(&log_file);
7236
7237 update_binlog_end_pos(offset);
7238
7239 /*
7240 Take mutex to protect against a reader seeing partial writes of 64-bit
7241 offset on 32-bit CPUs.
7242 */
7243 mysql_mutex_lock(&LOCK_commit_ordered);
7244 last_commit_pos_offset= offset;
7245 mysql_mutex_unlock(&LOCK_commit_ordered);
7246 mysql_mutex_unlock(&LOCK_log);
7247
7248 if (check_purge)
7249 checkpoint_and_purge(prev_binlog_id);
7250 }
7251 else
7252 {
7253 mysql_mutex_unlock(&LOCK_log);
7254 }
7255
7256 DBUG_RETURN(error);
7257 }
7258
7259 void
7260 MYSQL_BIN_LOG::write_binlog_checkpoint_event_already_locked(const char *name_arg, uint len)
7261 {
7262 my_off_t offset;
7263 Binlog_checkpoint_log_event ev(name_arg, len);
7264 /*
7265 Note that we must sync the binlog checkpoint to disk.
7266 Otherwise a subsequent log purge could delete binlogs that XA recovery
7267 thinks are needed (even though they are not really).
7268 */
7269 if (!write_event(&ev) && !flush_and_sync(0))
7270 {
7271 update_binlog_end_pos();
7272 }
7273 else
7274 {
7275 /*
7276 If we fail to write the checkpoint event, something is probably really
7277 bad with the binlog. We complain in the error log.
7278
7279 Note that failure to write binlog checkpoint does not compromise the
7280 ability to do crash recovery - crash recovery will just have to scan a
7281 bit more of the binlog than strictly necessary.
7282 */
7283 sql_print_error("Failed to write binlog checkpoint event to binary log\n");
7284 }
7285
7286 offset= my_b_tell(&log_file);
7287
7288 update_binlog_end_pos(offset);
7289
7290 /*
7291 Take mutex to protect against a reader seeing partial writes of 64-bit
7292 offset on 32-bit CPUs.
7293 */
7294 mysql_mutex_lock(&LOCK_commit_ordered);
7295 last_commit_pos_offset= offset;
7296 mysql_mutex_unlock(&LOCK_commit_ordered);
7297 }
7298
7299
7300 /**
7301 Write a cached log entry to the binary log.
7302 - To support transaction over replication, we wrap the transaction
7303 with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
7304 We want to write a BEGIN/ROLLBACK block when a non-transactional table
7305 was updated in a transaction which was rolled back. This is to ensure
7306 that the same updates are run on the slave.
7307
7308 @param thd
7309 @param cache The cache to copy to the binlog
7310 @param commit_event The commit event to print after writing the
7311 contents of the cache.
7312 @param incident Defines if an incident event should be created to
7313 notify that some non-transactional changes did
7314 not get into the binlog.
7315
7316 @note
7317 We only come here if there is something in the cache.
7318 @note
7319 The thing in the cache is always a complete transaction.
7320 @note
7321 'cache' needs to be reinitialized after this functions returns.
7322 */
7323
7324 bool
7325 MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
7326 binlog_cache_mngr *cache_mngr,
7327 Log_event *end_ev, bool all,
7328 bool using_stmt_cache,
7329 bool using_trx_cache)
7330 {
7331 group_commit_entry entry;
7332 Ha_trx_info *ha_info;
7333 DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
7334
7335 /*
7336 Control should not be allowed beyond this point in wsrep_emulate_bin_log
7337 mode. Also, do not write the cached updates to binlog if binary logging is
7338 disabled (log-bin/sql_log_bin).
7339 */
7340 if (wsrep_emulate_bin_log)
7341 {
7342 DBUG_RETURN(0);
7343 }
7344 else if (!(thd->variables.option_bits & OPTION_BIN_LOG))
7345 {
7346 cache_mngr->need_unlog= false;
7347 DBUG_RETURN(0);
7348 }
7349
7350 entry.thd= thd;
7351 entry.cache_mngr= cache_mngr;
7352 entry.error= 0;
7353 entry.all= all;
7354 entry.using_stmt_cache= using_stmt_cache;
7355 entry.using_trx_cache= using_trx_cache;
7356 entry.need_unlog= false;
7357 ha_info= all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
7358
7359 for (; ha_info; ha_info= ha_info->next())
7360 {
7361 if (ha_info->is_started() && ha_info->ht() != binlog_hton &&
7362 !ha_info->ht()->commit_checkpoint_request)
7363 entry.need_unlog= true;
7364 break;
7365 }
7366
7367 entry.end_event= end_ev;
7368 if (cache_mngr->stmt_cache.has_incident() ||
7369 cache_mngr->trx_cache.has_incident())
7370 {
7371 Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, &write_error_msg);
7372 entry.incident_event= &inc_ev;
7373 DBUG_RETURN(write_transaction_to_binlog_events(&entry));
7374 }
7375 else
7376 {
7377 entry.incident_event= NULL;
7378 DBUG_RETURN(write_transaction_to_binlog_events(&entry));
7379 }
7380 }
7381
7382
/*
  Put a transaction that is ready to commit in the group commit queue.
  The transaction is identified by the ENTRY object passed into this function.

  To facilitate group commit for the binlog, we first queue up ourselves in
  this function. Then later the first thread to enter the queue waits for
  the LOCK_log mutex, and commits for everyone in the queue once it gets the
  lock. Any other threads in the queue just wait for the first one to finish
  the commit and wake them up. This way, all transactions in the queue get
  committed in a single disk operation.

  The main work in this function is when the commit in one transaction has
  been marked to wait for the commit of another transaction to happen
  first. This is used to support in-order parallel replication, where
  transactions can execute out-of-order but need to be committed in-order with
  how they happened on the master. The waiting of one commit on another needs
  to be integrated with the group commit queue, to ensure that the waiting
  transaction can participate in the same group commit as the waited-for
  transaction.

  So when we put a transaction in the queue, we check if there were other
  transactions already prepared to commit but just waiting for the first one
  to commit. If so, we add those to the queue as well, transitively for all
  waiters.

  And if a transaction is marked to wait for a prior transaction, but that
  prior transaction is already queued for group commit, then we can queue the
  new transaction directly to participate in the group commit.

  @param orig_entry  The group commit entry of the calling thread. Its
                     queued_by_other flag is reset on entry and may be set
                     by another thread that queues this entry on our behalf.

  @retval < 0   Error (killed while waiting for the prior commit, or the
                prior commit reported a wakeup error)
  @retval > 0   If queued as the first entry in the queue (meaning this
                is the leader)
  @retval 0     Otherwise (queued as participant, leader handles the commit)
*/

int
MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
{
  group_commit_entry *entry, *orig_queue, *last;
  wait_for_commit *cur;
  wait_for_commit *wfc;
  DBUG_ENTER("MYSQL_BIN_LOG::queue_for_group_commit");

  /*
    Check if we need to wait for another transaction to commit before us.

    It is safe to do a quick check without lock first in the case where we do
    not have to wait. But if the quick check shows we need to wait, we must do
    another safe check under lock, to avoid the race where the other
    transaction wakes us up between the check and the wait.
  */
  wfc= orig_entry->thd->wait_for_commit_ptr;
  orig_entry->queued_by_other= false;
  if (wfc && wfc->waitee)
  {
    mysql_mutex_lock(&wfc->LOCK_wait_commit);
    /*
      Do an extra check here, this time safely under lock.

      If waitee->commit_started is set, it means that the transaction we need
      to wait for has already queued up for group commit. In this case it is
      safe for us to queue up immediately as well, increasing the
      opportunities for group commit. Because waitee has taken the
      LOCK_prepare_ordered before setting the flag, there is no risk that we
      can queue ahead of it.
    */
    if (wfc->waitee && !wfc->waitee->commit_started)
    {
      PSI_stage_info old_stage;
      wait_for_commit *loc_waitee;

      /*
        By setting wfc->opaque_pointer to our own entry, we mark that we are
        ready to commit, but waiting for another transaction to commit before
        us.

        This other transaction may then take over the commit process for us to
        get us included in its own group commit. If this happens, the
        queued_by_other flag is set.

        Setting this flag may or may not be seen by the other thread, but we
        are safe in any case: The other thread will set queued_by_other under
        its LOCK_wait_commit, and we will check queued_by_other only after
        we have been woken up.
      */
      wfc->opaque_pointer= orig_entry;
      DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior");
      orig_entry->thd->ENTER_COND(&wfc->COND_wait_commit,
                                  &wfc->LOCK_wait_commit,
                                  &stage_waiting_for_prior_transaction_to_commit,
                                  &old_stage);
      /*
        Wait until the waitee clears wfc->waitee or we are killed. loc_waitee
        keeps the last observed waitee: non-NULL after the loop means the
        wait ended because of a kill.
      */
      while ((loc_waitee= wfc->waitee) && !orig_entry->thd->check_killed(1))
        mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
      wfc->opaque_pointer= NULL;
      DBUG_PRINT("info", ("After waiting for prior commit, queued_by_other=%d",
                          orig_entry->queued_by_other));

      if (loc_waitee)
      {
        /* Wait terminated due to kill. */
        mysql_mutex_lock(&loc_waitee->LOCK_wait_commit);
        if (loc_waitee->wakeup_subsequent_commits_running ||
            orig_entry->queued_by_other)
        {
          /* Our waitee is already waking us up, so ignore the kill. */
          mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
          do
          {
            mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
          } while (wfc->waitee);
        }
        else
        {
          /* We were killed, so remove us from the list of waitee. */
          wfc->remove_from_list(&loc_waitee->subsequent_commits_list);
          mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
          wfc->waitee= NULL;

          orig_entry->thd->EXIT_COND(&old_stage);
          /* Interrupted by kill. */
          DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior_killed");
          wfc->wakeup_error= orig_entry->thd->killed_errno();
          /* Fall back to a generic error if the THD reports no kill errno. */
          if (!wfc->wakeup_error)
            wfc->wakeup_error= ER_QUERY_INTERRUPTED;
          my_message(wfc->wakeup_error,
                     ER_THD(orig_entry->thd, wfc->wakeup_error), MYF(0));
          DBUG_RETURN(-1);
        }
      }
      orig_entry->thd->EXIT_COND(&old_stage);
    }
    else
      mysql_mutex_unlock(&wfc->LOCK_wait_commit);
  }
  /*
    If the transaction we were waiting for has already put us into the group
    commit queue (and possibly already done the entire binlog commit for us),
    then there is nothing else to do.
  */
  if (orig_entry->queued_by_other)
    DBUG_RETURN(0);

  /* The commit we were waiting for reported an error; fail this one too. */
  if (wfc && wfc->wakeup_error)
  {
    my_error(ER_PRIOR_COMMIT_FAILED, MYF(0));
    DBUG_RETURN(-1);
  }

  /* Now enqueue ourselves in the group commit queue. */
  DEBUG_SYNC(orig_entry->thd, "commit_before_enqueue");
  orig_entry->thd->clear_wakeup_ready();
  mysql_mutex_lock(&LOCK_prepare_ordered);
  /* Remember the old head: if the queue was empty, we become the leader. */
  orig_queue= group_commit_queue;

  /*
    Iteratively process everything added to the queue, looking for waiters,
    and their waiters, and so on. If a waiter is ready to commit, we
    immediately add it to the queue, and mark it as queued_by_other.

    This would be natural to do with recursion, but we want to avoid
    potentially unbounded recursion blowing the C stack, so we use the list
    approach instead.

    We keep a list of the group_commit_entry of all the waiters that need to
    be processed. Initially this list contains only the entry passed into this
    function.

    We process entries in the list one by one. The element currently being
    processed is pointed to by `entry`, and the element at the end of the list
    is pointed to by `last` (we do not use NULL to terminate the list).

    As we process an entry, any waiters for that entry are added at the end of
    the list, to be processed in subsequent iterations. Then the entry is
    added to the group_commit_queue. This continues until the list is
    exhausted, with all entries ever added eventually processed.

    The end result is a breadth-first traversal of the tree of waiters,
    re-using the `next' pointers of the group_commit_entry objects in place of
    extra stack space in a recursive traversal.

    The temporary list linked through these `next' pointers is not used by the
    caller or any other function; it only exists while doing the iterative
    tree traversal. After, all the processed entries are linked into the
    group_commit_queue.
  */

  cur= wfc;
  last= orig_entry;
  entry= orig_entry;
  for (;;)
  {
    group_commit_entry *next_entry;

    if (entry->cache_mngr->using_xa)
    {
      DEBUG_SYNC(entry->thd, "commit_before_prepare_ordered");
      run_prepare_ordered(entry->thd, entry->all);
      DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered");
    }

    if (cur)
    {
      /*
        Now that we have taken LOCK_prepare_ordered and will queue up in the
        group commit queue, it is safe for following transactions to queue
        themselves. We will grab here any transaction that is now ready to
        queue up, but after that, more transactions may become ready while the
        leader is waiting to start the group commit. So set the flag
        `commit_started', so that later transactions can still participate in
        the group commit.
      */
      cur->commit_started= true;

      /*
        Check if this transaction has other transaction waiting for it to
        commit.

        If so, process the waiting transactions, and their waiters and so on,
        transitively.
      */
      if (cur->subsequent_commits_list)
      {
        wait_for_commit *waiter, **waiter_ptr;

        mysql_mutex_lock(&cur->LOCK_wait_commit);
        /*
          Grab the list, now safely under lock, and process it if still
          non-empty.
        */
        waiter= cur->subsequent_commits_list;
        /* waiter_ptr is the link to patch when unlinking the current waiter. */
        waiter_ptr= &cur->subsequent_commits_list;
        while (waiter)
        {
          wait_for_commit *next_waiter= waiter->next_subsequent_commit;
          group_commit_entry *entry2=
            (group_commit_entry *)waiter->opaque_pointer;
          if (entry2)
          {
            /*
              This is another transaction ready to be written to the binary
              log. We can put it into the queue directly, without needing a
              separate context switch to the other thread. We just set a flag
              so that the other thread will know when it wakes up that it was
              already processed.

              So remove it from the list of our waiters, and instead put it at
              the end of the list to be processed in a subsequent iteration of
              the outer loop.
            */
            *waiter_ptr= next_waiter;
            entry2->queued_by_other= true;
            last->next= entry2;
            last= entry2;
            /*
              As a small optimisation, we do not actually need to set
              entry2->next to NULL, as we can use the pointer `last' to check
              for end-of-list.
            */
          }
          else
          {
            /*
              This transaction is not ready to participate in the group commit
              yet, so leave it in the waiter list. It might join the group
              commit later, if it completes soon enough to do so (it will see
              our wfc->commit_started flag set), or it might commit later in a
              later group commit.
            */
            waiter_ptr= &waiter->next_subsequent_commit;
          }
          waiter= next_waiter;
        }
        mysql_mutex_unlock(&cur->LOCK_wait_commit);
      }
    }

    /*
      Handle the heuristics that if another transaction is waiting for this
      transaction (or if it does so later), then we want to trigger group
      commit immediately, without waiting for the binlog_commit_wait_usec
      timeout to expire.
    */
    entry->thd->waiting_on_group_commit= true;

    /*
      Add the entry to the group commit queue. The temporary-list successor
      must be saved first, as entry->next is overwritten by the push.
    */
    next_entry= entry->next;
    entry->next= group_commit_queue;
    group_commit_queue= entry;
    if (entry == last)
      break;
    /*
      Move to the next entry in the flattened list of waiting transactions
      that still need to be processed transitively.
    */
    entry= next_entry;
    DBUG_ASSERT(entry != NULL);
    cur= entry->thd->wait_for_commit_ptr;
  }

  /*
    If we joined a non-empty queue while binlog_commit_wait_count is in use,
    signal COND_prepare_ordered (waited on in wait_for_sufficient_commits())
    so the waiting leader can re-check the queue.
  */
  if (opt_binlog_commit_wait_count > 0 && orig_queue != NULL)
    mysql_cond_signal(&COND_prepare_ordered);
  mysql_mutex_unlock(&LOCK_prepare_ordered);
  DEBUG_SYNC(orig_entry->thd, "commit_after_release_LOCK_prepare_ordered");

  DBUG_PRINT("info", ("Queued for group commit as %s",
                      (orig_queue == NULL) ? "leader" : "participant"));
  DBUG_RETURN(orig_queue == NULL);
}
7691
7692 bool
7693 MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
7694 {
7695 int is_leader= queue_for_group_commit(entry);
7696
7697 /*
7698 The first in the queue handles group commit for all; the others just wait
7699 to be signalled when group commit is done.
7700 */
7701 if (is_leader < 0)
7702 return true; /* Error */
7703 else if (is_leader)
7704 trx_group_commit_leader(entry);
7705 else if (!entry->queued_by_other)
7706 {
7707 DEBUG_SYNC(entry->thd, "after_semisync_queue");
7708
7709 entry->thd->wait_for_wakeup_ready();
7710 }
7711 else
7712 {
7713 /*
7714 If we were queued by another prior commit, then we are woken up
7715 only when the leader has already completed the commit for us.
7716 So nothing to do here then.
7717 */
7718 }
7719
7720 if (!opt_optimize_thread_scheduling)
7721 {
7722 /* For the leader, trx_group_commit_leader() already took the lock. */
7723 if (!is_leader)
7724 mysql_mutex_lock(&LOCK_commit_ordered);
7725
7726 DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered");
7727 ++num_commits;
7728 if (entry->cache_mngr->using_xa && !entry->error)
7729 run_commit_ordered(entry->thd, entry->all);
7730
7731 group_commit_entry *next= entry->next;
7732 if (!next)
7733 {
7734 group_commit_queue_busy= FALSE;
7735 mysql_cond_signal(&COND_queue_busy);
7736 DEBUG_SYNC(entry->thd, "commit_after_group_run_commit_ordered");
7737 }
7738 mysql_mutex_unlock(&LOCK_commit_ordered);
7739 entry->thd->wakeup_subsequent_commits(entry->error);
7740
7741 if (next)
7742 {
7743 /*
7744 Wake up the next thread in the group commit.
7745
7746 The next thread can be waiting in two different ways, depending on
7747 whether it put itself in the queue, or if it was put in queue by us
7748 because it had to wait for us to commit first.
7749
7750 So execute the appropriate wakeup, identified by the queued_by_other
7751 field.
7752 */
7753 if (next->queued_by_other)
7754 next->thd->wait_for_commit_ptr->wakeup(entry->error);
7755 else
7756 next->thd->signal_wakeup_ready();
7757 }
7758 else
7759 {
7760 /*
7761 If we rotated the binlog, and if we are using the unoptimized thread
7762 scheduling where every thread runs its own commit_ordered(), then we
7763 must do the commit checkpoint and log purge here, after all
7764 commit_ordered() calls have finished, and locks have been released.
7765 */
7766 if (entry->check_purge)
7767 checkpoint_and_purge(entry->binlog_id);
7768 }
7769
7770 }
7771
7772 if (likely(!entry->error))
7773 return entry->thd->wait_for_prior_commit();
7774
7775 switch (entry->error)
7776 {
7777 case ER_ERROR_ON_WRITE:
7778 my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, entry->commit_errno);
7779 break;
7780 case ER_ERROR_ON_READ:
7781 my_error(ER_ERROR_ON_READ, MYF(ME_NOREFRESH),
7782 entry->error_cache->file_name, entry->commit_errno);
7783 break;
7784 default:
7785 /*
7786 There are not (and should not be) any errors thrown not covered above.
7787 But just in case one is added later without updating the above switch
7788 statement, include a catch-all.
7789 */
7790 my_printf_error(entry->error,
7791 "Error writing transaction to binary log: %d",
7792 MYF(ME_NOREFRESH), entry->error);
7793 }
7794
7795 /*
7796 Since we return error, this transaction XID will not be committed, so
7797 we need to mark it as not needed for recovery (unlog() is not called
7798 for a transaction if log_xid() fails).
7799 */
7800 if (entry->cache_mngr->using_xa && entry->cache_mngr->xa_xid &&
7801 entry->cache_mngr->need_unlog)
7802 mark_xid_done(entry->cache_mngr->binlog_id, true);
7803
7804 return 1;
7805 }
7806
/*
  Do binlog group commit as the lead thread.

  This must be called when this statement/transaction is queued at the start of
  the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
  commit all the transactions in the queue (more may have entered while waiting
  for LOCK_log). After commit is done, all other threads in the queue will be
  signalled.

  The mutexes are handed over in a fixed order so that consecutive group
  commits cannot overtake each other:
  LOCK_log -> LOCK_after_binlog_sync -> LOCK_commit_ordered, each one being
  acquired before the previous one is released.

  @param leader  The group commit entry of the calling (leader) thread.

  @note
    When opt_optimize_thread_scheduling is off, this function returns with
    LOCK_commit_ordered still held; the caller is responsible for running
    commit_ordered() and releasing it.
*/
void
MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
{
  uint xid_count= 0;
  my_off_t UNINIT_VAR(commit_offset);
  group_commit_entry *current, *last_in_queue;
  group_commit_entry *queue= NULL;
  bool check_purge= false;
  ulong UNINIT_VAR(binlog_id);
  uint64 commit_id;
  DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");

  {
    DBUG_EXECUTE_IF("inject_binlog_commit_before_get_LOCK_log",
      DBUG_ASSERT(!debug_sync_set_action(leader->thd, STRING_WITH_LEN
        ("commit_before_get_LOCK_log SIGNAL waiting WAIT_FOR cont TIMEOUT 1")));
    );
    /*
      Lock the LOCK_log(), and once we get it, collect any additional writes
      that queued up while we were waiting.
    */
    DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_log");
    mysql_mutex_lock(&LOCK_log);
    DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log");

    mysql_mutex_lock(&LOCK_prepare_ordered);
    if (opt_binlog_commit_wait_count)
      wait_for_sufficient_commits();
    /*
      Note that wait_for_sufficient_commits() may have released and
      re-acquired the LOCK_log and LOCK_prepare_ordered if it needed to wait.
    */
    /* Detach the whole queue; later arrivals start a new group. */
    current= group_commit_queue;
    group_commit_queue= NULL;
    mysql_mutex_unlock(&LOCK_prepare_ordered);
    binlog_id= current_binlog_id;

    /* As the queue is in reverse order of entering, reverse it. */
    last_in_queue= current;
    while (current)
    {
      group_commit_entry *next= current->next;
      /*
        Now that group commit is started, we can clear the flag; there is no
        longer any use in waiters on this commit trying to trigger it early.
      */
      current->thd->waiting_on_group_commit= false;
      current->next= queue;
      queue= current;
      current= next;
    }
    DBUG_ASSERT(leader == queue /* the leader should be first in queue */);

    /* Now we have in queue the list of transactions to be committed in order. */
  }

  DBUG_ASSERT(is_open());
  if (likely(is_open()))                       // Should always be true
  {
    /*
      A group consisting of only the leader gets commit_id 0; otherwise the
      leader's query_id is used as the commit_id for the whole group.
    */
    commit_id= (last_in_queue == leader ? 0 : (uint64)leader->thd->query_id);
    DBUG_EXECUTE_IF("binlog_force_commit_id",
      {
        const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") };
        bool null_value;
        user_var_entry *entry=
          (user_var_entry*) my_hash_search(&leader->thd->user_vars,
                                           (uchar*) commit_name.str,
                                           commit_name.length);
        commit_id= entry->val_int(&null_value);
      });
    /*
      Commit every transaction in the queue.

      Note that we are doing this in a different thread than the one running
      the transaction! So we are limited in the operations we can do. In
      particular, we cannot call my_error() on behalf of a transaction, as
      that obtains the THD from thread local storage. Instead, we must set
      current->error and let the thread do the error reporting itself once
      we wake it up.
    */
    for (current= queue; current != NULL; current= current->next)
    {
      set_current_thd(current->thd);
      binlog_cache_mngr *cache_mngr= current->cache_mngr;

      /*
        We already checked before that at least one cache is non-empty; if both
        are empty we would have skipped calling into here.
      */
      DBUG_ASSERT(!cache_mngr->stmt_cache.empty() || !cache_mngr->trx_cache.empty());

      /* Capture errno right away so the owning thread can report it later. */
      if (unlikely((current->error= write_transaction_or_stmt(current,
                                                              commit_id))))
        current->commit_errno= errno;

      /* Record the binlog file name and end offset of this transaction. */
      strmake_buf(cache_mngr->last_commit_pos_file, log_file_name);
      commit_offset= my_b_write_tell(&log_file);
      cache_mngr->last_commit_pos_offset= commit_offset;
      if (cache_mngr->using_xa && cache_mngr->xa_xid)
      {
        /*
          If all storage engines support commit_checkpoint_request(), then we
          do not need to keep track of when this XID is durably committed.
          Instead we will just ask the storage engine to durably commit all its
          XIDs when we rotate a binlog file.
        */
        if (current->need_unlog)
        {
          xid_count++;
          cache_mngr->need_unlog= true;
          cache_mngr->binlog_id= binlog_id;
        }
        else
          cache_mngr->need_unlog= false;

        cache_mngr->delayed_error= false;
      }
    }
    /* Restore the leader's own THD as current after the loop. */
    set_current_thd(leader->thd);

    bool synced= 0;
    if (unlikely(flush_and_sync(&synced)))
    {
      /* Flush/sync failed: flag every entry that has no error of its own. */
      for (current= queue; current != NULL; current= current->next)
      {
        if (!current->error)
        {
          current->error= ER_ERROR_ON_WRITE;
          current->commit_errno= errno;
          current->error_cache= NULL;
        }
      }
    }
    else
    {
      bool any_error= false;

      mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
      mysql_mutex_assert_owner(&LOCK_log);
      mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
      mysql_mutex_assert_not_owner(&LOCK_commit_ordered);

      /* Report each successfully written transaction to semi-sync. */
      for (current= queue; current != NULL; current= current->next)
      {
#ifdef HAVE_REPLICATION
        if (likely(!current->error) &&
            unlikely(repl_semisync_master.
                     report_binlog_update(current->thd,
                                          current->cache_mngr->
                                          last_commit_pos_file,
                                          current->cache_mngr->
                                          last_commit_pos_offset)))
        {
          current->error= ER_ERROR_ON_WRITE;
          current->commit_errno= -1;
          current->error_cache= NULL;
          any_error= true;
        }
#endif
      }

      /*
        update binlog_end_pos so it can be read by dump thread
        Note: must be _after_ the RUN_HOOK(after_flush) or else
        semi-sync might not have put the transaction into
        its list before dump-thread tries to send it
      */
      update_binlog_end_pos(commit_offset);

      if (unlikely(any_error))
        sql_print_error("Failed to run 'after_flush' hooks");
    }

    /*
      If any commit_events are Xid_log_event, increase the number of pending
      XIDs in current binlog (it's decreased in ::unlog()). When the count in
      a (not active) binlog file reaches zero, we know that it is no longer
      needed in XA recovery, and we can log a new binlog checkpoint event.
    */
    if (xid_count > 0)
    {
      mark_xids_active(binlog_id, xid_count);
    }

    /*
      If a transaction with the LOAD DATA statement is divided
      into logical mini-transactions (of the 10K rows) and binlog
      is rotated, then the last portion of data may be lost due to
      wsrep handler re-registration at the boundary of the split.
      Since splitting of the LOAD DATA into mini-transactions is
      logical, we should not allow these mini-transactions to fall
      into separate binlogs. Therefore, it is necessary to prohibit
      the rotation of binlog in the middle of processing LOAD DATA:
    */
#ifdef WITH_WSREP
    if (!leader->thd->wsrep_split_flag)
    {
#endif /* WITH_WSREP */
      if (rotate(false, &check_purge))
      {
        /*
          If we fail to rotate, which thread should get the error?
          We give the error to the leader, as any my_error() thrown inside
          rotate() will have been registered for the leader THD.

          However we must not return error from here - that would cause
          ha_commit_trans() to abort and rollback the transaction, which would
          leave an inconsistent state with the transaction committed in the
          binlog but rolled back in the engine.

          Instead set a flag so that we can return error later, from unlog(),
          when the transaction has been safely committed in the engine.
        */
        leader->cache_mngr->delayed_error= true;
        my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, errno);
        check_purge= false;
      }
#ifdef WITH_WSREP
    }
#endif /* WITH_WSREP */
    /* In case of binlog rotate, update the correct current binlog offset. */
    commit_offset= my_b_write_tell(&log_file);
  }

  DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_after_binlog_sync");
  mysql_mutex_lock(&LOCK_after_binlog_sync);
  /*
    We cannot unlock LOCK_log until we have locked LOCK_after_binlog_sync;
    otherwise scheduling could allow the next group commit to run ahead of us,
    messing up the order of commit_ordered() calls. But as soon as
    LOCK_after_binlog_sync is obtained, we can let the next group commit start.
  */
  mysql_mutex_unlock(&LOCK_log);

  DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log");

  /*
    Loop through threads and run the binlog_sync hook
  */
  {
    mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
    mysql_mutex_assert_not_owner(&LOCK_log);
    mysql_mutex_assert_owner(&LOCK_after_binlog_sync);
    mysql_mutex_assert_not_owner(&LOCK_commit_ordered);

    /* first/last are maintained but not otherwise read in this function. */
    bool first __attribute__((unused))= true;
    bool last __attribute__((unused));
    for (current= queue; current != NULL; current= current->next)
    {
      last= current->next == NULL;
#ifdef HAVE_REPLICATION
      if (likely(!current->error))
        current->error=
          repl_semisync_master.wait_after_sync(current->cache_mngr->
                                               last_commit_pos_file,
                                               current->cache_mngr->
                                               last_commit_pos_offset);
#endif
      first= false;
    }
  }

  DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");

  mysql_mutex_lock(&LOCK_commit_ordered);
  DBUG_EXECUTE_IF("crash_before_engine_commit",
      {
        DBUG_SUICIDE();
      });
  /* Updated under LOCK_commit_ordered, which protects this member. */
  last_commit_pos_offset= commit_offset;

  /*
    Unlock LOCK_after_binlog_sync only *after* LOCK_commit_ordered has been
    acquired so that groups can not reorder for the different stages of
    the group commit procedure.
  */
  mysql_mutex_unlock(&LOCK_after_binlog_sync);
  DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_after_binlog_sync");
  ++num_group_commits;

  if (!opt_optimize_thread_scheduling)
  {
    /*
      If we want to run commit_ordered() each in the transaction's own thread
      context, then we need to mark the queue reserved; we need to finish all
      threads in one group commit before the next group commit can be allowed
      to proceed, and we cannot unlock a simple pthreads mutex in a different
      thread from the one that locked it.
    */

    while (group_commit_queue_busy)
      mysql_cond_wait(&COND_queue_busy, &LOCK_commit_ordered);
    group_commit_queue_busy= TRUE;

    /*
      Set these so parent can run checkpoint_and_purge() in last thread.
      (When using optimized thread scheduling, we run checkpoint_and_purge()
      in this function, so parent does not need to and we need not set these
      values).
    */
    last_in_queue->check_purge= check_purge;
    last_in_queue->binlog_id= binlog_id;

    /* Note that we return with LOCK_commit_ordered locked! */
    DBUG_VOID_RETURN;
  }

  /*
    Wakeup each participant waiting for our group commit, first calling the
    commit_ordered() methods for any transactions doing 2-phase commit.
  */
  current= queue;
  while (current != NULL)
  {
    group_commit_entry *next;

    DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered");
    ++num_commits;
    if (current->cache_mngr->using_xa && likely(!current->error) &&
        DBUG_EVALUATE_IF("skip_commit_ordered", 0, 1))
      run_commit_ordered(current->thd, current->all);
    current->thd->wakeup_subsequent_commits(current->error);

    /*
      Careful not to access current->next after waking up the other thread! As
      it may change immediately after wakeup.
    */
    next= current->next;
    if (current != leader)                      // Don't wake up ourself
    {
      /* Use the wakeup matching the way the participant is waiting. */
      if (current->queued_by_other)
        current->thd->wait_for_commit_ptr->wakeup(current->error);
      else
        current->thd->signal_wakeup_ready();
    }
    current= next;
  }
  DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered");
  mysql_mutex_unlock(&LOCK_commit_ordered);
  DEBUG_SYNC(leader->thd, "commit_after_group_release_commit_ordered");

  /* Only set when rotate() succeeded and requested it. */
  if (check_purge)
    checkpoint_and_purge(binlog_id);

  DBUG_VOID_RETURN;
}
8163
8164
8165 int
8166 MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
8167 uint64 commit_id)
8168 {
8169 binlog_cache_mngr *mngr= entry->cache_mngr;
8170 DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt");
8171
8172 if (write_gtid_event(entry->thd, false, entry->using_trx_cache, commit_id))
8173 DBUG_RETURN(ER_ERROR_ON_WRITE);
8174
8175 if (entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
8176 write_cache(entry->thd, mngr->get_binlog_cache_log(FALSE)))
8177 {
8178 entry->error_cache= &mngr->stmt_cache.cache_log;
8179 DBUG_RETURN(ER_ERROR_ON_WRITE);
8180 }
8181
8182 if (entry->using_trx_cache && !mngr->trx_cache.empty())
8183 {
8184 DBUG_EXECUTE_IF("crash_before_writing_xid",
8185 {
8186 if ((write_cache(entry->thd,
8187 mngr->get_binlog_cache_log(TRUE))))
8188 DBUG_PRINT("info", ("error writing binlog cache"));
8189 else
8190 flush_and_sync(0);
8191
8192 DBUG_PRINT("info", ("crashing before writing xid"));
8193 DBUG_SUICIDE();
8194 });
8195
8196 if (write_cache(entry->thd, mngr->get_binlog_cache_log(TRUE)))
8197 {
8198 entry->error_cache= &mngr->trx_cache.cache_log;
8199 DBUG_RETURN(ER_ERROR_ON_WRITE);
8200 }
8201 }
8202
8203 DBUG_EXECUTE_IF("inject_error_writing_xid",
8204 {
8205 entry->error_cache= NULL;
8206 errno= 28;
8207 DBUG_RETURN(ER_ERROR_ON_WRITE);
8208 });
8209
8210 if (write_event(entry->end_event))
8211 {
8212 entry->error_cache= NULL;
8213 DBUG_RETURN(ER_ERROR_ON_WRITE);
8214 }
8215 status_var_add(entry->thd->status_var.binlog_bytes_written,
8216 entry->end_event->data_written);
8217
8218 if (entry->incident_event)
8219 {
8220 if (write_event(entry->incident_event))
8221 {
8222 entry->error_cache= NULL;
8223 DBUG_RETURN(ER_ERROR_ON_WRITE);
8224 }
8225 }
8226
8227 if (unlikely(mngr->get_binlog_cache_log(FALSE)->error))
8228 {
8229 entry->error_cache= &mngr->stmt_cache.cache_log;
8230 DBUG_RETURN(ER_ERROR_ON_WRITE);
8231 }
8232 if (unlikely(mngr->get_binlog_cache_log(TRUE)->error)) // Error on read
8233 {
8234 entry->error_cache= &mngr->trx_cache.cache_log;
8235 DBUG_RETURN(ER_ERROR_ON_WRITE);
8236 }
8237
8238 DBUG_RETURN(0);
8239 }
8240
8241
/*
  Wait for sufficient commits to queue up for group commit, according to the
  values of binlog_commit_wait_count and binlog_commit_wait_usec.

  The wait ends as soon as one of the following happens; each case bumps its
  own status counter:
    - at least opt_binlog_commit_wait_count entries have been counted in
      group_commit_queue (group_commit_trigger_count),
    - a counted entry belongs to a THD with has_waiter set
      (group_commit_trigger_lock_wait),
    - the timed wait reports ETIMEDOUT (group_commit_trigger_timeout).

  Both LOCK_log and LOCK_prepare_ordered must be held on entry (asserted) and
  are held again on return.

  Note that this function may release and re-acquire LOCK_log and
  LOCK_prepare_ordered if it needs to wait.
*/

void
MYSQL_BIN_LOG::wait_for_sufficient_commits()
{
  size_t count;
  group_commit_entry *e;
  group_commit_entry *last_head;
  struct timespec wait_until;

  mysql_mutex_assert_owner(&LOCK_log);
  mysql_mutex_assert_owner(&LOCK_prepare_ordered);

  /*
    First pass over what is queued right now. If this already satisfies one
    of the trigger conditions we return without releasing any lock.
  */
  for (e= last_head= group_commit_queue, count= 0; e; e= e->next)
  {
    if (++count >= opt_binlog_commit_wait_count)
    {
      group_commit_trigger_count++;
      return;
    }
    if (unlikely(e->thd->has_waiter))
    {
      group_commit_trigger_lock_wait++;
      return;
    }
  }

  /* We are going to wait: drop LOCK_log, keep LOCK_prepare_ordered. */
  mysql_mutex_unlock(&LOCK_log);
  /* opt_binlog_commit_wait_usec is in microseconds; the macro takes ns. */
  set_timespec_nsec(wait_until, (ulonglong)1000*opt_binlog_commit_wait_usec);

  for (;;)
  {
    int err;
    group_commit_entry *head;

    err= mysql_cond_timedwait(&COND_prepare_ordered, &LOCK_prepare_ordered,
                              &wait_until);
    if (err == ETIMEDOUT)
    {
      group_commit_trigger_timeout++;
      break;
    }
    /*
      The entry that was at the head during the previous scan may have had
      its has_waiter flag set in the meantime.
    */
    if (unlikely(last_head->thd->has_waiter))
    {
      group_commit_trigger_lock_wait++;
      break;
    }
    /*
      Count only the entries in front of last_head; everything from last_head
      onwards was already included in count by an earlier scan.
    */
    head= group_commit_queue;
    for (e= head; e && e != last_head; e= e->next)
    {
      ++count;
      if (unlikely(e->thd->has_waiter))
      {
        group_commit_trigger_lock_wait++;
        goto after_loop;
      }
    }
    if (count >= opt_binlog_commit_wait_count)
    {
      group_commit_trigger_count++;
      break;
    }
    last_head= head;
  }
after_loop:

  /*
    We must not wait for LOCK_log while holding LOCK_prepare_ordered.
    LOCK_log can be held for long periods (eg. we do I/O under it), while
    LOCK_prepare_ordered must only be held for short periods.

    In addition, waiting for LOCK_log while holding LOCK_prepare_ordered would
    violate locking order of LOCK_log-before-LOCK_prepare_ordered. This could
    cause SAFEMUTEX warnings (even if it cannot actually deadlock with current
    code, as there can be at most one group commit leader thread at a time).

    So release and re-acquire LOCK_prepare_ordered if we need to wait for the
    LOCK_log.
  */
  if (mysql_mutex_trylock(&LOCK_log))
  {
    /* trylock failed: fall back to blocking acquisition in the right order */
    mysql_mutex_unlock(&LOCK_prepare_ordered);
    mysql_mutex_lock(&LOCK_log);
    mysql_mutex_lock(&LOCK_prepare_ordered);
  }
}
8334
8335
8336 void
8337 MYSQL_BIN_LOG::binlog_trigger_immediate_group_commit()
8338 {
8339 group_commit_entry *head;
8340 mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8341 head= group_commit_queue;
8342 if (head)
8343 {
8344 head->thd->has_waiter= true;
8345 mysql_cond_signal(&COND_prepare_ordered);
8346 }
8347 }
8348
8349
8350 /*
8351 This function is called when a transaction T1 goes to wait for another
8352 transaction T2. It is used to cut short any binlog group commit delay from
8353 --binlog-commit-wait-count in the case where another transaction is stalled
8354 on the wait due to conflicting row locks.
8355
8356 If T2 is already ready to group commit, any waiting group commit will be
8357 signalled to proceed immediately. Otherwise, a flag will be set in T2, and
8358 when T2 later becomes ready, immediate group commit will be triggered.
8359 */
8360 void
8361 binlog_report_wait_for(THD *thd1, THD *thd2)
8362 {
8363 if (opt_binlog_commit_wait_count == 0)
8364 return;
8365 mysql_mutex_lock(&LOCK_prepare_ordered);
8366 thd2->has_waiter= true;
8367 if (thd2->waiting_on_group_commit)
8368 mysql_bin_log.binlog_trigger_immediate_group_commit();
8369 mysql_mutex_unlock(&LOCK_prepare_ordered);
8370 }
8371
8372
8373 /**
8374 Wait until we get a signal that the relay log has been updated.
8375
8376 @param thd Thread variable
8377
8378 @note
8379 One must have a lock on LOCK_log before calling this function.
8380 This lock will be released before return! That's required by
8381 THD::enter_cond() (see NOTES in sql_class.h).
8382 */
8383
8384 void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
8385 {
8386 PSI_stage_info old_stage;
8387 DBUG_ENTER("wait_for_update_relay_log");
8388
8389 mysql_mutex_assert_owner(&LOCK_log);
8390 thd->ENTER_COND(&COND_relay_log_updated, &LOCK_log,
8391 &stage_slave_has_read_all_relay_log,
8392 &old_stage);
8393 mysql_cond_wait(&COND_relay_log_updated, &LOCK_log);
8394 thd->EXIT_COND(&old_stage);
8395 DBUG_VOID_RETURN;
8396 }
8397
8398 /**
8399 Wait until we get a signal that the binary log has been updated.
8400 Applies to master only.
8401
8402 NOTES
8403 @param[in] thd a THD struct
8404 @param[in] timeout a pointer to a timespec;
8405 NULL means to wait w/o timeout.
8406 @retval 0 if got signalled on update
8407 @retval non-0 if wait timeout elapsed
8408 @note
8409 LOCK_log must be taken before calling this function.
8410 LOCK_log is being released while the thread is waiting.
8411 LOCK_log is released by the caller.
8412 */
8413
8414 int MYSQL_BIN_LOG::wait_for_update_binlog_end_pos(THD* thd,
8415 struct timespec *timeout)
8416 {
8417 int ret= 0;
8418 DBUG_ENTER("wait_for_update_binlog_end_pos");
8419
8420 thd_wait_begin(thd, THD_WAIT_BINLOG);
8421 mysql_mutex_assert_owner(get_binlog_end_pos_lock());
8422 if (!timeout)
8423 mysql_cond_wait(&COND_bin_log_updated, get_binlog_end_pos_lock());
8424 else
8425 ret= mysql_cond_timedwait(&COND_bin_log_updated, get_binlog_end_pos_lock(),
8426 timeout);
8427 thd_wait_end(thd);
8428 DBUG_RETURN(ret);
8429 }
8430
8431
/**
  Close the log file.

  When the log is open this optionally writes a Stop event, optionally saves
  the binlog GTID state, clears the "in use" flag in the file header and then
  lets MYSQL_LOG::close() do the actual close. Independently of that, the
  index file is closed when LOG_CLOSE_INDEX is given. Finally log_state is
  updated and the stored log name is freed.

  @param exiting     Bitmask for one or more of the following bits:
          - LOG_CLOSE_INDEX : if we should close the index file
          - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
                                     at once after close.
          - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
          - LOG_CLOSE_DELAYED_CLOSE : do not yet close the file and clear the
                                      LOG_EVENT_BINLOG_IN_USE_F flag

  @note
    LOCK_log must be held by the caller (asserted).
    One can do an open on the object at once after doing a close.
    The internal structures are not freed until cleanup() is called
*/

void MYSQL_BIN_LOG::close(uint exiting)
{                                       // One can't set log_type here!
  bool failed_to_save_state= false;
  DBUG_ENTER("MYSQL_BIN_LOG::close");
  DBUG_PRINT("enter",("exiting: %d", (int) exiting));

  mysql_mutex_assert_owner(&LOCK_log);

  if (log_state == LOG_OPENED)
  {
#ifdef HAVE_REPLICATION
    if (log_type == LOG_BIN &&
        (exiting & LOG_CLOSE_STOP_EVENT))
    {
      Stop_log_event s;
      // the checksumming rule for relay-log case is similar to Rotate
      s.checksum_alg= is_relay_log ? relay_log_checksum_alg
                                   : (enum_binlog_checksum_alg)binlog_checksum_options;
      DBUG_ASSERT(!is_relay_log ||
                  relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
      /* NOTE(review): the result of write_event() is not checked here. */
      write_event(&s);
      bytes_written+= s.data_written;
      flush_io_cache(&log_file);
      update_binlog_end_pos();

      /*
        When we shut down server, write out the binlog state to a separate
        file so we do not have to scan an entire binlog file to recover it
        at next server start.

        Note that this must be written and synced to disk before marking the
        last binlog file as "not crashed".
      */
      if (!is_relay_log && write_state_to_file())
      {
        sql_print_error("Failed to save binlog GTID state during shutdown. "
                        "Binlog will be marked as crashed, so that crash "
                        "recovery can recover the state at next server "
                        "startup.");
        /*
          Leave binlog file marked as crashed, so we can recover state by
          scanning it now that we failed to write out the state properly.
        */
        failed_to_save_state= true;
      }
    }
#endif /* HAVE_REPLICATION */

    /* don't pwrite in a file opened with O_APPEND - it doesn't work */
    if (log_file.type == WRITE_CACHE && log_type == LOG_BIN
        && !(exiting & LOG_CLOSE_DELAYED_CLOSE))
    {
      /* Remember the file position; the flag write below may move it. */
      my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
      if (!failed_to_save_state)
        clear_inuse_flag_when_closing(log_file.file);
      /*
        Restore position so that anything we have in the IO_cache is written
        to the correct position.
        We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
        original position on system that doesn't support pwrite().
      */
      mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
    }

    /* this will cleanup IO_CACHE, sync and close the file */
    MYSQL_LOG::close(exiting);
  }

  /*
    The following test is needed even if is_open() is not set, as we may have
    called a not complete close earlier and the index file is still open.
  */

  if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
  {
    end_io_cache(&index_file);
    /* Report a failed close only if no write error was recorded before. */
    if (unlikely(mysql_file_close(index_file.file, MYF(0)) < 0) &&
        ! write_error)
    {
      write_error= 1;
      sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), index_file_name, errno);
    }
  }
  log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
  my_free(name);
  name= NULL;
  DBUG_VOID_RETURN;
}
8536
8537
8538 /*
8539 Clear the LOG_EVENT_BINLOG_IN_USE_F; this marks the binlog file as cleanly
8540 closed and not needing crash recovery.
8541 */
8542 void MYSQL_BIN_LOG::clear_inuse_flag_when_closing(File file)
8543 {
8544 my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
8545 uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F
8546 mysql_file_pwrite(file, &flags, 1, offset, MYF(0));
8547 }
8548
8549
8550 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
8551 {
8552 /*
8553 We need to take locks, otherwise this may happen:
8554 new_file() is called, calls open(old_max_size), then before open() starts,
8555 set_max_size() sets max_size to max_size_arg, then open() starts and
8556 uses the old_max_size argument, so max_size_arg has been overwritten and
8557 it's like if the SET command was never run.
8558 */
8559 DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
8560 mysql_mutex_lock(&LOCK_log);
8561 if (is_open())
8562 max_size= max_size_arg;
8563 mysql_mutex_unlock(&LOCK_log);
8564 DBUG_VOID_RETURN;
8565 }
8566
8567
8568 /**
8569 Check if a string is a valid number.
8570
8571 @param str String to test
8572 @param res Store value here
8573 @param allow_wildcards Set to 1 if we should ignore '%' and '_'
8574
8575 @note
8576 For the moment the allow_wildcards argument is not used
8577 Should be move to some other file.
8578
8579 @retval
8580 1 String is a number
8581 @retval
8582 0 String is not a number
8583 */
8584
8585 static bool test_if_number(const char *str, ulong *res, bool allow_wildcards)
8586 {
8587 int flag;
8588 const char *start;
8589 DBUG_ENTER("test_if_number");
8590
8591 flag=0; start=str;
8592 while (*str++ == ' ') ;
8593 if (*--str == '-' || *str == '+')
8594 str++;
8595 while (my_isdigit(files_charset_info,*str) ||
8596 (allow_wildcards && (*str == wild_many || *str == wild_one)))
8597 {
8598 flag=1;
8599 str++;
8600 }
8601 if (*str == '.')
8602 {
8603 for (str++ ;
8604 my_isdigit(files_charset_info,*str) ||
8605 (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
8606 str++, flag=1) ;
8607 }
8608 if (*str != 0 || flag == 0)
8609 DBUG_RETURN(0);
8610 if (res)
8611 *res=atol(start);
8612 DBUG_RETURN(1); /* Number ok */
8613 } /* test_if_number */
8614
8615
/*
  Print 'message' to the error log together with a description of the last
  system error.

  On Windows the text comes from FormatMessage() for GetLastError(); if no
  text can be obtained only 'message' is printed. Elsewhere strerror(errno)
  is used when HAVE_STRERROR is defined, and perror() otherwise.
*/
void sql_perror(const char *message)
{
#if defined(_WIN32)
  char *sys_msg;
  DWORD last_error= GetLastError();
  DWORD sys_msg_len=
    FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
                  FORMAT_MESSAGE_IGNORE_INSERTS, NULL, last_error,
                  MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&sys_msg,
                  0, NULL);
  if (sys_msg_len == 0)
  {
    /* No system text available */
    sql_print_error("%s", message);
    return;
  }
  sql_print_error("%s: %s",message, sys_msg);
  LocalFree((HLOCAL)sys_msg);           // buffer was allocated by the system
#elif defined(HAVE_STRERROR)
  sql_print_error("%s: %s",message, strerror(errno));
#else
  perror(message);
#endif
}
8638
8639
8640 /*
8641 Change the file associated with two output streams. Used to
8642 redirect stdout and stderr to a file. The streams are reopened
8643 only for appending (writing at end of file).
8644 */
8645 bool reopen_fstreams(const char *filename, FILE *outstream, FILE *errstream)
8646 {
8647 if ((outstream && !my_freopen(filename, "a", outstream)) ||
8648 (errstream && !my_freopen(filename, "a", errstream)))
8649 {
8650 my_error(ER_CANT_CREATE_FILE, MYF(0), filename, errno);
8651 return TRUE;
8652 }
8653
8654 /* The error stream must be unbuffered. */
8655 if (errstream)
8656 setbuf(errstream, NULL);
8657
8658 return FALSE;
8659 }
8660
8661
8662 /*
8663 Unfortunately, there seems to be no good way
8664 to restore the original streams upon failure.
8665 */
8666 static bool redirect_std_streams(const char *file)
8667 {
8668 if (reopen_fstreams(file, stdout, stderr))
8669 return TRUE;
8670
8671 setbuf(stderr, NULL);
8672 return FALSE;
8673 }
8674
8675
8676 bool flush_error_log()
8677 {
8678 bool result= 0;
8679 if (opt_error_log)
8680 {
8681 mysql_mutex_lock(&LOCK_error_log);
8682 if (redirect_std_streams(log_error_file))
8683 result= 1;
8684 mysql_mutex_unlock(&LOCK_error_log);
8685 }
8686 return result;
8687 }
8688
8689 #ifdef _WIN32
8690 static void print_buffer_to_nt_eventlog(enum loglevel level, char *buff,
8691 size_t length, size_t buffLen)
8692 {
8693 HANDLE event;
8694 char *buffptr= buff;
8695 DBUG_ENTER("print_buffer_to_nt_eventlog");
8696
8697 /* Add ending CR/LF's to string, overwrite last chars if necessary */
8698 strmov(buffptr+MY_MIN(length, buffLen-5), "\r\n\r\n");
8699
8700 setup_windows_event_source();
8701 if ((event= RegisterEventSource(NULL,"MySQL")))
8702 {
8703 switch (level) {
8704 case ERROR_LEVEL:
8705 ReportEvent(event, EVENTLOG_ERROR_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
8706 (LPCSTR*)&buffptr, NULL);
8707 break;
8708 case WARNING_LEVEL:
8709 ReportEvent(event, EVENTLOG_WARNING_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
8710 (LPCSTR*) &buffptr, NULL);
8711 break;
8712 case INFORMATION_LEVEL:
8713 ReportEvent(event, EVENTLOG_INFORMATION_TYPE, 0, MSG_DEFAULT, NULL, 1,
8714 0, (LPCSTR*) &buffptr, NULL);
8715 break;
8716 }
8717 DeregisterEventSource(event);
8718 }
8719
8720 DBUG_VOID_RETURN;
8721 }
8722 #endif /* _WIN32 */
8723
8724
8725 #ifndef EMBEDDED_LIBRARY
8726 static void print_buffer_to_file(enum loglevel level, const char *buffer,
8727 size_t length)
8728 {
8729 time_t skr;
8730 struct tm tm_tmp;
8731 struct tm *start;
8732 THD *thd= 0;
8733 size_t tag_length= 0;
8734 char tag[NAME_LEN];
8735 DBUG_ENTER("print_buffer_to_file");
8736 DBUG_PRINT("enter",("buffer: %s", buffer));
8737
8738 if (mysqld_server_initialized && (thd= current_thd))
8739 {
8740 if (thd->connection_name.length)
8741 {
8742 /*
8743 Add tag for slaves so that the user can see from which connection
8744 the error originates.
8745 */
8746 tag_length= my_snprintf(tag, sizeof(tag),
8747 ER_THD(thd, ER_MASTER_LOG_PREFIX),
8748 (int) thd->connection_name.length,
8749 thd->connection_name.str);
8750 }
8751 }
8752
8753 mysql_mutex_lock(&LOCK_error_log);
8754
8755 skr= my_time(0);
8756 localtime_r(&skr, &tm_tmp);
8757 start=&tm_tmp;
8758
8759 fprintf(stderr, "%d-%02d-%02d %2d:%02d:%02d %lu [%s] %.*s%.*s\n",
8760 start->tm_year + 1900,
8761 start->tm_mon+1,
8762 start->tm_mday,
8763 start->tm_hour,
8764 start->tm_min,
8765 start->tm_sec,
8766 (unsigned long) (thd ? thd->thread_id : 0),
8767 (level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
8768 "Warning" : "Note"),
8769 (int) tag_length, tag,
8770 (int) length, buffer);
8771
8772 fflush(stderr);
8773
8774 mysql_mutex_unlock(&LOCK_error_log);
8775 DBUG_VOID_RETURN;
8776 }
8777
8778 /**
8779 Prints a printf style message to the error log and, under NT, to the
8780 Windows event log.
8781
8782 This function prints the message into a buffer and then sends that buffer
8783 to other functions to write that message to other logging sources.
8784
8785 @param level The level of the msg significance
8786 @param format Printf style format of message
8787 @param args va_list list of arguments for the message
8788
8789 @returns
8790 The function always returns 0. The return value is present in the
8791 signature to be compatible with other logging routines, which could
8792 return an error (e.g. logging to the log tables)
8793 */
8794 int vprint_msg_to_log(enum loglevel level, const char *format, va_list args)
8795 {
8796 char buff[1024];
8797 size_t length;
8798 DBUG_ENTER("vprint_msg_to_log");
8799
8800 length= my_vsnprintf(buff, sizeof(buff), format, args);
8801 print_buffer_to_file(level, buff, length);
8802
8803 #ifdef _WIN32
8804 print_buffer_to_nt_eventlog(level, buff, length, sizeof(buff));
8805 #endif
8806
8807 DBUG_RETURN(0);
8808 }
8809 #endif /* EMBEDDED_LIBRARY */
8810
8811
8812 void sql_print_error(const char *format, ...)
8813 {
8814 va_list args;
8815 DBUG_ENTER("sql_print_error");
8816
8817 va_start(args, format);
8818 error_log_print(ERROR_LEVEL, format, args);
8819 va_end(args);
8820
8821 DBUG_VOID_RETURN;
8822 }
8823
8824
8825 void sql_print_warning(const char *format, ...)
8826 {
8827 va_list args;
8828 DBUG_ENTER("sql_print_warning");
8829
8830 va_start(args, format);
8831 error_log_print(WARNING_LEVEL, format, args);
8832 va_end(args);
8833
8834 DBUG_VOID_RETURN;
8835 }
8836
8837
8838 void sql_print_information(const char *format, ...)
8839 {
8840 va_list args;
8841 DBUG_ENTER("sql_print_information");
8842
8843 va_start(args, format);
8844 sql_print_information_v(format, args);
8845 va_end(args);
8846
8847 DBUG_VOID_RETURN;
8848 }
8849
8850 void sql_print_information_v(const char *format, va_list ap)
8851 {
8852 if (disable_log_notes)
8853 return; // Skip notes during start/shutdown
8854
8855 error_log_print(INFORMATION_LEVEL, format, ap);
8856 }
8857
8858 void
8859 TC_LOG::run_prepare_ordered(THD *thd, bool all)
8860 {
8861 Ha_trx_info *ha_info=
8862 all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
8863
8864 mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8865 for (; ha_info; ha_info= ha_info->next())
8866 {
8867 handlerton *ht= ha_info->ht();
8868 if (!ht->prepare_ordered)
8869 continue;
8870 ht->prepare_ordered(ht, thd, all);
8871 }
8872 }
8873
8874
8875 void
8876 TC_LOG::run_commit_ordered(THD *thd, bool all)
8877 {
8878 Ha_trx_info *ha_info=
8879 all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
8880
8881 mysql_mutex_assert_owner(&LOCK_commit_ordered);
8882 for (; ha_info; ha_info= ha_info->next())
8883 {
8884 handlerton *ht= ha_info->ht();
8885 if (!ht->commit_ordered)
8886 continue;
8887 ht->commit_ordered(ht, thd, all);
8888 DEBUG_SYNC(thd, "commit_after_run_commit_ordered");
8889 }
8890 }
8891
8892
8893 int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all,
8894 bool need_prepare_ordered,
8895 bool need_commit_ordered)
8896 {
8897 int cookie;
8898 struct commit_entry entry;
8899 bool UNINIT_VAR(is_group_commit_leader);
8900
8901 if (need_prepare_ordered)
8902 {
8903 mysql_mutex_lock(&LOCK_prepare_ordered);
8904 run_prepare_ordered(thd, all);
8905 if (need_commit_ordered)
8906 {
8907 /*
8908 Must put us in queue so we can run_commit_ordered() in same sequence
8909 as we did run_prepare_ordered().
8910 */
8911 thd->clear_wakeup_ready();
8912 entry.thd= thd;
8913 commit_entry *previous_queue= commit_ordered_queue;
8914 entry.next= previous_queue;
8915 commit_ordered_queue= &entry;
8916 is_group_commit_leader= (previous_queue == NULL);
8917 }
8918 mysql_mutex_unlock(&LOCK_prepare_ordered);
8919 }
8920
8921 if (thd->wait_for_prior_commit())
8922 return 0;
8923
8924 cookie= 0;
8925 if (xid)
8926 cookie= log_one_transaction(xid);
8927
8928 if (need_commit_ordered)
8929 {
8930 if (need_prepare_ordered)
8931 {
8932 /*
8933 We did the run_prepare_ordered() serialised, then ran the log_xid() in
8934 parallel. Now we have to do run_commit_ordered() serialised in the
8935 same sequence as run_prepare_ordered().
8936
8937 We do this starting from the head of the queue, each thread doing
8938 run_commit_ordered() and signalling the next in queue.
8939 */
8940 if (is_group_commit_leader)
8941 {
8942 /* The first in queue starts the ball rolling. */
8943 mysql_mutex_lock(&LOCK_prepare_ordered);
8944 while (commit_ordered_queue_busy)
8945 mysql_cond_wait(&COND_queue_busy, &LOCK_prepare_ordered);
8946 commit_entry *queue= commit_ordered_queue;
8947 commit_ordered_queue= NULL;
8948 /*
8949 Mark the queue busy while we bounce it from one thread to the
8950 next.
8951 */
8952 commit_ordered_queue_busy= true;
8953 mysql_mutex_unlock(&LOCK_prepare_ordered);
8954
8955 /* Reverse the queue list so we get correct order. */
8956 commit_entry *prev= NULL;
8957 while (queue)
8958 {
8959 commit_entry *next= queue->next;
8960 queue->next= prev;
8961 prev= queue;
8962 queue= next;
8963 }
8964 DBUG_ASSERT(prev == &entry && prev->thd == thd);
8965 }
8966 else
8967 {
8968 /* Not first in queue; just wait until previous thread wakes us up. */
8969 thd->wait_for_wakeup_ready();
8970 }
8971 }
8972
8973 /* Only run commit_ordered() if log_xid was successful. */
8974 if (cookie)
8975 {
8976 mysql_mutex_lock(&LOCK_commit_ordered);
8977 run_commit_ordered(thd, all);
8978 mysql_mutex_unlock(&LOCK_commit_ordered);
8979 }
8980
8981 if (need_prepare_ordered)
8982 {
8983 commit_entry *next= entry.next;
8984 if (next)
8985 {
8986 next->thd->signal_wakeup_ready();
8987 }
8988 else
8989 {
8990 mysql_mutex_lock(&LOCK_prepare_ordered);
8991 commit_ordered_queue_busy= false;
8992 mysql_cond_signal(&COND_queue_busy);
8993 mysql_mutex_unlock(&LOCK_prepare_ordered);
8994 }
8995 }
8996 }
8997
8998 return cookie;
8999 }
9000
9001
9002 /********* transaction coordinator log for 2pc - mmap() based solution *******/
9003
9004 /*
9005 the log consists of a file, mapped to memory.
9006 file is divided into pages of tc_log_page_size size.
9007 (usable size of the first page is smaller because of the log header)
9008 there is a PAGE control structure for each page
9009 each page (or rather its PAGE control structure) can be in one of
9010 the three states - active, syncing, pool.
9011 there could be only one page in the active or syncing state,
9012 but many in pool - pool is a fifo queue.
9013 the usual lifecycle of a page is pool->active->syncing->pool.
9014 the "active" page is a page where new xid's are logged.
9015 the page stays active as long as the syncing slot is taken.
9016 the "syncing" page is being synced to disk. no new xid can be added to it.
9017 when the syncing is done the page is moved to a pool and an active page
9018 becomes "syncing".
9019
  the result of such an architecture is a natural "commit grouping" -
  If commits are coming faster than the system can sync, they do not
  stall. Instead, all commits that came since the last sync are
  logged to the same "active" page, and they are all synced with the next -
  single - sync. Thus, though individual commits are delayed, throughput
  is not decreasing.
9026
9027 when an xid is added to an active page, the thread of this xid waits
9028 for a page's condition until the page is synced. when syncing slot
9029 becomes vacant one of these waiters is awaken to take care of syncing.
9030 it syncs the page and signals all waiters that the page is synced.
9031 PAGE::waiters is used to count these waiters, and a page may never
9032 become active again until waiters==0 (that is all waiters from the
9033 previous sync have noticed that the sync was completed)
9034
9035 note, that the page becomes "dirty" and has to be synced only when a
9036 new xid is added into it. Removing a xid from a page does not make it
9037 dirty - we don't sync xid removals to disk.
9038 */
9039
/*
  Incremented by TC_LOG_MMAP::overflow() each time a thread has to wait for
  a usable page in the pool.
*/
ulong tc_log_page_waits= 0;

#ifdef HAVE_MMAP

/* Bytes at the start of the log that hold the magic number plus one byte. */
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)

/* Magic number copied to the very beginning of the log by open(). */
static const uchar tc_log_magic[]={(uchar) 254, 0x23, 0x05, 0x74};

/* Length in bytes given to a newly created log file by open(). */
ulong opt_tc_log_size;
/*
  tc_log_page_size is set from my_getpagesize() in open().
  tc_log_cur_pages_used and tc_log_max_pages_used are updated in
  get_active_from_pool() when an empty page is taken into use.
*/
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
9050
9051 int TC_LOG_MMAP::open(const char *opt_name)
9052 {
9053 uint i;
9054 bool crashed=FALSE;
9055 PAGE *pg;
9056
9057 DBUG_ASSERT(total_ha_2pc > 1);
9058 DBUG_ASSERT(opt_name && opt_name[0]);
9059
9060 tc_log_page_size= my_getpagesize();
9061
9062 fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
9063 if ((fd= mysql_file_open(key_file_tclog, logname, O_RDWR | O_CLOEXEC, MYF(0))) < 0)
9064 {
9065 if (my_errno != ENOENT)
9066 goto err;
9067 if (using_heuristic_recover())
9068 return 1;
9069 if ((fd= mysql_file_create(key_file_tclog, logname, CREATE_MODE,
9070 O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0)
9071 goto err;
9072 inited=1;
9073 file_length= opt_tc_log_size;
9074 if (mysql_file_chsize(fd, file_length, 0, MYF(MY_WME)))
9075 goto err;
9076 }
9077 else
9078 {
9079 inited= 1;
9080 crashed= TRUE;
9081 sql_print_information("Recovering after a crash using %s", opt_name);
9082 if (tc_heuristic_recover)
9083 {
9084 sql_print_error("Cannot perform automatic crash recovery when "
9085 "--tc-heuristic-recover is used");
9086 goto err;
9087 }
9088 file_length= mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
9089 if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
9090 goto err;
9091 }
9092
9093 data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
9094 MAP_NOSYNC|MAP_SHARED, fd, 0);
9095 if (data == MAP_FAILED)
9096 {
9097 my_errno=errno;
9098 goto err;
9099 }
9100 inited=2;
9101
9102 npages=(uint)file_length/tc_log_page_size;
9103 if (npages < 3) // to guarantee non-empty pool
9104 goto err;
9105 if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
9106 goto err;
9107 inited=3;
9108 for (pg=pages, i=0; i < npages; i++, pg++)
9109 {
9110 pg->next=pg+1;
9111 pg->waiters=0;
9112 pg->state=PS_POOL;
9113 mysql_mutex_init(key_PAGE_lock, &pg->lock, MY_MUTEX_INIT_FAST);
9114 mysql_cond_init(key_PAGE_cond, &pg->cond, 0);
9115 pg->ptr= pg->start=(my_xid *)(data + i*tc_log_page_size);
9116 pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
9117 pg->end=pg->start + pg->size;
9118 }
9119 pages[0].size=pages[0].free=
9120 (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
9121 pages[0].start=pages[0].end-pages[0].size;
9122 pages[npages-1].next=0;
9123 inited=4;
9124
9125 if (crashed && recover())
9126 goto err;
9127
9128 memcpy(data, tc_log_magic, sizeof(tc_log_magic));
9129 data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc;
9130 my_msync(fd, data, tc_log_page_size, MS_SYNC);
9131 inited=5;
9132
9133 mysql_mutex_init(key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
9134 mysql_mutex_init(key_LOCK_active, &LOCK_active, MY_MUTEX_INIT_FAST);
9135 mysql_mutex_init(key_LOCK_pool, &LOCK_pool, MY_MUTEX_INIT_FAST);
9136 mysql_mutex_init(key_LOCK_pending_checkpoint, &LOCK_pending_checkpoint,
9137 MY_MUTEX_INIT_FAST);
9138 mysql_cond_init(key_COND_active, &COND_active, 0);
9139 mysql_cond_init(key_COND_pool, &COND_pool, 0);
9140 mysql_cond_init(key_TC_LOG_MMAP_COND_queue_busy, &COND_queue_busy, 0);
9141
9142 inited=6;
9143
9144 syncing= 0;
9145 active=pages;
9146 DBUG_ASSERT(npages >= 2);
9147 pool=pages+1;
9148 pool_last_ptr= &((pages+npages-1)->next);
9149 commit_ordered_queue= NULL;
9150 commit_ordered_queue_busy= false;
9151
9152 return 0;
9153
9154 err:
9155 close();
9156 return 1;
9157 }
9158
/**
  there is no active page, let's get one from the pool.

  Two strategies here:
    -# take the first from the pool
    -# if there're waiters - take the one with the most free space.

  If no page in the pool is usable, overflow() is called to wait and the
  search is repeated.

  The chosen page is unlinked from the pool and becomes 'active'. The
  caller must hold LOCK_active (asserted). On return active->lock is held.

  @todo
    page merging. try to allocate adjacent page first,
    so that they can be flushed both in one sync
*/

void TC_LOG_MMAP::get_active_from_pool()
{
  PAGE **p, **best_p=0;
  int best_free;

  mysql_mutex_lock(&LOCK_pool);

  do
  {
    best_p= p= &pool;
    if ((*p)->waiters == 0 && (*p)->free > 0) // can the first page be used ?
      break;                                  // yes - take it.

    best_free=0;            // no - trying second strategy
    /* Scan the rest of the pool for the waiter-free page with most room. */
    for (p=&(*p)->next; *p; p=&(*p)->next)
    {
      if ((*p)->waiters == 0 && (*p)->free > best_free)
      {
        best_free=(*p)->free;
        best_p=p;
      }
    }
  }
  /* Nothing usable found: overflow() waits on COND_pool, then we retry. */
  while ((*best_p == 0 || best_free == 0) && overflow());

  mysql_mutex_assert_owner(&LOCK_active);
  active=*best_p;

  /* Unlink the page from the pool. */
  if (!(*best_p)->next)
    pool_last_ptr= best_p;              // removing the tail of the list
  *best_p=(*best_p)->next;
  mysql_mutex_unlock(&LOCK_pool);

  /* Note: active->lock stays locked when this function returns. */
  mysql_mutex_lock(&active->lock);
  if (active->free == active->size) // we've chosen an empty page
  {
    tc_log_cur_pages_used++;
    set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
  }
}
9212
9213 /**
9214 @todo
9215 perhaps, increase log size ?
9216 */
9217 int TC_LOG_MMAP::overflow()
9218 {
9219 /*
9220 simple overflow handling - just wait
9221 TODO perhaps, increase log size ?
9222 let's check the behaviour of tc_log_page_waits first
9223 */
9224 tc_log_page_waits++;
9225 mysql_cond_wait(&COND_pool, &LOCK_pool);
9226 return 1; // always return 1
9227 }
9228
/**
  Record that transaction XID is committed on the persistent storage.

  This function is called in the middle of two-phase commit:
  First all resources prepare the transaction, then tc_log->log() is called,
  then all resources commit the transaction, then tc_log->unlog() is called.

  All access to active page is serialized but it's not a problem, as
  we're assuming that fsync() will be a main bottleneck.
  That is, parallelizing writes to log pages we'll decrease number of
  threads waiting for a page, but then all these threads will be waiting
  for a fsync() anyway

  If tc_log == MYSQL_LOG then tc_log writes transaction to binlog and
  records XID in a special Xid_log_event.
  If tc_log = TC_LOG_MMAP then xid is written in a special memory-mapped
  log.

  The xid is stored in the first empty slot of the active page. The thread
  then either syncs the page itself (if the syncing slot is vacant, or
  becomes vacant while the page is still dirty) or waits until another
  thread has synced the page.

  @retval
    0  - error
  @retval
    \# - otherwise, "cookie", a number that will be passed as an argument
    to unlog() call. tc_log can define it any way it wants,
    and use for whatever purposes. TC_LOG_MMAP sets it
    to the position in memory where xid was logged to.
*/

int TC_LOG_MMAP::log_one_transaction(my_xid xid)
{
  int err;
  PAGE *p;
  ulong cookie;

  mysql_mutex_lock(&LOCK_active);

  /*
    if the active page is full - just wait...
    frankly speaking, active->free here accessed outside of mutex
    protection, but it's safe, because it only means we may miss an
    unlog() for the active page, and we're not waiting for it here -
    unlog() does not signal COND_active.
  */
  while (unlikely(active && active->free == 0))
    mysql_cond_wait(&COND_active, &LOCK_active);

  /* no active page ? take one from the pool */
  if (active == 0)
    get_active_from_pool();             // returns with active->lock held
  else
    mysql_mutex_lock(&active->lock);

  p=active;

  /*
    p->free is always > 0 here because to decrease it one needs
    to take p->lock and before it one needs to take LOCK_active.
    But checked that active->free > 0 under LOCK_active and
    haven't release it ever since
  */

  /* searching for an empty slot */
  while (*p->ptr)
  {
    p->ptr++;
    DBUG_ASSERT(p->ptr < p->end);               // because p->free > 0
  }

  /* found! store xid there and mark the page dirty */
  /* The cookie is the byte offset of the slot from the start of the map. */
  cookie= (ulong)((uchar *)p->ptr - data);      // can never be zero
  *p->ptr++= xid;
  p->free--;
  p->state= PS_DIRTY;
  mysql_mutex_unlock(&p->lock);

  mysql_mutex_lock(&LOCK_sync);
  if (syncing)
  {                                          // somebody's syncing. let's wait
    mysql_mutex_unlock(&LOCK_active);
    mysql_mutex_lock(&p->lock);
    p->waiters++;
    /*
      Wait until our page is no longer dirty (somebody synced it) or the
      syncing slot becomes vacant (then we sync it ourselves below).
    */
    while (p->state == PS_DIRTY && syncing)
    {
      mysql_mutex_unlock(&p->lock);
      mysql_cond_wait(&p->cond, &LOCK_sync);
      mysql_mutex_lock(&p->lock);
    }
    p->waiters--;
    err= p->state == PS_ERROR;
    if (p->state != PS_DIRTY)                   // page was synced
    {
      mysql_mutex_unlock(&LOCK_sync);
      if (p->waiters == 0)
        mysql_cond_signal(&COND_pool);     // in case somebody's waiting
      mysql_mutex_unlock(&p->lock);
      goto done;                             // we're done
    }
    DBUG_ASSERT(!syncing);
    mysql_mutex_unlock(&p->lock);
    syncing = p;
    mysql_mutex_unlock(&LOCK_sync);

    mysql_mutex_lock(&LOCK_active);
    active=0;                                  // page is not active anymore
    mysql_cond_broadcast(&COND_active);
    mysql_mutex_unlock(&LOCK_active);
  }
  else
  {
    syncing = p;                               // place is vacant - take it
    mysql_mutex_unlock(&LOCK_sync);
    active = 0;                                // page is not active anymore
    mysql_cond_broadcast(&COND_active);
    mysql_mutex_unlock(&LOCK_active);
  }
  err= sync();

done:
  /*
    NOTE(review): cookie is an ulong but the function returns int; an offset
    that does not fit in an int would not survive the conversion. Confirm
    the maximum log size this is expected to handle.
  */
  return err ? 0 : cookie;
}
9348
/**
  Flush the page referenced by 'syncing' to disk with my_msync(), append it
  to the tail of the page pool and then release the 'syncing' slot.

  The page state becomes PS_ERROR when my_msync() returns non-zero and
  PS_POOL otherwise. Waiters on the page's condition variable are woken up,
  and if there is an active page one waiter on its condition variable is
  signalled so that it can become the next syncer.

  @return the value returned by my_msync() (non-zero is handled as an error)
*/
int TC_LOG_MMAP::sync()
{
  int err;

  DBUG_ASSERT(syncing != active);

  /*
    sit down and relax - this can take a while...
    note - no locks are held at this point
  */
  err= my_msync(fd, syncing->start, syncing->size * sizeof(my_xid), MS_SYNC);

  /* page is synced. let's move it to the pool (appended at the tail) */
  mysql_mutex_lock(&LOCK_pool);
  (*pool_last_ptr)=syncing;
  pool_last_ptr=&(syncing->next);
  syncing->next=0;
  syncing->state= err ? PS_ERROR : PS_POOL;
  mysql_cond_signal(&COND_pool);           // in case somebody's waiting
  mysql_mutex_unlock(&LOCK_pool);

  /* marking 'syncing' slot free */
  mysql_mutex_lock(&LOCK_sync);
  mysql_cond_broadcast(&syncing->cond);    // signal "sync done"
  syncing=0;
  /*
    we check the "active" pointer without LOCK_active. Still, it's safe -
    "active" can change from NULL to not NULL any time, but it
    will take LOCK_sync before waiting on active->cond. That is, it can never
    miss a signal.
    And "active" can change to NULL only by the syncing thread
    (the thread that will send a signal below)
  */
  if (active)
    mysql_cond_signal(&active->cond);      // wake up a new syncer
  mysql_mutex_unlock(&LOCK_sync);
  return err;
}
9387
9388 static void
9389 mmap_do_checkpoint_callback(void *data)
9390 {
9391 TC_LOG_MMAP::pending_cookies *pending=
9392 static_cast<TC_LOG_MMAP::pending_cookies *>(data);
9393 ++pending->pending_count;
9394 }
9395
9396 int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
9397 {
9398 pending_cookies *full_buffer= NULL;
9399 uint32 ncookies= tc_log_page_size / sizeof(my_xid);
9400 DBUG_ASSERT(*(my_xid *)(data+cookie) == xid);
9401
9402 /*
9403 Do not delete the entry immediately, as there may be participating storage
9404 engines which implement commit_checkpoint_request(), and thus have not yet
9405 flushed the commit durably to disk.
9406
9407 Instead put it in a queue - and periodically, we will request a checkpoint
9408 from all engines and delete a whole batch at once.
9409 */
9410 mysql_mutex_lock(&LOCK_pending_checkpoint);
9411 if (pending_checkpoint == NULL)
9412 {
9413 uint32 size= sizeof(*pending_checkpoint) + sizeof(ulong) * (ncookies - 1);
9414 if (!(pending_checkpoint=
9415 (pending_cookies *)my_malloc(size, MYF(MY_ZEROFILL))))
9416 {
9417 my_error(ER_OUTOFMEMORY, MYF(0), size);
9418 mysql_mutex_unlock(&LOCK_pending_checkpoint);
9419 return 1;
9420 }
9421 }
9422
9423 pending_checkpoint->cookies[pending_checkpoint->count++]= cookie;
9424 if (pending_checkpoint->count == ncookies)
9425 {
9426 full_buffer= pending_checkpoint;
9427 pending_checkpoint= NULL;
9428 }
9429 mysql_mutex_unlock(&LOCK_pending_checkpoint);
9430
9431 if (full_buffer)
9432 {
9433 /*
9434 We do an extra increment and notify here - this ensures that
9435 things work also if there are no engines at all that support
9436 commit_checkpoint_request.
9437 */
9438 ++full_buffer->pending_count;
9439 ha_commit_checkpoint_request(full_buffer, mmap_do_checkpoint_callback);
9440 commit_checkpoint_notify(full_buffer);
9441 }
9442 return 0;
9443 }
9444
9445
9446 void
9447 TC_LOG_MMAP::commit_checkpoint_notify(void *cookie)
9448 {
9449 uint count;
9450 pending_cookies *pending= static_cast<pending_cookies *>(cookie);
9451 mysql_mutex_lock(&LOCK_pending_checkpoint);
9452 DBUG_ASSERT(pending->pending_count > 0);
9453 count= --pending->pending_count;
9454 mysql_mutex_unlock(&LOCK_pending_checkpoint);
9455 if (count == 0)
9456 {
9457 uint i;
9458 for (i= 0; i < tc_log_page_size / sizeof(my_xid); ++i)
9459 delete_entry(pending->cookies[i]);
9460 my_free(pending);
9461 }
9462 }
9463
9464
9465 /**
9466 erase xid from the page, update page free space counters/pointers.
9467 cookie points directly to the memory where xid was logged.
9468 */
9469
9470 int TC_LOG_MMAP::delete_entry(ulong cookie)
9471 {
9472 PAGE *p=pages+(cookie/tc_log_page_size);
9473 my_xid *x=(my_xid *)(data+cookie);
9474
9475 DBUG_ASSERT(x >= p->start && x < p->end);
9476
9477 mysql_mutex_lock(&p->lock);
9478 *x=0;
9479 p->free++;
9480 DBUG_ASSERT(p->free <= p->size);
9481 set_if_smaller(p->ptr, x);
9482 if (p->free == p->size) // the page is completely empty
9483 statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
9484 if (p->waiters == 0) // the page is in pool and ready to rock
9485 mysql_cond_signal(&COND_pool); // ping ... for overflow()
9486 mysql_mutex_unlock(&p->lock);
9487 return 0;
9488 }
9489
9490 void TC_LOG_MMAP::close()
9491 {
9492 uint i;
9493 switch (inited) {
9494 case 6:
9495 mysql_mutex_destroy(&LOCK_sync);
9496 mysql_mutex_destroy(&LOCK_active);
9497 mysql_mutex_destroy(&LOCK_pool);
9498 mysql_mutex_destroy(&LOCK_pending_checkpoint);
9499 mysql_cond_destroy(&COND_pool);
9500 mysql_cond_destroy(&COND_active);
9501 mysql_cond_destroy(&COND_queue_busy);
9502 /* fall through */
9503 case 5:
9504 data[0]='A'; // garble the first (signature) byte, in case mysql_file_delete fails
9505 /* fall through */
9506 case 4:
9507 for (i=0; i < npages; i++)
9508 {
9509 if (pages[i].ptr == 0)
9510 break;
9511 mysql_mutex_destroy(&pages[i].lock);
9512 mysql_cond_destroy(&pages[i].cond);
9513 }
9514 /* fall through */
9515 case 3:
9516 my_free(pages);
9517 /* fall through */
9518 case 2:
9519 my_munmap((char*)data, (size_t)file_length);
9520 /* fall through */
9521 case 1:
9522 mysql_file_close(fd, MYF(0));
9523 }
9524 if (inited>=5) // cannot do in the switch because of Windows
9525 mysql_file_delete(key_file_tclog, logname, MYF(MY_WME));
9526 if (pending_checkpoint)
9527 my_free(pending_checkpoint);
9528 inited=0;
9529 }
9530
9531
9532 int TC_LOG_MMAP::recover()
9533 {
9534 HASH xids;
9535 PAGE *p=pages, *end_p=pages+npages;
9536
9537 if (bcmp(data, tc_log_magic, sizeof(tc_log_magic)))
9538 {
9539 sql_print_error("Bad magic header in tc log");
9540 goto err1;
9541 }
9542
9543 /*
9544 the first byte after magic signature is set to current
9545 number of storage engines on startup
9546 */
9547 if (data[sizeof(tc_log_magic)] > total_ha_2pc)
9548 {
9549 sql_print_error("Recovery failed! You must enable "
9550 "all engines that were enabled at the moment of the crash");
9551 goto err1;
9552 }
9553
9554 if (my_hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
9555 sizeof(my_xid), 0, 0, MYF(0)))
9556 goto err1;
9557
9558 for ( ; p < end_p ; p++)
9559 {
9560 for (my_xid *x=p->start; x < p->end; x++)
9561 if (*x && my_hash_insert(&xids, (uchar *)x))
9562 goto err2; // OOM
9563 }
9564
9565 if (ha_recover(&xids))
9566 goto err2;
9567
9568 my_hash_free(&xids);
9569 bzero(data, (size_t)file_length);
9570 return 0;
9571
9572 err2:
9573 my_hash_free(&xids);
9574 err1:
9575 sql_print_error("Crash recovery failed. Either correct the problem "
9576 "(if it's, for example, out of memory error) and restart, "
9577 "or delete tc log and start mysqld with "
9578 "--tc-heuristic-recover={commit|rollback}");
9579 return 1;
9580 }
9581 #endif
9582
/*
  Global transaction coordinator objects: a pointer to a TC_LOG (presumably
  the implementation selected at startup - confirm where it is assigned),
  plus one instance each of the dummy and the memory-mapped implementation.
*/
TC_LOG *tc_log;
TC_LOG_DUMMY tc_log_dummy;
TC_LOG_MMAP tc_log_mmap;
9586
9587 /**
9588 Perform heuristic recovery, if --tc-heuristic-recover was used.
9589
9590 @note
9591 no matter whether heuristic recovery was successful or not
9592 mysqld must exit. So, return value is the same in both cases.
9593
9594 @retval
9595 0 no heuristic recovery was requested
9596 @retval
9597 1 heuristic recovery was performed
9598 */
9599
9600 int TC_LOG::using_heuristic_recover()
9601 {
9602 if (!tc_heuristic_recover)
9603 return 0;
9604
9605 sql_print_information("Heuristic crash recovery mode");
9606 if (ha_recover(0))
9607 sql_print_error("Heuristic crash recovery failed");
9608 sql_print_information("Please restart mysqld without --tc-heuristic-recover");
9609 return 1;
9610 }
9611
9612 /****** transaction coordinator log for 2pc - binlog() based solution ******/
9613 #define TC_LOG_BINLOG MYSQL_BIN_LOG
9614
9615 int TC_LOG_BINLOG::open(const char *opt_name)
9616 {
9617 int error= 1;
9618
9619 DBUG_ASSERT(total_ha_2pc > 1);
9620 DBUG_ASSERT(opt_name && opt_name[0]);
9621
9622 if (!my_b_inited(&index_file))
9623 {
9624 /* There was a failure to open the index file, can't open the binlog */
9625 cleanup();
9626 return 1;
9627 }
9628
9629 if (using_heuristic_recover())
9630 {
9631 mysql_mutex_lock(&LOCK_log);
9632 /* generate a new binlog to mask a corrupted one */
9633 open(opt_name, LOG_BIN, 0, 0, WRITE_CACHE, max_binlog_size, 0, TRUE);
9634 mysql_mutex_unlock(&LOCK_log);
9635 cleanup();
9636 return 1;
9637 }
9638
9639 error= do_binlog_recovery(opt_name, true);
9640 binlog_state_recover_done= true;
9641 return error;
9642 }
9643
/**
  This is called on shutdown, after ha_panic.

  The body is empty: nothing is released or written here.
*/
void TC_LOG_BINLOG::close()
{
}
9648
9649 /*
9650 Do a binlog log_xid() for a group of transactions, linked through
9651 thd->next_commit_ordered.
9652 */
9653 int
9654 TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
9655 bool need_prepare_ordered __attribute__((unused)),
9656 bool need_commit_ordered __attribute__((unused)))
9657 {
9658 int err;
9659 DBUG_ENTER("TC_LOG_BINLOG::log_and_order");
9660
9661 binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data();
9662 if (!cache_mngr)
9663 {
9664 WSREP_DEBUG("Skipping empty log_xid: %s", thd->query());
9665 DBUG_RETURN(0);
9666 }
9667
9668 cache_mngr->using_xa= TRUE;
9669 cache_mngr->xa_xid= xid;
9670 err= binlog_commit_flush_xid_caches(thd, cache_mngr, all, xid);
9671
9672 DEBUG_SYNC(thd, "binlog_after_log_and_order");
9673
9674 if (err)
9675 DBUG_RETURN(0);
9676
9677 bool need_unlog= cache_mngr->need_unlog;
9678 /*
9679 The transaction won't need the flag anymore.
9680 Todo/fixme: consider to move the statement into cache_mngr->reset()
9681 relocated to the current or later point.
9682 */
9683 cache_mngr->need_unlog= false;
9684 /*
9685 If using explicit user XA, we will not have XID. We must still return a
9686 non-zero cookie (as zero cookie signals error).
9687 */
9688 if (!xid || !need_unlog)
9689 DBUG_RETURN(BINLOG_COOKIE_DUMMY(cache_mngr->delayed_error));
9690
9691 DBUG_RETURN(BINLOG_COOKIE_MAKE(cache_mngr->binlog_id,
9692 cache_mngr->delayed_error));
9693 }
9694
9695 /*
9696 After an XID is logged, we need to hold on to the current binlog file until
9697 it is fully committed in the storage engine. The reason is that crash
9698 recovery only looks at the latest binlog, so we must make sure there are no
9699 outstanding prepared (but not committed) transactions before rotating the
9700 binlog.
9701
9702 To handle this, we keep a count of outstanding XIDs. This function is used
9703 to increase this count when committing one or more transactions to the
9704 binary log.
9705 */
9706 void
9707 TC_LOG_BINLOG::mark_xids_active(ulong binlog_id, uint xid_count)
9708 {
9709 xid_count_per_binlog *b;
9710
9711 DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
9712 DBUG_PRINT("info", ("binlog_id=%lu xid_count=%u", binlog_id, xid_count));
9713
9714 mysql_mutex_lock(&LOCK_xid_list);
9715 I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
9716 while ((b= it++))
9717 {
9718 if (b->binlog_id == binlog_id)
9719 {
9720 b->xid_count += xid_count;
9721 break;
9722 }
9723 }
9724 /*
9725 As we do not delete elements until count reach zero, elements should always
9726 be found.
9727 */
9728 DBUG_ASSERT(b);
9729 mysql_mutex_unlock(&LOCK_xid_list);
9730 DBUG_VOID_RETURN;
9731 }
9732
/*
  Once an XID is committed, it can no longer be needed during crash recovery,
  as it has been durably recorded on disk as "committed".

  This function is called to mark an XID this way. It needs to decrease the
  count of pending XIDs in the corresponding binlog. When the count reaches
  zero (for an "old" binlog that is not the active one), that binlog file no
  longer needs to be scanned during crash recovery, so we can log a new binlog
  checkpoint.

  binlog_id identifies the binlog whose counter is decremented. When
  write_checkpoint is false, the function returns after the decrement without
  writing a binlog checkpoint event.
*/
void
TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
{
  xid_count_per_binlog *b;
  bool first;
  ulong current;

  DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");

  mysql_mutex_lock(&LOCK_xid_list);
  current= current_binlog_id;
  I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
  /* 'first' stays true only if the matching entry is the head of the list. */
  first= true;
  while ((b= it++))
  {
    if (b->binlog_id == binlog_id)
    {
      --b->xid_count;

      DBUG_ASSERT(b->xid_count >= 0); // catch unmatched (++) decrement

      break;
    }
    first= false;
  }
  /* Binlog is always found, as we do not remove until count reaches 0 */
  DBUG_ASSERT(b);
  /*
    If a RESET MASTER is pending, we are about to remove all log files, and
    the RESET MASTER thread is waiting for all pending unlog() calls to
    complete while holding LOCK_log. In this case we should not log a binlog
    checkpoint event (it would be deleted immediately anyway and we would
    deadlock on LOCK_log) but just signal the thread.
  */
  if (unlikely(reset_master_pending))
  {
    mysql_cond_broadcast(&COND_xid_list);
    mysql_mutex_unlock(&LOCK_xid_list);
    DBUG_VOID_RETURN;
  }

  /*
    A checkpoint is only due when an old (non-current) binlog at the head of
    the list has just dropped to zero and the caller asked for checkpoints.
  */
  if (likely(binlog_id == current) || b->xid_count != 0 || !first ||
      !write_checkpoint)
  {
    /* No new binlog checkpoint reached yet. */
    mysql_mutex_unlock(&LOCK_xid_list);
    DBUG_VOID_RETURN;
  }

  /*
    Now log a binlog checkpoint for the first binlog file with a non-zero count.

    Note that it is possible (though perhaps unlikely) that when count of
    binlog (N-2) drops to zero, binlog (N-1) is already at zero. So we may
    need to skip several entries before we find the one to log in the binlog
    checkpoint event.

    We chain the locking of LOCK_xid_list and LOCK_log, so that we ensure that
    Binlog_checkpoint_events are logged in order. This simplifies recovery a
    bit, as it can just take the last binlog checkpoint in the log, rather
    than compare all found against each other to find the one pointing to the
    most recent binlog.

    Note also that we need to first release LOCK_xid_list, then acquire
    LOCK_log, then re-acquire LOCK_xid_list. If we were to take LOCK_log while
    holding LOCK_xid_list, we might deadlock with other threads that take the
    locks in the opposite order.
  */

  ++mark_xid_done_waiting;
  mysql_mutex_unlock(&LOCK_xid_list);
  mysql_mutex_lock(&LOCK_log);
  mysql_mutex_lock(&LOCK_xid_list);
  --mark_xid_done_waiting;
  mysql_cond_broadcast(&COND_xid_list);
  /* We need to reload current_binlog_id due to release/re-take of lock. */
  current= current_binlog_id;

  for (;;)
  {
    /* Remove initial element(s) with zero count. */
    b= binlog_xid_count_list.head();
    /*
      We must not remove all elements in the list - the entry for the current
      binlog must be present always.
    */
    DBUG_ASSERT(b);
    if (b->binlog_id == current || b->xid_count > 0)
      break;
    WSREP_XID_LIST_ENTRY("TC_LOG_BINLOG::mark_xid_done(): Removing "
                         "xid_list_entry for %s (%lu)", b);
    delete binlog_xid_count_list.get();
  }

  /* LOCK_log is still held while the checkpoint event is written. */
  mysql_mutex_unlock(&LOCK_xid_list);
  write_binlog_checkpoint_event_already_locked(b->binlog_name,
                                               b->binlog_name_len);
  mysql_mutex_unlock(&LOCK_log);
  DBUG_VOID_RETURN;
}
9843
9844 int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
9845 {
9846 DBUG_ENTER("TC_LOG_BINLOG::unlog");
9847 if (!xid)
9848 DBUG_RETURN(0);
9849
9850 if (!BINLOG_COOKIE_IS_DUMMY(cookie))
9851 mark_xid_done(BINLOG_COOKIE_GET_ID(cookie), true);
9852 /*
9853 See comment in trx_group_commit_leader() - if rotate() gave a failure,
9854 we delay the return of error code to here.
9855 */
9856 DBUG_RETURN(BINLOG_COOKIE_GET_ERROR_FLAG(cookie));
9857 }
9858
9859 void
9860 TC_LOG_BINLOG::commit_checkpoint_notify(void *cookie)
9861 {
9862 xid_count_per_binlog *entry= static_cast<xid_count_per_binlog *>(cookie);
9863 bool found_entry= false;
9864 mysql_mutex_lock(&LOCK_binlog_background_thread);
9865 /* count the same notification kind from different engines */
9866 for (xid_count_per_binlog *link= binlog_background_thread_queue;
9867 link && !found_entry; link= link->next_in_queue)
9868 {
9869 if ((found_entry= (entry == link)))
9870 entry->notify_count++;
9871 }
9872 if (!found_entry)
9873 {
9874 entry->next_in_queue= binlog_background_thread_queue;
9875 binlog_background_thread_queue= entry;
9876 }
9877 mysql_cond_signal(&COND_binlog_background_thread);
9878 mysql_mutex_unlock(&LOCK_binlog_background_thread);
9879 }
9880
/*
  Binlog background thread.

  This thread is used to log binlog checkpoints in the background, rather than
  in the context of random storage engine threads that happen to call
  commit_checkpoint_notify_ha() and may not like the delays while syncing
  binlog to disk or may not be setup with all my_thread_init() and other
  necessary stuff.

  In the future, this thread could also be used to do log rotation in the
  background, which could eliminate all stalls around binlog rotations.

  The thread sets up its own THD, loads the slave GTID state (when built with
  replication), announces that it has started, and then loops: wait for
  queued notifications or a stop request, detach the queue, and call
  mark_xid_done() for every queued entry. The argument is unused; the
  function returns 0.
*/
pthread_handler_t
binlog_background_thread(void *arg __attribute__((unused)))
{
  bool stop;
  MYSQL_BIN_LOG::xid_count_per_binlog *queue, *next;
  THD *thd;
  my_thread_init();
  DBUG_ENTER("binlog_background_thread");

  thd= new THD(next_thread_id());
  thd->system_thread= SYSTEM_THREAD_BINLOG_BACKGROUND;
  thd->thread_stack= (char*) &thd;           /* Set approximate stack start */
  thd->store_globals();
  thd->security_ctx->skip_grants();
  thd->set_command(COM_DAEMON);

  /*
    Load the slave replication GTID state from the mysql.gtid_slave_pos
    table.

    This is mostly so that we can start our seq_no counter from the highest
    seq_no seen by a slave. This way, we have a way to tell if a transaction
    logged by ourselves as master is newer or older than a replicated
    transaction.
  */
#ifdef HAVE_REPLICATION
  if (rpl_load_gtid_slave_state(thd))
    sql_print_warning("Failed to load slave replication state from table "
                      "%s.%s: %u: %s", "mysql",
                      rpl_gtid_slave_state_table_name.str,
                      thd->get_stmt_da()->sql_errno(),
                      thd->get_stmt_da()->message());
#endif

  /* Tell start_binlog_background_thread() that start-up is complete. */
  mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
  binlog_background_thread_started= true;
  mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
  mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

  for (;;)
  {
    /*
      Wait until there is something in the queue to process, or we are asked
      to shut down.
    */
    THD_STAGE_INFO(thd, stage_binlog_waiting_background_tasks);
    mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
    for (;;)
    {
      stop= binlog_background_thread_stop;
      queue= binlog_background_thread_queue;
      if (stop && !mysql_bin_log.is_xidlist_idle())
      {
        /*
          Delay stop until all pending binlog checkpoints have been processed.
        */
        stop= false;
      }
      if (stop || queue)
        break;
      mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread,
                      &mysql_bin_log.LOCK_binlog_background_thread);
    }
    /* Grab the queue, if any. */
    binlog_background_thread_queue= NULL;
    mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

    /* Process any incoming commit_checkpoint_notify() calls. */
    DBUG_EXECUTE_IF("inject_binlog_background_thread_before_mark_xid_done",
      DBUG_ASSERT(!debug_sync_set_action(
        thd,
        STRING_WITH_LEN("binlog_background_thread_before_mark_xid_done "
                        "SIGNAL injected_binlog_background_thread "
                        "WAIT_FOR something_that_will_never_happen "
                        "TIMEOUT 2")));
      );
    while (queue)
    {
      long count= queue->notify_count;
      THD_STAGE_INFO(thd, stage_binlog_processing_checkpoint_notify);
      DEBUG_SYNC(thd, "binlog_background_thread_before_mark_xid_done");
      /* Set the thread start time */
      thd->set_time();
      /* Grab next pointer first, as mark_xid_done() may free the element. */
      next= queue->next_in_queue;
      queue->notify_count= 0;
      /*
        "<=": one call for the notification that queued the entry, plus one
        for each further notification counted in notify_count.
      */
      for (long i= 0; i <= count; i++)
        mysql_bin_log.mark_xid_done(queue->binlog_id, true);
      queue= next;

      DBUG_EXECUTE_IF("binlog_background_checkpoint_processed",
        DBUG_ASSERT(!debug_sync_set_action(
          thd,
          STRING_WITH_LEN("now SIGNAL binlog_background_checkpoint_processed")));
        );
    }

    if (stop)
      break;
  }

  THD_STAGE_INFO(thd, stage_binlog_stopping_background_thread);

  /* No need to use mutex as thd is not linked into other threads */
  delete thd;

  my_thread_end();

  /* Signal that we are (almost) stopped. */
  mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
  binlog_background_thread_stop= false;
  mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
  mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

  DBUG_RETURN(0);
}
10009
#ifdef HAVE_PSI_INTERFACE
/* Instrumentation key used when creating the binlog background thread. */
static PSI_thread_key key_thread_binlog;

/* Thread instrumentation registered by start_binlog_background_thread(). */
static PSI_thread_info all_binlog_threads[]=
{
  { &key_thread_binlog, "binlog_background", PSI_FLAG_GLOBAL},
};
#endif /* HAVE_PSI_INTERFACE */
10018
10019 static bool
10020 start_binlog_background_thread()
10021 {
10022 pthread_t th;
10023
10024 #ifdef HAVE_PSI_INTERFACE
10025 if (PSI_server)
10026 PSI_server->register_thread("sql", all_binlog_threads,
10027 array_elements(all_binlog_threads));
10028 #endif
10029
10030 if (mysql_thread_create(key_thread_binlog, &th, &connection_attrib,
10031 binlog_background_thread, NULL))
10032 return 1;
10033
10034 /*
10035 Wait for the thread to have started (so we know that the slave replication
10036 state is loaded and we have correct global_gtid_counter).
10037 */
10038 mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
10039 while (!binlog_background_thread_started)
10040 mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread_end,
10041 &mysql_bin_log.LOCK_binlog_background_thread);
10042 mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);
10043
10044 return 0;
10045 }
10046
10047
/**
  Scan binlog file(s) after a crash: rebuild the global GTID binlog state
  from the latest file and, when do_xa is set, collect the XIDs found and
  pass them to ha_recover().

  @param linfo          binlog index position, used with find_log_pos() and
                        find_next_log() when older binlog files are scanned
  @param last_log_name  name of the latest binlog file
  @param first_log      IO_CACHE on the latest binlog file; read during the
                        first round
  @param fdle           format description event used for reading events;
                        its LOG_EVENT_BINLOG_IN_USE_F flag is cleared here
  @param do_xa          true to collect XIDs and run ha_recover(); when false
                        only the first file is scanned

  @retval 0  success
  @retval 1  failure (an error has been written to the error log)
*/
int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
                           IO_CACHE *first_log,
                           Format_description_log_event *fdle, bool do_xa)
{
  Log_event *ev= NULL;
  HASH xids;
  MEM_ROOT mem_root;
  char binlog_checkpoint_name[FN_REFLEN];
  bool binlog_checkpoint_found;
  bool first_round;
  IO_CACHE log;
  File file= -1;
  const char *errmsg;
#ifdef HAVE_REPLICATION
  rpl_gtid last_gtid;
  bool last_gtid_standalone= false;
  bool last_gtid_valid= false;
#endif

  /* The xid hash and the mem_root are only set up (and freed) when do_xa. */
  if (! fdle->is_valid() ||
      (do_xa && my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
                             sizeof(my_xid), 0, 0, MYF(0))))
    goto err1;

  if (do_xa)
    init_alloc_root(&mem_root, "TC_LOG_BINLOG", TC_LOG_PAGE_SIZE,
                    TC_LOG_PAGE_SIZE, MYF(0));

  fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error

  /*
    Scan the binlog for XIDs that need to be committed if still in the
    prepared stage.

    Start with the latest binlog file, then continue with any other binlog
    files if the last found binlog checkpoint indicates it is needed.
  */

  binlog_checkpoint_found= false;
  first_round= true;
  for (;;)
  {
    /* First round reads the caller's cache, later rounds the local one. */
    while ((ev= Log_event::read_log_event(first_round ? first_log : &log,
                                          fdle, opt_master_verify_checksum))
           && ev->is_valid())
    {
      enum Log_event_type typ= ev->get_type_code();
      switch (typ)
      {
      case XID_EVENT:
      {
        if (do_xa)
        {
          /* Copy the xid into mem_root; the event is deleted below. */
          Xid_log_event *xev=(Xid_log_event *)ev;
          uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
                                          sizeof(xev->xid));
          if (!x || my_hash_insert(&xids, x))
            goto err2;
        }
        break;
      }
      case BINLOG_CHECKPOINT_EVENT:
        if (first_round && do_xa)
        {
          size_t dir_len;
          Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev;
          if (cev->binlog_file_len >= FN_REFLEN)
            sql_print_warning("Incorrect binlog checkpoint event with too "
                              "long file name found.");
          else
          {
            /*
              Note that we cannot use make_log_name() here, as we have not yet
              initialised MYSQL_BIN_LOG::log_file_name.
            */
            dir_len= dirname_length(last_log_name);
            strmake(strnmov(binlog_checkpoint_name, last_log_name, dir_len),
                    cev->binlog_file_name, FN_REFLEN - 1 - dir_len);
            binlog_checkpoint_found= true;
          }
        }
        break;
      case GTID_LIST_EVENT:
        if (first_round)
        {
          Gtid_list_log_event *glev= (Gtid_list_log_event *)ev;

          /* Initialise the binlog state from the Gtid_list event. */
          if (rpl_global_gtid_binlog_state.load(glev->list, glev->count))
            goto err2;
        }
        break;

#ifdef HAVE_REPLICATION
      case GTID_EVENT:
        if (first_round)
        {
          Gtid_log_event *gev= (Gtid_log_event *)ev;

          /* Update the binlog state with any GTID logged after Gtid_list. */
          last_gtid.domain_id= gev->domain_id;
          last_gtid.server_id= gev->server_id;
          last_gtid.seq_no= gev->seq_no;
          last_gtid_standalone=
            ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false);
          last_gtid_valid= true;
        }
        break;
#endif

      case START_ENCRYPTION_EVENT:
        {
          if (fdle->start_decryption((Start_encryption_log_event*) ev))
            goto err2;
        }
        break;

      default:
        /* Nothing. */
        break;
      }

#ifdef HAVE_REPLICATION
      /*
        Record the remembered GTID in the binlog state once the end of its
        event group is seen: for a standalone GTID the first event that is
        not part of a group, otherwise an XID event or a COMMIT/ROLLBACK
        query event.
      */
      if (last_gtid_valid &&
          ((last_gtid_standalone && !ev->is_part_of_group(typ)) ||
           (!last_gtid_standalone &&
            (typ == XID_EVENT ||
             (LOG_EVENT_IS_QUERY(typ) &&
              (((Query_log_event *)ev)->is_commit() ||
               ((Query_log_event *)ev)->is_rollback()))))))
      {
        if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false))
          goto err2;
        last_gtid_valid= false;
      }
#endif

      delete ev;
      ev= NULL;
    }

    /*
      NOTE(review): if the loop above stopped on a non-NULL event for which
      is_valid() is false, that event is not deleted on the paths below that
      do not go through err2 - confirm whether read_log_event() can return
      such an event.
    */

    if (!do_xa)
      break;
    /*
      If the last binlog checkpoint event points to an older log, we have to
      scan all logs from there also, to get all possible XIDs to recover.

      If there was no binlog checkpoint event at all, this means the log was
      written by an older version of MariaDB (or MySQL) - these always have an
      (implicit) binlog checkpoint event at the start of the last binlog file.
    */
    if (first_round)
    {
      if (!binlog_checkpoint_found)
        break;
      first_round= false;
      DBUG_EXECUTE_IF("xa_recover_expect_master_bin_000004",
          if (0 != strcmp("./master-bin.000004", binlog_checkpoint_name) &&
              0 != strcmp(".\\master-bin.000004", binlog_checkpoint_name))
            DBUG_SUICIDE();
        );
      if (find_log_pos(linfo, binlog_checkpoint_name, 1))
      {
        sql_print_error("Binlog file '%s' not found in binlog index, needed "
                        "for recovery. Aborting.", binlog_checkpoint_name);
        goto err2;
      }
    }
    else
    {
      /* Done with the older file opened in the previous round. */
      end_io_cache(&log);
      mysql_file_close(file, MYF(MY_WME));
      file= -1;
    }

    if (!strcmp(linfo->log_file_name, last_log_name))
      break;                                    // No more files to do
    if ((file= open_binlog(&log, linfo->log_file_name, &errmsg)) < 0)
    {
      sql_print_error("%s", errmsg);
      goto err2;
    }
    /*
      We do not need to read the Format_description_log_event of other binlog
      files. It is not possible for a binlog checkpoint to span multiple
      binlog files written by different versions of the server. So we can use
      the first one read for reading from all binlog files.
    */
    if (find_next_log(linfo, 1))
    {
      sql_print_error("Error reading binlog files during recovery. Aborting.");
      goto err2;
    }
    fdle->reset_crypto();
  }

  if (do_xa)
  {
    if (ha_recover(&xids))
      goto err2;

    free_root(&mem_root, MYF(0));
    my_hash_free(&xids);
  }
  return 0;

err2:
  /* Release whatever is still held: current event, open file, xid storage. */
  delete ev;
  if (file >= 0)
  {
    end_io_cache(&log);
    mysql_file_close(file, MYF(MY_WME));
  }
  if (do_xa)
  {
    free_root(&mem_root, MYF(0));
    my_hash_free(&xids);
  }
err1:
  sql_print_error("Crash recovery failed. Either correct the problem "
                  "(if it's, for example, out of memory error) and restart, "
                  "or delete (or rename) binary log and start mysqld with "
                  "--tc-heuristic-recover={commit|rollback}");
  return 1;
}
10273
10274
10275 int
10276 MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
10277 {
10278 LOG_INFO log_info;
10279 const char *errmsg;
10280 IO_CACHE log;
10281 File file;
10282 Log_event *ev= 0;
10283 Format_description_log_event fdle(BINLOG_VERSION);
10284 char log_name[FN_REFLEN];
10285 int error;
10286
10287 if (unlikely((error= find_log_pos(&log_info, NullS, 1))))
10288 {
10289 /*
10290 If there are no binlog files (LOG_INFO_EOF), then we still try to read
10291 the .state file to restore the binlog state. This allows to copy a server
10292 to provision a new one without copying the binlog files (except the
10293 master-bin.state file) and still preserve the correct binlog state.
10294 */
10295 if (error != LOG_INFO_EOF)
10296 sql_print_error("find_log_pos() failed (error: %d)", error);
10297 else
10298 {
10299 error= read_state_from_file();
10300 if (error == 2)
10301 {
10302 /*
10303 No binlog files and no binlog state is not an error (eg. just initial
10304 server start after fresh installation).
10305 */
10306 error= 0;
10307 }
10308 }
10309 return error;
10310 }
10311
10312 if (! fdle.is_valid())
10313 return 1;
10314
10315 do
10316 {
10317 strmake_buf(log_name, log_info.log_file_name);
10318 } while (!(error= find_next_log(&log_info, 1)));
10319
10320 if (error != LOG_INFO_EOF)
10321 {
10322 sql_print_error("find_log_pos() failed (error: %d)", error);
10323 return error;
10324 }
10325
10326 if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
10327 {
10328 sql_print_error("%s", errmsg);
10329 return 1;
10330 }
10331
10332 if ((ev= Log_event::read_log_event(&log, &fdle,
10333 opt_master_verify_checksum)) &&
10334 ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
10335 {
10336 if (ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
10337 {
10338 sql_print_information("Recovering after a crash using %s", opt_name);
10339 error= recover(&log_info, log_name, &log,
10340 (Format_description_log_event *)ev, do_xa_recovery);
10341 }
10342 else
10343 {
10344 error= read_state_from_file();
10345 if (unlikely(error == 2))
10346 {
10347 /*
10348 The binlog exists, but the .state file is missing. This is normal if
10349 this is the first master start after a major upgrade to 10.0 (with
10350 GTID support).
10351
10352 However, it could also be that the .state file was lost somehow, and
10353 in this case it could be a serious issue, as we would set the wrong
10354 binlog state in the next binlog file to be created, and GTID
10355 processing would be corrupted. A common way would be copying files
10356 from an old server to a new one and forgetting the .state file.
10357
10358 So in this case, we want to try to recover the binlog state by
10359 scanning the last binlog file (but we do not need any XA recovery).
10360
10361 ToDo: We could avoid one scan at first start after major upgrade, by
10362 detecting that there is no GTID_LIST event at the start of the
10363 binlog file, and stopping the scan in that case.
10364 */
10365 error= recover(&log_info, log_name, &log,
10366 (Format_description_log_event *)ev, false);
10367 }
10368 }
10369 }
10370
10371 delete ev;
10372 end_io_cache(&log);
10373 mysql_file_close(file, MYF(MY_WME));
10374
10375 return error;
10376 }
10377
10378
10379 #ifdef INNODB_COMPATIBILITY_HOOKS
10380 /**
10381 Get the file name of the MySQL binlog.
10382 @return the name of the binlog file
10383 */
10384 extern "C"
10385 const char* mysql_bin_log_file_name(void)
10386 {
10387 return mysql_bin_log.get_log_fname();
10388 }
10389 /**
10390 Get the current position of the MySQL binlog.
10391 @return byte offset from the beginning of the binlog
10392 */
10393 extern "C"
10394 ulonglong mysql_bin_log_file_pos(void)
10395 {
10396 return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file;
10397 }
10398 /*
10399 Get the current position of the MySQL binlog for transaction currently being
10400 committed.
10401
10402 This is valid to call from within storage engine commit_ordered() and
10403 commit() methods only.
10404
10405 Since it stores the position inside THD, it is safe to call without any
10406 locking.
10407 */
10408 void
10409 mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
10410 {
10411 binlog_cache_mngr *cache_mngr;
10412 if (opt_bin_log &&
10413 (cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton)))
10414 {
10415 *out_file= cache_mngr->last_commit_pos_file;
10416 *out_pos= (ulonglong)(cache_mngr->last_commit_pos_offset);
10417 }
10418 else
10419 {
10420 *out_file= NULL;
10421 *out_pos= 0;
10422 }
10423 }
10424 #endif /* INNODB_COMPATIBILITY_HOOKS */
10425
10426
10427 static void
10428 binlog_checksum_update(MYSQL_THD thd, struct st_mysql_sys_var *var,
10429 void *var_ptr, const void *save)
10430 {
10431 ulong value= *((ulong *)save);
10432 bool check_purge= false;
10433 ulong UNINIT_VAR(prev_binlog_id);
10434
10435 mysql_mutex_lock(mysql_bin_log.get_log_lock());
10436 if(mysql_bin_log.is_open())
10437 {
10438 prev_binlog_id= mysql_bin_log.current_binlog_id;
10439 if (binlog_checksum_options != value)
10440 mysql_bin_log.checksum_alg_reset= (enum_binlog_checksum_alg)value;
10441 if (mysql_bin_log.rotate(true, &check_purge))
10442 check_purge= false;
10443 }
10444 else
10445 {
10446 binlog_checksum_options= value;
10447 }
10448 DBUG_ASSERT(binlog_checksum_options == value);
10449 mysql_bin_log.checksum_alg_reset= BINLOG_CHECKSUM_ALG_UNDEF;
10450 mysql_mutex_unlock(mysql_bin_log.get_log_lock());
10451 if (check_purge)
10452 mysql_bin_log.checkpoint_and_purge(prev_binlog_id);
10453 }
10454
10455
10456 static int show_binlog_vars(THD *thd, SHOW_VAR *var, void *,
10457 system_status_var *status_var, enum_var_type)
10458 {
10459 mysql_bin_log.set_status_variables(thd);
10460 var->type= SHOW_ARRAY;
10461 var->value= (char *)&binlog_status_vars_detail;
10462 return 0;
10463 }
10464
10465 static SHOW_VAR binlog_status_vars_top[]= {
10466 {"Binlog", (char *) &show_binlog_vars, SHOW_FUNC},
10467 {NullS, NullS, SHOW_LONG}
10468 };
10469
10470 static MYSQL_SYSVAR_BOOL(
10471 optimize_thread_scheduling,
10472 opt_optimize_thread_scheduling,
10473 PLUGIN_VAR_READONLY,
10474 "Run fast part of group commit in a single thread, to optimize kernel "
10475 "thread scheduling. On by default. Disable to run each transaction in group "
10476 "commit in its own thread, which can be slower at very high concurrency. "
10477 "This option is mostly for testing one algorithm versus the other, and it "
10478 "should not normally be necessary to change it.",
10479 NULL,
10480 NULL,
10481 1);
10482
10483 static MYSQL_SYSVAR_ENUM(
10484 checksum,
10485 binlog_checksum_options,
10486 PLUGIN_VAR_RQCMDARG,
10487 "Type of BINLOG_CHECKSUM_ALG. Include checksum for "
10488 "log events in the binary log",
10489 NULL,
10490 binlog_checksum_update,
10491 BINLOG_CHECKSUM_ALG_CRC32,
10492 &binlog_checksum_typelib);
10493
10494 static struct st_mysql_sys_var *binlog_sys_vars[]=
10495 {
10496 MYSQL_SYSVAR(optimize_thread_scheduling),
10497 MYSQL_SYSVAR(checksum),
10498 NULL
10499 };
10500
10501
10502 /*
10503 Copy out the non-directory part of binlog position filename for the
10504 `binlog_snapshot_file' status variable, same way as it is done for
10505 SHOW MASTER STATUS.
10506 */
10507 static void
10508 set_binlog_snapshot_file(const char *src)
10509 {
10510 size_t dir_len = dirname_length(src);
10511 strmake_buf(binlog_snapshot_file, src + dir_len);
10512 }
10513
10514 /*
10515 Copy out current values of status variables, for SHOW STATUS or
10516 information_schema.global_status.
10517
10518 This is called only under LOCK_show_status, so we can fill in a static array.
10519 */
10520 void
10521 TC_LOG_BINLOG::set_status_variables(THD *thd)
10522 {
10523 binlog_cache_mngr *cache_mngr;
10524
10525 if (thd && opt_bin_log)
10526 cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10527 else
10528 cache_mngr= 0;
10529
10530 bool have_snapshot= (cache_mngr && cache_mngr->last_commit_pos_file[0] != 0);
10531 mysql_mutex_lock(&LOCK_commit_ordered);
10532 binlog_status_var_num_commits= this->num_commits;
10533 binlog_status_var_num_group_commits= this->num_group_commits;
10534 if (!have_snapshot)
10535 {
10536 set_binlog_snapshot_file(last_commit_pos_file);
10537 binlog_snapshot_position= last_commit_pos_offset;
10538 }
10539 mysql_mutex_unlock(&LOCK_commit_ordered);
10540 mysql_mutex_lock(&LOCK_prepare_ordered);
10541 binlog_status_group_commit_trigger_count= this->group_commit_trigger_count;
10542 binlog_status_group_commit_trigger_timeout= this->group_commit_trigger_timeout;
10543 binlog_status_group_commit_trigger_lock_wait= this->group_commit_trigger_lock_wait;
10544 mysql_mutex_unlock(&LOCK_prepare_ordered);
10545
10546 if (have_snapshot)
10547 {
10548 set_binlog_snapshot_file(cache_mngr->last_commit_pos_file);
10549 binlog_snapshot_position= cache_mngr->last_commit_pos_offset;
10550 }
10551 }
10552
10553
10554 /*
10555 Find the Gtid_list_log_event at the start of a binlog.
10556
10557 NULL for ok, non-NULL error message for error.
10558
10559 If ok, then the event is returned in *out_gtid_list. This can be NULL if we
10560 get back to binlogs written by old server version without GTID support. If
10561 so, it means we have reached the point to start from, as no GTID events can
10562 exist in earlier binlogs.
10563 */
10564 const char *
10565 get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list)
10566 {
10567 Format_description_log_event init_fdle(BINLOG_VERSION);
10568 Format_description_log_event *fdle;
10569 Log_event *ev;
10570 const char *errormsg = NULL;
10571
10572 *out_gtid_list= NULL;
10573
10574 if (!(ev= Log_event::read_log_event(cache, &init_fdle,
10575 opt_master_verify_checksum)) ||
10576 ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
10577 {
10578 if (ev)
10579 delete ev;
10580 return "Could not read format description log event while looking for "
10581 "GTID position in binlog";
10582 }
10583
10584 fdle= static_cast<Format_description_log_event *>(ev);
10585
10586 for (;;)
10587 {
10588 Log_event_type typ;
10589
10590 ev= Log_event::read_log_event(cache, fdle, opt_master_verify_checksum);
10591 if (!ev)
10592 {
10593 errormsg= "Could not read GTID list event while looking for GTID "
10594 "position in binlog";
10595 break;
10596 }
10597 typ= ev->get_type_code();
10598 if (typ == GTID_LIST_EVENT)
10599 break; /* Done, found it */
10600 if (typ == START_ENCRYPTION_EVENT)
10601 {
10602 if (fdle->start_decryption((Start_encryption_log_event*) ev))
10603 errormsg= "Could not set up decryption for binlog.";
10604 }
10605 delete ev;
10606 if (typ == ROTATE_EVENT || typ == STOP_EVENT ||
10607 typ == FORMAT_DESCRIPTION_EVENT || typ == START_ENCRYPTION_EVENT)
10608 continue; /* Continue looking */
10609
10610 /* We did not find any Gtid_list_log_event, must be old binlog. */
10611 ev= NULL;
10612 break;
10613 }
10614
10615 delete fdle;
10616 *out_gtid_list= static_cast<Gtid_list_log_event *>(ev);
10617 return errormsg;
10618 }
10619
10620
10621 struct st_mysql_storage_engine binlog_storage_engine=
10622 { MYSQL_HANDLERTON_INTERFACE_VERSION };
10623
10624 maria_declare_plugin(binlog)
10625 {
10626 MYSQL_STORAGE_ENGINE_PLUGIN,
10627 &binlog_storage_engine,
10628 "binlog",
10629 "MySQL AB",
10630 "This is a pseudo storage engine to represent the binlog in a transaction",
10631 PLUGIN_LICENSE_GPL,
10632 binlog_init, /* Plugin Init */
10633 NULL, /* Plugin Deinit */
10634 0x0100 /* 1.0 */,
10635 binlog_status_vars_top, /* status variables */
10636 binlog_sys_vars, /* system variables */
10637 "1.0", /* string version */
10638 MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
10639 }
10640 maria_declare_plugin_end;
10641
10642 #ifdef WITH_WSREP
10643 IO_CACHE * get_trans_log(THD * thd)
10644 {
10645 DBUG_ASSERT(binlog_hton->slot != HA_SLOT_UNDEF);
10646 binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*)
10647 thd_get_ha_data(thd, binlog_hton);
10648 if (cache_mngr)
10649 return cache_mngr->get_binlog_cache_log(true);
10650
10651 WSREP_DEBUG("binlog cache not initialized, conn: %llu",
10652 thd->thread_id);
10653 return NULL;
10654 }
10655
10656
10657 bool wsrep_trans_cache_is_empty(THD *thd)
10658 {
10659 binlog_cache_mngr *const cache_mngr=
10660 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10661 return (!cache_mngr || cache_mngr->trx_cache.empty());
10662 }
10663
10664
10665 void thd_binlog_trx_reset(THD * thd)
10666 {
10667 /*
10668 todo: fix autocommit select to not call the caller
10669 */
10670 if (thd_get_ha_data(thd, binlog_hton) != NULL)
10671 {
10672 binlog_cache_mngr *const cache_mngr=
10673 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10674 if (cache_mngr)
10675 {
10676 cache_mngr->reset(false, true);
10677 if (!cache_mngr->stmt_cache.empty())
10678 {
10679 WSREP_DEBUG("pending events in stmt cache, sql: %s", thd->query());
10680 cache_mngr->stmt_cache.reset();
10681 }
10682 }
10683 }
10684 thd->clear_binlog_table_maps();
10685 }
10686
10687
10688 void thd_binlog_rollback_stmt(THD * thd)
10689 {
10690 WSREP_DEBUG("thd_binlog_rollback_stmt connection: %llu",
10691 thd->thread_id);
10692 binlog_cache_mngr *const cache_mngr=
10693 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10694 if (cache_mngr)
10695 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
10696 }
10697 #endif /* WITH_WSREP */
10698