1 /* Copyright (c) 2000, 2018, Oracle and/or its affiliates.
2    Copyright (c) 2009, 2020, MariaDB Corporation.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */
16 
17 
18 /**
19   @file
20 
21   @brief
22   logging of commands
23 
24   @todo
25     Abort logging when we get an error in reading or writing log files
26 */
27 
28 #include "mariadb.h"		/* NO_EMBEDDED_ACCESS_CHECKS */
29 #include "sql_priv.h"
30 #include "log.h"
31 #include "sql_base.h"                           // open_log_table
32 #include "sql_repl.h"
33 #include "sql_delete.h"                         // mysql_truncate
34 #include "sql_parse.h"                          // command_name
35 #include "sql_time.h"           // calc_time_from_sec, my_time_compare
36 #include "tztime.h"             // my_tz_OFFSET0, struct Time_zone
37 #include "sql_acl.h"            // SUPER_ACL
38 #include "log_event.h"          // Query_log_event
39 #include "rpl_filter.h"
40 #include "rpl_rli.h"
41 #include "sql_audit.h"
42 #include "mysqld.h"
43 
44 #include <my_dir.h>
45 #include <m_ctype.h>				// For test_if_number
46 
47 #include <set_var.h> // for Sys_last_gtid_ptr
48 
49 #ifdef _WIN32
50 #include "message.h"
51 #endif
52 
53 #include "sql_plugin.h"
54 #include "debug_sync.h"
55 #include "sql_show.h"
56 #include "my_pthread.h"
57 #include "semisync_master.h"
58 #include "sp_rcontext.h"
59 #include "sp_head.h"
60 
61 #include "wsrep_mysqld.h"
62 #ifdef WITH_WSREP
63 #include "wsrep_trans_observer.h"
64 #endif /* WITH_WSREP */
65 
/* max size of the log message */
#define MAX_LOG_BUFFER_SIZE 1024
/* NOTE(review): presumably the size of a buffer for a formatted time; confirm at the use sites */
#define MAX_TIME_SIZE 32
/*
  All-ones my_off_t value. In this file it is the initial/reset value of
  binlog_cache_data::before_stmt_pos, i.e. "no position recorded".
*/
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)
/*
  Truncate cache log files bigger than this
  (binlog_cache_data::reset() compares the written size against it).
*/
#define CACHE_FILE_TRUNC_SIZE 65536

/*
  Expands to the name of flag F followed by a space when (V & F) is non-zero,
  and to an empty string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
74 
/* NOTE(review): presumably the handlerton the binlog is registered under; it is assigned outside this view */
handlerton *binlog_hton;
/* The global LOGGER instance; used below e.g. by check_if_log_table() */
LOGGER logger;

const char *log_bin_index= 0;
const char *log_bin_basename= 0;

/* The global binary log object, constructed with the address of sync_binlog_period */
MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period);

/*
  Forward declarations of file-local (static) functions; being static, their
  definitions must appear later in this translation unit.
*/
static bool test_if_number(const char *str,
			   ulong *res, bool allow_wildcards);
static int binlog_init(void *p);
static int binlog_close_connection(handlerton *hton, THD *thd);
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
                                                      THD *thd);
static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);

/* Message text (with its length) for a failed write to the binary log */
static const LEX_CSTRING write_error_msg=
    { STRING_WITH_LEN("error writing to the binary log") };

static my_bool opt_optimize_thread_scheduling= TRUE;
ulong binlog_checksum_options;
#ifndef DBUG_OFF
/* Only present in debug builds (DBUG_OFF not defined) */
ulong opt_binlog_dbug_fsync_sleep= 0;
#endif

mysql_mutex_t LOCK_prepare_ordered;
mysql_cond_t COND_prepare_ordered;
mysql_mutex_t LOCK_after_binlog_sync;
mysql_mutex_t LOCK_commit_ordered;

/*
  File-local counters and snapshot information; their addresses are
  published through the binlog_status_vars_detail table below.
*/
static ulonglong binlog_status_var_num_commits;
static ulonglong binlog_status_var_num_group_commits;
static ulonglong binlog_status_group_commit_trigger_count;
static ulonglong binlog_status_group_commit_trigger_lock_wait;
static ulonglong binlog_status_group_commit_trigger_timeout;
static char binlog_snapshot_file[FN_REFLEN];
static ulonglong binlog_snapshot_position;

/*
  printf-style format for a fatal logging error: %s is what could not be
  used for logging and %d is the error number.
*/
static const char *fatal_log_error=
  "Could not use %s for logging (error %d). "
  "Turning logging off for the whole duration of the MariaDB server process. "
  "To turn it on again: fix the cause, shutdown the MariaDB server and "
  "restart it.";
123 
124 
/*
  Table of binlog status variables. Each entry gives a variable name, the
  address of the file-local variable that holds its value, and the SHOW type
  used to display it. The list ends with an entry whose name is NullS.
*/
static SHOW_VAR binlog_status_vars_detail[]=
{
  {"commits",
    (char *)&binlog_status_var_num_commits, SHOW_LONGLONG},
  {"group_commits",
    (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG},
  {"group_commit_trigger_count",
    (char *)&binlog_status_group_commit_trigger_count, SHOW_LONGLONG},
  {"group_commit_trigger_lock_wait",
    (char *)&binlog_status_group_commit_trigger_lock_wait, SHOW_LONGLONG},
  {"group_commit_trigger_timeout",
    (char *)&binlog_status_group_commit_trigger_timeout, SHOW_LONGLONG},
  {"snapshot_file",
    (char *)&binlog_snapshot_file, SHOW_CHAR},
  {"snapshot_position",
   (char *)&binlog_snapshot_position, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}                     /* end-of-list marker */
};
143 
/*
  Variables for the binlog background thread.
  Protected by the MYSQL_BIN_LOG::LOCK_binlog_background_thread mutex.
 */
static bool binlog_background_thread_started= false;
static bool binlog_background_thread_stop= false;
/* NOTE(review): presumably the work queue handed to the background thread; confirm at the use sites */
static MYSQL_BIN_LOG::xid_count_per_binlog *
    binlog_background_thread_queue= NULL;

/* Forward declaration; being static, it is defined later in this file */
static bool start_binlog_background_thread();

/* File-global GTID binlog state; its init() is called from setup_log_handling() */
static rpl_binlog_state rpl_global_gtid_binlog_state;
156 
/**
  Initialize the file-global GTID binlog state object
  (rpl_global_gtid_binlog_state) by calling its init() method.
*/
void setup_log_handling()
{
  rpl_global_gtid_binlog_state.init();
}
161 
162 
163 /**
164    purge logs, master and slave sides both, related error code
165    converter.
166    Called from @c purge_error_message(), @c MYSQL_BIN_LOG::reset_logs()
167 
168    @param  res  an internal to purging routines error code
169 
170    @return the user level error code ER_*
171 */
purge_log_get_error_code(int res)172 uint purge_log_get_error_code(int res)
173 {
174   uint errcode= 0;
175 
176   switch (res)  {
177   case 0: break;
178   case LOG_INFO_EOF:	errcode= ER_UNKNOWN_TARGET_BINLOG; break;
179   case LOG_INFO_IO:	errcode= ER_IO_ERR_LOG_INDEX_READ; break;
180   case LOG_INFO_INVALID:errcode= ER_BINLOG_PURGE_PROHIBITED; break;
181   case LOG_INFO_SEEK:	errcode= ER_FSEEK_FAIL; break;
182   case LOG_INFO_MEM:	errcode= ER_OUT_OF_RESOURCES; break;
183   case LOG_INFO_FATAL:	errcode= ER_BINLOG_PURGE_FATAL_ERR; break;
184   case LOG_INFO_IN_USE: errcode= ER_LOG_IN_USE; break;
185   case LOG_INFO_EMFILE: errcode= ER_BINLOG_PURGE_EMFILE; break;
186   default:		errcode= ER_LOG_PURGE_UNKNOWN_ERR; break;
187   }
188 
189   return errcode;
190 }
191 
/**
  Silence all errors and warnings reported when performing a write
  to a log table.
  Errors and warnings are not reported to the client or SQL exception
  handlers, so that the presence of logging does not interfere and affect
  the logic of an application.

  The text of the last intercepted condition is stored in m_message and
  can be read back through message().
*/
class Silence_log_table_errors : public Internal_error_handler
{
  /* Text of the last intercepted condition; empty string until one is seen */
  char m_message[MYSQL_ERRMSG_SIZE];
public:
  /* Start out with an empty message */
  Silence_log_table_errors()
  {
    m_message[0]= '\0';
  }

  virtual ~Silence_log_table_errors() {}

  /*
    Called for every raised condition; the implementation (defined out of
    line) stores msg and reports the condition as handled.
  */
  virtual bool handle_condition(THD *thd,
                                uint sql_errno,
                                const char* sql_state,
                                Sql_condition::enum_warning_level *level,
                                const char* msg,
                                Sql_condition ** cond_hdl);
  /* Return the stored message text (a pointer into this object) */
  const char *message() const { return m_message; }
};
218 
219 bool
handle_condition(THD *,uint,const char *,Sql_condition::enum_warning_level *,const char * msg,Sql_condition ** cond_hdl)220 Silence_log_table_errors::handle_condition(THD *,
221                                            uint,
222                                            const char*,
223                                            Sql_condition::enum_warning_level*,
224                                            const char* msg,
225                                            Sql_condition ** cond_hdl)
226 {
227   *cond_hdl= NULL;
228   strmake_buf(m_message, msg);
229   return TRUE;
230 }
231 
/*
  The three message printing functions, in the order
  information, warning, error.
  NOTE(review): presumably indexed by a log level value; confirm against
  the callers.
*/
sql_print_message_func sql_print_message_handlers[3] =
{
  sql_print_information,
  sql_print_warning,
  sql_print_error
};
238 
239 
240 /**
241   Create the name of the log file
242 
243   @param[OUT] out    a pointer to a new allocated name will go there
244   @param[IN] log_ext The extension for the file (e.g .log)
245   @param[IN] once    whether to use malloc_once or a normal malloc.
246 */
make_default_log_name(char ** out,const char * log_ext,bool once)247 void make_default_log_name(char **out, const char* log_ext, bool once)
248 {
249   char buff[FN_REFLEN+10];
250   fn_format(buff, opt_log_basename, "", log_ext, MYF(MY_REPLACE_EXT));
251   if (once)
252     *out= my_once_strdup(buff, MYF(MY_WME));
253   else
254   {
255     my_free(*out);
256     *out= my_strdup(buff, MYF(MY_WME));
257   }
258 }
259 
260 
/*
  Helper classes to store non-transactional and transactional data
  before copying it to the binary log.

  binlog_cache_data wraps one IO_CACHE (cache_log) together with the
  bookkeeping that belongs to it: the pending rows event, status flags, the
  position where the current statement started, an incident flag and the
  pointers to the statistics counters to update.
*/
class binlog_cache_data
{
public:
  /*
    Note that cache_log is not initialized by this constructor; only the
    bookkeeping members are.
  */
  binlog_cache_data(): m_pending(0), status(0),
  before_stmt_pos(MY_OFF_T_UNDEF),
  incident(FALSE),
  saved_max_binlog_cache_size(0), ptr_binlog_cache_use(0),
  ptr_binlog_cache_disk_use(0)
  { }

  /* The cache is expected to be empty (see empty()) when it is destroyed */
  ~binlog_cache_data()
  {
    DBUG_ASSERT(empty());
    close_cached_file(&cache_log);
  }

  /*
    Return 1 if there is no relevant entries in the cache

    This is when there is no pending rows event and either:
    - Cache is empty (nothing has been written to it)
    - There are no row or critical (DDL?) events in the cache

    The status test is needed to avoid writing entries with only
    a table map entry, which would crash in do_apply_event() on the slave
    as it assumes that there is always a row entry after a table map.
  */
  bool empty() const
  {
    return (pending() == NULL &&
            (my_b_write_tell(&cache_log) == 0 ||
             ((status & (LOGGED_ROW_EVENT | LOGGED_CRITICAL)) == 0)));
  }

  /* Return the pending rows event, or NULL if there is none */
  Rows_log_event *pending() const
  {
    return m_pending;
  }

  /* Replace the pending rows event pointer; the old event is not deleted here */
  void set_pending(Rows_log_event *const pending_arg)
  {
    m_pending= pending_arg;
  }

  /* Mark the cache as having lost events (see the 'incident' member) */
  void set_incident(void)
  {
    incident= TRUE;
  }

  bool has_incident(void)
  {
    return(incident);
  }

  /*
    Discard everything in the cache and return the bookkeeping to its
    initial state. Usage statistics are updated if the cache was in use.
  */
  void reset()
  {
    /*
      Both values must be sampled before truncate(), which reinitializes
      the cache and drops the pending event.
    */
    bool cache_was_empty= empty();
    bool truncate_file= (cache_log.file != -1 &&
                         my_b_write_tell(&cache_log) > CACHE_FILE_TRUNC_SIZE);
    truncate(0,1);                              // Forget what's in cache
    if (!cache_was_empty)
      compute_statistics();
    /* Shrink a backing file that has grown beyond CACHE_FILE_TRUNC_SIZE */
    if (truncate_file)
      my_chsize(cache_log.file, 0, 0, MYF(MY_WME));

    status= 0;
    incident= FALSE;
    before_stmt_pos= MY_OFF_T_UNDEF;
    DBUG_ASSERT(empty());
  }

  /* Current position in cache_log as reported by my_b_tell() */
  my_off_t get_byte_position() const
  {
    return my_b_tell(&cache_log);
  }

  /* Position recorded by set_prev_position(), or MY_OFF_T_UNDEF if none */
  my_off_t get_prev_position()
  {
     return(before_stmt_pos);
  }

  void set_prev_position(my_off_t pos)
  {
     before_stmt_pos= pos;
  }

  /* Truncate the cache back to the position recorded by set_prev_position() */
  void restore_prev_position()
  {
    truncate(before_stmt_pos);
  }

  /*
    Truncate the cache back to a savepoint position. If that position lies
    before the recorded statement start, the statement start is forgotten.
  */
  void restore_savepoint(my_off_t pos)
  {
    truncate(pos);
    if (pos < before_stmt_pos)
      before_stmt_pos= MY_OFF_T_UNDEF;
  }

  /*
    Set the maximum cache size and the statistics counters for this cache.
    The size is also installed as cache_log.end_of_file.
  */
  void set_binlog_cache_info(my_off_t param_max_binlog_cache_size,
                             ulong *param_ptr_binlog_cache_use,
                             ulong *param_ptr_binlog_cache_disk_use)
  {
    /*
      The assertions guarantee that the set_binlog_cache_info is
      called just once and information passed as parameters are
      never zero.

      This is done while calling the constructor binlog_cache_mngr.
      We cannot set information in the constructor binlog_cache_data
      because the space for binlog_cache_mngr is allocated through
      a placement new.

      In the future, we can refactor this and change it to avoid
      the set_binlog_info.
    */
    DBUG_ASSERT(saved_max_binlog_cache_size == 0 &&
                param_max_binlog_cache_size != 0 &&
                ptr_binlog_cache_use == 0 &&
                param_ptr_binlog_cache_use != 0 &&
                ptr_binlog_cache_disk_use == 0 &&
                param_ptr_binlog_cache_disk_use != 0);

    saved_max_binlog_cache_size= param_max_binlog_cache_size;
    ptr_binlog_cache_use= param_ptr_binlog_cache_use;
    ptr_binlog_cache_disk_use= param_ptr_binlog_cache_disk_use;
    cache_log.end_of_file= saved_max_binlog_cache_size;
  }

  /* OR the given flag into the status bits (they are cleared by reset()) */
  void add_status(enum_logged_status status_arg)
  {
    status|= status_arg;
  }

  /*
    Cache to store data before copying it to the binary log.
  */
  IO_CACHE cache_log;

private:
  /*
    Pending binrows event. This event is the event where the rows are currently
    written.
   */
  Rows_log_event *m_pending;

  /*
    Bit flags for what has been written to cache. Used to
    discard logs without any data changes.
    see enum_logged_status;
  */
  uint32 status;

  /*
    Binlog position before the start of the current statement.
  */
  my_off_t before_stmt_pos;

  /*
    This indicates that some events did not get into the cache and most likely
    it is corrupted.
  */
  bool incident;

  /**
    This function computes binlog cache and disk usage.

    The cache-use counter is always incremented. The disk-use counter is
    updated only when the cache wrote to disk, after which
    cache_log.disk_writes is cleared.
  */
  void compute_statistics()
  {
    statistic_increment(*ptr_binlog_cache_use, &LOCK_status);
    if (cache_log.disk_writes != 0)
    {
#ifdef REAL_STATISTICS
      /* Count every disk write instead of counting the cache once */
      statistic_add(*ptr_binlog_cache_disk_use,
                    cache_log.disk_writes, &LOCK_status);
#else
      statistic_increment(*ptr_binlog_cache_disk_use, &LOCK_status);
#endif
      cache_log.disk_writes= 0;
    }
  }

  /*
    Stores the values of maximum size of the cache allowed when this cache
    is configured. This corresponds to either
      . max_binlog_cache_size or max_binlog_stmt_cache_size.
  */
  my_off_t saved_max_binlog_cache_size;

  /*
    Stores a pointer to the status variable that keeps track of the in-memory
    cache usage. This corresponds to either
      . binlog_cache_use or binlog_stmt_cache_use.
  */
  ulong *ptr_binlog_cache_use;

  /*
    Stores a pointer to the status variable that keeps track of the disk
    cache usage. This corresponds to either
      . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
  */
  ulong *ptr_binlog_cache_disk_use;

  /*
    It truncates the cache to a certain position. This includes deleting the
    pending event.

    The cache error flag is cleared, the cache is reinitialized as a
    WRITE_CACHE at 'pos' (reset_cache is passed on to reinit_io_cache())
    and the size limit is installed again as end_of_file.
   */
  void truncate(my_off_t pos, bool reset_cache=0)
  {
    /* The position is cast to ulong for the debug trace only */
    DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
    cache_log.error=0;
    if (pending())
    {
      delete pending();
      set_pending(0);
    }
    reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, reset_cache);
    cache_log.end_of_file= saved_max_binlog_cache_size;
  }

  /* Declared private so that objects of this class cannot be copied */
  binlog_cache_data& operator=(const binlog_cache_data& info);
  binlog_cache_data(const binlog_cache_data& info);
};
487 
488 
/*
  Forward a logged-status flag to the attached binlog_cache_data.
  Does nothing when no cache_data is attached; the likely() hint marks the
  attached case as the expected one.
*/
void Log_event_writer::add_status(enum_logged_status status)
{
  if (likely(cache_data))
    cache_data->add_status(status);
}
494 
495 class binlog_cache_mngr {
496 public:
binlog_cache_mngr(my_off_t param_max_binlog_stmt_cache_size,my_off_t param_max_binlog_cache_size,ulong * param_ptr_binlog_stmt_cache_use,ulong * param_ptr_binlog_stmt_cache_disk_use,ulong * param_ptr_binlog_cache_use,ulong * param_ptr_binlog_cache_disk_use)497   binlog_cache_mngr(my_off_t param_max_binlog_stmt_cache_size,
498                     my_off_t param_max_binlog_cache_size,
499                     ulong *param_ptr_binlog_stmt_cache_use,
500                     ulong *param_ptr_binlog_stmt_cache_disk_use,
501                     ulong *param_ptr_binlog_cache_use,
502                     ulong *param_ptr_binlog_cache_disk_use)
503     : last_commit_pos_offset(0), using_xa(FALSE), xa_xid(0)
504   {
505      stmt_cache.set_binlog_cache_info(param_max_binlog_stmt_cache_size,
506                                       param_ptr_binlog_stmt_cache_use,
507                                       param_ptr_binlog_stmt_cache_disk_use);
508      trx_cache.set_binlog_cache_info(param_max_binlog_cache_size,
509                                      param_ptr_binlog_cache_use,
510                                      param_ptr_binlog_cache_disk_use);
511      last_commit_pos_file[0]= 0;
512   }
513 
reset(bool do_stmt,bool do_trx)514   void reset(bool do_stmt, bool do_trx)
515   {
516     if (do_stmt)
517       stmt_cache.reset();
518     if (do_trx)
519     {
520       trx_cache.reset();
521       using_xa= FALSE;
522       last_commit_pos_file[0]= 0;
523       last_commit_pos_offset= 0;
524     }
525   }
526 
get_binlog_cache_data(bool is_transactional)527   binlog_cache_data* get_binlog_cache_data(bool is_transactional)
528   {
529     return (is_transactional ? &trx_cache : &stmt_cache);
530   }
531 
get_binlog_cache_log(bool is_transactional)532   IO_CACHE* get_binlog_cache_log(bool is_transactional)
533   {
534     return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
535   }
536 
537   binlog_cache_data stmt_cache;
538 
539   binlog_cache_data trx_cache;
540 
541   /*
542     Binlog position for current transaction.
543     For START TRANSACTION WITH CONSISTENT SNAPSHOT, this is the binlog
544     position corresponding to the snapshot taken. During (and after) commit,
545     this is set to the binlog position corresponding to just after the
546     commit (so storage engines can store it in their transaction log).
547   */
548   char last_commit_pos_file[FN_REFLEN];
549   my_off_t last_commit_pos_offset;
550 
551   /*
552     Flag set true if this transaction is committed with log_xid() as part of
553     XA, false if not.
554   */
555   bool using_xa;
556   my_xid xa_xid;
557   bool need_unlog;
558   /*
559     Id of binlog that transaction was written to; only needed if need_unlog is
560     true.
561   */
562   ulong binlog_id;
563   /* Set if we get an error during commit that must be returned from unlog(). */
564   bool delayed_error;
565 
566 private:
567 
568   binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
569   binlog_cache_mngr(const binlog_cache_mngr& info);
570 };
571 
is_log_table_enabled(uint log_table_type)572 bool LOGGER::is_log_table_enabled(uint log_table_type)
573 {
574   switch (log_table_type) {
575   case QUERY_LOG_SLOW:
576     return (table_log_handler != NULL) && global_system_variables.sql_log_slow
577             && (log_output_options & LOG_TABLE);
578   case QUERY_LOG_GENERAL:
579     return (table_log_handler != NULL) && opt_log
580             && (log_output_options & LOG_TABLE);
581   default:
582     DBUG_ASSERT(0);
583     return FALSE;                             /* make compiler happy */
584   }
585 }
586 
587 /**
588    Check if a given table is opened log table
589 
590    @param table             Table to check
591    @param check_if_opened   Only fail if it's a log table in use
592    @param error_msg	    String to put in error message if not ok.
593                             No error message if 0
594    @return 0 ok
595    @return # Type of log file
596  */
597 
check_if_log_table(const TABLE_LIST * table,bool check_if_opened,const char * error_msg)598 int check_if_log_table(const TABLE_LIST *table,
599                        bool check_if_opened,
600                        const char *error_msg)
601 {
602   int result= 0;
603   if (table->db.length == 5 &&
604       !my_strcasecmp(table_alias_charset, table->db.str, "mysql"))
605   {
606     const char *table_name= table->table_name.str;
607 
608     if (table->table_name.length == 11 &&
609         !my_strcasecmp(table_alias_charset, table_name, "general_log"))
610     {
611       result= QUERY_LOG_GENERAL;
612       goto end;
613     }
614 
615     if (table->table_name.length == 8 &&
616         !my_strcasecmp(table_alias_charset, table_name, "slow_log"))
617     {
618       result= QUERY_LOG_SLOW;
619       goto end;
620     }
621   }
622   return 0;
623 
624 end:
625   if (!check_if_opened || logger.is_log_table_enabled(result))
626   {
627     if (error_msg)
628       my_error(ER_BAD_LOG_STATEMENT, MYF(0), error_msg);
629     return result;
630   }
631   return 0;
632 }
633 
634 
/* Constructor: nothing to set up */
Log_to_csv_event_handler::Log_to_csv_event_handler()
{
}
638 
639 
/* Destructor: nothing to release */
Log_to_csv_event_handler::~Log_to_csv_event_handler()
{
}
643 
644 
/* Mark the log tables as not initialized in the global logger object */
void Log_to_csv_event_handler::cleanup()
{
  logger.is_log_tables_initialized= FALSE;
}
649 
650 /* log event handlers */
651 
/**
  Log command to the general log table

  Log given command to the general log table.

  @param  thd               thread that performs the write; its option bits
                            and time_zone_used flag are saved on entry and
                            restored before returning
  @param  event_time        command start timestamp
  @param  user_host         the pointer to the string with user@host info
  @param  user_host_len     length of the user_host string. this is computed
                            once and passed to all general log event handlers
  @param  thread_id_arg     Id of the thread, issued a query
  @param  command_type      the type of the command being logged
  @param  command_type_len  the length of the command_type string
  @param  sql_text          the very text of the query being executed
  @param  sql_text_len      the length of sql_text string
  @param  client_cs         character set passed along when storing the
                            string values


  @return This function attempts to never call my_error(). This is
  necessary, because general logging happens already after a statement
  status has been sent to the client, so the client can not see the
  error anyway. Besides, the error is not related to the statement
  being executed and is internal, and thus should be handled
  internally (@todo: how?).
  If a write to the table has failed, a short error message is written
  with sql_print_error() (unless the thread has been killed). The failure
  is also indicated in the return value.

  @retval  FALSE   OK
  @retval  TRUE    error occurred
*/

bool Log_to_csv_event_handler::
  log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
              const char *command_type, size_t command_type_len,
              const char *sql_text, size_t sql_text_len,
              CHARSET_INFO *client_cs)
{
  TABLE_LIST table_list;
  TABLE *table;
  bool result= TRUE;
  /*
    The need_* flags record how far the setup got, so that the common exit
    path under 'err' only undoes the steps that were actually performed.
  */
  bool need_close= FALSE;
  bool need_pop= FALSE;
  bool need_rnd_end= FALSE;
  uint field_index;
  Silence_log_table_errors error_handler;
  Open_tables_backup open_tables_backup;
  ulonglong save_thd_options;
  bool save_time_zone_used;
  DBUG_ENTER("log_general");

  /*
    CSV uses TIME_to_timestamp() internally if table needs to be repaired
    which will set thd->time_zone_used
  */
  save_time_zone_used= thd->time_zone_used;

  /* Switch off OPTION_BIN_LOG for the duration of the write */
  save_thd_options= thd->variables.option_bits;
  thd->variables.option_bits&= ~OPTION_BIN_LOG;

  table_list.init_one_table(&MYSQL_SCHEMA_NAME, &GENERAL_LOG_NAME, 0,
                            TL_WRITE_CONCURRENT_INSERT);

  /*
    1) open_log_table generates an error if the
    table can not be opened or is corrupted.
    2) "INSERT INTO general_log" can generate warning sometimes.

    Suppress these warnings and errors, they can't be dealt with
    properly anyway.

    QQ: this problem needs to be studied in more detail.
    Comment these 2 lines and run "cast.test" to see what's happening.
  */
  thd->push_internal_handler(& error_handler);
  need_pop= TRUE;

  if (!(table= open_log_table(thd, &table_list, &open_tables_backup)))
    goto err;

  need_close= TRUE;

  if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
      table->file->ha_rnd_init_with_error(0))
    goto err;

  need_rnd_end= TRUE;

  /* Honor next number columns if present */
  table->next_number_field= table->found_next_number_field;

  /*
    NOTE: we do not call restore_record() here, as all fields are
    filled by the Logger (=> no need to load default ones).
  */

  /*
    table->field[0] is not left to its column default: it is filled
    explicitly from event_time below.
  */

  /* check that all columns exist */
  if (table->s->fields < 6)
    goto err;

  DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);

  ((Field_timestamp*) table->field[0])->store_TIME(
                  hrtime_to_my_time(event_time), hrtime_sec_part(event_time));

  /*
    do a write
    Fields 1..4 receive user_host, thread id, server id and command type;
    any non-zero return from store() aborts the write.
  */
  if (table->field[1]->store(user_host, user_host_len, client_cs) ||
      table->field[2]->store((longlong) thread_id_arg, TRUE) ||
      table->field[3]->store((longlong) global_system_variables.server_id,
                             TRUE) ||
      table->field[4]->store(command_type, command_type_len, client_cs))
    goto err;

  /*
    A positive return value in store() means truncation.
    Still logging a message in the log in this case.
    Only a negative return value is treated as an error for sql_text.
  */
  table->field[5]->flags|= FIELDFLAG_HEX_ESCAPE;
  if (table->field[5]->store(sql_text, sql_text_len, client_cs) < 0)
    goto err;

  /* mark all fields as not null */
  table->field[1]->set_notnull();
  table->field[2]->set_notnull();
  table->field[3]->set_notnull();
  table->field[4]->set_notnull();
  table->field[5]->set_notnull();

  /* Set any extra columns to their default values */
  for (field_index= 6 ; field_index < table->s->fields ; field_index++)
  {
    table->field[field_index]->set_default();
  }

  /* log table entries are not replicated */
  if (table->file->ha_write_row(table->record[0]))
    goto err;

  result= FALSE;

err:
  /* Reached on success as well; 'result' tells the two cases apart */
  if (result && !thd->killed)
    sql_print_error("Failed to write to mysql.general_log: %s",
                    error_handler.message());

  if (need_rnd_end)
  {
    table->file->ha_rnd_end();
    table->file->ha_release_auto_increment();
  }
  if (need_pop)
    thd->pop_internal_handler();
  if (need_close)
    close_log_table(thd, &open_tables_backup);

  /* Restore the THD state saved on entry */
  thd->variables.option_bits= save_thd_options;
  thd->time_zone_used= save_time_zone_used;
  DBUG_RETURN(result);
}
814 
815 
816 /*
817   Log a query to the slow log table
818 
819   SYNOPSIS
820     log_slow()
821     thd               THD of the query
822     current_time      current timestamp
823     user_host         the pointer to the string with user@host info
824     user_host_len     length of the user_host string. this is computed once
825                       and passed to all general log event handlers
826     query_time        Amount of time the query took to execute (in microseconds)
827     lock_time         Amount of time the query was locked (in microseconds)
828     is_command        The flag, which determines, whether the sql_text is a
829                       query or an administrator command (these are treated
830                       differently by the old logging routines)
831     sql_text          the very text of the query or administrator command
832                       processed
833     sql_text_len      the length of sql_text string
834 
835   DESCRIPTION
836 
837    Log a query to the slow log table
838 
839   RETURN
840     FALSE - OK
841     TRUE - error occurred
842 */
843 
844 bool Log_to_csv_event_handler::
log_slow(THD * thd,my_hrtime_t current_time,const char * user_host,size_t user_host_len,ulonglong query_utime,ulonglong lock_utime,bool is_command,const char * sql_text,size_t sql_text_len)845   log_slow(THD *thd, my_hrtime_t current_time,
846            const char *user_host, size_t user_host_len,
847            ulonglong query_utime, ulonglong lock_utime, bool is_command,
848            const char *sql_text, size_t sql_text_len)
849 {
850   TABLE_LIST table_list;
851   TABLE *table;
852   bool result= TRUE;
853   bool need_close= FALSE;
854   bool need_rnd_end= FALSE;
855   Silence_log_table_errors error_handler;
856   Open_tables_backup open_tables_backup;
857   CHARSET_INFO *client_cs= thd->variables.character_set_client;
858   bool save_time_zone_used;
859   ulong query_time= (ulong) MY_MIN(query_utime/1000000, TIME_MAX_VALUE_SECONDS);
860   ulong lock_time=  (ulong) MY_MIN(lock_utime/1000000, TIME_MAX_VALUE_SECONDS);
861   ulong query_time_micro= (ulong) (query_utime % 1000000);
862   ulong lock_time_micro=  (ulong) (lock_utime % 1000000);
863 
864   DBUG_ENTER("Log_to_csv_event_handler::log_slow");
865 
866   thd->push_internal_handler(& error_handler);
867   /*
868     CSV uses TIME_to_timestamp() internally if table needs to be repaired
869     which will set thd->time_zone_used
870   */
871   save_time_zone_used= thd->time_zone_used;
872 
873   table_list.init_one_table(&MYSQL_SCHEMA_NAME, &SLOW_LOG_NAME, 0,
874                             TL_WRITE_CONCURRENT_INSERT);
875 
876   if (!(table= open_log_table(thd, &table_list, &open_tables_backup)))
877     goto err;
878 
879   need_close= TRUE;
880 
881   if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
882       table->file->ha_rnd_init_with_error(0))
883     goto err;
884 
885   need_rnd_end= TRUE;
886 
887   /* Honor next number columns if present */
888   table->next_number_field= table->found_next_number_field;
889 
890   restore_record(table, s->default_values);    // Get empty record
891 
892   /* check that all columns exist */
893   if (table->s->fields < 13)
894     goto err;
895 
896   /* store the time and user values */
897   DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
898   ((Field_timestamp*) table->field[0])->store_TIME(
899              hrtime_to_my_time(current_time), hrtime_sec_part(current_time));
900   if (table->field[1]->store(user_host, user_host_len, client_cs))
901     goto err;
902 
903   /*
904     A TIME field can not hold the full longlong range; query_time or
905     lock_time may be truncated without warning here, if greater than
906     839 hours (~35 days)
907   */
908   MYSQL_TIME t;
909   t.neg= 0;
910 
911   /* fill in query_time field */
912   calc_time_from_sec(&t, query_time, query_time_micro);
913   if (table->field[2]->store_time(&t))
914     goto err;
915   /* lock_time */
916   calc_time_from_sec(&t, lock_time, lock_time_micro);
917   if (table->field[3]->store_time(&t))
918     goto err;
919   /* rows_sent */
920   if (table->field[4]->store((longlong) thd->get_sent_row_count(), TRUE))
921     goto err;
922   /* rows_examined */
923   if (table->field[5]->store((longlong) thd->get_examined_row_count(), TRUE))
924     goto err;
925 
926   /* fill database field */
927   if (thd->db.str)
928   {
929     if (table->field[6]->store(thd->db.str, thd->db.length, client_cs))
930       goto err;
931     table->field[6]->set_notnull();
932   }
933 
934   if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
935   {
936     if (table->
937         field[7]->store((longlong)
938                         thd->first_successful_insert_id_in_prev_stmt_for_binlog,
939                         TRUE))
940       goto err;
941     table->field[7]->set_notnull();
942   }
943 
944   /*
945     Set value if we do an insert on autoincrement column. Note that for
946     some engines (those for which get_auto_increment() does not leave a
947     table lock until the statement ends), this is just the first value and
948     the next ones used may not be contiguous to it.
949   */
950   if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
951   {
952     if (table->
953         field[8]->store((longlong)
954           thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(), TRUE))
955       goto err;
956     table->field[8]->set_notnull();
957   }
958 
959   if (table->field[9]->store((longlong)global_system_variables.server_id, TRUE))
960     goto err;
961   table->field[9]->set_notnull();
962 
963   /*
964     Column sql_text.
965     A positive return value in store() means truncation.
966     Still logging a message in the log in this case.
967   */
968   if (table->field[10]->store(sql_text, sql_text_len, client_cs) < 0)
969     goto err;
970 
971   if (table->field[11]->store((longlong) thd->thread_id, TRUE))
972     goto err;
973 
974   /* Rows_affected */
975   if (table->field[12]->store(thd->get_stmt_da()->is_ok() ?
976                               (longlong) thd->get_stmt_da()->affected_rows() :
977                               0, TRUE))
978     goto err;
979 
980   /* log table entries are not replicated */
981   if (table->file->ha_write_row(table->record[0]))
982     goto err;
983 
984   result= FALSE;
985 
986 err:
987   thd->pop_internal_handler();
988 
989   if (result && !thd->killed)
990     sql_print_error("Failed to write to mysql.slow_log: %s",
991                     error_handler.message());
992 
993   if (need_rnd_end)
994   {
995     table->file->ha_rnd_end();
996     table->file->ha_release_auto_increment();
997   }
998   if (need_close)
999     close_log_table(thd, &open_tables_backup);
1000   thd->time_zone_used= save_time_zone_used;
1001   DBUG_RETURN(result);
1002 }
1003 
1004 int Log_to_csv_event_handler::
activate_log(THD * thd,uint log_table_type)1005   activate_log(THD *thd, uint log_table_type)
1006 {
1007   TABLE_LIST table_list;
1008   TABLE *table;
1009   LEX_CSTRING *UNINIT_VAR(log_name);
1010   int result;
1011   Open_tables_backup open_tables_backup;
1012 
1013   DBUG_ENTER("Log_to_csv_event_handler::activate_log");
1014 
1015   if (log_table_type == QUERY_LOG_GENERAL)
1016   {
1017     log_name= &GENERAL_LOG_NAME;
1018   }
1019   else
1020   {
1021     DBUG_ASSERT(log_table_type == QUERY_LOG_SLOW);
1022 
1023     log_name= &SLOW_LOG_NAME;
1024   }
1025   table_list.init_one_table(&MYSQL_SCHEMA_NAME, log_name, 0, TL_WRITE_CONCURRENT_INSERT);
1026 
1027   table= open_log_table(thd, &table_list, &open_tables_backup);
1028   if (table)
1029   {
1030     result= 0;
1031     close_log_table(thd, &open_tables_backup);
1032   }
1033   else
1034     result= 1;
1035 
1036   DBUG_RETURN(result);
1037 }
1038 
/**
  Error-log entry point of the table (CSV) log handler.

  There is no error log table, so this must never be reached: the body
  hits DBUG_ASSERT(0) and then returns FALSE. All arguments are ignored.

  @param level   unused
  @param format  unused
  @param args    unused

  @return always FALSE
*/
bool Log_to_csv_event_handler::
  log_error(enum loglevel level, const char *format, va_list args)
{
  /* No log table is implemented */
  DBUG_ASSERT(0);
  return FALSE;
}
1046 
1047 bool Log_to_file_event_handler::
log_error(enum loglevel level,const char * format,va_list args)1048   log_error(enum loglevel level, const char *format,
1049             va_list args)
1050 {
1051   return vprint_msg_to_log(level, format, args);
1052 }
1053 
/**
  Forward init_pthread_objects() to the general query log object and then
  to the slow query log object owned by this handler.
*/
void Log_to_file_event_handler::init_pthread_objects()
{
  mysql_log.init_pthread_objects();
  mysql_slow_log.init_pthread_objects();
}
1059 
1060 
1061 /** Wrapper around MYSQL_LOG::write() for slow log. */
1062 
1063 bool Log_to_file_event_handler::
log_slow(THD * thd,my_hrtime_t current_time,const char * user_host,size_t user_host_len,ulonglong query_utime,ulonglong lock_utime,bool is_command,const char * sql_text,size_t sql_text_len)1064   log_slow(THD *thd, my_hrtime_t current_time,
1065            const char *user_host, size_t user_host_len,
1066            ulonglong query_utime, ulonglong lock_utime, bool is_command,
1067            const char *sql_text, size_t sql_text_len)
1068 {
1069   Silence_log_table_errors error_handler;
1070   thd->push_internal_handler(&error_handler);
1071   bool retval= mysql_slow_log.write(thd, hrtime_to_my_time(current_time),
1072                                     user_host, user_host_len,
1073                                     query_utime, lock_utime, is_command,
1074                                     sql_text, sql_text_len);
1075   thd->pop_internal_handler();
1076   return retval;
1077 }
1078 
1079 
1080 /**
1081    Wrapper around MYSQL_LOG::write() for general log. We need it since we
1082    want all log event handlers to have the same signature.
1083 */
1084 
1085 bool Log_to_file_event_handler::
log_general(THD * thd,my_hrtime_t event_time,const char * user_host,size_t user_host_len,my_thread_id thread_id_arg,const char * command_type,size_t command_type_len,const char * sql_text,size_t sql_text_len,CHARSET_INFO * client_cs)1086   log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
1087               const char *command_type, size_t command_type_len,
1088               const char *sql_text, size_t sql_text_len,
1089               CHARSET_INFO *client_cs)
1090 {
1091   Silence_log_table_errors error_handler;
1092   thd->push_internal_handler(&error_handler);
1093   bool retval= mysql_log.write(hrtime_to_time(event_time), user_host,
1094                                user_host_len,
1095                                thread_id_arg, command_type, command_type_len,
1096                                sql_text, sql_text_len);
1097   thd->pop_internal_handler();
1098   return retval;
1099 }
1100 
1101 
init()1102 bool Log_to_file_event_handler::init()
1103 {
1104   if (!is_initialized)
1105   {
1106     if (global_system_variables.sql_log_slow)
1107       mysql_slow_log.open_slow_log(opt_slow_logname);
1108 
1109     if (opt_log)
1110       mysql_log.open_query_log(opt_logname);
1111 
1112     is_initialized= TRUE;
1113   }
1114 
1115   return FALSE;
1116 }
1117 
1118 
/**
  Forward cleanup() to the general query log object and then to the slow
  query log object owned by this handler.
*/
void Log_to_file_event_handler::cleanup()
{
  mysql_log.cleanup();
  mysql_slow_log.cleanup();
}
1124 
/**
  Reopen the log files of the query logs that are currently enabled.

  The general log is reopened when opt_log is set, the slow log when
  global_system_variables.sql_log_slow is set. No lock is taken here.
*/
void Log_to_file_event_handler::flush()
{
  /* reopen log files */
  if (opt_log)
    mysql_log.reopen_file();
  if (global_system_variables.sql_log_slow)
    mysql_slow_log.reopen_file();
}
1133 
1134 /*
1135   Log error with all enabled log event handlers
1136 
1137   SYNOPSIS
1138     error_log_print()
1139 
1140     level             The level of the error significance: NOTE,
1141                       WARNING or ERROR.
1142     format            format string for the error message
1143     args              list of arguments for the format string
1144 
1145   RETURN
1146     FALSE - OK
1147     TRUE - error occurred
1148 */
1149 
error_log_print(enum loglevel level,const char * format,va_list args)1150 bool LOGGER::error_log_print(enum loglevel level, const char *format,
1151                              va_list args)
1152 {
1153   bool error= FALSE;
1154   Log_event_handler **current_handler;
1155   THD *thd= current_thd;
1156 
1157   if (likely(thd))
1158     thd->error_printed_to_log= 1;
1159 
1160   /* currently we don't need locking here as there is no error_log table */
1161   for (current_handler= error_log_handler_list ; *current_handler ;)
1162     error= (*current_handler++)->log_error(level, format, args) || error;
1163 
1164   return error;
1165 }
1166 
1167 
/**
  First stage of logger shutdown.

  Destroys LOCK_logger, then cleans up and deletes the table log handler
  (if one exists). The file log handler is only cleaned up here, not
  deleted; cleanup_end() deletes it.

  Expects the logger to be initialized (inited == 1).
*/
void LOGGER::cleanup_base()
{
  DBUG_ASSERT(inited == 1);
  mysql_rwlock_destroy(&LOCK_logger);
  if (table_log_handler)
  {
    table_log_handler->cleanup();
    delete table_log_handler;
    table_log_handler= NULL;
  }
  /* keep the file handler object itself alive for now */
  if (file_log_handler)
    file_log_handler->cleanup();
}
1181 
1182 
/**
  Final stage of logger shutdown.

  Deletes the file log handler (if one exists) and marks the logger as
  not initialized (inited= 0).

  Expects the logger to be initialized (inited == 1).
*/
void LOGGER::cleanup_end()
{
  DBUG_ASSERT(inited == 1);
  if (file_log_handler)
  {
    delete file_log_handler;
    file_log_handler=NULL;
  }
  inited= 0;
}
1193 
1194 
/**
  Perform basic log initialization: create file-based log handler and
  init error log.

  Must be called while the logger is not yet initialized (inited == 0);
  sets inited to 1. After the file handler exists, the error log handler
  list is pointed at it, the handler's pthread objects are initialized and
  LOCK_logger is created.
*/
void LOGGER::init_base()
{
  DBUG_ASSERT(inited == 0);
  inited= 1;

  /*
    Here we create file log handler. We don't do it for the table log handler
    here as it cannot be created so early. The reason is THD initialization,
    which depends on the system variables (parsed later).
  */
  if (!file_log_handler)
    file_log_handler= new Log_to_file_event_handler;

  /* by default we use traditional error log */
  init_error_log(LOG_FILE);

  file_log_handler->init_pthread_objects();
  mysql_rwlock_init(key_rwlock_LOCK_logger, &LOCK_logger);
}
1218 
1219 
init_log_tables()1220 void LOGGER::init_log_tables()
1221 {
1222   if (!table_log_handler)
1223     table_log_handler= new Log_to_csv_event_handler;
1224 
1225   if (!is_log_tables_initialized &&
1226       !table_log_handler->init() && !file_log_handler->init())
1227     is_log_tables_initialized= TRUE;
1228 }
1229 
1230 
/**
  Close and reopen the slow log (with locks).

  The file is only reopened when global_system_variables.sql_log_slow is
  set; the exclusive lock is taken and released either way.

  NOTE(review): locking goes through the `logger` object rather than
  `this` -- presumably the same instance; confirm against the callers.

  @returns FALSE.
*/
bool LOGGER::flush_slow_log()
{
  /*
    Now we lock logger, as nobody should be able to use logging routines while
    log tables are closed
  */
  logger.lock_exclusive();

  /* Reopen slow log file */
  if (global_system_variables.sql_log_slow)
    file_log_handler->get_mysql_slow_log()->reopen_file();

  /* End of log flush */
  logger.unlock();

  return 0;
}
1253 
1254 
/**
  Close and reopen the general log (with locks).

  The file is only reopened when opt_log is set; the exclusive lock is
  taken and released either way.

  NOTE(review): locking goes through the `logger` object rather than
  `this` -- presumably the same instance; confirm against the callers.

  @returns FALSE.
*/
bool LOGGER::flush_general_log()
{
  /*
    Now we lock logger, as nobody should be able to use logging routines while
    log tables are closed
  */
  logger.lock_exclusive();

  /* Reopen general log file */
  if (opt_log)
    file_log_handler->get_mysql_log()->reopen_file();

  /* End of log flush */
  logger.unlock();

  return 0;
}
1277 
1278 
/*
  Log slow query with all enabled log event handlers

  SYNOPSIS
    slow_log_print()

    thd                 THD of the query being logged
    query               The query being logged. If NULL, the name of the
                        current command (command_name[thd->get_command()])
                        is logged instead and is_command is passed as TRUE
                        to the handlers.
    query_length        The length of the query string
    current_utime       Current time in microseconds (from undefined start)

  DESCRIPTION
    Nothing is logged (and FALSE is returned) when the slow log handler
    list is empty, when thd->variables.sql_log_slow is off, or when
    global_system_variables.sql_log_slow is off. The global flag is
    checked under the shared logger lock, which is held while the
    handlers run.

  RETURN
    FALSE   OK
    TRUE    error occurred in at least one handler
*/

bool LOGGER::slow_log_print(THD *thd, const char *query, size_t query_length,
                            ulonglong current_utime)

{
  bool error= FALSE;
  Log_event_handler **current_handler;
  bool is_command= FALSE;
  char user_host_buff[MAX_USER_HOST_SIZE + 1];
  Security_context *sctx= thd->security_ctx;
  uint user_host_len= 0;
  ulonglong query_utime, lock_utime;

  DBUG_ASSERT(thd->enable_slow_log);
  /*
    Print the message to the buffer if we have slow log enabled
  */

  if (*slow_log_handler_list)
  {
    /*
      do not log slow queries from replication threads
      (the check itself is on the session's sql_log_slow variable)
    */
    if (!thd->variables.sql_log_slow)
      return 0;

    lock_shared();
    /* re-check the global switch now that the lock is held */
    if (!global_system_variables.sql_log_slow)
    {
      unlock();
      return 0;
    }

    /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
    user_host_len= (uint)(strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
                             sctx->priv_user, "[",
                             sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), "] @ ",
                             sctx->host ? sctx->host : "", " [",
                             sctx->ip ? sctx->ip : "", "]", NullS) -
                    user_host_buff);

    DBUG_ASSERT(thd->start_utime);
    DBUG_ASSERT(thd->start_time);
    /* both durations are measured from the statement's start_utime */
    query_utime= (current_utime - thd->start_utime);
    lock_utime=  (thd->utime_after_lock - thd->start_utime);
    /* logged timestamp: statement start time plus the elapsed query time */
    my_hrtime_t current_time= { hrtime_from_time(thd->start_time) +
                                thd->start_time_sec_part + query_utime };

    if (!query)
    {
      /* no query text: log the command name instead */
      is_command= TRUE;
      query= command_name[thd->get_command()].str;
      query_length= (uint)command_name[thd->get_command()].length;
    }

    /* every handler is called; an error from any of them is remembered */
    for (current_handler= slow_log_handler_list; *current_handler ;)
      error= (*current_handler++)->log_slow(thd, current_time,
                                            user_host_buff, user_host_len,
                                            query_utime, lock_utime, is_command,
                                            query, query_length) || error;

    unlock();
  }
  return error;
}
1357 
general_log_write(THD * thd,enum enum_server_command command,const char * query,size_t query_length)1358 bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
1359                                const char *query, size_t query_length)
1360 {
1361   bool error= FALSE;
1362   Log_event_handler **current_handler= general_log_handler_list;
1363   char user_host_buff[MAX_USER_HOST_SIZE + 1];
1364   uint user_host_len= 0;
1365   my_hrtime_t current_time;
1366 
1367   DBUG_ASSERT(thd);
1368 
1369   user_host_len= make_user_name(thd, user_host_buff);
1370 
1371   current_time= my_hrtime();
1372 
1373   mysql_audit_general_log(thd, hrtime_to_time(current_time),
1374                           user_host_buff, user_host_len,
1375                           command_name[(uint) command].str,
1376                           (uint)command_name[(uint) command].length,
1377                           query, (uint)query_length);
1378 
1379   if (opt_log && log_command(thd, command))
1380   {
1381     lock_shared();
1382     while (*current_handler)
1383       error|= (*current_handler++)->
1384         log_general(thd, current_time, user_host_buff,
1385                     user_host_len, thd->thread_id,
1386                     command_name[(uint) command].str,
1387                     command_name[(uint) command].length,
1388                     query, query_length,
1389                     thd->variables.character_set_client) || error;
1390     unlock();
1391   }
1392 
1393   return error;
1394 }
1395 
general_log_print(THD * thd,enum enum_server_command command,const char * format,va_list args)1396 bool LOGGER::general_log_print(THD *thd, enum enum_server_command command,
1397                                const char *format, va_list args)
1398 {
1399   size_t message_buff_len= 0;
1400   char message_buff[MAX_LOG_BUFFER_SIZE];
1401 
1402   /* prepare message */
1403   if (format)
1404     message_buff_len= my_vsnprintf(message_buff, sizeof(message_buff),
1405                                    format, args);
1406   else
1407     message_buff[0]= '\0';
1408 
1409   return general_log_write(thd, command, message_buff, message_buff_len);
1410 }
1411 
init_error_log(ulonglong error_log_printer)1412 void LOGGER::init_error_log(ulonglong error_log_printer)
1413 {
1414   if (error_log_printer & LOG_NONE)
1415   {
1416     error_log_handler_list[0]= 0;
1417     return;
1418   }
1419 
1420   switch (error_log_printer) {
1421   case LOG_FILE:
1422     error_log_handler_list[0]= file_log_handler;
1423     error_log_handler_list[1]= 0;
1424     break;
1425     /* these two are disabled for now */
1426   case LOG_TABLE:
1427     DBUG_ASSERT(0);
1428     break;
1429   case LOG_TABLE|LOG_FILE:
1430     DBUG_ASSERT(0);
1431     break;
1432   }
1433 }
1434 
init_slow_log(ulonglong slow_log_printer)1435 void LOGGER::init_slow_log(ulonglong slow_log_printer)
1436 {
1437   if (slow_log_printer & LOG_NONE)
1438   {
1439     slow_log_handler_list[0]= 0;
1440     return;
1441   }
1442 
1443   switch (slow_log_printer) {
1444   case LOG_FILE:
1445     slow_log_handler_list[0]= file_log_handler;
1446     slow_log_handler_list[1]= 0;
1447     break;
1448   case LOG_TABLE:
1449     slow_log_handler_list[0]= table_log_handler;
1450     slow_log_handler_list[1]= 0;
1451     break;
1452   case LOG_TABLE|LOG_FILE:
1453     slow_log_handler_list[0]= file_log_handler;
1454     slow_log_handler_list[1]= table_log_handler;
1455     slow_log_handler_list[2]= 0;
1456     break;
1457   }
1458 }
1459 
init_general_log(ulonglong general_log_printer)1460 void LOGGER::init_general_log(ulonglong general_log_printer)
1461 {
1462   if (general_log_printer & LOG_NONE)
1463   {
1464     general_log_handler_list[0]= 0;
1465     return;
1466   }
1467 
1468   switch (general_log_printer) {
1469   case LOG_FILE:
1470     general_log_handler_list[0]= file_log_handler;
1471     general_log_handler_list[1]= 0;
1472     break;
1473   case LOG_TABLE:
1474     general_log_handler_list[0]= table_log_handler;
1475     general_log_handler_list[1]= 0;
1476     break;
1477   case LOG_TABLE|LOG_FILE:
1478     general_log_handler_list[0]= file_log_handler;
1479     general_log_handler_list[1]= table_log_handler;
1480     general_log_handler_list[2]= 0;
1481     break;
1482   }
1483 }
1484 
1485 
activate_log_handler(THD * thd,uint log_type)1486 bool LOGGER::activate_log_handler(THD* thd, uint log_type)
1487 {
1488   MYSQL_QUERY_LOG *file_log;
1489   bool res= FALSE;
1490   lock_exclusive();
1491   switch (log_type) {
1492   case QUERY_LOG_SLOW:
1493     if (!global_system_variables.sql_log_slow)
1494     {
1495       file_log= file_log_handler->get_mysql_slow_log();
1496 
1497       file_log->open_slow_log(opt_slow_logname);
1498       if (table_log_handler->activate_log(thd, QUERY_LOG_SLOW))
1499       {
1500         /* Error printed by open table in activate_log() */
1501         res= TRUE;
1502         file_log->close(0);
1503       }
1504       else
1505       {
1506         init_slow_log(log_output_options);
1507         global_system_variables.sql_log_slow= TRUE;
1508       }
1509     }
1510     break;
1511   case QUERY_LOG_GENERAL:
1512     if (!opt_log)
1513     {
1514       file_log= file_log_handler->get_mysql_log();
1515 
1516       file_log->open_query_log(opt_logname);
1517       if (table_log_handler->activate_log(thd, QUERY_LOG_GENERAL))
1518       {
1519         /* Error printed by open table in activate_log() */
1520         res= TRUE;
1521         file_log->close(0);
1522       }
1523       else
1524       {
1525         init_general_log(log_output_options);
1526         opt_log= TRUE;
1527       }
1528     }
1529     break;
1530   default:
1531     DBUG_ASSERT(0);
1532   }
1533   unlock();
1534   return res;
1535 }
1536 
1537 
deactivate_log_handler(THD * thd,uint log_type)1538 void LOGGER::deactivate_log_handler(THD *thd, uint log_type)
1539 {
1540   my_bool *tmp_opt= 0;
1541   MYSQL_LOG *UNINIT_VAR(file_log);
1542 
1543   switch (log_type) {
1544   case QUERY_LOG_SLOW:
1545     tmp_opt= &global_system_variables.sql_log_slow;
1546     file_log= file_log_handler->get_mysql_slow_log();
1547     break;
1548   case QUERY_LOG_GENERAL:
1549     tmp_opt= &opt_log;
1550     file_log= file_log_handler->get_mysql_log();
1551     break;
1552   default:
1553     MY_ASSERT_UNREACHABLE();
1554   }
1555 
1556   if (!(*tmp_opt))
1557     return;
1558 
1559   lock_exclusive();
1560   file_log->close(0);
1561   *tmp_opt= FALSE;
1562   unlock();
1563 }
1564 
1565 
1566 /* the parameters are unused for the log tables */
init()1567 bool Log_to_csv_event_handler::init()
1568 {
1569   return 0;
1570 }
1571 
set_handlers(ulonglong error_log_printer,ulonglong slow_log_printer,ulonglong general_log_printer)1572 int LOGGER::set_handlers(ulonglong error_log_printer,
1573                          ulonglong slow_log_printer,
1574                          ulonglong general_log_printer)
1575 {
1576   /* error log table is not supported yet */
1577   DBUG_ASSERT(error_log_printer < LOG_TABLE);
1578 
1579   lock_exclusive();
1580 
1581   if ((slow_log_printer & LOG_TABLE || general_log_printer & LOG_TABLE) &&
1582       !is_log_tables_initialized)
1583   {
1584     slow_log_printer= (slow_log_printer & ~LOG_TABLE) | LOG_FILE;
1585     general_log_printer= (general_log_printer & ~LOG_TABLE) | LOG_FILE;
1586 
1587     sql_print_error("Failed to initialize log tables. "
1588                     "Falling back to the old-fashioned logs");
1589   }
1590 
1591   init_error_log(error_log_printer);
1592   init_slow_log(slow_log_printer);
1593   init_general_log(general_log_printer);
1594 
1595   unlock();
1596 
1597   return 0;
1598 }
1599 
1600  /*
1601   Save position of binary log transaction cache.
1602 
1603   SYNPOSIS
1604     binlog_trans_log_savepos()
1605 
1606     thd      The thread to take the binlog data from
1607     pos      Pointer to variable where the position will be stored
1608 
1609   DESCRIPTION
1610 
1611     Save the current position in the binary log transaction cache into
1612     the variable pointed to by 'pos'
1613  */
1614 
1615 static void
binlog_trans_log_savepos(THD * thd,my_off_t * pos)1616 binlog_trans_log_savepos(THD *thd, my_off_t *pos)
1617 {
1618   DBUG_ENTER("binlog_trans_log_savepos");
1619   DBUG_ASSERT(pos != NULL);
1620   binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
1621   DBUG_ASSERT((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open());
1622   *pos= cache_mngr->trx_cache.get_byte_position();
1623   DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos));
1624   DBUG_VOID_RETURN;
1625 }
1626 
1627 
1628 /*
1629   Truncate the binary log transaction cache.
1630 
1631   SYNPOSIS
1632     binlog_trans_log_truncate()
1633 
1634     thd      The thread to take the binlog data from
1635     pos      Position to truncate to
1636 
1637   DESCRIPTION
1638 
1639     Truncate the binary log to the given position. Will not change
1640     anything else.
1641 
1642  */
1643 static void
binlog_trans_log_truncate(THD * thd,my_off_t pos)1644 binlog_trans_log_truncate(THD *thd, my_off_t pos)
1645 {
1646   DBUG_ENTER("binlog_trans_log_truncate");
1647   DBUG_PRINT("enter", ("pos: %lu", (ulong) pos));
1648 
1649   DBUG_ASSERT(thd_get_ha_data(thd, binlog_hton) != NULL);
1650   /* Only true if binlog_trans_log_savepos() wasn't called before */
1651   DBUG_ASSERT(pos != ~(my_off_t) 0);
1652 
1653   binlog_cache_mngr *const cache_mngr=
1654     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
1655   cache_mngr->trx_cache.restore_savepoint(pos);
1656   DBUG_VOID_RETURN;
1657 }
1658 
1659 
1660 /*
1661   this function is mostly a placeholder.
1662   conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
1663   should be moved here.
1664 */
1665 
binlog_init(void * p)1666 int binlog_init(void *p)
1667 {
1668   binlog_hton= (handlerton *)p;
1669   binlog_hton->state= (WSREP_ON || opt_bin_log) ? SHOW_OPTION_YES
1670                                                 : SHOW_OPTION_NO;
1671   binlog_hton->db_type=DB_TYPE_BINLOG;
1672   binlog_hton->savepoint_offset= sizeof(my_off_t);
1673   binlog_hton->close_connection= binlog_close_connection;
1674   binlog_hton->savepoint_set= binlog_savepoint_set;
1675   binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
1676   binlog_hton->savepoint_rollback_can_release_mdl=
1677                                      binlog_savepoint_rollback_can_release_mdl;
1678   binlog_hton->commit= binlog_commit;
1679   binlog_hton->rollback= binlog_rollback;
1680   binlog_hton->prepare= binlog_prepare;
1681   binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
1682   binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
1683   return 0;
1684 }
1685 
1686 #ifdef WITH_WSREP
1687 #include "wsrep_binlog.h"
1688 #endif /* WITH_WSREP */
/**
  close_connection callback of the binlog handlerton.

  Releases the binlog cache manager stored in the thread's ha_data slot
  for binlog_hton: the slot is cleared, the manager's destructor is
  called explicitly and its memory is released with my_free().

  With WITH_WSREP, if the thread is a WSREP thread and its transaction
  cache is not empty, the contents of both caches are written to a buffer,
  a warning is logged for each, and non-empty buffers are dumped.

  @param hton  not used; the global binlog_hton is used instead
  @param thd   thread whose connection is being closed

  @return always 0
*/
static int binlog_close_connection(handlerton *hton, THD *thd)
{
  DBUG_ENTER("binlog_close_connection");
  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
#ifdef WITH_WSREP
  /*
    NOTE(review): the "stmt cache not empty" warning below is emitted
    whenever the trx cache is non-empty; the stmt cache itself is not
    tested first.
  */
  if (WSREP(thd) && cache_mngr && !cache_mngr->trx_cache.empty()) {
    IO_CACHE* cache= cache_mngr->get_binlog_cache_log(true);
    uchar *buf;
    size_t len=0;
    wsrep_write_cache_buf(cache, &buf, &len);
    WSREP_WARN("binlog trx cache not empty (%zu bytes) @ connection close %lld",
               len, (longlong) thd->thread_id);
    if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);

    cache = cache_mngr->get_binlog_cache_log(false);
    wsrep_write_cache_buf(cache, &buf, &len);
    WSREP_WARN("binlog stmt cache not empty (%zu bytes) @ connection close %lld",
               len, (longlong) thd->thread_id);
    if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
  }
#endif /* WITH_WSREP */
  /* cache_mngr is dereferenced unconditionally from here on */
  DBUG_ASSERT(cache_mngr->trx_cache.empty() && cache_mngr->stmt_cache.empty());
  thd_set_ha_data(thd, binlog_hton, NULL);
  /* explicit destructor call followed by my_free(): not released with delete */
  cache_mngr->~binlog_cache_mngr();
  my_free(cache_mngr);
  DBUG_RETURN(0);
}
1717 
/*
  This function flushes a cache upon commit/rollback.

  SYNOPSIS
    binlog_flush_cache()

    thd        The thread whose transaction should be ended
    cache_mngr Pointer to the binlog_cache_mngr to use
    end_ev     The end event to use (COMMIT, ROLLBACK, or commit XID)
    all        True if the entire transaction should be ended, false if
               only the statement transaction should be ended.
    using_stmt True if the statement cache should be flushed
    using_trx  True if the transaction cache should be flushed

  DESCRIPTION

    End the current transaction or statement. The transaction can be either
    a real transaction or a statement transaction.

    This can be to commit a transaction, with a COMMIT query event or an XA
    commit XID event. But it can also be to rollback a transaction with a
    ROLLBACK query event, used for rolling back transactions which also
    contain updates to non-transactional tables. Or it can be a flush of
    a statement cache.

    The selected caches are reset before returning, except when flushing a
    pending rows event fails: in that case 1 is returned immediately.

  RETURN
    0        OK, or nothing to flush
    1        flushing a pending rows event failed
    other    result of mysql_bin_log.write_transaction_to_binlog()
 */

static int
binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
                   Log_event *end_ev, bool all, bool using_stmt,
                   bool using_trx)
{
  int error= 0;
  DBUG_ENTER("binlog_flush_cache");
  DBUG_PRINT("enter", ("end_ev: %p", end_ev));

  /* only write to the binlog if a selected cache actually holds data */
  if ((using_stmt && !cache_mngr->stmt_cache.empty()) ||
      (using_trx && !cache_mngr->trx_cache.empty()))
  {
    /* these early exits skip the cache_mngr->reset() below */
    if (using_stmt && thd->binlog_flush_pending_rows_event(TRUE, FALSE))
      DBUG_RETURN(1);
    if (using_trx && thd->binlog_flush_pending_rows_event(TRUE, TRUE))
      DBUG_RETURN(1);

    /*
      Doing a commit or a rollback including non-transactional tables,
      i.e., ending a transaction where we might write the transaction
      cache to the binary log.

      We can always end the statement when ending a transaction since
      transactions are not allowed inside stored functions.  If they
      were, we would have to ensure that we're not ending a statement
      inside a stored function.
    */
    error= mysql_bin_log.write_transaction_to_binlog(thd, cache_mngr,
                                                     end_ev, all,
                                                     using_stmt, using_trx);
  }
  else
  {
    /*
      This can happen in row-format binlog with something like
          BEGIN; INSERT INTO nontrans_table; INSERT IGNORE INTO trans_table;
      The nontrans_table is written directly into the binlog before commit,
      and if the trans_table is ignored there will be no rows to write when
      we get here.

      So there is no work to do. Therefore, we will not increment any XID
      count, so we must not decrement any XID count in unlog().
    */
    cache_mngr->need_unlog= 0;
  }
  /* reset runs whether or not the write above succeeded */
  cache_mngr->reset(using_stmt, using_trx);

  DBUG_ASSERT((!using_stmt || cache_mngr->stmt_cache.empty()) &&
              (!using_trx || cache_mngr->trx_cache.empty()));
  DBUG_RETURN(error);
}
1795 
1796 
1797 /**
1798   This function flushes the stmt-cache upon commit.
1799 
1800   @param thd                The thread whose transaction should be flushed
1801   @param cache_mngr         Pointer to the cache manager
1802 
1803   @return
1804     nonzero if an error pops up when flushing the cache.
1805 */
1806 static inline int
binlog_commit_flush_stmt_cache(THD * thd,bool all,binlog_cache_mngr * cache_mngr)1807 binlog_commit_flush_stmt_cache(THD *thd, bool all,
1808                                binlog_cache_mngr *cache_mngr)
1809 {
1810   DBUG_ENTER("binlog_commit_flush_stmt_cache");
1811 #ifdef WITH_WSREP
1812   if (thd->wsrep_mysql_replicated > 0)
1813   {
1814     DBUG_ASSERT(WSREP(thd));
1815     WSREP_DEBUG("avoiding binlog_commit_flush_trx_cache: %d",
1816                 thd->wsrep_mysql_replicated);
1817     return 0;
1818   }
1819 #endif
1820 
1821   Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1822                           FALSE, TRUE, TRUE, 0);
1823   DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE));
1824 }
1825 
1826 /**
1827   This function flushes the trx-cache upon commit.
1828 
1829   @param thd                The thread whose transaction should be flushed
1830   @param cache_mngr         Pointer to the cache manager
1831 
1832   @return
1833     nonzero if an error pops up when flushing the cache.
1834 */
1835 static inline int
binlog_commit_flush_trx_cache(THD * thd,bool all,binlog_cache_mngr * cache_mngr)1836 binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr)
1837 {
1838   DBUG_ENTER("binlog_commit_flush_trx_cache");
1839   Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1840                           TRUE, TRUE, TRUE, 0);
1841   DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
1842 }
1843 
1844 /**
1845   This function flushes the trx-cache upon rollback.
1846 
1847   @param thd                The thread whose transaction should be flushed
1848   @param cache_mngr         Pointer to the cache manager
1849 
1850   @return
1851     nonzero if an error pops up when flushing the cache.
1852 */
1853 static inline int
binlog_rollback_flush_trx_cache(THD * thd,bool all,binlog_cache_mngr * cache_mngr)1854 binlog_rollback_flush_trx_cache(THD *thd, bool all,
1855                                 binlog_cache_mngr *cache_mngr)
1856 {
1857   Query_log_event end_evt(thd, STRING_WITH_LEN("ROLLBACK"),
1858                           TRUE, TRUE, TRUE, 0);
1859   return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
1860 }
1861 
1862 /**
1863   This function flushes the trx-cache upon commit.
1864 
1865   @param thd                The thread whose transaction should be flushed
1866   @param cache_mngr         Pointer to the cache manager
1867   @param xid                Transaction Id
1868 
1869   @return
1870     nonzero if an error pops up when flushing the cache.
1871 */
1872 static inline int
binlog_commit_flush_xid_caches(THD * thd,binlog_cache_mngr * cache_mngr,bool all,my_xid xid)1873 binlog_commit_flush_xid_caches(THD *thd, binlog_cache_mngr *cache_mngr,
1874                                bool all, my_xid xid)
1875 {
1876   if (xid)
1877   {
1878     Xid_log_event end_evt(thd, xid, TRUE);
1879     return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
1880   }
1881   else
1882   {
1883     /*
1884       Empty xid occurs in XA COMMIT ... ONE PHASE.
1885       In this case, we do not have a MySQL xid for the transaction, and the
1886       external XA transaction coordinator will have to handle recovery if
1887       needed. So we end the transaction with a plain COMMIT query event.
1888     */
1889     Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1890                             TRUE, TRUE, TRUE, 0);
1891     return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
1892   }
1893 }
1894 
1895 /**
1896   This function truncates the transactional cache upon committing or rolling
1897   back either a transaction or a statement.
1898 
1899   @param thd        The thread whose transaction should be flushed
1900   @param cache_mngr Pointer to the cache data to be flushed
1901   @param all        @c true means truncate the transaction, otherwise the
1902                     statement must be truncated.
1903 
1904   @return
1905     nonzero if an error pops up when truncating the transactional cache.
1906 */
1907 static int
binlog_truncate_trx_cache(THD * thd,binlog_cache_mngr * cache_mngr,bool all)1908 binlog_truncate_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all)
1909 {
1910   DBUG_ENTER("binlog_truncate_trx_cache");
1911   int error=0;
1912   /*
1913     This function handles transactional changes and as such this flag
1914     equals to true.
1915   */
1916   bool const is_transactional= TRUE;
1917 
1918   DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1919                       FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1920                       FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1921                       all ? "all" : "stmt"));
1922 
1923   thd->binlog_remove_pending_rows_event(TRUE, is_transactional);
1924   /*
1925     If rolling back an entire transaction or a single statement not
1926     inside a transaction, we reset the transaction cache.
1927   */
1928   if (ending_trans(thd, all))
1929   {
1930     if (cache_mngr->trx_cache.has_incident())
1931       error= mysql_bin_log.write_incident(thd);
1932 
1933     thd->clear_binlog_table_maps();
1934 
1935     cache_mngr->reset(false, true);
1936   }
1937   /*
1938     If rolling back a statement in a transaction, we truncate the
1939     transaction cache to remove the statement.
1940   */
1941   else
1942     cache_mngr->trx_cache.restore_prev_position();
1943 
1944   DBUG_ASSERT(thd->binlog_get_pending_rows_event(is_transactional) == NULL);
1945   DBUG_RETURN(error);
1946 }
1947 
binlog_prepare(handlerton * hton,THD * thd,bool all)1948 static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1949 {
1950   /*
1951     do nothing.
1952     just pretend we can do 2pc, so that MySQL won't
1953     switch to 1pc.
1954     real work will be done in MYSQL_BIN_LOG::log_and_order()
1955   */
1956   return 0;
1957 }
1958 
1959 /*
1960   We flush the cache wrapped in a beging/rollback if:
1961     . aborting a single or multi-statement transaction and;
1962     . the OPTION_KEEP_LOG is active or;
1963     . the format is STMT and a non-trans table was updated or;
1964     . the format is MIXED and a temporary non-trans table was
1965       updated or;
1966     . the format is MIXED, non-trans table was updated and
1967       aborting a single statement transaction;
1968 */
trans_cannot_safely_rollback(THD * thd,bool all)1969 static bool trans_cannot_safely_rollback(THD *thd, bool all)
1970 {
1971   DBUG_ASSERT(ending_trans(thd, all));
1972 
1973   return ((thd->variables.option_bits & OPTION_KEEP_LOG) ||
1974           (trans_has_updated_non_trans_table(thd) &&
1975            thd->wsrep_binlog_format() == BINLOG_FORMAT_STMT) ||
1976           (thd->transaction.all.has_modified_non_trans_temp_table() &&
1977            thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED) ||
1978           (trans_has_updated_non_trans_table(thd) &&
1979            ending_single_stmt_trans(thd,all) &&
1980            thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED));
1981 }
1982 
1983 
1984 /**
1985   This function is called once after each statement.
1986 
1987   It has the responsibility to flush the caches to the binary log on commits.
1988 
1989   @param hton  The binlog handlerton.
1990   @param thd   The client thread that executes the transaction.
1991   @param all   This is @c true if this is a real transaction commit, and
1992                @false otherwise.
1993 
1994   @see handlerton::commit
1995 */
binlog_commit(handlerton * hton,THD * thd,bool all)1996 static int binlog_commit(handlerton *hton, THD *thd, bool all)
1997 {
1998   int error= 0;
1999   PSI_stage_info org_stage;
2000   DBUG_ENTER("binlog_commit");
2001 
2002   binlog_cache_mngr *const cache_mngr=
2003     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
2004 
2005   if (!cache_mngr)
2006   {
2007     DBUG_ASSERT(WSREP(thd));
2008     DBUG_RETURN(0);
2009   }
2010 
2011   DBUG_PRINT("debug",
2012              ("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
2013               all,
2014               YESNO(thd->in_multi_stmt_transaction_mode()),
2015               YESNO(thd->transaction.all.modified_non_trans_table),
2016               YESNO(thd->transaction.stmt.modified_non_trans_table)));
2017 
2018 
2019   thd->backup_stage(&org_stage);
2020   THD_STAGE_INFO(thd, stage_binlog_write);
2021   if (!cache_mngr->stmt_cache.empty())
2022   {
2023     error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
2024   }
2025 
2026   if (cache_mngr->trx_cache.empty())
2027   {
2028     /*
2029       we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
2030     */
2031     cache_mngr->reset(false, true);
2032     THD_STAGE_INFO(thd, org_stage);
2033     DBUG_RETURN(error);
2034   }
2035 
2036   /*
2037     We commit the transaction if:
2038      - We are not in a transaction and committing a statement, or
2039      - We are in a transaction and a full transaction is committed.
2040     Otherwise, we accumulate the changes.
2041   */
2042   if (likely(!error) && ending_trans(thd, all))
2043     error= binlog_commit_flush_trx_cache(thd, all, cache_mngr);
2044 
2045   /*
2046     This is part of the stmt rollback.
2047   */
2048   if (!all)
2049     cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
2050 
2051   THD_STAGE_INFO(thd, org_stage);
2052   DBUG_RETURN(error);
2053 }
2054 
/**
  This function is called when a transaction or a statement is rolled back.

  Steps, in order:
    - the stmt-cache is either replaced by an incident event (when flagged
      with an incident) or flushed if non-empty;
    - if the trx-cache is empty it is reset and the function returns;
    - otherwise the trx-cache is truncated, flushed wrapped in a ROLLBACK,
      or left alone, depending on the write-error state and on what the
      transaction/statement modified (see the conditions in the body);
    - for a statement rollback (!all) the saved previous position of the
      trx-cache is invalidated.

  @param hton  The binlog handlerton.
  @param thd   The client thread that executes the transaction.
  @param all   This is @c true if this is a real transaction rollback, and
               @c false otherwise.

  @return 0 on success (also when the thread has no cache manager),
          nonzero if writing, flushing or truncating reported an error.

  @see handlerton::rollback
*/
static int binlog_rollback(handlerton *hton, THD *thd, bool all)
{
  DBUG_ENTER("binlog_rollback");
  int error= 0;
  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);

  /* No cache manager attached to this thread: nothing to roll back. */
  if (!cache_mngr)
  {
    DBUG_ASSERT(WSREP(thd));
    DBUG_RETURN(0);
  }

  DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
                       YESNO(all),
                       YESNO(thd->transaction.all.modified_non_trans_table),
                       YESNO(thd->transaction.stmt.modified_non_trans_table)));

  /*
    If an incident event is set we do not flush the content of the statement
    cache because it may be corrupted.
  */
  if (cache_mngr->stmt_cache.has_incident())
  {
    error= mysql_bin_log.write_incident(thd);
    cache_mngr->reset(true, false);
  }
  else if (!cache_mngr->stmt_cache.empty())
  {
    error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
  }

  if (cache_mngr->trx_cache.empty())
  {
    /*
      we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
    */
    cache_mngr->reset(false, true);
    DBUG_RETURN(error);
  }
  if (!wsrep_emulate_bin_log && mysql_bin_log.check_write_error(thd))
  {
    /*
      "all == true" means that a "rollback statement" triggered the error and
      this function was called. However, this must not happen as a rollback
      is written directly to the binary log. And in auto-commit mode, a single
      statement that is rolled back has the flag all == false.
    */
    DBUG_ASSERT(!all);
    /*
      We reach this point if the effect of a statement did not properly get into
      a cache and need to be rolled back.
    */
    /* |= keeps a stmt-cache error from above even if truncation succeeds. */
    error |= binlog_truncate_trx_cache(thd, cache_mngr, all);
  }
  else if (likely(!error))
  {
    /*
      The cache content cannot simply be dropped: write it to the binary
      log terminated by a ROLLBACK.
    */
    if (ending_trans(thd, all) && trans_cannot_safely_rollback(thd, all))
      error= binlog_rollback_flush_trx_cache(thd, all, cache_mngr);
    /*
      Truncate the cache if:
        . aborting a single or multi-statement transaction or;
        . the current statement created or dropped a temporary table
          while having actual STATEMENT format;
        . the format is not STMT or no non-trans table was
          updated and;
        . the format is not MIXED or no temporary non-trans table
          was updated.
    */
    else if (ending_trans(thd, all) ||
             (!(thd->transaction.stmt.has_created_dropped_temp_table() &&
                !thd->is_current_stmt_binlog_format_row()) &&
              (!stmt_has_updated_non_trans_table(thd) ||
               thd->wsrep_binlog_format() != BINLOG_FORMAT_STMT) &&
              (!thd->transaction.stmt.has_modified_non_trans_temp_table() ||
               thd->wsrep_binlog_format() != BINLOG_FORMAT_MIXED)))
      error= binlog_truncate_trx_cache(thd, cache_mngr, all);
  }

  /*
    This is part of the stmt rollback.
  */
  if (!all)
    cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);

  DBUG_RETURN(error);
}
2152 
2153 
binlog_reset_cache(THD * thd)2154 void binlog_reset_cache(THD *thd)
2155 {
2156   binlog_cache_mngr *const cache_mngr= opt_bin_log ?
2157     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton) : 0;
2158   DBUG_ENTER("binlog_reset_cache");
2159   if (cache_mngr)
2160   {
2161     thd->binlog_remove_pending_rows_event(TRUE, TRUE);
2162     cache_mngr->reset(true, true);
2163   }
2164   DBUG_VOID_RETURN;
2165 }
2166 
2167 
set_write_error(THD * thd,bool is_transactional)2168 void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
2169 {
2170   DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
2171 
2172   write_error= 1;
2173 
2174   if (unlikely(check_write_error(thd)))
2175     DBUG_VOID_RETURN;
2176 
2177   if (my_errno == EFBIG)
2178   {
2179     if (is_transactional)
2180     {
2181       my_message(ER_TRANS_CACHE_FULL, ER_THD(thd, ER_TRANS_CACHE_FULL), MYF(0));
2182     }
2183     else
2184     {
2185       my_message(ER_STMT_CACHE_FULL, ER_THD(thd, ER_STMT_CACHE_FULL), MYF(0));
2186     }
2187   }
2188   else
2189   {
2190     my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
2191   }
2192 #ifdef WITH_WSREP
2193   /* If wsrep transaction is active and binlog emulation is on,
2194      binlog write error may leave transaction without any registered
2195      htons. This makes wsrep rollback hooks to be skipped and the
2196      transaction will remain alive in wsrep world after rollback.
2197      Register binlog hton here to ensure that rollback happens in full. */
2198   if (WSREP_EMULATE_BINLOG(thd))
2199   {
2200     if (is_transactional)
2201       trans_register_ha(thd, TRUE, binlog_hton);
2202     trans_register_ha(thd, FALSE, binlog_hton);
2203   }
2204 #endif /* WITH_WSREP */
2205   DBUG_VOID_RETURN;
2206 }
2207 
check_write_error(THD * thd)2208 bool MYSQL_BIN_LOG::check_write_error(THD *thd)
2209 {
2210   DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
2211 
2212   bool checked= FALSE;
2213 
2214   if (likely(!thd->is_error()))
2215     DBUG_RETURN(checked);
2216 
2217   switch (thd->get_stmt_da()->sql_errno())
2218   {
2219     case ER_TRANS_CACHE_FULL:
2220     case ER_STMT_CACHE_FULL:
2221     case ER_ERROR_ON_WRITE:
2222     case ER_BINLOG_LOGGING_IMPOSSIBLE:
2223       checked= TRUE;
2224     break;
2225   }
2226 
2227   DBUG_RETURN(checked);
2228 }
2229 
2230 
2231 /**
2232   @note
2233   How do we handle this (unlikely but legal) case:
2234   @verbatim
2235     [transaction] + [update to non-trans table] + [rollback to savepoint] ?
2236   @endverbatim
2237   The problem occurs when a savepoint is before the update to the
2238   non-transactional table. Then when there's a rollback to the savepoint, if we
2239   simply truncate the binlog cache, we lose the part of the binlog cache where
2240   the update is. If we want to not lose it, we need to write the SAVEPOINT
2241   command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
  is easy: it is simply written at the end of the binlog cache, but the former
2243   should be *inserted* to the place where the user called SAVEPOINT. The
2244   solution is that when the user calls SAVEPOINT, we write it to the binlog
2245   cache (so no need to later insert it). As transactions are never intermixed
2246   in the binary log (i.e. they are serialized), we won't have conflicts with
2247   savepoint names when using mysqlbinlog or in the slave SQL thread.
2248   Then when ROLLBACK TO SAVEPOINT is called, if we updated some
2249   non-transactional table, we don't truncate the binlog cache but instead write
2250   ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
2251   will chop the SAVEPOINT command from the binlog cache, which is good as in
2252   that case there is no need to have it in the binlog).
2253 */
2254 
binlog_savepoint_set(handlerton * hton,THD * thd,void * sv)2255 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
2256 {
2257   int error= 1;
2258   DBUG_ENTER("binlog_savepoint_set");
2259 
2260   char buf[1024];
2261 
2262   String log_query(buf, sizeof(buf), &my_charset_bin);
2263   if (log_query.copy(STRING_WITH_LEN("SAVEPOINT "), &my_charset_bin) ||
2264       append_identifier(thd, &log_query, &thd->lex->ident))
2265     DBUG_RETURN(1);
2266   int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
2267   Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2268                         TRUE, FALSE, TRUE, errcode);
2269   /*
2270     We cannot record the position before writing the statement
2271     because a rollback to a savepoint (.e.g. consider it "S") would
2272     prevent the savepoint statement (i.e. "SAVEPOINT S") from being
2273     written to the binary log despite the fact that the server could
2274     still issue other rollback statements to the same savepoint (i.e.
2275     "S").
2276     Given that the savepoint is valid until the server releases it,
2277     ie, until the transaction commits or it is released explicitly,
2278     we need to log it anyway so that we don't have "ROLLBACK TO S"
2279     or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
2280     log.
2281   */
2282   if (likely(!(error= mysql_bin_log.write(&qinfo))))
2283     binlog_trans_log_savepos(thd, (my_off_t*) sv);
2284 
2285   DBUG_RETURN(error);
2286 }
2287 
binlog_savepoint_rollback(handlerton * hton,THD * thd,void * sv)2288 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
2289 {
2290   DBUG_ENTER("binlog_savepoint_rollback");
2291 
2292   /*
2293     Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
2294     non-transactional table. Otherwise, truncate the binlog cache starting
2295     from the SAVEPOINT command.
2296   */
2297 #ifdef WITH_WSREP
2298   /* for streaming replication, we  must replicate savepoint rollback so that
2299      slaves can maintain SR transactions
2300    */
2301   if (unlikely(thd->wsrep_trx().is_streaming() ||
2302                (trans_has_updated_non_trans_table(thd)) ||
2303                (thd->variables.option_bits & OPTION_KEEP_LOG)))
2304 #else
2305   if (unlikely(trans_has_updated_non_trans_table(thd) ||
2306                (thd->variables.option_bits & OPTION_KEEP_LOG)))
2307 #endif /* WITH_WSREP */
2308   {
2309     char buf[1024];
2310     String log_query(buf, sizeof(buf), &my_charset_bin);
2311     if (log_query.copy(STRING_WITH_LEN("ROLLBACK TO "), &my_charset_bin) ||
2312         append_identifier(thd, &log_query, &thd->lex->ident))
2313       DBUG_RETURN(1);
2314     int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
2315     Query_log_event qinfo(thd, log_query.ptr(), log_query.length(),
2316                           TRUE, FALSE, TRUE, errcode);
2317     DBUG_RETURN(mysql_bin_log.write(&qinfo));
2318   }
2319 
2320   binlog_trans_log_truncate(thd, *(my_off_t*)sv);
2321 
2322   /*
2323     When a SAVEPOINT is executed inside a stored function/trigger we force the
2324     pending event to be flushed with a STMT_END_F flag and clear the table maps
2325     as well to ensure that following DMLs will have a clean state to start
2326     with. ROLLBACK inside a stored routine has to finalize possibly existing
2327     current row-based pending event with cleaning up table maps. That ensures
2328     that following DMLs will have a clean state to start with.
2329    */
2330   if (thd->in_sub_stmt)
2331     thd->clear_binlog_table_maps();
2332 
2333   DBUG_RETURN(0);
2334 }
2335 
2336 
2337 /**
2338   Check whether binlog state allows to safely release MDL locks after
2339   rollback to savepoint.
2340 
2341   @param hton  The binlog handlerton.
2342   @param thd   The client thread that executes the transaction.
2343 
2344   @return true  - It is safe to release MDL locks.
2345           false - If it is not.
2346 */
binlog_savepoint_rollback_can_release_mdl(handlerton * hton,THD * thd)2347 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
2348                                                       THD *thd)
2349 {
2350   DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
2351   /*
2352     If we have not updated any non-transactional tables rollback
2353     to savepoint will simply truncate binlog cache starting from
2354     SAVEPOINT command. So it should be safe to release MDL acquired
2355     after SAVEPOINT command in this case.
2356   */
2357   DBUG_RETURN(!trans_cannot_safely_rollback(thd, true));
2358 }
2359 
2360 
check_binlog_magic(IO_CACHE * log,const char ** errmsg)2361 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
2362 {
2363   uchar magic[4];
2364   DBUG_ASSERT(my_b_tell(log) == 0);
2365 
2366   if (my_b_read(log, magic, sizeof(magic)))
2367   {
2368     *errmsg = "I/O error reading the header from the binary log";
2369     sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
2370 		    log->error);
2371     return 1;
2372   }
2373   if (bcmp(magic, BINLOG_MAGIC, sizeof(magic)))
2374   {
2375     *errmsg = "Binlog has bad magic number;  It's not a binary log file that can be used by this version of MySQL";
2376     return 1;
2377   }
2378   return 0;
2379 }
2380 
2381 
open_binlog(IO_CACHE * log,const char * log_file_name,const char ** errmsg)2382 File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
2383 {
2384   File file;
2385   DBUG_ENTER("open_binlog");
2386 
2387   if ((file= mysql_file_open(key_file_binlog,
2388                              log_file_name, O_RDONLY | O_BINARY | O_SHARE,
2389                              MYF(MY_WME))) < 0)
2390   {
2391     sql_print_error("Failed to open log (file '%s', errno %d)",
2392                     log_file_name, my_errno);
2393     *errmsg = "Could not open log file";
2394     goto err;
2395   }
2396   if (init_io_cache(log, file, (size_t)binlog_file_cache_size, READ_CACHE, 0, 0,
2397                     MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
2398   {
2399     sql_print_error("Failed to create a cache on log (file '%s')",
2400                     log_file_name);
2401     *errmsg = "Could not open log file";
2402     goto err;
2403   }
2404   if (check_binlog_magic(log,errmsg))
2405     goto err;
2406   DBUG_RETURN(file);
2407 
2408 err:
2409   if (file >= 0)
2410   {
2411     mysql_file_close(file, MYF(0));
2412     end_io_cache(log);
2413   }
2414   DBUG_RETURN(-1);
2415 }
2416 
#ifdef _WIN32
/* Set to 1 once setup_windows_event_source() has run. */
static int eventSource = 0;

/**
  Register the "MariaDB" event source for the Windows Application event
  log.

  Creates the registry key
  HKLM\SYSTEM\CurrentControlSet\Services\EventLog\Application\MariaDB and
  stores two values under it: "EventMessageFile" (path of the running
  module) and "TypesSupported" (error, warning and information events).

  Registration is best effort and failures are not reported. If the key
  cannot be created nothing else is attempted. If the module path cannot
  be obtained, or does not fit into the buffer, "EventMessageFile" is not
  written. Only the first call does any work.
*/
static void setup_windows_event_source()
{
  HKEY    hRegKey= NULL;
  TCHAR   szPath[MAX_PATH];
  DWORD   dwTypes;
  DWORD   dwPathLen;

  if (eventSource)               // Ensure that we are only called once
    return;
  eventSource= 1;

  // Create the event source registry key
  if (RegCreateKey(HKEY_LOCAL_MACHINE,
                   "SYSTEM\\CurrentControlSet\\Services\\EventLog\\Application\\MariaDB",
                   &hRegKey) != ERROR_SUCCESS)
    return;                      // No key: nothing to set, nothing to close

  /* Name of the PE module that contains the message resource */
  dwPathLen= GetModuleFileName(NULL, szPath, MAX_PATH);

  /*
    Register EventMessageFile. Skipped when the call failed (0) or the
    path was truncated (>= MAX_PATH): szPath is then either unset or not a
    usable path, and strlen() on it would not be safe.
  */
  if (dwPathLen > 0 && dwPathLen < MAX_PATH)
  {
    (void) RegSetValueEx(hRegKey, "EventMessageFile", 0, REG_EXPAND_SZ,
                         (PBYTE) szPath, (DWORD) (strlen(szPath) + 1));
  }

  /* Register supported event types */
  dwTypes= (EVENTLOG_ERROR_TYPE | EVENTLOG_WARNING_TYPE |
            EVENTLOG_INFORMATION_TYPE);
  (void) RegSetValueEx(hRegKey, "TypesSupported", 0, REG_DWORD,
                       (LPBYTE) &dwTypes, sizeof dwTypes);

  RegCloseKey(hRegKey);
}

#endif /* _WIN32 */
2453 
2454 
/**
  Find a unique filename for 'filename.#'.

  Set '#' to the number next to the maximum found in the most
  recent log file extension.

  This function will return nonzero if: (i) the generated name
  exceeds FN_REFLEN; (ii) if the number of extensions is exhausted;
  or (iii) some other error happened while examining the filesystem.

  The buffer behind @c name is modified in place: on success the
  extension ".NNNNNN" (number printed with "%06lu") is appended to it.

  @param name                   Base name of file
  @param min_log_number_to_use  minimum log number to choose. Set by
                                CHANGE MASTER .. TO
  @param last_used_log_number   If 0, find log number based on files.
                                If not 0, then use *last_used_log_number +1
                                Will be update to new generated number
  @return
    0       ok
    nonzero if not possible to get unique filename.
*/

static int find_uniq_filename(char *name, ulong min_log_number_to_use,
                              ulong *last_used_log_number)
{
  uint                  i;
  char                  buff[FN_REFLEN], ext_buf[FN_REFLEN];
  struct st_my_dir     *dir_info;
  struct fileinfo *file_info;
  ulong                 max_found= 0, next= 0, number= 0;
  size_t		buf_length, length;
  char			*start, *end;
  int                   error= 0;
  DBUG_ENTER("find_uniq_filename");

  /* Split off the directory part into buff; start is the file name part. */
  length= dirname_part(buff, name, &buf_length);
  start=  name + length;
  end=    strend(start);

  /*
    Overwrite the position returned by strend() with '.'; from here on
    length is the size of the "<basename>." prefix used for matching.
    NOTE(review): no terminator is written after the '.' until the
    strmov()/sprintf() calls below run - confirm callers do not rely on
    name being terminated on the early error exits.
  */
  *end='.';
  length= (size_t) (end - start + 1);

  /* The following matches the code for my_dir () below */
  DBUG_EXECUTE_IF("error_unique_log_filename",
                  {
                    strmov(end,".1");
                    DBUG_RETURN(1);
                  });

  /* A known last number is trusted as-is; the directory is not scanned. */
  if (*last_used_log_number)
    max_found= *last_used_log_number;
  else
  {
    if (unlikely(!(dir_info= my_dir(buff, MYF(MY_DONT_SORT)))))
    {						// This shouldn't happen
      strmov(end,".1");				// use name+1
      DBUG_RETURN(1);
    }
    file_info= dir_info->dir_entry;
    /* Start one below the requested minimum so that next == minimum. */
    max_found= min_log_number_to_use ? min_log_number_to_use-1 : 0;
    /* Keep the largest numeric extension among entries "<basename>.<n>". */
    for (i= dir_info->number_of_files ; i-- ; file_info++)
    {
      if (strncmp(file_info->name, start, length) == 0 &&
          test_if_number(file_info->name+length, &number,0))
      {
        set_if_bigger(max_found, number);
      }
    }
    my_dirend(dir_info);
  }

  /* check if reached the maximum possible extension number */
  if (max_found >= MAX_LOG_UNIQUE_FN_EXT)
  {
    sql_print_error("Log filename extension number exhausted: %06lu. \
Please fix this by archiving old logs and \
updating the index files.", max_found);
    error= 1;
    goto end;
  }

  next= max_found + 1;
  /* Format into a scratch buffer first, only to learn the length. */
  if (sprintf(ext_buf, "%06lu", next)<0)
  {
    error= 1;
    goto end;
  }
  /* Re-write the '.' and step past it; end now points at the extension. */
  *end++='.';

  /*
    Check if the generated extension size + the file name exceeds the
    buffer size used. If one did not check this, then the filename might be
    truncated, resulting in error.
   */
  if (((strlen(ext_buf) + (end - name)) >= FN_REFLEN))
  {
    sql_print_error("Log filename too large: %s%s (%zu). \
Please fix this by archiving old logs and updating the \
index files.", name, ext_buf, (strlen(ext_buf) + (end - name)));
    error= 1;
    goto end;
  }

  /* The extension fits: write it straight after the '.' in name. */
  if (sprintf(end, "%06lu", next)<0)
  {
    error= 1;
    goto end;
  }
  *last_used_log_number= next;

  /* print warning if reaching the end of available extensions. */
  if ((next > (MAX_LOG_UNIQUE_FN_EXT - LOG_WARN_UNIQUE_FN_EXT_LEFT)))
    sql_print_warning("Next log extension: %lu. \
Remaining log filename extensions: %lu. \
Please consider archiving some logs.", next, (MAX_LOG_UNIQUE_FN_EXT - next));

end:
  DBUG_RETURN(error);
}
2573 
2574 
init(enum_log_type log_type_arg,enum cache_type io_cache_type_arg)2575 void MYSQL_LOG::init(enum_log_type log_type_arg,
2576                      enum cache_type io_cache_type_arg)
2577 {
2578   DBUG_ENTER("MYSQL_LOG::init");
2579   log_type= log_type_arg;
2580   io_cache_type= io_cache_type_arg;
2581   DBUG_PRINT("info",("log_type: %d", log_type));
2582   DBUG_VOID_RETURN;
2583 }
2584 
2585 
init_and_set_log_file_name(const char * log_name,const char * new_name,ulong next_log_number,enum_log_type log_type_arg,enum cache_type io_cache_type_arg)2586 bool MYSQL_LOG::init_and_set_log_file_name(const char *log_name,
2587                                            const char *new_name,
2588                                            ulong next_log_number,
2589                                            enum_log_type log_type_arg,
2590                                            enum cache_type io_cache_type_arg)
2591 {
2592   init(log_type_arg, io_cache_type_arg);
2593 
2594   if (new_name)
2595   {
2596     strmov(log_file_name, new_name);
2597   }
2598   else if (!new_name && generate_new_name(log_file_name, log_name,
2599                                           next_log_number))
2600     return TRUE;
2601 
2602   return FALSE;
2603 }
2604 
2605 
2606 /*
2607   Open a (new) log file.
2608 
2609   SYNOPSIS
2610     open()
2611 
2612     log_name            The name of the log to open
2613     log_type_arg        The type of the log. E.g. LOG_NORMAL
2614     new_name            The new name for the logfile. This is only needed
2615                         when the method is used to open the binlog file.
2616     io_cache_type_arg   The type of the IO_CACHE to use for this log file
2617 
2618   DESCRIPTION
2619     Open the logfile, init IO_CACHE and write startup messages
2620     (in case of general and slow query logs).
2621 
2622   RETURN VALUES
2623     0   ok
2624     1   error
2625 */
2626 
open(PSI_file_key log_file_key,const char * log_name,enum_log_type log_type_arg,const char * new_name,ulong next_log_number,enum cache_type io_cache_type_arg)2627 bool MYSQL_LOG::open(
2628 #ifdef HAVE_PSI_INTERFACE
2629                      PSI_file_key log_file_key,
2630 #endif
2631                      const char *log_name, enum_log_type log_type_arg,
2632                      const char *new_name, ulong next_log_number,
2633                      enum cache_type io_cache_type_arg)
2634 {
2635   char buff[FN_REFLEN];
2636   MY_STAT f_stat;
2637   File file= -1;
2638   my_off_t seek_offset;
2639   bool is_fifo = false;
2640   int open_flags= O_CREAT | O_BINARY | O_CLOEXEC;
2641   DBUG_ENTER("MYSQL_LOG::open");
2642   DBUG_PRINT("enter", ("log_type: %d", (int) log_type_arg));
2643 
2644   write_error= 0;
2645 
2646   if (!(name= my_strdup(log_name, MYF(MY_WME))))
2647   {
2648     name= (char *)log_name; // for the error message
2649     goto err;
2650   }
2651 
2652   /*
2653     log_type is LOG_UNKNOWN if we should not generate a new name
2654     This is only used when called from MYSQL_BINARY_LOG::open, which
2655     has already updated log_file_name.
2656    */
2657   if (log_type_arg != LOG_UNKNOWN &&
2658       init_and_set_log_file_name(name, new_name, next_log_number,
2659                                  log_type_arg, io_cache_type_arg))
2660     goto err;
2661 
2662   is_fifo = my_stat(log_file_name, &f_stat, MYF(0)) &&
2663             MY_S_ISFIFO(f_stat.st_mode);
2664 
2665   if (io_cache_type == SEQ_READ_APPEND)
2666     open_flags |= O_RDWR | O_APPEND;
2667   else
2668     open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
2669 
2670   if (is_fifo)
2671     open_flags |= O_NONBLOCK;
2672 
2673   db[0]= 0;
2674 
2675 #ifdef HAVE_PSI_INTERFACE
2676   /* Keep the key for reopen */
2677   m_log_file_key= log_file_key;
2678 #endif
2679 
2680   if ((file= mysql_file_open(log_file_key, log_file_name, open_flags,
2681                              MYF(MY_WME))) < 0)
2682     goto err;
2683 
2684   if (is_fifo)
2685     seek_offset= 0;
2686   else if ((seek_offset= mysql_file_tell(file, MYF(MY_WME))))
2687     goto err;
2688 
2689   if (init_io_cache(&log_file, file, IO_SIZE, io_cache_type, seek_offset, 0,
2690                     MYF(MY_WME | MY_NABP |
2691                         ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
2692     goto err;
2693 
2694   if (log_type == LOG_NORMAL)
2695   {
2696     char *end;
2697     size_t len=my_snprintf(buff, sizeof(buff), "%s, Version: %s (%s). "
2698 #ifdef EMBEDDED_LIBRARY
2699                         "embedded library\n",
2700                         my_progname, server_version, MYSQL_COMPILATION_COMMENT
2701 #elif defined(_WIN32)
2702 			"started with:\nTCP Port: %d, Named Pipe: %s\n",
2703                         my_progname, server_version, MYSQL_COMPILATION_COMMENT,
2704                         mysqld_port, mysqld_unix_port
2705 #else
2706 			"started with:\nTcp port: %d  Unix socket: %s\n",
2707                         my_progname, server_version, MYSQL_COMPILATION_COMMENT,
2708                         mysqld_port, mysqld_unix_port
2709 #endif
2710                        );
2711     end= strnmov(buff + len, "Time\t\t    Id Command\tArgument\n",
2712                  sizeof(buff) - len);
2713     if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) ||
2714 	flush_io_cache(&log_file))
2715       goto err;
2716   }
2717 
2718   log_state= LOG_OPENED;
2719   DBUG_RETURN(0);
2720 
2721 err:
2722   sql_print_error(fatal_log_error, name, errno);
2723   if (file >= 0)
2724     mysql_file_close(file, MYF(0));
2725   end_io_cache(&log_file);
2726   my_free(name);
2727   name= NULL;
2728   log_state= LOG_CLOSED;
2729   DBUG_RETURN(1);
2730 }
2731 
MYSQL_LOG()2732 MYSQL_LOG::MYSQL_LOG()
2733   : name(0), write_error(FALSE), inited(FALSE), log_type(LOG_UNKNOWN),
2734     log_state(LOG_CLOSED)
2735 {
2736   /*
2737     We don't want to initialize LOCK_Log here as such initialization depends on
2738     safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
2739     called only in main(). Doing initialization here would make it happen
2740     before main().
2741   */
2742   bzero((char*) &log_file, sizeof(log_file));
2743 }
2744 
init_pthread_objects()2745 void MYSQL_LOG::init_pthread_objects()
2746 {
2747   DBUG_ASSERT(inited == 0);
2748   inited= 1;
2749   mysql_mutex_init(key_LOG_LOCK_log, &LOCK_log, MY_MUTEX_INIT_SLOW);
2750 }
2751 
2752 /*
2753   Close the log file
2754 
2755   SYNOPSIS
2756     close()
2757     exiting     Bitmask. LOG_CLOSE_TO_BE_OPENED is used if we intend to call
2758                 open at once after close. LOG_CLOSE_DELAYED_CLOSE is used for
2759                 binlog rotation, to delay actual close of the old file until
2760                 we have successfully created the new file.
2761 
2762   NOTES
2763     One can do an open on the object at once after doing a close.
2764     The internal structures are not freed until cleanup() is called
2765 */
2766 
close(uint exiting)2767 void MYSQL_LOG::close(uint exiting)
2768 {					// One can't set log_type here!
2769   DBUG_ENTER("MYSQL_LOG::close");
2770   DBUG_PRINT("enter",("exiting: %d", (int) exiting));
2771   if (log_state == LOG_OPENED)
2772   {
2773     end_io_cache(&log_file);
2774 
2775     if (log_type == LOG_BIN && mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error)
2776     {
2777       write_error= 1;
2778       sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2779     }
2780 
2781     if (!(exiting & LOG_CLOSE_DELAYED_CLOSE) &&
2782         mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error)
2783     {
2784       write_error= 1;
2785       sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2786     }
2787   }
2788 
2789   log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
2790   my_free(name);
2791   name= NULL;
2792   DBUG_VOID_RETURN;
2793 }
2794 
2795 /** This is called only once. */
2796 
cleanup()2797 void MYSQL_LOG::cleanup()
2798 {
2799   DBUG_ENTER("cleanup");
2800   if (inited)
2801   {
2802     inited= 0;
2803     mysql_mutex_destroy(&LOCK_log);
2804     close(0);
2805   }
2806   DBUG_VOID_RETURN;
2807 }
2808 
2809 
generate_new_name(char * new_name,const char * log_name,ulong next_log_number)2810 int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name,
2811                                  ulong next_log_number)
2812 {
2813   fn_format(new_name, log_name, mysql_data_home, "", 4);
2814   return 0;
2815 }
2816 
generate_new_name(char * new_name,const char * log_name,ulong next_log_number)2817 int MYSQL_BIN_LOG::generate_new_name(char *new_name, const char *log_name,
2818                                      ulong next_log_number)
2819 {
2820   fn_format(new_name, log_name, mysql_data_home, "", 4);
2821   if (!fn_ext(log_name)[0])
2822   {
2823     if (DBUG_EVALUATE_IF("binlog_inject_new_name_error", TRUE, FALSE) ||
2824         unlikely(find_uniq_filename(new_name, next_log_number,
2825                                     &last_used_log_number)))
2826     {
2827       THD *thd= current_thd;
2828       if (unlikely(thd))
2829         my_error(ER_NO_UNIQUE_LOGFILE, MYF(ME_FATAL), log_name);
2830       sql_print_error(ER_DEFAULT(ER_NO_UNIQUE_LOGFILE), log_name);
2831       return 1;
2832     }
2833   }
2834   return 0;
2835 }
2836 
2837 
2838 /*
2839   Reopen the log file
2840 
2841   SYNOPSIS
2842     reopen_file()
2843 
2844   DESCRIPTION
2845     Reopen the log file. The method is used during FLUSH LOGS
2846     and locks LOCK_log mutex
2847 */
2848 
2849 
reopen_file()2850 void MYSQL_QUERY_LOG::reopen_file()
2851 {
2852   char *save_name;
2853   DBUG_ENTER("MYSQL_LOG::reopen_file");
2854 
2855   mysql_mutex_lock(&LOCK_log);
2856   if (!is_open())
2857   {
2858     DBUG_PRINT("info",("log is closed"));
2859     mysql_mutex_unlock(&LOCK_log);
2860     DBUG_VOID_RETURN;
2861   }
2862 
2863   save_name= name;
2864   name= 0;				// Don't free name
2865   close(LOG_CLOSE_TO_BE_OPENED);
2866 
2867   /*
2868      Note that at this point, log_state != LOG_CLOSED (important for is_open()).
2869   */
2870 
2871   open(
2872 #ifdef HAVE_PSI_INTERFACE
2873        m_log_file_key,
2874 #endif
2875        save_name, log_type, 0, 0, io_cache_type);
2876   my_free(save_name);
2877 
2878   mysql_mutex_unlock(&LOCK_log);
2879 
2880   DBUG_VOID_RETURN;
2881 }
2882 
2883 
2884 /*
2885   Write a command to traditional general log file
2886 
2887   SYNOPSIS
2888     write()
2889 
2890     event_time        command start timestamp
2891     user_host         the pointer to the string with user@host info
2892     user_host_len     length of the user_host string. this is computed once
2893                       and passed to all general log  event handlers
2894     thread_id         Id of the thread, issued a query
2895     command_type      the type of the command being logged
2896     command_type_len  the length of the string above
2897     sql_text          the very text of the query being executed
2898     sql_text_len      the length of sql_text string
2899 
2900   DESCRIPTION
2901 
   Log the given command to the normal (non-rotatable) log file
2903 
2904   RETURN
    FALSE - OK
2906     TRUE - error occurred
2907 */
2908 
write(time_t event_time,const char * user_host,size_t user_host_len,my_thread_id thread_id_arg,const char * command_type,size_t command_type_len,const char * sql_text,size_t sql_text_len)2909 bool MYSQL_QUERY_LOG::write(time_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
2910                             const char *command_type, size_t command_type_len,
2911                             const char *sql_text, size_t sql_text_len)
2912 {
2913   char buff[32];
2914   char local_time_buff[MAX_TIME_SIZE];
2915   struct tm start;
2916   size_t time_buff_len= 0;
2917 
2918   mysql_mutex_lock(&LOCK_log);
2919 
2920   /* Test if someone closed between the is_open test and lock */
2921   if (is_open())
2922   {
2923     /* for testing output of timestamp and thread id */
2924     DBUG_EXECUTE_IF("reset_log_last_time", last_time= 0;);
2925 
2926     /* Note that my_b_write() assumes it knows the length for this */
2927     if (event_time != last_time)
2928     {
2929       last_time= event_time;
2930 
2931       localtime_r(&event_time, &start);
2932 
2933       time_buff_len= my_snprintf(local_time_buff, MAX_TIME_SIZE,
2934                                  "%02d%02d%02d %2d:%02d:%02d\t",
2935                                  start.tm_year % 100, start.tm_mon + 1,
2936                                  start.tm_mday, start.tm_hour,
2937                                  start.tm_min, start.tm_sec);
2938 
2939       if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len))
2940         goto err;
2941     }
2942     else
2943       if (my_b_write(&log_file, (uchar*) "\t\t" ,2) < 0)
2944         goto err;
2945 
2946     /* command_type, thread_id */
2947     size_t length= my_snprintf(buff, 32, "%6llu ", thread_id_arg);
2948 
2949     if (my_b_write(&log_file, (uchar*) buff, length))
2950       goto err;
2951 
2952     if (my_b_write(&log_file, (uchar*) command_type, command_type_len))
2953       goto err;
2954 
2955     if (my_b_write(&log_file, (uchar*) "\t", 1))
2956       goto err;
2957 
2958     /* sql_text */
2959     if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len))
2960       goto err;
2961 
2962     if (my_b_write(&log_file, (uchar*) "\n", 1) ||
2963         flush_io_cache(&log_file))
2964       goto err;
2965   }
2966 
2967   mysql_mutex_unlock(&LOCK_log);
2968   return FALSE;
2969 err:
2970 
2971   if (!write_error)
2972   {
2973     write_error= 1;
2974     sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2975   }
2976   mysql_mutex_unlock(&LOCK_log);
2977   return TRUE;
2978 }
2979 
2980 
2981 /*
2982   Log a query to the traditional slow log file
2983 
2984   SYNOPSIS
2985     write()
2986 
2987     thd               THD of the query
2988     current_time      current timestamp
2989     user_host         the pointer to the string with user@host info
2990     user_host_len     length of the user_host string. this is computed once
2991                       and passed to all general log event handlers
2992     query_utime       Amount of time the query took to execute (in microseconds)
2993     lock_utime        Amount of time the query was locked (in microseconds)
2994     is_command        The flag, which determines, whether the sql_text is a
2995                       query or an administrator command.
2996     sql_text          the very text of the query or administrator command
2997                       processed
2998     sql_text_len      the length of sql_text string
2999 
3000   DESCRIPTION
3001 
3002    Log a query to the slow log file.
3003 
3004   RETURN
3005     FALSE - OK
3006     TRUE - error occurred
3007 */
3008 
write(THD * thd,time_t current_time,const char * user_host,size_t user_host_len,ulonglong query_utime,ulonglong lock_utime,bool is_command,const char * sql_text,size_t sql_text_len)3009 bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
3010                             const char *user_host, size_t user_host_len, ulonglong query_utime,
3011                             ulonglong lock_utime, bool is_command,
3012                             const char *sql_text, size_t sql_text_len)
3013 {
3014   bool error= 0;
3015   char llbuff[22];
3016   DBUG_ENTER("MYSQL_QUERY_LOG::write");
3017 
3018   mysql_mutex_lock(&LOCK_log);
3019   if (is_open())
3020   {						// Safety against reopen
3021     char buff[80], *end;
3022     char query_time_buff[22+7], lock_time_buff[22+7];
3023     size_t buff_len;
3024     end= buff;
3025 
3026     if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
3027     {
3028       if (current_time != last_time)
3029       {
3030         last_time= current_time;
3031         struct tm start;
3032         localtime_r(&current_time, &start);
3033 
3034         buff_len= my_snprintf(buff, sizeof buff,
3035                               "# Time: %02d%02d%02d %2d:%02d:%02d\n",
3036                               start.tm_year % 100, start.tm_mon + 1,
3037                               start.tm_mday, start.tm_hour,
3038                               start.tm_min, start.tm_sec);
3039 
3040         /* Note that my_b_write() assumes it knows the length for this */
3041         if (my_b_write(&log_file, (uchar*) buff, buff_len))
3042           goto err;
3043       }
3044       const uchar uh[]= "# User@Host: ";
3045       if (my_b_write(&log_file, uh, sizeof(uh) - 1) ||
3046           my_b_write(&log_file, (uchar*) user_host, user_host_len) ||
3047           my_b_write(&log_file, (uchar*) "\n", 1))
3048         goto err;
3049 
3050     /* For slow query log */
3051     sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
3052     sprintf(lock_time_buff,  "%.6f", ulonglong2double(lock_utime)/1000000.0);
3053     if (my_b_printf(&log_file,
3054                     "# Thread_id: %lu  Schema: %s  QC_hit: %s\n"
3055                     "# Query_time: %s  Lock_time: %s  Rows_sent: %lu  Rows_examined: %lu\n"
3056                     "# Rows_affected: %lu  Bytes_sent: %lu\n",
3057                     (ulong) thd->thread_id, thd->get_db(),
3058                     ((thd->query_plan_flags & QPLAN_QC) ? "Yes" : "No"),
3059                     query_time_buff, lock_time_buff,
3060                     (ulong) thd->get_sent_row_count(),
3061                     (ulong) thd->get_examined_row_count(),
3062                     (ulong) thd->get_affected_rows(),
3063                     (ulong) (thd->status_var.bytes_sent - thd->bytes_sent_old)))
3064       goto err;
3065 
3066     if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN)
3067         && thd->tmp_tables_used &&
3068         my_b_printf(&log_file,
3069                     "# Tmp_tables: %lu  Tmp_disk_tables: %lu  "
3070                     "Tmp_table_sizes: %s\n",
3071                     (ulong) thd->tmp_tables_used,
3072                     (ulong) thd->tmp_tables_disk_used,
3073                     llstr(thd->tmp_tables_size, llbuff)))
3074       goto err;
3075 
3076     if (thd->spcont &&
3077         my_b_printf(&log_file, "# Stored_routine: %s\n",
3078                     ErrConvDQName(thd->spcont->m_sp).ptr()))
3079       goto err;
3080 
3081      if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN) &&
3082          (thd->query_plan_flags &
3083           (QPLAN_FULL_SCAN | QPLAN_FULL_JOIN | QPLAN_TMP_TABLE |
3084            QPLAN_TMP_DISK | QPLAN_FILESORT | QPLAN_FILESORT_DISK |
3085            QPLAN_FILESORT_PRIORITY_QUEUE)) &&
3086          my_b_printf(&log_file,
3087                      "# Full_scan: %s  Full_join: %s  "
3088                      "Tmp_table: %s  Tmp_table_on_disk: %s\n"
3089                      "# Filesort: %s  Filesort_on_disk: %s  Merge_passes: %lu  "
3090                      "Priority_queue: %s\n",
3091                      ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"),
3092                      ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"),
3093                      (thd->tmp_tables_used ? "Yes" : "No"),
3094                      (thd->tmp_tables_disk_used ? "Yes" : "No"),
3095                      ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"),
3096                      ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ?
3097                       "Yes" : "No"),
3098                      thd->query_plan_fsort_passes,
3099                      ((thd->query_plan_flags & QPLAN_FILESORT_PRIORITY_QUEUE) ?
3100                        "Yes" : "No")
3101                      ))
3102       goto err;
3103     if (thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_EXPLAIN &&
3104         thd->lex->explain)
3105     {
3106       StringBuffer<128> buf;
3107       DBUG_ASSERT(!thd->free_list);
3108       if (!print_explain_for_slow_log(thd->lex, thd, &buf))
3109         if (my_b_printf(&log_file, "%s", buf.c_ptr_safe()))
3110           goto err;
3111       thd->free_items();
3112     }
3113     if (thd->db.str && strcmp(thd->db.str, db))
3114     {						// Database changed
3115       if (my_b_printf(&log_file,"use %s;\n",thd->db.str))
3116         goto err;
3117       strmov(db,thd->db.str);
3118     }
3119     if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
3120     {
3121       end=strmov(end, ",last_insert_id=");
3122       end=longlong10_to_str((longlong)
3123                             thd->first_successful_insert_id_in_prev_stmt_for_binlog,
3124                             end, -10);
3125     }
3126     // Save value if we do an insert.
3127     if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
3128     {
3129       if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
3130       {
3131         end=strmov(end,",insert_id=");
3132         end=longlong10_to_str((longlong)
3133                               thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(),
3134                               end, -10);
3135       }
3136     }
3137 
3138     /*
3139       This info used to show up randomly, depending on whether the query
3140       checked the query start time or not. now we always write current
3141       timestamp to the slow log
3142     */
3143     end= strmov(end, ",timestamp=");
3144     end= int10_to_str((long) current_time, end, 10);
3145 
3146     if (end != buff)
3147     {
3148       *end++=';';
3149       *end='\n';
3150       if (my_b_write(&log_file, (uchar*) "SET ", 4) ||
3151           my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff)))
3152         goto err;
3153     }
3154     if (is_command)
3155     {
3156       end= strxmov(buff, "# administrator command: ", NullS);
3157       buff_len= (ulong) (end - buff);
3158       DBUG_EXECUTE_IF("simulate_slow_log_write_error",
3159                       {DBUG_SET("+d,simulate_file_write_error");});
3160       if(my_b_write(&log_file, (uchar*) buff, buff_len))
3161         goto err;
3162     }
3163     if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) ||
3164         my_b_write(&log_file, (uchar*) ";\n",2) ||
3165         flush_io_cache(&log_file))
3166       goto err;
3167 
3168     }
3169   }
3170 end:
3171   mysql_mutex_unlock(&LOCK_log);
3172   DBUG_RETURN(error);
3173 
3174 err:
3175   error= 1;
3176   if (!write_error)
3177   {
3178     write_error= 1;
3179     sql_print_error(ER_THD(thd, ER_ERROR_ON_WRITE), name, errno);
3180   }
3181   goto end;
3182 }
3183 
3184 
3185 /**
3186   @todo
3187   The following should be using fn_format();  We just need to
3188   first change fn_format() to cut the file name if it's too long.
3189 */
generate_name(const char * log_name,const char * suffix,bool strip_ext,char * buff)3190 const char *MYSQL_LOG::generate_name(const char *log_name,
3191                                      const char *suffix,
3192                                      bool strip_ext, char *buff)
3193 {
3194   if (!log_name || !log_name[0])
3195   {
3196     strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
3197     return (const char *)
3198       fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
3199   }
3200   // get rid of extension if the log is binary to avoid problems
3201   if (strip_ext)
3202   {
3203     char *p= fn_ext(log_name);
3204     uint length= (uint) (p - log_name);
3205     strmake(buff, log_name, MY_MIN(length, FN_REFLEN-1));
3206     return (const char*)buff;
3207   }
3208   return log_name;
3209 }
3210 
3211 
3212 /*
3213   Print some additional information about addition/removal of
3214   XID list entries.
3215   TODO: Remove once MDEV-9510 is fixed.
3216 */
/*
  WSREP_XID_LIST_ENTRY(X, Y): when wsrep_debug is set, log format string X
  with the binlog name and binlog id of XID list entry Y.

  Both variants expand to a single do { } while (0) statement, so the macro
  can be used safely as the body of an if/else and always requires a
  trailing semicolon.
*/
#ifdef WITH_WSREP
#define WSREP_XID_LIST_ENTRY(X, Y)                          \
  do                                                        \
  {                                                         \
    if (wsrep_debug)                                        \
    {                                                       \
      char buf[FN_REFLEN];                                  \
      strmake(buf, (Y)->binlog_name, (Y)->binlog_name_len); \
      WSREP_DEBUG(X, buf, (Y)->binlog_id);                  \
    }                                                       \
  } while (0)
#else
#define WSREP_XID_LIST_ENTRY(X, Y) do { } while(0)
#endif
3228 
MYSQL_BIN_LOG(uint * sync_period)3229 MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
3230   :reset_master_pending(0), mark_xid_done_waiting(0),
3231    bytes_written(0), last_used_log_number(0),
3232    file_id(1), open_count(1),
3233    group_commit_queue(0), group_commit_queue_busy(FALSE),
3234    num_commits(0), num_group_commits(0),
3235    group_commit_trigger_count(0), group_commit_trigger_timeout(0),
3236    group_commit_trigger_lock_wait(0),
3237    sync_period_ptr(sync_period), sync_counter(0),
3238    state_file_deleted(false), binlog_state_recover_done(false),
3239    is_relay_log(0), relay_signal_cnt(0),
3240    checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
3241    relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
3242    description_event_for_exec(0), description_event_for_queue(0),
3243    current_binlog_id(0), reset_master_count(0)
3244 {
3245   /*
3246     We don't want to initialize locks here as such initialization depends on
3247     safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
3248     called only in main(). Doing initialization here would make it happen
3249     before main().
3250   */
3251   index_file_name[0] = 0;
3252   bzero((char*) &index_file, sizeof(index_file));
3253   bzero((char*) &purge_index_file, sizeof(purge_index_file));
3254 }
3255 
stop_background_thread()3256 void MYSQL_BIN_LOG::stop_background_thread()
3257 {
3258   if (binlog_background_thread_started)
3259   {
3260     mysql_mutex_lock(&LOCK_binlog_background_thread);
3261     binlog_background_thread_stop= true;
3262     mysql_cond_signal(&COND_binlog_background_thread);
3263     while (binlog_background_thread_stop)
3264       mysql_cond_wait(&COND_binlog_background_thread_end,
3265                       &LOCK_binlog_background_thread);
3266     mysql_mutex_unlock(&LOCK_binlog_background_thread);
3267     binlog_background_thread_started= false;
3268   }
3269 }
3270 
3271 /* this is called only once */
3272 
cleanup()3273 void MYSQL_BIN_LOG::cleanup()
3274 {
3275   DBUG_ENTER("cleanup");
3276   if (inited)
3277   {
3278     xid_count_per_binlog *b;
3279 
3280     /* Wait for the binlog background thread to stop. */
3281     if (!is_relay_log)
3282       stop_background_thread();
3283 
3284     inited= 0;
3285     mysql_mutex_lock(&LOCK_log);
3286     close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
3287     mysql_mutex_unlock(&LOCK_log);
3288     delete description_event_for_queue;
3289     delete description_event_for_exec;
3290 
3291     while ((b= binlog_xid_count_list.get()))
3292     {
3293       /*
3294         There should be no pending XIDs at shutdown, and only one entry (for
3295         the active binlog file) in the list.
3296       */
3297       DBUG_ASSERT(b->xid_count == 0);
3298       DBUG_ASSERT(!binlog_xid_count_list.head());
3299       WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::cleanup(): Removing xid_list_entry "
3300                            "for %s (%lu)", b);
3301       delete b;
3302     }
3303 
3304     mysql_mutex_destroy(&LOCK_log);
3305     mysql_mutex_destroy(&LOCK_index);
3306     mysql_mutex_destroy(&LOCK_xid_list);
3307     mysql_mutex_destroy(&LOCK_binlog_background_thread);
3308     mysql_mutex_destroy(&LOCK_binlog_end_pos);
3309     mysql_cond_destroy(&COND_relay_log_updated);
3310     mysql_cond_destroy(&COND_bin_log_updated);
3311     mysql_cond_destroy(&COND_queue_busy);
3312     mysql_cond_destroy(&COND_xid_list);
3313     mysql_cond_destroy(&COND_binlog_background_thread);
3314     mysql_cond_destroy(&COND_binlog_background_thread_end);
3315   }
3316 
3317   /*
3318     Free data for global binlog state.
3319     We can't do that automatically as we need to do this before
3320     safemalloc is shut down
3321   */
3322   if (!is_relay_log)
3323     rpl_global_gtid_binlog_state.free();
3324   DBUG_VOID_RETURN;
3325 }
3326 
3327 
3328 /* Init binlog-specific vars */
init(ulong max_size_arg)3329 void MYSQL_BIN_LOG::init(ulong max_size_arg)
3330 {
3331   DBUG_ENTER("MYSQL_BIN_LOG::init");
3332   max_size= max_size_arg;
3333   DBUG_PRINT("info",("max_size: %lu", max_size));
3334   DBUG_VOID_RETURN;
3335 }
3336 
3337 
init_pthread_objects()3338 void MYSQL_BIN_LOG::init_pthread_objects()
3339 {
3340   MYSQL_LOG::init_pthread_objects();
3341   mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
3342   mysql_mutex_setflags(&LOCK_index, MYF_NO_DEADLOCK_DETECTION);
3343   mysql_mutex_init(key_BINLOG_LOCK_xid_list,
3344                    &LOCK_xid_list, MY_MUTEX_INIT_FAST);
3345   mysql_cond_init(m_key_relay_log_update, &COND_relay_log_updated, 0);
3346   mysql_cond_init(m_key_bin_log_update, &COND_bin_log_updated, 0);
3347   mysql_cond_init(m_key_COND_queue_busy, &COND_queue_busy, 0);
3348   mysql_cond_init(key_BINLOG_COND_xid_list, &COND_xid_list, 0);
3349 
3350   mysql_mutex_init(key_BINLOG_LOCK_binlog_background_thread,
3351                    &LOCK_binlog_background_thread, MY_MUTEX_INIT_FAST);
3352   mysql_cond_init(key_BINLOG_COND_binlog_background_thread,
3353                   &COND_binlog_background_thread, 0);
3354   mysql_cond_init(key_BINLOG_COND_binlog_background_thread_end,
3355                   &COND_binlog_background_thread_end, 0);
3356 
3357   mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
3358                    MY_MUTEX_INIT_SLOW);
3359 }
3360 
3361 
open_index_file(const char * index_file_name_arg,const char * log_name,bool need_mutex)3362 bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
3363                                     const char *log_name, bool need_mutex)
3364 {
3365   File index_file_nr= -1;
3366   DBUG_ASSERT(!my_b_inited(&index_file));
3367 
3368   /*
3369     First open of this class instance
3370     Create an index file that will hold all file names uses for logging.
3371     Add new entries to the end of it.
3372   */
3373   myf opt= MY_UNPACK_FILENAME;
3374   if (!index_file_name_arg)
3375   {
3376     index_file_name_arg= log_name;    // Use same basename for index file
3377     opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
3378   }
3379   fn_format(index_file_name, index_file_name_arg, mysql_data_home,
3380             ".index", opt);
3381   if ((index_file_nr= mysql_file_open(m_key_file_log_index,
3382                                       index_file_name,
3383                                       O_RDWR | O_CREAT | O_BINARY | O_CLOEXEC,
3384                                       MYF(MY_WME))) < 0 ||
3385        mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
3386        init_io_cache(&index_file, index_file_nr,
3387                      IO_SIZE, WRITE_CACHE,
3388                      mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
3389                                      0, MYF(MY_WME | MY_WAIT_IF_FULL)) ||
3390       DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
3391   {
3392     /*
3393       TODO: all operations creating/deleting the index file or a log, should
3394       call my_sync_dir() or my_sync_dir_by_file() to be durable.
3395       TODO: file creation should be done with mysql_file_create()
3396       not mysql_file_open().
3397     */
3398     if (index_file_nr >= 0)
3399       mysql_file_close(index_file_nr, MYF(0));
3400     return TRUE;
3401   }
3402 
3403 #ifdef HAVE_REPLICATION
3404   /*
3405     Sync the index by purging any binary log file that is not registered.
3406     In other words, either purge binary log files that were removed from
3407     the index but not purged from the file system due to a crash or purge
3408     any binary log file that was created but not register in the index
3409     due to a crash.
3410   */
3411 
3412   if (set_purge_index_file_name(index_file_name_arg) ||
3413       open_purge_index_file(FALSE) ||
3414       purge_index_entry(NULL, NULL, need_mutex) ||
3415       close_purge_index_file() ||
3416       DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
3417   {
3418     sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
3419                     "file.");
3420     return TRUE;
3421   }
3422 #endif
3423 
3424   return FALSE;
3425 }
3426 
3427 
3428 /**
3429   Open a (new) binlog file.
3430 
3431   - Open the log file and the index file. Register the new
3432   file name in it
  - When calling this when the file is in use, you must hold locks
  on LOCK_log and LOCK_index.
3435 
3436   @retval
3437     0	ok
3438   @retval
3439     1	error
3440 */
3441 
open(const char * log_name,enum_log_type log_type_arg,const char * new_name,ulong next_log_number,enum cache_type io_cache_type_arg,ulong max_size_arg,bool null_created_arg,bool need_mutex)3442 bool MYSQL_BIN_LOG::open(const char *log_name,
3443                          enum_log_type log_type_arg,
3444                          const char *new_name,
3445                          ulong next_log_number,
3446                          enum cache_type io_cache_type_arg,
3447                          ulong max_size_arg,
3448                          bool null_created_arg,
3449                          bool need_mutex)
3450 {
3451   File file= -1;
3452   xid_count_per_binlog *new_xid_list_entry= NULL, *b;
3453   DBUG_ENTER("MYSQL_BIN_LOG::open");
3454   DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));
3455 
3456   mysql_mutex_assert_owner(&LOCK_log);
3457 
3458   if (!is_relay_log)
3459   {
3460     if (!binlog_state_recover_done)
3461     {
3462       binlog_state_recover_done= true;
3463       if (do_binlog_recovery(opt_bin_logname, false))
3464         DBUG_RETURN(1);
3465     }
3466 
3467     if (!binlog_background_thread_started &&
3468         start_binlog_background_thread())
3469       DBUG_RETURN(1);
3470   }
3471 
3472   /* We need to calculate new log file name for purge to delete old */
3473   if (init_and_set_log_file_name(log_name, new_name, next_log_number,
3474                                  log_type_arg, io_cache_type_arg))
3475   {
3476     sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
3477     if (!is_relay_log)
3478       goto err;
3479     DBUG_RETURN(1);
3480   }
3481 
3482 #ifdef HAVE_REPLICATION
3483   if (open_purge_index_file(TRUE) ||
3484       register_create_index_entry(log_file_name) ||
3485       sync_purge_index_file() ||
3486       DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
3487   {
3488     /**
3489         TODO:
3490         Although this was introduced to appease valgrind when
3491         injecting emulated faults using
3492         fault_injection_registering_index it may be good to consider
3493         what actually happens when open_purge_index_file succeeds but
3494         register or sync fails.
3495 
3496         Perhaps we might need the code below in MYSQL_LOG_BIN::cleanup
3497         for "real life" purposes as well?
3498      */
3499     DBUG_EXECUTE_IF("fault_injection_registering_index", {
3500       if (my_b_inited(&purge_index_file))
3501       {
3502         end_io_cache(&purge_index_file);
3503         my_close(purge_index_file.file, MYF(0));
3504       }
3505     });
3506 
3507     sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
3508     DBUG_RETURN(1);
3509   }
3510   DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
3511 #endif
3512 
3513   write_error= 0;
3514 
3515   /* open the main log file */
3516   if (MYSQL_LOG::open(
3517 #ifdef HAVE_PSI_INTERFACE
3518                       m_key_file_log,
3519 #endif
3520                       log_name,
3521                       LOG_UNKNOWN, /* Don't generate new name */
3522                       0, 0, io_cache_type_arg))
3523   {
3524 #ifdef HAVE_REPLICATION
3525     close_purge_index_file();
3526 #endif
3527     DBUG_RETURN(1);                            /* all warnings issued */
3528   }
3529 
3530   init(max_size_arg);
3531 
3532   open_count++;
3533 
3534   DBUG_ASSERT(log_type == LOG_BIN);
3535 
3536   {
3537     bool write_file_name_to_index_file=0;
3538 
3539     if (!my_b_filelength(&log_file))
3540     {
3541       /*
3542 	The binary log file was empty (probably newly created)
3543 	This is the normal case and happens when the user doesn't specify
3544 	an extension for the binary log files.
3545 	In this case we write a standard header to it.
3546       */
3547       if (my_b_safe_write(&log_file, BINLOG_MAGIC,
3548 			  BIN_LOG_HEADER_SIZE))
3549         goto err;
3550       bytes_written+= BIN_LOG_HEADER_SIZE;
3551       write_file_name_to_index_file= 1;
3552     }
3553 
3554     {
3555       /*
3556         In 4.x we put Start event only in the first binlog. But from 5.0 we
3557         want a Start event even if this is not the very first binlog.
3558       */
3559       Format_description_log_event s(BINLOG_VERSION);
3560       /*
3561         don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
3562         as we won't be able to reset it later
3563       */
3564       if (io_cache_type == WRITE_CACHE)
3565         s.flags |= LOG_EVENT_BINLOG_IN_USE_F;
3566 
3567       if (is_relay_log)
3568       {
3569         if (relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
3570           relay_log_checksum_alg=
3571             opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options
3572                                           : BINLOG_CHECKSUM_ALG_OFF;
3573         s.checksum_alg= relay_log_checksum_alg;
3574         s.set_relay_log_event();
3575       }
3576       else
3577         s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options;
3578 
3579       crypto.scheme = 0;
3580       DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
3581       if (!s.is_valid())
3582         goto err;
3583       s.dont_set_created= null_created_arg;
3584       if (write_event(&s))
3585         goto err;
3586       bytes_written+= s.data_written;
3587 
3588       if (encrypt_binlog)
3589       {
3590         uint key_version= encryption_key_get_latest_version(ENCRYPTION_KEY_SYSTEM_DATA);
3591         if (key_version == ENCRYPTION_KEY_VERSION_INVALID)
3592         {
3593           sql_print_error("Failed to enable encryption of binary logs");
3594           goto err;
3595         }
3596 
3597         if (key_version != ENCRYPTION_KEY_NOT_ENCRYPTED)
3598         {
3599           if (my_random_bytes(crypto.nonce, sizeof(crypto.nonce)))
3600             goto err;
3601 
3602           Start_encryption_log_event sele(1, key_version, crypto.nonce);
3603           sele.checksum_alg= s.checksum_alg;
3604           if (write_event(&sele))
3605             goto err;
3606 
3607           // Start_encryption_log_event is written, enable the encryption
3608           if (crypto.init(sele.crypto_scheme, key_version))
3609             goto err;
3610         }
3611       }
3612 
3613       if (!is_relay_log)
3614       {
3615         char buf[FN_REFLEN];
3616 
3617         /*
3618           Output a Gtid_list_log_event at the start of the binlog file.
3619 
3620           This is used to quickly determine which GTIDs are found in binlog
3621           files earlier than this one, and which are found in this (or later)
3622           binlogs.
3623 
3624           The list gives a mapping from (domain_id, server_id) -> seq_no (so
3625           this means that there is at most one entry for every unique pair
3626           (domain_id, server_id) in the list). It indicates that this seq_no is
3627           the last one found in an earlier binlog file for this (domain_id,
3628           server_id) combination - so any higher seq_no should be search for
3629           from this binlog file, or a later one.
3630 
3631           This allows to locate the binlog file containing a given GTID by
3632           scanning backwards, reading just the Gtid_list_log_event at the
3633           start of each file, and scanning only the relevant binlog file when
3634           found, not all binlog files.
3635 
3636           The existence of a given entry (domain_id, server_id, seq_no)
3637           guarantees only that this seq_no will not be found in this or any
3638           later binlog file. It does not guarantee that it can be found it an
3639           earlier binlog file, for example the file may have been purged.
3640 
3641           If there is no entry for a given (domain_id, server_id) pair, then
3642           it means that no such GTID exists in any earlier binlog. It is
3643           permissible to remove such pair from future Gtid_list_log_events
3644           if all previous binlog files containing such GTIDs have been purged
3645           (though such optimization is not performed at the time of this
3646           writing). So if there is no entry for given GTID it means that such
3647           GTID should be search for in this or later binlog file, same as if
3648           there had been an entry (domain_id, server_id, 0).
3649         */
3650 
3651         Gtid_list_log_event gl_ev(&rpl_global_gtid_binlog_state, 0);
3652         if (write_event(&gl_ev))
3653           goto err;
3654 
3655         /* Output a binlog checkpoint event at the start of the binlog file. */
3656 
3657         /*
3658           Construct an entry in the binlog_xid_count_list for the new binlog
3659           file (we will not link it into the list until we know the new file
3660           is successfully created; otherwise we would have to remove it again
3661           if creation failed, which gets tricky since other threads may have
3662           seen the entry in the meantime - and we do not want to hold
3663           LOCK_xid_list for long periods of time).
3664 
3665           Write the current binlog checkpoint into the log, so XA recovery will
3666           know from where to start recovery.
3667         */
3668         size_t off= dirname_length(log_file_name);
3669         uint len= static_cast<uint>(strlen(log_file_name) - off);
3670         new_xid_list_entry= new xid_count_per_binlog(log_file_name+off, len);
3671         if (!new_xid_list_entry)
3672           goto err;
3673 
3674         /*
3675           Find the name for the Initial binlog checkpoint.
3676 
3677           Normally this will just be the first entry, as we delete entries
3678           when their count drops to zero. But we scan the list to handle any
3679           corner case, eg. for the first binlog file opened after startup, the
3680           list will be empty.
3681         */
3682         mysql_mutex_lock(&LOCK_xid_list);
3683         I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
3684         while ((b= it++) && b->xid_count == 0)
3685           ;
3686         mysql_mutex_unlock(&LOCK_xid_list);
3687         if (!b)
3688           b= new_xid_list_entry;
3689         if (b->binlog_name)
3690           strmake(buf, b->binlog_name, b->binlog_name_len);
3691         else
3692           goto err;
3693         Binlog_checkpoint_log_event ev(buf, len);
3694         DBUG_EXECUTE_IF("crash_before_write_checkpoint_event",
3695                         flush_io_cache(&log_file);
3696                         mysql_file_sync(log_file.file, MYF(MY_WME));
3697                         DBUG_SUICIDE(););
3698         if (write_event(&ev))
3699           goto err;
3700         bytes_written+= ev.data_written;
3701       }
3702     }
3703     if (description_event_for_queue &&
3704         description_event_for_queue->binlog_version>=4)
3705     {
3706       /*
3707         This is a relay log written to by the I/O slave thread.
3708         Write the event so that others can later know the format of this relay
3709         log.
3710         Note that this event is very close to the original event from the
3711         master (it has binlog version of the master, event types of the
3712         master), so this is suitable to parse the next relay log's event. It
3713         has been produced by
3714         Format_description_log_event::Format_description_log_event(char* buf,).
3715         Why don't we want to write the description_event_for_queue if this
3716         event is for format<4 (3.23 or 4.x): this is because in that case, the
3717         description_event_for_queue describes the data received from the
3718         master, but not the data written to the relay log (*conversion*),
3719         which is in format 4 (slave's).
3720       */
3721       /*
3722         Set 'created' to 0, so that in next relay logs this event does not
3723         trigger cleaning actions on the slave in
3724         Format_description_log_event::apply_event_impl().
3725       */
3726       description_event_for_queue->created= 0;
3727       /* Don't set log_pos in event header */
3728       description_event_for_queue->set_artificial_event();
3729 
3730       if (write_event(description_event_for_queue))
3731         goto err;
3732       bytes_written+= description_event_for_queue->data_written;
3733     }
3734     if (flush_io_cache(&log_file) ||
3735         mysql_file_sync(log_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
3736       goto err;
3737 
3738     my_off_t offset= my_b_tell(&log_file);
3739 
3740     if (!is_relay_log)
3741     {
3742       /* update binlog_end_pos so that it can be read by after sync hook */
3743       reset_binlog_end_pos(log_file_name, offset);
3744 
3745       mysql_mutex_lock(&LOCK_commit_ordered);
3746       strmake_buf(last_commit_pos_file, log_file_name);
3747       last_commit_pos_offset= offset;
3748       mysql_mutex_unlock(&LOCK_commit_ordered);
3749     }
3750 
3751     if (write_file_name_to_index_file)
3752     {
3753 #ifdef HAVE_REPLICATION
3754 #ifdef ENABLED_DEBUG_SYNC
3755       if (current_thd)
3756         DEBUG_SYNC(current_thd, "binlog_open_before_update_index");
3757 #endif
3758       DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
3759 #endif
3760 
3761       DBUG_ASSERT(my_b_inited(&index_file) != 0);
3762       reinit_io_cache(&index_file, WRITE_CACHE,
3763                       my_b_filelength(&index_file), 0, 0);
3764       /*
3765         As this is a new log file, we write the file name to the index
3766         file. As every time we write to the index file, we sync it.
3767       */
3768       if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
3769           my_b_write(&index_file, (uchar*) log_file_name,
3770                      strlen(log_file_name)) ||
3771           my_b_write(&index_file, (uchar*) "\n", 1) ||
3772           flush_io_cache(&index_file) ||
3773           mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
3774         goto err;
3775 
3776 #ifdef HAVE_REPLICATION
3777       DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
3778 #endif
3779     }
3780   }
3781 
3782   if (!is_relay_log)
3783   {
3784     /*
3785       Now the file was created successfully, so we can link in the entry for
3786       the new binlog file in binlog_xid_count_list.
3787     */
3788     mysql_mutex_lock(&LOCK_xid_list);
3789     ++current_binlog_id;
3790     new_xid_list_entry->binlog_id= current_binlog_id;
3791     /* Remove any initial entries with no pending XIDs.  */
3792     while ((b= binlog_xid_count_list.head()) && b->xid_count == 0)
3793     {
3794       WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Removing xid_list_entry for "
3795                            "%s (%lu)", b);
3796       delete binlog_xid_count_list.get();
3797     }
3798     mysql_cond_broadcast(&COND_xid_list);
3799     WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Adding new xid_list_entry for "
3800                          "%s (%lu)", new_xid_list_entry);
3801     binlog_xid_count_list.push_back(new_xid_list_entry);
3802     mysql_mutex_unlock(&LOCK_xid_list);
3803 
3804     /*
3805       Now that we have synced a new binlog file with an initial Gtid_list
3806       event, it is safe to delete the binlog state file. We will write out
3807       a new, updated file at shutdown, and if we crash before we can recover
3808       the state from the newly written binlog file.
3809 
3810       Since the state file will contain out-of-date data as soon as the first
3811       new GTID is binlogged, it is better to remove it, to avoid any risk of
3812       accidentally reading incorrect data later.
3813     */
3814     if (!state_file_deleted)
3815     {
3816       char buf[FN_REFLEN];
3817       fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
3818                 MY_UNPACK_FILENAME);
3819       my_delete(buf, MY_SYNC_DIR);
3820       state_file_deleted= true;
3821     }
3822   }
3823 
3824   log_state= LOG_OPENED;
3825 
3826 #ifdef HAVE_REPLICATION
3827   close_purge_index_file();
3828 #endif
3829 
3830   /* Notify the io thread that binlog is rotated to a new file */
3831   if (is_relay_log)
3832     signal_relay_log_update();
3833   else
3834     update_binlog_end_pos();
3835   DBUG_RETURN(0);
3836 
3837 err:
3838   int tmp_errno= errno;
3839 #ifdef HAVE_REPLICATION
3840   if (is_inited_purge_index_file())
3841     purge_index_entry(NULL, NULL, need_mutex);
3842   close_purge_index_file();
3843 #endif
3844   sql_print_error(fatal_log_error, (name) ? name : log_name, tmp_errno);
3845   if (new_xid_list_entry)
3846     delete new_xid_list_entry;
3847   if (file >= 0)
3848     mysql_file_close(file, MYF(0));
3849   close(LOG_CLOSE_INDEX);
3850   DBUG_RETURN(1);
3851 }
3852 
3853 
get_current_log(LOG_INFO * linfo)3854 int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
3855 {
3856   mysql_mutex_lock(&LOCK_log);
3857   int ret = raw_get_current_log(linfo);
3858   mysql_mutex_unlock(&LOCK_log);
3859   return ret;
3860 }
3861 
raw_get_current_log(LOG_INFO * linfo)3862 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
3863 {
3864   mysql_mutex_assert_owner(&LOCK_log);
3865   strmake_buf(linfo->log_file_name, log_file_name);
3866   linfo->pos = my_b_tell(&log_file);
3867   return 0;
3868 }
3869 
/**
  Move all data in a filename index file up towards the start of the file.

    We do the copy outside of the IO_CACHE as the cache buffers would just
    make things slower and more complicated.
    In most cases the copy loop should only do one read.

  @param index_file			File to move
  @param offset			Move everything from here to beginning

  @note
    File will be truncated to be 'offset' shorter or filled up with newlines

  @retval
    0	ok
  @retval
    1	error while reading, writing, resizing or syncing the file
*/
3886 
3887 #ifdef HAVE_REPLICATION
3888 
copy_up_file_and_fill(IO_CACHE * index_file,my_off_t offset)3889 static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
3890 {
3891   int bytes_read;
3892   my_off_t init_offset= offset;
3893   File file= index_file->file;
3894   uchar io_buf[IO_SIZE*2];
3895   DBUG_ENTER("copy_up_file_and_fill");
3896 
3897   for (;; offset+= bytes_read)
3898   {
3899     mysql_file_seek(file, offset, MY_SEEK_SET, MYF(0));
3900     if ((bytes_read= (int) mysql_file_read(file, io_buf, sizeof(io_buf),
3901                                            MYF(MY_WME)))
3902 	< 0)
3903       goto err;
3904     if (!bytes_read)
3905       break;					// end of file
3906     mysql_file_seek(file, offset-init_offset, MY_SEEK_SET, MYF(0));
3907     if (mysql_file_write(file, io_buf, bytes_read,
3908                          MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
3909       goto err;
3910   }
3911   /* The following will either truncate the file or fill the end with \n' */
3912   if (mysql_file_chsize(file, offset - init_offset, '\n', MYF(MY_WME)) ||
3913       mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE)))
3914     goto err;
3915 
3916   /* Reset data in old index cache */
3917   reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
3918   DBUG_RETURN(0);
3919 
3920 err:
3921   DBUG_RETURN(1);
3922 }
3923 
3924 #endif /* HAVE_REPLICATION */
3925 
3926 /**
3927   Find the position in the log-index-file for the given log name.
3928 
3929   @param linfo		Store here the found log file name and position to
3930                        the NEXT log file name in the index file.
3931   @param log_name	Filename to find in the index file.
3932                        Is a null pointer if we want to read the first entry
3933   @param need_lock	Set this to 1 if the parent doesn't already have a
3934                        lock on LOCK_index
3935 
3936   @note
3937     On systems without the truncate function the file will end with one or
3938     more empty lines.  These will be ignored when reading the file.
3939 
3940   @retval
3941     0			ok
3942   @retval
3943     LOG_INFO_EOF	        End of log-index-file found
3944   @retval
3945     LOG_INFO_IO		Got IO error while reading file
3946 */
3947 
find_log_pos(LOG_INFO * linfo,const char * log_name,bool need_lock)3948 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
3949 			    bool need_lock)
3950 {
3951   int error= 0;
3952   char *full_fname= linfo->log_file_name;
3953   char full_log_name[FN_REFLEN], fname[FN_REFLEN];
3954   uint log_name_len= 0, fname_len= 0;
3955   DBUG_ENTER("find_log_pos");
3956   full_log_name[0]= full_fname[0]= 0;
3957 
3958   /*
3959     Mutex needed because we need to make sure the file pointer does not
3960     move from under our feet
3961   */
3962   if (need_lock)
3963     mysql_mutex_lock(&LOCK_index);
3964   mysql_mutex_assert_owner(&LOCK_index);
3965 
3966   // extend relative paths for log_name to be searched
3967   if (log_name)
3968   {
3969     if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
3970     {
3971       error= LOG_INFO_EOF;
3972       goto end;
3973     }
3974   }
3975 
3976   log_name_len= log_name ? (uint) strlen(full_log_name) : 0;
3977   DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
3978                        log_name ? log_name : "NULL", full_log_name));
3979 
3980   /* As the file is flushed, we can't get an error here */
3981   (void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);
3982 
3983   for (;;)
3984   {
3985     size_t length;
3986     my_off_t offset= my_b_tell(&index_file);
3987 
3988     DBUG_EXECUTE_IF("simulate_find_log_pos_error",
3989                     error=  LOG_INFO_EOF; break;);
3990     /* If we get 0 or 1 characters, this is the end of the file */
3991     if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
3992     {
3993       /* Did not find the given entry; Return not found or error */
3994       error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
3995       break;
3996     }
3997     if (fname[length-1] != '\n')
3998       continue;                                 // Not a log entry
3999     fname[length-1]= 0;                         // Remove end \n
4000 
4001     // extend relative paths and match against full path
4002     if (normalize_binlog_name(full_fname, fname, is_relay_log))
4003     {
4004       error= LOG_INFO_EOF;
4005       break;
4006     }
4007     fname_len= (uint) strlen(full_fname);
4008 
4009     // if the log entry matches, null string matching anything
4010     if (!log_name ||
4011         (log_name_len == fname_len &&
4012 	 !strncmp(full_fname, full_log_name, log_name_len)))
4013     {
4014       DBUG_PRINT("info", ("Found log file entry"));
4015       linfo->index_file_start_offset= offset;
4016       linfo->index_file_offset = my_b_tell(&index_file);
4017       break;
4018     }
4019   }
4020 
4021 end:
4022   if (need_lock)
4023     mysql_mutex_unlock(&LOCK_index);
4024   DBUG_RETURN(error);
4025 }
4026 
4027 
4028 /**
4029   Find the position in the log-index-file for the given log name.
4030 
4031   @param
4032     linfo		Store here the next log file name and position to
4033 			the file name after that.
4034   @param
4035     need_lock		Set this to 1 if the parent doesn't already have a
4036 			lock on LOCK_index
4037 
4038   @note
4039     - Before calling this function, one has to call find_log_pos()
4040     to set up 'linfo'
4041     - Mutex needed because we need to make sure the file pointer does not move
4042     from under our feet
4043 
4044   @retval
4045     0			ok
4046   @retval
4047     LOG_INFO_EOF	        End of log-index-file found
4048   @retval
4049     LOG_INFO_IO		Got IO error while reading file
4050 */
4051 
find_next_log(LOG_INFO * linfo,bool need_lock)4052 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
4053 {
4054   int error= 0;
4055   size_t length;
4056   char fname[FN_REFLEN];
4057   char *full_fname= linfo->log_file_name;
4058 
4059   if (need_lock)
4060     mysql_mutex_lock(&LOCK_index);
4061   mysql_mutex_assert_owner(&LOCK_index);
4062 
4063   /* As the file is flushed, we can't get an error here */
4064   (void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
4065 			 0);
4066 
4067   linfo->index_file_start_offset= linfo->index_file_offset;
4068   if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
4069   {
4070     error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
4071     goto err;
4072   }
4073 
4074   if (fname[0] != 0)
4075   {
4076     if(normalize_binlog_name(full_fname, fname, is_relay_log))
4077     {
4078       error= LOG_INFO_EOF;
4079       goto err;
4080     }
4081     length= strlen(full_fname);
4082   }
4083 
4084   full_fname[length-1]= 0;			// kill \n
4085   linfo->index_file_offset= my_b_tell(&index_file);
4086 
4087 err:
4088   if (need_lock)
4089     mysql_mutex_unlock(&LOCK_index);
4090   return error;
4091 }
4092 
4093 
4094 /**
4095   Delete all logs referred to in the index file.
4096 
4097   The new index file will only contain this file.
4098 
4099   @param thd		  Thread id. This can be zero in case of resetting
4100                           relay logs
4101   @param create_new_log   1 if we should start writing to a new log file
4102   @param next_log_number  min number of next log file to use, if possible.
4103 
4104   @note
4105     If not called from slave thread, write start event to new log
4106 
4107   @retval
4108     0	ok
4109   @retval
4110     1   error
4111 */
4112 
reset_logs(THD * thd,bool create_new_log,rpl_gtid * init_state,uint32 init_state_len,ulong next_log_number)4113 bool MYSQL_BIN_LOG::reset_logs(THD *thd, bool create_new_log,
4114                                rpl_gtid *init_state, uint32 init_state_len,
4115                                ulong next_log_number)
4116 {
4117   LOG_INFO linfo;
4118   bool error=0;
4119   int err;
4120   const char* save_name;
4121   DBUG_ENTER("reset_logs");
4122 
4123   if (!is_relay_log)
4124   {
4125     if (init_state && !is_empty_state())
4126     {
4127       my_error(ER_BINLOG_MUST_BE_EMPTY, MYF(0));
4128       DBUG_RETURN(1);
4129     }
4130 
4131     /*
4132       Mark that a RESET MASTER is in progress.
4133       This ensures that a binlog checkpoint will not try to write binlog
4134       checkpoint events, which would be useless (as we are deleting the binlog
4135       anyway) and could deadlock, as we are holding LOCK_log.
4136 
4137       Wait for any mark_xid_done() calls that might be already running to
4138       complete (mark_xid_done_waiting counter to drop to zero); we need to
4139       do this before we take the LOCK_log to not deadlock.
4140     */
4141     mysql_mutex_lock(&LOCK_xid_list);
4142     reset_master_pending++;
4143     while (mark_xid_done_waiting > 0)
4144       mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4145     mysql_mutex_unlock(&LOCK_xid_list);
4146   }
4147 
4148   DEBUG_SYNC_C_IF_THD(thd, "reset_logs_after_set_reset_master_pending");
4149   /*
4150     We need to get both locks to be sure that no one is trying to
4151     write to the index log file.
4152   */
4153   mysql_mutex_lock(&LOCK_log);
4154   mysql_mutex_lock(&LOCK_index);
4155 
4156   if (!is_relay_log)
4157   {
4158     /*
4159       We are going to nuke all binary log files.
4160       Without binlog, we cannot XA recover prepared-but-not-committed
4161       transactions in engines. So force a commit checkpoint first.
4162 
4163       Note that we take and immediately
4164       release LOCK_after_binlog_sync/LOCK_commit_ordered. This has
4165       the effect to ensure that any on-going group commit (in
4166       trx_group_commit_leader()) has completed before we request the checkpoint,
4167       due to the chaining of LOCK_log and LOCK_commit_ordered in that function.
4168       (We are holding LOCK_log, so no new group commit can start).
4169 
4170       Without this, it is possible (though perhaps unlikely) that the RESET
4171       MASTER could run in-between the write to the binlog and the
4172       commit_ordered() in the engine of some transaction, and then a crash
4173       later would leave such transaction not recoverable.
4174     */
4175 
4176     mysql_mutex_lock(&LOCK_after_binlog_sync);
4177     mysql_mutex_lock(&LOCK_commit_ordered);
4178     mysql_mutex_unlock(&LOCK_after_binlog_sync);
4179     mysql_mutex_unlock(&LOCK_commit_ordered);
4180 
4181     mark_xids_active(current_binlog_id, 1);
4182     do_checkpoint_request(current_binlog_id);
4183 
4184     /* Now wait for all checkpoint requests and pending unlog() to complete. */
4185     mysql_mutex_lock(&LOCK_xid_list);
4186     for (;;)
4187     {
4188       if (is_xidlist_idle_nolock())
4189         break;
4190       /*
4191         Wait until signalled that one more binlog dropped to zero, then check
4192         again.
4193       */
4194       mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4195     }
4196 
4197     /*
4198       Now all XIDs are fully flushed to disk, and we are holding LOCK_log so
4199       no new ones will be written. So we can proceed to delete the logs.
4200     */
4201     mysql_mutex_unlock(&LOCK_xid_list);
4202   }
4203 
4204   /* Save variables so that we can reopen the log */
4205   save_name=name;
4206   name=0;					// Protect against free
4207   close(LOG_CLOSE_TO_BE_OPENED);
4208 
4209   last_used_log_number= 0;                      // Reset log number cache
4210 
4211   /*
4212     First delete all old log files and then update the index file.
4213     As we first delete the log files and do not use sort of logging,
4214     a crash may lead to an inconsistent state where the index has
4215     references to non-existent files.
4216 
4217     We need to invert the steps and use the purge_index_file methods
4218     in order to make the operation safe.
4219   */
4220 
4221   if ((err= find_log_pos(&linfo, NullS, 0)) != 0)
4222   {
4223     uint errcode= purge_log_get_error_code(err);
4224     sql_print_error("Failed to locate old binlog or relay log files");
4225     my_message(errcode, ER_THD_OR_DEFAULT(thd, errcode), MYF(0));
4226     error= 1;
4227     goto err;
4228   }
4229 
4230   for (;;)
4231   {
4232     if (unlikely((error= my_delete(linfo.log_file_name, MYF(0)))))
4233     {
4234       if (my_errno == ENOENT)
4235       {
4236         if (thd)
4237           push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4238                               ER_LOG_PURGE_NO_FILE,
4239                               ER_THD(thd, ER_LOG_PURGE_NO_FILE),
4240                               linfo.log_file_name);
4241 
4242         sql_print_information("Failed to delete file '%s'",
4243                               linfo.log_file_name);
4244         my_errno= 0;
4245         error= 0;
4246       }
4247       else
4248       {
4249         if (thd)
4250           push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4251                               ER_BINLOG_PURGE_FATAL_ERR,
4252                               "a problem with deleting %s; "
4253                               "consider examining correspondence "
4254                               "of your binlog index file "
4255                               "to the actual binlog files",
4256                               linfo.log_file_name);
4257         error= 1;
4258         goto err;
4259       }
4260     }
4261     if (find_next_log(&linfo, 0))
4262       break;
4263   }
4264 
4265   if (!is_relay_log)
4266   {
4267     if (init_state)
4268       rpl_global_gtid_binlog_state.load(init_state, init_state_len);
4269     else
4270       rpl_global_gtid_binlog_state.reset();
4271   }
4272 
4273   /* Start logging with a new file */
4274   close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED);
4275   // Reset (open will update)
4276   if (unlikely((error= my_delete(index_file_name, MYF(0)))))
4277   {
4278     if (my_errno == ENOENT)
4279     {
4280       if (thd)
4281         push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4282                             ER_LOG_PURGE_NO_FILE,
4283                             ER_THD(thd, ER_LOG_PURGE_NO_FILE),
4284                             index_file_name);
4285       sql_print_information("Failed to delete file '%s'",
4286                             index_file_name);
4287       my_errno= 0;
4288       error= 0;
4289     }
4290     else
4291     {
4292       if (thd)
4293         push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4294                             ER_BINLOG_PURGE_FATAL_ERR,
4295                             "a problem with deleting %s; "
4296                             "consider examining correspondence "
4297                             "of your binlog index file "
4298                             "to the actual binlog files",
4299                             index_file_name);
4300       error= 1;
4301       goto err;
4302     }
4303   }
4304   if (create_new_log && !open_index_file(index_file_name, 0, FALSE))
4305     if (unlikely((error= open(save_name, log_type, 0, next_log_number,
4306                               io_cache_type, max_size, 0, FALSE))))
4307       goto err;
4308   my_free((void *) save_name);
4309 
4310 err:
4311   if (error == 1)
4312     name= const_cast<char*>(save_name);
4313 
4314   if (!is_relay_log)
4315   {
4316     xid_count_per_binlog *b;
4317     /*
4318       Remove all entries in the xid_count list except the last.
4319       Normally we will just be deleting all the entries that we waited for to
4320       drop to zero above. But if we fail during RESET MASTER for some reason
4321       then we will not have created any new log file, and we may keep the last
4322       of the old entries.
4323     */
4324     mysql_mutex_lock(&LOCK_xid_list);
4325     for (;;)
4326     {
4327       b= binlog_xid_count_list.head();
4328       DBUG_ASSERT(b /* List can never become empty. */);
4329       if (b->binlog_id == current_binlog_id)
4330         break;
4331       DBUG_ASSERT(b->xid_count == 0);
4332       WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::reset_logs(): Removing "
4333                            "xid_list_entry for %s (%lu)", b);
4334       delete binlog_xid_count_list.get();
4335     }
4336     mysql_cond_broadcast(&COND_xid_list);
4337     reset_master_pending--;
4338     reset_master_count++;
4339     mysql_mutex_unlock(&LOCK_xid_list);
4340   }
4341 
4342   mysql_mutex_unlock(&LOCK_index);
4343   mysql_mutex_unlock(&LOCK_log);
4344   DBUG_RETURN(error);
4345 }
4346 
4347 
wait_for_last_checkpoint_event()4348 void MYSQL_BIN_LOG::wait_for_last_checkpoint_event()
4349 {
4350   mysql_mutex_lock(&LOCK_xid_list);
4351   for (;;)
4352   {
4353     if (binlog_xid_count_list.is_last(binlog_xid_count_list.head()))
4354       break;
4355     mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4356   }
4357   mysql_mutex_unlock(&LOCK_xid_list);
4358 
4359   /*
4360     LOCK_xid_list and LOCK_log are chained, so the LOCK_log will only be
4361     obtained after mark_xid_done() has written the last checkpoint event.
4362   */
4363   mysql_mutex_lock(&LOCK_log);
4364   mysql_mutex_unlock(&LOCK_log);
4365 }
4366 
4367 
/**
  Delete relay log files prior to rli->group_relay_log_name
  (i.e. all logs which are not involved in a non-finished group
  (transaction)), remove them from the index file and start on next
  relay log.

  IMPLEMENTATION

  - You must hold rli->data_lock before calling this function, since
    it writes group_relay_log_pos and similar fields of
    Relay_log_info.
  - Protects index file with LOCK_index
  - Delete relevant relay log files
  - Copy all file names after these ones to the front of the index file
  - If the OS has truncate, truncate the file, else fill it with '\n'
  - Read the next file name from the index file and store in rli->linfo

  @param rli          Relay log information
  @param included     If false, all relay logs that are strictly before
                      rli->group_relay_log_name are deleted; if true, the
                      latter is deleted too (i.e. all relay logs
                      read by the SQL slave thread are deleted).
                      The value may be overridden while walking
                      rli->inuse_relaylog_list (see the loop in the body).

  @note
    - This is only called from the slave SQL thread when it has read
    all commands from a relay log and wants to switch to a new relay log.
    - When this happens, we can be in an active transaction as
    a transaction can span over two relay logs
    (although it is always written as a single block to the master's binary
    log, hence cannot span over two master's binary logs).
    - NOTE(review): a failure of the second find_log_pos() call (after the
    purge) is stored in a separate variable and is not what the function
    returns; confirm whether that is intended.

  @retval
    0			ok
  @retval
    LOG_INFO_EOF	        End of log-index-file found
  @retval
    LOG_INFO_SEEK	Could not allocate IO cache
  @retval
    LOG_INFO_IO		Got IO error while reading file
*/

#ifdef HAVE_REPLICATION

int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
{
  int error, errcode;
  char *to_purge_if_included= NULL;
  inuse_relaylog *ir;
  ulonglong log_space_reclaimed= 0;
  DBUG_ENTER("purge_first_log");

  DBUG_ASSERT(is_open());
  DBUG_ASSERT(rli->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT);
  DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));

  mysql_mutex_assert_owner(&rli->data_lock);

  mysql_mutex_lock(&LOCK_index);

  /*
    Walk the list of in-use relay logs from its head, freeing every entry
    that is completed and fully dequeued. The walk stops at the first entry
    that is still in use, or (when !included) at the entry named
    rli->group_relay_log_name.
  */
  ir= rli->inuse_relaylog_list;
  while (ir)
  {
    inuse_relaylog *next= ir->next;
    if (!ir->completed || ir->dequeued_count < ir->queued_count)
    {
      /* This log is still needed: purge only what precedes it. */
      included= false;
      break;
    }
    if (!included && !strcmp(ir->name, rli->group_relay_log_name))
      break;
    if (!next)
    {
      /*
        Last list entry is about to be freed: remember its name (copied,
        since the entry goes away) and force purging up to and including it.
      */
      rli->last_inuse_relaylog= NULL;
      included= 1;
      to_purge_if_included= my_strdup(ir->name, MYF(0));
    }
    rli->free_inuse_relaylog(ir);
    ir= next;
  }
  rli->inuse_relaylog_list= ir;
  /*
    If the walk stopped early, the surviving head entry is the purge target.
    NOTE(review): my_strdup() results are not checked for NULL, and if the
    list was empty on entry to_purge_if_included stays NULL when passed to
    purge_logs() below - presumably the list is never empty here; confirm.
  */
  if (ir)
    to_purge_if_included= my_strdup(ir->name, MYF(0));

  /*
    Read the next log file name from the index file and pass it back to
    the caller.
  */
  if (unlikely((error=find_log_pos(&rli->linfo, rli->event_relay_log_name,
                                   0))) ||
      unlikely((error=find_next_log(&rli->linfo, 0))))
  {
    sql_print_error("next log error: %d  offset: %llu  log: %s included: %d",
                    error, rli->linfo.index_file_offset,
                    rli->event_relay_log_name, included);
    goto err;
  }

  /*
    Reset rli's coordinates to the current log.
  */
  rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
  strmake_buf(rli->event_relay_log_name,rli->linfo.log_file_name);

  /*
    If we removed the rli->group_relay_log_name file,
    we must update the rli->group* coordinates, otherwise do not touch it as the
    group's execution is not finished (e.g. COMMIT not executed)
  */
  if (included)
  {
    rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE;
    strmake_buf(rli->group_relay_log_name,rli->linfo.log_file_name);
    rli->notify_group_relay_log_name_update();
  }

  /* Store where we are in the new file for the execution thread */
  /* A flush failure is recorded in error but does not stop the purge. */
  if (rli->flush())
    error= LOG_INFO_IO;

  DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););

  /*
    LOCK_index is already held, hence need_mutex=0; need_update_threads=0.
    NOTE(review): the return value of purge_logs() is not examined.
  */
  rli->relay_log.purge_logs(to_purge_if_included, included,
                            0, 0, &log_space_reclaimed);

  /* Account for the freed space and wake anyone waiting on log_space_cond. */
  mysql_mutex_lock(&rli->log_space_lock);
  rli->log_space_total-= log_space_reclaimed;
  mysql_cond_broadcast(&rli->log_space_cond);
  mysql_mutex_unlock(&rli->log_space_lock);

  /*
   * Need to update the log pos because purge logs has been called
   * after fetching initially the log pos at the beginning of the method.
   */
  /*
    NOTE(review): the failure code lands in errcode, not error, so this path
    returns whatever error held before (0 or LOG_INFO_IO). The message also
    prints group_relay_log_name although the lookup used
    event_relay_log_name. Confirm both are intended.
  */
  if ((errcode= find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)))
  {
    sql_print_error("next log error: %d  offset: %llu  log: %s included: %d",
                    errcode, rli->linfo.index_file_offset,
                    rli->group_relay_log_name, included);
    goto err;
  }

  /* If included was passed, rli->linfo should be the first entry. */
  DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);

err:
  /* Common exit: release the duplicated name and the index lock. */
  my_free(to_purge_if_included);
  mysql_mutex_unlock(&LOCK_index);
  DBUG_RETURN(error);
}
4517 
4518 /**
4519   Update log index_file.
4520 */
4521 
update_log_index(LOG_INFO * log_info,bool need_update_threads)4522 int MYSQL_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
4523 {
4524   if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
4525     return LOG_INFO_IO;
4526 
4527   // now update offsets in index file for running threads
4528   if (need_update_threads)
4529     adjust_linfo_offsets(log_info->index_file_start_offset);
4530   return 0;
4531 }
4532 
4533 /**
4534   Remove all logs before the given log from disk and from the index file.
4535 
4536   @param to_log	      Delete all log file name before this file.
4537   @param included            If true, to_log is deleted too.
4538   @param need_mutex
4539   @param need_update_threads If we want to update the log coordinates of
4540                              all threads. False for relay logs, true otherwise.
4541   @param reclaimeed_log_space If not null, increment this variable to
4542                               the amount of log space freed
4543 
4544   @note
4545     If any of the logs before the deleted one is in use,
4546     only purge logs up to this one.
4547 
4548   @retval
4549     0			ok
4550   @retval
4551     LOG_INFO_EOF		to_log not found
4552     LOG_INFO_EMFILE             too many files opened
4553     LOG_INFO_FATAL              if any other than ENOENT error from
4554                                 mysql_file_stat() or mysql_file_delete()
4555 */
4556 
purge_logs(const char * to_log,bool included,bool need_mutex,bool need_update_threads,ulonglong * reclaimed_space)4557 int MYSQL_BIN_LOG::purge_logs(const char *to_log,
4558                               bool included,
4559                               bool need_mutex,
4560                               bool need_update_threads,
4561                               ulonglong *reclaimed_space)
4562 {
4563   int error= 0;
4564   bool exit_loop= 0;
4565   LOG_INFO log_info;
4566   THD *thd= current_thd;
4567   DBUG_ENTER("purge_logs");
4568   DBUG_PRINT("info",("to_log= %s",to_log));
4569 
4570   if (need_mutex)
4571     mysql_mutex_lock(&LOCK_index);
4572   if (unlikely((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/))) )
4573   {
4574     sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
4575                     "listed in the index.", to_log);
4576     goto err;
4577   }
4578 
4579   if (unlikely((error= open_purge_index_file(TRUE))))
4580   {
4581     sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
4582     goto err;
4583   }
4584 
4585   /*
4586     File name exists in index file; delete until we find this file
4587     or a file that is used.
4588   */
4589   if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))))
4590     goto err;
4591   while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
4592          can_purge_log(log_info.log_file_name))
4593   {
4594     if (unlikely((error= register_purge_index_entry(log_info.log_file_name))))
4595     {
4596       sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
4597                       log_info.log_file_name);
4598       goto err;
4599     }
4600 
4601     if (find_next_log(&log_info, 0) || exit_loop)
4602       break;
4603   }
4604 
4605   DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););
4606 
4607   if (unlikely((error= sync_purge_index_file())))
4608   {
4609     sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
4610     goto err;
4611   }
4612 
4613   /* We know how many files to delete. Update index file. */
4614   if (unlikely((error=update_log_index(&log_info, need_update_threads))))
4615   {
4616     sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
4617     goto err;
4618   }
4619 
4620   DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););
4621 
4622 err:
4623   /* Read each entry from purge_index_file and delete the file. */
4624   if (is_inited_purge_index_file() &&
4625       (error= purge_index_entry(thd, reclaimed_space, FALSE)))
4626     sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
4627                     " that would be purged.");
4628   close_purge_index_file();
4629 
4630   DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););
4631 
4632   if (need_mutex)
4633     mysql_mutex_unlock(&LOCK_index);
4634   DBUG_RETURN(error);
4635 }
4636 
set_purge_index_file_name(const char * base_file_name)4637 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
4638 {
4639   int error= 0;
4640   DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
4641   if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
4642                 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
4643                               MY_REPLACE_EXT)) == NULL)
4644   {
4645     error= 1;
4646     sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
4647                       "file name.");
4648   }
4649   DBUG_RETURN(error);
4650 }
4651 
open_purge_index_file(bool destroy)4652 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
4653 {
4654   int error= 0;
4655   File file= -1;
4656 
4657   DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
4658 
4659   if (destroy)
4660     close_purge_index_file();
4661 
4662   if (!my_b_inited(&purge_index_file))
4663   {
4664     if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
4665                        MYF(MY_WME))) < 0  ||
4666         init_io_cache(&purge_index_file, file, IO_SIZE,
4667                       (destroy ? WRITE_CACHE : READ_CACHE),
4668                       0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
4669     {
4670       error= 1;
4671       sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
4672                       " file.");
4673     }
4674   }
4675   DBUG_RETURN(error);
4676 }
4677 
close_purge_index_file()4678 int MYSQL_BIN_LOG::close_purge_index_file()
4679 {
4680   int error= 0;
4681 
4682   DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
4683 
4684   if (my_b_inited(&purge_index_file))
4685   {
4686     end_io_cache(&purge_index_file);
4687     error= my_close(purge_index_file.file, MYF(0));
4688   }
4689   my_delete(purge_index_file_name, MYF(0));
4690   bzero((char*) &purge_index_file, sizeof(purge_index_file));
4691 
4692   DBUG_RETURN(error);
4693 }
4694 
is_inited_purge_index_file()4695 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
4696 {
4697   return my_b_inited(&purge_index_file);
4698 }
4699 
sync_purge_index_file()4700 int MYSQL_BIN_LOG::sync_purge_index_file()
4701 {
4702   int error= 0;
4703   DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
4704 
4705   if (unlikely((error= flush_io_cache(&purge_index_file))) ||
4706       unlikely((error= my_sync(purge_index_file.file,
4707                                MYF(MY_WME | MY_SYNC_FILESIZE)))))
4708     DBUG_RETURN(error);
4709 
4710   DBUG_RETURN(error);
4711 }
4712 
register_purge_index_entry(const char * entry)4713 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
4714 {
4715   int error= 0;
4716   DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
4717 
4718   if (unlikely((error=my_b_write(&purge_index_file, (const uchar*)entry,
4719                                  strlen(entry)))) ||
4720       unlikely((error=my_b_write(&purge_index_file, (const uchar*)"\n", 1))))
4721     DBUG_RETURN (error);
4722 
4723   DBUG_RETURN(error);
4724 }
4725 
register_create_index_entry(const char * entry)4726 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
4727 {
4728   DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
4729   DBUG_RETURN(register_purge_index_entry(entry));
4730 }
4731 
/**
  Delete the files listed in the purge register file.

  The register IO cache is re-initialised for reading and processed one
  line (file name) at a time. A file is deleted only if it can be stat'ed
  and find_log_pos() reports LOG_INFO_EOF for it, i.e. it is not found in
  the index file; a name that is still listed in the index is left alone.

  @param thd              If not NULL, problems are pushed as warnings on
                          this thread; otherwise some of them are written
                          to the error log instead
  @param reclaimed_space  If not NULL, incremented by the st_size of every
                          file that was deleted
  @param need_mutex       Passed on as the third argument of find_log_pos()

  @retval 0                 ok (a missing file only produces a warning)
  @retval LOG_INFO_EMFILE   my_delete() failed with EMFILE
  @retval LOG_INFO_FATAL    mysql_file_stat() or my_delete() failed with an
                            error other than ENOENT (and other than EMFILE
                            for my_delete())
  @retval other             error from reinit_io_cache(), from reading the
                            register file, or from find_log_pos()
*/
int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *reclaimed_space,
                                     bool need_mutex)
{
  DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");
  MY_STAT s;
  int error= 0;
  LOG_INFO log_info;
  LOG_INFO check_log_info;

  DBUG_ASSERT(my_b_inited(&purge_index_file));

  /* Switch the register cache to read mode, starting at offset 0. */
  if (unlikely((error= reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0,
                                       0))))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
                    "for read");
    goto err;
  }

  for (;;)
  {
    size_t length;

    /*
      A length of 0 or 1 ends the loop: either a read error (reported
      below), or nothing more to read. Note that a line consisting only of
      '\n' has length 1 and therefore also ends the loop.
    */
    if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
                          FN_REFLEN)) <= 1)
    {
      if (purge_index_file.error)
      {
        error= purge_index_file.error;
        sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
                        "register file.", error);
        goto err;
      }

      /* Reached EOF */
      break;
    }

    /* Get rid of the trailing '\n' */
    log_info.log_file_name[length-1]= 0;

    if (unlikely(!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s,
                                  MYF(0))))
    {
      if (my_errno == ENOENT)
      {
        /*
          It's not fatal if we can't stat a log file that does not exist;
          If we could not stat, we won't delete.
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                              log_info.log_file_name);
        }
        sql_print_information("Failed to execute mysql_file_stat on file '%s'",
			      log_info.log_file_name);
        my_errno= 0;
      }
      else
      {
        /*
          Other than ENOENT are fatal
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with getting info on being purged %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              log_info.log_file_name);
        }
        else
        {
          sql_print_information("Failed to delete log file '%s'; "
                                "consider examining correspondence "
                                "of your binlog index file "
                                "to the actual binlog files",
                                log_info.log_file_name);
        }
        error= LOG_INFO_FATAL;
        goto err;
      }
    }
    else
    {
      /*
        The file exists. Look it up in the index: a zero result means it is
        still listed and nothing is done for it; only a non-zero result
        enters the branch below.
      */
      if (unlikely((error= find_log_pos(&check_log_info,
                                        log_info.log_file_name, need_mutex))))
      {
        /* Any lookup failure other than "not found" aborts the purge. */
        if (error != LOG_INFO_EOF)
        {
          if (thd)
          {
            push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                ER_BINLOG_PURGE_FATAL_ERR,
                                "a problem with deleting %s and "
                                "reading the binlog index file",
                                log_info.log_file_name);
          }
          else
          {
            sql_print_information("Failed to delete file '%s' and "
                                  "read the binlog index file",
                                  log_info.log_file_name);
          }
          goto err;
        }

        /* LOG_INFO_EOF: not in the index any more, so it can be removed. */
        error= 0;

        DBUG_PRINT("info",("purging %s",log_info.log_file_name));
        if (!my_delete(log_info.log_file_name, MYF(0)))
        {
          /* Deleted: credit the size obtained from the stat above. */
          if (reclaimed_space)
            *reclaimed_space+= s.st_size;
        }
        else
        {
          if (my_errno == ENOENT)
          {
            /* The file vanished between stat and delete: warn and go on. */
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                  ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                                  log_info.log_file_name);
            }
            sql_print_information("Failed to delete file '%s'",
                                  log_info.log_file_name);
            my_errno= 0;
          }
          else
          {
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                  ER_BINLOG_PURGE_FATAL_ERR,
                                  "a problem with deleting %s; "
                                  "consider examining correspondence "
                                  "of your binlog index file "
                                  "to the actual binlog files",
                                  log_info.log_file_name);
            }
            else
            {
              sql_print_information("Failed to delete file '%s'; "
                                    "consider examining correspondence "
                                    "of your binlog index file "
                                    "to the actual binlog files",
                                    log_info.log_file_name);
            }
            /* EMFILE gets its own return code; everything else is fatal. */
            if (my_errno == EMFILE)
            {
              DBUG_PRINT("info",
                         ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno));
              error= LOG_INFO_EMFILE;
              goto err;
            }
            error= LOG_INFO_FATAL;
            goto err;
          }
        }
      }
    }
  }

err:
  DBUG_RETURN(error);
}
4903 
4904 /**
4905   Remove all logs before the given file date from disk and from the
4906   index file.
4907 
4908   @param thd		Thread pointer
4909   @param purge_time	Delete all log files before given date.
4910 
4911   @note
4912     If any of the logs before the deleted one is in use,
4913     only purge logs up to this one.
4914 
4915   @retval
4916     0				ok
4917   @retval
4918     LOG_INFO_PURGE_NO_ROTATE	Binary file that can't be rotated
4919     LOG_INFO_FATAL              if any other than ENOENT error from
4920                                 mysql_file_stat() or mysql_file_delete()
4921 */
4922 
purge_logs_before_date(time_t purge_time)4923 int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
4924 {
4925   int error;
4926   char to_log[FN_REFLEN];
4927   LOG_INFO log_info;
4928   MY_STAT stat_area;
4929   THD *thd= current_thd;
4930   DBUG_ENTER("purge_logs_before_date");
4931 
4932   mysql_mutex_lock(&LOCK_index);
4933   to_log[0]= 0;
4934 
4935   if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))))
4936     goto err;
4937 
4938   while (strcmp(log_file_name, log_info.log_file_name) &&
4939 	 can_purge_log(log_info.log_file_name))
4940   {
4941     if (!mysql_file_stat(m_key_file_log,
4942                          log_info.log_file_name, &stat_area, MYF(0)))
4943     {
4944       if (my_errno == ENOENT)
4945       {
4946         /*
4947           It's not fatal if we can't stat a log file that does not exist.
4948         */
4949         my_errno= 0;
4950       }
4951       else
4952       {
4953         /*
4954           Other than ENOENT are fatal
4955         */
4956         if (thd)
4957         {
4958           push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4959                               ER_BINLOG_PURGE_FATAL_ERR,
4960                               "a problem with getting info on being purged %s; "
4961                               "consider examining correspondence "
4962                               "of your binlog index file "
4963                               "to the actual binlog files",
4964                               log_info.log_file_name);
4965         }
4966         else
4967         {
4968           sql_print_information("Failed to delete log file '%s'",
4969                                 log_info.log_file_name);
4970         }
4971         error= LOG_INFO_FATAL;
4972         goto err;
4973       }
4974     }
4975     else
4976     {
4977       if (stat_area.st_mtime < purge_time)
4978         strmake_buf(to_log, log_info.log_file_name);
4979       else
4980         break;
4981     }
4982     if (find_next_log(&log_info, 0))
4983       break;
4984   }
4985 
4986   error= (to_log[0] ? purge_logs(to_log, 1, 0, 1, (ulonglong *) 0) : 0);
4987 
4988 err:
4989   mysql_mutex_unlock(&LOCK_index);
4990   DBUG_RETURN(error);
4991 }
4992 
4993 
4994 bool
can_purge_log(const char * log_file_name_arg)4995 MYSQL_BIN_LOG::can_purge_log(const char *log_file_name_arg)
4996 {
4997   xid_count_per_binlog *b;
4998 
4999   if (is_active(log_file_name_arg))
5000     return false;
5001   mysql_mutex_lock(&LOCK_xid_list);
5002   {
5003     I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
5004     while ((b= it++) &&
5005            0 != strncmp(log_file_name_arg+dirname_length(log_file_name_arg),
5006                         b->binlog_name, b->binlog_name_len))
5007       ;
5008   }
5009   mysql_mutex_unlock(&LOCK_xid_list);
5010   if (b)
5011     return false;
5012   return !log_in_use(log_file_name_arg);
5013 }
5014 #endif /* HAVE_REPLICATION */
5015 
5016 
5017 bool
is_xidlist_idle()5018 MYSQL_BIN_LOG::is_xidlist_idle()
5019 {
5020   bool res;
5021   mysql_mutex_lock(&LOCK_xid_list);
5022   res= is_xidlist_idle_nolock();
5023   mysql_mutex_unlock(&LOCK_xid_list);
5024   return res;
5025 }
5026 
5027 
5028 bool
is_xidlist_idle_nolock()5029 MYSQL_BIN_LOG::is_xidlist_idle_nolock()
5030 {
5031   xid_count_per_binlog *b;
5032 
5033   I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
5034   while ((b= it++))
5035   {
5036     if (b->xid_count > 0)
5037       return false;
5038   }
5039   return true;
5040 }
5041 
5042 #ifdef WITH_WSREP
5043 inline bool
is_gtid_cached_internal(IO_CACHE * file)5044 is_gtid_cached_internal(IO_CACHE *file)
5045 {
5046   uchar data[EVENT_TYPE_OFFSET+1];
5047   bool result= false;
5048   my_off_t write_pos= my_b_tell(file);
5049   if (reinit_io_cache(file, READ_CACHE, 0, 0, 0))
5050     return false;
5051   /*
5052    In the cache we have gtid event if , below condition is true,
5053   */
5054   my_b_read(file, data, sizeof(data));
5055   uint event_type= (uchar)data[EVENT_TYPE_OFFSET];
5056   if (event_type == GTID_LOG_EVENT)
5057     result= true;
5058   /*
5059     Cleanup , Why because we have not read the full buffer
5060     and this will cause next to next reinit_io_cache(called in write_cache)
5061     to make cache empty.
5062    */
5063   file->read_pos= file->read_end;
5064   if (reinit_io_cache(file, WRITE_CACHE, write_pos, 0, 0))
5065     return false;
5066   return result;
5067 }
5068 #endif
5069 
5070 #ifdef WITH_WSREP
5071 inline bool
is_gtid_cached(THD * thd)5072 MYSQL_BIN_LOG::is_gtid_cached(THD *thd)
5073 {
5074   binlog_cache_mngr *mngr= (binlog_cache_mngr *) thd_get_ha_data(
5075           thd, binlog_hton);
5076   if (!mngr)
5077     return false;
5078   binlog_cache_data *cache_trans= mngr->get_binlog_cache_data(
5079           use_trans_cache(thd, true));
5080   binlog_cache_data *cache_stmt= mngr->get_binlog_cache_data(
5081           use_trans_cache(thd, false));
5082   if (cache_trans && !cache_trans->empty() &&
5083           is_gtid_cached_internal(&cache_trans->cache_log))
5084     return true;
5085   if (cache_stmt && !cache_stmt->empty() &&
5086           is_gtid_cached_internal(&cache_stmt->cache_log))
5087     return true;
5088   return false;
5089 }
5090 #endif
5091 /**
5092   Create a new log file name.
5093 
5094   @param buf		buf of at least FN_REFLEN where new name is stored
5095 
5096   @note
5097     If file name will be longer then FN_REFLEN it will be truncated
5098 */
5099 
make_log_name(char * buf,const char * log_ident)5100 void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
5101 {
5102   size_t dir_len = dirname_length(log_file_name);
5103   if (dir_len >= FN_REFLEN)
5104     dir_len=FN_REFLEN-1;
5105   strnmov(buf, log_file_name, dir_len);
5106   strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
5107 }
5108 
5109 
5110 /**
5111   Check if we are writing/reading to the given log file.
5112 */
5113 
is_active(const char * log_file_name_arg)5114 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
5115 {
5116   /**
5117    * there should/must be mysql_mutex_assert_owner(&LOCK_log) here...
5118    * but code violates this! (scary monsters and super creeps!)
5119    *
5120    * example stacktrace:
5121    * #8  MYSQL_BIN_LOG::is_active
5122    * #9  MYSQL_BIN_LOG::can_purge_log
5123    * #10 MYSQL_BIN_LOG::purge_logs
5124    * #11 MYSQL_BIN_LOG::purge_first_log
5125    * #12 next_event
5126    * #13 exec_relay_log_event
5127    *
5128    * I didn't investigate if this is ligit...(i.e if my comment is wrong)
5129    */
5130   return !strcmp(log_file_name, log_file_name_arg);
5131 }
5132 
5133 
5134 /*
5135   Wrappers around new_file_impl to avoid using argument
5136   to control locking. The argument 1) less readable 2) breaks
5137   incapsulation 3) allows external access to the class without
5138   a lock (which is not possible with private new_file_without_locking
5139   method).
5140 
5141   @retval
5142     nonzero - error
5143 */
5144 
new_file()5145 int MYSQL_BIN_LOG::new_file()
5146 {
5147   int res;
5148   mysql_mutex_lock(&LOCK_log);
5149   res= new_file_impl();
5150   mysql_mutex_unlock(&LOCK_log);
5151   return res;
5152 }
5153 
5154 /*
5155   @retval
5156     nonzero - error
5157  */
new_file_without_locking()5158 int MYSQL_BIN_LOG::new_file_without_locking()
5159 {
5160   return new_file_impl();
5161 }
5162 
5163 
5164 /**
5165   Start writing to a new log file or reopen the old file.
5166 
5167   @param need_lock		Set to 1 if caller has not locked LOCK_log
5168 
5169   @retval
5170     nonzero - error
5171 
5172   @note
5173     The new file name is stored last in the index file
5174 */
5175 
new_file_impl()5176 int MYSQL_BIN_LOG::new_file_impl()
5177 {
5178   int error= 0, close_on_error= FALSE;
5179   char new_name[FN_REFLEN], *new_name_ptr, *old_name, *file_to_open;
5180   uint close_flag;
5181   bool delay_close= false;
5182   File UNINIT_VAR(old_file);
5183   DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
5184 
5185   DBUG_ASSERT(log_type == LOG_BIN);
5186   mysql_mutex_assert_owner(&LOCK_log);
5187 
5188   if (!is_open())
5189   {
5190     DBUG_PRINT("info",("log is closed"));
5191     DBUG_RETURN(error);
5192   }
5193 
5194   mysql_mutex_lock(&LOCK_index);
5195 
5196   /* Reuse old name if not binlog and not update log */
5197   new_name_ptr= name;
5198 
5199   /*
5200     If user hasn't specified an extension, generate a new log name
5201     We have to do this here and not in open as we want to store the
5202     new file name in the current binary log file.
5203   */
5204   if (unlikely((error= generate_new_name(new_name, name, 0))))
5205   {
5206 #ifdef ENABLE_AND_FIX_HANG
5207     close_on_error= TRUE;
5208 #endif
5209     goto end2;
5210   }
5211   new_name_ptr=new_name;
5212 
5213   if (log_type == LOG_BIN)
5214   {
5215     {
5216       /*
5217         We log the whole file name for log file as the user may decide
5218         to change base names at some point.
5219       */
5220       Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
5221                          is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
5222       /*
5223          The current relay-log's closing Rotate event must have checksum
5224          value computed with an algorithm of the last relay-logged FD event.
5225       */
5226       if (is_relay_log)
5227         r.checksum_alg= relay_log_checksum_alg;
5228       DBUG_ASSERT(!is_relay_log || relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
5229       if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event", (error=close_on_error=TRUE), FALSE) ||
5230          (error= write_event(&r)))
5231       {
5232         DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno=2;);
5233         close_on_error= TRUE;
5234         my_printf_error(ER_ERROR_ON_WRITE,
5235                         ER_THD_OR_DEFAULT(current_thd, ER_CANT_OPEN_FILE),
5236                         MYF(ME_FATAL), name, errno);
5237         goto end;
5238       }
5239       bytes_written += r.data_written;
5240     }
5241   }
5242 
5243   /*
5244     Update needs to be signalled even if there is no rotate event
5245     log rotation should give the waiting thread a signal to
5246     discover EOF and move on to the next log.
5247   */
5248   if (unlikely((error= flush_io_cache(&log_file))))
5249   {
5250     close_on_error= TRUE;
5251     goto end;
5252   }
5253   update_binlog_end_pos();
5254 
5255   old_name=name;
5256   name=0;				// Don't free name
5257   close_flag= LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX;
5258   if (!is_relay_log)
5259   {
5260     /*
5261       We need to keep the old binlog file open (and marked as in-use) until
5262       the new one is fully created and synced to disk and index. Otherwise we
5263       leave a window where if we crash, there is no binlog file marked as
5264       crashed for server restart to detect the need for recovery.
5265     */
5266     old_file= log_file.file;
5267     close_flag|= LOG_CLOSE_DELAYED_CLOSE;
5268     delay_close= true;
5269   }
5270   close(close_flag);
5271   if (log_type == LOG_BIN && checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF)
5272   {
5273     DBUG_ASSERT(!is_relay_log);
5274     DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
5275     binlog_checksum_options= checksum_alg_reset;
5276   }
5277   /*
5278      Note that at this point, log_state != LOG_CLOSED
5279      (important for is_open()).
5280   */
5281 
5282   /*
5283      new_file() is only used for rotation (in FLUSH LOGS or because size >
5284      max_binlog_size or max_relay_log_size).
5285      If this is a binary log, the Format_description_log_event at the
5286      beginning of the new file should have created=0 (to distinguish with the
5287      Format_description_log_event written at server startup, which should
5288      trigger temp tables deletion on slaves.
5289   */
5290 
5291   /* reopen index binlog file, BUG#34582 */
5292   file_to_open= index_file_name;
5293   error= open_index_file(index_file_name, 0, FALSE);
5294   if (likely(!error))
5295   {
5296     /* reopen the binary log file. */
5297     file_to_open= new_name_ptr;
5298     error= open(old_name, log_type, new_name_ptr, 0, io_cache_type,
5299                 max_size, 1, FALSE);
5300   }
5301 
5302   /* handle reopening errors */
5303   if (unlikely(error))
5304   {
5305     my_error(ER_CANT_OPEN_FILE, MYF(ME_FATAL), file_to_open, error);
5306     close_on_error= TRUE;
5307   }
5308 
5309   my_free(old_name);
5310 
5311 end:
5312   /* In case of errors, reuse the last generated log file name */
5313   if (unlikely(error))
5314   {
5315     DBUG_ASSERT(last_used_log_number > 0);
5316     last_used_log_number--;
5317   }
5318 
5319 end2:
5320   if (delay_close)
5321   {
5322     clear_inuse_flag_when_closing(old_file);
5323     mysql_file_close(old_file, MYF(MY_WME));
5324   }
5325 
5326   if (unlikely(error && close_on_error)) /* rotate or reopen failed */
5327   {
5328     /*
5329       Close whatever was left opened.
5330 
5331       We are keeping the behavior as it exists today, ie,
5332       we disable logging and move on (see: BUG#51014).
5333 
5334       TODO: as part of WL#1790 consider other approaches:
5335        - kill mysql (safety);
5336        - try multiple locations for opening a log file;
5337        - switch server to protected/readonly mode
5338        - ...
5339     */
5340     close(LOG_CLOSE_INDEX);
5341     sql_print_error(fatal_log_error, new_name_ptr, errno);
5342   }
5343 
5344   mysql_mutex_unlock(&LOCK_index);
5345 
5346   DBUG_RETURN(error);
5347 }
5348 
write_event(Log_event * ev,binlog_cache_data * cache_data,IO_CACHE * file)5349 bool MYSQL_BIN_LOG::write_event(Log_event *ev, binlog_cache_data *cache_data,
5350                                 IO_CACHE *file)
5351 {
5352   Log_event_writer writer(file, 0, &crypto);
5353   if (crypto.scheme && file == &log_file)
5354     writer.ctx= alloca(crypto.ctx_size);
5355   if (cache_data)
5356     cache_data->add_status(ev->logged_status());
5357   return writer.write(ev);
5358 }
5359 
append(Log_event * ev)5360 bool MYSQL_BIN_LOG::append(Log_event *ev)
5361 {
5362   bool res;
5363   mysql_mutex_lock(&LOCK_log);
5364   res= append_no_lock(ev);
5365   mysql_mutex_unlock(&LOCK_log);
5366   return res;
5367 }
5368 
5369 
append_no_lock(Log_event * ev)5370 bool MYSQL_BIN_LOG::append_no_lock(Log_event* ev)
5371 {
5372   bool error = 0;
5373   DBUG_ENTER("MYSQL_BIN_LOG::append");
5374 
5375   mysql_mutex_assert_owner(&LOCK_log);
5376   DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5377 
5378   if (write_event(ev))
5379   {
5380     error=1;
5381     goto err;
5382   }
5383   bytes_written+= ev->data_written;
5384   DBUG_PRINT("info",("max_size: %lu",max_size));
5385   if (flush_and_sync(0))
5386     goto err;
5387   if (my_b_append_tell(&log_file) > max_size)
5388     error= new_file_without_locking();
5389 err:
5390   update_binlog_end_pos();
5391   DBUG_RETURN(error);
5392 }
5393 
write_event_buffer(uchar * buf,uint len)5394 bool MYSQL_BIN_LOG::write_event_buffer(uchar* buf, uint len)
5395 {
5396   bool error= 1;
5397   uchar *ebuf= 0;
5398   DBUG_ENTER("MYSQL_BIN_LOG::write_event_buffer");
5399 
5400   DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5401 
5402   mysql_mutex_assert_owner(&LOCK_log);
5403 
5404   if (crypto.scheme != 0)
5405   {
5406     DBUG_ASSERT(crypto.scheme == 1);
5407 
5408     uint elen;
5409     uchar iv[BINLOG_IV_LENGTH];
5410 
5411     ebuf= (uchar*)my_safe_alloca(len);
5412     if (!ebuf)
5413       goto err;
5414 
5415     crypto.set_iv(iv, (uint32)my_b_append_tell(&log_file));
5416 
5417     /*
5418       we want to encrypt everything, excluding the event length:
5419       massage the data before the encryption
5420     */
5421     memcpy(buf + EVENT_LEN_OFFSET, buf, 4);
5422 
5423     if (encryption_crypt(buf + 4, len - 4,
5424                          ebuf + 4, &elen,
5425                          crypto.key, crypto.key_length, iv, sizeof(iv),
5426                          ENCRYPTION_FLAG_ENCRYPT | ENCRYPTION_FLAG_NOPAD,
5427                          ENCRYPTION_KEY_SYSTEM_DATA, crypto.key_version))
5428       goto err;
5429 
5430     DBUG_ASSERT(elen == len - 4);
5431 
5432     /* massage the data after the encryption */
5433     memcpy(ebuf, ebuf + EVENT_LEN_OFFSET, 4);
5434     int4store(ebuf + EVENT_LEN_OFFSET, len);
5435 
5436     buf= ebuf;
5437   }
5438   if (my_b_append(&log_file, buf, len))
5439     goto err;
5440   bytes_written+= len;
5441 
5442   error= 0;
5443   DBUG_PRINT("info",("max_size: %lu",max_size));
5444   if (flush_and_sync(0))
5445     goto err;
5446   if (my_b_append_tell(&log_file) > max_size)
5447     error= new_file_without_locking();
5448 err:
5449   my_safe_afree(ebuf, len);
5450   if (likely(!error))
5451     update_binlog_end_pos();
5452   DBUG_RETURN(error);
5453 }
5454 
flush_and_sync(bool * synced)5455 bool MYSQL_BIN_LOG::flush_and_sync(bool *synced)
5456 {
5457   int err=0, fd=log_file.file;
5458   if (synced)
5459     *synced= 0;
5460   mysql_mutex_assert_owner(&LOCK_log);
5461   if (flush_io_cache(&log_file))
5462     return 1;
5463   uint sync_period= get_sync_period();
5464   if (sync_period && ++sync_counter >= sync_period)
5465   {
5466     sync_counter= 0;
5467     err= mysql_file_sync(fd, MYF(MY_WME|MY_SYNC_FILESIZE));
5468     if (synced)
5469       *synced= 1;
5470 #ifndef DBUG_OFF
5471     if (opt_binlog_dbug_fsync_sleep > 0)
5472       my_sleep(opt_binlog_dbug_fsync_sleep);
5473 #endif
5474   }
5475   return err;
5476 }
5477 
start_union_events(THD * thd,query_id_t query_id_param)5478 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
5479 {
5480   DBUG_ASSERT(!thd->binlog_evt_union.do_union);
5481   thd->binlog_evt_union.do_union= TRUE;
5482   thd->binlog_evt_union.unioned_events= FALSE;
5483   thd->binlog_evt_union.unioned_events_trans= FALSE;
5484   thd->binlog_evt_union.first_query_id= query_id_param;
5485 }
5486 
stop_union_events(THD * thd)5487 void MYSQL_BIN_LOG::stop_union_events(THD *thd)
5488 {
5489   DBUG_ASSERT(thd->binlog_evt_union.do_union);
5490   thd->binlog_evt_union.do_union= FALSE;
5491 }
5492 
is_query_in_union(THD * thd,query_id_t query_id_param)5493 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
5494 {
5495   return (thd->binlog_evt_union.do_union &&
5496           query_id_param >= thd->binlog_evt_union.first_query_id);
5497 }
5498 
5499 /**
5500   This function checks if a transactional table was updated by the
5501   current transaction.
5502 
5503   @param thd The client thread that executed the current statement.
5504   @return
5505     @c true if a transactional table was updated, @c false otherwise.
5506 */
5507 bool
trans_has_updated_trans_table(const THD * thd)5508 trans_has_updated_trans_table(const THD* thd)
5509 {
5510   binlog_cache_mngr *const cache_mngr=
5511     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5512 
5513   return (cache_mngr ? !cache_mngr->trx_cache.empty() : 0);
5514 }
5515 
5516 /**
5517   This function checks if a transactional table was updated by the
5518   current statement.
5519 
5520   @param thd The client thread that executed the current statement.
5521   @return
5522     @c true if a transactional table was updated, @c false otherwise.
5523 */
5524 bool
stmt_has_updated_trans_table(const THD * thd)5525 stmt_has_updated_trans_table(const THD *thd)
5526 {
5527   Ha_trx_info *ha_info;
5528 
5529   for (ha_info= thd->transaction.stmt.ha_list; ha_info;
5530        ha_info= ha_info->next())
5531   {
5532     if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
5533       return (TRUE);
5534   }
5535   return (FALSE);
5536 }
5537 
5538 /**
5539   This function checks if either a trx-cache or a non-trx-cache should
5540   be used. If @c bin_log_direct_non_trans_update is active or the format
5541   is either MIXED or ROW, the cache to be used depends on the flag @c
5542   is_transactional.
5543 
5544   On the other hand, if binlog_format is STMT or direct option is
5545   OFF, the trx-cache should be used if and only if the statement is
5546   transactional or the trx-cache is not empty. Otherwise, the
5547   non-trx-cache should be used.
5548 
5549   @param thd              The client thread.
5550   @param is_transactional The changes are related to a trx-table.
5551   @return
5552     @c true if a trx-cache should be used, @c false otherwise.
5553 */
use_trans_cache(const THD * thd,bool is_transactional)5554 bool use_trans_cache(const THD* thd, bool is_transactional)
5555 {
5556   if (is_transactional)
5557     return 1;
5558   binlog_cache_mngr *const cache_mngr=
5559     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5560 
5561   return ((thd->is_current_stmt_binlog_format_row() ||
5562            thd->variables.binlog_direct_non_trans_update) ? 0 :
5563           !cache_mngr->trx_cache.empty());
5564 }
5565 
5566 /**
5567   This function checks if a transaction, either a multi-statement
5568   or a single statement transaction is about to commit or not.
5569 
5570   @param thd The client thread that executed the current statement.
5571   @param all Committing a transaction (i.e. TRUE) or a statement
5572              (i.e. FALSE).
5573   @return
5574     @c true if committing a transaction, otherwise @c false.
5575 */
ending_trans(THD * thd,const bool all)5576 bool ending_trans(THD* thd, const bool all)
5577 {
5578   return (all || ending_single_stmt_trans(thd, all));
5579 }
5580 
5581 /**
5582   This function checks if a single statement transaction is about
5583   to commit or not.
5584 
5585   @param thd The client thread that executed the current statement.
5586   @param all Committing a transaction (i.e. TRUE) or a statement
5587              (i.e. FALSE).
5588   @return
5589     @c true if committing a single statement transaction, otherwise
5590     @c false.
5591 */
ending_single_stmt_trans(THD * thd,const bool all)5592 bool ending_single_stmt_trans(THD* thd, const bool all)
5593 {
5594   return (!all && !thd->in_multi_stmt_transaction_mode());
5595 }
5596 
5597 /**
5598   This function checks if a non-transactional table was updated by
5599   the current transaction.
5600 
5601   @param thd The client thread that executed the current statement.
5602   @return
5603     @c true if a non-transactional table was updated, @c false
5604     otherwise.
5605 */
trans_has_updated_non_trans_table(const THD * thd)5606 bool trans_has_updated_non_trans_table(const THD* thd)
5607 {
5608   return (thd->transaction.all.modified_non_trans_table ||
5609           thd->transaction.stmt.modified_non_trans_table);
5610 }
5611 
5612 /**
5613   This function checks if a non-transactional table was updated by the
5614   current statement.
5615 
5616   @param thd The client thread that executed the current statement.
5617   @return
5618     @c true if a non-transactional table was updated, @c false otherwise.
5619 */
stmt_has_updated_non_trans_table(const THD * thd)5620 bool stmt_has_updated_non_trans_table(const THD* thd)
5621 {
5622   return (thd->transaction.stmt.modified_non_trans_table);
5623 }
5624 
5625 /*
5626   These functions are placed in this file since they need access to
5627   binlog_hton, which has internal linkage.
5628 */
5629 
binlog_setup_trx_data()5630 binlog_cache_mngr *THD::binlog_setup_trx_data()
5631 {
5632   DBUG_ENTER("THD::binlog_setup_trx_data");
5633   binlog_cache_mngr *cache_mngr=
5634     (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5635 
5636   if (cache_mngr)
5637     DBUG_RETURN(cache_mngr);                             // Already set up
5638 
5639   cache_mngr= (binlog_cache_mngr*) my_malloc(sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
5640   if (!cache_mngr ||
5641       open_cached_file(&cache_mngr->stmt_cache.cache_log, mysql_tmpdir,
5642                        LOG_PREFIX, (size_t)binlog_stmt_cache_size, MYF(MY_WME)) ||
5643       open_cached_file(&cache_mngr->trx_cache.cache_log, mysql_tmpdir,
5644                        LOG_PREFIX, (size_t)binlog_cache_size, MYF(MY_WME)))
5645   {
5646     my_free(cache_mngr);
5647     DBUG_RETURN(0);                      // Didn't manage to set it up
5648   }
5649   thd_set_ha_data(this, binlog_hton, cache_mngr);
5650 
5651   cache_mngr= new (cache_mngr)
5652               binlog_cache_mngr(max_binlog_stmt_cache_size,
5653                                 max_binlog_cache_size,
5654                                 &binlog_stmt_cache_use,
5655                                 &binlog_stmt_cache_disk_use,
5656                                 &binlog_cache_use,
5657                                 &binlog_cache_disk_use);
5658   DBUG_RETURN(cache_mngr);
5659 }
5660 
5661 /*
5662   Function to start a statement and optionally a transaction for the
5663   binary log.
5664 
5665   SYNOPSIS
5666     binlog_start_trans_and_stmt()
5667 
5668   DESCRIPTION
5669 
5670     This function does three things:
5671     - Start a transaction if not in autocommit mode or if a BEGIN
5672       statement has been seen.
5673 
5674     - Start a statement transaction to allow us to truncate the cache.
5675 
5676     - Save the current binlog position so that we can roll back the
5677       statement by truncating the cache.
5678 
5679       We only update the saved position if the old one was undefined,
5680       the reason is that there are some cases (e.g., for CREATE-SELECT)
5681       where the position is saved twice (e.g., both in
5682       select_create::prepare() and THD::binlog_write_table_map()) , but
5683       we should use the first. This means that calls to this function
5684       can be used to start the statement before the first table map
5685       event, to include some extra events.
5686  */
5687 
void
THD::binlog_start_trans_and_stmt()
{
  binlog_cache_mngr *cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
  DBUG_ENTER("binlog_start_trans_and_stmt");
  DBUG_PRINT("enter", ("cache_mngr: %p  cache_mngr->trx_cache.get_prev_position(): %lu",
                       cache_mngr,
                       (cache_mngr ? (ulong) cache_mngr->trx_cache.get_prev_position() :
                        (ulong) 0)));

  /*
    Only act when there is no cache manager yet or no statement start
    position has been saved; otherwise the earlier saved position is kept.
  */
  if (cache_mngr == NULL ||
      cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
  {
    this->binlog_set_stmt_begin();
    bool mstmt_mode= in_multi_stmt_transaction_mode();
#ifdef WITH_WSREP
    /*
      With wsrep binlog emulation we can skip the rest because the
      binlog cache will not be written into binlog. Note however that
      because of this the hton callbacks will not get called to clean
      up the cache, so this must be done explicitly when the transaction
      terminates.
    */
    if (WSREP_EMULATE_BINLOG_NNULL(this))
    {
      DBUG_VOID_RETURN;
    }
    /*
      Write a Gtid event into the cache.
      This is done only when wsrep_gtid_mode is set and the session carries
      a gtid_seq_no, i.e. when the event was replicated through a master
      and the same GTID has to be forwarded to the other nodes.
      It must happen only once per transaction. Since this function is
      called multiple times, ha_info->is_started() is checked first.
    */
    Ha_trx_info *ha_info;
    /* Slot 1 is inspected in multi-statement mode, slot 0 otherwise */
    ha_info= this->ha_data[binlog_hton->slot].ha_info + (mstmt_mode ? 1 : 0);

    if (!ha_info->is_started() && wsrep_gtid_mode
            && this->variables.gtid_seq_no)
    {
      /*
        This deliberately shadows the outer cache_mngr: the outer value was
        read before binlog_set_stmt_begin(), which may have created the
        cache manager, so it is looked up again here.
      */
      binlog_cache_mngr *const cache_mngr=
        (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
      /* presumably 1 selects the transactional cache -- TODO confirm */
      binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(1);
      IO_CACHE *file= &cache_data->cache_log;
      Log_event_writer writer(file, cache_data);
        Gtid_log_event gtid_event(this, this->variables.gtid_seq_no,
                            this->variables.gtid_domain_id,
                            true, LOG_EVENT_SUPPRESS_USE_F,
                            true, 0);
      gtid_event.server_id= this->variables.server_id;
      /* NOTE(review): the result of this write is not checked */
      writer.write(&gtid_event);
    }
#endif
    /*
      Register the binlog with the normal transaction only in
      multi-statement mode, and always with the statement transaction.
    */
    if (mstmt_mode)
      trans_register_ha(this, TRUE, binlog_hton);
    trans_register_ha(this, FALSE, binlog_hton);
    /*
      Mark statement transaction as read/write. We never start
      a binary log transaction and keep it read-only,
      therefore it's best to mark the transaction read/write just
      at the same time we start it.
      Not necessary to mark the normal transaction read/write
      since the statement-level flag will be propagated automatically
      inside ha_commit_trans.
    */
    ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
  }
  DBUG_VOID_RETURN;
}
5758 
binlog_set_stmt_begin()5759 void THD::binlog_set_stmt_begin() {
5760   binlog_cache_mngr *cache_mngr=
5761     (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5762 
5763   /*
5764     The call to binlog_trans_log_savepos() might create the cache_mngr
5765     structure, if it didn't exist before, so we save the position
5766     into an auto variable and then write it into the transaction
5767     data for the binary log (i.e., cache_mngr).
5768   */
5769   my_off_t pos= 0;
5770   binlog_trans_log_savepos(this, &pos);
5771   cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5772   cache_mngr->trx_cache.set_prev_position(pos);
5773 }
5774 
5775 static int
binlog_start_consistent_snapshot(handlerton * hton,THD * thd)5776 binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
5777 {
5778   int err= 0;
5779   DBUG_ENTER("binlog_start_consistent_snapshot");
5780 
5781   binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
5782 
5783   /* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */
5784   mysql_mutex_assert_owner(&LOCK_commit_ordered);
5785   strmake_buf(cache_mngr->last_commit_pos_file, mysql_bin_log.last_commit_pos_file);
5786   cache_mngr->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset;
5787 
5788   trans_register_ha(thd, TRUE, hton);
5789 
5790   DBUG_RETURN(err);
5791 }
5792 
/**
  This function writes a table map event into one of the session's binlog
  caches.
  Note that in order to keep the signature uniform with related methods,
  we use a redundant parameter to indicate whether a transactional table
  was changed or not.

  If with_annotate != NULL and *with_annotate is TRUE, an Annotate_rows
  event is written before the table map, and *with_annotate is reset to 0
  so that it is not written again.

  If this is the first table map of the statement (binlog_table_maps is 0),
  binlog_start_trans_and_stmt() is called first. On success
  binlog_table_maps is incremented.

  @param table             a pointer to the table.
  @param is_transactional  @c true indicates a transactional table,
                           otherwise @c false a non-transactional.
                           Forced to true while OPTION_GTID_BEGIN is set.
  @param with_annotate     optional in/out flag, see above.
  @return
    nonzero if an error pops up when writing the Annotate_rows or the
    table map event.
*/
int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
                                my_bool *with_annotate)
{
  int error;
  DBUG_ENTER("THD::binlog_write_table_map");
  DBUG_PRINT("enter", ("table: %p  (%s: #%lu)",
                       table, table->s->table_name.str,
                       table->s->table_map_id));

  /* Ensure that all events in a GTID group are in the same cache */
  if (variables.option_bits & OPTION_GTID_BEGIN)
    is_transactional= 1;

  /* Pre-conditions */
  DBUG_ASSERT(is_current_stmt_binlog_format_row());
  DBUG_ASSERT(WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open());
  DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);

  /* The event is built before the statement/transaction is started below */
  Table_map_log_event
    the_event(this, table, table->s->table_map_id, is_transactional);

  if (binlog_table_maps == 0)
    binlog_start_trans_and_stmt();

  /*
    The cache manager is dereferenced without a NULL check.
    NOTE(review): this relies on it existing by now, presumably created via
    binlog_start_trans_and_stmt() -- confirm for the failure case.
  */
  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
  binlog_cache_data *cache_data= (cache_mngr->
                                  get_binlog_cache_data(is_transactional));
  IO_CACHE *file= &cache_data->cache_log;
  Log_event_writer writer(file, cache_data);

  if (with_annotate && *with_annotate)
  {
    Annotate_rows_log_event anno(table->in_use, is_transactional, false);
    /* Annotate event should be written not more than once */
    *with_annotate= 0;
    if (unlikely((error= writer.write(&anno))))
    {
      /* Only an EFBIG failure marks the cache as having an incident */
      if (my_errno == EFBIG)
        cache_data->set_incident();
      DBUG_RETURN(error);
    }
  }
  /*
    NOTE(review): unlike the Annotate_rows path above, a failure here does
    not call set_incident() -- confirm whether that is intended.
  */
  if (unlikely((error= writer.write(&the_event))))
    DBUG_RETURN(error);

  binlog_table_maps++;
  DBUG_RETURN(0);
}
5857 
5858 /**
5859   This function retrieves a pending row event from a cache which is
5860   specified through the parameter @c is_transactional. Respectively, when it
5861   is @c true, the pending event is returned from the transactional cache.
5862   Otherwise from the non-transactional cache.
5863 
5864   @param is_transactional  @c true indicates a transactional cache,
5865                            otherwise @c false a non-transactional.
5866   @return
5867     The row event if any.
5868 */
5869 Rows_log_event*
binlog_get_pending_rows_event(bool is_transactional) const5870 THD::binlog_get_pending_rows_event(bool is_transactional) const
5871 {
5872   Rows_log_event* rows= NULL;
5873   binlog_cache_mngr *const cache_mngr=
5874     (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5875 
5876   /*
5877     This is less than ideal, but here's the story: If there is no cache_mngr,
5878     prepare_pending_rows_event() has never been called (since the cache_mngr
5879     is set up there). In that case, we just return NULL.
5880    */
5881   if (cache_mngr)
5882   {
5883     binlog_cache_data *cache_data=
5884       cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional));
5885 
5886     rows= cache_data->pending();
5887   }
5888   return (rows);
5889 }
5890 
5891 /**
5892   This function stores a pending row event into a cache which is specified
5893   through the parameter @c is_transactional. Respectively, when it is @c
5894   true, the pending event is stored into the transactional cache. Otherwise
5895   into the non-transactional cache.
5896 
5897   @param evt               a pointer to the row event.
5898   @param is_transactional  @c true indicates a transactional cache,
5899                            otherwise @c false a non-transactional.
5900 */
5901 void
binlog_set_pending_rows_event(Rows_log_event * ev,bool is_transactional)5902 THD::binlog_set_pending_rows_event(Rows_log_event* ev, bool is_transactional)
5903 {
5904   binlog_cache_mngr *const cache_mngr= binlog_setup_trx_data();
5905 
5906   DBUG_ASSERT(cache_mngr);
5907 
5908   binlog_cache_data *cache_data=
5909     cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional));
5910 
5911   cache_data->set_pending(ev);
5912 }
5913 
5914 
5915 /**
5916   This function removes the pending rows event, discarding any outstanding
5917   rows. If there is no pending rows event available, this is effectively a
5918   no-op.
5919 
5920   @param thd               a pointer to the user thread.
5921   @param is_transactional  @c true indicates a transactional cache,
5922                            otherwise @c false a non-transactional.
5923 */
5924 int
remove_pending_rows_event(THD * thd,bool is_transactional)5925 MYSQL_BIN_LOG::remove_pending_rows_event(THD *thd, bool is_transactional)
5926 {
5927   DBUG_ENTER("MYSQL_BIN_LOG::remove_pending_rows_event");
5928 
5929   binlog_cache_mngr *const cache_mngr=
5930     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5931 
5932   DBUG_ASSERT(cache_mngr);
5933 
5934   binlog_cache_data *cache_data=
5935     cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional));
5936 
5937   if (Rows_log_event* pending= cache_data->pending())
5938   {
5939     delete pending;
5940     cache_data->set_pending(NULL);
5941   }
5942 
5943   DBUG_RETURN(0);
5944 }
5945 
/*
  Moves the last bunch of rows from the pending Rows event to a cache (either
  the transactional cache or the non-transactional one, as decided by
  use_trans_cache() from is_transactional), deletes that pending event and
  sets a new pending event.

  If writing the old pending event fails, it is deleted, the pending slot is
  cleared and the new event is NOT installed.

  @param thd               a pointer to the user thread.
  @param event             a pointer to the row event that becomes pending.
  @param is_transactional  @c true indicates a transactional cache,
                           otherwise @c false a non-transactional.
  @return
    0 on success, 1 if the old pending event could not be written.
*/
int
MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
                                                Rows_log_event* event,
                                                bool is_transactional)
{
  DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
  DBUG_ASSERT(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open());
  DBUG_PRINT("enter", ("event: %p", event));

  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);

  DBUG_ASSERT(cache_mngr);

  binlog_cache_data *cache_data=
    cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional));

  DBUG_PRINT("info", ("cache_mngr->pending(): %p", cache_data->pending()));

  if (Rows_log_event* pending= cache_data->pending())
  {
    Log_event_writer writer(&cache_data->cache_log, cache_data);

    /*
      Write pending event to the cache.
      In debug builds a write error can be injected for this write only;
      the injection is switched off again on the error path below.
    */
    DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
                    {DBUG_SET("+d,simulate_file_write_error");});
    if (writer.write(pending))
    {
      set_write_error(thd, is_transactional);
      /*
        The cache_data test is redundant here, since cache_data has already
        been dereferenced above.
      */
      if (check_write_error(thd) && cache_data &&
          stmt_has_updated_non_trans_table(thd))
        cache_data->set_incident();
      delete pending;
      cache_data->set_pending(NULL);
      DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
                      {DBUG_SET("-d,simulate_file_write_error");});
      DBUG_RETURN(1);
    }

    /*
      The old event is freed here; the pending slot still points at it
      until the new event is installed just below.
    */
    delete pending;
  }

  thd->binlog_set_pending_rows_event(event, is_transactional);

  DBUG_RETURN(0);
}
6004 
6005 
/*
  Generate a new global transaction ID, and write it to the binlog.

  The GTID is built from the session's gtid_domain_id (or, under wsrep,
  possibly wsrep_gtid_domain_id), the session's server_id and either the
  session's gtid_seq_no, if non-zero, or the next sequence number allocated
  from the global binlog state. The session's gtid_seq_no is reset to 0 as
  a side effect.

  @param thd               the session the GTID is generated for.
  @param standalone        passed on to the Gtid_log_event.
  @param is_transactional  passed on to the Gtid_log_event.
  @param commit_id         passed on to the Gtid_log_event.
  @return
    true if updating the binlog state or writing the event failed,
    false otherwise.
*/

bool
MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone,
                                bool is_transactional, uint64 commit_id)
{
  rpl_gtid gtid;
  uint32 domain_id;
  uint32 local_server_id;
  uint64 seq_no;
  int err;
  DBUG_ENTER("write_gtid_event");
  DBUG_PRINT("enter", ("standalone: %d", standalone));

#ifdef WITH_WSREP
  /*
    Under wsrep with a positive transaction seqno, GTID mode on and no
    session-supplied sequence number, the wsrep domain id is used.
  */
  if (WSREP(thd)                               &&
      (wsrep_thd_trx_seqno(thd) > 0)           &&
      wsrep_gtid_mode && !thd->variables.gtid_seq_no)
  {
    domain_id= wsrep_gtid_domain_id;
  } else {
#endif /* WITH_WSREP */
  domain_id= thd->variables.gtid_domain_id;
#ifdef WITH_WSREP
  }
#endif /* WITH_WSREP */
  local_server_id= thd->variables.server_id;
  seq_no= thd->variables.gtid_seq_no;

  DBUG_ASSERT(local_server_id != 0);

  if (thd->variables.option_bits & OPTION_GTID_BEGIN)
  {
    DBUG_PRINT("error", ("OPTION_GTID_BEGIN is set. "
                         "Master and slave will have different GTID values"));
    /* Reset the flag, as we will write out a GTID anyway */
    thd->variables.option_bits&= ~OPTION_GTID_BEGIN;
  }

  /*
    Reset the session variable gtid_seq_no, to reduce the risk of accidentally
    producing a duplicate GTID.
  */
  thd->variables.gtid_seq_no= 0;
  if (seq_no != 0)
  {
    /* Use the specified sequence number. */
    gtid.domain_id= domain_id;
    gtid.server_id= local_server_id;
    gtid.seq_no= seq_no;
    err= rpl_global_gtid_binlog_state.update(&gtid, opt_gtid_strict_mode);
    /*
      The SQL error code is copied into errno here.
      NOTE(review): presumably so that a caller can tell this failure apart
      from an I/O error -- confirm against the callers.
    */
    if (err && thd->get_stmt_da()->sql_errno()==ER_GTID_STRICT_OUT_OF_ORDER)
      errno= ER_GTID_STRICT_OUT_OF_ORDER;
  }
  else
  {
    /* Allocate the next sequence number for the GTID. */
    err= rpl_global_gtid_binlog_state.update_with_next_gtid(domain_id,
                                                            local_server_id, &gtid);
    seq_no= gtid.seq_no;
  }
  if (err)
    DBUG_RETURN(true);

  thd->set_last_commit_gtid(gtid);

  Gtid_log_event gtid_event(thd, seq_no, domain_id, standalone,
                            LOG_EVENT_SUPPRESS_USE_F, is_transactional,
                            commit_id);

  /* Write the event to the binary log. */
  DBUG_ASSERT(this == &mysql_bin_log);

#ifdef WITH_WSREP
  /*
    In wsrep GTID mode nothing is written here when is_gtid_cached()
    reports true; the binlog state above has been updated regardless.
  */
  if (wsrep_gtid_mode && is_gtid_cached(thd))
    DBUG_RETURN(false);
#endif

  if (write_event(&gtid_event))
    DBUG_RETURN(true);
  status_var_add(thd->status_var.binlog_bytes_written, gtid_event.data_written);

  DBUG_RETURN(false);
}
6090 
6091 
6092 int
write_state_to_file()6093 MYSQL_BIN_LOG::write_state_to_file()
6094 {
6095   File file_no;
6096   IO_CACHE cache;
6097   char buf[FN_REFLEN];
6098   int err;
6099   bool opened= false;
6100   bool log_inited= false;
6101 
6102   fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
6103             MY_UNPACK_FILENAME);
6104   if ((file_no= mysql_file_open(key_file_binlog_state, buf,
6105                                 O_RDWR|O_CREAT|O_TRUNC|O_BINARY,
6106                                 MYF(MY_WME))) < 0)
6107   {
6108     err= 1;
6109     goto err;
6110   }
6111   opened= true;
6112   if ((err= init_io_cache(&cache, file_no, IO_SIZE, WRITE_CACHE, 0, 0,
6113                            MYF(MY_WME|MY_WAIT_IF_FULL))))
6114     goto err;
6115   log_inited= true;
6116   if ((err= rpl_global_gtid_binlog_state.write_to_iocache(&cache)))
6117     goto err;
6118   log_inited= false;
6119   if ((err= end_io_cache(&cache)))
6120     goto err;
6121   if ((err= mysql_file_sync(file_no, MYF(MY_WME|MY_SYNC_FILESIZE))))
6122     goto err;
6123   goto end;
6124 
6125 err:
6126   sql_print_error("Error writing binlog state to file '%s'.", buf);
6127   if (log_inited)
6128     end_io_cache(&cache);
6129 end:
6130   if (opened)
6131     mysql_file_close(file_no, MYF(0));
6132 
6133   return err;
6134 }
6135 
6136 
6137 /*
6138   Initialize the binlog state from the master-bin.state file, at server startup.
6139 
6140   Returns:
6141     0 for success.
6142     2 for when .state file did not exist.
6143     1 for other error.
6144 */
6145 int
read_state_from_file()6146 MYSQL_BIN_LOG::read_state_from_file()
6147 {
6148   File file_no;
6149   IO_CACHE cache;
6150   char buf[FN_REFLEN];
6151   int err;
6152   bool opened= false;
6153   bool log_inited= false;
6154 
6155   fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
6156             MY_UNPACK_FILENAME);
6157   if ((file_no= mysql_file_open(key_file_binlog_state, buf,
6158                                 O_RDONLY|O_BINARY, MYF(0))) < 0)
6159   {
6160     if (my_errno != ENOENT)
6161     {
6162       err= 1;
6163       goto err;
6164     }
6165     else
6166     {
6167       /*
6168         If the state file does not exist, this is the first server startup
6169         with GTID enabled. So initialize to empty state.
6170       */
6171       rpl_global_gtid_binlog_state.reset();
6172       err= 2;
6173       goto end;
6174     }
6175   }
6176   opened= true;
6177   if ((err= init_io_cache(&cache, file_no, IO_SIZE, READ_CACHE, 0, 0,
6178                           MYF(MY_WME|MY_WAIT_IF_FULL))))
6179     goto err;
6180   log_inited= true;
6181   if ((err= rpl_global_gtid_binlog_state.read_from_iocache(&cache)))
6182     goto err;
6183   goto end;
6184 
6185 err:
6186   sql_print_error("Error reading binlog GTID state from file '%s'.", buf);
6187 end:
6188   if (log_inited)
6189     end_io_cache(&cache);
6190   if (opened)
6191     mysql_file_close(file_no, MYF(0));
6192 
6193   return err;
6194 }
6195 
6196 
6197 int
get_most_recent_gtid_list(rpl_gtid ** list,uint32 * size)6198 MYSQL_BIN_LOG::get_most_recent_gtid_list(rpl_gtid **list, uint32 *size)
6199 {
6200   return rpl_global_gtid_binlog_state.get_most_recent_gtid_list(list, size);
6201 }
6202 
6203 
6204 bool
append_state_pos(String * str)6205 MYSQL_BIN_LOG::append_state_pos(String *str)
6206 {
6207   return rpl_global_gtid_binlog_state.append_pos(str);
6208 }
6209 
6210 
6211 bool
append_state(String * str)6212 MYSQL_BIN_LOG::append_state(String *str)
6213 {
6214   return rpl_global_gtid_binlog_state.append_state(str);
6215 }
6216 
6217 
6218 bool
is_empty_state()6219 MYSQL_BIN_LOG::is_empty_state()
6220 {
6221   return (rpl_global_gtid_binlog_state.count() == 0);
6222 }
6223 
6224 
6225 bool
find_in_binlog_state(uint32 domain_id,uint32 server_id_arg,rpl_gtid * out_gtid)6226 MYSQL_BIN_LOG::find_in_binlog_state(uint32 domain_id, uint32 server_id_arg,
6227                                     rpl_gtid *out_gtid)
6228 {
6229   rpl_gtid *gtid;
6230   if ((gtid= rpl_global_gtid_binlog_state.find(domain_id, server_id_arg)))
6231     *out_gtid= *gtid;
6232   return gtid != NULL;
6233 }
6234 
6235 
6236 bool
lookup_domain_in_binlog_state(uint32 domain_id,rpl_gtid * out_gtid)6237 MYSQL_BIN_LOG::lookup_domain_in_binlog_state(uint32 domain_id,
6238                                              rpl_gtid *out_gtid)
6239 {
6240   rpl_gtid *found_gtid;
6241 
6242   if ((found_gtid= rpl_global_gtid_binlog_state.find_most_recent(domain_id)))
6243   {
6244     *out_gtid= *found_gtid;
6245     return true;
6246   }
6247 
6248   return false;
6249 }
6250 
6251 
6252 int
bump_seq_no_counter_if_needed(uint32 domain_id,uint64 seq_no)6253 MYSQL_BIN_LOG::bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no)
6254 {
6255   return rpl_global_gtid_binlog_state.bump_seq_no_if_needed(domain_id, seq_no);
6256 }
6257 
6258 
6259 bool
check_strict_gtid_sequence(uint32 domain_id,uint32 server_id_arg,uint64 seq_no)6260 MYSQL_BIN_LOG::check_strict_gtid_sequence(uint32 domain_id,
6261                                           uint32 server_id_arg,
6262                                           uint64 seq_no)
6263 {
6264   return rpl_global_gtid_binlog_state.check_strict_sequence(domain_id,
6265                                                             server_id_arg,
6266                                                             seq_no);
6267 }
6268 
6269 
/**
  Write an event to the binary log. If with_annotate != NULL and
  *with_annotate = TRUE write also Annotate_rows before the event
  (this should happen only if the event is a Table_map).

  The event is written either straight to the binlog file ("direct"
  logging, done while holding LOCK_log) or into the binlog cache of the
  connection obtained through thd->binlog_setup_trx_data().

  @param event_info     event to write; event_info->thd supplies the
                        session the event belongs to
  @param with_annotate  in/out flag; reset to 0 once the Annotate_rows
                        event has been handed to write_event(). May be NULL.

  @return false on success or when the event is deliberately not logged,
          true on failure. Note that true is also returned when the
          "is_open() or emulated binlog" condition below does not hold,
          because error starts out as 1.
*/

bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
{
  THD *thd= event_info->thd;
  /* Pessimistic default: only cleared right before the 'err' label. */
  bool error= 1;
  binlog_cache_data *cache_data= 0;
  bool is_trans_cache= FALSE;
  bool using_trans= event_info->use_trans_cache();
  bool direct= event_info->use_direct_logging();
  /* Only assigned and read on the direct path. */
  ulong UNINIT_VAR(prev_binlog_id);
  DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)");

  /*
    When binary logging is not enabled (--log-bin=0), wsrep-patch partially
    enables it without opening the binlog file (MYSQL_BIN_LOG::open()).
    So, avoid writing to binlog file.
  */
  if (direct &&
      (wsrep_emulate_bin_log ||
       (WSREP(thd) && !(thd->variables.option_bits & OPTION_BIN_LOG))))
    DBUG_RETURN(0);

  if (thd->variables.option_bits & OPTION_GTID_BEGIN)
  {
    DBUG_PRINT("info", ("OPTION_GTID_BEGIN was set"));
    /* Wait for commit from binary log before we commit */
    direct= 0;
    using_trans= 1;
  }

  if (thd->binlog_evt_union.do_union)
  {
    /*
      In Stored function; Remember that function call caused an update.
      We will log the function call to the binary log on function exit
    */
    thd->binlog_evt_union.unioned_events= TRUE;
    thd->binlog_evt_union.unioned_events_trans |= using_trans;
    DBUG_RETURN(0);
  }

  /*
    We only end the statement if we are in a top-level statement.  If
    we are inside a stored function, we do not end the statement since
    this will close all tables on the slave. But there can be a special case
    where we are inside a stored function/trigger and a SAVEPOINT is being
    set in side the stored function/trigger. This SAVEPOINT execution will
    force the pending event to be flushed without an STMT_END_F flag. This
    will result in a case where following DMLs will be considered as part of
    same statement and result in data loss on slave. Hence in this case we
    force the end_stmt to be true.
  */
  bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
                        SQLCOM_SAVEPOINT) ? true :
    (thd->locked_tables_mode && thd->lex->requires_prelocking());
  /* error is still 1 here, so a failed flush is reported as failure. */
  if (thd->binlog_flush_pending_rows_event(end_stmt, using_trans))
    DBUG_RETURN(error);

  /*
     In most cases this is only called if 'is_open()' is true; in fact this is
     mostly called if is_open() *was* true a few instructions before, but it
     could have changed since.
  */
  /* applier and replayer can skip writing binlog events */
  if ((WSREP_EMULATE_BINLOG(thd) &&
       IF_WSREP(thd->wsrep_cs().mode() == wsrep::client_state::m_local, 0)) || is_open())
  {
    /* Only assigned and read on the direct path. */
    my_off_t UNINIT_VAR(my_org_b_tell);
#ifdef HAVE_REPLICATION
    /*
      In the future we need to add to the following if tests like
      "do the involved tables match (to be implemented)
      binlog_[wild_]{do|ignore}_table?" (WL#1049)"
    */
    const char *local_db= event_info->get_db();

    bool option_bin_log_flag= (thd->variables.option_bits & OPTION_BIN_LOG);

    /*
      Log all updates to binlog cache so that they can get replicated to other
      nodes. A check has been added to stop them from getting logged into
      binary log files.
    */
    if (WSREP(thd)) option_bin_log_flag= true;

    /*
      Skip the event (returning success) when binlogging is off for this
      session, or when the database is filtered out. SAVEPOINT and
      ROLLBACK TO SAVEPOINT are exempt from the database filter.
    */
    if ((!(option_bin_log_flag)) ||
	(thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
         thd->lex->sql_command != SQLCOM_SAVEPOINT &&
         !binlog_filter->db_ok(local_db)))
      DBUG_RETURN(0);
#endif /* HAVE_REPLICATION */

    /* Destination of the events: the binlog file or a binlog cache. */
    IO_CACHE *file= NULL;

    if (direct)
    {
      /* We come here only for incident events */
      int res;
      uint64 commit_id= 0;
      MDL_request mdl_request;
      DBUG_PRINT("info", ("direct is set"));
      DBUG_ASSERT(!thd->backup_commit_lock);

      /*
        Hold an explicit MDL_BACKUP_COMMIT lock until the GTID event has
        been written; it is released on every exit path of this branch.
      */
      mdl_request.init(MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT, MDL_EXPLICIT);
      if (thd->mdl_context.acquire_lock(&mdl_request,
                                        thd->variables.lock_wait_timeout))
        DBUG_RETURN(1);
      thd->backup_commit_lock= &mdl_request;

      if ((res= thd->wait_for_prior_commit()))
      {
        if (mdl_request.ticket)
          thd->mdl_context.release_lock(mdl_request.ticket);
        thd->backup_commit_lock= 0;
        DBUG_RETURN(res);
      }
      file= &log_file;
      /* Start position, used below to account the bytes written. */
      my_org_b_tell= my_b_tell(file);
      /* LOCK_log is released in the 'err:' section below. */
      mysql_mutex_lock(&LOCK_log);
      /* Remembered for checkpoint_and_purge() in case rotate() switches file. */
      prev_binlog_id= current_binlog_id;
      /* Debug builds only: take commit_id from the user variable "commit_id". */
      DBUG_EXECUTE_IF("binlog_force_commit_id",
        {
          const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") };
          bool null_value;
          user_var_entry *entry=
            (user_var_entry*) my_hash_search(&thd->user_vars,
                                             (uchar*) commit_name.str,
                                             commit_name.length);
          commit_id= entry->val_int(&null_value);
        });
      res= write_gtid_event(thd, true, using_trans, commit_id);
      if (mdl_request.ticket)
        thd->mdl_context.release_lock(mdl_request.ticket);
      thd->backup_commit_lock= 0;
      if (res)
        goto err;
    }
    else
    {
      /*
        Non-direct path: the events go into the binlog cache of this
        connection; use_trans_cache() decides which of the two caches.
      */
      binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
      if (!cache_mngr)
        goto err;

      is_trans_cache= use_trans_cache(thd, using_trans);
      cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);
      file= &cache_data->cache_log;

      if (thd->lex->stmt_accessed_non_trans_temp_table() && is_trans_cache)
        thd->transaction.stmt.mark_modified_non_trans_temp_table();
      thd->binlog_start_trans_and_stmt();
    }
    DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));

    /*
       No check for auto events flag here - this write method should
       never be called if auto-events are enabled.

       Write first log events which describe the 'run environment'
       of the SQL command. If row-based binlogging, Insert_id, Rand
       and other kind of "setting context" events are not needed.
    */

    if (with_annotate && *with_annotate)
    {
      DBUG_ASSERT(event_info->get_type_code() == TABLE_MAP_EVENT);
      Annotate_rows_log_event anno(thd, using_trans, direct);
      /* Annotate event should be written not more than once */
      *with_annotate= 0;
      if (write_event(&anno, cache_data, file))
        goto err;
    }

    {
      /* Context events are written only when not logging in row format. */
      if (!thd->is_current_stmt_binlog_format_row())
      {
        if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
        {
          Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
                             thd->first_successful_insert_id_in_prev_stmt_for_binlog,
                             using_trans, direct);
          if (write_event(&e, cache_data, file))
            goto err;
        }
        if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
        {
          DBUG_PRINT("info",("number of auto_inc intervals: %u",
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             nb_elements()));
          Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             minimum(), using_trans, direct);
          if (write_event(&e, cache_data, file))
            goto err;
        }
        if (thd->rand_used)
        {
          Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
                           using_trans, direct);
          if (write_event(&e, cache_data, file))
            goto err;
        }
        if (thd->user_var_events.elements)
        {
          /* One User_var event per entry recorded in thd->user_var_events. */
          for (uint i= 0; i < thd->user_var_events.elements; i++)
          {
            BINLOG_USER_VAR_EVENT *user_var_event;
            get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);

            /* setting flags for user var log event */
            uchar flags= User_var_log_event::UNDEF_F;
            if (user_var_event->unsigned_flag)
              flags|= User_var_log_event::UNSIGNED_F;

            User_var_log_event e(thd, user_var_event->user_var_event->name.str,
                                 user_var_event->user_var_event->name.length,
                                 user_var_event->value,
                                 user_var_event->length,
                                 user_var_event->type,
                                 user_var_event->charset_number,
                                 flags,
                                 using_trans,
                                 direct);
            if (write_event(&e, cache_data, file))
              goto err;
          }
        }
      }
    }

    /*
      Write the event.
    */
    if (write_event(event_info, cache_data, file) ||
        DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
      goto err;

    error= 0;
    /* Reached both on success (error == 0) and on failure (error == 1). */
err:
    if (direct)
    {
      /* LOCK_log, taken on the direct path above, is still held here. */
      my_off_t offset= my_b_tell(file);
      bool check_purge= false;
      DBUG_ASSERT(!is_relay_log);

      if (likely(!error))
      {
        bool synced;

        if ((error= flush_and_sync(&synced)))
        {
          /* Flush/sync failed: error is now set, nothing else to do here. */
        }
        else
        {
          mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
          mysql_mutex_assert_owner(&LOCK_log);
          mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
          mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
#ifdef HAVE_REPLICATION
          if (repl_semisync_master.report_binlog_update(thd, log_file_name,
                                                        file->pos_in_file))
          {
            sql_print_error("Failed to run 'after_flush' hooks");
            error= 1;
          }
          else
#endif
          {
            /*
              update binlog_end_pos so it can be read by dump thread
              note: must be _after_ the RUN_HOOK(after_flush) or else
              semi-sync might not have put the transaction into
              it's list before dump-thread tries to send it
            */
            update_binlog_end_pos(offset);
            /* check_purge is set by rotate(); force it off if rotate failed. */
            if (unlikely((error= rotate(false, &check_purge))))
              check_purge= false;
          }
        }
      }

      /* Account the bytes written since the direct path started. */
      status_var_add(thd->status_var.binlog_bytes_written,
                     offset - my_org_b_tell);

      /*
        Hand over from LOCK_log to LOCK_after_binlog_sync: the next mutex
        is acquired before the previous one is released.
      */
      mysql_mutex_lock(&LOCK_after_binlog_sync);
      mysql_mutex_unlock(&LOCK_log);

      mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
      mysql_mutex_assert_not_owner(&LOCK_log);
      mysql_mutex_assert_owner(&LOCK_after_binlog_sync);
      mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
#ifdef HAVE_REPLICATION
      if (repl_semisync_master.wait_after_sync(log_file_name,
                                               file->pos_in_file))
      {
        error=1;
        /* error is already printed inside hook */
      }
#endif

      /*
        Take mutex to protect against a reader seeing partial writes of 64-bit
        offset on 32-bit CPUs.
      */
      mysql_mutex_lock(&LOCK_commit_ordered);
      mysql_mutex_unlock(&LOCK_after_binlog_sync);
      last_commit_pos_offset= offset;
      mysql_mutex_unlock(&LOCK_commit_ordered);

      /* Done only after all the mutexes above have been released. */
      if (check_purge)
        checkpoint_and_purge(prev_binlog_id);
    }

    if (unlikely(error))
    {
      /*
        Record the write error; when check_write_error() confirms it, a
        cache is in use and the statement updated a non-transactional
        table, flag the cache with set_incident().
      */
      set_write_error(thd, is_trans_cache);
      if (check_write_error(thd) && cache_data &&
          stmt_has_updated_non_trans_table(thd))
        cache_data->set_incident();
    }
  }

  DBUG_RETURN(error);
}
6598 
6599 
error_log_print(enum loglevel level,const char * format,va_list args)6600 int error_log_print(enum loglevel level, const char *format,
6601                     va_list args)
6602 {
6603   return logger.error_log_print(level, format, args);
6604 }
6605 
6606 
slow_log_print(THD * thd,const char * query,uint query_length,ulonglong current_utime)6607 bool slow_log_print(THD *thd, const char *query, uint query_length,
6608                     ulonglong current_utime)
6609 {
6610   return logger.slow_log_print(thd, query, query_length, current_utime);
6611 }
6612 
6613 
6614 /**
6615   Decide if we should log the command to general log
6616 
6617   @retval
6618      FALSE  No logging
6619      TRUE   Ok to log
6620 */
6621 
log_command(THD * thd,enum enum_server_command command)6622 bool LOGGER::log_command(THD *thd, enum enum_server_command command)
6623 {
6624   /*
6625     Log command if we have at least one log event handler enabled and want
6626     to log this king of commands
6627   */
6628   if (!(*general_log_handler_list && (what_to_log & (1L << (uint) command))))
6629     return FALSE;
6630 
6631   /*
6632     If LOG_SLOW_DISABLE_SLAVE is set when slave thread starts, then
6633     OPTION_LOG_OFF is set.
6634     Only the super user can set this bit.
6635   */
6636   return !(thd->variables.option_bits & OPTION_LOG_OFF);
6637 }
6638 
6639 
general_log_print(THD * thd,enum enum_server_command command,const char * format,...)6640 bool general_log_print(THD *thd, enum enum_server_command command,
6641                        const char *format, ...)
6642 {
6643   va_list args;
6644   uint error= 0;
6645 
6646   /* Print the message to the buffer if we want to log this kind of commands */
6647   if (! logger.log_command(thd, command))
6648     return FALSE;
6649 
6650   va_start(args, format);
6651   error= logger.general_log_print(thd, command, format, args);
6652   va_end(args);
6653 
6654   return error;
6655 }
6656 
general_log_write(THD * thd,enum enum_server_command command,const char * query,size_t query_length)6657 bool general_log_write(THD *thd, enum enum_server_command command,
6658                        const char *query, size_t query_length)
6659 {
6660   /* Write the message to the log if we want to log this king of commands */
6661   if (logger.log_command(thd, command) || mysql_audit_general_enabled())
6662     return logger.general_log_write(thd, command, query, query_length);
6663 
6664   return FALSE;
6665 }
6666 
6667 
6668 static void
binlog_checkpoint_callback(void * cookie)6669 binlog_checkpoint_callback(void *cookie)
6670 {
6671   MYSQL_BIN_LOG::xid_count_per_binlog *entry=
6672     (MYSQL_BIN_LOG::xid_count_per_binlog *)cookie;
6673   /*
6674     For every supporting engine, we increment the xid_count and issue a
6675     commit_checkpoint_request(). Then we can count when all
6676     commit_checkpoint_notify() callbacks have occurred, and then log a new
6677     binlog checkpoint event.
6678   */
6679   mysql_bin_log.mark_xids_active(entry->binlog_id, 1);
6680 }
6681 
6682 
6683 /*
6684   Request a commit checkpoint from each supporting engine.
6685   This must be called after each binlog rotate, and after LOCK_log has been
6686   released. The xid_count value in the xid_count_per_binlog entry was
6687   incremented by 1 and will be decremented in this function; this ensures
6688   that the entry will not go away early despite LOCK_log not being held.
6689 */
6690 void
do_checkpoint_request(ulong binlog_id)6691 MYSQL_BIN_LOG::do_checkpoint_request(ulong binlog_id)
6692 {
6693   xid_count_per_binlog *entry;
6694 
6695   /*
6696     Find the binlog entry, and invoke commit_checkpoint_request() on it in
6697     each supporting storage engine.
6698   */
6699   mysql_mutex_lock(&LOCK_xid_list);
6700   I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
6701   do {
6702     entry= it++;
6703     DBUG_ASSERT(entry /* binlog_id is always somewhere in the list. */);
6704   } while (entry->binlog_id != binlog_id);
6705   mysql_mutex_unlock(&LOCK_xid_list);
6706 
6707   ha_commit_checkpoint_request(entry, binlog_checkpoint_callback);
6708   /*
6709     When we rotated the binlog, we incremented xid_count to make sure the
6710     entry would not go away until this point, where we have done all necessary
6711     commit_checkpoint_request() calls.
6712     So now we can (and must) decrease the count - when it reaches zero, we
6713     will know that both all pending unlog() and all pending
6714     commit_checkpoint_notify() calls are done, and we can log a new binlog
6715     checkpoint.
6716   */
6717   mark_xid_done(binlog_id, true);
6718 }
6719 
6720 
6721 /**
6722   The method executes rotation when LOCK_log is already acquired
6723   by the caller.
6724 
6725   @param force_rotate  caller can request the log rotation
6726   @param check_purge   is set to true if rotation took place
6727 
6728   @note
6729     Caller _must_ check the check_purge variable. If this is set, it means
6730     that the binlog was rotated, and caller _must_ ensure that
6731     do_checkpoint_request() is called later with the binlog_id of the rotated
6732     binlog file. The call to do_checkpoint_request() must happen after
6733     LOCK_log is released (which is why we cannot simply do it here).
6734     Usually, checkpoint_and_purge() is appropriate, as it will both handle
6735     the checkpointing and any needed purging of old logs.
6736 
6737   @note
6738     If rotation fails, for instance the server was unable
6739     to create a new log file, we still try to write an
6740     incident event to the current log.
6741 
6742   @retval
6743     nonzero - error in rotating routine.
6744 */
rotate(bool force_rotate,bool * check_purge)6745 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
6746 {
6747   int error= 0;
6748   DBUG_ENTER("MYSQL_BIN_LOG::rotate");
6749 
6750 #ifdef WITH_WSREP
6751   if (WSREP_ON && wsrep_to_isolation)
6752   {
6753     *check_purge= false;
6754     WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d",
6755                 wsrep_to_isolation);
6756     DBUG_RETURN(0);
6757   }
6758 #endif /* WITH_WSREP */
6759 
6760   //todo: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log);
6761   *check_purge= false;
6762 
6763   if (force_rotate || (my_b_tell(&log_file) >= (my_off_t) max_size))
6764   {
6765     ulong binlog_id= current_binlog_id;
6766     /*
6767       We rotate the binlog, so we need to start a commit checkpoint in all
6768       supporting engines - when it finishes, we can log a new binlog checkpoint
6769       event.
6770 
6771       But we cannot start the checkpoint here - there could be a group commit
6772       still in progress which needs to be included in the checkpoint, and
6773       besides we do not want to do the (possibly expensive) checkpoint while
6774       LOCK_log is held.
6775 
6776       On the other hand, we must be sure that the xid_count entry for the
6777       previous log does not go away until we start the checkpoint - which it
6778       could do as it is no longer the most recent. So we increment xid_count
6779       (to count the pending checkpoint request) - this will fix the entry in
6780       place until we decrement again in do_checkpoint_request().
6781     */
6782     mark_xids_active(binlog_id, 1);
6783 
6784     if (unlikely((error= new_file_without_locking())))
6785     {
6786       /**
6787          Be conservative... There are possible lost events (eg,
6788          failing to log the Execute_load_query_log_event
6789          on a LOAD DATA while using a non-transactional
6790          table)!
6791 
6792          We give it a shot and try to write an incident event anyway
6793          to the current log.
6794       */
6795       if (!write_incident_already_locked(current_thd))
6796         flush_and_sync(0);
6797 
6798       /*
6799         We failed to rotate - so we have to decrement the xid_count back that
6800         we incremented before attempting the rotate.
6801       */
6802       mark_xid_done(binlog_id, false);
6803     }
6804     else
6805       *check_purge= true;
6806   }
6807   DBUG_RETURN(error);
6808 }
6809 
6810 /**
6811   The method executes logs purging routine.
6812 
6813   @retval
6814     nonzero - error in rotating routine.
6815 */
purge()6816 void MYSQL_BIN_LOG::purge()
6817 {
6818   mysql_mutex_assert_not_owner(&LOCK_log);
6819 #ifdef HAVE_REPLICATION
6820   if (expire_logs_days)
6821   {
6822     DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
6823     time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
6824     if (purge_time >= 0)
6825     {
6826       purge_logs_before_date(purge_time);
6827     }
6828     DEBUG_SYNC(current_thd, "after_purge_logs_before_date");
6829   }
6830 #endif
6831 }
6832 
6833 
checkpoint_and_purge(ulong binlog_id)6834 void MYSQL_BIN_LOG::checkpoint_and_purge(ulong binlog_id)
6835 {
6836   do_checkpoint_request(binlog_id);
6837   purge();
6838 }
6839 
6840 
6841 /**
6842   Searches for the first (oldest) binlog file name in in the binlog index.
6843 
6844   @param[in,out]  buf_arg  pointer to a buffer to hold found
6845                            the first binary log file name
6846   @return         NULL     on success, otherwise error message
6847 */
get_first_binlog(char * buf_arg)6848 static const char* get_first_binlog(char* buf_arg)
6849 {
6850   IO_CACHE *index_file;
6851   size_t length;
6852   char fname[FN_REFLEN];
6853   const char* errmsg= NULL;
6854 
6855   DBUG_ENTER("get_first_binlog");
6856 
6857   DBUG_ASSERT(mysql_bin_log.is_open());
6858 
6859   mysql_bin_log.lock_index();
6860 
6861   index_file=mysql_bin_log.get_index_file();
6862   if (reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0))
6863   {
6864     errmsg= "failed to create a cache on binlog index";
6865     goto end;
6866   }
6867   /* The file ends with EOF or empty line */
6868   if ((length=my_b_gets(index_file, fname, sizeof(fname))) <= 1)
6869   {
6870     errmsg= "empty binlog index";
6871     goto end;
6872   }
6873   else
6874   {
6875     fname[length-1]= 0;                         // Remove end \n
6876   }
6877   if (normalize_binlog_name(buf_arg, fname, false))
6878   {
6879     errmsg= "could not normalize the first file name in the binlog index";
6880     goto end;
6881   }
6882 end:
6883   mysql_bin_log.unlock_index();
6884 
6885   DBUG_RETURN(errmsg);
6886 }
6887 
6888 /**
6889   Check weather the gtid binlog state can safely remove gtid
6890   domains passed as the argument. A safety condition is satisfied when
6891   there are no events from the being deleted domains in the currently existing
6892   binlog files. Upon successful check the supplied domains are removed
6893   from @@gtid_binlog_state. The caller is supposed to rotate binlog so that
6894   the active latest file won't have the deleted domains in its Gtid_list header.
6895 
6896   @param  domain_drop_lex  gtid domain id sequence from lex.
6897                            Passed as a pointer to dynamic array must be not empty
6898                            unless pointer value NULL.
6899   @retval zero             on success
6900   @retval > 0              ineffective call none from the *non* empty
6901                            gtid domain sequence is deleted
6902   @retval < 0              on error
6903 */
do_delete_gtid_domain(DYNAMIC_ARRAY * domain_drop_lex)6904 static int do_delete_gtid_domain(DYNAMIC_ARRAY *domain_drop_lex)
6905 {
6906   int rc= 0;
6907   Gtid_list_log_event *glev= NULL;
6908   char buf[FN_REFLEN];
6909   File file;
6910   IO_CACHE cache;
6911   const char* errmsg= NULL;
6912   char errbuf[MYSQL_ERRMSG_SIZE]= {0};
6913 
6914   if (!domain_drop_lex)
6915     return 0; // still "effective" having empty domain sequence to delete
6916 
6917   DBUG_ASSERT(domain_drop_lex->elements > 0);
6918   mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
6919 
6920   if ((errmsg= get_first_binlog(buf)) != NULL)
6921     goto end;
6922   bzero((char*) &cache, sizeof(cache));
6923   if ((file= open_binlog(&cache, buf, &errmsg)) == (File) -1)
6924     goto end;
6925   errmsg= get_gtid_list_event(&cache, &glev);
6926   end_io_cache(&cache);
6927   mysql_file_close(file, MYF(MY_WME));
6928 
6929   DBUG_EXECUTE_IF("inject_binlog_delete_domain_init_error",
6930                   errmsg= "injected error";);
6931   if (errmsg)
6932     goto end;
6933   errmsg= rpl_global_gtid_binlog_state.drop_domain(domain_drop_lex,
6934                                                    glev, errbuf);
6935 
6936 end:
6937   if (errmsg)
6938   {
6939     if (strlen(errmsg) > 0)
6940     {
6941       my_error(ER_BINLOG_CANT_DELETE_GTID_DOMAIN, MYF(0), errmsg);
6942       rc= -1;
6943     }
6944     else
6945     {
6946       rc= 1;
6947     }
6948   }
6949   delete glev;
6950 
6951   return rc;
6952 }
6953 
6954 /**
6955   The method is a shortcut of @c rotate() and @c purge().
6956   LOCK_log is acquired prior to rotate and is released after it.
6957 
6958   @param force_rotate  caller can request the log rotation
6959 
6960   @retval
6961     nonzero - error in rotating routine.
6962 */
rotate_and_purge(bool force_rotate,DYNAMIC_ARRAY * domain_drop_lex)6963 int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
6964                                     DYNAMIC_ARRAY *domain_drop_lex)
6965 {
6966   int err_gtid=0, error= 0;
6967   ulong prev_binlog_id;
6968   DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
6969   bool check_purge= false;
6970 
6971   mysql_mutex_lock(&LOCK_log);
6972 
6973   DEBUG_SYNC(current_thd, "rotate_after_acquire_LOCK_log");
6974 
6975   prev_binlog_id= current_binlog_id;
6976 
6977   if ((err_gtid= do_delete_gtid_domain(domain_drop_lex)))
6978   {
6979     // inffective attempt to delete merely skips rotate and purge
6980     if (err_gtid < 0)
6981       error= 1; // otherwise error is propagated the user
6982   }
6983   else if (unlikely((error= rotate(force_rotate, &check_purge))))
6984     check_purge= false;
6985 
6986   DEBUG_SYNC(current_thd, "rotate_after_rotate");
6987 
6988   /*
6989     NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
6990           the mutex. Otherwise causes various deadlocks.
6991           Explicit binlog rotation must be synchronized with a concurrent
6992           binlog ordered commit, in particular not let binlog
6993           checkpoint notification request until early binlogged
6994           concurrent commits have has been completed.
6995   */
6996   mysql_mutex_lock(&LOCK_after_binlog_sync);
6997   mysql_mutex_unlock(&LOCK_log);
6998   mysql_mutex_lock(&LOCK_commit_ordered);
6999   mysql_mutex_unlock(&LOCK_after_binlog_sync);
7000   mysql_mutex_unlock(&LOCK_commit_ordered);
7001 
7002   if (check_purge)
7003     checkpoint_and_purge(prev_binlog_id);
7004 
7005   DBUG_RETURN(error);
7006 }
7007 
next_file_id()7008 uint MYSQL_BIN_LOG::next_file_id()
7009 {
7010   uint res;
7011   mysql_mutex_lock(&LOCK_log);
7012   res = file_id++;
7013   mysql_mutex_unlock(&LOCK_log);
7014   return res;
7015 }
7016 
7017 class CacheWriter: public Log_event_writer
7018 {
7019 public:
7020   size_t remains;
7021 
CacheWriter(THD * thd_arg,IO_CACHE * file_arg,bool do_checksum,Binlog_crypt_data * cr)7022   CacheWriter(THD *thd_arg, IO_CACHE *file_arg, bool do_checksum,
7023               Binlog_crypt_data *cr)
7024     : Log_event_writer(file_arg, 0, cr), remains(0), thd(thd_arg),
7025       first(true)
7026   { checksum_len= do_checksum ? BINLOG_CHECKSUM_LEN : 0; }
7027 
~CacheWriter()7028   ~CacheWriter()
7029   { status_var_add(thd->status_var.binlog_bytes_written, bytes_written); }
7030 
write(uchar * pos,size_t len)7031   int write(uchar* pos, size_t len)
7032   {
7033     DBUG_ENTER("CacheWriter::write");
7034     if (first)
7035       write_header(pos, len);
7036     else
7037       write_data(pos, len);
7038 
7039     remains -= len;
7040     if ((first= !remains))
7041       write_footer();
7042     DBUG_RETURN(0);
7043   }
7044 private:
7045   THD *thd;
7046   bool first;
7047 };
7048 
/*
  Write the contents of a cache to the binary log.

  SYNOPSIS
    write_cache()
    thd      Current_thread
    cache    Cache to write to the binary log

  DESCRIPTION
    Write the contents of the cache to the binary log. The cache will
    be reset as a READ_CACHE to be able to read the contents from it.

    While the events are copied from the cache, the event length and the
    end_log_pos stored in every event header are fixed up. When
    @c binlog_checksum_options is set, every event is additionally
    accounted the checksum length (see CacheWriter::checksum_len).

    The caller must hold LOCK_log (asserted on entry).

  RETURN
    0                   the whole cache was processed
    ER_ERROR_ON_WRITE   the cache could not be reinitialized for reading,
                        or a CacheWriter::write() call reported failure
*/

int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
{
  DBUG_ENTER("MYSQL_BIN_LOG::write_cache");

  mysql_mutex_assert_owner(&LOCK_log);
  if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  /*
    length    bytes available in the current cache segment
    group     position in log_file where this group of events starts
    carry     number of bytes of a split event header saved in header[]
    hdr_offs  offset of the next event header relative to the current segment
  */
  size_t length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
  size_t val;
  size_t end_log_pos_inc= 0; // each event processed adds writer.checksum_len
  uchar header[LOG_EVENT_HEADER_LEN];
  CacheWriter writer(thd, &log_file, binlog_checksum_options, &crypto);

  /* An encryption scheme is in use: give the writer a context buffer. */
  if (crypto.scheme)
    writer.ctx= alloca(crypto.ctx_size);

  // while there is just one alg the following must hold:
  DBUG_ASSERT(binlog_checksum_options == BINLOG_CHECKSUM_ALG_OFF ||
              binlog_checksum_options == BINLOG_CHECKSUM_ALG_CRC32);

  /*
    The events in the buffer have incorrect end_log_pos data
    (relative to beginning of group rather than absolute),
    so we'll recalculate them in situ so the binlog is always
    correct, even in the middle of a group. This is possible
    because we now know the start position of the group (the
    offset of this cache in the log, if you will); all we need
    to do is to find all event-headers, and add the position of
    the group to the end_log_pos of each event.  This is pretty
    straight forward, except that we read the cache in segments,
    so an event-header might end up on the cache-border and get
    split.
  */

  group= (size_t)my_b_tell(&log_file);
  hdr_offs= carry= 0;

  do
  {
    /*
      if we only got a partial header in the last iteration,
      get the other half now and process a full header.
    */
    if (unlikely(carry > 0))
    {
      DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);
      size_t tail= LOG_EVENT_HEADER_LEN - carry;

      /* assemble both halves */
      memcpy(&header[carry], (char *)cache->read_pos, tail);

      /* the event length as stored in the cache, without checksum */
      uint32 len= uint4korr(header + EVENT_LEN_OFFSET);
      writer.remains= len;

      /* fix end_log_pos */
      end_log_pos_inc += writer.checksum_len;
      val= uint4korr(header + LOG_POS_OFFSET) + group + end_log_pos_inc;
      int4store(header + LOG_POS_OFFSET, val);

      /* fix len */
      len+= writer.checksum_len;
      int4store(header + EVENT_LEN_OFFSET, len);

      if (writer.write(header, LOG_EVENT_HEADER_LEN))
        DBUG_RETURN(ER_ERROR_ON_WRITE);

      /* skip the part of the header that was taken from this segment */
      cache->read_pos+= tail;
      length-= tail;
      carry= 0;

      /* next event header at ... */
      hdr_offs= len - LOG_EVENT_HEADER_LEN - writer.checksum_len;
    }

    /* if there is anything to write, process it. */

    if (likely(length > 0))
    {
      DBUG_EXECUTE_IF("fail_binlog_write_1",
                      errno= 28; DBUG_RETURN(ER_ERROR_ON_WRITE););
      /*
        process all event-headers in this (partial) cache.
        if next header is beyond current read-buffer,
        we'll get it later (though not necessarily in the
        very next iteration, just "eventually").
      */

      /* no header in this segment: it is entirely event payload */
      if (hdr_offs >= length)
      {
        if (writer.write(cache->read_pos, length))
          DBUG_RETURN(ER_ERROR_ON_WRITE);
      }

      while (hdr_offs < length)
      {
        /*
          finish off with remains of the last event that crawls
          from previous into the current buffer
        */
        if (writer.remains != 0)
        {
          if (writer.write(cache->read_pos, hdr_offs))
            DBUG_RETURN(ER_ERROR_ON_WRITE);
        }

        /*
          partial header only? save what we can get, process once
          we get the rest.
        */
        if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
        {
          carry= length - hdr_offs;
          memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
          /* shrinking length to hdr_offs also terminates the while loop */
          length= hdr_offs;
        }
        else
        {
          /* we've got a full event-header, and it came in one piece */
          uchar *ev= (uchar *)cache->read_pos + hdr_offs;
          uint ev_len= uint4korr(ev + EVENT_LEN_OFFSET); // netto len
          uchar *log_pos= ev + LOG_POS_OFFSET;

          end_log_pos_inc += writer.checksum_len;
          /* fix end_log_pos */
          val= uint4korr(log_pos) + group + end_log_pos_inc;
          int4store(log_pos, val);

          /* fix length */
          int4store(ev + EVENT_LEN_OFFSET, ev_len + writer.checksum_len);

          /* write as much of the event as this segment contains */
          writer.remains= ev_len;
          if (writer.write(ev, MY_MIN(ev_len, length - hdr_offs)))
            DBUG_RETURN(ER_ERROR_ON_WRITE);

          /* next event header at ... */
          hdr_offs += ev_len; // incr by the netto len

          DBUG_ASSERT(!writer.checksum_len || writer.remains == 0 || hdr_offs >= length);
        }
      }

      /*
        Adjust hdr_offs. Note that it may still point beyond the segment
        read in the next iteration; if the current event is very long,
        it may take a couple of read-iterations (and subsequent adjustments
        of hdr_offs) for it to point into the then-current segment.
        If we have a split header (carry > 0), hdr_offs will be set at the
        beginning of the next iteration, overwriting the value we set here:
      */
      hdr_offs -= length;
    }
  } while ((length= my_b_fill(cache)));

  /* the cache must end on an event boundary */
  DBUG_ASSERT(carry == 0);
  DBUG_ASSERT(!writer.checksum_len || writer.remains == 0);

  DBUG_RETURN(0);                               // All OK
}
7224 
7225 /*
7226   Helper function to get the error code of the query to be binlogged.
7227  */
query_error_code(THD * thd,bool not_killed)7228 int query_error_code(THD *thd, bool not_killed)
7229 {
7230   int error;
7231 
7232   if (not_killed || (killed_mask_hard(thd->killed) == KILL_BAD_DATA))
7233   {
7234     error= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0;
7235     if (!error)
7236       return error;
7237 
7238     /* thd->get_get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
7239        ER_QUERY_INTERRUPTED, So here we need to make sure that error
7240        is not set to these errors when specified not_killed by the
7241        caller.
7242     */
7243     if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED ||
7244         error == ER_NEW_ABORTING_CONNECTION || error == ER_CONNECTION_KILLED)
7245       error= 0;
7246   }
7247   else
7248   {
7249     /* killed status for DELAYED INSERT thread should never be used */
7250     DBUG_ASSERT(!(thd->system_thread & SYSTEM_THREAD_DELAYED_INSERT));
7251     error= thd->killed_errno();
7252   }
7253 
7254   return error;
7255 }
7256 
7257 
write_incident_already_locked(THD * thd)7258 bool MYSQL_BIN_LOG::write_incident_already_locked(THD *thd)
7259 {
7260   uint error= 0;
7261   DBUG_ENTER("MYSQL_BIN_LOG::write_incident_already_locked");
7262   Incident incident= INCIDENT_LOST_EVENTS;
7263   Incident_log_event ev(thd, incident, &write_error_msg);
7264 
7265   if (likely(is_open()))
7266   {
7267     error= write_event(&ev);
7268     status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
7269   }
7270 
7271   DBUG_RETURN(error);
7272 }
7273 
7274 
write_incident(THD * thd)7275 bool MYSQL_BIN_LOG::write_incident(THD *thd)
7276 {
7277   uint error= 0;
7278   my_off_t offset;
7279   bool check_purge= false;
7280   ulong prev_binlog_id;
7281   DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
7282 
7283   mysql_mutex_lock(&LOCK_log);
7284   if (likely(is_open()))
7285   {
7286     prev_binlog_id= current_binlog_id;
7287     if (likely(!(error= write_incident_already_locked(thd))) &&
7288         likely(!(error= flush_and_sync(0))))
7289     {
7290       update_binlog_end_pos();
7291       if (unlikely((error= rotate(false, &check_purge))))
7292         check_purge= false;
7293     }
7294 
7295     offset= my_b_tell(&log_file);
7296 
7297     update_binlog_end_pos(offset);
7298 
7299     /*
7300       Take mutex to protect against a reader seeing partial writes of 64-bit
7301       offset on 32-bit CPUs.
7302     */
7303     mysql_mutex_lock(&LOCK_commit_ordered);
7304     last_commit_pos_offset= offset;
7305     mysql_mutex_unlock(&LOCK_commit_ordered);
7306     mysql_mutex_unlock(&LOCK_log);
7307 
7308     if (check_purge)
7309       checkpoint_and_purge(prev_binlog_id);
7310   }
7311   else
7312   {
7313     mysql_mutex_unlock(&LOCK_log);
7314   }
7315 
7316   DBUG_RETURN(error);
7317 }
7318 
7319 void
write_binlog_checkpoint_event_already_locked(const char * name_arg,uint len)7320 MYSQL_BIN_LOG::write_binlog_checkpoint_event_already_locked(const char *name_arg, uint len)
7321 {
7322   my_off_t offset;
7323   Binlog_checkpoint_log_event ev(name_arg, len);
7324   /*
7325     Note that we must sync the binlog checkpoint to disk.
7326     Otherwise a subsequent log purge could delete binlogs that XA recovery
7327     thinks are needed (even though they are not really).
7328   */
7329   if (!write_event(&ev) && !flush_and_sync(0))
7330   {
7331     update_binlog_end_pos();
7332   }
7333   else
7334   {
7335     /*
7336       If we fail to write the checkpoint event, something is probably really
7337       bad with the binlog. We complain in the error log.
7338 
7339       Note that failure to write binlog checkpoint does not compromise the
7340       ability to do crash recovery - crash recovery will just have to scan a
7341       bit more of the binlog than strictly necessary.
7342     */
7343     sql_print_error("Failed to write binlog checkpoint event to binary log");
7344   }
7345 
7346   offset= my_b_tell(&log_file);
7347 
7348   update_binlog_end_pos(offset);
7349 
7350   /*
7351     Take mutex to protect against a reader seeing partial writes of 64-bit
7352     offset on 32-bit CPUs.
7353   */
7354   mysql_mutex_lock(&LOCK_commit_ordered);
7355   last_commit_pos_offset= offset;
7356   mysql_mutex_unlock(&LOCK_commit_ordered);
7357 }
7358 
7359 
7360 /**
7361   Write a cached log entry to the binary log.
7362   - To support transaction over replication, we wrap the transaction
7363   with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
7364   We want to write a BEGIN/ROLLBACK block when a non-transactional table
7365   was updated in a transaction which was rolled back. This is to ensure
7366   that the same updates are run on the slave.
7367 
7368   @param thd
7369   @param cache		The cache to copy to the binlog
7370   @param commit_event   The commit event to print after writing the
7371                         contents of the cache.
7372   @param incident       Defines if an incident event should be created to
7373                         notify that some non-transactional changes did
7374                         not get into the binlog.
7375 
7376   @note
7377     We only come here if there is something in the cache.
7378   @note
7379     The thing in the cache is always a complete transaction.
7380   @note
7381     'cache' needs to be reinitialized after this functions returns.
7382 */
7383 
7384 bool
write_transaction_to_binlog(THD * thd,binlog_cache_mngr * cache_mngr,Log_event * end_ev,bool all,bool using_stmt_cache,bool using_trx_cache)7385 MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
7386                                            binlog_cache_mngr *cache_mngr,
7387                                            Log_event *end_ev, bool all,
7388                                            bool using_stmt_cache,
7389                                            bool using_trx_cache)
7390 {
7391   group_commit_entry entry;
7392   Ha_trx_info *ha_info;
7393   DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
7394 
7395   /*
7396     Control should not be allowed beyond this point in wsrep_emulate_bin_log
7397     mode. Also, do not write the cached updates to binlog if binary logging is
7398     disabled (log-bin/sql_log_bin).
7399   */
7400   if (wsrep_emulate_bin_log)
7401   {
7402     DBUG_RETURN(0);
7403   }
7404   else if (!(thd->variables.option_bits & OPTION_BIN_LOG))
7405   {
7406     cache_mngr->need_unlog= false;
7407     DBUG_RETURN(0);
7408   }
7409 
7410   entry.thd= thd;
7411   entry.cache_mngr= cache_mngr;
7412   entry.error= 0;
7413   entry.all= all;
7414   entry.using_stmt_cache= using_stmt_cache;
7415   entry.using_trx_cache= using_trx_cache;
7416   entry.need_unlog= false;
7417   ha_info= all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
7418 
7419   for (; ha_info; ha_info= ha_info->next())
7420   {
7421     if (ha_info->is_started() && ha_info->ht() != binlog_hton &&
7422         !ha_info->ht()->commit_checkpoint_request)
7423       entry.need_unlog= true;
7424     break;
7425   }
7426 
7427   entry.end_event= end_ev;
7428   if (cache_mngr->stmt_cache.has_incident() ||
7429       cache_mngr->trx_cache.has_incident())
7430   {
7431     Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, &write_error_msg);
7432     entry.incident_event= &inc_ev;
7433     DBUG_RETURN(write_transaction_to_binlog_events(&entry));
7434   }
7435   else
7436   {
7437     entry.incident_event= NULL;
7438     DBUG_RETURN(write_transaction_to_binlog_events(&entry));
7439   }
7440 }
7441 
7442 
/*
  Put a transaction that is ready to commit in the group commit queue.
  The transaction is identified by the ENTRY object passed into this function.

  To facilitate group commit for the binlog, we first queue up ourselves in
  this function. Then later the first thread to enter the queue waits for
  the LOCK_log mutex, and commits for everyone in the queue once it gets the
  lock. Any other threads in the queue just wait for the first one to finish
  the commit and wake them up. This way, all transactions in the queue get
  committed in a single disk operation.

  The main work in this function is when the commit in one transaction has
  been marked to wait for the commit of another transaction to happen
  first. This is used to support in-order parallel replication, where
  transactions can execute out-of-order but need to be committed in-order with
  how they happened on the master. The waiting of one commit on another needs
  to be integrated with the group commit queue, to ensure that the waiting
  transaction can participate in the same group commit as the waited-for
  transaction.

  So when we put a transaction in the queue, we check if there were other
  transactions already prepared to commit but just waiting for the first one
  to commit. If so, we add those to the queue as well, transitively for all
  waiters.

  And if a transaction is marked to wait for a prior transaction, but that
  prior transaction is already queued for group commit, then we can queue the
  new transaction directly to participate in the group commit.

  @param orig_entry  Group commit entry of the calling thread
                     (orig_entry->thd is asserted to be current_thd)

  @retval < 0   Error
  @retval  -1   Killed while waiting for the prior commit, or the prior
                commit failed; an error has been raised
  @retval  -2   WSREP error with commit ordering
  @retval  -3   WSREP return code to mark the leader
  @retval > 0   If queued as the first entry in the queue (meaning this
                is the leader)
  @retval   0   Otherwise (queued as participant, leader handles the commit)
*/

int
MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
{
  group_commit_entry *entry, *orig_queue, *last;
  wait_for_commit *cur;
  wait_for_commit *wfc;
  bool backup_lock_released= 0;
  int result= 0;
  THD *thd= orig_entry->thd;
  DBUG_ENTER("MYSQL_BIN_LOG::queue_for_group_commit");
  DBUG_ASSERT(thd == current_thd);

  /*
    Check if we need to wait for another transaction to commit before us.

    It is safe to do a quick check without lock first in the case where we do
    not have to wait. But if the quick check shows we need to wait, we must do
    another safe check under lock, to avoid the race where the other
    transaction wakes us up between the check and the wait.
  */
  wfc= orig_entry->thd->wait_for_commit_ptr;
  orig_entry->queued_by_other= false;
  if (wfc && wfc->waitee.load(std::memory_order_acquire))
  {
    wait_for_commit *loc_waitee;

    mysql_mutex_lock(&wfc->LOCK_wait_commit);
    /*
      Do an extra check here, this time safely under lock.

      If waitee->commit_started is set, it means that the transaction we need
      to wait for has already queued up for group commit. In this case it is
      safe for us to queue up immediately as well, increasing the opportunities
      for group commit. Because waitee has taken the LOCK_prepare_ordered
      before setting the flag, so there is no risk that we can queue ahead of
      it.
    */
    if ((loc_waitee= wfc->waitee.load(std::memory_order_relaxed)) &&
        !loc_waitee->commit_started)
    {
      PSI_stage_info old_stage;

      /*
        Release MDL_BACKUP_COMMIT LOCK while waiting for other threads to
        commit.
        This is needed to avoid deadlock between the other threads (which not
        yet have the MDL_BACKUP_COMMIT_LOCK) and any threads using
        BACKUP LOCK BLOCK_COMMIT.
        The lock is re-acquired at the `end' label.
      */
      if (thd->backup_commit_lock && thd->backup_commit_lock->ticket &&
          !backup_lock_released)
      {
        backup_lock_released= 1;
        thd->mdl_context.release_lock(thd->backup_commit_lock->ticket);
        thd->backup_commit_lock->ticket= 0;
      }

      /*
        By setting wfc->opaque_pointer to our own entry, we mark that we are
        ready to commit, but waiting for another transaction to commit before
        us.

        This other transaction may then take over the commit process for us to
        get us included in its own group commit. If this happens, the
        queued_by_other flag is set.

        Setting this flag may or may not be seen by the other thread, but we
        are safe in any case: The other thread will set queued_by_other under
        its LOCK_wait_commit, and we will not check queued_by_other until
        after we have been woken up.
      */
      wfc->opaque_pointer= orig_entry;
      DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior");
      orig_entry->thd->ENTER_COND(&wfc->COND_wait_commit,
                                  &wfc->LOCK_wait_commit,
                                  &stage_waiting_for_prior_transaction_to_commit,
                                  &old_stage);
      /* Wait until the waitee is cleared or this thread is killed. */
      while ((loc_waitee= wfc->waitee.load(std::memory_order_relaxed)) &&
              !orig_entry->thd->check_killed(1))
        mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
      wfc->opaque_pointer= NULL;
      DBUG_PRINT("info", ("After waiting for prior commit, queued_by_other=%d",
                 orig_entry->queued_by_other));

      /* A waitee that is still set means the loop ended because of a kill. */
      if (loc_waitee)
      {
        /* Wait terminated due to kill. */
        mysql_mutex_lock(&loc_waitee->LOCK_wait_commit);
        if (loc_waitee->wakeup_subsequent_commits_running ||
            orig_entry->queued_by_other)
        {
          /* Our waitee is already waking us up, so ignore the kill. */
          mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
          do
          {
            mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
          } while (wfc->waitee.load(std::memory_order_relaxed));
        }
        else
        {
          /* We were killed, so remove us from the list of waitee. */
          wfc->remove_from_list(&loc_waitee->subsequent_commits_list);
          mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
          /*
            This is the thread clearing its own status, it is no longer on
            the list of waiters. So no memory barriers are needed here.
          */
          wfc->waitee.store(NULL, std::memory_order_relaxed);

          orig_entry->thd->EXIT_COND(&old_stage);
          /* Interrupted by kill. */
          DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior_killed");
          /* Fall back to ER_QUERY_INTERRUPTED if no kill errno is set. */
          wfc->wakeup_error= orig_entry->thd->killed_errno();
          if (!wfc->wakeup_error)
            wfc->wakeup_error= ER_QUERY_INTERRUPTED;
          my_message(wfc->wakeup_error,
                     ER_THD(orig_entry->thd, wfc->wakeup_error), MYF(0));
          result= -1;
          goto end;
        }
      }
      orig_entry->thd->EXIT_COND(&old_stage);
    }
    else
      mysql_mutex_unlock(&wfc->LOCK_wait_commit);
  }
  /*
    If the transaction we were waiting for has already put us into the group
    commit queue (and possibly already done the entire binlog commit for us),
    then there is nothing else to do.
  */
  if (orig_entry->queued_by_other)
    goto end;

  /* The commit we depended on reported an error: fail this commit too. */
  if (wfc && wfc->wakeup_error)
  {
    my_error(ER_PRIOR_COMMIT_FAILED, MYF(0));
    result= -1;
    goto end;
  }

  /* Now enqueue ourselves in the group commit queue. */
  DEBUG_SYNC(orig_entry->thd, "commit_before_enqueue");
  orig_entry->thd->clear_wakeup_ready();
  mysql_mutex_lock(&LOCK_prepare_ordered);
  /* Remember the queue head; an empty queue makes us the leader. */
  orig_queue= group_commit_queue;

  /*
    Iteratively process everything added to the queue, looking for waiters,
    and their waiters, and so on. If a waiter is ready to commit, we
    immediately add it to the queue, and mark it as queued_by_other.

    This would be natural to do with recursion, but we want to avoid
    potentially unbounded recursion blowing the C stack, so we use the list
    approach instead.

    We keep a list of the group_commit_entry of all the waiters that need to
    be processed. Initially this list contains only the entry passed into this
    function.

    We process entries in the list one by one. The element currently being
    processed is pointed to by `entry`, and the element at the end of the list
    is pointed to by `last` (we do not use NULL to terminate the list).

    As we process an entry, any waiters for that entry are added at the end of
    the list, to be processed in subsequent iterations. Then the entry is added
    to the group_commit_queue.  This continues until the list is exhausted,
    with all entries ever added eventually processed.

    The end result is a breadth-first traversal of the tree of waiters,
    re-using the `next' pointers of the group_commit_entry objects in place of
    extra stack space in a recursive traversal.

    The temporary list linked through these `next' pointers is not used by the
    caller or any other function; it only exists while doing the iterative
    tree traversal. After, all the processed entries are linked into the
    group_commit_queue.
  */

  cur= wfc;
  last= orig_entry;
  entry= orig_entry;
  for (;;)
  {
    group_commit_entry *next_entry;

    if (entry->cache_mngr->using_xa)
    {
      DEBUG_SYNC(entry->thd, "commit_before_prepare_ordered");
      run_prepare_ordered(entry->thd, entry->all);
      DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered");
    }

    if (cur)
    {
      /*
        Now that we have taken LOCK_prepare_ordered and will queue up in the
        group commit queue, it is safe for following transactions to queue
        themselves. We will grab here any transaction that is now ready to
        queue up, but after that, more transactions may become ready while the
        leader is waiting to start the group commit. So set the flag
        `commit_started', so that later transactions can still participate in
        the group commit.
      */
      cur->commit_started= true;

      /*
        Check if this transaction has other transaction waiting for it to
        commit.

        If so, process the waiting transactions, and their waiters and so on,
        transitively.
      */
      if (cur->subsequent_commits_list)
      {
        wait_for_commit *waiter, **waiter_ptr;

        mysql_mutex_lock(&cur->LOCK_wait_commit);
        /*
          Grab the list, now safely under lock, and process it if still
          non-empty.
        */
        waiter= cur->subsequent_commits_list;
        /* waiter_ptr is the link to rewrite when `waiter' is unlinked. */
        waiter_ptr= &cur->subsequent_commits_list;
        while (waiter)
        {
          wait_for_commit *next_waiter= waiter->next_subsequent_commit;
          group_commit_entry *entry2=
            (group_commit_entry *)waiter->opaque_pointer;
          if (entry2)
          {
            /*
              This is another transaction ready to be written to the binary
              log. We can put it into the queue directly, without needing a
              separate context switch to the other thread. We just set a flag
              so that the other thread will know when it wakes up that it was
              already processed.

              So remove it from the list of our waiters, and instead put it at
              the end of the list to be processed in a subsequent iteration of
              the outer loop.
            */
            *waiter_ptr= next_waiter;
            entry2->queued_by_other= true;
            last->next= entry2;
            last= entry2;
            /*
              As a small optimisation, we do not actually need to set
              entry2->next to NULL, as we can use the pointer `last' to check
              for end-of-list.
            */
          }
          else
          {
            /*
              This transaction is not ready to participate in the group commit
              yet, so leave it in the waiter list. It might join the group
              commit later, if it completes soon enough to do so (it will see
              our wfc->commit_started flag set), or it might commit later in a
              later group commit.
            */
            waiter_ptr= &waiter->next_subsequent_commit;
          }
          waiter= next_waiter;
        }
        mysql_mutex_unlock(&cur->LOCK_wait_commit);
      }
    }

    /*
      Handle the heuristics that if another transaction is waiting for this
      transaction (or if it does so later), then we want to trigger group
      commit immediately, without waiting for the binlog_commit_wait_usec
      timeout to expire.
    */
    entry->thd->waiting_on_group_commit= true;

    /*
      Add the entry to the group commit queue. entry->next is saved first
      because it still links the temporary traversal list.
    */
    next_entry= entry->next;
    entry->next= group_commit_queue;
    group_commit_queue= entry;
    if (entry == last)
      break;
    /*
      Move to the next entry in the flattened list of waiting transactions
      that still need to be processed transitively.
    */
    entry= next_entry;
    DBUG_ASSERT(entry != NULL);
    cur= entry->thd->wait_for_commit_ptr;
  }

  /* 1 if the queue was empty before we added to it (leader), else 0. */
  result= orig_queue == NULL;

#ifdef WITH_WSREP
  /*
    NOTE(review): `entry' here is the last entry processed by the loop above,
    which differs from orig_entry when waiters were queued along with us.
    Confirm that this is the intended THD for the wsrep hooks.
  */
  if (wsrep_is_active(entry->thd) &&
      wsrep_run_commit_hook(entry->thd, entry->all))
  {
    /*  Release commit order here */
    if (wsrep_ordered_commit(entry->thd, entry->all, wsrep_apply_error()))
      result= -2;

    /*
      return -3, if this is leader
      NOTE(review): this overwrites a -2 set just above when the leader's
      wsrep_ordered_commit() failed.
    */
    if (orig_queue == NULL)
      result= -3;
  }
  else
    DBUG_ASSERT(result != -2 && result != -3);
#endif /* WITH_WSREP */

  /* A participant joined an existing queue: signal COND_prepare_ordered. */
  if (opt_binlog_commit_wait_count > 0 && orig_queue != NULL)
    mysql_cond_signal(&COND_prepare_ordered);
  mysql_mutex_unlock(&LOCK_prepare_ordered);
  DEBUG_SYNC(orig_entry->thd, "commit_after_release_LOCK_prepare_ordered");

  DBUG_PRINT("info", ("Queued for group commit as %s",
                      (orig_queue == NULL) ? "leader" : "participant"));

end:
  /*
    Re-acquire the backup commit lock if it was released before waiting.
    NOTE(review): the result of acquire_lock() is not checked here.
  */
  if (backup_lock_released)
    thd->mdl_context.acquire_lock(thd->backup_commit_lock,
                                  thd->variables.lock_wait_timeout);
  DBUG_RETURN(result);
}
7804 
7805 bool
write_transaction_to_binlog_events(group_commit_entry * entry)7806 MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
7807 {
7808   int is_leader= queue_for_group_commit(entry);
7809 #ifdef WITH_WSREP
7810   /* commit order was released in queue_for_group_commit() call,
7811      here we check if wsrep_commit_ordered() failed or if we are leader */
7812   switch (is_leader)
7813   {
7814   case -2: /* wsrep_ordered_commit() has failed */
7815     DBUG_ASSERT(wsrep_is_active(entry->thd));
7816     DBUG_ASSERT(wsrep_run_commit_hook(entry->thd, entry->all));
7817     entry->thd->wakeup_subsequent_commits(1);
7818     return true;
7819   case -3: /* this is leader, wait for prior commit to
7820               complete. This establishes total order for group leaders
7821            */
7822     DBUG_ASSERT(wsrep_is_active(entry->thd));
7823     DBUG_ASSERT(wsrep_run_commit_hook(entry->thd, entry->all));
7824     if (entry->thd->wait_for_prior_commit())
7825       return true;
7826 
7827     /* retain the correct is_leader value */
7828     is_leader= 1;
7829     break;
7830 
7831   default: /* native MariaDB cases */
7832     break;
7833   }
7834 #endif /* WITH_WSREP */
7835 
7836   /*
7837     The first in the queue handles group commit for all; the others just wait
7838     to be signalled when group commit is done.
7839   */
7840   if (is_leader < 0)
7841     return true;                                /* Error */
7842   else if (is_leader)
7843     trx_group_commit_leader(entry);
7844   else if (!entry->queued_by_other)
7845   {
7846     DEBUG_SYNC(entry->thd, "after_semisync_queue");
7847 
7848     entry->thd->wait_for_wakeup_ready();
7849   }
7850   else
7851   {
7852     /*
7853       If we were queued by another prior commit, then we are woken up
7854       only when the leader has already completed the commit for us.
7855       So nothing to do here then.
7856     */
7857   }
7858 
7859   if (!opt_optimize_thread_scheduling)
7860   {
7861     /* For the leader, trx_group_commit_leader() already took the lock. */
7862     if (!is_leader)
7863       mysql_mutex_lock(&LOCK_commit_ordered);
7864 
7865     DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered");
7866     ++num_commits;
7867     if (entry->cache_mngr->using_xa && !entry->error)
7868       run_commit_ordered(entry->thd, entry->all);
7869 
7870     group_commit_entry *next= entry->next;
7871     if (!next)
7872     {
7873       group_commit_queue_busy= FALSE;
7874       mysql_cond_signal(&COND_queue_busy);
7875       DEBUG_SYNC(entry->thd, "commit_after_group_run_commit_ordered");
7876     }
7877     mysql_mutex_unlock(&LOCK_commit_ordered);
7878     entry->thd->wakeup_subsequent_commits(entry->error);
7879 
7880     if (next)
7881     {
7882       /*
7883         Wake up the next thread in the group commit.
7884 
7885         The next thread can be waiting in two different ways, depending on
7886         whether it put itself in the queue, or if it was put in queue by us
7887         because it had to wait for us to commit first.
7888 
7889         So execute the appropriate wakeup, identified by the queued_by_other
7890         field.
7891       */
7892       if (next->queued_by_other)
7893         next->thd->wait_for_commit_ptr->wakeup(entry->error);
7894       else
7895         next->thd->signal_wakeup_ready();
7896     }
7897     else
7898     {
7899       /*
7900         If we rotated the binlog, and if we are using the unoptimized thread
7901         scheduling where every thread runs its own commit_ordered(), then we
7902         must do the commit checkpoint and log purge here, after all
7903         commit_ordered() calls have finished, and locks have been released.
7904       */
7905       if (entry->check_purge)
7906         checkpoint_and_purge(entry->binlog_id);
7907     }
7908 
7909   }
7910 
7911   if (likely(!entry->error))
7912     return entry->thd->wait_for_prior_commit();
7913 
7914   switch (entry->error)
7915   {
7916   case ER_ERROR_ON_WRITE:
7917     my_error(ER_ERROR_ON_WRITE, MYF(ME_ERROR_LOG), name, entry->commit_errno);
7918     break;
7919   case ER_ERROR_ON_READ:
7920     my_error(ER_ERROR_ON_READ, MYF(ME_ERROR_LOG),
7921              entry->error_cache->file_name, entry->commit_errno);
7922     break;
7923   default:
7924     /*
7925       There are not (and should not be) any errors thrown not covered above.
7926       But just in case one is added later without updating the above switch
7927       statement, include a catch-all.
7928     */
7929     my_printf_error(entry->error,
7930                     "Error writing transaction to binary log: %d",
7931                     MYF(ME_ERROR_LOG), entry->error);
7932   }
7933 
7934   /*
7935     Since we return error, this transaction XID will not be committed, so
7936     we need to mark it as not needed for recovery (unlog() is not called
7937     for a transaction if log_xid() fails).
7938   */
7939   if (entry->cache_mngr->using_xa && entry->cache_mngr->xa_xid &&
7940       entry->cache_mngr->need_unlog)
7941     mark_xid_done(entry->cache_mngr->binlog_id, true);
7942 
7943   return 1;
7944 }
7945 
7946 /*
7947   Do binlog group commit as the lead thread.
7948 
7949   This must be called when this statement/transaction is queued at the start of
7950   the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
7951   commit all the transactions in the queue (more may have entered while waiting
7952   for LOCK_log). After commit is done, all other threads in the queue will be
7953   signalled.
7954 
7955  */
7956 void
trx_group_commit_leader(group_commit_entry * leader)7957 MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
7958 {
7959   uint xid_count= 0;
7960   my_off_t UNINIT_VAR(commit_offset);
7961   group_commit_entry *current, *last_in_queue;
7962   group_commit_entry *queue= NULL;
7963   bool check_purge= false;
7964   ulong UNINIT_VAR(binlog_id);
7965   uint64 commit_id;
7966   DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
7967 
7968   {
7969     DBUG_EXECUTE_IF("inject_binlog_commit_before_get_LOCK_log",
7970       DBUG_ASSERT(!debug_sync_set_action(leader->thd, STRING_WITH_LEN
7971         ("commit_before_get_LOCK_log SIGNAL waiting WAIT_FOR cont TIMEOUT 1")));
7972     );
7973     /*
7974       Lock the LOCK_log(), and once we get it, collect any additional writes
7975       that queued up while we were waiting.
7976     */
7977     DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_log");
7978     mysql_mutex_lock(&LOCK_log);
7979     DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log");
7980 
7981     mysql_mutex_lock(&LOCK_prepare_ordered);
7982     if (opt_binlog_commit_wait_count)
7983       wait_for_sufficient_commits();
7984     /*
7985       Note that wait_for_sufficient_commits() may have released and
7986       re-acquired the LOCK_log and LOCK_prepare_ordered if it needed to wait.
7987     */
7988     current= group_commit_queue;
7989     group_commit_queue= NULL;
7990     mysql_mutex_unlock(&LOCK_prepare_ordered);
7991     binlog_id= current_binlog_id;
7992 
7993     /* As the queue is in reverse order of entering, reverse it. */
7994     last_in_queue= current;
7995     while (current)
7996     {
7997       group_commit_entry *next= current->next;
7998       /*
7999         Now that group commit is started, we can clear the flag; there is no
8000         longer any use in waiters on this commit trying to trigger it early.
8001       */
8002       current->thd->waiting_on_group_commit= false;
8003       current->next= queue;
8004       queue= current;
8005       current= next;
8006     }
8007     DBUG_ASSERT(leader == queue /* the leader should be first in queue */);
8008 
8009     /* Now we have in queue the list of transactions to be committed in order. */
8010   }
8011 
8012   DBUG_ASSERT(is_open());
8013   if (likely(is_open()))                       // Should always be true
8014   {
8015     commit_id= (last_in_queue == leader ? 0 : (uint64)leader->thd->query_id);
8016     DBUG_EXECUTE_IF("binlog_force_commit_id",
8017       {
8018         const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") };
8019         bool null_value;
8020         user_var_entry *entry=
8021           (user_var_entry*) my_hash_search(&leader->thd->user_vars,
8022                                            (uchar*) commit_name.str,
8023                                            commit_name.length);
8024         commit_id= entry->val_int(&null_value);
8025       });
8026     /*
8027       Commit every transaction in the queue.
8028 
8029       Note that we are doing this in a different thread than the one running
8030       the transaction! So we are limited in the operations we can do. In
8031       particular, we cannot call my_error() on behalf of a transaction, as
8032       that obtains the THD from thread local storage. Instead, we must set
8033       current->error and let the thread do the error reporting itself once
8034       we wake it up.
8035     */
8036     for (current= queue; current != NULL; current= current->next)
8037     {
8038       set_current_thd(current->thd);
8039       binlog_cache_mngr *cache_mngr= current->cache_mngr;
8040 
8041       /*
8042         We already checked before that at least one cache is non-empty; if both
8043         are empty we would have skipped calling into here.
8044       */
8045       DBUG_ASSERT(!cache_mngr->stmt_cache.empty() || !cache_mngr->trx_cache.empty());
8046 
8047       if (unlikely((current->error= write_transaction_or_stmt(current,
8048                                                               commit_id))))
8049         current->commit_errno= errno;
8050 
8051       strmake_buf(cache_mngr->last_commit_pos_file, log_file_name);
8052       commit_offset= my_b_write_tell(&log_file);
8053       cache_mngr->last_commit_pos_offset= commit_offset;
8054       if (cache_mngr->using_xa && cache_mngr->xa_xid)
8055       {
8056         /*
8057           If all storage engines support commit_checkpoint_request(), then we
8058           do not need to keep track of when this XID is durably committed.
8059           Instead we will just ask the storage engine to durably commit all its
8060           XIDs when we rotate a binlog file.
8061         */
8062         if (current->need_unlog)
8063         {
8064           xid_count++;
8065           cache_mngr->need_unlog= true;
8066           cache_mngr->binlog_id= binlog_id;
8067         }
8068         else
8069           cache_mngr->need_unlog= false;
8070 
8071         cache_mngr->delayed_error= false;
8072       }
8073     }
8074     set_current_thd(leader->thd);
8075 
8076     bool synced= 0;
8077     if (unlikely(flush_and_sync(&synced)))
8078     {
8079       for (current= queue; current != NULL; current= current->next)
8080       {
8081         if (!current->error)
8082         {
8083           current->error= ER_ERROR_ON_WRITE;
8084           current->commit_errno= errno;
8085           current->error_cache= NULL;
8086         }
8087       }
8088     }
8089     else
8090     {
8091       bool any_error= false;
8092 
8093       mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
8094       mysql_mutex_assert_owner(&LOCK_log);
8095       mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
8096       mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
8097 
8098       for (current= queue; current != NULL; current= current->next)
8099       {
8100 #ifdef HAVE_REPLICATION
8101         if (likely(!current->error) &&
8102             unlikely(repl_semisync_master.
8103                      report_binlog_update(current->thd,
8104                                           current->cache_mngr->
8105                                           last_commit_pos_file,
8106                                           current->cache_mngr->
8107                                           last_commit_pos_offset)))
8108         {
8109           current->error= ER_ERROR_ON_WRITE;
8110           current->commit_errno= -1;
8111           current->error_cache= NULL;
8112           any_error= true;
8113         }
8114 #endif
8115       }
8116 
8117       /*
8118         update binlog_end_pos so it can be read by dump thread
8119         Note: must be _after_ the RUN_HOOK(after_flush) or else
8120         semi-sync might not have put the transaction into
8121         it's list before dump-thread tries to send it
8122       */
8123       update_binlog_end_pos(commit_offset);
8124 
8125       if (unlikely(any_error))
8126         sql_print_error("Failed to run 'after_flush' hooks");
8127     }
8128 
8129     /*
8130       If any commit_events are Xid_log_event, increase the number of pending
8131       XIDs in current binlog (it's decreased in ::unlog()). When the count in
8132       a (not active) binlog file reaches zero, we know that it is no longer
8133       needed in XA recovery, and we can log a new binlog checkpoint event.
8134     */
8135     if (xid_count > 0)
8136     {
8137       mark_xids_active(binlog_id, xid_count);
8138     }
8139 
8140     if (rotate(false, &check_purge))
8141     {
8142       /*
8143         If we fail to rotate, which thread should get the error?
8144         We give the error to the leader, as any my_error() thrown inside
8145         rotate() will have been registered for the leader THD.
8146 
8147         However we must not return error from here - that would cause
8148         ha_commit_trans() to abort and rollback the transaction, which would
8149         leave an inconsistent state with the transaction committed in the
8150         binlog but rolled back in the engine.
8151 
8152         Instead set a flag so that we can return error later, from unlog(),
8153         when the transaction has been safely committed in the engine.
8154       */
8155       leader->cache_mngr->delayed_error= true;
8156       my_error(ER_ERROR_ON_WRITE, MYF(ME_ERROR_LOG), name, errno);
8157       check_purge= false;
8158     }
8159     /* In case of binlog rotate, update the correct current binlog offset. */
8160     commit_offset= my_b_write_tell(&log_file);
8161   }
8162 
8163   DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_after_binlog_sync");
8164   mysql_mutex_lock(&LOCK_after_binlog_sync);
8165   /*
8166     We cannot unlock LOCK_log until we have locked LOCK_after_binlog_sync;
8167     otherwise scheduling could allow the next group commit to run ahead of us,
8168     messing up the order of commit_ordered() calls. But as soon as
8169     LOCK_after_binlog_sync is obtained, we can let the next group commit start.
8170   */
8171   mysql_mutex_unlock(&LOCK_log);
8172 
8173   DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log");
8174 
8175   /*
8176     Loop through threads and run the binlog_sync hook
8177   */
8178   {
8179     mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
8180     mysql_mutex_assert_not_owner(&LOCK_log);
8181     mysql_mutex_assert_owner(&LOCK_after_binlog_sync);
8182     mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
8183 
8184     bool first __attribute__((unused))= true;
8185     bool last __attribute__((unused));
8186     for (current= queue; current != NULL; current= current->next)
8187     {
8188       last= current->next == NULL;
8189 #ifdef HAVE_REPLICATION
8190       if (likely(!current->error))
8191         current->error=
8192           repl_semisync_master.wait_after_sync(current->cache_mngr->
8193                                                last_commit_pos_file,
8194                                                current->cache_mngr->
8195                                                last_commit_pos_offset);
8196 #endif
8197       first= false;
8198     }
8199   }
8200 
8201   DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
8202 
8203   mysql_mutex_lock(&LOCK_commit_ordered);
8204   DBUG_EXECUTE_IF("crash_before_engine_commit",
8205       {
8206         DBUG_SUICIDE();
8207       });
8208   last_commit_pos_offset= commit_offset;
8209 
8210   /*
8211     Unlock LOCK_after_binlog_sync only *after* LOCK_commit_ordered has been
8212     acquired so that groups can not reorder for the different stages of
8213     the group commit procedure.
8214   */
8215   mysql_mutex_unlock(&LOCK_after_binlog_sync);
8216   DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_after_binlog_sync");
8217   ++num_group_commits;
8218 
8219   if (!opt_optimize_thread_scheduling)
8220   {
8221     /*
8222       If we want to run commit_ordered() each in the transaction's own thread
8223       context, then we need to mark the queue reserved; we need to finish all
8224       threads in one group commit before the next group commit can be allowed
8225       to proceed, and we cannot unlock a simple pthreads mutex in a different
8226       thread from the one that locked it.
8227     */
8228 
8229     while (group_commit_queue_busy)
8230       mysql_cond_wait(&COND_queue_busy, &LOCK_commit_ordered);
8231     group_commit_queue_busy= TRUE;
8232 
8233     /*
8234       Set these so parent can run checkpoint_and_purge() in last thread.
8235       (When using optimized thread scheduling, we run checkpoint_and_purge()
8236       in this function, so parent does not need to and we need not set these
8237       values).
8238     */
8239     last_in_queue->check_purge= check_purge;
8240     last_in_queue->binlog_id= binlog_id;
8241 
8242     /* Note that we return with LOCK_commit_ordered locked! */
8243     DBUG_VOID_RETURN;
8244   }
8245 
8246   /*
8247     Wakeup each participant waiting for our group commit, first calling the
8248     commit_ordered() methods for any transactions doing 2-phase commit.
8249   */
8250   current= queue;
8251   while (current != NULL)
8252   {
8253     group_commit_entry *next;
8254 
8255     DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered");
8256     ++num_commits;
8257     if (current->cache_mngr->using_xa && likely(!current->error) &&
8258         DBUG_EVALUATE_IF("skip_commit_ordered", 0, 1))
8259       run_commit_ordered(current->thd, current->all);
8260     current->thd->wakeup_subsequent_commits(current->error);
8261 
8262     /*
8263       Careful not to access current->next after waking up the other thread! As
8264       it may change immediately after wakeup.
8265     */
8266     next= current->next;
8267     if (current != leader)                      // Don't wake up ourself
8268     {
8269       if (current->queued_by_other)
8270         current->thd->wait_for_commit_ptr->wakeup(current->error);
8271       else
8272         current->thd->signal_wakeup_ready();
8273     }
8274     current= next;
8275   }
8276   DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered");
8277   mysql_mutex_unlock(&LOCK_commit_ordered);
8278   DEBUG_SYNC(leader->thd, "commit_after_group_release_commit_ordered");
8279 
8280   if (check_purge)
8281     checkpoint_and_purge(binlog_id);
8282 
8283   DBUG_VOID_RETURN;
8284 }
8285 
8286 
8287 int
write_transaction_or_stmt(group_commit_entry * entry,uint64 commit_id)8288 MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
8289                                          uint64 commit_id)
8290 {
8291   binlog_cache_mngr *mngr= entry->cache_mngr;
8292   DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt");
8293 
8294   if (write_gtid_event(entry->thd, false, entry->using_trx_cache, commit_id))
8295     DBUG_RETURN(ER_ERROR_ON_WRITE);
8296 
8297   if (entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
8298       write_cache(entry->thd, mngr->get_binlog_cache_log(FALSE)))
8299   {
8300     entry->error_cache= &mngr->stmt_cache.cache_log;
8301     DBUG_RETURN(ER_ERROR_ON_WRITE);
8302   }
8303 
8304   if (entry->using_trx_cache && !mngr->trx_cache.empty())
8305   {
8306     DBUG_EXECUTE_IF("crash_before_writing_xid",
8307                     {
8308                       if ((write_cache(entry->thd,
8309                                        mngr->get_binlog_cache_log(TRUE))))
8310                         DBUG_PRINT("info", ("error writing binlog cache"));
8311                       else
8312                         flush_and_sync(0);
8313 
8314                       DBUG_PRINT("info", ("crashing before writing xid"));
8315                       DBUG_SUICIDE();
8316                     });
8317 
8318     if (write_cache(entry->thd, mngr->get_binlog_cache_log(TRUE)))
8319     {
8320       entry->error_cache= &mngr->trx_cache.cache_log;
8321       DBUG_RETURN(ER_ERROR_ON_WRITE);
8322     }
8323   }
8324 
8325   DBUG_EXECUTE_IF("inject_error_writing_xid",
8326                   {
8327                     entry->error_cache= NULL;
8328                     errno= 28;
8329                     DBUG_RETURN(ER_ERROR_ON_WRITE);
8330                   });
8331 
8332   if (write_event(entry->end_event))
8333   {
8334     entry->error_cache= NULL;
8335     DBUG_RETURN(ER_ERROR_ON_WRITE);
8336   }
8337   status_var_add(entry->thd->status_var.binlog_bytes_written,
8338                  entry->end_event->data_written);
8339 
8340   if (entry->incident_event)
8341   {
8342     if (write_event(entry->incident_event))
8343     {
8344       entry->error_cache= NULL;
8345       DBUG_RETURN(ER_ERROR_ON_WRITE);
8346     }
8347   }
8348 
8349   if (unlikely(mngr->get_binlog_cache_log(FALSE)->error))
8350   {
8351     entry->error_cache= &mngr->stmt_cache.cache_log;
8352     DBUG_RETURN(ER_ERROR_ON_WRITE);
8353   }
8354   if (unlikely(mngr->get_binlog_cache_log(TRUE)->error))  // Error on read
8355   {
8356     entry->error_cache= &mngr->trx_cache.cache_log;
8357     DBUG_RETURN(ER_ERROR_ON_WRITE);
8358   }
8359 
8360   DBUG_RETURN(0);
8361 }
8362 
8363 
8364 /*
8365   Wait for sufficient commits to queue up for group commit, according to the
8366   values of binlog_commit_wait_count and binlog_commit_wait_usec.
8367 
8368   Note that this function may release and re-acquire LOCK_log and
8369   LOCK_prepare_ordered if it needs to wait.
8370 */
8371 
8372 void
wait_for_sufficient_commits()8373 MYSQL_BIN_LOG::wait_for_sufficient_commits()
8374 {
8375   size_t count;
8376   group_commit_entry *e;
8377   group_commit_entry *last_head;
8378   struct timespec wait_until;
8379 
8380   mysql_mutex_assert_owner(&LOCK_log);
8381   mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8382 
8383   for (e= last_head= group_commit_queue, count= 0; e; e= e->next)
8384   {
8385     if (++count >= opt_binlog_commit_wait_count)
8386     {
8387       group_commit_trigger_count++;
8388       return;
8389     }
8390     if (unlikely(e->thd->has_waiter))
8391     {
8392       group_commit_trigger_lock_wait++;
8393       return;
8394     }
8395   }
8396 
8397   mysql_mutex_unlock(&LOCK_log);
8398   set_timespec_nsec(wait_until, (ulonglong)1000*opt_binlog_commit_wait_usec);
8399 
8400   for (;;)
8401   {
8402     int err;
8403     group_commit_entry *head;
8404 
8405     err= mysql_cond_timedwait(&COND_prepare_ordered, &LOCK_prepare_ordered,
8406                               &wait_until);
8407     if (err == ETIMEDOUT)
8408     {
8409       group_commit_trigger_timeout++;
8410       break;
8411     }
8412     if (unlikely(last_head->thd->has_waiter))
8413     {
8414       group_commit_trigger_lock_wait++;
8415       break;
8416     }
8417     head= group_commit_queue;
8418     for (e= head; e && e != last_head; e= e->next)
8419     {
8420       ++count;
8421       if (unlikely(e->thd->has_waiter))
8422       {
8423         group_commit_trigger_lock_wait++;
8424         goto after_loop;
8425       }
8426     }
8427     if (count >= opt_binlog_commit_wait_count)
8428     {
8429       group_commit_trigger_count++;
8430       break;
8431     }
8432     last_head= head;
8433   }
8434 after_loop:
8435 
8436   /*
8437     We must not wait for LOCK_log while holding LOCK_prepare_ordered.
8438     LOCK_log can be held for long periods (eg. we do I/O under it), while
8439     LOCK_prepare_ordered must only be held for short periods.
8440 
8441     In addition, waiting for LOCK_log while holding LOCK_prepare_ordered would
8442     violate locking order of LOCK_log-before-LOCK_prepare_ordered. This could
8443     cause SAFEMUTEX warnings (even if it cannot actually deadlock with current
8444     code, as there can be at most one group commit leader thread at a time).
8445 
8446     So release and re-acquire LOCK_prepare_ordered if we need to wait for the
8447     LOCK_log.
8448   */
8449   if (mysql_mutex_trylock(&LOCK_log))
8450   {
8451     mysql_mutex_unlock(&LOCK_prepare_ordered);
8452     mysql_mutex_lock(&LOCK_log);
8453     mysql_mutex_lock(&LOCK_prepare_ordered);
8454   }
8455 }
8456 
8457 
8458 void
binlog_trigger_immediate_group_commit()8459 MYSQL_BIN_LOG::binlog_trigger_immediate_group_commit()
8460 {
8461   group_commit_entry *head;
8462   mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8463   head= group_commit_queue;
8464   if (head)
8465   {
8466     head->thd->has_waiter= true;
8467     mysql_cond_signal(&COND_prepare_ordered);
8468   }
8469 }
8470 
8471 
8472 /*
8473   This function is called when a transaction T1 goes to wait for another
8474   transaction T2. It is used to cut short any binlog group commit delay from
8475   --binlog-commit-wait-count in the case where another transaction is stalled
8476   on the wait due to conflicting row locks.
8477 
8478   If T2 is already ready to group commit, any waiting group commit will be
8479   signalled to proceed immediately. Otherwise, a flag will be set in T2, and
8480   when T2 later becomes ready, immediate group commit will be triggered.
8481 */
8482 void
binlog_report_wait_for(THD * thd1,THD * thd2)8483 binlog_report_wait_for(THD *thd1, THD *thd2)
8484 {
8485   if (opt_binlog_commit_wait_count == 0)
8486     return;
8487   mysql_mutex_lock(&LOCK_prepare_ordered);
8488   thd2->has_waiter= true;
8489   if (thd2->waiting_on_group_commit)
8490     mysql_bin_log.binlog_trigger_immediate_group_commit();
8491   mysql_mutex_unlock(&LOCK_prepare_ordered);
8492 }
8493 
8494 
8495 /**
8496   Wait until we get a signal that the relay log has been updated.
8497 
8498   @param thd		Thread variable
8499 
8500   @note
8501     One must have a lock on LOCK_log before calling this function.
8502     This lock will be released before return! That's required by
8503     THD::enter_cond() (see NOTES in sql_class.h).
8504 */
8505 
wait_for_update_relay_log(THD * thd)8506 void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
8507 {
8508   PSI_stage_info old_stage;
8509   DBUG_ENTER("wait_for_update_relay_log");
8510 
8511   mysql_mutex_assert_owner(&LOCK_log);
8512   thd->ENTER_COND(&COND_relay_log_updated, &LOCK_log,
8513                   &stage_slave_has_read_all_relay_log,
8514                   &old_stage);
8515   mysql_cond_wait(&COND_relay_log_updated, &LOCK_log);
8516   thd->EXIT_COND(&old_stage);
8517   DBUG_VOID_RETURN;
8518 }
8519 
8520 /**
8521   Wait until we get a signal that the binary log has been updated.
8522   Applies to master only.
8523 
8524   NOTES
8525   @param[in] thd        a THD struct
8526   @param[in] timeout    a pointer to a timespec;
8527                         NULL means to wait w/o timeout.
8528   @retval    0          if got signalled on update
8529   @retval    non-0      if wait timeout elapsed
8530   @note
8531     LOCK_log must be taken before calling this function.
8532     LOCK_log is being released while the thread is waiting.
8533     LOCK_log is released by the caller.
8534 */
8535 
wait_for_update_binlog_end_pos(THD * thd,struct timespec * timeout)8536 int MYSQL_BIN_LOG::wait_for_update_binlog_end_pos(THD* thd,
8537                                                   struct timespec *timeout)
8538 {
8539   int ret= 0;
8540   DBUG_ENTER("wait_for_update_binlog_end_pos");
8541 
8542   thd_wait_begin(thd, THD_WAIT_BINLOG);
8543   mysql_mutex_assert_owner(get_binlog_end_pos_lock());
8544   if (!timeout)
8545     mysql_cond_wait(&COND_bin_log_updated, get_binlog_end_pos_lock());
8546   else
8547     ret= mysql_cond_timedwait(&COND_bin_log_updated, get_binlog_end_pos_lock(),
8548                               timeout);
8549   thd_wait_end(thd);
8550   DBUG_RETURN(ret);
8551 }
8552 
8553 
8554 /**
8555   Close the log file.
8556 
8557   @param exiting     Bitmask for one or more of the following bits:
8558           - LOG_CLOSE_INDEX : if we should close the index file
8559           - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
8560                                      at once after close.
8561           - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
8562           - LOG_CLOSE_DELAYED_CLOSE : do not yet close the file and clear the
8563                                       LOG_EVENT_BINLOG_IN_USE_F flag
8564 
8565   @note
8566     One can do an open on the object at once after doing a close.
8567     The internal structures are not freed until cleanup() is called
8568 */
8569 
close(uint exiting)8570 void MYSQL_BIN_LOG::close(uint exiting)
8571 {					// One can't set log_type here!
8572   bool failed_to_save_state= false;
8573   DBUG_ENTER("MYSQL_BIN_LOG::close");
8574   DBUG_PRINT("enter",("exiting: %d", (int) exiting));
8575 
8576   mysql_mutex_assert_owner(&LOCK_log);
8577 
8578   if (log_state == LOG_OPENED)
8579   {
8580 #ifdef HAVE_REPLICATION
8581     if (log_type == LOG_BIN &&
8582 	(exiting & LOG_CLOSE_STOP_EVENT))
8583     {
8584       Stop_log_event s;
8585       // the checksumming rule for relay-log case is similar to Rotate
8586         s.checksum_alg= is_relay_log ? relay_log_checksum_alg
8587                                      : (enum_binlog_checksum_alg)binlog_checksum_options;
8588       DBUG_ASSERT(!is_relay_log ||
8589                   relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
8590       write_event(&s);
8591       bytes_written+= s.data_written;
8592       flush_io_cache(&log_file);
8593       update_binlog_end_pos();
8594 
8595       /*
8596         When we shut down server, write out the binlog state to a separate
8597         file so we do not have to scan an entire binlog file to recover it
8598         at next server start.
8599 
8600         Note that this must be written and synced to disk before marking the
8601         last binlog file as "not crashed".
8602       */
8603       if (!is_relay_log && write_state_to_file())
8604       {
8605         sql_print_error("Failed to save binlog GTID state during shutdown. "
8606                         "Binlog will be marked as crashed, so that crash "
8607                         "recovery can recover the state at next server "
8608                         "startup.");
8609         /*
8610           Leave binlog file marked as crashed, so we can recover state by
8611           scanning it now that we failed to write out the state properly.
8612         */
8613         failed_to_save_state= true;
8614       }
8615     }
8616 #endif /* HAVE_REPLICATION */
8617 
8618     /* don't pwrite in a file opened with O_APPEND - it doesn't work */
8619     if (log_file.type == WRITE_CACHE && log_type == LOG_BIN
8620         && !(exiting & LOG_CLOSE_DELAYED_CLOSE))
8621     {
8622       my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
8623       if (!failed_to_save_state)
8624         clear_inuse_flag_when_closing(log_file.file);
8625       /*
8626         Restore position so that anything we have in the IO_cache is written
8627         to the correct position.
8628         We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
8629         original position on system that doesn't support pwrite().
8630       */
8631       mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
8632     }
8633 
8634     /* this will cleanup IO_CACHE, sync and close the file */
8635     MYSQL_LOG::close(exiting);
8636   }
8637 
8638   /*
8639     The following test is needed even if is_open() is not set, as we may have
8640     called a not complete close earlier and the index file is still open.
8641   */
8642 
8643   if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
8644   {
8645     end_io_cache(&index_file);
8646     if (unlikely(mysql_file_close(index_file.file, MYF(0)) < 0) &&
8647         ! write_error)
8648     {
8649       write_error= 1;
8650       sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), index_file_name, errno);
8651     }
8652   }
8653   log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
8654   my_free(name);
8655   name= NULL;
8656   DBUG_VOID_RETURN;
8657 }
8658 
8659 
8660 /*
8661   Clear the LOG_EVENT_BINLOG_IN_USE_F; this marks the binlog file as cleanly
8662   closed and not needing crash recovery.
8663 */
clear_inuse_flag_when_closing(File file)8664 void MYSQL_BIN_LOG::clear_inuse_flag_when_closing(File file)
8665 {
8666   my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
8667   uchar flags= 0;            // clearing LOG_EVENT_BINLOG_IN_USE_F
8668   mysql_file_pwrite(file, &flags, 1, offset, MYF(0));
8669 }
8670 
8671 
set_max_size(ulong max_size_arg)8672 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
8673 {
8674   /*
8675     We need to take locks, otherwise this may happen:
8676     new_file() is called, calls open(old_max_size), then before open() starts,
8677     set_max_size() sets max_size to max_size_arg, then open() starts and
8678     uses the old_max_size argument, so max_size_arg has been overwritten and
8679     it's like if the SET command was never run.
8680   */
8681   DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
8682   mysql_mutex_lock(&LOCK_log);
8683   if (is_open())
8684     max_size= max_size_arg;
8685   mysql_mutex_unlock(&LOCK_log);
8686   DBUG_VOID_RETURN;
8687 }
8688 
8689 
8690 /**
8691   Check if a string is a valid number.
8692 
8693   @param str			String to test
8694   @param res			Store value here
8695   @param allow_wildcards	Set to 1 if we should ignore '%' and '_'
8696 
8697   @note
8698     For the moment the allow_wildcards argument is not used
    Should be moved to some other file.
8700 
8701   @retval
8702     1	String is a number
8703   @retval
8704     0	String is not a number
8705 */
8706 
test_if_number(const char * str,ulong * res,bool allow_wildcards)8707 static bool test_if_number(const char *str, ulong *res, bool allow_wildcards)
8708 {
8709   int flag;
8710   const char *start;
8711   DBUG_ENTER("test_if_number");
8712 
8713   flag=0; start=str;
8714   while (*str++ == ' ') ;
8715   if (*--str == '-' || *str == '+')
8716     str++;
8717   while (my_isdigit(files_charset_info,*str) ||
8718 	 (allow_wildcards && (*str == wild_many || *str == wild_one)))
8719   {
8720     flag=1;
8721     str++;
8722   }
8723   if (*str == '.')
8724   {
8725     for (str++ ;
8726 	 my_isdigit(files_charset_info,*str) ||
8727 	   (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
8728 	 str++, flag=1) ;
8729   }
8730   if (*str != 0 || flag == 0)
8731     DBUG_RETURN(0);
8732   if (res)
8733     *res=atol(start);
8734   DBUG_RETURN(1);			/* Number ok */
8735 } /* test_if_number */
8736 
8737 
void sql_perror(const char *message)
{
  /*
    Report 'message' together with a description of the last system error:
    GetLastError() on Windows, errno elsewhere.
  */
#if defined(_WIN32)
  char *sys_msg;
  const DWORD last_error= GetLastError();
  const DWORD fmt_flags= FORMAT_MESSAGE_ALLOCATE_BUFFER |
                         FORMAT_MESSAGE_FROM_SYSTEM |
                         FORMAT_MESSAGE_IGNORE_INSERTS;
  const DWORD msg_len= FormatMessage(fmt_flags, NULL, last_error,
                                     MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
                                     (LPSTR) &sys_msg, 0, NULL);
  if (msg_len == 0)
  {
    /* No system text could be produced; print the bare message. */
    sql_print_error("%s", message);
    return;
  }
  sql_print_error("%s: %s",message, sys_msg);
  /* The buffer was allocated by FormatMessage() and must be released. */
  LocalFree((HLOCAL) sys_msg);
#elif defined(HAVE_STRERROR)
  sql_print_error("%s: %s",message, strerror(errno));
#else
  perror(message);
#endif
}
8760 
8761 
8762 /*
8763   Change the file associated with two output streams. Used to
8764   redirect stdout and stderr to a file. The streams are reopened
8765   only for appending (writing at end of file).
8766 */
reopen_fstreams(const char * filename,FILE * outstream,FILE * errstream)8767 bool reopen_fstreams(const char *filename, FILE *outstream, FILE *errstream)
8768 {
8769   if ((outstream && !my_freopen(filename, "a", outstream)) ||
8770       (errstream && !my_freopen(filename, "a", errstream)))
8771   {
8772     my_error(ER_CANT_CREATE_FILE, MYF(0), filename, errno);
8773     return TRUE;
8774   }
8775 
8776   /* The error stream must be unbuffered. */
8777   if (errstream)
8778     setbuf(errstream, NULL);
8779 
8780   return FALSE;
8781 }
8782 
8783 
8784 /*
8785   Unfortunately, there seems to be no good way
8786   to restore the original streams upon failure.
8787 */
redirect_std_streams(const char * file)8788 static bool redirect_std_streams(const char *file)
8789 {
8790   if (reopen_fstreams(file, stdout, stderr))
8791     return TRUE;
8792 
8793   setbuf(stderr, NULL);
8794   return FALSE;
8795 }
8796 
8797 
flush_error_log()8798 bool flush_error_log()
8799 {
8800   bool result= 0;
8801   if (opt_error_log)
8802   {
8803     mysql_mutex_lock(&LOCK_error_log);
8804     if (redirect_std_streams(log_error_file))
8805       result= 1;
8806     mysql_mutex_unlock(&LOCK_error_log);
8807   }
8808   return result;
8809 }
8810 
8811 #ifdef _WIN32
8812 struct eventlog_source
8813 {
8814   HANDLE handle;
eventlog_sourceeventlog_source8815   eventlog_source()
8816   {
8817     setup_windows_event_source();
8818     handle = RegisterEventSource(NULL, "MariaDB");
8819   }
8820 
~eventlog_sourceeventlog_source8821   ~eventlog_source()
8822   {
8823     if (handle)
8824       DeregisterEventSource(handle);
8825   }
8826 };
8827 
8828 static eventlog_source eventlog;
8829 
print_buffer_to_nt_eventlog(enum loglevel level,char * buff,size_t length,size_t buffLen)8830 static void print_buffer_to_nt_eventlog(enum loglevel level, char *buff,
8831                                         size_t length, size_t buffLen)
8832 {
8833   HANDLE event= eventlog.handle;
8834   char   *buffptr= buff;
8835   DBUG_ENTER("print_buffer_to_nt_eventlog");
8836 
8837   /* Add ending CR/LF's to string, overwrite last chars if necessary */
8838   strmov(buffptr+MY_MIN(length, buffLen-5), "\r\n\r\n");
8839 
8840   if (event)
8841   {
8842     switch (level) {
8843       case ERROR_LEVEL:
8844         ReportEvent(event, EVENTLOG_ERROR_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
8845                     (LPCSTR*)&buffptr, NULL);
8846         break;
8847       case WARNING_LEVEL:
8848         ReportEvent(event, EVENTLOG_WARNING_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
8849                     (LPCSTR*) &buffptr, NULL);
8850         break;
8851       case INFORMATION_LEVEL:
8852         ReportEvent(event, EVENTLOG_INFORMATION_TYPE, 0, MSG_DEFAULT, NULL, 1,
8853                     0, (LPCSTR*) &buffptr, NULL);
8854         break;
8855     }
8856   }
8857 
8858   DBUG_VOID_RETURN;
8859 }
8860 #endif /* _WIN32 */
8861 
8862 
8863 #ifndef EMBEDDED_LIBRARY
print_buffer_to_file(enum loglevel level,const char * buffer,size_t length)8864 static void print_buffer_to_file(enum loglevel level, const char *buffer,
8865                                  size_t length)
8866 {
8867   time_t skr;
8868   struct tm tm_tmp;
8869   struct tm *start;
8870   THD *thd= 0;
8871   size_t tag_length= 0;
8872   char tag[NAME_LEN];
8873   DBUG_ENTER("print_buffer_to_file");
8874   DBUG_PRINT("enter",("buffer: %s", buffer));
8875 
8876   if (mysqld_server_initialized && (thd= current_thd))
8877   {
8878     if (thd->connection_name.length)
8879     {
8880       /*
8881         Add tag for slaves so that the user can see from which connection
8882         the error originates.
8883       */
8884       tag_length= my_snprintf(tag, sizeof(tag),
8885                               ER_THD(thd, ER_MASTER_LOG_PREFIX),
8886                               (int) thd->connection_name.length,
8887                               thd->connection_name.str);
8888     }
8889   }
8890 
8891   mysql_mutex_lock(&LOCK_error_log);
8892 
8893   skr= my_time(0);
8894   localtime_r(&skr, &tm_tmp);
8895   start=&tm_tmp;
8896 
8897   fprintf(stderr, "%d-%02d-%02d %2d:%02d:%02d %lu [%s] %.*s%.*s\n",
8898           start->tm_year + 1900,
8899           start->tm_mon+1,
8900           start->tm_mday,
8901           start->tm_hour,
8902           start->tm_min,
8903           start->tm_sec,
8904           (unsigned long) (thd ? thd->thread_id : 0),
8905           (level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
8906            "Warning" : "Note"),
8907           (int) tag_length, tag,
8908           (int) length, buffer);
8909 
8910   fflush(stderr);
8911 
8912   mysql_mutex_unlock(&LOCK_error_log);
8913   DBUG_VOID_RETURN;
8914 }
8915 
8916 /**
8917   Prints a printf style message to the error log and, under NT, to the
8918   Windows event log.
8919 
8920   This function prints the message into a buffer and then sends that buffer
8921   to other functions to write that message to other logging sources.
8922 
8923   @param level          The level of the msg significance
8924   @param format         Printf style format of message
8925   @param args           va_list list of arguments for the message
8926 
8927   @returns
8928     The function always returns 0. The return value is present in the
8929     signature to be compatible with other logging routines, which could
8930     return an error (e.g. logging to the log tables)
8931 */
vprint_msg_to_log(enum loglevel level,const char * format,va_list args)8932 int vprint_msg_to_log(enum loglevel level, const char *format, va_list args)
8933 {
8934   char   buff[1024];
8935   size_t length;
8936   DBUG_ENTER("vprint_msg_to_log");
8937 
8938   length= my_vsnprintf(buff, sizeof(buff), format, args);
8939   print_buffer_to_file(level, buff, length);
8940 
8941 #ifdef _WIN32
8942   print_buffer_to_nt_eventlog(level, buff, length, sizeof(buff));
8943 #endif
8944 
8945   DBUG_RETURN(0);
8946 }
8947 #endif /* EMBEDDED_LIBRARY */
8948 
8949 
sql_print_error(const char * format,...)8950 void sql_print_error(const char *format, ...)
8951 {
8952   va_list args;
8953   DBUG_ENTER("sql_print_error");
8954 
8955   va_start(args, format);
8956   error_log_print(ERROR_LEVEL, format, args);
8957   va_end(args);
8958 
8959   DBUG_VOID_RETURN;
8960 }
8961 
8962 
sql_print_warning(const char * format,...)8963 void sql_print_warning(const char *format, ...)
8964 {
8965   va_list args;
8966   DBUG_ENTER("sql_print_warning");
8967 
8968   va_start(args, format);
8969   error_log_print(WARNING_LEVEL, format, args);
8970   va_end(args);
8971 
8972   DBUG_VOID_RETURN;
8973 }
8974 
8975 
sql_print_information(const char * format,...)8976 void sql_print_information(const char *format, ...)
8977 {
8978   va_list args;
8979   DBUG_ENTER("sql_print_information");
8980 
8981   va_start(args, format);
8982   sql_print_information_v(format, args);
8983   va_end(args);
8984 
8985   DBUG_VOID_RETURN;
8986 }
8987 
sql_print_information_v(const char * format,va_list ap)8988 void sql_print_information_v(const char *format, va_list ap)
8989 {
8990   if (disable_log_notes)
8991     return;                 // Skip notes during start/shutdown
8992 
8993   error_log_print(INFORMATION_LEVEL, format, ap);
8994 }
8995 
8996 void
run_prepare_ordered(THD * thd,bool all)8997 TC_LOG::run_prepare_ordered(THD *thd, bool all)
8998 {
8999   Ha_trx_info *ha_info=
9000     all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
9001 
9002   mysql_mutex_assert_owner(&LOCK_prepare_ordered);
9003   for (; ha_info; ha_info= ha_info->next())
9004   {
9005     handlerton *ht= ha_info->ht();
9006     if (!ht->prepare_ordered)
9007       continue;
9008     ht->prepare_ordered(ht, thd, all);
9009   }
9010 }
9011 
9012 
9013 void
run_commit_ordered(THD * thd,bool all)9014 TC_LOG::run_commit_ordered(THD *thd, bool all)
9015 {
9016   Ha_trx_info *ha_info=
9017     all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
9018 
9019   mysql_mutex_assert_owner(&LOCK_commit_ordered);
9020   for (; ha_info; ha_info= ha_info->next())
9021   {
9022     handlerton *ht= ha_info->ht();
9023     if (!ht->commit_ordered)
9024       continue;
9025     ht->commit_ordered(ht, thd, all);
9026     DEBUG_SYNC(thd, "commit_after_run_commit_ordered");
9027   }
9028 }
9029 
9030 
/**
  Log the xid of a transaction and run the ordered prepare/commit hooks.

  The steps are: optionally run run_prepare_ordered() under
  LOCK_prepare_ordered (queueing this thread when commit ordering is also
  needed), wait for any prior commit, log the xid with
  log_one_transaction(), and optionally run run_commit_ordered() in the
  same sequence in which run_prepare_ordered() was run.

  @param thd                   Thread committing the transaction
  @param xid                   Xid to log; when 0 nothing is logged
  @param all                   Passed through to the ordered hooks
  @param need_prepare_ordered  Run run_prepare_ordered()
  @param need_commit_ordered   Run run_commit_ordered()

  @return
    The cookie from log_one_transaction(); 0 if wait_for_prior_commit()
    reported an error, if xid was 0, or if logging failed.
*/
int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all,
                               bool need_prepare_ordered,
                               bool need_commit_ordered)
{
  int cookie;
  struct commit_entry entry;
  /* Only assigned (and only read) when both need_* flags are set. */
  bool UNINIT_VAR(is_group_commit_leader);

  if (need_prepare_ordered)
  {
    mysql_mutex_lock(&LOCK_prepare_ordered);
    run_prepare_ordered(thd, all);
    if (need_commit_ordered)
    {
      /*
        Must put us in queue so we can run_commit_ordered() in same sequence
        as we did run_prepare_ordered().
      */
      thd->clear_wakeup_ready();
      entry.thd= thd;
      /* Push onto the head of the list; entry lives on this thread's stack. */
      commit_entry *previous_queue= commit_ordered_queue;
      entry.next= previous_queue;
      commit_ordered_queue= &entry;
      /* The thread that found the queue empty becomes the leader. */
      is_group_commit_leader= (previous_queue == NULL);
    }
    mysql_mutex_unlock(&LOCK_prepare_ordered);
  }

  /*
    NOTE(review): when entry was queued above, this early return leaves a
    pointer to the stack-allocated entry in commit_ordered_queue and skips
    the hand-off below - confirm that this path is handled elsewhere.
  */
  if (thd->wait_for_prior_commit())
    return 0;

  cookie= 0;
  if (xid)
    cookie= log_one_transaction(xid);

  if (need_commit_ordered)
  {
    if (need_prepare_ordered)
    {
      /*
        We did the run_prepare_ordered() serialised, then ran the log_xid() in
        parallel. Now we have to do run_commit_ordered() serialised in the
        same sequence as run_prepare_ordered().

        We do this starting from the head of the queue, each thread doing
        run_commit_ordered() and signalling the next in queue.
      */
      if (is_group_commit_leader)
      {
        /* The first in queue starts the ball rolling. */
        mysql_mutex_lock(&LOCK_prepare_ordered);
        /* Wait until the previous group has finished its hand-off chain. */
        while (commit_ordered_queue_busy)
          mysql_cond_wait(&COND_queue_busy, &LOCK_prepare_ordered);
        /* Detach the whole queue; later arrivals start a new group. */
        commit_entry *queue= commit_ordered_queue;
        commit_ordered_queue= NULL;
        /*
          Mark the queue busy while we bounce it from one thread to the
          next.
        */
        commit_ordered_queue_busy= true;
        mysql_mutex_unlock(&LOCK_prepare_ordered);

        /* Reverse the queue list so we get correct order. */
        commit_entry *prev= NULL;
        while (queue)
        {
          commit_entry *next= queue->next;
          queue->next= prev;
          prev= queue;
          queue= next;
        }
        /* After the reversal the leader's own entry is the list head. */
        DBUG_ASSERT(prev == &entry && prev->thd == thd);
      }
      else
      {
        /* Not first in queue; just wait until previous thread wakes us up. */
        thd->wait_for_wakeup_ready();
      }
    }

    /* Only run commit_ordered() if log_xid was successful. */
    if (cookie)
    {
      mysql_mutex_lock(&LOCK_commit_ordered);
      run_commit_ordered(thd, all);
      mysql_mutex_unlock(&LOCK_commit_ordered);
    }

    if (need_prepare_ordered)
    {
      /* Pass the baton on, or release the queue if we were the last. */
      commit_entry *next= entry.next;
      if (next)
      {
        next->thd->signal_wakeup_ready();
      }
      else
      {
        mysql_mutex_lock(&LOCK_prepare_ordered);
        commit_ordered_queue_busy= false;
        mysql_cond_signal(&COND_queue_busy);
        mysql_mutex_unlock(&LOCK_prepare_ordered);
      }
    }
  }

  return cookie;
}
9138 
9139 
9140 /********* transaction coordinator log for 2pc - mmap() based solution *******/
9141 
9142 /*
9143   the log consists of a file, mapped to memory.
9144   file is divided into pages of tc_log_page_size size.
9145   (usable size of the first page is smaller because of the log header)
9146   there is a PAGE control structure for each page
9147   each page (or rather its PAGE control structure) can be in one of
9148   the three states - active, syncing, pool.
9149   there could be only one page in the active or syncing state,
9150   but many in pool - pool is a fifo queue.
9151   the usual lifecycle of a page is pool->active->syncing->pool.
9152   the "active" page is a page where new xid's are logged.
9153   the page stays active as long as the syncing slot is taken.
9154   the "syncing" page is being synced to disk. no new xid can be added to it.
9155   when the syncing is done the page is moved to a pool and an active page
9156   becomes "syncing".
9157 
9158   the result of such an architecture is a natural "commit grouping" -
9159   If commits are coming faster than the system can sync, they do not
9160   stall. Instead, all commits that came since the last sync are
9161   logged to the same "active" page, and they all are synced with the next -
  one - sync. Thus, though individual commits are delayed, throughput
9163   is not decreasing.
9164 
9165   when an xid is added to an active page, the thread of this xid waits
9166   for a page's condition until the page is synced. when syncing slot
  becomes vacant one of these waiters is awakened to take care of syncing.
9168   it syncs the page and signals all waiters that the page is synced.
9169   PAGE::waiters is used to count these waiters, and a page may never
9170   become active again until waiters==0 (that is all waiters from the
9171   previous sync have noticed that the sync was completed)
9172 
9173   note, that the page becomes "dirty" and has to be synced only when a
9174   new xid is added into it. Removing a xid from a page does not make it
9175   dirty - we don't sync xid removals to disk.
9176 */
9177 
/* Incremented by TC_LOG_MMAP::overflow() each time it waits for a page. */
ulong tc_log_page_waits= 0;

#ifdef HAVE_MMAP

/*
  Size of the header at the start of the first page: the magic bytes plus
  one byte (TC_LOG_MMAP::open() stores total_ha_2pc in that byte).
*/
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)

/* Signature copied to the start of the mapped file by TC_LOG_MMAP::open(). */
static const uchar tc_log_magic[]={(uchar) 254, 0x23, 0x05, 0x74};

/* File size used by TC_LOG_MMAP::open() when it creates a new log file. */
ulong opt_tc_log_size;
/*
  tc_log_page_size is set from my_getpagesize() in TC_LOG_MMAP::open().
  tc_log_cur_pages_used is incremented when an empty page is made active
  and decremented when a page becomes completely empty again;
  tc_log_max_pages_used keeps the largest value it has reached.
*/
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
9188 
/**
  Open (or create) the memory-mapped transaction coordinator log.

  An already existing file is treated as left over from a crash and
  recover() is run on it; otherwise a new file of opt_tc_log_size bytes is
  created. The file is mapped, one PAGE control structure is set up per
  page, and the header is written.

  The 'inited' member records how far initialization got, so that close()
  (also called from the err: label) knows what has to be undone:
  1 - file opened, 2 - file mapped, 3 - pages array allocated,
  4 - pages initialized, 5 - header written, 6 - mutexes and conditions
  initialized.

  @param opt_name  Name of the log file, resolved against mysql_data_home

  @retval 0  ok
  @retval 1  error
*/
int TC_LOG_MMAP::open(const char *opt_name)
{
  uint i;
  bool crashed=FALSE;
  PAGE *pg;

  DBUG_ASSERT(total_ha_2pc > 1);
  DBUG_ASSERT(opt_name && opt_name[0]);

  tc_log_page_size= my_getpagesize();

  fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
  if ((fd= mysql_file_open(key_file_tclog, logname, O_RDWR | O_CLOEXEC, MYF(0))) < 0)
  {
    /* Only a missing file is acceptable here; it is created below. */
    if (my_errno != ENOENT)
      goto err;
    if (using_heuristic_recover())
      return 1;
    if ((fd= mysql_file_create(key_file_tclog, logname, CREATE_MODE,
                               O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0)
      goto err;
    inited=1;
    file_length= opt_tc_log_size;
    if (mysql_file_chsize(fd, file_length, 0, MYF(MY_WME)))
      goto err;
  }
  else
  {
    /* The file already exists: recover from it after mapping. */
    inited= 1;
    crashed= TRUE;
    sql_print_information("Recovering after a crash using %s", opt_name);
    if (tc_heuristic_recover)
    {
      sql_print_error("Cannot perform automatic crash recovery when "
                      "--tc-heuristic-recover is used");
      goto err;
    }
    /* The existing file must consist of a whole number of pages. */
    file_length= mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
    if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
      goto err;
  }

  data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
                        MAP_NOSYNC|MAP_SHARED, fd, 0);
  if (data == MAP_FAILED)
  {
    my_errno=errno;
    goto err;
  }
  inited=2;

  /*
    NOTE(review): the cast binds tighter than the division, so file_length
    is converted to uint before being divided - confirm this is intended
    for files whose length does not fit in a uint.
  */
  npages=(uint)file_length/tc_log_page_size;
  if (npages < 3)             // to guarantee non-empty pool
    goto err;
  if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
    goto err;
  inited=3;
  /* Chain all pages together and point each one at its slice of the map. */
  for (pg=pages, i=0; i < npages; i++, pg++)
  {
    pg->next=pg+1;
    pg->waiters=0;
    pg->state=PS_POOL;
    mysql_mutex_init(key_PAGE_lock, &pg->lock, MY_MUTEX_INIT_FAST);
    mysql_cond_init(key_PAGE_cond, &pg->cond, 0);
    pg->ptr= pg->start=(my_xid *)(data + i*tc_log_page_size);
    pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
    pg->end=pg->start + pg->size;
  }
  /* The first page is smaller: it has to leave room for the log header. */
  pages[0].size=pages[0].free=
                (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
  pages[0].start=pages[0].end-pages[0].size;
  pages[npages-1].next=0;
  inited=4;

  if (crashed && recover())
      goto err;

  /* Write the header: magic bytes followed by total_ha_2pc, then sync it. */
  memcpy(data, tc_log_magic, sizeof(tc_log_magic));
  data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc;
  my_msync(fd, data, tc_log_page_size, MS_SYNC);
  inited=5;

  mysql_mutex_init(key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_LOCK_active, &LOCK_active, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_LOCK_pool, &LOCK_pool, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(key_LOCK_pending_checkpoint, &LOCK_pending_checkpoint,
                   MY_MUTEX_INIT_FAST);
  mysql_cond_init(key_COND_active, &COND_active, 0);
  mysql_cond_init(key_COND_pool, &COND_pool, 0);
  mysql_cond_init(key_TC_LOG_MMAP_COND_queue_busy, &COND_queue_busy, 0);

  inited=6;

  /* Initial state: first page active, all the others in the pool. */
  syncing= 0;
  active=pages;
  DBUG_ASSERT(npages >= 2);
  pool=pages+1;
  pool_last_ptr= &((pages+npages-1)->next);
  commit_ordered_queue= NULL;
  commit_ordered_queue_busy= false;

  return 0;

err:
  /* close() undoes whatever the current value of 'inited' says was done. */
  close();
  return 1;
}
9296 
9297 /**
  there is no active page, let's get one from the pool.
9299 
9300   Two strategies here:
9301     -# take the first from the pool
9302     -# if there're waiters - take the one with the most free space.
9303 
9304   @todo
9305     page merging. try to allocate adjacent page first,
9306     so that they can be flushed both in one sync
9307 */
9308 
/*
  Pick a page from the pool, unlink it and make it the active page.

  The caller must hold LOCK_active (asserted below). LOCK_pool is taken and
  released inside. On return the lock of the new active page is held; it is
  not released in this function.
*/
void TC_LOG_MMAP::get_active_from_pool()
{
  PAGE **p, **best_p=0;
  int best_free;

  mysql_mutex_lock(&LOCK_pool);

  do
  {
    best_p= p= &pool;
    if ((*p)->waiters == 0 && (*p)->free > 0) // can the first page be used ?
      break;                                  // yes - take it.

    best_free=0;            // no - trying second strategy
    /* Scan the rest of the pool for the waiter-free page with most room. */
    for (p=&(*p)->next; *p; p=&(*p)->next)
    {
      if ((*p)->waiters == 0 && (*p)->free > best_free)
      {
        best_free=(*p)->free;
        best_p=p;
      }
    }
  }
  /* Nothing usable found: wait in overflow() and then search again. */
  while ((*best_p == 0 || best_free == 0) && overflow());

  mysql_mutex_assert_owner(&LOCK_active);
  active=*best_p;

  /* Unlink the page from the pool. */
  /* If it was the last page, the tail pointer moves back to its link. */
  if (!(*best_p)->next)
    pool_last_ptr= best_p;
  *best_p=(*best_p)->next;
  mysql_mutex_unlock(&LOCK_pool);

  /* Still held on return from this function. */
  mysql_mutex_lock(&active->lock);
  if (active->free == active->size) // we've chosen an empty page
  {
    tc_log_cur_pages_used++;
    set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
  }
}
9350 
9351 /**
9352   @todo
9353   perhaps, increase log size ?
9354 */
overflow()9355 int TC_LOG_MMAP::overflow()
9356 {
9357   /*
9358     simple overflow handling - just wait
9359     TODO perhaps, increase log size ?
9360     let's check the behaviour of tc_log_page_waits first
9361   */
9362   tc_log_page_waits++;
9363   mysql_cond_wait(&COND_pool, &LOCK_pool);
9364   return 1; // always return 1
9365 }
9366 
9367 /**
9368   Record that transaction XID is committed on the persistent storage.
9369 
9370     This function is called in the middle of two-phase commit:
9371     First all resources prepare the transaction, then tc_log->log() is called,
9372     then all resources commit the transaction, then tc_log->unlog() is called.
9373 
9374     All access to active page is serialized but it's not a problem, as
9375     we're assuming that fsync() will be a main bottleneck.
9376     That is, parallelizing writes to log pages we'll decrease number of
9377     threads waiting for a page, but then all these threads will be waiting
9378     for a fsync() anyway
9379 
9380    If tc_log == MYSQL_LOG then tc_log writes transaction to binlog and
9381    records XID in a special Xid_log_event.
9382    If tc_log = TC_LOG_MMAP then xid is written in a special memory-mapped
9383    log.
9384 
9385   @retval
9386     0  - error
9387   @retval
9388     \# - otherwise, "cookie", a number that will be passed as an argument
9389     to unlog() call. tc_log can define it any way it wants,
9390     and use for whatever purposes. TC_LOG_MMAP sets it
9391     to the position in memory where xid was logged to.
9392 */
9393 
/*
  Store xid in a free slot of the active page, then either sync that page
  ourselves or wait until another thread has synced it.

  Returns 0 on error, otherwise the cookie: the byte offset of the slot
  from the start of the mapped file.
*/
int TC_LOG_MMAP::log_one_transaction(my_xid xid)
{
  int err;
  PAGE *p;
  ulong cookie;

  mysql_mutex_lock(&LOCK_active);

  /*
    if the active page is full - just wait...
    frankly speaking, active->free here accessed outside of mutex
    protection, but it's safe, because it only means we may miss an
    unlog() for the active page, and we're not waiting for it here -
    unlog() does not signal COND_active.
  */
  while (unlikely(active && active->free == 0))
    mysql_cond_wait(&COND_active, &LOCK_active);

  /* no active page ? take one from the pool */
  /* Either branch leaves active->lock held. */
  if (active == 0)
    get_active_from_pool();
  else
    mysql_mutex_lock(&active->lock);

  p=active;

  /*
    p->free is always > 0 here because to decrease it one needs
    to take p->lock and before it one needs to take LOCK_active.
    But checked that active->free > 0 under LOCK_active and
    haven't release it ever since
  */

  /* searching for an empty slot */
  while (*p->ptr)
  {
    p->ptr++;
    DBUG_ASSERT(p->ptr < p->end);               // because p->free > 0
  }

  /* found! store xid there and mark the page dirty */
  cookie= (ulong)((uchar *)p->ptr - data);      // can never be zero
  *p->ptr++= xid;
  p->free--;
  p->state= PS_DIRTY;
  mysql_mutex_unlock(&p->lock);

  mysql_mutex_lock(&LOCK_sync);
  if (syncing)
  {                                          // somebody's syncing. let's wait
    mysql_mutex_unlock(&LOCK_active);
    mysql_mutex_lock(&p->lock);
    p->waiters++;
    /* p->lock is dropped around the wait; LOCK_sync is the wait mutex. */
    while (p->state == PS_DIRTY && syncing)
    {
      mysql_mutex_unlock(&p->lock);
      mysql_cond_wait(&p->cond, &LOCK_sync);
      mysql_mutex_lock(&p->lock);
    }
    p->waiters--;
    err= p->state == PS_ERROR;
    if (p->state != PS_DIRTY)                   // page was synced
    {
      mysql_mutex_unlock(&LOCK_sync);
      if (p->waiters == 0)
        mysql_cond_signal(&COND_pool);     // in case somebody's waiting
      mysql_mutex_unlock(&p->lock);
      goto done;                             // we're done
    }
    /* Page is still dirty and the syncing slot is free: we sync it. */
    DBUG_ASSERT(!syncing);
    mysql_mutex_unlock(&p->lock);
    syncing = p;
    mysql_mutex_unlock(&LOCK_sync);

    mysql_mutex_lock(&LOCK_active);
    active=0;                                  // page is not active anymore
    mysql_cond_broadcast(&COND_active);
    mysql_mutex_unlock(&LOCK_active);
  }
  else
  {
    syncing = p;                               // place is vacant - take it
    mysql_mutex_unlock(&LOCK_sync);
    active = 0;                                // page is not active anymore
    mysql_cond_broadcast(&COND_active);
    mysql_mutex_unlock(&LOCK_active);
  }
  err= sync();

done:
  /* The ulong cookie is converted to the int return type here. */
  return err ? 0 : cookie;
}
9486 
/*
  Sync the page in the 'syncing' slot to disk, put it back at the end of
  the pool, free the slot and wake up the threads waiting for it.

  Returns the result of my_msync(); a non-zero result also marks the page
  PS_ERROR.
*/
int TC_LOG_MMAP::sync()
{
  int err;

  DBUG_ASSERT(syncing != active);

  /*
    sit down and relax - this can take a while...
    note - no locks are held at this point
  */
  err= my_msync(fd, syncing->start, syncing->size * sizeof(my_xid), MS_SYNC);

  /* page is synced. let's move it to the pool */
  mysql_mutex_lock(&LOCK_pool);
  /* Append at the tail and move the tail pointer to the new last link. */
  (*pool_last_ptr)=syncing;
  pool_last_ptr=&(syncing->next);
  syncing->next=0;
  syncing->state= err ? PS_ERROR : PS_POOL;
  mysql_cond_signal(&COND_pool);           // in case somebody's waiting
  mysql_mutex_unlock(&LOCK_pool);

  /* marking 'syncing' slot free */
  mysql_mutex_lock(&LOCK_sync);
  mysql_cond_broadcast(&syncing->cond);    // signal "sync done"
  syncing=0;
  /*
    we check the "active" pointer without LOCK_active. Still, it's safe -
    "active" can change from NULL to not NULL any time, but it
    will take LOCK_sync before waiting on active->cond. That is, it can never
    miss a signal.
    And "active" can change to NULL only by the syncing thread
    (the thread that will send a signal below)
  */
  if (active)
    mysql_cond_signal(&active->cond);      // wake up a new syncer
  mysql_mutex_unlock(&LOCK_sync);
  return err;
}
9525 
9526 static void
mmap_do_checkpoint_callback(void * data)9527 mmap_do_checkpoint_callback(void *data)
9528 {
9529   TC_LOG_MMAP::pending_cookies *pending=
9530     static_cast<TC_LOG_MMAP::pending_cookies *>(data);
9531   ++pending->pending_count;
9532 }
9533 
unlog(ulong cookie,my_xid xid)9534 int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
9535 {
9536   pending_cookies *full_buffer= NULL;
9537   uint32 ncookies= tc_log_page_size / sizeof(my_xid);
9538   DBUG_ASSERT(*(my_xid *)(data+cookie) == xid);
9539 
9540   /*
9541     Do not delete the entry immediately, as there may be participating storage
9542     engines which implement commit_checkpoint_request(), and thus have not yet
9543     flushed the commit durably to disk.
9544 
9545     Instead put it in a queue - and periodically, we will request a checkpoint
9546     from all engines and delete a whole batch at once.
9547   */
9548   mysql_mutex_lock(&LOCK_pending_checkpoint);
9549   if (pending_checkpoint == NULL)
9550   {
9551     uint32 size= sizeof(*pending_checkpoint) + sizeof(ulong) * (ncookies - 1);
9552     if (!(pending_checkpoint=
9553           (pending_cookies *)my_malloc(size, MYF(MY_ZEROFILL))))
9554     {
9555       my_error(ER_OUTOFMEMORY, MYF(0), size);
9556       mysql_mutex_unlock(&LOCK_pending_checkpoint);
9557       return 1;
9558     }
9559   }
9560 
9561   pending_checkpoint->cookies[pending_checkpoint->count++]= cookie;
9562   if (pending_checkpoint->count == ncookies)
9563   {
9564     full_buffer= pending_checkpoint;
9565     pending_checkpoint= NULL;
9566   }
9567   mysql_mutex_unlock(&LOCK_pending_checkpoint);
9568 
9569   if (full_buffer)
9570   {
9571     /*
9572       We do an extra increment and notify here - this ensures that
9573       things work also if there are no engines at all that support
9574       commit_checkpoint_request.
9575     */
9576     ++full_buffer->pending_count;
9577     ha_commit_checkpoint_request(full_buffer, mmap_do_checkpoint_callback);
9578     commit_checkpoint_notify(full_buffer);
9579   }
9580   return 0;
9581 }
9582 
9583 
9584 void
commit_checkpoint_notify(void * cookie)9585 TC_LOG_MMAP::commit_checkpoint_notify(void *cookie)
9586 {
9587   uint count;
9588   pending_cookies *pending= static_cast<pending_cookies *>(cookie);
9589   mysql_mutex_lock(&LOCK_pending_checkpoint);
9590   DBUG_ASSERT(pending->pending_count > 0);
9591   count= --pending->pending_count;
9592   mysql_mutex_unlock(&LOCK_pending_checkpoint);
9593   if (count == 0)
9594   {
9595     uint i;
9596     for (i= 0; i < tc_log_page_size / sizeof(my_xid); ++i)
9597       delete_entry(pending->cookies[i]);
9598     my_free(pending);
9599   }
9600 }
9601 
9602 
9603 /**
9604   erase xid from the page, update page free space counters/pointers.
9605   cookie points directly to the memory where xid was logged.
9606 */
9607 
delete_entry(ulong cookie)9608 int TC_LOG_MMAP::delete_entry(ulong cookie)
9609 {
9610   PAGE *p=pages+(cookie/tc_log_page_size);
9611   my_xid *x=(my_xid *)(data+cookie);
9612 
9613   DBUG_ASSERT(x >= p->start && x < p->end);
9614 
9615   mysql_mutex_lock(&p->lock);
9616   *x=0;
9617   p->free++;
9618   DBUG_ASSERT(p->free <= p->size);
9619   set_if_smaller(p->ptr, x);
9620   if (p->free == p->size)              // the page is completely empty
9621     statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
9622   if (p->waiters == 0)                 // the page is in pool and ready to rock
9623     mysql_cond_signal(&COND_pool);     // ping ... for overflow()
9624   mysql_mutex_unlock(&p->lock);
9625   return 0;
9626 }
9627 
close()9628 void TC_LOG_MMAP::close()
9629 {
9630   uint i;
9631   switch (inited) {
9632   case 6:
9633     mysql_mutex_destroy(&LOCK_sync);
9634     mysql_mutex_destroy(&LOCK_active);
9635     mysql_mutex_destroy(&LOCK_pool);
9636     mysql_mutex_destroy(&LOCK_pending_checkpoint);
9637     mysql_cond_destroy(&COND_pool);
9638     mysql_cond_destroy(&COND_active);
9639     mysql_cond_destroy(&COND_queue_busy);
9640     /* fall through */
9641   case 5:
9642     data[0]='A'; // garble the first (signature) byte, in case mysql_file_delete fails
9643     /* fall through */
9644   case 4:
9645     for (i=0; i < npages; i++)
9646     {
9647       if (pages[i].ptr == 0)
9648         break;
9649       mysql_mutex_destroy(&pages[i].lock);
9650       mysql_cond_destroy(&pages[i].cond);
9651     }
9652     /* fall through */
9653   case 3:
9654     my_free(pages);
9655     /* fall through */
9656   case 2:
9657     my_munmap((char*)data, (size_t)file_length);
9658     /* fall through */
9659   case 1:
9660     mysql_file_close(fd, MYF(0));
9661   }
9662   if (inited>=5) // cannot do in the switch because of Windows
9663     mysql_file_delete(key_file_tclog, logname, MYF(MY_WME));
9664   if (pending_checkpoint)
9665     my_free(pending_checkpoint);
9666   inited=0;
9667 }
9668 
9669 
recover()9670 int TC_LOG_MMAP::recover()
9671 {
9672   HASH xids;
9673   PAGE *p=pages, *end_p=pages+npages;
9674 
9675   if (bcmp(data, tc_log_magic, sizeof(tc_log_magic)))
9676   {
9677     sql_print_error("Bad magic header in tc log");
9678     goto err1;
9679   }
9680 
9681   /*
9682     the first byte after magic signature is set to current
9683     number of storage engines on startup
9684   */
9685   if (data[sizeof(tc_log_magic)] > total_ha_2pc)
9686   {
9687     sql_print_error("Recovery failed! You must enable "
9688                     "all engines that were enabled at the moment of the crash");
9689     goto err1;
9690   }
9691 
9692   if (my_hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
9693                    sizeof(my_xid), 0, 0, MYF(0)))
9694     goto err1;
9695 
9696   for ( ; p < end_p ; p++)
9697   {
9698     for (my_xid *x=p->start; x < p->end; x++)
9699       if (*x && my_hash_insert(&xids, (uchar *)x))
9700         goto err2; // OOM
9701   }
9702 
9703   if (ha_recover(&xids))
9704     goto err2;
9705 
9706   my_hash_free(&xids);
9707   bzero(data, (size_t)file_length);
9708   return 0;
9709 
9710 err2:
9711   my_hash_free(&xids);
9712 err1:
9713   sql_print_error("Crash recovery failed. Either correct the problem "
9714                   "(if it's, for example, out of memory error) and restart, "
9715                   "or delete tc log and start mysqld with "
9716                   "--tc-heuristic-recover={commit|rollback}");
9717   return 1;
9718 }
9719 #endif
9720 
/*
  Transaction coordinator log objects with static storage duration.
  tc_log has no initializer here; presumably it is pointed at one of the
  concrete implementations during server startup (TODO confirm against the
  startup code).
*/
TC_LOG *tc_log;
TC_LOG_DUMMY tc_log_dummy;
TC_LOG_MMAP  tc_log_mmap;
9724 
9725 /**
9726   Perform heuristic recovery, if --tc-heuristic-recover was used.
9727 
9728   @note
9729     no matter whether heuristic recovery was successful or not
9730     mysqld must exit. So, return value is the same in both cases.
9731 
9732   @retval
9733     0	no heuristic recovery was requested
9734   @retval
9735     1   heuristic recovery was performed
9736 */
9737 
using_heuristic_recover()9738 int TC_LOG::using_heuristic_recover()
9739 {
9740   if (!tc_heuristic_recover)
9741     return 0;
9742 
9743   sql_print_information("Heuristic crash recovery mode");
9744   if (ha_recover(0))
9745     sql_print_error("Heuristic crash recovery failed");
9746   sql_print_information("Please restart mysqld without --tc-heuristic-recover");
9747   return 1;
9748 }
9749 
/****** transaction coordinator log for 2pc - binlog() based solution ******/
/*
  TC_LOG_BINLOG is only an alias: every TC_LOG_BINLOG::xxx() definition
  below is a member function of MYSQL_BIN_LOG.
*/
#define TC_LOG_BINLOG MYSQL_BIN_LOG
9752 
open(const char * opt_name)9753 int TC_LOG_BINLOG::open(const char *opt_name)
9754 {
9755   int      error= 1;
9756 
9757   DBUG_ASSERT(total_ha_2pc > 1);
9758   DBUG_ASSERT(opt_name && opt_name[0]);
9759 
9760   if (!my_b_inited(&index_file))
9761   {
9762     /* There was a failure to open the index file, can't open the binlog */
9763     cleanup();
9764     return 1;
9765   }
9766 
9767   if (using_heuristic_recover())
9768   {
9769     mysql_mutex_lock(&LOCK_log);
9770     /* generate a new binlog to mask a corrupted one */
9771     open(opt_name, LOG_BIN, 0, 0, WRITE_CACHE, max_binlog_size, 0, TRUE);
9772     mysql_mutex_unlock(&LOCK_log);
9773     cleanup();
9774     return 1;
9775   }
9776 
9777   error= do_binlog_recovery(opt_name, true);
9778   binlog_state_recover_done= true;
9779   return error;
9780 }
9781 
/**
  This is called on shutdown, after ha_panic.

  The body is empty: no resources are released by this method.
*/
void TC_LOG_BINLOG::close()
{
}
9786 
9787 /*
9788   Do a binlog log_xid() for a group of transactions, linked through
9789   thd->next_commit_ordered.
9790 */
9791 int
log_and_order(THD * thd,my_xid xid,bool all,bool need_prepare_ordered,bool need_commit_ordered)9792 TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
9793                              bool need_prepare_ordered __attribute__((unused)),
9794                              bool need_commit_ordered __attribute__((unused)))
9795 {
9796   int err;
9797   DBUG_ENTER("TC_LOG_BINLOG::log_and_order");
9798 
9799   binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data();
9800   if (!cache_mngr)
9801   {
9802     WSREP_DEBUG("Skipping empty log_xid: %s", thd->query());
9803     DBUG_RETURN(0);
9804   }
9805 
9806   cache_mngr->using_xa= TRUE;
9807   cache_mngr->xa_xid= xid;
9808   err= binlog_commit_flush_xid_caches(thd, cache_mngr, all, xid);
9809 
9810   DEBUG_SYNC(thd, "binlog_after_log_and_order");
9811 
9812   if (err)
9813     DBUG_RETURN(0);
9814 
9815   bool need_unlog= cache_mngr->need_unlog;
9816   /*
9817     The transaction won't need the flag anymore.
9818     Todo/fixme: consider to move the statement into cache_mngr->reset()
9819                 relocated to the current or later point.
9820   */
9821   cache_mngr->need_unlog= false;
9822   /*
9823     If using explicit user XA, we will not have XID. We must still return a
9824     non-zero cookie (as zero cookie signals error).
9825   */
9826   if (!xid || !need_unlog)
9827     DBUG_RETURN(BINLOG_COOKIE_DUMMY(cache_mngr->delayed_error));
9828 
9829   DBUG_RETURN(BINLOG_COOKIE_MAKE(cache_mngr->binlog_id,
9830                                  cache_mngr->delayed_error));
9831 }
9832 
9833 /*
9834   After an XID is logged, we need to hold on to the current binlog file until
9835   it is fully committed in the storage engine. The reason is that crash
9836   recovery only looks at the latest binlog, so we must make sure there are no
9837   outstanding prepared (but not committed) transactions before rotating the
9838   binlog.
9839 
9840   To handle this, we keep a count of outstanding XIDs. This function is used
9841   to increase this count when committing one or more transactions to the
9842   binary log.
9843 */
9844 void
mark_xids_active(ulong binlog_id,uint xid_count)9845 TC_LOG_BINLOG::mark_xids_active(ulong binlog_id, uint xid_count)
9846 {
9847   xid_count_per_binlog *b;
9848 
9849   DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
9850   DBUG_PRINT("info", ("binlog_id=%lu xid_count=%u", binlog_id, xid_count));
9851 
9852   mysql_mutex_lock(&LOCK_xid_list);
9853   I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
9854   while ((b= it++))
9855   {
9856     if (b->binlog_id == binlog_id)
9857     {
9858       b->xid_count += xid_count;
9859       break;
9860     }
9861   }
9862   /*
9863     As we do not delete elements until count reach zero, elements should always
9864     be found.
9865   */
9866   DBUG_ASSERT(b);
9867   mysql_mutex_unlock(&LOCK_xid_list);
9868   DBUG_VOID_RETURN;
9869 }
9870 
/*
  Once an XID is committed, it can no longer be needed during crash recovery,
  as it has been durably recorded on disk as "committed".

  This function is called to mark an XID this way. It needs to decrease the
  count of pending XIDs in the corresponding binlog. When the count reaches
  zero (for an "old" binlog that is not the active one), that binlog file no
  longer need to be scanned during crash recovery, so we can log a new binlog
  checkpoint.

  binlog_id         id of the binlog whose pending XID count is decremented
  write_checkpoint  when false, the count is still decremented but no
                    binlog checkpoint event is written
*/
void
TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
{
  xid_count_per_binlog *b;
  bool first;
  ulong current;

  DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");

  mysql_mutex_lock(&LOCK_xid_list);
  current= current_binlog_id;
  I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
  /* 'first' stays true only if the matching entry is the head of the list. */
  first= true;
  while ((b= it++))
  {
    if (b->binlog_id == binlog_id)
    {
      --b->xid_count;

      DBUG_ASSERT(b->xid_count >= 0); // catch unmatched (++) decrement

      break;
    }
    first= false;
  }
  /* Binlog is always found, as we do not remove until count reaches 0 */
  DBUG_ASSERT(b);
  /*
    If a RESET MASTER is pending, we are about to remove all log files, and
    the RESET MASTER thread is waiting for all pending unlog() calls to
    complete while holding LOCK_log. In this case we should not log a binlog
    checkpoint event (it would be deleted immediately anyway and we would
    deadlock on LOCK_log) but just signal the thread.
  */
  if (unlikely(reset_master_pending))
  {
    mysql_cond_broadcast(&COND_xid_list);
    mysql_mutex_unlock(&LOCK_xid_list);
    DBUG_VOID_RETURN;
  }

  /*
    A checkpoint is only due when the entry belongs to an old binlog, its
    count just dropped to zero, it is the oldest entry in the list, and the
    caller asked for a checkpoint to be written.
  */
  if (likely(binlog_id == current) || b->xid_count != 0 || !first ||
      !write_checkpoint)
  {
    /* No new binlog checkpoint reached yet. */
    mysql_mutex_unlock(&LOCK_xid_list);
    DBUG_VOID_RETURN;
  }

  /*
    Now log a binlog checkpoint for the first binlog file with a non-zero count.

    Note that it is possible (though perhaps unlikely) that when count of
    binlog (N-2) drops to zero, binlog (N-1) is already at zero. So we may
    need to skip several entries before we find the one to log in the binlog
    checkpoint event.

    We chain the locking of LOCK_xid_list and LOCK_log, so that we ensure that
    Binlog_checkpoint_events are logged in order. This simplifies recovery a
    bit, as it can just take the last binlog checkpoint in the log, rather
    than compare all found against each other to find the one pointing to the
    most recent binlog.

    Note also that we need to first release LOCK_xid_list, then acquire
    LOCK_log, then re-acquire LOCK_xid_list. If we were to take LOCK_log while
    holding LOCK_xid_list, we might deadlock with other threads that take the
    locks in the opposite order.
  */

  /* Counted while this thread is between the two locks; the broadcast
     below announces the decrement to waiters on COND_xid_list. */
  ++mark_xid_done_waiting;
  mysql_mutex_unlock(&LOCK_xid_list);
  mysql_mutex_lock(&LOCK_log);
  mysql_mutex_lock(&LOCK_xid_list);
  --mark_xid_done_waiting;
  mysql_cond_broadcast(&COND_xid_list);
  /* We need to reload current_binlog_id due to release/re-take of lock. */
  current= current_binlog_id;

  for (;;)
  {
    /* Remove initial element(s) with zero count. */
    b= binlog_xid_count_list.head();
    /*
      We must not remove all elements in the list - the entry for the current
      binlog must be present always.
    */
    DBUG_ASSERT(b);
    if (b->binlog_id == current || b->xid_count > 0)
      break;
    WSREP_XID_LIST_ENTRY("TC_LOG_BINLOG::mark_xid_done(): Removing "
                         "xid_list_entry for %s (%lu)", b);
    delete binlog_xid_count_list.get();
  }

  /* LOCK_log is still held here while b is used after the unlock. */
  mysql_mutex_unlock(&LOCK_xid_list);
  write_binlog_checkpoint_event_already_locked(b->binlog_name,
                                               b->binlog_name_len);
  mysql_mutex_unlock(&LOCK_log);
  DBUG_VOID_RETURN;
}
9981 
unlog(ulong cookie,my_xid xid)9982 int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
9983 {
9984   DBUG_ENTER("TC_LOG_BINLOG::unlog");
9985   if (!xid)
9986     DBUG_RETURN(0);
9987 
9988   if (!BINLOG_COOKIE_IS_DUMMY(cookie))
9989     mark_xid_done(BINLOG_COOKIE_GET_ID(cookie), true);
9990   /*
9991     See comment in trx_group_commit_leader() - if rotate() gave a failure,
9992     we delay the return of error code to here.
9993   */
9994   DBUG_RETURN(BINLOG_COOKIE_GET_ERROR_FLAG(cookie));
9995 }
9996 
9997 void
commit_checkpoint_notify(void * cookie)9998 TC_LOG_BINLOG::commit_checkpoint_notify(void *cookie)
9999 {
10000   xid_count_per_binlog *entry= static_cast<xid_count_per_binlog *>(cookie);
10001   bool found_entry= false;
10002   mysql_mutex_lock(&LOCK_binlog_background_thread);
10003   /* count the same notification kind from different engines */
10004   for (xid_count_per_binlog *link= binlog_background_thread_queue;
10005        link && !found_entry; link= link->next_in_queue)
10006   {
10007     if ((found_entry= (entry == link)))
10008       entry->notify_count++;
10009   }
10010   if (!found_entry)
10011   {
10012     entry->next_in_queue= binlog_background_thread_queue;
10013     binlog_background_thread_queue= entry;
10014   }
10015   mysql_cond_signal(&COND_binlog_background_thread);
10016   mysql_mutex_unlock(&LOCK_binlog_background_thread);
10017 }
10018 
/*
  Binlog background thread.

  This thread is used to log binlog checkpoints in the background, rather than
  in the context of random storage engine threads that happen to call
  commit_checkpoint_notify_ha() and may not like the delays while syncing
  binlog to disk or may not be setup with all my_thread_init() and other
  necessary stuff.

  In the future, this thread could also be used to do log rotation in the
  background, which could eliminate all stalls around binlog rotations.

  Outline: create a THD, load the slave GTID state, announce that the
  thread has started, then loop: wait for queued entries or a stop request,
  detach the queue and call mark_xid_done() for every entry. On stop the
  THD is destroyed and the end of the thread is signalled.

  The argument is unused; the thread always returns 0.
*/
pthread_handler_t
binlog_background_thread(void *arg __attribute__((unused)))
{
  bool stop;
  MYSQL_BIN_LOG::xid_count_per_binlog *queue, *next;
  THD *thd;
  my_thread_init();
  DBUG_ENTER("binlog_background_thread");

  thd= new THD(next_thread_id());
  thd->system_thread= SYSTEM_THREAD_BINLOG_BACKGROUND;
  thd->thread_stack= (char*) &thd;           /* Set approximate stack start */
  thd->store_globals();
  thd->security_ctx->skip_grants();
  thd->set_command(COM_DAEMON);

  /*
    Load the slave replication GTID state from the mysql.gtid_slave_pos
    table.

    This is mostly so that we can start our seq_no counter from the highest
    seq_no seen by a slave. This way, we have a way to tell if a transaction
    logged by ourselves as master is newer or older than a replicated
    transaction.
  */
#ifdef HAVE_REPLICATION
  /* A failure here is only reported as a warning; the thread continues. */
  if (rpl_load_gtid_slave_state(thd))
    sql_print_warning("Failed to load slave replication state from table "
                      "%s.%s: %u: %s", "mysql",
                      rpl_gtid_slave_state_table_name.str,
                      thd->get_stmt_da()->sql_errno(),
                      thd->get_stmt_da()->message());
#endif

  /*
    Announce start-up. start_binlog_background_thread() waits on the same
    condition variable until binlog_background_thread_started is set.
  */
  mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
  binlog_background_thread_started= true;
  mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
  mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

  for (;;)
  {
    /*
      Wait until there is something in the queue to process, or we are asked
      to shut down.
    */
    THD_STAGE_INFO(thd, stage_binlog_waiting_background_tasks);
    mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
    for (;;)
    {
      stop= binlog_background_thread_stop;
      queue= binlog_background_thread_queue;
      if (stop && !mysql_bin_log.is_xidlist_idle())
      {
        /*
          Delay stop until all pending binlog checkpoints have been processed.
        */
        stop= false;
      }
      if (stop || queue)
        break;
      mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread,
                      &mysql_bin_log.LOCK_binlog_background_thread);
    }
    /* Grab the queue, if any. */
    binlog_background_thread_queue= NULL;
    mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

    /* Process any incoming commit_checkpoint_notify() calls. */
    DBUG_EXECUTE_IF("inject_binlog_background_thread_before_mark_xid_done",
      DBUG_ASSERT(!debug_sync_set_action(
        thd,
        STRING_WITH_LEN("binlog_background_thread_before_mark_xid_done "
                        "SIGNAL injected_binlog_background_thread "
                        "WAIT_FOR something_that_will_never_happen "
                        "TIMEOUT 2")));
      );
    while (queue)
    {
      long count= queue->notify_count;
      THD_STAGE_INFO(thd, stage_binlog_processing_checkpoint_notify);
      DEBUG_SYNC(thd, "binlog_background_thread_before_mark_xid_done");
      /* Set the thread start time */
      thd->set_time();
      /* Grab next pointer first, as mark_xid_done() may free the element. */
      next= queue->next_in_queue;
      queue->notify_count= 0;
      /*
        "<=" on purpose: one call for the notification that queued the
        entry, plus one for each further notification that was only counted
        in notify_count by commit_checkpoint_notify().
      */
      for (long i= 0; i <= count; i++)
        mysql_bin_log.mark_xid_done(queue->binlog_id, true);
      queue= next;

      DBUG_EXECUTE_IF("binlog_background_checkpoint_processed",
        DBUG_ASSERT(!debug_sync_set_action(
          thd,
          STRING_WITH_LEN("now SIGNAL binlog_background_checkpoint_processed")));
        );
    }

    if (stop)
      break;
  }

  THD_STAGE_INFO(thd, stage_binlog_stopping_background_thread);

  /* No need to use mutex as thd is not linked into other threads */
  delete thd;

  my_thread_end();

  /* Signal that we are (almost) stopped. */
  mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
  /* Clearing the stop flag is how completion of the stop is reported. */
  binlog_background_thread_stop= false;
  mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
  mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

  DBUG_RETURN(0);
}
10147 
#ifdef HAVE_PSI_INTERFACE
/* Instrumentation key used when creating the binlog background thread. */
static PSI_thread_key key_thread_binlog;

/*
  Thread instrumentation descriptors; handed to
  PSI_server->register_thread() by start_binlog_background_thread().
*/
static PSI_thread_info all_binlog_threads[]=
{
  { &key_thread_binlog, "binlog_background", PSI_FLAG_GLOBAL},
};
#endif /* HAVE_PSI_INTERFACE */
10156 
10157 static bool
start_binlog_background_thread()10158 start_binlog_background_thread()
10159 {
10160   pthread_t th;
10161 
10162 #ifdef HAVE_PSI_INTERFACE
10163   if (PSI_server)
10164     PSI_server->register_thread("sql", all_binlog_threads,
10165                                 array_elements(all_binlog_threads));
10166 #endif
10167 
10168   if (mysql_thread_create(key_thread_binlog, &th, &connection_attrib,
10169                           binlog_background_thread, NULL))
10170     return 1;
10171 
10172   /*
10173     Wait for the thread to have started (so we know that the slave replication
10174     state is loaded and we have correct global_gtid_counter).
10175   */
10176   mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
10177   while (!binlog_background_thread_started)
10178     mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread_end,
10179                     &mysql_bin_log.LOCK_binlog_background_thread);
10180   mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);
10181 
10182   return 0;
10183 }
10184 
10185 
recover(LOG_INFO * linfo,const char * last_log_name,IO_CACHE * first_log,Format_description_log_event * fdle,bool do_xa)10186 int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
10187                            IO_CACHE *first_log,
10188                            Format_description_log_event *fdle, bool do_xa)
10189 {
10190   Log_event *ev= NULL;
10191   HASH xids;
10192   MEM_ROOT mem_root;
10193   char binlog_checkpoint_name[FN_REFLEN];
10194   bool binlog_checkpoint_found;
10195   bool first_round;
10196   IO_CACHE log;
10197   File file= -1;
10198   const char *errmsg;
10199 #ifdef HAVE_REPLICATION
10200   rpl_gtid last_gtid;
10201   bool last_gtid_standalone= false;
10202   bool last_gtid_valid= false;
10203 #endif
10204 
10205   if (! fdle->is_valid() ||
10206       (do_xa && my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
10207                              sizeof(my_xid), 0, 0, MYF(0))))
10208     goto err1;
10209 
10210   if (do_xa)
10211     init_alloc_root(&mem_root, "TC_LOG_BINLOG", TC_LOG_PAGE_SIZE,
10212                     TC_LOG_PAGE_SIZE, MYF(0));
10213 
10214   fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
10215 
10216   /*
10217     Scan the binlog for XIDs that need to be committed if still in the
10218     prepared stage.
10219 
10220     Start with the latest binlog file, then continue with any other binlog
10221     files if the last found binlog checkpoint indicates it is needed.
10222   */
10223 
10224   binlog_checkpoint_found= false;
10225   first_round= true;
10226   for (;;)
10227   {
10228     while ((ev= Log_event::read_log_event(first_round ? first_log : &log,
10229                                           fdle, opt_master_verify_checksum))
10230            && ev->is_valid())
10231     {
10232       enum Log_event_type typ= ev->get_type_code();
10233       switch (typ)
10234       {
10235       case XID_EVENT:
10236       {
10237         if (do_xa)
10238         {
10239           Xid_log_event *xev=(Xid_log_event *)ev;
10240           uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
10241                                           sizeof(xev->xid));
10242           if (!x || my_hash_insert(&xids, x))
10243             goto err2;
10244         }
10245         break;
10246       }
10247       case BINLOG_CHECKPOINT_EVENT:
10248         if (first_round && do_xa)
10249         {
10250           size_t dir_len;
10251           Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev;
10252           if (cev->binlog_file_len >= FN_REFLEN)
10253             sql_print_warning("Incorrect binlog checkpoint event with too "
10254                               "long file name found.");
10255           else
10256           {
10257             /*
10258               Note that we cannot use make_log_name() here, as we have not yet
10259               initialised MYSQL_BIN_LOG::log_file_name.
10260             */
10261             dir_len= dirname_length(last_log_name);
10262             strmake(strnmov(binlog_checkpoint_name, last_log_name, dir_len),
10263                     cev->binlog_file_name, FN_REFLEN - 1 - dir_len);
10264             binlog_checkpoint_found= true;
10265           }
10266         }
10267         break;
10268       case GTID_LIST_EVENT:
10269         if (first_round)
10270         {
10271           Gtid_list_log_event *glev= (Gtid_list_log_event *)ev;
10272 
10273           /* Initialise the binlog state from the Gtid_list event. */
10274           if (rpl_global_gtid_binlog_state.load(glev->list, glev->count))
10275             goto err2;
10276         }
10277         break;
10278 
10279 #ifdef HAVE_REPLICATION
10280       case GTID_EVENT:
10281         if (first_round)
10282         {
10283           Gtid_log_event *gev= (Gtid_log_event *)ev;
10284 
10285           /* Update the binlog state with any GTID logged after Gtid_list. */
10286           last_gtid.domain_id= gev->domain_id;
10287           last_gtid.server_id= gev->server_id;
10288           last_gtid.seq_no= gev->seq_no;
10289           last_gtid_standalone=
10290             ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false);
10291           last_gtid_valid= true;
10292         }
10293         break;
10294 #endif
10295 
10296       case START_ENCRYPTION_EVENT:
10297         {
10298           if (fdle->start_decryption((Start_encryption_log_event*) ev))
10299             goto err2;
10300         }
10301         break;
10302 
10303       default:
10304         /* Nothing. */
10305         break;
10306       }
10307 
10308 #ifdef HAVE_REPLICATION
10309       if (last_gtid_valid &&
10310           ((last_gtid_standalone && !ev->is_part_of_group(typ)) ||
10311            (!last_gtid_standalone &&
10312             (typ == XID_EVENT ||
10313              (LOG_EVENT_IS_QUERY(typ) &&
10314               (((Query_log_event *)ev)->is_commit() ||
10315                ((Query_log_event *)ev)->is_rollback()))))))
10316       {
10317         if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false))
10318           goto err2;
10319         last_gtid_valid= false;
10320       }
10321 #endif
10322 
10323       delete ev;
10324       ev= NULL;
10325     }
10326 
10327     if (!do_xa)
10328       break;
10329     /*
10330       If the last binlog checkpoint event points to an older log, we have to
10331       scan all logs from there also, to get all possible XIDs to recover.
10332 
10333       If there was no binlog checkpoint event at all, this means the log was
10334       written by an older version of MariaDB (or MySQL) - these always have an
10335       (implicit) binlog checkpoint event at the start of the last binlog file.
10336     */
10337     if (first_round)
10338     {
10339       if (!binlog_checkpoint_found)
10340         break;
10341       first_round= false;
10342       DBUG_EXECUTE_IF("xa_recover_expect_master_bin_000004",
10343           if (0 != strcmp("./master-bin.000004", binlog_checkpoint_name) &&
10344               0 != strcmp(".\\master-bin.000004", binlog_checkpoint_name))
10345             DBUG_SUICIDE();
10346         );
10347       if (find_log_pos(linfo, binlog_checkpoint_name, 1))
10348       {
10349         sql_print_error("Binlog file '%s' not found in binlog index, needed "
10350                         "for recovery. Aborting.", binlog_checkpoint_name);
10351         goto err2;
10352       }
10353     }
10354     else
10355     {
10356       end_io_cache(&log);
10357       mysql_file_close(file, MYF(MY_WME));
10358       file= -1;
10359     }
10360 
10361     if (!strcmp(linfo->log_file_name, last_log_name))
10362       break;                                    // No more files to do
10363     if ((file= open_binlog(&log, linfo->log_file_name, &errmsg)) < 0)
10364     {
10365       sql_print_error("%s", errmsg);
10366       goto err2;
10367     }
10368     /*
10369       We do not need to read the Format_description_log_event of other binlog
10370       files. It is not possible for a binlog checkpoint to span multiple
10371       binlog files written by different versions of the server. So we can use
10372       the first one read for reading from all binlog files.
10373     */
10374     if (find_next_log(linfo, 1))
10375     {
10376       sql_print_error("Error reading binlog files during recovery. Aborting.");
10377       goto err2;
10378     }
10379     fdle->reset_crypto();
10380   }
10381 
10382   if (do_xa)
10383   {
10384     if (ha_recover(&xids))
10385       goto err2;
10386 
10387     free_root(&mem_root, MYF(0));
10388     my_hash_free(&xids);
10389   }
10390   return 0;
10391 
10392 err2:
10393   delete ev;
10394   if (file >= 0)
10395   {
10396     end_io_cache(&log);
10397     mysql_file_close(file, MYF(MY_WME));
10398   }
10399   if (do_xa)
10400   {
10401     free_root(&mem_root, MYF(0));
10402     my_hash_free(&xids);
10403   }
10404 err1:
10405   sql_print_error("Crash recovery failed. Either correct the problem "
10406                   "(if it's, for example, out of memory error) and restart, "
10407                   "or delete (or rename) binary log and start mysqld with "
10408                   "--tc-heuristic-recover={commit|rollback}");
10409   return 1;
10410 }
10411 
10412 
10413 int
do_binlog_recovery(const char * opt_name,bool do_xa_recovery)10414 MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
10415 {
10416   LOG_INFO log_info;
10417   const char *errmsg;
10418   IO_CACHE    log;
10419   File        file;
10420   Log_event  *ev= 0;
10421   Format_description_log_event fdle(BINLOG_VERSION);
10422   char        log_name[FN_REFLEN];
10423   int error;
10424 
10425   if (unlikely((error= find_log_pos(&log_info, NullS, 1))))
10426   {
10427     /*
10428       If there are no binlog files (LOG_INFO_EOF), then we still try to read
10429       the .state file to restore the binlog state. This allows to copy a server
10430       to provision a new one without copying the binlog files (except the
10431       master-bin.state file) and still preserve the correct binlog state.
10432     */
10433     if (error != LOG_INFO_EOF)
10434       sql_print_error("find_log_pos() failed (error: %d)", error);
10435     else
10436     {
10437       error= read_state_from_file();
10438       if (error == 2)
10439       {
10440         /*
10441           No binlog files and no binlog state is not an error (eg. just initial
10442           server start after fresh installation).
10443         */
10444         error= 0;
10445       }
10446     }
10447     return error;
10448   }
10449 
10450   if (! fdle.is_valid())
10451     return 1;
10452 
10453   do
10454   {
10455     strmake_buf(log_name, log_info.log_file_name);
10456   } while (!(error= find_next_log(&log_info, 1)));
10457 
10458   if (error !=  LOG_INFO_EOF)
10459   {
10460     sql_print_error("find_log_pos() failed (error: %d)", error);
10461     return error;
10462   }
10463 
10464   if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
10465   {
10466     sql_print_error("%s", errmsg);
10467     return 1;
10468   }
10469 
10470   if ((ev= Log_event::read_log_event(&log, &fdle,
10471                                      opt_master_verify_checksum)) &&
10472       ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
10473   {
10474     if (ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
10475     {
10476       sql_print_information("Recovering after a crash using %s", opt_name);
10477       error= recover(&log_info, log_name, &log,
10478                      (Format_description_log_event *)ev, do_xa_recovery);
10479     }
10480     else
10481     {
10482       error= read_state_from_file();
10483       if (unlikely(error == 2))
10484       {
10485         /*
10486           The binlog exists, but the .state file is missing. This is normal if
10487           this is the first master start after a major upgrade to 10.0 (with
10488           GTID support).
10489 
10490           However, it could also be that the .state file was lost somehow, and
10491           in this case it could be a serious issue, as we would set the wrong
10492           binlog state in the next binlog file to be created, and GTID
10493           processing would be corrupted. A common way would be copying files
10494           from an old server to a new one and forgetting the .state file.
10495 
10496           So in this case, we want to try to recover the binlog state by
10497           scanning the last binlog file (but we do not need any XA recovery).
10498 
10499           ToDo: We could avoid one scan at first start after major upgrade, by
10500           detecting that there is no GTID_LIST event at the start of the
10501           binlog file, and stopping the scan in that case.
10502         */
10503         error= recover(&log_info, log_name, &log,
10504                        (Format_description_log_event *)ev, false);
10505       }
10506     }
10507   }
10508 
10509   delete ev;
10510   end_io_cache(&log);
10511   mysql_file_close(file, MYF(MY_WME));
10512 
10513   return error;
10514 }
10515 
10516 
10517 #ifdef INNODB_COMPATIBILITY_HOOKS
10518 /**
10519   Get the file name of the MySQL binlog.
10520   @return the name of the binlog file
10521 */
10522 extern "C"
mysql_bin_log_file_name(void)10523 const char* mysql_bin_log_file_name(void)
10524 {
10525   return mysql_bin_log.get_log_fname();
10526 }
10527 /**
10528   Get the current position of the MySQL binlog.
10529   @return byte offset from the beginning of the binlog
10530 */
10531 extern "C"
mysql_bin_log_file_pos(void)10532 ulonglong mysql_bin_log_file_pos(void)
10533 {
10534   return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file;
10535 }
10536 /*
10537   Get the current position of the MySQL binlog for transaction currently being
10538   committed.
10539 
10540   This is valid to call from within storage engine commit_ordered() and
10541   commit() methods only.
10542 
10543   Since it stores the position inside THD, it is safe to call without any
10544   locking.
10545 */
10546 void
mysql_bin_log_commit_pos(THD * thd,ulonglong * out_pos,const char ** out_file)10547 mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
10548 {
10549   binlog_cache_mngr *cache_mngr;
10550   if (opt_bin_log &&
10551       (cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton)))
10552   {
10553     *out_file= cache_mngr->last_commit_pos_file;
10554     *out_pos= (ulonglong)(cache_mngr->last_commit_pos_offset);
10555   }
10556   else
10557   {
10558     *out_file= NULL;
10559     *out_pos= 0;
10560   }
10561 }
10562 #endif /* INNODB_COMPATIBILITY_HOOKS */
10563 
10564 
10565 static void
binlog_checksum_update(MYSQL_THD thd,struct st_mysql_sys_var * var,void * var_ptr,const void * save)10566 binlog_checksum_update(MYSQL_THD thd, struct st_mysql_sys_var *var,
10567                        void *var_ptr, const void *save)
10568 {
10569   ulong value=  *((ulong *)save);
10570   bool check_purge= false;
10571   ulong UNINIT_VAR(prev_binlog_id);
10572 
10573   mysql_mutex_lock(mysql_bin_log.get_log_lock());
10574   if(mysql_bin_log.is_open())
10575   {
10576     prev_binlog_id= mysql_bin_log.current_binlog_id;
10577     if (binlog_checksum_options != value)
10578       mysql_bin_log.checksum_alg_reset= (enum_binlog_checksum_alg)value;
10579     if (mysql_bin_log.rotate(true, &check_purge))
10580       check_purge= false;
10581   }
10582   else
10583   {
10584     binlog_checksum_options= value;
10585   }
10586   DBUG_ASSERT(binlog_checksum_options == value);
10587   mysql_bin_log.checksum_alg_reset= BINLOG_CHECKSUM_ALG_UNDEF;
10588   mysql_mutex_unlock(mysql_bin_log.get_log_lock());
10589   if (check_purge)
10590     mysql_bin_log.checkpoint_and_purge(prev_binlog_id);
10591 }
10592 
10593 
show_binlog_vars(THD * thd,SHOW_VAR * var,void *,system_status_var * status_var,enum_var_type)10594 static int show_binlog_vars(THD *thd, SHOW_VAR *var, void *,
10595                             system_status_var *status_var, enum_var_type)
10596 {
10597   mysql_bin_log.set_status_variables(thd);
10598   var->type= SHOW_ARRAY;
10599   var->value= (char *)&binlog_status_vars_detail;
10600   return 0;
10601 }
10602 
10603 static SHOW_VAR binlog_status_vars_top[]= {
10604   {"Binlog", (char *) &show_binlog_vars, SHOW_FUNC},
10605   {NullS, NullS, SHOW_LONG}
10606 };
10607 
10608 static MYSQL_SYSVAR_BOOL(
10609   optimize_thread_scheduling,
10610   opt_optimize_thread_scheduling,
10611   PLUGIN_VAR_READONLY,
10612   "Run fast part of group commit in a single thread, to optimize kernel "
10613   "thread scheduling. On by default. Disable to run each transaction in group "
10614   "commit in its own thread, which can be slower at very high concurrency. "
10615   "This option is mostly for testing one algorithm versus the other, and it "
10616   "should not normally be necessary to change it.",
10617   NULL,
10618   NULL,
10619   1);
10620 
10621 static MYSQL_SYSVAR_ENUM(
10622   checksum,
10623   binlog_checksum_options,
10624   PLUGIN_VAR_RQCMDARG,
10625   "Type of BINLOG_CHECKSUM_ALG. Include checksum for "
10626   "log events in the binary log",
10627   NULL,
10628   binlog_checksum_update,
10629   BINLOG_CHECKSUM_ALG_CRC32,
10630   &binlog_checksum_typelib);
10631 
10632 static struct st_mysql_sys_var *binlog_sys_vars[]=
10633 {
10634   MYSQL_SYSVAR(optimize_thread_scheduling),
10635   MYSQL_SYSVAR(checksum),
10636   NULL
10637 };
10638 
10639 
10640 /*
10641   Copy out the non-directory part of binlog position filename for the
10642   `binlog_snapshot_file' status variable, same way as it is done for
10643   SHOW MASTER STATUS.
10644 */
10645 static void
set_binlog_snapshot_file(const char * src)10646 set_binlog_snapshot_file(const char *src)
10647 {
10648   size_t dir_len = dirname_length(src);
10649   strmake_buf(binlog_snapshot_file, src + dir_len);
10650 }
10651 
10652 /*
10653   Copy out current values of status variables, for SHOW STATUS or
10654   information_schema.global_status.
10655 
10656   This is called only under LOCK_all_status_vars, so we can fill in a static array.
10657 */
10658 void
set_status_variables(THD * thd)10659 TC_LOG_BINLOG::set_status_variables(THD *thd)
10660 {
10661   binlog_cache_mngr *cache_mngr;
10662 
10663   if (thd && opt_bin_log)
10664     cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10665   else
10666     cache_mngr= 0;
10667 
10668   bool have_snapshot= (cache_mngr && cache_mngr->last_commit_pos_file[0] != 0);
10669   mysql_mutex_lock(&LOCK_commit_ordered);
10670   binlog_status_var_num_commits= this->num_commits;
10671   binlog_status_var_num_group_commits= this->num_group_commits;
10672   if (!have_snapshot)
10673   {
10674     set_binlog_snapshot_file(last_commit_pos_file);
10675     binlog_snapshot_position= last_commit_pos_offset;
10676   }
10677   mysql_mutex_unlock(&LOCK_commit_ordered);
10678   mysql_mutex_lock(&LOCK_prepare_ordered);
10679   binlog_status_group_commit_trigger_count= this->group_commit_trigger_count;
10680   binlog_status_group_commit_trigger_timeout= this->group_commit_trigger_timeout;
10681   binlog_status_group_commit_trigger_lock_wait= this->group_commit_trigger_lock_wait;
10682   mysql_mutex_unlock(&LOCK_prepare_ordered);
10683 
10684   if (have_snapshot)
10685   {
10686     set_binlog_snapshot_file(cache_mngr->last_commit_pos_file);
10687     binlog_snapshot_position= cache_mngr->last_commit_pos_offset;
10688   }
10689 }
10690 
10691 
10692 /*
10693   Find the Gtid_list_log_event at the start of a binlog.
10694 
10695   NULL for ok, non-NULL error message for error.
10696 
10697   If ok, then the event is returned in *out_gtid_list. This can be NULL if we
10698   get back to binlogs written by old server version without GTID support. If
10699   so, it means we have reached the point to start from, as no GTID events can
10700   exist in earlier binlogs.
10701 */
10702 const char *
get_gtid_list_event(IO_CACHE * cache,Gtid_list_log_event ** out_gtid_list)10703 get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list)
10704 {
10705   Format_description_log_event init_fdle(BINLOG_VERSION);
10706   Format_description_log_event *fdle;
10707   Log_event *ev;
10708   const char *errormsg = NULL;
10709 
10710   *out_gtid_list= NULL;
10711 
10712   if (!(ev= Log_event::read_log_event(cache, &init_fdle,
10713                                       opt_master_verify_checksum)) ||
10714       ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
10715   {
10716     if (ev)
10717       delete ev;
10718     return "Could not read format description log event while looking for "
10719       "GTID position in binlog";
10720   }
10721 
10722   fdle= static_cast<Format_description_log_event *>(ev);
10723 
10724   for (;;)
10725   {
10726     Log_event_type typ;
10727 
10728     ev= Log_event::read_log_event(cache, fdle, opt_master_verify_checksum);
10729     if (!ev)
10730     {
10731       errormsg= "Could not read GTID list event while looking for GTID "
10732         "position in binlog";
10733       break;
10734     }
10735     typ= ev->get_type_code();
10736     if (typ == GTID_LIST_EVENT)
10737       break;                                    /* Done, found it */
10738     if (typ == START_ENCRYPTION_EVENT)
10739     {
10740       if (fdle->start_decryption((Start_encryption_log_event*) ev))
10741         errormsg= "Could not set up decryption for binlog.";
10742     }
10743     delete ev;
10744     if (typ == ROTATE_EVENT || typ == STOP_EVENT ||
10745         typ == FORMAT_DESCRIPTION_EVENT || typ == START_ENCRYPTION_EVENT)
10746       continue;                                 /* Continue looking */
10747 
10748     /* We did not find any Gtid_list_log_event, must be old binlog. */
10749     ev= NULL;
10750     break;
10751   }
10752 
10753   delete fdle;
10754   *out_gtid_list= static_cast<Gtid_list_log_event *>(ev);
10755   return errormsg;
10756 }
10757 
10758 
10759 struct st_mysql_storage_engine binlog_storage_engine=
10760 { MYSQL_HANDLERTON_INTERFACE_VERSION };
10761 
maria_declare_plugin(binlog)10762 maria_declare_plugin(binlog)
10763 {
10764   MYSQL_STORAGE_ENGINE_PLUGIN,
10765   &binlog_storage_engine,
10766   "binlog",
10767   "MySQL AB",
10768   "This is a pseudo storage engine to represent the binlog in a transaction",
10769   PLUGIN_LICENSE_GPL,
10770   binlog_init, /* Plugin Init */
10771   NULL, /* Plugin Deinit */
10772   0x0100 /* 1.0 */,
10773   binlog_status_vars_top,     /* status variables                */
10774   binlog_sys_vars,            /* system variables                */
10775   "1.0",                      /* string version */
10776   MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
10777 }
10778 maria_declare_plugin_end;
10779 
10780 #ifdef WITH_WSREP
10781 #include "wsrep_mysqld.h"
10782 
wsrep_get_trans_cache(THD * thd)10783 IO_CACHE *wsrep_get_trans_cache(THD * thd)
10784 {
10785   DBUG_ASSERT(binlog_hton->slot != HA_SLOT_UNDEF);
10786   binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*)
10787     thd_get_ha_data(thd, binlog_hton);
10788   if (cache_mngr)
10789     return cache_mngr->get_binlog_cache_log(true);
10790 
10791   WSREP_DEBUG("binlog cache not initialized, conn: %llu",
10792 	      thd->thread_id);
10793   return NULL;
10794 }
10795 
wsrep_thd_binlog_trx_reset(THD * thd)10796 void wsrep_thd_binlog_trx_reset(THD * thd)
10797 {
10798   DBUG_ENTER("wsrep_thd_binlog_trx_reset");
10799   WSREP_DEBUG("wsrep_thd_binlog_reset");
10800   /*
10801     todo: fix autocommit select to not call the caller
10802   */
10803   binlog_cache_mngr *const cache_mngr=
10804     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10805   if (cache_mngr)
10806   {
10807     cache_mngr->reset(false, true);
10808     if (!cache_mngr->stmt_cache.empty())
10809     {
10810       WSREP_DEBUG("pending events in stmt cache, sql: %s", thd->query());
10811       cache_mngr->stmt_cache.reset();
10812     }
10813   }
10814   thd->clear_binlog_table_maps();
10815   DBUG_VOID_RETURN;
10816 }
10817 
wsrep_thd_binlog_stmt_rollback(THD * thd)10818 void wsrep_thd_binlog_stmt_rollback(THD * thd)
10819 {
10820   DBUG_ENTER("wsrep_thd_binlog_stmt_rollback");
10821   WSREP_DEBUG("wsrep_thd_binlog_stmt_rollback");
10822   binlog_cache_mngr *const cache_mngr=
10823     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10824   if (cache_mngr)
10825   {
10826     thd->binlog_remove_pending_rows_event(TRUE, TRUE);
10827     cache_mngr->stmt_cache.reset();
10828   }
10829   DBUG_VOID_RETURN;
10830 }
10831 
wsrep_register_binlog_handler(THD * thd,bool trx)10832 void wsrep_register_binlog_handler(THD *thd, bool trx)
10833 {
10834   DBUG_ENTER("register_binlog_handler");
10835   /*
10836     If this is the first call to this function while processing a statement,
10837     the transactional cache does not have a savepoint defined. So, in what
10838     follows:
10839       . an implicit savepoint is defined;
10840       . callbacks are registered;
10841       . binary log is set as read/write.
10842 
10843     The savepoint allows for truncating the trx-cache transactional changes
10844     fail. Callbacks are necessary to flush caches upon committing or rolling
10845     back a statement or a transaction. However, notifications do not happen
10846     if the binary log is set as read/write.
10847   */
10848   binlog_cache_mngr *cache_mngr=
10849     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10850   /* cache_mngr may be missing e.g. in mtr test ev51914.test */
10851   if (cache_mngr)
10852   {
10853     /*
10854       Set an implicit savepoint in order to be able to truncate a trx-cache.
10855     */
10856     if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
10857     {
10858       my_off_t pos= 0;
10859       binlog_trans_log_savepos(thd, &pos);
10860       cache_mngr->trx_cache.set_prev_position(pos);
10861     }
10862 
10863     /*
10864       Set callbacks in order to be able to call commmit or rollback.
10865     */
10866     if (trx)
10867       trans_register_ha(thd, TRUE, binlog_hton);
10868     trans_register_ha(thd, FALSE, binlog_hton);
10869 
10870     /*
10871       Set the binary log as read/write otherwise callbacks are not called.
10872     */
10873     thd->ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
10874   }
10875   DBUG_VOID_RETURN;
10876 }
10877 
10878 #endif /* WITH_WSREP */
10879