1 /* Copyright (c) 2000, 2018, Oracle and/or its affiliates.
2    Copyright (c) 2009, 2020, MariaDB Corporation.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */
16 
17 
18 /**
19   @file
20 
21   @brief
22   logging of commands
23 
24   @todo
25     Abort logging when we get an error in reading or writing log files
26 */
27 
28 #include "mariadb.h"		/* NO_EMBEDDED_ACCESS_CHECKS */
29 #include "sql_priv.h"
30 #include "log.h"
31 #include "sql_base.h"                           // open_log_table
32 #include "sql_repl.h"
33 #include "sql_delete.h"                         // mysql_truncate
34 #include "sql_parse.h"                          // command_name
35 #include "sql_time.h"           // calc_time_from_sec, my_time_compare
36 #include "tztime.h"             // my_tz_OFFSET0, struct Time_zone
37 #include "sql_acl.h"            // SUPER_ACL
38 #include "log_event.h"          // Query_log_event
39 #include "rpl_filter.h"
40 #include "rpl_rli.h"
41 #include "sql_audit.h"
42 #include "mysqld.h"
43 
44 #include <my_dir.h>
45 #include <m_ctype.h>				// For test_if_number
46 
47 #include <set_var.h> // for Sys_last_gtid_ptr
48 
49 #ifdef _WIN32
50 #include "message.h"
51 #endif
52 
53 #include "sql_plugin.h"
54 #include "debug_sync.h"
55 #include "sql_show.h"
56 #include "my_pthread.h"
57 #include "semisync_master.h"
58 #include "wsrep_mysqld.h"
59 #include "sp_rcontext.h"
60 #include "sp_head.h"
61 
/* max size of the log message */
#define MAX_LOG_BUFFER_SIZE 1024
/* NOTE(review): presumably the size of a formatted-time buffer - confirm at the use sites */
#define MAX_TIME_SIZE 32
/* A my_off_t with every bit set; used in this file as the "no position" marker */
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)
/* Truncate cache log files bigger than this */
#define CACHE_FILE_TRUNC_SIZE 65536

/*
  Expands to the stringified name of F followed by one space when (V)&(F)
  is non-zero, and to the empty string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
70 
/* NOTE(review): presumably the handlerton of the binlog pseudo-engine, set up in binlog_init() - confirm */
handlerton *binlog_hton;
/* The global LOGGER instance (referenced below as logger.xxx) */
LOGGER logger;

/* Both start out as NULL pointers; they are assigned outside this part of the file */
const char *log_bin_index= 0;
const char *log_bin_basename= 0;

/* The global binary log object, constructed with the address of sync_binlog_period */
MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period);

/* Forward declarations of file-local (static) functions defined later in this file */
static bool test_if_number(const char *str,
			   ulong *res, bool allow_wildcards);
static int binlog_init(void *p);
static int binlog_close_connection(handlerton *hton, THD *thd);
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
                                                      THD *thd);
static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);

/* Constant message text (pointer + length) for binary log write failures */
static const LEX_CSTRING write_error_msg=
    { STRING_WITH_LEN("error writing to the binary log") };

/* File-local option flag, TRUE by default */
static my_bool opt_optimize_thread_scheduling= TRUE;
ulong binlog_checksum_options;
#ifndef DBUG_OFF
/* Only present in debug builds (DBUG_OFF not defined) */
ulong opt_binlog_dbug_fsync_sleep= 0;
#endif

/* Global synchronisation objects; they are initialised and used outside this part of the file */
mysql_mutex_t LOCK_prepare_ordered;
mysql_cond_t COND_prepare_ordered;
mysql_mutex_t LOCK_after_binlog_sync;
mysql_mutex_t LOCK_commit_ordered;

/* Storage behind the entries of binlog_status_vars_detail[] below */
static ulonglong binlog_status_var_num_commits;
static ulonglong binlog_status_var_num_group_commits;
static ulonglong binlog_status_group_commit_trigger_count;
static ulonglong binlog_status_group_commit_trigger_lock_wait;
static ulonglong binlog_status_group_commit_trigger_timeout;
static char binlog_snapshot_file[FN_REFLEN];
static ulonglong binlog_snapshot_position;

/*
  printf-style format for a fatal logging error; takes a string (%s) and
  an integer error number (%d), in that order.
*/
static const char *fatal_log_error=
  "Could not use %s for logging (error %d). "
  "Turning logging off for the whole duration of the MariaDB server process. "
  "To turn it on again: fix the cause, shutdown the MariaDB server and "
  "restart it.";
120 
/*
  Table of binlog status variables: each entry maps a name to the address
  of one of the file-local counters above and to its SHOW_* display type.
  The list ends with a {NullS, NullS, SHOW_LONG} terminator entry.
*/
static SHOW_VAR binlog_status_vars_detail[]=
{
  {"commits",
    (char *)&binlog_status_var_num_commits, SHOW_LONGLONG},
  {"group_commits",
    (char *)&binlog_status_var_num_group_commits, SHOW_LONGLONG},
  {"group_commit_trigger_count",
    (char *)&binlog_status_group_commit_trigger_count, SHOW_LONGLONG},
  {"group_commit_trigger_lock_wait",
    (char *)&binlog_status_group_commit_trigger_lock_wait, SHOW_LONGLONG},
  {"group_commit_trigger_timeout",
    (char *)&binlog_status_group_commit_trigger_timeout, SHOW_LONGLONG},
  /* The only string-valued entry: a character buffer shown as SHOW_CHAR */
  {"snapshot_file",
    (char *)&binlog_snapshot_file, SHOW_CHAR},
  {"snapshot_position",
   (char *)&binlog_snapshot_position, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};
139 
/*
  Variables for the binlog background thread.
  Protected by the MYSQL_BIN_LOG::LOCK_binlog_background_thread mutex.
 */
/* Both flags start out false and the queue starts out empty (NULL) */
static bool binlog_background_thread_started= false;
static bool binlog_background_thread_stop= false;
static MYSQL_BIN_LOG::xid_count_per_binlog *
    binlog_background_thread_queue= NULL;

/* Forward declaration; the function is defined later in this file */
static bool start_binlog_background_thread();

/* File-local GTID binlog state object; initialised by setup_log_handling() */
static rpl_binlog_state rpl_global_gtid_binlog_state;
152 
/**
  Prepare the file-local logging state for use.

  The only action is to call init() on rpl_global_gtid_binlog_state.
*/
void setup_log_handling()
{
  rpl_global_gtid_binlog_state.init();
}
157 
158 
159 /**
160    purge logs, master and slave sides both, related error code
161    converter.
162    Called from @c purge_error_message(), @c MYSQL_BIN_LOG::reset_logs()
163 
164    @param  res  an internal to purging routines error code
165 
166    @return the user level error code ER_*
167 */
168 uint purge_log_get_error_code(int res)
169 {
170   uint errcode= 0;
171 
172   switch (res)  {
173   case 0: break;
174   case LOG_INFO_EOF:	errcode= ER_UNKNOWN_TARGET_BINLOG; break;
175   case LOG_INFO_IO:	errcode= ER_IO_ERR_LOG_INDEX_READ; break;
176   case LOG_INFO_INVALID:errcode= ER_BINLOG_PURGE_PROHIBITED; break;
177   case LOG_INFO_SEEK:	errcode= ER_FSEEK_FAIL; break;
178   case LOG_INFO_MEM:	errcode= ER_OUT_OF_RESOURCES; break;
179   case LOG_INFO_FATAL:	errcode= ER_BINLOG_PURGE_FATAL_ERR; break;
180   case LOG_INFO_IN_USE: errcode= ER_LOG_IN_USE; break;
181   case LOG_INFO_EMFILE: errcode= ER_BINLOG_PURGE_EMFILE; break;
182   default:		errcode= ER_LOG_PURGE_UNKNOWN_ERR; break;
183   }
184 
185   return errcode;
186 }
187 
188 /**
189   Silence all errors and warnings reported when performing a write
190   to a log table.
191   Errors and warnings are not reported to the client or SQL exception
192   handlers, so that the presence of logging does not interfere and affect
193   the logic of an application.
194 */
195 class Silence_log_table_errors : public Internal_error_handler
196 {
197   char m_message[MYSQL_ERRMSG_SIZE];
198 public:
199   Silence_log_table_errors()
200   {
201     m_message[0]= '\0';
202   }
203 
204   virtual ~Silence_log_table_errors() {}
205 
206   virtual bool handle_condition(THD *thd,
207                                 uint sql_errno,
208                                 const char* sql_state,
209                                 Sql_condition::enum_warning_level *level,
210                                 const char* msg,
211                                 Sql_condition ** cond_hdl);
212   const char *message() const { return m_message; }
213 };
214 
215 bool
216 Silence_log_table_errors::handle_condition(THD *,
217                                            uint,
218                                            const char*,
219                                            Sql_condition::enum_warning_level*,
220                                            const char* msg,
221                                            Sql_condition ** cond_hdl)
222 {
223   *cond_hdl= NULL;
224   strmake_buf(m_message, msg);
225   return TRUE;
226 }
227 
/*
  The three error-log print functions, in the order
  information, warning, error.
  NOTE(review): presumably indexed by a log level value - confirm at the
  call sites.
*/
sql_print_message_func sql_print_message_handlers[3] =
{
  sql_print_information,
  sql_print_warning,
  sql_print_error
};
234 
235 
236 /**
237   Create the name of the log file
238 
239   @param[OUT] out    a pointer to a new allocated name will go there
240   @param[IN] log_ext The extension for the file (e.g .log)
241   @param[IN] once    whether to use malloc_once or a normal malloc.
242 */
243 void make_default_log_name(char **out, const char* log_ext, bool once)
244 {
245   char buff[FN_REFLEN+10];
246   fn_format(buff, opt_log_basename, "", log_ext, MYF(MY_REPLACE_EXT));
247   if (once)
248     *out= my_once_strdup(buff, MYF(MY_WME));
MemRefRegionMemRefRegion249   else
250   {
251     my_free(*out);
252     *out= my_strdup(buff, MYF(MY_WME));
253   }
254 }
255 
256 
257 /*
258   Helper classes to store non-transactional and transactional data
259   before copying it to the binary log.
260 */
261 class binlog_cache_data
262 {
263 public:
264   binlog_cache_data(): m_pending(0), status(0),
265   before_stmt_pos(MY_OFF_T_UNDEF),
266   incident(FALSE),
267   saved_max_binlog_cache_size(0), ptr_binlog_cache_use(0),
268   ptr_binlog_cache_disk_use(0)
269   { }
270 
271   ~binlog_cache_data()
272   {
273     DBUG_ASSERT(empty());
274     close_cached_file(&cache_log);
275   }
276 
277   /*
278     Return 1 if there is no relevant entries in the cache
279 
280     This is:
281     - Cache is empty
282     - There are row or critical (DDL?) events in the cache
283 
284     The status test is needed to avoid writing entries with only
285     a table map entry, which would crash in do_apply_event() on the slave
286     as it assumes that there is always a row entry after a table map.
getConstraintsMemRefRegion287   */
288   bool empty() const
289   {
290     return (pending() == NULL &&
291             (my_b_write_tell(&cache_log) == 0 ||
292              ((status & (LOGGED_ROW_EVENT | LOGGED_CRITICAL)) == 0)));
293   }
294 
295   Rows_log_event *pending() const
296   {
297     return m_pending;
298   }
299 
300   void set_pending(Rows_log_event *const pending_arg)
301   {
302     m_pending= pending_arg;
303   }
304 
305   void set_incident(void)
306   {
307     incident= TRUE;
308   }
309 
310   bool has_incident(void)
311   {
312     return(incident);
313   }
314 
315   void reset()
316   {
317     bool cache_was_empty= empty();
318     bool truncate_file= (cache_log.file != -1 &&
319                          my_b_write_tell(&cache_log) > CACHE_FILE_TRUNC_SIZE);
320     truncate(0,1);                              // Forget what's in cache
321     if (!cache_was_empty)
322       compute_statistics();
323     if (truncate_file)
324       my_chsize(cache_log.file, 0, 0, MYF(MY_WME));
325 
326     status= 0;
327     incident= FALSE;
328     before_stmt_pos= MY_OFF_T_UNDEF;
329     DBUG_ASSERT(empty());
330   }
331 
332   my_off_t get_byte_position() const
333   {
334     return my_b_tell(&cache_log);
335   }
336 
337   my_off_t get_prev_position()
338   {
339      return(before_stmt_pos);
340   }
341 
342   void set_prev_position(my_off_t pos)
343   {
344      before_stmt_pos= pos;
345   }
346 
347   void restore_prev_position()
348   {
349     truncate(before_stmt_pos);
350   }
351 
352   void restore_savepoint(my_off_t pos)
353   {
354     truncate(pos);
355     if (pos < before_stmt_pos)
356       before_stmt_pos= MY_OFF_T_UNDEF;
357   }
358 
359   void set_binlog_cache_info(my_off_t param_max_binlog_cache_size,
360                              ulong *param_ptr_binlog_cache_use,
361                              ulong *param_ptr_binlog_cache_disk_use)
362   {
363     /*
364       The assertions guarantee that the set_binlog_cache_info is
365       called just once and information passed as parameters are
366       never zero.
367 
368       This is done while calling the constructor binlog_cache_mngr.
369       We cannot set information in the constructor binlog_cache_data
370       because the space for binlog_cache_mngr is allocated through
371       a placement new.
372 
373       In the future, we can refactor this and change it to avoid
374       the set_binlog_info.
375     */
376     DBUG_ASSERT(saved_max_binlog_cache_size == 0 &&
377                 param_max_binlog_cache_size != 0 &&
378                 ptr_binlog_cache_use == 0 &&
379                 param_ptr_binlog_cache_use != 0 &&
380                 ptr_binlog_cache_disk_use == 0 &&
381                 param_ptr_binlog_cache_disk_use != 0);
382 
383     saved_max_binlog_cache_size= param_max_binlog_cache_size;
384     ptr_binlog_cache_use= param_ptr_binlog_cache_use;
385     ptr_binlog_cache_disk_use= param_ptr_binlog_cache_disk_use;
386     cache_log.end_of_file= saved_max_binlog_cache_size;
387   }
388 
389   void add_status(enum_logged_status status_arg)
390   {
391     status|= status_arg;
392   }
393 
394   /*
395     Cache to store data before copying it to the binary log.
396   */
397   IO_CACHE cache_log;
398 
399 private:
400   /*
401     Pending binrows event. This event is the event where the rows are currently
402     written.
403    */
404   Rows_log_event *m_pending;
405 
406   /*
407     Bit flags for what has been writting to cache. Used to
408     discard logs without any data changes.
409     see enum_logged_status;
410   */
411   uint32 status;
412 
413   /*
414     Binlog position before the start of the current statement.
415   */
416   my_off_t before_stmt_pos;
417 
418   /*
419     This indicates that some events did not get into the cache and most likely
420     it is corrupted.
421   */
422   bool incident;
423 
424   /**
425     This function computes binlog cache and disk usage.
426   */
427   void compute_statistics()
428   {
429     statistic_increment(*ptr_binlog_cache_use, &LOCK_status);
430     if (cache_log.disk_writes != 0)
431     {
432 #ifdef REAL_STATISTICS
433       statistic_add(*ptr_binlog_cache_disk_use,
434                     cache_log.disk_writes, &LOCK_status);
435 #else
436       statistic_increment(*ptr_binlog_cache_disk_use, &LOCK_status);
437 #endif
438       cache_log.disk_writes= 0;
439     }
440   }
441 
442   /*
443     Stores the values of maximum size of the cache allowed when this cache
444     is configured. This corresponds to either
445       . max_binlog_cache_size or max_binlog_stmt_cache_size.
446   */
447   my_off_t saved_max_binlog_cache_size;
448 
449   /*
450     Stores a pointer to the status variable that keeps track of the in-memory
451     cache usage. This corresponds to either
452       . binlog_cache_use or binlog_stmt_cache_use.
453   */
454   ulong *ptr_binlog_cache_use;
455 
456   /*
457     Stores a pointer to the status variable that keeps track of the disk
458     cache usage. This corresponds to either
459       . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
460   */
461   ulong *ptr_binlog_cache_disk_use;
462 
463   /*
464     It truncates the cache to a certain position. This includes deleting the
465     pending event.
466    */
467   void truncate(my_off_t pos, bool reset_cache=0)
468   {
469     DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
470     cache_log.error=0;
471     if (pending())
472     {
473       delete pending();
474       set_pending(0);
475     }
476     reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, reset_cache);
477     cache_log.end_of_file= saved_max_binlog_cache_size;
478   }
479 
480   binlog_cache_data& operator=(const binlog_cache_data& info);
481   binlog_cache_data(const binlog_cache_data& info);
482 };
483 
484 
485 void Log_event_writer::add_status(enum_logged_status status)
486 {
487   if (likely(cache_data))
488     cache_data->add_status(status);
489 }
490 
491 class binlog_cache_mngr {
492 public:
493   binlog_cache_mngr(my_off_t param_max_binlog_stmt_cache_size,
494                     my_off_t param_max_binlog_cache_size,
495                     ulong *param_ptr_binlog_stmt_cache_use,
496                     ulong *param_ptr_binlog_stmt_cache_disk_use,
497                     ulong *param_ptr_binlog_cache_use,
498                     ulong *param_ptr_binlog_cache_disk_use)
499     : last_commit_pos_offset(0), using_xa(FALSE), xa_xid(0)
500   {
501      stmt_cache.set_binlog_cache_info(param_max_binlog_stmt_cache_size,
502                                       param_ptr_binlog_stmt_cache_use,
503                                       param_ptr_binlog_stmt_cache_disk_use);
504      trx_cache.set_binlog_cache_info(param_max_binlog_cache_size,
505                                      param_ptr_binlog_cache_use,
506                                      param_ptr_binlog_cache_disk_use);
507      last_commit_pos_file[0]= 0;
508   }
509 
510   void reset(bool do_stmt, bool do_trx)
511   {
512     if (do_stmt)
513       stmt_cache.reset();
514     if (do_trx)
515     {
516       trx_cache.reset();
517       using_xa= FALSE;
518       last_commit_pos_file[0]= 0;
519       last_commit_pos_offset= 0;
520     }
521   }
522 
523   binlog_cache_data* get_binlog_cache_data(bool is_transactional)
524   {
525     return (is_transactional ? &trx_cache : &stmt_cache);
526   }
527 
528   IO_CACHE* get_binlog_cache_log(bool is_transactional)
529   {
530     return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
531   }
532 
533   binlog_cache_data stmt_cache;
534 
535   binlog_cache_data trx_cache;
536 
537   /*
538     Binlog position for current transaction.
539     For START TRANSACTION WITH CONSISTENT SNAPSHOT, this is the binlog
540     position corresponding to the snapshot taken. During (and after) commit,
541     this is set to the binlog position corresponding to just after the
542     commit (so storage engines can store it in their transaction log).
543   */
544   char last_commit_pos_file[FN_REFLEN];
545   my_off_t last_commit_pos_offset;
546 
547   /*
548     Flag set true if this transaction is committed with log_xid() as part of
549     XA, false if not.
550   */
551   bool using_xa;
552   my_xid xa_xid;
553   bool need_unlog;
554   /*
555     Id of binlog that transaction was written to; only needed if need_unlog is
556     true.
557   */
558   ulong binlog_id;
559   /* Set if we get an error during commit that must be returned from unlog(). */
560   bool delayed_error;
561 
562 private:
563 
564   binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
565   binlog_cache_mngr(const binlog_cache_mngr& info);
566 };
567 
568 bool LOGGER::is_log_table_enabled(uint log_table_type)
569 {
570   switch (log_table_type) {
571   case QUERY_LOG_SLOW:
572     return (table_log_handler != NULL) && global_system_variables.sql_log_slow
573             && (log_output_options & LOG_TABLE);
574   case QUERY_LOG_GENERAL:
575     return (table_log_handler != NULL) && opt_log
576             && (log_output_options & LOG_TABLE);
577   default:
578     DBUG_ASSERT(0);
579     return FALSE;                             /* make compiler happy */
580   }
581 }
582 
583 /**
584    Check if a given table is opened log table
585 
586    @param table             Table to check
587    @param check_if_opened   Only fail if it's a log table in use
588    @param error_msg	    String to put in error message if not ok.
589                             No error message if 0
590    @return 0 ok
591    @return # Type of log file
592  */
593 
594 int check_if_log_table(const TABLE_LIST *table,
595                        bool check_if_opened,
596                        const char *error_msg)
597 {
598   int result= 0;
599   if (table->db.length == 5 &&
600       !my_strcasecmp(table_alias_charset, table->db.str, "mysql"))
601   {
602     const char *table_name= table->table_name.str;
603 
604     if (table->table_name.length == 11 &&
605         !my_strcasecmp(table_alias_charset, table_name, "general_log"))
606     {
607       result= QUERY_LOG_GENERAL;
608       goto end;
609     }
610 
611     if (table->table_name.length == 8 &&
612         !my_strcasecmp(table_alias_charset, table_name, "slow_log"))
613     {
614       result= QUERY_LOG_SLOW;
615       goto end;
616     }
617   }
618   return 0;
619 
620 end:
621   if (!check_if_opened || logger.is_log_table_enabled(result))
622   {
623     if (error_msg)
624       my_error(ER_BAD_LOG_STATEMENT, MYF(0), error_msg);
625     return result;
626   }
627   return 0;
628 }
629 
630 
/* Constructor: intentionally empty, there is nothing to set up here. */
Log_to_csv_event_handler::Log_to_csv_event_handler()
{
}
634 
635 
/* Destructor: intentionally empty, there is nothing to release here. */
Log_to_csv_event_handler::~Log_to_csv_event_handler()
{
}
639 
640 
/* Mark the log tables of the global logger as not initialized. */
void Log_to_csv_event_handler::cleanup()
{
  logger.is_log_tables_initialized= FALSE;
}
645 
646 /* log event handlers */
647 
648 /**
649   Log command to the general log table
650 
651   Log given command to the general log table.
652 
653   @param  event_time        command start timestamp
654   @param  user_host         the pointer to the string with user@host info
655   @param  user_host_len     length of the user_host string. this is computed
656                             once and passed to all general log event handlers
657   @param  thread_id         Id of the thread, issued a query
658   @param  command_type      the type of the command being logged
659   @param  command_type_len  the length of the string above
660   @param  sql_text          the very text of the query being executed
661   @param  sql_text_len      the length of sql_text string
662 
663 
664   @return This function attempts to never call my_error(). This is
665   necessary, because general logging happens already after a statement
666   status has been sent to the client, so the client can not see the
667   error anyway. Besides, the error is not related to the statement
668   being executed and is internal, and thus should be handled
669   internally (@todo: how?).
670   If a write to the table has failed, the function attempts to
671   write to a short error message to the file. The failure is also
672   indicated in the return value.
673 
674   @retval  FALSE   OK
675   @retval  TRUE    error occurred
676 */
677 
678 bool Log_to_csv_event_handler::
679   log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
680               const char *command_type, size_t command_type_len,
681               const char *sql_text, size_t sql_text_len,
682               CHARSET_INFO *client_cs)
683 {
684   TABLE_LIST table_list;
685   TABLE *table;
686   bool result= TRUE;
687   bool need_close= FALSE;
688   bool need_pop= FALSE;
689   bool need_rnd_end= FALSE;
690   uint field_index;
691   Silence_log_table_errors error_handler;
692   Open_tables_backup open_tables_backup;
693   ulonglong save_thd_options;
694   bool save_time_zone_used;
695   DBUG_ENTER("log_general");
696 
697   /*
698     CSV uses TIME_to_timestamp() internally if table needs to be repaired
699     which will set thd->time_zone_used
700   */
701   save_time_zone_used= thd->time_zone_used;
702 
703   save_thd_options= thd->variables.option_bits;
704   thd->variables.option_bits&= ~OPTION_BIN_LOG;
705 
706   table_list.init_one_table(&MYSQL_SCHEMA_NAME, &GENERAL_LOG_NAME, 0,
707                             TL_WRITE_CONCURRENT_INSERT);
708 
709   /*
710     1) open_log_table generates an error of the
711     table can not be opened or is corrupted.
712     2) "INSERT INTO general_log" can generate warning sometimes.
713 
714     Suppress these warnings and errors, they can't be dealt with
715     properly anyway.
716 
717     QQ: this problem needs to be studied in more detail.
718     Comment this 2 lines and run "cast.test" to see what's happening.
719   */
720   thd->push_internal_handler(& error_handler);
721   need_pop= TRUE;
722 
723   if (!(table= open_log_table(thd, &table_list, &open_tables_backup)))
724     goto err;
725 
726   need_close= TRUE;
727 
728   if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
729       table->file->ha_rnd_init_with_error(0))
730     goto err;
731 
732   need_rnd_end= TRUE;
733 
734   /* Honor next number columns if present */
735   table->next_number_field= table->found_next_number_field;
736 
737   /*
738     NOTE: we do not call restore_record() here, as all fields are
739     filled by the Logger (=> no need to load default ones).
740   */
741 
742   /*
743     We do not set a value for table->field[0], as it will use
744     default value (which is CURRENT_TIMESTAMP).
745   */
746 
747   /* check that all columns exist */
748   if (table->s->fields < 6)
749     goto err;
750 
751   DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
752 
753   ((Field_timestamp*) table->field[0])->store_TIME(
754                   hrtime_to_my_time(event_time), hrtime_sec_part(event_time));
755 
756   /* do a write */
757   if (table->field[1]->store(user_host, user_host_len, client_cs) ||
758       table->field[2]->store((longlong) thread_id_arg, TRUE) ||
759       table->field[3]->store((longlong) global_system_variables.server_id,
760                              TRUE) ||
761       table->field[4]->store(command_type, command_type_len, client_cs))
762     goto err;
763 
764   /*
765     A positive return value in store() means truncation.
766     Still logging a message in the log in this case.
767   */
768   table->field[5]->flags|= FIELDFLAG_HEX_ESCAPE;
769   if (table->field[5]->store(sql_text, sql_text_len, client_cs) < 0)
770     goto err;
771 
772   /* mark all fields as not null */
773   table->field[1]->set_notnull();
774   table->field[2]->set_notnull();
775   table->field[3]->set_notnull();
776   table->field[4]->set_notnull();
777   table->field[5]->set_notnull();
778 
779   /* Set any extra columns to their default values */
780   for (field_index= 6 ; field_index < table->s->fields ; field_index++)
781   {
782     table->field[field_index]->set_default();
783   }
784 
785   /* log table entries are not replicated */
786   if (table->file->ha_write_row(table->record[0]))
787     goto err;
788 
789   result= FALSE;
790 
791 err:
792   if (result && !thd->killed)
793     sql_print_error("Failed to write to mysql.general_log: %s",
794                     error_handler.message());
795 
796   if (need_rnd_end)
797   {
798     table->file->ha_rnd_end();
799     table->file->ha_release_auto_increment();
800   }
801   if (need_pop)
802     thd->pop_internal_handler();
803   if (need_close)
804     close_log_table(thd, &open_tables_backup);
805 
806   thd->variables.option_bits= save_thd_options;
807   thd->time_zone_used= save_time_zone_used;
808   DBUG_RETURN(result);
809 }
810 
811 
812 /*
813   Log a query to the slow log table
814 
815   SYNOPSIS
816     log_slow()
817     thd               THD of the query
818     current_time      current timestamp
819     user_host         the pointer to the string with user@host info
820     user_host_len     length of the user_host string. this is computed once
821                       and passed to all general log event handlers
822     query_time        Amount of time the query took to execute (in microseconds)
823     lock_time         Amount of time the query was locked (in microseconds)
824     is_command        The flag, which determines, whether the sql_text is a
825                       query or an administrator command (these are treated
826                       differently by the old logging routines)
827     sql_text          the very text of the query or administrator command
828                       processed
829     sql_text_len      the length of sql_text string
830 
831   DESCRIPTION
832 
833    Log a query to the slow log table
834 
835   RETURN
836     FALSE - OK
837     TRUE - error occurred
838 */
839 
840 bool Log_to_csv_event_handler::
841   log_slow(THD *thd, my_hrtime_t current_time,
842            const char *user_host, size_t user_host_len,
843            ulonglong query_utime, ulonglong lock_utime, bool is_command,
844            const char *sql_text, size_t sql_text_len)
845 {
846   TABLE_LIST table_list;
847   TABLE *table;
848   bool result= TRUE;
849   bool need_close= FALSE;
850   bool need_rnd_end= FALSE;
851   Silence_log_table_errors error_handler;
852   Open_tables_backup open_tables_backup;
853   CHARSET_INFO *client_cs= thd->variables.character_set_client;
854   bool save_time_zone_used;
855   long query_time= (long) MY_MIN(query_utime/1000000, TIME_MAX_VALUE_SECONDS);
856   long lock_time=  (long) MY_MIN(lock_utime/1000000, TIME_MAX_VALUE_SECONDS);
857   long query_time_micro= (long) (query_utime % 1000000);
858   long lock_time_micro=  (long) (lock_utime % 1000000);
859 
860   DBUG_ENTER("Log_to_csv_event_handler::log_slow");
861 
862   thd->push_internal_handler(& error_handler);
863   /*
864     CSV uses TIME_to_timestamp() internally if table needs to be repaired
865     which will set thd->time_zone_used
866   */
867   save_time_zone_used= thd->time_zone_used;
868 
869   table_list.init_one_table(&MYSQL_SCHEMA_NAME, &SLOW_LOG_NAME, 0,
870                             TL_WRITE_CONCURRENT_INSERT);
871 
872   if (!(table= open_log_table(thd, &table_list, &open_tables_backup)))
873     goto err;
874 
875   need_close= TRUE;
876 
877   if (table->file->extra(HA_EXTRA_MARK_AS_LOG_TABLE) ||
878       table->file->ha_rnd_init_with_error(0))
879     goto err;
880 
881   need_rnd_end= TRUE;
882 
883   /* Honor next number columns if present */
884   table->next_number_field= table->found_next_number_field;
885 
886   restore_record(table, s->default_values);    // Get empty record
887 
888   /* check that all columns exist */
889   if (table->s->fields < 13)
890     goto err;
891 
892   /* store the time and user values */
893   DBUG_ASSERT(table->field[0]->type() == MYSQL_TYPE_TIMESTAMP);
894   ((Field_timestamp*) table->field[0])->store_TIME(
895              hrtime_to_my_time(current_time), hrtime_sec_part(current_time));
896   if (table->field[1]->store(user_host, user_host_len, client_cs))
897     goto err;
898 
899   /*
900     A TIME field can not hold the full longlong range; query_time or
901     lock_time may be truncated without warning here, if greater than
902     839 hours (~35 days)
903   */
904   MYSQL_TIME t;
905   t.neg= 0;
906 
907   /* fill in query_time field */
908   calc_time_from_sec(&t, query_time, query_time_micro);
909   if (table->field[2]->store_time(&t))
910     goto err;
911   /* lock_time */
912   calc_time_from_sec(&t, lock_time, lock_time_micro);
913   if (table->field[3]->store_time(&t))
914     goto err;
915   /* rows_sent */
916   if (table->field[4]->store((longlong) thd->get_sent_row_count(), TRUE))
917     goto err;
918   /* rows_examined */
919   if (table->field[5]->store((longlong) thd->get_examined_row_count(), TRUE))
920     goto err;
921 
922   /* fill database field */
923   if (thd->db.str)
924   {
925     if (table->field[6]->store(thd->db.str, thd->db.length, client_cs))
926       goto err;
927     table->field[6]->set_notnull();
928   }
929 
930   if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
931   {
932     if (table->
933         field[7]->store((longlong)
934                         thd->first_successful_insert_id_in_prev_stmt_for_binlog,
935                         TRUE))
936       goto err;
937     table->field[7]->set_notnull();
938   }
939 
940   /*
941     Set value if we do an insert on autoincrement column. Note that for
942     some engines (those for which get_auto_increment() does not leave a
943     table lock until the statement ends), this is just the first value and
944     the next ones used may not be contiguous to it.
945   */
946   if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
947   {
948     if (table->
949         field[8]->store((longlong)
950           thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(), TRUE))
951       goto err;
952     table->field[8]->set_notnull();
953   }
954 
955   if (table->field[9]->store((longlong)global_system_variables.server_id, TRUE))
956     goto err;
957   table->field[9]->set_notnull();
958 
959   /*
960     Column sql_text.
961     A positive return value in store() means truncation.
962     Still logging a message in the log in this case.
963   */
964   if (table->field[10]->store(sql_text, sql_text_len, client_cs) < 0)
965     goto err;
966 
967   if (table->field[11]->store((longlong) thd->thread_id, TRUE))
968     goto err;
969 
970   /* Rows_affected */
971   if (table->field[12]->store(thd->get_stmt_da()->is_ok() ?
972                               (longlong) thd->get_stmt_da()->affected_rows() :
973                               0, TRUE))
974     goto err;
975 
976   /* log table entries are not replicated */
977   if (table->file->ha_write_row(table->record[0]))
978     goto err;
979 
980   result= FALSE;
981 
982 err:
983   thd->pop_internal_handler();
984 
985   if (result && !thd->killed)
986     sql_print_error("Failed to write to mysql.slow_log: %s",
987                     error_handler.message());
988 
989   if (need_rnd_end)
990   {
991     table->file->ha_rnd_end();
992     table->file->ha_release_auto_increment();
993   }
994   if (need_close)
995     close_log_table(thd, &open_tables_backup);
996   thd->time_zone_used= save_time_zone_used;
997   DBUG_RETURN(result);
998 }
999 
1000 int Log_to_csv_event_handler::
1001   activate_log(THD *thd, uint log_table_type)
1002 {
1003   TABLE_LIST table_list;
1004   TABLE *table;
1005   LEX_CSTRING *UNINIT_VAR(log_name);
1006   int result;
1007   Open_tables_backup open_tables_backup;
1008 
1009   DBUG_ENTER("Log_to_csv_event_handler::activate_log");
1010 
1011   if (log_table_type == QUERY_LOG_GENERAL)
1012   {
1013     log_name= &GENERAL_LOG_NAME;
1014   }
1015   else
1016   {
1017     DBUG_ASSERT(log_table_type == QUERY_LOG_SLOW);
1018 
1019     log_name= &SLOW_LOG_NAME;
1020   }
1021   table_list.init_one_table(&MYSQL_SCHEMA_NAME, log_name, 0, TL_WRITE_CONCURRENT_INSERT);
1022 
1023   table= open_log_table(thd, &table_list, &open_tables_backup);
1024   if (table)
1025   {
1026     result= 0;
1027     close_log_table(thd, &open_tables_backup);
1028   }
1029   else
1030     result= 1;
1031 
1032   DBUG_RETURN(result);
1033 }
1034 
/**
  Error-log entry point of the table handler.

  No error log table is implemented, so this is not expected to be called:
  it trips a debug assertion and otherwise returns FALSE without logging
  anything.

  @param level   unused
  @param format  unused
  @param args    unused

  @return always FALSE
*/
bool Log_to_csv_event_handler::
  log_error(enum loglevel level, const char *format, va_list args)
{
  /* No log table is implemented */
  DBUG_ASSERT(0);
  return FALSE;
}
1042 
/**
  Write an error message through vprint_msg_to_log().

  @param level   severity of the message
  @param format  format string for the message
  @param args    arguments for the format string

  @return the result of vprint_msg_to_log()
*/
bool Log_to_file_event_handler::
  log_error(enum loglevel level, const char *format,
            va_list args)
{
  return vprint_msg_to_log(level, format, args);
}
1049 
/**
  Call init_pthread_objects() on both file logs owned by this handler:
  the general query log (mysql_log) and the slow query log (mysql_slow_log).
*/
void Log_to_file_event_handler::init_pthread_objects()
{
  mysql_log.init_pthread_objects();
  mysql_slow_log.init_pthread_objects();
}
1055 
1056 
1057 /** Wrapper around MYSQL_LOG::write() for slow log. */
1058 
1059 bool Log_to_file_event_handler::
1060   log_slow(THD *thd, my_hrtime_t current_time,
1061            const char *user_host, size_t user_host_len,
1062            ulonglong query_utime, ulonglong lock_utime, bool is_command,
1063            const char *sql_text, size_t sql_text_len)
1064 {
1065   Silence_log_table_errors error_handler;
1066   thd->push_internal_handler(&error_handler);
1067   bool retval= mysql_slow_log.write(thd, hrtime_to_my_time(current_time),
1068                                     user_host, user_host_len,
1069                                     query_utime, lock_utime, is_command,
1070                                     sql_text, sql_text_len);
1071   thd->pop_internal_handler();
1072   return retval;
1073 }
1074 
1075 
1076 /**
1077    Wrapper around MYSQL_LOG::write() for general log. We need it since we
1078    want all log event handlers to have the same signature.
1079 */
1080 
1081 bool Log_to_file_event_handler::
1082   log_general(THD *thd, my_hrtime_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
1083               const char *command_type, size_t command_type_len,
1084               const char *sql_text, size_t sql_text_len,
1085               CHARSET_INFO *client_cs)
1086 {
1087   Silence_log_table_errors error_handler;
1088   thd->push_internal_handler(&error_handler);
1089   bool retval= mysql_log.write(hrtime_to_time(event_time), user_host,
1090                                user_host_len,
1091                                thread_id_arg, command_type, command_type_len,
1092                                sql_text, sql_text_len);
1093   thd->pop_internal_handler();
1094   return retval;
1095 }
1096 
1097 
1098 bool Log_to_file_event_handler::init()
1099 {
1100   if (!is_initialized)
1101   {
1102     if (global_system_variables.sql_log_slow)
1103       mysql_slow_log.open_slow_log(opt_slow_logname);
1104 
1105     if (opt_log)
1106       mysql_log.open_query_log(opt_logname);
1107 
1108     is_initialized= TRUE;
1109   }
1110 
1111   return FALSE;
1112 }
1113 
1114 
/** Call cleanup() on the general query log and on the slow query log. */
void Log_to_file_event_handler::cleanup()
{
  mysql_log.cleanup();
  mysql_slow_log.cleanup();
}
1120 
/**
  Reopen the files of the logs that are currently enabled.

  The general log file is reopened when opt_log is set, the slow log file
  when global_system_variables.sql_log_slow is set.
*/
void Log_to_file_event_handler::flush()
{
  /* reopen log files */
  if (opt_log)
    mysql_log.reopen_file();
  if (global_system_variables.sql_log_slow)
    mysql_slow_log.reopen_file();
}
1129 
1130 /*
1131   Log error with all enabled log event handlers
1132 
1133   SYNOPSIS
1134     error_log_print()
1135 
1136     level             The level of the error significance: NOTE,
1137                       WARNING or ERROR.
1138     format            format string for the error message
1139     args              list of arguments for the format string
1140 
1141   RETURN
1142     FALSE - OK
1143     TRUE - error occurred
1144 */
1145 
1146 bool LOGGER::error_log_print(enum loglevel level, const char *format,
1147                              va_list args)
1148 {
1149   bool error= FALSE;
1150   Log_event_handler **current_handler;
1151 
1152   /* currently we don't need locking here as there is no error_log table */
1153   for (current_handler= error_log_handler_list ; *current_handler ;)
1154     error= (*current_handler++)->log_error(level, format, args) || error;
1155 
1156   return error;
1157 }
1158 
1159 
/**
  First stage of logger shutdown.

  Destroys LOCK_logger, cleans up and deletes the table log handler, and
  cleans up the file log handler. The file log handler object itself is
  not deleted here; cleanup_end() does that.
*/
void LOGGER::cleanup_base()
{
  DBUG_ASSERT(inited == 1);
  mysql_rwlock_destroy(&LOCK_logger);
  if (table_log_handler)
  {
    table_log_handler->cleanup();
    delete table_log_handler;
    table_log_handler= NULL;
  }
  /* Only cleaned up, not deleted: the object is still needed afterwards */
  if (file_log_handler)
    file_log_handler->cleanup();
}
1173 
1174 
/**
  Final stage of logger shutdown.

  Deletes the file log handler, if any, and marks the logger as not
  initialized (inited= 0).
*/
void LOGGER::cleanup_end()
{
  DBUG_ASSERT(inited == 1);
  if (file_log_handler)
  {
    delete file_log_handler;
    file_log_handler=NULL;
  }
  inited= 0;
}
1185 
1186 
/**
  Perform basic log initialization: create file-based log handler and
  init error log.

  Expects the logger not to be initialized yet (inited == 0) and marks it
  as initialized. Also initializes the pthread objects of the file log
  handler and the LOCK_logger rwlock.
*/
void LOGGER::init_base()
{
  DBUG_ASSERT(inited == 0);
  inited= 1;

  /*
    Here we create file log handler. We don't do it for the table log handler
    here as it cannot be created so early. The reason is THD initialization,
    which depends on the system variables (parsed later).
  */
  if (!file_log_handler)
    file_log_handler= new Log_to_file_event_handler;

  /* by default we use traditional error log */
  init_error_log(LOG_FILE);

  file_log_handler->init_pthread_objects();
  mysql_rwlock_init(key_rwlock_LOCK_logger, &LOCK_logger);
}
1210 
1211 
1212 void LOGGER::init_log_tables()
1213 {
1214   if (!table_log_handler)
1215     table_log_handler= new Log_to_csv_event_handler;
1216 
1217   if (!is_log_tables_initialized &&
1218       !table_log_handler->init() && !file_log_handler->init())
1219     is_log_tables_initialized= TRUE;
1220 }
1221 
1222 
1223 /**
1224   Close and reopen the slow log (with locks).
1225 
1226   @returns FALSE.
1227 */
1228 bool LOGGER::flush_slow_log()
1229 {
1230   /*
1231     Now we lock logger, as nobody should be able to use logging routines while
1232     log tables are closed
1233   */
1234   logger.lock_exclusive();
1235 
1236   /* Reopen slow log file */
1237   if (global_system_variables.sql_log_slow)
1238     file_log_handler->get_mysql_slow_log()->reopen_file();
1239 
1240   /* End of log flush */
1241   logger.unlock();
1242 
1243   return 0;
1244 }
1245 
1246 
1247 /**
1248   Close and reopen the general log (with locks).
1249 
1250   @returns FALSE.
1251 */
1252 bool LOGGER::flush_general_log()
1253 {
1254   /*
1255     Now we lock logger, as nobody should be able to use logging routines while
1256     log tables are closed
1257   */
1258   logger.lock_exclusive();
1259 
1260   /* Reopen general log file */
1261   if (opt_log)
1262     file_log_handler->get_mysql_log()->reopen_file();
1263 
1264   /* End of log flush */
1265   logger.unlock();
1266 
1267   return 0;
1268 }
1269 
1270 
1271 /*
1272   Log slow query with all enabled log event handlers
1273 
1274   SYNOPSIS
1275     slow_log_print()
1276 
1277     thd                 THD of the query being logged
1278     query               The query being logged
1279     query_length        The length of the query string
1280     current_utime       Current time in microseconds (from undefined start)
1281 
1282   RETURN
1283     FALSE   OK
1284     TRUE    error occurred
1285 */
1286 
1287 bool LOGGER::slow_log_print(THD *thd, const char *query, size_t query_length,
1288                             ulonglong current_utime)
1289 
1290 {
1291   bool error= FALSE;
1292   Log_event_handler **current_handler;
1293   bool is_command= FALSE;
1294   char user_host_buff[MAX_USER_HOST_SIZE + 1];
1295   Security_context *sctx= thd->security_ctx;
1296   uint user_host_len= 0;
1297   ulonglong query_utime, lock_utime;
1298 
1299   DBUG_ASSERT(thd->enable_slow_log);
1300   /*
1301     Print the message to the buffer if we have slow log enabled
1302   */
1303 
1304   if (*slow_log_handler_list)
1305   {
1306     /* do not log slow queries from replication threads */
1307     if (!thd->variables.sql_log_slow)
1308       return 0;
1309 
1310     lock_shared();
1311     if (!global_system_variables.sql_log_slow)
1312     {
1313       unlock();
1314       return 0;
1315     }
1316 
1317     /* fill in user_host value: the format is "%s[%s] @ %s [%s]" */
1318     user_host_len= (uint)(strxnmov(user_host_buff, MAX_USER_HOST_SIZE,
1319                              sctx->priv_user, "[",
1320                              sctx->user ? sctx->user : (thd->slave_thread ? "SQL_SLAVE" : ""), "] @ ",
1321                              sctx->host ? sctx->host : "", " [",
1322                              sctx->ip ? sctx->ip : "", "]", NullS) -
1323                     user_host_buff);
1324 
1325     DBUG_ASSERT(thd->start_utime);
1326     DBUG_ASSERT(thd->start_time);
1327     query_utime= (current_utime - thd->start_utime);
1328     lock_utime=  (thd->utime_after_lock - thd->start_utime);
1329     my_hrtime_t current_time= { hrtime_from_time(thd->start_time) +
1330                                 thd->start_time_sec_part + query_utime };
1331 
1332     if (!query)
1333     {
1334       is_command= TRUE;
1335       query= command_name[thd->get_command()].str;
1336       query_length= (uint)command_name[thd->get_command()].length;
1337     }
1338 
1339     for (current_handler= slow_log_handler_list; *current_handler ;)
1340       error= (*current_handler++)->log_slow(thd, current_time,
1341                                             user_host_buff, user_host_len,
1342                                             query_utime, lock_utime, is_command,
1343                                             query, query_length) || error;
1344 
1345     unlock();
1346   }
1347   return error;
1348 }
1349 
1350 bool LOGGER::general_log_write(THD *thd, enum enum_server_command command,
1351                                const char *query, size_t query_length)
1352 {
1353   bool error= FALSE;
1354   Log_event_handler **current_handler= general_log_handler_list;
1355   char user_host_buff[MAX_USER_HOST_SIZE + 1];
1356   uint user_host_len= 0;
1357   my_hrtime_t current_time;
1358 
1359   DBUG_ASSERT(thd);
1360 
1361   user_host_len= make_user_name(thd, user_host_buff);
1362 
1363   current_time= my_hrtime();
1364 
1365   mysql_audit_general_log(thd, hrtime_to_time(current_time),
1366                           user_host_buff, user_host_len,
1367                           command_name[(uint) command].str,
1368                           (uint)command_name[(uint) command].length,
1369                           query, (uint)query_length);
1370 
1371   if (opt_log && log_command(thd, command))
1372   {
1373     lock_shared();
1374     while (*current_handler)
1375       error|= (*current_handler++)->
1376         log_general(thd, current_time, user_host_buff,
1377                     user_host_len, thd->thread_id,
1378                     command_name[(uint) command].str,
1379                     command_name[(uint) command].length,
1380                     query, query_length,
1381                     thd->variables.character_set_client) || error;
1382     unlock();
1383   }
1384 
1385   return error;
1386 }
1387 
1388 bool LOGGER::general_log_print(THD *thd, enum enum_server_command command,
1389                                const char *format, va_list args)
1390 {
1391   size_t message_buff_len= 0;
1392   char message_buff[MAX_LOG_BUFFER_SIZE];
1393 
1394   /* prepare message */
1395   if (format)
1396     message_buff_len= my_vsnprintf(message_buff, sizeof(message_buff),
1397                                    format, args);
1398   else
1399     message_buff[0]= '\0';
1400 
1401   return general_log_write(thd, command, message_buff, message_buff_len);
1402 }
1403 
1404 void LOGGER::init_error_log(ulonglong error_log_printer)
1405 {
1406   if (error_log_printer & LOG_NONE)
1407   {
1408     error_log_handler_list[0]= 0;
1409     return;
1410   }
1411 
1412   switch (error_log_printer) {
1413   case LOG_FILE:
1414     error_log_handler_list[0]= file_log_handler;
1415     error_log_handler_list[1]= 0;
1416     break;
1417     /* these two are disabled for now */
1418   case LOG_TABLE:
1419     DBUG_ASSERT(0);
1420     break;
1421   case LOG_TABLE|LOG_FILE:
1422     DBUG_ASSERT(0);
1423     break;
1424   }
1425 }
1426 
1427 void LOGGER::init_slow_log(ulonglong slow_log_printer)
1428 {
1429   if (slow_log_printer & LOG_NONE)
1430   {
1431     slow_log_handler_list[0]= 0;
1432     return;
1433   }
1434 
1435   switch (slow_log_printer) {
1436   case LOG_FILE:
1437     slow_log_handler_list[0]= file_log_handler;
1438     slow_log_handler_list[1]= 0;
1439     break;
1440   case LOG_TABLE:
1441     slow_log_handler_list[0]= table_log_handler;
1442     slow_log_handler_list[1]= 0;
1443     break;
1444   case LOG_TABLE|LOG_FILE:
1445     slow_log_handler_list[0]= file_log_handler;
1446     slow_log_handler_list[1]= table_log_handler;
1447     slow_log_handler_list[2]= 0;
1448     break;
1449   }
1450 }
1451 
1452 void LOGGER::init_general_log(ulonglong general_log_printer)
1453 {
1454   if (general_log_printer & LOG_NONE)
1455   {
1456     general_log_handler_list[0]= 0;
1457     return;
1458   }
1459 
1460   switch (general_log_printer) {
1461   case LOG_FILE:
1462     general_log_handler_list[0]= file_log_handler;
1463     general_log_handler_list[1]= 0;
1464     break;
1465   case LOG_TABLE:
1466     general_log_handler_list[0]= table_log_handler;
1467     general_log_handler_list[1]= 0;
1468     break;
1469   case LOG_TABLE|LOG_FILE:
1470     general_log_handler_list[0]= file_log_handler;
1471     general_log_handler_list[1]= table_log_handler;
1472     general_log_handler_list[2]= 0;
1473     break;
1474   }
1475 }
1476 
1477 
/**
  Enable the slow or the general query log.

  With the logger lock held exclusively: if the requested log is currently
  off, its log file is opened and table_log_handler->activate_log() is
  called for the matching log table. If that call fails, the file is closed
  again and TRUE is returned. Otherwise the handler list is rebuilt from
  log_output_options and the corresponding enable flag is set.
  Nothing is done if the log is already enabled.

  @param thd       thread handle passed on to activate_log()
  @param log_type  QUERY_LOG_SLOW or QUERY_LOG_GENERAL

  @return
    @retval FALSE  OK (including the case where the log was already on)
    @retval TRUE   activate_log() reported an error
*/
bool LOGGER::activate_log_handler(THD* thd, uint log_type)
{
  MYSQL_QUERY_LOG *file_log;
  bool res= FALSE;
  lock_exclusive();
  switch (log_type) {
  case QUERY_LOG_SLOW:
    if (!global_system_variables.sql_log_slow)
    {
      file_log= file_log_handler->get_mysql_slow_log();

      /* The result of open_slow_log() is not checked here */
      file_log->open_slow_log(opt_slow_logname);
      if (table_log_handler->activate_log(thd, QUERY_LOG_SLOW))
      {
        /* Error printed by open table in activate_log() */
        res= TRUE;
        file_log->close(0);
      }
      else
      {
        init_slow_log(log_output_options);
        global_system_variables.sql_log_slow= TRUE;
      }
    }
    break;
  case QUERY_LOG_GENERAL:
    if (!opt_log)
    {
      file_log= file_log_handler->get_mysql_log();

      /* The result of open_query_log() is not checked here */
      file_log->open_query_log(opt_logname);
      if (table_log_handler->activate_log(thd, QUERY_LOG_GENERAL))
      {
        /* Error printed by open table in activate_log() */
        res= TRUE;
        file_log->close(0);
      }
      else
      {
        init_general_log(log_output_options);
        opt_log= TRUE;
      }
    }
    break;
  default:
    DBUG_ASSERT(0);
  }
  unlock();
  return res;
}
1528 
1529 
/**
  Disable the slow or the general query log.

  If the log is currently enabled, its log file is closed and the enable
  flag is cleared while the logger lock is held exclusively. Nothing is
  done if the log is already disabled.

  @param thd       not used
  @param log_type  QUERY_LOG_SLOW or QUERY_LOG_GENERAL
*/
void LOGGER::deactivate_log_handler(THD *thd, uint log_type)
{
  my_bool *tmp_opt= 0;
  MYSQL_LOG *UNINIT_VAR(file_log);

  /* Pick the enable flag and the file log that belong to log_type */
  switch (log_type) {
  case QUERY_LOG_SLOW:
    tmp_opt= &global_system_variables.sql_log_slow;
    file_log= file_log_handler->get_mysql_slow_log();
    break;
  case QUERY_LOG_GENERAL:
    tmp_opt= &opt_log;
    file_log= file_log_handler->get_mysql_log();
    break;
  default:
    MY_ASSERT_UNREACHABLE();
  }

  /* Already disabled. Note that this test is made before taking the lock */
  if (!(*tmp_opt))
    return;

  lock_exclusive();
  file_log->close(0);
  *tmp_opt= FALSE;
  unlock();
}
1556 
1557 
/**
  Initialize the table log handler.

  There is nothing to set up here.

  @return always 0 (success)
*/
bool Log_to_csv_event_handler::init()
{
  return 0;
}
1563 
1564 int LOGGER::set_handlers(ulonglong error_log_printer,
1565                          ulonglong slow_log_printer,
1566                          ulonglong general_log_printer)
1567 {
1568   /* error log table is not supported yet */
1569   DBUG_ASSERT(error_log_printer < LOG_TABLE);
1570 
1571   lock_exclusive();
1572 
1573   if ((slow_log_printer & LOG_TABLE || general_log_printer & LOG_TABLE) &&
1574       !is_log_tables_initialized)
1575   {
1576     slow_log_printer= (slow_log_printer & ~LOG_TABLE) | LOG_FILE;
1577     general_log_printer= (general_log_printer & ~LOG_TABLE) | LOG_FILE;
1578 
1579     sql_print_error("Failed to initialize log tables. "
1580                     "Falling back to the old-fashioned logs");
1581   }
1582 
1583   init_error_log(error_log_printer);
1584   init_slow_log(slow_log_printer);
1585   init_general_log(general_log_printer);
1586 
1587   unlock();
1588 
1589   return 0;
1590 }
1591 
1592  /*
1593   Save position of binary log transaction cache.
1594 
  SYNOPSIS
1596     binlog_trans_log_savepos()
1597 
1598     thd      The thread to take the binlog data from
1599     pos      Pointer to variable where the position will be stored
1600 
1601   DESCRIPTION
1602 
1603     Save the current position in the binary log transaction cache into
1604     the variable pointed to by 'pos'
1605  */
1606 
static void
binlog_trans_log_savepos(THD *thd, my_off_t *pos)
{
  DBUG_ENTER("binlog_trans_log_savepos");
  DBUG_ASSERT(pos != NULL);
  /* The cache manager is obtained through binlog_setup_trx_data() */
  binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
  DBUG_ASSERT((WSREP(thd) && wsrep_emulate_bin_log) || mysql_bin_log.is_open());
  /* The saved position is the current byte position of the trx cache */
  *pos= cache_mngr->trx_cache.get_byte_position();
  DBUG_PRINT("return", ("*pos: %lu", (ulong) *pos));
  DBUG_VOID_RETURN;
}
1618 
1619 
1620 /*
1621   Truncate the binary log transaction cache.
1622 
  SYNOPSIS
1624     binlog_trans_log_truncate()
1625 
1626     thd      The thread to take the binlog data from
1627     pos      Position to truncate to
1628 
1629   DESCRIPTION
1630 
1631     Truncate the binary log to the given position. Will not change
1632     anything else.
1633 
1634  */
static void
binlog_trans_log_truncate(THD *thd, my_off_t pos)
{
  DBUG_ENTER("binlog_trans_log_truncate");
  DBUG_PRINT("enter", ("pos: %lu", (ulong) pos));

  /* The thread must already have binlog data attached for binlog_hton */
  DBUG_ASSERT(thd_get_ha_data(thd, binlog_hton) != NULL);
  /* Only true if binlog_trans_log_savepos() wasn't called before */
  DBUG_ASSERT(pos != ~(my_off_t) 0);

  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
  /* Only the trx cache is rolled back to pos */
  cache_mngr->trx_cache.restore_savepoint(pos);
  DBUG_VOID_RETURN;
}
1650 
1651 
/**
  Set up the binlog handlerton.

  This function is mostly a placeholder. Conceptually, binlog
  initialization (now mostly done in MYSQL_BIN_LOG::open) should be
  moved here.

  Stores the handlerton in binlog_hton and fills in its state, type,
  savepoint size, callbacks and flags.

  @param p  the handlerton to initialize

  @return always 0
*/

int binlog_init(void *p)
{
  binlog_hton= (handlerton *)p;
  /* Reported as available when either WSREP_ON or opt_bin_log is set */
  binlog_hton->state= (WSREP_ON || opt_bin_log) ? SHOW_OPTION_YES
                                                : SHOW_OPTION_NO;
  binlog_hton->db_type=DB_TYPE_BINLOG;
  /* Per-savepoint data is a single my_off_t */
  binlog_hton->savepoint_offset= sizeof(my_off_t);
  binlog_hton->close_connection= binlog_close_connection;
  binlog_hton->savepoint_set= binlog_savepoint_set;
  binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
  binlog_hton->savepoint_rollback_can_release_mdl=
                                     binlog_savepoint_rollback_can_release_mdl;
  binlog_hton->commit= binlog_commit;
  binlog_hton->rollback= binlog_rollback;
  binlog_hton->prepare= binlog_prepare;
  binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
  binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
  return 0;
}
1677 
1678 #ifdef WITH_WSREP
1679 #include "wsrep_binlog.h"
1680 #endif /* WITH_WSREP */
/**
  close_connection callback of the binlog handlerton.

  Detaches the binlog_cache_mngr stored as ha_data for binlog_hton from the
  connection, runs its destructor and releases its memory with my_free().
  Both caches are expected to be empty at this point (debug assertion).

  With WSREP, a non-empty trx cache is first reported with warnings and
  dumped.

  @param hton  not used
  @param thd   connection being closed

  @return always 0
*/
static int binlog_close_connection(handlerton *hton, THD *thd)
{
  DBUG_ENTER("binlog_close_connection");
  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
#ifdef WITH_WSREP
  if (WSREP(thd) && cache_mngr && !cache_mngr->trx_cache.empty()) {
    IO_CACHE* cache= get_trans_log(thd);
    uchar *buf;
    size_t len=0;
    wsrep_write_cache_buf(cache, &buf, &len);
    WSREP_WARN("binlog trx cache not empty (%zu bytes) @ connection close %lld",
               len, (longlong) thd->thread_id);
    if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);

    /*
      The stmt cache is reported in the same way. This happens whenever the
      trx cache was non-empty; the stmt cache itself is not tested first.
    */
    cache = cache_mngr->get_binlog_cache_log(false);
    wsrep_write_cache_buf(cache, &buf, &len);
    WSREP_WARN("binlog stmt cache not empty (%zu bytes) @ connection close %lld",
               len, (longlong) thd->thread_id);
    if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
  }
#endif /* WITH_WSREP */
  /*
    NOTE(review): cache_mngr is dereferenced unconditionally from here on,
    while the WSREP branch above tests it for NULL. Presumably the callback
    is only invoked when ha_data is set; confirm against the caller.
  */
  DBUG_ASSERT(cache_mngr->trx_cache.empty() && cache_mngr->stmt_cache.empty());
  thd_set_ha_data(thd, binlog_hton, NULL);
  /* Explicit destructor call followed by my_free() */
  cache_mngr->~binlog_cache_mngr();
  my_free(cache_mngr);
  DBUG_RETURN(0);
}
1709 
/*
  This function flushes a cache upon commit/rollback.

  SYNOPSIS
    binlog_flush_cache()

    thd        The thread whose transaction should be ended
    cache_mngr Pointer to the binlog_cache_mngr to use
    end_ev     The end event to use (COMMIT, ROLLBACK, or commit XID)
    all        True if the entire transaction should be ended, false if
               only the statement transaction should be ended.
    using_stmt True if the statement cache should be flushed
    using_trx  True if the transaction cache should be flushed

  DESCRIPTION

    End the current transaction or statement. The transaction can be either
    a real transaction or a statement transaction.

    This can be to commit a transaction, with a COMMIT query event or an XA
    commit XID event. But it can also be to rollback a transaction with a
    ROLLBACK query event, used for rolling back transactions which also
    contain updates to non-transactional tables. Or it can be a flush of
    a statement cache.

  RETURN
    0        OK
    nonzero  flushing a pending rows event failed (1 is returned, and the
             caches are not reset), or the error returned by
             mysql_bin_log.write_transaction_to_binlog()
 */

static int
binlog_flush_cache(THD *thd, binlog_cache_mngr *cache_mngr,
                   Log_event *end_ev, bool all, bool using_stmt,
                   bool using_trx)
{
  int error= 0;
  DBUG_ENTER("binlog_flush_cache");
  DBUG_PRINT("enter", ("end_ev: %p", end_ev));

  /* Is there anything to write in one of the selected caches? */
  if ((using_stmt && !cache_mngr->stmt_cache.empty()) ||
      (using_trx && !cache_mngr->trx_cache.empty()))
  {
    /* Flush the pending rows event of each selected cache first */
    if (using_stmt && thd->binlog_flush_pending_rows_event(TRUE, FALSE))
      DBUG_RETURN(1);
    if (using_trx && thd->binlog_flush_pending_rows_event(TRUE, TRUE))
      DBUG_RETURN(1);

    /*
      Doing a commit or a rollback including non-transactional tables,
      i.e., ending a transaction where we might write the transaction
      cache to the binary log.

      We can always end the statement when ending a transaction since
      transactions are not allowed inside stored functions.  If they
      were, we would have to ensure that we're not ending a statement
      inside a stored function.
    */
    error= mysql_bin_log.write_transaction_to_binlog(thd, cache_mngr,
                                                     end_ev, all,
                                                     using_stmt, using_trx);
  }
  else
  {
    /*
      This can happen in row-format binlog with something like
          BEGIN; INSERT INTO nontrans_table; INSERT IGNORE INTO trans_table;
      The nontrans_table is written directly into the binlog before commit,
      and if the trans_table is ignored there will be no rows to write when
      we get here.

      So there is no work to do. Therefore, we will not increment any XID
      count, so we must not decrement any XID count in unlog().
    */
    cache_mngr->need_unlog= 0;
  }
  /* The selected caches are reset whether or not the write succeeded */
  cache_mngr->reset(using_stmt, using_trx);

  DBUG_ASSERT((!using_stmt || cache_mngr->stmt_cache.empty()) &&
              (!using_trx || cache_mngr->trx_cache.empty()));
  DBUG_RETURN(error);
}
1787 
1788 
1789 /**
1790   This function flushes the stmt-cache upon commit.
1791 
1792   @param thd                The thread whose transaction should be flushed
1793   @param cache_mngr         Pointer to the cache manager
1794 
1795   @return
1796     nonzero if an error pops up when flushing the cache.
1797 */
1798 static inline int
1799 binlog_commit_flush_stmt_cache(THD *thd, bool all,
1800                                binlog_cache_mngr *cache_mngr)
1801 {
1802   DBUG_ENTER("binlog_commit_flush_stmt_cache");
1803 #ifdef WITH_WSREP
1804   if (thd->wsrep_mysql_replicated > 0)
1805   {
1806     DBUG_ASSERT(WSREP(thd));
1807     WSREP_DEBUG("avoiding binlog_commit_flush_trx_cache: %d",
1808                 thd->wsrep_mysql_replicated);
1809     return 0;
1810   }
1811 #endif
1812 
1813   Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1814                           FALSE, TRUE, TRUE, 0);
1815   DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, FALSE));
1816 }
1817 
1818 /**
1819   This function flushes the trx-cache upon commit.
1820 
1821   @param thd                The thread whose transaction should be flushed
1822   @param cache_mngr         Pointer to the cache manager
1823 
1824   @return
1825     nonzero if an error pops up when flushing the cache.
1826 */
1827 static inline int
1828 binlog_commit_flush_trx_cache(THD *thd, bool all, binlog_cache_mngr *cache_mngr)
1829 {
1830   DBUG_ENTER("binlog_commit_flush_trx_cache");
1831   Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1832                           TRUE, TRUE, TRUE, 0);
1833   DBUG_RETURN(binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
1834 }
1835 
1836 /**
1837   This function flushes the trx-cache upon rollback.
1838 
1839   @param thd                The thread whose transaction should be flushed
1840   @param cache_mngr         Pointer to the cache manager
1841 
1842   @return
1843     nonzero if an error pops up when flushing the cache.
1844 */
1845 static inline int
1846 binlog_rollback_flush_trx_cache(THD *thd, bool all,
1847                                 binlog_cache_mngr *cache_mngr)
1848 {
1849   Query_log_event end_evt(thd, STRING_WITH_LEN("ROLLBACK"),
1850                           TRUE, TRUE, TRUE, 0);
1851   return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, FALSE, TRUE));
1852 }
1853 
1854 /**
1855   This function flushes the trx-cache upon commit.
1856 
1857   @param thd                The thread whose transaction should be flushed
1858   @param cache_mngr         Pointer to the cache manager
1859   @param xid                Transaction Id
1860 
1861   @return
1862     nonzero if an error pops up when flushing the cache.
1863 */
1864 static inline int
1865 binlog_commit_flush_xid_caches(THD *thd, binlog_cache_mngr *cache_mngr,
1866                                bool all, my_xid xid)
1867 {
1868   if (xid)
1869   {
1870     Xid_log_event end_evt(thd, xid, TRUE);
1871     return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
1872   }
1873   else
1874   {
1875     /*
1876       Empty xid occurs in XA COMMIT ... ONE PHASE.
1877       In this case, we do not have a MySQL xid for the transaction, and the
1878       external XA transaction coordinator will have to handle recovery if
1879       needed. So we end the transaction with a plain COMMIT query event.
1880     */
1881     Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
1882                             TRUE, TRUE, TRUE, 0);
1883     return (binlog_flush_cache(thd, cache_mngr, &end_evt, all, TRUE, TRUE));
1884   }
1885 }
1886 
1887 /**
1888   This function truncates the transactional cache upon committing or rolling
1889   back either a transaction or a statement.
1890 
1891   @param thd        The thread whose transaction should be flushed
1892   @param cache_mngr Pointer to the cache data to be flushed
1893   @param all        @c true means truncate the transaction, otherwise the
1894                     statement must be truncated.
1895 
1896   @return
1897     nonzero if an error pops up when truncating the transactional cache.
1898 */
1899 static int
1900 binlog_truncate_trx_cache(THD *thd, binlog_cache_mngr *cache_mngr, bool all)
1901 {
1902   DBUG_ENTER("binlog_truncate_trx_cache");
1903   int error=0;
1904   /*
1905     This function handles transactional changes and as such this flag
1906     equals to true.
1907   */
1908   bool const is_transactional= TRUE;
1909 
1910   DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1911                       FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1912                       FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1913                       all ? "all" : "stmt"));
1914 
1915   thd->binlog_remove_pending_rows_event(TRUE, is_transactional);
1916   /*
1917     If rolling back an entire transaction or a single statement not
1918     inside a transaction, we reset the transaction cache.
1919   */
1920   if (ending_trans(thd, all))
1921   {
1922     if (cache_mngr->trx_cache.has_incident())
1923       error= mysql_bin_log.write_incident(thd);
1924 
1925     thd->clear_binlog_table_maps();
1926 
1927     cache_mngr->reset(false, true);
1928   }
1929   /*
1930     If rolling back a statement in a transaction, we truncate the
1931     transaction cache to remove the statement.
1932   */
1933   else
1934     cache_mngr->trx_cache.restore_prev_position();
1935 
1936   DBUG_ASSERT(thd->binlog_get_pending_rows_event(is_transactional) == NULL);
1937   DBUG_RETURN(error);
1938 }
1939 
1940 static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1941 {
1942   /*
1943     do nothing.
1944     just pretend we can do 2pc, so that MySQL won't
1945     switch to 1pc.
1946     real work will be done in MYSQL_BIN_LOG::log_and_order()
1947   */
1948   return 0;
1949 }
1950 
1951 /*
  We flush the cache wrapped in a begin/rollback if:
1953     . aborting a single or multi-statement transaction and;
1954     . the OPTION_KEEP_LOG is active or;
1955     . the format is STMT and a non-trans table was updated or;
1956     . the format is MIXED and a temporary non-trans table was
1957       updated or;
1958     . the format is MIXED, non-trans table was updated and
1959       aborting a single statement transaction;
1960 */
1961 static bool trans_cannot_safely_rollback(THD *thd, bool all)
1962 {
1963   DBUG_ASSERT(ending_trans(thd, all));
1964 
1965   return ((thd->variables.option_bits & OPTION_KEEP_LOG) ||
1966           (trans_has_updated_non_trans_table(thd) &&
1967            thd->wsrep_binlog_format() == BINLOG_FORMAT_STMT) ||
1968           (thd->transaction.all.has_modified_non_trans_temp_table() &&
1969            thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED) ||
1970           (trans_has_updated_non_trans_table(thd) &&
1971            ending_single_stmt_trans(thd,all) &&
1972            thd->wsrep_binlog_format() == BINLOG_FORMAT_MIXED));
1973 }
1974 
1975 
1976 /**
1977   This function is called once after each statement.
1978 
1979   It has the responsibility to flush the caches to the binary log on commits.
1980 
1981   @param hton  The binlog handlerton.
1982   @param thd   The client thread that executes the transaction.
1983   @param all   This is @c true if this is a real transaction commit, and
1984                @false otherwise.
1985 
1986   @see handlerton::commit
1987 */
1988 static int binlog_commit(handlerton *hton, THD *thd, bool all)
1989 {
1990   int error= 0;
1991   PSI_stage_info org_stage;
1992   DBUG_ENTER("binlog_commit");
1993 
1994   binlog_cache_mngr *const cache_mngr=
1995     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
1996 
1997   if (!cache_mngr)
1998   {
1999     DBUG_ASSERT(WSREP(thd));
2000     DBUG_RETURN(0);
2001   }
2002 
2003   DBUG_PRINT("debug",
2004              ("all: %d, in_transaction: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
2005               all,
2006               YESNO(thd->in_multi_stmt_transaction_mode()),
2007               YESNO(thd->transaction.all.modified_non_trans_table),
2008               YESNO(thd->transaction.stmt.modified_non_trans_table)));
2009 
2010 
2011   thd->backup_stage(&org_stage);
2012   THD_STAGE_INFO(thd, stage_binlog_write);
2013   if (!cache_mngr->stmt_cache.empty())
2014   {
2015     error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
2016   }
2017 
2018   if (cache_mngr->trx_cache.empty())
2019   {
2020     /*
2021       we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
2022     */
2023     cache_mngr->reset(false, true);
2024     THD_STAGE_INFO(thd, org_stage);
2025     DBUG_RETURN(error);
2026   }
2027 
2028   /*
2029     We commit the transaction if:
2030      - We are not in a transaction and committing a statement, or
2031      - We are in a transaction and a full transaction is committed.
2032     Otherwise, we accumulate the changes.
2033   */
2034   if (likely(!error) && ending_trans(thd, all))
2035     error= binlog_commit_flush_trx_cache(thd, all, cache_mngr);
2036 
2037   /*
2038     This is part of the stmt rollback.
2039   */
2040   if (!all)
2041     cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
2042 
2043   THD_STAGE_INFO(thd, org_stage);
2044   DBUG_RETURN(error);
2045 }
2046 
2047 /**
2048   This function is called when a transaction or a statement is rolled back.
2049 
2050   @param hton  The binlog handlerton.
2051   @param thd   The client thread that executes the transaction.
2052   @param all   This is @c true if this is a real transaction rollback, and
2053                @false otherwise.
2054 
2055   @see handlerton::rollback
2056 */
static int binlog_rollback(handlerton *hton, THD *thd, bool all)
{
  DBUG_ENTER("binlog_rollback");
  int error= 0;
  binlog_cache_mngr *const cache_mngr=
    (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);

  /* No binlog cache manager is attached to this THD: nothing to roll back. */
  if (!cache_mngr)
  {
    DBUG_ASSERT(WSREP(thd));
    DBUG_RETURN(0);
  }

  DBUG_PRINT("debug", ("all: %s, all.modified_non_trans_table: %s, stmt.modified_non_trans_table: %s",
                       YESNO(all),
                       YESNO(thd->transaction.all.modified_non_trans_table),
                       YESNO(thd->transaction.stmt.modified_non_trans_table)));

  /*
    Step 1: the statement cache.
    If an incident event is set we do not flush the content of the statement
    cache because it may be corrupted. Instead the incident is written and
    the statement cache is reset. Otherwise a non-empty statement cache is
    flushed even though we are rolling back.
  */
  if (cache_mngr->stmt_cache.has_incident())
  {
    error= mysql_bin_log.write_incident(thd);
    cache_mngr->reset(true, false);
  }
  else if (!cache_mngr->stmt_cache.empty())
  {
    error= binlog_commit_flush_stmt_cache(thd, all, cache_mngr);
  }

  /* Step 2: the transaction cache. An empty one only needs a reset. */
  if (cache_mngr->trx_cache.empty())
  {
    /*
      we're here because cache_log was flushed in MYSQL_BIN_LOG::log_xid()
    */
    cache_mngr->reset(false, true);
    DBUG_RETURN(error);
  }
  /*
    A binlog write error is already recorded on this THD (and we are not
    emulating the binlog for wsrep): drop what the statement put in the
    cache. Note the |= here: an error from step 1 is preserved.
  */
  if (!wsrep_emulate_bin_log && mysql_bin_log.check_write_error(thd))
  {
    /*
      "all == true" means that a "rollback statement" triggered the error and
      this function was called. However, this must not happen as a rollback
      is written directly to the binary log. And in auto-commit mode, a single
      statement that is rolled back has the flag all == false.
    */
    DBUG_ASSERT(!all);
    /*
      We reach this point if the effect of a statement did not properly get into
      a cache and need to be rolled back.
    */
    error |= binlog_truncate_trx_cache(thd, cache_mngr, all);
  }
  else if (likely(!error))
  {
    /*
      The transaction ends but its cache cannot simply be thrown away
      (see trans_cannot_safely_rollback()): flush it via
      binlog_rollback_flush_trx_cache().
    */
    if (ending_trans(thd, all) && trans_cannot_safely_rollback(thd, all))
      error= binlog_rollback_flush_trx_cache(thd, all, cache_mngr);
    /*
      Truncate the cache if:
        . aborting a single or multi-statement transaction or;
        . the current statement created or dropped a temporary table
          while having actual STATEMENT format;
        . the format is not STMT or no non-trans table was
          updated and;
        . the format is not MIXED or no temporary non-trans table
          was updated.
    */
    else if (ending_trans(thd, all) ||
             (!(thd->transaction.stmt.has_created_dropped_temp_table() &&
                !thd->is_current_stmt_binlog_format_row()) &&
              (!stmt_has_updated_non_trans_table(thd) ||
               thd->wsrep_binlog_format() != BINLOG_FORMAT_STMT) &&
              (!thd->transaction.stmt.has_modified_non_trans_temp_table() ||
               thd->wsrep_binlog_format() != BINLOG_FORMAT_MIXED)))
      error= binlog_truncate_trx_cache(thd, cache_mngr, all);
  }

  /*
    This is part of the stmt rollback.
    The saved statement start position is set to MY_OFF_T_UNDEF.
  */
  if (!all)
    cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);

  DBUG_RETURN(error);
}
2144 
2145 
2146 void binlog_reset_cache(THD *thd)
2147 {
2148   binlog_cache_mngr *const cache_mngr= opt_bin_log ?
2149     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton) : 0;
2150   DBUG_ENTER("binlog_reset_cache");
2151   if (cache_mngr)
2152   {
2153     thd->binlog_remove_pending_rows_event(TRUE, TRUE);
2154     cache_mngr->reset(true, true);
2155   }
2156   DBUG_VOID_RETURN;
2157 }
2158 
2159 
2160 void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
2161 {
2162   DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
2163 
2164   write_error= 1;
2165 
2166   if (unlikely(check_write_error(thd)))
2167     DBUG_VOID_RETURN;
2168 
2169   if (my_errno == EFBIG)
2170   {
2171     if (is_transactional)
2172     {
2173       my_message(ER_TRANS_CACHE_FULL, ER_THD(thd, ER_TRANS_CACHE_FULL), MYF(MY_WME));
2174     }
2175     else
2176     {
2177       my_message(ER_STMT_CACHE_FULL, ER_THD(thd, ER_STMT_CACHE_FULL), MYF(MY_WME));
2178     }
2179   }
2180   else
2181   {
2182     my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name, errno);
2183   }
2184 
2185   DBUG_VOID_RETURN;
2186 }
2187 
2188 bool MYSQL_BIN_LOG::check_write_error(THD *thd)
2189 {
2190   DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
2191 
2192   bool checked= FALSE;
2193 
2194   if (likely(!thd->is_error()))
2195     DBUG_RETURN(checked);
2196 
2197   switch (thd->get_stmt_da()->sql_errno())
2198   {
2199     case ER_TRANS_CACHE_FULL:
2200     case ER_STMT_CACHE_FULL:
2201     case ER_ERROR_ON_WRITE:
2202     case ER_BINLOG_LOGGING_IMPOSSIBLE:
2203       checked= TRUE;
2204     break;
2205   }
2206 
2207   DBUG_RETURN(checked);
2208 }
2209 
2210 
2211 /**
2212   @note
2213   How do we handle this (unlikely but legal) case:
2214   @verbatim
2215     [transaction] + [update to non-trans table] + [rollback to savepoint] ?
2216   @endverbatim
2217   The problem occurs when a savepoint is before the update to the
2218   non-transactional table. Then when there's a rollback to the savepoint, if we
2219   simply truncate the binlog cache, we lose the part of the binlog cache where
2220   the update is. If we want to not lose it, we need to write the SAVEPOINT
2221   command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
2222   is easy: it's just write at the end of the binlog cache, but the former
2223   should be *inserted* to the place where the user called SAVEPOINT. The
2224   solution is that when the user calls SAVEPOINT, we write it to the binlog
2225   cache (so no need to later insert it). As transactions are never intermixed
2226   in the binary log (i.e. they are serialized), we won't have conflicts with
2227   savepoint names when using mysqlbinlog or in the slave SQL thread.
2228   Then when ROLLBACK TO SAVEPOINT is called, if we updated some
2229   non-transactional table, we don't truncate the binlog cache but instead write
2230   ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
2231   will chop the SAVEPOINT command from the binlog cache, which is good as in
2232   that case there is no need to have it in the binlog).
2233 */
2234 
2235 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
2236 {
2237   int error= 1;
2238   DBUG_ENTER("binlog_savepoint_set");
2239 
2240   char buf[1024];
2241 
2242   String log_query(buf, sizeof(buf), &my_charset_bin);
2243   if (log_query.copy(STRING_WITH_LEN("SAVEPOINT "), &my_charset_bin) ||
2244       append_identifier(thd, &log_query, &thd->lex->ident))
2245     DBUG_RETURN(1);
2246   int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
2247   Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2248                         TRUE, FALSE, TRUE, errcode);
2249   /*
2250     We cannot record the position before writing the statement
2251     because a rollback to a savepoint (.e.g. consider it "S") would
2252     prevent the savepoint statement (i.e. "SAVEPOINT S") from being
2253     written to the binary log despite the fact that the server could
2254     still issue other rollback statements to the same savepoint (i.e.
2255     "S").
2256     Given that the savepoint is valid until the server releases it,
2257     ie, until the transaction commits or it is released explicitly,
2258     we need to log it anyway so that we don't have "ROLLBACK TO S"
2259     or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
2260     log.
2261   */
2262   if (likely(!(error= mysql_bin_log.write(&qinfo))))
2263     binlog_trans_log_savepos(thd, (my_off_t*) sv);
2264 
2265   DBUG_RETURN(error);
2266 }
2267 
2268 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
2269 {
2270   DBUG_ENTER("binlog_savepoint_rollback");
2271 
2272   /*
2273     Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
2274     non-transactional table. Otherwise, truncate the binlog cache starting
2275     from the SAVEPOINT command.
2276   */
2277   if (unlikely(trans_has_updated_non_trans_table(thd) ||
2278                (thd->variables.option_bits & OPTION_KEEP_LOG)))
2279   {
2280     char buf[1024];
2281     String log_query(buf, sizeof(buf), &my_charset_bin);
2282     if (log_query.copy(STRING_WITH_LEN("ROLLBACK TO "), &my_charset_bin) ||
2283         append_identifier(thd, &log_query, &thd->lex->ident))
2284       DBUG_RETURN(1);
2285     int errcode= query_error_code(thd, thd->killed == NOT_KILLED);
2286     Query_log_event qinfo(thd, log_query.ptr(), log_query.length(),
2287                           TRUE, FALSE, TRUE, errcode);
2288     DBUG_RETURN(mysql_bin_log.write(&qinfo));
2289   }
2290 
2291   binlog_trans_log_truncate(thd, *(my_off_t*)sv);
2292 
2293   /*
2294     When a SAVEPOINT is executed inside a stored function/trigger we force the
2295     pending event to be flushed with a STMT_END_F flag and clear the table maps
2296     as well to ensure that following DMLs will have a clean state to start
2297     with. ROLLBACK inside a stored routine has to finalize possibly existing
2298     current row-based pending event with cleaning up table maps. That ensures
2299     that following DMLs will have a clean state to start with.
2300    */
2301   if (thd->in_sub_stmt)
2302     thd->clear_binlog_table_maps();
2303 
2304   DBUG_RETURN(0);
2305 }
2306 
2307 
2308 /**
2309   Check whether binlog state allows to safely release MDL locks after
2310   rollback to savepoint.
2311 
2312   @param hton  The binlog handlerton.
2313   @param thd   The client thread that executes the transaction.
2314 
2315   @return true  - It is safe to release MDL locks.
2316           false - If it is not.
2317 */
2318 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
2319                                                       THD *thd)
2320 {
2321   DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
2322   /*
2323     If we have not updated any non-transactional tables rollback
2324     to savepoint will simply truncate binlog cache starting from
2325     SAVEPOINT command. So it should be safe to release MDL acquired
2326     after SAVEPOINT command in this case.
2327   */
2328   DBUG_RETURN(!trans_cannot_safely_rollback(thd, true));
2329 }
2330 
2331 
2332 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
2333 {
2334   uchar magic[4];
2335   DBUG_ASSERT(my_b_tell(log) == 0);
2336 
2337   if (my_b_read(log, magic, sizeof(magic)))
2338   {
2339     *errmsg = "I/O error reading the header from the binary log";
2340     sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
2341 		    log->error);
2342     return 1;
2343   }
2344   if (bcmp(magic, BINLOG_MAGIC, sizeof(magic)))
2345   {
2346     *errmsg = "Binlog has bad magic number;  It's not a binary log file that can be used by this version of MySQL";
2347     return 1;
2348   }
2349   return 0;
2350 }
2351 
2352 
2353 File open_binlog(IO_CACHE *log, const char *log_file_name, const char **errmsg)
2354 {
2355   File file;
2356   DBUG_ENTER("open_binlog");
2357 
2358   if ((file= mysql_file_open(key_file_binlog,
2359                              log_file_name, O_RDONLY | O_BINARY | O_SHARE,
2360                              MYF(MY_WME))) < 0)
2361   {
2362     sql_print_error("Failed to open log (file '%s', errno %d)",
2363                     log_file_name, my_errno);
2364     *errmsg = "Could not open log file";
2365     goto err;
2366   }
2367   if (init_io_cache(log, file, (size_t)binlog_file_cache_size, READ_CACHE, 0, 0,
2368                     MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
2369   {
2370     sql_print_error("Failed to create a cache on log (file '%s')",
2371                     log_file_name);
2372     *errmsg = "Could not open log file";
2373     goto err;
2374   }
2375   if (check_binlog_magic(log,errmsg))
2376     goto err;
2377   DBUG_RETURN(file);
2378 
2379 err:
2380   if (file >= 0)
2381   {
2382     mysql_file_close(file, MYF(0));
2383     end_io_cache(log);
2384   }
2385   DBUG_RETURN(-1);
2386 }
2387 
2388 #ifdef _WIN32
2389 static int eventSource = 0;
2390 
2391 static void setup_windows_event_source()
2392 {
2393   HKEY    hRegKey= NULL;
2394   DWORD   dwError= 0;
2395   TCHAR   szPath[MAX_PATH];
2396   DWORD dwTypes;
2397 
2398   if (eventSource)               // Ensure that we are only called once
2399     return;
2400   eventSource= 1;
2401 
2402   // Create the event source registry key
2403   dwError= RegCreateKey(HKEY_LOCAL_MACHINE,
2404                           "SYSTEM\\CurrentControlSet\\Services\\EventLog\\Application\\MySQL",
2405                           &hRegKey);
2406 
2407   /* Name of the PE module that contains the message resource */
2408   GetModuleFileName(NULL, szPath, MAX_PATH);
2409 
2410   /* Register EventMessageFile */
2411   dwError = RegSetValueEx(hRegKey, "EventMessageFile", 0, REG_EXPAND_SZ,
2412                           (PBYTE) szPath, (DWORD) (strlen(szPath) + 1));
2413 
2414   /* Register supported event types */
2415   dwTypes= (EVENTLOG_ERROR_TYPE | EVENTLOG_WARNING_TYPE |
2416             EVENTLOG_INFORMATION_TYPE);
2417   dwError= RegSetValueEx(hRegKey, "TypesSupported", 0, REG_DWORD,
2418                          (LPBYTE) &dwTypes, sizeof dwTypes);
2419 
2420   RegCloseKey(hRegKey);
2421 }
2422 
2423 #endif /* _WIN32 */
2424 
2425 
2426 /**
2427   Find a unique filename for 'filename.#'.
2428 
2429   Set '#' to the number next to the maximum found in the most
2430   recent log file extension.
2431 
2432   This function will return nonzero if: (i) the generated name
2433   exceeds FN_REFLEN; (ii) if the number of extensions is exhausted;
2434   or (iii) some other error happened while examining the filesystem.
2435 
2436   @return
2437     nonzero if not possible to get unique filename.
2438 */
2439 
2440 static int find_uniq_filename(char *name, ulong next_log_number)
2441 {
2442   uint                  i;
2443   char                  buff[FN_REFLEN], ext_buf[FN_REFLEN];
2444   struct st_my_dir     *dir_info;
2445   struct fileinfo *file_info;
2446   ulong                 max_found= 0, next= 0, number= 0;
2447   size_t		buf_length, length;
2448   char			*start, *end;
2449   int                   error= 0;
2450   DBUG_ENTER("find_uniq_filename");
2451 
2452   length= dirname_part(buff, name, &buf_length);
2453   start=  name + length;
2454   end=    strend(start);
2455 
2456   *end='.';
2457   length= (size_t) (end - start + 1);
2458 
2459   if ((DBUG_EVALUATE_IF("error_unique_log_filename", 1,
2460                         unlikely(!(dir_info= my_dir(buff,
2461                                                     MYF(MY_DONT_SORT)))))))
2462   {						// This shouldn't happen
2463     strmov(end,".1");				// use name+1
2464     DBUG_RETURN(1);
2465   }
2466   file_info= dir_info->dir_entry;
2467   max_found= next_log_number ? next_log_number-1 : 0;
2468   for (i= dir_info->number_of_files ; i-- ; file_info++)
2469   {
2470     if (strncmp(file_info->name, start, length) == 0 &&
2471 	test_if_number(file_info->name+length, &number,0))
2472     {
2473       set_if_bigger(max_found, number);
2474     }
2475   }
2476   my_dirend(dir_info);
2477 
2478   /* check if reached the maximum possible extension number */
2479   if (max_found >= MAX_LOG_UNIQUE_FN_EXT)
2480   {
2481     sql_print_error("Log filename extension number exhausted: %06lu. \
2482 Please fix this by archiving old logs and \
2483 updating the index files.", max_found);
2484     error= 1;
2485     goto end;
2486   }
2487 
2488   next= max_found + 1;
2489   if (sprintf(ext_buf, "%06lu", next)<0)
2490   {
2491     error= 1;
2492     goto end;
2493   }
2494   *end++='.';
2495 
2496   /*
2497     Check if the generated extension size + the file name exceeds the
2498     buffer size used. If one did not check this, then the filename might be
2499     truncated, resulting in error.
2500    */
2501   if (((strlen(ext_buf) + (end - name)) >= FN_REFLEN))
2502   {
2503     sql_print_error("Log filename too large: %s%s (%zu). \
2504 Please fix this by archiving old logs and updating the \
2505 index files.", name, ext_buf, (strlen(ext_buf) + (end - name)));
2506     error= 1;
2507     goto end;
2508   }
2509 
2510   if (sprintf(end, "%06lu", next)<0)
2511   {
2512     error= 1;
2513     goto end;
2514   }
2515 
2516   /* print warning if reaching the end of available extensions. */
2517   if ((next > (MAX_LOG_UNIQUE_FN_EXT - LOG_WARN_UNIQUE_FN_EXT_LEFT)))
2518     sql_print_warning("Next log extension: %lu. \
2519 Remaining log filename extensions: %lu. \
2520 Please consider archiving some logs.", next, (MAX_LOG_UNIQUE_FN_EXT - next));
2521 
2522 end:
2523   DBUG_RETURN(error);
2524 }
2525 
2526 
2527 void MYSQL_LOG::init(enum_log_type log_type_arg,
2528                      enum cache_type io_cache_type_arg)
2529 {
2530   DBUG_ENTER("MYSQL_LOG::init");
2531   log_type= log_type_arg;
2532   io_cache_type= io_cache_type_arg;
2533   DBUG_PRINT("info",("log_type: %d", log_type));
2534   DBUG_VOID_RETURN;
2535 }
2536 
2537 
2538 bool MYSQL_LOG::init_and_set_log_file_name(const char *log_name,
2539                                            const char *new_name,
2540                                            ulong next_log_number,
2541                                            enum_log_type log_type_arg,
2542                                            enum cache_type io_cache_type_arg)
2543 {
2544   init(log_type_arg, io_cache_type_arg);
2545 
2546   if (new_name)
2547   {
2548     strmov(log_file_name, new_name);
2549   }
2550   else if (!new_name && generate_new_name(log_file_name, log_name,
2551                                           next_log_number))
2552     return TRUE;
2553 
2554   return FALSE;
2555 }
2556 
2557 
2558 /*
2559   Open a (new) log file.
2560 
2561   SYNOPSIS
2562     open()
2563 
2564     log_name            The name of the log to open
2565     log_type_arg        The type of the log. E.g. LOG_NORMAL
2566     new_name            The new name for the logfile. This is only needed
2567                         when the method is used to open the binlog file.
2568     io_cache_type_arg   The type of the IO_CACHE to use for this log file
2569 
2570   DESCRIPTION
2571     Open the logfile, init IO_CACHE and write startup messages
2572     (in case of general and slow query logs).
2573 
2574   RETURN VALUES
2575     0   ok
2576     1   error
2577 */
2578 
2579 bool MYSQL_LOG::open(
2580 #ifdef HAVE_PSI_INTERFACE
2581                      PSI_file_key log_file_key,
2582 #endif
2583                      const char *log_name, enum_log_type log_type_arg,
2584                      const char *new_name, ulong next_log_number,
2585                      enum cache_type io_cache_type_arg)
2586 {
2587   char buff[FN_REFLEN];
2588   MY_STAT f_stat;
2589   File file= -1;
2590   my_off_t seek_offset;
2591   bool is_fifo = false;
2592   int open_flags= O_CREAT | O_BINARY | O_CLOEXEC;
2593   DBUG_ENTER("MYSQL_LOG::open");
2594   DBUG_PRINT("enter", ("log_type: %d", (int) log_type_arg));
2595 
2596   write_error= 0;
2597 
2598   if (!(name= my_strdup(log_name, MYF(MY_WME))))
2599   {
2600     name= (char *)log_name; // for the error message
2601     goto err;
2602   }
2603 
2604   /*
2605     log_type is LOG_UNKNOWN if we should not generate a new name
2606     This is only used when called from MYSQL_BINARY_LOG::open, which
2607     has already updated log_file_name.
2608    */
2609   if (log_type_arg != LOG_UNKNOWN &&
2610       init_and_set_log_file_name(name, new_name, next_log_number,
2611                                  log_type_arg, io_cache_type_arg))
2612     goto err;
2613 
2614   is_fifo = my_stat(log_file_name, &f_stat, MYF(0)) &&
2615             MY_S_ISFIFO(f_stat.st_mode);
2616 
2617   if (io_cache_type == SEQ_READ_APPEND)
2618     open_flags |= O_RDWR | O_APPEND;
2619   else
2620     open_flags |= O_WRONLY | (log_type == LOG_BIN ? 0 : O_APPEND);
2621 
2622   if (is_fifo)
2623     open_flags |= O_NONBLOCK;
2624 
2625   db[0]= 0;
2626 
2627 #ifdef HAVE_PSI_INTERFACE
2628   /* Keep the key for reopen */
2629   m_log_file_key= log_file_key;
2630 #endif
2631 
2632   if ((file= mysql_file_open(log_file_key, log_file_name, open_flags,
2633                              MYF(MY_WME | ME_WAITTANG))) < 0)
2634     goto err;
2635 
2636   if (is_fifo)
2637     seek_offset= 0;
2638   else if ((seek_offset= mysql_file_tell(file, MYF(MY_WME))))
2639     goto err;
2640 
2641   if (init_io_cache(&log_file, file, IO_SIZE, io_cache_type, seek_offset, 0,
2642                     MYF(MY_WME | MY_NABP |
2643                         ((log_type == LOG_BIN) ? MY_WAIT_IF_FULL : 0))))
2644     goto err;
2645 
2646   if (log_type == LOG_NORMAL)
2647   {
2648     char *end;
2649     size_t len=my_snprintf(buff, sizeof(buff), "%s, Version: %s (%s). "
2650 #ifdef EMBEDDED_LIBRARY
2651                         "embedded library\n",
2652                         my_progname, server_version, MYSQL_COMPILATION_COMMENT
2653 #elif defined(_WIN32)
2654 			"started with:\nTCP Port: %d, Named Pipe: %s\n",
2655                         my_progname, server_version, MYSQL_COMPILATION_COMMENT,
2656                         mysqld_port, mysqld_unix_port
2657 #else
2658 			"started with:\nTcp port: %d  Unix socket: %s\n",
2659                         my_progname, server_version, MYSQL_COMPILATION_COMMENT,
2660                         mysqld_port, mysqld_unix_port
2661 #endif
2662                        );
2663     end= strnmov(buff + len, "Time\t\t    Id Command\tArgument\n",
2664                  sizeof(buff) - len);
2665     if (my_b_write(&log_file, (uchar*) buff, (uint) (end-buff)) ||
2666 	flush_io_cache(&log_file))
2667       goto err;
2668   }
2669 
2670   log_state= LOG_OPENED;
2671   DBUG_RETURN(0);
2672 
2673 err:
2674   sql_print_error(fatal_log_error, name, errno);
2675   if (file >= 0)
2676     mysql_file_close(file, MYF(0));
2677   end_io_cache(&log_file);
2678   my_free(name);
2679   name= NULL;
2680   log_state= LOG_CLOSED;
2681   DBUG_RETURN(1);
2682 }
2683 
2684 MYSQL_LOG::MYSQL_LOG()
2685   : name(0), write_error(FALSE), inited(FALSE), log_type(LOG_UNKNOWN),
2686     log_state(LOG_CLOSED)
2687 {
2688   /*
2689     We don't want to initialize LOCK_Log here as such initialization depends on
2690     safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
2691     called only in main(). Doing initialization here would make it happen
2692     before main().
2693   */
2694   bzero((char*) &log_file, sizeof(log_file));
2695 }
2696 
2697 void MYSQL_LOG::init_pthread_objects()
2698 {
2699   DBUG_ASSERT(inited == 0);
2700   inited= 1;
2701   mysql_mutex_init(key_LOG_LOCK_log, &LOCK_log, MY_MUTEX_INIT_SLOW);
2702 }
2703 
2704 /*
2705   Close the log file
2706 
2707   SYNOPSIS
2708     close()
2709     exiting     Bitmask. LOG_CLOSE_TO_BE_OPENED is used if we intend to call
2710                 open at once after close. LOG_CLOSE_DELAYED_CLOSE is used for
2711                 binlog rotation, to delay actual close of the old file until
2712                 we have successfully created the new file.
2713 
2714   NOTES
2715     One can do an open on the object at once after doing a close.
2716     The internal structures are not freed until cleanup() is called
2717 */
2718 
2719 void MYSQL_LOG::close(uint exiting)
2720 {					// One can't set log_type here!
2721   DBUG_ENTER("MYSQL_LOG::close");
2722   DBUG_PRINT("enter",("exiting: %d", (int) exiting));
2723   if (log_state == LOG_OPENED)
2724   {
2725     end_io_cache(&log_file);
2726 
2727     if (log_type == LOG_BIN && mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error)
2728     {
2729       write_error= 1;
2730       sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2731     }
2732 
2733     if (!(exiting & LOG_CLOSE_DELAYED_CLOSE) &&
2734         mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error)
2735     {
2736       write_error= 1;
2737       sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2738     }
2739   }
2740 
2741   log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
2742   my_free(name);
2743   name= NULL;
2744   DBUG_VOID_RETURN;
2745 }
2746 
2747 /** This is called only once. */
2748 
2749 void MYSQL_LOG::cleanup()
2750 {
2751   DBUG_ENTER("cleanup");
2752   if (inited)
2753   {
2754     inited= 0;
2755     mysql_mutex_destroy(&LOCK_log);
2756     close(0);
2757   }
2758   DBUG_VOID_RETURN;
2759 }
2760 
2761 
2762 int MYSQL_LOG::generate_new_name(char *new_name, const char *log_name,
2763                                  ulong next_log_number)
2764 {
2765   fn_format(new_name, log_name, mysql_data_home, "", 4);
2766   if (log_type == LOG_BIN)
2767   {
2768     if (!fn_ext(log_name)[0])
2769     {
2770       if (DBUG_EVALUATE_IF("binlog_inject_new_name_error", TRUE, FALSE) ||
2771           unlikely(find_uniq_filename(new_name, next_log_number)))
2772       {
2773         THD *thd= current_thd;
2774         if (unlikely(thd))
2775           my_error(ER_NO_UNIQUE_LOGFILE, MYF(ME_FATALERROR), log_name);
2776         sql_print_error(ER_DEFAULT(ER_NO_UNIQUE_LOGFILE), log_name);
2777 	return 1;
2778       }
2779     }
2780   }
2781   return 0;
2782 }
2783 
2784 
2785 /*
2786   Reopen the log file
2787 
2788   SYNOPSIS
2789     reopen_file()
2790 
2791   DESCRIPTION
2792     Reopen the log file. The method is used during FLUSH LOGS
2793     and locks LOCK_log mutex
2794 */
2795 
2796 
2797 void MYSQL_QUERY_LOG::reopen_file()
2798 {
2799   char *save_name;
2800   DBUG_ENTER("MYSQL_LOG::reopen_file");
2801 
2802   mysql_mutex_lock(&LOCK_log);
2803   if (!is_open())
2804   {
2805     DBUG_PRINT("info",("log is closed"));
2806     mysql_mutex_unlock(&LOCK_log);
2807     DBUG_VOID_RETURN;
2808   }
2809 
2810   save_name= name;
2811   name= 0;				// Don't free name
2812   close(LOG_CLOSE_TO_BE_OPENED);
2813 
2814   /*
2815      Note that at this point, log_state != LOG_CLOSED (important for is_open()).
2816   */
2817 
2818   open(
2819 #ifdef HAVE_PSI_INTERFACE
2820        m_log_file_key,
2821 #endif
2822        save_name, log_type, 0, 0, io_cache_type);
2823   my_free(save_name);
2824 
2825   mysql_mutex_unlock(&LOCK_log);
2826 
2827   DBUG_VOID_RETURN;
2828 }
2829 
2830 
2831 /*
2832   Write a command to traditional general log file
2833 
2834   SYNOPSIS
2835     write()
2836 
2837     event_time        command start timestamp
2838     user_host         the pointer to the string with user@host info
2839     user_host_len     length of the user_host string. this is computed once
2840                       and passed to all general log  event handlers
2841     thread_id         Id of the thread, issued a query
2842     command_type      the type of the command being logged
2843     command_type_len  the length of the string above
2844     sql_text          the very text of the query being executed
2845     sql_text_len      the length of sql_text string
2846 
2847   DESCRIPTION
2848 
2849    Log given command to to normal (not rotable) log file
2850 
2851   RETURN
2852     FASE - OK
2853     TRUE - error occurred
2854 */
2855 
2856 bool MYSQL_QUERY_LOG::write(time_t event_time, const char *user_host, size_t user_host_len, my_thread_id thread_id_arg,
2857                             const char *command_type, size_t command_type_len,
2858                             const char *sql_text, size_t sql_text_len)
2859 {
2860   char buff[32];
2861   char local_time_buff[MAX_TIME_SIZE];
2862   struct tm start;
2863   size_t time_buff_len= 0;
2864 
2865   mysql_mutex_lock(&LOCK_log);
2866 
2867   /* Test if someone closed between the is_open test and lock */
2868   if (is_open())
2869   {
2870     /* for testing output of timestamp and thread id */
2871     DBUG_EXECUTE_IF("reset_log_last_time", last_time= 0;);
2872 
2873     /* Note that my_b_write() assumes it knows the length for this */
2874     if (event_time != last_time)
2875     {
2876       last_time= event_time;
2877 
2878       localtime_r(&event_time, &start);
2879 
2880       time_buff_len= my_snprintf(local_time_buff, MAX_TIME_SIZE,
2881                                  "%02d%02d%02d %2d:%02d:%02d\t",
2882                                  start.tm_year % 100, start.tm_mon + 1,
2883                                  start.tm_mday, start.tm_hour,
2884                                  start.tm_min, start.tm_sec);
2885 
2886       if (my_b_write(&log_file, (uchar*) local_time_buff, time_buff_len))
2887         goto err;
2888     }
2889     else
2890       if (my_b_write(&log_file, (uchar*) "\t\t" ,2) < 0)
2891         goto err;
2892 
2893     /* command_type, thread_id */
2894     size_t length= my_snprintf(buff, 32, "%6llu ", thread_id_arg);
2895 
2896     if (my_b_write(&log_file, (uchar*) buff, length))
2897       goto err;
2898 
2899     if (my_b_write(&log_file, (uchar*) command_type, command_type_len))
2900       goto err;
2901 
2902     if (my_b_write(&log_file, (uchar*) "\t", 1))
2903       goto err;
2904 
2905     /* sql_text */
2906     if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len))
2907       goto err;
2908 
2909     if (my_b_write(&log_file, (uchar*) "\n", 1) ||
2910         flush_io_cache(&log_file))
2911       goto err;
2912   }
2913 
2914   mysql_mutex_unlock(&LOCK_log);
2915   return FALSE;
2916 err:
2917 
2918   if (!write_error)
2919   {
2920     write_error= 1;
2921     sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno);
2922   }
2923   mysql_mutex_unlock(&LOCK_log);
2924   return TRUE;
2925 }
2926 
2927 
2928 /*
2929   Log a query to the traditional slow log file
2930 
2931   SYNOPSIS
2932     write()
2933 
2934     thd               THD of the query
2935     current_time      current timestamp
2936     user_host         the pointer to the string with user@host info
2937     user_host_len     length of the user_host string. this is computed once
2938                       and passed to all general log event handlers
2939     query_utime       Amount of time the query took to execute (in microseconds)
2940     lock_utime        Amount of time the query was locked (in microseconds)
2941     is_command        The flag, which determines, whether the sql_text is a
2942                       query or an administrator command.
2943     sql_text          the very text of the query or administrator command
2944                       processed
2945     sql_text_len      the length of sql_text string
2946 
2947   DESCRIPTION
2948 
2949    Log a query to the slow log file.
2950 
2951   RETURN
2952     FALSE - OK
2953     TRUE - error occurred
2954 */
2955 
2956 bool MYSQL_QUERY_LOG::write(THD *thd, time_t current_time,
2957                             const char *user_host, size_t user_host_len, ulonglong query_utime,
2958                             ulonglong lock_utime, bool is_command,
2959                             const char *sql_text, size_t sql_text_len)
2960 {
2961   bool error= 0;
2962   char llbuff[22];
2963   DBUG_ENTER("MYSQL_QUERY_LOG::write");
2964 
2965   mysql_mutex_lock(&LOCK_log);
2966   if (is_open())
2967   {						// Safety against reopen
2968     char buff[80], *end;
2969     char query_time_buff[22+7], lock_time_buff[22+7];
2970     size_t buff_len;
2971     end= buff;
2972 
2973     if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
2974     {
2975       if (current_time != last_time)
2976       {
2977         last_time= current_time;
2978         struct tm start;
2979         localtime_r(&current_time, &start);
2980 
2981         buff_len= my_snprintf(buff, sizeof buff,
2982                               "# Time: %02d%02d%02d %2d:%02d:%02d\n",
2983                               start.tm_year % 100, start.tm_mon + 1,
2984                               start.tm_mday, start.tm_hour,
2985                               start.tm_min, start.tm_sec);
2986 
2987         /* Note that my_b_write() assumes it knows the length for this */
2988         if (my_b_write(&log_file, (uchar*) buff, buff_len))
2989           goto err;
2990       }
2991       const uchar uh[]= "# User@Host: ";
2992       if (my_b_write(&log_file, uh, sizeof(uh) - 1) ||
2993           my_b_write(&log_file, (uchar*) user_host, user_host_len) ||
2994           my_b_write(&log_file, (uchar*) "\n", 1))
2995         goto err;
2996 
2997     /* For slow query log */
2998     sprintf(query_time_buff, "%.6f", ulonglong2double(query_utime)/1000000.0);
2999     sprintf(lock_time_buff,  "%.6f", ulonglong2double(lock_utime)/1000000.0);
3000     if (my_b_printf(&log_file,
3001                     "# Thread_id: %lu  Schema: %s  QC_hit: %s\n"
3002                     "# Query_time: %s  Lock_time: %s  Rows_sent: %lu  Rows_examined: %lu\n"
3003                     "# Rows_affected: %lu  Bytes_sent: %lu\n",
3004                     (ulong) thd->thread_id, thd->get_db(),
3005                     ((thd->query_plan_flags & QPLAN_QC) ? "Yes" : "No"),
3006                     query_time_buff, lock_time_buff,
3007                     (ulong) thd->get_sent_row_count(),
3008                     (ulong) thd->get_examined_row_count(),
3009                     (ulong) thd->get_affected_rows(),
3010                     (ulong) (thd->status_var.bytes_sent - thd->bytes_sent_old)))
3011       goto err;
3012 
3013     if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN)
3014         && thd->tmp_tables_used &&
3015         my_b_printf(&log_file,
3016                     "# Tmp_tables: %lu  Tmp_disk_tables: %lu  "
3017                     "Tmp_table_sizes: %s\n",
3018                     (ulong) thd->tmp_tables_used,
3019                     (ulong) thd->tmp_tables_disk_used,
3020                     llstr(thd->tmp_tables_size, llbuff)))
3021       goto err;
3022 
3023     if (thd->spcont &&
3024         my_b_printf(&log_file, "# Stored_routine: %s\n",
3025                     ErrConvDQName(thd->spcont->m_sp).ptr()))
3026       goto err;
3027 
3028      if ((thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_QUERY_PLAN) &&
3029          (thd->query_plan_flags &
3030           (QPLAN_FULL_SCAN | QPLAN_FULL_JOIN | QPLAN_TMP_TABLE |
3031            QPLAN_TMP_DISK | QPLAN_FILESORT | QPLAN_FILESORT_DISK |
3032            QPLAN_FILESORT_PRIORITY_QUEUE)) &&
3033          my_b_printf(&log_file,
3034                      "# Full_scan: %s  Full_join: %s  "
3035                      "Tmp_table: %s  Tmp_table_on_disk: %s\n"
3036                      "# Filesort: %s  Filesort_on_disk: %s  Merge_passes: %lu  "
3037                      "Priority_queue: %s\n",
3038                      ((thd->query_plan_flags & QPLAN_FULL_SCAN) ? "Yes" : "No"),
3039                      ((thd->query_plan_flags & QPLAN_FULL_JOIN) ? "Yes" : "No"),
3040                      (thd->tmp_tables_used ? "Yes" : "No"),
3041                      (thd->tmp_tables_disk_used ? "Yes" : "No"),
3042                      ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"),
3043                      ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ?
3044                       "Yes" : "No"),
3045                      thd->query_plan_fsort_passes,
3046                      ((thd->query_plan_flags & QPLAN_FILESORT_PRIORITY_QUEUE) ?
3047                        "Yes" : "No")
3048                      ))
3049       goto err;
3050     if (thd->variables.log_slow_verbosity & LOG_SLOW_VERBOSITY_EXPLAIN &&
3051         thd->lex->explain)
3052     {
3053       StringBuffer<128> buf;
3054       DBUG_ASSERT(!thd->free_list);
3055       if (!print_explain_for_slow_log(thd->lex, thd, &buf))
3056         if (my_b_printf(&log_file, "%s", buf.c_ptr_safe()))
3057           goto err;
3058       thd->free_items();
3059     }
3060     if (thd->db.str && strcmp(thd->db.str, db))
3061     {						// Database changed
3062       if (my_b_printf(&log_file,"use %s;\n",thd->db.str))
3063         goto err;
3064       strmov(db,thd->db.str);
3065     }
3066     if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
3067     {
3068       end=strmov(end, ",last_insert_id=");
3069       end=longlong10_to_str((longlong)
3070                             thd->first_successful_insert_id_in_prev_stmt_for_binlog,
3071                             end, -10);
3072     }
3073     // Save value if we do an insert.
3074     if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
3075     {
3076       if (!(specialflag & SPECIAL_SHORT_LOG_FORMAT))
3077       {
3078         end=strmov(end,",insert_id=");
3079         end=longlong10_to_str((longlong)
3080                               thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(),
3081                               end, -10);
3082       }
3083     }
3084 
3085     /*
3086       This info used to show up randomly, depending on whether the query
3087       checked the query start time or not. now we always write current
3088       timestamp to the slow log
3089     */
3090     end= strmov(end, ",timestamp=");
3091     end= int10_to_str((long) current_time, end, 10);
3092 
3093     if (end != buff)
3094     {
3095       *end++=';';
3096       *end='\n';
3097       if (my_b_write(&log_file, (uchar*) "SET ", 4) ||
3098           my_b_write(&log_file, (uchar*) buff + 1, (uint) (end-buff)))
3099         goto err;
3100     }
3101     if (is_command)
3102     {
3103       end= strxmov(buff, "# administrator command: ", NullS);
3104       buff_len= (ulong) (end - buff);
3105       DBUG_EXECUTE_IF("simulate_slow_log_write_error",
3106                       {DBUG_SET("+d,simulate_file_write_error");});
3107       if(my_b_write(&log_file, (uchar*) buff, buff_len))
3108         goto err;
3109     }
3110     if (my_b_write(&log_file, (uchar*) sql_text, sql_text_len) ||
3111         my_b_write(&log_file, (uchar*) ";\n",2) ||
3112         flush_io_cache(&log_file))
3113       goto err;
3114 
3115     }
3116   }
3117 end:
3118   mysql_mutex_unlock(&LOCK_log);
3119   DBUG_RETURN(error);
3120 
3121 err:
3122   error= 1;
3123   if (!write_error)
3124   {
3125     write_error= 1;
3126     sql_print_error(ER_THD(thd, ER_ERROR_ON_WRITE), name, errno);
3127   }
3128   goto end;
3129 }
3130 
3131 
3132 /**
3133   @todo
3134   The following should be using fn_format();  We just need to
3135   first change fn_format() to cut the file name if it's too long.
3136 */
3137 const char *MYSQL_LOG::generate_name(const char *log_name,
3138                                      const char *suffix,
3139                                      bool strip_ext, char *buff)
3140 {
3141   if (!log_name || !log_name[0])
3142   {
3143     strmake(buff, pidfile_name, FN_REFLEN - strlen(suffix) - 1);
3144     return (const char *)
3145       fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
3146   }
3147   // get rid of extension if the log is binary to avoid problems
3148   if (strip_ext)
3149   {
3150     char *p= fn_ext(log_name);
3151     uint length= (uint) (p - log_name);
3152     strmake(buff, log_name, MY_MIN(length, FN_REFLEN-1));
3153     return (const char*)buff;
3154   }
3155   return log_name;
3156 }
3157 
3158 
3159 /*
3160   Print some additional information about addition/removal of
3161   XID list entries.
3162   TODO: Remove once MDEV-9510 is fixed.
3163 */
3164 #ifdef WITH_WSREP
3165 #define WSREP_XID_LIST_ENTRY(X, Y)                    \
3166   if (wsrep_debug)                                    \
3167   {                                                   \
3168     char buf[FN_REFLEN];                              \
3169     strmake(buf, Y->binlog_name, Y->binlog_name_len); \
3170     WSREP_DEBUG(X, buf, Y->binlog_id);                \
3171   }
3172 #else
3173 #define WSREP_XID_LIST_ENTRY(X, Y) do { } while(0)
3174 #endif
3175 
3176 MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
3177   :reset_master_pending(0), mark_xid_done_waiting(0),
3178    bytes_written(0), file_id(1), open_count(1),
3179    group_commit_queue(0), group_commit_queue_busy(FALSE),
3180    num_commits(0), num_group_commits(0),
3181    group_commit_trigger_count(0), group_commit_trigger_timeout(0),
3182    group_commit_trigger_lock_wait(0),
3183    sync_period_ptr(sync_period), sync_counter(0),
3184    state_file_deleted(false), binlog_state_recover_done(false),
3185    is_relay_log(0), relay_signal_cnt(0),
3186    checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
3187    relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
3188    description_event_for_exec(0), description_event_for_queue(0),
3189    current_binlog_id(0)
3190 {
3191   /*
3192     We don't want to initialize locks here as such initialization depends on
3193     safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
3194     called only in main(). Doing initialization here would make it happen
3195     before main().
3196   */
3197   index_file_name[0] = 0;
3198   bzero((char*) &index_file, sizeof(index_file));
3199   bzero((char*) &purge_index_file, sizeof(purge_index_file));
3200 }
3201 
3202 void MYSQL_BIN_LOG::stop_background_thread()
3203 {
3204   if (binlog_background_thread_started)
3205   {
3206     mysql_mutex_lock(&LOCK_binlog_background_thread);
3207     binlog_background_thread_stop= true;
3208     mysql_cond_signal(&COND_binlog_background_thread);
3209     while (binlog_background_thread_stop)
3210       mysql_cond_wait(&COND_binlog_background_thread_end,
3211                       &LOCK_binlog_background_thread);
3212     mysql_mutex_unlock(&LOCK_binlog_background_thread);
3213     binlog_background_thread_started= false;
3214   }
3215 }
3216 
3217 /* this is called only once */
3218 
3219 void MYSQL_BIN_LOG::cleanup()
3220 {
3221   DBUG_ENTER("cleanup");
3222   if (inited)
3223   {
3224     xid_count_per_binlog *b;
3225 
3226     /* Wait for the binlog background thread to stop. */
3227     if (!is_relay_log)
3228       stop_background_thread();
3229 
3230     inited= 0;
3231     mysql_mutex_lock(&LOCK_log);
3232     close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT);
3233     mysql_mutex_unlock(&LOCK_log);
3234     delete description_event_for_queue;
3235     delete description_event_for_exec;
3236 
3237     while ((b= binlog_xid_count_list.get()))
3238     {
3239       /*
3240         There should be no pending XIDs at shutdown, and only one entry (for
3241         the active binlog file) in the list.
3242       */
3243       DBUG_ASSERT(b->xid_count == 0);
3244       DBUG_ASSERT(!binlog_xid_count_list.head());
3245       WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::cleanup(): Removing xid_list_entry "
3246                            "for %s (%lu)", b);
3247       delete b;
3248     }
3249 
3250     mysql_mutex_destroy(&LOCK_log);
3251     mysql_mutex_destroy(&LOCK_index);
3252     mysql_mutex_destroy(&LOCK_xid_list);
3253     mysql_mutex_destroy(&LOCK_binlog_background_thread);
3254     mysql_mutex_destroy(&LOCK_binlog_end_pos);
3255     mysql_cond_destroy(&COND_relay_log_updated);
3256     mysql_cond_destroy(&COND_bin_log_updated);
3257     mysql_cond_destroy(&COND_queue_busy);
3258     mysql_cond_destroy(&COND_xid_list);
3259     mysql_cond_destroy(&COND_binlog_background_thread);
3260     mysql_cond_destroy(&COND_binlog_background_thread_end);
3261   }
3262 
3263   /*
3264     Free data for global binlog state.
3265     We can't do that automatically as we need to do this before
3266     safemalloc is shut down
3267   */
3268   if (!is_relay_log)
3269     rpl_global_gtid_binlog_state.free();
3270   DBUG_VOID_RETURN;
3271 }
3272 
3273 
3274 /* Init binlog-specific vars */
3275 void MYSQL_BIN_LOG::init(ulong max_size_arg)
3276 {
3277   DBUG_ENTER("MYSQL_BIN_LOG::init");
3278   max_size= max_size_arg;
3279   DBUG_PRINT("info",("max_size: %lu", max_size));
3280   DBUG_VOID_RETURN;
3281 }
3282 
3283 
/*
  Initialize the mutexes and condition variables of the binary log.

  Calls the base class implementation first and then initializes the
  binlog-specific objects. Every object initialized here has a matching
  destroy call in MYSQL_BIN_LOG::cleanup().
*/
void MYSQL_BIN_LOG::init_pthread_objects()
{
  MYSQL_LOG::init_pthread_objects();
  /* Index file lock; flagged with MYF_NO_DEADLOCK_DETECTION */
  mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
  mysql_mutex_setflags(&LOCK_index, MYF_NO_DEADLOCK_DETECTION);
  /* Lock for the XID list */
  mysql_mutex_init(key_BINLOG_LOCK_xid_list,
                   &LOCK_xid_list, MY_MUTEX_INIT_FAST);
  /* Condition variables: log updates, group commit queue, XID list */
  mysql_cond_init(m_key_relay_log_update, &COND_relay_log_updated, 0);
  mysql_cond_init(m_key_bin_log_update, &COND_bin_log_updated, 0);
  mysql_cond_init(m_key_COND_queue_busy, &COND_queue_busy, 0);
  mysql_cond_init(key_BINLOG_COND_xid_list, &COND_xid_list, 0);

  /* Objects used to talk to the binlog background thread */
  mysql_mutex_init(key_BINLOG_LOCK_binlog_background_thread,
                   &LOCK_binlog_background_thread, MY_MUTEX_INIT_FAST);
  mysql_cond_init(key_BINLOG_COND_binlog_background_thread,
                  &COND_binlog_background_thread, 0);
  mysql_cond_init(key_BINLOG_COND_binlog_background_thread_end,
                  &COND_binlog_background_thread_end, 0);

  /* Lock for the binlog end position */
  mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
                   MY_MUTEX_INIT_SLOW);
}
3306 
3307 
3308 bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
3309                                     const char *log_name, bool need_mutex)
3310 {
3311   File index_file_nr= -1;
3312   DBUG_ASSERT(!my_b_inited(&index_file));
3313 
3314   /*
3315     First open of this class instance
3316     Create an index file that will hold all file names uses for logging.
3317     Add new entries to the end of it.
3318   */
3319   myf opt= MY_UNPACK_FILENAME;
3320   if (!index_file_name_arg)
3321   {
3322     index_file_name_arg= log_name;    // Use same basename for index file
3323     opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
3324   }
3325   fn_format(index_file_name, index_file_name_arg, mysql_data_home,
3326             ".index", opt);
3327   if ((index_file_nr= mysql_file_open(m_key_file_log_index,
3328                                       index_file_name,
3329                                       O_RDWR | O_CREAT | O_BINARY | O_CLOEXEC,
3330                                       MYF(MY_WME))) < 0 ||
3331        mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
3332        init_io_cache(&index_file, index_file_nr,
3333                      IO_SIZE, WRITE_CACHE,
3334                      mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
3335                                      0, MYF(MY_WME | MY_WAIT_IF_FULL)) ||
3336       DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
3337   {
3338     /*
3339       TODO: all operations creating/deleting the index file or a log, should
3340       call my_sync_dir() or my_sync_dir_by_file() to be durable.
3341       TODO: file creation should be done with mysql_file_create()
3342       not mysql_file_open().
3343     */
3344     if (index_file_nr >= 0)
3345       mysql_file_close(index_file_nr, MYF(0));
3346     return TRUE;
3347   }
3348 
3349 #ifdef HAVE_REPLICATION
3350   /*
3351     Sync the index by purging any binary log file that is not registered.
3352     In other words, either purge binary log files that were removed from
3353     the index but not purged from the file system due to a crash or purge
3354     any binary log file that was created but not register in the index
3355     due to a crash.
3356   */
3357 
3358   if (set_purge_index_file_name(index_file_name_arg) ||
3359       open_purge_index_file(FALSE) ||
3360       purge_index_entry(NULL, NULL, need_mutex) ||
3361       close_purge_index_file() ||
3362       DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
3363   {
3364     sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
3365                     "file.");
3366     return TRUE;
3367   }
3368 #endif
3369 
3370   return FALSE;
3371 }
3372 
3373 
3374 /**
3375   Open a (new) binlog file.
3376 
3377   - Open the log file and the index file. Register the new
3378   file name in it
  - When calling this while the file is in use, you must hold locks
  on LOCK_log and LOCK_index.
3381 
3382   @retval
3383     0	ok
3384   @retval
3385     1	error
3386 */
3387 
3388 bool MYSQL_BIN_LOG::open(const char *log_name,
3389                          enum_log_type log_type_arg,
3390                          const char *new_name,
3391                          ulong next_log_number,
3392                          enum cache_type io_cache_type_arg,
3393                          ulong max_size_arg,
3394                          bool null_created_arg,
3395                          bool need_mutex)
3396 {
3397   File file= -1;
3398   xid_count_per_binlog *new_xid_list_entry= NULL, *b;
3399   DBUG_ENTER("MYSQL_BIN_LOG::open");
3400   DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));
3401 
3402   mysql_mutex_assert_owner(&LOCK_log);
3403 
3404   if (!is_relay_log)
3405   {
3406     if (!binlog_state_recover_done)
3407     {
3408       binlog_state_recover_done= true;
3409       if (do_binlog_recovery(opt_bin_logname, false))
3410         DBUG_RETURN(1);
3411     }
3412 
3413     if (!binlog_background_thread_started &&
3414         start_binlog_background_thread())
3415       DBUG_RETURN(1);
3416   }
3417 
3418   /* We need to calculate new log file name for purge to delete old */
3419   if (init_and_set_log_file_name(log_name, new_name, next_log_number,
3420                                  log_type_arg, io_cache_type_arg))
3421   {
3422     sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
3423     if (!is_relay_log)
3424       goto err;
3425     DBUG_RETURN(1);
3426   }
3427 
3428 #ifdef HAVE_REPLICATION
3429   if (open_purge_index_file(TRUE) ||
3430       register_create_index_entry(log_file_name) ||
3431       sync_purge_index_file() ||
3432       DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
3433   {
3434     /**
3435         TODO:
3436         Although this was introduced to appease valgrind when
3437         injecting emulated faults using
3438         fault_injection_registering_index it may be good to consider
3439         what actually happens when open_purge_index_file succeeds but
3440         register or sync fails.
3441 
3442         Perhaps we might need the code below in MYSQL_LOG_BIN::cleanup
3443         for "real life" purposes as well?
3444      */
3445     DBUG_EXECUTE_IF("fault_injection_registering_index", {
3446       if (my_b_inited(&purge_index_file))
3447       {
3448         end_io_cache(&purge_index_file);
3449         my_close(purge_index_file.file, MYF(0));
3450       }
3451     });
3452 
3453     sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
3454     DBUG_RETURN(1);
3455   }
3456   DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
3457 #endif
3458 
3459   write_error= 0;
3460 
3461   /* open the main log file */
3462   if (MYSQL_LOG::open(
3463 #ifdef HAVE_PSI_INTERFACE
3464                       m_key_file_log,
3465 #endif
3466                       log_name,
3467                       LOG_UNKNOWN, /* Don't generate new name */
3468                       0, 0, io_cache_type_arg))
3469   {
3470 #ifdef HAVE_REPLICATION
3471     close_purge_index_file();
3472 #endif
3473     DBUG_RETURN(1);                            /* all warnings issued */
3474   }
3475 
3476   init(max_size_arg);
3477 
3478   open_count++;
3479 
3480   DBUG_ASSERT(log_type == LOG_BIN);
3481 
3482   {
3483     bool write_file_name_to_index_file=0;
3484 
3485     if (!my_b_filelength(&log_file))
3486     {
3487       /*
3488 	The binary log file was empty (probably newly created)
3489 	This is the normal case and happens when the user doesn't specify
3490 	an extension for the binary log files.
3491 	In this case we write a standard header to it.
3492       */
3493       if (my_b_safe_write(&log_file, BINLOG_MAGIC,
3494 			  BIN_LOG_HEADER_SIZE))
3495         goto err;
3496       bytes_written+= BIN_LOG_HEADER_SIZE;
3497       write_file_name_to_index_file= 1;
3498     }
3499 
3500     {
3501       /*
3502         In 4.x we put Start event only in the first binlog. But from 5.0 we
3503         want a Start event even if this is not the very first binlog.
3504       */
3505       Format_description_log_event s(BINLOG_VERSION);
3506       /*
3507         don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
3508         as we won't be able to reset it later
3509       */
3510       if (io_cache_type == WRITE_CACHE)
3511         s.flags |= LOG_EVENT_BINLOG_IN_USE_F;
3512 
3513       if (is_relay_log)
3514       {
3515         if (relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
3516           relay_log_checksum_alg=
3517             opt_slave_sql_verify_checksum ? (enum_binlog_checksum_alg) binlog_checksum_options
3518                                           : BINLOG_CHECKSUM_ALG_OFF;
3519         s.checksum_alg= relay_log_checksum_alg;
3520         s.set_relay_log_event();
3521       }
3522       else
3523         s.checksum_alg= (enum_binlog_checksum_alg)binlog_checksum_options;
3524 
3525       crypto.scheme = 0;
3526       DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
3527       if (!s.is_valid())
3528         goto err;
3529       s.dont_set_created= null_created_arg;
3530       if (write_event(&s))
3531         goto err;
3532       bytes_written+= s.data_written;
3533 
3534       if (encrypt_binlog)
3535       {
3536         uint key_version= encryption_key_get_latest_version(ENCRYPTION_KEY_SYSTEM_DATA);
3537         if (key_version == ENCRYPTION_KEY_VERSION_INVALID)
3538         {
3539           sql_print_error("Failed to enable encryption of binary logs");
3540           goto err;
3541         }
3542 
3543         if (key_version != ENCRYPTION_KEY_NOT_ENCRYPTED)
3544         {
3545           if (my_random_bytes(crypto.nonce, sizeof(crypto.nonce)))
3546             goto err;
3547 
3548           Start_encryption_log_event sele(1, key_version, crypto.nonce);
3549           sele.checksum_alg= s.checksum_alg;
3550           if (write_event(&sele))
3551             goto err;
3552 
3553           // Start_encryption_log_event is written, enable the encryption
3554           if (crypto.init(sele.crypto_scheme, key_version))
3555             goto err;
3556         }
3557       }
3558 
3559       if (!is_relay_log)
3560       {
3561         char buf[FN_REFLEN];
3562 
3563         /*
3564           Output a Gtid_list_log_event at the start of the binlog file.
3565 
3566           This is used to quickly determine which GTIDs are found in binlog
3567           files earlier than this one, and which are found in this (or later)
3568           binlogs.
3569 
3570           The list gives a mapping from (domain_id, server_id) -> seq_no (so
3571           this means that there is at most one entry for every unique pair
3572           (domain_id, server_id) in the list). It indicates that this seq_no is
3573           the last one found in an earlier binlog file for this (domain_id,
3574           server_id) combination - so any higher seq_no should be search for
3575           from this binlog file, or a later one.
3576 
3577           This allows to locate the binlog file containing a given GTID by
3578           scanning backwards, reading just the Gtid_list_log_event at the
3579           start of each file, and scanning only the relevant binlog file when
3580           found, not all binlog files.
3581 
3582           The existence of a given entry (domain_id, server_id, seq_no)
3583           guarantees only that this seq_no will not be found in this or any
3584           later binlog file. It does not guarantee that it can be found it an
3585           earlier binlog file, for example the file may have been purged.
3586 
3587           If there is no entry for a given (domain_id, server_id) pair, then
3588           it means that no such GTID exists in any earlier binlog. It is
3589           permissible to remove such pair from future Gtid_list_log_events
3590           if all previous binlog files containing such GTIDs have been purged
3591           (though such optimization is not performed at the time of this
3592           writing). So if there is no entry for given GTID it means that such
3593           GTID should be search for in this or later binlog file, same as if
3594           there had been an entry (domain_id, server_id, 0).
3595         */
3596 
3597         Gtid_list_log_event gl_ev(&rpl_global_gtid_binlog_state, 0);
3598         if (write_event(&gl_ev))
3599           goto err;
3600 
3601         /* Output a binlog checkpoint event at the start of the binlog file. */
3602 
3603         /*
3604           Construct an entry in the binlog_xid_count_list for the new binlog
3605           file (we will not link it into the list until we know the new file
3606           is successfully created; otherwise we would have to remove it again
3607           if creation failed, which gets tricky since other threads may have
3608           seen the entry in the meantime - and we do not want to hold
3609           LOCK_xid_list for long periods of time).
3610 
3611           Write the current binlog checkpoint into the log, so XA recovery will
3612           know from where to start recovery.
3613         */
3614         size_t off= dirname_length(log_file_name);
3615         uint len= static_cast<uint>(strlen(log_file_name) - off);
3616         new_xid_list_entry= new xid_count_per_binlog(log_file_name+off, len);
3617         if (!new_xid_list_entry)
3618           goto err;
3619 
3620         /*
3621           Find the name for the Initial binlog checkpoint.
3622 
3623           Normally this will just be the first entry, as we delete entries
3624           when their count drops to zero. But we scan the list to handle any
3625           corner case, eg. for the first binlog file opened after startup, the
3626           list will be empty.
3627         */
3628         mysql_mutex_lock(&LOCK_xid_list);
3629         I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
3630         while ((b= it++) && b->xid_count == 0)
3631           ;
3632         mysql_mutex_unlock(&LOCK_xid_list);
3633         if (!b)
3634           b= new_xid_list_entry;
3635         if (b->binlog_name)
3636           strmake(buf, b->binlog_name, b->binlog_name_len);
3637         else
3638           goto err;
3639         Binlog_checkpoint_log_event ev(buf, len);
3640         DBUG_EXECUTE_IF("crash_before_write_checkpoint_event",
3641                         flush_io_cache(&log_file);
3642                         mysql_file_sync(log_file.file, MYF(MY_WME));
3643                         DBUG_SUICIDE(););
3644         if (write_event(&ev))
3645           goto err;
3646         bytes_written+= ev.data_written;
3647       }
3648     }
3649     if (description_event_for_queue &&
3650         description_event_for_queue->binlog_version>=4)
3651     {
3652       /*
3653         This is a relay log written to by the I/O slave thread.
3654         Write the event so that others can later know the format of this relay
3655         log.
3656         Note that this event is very close to the original event from the
3657         master (it has binlog version of the master, event types of the
3658         master), so this is suitable to parse the next relay log's event. It
3659         has been produced by
3660         Format_description_log_event::Format_description_log_event(char* buf,).
3661         Why don't we want to write the description_event_for_queue if this
3662         event is for format<4 (3.23 or 4.x): this is because in that case, the
3663         description_event_for_queue describes the data received from the
3664         master, but not the data written to the relay log (*conversion*),
3665         which is in format 4 (slave's).
3666       */
3667       /*
3668         Set 'created' to 0, so that in next relay logs this event does not
3669         trigger cleaning actions on the slave in
3670         Format_description_log_event::apply_event_impl().
3671       */
3672       description_event_for_queue->created= 0;
3673       /* Don't set log_pos in event header */
3674       description_event_for_queue->set_artificial_event();
3675 
3676       if (write_event(description_event_for_queue))
3677         goto err;
3678       bytes_written+= description_event_for_queue->data_written;
3679     }
3680     if (flush_io_cache(&log_file) ||
3681         mysql_file_sync(log_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
3682       goto err;
3683 
3684     my_off_t offset= my_b_tell(&log_file);
3685 
3686     if (!is_relay_log)
3687     {
3688       /* update binlog_end_pos so that it can be read by after sync hook */
3689       reset_binlog_end_pos(log_file_name, offset);
3690 
3691       mysql_mutex_lock(&LOCK_commit_ordered);
3692       strmake_buf(last_commit_pos_file, log_file_name);
3693       last_commit_pos_offset= offset;
3694       mysql_mutex_unlock(&LOCK_commit_ordered);
3695     }
3696 
3697     if (write_file_name_to_index_file)
3698     {
3699 #ifdef HAVE_REPLICATION
3700 #ifdef ENABLED_DEBUG_SYNC
3701       if (current_thd)
3702         DEBUG_SYNC(current_thd, "binlog_open_before_update_index");
3703 #endif
3704       DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
3705 #endif
3706 
3707       DBUG_ASSERT(my_b_inited(&index_file) != 0);
3708       reinit_io_cache(&index_file, WRITE_CACHE,
3709                       my_b_filelength(&index_file), 0, 0);
3710       /*
3711         As this is a new log file, we write the file name to the index
3712         file. As every time we write to the index file, we sync it.
3713       */
3714       if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
3715           my_b_write(&index_file, (uchar*) log_file_name,
3716                      strlen(log_file_name)) ||
3717           my_b_write(&index_file, (uchar*) "\n", 1) ||
3718           flush_io_cache(&index_file) ||
3719           mysql_file_sync(index_file.file, MYF(MY_WME|MY_SYNC_FILESIZE)))
3720         goto err;
3721 
3722 #ifdef HAVE_REPLICATION
3723       DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
3724 #endif
3725     }
3726   }
3727 
3728   if (!is_relay_log)
3729   {
3730     /*
3731       Now the file was created successfully, so we can link in the entry for
3732       the new binlog file in binlog_xid_count_list.
3733     */
3734     mysql_mutex_lock(&LOCK_xid_list);
3735     ++current_binlog_id;
3736     new_xid_list_entry->binlog_id= current_binlog_id;
3737     /* Remove any initial entries with no pending XIDs.  */
3738     while ((b= binlog_xid_count_list.head()) && b->xid_count == 0)
3739     {
3740       WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Removing xid_list_entry for "
3741                            "%s (%lu)", b);
3742       delete binlog_xid_count_list.get();
3743     }
3744     mysql_cond_broadcast(&COND_xid_list);
3745     WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::open(): Adding new xid_list_entry for "
3746                          "%s (%lu)", new_xid_list_entry);
3747     binlog_xid_count_list.push_back(new_xid_list_entry);
3748     mysql_mutex_unlock(&LOCK_xid_list);
3749 
3750     /*
3751       Now that we have synced a new binlog file with an initial Gtid_list
3752       event, it is safe to delete the binlog state file. We will write out
3753       a new, updated file at shutdown, and if we crash before we can recover
3754       the state from the newly written binlog file.
3755 
3756       Since the state file will contain out-of-date data as soon as the first
3757       new GTID is binlogged, it is better to remove it, to avoid any risk of
3758       accidentally reading incorrect data later.
3759     */
3760     if (!state_file_deleted)
3761     {
3762       char buf[FN_REFLEN];
3763       fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
3764                 MY_UNPACK_FILENAME);
3765       my_delete(buf, MY_SYNC_DIR);
3766       state_file_deleted= true;
3767     }
3768   }
3769 
3770   log_state= LOG_OPENED;
3771 
3772 #ifdef HAVE_REPLICATION
3773   close_purge_index_file();
3774 #endif
3775 
3776   /* Notify the io thread that binlog is rotated to a new file */
3777   if (is_relay_log)
3778     signal_relay_log_update();
3779   else
3780     update_binlog_end_pos();
3781   DBUG_RETURN(0);
3782 
3783 err:
3784   int tmp_errno= errno;
3785 #ifdef HAVE_REPLICATION
3786   if (is_inited_purge_index_file())
3787     purge_index_entry(NULL, NULL, need_mutex);
3788   close_purge_index_file();
3789 #endif
3790   sql_print_error(fatal_log_error, (name) ? name : log_name, tmp_errno);
3791   if (new_xid_list_entry)
3792     delete new_xid_list_entry;
3793   if (file >= 0)
3794     mysql_file_close(file, MYF(0));
3795   close(LOG_CLOSE_INDEX);
3796   DBUG_RETURN(1);
3797 }
3798 
3799 
3800 int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo)
3801 {
3802   mysql_mutex_lock(&LOCK_log);
3803   int ret = raw_get_current_log(linfo);
3804   mysql_mutex_unlock(&LOCK_log);
3805   return ret;
3806 }
3807 
3808 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
3809 {
3810   mysql_mutex_assert_owner(&LOCK_log);
3811   strmake_buf(linfo->log_file_name, log_file_name);
3812   linfo->pos = my_b_tell(&log_file);
3813   return 0;
3814 }
3815 
3816 /**
3817   Move all data up in a file in an filename index file.
3818 
3819     We do the copy outside of the IO_CACHE as the cache buffers would just
3820     make things slower and more complicated.
3821     In most cases the copy loop should only do one read.
3822 
3823   @param index_file			File to move
3824   @param offset			Move everything from here to beginning
3825 
3826   @note
3827     File will be truncated to be 'offset' shorter or filled up with newlines
3828 
3829   @retval
3830     0	ok
3831 */
3832 
3833 #ifdef HAVE_REPLICATION
3834 
3835 static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
3836 {
3837   int bytes_read;
3838   my_off_t init_offset= offset;
3839   File file= index_file->file;
3840   uchar io_buf[IO_SIZE*2];
3841   DBUG_ENTER("copy_up_file_and_fill");
3842 
3843   for (;; offset+= bytes_read)
3844   {
3845     mysql_file_seek(file, offset, MY_SEEK_SET, MYF(0));
3846     if ((bytes_read= (int) mysql_file_read(file, io_buf, sizeof(io_buf),
3847                                            MYF(MY_WME)))
3848 	< 0)
3849       goto err;
3850     if (!bytes_read)
3851       break;					// end of file
3852     mysql_file_seek(file, offset-init_offset, MY_SEEK_SET, MYF(0));
3853     if (mysql_file_write(file, io_buf, bytes_read,
3854                          MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
3855       goto err;
3856   }
3857   /* The following will either truncate the file or fill the end with \n' */
3858   if (mysql_file_chsize(file, offset - init_offset, '\n', MYF(MY_WME)) ||
3859       mysql_file_sync(file, MYF(MY_WME|MY_SYNC_FILESIZE)))
3860     goto err;
3861 
3862   /* Reset data in old index cache */
3863   reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 1);
3864   DBUG_RETURN(0);
3865 
3866 err:
3867   DBUG_RETURN(1);
3868 }
3869 
3870 #endif /* HAVE_REPLICATION */
3871 
3872 /**
3873   Find the position in the log-index-file for the given log name.
3874 
3875   @param linfo		Store here the found log file name and position to
3876                        the NEXT log file name in the index file.
3877   @param log_name	Filename to find in the index file.
3878                        Is a null pointer if we want to read the first entry
3879   @param need_lock	Set this to 1 if the parent doesn't already have a
3880                        lock on LOCK_index
3881 
3882   @note
3883     On systems without the truncate function the file will end with one or
3884     more empty lines.  These will be ignored when reading the file.
3885 
3886   @retval
3887     0			ok
3888   @retval
3889     LOG_INFO_EOF	        End of log-index-file found
3890   @retval
3891     LOG_INFO_IO		Got IO error while reading file
3892 */
3893 
3894 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
3895 			    bool need_lock)
3896 {
3897   int error= 0;
3898   char *full_fname= linfo->log_file_name;
3899   char full_log_name[FN_REFLEN], fname[FN_REFLEN];
3900   uint log_name_len= 0, fname_len= 0;
3901   DBUG_ENTER("find_log_pos");
3902   full_log_name[0]= full_fname[0]= 0;
3903 
3904   /*
3905     Mutex needed because we need to make sure the file pointer does not
3906     move from under our feet
3907   */
3908   if (need_lock)
3909     mysql_mutex_lock(&LOCK_index);
3910   mysql_mutex_assert_owner(&LOCK_index);
3911 
3912   // extend relative paths for log_name to be searched
3913   if (log_name)
3914   {
3915     if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
3916     {
3917       error= LOG_INFO_EOF;
3918       goto end;
3919     }
3920   }
3921 
3922   log_name_len= log_name ? (uint) strlen(full_log_name) : 0;
3923   DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
3924                        log_name ? log_name : "NULL", full_log_name));
3925 
3926   /* As the file is flushed, we can't get an error here */
3927   (void) reinit_io_cache(&index_file, READ_CACHE, (my_off_t) 0, 0, 0);
3928 
3929   for (;;)
3930   {
3931     size_t length;
3932     my_off_t offset= my_b_tell(&index_file);
3933 
3934     DBUG_EXECUTE_IF("simulate_find_log_pos_error",
3935                     error=  LOG_INFO_EOF; break;);
3936     /* If we get 0 or 1 characters, this is the end of the file */
3937     if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
3938     {
3939       /* Did not find the given entry; Return not found or error */
3940       error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
3941       break;
3942     }
3943     if (fname[length-1] != '\n')
3944       continue;                                 // Not a log entry
3945     fname[length-1]= 0;                         // Remove end \n
3946 
3947     // extend relative paths and match against full path
3948     if (normalize_binlog_name(full_fname, fname, is_relay_log))
3949     {
3950       error= LOG_INFO_EOF;
3951       break;
3952     }
3953     fname_len= (uint) strlen(full_fname);
3954 
3955     // if the log entry matches, null string matching anything
3956     if (!log_name ||
3957         (log_name_len == fname_len &&
3958 	 !strncmp(full_fname, full_log_name, log_name_len)))
3959     {
3960       DBUG_PRINT("info", ("Found log file entry"));
3961       linfo->index_file_start_offset= offset;
3962       linfo->index_file_offset = my_b_tell(&index_file);
3963       break;
3964     }
3965   }
3966 
3967 end:
3968   if (need_lock)
3969     mysql_mutex_unlock(&LOCK_index);
3970   DBUG_RETURN(error);
3971 }
3972 
3973 
3974 /**
  Find the next log file name in the log-index-file.
3976 
3977   @param
3978     linfo		Store here the next log file name and position to
3979 			the file name after that.
3980   @param
3981     need_lock		Set this to 1 if the parent doesn't already have a
3982 			lock on LOCK_index
3983 
3984   @note
3985     - Before calling this function, one has to call find_log_pos()
3986     to set up 'linfo'
3987     - Mutex needed because we need to make sure the file pointer does not move
3988     from under our feet
3989 
3990   @retval
3991     0			ok
3992   @retval
3993     LOG_INFO_EOF	        End of log-index-file found
3994   @retval
3995     LOG_INFO_IO		Got IO error while reading file
3996 */
3997 
3998 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock)
3999 {
4000   int error= 0;
4001   size_t length;
4002   char fname[FN_REFLEN];
4003   char *full_fname= linfo->log_file_name;
4004 
4005   if (need_lock)
4006     mysql_mutex_lock(&LOCK_index);
4007   mysql_mutex_assert_owner(&LOCK_index);
4008 
4009   /* As the file is flushed, we can't get an error here */
4010   (void) reinit_io_cache(&index_file, READ_CACHE, linfo->index_file_offset, 0,
4011 			 0);
4012 
4013   linfo->index_file_start_offset= linfo->index_file_offset;
4014   if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
4015   {
4016     error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
4017     goto err;
4018   }
4019 
4020   if (fname[0] != 0)
4021   {
4022     if(normalize_binlog_name(full_fname, fname, is_relay_log))
4023     {
4024       error= LOG_INFO_EOF;
4025       goto err;
4026     }
4027     length= strlen(full_fname);
4028   }
4029 
4030   full_fname[length-1]= 0;			// kill \n
4031   linfo->index_file_offset= my_b_tell(&index_file);
4032 
4033 err:
4034   if (need_lock)
4035     mysql_mutex_unlock(&LOCK_index);
4036   return error;
4037 }
4038 
4039 
4040 /**
4041   Delete all logs referred to in the index file.
4042 
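  If a new log file is created, the new index file will only contain that
  file.

  @param thd		  Thread handle. This can be zero in case of resetting
                          relay logs
  @param create_new_log   1 if we should start writing to a new log file
  @param init_state       If not null (binlog only), state to load into the
                          global GTID binlog state instead of resetting it.
                          The binlog state must be empty in that case.
  @param init_state_len   Length of init_state, as passed on to
                          rpl_global_gtid_binlog_state.load()
  @param next_log_number  min number of next log file to use, if possible.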
4049 
4050   @note
4051     If not called from slave thread, write start event to new log
4052 
4053   @retval
4054     0	ok
4055   @retval
4056     1   error
4057 */
4058 
bool MYSQL_BIN_LOG::reset_logs(THD *thd, bool create_new_log,
                               rpl_gtid *init_state, uint32 init_state_len,
                               ulong next_log_number)
{
  LOG_INFO linfo;
  bool error=0;
  int err;
  const char* save_name;
  DBUG_ENTER("reset_logs");

  if (!is_relay_log)
  {
    /* Loading an initial state is refused unless the binlog state is empty */
    if (init_state && !is_empty_state())
    {
      my_error(ER_BINLOG_MUST_BE_EMPTY, MYF(0));
      DBUG_RETURN(1);
    }

    /*
      Mark that a RESET MASTER is in progress.
      This ensures that a binlog checkpoint will not try to write binlog
      checkpoint events, which would be useless (as we are deleting the binlog
      anyway) and could deadlock, as we are holding LOCK_log.

      Wait for any mark_xid_done() calls that might be already running to
      complete (mark_xid_done_waiting counter to drop to zero); we need to
      do this before we take the LOCK_log to not deadlock.
    */
    mysql_mutex_lock(&LOCK_xid_list);
    reset_master_pending++;
    while (mark_xid_done_waiting > 0)
      mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
    mysql_mutex_unlock(&LOCK_xid_list);
  }

  DEBUG_SYNC_C_IF_THD(thd, "reset_logs_after_set_reset_master_pending");
  /*
    We need to get both locks to be sure that no one is trying to
    write to the index log file.
  */
  mysql_mutex_lock(&LOCK_log);
  mysql_mutex_lock(&LOCK_index);

  if (!is_relay_log)
  {
    /*
      We are going to nuke all binary log files.
      Without binlog, we cannot XA recover prepared-but-not-committed
      transactions in engines. So force a commit checkpoint first.

      Note that we take and immediately
      release LOCK_after_binlog_sync/LOCK_commit_ordered. This has
      the effect to ensure that any on-going group commit (in
      trx_group_commit_leader()) has completed before we request the checkpoint,
      due to the chaining of LOCK_log and LOCK_commit_ordered in that function.
      (We are holding LOCK_log, so no new group commit can start).

      Without this, it is possible (though perhaps unlikely) that the RESET
      MASTER could run in-between the write to the binlog and the
      commit_ordered() in the engine of some transaction, and then a crash
      later would leave such transaction not recoverable.
    */

    mysql_mutex_lock(&LOCK_after_binlog_sync);
    mysql_mutex_lock(&LOCK_commit_ordered);
    mysql_mutex_unlock(&LOCK_after_binlog_sync);
    mysql_mutex_unlock(&LOCK_commit_ordered);

    mark_xids_active(current_binlog_id, 1);
    do_checkpoint_request(current_binlog_id);

    /* Now wait for all checkpoint requests and pending unlog() to complete. */
    mysql_mutex_lock(&LOCK_xid_list);
    for (;;)
    {
      if (is_xidlist_idle_nolock())
        break;
      /*
        Wait until signalled that one more binlog dropped to zero, then check
        again.
      */
      mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
    }

    /*
      Now all XIDs are fully flushed to disk, and we are holding LOCK_log so
      no new ones will be written. So we can proceed to delete the logs.
    */
    mysql_mutex_unlock(&LOCK_xid_list);
  }

  /*
    Save variables so that we can reopen the log.
    From here on save_name is the only reference to the old name: it is
    either handed back to 'name' at the err label (when error == 1) or
    released with my_free() on the path that falls through to it.
  */
  save_name=name;
  name=0;					// Protect against free
  close(LOG_CLOSE_TO_BE_OPENED);

  /*
    First delete all old log files and then update the index file.
    As we first delete the log files and do not use sort of logging,
    a crash may lead to an inconsistent state where the index has
    references to non-existent files.

    We need to invert the steps and use the purge_index_file methods
    in order to make the operation safe.
  */

  /* Position linfo on the first entry of the index (NullS matches any) */
  if ((err= find_log_pos(&linfo, NullS, 0)) != 0)
  {
    uint errcode= purge_log_get_error_code(err);
    sql_print_error("Failed to locate old binlog or relay log files");
    my_message(errcode, ER_THD_OR_DEFAULT(thd, errcode), MYF(0));
    error= 1;
    goto err;
  }

  /*
    Delete every file listed in the index. A file that is already missing
    (ENOENT) only produces a warning; any other delete failure aborts the
    reset. The loop ends as soon as find_next_log() returns non-zero.
  */
  for (;;)
  {
    if (unlikely((error= my_delete(linfo.log_file_name, MYF(0)))))
    {
      if (my_errno == ENOENT)
      {
        if (thd)
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_LOG_PURGE_NO_FILE,
                              ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                              linfo.log_file_name);

        sql_print_information("Failed to delete file '%s'",
                              linfo.log_file_name);
        my_errno= 0;
        error= 0;
      }
      else
      {
        if (thd)
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with deleting %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              linfo.log_file_name);
        error= 1;
        goto err;
      }
    }
    if (find_next_log(&linfo, 0))
      break;
  }

  /* For the binlog, install the requested GTID state or clear it */
  if (!is_relay_log)
  {
    if (init_state)
      rpl_global_gtid_binlog_state.load(init_state, init_state_len);
    else
      rpl_global_gtid_binlog_state.reset();
  }

  /* Start logging with a new file */
  close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED);
  // Reset (open will update)
  /* Same policy as for the log files: ENOENT is a warning, others fatal */
  if (unlikely((error= my_delete(index_file_name, MYF(0)))))
  {
    if (my_errno == ENOENT)
    {
      if (thd)
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_LOG_PURGE_NO_FILE,
                            ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                            index_file_name);
      sql_print_information("Failed to delete file '%s'",
                            index_file_name);
      my_errno= 0;
      error= 0;
    }
    else
    {
      if (thd)
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_BINLOG_PURGE_FATAL_ERR,
                            "a problem with deleting %s; "
                            "consider examining correspondence "
                            "of your binlog index file "
                            "to the actual binlog files",
                            index_file_name);
      error= 1;
      goto err;
    }
  }
  /*
    NOTE(review): open() is only attempted when create_new_log is set and
    open_index_file() returns zero. If open_index_file() returns non-zero
    (presumably a failure - confirm), 'error' stays 0, save_name is freed
    below and the function reports success with 'name' still 0.
  */
  if (create_new_log && !open_index_file(index_file_name, 0, FALSE))
    if (unlikely((error= open(save_name, log_type, 0, next_log_number,
                              io_cache_type, max_size, 0, FALSE))))
      goto err;
  my_free((void *) save_name);

err:
  /* On failure the saved name was not freed above, so give it back */
  if (error == 1)
    name= const_cast<char*>(save_name);

  if (!is_relay_log)
  {
    xid_count_per_binlog *b;
    /*
      Remove all entries in the xid_count list except the last.
      Normally we will just be deleting all the entries that we waited for to
      drop to zero above. But if we fail during RESET MASTER for some reason
      then we will not have created any new log file, and we may keep the last
      of the old entries.
    */
    mysql_mutex_lock(&LOCK_xid_list);
    for (;;)
    {
      b= binlog_xid_count_list.head();
      DBUG_ASSERT(b /* List can never become empty. */);
      if (b->binlog_id == current_binlog_id)
        break;
      DBUG_ASSERT(b->xid_count == 0);
      WSREP_XID_LIST_ENTRY("MYSQL_BIN_LOG::reset_logs(): Removing "
                           "xid_list_entry for %s (%lu)", b);
      delete binlog_xid_count_list.get();
    }
    /* Wake up waiters on COND_xid_list and end the "reset pending" state */
    mysql_cond_broadcast(&COND_xid_list);
    reset_master_pending--;
    mysql_mutex_unlock(&LOCK_xid_list);
  }

  /* Release in the reverse order of acquisition */
  mysql_mutex_unlock(&LOCK_index);
  mysql_mutex_unlock(&LOCK_log);
  DBUG_RETURN(error);
}
4289 
4290 
4291 void MYSQL_BIN_LOG::wait_for_last_checkpoint_event()
4292 {
4293   mysql_mutex_lock(&LOCK_xid_list);
4294   for (;;)
4295   {
4296     if (binlog_xid_count_list.is_last(binlog_xid_count_list.head()))
4297       break;
4298     mysql_cond_wait(&COND_xid_list, &LOCK_xid_list);
4299   }
4300   mysql_mutex_unlock(&LOCK_xid_list);
4301 
4302   /*
4303     LOCK_xid_list and LOCK_log are chained, so the LOCK_log will only be
4304     obtained after mark_xid_done() has written the last checkpoint event.
4305   */
4306   mysql_mutex_lock(&LOCK_log);
4307   mysql_mutex_unlock(&LOCK_log);
4308 }
4309 
4310 
4311 /**
4312   Delete relay log files prior to rli->group_relay_log_name
4313   (i.e. all logs which are not involved in a non-finished group
4314   (transaction)), remove them from the index file and start on next
4315   relay log.
4316 
4317   IMPLEMENTATION
4318 
4319   - You must hold rli->data_lock before calling this function, since
4320     it writes group_relay_log_pos and similar fields of
4321     Relay_log_info.
4322   - Protects index file with LOCK_index
4323   - Delete relevant relay log files
4324   - Copy all file names after these ones to the front of the index file
  - If the OS has truncate, truncate the file, else fill it with '\n'
4326   - Read the next file name from the index file and store in rli->linfo
4327 
4328   @param rli	       Relay log information
4329   @param included     If false, all relay logs that are strictly before
4330                       rli->group_relay_log_name are deleted ; if true, the
4331                       latter is deleted too (i.e. all relay logs
4332                       read by the SQL slave thread are deleted).
4333 
4334   @note
4335     - This is only called from the slave SQL thread when it has read
4336     all commands from a relay log and want to switch to a new relay log.
4337     - When this happens, we can be in an active transaction as
4338     a transaction can span over two relay logs
4339     (although it is always written as a single block to the master's binary
4340     log, hence cannot span over two master's binary logs).
4341 
4342   @retval
4343     0			ok
4344   @retval
4345     LOG_INFO_EOF	        End of log-index-file found
4346   @retval
4347     LOG_INFO_SEEK	Could not allocate IO cache
4348   @retval
4349     LOG_INFO_IO		Got IO error while reading file
4350 */
4351 
4352 #ifdef HAVE_REPLICATION
4353 
4354 int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
4355 {
4356   int error, errcode;
4357   char *to_purge_if_included= NULL;
4358   inuse_relaylog *ir;
4359   ulonglong log_space_reclaimed= 0;
4360   DBUG_ENTER("purge_first_log");
4361 
4362   DBUG_ASSERT(is_open());
4363   DBUG_ASSERT(rli->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT);
4364   DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
4365 
4366   mysql_mutex_assert_owner(&rli->data_lock);
4367 
4368   mysql_mutex_lock(&LOCK_index);
4369 
4370   ir= rli->inuse_relaylog_list;
4371   while (ir)
4372   {
4373     inuse_relaylog *next= ir->next;
4374     if (!ir->completed || ir->dequeued_count < ir->queued_count)
4375     {
4376       included= false;
4377       break;
4378     }
4379     if (!included && !strcmp(ir->name, rli->group_relay_log_name))
4380       break;
4381     if (!next)
4382     {
4383       rli->last_inuse_relaylog= NULL;
4384       included= 1;
4385       to_purge_if_included= my_strdup(ir->name, MYF(0));
4386     }
4387     rli->free_inuse_relaylog(ir);
4388     ir= next;
4389   }
4390   rli->inuse_relaylog_list= ir;
4391   if (ir)
4392     to_purge_if_included= my_strdup(ir->name, MYF(0));
4393 
4394   /*
4395     Read the next log file name from the index file and pass it back to
4396     the caller.
4397   */
4398   if (unlikely((error=find_log_pos(&rli->linfo, rli->event_relay_log_name,
4399                                    0))) ||
4400       unlikely((error=find_next_log(&rli->linfo, 0))))
4401   {
4402     sql_print_error("next log error: %d  offset: %llu  log: %s included: %d",
4403                     error, rli->linfo.index_file_offset,
4404                     rli->event_relay_log_name, included);
4405     goto err;
4406   }
4407 
4408   /*
4409     Reset rli's coordinates to the current log.
4410   */
4411   rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
4412   strmake_buf(rli->event_relay_log_name,rli->linfo.log_file_name);
4413 
4414   /*
4415     If we removed the rli->group_relay_log_name file,
4416     we must update the rli->group* coordinates, otherwise do not touch it as the
4417     group's execution is not finished (e.g. COMMIT not executed)
4418   */
4419   if (included)
4420   {
4421     rli->group_relay_log_pos = BIN_LOG_HEADER_SIZE;
4422     strmake_buf(rli->group_relay_log_name,rli->linfo.log_file_name);
4423     rli->notify_group_relay_log_name_update();
4424   }
4425 
4426   /* Store where we are in the new file for the execution thread */
4427   if (rli->flush())
4428     error= LOG_INFO_IO;
4429 
4430   DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););
4431 
4432   rli->relay_log.purge_logs(to_purge_if_included, included,
4433                             0, 0, &log_space_reclaimed);
4434 
4435   mysql_mutex_lock(&rli->log_space_lock);
4436   my_atomic_add64_explicit((volatile int64*)(&rli->log_space_total),
4437                            (-(int64)log_space_reclaimed),
4438                            MY_MEMORY_ORDER_RELAXED);
4439   mysql_cond_broadcast(&rli->log_space_cond);
4440   mysql_mutex_unlock(&rli->log_space_lock);
4441 
4442   /*
4443    * Need to update the log pos because purge logs has been called
4444    * after fetching initially the log pos at the beginning of the method.
4445    */
4446   if ((errcode= find_log_pos(&rli->linfo, rli->event_relay_log_name, 0)))
4447   {
4448     sql_print_error("next log error: %d  offset: %llu  log: %s included: %d",
4449                     errcode, rli->linfo.index_file_offset,
4450                     rli->group_relay_log_name, included);
4451     goto err;
4452   }
4453 
4454   /* If included was passed, rli->linfo should be the first entry. */
4455   DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);
4456 
4457 err:
4458   my_free(to_purge_if_included);
4459   mysql_mutex_unlock(&LOCK_index);
4460   DBUG_RETURN(error);
4461 }
4462 
4463 /**
4464   Update log index_file.
4465 */
4466 
4467 int MYSQL_BIN_LOG::update_log_index(LOG_INFO* log_info, bool need_update_threads)
4468 {
4469   if (copy_up_file_and_fill(&index_file, log_info->index_file_start_offset))
4470     return LOG_INFO_IO;
4471 
4472   // now update offsets in index file for running threads
4473   if (need_update_threads)
4474     adjust_linfo_offsets(log_info->index_file_start_offset);
4475   return 0;
4476 }
4477 
4478 /**
4479   Remove all logs before the given log from disk and from the index file.
4480 
4481   @param to_log	      Delete all log file name before this file.
4482   @param included            If true, to_log is deleted too.
  @param need_mutex          If true, LOCK_index is taken by this function;
                             otherwise the caller must already hold it.
  @param need_update_threads If we want to update the log coordinates of
                             all threads. False for relay logs, true otherwise.
  @param reclaimed_space     If not null, increment this variable by
                             the amount of log space freed
4488 
4489   @note
4490     If any of the logs before the deleted one is in use,
4491     only purge logs up to this one.
4492 
4493   @retval
4494     0			ok
4495   @retval
4496     LOG_INFO_EOF		to_log not found
4497     LOG_INFO_EMFILE             too many files opened
4498     LOG_INFO_FATAL              if any other than ENOENT error from
4499                                 mysql_file_stat() or mysql_file_delete()
4500 */
4501 
4502 int MYSQL_BIN_LOG::purge_logs(const char *to_log,
4503                               bool included,
4504                               bool need_mutex,
4505                               bool need_update_threads,
4506                               ulonglong *reclaimed_space)
4507 {
4508   int error= 0;
4509   bool exit_loop= 0;
4510   LOG_INFO log_info;
4511   THD *thd= current_thd;
4512   DBUG_ENTER("purge_logs");
4513   DBUG_PRINT("info",("to_log= %s",to_log));
4514 
4515   if (need_mutex)
4516     mysql_mutex_lock(&LOCK_index);
4517   if (unlikely((error=find_log_pos(&log_info, to_log, 0 /*no mutex*/))) )
4518   {
4519     sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
4520                     "listed in the index.", to_log);
4521     goto err;
4522   }
4523 
4524   if (unlikely((error= open_purge_index_file(TRUE))))
4525   {
4526     sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
4527     goto err;
4528   }
4529 
4530   /*
4531     File name exists in index file; delete until we find this file
4532     or a file that is used.
4533   */
4534   if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))))
4535     goto err;
4536   while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)) &&
4537          can_purge_log(log_info.log_file_name))
4538   {
4539     if (unlikely((error= register_purge_index_entry(log_info.log_file_name))))
4540     {
4541       sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
4542                       log_info.log_file_name);
4543       goto err;
4544     }
4545 
4546     if (find_next_log(&log_info, 0) || exit_loop)
4547       break;
4548   }
4549 
4550   DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););
4551 
4552   if (unlikely((error= sync_purge_index_file())))
4553   {
4554     sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
4555     goto err;
4556   }
4557 
4558   /* We know how many files to delete. Update index file. */
4559   if (unlikely((error=update_log_index(&log_info, need_update_threads))))
4560   {
4561     sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
4562     goto err;
4563   }
4564 
4565   DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););
4566 
4567 err:
4568   /* Read each entry from purge_index_file and delete the file. */
4569   if (is_inited_purge_index_file() &&
4570       (error= purge_index_entry(thd, reclaimed_space, FALSE)))
4571     sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
4572                     " that would be purged.");
4573   close_purge_index_file();
4574 
4575   DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););
4576 
4577   if (need_mutex)
4578     mysql_mutex_unlock(&LOCK_index);
4579   DBUG_RETURN(error);
4580 }
4581 
4582 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
4583 {
4584   int error= 0;
4585   DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
4586   if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
4587                 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
4588                               MY_REPLACE_EXT)) == NULL)
4589   {
4590     error= 1;
4591     sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
4592                       "file name.");
4593   }
4594   DBUG_RETURN(error);
4595 }
4596 
4597 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
4598 {
4599   int error= 0;
4600   File file= -1;
4601 
4602   DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
4603 
4604   if (destroy)
4605     close_purge_index_file();
4606 
4607   if (!my_b_inited(&purge_index_file))
4608   {
4609     if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
4610                        MYF(MY_WME | ME_WAITTANG))) < 0  ||
4611         init_io_cache(&purge_index_file, file, IO_SIZE,
4612                       (destroy ? WRITE_CACHE : READ_CACHE),
4613                       0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
4614     {
4615       error= 1;
4616       sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
4617                       " file.");
4618     }
4619   }
4620   DBUG_RETURN(error);
4621 }
4622 
4623 int MYSQL_BIN_LOG::close_purge_index_file()
4624 {
4625   int error= 0;
4626 
4627   DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
4628 
4629   if (my_b_inited(&purge_index_file))
4630   {
4631     end_io_cache(&purge_index_file);
4632     error= my_close(purge_index_file.file, MYF(0));
4633   }
4634   my_delete(purge_index_file_name, MYF(0));
4635   bzero((char*) &purge_index_file, sizeof(purge_index_file));
4636 
4637   DBUG_RETURN(error);
4638 }
4639 
4640 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
4641 {
4642   return my_b_inited(&purge_index_file);
4643 }
4644 
4645 int MYSQL_BIN_LOG::sync_purge_index_file()
4646 {
4647   int error= 0;
4648   DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
4649 
4650   if (unlikely((error= flush_io_cache(&purge_index_file))) ||
4651       unlikely((error= my_sync(purge_index_file.file,
4652                                MYF(MY_WME | MY_SYNC_FILESIZE)))))
4653     DBUG_RETURN(error);
4654 
4655   DBUG_RETURN(error);
4656 }
4657 
4658 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
4659 {
4660   int error= 0;
4661   DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
4662 
4663   if (unlikely((error=my_b_write(&purge_index_file, (const uchar*)entry,
4664                                  strlen(entry)))) ||
4665       unlikely((error=my_b_write(&purge_index_file, (const uchar*)"\n", 1))))
4666     DBUG_RETURN (error);
4667 
4668   DBUG_RETURN(error);
4669 }
4670 
4671 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
4672 {
4673   DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
4674   DBUG_RETURN(register_purge_index_entry(entry));
4675 }
4676 
/**
  Walk the purge register file and delete the registered log files.

  The register cache is switched to read mode and read line by line.
  Each named file is stat'ed; if it exists and find_log_pos() reports
  LOG_INFO_EOF for it (presumably: the name is no longer listed in the
  binlog index -- confirm against find_log_pos()), the file is deleted.
  A name for which find_log_pos() succeeds is left untouched.

  @param thd              Thread to push warnings to; may be NULL, in
                          which case some problems are only written to
                          the error log
  @param reclaimed_space  If not NULL, incremented by the stat'ed size of
                          every file that was deleted
  @param need_mutex       Passed through to find_log_pos()

  @retval 0                ok; files missing on disk (ENOENT) only
                           produce warnings
  @retval LOG_INFO_EMFILE  my_delete() failed with EMFILE
  @retval LOG_INFO_FATAL   stat or delete failed with another errno
  @retval other            error from reinit_io_cache(), from reading the
                           register file, or from find_log_pos()
*/
int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *reclaimed_space,
                                     bool need_mutex)
{
  DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");
  MY_STAT s;
  int error= 0;
  LOG_INFO log_info;
  LOG_INFO check_log_info;

  DBUG_ASSERT(my_b_inited(&purge_index_file));

  /* Rewind the register file and switch the cache to reading. */
  if (unlikely((error= reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0,
                                       0))))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
                    "for read");
    goto err;
  }

  for (;;)
  {
    size_t length;

    /*
      A length of 0 or 1 ends the loop: it is reported as a read error
      when the cache has its error flag set, and as end of file otherwise.
    */
    if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
                          FN_REFLEN)) <= 1)
    {
      if (purge_index_file.error)
      {
        error= purge_index_file.error;
        sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
                        "register file.", error);
        goto err;
      }

      /* Reached EOF */
      break;
    }

    /* Get rid of the trailing '\n' */
    log_info.log_file_name[length-1]= 0;

    /* The stat result also provides the size credited to reclaimed_space. */
    if (unlikely(!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s,
                                  MYF(0))))
    {
      if (my_errno == ENOENT)
      {
        /*
          It's not fatal if we can't stat a log file that does not exist;
          If we could not stat, we won't delete.
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                              log_info.log_file_name);
        }
        sql_print_information("Failed to execute mysql_file_stat on file '%s'",
			      log_info.log_file_name);
        my_errno= 0;
      }
      else
      {
        /*
          Other than ENOENT are fatal
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with getting info on being purged %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              log_info.log_file_name);
        }
        else
        {
          sql_print_information("Failed to delete log file '%s'; "
                                "consider examining correspondence "
                                "of your binlog index file "
                                "to the actual binlog files",
                                log_info.log_file_name);
        }
        error= LOG_INFO_FATAL;
        goto err;
      }
    }
    else
    {
      /*
        The file exists. It is deleted only when find_log_pos() fails with
        LOG_INFO_EOF; when find_log_pos() succeeds nothing is done for
        this entry.
      */
      if (unlikely((error= find_log_pos(&check_log_info,
                                        log_info.log_file_name, need_mutex))))
      {
        if (error != LOG_INFO_EOF)
        {
          /* Any other find_log_pos() failure aborts with that error code. */
          if (thd)
          {
            push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                ER_BINLOG_PURGE_FATAL_ERR,
                                "a problem with deleting %s and "
                                "reading the binlog index file",
                                log_info.log_file_name);
          }
          else
          {
            sql_print_information("Failed to delete file '%s' and "
                                  "read the binlog index file",
                                  log_info.log_file_name);
          }
          goto err;
        }

        /* LOG_INFO_EOF is the expected outcome here, not an error. */
        error= 0;

        DBUG_PRINT("info",("purging %s",log_info.log_file_name));
        if (!my_delete(log_info.log_file_name, MYF(0)))
        {
          if (reclaimed_space)
            *reclaimed_space+= s.st_size;
        }
        else
        {
          if (my_errno == ENOENT)
          {
            /* The file vanished between stat and delete: warn and go on. */
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                  ER_LOG_PURGE_NO_FILE, ER_THD(thd, ER_LOG_PURGE_NO_FILE),
                                  log_info.log_file_name);
            }
            sql_print_information("Failed to delete file '%s'",
                                  log_info.log_file_name);
            my_errno= 0;
          }
          else
          {
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                  ER_BINLOG_PURGE_FATAL_ERR,
                                  "a problem with deleting %s; "
                                  "consider examining correspondence "
                                  "of your binlog index file "
                                  "to the actual binlog files",
                                  log_info.log_file_name);
            }
            else
            {
              sql_print_information("Failed to delete file '%s'; "
                                    "consider examining correspondence "
                                    "of your binlog index file "
                                    "to the actual binlog files",
                                    log_info.log_file_name);
            }
            /*
              NOTE(review): my_errno is examined after the warning/log
              calls above; this assumes they leave my_errno unchanged --
              confirm.
            */
            if (my_errno == EMFILE)
            {
              DBUG_PRINT("info",
                         ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno));
              error= LOG_INFO_EMFILE;
              goto err;
            }
            error= LOG_INFO_FATAL;
            goto err;
          }
        }
      }
    }
  }

err:
  DBUG_RETURN(error);
}
4848 
4849 /**
4850   Remove all logs before the given file date from disk and from the
4851   index file.
4852 
4853   @param thd		Thread pointer
4854   @param purge_time	Delete all log files before given date.
4855 
4856   @note
4857     If any of the logs before the deleted one is in use,
4858     only purge logs up to this one.
4859 
4860   @retval
4861     0				ok
4862   @retval
4863     LOG_INFO_PURGE_NO_ROTATE	Binary file that can't be rotated
4864     LOG_INFO_FATAL              if any other than ENOENT error from
4865                                 mysql_file_stat() or mysql_file_delete()
4866 */
4867 
4868 int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time)
4869 {
4870   int error;
4871   char to_log[FN_REFLEN];
4872   LOG_INFO log_info;
4873   MY_STAT stat_area;
4874   THD *thd= current_thd;
4875   DBUG_ENTER("purge_logs_before_date");
4876 
4877   mysql_mutex_lock(&LOCK_index);
4878   to_log[0]= 0;
4879 
4880   if (unlikely((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/))))
4881     goto err;
4882 
4883   while (strcmp(log_file_name, log_info.log_file_name) &&
4884 	 can_purge_log(log_info.log_file_name))
4885   {
4886     if (!mysql_file_stat(m_key_file_log,
4887                          log_info.log_file_name, &stat_area, MYF(0)))
4888     {
4889       if (my_errno == ENOENT)
4890       {
4891         /*
4892           It's not fatal if we can't stat a log file that does not exist.
4893         */
4894         my_errno= 0;
4895       }
4896       else
4897       {
4898         /*
4899           Other than ENOENT are fatal
4900         */
4901         if (thd)
4902         {
4903           push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4904                               ER_BINLOG_PURGE_FATAL_ERR,
4905                               "a problem with getting info on being purged %s; "
4906                               "consider examining correspondence "
4907                               "of your binlog index file "
4908                               "to the actual binlog files",
4909                               log_info.log_file_name);
4910         }
4911         else
4912         {
4913           sql_print_information("Failed to delete log file '%s'",
4914                                 log_info.log_file_name);
4915         }
4916         error= LOG_INFO_FATAL;
4917         goto err;
4918       }
4919     }
4920     else
4921     {
4922       if (stat_area.st_mtime < purge_time)
4923         strmake_buf(to_log, log_info.log_file_name);
4924       else
4925         break;
4926     }
4927     if (find_next_log(&log_info, 0))
4928       break;
4929   }
4930 
4931   error= (to_log[0] ? purge_logs(to_log, 1, 0, 1, (ulonglong *) 0) : 0);
4932 
4933 err:
4934   mysql_mutex_unlock(&LOCK_index);
4935   DBUG_RETURN(error);
4936 }
4937 
4938 
4939 bool
4940 MYSQL_BIN_LOG::can_purge_log(const char *log_file_name_arg)
4941 {
4942   xid_count_per_binlog *b;
4943 
4944   if (is_active(log_file_name_arg))
4945     return false;
4946   mysql_mutex_lock(&LOCK_xid_list);
4947   {
4948     I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
4949     while ((b= it++) &&
4950            0 != strncmp(log_file_name_arg+dirname_length(log_file_name_arg),
4951                         b->binlog_name, b->binlog_name_len))
4952       ;
4953   }
4954   mysql_mutex_unlock(&LOCK_xid_list);
4955   if (b)
4956     return false;
4957   return !log_in_use(log_file_name_arg);
4958 }
4959 #endif /* HAVE_REPLICATION */
4960 
4961 
4962 bool
4963 MYSQL_BIN_LOG::is_xidlist_idle()
4964 {
4965   bool res;
4966   mysql_mutex_lock(&LOCK_xid_list);
4967   res= is_xidlist_idle_nolock();
4968   mysql_mutex_unlock(&LOCK_xid_list);
4969   return res;
4970 }
4971 
4972 
4973 bool
4974 MYSQL_BIN_LOG::is_xidlist_idle_nolock()
4975 {
4976   xid_count_per_binlog *b;
4977 
4978   I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
4979   while ((b= it++))
4980   {
4981     if (b->xid_count > 0)
4982       return false;
4983   }
4984   return true;
4985 }
4986 
4987 #ifdef WITH_WSREP
4988 inline bool
4989 is_gtid_cached_internal(IO_CACHE *file)
4990 {
4991   uchar data[EVENT_TYPE_OFFSET+1];
4992   bool result= false;
4993   my_off_t write_pos= my_b_tell(file);
4994   if (reinit_io_cache(file, READ_CACHE, 0, 0, 0))
4995     return false;
4996   /*
4997    In the cache we have gtid event if , below condition is true,
4998   */
4999   my_b_read(file, data, sizeof(data));
5000   uint event_type= (uchar)data[EVENT_TYPE_OFFSET];
5001   if (event_type == GTID_LOG_EVENT)
5002     result= true;
5003   /*
5004     Cleanup , Why because we have not read the full buffer
5005     and this will cause next to next reinit_io_cache(called in write_cache)
5006     to make cache empty.
5007    */
5008   file->read_pos= file->read_end;
5009   if (reinit_io_cache(file, WRITE_CACHE, write_pos, 0, 0))
5010     return false;
5011   return result;
5012 }
5013 #endif
5014 
5015 #ifdef WITH_WSREP
5016 inline bool
5017 MYSQL_BIN_LOG::is_gtid_cached(THD *thd)
5018 {
5019   binlog_cache_mngr *mngr= (binlog_cache_mngr *) thd_get_ha_data(
5020           thd, binlog_hton);
5021   if (!mngr)
5022     return false;
5023   binlog_cache_data *cache_trans= mngr->get_binlog_cache_data(
5024           use_trans_cache(thd, true));
5025   binlog_cache_data *cache_stmt= mngr->get_binlog_cache_data(
5026           use_trans_cache(thd, false));
5027   if (cache_trans && !cache_trans->empty() &&
5028           is_gtid_cached_internal(&cache_trans->cache_log))
5029     return true;
5030   if (cache_stmt && !cache_stmt->empty() &&
5031           is_gtid_cached_internal(&cache_stmt->cache_log))
5032     return true;
5033   return false;
5034 }
5035 #endif
5036 /**
5037   Create a new log file name.
5038 
5039   @param buf		buf of at least FN_REFLEN where new name is stored
5040 
5041   @note
5042     If file name will be longer then FN_REFLEN it will be truncated
5043 */
5044 
5045 void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
5046 {
5047   size_t dir_len = dirname_length(log_file_name);
5048   if (dir_len >= FN_REFLEN)
5049     dir_len=FN_REFLEN-1;
5050   strnmov(buf, log_file_name, dir_len);
5051   strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
5052 }
5053 
5054 
5055 /**
5056   Check if we are writing/reading to the given log file.
5057 */
5058 
5059 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
5060 {
5061   /**
5062    * there should/must be mysql_mutex_assert_owner(&LOCK_log) here...
5063    * but code violates this! (scary monsters and super creeps!)
5064    *
5065    * example stacktrace:
5066    * #8  MYSQL_BIN_LOG::is_active
5067    * #9  MYSQL_BIN_LOG::can_purge_log
5068    * #10 MYSQL_BIN_LOG::purge_logs
5069    * #11 MYSQL_BIN_LOG::purge_first_log
5070    * #12 next_event
5071    * #13 exec_relay_log_event
5072    *
5073    * I didn't investigate if this is ligit...(i.e if my comment is wrong)
5074    */
5075   return !strcmp(log_file_name, log_file_name_arg);
5076 }
5077 
5078 
5079 /*
5080   Wrappers around new_file_impl to avoid using argument
5081   to control locking. The argument 1) less readable 2) breaks
5082   incapsulation 3) allows external access to the class without
5083   a lock (which is not possible with private new_file_without_locking
5084   method).
5085 
5086   @retval
5087     nonzero - error
5088 */
5089 
5090 int MYSQL_BIN_LOG::new_file()
5091 {
5092   return new_file_impl(1);
5093 }
5094 
5095 /*
5096   @retval
5097     nonzero - error
5098  */
5099 int MYSQL_BIN_LOG::new_file_without_locking()
5100 {
5101   return new_file_impl(0);
5102 }
5103 
5104 
5105 /**
5106   Start writing to a new log file or reopen the old file.
5107 
5108   @param need_lock		Set to 1 if caller has not locked LOCK_log
5109 
5110   @retval
5111     nonzero - error
5112 
5113   @note
5114     The new file name is stored last in the index file
5115 */
5116 
5117 int MYSQL_BIN_LOG::new_file_impl(bool need_lock)
5118 {
5119   int error= 0, close_on_error= FALSE;
5120   char new_name[FN_REFLEN], *new_name_ptr, *old_name, *file_to_open;
5121   uint close_flag;
5122   bool delay_close= false;
5123   File UNINIT_VAR(old_file);
5124   DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
5125 
5126   if (need_lock)
5127     mysql_mutex_lock(&LOCK_log);
5128   mysql_mutex_assert_owner(&LOCK_log);
5129 
5130   if (!is_open())
5131   {
5132     DBUG_PRINT("info",("log is closed"));
5133     mysql_mutex_unlock(&LOCK_log);
5134     DBUG_RETURN(error);
5135   }
5136 
5137   mysql_mutex_lock(&LOCK_index);
5138 
5139   /* Reuse old name if not binlog and not update log */
5140   new_name_ptr= name;
5141 
5142   /*
5143     If user hasn't specified an extension, generate a new log name
5144     We have to do this here and not in open as we want to store the
5145     new file name in the current binary log file.
5146   */
5147   if (unlikely((error= generate_new_name(new_name, name, 0))))
5148   {
5149 #ifdef ENABLE_AND_FIX_HANG
5150     close_on_error= TRUE;
5151 #endif
5152     goto end;
5153   }
5154   new_name_ptr=new_name;
5155 
5156   if (log_type == LOG_BIN)
5157   {
5158     {
5159       /*
5160         We log the whole file name for log file as the user may decide
5161         to change base names at some point.
5162       */
5163       Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
5164                          is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
5165       /*
5166          The current relay-log's closing Rotate event must have checksum
5167          value computed with an algorithm of the last relay-logged FD event.
5168       */
5169       if (is_relay_log)
5170         r.checksum_alg= relay_log_checksum_alg;
5171       DBUG_ASSERT(!is_relay_log || relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
5172       if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event", (error=close_on_error=TRUE), FALSE) ||
5173          (error= write_event(&r)))
5174       {
5175         DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno=2;);
5176         close_on_error= TRUE;
5177         my_printf_error(ER_ERROR_ON_WRITE,
5178                         ER_THD_OR_DEFAULT(current_thd, ER_CANT_OPEN_FILE),
5179                         MYF(ME_FATALERROR), name, errno);
5180         goto end;
5181       }
5182       bytes_written += r.data_written;
5183     }
5184   }
5185 
5186   /*
5187     Update needs to be signalled even if there is no rotate event
5188     log rotation should give the waiting thread a signal to
5189     discover EOF and move on to the next log.
5190   */
5191   if (unlikely((error= flush_io_cache(&log_file))))
5192   {
5193     close_on_error= TRUE;
5194     goto end;
5195   }
5196   update_binlog_end_pos();
5197 
5198   old_name=name;
5199   name=0;				// Don't free name
5200   close_flag= LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX;
5201   if (!is_relay_log)
5202   {
5203     /*
5204       We need to keep the old binlog file open (and marked as in-use) until
5205       the new one is fully created and synced to disk and index. Otherwise we
5206       leave a window where if we crash, there is no binlog file marked as
5207       crashed for server restart to detect the need for recovery.
5208     */
5209     old_file= log_file.file;
5210     close_flag|= LOG_CLOSE_DELAYED_CLOSE;
5211     delay_close= true;
5212   }
5213   close(close_flag);
5214   if (log_type == LOG_BIN && checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF)
5215   {
5216     DBUG_ASSERT(!is_relay_log);
5217     DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
5218     binlog_checksum_options= checksum_alg_reset;
5219   }
5220   /*
5221      Note that at this point, log_state != LOG_CLOSED
5222      (important for is_open()).
5223   */
5224 
5225   /*
5226      new_file() is only used for rotation (in FLUSH LOGS or because size >
5227      max_binlog_size or max_relay_log_size).
5228      If this is a binary log, the Format_description_log_event at the
5229      beginning of the new file should have created=0 (to distinguish with the
5230      Format_description_log_event written at server startup, which should
5231      trigger temp tables deletion on slaves.
5232   */
5233 
5234   /* reopen index binlog file, BUG#34582 */
5235   file_to_open= index_file_name;
5236   error= open_index_file(index_file_name, 0, FALSE);
5237   if (likely(!error))
5238   {
5239     /* reopen the binary log file. */
5240     file_to_open= new_name_ptr;
5241     error= open(old_name, log_type, new_name_ptr, 0, io_cache_type,
5242                 max_size, 1, FALSE);
5243   }
5244 
5245   /* handle reopening errors */
5246   if (unlikely(error))
5247   {
5248     my_error(ER_CANT_OPEN_FILE, MYF(ME_FATALERROR), file_to_open, error);
5249     close_on_error= TRUE;
5250   }
5251 
5252   my_free(old_name);
5253 
5254 end:
5255 
5256   if (delay_close)
5257   {
5258     clear_inuse_flag_when_closing(old_file);
5259     mysql_file_close(old_file, MYF(MY_WME));
5260   }
5261 
5262   if (unlikely(error && close_on_error)) /* rotate or reopen failed */
5263   {
5264     /*
5265       Close whatever was left opened.
5266 
5267       We are keeping the behavior as it exists today, ie,
5268       we disable logging and move on (see: BUG#51014).
5269 
5270       TODO: as part of WL#1790 consider other approaches:
5271        - kill mysql (safety);
5272        - try multiple locations for opening a log file;
5273        - switch server to protected/readonly mode
5274        - ...
5275     */
5276     close(LOG_CLOSE_INDEX);
5277     sql_print_error(fatal_log_error, new_name_ptr, errno);
5278   }
5279 
5280   mysql_mutex_unlock(&LOCK_index);
5281   if (need_lock)
5282     mysql_mutex_unlock(&LOCK_log);
5283 
5284   DBUG_RETURN(error);
5285 }
5286 
5287 bool MYSQL_BIN_LOG::write_event(Log_event *ev, binlog_cache_data *cache_data,
5288                                 IO_CACHE *file)
5289 {
5290   Log_event_writer writer(file, 0, &crypto);
5291   if (crypto.scheme && file == &log_file)
5292     writer.ctx= alloca(crypto.ctx_size);
5293   if (cache_data)
5294     cache_data->add_status(ev->logged_status());
5295   return writer.write(ev);
5296 }
5297 
5298 bool MYSQL_BIN_LOG::append(Log_event *ev)
5299 {
5300   bool res;
5301   mysql_mutex_lock(&LOCK_log);
5302   res= append_no_lock(ev);
5303   mysql_mutex_unlock(&LOCK_log);
5304   return res;
5305 }
5306 
5307 
5308 bool MYSQL_BIN_LOG::append_no_lock(Log_event* ev)
5309 {
5310   bool error = 0;
5311   DBUG_ENTER("MYSQL_BIN_LOG::append");
5312 
5313   mysql_mutex_assert_owner(&LOCK_log);
5314   DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5315 
5316   if (write_event(ev))
5317   {
5318     error=1;
5319     goto err;
5320   }
5321   bytes_written+= ev->data_written;
5322   DBUG_PRINT("info",("max_size: %lu",max_size));
5323   if (flush_and_sync(0))
5324     goto err;
5325   if (my_b_append_tell(&log_file) > max_size)
5326     error= new_file_without_locking();
5327 err:
5328   update_binlog_end_pos();
5329   DBUG_RETURN(error);
5330 }
5331 
/**
  Append an already serialized event to the log; the caller must hold
  LOCK_log.

  When a crypto scheme is active the event is encrypted into a temporary
  buffer first. Note that in that case the caller's buffer is modified in
  place (its first four bytes are copied over the event length field).

  After the append the log is flushed and, if it has grown beyond
  max_size, a new file is started. update_binlog_end_pos() is only called
  when no error is reported.

  @param buf  Serialized event
  @param len  Length of the event in bytes

  @return true on allocation, encryption or append failure, or when
          starting a new file fails; false otherwise. A failure of
          flush_and_sync() is not reported through the return value.
*/
bool MYSQL_BIN_LOG::write_event_buffer(uchar* buf, uint len)
{
  bool error= 1;
  uchar *ebuf= 0;
  DBUG_ENTER("MYSQL_BIN_LOG::write_event_buffer");

  DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);

  mysql_mutex_assert_owner(&LOCK_log);

  if (crypto.scheme != 0)
  {
    DBUG_ASSERT(crypto.scheme == 1);

    uint elen;
    uchar iv[BINLOG_IV_LENGTH];

    ebuf= (uchar*)my_safe_alloca(len);
    if (!ebuf)
      goto err;

    /* The IV is derived from the current append position in the log. */
    crypto.set_iv(iv, (uint32)my_b_append_tell(&log_file));

    /*
      we want to encrypt everything, excluding the event length:
      massage the data before the encryption
    */
    memcpy(buf + EVENT_LEN_OFFSET, buf, 4);

    /* Everything after the first four bytes is encrypted, without padding. */
    if (encryption_crypt(buf + 4, len - 4,
                         ebuf + 4, &elen,
                         crypto.key, crypto.key_length, iv, sizeof(iv),
                         ENCRYPTION_FLAG_ENCRYPT | ENCRYPTION_FLAG_NOPAD,
                         ENCRYPTION_KEY_SYSTEM_DATA, crypto.key_version))
      goto err;

    DBUG_ASSERT(elen == len - 4);

    /* massage the data after the encryption */
    memcpy(ebuf, ebuf + EVENT_LEN_OFFSET, 4);
    int4store(ebuf + EVENT_LEN_OFFSET, len);

    /* From here on the encrypted copy is what gets written. */
    buf= ebuf;
  }
  if (my_b_append(&log_file, buf, len))
    goto err;
  bytes_written+= len;

  /* The event is in the cache: from now on only rotation can set error. */
  error= 0;
  DBUG_PRINT("info",("max_size: %lu",max_size));
  if (flush_and_sync(0))
    goto err;
  if (my_b_append_tell(&log_file) > max_size)
    error= new_file_without_locking();
err:
  my_safe_afree(ebuf, len);
  if (likely(!error))
    update_binlog_end_pos();
  DBUG_RETURN(error);
}
5392 
5393 bool MYSQL_BIN_LOG::flush_and_sync(bool *synced)
5394 {
5395   int err=0, fd=log_file.file;
5396   if (synced)
5397     *synced= 0;
5398   mysql_mutex_assert_owner(&LOCK_log);
5399   if (flush_io_cache(&log_file))
5400     return 1;
5401   uint sync_period= get_sync_period();
5402   if (sync_period && ++sync_counter >= sync_period)
5403   {
5404     sync_counter= 0;
5405     err= mysql_file_sync(fd, MYF(MY_WME|MY_SYNC_FILESIZE));
5406     if (synced)
5407       *synced= 1;
5408 #ifndef DBUG_OFF
5409     if (opt_binlog_dbug_fsync_sleep > 0)
5410       my_sleep(opt_binlog_dbug_fsync_sleep);
5411 #endif
5412   }
5413   return err;
5414 }
5415 
5416 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
5417 {
5418   DBUG_ASSERT(!thd->binlog_evt_union.do_union);
5419   thd->binlog_evt_union.do_union= TRUE;
5420   thd->binlog_evt_union.unioned_events= FALSE;
5421   thd->binlog_evt_union.unioned_events_trans= FALSE;
5422   thd->binlog_evt_union.first_query_id= query_id_param;
5423 }
5424 
5425 void MYSQL_BIN_LOG::stop_union_events(THD *thd)
5426 {
5427   DBUG_ASSERT(thd->binlog_evt_union.do_union);
5428   thd->binlog_evt_union.do_union= FALSE;
5429 }
5430 
5431 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
5432 {
5433   return (thd->binlog_evt_union.do_union &&
5434           query_id_param >= thd->binlog_evt_union.first_query_id);
5435 }
5436 
5437 /**
5438   This function checks if a transactional table was updated by the
5439   current transaction.
5440 
5441   @param thd The client thread that executed the current statement.
5442   @return
5443     @c true if a transactional table was updated, @c false otherwise.
5444 */
5445 bool
5446 trans_has_updated_trans_table(const THD* thd)
5447 {
5448   binlog_cache_mngr *const cache_mngr=
5449     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5450 
5451   return (cache_mngr ? !cache_mngr->trx_cache.empty() : 0);
5452 }
5453 
5454 /**
5455   This function checks if a transactional table was updated by the
5456   current statement.
5457 
5458   @param thd The client thread that executed the current statement.
5459   @return
5460     @c true if a transactional table was updated, @c false otherwise.
5461 */
5462 bool
5463 stmt_has_updated_trans_table(const THD *thd)
5464 {
5465   Ha_trx_info *ha_info;
5466 
5467   for (ha_info= thd->transaction.stmt.ha_list; ha_info;
5468        ha_info= ha_info->next())
5469   {
5470     if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
5471       return (TRUE);
5472   }
5473   return (FALSE);
5474 }
5475 
5476 /**
5477   This function checks if either a trx-cache or a non-trx-cache should
5478   be used. If @c bin_log_direct_non_trans_update is active or the format
5479   is either MIXED or ROW, the cache to be used depends on the flag @c
5480   is_transactional.
5481 
5482   On the other hand, if binlog_format is STMT or direct option is
5483   OFF, the trx-cache should be used if and only if the statement is
5484   transactional or the trx-cache is not empty. Otherwise, the
5485   non-trx-cache should be used.
5486 
5487   @param thd              The client thread.
5488   @param is_transactional The changes are related to a trx-table.
5489   @return
5490     @c true if a trx-cache should be used, @c false otherwise.
5491 */
5492 bool use_trans_cache(const THD* thd, bool is_transactional)
5493 {
5494   if (is_transactional)
5495     return 1;
5496   binlog_cache_mngr *const cache_mngr=
5497     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5498 
5499   return ((thd->is_current_stmt_binlog_format_row() ||
5500            thd->variables.binlog_direct_non_trans_update) ? 0 :
5501           !cache_mngr->trx_cache.empty());
5502 }
5503 
5504 /**
5505   This function checks if a transaction, either a multi-statement
5506   or a single statement transaction is about to commit or not.
5507 
5508   @param thd The client thread that executed the current statement.
5509   @param all Committing a transaction (i.e. TRUE) or a statement
5510              (i.e. FALSE).
5511   @return
5512     @c true if committing a transaction, otherwise @c false.
5513 */
5514 bool ending_trans(THD* thd, const bool all)
5515 {
5516   return (all || ending_single_stmt_trans(thd, all));
5517 }
5518 
5519 /**
5520   This function checks if a single statement transaction is about
5521   to commit or not.
5522 
5523   @param thd The client thread that executed the current statement.
5524   @param all Committing a transaction (i.e. TRUE) or a statement
5525              (i.e. FALSE).
5526   @return
5527     @c true if committing a single statement transaction, otherwise
5528     @c false.
5529 */
5530 bool ending_single_stmt_trans(THD* thd, const bool all)
5531 {
5532   return (!all && !thd->in_multi_stmt_transaction_mode());
5533 }
5534 
5535 /**
5536   This function checks if a non-transactional table was updated by
5537   the current transaction.
5538 
5539   @param thd The client thread that executed the current statement.
5540   @return
5541     @c true if a non-transactional table was updated, @c false
5542     otherwise.
5543 */
5544 bool trans_has_updated_non_trans_table(const THD* thd)
5545 {
5546   return (thd->transaction.all.modified_non_trans_table ||
5547           thd->transaction.stmt.modified_non_trans_table);
5548 }
5549 
5550 /**
5551   This function checks if a non-transactional table was updated by the
5552   current statement.
5553 
5554   @param thd The client thread that executed the current statement.
5555   @return
5556     @c true if a non-transactional table was updated, @c false otherwise.
5557 */
5558 bool stmt_has_updated_non_trans_table(const THD* thd)
5559 {
5560   return (thd->transaction.stmt.modified_non_trans_table);
5561 }
5562 
5563 /*
5564   These functions are placed in this file since they need access to
5565   binlog_hton, which has internal linkage.
5566 */
5567 
5568 binlog_cache_mngr *THD::binlog_setup_trx_data()
5569 {
5570   DBUG_ENTER("THD::binlog_setup_trx_data");
5571   binlog_cache_mngr *cache_mngr=
5572     (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5573 
5574   if (cache_mngr)
5575     DBUG_RETURN(cache_mngr);                             // Already set up
5576 
5577   cache_mngr= (binlog_cache_mngr*) my_malloc(sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
5578   if (!cache_mngr ||
5579       open_cached_file(&cache_mngr->stmt_cache.cache_log, mysql_tmpdir,
5580                        LOG_PREFIX, (size_t)binlog_stmt_cache_size, MYF(MY_WME)) ||
5581       open_cached_file(&cache_mngr->trx_cache.cache_log, mysql_tmpdir,
5582                        LOG_PREFIX, (size_t)binlog_cache_size, MYF(MY_WME)))
5583   {
5584     my_free(cache_mngr);
5585     DBUG_RETURN(0);                      // Didn't manage to set it up
5586   }
5587   thd_set_ha_data(this, binlog_hton, cache_mngr);
5588 
5589   cache_mngr= new (cache_mngr)
5590               binlog_cache_mngr(max_binlog_stmt_cache_size,
5591                                 max_binlog_cache_size,
5592                                 &binlog_stmt_cache_use,
5593                                 &binlog_stmt_cache_disk_use,
5594                                 &binlog_cache_use,
5595                                 &binlog_cache_disk_use);
5596   DBUG_RETURN(cache_mngr);
5597 }
5598 
5599 /*
5600   Function to start a statement and optionally a transaction for the
5601   binary log.
5602 
5603   SYNOPSIS
5604     binlog_start_trans_and_stmt()
5605 
5606   DESCRIPTION
5607 
5608     This function does three things:
5609     - Start a transaction if not in autocommit mode or if a BEGIN
5610       statement has been seen.
5611 
5612     - Start a statement transaction to allow us to truncate the cache.
5613 
5614     - Save the current binlog position so that we can roll back the
5615       statement by truncating the cache.
5616 
5617       We only update the saved position if the old one was undefined,
5618       the reason is that there are some cases (e.g., for CREATE-SELECT)
5619       where the position is saved twice (e.g., both in
5620       select_create::prepare() and THD::binlog_write_table_map()) , but
5621       we should use the first. This means that calls to this function
5622       can be used to start the statement before the first table map
5623       event, to include some extra events.
5624  */
5625 
void
THD::binlog_start_trans_and_stmt()
{
  /* May be NULL: the cache manager is created lazily. */
  binlog_cache_mngr *cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
  DBUG_ENTER("binlog_start_trans_and_stmt");
  DBUG_PRINT("enter", ("cache_mngr: %p  cache_mngr->trx_cache.get_prev_position(): %lu",
                       cache_mngr,
                       (cache_mngr ? (ulong) cache_mngr->trx_cache.get_prev_position() :
                        (ulong) 0)));

  /*
    Only act when no statement start position has been saved yet: either
    there is no cache manager at all, or its saved position is undefined.
  */
  if (cache_mngr == NULL ||
      cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
  {
    /* Saves the statement start position (see binlog_set_stmt_begin()). */
    this->binlog_set_stmt_begin();
    bool mstmt_mode= in_multi_stmt_transaction_mode();
#ifdef WITH_WSREP
      /*
         Write a Gtid event into the cache when wsrep_gtid_mode is on and
         the session carries an explicit gtid_seq_no, so that the same GTID
         is forwarded to the other nodes.
         This must happen only once per transaction. Since this function is
         called multiple times, ha_info->is_started() is checked before the
         binlog handlerton is registered below.
       */
    Ha_trx_info *ha_info;
    /*
      Slot 1 is used in multi-statement mode, slot 0 otherwise.
      NOTE(review): presumably slot 0 is the statement and slot 1 the
      transaction registration - confirm against Ha_trx_info usage.
    */
    ha_info= this->ha_data[binlog_hton->slot].ha_info + (mstmt_mode ? 1 : 0);

    if (!ha_info->is_started() && wsrep_gtid_mode
            && this->variables.gtid_seq_no)
    {
      /*
        Re-read the manager (shadowing the outer variable, which may still
        be NULL from before binlog_set_stmt_begin() was called).
      */
      binlog_cache_mngr *const cache_mngr=
        (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
      /* Argument 1 is the is_transactional selector used elsewhere in this file. */
      binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(1);
      IO_CACHE *file= &cache_data->cache_log;
      Log_event_writer writer(file, cache_data);
        Gtid_log_event gtid_event(this, this->variables.gtid_seq_no,
                            this->variables.gtid_domain_id,
                            true, LOG_EVENT_SUPPRESS_USE_F,
                            true, 0);
      gtid_event.server_id= this->variables.server_id;
      /* NOTE(review): the result of this write is not checked. */
      writer.write(&gtid_event);
    }
#endif
    /* Register for the transaction only in multi-statement mode ... */
    if (mstmt_mode)
      trans_register_ha(this, TRUE, binlog_hton);
    /* ... and always for the statement. */
    trans_register_ha(this, FALSE, binlog_hton);
    /*
      Mark statement transaction as read/write. We never start
      a binary log transaction and keep it read-only,
      therefore it's best to mark the transaction read/write just
      at the same time we start it.
      Not necessary to mark the normal transaction read/write
      since the statement-level flag will be propagated automatically
      inside ha_commit_trans.
    */
    ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
  }
  DBUG_VOID_RETURN;
}
5685 
5686 void THD::binlog_set_stmt_begin() {
5687   binlog_cache_mngr *cache_mngr=
5688     (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5689 
5690   /*
5691     The call to binlog_trans_log_savepos() might create the cache_mngr
5692     structure, if it didn't exist before, so we save the position
5693     into an auto variable and then write it into the transaction
5694     data for the binary log (i.e., cache_mngr).
5695   */
5696   my_off_t pos= 0;
5697   binlog_trans_log_savepos(this, &pos);
5698   cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5699   cache_mngr->trx_cache.set_prev_position(pos);
5700 }
5701 
5702 static int
5703 binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
5704 {
5705   int err= 0;
5706   DBUG_ENTER("binlog_start_consistent_snapshot");
5707 
5708   binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
5709 
5710   /* Server layer calls us with LOCK_commit_ordered locked, so this is safe. */
5711   mysql_mutex_assert_owner(&LOCK_commit_ordered);
5712   strmake_buf(cache_mngr->last_commit_pos_file, mysql_bin_log.last_commit_pos_file);
5713   cache_mngr->last_commit_pos_offset= mysql_bin_log.last_commit_pos_offset;
5714 
5715   trans_register_ha(thd, TRUE, hton);
5716 
5717   DBUG_RETURN(err);
5718 }
5719 
5720 /**
5721   This function writes a table map to the binary log.
5722   Note that in order to keep the signature uniform with related methods,
5723   we use a redundant parameter to indicate whether a transactional table
5724   was changed or not.
5725 
5726   If with_annotate != NULL and
5727   *with_annotate = TRUE write also Annotate_rows before the table map.
5728 
5729   @param table             a pointer to the table.
5730   @param is_transactional  @c true indicates a transactional table,
5731                            otherwise @c false a non-transactional.
5732   @return
5733     nonzero if an error pops up when writing the table map event.
5734 */
5735 int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
5736                                 my_bool *with_annotate)
5737 {
5738   int error;
5739   DBUG_ENTER("THD::binlog_write_table_map");
5740   DBUG_PRINT("enter", ("table: %p  (%s: #%lu)",
5741                        table, table->s->table_name.str,
5742                        table->s->table_map_id));
5743 
5744   /* Ensure that all events in a GTID group are in the same cache */
5745   if (variables.option_bits & OPTION_GTID_BEGIN)
5746     is_transactional= 1;
5747 
5748   /* Pre-conditions */
5749   DBUG_ASSERT(is_current_stmt_binlog_format_row());
5750   DBUG_ASSERT(WSREP_EMULATE_BINLOG(this) || mysql_bin_log.is_open());
5751   DBUG_ASSERT(table->s->table_map_id != ULONG_MAX);
5752 
5753   Table_map_log_event
5754     the_event(this, table, table->s->table_map_id, is_transactional);
5755 
5756   if (binlog_table_maps == 0)
5757     binlog_start_trans_and_stmt();
5758 
5759   binlog_cache_mngr *const cache_mngr=
5760     (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5761   binlog_cache_data *cache_data= (cache_mngr->
5762                                   get_binlog_cache_data(is_transactional));
5763   IO_CACHE *file= &cache_data->cache_log;
5764   Log_event_writer writer(file, cache_data);
5765 
5766   if (with_annotate && *with_annotate)
5767   {
5768     Annotate_rows_log_event anno(table->in_use, is_transactional, false);
5769     /* Annotate event should be written not more than once */
5770     *with_annotate= 0;
5771     if (unlikely((error= writer.write(&anno))))
5772     {
5773       if (my_errno == EFBIG)
5774         cache_data->set_incident();
5775       DBUG_RETURN(error);
5776     }
5777   }
5778   if (unlikely((error= writer.write(&the_event))))
5779     DBUG_RETURN(error);
5780 
5781   binlog_table_maps++;
5782   DBUG_RETURN(0);
5783 }
5784 
5785 /**
5786   This function retrieves a pending row event from a cache which is
5787   specified through the parameter @c is_transactional. Respectively, when it
5788   is @c true, the pending event is returned from the transactional cache.
5789   Otherwise from the non-transactional cache.
5790 
5791   @param is_transactional  @c true indicates a transactional cache,
5792                            otherwise @c false a non-transactional.
5793   @return
5794     The row event if any.
5795 */
5796 Rows_log_event*
5797 THD::binlog_get_pending_rows_event(bool is_transactional) const
5798 {
5799   Rows_log_event* rows= NULL;
5800   binlog_cache_mngr *const cache_mngr=
5801     (binlog_cache_mngr*) thd_get_ha_data(this, binlog_hton);
5802 
5803   /*
5804     This is less than ideal, but here's the story: If there is no cache_mngr,
5805     prepare_pending_rows_event() has never been called (since the cache_mngr
5806     is set up there). In that case, we just return NULL.
5807    */
5808   if (cache_mngr)
5809   {
5810     binlog_cache_data *cache_data=
5811       cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional));
5812 
5813     rows= cache_data->pending();
5814   }
5815   return (rows);
5816 }
5817 
5818 /**
5819   This function stores a pending row event into a cache which is specified
5820   through the parameter @c is_transactional. Respectively, when it is @c
5821   true, the pending event is stored into the transactional cache. Otherwise
5822   into the non-transactional cache.
5823 
5824   @param evt               a pointer to the row event.
5825   @param is_transactional  @c true indicates a transactional cache,
5826                            otherwise @c false a non-transactional.
5827 */
5828 void
5829 THD::binlog_set_pending_rows_event(Rows_log_event* ev, bool is_transactional)
5830 {
5831   binlog_cache_mngr *const cache_mngr= binlog_setup_trx_data();
5832 
5833   DBUG_ASSERT(cache_mngr);
5834 
5835   binlog_cache_data *cache_data=
5836     cache_mngr->get_binlog_cache_data(use_trans_cache(this, is_transactional));
5837 
5838   cache_data->set_pending(ev);
5839 }
5840 
5841 
5842 /**
5843   This function removes the pending rows event, discarding any outstanding
5844   rows. If there is no pending rows event available, this is effectively a
5845   no-op.
5846 
5847   @param thd               a pointer to the user thread.
5848   @param is_transactional  @c true indicates a transactional cache,
5849                            otherwise @c false a non-transactional.
5850 */
5851 int
5852 MYSQL_BIN_LOG::remove_pending_rows_event(THD *thd, bool is_transactional)
5853 {
5854   DBUG_ENTER("MYSQL_BIN_LOG::remove_pending_rows_event");
5855 
5856   binlog_cache_mngr *const cache_mngr=
5857     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5858 
5859   DBUG_ASSERT(cache_mngr);
5860 
5861   binlog_cache_data *cache_data=
5862     cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional));
5863 
5864   if (Rows_log_event* pending= cache_data->pending())
5865   {
5866     delete pending;
5867     cache_data->set_pending(NULL);
5868   }
5869 
5870   DBUG_RETURN(0);
5871 }
5872 
5873 /*
5874   Moves the last bunch of rows from the pending Rows event to a cache (either
5875   transactional cache if is_transaction is @c true, or the non-transactional
5876   cache otherwise. Sets a new pending event.
5877 
5878   @param thd               a pointer to the user thread.
5879   @param evt               a pointer to the row event.
5880   @param is_transactional  @c true indicates a transactional cache,
5881                            otherwise @c false a non-transactional.
5882 */
5883 int
5884 MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
5885                                                 Rows_log_event* event,
5886                                                 bool is_transactional)
5887 {
5888   DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
5889   DBUG_ASSERT(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open());
5890   DBUG_PRINT("enter", ("event: %p", event));
5891 
5892   binlog_cache_mngr *const cache_mngr=
5893     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
5894 
5895   DBUG_ASSERT(cache_mngr);
5896 
5897   binlog_cache_data *cache_data=
5898     cache_mngr->get_binlog_cache_data(use_trans_cache(thd, is_transactional));
5899 
5900   DBUG_PRINT("info", ("cache_mngr->pending(): %p", cache_data->pending()));
5901 
5902   if (Rows_log_event* pending= cache_data->pending())
5903   {
5904     Log_event_writer writer(&cache_data->cache_log, cache_data);
5905 
5906     /*
5907       Write pending event to the cache.
5908     */
5909     DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
5910                     {DBUG_SET("+d,simulate_file_write_error");});
5911     if (writer.write(pending))
5912     {
5913       set_write_error(thd, is_transactional);
5914       if (check_write_error(thd) && cache_data &&
5915           stmt_has_updated_non_trans_table(thd))
5916         cache_data->set_incident();
5917       delete pending;
5918       cache_data->set_pending(NULL);
5919       DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
5920                       {DBUG_SET("-d,simulate_file_write_error");});
5921       DBUG_RETURN(1);
5922     }
5923 
5924     delete pending;
5925   }
5926 
5927   thd->binlog_set_pending_rows_event(event, is_transactional);
5928 
5929   DBUG_RETURN(0);
5930 }
5931 
5932 
/*
  Generate a new global transaction ID, and write it to the binlog.

  The GTID is built from a domain id, the session's server_id and a
  sequence number. If the session variable gtid_seq_no is nonzero it is
  used as the sequence number; otherwise the next one is allocated from
  rpl_global_gtid_binlog_state. In both cases the global binlog state is
  updated, the session's gtid_seq_no is reset to 0 and any pending
  OPTION_GTID_BEGIN flag is cleared.

  @param thd               session for which the GTID is generated.
  @param standalone        passed through to the Gtid_log_event.
  @param is_transactional  passed through to the Gtid_log_event.
  @param commit_id         passed through to the Gtid_log_event.
  @return
    false on success (including the WSREP case where the event is already
    cached and is therefore not written here), true if updating the
    binlog state or writing the event failed.
*/

bool
MYSQL_BIN_LOG::write_gtid_event(THD *thd, bool standalone,
                                bool is_transactional, uint64 commit_id)
{
  rpl_gtid gtid;
  uint32 domain_id;
  uint32 local_server_id;
  uint64 seq_no;
  int err;
  DBUG_ENTER("write_gtid_event");
  DBUG_PRINT("enter", ("standalone: %d", standalone));

  /*
    Pick the domain id. With WSREP, the wsrep domain is used when the
    session has a wsrep seqno, wsrep_gtid_mode is on and no explicit
    gtid_seq_no was set; in every other case the session's gtid_domain_id
    is used.
  */
#ifdef WITH_WSREP
  if (WSREP(thd) && thd->wsrep_trx_meta.gtid.seqno != -1 && wsrep_gtid_mode && !thd->variables.gtid_seq_no)
  {
    domain_id= wsrep_gtid_domain_id;
  } else {
#endif /* WITH_WSREP */
  domain_id= thd->variables.gtid_domain_id;
#ifdef WITH_WSREP
  }
#endif /* WITH_WSREP */
  local_server_id= thd->variables.server_id;
  seq_no= thd->variables.gtid_seq_no;

  DBUG_ASSERT(local_server_id != 0);

  if (thd->variables.option_bits & OPTION_GTID_BEGIN)
  {
    DBUG_PRINT("error", ("OPTION_GTID_BEGIN is set. "
                         "Master and slave will have different GTID values"));
    /* Reset the flag, as we will write out a GTID anyway */
    thd->variables.option_bits&= ~OPTION_GTID_BEGIN;
  }

  /*
    Reset the session variable gtid_seq_no, to reduce the risk of accidentally
    producing a duplicate GTID.
  */
  thd->variables.gtid_seq_no= 0;
  if (seq_no != 0)
  {
    /* Use the specified sequence number. */
    gtid.domain_id= domain_id;
    gtid.server_id= local_server_id;
    gtid.seq_no= seq_no;
    err= rpl_global_gtid_binlog_state.update(&gtid, opt_gtid_strict_mode);
    /*
      NOTE(review): this stores a server error code in errno; presumably a
      caller inspects it to recognise the strict-mode failure - confirm.
    */
    if (err && thd->get_stmt_da()->sql_errno()==ER_GTID_STRICT_OUT_OF_ORDER)
      errno= ER_GTID_STRICT_OUT_OF_ORDER;
  }
  else
  {
    /* Allocate the next sequence number for the GTID. */
    err= rpl_global_gtid_binlog_state.update_with_next_gtid(domain_id,
                                                            local_server_id, &gtid);
    seq_no= gtid.seq_no;
  }
  if (err)
    DBUG_RETURN(true);

  /* Remember the GTID on the session as its last commit GTID. */
  thd->set_last_commit_gtid(gtid);

  Gtid_log_event gtid_event(thd, seq_no, domain_id, standalone,
                            LOG_EVENT_SUPPRESS_USE_F, is_transactional,
                            commit_id);

  /* Write the event to the binary log. */
  DBUG_ASSERT(this == &mysql_bin_log);

#ifdef WITH_WSREP
  /* The Gtid event is already in the cache; do not write it again. */
  if (wsrep_gtid_mode && is_gtid_cached(thd))
    DBUG_RETURN(false);
#endif

  if (write_event(&gtid_event))
    DBUG_RETURN(true);
  /* Account the bytes written for this event in the session status. */
  status_var_add(thd->status_var.binlog_bytes_written, gtid_event.data_written);

  DBUG_RETURN(false);
}
6015 
6016 
6017 int
6018 MYSQL_BIN_LOG::write_state_to_file()
6019 {
6020   File file_no;
6021   IO_CACHE cache;
6022   char buf[FN_REFLEN];
6023   int err;
6024   bool opened= false;
6025   bool log_inited= false;
6026 
6027   fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
6028             MY_UNPACK_FILENAME);
6029   if ((file_no= mysql_file_open(key_file_binlog_state, buf,
6030                                 O_RDWR|O_CREAT|O_TRUNC|O_BINARY,
6031                                 MYF(MY_WME))) < 0)
6032   {
6033     err= 1;
6034     goto err;
6035   }
6036   opened= true;
6037   if ((err= init_io_cache(&cache, file_no, IO_SIZE, WRITE_CACHE, 0, 0,
6038                            MYF(MY_WME|MY_WAIT_IF_FULL))))
6039     goto err;
6040   log_inited= true;
6041   if ((err= rpl_global_gtid_binlog_state.write_to_iocache(&cache)))
6042     goto err;
6043   log_inited= false;
6044   if ((err= end_io_cache(&cache)))
6045     goto err;
6046   if ((err= mysql_file_sync(file_no, MYF(MY_WME|MY_SYNC_FILESIZE))))
6047     goto err;
6048   goto end;
6049 
6050 err:
6051   sql_print_error("Error writing binlog state to file '%s'.\n", buf);
6052   if (log_inited)
6053     end_io_cache(&cache);
6054 end:
6055   if (opened)
6056     mysql_file_close(file_no, MYF(0));
6057 
6058   return err;
6059 }
6060 
6061 
6062 /*
6063   Initialize the binlog state from the master-bin.state file, at server startup.
6064 
6065   Returns:
6066     0 for success.
6067     2 for when .state file did not exist.
6068     1 for other error.
6069 */
6070 int
6071 MYSQL_BIN_LOG::read_state_from_file()
6072 {
6073   File file_no;
6074   IO_CACHE cache;
6075   char buf[FN_REFLEN];
6076   int err;
6077   bool opened= false;
6078   bool log_inited= false;
6079 
6080   fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
6081             MY_UNPACK_FILENAME);
6082   if ((file_no= mysql_file_open(key_file_binlog_state, buf,
6083                                 O_RDONLY|O_BINARY, MYF(0))) < 0)
6084   {
6085     if (my_errno != ENOENT)
6086     {
6087       err= 1;
6088       goto err;
6089     }
6090     else
6091     {
6092       /*
6093         If the state file does not exist, this is the first server startup
6094         with GTID enabled. So initialize to empty state.
6095       */
6096       rpl_global_gtid_binlog_state.reset();
6097       err= 2;
6098       goto end;
6099     }
6100   }
6101   opened= true;
6102   if ((err= init_io_cache(&cache, file_no, IO_SIZE, READ_CACHE, 0, 0,
6103                           MYF(MY_WME|MY_WAIT_IF_FULL))))
6104     goto err;
6105   log_inited= true;
6106   if ((err= rpl_global_gtid_binlog_state.read_from_iocache(&cache)))
6107     goto err;
6108   goto end;
6109 
6110 err:
6111   sql_print_error("Error reading binlog GTID state from file '%s'.\n", buf);
6112 end:
6113   if (log_inited)
6114     end_io_cache(&cache);
6115   if (opened)
6116     mysql_file_close(file_no, MYF(0));
6117 
6118   return err;
6119 }
6120 
6121 
/*
  Forward to rpl_global_gtid_binlog_state.get_most_recent_gtid_list().
  Both out-parameters are passed through unchanged and the callee's result
  is returned as-is.
  NOTE(review): ownership of the memory returned in *list is not visible
  here - check the callee before freeing or retaining it.
*/
int
MYSQL_BIN_LOG::get_most_recent_gtid_list(rpl_gtid **list, uint32 *size)
{
  return rpl_global_gtid_binlog_state.get_most_recent_gtid_list(list, size);
}
6127 
6128 
/*
  Forward to rpl_global_gtid_binlog_state.append_pos(), which appends to
  the given String, and return its result unchanged.
*/
bool
MYSQL_BIN_LOG::append_state_pos(String *str)
{
  return rpl_global_gtid_binlog_state.append_pos(str);
}
6134 
6135 
/*
  Forward to rpl_global_gtid_binlog_state.append_state(), which appends to
  the given String, and return its result unchanged.
*/
bool
MYSQL_BIN_LOG::append_state(String *str)
{
  return rpl_global_gtid_binlog_state.append_state(str);
}
6141 
6142 
/*
  Return true when the global binlog GTID state reports a count of zero,
  false otherwise.
*/
bool
MYSQL_BIN_LOG::is_empty_state()
{
  return (rpl_global_gtid_binlog_state.count() == 0);
}
6148 
6149 
6150 bool
6151 MYSQL_BIN_LOG::find_in_binlog_state(uint32 domain_id, uint32 server_id_arg,
6152                                     rpl_gtid *out_gtid)
6153 {
6154   rpl_gtid *gtid;
6155   if ((gtid= rpl_global_gtid_binlog_state.find(domain_id, server_id_arg)))
6156     *out_gtid= *gtid;
6157   return gtid != NULL;
6158 }
6159 
6160 
6161 bool
6162 MYSQL_BIN_LOG::lookup_domain_in_binlog_state(uint32 domain_id,
6163                                              rpl_gtid *out_gtid)
6164 {
6165   rpl_gtid *found_gtid;
6166 
6167   if ((found_gtid= rpl_global_gtid_binlog_state.find_most_recent(domain_id)))
6168   {
6169     *out_gtid= *found_gtid;
6170     return true;
6171   }
6172 
6173   return false;
6174 }
6175 
6176 
/*
  Forward to rpl_global_gtid_binlog_state.bump_seq_no_if_needed() for the
  given domain and sequence number, and return its result unchanged.
*/
int
MYSQL_BIN_LOG::bump_seq_no_counter_if_needed(uint32 domain_id, uint64 seq_no)
{
  return rpl_global_gtid_binlog_state.bump_seq_no_if_needed(domain_id, seq_no);
}
6182 
6183 
/*
  Forward to rpl_global_gtid_binlog_state.check_strict_sequence() for the
  given (domain_id, server_id_arg, seq_no) and return its result unchanged.
  NOTE(review): which return value means "sequence violated" is defined by
  the callee and is not visible here.
*/
bool
MYSQL_BIN_LOG::check_strict_gtid_sequence(uint32 domain_id,
                                          uint32 server_id_arg,
                                          uint64 seq_no)
{
  return rpl_global_gtid_binlog_state.check_strict_sequence(domain_id,
                                                            server_id_arg,
                                                            seq_no);
}
6193 
6194 
6195 /**
6196   Write an event to the binary log. If with_annotate != NULL and
6197   *with_annotate = TRUE write also Annotate_rows before the event
6198   (this should happen only if the event is a Table_map).
6199 */
6200 
6201 bool MYSQL_BIN_LOG::write(Log_event *event_info, my_bool *with_annotate)
6202 {
6203   THD *thd= event_info->thd;
6204   bool error= 1;
6205   binlog_cache_data *cache_data= 0;
6206   bool is_trans_cache= FALSE;
6207   bool using_trans= event_info->use_trans_cache();
6208   bool direct= event_info->use_direct_logging();
6209   ulong UNINIT_VAR(prev_binlog_id);
6210   DBUG_ENTER("MYSQL_BIN_LOG::write(Log_event *)");
6211 
6212   /*
6213     When binary logging is not enabled (--log-bin=0), wsrep-patch partially
6214     enables it without opening the binlog file (MYSQL_BIN_LOG::open().
6215     So, avoid writing to binlog file.
6216   */
6217   if (direct &&
6218       (wsrep_emulate_bin_log ||
6219        (WSREP(thd) && !(thd->variables.option_bits & OPTION_BIN_LOG))))
6220     DBUG_RETURN(0);
6221 
6222   if (thd->variables.option_bits & OPTION_GTID_BEGIN)
6223   {
6224     DBUG_PRINT("info", ("OPTION_GTID_BEGIN was set"));
6225     /* Wait for commit from binary log before we commit */
6226     direct= 0;
6227     using_trans= 1;
6228   }
6229 
6230   if (thd->binlog_evt_union.do_union)
6231   {
6232     /*
6233       In Stored function; Remember that function call caused an update.
6234       We will log the function call to the binary log on function exit
6235     */
6236     thd->binlog_evt_union.unioned_events= TRUE;
6237     thd->binlog_evt_union.unioned_events_trans |= using_trans;
6238     DBUG_RETURN(0);
6239   }
6240 
6241   /*
6242     We only end the statement if we are in a top-level statement.  If
6243     we are inside a stored function, we do not end the statement since
6244     this will close all tables on the slave. But there can be a special case
6245     where we are inside a stored function/trigger and a SAVEPOINT is being
6246     set in side the stored function/trigger. This SAVEPOINT execution will
6247     force the pending event to be flushed without an STMT_END_F flag. This
6248     will result in a case where following DMLs will be considered as part of
6249     same statement and result in data loss on slave. Hence in this case we
6250     force the end_stmt to be true.
6251   */
6252   bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
6253                         SQLCOM_SAVEPOINT) ? true :
6254     (thd->locked_tables_mode && thd->lex->requires_prelocking());
6255   if (thd->binlog_flush_pending_rows_event(end_stmt, using_trans))
6256     DBUG_RETURN(error);
6257 
6258   /*
6259      In most cases this is only called if 'is_open()' is true; in fact this is
6260      mostly called if is_open() *was* true a few instructions before, but it
6261      could have changed since.
6262   */
6263   /* applier and replayer can skip writing binlog events */
6264   if ((WSREP_EMULATE_BINLOG(thd) &&
6265        IF_WSREP(thd->wsrep_exec_mode != REPL_RECV, 0)) || is_open())
6266   {
6267     my_off_t UNINIT_VAR(my_org_b_tell);
6268 #ifdef HAVE_REPLICATION
6269     /*
6270       In the future we need to add to the following if tests like
6271       "do the involved tables match (to be implemented)
6272       binlog_[wild_]{do|ignore}_table?" (WL#1049)"
6273     */
6274     const char *local_db= event_info->get_db();
6275 
6276     bool option_bin_log_flag= (thd->variables.option_bits & OPTION_BIN_LOG);
6277 
6278     /*
6279       Log all updates to binlog cache so that they can get replicated to other
6280       nodes. A check has been added to stop them from getting logged into
6281       binary log files.
6282     */
6283     if (WSREP(thd)) option_bin_log_flag= true;
6284 
6285     if ((!(option_bin_log_flag)) ||
6286 	(thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
6287          thd->lex->sql_command != SQLCOM_SAVEPOINT &&
6288          !binlog_filter->db_ok(local_db)))
6289       DBUG_RETURN(0);
6290 #endif /* HAVE_REPLICATION */
6291 
6292     IO_CACHE *file= NULL;
6293 
6294     if (direct)
6295     {
6296       int res;
6297       uint64 commit_id= 0;
6298       DBUG_PRINT("info", ("direct is set"));
6299       if ((res= thd->wait_for_prior_commit()))
6300         DBUG_RETURN(res);
6301       file= &log_file;
6302       my_org_b_tell= my_b_tell(file);
6303       mysql_mutex_lock(&LOCK_log);
6304       prev_binlog_id= current_binlog_id;
6305       DBUG_EXECUTE_IF("binlog_force_commit_id",
6306         {
6307           const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") };
6308           bool null_value;
6309           user_var_entry *entry=
6310             (user_var_entry*) my_hash_search(&thd->user_vars,
6311                                              (uchar*) commit_name.str,
6312                                              commit_name.length);
6313           commit_id= entry->val_int(&null_value);
6314         });
6315       if (write_gtid_event(thd, true, using_trans, commit_id))
6316         goto err;
6317     }
6318     else
6319     {
6320       binlog_cache_mngr *const cache_mngr= thd->binlog_setup_trx_data();
6321       if (!cache_mngr)
6322         goto err;
6323 
6324       is_trans_cache= use_trans_cache(thd, using_trans);
6325       cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);
6326       file= &cache_data->cache_log;
6327 
6328       if (thd->lex->stmt_accessed_non_trans_temp_table() && is_trans_cache)
6329         thd->transaction.stmt.mark_modified_non_trans_temp_table();
6330       thd->binlog_start_trans_and_stmt();
6331     }
6332     DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));
6333 
6334     /*
6335        No check for auto events flag here - this write method should
6336        never be called if auto-events are enabled.
6337 
6338        Write first log events which describe the 'run environment'
6339        of the SQL command. If row-based binlogging, Insert_id, Rand
6340        and other kind of "setting context" events are not needed.
6341     */
6342 
6343     if (with_annotate && *with_annotate)
6344     {
6345       DBUG_ASSERT(event_info->get_type_code() == TABLE_MAP_EVENT);
6346       Annotate_rows_log_event anno(thd, using_trans, direct);
6347       /* Annotate event should be written not more than once */
6348       *with_annotate= 0;
6349       if (write_event(&anno, cache_data, file))
6350         goto err;
6351     }
6352 
6353     {
6354       if (!thd->is_current_stmt_binlog_format_row())
6355       {
6356         if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
6357         {
6358           Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
6359                              thd->first_successful_insert_id_in_prev_stmt_for_binlog,
6360                              using_trans, direct);
6361           if (write_event(&e, cache_data, file))
6362             goto err;
6363         }
6364         if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
6365         {
6366           DBUG_PRINT("info",("number of auto_inc intervals: %u",
6367                              thd->auto_inc_intervals_in_cur_stmt_for_binlog.
6368                              nb_elements()));
6369           Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
6370                              thd->auto_inc_intervals_in_cur_stmt_for_binlog.
6371                              minimum(), using_trans, direct);
6372           if (write_event(&e, cache_data, file))
6373             goto err;
6374         }
6375         if (thd->rand_used)
6376         {
6377           Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
6378                            using_trans, direct);
6379           if (write_event(&e, cache_data, file))
6380             goto err;
6381         }
6382         if (thd->user_var_events.elements)
6383         {
6384           for (uint i= 0; i < thd->user_var_events.elements; i++)
6385           {
6386             BINLOG_USER_VAR_EVENT *user_var_event;
6387             get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);
6388 
6389             /* setting flags for user var log event */
6390             uchar flags= User_var_log_event::UNDEF_F;
6391             if (user_var_event->unsigned_flag)
6392               flags|= User_var_log_event::UNSIGNED_F;
6393 
6394             User_var_log_event e(thd, user_var_event->user_var_event->name.str,
6395                                  user_var_event->user_var_event->name.length,
6396                                  user_var_event->value,
6397                                  user_var_event->length,
6398                                  user_var_event->type,
6399                                  user_var_event->charset_number,
6400                                  flags,
6401                                  using_trans,
6402                                  direct);
6403             if (write_event(&e, cache_data, file))
6404               goto err;
6405           }
6406         }
6407       }
6408     }
6409 
6410     /*
6411       Write the event.
6412     */
6413     if (write_event(event_info, cache_data, file) ||
6414         DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
6415       goto err;
6416 
6417     error= 0;
6418 err:
6419     if (direct)
6420     {
6421       my_off_t offset= my_b_tell(file);
6422       bool check_purge= false;
6423       DBUG_ASSERT(!is_relay_log);
6424 
6425       if (likely(!error))
6426       {
6427         bool synced;
6428 
6429         if ((error= flush_and_sync(&synced)))
6430         {
6431         }
6432         else
6433         {
6434           mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
6435           mysql_mutex_assert_owner(&LOCK_log);
6436           mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
6437           mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
6438 #ifdef HAVE_REPLICATION
6439           if (repl_semisync_master.report_binlog_update(thd, log_file_name,
6440                                                         file->pos_in_file))
6441           {
6442             sql_print_error("Failed to run 'after_flush' hooks");
6443             error= 1;
6444           }
6445           else
6446 #endif
6447           {
6448             /*
6449               update binlog_end_pos so it can be read by dump thread
6450               note: must be _after_ the RUN_HOOK(after_flush) or else
6451               semi-sync might not have put the transaction into
6452               it's list before dump-thread tries to send it
6453             */
6454             update_binlog_end_pos(offset);
6455             /*
6456               If a transaction with the LOAD DATA statement is divided
6457               into logical mini-transactions (of the 10K rows) and binlog
6458               is rotated, then the last portion of data may be lost due to
6459               wsrep handler re-registration at the boundary of the split.
6460               Since splitting of the LOAD DATA into mini-transactions is
6461               logical, we should not allow these mini-transactions to fall
6462               into separate binlogs. Therefore, it is necessary to prohibit
6463               the rotation of binlog in the middle of processing LOAD DATA:
6464             */
6465 #ifdef WITH_WSREP
6466             if (!thd->wsrep_split_flag)
6467             {
6468 #endif /* WITH_WSREP */
6469             if (unlikely((error= rotate(false, &check_purge))))
6470               check_purge= false;
6471 #ifdef WITH_WSREP
6472             }
6473 #endif /* WITH_WSREP */
6474           }
6475         }
6476       }
6477 
6478       status_var_add(thd->status_var.binlog_bytes_written,
6479                      offset - my_org_b_tell);
6480 
6481       mysql_mutex_lock(&LOCK_after_binlog_sync);
6482       mysql_mutex_unlock(&LOCK_log);
6483 
6484       mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
6485       mysql_mutex_assert_not_owner(&LOCK_log);
6486       mysql_mutex_assert_owner(&LOCK_after_binlog_sync);
6487       mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
6488 #ifdef HAVE_REPLICATION
6489       if (repl_semisync_master.wait_after_sync(log_file_name,
6490                                                file->pos_in_file))
6491       {
6492         error=1;
6493         /* error is already printed inside hook */
6494       }
6495 #endif
6496 
6497       /*
6498         Take mutex to protect against a reader seeing partial writes of 64-bit
6499         offset on 32-bit CPUs.
6500       */
6501       mysql_mutex_lock(&LOCK_commit_ordered);
6502       mysql_mutex_unlock(&LOCK_after_binlog_sync);
6503       last_commit_pos_offset= offset;
6504       mysql_mutex_unlock(&LOCK_commit_ordered);
6505 
6506       if (check_purge)
6507         checkpoint_and_purge(prev_binlog_id);
6508     }
6509 
6510     if (unlikely(error))
6511     {
6512       set_write_error(thd, is_trans_cache);
6513       if (check_write_error(thd) && cache_data &&
6514           stmt_has_updated_non_trans_table(thd))
6515         cache_data->set_incident();
6516     }
6517   }
6518 
6519   DBUG_RETURN(error);
6520 }
6521 
6522 
6523 int error_log_print(enum loglevel level, const char *format,
6524                     va_list args)
6525 {
6526   return logger.error_log_print(level, format, args);
6527 }
6528 
6529 
6530 bool slow_log_print(THD *thd, const char *query, uint query_length,
6531                     ulonglong current_utime)
6532 {
6533   return logger.slow_log_print(thd, query, query_length, current_utime);
6534 }
6535 
6536 
6537 /**
6538   Decide if we should log the command to general log
6539 
6540   @retval
6541      FALSE  No logging
6542      TRUE   Ok to log
6543 */
6544 
6545 bool LOGGER::log_command(THD *thd, enum enum_server_command command)
6546 {
6547   /*
6548     Log command if we have at least one log event handler enabled and want
6549     to log this king of commands
6550   */
6551   if (!(*general_log_handler_list && (what_to_log & (1L << (uint) command))))
6552     return FALSE;
6553 
6554   /*
6555     If LOG_SLOW_DISABLE_SLAVE is set when slave thread starts, then
6556     OPTION_LOG_OFF is set.
6557     Only the super user can set this bit.
6558   */
6559   return !(thd->variables.option_bits & OPTION_LOG_OFF);
6560 }
6561 
6562 
6563 bool general_log_print(THD *thd, enum enum_server_command command,
6564                        const char *format, ...)
6565 {
6566   va_list args;
6567   uint error= 0;
6568 
6569   /* Print the message to the buffer if we want to log this kind of commands */
6570   if (! logger.log_command(thd, command))
6571     return FALSE;
6572 
6573   va_start(args, format);
6574   error= logger.general_log_print(thd, command, format, args);
6575   va_end(args);
6576 
6577   return error;
6578 }
6579 
6580 bool general_log_write(THD *thd, enum enum_server_command command,
6581                        const char *query, size_t query_length)
6582 {
6583   /* Write the message to the log if we want to log this king of commands */
6584   if (logger.log_command(thd, command) || mysql_audit_general_enabled())
6585     return logger.general_log_write(thd, command, query, query_length);
6586 
6587   return FALSE;
6588 }
6589 
6590 
6591 static void
6592 binlog_checkpoint_callback(void *cookie)
6593 {
6594   MYSQL_BIN_LOG::xid_count_per_binlog *entry=
6595     (MYSQL_BIN_LOG::xid_count_per_binlog *)cookie;
6596   /*
6597     For every supporting engine, we increment the xid_count and issue a
6598     commit_checkpoint_request(). Then we can count when all
6599     commit_checkpoint_notify() callbacks have occurred, and then log a new
6600     binlog checkpoint event.
6601   */
6602   mysql_bin_log.mark_xids_active(entry->binlog_id, 1);
6603 }
6604 
6605 
6606 /*
6607   Request a commit checkpoint from each supporting engine.
6608   This must be called after each binlog rotate, and after LOCK_log has been
6609   released. The xid_count value in the xid_count_per_binlog entry was
6610   incremented by 1 and will be decremented in this function; this ensures
6611   that the entry will not go away early despite LOCK_log not being held.
6612 */
6613 void
6614 MYSQL_BIN_LOG::do_checkpoint_request(ulong binlog_id)
6615 {
6616   xid_count_per_binlog *entry;
6617 
6618   /*
6619     Find the binlog entry, and invoke commit_checkpoint_request() on it in
6620     each supporting storage engine.
6621   */
6622   mysql_mutex_lock(&LOCK_xid_list);
6623   I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
6624   do {
6625     entry= it++;
6626     DBUG_ASSERT(entry /* binlog_id is always somewhere in the list. */);
6627   } while (entry->binlog_id != binlog_id);
6628   mysql_mutex_unlock(&LOCK_xid_list);
6629 
6630   ha_commit_checkpoint_request(entry, binlog_checkpoint_callback);
6631   /*
6632     When we rotated the binlog, we incremented xid_count to make sure the
6633     entry would not go away until this point, where we have done all necessary
6634     commit_checkpoint_request() calls.
6635     So now we can (and must) decrease the count - when it reaches zero, we
6636     will know that both all pending unlog() and all pending
6637     commit_checkpoint_notify() calls are done, and we can log a new binlog
6638     checkpoint.
6639   */
6640   mark_xid_done(binlog_id, true);
6641 }
6642 
6643 
6644 /**
6645   The method executes rotation when LOCK_log is already acquired
6646   by the caller.
6647 
6648   @param force_rotate  caller can request the log rotation
6649   @param check_purge   is set to true if rotation took place
6650 
6651   @note
6652     Caller _must_ check the check_purge variable. If this is set, it means
6653     that the binlog was rotated, and caller _must_ ensure that
6654     do_checkpoint_request() is called later with the binlog_id of the rotated
6655     binlog file. The call to do_checkpoint_request() must happen after
6656     LOCK_log is released (which is why we cannot simply do it here).
6657     Usually, checkpoint_and_purge() is appropriate, as it will both handle
6658     the checkpointing and any needed purging of old logs.
6659 
6660   @note
6661     If rotation fails, for instance the server was unable
6662     to create a new log file, we still try to write an
6663     incident event to the current log.
6664 
6665   @retval
6666     nonzero - error in rotating routine.
6667 */
6668 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
6669 {
6670   int error= 0;
6671   DBUG_ENTER("MYSQL_BIN_LOG::rotate");
6672 
6673 #ifdef WITH_WSREP
6674   if (WSREP_ON && wsrep_to_isolation)
6675   {
6676     *check_purge= false;
6677     WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d",
6678                 wsrep_to_isolation);
6679     DBUG_RETURN(0);
6680   }
6681 #endif /* WITH_WSREP */
6682 
6683   //todo: fix the macro def and restore safe_mutex_assert_owner(&LOCK_log);
6684   *check_purge= false;
6685 
6686   if (force_rotate || (my_b_tell(&log_file) >= (my_off_t) max_size))
6687   {
6688     ulong binlog_id= current_binlog_id;
6689     /*
6690       We rotate the binlog, so we need to start a commit checkpoint in all
6691       supporting engines - when it finishes, we can log a new binlog checkpoint
6692       event.
6693 
6694       But we cannot start the checkpoint here - there could be a group commit
6695       still in progress which needs to be included in the checkpoint, and
6696       besides we do not want to do the (possibly expensive) checkpoint while
6697       LOCK_log is held.
6698 
6699       On the other hand, we must be sure that the xid_count entry for the
6700       previous log does not go away until we start the checkpoint - which it
6701       could do as it is no longer the most recent. So we increment xid_count
6702       (to count the pending checkpoint request) - this will fix the entry in
6703       place until we decrement again in do_checkpoint_request().
6704     */
6705     mark_xids_active(binlog_id, 1);
6706 
6707     if (unlikely((error= new_file_without_locking())))
6708     {
6709       /**
6710          Be conservative... There are possible lost events (eg,
6711          failing to log the Execute_load_query_log_event
6712          on a LOAD DATA while using a non-transactional
6713          table)!
6714 
6715          We give it a shot and try to write an incident event anyway
6716          to the current log.
6717       */
6718       if (!write_incident_already_locked(current_thd))
6719         flush_and_sync(0);
6720 
6721       /*
6722         We failed to rotate - so we have to decrement the xid_count back that
6723         we incremented before attempting the rotate.
6724       */
6725       mark_xid_done(binlog_id, false);
6726     }
6727     else
6728       *check_purge= true;
6729   }
6730   DBUG_RETURN(error);
6731 }
6732 
6733 /**
6734   The method executes logs purging routine.
6735 
6736   @retval
6737     nonzero - error in rotating routine.
6738 */
6739 void MYSQL_BIN_LOG::purge()
6740 {
6741   mysql_mutex_assert_not_owner(&LOCK_log);
6742 #ifdef HAVE_REPLICATION
6743   if (expire_logs_days)
6744   {
6745     DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
6746     time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
6747     if (purge_time >= 0)
6748     {
6749       purge_logs_before_date(purge_time);
6750     }
6751     DEBUG_SYNC(current_thd, "after_purge_logs_before_date");
6752   }
6753 #endif
6754 }
6755 
6756 
6757 void MYSQL_BIN_LOG::checkpoint_and_purge(ulong binlog_id)
6758 {
6759   do_checkpoint_request(binlog_id);
6760   purge();
6761 }
6762 
6763 
6764 /**
6765   Searches for the first (oldest) binlog file name in in the binlog index.
6766 
6767   @param[in,out]  buf_arg  pointer to a buffer to hold found
6768                            the first binary log file name
6769   @return         NULL     on success, otherwise error message
6770 */
6771 static const char* get_first_binlog(char* buf_arg)
6772 {
6773   IO_CACHE *index_file;
6774   size_t length;
6775   char fname[FN_REFLEN];
6776   const char* errmsg= NULL;
6777 
6778   DBUG_ENTER("get_first_binlog");
6779 
6780   DBUG_ASSERT(mysql_bin_log.is_open());
6781 
6782   mysql_bin_log.lock_index();
6783 
6784   index_file=mysql_bin_log.get_index_file();
6785   if (reinit_io_cache(index_file, READ_CACHE, (my_off_t) 0, 0, 0))
6786   {
6787     errmsg= "failed to create a cache on binlog index";
6788     goto end;
6789   }
6790   /* The file ends with EOF or empty line */
6791   if ((length=my_b_gets(index_file, fname, sizeof(fname))) <= 1)
6792   {
6793     errmsg= "empty binlog index";
6794     goto end;
6795   }
6796   else
6797   {
6798     fname[length-1]= 0;                         // Remove end \n
6799   }
6800   if (normalize_binlog_name(buf_arg, fname, false))
6801   {
6802     errmsg= "could not normalize the first file name in the binlog index";
6803     goto end;
6804   }
6805 end:
6806   mysql_bin_log.unlock_index();
6807 
6808   DBUG_RETURN(errmsg);
6809 }
6810 
6811 /**
6812   Check weather the gtid binlog state can safely remove gtid
6813   domains passed as the argument. A safety condition is satisfied when
6814   there are no events from the being deleted domains in the currently existing
6815   binlog files. Upon successful check the supplied domains are removed
6816   from @@gtid_binlog_state. The caller is supposed to rotate binlog so that
6817   the active latest file won't have the deleted domains in its Gtid_list header.
6818 
6819   @param  domain_drop_lex  gtid domain id sequence from lex.
6820                            Passed as a pointer to dynamic array must be not empty
6821                            unless pointer value NULL.
6822   @retval zero             on success
6823   @retval > 0              ineffective call none from the *non* empty
6824                            gtid domain sequence is deleted
6825   @retval < 0              on error
6826 */
6827 static int do_delete_gtid_domain(DYNAMIC_ARRAY *domain_drop_lex)
6828 {
6829   int rc= 0;
6830   Gtid_list_log_event *glev= NULL;
6831   char buf[FN_REFLEN];
6832   File file;
6833   IO_CACHE cache;
6834   const char* errmsg= NULL;
6835   char errbuf[MYSQL_ERRMSG_SIZE]= {0};
6836 
6837   if (!domain_drop_lex)
6838     return 0; // still "effective" having empty domain sequence to delete
6839 
6840   DBUG_ASSERT(domain_drop_lex->elements > 0);
6841   mysql_mutex_assert_owner(mysql_bin_log.get_log_lock());
6842 
6843   if ((errmsg= get_first_binlog(buf)) != NULL)
6844     goto end;
6845   bzero((char*) &cache, sizeof(cache));
6846   if ((file= open_binlog(&cache, buf, &errmsg)) == (File) -1)
6847     goto end;
6848   errmsg= get_gtid_list_event(&cache, &glev);
6849   end_io_cache(&cache);
6850   mysql_file_close(file, MYF(MY_WME));
6851 
6852   DBUG_EXECUTE_IF("inject_binlog_delete_domain_init_error",
6853                   errmsg= "injected error";);
6854   if (errmsg)
6855     goto end;
6856   errmsg= rpl_global_gtid_binlog_state.drop_domain(domain_drop_lex,
6857                                                    glev, errbuf);
6858 
6859 end:
6860   if (errmsg)
6861   {
6862     if (strlen(errmsg) > 0)
6863     {
6864       my_error(ER_BINLOG_CANT_DELETE_GTID_DOMAIN, MYF(0), errmsg);
6865       rc= -1;
6866     }
6867     else
6868     {
6869       rc= 1;
6870     }
6871   }
6872   delete glev;
6873 
6874   return rc;
6875 }
6876 
6877 /**
6878   The method is a shortcut of @c rotate() and @c purge().
6879   LOCK_log is acquired prior to rotate and is released after it.
6880 
6881   @param force_rotate  caller can request the log rotation
6882 
6883   @retval
6884     nonzero - error in rotating routine.
6885 */
6886 int MYSQL_BIN_LOG::rotate_and_purge(bool force_rotate,
6887                                     DYNAMIC_ARRAY *domain_drop_lex)
6888 {
6889   int err_gtid=0, error= 0;
6890   ulong prev_binlog_id;
6891   DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
6892   bool check_purge= false;
6893 
6894   mysql_mutex_lock(&LOCK_log);
6895 
6896   DEBUG_SYNC(current_thd, "rotate_after_acquire_LOCK_log");
6897 
6898   prev_binlog_id= current_binlog_id;
6899 
6900   if ((err_gtid= do_delete_gtid_domain(domain_drop_lex)))
6901   {
6902     // inffective attempt to delete merely skips rotate and purge
6903     if (err_gtid < 0)
6904       error= 1; // otherwise error is propagated the user
6905   }
6906   else if (unlikely((error= rotate(force_rotate, &check_purge))))
6907     check_purge= false;
6908 
6909   DEBUG_SYNC(current_thd, "rotate_after_rotate");
6910 
6911   /*
6912     NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
6913           the mutex. Otherwise causes various deadlocks.
6914           Explicit binlog rotation must be synchronized with a concurrent
6915           binlog ordered commit, in particular not let binlog
6916           checkpoint notification request until early binlogged
6917           concurrent commits have has been completed.
6918   */
6919   mysql_mutex_lock(&LOCK_after_binlog_sync);
6920   mysql_mutex_unlock(&LOCK_log);
6921   mysql_mutex_lock(&LOCK_commit_ordered);
6922   mysql_mutex_unlock(&LOCK_after_binlog_sync);
6923   mysql_mutex_unlock(&LOCK_commit_ordered);
6924 
6925   if (check_purge)
6926     checkpoint_and_purge(prev_binlog_id);
6927 
6928   DBUG_RETURN(error);
6929 }
6930 
6931 uint MYSQL_BIN_LOG::next_file_id()
6932 {
6933   uint res;
6934   mysql_mutex_lock(&LOCK_log);
6935   res = file_id++;
6936   mysql_mutex_unlock(&LOCK_log);
6937   return res;
6938 }
6939 
6940 class CacheWriter: public Log_event_writer
6941 {
6942 public:
6943   size_t remains;
6944 
6945   CacheWriter(THD *thd_arg, IO_CACHE *file_arg, bool do_checksum,
6946               Binlog_crypt_data *cr)
6947     : Log_event_writer(file_arg, 0, cr), remains(0), thd(thd_arg),
6948       first(true)
6949   { checksum_len= do_checksum ? BINLOG_CHECKSUM_LEN : 0; }
6950 
6951   ~CacheWriter()
6952   { status_var_add(thd->status_var.binlog_bytes_written, bytes_written); }
6953 
6954   int write(uchar* pos, size_t len)
6955   {
6956     DBUG_ENTER("CacheWriter::write");
6957     if (first)
6958       write_header(pos, len);
6959     else
6960       write_data(pos, len);
6961 
6962     remains -= len;
6963     if ((first= !remains))
6964       write_footer();
6965     DBUG_RETURN(0);
6966   }
6967 private:
6968   THD *thd;
6969   bool first;
6970 };
6971 
/*
  Write the contents of a cache to the binary log.

  SYNOPSIS
    write_cache()
    thd      Current_thread
    cache    Cache to write to the binary log

  DESCRIPTION
    Write the contents of the cache to the binary log. The cache will
    be reset as a READ_CACHE to be able to read the contents from it.
    LOCK_log must be held by the caller.

    While copying, every event read from the cache gets its length and
    end_log_pos fixed up: a checksum (per @c binlog_checksum_options) may be
    appended to each event, and end_log_pos is converted from a
    group-relative into an absolute binlog position.

  RETURN
    0                  All events were handed to the writer
    ER_ERROR_ON_WRITE  The cache could not be reinitialized for reading, or
                       the writer reported a failure
*/

int MYSQL_BIN_LOG::write_cache(THD *thd, IO_CACHE *cache)
{
  DBUG_ENTER("MYSQL_BIN_LOG::write_cache");

  mysql_mutex_assert_owner(&LOCK_log);
  if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  /*
    length   - number of bytes in the current cache segment still to process
    group    - position in the binlog at which this cache starts
    carry    - number of bytes of a split event header saved in 'header'
    hdr_offs - offset, relative to the current segment, of the next header
  */
  size_t length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
  size_t val;
  /* each event processed adds writer.checksum_len to this increment */
  size_t end_log_pos_inc= 0;
  uchar header[LOG_EVENT_HEADER_LEN];
  CacheWriter writer(thd, &log_file, binlog_checksum_options, &crypto);

  /* With encryption active the writer needs a context of crypto.ctx_size. */
  if (crypto.scheme)
    writer.ctx= alloca(crypto.ctx_size);

  // while there is just one alg the following must hold:
  DBUG_ASSERT(binlog_checksum_options == BINLOG_CHECKSUM_ALG_OFF ||
              binlog_checksum_options == BINLOG_CHECKSUM_ALG_CRC32);

  /*
    The events in the buffer have incorrect end_log_pos data
    (relative to beginning of group rather than absolute),
    so we'll recalculate them in situ so the binlog is always
    correct, even in the middle of a group. This is possible
    because we now know the start position of the group (the
    offset of this cache in the log, if you will); all we need
    to do is to find all event-headers, and add the position of
    the group to the end_log_pos of each event.  This is pretty
    straight forward, except that we read the cache in segments,
    so an event-header might end up on the cache-border and get
    split.
  */

  group= (size_t)my_b_tell(&log_file);
  hdr_offs= carry= 0;

  /* One iteration per cache segment; my_b_fill() fetches the next one. */
  do
  {
    /*
      if we only got a partial header in the last iteration,
      get the other half now and process a full header.
    */
    if (unlikely(carry > 0))
    {
      DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);
      size_t tail= LOG_EVENT_HEADER_LEN - carry;

      /* assemble both halves */
      memcpy(&header[carry], (char *)cache->read_pos, tail);

      /* the writer counts down the original (checksum-less) event length */
      uint32 len= uint4korr(header + EVENT_LEN_OFFSET);
      writer.remains= len;

      /* fix end_log_pos */
      end_log_pos_inc += writer.checksum_len;
      val= uint4korr(header + LOG_POS_OFFSET) + group + end_log_pos_inc;
      int4store(header + LOG_POS_OFFSET, val);

      /* fix len */
      len+= writer.checksum_len;
      int4store(header + EVENT_LEN_OFFSET, len);

      if (writer.write(header, LOG_EVENT_HEADER_LEN))
        DBUG_RETURN(ER_ERROR_ON_WRITE);

      /* skip the part of the header that came from this segment */
      cache->read_pos+= tail;
      length-= tail;
      carry= 0;

      /* next event header at ... */
      hdr_offs= len - LOG_EVENT_HEADER_LEN - writer.checksum_len;
    }

    /* if there is anything to write, process it. */

    if (likely(length > 0))
    {
      DBUG_EXECUTE_IF("fail_binlog_write_1",
                      errno= 28; DBUG_RETURN(ER_ERROR_ON_WRITE););
      /*
        process all event-headers in this (partial) cache.
        if next header is beyond current read-buffer,
        we'll get it later (though not necessarily in the
        very next iteration, just "eventually").
      */

      /* no header in this segment: all of it is data of the current event */
      if (hdr_offs >= length)
      {
        if (writer.write(cache->read_pos, length))
          DBUG_RETURN(ER_ERROR_ON_WRITE);
      }

      while (hdr_offs < length)
      {
        /*
          finish off with remains of the last event that crawls
          from previous into the current buffer
        */
        if (writer.remains != 0)
        {
          if (writer.write(cache->read_pos, hdr_offs))
            DBUG_RETURN(ER_ERROR_ON_WRITE);
        }

        /*
          partial header only? save what we can get, process once
          we get the rest.
        */
        if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
        {
          carry= length - hdr_offs;
          memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
          /* shrinking length to hdr_offs terminates the while loop */
          length= hdr_offs;
        }
        else
        {
          /* we've got a full event-header, and it came in one piece */
          uchar *ev= (uchar *)cache->read_pos + hdr_offs;
          uint ev_len= uint4korr(ev + EVENT_LEN_OFFSET); // netto len
          uchar *log_pos= ev + LOG_POS_OFFSET;

          end_log_pos_inc += writer.checksum_len;
          /* fix end_log_pos */
          val= uint4korr(log_pos) + group + end_log_pos_inc;
          int4store(log_pos, val);

          /* fix length */
          int4store(ev + EVENT_LEN_OFFSET, ev_len + writer.checksum_len);

          /* write as much of the event as this segment holds */
          writer.remains= ev_len;
          if (writer.write(ev, MY_MIN(ev_len, length - hdr_offs)))
            DBUG_RETURN(ER_ERROR_ON_WRITE);

          /* next event header at ... */
          hdr_offs += ev_len; // incr by the netto len

          DBUG_ASSERT(!writer.checksum_len || writer.remains == 0 || hdr_offs >= length);
        }
      }

      /*
        Adjust hdr_offs. Note that it may still point beyond the segment
        read in the next iteration; if the current event is very long,
        it may take a couple of read-iterations (and subsequent adjustments
        of hdr_offs) for it to point into the then-current segment.
        If we have a split header (carry > 0), hdr_offs will be set at the
        beginning of the next iteration, overwriting the value we set here:
      */
      hdr_offs -= length;
    }
  } while ((length= my_b_fill(cache)));

  /* no half header and (with checksums) no unfinished event may be left */
  DBUG_ASSERT(carry == 0);
  DBUG_ASSERT(!writer.checksum_len || writer.remains == 0);

  DBUG_RETURN(0);                               // All OK
}
7147 
7148 /*
7149   Helper function to get the error code of the query to be binlogged.
7150  */
7151 int query_error_code(THD *thd, bool not_killed)
7152 {
7153   int error;
7154 
7155   if (not_killed || (killed_mask_hard(thd->killed) == KILL_BAD_DATA))
7156   {
7157     error= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0;
7158     if (!error)
7159       return error;
7160 
7161     /* thd->get_get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
7162        ER_QUERY_INTERRUPTED, So here we need to make sure that error
7163        is not set to these errors when specified not_killed by the
7164        caller.
7165     */
7166     if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED ||
7167         error == ER_NEW_ABORTING_CONNECTION || error == ER_CONNECTION_KILLED)
7168       error= 0;
7169   }
7170   else
7171   {
7172     /* killed status for DELAYED INSERT thread should never be used */
7173     DBUG_ASSERT(!(thd->system_thread & SYSTEM_THREAD_DELAYED_INSERT));
7174     error= thd->killed_errno();
7175   }
7176 
7177   return error;
7178 }
7179 
7180 
7181 bool MYSQL_BIN_LOG::write_incident_already_locked(THD *thd)
7182 {
7183   uint error= 0;
7184   DBUG_ENTER("MYSQL_BIN_LOG::write_incident_already_locked");
7185   Incident incident= INCIDENT_LOST_EVENTS;
7186   Incident_log_event ev(thd, incident, &write_error_msg);
7187 
7188   if (likely(is_open()))
7189   {
7190     error= write_event(&ev);
7191     status_var_add(thd->status_var.binlog_bytes_written, ev.data_written);
7192   }
7193 
7194   DBUG_RETURN(error);
7195 }
7196 
7197 
7198 bool MYSQL_BIN_LOG::write_incident(THD *thd)
7199 {
7200   uint error= 0;
7201   my_off_t offset;
7202   bool check_purge= false;
7203   ulong prev_binlog_id;
7204   DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
7205 
7206   mysql_mutex_lock(&LOCK_log);
7207   if (likely(is_open()))
7208   {
7209     prev_binlog_id= current_binlog_id;
7210     if (likely(!(error= write_incident_already_locked(thd))) &&
7211         likely(!(error= flush_and_sync(0))))
7212     {
7213       update_binlog_end_pos();
7214       /*
7215         If a transaction with the LOAD DATA statement is divided
7216         into logical mini-transactions (of the 10K rows) and binlog
7217         is rotated, then the last portion of data may be lost due to
7218         wsrep handler re-registration at the boundary of the split.
7219         Since splitting of the LOAD DATA into mini-transactions is
7220         logical, we should not allow these mini-transactions to fall
7221         into separate binlogs. Therefore, it is necessary to prohibit
7222         the rotation of binlog in the middle of processing LOAD DATA:
7223       */
7224 #ifdef WITH_WSREP
7225       if (!thd->wsrep_split_flag)
7226       {
7227 #endif /* WITH_WSREP */
7228       if (unlikely((error= rotate(false, &check_purge))))
7229         check_purge= false;
7230 #ifdef WITH_WSREP
7231       }
7232 #endif /* WITH_WSREP */
7233     }
7234 
7235     offset= my_b_tell(&log_file);
7236 
7237     update_binlog_end_pos(offset);
7238 
7239     /*
7240       Take mutex to protect against a reader seeing partial writes of 64-bit
7241       offset on 32-bit CPUs.
7242     */
7243     mysql_mutex_lock(&LOCK_commit_ordered);
7244     last_commit_pos_offset= offset;
7245     mysql_mutex_unlock(&LOCK_commit_ordered);
7246     mysql_mutex_unlock(&LOCK_log);
7247 
7248     if (check_purge)
7249       checkpoint_and_purge(prev_binlog_id);
7250   }
7251   else
7252   {
7253     mysql_mutex_unlock(&LOCK_log);
7254   }
7255 
7256   DBUG_RETURN(error);
7257 }
7258 
7259 void
7260 MYSQL_BIN_LOG::write_binlog_checkpoint_event_already_locked(const char *name_arg, uint len)
7261 {
7262   my_off_t offset;
7263   Binlog_checkpoint_log_event ev(name_arg, len);
7264   /*
7265     Note that we must sync the binlog checkpoint to disk.
7266     Otherwise a subsequent log purge could delete binlogs that XA recovery
7267     thinks are needed (even though they are not really).
7268   */
7269   if (!write_event(&ev) && !flush_and_sync(0))
7270   {
7271     update_binlog_end_pos();
7272   }
7273   else
7274   {
7275     /*
7276       If we fail to write the checkpoint event, something is probably really
7277       bad with the binlog. We complain in the error log.
7278 
7279       Note that failure to write binlog checkpoint does not compromise the
7280       ability to do crash recovery - crash recovery will just have to scan a
7281       bit more of the binlog than strictly necessary.
7282     */
7283     sql_print_error("Failed to write binlog checkpoint event to binary log\n");
7284   }
7285 
7286   offset= my_b_tell(&log_file);
7287 
7288   update_binlog_end_pos(offset);
7289 
7290   /*
7291     Take mutex to protect against a reader seeing partial writes of 64-bit
7292     offset on 32-bit CPUs.
7293   */
7294   mysql_mutex_lock(&LOCK_commit_ordered);
7295   last_commit_pos_offset= offset;
7296   mysql_mutex_unlock(&LOCK_commit_ordered);
7297 }
7298 
7299 
7300 /**
7301   Write a cached log entry to the binary log.
7302   - To support transaction over replication, we wrap the transaction
7303   with BEGIN/COMMIT or BEGIN/ROLLBACK in the binary log.
7304   We want to write a BEGIN/ROLLBACK block when a non-transactional table
7305   was updated in a transaction which was rolled back. This is to ensure
7306   that the same updates are run on the slave.
7307 
7308   @param thd
7309   @param cache		The cache to copy to the binlog
7310   @param commit_event   The commit event to print after writing the
7311                         contents of the cache.
7312   @param incident       Defines if an incident event should be created to
7313                         notify that some non-transactional changes did
7314                         not get into the binlog.
7315 
7316   @note
7317     We only come here if there is something in the cache.
7318   @note
7319     The thing in the cache is always a complete transaction.
7320   @note
7321     'cache' needs to be reinitialized after this functions returns.
7322 */
7323 
7324 bool
7325 MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
7326                                            binlog_cache_mngr *cache_mngr,
7327                                            Log_event *end_ev, bool all,
7328                                            bool using_stmt_cache,
7329                                            bool using_trx_cache)
7330 {
7331   group_commit_entry entry;
7332   Ha_trx_info *ha_info;
7333   DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_to_binlog");
7334 
7335   /*
7336     Control should not be allowed beyond this point in wsrep_emulate_bin_log
7337     mode. Also, do not write the cached updates to binlog if binary logging is
7338     disabled (log-bin/sql_log_bin).
7339   */
7340   if (wsrep_emulate_bin_log)
7341   {
7342     DBUG_RETURN(0);
7343   }
7344   else if (!(thd->variables.option_bits & OPTION_BIN_LOG))
7345   {
7346     cache_mngr->need_unlog= false;
7347     DBUG_RETURN(0);
7348   }
7349 
7350   entry.thd= thd;
7351   entry.cache_mngr= cache_mngr;
7352   entry.error= 0;
7353   entry.all= all;
7354   entry.using_stmt_cache= using_stmt_cache;
7355   entry.using_trx_cache= using_trx_cache;
7356   entry.need_unlog= false;
7357   ha_info= all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
7358 
7359   for (; ha_info; ha_info= ha_info->next())
7360   {
7361     if (ha_info->is_started() && ha_info->ht() != binlog_hton &&
7362         !ha_info->ht()->commit_checkpoint_request)
7363       entry.need_unlog= true;
7364     break;
7365   }
7366 
7367   entry.end_event= end_ev;
7368   if (cache_mngr->stmt_cache.has_incident() ||
7369       cache_mngr->trx_cache.has_incident())
7370   {
7371     Incident_log_event inc_ev(thd, INCIDENT_LOST_EVENTS, &write_error_msg);
7372     entry.incident_event= &inc_ev;
7373     DBUG_RETURN(write_transaction_to_binlog_events(&entry));
7374   }
7375   else
7376   {
7377     entry.incident_event= NULL;
7378     DBUG_RETURN(write_transaction_to_binlog_events(&entry));
7379   }
7380 }
7381 
7382 
7383 /*
7384   Put a transaction that is ready to commit in the group commit queue.
7385   The transaction is identified by the ENTRY object passed into this function.
7386 
7387   To facilitate group commit for the binlog, we first queue up ourselves in
7388   this function. Then later the first thread to enter the queue waits for
7389   the LOCK_log mutex, and commits for everyone in the queue once it gets the
7390   lock. Any other threads in the queue just wait for the first one to finish
7391   the commit and wake them up. This way, all transactions in the queue get
7392   committed in a single disk operation.
7393 
7394   The main work in this function is when the commit in one transaction has
7395   been marked to wait for the commit of another transaction to happen
7396   first. This is used to support in-order parallel replication, where
7397   transactions can execute out-of-order but need to be committed in-order with
7398   how they happened on the master. The waiting of one commit on another needs
7399   to be integrated with the group commit queue, to ensure that the waiting
7400   transaction can participate in the same group commit as the waited-for
7401   transaction.
7402 
7403   So when we put a transaction in the queue, we check if there were other
7404   transactions already prepared to commit but just waiting for the first one
7405   to commit. If so, we add those to the queue as well, transitively for all
7406   waiters.
7407 
7408   And if a transaction is marked to wait for a prior transaction, but that
7409   prior transaction is already queued for group commit, then we can queue the
7410   new transaction directly to participate in the group commit.
7411 
7412   @retval < 0   Error
7413   @retval > 0   If queued as the first entry in the queue (meaning this
7414                 is the leader)
7415   @retval   0   Otherwise (queued as participant, leader handles the commit)
7416 */
7417 
int
MYSQL_BIN_LOG::queue_for_group_commit(group_commit_entry *orig_entry)
{
  /*
    entry and last walk the temporary list of entries that still have to be
    put into the group commit queue. orig_queue remembers the head of the
    group commit queue as it was before this call added anything; it decides
    whether we return as leader or as participant.
  */
  group_commit_entry *entry, *orig_queue, *last;
  /* The wait_for_commit of the entry currently processed in the loop. */
  wait_for_commit *cur;
  /* The wait_for_commit of the calling thread; may be NULL. */
  wait_for_commit *wfc;
  DBUG_ENTER("MYSQL_BIN_LOG::queue_for_group_commit");

  /*
    Check if we need to wait for another transaction to commit before us.

    It is safe to do a quick check without lock first in the case where we do
    not have to wait. But if the quick check shows we need to wait, we must do
    another safe check under lock, to avoid the race where the other
    transaction wakes us up between the check and the wait.
  */
  wfc= orig_entry->thd->wait_for_commit_ptr;
  orig_entry->queued_by_other= false;
  if (wfc && wfc->waitee)
  {
    mysql_mutex_lock(&wfc->LOCK_wait_commit);
    /*
      Do an extra check here, this time safely under lock.

      If waitee->commit_started is set, it means that the transaction we need
      to wait for has already queued up for group commit. In this case it is
      safe for us to queue up immediately as well, increasing the opportunities
      for group commit. Because waitee has taken the LOCK_prepare_ordered
      before setting the flag, so there is no risk that we can queue ahead of
      it.
    */
    if (wfc->waitee && !wfc->waitee->commit_started)
    {
      PSI_stage_info old_stage;
      wait_for_commit *loc_waitee;

      /*
        By setting wfc->opaque_pointer to our own entry, we mark that we are
        ready to commit, but waiting for another transaction to commit before
        us.

        This other transaction may then take over the commit process for us to
        get us included in its own group commit. If this happens, the
        queued_by_other flag is set.

        Setting this flag may or may not be seen by the other thread, but we
        are safe in any case: The other thread will set queued_by_other under
        its LOCK_wait_commit, and we will not check queued_by_other until
        after we have been woken up.
      */
      wfc->opaque_pointer= orig_entry;
      DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior");
      orig_entry->thd->ENTER_COND(&wfc->COND_wait_commit,
                                  &wfc->LOCK_wait_commit,
                                  &stage_waiting_for_prior_transaction_to_commit,
                                  &old_stage);
      /*
        Wait until the waitee is cleared or this thread is killed. loc_waitee
        keeps the last waitee value seen, so a non-NULL loc_waitee after the
        loop means the wait ended because of a kill.
      */
      while ((loc_waitee= wfc->waitee) && !orig_entry->thd->check_killed(1))
        mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
      wfc->opaque_pointer= NULL;
      DBUG_PRINT("info", ("After waiting for prior commit, queued_by_other=%d",
                 orig_entry->queued_by_other));

      if (loc_waitee)
      {
        /* Wait terminated due to kill. */
        mysql_mutex_lock(&loc_waitee->LOCK_wait_commit);
        if (loc_waitee->wakeup_subsequent_commits_running ||
            orig_entry->queued_by_other)
        {
          /* Our waitee is already waking us up, so ignore the kill. */
          mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
          do
          {
            mysql_cond_wait(&wfc->COND_wait_commit, &wfc->LOCK_wait_commit);
          } while (wfc->waitee);
        }
        else
        {
          /* We were killed, so remove us from the list of waitee. */
          wfc->remove_from_list(&loc_waitee->subsequent_commits_list);
          mysql_mutex_unlock(&loc_waitee->LOCK_wait_commit);
          wfc->waitee= NULL;

          orig_entry->thd->EXIT_COND(&old_stage);
          /* Interrupted by kill. */
          DEBUG_SYNC(orig_entry->thd, "group_commit_waiting_for_prior_killed");
          /*
            Report the kill reason; fall back to ER_QUERY_INTERRUPTED if the
            THD does not supply an error code.
          */
          wfc->wakeup_error= orig_entry->thd->killed_errno();
          if (!wfc->wakeup_error)
            wfc->wakeup_error= ER_QUERY_INTERRUPTED;
          my_message(wfc->wakeup_error,
                     ER_THD(orig_entry->thd, wfc->wakeup_error), MYF(0));
          DBUG_RETURN(-1);
        }
      }
      orig_entry->thd->EXIT_COND(&old_stage);
    }
    else
      mysql_mutex_unlock(&wfc->LOCK_wait_commit);
  }
  /*
    If the transaction we were waiting for has already put us into the group
    commit queue (and possibly already done the entire binlog commit for us),
    then there is nothing else to do.
  */
  if (orig_entry->queued_by_other)
    DBUG_RETURN(0);

  /* The transaction we had to wait for reported an error: fail as well. */
  if (wfc && wfc->wakeup_error)
  {
    my_error(ER_PRIOR_COMMIT_FAILED, MYF(0));
    DBUG_RETURN(-1);
  }

  /* Now enqueue ourselves in the group commit queue. */
  DEBUG_SYNC(orig_entry->thd, "commit_before_enqueue");
  orig_entry->thd->clear_wakeup_ready();
  mysql_mutex_lock(&LOCK_prepare_ordered);
  orig_queue= group_commit_queue;

  /*
    Iteratively process everything added to the queue, looking for waiters,
    and their waiters, and so on. If a waiter is ready to commit, we
    immediately add it to the queue, and mark it as queued_by_other.

    This would be natural to do with recursion, but we want to avoid
    potentially unbounded recursion blowing the C stack, so we use the list
    approach instead.

    We keep a list of the group_commit_entry of all the waiters that need to
    be processed. Initially this list contains only the entry passed into this
    function.

    We process entries in the list one by one. The element currently being
    processed is pointed to by `entry`, and the element at the end of the list
    is pointed to by `last` (we do not use NULL to terminate the list).

    As we process an entry, any waiters for that entry are added at the end of
    the list, to be processed in subsequent iterations. Then the entry is added
    to the group_commit_queue.  This continues until the list is exhausted,
    with all entries ever added eventually processed.

    The end result is a breadth-first traversal of the tree of waiters,
    re-using the `next' pointers of the group_commit_entry objects in place of
    extra stack space in a recursive traversal.

    The temporary list linked through these `next' pointers is not used by the
    caller or any other function; it only exists while doing the iterative
    tree traversal. After, all the processed entries are linked into the
    group_commit_queue.
  */

  cur= wfc;
  last= orig_entry;
  entry= orig_entry;
  for (;;)
  {
    group_commit_entry *next_entry;

    if (entry->cache_mngr->using_xa)
    {
      DEBUG_SYNC(entry->thd, "commit_before_prepare_ordered");
      run_prepare_ordered(entry->thd, entry->all);
      DEBUG_SYNC(entry->thd, "commit_after_prepare_ordered");
    }

    if (cur)
    {
      /*
        Now that we have taken LOCK_prepare_ordered and will queue up in the
        group commit queue, it is safe for following transactions to queue
        themselves. We will grab here any transaction that is now ready to
        queue up, but after that, more transactions may become ready while the
        leader is waiting to start the group commit. So set the flag
        `commit_started', so that later transactions can still participate in
        the group commit.
      */
      cur->commit_started= true;

      /*
        Check if this transaction has other transaction waiting for it to
        commit.

        If so, process the waiting transactions, and their waiters and so on,
        transitively.
      */
      if (cur->subsequent_commits_list)
      {
        /*
          waiter_ptr always points at the link that refers to `waiter', so
          that a ready waiter can be unlinked in place.
        */
        wait_for_commit *waiter, **waiter_ptr;

        mysql_mutex_lock(&cur->LOCK_wait_commit);
        /*
          Grab the list, now safely under lock, and process it if still
          non-empty.
        */
        waiter= cur->subsequent_commits_list;
        waiter_ptr= &cur->subsequent_commits_list;
        while (waiter)
        {
          wait_for_commit *next_waiter= waiter->next_subsequent_commit;
          group_commit_entry *entry2=
            (group_commit_entry *)waiter->opaque_pointer;
          if (entry2)
          {
            /*
              This is another transaction ready to be written to the binary
              log. We can put it into the queue directly, without needing a
              separate context switch to the other thread. We just set a flag
              so that the other thread will know when it wakes up that it was
              already processed.

              So remove it from the list of our waiters, and instead put it at
              the end of the list to be processed in a subsequent iteration of
              the outer loop.
            */
            *waiter_ptr= next_waiter;
            entry2->queued_by_other= true;
            last->next= entry2;
            last= entry2;
            /*
              As a small optimisation, we do not actually need to set
              entry2->next to NULL, as we can use the pointer `last' to check
              for end-of-list.
            */
          }
          else
          {
            /*
              This transaction is not ready to participate in the group commit
              yet, so leave it in the waiter list. It might join the group
              commit later, if it completes soon enough to do so (it will see
              our wfc->commit_started flag set), or it might commit later in a
              later group commit.
            */
            waiter_ptr= &waiter->next_subsequent_commit;
          }
          waiter= next_waiter;
        }
        mysql_mutex_unlock(&cur->LOCK_wait_commit);
      }
    }

    /*
      Handle the heuristics that if another transaction is waiting for this
      transaction (or if it does so later), then we want to trigger group
      commit immediately, without waiting for the binlog_commit_wait_usec
      timeout to expire.
    */
    entry->thd->waiting_on_group_commit= true;

    /*
      Add the entry to the group commit queue. The entry is pushed at the
      head, so entry->next must be saved first: it still links the temporary
      list of entries left to process.
    */
    next_entry= entry->next;
    entry->next= group_commit_queue;
    group_commit_queue= entry;
    if (entry == last)
      break;
    /*
      Move to the next entry in the flattened list of waiting transactions
      that still need to be processed transitively.
    */
    entry= next_entry;
    DBUG_ASSERT(entry != NULL);
    cur= entry->thd->wait_for_commit_ptr;
  }

  /*
    NOTE(review): presumably this wakes a leader that is waiting for more
    commits to arrive (binlog_commit_wait_count) - confirm against the waiter
    on COND_prepare_ordered.
  */
  if (opt_binlog_commit_wait_count > 0 && orig_queue != NULL)
    mysql_cond_signal(&COND_prepare_ordered);
  mysql_mutex_unlock(&LOCK_prepare_ordered);
  DEBUG_SYNC(orig_entry->thd, "commit_after_release_LOCK_prepare_ordered");

  /*
    If the group commit queue was empty when we took LOCK_prepare_ordered,
    we are the leader (return 1); otherwise we are a participant (return 0).
  */
  DBUG_PRINT("info", ("Queued for group commit as %s",
                      (orig_queue == NULL) ? "leader" : "participant"));
  DBUG_RETURN(orig_queue == NULL);
}
7691 
7692 bool
7693 MYSQL_BIN_LOG::write_transaction_to_binlog_events(group_commit_entry *entry)
7694 {
7695   int is_leader= queue_for_group_commit(entry);
7696 
7697   /*
7698     The first in the queue handles group commit for all; the others just wait
7699     to be signalled when group commit is done.
7700   */
7701   if (is_leader < 0)
7702     return true;                                /* Error */
7703   else if (is_leader)
7704     trx_group_commit_leader(entry);
7705   else if (!entry->queued_by_other)
7706   {
7707     DEBUG_SYNC(entry->thd, "after_semisync_queue");
7708 
7709     entry->thd->wait_for_wakeup_ready();
7710   }
7711   else
7712   {
7713     /*
7714       If we were queued by another prior commit, then we are woken up
7715       only when the leader has already completed the commit for us.
7716       So nothing to do here then.
7717     */
7718   }
7719 
7720   if (!opt_optimize_thread_scheduling)
7721   {
7722     /* For the leader, trx_group_commit_leader() already took the lock. */
7723     if (!is_leader)
7724       mysql_mutex_lock(&LOCK_commit_ordered);
7725 
7726     DEBUG_SYNC(entry->thd, "commit_loop_entry_commit_ordered");
7727     ++num_commits;
7728     if (entry->cache_mngr->using_xa && !entry->error)
7729       run_commit_ordered(entry->thd, entry->all);
7730 
7731     group_commit_entry *next= entry->next;
7732     if (!next)
7733     {
7734       group_commit_queue_busy= FALSE;
7735       mysql_cond_signal(&COND_queue_busy);
7736       DEBUG_SYNC(entry->thd, "commit_after_group_run_commit_ordered");
7737     }
7738     mysql_mutex_unlock(&LOCK_commit_ordered);
7739     entry->thd->wakeup_subsequent_commits(entry->error);
7740 
7741     if (next)
7742     {
7743       /*
7744         Wake up the next thread in the group commit.
7745 
7746         The next thread can be waiting in two different ways, depending on
7747         whether it put itself in the queue, or if it was put in queue by us
7748         because it had to wait for us to commit first.
7749 
7750         So execute the appropriate wakeup, identified by the queued_by_other
7751         field.
7752       */
7753       if (next->queued_by_other)
7754         next->thd->wait_for_commit_ptr->wakeup(entry->error);
7755       else
7756         next->thd->signal_wakeup_ready();
7757     }
7758     else
7759     {
7760       /*
7761         If we rotated the binlog, and if we are using the unoptimized thread
7762         scheduling where every thread runs its own commit_ordered(), then we
7763         must do the commit checkpoint and log purge here, after all
7764         commit_ordered() calls have finished, and locks have been released.
7765       */
7766       if (entry->check_purge)
7767         checkpoint_and_purge(entry->binlog_id);
7768     }
7769 
7770   }
7771 
7772   if (likely(!entry->error))
7773     return entry->thd->wait_for_prior_commit();
7774 
7775   switch (entry->error)
7776   {
7777   case ER_ERROR_ON_WRITE:
7778     my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, entry->commit_errno);
7779     break;
7780   case ER_ERROR_ON_READ:
7781     my_error(ER_ERROR_ON_READ, MYF(ME_NOREFRESH),
7782              entry->error_cache->file_name, entry->commit_errno);
7783     break;
7784   default:
7785     /*
7786       There are not (and should not be) any errors thrown not covered above.
7787       But just in case one is added later without updating the above switch
7788       statement, include a catch-all.
7789     */
7790     my_printf_error(entry->error,
7791                     "Error writing transaction to binary log: %d",
7792                     MYF(ME_NOREFRESH), entry->error);
7793   }
7794 
7795   /*
7796     Since we return error, this transaction XID will not be committed, so
7797     we need to mark it as not needed for recovery (unlog() is not called
7798     for a transaction if log_xid() fails).
7799   */
7800   if (entry->cache_mngr->using_xa && entry->cache_mngr->xa_xid &&
7801       entry->cache_mngr->need_unlog)
7802     mark_xid_done(entry->cache_mngr->binlog_id, true);
7803 
7804   return 1;
7805 }
7806 
7807 /*
7808   Do binlog group commit as the lead thread.
7809 
7810   This must be called when this statement/transaction is queued at the start of
7811   the group_commit_queue. It will wait to obtain the LOCK_log mutex, then group
7812   commit all the transactions in the queue (more may have entered while waiting
7813   for LOCK_log). After commit is done, all other threads in the queue will be
7814   signalled.
7815 
7816  */
7817 void
7818 MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
7819 {
7820   uint xid_count= 0;
7821   my_off_t UNINIT_VAR(commit_offset);
7822   group_commit_entry *current, *last_in_queue;
7823   group_commit_entry *queue= NULL;
7824   bool check_purge= false;
7825   ulong UNINIT_VAR(binlog_id);
7826   uint64 commit_id;
7827   DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
7828 
7829   {
7830     DBUG_EXECUTE_IF("inject_binlog_commit_before_get_LOCK_log",
7831       DBUG_ASSERT(!debug_sync_set_action(leader->thd, STRING_WITH_LEN
7832         ("commit_before_get_LOCK_log SIGNAL waiting WAIT_FOR cont TIMEOUT 1")));
7833     );
7834     /*
7835       Lock the LOCK_log(), and once we get it, collect any additional writes
7836       that queued up while we were waiting.
7837     */
7838     DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_log");
7839     mysql_mutex_lock(&LOCK_log);
7840     DEBUG_SYNC(leader->thd, "commit_after_get_LOCK_log");
7841 
7842     mysql_mutex_lock(&LOCK_prepare_ordered);
7843     if (opt_binlog_commit_wait_count)
7844       wait_for_sufficient_commits();
7845     /*
7846       Note that wait_for_sufficient_commits() may have released and
7847       re-acquired the LOCK_log and LOCK_prepare_ordered if it needed to wait.
7848     */
7849     current= group_commit_queue;
7850     group_commit_queue= NULL;
7851     mysql_mutex_unlock(&LOCK_prepare_ordered);
7852     binlog_id= current_binlog_id;
7853 
7854     /* As the queue is in reverse order of entering, reverse it. */
7855     last_in_queue= current;
7856     while (current)
7857     {
7858       group_commit_entry *next= current->next;
7859       /*
7860         Now that group commit is started, we can clear the flag; there is no
7861         longer any use in waiters on this commit trying to trigger it early.
7862       */
7863       current->thd->waiting_on_group_commit= false;
7864       current->next= queue;
7865       queue= current;
7866       current= next;
7867     }
7868     DBUG_ASSERT(leader == queue /* the leader should be first in queue */);
7869 
7870     /* Now we have in queue the list of transactions to be committed in order. */
7871   }
7872 
7873   DBUG_ASSERT(is_open());
7874   if (likely(is_open()))                       // Should always be true
7875   {
7876     commit_id= (last_in_queue == leader ? 0 : (uint64)leader->thd->query_id);
7877     DBUG_EXECUTE_IF("binlog_force_commit_id",
7878       {
7879         const LEX_CSTRING commit_name= { STRING_WITH_LEN("commit_id") };
7880         bool null_value;
7881         user_var_entry *entry=
7882           (user_var_entry*) my_hash_search(&leader->thd->user_vars,
7883                                            (uchar*) commit_name.str,
7884                                            commit_name.length);
7885         commit_id= entry->val_int(&null_value);
7886       });
7887     /*
7888       Commit every transaction in the queue.
7889 
7890       Note that we are doing this in a different thread than the one running
7891       the transaction! So we are limited in the operations we can do. In
7892       particular, we cannot call my_error() on behalf of a transaction, as
7893       that obtains the THD from thread local storage. Instead, we must set
7894       current->error and let the thread do the error reporting itself once
7895       we wake it up.
7896     */
7897     for (current= queue; current != NULL; current= current->next)
7898     {
7899       set_current_thd(current->thd);
7900       binlog_cache_mngr *cache_mngr= current->cache_mngr;
7901 
7902       /*
7903         We already checked before that at least one cache is non-empty; if both
7904         are empty we would have skipped calling into here.
7905       */
7906       DBUG_ASSERT(!cache_mngr->stmt_cache.empty() || !cache_mngr->trx_cache.empty());
7907 
7908       if (unlikely((current->error= write_transaction_or_stmt(current,
7909                                                               commit_id))))
7910         current->commit_errno= errno;
7911 
7912       strmake_buf(cache_mngr->last_commit_pos_file, log_file_name);
7913       commit_offset= my_b_write_tell(&log_file);
7914       cache_mngr->last_commit_pos_offset= commit_offset;
7915       if (cache_mngr->using_xa && cache_mngr->xa_xid)
7916       {
7917         /*
7918           If all storage engines support commit_checkpoint_request(), then we
7919           do not need to keep track of when this XID is durably committed.
7920           Instead we will just ask the storage engine to durably commit all its
7921           XIDs when we rotate a binlog file.
7922         */
7923         if (current->need_unlog)
7924         {
7925           xid_count++;
7926           cache_mngr->need_unlog= true;
7927           cache_mngr->binlog_id= binlog_id;
7928         }
7929         else
7930           cache_mngr->need_unlog= false;
7931 
7932         cache_mngr->delayed_error= false;
7933       }
7934     }
7935     set_current_thd(leader->thd);
7936 
7937     bool synced= 0;
7938     if (unlikely(flush_and_sync(&synced)))
7939     {
7940       for (current= queue; current != NULL; current= current->next)
7941       {
7942         if (!current->error)
7943         {
7944           current->error= ER_ERROR_ON_WRITE;
7945           current->commit_errno= errno;
7946           current->error_cache= NULL;
7947         }
7948       }
7949     }
7950     else
7951     {
7952       bool any_error= false;
7953 
7954       mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
7955       mysql_mutex_assert_owner(&LOCK_log);
7956       mysql_mutex_assert_not_owner(&LOCK_after_binlog_sync);
7957       mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
7958 
7959       for (current= queue; current != NULL; current= current->next)
7960       {
7961 #ifdef HAVE_REPLICATION
7962         if (likely(!current->error) &&
7963             unlikely(repl_semisync_master.
7964                      report_binlog_update(current->thd,
7965                                           current->cache_mngr->
7966                                           last_commit_pos_file,
7967                                           current->cache_mngr->
7968                                           last_commit_pos_offset)))
7969         {
7970           current->error= ER_ERROR_ON_WRITE;
7971           current->commit_errno= -1;
7972           current->error_cache= NULL;
7973           any_error= true;
7974         }
7975 #endif
7976       }
7977 
7978       /*
7979         update binlog_end_pos so it can be read by dump thread
7980         Note: must be _after_ the RUN_HOOK(after_flush) or else
7981         semi-sync might not have put the transaction into
7982         it's list before dump-thread tries to send it
7983       */
7984       update_binlog_end_pos(commit_offset);
7985 
7986       if (unlikely(any_error))
7987         sql_print_error("Failed to run 'after_flush' hooks");
7988     }
7989 
7990     /*
7991       If any commit_events are Xid_log_event, increase the number of pending
7992       XIDs in current binlog (it's decreased in ::unlog()). When the count in
7993       a (not active) binlog file reaches zero, we know that it is no longer
7994       needed in XA recovery, and we can log a new binlog checkpoint event.
7995     */
7996     if (xid_count > 0)
7997     {
7998       mark_xids_active(binlog_id, xid_count);
7999     }
8000 
8001     /*
8002       If a transaction with the LOAD DATA statement is divided
8003       into logical mini-transactions (of the 10K rows) and binlog
8004       is rotated, then the last portion of data may be lost due to
8005       wsrep handler re-registration at the boundary of the split.
8006       Since splitting of the LOAD DATA into mini-transactions is
8007       logical, we should not allow these mini-transactions to fall
8008       into separate binlogs. Therefore, it is necessary to prohibit
8009       the rotation of binlog in the middle of processing LOAD DATA:
8010     */
8011 #ifdef WITH_WSREP
8012     if (!leader->thd->wsrep_split_flag)
8013     {
8014 #endif /* WITH_WSREP */
8015     if (rotate(false, &check_purge))
8016     {
8017       /*
8018         If we fail to rotate, which thread should get the error?
8019         We give the error to the leader, as any my_error() thrown inside
8020         rotate() will have been registered for the leader THD.
8021 
8022         However we must not return error from here - that would cause
8023         ha_commit_trans() to abort and rollback the transaction, which would
8024         leave an inconsistent state with the transaction committed in the
8025         binlog but rolled back in the engine.
8026 
8027         Instead set a flag so that we can return error later, from unlog(),
8028         when the transaction has been safely committed in the engine.
8029       */
8030       leader->cache_mngr->delayed_error= true;
8031       my_error(ER_ERROR_ON_WRITE, MYF(ME_NOREFRESH), name, errno);
8032       check_purge= false;
8033     }
8034 #ifdef WITH_WSREP
8035     }
8036 #endif /* WITH_WSREP */
8037     /* In case of binlog rotate, update the correct current binlog offset. */
8038     commit_offset= my_b_write_tell(&log_file);
8039   }
8040 
8041   DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_after_binlog_sync");
8042   mysql_mutex_lock(&LOCK_after_binlog_sync);
8043   /*
8044     We cannot unlock LOCK_log until we have locked LOCK_after_binlog_sync;
8045     otherwise scheduling could allow the next group commit to run ahead of us,
8046     messing up the order of commit_ordered() calls. But as soon as
8047     LOCK_after_binlog_sync is obtained, we can let the next group commit start.
8048   */
8049   mysql_mutex_unlock(&LOCK_log);
8050 
8051   DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_log");
8052 
8053   /*
8054     Loop through threads and run the binlog_sync hook
8055   */
8056   {
8057     mysql_mutex_assert_not_owner(&LOCK_prepare_ordered);
8058     mysql_mutex_assert_not_owner(&LOCK_log);
8059     mysql_mutex_assert_owner(&LOCK_after_binlog_sync);
8060     mysql_mutex_assert_not_owner(&LOCK_commit_ordered);
8061 
8062     bool first __attribute__((unused))= true;
8063     bool last __attribute__((unused));
8064     for (current= queue; current != NULL; current= current->next)
8065     {
8066       last= current->next == NULL;
8067 #ifdef HAVE_REPLICATION
8068       if (likely(!current->error))
8069         current->error=
8070           repl_semisync_master.wait_after_sync(current->cache_mngr->
8071                                                last_commit_pos_file,
8072                                                current->cache_mngr->
8073                                                last_commit_pos_offset);
8074 #endif
8075       first= false;
8076     }
8077   }
8078 
8079   DEBUG_SYNC(leader->thd, "commit_before_get_LOCK_commit_ordered");
8080 
8081   mysql_mutex_lock(&LOCK_commit_ordered);
8082   DBUG_EXECUTE_IF("crash_before_engine_commit",
8083       {
8084         DBUG_SUICIDE();
8085       });
8086   last_commit_pos_offset= commit_offset;
8087 
8088   /*
8089     Unlock LOCK_after_binlog_sync only *after* LOCK_commit_ordered has been
8090     acquired so that groups can not reorder for the different stages of
8091     the group commit procedure.
8092   */
8093   mysql_mutex_unlock(&LOCK_after_binlog_sync);
8094   DEBUG_SYNC(leader->thd, "commit_after_release_LOCK_after_binlog_sync");
8095   ++num_group_commits;
8096 
8097   if (!opt_optimize_thread_scheduling)
8098   {
8099     /*
8100       If we want to run commit_ordered() each in the transaction's own thread
8101       context, then we need to mark the queue reserved; we need to finish all
8102       threads in one group commit before the next group commit can be allowed
8103       to proceed, and we cannot unlock a simple pthreads mutex in a different
8104       thread from the one that locked it.
8105     */
8106 
8107     while (group_commit_queue_busy)
8108       mysql_cond_wait(&COND_queue_busy, &LOCK_commit_ordered);
8109     group_commit_queue_busy= TRUE;
8110 
8111     /*
8112       Set these so parent can run checkpoint_and_purge() in last thread.
8113       (When using optimized thread scheduling, we run checkpoint_and_purge()
8114       in this function, so parent does not need to and we need not set these
8115       values).
8116     */
8117     last_in_queue->check_purge= check_purge;
8118     last_in_queue->binlog_id= binlog_id;
8119 
8120     /* Note that we return with LOCK_commit_ordered locked! */
8121     DBUG_VOID_RETURN;
8122   }
8123 
8124   /*
8125     Wakeup each participant waiting for our group commit, first calling the
8126     commit_ordered() methods for any transactions doing 2-phase commit.
8127   */
8128   current= queue;
8129   while (current != NULL)
8130   {
8131     group_commit_entry *next;
8132 
8133     DEBUG_SYNC(leader->thd, "commit_loop_entry_commit_ordered");
8134     ++num_commits;
8135     if (current->cache_mngr->using_xa && likely(!current->error) &&
8136         DBUG_EVALUATE_IF("skip_commit_ordered", 0, 1))
8137       run_commit_ordered(current->thd, current->all);
8138     current->thd->wakeup_subsequent_commits(current->error);
8139 
8140     /*
8141       Careful not to access current->next after waking up the other thread! As
8142       it may change immediately after wakeup.
8143     */
8144     next= current->next;
8145     if (current != leader)                      // Don't wake up ourself
8146     {
8147       if (current->queued_by_other)
8148         current->thd->wait_for_commit_ptr->wakeup(current->error);
8149       else
8150         current->thd->signal_wakeup_ready();
8151     }
8152     current= next;
8153   }
8154   DEBUG_SYNC(leader->thd, "commit_after_group_run_commit_ordered");
8155   mysql_mutex_unlock(&LOCK_commit_ordered);
8156   DEBUG_SYNC(leader->thd, "commit_after_group_release_commit_ordered");
8157 
8158   if (check_purge)
8159     checkpoint_and_purge(binlog_id);
8160 
8161   DBUG_VOID_RETURN;
8162 }
8163 
8164 
/**
  Write one group commit entry to the binary log.

  The pieces are written in this order: a GTID event, the statement cache
  (if in use and non-empty), the transaction cache (if in use and non-empty),
  the entry's end event and, when present, its incident event.

  @param entry      Group commit entry to write
  @param commit_id  Passed through to write_gtid_event()

  @retval 0                  Everything was written
  @retval ER_ERROR_ON_WRITE  A write failed or one of the caches reported an
                             error. Except when the GTID event itself fails,
                             entry->error_cache is set to the cache log
                             involved, or to NULL when the failure is not
                             tied to a cache.
*/
int
MYSQL_BIN_LOG::write_transaction_or_stmt(group_commit_entry *entry,
                                         uint64 commit_id)
{
  binlog_cache_mngr *mngr= entry->cache_mngr;
  DBUG_ENTER("MYSQL_BIN_LOG::write_transaction_or_stmt");

  /* The GTID event goes first; entry->error_cache is not touched on failure. */
  if (write_gtid_event(entry->thd, false, entry->using_trx_cache, commit_id))
    DBUG_RETURN(ER_ERROR_ON_WRITE);

  /* Statement cache: only written when in use and non-empty. */
  if (entry->using_stmt_cache && !mngr->stmt_cache.empty() &&
      write_cache(entry->thd, mngr->get_binlog_cache_log(FALSE)))
  {
    entry->error_cache= &mngr->stmt_cache.cache_log;
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  }

  /* Transaction cache: only written when in use and non-empty. */
  if (entry->using_trx_cache && !mngr->trx_cache.empty())
  {
    /*
      Debug-only injection: write the transaction cache, flush and sync it if
      the write succeeded, then kill the server before the end event is
      written.
    */
    DBUG_EXECUTE_IF("crash_before_writing_xid",
                    {
                      if ((write_cache(entry->thd,
                                       mngr->get_binlog_cache_log(TRUE))))
                        DBUG_PRINT("info", ("error writing binlog cache"));
                      else
                        flush_and_sync(0);

                      DBUG_PRINT("info", ("crashing before writing xid"));
                      DBUG_SUICIDE();
                    });

    if (write_cache(entry->thd, mngr->get_binlog_cache_log(TRUE)))
    {
      entry->error_cache= &mngr->trx_cache.cache_log;
      DBUG_RETURN(ER_ERROR_ON_WRITE);
    }
  }

  /*
    Debug-only injection: fail here with errno forced to 28, reported the
    same way as a failed end event write (error_cache is NULL).
  */
  DBUG_EXECUTE_IF("inject_error_writing_xid",
                  {
                    entry->error_cache= NULL;
                    errno= 28;
                    DBUG_RETURN(ER_ERROR_ON_WRITE);
                  });

  /* The end event terminates the group; a failure is not tied to a cache. */
  if (write_event(entry->end_event))
  {
    entry->error_cache= NULL;
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  }
  /* Account the size of the end event in the THD's status counters. */
  status_var_add(entry->thd->status_var.binlog_bytes_written,
                 entry->end_event->data_written);

  /* Optional incident event, written after the end event. */
  if (entry->incident_event)
  {
    if (write_event(entry->incident_event))
    {
      entry->error_cache= NULL;
      DBUG_RETURN(ER_ERROR_ON_WRITE);
    }
  }

  /*
    All writes returned success; still report failure if either cache log
    carries an error. The statement cache is checked first.
  */
  if (unlikely(mngr->get_binlog_cache_log(FALSE)->error))
  {
    entry->error_cache= &mngr->stmt_cache.cache_log;
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  }
  if (unlikely(mngr->get_binlog_cache_log(TRUE)->error))  // Error on read
  {
    entry->error_cache= &mngr->trx_cache.cache_log;
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  }

  DBUG_RETURN(0);
}
8240 
8241 
8242 /*
8243   Wait for sufficient commits to queue up for group commit, according to the
8244   values of binlog_commit_wait_count and binlog_commit_wait_usec.
8245 
8246   Note that this function may release and re-acquire LOCK_log and
8247   LOCK_prepare_ordered if it needs to wait.
8248 */
8249 
void
MYSQL_BIN_LOG::wait_for_sufficient_commits()
{
  size_t count;                    /* Queue entries counted so far */
  group_commit_entry *e;
  group_commit_entry *last_head;   /* Queue head at the time of the last scan */
  struct timespec wait_until;      /* Absolute deadline for the timed waits */

  /* Both mutexes must be held on entry; both are held again on return. */
  mysql_mutex_assert_owner(&LOCK_log);
  mysql_mutex_assert_owner(&LOCK_prepare_ordered);

  /*
    First pass over what is already queued. Return without waiting if the
    queue already holds opt_binlog_commit_wait_count entries, or if one of
    the queued transactions has its has_waiter flag set.
  */
  for (e= last_head= group_commit_queue, count= 0; e; e= e->next)
  {
    if (++count >= opt_binlog_commit_wait_count)
    {
      group_commit_trigger_count++;
      return;
    }
    if (unlikely(e->thd->has_waiter))
    {
      group_commit_trigger_lock_wait++;
      return;
    }
  }

  /*
    We are going to wait: drop LOCK_log for the duration and compute the
    deadline from opt_binlog_commit_wait_usec (converted to nanoseconds).
  */
  mysql_mutex_unlock(&LOCK_log);
  set_timespec_nsec(wait_until, (ulonglong)1000*opt_binlog_commit_wait_usec);

  for (;;)
  {
    int err;
    group_commit_entry *head;

    /* LOCK_prepare_ordered is the mutex paired with this condition wait. */
    err= mysql_cond_timedwait(&COND_prepare_ordered, &LOCK_prepare_ordered,
                              &wait_until);
    if (err == ETIMEDOUT)
    {
      group_commit_trigger_timeout++;
      break;
    }
    /* Re-check the entry that was the head at the previous scan. */
    if (unlikely(last_head->thd->has_waiter))
    {
      group_commit_trigger_lock_wait++;
      break;
    }
    /*
      Count only the entries between the current head and the previous head,
      i.e. those not seen by an earlier scan.
    */
    head= group_commit_queue;
    for (e= head; e && e != last_head; e= e->next)
    {
      ++count;
      if (unlikely(e->thd->has_waiter))
      {
        group_commit_trigger_lock_wait++;
        goto after_loop;
      }
    }
    if (count >= opt_binlog_commit_wait_count)
    {
      group_commit_trigger_count++;
      break;
    }
    /* Not enough yet: remember where this scan started and wait again. */
    last_head= head;
  }
after_loop:

  /*
    We must not wait for LOCK_log while holding LOCK_prepare_ordered.
    LOCK_log can be held for long periods (eg. we do I/O under it), while
    LOCK_prepare_ordered must only be held for short periods.

    In addition, waiting for LOCK_log while holding LOCK_prepare_ordered would
    violate locking order of LOCK_log-before-LOCK_prepare_ordered. This could
    cause SAFEMUTEX warnings (even if it cannot actually deadlock with current
    code, as there can be at most one group commit leader thread at a time).

    So release and re-acquire LOCK_prepare_ordered if we need to wait for the
    LOCK_log.
  */
  /* A non-zero trylock result means LOCK_log was not obtained immediately. */
  if (mysql_mutex_trylock(&LOCK_log))
  {
    mysql_mutex_unlock(&LOCK_prepare_ordered);
    mysql_mutex_lock(&LOCK_log);
    mysql_mutex_lock(&LOCK_prepare_ordered);
  }
}
8334 
8335 
8336 void
8337 MYSQL_BIN_LOG::binlog_trigger_immediate_group_commit()
8338 {
8339   group_commit_entry *head;
8340   mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8341   head= group_commit_queue;
8342   if (head)
8343   {
8344     head->thd->has_waiter= true;
8345     mysql_cond_signal(&COND_prepare_ordered);
8346   }
8347 }
8348 
8349 
8350 /*
8351   This function is called when a transaction T1 goes to wait for another
8352   transaction T2. It is used to cut short any binlog group commit delay from
8353   --binlog-commit-wait-count in the case where another transaction is stalled
8354   on the wait due to conflicting row locks.
8355 
8356   If T2 is already ready to group commit, any waiting group commit will be
8357   signalled to proceed immediately. Otherwise, a flag will be set in T2, and
8358   when T2 later becomes ready, immediate group commit will be triggered.
8359 */
8360 void
8361 binlog_report_wait_for(THD *thd1, THD *thd2)
8362 {
8363   if (opt_binlog_commit_wait_count == 0)
8364     return;
8365   mysql_mutex_lock(&LOCK_prepare_ordered);
8366   thd2->has_waiter= true;
8367   if (thd2->waiting_on_group_commit)
8368     mysql_bin_log.binlog_trigger_immediate_group_commit();
8369   mysql_mutex_unlock(&LOCK_prepare_ordered);
8370 }
8371 
8372 
8373 /**
8374   Wait until we get a signal that the relay log has been updated.
8375 
8376   @param thd		Thread variable
8377 
8378   @note
8379     One must have a lock on LOCK_log before calling this function.
8380     This lock will be released before return! That's required by
8381     THD::enter_cond() (see NOTES in sql_class.h).
8382 */
8383 
void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
{
  PSI_stage_info old_stage;
  DBUG_ENTER("wait_for_update_relay_log");

  mysql_mutex_assert_owner(&LOCK_log);
  /*
    Register the condition/mutex pair with the THD and switch to the
    stage_slave_has_read_all_relay_log stage; the previous stage is saved in
    old_stage.
  */
  thd->ENTER_COND(&COND_relay_log_updated, &LOCK_log,
                  &stage_slave_has_read_all_relay_log,
                  &old_stage);
  /* A single wait without a predicate loop: any wakeup ends the wait. */
  mysql_cond_wait(&COND_relay_log_updated, &LOCK_log);
  /*
    Hand the saved stage back to the THD.
    NOTE(review): the function header states that LOCK_log is released before
    return; no explicit unlock is visible here, so presumably EXIT_COND does
    it - confirm against sql_class.h.
  */
  thd->EXIT_COND(&old_stage);
  DBUG_VOID_RETURN;
}
8397 
8398 /**
8399   Wait until we get a signal that the binary log has been updated.
8400   Applies to master only.
8401 
8402   NOTES
8403   @param[in] thd        a THD struct
8404   @param[in] timeout    a pointer to a timespec;
8405                         NULL means to wait w/o timeout.
8406   @retval    0          if got signalled on update
8407   @retval    non-0      if wait timeout elapsed
8408   @note
    The mutex returned by get_binlog_end_pos_lock() must be held when calling
    this function (this is asserted).
    That mutex is released while the thread is waiting and is held again on
    return; it is up to the caller to release it.
8412 */
8413 
8414 int MYSQL_BIN_LOG::wait_for_update_binlog_end_pos(THD* thd,
8415                                                   struct timespec *timeout)
8416 {
8417   int ret= 0;
8418   DBUG_ENTER("wait_for_update_binlog_end_pos");
8419 
8420   thd_wait_begin(thd, THD_WAIT_BINLOG);
8421   mysql_mutex_assert_owner(get_binlog_end_pos_lock());
8422   if (!timeout)
8423     mysql_cond_wait(&COND_bin_log_updated, get_binlog_end_pos_lock());
8424   else
8425     ret= mysql_cond_timedwait(&COND_bin_log_updated, get_binlog_end_pos_lock(),
8426                               timeout);
8427   thd_wait_end(thd);
8428   DBUG_RETURN(ret);
8429 }
8430 
8431 
8432 /**
8433   Close the log file.
8434 
8435   @param exiting     Bitmask for one or more of the following bits:
8436           - LOG_CLOSE_INDEX : if we should close the index file
8437           - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
8438                                      at once after close.
8439           - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
8440           - LOG_CLOSE_DELAYED_CLOSE : do not yet close the file and clear the
8441                                       LOG_EVENT_BINLOG_IN_USE_F flag
8442 
8443   @note
8444     One can do an open on the object at once after doing a close.
8445     The internal structures are not freed until cleanup() is called
8446 */
8447 
void MYSQL_BIN_LOG::close(uint exiting)
{					// One can't set log_type here!
  /* Set when write_state_to_file() fails; keeps the in-use flag in place. */
  bool failed_to_save_state= false;
  DBUG_ENTER("MYSQL_BIN_LOG::close");
  DBUG_PRINT("enter",("exiting: %d", (int) exiting));

  mysql_mutex_assert_owner(&LOCK_log);

  if (log_state == LOG_OPENED)
  {
#ifdef HAVE_REPLICATION
    /* Optionally terminate the log with a Stop event. */
    if (log_type == LOG_BIN &&
	(exiting & LOG_CLOSE_STOP_EVENT))
    {
      Stop_log_event s;
      // the checksumming rule for relay-log case is similar to Rotate
        s.checksum_alg= is_relay_log ? relay_log_checksum_alg
                                     : (enum_binlog_checksum_alg)binlog_checksum_options;
      DBUG_ASSERT(!is_relay_log ||
                  relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
      /* Write the event, account its size, flush and publish the new end. */
      write_event(&s);
      bytes_written+= s.data_written;
      flush_io_cache(&log_file);
      update_binlog_end_pos();

      /*
        When we shut down server, write out the binlog state to a separate
        file so we do not have to scan an entire binlog file to recover it
        at next server start.

        Note that this must be written and synced to disk before marking the
        last binlog file as "not crashed".
      */
      if (!is_relay_log && write_state_to_file())
      {
        sql_print_error("Failed to save binlog GTID state during shutdown. "
                        "Binlog will be marked as crashed, so that crash "
                        "recovery can recover the state at next server "
                        "startup.");
        /*
          Leave binlog file marked as crashed, so we can recover state by
          scanning it now that we failed to write out the state properly.
        */
        failed_to_save_state= true;
      }
    }
#endif /* HAVE_REPLICATION */

    /* don't pwrite in a file opened with O_APPEND - it doesn't work */
    if (log_file.type == WRITE_CACHE && log_type == LOG_BIN
        && !(exiting & LOG_CLOSE_DELAYED_CLOSE))
    {
      /* Remember the file position; the pwrite below may move it. */
      my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
      if (!failed_to_save_state)
        clear_inuse_flag_when_closing(log_file.file);
      /*
        Restore position so that anything we have in the IO_cache is written
        to the correct position.
        We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
        original position on system that doesn't support pwrite().
      */
      mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
    }

    /* this will cleanup IO_CACHE, sync and close the file */
    MYSQL_LOG::close(exiting);
  }

  /*
    The following test is needed even if is_open() is not set, as we may have
    called a not complete close earlier and the index file is still open.
  */

  if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
  {
    end_io_cache(&index_file);
    /* Only the first write error is recorded and reported. */
    if (unlikely(mysql_file_close(index_file.file, MYF(0)) < 0) &&
        ! write_error)
    {
      write_error= 1;
      sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), index_file_name, errno);
    }
  }
  /* The final state depends on whether the caller intends to reopen at once. */
  log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
  /* Release the log name and clear the pointer so it cannot be reused. */
  my_free(name);
  name= NULL;
  DBUG_VOID_RETURN;
}
8536 
8537 
8538 /*
8539   Clear the LOG_EVENT_BINLOG_IN_USE_F; this marks the binlog file as cleanly
8540   closed and not needing crash recovery.
8541 */
8542 void MYSQL_BIN_LOG::clear_inuse_flag_when_closing(File file)
8543 {
8544   my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
8545   uchar flags= 0;            // clearing LOG_EVENT_BINLOG_IN_USE_F
8546   mysql_file_pwrite(file, &flags, 1, offset, MYF(0));
8547 }
8548 
8549 
8550 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
8551 {
8552   /*
8553     We need to take locks, otherwise this may happen:
8554     new_file() is called, calls open(old_max_size), then before open() starts,
8555     set_max_size() sets max_size to max_size_arg, then open() starts and
8556     uses the old_max_size argument, so max_size_arg has been overwritten and
8557     it's like if the SET command was never run.
8558   */
8559   DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
8560   mysql_mutex_lock(&LOCK_log);
8561   if (is_open())
8562     max_size= max_size_arg;
8563   mysql_mutex_unlock(&LOCK_log);
8564   DBUG_VOID_RETURN;
8565 }
8566 
8567 
8568 /**
8569   Check if a string is a valid number.
8570 
8571   @param str			String to test
8572   @param res			Store value here
8573   @param allow_wildcards	Set to 1 if we should ignore '%' and '_'
8574 
8575   @note
    When allow_wildcards is set, the wildcard characters wild_many and
    wild_one are accepted wherever a digit is accepted.
    This function should be moved to some other file.
8578 
8579   @retval
8580     1	String is a number
8581   @retval
8582     0	String is not a number
8583 */
8584 
8585 static bool test_if_number(const char *str, ulong *res, bool allow_wildcards)
8586 {
8587   int flag;
8588   const char *start;
8589   DBUG_ENTER("test_if_number");
8590 
8591   flag=0; start=str;
8592   while (*str++ == ' ') ;
8593   if (*--str == '-' || *str == '+')
8594     str++;
8595   while (my_isdigit(files_charset_info,*str) ||
8596 	 (allow_wildcards && (*str == wild_many || *str == wild_one)))
8597   {
8598     flag=1;
8599     str++;
8600   }
8601   if (*str == '.')
8602   {
8603     for (str++ ;
8604 	 my_isdigit(files_charset_info,*str) ||
8605 	   (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
8606 	 str++, flag=1) ;
8607   }
8608   if (*str != 0 || flag == 0)
8609     DBUG_RETURN(0);
8610   if (res)
8611     *res=atol(start);
8612   DBUG_RETURN(1);			/* Number ok */
8613 } /* test_if_number */
8614 
8615 
8616 void sql_perror(const char *message)
8617 {
8618 #if defined(_WIN32)
8619   char* buf;
8620   DWORD dw= GetLastError();
8621   if (FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |  FORMAT_MESSAGE_FROM_SYSTEM |
8622         FORMAT_MESSAGE_IGNORE_INSERTS,  NULL, dw,
8623         MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&buf, 0, NULL ) > 0)
8624   {
8625     sql_print_error("%s: %s",message, buf);
8626     LocalFree((HLOCAL)buf);
8627   }
8628   else
8629   {
8630     sql_print_error("%s", message);
8631   }
8632 #elif defined(HAVE_STRERROR)
8633   sql_print_error("%s: %s",message, strerror(errno));
8634 #else
8635   perror(message);
8636 #endif
8637 }
8638 
8639 
8640 /*
8641   Change the file associated with two output streams. Used to
8642   redirect stdout and stderr to a file. The streams are reopened
8643   only for appending (writing at end of file).
8644 */
8645 bool reopen_fstreams(const char *filename, FILE *outstream, FILE *errstream)
8646 {
8647   if ((outstream && !my_freopen(filename, "a", outstream)) ||
8648       (errstream && !my_freopen(filename, "a", errstream)))
8649   {
8650     my_error(ER_CANT_CREATE_FILE, MYF(0), filename, errno);
8651     return TRUE;
8652   }
8653 
8654   /* The error stream must be unbuffered. */
8655   if (errstream)
8656     setbuf(errstream, NULL);
8657 
8658   return FALSE;
8659 }
8660 
8661 
8662 /*
8663   Unfortunately, there seems to be no good way
8664   to restore the original streams upon failure.
8665 */
8666 static bool redirect_std_streams(const char *file)
8667 {
8668   if (reopen_fstreams(file, stdout, stderr))
8669     return TRUE;
8670 
8671   setbuf(stderr, NULL);
8672   return FALSE;
8673 }
8674 
8675 
8676 bool flush_error_log()
8677 {
8678   bool result= 0;
8679   if (opt_error_log)
8680   {
8681     mysql_mutex_lock(&LOCK_error_log);
8682     if (redirect_std_streams(log_error_file))
8683       result= 1;
8684     mysql_mutex_unlock(&LOCK_error_log);
8685   }
8686   return result;
8687 }
8688 
8689 #ifdef _WIN32
8690 static void print_buffer_to_nt_eventlog(enum loglevel level, char *buff,
8691                                         size_t length, size_t buffLen)
8692 {
8693   HANDLE event;
8694   char   *buffptr= buff;
8695   DBUG_ENTER("print_buffer_to_nt_eventlog");
8696 
8697   /* Add ending CR/LF's to string, overwrite last chars if necessary */
8698   strmov(buffptr+MY_MIN(length, buffLen-5), "\r\n\r\n");
8699 
8700   setup_windows_event_source();
8701   if ((event= RegisterEventSource(NULL,"MySQL")))
8702   {
8703     switch (level) {
8704       case ERROR_LEVEL:
8705         ReportEvent(event, EVENTLOG_ERROR_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
8706                     (LPCSTR*)&buffptr, NULL);
8707         break;
8708       case WARNING_LEVEL:
8709         ReportEvent(event, EVENTLOG_WARNING_TYPE, 0, MSG_DEFAULT, NULL, 1, 0,
8710                     (LPCSTR*) &buffptr, NULL);
8711         break;
8712       case INFORMATION_LEVEL:
8713         ReportEvent(event, EVENTLOG_INFORMATION_TYPE, 0, MSG_DEFAULT, NULL, 1,
8714                     0, (LPCSTR*) &buffptr, NULL);
8715         break;
8716     }
8717     DeregisterEventSource(event);
8718   }
8719 
8720   DBUG_VOID_RETURN;
8721 }
8722 #endif /* _WIN32 */
8723 
8724 
8725 #ifndef EMBEDDED_LIBRARY
8726 static void print_buffer_to_file(enum loglevel level, const char *buffer,
8727                                  size_t length)
8728 {
8729   time_t skr;
8730   struct tm tm_tmp;
8731   struct tm *start;
8732   THD *thd= 0;
8733   size_t tag_length= 0;
8734   char tag[NAME_LEN];
8735   DBUG_ENTER("print_buffer_to_file");
8736   DBUG_PRINT("enter",("buffer: %s", buffer));
8737 
8738   if (mysqld_server_initialized && (thd= current_thd))
8739   {
8740     if (thd->connection_name.length)
8741     {
8742       /*
8743         Add tag for slaves so that the user can see from which connection
8744         the error originates.
8745       */
8746       tag_length= my_snprintf(tag, sizeof(tag),
8747                               ER_THD(thd, ER_MASTER_LOG_PREFIX),
8748                               (int) thd->connection_name.length,
8749                               thd->connection_name.str);
8750     }
8751   }
8752 
8753   mysql_mutex_lock(&LOCK_error_log);
8754 
8755   skr= my_time(0);
8756   localtime_r(&skr, &tm_tmp);
8757   start=&tm_tmp;
8758 
8759   fprintf(stderr, "%d-%02d-%02d %2d:%02d:%02d %lu [%s] %.*s%.*s\n",
8760           start->tm_year + 1900,
8761           start->tm_mon+1,
8762           start->tm_mday,
8763           start->tm_hour,
8764           start->tm_min,
8765           start->tm_sec,
8766           (unsigned long) (thd ? thd->thread_id : 0),
8767           (level == ERROR_LEVEL ? "ERROR" : level == WARNING_LEVEL ?
8768            "Warning" : "Note"),
8769           (int) tag_length, tag,
8770           (int) length, buffer);
8771 
8772   fflush(stderr);
8773 
8774   mysql_mutex_unlock(&LOCK_error_log);
8775   DBUG_VOID_RETURN;
8776 }
8777 
8778 /**
8779   Prints a printf style message to the error log and, under NT, to the
8780   Windows event log.
8781 
8782   This function prints the message into a buffer and then sends that buffer
8783   to other functions to write that message to other logging sources.
8784 
8785   @param level          The level of the msg significance
8786   @param format         Printf style format of message
8787   @param args           va_list list of arguments for the message
8788 
8789   @returns
8790     The function always returns 0. The return value is present in the
8791     signature to be compatible with other logging routines, which could
8792     return an error (e.g. logging to the log tables)
8793 */
8794 int vprint_msg_to_log(enum loglevel level, const char *format, va_list args)
8795 {
8796   char   buff[1024];
8797   size_t length;
8798   DBUG_ENTER("vprint_msg_to_log");
8799 
8800   length= my_vsnprintf(buff, sizeof(buff), format, args);
8801   print_buffer_to_file(level, buff, length);
8802 
8803 #ifdef _WIN32
8804   print_buffer_to_nt_eventlog(level, buff, length, sizeof(buff));
8805 #endif
8806 
8807   DBUG_RETURN(0);
8808 }
8809 #endif /* EMBEDDED_LIBRARY */
8810 
8811 
8812 void sql_print_error(const char *format, ...)
8813 {
8814   va_list args;
8815   DBUG_ENTER("sql_print_error");
8816 
8817   va_start(args, format);
8818   error_log_print(ERROR_LEVEL, format, args);
8819   va_end(args);
8820 
8821   DBUG_VOID_RETURN;
8822 }
8823 
8824 
8825 void sql_print_warning(const char *format, ...)
8826 {
8827   va_list args;
8828   DBUG_ENTER("sql_print_warning");
8829 
8830   va_start(args, format);
8831   error_log_print(WARNING_LEVEL, format, args);
8832   va_end(args);
8833 
8834   DBUG_VOID_RETURN;
8835 }
8836 
8837 
8838 void sql_print_information(const char *format, ...)
8839 {
8840   va_list args;
8841   DBUG_ENTER("sql_print_information");
8842 
8843   va_start(args, format);
8844   sql_print_information_v(format, args);
8845   va_end(args);
8846 
8847   DBUG_VOID_RETURN;
8848 }
8849 
8850 void sql_print_information_v(const char *format, va_list ap)
8851 {
8852   if (disable_log_notes)
8853     return;                 // Skip notes during start/shutdown
8854 
8855   error_log_print(INFORMATION_LEVEL, format, ap);
8856 }
8857 
8858 void
8859 TC_LOG::run_prepare_ordered(THD *thd, bool all)
8860 {
8861   Ha_trx_info *ha_info=
8862     all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
8863 
8864   mysql_mutex_assert_owner(&LOCK_prepare_ordered);
8865   for (; ha_info; ha_info= ha_info->next())
8866   {
8867     handlerton *ht= ha_info->ht();
8868     if (!ht->prepare_ordered)
8869       continue;
8870     ht->prepare_ordered(ht, thd, all);
8871   }
8872 }
8873 
8874 
8875 void
8876 TC_LOG::run_commit_ordered(THD *thd, bool all)
8877 {
8878   Ha_trx_info *ha_info=
8879     all ? thd->transaction.all.ha_list : thd->transaction.stmt.ha_list;
8880 
8881   mysql_mutex_assert_owner(&LOCK_commit_ordered);
8882   for (; ha_info; ha_info= ha_info->next())
8883   {
8884     handlerton *ht= ha_info->ht();
8885     if (!ht->commit_ordered)
8886       continue;
8887     ht->commit_ordered(ht, thd, all);
8888     DEBUG_SYNC(thd, "commit_after_run_commit_ordered");
8889   }
8890 }
8891 
8892 
/**
  Log a transaction and run the ordered engine hooks around it.

  run_prepare_ordered() is executed serialised under LOCK_prepare_ordered,
  the xid is then logged with log_one_transaction(), and finally
  run_commit_ordered() is executed, in the same sequence as the prepare step
  when both steps are requested.

  @param thd                   Thread doing the commit
  @param xid                   Transaction id; nothing is logged when it is 0
  @param all                   Passed on to the ordered hooks
  @param need_prepare_ordered  Run the prepare_ordered() hooks
  @param need_commit_ordered   Run the commit_ordered() hooks

  @return The value returned by log_one_transaction(), or 0 when xid is 0 or
          when thd->wait_for_prior_commit() returns non-zero.
*/
int TC_LOG_MMAP::log_and_order(THD *thd, my_xid xid, bool all,
                               bool need_prepare_ordered,
                               bool need_commit_ordered)
{
  int cookie;
  struct commit_entry entry;            /* Our node in commit_ordered_queue */
  /* Only assigned, and only read, when both need_* flags are set. */
  bool UNINIT_VAR(is_group_commit_leader);

  if (need_prepare_ordered)
  {
    mysql_mutex_lock(&LOCK_prepare_ordered);
    run_prepare_ordered(thd, all);
    if (need_commit_ordered)
    {
      /*
        Must put us in queue so we can run_commit_ordered() in same sequence
        as we did run_prepare_ordered().
      */
      thd->clear_wakeup_ready();
      entry.thd= thd;
      /* Push at the head; whoever finds the queue empty becomes leader. */
      commit_entry *previous_queue= commit_ordered_queue;
      entry.next= previous_queue;
      commit_ordered_queue= &entry;
      is_group_commit_leader= (previous_queue == NULL);
    }
    mysql_mutex_unlock(&LOCK_prepare_ordered);
  }

  /*
    NOTE(review): when both need_* flags are set, 'entry' (a local variable)
    has already been linked into commit_ordered_queue above, and this early
    return does not unlink it - confirm that this path cannot leave a
    dangling queue entry.
  */
  if (thd->wait_for_prior_commit())
    return 0;

  /* The cookie stays 0 when there is no xid to log. */
  cookie= 0;
  if (xid)
    cookie= log_one_transaction(xid);

  if (need_commit_ordered)
  {
    if (need_prepare_ordered)
    {
      /*
        We did the run_prepare_ordered() serialised, then ran the log_xid() in
        parallel. Now we have to do run_commit_ordered() serialised in the
        same sequence as run_prepare_ordered().

        We do this starting from the head of the queue, each thread doing
        run_commit_ordered() and signalling the next in queue.
      */
      if (is_group_commit_leader)
      {
        /* The first in queue starts the ball rolling. */
        mysql_mutex_lock(&LOCK_prepare_ordered);
        /* Wait until the previous queue has been fully processed. */
        while (commit_ordered_queue_busy)
          mysql_cond_wait(&COND_queue_busy, &LOCK_prepare_ordered);
        /* Detach the whole queue; later arrivals start a new one. */
        commit_entry *queue= commit_ordered_queue;
        commit_ordered_queue= NULL;
        /*
          Mark the queue busy while we bounce it from one thread to the
          next.
        */
        commit_ordered_queue_busy= true;
        mysql_mutex_unlock(&LOCK_prepare_ordered);

        /* Reverse the queue list so we get correct order. */
        commit_entry *prev= NULL;
        while (queue)
        {
          commit_entry *next= queue->next;
          queue->next= prev;
          prev= queue;
          queue= next;
        }
        /* After the reversal our own entry must be the first one. */
        DBUG_ASSERT(prev == &entry && prev->thd == thd);
      }
      else
      {
        /* Not first in queue; just wait until previous thread wakes us up. */
        thd->wait_for_wakeup_ready();
      }
    }

    /* Only run commit_ordered() if log_xid was successful. */
    if (cookie)
    {
      mysql_mutex_lock(&LOCK_commit_ordered);
      run_commit_ordered(thd, all);
      mysql_mutex_unlock(&LOCK_commit_ordered);
    }

    if (need_prepare_ordered)
    {
      /* Pass the baton on, or mark the queue free if we were the last. */
      commit_entry *next= entry.next;
      if (next)
      {
        next->thd->signal_wakeup_ready();
      }
      else
      {
        mysql_mutex_lock(&LOCK_prepare_ordered);
        commit_ordered_queue_busy= false;
        mysql_cond_signal(&COND_queue_busy);
        mysql_mutex_unlock(&LOCK_prepare_ordered);
      }
    }
  }

  return cookie;
}
9000 
9001 
9002 /********* transaction coordinator log for 2pc - mmap() based solution *******/
9003 
9004 /*
9005   the log consists of a file, mapped to memory.
9006   file is divided into pages of tc_log_page_size size.
9007   (usable size of the first page is smaller because of the log header)
9008   there is a PAGE control structure for each page
9009   each page (or rather its PAGE control structure) can be in one of
9010   the three states - active, syncing, pool.
9011   there could be only one page in the active or syncing state,
9012   but many in pool - pool is a fifo queue.
9013   the usual lifecycle of a page is pool->active->syncing->pool.
9014   the "active" page is a page where new xid's are logged.
9015   the page stays active as long as the syncing slot is taken.
9016   the "syncing" page is being synced to disk. no new xid can be added to it.
9017   when the syncing is done the page is moved to a pool and an active page
9018   becomes "syncing".
9019 
  the result of such an architecture is a natural "commit grouping" -
  If commits are coming faster than the system can sync, they do not
  stall. Instead, all commits that came since the last sync are
  logged to the same "active" page, and they are all synced with the
  next - single - sync. Thus, though individual commits are delayed,
  throughput is not decreasing.
9026 
9027   when an xid is added to an active page, the thread of this xid waits
9028   for a page's condition until the page is synced. when syncing slot
9029   becomes vacant one of these waiters is awaken to take care of syncing.
9030   it syncs the page and signals all waiters that the page is synced.
9031   PAGE::waiters is used to count these waiters, and a page may never
9032   become active again until waiters==0 (that is all waiters from the
9033   previous sync have noticed that the sync was completed)
9034 
9035   note, that the page becomes "dirty" and has to be synced only when a
9036   new xid is added into it. Removing a xid from a page does not make it
9037   dirty - we don't sync xid removals to disk.
9038 */
9039 
/*
  Counter incremented by TC_LOG_MMAP::overflow() every time a thread has to
  wait because no page from the pool could be used.
*/
ulong tc_log_page_waits= 0;

#ifdef HAVE_MMAP

/*
  Size of the log header at the start of the first page: the magic
  signature plus one extra byte (open() stores total_ha_2pc in that byte).
*/
#define TC_LOG_HEADER_SIZE (sizeof(tc_log_magic)+1)

/* Signature written at offset 0 by open() and verified by recover(). */
static const uchar tc_log_magic[]={(uchar) 254, 0x23, 0x05, 0x74};

/* Length given to the log file when open() has to create it. */
ulong opt_tc_log_size;
/*
  tc_log_page_size is set from my_getpagesize() in open().
  tc_log_cur_pages_used is bumped when an empty page becomes active and
  decremented when a page becomes completely empty again;
  tc_log_max_pages_used tracks the highest value it has reached.
*/
ulong tc_log_max_pages_used=0, tc_log_page_size=0, tc_log_cur_pages_used=0;
9050 
9051 int TC_LOG_MMAP::open(const char *opt_name)
9052 {
9053   uint i;
9054   bool crashed=FALSE;
9055   PAGE *pg;
9056 
9057   DBUG_ASSERT(total_ha_2pc > 1);
9058   DBUG_ASSERT(opt_name && opt_name[0]);
9059 
9060   tc_log_page_size= my_getpagesize();
9061 
9062   fn_format(logname,opt_name,mysql_data_home,"",MY_UNPACK_FILENAME);
9063   if ((fd= mysql_file_open(key_file_tclog, logname, O_RDWR | O_CLOEXEC, MYF(0))) < 0)
9064   {
9065     if (my_errno != ENOENT)
9066       goto err;
9067     if (using_heuristic_recover())
9068       return 1;
9069     if ((fd= mysql_file_create(key_file_tclog, logname, CREATE_MODE,
9070                                O_RDWR | O_CLOEXEC, MYF(MY_WME))) < 0)
9071       goto err;
9072     inited=1;
9073     file_length= opt_tc_log_size;
9074     if (mysql_file_chsize(fd, file_length, 0, MYF(MY_WME)))
9075       goto err;
9076   }
9077   else
9078   {
9079     inited= 1;
9080     crashed= TRUE;
9081     sql_print_information("Recovering after a crash using %s", opt_name);
9082     if (tc_heuristic_recover)
9083     {
9084       sql_print_error("Cannot perform automatic crash recovery when "
9085                       "--tc-heuristic-recover is used");
9086       goto err;
9087     }
9088     file_length= mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME+MY_FAE));
9089     if (file_length == MY_FILEPOS_ERROR || file_length % tc_log_page_size)
9090       goto err;
9091   }
9092 
9093   data= (uchar *)my_mmap(0, (size_t)file_length, PROT_READ|PROT_WRITE,
9094                         MAP_NOSYNC|MAP_SHARED, fd, 0);
9095   if (data == MAP_FAILED)
9096   {
9097     my_errno=errno;
9098     goto err;
9099   }
9100   inited=2;
9101 
9102   npages=(uint)file_length/tc_log_page_size;
9103   if (npages < 3)             // to guarantee non-empty pool
9104     goto err;
9105   if (!(pages=(PAGE *)my_malloc(npages*sizeof(PAGE), MYF(MY_WME|MY_ZEROFILL))))
9106     goto err;
9107   inited=3;
9108   for (pg=pages, i=0; i < npages; i++, pg++)
9109   {
9110     pg->next=pg+1;
9111     pg->waiters=0;
9112     pg->state=PS_POOL;
9113     mysql_mutex_init(key_PAGE_lock, &pg->lock, MY_MUTEX_INIT_FAST);
9114     mysql_cond_init(key_PAGE_cond, &pg->cond, 0);
9115     pg->ptr= pg->start=(my_xid *)(data + i*tc_log_page_size);
9116     pg->size=pg->free=tc_log_page_size/sizeof(my_xid);
9117     pg->end=pg->start + pg->size;
9118   }
9119   pages[0].size=pages[0].free=
9120                 (tc_log_page_size-TC_LOG_HEADER_SIZE)/sizeof(my_xid);
9121   pages[0].start=pages[0].end-pages[0].size;
9122   pages[npages-1].next=0;
9123   inited=4;
9124 
9125   if (crashed && recover())
9126       goto err;
9127 
9128   memcpy(data, tc_log_magic, sizeof(tc_log_magic));
9129   data[sizeof(tc_log_magic)]= (uchar)total_ha_2pc;
9130   my_msync(fd, data, tc_log_page_size, MS_SYNC);
9131   inited=5;
9132 
9133   mysql_mutex_init(key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
9134   mysql_mutex_init(key_LOCK_active, &LOCK_active, MY_MUTEX_INIT_FAST);
9135   mysql_mutex_init(key_LOCK_pool, &LOCK_pool, MY_MUTEX_INIT_FAST);
9136   mysql_mutex_init(key_LOCK_pending_checkpoint, &LOCK_pending_checkpoint,
9137                    MY_MUTEX_INIT_FAST);
9138   mysql_cond_init(key_COND_active, &COND_active, 0);
9139   mysql_cond_init(key_COND_pool, &COND_pool, 0);
9140   mysql_cond_init(key_TC_LOG_MMAP_COND_queue_busy, &COND_queue_busy, 0);
9141 
9142   inited=6;
9143 
9144   syncing= 0;
9145   active=pages;
9146   DBUG_ASSERT(npages >= 2);
9147   pool=pages+1;
9148   pool_last_ptr= &((pages+npages-1)->next);
9149   commit_ordered_queue= NULL;
9150   commit_ordered_queue_busy= false;
9151 
9152   return 0;
9153 
9154 err:
9155   close();
9156   return 1;
9157 }
9158 
9159 /**
9160   there is no active page, let's got one from the pool.
9161 
9162   Two strategies here:
9163     -# take the first from the pool
9164     -# if there're waiters - take the one with the most free space.
9165 
9166   @todo
9167     page merging. try to allocate adjacent page first,
9168     so that they can be flushed both in one sync
9169 */
9170 
9171 void TC_LOG_MMAP::get_active_from_pool()
9172 {
9173   PAGE **p, **best_p=0;
9174   int best_free;
9175 
9176   mysql_mutex_lock(&LOCK_pool);
9177 
9178   do
9179   {
9180     best_p= p= &pool;
9181     if ((*p)->waiters == 0 && (*p)->free > 0) // can the first page be used ?
9182       break;                                  // yes - take it.
9183 
9184     best_free=0;            // no - trying second strategy
9185     for (p=&(*p)->next; *p; p=&(*p)->next)
9186     {
9187       if ((*p)->waiters == 0 && (*p)->free > best_free)
9188       {
9189         best_free=(*p)->free;
9190         best_p=p;
9191       }
9192     }
9193   }
9194   while ((*best_p == 0 || best_free == 0) && overflow());
9195 
9196   mysql_mutex_assert_owner(&LOCK_active);
9197   active=*best_p;
9198 
9199   /* Unlink the page from the pool. */
9200   if (!(*best_p)->next)
9201     pool_last_ptr= best_p;
9202   *best_p=(*best_p)->next;
9203   mysql_mutex_unlock(&LOCK_pool);
9204 
9205   mysql_mutex_lock(&active->lock);
9206   if (active->free == active->size) // we've chosen an empty page
9207   {
9208     tc_log_cur_pages_used++;
9209     set_if_bigger(tc_log_max_pages_used, tc_log_cur_pages_used);
9210   }
9211 }
9212 
9213 /**
9214   @todo
9215   perhaps, increase log size ?
9216 */
9217 int TC_LOG_MMAP::overflow()
9218 {
9219   /*
9220     simple overflow handling - just wait
9221     TODO perhaps, increase log size ?
9222     let's check the behaviour of tc_log_page_waits first
9223   */
9224   tc_log_page_waits++;
9225   mysql_cond_wait(&COND_pool, &LOCK_pool);
9226   return 1; // always return 1
9227 }
9228 
9229 /**
9230   Record that transaction XID is committed on the persistent storage.
9231 
9232     This function is called in the middle of two-phase commit:
9233     First all resources prepare the transaction, then tc_log->log() is called,
9234     then all resources commit the transaction, then tc_log->unlog() is called.
9235 
9236     All access to active page is serialized but it's not a problem, as
9237     we're assuming that fsync() will be a main bottleneck.
9238     That is, parallelizing writes to log pages we'll decrease number of
9239     threads waiting for a page, but then all these threads will be waiting
9240     for a fsync() anyway
9241 
9242    If tc_log == MYSQL_LOG then tc_log writes transaction to binlog and
9243    records XID in a special Xid_log_event.
9244    If tc_log = TC_LOG_MMAP then xid is written in a special memory-mapped
9245    log.
9246 
9247   @retval
9248     0  - error
9249   @retval
9250     \# - otherwise, "cookie", a number that will be passed as an argument
9251     to unlog() call. tc_log can define it any way it wants,
9252     and use for whatever purposes. TC_LOG_MMAP sets it
9253     to the position in memory where xid was logged to.
9254 */
9255 
int TC_LOG_MMAP::log_one_transaction(my_xid xid)
{
  int err;
  PAGE *p;
  ulong cookie;

  mysql_mutex_lock(&LOCK_active);

  /*
    if the active page is full - just wait...
    frankly speaking, active->free here accessed outside of mutex
    protection, but it's safe, because it only means we may miss an
    unlog() for the active page, and we're not waiting for it here -
    unlog() does not signal COND_active.
  */
  while (unlikely(active && active->free == 0))
    mysql_cond_wait(&COND_active, &LOCK_active);

  /*
    no active page ? take one from the pool.
    get_active_from_pool() returns with active->lock held, so in both
    branches the page lock is held from here on.
  */
  if (active == 0)
    get_active_from_pool();
  else
    mysql_mutex_lock(&active->lock);

  p=active;

  /*
    p->free is always > 0 here because to decrease it one needs
    to take p->lock and before it one needs to take LOCK_active.
    But checked that active->free > 0 under LOCK_active and
    haven't release it ever since
  */

  /* searching for an empty slot (an empty slot holds 0) */
  while (*p->ptr)
  {
    p->ptr++;
    DBUG_ASSERT(p->ptr < p->end);               // because p->free > 0
  }

  /* found! store xid there and mark the page dirty */
  cookie= (ulong)((uchar *)p->ptr - data);      // can never be zero
  *p->ptr++= xid;
  p->free--;
  p->state= PS_DIRTY;
  mysql_mutex_unlock(&p->lock);

  /* Now decide who syncs the page: LOCK_sync protects the 'syncing' slot. */
  mysql_mutex_lock(&LOCK_sync);
  if (syncing)
  {                                          // somebody's syncing. let's wait
    mysql_mutex_unlock(&LOCK_active);
    mysql_mutex_lock(&p->lock);
    p->waiters++;
    /*
      Wait while our page is still unsynced and the syncing slot is taken.
      The wait is on p->cond with LOCK_sync as its mutex, so p->lock is
      released around it and re-taken afterwards.
    */
    while (p->state == PS_DIRTY && syncing)
    {
      mysql_mutex_unlock(&p->lock);
      mysql_cond_wait(&p->cond, &LOCK_sync);
      mysql_mutex_lock(&p->lock);
    }
    p->waiters--;
    err= p->state == PS_ERROR;
    if (p->state != PS_DIRTY)                   // page was synced
    {
      mysql_mutex_unlock(&LOCK_sync);
      if (p->waiters == 0)
        mysql_cond_signal(&COND_pool);     // in case somebody's waiting
      mysql_mutex_unlock(&p->lock);
      goto done;                             // we're done
    }
    /* Page is still dirty but the slot is free: this thread does the sync. */
    DBUG_ASSERT(!syncing);
    mysql_mutex_unlock(&p->lock);
    syncing = p;
    mysql_mutex_unlock(&LOCK_sync);

    mysql_mutex_lock(&LOCK_active);
    active=0;                                  // page is not active anymore
    mysql_cond_broadcast(&COND_active);
    mysql_mutex_unlock(&LOCK_active);
  }
  else
  {
    /* LOCK_active has been held all along on this path. */
    syncing = p;                               // place is vacant - take it
    mysql_mutex_unlock(&LOCK_sync);
    active = 0;                                // page is not active anymore
    mysql_cond_broadcast(&COND_active);
    mysql_mutex_unlock(&LOCK_active);
  }
  err= sync();

done:
  /* 0 signals an error to the caller; otherwise return the slot offset. */
  return err ? 0 : cookie;
}
9348 
/*
  Sync the page in the 'syncing' slot to disk, return it to the pool and
  free the slot.

  The page is appended to the tail of the pool with state PS_POOL, or
  PS_ERROR if my_msync() failed. All threads waiting on the page's
  condition are then woken up, and if there is an active page one of its
  waiters is signalled so that it can become the next syncer.

  Returns the result of my_msync() (non-zero means error).
*/
int TC_LOG_MMAP::sync()
{
  int err;

  DBUG_ASSERT(syncing != active);

  /*
    sit down and relax - this can take a while...
    note - no locks are held at this point
  */
  err= my_msync(fd, syncing->start, syncing->size * sizeof(my_xid), MS_SYNC);

  /* page is synced. let's move it to the pool (appended at the tail) */
  mysql_mutex_lock(&LOCK_pool);
  (*pool_last_ptr)=syncing;
  pool_last_ptr=&(syncing->next);
  syncing->next=0;
  syncing->state= err ? PS_ERROR : PS_POOL;
  mysql_cond_signal(&COND_pool);           // in case somebody's waiting
  mysql_mutex_unlock(&LOCK_pool);

  /* marking 'syncing' slot free */
  mysql_mutex_lock(&LOCK_sync);
  mysql_cond_broadcast(&syncing->cond);    // signal "sync done"
  syncing=0;
  /*
    we check the "active" pointer without LOCK_active. Still, it's safe -
    "active" can change from NULL to not NULL any time, but it
    will take LOCK_sync before waiting on active->cond. That is, it can never
    miss a signal.
    And "active" can change to NULL only by the syncing thread
    (the thread that will send a signal below)
  */
  if (active)
    mysql_cond_signal(&active->cond);      // wake up a new syncer
  mysql_mutex_unlock(&LOCK_sync);
  return err;
}
9387 
9388 static void
9389 mmap_do_checkpoint_callback(void *data)
9390 {
9391   TC_LOG_MMAP::pending_cookies *pending=
9392     static_cast<TC_LOG_MMAP::pending_cookies *>(data);
9393   ++pending->pending_count;
9394 }
9395 
9396 int TC_LOG_MMAP::unlog(ulong cookie, my_xid xid)
9397 {
9398   pending_cookies *full_buffer= NULL;
9399   uint32 ncookies= tc_log_page_size / sizeof(my_xid);
9400   DBUG_ASSERT(*(my_xid *)(data+cookie) == xid);
9401 
9402   /*
9403     Do not delete the entry immediately, as there may be participating storage
9404     engines which implement commit_checkpoint_request(), and thus have not yet
9405     flushed the commit durably to disk.
9406 
9407     Instead put it in a queue - and periodically, we will request a checkpoint
9408     from all engines and delete a whole batch at once.
9409   */
9410   mysql_mutex_lock(&LOCK_pending_checkpoint);
9411   if (pending_checkpoint == NULL)
9412   {
9413     uint32 size= sizeof(*pending_checkpoint) + sizeof(ulong) * (ncookies - 1);
9414     if (!(pending_checkpoint=
9415           (pending_cookies *)my_malloc(size, MYF(MY_ZEROFILL))))
9416     {
9417       my_error(ER_OUTOFMEMORY, MYF(0), size);
9418       mysql_mutex_unlock(&LOCK_pending_checkpoint);
9419       return 1;
9420     }
9421   }
9422 
9423   pending_checkpoint->cookies[pending_checkpoint->count++]= cookie;
9424   if (pending_checkpoint->count == ncookies)
9425   {
9426     full_buffer= pending_checkpoint;
9427     pending_checkpoint= NULL;
9428   }
9429   mysql_mutex_unlock(&LOCK_pending_checkpoint);
9430 
9431   if (full_buffer)
9432   {
9433     /*
9434       We do an extra increment and notify here - this ensures that
9435       things work also if there are no engines at all that support
9436       commit_checkpoint_request.
9437     */
9438     ++full_buffer->pending_count;
9439     ha_commit_checkpoint_request(full_buffer, mmap_do_checkpoint_callback);
9440     commit_checkpoint_notify(full_buffer);
9441   }
9442   return 0;
9443 }
9444 
9445 
9446 void
9447 TC_LOG_MMAP::commit_checkpoint_notify(void *cookie)
9448 {
9449   uint count;
9450   pending_cookies *pending= static_cast<pending_cookies *>(cookie);
9451   mysql_mutex_lock(&LOCK_pending_checkpoint);
9452   DBUG_ASSERT(pending->pending_count > 0);
9453   count= --pending->pending_count;
9454   mysql_mutex_unlock(&LOCK_pending_checkpoint);
9455   if (count == 0)
9456   {
9457     uint i;
9458     for (i= 0; i < tc_log_page_size / sizeof(my_xid); ++i)
9459       delete_entry(pending->cookies[i]);
9460     my_free(pending);
9461   }
9462 }
9463 
9464 
9465 /**
9466   erase xid from the page, update page free space counters/pointers.
9467   cookie points directly to the memory where xid was logged.
9468 */
9469 
9470 int TC_LOG_MMAP::delete_entry(ulong cookie)
9471 {
9472   PAGE *p=pages+(cookie/tc_log_page_size);
9473   my_xid *x=(my_xid *)(data+cookie);
9474 
9475   DBUG_ASSERT(x >= p->start && x < p->end);
9476 
9477   mysql_mutex_lock(&p->lock);
9478   *x=0;
9479   p->free++;
9480   DBUG_ASSERT(p->free <= p->size);
9481   set_if_smaller(p->ptr, x);
9482   if (p->free == p->size)              // the page is completely empty
9483     statistic_decrement(tc_log_cur_pages_used, &LOCK_status);
9484   if (p->waiters == 0)                 // the page is in pool and ready to rock
9485     mysql_cond_signal(&COND_pool);     // ping ... for overflow()
9486   mysql_mutex_unlock(&p->lock);
9487   return 0;
9488 }
9489 
9490 void TC_LOG_MMAP::close()
9491 {
9492   uint i;
9493   switch (inited) {
9494   case 6:
9495     mysql_mutex_destroy(&LOCK_sync);
9496     mysql_mutex_destroy(&LOCK_active);
9497     mysql_mutex_destroy(&LOCK_pool);
9498     mysql_mutex_destroy(&LOCK_pending_checkpoint);
9499     mysql_cond_destroy(&COND_pool);
9500     mysql_cond_destroy(&COND_active);
9501     mysql_cond_destroy(&COND_queue_busy);
9502     /* fall through */
9503   case 5:
9504     data[0]='A'; // garble the first (signature) byte, in case mysql_file_delete fails
9505     /* fall through */
9506   case 4:
9507     for (i=0; i < npages; i++)
9508     {
9509       if (pages[i].ptr == 0)
9510         break;
9511       mysql_mutex_destroy(&pages[i].lock);
9512       mysql_cond_destroy(&pages[i].cond);
9513     }
9514     /* fall through */
9515   case 3:
9516     my_free(pages);
9517     /* fall through */
9518   case 2:
9519     my_munmap((char*)data, (size_t)file_length);
9520     /* fall through */
9521   case 1:
9522     mysql_file_close(fd, MYF(0));
9523   }
9524   if (inited>=5) // cannot do in the switch because of Windows
9525     mysql_file_delete(key_file_tclog, logname, MYF(MY_WME));
9526   if (pending_checkpoint)
9527     my_free(pending_checkpoint);
9528   inited=0;
9529 }
9530 
9531 
9532 int TC_LOG_MMAP::recover()
9533 {
9534   HASH xids;
9535   PAGE *p=pages, *end_p=pages+npages;
9536 
9537   if (bcmp(data, tc_log_magic, sizeof(tc_log_magic)))
9538   {
9539     sql_print_error("Bad magic header in tc log");
9540     goto err1;
9541   }
9542 
9543   /*
9544     the first byte after magic signature is set to current
9545     number of storage engines on startup
9546   */
9547   if (data[sizeof(tc_log_magic)] > total_ha_2pc)
9548   {
9549     sql_print_error("Recovery failed! You must enable "
9550                     "all engines that were enabled at the moment of the crash");
9551     goto err1;
9552   }
9553 
9554   if (my_hash_init(&xids, &my_charset_bin, tc_log_page_size/3, 0,
9555                    sizeof(my_xid), 0, 0, MYF(0)))
9556     goto err1;
9557 
9558   for ( ; p < end_p ; p++)
9559   {
9560     for (my_xid *x=p->start; x < p->end; x++)
9561       if (*x && my_hash_insert(&xids, (uchar *)x))
9562         goto err2; // OOM
9563   }
9564 
9565   if (ha_recover(&xids))
9566     goto err2;
9567 
9568   my_hash_free(&xids);
9569   bzero(data, (size_t)file_length);
9570   return 0;
9571 
9572 err2:
9573   my_hash_free(&xids);
9574 err1:
9575   sql_print_error("Crash recovery failed. Either correct the problem "
9576                   "(if it's, for example, out of memory error) and restart, "
9577                   "or delete tc log and start mysqld with "
9578                   "--tc-heuristic-recover={commit|rollback}");
9579   return 1;
9580 }
9581 #endif
9582 
/*
  Global transaction coordinator objects: tc_log is a pointer to a TC_LOG
  implementation, tc_log_dummy and tc_log_mmap are statically allocated
  instances of two implementations.
  NOTE(review): where tc_log gets assigned is not visible in this part of
  the file - presumably during server startup; confirm against the caller.
*/
TC_LOG *tc_log;
TC_LOG_DUMMY tc_log_dummy;
TC_LOG_MMAP  tc_log_mmap;
9586 
9587 /**
9588   Perform heuristic recovery, if --tc-heuristic-recover was used.
9589 
9590   @note
9591     no matter whether heuristic recovery was successful or not
9592     mysqld must exit. So, return value is the same in both cases.
9593 
9594   @retval
9595     0	no heuristic recovery was requested
9596   @retval
9597     1   heuristic recovery was performed
9598 */
9599 
9600 int TC_LOG::using_heuristic_recover()
9601 {
9602   if (!tc_heuristic_recover)
9603     return 0;
9604 
9605   sql_print_information("Heuristic crash recovery mode");
9606   if (ha_recover(0))
9607     sql_print_error("Heuristic crash recovery failed");
9608   sql_print_information("Please restart mysqld without --tc-heuristic-recover");
9609   return 1;
9610 }
9611 
9612 /****** transaction coordinator log for 2pc - binlog() based solution ******/
9613 #define TC_LOG_BINLOG MYSQL_BIN_LOG
9614 
9615 int TC_LOG_BINLOG::open(const char *opt_name)
9616 {
9617   int      error= 1;
9618 
9619   DBUG_ASSERT(total_ha_2pc > 1);
9620   DBUG_ASSERT(opt_name && opt_name[0]);
9621 
9622   if (!my_b_inited(&index_file))
9623   {
9624     /* There was a failure to open the index file, can't open the binlog */
9625     cleanup();
9626     return 1;
9627   }
9628 
9629   if (using_heuristic_recover())
9630   {
9631     mysql_mutex_lock(&LOCK_log);
9632     /* generate a new binlog to mask a corrupted one */
9633     open(opt_name, LOG_BIN, 0, 0, WRITE_CACHE, max_binlog_size, 0, TRUE);
9634     mysql_mutex_unlock(&LOCK_log);
9635     cleanup();
9636     return 1;
9637   }
9638 
9639   error= do_binlog_recovery(opt_name, true);
9640   binlog_state_recover_done= true;
9641   return error;
9642 }
9643 
/**
  This is called on shutdown, after ha_panic.

  The body is empty: no work is done here.
*/
void TC_LOG_BINLOG::close()
{
}
9648 
9649 /*
9650   Do a binlog log_xid() for a group of transactions, linked through
9651   thd->next_commit_ordered.
9652 */
9653 int
9654 TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
9655                              bool need_prepare_ordered __attribute__((unused)),
9656                              bool need_commit_ordered __attribute__((unused)))
9657 {
9658   int err;
9659   DBUG_ENTER("TC_LOG_BINLOG::log_and_order");
9660 
9661   binlog_cache_mngr *cache_mngr= thd->binlog_setup_trx_data();
9662   if (!cache_mngr)
9663   {
9664     WSREP_DEBUG("Skipping empty log_xid: %s", thd->query());
9665     DBUG_RETURN(0);
9666   }
9667 
9668   cache_mngr->using_xa= TRUE;
9669   cache_mngr->xa_xid= xid;
9670   err= binlog_commit_flush_xid_caches(thd, cache_mngr, all, xid);
9671 
9672   DEBUG_SYNC(thd, "binlog_after_log_and_order");
9673 
9674   if (err)
9675     DBUG_RETURN(0);
9676 
9677   bool need_unlog= cache_mngr->need_unlog;
9678   /*
9679     The transaction won't need the flag anymore.
9680     Todo/fixme: consider to move the statement into cache_mngr->reset()
9681                 relocated to the current or later point.
9682   */
9683   cache_mngr->need_unlog= false;
9684   /*
9685     If using explicit user XA, we will not have XID. We must still return a
9686     non-zero cookie (as zero cookie signals error).
9687   */
9688   if (!xid || !need_unlog)
9689     DBUG_RETURN(BINLOG_COOKIE_DUMMY(cache_mngr->delayed_error));
9690 
9691   DBUG_RETURN(BINLOG_COOKIE_MAKE(cache_mngr->binlog_id,
9692                                  cache_mngr->delayed_error));
9693 }
9694 
9695 /*
9696   After an XID is logged, we need to hold on to the current binlog file until
9697   it is fully committed in the storage engine. The reason is that crash
9698   recovery only looks at the latest binlog, so we must make sure there are no
9699   outstanding prepared (but not committed) transactions before rotating the
9700   binlog.
9701 
9702   To handle this, we keep a count of outstanding XIDs. This function is used
9703   to increase this count when committing one or more transactions to the
9704   binary log.
9705 */
9706 void
9707 TC_LOG_BINLOG::mark_xids_active(ulong binlog_id, uint xid_count)
9708 {
9709   xid_count_per_binlog *b;
9710 
9711   DBUG_ENTER("TC_LOG_BINLOG::mark_xids_active");
9712   DBUG_PRINT("info", ("binlog_id=%lu xid_count=%u", binlog_id, xid_count));
9713 
9714   mysql_mutex_lock(&LOCK_xid_list);
9715   I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
9716   while ((b= it++))
9717   {
9718     if (b->binlog_id == binlog_id)
9719     {
9720       b->xid_count += xid_count;
9721       break;
9722     }
9723   }
9724   /*
9725     As we do not delete elements until count reach zero, elements should always
9726     be found.
9727   */
9728   DBUG_ASSERT(b);
9729   mysql_mutex_unlock(&LOCK_xid_list);
9730   DBUG_VOID_RETURN;
9731 }
9732 
9733 /*
9734   Once an XID is committed, it can no longer be needed during crash recovery,
9735   as it has been durably recorded on disk as "committed".
9736 
9737   This function is called to mark an XID this way. It needs to decrease the
9738   count of pending XIDs in the corresponding binlog. When the count reaches
9739   zero (for an "old" binlog that is not the active one), that binlog file no
9740   longer need to be scanned during crash recovery, so we can log a new binlog
9741   checkpoint.
9742 */
9743 void
9744 TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
9745 {
9746   xid_count_per_binlog *b;
9747   bool first;
9748   ulong current;
9749 
9750   DBUG_ENTER("TC_LOG_BINLOG::mark_xid_done");
9751 
9752   mysql_mutex_lock(&LOCK_xid_list);
9753   current= current_binlog_id;
9754   I_List_iterator<xid_count_per_binlog> it(binlog_xid_count_list);
9755   first= true;
9756   while ((b= it++))
9757   {
9758     if (b->binlog_id == binlog_id)
9759     {
9760       --b->xid_count;
9761 
9762       DBUG_ASSERT(b->xid_count >= 0); // catch unmatched (++) decrement
9763 
9764       break;
9765     }
9766     first= false;
9767   }
9768   /* Binlog is always found, as we do not remove until count reaches 0 */
9769   DBUG_ASSERT(b);
9770   /*
9771     If a RESET MASTER is pending, we are about to remove all log files, and
9772     the RESET MASTER thread is waiting for all pending unlog() calls to
9773     complete while holding LOCK_log. In this case we should not log a binlog
9774     checkpoint event (it would be deleted immediately anyway and we would
9775     deadlock on LOCK_log) but just signal the thread.
9776   */
9777   if (unlikely(reset_master_pending))
9778   {
9779     mysql_cond_broadcast(&COND_xid_list);
9780     mysql_mutex_unlock(&LOCK_xid_list);
9781     DBUG_VOID_RETURN;
9782   }
9783 
9784   if (likely(binlog_id == current) || b->xid_count != 0 || !first ||
9785       !write_checkpoint)
9786   {
9787     /* No new binlog checkpoint reached yet. */
9788     mysql_mutex_unlock(&LOCK_xid_list);
9789     DBUG_VOID_RETURN;
9790   }
9791 
9792   /*
9793     Now log a binlog checkpoint for the first binlog file with a non-zero count.
9794 
9795     Note that it is possible (though perhaps unlikely) that when count of
9796     binlog (N-2) drops to zero, binlog (N-1) is already at zero. So we may
9797     need to skip several entries before we find the one to log in the binlog
9798     checkpoint event.
9799 
9800     We chain the locking of LOCK_xid_list and LOCK_log, so that we ensure that
9801     Binlog_checkpoint_events are logged in order. This simplifies recovery a
9802     bit, as it can just take the last binlog checkpoint in the log, rather
9803     than compare all found against each other to find the one pointing to the
9804     most recent binlog.
9805 
9806     Note also that we need to first release LOCK_xid_list, then acquire
9807     LOCK_log, then re-aquire LOCK_xid_list. If we were to take LOCK_log while
9808     holding LOCK_xid_list, we might deadlock with other threads that take the
9809     locks in the opposite order.
9810   */
9811 
9812   ++mark_xid_done_waiting;
9813   mysql_mutex_unlock(&LOCK_xid_list);
9814   mysql_mutex_lock(&LOCK_log);
9815   mysql_mutex_lock(&LOCK_xid_list);
9816   --mark_xid_done_waiting;
9817   mysql_cond_broadcast(&COND_xid_list);
9818   /* We need to reload current_binlog_id due to release/re-take of lock. */
9819   current= current_binlog_id;
9820 
9821   for (;;)
9822   {
9823     /* Remove initial element(s) with zero count. */
9824     b= binlog_xid_count_list.head();
9825     /*
9826       We must not remove all elements in the list - the entry for the current
9827       binlog must be present always.
9828     */
9829     DBUG_ASSERT(b);
9830     if (b->binlog_id == current || b->xid_count > 0)
9831       break;
9832     WSREP_XID_LIST_ENTRY("TC_LOG_BINLOG::mark_xid_done(): Removing "
9833                          "xid_list_entry for %s (%lu)", b);
9834     delete binlog_xid_count_list.get();
9835   }
9836 
9837   mysql_mutex_unlock(&LOCK_xid_list);
9838   write_binlog_checkpoint_event_already_locked(b->binlog_name,
9839                                                b->binlog_name_len);
9840   mysql_mutex_unlock(&LOCK_log);
9841   DBUG_VOID_RETURN;
9842 }
9843 
9844 int TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
9845 {
9846   DBUG_ENTER("TC_LOG_BINLOG::unlog");
9847   if (!xid)
9848     DBUG_RETURN(0);
9849 
9850   if (!BINLOG_COOKIE_IS_DUMMY(cookie))
9851     mark_xid_done(BINLOG_COOKIE_GET_ID(cookie), true);
9852   /*
9853     See comment in trx_group_commit_leader() - if rotate() gave a failure,
9854     we delay the return of error code to here.
9855   */
9856   DBUG_RETURN(BINLOG_COOKIE_GET_ERROR_FLAG(cookie));
9857 }
9858 
9859 void
9860 TC_LOG_BINLOG::commit_checkpoint_notify(void *cookie)
9861 {
9862   xid_count_per_binlog *entry= static_cast<xid_count_per_binlog *>(cookie);
9863   bool found_entry= false;
9864   mysql_mutex_lock(&LOCK_binlog_background_thread);
9865   /* count the same notification kind from different engines */
9866   for (xid_count_per_binlog *link= binlog_background_thread_queue;
9867        link && !found_entry; link= link->next_in_queue)
9868   {
9869     if ((found_entry= (entry == link)))
9870       entry->notify_count++;
9871   }
9872   if (!found_entry)
9873   {
9874     entry->next_in_queue= binlog_background_thread_queue;
9875     binlog_background_thread_queue= entry;
9876   }
9877   mysql_cond_signal(&COND_binlog_background_thread);
9878   mysql_mutex_unlock(&LOCK_binlog_background_thread);
9879 }
9880 
/*
  Binlog background thread.

  This thread is used to log binlog checkpoints in the background, rather than
  in the context of random storage engine threads that happen to call
  commit_checkpoint_notify_ha() and may not like the delays while syncing
  binlog to disk or may not be set up with all my_thread_init() and other
  necessary stuff.

  In the future, this thread could also be used to do log rotation in the
  background, which could eliminate all stalls around binlog rotations.

  Life cycle, as implemented below:
    1. Create a private THD and (with HAVE_REPLICATION) load the slave GTID
       state.
    2. Set binlog_background_thread_started and signal
       COND_binlog_background_thread_end, which is what
       start_binlog_background_thread() waits for.
    3. Loop: wait for queued checkpoint notifications or a stop request,
       detach the whole queue under the mutex, then process it unlocked.
    4. On stop: destroy the THD, reset binlog_background_thread_stop and
       signal COND_binlog_background_thread_end.

  @param arg  Unused.
  @return     Always 0.
*/
pthread_handler_t
binlog_background_thread(void *arg __attribute__((unused)))
{
  bool stop;
  MYSQL_BIN_LOG::xid_count_per_binlog *queue, *next;
  THD *thd;
  my_thread_init();
  DBUG_ENTER("binlog_background_thread");

  /* Private THD for this daemon thread; it is deleted again before exit. */
  thd= new THD(next_thread_id());
  thd->system_thread= SYSTEM_THREAD_BINLOG_BACKGROUND;
  thd->thread_stack= (char*) &thd;           /* Set approximate stack start */
  thd->store_globals();
  thd->security_ctx->skip_grants();
  thd->set_command(COM_DAEMON);

  /*
    Load the slave replication GTID state from the mysql.gtid_slave_pos
    table.

    This is mostly so that we can start our seq_no counter from the highest
    seq_no seen by a slave. This way, we have a way to tell if a transaction
    logged by ourselves as master is newer or older than a replicated
    transaction.

    A failure here is only logged as a warning; the thread keeps running.
  */
#ifdef HAVE_REPLICATION
  if (rpl_load_gtid_slave_state(thd))
    sql_print_warning("Failed to load slave replication state from table "
                      "%s.%s: %u: %s", "mysql",
                      rpl_gtid_slave_state_table_name.str,
                      thd->get_stmt_da()->sql_errno(),
                      thd->get_stmt_da()->message());
#endif

  /*
    Announce that start-up is complete. start_binlog_background_thread()
    waits on this same condition until the flag is set.
  */
  mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
  binlog_background_thread_started= true;
  mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
  mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

  for (;;)
  {
    /*
      Wait until there is something in the queue to process, or we are asked
      to shut down.
    */
    THD_STAGE_INFO(thd, stage_binlog_waiting_background_tasks);
    mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
    for (;;)
    {
      /* Both values are re-read after every wakeup, under the mutex. */
      stop= binlog_background_thread_stop;
      queue= binlog_background_thread_queue;
      if (stop && !mysql_bin_log.is_xidlist_idle())
      {
        /*
          Delay stop until all pending binlog checkpoints have been processed.
        */
        stop= false;
      }
      if (stop || queue)
        break;
      mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread,
                      &mysql_bin_log.LOCK_binlog_background_thread);
    }
    /* Grab the queue, if any. */
    binlog_background_thread_queue= NULL;
    mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

    /* Process any incoming commit_checkpoint_notify() calls. */
    DBUG_EXECUTE_IF("inject_binlog_background_thread_before_mark_xid_done",
      DBUG_ASSERT(!debug_sync_set_action(
        thd,
        STRING_WITH_LEN("binlog_background_thread_before_mark_xid_done "
                        "SIGNAL injected_binlog_background_thread "
                        "WAIT_FOR something_that_will_never_happen "
                        "TIMEOUT 2")));
      );
    /* The detached list is now private to this thread; walk it unlocked. */
    while (queue)
    {
      long count= queue->notify_count;
      THD_STAGE_INFO(thd, stage_binlog_processing_checkpoint_notify);
      DEBUG_SYNC(thd, "binlog_background_thread_before_mark_xid_done");
      /* Set the thread start time */
      thd->set_time();
      /* Grab next pointer first, as mark_xid_done() may free the element. */
      next= queue->next_in_queue;
      queue->notify_count= 0;
      /*
        Note the <=: mark_xid_done() is called count + 1 times.
        NOTE(review): presumably notify_count only counts notifications that
        arrived after the entry was first queued - confirm against the code
        that enqueues entries.
      */
      for (long i= 0; i <= count; i++)
        mysql_bin_log.mark_xid_done(queue->binlog_id, true);
      queue= next;

      DBUG_EXECUTE_IF("binlog_background_checkpoint_processed",
        DBUG_ASSERT(!debug_sync_set_action(
          thd,
          STRING_WITH_LEN("now SIGNAL binlog_background_checkpoint_processed")));
        );
    }

    /* 'stop' is only still set here if the xid list was idle (see above). */
    if (stop)
      break;
  }

  THD_STAGE_INFO(thd, stage_binlog_stopping_background_thread);

  /* No need to use mutex as thd is not linked into other threads */
  delete thd;

  my_thread_end();

  /*
    Signal that we are (almost) stopped.
    NOTE(review): presumably whoever set binlog_background_thread_stop waits
    on COND_binlog_background_thread_end for the flag to be cleared - confirm
    against the stop path.
  */
  mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
  binlog_background_thread_stop= false;
  mysql_cond_signal(&mysql_bin_log.COND_binlog_background_thread_end);
  mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);

  DBUG_RETURN(0);
}
10009 
#ifdef HAVE_PSI_INTERFACE
/*
  Instrumentation key for the binlog background thread; passed to
  mysql_thread_create() in start_binlog_background_thread().
*/
static PSI_thread_key key_thread_binlog;

/*
  Thread instrumentation table registered under the "sql" category by
  start_binlog_background_thread(). It has a single entry, named
  "binlog_background".
*/
static PSI_thread_info all_binlog_threads[]=
{
  { &key_thread_binlog, "binlog_background", PSI_FLAG_GLOBAL},
};
#endif /* HAVE_PSI_INTERFACE */
10018 
10019 static bool
10020 start_binlog_background_thread()
10021 {
10022   pthread_t th;
10023 
10024 #ifdef HAVE_PSI_INTERFACE
10025   if (PSI_server)
10026     PSI_server->register_thread("sql", all_binlog_threads,
10027                                 array_elements(all_binlog_threads));
10028 #endif
10029 
10030   if (mysql_thread_create(key_thread_binlog, &th, &connection_attrib,
10031                           binlog_background_thread, NULL))
10032     return 1;
10033 
10034   /*
10035     Wait for the thread to have started (so we know that the slave replication
10036     state is loaded and we have correct global_gtid_counter).
10037   */
10038   mysql_mutex_lock(&mysql_bin_log.LOCK_binlog_background_thread);
10039   while (!binlog_background_thread_started)
10040     mysql_cond_wait(&mysql_bin_log.COND_binlog_background_thread_end,
10041                     &mysql_bin_log.LOCK_binlog_background_thread);
10042   mysql_mutex_unlock(&mysql_bin_log.LOCK_binlog_background_thread);
10043 
10044   return 0;
10045 }
10046 
10047 
10048 int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
10049                            IO_CACHE *first_log,
10050                            Format_description_log_event *fdle, bool do_xa)
10051 {
10052   Log_event *ev= NULL;
10053   HASH xids;
10054   MEM_ROOT mem_root;
10055   char binlog_checkpoint_name[FN_REFLEN];
10056   bool binlog_checkpoint_found;
10057   bool first_round;
10058   IO_CACHE log;
10059   File file= -1;
10060   const char *errmsg;
10061 #ifdef HAVE_REPLICATION
10062   rpl_gtid last_gtid;
10063   bool last_gtid_standalone= false;
10064   bool last_gtid_valid= false;
10065 #endif
10066 
10067   if (! fdle->is_valid() ||
10068       (do_xa && my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
10069                              sizeof(my_xid), 0, 0, MYF(0))))
10070     goto err1;
10071 
10072   if (do_xa)
10073     init_alloc_root(&mem_root, "TC_LOG_BINLOG", TC_LOG_PAGE_SIZE,
10074                     TC_LOG_PAGE_SIZE, MYF(0));
10075 
10076   fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
10077 
10078   /*
10079     Scan the binlog for XIDs that need to be committed if still in the
10080     prepared stage.
10081 
10082     Start with the latest binlog file, then continue with any other binlog
10083     files if the last found binlog checkpoint indicates it is needed.
10084   */
10085 
10086   binlog_checkpoint_found= false;
10087   first_round= true;
10088   for (;;)
10089   {
10090     while ((ev= Log_event::read_log_event(first_round ? first_log : &log,
10091                                           fdle, opt_master_verify_checksum))
10092            && ev->is_valid())
10093     {
10094       enum Log_event_type typ= ev->get_type_code();
10095       switch (typ)
10096       {
10097       case XID_EVENT:
10098       {
10099         if (do_xa)
10100         {
10101           Xid_log_event *xev=(Xid_log_event *)ev;
10102           uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
10103                                           sizeof(xev->xid));
10104           if (!x || my_hash_insert(&xids, x))
10105             goto err2;
10106         }
10107         break;
10108       }
10109       case BINLOG_CHECKPOINT_EVENT:
10110         if (first_round && do_xa)
10111         {
10112           size_t dir_len;
10113           Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev;
10114           if (cev->binlog_file_len >= FN_REFLEN)
10115             sql_print_warning("Incorrect binlog checkpoint event with too "
10116                               "long file name found.");
10117           else
10118           {
10119             /*
10120               Note that we cannot use make_log_name() here, as we have not yet
10121               initialised MYSQL_BIN_LOG::log_file_name.
10122             */
10123             dir_len= dirname_length(last_log_name);
10124             strmake(strnmov(binlog_checkpoint_name, last_log_name, dir_len),
10125                     cev->binlog_file_name, FN_REFLEN - 1 - dir_len);
10126             binlog_checkpoint_found= true;
10127           }
10128         }
10129         break;
10130       case GTID_LIST_EVENT:
10131         if (first_round)
10132         {
10133           Gtid_list_log_event *glev= (Gtid_list_log_event *)ev;
10134 
10135           /* Initialise the binlog state from the Gtid_list event. */
10136           if (rpl_global_gtid_binlog_state.load(glev->list, glev->count))
10137             goto err2;
10138         }
10139         break;
10140 
10141 #ifdef HAVE_REPLICATION
10142       case GTID_EVENT:
10143         if (first_round)
10144         {
10145           Gtid_log_event *gev= (Gtid_log_event *)ev;
10146 
10147           /* Update the binlog state with any GTID logged after Gtid_list. */
10148           last_gtid.domain_id= gev->domain_id;
10149           last_gtid.server_id= gev->server_id;
10150           last_gtid.seq_no= gev->seq_no;
10151           last_gtid_standalone=
10152             ((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false);
10153           last_gtid_valid= true;
10154         }
10155         break;
10156 #endif
10157 
10158       case START_ENCRYPTION_EVENT:
10159         {
10160           if (fdle->start_decryption((Start_encryption_log_event*) ev))
10161             goto err2;
10162         }
10163         break;
10164 
10165       default:
10166         /* Nothing. */
10167         break;
10168       }
10169 
10170 #ifdef HAVE_REPLICATION
10171       if (last_gtid_valid &&
10172           ((last_gtid_standalone && !ev->is_part_of_group(typ)) ||
10173            (!last_gtid_standalone &&
10174             (typ == XID_EVENT ||
10175              (LOG_EVENT_IS_QUERY(typ) &&
10176               (((Query_log_event *)ev)->is_commit() ||
10177                ((Query_log_event *)ev)->is_rollback()))))))
10178       {
10179         if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false))
10180           goto err2;
10181         last_gtid_valid= false;
10182       }
10183 #endif
10184 
10185       delete ev;
10186       ev= NULL;
10187     }
10188 
10189     if (!do_xa)
10190       break;
10191     /*
10192       If the last binlog checkpoint event points to an older log, we have to
10193       scan all logs from there also, to get all possible XIDs to recover.
10194 
10195       If there was no binlog checkpoint event at all, this means the log was
10196       written by an older version of MariaDB (or MySQL) - these always have an
10197       (implicit) binlog checkpoint event at the start of the last binlog file.
10198     */
10199     if (first_round)
10200     {
10201       if (!binlog_checkpoint_found)
10202         break;
10203       first_round= false;
10204       DBUG_EXECUTE_IF("xa_recover_expect_master_bin_000004",
10205           if (0 != strcmp("./master-bin.000004", binlog_checkpoint_name) &&
10206               0 != strcmp(".\\master-bin.000004", binlog_checkpoint_name))
10207             DBUG_SUICIDE();
10208         );
10209       if (find_log_pos(linfo, binlog_checkpoint_name, 1))
10210       {
10211         sql_print_error("Binlog file '%s' not found in binlog index, needed "
10212                         "for recovery. Aborting.", binlog_checkpoint_name);
10213         goto err2;
10214       }
10215     }
10216     else
10217     {
10218       end_io_cache(&log);
10219       mysql_file_close(file, MYF(MY_WME));
10220       file= -1;
10221     }
10222 
10223     if (!strcmp(linfo->log_file_name, last_log_name))
10224       break;                                    // No more files to do
10225     if ((file= open_binlog(&log, linfo->log_file_name, &errmsg)) < 0)
10226     {
10227       sql_print_error("%s", errmsg);
10228       goto err2;
10229     }
10230     /*
10231       We do not need to read the Format_description_log_event of other binlog
10232       files. It is not possible for a binlog checkpoint to span multiple
10233       binlog files written by different versions of the server. So we can use
10234       the first one read for reading from all binlog files.
10235     */
10236     if (find_next_log(linfo, 1))
10237     {
10238       sql_print_error("Error reading binlog files during recovery. Aborting.");
10239       goto err2;
10240     }
10241     fdle->reset_crypto();
10242   }
10243 
10244   if (do_xa)
10245   {
10246     if (ha_recover(&xids))
10247       goto err2;
10248 
10249     free_root(&mem_root, MYF(0));
10250     my_hash_free(&xids);
10251   }
10252   return 0;
10253 
10254 err2:
10255   delete ev;
10256   if (file >= 0)
10257   {
10258     end_io_cache(&log);
10259     mysql_file_close(file, MYF(MY_WME));
10260   }
10261   if (do_xa)
10262   {
10263     free_root(&mem_root, MYF(0));
10264     my_hash_free(&xids);
10265   }
10266 err1:
10267   sql_print_error("Crash recovery failed. Either correct the problem "
10268                   "(if it's, for example, out of memory error) and restart, "
10269                   "or delete (or rename) binary log and start mysqld with "
10270                   "--tc-heuristic-recover={commit|rollback}");
10271   return 1;
10272 }
10273 
10274 
10275 int
10276 MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
10277 {
10278   LOG_INFO log_info;
10279   const char *errmsg;
10280   IO_CACHE    log;
10281   File        file;
10282   Log_event  *ev= 0;
10283   Format_description_log_event fdle(BINLOG_VERSION);
10284   char        log_name[FN_REFLEN];
10285   int error;
10286 
10287   if (unlikely((error= find_log_pos(&log_info, NullS, 1))))
10288   {
10289     /*
10290       If there are no binlog files (LOG_INFO_EOF), then we still try to read
10291       the .state file to restore the binlog state. This allows to copy a server
10292       to provision a new one without copying the binlog files (except the
10293       master-bin.state file) and still preserve the correct binlog state.
10294     */
10295     if (error != LOG_INFO_EOF)
10296       sql_print_error("find_log_pos() failed (error: %d)", error);
10297     else
10298     {
10299       error= read_state_from_file();
10300       if (error == 2)
10301       {
10302         /*
10303           No binlog files and no binlog state is not an error (eg. just initial
10304           server start after fresh installation).
10305         */
10306         error= 0;
10307       }
10308     }
10309     return error;
10310   }
10311 
10312   if (! fdle.is_valid())
10313     return 1;
10314 
10315   do
10316   {
10317     strmake_buf(log_name, log_info.log_file_name);
10318   } while (!(error= find_next_log(&log_info, 1)));
10319 
10320   if (error !=  LOG_INFO_EOF)
10321   {
10322     sql_print_error("find_log_pos() failed (error: %d)", error);
10323     return error;
10324   }
10325 
10326   if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
10327   {
10328     sql_print_error("%s", errmsg);
10329     return 1;
10330   }
10331 
10332   if ((ev= Log_event::read_log_event(&log, &fdle,
10333                                      opt_master_verify_checksum)) &&
10334       ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
10335   {
10336     if (ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
10337     {
10338       sql_print_information("Recovering after a crash using %s", opt_name);
10339       error= recover(&log_info, log_name, &log,
10340                      (Format_description_log_event *)ev, do_xa_recovery);
10341     }
10342     else
10343     {
10344       error= read_state_from_file();
10345       if (unlikely(error == 2))
10346       {
10347         /*
10348           The binlog exists, but the .state file is missing. This is normal if
10349           this is the first master start after a major upgrade to 10.0 (with
10350           GTID support).
10351 
10352           However, it could also be that the .state file was lost somehow, and
10353           in this case it could be a serious issue, as we would set the wrong
10354           binlog state in the next binlog file to be created, and GTID
10355           processing would be corrupted. A common way would be copying files
10356           from an old server to a new one and forgetting the .state file.
10357 
10358           So in this case, we want to try to recover the binlog state by
10359           scanning the last binlog file (but we do not need any XA recovery).
10360 
10361           ToDo: We could avoid one scan at first start after major upgrade, by
10362           detecting that there is no GTID_LIST event at the start of the
10363           binlog file, and stopping the scan in that case.
10364         */
10365         error= recover(&log_info, log_name, &log,
10366                        (Format_description_log_event *)ev, false);
10367       }
10368     }
10369   }
10370 
10371   delete ev;
10372   end_io_cache(&log);
10373   mysql_file_close(file, MYF(MY_WME));
10374 
10375   return error;
10376 }
10377 
10378 
10379 #ifdef INNODB_COMPATIBILITY_HOOKS
10380 /**
10381   Get the file name of the MySQL binlog.
10382   @return the name of the binlog file
10383 */
10384 extern "C"
10385 const char* mysql_bin_log_file_name(void)
10386 {
10387   return mysql_bin_log.get_log_fname();
10388 }
10389 /**
10390   Get the current position of the MySQL binlog.
10391   @return byte offset from the beginning of the binlog
10392 */
10393 extern "C"
10394 ulonglong mysql_bin_log_file_pos(void)
10395 {
10396   return (ulonglong) mysql_bin_log.get_log_file()->pos_in_file;
10397 }
10398 /*
10399   Get the current position of the MySQL binlog for transaction currently being
10400   committed.
10401 
10402   This is valid to call from within storage engine commit_ordered() and
10403   commit() methods only.
10404 
10405   Since it stores the position inside THD, it is safe to call without any
10406   locking.
10407 */
10408 void
10409 mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file)
10410 {
10411   binlog_cache_mngr *cache_mngr;
10412   if (opt_bin_log &&
10413       (cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton)))
10414   {
10415     *out_file= cache_mngr->last_commit_pos_file;
10416     *out_pos= (ulonglong)(cache_mngr->last_commit_pos_offset);
10417   }
10418   else
10419   {
10420     *out_file= NULL;
10421     *out_pos= 0;
10422   }
10423 }
10424 #endif /* INNODB_COMPATIBILITY_HOOKS */
10425 
10426 
10427 static void
10428 binlog_checksum_update(MYSQL_THD thd, struct st_mysql_sys_var *var,
10429                        void *var_ptr, const void *save)
10430 {
10431   ulong value=  *((ulong *)save);
10432   bool check_purge= false;
10433   ulong UNINIT_VAR(prev_binlog_id);
10434 
10435   mysql_mutex_lock(mysql_bin_log.get_log_lock());
10436   if(mysql_bin_log.is_open())
10437   {
10438     prev_binlog_id= mysql_bin_log.current_binlog_id;
10439     if (binlog_checksum_options != value)
10440       mysql_bin_log.checksum_alg_reset= (enum_binlog_checksum_alg)value;
10441     if (mysql_bin_log.rotate(true, &check_purge))
10442       check_purge= false;
10443   }
10444   else
10445   {
10446     binlog_checksum_options= value;
10447   }
10448   DBUG_ASSERT(binlog_checksum_options == value);
10449   mysql_bin_log.checksum_alg_reset= BINLOG_CHECKSUM_ALG_UNDEF;
10450   mysql_mutex_unlock(mysql_bin_log.get_log_lock());
10451   if (check_purge)
10452     mysql_bin_log.checkpoint_and_purge(prev_binlog_id);
10453 }
10454 
10455 
10456 static int show_binlog_vars(THD *thd, SHOW_VAR *var, void *,
10457                             system_status_var *status_var, enum_var_type)
10458 {
10459   mysql_bin_log.set_status_variables(thd);
10460   var->type= SHOW_ARRAY;
10461   var->value= (char *)&binlog_status_vars_detail;
10462   return 0;
10463 }
10464 
/*
  Status variables exported by the binlog plugin (see the plugin declaration).
  The single "Binlog" entry is a SHOW_FUNC resolved through show_binlog_vars();
  the NullS entry terminates the array.
*/
static SHOW_VAR binlog_status_vars_top[]= {
  {"Binlog", (char *) &show_binlog_vars, SHOW_FUNC},
  {NullS, NullS, SHOW_LONG}
};
10469 
/*
  System variable optimize_thread_scheduling, backed by
  opt_optimize_thread_scheduling. It is declared PLUGIN_VAR_READONLY, has no
  check or update callbacks (both NULL) and defaults to 1.
*/
static MYSQL_SYSVAR_BOOL(
  optimize_thread_scheduling,
  opt_optimize_thread_scheduling,
  PLUGIN_VAR_READONLY,
  "Run fast part of group commit in a single thread, to optimize kernel "
  "thread scheduling. On by default. Disable to run each transaction in group "
  "commit in its own thread, which can be slower at very high concurrency. "
  "This option is mostly for testing one algorithm versus the other, and it "
  "should not normally be necessary to change it.",
  NULL,
  NULL,
  1);
10482 
/*
  System variable checksum, backed by binlog_checksum_options. Its values
  come from binlog_checksum_typelib and the default is
  BINLOG_CHECKSUM_ALG_CRC32. Changes are applied through
  binlog_checksum_update().
*/
static MYSQL_SYSVAR_ENUM(
  checksum,
  binlog_checksum_options,
  PLUGIN_VAR_RQCMDARG,
  "Type of BINLOG_CHECKSUM_ALG. Include checksum for "
  "log events in the binary log",
  NULL,
  binlog_checksum_update,
  BINLOG_CHECKSUM_ALG_CRC32,
  &binlog_checksum_typelib);
10493 
/*
  NULL-terminated list of the system variables declared above; referenced
  from the binlog plugin declaration.
*/
static struct st_mysql_sys_var *binlog_sys_vars[]=
{
  MYSQL_SYSVAR(optimize_thread_scheduling),
  MYSQL_SYSVAR(checksum),
  NULL
};
10500 
10501 
10502 /*
10503   Copy out the non-directory part of binlog position filename for the
10504   `binlog_snapshot_file' status variable, same way as it is done for
10505   SHOW MASTER STATUS.
10506 */
10507 static void
10508 set_binlog_snapshot_file(const char *src)
10509 {
10510   size_t dir_len = dirname_length(src);
10511   strmake_buf(binlog_snapshot_file, src + dir_len);
10512 }
10513 
10514 /*
10515   Copy out current values of status variables, for SHOW STATUS or
10516   information_schema.global_status.
10517 
10518   This is called only under LOCK_show_status, so we can fill in a static array.
10519 */
10520 void
10521 TC_LOG_BINLOG::set_status_variables(THD *thd)
10522 {
10523   binlog_cache_mngr *cache_mngr;
10524 
10525   if (thd && opt_bin_log)
10526     cache_mngr= (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10527   else
10528     cache_mngr= 0;
10529 
10530   bool have_snapshot= (cache_mngr && cache_mngr->last_commit_pos_file[0] != 0);
10531   mysql_mutex_lock(&LOCK_commit_ordered);
10532   binlog_status_var_num_commits= this->num_commits;
10533   binlog_status_var_num_group_commits= this->num_group_commits;
10534   if (!have_snapshot)
10535   {
10536     set_binlog_snapshot_file(last_commit_pos_file);
10537     binlog_snapshot_position= last_commit_pos_offset;
10538   }
10539   mysql_mutex_unlock(&LOCK_commit_ordered);
10540   mysql_mutex_lock(&LOCK_prepare_ordered);
10541   binlog_status_group_commit_trigger_count= this->group_commit_trigger_count;
10542   binlog_status_group_commit_trigger_timeout= this->group_commit_trigger_timeout;
10543   binlog_status_group_commit_trigger_lock_wait= this->group_commit_trigger_lock_wait;
10544   mysql_mutex_unlock(&LOCK_prepare_ordered);
10545 
10546   if (have_snapshot)
10547   {
10548     set_binlog_snapshot_file(cache_mngr->last_commit_pos_file);
10549     binlog_snapshot_position= cache_mngr->last_commit_pos_offset;
10550   }
10551 }
10552 
10553 
10554 /*
10555   Find the Gtid_list_log_event at the start of a binlog.
10556 
10557   NULL for ok, non-NULL error message for error.
10558 
10559   If ok, then the event is returned in *out_gtid_list. This can be NULL if we
10560   get back to binlogs written by old server version without GTID support. If
10561   so, it means we have reached the point to start from, as no GTID events can
10562   exist in earlier binlogs.
10563 */
10564 const char *
10565 get_gtid_list_event(IO_CACHE *cache, Gtid_list_log_event **out_gtid_list)
10566 {
10567   Format_description_log_event init_fdle(BINLOG_VERSION);
10568   Format_description_log_event *fdle;
10569   Log_event *ev;
10570   const char *errormsg = NULL;
10571 
10572   *out_gtid_list= NULL;
10573 
10574   if (!(ev= Log_event::read_log_event(cache, &init_fdle,
10575                                       opt_master_verify_checksum)) ||
10576       ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
10577   {
10578     if (ev)
10579       delete ev;
10580     return "Could not read format description log event while looking for "
10581       "GTID position in binlog";
10582   }
10583 
10584   fdle= static_cast<Format_description_log_event *>(ev);
10585 
10586   for (;;)
10587   {
10588     Log_event_type typ;
10589 
10590     ev= Log_event::read_log_event(cache, fdle, opt_master_verify_checksum);
10591     if (!ev)
10592     {
10593       errormsg= "Could not read GTID list event while looking for GTID "
10594         "position in binlog";
10595       break;
10596     }
10597     typ= ev->get_type_code();
10598     if (typ == GTID_LIST_EVENT)
10599       break;                                    /* Done, found it */
10600     if (typ == START_ENCRYPTION_EVENT)
10601     {
10602       if (fdle->start_decryption((Start_encryption_log_event*) ev))
10603         errormsg= "Could not set up decryption for binlog.";
10604     }
10605     delete ev;
10606     if (typ == ROTATE_EVENT || typ == STOP_EVENT ||
10607         typ == FORMAT_DESCRIPTION_EVENT || typ == START_ENCRYPTION_EVENT)
10608       continue;                                 /* Continue looking */
10609 
10610     /* We did not find any Gtid_list_log_event, must be old binlog. */
10611     ev= NULL;
10612     break;
10613   }
10614 
10615   delete fdle;
10616   *out_gtid_list= static_cast<Gtid_list_log_event *>(ev);
10617   return errormsg;
10618 }
10619 
10620 
/*
  Storage engine descriptor for the binlog pseudo engine; it carries only the
  handlerton interface version and is referenced by the plugin declaration.
*/
struct st_mysql_storage_engine binlog_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
10623 
/*
  Plugin declaration for the "binlog" pseudo storage engine. It is
  initialised by binlog_init(), has no deinit function, and exports the
  status and system variables defined earlier in this file.
*/
maria_declare_plugin(binlog)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &binlog_storage_engine,
  "binlog",
  "MySQL AB",
  "This is a pseudo storage engine to represent the binlog in a transaction",
  PLUGIN_LICENSE_GPL,
  binlog_init, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0100 /* 1.0 */,
  binlog_status_vars_top,     /* status variables                */
  binlog_sys_vars,            /* system variables                */
  "1.0",                      /* string version */
  MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
}
maria_declare_plugin_end;
10641 
10642 #ifdef WITH_WSREP
10643 IO_CACHE * get_trans_log(THD * thd)
10644 {
10645   DBUG_ASSERT(binlog_hton->slot != HA_SLOT_UNDEF);
10646   binlog_cache_mngr *cache_mngr = (binlog_cache_mngr*)
10647     thd_get_ha_data(thd, binlog_hton);
10648   if (cache_mngr)
10649     return cache_mngr->get_binlog_cache_log(true);
10650 
10651   WSREP_DEBUG("binlog cache not initialized, conn: %llu",
10652 	      thd->thread_id);
10653   return NULL;
10654 }
10655 
10656 
10657 bool wsrep_trans_cache_is_empty(THD *thd)
10658 {
10659   binlog_cache_mngr *const cache_mngr=
10660       (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10661   return (!cache_mngr || cache_mngr->trx_cache.empty());
10662 }
10663 
10664 
10665 void thd_binlog_trx_reset(THD * thd)
10666 {
10667   /*
10668     todo: fix autocommit select to not call the caller
10669   */
10670   if (thd_get_ha_data(thd, binlog_hton) != NULL)
10671   {
10672     binlog_cache_mngr *const cache_mngr=
10673       (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10674     if (cache_mngr)
10675     {
10676       cache_mngr->reset(false, true);
10677       if (!cache_mngr->stmt_cache.empty())
10678       {
10679         WSREP_DEBUG("pending events in stmt cache, sql: %s", thd->query());
10680         cache_mngr->stmt_cache.reset();
10681       }
10682     }
10683   }
10684   thd->clear_binlog_table_maps();
10685 }
10686 
10687 
10688 void thd_binlog_rollback_stmt(THD * thd)
10689 {
10690   WSREP_DEBUG("thd_binlog_rollback_stmt connection: %llu",
10691 	      thd->thread_id);
10692   binlog_cache_mngr *const cache_mngr=
10693     (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
10694   if (cache_mngr)
10695     cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
10696 }
10697 #endif /* WITH_WSREP */
10698