1 /* Copyright (c) 2009, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software Foundation,
21    51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22 
23 #include "binlog.h"
24 
25 #include "my_stacktrace.h"                  // my_safe_print_system_time
26 #include "debug_sync.h"                     // DEBUG_SYNC
27 #include "log.h"                            // sql_print_warning
28 #include "log_event.h"                      // Rows_log_event
29 #include "mysqld_thd_manager.h"             // Global_THD_manager
30 #include "rpl_handler.h"                    // RUN_HOOK
31 #include "rpl_mi.h"                         // Master_info
32 #include "rpl_rli.h"                        // Relay_log_info
33 #include "rpl_rli_pdb.h"                    // Slave_worker
34 #include "rpl_slave_commit_order_manager.h" // Commit_order_manager
35 #include "rpl_trx_boundary_parser.h"        // Transaction_boundary_parser
36 #include "rpl_context.h"
37 #include "sql_class.h"                      // THD
38 #include "sql_parse.h"                      // sqlcom_can_generate_row_events
39 #include "sql_show.h"                       // append_identifier
40 #include "sql_base.h"                       // find_temporary_table
41 
42 #include "pfs_file_provider.h"
43 #include "mysql/psi/mysql_file.h"
44 
45 #include <pfs_transaction_provider.h>
46 #include <mysql/psi/mysql_transaction.h>
47 #include "xa.h"
48 
49 #include <list>
50 #include <string>
51 #include "my_rnd.h"
52 #include <sstream>
53 
54 using std::max;
55 using std::min;
56 using std::string;
57 using std::list;
58 using binary_log::checksum_crc32;
/*
  Expands to the stringified name of flag F followed by a single space when
  (V)&(F) is non-zero, and to the empty string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")

// NOTE(review): the consumer of this prefix is not visible in this part of
// the file -- confirm what "ML" stands for and where it is used.
#define LOG_PREFIX	"ML"
62 
63 /**
64   @defgroup Binary_Log Binary Log
65   @{
66  */
67 
/*
  my_off_t value with every bit set. binlog_trx_cache_data uses it as the
  "no position recorded" value of before_stmt_pos.
*/
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)

/*
  Constants required for the limit unsafe warnings suppression
 */
//seconds after which the limit unsafe warnings suppression will be activated
#define LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT 50
//number of limit unsafe warnings after which the suppression will be activated
#define LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT 50
//number of attempts Thd_backup_and_restore makes to attach to a session
//before it terminates the server
#define MAX_SESSION_ATTACH_TRIES 10

/*
  Bookkeeping for the limit unsafe warnings suppression.
  NOTE(review): the code that updates these variables is outside this part
  of the file; the meanings below are inferred from the names -- confirm.
 */
//presumably the time at which the current suppression window started
static ulonglong limit_unsafe_suppression_start_time= 0;
//presumably true while the suppression is in effect
static bool unsafe_warning_suppression_is_activated= false;
//presumably the number of limit unsafe warnings counted so far
static int limit_unsafe_warning_count= 0;
82 
// NOTE(review): presumably the handlerton registered for the binary log; it
// is assigned outside this part of the file -- confirm.
static handlerton *binlog_hton;
// NOTE(review): looks like the backing variable of a "binlog order commits"
// option, enabled by default -- confirm against the option definition.
bool opt_binlog_order_commits= true;

// Both pointers start out null.
// NOTE(review): presumably the binary log index file name and the binary log
// base name; they are set outside this part of the file -- confirm.
const char *log_bin_index= 0;
const char *log_bin_basename= 0;

// The file-wide MYSQL_BIN_LOG object, constructed with the address of
// sync_binlog_period and the WRITE_CACHE cache type.
MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period, WRITE_CACHE);
90 
/*
  Forward declarations of file-local functions.
  NOTE(review): the ones taking a handlerton look like the callbacks that
  get installed on the binlog handlerton -- confirm against binlog_init().
*/
static int binlog_init(void *p);
static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event);
static int binlog_close_connection(handlerton *hton, THD *thd);
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
                                                      THD *thd);
static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);
static int binlog_clone_consistent_snapshot(handlerton *hton, THD *thd,
                                            THD *from_thd);
static int binlog_xa_commit(handlerton *hton,  XID *xid);
static int binlog_xa_rollback(handlerton *hton,  XID *xid);

// NOTE(review): judging by the name, aborts the server as the binlog error
// action, reporting err_string; the definition is not in view -- confirm.
static void exec_binlog_error_action_abort(const char* err_string);
108 
// The last published global binlog position
static char binlog_global_snapshot_file[FN_REFLEN];
static ulonglong binlog_global_snapshot_position;

// Binlog position variables for SHOW STATUS
static char binlog_snapshot_file[FN_REFLEN];
static ulonglong binlog_snapshot_position;
// NOTE(review): not referenced by binlog_status_vars_detail below; it is
// presumably exposed through some other path -- confirm.
static std::string binlog_snapshot_gtid_executed;

/*
  Status variables backed by the snapshot variables above: "snapshot_file"
  is declared as SHOW_CHAR and "snapshot_position" as SHOW_LONGLONG, both
  with global scope. The last entry has a NullS name and value; presumably
  it is the end-of-array marker.
*/
static SHOW_VAR binlog_status_vars_detail[]=
{
  {"snapshot_file",
   (char *)&binlog_snapshot_file, SHOW_CHAR, SHOW_SCOPE_GLOBAL},
  {"snapshot_position",
   (char *)&binlog_snapshot_position, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};
126 
127 /**
128   Helper class to switch to a new thread and then go back to the previous one,
129   when the object is destroyed using RAII.
130 
131   This class is used to temporarily switch to another session (THD
132   structure). It will set up thread specific "globals" correctly
133   so that the POSIX thread looks exactly like the session attached to.
  However, PSI_thread info is not touched, as it is required to show
  the actual physical view in PFS instrumentation, i.e., it should
  depict the real thread doing the work instead of the thread it
  switched to.
138 
139   On destruction, the original session (which is supplied to the
140   constructor) will be re-attached automatically. For example, with
141   this code, the value of @c current_thd will be the same before and
142   after execution of the code.
143 
144   @code
145   {
146     for (int i = 0 ; i < count ; ++i)
147     {
148       // here we are attached to current_thd
149       // [...]
150       Thd_backup_and_restore switch_thd(current_thd, other_thd[i]);
151       // [...]
152       // here we are attached to other_thd[i]
153       // [...]
154     }
155     // here we are attached to current_thd
156   }
157   @endcode
158 
159   @warning The class is not designed to be inherited from.
160  */
161 
162 #ifndef EMBEDDED_LIBRARY
163 
164 class Thd_backup_and_restore
165 {
166 public:
167   /**
168     Try to attach the POSIX thread to a session.
169     - This function attaches the POSIX thread to a session
170     in MAX_SESSION_ATTACH_TRIES tries when encountering
171     'out of memory' error, and terminates the server after
172     failed in MAX_SESSION_ATTACH_TRIES tries.
173 
174     @param[in] backup_thd    The thd to restore to when object is destructed.
175     @param[in] new_thd       The thd to attach to.
176    */
177 
Thd_backup_and_restore(THD * backup_thd,THD * new_thd)178   Thd_backup_and_restore(THD *backup_thd, THD *new_thd)
179     : m_backup_thd(backup_thd), m_new_thd(new_thd),
180       m_new_thd_old_real_id(new_thd->real_id)
181   {
182     assert(m_backup_thd != NULL && m_new_thd != NULL);
183     // Reset the state of the current thd.
184     m_backup_thd->restore_globals();
185     int i= 0;
186     /*
187       Attach the POSIX thread to a session in MAX_SESSION_ATTACH_TRIES
188       tries when encountering 'out of memory' error.
189     */
190     while (i < MAX_SESSION_ATTACH_TRIES)
191     {
192       /*
193         Currently attach_to(...) returns ER_OUTOFMEMORY or 0. So
194         we continue to attach the POSIX thread when encountering
195         the ER_OUTOFMEMORY error. Please take care other error
196         returned from attach_to(...) in future.
197       */
198       if (!attach_to(new_thd))
199       {
200         if (i > 0)
201           sql_print_warning("Server overcomes the temporary 'out of memory' "
202                             "in '%d' tries while attaching to session thread "
203                             "during the group commit phase.\n", i + 1);
204         break;
205       }
206       /* Sleep 1 microsecond per try to avoid temporary 'out of memory' */
207       my_sleep(1);
208       i++;
209     }
210     /*
211       Terminate the server after failed to attach the POSIX thread
212       to a session in MAX_SESSION_ATTACH_TRIES tries.
213     */
214     if (MAX_SESSION_ATTACH_TRIES == i)
215     {
216       my_safe_print_system_time();
217       my_safe_printf_stderr("%s", "[Fatal] Out of memory while attaching to "
218                             "session thread during the group commit phase. "
219                             "Data consistency between master and slave can "
220                             "be guaranteed after server restarts.\n");
221       _exit(MYSQLD_FAILURE_EXIT);
222     }
223   }
224 
225   /**
226       Restores to previous thd.
227    */
~Thd_backup_and_restore()228   ~Thd_backup_and_restore()
229   {
230     /*
231       Restore the global variables of the thd we previously attached to,
232       to its original state. In other words, detach the m_new_thd.
233     */
234     m_new_thd->restore_globals();
235     m_new_thd->real_id= m_new_thd_old_real_id;
236 
237     // Reset the global variables to the original state.
238     if (unlikely(m_backup_thd->store_globals()))
239       assert(0);                           // Out of memory?!
240   }
241 
242 private:
243 
244   /**
245     Attach the POSIX thread to a session.
246    */
attach_to(THD * thd)247   int attach_to(THD *thd)
248   {
249     if (DBUG_EVALUATE_IF("simulate_session_attach_error", 1, 0)
250         || unlikely(thd->store_globals()))
251     {
252       /*
253         Indirectly uses pthread_setspecific, which can only return
254         ENOMEM or EINVAL. Since store_globals are using correct keys,
255         the only alternative is out of memory.
256       */
257       return ER_OUTOFMEMORY;
258     }
259     return 0;
260   }
261 
262   THD *m_backup_thd;
263   THD *m_new_thd;
264   my_thread_t m_new_thd_old_real_id;
265 };
266 
267 #endif /* !EMBEDDED_LIBRARY */
268 
269 /**
270   Caches for non-transactional and transactional data before writing
271   it to the binary log.
272 
  @todo All the access functions for the flags suggest that the
  encapsulation is not done correctly, so try to move any logic that
  requires access to the flags into the cache.
276 */
277 class binlog_cache_data
278 {
279 public:
280 
binlog_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & cache_log_arg)281   binlog_cache_data(bool trx_cache_arg,
282                     my_off_t max_binlog_cache_size_arg,
283                     ulong *ptr_binlog_cache_use_arg,
284                     ulong *ptr_binlog_cache_disk_use_arg,
285                     const IO_CACHE &cache_log_arg)
286   : cache_log(cache_log_arg),
287     m_pending(0),
288     saved_max_binlog_cache_size(max_binlog_cache_size_arg),
289     ptr_binlog_cache_use(ptr_binlog_cache_use_arg),
290     ptr_binlog_cache_disk_use(ptr_binlog_cache_disk_use_arg)
291   {
292     reset();
293     flags.transactional= trx_cache_arg;
294     cache_log.end_of_file= saved_max_binlog_cache_size;
295   }
296 
297   int finalize(THD *thd, Log_event *end_event);
298   int finalize(THD *thd, Log_event *end_event, XID_STATE *xs);
299   int flush(THD *thd, my_off_t *bytes, bool *wrote_xid);
300   int write_event(THD *thd, Log_event *event);
301 
~binlog_cache_data()302   virtual ~binlog_cache_data()
303   {
304     assert(is_binlog_empty());
305     close_cached_file(&cache_log);
306   }
307 
is_binlog_empty() const308   bool is_binlog_empty() const
309   {
310     my_off_t pos= my_b_tell(&cache_log);
311     DBUG_PRINT("debug", ("%s_cache - pending: 0x%llx, bytes: %llu",
312                          (flags.transactional ? "trx" : "stmt"),
313                          (ulonglong) pending(), (ulonglong) pos));
314     return pending() == NULL && pos == 0;
315   }
316 
is_finalized() const317   bool is_finalized() const {
318     return flags.finalized;
319   }
320 
pending() const321   Rows_log_event *pending() const
322   {
323     return m_pending;
324   }
325 
set_pending(Rows_log_event * const pending)326   void set_pending(Rows_log_event *const pending)
327   {
328     m_pending= pending;
329   }
330 
set_incident(void)331   void set_incident(void)
332   {
333     flags.incident= true;
334   }
335 
has_incident(void) const336   bool has_incident(void) const
337   {
338     return flags.incident;
339   }
340 
341   /**
342     Sets the binlog_cache_data::Flags::flush_error flag if there
343     is an error while flushing cache to the file.
344 
345     @param thd  The client thread that is executing the transaction.
346   */
set_flush_error(THD * thd)347   void set_flush_error(THD *thd)
348   {
349     flags.flush_error= true;
350     if(is_trx_cache())
351     {
352       /*
353          If the cache is a transactional cache and if the write
354          has failed due to ENOSPC, then my_write() would have
355          set EE_WRITE error, so clear the error and create an
356          equivalent server error.
357       */
358       if (thd->is_error())
359         thd->clear_error();
360       char errbuf[MYSYS_STRERROR_SIZE];
361       my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), my_filename(cache_log.file),
362           errno, my_strerror(errbuf, sizeof(errbuf), errno));
363     }
364   }
365 
get_flush_error(void) const366   bool get_flush_error(void) const
367   {
368     return flags.flush_error;
369   }
370 
has_xid() const371   bool has_xid() const {
372     // There should only be an XID event if we are transactional
373     assert((flags.transactional && flags.with_xid) || !flags.with_xid);
374     return flags.with_xid;
375   }
376 
is_trx_cache() const377   bool is_trx_cache() const
378   {
379     return flags.transactional;
380   }
381 
get_byte_position() const382   my_off_t get_byte_position() const
383   {
384     return my_b_tell(&cache_log);
385   }
386 
cache_state_rollback(my_off_t pos_to_rollback)387   void cache_state_rollback(my_off_t pos_to_rollback)
388   {
389     if (pos_to_rollback)
390     {
391       std::map<my_off_t,cache_state>::iterator it;
392       it = cache_state_map.find(pos_to_rollback);
393       if (it != cache_state_map.end())
394       {
395         flags.with_rbr= it->second.with_rbr;
396         flags.with_sbr= it->second.with_sbr;
397         flags.with_start= it->second.with_start;
398         flags.with_end= it->second.with_end;
399         flags.with_content= it->second.with_content;
400       }
401       else
402         assert(it == cache_state_map.end());
403     }
404     // Rolling back to pos == 0 means cleaning up the cache.
405     else
406     {
407       flags.with_rbr= false;
408       flags.with_sbr= false;
409       flags.with_start= false;
410       flags.with_end= false;
411       flags.with_content= false;
412     }
413   }
414 
cache_state_checkpoint(my_off_t pos_to_checkpoint)415   void cache_state_checkpoint(my_off_t pos_to_checkpoint)
416   {
417     // We only need to store the cache state for pos > 0
418     if (pos_to_checkpoint)
419     {
420       cache_state state;
421       state.with_rbr= flags.with_rbr;
422       state.with_sbr= flags.with_sbr;
423       state.with_start= flags.with_start;
424       state.with_end= flags.with_end;
425       state.with_content= flags.with_content;
426       cache_state_map[pos_to_checkpoint]= state;
427     }
428   }
429 
reset()430   virtual void reset()
431   {
432     compute_statistics();
433     truncate(0);
434 
435     /*
436       If IOCACHE has a file associated, change its size to 0.
437       It is safer to do it here, since we are certain that one
438       asked the cache to go to position 0 with truncate.
439     */
440     if(cache_log.file != -1)
441     {
442       int error= 0;
443       if((error= my_chsize(cache_log.file, 0, 0, MYF(MY_WME))))
444         sql_print_warning("Unable to resize binlog IOCACHE auxilary file");
445 
446       DBUG_EXECUTE_IF("show_io_cache_size",
447                       {
448                         my_off_t file_size= my_seek(cache_log.file,
449                                                     0L,MY_SEEK_END,MYF(MY_WME+MY_FAE));
450                         sql_print_error("New size:%llu",
451                                         static_cast<ulonglong>(file_size));
452                       });
453     }
454 
455     flags.incident= false;
456     flags.with_xid= false;
457     flags.immediate= false;
458     flags.finalized= false;
459     flags.with_sbr= false;
460     flags.with_rbr= false;
461     flags.with_start= false;
462     flags.with_end= false;
463     flags.with_content= false;
464     flags.flush_error= false;
465 
466     /*
467       The truncate function calls reinit_io_cache that calls my_b_flush_io_cache
468       which may increase disk_writes. This breaks the disk_writes use by the
469       binary log which aims to compute the ratio between in-memory cache usage
470       and disk cache usage. To avoid this undesirable behavior, we reset the
471       variable after truncating the cache.
472     */
473     cache_log.disk_writes= 0;
474     cache_state_map.clear();
475     assert(is_binlog_empty());
476   }
477 
478   /*
479     Sets the write position to point at the position given. If the
480     cache has swapped to a file, it reinitializes it, so that the
481     proper data is added to the IO_CACHE buffer. Otherwise, it just
482     does a my_b_seek.
483 
484     my_b_seek will not work if the cache has swapped, that's why
485     we do this workaround.
486 
487     @param[IN]  pos the new write position.
488     @param[IN]  use_reinit if the position should be reset resorting
489                 to reset_io_cache (which may issue a flush_io_cache
490                 inside)
491 
492     @return The previous write position.
493    */
reset_write_pos(my_off_t pos,bool use_reinit)494   my_off_t reset_write_pos(my_off_t pos, bool use_reinit)
495   {
496     DBUG_ENTER("reset_write_pos");
497     assert(cache_log.type == WRITE_CACHE);
498 
499     my_off_t oldpos= get_byte_position();
500 
501     if (use_reinit)
502     {
503       MY_ATTRIBUTE((unused)) int reinit_res=
504         reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, 0);
505       assert(reinit_res == 0);
506     }
507     else
508       my_b_seek(&cache_log, pos);
509 
510     DBUG_RETURN(oldpos);
511   }
512 
513   /*
514     Cache to store data before copying it to the binary log.
515   */
516   IO_CACHE cache_log;
517 
518   /**
519     Returns information about the cache content with respect to
520     the binlog_format of the events.
521 
522     This will be used to set a flag on GTID_LOG_EVENT stating that the
523     transaction may have SBR statements or not, but the binlog dump
524     will show this flag as "rbr_only" when it is not set. That's why
525     an empty transaction should return true below, or else an empty
526     transaction would be assumed as "rbr_only" even not having RBR
527     events.
528 
529     When dumping a binary log content using mysqlbinlog client program,
530     for any transaction assumed as "rbr_only" it will be printed a
531     statement changing the transaction isolation level to READ COMMITTED.
532     It doesn't make sense to have an empty transaction "requiring" this
533     isolation level change.
534 
535     @return true  The cache have SBR events or is empty.
536     @return false The cache contains a transaction with no SBR events.
537    */
may_have_sbr_stmts()538   bool may_have_sbr_stmts()
539   {
540     return flags.with_sbr || !flags.with_rbr;
541   }
542 
543   /**
544     Check if the binlog cache contains an empty transaction, which has
545     two binlog events "BEGIN" and "COMMIT".
546 
547     @return true  The binlog cache contains an empty transaction.
548     @return false Otherwise.
549   */
has_empty_transaction()550   bool has_empty_transaction()
551   {
552     /*
553       The empty transaction has two events in trx/stmt binlog cache
554       and no changes (no SBR changing content and no RBR events).
555       Other transaction should not have two events. So we can identify
556       if this is an empty transaction by the event counter and the
557       cache flags.
558     */
559     if (flags.with_start &&     // Has transaction start statement
560             flags.with_end &&   // Has transaction end statement
561             !flags.with_sbr &&  // No statements changing content
562             !flags.with_rbr &&  // No rows changing content
563             !flags.immediate && // Not a DDL
564             !flags.with_xid &&  // Not a XID transaction and not an atomic DDL Query
565             !flags.with_content)// Does not have any content
566     {
567       assert(!flags.with_sbr); // No statements changing content
568       assert(!flags.with_rbr); // No rows changing content
569       assert(!flags.immediate);// Not a DDL
570       assert(!flags.with_xid); // Not a XID trx and not an atomic DDL Query
571 
572       return true;
573     }
574     return false;
575   }
576 
577   /**
578     Check if the binlog cache is empty or contains an empty transaction,
579     which has two binlog events "BEGIN" and "COMMIT".
580 
581     @return true  The binlog cache is empty or contains an empty transaction.
582     @return false Otherwise.
583   */
is_empty_or_has_empty_transaction()584   bool is_empty_or_has_empty_transaction()
585   {
586     return is_binlog_empty() || has_empty_transaction();
587   }
588 
589 protected:
590   /*
591     This structure should have all cache variables/flags that should be restored
592     when a ROLLBACK TO SAVEPOINT statement be executed.
593   */
594   struct cache_state
595   {
596     bool with_sbr;
597     bool with_rbr;
598     bool with_start;
599     bool with_end;
600     bool with_content;
601   };
602   /*
603     For every SAVEPOINT used, we will store a cache_state for the current
604     binlog cache position. So, if a ROLLBACK TO SAVEPOINT is used, we can
605     restore the cache_state values after truncating the binlog cache.
606   */
607   std::map<my_off_t, cache_state> cache_state_map;
608 
609   /*
610     It truncates the cache to a certain position. This includes deleting the
611     pending event.
612    */
truncate(my_off_t pos)613   void truncate(my_off_t pos)
614   {
615     DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
616     remove_pending_event();
617     /*
618       Whenever there is an error while flushing cache to file,
619       the local cache will not be in a normal state and the same
620       cache cannot be used without facing an assert.
621       So, clear the cache if there is a flush error.
622     */
623     MY_ATTRIBUTE((unused)) int reinit_res=
624       reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, get_flush_error());
625     assert(reinit_res == 0);
626     cache_log.end_of_file= saved_max_binlog_cache_size;
627   }
628 
629   /**
630      Flush pending event to the cache buffer.
631    */
flush_pending_event(THD * thd)632   int flush_pending_event(THD *thd) {
633     if (m_pending)
634     {
635       m_pending->set_flags(Rows_log_event::STMT_END_F);
636       if (int error= write_event(thd, m_pending))
637         return error;
638       thd->clear_binlog_table_maps();
639     }
640     return 0;
641   }
642 
643   /**
644     Remove the pending event.
645    */
remove_pending_event()646   int remove_pending_event() {
647     delete m_pending;
648     m_pending= NULL;
649     return 0;
650   }
651   struct Flags {
652     /*
653       Defines if this is either a trx-cache or stmt-cache, respectively, a
654       transactional or non-transactional cache.
655     */
656     bool transactional:1;
657 
658     /*
659       This indicates that some events did not get into the cache and most likely
660       it is corrupted.
661     */
662     bool incident:1;
663 
664     /*
665       This indicates that the cache should be written without BEGIN/END.
666     */
667     bool immediate:1;
668 
669     /*
670       This flag indicates that the buffer was finalized and has to be
671       flushed to disk.
672      */
673     bool finalized:1;
674 
675     /*
676       This indicates that the cache contain an XID event.
677      */
678     bool with_xid:1;
679 
680     /*
681       This indicates that the cache contain statements changing content.
682     */
683     bool with_sbr:1;
684 
685     /*
686       This indicates that the cache contain RBR event changing content.
687     */
688     bool with_rbr:1;
689 
690     /*
691       This indicates that the cache contain s transaction start statement.
692     */
693     bool with_start:1;
694 
695     /*
696       This indicates that the cache contain a transaction end event.
697     */
698     bool with_end:1;
699 
700     /*
701       This indicates that the cache contain content other than START/END.
702     */
703     bool with_content:1;
704 
705     /*
706       This flag is set to 'true' when there is an error while flushing the
707       I/O cache to file.
708     */
709     bool flush_error:1;
710   } flags;
711 
712 private:
713   /*
714     Pending binrows event. This event is the event where the rows are currently
715     written.
716    */
717   Rows_log_event *m_pending;
718 
719   /**
720     This function computes binlog cache and disk usage.
721   */
compute_statistics()722   void compute_statistics()
723   {
724     if (!is_binlog_empty())
725     {
726       (*ptr_binlog_cache_use)++;
727       if (cache_log.disk_writes != 0)
728         (*ptr_binlog_cache_disk_use)++;
729     }
730   }
731 
732   /*
733     Stores the values of maximum size of the cache allowed when this cache
734     is configured. This corresponds to either
735       . max_binlog_cache_size or max_binlog_stmt_cache_size.
736   */
737   my_off_t saved_max_binlog_cache_size;
738 
739   /*
740     Stores a pointer to the status variable that keeps track of the in-memory
741     cache usage. This corresponds to either
742       . binlog_cache_use or binlog_stmt_cache_use.
743   */
744   ulong *ptr_binlog_cache_use;
745 
746   /*
747     Stores a pointer to the status variable that keeps track of the disk
748     cache usage. This corresponds to either
749       . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
750   */
751   ulong *ptr_binlog_cache_disk_use;
752 
753   binlog_cache_data& operator=(const binlog_cache_data& info);
754   binlog_cache_data(const binlog_cache_data& info);
755 };
756 
757 
758 class binlog_stmt_cache_data
759   : public binlog_cache_data
760 {
761 public:
binlog_stmt_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & cache_log)762   binlog_stmt_cache_data(bool trx_cache_arg,
763                         my_off_t max_binlog_cache_size_arg,
764                         ulong *ptr_binlog_cache_use_arg,
765                         ulong *ptr_binlog_cache_disk_use_arg,
766                         const IO_CACHE &cache_log)
767     : binlog_cache_data(trx_cache_arg,
768                         max_binlog_cache_size_arg,
769                         ptr_binlog_cache_use_arg,
770                         ptr_binlog_cache_disk_use_arg,
771                         cache_log)
772   {
773   }
774 
775   using binlog_cache_data::finalize;
776 
777   int finalize(THD *thd);
778 };
779 
780 
781 int
finalize(THD * thd)782 binlog_stmt_cache_data::finalize(THD *thd)
783 {
784   if (flags.immediate)
785   {
786     if (int error= finalize(thd, NULL))
787       return error;
788   }
789   else
790   {
791     Query_log_event
792       end_evt(thd, STRING_WITH_LEN("COMMIT"), false, false, true, 0, true);
793     if (int error= finalize(thd, &end_evt))
794       return error;
795   }
796   return 0;
797 }
798 
799 
800 class binlog_trx_cache_data : public binlog_cache_data
801 {
802 public:
binlog_trx_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & cache_log)803   binlog_trx_cache_data(bool trx_cache_arg,
804                         my_off_t max_binlog_cache_size_arg,
805                         ulong *ptr_binlog_cache_use_arg,
806                         ulong *ptr_binlog_cache_disk_use_arg,
807                         const IO_CACHE &cache_log)
808   : binlog_cache_data(trx_cache_arg,
809                       max_binlog_cache_size_arg,
810                       ptr_binlog_cache_use_arg,
811                       ptr_binlog_cache_disk_use_arg,
812                       cache_log),
813     m_cannot_rollback(FALSE), before_stmt_pos(MY_OFF_T_UNDEF)
814   {   }
815 
reset()816   void reset()
817   {
818     DBUG_ENTER("reset");
819     DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
820     m_cannot_rollback= FALSE;
821     before_stmt_pos= MY_OFF_T_UNDEF;
822     binlog_cache_data::reset();
823     DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
824     DBUG_VOID_RETURN;
825   }
826 
cannot_rollback() const827   bool cannot_rollback() const
828   {
829     return m_cannot_rollback;
830   }
831 
set_cannot_rollback()832   void set_cannot_rollback()
833   {
834     m_cannot_rollback= TRUE;
835   }
836 
get_prev_position() const837   my_off_t get_prev_position() const
838   {
839      return before_stmt_pos;
840   }
841 
set_prev_position(my_off_t pos)842   void set_prev_position(my_off_t pos)
843   {
844     DBUG_ENTER("set_prev_position");
845     DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
846     before_stmt_pos= pos;
847     cache_state_checkpoint(before_stmt_pos);
848     DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
849     DBUG_VOID_RETURN;
850   }
851 
restore_prev_position()852   void restore_prev_position()
853   {
854     DBUG_ENTER("restore_prev_position");
855     DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
856     binlog_cache_data::truncate(before_stmt_pos);
857     cache_state_rollback(before_stmt_pos);
858     before_stmt_pos= MY_OFF_T_UNDEF;
859     DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
860     DBUG_VOID_RETURN;
861   }
862 
restore_savepoint(my_off_t pos)863   void restore_savepoint(my_off_t pos)
864   {
865     DBUG_ENTER("restore_savepoint");
866     DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
867     binlog_cache_data::truncate(pos);
868     if (pos <= before_stmt_pos)
869       before_stmt_pos= MY_OFF_T_UNDEF;
870     cache_state_rollback(pos);
871     DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
872     DBUG_VOID_RETURN;
873   }
874 
875   using binlog_cache_data::truncate;
876 
877   int truncate(THD *thd, bool all);
878 
879 private:
880   /*
881     It will be set TRUE if any statement which cannot be rolled back safely
882     is put in trx_cache.
883   */
884   bool m_cannot_rollback;
885 
886   /*
887     Binlog position before the start of the current statement.
888   */
889   my_off_t before_stmt_pos;
890 
891   binlog_trx_cache_data& operator=(const binlog_trx_cache_data& info);
892   binlog_trx_cache_data(const binlog_trx_cache_data& info);
893 };
894 
895 class binlog_cache_mngr {
896 public:
binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,ulong * ptr_binlog_stmt_cache_use_arg,ulong * ptr_binlog_stmt_cache_disk_use_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & stmt_cache_log,const IO_CACHE & trx_cache_log)897   binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,
898                     ulong *ptr_binlog_stmt_cache_use_arg,
899                     ulong *ptr_binlog_stmt_cache_disk_use_arg,
900                     my_off_t max_binlog_cache_size_arg,
901                     ulong *ptr_binlog_cache_use_arg,
902                     ulong *ptr_binlog_cache_disk_use_arg,
903                     const IO_CACHE &stmt_cache_log,
904                     const IO_CACHE &trx_cache_log)
905   : stmt_cache(FALSE, max_binlog_stmt_cache_size_arg,
906                ptr_binlog_stmt_cache_use_arg,
907                ptr_binlog_stmt_cache_disk_use_arg,
908                stmt_cache_log),
909     trx_cache(TRUE, max_binlog_cache_size_arg,
910               ptr_binlog_cache_use_arg,
911               ptr_binlog_cache_disk_use_arg,
912               trx_cache_log),
913     has_logged_xid(NULL)
914   {  }
915 
get_binlog_cache_data(bool is_transactional)916   binlog_cache_data* get_binlog_cache_data(bool is_transactional)
917   {
918     if (is_transactional)
919       return &trx_cache;
920     else
921       return &stmt_cache;
922   }
923 
get_binlog_cache_log(bool is_transactional)924   IO_CACHE* get_binlog_cache_log(bool is_transactional)
925   {
926     return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
927   }
928 
929   /**
930     Convenience method to check if both caches are empty.
931    */
is_binlog_empty() const932   bool is_binlog_empty() const {
933     return stmt_cache.is_binlog_empty() && trx_cache.is_binlog_empty();
934   }
935 
936   /*
937     clear stmt_cache and trx_cache if they are not empty
938   */
reset()939   void reset()
940   {
941     if (!stmt_cache.is_binlog_empty())
942       stmt_cache.reset();
943     if (!trx_cache.is_binlog_empty())
944       trx_cache.reset();
945   }
946 
947 #ifndef NDEBUG
dbug_any_finalized() const948   bool dbug_any_finalized() const {
949     return stmt_cache.is_finalized() || trx_cache.is_finalized();
950   }
951 #endif
952 
953   /*
954     Convenience method to flush both caches to the binary log.
955 
956     @param bytes_written Pointer to variable that will be set to the
957                          number of bytes written for the flush.
958     @param wrote_xid     Pointer to variable that will be set to @c
959                          true if any XID event was written to the
960                          binary log. Otherwise, the variable will not
961                          be touched.
962     @return Error code on error, zero if no error.
963    */
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)964   int flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
965   {
966     my_off_t stmt_bytes= 0;
967     my_off_t trx_bytes= 0;
968     assert(stmt_cache.has_xid() == 0);
969     int error= stmt_cache.flush(thd, &stmt_bytes, wrote_xid);
970     if (error)
971       return error;
972     DEBUG_SYNC(thd, "after_flush_stm_cache_before_flush_trx_cache");
973     if (int error= trx_cache.flush(thd, &trx_bytes, wrote_xid))
974       return error;
975     *bytes_written= stmt_bytes + trx_bytes;
976     return 0;
977   }
978 
979   /**
980     Check if at least one of transacaction and statement binlog caches
981     contains an empty transaction, other one is empty or contains an
982     empty transaction.
983 
984     @return true  At least one of transacaction and statement binlog
985                   caches an empty transaction, other one is emptry
986                   or contains an empty transaction.
987     @return false Otherwise.
988   */
has_empty_transaction()989   bool has_empty_transaction()
990   {
991     return (trx_cache.is_empty_or_has_empty_transaction() &&
992             stmt_cache.is_empty_or_has_empty_transaction() &&
993             !is_binlog_empty());
994   }
995 
996   /**
997     Check if manager contains consistent snapshot of log coordinates
998     and gtid_executed.
999 
1000     @return true  Consistent snapshot available
1001     @return false Otherwise
1002    */
has_consistent_snapshot() const1003   bool has_consistent_snapshot() const
1004   {
1005     /**
1006       snapshot_gtid_executed can be empty string
1007       if gtid_mode=OFF.
1008      */
1009 
1010     return binlog_info.log_file_name[0] != '\0';
1011   }
1012 
1013   /**
1014     Removes consistent snapshot from cache.
1015    */
drop_consistent_snapshot()1016   void drop_consistent_snapshot()
1017   {
1018     binlog_info.log_file_name[0]= '\0';
1019     snapshot_gtid_executed.clear();
1020   }
1021 
1022   binlog_stmt_cache_data stmt_cache;
1023   binlog_trx_cache_data trx_cache;
1024 
1025   LOG_INFO    binlog_info;
1026   std::string snapshot_gtid_executed;
1027 
1028   /*
1029     The bool flag is for preventing do_binlog_xa_commit_rollback()
1030     execution twice which can happen for "external" xa commit/rollback.
1031   */
1032   bool has_logged_xid;
1033 private:
1034 
1035   binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
1036   binlog_cache_mngr(const binlog_cache_mngr& info);
1037 };
1038 
1039 
thd_get_cache_mngr(const THD * thd)1040 static binlog_cache_mngr *thd_get_cache_mngr(const THD *thd)
1041 {
1042   /*
1043     If opt_bin_log is not set, binlog_hton->slot == -1 and hence
1044     thd_get_ha_data(thd, hton) segfaults.
1045   */
1046   assert(opt_bin_log);
1047   return (binlog_cache_mngr *)thd_get_ha_data(thd, binlog_hton);
1048 }
1049 
1050 
1051 /**
1052   Checks if the BINLOG_CACHE_SIZE's value is greater than MAX_BINLOG_CACHE_SIZE.
1053   If this happens, the BINLOG_CACHE_SIZE is set to MAX_BINLOG_CACHE_SIZE.
1054 */
check_binlog_cache_size(THD * thd)1055 void check_binlog_cache_size(THD *thd)
1056 {
1057   if (binlog_cache_size > max_binlog_cache_size)
1058   {
1059     if (thd)
1060     {
1061       push_warning_printf(thd, Sql_condition::SL_WARNING,
1062                           ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX,
1063                           ER(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
1064                           (ulong) binlog_cache_size,
1065                           (ulong) max_binlog_cache_size);
1066     }
1067     else
1068     {
1069       sql_print_warning(ER_DEFAULT(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
1070                         binlog_cache_size,
1071                         (ulong) max_binlog_cache_size);
1072     }
1073     binlog_cache_size= static_cast<ulong>(max_binlog_cache_size);
1074   }
1075 }
1076 
1077 /**
1078   Checks if the BINLOG_STMT_CACHE_SIZE's value is greater than MAX_BINLOG_STMT_CACHE_SIZE.
1079   If this happens, the BINLOG_STMT_CACHE_SIZE is set to MAX_BINLOG_STMT_CACHE_SIZE.
1080 */
check_binlog_stmt_cache_size(THD * thd)1081 void check_binlog_stmt_cache_size(THD *thd)
1082 {
1083   if (binlog_stmt_cache_size > max_binlog_stmt_cache_size)
1084   {
1085     if (thd)
1086     {
1087       push_warning_printf(thd, Sql_condition::SL_WARNING,
1088                           ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX,
1089                           ER(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
1090                           (ulong) binlog_stmt_cache_size,
1091                           (ulong) max_binlog_stmt_cache_size);
1092     }
1093     else
1094     {
1095       sql_print_warning(ER_DEFAULT(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
1096                         binlog_stmt_cache_size,
1097                         (ulong) max_binlog_stmt_cache_size);
1098     }
1099     binlog_stmt_cache_size= static_cast<ulong>(max_binlog_stmt_cache_size);
1100   }
1101 }
1102 
1103 /**
1104  Check whether binlog_hton has valid slot and enabled
1105 */
binlog_enabled()1106 bool binlog_enabled()
1107 {
1108 	return(binlog_hton && binlog_hton->slot != HA_SLOT_UNDEF);
1109 }
1110 
1111  /*
1112   Save position of binary log transaction cache.
1113 
1114   SYNPOSIS
1115     binlog_trans_log_savepos()
1116 
1117     thd      The thread to take the binlog data from
1118     pos      Pointer to variable where the position will be stored
1119 
1120   DESCRIPTION
1121 
1122     Save the current position in the binary log transaction cache into
1123     the variable pointed to by 'pos'
1124  */
1125 
1126 static void
binlog_trans_log_savepos(THD * thd,my_off_t * pos)1127 binlog_trans_log_savepos(THD *thd, my_off_t *pos)
1128 {
1129   DBUG_ENTER("binlog_trans_log_savepos");
1130   assert(pos != NULL);
1131   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
1132   assert(mysql_bin_log.is_open());
1133   *pos= cache_mngr->trx_cache.get_byte_position();
1134   DBUG_PRINT("return", ("position: %lu", (ulong) *pos));
1135   cache_mngr->trx_cache.cache_state_checkpoint(*pos);
1136   DBUG_VOID_RETURN;
1137 }
1138 
binlog_dummy_recover(handlerton * hton,XID * xid,uint len)1139 static int binlog_dummy_recover(handlerton *hton, XID *xid, uint len)
1140 {
1141   return 0;
1142 }
1143 
1144 /**
1145   Auxiliary class to copy serialized events to the binary log and
1146   correct some of the fields that are not known until just before
1147   writing the event.
1148 
1149   This class allows feeding events in parts, so it is practical to use
1150   in do_write_cache() which reads events from an IO_CACHE where events
1151   may span mutiple cache pages.
1152 
1153   The following fields are fixed before writing the event:
1154   - end_log_pos is set
1155   - the checksum is computed if checksums are enabled
1156   - the length is incremented by the checksum size if checksums are enabled
1157 */
1158 class Binlog_event_writer
1159 {
1160   IO_CACHE *output_cache;
1161   bool have_checksum;
1162   ha_checksum initial_checksum;
1163   ha_checksum checksum;
1164   uint32 end_log_pos;
1165   THD *thd;
1166 
1167 public:
1168   /**
1169     Constructs a new Binlog_event_writer. Should be called once before
1170     starting to flush the transaction or statement cache to the
1171     binlog.
1172 
1173     @param output_cache_arg IO_CACHE to write to.
1174     @param thd_arg THD to account written binlog byte statistics to
1175     @param have_checksum_al
1176   */
Binlog_event_writer(IO_CACHE * output_cache_arg,THD * thd_arg)1177   Binlog_event_writer(IO_CACHE *output_cache_arg, THD *thd_arg)
1178     : output_cache(output_cache_arg),
1179       have_checksum(binlog_checksum_options !=
1180                     binary_log::BINLOG_CHECKSUM_ALG_OFF),
1181       initial_checksum(my_checksum(0L, NULL, 0)),
1182       checksum(initial_checksum),
1183       end_log_pos(my_b_tell(output_cache)),
1184       thd(thd_arg)
1185   {
1186     // Simulate checksum error
1187     if (DBUG_EVALUATE_IF("fault_injection_crc_value", 1, 0))
1188       checksum--;
1189   }
1190 
1191   Event_encrypter event_encrypter;
1192 
1193   /**
1194     Write part of an event to disk.
1195 
1196     @param buf_p[IN,OUT] Points to buffer with data to write.  The
1197     caller must set this initially, and it will be increased by the
1198     number of bytes written.
1199 
1200     @param buf_len_p[IN,OUT] Points to the remaining length of the
1201     buffer, i.e., from buf_p to the end of the buffer.  The caller
1202     must set this initially, and it will be decreased by the number of
1203     written bytes.
1204 
1205     @param event_len_p[IN,OUT] Points to the remaining length of the
1206     event, i.e., the size of the event minus what was already written.
1207     This must be initialized to zero by the caller, must be remembered
1208     by the caller between calls, and is updated by this function: when
1209     an event begins it is set to the length of the event, and for each
1210     call it is decreased by the number of written bytes.
1211 
1212     It is allowed that buf_len_p is less than event_len_p (i.e., event
1213     is only partial) and that event_len_p is less than buf_len_p
1214     (i.e., there is more than this event in the buffer).  This
1215     function will write as much as is available of one event, but
1216     never more than one.  It is required that buf_len_p >=
1217     LOG_EVENT_HEADER_LEN.
1218 
1219     @retval true Error, i.e., my_b_write failed.
1220     @retval false Success.
1221   */
write_event_part(uchar ** buf_p,uint32 * buf_len_p,uint32 * event_len_p)1222   bool write_event_part(uchar **buf_p, uint32 *buf_len_p, uint32 *event_len_p)
1223   {
1224     DBUG_ENTER("Binlog_event_writer::write_event_part");
1225 
1226     if (*buf_len_p == 0)
1227       DBUG_RETURN(false);
1228 
1229     size_t len= *event_len_p;
1230     uchar *pos= *buf_p;
1231 
1232     bool is_header= (*event_len_p == 0);
1233 
1234     // This is the beginning of an event
1235     if (*event_len_p == 0)
1236     {
1237       // Caller must ensure that the first part of the event contains
1238       // a full event header.
1239       assert(*buf_len_p >= LOG_EVENT_HEADER_LEN);
1240 
1241       // Read event length
1242       *event_len_p= uint4korr(*buf_p + EVENT_LEN_OFFSET);
1243 
1244       // Increase end_log_pos
1245       end_log_pos+= *event_len_p;
1246 
1247       // Change event length if checksum is enabled
1248       if (have_checksum)
1249       {
1250         int4store(*buf_p + EVENT_LEN_OFFSET,
1251                   *event_len_p + BINLOG_CHECKSUM_LEN);
1252         // end_log_pos is shifted by the checksum length
1253         end_log_pos+= BINLOG_CHECKSUM_LEN;
1254       }
1255 
1256       // Store end_log_pos
1257       int4store(*buf_p + LOG_POS_OFFSET, end_log_pos);
1258       assert(output_cache == mysql_bin_log.get_log_file());
1259 
1260       len= *event_len_p;
1261 
1262       if (event_encrypter.is_encryption_enabled())
1263       {
1264         uint32 write_bytes= std::min<uint32>(*buf_len_p, *event_len_p);
1265         len= write_bytes;
1266         assert(write_bytes > 0);
1267 
1268         // update the checksum
1269         if (have_checksum)
1270           checksum= my_checksum(checksum, *buf_p, write_bytes);
1271 
1272         if (event_encrypter.init(output_cache, pos, len))
1273           DBUG_RETURN(true);
1274       }
1275     }
1276 
1277     // write the buffer
1278     uint32 write_bytes= std::min<uint32>(*buf_len_p, len);
1279     assert(write_bytes > 0);
1280     if (event_encrypter.encrypt_and_write(output_cache, pos, write_bytes))
1281       DBUG_RETURN(true);
1282 
1283     if (event_encrypter.is_encryption_enabled() && is_header)
1284       write_bytes+=4;
1285     else if (have_checksum)
1286       checksum= my_checksum(checksum, *buf_p, write_bytes);
1287 
1288     // Step positions.
1289     *buf_p+= write_bytes;
1290     *buf_len_p-= write_bytes;
1291     *event_len_p-= write_bytes;
1292     thd->binlog_bytes_written+= write_bytes;
1293 
1294     if (*event_len_p == 0)
1295     {
1296       // store checksum
1297       if (have_checksum)
1298       {
1299         uchar checksum_buf[BINLOG_CHECKSUM_LEN];
1300         int4store(checksum_buf, checksum);
1301         if (event_encrypter.encrypt_and_write(output_cache, checksum_buf, BINLOG_CHECKSUM_LEN))
1302           DBUG_RETURN(true);
1303         thd->binlog_bytes_written+= BINLOG_CHECKSUM_LEN;
1304         checksum= initial_checksum;
1305       }
1306       if (event_encrypter.is_encryption_enabled() && event_encrypter.finish(output_cache))
1307         DBUG_RETURN(true);
1308     }
1309 
1310     DBUG_RETURN(false);
1311   }
1312 
1313   /**
1314     Write a full event to disk.
1315 
1316     This is a wrapper around write_event_part, which handles the
1317     special case where you have a complete event in the buffer.
1318 
1319     @param buf Buffer to write.
1320     @param buf_len Number of bytes to write.
1321 
1322     @retval true Error, i.e., my_b_write failed.
1323     @retval false Success.
1324   */
write_full_event(uchar * buf,uint32 buf_len)1325   bool write_full_event(uchar *buf, uint32 buf_len)
1326   {
1327     uint32 event_len_unused= 0;
1328     bool ret= write_event_part(&buf, &buf_len, &event_len_unused);
1329     assert(buf_len == 0);
1330     assert(event_len_unused == 0);
1331     return ret;
1332   }
1333 
1334 };
1335 
1336 
1337 /*
1338   this function is mostly a placeholder.
1339   conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
1340   should be moved here.
1341 */
1342 
binlog_init(void * p)1343 static int binlog_init(void *p)
1344 {
1345   binlog_hton= (handlerton *)p;
1346   binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
1347   binlog_hton->db_type=DB_TYPE_BINLOG;
1348   binlog_hton->savepoint_offset= sizeof(my_off_t);
1349   binlog_hton->close_connection= binlog_close_connection;
1350   binlog_hton->savepoint_set= binlog_savepoint_set;
1351   binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
1352   binlog_hton->savepoint_rollback_can_release_mdl=
1353                                      binlog_savepoint_rollback_can_release_mdl;
1354   binlog_hton->commit= binlog_commit;
1355   binlog_hton->commit_by_xid= binlog_xa_commit;
1356   binlog_hton->rollback= binlog_rollback;
1357   binlog_hton->rollback_by_xid= binlog_xa_rollback;
1358   binlog_hton->prepare= binlog_prepare;
1359   binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
1360   binlog_hton->clone_consistent_snapshot= binlog_clone_consistent_snapshot;
1361   binlog_hton->recover=binlog_dummy_recover;
1362   binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
1363   return 0;
1364 }
1365 
1366 
binlog_deinit(void * p)1367 static int binlog_deinit(void *p)
1368 {
1369   /* Using binlog as TC after the binlog has been unloaded, won't work */
1370   if (tc_log == &mysql_bin_log)
1371     tc_log= NULL;
1372   binlog_hton= NULL;
1373   return 0;
1374 }
1375 
1376 
binlog_close_connection(handlerton * hton,THD * thd)1377 static int binlog_close_connection(handlerton *hton, THD *thd)
1378 {
1379   DBUG_ENTER("binlog_close_connection");
1380   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
1381   assert(cache_mngr->is_binlog_empty());
1382   DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) NULL));
1383   thd_set_ha_data(thd, binlog_hton, NULL);
1384   cache_mngr->~binlog_cache_mngr();
1385   my_free(cache_mngr);
1386   DBUG_RETURN(0);
1387 }
1388 
write_event(THD * thd,Log_event * ev)1389 int binlog_cache_data::write_event(THD *thd, Log_event *ev)
1390 {
1391   DBUG_ENTER("binlog_cache_data::write_event");
1392 
1393   if (ev != NULL)
1394   {
1395     DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
1396                   {DBUG_SET("+d,simulate_file_write_error");});
1397 
1398     DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
1399                   {
1400                   static int count= -1;
1401                   count++;
1402                   if(count %4 == 3 && ev->get_type_code() ==
1403                       binary_log::WRITE_ROWS_EVENT)
1404                     DBUG_SET("+d,simulate_temp_file_write_error");
1405                   });
1406     if (ev->write(&cache_log) != 0)
1407     {
1408       DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
1409                       {
1410                         DBUG_SET("-d,simulate_file_write_error");
1411                         DBUG_SET("-d,simulate_disk_full_at_flush_pending");
1412                         /*
1413                            after +d,simulate_file_write_error the local cache
1414                            is in unsane state. Since -d,simulate_file_write_error
1415                            revokes the first simulation do_write_cache()
1416                            can't be run without facing an assert.
1417                            So it's blocked with the following 2nd simulation:
1418                         */
1419                         DBUG_SET("+d,simulate_do_write_cache_failure");
1420                       });
1421 
1422       DBUG_EXECUTE_IF("simulate_temp_file_write_error",
1423                       {
1424                         DBUG_SET("-d,simulate_temp_file_write_error");
1425                       });
1426       /*
1427         If the flush has failed due to ENOSPC error, set the
1428         flush_error flag.
1429       */
1430       if (thd->is_error() && my_errno() == ENOSPC)
1431       {
1432         set_flush_error(thd);
1433       }
1434       DBUG_RETURN(1);
1435     }
1436     if (ev->get_type_code() == binary_log::XID_EVENT)
1437       flags.with_xid= true;
1438     if (ev->is_using_immediate_logging())
1439       flags.immediate= true;
1440     /* With respect to the event type being written */
1441     if (ev->is_sbr_logging_format())
1442       flags.with_sbr= true;
1443     if (ev->is_rbr_logging_format())
1444       flags.with_rbr= true;
1445 #ifndef EMBEDDED_LIBRARY
1446     /* With respect to empty transactions */
1447     if (ev->starts_group())
1448       flags.with_start= true;
1449     if (ev->ends_group())
1450       flags.with_end= true;
1451     if ((!ev->starts_group() && !ev->ends_group())
1452         ||ev->get_type_code() == binary_log::VIEW_CHANGE_EVENT)
1453       flags.with_content= true;
1454 #endif
1455   }
1456   DBUG_RETURN(0);
1457 }
1458 
assign_automatic_gtids_to_flush_group(THD * first_seen)1459 bool MYSQL_BIN_LOG::assign_automatic_gtids_to_flush_group(THD *first_seen)
1460 {
1461   DBUG_ENTER("MYSQL_BIN_LOG::assign_automatic_gtids_to_flush_group");
1462   bool error= false;
1463   bool is_global_sid_locked= false;
1464   rpl_sidno locked_sidno= 0;
1465 
1466   for (THD *head= first_seen ; head ; head = head->next_to_commit)
1467   {
1468     assert(head->variables.gtid_next.type != UNDEFINED_GROUP);
1469 
1470     /* Generate GTID */
1471     if (head->variables.gtid_next.type == AUTOMATIC_GROUP)
1472     {
1473       if (!is_global_sid_locked)
1474       {
1475         global_sid_lock->rdlock();
1476         is_global_sid_locked= true;
1477       }
1478       if (gtid_state->generate_automatic_gtid(head,
1479               head->get_transaction()->get_rpl_transaction_ctx()->get_sidno(),
1480               head->get_transaction()->get_rpl_transaction_ctx()->get_gno(),
1481               &locked_sidno)
1482               != RETURN_STATUS_OK)
1483       {
1484         head->commit_error= THD::CE_FLUSH_GNO_EXHAUSTED_ERROR;
1485         error= true;
1486       }
1487     }
1488     else
1489     {
1490       DBUG_PRINT("info", ("thd->variables.gtid_next.type=%d "
1491                           "thd->owned_gtid.sidno=%d",
1492                           head->variables.gtid_next.type,
1493                           head->owned_gtid.sidno));
1494       if (head->variables.gtid_next.type == GTID_GROUP)
1495         assert(head->owned_gtid.sidno > 0);
1496       else
1497       {
1498         assert(head->variables.gtid_next.type == ANONYMOUS_GROUP);
1499         assert(head->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS);
1500       }
1501     }
1502   }
1503 
1504   if (locked_sidno > 0)
1505     gtid_state->unlock_sidno(locked_sidno);
1506 
1507   if (is_global_sid_locked)
1508     global_sid_lock->unlock();
1509 
1510   DBUG_RETURN(error);
1511 }
1512 
1513 
1514 /**
1515   Write the Gtid_log_event to the binary log (prior to writing the
1516   statement or transaction cache).
1517 
1518   @param thd Thread that is committing.
1519   @param cache_data The cache that is flushing.
1520   @param writer The event will be written to this Binlog_event_writer object.
1521 
1522   @retval false Success.
1523   @retval true Error.
1524 */
write_gtid(THD * thd,binlog_cache_data * cache_data,Binlog_event_writer * writer)1525 bool MYSQL_BIN_LOG::write_gtid(THD *thd, binlog_cache_data *cache_data,
1526                                Binlog_event_writer *writer)
1527 {
1528   DBUG_ENTER("MYSQL_BIN_LOG::write_gtid");
1529 
1530   /*
1531     The GTID for the THD was assigned at
1532     assign_automatic_gtids_to_flush_group()
1533   */
1534   assert(thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS ||
1535          thd->owned_gtid.sidno > 0);
1536 
1537   int64 sequence_number, last_committed;
1538   /* Generate logical timestamps for MTS */
1539   m_dependency_tracker.get_dependency(thd, sequence_number, last_committed);
1540 
1541   /*
1542     In case both the transaction cache and the statement cache are
1543     non-empty, both will be flushed in sequence and logged as
1544     different transactions. Then the second transaction must only
1545     be executed after the first one has committed. Therefore, we
1546     need to set last_committed for the second transaction equal to
1547     last_committed for the first transaction. This is done in
1548     binlog_cache_data::flush. binlog_cache_data::flush uses the
1549     condition trn_ctx->last_committed==SEQ_UNINIT to detect this
1550     situation, hence the need to set it here.
1551   */
1552   thd->get_transaction()->last_committed= SEQ_UNINIT;
1553 
1554 
1555   /*
1556     Generate and write the Gtid_log_event.
1557   */
1558   Gtid_log_event gtid_event(thd, cache_data->is_trx_cache(),
1559                             last_committed, sequence_number,
1560                             cache_data->may_have_sbr_stmts());
1561   uchar buf[Gtid_log_event::MAX_EVENT_LENGTH];
1562   uint32 buf_len= gtid_event.write_to_memory(buf);
1563   bool ret= writer->write_full_event(buf, buf_len);
1564 
1565   DBUG_RETURN(ret);
1566 }
1567 
1568 
gtid_end_transaction(THD * thd)1569 int MYSQL_BIN_LOG::gtid_end_transaction(THD *thd)
1570 {
1571   DBUG_ENTER("MYSQL_BIN_LOG::gtid_end_transaction");
1572 
1573   DBUG_PRINT("info", ("query=%s", thd->query().str));
1574 
1575   if (thd->owned_gtid.sidno > 0)
1576   {
1577     assert(thd->variables.gtid_next.type == GTID_GROUP);
1578 
1579     if (!opt_bin_log || (thd->slave_thread && !opt_log_slave_updates))
1580     {
1581       /*
1582         If the binary log is disabled for this thread (either by
1583         log_bin=0 or sql_log_bin=0 or by log_slave_updates=0 for a
1584         slave thread), then the statement must not be written to the
1585         binary log.  In this case, we just save the GTID into the
1586         table directly.
1587 
1588         (This only happens for DDL, since DML will save the GTID into
1589         table and release ownership inside ha_commit_trans.)
1590       */
1591       if (gtid_state->save(thd) != 0)
1592       {
1593         gtid_state->update_on_rollback(thd);
1594         DBUG_RETURN(1);
1595       }
1596       else
1597         gtid_state->update_on_commit(thd);
1598     }
1599     else
1600     {
1601       /*
1602         If statement is supposed to be written to binlog, we write it
1603         to the binary log.  Inserting into table and releasing
1604         ownership will be done in the binlog commit handler.
1605       */
1606 
1607       /*
1608         thd->cache_mngr may be uninitialized if the first transaction
1609         executed by the client is empty.
1610       */
1611       if (thd->binlog_setup_trx_data())
1612         DBUG_RETURN(1);
1613       binlog_cache_data *cache_data= &thd_get_cache_mngr(thd)->trx_cache;
1614 
1615       // Generate BEGIN event
1616       Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE,
1617                             FALSE, TRUE, 0, TRUE);
1618       assert(!qinfo.is_using_immediate_logging());
1619 
1620       /*
1621         Write BEGIN event and then commit (which will generate commit
1622         event and Gtid_log_event)
1623       */
1624       DBUG_PRINT("debug", ("Writing to trx_cache"));
1625       if (cache_data->write_event(thd, &qinfo) ||
1626           mysql_bin_log.commit(thd, true))
1627         DBUG_RETURN(1);
1628     }
1629   }
1630   else if (thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS ||
1631            /*
1632              A transaction with an empty owned gtid should call
1633              end_gtid_violating_transaction(...) to clear the
1634              flag thd->has_gtid_consistency_violatoin in case
1635              it is set. It missed the clear in ordered_commit,
1636              because its binlog transaction cache is empty.
1637            */
1638            thd->has_gtid_consistency_violation)
1639 
1640   {
1641     gtid_state->update_on_commit(thd);
1642   }
1643   else if (thd->variables.gtid_next.type == GTID_GROUP &&
1644            thd->owned_gtid.is_empty())
1645   {
1646     assert(thd->has_gtid_consistency_violation == false);
1647     gtid_state->update_on_commit(thd);
1648   }
1649 
1650   DBUG_RETURN(0);
1651 }
1652 
1653 /**
1654   This function finalizes the cache preparing for commit or rollback.
1655 
1656   The function just writes all the necessary events to the cache but
1657   does not flush the data to the binary log file. That is the role of
1658   the binlog_cache_data::flush function.
1659 
1660   @see binlog_cache_data::flush
1661 
1662   @param thd                The thread whose transaction should be flushed
1663   @param cache_data         Pointer to the cache
1664   @param end_ev             The end event either commit/rollback
1665 
1666   @return
1667     nonzero if an error pops up when flushing the cache.
1668 */
1669 int
finalize(THD * thd,Log_event * end_event)1670 binlog_cache_data::finalize(THD *thd, Log_event *end_event)
1671 {
1672   DBUG_ENTER("binlog_cache_data::finalize");
1673   if (!is_binlog_empty())
1674   {
1675     assert(!flags.finalized);
1676     if (int error= flush_pending_event(thd))
1677       DBUG_RETURN(error);
1678     if (int error= write_event(thd, end_event))
1679       DBUG_RETURN(error);
1680     flags.finalized= true;
1681     DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
1682   }
1683   DBUG_RETURN(0);
1684 }
1685 
1686 
1687 /**
1688    The method writes XA END query to XA-prepared transaction's cache
1689    and calls the "basic" finalize().
1690 
1691    @return error code, 0 success
1692 */
1693 
finalize(THD * thd,Log_event * end_event,XID_STATE * xs)1694 int binlog_cache_data::finalize(THD *thd, Log_event *end_event, XID_STATE *xs)
1695 {
1696   int error= 0;
1697   char buf[XID::ser_buf_size];
1698   char query[sizeof("XA END") + 1 + sizeof(buf)];
1699   int qlen= sprintf(query, "XA END %s", xs->get_xid()->serialize(buf));
1700   Query_log_event qev(thd, query, qlen, true, false, true, 0);
1701 
1702   if ((error= write_event(thd, &qev)))
1703     return error;
1704 
1705   return finalize(thd, end_event);
1706 }
1707 
1708 
1709 /**
1710   Flush caches to the binary log.
1711 
1712   If the cache is finalized, the cache will be flushed to the binary
1713   log file. If the cache is not finalized, nothing will be done.
1714 
1715   If flushing fails for any reason, an error will be reported and the
1716   cache will be reset. Flushing can fail in two circumstances:
1717 
1718   - It was not possible to write the cache to the file. In this case,
1719     it does not make sense to keep the cache.
1720 
1721   - The cache was successfully written to disk but post-flush actions
1722     (such as binary log rotation) failed. In this case, the cache is
1723     already written to disk and there is no reason to keep it.
1724 
1725   @see binlog_cache_data::finalize
1726  */
1727 int
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)1728 binlog_cache_data::flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
1729 {
1730   /*
1731     Doing a commit or a rollback including non-transactional tables,
1732     i.e., ending a transaction where we might write the transaction
1733     cache to the binary log.
1734 
1735     We can always end the statement when ending a transaction since
1736     transactions are not allowed inside stored functions. If they
1737     were, we would have to ensure that we're not ending a statement
1738     inside a stored function.
1739   */
1740   DBUG_ENTER("binlog_cache_data::flush");
1741   DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
1742   int error= 0;
1743   if (flags.finalized)
1744   {
1745     my_off_t bytes_in_cache= my_b_tell(&cache_log);
1746     Transaction_ctx *trn_ctx= thd->get_transaction();
1747 
1748     DBUG_PRINT("debug", ("bytes_in_cache: %llu", bytes_in_cache));
1749 
1750     trn_ctx->sequence_number= mysql_bin_log.m_dependency_tracker.step();
1751     /*
1752       In case of two caches the transaction is split into two groups.
1753       The 2nd group is considered to be a successor of the 1st rather
1754       than to have a common commit parent with it.
1755       Notice that due to a simple method of detection that the current is
1756       the 2nd cache being flushed, the very first few transactions may be logged
1757       sequentially (a next one is tagged as if a preceding one is its
1758       commit parent).
1759     */
1760     if (trn_ctx->last_committed == SEQ_UNINIT)
1761       trn_ctx->last_committed= trn_ctx->sequence_number - 1;
1762 
1763     /*
1764       The GTID is written prior to flushing the statement cache, if
1765       the transaction has written to the statement cache; and prior to
1766       flushing the transaction cache if the transaction has written to
1767       the transaction cache.  If GTIDs are enabled, then transactional
1768       and non-transactional updates cannot be mixed, so at most one of
1769       the caches can be non-empty, so just one GTID will be
1770       generated. If GTIDs are disabled, then no GTID is generated at
1771       all; if both the transactional cache and the statement cache are
1772       non-empty then we get two Anonymous_gtid_log_events, which is
1773       correct.
1774     */
1775     Binlog_event_writer writer(mysql_bin_log.get_log_file(), thd);
1776 
1777     if (mysql_bin_log.get_crypto_data()->is_enabled())
1778       writer.event_encrypter.enable_encryption(mysql_bin_log.get_crypto_data());
1779 
1780     /* The GTID ownership process might set the commit_error */
1781     error= (thd->commit_error == THD::CE_FLUSH_ERROR ||
1782            thd->commit_error == THD::CE_FLUSH_GNO_EXHAUSTED_ERROR);
1783 
1784     DBUG_EXECUTE_IF("simulate_binlog_flush_error",
1785                     {
1786                       if (rand() % 3 == 0)
1787                       {
1788                         thd->commit_error= THD::CE_FLUSH_ERROR;
1789                       }
1790                     };);
1791 
1792     if (!error)
1793       if ((error= mysql_bin_log.write_gtid(thd, this, &writer)))
1794         thd->commit_error= THD::CE_FLUSH_ERROR;
1795     if (!error)
1796       error= mysql_bin_log.write_cache(thd, this, &writer);
1797 
1798     if (flags.with_xid && error == 0)
1799       *wrote_xid= true;
1800 
1801     /*
1802       Reset have to be after the if above, since it clears the
1803       with_xid flag
1804     */
1805     reset();
1806     if (bytes_written)
1807       *bytes_written= bytes_in_cache;
1808   }
1809   assert(!flags.finalized);
1810   DBUG_RETURN(error);
1811 }
1812 
1813 /**
1814   This function truncates the transactional cache upon committing or rolling
1815   back either a transaction or a statement.
1816 
1817   @param thd        The thread whose transaction should be flushed
1818   @param cache_mngr Pointer to the cache data to be flushed
1819   @param all        @c true means truncate the transaction, otherwise the
1820                     statement must be truncated.
1821 
1822   @return
1823     nonzero if an error pops up when truncating the transactional cache.
1824 */
1825 int
truncate(THD * thd,bool all)1826 binlog_trx_cache_data::truncate(THD *thd, bool all)
1827 {
1828   DBUG_ENTER("binlog_trx_cache_data::truncate");
1829   int error=0;
1830 
1831   DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1832                       FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1833                       FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1834                       all ? "all" : "stmt"));
1835 
1836   remove_pending_event();
1837 
1838   /*
1839     If rolling back an entire transaction or a single statement not
1840     inside a transaction, we reset the transaction cache.
1841   */
1842   if (ending_trans(thd, all))
1843   {
1844     if (has_incident())
1845     {
1846       const char* err_msg= "Error happend while resetting the transaction "
1847                            "cache for a rolled back transaction or a single "
1848                            "statement not inside a transaction.";
1849       error= mysql_bin_log.write_incident(thd, true/*need_lock_log=true*/,
1850                                           err_msg);
1851     }
1852     reset();
1853   }
1854   /*
1855     If rolling back a statement in a transaction, we truncate the
1856     transaction cache to remove the statement.
1857   */
1858   else if (get_prev_position() != MY_OFF_T_UNDEF)
1859     restore_prev_position();
1860 
1861   thd->clear_binlog_table_maps();
1862 
1863   DBUG_RETURN(error);
1864 }
1865 
1866 
get_xa_opt(THD * thd)1867 inline enum xa_option_words get_xa_opt(THD *thd)
1868 {
1869   enum xa_option_words xa_opt= XA_NONE;
1870   switch(thd->lex->sql_command)
1871   {
1872   case SQLCOM_XA_COMMIT:
1873     xa_opt= static_cast<Sql_cmd_xa_commit*>(thd->lex->m_sql_cmd)->get_xa_opt();
1874     break;
1875   default:
1876     break;
1877   }
1878 
1879   return xa_opt;
1880 }
1881 
1882 
1883 /**
1884    Predicate function yields true when XA transaction is
1885    being logged having a proper state ready for prepare or
1886    commit in one phase.
1887 
1888    @param thd    THD pointer of running transaction
1889    @return true  When the being prepared transaction should be binlogged,
1890            false otherwise.
1891 */
1892 
is_loggable_xa_prepare(THD * thd)1893 inline bool is_loggable_xa_prepare(THD *thd)
1894 {
1895   /*
1896     simulate_commit_failure is doing a trick with XID_STATE while
1897     the ongoing transaction is not XA, and therefore to be errored out,
1898     asserted below. In that case because of the
1899     latter fact the function returns @c false.
1900   */
1901   DBUG_EXECUTE_IF("simulate_commit_failure",
1902                   {
1903                     XID_STATE *xs= thd->get_transaction()->xid_state();
1904                     assert((thd->is_error() &&
1905                             xs->get_state() == XID_STATE::XA_IDLE) ||
1906                            xs->get_state() == XID_STATE::XA_NOTR);
1907                   });
1908 
1909   return DBUG_EVALUATE_IF("simulate_commit_failure",
1910                           false,
1911                           thd->get_transaction()->xid_state()->
1912                           has_state(XID_STATE::XA_IDLE));
1913 }
1914 
binlog_prepare(handlerton * hton,THD * thd,bool all)1915 static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1916 {
1917   DBUG_ENTER("binlog_prepare");
1918   if (!all)
1919   {
1920     thd->get_transaction()->store_commit_parent(mysql_bin_log.
1921       m_dependency_tracker.get_max_committed_timestamp());
1922 
1923   }
1924 
1925   DBUG_RETURN(all && is_loggable_xa_prepare(thd) ?
1926               mysql_bin_log.commit(thd, true) : 0);
1927 }
1928 
1929 
1930 /**
1931    Logging XA commit/rollback of a prepared transaction.
1932 
1933    The function is called at XA-commit or XA-rollback logging via
1934    two paths: the recovered-or-slave-applier or immediately through
1935    the  XA-prepared transaction connection itself.
1936    It fills in appropiate event in the statement cache whenever
1937    xid state is marked with is_binlogged() flag that indicates
1938    the prepared part of the transaction must've been logged.
1939 
1940    About early returns from the function.
1941    In the recovered-or-slave-applier case the function may be called
1942    for the 2nd time, which has_logged_xid monitors.
1943    ONE_PHASE option to XA-COMMIT is handled to skip
1944    writing XA-commit event now.
1945    And the final early return check is for the read-only XA that is
1946    not to be logged.
1947 
1948    @param thd          THD handle
1949    @param xid          a pointer to XID object that is serialized
1950    @param commit       when @c true XA-COMMIT is to be logged,
1951                        and @c false when it's XA-ROLLBACK.
1952    @return error code, 0 success
1953 */
1954 
do_binlog_xa_commit_rollback(THD * thd,XID * xid,bool commit)1955 inline int do_binlog_xa_commit_rollback(THD *thd, XID *xid, bool commit)
1956 {
1957   assert(thd->lex->sql_command == SQLCOM_XA_COMMIT ||
1958          thd->lex->sql_command == SQLCOM_XA_ROLLBACK);
1959 
1960   XID_STATE *xid_state= thd->get_transaction()->xid_state();
1961   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
1962 
1963   if (cache_mngr != NULL && cache_mngr->has_logged_xid)
1964     return 0;
1965 
1966   if (get_xa_opt(thd) == XA_ONE_PHASE)
1967     return 0;
1968   if (!xid_state->is_binlogged())
1969     return 0; // nothing was really logged at prepare
1970   if (thd->is_error() && DBUG_EVALUATE_IF("simulate_xa_rm_error", 0, 1))
1971     return 0; // don't binlog if there are some errors.
1972 
1973   assert(!xid->is_null() ||
1974          !(thd->variables.option_bits & OPTION_BIN_LOG));
1975 
1976   char buf[XID::ser_buf_size];
1977   char query[(sizeof("XA ROLLBACK")) + 1 + sizeof(buf)];
1978   int qlen= sprintf(query, "XA %s %s", commit ? "COMMIT" : "ROLLBACK",
1979                     xid->serialize(buf));
1980   Query_log_event qinfo(thd, query, qlen, false, true, true, 0, false);
1981   return mysql_bin_log.write_event(&qinfo);
1982 }
1983 
binlog_start_consistent_snapshot(handlerton * hton,THD * thd)1984 static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
1985 {
1986   int err= 0;
1987   LOG_INFO li;
1988   DBUG_ENTER("binlog_start_consistent_snapshot");
1989 
1990   if ((err= thd->binlog_setup_trx_data()))
1991     DBUG_RETURN(err);
1992 
1993   binlog_cache_mngr * const cache_mngr= thd_get_cache_mngr(thd);
1994 
1995   /* Server layer calls us with LOCK_log locked, so this is safe. */
1996   mysql_bin_log.raw_get_current_log(&cache_mngr->binlog_info);
1997   gtid_state->get_snapshot_gtid_executed(cache_mngr->snapshot_gtid_executed);
1998 
1999   trans_register_ha(thd, true, hton, NULL);
2000 
2001   DBUG_RETURN(err);
2002 }
2003 
binlog_clone_consistent_snapshot(handlerton * hton,THD * thd,THD * from_thd)2004 static int binlog_clone_consistent_snapshot(handlerton *hton, THD *thd,
2005                                             THD *from_thd)
2006 {
2007   binlog_cache_mngr *from_cache_mngr;
2008   binlog_cache_mngr *cache_mngr;
2009   int err= 0;
2010   char log_file_name[FN_REFLEN];
2011   my_off_t pos;
2012 
2013   DBUG_ENTER("binlog_start_consistent_snapshot");
2014 
2015   from_cache_mngr= opt_bin_log ?
2016     (binlog_cache_mngr *) thd_get_cache_mngr(from_thd) : NULL;
2017 
2018   if (from_cache_mngr == NULL)
2019   {
2020     push_warning_printf(thd, Sql_condition::SL_WARNING,
2021                         HA_ERR_UNSUPPORTED,
2022                         "WITH CONSISTENT SNAPSHOT FROM SESSION was ignored for "
2023                         "binary log, because the specified session does not "
2024                         "have a consistent snapshot of binary log "
2025                         "coordinates.");
2026     DBUG_RETURN(0);
2027   }
2028 
2029   if ((err= thd->binlog_setup_trx_data()))
2030     DBUG_RETURN(err);
2031 
2032   cache_mngr= thd_get_cache_mngr(thd);
2033 
2034   pos= from_cache_mngr->binlog_info.pos;
2035   strmake(log_file_name, from_cache_mngr->binlog_info.log_file_name,
2036           sizeof(log_file_name) - 1);
2037 
2038   mysql_mutex_lock(&thd->LOCK_thd_data);
2039 
2040   cache_mngr->snapshot_gtid_executed= from_cache_mngr->snapshot_gtid_executed;
2041   cache_mngr->binlog_info.pos = pos;
2042   strmake(cache_mngr->binlog_info.log_file_name, log_file_name,
2043           sizeof(cache_mngr->binlog_info.log_file_name) - 1);
2044 
2045   mysql_mutex_unlock(&thd->LOCK_thd_data);
2046 
2047   trans_register_ha(thd, true, hton, NULL);
2048 
2049   DBUG_RETURN(err);
2050 }
2051 
2052 
2053 /**
2054    Logging XA commit/rollback of a prepared transaction in the case
2055    it was disconnected and resumed (recovered), or executed by a slave applier.
2056 
2057    @param thd         THD handle
2058    @param xid         a pointer to XID object
2059    @param commit      when @c true XA-COMMIT is logged, otherwise XA-ROLLBACK
2060 
2061    @return error code, 0 success
2062 */
2063 
binlog_xa_commit_or_rollback(THD * thd,XID * xid,bool commit)2064 inline int binlog_xa_commit_or_rollback(THD *thd, XID *xid, bool commit)
2065 {
2066   int error= 0;
2067 
2068 #ifndef NDEBUG
2069   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
2070   assert(!cache_mngr || !cache_mngr->has_logged_xid);
2071 #endif
2072   if (!(error= do_binlog_xa_commit_rollback(thd, xid, commit)))
2073   {
2074     /*
2075       Error can't be propagated naturally via result.
2076       A grand-caller has to access to it through thd's da.
2077       todo:
2078       Bug #20488921 ERROR PROPAGATION DOES FULLY WORK IN XA
2079       stands in the way of implementing a failure simulation
2080       for XA PREPARE/COMMIT/ROLLBACK.
2081     */
2082     binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
2083 
2084     if (cache_mngr)
2085       cache_mngr->has_logged_xid= true;
2086     if (commit)
2087       (void) mysql_bin_log.commit(thd, true);
2088     else
2089       (void) mysql_bin_log.rollback(thd, true);
2090     if (cache_mngr)
2091       cache_mngr->has_logged_xid= false;
2092   }
2093   return error;
2094 }
2095 
2096 
binlog_xa_commit(handlerton * hton,XID * xid)2097 static int binlog_xa_commit(handlerton *hton,  XID *xid)
2098 {
2099   (void) binlog_xa_commit_or_rollback(current_thd, xid, true);
2100 
2101   return 0;
2102 }
2103 
2104 
binlog_xa_rollback(handlerton * hton,XID * xid)2105 static int binlog_xa_rollback(handlerton *hton,  XID *xid)
2106 {
2107   (void) binlog_xa_commit_or_rollback(current_thd, xid, false);
2108 
2109   return 0;
2110 }
2111 
2112 /**
2113   When a fatal error occurs due to which binary logging becomes impossible and
2114   the user specified binlog_error_action= ABORT_SERVER the following function is
2115   invoked. This function pushes the appropriate error message to client and logs
2116   the same to server error log and then aborts the server.
2117 
2118   @param err_string          Error string which specifies the exact error
2119                              message from the caller.
2120 
2121   @retval
2122     none
2123 */
exec_binlog_error_action_abort(const char * err_string)2124 static void exec_binlog_error_action_abort(const char* err_string)
2125 {
2126   THD *thd= current_thd;
2127   /*
2128     When the code enters here it means that there was an error at higher layer
2129     and my_error function could have been invoked to let the client know what
2130     went wrong during the execution.
2131 
2132     But these errors will not let the client know that the server is going to
2133     abort. Even if we add an additional my_error function call at this point
2134     client will be able to see only the first error message that was set
2135     during the very first invocation of my_error function call.
2136 
2137     The advantage of having multiple my_error function calls are visible when
2138     the server is up and running and user issues SHOW WARNINGS or SHOW ERROR
2139     calls. In this special scenario server will be immediately aborted and
2140     user will not be able execute the above SHOW commands.
2141 
2142     Hence we clear the previous errors and push one critical error message to
2143     clients.
2144    */
2145   if (thd)
2146   {
2147     if (thd->is_error())
2148       thd->clear_error();
2149     /*
2150       Adding ME_ERRORLOG flag will ensure that the error is sent to both
2151       client and to the server error log as well.
2152     */
2153     my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(ME_ERRORLOG + ME_FATALERROR),
2154              err_string);
2155     thd->send_statement_status();
2156   }
2157   else
2158     sql_print_error("%s",err_string);
2159   abort();
2160 }
2161 
2162 
2163 
2164 /**
2165   This function is called once after each statement.
2166 
2167   @todo This function is currently not used any more and will
2168   eventually be eliminated. The real commit job is done in the
2169   MYSQL_BIN_LOG::commit function.
2170 
2171   @see MYSQL_BIN_LOG::commit
2172 
2173   @param hton  The binlog handlerton.
2174   @param thd   The client thread that executes the transaction.
2175   @param all   This is @c true if this is a real transaction commit, and
2176                @false otherwise.
2177 
2178   @see handlerton::commit
2179 */
binlog_commit(handlerton * hton,THD * thd,bool all)2180 static int binlog_commit(handlerton *hton, THD *thd, bool all)
2181 {
2182   DBUG_ENTER("binlog_commit");
2183   /*
2184     Nothing to do (any more) on commit.
2185    */
2186   DBUG_RETURN(0);
2187 }
2188 
2189 /**
2190   This function is called when a transaction or a statement is rolled back.
2191 
2192   @internal It is necessary to execute a rollback here if the
2193   transaction was rolled back because of executing a ROLLBACK TO
2194   SAVEPOINT command, but it is not used for normal rollback since
2195   MYSQL_BIN_LOG::rollback is called in that case.
2196 
2197   @todo Refactor code to introduce a <code>MYSQL_BIN_LOG::rollback(THD
2198   *thd, SAVEPOINT *sv)</code> function in @c TC_LOG and have that
2199   function execute the necessary work to rollback to a savepoint.
2200 
2201   @param hton  The binlog handlerton.
2202   @param thd   The client thread that executes the transaction.
2203   @param all   This is @c true if this is a real transaction rollback, and
2204                @false otherwise.
2205 
2206   @see handlerton::rollback
2207 */
binlog_rollback(handlerton * hton,THD * thd,bool all)2208 static int binlog_rollback(handlerton *hton, THD *thd, bool all)
2209 {
2210   DBUG_ENTER("binlog_rollback");
2211   int error= 0;
2212   if (thd->lex->sql_command == SQLCOM_ROLLBACK_TO_SAVEPOINT)
2213     error= mysql_bin_log.rollback(thd, all);
2214   DBUG_RETURN(error);
2215 }
2216 
2217 
2218 bool
append(THD * first)2219 Stage_manager::Mutex_queue::append(THD *first)
2220 {
2221   DBUG_ENTER("Stage_manager::Mutex_queue::append");
2222   lock();
2223   DBUG_PRINT("enter", ("first: 0x%llx", (ulonglong) first));
2224   DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2225                        (ulonglong) m_first, (ulonglong) &m_first,
2226                        (ulonglong) m_last));
2227   int32 count= 1;
2228   bool empty= (m_first == NULL);
2229   *m_last= first;
2230   DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2231                        (ulonglong) m_first, (ulonglong) &m_first,
2232                        (ulonglong) m_last));
2233   /*
2234     Go to the last THD instance of the list. We expect lists to be
2235     moderately short. If they are not, we need to track the end of
2236     the queue as well.
2237   */
2238 
2239   while (first->next_to_commit)
2240   {
2241     count++;
2242     first= first->next_to_commit;
2243   }
2244   my_atomic_add32(&m_size, count);
2245 
2246   m_last= &first->next_to_commit;
2247   DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2248                         (ulonglong) m_first, (ulonglong) &m_first,
2249                         (ulonglong) m_last));
2250   assert(m_first || m_last == &m_first);
2251   DBUG_PRINT("return", ("empty: %s", YESNO(empty)));
2252   unlock();
2253   DBUG_RETURN(empty);
2254 }
2255 
2256 
2257 std::pair<bool, THD*>
pop_front()2258 Stage_manager::Mutex_queue::pop_front()
2259 {
2260   DBUG_ENTER("Stage_manager::Mutex_queue::pop_front");
2261   lock();
2262   THD *result= m_first;
2263   bool more= true;
2264   /*
2265     We do not set next_to_commit to NULL here since this is only used
2266     in the flush stage. We will have to call fetch_queue last here,
2267     and will then "cut" the linked list by setting the end of that
2268     queue to NULL.
2269   */
2270   if (result)
2271     m_first= result->next_to_commit;
2272   if (m_first == NULL)
2273   {
2274     more= false;
2275     m_last = &m_first;
2276   }
2277   assert(my_atomic_load32(&m_size) > 0);
2278   my_atomic_add32(&m_size, -1);
2279   assert(m_first || m_last == &m_first);
2280   unlock();
2281   DBUG_PRINT("return", ("result: 0x%llx, more: %s",
2282                         (ulonglong) result, YESNO(more)));
2283   DBUG_RETURN(std::make_pair(more, result));
2284 }
2285 
2286 
2287 bool
enroll_for(StageID stage,THD * thd,mysql_mutex_t * stage_mutex)2288 Stage_manager::enroll_for(StageID stage, THD *thd, mysql_mutex_t *stage_mutex)
2289 {
2290   // If the queue was empty: we're the leader for this batch
2291   DBUG_PRINT("debug", ("Enqueue 0x%llx to queue for stage %d",
2292                        (ulonglong) thd, stage));
2293   bool leader= m_queue[stage].append(thd);
2294 
2295 #ifdef HAVE_REPLICATION
2296   if (stage == FLUSH_STAGE && has_commit_order_manager(thd))
2297   {
2298     Slave_worker *worker= dynamic_cast<Slave_worker *>(thd->rli_slave);
2299     Commit_order_manager *mngr= worker->get_commit_order_manager();
2300 
2301     mngr->unregister_trx(worker);
2302   }
2303 #endif
2304 
2305   /*
2306     We do not need to unlock the stage_mutex if it is LOCK_log when rotating
2307     binlog caused by logging incident log event, since it should be held
2308     always during rotation.
2309   */
2310   bool need_unlock_stage_mutex=
2311     !(mysql_bin_log.is_rotating_caused_by_incident &&
2312       stage_mutex == mysql_bin_log.get_log_lock());
2313 
2314   /*
2315     The stage mutex can be NULL if we are enrolling for the first
2316     stage.
2317   */
2318   if (stage_mutex && need_unlock_stage_mutex)
2319     mysql_mutex_unlock(stage_mutex);
2320 
2321 #ifndef NDEBUG
2322   DBUG_PRINT("info", ("This is a leader thread: %d (0=n 1=y)", leader));
2323 
2324   DEBUG_SYNC(thd, "after_enrolling_for_stage");
2325 
2326   switch (stage)
2327   {
2328   case Stage_manager::FLUSH_STAGE:
2329     DEBUG_SYNC(thd, "bgc_after_enrolling_for_flush_stage");
2330     break;
2331   case Stage_manager::SYNC_STAGE:
2332     DEBUG_SYNC(thd, "bgc_after_enrolling_for_sync_stage");
2333     break;
2334   case Stage_manager::COMMIT_STAGE:
2335     DEBUG_SYNC(thd, "bgc_after_enrolling_for_commit_stage");
2336     break;
2337   default:
2338     // not reached
2339     assert(0);
2340   }
2341 
2342   DBUG_EXECUTE_IF("assert_leader", assert(leader););
2343   DBUG_EXECUTE_IF("assert_follower", assert(!leader););
2344 #endif
2345 
2346   /*
2347     If the queue was not empty, we're a follower and wait for the
2348     leader to process the queue. If we were holding a mutex, we have
2349     to release it before going to sleep.
2350   */
2351   if (!leader)
2352   {
2353     mysql_mutex_lock(&m_lock_done);
2354 #ifndef NDEBUG
2355     /*
2356       Leader can be awaiting all-clear to preempt follower's execution.
2357       With setting the status the follower ensures it won't execute anything
2358       including thread-specific code.
2359     */
2360     thd->get_transaction()->m_flags.ready_preempt= 1;
2361     if (leader_await_preempt_status)
2362       mysql_cond_signal(&m_cond_preempt);
2363 #endif
2364     while (thd->get_transaction()->m_flags.pending)
2365       mysql_cond_wait(&m_cond_done, &m_lock_done);
2366     mysql_mutex_unlock(&m_lock_done);
2367   }
2368   return leader;
2369 }
2370 
2371 
fetch_and_empty()2372 THD *Stage_manager::Mutex_queue::fetch_and_empty()
2373 {
2374   DBUG_ENTER("Stage_manager::Mutex_queue::fetch_and_empty");
2375   lock();
2376   DBUG_PRINT("enter", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2377                        (ulonglong) m_first, (ulonglong) &m_first,
2378                        (ulonglong) m_last));
2379   THD *result= m_first;
2380   m_first= NULL;
2381   m_last= &m_first;
2382   DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2383                        (ulonglong) m_first, (ulonglong) &m_first,
2384                        (ulonglong) m_last));
2385   DBUG_PRINT("info", ("fetched queue of %d transactions", my_atomic_load32(&m_size)));
2386   DBUG_PRINT("return", ("result: 0x%llx", (ulonglong) result));
2387   assert(my_atomic_load32(&m_size) >= 0);
2388   my_atomic_store32(&m_size, 0);
2389   unlock();
2390   DBUG_RETURN(result);
2391 }
2392 
wait_count_or_timeout(ulong count,long usec,StageID stage)2393 void Stage_manager::wait_count_or_timeout(ulong count, long usec, StageID stage)
2394 {
2395   long to_wait=
2396     DBUG_EVALUATE_IF("bgc_set_infinite_delay", LONG_MAX, usec);
2397   /*
2398     For testing purposes while waiting for inifinity
2399     to arrive, we keep checking the queue size at regular,
2400     small intervals. Otherwise, waiting 0.1 * infinite
2401     is too long.
2402    */
2403   long delta=
2404     DBUG_EVALUATE_IF("bgc_set_infinite_delay", 100000,
2405                      max<long>(1, (to_wait * 0.1)));
2406 
2407   while (to_wait > 0 && (count == 0 || static_cast<ulong>(m_queue[stage].get_size()) < count))
2408   {
2409 #ifndef NDEBUG
2410     if (current_thd)
2411       DEBUG_SYNC(current_thd, "bgc_wait_count_or_timeout");
2412 #endif
2413     my_sleep(delta);
2414     to_wait -= delta;
2415   }
2416 }
2417 
signal_done(THD * queue)2418 void Stage_manager::signal_done(THD *queue)
2419 {
2420   mysql_mutex_lock(&m_lock_done);
2421   for (THD *thd= queue ; thd ; thd = thd->next_to_commit)
2422     thd->get_transaction()->m_flags.pending= false;
2423   mysql_mutex_unlock(&m_lock_done);
2424   mysql_cond_broadcast(&m_cond_done);
2425 }
2426 
2427 #ifndef NDEBUG
clear_preempt_status(THD * head)2428 void Stage_manager::clear_preempt_status(THD *head)
2429 {
2430   assert(head);
2431 
2432   mysql_mutex_lock(&m_lock_done);
2433   while(!head->get_transaction()->m_flags.ready_preempt)
2434   {
2435     leader_await_preempt_status= true;
2436     mysql_cond_wait(&m_cond_preempt, &m_lock_done);
2437   }
2438   leader_await_preempt_status= false;
2439   mysql_mutex_unlock(&m_lock_done);
2440 }
2441 #endif
2442 
2443 /**
2444   Write a rollback record of the transaction to the binary log.
2445 
2446   For binary log group commit, the rollback is separated into three
2447   parts:
2448 
2449   1. First part consists of filling the necessary caches and
2450      finalizing them (if they need to be finalized). After a cache is
2451      finalized, nothing can be added to the cache.
2452 
2453   2. Second part execute an ordered flush and commit. This will be
2454      done using the group commit functionality in @c ordered_commit.
2455 
2456      Since we roll back the transaction early, we call @c
2457      ordered_commit with the @c skip_commit flag set. The @c
2458      ha_commit_low call inside @c ordered_commit will then not be
2459      called.
2460 
2461   3. Third part checks any errors resulting from the flush and handles
2462      them appropriately.
2463 
2464   @see MYSQL_BIN_LOG::ordered_commit
2465   @see ha_commit_low
2466   @see ha_rollback_low
2467 
2468   @param thd Session to commit
  @param all This is @c true if this is a real transaction rollback, and
             @c false otherwise.
2471 
2472   @return Error code, or zero if there were no error.
2473  */
2474 
rollback(THD * thd,bool all)2475 int MYSQL_BIN_LOG::rollback(THD *thd, bool all)
2476 {
2477   int error= 0;
2478   bool stuff_logged= false;
2479   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
2480 
2481   DBUG_ENTER("MYSQL_BIN_LOG::rollback(THD *thd, bool all)");
2482   DBUG_PRINT("enter", ("all: %s, cache_mngr: 0x%llx, thd->is_error: %s",
2483                        YESNO(all), (ulonglong) cache_mngr,
2484                        YESNO(thd->is_error())));
2485   /*
2486     Defer XA-transaction rollback until its XA-rollback event is recorded.
2487     When we are executing a ROLLBACK TO SAVEPOINT, we
2488     should only clear the caches since this function is called as part
2489     of the engine rollback.
2490     In other cases we roll back the transaction in the engines early
2491     since this will release locks and allow other transactions to
2492     start executing.
2493   */
2494   if (thd->lex->sql_command == SQLCOM_XA_ROLLBACK)
2495   {
2496     XID_STATE *xs= thd->get_transaction()->xid_state();
2497 
2498     assert(all || !xs->is_binlogged() ||
2499            (!xs->is_in_recovery() && thd->is_error()));
2500     /*
2501       Whenever cache_mngr is not initialized, the xa prepared
2502       transaction's binary logging status must not be set, unless the
2503       transaction is rolled back through an external connection which
2504       has binlogging switched off.
2505     */
2506     assert(cache_mngr || !xs->is_binlogged()
2507            || !(is_open() && thd->variables.option_bits & OPTION_BIN_LOG));
2508 
2509     if ((error= do_binlog_xa_commit_rollback(thd, xs->get_xid(), false)))
2510       goto end;
2511     cache_mngr= thd_get_cache_mngr(thd);
2512   }
2513   else if (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT)
2514   {
2515     /*
2516       Reset binlog_snapshot_% variables for the current connection so that the
2517       current coordinates are shown after committing a consistent snapshot
2518       transaction.
2519     */
2520     if (cache_mngr != NULL)
2521     {
2522       mysql_mutex_lock(&thd->LOCK_thd_data);
2523       cache_mngr->drop_consistent_snapshot();
2524       mysql_mutex_unlock(&thd->LOCK_thd_data);
2525     }
2526 
2527     if ((error= ha_rollback_low(thd, all)))
2528       goto end;
2529   }
2530 
2531   /*
2532     If there is no cache manager, or if there is nothing in the
2533     caches, there are no caches to roll back, so we're trivially done
2534     unless XA-ROLLBACK that yet to run rollback_low().
2535   */
2536   if (cache_mngr == NULL || cache_mngr->is_binlog_empty())
2537   {
2538     goto end;
2539   }
2540 
2541   DBUG_PRINT("debug",
2542              ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
2543               YESNO(thd->get_transaction()->cannot_safely_rollback(
2544                   Transaction_ctx::SESSION)),
2545               YESNO(cache_mngr->trx_cache.is_binlog_empty())));
2546   DBUG_PRINT("debug",
2547              ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
2548               YESNO(thd->get_transaction()->cannot_safely_rollback(
2549                   Transaction_ctx::STMT)),
2550               YESNO(cache_mngr->stmt_cache.is_binlog_empty())));
2551 
2552   /*
2553     If an incident event is set we do not flush the content of the statement
2554     cache because it may be corrupted.
2555   */
2556   if (cache_mngr->stmt_cache.has_incident())
2557   {
2558     const char* err_msg= "The content of the statement cache is corrupted "
2559                          "while writing a rollback record of the transaction "
2560                          "to the binary log.";
2561     error= write_incident(thd, true/*need_lock_log=true*/, err_msg);
2562     cache_mngr->stmt_cache.reset();
2563   }
2564   else if (!cache_mngr->stmt_cache.is_binlog_empty())
2565   {
2566     if (thd->lex->sql_command == SQLCOM_CREATE_TABLE &&
2567         thd->lex->select_lex->item_list.elements && /* With select */
2568         !(thd->lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) &&
2569         thd->is_current_stmt_binlog_format_row())
2570     {
2571       /*
2572         In row based binlog format, we reset the binlog statement cache
2573         when rolling back a single statement 'CREATE...SELECT' transaction,
2574         since the 'CREATE TABLE' event was put in the binlog statement cache.
2575       */
2576       cache_mngr->stmt_cache.reset();
2577     }
2578     else
2579     {
2580       if ((error= cache_mngr->stmt_cache.finalize(thd)))
2581         goto end;
2582       stuff_logged= true;
2583     }
2584   }
2585 
2586   if (ending_trans(thd, all))
2587   {
2588     if (trans_cannot_safely_rollback(thd))
2589     {
2590       const char xa_rollback_str[]= "XA ROLLBACK";
2591       /*
2592         sizeof(xa_rollback_str) and XID::ser_buf_size both allocate `\0',
2593         so one of the two is used for necessary in the xa case `space' char
2594       */
2595       char query[sizeof(xa_rollback_str) + XID::ser_buf_size]= "ROLLBACK";
2596       XID_STATE *xs= thd->get_transaction()->xid_state();
2597 
2598       if (thd->lex->sql_command == SQLCOM_XA_ROLLBACK)
2599       {
2600         /* this block is relevant only for not prepared yet and "local" xa trx */
2601         assert(thd->get_transaction()->xid_state()->
2602                has_state(XID_STATE::XA_IDLE));
2603         assert(!cache_mngr->has_logged_xid);
2604 
2605         sprintf(query, "%s ", xa_rollback_str);
2606         xs->get_xid()->serialize(query + sizeof(xa_rollback_str));
2607       }
2608       /*
2609         If the transaction is being rolled back and contains changes that
2610         cannot be rolled back, the trx-cache's content is flushed.
2611       */
2612       Query_log_event
2613         end_evt(thd, query, strlen(query), true, false, true, 0, true);
2614       error= thd->lex->sql_command != SQLCOM_XA_ROLLBACK ?
2615         cache_mngr->trx_cache.finalize(thd, &end_evt) :
2616         cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
2617       stuff_logged= true;
2618     }
2619     else
2620     {
2621       /*
2622         If the transaction is being rolled back and its changes can be
2623         rolled back, the trx-cache's content is truncated.
2624       */
2625       error= cache_mngr->trx_cache.truncate(thd, all);
2626     }
2627   }
2628   else
2629   {
2630     /*
2631       If a statement is being rolled back, it is necessary to know
2632       exactly why a statement may not be safely rolled back as in
2633       some specific situations the trx-cache can be truncated.
2634 
2635       If a temporary table is created or dropped, the trx-cache is not
2636       truncated. Note that if the stmt-cache is used, there is nothing
2637       to truncate in the trx-cache.
2638 
2639       If a non-transactional table is updated and the binlog format is
2640       statement, the trx-cache is not truncated. The trx-cache is used
2641       when the direct option is off and a transactional table has been
2642       updated before the current statement in the context of the
2643       current transaction. Note that if the stmt-cache is used there is
2644       nothing to truncate in the trx-cache.
2645 
2646       If other binlog formats are used, updates to non-transactional
2647       tables are written to the stmt-cache and trx-cache can be safely
2648       truncated, if necessary.
2649     */
2650     if (thd->get_transaction()->has_dropped_temp_table(
2651           Transaction_ctx::STMT) ||
2652         thd->get_transaction()->has_created_temp_table(
2653           Transaction_ctx::STMT) ||
2654         (thd->get_transaction()->has_modified_non_trans_table(
2655           Transaction_ctx::STMT) &&
2656         thd->variables.binlog_format == BINLOG_FORMAT_STMT))
2657     {
2658       /*
2659         If the statement is being rolled back and dropped or created a
2660         temporary table or modified a non-transactional table and the
2661         statement-based replication is in use, the statement's changes
2662         in the trx-cache are preserved.
2663       */
2664       cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
2665     }
2666     else
2667     {
2668       /*
2669         Otherwise, the statement's changes in the trx-cache are
2670         truncated.
2671       */
2672       error= cache_mngr->trx_cache.truncate(thd, all);
2673     }
2674   }
2675   if (stuff_logged)
2676   {
2677     Transaction_ctx *trn_ctx= thd->get_transaction();
2678     trn_ctx->store_commit_parent(m_dependency_tracker.get_max_committed_timestamp());
2679   }
2680 
2681   DBUG_PRINT("debug", ("error: %d", error));
2682   if (error == 0 && stuff_logged)
2683   {
2684     if (RUN_HOOK(transaction,
2685                  before_commit,
2686                  (thd, all,
2687                   thd_get_cache_mngr(thd)->get_binlog_cache_log(true),
2688                   thd_get_cache_mngr(thd)->get_binlog_cache_log(false),
2689                   max<my_off_t>(max_binlog_cache_size,
2690                                 max_binlog_stmt_cache_size))))
2691     {
2692       //Reset the thread OK status before changing the outcome.
2693       if (thd->get_stmt_da()->is_ok())
2694         thd->get_stmt_da()->reset_diagnostics_area();
2695       my_error(ER_RUN_HOOK_ERROR, MYF(0), "before_commit");
2696       DBUG_RETURN(RESULT_ABORTED);
2697     }
2698 #ifndef NDEBUG
2699     /*
2700       XA rollback is always accepted.
2701     */
2702     if (thd->get_transaction()->get_rpl_transaction_ctx()->is_transaction_rollback())
2703       assert(0);
2704 #endif
2705 
2706     error= prepare_ordered_commit(thd, all, /* skip_commit */ true);
2707     if (!error)
2708       error= ordered_commit(thd);
2709   }
2710 
2711   if (check_write_error(thd))
2712   {
2713     /*
2714       We reach this point if the effect of a statement did not properly get into
2715       a cache and need to be rolled back.
2716     */
2717     error|= cache_mngr->trx_cache.truncate(thd, all);
2718   }
2719 
2720 end:
2721   /* Deferred xa rollback to engines */
2722   if (!error && thd->lex->sql_command == SQLCOM_XA_ROLLBACK)
2723   {
2724     error= ha_rollback_low(thd, all);
2725     /* Successful XA-rollback commits the new gtid_state */
2726     if (!error && !thd->is_error())
2727       gtid_state->update_on_commit(thd);
2728   }
2729   /*
2730     When a statement errors out on auto-commit mode it is rollback
2731     implicitly, so the same should happen to its GTID.
2732   */
2733   if (!thd->in_active_multi_stmt_transaction())
2734     gtid_state->update_on_rollback(thd);
2735 
2736   /*
2737     TODO: some errors are overwritten, which may cause problem,
2738     fix it later.
2739   */
2740   DBUG_PRINT("return", ("error: %d", error));
2741   DBUG_RETURN(error);
2742 }
2743 
2744 /**
2745   @note
2746   How do we handle this (unlikely but legal) case:
2747   @verbatim
2748     [transaction] + [update to non-trans table] + [rollback to savepoint] ?
2749   @endverbatim
2750   The problem occurs when a savepoint is before the update to the
2751   non-transactional table. Then when there's a rollback to the savepoint, if we
2752   simply truncate the binlog cache, we lose the part of the binlog cache where
2753   the update is. If we want to not lose it, we need to write the SAVEPOINT
2754   command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
2755   is easy: it's just write at the end of the binlog cache, but the former
2756   should be *inserted* to the place where the user called SAVEPOINT. The
2757   solution is that when the user calls SAVEPOINT, we write it to the binlog
2758   cache (so no need to later insert it). As transactions are never intermixed
2759   in the binary log (i.e. they are serialized), we won't have conflicts with
2760   savepoint names when using mysqlbinlog or in the slave SQL thread.
2761   Then when ROLLBACK TO SAVEPOINT is called, if we updated some
2762   non-transactional table, we don't truncate the binlog cache but instead write
2763   ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
2764   will chop the SAVEPOINT command from the binlog cache, which is good as in
2765   that case there is no need to have it in the binlog).
2766 */
2767 
binlog_savepoint_set(handlerton * hton,THD * thd,void * sv)2768 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
2769 {
2770   DBUG_ENTER("binlog_savepoint_set");
2771   int error= 1;
2772 
2773   String log_query;
2774   if (log_query.append(STRING_WITH_LEN("SAVEPOINT ")))
2775     DBUG_RETURN(error);
2776   else
2777     append_identifier(thd, &log_query, thd->lex->ident.str,
2778                       thd->lex->ident.length);
2779 
2780   int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
2781   Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2782                         TRUE, FALSE, TRUE, errcode);
2783   /*
2784     We cannot record the position before writing the statement
2785     because a rollback to a savepoint (.e.g. consider it "S") would
2786     prevent the savepoint statement (i.e. "SAVEPOINT S") from being
2787     written to the binary log despite the fact that the server could
2788     still issue other rollback statements to the same savepoint (i.e.
2789     "S").
2790     Given that the savepoint is valid until the server releases it,
2791     ie, until the transaction commits or it is released explicitly,
2792     we need to log it anyway so that we don't have "ROLLBACK TO S"
2793     or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
2794     log.
2795   */
2796   if (!(error= mysql_bin_log.write_event(&qinfo)))
2797     binlog_trans_log_savepos(thd, (my_off_t*) sv);
2798 
2799   DBUG_RETURN(error);
2800 }
2801 
binlog_savepoint_rollback(handlerton * hton,THD * thd,void * sv)2802 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
2803 {
2804   DBUG_ENTER("binlog_savepoint_rollback");
2805   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
2806   my_off_t pos= *(my_off_t*) sv;
2807   assert(pos != ~(my_off_t) 0);
2808 
2809   /*
2810     Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
2811     non-transactional table. Otherwise, truncate the binlog cache starting
2812     from the SAVEPOINT command.
2813   */
2814   if (trans_cannot_safely_rollback(thd))
2815   {
2816     String log_query;
2817     if (log_query.append(STRING_WITH_LEN("ROLLBACK TO ")))
2818       DBUG_RETURN(1);
2819     else
2820     {
2821       /*
2822         Before writing identifier to the binlog, make sure to
2823         quote the identifier properly so as to prevent any SQL
2824         injection on the slave.
2825       */
2826       append_identifier(thd, &log_query, thd->lex->ident.str,
2827                         thd->lex->ident.length);
2828     }
2829 
2830     int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
2831     Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2832                           TRUE, FALSE, TRUE, errcode);
2833     DBUG_RETURN(mysql_bin_log.write_event(&qinfo));
2834   }
2835   // Otherwise, we truncate the cache
2836   cache_mngr->trx_cache.restore_savepoint(pos);
2837   /*
2838     When a SAVEPOINT is executed inside a stored function/trigger we force the
2839     pending event to be flushed with a STMT_END_F flag and clear the table maps
2840     as well to ensure that following DMLs will have a clean state to start
2841     with. ROLLBACK inside a stored routine has to finalize possibly existing
2842     current row-based pending event with cleaning up table maps. That ensures
2843     that following DMLs will have a clean state to start with.
2844    */
2845   if (thd->in_sub_stmt)
2846     thd->clear_binlog_table_maps();
2847   DBUG_RETURN(0);
2848 }
2849 
2850 /**
2851    purge logs, master and slave sides both, related error code
2852    convertor.
2853    Called from @c purge_error_message(), @c MYSQL_BIN_LOG::reset_logs()
2854 
2855    @param  res  an error code as used by purging routines
2856 
2857    @return the user level error code ER_*
2858 */
purge_log_get_error_code(int res)2859 static uint purge_log_get_error_code(int res)
2860 {
2861   uint errcode= 0;
2862 
2863   switch (res)  {
2864   case 0: break;
2865   case LOG_INFO_EOF:	errcode= ER_UNKNOWN_TARGET_BINLOG; break;
2866   case LOG_INFO_IO:	errcode= ER_IO_ERR_LOG_INDEX_READ; break;
2867   case LOG_INFO_INVALID:errcode= ER_BINLOG_PURGE_PROHIBITED; break;
2868   case LOG_INFO_SEEK:	errcode= ER_FSEEK_FAIL; break;
2869   case LOG_INFO_MEM:	errcode= ER_OUT_OF_RESOURCES; break;
2870   case LOG_INFO_FATAL:	errcode= ER_BINLOG_PURGE_FATAL_ERR; break;
2871   case LOG_INFO_IN_USE: errcode= ER_LOG_IN_USE; break;
2872   case LOG_INFO_EMFILE: errcode= ER_BINLOG_PURGE_EMFILE; break;
2873   default:		errcode= ER_LOG_PURGE_UNKNOWN_ERR; break;
2874   }
2875 
2876   return errcode;
2877 }
2878 
2879 /**
2880   Check whether binlog state allows to safely release MDL locks after
2881   rollback to savepoint.
2882 
2883   @param hton  The binlog handlerton.
2884   @param thd   The client thread that executes the transaction.
2885 
2886   @return true  - It is safe to release MDL locks.
2887           false - If it is not.
2888 */
binlog_savepoint_rollback_can_release_mdl(handlerton * hton,THD * thd)2889 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
2890                                                       THD *thd)
2891 {
2892   DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
2893   /**
2894     If we have not updated any non-transactional tables rollback
2895     to savepoint will simply truncate binlog cache starting from
2896     SAVEPOINT command. So it should be safe to release MDL acquired
2897     after SAVEPOINT command in this case.
2898   */
2899   DBUG_RETURN(!trans_cannot_safely_rollback(thd));
2900 }
2901 
2902 #ifdef HAVE_REPLICATION
2903 /**
2904   Adjust log offset in the binary log file for all running slaves
2905   This class implements call back function for do_for_all_thd().
2906   It is called for each thd in thd list to adjust offset.
2907 */
2908 class Adjust_offset : public Do_THD_Impl
2909 {
2910 public:
Adjust_offset(my_off_t value)2911   Adjust_offset(my_off_t value) : m_purge_offset(value) {}
operator ()(THD * thd)2912   virtual void operator()(THD *thd)
2913   {
2914     LOG_INFO* linfo;
2915     mysql_mutex_lock(&thd->LOCK_thd_data);
2916     if ((linfo= thd->current_linfo))
2917     {
2918       /*
2919         Index file offset can be less that purge offset only if
2920         we just started reading the index file. In that case
2921         we have nothing to adjust.
2922       */
2923       if (linfo->index_file_offset < m_purge_offset)
2924         linfo->fatal = (linfo->index_file_offset != 0);
2925       else
2926         linfo->index_file_offset -= m_purge_offset;
2927     }
2928     mysql_mutex_unlock(&thd->LOCK_thd_data);
2929   }
2930 private:
2931   my_off_t m_purge_offset;
2932 };
2933 
2934 /*
2935   Adjust the position pointer in the binary log file for all running slaves.
2936 
2937   SYNOPSIS
2938     adjust_linfo_offsets()
2939     purge_offset	Number of bytes removed from start of log index file
2940 
2941   NOTES
2942     - This is called when doing a PURGE when we delete lines from the
2943       index log file.
2944 
2945   REQUIREMENTS
2946     - Before calling this function, we have to ensure that no threads are
2947       using any binary log file before purge_offset.
2948 
2949   TODO
2950     - Inform the slave threads that they should sync the position
2951       in the binary log file with flush_relay_log_info.
2952       Now they sync is done for next read.
2953 */
adjust_linfo_offsets(my_off_t purge_offset)2954 static void adjust_linfo_offsets(my_off_t purge_offset)
2955 {
2956   Adjust_offset adjust_offset(purge_offset);
2957   Global_THD_manager::get_instance()->do_for_all_thd(&adjust_offset);
2958 }
2959 
2960 /**
2961   This class implements Call back function for do_for_all_thd().
2962   It is called for each thd in thd list to count
2963   threads using bin log file
2964 */
2965 
2966 class Log_in_use : public Do_THD_Impl
2967 {
2968 public:
Log_in_use(const char * value)2969   Log_in_use(const char* value) : m_log_name(value), m_count(0)
2970   {
2971     m_log_name_len = strlen(m_log_name) + 1;
2972   }
operator ()(THD * thd)2973   virtual void operator()(THD *thd)
2974   {
2975     LOG_INFO* linfo;
2976     mysql_mutex_lock(&thd->LOCK_thd_data);
2977     if ((linfo = thd->current_linfo))
2978     {
2979       if(!strncmp(m_log_name, linfo->log_file_name, m_log_name_len))
2980       {
2981         sql_print_warning("file %s was not purged because it was being read "
2982                           "by thread number %u", m_log_name, thd->thread_id());
2983         m_count++;
2984       }
2985     }
2986     mysql_mutex_unlock(&thd->LOCK_thd_data);
2987   }
get_count()2988   int get_count() { return m_count; }
2989 private:
2990   const char* m_log_name;
2991   size_t m_log_name_len;
2992   int m_count;
2993 };
2994 
log_in_use(const char * log_name)2995 static int log_in_use(const char* log_name)
2996 {
2997   Log_in_use log_in_use(log_name);
2998 #ifndef NDEBUG
2999   if (current_thd)
3000     DEBUG_SYNC(current_thd,"purge_logs_after_lock_index_before_thread_count");
3001 #endif
3002   Global_THD_manager::get_instance()->do_for_all_thd(&log_in_use);
3003   return log_in_use.get_count();
3004 }
3005 
purge_error_message(THD * thd,int res)3006 static bool purge_error_message(THD* thd, int res)
3007 {
3008   uint errcode;
3009 
3010   if ((errcode= purge_log_get_error_code(res)) != 0)
3011   {
3012     my_message(errcode, ER(errcode), MYF(0));
3013     return TRUE;
3014   }
3015   my_ok(thd);
3016   return FALSE;
3017 }
3018 
3019 #endif /* HAVE_REPLICATION */
3020 
check_binlog_magic(IO_CACHE * log,const char ** errmsg)3021 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
3022 {
3023   char magic[4];
3024   assert(my_b_tell(log) == 0);
3025 
3026   if (my_b_read(log, (uchar*) magic, sizeof(magic)))
3027   {
3028     *errmsg = "I/O error reading the header from the binary log";
3029     sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno(),
3030 		    log->error);
3031     return 1;
3032   }
3033   if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
3034   {
3035     *errmsg = "Binlog has bad magic number;  It's not a binary log file that can be used by this version of MySQL";
3036     return 1;
3037   }
3038   return 0;
3039 }
3040 
3041 
open_binlog_file(IO_CACHE * log,const char * log_file_name,const char ** errmsg)3042 File open_binlog_file(IO_CACHE *log, const char *log_file_name, const char **errmsg)
3043 {
3044   File file;
3045   DBUG_ENTER("open_binlog_file");
3046 
3047   if ((file= mysql_file_open(key_file_binlog,
3048                              log_file_name, O_RDONLY | O_BINARY | O_SHARE,
3049                              MYF(MY_WME))) < 0)
3050   {
3051     sql_print_error("Failed to open log (file '%s', errno %d)",
3052                     log_file_name, my_errno());
3053     *errmsg = "Could not open log file";
3054     goto err;
3055   }
3056   if (init_io_cache_ext(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
3057                         MYF(MY_WME|MY_DONT_CHECK_FILESIZE), key_file_binlog_cache))
3058   {
3059     sql_print_error("Failed to create a cache on log (file '%s')",
3060                     log_file_name);
3061     *errmsg = "Could not open log file";
3062     goto err;
3063   }
3064   if (check_binlog_magic(log,errmsg))
3065     goto err;
3066   DBUG_RETURN(file);
3067 
3068 err:
3069   if (file >= 0)
3070   {
3071     mysql_file_close(file, MYF(0));
3072     end_io_cache(log);
3073   }
3074   DBUG_RETURN(-1);
3075 }
3076 
is_transaction_empty(THD * thd)3077 bool is_transaction_empty(THD *thd)
3078 {
3079   DBUG_ENTER("is_transaction_empty");
3080   int rw_ha_count= check_trx_rw_engines(thd, Transaction_ctx::SESSION);
3081   rw_ha_count+= check_trx_rw_engines(thd, Transaction_ctx::STMT);
3082   DBUG_RETURN(rw_ha_count == 0);
3083 }
3084 
check_trx_rw_engines(THD * thd,Transaction_ctx::enum_trx_scope trx_scope)3085 int check_trx_rw_engines(THD *thd, Transaction_ctx::enum_trx_scope trx_scope)
3086 {
3087   DBUG_ENTER("check_trx_rw_engines");
3088 
3089   int rw_ha_count= 0;
3090   Ha_trx_info *ha_list=
3091       (Ha_trx_info *)thd->get_transaction()->ha_trx_info(trx_scope);
3092 
3093   for (Ha_trx_info *ha_info= ha_list; ha_info; ha_info= ha_info->next()) {
3094     if (ha_info->is_trx_read_write())
3095       ++rw_ha_count;
3096   }
3097   DBUG_RETURN(rw_ha_count);
3098 }
3099 
is_empty_transaction_in_binlog_cache(const THD * thd)3100 bool is_empty_transaction_in_binlog_cache(const THD* thd)
3101 {
3102   DBUG_ENTER("is_empty_transaction_in_binlog_cache");
3103 
3104   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
3105   if (cache_mngr != NULL && cache_mngr->has_empty_transaction())
3106   {
3107     DBUG_RETURN(true);
3108   }
3109 
3110   DBUG_RETURN(false);
3111 }
3112 
3113 
3114 /**
3115   This function checks if a transactional table was updated by the
3116   current transaction.
3117 
3118   @param thd The client thread that executed the current statement.
3119   @return
3120     @c true if a transactional table was updated, @c false otherwise.
3121 */
3122 bool
trans_has_updated_trans_table(const THD * thd)3123 trans_has_updated_trans_table(const THD* thd)
3124 {
3125   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
3126 
3127   return (cache_mngr ? !cache_mngr->trx_cache.is_binlog_empty() : 0);
3128 }
3129 
3130 /**
3131   This function checks if a transactional table was updated by the
3132   current statement.
3133 
3134   @param ha_list Registered storage engine handler list.
3135   @return
3136     @c true if a transactional table was updated, @c false otherwise.
3137 */
3138 bool
stmt_has_updated_trans_table(Ha_trx_info * ha_list)3139 stmt_has_updated_trans_table(Ha_trx_info* ha_list)
3140 {
3141   const Ha_trx_info *ha_info;
3142   for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
3143   {
3144     if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
3145       return (TRUE);
3146   }
3147   return (FALSE);
3148 }
3149 
3150 bool
trans_has_noop_dml(Ha_trx_info * ha_list)3151 trans_has_noop_dml(Ha_trx_info* ha_list)
3152 {
3153   const Ha_trx_info *ha_info;
3154   for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
3155   {
3156     if (ha_info->is_trx_noop_read_write())
3157       return (TRUE);
3158   }
3159   return (FALSE);
3160 }
3161 
3162 /**
3163   This function checks if a transaction, either a multi-statement
3164   or a single statement transaction is about to commit or not.
3165 
3166   @param thd The client thread that executed the current statement.
3167   @param all Committing a transaction (i.e. TRUE) or a statement
3168              (i.e. FALSE).
3169   @return
3170     @c true if committing a transaction, otherwise @c false.
3171 */
ending_trans(THD * thd,const bool all)3172 bool ending_trans(THD* thd, const bool all)
3173 {
3174   return (all || ending_single_stmt_trans(thd, all));
3175 }
3176 
3177 /**
3178   This function checks if a single statement transaction is about
3179   to commit or not.
3180 
3181   @param thd The client thread that executed the current statement.
3182   @param all Committing a transaction (i.e. TRUE) or a statement
3183              (i.e. FALSE).
3184   @return
3185     @c true if committing a single statement transaction, otherwise
3186     @c false.
3187 */
ending_single_stmt_trans(THD * thd,const bool all)3188 bool ending_single_stmt_trans(THD* thd, const bool all)
3189 {
3190   return (!all && !thd->in_multi_stmt_transaction_mode());
3191 }
3192 
3193 /**
3194   This function checks if a transaction cannot be rolled back safely.
3195 
3196   @param thd The client thread that executed the current statement.
3197   @return
3198     @c true if cannot be safely rolled back, @c false otherwise.
3199 */
trans_cannot_safely_rollback(const THD * thd)3200 bool trans_cannot_safely_rollback(const THD* thd)
3201 {
3202   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
3203 
3204   return cache_mngr->trx_cache.cannot_rollback();
3205 }
3206 
3207 /**
3208   This function checks if current statement cannot be rollded back safely.
3209 
3210   @param thd The client thread that executed the current statement.
3211   @return
3212     @c true if cannot be safely rolled back, @c false otherwise.
3213 */
stmt_cannot_safely_rollback(const THD * thd)3214 bool stmt_cannot_safely_rollback(const THD* thd)
3215 {
3216   return thd->get_transaction()->cannot_safely_rollback(Transaction_ctx::STMT);
3217 }
3218 
3219 #ifndef EMBEDDED_LIBRARY
3220 /**
3221   Execute a PURGE BINARY LOGS TO <log> command.
3222 
3223   @param thd Pointer to THD object for the client thread executing the
3224   statement.
3225 
3226   @param to_log Name of the last log to purge.
3227 
3228   @retval FALSE success
3229   @retval TRUE failure
3230 */
purge_master_logs(THD * thd,const char * to_log)3231 bool purge_master_logs(THD* thd, const char* to_log)
3232 {
3233   char search_file_name[FN_REFLEN];
3234   if (!mysql_bin_log.is_open())
3235   {
3236     my_ok(thd);
3237     return FALSE;
3238   }
3239 
3240   mysql_bin_log.make_log_name(search_file_name, to_log);
3241   return purge_error_message(thd,
3242                              mysql_bin_log.purge_logs(search_file_name, false,
3243                                                       true/*need_lock_index=true*/,
3244                                                       true/*need_update_threads=true*/,
3245                                                       NULL, false));
3246 }
3247 
3248 
3249 /**
3250   Execute a PURGE BINARY LOGS BEFORE <date> command.
3251 
3252   @param thd Pointer to THD object for the client thread executing the
3253   statement.
3254 
3255   @param purge_time Date before which logs should be purged.
3256 
3257   @retval FALSE success
3258   @retval TRUE failure
3259 */
purge_master_logs_before_date(THD * thd,time_t purge_time)3260 bool purge_master_logs_before_date(THD* thd, time_t purge_time)
3261 {
3262   if (!mysql_bin_log.is_open())
3263   {
3264     my_ok(thd);
3265     return 0;
3266   }
3267   return purge_error_message(thd,
3268                              mysql_bin_log.purge_logs_before_date(purge_time,
3269                                                                   false));
3270 }
3271 #endif /* EMBEDDED_LIBRARY */
3272 
3273 /*
3274   Helper function to get the error code of the query to be binlogged.
3275  */
query_error_code(THD * thd,bool not_killed)3276 int query_error_code(THD *thd, bool not_killed)
3277 {
3278   int error;
3279 
3280   if (not_killed || (thd->killed == THD::KILL_BAD_DATA))
3281   {
3282     error= thd->is_error() ? thd->get_stmt_da()->mysql_errno() : 0;
3283 
3284     /* thd->get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
3285        ER_QUERY_INTERRUPTED, So here we need to make sure that error
3286        is not set to these errors when specified not_killed by the
3287        caller.
3288     */
3289     if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED)
3290       error= 0;
3291   }
3292   else
3293     error= thd->killed_errno();
3294 
3295   return error;
3296 }
3297 
3298 
3299 /**
3300   Copy content of 'from' file from offset to 'to' file.
3301 
3302   - We do the copy outside of the IO_CACHE as the cache
3303   buffers would just make things slower and more complicated.
3304   In most cases the copy loop should only do one read.
3305 
3306   @param from          File to copy.
3307   @param to            File to copy to.
3308   @param offset        Offset in 'from' file.
3309 
3310 
3311   @retval
3312     0    ok
3313   @retval
3314     -1    error
3315 */
copy_file(IO_CACHE * from,IO_CACHE * to,my_off_t offset)3316 static bool copy_file(IO_CACHE *from, IO_CACHE *to, my_off_t offset)
3317 {
3318   int bytes_read;
3319   uchar io_buf[IO_SIZE*2];
3320   DBUG_ENTER("copy_file");
3321 
3322   mysql_file_seek(from->file, offset, MY_SEEK_SET, MYF(0));
3323   while(TRUE)
3324   {
3325     if ((bytes_read= (int) mysql_file_read(from->file, io_buf, sizeof(io_buf),
3326                                            MYF(MY_WME)))
3327         < 0)
3328       goto err;
3329     if (DBUG_EVALUATE_IF("fault_injection_copy_part_file", 1, 0))
3330       bytes_read= bytes_read/2;
3331     if (!bytes_read)
3332       break;                                    // end of file
3333     if (mysql_file_write(to->file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
3334       goto err;
3335   }
3336 
3337   DBUG_RETURN(0);
3338 
3339 err:
3340   DBUG_RETURN(1);
3341 }
3342 
3343 
3344 #ifdef HAVE_REPLICATION
3345 /**
3346    Load data's io cache specific hook to be executed
3347    before a chunk of data is being read into the cache's buffer
3348    The fuction instantianates and writes into the binlog
3349    replication events along LOAD DATA processing.
3350 
3351    @param file  pointer to io-cache
3352    @retval 0 success
3353    @retval 1 failure
3354 */
log_loaded_block(IO_CACHE * file)3355 int log_loaded_block(IO_CACHE* file)
3356 {
3357   DBUG_ENTER("log_loaded_block");
3358   LOAD_FILE_INFO *lf_info;
3359   uint block_len;
3360   /* buffer contains position where we started last read */
3361   uchar* buffer= (uchar*) my_b_get_buffer_start(file);
3362   uint max_event_size= current_thd->variables.max_allowed_packet;
3363   lf_info= (LOAD_FILE_INFO*) file->arg;
3364   if (lf_info->thd->is_current_stmt_binlog_format_row())
3365     DBUG_RETURN(0);
3366   if (lf_info->last_pos_in_file != HA_POS_ERROR &&
3367       lf_info->last_pos_in_file >= my_b_get_pos_in_file(file))
3368     DBUG_RETURN(0);
3369 
3370   for (block_len= (uint) (my_b_get_bytes_in_buffer(file)); block_len > 0;
3371        buffer += min(block_len, max_event_size),
3372        block_len -= min(block_len, max_event_size))
3373   {
3374     lf_info->last_pos_in_file= my_b_get_pos_in_file(file);
3375     if (lf_info->wrote_create_file)
3376     {
3377       Append_block_log_event a(lf_info->thd, lf_info->thd->db().str, buffer,
3378                                min(block_len, max_event_size),
3379                                lf_info->log_delayed);
3380       if (mysql_bin_log.write_event(&a))
3381         DBUG_RETURN(1);
3382     }
3383     else
3384     {
3385       Begin_load_query_log_event b(lf_info->thd, lf_info->thd->db().str,
3386                                    buffer,
3387                                    min(block_len, max_event_size),
3388                                    lf_info->log_delayed);
3389       if (mysql_bin_log.write_event(&b))
3390         DBUG_RETURN(1);
3391       lf_info->wrote_create_file= 1;
3392     }
3393   }
3394   DBUG_RETURN(0);
3395 }
3396 
3397 /* Helper function for SHOW BINLOG/RELAYLOG EVENTS */
show_binlog_events(THD * thd,MYSQL_BIN_LOG * binary_log)3398 bool show_binlog_events(THD *thd, MYSQL_BIN_LOG *binary_log)
3399 {
3400   Protocol *protocol= thd->get_protocol();
3401   List<Item> field_list;
3402   const char *errmsg = 0;
3403   bool ret = TRUE;
3404   IO_CACHE log;
3405   File file = -1;
3406   int old_max_allowed_packet= thd->variables.max_allowed_packet;
3407   LOG_INFO linfo;
3408 
3409   DBUG_ENTER("show_binlog_events");
3410 
3411   assert(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS ||
3412          thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS);
3413 
3414   Format_description_log_event *description_event= new
3415     Format_description_log_event(3); /* MySQL 4.0 by default */
3416 
3417   if (binary_log->is_open())
3418   {
3419     LEX_MASTER_INFO *lex_mi= &thd->lex->mi;
3420     SELECT_LEX_UNIT *unit= thd->lex->unit;
3421     ha_rows event_count, limit_start, limit_end;
3422     my_off_t pos = max<my_off_t>(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly
3423     char search_file_name[FN_REFLEN], *name;
3424     const char *log_file_name = lex_mi->log_file_name;
3425     mysql_mutex_t *log_lock = binary_log->get_log_lock();
3426     Log_event* ev;
3427 
3428     unit->set_limit(thd->lex->current_select());
3429     limit_start= unit->offset_limit_cnt;
3430     limit_end= unit->select_limit_cnt;
3431 
3432     name= search_file_name;
3433     if (log_file_name)
3434       binary_log->make_log_name(search_file_name, log_file_name);
3435     else
3436       name=0;					// Find first log
3437 
3438     linfo.index_file_offset = 0;
3439 
3440     if (binary_log->find_log_pos(&linfo, name, true/*need_lock_index=true*/))
3441     {
3442       errmsg = "Could not find target log";
3443       goto err;
3444     }
3445 
3446     mysql_mutex_lock(&thd->LOCK_thd_data);
3447     thd->current_linfo = &linfo;
3448     mysql_mutex_unlock(&thd->LOCK_thd_data);
3449 
3450     if ((file=open_binlog_file(&log, linfo.log_file_name, &errmsg)) < 0)
3451       goto err;
3452 
3453     my_off_t end_pos;
3454     /*
3455       Acquire LOCK_log only for the duration to calculate the
3456       log's end position. LOCK_log should be acquired even while
3457       we are checking whether the log is active log or not.
3458     */
3459     mysql_mutex_lock(log_lock);
3460     if (binary_log->is_active(linfo.log_file_name))
3461     {
3462       LOG_INFO li;
3463       binary_log->get_current_log(&li, false /*LOCK_log is already acquired*/);
3464       end_pos= li.pos;
3465     }
3466     else
3467     {
3468       end_pos= my_b_filelength(&log);
3469     }
3470     mysql_mutex_unlock(log_lock);
3471 
3472     /*
3473       to account binlog event header size
3474     */
3475     thd->variables.max_allowed_packet += MAX_LOG_EVENT_HEADER;
3476 
3477     DEBUG_SYNC(thd, "after_show_binlog_event_found_file");
3478 
3479     /*
3480       open_binlog_file() sought to position 4.
3481       Read the first event in case it's a Format_description_log_event, to
3482       know the format. If there's no such event, we are 3.23 or 4.x. This
3483       code, like before, can't read 3.23 binlogs.
3484       Also read the second event, in case it's a Start_encryption_log_event.
3485       This code will fail on a mixed relay log (one which has Format_desc then
3486       Rotate then Format_desc).
3487     */
3488 
3489     my_off_t scan_pos= BIN_LOG_HEADER_SIZE;
3490     while (scan_pos < pos)
3491     {
3492       ev= Log_event::read_log_event(&log, (mysql_mutex_t*)0, description_event,
3493                                     opt_master_verify_checksum);
3494       scan_pos= my_b_tell(&log);
3495       if (ev == NULL || (ev->get_type_code() != binary_log::FORMAT_DESCRIPTION_EVENT &&
3496           !ev->is_valid()))
3497       {
3498         errmsg = "Wrong offset or I/O error";
3499         goto err;
3500       }
3501       if (ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT)
3502       {
3503         delete description_event;
3504         description_event= (Format_description_log_event*) ev;
3505         if (!description_event->is_valid())
3506         {
3507           errmsg="Invalid Format_description event; could be out of memory";
3508           goto err;
3509         }
3510       }
3511       else
3512       {
3513         if (ev->get_type_code() == binary_log::START_ENCRYPTION_EVENT)
3514         {
3515           if (description_event->start_decryption(static_cast<Start_encryption_log_event*>(ev)))
3516           {
3517             delete ev;
3518             errmsg= "Could not initialize decryption of binlog.";
3519             goto err;
3520           }
3521         }
3522         delete ev;
3523         break;
3524       }
3525     }
3526 
3527     my_b_seek(&log, pos);
3528 
3529     for (event_count = 0;
3530          (ev = Log_event::read_log_event(&log, (mysql_mutex_t*) 0,
3531                                          description_event,
3532                                          opt_master_verify_checksum)); )
3533     {
3534       DEBUG_SYNC(thd, "wait_in_show_binlog_events_loop");
3535       if (event_count >= limit_start &&
3536 	 ev->net_send(protocol, linfo.log_file_name, pos))
3537       {
3538 	errmsg = "Net error";
3539 	delete ev;
3540 	goto err;
3541       }
3542 
3543       if (ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT)
3544       {
3545         Format_description_log_event* new_fdle=
3546           static_cast<Format_description_log_event*>(ev);
3547         new_fdle->copy_crypto_data(*description_event);
3548         delete description_event;
3549         description_event= new_fdle;
3550       }
3551       else
3552       {
3553         if (ev->get_type_code() == binary_log::START_ENCRYPTION_EVENT)
3554         {
3555           if (description_event->start_decryption(static_cast<Start_encryption_log_event*>(ev)))
3556           {
3557             errmsg= "Error starting decryption";
3558             delete ev;
3559             goto err;
3560           }
3561         }
3562         delete ev;
3563       }
3564 
3565       pos = my_b_tell(&log);
3566 
3567       if (++event_count >= limit_end || pos >= end_pos)
3568 	break;
3569     }
3570 
3571     if (event_count < limit_end && log.error)
3572     {
3573       errmsg = "Wrong offset or I/O error";
3574       goto err;
3575     }
3576 
3577   }
3578   // Check that linfo is still on the function scope.
3579   DEBUG_SYNC(thd, "after_show_binlog_events");
3580 
3581   ret= FALSE;
3582 
3583 err:
3584   delete description_event;
3585   if (file >= 0)
3586   {
3587     end_io_cache(&log);
3588     mysql_file_close(file, MYF(MY_WME));
3589   }
3590 
3591   if (errmsg)
3592   {
3593     if(thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS)
3594       my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
3595              "SHOW RELAYLOG EVENTS", errmsg);
3596     else
3597       my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
3598              "SHOW BINLOG EVENTS", errmsg);
3599   }
3600   else
3601     my_eof(thd);
3602 
3603   mysql_mutex_lock(&thd->LOCK_thd_data);
3604   thd->current_linfo = 0;
3605   mysql_mutex_unlock(&thd->LOCK_thd_data);
3606   thd->variables.max_allowed_packet= old_max_allowed_packet;
3607   DBUG_RETURN(ret);
3608 }
3609 
3610 /**
3611   Execute a SHOW BINLOG EVENTS statement.
3612 
3613   @param thd Pointer to THD object for the client thread executing the
3614   statement.
3615 
3616   @retval FALSE success
3617   @retval TRUE failure
3618 */
mysql_show_binlog_events(THD * thd)3619 bool mysql_show_binlog_events(THD* thd)
3620 {
3621   List<Item> field_list;
3622   DBUG_ENTER("mysql_show_binlog_events");
3623 
3624   assert(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS);
3625 
3626   Log_event::init_show_field_list(&field_list);
3627   if (thd->send_result_metadata(&field_list,
3628                                 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
3629     DBUG_RETURN(TRUE);
3630 
3631   /*
3632     Wait for handlers to insert any pending information
3633     into the binlog.  For e.g. ndb which updates the binlog asynchronously
3634     this is needed so that the uses sees all its own commands in the binlog
3635   */
3636   ha_binlog_wait(thd);
3637 
3638   DBUG_RETURN(show_binlog_events(thd, &mysql_bin_log));
3639 }
3640 
3641 #endif /* HAVE_REPLICATION */
3642 
3643 
MYSQL_BIN_LOG(uint * sync_period,enum cache_type io_cache_type_arg)3644 MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period,
3645                              enum cache_type io_cache_type_arg)
3646   :name(NULL), write_error(false), inited(false),
3647    io_cache_type(io_cache_type_arg),
3648 #ifdef HAVE_PSI_INTERFACE
3649    m_key_LOCK_log(key_LOG_LOCK_log),
3650 #endif
3651    bytes_written(0), binlog_space_total(0), file_id(1),
3652    open_count(1), sync_period_ptr(sync_period), sync_counter(0),
3653    is_relay_log(0), signal_cnt(0),
3654    checksum_alg_reset(binary_log::BINLOG_CHECKSUM_ALG_UNDEF),
3655    relay_log_checksum_alg(binary_log::BINLOG_CHECKSUM_ALG_UNDEF),
3656    previous_gtid_set_relaylog(0), snapshot_lock_acquired(false),
3657    is_rotating_caused_by_incident(false)
3658 {
3659   log_state.atomic_set(LOG_CLOSED);
3660   /*
3661     We don't want to initialize locks here as such initialization depends on
3662     safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
3663     called only in main(). Doing initialization here would make it happen
3664     before main().
3665   */
3666   m_prep_xids.atomic_set(0);
3667   memset(&log_file, 0, sizeof(log_file));
3668   index_file_name[0] = 0;
3669   memset(&index_file, 0, sizeof(index_file));
3670   memset(&purge_index_file, 0, sizeof(purge_index_file));
3671   memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
3672 }
3673 
3674 
3675 /* this is called only once */
3676 
cleanup()3677 void MYSQL_BIN_LOG::cleanup()
3678 {
3679   DBUG_ENTER("cleanup");
3680   if (inited)
3681   {
3682     inited= 0;
3683     close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, true /*need_lock_log=true*/,
3684           true /*need_lock_index=true*/);
3685     mysql_mutex_destroy(&LOCK_log);
3686     mysql_mutex_destroy(&LOCK_index);
3687     mysql_mutex_destroy(&LOCK_commit);
3688     mysql_mutex_destroy(&LOCK_sync);
3689     mysql_mutex_destroy(&LOCK_binlog_end_pos);
3690     mysql_mutex_destroy(&LOCK_xids);
3691     mysql_cond_destroy(&update_cond);
3692     mysql_cond_destroy(&m_prep_xids_cond);
3693     stage_manager.deinit();
3694   }
3695   DBUG_VOID_RETURN;
3696 }
3697 
3698 
init_pthread_objects()3699 void MYSQL_BIN_LOG::init_pthread_objects()
3700 {
3701   assert(inited == 0);
3702   inited= 1;
3703   mysql_mutex_init(m_key_LOCK_log, &LOCK_log, MY_MUTEX_INIT_SLOW);
3704   mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
3705   mysql_mutex_init(m_key_LOCK_commit, &LOCK_commit, MY_MUTEX_INIT_FAST);
3706   mysql_mutex_init(m_key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
3707   mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
3708                    MY_MUTEX_INIT_FAST);
3709   mysql_mutex_init(m_key_LOCK_xids, &LOCK_xids, MY_MUTEX_INIT_FAST);
3710   mysql_cond_init(m_key_update_cond, &update_cond);
3711   mysql_cond_init(m_key_prep_xids_cond, &m_prep_xids_cond);
3712   stage_manager.init(
3713 #ifdef HAVE_PSI_INTERFACE
3714                    m_key_LOCK_flush_queue,
3715                    m_key_LOCK_sync_queue,
3716                    m_key_LOCK_commit_queue,
3717                    m_key_LOCK_done, m_key_COND_done
3718 #endif
3719                    );
3720 }
3721 
3722 
3723 /**
3724   Check if a string is a valid number.
3725 
3726   @param str			String to test
3727   @param res			Store value here
3728   @param allow_wildcards	Set to 1 if we should ignore '%' and '_'
3729 
3730   @note
3731     For the moment the allow_wildcards argument is not used
3732     Should be moved to some other file.
3733 
3734   @retval
3735     1	String is a number
3736   @retval
3737     0	String is not a number
3738 */
3739 
is_number(const char * str,ulong * res,bool allow_wildcards)3740 static bool is_number(const char *str,
3741                       ulong *res, bool allow_wildcards)
3742 {
3743   int flag;
3744   const char *start;
3745   DBUG_ENTER("is_number");
3746 
3747   flag=0; start=str;
3748   while (*str++ == ' ') ;
3749   if (*--str == '-' || *str == '+')
3750     str++;
3751   while (my_isdigit(files_charset_info,*str) ||
3752 	 (allow_wildcards && (*str == wild_many || *str == wild_one)))
3753   {
3754     flag=1;
3755     str++;
3756   }
3757   if (*str == '.')
3758   {
3759     for (str++ ;
3760 	 my_isdigit(files_charset_info,*str) ||
3761 	   (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
3762 	 str++, flag=1) ;
3763   }
3764   if (*str != 0 || flag == 0)
3765     DBUG_RETURN(0);
3766   if (res)
3767     *res=atol(start);
3768   DBUG_RETURN(1);			/* Number ok */
3769 } /* is_number */
3770 
3771 
3772 /*
3773    Number of warnings that will be printed to error log
3774    before extension number is exhausted.
3775 */
3776 #define LOG_WARN_UNIQUE_FN_EXT_LEFT 1000
3777 
3778 /**
3779   Find a unique filename for 'filename.#'.
3780 
3781   Set '#' to the highest existing log file extension plus one.
3782 
3783   This function will return nonzero if: (i) the generated name
3784   exceeds FN_REFLEN; (ii) if the number of extensions is exhausted;
3785   or (iii) some other error happened while examining the filesystem.
3786 
3787   @return
3788     nonzero if not possible to get unique filename.
3789 */
3790 
find_uniq_filename(char * name)3791 static int find_uniq_filename(char *name)
3792 {
3793   uint                  i;
3794   char                  buff[FN_REFLEN], ext_buf[FN_REFLEN];
3795   struct st_my_dir     *dir_info;
3796   struct fileinfo *file_info;
3797   ulong                 max_found= 0, next= 0, number= 0;
3798   size_t		buf_length, length;
3799   char			*start, *end;
3800   int                   error= 0;
3801   DBUG_ENTER("find_uniq_filename");
3802 
3803   length= dirname_part(buff, name, &buf_length);
3804   start=  name + length;
3805   end=    strend(start);
3806 
3807   *end='.';
3808   length= (size_t) (end - start + 1);
3809 
3810   if ((DBUG_EVALUATE_IF("error_unique_log_filename", 1,
3811       !(dir_info= my_dir(buff,MYF(MY_DONT_SORT))))))
3812   {						// This shouldn't happen
3813     my_stpcpy(end,".1");				// use name+1
3814     DBUG_RETURN(1);
3815   }
3816   file_info= dir_info->dir_entry;
3817   for (i= dir_info->number_off_files ; i-- ; file_info++)
3818   {
3819     if (strncmp(file_info->name, start, length) == 0 &&
3820 	is_number(file_info->name+length, &number,0))
3821     {
3822       set_if_bigger(max_found, number);
3823     }
3824   }
3825   my_dirend(dir_info);
3826 
3827   /* check if reached the maximum possible extension number */
3828   if (max_found == MAX_LOG_UNIQUE_FN_EXT)
3829   {
3830     sql_print_error("Log filename extension number exhausted: %06lu. \
3831 Please fix this by archiving old logs and \
3832 updating the index files.", max_found);
3833     error= 1;
3834     goto end;
3835   }
3836 
3837   next= max_found + 1;
3838   if (sprintf(ext_buf, "%06lu", next)<0)
3839   {
3840     error= 1;
3841     goto end;
3842   }
3843   *end++='.';
3844 
3845   /*
3846     Check if the generated extension size + the file name exceeds the
3847     buffer size used. If one did not check this, then the filename might be
3848     truncated, resulting in error.
3849    */
3850   if (((strlen(ext_buf) + (end - name)) >= FN_REFLEN))
3851   {
3852     sql_print_error("Log filename too large: %s%s (%zu). \
3853 Please fix this by archiving old logs and updating the \
3854 index files.", name, ext_buf, (strlen(ext_buf) + (end - name)));
3855     error= 1;
3856     goto end;
3857   }
3858 
3859   if (sprintf(end, "%06lu", next)<0)
3860   {
3861     error= 1;
3862     goto end;
3863   }
3864 
3865   /* print warning if reaching the end of available extensions. */
3866   if ((next > (MAX_LOG_UNIQUE_FN_EXT - LOG_WARN_UNIQUE_FN_EXT_LEFT)))
3867     sql_print_warning("Next log extension: %lu. \
3868 Remaining log filename extensions: %lu. \
3869 Please consider archiving some logs.", next, (MAX_LOG_UNIQUE_FN_EXT - next));
3870 
3871 end:
3872   DBUG_RETURN(error);
3873 }
3874 
generate_new_name(char * new_name,const char * log_name)3875 int MYSQL_BIN_LOG::generate_new_name(char *new_name, const char *log_name)
3876 {
3877   fn_format(new_name, log_name, mysql_data_home, "", 4);
3878   if (!fn_ext(log_name)[0])
3879   {
3880       if (find_uniq_filename(new_name))
3881       {
3882         my_printf_error(ER_NO_UNIQUE_LOGFILE, ER(ER_NO_UNIQUE_LOGFILE),
3883                         MYF(ME_FATALERROR), log_name);
3884         sql_print_error(ER(ER_NO_UNIQUE_LOGFILE), log_name);
3885         return 1;
3886       }
3887   }
3888   return 0;
3889 }
3890 
3891 /**
3892   @todo
3893   The following should be using fn_format();  We just need to
3894   first change fn_format() to cut the file name if it's too long.
3895 */
generate_name(const char * log_name,const char * suffix,char * buff)3896 const char *MYSQL_BIN_LOG::generate_name(const char *log_name,
3897                                          const char *suffix,
3898                                          char *buff)
3899 {
3900   if (!log_name || !log_name[0])
3901   {
3902     strmake(buff, default_logfile_name, FN_REFLEN - strlen(suffix) - 1);
3903     return (const char *)
3904       fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
3905   }
3906   // get rid of extension to avoid problems
3907 
3908   char *p= fn_ext(log_name);
3909   uint length= (uint) (p - log_name);
3910   strmake(buff, log_name, min<size_t>(length, FN_REFLEN-1));
3911   return (const char*)buff;
3912 }
3913 
3914 
init_and_set_log_file_name(const char * log_name,const char * new_name)3915 bool MYSQL_BIN_LOG::init_and_set_log_file_name(const char *log_name,
3916                                                const char *new_name)
3917 {
3918   if (new_name && !my_stpcpy(log_file_name, new_name))
3919     return TRUE;
3920   else if (!new_name && generate_new_name(log_file_name, log_name))
3921     return TRUE;
3922 
3923   return FALSE;
3924 }
3925 
3926 
3927 /**
3928   Open the logfile and init IO_CACHE.
3929 
3930   @param log_name            The name of the log to open
3931   @param new_name            The new name for the logfile.
3932                              NULL forces generate_new_name() to be called.
3933 
3934   @return true if error, false otherwise.
3935 */
3936 
open(PSI_file_key log_file_key,const char * log_name,const char * new_name)3937 bool MYSQL_BIN_LOG::open(
3938 #ifdef HAVE_PSI_INTERFACE
3939                      PSI_file_key log_file_key,
3940 #endif
3941                      const char *log_name,
3942                      const char *new_name)
3943 {
3944   File file= -1;
3945   my_off_t pos= 0;
3946   int open_flags= O_CREAT | O_BINARY;
3947   DBUG_ENTER("MYSQL_BIN_LOG::open");
3948 
3949   write_error= 0;
3950 
3951   if (!(name= my_strdup(key_memory_MYSQL_LOG_name,
3952                         log_name, MYF(MY_WME))))
3953   {
3954     name= (char *)log_name; // for the error message
3955     goto err;
3956   }
3957 
3958   if (init_and_set_log_file_name(name, new_name) ||
3959       DBUG_EVALUATE_IF("fault_injection_init_name", 1, 0))
3960     goto err;
3961 
3962   if (io_cache_type == SEQ_READ_APPEND)
3963     open_flags |= O_RDWR | O_APPEND;
3964   else
3965     open_flags |= O_WRONLY;
3966 
3967   db[0]= 0;
3968 
3969 #ifdef HAVE_PSI_INTERFACE
3970   /* Keep the key for reopen */
3971   m_log_file_key= log_file_key;
3972 #endif
3973 
3974   if ((file= mysql_file_open(log_file_key,
3975                              log_file_name, open_flags,
3976                              MYF(MY_WME))) < 0)
3977     goto err;
3978 
3979   if ((pos= mysql_file_tell(file, MYF(MY_WME))) == MY_FILEPOS_ERROR)
3980   {
3981     if (my_errno() == ESPIPE)
3982       pos= 0;
3983     else
3984       goto err;
3985   }
3986 
3987   if (init_io_cache(&log_file, file, IO_SIZE, io_cache_type, pos, 0,
3988                     MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
3989     goto err;
3990 
3991   log_state.atomic_set(LOG_OPENED);
3992   DBUG_RETURN(0);
3993 
3994 err:
3995   if (binlog_error_action == ABORT_SERVER)
3996   {
3997     exec_binlog_error_action_abort("Either disk is full or file system is read "
3998                                    "only while opening the binlog. Aborting the"
3999                                    " server.");
4000   }
4001   else
4002     sql_print_error("Could not open %s for logging (error %d). "
4003                     "Turning logging off for the whole duration "
4004                     "of the MySQL server process. To turn it on "
4005                     "again: fix the cause, shutdown the MySQL "
4006                     "server and restart it.",
4007                     name, errno);
4008   if (file >= 0)
4009     mysql_file_close(file, MYF(0));
4010   end_io_cache(&log_file);
4011   my_free(name);
4012   name= NULL;
4013   log_state.atomic_set(LOG_CLOSED);
4014   DBUG_RETURN(1);
4015 }
4016 
4017 
open_index_file(const char * index_file_name_arg,const char * log_name,bool need_lock_index)4018 bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
4019                                     const char *log_name, bool need_lock_index)
4020 {
4021   bool error= false;
4022   File index_file_nr= -1;
4023   if (need_lock_index)
4024     mysql_mutex_lock(&LOCK_index);
4025   else
4026     mysql_mutex_assert_owner(&LOCK_index);
4027 
4028   /*
4029     First open of this class instance
4030     Create an index file that will hold all file names uses for logging.
4031     Add new entries to the end of it.
4032   */
4033   myf opt= MY_UNPACK_FILENAME;
4034 
4035   if (my_b_inited(&index_file))
4036     goto end;
4037 
4038   if (!index_file_name_arg)
4039   {
4040     index_file_name_arg= log_name;    // Use same basename for index file
4041     opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
4042   }
4043   fn_format(index_file_name, index_file_name_arg, mysql_data_home,
4044             ".index", opt);
4045 
4046   if (set_crash_safe_index_file_name(index_file_name_arg))
4047   {
4048     sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed.");
4049     error= true;
4050     goto end;
4051   }
4052 
4053   /*
4054     We need move crash_safe_index_file to index_file if the index_file
4055     does not exist and crash_safe_index_file exists when mysqld server
4056     restarts.
4057   */
4058   if (my_access(index_file_name, F_OK) &&
4059       !my_access(crash_safe_index_file_name, F_OK) &&
4060       my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)))
4061   {
4062     sql_print_error("MYSQL_BIN_LOG::open_index_file failed to "
4063                     "move crash_safe_index_file to index file.");
4064     error= true;
4065     goto end;
4066   }
4067 
4068   if ((index_file_nr= mysql_file_open(m_key_file_log_index,
4069                                       index_file_name,
4070                                       O_RDWR | O_CREAT | O_BINARY,
4071                                       MYF(MY_WME))) < 0 ||
4072        mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
4073        init_io_cache_ext(&index_file, index_file_nr,
4074                          IO_SIZE, READ_CACHE,
4075                          mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
4076                                          0, MYF(MY_WME | MY_WAIT_IF_FULL),
4077                          m_key_file_log_index_cache) ||
4078       DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
4079   {
4080     /*
4081       TODO: all operations creating/deleting the index file or a log, should
4082       call my_sync_dir() or my_sync_dir_by_file() to be durable.
4083       TODO: file creation should be done with mysql_file_create()
4084       not mysql_file_open().
4085     */
4086     if (index_file_nr >= 0)
4087       mysql_file_close(index_file_nr, MYF(0));
4088     error= true;
4089     goto end;
4090   }
4091 
4092 #ifdef HAVE_REPLICATION
4093   /*
4094     Sync the index by purging any binary log file that is not registered.
4095     In other words, either purge binary log files that were removed from
4096     the index but not purged from the file system due to a crash or purge
4097     any binary log file that was created but not register in the index
4098     due to a crash.
4099   */
4100 
4101   if (set_purge_index_file_name(index_file_name_arg) ||
4102       open_purge_index_file(FALSE) ||
4103       purge_index_entry(NULL, NULL, false) ||
4104       close_purge_index_file() ||
4105       DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
4106   {
4107     sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
4108                     "file.");
4109     error= true;
4110     goto end;
4111   }
4112 #endif
4113 
4114 end:
4115   if (need_lock_index)
4116     mysql_mutex_unlock(&LOCK_index);
4117   return error;
4118 }
4119 
4120 /**
4121   Add the GTIDs from the given relaylog file and also
4122   update the IO thread transaction parser.
4123 
4124   @param filename Relaylog file to read from.
4125   @param retrieved_set Gtid_set to store the GTIDs found on the relaylog file.
4126   @param verify_checksum Set to true to verify event checksums.
4127   @param trx_parser The transaction boundary parser to be used in order to
4128   only add a GTID to the gtid_set after ensuring the transaction is fully
4129   stored on the relay log.
4130   @param gtid_partial_trx The gtid of the last incomplete transaction
4131   found in the relay log.
4132 
4133   @retval false The file was successfully read and all GTIDs from
4134   Previous_gtids and Gtid_log_event from complete transactions were added to
4135   the retrieved_set.
4136   @retval true There was an error during the procedure.
4137 */
4138 static bool
read_gtids_and_update_trx_parser_from_relaylog(const char * filename,Gtid_set * retrieved_gtids,bool verify_checksum,Transaction_boundary_parser * trx_parser,Gtid * gtid_partial_trx)4139 read_gtids_and_update_trx_parser_from_relaylog(
4140   const char *filename,
4141   Gtid_set *retrieved_gtids,
4142   bool verify_checksum,
4143   Transaction_boundary_parser *trx_parser,
4144   Gtid *gtid_partial_trx)
4145 {
4146   DBUG_ENTER("read_gtids_and_update_trx_parser_from_relaylog");
4147   DBUG_PRINT("info", ("Opening file %s", filename));
4148 
4149   assert(retrieved_gtids != NULL);
4150   assert(trx_parser != NULL);
4151 #ifndef NDEBUG
4152   unsigned long event_counter= 0;
4153 #endif
4154 
4155   /*
4156     Create a Format_description_log_event that is used to read the
4157     first event of the log.
4158   */
4159   Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
4160   if (!fd_ev.is_valid())
4161     DBUG_RETURN(true);
4162 
4163   File file;
4164   IO_CACHE log;
4165 
4166   const char *errmsg= NULL;
4167   if ((file= open_binlog_file(&log, filename, &errmsg)) < 0)
4168   {
4169     sql_print_error("%s", errmsg);
4170     /*
4171       As read_gtids_from_binlog() will not throw error on truncated
4172       relaylog files, we should do the same here in order to keep the
4173       current behavior.
4174     */
4175     DBUG_RETURN(false);
4176   }
4177 
4178   fd_ev_p->reset_crypto();
4179 
4180   /*
4181     Seek for Previous_gtids_log_event and Gtid_log_event events to
4182     gather information what has been processed so far.
4183   */
4184   my_b_seek(&log, BIN_LOG_HEADER_SIZE);
4185   Log_event *ev= NULL;
4186   bool error= false;
4187   bool seen_prev_gtids= false;
4188   ulong data_len= 0;
4189 
4190   while (!error &&
4191          (ev= Log_event::read_log_event(&log, 0, fd_ev_p, verify_checksum)) !=
4192          NULL)
4193   {
4194     DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
4195 #ifndef NDEBUG
4196     event_counter++;
4197 #endif
4198 
4199     data_len= uint4korr(ev->temp_buf + EVENT_LEN_OFFSET);
4200     if (trx_parser->feed_event(ev->temp_buf, data_len, fd_ev_p, false))
4201     {
4202       /*
4203         The transaction boundary parser found an error while parsing a
4204         sequence of events from the relaylog. As we don't know if the
4205         parsing has started from a reliable point (it might started in
4206         a relay log file that begins with the rest of a transaction
4207         that started in a previous relay log file), it is better to do
4208         nothing in this case. The boundary parser will fix itself once
4209         finding an event that represent a transaction boundary.
4210 
4211         Suppose the following relaylog:
4212 
4213          rl-bin.000011 | rl-bin.000012 | rl-bin.000013 | rl-bin-000014
4214         ---------------+---------------+---------------+---------------
4215          PREV_GTIDS    | PREV_GTIDS    | PREV_GTIDS    | PREV_GTIDS
4216          (empty)       | (UUID:1-2)    | (UUID:1-2)    | (UUID:1-2)
4217         ---------------+---------------+---------------+---------------
4218          XID           | QUERY(INSERT) | QUERY(INSERT) | XID
4219         ---------------+---------------+---------------+---------------
4220          GTID(UUID:2)  |
4221         ---------------+
4222          QUERY(CREATE  |
4223          TABLE t1 ...) |
4224         ---------------+
4225          GTID(UUID:3)  |
4226         ---------------+
4227          QUERY(BEGIN)  |
4228         ---------------+
4229 
4230         As it is impossible to determine the current Retrieved_Gtid_Set by only
4231         looking to the PREVIOUS_GTIDS on the last relay log file, and scanning
4232         events on it, we tried to find a relay log file that contains at least
4233         one GTID event during the backwards search.
4234 
4235         In the example, we will find a GTID only in rl-bin.000011, as the
4236         UUID:3 transaction was spanned across 4 relay log files.
4237 
4238         The transaction spanning can be caused by "FLUSH RELAY LOGS" commands
4239         on slave while it is queuing the transaction.
4240 
4241         So, in order to correctly add UUID:3 into Retrieved_Gtid_Set, we need
4242         to parse the relay log starting on the file we found the last GTID
4243         queued to know if the transaction was fully retrieved or not.
4244 
4245         Start scanning rl-bin.000011 after resetting the transaction parser
4246         will generate an error, as XID event is only expected inside a DML,
4247         but in this case, we can ignore this error and reset the parser.
4248       */
4249       trx_parser->reset();
4250       /*
4251         We also have to discard the GTID of the partial transaction that was
4252         not finished if there is one. This is needed supposing that an
4253         incomplete transaction was replicated with a GTID.
4254 
4255         GTID(1), QUERY(BEGIN), QUERY(INSERT), ANONYMOUS_GTID, QUERY(DROP ...)
4256 
4257         In the example above, without cleaning the gtid_partial_trx,
4258         the GTID(1) would be added to the Retrieved_Gtid_Set after the
4259         QUERY(DROP ...) event.
4260 
4261         GTID(1), QUERY(BEGIN), QUERY(INSERT), GTID(2), QUERY(DROP ...)
4262 
4263         In the example above the GTID(1) will also be discarded as the
4264         GTID(1) transaction is not complete.
4265       */
4266       if (!gtid_partial_trx->is_empty())
4267       {
4268         DBUG_PRINT("info", ("Discarding Gtid(%d, %lld) as the transaction "
4269                             "wasn't complete and we found an error in the"
4270                             "transaction boundary parser.",
4271                             gtid_partial_trx->sidno,
4272                             gtid_partial_trx->gno));
4273         gtid_partial_trx->clear();
4274       }
4275     }
4276 
4277     Format_description_log_event *new_fd_ev_p= NULL;
4278     switch (ev->get_type_code())
4279     {
4280     case binary_log::FORMAT_DESCRIPTION_EVENT:
4281       new_fd_ev_p= static_cast<Format_description_log_event*>(ev);
4282       new_fd_ev_p->copy_crypto_data(*fd_ev_p);
4283       if (fd_ev_p != &fd_ev)
4284         delete fd_ev_p;
4285       fd_ev_p= new_fd_ev_p;
4286       break;
4287     case binary_log::ROTATE_EVENT:
4288       // do nothing; just accept this event and go to next
4289       break;
4290     case binary_log::PREVIOUS_GTIDS_LOG_EVENT:
4291     {
4292       seen_prev_gtids= true;
4293       // add events to sets
4294       Previous_gtids_log_event *prev_gtids_ev= (Previous_gtids_log_event *)ev;
4295       if (prev_gtids_ev->add_to_set(retrieved_gtids) != 0)
4296       {
4297         error= true;
4298         break;
4299       }
4300 #ifndef NDEBUG
4301       char* prev_buffer= prev_gtids_ev->get_str(NULL, NULL);
4302       DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
4303                           filename, prev_buffer));
4304       my_free(prev_buffer);
4305 #endif
4306       break;
4307     }
4308     case binary_log::GTID_LOG_EVENT:
4309     {
4310       /* If we didn't find any PREVIOUS_GTIDS in this file */
4311       if (!seen_prev_gtids)
4312       {
4313         my_error(ER_BINLOG_LOGICAL_CORRUPTION, MYF(0), filename,
4314                  "The first global transaction identifier was read, but "
4315                  "no other information regarding identifiers existing "
4316                  "on the previous log files was found.");
4317         error= true;
4318         break;
4319       }
4320 
4321       Gtid_log_event *gtid_ev= (Gtid_log_event *)ev;
4322       rpl_sidno sidno= gtid_ev->get_sidno(retrieved_gtids->get_sid_map());
4323       if (sidno < 0)
4324       {
4325         error= true;
4326         break;
4327       }
4328       else
4329       {
4330         if (retrieved_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK)
4331         {
4332           error= true;
4333           break;
4334         }
4335         else
4336         {
4337           /*
4338             As are updating the transaction boundary parser while reading
4339             GTIDs from relay log files to fill the Retrieved_Gtid_Set, we
4340             should not add the GTID here as we don't know if the transaction
4341             is complete on the relay log yet.
4342           */
4343           gtid_partial_trx->set(sidno, gtid_ev->get_gno());
4344         }
4345         DBUG_PRINT("info", ("Found Gtid in relaylog file '%s': Gtid(%d, %lld).",
4346                             filename, sidno, gtid_ev->get_gno()));
4347       }
4348       break;
4349     }
4350     case binary_log::START_ENCRYPTION_EVENT:
4351       if (fd_ev_p->start_decryption((Start_encryption_log_event*) ev))
4352         sql_print_warning("Error initializing decryption while reading GTIDs from relaylog");
4353       break;
4354     case binary_log::ANONYMOUS_GTID_LOG_EVENT:
4355     default:
4356       /*
4357         If we reached the end of a transaction after storing it's GTID
4358         in gtid_partial_trx variable, it is time to add this GTID to the
4359         retrieved_gtids set because the transaction is complete and there is no
4360         need for asking this transaction again.
4361       */
4362       if (trx_parser->is_not_inside_transaction())
4363       {
4364         if (!gtid_partial_trx->is_empty())
4365         {
4366           DBUG_PRINT("info", ("Adding Gtid to Retrieved_Gtid_Set as the "
4367                               "transaction was completed at "
4368                               "relaylog file '%s': Gtid(%d, %lld).",
4369                               filename, gtid_partial_trx->sidno,
4370                               gtid_partial_trx->gno));
4371           retrieved_gtids->_add_gtid(gtid_partial_trx->sidno,
4372                                      gtid_partial_trx->gno);
4373           gtid_partial_trx->clear();
4374         }
4375       }
4376       break;
4377     }
4378     if (ev != fd_ev_p)
4379       delete ev;
4380   }
4381 
4382   if (log.error < 0)
4383   {
4384     // This is not a fatal error; the log may just be truncated.
4385     // @todo but what other errors could happen? IO error?
4386     sql_print_warning("Error reading GTIDs from relaylog: %d", log.error);
4387   }
4388 
4389   if (fd_ev_p != &fd_ev)
4390   {
4391     delete fd_ev_p;
4392     fd_ev_p= &fd_ev;
4393   }
4394 
4395   mysql_file_close(file, MYF(MY_WME));
4396   end_io_cache(&log);
4397 
4398 #ifndef NDEBUG
4399   sql_print_information("%lu events read in relaylog file '%s' for updating "
4400                         "Retrieved_Gtid_Set and/or IO thread transaction "
4401                         "parser state.",
4402                         event_counter, filename);
4403 #endif
4404 
4405   DBUG_RETURN(error);
4406 }
4407 
4408 /**
4409   Reads GTIDs from the given binlog file.
4410 
4411   @param filename File to read from.
4412   @param all_gtids If not NULL, then the GTIDs from the
4413   Previous_gtids_log_event and from all Gtid_log_events are stored in
4414   this object.
4415   @param prev_gtids If not NULL, then the GTIDs from the
4416   Previous_gtids_log_events are stored in this object.
4417   @param first_gtid If not NULL, then the first GTID information from the
4418   file will be stored in this object.
4419   @param sid_map The sid_map object to use in the rpl_sidno generation
4420   of the Gtid_log_event. If lock is needed in the sid_map, the caller
4421   must hold it.
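  @param verify_checksum Set to true to verify event checksums.
  @param is_relay_log Set to true when the file is a relay log. In that
  case the scan stops at the first Gtid_log_event or
  Anonymous_gtid_log_event (returning GOT_GTIDS without storing any GTID
  from it), or at the first other ordinary event found after the
  Previous_gtids_log_event (returning GOT_PREVIOUS_GTIDS).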
4423 
4424   @retval GOT_GTIDS The file was successfully read and it contains
4425   both Gtid_log_events and Previous_gtids_log_events.
4426   This is only possible if either all_gtids or first_gtid are not null.
4427   @retval GOT_PREVIOUS_GTIDS The file was successfully read and it
4428   contains Previous_gtids_log_events but no Gtid_log_events.
4429   For binary logs, if no all_gtids and no first_gtid are specified,
4430   this function will be done right after reading the PREVIOUS_GTIDS
4431   regardless of the rest of the content of the binary log file.
4432   @retval NO_GTIDS The file was successfully read and it does not
4433   contain GTID events.
4434   @retval ERROR Out of memory, or IO error, or malformed event
4435   structure, or the file is malformed (e.g., contains Gtid_log_events
4436   but no Previous_gtids_log_event).
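  @retval TRUNCATED The file could not be opened by open_binlog_file()
  (presumably because it is missing or was truncated before a valid
  header; the reported message is written to the error log). No event
  is read in this case.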
4439 */
/* Outcome of scanning one log file with read_gtids_from_binlog(). */
enum enum_read_gtids_from_binlog_status
{
  /* A Previous_gtids_log_event and at least one GTID event were seen. */
  GOT_GTIDS= 0,
  /* A Previous_gtids_log_event was seen, but no GTID event. */
  GOT_PREVIOUS_GTIDS= 1,
  /* No GTID-related event was seen. */
  NO_GTIDS= 2,
  /* The scan failed (e.g. a GTID set could not be updated). */
  ERROR= 3,
  /* The file could not be opened. */
  TRUNCATED= 4
};
4442 static enum_read_gtids_from_binlog_status
read_gtids_from_binlog(const char * filename,Gtid_set * all_gtids,Gtid_set * prev_gtids,Gtid * first_gtid,Sid_map * sid_map,bool verify_checksum,bool is_relay_log)4443 read_gtids_from_binlog(const char *filename, Gtid_set *all_gtids,
4444                        Gtid_set *prev_gtids, Gtid *first_gtid,
4445                        Sid_map* sid_map,
4446                        bool verify_checksum, bool is_relay_log)
4447 {
4448   DBUG_ENTER("read_gtids_from_binlog");
4449   DBUG_PRINT("info", ("Opening file %s", filename));
4450 
4451   /*
4452     Create a Format_description_log_event that is used to read the
4453     first event of the log.
4454   */
4455   Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
4456   if (!fd_ev.is_valid())
4457     DBUG_RETURN(ERROR);
4458 
4459   File file;
4460   IO_CACHE log;
4461 
4462 #ifndef NDEBUG
4463   unsigned long event_counter= 0;
4464   /*
4465     We assert here that both all_gtids and prev_gtids, if specified,
4466     uses the same sid_map as the one passed as a parameter. This is just
4467     to ensure that, if the sid_map needed some lock and was locked by
4468     the caller, the lock applies to all the GTID sets this function is
4469     dealing with.
4470   */
4471   if (all_gtids)
4472     assert(all_gtids->get_sid_map() == sid_map);
4473   if (prev_gtids)
4474     assert(prev_gtids->get_sid_map() == sid_map);
4475 #endif
4476 
4477   const char *errmsg= NULL;
4478   if ((file= open_binlog_file(&log, filename, &errmsg)) < 0)
4479   {
4480     sql_print_error("%s", errmsg);
4481     /*
4482       We need to revisit the recovery procedure for relay log
4483       files. Currently, it is called after this routine.
4484       /Alfranio
4485     */
4486     DBUG_RETURN(TRUNCATED);
4487   }
4488 
4489   fd_ev_p->reset_crypto();
4490 
4491   /*
4492     Seek for Previous_gtids_log_event and Gtid_log_event events to
4493     gather information what has been processed so far.
4494   */
4495   my_b_seek(&log, BIN_LOG_HEADER_SIZE);
4496   Log_event *ev= NULL;
4497   enum_read_gtids_from_binlog_status ret= NO_GTIDS;
4498   bool done= false;
4499   bool seen_first_gtid= false;
4500   while (!done &&
4501          (ev= Log_event::read_log_event(&log, 0, fd_ev_p, verify_checksum)) !=
4502          NULL)
4503   {
4504 #ifndef NDEBUG
4505     event_counter++;
4506 #endif
4507     DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
4508     Format_description_log_event *new_fd_ev_p= NULL;
4509     switch (ev->get_type_code())
4510     {
4511     case binary_log::FORMAT_DESCRIPTION_EVENT:
4512       new_fd_ev_p= static_cast<Format_description_log_event*>(ev);
4513       new_fd_ev_p->copy_crypto_data(*fd_ev_p);
4514       if (fd_ev_p != &fd_ev)
4515         delete fd_ev_p;
4516       fd_ev_p= new_fd_ev_p;
4517       break;
4518     case binary_log::ROTATE_EVENT:
4519       // do nothing; just accept this event and go to next
4520       break;
4521     case binary_log::PREVIOUS_GTIDS_LOG_EVENT:
4522     {
4523       ret= GOT_PREVIOUS_GTIDS;
4524       // add events to sets
4525       Previous_gtids_log_event *prev_gtids_ev=
4526         (Previous_gtids_log_event *)ev;
4527       if (all_gtids != NULL && prev_gtids_ev->add_to_set(all_gtids) != 0)
4528         ret= ERROR, done= true;
4529       else if (prev_gtids != NULL && prev_gtids_ev->add_to_set(prev_gtids) != 0)
4530         ret= ERROR, done= true;
4531 #ifndef NDEBUG
4532       char* prev_buffer= prev_gtids_ev->get_str(NULL, NULL);
4533       DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
4534                           filename, prev_buffer));
4535       my_free(prev_buffer);
4536 #endif
4537       /*
4538         If this is not a relay log, the previous_gtids were asked and no
4539         all_gtids neither first_gtid were asked, it is fine to consider the
4540         job as done.
4541       */
4542       if (!is_relay_log && prev_gtids != NULL &&
4543           all_gtids == NULL && first_gtid == NULL)
4544         done= true;
4545       DBUG_EXECUTE_IF("inject_fault_bug16502579", {
4546                       DBUG_PRINT("debug", ("PREVIOUS_GTIDS_LOG_EVENT found. "
4547                                            "Injected ret=NO_GTIDS."));
4548                       if (ret == GOT_PREVIOUS_GTIDS)
4549                       {
4550                         ret=NO_GTIDS;
4551                         done= false;
4552                       }
4553                       });
4554       break;
4555     }
4556     case binary_log::GTID_LOG_EVENT:
4557     {
4558       if (ret != GOT_GTIDS)
4559       {
4560         if (ret != GOT_PREVIOUS_GTIDS)
4561         {
4562           /*
4563             Since this routine is run on startup, there may not be a
4564             THD instance. Therefore, ER(X) cannot be used.
4565            */
4566           const char* msg_fmt= (current_thd != NULL) ?
4567                                ER(ER_BINLOG_LOGICAL_CORRUPTION) :
4568                                ER_DEFAULT(ER_BINLOG_LOGICAL_CORRUPTION);
4569           my_printf_error(ER_BINLOG_LOGICAL_CORRUPTION,
4570                           msg_fmt, MYF(0),
4571                           filename,
4572                           "The first global transaction identifier was read, but "
4573                           "no other information regarding identifiers existing "
4574                           "on the previous log files was found.");
4575           ret= ERROR, done= true;
4576           break;
4577         }
4578         else
4579           ret= GOT_GTIDS;
4580       }
4581       /*
4582         When this is a relaylog, we just check if the relay log contains at
4583         least one Gtid_log_event, so that we can distinguish the return values
4584         GOT_GTID and GOT_PREVIOUS_GTIDS. We don't need to read anything else
4585         from the relay log.
4586         When this is a binary log, if all_gtids is requested (i.e., NOT NULL),
4587         we should continue to read all gtids. If just first_gtid was requested,
4588         we will be done after storing this Gtid_log_event info on it.
4589       */
4590       if (is_relay_log)
4591       {
4592         ret= GOT_GTIDS, done= true;
4593       }
4594       else
4595       {
4596         Gtid_log_event *gtid_ev= (Gtid_log_event *)ev;
4597         rpl_sidno sidno= gtid_ev->get_sidno(sid_map);
4598         if (sidno < 0)
4599           ret= ERROR, done= true;
4600         else
4601         {
4602           if (all_gtids)
4603           {
4604             if (all_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK)
4605               ret= ERROR, done= true;
4606             all_gtids->_add_gtid(sidno, gtid_ev->get_gno());
4607             DBUG_PRINT("info", ("Got Gtid from file '%s': Gtid(%d, %lld).",
4608                                 filename, sidno, gtid_ev->get_gno()));
4609           }
4610 
4611           /* If the first GTID was requested, stores it */
4612           if (first_gtid && !seen_first_gtid)
4613           {
4614             first_gtid->set(sidno, gtid_ev->get_gno());
4615             seen_first_gtid= true;
4616             /* If the first_gtid was the only thing requested, we are done */
4617             if (all_gtids == NULL)
4618               ret= GOT_GTIDS, done= true;
4619           }
4620         }
4621       }
4622       break;
4623     }
4624     case binary_log::START_ENCRYPTION_EVENT:
4625     {
4626       if (fd_ev_p->start_decryption(static_cast<Start_encryption_log_event*>(ev)))
4627         sql_print_warning("Error initializing decryption while reading GTIDs from binary log");
4628       // in case start_decryption fails next call to read_log_event will fail too
4629       // this failure will be handled outside the loop
4630       break;
4631     }
4632 
4633     case binary_log::ANONYMOUS_GTID_LOG_EVENT:
4634     {
4635       /*
4636         When this is a relaylog, we just check if it contains
4637         at least one Anonymous_gtid_log_event after initialization
4638         (FDs, Rotates and PREVIOUS_GTIDS), so that we can distinguish the
4639         return values GOT_GTID and GOT_PREVIOUS_GTIDS.
4640         We don't need to read anything else from the relay log.
4641       */
4642       if (is_relay_log)
4643       {
4644         ret= GOT_GTIDS;
4645         done= true;
4646         break;
4647       }
4648       assert(prev_gtids == NULL ? true : all_gtids != NULL ||
4649              first_gtid != NULL);
4650     }
4651     // Fall through.
4652     default:
4653       // if we found any other event type without finding a
4654       // previous_gtids_log_event, then the rest of this binlog
4655       // cannot contain gtids
4656       if (ret != GOT_GTIDS && ret != GOT_PREVIOUS_GTIDS)
4657         done= true;
4658       /*
4659         The GTIDs of the relaylog files will be handled later
4660         because of the possibility of transactions be spanned
4661         along distinct relaylog files.
4662         So, if we found an ordinary event without finding the
4663         GTID but we already found the PREVIOUS_GTIDS, this probably
4664         means that the event is from a transaction that started on
4665         previous relaylog file.
4666       */
4667       if (ret == GOT_PREVIOUS_GTIDS && is_relay_log)
4668         done= true;
4669       break;
4670     }
4671     if (ev != fd_ev_p)
4672       delete ev;
4673     DBUG_PRINT("info", ("done=%d", done));
4674   }
4675 
4676   if (log.error < 0)
4677   {
4678     // This is not a fatal error; the log may just be truncated.
4679 
4680     // @todo but what other errors could happen? IO error?
4681     sql_print_warning("Error reading GTIDs from binary log: %d", log.error);
4682   }
4683 
4684   if (fd_ev_p != &fd_ev)
4685   {
4686     delete fd_ev_p;
4687     fd_ev_p= &fd_ev;
4688   }
4689 
4690   mysql_file_close(file, MYF(MY_WME));
4691   end_io_cache(&log);
4692 
4693   if (all_gtids)
4694     all_gtids->dbug_print("all_gtids");
4695   else
4696     DBUG_PRINT("info", ("all_gtids==NULL"));
4697   if (prev_gtids)
4698     prev_gtids->dbug_print("prev_gtids");
4699   else
4700     DBUG_PRINT("info", ("prev_gtids==NULL"));
4701   if (first_gtid == NULL)
4702     DBUG_PRINT("info", ("first_gtid==NULL"));
4703   else if (first_gtid->sidno == 0)
4704     DBUG_PRINT("info", ("first_gtid.sidno==0"));
4705   else
4706     first_gtid->dbug_print(sid_map, "first_gtid");
4707 
4708   DBUG_PRINT("info", ("returning %d", ret));
4709 #ifndef NDEBUG
4710   if (!is_relay_log && prev_gtids != NULL &&
4711       all_gtids == NULL && first_gtid == NULL)
4712     sql_print_information("Read %lu events from binary log file '%s' to "
4713                           "determine the GTIDs purged from binary logs.",
4714                           event_counter, filename);
4715 #endif
4716   DBUG_RETURN(ret);
4717 }
4718 
find_first_log_not_in_gtid_set(char * binlog_file_name,const Gtid_set * gtid_set,Gtid * first_gtid,std::string & errmsg)4719 bool MYSQL_BIN_LOG::find_first_log_not_in_gtid_set(char *binlog_file_name,
4720                                                    const Gtid_set *gtid_set,
4721                                                    Gtid *first_gtid,
4722                                                    std::string &errmsg)
4723 {
4724   DBUG_ENTER("MYSQL_BIN_LOG::gtid_read_start_binlog");
4725   /*
4726     Gather the set of files to be accessed.
4727   */
4728   list<string> filename_list;
4729   LOG_INFO linfo;
4730   int error;
4731 
4732   list<string>::reverse_iterator rit;
4733   Gtid_set binlog_previous_gtid_set(gtid_set->get_sid_map());
4734 
4735   mysql_mutex_lock(&LOCK_index);
4736   for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/);
4737        !error; error= find_next_log(&linfo, false/*need_lock_index=false*/))
4738   {
4739     DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
4740     filename_list.push_back(string(linfo.log_file_name));
4741   }
4742   mysql_mutex_unlock(&LOCK_index);
4743   if (error != LOG_INFO_EOF)
4744   {
4745     errmsg.assign(
4746         "Failed to read the binary log index file while "
4747         "looking for the oldest binary log that contains any GTID "
4748         "that is not in the given gtid set");
4749     error= -1;
4750     goto end;
4751   }
4752 
4753   if (filename_list.empty())
4754   {
4755     errmsg.assign(
4756         "Could not find first log file name in binary log index file "
4757         "while looking for the oldest binary log that contains any GTID "
4758         "that is not in the given gtid set");
4759     error= -2;
4760     goto end;
4761   }
4762 
4763   /*
4764     Iterate over all the binary logs in reverse order, and read only
4765     the Previous_gtids_log_event, to find the first one, that is the
4766     subset of the given gtid set. Since every binary log begins with
4767     a Previous_gtids_log_event, that contains all GTIDs in all
4768     previous binary logs.
4769     We also ask for the first GTID in the binary log to know if we
4770     should send the FD event with the "created" field cleared or not.
4771   */
4772   DBUG_PRINT("info", ("Iterating backwards through binary logs, and reading "
4773                       "only the Previous_gtids_log_event, to find the first "
4774                       "one, that is the subset of the given gtid set."));
4775   rit= filename_list.rbegin();
4776   error= 0;
4777   while (rit != filename_list.rend())
4778   {
4779     binlog_previous_gtid_set.clear();
4780     const char *filename= rit->c_str();
4781     DBUG_PRINT("info", ("Read Previous_gtids_log_event from filename='%s'",
4782                         filename));
4783     switch (read_gtids_from_binlog(filename, NULL, &binlog_previous_gtid_set,
4784                                    first_gtid,
4785                                    binlog_previous_gtid_set.get_sid_map(),
4786                                    opt_master_verify_checksum, is_relay_log))
4787     {
4788     case ERROR:
4789       errmsg.assign(
4790           "Error reading header of binary log while looking for "
4791           "the oldest binary log that contains any GTID that is not in "
4792           "the given gtid set");
4793       error= -3;
4794       goto end;
4795     case NO_GTIDS:
4796       errmsg.assign(
4797           "Found old binary log without GTIDs while looking for "
4798           "the oldest binary log that contains any GTID that is not in "
4799           "the given gtid set");
4800       error= -4;
4801       goto end;
4802     case GOT_GTIDS:
4803     case GOT_PREVIOUS_GTIDS:
4804       if (binlog_previous_gtid_set.is_subset(gtid_set))
4805       {
4806         strcpy(binlog_file_name, filename);
4807         /*
4808           Verify that the selected binlog is not the first binlog,
4809         */
4810         DBUG_EXECUTE_IF("slave_reconnect_with_gtid_set_executed",
4811                         assert(strcmp(filename_list.begin()->c_str(),
4812                                       binlog_file_name) != 0););
4813         goto end;
4814       }
4815     case TRUNCATED:
4816       break;
4817     }
4818 
4819     rit++;
4820   }
4821 
4822   if (rit == filename_list.rend())
4823   {
4824     report_missing_gtids(&binlog_previous_gtid_set, gtid_set, errmsg);
4825     error= -5;
4826   }
4827 
4828 end:
4829   if (error)
4830     DBUG_PRINT("error", ("'%s'", errmsg.c_str()));
4831   filename_list.clear();
4832   DBUG_PRINT("info", ("returning %d", error));
4833   DBUG_RETURN(error != 0 ? true : false);
4834 }
4835 
init_gtid_sets(Gtid_set * all_gtids,Gtid_set * lost_gtids,bool verify_checksum,bool need_lock,Transaction_boundary_parser * trx_parser,Gtid * gtid_partial_trx,bool is_server_starting)4836 bool MYSQL_BIN_LOG::init_gtid_sets(Gtid_set *all_gtids, Gtid_set *lost_gtids,
4837                                    bool verify_checksum, bool need_lock,
4838                                    Transaction_boundary_parser *trx_parser,
4839                                    Gtid *gtid_partial_trx,
4840                                    bool is_server_starting)
4841 {
4842   DBUG_ENTER("MYSQL_BIN_LOG::init_gtid_sets");
4843   DBUG_PRINT("info", ("lost_gtids=%p; so we are recovering a %s log; is_relay_log=%d",
4844                       lost_gtids, lost_gtids == NULL ? "relay" : "binary",
4845                       is_relay_log));
4846 
4847   /*
4848     If this is a relay log, we must have the IO thread Master_info trx_parser
4849     in order to correctly feed it with relay log events.
4850   */
4851 #ifndef NDEBUG
4852   if (is_relay_log)
4853   {
4854     assert(trx_parser != NULL);
4855     assert(lost_gtids == NULL);
4856   }
4857 #endif
4858 
4859   /*
4860     Acquires the necessary locks to ensure that logs are not either
4861     removed or updated when we are reading from it.
4862   */
4863   if (need_lock)
4864   {
4865     // We don't need LOCK_log if we are only going to read the initial
4866     // Prevoius_gtids_log_event and ignore the Gtid_log_events.
4867     if (all_gtids != NULL)
4868       mysql_mutex_lock(&LOCK_log);
4869     mysql_mutex_lock(&LOCK_index);
4870     global_sid_lock->wrlock();
4871   }
4872   else
4873   {
4874     if (all_gtids != NULL)
4875       mysql_mutex_assert_owner(&LOCK_log);
4876     mysql_mutex_assert_owner(&LOCK_index);
4877     global_sid_lock->assert_some_wrlock();
4878   }
4879 
4880   // Gather the set of files to be accessed.
4881   list<string> filename_list;
4882   LOG_INFO linfo;
4883   int error;
4884 
4885   list<string>::iterator it;
4886   list<string>::reverse_iterator rit;
4887   bool reached_first_file= false;
4888 
4889   /* Initialize the sid_map to be used in read_gtids_from_binlog */
4890   Sid_map *sid_map= NULL;
4891   if (all_gtids)
4892     sid_map= all_gtids->get_sid_map();
4893   else if (lost_gtids)
4894     sid_map= lost_gtids->get_sid_map();
4895 
4896   for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/); !error;
4897        error= find_next_log(&linfo, false/*need_lock_index=false*/))
4898   {
4899     DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
4900     filename_list.push_back(string(linfo.log_file_name));
4901   }
4902   if (error != LOG_INFO_EOF)
4903   {
4904     DBUG_PRINT("error", ("Error reading %s index",
4905                          is_relay_log ? "relaylog" : "binlog"));
4906     goto end;
4907   }
4908   /*
4909     On server starting, one new empty binlog file is created and
4910     its file name is put into index file before initializing
4911     GLOBAL.GTID_EXECUTED AND GLOBAL.GTID_PURGED, it is not the
4912     last binlog file before the server restarts, so we remove
4913     its file name from filename_list.
4914   */
4915   if (is_server_starting && !is_relay_log && !filename_list.empty())
4916     filename_list.pop_back();
4917 
4918   error= 0;
4919 
4920   if (all_gtids != NULL)
4921   {
4922     DBUG_PRINT("info", ("Iterating backwards through %s logs, "
4923                         "looking for the last %s log that contains "
4924                         "a Previous_gtids_log_event.",
4925                         is_relay_log ? "relay" : "binary",
4926                         is_relay_log ? "relay" : "binary"));
4927     // Iterate over all files in reverse order until we find one that
4928     // contains a Previous_gtids_log_event.
4929     rit= filename_list.rbegin();
4930     bool can_stop_reading= false;
4931     reached_first_file= (rit == filename_list.rend());
4932     DBUG_PRINT("info", ("filename='%s' reached_first_file=%d",
4933                         reached_first_file ? "" : rit->c_str(),
4934                         reached_first_file));
4935     while (!can_stop_reading && !reached_first_file)
4936     {
4937       const char *filename= rit->c_str();
4938       assert(rit != filename_list.rend());
4939       rit++;
4940       reached_first_file= (rit == filename_list.rend());
4941       DBUG_PRINT("info", ("filename='%s' can_stop_reading=%d "
4942                           "reached_first_file=%d, ",
4943                           filename, can_stop_reading, reached_first_file));
4944       switch (read_gtids_from_binlog(filename, all_gtids,
4945                                      reached_first_file ? lost_gtids : NULL,
4946                                      NULL/* first_gtid */,
4947                                      sid_map, verify_checksum, is_relay_log))
4948       {
4949         case ERROR:
4950         {
4951           error= 1;
4952           goto end;
4953         }
4954         case GOT_GTIDS:
4955         {
4956           can_stop_reading= true;
4957           break;
4958         }
4959         case GOT_PREVIOUS_GTIDS:
4960         {
4961           /*
4962             If this is a binlog file, it is enough to have GOT_PREVIOUS_GTIDS.
4963             If this is a relaylog file, we need to find at least one GTID to
4964             start parsing the relay log to add GTID of transactions that might
4965             have spanned in distinct relaylog files.
4966           */
4967           if (!is_relay_log)
4968             can_stop_reading= true;
4969           break;
4970         }
4971         case NO_GTIDS:
4972         {
4973           /*
4974             Mysql server iterates backwards through binary logs, looking for
4975             the last binary log that contains a Previous_gtids_log_event for
4976             gathering the set of gtid_executed on server start. This may take
4977             very long time if it has many binary logs and almost all of them
4978             are out of filesystem cache. So if the binlog_gtid_simple_recovery
4979             is enabled, and the last binary log does not contain any GTID
4980             event, do not read any more binary logs, GLOBAL.GTID_EXECUTED and
4981             GLOBAL.GTID_PURGED should be empty in the case.
4982           */
4983           if (binlog_gtid_simple_recovery && is_server_starting &&
4984               !is_relay_log)
4985           {
4986             assert(all_gtids->is_empty());
4987             assert(lost_gtids->is_empty());
4988             goto end;
4989           }
4990           /*FALLTHROUGH*/
4991         }
4992         case TRUNCATED:
4993         {
4994           break;
4995         }
4996       }
4997     }
4998 
4999     /*
5000       If we use GTIDs and have partial transactions on the relay log,
5001       must check if it ends on next relay log files.
5002       We also need to feed the boundary parser with the rest of the
5003       relay log to put it in the correct state before receiving new
5004       events from the master in the case of GTID auto positioning be
5005       disabled.
5006     */
5007     if (is_relay_log && filename_list.size() > 0)
5008     {
5009       /*
5010         Suppose the following relaylog:
5011 
5012          rl-bin.000001 | rl-bin.000002 | rl-bin.000003 | rl-bin-000004
5013         ---------------+---------------+---------------+---------------
5014          PREV_GTIDS    | PREV_GTIDS    | PREV_GTIDS    | PREV_GTIDS
5015          (empty)       | (UUID:1)      | (UUID:1)      | (UUID:1)
5016         ---------------+---------------+---------------+---------------
5017          GTID(UUID:1)  | QUERY(INSERT) | QUERY(INSERT) | XID
5018         ---------------+---------------+---------------+---------------
5019          QUERY(CREATE  |
5020          TABLE t1 ...) |
5021         ---------------+
5022          GTID(UUID:2)  |
5023         ---------------+
5024          QUERY(BEGIN)  |
5025         ---------------+
5026 
5027         As it is impossible to determine the current Retrieved_Gtid_Set by only
5028         looking to the PREVIOUS_GTIDS on the last relay log file, and scanning
5029         events on it, we tried to find a relay log file that contains at least
5030         one GTID event during the backwards search.
5031 
5032         In the example, we will find a GTID only in rl-bin.000001, as the
5033         UUID:2 transaction was spanned across 4 relay log files.
5034 
5035         The transaction spanning can be caused by "FLUSH RELAY LOGS" commands
5036         on slave while it is queuing the transaction.
5037 
5038         So, in order to correctly add UUID:2 into Retrieved_Gtid_Set, we need
5039         to parse the relay log starting on the file we found the last GTID
5040         queued to know if the transaction was fully retrieved or not.
5041       */
5042 
5043       /*
5044         Adjust the reverse iterator to point to the relaylog file we
5045         need to start parsing, as it was incremented after generating
5046         the relay log file name.
5047       */
5048       assert(rit != filename_list.rbegin());
5049       rit--;
5050       assert(rit != filename_list.rend());
5051       /* Reset the transaction parser before feeding it with events */
5052       trx_parser->reset();
5053       gtid_partial_trx->clear();
5054 
5055       DBUG_PRINT("info", ("Iterating forwards through relay logs, "
5056                           "updating the Retrieved_Gtid_Set and updating "
5057                           "IO thread trx parser before start."));
5058       for (it= find(filename_list.begin(), filename_list.end(), *rit);
5059            it != filename_list.end(); it++)
5060       {
5061         const char *filename= it->c_str();
5062         DBUG_PRINT("info", ("filename='%s'", filename));
5063         if (read_gtids_and_update_trx_parser_from_relaylog(filename, all_gtids,
5064                                                            true, trx_parser,
5065                                                            gtid_partial_trx))
5066         {
5067           error= 1;
5068           goto end;
5069         }
5070       }
5071     }
5072   }
5073   if (lost_gtids != NULL && !reached_first_file)
5074   {
5075     /*
5076       This branch is only reacheable by a binary log. The relay log
5077       don't need to get lost_gtids information.
5078 
5079       A 5.6 server sets GTID_PURGED by rotating the binary log.
5080 
5081       A 5.6 server that had recently enabled GTIDs and set GTID_PURGED
5082       would have a sequence of binary logs like:
5083 
5084       master-bin.N  : No PREVIOUS_GTIDS (GTID wasn't enabled)
5085       master-bin.N+1: Has an empty PREVIOUS_GTIDS and a ROTATE
5086                       (GTID was enabled on startup)
5087       master-bin.N+2: Has a PREVIOUS_GTIDS with the content set by a
5088                       SET @@GLOBAL.GTID_PURGED + has GTIDs of some
5089                       transactions.
5090 
5091       If this 5.6 server be upgraded to 5.7 keeping its binary log files,
5092       this routine will have to find the first binary log that contains a
5093       PREVIOUS_GTIDS + a GTID event to ensure that the content of the
5094       GTID_PURGED will be correctly set (assuming binlog_gtid_simple_recovery
5095       is not enabled).
5096     */
5097     DBUG_PRINT("info", ("Iterating forwards through binary logs, looking for "
5098                         "the first binary log that contains both a "
5099                         "Previous_gtids_log_event and a Gtid_log_event."));
5100     assert(!is_relay_log);
5101     for (it= filename_list.begin(); it != filename_list.end(); it++)
5102     {
5103       /*
5104         We should pass a first_gtid to read_gtids_from_binlog when
5105         binlog_gtid_simple_recovery is disabled, or else it will return
5106         right after reading the PREVIOUS_GTIDS event to avoid stall on
5107         reading the whole binary log.
5108       */
5109       Gtid first_gtid= {0, 0};
5110       const char *filename= it->c_str();
5111       DBUG_PRINT("info", ("filename='%s'", filename));
5112       switch (read_gtids_from_binlog(filename, NULL, lost_gtids,
5113                                      binlog_gtid_simple_recovery ? NULL :
5114                                                                    &first_gtid,
5115                                      sid_map, verify_checksum, is_relay_log))
5116       {
5117         case ERROR:
5118         {
5119           error= 1;
5120           /*FALLTHROUGH*/
5121         }
5122         case GOT_GTIDS:
5123         {
5124           goto end;
5125         }
5126         case NO_GTIDS:
5127         case GOT_PREVIOUS_GTIDS:
5128         {
5129           /*
5130             Mysql server iterates forwards through binary logs, looking for
5131             the first binary log that contains both Previous_gtids_log_event
5132             and gtid_log_event for gathering the set of gtid_purged on server
5133             start. It also iterates forwards through binary logs, looking for
5134             the first binary log that contains both Previous_gtids_log_event
5135             and gtid_log_event for gathering the set of gtid_purged when
5136             purging binary logs. This may take very long time if it has many
5137             binary logs and almost all of them are out of filesystem cache.
5138             So if the binlog_gtid_simple_recovery is enabled, we just
5139             initialize GLOBAL.GTID_PURGED from the first binary log, do not
5140             read any more binary logs.
5141           */
5142           if (binlog_gtid_simple_recovery)
5143             goto end;
5144           /*FALLTHROUGH*/
5145         }
5146         case TRUNCATED:
5147         {
5148           break;
5149         }
5150       }
5151     }
5152   }
5153 end:
5154   if (all_gtids)
5155     all_gtids->dbug_print("all_gtids");
5156   if (lost_gtids)
5157     lost_gtids->dbug_print("lost_gtids");
5158   if (need_lock)
5159   {
5160     global_sid_lock->unlock();
5161     mysql_mutex_unlock(&LOCK_index);
5162     if (all_gtids != NULL)
5163       mysql_mutex_unlock(&LOCK_log);
5164   }
5165   filename_list.clear();
5166   DBUG_PRINT("info", ("returning %d", error));
5167   DBUG_RETURN(error != 0 ? true : false);
5168 }
5169 
5170 
/**
  Open a (new) binlog file.

  Steps performed, in order:
  - Generate the log file name; when built with HAVE_REPLICATION, register
    it in the purge index file and sync that file.
  - Open the log file. If it is empty, write the binlog magic header and
    remember that the file name must be appended to the index file.
  - Write a Format_description event, then (if encrypt_binlog is set) a
    Start_encryption event, then a PREVIOUS_GTIDS event (always for relay
    logs; for binary logs only when current_thd is set), then
    extra_description_event if one was supplied with binlog_version >= 4.
  - Flush and sync the log file, append the file name to the index file if
    needed, and mark the log as opened.

  - When calling this when the file is in use, you must have a locks
  on LOCK_log and LOCK_index. The lock returned by get_log_lock() is
  asserted to be owned by the caller.

  @param log_name                 Base file name of the log.
  @param new_name                 Passed to the file-name generation and to
                                  open(); if non-NULL it is the name reported
                                  in the error message on failure.
  @param max_size_arg             Value stored in max_size.
  @param null_created_arg         Stored in the Format_description event's
                                  dont_set_created member.
  @param need_lock_index          Forwarded to add_log_to_index(),
                                  purge_index_entry() and close(). Must not
                                  be true unless need_sid_lock is also true
                                  (asserted).
  @param need_sid_lock            If true, global_sid_lock is write-locked
                                  here while the PREVIOUS_GTIDS event is
                                  built; otherwise the caller is asserted to
                                  hold it already.
  @param extra_description_event  Optional description event written after
                                  PREVIOUS_GTIDS; its 'created' field is
                                  zeroed and it is marked artificial first.

  @retval
    0	ok
  @retval
    1	error
*/

bool MYSQL_BIN_LOG::open_binlog(const char *log_name,
                                const char *new_name,
                                ulong max_size_arg,
                                bool null_created_arg,
                                bool need_lock_index,
                                bool need_sid_lock,
                                Format_description_log_event *extra_description_event)
{
  // lock_index must be acquired *before* sid_lock.
  assert(need_sid_lock || !need_lock_index);
  DBUG_ENTER("MYSQL_BIN_LOG::open_binlog(const char *, ...)");
  DBUG_PRINT("enter",("base filename: %s", log_name));
  /* Human-readable log kind, used only in error messages below. */
  const char *log_to_encrypt= is_relay_log ? "relay_log" : "binlog";

  mysql_mutex_assert_owner(get_log_lock());

  if (init_and_set_log_file_name(log_name, new_name))
  {
    sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
    DBUG_RETURN(1);
  }

  DBUG_PRINT("info", ("generated filename: %s", log_file_name));

#ifdef HAVE_REPLICATION
  /*
    Record the new file name in the purge index file before the log file
    itself is created; the err: path below uses that record
    (purge_index_entry) when a later step fails.
  */
  if (open_purge_index_file(TRUE) ||
      register_create_index_entry(log_file_name) ||
      sync_purge_index_file() ||
      DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
  {
    /**
      @todo: although this was introduced to appease valgrind
      when injecting emulated faults using fault_injection_registering_index
      it may be good to consider what actually happens when
      open_purge_index_file succeeds but register or sync fails.

      Perhaps we might need the code below in MYSQL_BIN_LOG::cleanup
      for "real life" purposes as well?
    */
    DBUG_EXECUTE_IF("fault_injection_registering_index", {
      if (my_b_inited(&purge_index_file))
      {
        end_io_cache(&purge_index_file);
        my_close(purge_index_file.file, MYF(0));
      }
    });

    sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
    DBUG_RETURN(1);
  }
  DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
#endif

  write_error= 0;

  /* open the main log file */
  if (open(
#ifdef HAVE_PSI_INTERFACE
                      m_key_file_log,
#endif
                      log_name, new_name))
  {
#ifdef HAVE_REPLICATION
    close_purge_index_file();
#endif
    DBUG_RETURN(1);                            /* all warnings issued */
  }

  max_size= max_size_arg;

  open_count++;

  /* Set only when the file was empty, i.e. a header had to be written. */
  bool write_file_name_to_index_file=0;

  /* This must be before goto err. */
#ifndef NDEBUG
  binary_log_debug::debug_pretend_version_50034_in_binlog=
    DBUG_EVALUATE_IF("pretend_version_50034_in_binlog", true, false);
#endif
  Format_description_log_event s(BINLOG_VERSION);

  if (!my_b_filelength(&log_file))
  {
    /*
      The binary log file was empty (probably newly created)
      This is the normal case and happens when the user doesn't specify
      an extension for the binary log files.
      In this case we write a standard header to it.
    */
    if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
                        BIN_LOG_HEADER_SIZE))
      goto err;
    bytes_written+= BIN_LOG_HEADER_SIZE;
    write_file_name_to_index_file= 1;
  }

  /*
    don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
    as we won't be able to reset it later
  */
  if (io_cache_type == WRITE_CACHE)
  {
    s.common_header->flags|= LOG_EVENT_BINLOG_IN_USE_F;
  }

  if (is_relay_log)
  {
    /* relay-log */
    if (relay_log_checksum_alg == binary_log::BINLOG_CHECKSUM_ALG_UNDEF)
    {
      /*
        No checksum algorithm has been set for the relay log yet, so pick
        one locally: OFF when opt_slave_sql_verify_checksum is 0, otherwise
        the value of binlog_checksum_options.
      */
      if (opt_slave_sql_verify_checksum == 0)
        relay_log_checksum_alg= binary_log::BINLOG_CHECKSUM_ALG_OFF;
      else
        relay_log_checksum_alg= static_cast<enum_binlog_checksum_alg>
                                (binlog_checksum_options);
    }
    s.common_footer->checksum_alg= relay_log_checksum_alg;
  }
  else
    /* binlog */
    s.common_footer->checksum_alg= static_cast<enum_binlog_checksum_alg>
                                     (binlog_checksum_options);

  /*
    NOTE(review): encryption is disabled before the Format_description and
    Start_encryption events are written and is only initialized afterwards
    (init_with_loaded_key below) -- presumably so these leading events stay
    readable without the key; confirm against the crypto implementation.
  */
  crypto.disable();
  assert((s.common_footer)->checksum_alg !=
         binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
  if (!s.is_valid())
    goto err;
  s.dont_set_created= null_created_arg;
  /* Set LOG_EVENT_RELAY_LOG_F flag for relay log's FD */
  if (is_relay_log)
    s.set_relay_log_event();
  if (s.write(&log_file))
    goto err;
  bytes_written+= s.common_header->data_written;

  if (encrypt_binlog)
  {
    if (crypto.load_latest_binlog_key())
    {
      sql_print_error("Failed to fetch or create percona_binlog key from/in keyring and thus "
                      "failed to initialize %s encryption. Have you enabled "
                      "keyring plugin?", log_to_encrypt);
      goto err;
    }
    DBUG_EXECUTE_IF("check_consecutive_binlog_key_versions",
                    { static uint next_key_version = 1;
                      assert(crypto.get_key_version() == next_key_version++);});

    /* Fresh random nonce for this file; it is stored in the event below. */
    uchar nonce[Binlog_crypt_data::BINLOG_NONCE_LENGTH];
    memset(nonce, 0, Binlog_crypt_data::BINLOG_NONCE_LENGTH);
    if (my_rand_buffer(nonce, sizeof(nonce)))
      goto err;

    Start_encryption_log_event sele(1, crypto.get_key_version(), nonce);
    /* Use the same checksum algorithm as the Format_description event. */
    sele.common_footer->checksum_alg= s.common_footer->checksum_alg;
    if (write_to_file(&sele))
    {
      sql_print_error("Failed to write Start_encryption event to binary log and thus "
                      "failed to initialize %s encryption.", log_to_encrypt);
      goto err;
    }
    bytes_written+= sele.common_header->data_written;

    if (crypto.init_with_loaded_key(sele.crypto_scheme, nonce))
    {
      sql_print_error("Failed to initialize %s encryption.", log_to_encrypt);
      goto err;
    }
  }

  /*
    We need to revisit this code and improve it.
    See further comments in the mysqld.
    /Alfranio
  */
  if (current_thd)
  {
    /*
      For a binary log the PREVIOUS_GTIDS set is computed here into
      logged_gtids_binlog; for a relay log the already maintained
      previous_gtid_set_relaylog is used as is.
    */
    Gtid_set logged_gtids_binlog(global_sid_map, global_sid_lock);
    Gtid_set* previous_logged_gtids;

    if (is_relay_log)
      previous_logged_gtids= previous_gtid_set_relaylog;
    else
      previous_logged_gtids= &logged_gtids_binlog;

    if (need_sid_lock)
      global_sid_lock->wrlock();
    else
      global_sid_lock->assert_some_wrlock();

    if (!is_relay_log)
    {
      const Gtid_set *executed_gtids= gtid_state->get_executed_gtids();
      const Gtid_set *gtids_only_in_table=
        gtid_state->get_gtids_only_in_table();
      /* logged_gtids_binlog= executed_gtids - gtids_only_in_table */
      if (logged_gtids_binlog.add_gtid_set(executed_gtids) !=
          RETURN_STATUS_OK)
      {
        /* Release the lock taken above before leaving through err. */
        if (need_sid_lock)
          global_sid_lock->unlock();
        goto err;
      }
      logged_gtids_binlog.remove_gtid_set(gtids_only_in_table);
    }
    DBUG_PRINT("info",("Generating PREVIOUS_GTIDS for %s file.",
                       is_relay_log ? "relaylog" : "binlog"));
    Previous_gtids_log_event prev_gtids_ev(previous_logged_gtids);
    if (is_relay_log)
      prev_gtids_ev.set_relay_log_event();
    /* The event has been built; the sid lock is released before writing. */
    if (need_sid_lock)
      global_sid_lock->unlock();
    prev_gtids_ev.common_footer->checksum_alg=
                                   (s.common_footer)->checksum_alg;
    if (write_to_file(&prev_gtids_ev))
      goto err;
    bytes_written+= prev_gtids_ev.common_header->data_written;
  }
  else // !(current_thd)
  {
    /*
      If the slave was configured before server restart, the server will
      generate a new relay log file without having current_thd, but this
      new relay log file must have a PREVIOUS_GTIDS event as we now
      generate the PREVIOUS_GTIDS event always.

      This is only needed for relay log files because the server will add
      the PREVIOUS_GTIDS of binary logs (when current_thd==NULL) after
      server's GTID initialization.

      During server's startup at mysqld_main(), from the binary/relay log
      initialization point of view, it will:
      1) Call init_server_components() that will generate a new binary log
         file but won't write the PREVIOUS_GTIDS event yet;
      2) Initialize server's GTIDs;
      3) Write the binary log PREVIOUS_GTIDS;
      4) Call init_slave() in where the new relay log file will be created
         after initializing relay log's Retrieved_Gtid_Set;
    */
    if (is_relay_log)
    {
      if (need_sid_lock)
        global_sid_lock->wrlock();
      else
        global_sid_lock->assert_some_wrlock();

      DBUG_PRINT("info",("Generating PREVIOUS_GTIDS for relaylog file."));
      Previous_gtids_log_event prev_gtids_ev(previous_gtid_set_relaylog);
      prev_gtids_ev.set_relay_log_event();

      if (need_sid_lock)
        global_sid_lock->unlock();

      prev_gtids_ev.common_footer->checksum_alg=
                                   (s.common_footer)->checksum_alg;
      if (write_to_file(&prev_gtids_ev))
        goto err;
      bytes_written+= prev_gtids_ev.common_header->data_written;
    }
  }
  if (extra_description_event &&
      extra_description_event->binlog_version>=4)
  {
    /*
      This is a relay log written to by the I/O slave thread.
      Write the event so that others can later know the format of this relay
      log.
      Note that this event is very close to the original event from the
      master (it has binlog version of the master, event types of the
      master), so this is suitable to parse the next relay log's event. It
      has been produced by
      Format_description_log_event::Format_description_log_event(char* buf,).
      Why don't we want to write the mi_description_event if this
      event is for format<4 (3.23 or 4.x): this is because in that case, the
      mi_description_event describes the data received from the
      master, but not the data written to the relay log (*conversion*),
      which is in format 4 (slave's).
    */
    /*
      Set 'created' to 0, so that in next relay logs this event does not
      trigger cleaning actions on the slave in
      Format_description_log_event::apply_event_impl().
    */
    extra_description_event->created= 0;
    /* Don't set log_pos in event header */
    extra_description_event->set_artificial_event();

    if (write_to_file(extra_description_event))
      goto err;
    bytes_written+= extra_description_event->common_header->data_written;
  }
  /* Make everything written so far durable before touching the index. */
  if (flush_io_cache(&log_file) ||
      mysql_file_sync(log_file.file, MYF(MY_WME)))
    goto err;

  if (write_file_name_to_index_file)
  {
#ifdef HAVE_REPLICATION
    DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
#endif

    assert(my_b_inited(&index_file) != 0);

    /*
      The new log file name is appended into crash safe index file after
      all the content of index file is copied into the crash safe index
      file. Then move the crash safe index file to index file.
    */
    DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
                    {DBUG_SET("+d,simulate_no_free_space_error");});
    if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
        add_log_to_index((uchar*) log_file_name, strlen(log_file_name),
                         need_lock_index))
    {
      DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
                      {
                        DBUG_SET("-d,simulate_file_write_error");
                        DBUG_SET("-d,simulate_no_free_space_error");
                        DBUG_SET("-d,simulate_disk_full_on_open_binlog");
                      });
      goto err;
    }

#ifdef HAVE_REPLICATION
    DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
#endif
  }

  log_state.atomic_set(LOG_OPENED);
  /*
    At every rotate memorize the last transaction counter state to use it as
    offset at logging the transaction logical timestamps.
  */
  mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
  m_dependency_tracker.rotate();
  mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);

#ifdef HAVE_REPLICATION
  close_purge_index_file();
#endif

  update_binlog_end_pos();
  DBUG_RETURN(0);

err:
  /*
    Common failure path: process the entry registered in the purge index
    file (if that file is open) and close it, then either abort the server
    or close this log, depending on binlog_error_action.
  */
#ifdef HAVE_REPLICATION
  if (is_inited_purge_index_file())
    purge_index_entry(NULL, NULL, need_lock_index);
  close_purge_index_file();
#endif
  if (binlog_error_action == ABORT_SERVER)
  {
    std::string err_msg= "Either disk is full or file system is read only ";
    if (encrypt_binlog)
      err_msg+= "or encryption failed ";
    err_msg+= "while opening the ";
    err_msg+= log_to_encrypt;
    err_msg+= ". Aborting the server.";

    exec_binlog_error_action_abort(err_msg.c_str());
  }
  else
  {
    sql_print_error("Could not use %s for logging (error %d). "
                    "Turning logging off for the whole duration of the MySQL "
                    "server process. To turn it on again: fix the cause, "
                    "shutdown the MySQL server and restart it.",
                    (new_name) ? new_name : name, errno);
    close(LOG_CLOSE_INDEX, false, need_lock_index);
  }
  DBUG_RETURN(1);
}
5560 
5561 
5562 /**
5563   Move crash safe index file to index file.
5564 
5565   @param need_lock_index If true, LOCK_index will be acquired;
5566   otherwise it should already be held.
5567 
5568   @retval 0 ok
5569   @retval -1 error
5570 */
move_crash_safe_index_file_to_index_file(bool need_lock_index)5571 int MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file(bool need_lock_index)
5572 {
5573   int error= 0;
5574   File fd= -1;
5575   DBUG_ENTER("MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file");
5576   int failure_trials= MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
5577   bool file_rename_status= false, file_delete_status= false;
5578   THD *thd= current_thd;
5579 
5580   if (need_lock_index)
5581     mysql_mutex_lock(&LOCK_index);
5582   else
5583     mysql_mutex_assert_owner(&LOCK_index);
5584 
5585   if (my_b_inited(&index_file))
5586   {
5587     end_io_cache(&index_file);
5588     if (mysql_file_close(index_file.file, MYF(0)) < 0)
5589     {
5590       error= -1;
5591       sql_print_error("While rebuilding index file %s: "
5592                       "Failed to close the index file.", index_file_name);
5593       /*
5594         Delete Crash safe index file here and recover the binlog.index
5595         state(index_file io_cache) from old binlog.index content.
5596        */
5597       mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
5598                         MYF(0));
5599 
5600       goto recoverable_err;
5601     }
5602 
5603     /*
5604       Sometimes an outsider can lock index files for temporary viewing
5605       purpose. For eg: MEB locks binlog.index/relaylog.index to view
5606       the content of the file. During that small period of time, deletion
5607       of the file is not possible on some platforms(Eg: Windows)
5608       Server should retry the delete operation for few times instead of panicking
5609       immediately.
5610     */
5611     while ((file_delete_status == false) && (failure_trials > 0))
5612     {
5613       if (DBUG_EVALUATE_IF("force_index_file_delete_failure", 1, 0)) break;
5614 
5615       DBUG_EXECUTE_IF("simulate_index_file_delete_failure",
5616                   {
5617                     /* This simulation causes the delete to fail */
5618                     static char first_char= index_file_name[0];
5619                     index_file_name[0]= 0;
5620                     sql_print_information("Retrying delete");
5621                     if (failure_trials == 1)
5622                       index_file_name[0]= first_char;
5623                   };);
5624       file_delete_status = !(mysql_file_delete(key_file_binlog_index,
5625                                                index_file_name, MYF(MY_WME)));
5626       --failure_trials;
5627       if (!file_delete_status)
5628       {
5629         my_sleep(1000);
5630         /* Clear the error before retrying. */
5631         if (failure_trials > 0)
5632           thd->clear_error();
5633       }
5634     }
5635 
5636     if (!file_delete_status)
5637     {
5638       error= -1;
5639       sql_print_error("While rebuilding index file %s: "
5640                       "Failed to delete the existing index file. It could be "
5641                       "that file is being used by some other process.",
5642                       index_file_name);
5643       /*
5644         Delete Crash safe file index file here and recover the binlog.index
5645         state(index_file io_cache) from old binlog.index content.
5646        */
5647       mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
5648                         MYF(0));
5649 
5650       goto recoverable_err;
5651     }
5652   }
5653 
5654   DBUG_EXECUTE_IF("crash_create_before_rename_index_file", DBUG_SUICIDE(););
5655   /*
5656     Sometimes an outsider can lock index files for temporary viewing
5657     purpose. For eg: MEB locks binlog.index/relaylog.index to view
5658     the content of the file. During that small period of time, rename
5659     of the file is not possible on some platforms(Eg: Windows)
5660     Server should retry the rename operation for few times instead of panicking
5661     immediately.
5662   */
5663   failure_trials = MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
5664   while ((file_rename_status == false) && (failure_trials > 0))
5665   {
5666     DBUG_EXECUTE_IF("simulate_crash_safe_index_file_rename_failure",
5667                 {
5668                   /* This simulation causes the rename to fail */
5669                   static char first_char= index_file_name[0];
5670                   index_file_name[0]= 0;
5671                   sql_print_information("Retrying rename");
5672                   if (failure_trials == 1)
5673                     index_file_name[0]= first_char;
5674                 };);
5675     file_rename_status =
5676         !(my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)));
5677     --failure_trials;
5678     if (!file_rename_status)
5679     {
5680       my_sleep(1000);
5681       /* Clear the error before retrying. */
5682       if (failure_trials > 0)
5683         thd->clear_error();
5684     }
5685   }
5686   if (!file_rename_status)
5687   {
5688     error= -1;
5689     sql_print_error("While rebuilding index file %s: "
5690                     "Failed to rename the new index file to the existing "
5691                     "index file.", index_file_name);
5692     goto fatal_err;
5693   }
5694   DBUG_EXECUTE_IF("crash_create_after_rename_index_file", DBUG_SUICIDE(););
5695 
5696 recoverable_err:
5697   if ((fd= mysql_file_open(key_file_binlog_index,
5698                            index_file_name,
5699                            O_RDWR | O_CREAT | O_BINARY,
5700                            MYF(MY_WME))) < 0 ||
5701            mysql_file_sync(fd, MYF(MY_WME)) ||
5702            init_io_cache_ext(&index_file, fd, IO_SIZE, READ_CACHE,
5703                              mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(0)),
5704                                              0, MYF(MY_WME | MY_WAIT_IF_FULL),
5705                              key_file_binlog_index_cache))
5706   {
5707     sql_print_error("After rebuilding the index file %s: "
5708                     "Failed to open the index file.", index_file_name);
5709     goto fatal_err;
5710   }
5711 
5712   if (need_lock_index)
5713     mysql_mutex_unlock(&LOCK_index);
5714   DBUG_RETURN(error);
5715 
5716 fatal_err:
5717   /*
5718     This situation is very very rare to happen (unless there is some serious
5719     memory related issues like OOM) and should be treated as fatal error.
5720     Hence it is better to bring down the server without respecting
5721     'binlog_error_action' value here.
5722   */
5723   exec_binlog_error_action_abort("MySQL server failed to update the "
5724                                  "binlog.index file's content properly. "
5725                                  "It might not be in sync with available "
5726                                  "binlogs and the binlog.index file state is in "
5727                                  "unrecoverable state. Aborting the server.");
5728   /*
5729     Server is aborted in the above function.
5730     This is dead code to make compiler happy.
5731    */
5732   DBUG_RETURN(error);
5733 }
5734 
5735 
5736 /**
5737   Append log file name to index file.
5738 
5739   - To make crash safe, we copy all the content of index file
5740   to crash safe index file firstly and then append the log
5741   file name to the crash safe index file. Finally move the
5742   crash safe index file to index file.
5743 
5744   @retval
5745     0   ok
5746   @retval
5747     -1   error
5748 */
add_log_to_index(uchar * log_name,size_t log_name_len,bool need_lock_index)5749 int MYSQL_BIN_LOG::add_log_to_index(uchar* log_name,
5750                                     size_t log_name_len, bool need_lock_index)
5751 {
5752   DBUG_ENTER("MYSQL_BIN_LOG::add_log_to_index");
5753 
5754   if (open_crash_safe_index_file())
5755   {
5756     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5757                     "open the crash safe index file.");
5758     goto err;
5759   }
5760 
5761   if (copy_file(&index_file, &crash_safe_index_file, 0))
5762   {
5763     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5764                     "copy index file to crash safe index file.");
5765     goto err;
5766   }
5767 
5768   if (my_b_write(&crash_safe_index_file, log_name, log_name_len) ||
5769       my_b_write(&crash_safe_index_file, (uchar*) "\n", 1) ||
5770       flush_io_cache(&crash_safe_index_file) ||
5771       mysql_file_sync(crash_safe_index_file.file, MYF(MY_WME)))
5772   {
5773     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5774                     "append log file name: %s, to crash "
5775                     "safe index file.", log_name);
5776     goto err;
5777   }
5778 
5779   if (close_crash_safe_index_file())
5780   {
5781     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5782                     "close the crash safe index file.");
5783     goto err;
5784   }
5785 
5786   if (move_crash_safe_index_file_to_index_file(need_lock_index))
5787   {
5788     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5789                     "move crash safe index file to index file.");
5790     goto err;
5791   }
5792 
5793   DBUG_RETURN(0);
5794 
5795 err:
5796   DBUG_RETURN(-1);
5797 }
5798 
get_current_log(LOG_INFO * linfo,bool need_lock_log)5799 int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo, bool need_lock_log/*true*/)
5800 {
5801   if (need_lock_log)
5802     mysql_mutex_lock(&LOCK_log);
5803   int ret = raw_get_current_log(linfo);
5804   if (need_lock_log)
5805     mysql_mutex_unlock(&LOCK_log);
5806   return ret;
5807 }
5808 
raw_get_current_log(LOG_INFO * linfo)5809 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
5810 {
5811   strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
5812   linfo->pos = my_b_safe_tell(&log_file);
5813   return 0;
5814 }
5815 
check_write_error_code(uint error_code)5816 static bool check_write_error_code(uint error_code)
5817 {
5818   return error_code == ER_TRANS_CACHE_FULL ||
5819          error_code == ER_STMT_CACHE_FULL  ||
5820          error_code == ER_ERROR_ON_WRITE   ||
5821          error_code == ER_BINLOG_LOGGING_IMPOSSIBLE;
5822 }
5823 
check_write_error(THD * thd)5824 bool MYSQL_BIN_LOG::check_write_error(THD *thd)
5825 {
5826   DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
5827 
5828   if (!thd->is_error())
5829     DBUG_RETURN(false);
5830 
5831   bool checked= check_write_error_code(thd->get_stmt_da()->mysql_errno());
5832 
5833   if (!checked)
5834   {
5835     /* Check all conditions for one that matches the expected error */
5836     const Sql_condition *err;
5837     Diagnostics_area::Sql_condition_iterator it=
5838       thd->get_stmt_da()->sql_conditions();
5839     while ((err= it++) != NULL && !checked)
5840     {
5841       checked= check_write_error_code(err->mysql_errno());
5842     }
5843   }
5844   DBUG_PRINT("return", ("checked: %s", YESNO(checked)));
5845   DBUG_RETURN(checked);
5846 }
5847 
set_write_error(THD * thd,bool is_transactional)5848 void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
5849 {
5850   DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
5851 
5852   write_error= 1;
5853 
5854   if (check_write_error(thd))
5855     DBUG_VOID_RETURN;
5856 
5857   if (my_errno() == EFBIG)
5858   {
5859     if (is_transactional)
5860     {
5861       my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(MY_WME));
5862     }
5863     else
5864     {
5865       my_message(ER_STMT_CACHE_FULL, ER(ER_STMT_CACHE_FULL), MYF(MY_WME));
5866     }
5867   }
5868   else
5869   {
5870     char errbuf[MYSYS_STRERROR_SIZE];
5871     my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name,
5872              errno, my_strerror(errbuf, sizeof(errbuf), errno));
5873   }
5874 
5875   DBUG_VOID_RETURN;
5876 }
5877 
compare_log_name(const char * log_1,const char * log_2)5878 static int compare_log_name(const char* log_1, const char* log_2)
5879 {
5880   const char * log_1_basename= log_1 + dirname_length(log_1);
5881   const char * log_2_basename= log_2 + dirname_length(log_2);
5882 
5883   return strcmp(log_1_basename,log_2_basename);
5884 }
5885 
5886 /**
5887   Find the position in the log-index-file for the given log name.
5888 
5889   @param[out] linfo The found log file name will be stored here, along
5890   with the byte offset of the next log file name in the index file.
5891   @param log_name Filename to find in the index file, or NULL if we
5892   want to read the first entry.
5893   @param need_lock_index If false, this function acquires LOCK_index;
5894   otherwise the lock should already be held by the caller.
5895 
5896   @note
5897     On systems without the truncate function the file will end with one or
5898     more empty lines.  These will be ignored when reading the file.
5899 
5900   @retval
5901     0			ok
5902   @retval
5903     LOG_INFO_EOF	        End of log-index-file found
5904   @retval
5905     LOG_INFO_IO		Got IO error while reading file
5906 */
5907 
find_log_pos(LOG_INFO * linfo,const char * log_name,bool need_lock_index)5908 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
5909                                 bool need_lock_index)
5910 {
5911   int error= 0;
5912   char *full_fname= linfo->log_file_name;
5913   char full_log_name[FN_REFLEN], fname[FN_REFLEN];
5914   DBUG_ENTER("find_log_pos");
5915   full_log_name[0]= full_fname[0]= 0;
5916 
5917   /*
5918     Mutex needed because we need to make sure the file pointer does not
5919     move from under our feet
5920   */
5921   if (need_lock_index)
5922     mysql_mutex_lock(&LOCK_index);
5923   else
5924     mysql_mutex_assert_owner(&LOCK_index);
5925 
5926   if (!my_b_inited(&index_file))
5927   {
5928       error= LOG_INFO_IO;
5929       goto end;
5930   }
5931 
5932   // extend relative paths for log_name to be searched
5933   if (log_name)
5934   {
5935     if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
5936     {
5937       error= LOG_INFO_EOF;
5938       goto end;
5939     }
5940   }
5941 
5942   DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
5943                        log_name ? log_name : "NULL", full_log_name));
5944 
5945   /* As the file is flushed, we can't get an error here */
5946   my_b_seek(&index_file, (my_off_t) 0);
5947 
5948   for (;;)
5949   {
5950     size_t length;
5951     my_off_t offset= my_b_tell(&index_file);
5952 
5953     DBUG_EXECUTE_IF("simulate_find_log_pos_error",
5954                     error=  LOG_INFO_EOF; break;);
5955     /* If we get 0 or 1 characters, this is the end of the file */
5956     if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
5957     {
5958       /* Did not find the given entry; Return not found or error */
5959       error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
5960       break;
5961     }
5962 
5963     // extend relative paths and match against full path
5964     if (normalize_binlog_name(full_fname, fname, is_relay_log))
5965     {
5966       error= LOG_INFO_EOF;
5967       break;
5968     }
5969     // if the log entry matches, null string matching anything
5970     if (!log_name ||
5971         !compare_log_name(full_fname,full_log_name))
5972     {
5973       DBUG_PRINT("info", ("Found log file entry"));
5974       linfo->index_file_start_offset= offset;
5975       linfo->index_file_offset = my_b_tell(&index_file);
5976       break;
5977     }
5978     linfo->entry_index++;
5979   }
5980 
5981 end:
5982   if (need_lock_index)
5983     mysql_mutex_unlock(&LOCK_index);
5984   DBUG_RETURN(error);
5985 }
5986 
5987 
5988 /**
5989   Find the position in the log-index-file for the given log name.
5990 
5991   @param[out] linfo The filename will be stored here, along with the
5992   byte offset of the next filename in the index file.
5993 
5994   @param need_lock_index If true, LOCK_index will be acquired;
5995   otherwise it should already be held by the caller.
5996 
5997   @note
5998     - Before calling this function, one has to call find_log_pos()
5999     to set up 'linfo'
6000     - Mutex needed because we need to make sure the file pointer does not move
6001     from under our feet
6002 
6003   @retval 0 ok
6004   @retval LOG_INFO_EOF End of log-index-file found
6005   @retval LOG_INFO_IO Got IO error while reading file
6006 */
find_next_log(LOG_INFO * linfo,bool need_lock_index)6007 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock_index)
6008 {
6009   int error= 0;
6010   size_t length;
6011   char fname[FN_REFLEN];
6012   char *full_fname= linfo->log_file_name;
6013 
6014   if (need_lock_index)
6015     mysql_mutex_lock(&LOCK_index);
6016   else
6017     mysql_mutex_assert_owner(&LOCK_index);
6018 
6019   if (!my_b_inited(&index_file))
6020   {
6021       error= LOG_INFO_IO;
6022       goto err;
6023   }
6024   /* As the file is flushed, we can't get an error here */
6025   my_b_seek(&index_file, linfo->index_file_offset);
6026 
6027   linfo->index_file_start_offset= linfo->index_file_offset;
6028   if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
6029   {
6030     error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
6031     goto err;
6032   }
6033 
6034   if (fname[0] != 0)
6035   {
6036     if(normalize_binlog_name(full_fname, fname, is_relay_log))
6037     {
6038       error= LOG_INFO_EOF;
6039       goto err;
6040     }
6041     length= strlen(full_fname);
6042   }
6043 
6044   linfo->index_file_offset= my_b_tell(&index_file);
6045 
6046 err:
6047   if (need_lock_index)
6048     mysql_mutex_unlock(&LOCK_index);
6049   return error;
6050 }
6051 
6052 /**
6053   Find the relay log name following the given name from relay log index file.
6054 
6055   @param[in|out] log_name  The name is full path name.
6056 
6057   @return return 0 if it finds next relay log. Otherwise return the error code.
6058 */
find_next_relay_log(char log_name[FN_REFLEN+1])6059 int MYSQL_BIN_LOG::find_next_relay_log(char log_name[FN_REFLEN+1])
6060 {
6061   LOG_INFO info;
6062   int error;
6063   char relative_path_name[FN_REFLEN+1];
6064 
6065   if (fn_format(relative_path_name, log_name+dirname_length(log_name),
6066                 mysql_data_home, "", 0)
6067       == NullS)
6068     return 1;
6069 
6070   mysql_mutex_lock(&LOCK_index);
6071 
6072   error= find_log_pos(&info, relative_path_name, false);
6073   if (error == 0)
6074   {
6075     error= find_next_log(&info, false);
6076     if (error == 0)
6077       strcpy(log_name, info.log_file_name);
6078   }
6079 
6080   mysql_mutex_unlock(&LOCK_index);
6081   return error;
6082 }
6083 
6084 /**
6085   Removes files, as part of a RESET MASTER or RESET SLAVE statement,
6086   by deleting all logs refered to in the index file. Then, it starts
6087   writing to a new log file.
6088 
6089   The new index file will only contain this file.
6090 
6091   @param thd Thread
6092 
6093   @note
6094     If not called from slave thread, write start event to new log
6095 
6096   @retval
6097     0	ok
6098   @retval
6099     1   error
6100 */
reset_logs(THD * thd,bool delete_only)6101 bool MYSQL_BIN_LOG::reset_logs(THD* thd, bool delete_only)
6102 {
6103   LOG_INFO linfo;
6104   bool error=0;
6105   int err;
6106   const char* save_name;
6107   DBUG_ENTER("reset_logs");
6108 
6109   /*
6110     Flush logs for storage engines, so that the last transaction
6111     is fsynced inside storage engines.
6112   */
6113   if (ha_flush_logs(NULL))
6114     DBUG_RETURN(1);
6115 
6116   ha_reset_logs(thd);
6117 
6118   /*
6119     We need to get both locks to be sure that no one is trying to
6120     write to the index log file.
6121   */
6122   mysql_mutex_lock(&LOCK_log);
6123   mysql_mutex_lock(&LOCK_index);
6124 
6125   global_sid_lock->wrlock();
6126 
6127   /* Save variables so that we can reopen the log */
6128   save_name=name;
6129   name=0;					// Protect against free
6130   close(LOG_CLOSE_TO_BE_OPENED, false/*need_lock_log=false*/,
6131         false/*need_lock_index=false*/);
6132 
6133   /*
6134     First delete all old log files and then update the index file.
6135     As we first delete the log files and do not use sort of logging,
6136     a crash may lead to an inconsistent state where the index has
6137     references to non-existent files.
6138 
6139     We need to invert the steps and use the purge_index_file methods
6140     in order to make the operation safe.
6141   */
6142 
6143   if ((err= find_log_pos(&linfo, NullS, false/*need_lock_index=false*/)) != 0)
6144   {
6145     uint errcode= purge_log_get_error_code(err);
6146     sql_print_error("Failed to locate old binlog or relay log files");
6147     my_message(errcode, ER(errcode), MYF(0));
6148     error= 1;
6149     goto err;
6150   }
6151 
6152   for (;;)
6153   {
6154     if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
6155     {
6156       if (my_errno() == ENOENT)
6157       {
6158         push_warning_printf(current_thd, Sql_condition::SL_WARNING,
6159                             ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
6160                             linfo.log_file_name);
6161         sql_print_information("Failed to delete file '%s'",
6162                               linfo.log_file_name);
6163         set_my_errno(0);
6164         error= 0;
6165       }
6166       else
6167       {
6168         push_warning_printf(current_thd, Sql_condition::SL_WARNING,
6169                             ER_BINLOG_PURGE_FATAL_ERR,
6170                             "a problem with deleting %s; "
6171                             "consider examining correspondence "
6172                             "of your binlog index file "
6173                             "to the actual binlog files",
6174                             linfo.log_file_name);
6175         error= 1;
6176         goto err;
6177       }
6178     }
6179     if (find_next_log(&linfo, false/*need_lock_index=false*/))
6180       break;
6181   }
6182 
6183   /* Start logging with a new file */
6184   close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED,
6185         false/*need_lock_log=false*/,
6186         false/*need_lock_index=false*/);
6187   if ((error= my_delete_allow_opened(index_file_name, MYF(0))))	// Reset (open will update)
6188   {
6189     if (my_errno() == ENOENT)
6190     {
6191       push_warning_printf(current_thd, Sql_condition::SL_WARNING,
6192                           ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
6193                           index_file_name);
6194       sql_print_information("Failed to delete file '%s'",
6195                             index_file_name);
6196       set_my_errno(0);
6197       error= 0;
6198     }
6199     else
6200     {
6201       push_warning_printf(current_thd, Sql_condition::SL_WARNING,
6202                           ER_BINLOG_PURGE_FATAL_ERR,
6203                           "a problem with deleting %s; "
6204                           "consider examining correspondence "
6205                           "of your binlog index file "
6206                           "to the actual binlog files",
6207                           index_file_name);
6208       error= 1;
6209       goto err;
6210     }
6211   }
6212 
6213 #ifdef HAVE_REPLICATION
6214   /*
6215     For relay logs we clear the gtid state associated per channel(i.e rli)
6216     in the purge_relay_logs()
6217   */
6218   if (!is_relay_log)
6219   {
6220     if(gtid_state->clear(thd))
6221     {
6222       error= 1;
6223       goto err;
6224     }
6225     // don't clear global_sid_map because it's used by the relay log too
6226     if (gtid_state->init() != 0)
6227       goto err;
6228   }
6229 #endif
6230 
6231   if (!delete_only)
6232   {
6233     if (!open_index_file(index_file_name, 0, false/*need_lock_index=false*/))
6234     if ((error= open_binlog(save_name, 0,
6235                             max_size, false,
6236                             false/*need_lock_index=false*/,
6237                             false/*need_sid_lock=false*/,
6238                             NULL)))
6239       goto err;
6240   }
6241   my_free((void *) save_name);
6242 
6243 err:
6244   if (error == 1)
6245     name= const_cast<char*>(save_name);
6246   global_sid_lock->unlock();
6247 #ifdef HAVE_REPLICATION
6248   count_binlog_space(false);
6249 #endif
6250   mysql_mutex_unlock(&LOCK_index);
6251   mysql_mutex_unlock(&LOCK_log);
6252   DBUG_RETURN(error);
6253 }
6254 
6255 
6256 /**
6257   Set the name of crash safe index file.
6258 
6259   @retval
6260     0   ok
6261   @retval
6262     1   error
6263 */
set_crash_safe_index_file_name(const char * base_file_name)6264 int MYSQL_BIN_LOG::set_crash_safe_index_file_name(const char *base_file_name)
6265 {
6266   int error= 0;
6267   DBUG_ENTER("MYSQL_BIN_LOG::set_crash_safe_index_file_name");
6268   if (fn_format(crash_safe_index_file_name, base_file_name, mysql_data_home,
6269                 ".index_crash_safe", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
6270                                          MY_REPLACE_EXT)) == NULL)
6271   {
6272     error= 1;
6273     sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed "
6274                     "to set file name.");
6275   }
6276   DBUG_RETURN(error);
6277 }
6278 
6279 
6280 /**
6281   Open a (new) crash safe index file.
6282 
6283   @note
6284     The crash safe index file is a special file
6285     used for guaranteeing index file crash safe.
6286   @retval
6287     0   ok
6288   @retval
6289     1   error
6290 */
open_crash_safe_index_file()6291 int MYSQL_BIN_LOG::open_crash_safe_index_file()
6292 {
6293   int error= 0;
6294   File file= -1;
6295 
6296   DBUG_ENTER("MYSQL_BIN_LOG::open_crash_safe_index_file");
6297 
6298   if (!my_b_inited(&crash_safe_index_file))
6299   {
6300     if ((file= my_open(crash_safe_index_file_name, O_RDWR | O_CREAT | O_BINARY,
6301                        MYF(MY_WME))) < 0  ||
6302         init_io_cache(&crash_safe_index_file, file, IO_SIZE, WRITE_CACHE,
6303                       0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
6304     {
6305       error= 1;
6306       sql_print_error("MYSQL_BIN_LOG::open_crash_safe_index_file failed "
6307                       "to open temporary index file.");
6308     }
6309   }
6310   DBUG_RETURN(error);
6311 }
6312 
6313 
6314 /**
6315   Close the crash safe index file.
6316 
6317   @note
6318     The crash safe file is just closed, is not deleted.
6319     Because it is moved to index file later on.
6320   @retval
6321     0   ok
6322   @retval
6323     1   error
6324 */
close_crash_safe_index_file()6325 int MYSQL_BIN_LOG::close_crash_safe_index_file()
6326 {
6327   int error= 0;
6328 
6329   DBUG_ENTER("MYSQL_BIN_LOG::close_crash_safe_index_file");
6330 
6331   if (my_b_inited(&crash_safe_index_file))
6332   {
6333     end_io_cache(&crash_safe_index_file);
6334     error= my_close(crash_safe_index_file.file, MYF(0));
6335   }
6336   memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
6337 
6338   DBUG_RETURN(error);
6339 }
6340 
6341 
6342 /**
6343   Delete relay log files prior to rli->group_relay_log_name
6344   (i.e. all logs which are not involved in a non-finished group
6345   (transaction)), remove them from the index file and start on next
6346   relay log.
6347 
6348   IMPLEMENTATION
6349 
6350   - You must hold rli->data_lock before calling this function, since
6351     it writes group_relay_log_pos and similar fields of
6352     Relay_log_info.
6353   - Protects index file with LOCK_index
6354   - Delete relevant relay log files
6355   - Copy all file names after these ones to the front of the index file
  - If the OS has truncate, truncate the file, else fill it with '\n'
6357   - Read the next file name from the index file and store in rli->linfo
6358 
6359   @param rli	       Relay log information
6360   @param included     If false, all relay logs that are strictly before
6361                       rli->group_relay_log_name are deleted ; if true, the
6362                       latter is deleted too (i.e. all relay logs
6363                       read by the SQL slave thread are deleted).
6364 
6365   @note
    - This is only called from the slave SQL thread when it has read
    all commands from a relay log and wants to switch to a new relay log.
6368     - When this happens, we can be in an active transaction as
6369     a transaction can span over two relay logs
6370     (although it is always written as a single block to the master's binary
6371     log, hence cannot span over two master's binary logs).
6372 
6373   @retval
6374     0			ok
6375   @retval
6376     LOG_INFO_EOF	        End of log-index-file found
6377   @retval
6378     LOG_INFO_SEEK	Could not allocate IO cache
6379   @retval
6380     LOG_INFO_IO		Got IO error while reading file
6381 */
6382 
6383 #ifdef HAVE_REPLICATION
6384 
/*
  Advance rli to the relay log that follows the current event relay log
  and purge the relay logs up to the group relay log name (see the
  description preceding the HAVE_REPLICATION guard).  LOCK_index is held
  from the start of the function until it returns.
*/
int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
{
  int error;
  char *to_purge_if_included= NULL;
  DBUG_ENTER("purge_first_log");

  assert(current_thd->system_thread == SYSTEM_THREAD_SLAVE_SQL);
  assert(is_relay_log);
  assert(is_open());
  assert(rli->slave_running == 1);
  assert(!strcmp(rli->linfo.log_file_name,rli->get_event_relay_log_name()));

  mysql_mutex_assert_owner(&rli->data_lock);

  mysql_mutex_lock(&LOCK_index);
  /*
    Keep a private copy of the group relay log name: the rli coordinates
    are changed below, before purge_logs() is called with this name.
    NOTE(review): the my_strdup() result is not checked for NULL before
    it is handed to purge_logs() - confirm whether that can happen here.
  */
  to_purge_if_included= my_strdup(key_memory_Relay_log_info_group_relay_log_name,
                                  rli->get_group_relay_log_name(), MYF(0));

  /*
    Read the next log file name from the index file and pass it back to
    the caller.
  */
  if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
                         false/*need_lock_index=false*/)) ||
     (error=find_next_log(&rli->linfo, false/*need_lock_index=false*/)))
  {
    char buff[22];
    sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
                    error,
                    llstr(rli->linfo.index_file_offset,buff),
                    rli->get_event_relay_log_name(),
                    included);
    goto err;
  }

  /*
    Reset rli's coordinates to the current log.
  */
  rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
  rli->set_event_relay_log_name(rli->linfo.log_file_name);

  /*
    If we removed the rli->group_relay_log_name file,
    we must update the rli->group* coordinates, otherwise do not touch it as the
    group's execution is not finished (e.g. COMMIT not executed)
  */
  if (included)
  {
    rli->set_group_relay_log_pos(BIN_LOG_HEADER_SIZE);
    rli->set_group_relay_log_name(rli->linfo.log_file_name);
    rli->notify_group_relay_log_name_update();
  }
  /*
    Store where we are in the new file for the execution thread.
    If we are in the middle of a transaction, then we
    should not store the position in the repository, instead in
    that case set a flag to true which indicates that a 'forced flush'
    is postponed due to transaction split across the relaylogs.
  */
  if (!rli->is_in_group())
    rli->flush_info(TRUE);
  else
    rli->force_flush_postponed_due_to_split_trans= true;

  DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););

  /*
    Purge under log_space_lock; log_space_total is passed so that
    purge_logs() can account for the space it frees.
    NOTE(review): the value returned by purge_logs() is not examined.
  */
  mysql_mutex_lock(&rli->log_space_lock);
  rli->relay_log.purge_logs(to_purge_if_included, included,
                            false/*need_lock_index=false*/,
                            false/*need_update_threads=false*/,
                            &rli->log_space_total, true);
  // Tell the I/O thread to take the relay_log_space_limit into account
  rli->ignore_log_space_limit= 0;
  mysql_mutex_unlock(&rli->log_space_lock);

  /*
    Ok to broadcast after the critical region as there is no risk of
    the mutex being destroyed by this thread later - this helps save
    context switches
  */
  mysql_cond_broadcast(&rli->log_space_cond);

  /*
   * Need to update the log pos because purge logs has been called
   * after fetching initially the log pos at the beginning of the method.
   */
  if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
                         false/*need_lock_index=false*/)))
  {
    /*
      NOTE(review): the lookup above uses the event relay log name while
      this message prints the group relay log name - confirm which one
      is intended.
    */
    char buff[22];
    sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
                    error,
                    llstr(rli->linfo.index_file_offset,buff),
                    rli->get_group_relay_log_name(),
                    included);
    goto err;
  }

  /* If included was passed, rli->linfo should be the first entry. */
  assert(!included || rli->linfo.index_file_start_offset == 0);

err:
  /* Common exit: release the name copy and LOCK_index on every path. */
  my_free(to_purge_if_included);
  mysql_mutex_unlock(&LOCK_index);
  DBUG_RETURN(error);
}
6491 
6492 
6493 /**
6494   Remove logs from index file.
6495 
6496   - To make crash safe, we copy the content of index file
6497   from index_file_start_offset recored in log_info to
6498   crash safe index file firstly and then move the crash
6499   safe index file to index file.
6500 
6501   @param linfo                  Store here the found log file name and
6502                                 position to the NEXT log file name in
6503                                 the index file.
6504 
6505   @param need_update_threads    If we want to update the log coordinates
6506                                 of all threads. False for relay logs,
6507                                 true otherwise.
6508 
6509   @retval
6510     0    ok
6511   @retval
6512     LOG_INFO_IO    Got IO error while reading/writing file
6513 */
remove_logs_from_index(LOG_INFO * log_info,bool need_update_threads)6514 int MYSQL_BIN_LOG::remove_logs_from_index(LOG_INFO* log_info, bool need_update_threads)
6515 {
6516   if (open_crash_safe_index_file())
6517   {
6518     sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6519                     "open the crash safe index file.");
6520     goto err;
6521   }
6522 
6523   if (copy_file(&index_file, &crash_safe_index_file,
6524                 log_info->index_file_start_offset))
6525   {
6526     sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6527                     "copy index file to crash safe index file.");
6528     goto err;
6529   }
6530 
6531   if (close_crash_safe_index_file())
6532   {
6533     sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6534                     "close the crash safe index file.");
6535     goto err;
6536   }
6537   DBUG_EXECUTE_IF("fault_injection_copy_part_file", DBUG_SUICIDE(););
6538 
6539   if (move_crash_safe_index_file_to_index_file(false/*need_lock_index=false*/))
6540   {
6541     sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6542                     "move crash safe index file to index file.");
6543     goto err;
6544   }
6545 
6546   // now update offsets in index file for running threads
6547   if (need_update_threads)
6548     adjust_linfo_offsets(log_info->index_file_start_offset);
6549   return 0;
6550 
6551 err:
6552   return LOG_INFO_IO;
6553 }
6554 
/**
  Remove all logs before the given log from disk and from the index file.

  The logs to delete are first registered in the purge index file, the
  index file is then rewritten without them, and only after that the
  registered files are processed by purge_index_entry().

  @param to_log	      Delete all log file name before this file.
  @param included            If true, to_log is deleted too.
  @param need_lock_index     If true, LOCK_index is acquired here and
                             released before returning; otherwise the
                             caller must already hold it.
  @param need_update_threads If we want to update the log coordinates of
                             all threads. False for relay logs, true otherwise.
  @param decrease_log_space  If not null, decrement this variable of
                             the amount of log space freed
  @param auto_purge          True if this is an automatic purge. In that
                             case no warning is pushed when an active or
                             in-use log stops the purge.

  @note
    If any of the logs before the deleted one is in use,
    only purge logs up to this one.

  @retval
    0			ok
  @retval
    LOG_INFO_EOF		to_log not found
  @retval
    LOG_INFO_EMFILE             too many files opened
  @retval
    LOG_INFO_FATAL              if any other than ENOENT error from
                                mysql_file_stat() or mysql_file_delete()
*/

int MYSQL_BIN_LOG::purge_logs(const char *to_log,
                              bool included,
                              bool need_lock_index,
                              bool need_update_threads,
                              ulonglong *decrease_log_space,
                              bool auto_purge)
{
  int error= 0, no_of_log_files_to_purge= 0, no_of_log_files_purged= 0;
  int no_of_threads_locking_log= 0;
  bool exit_loop= 0;
  LOG_INFO log_info;
  THD *thd= current_thd;
  DBUG_ENTER("purge_logs");
  DBUG_PRINT("info",("to_log= %s",to_log));

  if (need_lock_index)
    mysql_mutex_lock(&LOCK_index);
  else
    mysql_mutex_assert_owner(&LOCK_index);
  /* First make sure to_log is listed in the index file at all. */
  if ((error=find_log_pos(&log_info, to_log, false/*need_lock_index=false*/)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
                    "listed in the index.", to_log);
    goto err;
  }

  /*
    find_log_pos() incremented entry_index once per entry it skipped
    before reaching to_log; this value is only used in the "log in use"
    warning below.
  */
  no_of_log_files_to_purge= log_info.entry_index;

  if ((error= open_purge_index_file(TRUE)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
    goto err;
  }

  /*
    File name exists in index file; delete until we find this file
    or a file that is used.
  */
  if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
    goto err;

  /*
    Walk the index from its first entry.  The loop body runs for every
    entry whose name differs from to_log and, when 'included' is true,
    one last time for to_log itself (exit_loop is set by the condition
    in that case and ends the loop after the entry is registered).
  */
  while ((compare_log_name(to_log,log_info.log_file_name) || (exit_loop=included)))
  {
    if(is_active(log_info.log_file_name))
    {
      if(!auto_purge)
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_WARN_PURGE_LOG_IS_ACTIVE,
                            ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
                            log_info.log_file_name);
      break;
    }

    if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
    {
      if(!auto_purge)
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_WARN_PURGE_LOG_IN_USE,
                            ER(ER_WARN_PURGE_LOG_IN_USE),
                            log_info.log_file_name,  no_of_threads_locking_log,
                            no_of_log_files_purged, no_of_log_files_to_purge);
      break;
    }
    no_of_log_files_purged++;

    /* Only record the name here; files are deleted after 'err:' below. */
    if ((error= register_purge_index_entry(log_info.log_file_name)))
    {
      sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
                      log_info.log_file_name);
      goto err;
    }

    if (find_next_log(&log_info, false/*need_lock_index=false*/) || exit_loop)
      break;
  }

  DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););

  if ((error= sync_purge_index_file()))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
    goto err;
  }

  /* We know how many files to delete. Update index file. */
  if ((error=remove_logs_from_index(&log_info, need_update_threads)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
    goto err;
  }

  // Update gtid_state->lost_gtids
  if (!is_relay_log)
  {
    global_sid_lock->wrlock();
    error= init_gtid_sets(NULL,
                          const_cast<Gtid_set *>(gtid_state->get_lost_gtids()),
                          opt_master_verify_checksum,
                          false/*false=don't need lock*/,
                          NULL/*trx_parser*/, NULL/*gtid_partial_trx*/);
    global_sid_lock->unlock();
    if (error)
      goto err;
  }

  DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););

  /* Reached on success as well as on failure. */
err:

  int error_index= 0, close_error_index= 0;
  /* Read each entry from purge_index_file and delete the file. */
  if (!error && is_inited_purge_index_file() &&
      (error_index= purge_index_entry(thd, decrease_log_space, false/*need_lock_index=false*/)))
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
                    " that would be purged.");

  close_error_index= close_purge_index_file();

  DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););

  count_binlog_space(false);
  if (need_lock_index)
    mysql_mutex_unlock(&LOCK_index);

  /*
    Error codes from purge logs take precedence.
    Then error codes from purging the index entry.
    Finally, error codes from closing the purge index file.
  */
  error= error ? error : (error_index ? error_index :
                          close_error_index);

  DBUG_RETURN(error);
}
6714 
set_purge_index_file_name(const char * base_file_name)6715 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
6716 {
6717   int error= 0;
6718   DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
6719   if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
6720                 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
6721                               MY_REPLACE_EXT)) == NULL)
6722   {
6723     error= 1;
6724     sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
6725                       "file name.");
6726   }
6727   DBUG_RETURN(error);
6728 }
6729 
open_purge_index_file(bool destroy)6730 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
6731 {
6732   int error= 0;
6733   File file= -1;
6734 
6735   DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
6736 
6737   if (destroy)
6738     close_purge_index_file();
6739 
6740   if (!my_b_inited(&purge_index_file))
6741   {
6742     if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
6743                        MYF(MY_WME))) < 0  ||
6744         init_io_cache(&purge_index_file, file, IO_SIZE,
6745                       (destroy ? WRITE_CACHE : READ_CACHE),
6746                       0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
6747     {
6748       error= 1;
6749       sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
6750                       " file.");
6751     }
6752   }
6753   DBUG_RETURN(error);
6754 }
6755 
close_purge_index_file()6756 int MYSQL_BIN_LOG::close_purge_index_file()
6757 {
6758   int error= 0;
6759 
6760   DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
6761 
6762   if (my_b_inited(&purge_index_file))
6763   {
6764     end_io_cache(&purge_index_file);
6765     error= my_close(purge_index_file.file, MYF(0));
6766   }
6767   my_delete(purge_index_file_name, MYF(0));
6768   memset(&purge_index_file, 0, sizeof(purge_index_file));
6769 
6770   DBUG_RETURN(error);
6771 }
6772 
is_inited_purge_index_file()6773 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
6774 {
6775   DBUG_ENTER("MYSQL_BIN_LOG::is_inited_purge_index_file");
6776   DBUG_RETURN (my_b_inited(&purge_index_file));
6777 }
6778 
sync_purge_index_file()6779 int MYSQL_BIN_LOG::sync_purge_index_file()
6780 {
6781   int error= 0;
6782   DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
6783 
6784   if ((error= flush_io_cache(&purge_index_file)) ||
6785       (error= my_sync(purge_index_file.file, MYF(MY_WME))))
6786     DBUG_RETURN(error);
6787 
6788   DBUG_RETURN(error);
6789 }
6790 
register_purge_index_entry(const char * entry)6791 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
6792 {
6793   int error= 0;
6794   DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
6795 
6796   if ((error=my_b_write(&purge_index_file, (const uchar*)entry, strlen(entry))) ||
6797       (error=my_b_write(&purge_index_file, (const uchar*)"\n", 1)))
6798     DBUG_RETURN (error);
6799 
6800   DBUG_RETURN(error);
6801 }
6802 
register_create_index_entry(const char * entry)6803 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
6804 {
6805   DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
6806   DBUG_RETURN(register_purge_index_entry(entry));
6807 }
6808 
/**
  Walk the purge index (register) file and delete from disk every listed
  file that exists but is not found in the binlog index file.

  The register file cache is re-initialized for reading from offset 0 and
  read line by line. For each file name:
    - if the file cannot be stat'ed because it does not exist, a warning is
      reported and the entry is skipped;
    - if find_log_pos() finds the name in the index (returns 0), the file
      is left untouched;
    - if find_log_pos() returns LOG_INFO_EOF, the file is deleted.

  @param thd                 Thread used for pushing warnings; may be NULL,
                             in which case messages go to the error log.
  @param decrease_log_space  If not NULL, decremented by the size (st_size)
                             of every file that is successfully deleted.
  @param need_lock_index     Forwarded to find_log_pos(). When false,
                             ha_binlog_index_purge_file() is also called
                             before each file is deleted.

  @retval 0                  ok
  @retval LOG_INFO_FATAL     stat or delete failed with an error other than
                             ENOENT (and, for delete, other than EMFILE)
  @retval LOG_INFO_EMFILE    delete failed with EMFILE
  @retval other              error from reinit_io_cache(), the register
                             file cache error, or an error other than
                             LOG_INFO_EOF from find_log_pos()
*/
int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *decrease_log_space,
                                     bool need_lock_index)
{
  MY_STAT s;
  int error= 0;
  LOG_INFO log_info;
  LOG_INFO check_log_info;

  DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");

  assert(my_b_inited(&purge_index_file));

  /* Rewind the register file so that it can be read from the beginning. */
  if ((error=reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0, 0)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
                    "for read");
    goto err;
  }

  for (;;)
  {
    size_t length;

    /*
      A length of 0 or 1 ends the loop: it is either a read error (reported
      below) or is treated as end of file.
    */
    if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
                          FN_REFLEN)) <= 1)
    {
      if (purge_index_file.error)
      {
        error= purge_index_file.error;
        sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
                        "register file.", error);
        goto err;
      }

      /* Reached EOF */
      break;
    }

    /* Get rid of the trailing '\n' */
    log_info.log_file_name[length-1]= 0;

    if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s, MYF(0)))
    {
      if (my_errno() == ENOENT)
      {
        /*
          It's not fatal if we can't stat a log file that does not exist;
          If we could not stat, we won't delete.
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
                              log_info.log_file_name);
        }
        sql_print_information("Failed to execute mysql_file_stat on file '%s'",
			      log_info.log_file_name);
        set_my_errno(0);
      }
      else
      {
        /*
          Other than ENOENT are fatal
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with getting info on being purged %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              log_info.log_file_name);
        }
        else
        {
          sql_print_information("Failed to delete log file '%s'; "
                                "consider examining correspondence "
                                "of your binlog index file "
                                "to the actual binlog files",
                                log_info.log_file_name);
        }
        error= LOG_INFO_FATAL;
        goto err;
      }
    }
    else
    {
      /*
        The file exists on disk. It is deleted only when the lookup in the
        binlog index fails with LOG_INFO_EOF; a successful lookup
        (error == 0) skips the whole branch below.
      */
      if ((error= find_log_pos(&check_log_info, log_info.log_file_name,
                               need_lock_index)))
      {
        if (error != LOG_INFO_EOF)
        {
          if (thd)
          {
            push_warning_printf(thd, Sql_condition::SL_WARNING,
                                ER_BINLOG_PURGE_FATAL_ERR,
                                "a problem with deleting %s and "
                                "reading the binlog index file",
                                log_info.log_file_name);
          }
          else
          {
            sql_print_information("Failed to delete file '%s' and "
                                  "read the binlog index file",
                                  log_info.log_file_name);
          }
          goto err;
        }

        /* LOG_INFO_EOF is the expected outcome here, not an error. */
        error= 0;
        if (!need_lock_index)
        {
          /*
            This is to avoid triggering an error in NDB.

            @todo: This is weird, what does NDB errors have to do with
            need_lock_index? Explain better or refactor /Sven
          */
          ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
        }

        DBUG_PRINT("info",("purging %s",log_info.log_file_name));
        if (!mysql_file_delete(key_file_binlog, log_info.log_file_name, MYF(0)))
        {
          DBUG_EXECUTE_IF("wait_in_purge_index_entry",
                          {
                              const char action[] = "now SIGNAL in_purge_index_entry WAIT_FOR go_ahead_sql";
                              assert(!debug_sync_set_action(thd, STRING_WITH_LEN(action)));
                              DBUG_SET("-d,wait_in_purge_index_entry");
                          };);

          /* Account for the space freed, using the size from the stat above. */
          if (decrease_log_space)
            *decrease_log_space-= s.st_size;
        }
        else
        {
          if (my_errno() == ENOENT)
          {
            /* The file vanished between stat and delete: warn and go on. */
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::SL_WARNING,
                                  ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
                                  log_info.log_file_name);
            }
            sql_print_information("Failed to delete file '%s'",
                                  log_info.log_file_name);
            set_my_errno(0);
          }
          else
          {
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::SL_WARNING,
                                  ER_BINLOG_PURGE_FATAL_ERR,
                                  "a problem with deleting %s; "
                                  "consider examining correspondence "
                                  "of your binlog index file "
                                  "to the actual binlog files",
                                  log_info.log_file_name);
            }
            else
            {
              sql_print_information("Failed to delete file '%s'; "
                                    "consider examining correspondence "
                                    "of your binlog index file "
                                    "to the actual binlog files",
                                    log_info.log_file_name);
            }
            /* EMFILE gets its own return code; everything else is fatal. */
            if (my_errno() == EMFILE)
            {
              DBUG_PRINT("info",
                         ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno()));
              error= LOG_INFO_EMFILE;
              goto err;
            }
            error= LOG_INFO_FATAL;
            goto err;
          }
        }
      }
    }
  }

err:
  DBUG_RETURN(error);
}
6996 
/**
  Compute the total size of the binary logs (except the active one) and
  store it in the variable binlog_space_total.

  @param need_lock_index  If true, this function acquires LOCK_index;
                          otherwise the caller should already have acquired it.

  @retval
    0			ok
  @retval
    LOG_INFO_FATAL      if mysql_file_stat() fails with any error other
                        than ENOENT
    LOG_INFO_EOF        End of log-index-file found
    LOG_INFO_IO         Got IO error while reading log-index-file
*/
7012 
count_binlog_space(bool need_lock_index)7013 int MYSQL_BIN_LOG::count_binlog_space(bool need_lock_index) {
7014   DBUG_ENTER("count_binlog_space");
7015   if (is_relay_log)
7016     DBUG_RETURN(0);
7017 
7018   if (need_lock_index)
7019     mysql_mutex_lock(&LOCK_index);
7020   else
7021     mysql_mutex_assert_owner(&LOCK_index);
7022 
7023   int error;
7024   LOG_INFO log_info;
7025   binlog_space_total = 0;
7026   if ((error = find_log_pos(&log_info, NullS, false /*need_lock_index=false*/)))
7027     goto done;
7028 
7029   MY_STAT stat_area;
7030   while (!(is_active(log_info.log_file_name))) {
7031     if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &stat_area,
7032                          MYF(0))) {
7033       if (my_errno() == ENOENT) {
7034         /*
7035           It's not fatal if we can't stat a log file that does not exist.
7036         */
7037         set_my_errno(0);
7038       } else {
7039         error = LOG_INFO_FATAL;
7040         goto done;
7041       }
7042     } else {
7043       binlog_space_total += stat_area.st_size;
7044     }
7045     if (find_next_log(&log_info, false /*need_lock_index=false*/)) break;
7046   }
7047 
7048   error = 0;
7049 
7050 done:
7051   if (need_lock_index) mysql_mutex_unlock(&LOCK_index);
7052   DBUG_RETURN(error);
7053 }
7054 
7055 /**
7056   Purge old logs so that we have a total size lower than binlog_space_limit.
7057 
7058   @param need_lock_index  If true, this function acquires LOCK_index;
7059                           otherwise the caller should already have acquired it.
7060 
7061   @note
7062     If any of the logs before the deleted one is in use,
7063     only purge logs up to this one.
7064 
7065   @retval
7066     0				ok
7067   @retval
7068     LOG_INFO_FATAL      if any other than ENOENT error from
7069                         mysql_file_stat() or mysql_file_delete()
7070     LOG_INFO_EOF        End of log-index-file found
7071     LOG_INFO_IO         Got IO error while reading log-index-file
7072 */
7073 
purge_logs_by_size(bool need_lock_index)7074 int MYSQL_BIN_LOG::purge_logs_by_size(bool need_lock_index) {
7075   DBUG_ENTER("purge_logs_by_size");
7076 
7077   if (is_relay_log || !binlog_space_limit)
7078       DBUG_RETURN(0);
7079 
7080   if (need_lock_index)
7081     mysql_mutex_lock(&LOCK_index);
7082   else
7083     mysql_mutex_assert_owner(&LOCK_index);
7084 
7085   int error = 0;
7086   LOG_INFO log_info;
7087   my_off_t binlog_pos= my_b_tell(&log_file);
7088   count_binlog_space(false);
7089 
7090   if (!binlog_space_total ||
7091       binlog_space_total + binlog_pos <= binlog_space_limit)
7092     goto done;
7093 
7094   if ((error = find_log_pos(&log_info, NullS, false /*need_lock_index=false*/)))
7095     goto done;
7096 
7097   MY_STAT stat_area;
7098   char to_log[FN_REFLEN];
7099   to_log[0] = 0;
7100   while (!is_active(log_info.log_file_name)) {
7101     if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &stat_area,
7102                          MYF(0))) {
7103       if (my_errno() == ENOENT) {
7104         /*
7105           It's not fatal if we can't stat a log file that does not exist.
7106         */
7107         set_my_errno(0);
7108       } else {
7109         /*
7110           Other than ENOENT are fatal
7111         */
7112         THD *thd = current_thd;
7113         if (thd) {
7114           push_warning_printf(thd, Sql_condition::SL_WARNING,
7115                               ER_BINLOG_PURGE_FATAL_ERR,
7116                               "a problem with getting info on being purged %s; "
7117                               "consider examining correspondence "
7118                               "of your binlog index file "
7119                               "to the actual binlog files",
7120                               log_info.log_file_name);
7121         } else {
7122           sql_print_information("Failed to stat log file '%s'",
7123                                 log_info.log_file_name);
7124         }
7125         error = LOG_INFO_FATAL;
7126         goto done;
7127       }
7128     }
7129     /* check if a total size of binary logs is bigger than binlog_space_limit
7130        if yes check if it is in use, if not in use then add
7131        it in the list of binary log files to be purged.
7132     */
7133     else if (binlog_space_total + binlog_pos > binlog_space_limit) {
7134       if ((log_in_use(log_info.log_file_name)))
7135         break;
7136       DBUG_PRINT("info", ("purge_logs_by_size binlog_space_total=%llu "
7137               "binlog_pos=%llu sum=%llu\n", binlog_space_total,
7138               binlog_pos, binlog_space_total+binlog_pos));
7139       if (binlog_space_total >= (ulonglong)stat_area.st_size)
7140         binlog_space_total -= stat_area.st_size;
7141       else
7142         break;
7143       strmake(to_log, log_info.log_file_name,
7144               sizeof(log_info.log_file_name) - 1);
7145     } else
7146       break;
7147     if (find_next_log(&log_info, false /*need_lock_index=false*/)) break;
7148   }
7149 
7150   error = (to_log[0] ? purge_logs(to_log, true, false /*need_lock_index=false*/,
7151                                   true /*need_update_threads=true*/,
7152                                   NULL, true)
7153                      : 0);
7154 
7155 done:
7156   if (need_lock_index) mysql_mutex_unlock(&LOCK_index);
7157   DBUG_RETURN(error);
7158 }
7159 
7160 /**
7161   Purge old logs so that we have a maximum of max_nr_files logs.
7162 
7163   @param max_nr_files	Maximum number of logfiles to have
7164 
7165   @note
7166   If any of the logs before the deleted one is in use,
7167   only purge logs up to this one.
7168 
7169   @retval
7170   0				ok
7171   @retval
7172   LOG_INFO_PURGE_NO_ROTATE	Binary file that can't be rotated
7173   LOG_INFO_FATAL              if any other than ENOENT error from
7174   mysql_file_stat() or mysql_file_delete()
7175 */
7176 
purge_logs_maximum_number(ulong max_nr_files)7177 int MYSQL_BIN_LOG::purge_logs_maximum_number(ulong max_nr_files)
7178 {
7179   int error;
7180   char to_log[FN_REFLEN];
7181   LOG_INFO log_info;
7182   ulong current_number_of_logs= 1;
7183 
7184   DBUG_ENTER("purge_logs_maximum_number");
7185 
7186   mysql_mutex_lock(&LOCK_index);
7187   to_log[0]= 0;
7188 
7189   if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
7190     goto err;
7191 
7192   while (!find_next_log(&log_info, 0))
7193     current_number_of_logs++;
7194 
7195   if (current_number_of_logs <= max_nr_files)
7196   {
7197     error= 0;
7198     goto err; /* No logs to expire */
7199   }
7200 
7201   if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
7202     goto err;
7203 
7204   while (strcmp(log_file_name, log_info.log_file_name) &&
7205          !is_active(log_info.log_file_name) &&
7206          !log_in_use(log_info.log_file_name) &&
7207          current_number_of_logs > max_nr_files)
7208   {
7209     current_number_of_logs--;
7210     strmake(to_log,
7211             log_info.log_file_name,
7212             sizeof(log_info.log_file_name) - 1);
7213 
7214     if (find_next_log(&log_info, 0))
7215     {
7216       break;
7217     }
7218   }
7219 
7220   error= (to_log[0] ? purge_logs(to_log, true, false, true,
7221                                  (ulonglong *) 0, true) : 0);
7222 
7223 err:
7224   mysql_mutex_unlock(&LOCK_index);
7225   DBUG_RETURN(error);
7226 }
7227 
/**
  Remove all logs before the given file date from disk and from the
  index file.

  @param purge_time	Delete all log files before given date.
  @param auto_purge     True if this is an automatic purge.

  @note
    If any of the logs before the deleted one is in use,
    only purge logs up to this one.

  @note
    Warnings are reported on the current thread (current_thd).

  @retval
    0				ok
  @retval
    LOG_INFO_PURGE_NO_ROTATE	Binary file that can't be rotated
    LOG_INFO_FATAL              if any other than ENOENT error from
                                mysql_file_stat() or mysql_file_delete()
*/
7247 
purge_logs_before_date(time_t purge_time,bool auto_purge)7248 int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time, bool auto_purge)
7249 {
7250   int error;
7251   int no_of_threads_locking_log= 0, no_of_log_files_purged= 0;
7252   bool log_is_active= false, log_is_in_use= false;
7253   char to_log[FN_REFLEN], copy_log_in_use[FN_REFLEN];
7254   LOG_INFO log_info;
7255   MY_STAT stat_area;
7256   THD *thd= current_thd;
7257 
7258   DBUG_ENTER("purge_logs_before_date");
7259 
7260   mysql_mutex_lock(&LOCK_index);
7261   to_log[0]= 0;
7262 
7263   if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
7264     goto err;
7265 
7266   while (!(log_is_active= is_active(log_info.log_file_name)))
7267   {
7268     if (!mysql_file_stat(m_key_file_log,
7269                          log_info.log_file_name, &stat_area, MYF(0)))
7270     {
7271       if (my_errno() == ENOENT)
7272       {
7273         /*
7274           It's not fatal if we can't stat a log file that does not exist.
7275         */
7276         set_my_errno(0);
7277       }
7278       else
7279       {
7280         /*
7281           Other than ENOENT are fatal
7282         */
7283         if (thd)
7284         {
7285           push_warning_printf(thd, Sql_condition::SL_WARNING,
7286                               ER_BINLOG_PURGE_FATAL_ERR,
7287                               "a problem with getting info on being purged %s; "
7288                               "consider examining correspondence "
7289                               "of your binlog index file "
7290                               "to the actual binlog files",
7291                               log_info.log_file_name);
7292         }
7293         else
7294         {
7295           sql_print_information("Failed to delete log file '%s'",
7296                                 log_info.log_file_name);
7297         }
7298         error= LOG_INFO_FATAL;
7299         goto err;
7300       }
7301     }
7302     /* check if the binary log file is older than the purge_time
7303        if yes check if it is in use, if not in use then add
7304        it in the list of binary log files to be purged.
7305     */
7306     else if (stat_area.st_mtime < purge_time)
7307     {
7308       if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
7309       {
7310         if (!auto_purge)
7311         {
7312           log_is_in_use= true;
7313           strcpy(copy_log_in_use, log_info.log_file_name);
7314         }
7315         break;
7316       }
7317       strmake(to_log,
7318               log_info.log_file_name,
7319               sizeof(log_info.log_file_name) - 1);
7320       no_of_log_files_purged++;
7321     }
7322     else
7323       break;
7324     if (find_next_log(&log_info, false/*need_lock_index=false*/))
7325       break;
7326   }
7327 
7328   if (log_is_active)
7329   {
7330     if(!auto_purge)
7331       push_warning_printf(thd, Sql_condition::SL_WARNING,
7332                           ER_WARN_PURGE_LOG_IS_ACTIVE,
7333                           ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
7334                           log_info.log_file_name);
7335 
7336   }
7337 
7338   if (log_is_in_use)
7339   {
7340     int no_of_log_files_to_purge= no_of_log_files_purged+1;
7341     while (strcmp(log_file_name, log_info.log_file_name))
7342     {
7343       if (mysql_file_stat(m_key_file_log, log_info.log_file_name,
7344                           &stat_area, MYF(0)))
7345       {
7346         if (stat_area.st_mtime < purge_time)
7347           no_of_log_files_to_purge++;
7348         else
7349           break;
7350       }
7351       if (find_next_log(&log_info, false/*need_lock_index=false*/))
7352       {
7353         no_of_log_files_to_purge++;
7354         break;
7355       }
7356     }
7357 
7358     push_warning_printf(thd, Sql_condition::SL_WARNING,
7359                         ER_WARN_PURGE_LOG_IN_USE,
7360                         ER(ER_WARN_PURGE_LOG_IN_USE),
7361                         copy_log_in_use, no_of_threads_locking_log,
7362                         no_of_log_files_purged, no_of_log_files_to_purge);
7363   }
7364 
7365   error= (to_log[0] ? purge_logs(to_log, true,
7366                                  false/*need_lock_index=false*/,
7367                                  true/*need_update_threads=true*/,
7368                                  (ulonglong *) 0, auto_purge) : 0);
7369 
7370 err:
7371   mysql_mutex_unlock(&LOCK_index);
7372   DBUG_RETURN(error);
7373 }
7374 #endif /* HAVE_REPLICATION */
7375 
7376 
/**
  Create a new log file name.

  @param buf		buf of at least FN_REFLEN where new name is stored
  @param log_ident	name appended to the directory part of the
			current log file name

  @note
    If the file name would be longer than FN_REFLEN it will be truncated
*/
7385 
make_log_name(char * buf,const char * log_ident)7386 void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
7387 {
7388   size_t dir_len = dirname_length(log_file_name);
7389   if (dir_len >= FN_REFLEN)
7390     dir_len=FN_REFLEN-1;
7391   my_stpnmov(buf, log_file_name, dir_len);
7392   strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
7393 }
7394 
7395 
7396 /**
7397   Check if we are writing/reading to the given log file.
7398 */
7399 
is_active(const char * log_file_name_arg)7400 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
7401 {
7402   return !compare_log_name(log_file_name, log_file_name_arg);
7403 }
7404 
7405 
inc_prep_xids(THD * thd)7406 void MYSQL_BIN_LOG::inc_prep_xids(THD *thd)
7407 {
7408   DBUG_ENTER("MYSQL_BIN_LOG::inc_prep_xids");
7409 #ifndef NDEBUG
7410   int result= m_prep_xids.atomic_add(1);
7411   DBUG_PRINT("debug", ("m_prep_xids: %d", result + 1));
7412 #else
7413   (void) m_prep_xids.atomic_add(1);
7414 #endif
7415   thd->get_transaction()->m_flags.xid_written= true;
7416   DBUG_VOID_RETURN;
7417 }
7418 
7419 
dec_prep_xids(THD * thd)7420 void MYSQL_BIN_LOG::dec_prep_xids(THD *thd)
7421 {
7422   DBUG_ENTER("MYSQL_BIN_LOG::dec_prep_xids");
7423   int32 result= m_prep_xids.atomic_add(-1);
7424   DBUG_PRINT("debug", ("m_prep_xids: %d", result - 1));
7425   thd->get_transaction()->m_flags.xid_written= false;
7426   /* If the old value was 1, it is zero now. */
7427   if (result == 1)
7428   {
7429     mysql_mutex_lock(&LOCK_xids);
7430     mysql_cond_signal(&m_prep_xids_cond);
7431     mysql_mutex_unlock(&LOCK_xids);
7432   }
7433   DBUG_VOID_RETURN;
7434 }
7435 
write_to_file(Log_event * event)7436 int MYSQL_BIN_LOG::write_to_file(Log_event* event)
7437 {
7438   if (crypto.is_enabled())
7439     event->event_encrypter.enable_encryption(&crypto);
7440   return event->write(&log_file);
7441 }
7442 
/*
  Wrappers around new_file_impl to avoid using an argument
  to control locking. Such an argument 1) is less readable, 2) breaks
  encapsulation, and 3) allows external access to the class without
  a lock (which is not possible with the private
  new_file_without_locking method).

  @retval
    nonzero - error

*/
7454 
new_file(Format_description_log_event * extra_description_event)7455 int MYSQL_BIN_LOG::new_file(Format_description_log_event *extra_description_event)
7456 {
7457   return new_file_impl(true/*need_lock_log=true*/, extra_description_event);
7458 }
7459 
7460 /*
7461   @retval
7462     nonzero - error
7463 */
new_file_without_locking(Format_description_log_event * extra_description_event)7464 int MYSQL_BIN_LOG::new_file_without_locking(Format_description_log_event *extra_description_event)
7465 {
7466   return new_file_impl(false/*need_lock_log=false*/, extra_description_event);
7467 }
7468 
7469 
/**
  Start writing to a new log file or reopen the old file.

  The function returns immediately (with 0) when the log is not open.
  Otherwise it waits until there are no prepared XIDs, appends a
  Rotate_log_event to the current file, closes it, and reopens the index
  file and a newly named log file.

  @param need_lock_log If true, this function acquires LOCK_log;
  otherwise the caller should already have acquired it.
  @param extra_description_event Passed through unchanged to
  open_binlog() when the new file is opened.

  @retval 0 success
  @retval nonzero - error

  @note The new file name is stored last in the index file
  @note LOCK_index is acquired and released by this function itself.
*/
int MYSQL_BIN_LOG::new_file_impl(bool need_lock_log, Format_description_log_event *extra_description_event)
{
  int error= 0;
  bool close_on_error= false;
  char new_name[FN_REFLEN], *new_name_ptr= NULL, *old_name, *file_to_open;

  DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
  /* Checked before any lock is taken: a closed log has nothing to rotate. */
  if (!is_open())
  {
    DBUG_PRINT("info",("log is closed"));
    DBUG_RETURN(error);
  }

  if (need_lock_log)
    mysql_mutex_lock(&LOCK_log);
  else
    mysql_mutex_assert_owner(&LOCK_log);
  DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
                  DEBUG_SYNC(current_thd, "before_rotate_binlog"););
  mysql_mutex_lock(&LOCK_xids);
  /*
    We need to ensure that the number of prepared XIDs are 0.

    If m_prep_xids is not zero:
    - We wait for storage engine commit, hence decrease m_prep_xids
    - We keep the LOCK_log to block new transactions from being
      written to the binary log.
   */
  while (get_prep_xids() > 0)
  {
    DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
    mysql_cond_wait(&m_prep_xids_cond, &LOCK_xids);
  }
  mysql_mutex_unlock(&LOCK_xids);

  /* From here on every exit path goes through 'end', which unlocks. */
  mysql_mutex_lock(&LOCK_index);

  mysql_mutex_assert_owner(&LOCK_log);
  mysql_mutex_assert_owner(&LOCK_index);


  /*
    The storage engine log flush is skipped when the "expire_logs_always"
    debug keyword is set. A failure here leaves the binlog open
    (close_on_error stays false).
  */
  if (DBUG_EVALUATE_IF("expire_logs_always", 0, 1)
      && (error= ha_flush_logs(NULL)))
    goto end;

  if (!is_relay_log)
  {
    /* Save set of GTIDs of the last binlog into table on binlog rotation */
    if ((error= gtid_state->save_gtids_of_last_binlog_into_table(true)))
    {
      close_on_error= true;
      goto end;
    }
  }

  /*
    If user hasn't specified an extension, generate a new log name
    We have to do this here and not in open as we want to store the
    new file name in the current binary log file.
  */
  new_name_ptr= new_name;
  if ((error= generate_new_name(new_name, name)))
  {
    // Use the old name if generation of new name fails.
    strcpy(new_name, name);
    close_on_error= TRUE;
    goto end;
  }
  /*
    Make sure that the log_file is initialized before writing
    Rotate_log_event into it.
  */
  if (log_file.alloced_buffer)
  {
    /*
      We log the whole file name for log file as the user may decide
      to change base names at some point.
    */
    Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
                       is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
    /*
      The current relay-log's closing Rotate event must have checksum
      value computed with an algorithm of the last relay-logged FD event.
    */
    if (is_relay_log)
      (r.common_footer)->checksum_alg= relay_log_checksum_alg;
    assert(!is_relay_log || relay_log_checksum_alg !=
           binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
    /* The debug keyword below simulates a failed write of the event. */
    if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event",
                        (error=1), FALSE) ||
       (error= write_to_file(&r)))
    {
      char errbuf[MYSYS_STRERROR_SIZE];
      DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno=2;);
      close_on_error= true;
      my_printf_error(ER_ERROR_ON_WRITE, ER(ER_CANT_OPEN_FILE),
                      MYF(ME_FATALERROR), name,
                      errno, my_strerror(errbuf, sizeof(errbuf), errno));
      goto end;
    }
    bytes_written += r.common_header->data_written;
  }

  if ((error= flush_io_cache(&log_file)))
  {
    close_on_error= true;
    goto end;
  }

  DEBUG_SYNC(current_thd, "after_rotate_event_appended");

  /*
    Detach 'name' before close() and keep it in old_name: it is still
    passed to open_binlog() below and is freed after the reopen.
  */
  old_name=name;
  name=0;				// Don't free name
  close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX,
        false/*need_lock_log=false*/,
        false/*need_lock_index=false*/);

  /* Apply a pending checksum algorithm change, if one was requested. */
  if (checksum_alg_reset != binary_log::BINLOG_CHECKSUM_ALG_UNDEF)
  {
    assert(!is_relay_log);
    assert(binlog_checksum_options != checksum_alg_reset);
    binlog_checksum_options= checksum_alg_reset;
  }
  /*
     Note that at this point, log_state != LOG_CLOSED (important for is_open()).
  */

  DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
  /*
     new_file() is only used for rotation (in FLUSH LOGS or because size >
     max_binlog_size or max_relay_log_size).
     If this is a binary log, the Format_description_log_event at the beginning of
     the new file should have created=0 (to distinguish with the
     Format_description_log_event written at server startup, which should
     trigger temp tables deletion on slaves.
  */

  /* reopen index binlog file, BUG#34582 */
  /* file_to_open tracks which file to name in the error message below. */
  file_to_open= index_file_name;
  error= open_index_file(index_file_name, 0, false/*need_lock_index=false*/);
  if (!error)
  {
    /* reopen the binary log file. */
    file_to_open= new_name_ptr;
    error= open_binlog(old_name, new_name_ptr,
                       max_size, true/*null_created_arg=true*/,
                       false/*need_lock_index=false*/,
                       true/*need_sid_lock=true*/,
                       extra_description_event);
  }

  /* handle reopening errors */
  if (error)
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    my_printf_error(ER_CANT_OPEN_FILE, ER(ER_CANT_OPEN_FILE),
                    MYF(ME_FATALERROR), file_to_open,
                    error, my_strerror(errbuf, sizeof(errbuf), error));
    close_on_error= true;
  }
  my_free(old_name);

end:

  if (error && close_on_error /* rotate, flush or reopen failed */)
  {
    /*
      Close whatever was left opened.

      We are keeping the behavior as it exists today, ie,
      we disable logging and move on (see: BUG#51014).

      TODO: as part of WL#1790 consider other approaches:
       - kill mysql (safety);
       - try multiple locations for opening a log file;
       - switch server to protected/readonly mode
       - ...
    */
    if (binlog_error_action == ABORT_SERVER)
    {
      exec_binlog_error_action_abort("Either disk is full or file system is"
                                     " read only while rotating the binlog."
                                     " Aborting the server.");
    }
    else
      sql_print_error("Could not open %s for logging (error %d). "
                      "Turning logging off for the whole duration "
                      "of the MySQL server process. To turn it on "
                      "again: fix the cause, shutdown the MySQL "
                      "server and restart it.",
                      new_name_ptr, errno);
    close(LOG_CLOSE_INDEX, false /*need_lock_log=false*/,
          false/*need_lock_index=false*/);
  }

  /* Release in reverse order; LOCK_log only if it was taken here. */
  mysql_mutex_unlock(&LOCK_index);
  if (need_lock_log)
    mysql_mutex_unlock(&LOCK_log);
  DEBUG_SYNC(current_thd, "after_disable_binlog");
  DBUG_RETURN(error);
}
7682 
7683 
7684 #ifdef HAVE_REPLICATION
7685 /**
7686   Called after an event has been written to the relay log by the IO
7687   thread.  This flushes and possibly syncs the file (according to the
7688   sync options), rotates the file if it has grown over the limit, and
7689   finally calls signal_update().
7690 
7691   @note The caller must hold LOCK_log before invoking this function.
7692 
7693   @param mi Master_info for the IO thread.
7694   @param need_data_lock If true, mi->data_lock will be acquired if a
7695   rotation is needed.  Otherwise, mi->data_lock must be held by the
7696   caller.
7697 
7698   @retval false success
7699   @retval true error
7700 */
after_append_to_relay_log(Master_info * mi)7701 bool MYSQL_BIN_LOG::after_append_to_relay_log(Master_info *mi)
7702 {
7703   DBUG_ENTER("MYSQL_BIN_LOG::after_append_to_relay_log");
7704   DBUG_PRINT("info",("max_size: %lu",max_size));
7705 
7706   // Check pre-conditions
7707   mysql_mutex_assert_owner(&LOCK_log);
7708   mysql_mutex_assert_owner(&mi->data_lock);
7709   assert(is_relay_log);
7710   assert(current_thd->system_thread == SYSTEM_THREAD_SLAVE_IO);
7711 
7712   /*
7713     We allow the relay log rotation by relay log size
7714     only if the trx parser is not inside a transaction.
7715   */
7716   bool can_rotate= mi->transaction_parser.is_not_inside_transaction();
7717 
7718 #ifndef NDEBUG
7719   if ((uint) my_b_append_tell(&log_file) >
7720       DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size) &&
7721       !can_rotate)
7722   {
7723     DBUG_PRINT("info",("Postponing the rotation by size waiting for "
7724                        "the end of the current transaction."));
7725   }
7726 #endif
7727 
7728   // Flush and sync
7729   bool error= false;
7730   if (flush_and_sync(0) == 0 && can_rotate)
7731   {
7732     /*
7733       If the last event of the transaction has been flushed, we can add
7734       the GTID (if it is not empty) to the logged set, or else it will
7735       not be available in the Previous GTIDs of the next relay log file
7736       if we are going to rotate the relay log.
7737     */
7738     Gtid *last_gtid_queued= mi->get_last_gtid_queued();
7739     if (!last_gtid_queued->is_empty())
7740     {
7741       global_sid_lock->rdlock();
7742       mi->rli->add_logged_gtid(last_gtid_queued->sidno,
7743                                last_gtid_queued->gno);
7744       global_sid_lock->unlock();
7745       mi->clear_last_gtid_queued();
7746     }
7747 
7748     /*
7749       If relay log is too big, rotate. But only if not in the middle of a
7750       transaction when GTIDs are enabled.
7751       We now try to mimic the following master binlog behavior: "A transaction
7752       is written in one chunk to the binary log, so it is never split between
7753       several binary logs. Therefore, if you have big transactions, you might
7754       see binary log files larger than max_binlog_size."
7755     */
7756     if ((uint) my_b_append_tell(&log_file) >
7757         DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size))
7758     {
7759       error= new_file_without_locking(mi->get_mi_description_event());
7760     }
7761   }
7762 
7763   signal_update();
7764 
7765   DBUG_RETURN(error);
7766 }
7767 
7768 
append_event(Log_event * ev,Master_info * mi)7769 bool MYSQL_BIN_LOG::append_event(Log_event* ev, Master_info *mi)
7770 {
7771   DBUG_ENTER("MYSQL_BIN_LOG::append");
7772 
7773   // check preconditions
7774   assert(log_file.type == SEQ_READ_APPEND);
7775   assert(is_relay_log);
7776 
7777   // acquire locks
7778   mysql_mutex_lock(&LOCK_log);
7779 
7780   // write data
7781   bool error = false;
7782   if (write_to_file(ev) == 0)
7783   {
7784     bytes_written+= ev->common_header->data_written;
7785     error= after_append_to_relay_log(mi);
7786   }
7787   else
7788     error= true;
7789 
7790   mysql_mutex_unlock(&LOCK_log);
7791   DBUG_RETURN(error);
7792 }
7793 
append_buffer(uchar * buf,size_t len,Master_info * mi)7794 bool MYSQL_BIN_LOG::append_buffer(uchar* buf, size_t len, Master_info *mi)
7795 {
7796   DBUG_ENTER("MYSQL_BIN_LOG::append_buffer");
7797 
7798   // check preconditions
7799   assert(log_file.type == SEQ_READ_APPEND);
7800   assert(is_relay_log);
7801   mysql_mutex_assert_owner(&LOCK_log);
7802 
7803   // write data
7804   uchar *ebuf= NULL;
7805 
7806   if (crypto.is_enabled())
7807   {
7808     ebuf= reinterpret_cast<uchar*>(my_malloc(PSI_NOT_INSTRUMENTED, len, MYF(MY_WME)));
7809     if (!ebuf ||
7810         encrypt_event(my_b_append_tell(&log_file), crypto, buf, ebuf, len))
7811     {
7812       if (ebuf != NULL)
7813         my_free(ebuf);
7814       DBUG_RETURN(true);
7815     }
7816 
7817     buf= ebuf;
7818   }
7819 
7820   if (my_b_append(&log_file,(uchar*) buf,len))
7821   {
7822     if (ebuf != NULL)
7823       my_free(ebuf);
7824     DBUG_RETURN(true);
7825   }
7826 
7827   if (ebuf != NULL)
7828     my_free(ebuf);
7829 
7830   bytes_written += len;
7831   DBUG_RETURN(after_append_to_relay_log(mi));
7832 }
7833 #endif // ifdef HAVE_REPLICATION
7834 
flush_and_sync(const bool force)7835 bool MYSQL_BIN_LOG::flush_and_sync(const bool force)
7836 {
7837   mysql_mutex_assert_owner(&LOCK_log);
7838 
7839   if (flush_io_cache(&log_file))
7840     return 1;
7841 
7842   std::pair<bool, bool> result= sync_binlog_file(force);
7843 
7844   return result.first;
7845 }
7846 
start_union_events(THD * thd,query_id_t query_id_param)7847 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
7848 {
7849   assert(!thd->binlog_evt_union.do_union);
7850   thd->binlog_evt_union.do_union= TRUE;
7851   thd->binlog_evt_union.unioned_events= FALSE;
7852   thd->binlog_evt_union.unioned_events_trans= FALSE;
7853   thd->binlog_evt_union.first_query_id= query_id_param;
7854 }
7855 
stop_union_events(THD * thd)7856 void MYSQL_BIN_LOG::stop_union_events(THD *thd)
7857 {
7858   assert(thd->binlog_evt_union.do_union);
7859   thd->binlog_evt_union.do_union= FALSE;
7860 }
7861 
is_query_in_union(THD * thd,query_id_t query_id_param)7862 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
7863 {
7864   return (thd->binlog_evt_union.do_union &&
7865           query_id_param >= thd->binlog_evt_union.first_query_id);
7866 }
7867 
7868 /*
7869   Updates thd's position-of-next-event variables
7870   after a *real* write a file.
7871  */
update_thd_next_event_pos(THD * thd)7872 void MYSQL_BIN_LOG::update_thd_next_event_pos(THD* thd)
7873 {
7874   if (likely(thd != NULL))
7875   {
7876     thd->set_next_event_pos(log_file_name,
7877                             my_b_tell(&log_file));
7878   }
7879 }
7880 
7881 /*
7882   Moves the last bunch of rows from the pending Rows event to a cache (either
7883   transactional cache if is_transaction is @c true, or the non-transactional
7884   cache otherwise. Sets a new pending event.
7885 
7886   @param thd               a pointer to the user thread.
7887   @param evt               a pointer to the row event.
7888   @param is_transactional  @c true indicates a transactional cache,
7889                            otherwise @c false a non-transactional.
7890 */
7891 int
flush_and_set_pending_rows_event(THD * thd,Rows_log_event * event,bool is_transactional)7892 MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
7893                                                 Rows_log_event* event,
7894                                                 bool is_transactional)
7895 {
7896   DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
7897   assert(mysql_bin_log.is_open());
7898   DBUG_PRINT("enter", ("event: 0x%lx", (long) event));
7899 
7900   int error= 0;
7901   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
7902 
7903   assert(cache_mngr);
7904 
7905   binlog_cache_data *cache_data=
7906     cache_mngr->get_binlog_cache_data(is_transactional);
7907 
7908   DBUG_PRINT("info", ("cache_mngr->pending(): 0x%lx", (long) cache_data->pending()));
7909 
7910   if (Rows_log_event* pending= cache_data->pending())
7911   {
7912     /*
7913       Write pending event to the cache.
7914     */
7915     if (cache_data->write_event(thd, pending))
7916     {
7917       set_write_error(thd, is_transactional);
7918       if (check_write_error(thd) && cache_data &&
7919           stmt_cannot_safely_rollback(thd))
7920         cache_data->set_incident();
7921       delete pending;
7922       cache_data->set_pending(NULL);
7923       DBUG_RETURN(1);
7924     }
7925 
7926     delete pending;
7927   }
7928 
7929   cache_data->set_pending(event);
7930 
7931   DBUG_RETURN(error);
7932 }
7933 
/**
  Write an event to the binary log.

  The event is not written to the log file directly: it is written to the
  session's binlog cache (transactional or statement cache, as selected by
  event_info->is_using_trans_cache()).

  Steps, in order:
   - If the session is in "union events" mode, only remember that an event
     was requested and return success.
   - Flush the session's pending rows event.
   - If the log is open: apply the OPTION_BIN_LOG / database filter checks
     (HAVE_REPLICATION builds only), start the binlog transaction/statement,
     write the context events (Intvar, Rand, User_var) when the statement
     is not logged in row format, then write the event itself.

  @param event_info Event to be written; its @c thd member supplies the
                    session.

  @retval false success, or the event was skipped (union mode or filtered
                out)
  @retval true  error.  Note that this is also the value returned when
                the log is not open, because @c error is initialised to 1
                and only cleared after a successful write.
*/

bool MYSQL_BIN_LOG::write_event(Log_event *event_info)
{
  THD *thd= event_info->thd;
  bool error= 1;
  DBUG_ENTER("MYSQL_BIN_LOG::write_event(Log_event *)");

  if (thd->binlog_evt_union.do_union)
  {
    /*
      In Stored function; Remember that function call caused an update.
      We will log the function call to the binary log on function exit
    */
    thd->binlog_evt_union.unioned_events= TRUE;
    thd->binlog_evt_union.unioned_events_trans |=
      event_info->is_using_trans_cache();
    DBUG_RETURN(0);
  }

  /*
    We only end the statement if we are in a top-level statement.  If
    we are inside a stored function, we do not end the statement since
    this will close all tables on the slave. But there can be a special case
    where we are inside a stored function/trigger and a SAVEPOINT is being
    set in side the stored function/trigger. This SAVEPOINT execution will
    force the pending event to be flushed without an STMT_END_F flag. This
    will result in a case where following DMLs will be considered as part of
    same statement and result in data loss on slave. Hence in this case we
    force the end_stmt to be true.
  */
  bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
                        SQLCOM_SAVEPOINT)? true:
    (thd->locked_tables_mode && thd->lex->requires_prelocking());
  if (thd->binlog_flush_pending_rows_event(end_stmt,
                                           event_info->is_using_trans_cache()))
    DBUG_RETURN(error);

  /*
     In most cases this is only called if 'is_open()' is true; in fact this is
     mostly called if is_open() *was* true a few instructions before, but it
     could have changed since.
  */
  if (likely(is_open()))
  {
#ifdef HAVE_REPLICATION
    /*
      In the future we need to add to the following if tests like
      "do the involved tables match (to be implemented)
      binlog_[wild_]{do|ignore}_table?" (WL#1049)"
    */
    /*
      NOTE(review): thd has already been dereferenced unconditionally
      above, so the "thd &&" test below cannot be what protects against a
      NULL session.
    */
    const char *local_db= event_info->get_db();
    if ((thd && !(thd->variables.option_bits & OPTION_BIN_LOG)) ||
	(thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
         thd->lex->sql_command != SQLCOM_SAVEPOINT &&
         (!event_info->is_no_filter_event() &&
          !binlog_filter->db_ok(local_db))))
      DBUG_RETURN(0);
#endif /* HAVE_REPLICATION */

    assert(event_info->is_using_trans_cache() || event_info->is_using_stmt_cache());

    if (binlog_start_trans_and_stmt(thd, event_info))
      DBUG_RETURN(error);

    /* Select the cache (trx or stmt) every write below goes to. */
    bool is_trans_cache= event_info->is_using_trans_cache();
    binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
    binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);

    DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));

    /*
       No check for auto events flag here - this write method should
       never be called if auto-events are enabled.

       Write first log events which describe the 'run environment'
       of the SQL command. If row-based binlogging, Insert_id, Rand
       and other kind of "setting context" events are not needed.
    */
    if (thd)
    {
      if (!thd->is_current_stmt_binlog_format_row())
      {
        /*
          Each context event below is written to the same cache as the
          main event; binlog_bytes_written is only bumped for events that
          use immediate logging.
        */
        if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
        {
          Intvar_log_event e(thd,(uchar) binary_log::Intvar_event::LAST_INSERT_ID_EVENT,
                             thd->first_successful_insert_id_in_prev_stmt_for_binlog,
                             event_info->event_cache_type, event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
          if (event_info->is_using_immediate_logging())
            thd->binlog_bytes_written+= e.header()->data_written;
        }
        if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
        {
          DBUG_PRINT("info",("number of auto_inc intervals: %u",
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             nb_elements()));
          Intvar_log_event e(thd, (uchar) binary_log::Intvar_event::INSERT_ID_EVENT,
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             minimum(), event_info->event_cache_type,
                             event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
          if (event_info->is_using_immediate_logging())
            thd->binlog_bytes_written+= e.header()->data_written;
        }
        if (thd->rand_used)
        {
          Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
                           event_info->event_cache_type,
                           event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
          if (event_info->is_using_immediate_logging())
            thd->binlog_bytes_written+= e.header()->data_written;
        }
        if (!thd->user_var_events.empty())
        {
          /* One User_var event per user variable recorded in the session. */
          for (size_t i= 0; i < thd->user_var_events.size(); i++)
          {
            BINLOG_USER_VAR_EVENT *user_var_event= thd->user_var_events[i];

            /* setting flags for user var log event */
            uchar flags= User_var_log_event::UNDEF_F;
            if (user_var_event->unsigned_flag)
              flags|= User_var_log_event::UNSIGNED_F;

            User_var_log_event e(thd,
                                 user_var_event->user_var_event->entry_name.ptr(),
                                 user_var_event->user_var_event->entry_name.length(),
                                 user_var_event->value,
                                 user_var_event->length,
                                 user_var_event->type,
                                 user_var_event->charset_number, flags,
                                 event_info->event_cache_type,
                                 event_info->event_logging_type);
            if (cache_data->write_event(thd, &e))
              goto err;
            if (event_info->is_using_immediate_logging())
              thd->binlog_bytes_written+= e.header()->data_written;
          }
        }
      }
    }

    /*
      Write the event.
    */
    if (cache_data->write_event(thd, event_info))
      goto err;

    /* Debug-only fault injection: behave as if the write had failed. */
    if (DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
      goto err;
    if (event_info->is_using_immediate_logging())
      thd->binlog_bytes_written+= event_info->common_header->data_written;

    /*
      After writing the event, if the trx-cache was used and any unsafe
      change was written into it, the cache is marked as cannot safely
      roll back.
    */
    if (is_trans_cache && stmt_cannot_safely_rollback(thd))
      cache_mngr->trx_cache.set_cannot_rollback();

    error= 0;

    /*
      Reached both on success (error == 0, nothing to do) and through the
      "goto err" jumps above (error still 1).
    */
err:
    if (error)
    {
      set_write_error(thd, is_trans_cache);
      if (check_write_error(thd) && cache_data &&
          stmt_cannot_safely_rollback(thd))
        cache_data->set_incident();
    }
  }

  DBUG_RETURN(error);
}
8115 
8116 /**
8117   The method executes rotation when LOCK_log is already acquired
8118   by the caller.
8119 
8120   @param force_rotate  caller can request the log rotation
8121   @param check_purge   is set to true if rotation took place
8122 
8123   @note
8124     If rotation fails, for instance the server was unable
8125     to create a new log file, we still try to write an
8126     incident event to the current log.
8127 
8128   @note The caller must hold LOCK_log when invoking this function.
8129 
8130   @retval
8131     nonzero - error in rotating routine.
8132 */
rotate(bool force_rotate,bool * check_purge)8133 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
8134 {
8135   int error= 0;
8136   DBUG_ENTER("MYSQL_BIN_LOG::rotate");
8137 
8138   assert(!is_relay_log);
8139   mysql_mutex_assert_owner(&LOCK_log);
8140 
8141   DEBUG_SYNC(current_thd,"stop_binlog_rotation_after_acquiring_lock_log");
8142 
8143   *check_purge= false;
8144 
8145   if (DBUG_EVALUATE_IF("force_rotate", 1, 0) || force_rotate ||
8146       (my_b_tell(&log_file) >= (my_off_t) max_size))
8147   {
8148     error= new_file_without_locking(NULL);
8149     *check_purge= true;
8150     publish_coordinates_for_global_status();
8151   }
8152   DBUG_RETURN(error);
8153 }
8154 
8155 /**
8156   The method executes logs purging routine.
8157 
8158   @retval
8159     nonzero - error in rotating routine.
8160 */
purge()8161 void MYSQL_BIN_LOG::purge()
8162 {
8163 #ifdef HAVE_REPLICATION
8164   if (expire_logs_days)
8165   {
8166     DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
8167     time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
8168     DBUG_EXECUTE_IF("expire_logs_always",
8169                     { purge_time= my_time(0);});
8170     if (purge_time >= 0)
8171     {
8172       /*
8173         Flush logs for storage engines, so that the last transaction
8174         is fsynced inside storage engines.
8175       */
8176       ha_flush_logs(NULL);
8177       purge_logs_before_date(purge_time, true);
8178     }
8179   }
8180   if (max_binlog_files)
8181   {
8182     purge_logs_maximum_number(max_binlog_files);
8183   }
8184   if (binlog_space_limit)
8185   {
8186     purge_logs_by_size(true);
8187   }
8188 #endif
8189 }
8190 
8191 /**
8192   Execute a FLUSH LOGS statement.
8193 
8194   The method is a shortcut of @c rotate() and @c purge().
8195   LOCK_log is acquired prior to rotate and is released after it.
8196 
8197   @param force_rotate  caller can request the log rotation
8198 
8199   @retval
8200     nonzero - error in rotating routine.
8201 */
rotate_and_purge(THD * thd,bool force_rotate)8202 int MYSQL_BIN_LOG::rotate_and_purge(THD* thd, bool force_rotate)
8203 {
8204   int error= 0;
8205   DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
8206   bool check_purge= false;
8207 
8208   /*
8209     FLUSH BINARY LOGS command should ignore 'read-only' and 'super_read_only'
8210     options so that it can update 'mysql.gtid_executed' replication repository
8211     table.
8212   */
8213   thd->set_skip_readonly_check();
8214   /*
8215     Wait for handlerton to insert any pending information into the binlog.
8216     For e.g. ha_ndbcluster which updates the binlog asynchronously this is
8217     needed so that the user see its own commands in the binlog.
8218   */
8219   ha_binlog_wait(thd);
8220 
8221   assert(!is_relay_log);
8222   mysql_mutex_lock(&LOCK_log);
8223   error= rotate(force_rotate, &check_purge);
8224   /*
8225     NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
8226           the mutex. Otherwise causes various deadlocks.
8227   */
8228   mysql_mutex_unlock(&LOCK_log);
8229 
8230   if (!error && check_purge)
8231     purge();
8232 
8233   DBUG_RETURN(error);
8234 }
8235 
next_file_id()8236 uint MYSQL_BIN_LOG::next_file_id()
8237 {
8238   uint res;
8239   mysql_mutex_lock(&LOCK_log);
8240   res = file_id++;
8241   mysql_mutex_unlock(&LOCK_log);
8242   return res;
8243 }
8244 
8245 
get_gtid_executed(Sid_map * sid_map,Gtid_set * gtid_set)8246 int MYSQL_BIN_LOG::get_gtid_executed(Sid_map *sid_map, Gtid_set *gtid_set)
8247 {
8248   DBUG_ENTER("MYSQL_BIN_LOG::get_gtid_executed");
8249   int error= 0;
8250 
8251   mysql_mutex_lock(&mysql_bin_log.LOCK_commit);
8252   global_sid_lock->wrlock();
8253 
8254   enum_return_status return_status= global_sid_map->copy(sid_map);
8255   if (return_status != RETURN_STATUS_OK)
8256   {
8257     error= 1;
8258     goto end;
8259   }
8260 
8261   return_status= gtid_set->add_gtid_set(gtid_state->get_executed_gtids());
8262   if (return_status != RETURN_STATUS_OK)
8263     error= 1;
8264 
8265 end:
8266   global_sid_lock->unlock();
8267   mysql_mutex_unlock(&mysql_bin_log.LOCK_commit);
8268 
8269   DBUG_RETURN(error);
8270 }
8271 
8272 
8273 /**
8274   Auxiliary function to read a page from the cache and set the given
8275   buffer pointer to point to the beginning of the page and the given
8276   length pointer to point to the end of it.
8277 
8278   @param cache IO_CACHE to read from
8279   @param[OUT] buf_p Will be set to point to the beginning of the page.
8280   @param[OUT] buf_len_p Will be set to the length of the buffer.
8281 
8282   @retval false Success
8283   @retval true Error reading from the cache.
8284 */
read_cache_page(IO_CACHE * cache,uchar ** buf_p,uint32 * buf_len_p)8285 static bool read_cache_page(IO_CACHE *cache, uchar **buf_p, uint32 *buf_len_p)
8286 {
8287   assert(*buf_len_p == 0);
8288   cache->read_pos= cache->read_end;
8289   *buf_len_p= my_b_fill(cache);
8290   *buf_p= cache->read_pos;
8291   return cache->error ? true : false;
8292 }
8293 
8294 
8295 /**
8296   Write the contents of the given IO_CACHE to the binary log.
8297 
8298   The cache will be reset as a READ_CACHE to be able to read the
8299   contents from it.
8300 
8301   The data will be post-processed: see class Binlog_event_writer for
8302   details.
8303 
8304   @param cache Events will be read from this IO_CACHE.
8305   @param writer Events will be written to this Binlog_event_writer.
8306 
8307   @retval true IO error.
8308   @retval false Success.
8309 
8310   @see MYSQL_BIN_LOG::write_cache
8311 */
do_write_cache(IO_CACHE * cache,Binlog_event_writer * writer)8312 bool MYSQL_BIN_LOG::do_write_cache(IO_CACHE *cache, Binlog_event_writer *writer)
8313 {
8314   DBUG_ENTER("MYSQL_BIN_LOG::do_write_cache");
8315 
8316   DBUG_EXECUTE_IF("simulate_do_write_cache_failure",
8317                   {
8318                     /*
8319                        see binlog_cache_data::write_event() that reacts on
8320                        @c simulate_disk_full_at_flush_pending.
8321                     */
8322                     DBUG_SET("-d,simulate_do_write_cache_failure");
8323                     DBUG_RETURN(true);
8324                   });
8325 
8326 #ifndef NDEBUG
8327   uint64 expected_total_len= my_b_tell(cache);
8328 #endif
8329 
8330   DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
8331                   {
8332                     DBUG_SET("+d,simulate_file_write_error");
8333                   });
8334 
8335   int reinit_err= reinit_io_cache(cache, READ_CACHE, 0, 0, 0);
8336   DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
8337                   {
8338                     DBUG_SET("-d,simulate_file_write_error");
8339                   });
8340   if (reinit_err)
8341     DBUG_RETURN(true);
8342 
8343   uchar *buf= cache->read_pos;
8344   uint32 buf_len= my_b_bytes_in_cache(cache);
8345   uint32 event_len= 0;
8346   uchar header[LOG_EVENT_HEADER_LEN];
8347   uint32 header_len= 0;
8348 
8349   /*
8350     Each iteration of this loop processes all or a part of
8351     1) an event header or 2) an event body from the IO_CACHE.
8352   */
8353   while (true)
8354   {
8355     /**
8356       Nothing in cache: try to refill, and if cache was ended here,
8357       return success.  This code is needed even on the first iteration
8358       of the loop, because reinit_io_cache may or may not fill the
8359       first page.
8360     */
8361     if (buf_len == 0)
8362     {
8363       if (read_cache_page(cache, &buf, &buf_len))
8364       {
8365         /**
8366           @todo: this can happen in case of disk corruption in the
8367           IO_CACHE.  We may have written a half transaction (even half
8368           event) to the binlog.  We should rollback the transaction
8369           and truncate the binlog.  /Sven
8370         */
8371         assert(0);
8372       }
8373       if (buf_len == 0)
8374       {
8375         /**
8376           @todo: this can happen in case of disk corruption in the
8377           IO_CACHE.  We may have written a half transaction (even half
8378           event) to the binlog.  We should rollback the transaction
8379           and truncate the binlog.  /Sven
8380         */
8381         assert(my_b_tell(cache) == expected_total_len);
8382         /* Arrive the end of the cache */
8383         DBUG_RETURN(false);
8384       }
8385     }
8386 
8387     /* Write event header into binlog */
8388     if (event_len == 0)
8389     {
8390       /* data in the buf may be smaller than header size.*/
8391       uint32 header_incr =
8392         std::min<uint32>(LOG_EVENT_HEADER_LEN - header_len, buf_len);
8393 
8394       memcpy(header + header_len, buf, header_incr);
8395       header_len += header_incr;
8396       buf += header_incr;
8397       buf_len -= header_incr;
8398 
8399       if (header_len == LOG_EVENT_HEADER_LEN)
8400       {
8401         // Flush event header.
8402         uchar *header_p= header;
8403         if (writer->write_event_part(&header_p, &header_len, &event_len))
8404           DBUG_RETURN(true);
8405         assert(header_len == 0);
8406       }
8407     }
8408     else
8409     {
8410       /* Write all or part of the event body to binlog */
8411       if (writer->write_event_part(&buf, &buf_len, &event_len))
8412         DBUG_RETURN(true);
8413     }
8414   }
8415 }
8416 
/**
  Writes an incident event to stmt_cache, or directly to the binary log
  when the session has no binlog cache manager.

  Nothing is written (and success is returned) when the log is not open.

  @param ev Incident event to be written
  @param thd Thread variable
  @param need_lock_log If true, will acquire LOCK_log; otherwise the
  caller should already have acquired LOCK_log.
  @param err_msg Error message written to log file for the incident.
  Must not be NULL (asserted).
  @param do_flush_and_sync If true, will call flush_and_sync(), rotate() and
  purge().

  @retval false success (the local @c error stayed 0)
  @retval true error
*/
bool MYSQL_BIN_LOG::write_incident(Incident_log_event *ev, THD *thd,
                                   bool need_lock_log, const char* err_msg,
                                   bool do_flush_and_sync)
{
  uint error= 0;
  DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
  assert(err_msg);

  if (!is_open())
    DBUG_RETURN(error);

  // @todo make this work with the group log. /sven
  binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);

  /*
    thd->cache_mngr may be uninitialized when first transaction resulted in an
    incident. If there is no cache manager exists for the session, then we
    create one, so that a GTID is generated and is written prior to flushing
    the stmt_cache.
  */
  if (cache_mngr == NULL)
  {
    if (thd->binlog_setup_trx_data() ||
        DBUG_EVALUATE_IF("simulate_cache_creation_failure", 1, 0))
    {
      /*
        Cache creation failed.  With GTID_MODE ON / ON_PERMISSIVE this is
        reported through handle_binlog_flush_or_sync_error() and the
        function returns an error.  In the other GTID modes cache_mngr
        stays NULL and the incident is written directly further below.
      */
      enum_gtid_mode gtid_mode= get_gtid_mode(GTID_MODE_LOCK_NONE);
      if (gtid_mode == GTID_MODE_ON || gtid_mode == GTID_MODE_ON_PERMISSIVE)
      {
        const char *mode= gtid_mode == GTID_MODE_ON ? "ON" : "ON_PERMISSIVE";
        std::ostringstream message;

        message << "Could not create IO cache while writing an incident event "
                   "to the binary log for query: '"<< thd->query().str <<
                   "'. Since GTID_MODE= " << mode <<", server is unable "
                   "to proceed with logging.";
        handle_binlog_flush_or_sync_error(thd, true, message.str().c_str());
        DBUG_RETURN(true);
      }
    }
    else
      cache_mngr= thd_get_cache_mngr(thd);
  }

#ifndef NDEBUG
  if (DBUG_EVALUATE_IF("simulate_write_incident_event_into_binlog_directly",
                       1, 0) && !cache_mngr->stmt_cache.is_binlog_empty())
  {
    /* The stmt_cache contains corruption data, so we can reset it. */
    cache_mngr->stmt_cache.reset();
  }
#endif

  /*
    If there is no binlog cache then we write incidents directly
    into the binlog. If caller needs GTIDs it has to setup the
    binlog cache (for the injector thread).
  */
  if (cache_mngr == NULL ||
      DBUG_EVALUATE_IF("simulate_write_incident_event_into_binlog_directly",
                       1, 0))
  {
    if (need_lock_log)
      mysql_mutex_lock(&LOCK_log);
    else
      mysql_mutex_assert_owner(&LOCK_log);
    /* Write an incident event into binlog directly. */
    error= write_to_file(ev);
    /*
      Write an error to log. So that user might have a chance
      to be alerted and explore incident details.
    */
    if (!error)
      sql_print_error("%s An incident event has been written to the binary "
                      "log which will stop the slaves.", err_msg);
  }
  else // (cache_mngr != NULL)
  {
    if (!cache_mngr->stmt_cache.is_binlog_empty())
    {
      /* The stmt_cache contains corruption data, so we can reset it. */
      cache_mngr->stmt_cache.reset();
    }
    if (!cache_mngr->trx_cache.is_binlog_empty())
    {
      /* The trx_cache contains corruption data, so we can reset it. */
      cache_mngr->trx_cache.reset();
    }
    /*
      Write the incident event into stmt_cache, so that a GTID is generated and
      written for it prior to flushing the stmt_cache.
    */
    binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(false);
    if ((error= cache_data->write_event(thd, ev)))
    {
      sql_print_error("Failed to write an incident event into stmt_cache.");
      cache_mngr->stmt_cache.reset();
      /* LOCK_log has not been acquired yet on this path: nothing to unlock. */
      DBUG_RETURN(error);
    }

    if (need_lock_log)
      mysql_mutex_lock(&LOCK_log);
    else
      mysql_mutex_assert_owner(&LOCK_log);
  }

  /* From here on LOCK_log is held (taken above or owned by the caller). */
  if (do_flush_and_sync)
  {
    if (!error && !(error= flush_and_sync()))
    {
      bool check_purge= false;
      update_binlog_end_pos();
      /* The flag is raised only for the duration of the forced rotation. */
      is_rotating_caused_by_incident= true;
      error= rotate(true, &check_purge);
      is_rotating_caused_by_incident= false;
      if (!error && check_purge)
        purge();
    }
  }

  if (need_lock_log)
    mysql_mutex_unlock(&LOCK_log);

  /*
    Write an error to log. So that user might have a chance
    to be alerted and explore incident details.
  */
  if (!error && cache_mngr != NULL)
    sql_print_error("%s An incident event has been written to the binary "
                    "log which will stop the slaves.", err_msg);

  DBUG_RETURN(error);
}
8563 
write_stmt_directly(THD * thd,const char * stmt,size_t stmt_len,enum_sql_command sql_command)8564 bool MYSQL_BIN_LOG::write_stmt_directly(THD* thd, const char *stmt, size_t stmt_len,
8565                                        enum_sql_command sql_command)
8566 {
8567   bool ret= false;
8568   /* backup the original command */
8569   enum_sql_command save_sql_command= thd->lex->sql_command;
8570   thd->lex->sql_command= sql_command;
8571 
8572   if (thd->binlog_query(THD::STMT_QUERY_TYPE, stmt, stmt_len,
8573                         FALSE, FALSE, FALSE, 0) ||
8574       commit(thd, false) != TC_LOG::RESULT_SUCCESS)
8575   {
8576     ret= true;
8577   }
8578 
8579   thd->lex->sql_command= save_sql_command;
8580   return ret;
8581 }
8582 
8583 
/**
  Creates an incident event and writes it to the binary log.

  @param thd               Thread variable
  @param need_lock_log     If true, LOCK_log is taken by this call; otherwise
                           the caller is expected to hold it already.
  @param err_msg           Error message stored in the incident event and
                           written to the error log for the incident.
  @param do_flush_and_sync If true, the binary log is flushed, synced and
                           rotated after the event has been written.

  @retval
    false  success, or the binary log is not open
  @retval
    true   error
*/
write_incident(THD * thd,bool need_lock_log,const char * err_msg,bool do_flush_and_sync)8597 bool MYSQL_BIN_LOG::write_incident(THD *thd, bool need_lock_log,
8598                                    const char* err_msg,
8599                                    bool do_flush_and_sync)
8600 {
8601   DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
8602 
8603   if (!is_open())
8604     DBUG_RETURN(0);
8605 
8606   LEX_STRING write_error_msg= {(char*) err_msg, strlen(err_msg)};
8607   binary_log::Incident_event::enum_incident incident=
8608                               binary_log::Incident_event::INCIDENT_LOST_EVENTS;
8609   Incident_log_event ev(thd, incident, write_error_msg);
8610 
8611   DBUG_RETURN(write_incident(&ev, thd, need_lock_log, err_msg,
8612                              do_flush_and_sync));
8613 }
8614 
8615 
8616 /**
8617   Write the contents of the statement or transaction cache to the binary log.
8618 
8619   Comparison with do_write_cache:
8620 
8621   - do_write_cache is a lower-level function that only performs the
8622     actual write.
8623 
8624   - write_cache is a higher-level function that calls do_write_cache
8625     and additionally performs some maintenance tasks, including:
8626     - report any errors that occurred
8627     - write incident event if needed
8628     - update gtid_state
8629     - update thd.binlog_next_event_pos
8630 
8631   @param thd Thread variable
8632 
8633   @param cache_data Events will be read from the IO_CACHE of this
8634   cache_data object.
8635 
8636   @param writer Events will be written to this Binlog_event_writer.
8637 
8638   @retval true IO error.
8639   @retval false Success.
8640 
8641   @note We only come here if there is something in the cache.
8642   @note Whatever is in the cache is always a complete transaction.
  @note 'cache' needs to be reinitialized after this function returns.
8644 */
bool MYSQL_BIN_LOG::write_cache(THD *thd, binlog_cache_data *cache_data,
                                Binlog_event_writer *writer)
{
  /*
    NOTE(review): the trace label below still says "bool" for the third
    parameter although the signature takes a Binlog_event_writer *.
  */
  DBUG_ENTER("MYSQL_BIN_LOG::write_cache(THD *, binlog_cache_data *, bool)");

  IO_CACHE *cache= &cache_data->cache_log;
  /* Sampled up front; decides below whether an incident event is appended. */
  bool incident= cache_data->has_incident();

  /* The caller must already hold LOCK_log. */
  mysql_mutex_assert_owner(&LOCK_log);

  assert(is_open());
  if (likely(is_open()))                       // Should always be true
  {
    /*
      We only bother to write to the binary log if there is anything
      to write.

      @todo Is this check redundant? Probably this is only called if
      there is anything in the cache (see @note in comment above this
      function). Check if we can replace this by an assertion. /Sven
    */
    if (my_b_tell(cache) > 0)
    {
      /*
        Debug-only injection point: write the cache, flush and sync the
        log, then kill the server before anything else happens.
      */
      DBUG_EXECUTE_IF("crash_before_writing_xid",
                      {
                        if ((write_error= do_write_cache(cache, writer)))
                          DBUG_PRINT("info", ("error writing binlog cache: %d",
                                              write_error));
                        flush_and_sync(true);
                        DBUG_PRINT("info", ("crashing before writing xid"));
                        DBUG_SUICIDE();
                      });
      /* The actual copy of the cache; its result is kept in write_error. */
      if ((write_error= do_write_cache(cache, writer)))
        goto err;

      const char* err_msg= "Non-transactional changes did not get into "
                           "the binlog.";
      /*
        Append an incident event when the cache was flagged with one.
        LOCK_log is already held (asserted above) and no flush/sync is
        requested here, hence both flags are false.
      */
      if (incident && write_incident(thd, false/*need_lock_log=false*/,
                                     err_msg,
                                     false/*do_flush_and_sync==false*/))
        goto err;

      /* Debug-only injection point: die right after the cache was written. */
      DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE(););
      if (cache->error)				// Error on read
      {
        char errbuf[MYSYS_STRERROR_SIZE];
        sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name,
                        errno, my_strerror(errbuf, sizeof(errbuf), errno));
        write_error= true; // Don't give more errors
        goto err;
      }
    }
    update_thd_next_event_pos(thd);
  }

  DBUG_RETURN(false);

err:
  /*
    write_error doubles as the "already reported" flag: the generic write
    error is only logged when no earlier step has set it.
  */
  if (!write_error)
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    write_error= true;
    sql_print_error(ER(ER_ERROR_ON_WRITE), name,
                    errno, my_strerror(errbuf, sizeof(errbuf), errno));
  }

  /*
    If the flush has failed due to ENOSPC, set the flush_error flag.
  */
  if (cache->error && thd->is_error() && my_errno() == ENOSPC)
  {
    cache_data->set_flush_error(thd);
  }
  /* Every failure path leaves the THD marked with a flush-stage error. */
  thd->commit_error= THD::CE_FLUSH_ERROR;

  DBUG_RETURN(true);
}
8722 
8723 
/**
  Wait until we get a signal that the relay log has been updated.

  @param[in] thd        Thread variable
  @param[in] timeout    a pointer to a timespec;
                        NULL means to wait w/o timeout.

  @retval    0          if got signalled on update
  @retval    non-0      if wait timeout elapsed

  @note
    One must have a lock on LOCK_log before calling this function.
    LOCK_log is released by this function before it returns.
*/
8737 
wait_for_update_relay_log(THD * thd,const struct timespec * timeout)8738 int MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd, const struct timespec *timeout)
8739 {
8740   int ret= 0;
8741   PSI_stage_info old_stage;
8742   DBUG_ENTER("wait_for_update_relay_log");
8743 
8744   thd->ENTER_COND(&update_cond, &LOCK_log,
8745                   &stage_slave_has_read_all_relay_log,
8746                   &old_stage);
8747 
8748   if (!timeout)
8749     mysql_cond_wait(&update_cond, &LOCK_log);
8750   else
8751     ret= mysql_cond_timedwait(&update_cond, &LOCK_log,
8752                               const_cast<struct timespec *>(timeout));
8753   mysql_mutex_unlock(&LOCK_log);
8754   thd->EXIT_COND(&old_stage);
8755 
8756   DBUG_RETURN(ret);
8757 }
8758 
/**
  Wait until we get a signal that the binary log has been updated.
  Applies to master only.

  @param[in] thd        a THD struct
  @param[in] timeout    a pointer to a timespec;
                        NULL means to wait w/o timeout.
  @retval    0          if got signalled on update
  @retval    non-0      if wait timeout elapsed
  @note
    LOCK_binlog_end_pos must be taken before calling this function.
    LOCK_binlog_end_pos is being released while the thread is waiting.
    LOCK_binlog_end_pos is released by the caller.
*/
8774 
wait_for_update_bin_log(THD * thd,const struct timespec * timeout)8775 int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
8776                                            const struct timespec *timeout)
8777 {
8778   int ret= 0;
8779   DBUG_ENTER("wait_for_update_bin_log");
8780 
8781   if (!timeout)
8782     mysql_cond_wait(&update_cond, &LOCK_binlog_end_pos);
8783   else
8784     ret= mysql_cond_timedwait(&update_cond, &LOCK_binlog_end_pos,
8785                               const_cast<struct timespec *>(timeout));
8786   DBUG_RETURN(ret);
8787 }
8788 
8789 
8790 /**
8791   Close the log file.
8792 
8793   @param exiting     Bitmask for one or more of the following bits:
8794           - LOG_CLOSE_INDEX : if we should close the index file
8795           - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
8796                                      at once after close.
8797           - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
8798 
8799   @param need_lock_log If true, this function acquires LOCK_log;
8800   otherwise the caller should already have acquired it.
8801 
8802   @param need_lock_index If true, this function acquires LOCK_index;
8803   otherwise the caller should already have acquired it.
8804 
8805   @note
8806     One can do an open on the object at once after doing a close.
8807     The internal structures are not freed until cleanup() is called
8808 */
8809 
void MYSQL_BIN_LOG::close(uint exiting, bool need_lock_log,
                          bool need_lock_index)
{					// One can't set log_type here!
  DBUG_ENTER("MYSQL_BIN_LOG::close");
  DBUG_PRINT("enter",("exiting: %d", (int) exiting));
  /* Take LOCK_log here, or verify that the caller already holds it. */
  if (need_lock_log)
    mysql_mutex_lock(&LOCK_log);
  else
    mysql_mutex_assert_owner(&LOCK_log);

  /* The log file itself is only touched while the log is in LOG_OPENED. */
  if (log_state.atomic_get() == LOG_OPENED)
  {
#ifdef HAVE_REPLICATION
    if ((exiting & LOG_CLOSE_STOP_EVENT) != 0)
    {
      /**
        TODO(WL#7546): Change the implementation to Stop_event after write() is
        moved into libbinlogevents
      */
      Stop_log_event s;
      // the checksumming rule for relay-log case is similar to Rotate
        s.common_footer->checksum_alg= is_relay_log ? relay_log_checksum_alg :
                                       static_cast<enum_binlog_checksum_alg>
                                       (binlog_checksum_options);
      assert(!is_relay_log ||
             relay_log_checksum_alg != binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
      /* NOTE(review): the result of write_to_file() is not checked here. */
      write_to_file(&s);
      bytes_written+= s.common_header->data_written;
      /* Push the stop event out of the IO_CACHE and publish the new end. */
      flush_io_cache(&log_file);
      update_binlog_end_pos();
    }
#endif /* HAVE_REPLICATION */

    /* don't pwrite in a file opened with O_APPEND - it doesn't work */
    if (log_file.type == WRITE_CACHE)
    {
      /* Position of the flags byte that is overwritten with 0 below. */
      my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
      my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
      uchar flags= 0;            // clearing LOG_EVENT_BINLOG_IN_USE_F
      mysql_file_pwrite(log_file.file, &flags, 1, offset, MYF(0));
      /*
        Restore position so that anything we have in the IO_cache is written
        to the correct position.
        We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
        original position on system that doesn't support pwrite().
      */
      mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
    }

    /* this will cleanup IO_CACHE, sync and close the file */
    if (log_state.atomic_get() == LOG_OPENED)
    {
      end_io_cache(&log_file);

      /*
        A sync or close failure is logged only if write_error is not set
        yet; logging it sets write_error, so at most one is reported.
      */
      if (mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error)
      {
        char errbuf[MYSYS_STRERROR_SIZE];
        write_error= 1;
        sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno,
                        my_strerror(errbuf, sizeof(errbuf), errno));
      }

      if (mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error)
      {
        char errbuf[MYSYS_STRERROR_SIZE];
        write_error= 1;
        sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno,
                        my_strerror(errbuf, sizeof(errbuf), errno));
      }
    }

    /*
      Leave LOG_OPENED: LOG_TO_BE_OPENED when the caller announced an
      immediate re-open, LOG_CLOSED otherwise. The log name is released.
    */
    log_state.atomic_set((exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED);
    my_free(name);
    name= NULL;
  }

  /*
    The following test is needed even if is_open() is not set, as we may have
    called a not complete close earlier and the index file is still open.
  */

  /* Take LOCK_index here, or verify that the caller already holds it. */
  if (need_lock_index)
    mysql_mutex_lock(&LOCK_index);
  else
    mysql_mutex_assert_owner(&LOCK_index);

  if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
  {
    end_io_cache(&index_file);
    if (mysql_file_close(index_file.file, MYF(0)) < 0 && ! write_error)
    {
      char errbuf[MYSYS_STRERROR_SIZE];
      write_error= 1;
      sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name,
                      errno, my_strerror(errbuf, sizeof(errbuf), errno));
    }
  }

  if (need_lock_index)
    mysql_mutex_unlock(&LOCK_index);

  /*
    Same state transition and name release as inside the LOG_OPENED branch
    above, but executed unconditionally so it also applies when the log was
    not open on entry. name is already NULL if that branch ran.
  */
  log_state.atomic_set((exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED);
  my_free(name);
  name= NULL;

  if (need_lock_log)
    mysql_mutex_unlock(&LOCK_log);

  DBUG_VOID_RETURN;
}
8920 
harvest_bytes_written(Relay_log_info * rli,bool need_log_space_lock)8921 void MYSQL_BIN_LOG::harvest_bytes_written(Relay_log_info* rli, bool need_log_space_lock)
8922 {
8923 #ifndef NDEBUG
8924   char buf1[22],buf2[22];
8925 #endif
8926   DBUG_ENTER("harvest_bytes_written");
8927   if (need_log_space_lock)
8928     mysql_mutex_lock(&rli->log_space_lock);
8929   else
8930     mysql_mutex_assert_owner(&rli->log_space_lock);
8931   rli->log_space_total+= bytes_written;
8932   DBUG_PRINT("info",("relay_log_space: %s  bytes_written: %s",
8933         llstr(rli->log_space_total,buf1), llstr(bytes_written,buf2)));
8934   bytes_written=0;
8935   if (need_log_space_lock)
8936     mysql_mutex_unlock(&rli->log_space_lock);
8937   DBUG_VOID_RETURN;
8938 }
8939 
set_max_size(ulong max_size_arg)8940 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
8941 {
8942   /*
8943     We need to take locks, otherwise this may happen:
8944     new_file() is called, calls open(old_max_size), then before open() starts,
8945     set_max_size() sets max_size to max_size_arg, then open() starts and
8946     uses the old_max_size argument, so max_size_arg has been overwritten and
8947     it's like if the SET command was never run.
8948   */
8949   DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
8950   mysql_mutex_lock(&LOCK_log);
8951   if (is_open())
8952     max_size= max_size_arg;
8953   mysql_mutex_unlock(&LOCK_log);
8954   DBUG_VOID_RETURN;
8955 }
8956 
8957 /****** transaction coordinator log for 2pc - binlog() based solution ******/
8958 
8959 /**
8960   @todo
8961   keep in-memory list of prepared transactions
8962   (add to list in log(), remove on unlog())
8963   and copy it to the new binlog if rotated
8964   but let's check the behaviour of tc_log_page_waits first!
8965 */
8966 
open_binlog(const char * opt_name)8967 int MYSQL_BIN_LOG::open_binlog(const char *opt_name)
8968 {
8969   LOG_INFO log_info;
8970   int      error= 1;
8971 
8972   /*
8973     This function is used for 2pc transaction coordination.  Hence, it
8974     is never used for relay logs.
8975   */
8976   assert(!is_relay_log);
8977   assert(total_ha_2pc > 1 || (1 == total_ha_2pc && opt_bin_log));
8978   assert(opt_name && opt_name[0]);
8979 
8980   if (!my_b_inited(&index_file))
8981   {
8982     /* There was a failure to open the index file, can't open the binlog */
8983     cleanup();
8984     return 1;
8985   }
8986 
8987   if (using_heuristic_recover())
8988   {
8989     /* generate a new binlog to mask a corrupted one */
8990     mysql_mutex_lock(&LOCK_log);
8991     open_binlog(opt_name, 0, max_binlog_size, false,
8992                 true/*need_lock_index=true*/,
8993                 true/*need_sid_lock=true*/,
8994                 NULL);
8995     mysql_mutex_unlock(&LOCK_log);
8996     cleanup();
8997     return 1;
8998   }
8999 
9000   if ((error= find_log_pos(&log_info, NullS, true/*need_lock_index=true*/)))
9001   {
9002     if (error != LOG_INFO_EOF)
9003       sql_print_error("find_log_pos() failed (error: %d)", error);
9004     else
9005       error= 0;
9006     goto err;
9007   }
9008 
9009   {
9010     const char *errmsg;
9011     IO_CACHE    log;
9012     File        file;
9013     Log_event  *ev=0;
9014     Format_description_log_event fdle(BINLOG_VERSION);
9015     char        log_name[FN_REFLEN];
9016     my_off_t    valid_pos= 0;
9017     my_off_t    binlog_size;
9018     MY_STAT     s;
9019 
9020     if (! fdle.is_valid())
9021       goto err;
9022 
9023     do
9024     {
9025       strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
9026     } while (!(error= find_next_log(&log_info, true/*need_lock_index=true*/)));
9027 
9028     if (error !=  LOG_INFO_EOF)
9029     {
9030       sql_print_error("find_log_pos() failed (error: %d)", error);
9031       goto err;
9032     }
9033 
9034     if ((file= open_binlog_file(&log, log_name, &errmsg)) < 0)
9035     {
9036       sql_print_error("%s", errmsg);
9037       goto err;
9038     }
9039 
9040     my_stat(log_name, &s, MYF(0));
9041     binlog_size= s.st_size;
9042 
9043     /*
9044       If the binary log was not properly closed it means that the server
9045       may have crashed. In that case, we need to call MYSQL_BIN_LOG::recover
9046       to:
9047 
9048         a) collect logged XIDs;
9049         b) complete the 2PC of the pending XIDs;
9050         c) collect the last valid position.
9051 
9052       Therefore, we do need to iterate over the binary log, even if
9053       total_ha_2pc == 1, to find the last valid group of events written.
9054       Later we will take this value and truncate the log if need be.
9055     */
9056     if ((ev= Log_event::read_log_event(&log, 0, &fdle,
9057                                        opt_master_verify_checksum)) &&
9058         ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT &&
9059         (ev->common_header->flags & LOG_EVENT_BINLOG_IN_USE_F ||
9060          DBUG_EVALUATE_IF("eval_force_bin_log_recovery", true, false)))
9061     {
9062       sql_print_information("Recovering after a crash using %s", opt_name);
9063       valid_pos= my_b_tell(&log);
9064       error= recover(&log, (Format_description_log_event *)ev, &valid_pos);
9065     }
9066     else
9067       error=0;
9068 
9069     delete ev;
9070     end_io_cache(&log);
9071     mysql_file_close(file, MYF(MY_WME));
9072 
9073     if (error)
9074       goto err;
9075 
9076     /* Trim the crashed binlog file to last valid transaction
9077       or event (non-transaction) base on valid_pos. */
9078     if (valid_pos > 0)
9079     {
9080       if ((file= mysql_file_open(key_file_binlog, log_name,
9081                                  O_RDWR | O_BINARY, MYF(MY_WME))) < 0)
9082       {
9083         sql_print_error("Failed to open the crashed binlog file "
9084                         "when master server is recovering it.");
9085         return -1;
9086       }
9087 
9088       /* Change binlog file size to valid_pos */
9089       if (valid_pos < binlog_size)
9090       {
9091         if (my_chsize(file, valid_pos, 0, MYF(MY_WME)))
9092         {
9093           sql_print_error("Failed to trim the crashed binlog file "
9094                           "when master server is recovering it.");
9095           mysql_file_close(file, MYF(MY_WME));
9096           return -1;
9097         }
9098         else
9099         {
9100           sql_print_information("Crashed binlog file %s size is %llu, "
9101                                 "but recovered up to %llu. Binlog trimmed to %llu bytes.",
9102                                 log_name, binlog_size, valid_pos, valid_pos);
9103         }
9104       }
9105 
9106       /* Clear LOG_EVENT_BINLOG_IN_USE_F */
9107       my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
9108       uchar flags= 0;
9109       if (mysql_file_pwrite(file, &flags, 1, offset, MYF(0)) != 1)
9110       {
9111         sql_print_error("Failed to clear LOG_EVENT_BINLOG_IN_USE_F "
9112                         "for the crashed binlog file when master "
9113                         "server is recovering it.");
9114         mysql_file_close(file, MYF(MY_WME));
9115         return -1;
9116       }
9117 
9118       mysql_file_close(file, MYF(MY_WME));
9119     } //end if
9120   }
9121 
9122 err:
9123   return error;
9124 }
9125 
9126 /** This is called on shutdown, after ha_panic. */
close()9127 void MYSQL_BIN_LOG::close()
9128 {
9129 }
9130 
9131 /*
9132   Prepare the transaction in the transaction coordinator.
9133 
9134   This function will prepare the transaction in the storage engines
9135   (by calling @c ha_prepare_low) what will write a prepare record
9136   to the log buffers.
9137 
9138   @retval 0    success
9139   @retval 1    error
9140 */
prepare(THD * thd,bool all)9141 int MYSQL_BIN_LOG::prepare(THD *thd, bool all)
9142 {
9143   DBUG_ENTER("MYSQL_BIN_LOG::prepare");
9144 
9145   assert(opt_bin_log);
9146   /*
9147     The applier thread explicitly overrides the value of sql_log_bin
9148     with the value of log_slave_updates.
9149     We may also end up here in some cases if we have a transaction with two
9150     active transactional storage engines, such as is the case if this is a
9151     replication applier and log_slave_updates=0.
9152   */
9153   assert((thd->slave_thread ?
9154          opt_log_slave_updates : thd->variables.sql_log_bin) ||
9155          total_ha_2pc > 1);
9156 
9157   /*
9158     Set HA_IGNORE_DURABILITY to not flush the prepared record of the
9159     transaction to the log of storage engine (for example, InnoDB
9160     redo log) during the prepare phase. So that we can flush prepared
9161     records of transactions to the log of storage engine in a group
9162     right before flushing them to binary log during binlog group
9163     commit flush stage. Reset to HA_REGULAR_DURABILITY at the
9164     beginning of parsing next command.
9165   */
9166   thd->durability_property= HA_IGNORE_DURABILITY;
9167 
9168   int error= ha_prepare_low(thd, all);
9169 
9170   DBUG_RETURN(error);
9171 }
9172 
9173 /**
9174   Commit the transaction in the transaction coordinator.
9175 
9176   This function will commit the sessions transaction in the binary log
9177   and in the storage engines (by calling @c ha_commit_low). If the
9178   transaction was successfully logged (or not successfully unlogged)
  but the commit in the engines did not succeed, there is a risk of
9180   inconsistency between the engines and the binary log.
9181 
9182   For binary log group commit, the commit is separated into three
9183   parts:
9184 
9185   1. First part consists of filling the necessary caches and
9186      finalizing them (if they need to be finalized). After this,
9187      nothing is added to any of the caches.
9188 
9189   2. Second part execute an ordered flush and commit. This will be
9190      done using the group commit functionality in ordered_commit.
9191 
9192   3. Third part checks any errors resulting from the ordered commit
9193      and handles them appropriately.
9194 
9195   @retval RESULT_SUCCESS   success
9196   @retval RESULT_ABORTED   error, transaction was neither logged nor committed
9197   @retval RESULT_INCONSISTENT  error, transaction was logged but not committed
9198 */
commit(THD * thd,bool all)9199 TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all)
9200 {
9201   DBUG_ENTER("MYSQL_BIN_LOG::commit");
9202   DBUG_PRINT("info", ("query='%s'",
9203                       thd == current_thd ? thd->query().str : NULL));
9204   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
9205   Transaction_ctx *trn_ctx= thd->get_transaction();
9206   my_xid xid= trn_ctx->xid_state()->get_xid()->get_my_xid();
9207   bool stmt_stuff_logged= false;
9208   bool trx_stuff_logged= false;
9209   bool binlog_prot_acquired= false;
9210   bool skip_commit= is_loggable_xa_prepare(thd);
9211 
9212   DBUG_PRINT("enter", ("thd: 0x%llx, all: %s, xid: %llu, cache_mngr: 0x%llx",
9213                        (ulonglong) thd, YESNO(all), (ulonglong) xid,
9214                        (ulonglong) cache_mngr));
9215 
9216   /*
9217     No cache manager means nothing to log, but we still have to commit
9218     the transaction.
9219    */
9220   if (cache_mngr == NULL)
9221   {
9222     if (!skip_commit && ha_commit_low(thd, all))
9223       DBUG_RETURN(RESULT_ABORTED);
9224     DBUG_RETURN(RESULT_SUCCESS);
9225   }
9226 
9227   /*
9228     Reset binlog_snapshot_% variables for the current connection so that the
9229     current coordinates are shown after committing a consistent snapshot
9230     transaction.
9231   */
9232   if (all)
9233   {
9234     mysql_mutex_lock(&thd->LOCK_thd_data);
9235     cache_mngr->drop_consistent_snapshot();
9236     mysql_mutex_unlock(&thd->LOCK_thd_data);
9237   }
9238 
9239   Transaction_ctx::enum_trx_scope trx_scope=  all ? Transaction_ctx::SESSION :
9240                                                     Transaction_ctx::STMT;
9241 
9242   DBUG_PRINT("debug", ("in_transaction: %s, no_2pc: %s, rw_ha_count: %d",
9243                        YESNO(thd->in_multi_stmt_transaction_mode()),
9244                        YESNO(trn_ctx->no_2pc(trx_scope)),
9245                        trn_ctx->rw_ha_count(trx_scope)));
9246   DBUG_PRINT("debug",
9247              ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
9248               YESNO(trn_ctx->cannot_safely_rollback(Transaction_ctx::SESSION)),
9249               YESNO(cache_mngr->trx_cache.is_binlog_empty())));
9250   DBUG_PRINT("debug",
9251              ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
9252               YESNO(trn_ctx->cannot_safely_rollback(Transaction_ctx::STMT)),
9253               YESNO(cache_mngr->stmt_cache.is_binlog_empty())));
9254 
9255 
9256   /*
9257     If there are no handlertons registered, there is nothing to
9258     commit. Note that DDLs are written earlier in this case (inside
9259     binlog_query).
9260 
9261     TODO: This can be a problem in those cases that there are no
9262     handlertons registered. DDLs are one example, but the other case
9263     is MyISAM. In this case, we could register a dummy handlerton to
9264     trigger the commit.
9265 
9266     Any statement that requires logging will call binlog_query before
9267     trans_commit_stmt, so an alternative is to use the condition
9268     "binlog_query called or stmt.ha_list != 0".
9269    */
9270   if (!all && !trn_ctx->is_active(trx_scope) &&
9271       cache_mngr->stmt_cache.is_binlog_empty())
9272     DBUG_RETURN(RESULT_SUCCESS);
9273 
9274   if (thd->lex->sql_command == SQLCOM_XA_COMMIT)
9275   {
9276     /* The Commit phase of the XA two phase logging. */
9277 
9278     bool one_phase= get_xa_opt(thd) == XA_ONE_PHASE;
9279     assert(all);
9280     assert(!skip_commit || one_phase);
9281 
9282     int err= 0;
9283     XID_STATE *xs= thd->get_transaction()->xid_state();
9284     /*
9285       XA COMMIT ONE PHASE statement which has not gone through the binary log
9286       prepare phase, has to end the active XA transaction with appropriate XA
9287       END followed by XA COMMIT ONE PHASE.
9288 
9289       The state of XA transaction is changed to PREPARED after the prepare
9290       phase, intermediately in ha_commit_trans code for the interest of
9291       binlogger. Hence check that the XA COMMIT ONE PHASE is set to 'PREPARE'
9292       and it has not already been written to binary log. For such transaction
9293       write the appropriate XA END statement.
9294     */
9295     if (!(is_loggable_xa_prepare(thd))
9296         && one_phase
9297         && !(xs->is_binlogged())
9298         && !cache_mngr->trx_cache.is_binlog_empty())
9299     {
9300       XA_prepare_log_event end_evt(thd, xs->get_xid(), one_phase);
9301       err= cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
9302       if (err)
9303       {
9304         DBUG_RETURN(RESULT_ABORTED);
9305       }
9306       trx_stuff_logged= true;
9307       thd->get_transaction()->xid_state()->set_binlogged();
9308     }
9309     if (DBUG_EVALUATE_IF("simulate_xa_commit_log_failure", true,
9310                          do_binlog_xa_commit_rollback(thd, xs->get_xid(),
9311                                                       true)))
9312       DBUG_RETURN(RESULT_ABORTED);
9313   }
9314 
9315   /*
9316     If there is anything in the stmt cache, and GTIDs are enabled,
9317     then this is a single statement outside a transaction and it is
9318     impossible that there is anything in the trx cache.  Hence, we
9319     write any empty group(s) to the stmt cache.
9320 
9321     Otherwise, we write any empty group(s) to the trx cache at the end
9322     of the transaction.
9323   */
9324   if (!cache_mngr->stmt_cache.is_binlog_empty())
9325   {
9326     /*
9327       Commit parent identification of non-transactional query has
9328       been deferred until now, except for the mixed transaction case.
9329     */
9330     trn_ctx->store_commit_parent(m_dependency_tracker.get_max_committed_timestamp());
9331     if (cache_mngr->stmt_cache.finalize(thd))
9332       DBUG_RETURN(RESULT_ABORTED);
9333     stmt_stuff_logged= true;
9334   }
9335 
9336   /*
9337     We commit the transaction if:
9338      - We are not in a transaction and committing a statement, or
9339      - We are in a transaction and a full transaction is committed.
9340     Otherwise, we accumulate the changes.
9341   */
9342   if (!cache_mngr->trx_cache.is_binlog_empty() &&
9343       ending_trans(thd, all) && !trx_stuff_logged)
9344   {
9345     const bool real_trans=
9346       (all || !trn_ctx->is_active(Transaction_ctx::SESSION));
9347 
9348     /*
9349       We are committing an XA transaction if it is a "real" transaction
9350       and has an XID assigned (because some handlerton registered). A
9351       transaction is "real" if either 'all' is true or the 'all.ha_list'
9352       is empty.
9353 
9354       Note: This is kind of strange since registering the binlog
9355       handlerton will then make the transaction XA, which is not really
9356       true. This occurs for example if a MyISAM statement is executed
9357       with row-based replication on.
9358     */
9359     if (is_loggable_xa_prepare(thd))
9360     {
9361       /* The prepare phase of XA transaction two phase logging. */
9362       int err= 0;
9363       bool one_phase= get_xa_opt(thd) == XA_ONE_PHASE;
9364 
9365       assert(thd->lex->sql_command != SQLCOM_XA_COMMIT || one_phase);
9366 
9367       XID_STATE *xs= thd->get_transaction()->xid_state();
9368       XA_prepare_log_event end_evt(thd, xs->get_xid(), one_phase);
9369 
9370       assert(skip_commit);
9371 
9372       err= cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
9373       if (err ||
9374           (DBUG_EVALUATE_IF("simulate_xa_prepare_failure_in_cache_finalize",
9375                             true, false)))
9376       {
9377         DBUG_RETURN(RESULT_ABORTED);
9378       }
9379     }
9380     else if (real_trans && xid && trn_ctx->rw_ha_count(trx_scope) > 1 &&
9381              !trn_ctx->no_2pc(trx_scope))
9382     {
9383       Xid_log_event end_evt(thd, xid);
9384       if (cache_mngr->trx_cache.finalize(thd, &end_evt))
9385         DBUG_RETURN(RESULT_ABORTED);
9386     }
9387     else
9388     {
9389       Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
9390                               true, FALSE, TRUE, 0, TRUE);
9391       if (cache_mngr->trx_cache.finalize(thd, &end_evt))
9392         DBUG_RETURN(RESULT_ABORTED);
9393     }
9394     trx_stuff_logged= true;
9395   }
9396 
9397   /*
9398     This is part of the stmt rollback.
9399   */
9400   if (!all)
9401     cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
9402 
9403   /*
9404     Now all the events are written to the caches, so we will commit
9405     the transaction in the engines. This is done using the group
9406     commit logic in ordered_commit, which will return when the
9407     transaction is committed.
9408 
9409     If the commit in the engines fail, we still have something logged
9410     to the binary log so we have to report this as a "bad" failure
9411     (failed to commit, but logged something).
9412   */
9413   if (stmt_stuff_logged || trx_stuff_logged)
9414   {
9415     if (RUN_HOOK(transaction,
9416                  before_commit,
9417                  (thd, all,
9418                   thd_get_cache_mngr(thd)->get_binlog_cache_log(true),
9419                   thd_get_cache_mngr(thd)->get_binlog_cache_log(false),
9420                   max<my_off_t>(max_binlog_cache_size,
9421                                 max_binlog_stmt_cache_size))) ||
9422         DBUG_EVALUATE_IF("simulate_failure_in_before_commit_hook", true, false))
9423     {
9424       ha_rollback_low(thd, all);
9425       gtid_state->update_on_rollback(thd);
9426       thd_get_cache_mngr(thd)->reset();
9427       //Reset the thread OK status before changing the outcome.
9428       if (thd->get_stmt_da()->is_ok())
9429         thd->get_stmt_da()->reset_diagnostics_area();
9430       my_error(ER_RUN_HOOK_ERROR, MYF(0), "before_commit");
9431       DBUG_RETURN(RESULT_ABORTED);
9432     }
9433     /*
9434       Check whether the transaction should commit or abort given the
9435       plugin feedback.
9436     */
9437     if (thd->get_transaction()->get_rpl_transaction_ctx()->is_transaction_rollback() ||
9438         (DBUG_EVALUATE_IF("simulate_transaction_rollback_request", true, false)))
9439     {
9440       ha_rollback_low(thd, all);
9441       gtid_state->update_on_rollback(thd);
9442       thd_get_cache_mngr(thd)->reset();
9443       if (thd->get_stmt_da()->is_ok())
9444         thd->get_stmt_da()->reset_diagnostics_area();
9445       my_error(ER_TRANSACTION_ROLLBACK_DURING_COMMIT, MYF(0));
9446       DBUG_RETURN(RESULT_ABORTED);
9447     }
9448 
9449     int rc= prepare_ordered_commit(thd, all, skip_commit);
9450     if (rc)
9451       DBUG_RETURN(RESULT_INCONSISTENT);
9452 
9453     /*
9454       Block binlog updates if there's an active BINLOG lock.
9455 
9456       We allow binlog lock owner to commit, assuming it knows what it does. We
9457       also check if protection has not been acquired earlier, which is possible
9458       in slave threads to protect master binlog coordinates.
9459     */
9460     if (!thd->backup_binlog_lock.is_acquired() &&
9461         !thd->backup_binlog_lock.is_protection_acquired())
9462     {
9463       const ulong timeout= thd->variables.lock_wait_timeout;
9464 
9465       DBUG_PRINT("debug", ("Acquiring binlog protection lock"));
9466 
9467 #ifdef HAVE_REPLICATION
9468       DBUG_EXECUTE_IF("delay_slave_worker_0", {
9469         if (has_commit_order_manager(thd))
9470         {
9471           Slave_worker *worker= dynamic_cast<Slave_worker *>(thd->rli_slave);
9472 
9473           if (worker->id == 0)
9474           {
9475             static bool skip_first_query= true;
9476             if (!skip_first_query)
9477             {
9478               static const char act[]= "now WAIT_FOR signal.lock_binlog_for_backup";
9479               assert(!debug_sync_set_action(thd, STRING_WITH_LEN(act)));
9480 
9481               static const char act2[]= "now SIGNAL finished_delay_slave_worker_0";
9482               assert(opt_debug_sync_timeout > 0);
9483               assert(!debug_sync_set_action(thd, STRING_WITH_LEN(act2)));
9484 
9485               DBUG_SET("-d,delay_slave_worker_0");
9486             }
9487             skip_first_query= !skip_first_query;
9488           }
9489         }
9490       });
9491 #endif
9492 
9493       if (thd->backup_binlog_lock.acquire_protection(thd, MDL_EXPLICIT,
9494                                                      timeout))
9495       {
9496         cache_mngr->stmt_cache.reset();
9497         cache_mngr->trx_cache.reset();
9498 
9499         DBUG_RETURN(RESULT_ABORTED);
9500       }
9501 
9502       binlog_prot_acquired= true;
9503     }
9504 
9505     rc= ordered_commit(thd);
9506 
9507     if (binlog_prot_acquired)
9508     {
9509       DBUG_PRINT("debug", ("Releasing binlog protection lock"));
9510       thd->backup_binlog_lock.release_protection(thd);
9511     }
9512 
9513     if (rc)
9514       DBUG_RETURN(RESULT_INCONSISTENT);
9515 
9516     /*
9517       Mark the flag m_is_binlogged to true only after we are done
9518       with checking all the error cases.
9519     */
9520     if (is_loggable_xa_prepare(thd))
9521       thd->get_transaction()->xid_state()->set_binlogged();
9522   }
9523   else if (!skip_commit)
9524   {
9525     /*
9526       We only set engine binlog position in ordered_commit path flush phase
9527       and not all transactions go through them (such as table copy in DDL).
9528       So in cases where a DDL statement implicitly commits earlier transaction
9529       and starting a new one, the new transaction could be "leaking" the
9530       engine binlog pos. In order to avoid that and accidentally overwrite
9531       binlog position with previous location, we reset it here.
9532     */
9533     thd->set_trans_pos(NULL, 0);
9534     if (ha_commit_low(thd, all))
9535       DBUG_RETURN(RESULT_INCONSISTENT);
9536   }
9537 
9538   DBUG_RETURN(RESULT_SUCCESS);
9539 }
9540 
9541 
9542 /**
9543    Flush caches for session.
9544 
9545    @note @c set_trans_pos is called with a pointer to the file name
9546    that the binary log currently use and a rotation will change the
9547    contents of the variable.
9548 
9549    The position is used when calling the after_flush, after_commit,
9550    and after_rollback hooks, but these have been placed so that they
9551    occur before a rotation is executed.
9552 
9553    It is the responsibility of any plugin that use this position to
9554    copy it if they need it after the hook has returned.
9555 
9556    The current "global" transaction_counter is stepped and its new value
9557    is assigned to the transaction.
9558  */
9559 std::pair<int,my_off_t>
flush_thread_caches(THD * thd)9560 MYSQL_BIN_LOG::flush_thread_caches(THD *thd)
9561 {
9562   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
9563   my_off_t bytes= 0;
9564   bool wrote_xid= false;
9565   int error= cache_mngr->flush(thd, &bytes, &wrote_xid);
9566   if (!error && bytes > 0)
9567   {
9568     /*
9569       Note that set_trans_pos does not copy the file name. See
9570       this function documentation for more info.
9571     */
9572     thd->set_trans_pos(log_file_name, my_b_tell(&log_file));
9573     if (wrote_xid)
9574       inc_prep_xids(thd);
9575   }
9576   DBUG_PRINT("debug", ("bytes: %llu", bytes));
9577   return std::make_pair(error, bytes);
9578 }
9579 
9580 
9581 /**
9582   Execute the flush stage.
9583 
9584   @param total_bytes_var Pointer to variable that will be set to total
9585   number of bytes flushed, or NULL.
9586 
9587   @param rotate_var Pointer to variable that will be set to true if
9588   binlog rotation should be performed after releasing locks. If rotate
9589   is not necessary, the variable will not be touched.
9590 
9591   @return Error code on error, zero on success
9592  */
9593 
9594 int
process_flush_stage_queue(my_off_t * total_bytes_var,bool * rotate_var,THD ** out_queue_var)9595 MYSQL_BIN_LOG::process_flush_stage_queue(my_off_t *total_bytes_var,
9596                                          bool *rotate_var,
9597                                          THD **out_queue_var)
9598 {
9599   DBUG_ENTER("MYSQL_BIN_LOG::process_flush_stage_queue");
9600   #ifndef NDEBUG
9601   // number of flushes per group.
9602   int no_flushes= 0;
9603   #endif
9604   assert(total_bytes_var && rotate_var && out_queue_var);
9605   my_off_t total_bytes= 0;
9606   int flush_error= 1;
9607   mysql_mutex_assert_owner(&LOCK_log);
9608 
9609   /*
9610     Fetch the entire flush queue and empty it, so that the next batch
9611     has a leader. We must do this before invoking ha_flush_logs(...)
9612     for guaranteeing to flush prepared records of transactions before
9613     flushing them to binary log, which is required by crash recovery.
9614   */
9615   THD *first_seen= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
9616   assert(first_seen != NULL);
9617   /*
9618     We flush prepared records of transactions to the log of storage
9619     engine (for example, InnoDB redo log) in a group right before
9620     flushing them to binary log.
9621   */
9622   ha_flush_logs(NULL, true);
9623   DBUG_EXECUTE_IF("crash_after_flush_engine_log", DBUG_SUICIDE(););
9624   assign_automatic_gtids_to_flush_group(first_seen);
9625   /* Flush thread caches to binary log. */
9626   for (THD *head= first_seen ; head ; head = head->next_to_commit)
9627   {
9628     std::pair<int,my_off_t> result= flush_thread_caches(head);
9629     total_bytes+= result.second;
9630     if (flush_error == 1)
9631       flush_error= result.first;
9632 #ifndef NDEBUG
9633     no_flushes++;
9634 #endif
9635   }
9636 
9637   *out_queue_var= first_seen;
9638   *total_bytes_var= total_bytes;
9639   if (total_bytes > 0 && my_b_tell(&log_file) >= (my_off_t) max_size)
9640     *rotate_var= true;
9641 #ifndef NDEBUG
9642   DBUG_PRINT("info",("no_flushes:= %d", no_flushes));
9643   no_flushes= 0;
9644 #endif
9645   DBUG_RETURN(flush_error);
9646 }
9647 
/**
  Commit a sequence of sessions.

  This function walks the entire queue of sessions starting with the
  session in @c first. For every session whose @c m_flags.commit_low is
  set it performs the storage engine commit via @c ha_commit_low and
  records a failure as @c THD::CE_COMMIT_ERROR in the session's
  @c commit_error. Sessions without @c commit_low are not committed
  here.

  After the loop it hands the whole group to
  @c gtid_state->update_commit_group and finally decrements the
  prepared XID counter for every session that has
  @c m_flags.xid_written set.

  The caller must hold LOCK_commit.

  @see MYSQL_BIN_LOG::ordered_commit

  @param thd The "master" thread
  @param first First thread in the queue of threads to commit
 */

void
MYSQL_BIN_LOG::process_commit_stage_queue(THD *thd, THD *first)
{
  mysql_mutex_assert_owner(&LOCK_commit);
#ifndef NDEBUG
  thd->get_transaction()->m_flags.ready_preempt= 1; // formality by the leader
#endif
  for (THD *head= first ; head ; head = head->next_to_commit)
  {
    DBUG_PRINT("debug", ("Thread ID: %u, commit_error: %d, flags.pending: %s",
                         head->thread_id(), head->commit_error,
                         YESNO(head->get_transaction()->m_flags.pending)));
    /*
      Every session in the queue is visited. A flush or sync error
      recorded in commit_error does not cause the session to be skipped
      here; only m_flags.commit_low (checked below) decides whether the
      engine commit is executed for it.
    */
#ifndef NDEBUG
    stage_manager.clear_preempt_status(head);
#endif
    /*
      Sessions that carry a sequence number update the dependency
      tracker's max-committed value, under its dedicated mutex.
    */
    if (head->get_transaction()->sequence_number != SEQ_UNINIT)
    {
      mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
      m_dependency_tracker.update_max_committed(head);
      mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);
    }
    /*
      Flush/Sync errors are ignored here and the session continues to
      the commit phase. thd->commit_error cannot be COMMIT_ERROR at
      this point.
    */
    assert(head->commit_error != THD::CE_COMMIT_ERROR);
#ifndef EMBEDDED_LIBRARY
    /*
      Scoped helper constructed with the leader and the session being
      committed; it goes out of scope at the end of this iteration.
    */
    Thd_backup_and_restore switch_thd(thd, head);
#endif /* !EMBEDDED_LIBRARY */
    bool all= head->get_transaction()->m_flags.real_commit;
    if (head->get_transaction()->m_flags.commit_low)
    {
      /* head is parked to have exited append() */
      assert(head->get_transaction()->m_flags.ready_preempt);
      /*
        Storage engine commit; a failure is recorded on the session
        itself and does not stop processing of the rest of the queue.
       */
      if (ha_commit_low(head, all, false))
        head->commit_error= THD::CE_COMMIT_ERROR;
    }
    DBUG_PRINT("debug", ("commit_error: %d, flags.pending: %s",
                         head->commit_error,
                         YESNO(head->get_transaction()->m_flags.pending)));
  }

  /*
    Handle the GTIDs of the threads.
    The gtid_executed table is kept updated even though transactions
    fail to be logged. That is required by slave auto positioning.
  */
  gtid_state->update_commit_group(first);

  for (THD *head= first ; head ; head = head->next_to_commit)
  {
    /*
      Decrement the prepared XID counter after storage engine commit.
      We also need to decrement the prepared XID counter when
      encountering a flush error or session attach error, to avoid a
      3-way deadlock among user thread, rotate thread and dump thread.
    */
    if (head->get_transaction()->m_flags.xid_written)
      dec_prep_xids(head);
  }
}
9737 
9738 /**
9739   Process after commit for a sequence of sessions.
9740 
9741   @param thd The "master" thread
9742   @param first First thread in the queue of threads to commit
9743  */
9744 
9745 void
process_after_commit_stage_queue(THD * thd,THD * first)9746 MYSQL_BIN_LOG::process_after_commit_stage_queue(THD *thd, THD *first)
9747 {
9748   for (THD *head= first; head; head= head->next_to_commit)
9749   {
9750     if (head->get_transaction()->m_flags.run_hooks &&
9751         head->commit_error != THD::CE_COMMIT_ERROR)
9752     {
9753 
9754       /*
9755         TODO: This hook here should probably move outside/below this
9756               if and be the only after_commit invocation left in the
9757               code.
9758       */
9759 #ifndef EMBEDDED_LIBRARY
9760       Thd_backup_and_restore switch_thd(thd, head);
9761 #endif /* !EMBEDDED_LIBRARY */
9762       bool all= head->get_transaction()->m_flags.real_commit;
9763       (void) RUN_HOOK(transaction, after_commit, (head, all));
9764       /*
9765         When after_commit finished for the transaction, clear the run_hooks flag.
9766         This allow other parts of the system to check if after_commit was called.
9767       */
9768       head->get_transaction()->m_flags.run_hooks= false;
9769     }
9770   }
9771 }
9772 
#ifndef NDEBUG
/**
  Printable stage names for debug tracing, indexed by the
  Stage_manager::StageID value of the stage.
  NOTE(review): the order presumably mirrors the StageID enumeration;
  verify against its declaration when adding a stage.
*/
static const char* g_stage_name[] = { "FLUSH", "SYNC", "COMMIT" };
#endif
9781 
9782 
9783 /**
9784   Enter a stage of the ordered commit procedure.
9785 
9786   Entering is stage is done by:
9787 
9788   - Atomically enqueueing a queue of processes (which is just one for
9789     the first phase).
9790 
9791   - If the queue was empty, the thread is the leader for that stage
9792     and it should process the entire queue for that stage.
9793 
9794   - If the queue was not empty, the thread is a follower and can go
9795     waiting for the commit to finish.
9796 
9797   The function will lock the stage mutex if it was designated the
9798   leader for the phase.
9799 
9800   @param thd    Session structure
9801   @param stage  The stage to enter
9802   @param queue  Queue of threads to enqueue for the stage
9803   @param stage_mutex Mutex for the stage
9804 
9805   @retval true  The thread should "bail out" and go waiting for the
9806                 commit to finish
9807   @retval false The thread is the leader for the stage and should do
9808                 the processing.
9809 */
9810 
9811 bool
change_stage(THD * thd,Stage_manager::StageID stage,THD * queue,mysql_mutex_t * leave_mutex,mysql_mutex_t * enter_mutex)9812 MYSQL_BIN_LOG::change_stage(THD *thd,
9813                             Stage_manager::StageID stage, THD *queue,
9814                             mysql_mutex_t *leave_mutex,
9815                             mysql_mutex_t *enter_mutex)
9816 {
9817   DBUG_ENTER("MYSQL_BIN_LOG::change_stage");
9818   DBUG_PRINT("enter", ("thd: 0x%llx, stage: %s, queue: 0x%llx",
9819                        (ulonglong) thd, g_stage_name[stage], (ulonglong) queue));
9820   assert(0 <= stage && stage < Stage_manager::STAGE_COUNTER);
9821   assert(enter_mutex);
9822   assert(queue);
9823   /*
9824     enroll_for will release the leave_mutex once the sessions are
9825     queued.
9826   */
9827   if (!stage_manager.enroll_for(stage, queue, leave_mutex))
9828   {
9829     assert(!thd_get_cache_mngr(thd)->dbug_any_finalized());
9830     DBUG_RETURN(true);
9831   }
9832 
9833   /*
9834     We do not lock the enter_mutex if it is LOCK_log when rotating binlog
9835     caused by logging incident log event, since it is already locked.
9836   */
9837   bool need_lock_enter_mutex=
9838     !(is_rotating_caused_by_incident && enter_mutex == &LOCK_log);
9839 
9840   if (need_lock_enter_mutex)
9841     mysql_mutex_lock(enter_mutex);
9842   else
9843     mysql_mutex_assert_owner(enter_mutex);
9844 
9845   DBUG_RETURN(false);
9846 }
9847 
9848 
9849 
9850 /**
9851   Flush the I/O cache to file.
9852 
9853   Flush the binary log to the binlog file if any byte where written
9854   and signal that the binary log file has been updated if the flush
9855   succeeds.
9856 */
9857 
9858 int
flush_cache_to_file(my_off_t * end_pos_var)9859 MYSQL_BIN_LOG::flush_cache_to_file(my_off_t *end_pos_var)
9860 {
9861   if (flush_io_cache(&log_file))
9862   {
9863     THD *thd= current_thd;
9864     thd->commit_error= THD::CE_FLUSH_ERROR;
9865     return ER_ERROR_ON_WRITE;
9866   }
9867   *end_pos_var= my_b_tell(&log_file);
9868   return 0;
9869 }
9870 
9871 
9872 /**
9873   Call fsync() to sync the file to disk.
9874 */
9875 std::pair<bool, bool>
sync_binlog_file(bool force)9876 MYSQL_BIN_LOG::sync_binlog_file(bool force)
9877 {
9878   bool synced= false;
9879   unsigned int sync_period= get_sync_period();
9880   if (force || (sync_period && ++sync_counter >= sync_period))
9881   {
9882     sync_counter= 0;
9883 
9884     /**
9885       On *pure non-transactional* workloads there is a small window
9886       in time where a concurrent rotate might be able to close
9887       the file before the sync is actually done. In that case,
9888       ignore the bad file descriptor errors.
9889 
9890       Transactional workloads (InnoDB) are not affected since the
9891       the rotation will not happen until all transactions have
9892       committed to the storage engine, thence decreased the XID
9893       counters.
9894 
9895       TODO: fix this properly even for non-transactional storage
9896             engines.
9897      */
9898     if (DBUG_EVALUATE_IF("simulate_error_during_sync_binlog_file", 1,
9899                          mysql_file_sync(log_file.file,
9900                                          MYF(MY_WME | MY_IGNORE_BADFD))))
9901     {
9902       THD *thd= current_thd;
9903       thd->commit_error= THD::CE_SYNC_ERROR;
9904       return std::make_pair(true, synced);
9905     }
9906     synced= true;
9907   }
9908   return std::make_pair(false, synced);
9909 }
9910 
9911 
/**
   Helper function executed when leaving @c ordered_commit.

   This function contains the necessary code for fetching the error
   code, doing post-commit checks, and wrapping up the commit if
   necessary.

   It is typically called when @c change_stage indicates that the
   thread should bail out (it is a follower), and also when the
   ultimate leader thread finishes executing @c ordered_commit.

   If @c m_flags.commit_low is still set for the session, the storage
   engine commit and the after_commit hook are executed here; otherwise
   only the prepared XID counter is decremented when an XID was
   written. Finally, a GTID still owned by the session is handed to
   @c gtid_state as committed or rolled back depending on
   @c commit_error.

   @param thd Session that is leaving the ordered commit.

   @return Non-zero if the session's commit_error is
   THD::CE_COMMIT_ERROR, zero otherwise. Flush and sync errors are not
   reported through the return value.
 */
int
MYSQL_BIN_LOG::finish_commit(THD *thd)
{
  DBUG_ENTER("MYSQL_BIN_LOG::finish_commit");
  DEBUG_SYNC(thd, "reached_finish_commit");
  /*
    In some unlikely situations, it can happen that the binary
    log is closed before the thread flushes its cache.
    In that case, clear the caches before doing commit.
  */
  if (unlikely(!is_open()))
  {
    binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
    if (cache_mngr)
      cache_mngr->reset();
  }
  /*
    Sessions that carry a sequence number update the dependency
    tracker's max-committed value, under its dedicated mutex.
  */
  if (thd->get_transaction()->sequence_number != SEQ_UNINIT)
  {
    mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
    m_dependency_tracker.update_max_committed(thd);
    mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);
  }
  if (thd->get_transaction()->m_flags.commit_low)
  {
    const bool all= thd->get_transaction()->m_flags.real_commit;
    /*
      Flush errors and sync errors are ignored at this point and we
      continue with the commit. At this time, commit_error cannot be
      COMMIT_ERROR.
    */
    assert(thd->commit_error != THD::CE_COMMIT_ERROR);

    /*
      Acquire a shared lock to block commits if an X lock has been acquired by
      LOCK TABLES FOR BACKUP or START TRANSACTION WITH CONSISTENT SNAPSHOT. We
      only reach this code if binlog_order_commits=0.
    */
    assert(opt_binlog_order_commits == 0);

    slock();

    /*
      Storage engine commit; a failure is recorded in commit_error.
    */
    if (ha_commit_low(thd, all, false))
      thd->commit_error= THD::CE_COMMIT_ERROR;

    sunlock();
    /*
      Decrement the prepared XID counter after storage engine commit
    */
    if (thd->get_transaction()->m_flags.xid_written)
      dec_prep_xids(thd);
    /*
      If commit succeeded, we call the after_commit hook and clear
      run_hooks so the hook is not invoked a second time.

      TODO: This hook here should probably move outside/below this
            if and be the only after_commit invocation left in the
            code.
    */
    if ((thd->commit_error != THD::CE_COMMIT_ERROR) &&
        thd->get_transaction()->m_flags.run_hooks)
    {
      (void) RUN_HOOK(transaction, after_commit, (thd, all));
      thd->get_transaction()->m_flags.run_hooks= false;
    }
  }
  else if (thd->get_transaction()->m_flags.xid_written)
    dec_prep_xids(thd);

  /*
    If the ordered commit didn't update the GTIDs for this thd yet
    at process_commit_stage_queue (i.e. --binlog-order-commits=0)
    the thd still has the ownership of a GTID and we must handle it.
  */
  if (!thd->owned_gtid.is_empty())
  {
    /*
      Gtid is added to gtid_state.executed_gtids and removed from owned_gtids
      on update_on_commit(). Any commit_error other than CE_NONE
      (flush, sync or commit error) takes the rollback path instead.
    */
    if (thd->commit_error == THD::CE_NONE)
    {
      gtid_state->update_on_commit(thd);
    }
    else
      gtid_state->update_on_rollback(thd);
  }

  /* Debug-only: signal tests that this function is about to return. */
  DBUG_EXECUTE_IF("leaving_finish_commit",
                  {
                    const char act[]=
                      "now SIGNAL signal_leaving_finish_commit";
                    assert(!debug_sync_set_action(current_thd,
                                                  STRING_WITH_LEN(act)));
                  };);

  assert(thd->commit_error || !thd->get_transaction()->m_flags.run_hooks);
  /*
    NOTE(review): the cache manager is dereferenced here without the
    NULL check used at the top of this function -- confirm it cannot be
    NULL on this path.
  */
  assert(!thd_get_cache_mngr(thd)->dbug_any_finalized());
  DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
                        thd->thread_id(), thd->commit_error));
  /*
    flush or sync errors are handled by the leader of the group
    (using binlog_error_action). Hence treat only COMMIT_ERRORs as errors.
  */
  DBUG_RETURN(thd->commit_error == THD::CE_COMMIT_ERROR);
}
10038 
10039 /**
10040    Auxiliary function used in ordered_commit.
10041 */
call_after_sync_hook(THD * queue_head)10042 static inline int call_after_sync_hook(THD *queue_head)
10043 {
10044   const char *log_file= NULL;
10045   my_off_t pos= 0;
10046 
10047   if (NO_HOOK(binlog_storage))
10048     return 0;
10049 
10050   assert(queue_head != NULL);
10051   for (THD *thd= queue_head; thd != NULL; thd= thd->next_to_commit)
10052     if (likely(thd->commit_error == THD::CE_NONE))
10053       thd->get_trans_fixed_pos(&log_file, &pos);
10054 
10055   if (DBUG_EVALUATE_IF("simulate_after_sync_hook_error", 1, 0) ||
10056       RUN_HOOK(binlog_storage, after_sync, (queue_head, log_file, pos)))
10057   {
10058     sql_print_error("Failed to run 'after_sync' hooks");
10059     return ER_ERROR_ON_WRITE;
10060   }
10061   return 0;
10062 }
10063 
10064 /**
10065   Helper function to handle flush or sync stage errors.
10066   If binlog_error_action= ABORT_SERVER, server will be aborted
10067   after reporting the error to the client.
10068   If binlog_error_action= IGNORE_ERROR, binlog will be closed
10069   for the reset of the life time of the server. close() call is protected
10070   with LOCK_log to avoid any parallel operations on binary log.
10071 
10072   @param thd Thread object that faced flush/sync error
10073   @param need_lock_log
10074                        > Indicates true if LOCk_log is needed before closing
10075                          binlog (happens when we are handling sync error)
10076                        > Indicates false if LOCK_log is already acquired
10077                          by the thread (happens when we are handling flush
10078                          error)
10079   @param message Message stating the reason of the failure
10080 
10081   @return void
10082 */
handle_binlog_flush_or_sync_error(THD * thd,bool need_lock_log,const char * message)10083 void MYSQL_BIN_LOG::handle_binlog_flush_or_sync_error(THD *thd,
10084                                                       bool need_lock_log,
10085                                                       const char* message)
10086 {
10087   char errmsg[MYSQL_ERRMSG_SIZE]= {0};
10088   if (!message)
10089     sprintf(errmsg, "An error occurred during %s stage of the commit. "
10090             "'binlog_error_action' is set to '%s'.",
10091             thd->commit_error== THD::CE_FLUSH_ERROR ? "flush" : "sync",
10092             binlog_error_action == ABORT_SERVER ? "ABORT_SERVER" : "IGNORE_ERROR");
10093   else
10094     strncpy(errmsg, message, MYSQL_ERRMSG_SIZE-1);
10095   if (binlog_error_action == ABORT_SERVER)
10096   {
10097     char err_buff[MYSQL_ERRMSG_SIZE + 25];
10098     sprintf(err_buff, "%s Server is being stopped.", errmsg);
10099     exec_binlog_error_action_abort(err_buff);
10100   }
10101   else
10102   {
10103     DEBUG_SYNC(thd, "before_binlog_closed_due_to_error");
10104     if (need_lock_log)
10105       mysql_mutex_lock(&LOCK_log);
10106     else
10107       mysql_mutex_assert_owner(&LOCK_log);
10108     /*
10109       It can happen that other group leader encountered
10110       error and already closed the binary log. So print
10111       error only if it is in open state. But we should
10112       call close() always just in case if the previous
10113       close did not close index file.
10114     */
10115     if (is_open())
10116     {
10117       sql_print_error("%s Hence turning logging off for the whole duration "
10118                       "of the MySQL server process. To turn it on again: fix "
10119                       "the cause, shutdown the MySQL server and restart it.",
10120                       errmsg);
10121     }
10122     close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, false/*need_lock_log=false*/,
10123           true/*need_lock_index=true*/);
10124     /*
10125       If there is a write error (flush/sync stage) and if
10126       binlog_error_action=IGNORE_ERROR, clear the error
10127       and allow the commit to happen in storage engine.
10128     */
10129     if (check_write_error(thd)) { /* we have DA_ERROR */
10130       thd->clear_error(); /* sets thd->get_stmt_da()->status() to DA_EMPTY */
10131       /* For SQLCOM_COMMIT, ROLLBACK, ROLLBACK TO SAVEPOINT, there is already
10132       my_ok() in mysql_execute_command. Doing double my_ok() is not allowed. So
10133       we avoid that here */
10134       if (thd_sql_command(thd) != SQLCOM_COMMIT &&
10135           thd_sql_command(thd) != SQLCOM_ROLLBACK &&
10136           thd_sql_command(thd) != SQLCOM_ROLLBACK_TO_SAVEPOINT) {
10137         my_ok(thd); /* sets thd->get_stmt_da()->status() to DA_OK */
10138       }
10139     }
10140 
10141     if (need_lock_log)
10142       mysql_mutex_unlock(&LOCK_log);
10143     DEBUG_SYNC(thd, "after_binlog_closed_due_to_error");
10144   }
10145 }
10146 /**
10147   Flush and commit the transaction.
10148 
10149   This will execute an ordered flush and commit of all outstanding
10150   transactions and is the main function for the binary log group
10151   commit logic. The function performs the ordered commit in two
10152   phases.
10153 
10154   The first phase flushes the caches to the binary log and under
10155   LOCK_log and marks all threads that were flushed as not pending.
10156 
10157   The second phase executes under LOCK_commit and commits all
10158   transactions in order.
10159 
10160   The procedure is:
10161 
10162   1. Queue ourselves for flushing.
  2. Grab the log lock, which might result in blocking if the mutex is
     already held by another thread.
10165   3. If we were not committed while waiting for the lock
10166      1. Fetch the queue
10167      2. For each thread in the queue:
10168         a. Attach to it
10169         b. Flush the caches, saving any error code
10170      3. Flush and sync (depending on the value of sync_binlog).
10171      4. Signal that the binary log was updated
10172   4. Release the log lock
10173   5. Grab the commit lock
10174      1. For each thread in the queue:
10175         a. If there were no error when flushing and the transaction shall be committed:
10176            - Commit the transaction, saving the result of executing the commit.
10177   6. Release the commit lock
10178   7. Call purge, if any of the committed thread requested a purge.
10179   8. Return with the saved error code
10180 
10181   @todo The use of @c skip_commit is a hack that we use since the @c
10182   TC_LOG Interface does not contain functions to handle
10183   savepoints. Once the binary log is eliminated as a handlerton and
10184   the @c TC_LOG interface is extended with savepoint handling, this
10185   parameter can be removed.
10186 
10187   @param thd Session to commit transaction for
10188   @param all   This is @c true if this is a real transaction commit, and
10189                @c false otherwise.
10190   @param skip_commit
10191                This is @c true if the call to @c ha_commit_low should
10192                be skipped (it is handled by the caller somehow) and @c
10193                false otherwise (the normal case).
10194  */
prepare_ordered_commit(THD * thd,bool all,bool skip_commit)10195 int MYSQL_BIN_LOG::prepare_ordered_commit(THD *thd, bool all,
10196                                           bool skip_commit)
10197 {
10198   DBUG_ENTER("MYSQL_BIN_LOG::prepare_ordered_commit");
10199 
10200   /*
10201     These values are used while flushing a transaction, so clear
10202     everything.
10203 
10204     Notes:
10205 
10206     - It would be good if we could keep transaction coordinator
10207       log-specific data out of the THD structure, but that is not the
10208       case right now.
10209 
10210     - Everything in the transaction structure is reset when calling
10211       ha_commit_low since that calls Transaction_ctx::cleanup.
10212   */
10213   thd->get_transaction()->m_flags.pending= true;
10214   thd->commit_error= THD::CE_NONE;
10215   thd->next_to_commit= NULL;
10216   thd->durability_property= HA_IGNORE_DURABILITY;
10217   thd->get_transaction()->m_flags.real_commit= all;
10218   thd->get_transaction()->m_flags.xid_written= false;
10219   thd->get_transaction()->m_flags.commit_low= !skip_commit;
10220   thd->get_transaction()->m_flags.run_hooks= !skip_commit;
10221 #ifndef NDEBUG
10222   /*
10223      The group commit Leader may have to wait for follower whose transaction
10224      is not ready to be preempted. Initially the status is pessimistic.
10225      Preemption guarding logics is necessary only when !NDEBUG is set.
10226      It won't be required for the dbug-off case as long as the follower won't
10227      execute any thread-specific write access code in this method, which is
10228      the case as of current.
10229   */
10230   thd->get_transaction()->m_flags.ready_preempt= 0;
10231 #endif
10232 
10233   DBUG_PRINT("enter", ("flags.pending: %s, commit_error: %d, thread_id: %u",
10234                        YESNO(thd->get_transaction()->m_flags.pending),
10235                        thd->commit_error, thd->thread_id()));
10236 
10237   DEBUG_SYNC(thd, "bgc_before_flush_stage");
10238 
10239   /*
10240     Stage #1: flushing transactions to binary log
10241 
10242     While flushing, we allow new threads to enter and will process
10243     them in due time. Once the queue was empty, we cannot reap
10244     anything more since it is possible that a thread entered and
10245     appointed itself leader for the flush phase.
10246   */
10247 
10248 #ifdef HAVE_REPLICATION
10249   if (has_commit_order_manager(thd))
10250   {
10251     Slave_worker *worker= dynamic_cast<Slave_worker *>(thd->rli_slave);
10252     Commit_order_manager *mngr= worker->get_commit_order_manager();
10253 
10254     if (mngr->wait_for_its_turn(worker, all))
10255     {
10256       thd->commit_error= THD::CE_COMMIT_ERROR;
10257       DBUG_RETURN(thd->commit_error);
10258     }
10259   }
10260 #endif
10261 
10262   DBUG_RETURN(0); /* no error */
10263 }
10264 
10265 
/**
  Drive a session through the flush, sync and commit stages of the
  binary log group commit.

  Each stage is entered through change_stage(). When that call returns
  true the function returns finish_commit(thd) immediately (presumably
  because another thread is leading the stage and has already handled
  this session -- see change_stage()). Otherwise this thread processes
  the queue fetched for the stage:

  - Flush stage: process_flush_stage_queue(), flush_cache_to_file() and
    the 'after_flush' hook. If the binary log is not open, the flush and
    sync stages are skipped and control jumps straight to the commit
    stage.
  - Sync stage: an optional wait_count_or_timeout() delay followed by
    sync_binlog_file().
  - Commit stage: entered only when opt_binlog_order_commits is set and
    a sync error does not forbid it; runs process_commit_stage_queue()
    and, once LOCK_commit has been released,
    process_after_commit_stage_queue().

  Afterwards the queue is signalled as done, finish_commit() is called
  for this session and, if requested, the log is rotated and purged.

  @param thd Session whose transaction is being committed.

  @return On the early-return paths, the value of finish_commit(thd).
          Otherwise non-zero if thd->commit_error equals
          THD::CE_COMMIT_ERROR at the end, zero if not.
*/
int MYSQL_BIN_LOG::ordered_commit(THD *thd)
{
  DBUG_ENTER("MYSQL_BIN_LOG::ordered_commit");
  int      flush_error= 0, sync_error= 0;
  my_off_t total_bytes= 0;
  bool     do_rotate= false;

  if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log))
  {
    DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
                          thd->thread_id(), thd->commit_error));
    DBUG_RETURN(finish_commit(thd));
  }

  THD *wait_queue= NULL, *final_queue= NULL;
  mysql_mutex_t *leave_mutex_before_commit_stage= NULL;
  my_off_t flush_end_pos= 0;
  /* Only assigned and read on the path that does not jump to commit_stage. */
  bool update_binlog_end_pos_after_sync;
  if (unlikely(!is_open()))
  {
    final_queue= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
    leave_mutex_before_commit_stage= &LOCK_log;
    /*
      binary log is closed, flush stage and sync stage should be
      ignored. Binlog cache should be cleared, but instead of doing
      it here, do that work in 'finish_commit' function so that
      leader and followers thread caches will be cleared.
    */
    goto commit_stage;
  }
  DEBUG_SYNC(thd, "waiting_in_the_middle_of_flush_stage");
  flush_error= process_flush_stage_queue(&total_bytes, &do_rotate,
                                                 &wait_queue);

  if (flush_error == 0 && total_bytes > 0)
    flush_error= flush_cache_to_file(&flush_end_pos);
  DBUG_EXECUTE_IF("crash_after_flush_binlog", DBUG_SUICIDE(););

  /*
    With a sync period of 1 the binlog end position is published only
    after the sync stage (see below) instead of right after the flush.
  */
  update_binlog_end_pos_after_sync= (get_sync_period() == 1);

  /*
    If the flush finished successfully, we can call the after_flush
    hook. Being invoked here, we have the guarantee that the hook is
    executed before the before/after_send_hooks on the dump thread
    preventing race conditions among these plug-ins.
  */
  if (flush_error == 0)
  {
    const char *file_name_ptr= log_file_name + dirname_length(log_file_name);
    assert(flush_end_pos != 0);
    if (RUN_HOOK(binlog_storage, after_flush,
                 (thd, file_name_ptr, flush_end_pos)))
    {
      sql_print_error("Failed to run 'after_flush' hooks");
      flush_error= ER_ERROR_ON_WRITE;
    }

    if (!update_binlog_end_pos_after_sync)
      update_binlog_end_pos();
    DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
  }

  if (flush_error)
  {
    /*
      Handle flush error (if any) after leader finishes its flush stage.
    */
    handle_binlog_flush_or_sync_error(thd, false /* need_lock_log */,
              (thd->commit_error == THD::CE_FLUSH_GNO_EXHAUSTED_ERROR)
              ? ER(ER_GNO_EXHAUSTED) : NULL);
  }

  publish_coordinates_for_global_status();

  DEBUG_SYNC(thd, "bgc_after_flush_stage_before_sync_stage");

  /*
    Stage #2: Syncing binary log file to disk
  */

  if (change_stage(thd, Stage_manager::SYNC_STAGE, wait_queue, &LOCK_log, &LOCK_sync))
  {
    DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
                          thd->thread_id(), thd->commit_error));
    DBUG_RETURN(finish_commit(thd));
  }

  /*
    Shall introduce a delay only if it is going to do sync
    in this ongoing SYNC stage. The "+1" used below in the
    if condition is to count the ongoing sync stage.
    When sync_binlog=0 (where we never do sync in BGC group),
    it is considered as a special case and delay will be executed
    for every group just like how it is done when sync_binlog= 1.
  */
  if (!flush_error && (sync_counter + 1 >= get_sync_period()))
    stage_manager.wait_count_or_timeout(opt_binlog_group_commit_sync_no_delay_count,
                                        opt_binlog_group_commit_sync_delay,
                                        Stage_manager::SYNC_STAGE);

  final_queue= stage_manager.fetch_queue_for(Stage_manager::SYNC_STAGE);

  if (flush_error == 0 && total_bytes > 0)
  {
    DEBUG_SYNC(thd, "before_sync_binlog_file");
    std::pair<bool, bool> result= sync_binlog_file(false);
    sync_error= result.first;
  }

  if (update_binlog_end_pos_after_sync)
  {
    THD *tmp_thd= final_queue;
    const char *binlog_file= NULL;
    my_off_t pos= 0;
    /* Walk to the last session in the queue; its position is published. */
    while (tmp_thd->next_to_commit != NULL)
      tmp_thd= tmp_thd->next_to_commit;
    if (flush_error == 0 && sync_error == 0)
    {
      tmp_thd->get_trans_fixed_pos(&binlog_file, &pos);
      update_binlog_end_pos(binlog_file, pos);
    }
  }

  DEBUG_SYNC(thd, "bgc_after_sync_stage_before_commit_stage");

  leave_mutex_before_commit_stage= &LOCK_sync;
  /*
    Stage #3: Commit all transactions in order.

    This stage is skipped if we do not need to order the commits and
    each thread has to execute the handlerton commit instead.

    However, since we are keeping the lock from the previous stage, we
    need to unlock it if we skip the stage.

    We must also step commit_clock before the ha_commit_low() is called
    either in ordered fashion (by the leader of this stage) or by the
    threads themselves.

    We are delaying the handling of sync error until
    all locks are released but we should not enter into
    commit stage if binlog_error_action is ABORT_SERVER.
  */
commit_stage:
  if (opt_binlog_order_commits &&
      (sync_error == 0 || binlog_error_action != ABORT_SERVER))
  {
    if (change_stage(thd, Stage_manager::COMMIT_STAGE,
                     final_queue, leave_mutex_before_commit_stage,
                     &LOCK_commit))
    {
      DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
                            thd->thread_id(), thd->commit_error));
      DBUG_RETURN(finish_commit(thd));
    }
    THD *commit_queue= stage_manager.fetch_queue_for(Stage_manager::COMMIT_STAGE);
    DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
                    DEBUG_SYNC(thd, "before_process_commit_stage_queue"););

    if (flush_error == 0 && sync_error == 0)
      sync_error= call_after_sync_hook(commit_queue);

    /*
      process_commit_stage_queue will call update_on_commit or
      update_on_rollback for the GTID owned by each thd in the queue.

      This will be done this way to guarantee that GTIDs are added to
      gtid_executed in order, to avoid creating unnecessary temporary
      gaps and keep gtid_executed as a single interval at all times.

      If we allow each thread to call update_on_commit only when they
      are at finish_commit, the GTID order cannot be guaranteed and
      temporary gaps may appear in gtid_executed. When this happen,
      the server would have to add and remove intervals from the
      Gtid_set, and adding and removing intervals requires a mutex,
      which would reduce performance.
    */
    process_commit_stage_queue(thd, commit_queue);
    mysql_mutex_unlock(&LOCK_commit);
    /*
      Process after_commit after LOCK_commit is released for avoiding
      3-way deadlock among user thread, rotate thread and dump thread.
    */
    process_after_commit_stage_queue(thd, commit_queue);
    final_queue= commit_queue;
  }
  else
  {
    /* Commit stage skipped: drop the mutex kept from the previous stage. */
    if (leave_mutex_before_commit_stage)
      mysql_mutex_unlock(leave_mutex_before_commit_stage);
    if (flush_error == 0 && sync_error == 0)
      sync_error= call_after_sync_hook(final_queue);
  }

  /*
    Handle sync error after we release all locks in order to avoid deadlocks
  */
  if (sync_error)
    handle_binlog_flush_or_sync_error(thd, true /* need_lock_log */, NULL);

  /* Commit done so signal all waiting threads */
  stage_manager.signal_done(final_queue);

  /*
    Finish the commit before executing a rotate, or run the risk of a
    deadlock. We don't need the return value here since it is in
    thd->commit_error, which is returned below.
  */
  (void) finish_commit(thd);

  /*
    If we need to rotate, we do it without commit error.
    Otherwise the thd->commit_error will be possibly reset.
   */
  if (DBUG_EVALUATE_IF("force_rotate", 1, 0) ||
      (do_rotate && thd->commit_error == THD::CE_NONE &&
       !is_rotating_caused_by_incident))
  {
    /*
      Do not force the rotate as several consecutive groups may
      request unnecessary rotations.

      NOTE: Run purge_logs wo/ holding LOCK_log because it does not
      need the mutex. Otherwise causes various deadlocks.
    */

    DEBUG_SYNC(thd, "ready_to_do_rotation");
    bool check_purge= false;
    mysql_mutex_lock(&LOCK_log);
    /*
      If rotate fails then depends on binlog_error_action variable
      appropriate action will be taken inside rotate call.
    */
    int error= rotate(false, &check_purge);
    mysql_mutex_unlock(&LOCK_log);

    if (error)
      thd->commit_error= THD::CE_COMMIT_ERROR;
    else if (check_purge)
      purge();
  }

#ifdef HAVE_REPLICATION
  /*
    Purge by size once the accounted binlog space plus the current
    write position of the log file exceeds binlog_space_limit.
  */
  if (binlog_space_limit && binlog_space_total &&
      binlog_space_total + my_b_tell(&log_file) > binlog_space_limit)
    purge_logs_by_size(true);
#endif

  /*
    flush or sync errors are handled above (using binlog_error_action).
    Hence treat only COMMIT_ERRORs as errors.
  */
  DBUG_RETURN(thd->commit_error == THD::CE_COMMIT_ERROR);
}
10520 
10521 
10522 /**
10523   MYSQLD server recovers from last crashed binlog.
10524 
10525   @param log           IO_CACHE of the crashed binlog.
10526   @param fdle          Format_description_log_event of the crashed binlog.
10527   @param valid_pos     The position of the last valid transaction or
10528                        event(non-transaction) of the crashed binlog.
10529 
10530   @retval
10531     0                  ok
10532   @retval
10533     1                  error
10534 */
recover(IO_CACHE * log,Format_description_log_event * fdle,my_off_t * valid_pos)10535 int MYSQL_BIN_LOG::recover(IO_CACHE *log, Format_description_log_event *fdle,
10536                             my_off_t *valid_pos)
10537 {
10538   Log_event  *ev;
10539   HASH xids;
10540   MEM_ROOT mem_root;
10541   /*
10542     The flag is used for handling the case that a transaction
10543     is partially written to the binlog.
10544   */
10545   bool in_transaction= FALSE;
10546   int memory_page_size= my_getpagesize();
10547 
10548   if (! fdle->is_valid() ||
10549       my_hash_init(&xids, &my_charset_bin, memory_page_size/3, 0,
10550                    sizeof(my_xid), 0, 0, 0,
10551                    key_memory_binlog_recover_exec))
10552     goto err1;
10553 
10554   init_alloc_root(key_memory_binlog_recover_exec,
10555                   &mem_root, memory_page_size, memory_page_size);
10556 
10557   while ((ev= Log_event::read_log_event(log, 0, fdle, TRUE))
10558          && ev->is_valid())
10559   {
10560     if (ev->get_type_code() == binary_log::QUERY_EVENT &&
10561         !strcmp(((Query_log_event*)ev)->query, "BEGIN"))
10562       in_transaction= TRUE;
10563 
10564     if (ev->get_type_code() == binary_log::QUERY_EVENT &&
10565         !strcmp(((Query_log_event*)ev)->query, "COMMIT"))
10566     {
10567       assert(in_transaction == TRUE);
10568       in_transaction= FALSE;
10569     }
10570     else if (ev->get_type_code() == binary_log::XID_EVENT)
10571     {
10572       assert(in_transaction == TRUE);
10573       in_transaction= FALSE;
10574       Xid_log_event *xev=(Xid_log_event *)ev;
10575       uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
10576                                       sizeof(xev->xid));
10577       if (!x || my_hash_insert(&xids, x))
10578         goto err2;
10579     }
10580     else if (ev->get_type_code() == binary_log::START_ENCRYPTION_EVENT &&
10581              fdle->start_decryption(static_cast<Start_encryption_log_event*>(ev)))
10582     {
10583       sql_print_warning("Error initializing decryption while crash_recovery.");
10584       goto err2;
10585     }
10586 
10587     /*
10588       Recorded valid position for the crashed binlog file
10589       which did not contain incorrect events. The following
10590       positions increase the variable valid_pos:
10591 
10592       1 -
10593         ...
10594         <---> HERE IS VALID <--->
10595         GTID
10596         BEGIN
10597         ...
10598         COMMIT
10599         ...
10600 
10601       2 -
10602         ...
10603         <---> HERE IS VALID <--->
10604         GTID
10605         DDL/UTILITY
10606         ...
10607 
10608       In other words, the following positions do not increase
10609       the variable valid_pos:
10610 
10611       1 -
10612         GTID
10613         <---> HERE IS VALID <--->
10614         ...
10615 
10616       2 -
10617         GTID
10618         BEGIN
10619         <---> HERE IS VALID <--->
10620         ...
10621     */
10622     if (!log->error && !in_transaction &&
10623         !is_gtid_event(ev))
10624       *valid_pos= my_b_tell(log);
10625 
10626     delete ev;
10627   }
10628 
10629   /*
10630     Call ha_recover if and only if there is a registered engine that
10631     does 2PC, otherwise in DBUG builds calling ha_recover directly
10632     will result in an assert. (Production builds would be safe since
10633     ha_recover returns right away if total_ha_2pc <= opt_log_bin.)
10634    */
10635   if (total_ha_2pc > 1 && ha_recover(&xids))
10636     goto err2;
10637 
10638   free_root(&mem_root, MYF(0));
10639   my_hash_free(&xids);
10640   return 0;
10641 
10642 err2:
10643   free_root(&mem_root, MYF(0));
10644   my_hash_free(&xids);
10645 err1:
10646   sql_print_error("Crash recovery failed. Either correct the problem "
10647                   "(if it's, for example, out of memory error) and restart, "
10648                   "or delete (or rename) binary log and start mysqld with "
10649                   "--tc-heuristic-recover={commit|rollback}");
10650   return 1;
10651 }
10652 
10653 /*
10654   Copy out the non-directory part of binlog position filename for the
10655   `binlog_snapshot_file' status variable, same way as it is done for
10656   SHOW MASTER STATUS.
10657 */
set_binlog_snapshot_file(const char * src)10658 static void set_binlog_snapshot_file(const char *src)
10659 {
10660   mysql_mutex_assert_owner(&LOCK_status);
10661 
10662   int dir_len = dirname_length(src);
10663   strmake(binlog_snapshot_file, src + dir_len,
10664           sizeof(binlog_snapshot_file) - 1);
10665 }
10666 
10667 /** Copy the current binlog coordinates to the variables used for the
10668 not-in-consistent-snapshot case of SHOW STATUS */
publish_coordinates_for_global_status(void) const10669 void MYSQL_BIN_LOG::publish_coordinates_for_global_status(void) const
10670 {
10671   mysql_mutex_assert_owner(&LOCK_log);
10672 
10673   mysql_mutex_lock(&LOCK_status);
10674   strcpy(binlog_global_snapshot_file, log_file_name);
10675   binlog_global_snapshot_position=
10676       my_b_inited(&log_file) ? my_b_tell(&log_file) : 0;
10677   mysql_mutex_unlock(&LOCK_status);
10678 }
10679 
10680 
xlock(void)10681 void MYSQL_BIN_LOG::xlock(void)
10682 {
10683   mysql_mutex_lock(&LOCK_log);
10684 
10685   assert(!snapshot_lock_acquired);
10686 
10687   /*
10688     We must ensure that no writes to binlog and no commits to storage engines
10689     occur after function is called for START TRANSACTION FOR CONSISTENT
10690     SNAPSHOT. With binlog_order_commits=1 (the default) flushing to binlog is
10691     performed under the LOCK_log mutex and commits are done under the
10692     LOCK_commit mutex, both in the stage leader thread. So acquiring those 2
10693     mutexes is sufficient to guarantee atomicity.
10694 
10695     With binlog_order_commits=0 commits are performed in parallel by separate
10696     threads with each acquiring a shared lock on LOCK_consistent_snapshot.
10697 
10698     binlog_order_commits is a dynamic variable, so we have to keep track what
10699     primitives should be used in xunlock().
10700   */
10701   if (opt_binlog_order_commits)
10702   {
10703     mysql_mutex_lock(&LOCK_commit);
10704   }
10705   else
10706   {
10707     snapshot_lock_acquired= true;
10708     mysql_rwlock_wrlock(&LOCK_consistent_snapshot);
10709   }
10710 }
10711 
10712 
xunlock(void)10713 void MYSQL_BIN_LOG::xunlock(void)
10714 {
10715   if (!snapshot_lock_acquired)
10716   {
10717     mysql_mutex_unlock(&LOCK_commit);
10718   }
10719   else
10720   {
10721     mysql_rwlock_unlock(&LOCK_consistent_snapshot);
10722     snapshot_lock_acquired= false;
10723   }
10724 
10725   mysql_mutex_unlock(&LOCK_log);
10726 }
10727 
report_missing_purged_gtids(const Gtid_set * slave_executed_gtid_set,std::string & errmsg)10728 void MYSQL_BIN_LOG::report_missing_purged_gtids(
10729     const Gtid_set *slave_executed_gtid_set, std::string &errmsg)
10730 {
10731   DBUG_ENTER("MYSQL_BIN_LOG::report_missing_purged_gtids");
10732   THD *thd= current_thd;
10733   Gtid_set gtid_missing(gtid_state->get_lost_gtids()->get_sid_map());
10734   gtid_missing.add_gtid_set(gtid_state->get_lost_gtids());
10735   gtid_missing.remove_gtid_set(slave_executed_gtid_set);
10736 
10737   String tmp_uuid;
10738   uchar name[]= "slave_uuid";
10739 
10740   /* Protects thd->user_vars. */
10741   mysql_mutex_lock(&thd->LOCK_thd_data);
10742   user_var_entry *entry=
10743     (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
10744   if (entry && entry->length() > 0)
10745     tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
10746   mysql_mutex_unlock(&thd->LOCK_thd_data);
10747 
10748 
10749   char* missing_gtids= NULL;
10750   char* slave_executed_gtids= NULL;
10751   gtid_missing.to_string(&missing_gtids);
10752   slave_executed_gtid_set->to_string(&slave_executed_gtids);
10753 
10754   /*
10755      Log the information about the missing purged GTIDs to the error log
10756      if the message is less than MAX_LOG_BUFFER_SIZE.
10757   */
10758   std::ostringstream log_info;
10759   log_info << "The missing transactions are '"<< missing_gtids <<"'";
10760   const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
10761 
10762   /* Don't consider the "%s" in the format string. Subtract 2 from the
10763      total length */
10764   uint total_length= (strlen(log_msg) - 2 + log_info.str().length());
10765 
10766   DBUG_EXECUTE_IF("simulate_long_missing_gtids",
10767                   { total_length= MAX_LOG_BUFFER_SIZE + 1;});
10768 
10769   if (total_length > MAX_LOG_BUFFER_SIZE)
10770     log_info.str("To find the missing purged transactions, run \"SELECT"
10771                  " @@GLOBAL.GTID_PURGED\" on the master, then run \"SELECT"
10772                  " CONCAT(RECEIVED_TRANSACTION_SET, ',', @@GLOBAL.GTID_EXECUTED)"
10773                  " FROM PERFORMANCE_SCHEMA.replication_connection_status\" on"
10774                  " the slave, and then run \"SELECT GTID_SUBTRACT(<master_set>,"
10775                  " <slave_set>)\" on any server");
10776 
10777   sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
10778                     log_info.str().c_str());
10779 
10780   /*
10781      Send the information about the slave executed GTIDs and missing
10782      purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
10783   */
10784   std::ostringstream gtid_info;
10785   gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
10786             << "', and the missing transactions are '"<< missing_gtids <<"'";
10787   errmsg.assign(ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS));
10788 
10789   /* Don't consider the "%s" in the format string. Subtract 2 from the
10790      total length */
10791   total_length= (errmsg.length() - 2 + gtid_info.str().length());
10792 
10793   DBUG_EXECUTE_IF("simulate_long_missing_gtids",
10794                   { total_length= MYSQL_ERRMSG_SIZE + 1;});
10795 
10796   if (total_length > MYSQL_ERRMSG_SIZE)
10797     gtid_info.str("The GTID sets and the missing purged transactions are too"
10798                   " long to print in this message. For more information,"
10799                   " please see the master's error log or the manual for"
10800                   " GTID_SUBTRACT");
10801 
10802   /* Buffer for formatting the message about the missing GTIDs. */
10803   char buff[MYSQL_ERRMSG_SIZE];
10804   my_snprintf(buff, MYSQL_ERRMSG_SIZE, errmsg.c_str(), gtid_info.str().c_str());
10805   errmsg.assign(const_cast<const char*>(buff));
10806 
10807   my_free(missing_gtids);
10808   my_free(slave_executed_gtids);
10809   DBUG_VOID_RETURN;
10810 }
10811 
report_missing_gtids(const Gtid_set * previous_gtid_set,const Gtid_set * slave_executed_gtid_set,std::string & errmsg)10812 void MYSQL_BIN_LOG::report_missing_gtids(const Gtid_set* previous_gtid_set,
10813                                          const Gtid_set* slave_executed_gtid_set,
10814                                          std::string& errmsg)
10815 {
10816   DBUG_ENTER("MYSQL_BIN_LOG::report_missing_gtids");
10817   THD *thd=current_thd;
10818   char* missing_gtids= NULL;
10819   char* slave_executed_gtids= NULL;
10820   Gtid_set gtid_missing(slave_executed_gtid_set->get_sid_map());
10821   gtid_missing.add_gtid_set(slave_executed_gtid_set);
10822   gtid_missing.remove_gtid_set(previous_gtid_set);
10823   gtid_missing.to_string(&missing_gtids);
10824   slave_executed_gtid_set->to_string(&slave_executed_gtids);
10825 
10826   String tmp_uuid;
10827   uchar name[]= "slave_uuid";
10828 
10829   /* Protects thd->user_vars. */
10830   mysql_mutex_lock(&thd->LOCK_thd_data);
10831 
10832   user_var_entry *entry=
10833     (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
10834   if (entry && entry->length() > 0)
10835     tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
10836   mysql_mutex_unlock(&thd->LOCK_thd_data);
10837 
10838   /*
10839      Log the information about the missing purged GTIDs to the error log
10840      if the message is less than MAX_LOG_BUFFER_SIZE.
10841   */
10842   std::ostringstream log_info;
10843   log_info << "If the binary log files have been deleted from disk,"
10844       " check the consistency of 'GTID_PURGED' variable."
10845       " The missing transactions are '"<< missing_gtids <<"'";
10846   const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
10847 
10848   /* Don't consider the "%s" in the format string. Subtract 2 from the
10849      total length */
10850   if ((strlen(log_msg) - 2 + log_info.str().length()) > MAX_LOG_BUFFER_SIZE)
10851     log_info.str("To find the missing purged transactions, run \"SELECT"
10852                  " @@GLOBAL.GTID_PURGED\" on the master, then run \"SELECT"
10853                  " CONCAT(RECEIVED_TRANSACTION_SET, ',', @@GLOBAL.GTID_EXECUTED)"
10854                  " FROM PERFORMANCE_SCHEMA.replication_connection_status\" on"
10855                  " the slave, and then run \"SELECT GTID_SUBTRACT(<master_set>,"
10856                  " <slave_set>)\" on any server");
10857 
10858   sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
10859                     log_info.str().c_str());
10860 
10861   /*
10862      Send the information about the slave executed GTIDs and missing
10863      purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
10864   */
10865   std::ostringstream gtid_info;
10866   gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
10867             << "', and the missing transactions are '"<< missing_gtids <<"'";
10868   errmsg.assign(ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS));
10869 
10870   /* Don't consider the "%s" in the format string. Subtract 2 from the
10871      total length */
10872   if ((errmsg.length() - 2 + gtid_info.str().length()) > MYSQL_ERRMSG_SIZE)
10873     gtid_info.str("The GTID sets and the missing purged transactions are too"
10874                   " long to print in this message. For more information,"
10875                   " please see the master's error log or the manual for"
10876                   " GTID_SUBTRACT");
10877   /* Buffer for formatting the message about the missing GTIDs. */
10878   char buff[MYSQL_ERRMSG_SIZE];
10879   my_snprintf(buff, MYSQL_ERRMSG_SIZE, errmsg.c_str(), gtid_info.str().c_str());
10880   errmsg.assign(const_cast<const char*>(buff));
10881 
10882   my_free(missing_gtids);
10883   my_free(slave_executed_gtids);
10884 
10885   DBUG_VOID_RETURN;
10886 }
10887 
is_binlog_cache_empty(bool is_transactional)10888 bool THD::is_binlog_cache_empty(bool is_transactional)
10889 {
10890   DBUG_ENTER("THD::is_binlog_cache_empty(bool)");
10891 
10892   // If opt_bin_log==0, it is not safe to call thd_get_cache_mngr
10893   // because binlog_hton has not been completely set up.
10894   assert(opt_bin_log);
10895   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
10896 
10897   // cache_mngr is NULL until we call thd->binlog_setup_trx_data, so
10898   // we assert that this has been done.
10899   assert(cache_mngr != NULL);
10900 
10901   binlog_cache_data *cache_data=
10902     cache_mngr->get_binlog_cache_data(is_transactional);
10903   assert(cache_data != NULL);
10904 
10905   DBUG_RETURN(cache_data->is_binlog_empty());
10906 }
10907 
10908 /*
10909   These functions are placed in this file since they need access to
10910   binlog_hton, which has internal linkage.
10911 */
10912 
binlog_setup_trx_data()10913 int THD::binlog_setup_trx_data()
10914 {
10915   DBUG_ENTER("THD::binlog_setup_trx_data");
10916   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
10917 
10918   if (cache_mngr)
10919     DBUG_RETURN(0);                             // Already set up
10920 
10921   IO_CACHE stmt_cache_log, trx_cache_log;
10922   memset(&stmt_cache_log, 0, sizeof(stmt_cache_log));
10923   memset(&trx_cache_log, 0, sizeof(trx_cache_log));
10924 
10925   cache_mngr= (binlog_cache_mngr*) my_malloc(key_memory_binlog_cache_mngr,
10926                                              sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
10927   if (!cache_mngr)
10928   {
10929     DBUG_RETURN(1);
10930   }
10931   if (open_cached_file(&stmt_cache_log, mysql_tmpdir,
10932                        LOG_PREFIX, binlog_stmt_cache_size, MYF(MY_WME)))
10933   {
10934     my_free(cache_mngr);
10935     DBUG_RETURN(1);                      // Didn't manage to set it up
10936   }
10937   if (open_cached_file(&trx_cache_log, mysql_tmpdir,
10938                        LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
10939   {
10940     close_cached_file(&stmt_cache_log);
10941     my_free(cache_mngr);
10942     DBUG_RETURN(1);
10943   }
10944   DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) cache_mngr));
10945   thd_set_ha_data(this, binlog_hton, cache_mngr);
10946 
10947   cache_mngr= new (thd_get_cache_mngr(this))
10948               binlog_cache_mngr(max_binlog_stmt_cache_size,
10949                                 &binlog_stmt_cache_use,
10950                                 &binlog_stmt_cache_disk_use,
10951                                 max_binlog_cache_size,
10952                                 &binlog_cache_use,
10953                                 &binlog_cache_disk_use,
10954                                 stmt_cache_log,
10955                                 trx_cache_log);
10956   DBUG_RETURN(0);
10957 }
10958 
10959 /**
10960 
10961 */
register_binlog_handler(THD * thd,bool trx)10962 void register_binlog_handler(THD *thd, bool trx)
10963 {
10964   DBUG_ENTER("register_binlog_handler");
10965   /*
10966     If this is the first call to this function while processing a statement,
10967     the transactional cache does not have a savepoint defined. So, in what
10968     follows:
10969       . an implicit savepoint is defined;
10970       . callbacks are registered;
10971       . binary log is set as read/write.
10972 
10973     The savepoint allows for truncating the trx-cache transactional changes
10974     fail. Callbacks are necessary to flush caches upon committing or rolling
10975     back a statement or a transaction. However, notifications do not happen
10976     if the binary log is set as read/write.
10977   */
10978   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
10979   if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
10980   {
10981     /*
10982       Set an implicit savepoint in order to be able to truncate a trx-cache.
10983     */
10984     my_off_t pos= 0;
10985     binlog_trans_log_savepos(thd, &pos);
10986     cache_mngr->trx_cache.set_prev_position(pos);
10987 
10988     /*
10989       Set callbacks in order to be able to call commmit or rollback.
10990     */
10991     if (trx)
10992       trans_register_ha(thd, TRUE, binlog_hton, NULL);
10993     trans_register_ha(thd, FALSE, binlog_hton, NULL);
10994 
10995     /*
10996       Set the binary log as read/write otherwise callbacks are not called.
10997     */
10998     thd->ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
10999   }
11000   DBUG_VOID_RETURN;
11001 }
11002 
11003 /**
11004   Function to start a statement and optionally a transaction for the
11005   binary log.
11006 
11007   This function does three things:
11008     - Starts a transaction if not in autocommit mode or if a BEGIN
11009       statement has been seen.
11010 
11011     - Start a statement transaction to allow us to truncate the cache.
11012 
11013     - Save the currrent binlog position so that we can roll back the
11014       statement by truncating the cache.
11015 
11016       We only update the saved position if the old one was undefined,
11017       the reason is that there are some cases (e.g., for CREATE-SELECT)
11018       where the position is saved twice (e.g., both in
11019       Query_result_create::prepare() and THD::binlog_write_table_map()), but
11020       we should use the first. This means that calls to this function
11021       can be used to start the statement before the first table map
11022       event, to include some extra events.
11023 
11024   Note however that IMMEDIATE_LOGGING implies that the statement is
11025   written without BEGIN/COMMIT.
11026 
11027   @param thd         Thread variable
11028   @param start_event The first event requested to be written into the
11029                      binary log
11030  */
binlog_start_trans_and_stmt(THD * thd,Log_event * start_event)11031 static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event)
11032 {
11033   DBUG_ENTER("binlog_start_trans_and_stmt");
11034 
11035   /*
11036     Initialize the cache manager if this was not done yet.
11037   */
11038   if (thd->binlog_setup_trx_data())
11039     DBUG_RETURN(1);
11040 
11041   /*
11042     Retrieve the appropriated cache.
11043   */
11044   bool is_transactional= start_event->is_using_trans_cache();
11045   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
11046   binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_transactional);
11047 
11048   /*
11049     If the event is requesting immediatly logging, there is no need to go
11050     further down and set savepoint and register callbacks.
11051   */
11052   if (start_event->is_using_immediate_logging())
11053     DBUG_RETURN(0);
11054 
11055   register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode());
11056 
11057   /*
11058     If the cache is empty log "BEGIN" at the beginning of every transaction.
11059     Here, a transaction is either a BEGIN..COMMIT/ROLLBACK block or a single
11060     statement in autocommit mode.
11061   */
11062   if (cache_data->is_binlog_empty())
11063   {
11064     static const char begin[]= "BEGIN";
11065     const char *query= NULL;
11066     char buf[XID::ser_buf_size];
11067     char xa_start[sizeof("XA START") + 1 + sizeof(buf)];
11068     XID_STATE *xs= thd->get_transaction()->xid_state();
11069     int qlen= sizeof(begin) - 1;
11070 
11071     if (is_transactional && xs->has_state(XID_STATE::XA_ACTIVE))
11072     {
11073       /*
11074         XA-prepare logging case.
11075       */
11076       qlen= sprintf(xa_start, "XA START %s", xs->get_xid()->serialize(buf));
11077       query= xa_start;
11078     }
11079     else
11080     {
11081       /*
11082         Regular transaction case.
11083       */
11084       query= begin;
11085     }
11086 
11087     Query_log_event qinfo(thd, query, qlen,
11088                           is_transactional, false, true, 0, true);
11089     if (cache_data->write_event(thd, &qinfo))
11090       DBUG_RETURN(1);
11091   }
11092 
11093   DBUG_RETURN(0);
11094 }
11095 
11096 /**
11097   This function writes a table map to the binary log.
11098   Note that in order to keep the signature uniform with related methods,
11099   we use a redundant parameter to indicate whether a transactional table
11100   was changed or not.
11101   Sometimes it will write a Rows_query_log_event into binary log before
11102   the table map too.
11103 
11104   @param table             a pointer to the table.
11105   @param is_transactional  @c true indicates a transactional table,
11106                            otherwise @c false a non-transactional.
11107   @param binlog_rows_query @c true indicates a Rows_query log event
11108                            will be binlogged before table map,
11109                            otherwise @c false indicates it will not
11110                            be binlogged.
11111   @return
11112     nonzero if an error pops up when writing the table map event
11113     or the Rows_query log event.
11114 */
binlog_write_table_map(TABLE * table,bool is_transactional,bool binlog_rows_query)11115 int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
11116                                 bool binlog_rows_query)
11117 {
11118   int error;
11119   DBUG_ENTER("THD::binlog_write_table_map");
11120   DBUG_PRINT("enter", ("table: 0x%lx  (%s: #%llu)",
11121                        (long) table, table->s->table_name.str,
11122                        table->s->table_map_id.id()));
11123 
11124   /* Pre-conditions */
11125   assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
11126   assert(table->s->table_map_id.is_valid());
11127 
11128   Table_map_log_event
11129     the_event(this, table, table->s->table_map_id, is_transactional);
11130 
11131   binlog_start_trans_and_stmt(this, &the_event);
11132 
11133   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
11134 
11135   binlog_cache_data *cache_data=
11136     cache_mngr->get_binlog_cache_data(is_transactional);
11137 
11138   if (binlog_rows_query && this->query().str)
11139   {
11140     /* Write the Rows_query_log_event into binlog before the table map */
11141     Rows_query_log_event
11142       rows_query_ev(this, this->query().str, this->query().length);
11143     if ((error= cache_data->write_event(this, &rows_query_ev)))
11144       DBUG_RETURN(error);
11145   }
11146 
11147   if ((error= cache_data->write_event(this, &the_event)))
11148     DBUG_RETURN(error);
11149 
11150   binlog_table_maps++;
11151   DBUG_RETURN(0);
11152 }
11153 
11154 /**
11155   This function retrieves a pending row event from a cache which is
11156   specified through the parameter @c is_transactional. Respectively, when it
11157   is @c true, the pending event is returned from the transactional cache.
11158   Otherwise from the non-transactional cache.
11159 
11160   @param is_transactional  @c true indicates a transactional cache,
11161                            otherwise @c false a non-transactional.
11162   @return
11163     The row event if any.
11164 */
11165 Rows_log_event*
binlog_get_pending_rows_event(bool is_transactional) const11166 THD::binlog_get_pending_rows_event(bool is_transactional) const
11167 {
11168   Rows_log_event* rows= NULL;
11169   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
11170 
11171   /*
11172     This is less than ideal, but here's the story: If there is no cache_mngr,
11173     prepare_pending_rows_event() has never been called (since the cache_mngr
11174     is set up there). In that case, we just return NULL.
11175    */
11176   if (cache_mngr)
11177   {
11178     binlog_cache_data *cache_data=
11179       cache_mngr->get_binlog_cache_data(is_transactional);
11180 
11181     rows= cache_data->pending();
11182   }
11183   return (rows);
11184 }
11185 
11186 /**
11187    @param db    db name c-string to be inserted into alphabetically sorted
11188                 THD::binlog_accessed_db_names list.
11189 
11190                 Note, that space for both the data and the node
11191                 struct are allocated in THD::main_mem_root.
11192                 The list lasts for the top-level query time and is reset
11193                 in @c THD::cleanup_after_query().
11194 */
11195 void
add_to_binlog_accessed_dbs(const char * db_param)11196 THD::add_to_binlog_accessed_dbs(const char *db_param)
11197 {
11198   char *after_db;
11199   /*
11200     binlog_accessed_db_names list is to maintain the database
11201     names which are referenced in a given command.
11202     Prior to bug 17806014 fix, 'main_mem_root' memory root used
11203     to store this list. The 'main_mem_root' scope is till the end
11204     of the query. Hence it caused increasing memory consumption
11205     problem in big procedures like the ones mentioned below.
11206     Eg: CALL p1() where p1 is having 1,00,000 create and drop tables.
11207     'main_mem_root' is freed only at the end of the command CALL p1()'s
11208     execution. But binlog_accessed_db_names list scope is only till the
11209     individual statements specified the procedure(create/drop statements).
11210     Hence the memory allocated in 'main_mem_root' was left uncleared
11211     until the p1's completion, even though it is not required after
11212     completion of individual statements.
11213 
11214     Instead of using 'main_mem_root' whose scope is complete query execution,
11215     now the memroot is changed to use 'thd->mem_root' whose scope is until the
11216     individual statement in CALL p1(). 'thd->mem_root' is set to 'execute_mem_root'
11217     in the context of procedure and it's scope is till the individual statement
11218     in CALL p1() and thd->memroot is equal to 'main_mem_root' in the context
11219     of a normal 'top level query'.
11220 
11221     Eg: a) create table t1(i int); => If this function is called while
11222            processing this statement, thd->memroot is equal to &main_mem_root
11223            which will be freed immediately after executing this statement.
11224         b) CALL p1() -> p1 contains create table t1(i int); => If this function
11225            is called while processing create table statement which is inside
11226            a stored procedure, then thd->memroot is equal to 'execute_mem_root'
11227            which will be freed immediately after executing this statement.
11228     In both a and b case, thd->memroot will be freed immediately and will not
11229     increase memory consumption.
11230 
11231     A special case(stored functions/triggers):
11232     Consider the following example:
11233     create function f1(i int) returns int
11234     begin
11235       insert into db1.t1 values (1);
11236       insert into db2.t1 values (2);
11237     end;
11238     When we are processing SELECT f1(), the list should contain db1, db2 names.
11239     Since thd->mem_root contains 'execute_mem_root' in the context of
11240     stored function, the mem root will be freed after adding db1 in
11241     the list and when we are processing the second statement and when we try
11242     to add 'db2' in the db1's list, it will lead to crash as db1's memory
11243     is already freed. To handle this special case, if in_sub_stmt is set
11244     (which is true incase of stored functions/triggers), we use &main_mem_root,
11245     if not set we will use thd->memroot which changes it's value to
11246     'execute_mem_root' or '&main_mem_root' depends on the context.
11247    */
11248   MEM_ROOT *db_mem_root= in_sub_stmt ? &main_mem_root : mem_root;
11249 
11250   if (!binlog_accessed_db_names)
11251     binlog_accessed_db_names= new (db_mem_root) List<char>;
11252 
11253   if (binlog_accessed_db_names->elements >  MAX_DBS_IN_EVENT_MTS)
11254   {
11255     push_warning_printf(this, Sql_condition::SL_WARNING,
11256                         ER_MTS_UPDATED_DBS_GREATER_MAX,
11257                         ER(ER_MTS_UPDATED_DBS_GREATER_MAX),
11258                         MAX_DBS_IN_EVENT_MTS);
11259     return;
11260   }
11261 
11262   after_db= strdup_root(db_mem_root, db_param);
11263 
11264   /*
11265      sorted insertion is implemented with first rearranging data
11266      (pointer to char*) of the links and final appending of the least
11267      ordered data to create a new link in the list.
11268   */
11269   if (binlog_accessed_db_names->elements != 0)
11270   {
11271     List_iterator<char> it(*get_binlog_accessed_db_names());
11272 
11273     while (it++)
11274     {
11275       char *swap= NULL;
11276       char **ref_cur_db= it.ref();
11277       int cmp= strcmp(after_db, *ref_cur_db);
11278 
11279       assert(!swap || cmp < 0);
11280 
11281       if (cmp == 0)
11282       {
11283         after_db= NULL;  /* dup to ignore */
11284         break;
11285       }
11286       else if (swap || cmp > 0)
11287       {
11288         swap= *ref_cur_db;
11289         *ref_cur_db= after_db;
11290         after_db= swap;
11291       }
11292     }
11293   }
11294   if (after_db)
11295     binlog_accessed_db_names->push_back(after_db, db_mem_root);
11296 }
11297 
11298 /*
11299   Tells if two (or more) tables have auto_increment columns and we want to
11300   lock those tables with a write lock.
11301 
11302   SYNOPSIS
11303     has_two_write_locked_tables_with_auto_increment
11304       tables        Table list
11305 
11306   NOTES:
11307     Call this function only when you have established the list of all tables
11308     which you'll want to update (including stored functions, triggers, views
11309     inside your statement).
11310 */
11311 
11312 static bool
has_write_table_with_auto_increment(TABLE_LIST * tables)11313 has_write_table_with_auto_increment(TABLE_LIST *tables)
11314 {
11315   for (TABLE_LIST *table= tables; table; table= table->next_global)
11316   {
11317     /* we must do preliminary checks as table->table may be NULL */
11318     if (!table->is_placeholder() &&
11319         table->table->found_next_number_field &&
11320         (table->lock_type >= TL_WRITE_ALLOW_WRITE))
11321       return 1;
11322   }
11323 
11324   return 0;
11325 }
11326 
11327 /*
11328    checks if we have select tables in the table list and write tables
11329    with auto-increment column.
11330 
11331   SYNOPSIS
11332    has_two_write_locked_tables_with_auto_increment_and_select
11333       tables        Table list
11334 
11335   RETURN VALUES
11336 
11337    -true if the table list has atleast one table with auto-increment column
11338 
11339 
11340          and atleast one table to select from.
11341    -false otherwise
11342 */
11343 
11344 static bool
has_write_table_with_auto_increment_and_select(TABLE_LIST * tables)11345 has_write_table_with_auto_increment_and_select(TABLE_LIST *tables)
11346 {
11347   bool has_select= false;
11348   bool has_auto_increment_tables = has_write_table_with_auto_increment(tables);
11349   for(TABLE_LIST *table= tables; table; table= table->next_global)
11350   {
11351      if (!table->is_placeholder() &&
11352         (table->lock_type <= TL_READ_NO_INSERT))
11353       {
11354         has_select= true;
11355         break;
11356       }
11357   }
11358   return(has_select && has_auto_increment_tables);
11359 }
11360 
11361 /*
11362   Tells if there is a table whose auto_increment column is a part
11363   of a compound primary key while is not the first column in
11364   the table definition.
11365 
11366   @param tables Table list
11367 
11368   @return true if the table exists, fais if does not.
11369 */
11370 
11371 static bool
has_write_table_auto_increment_not_first_in_pk(TABLE_LIST * tables)11372 has_write_table_auto_increment_not_first_in_pk(TABLE_LIST *tables)
11373 {
11374   for (TABLE_LIST *table= tables; table; table= table->next_global)
11375   {
11376     /* we must do preliminary checks as table->table may be NULL */
11377     if (!table->is_placeholder() &&
11378         table->table->found_next_number_field &&
11379         (table->lock_type >= TL_WRITE_ALLOW_WRITE)
11380         && table->table->s->next_number_keypart != 0)
11381       return 1;
11382   }
11383 
11384   return 0;
11385 }
11386 
11387 /*
11388   Function to check whether the table in query uses a fulltext parser
11389   plugin or not.
11390 
11391   @param s - table share pointer.
11392 
11393   @retval TRUE - The table uses fulltext parser plugin.
11394   @retval FALSE - Otherwise.
11395 */
fulltext_unsafe_set(TABLE_SHARE * s)11396 static bool inline fulltext_unsafe_set(TABLE_SHARE *s)
11397 {
11398   for (unsigned int i= 0 ; i < s->keys ; i++)
11399   {
11400     if ((s->key_info[i].flags & HA_USES_PARSER) && s->keys_in_use.is_set(i))
11401       return TRUE;
11402   }
11403   return FALSE;
11404 }
11405 #ifndef NDEBUG
get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)11406 const char * get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)
11407 {
11408    switch (locked_tables_mode)
11409    {
11410    case LTM_NONE:
11411      return "LTM_NONE";
11412    case LTM_LOCK_TABLES:
11413      return "LTM_LOCK_TABLES";
11414    case LTM_PRELOCKED:
11415      return "LTM_PRELOCKED";
11416    case LTM_PRELOCKED_UNDER_LOCK_TABLES:
11417      return "LTM_PRELOCKED_UNDER_LOCK_TABLES";
11418    default:
11419      return "Unknown table lock mode";
11420    }
11421 }
11422 #endif
11423 
11424 /**
11425   Decide on logging format to use for the statement and issue errors
11426   or warnings as needed.  The decision depends on the following
11427   parameters:
11428 
11429   - The logging mode, i.e., the value of binlog_format.  Can be
11430     statement, mixed, or row.
11431 
11432   - The type of statement.  There are three types of statements:
11433     "normal" safe statements; unsafe statements; and row injections.
11434     An unsafe statement is one that, if logged in statement format,
11435     might produce different results when replayed on the slave (e.g.,
11436     queries with a LIMIT clause).  A row injection is either a BINLOG
11437     statement, or a row event executed by the slave's SQL thread.
11438 
11439   - The capabilities of tables modified by the statement.  The
11440     *capabilities vector* for a table is a set of flags associated
11441     with the table.  Currently, it only includes two flags: *row
11442     capability flag* and *statement capability flag*.
11443 
11444     The row capability flag is set if and only if the engine can
11445     handle row-based logging. The statement capability flag is set if
11446     and only if the table can handle statement-based logging.
11447 
11448   Decision table for logging format
11449   ---------------------------------
11450 
11451   The following table summarizes how the format and generated
11452   warning/error depends on the tables' capabilities, the statement
11453   type, and the current binlog_format.
11454 
11455      Row capable        N NNNNNNNNN YYYYYYYYY YYYYYYYYY
11456      Statement capable  N YYYYYYYYY NNNNNNNNN YYYYYYYYY
11457 
11458      Statement type     * SSSUUUIII SSSUUUIII SSSUUUIII
11459 
11460      binlog_format      * SMRSMRSMR SMRSMRSMR SMRSMRSMR
11461 
11462      Logged format      - SS-S----- -RR-RR-RR SRRSRR-RR
11463      Warning/Error      1 --2732444 5--5--6-- ---7--6--
11464 
11465   Legend
11466   ------
11467 
11468   Row capable:    N - Some table not row-capable, Y - All tables row-capable
11469   Stmt capable:   N - Some table not stmt-capable, Y - All tables stmt-capable
11470   Statement type: (S)afe, (U)nsafe, or Row (I)njection
11471   binlog_format:  (S)TATEMENT, (M)IXED, or (R)OW
11472   Logged format:  (S)tatement or (R)ow
11473   Warning/Error:  Warnings and error messages are as follows:
11474 
11475   1. Error: Cannot execute statement: binlogging impossible since both
11476      row-incapable engines and statement-incapable engines are
11477      involved.
11478 
11479   2. Error: Cannot execute statement: binlogging impossible since
11480      BINLOG_FORMAT = ROW and at least one table uses a storage engine
11481      limited to statement-logging.
11482 
11483   3. Error: Cannot execute statement: binlogging of unsafe statement
11484      is impossible when storage engine is limited to statement-logging
11485      and BINLOG_FORMAT = MIXED.
11486 
11487   4. Error: Cannot execute row injection: binlogging impossible since
11488      at least one table uses a storage engine limited to
11489      statement-logging.
11490 
11491   5. Error: Cannot execute statement: binlogging impossible since
11492      BINLOG_FORMAT = STATEMENT and at least one table uses a storage
11493      engine limited to row-logging.
11494 
11495   6. Error: Cannot execute row injection: binlogging impossible since
11496      BINLOG_FORMAT = STATEMENT.
11497 
11498   7. Warning: Unsafe statement binlogged in statement format since
11499      BINLOG_FORMAT = STATEMENT.
11500 
11501   In addition, we can produce the following error (not depending on
11502   the variables of the decision diagram):
11503 
11504   8. Error: Cannot execute statement: binlogging impossible since more
11505      than one engine is involved and at least one engine is
11506      self-logging.
11507 
11508   9. Error: Do not allow users to modify a gtid_executed table
11509      explicitly by a XA transaction.
11510 
11511   For each error case above, the statement is prevented from being
11512   logged, we report an error, and roll back the statement.  For
11513   warnings, we set the thd->binlog_flags variable: the warning will be
11514   printed only if the statement is successfully logged.
11515 
11516   @see THD::binlog_query
11517 
11518   @param[in] thd    Client thread
11519   @param[in] tables Tables involved in the query
11520   @param[in] use_cached_table_flags use cached value of
11521   handler::cached_table_flags. Do not use cached value and force recalculation
11522   in case of 'false'.
11523 
11524   @retval 0 No error; statement can be logged.
11525   @retval -1 One of the error conditions above applies (1, 2, 4, 5, 6 or 9).
11526 */
11527 
decide_logging_format(TABLE_LIST * tables,bool use_cached_table_flags)11528 int THD::decide_logging_format(TABLE_LIST *tables, bool use_cached_table_flags)
11529 {
11530   DBUG_ENTER("THD::decide_logging_format");
11531   DBUG_PRINT("info", ("query: %s", query().str));
11532   DBUG_PRINT("info", ("variables.binlog_format: %lu",
11533                       variables.binlog_format));
11534   DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags(): 0x%x",
11535                       lex->get_stmt_unsafe_flags()));
11536 
11537   DEBUG_SYNC(current_thd, "begin_decide_logging_format");
11538 
11539   reset_binlog_local_stmt_filter();
11540 
11541   /*
11542     We should not decide logging format if the binlog is closed or
11543     binlogging is off, or if the statement is filtered out from the
11544     binlog by filtering rules.
11545   */
11546   if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) &&
11547       !(variables.binlog_format == BINLOG_FORMAT_STMT &&
11548         !binlog_filter->db_ok(m_db.str)))
11549   {
11550     /*
11551       Compute one bit field with the union of all the engine
11552       capabilities, and one with the intersection of all the engine
11553       capabilities.
11554     */
11555     handler::Table_flags flags_write_some_set= 0;
11556     handler::Table_flags flags_access_some_set= 0;
11557     handler::Table_flags flags_write_all_set=
11558       HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE;
11559 
11560     /*
11561        If different types of engines are about to be updated.
11562        For example: Innodb and Falcon; Innodb and MyIsam.
11563     */
11564     my_bool multi_write_engine= FALSE;
11565     /*
11566        If different types of engines are about to be accessed
11567        and any of them is about to be updated. For example:
11568        Innodb and Falcon; Innodb and MyIsam.
11569     */
11570     my_bool multi_access_engine= FALSE;
11571     /*
11572        72475 : Track if statement creates or drops a temporary table
11573        and log in ROW if it does.
11574     */
11575     bool create_drop_temp_table= false;
11576     /*
11577        Identifies if a table is changed.
11578     */
11579     my_bool is_write= FALSE;
11580     /*
11581        A pointer to a previous table that was changed.
11582     */
11583     TABLE* prev_write_table= NULL;
11584     /*
11585        A pointer to a previous table that was accessed.
11586     */
11587     TABLE* prev_access_table= NULL;
11588     /*
11589       True if at least one table is transactional.
11590     */
11591     bool write_to_some_transactional_table= false;
11592     /*
11593       True if at least one table is non-transactional.
11594     */
11595     bool write_to_some_non_transactional_table= false;
11596     /*
11597        True if all non-transactional tables that have been updated
11598        are temporary.
11599     */
11600     bool write_all_non_transactional_are_tmp_tables= true;
11601     /**
11602       The number of tables used in the current statement,
11603       that should be replicated.
11604     */
11605     uint replicated_tables_count= 0;
11606     /**
11607       The number of tables written to in the current statement,
11608       that should not be replicated.
11609       A table should not be replicated when it is considered
11610       'local' to a MySQL instance.
11611       Currently, these tables are:
11612       - mysql.slow_log
11613       - mysql.general_log
11614       - mysql.slave_relay_log_info
11615       - mysql.slave_master_info
11616       - mysql.slave_worker_info
11617       - performance_schema.*
11618       - TODO: information_schema.*
11619       In practice, from this list, only performance_schema.* tables
11620       are written to by user queries.
11621     */
11622     uint non_replicated_tables_count= 0;
11623     /**
11624       Indicate whether we already reported a warning
11625       on modifying gtid_executed table.
11626     */
11627     int warned_gtid_executed_table= 0;
11628 #ifndef NDEBUG
11629     {
11630       DBUG_PRINT("debug", ("prelocked_mode: %s",
11631                            get_locked_tables_mode_name(locked_tables_mode)));
11632     }
11633 #endif
11634 
11635     if (variables.binlog_format != BINLOG_FORMAT_ROW && tables)
11636     {
11637       /*
11638         DML statements that modify a table with an auto_increment column based on
11639         rows selected from a table are unsafe as the order in which the rows are
11640         fetched from the select tables cannot be determined and may differ on
11641         master and slave.
11642        */
11643       if (has_write_table_with_auto_increment_and_select(tables))
11644         lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_WRITE_AUTOINC_SELECT);
11645 
11646       if (has_write_table_auto_increment_not_first_in_pk(tables))
11647         lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST);
11648 
11649       /*
11650         A query that modifies autoinc column in sub-statement can make the
11651         master and slave inconsistent.
11652         We can solve these problems in mixed mode by switching to row-based binlogging
11653         if at least one updated table is used by sub-statement
11654        */
11655       if (lex->requires_prelocking() &&
11656           has_write_table_with_auto_increment(lex->first_not_own_table()))
11657         lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS);
11658     }
11659 
11660     /*
11661       Get the capabilities vector for all involved storage engines and
11662       mask out the flags for the binary log.
11663     */
11664     for (TABLE_LIST *table= tables; table; table= table->next_global)
11665     {
11666       if (table->is_placeholder())
11667       {
11668         /*
11669           bug 72475 : Detect if this is a CREATE TEMPORARY or DROP of a
11670           temporary table. This will be used later in determining whether to
11671           log in ROW or STMT if MIXED replication is being used.
11672         */
11673         if(!create_drop_temp_table &&
11674            !table->table &&
11675            ((lex->sql_command == SQLCOM_CREATE_TABLE &&
11676              (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)) ||
11677             ((lex->sql_command == SQLCOM_DROP_TABLE ||
11678               lex->sql_command == SQLCOM_TRUNCATE) &&
11679              find_temporary_table(this, table))))
11680         {
11681           create_drop_temp_table= true;
11682         }
11683         continue;
11684       }
11685 
11686       handler::Table_flags const flags= table->table->file->ha_table_flags(!use_cached_table_flags);
11687 
11688       DBUG_PRINT("info", ("table: %s; ha_table_flags: 0x%llx",
11689                           table->table_name, flags));
11690 
11691       if (table->table->no_replicate)
11692       {
11693         if (!warned_gtid_executed_table)
11694         {
11695           warned_gtid_executed_table=
11696             gtid_state->warn_or_err_on_modify_gtid_table(this, table);
11697           /*
11698             Do not allow users to modify the gtid_executed table
11699             explicitly by a XA transaction.
11700           */
11701           if (warned_gtid_executed_table == 2)
11702             DBUG_RETURN(-1);
11703         }
11704         /*
11705           The statement uses a table that is not replicated.
11706           The following properties about the table:
11707           - persistent / transient
11708           - transactional / non transactional
11709           - temporary / permanent
11710           - read or write
11711           - multiple engines involved because of this table
11712           are not relevant, as this table is completely ignored.
11713           Because the statement uses a non replicated table,
11714           using STATEMENT format in the binlog is impossible.
11715           Either this statement will be discarded entirely,
11716           or it will be logged (possibly partially) in ROW format.
11717         */
11718         lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_TABLE);
11719 
11720         if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
11721         {
11722           non_replicated_tables_count++;
11723           continue;
11724         }
11725       }
11726 
11727       replicated_tables_count++;
11728 
11729       my_bool trans= table->table->file->has_transactions();
11730 
11731       if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
11732       {
11733         write_to_some_transactional_table=
11734           write_to_some_transactional_table || trans;
11735 
11736         write_to_some_non_transactional_table=
11737           write_to_some_non_transactional_table || !trans;
11738 
11739         if (prev_write_table && prev_write_table->file->ht !=
11740             table->table->file->ht)
11741           multi_write_engine= TRUE;
11742 
11743         if (table->table->s->tmp_table)
11744           lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TEMP_TRANS_TABLE :
11745                                                LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE);
11746         else
11747           lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TRANS_TABLE :
11748                                                LEX::STMT_WRITES_NON_TRANS_TABLE);
11749 
11750         /*
11751          Non-transactional updates are allowed when row binlog format is
11752          used and all non-transactional tables are temporary.
11753          Binlog format is checked on THD::is_dml_gtid_compatible() method.
11754         */
11755         if (!trans)
11756           write_all_non_transactional_are_tmp_tables=
11757             write_all_non_transactional_are_tmp_tables &&
11758             table->table->s->tmp_table;
11759 
11760         flags_write_all_set &= flags;
11761         flags_write_some_set |= flags;
11762         is_write= TRUE;
11763 
11764         prev_write_table= table->table;
11765 
11766         /*
11767           It should be marked unsafe if a table which uses a fulltext parser
11768           plugin is modified. See also bug#48183.
11769         */
11770         if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN))
11771         {
11772           if (fulltext_unsafe_set(table->table->s))
11773             lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN);
11774         }
11775         /*
11776           INSERT...ON DUPLICATE KEY UPDATE on a table with more than one unique keys
11777           can be unsafe. Check for it if the flag is already not marked for the
11778           given statement.
11779         */
11780         if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS) &&
11781             lex->sql_command == SQLCOM_INSERT && lex->duplicates == DUP_UPDATE)
11782         {
11783           uint keys= table->table->s->keys, i= 0, unique_keys= 0;
11784           for (KEY* keyinfo= table->table->s->key_info;
11785                i < keys && unique_keys <= 1; i++, keyinfo++)
11786           {
11787             if (keyinfo->flags & HA_NOSAME)
11788               unique_keys++;
11789           }
11790           if (unique_keys > 1 )
11791             lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS);
11792         }
11793       }
11794       if(lex->get_using_match())
11795       {
11796         if (fulltext_unsafe_set(table->table->s))
11797           lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN);
11798       }
11799 
11800       flags_access_some_set |= flags;
11801 
11802       if (table->table->s->tmp_table)
11803         lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TEMP_TRANS_TABLE :
11804                                              LEX::STMT_READS_TEMP_NON_TRANS_TABLE);
11805       else
11806         lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TRANS_TABLE :
11807                                              LEX::STMT_READS_NON_TRANS_TABLE);
11808 
11809       if (prev_access_table && prev_access_table->file->ht !=
11810           table->table->file->ht)
11811          multi_access_engine= TRUE;
11812 
11813       prev_access_table= table->table;
11814     }
11815     assert(!is_write ||
11816            write_to_some_transactional_table ||
11817            write_to_some_non_transactional_table);
11818     /*
11819       write_all_non_transactional_are_tmp_tables may be true if any
11820       non-transactional table was not updated, so we fix its value here.
11821     */
11822     write_all_non_transactional_are_tmp_tables=
11823       write_all_non_transactional_are_tmp_tables &&
11824       write_to_some_non_transactional_table;
11825 
11826     DBUG_PRINT("info", ("flags_write_all_set: 0x%llx", flags_write_all_set));
11827     DBUG_PRINT("info", ("flags_write_some_set: 0x%llx", flags_write_some_set));
11828     DBUG_PRINT("info", ("flags_access_some_set: 0x%llx", flags_access_some_set));
11829     DBUG_PRINT("info", ("multi_write_engine: %d", multi_write_engine));
11830     DBUG_PRINT("info", ("multi_access_engine: %d", multi_access_engine));
11831 
11832     int error= 0;
11833     int unsafe_flags;
11834 
11835     bool multi_stmt_trans= in_multi_stmt_transaction_mode();
11836     bool trans_table= trans_has_updated_trans_table(this);
11837     bool binlog_direct= variables.binlog_direct_non_trans_update;
11838 
11839     if (lex->is_mixed_stmt_unsafe(multi_stmt_trans, binlog_direct,
11840                                   trans_table, tx_isolation))
11841       lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MIXED_STATEMENT);
11842     else if (multi_stmt_trans && trans_table && !binlog_direct &&
11843              lex->stmt_accessed_table(LEX::STMT_WRITES_NON_TRANS_TABLE))
11844       lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS);
11845 
11846     /*
11847       If more than one engine is involved in the statement and at
11848       least one is doing it's own logging (is *self-logging*), the
11849       statement cannot be logged atomically, so we generate an error
11850       rather than allowing the binlog to become corrupt.
11851     */
11852     if (multi_write_engine &&
11853         (flags_write_some_set & HA_HAS_OWN_BINLOGGING))
11854       my_error((error= ER_BINLOG_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE),
11855                MYF(0));
11856     else if (multi_access_engine && flags_access_some_set & HA_HAS_OWN_BINLOGGING)
11857       lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE);
11858 
11859     /* XA is unsafe for statements */
11860     if (is_write &&
11861         !get_transaction()->xid_state()->has_state(XID_STATE::XA_NOTR))
11862       lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_XA);
11863 
11864     DBUG_EXECUTE_IF("make_stmt_only_engines",
11865                     {
11866                       flags_write_all_set= HA_BINLOG_STMT_CAPABLE;
11867                     };);
11868 
11869     /* both statement-only and row-only engines involved */
11870     if ((flags_write_all_set & (HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE)) == 0)
11871     {
11872       /*
11873         1. Error: Binary logging impossible since both row-incapable
11874            engines and statement-incapable engines are involved
11875       */
11876       my_error((error= ER_BINLOG_ROW_ENGINE_AND_STMT_ENGINE), MYF(0));
11877     }
11878     /* statement-only engines involved */
11879     else if ((flags_write_all_set & HA_BINLOG_ROW_CAPABLE) == 0)
11880     {
11881       if (lex->is_stmt_row_injection())
11882       {
11883         /*
11884           4. Error: Cannot execute row injection since table uses
11885              storage engine limited to statement-logging
11886         */
11887         my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0));
11888       }
11889       else if (variables.binlog_format == BINLOG_FORMAT_ROW &&
11890                sqlcom_can_generate_row_events(this->lex->sql_command))
11891       {
11892         /*
11893           2. Error: Cannot modify table that uses a storage engine
11894              limited to statement-logging when BINLOG_FORMAT = ROW
11895         */
11896         my_error((error= ER_BINLOG_ROW_MODE_AND_STMT_ENGINE), MYF(0));
11897       }
11898       else if (variables.binlog_format == BINLOG_FORMAT_MIXED &&
11899           ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
11900       {
11901         /*
11902           3. Error: Cannot execute statement: binlogging of unsafe
11903              statement is impossible when storage engine is limited to
11904              statement-logging and BINLOG_FORMAT = MIXED.
11905         */
11906         for (int unsafe_type= 0;
11907              unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
11908              unsafe_type++)
11909           if (unsafe_flags & (1 << unsafe_type))
11910             my_error((error= ER_BINLOG_UNSAFE_AND_STMT_ENGINE), MYF(0),
11911                      ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
11912       }
11913       else if (is_write && ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
11914       {
11915         /*
11916           7. Warning: Unsafe statement logged as statement due to
11917              binlog_format = STATEMENT
11918         */
11919         binlog_unsafe_warning_flags|= unsafe_flags;
11920         DBUG_PRINT("info", ("Scheduling warning to be issued by "
11921                             "binlog_query: '%s'",
11922                             ER(ER_BINLOG_UNSAFE_STATEMENT)));
11923         DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
11924                             binlog_unsafe_warning_flags));
11925       }
11926       /* log in statement format! */
11927     }
11928     /* no statement-only engines */
11929     else
11930     {
11931       /* binlog_format = STATEMENT */
11932       if (variables.binlog_format == BINLOG_FORMAT_STMT)
11933       {
11934         if (lex->is_stmt_row_injection())
11935         {
11936           /*
11937             6. Error: Cannot execute row injection since
11938                BINLOG_FORMAT = STATEMENT
11939           */
11940           my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_MODE), MYF(0));
11941         }
11942         else if ((flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0 &&
11943                  sqlcom_can_generate_row_events(this->lex->sql_command))
11944         {
11945           /*
11946             5. Error: Cannot modify table that uses a storage engine
11947                limited to row-logging when binlog_format = STATEMENT
11948           */
11949           my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), "");
11950         }
11951         else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0)
11952         {
11953           /*
11954             7. Warning: Unsafe statement logged as statement due to
11955                binlog_format = STATEMENT
11956           */
11957           binlog_unsafe_warning_flags|= unsafe_flags;
11958           DBUG_PRINT("info", ("Scheduling warning to be issued by "
11959                               "binlog_query: '%s'",
11960                               ER(ER_BINLOG_UNSAFE_STATEMENT)));
11961           DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
11962                               binlog_unsafe_warning_flags));
11963         }
11964         /* log in statement format! */
11965       }
11966       /* No statement-only engines and binlog_format != STATEMENT.
11967          I.e., nothing prevents us from row logging if needed. */
11968       else
11969       {
11970         if (lex->is_stmt_unsafe() || lex->is_stmt_row_injection()
11971             || (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0
11972             || (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0
11973             || lex->stmt_accessed_table(LEX::STMT_READS_TEMP_TRANS_TABLE)
11974             || lex->stmt_accessed_table(LEX::STMT_READS_TEMP_NON_TRANS_TABLE)
11975             || create_drop_temp_table)
11976         {
11977 #ifndef NDEBUG
11978           int flags= lex->get_stmt_unsafe_flags();
11979           DBUG_PRINT("info", ("setting row format for unsafe statement"));
11980           for (int i= 0; i < Query_tables_list::BINLOG_STMT_UNSAFE_COUNT; i++)
11981           {
11982             if (flags & (1 << i))
11983               DBUG_PRINT("info", ("unsafe reason: %s",
11984                                   ER(Query_tables_list::binlog_stmt_unsafe_errcode[i])));
11985           }
11986           DBUG_PRINT("info", ("is_row_injection=%d",
11987                               lex->is_stmt_row_injection()));
11988           DBUG_PRINT("info", ("stmt_capable=%llu",
11989                               (flags_write_all_set & HA_BINLOG_STMT_CAPABLE)));
11990 #endif
11991           /* log in row format! */
11992           set_current_stmt_binlog_format_row_if_mixed();
11993         }
11994       }
11995     }
11996 
11997     if (non_replicated_tables_count > 0)
11998     {
11999       if ((replicated_tables_count == 0) || ! is_write)
12000       {
12001         DBUG_PRINT("info", ("decision: no logging, no replicated table affected"));
12002         set_binlog_local_stmt_filter();
12003       }
12004       else
12005       {
12006         if (! is_current_stmt_binlog_format_row())
12007         {
12008           my_error((error= ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES), MYF(0));
12009         }
12010         else
12011         {
12012           clear_binlog_local_stmt_filter();
12013         }
12014       }
12015     }
12016     else
12017     {
12018       clear_binlog_local_stmt_filter();
12019     }
12020 
12021     if (!error &&
12022         !is_dml_gtid_compatible(write_to_some_transactional_table,
12023                                 write_to_some_non_transactional_table,
12024                                 write_all_non_transactional_are_tmp_tables))
12025       error= 1;
12026 
12027     if (error) {
12028       DBUG_PRINT("info", ("decision: no logging since an error was generated"));
12029       DBUG_RETURN(-1);
12030     }
12031 
12032     if (is_write &&
12033         lex->sql_command != SQLCOM_END /* rows-event applying by slave */)
12034     {
12035       /*
12036         Master side of DML in the STMT format events parallelization.
12037         All involving table db:s are stored in a abc-ordered name list.
12038         In case the number of databases exceeds MAX_DBS_IN_EVENT_MTS maximum
12039         the list gathering breaks since it won't be sent to the slave.
12040       */
12041       for (TABLE_LIST *table= tables; table; table= table->next_global)
12042       {
12043         if (table->is_placeholder())
12044           continue;
12045 
12046         assert(table->table);
12047 
12048         if (table->table->file->referenced_by_foreign_key())
12049         {
12050           /*
12051              FK-referenced dbs can't be gathered currently. The following
12052              event will be marked for sequential execution on slave.
12053           */
12054           binlog_accessed_db_names= NULL;
12055           add_to_binlog_accessed_dbs("");
12056           break;
12057         }
12058         if (!is_current_stmt_binlog_format_row())
12059           add_to_binlog_accessed_dbs(table->db);
12060       }
12061     }
12062     DBUG_PRINT("info", ("decision: logging in %s format",
12063                         is_current_stmt_binlog_format_row() ?
12064                         "ROW" : "STATEMENT"));
12065 
12066     if (variables.binlog_format == BINLOG_FORMAT_ROW &&
12067         (lex->sql_command == SQLCOM_UPDATE ||
12068          lex->sql_command == SQLCOM_UPDATE_MULTI ||
12069          lex->sql_command == SQLCOM_DELETE ||
12070          lex->sql_command == SQLCOM_DELETE_MULTI))
12071     {
12072       String table_names;
12073       /*
12074         Generate a warning for UPDATE/DELETE statements that modify a
12075         BLACKHOLE table, as row events are not logged in row format.
12076       */
12077       for (TABLE_LIST *table= tables; table; table= table->next_global)
12078       {
12079         if (table->is_placeholder())
12080           continue;
12081         if (table->table->file->ht->db_type == DB_TYPE_BLACKHOLE_DB &&
12082             table->lock_type >= TL_WRITE_ALLOW_WRITE)
12083         {
12084             table_names.append(table->table_name);
12085             table_names.append(",");
12086         }
12087       }
12088       if (!table_names.is_empty())
12089       {
12090         bool is_update= (lex->sql_command == SQLCOM_UPDATE ||
12091                          lex->sql_command == SQLCOM_UPDATE_MULTI);
12092         /*
12093           Replace the last ',' with '.' for table_names
12094         */
12095         table_names.replace(table_names.length()-1, 1, ".", 1);
12096         push_warning_printf(this, Sql_condition::SL_WARNING,
12097                             WARN_ON_BLOCKHOLE_IN_RBR,
12098                             ER(WARN_ON_BLOCKHOLE_IN_RBR),
12099                             is_update ? "UPDATE" : "DELETE",
12100                             table_names.c_ptr());
12101       }
12102     }
12103   }
12104   else
12105   {
12106     DBUG_PRINT("info", ("decision: no logging since "
12107                         "mysql_bin_log.is_open() = %d "
12108                         "and (options & OPTION_BIN_LOG) = 0x%llx "
12109                         "and binlog_format = %lu "
12110                         "and binlog_filter->db_ok(db) = %d",
12111                         mysql_bin_log.is_open(),
12112                         (variables.option_bits & OPTION_BIN_LOG),
12113                         variables.binlog_format,
12114                         binlog_filter->db_ok(m_db.str)));
12115 
12116     for (TABLE_LIST *table= tables; table; table= table->next_global)
12117     {
12118       if (!table->is_placeholder() && table->table->no_replicate &&
12119           gtid_state->warn_or_err_on_modify_gtid_table(this, table))
12120         break;
12121     }
12122   }
12123 
12124   DEBUG_SYNC(current_thd, "end_decide_logging_format");
12125 
12126   DBUG_RETURN(0);
12127 }
12128 
12129 
12130 /**
12131   Given that a possible violation of gtid consistency has happened,
12132   checks if gtid-inconsistencies are forbidden by the current value of
12133   ENFORCE_GTID_CONSISTENCY and GTID_MODE. If forbidden, generates
12134   error or warning accordingly.
12135 
12136   @param thd The thread that has issued the GTID-violating statement.
12137 
12138   @param error_code The error code to use, if error or warning is to
12139   be generated.
12140 
12141   @retval false Error was generated.
12142   @retval true No error was generated (possibly a warning was generated).
12143 */
handle_gtid_consistency_violation(THD * thd,int error_code)12144 bool handle_gtid_consistency_violation(THD *thd, int error_code)
12145 {
12146   DBUG_ENTER("handle_gtid_consistency_violation");
12147 
12148   enum_group_type gtid_next_type= thd->variables.gtid_next.type;
12149   global_sid_lock->rdlock();
12150   enum_gtid_consistency_mode gtid_consistency_mode=
12151     get_gtid_consistency_mode();
12152   enum_gtid_mode gtid_mode= get_gtid_mode(GTID_MODE_LOCK_SID);
12153 
12154   DBUG_PRINT("info", ("gtid_next.type=%d gtid_mode=%s "
12155                       "gtid_consistency_mode=%d error=%d query=%s",
12156                       gtid_next_type,
12157                       get_gtid_mode_string(gtid_mode),
12158                       gtid_consistency_mode,
12159                       error_code,
12160                       thd->query().str));
12161 
12162   /*
12163     GTID violations should generate error if:
12164     - GTID_MODE=ON or ON_PERMISSIVE and GTID_NEXT='AUTOMATIC' (since the
12165       transaction is expected to commit using a GTID), or
12166     - GTID_NEXT='UUID:NUMBER' (since the transaction is expected to
12167       commit usinga GTID), or
12168     - ENFORCE_GTID_CONSISTENCY=ON.
12169   */
12170   if ((gtid_next_type == AUTOMATIC_GROUP &&
12171        gtid_mode >= GTID_MODE_ON_PERMISSIVE) ||
12172       gtid_next_type == GTID_GROUP ||
12173       gtid_consistency_mode == GTID_CONSISTENCY_MODE_ON)
12174   {
12175     global_sid_lock->unlock();
12176     my_error(error_code, MYF(0));
12177     DBUG_RETURN(false);
12178   }
12179   else
12180   {
12181     /*
12182       If we are not generating an error, we must increase the counter
12183       of GTID-violating transactions.  This will prevent a concurrent
12184       client from executing a SET GTID_MODE or SET
12185       ENFORCE_GTID_CONSISTENCY statement that would be incompatible
12186       with this transaction.
12187 
12188       If the transaction had already been accounted as a gtid violating
12189       transaction, then don't increment the counters, just issue the
12190       warning below. This prevents calling
12191       begin_automatic_gtid_violating_transaction or
12192       begin_anonymous_gtid_violating_transaction multiple times for the
12193       same transaction, which would make the counter go out of sync.
12194     */
12195     if (!thd->has_gtid_consistency_violation)
12196     {
12197       if (gtid_next_type == AUTOMATIC_GROUP)
12198         gtid_state->begin_automatic_gtid_violating_transaction();
12199       else
12200       {
12201         assert(gtid_next_type == ANONYMOUS_GROUP);
12202         gtid_state->begin_anonymous_gtid_violating_transaction();
12203       }
12204 
12205       /*
12206         If a transaction generates multiple GTID violation conditions,
12207         it must still only update the counters once.  Hence we use
12208         this per-thread flag to keep track of whether the thread has a
12209         consistency or not.  This function must only be called if the
12210         transaction does not already have a GTID violation.
12211       */
12212       thd->has_gtid_consistency_violation= true;
12213     }
12214 
12215     global_sid_lock->unlock();
12216 
12217     // Generate warning if ENFORCE_GTID_CONSISTENCY = WARN.
12218     if (gtid_consistency_mode == GTID_CONSISTENCY_MODE_WARN)
12219     {
12220       // Need to print to log so that replication admin knows when users
12221       // have adjusted their workloads.
12222       sql_print_warning("%s", ER(error_code));
12223       // Need to print to client so that users can adjust their workload.
12224       push_warning(thd, Sql_condition::SL_WARNING, error_code, ER(error_code));
12225     }
12226     DBUG_RETURN(true);
12227   }
12228 }
12229 
12230 
is_ddl_gtid_compatible()12231 bool THD::is_ddl_gtid_compatible()
12232 {
12233   DBUG_ENTER("THD::is_ddl_gtid_compatible");
12234 
12235   // If @@session.sql_log_bin has been manually turned off (only
12236   // doable by SUPER), then no problem, we can execute any statement.
12237   if ((variables.option_bits & OPTION_BIN_LOG) == 0 ||
12238       mysql_bin_log.is_open() == false)
12239     DBUG_RETURN(true);
12240 
12241   DBUG_PRINT("info",
12242              ("SQLCOM_CREATE:%d CREATE-TMP:%d SELECT:%d SQLCOM_DROP:%d DROP-TMP:%d trx:%d",
12243               lex->sql_command == SQLCOM_CREATE_TABLE,
12244               (lex->sql_command == SQLCOM_CREATE_TABLE &&
12245                (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)),
12246               lex->select_lex->item_list.elements,
12247               lex->sql_command == SQLCOM_DROP_TABLE,
12248               (lex->sql_command == SQLCOM_DROP_TABLE && lex->drop_temporary),
12249               in_multi_stmt_transaction_mode()));
12250 
12251   if (lex->sql_command == SQLCOM_CREATE_TABLE &&
12252       !(lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) &&
12253       lex->select_lex->item_list.elements)
12254   {
12255     /*
12256       CREATE ... SELECT (without TEMPORARY) is unsafe because if
12257       binlog_format=row it will be logged as a CREATE TABLE followed
12258       by row events, re-executed non-atomically as two transactions,
12259       and then written to the slave's binary log as two separate
12260       transactions with the same GTID.
12261     */
12262     bool ret= handle_gtid_consistency_violation(
12263       this, ER_GTID_UNSAFE_CREATE_SELECT);
12264     DBUG_RETURN(ret);
12265   }
12266   else if ((lex->sql_command == SQLCOM_CREATE_TABLE &&
12267             (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) != 0))
12268   {
12269     /*
12270       In statement binary log format, CREATE TEMPORARY TABLE is unsafe
12271       to execute inside a transaction because the table will stay and the
12272       transaction will be written to the slave's binary log with the GTID even
12273       if the transaction is rolled back. This includes the execution inside
12274       functions and triggers.
12275       The same considerations apply for DROP TEMPORARY TABLE too, this is
12276       checked in mysql_rm_table instead.
12277     */
12278     if ((in_multi_stmt_transaction_mode() || in_sub_stmt)
12279         && variables.binlog_format == BINLOG_FORMAT_STMT)
12280     {
12281       bool ret= handle_gtid_consistency_violation(
12282         this, ER_GTID_UNSAFE_CREATE_DROP_TEMPORARY_TABLE_IN_TRANSACTION);
12283       DBUG_RETURN(ret);
12284     }
12285   }
12286   DBUG_RETURN(true);
12287 }
12288 
12289 
12290 bool
is_dml_gtid_compatible(bool some_transactional_table,bool some_non_transactional_table,bool non_transactional_tables_are_tmp)12291 THD::is_dml_gtid_compatible(bool some_transactional_table,
12292                             bool some_non_transactional_table,
12293                             bool non_transactional_tables_are_tmp)
12294 {
12295   DBUG_ENTER("THD::is_dml_gtid_compatible(bool, bool, bool)");
12296 
12297   // If @@session.sql_log_bin has been manually turned off (only
12298   // doable by SUPER), then no problem, we can execute any statement.
12299   if ((variables.option_bits & OPTION_BIN_LOG) == 0 ||
12300       mysql_bin_log.is_open() == false)
12301     DBUG_RETURN(true);
12302 
12303   /*
12304     Single non-transactional updates are allowed when not mixed
12305     together with transactional statements within a transaction.
12306     Furthermore, writing to transactional and non-transactional
12307     engines in a single statement is also disallowed.
12308     Multi-statement transactions on non-transactional tables are
12309     split into single-statement transactions when
12310     GTID_NEXT = "AUTOMATIC".
12311 
12312     Non-transactional updates are allowed when row binlog format is
12313     used and all non-transactional tables are temporary.
12314 
12315     The debug symbol "allow_gtid_unsafe_non_transactional_updates"
12316     disables the error.  This is useful because it allows us to run
12317     old tests that were not written with the restrictions of GTIDs in
12318     mind.
12319   */
12320   DBUG_PRINT("info", ("some_non_transactional_table=%d "
12321                       "some_transactional_table=%d "
12322                       "trans_has_updated_trans_table=%d "
12323                       "non_transactional_tables_are_tmp=%d "
12324                       "is_current_stmt_binlog_format_row=%d",
12325                       some_non_transactional_table,
12326                       some_transactional_table,
12327                       trans_has_updated_trans_table(this),
12328                       non_transactional_tables_are_tmp,
12329                       is_current_stmt_binlog_format_row()));
12330   if (some_non_transactional_table &&
12331       (some_transactional_table || trans_has_updated_trans_table(this)) &&
12332       !(non_transactional_tables_are_tmp &&
12333         is_current_stmt_binlog_format_row()) &&
12334       !DBUG_EVALUATE_IF("allow_gtid_unsafe_non_transactional_updates", 1, 0))
12335   {
12336     DBUG_RETURN(handle_gtid_consistency_violation(
12337       this, ER_GTID_UNSAFE_NON_TRANSACTIONAL_TABLE));
12338   }
12339 
12340   DBUG_RETURN(true);
12341 }
12342 
12343 /*
12344   Implementation of interface to write rows to the binary log through the
12345   thread.  The thread is responsible for writing the rows it has
12346   inserted/updated/deleted.
12347 */
12348 
12349 #ifndef MYSQL_CLIENT
12350 
12351 /*
12352   Template member function for ensuring that there is an rows log
12353   event of the apropriate type before proceeding.
12354 
12355   PRE CONDITION:
12356     - Events of type 'RowEventT' have the type code 'type_code'.
12357 
12358   POST CONDITION:
12359     If a non-NULL pointer is returned, the pending event for thread 'thd' will
12360     be an event of type 'RowEventT' (which have the type code 'type_code')
12361     will either empty or have enough space to hold 'needed' bytes.  In
12362     addition, the columns bitmap will be correct for the row, meaning that
12363     the pending event will be flushed if the columns in the event differ from
12364     the columns suppled to the function.
12365 
12366   RETURNS
12367     If no error, a non-NULL pending event (either one which already existed or
12368     the newly created one).
12369     If error, NULL.
12370  */
12371 
12372 template <class RowsEventT> Rows_log_event*
binlog_prepare_pending_rows_event(TABLE * table,uint32 serv_id,size_t needed,bool is_transactional,RowsEventT * hint MY_ATTRIBUTE ((unused)),const uchar * extra_row_info)12373 THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
12374                                        size_t needed,
12375                                        bool is_transactional,
12376 				       RowsEventT *hint MY_ATTRIBUTE((unused)),
12377                                        const uchar* extra_row_info)
12378 {
12379   DBUG_ENTER("binlog_prepare_pending_rows_event");
12380 
12381   /* Fetch the type code for the RowsEventT template parameter */
12382   int const general_type_code= RowsEventT::TYPE_CODE;
12383 
12384   Rows_log_event* pending= binlog_get_pending_rows_event(is_transactional);
12385 
12386   if (unlikely(pending && !pending->is_valid()))
12387     DBUG_RETURN(NULL);
12388 
12389   /*
12390     Check if the current event is non-NULL and a write-rows
12391     event. Also check if the table provided is mapped: if it is not,
12392     then we have switched to writing to a new table.
12393     If there is no pending event, we need to create one. If there is a pending
12394     event, but it's not about the same table id, or not of the same type
12395     (between Write, Update and Delete), or not the same affected columns, or
12396     going to be too big, flush this event to disk and create a new pending
12397     event.
12398   */
12399   if (!pending ||
12400       pending->server_id != serv_id ||
12401       pending->get_table_id() != table->s->table_map_id ||
12402       pending->get_general_type_code() != general_type_code ||
12403       pending->get_data_size() + needed > opt_binlog_rows_event_max_size ||
12404       pending->read_write_bitmaps_cmp(table) == FALSE ||
12405       !binlog_row_event_extra_data_eq(pending->get_extra_row_data(),
12406                                       extra_row_info))
12407   {
12408     /* Create a new RowsEventT... */
12409     Rows_log_event* const
12410 	ev= new RowsEventT(this, table, table->s->table_map_id,
12411                            is_transactional, extra_row_info);
12412     if (unlikely(!ev))
12413       DBUG_RETURN(NULL);
12414     ev->server_id= serv_id; // I don't like this, it's too easy to forget.
12415     /*
12416       flush the pending event and replace it with the newly created
12417       event...
12418     */
12419     if (unlikely(
12420         mysql_bin_log.flush_and_set_pending_rows_event(this, ev,
12421                                                        is_transactional)))
12422     {
12423       delete ev;
12424       DBUG_RETURN(NULL);
12425     }
12426 
12427     DBUG_RETURN(ev);               /* This is the new pending event */
12428   }
12429   DBUG_RETURN(pending);        /* This is the current pending event */
12430 }
12431 
12432 /* Declare in unnamed namespace. */
12433 namespace {
12434 
12435   /**
12436      Class to handle temporary allocation of memory for row data.
12437 
12438      The responsibilities of the class is to provide memory for
12439      packing one or two rows of packed data (depending on what
12440      constructor is called).
12441 
12442      In order to make the allocation more efficient for "simple" rows,
12443      i.e., rows that do not contain any blobs, a pointer to the
12444      allocated memory is of memory is stored in the table structure
12445      for simple rows.  If memory for a table containing a blob field
12446      is requested, only memory for that is allocated, and subsequently
12447      released when the object is destroyed.
12448 
12449    */
12450   class Row_data_memory {
12451   public:
12452     /**
12453       Build an object to keep track of a block-local piece of memory
12454       for storing a row of data.
12455 
12456       @param table
12457       Table where the pre-allocated memory is stored.
12458 
12459       @param length
12460       Length of data that is needed, if the record contain blobs.
12461      */
Row_data_memory(TABLE * table,size_t const len1)12462     Row_data_memory(TABLE *table, size_t const len1)
12463       : m_memory(0)
12464     {
12465 #ifndef NDEBUG
12466       m_alloc_checked= FALSE;
12467 #endif
12468       allocate_memory(table, len1);
12469       m_ptr[0]= has_memory() ? m_memory : 0;
12470       m_ptr[1]= 0;
12471     }
12472 
Row_data_memory(TABLE * table,size_t const len1,size_t const len2)12473     Row_data_memory(TABLE *table, size_t const len1, size_t const len2)
12474       : m_memory(0)
12475     {
12476 #ifndef NDEBUG
12477       m_alloc_checked= FALSE;
12478 #endif
12479       allocate_memory(table, len1 + len2);
12480       m_ptr[0]= has_memory() ? m_memory        : 0;
12481       m_ptr[1]= has_memory() ? m_memory + len1 : 0;
12482     }
12483 
~Row_data_memory()12484     ~Row_data_memory()
12485     {
12486       if (m_memory != 0 && m_release_memory_on_destruction)
12487         my_free(m_memory);
12488     }
12489 
12490     /**
12491        Is there memory allocated?
12492 
12493        @retval true There is memory allocated
12494        @retval false Memory allocation failed
12495      */
has_memory() const12496     bool has_memory() const {
12497 #ifndef NDEBUG
12498       m_alloc_checked= TRUE;
12499 #endif
12500       return m_memory != 0;
12501     }
12502 
slot(uint s)12503     uchar *slot(uint s)
12504     {
12505       assert(s < sizeof(m_ptr)/sizeof(*m_ptr));
12506       assert(m_ptr[s] != 0);
12507       assert(m_alloc_checked == TRUE);
12508       return m_ptr[s];
12509     }
12510 
12511   private:
allocate_memory(TABLE * const table,size_t const total_length)12512     void allocate_memory(TABLE *const table, size_t const total_length)
12513     {
12514       if (table->s->blob_fields == 0)
12515       {
12516         /*
12517           The maximum length of a packed record is less than this
12518           length. We use this value instead of the supplied length
12519           when allocating memory for records, since we don't know how
12520           the memory will be used in future allocations.
12521 
12522           Since table->s->reclength is for unpacked records, we have
12523           to add two bytes for each field, which can potentially be
12524           added to hold the length of a packed field.
12525         */
12526         size_t const maxlen= table->s->reclength + 2 * table->s->fields;
12527 
12528         /*
12529           Allocate memory for two records if memory hasn't been
12530           allocated. We allocate memory for two records so that it can
12531           be used when processing update rows as well.
12532         */
12533         if (table->write_row_record == 0)
12534           table->write_row_record=
12535             (uchar *) alloc_root(&table->mem_root, 2 * maxlen);
12536         m_memory= table->write_row_record;
12537         m_release_memory_on_destruction= FALSE;
12538       }
12539       else
12540       {
12541         m_memory= (uchar *) my_malloc(key_memory_Row_data_memory_memory,
12542                                       total_length, MYF(MY_WME));
12543         m_release_memory_on_destruction= TRUE;
12544       }
12545     }
12546 
12547 #ifndef NDEBUG
12548     mutable bool m_alloc_checked;
12549 #endif
12550     bool m_release_memory_on_destruction;
12551     uchar *m_memory;
12552     uchar *m_ptr[2];
12553   };
12554 
12555 } // namespace
12556 
binlog_write_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)12557 int THD::binlog_write_row(TABLE* table, bool is_trans,
12558                           uchar const *record,
12559                           const uchar* extra_row_info)
12560 {
12561   assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
12562 
12563   /*
12564     Pack records into format for transfer. We are allocating more
12565     memory than needed, but that doesn't matter.
12566   */
12567   Row_data_memory memory(table, max_row_length(table, record));
12568   if (!memory.has_memory())
12569     return HA_ERR_OUT_OF_MEM;
12570 
12571   uchar *row_data= memory.slot(0);
12572 
12573   size_t const len= pack_row(table, table->write_set, row_data, record);
12574 
12575   Rows_log_event* const ev=
12576     binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
12577                                       static_cast<Write_rows_log_event*>(0),
12578                                       extra_row_info);
12579 
12580   if (unlikely(ev == 0))
12581     return HA_ERR_OUT_OF_MEM;
12582 
12583   return ev->add_row_data(row_data, len);
12584 }
12585 
binlog_update_row(TABLE * table,bool is_trans,const uchar * before_record,const uchar * after_record,const uchar * extra_row_info)12586 int THD::binlog_update_row(TABLE* table, bool is_trans,
12587                            const uchar *before_record,
12588                            const uchar *after_record,
12589                            const uchar* extra_row_info)
12590 {
12591   assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
12592   int error= 0;
12593 
12594   /**
12595     Save a reference to the original read and write set bitmaps.
12596     We will need this to restore the bitmaps at the end.
12597    */
12598   MY_BITMAP *old_read_set= table->read_set;
12599   MY_BITMAP *old_write_set= table->write_set;
12600 
12601   /**
12602      This will remove spurious fields required during execution but
12603      not needed for binlogging. This is done according to the:
12604      binlog-row-image option.
12605    */
12606   binlog_prepare_row_images(table);
12607 
12608   size_t const before_maxlen = max_row_length(table, before_record);
12609   size_t const after_maxlen  = max_row_length(table, after_record);
12610 
12611   Row_data_memory row_data(table, before_maxlen, after_maxlen);
12612   if (!row_data.has_memory())
12613     return HA_ERR_OUT_OF_MEM;
12614 
12615   uchar *before_row= row_data.slot(0);
12616   uchar *after_row= row_data.slot(1);
12617 
12618   size_t const before_size= pack_row(table, table->read_set, before_row,
12619                                         before_record);
12620   size_t const after_size= pack_row(table, table->write_set, after_row,
12621                                        after_record);
12622 
12623   DBUG_DUMP("before_record", before_record, table->s->reclength);
12624   DBUG_DUMP("after_record",  after_record, table->s->reclength);
12625   DBUG_DUMP("before_row",    before_row, before_size);
12626   DBUG_DUMP("after_row",     after_row, after_size);
12627 
12628   Rows_log_event* const ev=
12629     binlog_prepare_pending_rows_event(table, server_id,
12630 				      before_size + after_size, is_trans,
12631 				      static_cast<Update_rows_log_event*>(0),
12632                                       extra_row_info);
12633 
12634   if (unlikely(ev == 0))
12635     return HA_ERR_OUT_OF_MEM;
12636 
12637   error= ev->add_row_data(before_row, before_size) ||
12638          ev->add_row_data(after_row, after_size);
12639 
12640   /* restore read/write set for the rest of execution */
12641   table->column_bitmaps_set_no_signal(old_read_set,
12642                                       old_write_set);
12643 
12644   bitmap_clear_all(&table->tmp_set);
12645 
12646   return error;
12647 }
12648 
binlog_delete_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)12649 int THD::binlog_delete_row(TABLE* table, bool is_trans,
12650                            uchar const *record,
12651                            const uchar* extra_row_info)
12652 {
12653   assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
12654   int error= 0;
12655 
12656   /**
12657     Save a reference to the original read and write set bitmaps.
12658     We will need this to restore the bitmaps at the end.
12659    */
12660   MY_BITMAP *old_read_set= table->read_set;
12661   MY_BITMAP *old_write_set= table->write_set;
12662 
12663   /**
12664      This will remove spurious fields required during execution but
12665      not needed for binlogging. This is done according to the:
12666      binlog-row-image option.
12667    */
12668   binlog_prepare_row_images(table);
12669 
12670   /*
12671      Pack records into format for transfer. We are allocating more
12672      memory than needed, but that doesn't matter.
12673   */
12674   Row_data_memory memory(table, max_row_length(table, record));
12675   if (unlikely(!memory.has_memory()))
12676     return HA_ERR_OUT_OF_MEM;
12677 
12678   uchar *row_data= memory.slot(0);
12679 
12680   DBUG_DUMP("table->read_set", (uchar*) table->read_set->bitmap, (table->s->fields + 7) / 8);
12681   size_t const len= pack_row(table, table->read_set, row_data, record);
12682 
12683   Rows_log_event* const ev=
12684     binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
12685 				      static_cast<Delete_rows_log_event*>(0),
12686                                       extra_row_info);
12687 
12688   if (unlikely(ev == 0))
12689     return HA_ERR_OUT_OF_MEM;
12690 
12691   error= ev->add_row_data(row_data, len);
12692 
12693   /* restore read/write set for the rest of execution */
12694   table->column_bitmaps_set_no_signal(old_read_set,
12695                                       old_write_set);
12696 
12697   bitmap_clear_all(&table->tmp_set);
12698   return error;
12699 }
12700 
binlog_prepare_row_images(TABLE * table)12701 void THD::binlog_prepare_row_images(TABLE *table)
12702 {
12703   DBUG_ENTER("THD::binlog_prepare_row_images");
12704   /**
12705     Remove from read_set spurious columns. The write_set has been
12706     handled before in table->mark_columns_needed_for_update.
12707    */
12708 
12709   DBUG_PRINT_BITSET("debug", "table->read_set (before preparing): %s", table->read_set);
12710   THD *thd= table->in_use;
12711 
12712   /**
12713     if there is a primary key in the table (ie, user declared PK or a
12714     non-null unique index) and we dont want to ship the entire image,
12715     and the handler involved supports this.
12716    */
12717   if (table->s->primary_key < MAX_KEY &&
12718       (thd->variables.binlog_row_image < BINLOG_ROW_IMAGE_FULL) &&
12719       !ha_check_storage_engine_flag(table->s->db_type(), HTON_NO_BINLOG_ROW_OPT))
12720   {
12721     /**
12722       Just to be sure that tmp_set is currently not in use as
12723       the read_set already.
12724     */
12725     assert(table->read_set != &table->tmp_set);
12726     // Verify it's not used
12727     assert(bitmap_is_clear_all(&table->tmp_set));
12728 
12729     switch(thd->variables.binlog_row_image)
12730     {
12731       case BINLOG_ROW_IMAGE_MINIMAL:
12732         /* MINIMAL: Mark only PK */
12733         table->mark_columns_used_by_index_no_reset(table->s->primary_key,
12734                                                    &table->tmp_set);
12735         break;
12736       case BINLOG_ROW_IMAGE_NOBLOB:
12737         /**
12738           NOBLOB: Remove unnecessary BLOB fields from read_set
12739                   (the ones that are not part of PK).
12740          */
12741         bitmap_union(&table->tmp_set, table->read_set);
12742         for (Field **ptr=table->field ; *ptr ; ptr++)
12743         {
12744           Field *field= (*ptr);
12745           if ((field->type() == MYSQL_TYPE_BLOB) &&
12746               !(field->flags & PRI_KEY_FLAG))
12747             bitmap_clear_bit(&table->tmp_set, field->field_index);
12748         }
12749         break;
12750       default:
12751         assert(0); // impossible.
12752     }
12753 
12754     /* set the temporary read_set */
12755     table->column_bitmaps_set_no_signal(&table->tmp_set,
12756                                         table->write_set);
12757   }
12758 
12759   DBUG_PRINT_BITSET("debug", "table->read_set (after preparing): %s", table->read_set);
12760   DBUG_VOID_RETURN;
12761 }
12762 
12763 
binlog_flush_pending_rows_event(bool stmt_end,bool is_transactional)12764 int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional)
12765 {
12766   DBUG_ENTER("THD::binlog_flush_pending_rows_event");
12767   /*
12768     We shall flush the pending event even if we are not in row-based
12769     mode: it might be the case that we left row-based mode before
12770     flushing anything (e.g., if we have explicitly locked tables).
12771    */
12772   if (!mysql_bin_log.is_open())
12773     DBUG_RETURN(0);
12774 
12775   /*
12776     Mark the event as the last event of a statement if the stmt_end
12777     flag is set.
12778   */
12779   int error= 0;
12780   if (Rows_log_event *pending= binlog_get_pending_rows_event(is_transactional))
12781   {
12782     if (stmt_end)
12783     {
12784       pending->set_flags(Rows_log_event::STMT_END_F);
12785       binlog_table_maps= 0;
12786     }
12787 
12788     error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0,
12789                                                           is_transactional);
12790   }
12791 
12792   DBUG_RETURN(error);
12793 }
12794 
12795 
12796 /**
12797    binlog_row_event_extra_data_eq
12798 
12799    Comparator for two binlog row event extra data
12800    pointers.
12801 
12802    It compares their significant bytes.
12803 
12804    Null pointers are acceptable
12805 
12806    @param a
12807      first pointer
12808 
12809    @param b
12810      first pointer
12811 
12812    @return
12813      true if the referenced structures are equal
12814 */
12815 bool
binlog_row_event_extra_data_eq(const uchar * a,const uchar * b)12816 THD::binlog_row_event_extra_data_eq(const uchar* a,
12817                                     const uchar* b)
12818 {
12819   return ((a == b) ||
12820           ((a != NULL) &&
12821            (b != NULL) &&
12822            (a[EXTRA_ROW_INFO_LEN_OFFSET] ==
12823             b[EXTRA_ROW_INFO_LEN_OFFSET]) &&
12824            (memcmp(a, b,
12825                    a[EXTRA_ROW_INFO_LEN_OFFSET]) == 0)));
12826 }
12827 
12828 #if !defined(NDEBUG)
12829 static const char *
show_query_type(THD::enum_binlog_query_type qtype)12830 show_query_type(THD::enum_binlog_query_type qtype)
12831 {
12832   switch (qtype) {
12833   case THD::ROW_QUERY_TYPE:
12834     return "ROW";
12835   case THD::STMT_QUERY_TYPE:
12836     return "STMT";
12837   case THD::QUERY_TYPE_COUNT:
12838   default:
12839     assert(0 <= qtype && qtype < THD::QUERY_TYPE_COUNT);
12840   }
12841   static char buf[64];
12842   sprintf(buf, "UNKNOWN#%d", qtype);
12843   return buf;
12844 }
12845 #endif
12846 
12847 /**
12848   Auxiliary function to reset the limit unsafety warning suppression.
12849 */
reset_binlog_unsafe_suppression()12850 static void reset_binlog_unsafe_suppression()
12851 {
12852   DBUG_ENTER("reset_binlog_unsafe_suppression");
12853   unsafe_warning_suppression_is_activated= false;
12854   limit_unsafe_warning_count= 0;
12855   limit_unsafe_suppression_start_time= my_getsystime()/10000000;
12856   DBUG_VOID_RETURN;
12857 }
12858 
12859 /**
12860   Auxiliary function to print warning in the error log.
12861 */
print_unsafe_warning_to_log(int unsafe_type,char * buf,const char * query)12862 static void print_unsafe_warning_to_log(int unsafe_type, char* buf,
12863                                         const char* query)
12864 {
12865   DBUG_ENTER("print_unsafe_warning_in_log");
12866   sprintf(buf, ER(ER_BINLOG_UNSAFE_STATEMENT),
12867           ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
12868   sql_print_warning(ER(ER_MESSAGE_AND_STATEMENT), buf, query);
12869   DBUG_VOID_RETURN;
12870 }
12871 
12872 /**
12873   Auxiliary function to check if the warning for limit unsafety should be
12874   thrown or suppressed. Details of the implementation can be found in the
12875   comments inline.
12876 
12877   @params
12878    buf         - buffer to hold the warning message text
12879    unsafe_type - The type of unsafety.
12880    query       - The actual query statement.
12881 
12882   TODO: Remove this function and implement a general service for all warnings
12883   that would prevent flooding the error log. => switch to log_throttle class?
12884 */
do_unsafe_limit_checkout(char * buf,int unsafe_type,const char * query)12885 static void do_unsafe_limit_checkout(char* buf, int unsafe_type, const char* query)
12886 {
12887   ulonglong now;
12888   DBUG_ENTER("do_unsafe_limit_checkout");
12889   assert(unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT);
12890   limit_unsafe_warning_count++;
12891   /*
12892     INITIALIZING:
12893     If this is the first time this function is called with log warning
12894     enabled, the monitoring the unsafe warnings should start.
12895   */
12896   if (limit_unsafe_suppression_start_time == 0)
12897   {
12898     limit_unsafe_suppression_start_time= my_getsystime()/10000000;
12899     print_unsafe_warning_to_log(unsafe_type, buf, query);
12900   }
12901   else
12902   {
12903     if (!unsafe_warning_suppression_is_activated)
12904       print_unsafe_warning_to_log(unsafe_type, buf, query);
12905 
12906     if (limit_unsafe_warning_count >=
12907         LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT)
12908     {
12909       now= my_getsystime()/10000000;
12910       if (!unsafe_warning_suppression_is_activated)
12911       {
12912         /*
12913           ACTIVATION:
12914           We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT warnings in
12915           less than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT we activate the
12916           suppression.
12917         */
12918         if ((now-limit_unsafe_suppression_start_time) <=
12919                        LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
12920         {
12921           unsafe_warning_suppression_is_activated= true;
12922           DBUG_PRINT("info",("A warning flood has been detected and the limit \
12923 unsafety warning suppression has been activated."));
12924         }
12925         else
12926         {
12927           /*
12928            there is no flooding till now, therefore we restart the monitoring
12929           */
12930           limit_unsafe_suppression_start_time= my_getsystime()/10000000;
12931           limit_unsafe_warning_count= 0;
12932         }
12933       }
12934       else
12935       {
12936         /*
12937           Print the suppression note and the unsafe warning.
12938         */
12939         sql_print_information("The following warning was suppressed %d times \
12940 during the last %d seconds in the error log",
12941                               limit_unsafe_warning_count,
12942                               (int)
12943                               (now-limit_unsafe_suppression_start_time));
12944         print_unsafe_warning_to_log(unsafe_type, buf, query);
12945         /*
12946           DEACTIVATION: We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT
12947           warnings in more than  LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT, the
12948           suppression should be deactivated.
12949         */
12950         if ((now - limit_unsafe_suppression_start_time) >
12951             LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
12952         {
12953           reset_binlog_unsafe_suppression();
12954           DBUG_PRINT("info",("The limit unsafety warning supression has been \
12955 deactivated"));
12956         }
12957       }
12958       limit_unsafe_warning_count= 0;
12959     }
12960   }
12961   DBUG_VOID_RETURN;
12962 }
12963 
12964 /**
12965   Auxiliary method used by @c binlog_query() to raise warnings.
12966 
12967   The type of warning and the type of unsafeness is stored in
12968   THD::binlog_unsafe_warning_flags.
12969 */
issue_unsafe_warnings()12970 void THD::issue_unsafe_warnings()
12971 {
12972   char buf[MYSQL_ERRMSG_SIZE * 2];
12973   DBUG_ENTER("issue_unsafe_warnings");
12974   /*
12975     Ensure that binlog_unsafe_warning_flags is big enough to hold all
12976     bits.  This is actually a constant expression.
12977   */
12978   assert(LEX::BINLOG_STMT_UNSAFE_COUNT <=
12979          sizeof(binlog_unsafe_warning_flags) * CHAR_BIT);
12980 
12981   uint32 unsafe_type_flags= binlog_unsafe_warning_flags;
12982 
12983   if ((unsafe_type_flags & (1U << LEX::BINLOG_STMT_UNSAFE_LIMIT)) != 0)
12984   {
12985     if ((lex->sql_command == SQLCOM_DELETE
12986          || lex->sql_command == SQLCOM_UPDATE) &&
12987         lex->select_lex->select_limit)
12988     {
12989       ORDER *order= (ORDER *) ((lex->select_lex->order_list.elements) ?
12990                                lex->select_lex->order_list.first : NULL);
12991       if ((lex->select_lex->select_limit &&
12992            lex->select_lex->select_limit->fixed &&
12993            lex->select_lex->select_limit->val_int() == 0) ||
12994           is_order_deterministic(lex->query_tables,
12995                                  lex->select_lex->where_cond(), order))
12996       {
12997         unsafe_type_flags&= ~(1U << LEX::BINLOG_STMT_UNSAFE_LIMIT);
12998       }
12999     }
13000     if ((lex->sql_command == SQLCOM_INSERT_SELECT ||
13001          lex->sql_command == SQLCOM_REPLACE_SELECT) &&
13002         order_deterministic)
13003     {
13004       unsafe_type_flags&= ~(1U << LEX::BINLOG_STMT_UNSAFE_LIMIT);
13005     }
13006 
13007   }
13008 
13009   /*
13010     For each unsafe_type, check if the statement is unsafe in this way
13011     and issue a warning.
13012   */
13013   for (int unsafe_type=0;
13014        unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
13015        unsafe_type++)
13016   {
13017     if ((unsafe_type_flags & (1 << unsafe_type)) != 0)
13018     {
13019       push_warning_printf(this, Sql_condition::SL_NOTE,
13020                           ER_BINLOG_UNSAFE_STATEMENT,
13021                           ER(ER_BINLOG_UNSAFE_STATEMENT),
13022                           ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
13023       if (log_error_verbosity > 1 && opt_log_unsafe_statements)
13024       {
13025         if (unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT)
13026           do_unsafe_limit_checkout( buf, unsafe_type, query().str);
13027         else //cases other than LIMIT unsafety
13028           print_unsafe_warning_to_log(unsafe_type, buf, query().str);
13029       }
13030     }
13031   }
13032   DBUG_VOID_RETURN;
13033 }
13034 
13035 /**
13036   Log the current query.
13037 
13038   The query will be logged in either row format or statement format
13039   depending on the value of @c current_stmt_binlog_format_row field and
13040   the value of the @c qtype parameter.
13041 
13042   This function must be called:
13043 
13044   - After the all calls to ha_*_row() functions have been issued.
13045 
13046   - After any writes to system tables. Rationale: if system tables
13047     were written after a call to this function, and the master crashes
13048     after the call to this function and before writing the system
13049     tables, then the master and slave get out of sync.
13050 
13051   - Before tables are unlocked and closed.
13052 
13053   @see decide_logging_format
13054 
13055   @retval 0 Success
13056 
13057   @retval nonzero If there is a failure when writing the query (e.g.,
13058   write failure), then the error code is returned.
13059 */
binlog_query(THD::enum_binlog_query_type qtype,const char * query_arg,size_t query_len,bool is_trans,bool direct,bool suppress_use,int errcode)13060 int THD::binlog_query(THD::enum_binlog_query_type qtype, const char *query_arg,
13061                       size_t query_len, bool is_trans, bool direct,
13062                       bool suppress_use, int errcode)
13063 {
13064   DBUG_ENTER("THD::binlog_query");
13065   DBUG_PRINT("enter", ("qtype: %s  query: '%s'",
13066                        show_query_type(qtype), query_arg));
13067   assert(query_arg && mysql_bin_log.is_open());
13068 
13069   if (get_binlog_local_stmt_filter() == BINLOG_FILTER_SET)
13070   {
13071     /*
13072       The current statement is to be ignored, and not written to
13073       the binlog. Do not call issue_unsafe_warnings().
13074     */
13075     DBUG_RETURN(0);
13076   }
13077 
13078   /*
13079     If we are not in prelocked mode, mysql_unlock_tables() will be
13080     called after this binlog_query(), so we have to flush the pending
13081     rows event with the STMT_END_F set to unlock all tables at the
13082     slave side as well.
13083 
13084     If we are in prelocked mode, the flushing will be done inside the
13085     top-most close_thread_tables().
13086   */
13087   if (this->locked_tables_mode <= LTM_LOCK_TABLES)
13088     if (int error= binlog_flush_pending_rows_event(TRUE, is_trans))
13089       DBUG_RETURN(error);
13090 
13091   /*
13092     Warnings for unsafe statements logged in statement format are
13093     printed in three places instead of in decide_logging_format().
13094     This is because the warnings should be printed only if the statement
13095     is actually logged. When executing decide_logging_format(), we cannot
13096     know for sure if the statement will be logged:
13097 
13098     1 - sp_head::execute_procedure which prints out warnings for calls to
13099     stored procedures.
13100 
13101     2 - sp_head::execute_function which prints out warnings for calls
13102     involving functions.
13103 
13104     3 - THD::binlog_query (here) which prints warning for top level
13105     statements not covered by the two cases above: i.e., if not insided a
13106     procedure and a function.
13107 
13108     Besides, we should not try to print these warnings if it is not
13109     possible to write statements to the binary log as it happens when
13110     the execution is inside a function, or generaly speaking, when
13111     the variables.option_bits & OPTION_BIN_LOG is false.
13112   */
13113   if ((variables.option_bits & OPTION_BIN_LOG) &&
13114       sp_runtime_ctx == NULL && !binlog_evt_union.do_union)
13115   {
13116     issue_unsafe_warnings();
13117     order_deterministic= true;
13118   }
13119 
13120   switch (qtype) {
13121     /*
13122       ROW_QUERY_TYPE means that the statement may be logged either in
13123       row format or in statement format.  If
13124       current_stmt_binlog_format is row, it means that the
13125       statement has already been logged in row format and hence shall
13126       not be logged again.
13127     */
13128   case THD::ROW_QUERY_TYPE:
13129     DBUG_PRINT("debug",
13130                ("is_current_stmt_binlog_format_row: %d",
13131                 is_current_stmt_binlog_format_row()));
13132     if (is_current_stmt_binlog_format_row())
13133       DBUG_RETURN(0);
13134     /* Fall through */
13135 
13136     /*
13137       STMT_QUERY_TYPE means that the query must be logged in statement
13138       format; it cannot be logged in row format.  This is typically
13139       used by DDL statements.  It is an error to use this query type
13140       if current_stmt_binlog_format_row is row.
13141 
13142       @todo Currently there are places that call this method with
13143       STMT_QUERY_TYPE and current_stmt_binlog_format is row.  Fix those
13144       places and add assert to ensure correct behavior. /Sven
13145     */
13146   case THD::STMT_QUERY_TYPE:
13147     /*
13148       The MYSQL_BIN_LOG::write() function will set the STMT_END_F flag and
13149       flush the pending rows event if necessary.
13150     */
13151     {
13152       Query_log_event qinfo(this, query_arg, query_len, is_trans, direct,
13153                             suppress_use, errcode);
13154       /*
13155         Binlog table maps will be irrelevant after a Query_log_event
13156         (they are just removed on the slave side) so after the query
13157         log event is written to the binary log, we pretend that no
13158         table maps were written.
13159        */
13160       int error= mysql_bin_log.write_event(&qinfo);
13161       binlog_table_maps= 0;
13162       DBUG_RETURN(error);
13163     }
13164     break;
13165 
13166   case THD::QUERY_TYPE_COUNT:
13167   default:
13168     assert(0 <= qtype && qtype < QUERY_TYPE_COUNT);
13169   }
13170   DBUG_RETURN(0);
13171 }
13172 
13173 #endif /* !defined(MYSQL_CLIENT) */
13174 
get_cache_mngr(THD * thd)13175 static const binlog_cache_mngr *get_cache_mngr(THD *thd)
13176 {
13177   const binlog_cache_mngr *cache_mngr
13178     = (thd && opt_bin_log)
13179     ? static_cast<binlog_cache_mngr *>(thd_get_ha_data(thd, binlog_hton))
13180     : NULL;
13181 
13182   return cache_mngr;
13183 }
13184 
show_binlog_vars(THD * thd,SHOW_VAR * var,char * buff)13185 static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff)
13186 {
13187   mysql_mutex_assert_owner(&LOCK_status);
13188 
13189   const binlog_cache_mngr *cache_mngr= get_cache_mngr(thd);
13190 
13191   if (cache_mngr && cache_mngr->has_consistent_snapshot())
13192   {
13193     set_binlog_snapshot_file(cache_mngr->binlog_info.log_file_name);
13194     binlog_snapshot_position= cache_mngr->binlog_info.pos;
13195   }
13196   else if (mysql_bin_log.is_open())
13197   {
13198     set_binlog_snapshot_file(binlog_global_snapshot_file);
13199     binlog_snapshot_position= binlog_global_snapshot_position;
13200   }
13201   else
13202   {
13203     binlog_snapshot_file[0]= '\0';
13204     binlog_snapshot_position= 0;
13205   }
13206 
13207   var->type= SHOW_ARRAY;
13208   var->value= (char *)&binlog_status_vars_detail;
13209   return 0;
13210 }
13211 
show_binlog_snapshot_gtid_executed(THD * thd,SHOW_VAR * var,char * buff)13212 static int show_binlog_snapshot_gtid_executed(THD *thd, SHOW_VAR *var,
13213                                               char *buff)
13214 {
13215   mysql_mutex_assert_owner(&LOCK_status);
13216 
13217   const binlog_cache_mngr *cache_mngr= get_cache_mngr(thd);
13218 
13219   if (cache_mngr && cache_mngr->has_consistent_snapshot())
13220   {
13221     binlog_snapshot_gtid_executed= cache_mngr->snapshot_gtid_executed;
13222   }
13223   else if (mysql_bin_log.is_open())
13224   {
13225     binlog_snapshot_gtid_executed= "not-in-consistent-snapshot";
13226   }
13227   else
13228   {
13229     binlog_snapshot_gtid_executed.clear();
13230   }
13231 
13232   var->type= SHOW_CHAR;
13233   var->value= const_cast<char *>(binlog_snapshot_gtid_executed.c_str());
13234   return 0;
13235 }
13236 
13237 static SHOW_VAR binlog_status_vars_top[]= {
13238     {"Binlog", (char *)&show_binlog_vars, SHOW_FUNC, SHOW_SCOPE_GLOBAL},
13239     {"Binlog_snapshot_gtid_executed",
13240      (char *)&show_binlog_snapshot_gtid_executed, SHOW_FUNC,
13241      SHOW_SCOPE_GLOBAL},
13242     {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}};
13243 
13244 struct st_mysql_storage_engine binlog_storage_engine=
13245 { MYSQL_HANDLERTON_INTERFACE_VERSION };
13246 
13247 /** @} */
13248 
mysql_declare_plugin(binlog)13249 mysql_declare_plugin(binlog)
13250 {
13251   MYSQL_STORAGE_ENGINE_PLUGIN,
13252   &binlog_storage_engine,
13253   "binlog",
13254   "MySQL AB",
13255   "This is a pseudo storage engine to represent the binlog in a transaction",
13256   PLUGIN_LICENSE_GPL,
13257   binlog_init, /* Plugin Init */
13258   binlog_deinit, /* Plugin Deinit */
13259   0x0100 /* 1.0 */,
13260   binlog_status_vars_top,     /* status variables                */
13261   NULL,                       /* system variables                */
13262   NULL,                       /* config options                  */
13263   0,
13264 }
13265 mysql_declare_plugin_end;
13266