1 /* Copyright (c) 2009, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software Foundation,
21    51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22 
23 #include "binlog.h"
24 
25 #include "my_stacktrace.h"                  // my_safe_print_system_time
26 #include "debug_sync.h"                     // DEBUG_SYNC
27 #include "log.h"                            // sql_print_warning
28 #include "log_event.h"                      // Rows_log_event
29 #include "mysqld_thd_manager.h"             // Global_THD_manager
30 #include "rpl_handler.h"                    // RUN_HOOK
31 #include "rpl_mi.h"                         // Master_info
32 #include "rpl_rli.h"                        // Relay_log_info
33 #include "rpl_rli_pdb.h"                    // Slave_worker
34 #include "rpl_slave_commit_order_manager.h" // Commit_order_manager
35 #include "rpl_trx_boundary_parser.h"        // Transaction_boundary_parser
36 #include "rpl_context.h"
37 #include "sql_class.h"                      // THD
38 #include "sql_parse.h"                      // sqlcom_can_generate_row_events
39 #include "sql_show.h"                       // append_identifier
40 
41 #include "pfs_file_provider.h"
42 #include "mysql/psi/mysql_file.h"
43 
44 #include <pfs_transaction_provider.h>
45 #include <mysql/psi/mysql_transaction.h>
46 #include "xa.h"
47 
48 #include <list>
49 #include <string>
50 #include <sstream>
51 
52 #ifdef WITH_WSREP
53 #include "wsrep_xid.h"
54 #endif /* WITH_WSREP */
55 
// Names from std:: and binary_log:: made usable unqualified in this file.
using std::max;
using std::min;
using std::string;
using std::list;
using binary_log::checksum_crc32;
/*
  FLAGSTR(V,F) expands to the spelling of the second argument followed by a
  space (as a string literal) when (V)&(F) is non-zero, and to the empty
  string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")

// NOTE(review): string constant "ML"; its use is not visible in this part of
// the file, so its purpose should be confirmed against the callers.
#define LOG_PREFIX	"ML"
64 
65 /**
66   @defgroup Binary_Log Binary Log
67   @{
68  */
69 
/*
  All-bits-set my_off_t value. binlog_trx_cache_data uses it as the value of
  before_stmt_pos when no statement start position is recorded.
*/
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)

/*
  Constants required for the limit unsafe warnings suppression
 */
//seconds after which the limit unsafe warnings suppression will be activated
#define LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT 50
//number of limit unsafe warnings after which the suppression will be activated
#define LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT 50
// Maximum number of attempts Thd_backup_and_restore makes to attach to a
// session before it terminates the server.
#define MAX_SESSION_ATTACH_TRIES 10

/*
  File-local state, all starting out as zero/false.
  NOTE(review): judging by the names these back the limit unsafe warnings
  suppression configured above; the code using them is not visible here.
*/
static ulonglong limit_unsafe_suppression_start_time= 0;
static bool unsafe_warning_suppression_is_activated= false;
static int limit_unsafe_warning_count= 0;
84 
/*
  Pointer to the binlog handlerton. It has internal linkage unless the
  server is built WITH_WSREP, in which case it is given external linkage.
*/
#ifndef WITH_WSREP
static handlerton *binlog_hton;
#else
handlerton *binlog_hton; // we need it in wsrep_binlog.cc
#endif
// Global boolean, true by default.
// NOTE(review): presumably backs the binlog_order_commits option — confirm.
bool opt_binlog_order_commits= true;

// Both start out as null pointers; they are assigned outside this part of
// the file.
const char *log_bin_index= 0;
const char *log_bin_basename= 0;

// The global binary log object, constructed with the address of
// sync_binlog_period and the WRITE_CACHE cache type.
MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period, WRITE_CACHE);
96 
/*
  Forward declarations of file-local (static) functions.
  NOTE(review): most take a handlerton pointer, so they are presumably the
  callbacks installed on binlog_hton; their definitions are outside the part
  of the file visible here.
*/
static int binlog_init(void *p);
static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event);
static int binlog_close_connection(handlerton *hton, THD *thd);
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
                                                      THD *thd);
static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
static int binlog_xa_commit(handlerton *hton,  XID *xid);
static int binlog_xa_rollback(handlerton *hton,  XID *xid);
static void exec_binlog_error_action_abort(const char* err_string);
110 
111 /**
112   Helper class to switch to a new thread and then go back to the previous one,
113   when the object is destroyed using RAII.
114 
115   This class is used to temporarily switch to another session (THD
116   structure). It will set up thread specific "globals" correctly
117   so that the POSIX thread looks exactly like the session attached to.
  However, PSI_thread info is not touched, as it is required to show
  the actual physical view in PFS instrumentation, i.e., it should
  depict the real thread doing the work instead of the thread it
  switched to.
122 
123   On destruction, the original session (which is supplied to the
124   constructor) will be re-attached automatically. For example, with
125   this code, the value of @c current_thd will be the same before and
126   after execution of the code.
127 
128   @code
129   {
130     for (int i = 0 ; i < count ; ++i)
131     {
132       // here we are attached to current_thd
133       // [...]
134       Thd_backup_and_restore switch_thd(current_thd, other_thd[i]);
135       // [...]
136       // here we are attached to other_thd[i]
137       // [...]
138     }
139     // here we are attached to current_thd
140   }
141   @endcode
142 
143   @warning The class is not designed to be inherited from.
144  */
145 
146 #ifndef EMBEDDED_LIBRARY
147 
148 class Thd_backup_and_restore
149 {
150 public:
151   /**
152     Try to attach the POSIX thread to a session.
153     - This function attaches the POSIX thread to a session
154     in MAX_SESSION_ATTACH_TRIES tries when encountering
155     'out of memory' error, and terminates the server after
156     failed in MAX_SESSION_ATTACH_TRIES tries.
157 
158     @param[in] backup_thd    The thd to restore to when object is destructed.
159     @param[in] new_thd       The thd to attach to.
160    */
161 
Thd_backup_and_restore(THD * backup_thd,THD * new_thd)162   Thd_backup_and_restore(THD *backup_thd, THD *new_thd)
163     : m_backup_thd(backup_thd), m_new_thd(new_thd),
164       m_new_thd_old_real_id(new_thd->real_id)
165   {
166     assert(m_backup_thd != NULL && m_new_thd != NULL);
167     // Reset the state of the current thd.
168     m_backup_thd->restore_globals();
169     int i= 0;
170     /*
171       Attach the POSIX thread to a session in MAX_SESSION_ATTACH_TRIES
172       tries when encountering 'out of memory' error.
173     */
174     while (i < MAX_SESSION_ATTACH_TRIES)
175     {
176       /*
177         Currently attach_to(...) returns ER_OUTOFMEMORY or 0. So
178         we continue to attach the POSIX thread when encountering
179         the ER_OUTOFMEMORY error. Please take care other error
180         returned from attach_to(...) in future.
181       */
182       if (!attach_to(new_thd))
183       {
184         if (i > 0)
185           sql_print_warning("Server overcomes the temporary 'out of memory' "
186                             "in '%d' tries while attaching to session thread "
187                             "during the group commit phase.\n", i + 1);
188         break;
189       }
190       /* Sleep 1 microsecond per try to avoid temporary 'out of memory' */
191       my_sleep(1);
192       i++;
193     }
194     /*
195       Terminate the server after failed to attach the POSIX thread
196       to a session in MAX_SESSION_ATTACH_TRIES tries.
197     */
198     if (MAX_SESSION_ATTACH_TRIES == i)
199     {
200       my_safe_print_system_time();
201       my_safe_printf_stderr("%s", "[Fatal] Out of memory while attaching to "
202                             "session thread during the group commit phase. "
203                             "Data consistency between master and slave can "
204                             "be guaranteed after server restarts.\n");
205       _exit(MYSQLD_FAILURE_EXIT);
206     }
207   }
208 
209   /**
210       Restores to previous thd.
211    */
~Thd_backup_and_restore()212   ~Thd_backup_and_restore()
213   {
214     /*
215       Restore the global variables of the thd we previously attached to,
216       to its original state. In other words, detach the m_new_thd.
217     */
218     m_new_thd->restore_globals();
219     m_new_thd->real_id= m_new_thd_old_real_id;
220 
221     // Reset the global variables to the original state.
222     if (unlikely(m_backup_thd->store_globals()))
223       assert(0);                           // Out of memory?!
224   }
225 
226 private:
227 
228   /**
229     Attach the POSIX thread to a session.
230    */
attach_to(THD * thd)231   int attach_to(THD *thd)
232   {
233     if (DBUG_EVALUATE_IF("simulate_session_attach_error", 1, 0)
234         || unlikely(thd->store_globals()))
235     {
236       /*
237         Indirectly uses pthread_setspecific, which can only return
238         ENOMEM or EINVAL. Since store_globals are using correct keys,
239         the only alternative is out of memory.
240       */
241       return ER_OUTOFMEMORY;
242     }
243     return 0;
244   }
245 
246   THD *m_backup_thd;
247   THD *m_new_thd;
248   my_thread_t m_new_thd_old_real_id;
249 };
250 
251 #endif /* !EMBEDDED_LIBRARY */
252 
253 /**
254   Caches for non-transactional and transactional data before writing
255   it to the binary log.
256 
  @todo All the access functions for the flags suggest that the
  encapsulation is not done correctly, so try to move any logic that
  requires access to the flags into the cache.
260 */
261 class binlog_cache_data
262 {
263 public:
264 
binlog_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & cache_log_arg)265   binlog_cache_data(bool trx_cache_arg,
266                     my_off_t max_binlog_cache_size_arg,
267                     ulong *ptr_binlog_cache_use_arg,
268                     ulong *ptr_binlog_cache_disk_use_arg,
269                     const IO_CACHE &cache_log_arg)
270   : cache_log(cache_log_arg),
271     m_pending(0),
272     saved_max_binlog_cache_size(max_binlog_cache_size_arg),
273     ptr_binlog_cache_use(ptr_binlog_cache_use_arg),
274     ptr_binlog_cache_disk_use(ptr_binlog_cache_disk_use_arg)
275   {
276     reset();
277     flags.transactional= trx_cache_arg;
278     cache_log.end_of_file= saved_max_binlog_cache_size;
279   }
280 
281   int finalize(THD *thd, Log_event *end_event);
282   int finalize(THD *thd, Log_event *end_event, XID_STATE *xs);
283   int flush(THD *thd, my_off_t *bytes, bool *wrote_xid);
284   int write_event(THD *thd, Log_event *event);
285 
~binlog_cache_data()286   virtual ~binlog_cache_data()
287   {
288     assert(is_binlog_empty());
289     close_cached_file(&cache_log);
290   }
291 
is_binlog_empty() const292   bool is_binlog_empty() const
293   {
294     my_off_t pos= my_b_tell(&cache_log);
295     DBUG_PRINT("debug", ("%s_cache - pending: 0x%llx, bytes: %llu",
296                          (flags.transactional ? "trx" : "stmt"),
297                          (ulonglong) pending(), (ulonglong) pos));
298     return pending() == NULL && pos == 0;
299   }
300 
is_finalized() const301   bool is_finalized() const {
302     return flags.finalized;
303   }
304 
pending() const305   Rows_log_event *pending() const
306   {
307     return m_pending;
308   }
309 
set_pending(Rows_log_event * const pending)310   void set_pending(Rows_log_event *const pending)
311   {
312     m_pending= pending;
313   }
314 
set_incident(void)315   void set_incident(void)
316   {
317     flags.incident= true;
318   }
319 
has_incident(void) const320   bool has_incident(void) const
321   {
322     return flags.incident;
323   }
324 
325   /**
326     Sets the binlog_cache_data::Flags::flush_error flag if there
327     is an error while flushing cache to the file.
328 
329     @param thd  The client thread that is executing the transaction.
330   */
set_flush_error(THD * thd)331   void set_flush_error(THD *thd)
332   {
333     flags.flush_error= true;
334     if(is_trx_cache())
335     {
336       /*
337          If the cache is a transactional cache and if the write
338          has failed due to ENOSPC, then my_write() would have
339          set EE_WRITE error, so clear the error and create an
340          equivalent server error.
341       */
342       if (thd->is_error())
343         thd->clear_error();
344       char errbuf[MYSYS_STRERROR_SIZE];
345       my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), my_filename(cache_log.file),
346           errno, my_strerror(errbuf, sizeof(errbuf), errno));
347     }
348   }
349 
get_flush_error(void) const350   bool get_flush_error(void) const
351   {
352     return flags.flush_error;
353   }
354 
has_xid() const355   bool has_xid() const {
356     // There should only be an XID event if we are transactional
357     assert((flags.transactional && flags.with_xid) || !flags.with_xid);
358     return flags.with_xid;
359   }
360 
is_trx_cache() const361   bool is_trx_cache() const
362   {
363     return flags.transactional;
364   }
365 
get_byte_position() const366   my_off_t get_byte_position() const
367   {
368     return my_b_tell(&cache_log);
369   }
370 
cache_state_rollback(my_off_t pos_to_rollback)371   void cache_state_rollback(my_off_t pos_to_rollback)
372   {
373     if (pos_to_rollback)
374     {
375       std::map<my_off_t,cache_state>::iterator it;
376       it = cache_state_map.find(pos_to_rollback);
377       if (it != cache_state_map.end())
378       {
379         flags.with_rbr= it->second.with_rbr;
380         flags.with_sbr= it->second.with_sbr;
381         flags.with_start= it->second.with_start;
382         flags.with_end= it->second.with_end;
383         flags.with_content= it->second.with_content;
384       }
385       else
386         assert(it == cache_state_map.end());
387     }
388     // Rolling back to pos == 0 means cleaning up the cache.
389     else
390     {
391       flags.with_rbr= false;
392       flags.with_sbr= false;
393       flags.with_start= false;
394       flags.with_end= false;
395       flags.with_content= false;
396     }
397   }
398 
cache_state_checkpoint(my_off_t pos_to_checkpoint)399   void cache_state_checkpoint(my_off_t pos_to_checkpoint)
400   {
401     // We only need to store the cache state for pos > 0
402     if (pos_to_checkpoint)
403     {
404       cache_state state;
405       state.with_rbr= flags.with_rbr;
406       state.with_sbr= flags.with_sbr;
407       state.with_start= flags.with_start;
408       state.with_end= flags.with_end;
409       state.with_content= flags.with_content;
410       cache_state_map[pos_to_checkpoint]= state;
411     }
412   }
413 
reset()414   virtual void reset()
415   {
416     compute_statistics();
417     truncate(0);
418 
419     /*
420       If IOCACHE has a file associated, change its size to 0.
421       It is safer to do it here, since we are certain that one
422       asked the cache to go to position 0 with truncate.
423     */
424     if(cache_log.file != -1)
425     {
426       int error= 0;
427       if((error= my_chsize(cache_log.file, 0, 0, MYF(MY_WME))))
428         sql_print_warning("Unable to resize binlog IOCACHE auxilary file");
429 
430       DBUG_EXECUTE_IF("show_io_cache_size",
431                       {
432                         my_off_t file_size= my_seek(cache_log.file,
433                                                     0L,MY_SEEK_END,MYF(MY_WME+MY_FAE));
434                         sql_print_error("New size:%llu",
435                                         static_cast<ulonglong>(file_size));
436                       });
437     }
438 
439     flags.incident= false;
440     flags.with_xid= false;
441     flags.immediate= false;
442     flags.finalized= false;
443     flags.with_sbr= false;
444     flags.with_rbr= false;
445     flags.with_start= false;
446     flags.with_end= false;
447     flags.with_content= false;
448     flags.flush_error= false;
449 
450     /*
451       The truncate function calls reinit_io_cache that calls my_b_flush_io_cache
452       which may increase disk_writes. This breaks the disk_writes use by the
453       binary log which aims to compute the ratio between in-memory cache usage
454       and disk cache usage. To avoid this undesirable behavior, we reset the
455       variable after truncating the cache.
456     */
457     cache_log.disk_writes= 0;
458     cache_state_map.clear();
459     assert(is_binlog_empty());
460   }
461 
462   /*
463     Sets the write position to point at the position given. If the
464     cache has swapped to a file, it reinitializes it, so that the
465     proper data is added to the IO_CACHE buffer. Otherwise, it just
466     does a my_b_seek.
467 
468     my_b_seek will not work if the cache has swapped, that's why
469     we do this workaround.
470 
471     @param[IN]  pos the new write position.
472     @param[IN]  use_reinit if the position should be reset resorting
473                 to reset_io_cache (which may issue a flush_io_cache
474                 inside)
475 
476     @return The previous write position.
477    */
reset_write_pos(my_off_t pos,bool use_reinit)478   my_off_t reset_write_pos(my_off_t pos, bool use_reinit)
479   {
480     DBUG_ENTER("reset_write_pos");
481     assert(cache_log.type == WRITE_CACHE);
482 
483     my_off_t oldpos= get_byte_position();
484 
485     if (use_reinit)
486       reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, 0);
487     else
488       my_b_seek(&cache_log, pos);
489 
490     DBUG_RETURN(oldpos);
491   }
492 
493   /*
494     Cache to store data before copying it to the binary log.
495   */
496   IO_CACHE cache_log;
497 
498   /**
499     Returns information about the cache content with respect to
500     the binlog_format of the events.
501 
502     This will be used to set a flag on GTID_LOG_EVENT stating that the
503     transaction may have SBR statements or not, but the binlog dump
504     will show this flag as "rbr_only" when it is not set. That's why
505     an empty transaction should return true below, or else an empty
506     transaction would be assumed as "rbr_only" even not having RBR
507     events.
508 
509     When dumping a binary log content using mysqlbinlog client program,
510     for any transaction assumed as "rbr_only" it will be printed a
511     statement changing the transaction isolation level to READ COMMITTED.
512     It doesn't make sense to have an empty transaction "requiring" this
513     isolation level change.
514 
515     @return true  The cache have SBR events or is empty.
516     @return false The cache contains a transaction with no SBR events.
517    */
may_have_sbr_stmts()518   bool may_have_sbr_stmts()
519   {
520     return flags.with_sbr || !flags.with_rbr;
521   }
522 
523   /**
524     Check if the binlog cache contains an empty transaction, which has
525     two binlog events "BEGIN" and "COMMIT".
526 
527     @return true  The binlog cache contains an empty transaction.
528     @return false Otherwise.
529   */
has_empty_transaction()530   bool has_empty_transaction()
531   {
532     /*
533       The empty transaction has two events in trx/stmt binlog cache
534       and no changes (no SBR changing content and no RBR events).
535       Other transaction should not have two events. So we can identify
536       if this is an empty transaction by the event counter and the
537       cache flags.
538     */
539     if (flags.with_start &&     // Has transaction start statement
540             flags.with_end &&   // Has transaction end statement
541             !flags.with_sbr &&  // No statements changing content
542             !flags.with_rbr &&  // No rows changing content
543             !flags.immediate && // Not a DDL
544             !flags.with_xid &&  // Not a XID transaction and not an atomic DDL Query
545             !flags.with_content)// Does not have any content
546     {
547       assert(!flags.with_sbr); // No statements changing content
548       assert(!flags.with_rbr); // No rows changing content
549       assert(!flags.immediate);// Not a DDL
550       assert(!flags.with_xid); // Not a XID trx and not an atomic DDL Query
551 
552       return true;
553     }
554     return false;
555   }
556 
557   /**
558     Check if the binlog cache is empty or contains an empty transaction,
559     which has two binlog events "BEGIN" and "COMMIT".
560 
561     @return true  The binlog cache is empty or contains an empty transaction.
562     @return false Otherwise.
563   */
is_empty_or_has_empty_transaction()564   bool is_empty_or_has_empty_transaction()
565   {
566     return is_binlog_empty() || has_empty_transaction();
567   }
568 
569 protected:
570   /*
571     This structure should have all cache variables/flags that should be restored
572     when a ROLLBACK TO SAVEPOINT statement be executed.
573   */
574   struct cache_state
575   {
576     bool with_sbr;
577     bool with_rbr;
578     bool with_start;
579     bool with_end;
580     bool with_content;
581   };
582   /*
583     For every SAVEPOINT used, we will store a cache_state for the current
584     binlog cache position. So, if a ROLLBACK TO SAVEPOINT is used, we can
585     restore the cache_state values after truncating the binlog cache.
586   */
587   std::map<my_off_t, cache_state> cache_state_map;
588 
589   /*
590     It truncates the cache to a certain position. This includes deleting the
591     pending event.
592    */
truncate(my_off_t pos)593   void truncate(my_off_t pos)
594   {
595     DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
596     remove_pending_event();
597     /*
598       Whenever there is an error while flushing cache to file,
599       the local cache will not be in a normal state and the same
600       cache cannot be used without facing an assert.
601       So, clear the cache if there is a flush error.
602     */
603     reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, get_flush_error());
604     cache_log.end_of_file= saved_max_binlog_cache_size;
605   }
606 
607   /**
608      Flush pending event to the cache buffer.
609    */
flush_pending_event(THD * thd)610   int flush_pending_event(THD *thd) {
611     if (m_pending)
612     {
613       m_pending->set_flags(Rows_log_event::STMT_END_F);
614       if (int error= write_event(thd, m_pending))
615         return error;
616       thd->clear_binlog_table_maps();
617     }
618     return 0;
619   }
620 
621   /**
622     Remove the pending event.
623    */
remove_pending_event()624   int remove_pending_event() {
625     delete m_pending;
626     m_pending= NULL;
627     return 0;
628   }
629   struct Flags {
630     /*
631       Defines if this is either a trx-cache or stmt-cache, respectively, a
632       transactional or non-transactional cache.
633     */
634     bool transactional:1;
635 
636     /*
637       This indicates that some events did not get into the cache and most likely
638       it is corrupted.
639     */
640     bool incident:1;
641 
642     /*
643       This indicates that the cache should be written without BEGIN/END.
644     */
645     bool immediate:1;
646 
647     /*
648       This flag indicates that the buffer was finalized and has to be
649       flushed to disk.
650      */
651     bool finalized:1;
652 
653     /*
654       This indicates that the cache contain an XID event.
655      */
656     bool with_xid:1;
657 
658     /*
659       This indicates that the cache contain statements changing content.
660     */
661     bool with_sbr:1;
662 
663     /*
664       This indicates that the cache contain RBR event changing content.
665     */
666     bool with_rbr:1;
667 
668     /*
669       This indicates that the cache contain s transaction start statement.
670     */
671     bool with_start:1;
672 
673     /*
674       This indicates that the cache contain a transaction end event.
675     */
676     bool with_end:1;
677 
678     /*
679       This indicates that the cache contain content other than START/END.
680     */
681     bool with_content:1;
682 
683     /*
684       This flag is set to 'true' when there is an error while flushing the
685       I/O cache to file.
686     */
687     bool flush_error:1;
688   } flags;
689 
690 private:
691   /*
692     Pending binrows event. This event is the event where the rows are currently
693     written.
694    */
695   Rows_log_event *m_pending;
696 
697   /**
698     This function computes binlog cache and disk usage.
699   */
compute_statistics()700   void compute_statistics()
701   {
702     if (!is_binlog_empty())
703     {
704       (*ptr_binlog_cache_use)++;
705       if (cache_log.disk_writes != 0)
706         (*ptr_binlog_cache_disk_use)++;
707     }
708   }
709 
710   /*
711     Stores the values of maximum size of the cache allowed when this cache
712     is configured. This corresponds to either
713       . max_binlog_cache_size or max_binlog_stmt_cache_size.
714   */
715   my_off_t saved_max_binlog_cache_size;
716 
717   /*
718     Stores a pointer to the status variable that keeps track of the in-memory
719     cache usage. This corresponds to either
720       . binlog_cache_use or binlog_stmt_cache_use.
721   */
722   ulong *ptr_binlog_cache_use;
723 
724   /*
725     Stores a pointer to the status variable that keeps track of the disk
726     cache usage. This corresponds to either
727       . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
728   */
729   ulong *ptr_binlog_cache_disk_use;
730 
731   binlog_cache_data& operator=(const binlog_cache_data& info);
732   binlog_cache_data(const binlog_cache_data& info);
733 };
734 
735 
736 class binlog_stmt_cache_data
737   : public binlog_cache_data
738 {
739 public:
binlog_stmt_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & cache_log)740   binlog_stmt_cache_data(bool trx_cache_arg,
741                         my_off_t max_binlog_cache_size_arg,
742                         ulong *ptr_binlog_cache_use_arg,
743                         ulong *ptr_binlog_cache_disk_use_arg,
744                         const IO_CACHE &cache_log)
745     : binlog_cache_data(trx_cache_arg,
746                         max_binlog_cache_size_arg,
747                         ptr_binlog_cache_use_arg,
748                         ptr_binlog_cache_disk_use_arg,
749                         cache_log)
750   {
751   }
752 
753   using binlog_cache_data::finalize;
754 
755   int finalize(THD *thd);
756 };
757 
758 
759 int
finalize(THD * thd)760 binlog_stmt_cache_data::finalize(THD *thd)
761 {
762   if (flags.immediate)
763   {
764     if (int error= finalize(thd, NULL))
765       return error;
766   }
767   else
768   {
769     Query_log_event
770       end_evt(thd, STRING_WITH_LEN("COMMIT"), false, false, true, 0, true);
771     if (int error= finalize(thd, &end_evt))
772       return error;
773   }
774   return 0;
775 }
776 
777 
778 class binlog_trx_cache_data : public binlog_cache_data
779 {
780 public:
binlog_trx_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & cache_log)781   binlog_trx_cache_data(bool trx_cache_arg,
782                         my_off_t max_binlog_cache_size_arg,
783                         ulong *ptr_binlog_cache_use_arg,
784                         ulong *ptr_binlog_cache_disk_use_arg,
785                         const IO_CACHE &cache_log)
786   : binlog_cache_data(trx_cache_arg,
787                       max_binlog_cache_size_arg,
788                       ptr_binlog_cache_use_arg,
789                       ptr_binlog_cache_disk_use_arg,
790                       cache_log),
791     m_cannot_rollback(FALSE), before_stmt_pos(MY_OFF_T_UNDEF)
792   {   }
793 
reset()794   void reset()
795   {
796     DBUG_ENTER("reset");
797     DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
798     m_cannot_rollback= FALSE;
799     before_stmt_pos= MY_OFF_T_UNDEF;
800     binlog_cache_data::reset();
801     DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
802     DBUG_VOID_RETURN;
803   }
804 
cannot_rollback() const805   bool cannot_rollback() const
806   {
807     return m_cannot_rollback;
808   }
809 
set_cannot_rollback()810   void set_cannot_rollback()
811   {
812     m_cannot_rollback= TRUE;
813   }
814 
get_prev_position() const815   my_off_t get_prev_position() const
816   {
817      return before_stmt_pos;
818   }
819 
set_prev_position(my_off_t pos)820   void set_prev_position(my_off_t pos)
821   {
822     DBUG_ENTER("set_prev_position");
823     DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
824     before_stmt_pos= pos;
825     cache_state_checkpoint(before_stmt_pos);
826     DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
827     DBUG_VOID_RETURN;
828   }
829 
restore_prev_position()830   void restore_prev_position()
831   {
832     DBUG_ENTER("restore_prev_position");
833     DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
834     binlog_cache_data::truncate(before_stmt_pos);
835     cache_state_rollback(before_stmt_pos);
836     before_stmt_pos= MY_OFF_T_UNDEF;
837     DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
838     DBUG_VOID_RETURN;
839   }
840 
restore_savepoint(my_off_t pos)841   void restore_savepoint(my_off_t pos)
842   {
843     DBUG_ENTER("restore_savepoint");
844     DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
845     binlog_cache_data::truncate(pos);
846     if (pos <= before_stmt_pos)
847       before_stmt_pos= MY_OFF_T_UNDEF;
848     cache_state_rollback(pos);
849     DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
850     DBUG_VOID_RETURN;
851   }
852 
853   using binlog_cache_data::truncate;
854 
855   int truncate(THD *thd, bool all);
856 
857 private:
858   /*
859     It will be set TRUE if any statement which cannot be rolled back safely
860     is put in trx_cache.
861   */
862   bool m_cannot_rollback;
863 
864   /*
865     Binlog position before the start of the current statement.
866   */
867   my_off_t before_stmt_pos;
868 
869   binlog_trx_cache_data& operator=(const binlog_trx_cache_data& info);
870   binlog_trx_cache_data(const binlog_trx_cache_data& info);
871 };
872 
873 class binlog_cache_mngr {
874 public:
binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,ulong * ptr_binlog_stmt_cache_use_arg,ulong * ptr_binlog_stmt_cache_disk_use_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & stmt_cache_log,const IO_CACHE & trx_cache_log)875   binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,
876                     ulong *ptr_binlog_stmt_cache_use_arg,
877                     ulong *ptr_binlog_stmt_cache_disk_use_arg,
878                     my_off_t max_binlog_cache_size_arg,
879                     ulong *ptr_binlog_cache_use_arg,
880                     ulong *ptr_binlog_cache_disk_use_arg,
881                     const IO_CACHE &stmt_cache_log,
882                     const IO_CACHE &trx_cache_log)
883   : stmt_cache(FALSE, max_binlog_stmt_cache_size_arg,
884                ptr_binlog_stmt_cache_use_arg,
885                ptr_binlog_stmt_cache_disk_use_arg,
886                stmt_cache_log),
887     trx_cache(TRUE, max_binlog_cache_size_arg,
888               ptr_binlog_cache_use_arg,
889               ptr_binlog_cache_disk_use_arg,
890               trx_cache_log),
891     has_logged_xid(NULL)
892   {  }
893 
get_binlog_cache_data(bool is_transactional)894   binlog_cache_data* get_binlog_cache_data(bool is_transactional)
895   {
896     if (is_transactional)
897       return &trx_cache;
898     else
899       return &stmt_cache;
900   }
901 
get_binlog_cache_log(bool is_transactional)902   IO_CACHE* get_binlog_cache_log(bool is_transactional)
903   {
904     return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
905   }
906 
907   /**
908     Convenience method to check if both caches are empty.
909    */
is_binlog_empty() const910   bool is_binlog_empty() const {
911     return stmt_cache.is_binlog_empty() && trx_cache.is_binlog_empty();
912   }
913 
914   /*
915     clear stmt_cache and trx_cache if they are not empty
916   */
reset()917   void reset()
918   {
919     if (!stmt_cache.is_binlog_empty())
920       stmt_cache.reset();
921     if (!trx_cache.is_binlog_empty())
922       trx_cache.reset();
923   }
924 
925 #ifndef NDEBUG
dbug_any_finalized() const926   bool dbug_any_finalized() const {
927     return stmt_cache.is_finalized() || trx_cache.is_finalized();
928   }
929 #endif
930 
931   /*
932     Convenience method to flush both caches to the binary log.
933 
934     @param bytes_written Pointer to variable that will be set to the
935                          number of bytes written for the flush.
936     @param wrote_xid     Pointer to variable that will be set to @c
937                          true if any XID event was written to the
938                          binary log. Otherwise, the variable will not
939                          be touched.
940     @return Error code on error, zero if no error.
941    */
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)942   int flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
943   {
944     my_off_t stmt_bytes= 0;
945     my_off_t trx_bytes= 0;
946     assert(stmt_cache.has_xid() == 0);
947     int error= stmt_cache.flush(thd, &stmt_bytes, wrote_xid);
948     if (error)
949       return error;
950     DEBUG_SYNC(thd, "after_flush_stm_cache_before_flush_trx_cache");
951     if (int error= trx_cache.flush(thd, &trx_bytes, wrote_xid))
952       return error;
953     *bytes_written= stmt_bytes + trx_bytes;
954     return 0;
955   }
956 
957   /**
958     Check if at least one of transacaction and statement binlog caches
959     contains an empty transaction, other one is empty or contains an
960     empty transaction.
961 
962     @return true  At least one of transacaction and statement binlog
963                   caches an empty transaction, other one is emptry
964                   or contains an empty transaction.
965     @return false Otherwise.
966   */
has_empty_transaction()967   bool has_empty_transaction()
968   {
969     return (trx_cache.is_empty_or_has_empty_transaction() &&
970             stmt_cache.is_empty_or_has_empty_transaction() &&
971             !is_binlog_empty());
972   }
973 
974   binlog_stmt_cache_data stmt_cache;
975   binlog_trx_cache_data trx_cache;
976   /*
977     The bool flag is for preventing do_binlog_xa_commit_rollback()
978     execution twice which can happen for "external" xa commit/rollback.
979   */
980   bool has_logged_xid;
981 private:
982 
983   binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
984   binlog_cache_mngr(const binlog_cache_mngr& info);
985 };
986 
987 
thd_get_cache_mngr(const THD * thd)988 static binlog_cache_mngr *thd_get_cache_mngr(const THD *thd)
989 {
990   /*
991     If opt_bin_log is not set, binlog_hton->slot == -1 and hence
992     thd_get_ha_data(thd, hton) segfaults.
993   */
994 #ifndef WITH_WSREP
995   assert(opt_bin_log);
996 #endif
997   return (binlog_cache_mngr *)thd_get_ha_data(thd, binlog_hton);
998 }
999 
1000 
1001 /**
1002   Checks if the BINLOG_CACHE_SIZE's value is greater than MAX_BINLOG_CACHE_SIZE.
1003   If this happens, the BINLOG_CACHE_SIZE is set to MAX_BINLOG_CACHE_SIZE.
1004 */
check_binlog_cache_size(THD * thd)1005 void check_binlog_cache_size(THD *thd)
1006 {
1007   if (binlog_cache_size > max_binlog_cache_size)
1008   {
1009     if (thd)
1010     {
1011       push_warning_printf(thd, Sql_condition::SL_WARNING,
1012                           ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX,
1013                           ER(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
1014                           (ulong) binlog_cache_size,
1015                           (ulong) max_binlog_cache_size);
1016     }
1017     else
1018     {
1019       sql_print_warning(ER_DEFAULT(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
1020                         binlog_cache_size,
1021                         (ulong) max_binlog_cache_size);
1022     }
1023     binlog_cache_size= static_cast<ulong>(max_binlog_cache_size);
1024   }
1025 }
1026 
1027 /**
1028   Checks if the BINLOG_STMT_CACHE_SIZE's value is greater than MAX_BINLOG_STMT_CACHE_SIZE.
1029   If this happens, the BINLOG_STMT_CACHE_SIZE is set to MAX_BINLOG_STMT_CACHE_SIZE.
1030 */
check_binlog_stmt_cache_size(THD * thd)1031 void check_binlog_stmt_cache_size(THD *thd)
1032 {
1033   if (binlog_stmt_cache_size > max_binlog_stmt_cache_size)
1034   {
1035     if (thd)
1036     {
1037       push_warning_printf(thd, Sql_condition::SL_WARNING,
1038                           ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX,
1039                           ER(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
1040                           (ulong) binlog_stmt_cache_size,
1041                           (ulong) max_binlog_stmt_cache_size);
1042     }
1043     else
1044     {
1045       sql_print_warning(ER_DEFAULT(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
1046                         binlog_stmt_cache_size,
1047                         (ulong) max_binlog_stmt_cache_size);
1048     }
1049     binlog_stmt_cache_size= static_cast<ulong>(max_binlog_stmt_cache_size);
1050   }
1051 }
1052 
1053 /**
1054  Check whether binlog_hton has valid slot and enabled
1055 */
binlog_enabled()1056 bool binlog_enabled()
1057 {
1058 	return(binlog_hton && binlog_hton->slot != HA_SLOT_UNDEF);
1059 }
1060 
1061  /*
1062   Save position of binary log transaction cache.
1063 
1064   SYNPOSIS
1065     binlog_trans_log_savepos()
1066 
1067     thd      The thread to take the binlog data from
1068     pos      Pointer to variable where the position will be stored
1069 
1070   DESCRIPTION
1071 
1072     Save the current position in the binary log transaction cache into
1073     the variable pointed to by 'pos'
1074  */
1075 
1076 static void
binlog_trans_log_savepos(THD * thd,my_off_t * pos)1077 binlog_trans_log_savepos(THD *thd, my_off_t *pos)
1078 {
1079   DBUG_ENTER("binlog_trans_log_savepos");
1080   assert(pos != NULL);
1081   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
1082 #ifdef WITH_WSREP
1083   assert((WSREP_EMULATE_BINLOG(thd)) || mysql_bin_log.is_open());
1084 #else
1085   assert(mysql_bin_log.is_open());
1086 #endif /* WITH_WSREP */
1087   *pos= cache_mngr->trx_cache.get_byte_position();
1088   DBUG_PRINT("return", ("position: %lu", (ulong) *pos));
1089   cache_mngr->trx_cache.cache_state_checkpoint(*pos);
1090   DBUG_VOID_RETURN;
1091 }
1092 
/**
  Recover callback for the binlog handlerton that does nothing.

  None of the arguments are used.

  @return Always 0.
*/
static int binlog_dummy_recover(handlerton *hton, XID *xid, uint len)
{
  return 0;
}
1097 
1098 /**
1099   Auxiliary class to copy serialized events to the binary log and
1100   correct some of the fields that are not known until just before
1101   writing the event.
1102 
1103   This class allows feeding events in parts, so it is practical to use
1104   in do_write_cache() which reads events from an IO_CACHE where events
1105   may span mutiple cache pages.
1106 
1107   The following fields are fixed before writing the event:
1108   - end_log_pos is set
1109   - the checksum is computed if checksums are enabled
1110   - the length is incremented by the checksum size if checksums are enabled
1111 */
1112 class Binlog_event_writer
1113 {
1114   IO_CACHE *output_cache;
1115   bool have_checksum;
1116   ha_checksum initial_checksum;
1117   ha_checksum checksum;
1118   uint32 end_log_pos;
1119 
1120 public:
1121   /**
1122     Constructs a new Binlog_event_writer. Should be called once before
1123     starting to flush the transaction or statement cache to the
1124     binlog.
1125 
1126     @param output_cache_arg IO_CACHE to write to.
1127     @param have_checksum_al
1128   */
Binlog_event_writer(IO_CACHE * output_cache_arg)1129   Binlog_event_writer(IO_CACHE *output_cache_arg)
1130     : output_cache(output_cache_arg),
1131       have_checksum(binlog_checksum_options !=
1132                     binary_log::BINLOG_CHECKSUM_ALG_OFF),
1133       initial_checksum(my_checksum(0L, NULL, 0)),
1134       checksum(initial_checksum),
1135       end_log_pos(my_b_tell(output_cache))
1136   {
1137     // Simulate checksum error
1138     if (DBUG_EVALUATE_IF("fault_injection_crc_value", 1, 0))
1139       checksum--;
1140   }
1141 
1142   /**
1143     Write part of an event to disk.
1144 
1145     @param buf_p[IN,OUT] Points to buffer with data to write.  The
1146     caller must set this initially, and it will be increased by the
1147     number of bytes written.
1148 
1149     @param buf_len_p[IN,OUT] Points to the remaining length of the
1150     buffer, i.e., from buf_p to the end of the buffer.  The caller
1151     must set this initially, and it will be decreased by the number of
1152     written bytes.
1153 
1154     @param event_len_p[IN,OUT] Points to the remaining length of the
1155     event, i.e., the size of the event minus what was already written.
1156     This must be initialized to zero by the caller, must be remembered
1157     by the caller between calls, and is updated by this function: when
1158     an event begins it is set to the length of the event, and for each
1159     call it is decreased by the number of written bytes.
1160 
1161     It is allowed that buf_len_p is less than event_len_p (i.e., event
1162     is only partial) and that event_len_p is less than buf_len_p
1163     (i.e., there is more than this event in the buffer).  This
1164     function will write as much as is available of one event, but
1165     never more than one.  It is required that buf_len_p >=
1166     LOG_EVENT_HEADER_LEN.
1167 
1168     @retval true Error, i.e., my_b_write failed.
1169     @retval false Success.
1170   */
write_event_part(uchar ** buf_p,uint32 * buf_len_p,uint32 * event_len_p)1171   bool write_event_part(uchar **buf_p, uint32 *buf_len_p, uint32 *event_len_p)
1172   {
1173     DBUG_ENTER("Binlog_event_writer::write_event_part");
1174 
1175     if (*buf_len_p == 0)
1176       DBUG_RETURN(false);
1177 
1178     // This is the beginning of an event
1179     if (*event_len_p == 0)
1180     {
1181       // Caller must ensure that the first part of the event contains
1182       // a full event header.
1183       assert(*buf_len_p >= LOG_EVENT_HEADER_LEN);
1184 
1185       // Read event length
1186       *event_len_p= uint4korr(*buf_p + EVENT_LEN_OFFSET);
1187 
1188       // Increase end_log_pos
1189       end_log_pos+= *event_len_p;
1190 
1191       // Change event length if checksum is enabled
1192       if (have_checksum)
1193       {
1194         int4store(*buf_p + EVENT_LEN_OFFSET,
1195                   *event_len_p + BINLOG_CHECKSUM_LEN);
1196         // end_log_pos is shifted by the checksum length
1197         end_log_pos+= BINLOG_CHECKSUM_LEN;
1198       }
1199 
1200       // Store end_log_pos
1201       int4store(*buf_p + LOG_POS_OFFSET, end_log_pos);
1202     }
1203 
1204     // write the buffer
1205     uint32 write_bytes= std::min<uint32>(*buf_len_p, *event_len_p);
1206     assert(write_bytes > 0);
1207     if (my_b_write(output_cache, *buf_p, write_bytes))
1208       DBUG_RETURN(true);
1209 
1210     // update the checksum
1211     if (have_checksum)
1212       checksum= my_checksum(checksum, *buf_p, write_bytes);
1213 
1214     // Step positions.
1215     *buf_p+= write_bytes;
1216     *buf_len_p-= write_bytes;
1217     *event_len_p-= write_bytes;
1218 
1219     if (have_checksum)
1220     {
1221       // store checksum
1222       if (*event_len_p == 0)
1223       {
1224         char checksum_buf[BINLOG_CHECKSUM_LEN];
1225         int4store(checksum_buf, checksum);
1226         if (my_b_write(output_cache, checksum_buf, BINLOG_CHECKSUM_LEN))
1227           DBUG_RETURN(true);
1228         checksum= initial_checksum;
1229       }
1230     }
1231 
1232     DBUG_RETURN(false);
1233   }
1234 
1235   /**
1236     Write a full event to disk.
1237 
1238     This is a wrapper around write_event_part, which handles the
1239     special case where you have a complete event in the buffer.
1240 
1241     @param buf Buffer to write.
1242     @param buf_len Number of bytes to write.
1243 
1244     @retval true Error, i.e., my_b_write failed.
1245     @retval false Success.
1246   */
write_full_event(uchar * buf,uint32 buf_len)1247   bool write_full_event(uchar *buf, uint32 buf_len)
1248   {
1249     uint32 event_len_unused= 0;
1250     bool ret= write_event_part(&buf, &buf_len, &event_len_unused);
1251     assert(buf_len == 0);
1252     assert(event_len_unused == 0);
1253     return ret;
1254   }
1255 
1256 };
1257 
1258 
1259 /*
1260   this function is mostly a placeholder.
1261   conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
1262   should be moved here.
1263 */
1264 
binlog_init(void * p)1265 static int binlog_init(void *p)
1266 {
1267   binlog_hton= (handlerton *)p;
1268 #ifdef WITH_WSREP
1269   if (WSREP_ON)
1270     binlog_hton->state= SHOW_OPTION_YES;
1271   else
1272   {
1273 #endif /* WITH_WSREP */
1274   binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
1275 #ifdef WITH_WSREP
1276   }
1277 #endif /* WITH_WSREP */
1278   binlog_hton->db_type=DB_TYPE_BINLOG;
1279   binlog_hton->savepoint_offset= sizeof(my_off_t);
1280   binlog_hton->close_connection= binlog_close_connection;
1281   binlog_hton->savepoint_set= binlog_savepoint_set;
1282   binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
1283   binlog_hton->savepoint_rollback_can_release_mdl=
1284                                      binlog_savepoint_rollback_can_release_mdl;
1285   binlog_hton->commit= binlog_commit;
1286   binlog_hton->commit_by_xid= binlog_xa_commit;
1287   binlog_hton->rollback= binlog_rollback;
1288   binlog_hton->rollback_by_xid= binlog_xa_rollback;
1289   binlog_hton->prepare= binlog_prepare;
1290   binlog_hton->recover=binlog_dummy_recover;
1291   binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
1292   return 0;
1293 }
1294 
1295 #ifdef WITH_WSREP
1296 #include "wsrep_binlog.h"
1297 #endif /* WITH_WSREP */
1298 
binlog_deinit(void * p)1299 static int binlog_deinit(void *p)
1300 {
1301   /* Using binlog as TC after the binlog has been unloaded, won't work */
1302   if (tc_log == &mysql_bin_log)
1303     tc_log= NULL;
1304   binlog_hton= NULL;
1305   return 0;
1306 }
1307 
1308 
/**
  close_connection callback of the binlog handlerton (installed in
  binlog_init()).

  Detaches the session's binlog cache manager from the binlog_hton
  ha_data slot and destroys it. Both caches are expected to be empty
  at this point (asserted). In WSREP builds, non-empty caches are first
  reported with WSREP_WARN and dumped with wsrep_dump_rbr_buf().

  @param hton Unused; the global binlog_hton is used instead.
  @param thd  Session that is being closed.
  @return Always 0.
*/
static int binlog_close_connection(handlerton *hton, THD *thd)
{
  DBUG_ENTER("binlog_close_connection");
  binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
#ifdef WITH_WSREP
  if (!cache_mngr->is_binlog_empty()) {
    /* Report and dump what is left in the transaction cache. */
    IO_CACHE* cache= get_trans_log(thd, true);
    uchar *buf= NULL;
    size_t len= 0;
    wsrep_write_cache_buf(cache, &buf, &len);
    WSREP_WARN("binlog trx cache not empty (%llu bytes) @ connection close %llu",
               (unsigned long long) len, (unsigned long long) thd->thread_id());
    if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);

    /*
      Same for the statement cache.
      NOTE(review): buf and len are passed on from the first call
      without being reset, and buf is not released in this function --
      confirm the ownership contract of wsrep_write_cache_buf().
    */
    cache = cache_mngr->get_binlog_cache_log(false);
    wsrep_write_cache_buf(cache, &buf, &len);
    WSREP_WARN("binlog stmt cache not empty (%llu bytes) @ connection close %llu",
               (unsigned long long) len, (unsigned long long) thd->thread_id());
    if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
  }
#endif /* WITH_WSREP */
  assert(cache_mngr->is_binlog_empty());
  DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) NULL));
  thd_set_ha_data(thd, binlog_hton, NULL);
  /*
    Explicit destructor call followed by my_free(): presumably the
    manager was placement-constructed in my_malloc'ed memory -- verify
    against the allocation site.
  */
  cache_mngr->~binlog_cache_mngr();
  my_free(cache_mngr);
  DBUG_RETURN(0);
}
1337 
write_event(THD * thd,Log_event * ev)1338 int binlog_cache_data::write_event(THD *thd, Log_event *ev)
1339 {
1340   DBUG_ENTER("binlog_cache_data::write_event");
1341 
1342   if (ev != NULL)
1343   {
1344     DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
1345                   {DBUG_SET("+d,simulate_file_write_error");});
1346 
1347     DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
1348                   {
1349                   static int count= -1;
1350                   count++;
1351                   if(count %4 == 3 && ev->get_type_code() ==
1352                       binary_log::WRITE_ROWS_EVENT)
1353                     DBUG_SET("+d,simulate_temp_file_write_error");
1354                   });
1355     if (ev->write(&cache_log) != 0)
1356     {
1357       DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
1358                       {
1359                         DBUG_SET("-d,simulate_file_write_error");
1360                         DBUG_SET("-d,simulate_disk_full_at_flush_pending");
1361                         /*
1362                            after +d,simulate_file_write_error the local cache
1363                            is in unsane state. Since -d,simulate_file_write_error
1364                            revokes the first simulation do_write_cache()
1365                            can't be run without facing an assert.
1366                            So it's blocked with the following 2nd simulation:
1367                         */
1368                         DBUG_SET("+d,simulate_do_write_cache_failure");
1369                       });
1370 
1371       DBUG_EXECUTE_IF("simulate_temp_file_write_error",
1372                       {
1373                         DBUG_SET("-d,simulate_temp_file_write_error");
1374                       });
1375       /*
1376         If the flush has failed due to ENOSPC error, set the
1377         flush_error flag.
1378       */
1379       if (thd->is_error() && my_errno() == ENOSPC)
1380       {
1381         set_flush_error(thd);
1382       }
1383       DBUG_RETURN(1);
1384     }
1385     if (ev->get_type_code() == binary_log::XID_EVENT)
1386       flags.with_xid= true;
1387     if (ev->is_using_immediate_logging())
1388       flags.immediate= true;
1389     /* With respect to the event type being written */
1390     if (ev->is_sbr_logging_format())
1391       flags.with_sbr= true;
1392     if (ev->is_rbr_logging_format())
1393       flags.with_rbr= true;
1394 #ifndef EMBEDDED_LIBRARY
1395     /* With respect to empty transactions */
1396     if (ev->starts_group())
1397       flags.with_start= true;
1398     if (ev->ends_group())
1399       flags.with_end= true;
1400     if ((!ev->starts_group() && !ev->ends_group())
1401         ||ev->get_type_code() == binary_log::VIEW_CHANGE_EVENT)
1402       flags.with_content= true;
1403 #endif
1404   }
1405   DBUG_RETURN(0);
1406 }
1407 
assign_automatic_gtids_to_flush_group(THD * first_seen)1408 bool MYSQL_BIN_LOG::assign_automatic_gtids_to_flush_group(THD *first_seen)
1409 {
1410   DBUG_ENTER("MYSQL_BIN_LOG::assign_automatic_gtids_to_flush_group");
1411   bool error= false;
1412   bool is_global_sid_locked= false;
1413   rpl_sidno locked_sidno= 0;
1414 
1415   for (THD *head= first_seen ; head ; head = head->next_to_commit)
1416   {
1417     assert(head->variables.gtid_next.type != UNDEFINED_GROUP);
1418 
1419     /* Generate GTID */
1420     if (head->variables.gtid_next.type == AUTOMATIC_GROUP)
1421     {
1422       if (!is_global_sid_locked)
1423       {
1424         global_sid_lock->rdlock();
1425         is_global_sid_locked= true;
1426       }
1427       if (gtid_state->generate_automatic_gtid(head,
1428               head->get_transaction()->get_rpl_transaction_ctx()->get_sidno(),
1429               head->get_transaction()->get_rpl_transaction_ctx()->get_gno(),
1430               &locked_sidno)
1431               != RETURN_STATUS_OK)
1432       {
1433         head->commit_error= THD::CE_FLUSH_GNO_EXHAUSTED_ERROR;
1434         error= true;
1435       }
1436     }
1437     else
1438     {
1439       DBUG_PRINT("info", ("thd->variables.gtid_next.type=%d "
1440                           "thd->owned_gtid.sidno=%d",
1441                           head->variables.gtid_next.type,
1442                           head->owned_gtid.sidno));
1443       if (head->variables.gtid_next.type == GTID_GROUP)
1444         assert(head->owned_gtid.sidno > 0);
1445       else
1446       {
1447         assert(head->variables.gtid_next.type == ANONYMOUS_GROUP);
1448         assert(head->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS);
1449       }
1450     }
1451   }
1452 
1453   if (locked_sidno > 0)
1454     gtid_state->unlock_sidno(locked_sidno);
1455 
1456   if (is_global_sid_locked)
1457     global_sid_lock->unlock();
1458 
1459   DBUG_RETURN(error);
1460 }
1461 
1462 
1463 /**
1464   Write the Gtid_log_event to the binary log (prior to writing the
1465   statement or transaction cache).
1466 
1467   @param thd Thread that is committing.
1468   @param cache_data The cache that is flushing.
1469   @param writer The event will be written to this Binlog_event_writer object.
1470 
1471   @retval false Success.
1472   @retval true Error.
1473 */
write_gtid(THD * thd,binlog_cache_data * cache_data,Binlog_event_writer * writer)1474 bool MYSQL_BIN_LOG::write_gtid(THD *thd, binlog_cache_data *cache_data,
1475                                Binlog_event_writer *writer)
1476 {
1477   DBUG_ENTER("MYSQL_BIN_LOG::write_gtid");
1478 
1479   /*
1480     The GTID for the THD was assigned at
1481     assign_automatic_gtids_to_flush_group()
1482   */
1483   assert(thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS ||
1484          thd->owned_gtid.sidno > 0);
1485 
1486   int64 sequence_number, last_committed;
1487   /* Generate logical timestamps for MTS */
1488   m_dependency_tracker.get_dependency(thd, sequence_number, last_committed);
1489 
1490   /*
1491     In case both the transaction cache and the statement cache are
1492     non-empty, both will be flushed in sequence and logged as
1493     different transactions. Then the second transaction must only
1494     be executed after the first one has committed. Therefore, we
1495     need to set last_committed for the second transaction equal to
1496     last_committed for the first transaction. This is done in
1497     binlog_cache_data::flush. binlog_cache_data::flush uses the
1498     condition trn_ctx->last_committed==SEQ_UNINIT to detect this
1499     situation, hence the need to set it here.
1500   */
1501   thd->get_transaction()->last_committed= SEQ_UNINIT;
1502 
1503 
1504   /*
1505     Generate and write the Gtid_log_event.
1506   */
1507   Gtid_log_event gtid_event(thd, cache_data->is_trx_cache(),
1508                             last_committed, sequence_number,
1509                             cache_data->may_have_sbr_stmts());
1510   uchar buf[Gtid_log_event::MAX_EVENT_LENGTH];
1511   uint32 buf_len= gtid_event.write_to_memory(buf);
1512   bool ret= writer->write_full_event(buf, buf_len);
1513 
1514   DBUG_RETURN(ret);
1515 }
1516 
1517 
/**
  End-of-transaction GTID handling for a session.

  Three cases are distinguished by the state of thd->owned_gtid:
  - the session owns a real GTID (sidno > 0): the GTID is either saved
    directly through gtid_state (binary logging not applicable to this
    thread) or a BEGIN event is written to the transaction cache and
    the transaction is committed through mysql_bin_log.commit();
  - the session is anonymous or has a GTID consistency violation:
    gtid_state->update_on_commit() is called;
  - gtid_next is GTID_GROUP but nothing is owned:
    gtid_state->update_on_commit() is called.
  In any other state nothing is done.

  @param thd Session whose transaction is ending.

  @retval 0 Success.
  @retval 1 Saving the GTID, setting up the binlog cache, writing the
            BEGIN event or committing failed.
*/
int MYSQL_BIN_LOG::gtid_end_transaction(THD *thd)
{
  DBUG_ENTER("MYSQL_BIN_LOG::gtid_end_transaction");

  DBUG_PRINT("info", ("query=%s", thd->query().str));

  if (thd->owned_gtid.sidno > 0)
  {
    assert(thd->variables.gtid_next.type == GTID_GROUP);

    if (!opt_bin_log || (thd->slave_thread && !opt_log_slave_updates))
    {
      /*
        If the binary log is disabled for this thread (either by
        log_bin=0 or sql_log_bin=0 or by log_slave_updates=0 for a
        slave thread), then the statement must not be written to the
        binary log.  In this case, we just save the GTID into the
        table directly.

        (This only happens for DDL, since DML will save the GTID into
        table and release ownership inside ha_commit_trans.)
      */
      if (gtid_state->save(thd) != 0)
      {
        gtid_state->update_on_rollback(thd);
        DBUG_RETURN(1);
      }
      else
        gtid_state->update_on_commit(thd);
    }
    else
    {
      /*
        If statement is supposed to be written to binlog, we write it
        to the binary log.  Inserting into table and releasing
        ownership will be done in the binlog commit handler.
      */

      /*
        thd->cache_mngr may be uninitialized if the first transaction
        executed by the client is empty.
      */
      if (thd->binlog_setup_trx_data())
        DBUG_RETURN(1);
      binlog_cache_data *cache_data= &thd_get_cache_mngr(thd)->trx_cache;

      // Generate BEGIN event
      Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE,
                            FALSE, TRUE, 0, TRUE);
      assert(!qinfo.is_using_immediate_logging());

#ifdef WITH_WSREP
  /*
    WSREP builds: flag the session for GTID replication when it is a
    slave thread that is not a wsrep applier.
  */
  if (WSREP_ON && thd->slave_thread && !thd->wsrep_applier)
  {
    thd->wsrep_replicate_GTID= true;
  }
#endif /* WITH_WSREP */
      /*
        Write BEGIN event and then commit (which will generate commit
        event and Gtid_log_event)
      */
      DBUG_PRINT("debug", ("Writing to trx_cache"));
      if (cache_data->write_event(thd, &qinfo) ||
          mysql_bin_log.commit(thd, true))
        DBUG_RETURN(1);
    }
  }
  else if (thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS ||
           /*
             A transaction with an empty owned gtid should call
             end_gtid_violating_transaction(...) to clear the
             flag thd->has_gtid_consistency_violation in case
             it is set. It missed the clear in ordered_commit,
             because its binlog transaction cache is empty.
           */
           thd->has_gtid_consistency_violation)

  {
    gtid_state->update_on_commit(thd);
  }
  else if (thd->variables.gtid_next.type == GTID_GROUP &&
           thd->owned_gtid.is_empty())
  {
    /* gtid_next names a GTID, but the session does not own one. */
    assert(thd->has_gtid_consistency_violation == false);
    gtid_state->update_on_commit(thd);
  }

  DBUG_RETURN(0);
}
1607 
1608 /**
1609   This function finalizes the cache preparing for commit or rollback.
1610 
1611   The function just writes all the necessary events to the cache but
1612   does not flush the data to the binary log file. That is the role of
1613   the binlog_cache_data::flush function.
1614 
1615   @see binlog_cache_data::flush
1616 
1617   @param thd                The thread whose transaction should be flushed
1618   @param cache_data         Pointer to the cache
1619   @param end_ev             The end event either commit/rollback
1620 
1621   @return
1622     nonzero if an error pops up when flushing the cache.
1623 */
1624 int
finalize(THD * thd,Log_event * end_event)1625 binlog_cache_data::finalize(THD *thd, Log_event *end_event)
1626 {
1627   DBUG_ENTER("binlog_cache_data::finalize");
1628   if (!is_binlog_empty())
1629   {
1630     assert(!flags.finalized);
1631     if (int error= flush_pending_event(thd))
1632       DBUG_RETURN(error);
1633     if (int error= write_event(thd, end_event))
1634       DBUG_RETURN(error);
1635     flags.finalized= true;
1636     DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
1637   }
1638   DBUG_RETURN(0);
1639 }
1640 
1641 
1642 /**
1643    The method writes XA END query to XA-prepared transaction's cache
1644    and calls the "basic" finalize().
1645 
1646    @return error code, 0 success
1647 */
1648 
finalize(THD * thd,Log_event * end_event,XID_STATE * xs)1649 int binlog_cache_data::finalize(THD *thd, Log_event *end_event, XID_STATE *xs)
1650 {
1651   int error= 0;
1652   char buf[XID::ser_buf_size];
1653   char query[sizeof("XA END") + 1 + sizeof(buf)];
1654   int qlen= sprintf(query, "XA END %s", xs->get_xid()->serialize(buf));
1655   Query_log_event qev(thd, query, qlen, true, false, true, 0);
1656 
1657   if ((error= write_event(thd, &qev)))
1658     return error;
1659 
1660   return finalize(thd, end_event);
1661 }
1662 
1663 
1664 /**
1665   Flush caches to the binary log.
1666 
1667   If the cache is finalized, the cache will be flushed to the binary
1668   log file. If the cache is not finalized, nothing will be done.
1669 
1670   If flushing fails for any reason, an error will be reported and the
1671   cache will be reset. Flushing can fail in two circumstances:
1672 
1673   - It was not possible to write the cache to the file. In this case,
1674     it does not make sense to keep the cache.
1675 
1676   - The cache was successfully written to disk but post-flush actions
1677     (such as binary log rotation) failed. In this case, the cache is
1678     already written to disk and there is no reason to keep it.
1679 
1680   @see binlog_cache_data::finalize
1681  */
int
binlog_cache_data::flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
{
  /*
    Parameters:
      thd            Session whose cache is flushed.
      bytes_written  If not NULL and the cache was finalized, receives
                     the cache position sampled before writing; it is
                     assigned whether or not writing succeeded.
      wrote_xid      Set to true only when the cache holds an XID event
                     and no error occurred; otherwise left untouched.

    Returns 0 on success or when the cache is not finalized, nonzero
    on error.
  */

  /*
    Doing a commit or a rollback including non-transactional tables,
    i.e., ending a transaction where we might write the transaction
    cache to the binary log.

    We can always end the statement when ending a transaction since
    transactions are not allowed inside stored functions. If they
    were, we would have to ensure that we're not ending a statement
    inside a stored function.
  */

  DBUG_ENTER("binlog_cache_data::flush");
  DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
  int error= 0;
  if (flags.finalized)
  {
    /* Sample the cache size up front: reset() below discards the cache. */
    my_off_t bytes_in_cache= my_b_tell(&cache_log);
    Transaction_ctx *trn_ctx= thd->get_transaction();

    DBUG_PRINT("debug", ("bytes_in_cache: %llu", bytes_in_cache));

    trn_ctx->sequence_number= mysql_bin_log.m_dependency_tracker.step();
    /*
      In case of two caches the transaction is split into two groups.
      The 2nd group is considered to be a successor of the 1st rather
      than to have a common commit parent with it.
      Notice that due to a simple method of detection that the current is
      the 2nd cache being flushed, the very first few transactions may be logged
      sequentially (a next one is tagged as if a preceding one is its
      commit parent).
    */
    if (trn_ctx->last_committed == SEQ_UNINIT)
      trn_ctx->last_committed= trn_ctx->sequence_number - 1;

    /*
      The GTID is written prior to flushing the statement cache, if
      the transaction has written to the statement cache; and prior to
      flushing the transaction cache if the transaction has written to
      the transaction cache.  If GTIDs are enabled, then transactional
      and non-transactional updates cannot be mixed, so at most one of
      the caches can be non-empty, so just one GTID will be
      generated. If GTIDs are disabled, then no GTID is generated at
      all; if both the transactional cache and the statement cache are
      non-empty then we get two Anonymous_gtid_log_events, which is
      correct.
    */
    Binlog_event_writer writer(mysql_bin_log.get_log_file());

    /* The GTID ownership process might set the commit_error */
    error= (thd->commit_error == THD::CE_FLUSH_ERROR ||
           thd->commit_error == THD::CE_FLUSH_GNO_EXHAUSTED_ERROR);

    /*
      NOTE(review): the injected commit_error is set after 'error' was
      sampled above, so it does not stop the writes below in this
      call -- presumably it is picked up later by the commit pipeline;
      confirm.
    */
    DBUG_EXECUTE_IF("simulate_binlog_flush_error",
                    {
                      if (rand() % 3 == 0)
                      {
                        thd->commit_error= THD::CE_FLUSH_ERROR;
                      }
                    };);

    /* Write the GTID event first, then the cache contents. */
    if (!error)
      if ((error= mysql_bin_log.write_gtid(thd, this, &writer)))
        thd->commit_error= THD::CE_FLUSH_ERROR;
    if (!error)
      error= mysql_bin_log.write_cache(thd, this, &writer);

    if (flags.with_xid && error == 0)
      *wrote_xid= true;

    /*
      Reset have to be after the if above, since it clears the
      with_xid flag
    */
    reset();
    if (bytes_written)
      *bytes_written= bytes_in_cache;
  }
  /* Either the cache was never finalized or reset() cleared the flag. */
  assert(!flags.finalized);
  DBUG_RETURN(error);
}
1765 
1766 /**
1767   This function truncates the transactional cache upon committing or rolling
1768   back either a transaction or a statement.
1769 
1770   @param thd        The thread whose transaction should be flushed
1771   @param cache_mngr Pointer to the cache data to be flushed
1772   @param all        @c true means truncate the transaction, otherwise the
1773                     statement must be truncated.
1774 
1775   @return
1776     nonzero if an error pops up when truncating the transactional cache.
1777 */
1778 int
truncate(THD * thd,bool all)1779 binlog_trx_cache_data::truncate(THD *thd, bool all)
1780 {
1781   DBUG_ENTER("binlog_trx_cache_data::truncate");
1782   int error=0;
1783 
1784   DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1785                       FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1786                       FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1787                       all ? "all" : "stmt"));
1788 
1789   remove_pending_event();
1790 
1791   /*
1792     If rolling back an entire transaction or a single statement not
1793     inside a transaction, we reset the transaction cache.
1794   */
1795   if (ending_trans(thd, all))
1796   {
1797     if (has_incident())
1798     {
1799       const char* err_msg= "Error happend while resetting the transaction "
1800                            "cache for a rolled back transaction or a single "
1801                            "statement not inside a transaction.";
1802       error= mysql_bin_log.write_incident(thd, true/*need_lock_log=true*/,
1803                                           err_msg);
1804     }
1805     reset();
1806   }
1807   /*
1808     If rolling back a statement in a transaction, we truncate the
1809     transaction cache to remove the statement.
1810   */
1811   else if (get_prev_position() != MY_OFF_T_UNDEF)
1812     restore_prev_position();
1813 
1814   thd->clear_binlog_table_maps();
1815 
1816   DBUG_RETURN(error);
1817 }
1818 
1819 
get_xa_opt(THD * thd)1820 inline enum xa_option_words get_xa_opt(THD *thd)
1821 {
1822   enum xa_option_words xa_opt= XA_NONE;
1823   switch(thd->lex->sql_command)
1824   {
1825   case SQLCOM_XA_COMMIT:
1826     xa_opt= static_cast<Sql_cmd_xa_commit*>(thd->lex->m_sql_cmd)->get_xa_opt();
1827     break;
1828   default:
1829     break;
1830   }
1831 
1832   return xa_opt;
1833 }
1834 
1835 
1836 /**
1837    Predicate function yields true when XA transaction is
1838    being logged having a proper state ready for prepare or
1839    commit in one phase.
1840 
1841    @param thd    THD pointer of running transaction
1842    @return true  When the being prepared transaction should be binlogged,
1843            false otherwise.
1844 */
1845 
is_loggable_xa_prepare(THD * thd)1846 inline bool is_loggable_xa_prepare(THD *thd)
1847 {
1848   /*
1849     simulate_commit_failure is doing a trick with XID_STATE while
1850     the ongoing transaction is not XA, and therefore to be errored out,
1851     asserted below. In that case because of the
1852     latter fact the function returns @c false.
1853   */
1854   DBUG_EXECUTE_IF("simulate_commit_failure",
1855                   {
1856                     XID_STATE *xs= thd->get_transaction()->xid_state();
1857                     assert((thd->is_error() &&
1858                             xs->get_state() == XID_STATE::XA_IDLE) ||
1859                            xs->get_state() == XID_STATE::XA_NOTR);
1860                   });
1861 
1862   return DBUG_EVALUATE_IF("simulate_commit_failure",
1863                           false,
1864                           thd->get_transaction()->xid_state()->
1865                           has_state(XID_STATE::XA_IDLE));
1866 }
1867 
binlog_prepare(handlerton * hton,THD * thd,bool all)1868 static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1869 {
1870   DBUG_ENTER("binlog_prepare");
1871   if (!all)
1872   {
1873     thd->get_transaction()->store_commit_parent(mysql_bin_log.
1874       m_dependency_tracker.get_max_committed_timestamp());
1875 
1876   }
1877 
1878   DBUG_RETURN(all && is_loggable_xa_prepare(thd) ?
1879               mysql_bin_log.commit(thd, true) : 0);
1880 }
1881 
1882 
1883 /**
1884    Logging XA commit/rollback of a prepared transaction.
1885 
1886    The function is called at XA-commit or XA-rollback logging via
1887    two paths: the recovered-or-slave-applier or immediately through
1888    the  XA-prepared transaction connection itself.
1889    It fills in appropiate event in the statement cache whenever
1890    xid state is marked with is_binlogged() flag that indicates
1891    the prepared part of the transaction must've been logged.
1892 
1893    About early returns from the function.
1894    In the recovered-or-slave-applier case the function may be called
1895    for the 2nd time, which has_logged_xid monitors.
1896    ONE_PHASE option to XA-COMMIT is handled to skip
1897    writing XA-commit event now.
1898    And the final early return check is for the read-only XA that is
1899    not to be logged.
1900 
1901    @param thd          THD handle
1902    @param xid          a pointer to XID object that is serialized
1903    @param commit       when @c true XA-COMMIT is to be logged,
1904                        and @c false when it's XA-ROLLBACK.
1905    @return error code, 0 success
1906 */
1907 
do_binlog_xa_commit_rollback(THD * thd,XID * xid,bool commit)1908 inline int do_binlog_xa_commit_rollback(THD *thd, XID *xid, bool commit)
1909 {
1910   assert(thd->lex->sql_command == SQLCOM_XA_COMMIT ||
1911          thd->lex->sql_command == SQLCOM_XA_ROLLBACK);
1912 
1913   XID_STATE *xid_state= thd->get_transaction()->xid_state();
1914   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
1915 
1916   if (cache_mngr != NULL && cache_mngr->has_logged_xid)
1917     return 0;
1918 
1919   if (get_xa_opt(thd) == XA_ONE_PHASE)
1920     return 0;
1921   if (!xid_state->is_binlogged())
1922     return 0; // nothing was really logged at prepare
1923   if (thd->is_error() && DBUG_EVALUATE_IF("simulate_xa_rm_error", 0, 1))
1924     return 0; // don't binlog if there are some errors.
1925 
1926   assert(!xid->is_null() ||
1927          !(thd->variables.option_bits & OPTION_BIN_LOG));
1928 
1929   char buf[XID::ser_buf_size];
1930   char query[(sizeof("XA ROLLBACK")) + 1 + sizeof(buf)];
1931   int qlen= sprintf(query, "XA %s %s", commit ? "COMMIT" : "ROLLBACK",
1932                     xid->serialize(buf));
1933   Query_log_event qinfo(thd, query, qlen, false, true, true, 0, false);
1934   return mysql_bin_log.write_event(&qinfo);
1935 }
1936 
1937 
1938 /**
1939    Logging XA commit/rollback of a prepared transaction in the case
1940    it was disconnected and resumed (recovered), or executed by a slave applier.
1941 
1942    @param thd         THD handle
1943    @param xid         a pointer to XID object
1944    @param commit      when @c true XA-COMMIT is logged, otherwise XA-ROLLBACK
1945 
1946    @return error code, 0 success
1947 */
1948 
binlog_xa_commit_or_rollback(THD * thd,XID * xid,bool commit)1949 inline int binlog_xa_commit_or_rollback(THD *thd, XID *xid, bool commit)
1950 {
1951   int error= 0;
1952 
1953 #ifndef NDEBUG
1954   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
1955   assert(!cache_mngr || !cache_mngr->has_logged_xid);
1956 #endif
1957   if (!(error= do_binlog_xa_commit_rollback(thd, xid, commit)))
1958   {
1959     /*
1960       Error can't be propagated naturally via result.
1961       A grand-caller has to access to it through thd's da.
1962       todo:
1963       Bug #20488921 ERROR PROPAGATION DOES FULLY WORK IN XA
1964       stands in the way of implementing a failure simulation
1965       for XA PREPARE/COMMIT/ROLLBACK.
1966     */
1967     binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
1968 
1969     if (cache_mngr)
1970       cache_mngr->has_logged_xid= true;
1971     if (commit)
1972       (void) mysql_bin_log.commit(thd, true);
1973     else
1974       (void) mysql_bin_log.rollback(thd, true);
1975     if (cache_mngr)
1976       cache_mngr->has_logged_xid= false;
1977   }
1978   return error;
1979 }
1980 
1981 
binlog_xa_commit(handlerton * hton,XID * xid)1982 static int binlog_xa_commit(handlerton *hton,  XID *xid)
1983 {
1984   (void) binlog_xa_commit_or_rollback(current_thd, xid, true);
1985 
1986   return 0;
1987 }
1988 
1989 
binlog_xa_rollback(handlerton * hton,XID * xid)1990 static int binlog_xa_rollback(handlerton *hton,  XID *xid)
1991 {
1992   (void) binlog_xa_commit_or_rollback(current_thd, xid, false);
1993 
1994   return 0;
1995 }
1996 
1997 /**
1998   When a fatal error occurs due to which binary logging becomes impossible and
1999   the user specified binlog_error_action= ABORT_SERVER the following function is
2000   invoked. This function pushes the appropriate error message to client and logs
2001   the same to server error log and then aborts the server.
2002 
2003   @param err_string          Error string which specifies the exact error
2004                              message from the caller.
2005 
2006   @retval
2007     none
2008 */
exec_binlog_error_action_abort(const char * err_string)2009 static void exec_binlog_error_action_abort(const char* err_string)
2010 {
2011   THD *thd= current_thd;
2012   /*
2013     When the code enters here it means that there was an error at higher layer
2014     and my_error function could have been invoked to let the client know what
2015     went wrong during the execution.
2016 
2017     But these errors will not let the client know that the server is going to
2018     abort. Even if we add an additional my_error function call at this point
2019     client will be able to see only the first error message that was set
2020     during the very first invocation of my_error function call.
2021 
2022     The advantage of having multiple my_error function calls are visible when
2023     the server is up and running and user issues SHOW WARNINGS or SHOW ERROR
2024     calls. In this special scenario server will be immediately aborted and
2025     user will not be able execute the above SHOW commands.
2026 
2027     Hence we clear the previous errors and push one critical error message to
2028     clients.
2029    */
2030   if (thd)
2031   {
2032     if (thd->is_error())
2033       thd->clear_error();
2034     /*
2035       Adding ME_ERRORLOG flag will ensure that the error is sent to both
2036       client and to the server error log as well.
2037     */
2038     my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(ME_ERRORLOG + ME_FATALERROR),
2039              err_string);
2040     thd->send_statement_status();
2041   }
2042   else
2043     sql_print_error("%s",err_string);
2044   abort();
2045 }
2046 
2047 
2048 
2049 /**
2050   This function is called once after each statement.
2051 
2052   @todo This function is currently not used any more and will
2053   eventually be eliminated. The real commit job is done in the
2054   MYSQL_BIN_LOG::commit function.
2055 
2056   @see MYSQL_BIN_LOG::commit
2057 
2058   @param hton  The binlog handlerton.
2059   @param thd   The client thread that executes the transaction.
2060   @param all   This is @c true if this is a real transaction commit, and
2061                @false otherwise.
2062 
2063   @see handlerton::commit
2064 */
binlog_commit(handlerton * hton,THD * thd,bool all)2065 static int binlog_commit(handlerton *hton, THD *thd, bool all)
2066 {
2067   DBUG_ENTER("binlog_commit");
2068   /*
2069     Nothing to do (any more) on commit.
2070    */
2071   DBUG_RETURN(0);
2072 }
2073 
2074 /**
2075   This function is called when a transaction or a statement is rolled back.
2076 
2077   @internal It is necessary to execute a rollback here if the
2078   transaction was rolled back because of executing a ROLLBACK TO
2079   SAVEPOINT command, but it is not used for normal rollback since
2080   MYSQL_BIN_LOG::rollback is called in that case.
2081 
2082   @todo Refactor code to introduce a <code>MYSQL_BIN_LOG::rollback(THD
2083   *thd, SAVEPOINT *sv)</code> function in @c TC_LOG and have that
2084   function execute the necessary work to rollback to a savepoint.
2085 
2086   @param hton  The binlog handlerton.
2087   @param thd   The client thread that executes the transaction.
2088   @param all   This is @c true if this is a real transaction rollback, and
2089                @false otherwise.
2090 
2091   @see handlerton::rollback
2092 */
binlog_rollback(handlerton * hton,THD * thd,bool all)2093 static int binlog_rollback(handlerton *hton, THD *thd, bool all)
2094 {
2095   DBUG_ENTER("binlog_rollback");
2096   int error= 0;
2097 #ifdef WITH_WSREP
2098   if (thd->lex->sql_command == SQLCOM_ROLLBACK_TO_SAVEPOINT &&
2099       thd->wsrep_conflict_state != ABORTING)
2100 #else
2101   if (thd->lex->sql_command == SQLCOM_ROLLBACK_TO_SAVEPOINT)
2102 #endif /* WITH_WSREP */
2103     error= mysql_bin_log.rollback(thd, all);
2104   DBUG_RETURN(error);
2105 }
2106 
2107 
2108 bool
append(THD * first)2109 Stage_manager::Mutex_queue::append(THD *first)
2110 {
2111   DBUG_ENTER("Stage_manager::Mutex_queue::append");
2112   lock();
2113   DBUG_PRINT("enter", ("first: 0x%llx", (ulonglong) first));
2114   DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2115                        (ulonglong) m_first, (ulonglong) &m_first,
2116                        (ulonglong) m_last));
2117   int32 count= 1;
2118   bool empty= (m_first == NULL);
2119   *m_last= first;
2120   DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2121                        (ulonglong) m_first, (ulonglong) &m_first,
2122                        (ulonglong) m_last));
2123   /*
2124     Go to the last THD instance of the list. We expect lists to be
2125     moderately short. If they are not, we need to track the end of
2126     the queue as well.
2127   */
2128 
2129   while (first->next_to_commit)
2130   {
2131     count++;
2132     first= first->next_to_commit;
2133   }
2134   my_atomic_add32(&m_size, count);
2135 
2136   m_last= &first->next_to_commit;
2137   DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2138                         (ulonglong) m_first, (ulonglong) &m_first,
2139                         (ulonglong) m_last));
2140   assert(m_first || m_last == &m_first);
2141   DBUG_PRINT("return", ("empty: %s", YESNO(empty)));
2142   unlock();
2143   DBUG_RETURN(empty);
2144 }
2145 
2146 
2147 std::pair<bool, THD*>
pop_front()2148 Stage_manager::Mutex_queue::pop_front()
2149 {
2150   DBUG_ENTER("Stage_manager::Mutex_queue::pop_front");
2151   lock();
2152   THD *result= m_first;
2153   bool more= true;
2154   /*
2155     We do not set next_to_commit to NULL here since this is only used
2156     in the flush stage. We will have to call fetch_queue last here,
2157     and will then "cut" the linked list by setting the end of that
2158     queue to NULL.
2159   */
2160   if (result)
2161     m_first= result->next_to_commit;
2162   if (m_first == NULL)
2163   {
2164     more= false;
2165     m_last = &m_first;
2166   }
2167   assert(my_atomic_load32(&m_size) > 0);
2168   my_atomic_add32(&m_size, -1);
2169   assert(m_first || m_last == &m_first);
2170   unlock();
2171   DBUG_PRINT("return", ("result: 0x%llx, more: %s",
2172                         (ulonglong) result, YESNO(more)));
2173   DBUG_RETURN(std::make_pair(more, result));
2174 }
2175 
2176 
2177 bool
enroll_for(StageID stage,THD * thd,mysql_mutex_t * stage_mutex)2178 Stage_manager::enroll_for(StageID stage, THD *thd, mysql_mutex_t *stage_mutex)
2179 {
2180   // If the queue was empty: we're the leader for this batch
2181   DBUG_PRINT("debug", ("Enqueue 0x%llx to queue for stage %d",
2182                        (ulonglong) thd, stage));
2183   bool leader= m_queue[stage].append(thd);
2184 
2185 #ifdef HAVE_REPLICATION
2186   if (stage == FLUSH_STAGE && has_commit_order_manager(thd))
2187   {
2188     Slave_worker *worker= dynamic_cast<Slave_worker *>(thd->rli_slave);
2189     Commit_order_manager *mngr= worker->get_commit_order_manager();
2190 
2191     mngr->unregister_trx(worker);
2192   }
2193 #endif
2194 
2195   /*
2196     We do not need to unlock the stage_mutex if it is LOCK_log when rotating
2197     binlog caused by logging incident log event, since it should be held
2198     always during rotation.
2199   */
2200   bool need_unlock_stage_mutex=
2201     !(mysql_bin_log.is_rotating_caused_by_incident &&
2202       stage_mutex == mysql_bin_log.get_log_lock());
2203 
2204   /*
2205     The stage mutex can be NULL if we are enrolling for the first
2206     stage.
2207   */
2208   if (stage_mutex && need_unlock_stage_mutex)
2209     mysql_mutex_unlock(stage_mutex);
2210 
2211 #ifndef NDEBUG
2212   DBUG_PRINT("info", ("This is a leader thread: %d (0=n 1=y)", leader));
2213 
2214   DEBUG_SYNC(thd, "after_enrolling_for_stage");
2215 
2216   switch (stage)
2217   {
2218   case Stage_manager::FLUSH_STAGE:
2219     DEBUG_SYNC(thd, "bgc_after_enrolling_for_flush_stage");
2220     break;
2221   case Stage_manager::SYNC_STAGE:
2222     DEBUG_SYNC(thd, "bgc_after_enrolling_for_sync_stage");
2223     break;
2224   case Stage_manager::COMMIT_STAGE:
2225     DEBUG_SYNC(thd, "bgc_after_enrolling_for_commit_stage");
2226     break;
2227   default:
2228     // not reached
2229     assert(0);
2230   }
2231 
2232   DBUG_EXECUTE_IF("assert_leader", assert(leader););
2233   DBUG_EXECUTE_IF("assert_follower", assert(!leader););
2234 #endif
2235 
2236   /*
2237     If the queue was not empty, we're a follower and wait for the
2238     leader to process the queue. If we were holding a mutex, we have
2239     to release it before going to sleep.
2240   */
2241   if (!leader)
2242   {
2243     mysql_mutex_lock(&m_lock_done);
2244 #ifndef NDEBUG
2245     /*
2246       Leader can be awaiting all-clear to preempt follower's execution.
2247       With setting the status the follower ensures it won't execute anything
2248       including thread-specific code.
2249     */
2250     thd->get_transaction()->m_flags.ready_preempt= 1;
2251     if (leader_await_preempt_status)
2252       mysql_cond_signal(&m_cond_preempt);
2253 #endif
2254     while (thd->get_transaction()->m_flags.pending)
2255       mysql_cond_wait(&m_cond_done, &m_lock_done);
2256     mysql_mutex_unlock(&m_lock_done);
2257   }
2258   return leader;
2259 }
2260 
2261 
fetch_and_empty()2262 THD *Stage_manager::Mutex_queue::fetch_and_empty()
2263 {
2264   DBUG_ENTER("Stage_manager::Mutex_queue::fetch_and_empty");
2265   lock();
2266   DBUG_PRINT("enter", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2267                        (ulonglong) m_first, (ulonglong) &m_first,
2268                        (ulonglong) m_last));
2269   THD *result= m_first;
2270   m_first= NULL;
2271   m_last= &m_first;
2272   DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2273                        (ulonglong) m_first, (ulonglong) &m_first,
2274                        (ulonglong) m_last));
2275   DBUG_PRINT("info", ("fetched queue of %d transactions", my_atomic_load32(&m_size)));
2276   DBUG_PRINT("return", ("result: 0x%llx", (ulonglong) result));
2277   assert(my_atomic_load32(&m_size) >= 0);
2278   my_atomic_store32(&m_size, 0);
2279   unlock();
2280   DBUG_RETURN(result);
2281 }
2282 
wait_count_or_timeout(ulong count,long usec,StageID stage)2283 void Stage_manager::wait_count_or_timeout(ulong count, long usec, StageID stage)
2284 {
2285   long to_wait=
2286     DBUG_EVALUATE_IF("bgc_set_infinite_delay", LONG_MAX, usec);
2287   /*
2288     For testing purposes while waiting for inifinity
2289     to arrive, we keep checking the queue size at regular,
2290     small intervals. Otherwise, waiting 0.1 * infinite
2291     is too long.
2292    */
2293   long delta=
2294     DBUG_EVALUATE_IF("bgc_set_infinite_delay", 100000,
2295                      max<long>(1, (to_wait * 0.1)));
2296 
2297   while (to_wait > 0 && (count == 0 || static_cast<ulong>(m_queue[stage].get_size()) < count))
2298   {
2299 #ifndef NDEBUG
2300     if (current_thd)
2301       DEBUG_SYNC(current_thd, "bgc_wait_count_or_timeout");
2302 #endif
2303     my_sleep(delta);
2304     to_wait -= delta;
2305   }
2306 }
2307 
signal_done(THD * queue)2308 void Stage_manager::signal_done(THD *queue)
2309 {
2310   mysql_mutex_lock(&m_lock_done);
2311   for (THD *thd= queue ; thd ; thd = thd->next_to_commit)
2312     thd->get_transaction()->m_flags.pending= false;
2313   mysql_mutex_unlock(&m_lock_done);
2314   mysql_cond_broadcast(&m_cond_done);
2315 }
2316 
2317 #ifndef NDEBUG
clear_preempt_status(THD * head)2318 void Stage_manager::clear_preempt_status(THD *head)
2319 {
2320   assert(head);
2321 
2322   mysql_mutex_lock(&m_lock_done);
2323   while(!head->get_transaction()->m_flags.ready_preempt)
2324   {
2325     leader_await_preempt_status= true;
2326     mysql_cond_wait(&m_cond_preempt, &m_lock_done);
2327   }
2328   leader_await_preempt_status= false;
2329   mysql_mutex_unlock(&m_lock_done);
2330 }
2331 #endif
2332 
2333 /**
2334   Write a rollback record of the transaction to the binary log.
2335 
2336   For binary log group commit, the rollback is separated into three
2337   parts:
2338 
2339   1. First part consists of filling the necessary caches and
2340      finalizing them (if they need to be finalized). After a cache is
2341      finalized, nothing can be added to the cache.
2342 
2343   2. Second part execute an ordered flush and commit. This will be
2344      done using the group commit functionality in @c ordered_commit.
2345 
2346      Since we roll back the transaction early, we call @c
2347      ordered_commit with the @c skip_commit flag set. The @c
2348      ha_commit_low call inside @c ordered_commit will then not be
2349      called.
2350 
2351   3. Third part checks any errors resulting from the flush and handles
2352      them appropriately.
2353 
2354   @see MYSQL_BIN_LOG::ordered_commit
2355   @see ha_commit_low
2356   @see ha_rollback_low
2357 
2358   @param thd Session to commit
  @param all This is @c true if this is a real transaction rollback, and
             @c false otherwise.
2361 
2362   @return Error code, or zero if there were no error.
2363  */
2364 
rollback(THD * thd,bool all)2365 int MYSQL_BIN_LOG::rollback(THD *thd, bool all)
2366 {
2367   int error= 0;
2368   bool stuff_logged= false;
2369   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
2370 
2371   DBUG_ENTER("MYSQL_BIN_LOG::rollback(THD *thd, bool all)");
2372   DBUG_PRINT("enter", ("all: %s, cache_mngr: 0x%llx, thd->is_error: %s",
2373                        YESNO(all), (ulonglong) cache_mngr,
2374                        YESNO(thd->is_error())));
2375   /*
2376     Defer XA-transaction rollback until its XA-rollback event is recorded.
2377     When we are executing a ROLLBACK TO SAVEPOINT, we
2378     should only clear the caches since this function is called as part
2379     of the engine rollback.
2380     In other cases we roll back the transaction in the engines early
2381     since this will release locks and allow other transactions to
2382     start executing.
2383   */
2384   if (thd->lex->sql_command == SQLCOM_XA_ROLLBACK)
2385   {
2386     XID_STATE *xs= thd->get_transaction()->xid_state();
2387 
2388     assert(all || !xs->is_binlogged() ||
2389            (!xs->is_in_recovery() && thd->is_error()));
2390     /*
2391       Whenever cache_mngr is not initialized, the xa prepared
2392       transaction's binary logging status must not be set, unless the
2393       transaction is rolled back through an external connection which
2394       has binlogging switched off.
2395     */
2396     assert(cache_mngr || !xs->is_binlogged()
2397            || !(is_open() && thd->variables.option_bits & OPTION_BIN_LOG));
2398 
2399     if ((error= do_binlog_xa_commit_rollback(thd, xs->get_xid(), false)))
2400       goto end;
2401     cache_mngr= thd_get_cache_mngr(thd);
2402   }
2403 #ifdef WITH_WSREP
2404   /*
2405     BF aborted THD may have dandling sql_command set to SQLCOM_ROLLBACK_TO_SAVEPOINT,
2406     don't care about it, as we have to BF abort this one
2407    */
2408   else if (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT ||
2409     thd->wsrep_conflict_state == ABORTING)
2410 #else
2411   else if (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT)
2412 #endif /* WITH_WSREP */
2413     if ((error= ha_rollback_low(thd, all)))
2414       goto end;
2415 
2416   /*
2417     If there is no cache manager, or if there is nothing in the
2418     caches, there are no caches to roll back, so we're trivially done
2419     unless XA-ROLLBACK that yet to run rollback_low().
2420   */
2421   if (cache_mngr == NULL || cache_mngr->is_binlog_empty())
2422   {
2423     goto end;
2424   }
2425 
2426   DBUG_PRINT("debug",
2427              ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
2428               YESNO(thd->get_transaction()->cannot_safely_rollback(
2429                   Transaction_ctx::SESSION)),
2430               YESNO(cache_mngr->trx_cache.is_binlog_empty())));
2431   DBUG_PRINT("debug",
2432              ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
2433               YESNO(thd->get_transaction()->cannot_safely_rollback(
2434                   Transaction_ctx::STMT)),
2435               YESNO(cache_mngr->stmt_cache.is_binlog_empty())));
2436 
2437   /*
2438     If an incident event is set we do not flush the content of the statement
2439     cache because it may be corrupted.
2440   */
2441   if (cache_mngr->stmt_cache.has_incident())
2442   {
2443     const char* err_msg= "The content of the statement cache is corrupted "
2444                          "while writing a rollback record of the transaction "
2445                          "to the binary log.";
2446     error= write_incident(thd, true/*need_lock_log=true*/, err_msg);
2447     cache_mngr->stmt_cache.reset();
2448   }
2449   else if (!cache_mngr->stmt_cache.is_binlog_empty())
2450   {
2451     if (thd->lex->sql_command == SQLCOM_CREATE_TABLE &&
2452         thd->lex->select_lex->item_list.elements && /* With select */
2453         !(thd->lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) &&
2454         thd->is_current_stmt_binlog_format_row())
2455     {
2456       /*
2457         In row based binlog format, we reset the binlog statement cache
2458         when rolling back a single statement 'CREATE...SELECT' transaction,
2459         since the 'CREATE TABLE' event was put in the binlog statement cache.
2460       */
2461       cache_mngr->stmt_cache.reset();
2462     }
2463     else
2464     {
2465       if ((error= cache_mngr->stmt_cache.finalize(thd)))
2466         goto end;
2467       stuff_logged= true;
2468     }
2469   }
2470 
2471   if (ending_trans(thd, all))
2472   {
2473     if (trans_cannot_safely_rollback(thd))
2474     {
2475       const char xa_rollback_str[]= "XA ROLLBACK";
2476       /*
2477         sizeof(xa_rollback_str) and XID::ser_buf_size both allocate `\0',
2478         so one of the two is used for necessary in the xa case `space' char
2479       */
2480       char query[sizeof(xa_rollback_str) + XID::ser_buf_size]= "ROLLBACK";
2481       XID_STATE *xs= thd->get_transaction()->xid_state();
2482 
2483       if (thd->lex->sql_command == SQLCOM_XA_ROLLBACK)
2484       {
2485         /* this block is relevant only for not prepared yet and "local" xa trx */
2486         assert(thd->get_transaction()->xid_state()->
2487                has_state(XID_STATE::XA_IDLE));
2488         assert(!cache_mngr->has_logged_xid);
2489 
2490         sprintf(query, "%s ", xa_rollback_str);
2491         xs->get_xid()->serialize(query + sizeof(xa_rollback_str));
2492       }
2493       /*
2494         If the transaction is being rolled back and contains changes that
2495         cannot be rolled back, the trx-cache's content is flushed.
2496       */
2497       Query_log_event
2498         end_evt(thd, query, strlen(query), true, false, true, 0, true);
2499       error= thd->lex->sql_command != SQLCOM_XA_ROLLBACK ?
2500         cache_mngr->trx_cache.finalize(thd, &end_evt) :
2501         cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
2502       stuff_logged= true;
2503     }
2504     else
2505     {
2506       /*
2507         If the transaction is being rolled back and its changes can be
2508         rolled back, the trx-cache's content is truncated.
2509       */
2510       error= cache_mngr->trx_cache.truncate(thd, all);
2511     }
2512   }
2513   else
2514   {
2515     /*
2516       If a statement is being rolled back, it is necessary to know
2517       exactly why a statement may not be safely rolled back as in
2518       some specific situations the trx-cache can be truncated.
2519 
2520       If a temporary table is created or dropped, the trx-cache is not
2521       truncated. Note that if the stmt-cache is used, there is nothing
2522       to truncate in the trx-cache.
2523 
2524       If a non-transactional table is updated and the binlog format is
2525       statement, the trx-cache is not truncated. The trx-cache is used
2526       when the direct option is off and a transactional table has been
2527       updated before the current statement in the context of the
2528       current transaction. Note that if the stmt-cache is used there is
2529       nothing to truncate in the trx-cache.
2530 
2531       If other binlog formats are used, updates to non-transactional
2532       tables are written to the stmt-cache and trx-cache can be safely
2533       truncated, if necessary.
2534     */
2535     if (thd->get_transaction()->has_dropped_temp_table(
2536           Transaction_ctx::STMT) ||
2537         thd->get_transaction()->has_created_temp_table(
2538           Transaction_ctx::STMT) ||
2539         (thd->get_transaction()->has_modified_non_trans_table(
2540           Transaction_ctx::STMT) &&
2541         thd->variables.binlog_format == BINLOG_FORMAT_STMT))
2542     {
2543       /*
2544         If the statement is being rolled back and dropped or created a
2545         temporary table or modified a non-transactional table and the
2546         statement-based replication is in use, the statement's changes
2547         in the trx-cache are preserved.
2548       */
2549       cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
2550     }
2551     else
2552     {
2553       /*
2554         Otherwise, the statement's changes in the trx-cache are
2555         truncated.
2556       */
2557       error= cache_mngr->trx_cache.truncate(thd, all);
2558     }
2559   }
2560   if (stuff_logged)
2561   {
2562     Transaction_ctx *trn_ctx= thd->get_transaction();
2563     trn_ctx->store_commit_parent(m_dependency_tracker.get_max_committed_timestamp());
2564   }
2565 
2566   DBUG_PRINT("debug", ("error: %d", error));
2567   if (error == 0 && stuff_logged)
2568   {
2569     if (RUN_HOOK(transaction,
2570                  before_commit,
2571                  (thd, all,
2572                   thd_get_cache_mngr(thd)->get_binlog_cache_log(true),
2573                   thd_get_cache_mngr(thd)->get_binlog_cache_log(false),
2574                   max<my_off_t>(max_binlog_cache_size,
2575                                 max_binlog_stmt_cache_size))))
2576     {
2577       //Reset the thread OK status before changing the outcome.
2578       if (thd->get_stmt_da()->is_ok())
2579         thd->get_stmt_da()->reset_diagnostics_area();
2580       my_error(ER_RUN_HOOK_ERROR, MYF(0), "before_commit");
2581       DBUG_RETURN(RESULT_ABORTED);
2582     }
2583 #ifndef NDEBUG
2584     /*
2585       XA rollback is always accepted.
2586     */
2587     if (thd->get_transaction()->get_rpl_transaction_ctx()->is_transaction_rollback())
2588       assert(0);
2589 #endif
2590 
2591     error= ordered_commit(thd, all, /* skip_commit */ true);
2592   }
2593 
2594 #ifdef WITH_WSREP
2595   if (!WSREP_EMULATE_BINLOG(thd) && check_write_error(thd))
2596 #else
2597   if (check_write_error(thd))
2598 #endif
2599   {
2600     /*
2601       We reach this point if the effect of a statement did not properly get into
2602       a cache and need to be rolled back.
2603     */
2604     error|= cache_mngr->trx_cache.truncate(thd, all);
2605   }
2606 
2607 end:
2608   /* Deferred xa rollback to engines */
2609   if (!error && thd->lex->sql_command == SQLCOM_XA_ROLLBACK)
2610   {
2611     error= ha_rollback_low(thd, all);
2612     /* Successful XA-rollback commits the new gtid_state */
2613     if (!error && !thd->is_error())
2614       gtid_state->update_on_commit(thd);
2615   }
  /*
    When a statement errors out in auto-commit mode it is rolled back
    implicitly, so the same should happen to its GTID.
  */
2620   if (!thd->in_active_multi_stmt_transaction())
2621     gtid_state->update_on_rollback(thd);
2622 
2623   /*
2624     TODO: some errors are overwritten, which may cause problem,
2625     fix it later.
2626   */
2627   DBUG_PRINT("return", ("error: %d", error));
2628   DBUG_RETURN(error);
2629 }
2630 
2631 /**
2632   @note
2633   How do we handle this (unlikely but legal) case:
2634   @verbatim
2635     [transaction] + [update to non-trans table] + [rollback to savepoint] ?
2636   @endverbatim
2637   The problem occurs when a savepoint is before the update to the
2638   non-transactional table. Then when there's a rollback to the savepoint, if we
2639   simply truncate the binlog cache, we lose the part of the binlog cache where
2640   the update is. If we want to not lose it, we need to write the SAVEPOINT
2641   command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
2642   is easy: it's just write at the end of the binlog cache, but the former
2643   should be *inserted* to the place where the user called SAVEPOINT. The
2644   solution is that when the user calls SAVEPOINT, we write it to the binlog
2645   cache (so no need to later insert it). As transactions are never intermixed
2646   in the binary log (i.e. they are serialized), we won't have conflicts with
2647   savepoint names when using mysqlbinlog or in the slave SQL thread.
2648   Then when ROLLBACK TO SAVEPOINT is called, if we updated some
2649   non-transactional table, we don't truncate the binlog cache but instead write
2650   ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
2651   will chop the SAVEPOINT command from the binlog cache, which is good as in
2652   that case there is no need to have it in the binlog).
2653 */
2654 
binlog_savepoint_set(handlerton * hton,THD * thd,void * sv)2655 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
2656 {
2657   DBUG_ENTER("binlog_savepoint_set");
2658   int error= 1;
2659 #ifdef WITH_WSREP
2660   /*
2661     Clear table maps before writing SAVEPOINT event. This enforces
2662     recreation of table map events for the following row event.
2663    */
2664   thd->clear_binlog_table_maps();
2665 #endif /* WITH_WSREP */
2666   String log_query;
2667   if (log_query.append(STRING_WITH_LEN("SAVEPOINT ")))
2668     DBUG_RETURN(error);
2669   else
2670     append_identifier(thd, &log_query, thd->lex->ident.str,
2671                       thd->lex->ident.length);
2672 
2673   int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
2674   Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2675                         TRUE, FALSE, TRUE, errcode);
2676   /*
2677     We cannot record the position before writing the statement
2678     because a rollback to a savepoint (.e.g. consider it "S") would
2679     prevent the savepoint statement (i.e. "SAVEPOINT S") from being
2680     written to the binary log despite the fact that the server could
2681     still issue other rollback statements to the same savepoint (i.e.
2682     "S").
2683     Given that the savepoint is valid until the server releases it,
2684     ie, until the transaction commits or it is released explicitly,
2685     we need to log it anyway so that we don't have "ROLLBACK TO S"
2686     or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
2687     log.
2688   */
2689   if (!(error= mysql_bin_log.write_event(&qinfo)))
2690     binlog_trans_log_savepos(thd, (my_off_t*) sv);
2691 
2692   DBUG_RETURN(error);
2693 }
2694 
binlog_savepoint_rollback(handlerton * hton,THD * thd,void * sv)2695 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
2696 {
2697   DBUG_ENTER("binlog_savepoint_rollback");
2698   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
2699   my_off_t pos= *(my_off_t*) sv;
2700   assert(pos != ~(my_off_t) 0);
2701 
2702   /*
2703     Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
2704     non-transactional table. Otherwise, truncate the binlog cache starting
2705     from the SAVEPOINT command.
2706   */
2707   if (trans_cannot_safely_rollback(thd))
2708   {
2709     String log_query;
2710     if (log_query.append(STRING_WITH_LEN("ROLLBACK TO ")))
2711       DBUG_RETURN(1);
2712     else
2713     {
2714       /*
2715         Before writing identifier to the binlog, make sure to
2716         quote the identifier properly so as to prevent any SQL
2717         injection on the slave.
2718       */
2719       append_identifier(thd, &log_query, thd->lex->ident.str,
2720                         thd->lex->ident.length);
2721     }
2722 
2723     int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
2724     Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2725                           TRUE, FALSE, TRUE, errcode);
2726     DBUG_RETURN(mysql_bin_log.write_event(&qinfo));
2727   }
2728   // Otherwise, we truncate the cache
2729   cache_mngr->trx_cache.restore_savepoint(pos);
2730   /*
2731     When a SAVEPOINT is executed inside a stored function/trigger we force the
2732     pending event to be flushed with a STMT_END_F flag and clear the table maps
2733     as well to ensure that following DMLs will have a clean state to start
2734     with. ROLLBACK inside a stored routine has to finalize possibly existing
2735     current row-based pending event with cleaning up table maps. That ensures
2736     that following DMLs will have a clean state to start with.
2737    */
2738   if (thd->in_sub_stmt)
2739     thd->clear_binlog_table_maps();
2740   DBUG_RETURN(0);
2741 }
2742 
2743 /**
2744    purge logs, master and slave sides both, related error code
2745    convertor.
2746    Called from @c purge_error_message(), @c MYSQL_BIN_LOG::reset_logs()
2747 
2748    @param  res  an error code as used by purging routines
2749 
2750    @return the user level error code ER_*
2751 */
purge_log_get_error_code(int res)2752 static uint purge_log_get_error_code(int res)
2753 {
2754   uint errcode= 0;
2755 
2756   switch (res)  {
2757   case 0: break;
2758   case LOG_INFO_EOF:	errcode= ER_UNKNOWN_TARGET_BINLOG; break;
2759   case LOG_INFO_IO:	errcode= ER_IO_ERR_LOG_INDEX_READ; break;
2760   case LOG_INFO_INVALID:errcode= ER_BINLOG_PURGE_PROHIBITED; break;
2761   case LOG_INFO_SEEK:	errcode= ER_FSEEK_FAIL; break;
2762   case LOG_INFO_MEM:	errcode= ER_OUT_OF_RESOURCES; break;
2763   case LOG_INFO_FATAL:	errcode= ER_BINLOG_PURGE_FATAL_ERR; break;
2764   case LOG_INFO_IN_USE: errcode= ER_LOG_IN_USE; break;
2765   case LOG_INFO_EMFILE: errcode= ER_BINLOG_PURGE_EMFILE; break;
2766   default:		errcode= ER_LOG_PURGE_UNKNOWN_ERR; break;
2767   }
2768 
2769   return errcode;
2770 }
2771 
2772 /**
2773   Check whether binlog state allows to safely release MDL locks after
2774   rollback to savepoint.
2775 
2776   @param hton  The binlog handlerton.
2777   @param thd   The client thread that executes the transaction.
2778 
2779   @return true  - It is safe to release MDL locks.
2780           false - If it is not.
2781 */
binlog_savepoint_rollback_can_release_mdl(handlerton * hton,THD * thd)2782 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
2783                                                       THD *thd)
2784 {
2785   DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
2786   /**
2787     If we have not updated any non-transactional tables rollback
2788     to savepoint will simply truncate binlog cache starting from
2789     SAVEPOINT command. So it should be safe to release MDL acquired
2790     after SAVEPOINT command in this case.
2791   */
2792   DBUG_RETURN(!trans_cannot_safely_rollback(thd));
2793 }
2794 
2795 #ifdef HAVE_REPLICATION
2796 /**
2797   Adjust log offset in the binary log file for all running slaves
2798   This class implements call back function for do_for_all_thd().
2799   It is called for each thd in thd list to adjust offset.
2800 */
2801 class Adjust_offset : public Do_THD_Impl
2802 {
2803 public:
Adjust_offset(my_off_t value)2804   Adjust_offset(my_off_t value) : m_purge_offset(value) {}
operator ()(THD * thd)2805   virtual void operator()(THD *thd)
2806   {
2807     LOG_INFO* linfo;
2808     mysql_mutex_lock(&thd->LOCK_thd_data);
2809     if ((linfo= thd->current_linfo))
2810     {
2811       /*
2812         Index file offset can be less that purge offset only if
2813         we just started reading the index file. In that case
2814         we have nothing to adjust.
2815       */
2816       if (linfo->index_file_offset < m_purge_offset)
2817         linfo->fatal = (linfo->index_file_offset != 0);
2818       else
2819         linfo->index_file_offset -= m_purge_offset;
2820     }
2821     mysql_mutex_unlock(&thd->LOCK_thd_data);
2822   }
2823 private:
2824   my_off_t m_purge_offset;
2825 };
2826 
2827 /*
2828   Adjust the position pointer in the binary log file for all running slaves.
2829 
2830   SYNOPSIS
2831     adjust_linfo_offsets()
2832     purge_offset	Number of bytes removed from start of log index file
2833 
2834   NOTES
2835     - This is called when doing a PURGE when we delete lines from the
2836       index log file.
2837 
2838   REQUIREMENTS
2839     - Before calling this function, we have to ensure that no threads are
2840       using any binary log file before purge_offset.
2841 
2842   TODO
2843     - Inform the slave threads that they should sync the position
2844       in the binary log file with flush_relay_log_info.
2845       Now they sync is done for next read.
2846 */
adjust_linfo_offsets(my_off_t purge_offset)2847 static void adjust_linfo_offsets(my_off_t purge_offset)
2848 {
2849   Adjust_offset adjust_offset(purge_offset);
2850   Global_THD_manager::get_instance()->do_for_all_thd(&adjust_offset);
2851 }
2852 
2853 /**
2854   This class implements Call back function for do_for_all_thd().
2855   It is called for each thd in thd list to count
2856   threads using bin log file
2857 */
2858 
2859 class Log_in_use : public Do_THD_Impl
2860 {
2861 public:
Log_in_use(const char * value)2862   Log_in_use(const char* value) : m_log_name(value), m_count(0)
2863   {
2864     m_log_name_len = strlen(m_log_name) + 1;
2865   }
operator ()(THD * thd)2866   virtual void operator()(THD *thd)
2867   {
2868     LOG_INFO* linfo;
2869     mysql_mutex_lock(&thd->LOCK_thd_data);
2870     if ((linfo = thd->current_linfo))
2871     {
2872       if(!strncmp(m_log_name, linfo->log_file_name, m_log_name_len))
2873       {
2874         sql_print_warning("file %s was not purged because it was being read"
2875                           "by thread number %u", m_log_name, thd->thread_id());
2876         m_count++;
2877       }
2878     }
2879     mysql_mutex_unlock(&thd->LOCK_thd_data);
2880   }
get_count()2881   int get_count() { return m_count; }
2882 private:
2883   const char* m_log_name;
2884   size_t m_log_name_len;
2885   int m_count;
2886 };
2887 
log_in_use(const char * log_name)2888 static int log_in_use(const char* log_name)
2889 {
2890   Log_in_use log_in_use(log_name);
2891 #ifndef NDEBUG
2892   if (current_thd)
2893     DEBUG_SYNC(current_thd,"purge_logs_after_lock_index_before_thread_count");
2894 #endif
2895   Global_THD_manager::get_instance()->do_for_all_thd(&log_in_use);
2896   return log_in_use.get_count();
2897 }
2898 
purge_error_message(THD * thd,int res)2899 static bool purge_error_message(THD* thd, int res)
2900 {
2901   uint errcode;
2902 
2903   if ((errcode= purge_log_get_error_code(res)) != 0)
2904   {
2905     my_message(errcode, ER(errcode), MYF(0));
2906     return TRUE;
2907   }
2908   my_ok(thd);
2909   return FALSE;
2910 }
2911 
2912 #endif /* HAVE_REPLICATION */
2913 
check_binlog_magic(IO_CACHE * log,const char ** errmsg)2914 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
2915 {
2916   char magic[4];
2917   assert(my_b_tell(log) == 0);
2918 
2919   if (my_b_read(log, (uchar*) magic, sizeof(magic)))
2920   {
2921     *errmsg = "I/O error reading the header from the binary log";
2922     sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno(),
2923 		    log->error);
2924     return 1;
2925   }
2926   if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
2927   {
2928     *errmsg = "Binlog has bad magic number;  It's not a binary log file that can be used by this version of MySQL";
2929     return 1;
2930   }
2931   return 0;
2932 }
2933 
2934 
open_binlog_file(IO_CACHE * log,const char * log_file_name,const char ** errmsg)2935 File open_binlog_file(IO_CACHE *log, const char *log_file_name, const char **errmsg)
2936 {
2937   File file;
2938   DBUG_ENTER("open_binlog_file");
2939 
2940   if ((file= mysql_file_open(key_file_binlog,
2941                              log_file_name, O_RDONLY | O_BINARY | O_SHARE,
2942                              MYF(MY_WME))) < 0)
2943   {
2944     sql_print_error("Failed to open log (file '%s', errno %d)",
2945                     log_file_name, my_errno());
2946     *errmsg = "Could not open log file";
2947     goto err;
2948   }
2949   if (init_io_cache_ext(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
2950                         MYF(MY_WME|MY_DONT_CHECK_FILESIZE), key_file_binlog_cache))
2951   {
2952     sql_print_error("Failed to create a cache on log (file '%s')",
2953                     log_file_name);
2954     *errmsg = "Could not open log file";
2955     goto err;
2956   }
2957   if (check_binlog_magic(log,errmsg))
2958     goto err;
2959   DBUG_RETURN(file);
2960 
2961 err:
2962   if (file >= 0)
2963   {
2964     mysql_file_close(file, MYF(0));
2965     end_io_cache(log);
2966   }
2967   DBUG_RETURN(-1);
2968 }
2969 
is_transaction_empty(THD * thd)2970 bool is_transaction_empty(THD *thd)
2971 {
2972   DBUG_ENTER("is_transaction_empty");
2973   int rw_ha_count= check_trx_rw_engines(thd, Transaction_ctx::SESSION);
2974   rw_ha_count+= check_trx_rw_engines(thd, Transaction_ctx::STMT);
2975   DBUG_RETURN(rw_ha_count == 0);
2976 }
2977 
check_trx_rw_engines(THD * thd,Transaction_ctx::enum_trx_scope trx_scope)2978 int check_trx_rw_engines(THD *thd, Transaction_ctx::enum_trx_scope trx_scope)
2979 {
2980   DBUG_ENTER("check_trx_rw_engines");
2981 
2982   int rw_ha_count= 0;
2983   Ha_trx_info *ha_list=
2984       (Ha_trx_info *)thd->get_transaction()->ha_trx_info(trx_scope);
2985 
2986   for (Ha_trx_info *ha_info= ha_list; ha_info; ha_info= ha_info->next()) {
2987     if (ha_info->is_trx_read_write())
2988       ++rw_ha_count;
2989   }
2990   DBUG_RETURN(rw_ha_count);
2991 }
2992 
is_empty_transaction_in_binlog_cache(const THD * thd)2993 bool is_empty_transaction_in_binlog_cache(const THD* thd)
2994 {
2995   DBUG_ENTER("is_empty_transaction_in_binlog_cache");
2996 
2997   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
2998   if (cache_mngr != NULL && cache_mngr->has_empty_transaction())
2999   {
3000     DBUG_RETURN(true);
3001   }
3002 
3003   DBUG_RETURN(false);
3004 }
3005 
3006 
3007 /**
3008   This function checks if a transactional table was updated by the
3009   current transaction.
3010 
3011   @param thd The client thread that executed the current statement.
3012   @return
3013     @c true if a transactional table was updated, @c false otherwise.
3014 */
3015 bool
trans_has_updated_trans_table(const THD * thd)3016 trans_has_updated_trans_table(const THD* thd)
3017 {
3018   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
3019 
3020   return (cache_mngr ? !cache_mngr->trx_cache.is_binlog_empty() : 0);
3021 }
3022 
3023 /**
3024   This function checks if a transactional table was updated by the
3025   current statement.
3026 
3027   @param ha_list Registered storage engine handler list.
3028   @return
3029     @c true if a transactional table was updated, @c false otherwise.
3030 */
3031 bool
stmt_has_updated_trans_table(Ha_trx_info * ha_list)3032 stmt_has_updated_trans_table(Ha_trx_info* ha_list)
3033 {
3034   const Ha_trx_info *ha_info;
3035   for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
3036   {
3037     if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
3038       return (TRUE);
3039   }
3040   return (FALSE);
3041 }
3042 
3043 bool
trans_has_noop_dml(Ha_trx_info * ha_list)3044 trans_has_noop_dml(Ha_trx_info* ha_list)
3045 {
3046   const Ha_trx_info *ha_info;
3047   for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
3048   {
3049     if (ha_info->is_trx_noop_read_write())
3050       return (TRUE);
3051   }
3052   return (FALSE);
3053 }
3054 
3055 /**
3056   This function checks if a transaction, either a multi-statement
3057   or a single statement transaction is about to commit or not.
3058 
3059   @param thd The client thread that executed the current statement.
3060   @param all Committing a transaction (i.e. TRUE) or a statement
3061              (i.e. FALSE).
3062   @return
3063     @c true if committing a transaction, otherwise @c false.
3064 */
ending_trans(THD * thd,const bool all)3065 bool ending_trans(THD* thd, const bool all)
3066 {
3067   return (all || ending_single_stmt_trans(thd, all));
3068 }
3069 
3070 /**
3071   This function checks if a single statement transaction is about
3072   to commit or not.
3073 
3074   @param thd The client thread that executed the current statement.
3075   @param all Committing a transaction (i.e. TRUE) or a statement
3076              (i.e. FALSE).
3077   @return
3078     @c true if committing a single statement transaction, otherwise
3079     @c false.
3080 */
ending_single_stmt_trans(THD * thd,const bool all)3081 bool ending_single_stmt_trans(THD* thd, const bool all)
3082 {
3083   return (!all && !thd->in_multi_stmt_transaction_mode());
3084 }
3085 
3086 /**
3087   This function checks if a transaction cannot be rolled back safely.
3088 
3089   @param thd The client thread that executed the current statement.
3090   @return
3091     @c true if cannot be safely rolled back, @c false otherwise.
3092 */
trans_cannot_safely_rollback(const THD * thd)3093 bool trans_cannot_safely_rollback(const THD* thd)
3094 {
3095   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
3096 
3097   return cache_mngr->trx_cache.cannot_rollback();
3098 }
3099 
3100 /**
3101   This function checks if current statement cannot be rollded back safely.
3102 
3103   @param thd The client thread that executed the current statement.
3104   @return
3105     @c true if cannot be safely rolled back, @c false otherwise.
3106 */
stmt_cannot_safely_rollback(const THD * thd)3107 bool stmt_cannot_safely_rollback(const THD* thd)
3108 {
3109   return thd->get_transaction()->cannot_safely_rollback(Transaction_ctx::STMT);
3110 }
3111 
3112 #ifndef EMBEDDED_LIBRARY
3113 /**
3114   Execute a PURGE BINARY LOGS TO <log> command.
3115 
3116   @param thd Pointer to THD object for the client thread executing the
3117   statement.
3118 
3119   @param to_log Name of the last log to purge.
3120 
3121   @retval FALSE success
3122   @retval TRUE failure
3123 */
purge_master_logs(THD * thd,const char * to_log)3124 bool purge_master_logs(THD* thd, const char* to_log)
3125 {
3126   char search_file_name[FN_REFLEN];
3127   if (!mysql_bin_log.is_open())
3128   {
3129     my_ok(thd);
3130     return FALSE;
3131   }
3132 
3133   mysql_bin_log.make_log_name(search_file_name, to_log);
3134   return purge_error_message(thd,
3135                              mysql_bin_log.purge_logs(search_file_name, false,
3136                                                       true/*need_lock_index=true*/,
3137                                                       true/*need_update_threads=true*/,
3138                                                       NULL, false));
3139 }
3140 
3141 
3142 /**
3143   Execute a PURGE BINARY LOGS BEFORE <date> command.
3144 
3145   @param thd Pointer to THD object for the client thread executing the
3146   statement.
3147 
3148   @param purge_time Date before which logs should be purged.
3149 
3150   @retval FALSE success
3151   @retval TRUE failure
3152 */
purge_master_logs_before_date(THD * thd,time_t purge_time)3153 bool purge_master_logs_before_date(THD* thd, time_t purge_time)
3154 {
3155   if (!mysql_bin_log.is_open())
3156   {
3157     my_ok(thd);
3158     return 0;
3159   }
3160   return purge_error_message(thd,
3161                              mysql_bin_log.purge_logs_before_date(purge_time,
3162                                                                   false));
3163 }
3164 #endif /* EMBEDDED_LIBRARY */
3165 
3166 /*
3167   Helper function to get the error code of the query to be binlogged.
3168  */
query_error_code(THD * thd,bool not_killed)3169 int query_error_code(THD *thd, bool not_killed)
3170 {
3171   int error;
3172 
3173   if (not_killed || (thd->killed == THD::KILL_BAD_DATA))
3174   {
3175     error= thd->is_error() ? thd->get_stmt_da()->mysql_errno() : 0;
3176 
3177     /* thd->get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
3178        ER_QUERY_INTERRUPTED, So here we need to make sure that error
3179        is not set to these errors when specified not_killed by the
3180        caller.
3181     */
3182     if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED)
3183       error= 0;
3184   }
3185   else
3186     error= thd->killed_errno();
3187 
3188   return error;
3189 }
3190 
3191 
3192 /**
3193   Copy content of 'from' file from offset to 'to' file.
3194 
3195   - We do the copy outside of the IO_CACHE as the cache
3196   buffers would just make things slower and more complicated.
3197   In most cases the copy loop should only do one read.
3198 
3199   @param from          File to copy.
3200   @param to            File to copy to.
3201   @param offset        Offset in 'from' file.
3202 
3203 
3204   @retval
3205     0    ok
3206   @retval
3207     -1    error
3208 */
copy_file(IO_CACHE * from,IO_CACHE * to,my_off_t offset)3209 static bool copy_file(IO_CACHE *from, IO_CACHE *to, my_off_t offset)
3210 {
3211   int bytes_read;
3212   uchar io_buf[IO_SIZE*2];
3213   DBUG_ENTER("copy_file");
3214 
3215   mysql_file_seek(from->file, offset, MY_SEEK_SET, MYF(0));
3216   while(TRUE)
3217   {
3218     if ((bytes_read= (int) mysql_file_read(from->file, io_buf, sizeof(io_buf),
3219                                            MYF(MY_WME)))
3220         < 0)
3221       goto err;
3222     if (DBUG_EVALUATE_IF("fault_injection_copy_part_file", 1, 0))
3223       bytes_read= bytes_read/2;
3224     if (!bytes_read)
3225       break;                                    // end of file
3226     if (mysql_file_write(to->file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
3227       goto err;
3228   }
3229 
3230   DBUG_RETURN(0);
3231 
3232 err:
3233   DBUG_RETURN(1);
3234 }
3235 
3236 
3237 #ifdef HAVE_REPLICATION
3238 /**
3239    Load data's io cache specific hook to be executed
3240    before a chunk of data is being read into the cache's buffer
3241    The fuction instantianates and writes into the binlog
3242    replication events along LOAD DATA processing.
3243 
3244    @param file  pointer to io-cache
3245    @retval 0 success
3246    @retval 1 failure
3247 */
log_loaded_block(IO_CACHE * file)3248 int log_loaded_block(IO_CACHE* file)
3249 {
3250   DBUG_ENTER("log_loaded_block");
3251   LOAD_FILE_INFO *lf_info;
3252   uint block_len;
3253   /* buffer contains position where we started last read */
3254   uchar* buffer= (uchar*) my_b_get_buffer_start(file);
3255   uint max_event_size= current_thd->variables.max_allowed_packet;
3256   lf_info= (LOAD_FILE_INFO*) file->arg;
3257   if (lf_info->thd->is_current_stmt_binlog_format_row())
3258     DBUG_RETURN(0);
3259   if (lf_info->last_pos_in_file != HA_POS_ERROR &&
3260       lf_info->last_pos_in_file >= my_b_get_pos_in_file(file))
3261     DBUG_RETURN(0);
3262 
3263   for (block_len= (uint) (my_b_get_bytes_in_buffer(file)); block_len > 0;
3264        buffer += min(block_len, max_event_size),
3265        block_len -= min(block_len, max_event_size))
3266   {
3267     lf_info->last_pos_in_file= my_b_get_pos_in_file(file);
3268     if (lf_info->wrote_create_file)
3269     {
3270       Append_block_log_event a(lf_info->thd, lf_info->thd->db().str, buffer,
3271                                min(block_len, max_event_size),
3272                                lf_info->log_delayed);
3273       if (mysql_bin_log.write_event(&a))
3274         DBUG_RETURN(1);
3275     }
3276     else
3277     {
3278       Begin_load_query_log_event b(lf_info->thd, lf_info->thd->db().str,
3279                                    buffer,
3280                                    min(block_len, max_event_size),
3281                                    lf_info->log_delayed);
3282       if (mysql_bin_log.write_event(&b))
3283         DBUG_RETURN(1);
3284       lf_info->wrote_create_file= 1;
3285     }
3286   }
3287   DBUG_RETURN(0);
3288 }
3289 
/**
  Helper function for SHOW BINLOG/RELAYLOG EVENTS.

  If @c binary_log is open, locates the requested log file (or the first
  one when no name was given), opens it, and sends the events found from
  the requested position to the client, honouring the statement's
  offset/limit. If the log is not open, no events are sent and the
  function finishes successfully.

  While the file is being read, a local LOG_INFO is published through
  thd->current_linfo; it is reset before returning. max_allowed_packet of
  the session is raised temporarily and restored before returning.

  @param thd         Session executing the statement; its sql_command must
                     be SQLCOM_SHOW_BINLOG_EVENTS or
                     SQLCOM_SHOW_RELAYLOG_EVENTS.
  @param binary_log  Log object whose events are to be shown.

  @retval FALSE success (my_eof() has been sent)
  @retval TRUE  failure (ER_ERROR_WHEN_EXECUTING_COMMAND has been raised)
*/
bool show_binlog_events(THD *thd, MYSQL_BIN_LOG *binary_log)
{
  Protocol *protocol= thd->get_protocol();
  // NOTE(review): field_list is not referenced anywhere in this function.
  List<Item> field_list;
  const char *errmsg = 0;
  bool ret = TRUE;
  IO_CACHE log;
  File file = -1;
  // Saved so that the temporary increase below can be undone on exit.
  int old_max_allowed_packet= thd->variables.max_allowed_packet;
  LOG_INFO linfo;

  DBUG_ENTER("show_binlog_events");

  assert(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS ||
         thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS);

  Format_description_log_event *description_event= new
    Format_description_log_event(3); /* MySQL 4.0 by default */

  if (binary_log->is_open())
  {
    LEX_MASTER_INFO *lex_mi= &thd->lex->mi;
    SELECT_LEX_UNIT *unit= thd->lex->unit;
    ha_rows event_count, limit_start, limit_end;
    // Positions before the end of the file header are bumped up to it.
    my_off_t pos = max<my_off_t>(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly
    char search_file_name[FN_REFLEN], *name;
    const char *log_file_name = lex_mi->log_file_name;
    mysql_mutex_t *log_lock = binary_log->get_log_lock();
    Log_event* ev;

    unit->set_limit(thd->lex->current_select());
    limit_start= unit->offset_limit_cnt;
    limit_end= unit->select_limit_cnt;

    name= search_file_name;
    if (log_file_name)
      binary_log->make_log_name(search_file_name, log_file_name);
    else
      name=0;					// Find first log

    linfo.index_file_offset = 0;

    if (binary_log->find_log_pos(&linfo, name, true/*need_lock_index=true*/))
    {
      errmsg = "Could not find target log";
      goto err;
    }

    // Publish linfo so that other threads can see which log is being read.
    mysql_mutex_lock(&thd->LOCK_thd_data);
    thd->current_linfo = &linfo;
    mysql_mutex_unlock(&thd->LOCK_thd_data);

    if ((file=open_binlog_file(&log, linfo.log_file_name, &errmsg)) < 0)
      goto err;

    my_off_t end_pos;
    /*
      Acquire LOCK_log only for the duration to calculate the
      log's end position. LOCK_log should be acquired even while
      we are checking whether the log is active log or not.
    */
    mysql_mutex_lock(log_lock);
    if (binary_log->is_active(linfo.log_file_name))
    {
      LOG_INFO li;
      binary_log->get_current_log(&li, false /*LOCK_log is already acquired*/);
      end_pos= li.pos;
    }
    else
    {
      end_pos= my_b_filelength(&log);
    }
    mysql_mutex_unlock(log_lock);

    /*
      to account binlog event header size
    */
    thd->variables.max_allowed_packet += MAX_LOG_EVENT_HEADER;

    DEBUG_SYNC(thd, "after_show_binlog_event_found_file");

    /*
      open_binlog_file() sought to position 4.
      Read the first event in case it's a Format_description_log_event, to
      know the format. If there's no such event, we are 3.23 or 4.x. This
      code, like before, can't read 3.23 binlogs.
      This code will fail on a mixed relay log (one which has Format_desc then
      Rotate then Format_desc).
    */
    ev= Log_event::read_log_event(&log, (mysql_mutex_t*)0, description_event,
                                   opt_master_verify_checksum);
    if (ev)
    {
      if (ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT)
      {
        // The event read from the file replaces the default description.
        delete description_event;
        description_event= (Format_description_log_event*) ev;
      }
      else
        delete ev;
    }

    my_b_seek(&log, pos);

    if (!description_event->is_valid())
    {
      errmsg="Invalid Format_description event; could be out of memory";
      goto err;
    }

    // Events before limit_start are read but not sent; they still count.
    for (event_count = 0;
         (ev = Log_event::read_log_event(&log, (mysql_mutex_t*) 0,
                                         description_event,
                                         opt_master_verify_checksum)); )
    {
      DEBUG_SYNC(thd, "wait_in_show_binlog_events_loop");
      if (ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT)
        description_event->common_footer->checksum_alg=
                           ev->common_footer->checksum_alg;
      if (event_count >= limit_start &&
	  ev->net_send(protocol, linfo.log_file_name, pos))
      {
	errmsg = "Net error";
	delete ev;
	goto err;
      }

      pos = my_b_tell(&log);
      delete ev;

      // Stop at the row limit or at the end position computed above.
      if (++event_count >= limit_end || pos >= end_pos)
	break;
    }

    // The loop ended early and the cache reports an error.
    if (event_count < limit_end && log.error)
    {
      errmsg = "Wrong offset or I/O error";
      goto err;
    }

  }
  // Check that linfo is still on the function scope.
  DEBUG_SYNC(thd, "after_show_binlog_events");

  ret= FALSE;

  // Common exit path for success and failure.
err:
  delete description_event;
  if (file >= 0)
  {
    end_io_cache(&log);
    mysql_file_close(file, MYF(MY_WME));
  }

  if (errmsg)
  {
    if(thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS)
      my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
             "SHOW RELAYLOG EVENTS", errmsg);
    else
      my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
             "SHOW BINLOG EVENTS", errmsg);
  }
  else
    my_eof(thd);

  // linfo is about to go out of scope, so it must no longer be published.
  mysql_mutex_lock(&thd->LOCK_thd_data);
  thd->current_linfo = 0;
  mysql_mutex_unlock(&thd->LOCK_thd_data);
  thd->variables.max_allowed_packet= old_max_allowed_packet;
  DBUG_RETURN(ret);
}
3463 
3464 /**
3465   Execute a SHOW BINLOG EVENTS statement.
3466 
3467   @param thd Pointer to THD object for the client thread executing the
3468   statement.
3469 
3470   @retval FALSE success
3471   @retval TRUE failure
3472 */
mysql_show_binlog_events(THD * thd)3473 bool mysql_show_binlog_events(THD* thd)
3474 {
3475   List<Item> field_list;
3476   DBUG_ENTER("mysql_show_binlog_events");
3477 
3478   assert(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS);
3479 
3480   Log_event::init_show_field_list(&field_list);
3481   if (thd->send_result_metadata(&field_list,
3482                                 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
3483     DBUG_RETURN(TRUE);
3484 
3485   /*
3486     Wait for handlers to insert any pending information
3487     into the binlog.  For e.g. ndb which updates the binlog asynchronously
3488     this is needed so that the uses sees all its own commands in the binlog
3489   */
3490   ha_binlog_wait(thd);
3491 
3492   DBUG_RETURN(show_binlog_events(thd, &mysql_bin_log));
3493 }
3494 
3495 #endif /* HAVE_REPLICATION */
3496 
3497 
MYSQL_BIN_LOG(uint * sync_period,enum cache_type io_cache_type_arg)3498 MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period,
3499                              enum cache_type io_cache_type_arg)
3500   :name(NULL), write_error(false), inited(false),
3501    io_cache_type(io_cache_type_arg),
3502 #ifdef HAVE_PSI_INTERFACE
3503    m_key_LOCK_log(key_LOG_LOCK_log),
3504 #endif
3505    bytes_written(0), file_id(1), open_count(1),
3506    sync_period_ptr(sync_period), sync_counter(0),
3507    is_relay_log(0), signal_cnt(0),
3508    checksum_alg_reset(binary_log::BINLOG_CHECKSUM_ALG_UNDEF),
3509    relay_log_checksum_alg(binary_log::BINLOG_CHECKSUM_ALG_UNDEF),
3510    previous_gtid_set_relaylog(0), is_rotating_caused_by_incident(false)
3511 {
3512   log_state.atomic_set(LOG_CLOSED);
3513   /*
3514     We don't want to initialize locks here as such initialization depends on
3515     safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
3516     called only in main(). Doing initialization here would make it happen
3517     before main().
3518   */
3519   m_prep_xids.atomic_set(0);
3520   memset(&log_file, 0, sizeof(log_file));
3521   index_file_name[0] = 0;
3522   memset(&index_file, 0, sizeof(index_file));
3523   memset(&purge_index_file, 0, sizeof(purge_index_file));
3524   memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
3525 }
3526 
3527 
3528 /* this is called only once */
3529 
cleanup()3530 void MYSQL_BIN_LOG::cleanup()
3531 {
3532   DBUG_ENTER("cleanup");
3533   if (inited)
3534   {
3535     inited= 0;
3536     close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, true /*need_lock_log=true*/,
3537           true /*need_lock_index=true*/);
3538     mysql_mutex_destroy(&LOCK_log);
3539     mysql_mutex_destroy(&LOCK_index);
3540     mysql_mutex_destroy(&LOCK_commit);
3541     mysql_mutex_destroy(&LOCK_sync);
3542     mysql_mutex_destroy(&LOCK_binlog_end_pos);
3543     mysql_mutex_destroy(&LOCK_xids);
3544     mysql_cond_destroy(&update_cond);
3545     mysql_cond_destroy(&m_prep_xids_cond);
3546     stage_manager.deinit();
3547   }
3548   DBUG_VOID_RETURN;
3549 }
3550 
3551 
init_pthread_objects()3552 void MYSQL_BIN_LOG::init_pthread_objects()
3553 {
3554   assert(inited == 0);
3555   inited= 1;
3556   mysql_mutex_init(m_key_LOCK_log, &LOCK_log, MY_MUTEX_INIT_SLOW);
3557   mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
3558   mysql_mutex_init(m_key_LOCK_commit, &LOCK_commit, MY_MUTEX_INIT_FAST);
3559   mysql_mutex_init(m_key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
3560   mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
3561                    MY_MUTEX_INIT_FAST);
3562   mysql_mutex_init(m_key_LOCK_xids, &LOCK_xids, MY_MUTEX_INIT_FAST);
3563   mysql_cond_init(m_key_update_cond, &update_cond);
3564   mysql_cond_init(m_key_prep_xids_cond, &m_prep_xids_cond);
3565   stage_manager.init(
3566 #ifdef HAVE_PSI_INTERFACE
3567                    m_key_LOCK_flush_queue,
3568                    m_key_LOCK_sync_queue,
3569                    m_key_LOCK_commit_queue,
3570                    m_key_LOCK_done, m_key_COND_done
3571 #endif
3572                    );
3573 }
3574 
3575 
3576 /**
3577   Check if a string is a valid number.
3578 
3579   @param str			String to test
3580   @param res			Store value here
3581   @param allow_wildcards	Set to 1 if we should ignore '%' and '_'
3582 
3583   @note
3584     For the moment the allow_wildcards argument is not used
3585     Should be moved to some other file.
3586 
3587   @retval
3588     1	String is a number
3589   @retval
3590     0	String is not a number
3591 */
3592 
is_number(const char * str,ulong * res,bool allow_wildcards)3593 static bool is_number(const char *str,
3594                       ulong *res, bool allow_wildcards)
3595 {
3596   int flag;
3597   const char *start;
3598   DBUG_ENTER("is_number");
3599 
3600   flag=0; start=str;
3601   while (*str++ == ' ') ;
3602   if (*--str == '-' || *str == '+')
3603     str++;
3604   while (my_isdigit(files_charset_info,*str) ||
3605 	 (allow_wildcards && (*str == wild_many || *str == wild_one)))
3606   {
3607     flag=1;
3608     str++;
3609   }
3610   if (*str == '.')
3611   {
3612     for (str++ ;
3613 	 my_isdigit(files_charset_info,*str) ||
3614 	   (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
3615 	 str++, flag=1) ;
3616   }
3617   if (*str != 0 || flag == 0)
3618     DBUG_RETURN(0);
3619   if (res)
3620     *res=atol(start);
3621   DBUG_RETURN(1);			/* Number ok */
3622 } /* is_number */
3623 
3624 
/*
  Maximum unique log filename extension, i.e. the largest numeric suffix
  find_uniq_filename() accepts as already existing before it reports that
  the extension numbers are exhausted.
  Note: kept at 0x7FFFFFFF because atol() on Windows overflows/truncates
        larger values.
 */
#define MAX_LOG_UNIQUE_FN_EXT 0x7FFFFFFF

/*
   Size of the warning window before the extension number is exhausted:
   find_uniq_filename() prints a warning to the error log for every new
   extension that leaves fewer than this many extensions available.
*/
#define LOG_WARN_UNIQUE_FN_EXT_LEFT 1000
3637 
3638 /**
3639   Find a unique filename for 'filename.#'.
3640 
3641   Set '#' to the highest existing log file extension plus one.
3642 
3643   This function will return nonzero if: (i) the generated name
3644   exceeds FN_REFLEN; (ii) if the number of extensions is exhausted;
3645   or (iii) some other error happened while examining the filesystem.
3646 
3647   @return
3648     nonzero if not possible to get unique filename.
3649 */
3650 
find_uniq_filename(char * name)3651 static int find_uniq_filename(char *name)
3652 {
3653   uint                  i;
3654   char                  buff[FN_REFLEN], ext_buf[FN_REFLEN];
3655   struct st_my_dir     *dir_info;
3656   struct fileinfo *file_info;
3657   ulong                 max_found= 0, next= 0, number= 0;
3658   size_t		buf_length, length;
3659   char			*start, *end;
3660   int                   error= 0;
3661   DBUG_ENTER("find_uniq_filename");
3662 
3663   length= dirname_part(buff, name, &buf_length);
3664   start=  name + length;
3665   end=    strend(start);
3666 
3667   *end='.';
3668   length= (size_t) (end - start + 1);
3669 
3670   if ((DBUG_EVALUATE_IF("error_unique_log_filename", 1,
3671       !(dir_info= my_dir(buff,MYF(MY_DONT_SORT))))))
3672   {						// This shouldn't happen
3673     my_stpcpy(end,".1");				// use name+1
3674     DBUG_RETURN(1);
3675   }
3676   file_info= dir_info->dir_entry;
3677   for (i= dir_info->number_off_files ; i-- ; file_info++)
3678   {
3679     if (strncmp(file_info->name, start, length) == 0 &&
3680 	is_number(file_info->name+length, &number,0))
3681     {
3682       set_if_bigger(max_found, number);
3683     }
3684   }
3685   my_dirend(dir_info);
3686 
3687   /* check if reached the maximum possible extension number */
3688   if (max_found == MAX_LOG_UNIQUE_FN_EXT)
3689   {
3690     sql_print_error("Log filename extension number exhausted: %06lu. \
3691 Please fix this by archiving old logs and \
3692 updating the index files.", max_found);
3693     error= 1;
3694     goto end;
3695   }
3696 
3697   next= max_found + 1;
3698   if (sprintf(ext_buf, "%06lu", next)<0)
3699   {
3700     error= 1;
3701     goto end;
3702   }
3703   *end++='.';
3704 
3705   /*
3706     Check if the generated extension size + the file name exceeds the
3707     buffer size used. If one did not check this, then the filename might be
3708     truncated, resulting in error.
3709    */
3710   if (((strlen(ext_buf) + (end - name)) >= FN_REFLEN))
3711   {
3712     sql_print_error("Log filename too large: %s%s (%zu). \
3713 Please fix this by archiving old logs and updating the \
3714 index files.", name, ext_buf, (strlen(ext_buf) + (end - name)));
3715     error= 1;
3716     goto end;
3717   }
3718 
3719   if (sprintf(end, "%06lu", next)<0)
3720   {
3721     error= 1;
3722     goto end;
3723   }
3724 
3725   /* print warning if reaching the end of available extensions. */
3726   if ((next > (MAX_LOG_UNIQUE_FN_EXT - LOG_WARN_UNIQUE_FN_EXT_LEFT)))
3727     sql_print_warning("Next log extension: %lu. \
3728 Remaining log filename extensions: %lu. \
3729 Please consider archiving some logs.", next, (MAX_LOG_UNIQUE_FN_EXT - next));
3730 
3731 end:
3732   DBUG_RETURN(error);
3733 }
3734 
3735 
generate_new_name(char * new_name,const char * log_name)3736 int MYSQL_BIN_LOG::generate_new_name(char *new_name, const char *log_name)
3737 {
3738   fn_format(new_name, log_name, mysql_data_home, "", 4);
3739   if (!fn_ext(log_name)[0])
3740   {
3741     if (find_uniq_filename(new_name))
3742     {
3743       my_printf_error(ER_NO_UNIQUE_LOGFILE, ER(ER_NO_UNIQUE_LOGFILE),
3744                       MYF(ME_FATALERROR), log_name);
3745       sql_print_error(ER(ER_NO_UNIQUE_LOGFILE), log_name);
3746       return 1;
3747     }
3748   }
3749   return 0;
3750 }
3751 
3752 
3753 /**
3754   @todo
3755   The following should be using fn_format();  We just need to
3756   first change fn_format() to cut the file name if it's too long.
3757 */
generate_name(const char * log_name,const char * suffix,char * buff)3758 const char *MYSQL_BIN_LOG::generate_name(const char *log_name,
3759                                          const char *suffix,
3760                                          char *buff)
3761 {
3762   if (!log_name || !log_name[0])
3763   {
3764     strmake(buff, default_logfile_name, FN_REFLEN - strlen(suffix) - 1);
3765     return (const char *)
3766       fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
3767   }
3768   // get rid of extension to avoid problems
3769 
3770   char *p= fn_ext(log_name);
3771   uint length= (uint) (p - log_name);
3772   strmake(buff, log_name, min<size_t>(length, FN_REFLEN-1));
3773   return (const char*)buff;
3774 }
3775 
3776 
init_and_set_log_file_name(const char * log_name,const char * new_name)3777 bool MYSQL_BIN_LOG::init_and_set_log_file_name(const char *log_name,
3778                                                const char *new_name)
3779 {
3780   if (new_name && !my_stpcpy(log_file_name, new_name))
3781     return TRUE;
3782   else if (!new_name && generate_new_name(log_file_name, log_name))
3783     return TRUE;
3784 
3785   return FALSE;
3786 }
3787 
3788 
3789 /**
3790   Open the logfile and init IO_CACHE.
3791 
3792   @param log_name            The name of the log to open
3793   @param new_name            The new name for the logfile.
3794                              NULL forces generate_new_name() to be called.
3795 
3796   @return true if error, false otherwise.
3797 */
3798 
open(PSI_file_key log_file_key,const char * log_name,const char * new_name)3799 bool MYSQL_BIN_LOG::open(
3800 #ifdef HAVE_PSI_INTERFACE
3801                      PSI_file_key log_file_key,
3802 #endif
3803                      const char *log_name,
3804                      const char *new_name)
3805 {
3806   File file= -1;
3807   my_off_t pos= 0;
3808   int open_flags= O_CREAT | O_BINARY;
3809   DBUG_ENTER("MYSQL_BIN_LOG::open");
3810 
3811   write_error= 0;
3812 
3813   if (!(name= my_strdup(key_memory_MYSQL_LOG_name,
3814                         log_name, MYF(MY_WME))))
3815   {
3816     name= (char *)log_name; // for the error message
3817     goto err;
3818   }
3819 
3820   if (init_and_set_log_file_name(name, new_name) ||
3821       DBUG_EVALUATE_IF("fault_injection_init_name", 1, 0))
3822     goto err;
3823 
3824   if (io_cache_type == SEQ_READ_APPEND)
3825     open_flags |= O_RDWR | O_APPEND;
3826   else
3827     open_flags |= O_WRONLY;
3828 
3829   db[0]= 0;
3830 
3831 #ifdef HAVE_PSI_INTERFACE
3832   /* Keep the key for reopen */
3833   m_log_file_key= log_file_key;
3834 #endif
3835 
3836   if ((file= mysql_file_open(log_file_key,
3837                              log_file_name, open_flags,
3838                              MYF(MY_WME))) < 0)
3839     goto err;
3840 
3841   if ((pos= mysql_file_tell(file, MYF(MY_WME))) == MY_FILEPOS_ERROR)
3842   {
3843     if (my_errno() == ESPIPE)
3844       pos= 0;
3845     else
3846       goto err;
3847   }
3848 
3849   if (init_io_cache(&log_file, file, IO_SIZE, io_cache_type, pos, 0,
3850                     MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
3851     goto err;
3852 
3853   log_state.atomic_set(LOG_OPENED);
3854   DBUG_RETURN(0);
3855 
3856 err:
3857   if (binlog_error_action == ABORT_SERVER)
3858   {
3859     exec_binlog_error_action_abort("Either disk is full or file system is read "
3860                                    "only while opening the binlog. Aborting the"
3861                                    " server.");
3862   }
3863   else
3864     sql_print_error("Could not open %s for logging (error %d). "
3865                     "Turning logging off for the whole duration "
3866                     "of the MySQL server process. To turn it on "
3867                     "again: fix the cause, shutdown the MySQL "
3868                     "server and restart it.",
3869                     name, errno);
3870   if (file >= 0)
3871     mysql_file_close(file, MYF(0));
3872   end_io_cache(&log_file);
3873   my_free(name);
3874   name= NULL;
3875   log_state.atomic_set(LOG_CLOSED);
3876   DBUG_RETURN(1);
3877 }
3878 
3879 
open_index_file(const char * index_file_name_arg,const char * log_name,bool need_lock_index)3880 bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
3881                                     const char *log_name, bool need_lock_index)
3882 {
3883   bool error= false;
3884   File index_file_nr= -1;
3885   if (need_lock_index)
3886     mysql_mutex_lock(&LOCK_index);
3887   else
3888     mysql_mutex_assert_owner(&LOCK_index);
3889 
3890   /*
3891     First open of this class instance
3892     Create an index file that will hold all file names uses for logging.
3893     Add new entries to the end of it.
3894   */
3895   myf opt= MY_UNPACK_FILENAME;
3896 
3897   if (my_b_inited(&index_file))
3898     goto end;
3899 
3900   if (!index_file_name_arg)
3901   {
3902     index_file_name_arg= log_name;    // Use same basename for index file
3903     opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
3904   }
3905   fn_format(index_file_name, index_file_name_arg, mysql_data_home,
3906             ".index", opt);
3907 
3908   if (set_crash_safe_index_file_name(index_file_name_arg))
3909   {
3910     sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed.");
3911     error= true;
3912     goto end;
3913   }
3914 
3915   /*
3916     We need move crash_safe_index_file to index_file if the index_file
3917     does not exist and crash_safe_index_file exists when mysqld server
3918     restarts.
3919   */
3920   if (my_access(index_file_name, F_OK) &&
3921       !my_access(crash_safe_index_file_name, F_OK) &&
3922       my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)))
3923   {
3924     sql_print_error("MYSQL_BIN_LOG::open_index_file failed to "
3925                     "move crash_safe_index_file to index file.");
3926     error= true;
3927     goto end;
3928   }
3929 
3930   if ((index_file_nr= mysql_file_open(m_key_file_log_index,
3931                                       index_file_name,
3932                                       O_RDWR | O_CREAT | O_BINARY,
3933                                       MYF(MY_WME))) < 0 ||
3934        mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
3935        init_io_cache_ext(&index_file, index_file_nr,
3936                          IO_SIZE, READ_CACHE,
3937                          mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
3938                                          0, MYF(MY_WME | MY_WAIT_IF_FULL),
3939                          m_key_file_log_index_cache) ||
3940       DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
3941   {
3942     /*
3943       TODO: all operations creating/deleting the index file or a log, should
3944       call my_sync_dir() or my_sync_dir_by_file() to be durable.
3945       TODO: file creation should be done with mysql_file_create()
3946       not mysql_file_open().
3947     */
3948     if (index_file_nr >= 0)
3949       mysql_file_close(index_file_nr, MYF(0));
3950     error= true;
3951     goto end;
3952   }
3953 
3954 #ifdef HAVE_REPLICATION
3955   /*
3956     Sync the index by purging any binary log file that is not registered.
3957     In other words, either purge binary log files that were removed from
3958     the index but not purged from the file system due to a crash or purge
3959     any binary log file that was created but not register in the index
3960     due to a crash.
3961   */
3962 
3963   if (set_purge_index_file_name(index_file_name_arg) ||
3964       open_purge_index_file(FALSE) ||
3965       purge_index_entry(NULL, NULL, false) ||
3966       close_purge_index_file() ||
3967       DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
3968   {
3969     sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
3970                     "file.");
3971     error= true;
3972     goto end;
3973   }
3974 #endif
3975 
3976 end:
3977   if (need_lock_index)
3978     mysql_mutex_unlock(&LOCK_index);
3979   return error;
3980 }
3981 
3982 /**
3983   Add the GTIDs from the given relaylog file and also
3984   update the IO thread transaction parser.
3985 
3986   @param filename Relaylog file to read from.
3987   @param retrieved_set Gtid_set to store the GTIDs found on the relaylog file.
3988   @param verify_checksum Set to true to verify event checksums.
3989   @param trx_parser The transaction boundary parser to be used in order to
3990   only add a GTID to the gtid_set after ensuring the transaction is fully
3991   stored on the relay log.
3992   @param gtid_partial_trx The gtid of the last incomplete transaction
3993   found in the relay log.
3994 
3995   @retval false The file was successfully read and all GTIDs from
3996   Previous_gtids and Gtid_log_event from complete transactions were added to
3997   the retrieved_set.
3998   @retval true There was an error during the procedure.
3999 */
4000 static bool
read_gtids_and_update_trx_parser_from_relaylog(const char * filename,Gtid_set * retrieved_gtids,bool verify_checksum,Transaction_boundary_parser * trx_parser,Gtid * gtid_partial_trx)4001 read_gtids_and_update_trx_parser_from_relaylog(
4002   const char *filename,
4003   Gtid_set *retrieved_gtids,
4004   bool verify_checksum,
4005   Transaction_boundary_parser *trx_parser,
4006   Gtid *gtid_partial_trx)
4007 {
4008   DBUG_ENTER("read_gtids_and_update_trx_parser_from_relaylog");
4009   DBUG_PRINT("info", ("Opening file %s", filename));
4010 
4011   assert(retrieved_gtids != NULL);
4012   assert(trx_parser != NULL);
4013 #ifndef NDEBUG
4014   unsigned long event_counter= 0;
4015 #endif
4016 
4017   /*
4018     Create a Format_description_log_event that is used to read the
4019     first event of the log.
4020   */
4021   Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
4022   if (!fd_ev.is_valid())
4023     DBUG_RETURN(true);
4024 
4025   File file;
4026   IO_CACHE log;
4027 
4028   const char *errmsg= NULL;
4029   if ((file= open_binlog_file(&log, filename, &errmsg)) < 0)
4030   {
4031     sql_print_error("%s", errmsg);
4032     /*
4033       As read_gtids_from_binlog() will not throw error on truncated
4034       relaylog files, we should do the same here in order to keep the
4035       current behavior.
4036     */
4037     DBUG_RETURN(false);
4038   }
4039 
4040   /*
4041     Seek for Previous_gtids_log_event and Gtid_log_event events to
4042     gather information what has been processed so far.
4043   */
4044   my_b_seek(&log, BIN_LOG_HEADER_SIZE);
4045   Log_event *ev= NULL;
4046   bool error= false;
4047   bool seen_prev_gtids= false;
4048   ulong data_len= 0;
4049 
4050   while (!error &&
4051          (ev= Log_event::read_log_event(&log, 0, fd_ev_p, verify_checksum)) !=
4052          NULL)
4053   {
4054     DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
4055 #ifndef NDEBUG
4056     event_counter++;
4057 #endif
4058 
4059     data_len= uint4korr(ev->temp_buf + EVENT_LEN_OFFSET);
4060     if (trx_parser->feed_event(ev->temp_buf, data_len, fd_ev_p, false))
4061     {
4062       /*
4063         The transaction boundary parser found an error while parsing a
4064         sequence of events from the relaylog. As we don't know if the
4065         parsing has started from a reliable point (it might started in
4066         a relay log file that begins with the rest of a transaction
4067         that started in a previous relay log file), it is better to do
4068         nothing in this case. The boundary parser will fix itself once
4069         finding an event that represent a transaction boundary.
4070 
4071         Suppose the following relaylog:
4072 
4073          rl-bin.000011 | rl-bin.000012 | rl-bin.000013 | rl-bin-000014
4074         ---------------+---------------+---------------+---------------
4075          PREV_GTIDS    | PREV_GTIDS    | PREV_GTIDS    | PREV_GTIDS
4076          (empty)       | (UUID:1-2)    | (UUID:1-2)    | (UUID:1-2)
4077         ---------------+---------------+---------------+---------------
4078          XID           | QUERY(INSERT) | QUERY(INSERT) | XID
4079         ---------------+---------------+---------------+---------------
4080          GTID(UUID:2)  |
4081         ---------------+
4082          QUERY(CREATE  |
4083          TABLE t1 ...) |
4084         ---------------+
4085          GTID(UUID:3)  |
4086         ---------------+
4087          QUERY(BEGIN)  |
4088         ---------------+
4089 
4090         As it is impossible to determine the current Retrieved_Gtid_Set by only
4091         looking to the PREVIOUS_GTIDS on the last relay log file, and scanning
4092         events on it, we tried to find a relay log file that contains at least
4093         one GTID event during the backwards search.
4094 
4095         In the example, we will find a GTID only in rl-bin.000011, as the
4096         UUID:3 transaction was spanned across 4 relay log files.
4097 
4098         The transaction spanning can be caused by "FLUSH RELAY LOGS" commands
4099         on slave while it is queuing the transaction.
4100 
4101         So, in order to correctly add UUID:3 into Retrieved_Gtid_Set, we need
4102         to parse the relay log starting on the file we found the last GTID
4103         queued to know if the transaction was fully retrieved or not.
4104 
4105         Start scanning rl-bin.000011 after resetting the transaction parser
4106         will generate an error, as XID event is only expected inside a DML,
4107         but in this case, we can ignore this error and reset the parser.
4108       */
4109       trx_parser->reset();
4110       /*
4111         We also have to discard the GTID of the partial transaction that was
4112         not finished if there is one. This is needed supposing that an
4113         incomplete transaction was replicated with a GTID.
4114 
4115         GTID(1), QUERY(BEGIN), QUERY(INSERT), ANONYMOUS_GTID, QUERY(DROP ...)
4116 
4117         In the example above, without cleaning the gtid_partial_trx,
4118         the GTID(1) would be added to the Retrieved_Gtid_Set after the
4119         QUERY(DROP ...) event.
4120 
4121         GTID(1), QUERY(BEGIN), QUERY(INSERT), GTID(2), QUERY(DROP ...)
4122 
4123         In the example above the GTID(1) will also be discarded as the
4124         GTID(1) transaction is not complete.
4125       */
4126       if (!gtid_partial_trx->is_empty())
4127       {
4128         DBUG_PRINT("info", ("Discarding Gtid(%d, %lld) as the transaction "
4129                             "wasn't complete and we found an error in the"
4130                             "transaction boundary parser.",
4131                             gtid_partial_trx->sidno,
4132                             gtid_partial_trx->gno));
4133         gtid_partial_trx->clear();
4134       }
4135     }
4136 
4137     switch (ev->get_type_code())
4138     {
4139     case binary_log::FORMAT_DESCRIPTION_EVENT:
4140       if (fd_ev_p != &fd_ev)
4141         delete fd_ev_p;
4142       fd_ev_p= (Format_description_log_event *)ev;
4143       break;
4144     case binary_log::ROTATE_EVENT:
4145       // do nothing; just accept this event and go to next
4146       break;
4147     case binary_log::PREVIOUS_GTIDS_LOG_EVENT:
4148     {
4149       seen_prev_gtids= true;
4150       // add events to sets
4151       Previous_gtids_log_event *prev_gtids_ev= (Previous_gtids_log_event *)ev;
4152       if (prev_gtids_ev->add_to_set(retrieved_gtids) != 0)
4153       {
4154         error= true;
4155         break;
4156       }
4157 #ifndef NDEBUG
4158       char* prev_buffer= prev_gtids_ev->get_str(NULL, NULL);
4159       DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
4160                           filename, prev_buffer));
4161       my_free(prev_buffer);
4162 #endif
4163       break;
4164     }
4165     case binary_log::GTID_LOG_EVENT:
4166     {
4167       /* If we didn't find any PREVIOUS_GTIDS in this file */
4168       if (!seen_prev_gtids)
4169       {
4170         my_error(ER_BINLOG_LOGICAL_CORRUPTION, MYF(0), filename,
4171                  "The first global transaction identifier was read, but "
4172                  "no other information regarding identifiers existing "
4173                  "on the previous log files was found.");
4174         error= true;
4175         break;
4176       }
4177 
4178       Gtid_log_event *gtid_ev= (Gtid_log_event *)ev;
4179       rpl_sidno sidno= gtid_ev->get_sidno(retrieved_gtids->get_sid_map());
4180       if (sidno < 0)
4181       {
4182         error= true;
4183         break;
4184       }
4185       else
4186       {
4187         if (retrieved_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK)
4188         {
4189           error= true;
4190           break;
4191         }
4192         else
4193         {
4194           /*
4195             As are updating the transaction boundary parser while reading
4196             GTIDs from relay log files to fill the Retrieved_Gtid_Set, we
4197             should not add the GTID here as we don't know if the transaction
4198             is complete on the relay log yet.
4199           */
4200           gtid_partial_trx->set(sidno, gtid_ev->get_gno());
4201         }
4202         DBUG_PRINT("info", ("Found Gtid in relaylog file '%s': Gtid(%d, %lld).",
4203                             filename, sidno, gtid_ev->get_gno()));
4204       }
4205       break;
4206     }
4207     case binary_log::ANONYMOUS_GTID_LOG_EVENT:
4208     default:
4209       /*
4210         If we reached the end of a transaction after storing it's GTID
4211         in gtid_partial_trx variable, it is time to add this GTID to the
4212         retrieved_gtids set because the transaction is complete and there is no
4213         need for asking this transaction again.
4214       */
4215       if (trx_parser->is_not_inside_transaction())
4216       {
4217         if (!gtid_partial_trx->is_empty())
4218         {
4219           DBUG_PRINT("info", ("Adding Gtid to Retrieved_Gtid_Set as the "
4220                               "transaction was completed at "
4221                               "relaylog file '%s': Gtid(%d, %lld).",
4222                               filename, gtid_partial_trx->sidno,
4223                               gtid_partial_trx->gno));
4224           retrieved_gtids->_add_gtid(gtid_partial_trx->sidno,
4225                                      gtid_partial_trx->gno);
4226           gtid_partial_trx->clear();
4227         }
4228       }
4229       break;
4230     }
4231     if (ev != fd_ev_p)
4232       delete ev;
4233   }
4234 
4235   if (log.error < 0)
4236   {
4237     // This is not a fatal error; the log may just be truncated.
4238     // @todo but what other errors could happen? IO error?
4239     sql_print_warning("Error reading GTIDs from relaylog: %d", log.error);
4240   }
4241 
4242   if (fd_ev_p != &fd_ev)
4243   {
4244     delete fd_ev_p;
4245     fd_ev_p= &fd_ev;
4246   }
4247 
4248   mysql_file_close(file, MYF(MY_WME));
4249   end_io_cache(&log);
4250 
4251 #ifndef NDEBUG
4252   sql_print_information("%lu events read in relaylog file '%s' for updating "
4253                         "Retrieved_Gtid_Set and/or IO thread transaction "
4254                         "parser state.",
4255                         event_counter, filename);
4256 #endif
4257 
4258   DBUG_RETURN(error);
4259 }
4260 
4261 /**
4262   Reads GTIDs from the given binlog file.
4263 
4264   @param filename File to read from.
4265   @param all_gtids If not NULL, then the GTIDs from the
4266   Previous_gtids_log_event and from all Gtid_log_events are stored in
4267   this object.
4268   @param prev_gtids If not NULL, then the GTIDs from the
4269   Previous_gtids_log_events are stored in this object.
4270   @param first_gtid If not NULL, then the first GTID information from the
4271   file will be stored in this object.
4272   @param sid_map The sid_map object to use in the rpl_sidno generation
4273   of the Gtid_log_event. If lock is needed in the sid_map, the caller
4274   must hold it.
4275   @param verify_checksum Set to true to verify event checksums.
4276 
4277   @retval GOT_GTIDS The file was successfully read and it contains
4278   both Gtid_log_events and Previous_gtids_log_events.
4279   This is only possible if either all_gtids or first_gtid are not null.
4280   @retval GOT_PREVIOUS_GTIDS The file was successfully read and it
4281   contains Previous_gtids_log_events but no Gtid_log_events.
4282   For binary logs, if no all_gtids and no first_gtid are specified,
4283   this function will be done right after reading the PREVIOUS_GTIDS
4284   regardless of the rest of the content of the binary log file.
4285   @retval NO_GTIDS The file was successfully read and it does not
4286   contain GTID events.
4287   @retval ERROR Out of memory, or IO error, or malformed event
4288   structure, or the file is malformed (e.g., contains Gtid_log_events
4289   but no Previous_gtids_log_event).
4290   @retval TRUNCATED The file was truncated before the end of the
4291   first Previous_gtids_log_event.
4292 */
// Status codes returned by read_gtids_from_binlog().
enum enum_read_gtids_from_binlog_status
{ GOT_GTIDS, GOT_PREVIOUS_GTIDS, NO_GTIDS, ERROR, TRUNCATED };
4295 static enum_read_gtids_from_binlog_status
read_gtids_from_binlog(const char * filename,Gtid_set * all_gtids,Gtid_set * prev_gtids,Gtid * first_gtid,Sid_map * sid_map,bool verify_checksum,bool is_relay_log)4296 read_gtids_from_binlog(const char *filename, Gtid_set *all_gtids,
4297                        Gtid_set *prev_gtids, Gtid *first_gtid,
4298                        Sid_map* sid_map,
4299                        bool verify_checksum, bool is_relay_log)
4300 {
4301   DBUG_ENTER("read_gtids_from_binlog");
4302   DBUG_PRINT("info", ("Opening file %s", filename));
4303 
4304   /*
4305     Create a Format_description_log_event that is used to read the
4306     first event of the log.
4307   */
4308   Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
4309   if (!fd_ev.is_valid())
4310     DBUG_RETURN(ERROR);
4311 
4312   File file;
4313   IO_CACHE log;
4314 
4315 #ifndef NDEBUG
4316   unsigned long event_counter= 0;
4317   /*
4318     We assert here that both all_gtids and prev_gtids, if specified,
4319     uses the same sid_map as the one passed as a parameter. This is just
4320     to ensure that, if the sid_map needed some lock and was locked by
4321     the caller, the lock applies to all the GTID sets this function is
4322     dealing with.
4323   */
4324   if (all_gtids)
4325     assert(all_gtids->get_sid_map() == sid_map);
4326   if (prev_gtids)
4327     assert(prev_gtids->get_sid_map() == sid_map);
4328 #endif
4329 
4330   const char *errmsg= NULL;
4331   if ((file= open_binlog_file(&log, filename, &errmsg)) < 0)
4332   {
4333     sql_print_error("%s", errmsg);
4334     /*
4335       We need to revisit the recovery procedure for relay log
4336       files. Currently, it is called after this routine.
4337       /Alfranio
4338     */
4339     DBUG_RETURN(TRUNCATED);
4340   }
4341 
4342   /*
4343     Seek for Previous_gtids_log_event and Gtid_log_event events to
4344     gather information what has been processed so far.
4345   */
4346   my_b_seek(&log, BIN_LOG_HEADER_SIZE);
4347   Log_event *ev= NULL;
4348   enum_read_gtids_from_binlog_status ret= NO_GTIDS;
4349   bool done= false;
4350   bool seen_first_gtid= false;
4351   while (!done &&
4352          (ev= Log_event::read_log_event(&log, 0, fd_ev_p, verify_checksum)) !=
4353          NULL)
4354   {
4355 #ifndef NDEBUG
4356     event_counter++;
4357 #endif
4358     DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
4359     switch (ev->get_type_code())
4360     {
4361     case binary_log::FORMAT_DESCRIPTION_EVENT:
4362       if (fd_ev_p != &fd_ev)
4363         delete fd_ev_p;
4364       fd_ev_p= (Format_description_log_event *)ev;
4365       break;
4366     case binary_log::ROTATE_EVENT:
4367       // do nothing; just accept this event and go to next
4368       break;
4369     case binary_log::PREVIOUS_GTIDS_LOG_EVENT:
4370     {
4371       ret= GOT_PREVIOUS_GTIDS;
4372       // add events to sets
4373       Previous_gtids_log_event *prev_gtids_ev=
4374         (Previous_gtids_log_event *)ev;
4375       if (all_gtids != NULL && prev_gtids_ev->add_to_set(all_gtids) != 0)
4376         ret= ERROR, done= true;
4377       else if (prev_gtids != NULL && prev_gtids_ev->add_to_set(prev_gtids) != 0)
4378         ret= ERROR, done= true;
4379 #ifndef NDEBUG
4380       char* prev_buffer= prev_gtids_ev->get_str(NULL, NULL);
4381       DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
4382                           filename, prev_buffer));
4383       my_free(prev_buffer);
4384 #endif
4385       /*
4386         If this is not a relay log, the previous_gtids were asked and no
4387         all_gtids neither first_gtid were asked, it is fine to consider the
4388         job as done.
4389       */
4390       if (!is_relay_log && prev_gtids != NULL &&
4391           all_gtids == NULL && first_gtid == NULL)
4392         done= true;
4393       DBUG_EXECUTE_IF("inject_fault_bug16502579", {
4394                       DBUG_PRINT("debug", ("PREVIOUS_GTIDS_LOG_EVENT found. "
4395                                            "Injected ret=NO_GTIDS."));
4396                       if (ret == GOT_PREVIOUS_GTIDS)
4397                       {
4398                         ret=NO_GTIDS;
4399                         done= false;
4400                       }
4401                       });
4402       break;
4403     }
4404     case binary_log::GTID_LOG_EVENT:
4405     {
4406       if (ret != GOT_GTIDS)
4407       {
4408         if (ret != GOT_PREVIOUS_GTIDS)
4409         {
4410           /*
4411             Since this routine is run on startup, there may not be a
4412             THD instance. Therefore, ER(X) cannot be used.
4413            */
4414           const char* msg_fmt= (current_thd != NULL) ?
4415                                ER(ER_BINLOG_LOGICAL_CORRUPTION) :
4416                                ER_DEFAULT(ER_BINLOG_LOGICAL_CORRUPTION);
4417           my_printf_error(ER_BINLOG_LOGICAL_CORRUPTION,
4418                           msg_fmt, MYF(0),
4419                           filename,
4420                           "The first global transaction identifier was read, but "
4421                           "no other information regarding identifiers existing "
4422                           "on the previous log files was found.");
4423           ret= ERROR, done= true;
4424           break;
4425         }
4426         else
4427           ret= GOT_GTIDS;
4428       }
4429       /*
4430         When this is a relaylog, we just check if the relay log contains at
4431         least one Gtid_log_event, so that we can distinguish the return values
4432         GOT_GTID and GOT_PREVIOUS_GTIDS. We don't need to read anything else
4433         from the relay log.
4434         When this is a binary log, if all_gtids is requested (i.e., NOT NULL),
4435         we should continue to read all gtids. If just first_gtid was requested,
4436         we will be done after storing this Gtid_log_event info on it.
4437       */
4438       if (is_relay_log)
4439       {
4440         ret= GOT_GTIDS, done= true;
4441       }
4442       else
4443       {
4444         Gtid_log_event *gtid_ev= (Gtid_log_event *)ev;
4445         rpl_sidno sidno= gtid_ev->get_sidno(sid_map);
4446         if (sidno < 0)
4447           ret= ERROR, done= true;
4448         else
4449         {
4450           if (all_gtids)
4451           {
4452             if (all_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK)
4453               ret= ERROR, done= true;
4454             all_gtids->_add_gtid(sidno, gtid_ev->get_gno());
4455             DBUG_PRINT("info", ("Got Gtid from file '%s': Gtid(%d, %lld).",
4456                                 filename, sidno, gtid_ev->get_gno()));
4457           }
4458 
4459           /* If the first GTID was requested, stores it */
4460           if (first_gtid && !seen_first_gtid)
4461           {
4462             first_gtid->set(sidno, gtid_ev->get_gno());
4463             seen_first_gtid= true;
4464             /* If the first_gtid was the only thing requested, we are done */
4465             if (all_gtids == NULL)
4466               ret= GOT_GTIDS, done= true;
4467           }
4468         }
4469       }
4470       break;
4471     }
4472     case binary_log::ANONYMOUS_GTID_LOG_EVENT:
4473     {
4474       /*
4475         When this is a relaylog, we just check if it contains
4476         at least one Anonymous_gtid_log_event after initialization
4477         (FDs, Rotates and PREVIOUS_GTIDS), so that we can distinguish the
4478         return values GOT_GTID and GOT_PREVIOUS_GTIDS.
4479         We don't need to read anything else from the relay log.
4480       */
4481       if (is_relay_log)
4482       {
4483         ret= GOT_GTIDS;
4484         done= true;
4485         break;
4486       }
4487       assert(prev_gtids == NULL ? true : all_gtids != NULL ||
4488              first_gtid != NULL);
4489     }
4490     // Fall through.
4491     default:
4492       // if we found any other event type without finding a
4493       // previous_gtids_log_event, then the rest of this binlog
4494       // cannot contain gtids
4495       if (ret != GOT_GTIDS && ret != GOT_PREVIOUS_GTIDS)
4496         done= true;
4497       /*
4498         The GTIDs of the relaylog files will be handled later
4499         because of the possibility of transactions be spanned
4500         along distinct relaylog files.
4501         So, if we found an ordinary event without finding the
4502         GTID but we already found the PREVIOUS_GTIDS, this probably
4503         means that the event is from a transaction that started on
4504         previous relaylog file.
4505       */
4506       if (ret == GOT_PREVIOUS_GTIDS && is_relay_log)
4507         done= true;
4508       break;
4509     }
4510     if (ev != fd_ev_p)
4511       delete ev;
4512     DBUG_PRINT("info", ("done=%d", done));
4513   }
4514 
4515   if (log.error < 0)
4516   {
4517     // This is not a fatal error; the log may just be truncated.
4518 
4519     // @todo but what other errors could happen? IO error?
4520     sql_print_warning("Error reading GTIDs from binary log: %d", log.error);
4521   }
4522 
4523   if (fd_ev_p != &fd_ev)
4524   {
4525     delete fd_ev_p;
4526     fd_ev_p= &fd_ev;
4527   }
4528 
4529   mysql_file_close(file, MYF(MY_WME));
4530   end_io_cache(&log);
4531 
4532   if (all_gtids)
4533     all_gtids->dbug_print("all_gtids");
4534   else
4535     DBUG_PRINT("info", ("all_gtids==NULL"));
4536   if (prev_gtids)
4537     prev_gtids->dbug_print("prev_gtids");
4538   else
4539     DBUG_PRINT("info", ("prev_gtids==NULL"));
4540   if (first_gtid == NULL)
4541     DBUG_PRINT("info", ("first_gtid==NULL"));
4542   else if (first_gtid->sidno == 0)
4543     DBUG_PRINT("info", ("first_gtid.sidno==0"));
4544   else
4545     first_gtid->dbug_print(sid_map, "first_gtid");
4546 
4547   DBUG_PRINT("info", ("returning %d", ret));
4548 #ifndef NDEBUG
4549   if (!is_relay_log && prev_gtids != NULL &&
4550       all_gtids == NULL && first_gtid == NULL)
4551     sql_print_information("Read %lu events from binary log file '%s' to "
4552                           "determine the GTIDs purged from binary logs.",
4553                           event_counter, filename);
4554 #endif
4555   DBUG_RETURN(ret);
4556 }
4557 
find_first_log_not_in_gtid_set(char * binlog_file_name,const Gtid_set * gtid_set,Gtid * first_gtid,const char ** errmsg)4558 bool MYSQL_BIN_LOG::find_first_log_not_in_gtid_set(char *binlog_file_name,
4559                                                    const Gtid_set *gtid_set,
4560                                                    Gtid *first_gtid,
4561                                                    const char **errmsg)
4562 {
4563   DBUG_ENTER("MYSQL_BIN_LOG::gtid_read_start_binlog");
4564   /*
4565     Gather the set of files to be accessed.
4566   */
4567   list<string> filename_list;
4568   LOG_INFO linfo;
4569   int error;
4570 
4571   list<string>::reverse_iterator rit;
4572   Gtid_set binlog_previous_gtid_set(gtid_set->get_sid_map());
4573 
4574   mysql_mutex_lock(&LOCK_index);
4575   for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/);
4576        !error; error= find_next_log(&linfo, false/*need_lock_index=false*/))
4577   {
4578     DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
4579     filename_list.push_back(string(linfo.log_file_name));
4580   }
4581   mysql_mutex_unlock(&LOCK_index);
4582   if (error != LOG_INFO_EOF)
4583   {
4584     *errmsg= "Failed to read the binary log index file while "
4585       "looking for the oldest binary log that contains any GTID "
4586       "that is not in the given gtid set";
4587     error= -1;
4588     goto end;
4589   }
4590 
4591   if (filename_list.empty())
4592   {
4593     *errmsg= "Could not find first log file name in binary log index file "
4594       "while looking for the oldest binary log that contains any GTID "
4595       "that is not in the given gtid set";
4596     error= -2;
4597     goto end;
4598   }
4599 
4600   /*
4601     Iterate over all the binary logs in reverse order, and read only
4602     the Previous_gtids_log_event, to find the first one, that is the
4603     subset of the given gtid set. Since every binary log begins with
4604     a Previous_gtids_log_event, that contains all GTIDs in all
4605     previous binary logs.
4606     We also ask for the first GTID in the binary log to know if we
4607     should send the FD event with the "created" field cleared or not.
4608   */
4609   DBUG_PRINT("info", ("Iterating backwards through binary logs, and reading "
4610                       "only the Previous_gtids_log_event, to find the first "
4611                       "one, that is the subset of the given gtid set."));
4612   rit= filename_list.rbegin();
4613   error= 0;
4614   while (rit != filename_list.rend())
4615   {
4616     binlog_previous_gtid_set.clear();
4617     const char *filename= rit->c_str();
4618     DBUG_PRINT("info", ("Read Previous_gtids_log_event from filename='%s'",
4619                         filename));
4620     switch (read_gtids_from_binlog(filename, NULL, &binlog_previous_gtid_set,
4621                                    first_gtid,
4622                                    binlog_previous_gtid_set.get_sid_map(),
4623                                    opt_master_verify_checksum, is_relay_log))
4624     {
4625     case ERROR:
4626       *errmsg= "Error reading header of binary log while looking for "
4627         "the oldest binary log that contains any GTID that is not in "
4628         "the given gtid set";
4629       error= -3;
4630       goto end;
4631     case NO_GTIDS:
4632       *errmsg= "Found old binary log without GTIDs while looking for "
4633         "the oldest binary log that contains any GTID that is not in "
4634         "the given gtid set";
4635       error= -4;
4636       goto end;
4637     case GOT_GTIDS:
4638     case GOT_PREVIOUS_GTIDS:
4639       if (binlog_previous_gtid_set.is_subset(gtid_set))
4640       {
4641         strcpy(binlog_file_name, filename);
4642         /*
4643           Verify that the selected binlog is not the first binlog,
4644         */
4645         DBUG_EXECUTE_IF("slave_reconnect_with_gtid_set_executed",
4646                         assert(strcmp(filename_list.begin()->c_str(),
4647                                       binlog_file_name) != 0););
4648         goto end;
4649       }
4650     case TRUNCATED:
4651       break;
4652     }
4653 
4654     rit++;
4655   }
4656 
4657   if (rit == filename_list.rend())
4658   {
4659     report_missing_gtids(&binlog_previous_gtid_set, gtid_set, errmsg);
4660     error= -5;
4661   }
4662 
4663 end:
4664   if (error)
4665     DBUG_PRINT("error", ("'%s'", *errmsg));
4666   filename_list.clear();
4667   DBUG_PRINT("info", ("returning %d", error));
4668   DBUG_RETURN(error != 0 ? true : false);
4669 }
4670 
init_gtid_sets(Gtid_set * all_gtids,Gtid_set * lost_gtids,bool verify_checksum,bool need_lock,Transaction_boundary_parser * trx_parser,Gtid * gtid_partial_trx,bool is_server_starting)4671 bool MYSQL_BIN_LOG::init_gtid_sets(Gtid_set *all_gtids, Gtid_set *lost_gtids,
4672                                    bool verify_checksum, bool need_lock,
4673                                    Transaction_boundary_parser *trx_parser,
4674                                    Gtid *gtid_partial_trx,
4675                                    bool is_server_starting)
4676 {
4677   DBUG_ENTER("MYSQL_BIN_LOG::init_gtid_sets");
4678   DBUG_PRINT("info", ("lost_gtids=%p; so we are recovering a %s log; is_relay_log=%d",
4679                       lost_gtids, lost_gtids == NULL ? "relay" : "binary",
4680                       is_relay_log));
4681 
4682   /*
4683     If this is a relay log, we must have the IO thread Master_info trx_parser
4684     in order to correctly feed it with relay log events.
4685   */
4686 #ifndef NDEBUG
4687   if (is_relay_log)
4688   {
4689     assert(trx_parser != NULL);
4690     assert(lost_gtids == NULL);
4691   }
4692 #endif
4693 
4694   /*
4695     Acquires the necessary locks to ensure that logs are not either
4696     removed or updated when we are reading from it.
4697   */
4698   if (need_lock)
4699   {
4700     // We don't need LOCK_log if we are only going to read the initial
4701     // Prevoius_gtids_log_event and ignore the Gtid_log_events.
4702     if (all_gtids != NULL)
4703       mysql_mutex_lock(&LOCK_log);
4704     mysql_mutex_lock(&LOCK_index);
4705     global_sid_lock->wrlock();
4706   }
4707   else
4708   {
4709     if (all_gtids != NULL)
4710       mysql_mutex_assert_owner(&LOCK_log);
4711     mysql_mutex_assert_owner(&LOCK_index);
4712     global_sid_lock->assert_some_wrlock();
4713   }
4714 
4715   // Gather the set of files to be accessed.
4716   list<string> filename_list;
4717   LOG_INFO linfo;
4718   int error;
4719 
4720   list<string>::iterator it;
4721   list<string>::reverse_iterator rit;
4722   bool reached_first_file= false;
4723 
4724   /* Initialize the sid_map to be used in read_gtids_from_binlog */
4725   Sid_map *sid_map= NULL;
4726   if (all_gtids)
4727     sid_map= all_gtids->get_sid_map();
4728   else if (lost_gtids)
4729     sid_map= lost_gtids->get_sid_map();
4730 
4731   for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/); !error;
4732        error= find_next_log(&linfo, false/*need_lock_index=false*/))
4733   {
4734     DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
4735     filename_list.push_back(string(linfo.log_file_name));
4736   }
4737   if (error != LOG_INFO_EOF)
4738   {
4739     DBUG_PRINT("error", ("Error reading %s index",
4740                          is_relay_log ? "relaylog" : "binlog"));
4741     goto end;
4742   }
4743   /*
4744     On server starting, one new empty binlog file is created and
4745     its file name is put into index file before initializing
4746     GLOBAL.GTID_EXECUTED AND GLOBAL.GTID_PURGED, it is not the
4747     last binlog file before the server restarts, so we remove
4748     its file name from filename_list.
4749   */
4750   if (is_server_starting && !is_relay_log && !filename_list.empty())
4751     filename_list.pop_back();
4752 
4753   error= 0;
4754 
4755   if (all_gtids != NULL)
4756   {
4757     DBUG_PRINT("info", ("Iterating backwards through %s logs, "
4758                         "looking for the last %s log that contains "
4759                         "a Previous_gtids_log_event.",
4760                         is_relay_log ? "relay" : "binary",
4761                         is_relay_log ? "relay" : "binary"));
4762     // Iterate over all files in reverse order until we find one that
4763     // contains a Previous_gtids_log_event.
4764     rit= filename_list.rbegin();
4765     bool can_stop_reading= false;
4766     reached_first_file= (rit == filename_list.rend());
4767     DBUG_PRINT("info", ("filename='%s' reached_first_file=%d",
4768                         reached_first_file ? "" : rit->c_str(),
4769                         reached_first_file));
4770     while (!can_stop_reading && !reached_first_file)
4771     {
4772       const char *filename= rit->c_str();
4773       assert(rit != filename_list.rend());
4774       rit++;
4775       reached_first_file= (rit == filename_list.rend());
4776       DBUG_PRINT("info", ("filename='%s' can_stop_reading=%d "
4777                           "reached_first_file=%d, ",
4778                           filename, can_stop_reading, reached_first_file));
4779       switch (read_gtids_from_binlog(filename, all_gtids,
4780                                      reached_first_file ? lost_gtids : NULL,
4781                                      NULL/* first_gtid */,
4782                                      sid_map, verify_checksum, is_relay_log))
4783       {
4784         case ERROR:
4785         {
4786           error= 1;
4787           goto end;
4788         }
4789         case GOT_GTIDS:
4790         {
4791           can_stop_reading= true;
4792           break;
4793         }
4794         case GOT_PREVIOUS_GTIDS:
4795         {
4796           /*
4797             If this is a binlog file, it is enough to have GOT_PREVIOUS_GTIDS.
4798             If this is a relaylog file, we need to find at least one GTID to
4799             start parsing the relay log to add GTID of transactions that might
4800             have spanned in distinct relaylog files.
4801           */
4802           if (!is_relay_log)
4803             can_stop_reading= true;
4804           break;
4805         }
4806         case NO_GTIDS:
4807         {
4808           /*
4809             Mysql server iterates backwards through binary logs, looking for
4810             the last binary log that contains a Previous_gtids_log_event for
4811             gathering the set of gtid_executed on server start. This may take
4812             very long time if it has many binary logs and almost all of them
4813             are out of filesystem cache. So if the binlog_gtid_simple_recovery
4814             is enabled, and the last binary log does not contain any GTID
4815             event, do not read any more binary logs, GLOBAL.GTID_EXECUTED and
4816             GLOBAL.GTID_PURGED should be empty in the case.
4817           */
4818           if (binlog_gtid_simple_recovery && is_server_starting &&
4819               !is_relay_log)
4820           {
4821             assert(all_gtids->is_empty());
4822             assert(lost_gtids->is_empty());
4823             goto end;
4824           }
4825           /*FALLTHROUGH*/
4826         }
4827         case TRUNCATED:
4828         {
4829           break;
4830         }
4831       }
4832     }
4833 
4834     /*
4835       If we use GTIDs and have partial transactions on the relay log,
4836       must check if it ends on next relay log files.
4837       We also need to feed the boundary parser with the rest of the
4838       relay log to put it in the correct state before receiving new
4839       events from the master in the case of GTID auto positioning be
4840       disabled.
4841     */
4842     if (is_relay_log && filename_list.size() > 0)
4843     {
4844       /*
4845         Suppose the following relaylog:
4846 
4847          rl-bin.000001 | rl-bin.000002 | rl-bin.000003 | rl-bin-000004
4848         ---------------+---------------+---------------+---------------
4849          PREV_GTIDS    | PREV_GTIDS    | PREV_GTIDS    | PREV_GTIDS
4850          (empty)       | (UUID:1)      | (UUID:1)      | (UUID:1)
4851         ---------------+---------------+---------------+---------------
4852          GTID(UUID:1)  | QUERY(INSERT) | QUERY(INSERT) | XID
4853         ---------------+---------------+---------------+---------------
4854          QUERY(CREATE  |
4855          TABLE t1 ...) |
4856         ---------------+
4857          GTID(UUID:2)  |
4858         ---------------+
4859          QUERY(BEGIN)  |
4860         ---------------+
4861 
4862         As it is impossible to determine the current Retrieved_Gtid_Set by only
4863         looking to the PREVIOUS_GTIDS on the last relay log file, and scanning
4864         events on it, we tried to find a relay log file that contains at least
4865         one GTID event during the backwards search.
4866 
4867         In the example, we will find a GTID only in rl-bin.000001, as the
4868         UUID:2 transaction was spanned across 4 relay log files.
4869 
4870         The transaction spanning can be caused by "FLUSH RELAY LOGS" commands
4871         on slave while it is queuing the transaction.
4872 
4873         So, in order to correctly add UUID:2 into Retrieved_Gtid_Set, we need
4874         to parse the relay log starting on the file we found the last GTID
4875         queued to know if the transaction was fully retrieved or not.
4876       */
4877 
4878       /*
4879         Adjust the reverse iterator to point to the relaylog file we
4880         need to start parsing, as it was incremented after generating
4881         the relay log file name.
4882       */
4883       assert(rit != filename_list.rbegin());
4884       rit--;
4885       assert(rit != filename_list.rend());
4886       /* Reset the transaction parser before feeding it with events */
4887       trx_parser->reset();
4888       gtid_partial_trx->clear();
4889 
4890       DBUG_PRINT("info", ("Iterating forwards through relay logs, "
4891                           "updating the Retrieved_Gtid_Set and updating "
4892                           "IO thread trx parser before start."));
4893       for (it= find(filename_list.begin(), filename_list.end(), *rit);
4894            it != filename_list.end(); it++)
4895       {
4896         const char *filename= it->c_str();
4897         DBUG_PRINT("info", ("filename='%s'", filename));
4898         if (read_gtids_and_update_trx_parser_from_relaylog(filename, all_gtids,
4899                                                            true, trx_parser,
4900                                                            gtid_partial_trx))
4901         {
4902           error= 1;
4903           goto end;
4904         }
4905       }
4906     }
4907   }
4908   if (lost_gtids != NULL && !reached_first_file)
4909   {
4910     /*
4911       This branch is only reacheable by a binary log. The relay log
4912       don't need to get lost_gtids information.
4913 
4914       A 5.6 server sets GTID_PURGED by rotating the binary log.
4915 
4916       A 5.6 server that had recently enabled GTIDs and set GTID_PURGED
4917       would have a sequence of binary logs like:
4918 
4919       master-bin.N  : No PREVIOUS_GTIDS (GTID wasn't enabled)
4920       master-bin.N+1: Has an empty PREVIOUS_GTIDS and a ROTATE
4921                       (GTID was enabled on startup)
4922       master-bin.N+2: Has a PREVIOUS_GTIDS with the content set by a
4923                       SET @@GLOBAL.GTID_PURGED + has GTIDs of some
4924                       transactions.
4925 
4926       If this 5.6 server be upgraded to 5.7 keeping its binary log files,
4927       this routine will have to find the first binary log that contains a
4928       PREVIOUS_GTIDS + a GTID event to ensure that the content of the
4929       GTID_PURGED will be correctly set (assuming binlog_gtid_simple_recovery
4930       is not enabled).
4931     */
4932     DBUG_PRINT("info", ("Iterating forwards through binary logs, looking for "
4933                         "the first binary log that contains both a "
4934                         "Previous_gtids_log_event and a Gtid_log_event."));
4935     assert(!is_relay_log);
4936     for (it= filename_list.begin(); it != filename_list.end(); it++)
4937     {
4938       /*
4939         We should pass a first_gtid to read_gtids_from_binlog when
4940         binlog_gtid_simple_recovery is disabled, or else it will return
4941         right after reading the PREVIOUS_GTIDS event to avoid stall on
4942         reading the whole binary log.
4943       */
4944       Gtid first_gtid= {0, 0};
4945       const char *filename= it->c_str();
4946       DBUG_PRINT("info", ("filename='%s'", filename));
4947       switch (read_gtids_from_binlog(filename, NULL, lost_gtids,
4948                                      binlog_gtid_simple_recovery ? NULL :
4949                                                                    &first_gtid,
4950                                      sid_map, verify_checksum, is_relay_log))
4951       {
4952         case ERROR:
4953         {
4954           error= 1;
4955           /*FALLTHROUGH*/
4956         }
4957         case GOT_GTIDS:
4958         {
4959           goto end;
4960         }
4961         case NO_GTIDS:
4962         case GOT_PREVIOUS_GTIDS:
4963         {
4964           /*
4965             Mysql server iterates forwards through binary logs, looking for
4966             the first binary log that contains both Previous_gtids_log_event
4967             and gtid_log_event for gathering the set of gtid_purged on server
4968             start. It also iterates forwards through binary logs, looking for
4969             the first binary log that contains both Previous_gtids_log_event
4970             and gtid_log_event for gathering the set of gtid_purged when
4971             purging binary logs. This may take very long time if it has many
4972             binary logs and almost all of them are out of filesystem cache.
4973             So if the binlog_gtid_simple_recovery is enabled, we just
4974             initialize GLOBAL.GTID_PURGED from the first binary log, do not
4975             read any more binary logs.
4976           */
4977           if (binlog_gtid_simple_recovery)
4978             goto end;
4979           /*FALLTHROUGH*/
4980         }
4981         case TRUNCATED:
4982         {
4983           break;
4984         }
4985       }
4986     }
4987   }
4988 end:
4989   if (all_gtids)
4990     all_gtids->dbug_print("all_gtids");
4991   if (lost_gtids)
4992     lost_gtids->dbug_print("lost_gtids");
4993   if (need_lock)
4994   {
4995     global_sid_lock->unlock();
4996     mysql_mutex_unlock(&LOCK_index);
4997     if (all_gtids != NULL)
4998       mysql_mutex_unlock(&LOCK_log);
4999   }
5000   filename_list.clear();
5001   DBUG_PRINT("info", ("returning %d", error));
5002   DBUG_RETURN(error != 0 ? true : false);
5003 }
5004 
5005 
5006 /**
5007   Open a (new) binlog file.
5008 
5009   - Open the log file and the index file. Register the new
5010   file name in it
  - When calling this while the file is in use, you must hold locks
  on LOCK_log and LOCK_index.
5013 
5014   @retval
5015     0	ok
5016   @retval
5017     1	error
5018 */
5019 
open_binlog(const char * log_name,const char * new_name,ulong max_size_arg,bool null_created_arg,bool need_lock_index,bool need_sid_lock,Format_description_log_event * extra_description_event)5020 bool MYSQL_BIN_LOG::open_binlog(const char *log_name,
5021                                 const char *new_name,
5022                                 ulong max_size_arg,
5023                                 bool null_created_arg,
5024                                 bool need_lock_index,
5025                                 bool need_sid_lock,
5026                                 Format_description_log_event *extra_description_event)
5027 {
5028   // lock_index must be acquired *before* sid_lock.
5029   assert(need_sid_lock || !need_lock_index);
5030   DBUG_ENTER("MYSQL_BIN_LOG::open_binlog(const char *, ...)");
5031   DBUG_PRINT("enter",("base filename: %s", log_name));
5032 
5033   mysql_mutex_assert_owner(get_log_lock());
5034 
5035   if (init_and_set_log_file_name(log_name, new_name))
5036   {
5037     sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
5038     DBUG_RETURN(1);
5039   }
5040 
5041   DBUG_PRINT("info", ("generated filename: %s", log_file_name));
5042 
5043   DEBUG_SYNC(current_thd, "after_log_file_name_initialized");
5044 
5045 #ifdef HAVE_REPLICATION
5046   if (open_purge_index_file(TRUE) ||
5047       register_create_index_entry(log_file_name) ||
5048       sync_purge_index_file() ||
5049       DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
5050   {
5051     /**
5052       @todo: although this was introduced to appease valgrind
5053       when injecting emulated faults using fault_injection_registering_index
5054       it may be good to consider what actually happens when
5055       open_purge_index_file succeeds but register or sync fails.
5056 
5057       Perhaps we might need the code below in MYSQL_BIN_LOG::cleanup
5058       for "real life" purposes as well?
5059     */
5060     DBUG_EXECUTE_IF("fault_injection_registering_index", {
5061       if (my_b_inited(&purge_index_file))
5062       {
5063         end_io_cache(&purge_index_file);
5064         my_close(purge_index_file.file, MYF(0));
5065       }
5066     });
5067 
5068     sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
5069     DBUG_RETURN(1);
5070   }
5071   DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
5072 #endif
5073 
5074   write_error= 0;
5075 
5076   /* open the main log file */
5077   if (open(
5078 #ifdef HAVE_PSI_INTERFACE
5079                       m_key_file_log,
5080 #endif
5081                       log_name, new_name))
5082   {
5083 #ifdef HAVE_REPLICATION
5084     close_purge_index_file();
5085 #endif
5086     DBUG_RETURN(1);                            /* all warnings issued */
5087   }
5088 
5089   max_size= max_size_arg;
5090 
5091   open_count++;
5092 
5093   bool write_file_name_to_index_file=0;
5094 
5095   /* This must be before goto err. */
5096 #ifndef NDEBUG
5097   binary_log_debug::debug_pretend_version_50034_in_binlog=
5098     DBUG_EVALUATE_IF("pretend_version_50034_in_binlog", true, false);
5099 #endif
5100   Format_description_log_event s(BINLOG_VERSION);
5101 
5102   if (!my_b_filelength(&log_file))
5103   {
5104     /*
5105       The binary log file was empty (probably newly created)
5106       This is the normal case and happens when the user doesn't specify
5107       an extension for the binary log files.
5108       In this case we write a standard header to it.
5109     */
5110     if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
5111                         BIN_LOG_HEADER_SIZE))
5112       goto err;
5113     bytes_written+= BIN_LOG_HEADER_SIZE;
5114     write_file_name_to_index_file= 1;
5115   }
5116 
5117   /*
5118     don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
5119     as we won't be able to reset it later
5120   */
5121   if (io_cache_type == WRITE_CACHE)
5122   {
5123     s.common_header->flags|= LOG_EVENT_BINLOG_IN_USE_F;
5124   }
5125 
5126   if (is_relay_log)
5127   {
5128     /* relay-log */
5129     if (relay_log_checksum_alg == binary_log::BINLOG_CHECKSUM_ALG_UNDEF)
5130     {
5131       /* inherit master's A descriptor if one has been received */
5132       if (opt_slave_sql_verify_checksum == 0)
5133         /* otherwise use slave's local preference of RL events verification */
5134         relay_log_checksum_alg= binary_log::BINLOG_CHECKSUM_ALG_OFF;
5135       else
5136         relay_log_checksum_alg= static_cast<enum_binlog_checksum_alg>
5137                                 (binlog_checksum_options);
5138     }
5139     s.common_footer->checksum_alg= relay_log_checksum_alg;
5140   }
5141   else
5142     /* binlog */
5143     s.common_footer->checksum_alg= static_cast<enum_binlog_checksum_alg>
5144                                      (binlog_checksum_options);
5145 
5146   assert((s.common_footer)->checksum_alg !=
5147          binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
5148   if (!s.is_valid())
5149     goto err;
5150   s.dont_set_created= null_created_arg;
5151   /* Set LOG_EVENT_RELAY_LOG_F flag for relay log's FD */
5152   if (is_relay_log)
5153     s.set_relay_log_event();
5154   if (s.write(&log_file))
5155     goto err;
5156   bytes_written+= s.common_header->data_written;
5157   /*
5158     We need to revisit this code and improve it.
5159     See further comments in the mysqld.
5160     /Alfranio
5161   */
5162   if (current_thd)
5163   {
5164     Gtid_set logged_gtids_binlog(global_sid_map, global_sid_lock);
5165     Gtid_set* previous_logged_gtids;
5166 
5167     if (is_relay_log)
5168       previous_logged_gtids= previous_gtid_set_relaylog;
5169     else
5170       previous_logged_gtids= &logged_gtids_binlog;
5171 
5172     if (need_sid_lock)
5173       global_sid_lock->wrlock();
5174     else
5175       global_sid_lock->assert_some_wrlock();
5176 
5177     if (!is_relay_log)
5178     {
5179       const Gtid_set *executed_gtids= gtid_state->get_executed_gtids();
5180       const Gtid_set *gtids_only_in_table=
5181         gtid_state->get_gtids_only_in_table();
5182       /* logged_gtids_binlog= executed_gtids - gtids_only_in_table */
5183       if (logged_gtids_binlog.add_gtid_set(executed_gtids) !=
5184           RETURN_STATUS_OK)
5185       {
5186         if (need_sid_lock)
5187           global_sid_lock->unlock();
5188         goto err;
5189       }
5190       logged_gtids_binlog.remove_gtid_set(gtids_only_in_table);
5191     }
5192     DBUG_PRINT("info",("Generating PREVIOUS_GTIDS for %s file.",
5193                        is_relay_log ? "relaylog" : "binlog"));
5194     Previous_gtids_log_event prev_gtids_ev(previous_logged_gtids);
5195     if (is_relay_log)
5196       prev_gtids_ev.set_relay_log_event();
5197     if (need_sid_lock)
5198       global_sid_lock->unlock();
5199     prev_gtids_ev.common_footer->checksum_alg=
5200                                    (s.common_footer)->checksum_alg;
5201     if (prev_gtids_ev.write(&log_file))
5202       goto err;
5203     bytes_written+= prev_gtids_ev.common_header->data_written;
5204   }
5205   else // !(current_thd)
5206   {
5207     /*
5208       If the slave was configured before server restart, the server will
5209       generate a new relay log file without having current_thd, but this
5210       new relay log file must have a PREVIOUS_GTIDS event as we now
5211       generate the PREVIOUS_GTIDS event always.
5212 
5213       This is only needed for relay log files because the server will add
5214       the PREVIOUS_GTIDS of binary logs (when current_thd==NULL) after
5215       server's GTID initialization.
5216 
5217       During server's startup at mysqld_main(), from the binary/relay log
5218       initialization point of view, it will:
5219       1) Call init_server_components() that will generate a new binary log
5220          file but won't write the PREVIOUS_GTIDS event yet;
5221       2) Initialize server's GTIDs;
5222       3) Write the binary log PREVIOUS_GTIDS;
5223       4) Call init_slave() in where the new relay log file will be created
5224          after initializing relay log's Retrieved_Gtid_Set;
5225     */
5226     if (is_relay_log)
5227     {
5228       if (need_sid_lock)
5229         global_sid_lock->wrlock();
5230       else
5231         global_sid_lock->assert_some_wrlock();
5232 
5233       DBUG_PRINT("info",("Generating PREVIOUS_GTIDS for relaylog file."));
5234       Previous_gtids_log_event prev_gtids_ev(previous_gtid_set_relaylog);
5235       prev_gtids_ev.set_relay_log_event();
5236 
5237       if (need_sid_lock)
5238         global_sid_lock->unlock();
5239 
5240       prev_gtids_ev.common_footer->checksum_alg=
5241                                    (s.common_footer)->checksum_alg;
5242       if (prev_gtids_ev.write(&log_file))
5243         goto err;
5244       bytes_written+= prev_gtids_ev.common_header->data_written;
5245     }
5246   }
5247   if (extra_description_event &&
5248       extra_description_event->binlog_version>=4)
5249   {
5250     /*
5251       This is a relay log written to by the I/O slave thread.
5252       Write the event so that others can later know the format of this relay
5253       log.
5254       Note that this event is very close to the original event from the
5255       master (it has binlog version of the master, event types of the
5256       master), so this is suitable to parse the next relay log's event. It
5257       has been produced by
5258       Format_description_log_event::Format_description_log_event(char* buf,).
5259       Why don't we want to write the mi_description_event if this
5260       event is for format<4 (3.23 or 4.x): this is because in that case, the
5261       mi_description_event describes the data received from the
5262       master, but not the data written to the relay log (*conversion*),
5263       which is in format 4 (slave's).
5264     */
5265     /*
5266       Set 'created' to 0, so that in next relay logs this event does not
5267       trigger cleaning actions on the slave in
5268       Format_description_log_event::apply_event_impl().
5269     */
5270     extra_description_event->created= 0;
5271     /* Don't set log_pos in event header */
5272     extra_description_event->set_artificial_event();
5273 
5274     if (extra_description_event->write(&log_file))
5275       goto err;
5276     bytes_written+= extra_description_event->common_header->data_written;
5277   }
5278   if (flush_io_cache(&log_file) ||
5279       mysql_file_sync(log_file.file, MYF(MY_WME)))
5280     goto err;
5281 
5282   if (write_file_name_to_index_file)
5283   {
5284 #ifdef HAVE_REPLICATION
5285     DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
5286 #endif
5287 
5288     assert(my_b_inited(&index_file) != 0);
5289 
    /*
      The new log file name is appended to the crash safe index file after
      all the content of the index file has been copied into the crash safe
      index file. The crash safe index file is then moved to the index file.
    */
5295     DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
5296                     {DBUG_SET("+d,simulate_no_free_space_error");});
5297     if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
5298         add_log_to_index((uchar*) log_file_name, strlen(log_file_name),
5299                          need_lock_index))
5300     {
5301       DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
5302                       {
5303                         DBUG_SET("-d,simulate_file_write_error");
5304                         DBUG_SET("-d,simulate_no_free_space_error");
5305                         DBUG_SET("-d,simulate_disk_full_on_open_binlog");
5306                       });
5307       goto err;
5308     }
5309 
5310 #ifdef HAVE_REPLICATION
5311     DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
5312 #endif
5313   }
5314 
5315   log_state.atomic_set(LOG_OPENED);
5316   /*
5317     At every rotate memorize the last transaction counter state to use it as
5318     offset at logging the transaction logical timestamps.
5319   */
5320   mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
5321   m_dependency_tracker.rotate();
5322   mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);
5323 
5324 #ifdef HAVE_REPLICATION
5325   close_purge_index_file();
5326 #endif
5327 
5328   update_binlog_end_pos();
5329   DBUG_RETURN(0);
5330 
5331 err:
5332 #ifdef HAVE_REPLICATION
5333   if (is_inited_purge_index_file())
5334     purge_index_entry(NULL, NULL, need_lock_index);
5335   close_purge_index_file();
5336 #endif
5337   if (binlog_error_action == ABORT_SERVER)
5338   {
5339     exec_binlog_error_action_abort("Either disk is full or file system is read "
5340                                    "only while opening the binlog. Aborting the"
5341                                    " server.");
5342   }
5343   else
5344   {
5345     sql_print_error("Could not use %s for logging (error %d). "
5346                     "Turning logging off for the whole duration of the MySQL "
5347                     "server process. To turn it on again: fix the cause, "
5348                     "shutdown the MySQL server and restart it.",
5349                     (new_name) ? new_name : name, errno);
5350     close(LOG_CLOSE_INDEX, false, need_lock_index);
5351   }
5352   DBUG_RETURN(1);
5353 }
5354 
5355 
5356 /**
5357   Move crash safe index file to index file.
5358 
5359   @param need_lock_index If true, LOCK_index will be acquired;
5360   otherwise it should already be held.
5361 
5362   @retval 0 ok
5363   @retval -1 error
5364 */
move_crash_safe_index_file_to_index_file(bool need_lock_index)5365 int MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file(bool need_lock_index)
5366 {
5367   int error= 0;
5368   File fd= -1;
5369   DBUG_ENTER("MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file");
5370   int failure_trials= MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
5371   bool file_rename_status= false, file_delete_status= false;
5372   THD *thd= current_thd;
5373 
5374   if (need_lock_index)
5375     mysql_mutex_lock(&LOCK_index);
5376   else
5377     mysql_mutex_assert_owner(&LOCK_index);
5378 
5379   if (my_b_inited(&index_file))
5380   {
5381     end_io_cache(&index_file);
5382     if (mysql_file_close(index_file.file, MYF(0)) < 0)
5383     {
5384       error= -1;
5385       sql_print_error("While rebuilding index file %s: "
5386                       "Failed to close the index file.", index_file_name);
5387       /*
5388         Delete Crash safe index file here and recover the binlog.index
5389         state(index_file io_cache) from old binlog.index content.
5390        */
5391       mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
5392                         MYF(0));
5393 
5394       goto recoverable_err;
5395     }
5396 
5397     /*
5398       Sometimes an outsider can lock index files for temporary viewing
5399       purpose. For eg: MEB locks binlog.index/relaylog.index to view
5400       the content of the file. During that small period of time, deletion
5401       of the file is not possible on some platforms(Eg: Windows)
5402       Server should retry the delete operation for few times instead of panicking
5403       immediately.
5404     */
5405     while ((file_delete_status == false) && (failure_trials > 0))
5406     {
5407       if (DBUG_EVALUATE_IF("force_index_file_delete_failure", 1, 0)) break;
5408 
5409       DBUG_EXECUTE_IF("simulate_index_file_delete_failure",
5410                   {
5411                     /* This simulation causes the delete to fail */
5412                     static char first_char= index_file_name[0];
5413                     index_file_name[0]= 0;
5414                     sql_print_information("Retrying delete");
5415                     if (failure_trials == 1)
5416                       index_file_name[0]= first_char;
5417                   };);
5418       file_delete_status = !(mysql_file_delete(key_file_binlog_index,
5419                                                index_file_name, MYF(MY_WME)));
5420       --failure_trials;
5421       if (!file_delete_status)
5422       {
5423         my_sleep(1000);
5424         /* Clear the error before retrying. */
5425         if (failure_trials > 0)
5426           thd->clear_error();
5427       }
5428     }
5429 
5430     if (!file_delete_status)
5431     {
5432       error= -1;
5433       sql_print_error("While rebuilding index file %s: "
5434                       "Failed to delete the existing index file. It could be "
5435                       "that file is being used by some other process.",
5436                       index_file_name);
5437       /*
5438         Delete Crash safe file index file here and recover the binlog.index
5439         state(index_file io_cache) from old binlog.index content.
5440        */
5441       mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
5442                         MYF(0));
5443 
5444       goto recoverable_err;
5445     }
5446   }
5447 
5448   DBUG_EXECUTE_IF("crash_create_before_rename_index_file", DBUG_SUICIDE(););
5449   /*
5450     Sometimes an outsider can lock index files for temporary viewing
5451     purpose. For eg: MEB locks binlog.index/relaylog.index to view
5452     the content of the file. During that small period of time, rename
5453     of the file is not possible on some platforms(Eg: Windows)
5454     Server should retry the rename operation for few times instead of panicking
5455     immediately.
5456   */
5457   failure_trials = MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
5458   while ((file_rename_status == false) && (failure_trials > 0))
5459   {
5460     DBUG_EXECUTE_IF("simulate_crash_safe_index_file_rename_failure",
5461                 {
5462                   /* This simulation causes the rename to fail */
5463                   static char first_char= index_file_name[0];
5464                   index_file_name[0]= 0;
5465                   sql_print_information("Retrying rename");
5466                   if (failure_trials == 1)
5467                     index_file_name[0]= first_char;
5468                 };);
5469     file_rename_status =
5470         !(my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)));
5471     --failure_trials;
5472     if (!file_rename_status)
5473     {
5474       my_sleep(1000);
5475       /* Clear the error before retrying. */
5476       if (failure_trials > 0)
5477         thd->clear_error();
5478     }
5479   }
5480   if (!file_rename_status)
5481   {
5482     error= -1;
5483     sql_print_error("While rebuilding index file %s: "
5484                     "Failed to rename the new index file to the existing "
5485                     "index file.", index_file_name);
5486     goto fatal_err;
5487   }
5488   DBUG_EXECUTE_IF("crash_create_after_rename_index_file", DBUG_SUICIDE(););
5489 
5490 recoverable_err:
5491   if ((fd= mysql_file_open(key_file_binlog_index,
5492                            index_file_name,
5493                            O_RDWR | O_CREAT | O_BINARY,
5494                            MYF(MY_WME))) < 0 ||
5495            mysql_file_sync(fd, MYF(MY_WME)) ||
5496            init_io_cache_ext(&index_file, fd, IO_SIZE, READ_CACHE,
5497                              mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(0)),
5498                                              0, MYF(MY_WME | MY_WAIT_IF_FULL),
5499                              key_file_binlog_index_cache))
5500   {
5501     sql_print_error("After rebuilding the index file %s: "
5502                     "Failed to open the index file.", index_file_name);
5503     goto fatal_err;
5504   }
5505 
5506   if (need_lock_index)
5507     mysql_mutex_unlock(&LOCK_index);
5508   DBUG_RETURN(error);
5509 
5510 fatal_err:
5511   /*
5512     This situation is very very rare to happen (unless there is some serious
5513     memory related issues like OOM) and should be treated as fatal error.
5514     Hence it is better to bring down the server without respecting
5515     'binlog_error_action' value here.
5516   */
5517   exec_binlog_error_action_abort("MySQL server failed to update the "
5518                                  "binlog.index file's content properly. "
5519                                  "It might not be in sync with available "
5520                                  "binlogs and the binlog.index file state is in "
5521                                  "unrecoverable state. Aborting the server.");
5522   /*
5523     Server is aborted in the above function.
5524     This is dead code to make compiler happy.
5525    */
5526   DBUG_RETURN(error);
5527 }
5528 
5529 
5530 /**
5531   Append log file name to index file.
5532 
5533   - To make crash safe, we copy all the content of index file
5534   to crash safe index file firstly and then append the log
5535   file name to the crash safe index file. Finally move the
5536   crash safe index file to index file.
5537 
5538   @retval
5539     0   ok
5540   @retval
5541     -1   error
5542 */
add_log_to_index(uchar * log_name,size_t log_name_len,bool need_lock_index)5543 int MYSQL_BIN_LOG::add_log_to_index(uchar* log_name,
5544                                     size_t log_name_len, bool need_lock_index)
5545 {
5546   DBUG_ENTER("MYSQL_BIN_LOG::add_log_to_index");
5547 
5548   if (open_crash_safe_index_file())
5549   {
5550     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5551                     "open the crash safe index file.");
5552     goto err;
5553   }
5554 
5555   if (copy_file(&index_file, &crash_safe_index_file, 0))
5556   {
5557     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5558                     "copy index file to crash safe index file.");
5559     goto err;
5560   }
5561 
5562   if (my_b_write(&crash_safe_index_file, log_name, log_name_len) ||
5563       my_b_write(&crash_safe_index_file, (uchar*) "\n", 1) ||
5564       flush_io_cache(&crash_safe_index_file) ||
5565       mysql_file_sync(crash_safe_index_file.file, MYF(MY_WME)))
5566   {
5567     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5568                     "append log file name: %s, to crash "
5569                     "safe index file.", log_name);
5570     goto err;
5571   }
5572 
5573   if (close_crash_safe_index_file())
5574   {
5575     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5576                     "close the crash safe index file.");
5577     goto err;
5578   }
5579 
5580   if (move_crash_safe_index_file_to_index_file(need_lock_index))
5581   {
5582     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5583                     "move crash safe index file to index file.");
5584     goto err;
5585   }
5586 
5587   DBUG_RETURN(0);
5588 
5589 err:
5590   DBUG_RETURN(-1);
5591 }
5592 
get_current_log(LOG_INFO * linfo,bool need_lock_log)5593 int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo, bool need_lock_log/*true*/)
5594 {
5595   if (need_lock_log)
5596     mysql_mutex_lock(&LOCK_log);
5597   int ret = raw_get_current_log(linfo);
5598   if (need_lock_log)
5599     mysql_mutex_unlock(&LOCK_log);
5600   return ret;
5601 }
5602 
raw_get_current_log(LOG_INFO * linfo)5603 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
5604 {
5605   strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
5606   linfo->pos = my_b_safe_tell(&log_file);
5607   return 0;
5608 }
5609 
check_write_error(THD * thd)5610 bool MYSQL_BIN_LOG::check_write_error(THD *thd)
5611 {
5612   DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
5613 
5614   bool checked= FALSE;
5615 
5616   if (!thd->is_error())
5617     DBUG_RETURN(checked);
5618 
5619   switch (thd->get_stmt_da()->mysql_errno())
5620   {
5621     case ER_TRANS_CACHE_FULL:
5622     case ER_STMT_CACHE_FULL:
5623     case ER_ERROR_ON_WRITE:
5624     case ER_BINLOG_LOGGING_IMPOSSIBLE:
5625       checked= TRUE;
5626     break;
5627   }
5628   DBUG_PRINT("return", ("checked: %s", YESNO(checked)));
5629   DBUG_RETURN(checked);
5630 }
5631 
set_write_error(THD * thd,bool is_transactional)5632 void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
5633 {
5634   DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
5635 
5636   write_error= 1;
5637 
5638   if (check_write_error(thd))
5639     DBUG_VOID_RETURN;
5640 
5641   if (my_errno() == EFBIG)
5642   {
5643     if (is_transactional)
5644     {
5645       my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(MY_WME));
5646     }
5647     else
5648     {
5649       my_message(ER_STMT_CACHE_FULL, ER(ER_STMT_CACHE_FULL), MYF(MY_WME));
5650     }
5651   }
5652   else
5653   {
5654     char errbuf[MYSYS_STRERROR_SIZE];
5655     my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name,
5656              errno, my_strerror(errbuf, sizeof(errbuf), errno));
5657   }
5658 
5659   DBUG_VOID_RETURN;
5660 }
5661 
compare_log_name(const char * log_1,const char * log_2)5662 static int compare_log_name(const char* log_1, const char* log_2)
5663 {
5664   const char * log_1_basename= log_1 + dirname_length(log_1);
5665   const char * log_2_basename= log_2 + dirname_length(log_2);
5666 
5667   return strcmp(log_1_basename,log_2_basename);
5668 }
5669 
5670 /**
5671   Find the position in the log-index-file for the given log name.
5672 
5673   @param[out] linfo The found log file name will be stored here, along
5674   with the byte offset of the next log file name in the index file.
5675   @param log_name Filename to find in the index file, or NULL if we
5676   want to read the first entry.
5677   @param need_lock_index If false, this function acquires LOCK_index;
5678   otherwise the lock should already be held by the caller.
5679 
5680   @note
5681     On systems without the truncate function the file will end with one or
5682     more empty lines.  These will be ignored when reading the file.
5683 
5684   @retval
5685     0			ok
5686   @retval
5687     LOG_INFO_EOF	        End of log-index-file found
5688   @retval
5689     LOG_INFO_IO		Got IO error while reading file
5690 */
5691 
find_log_pos(LOG_INFO * linfo,const char * log_name,bool need_lock_index)5692 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
5693                                 bool need_lock_index)
5694 {
5695   int error= 0;
5696   char *full_fname= linfo->log_file_name;
5697   char full_log_name[FN_REFLEN], fname[FN_REFLEN];
5698   DBUG_ENTER("find_log_pos");
5699   full_log_name[0]= full_fname[0]= 0;
5700 
5701   /*
5702     Mutex needed because we need to make sure the file pointer does not
5703     move from under our feet
5704   */
5705   if (need_lock_index)
5706     mysql_mutex_lock(&LOCK_index);
5707   else
5708     mysql_mutex_assert_owner(&LOCK_index);
5709 
5710   if (!my_b_inited(&index_file))
5711   {
5712       error= LOG_INFO_IO;
5713       goto end;
5714   }
5715 
5716   // extend relative paths for log_name to be searched
5717   if (log_name)
5718   {
5719     if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
5720     {
5721       error= LOG_INFO_EOF;
5722       goto end;
5723     }
5724   }
5725 
5726   DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
5727                        log_name ? log_name : "NULL", full_log_name));
5728 
5729   /* As the file is flushed, we can't get an error here */
5730   my_b_seek(&index_file, (my_off_t) 0);
5731 
5732   for (;;)
5733   {
5734     size_t length;
5735     my_off_t offset= my_b_tell(&index_file);
5736 
5737     DBUG_EXECUTE_IF("simulate_find_log_pos_error",
5738                     error=  LOG_INFO_EOF; break;);
5739     /* If we get 0 or 1 characters, this is the end of the file */
5740     if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
5741     {
5742       /* Did not find the given entry; Return not found or error */
5743       error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
5744       break;
5745     }
5746 
5747     // extend relative paths and match against full path
5748     if (normalize_binlog_name(full_fname, fname, is_relay_log))
5749     {
5750       error= LOG_INFO_EOF;
5751       break;
5752     }
5753     // if the log entry matches, null string matching anything
5754     if (!log_name ||
5755         !compare_log_name(full_fname,full_log_name))
5756     {
5757       DBUG_PRINT("info", ("Found log file entry"));
5758       linfo->index_file_start_offset= offset;
5759       linfo->index_file_offset = my_b_tell(&index_file);
5760       break;
5761     }
5762     linfo->entry_index++;
5763   }
5764 
5765 end:
5766   if (need_lock_index)
5767     mysql_mutex_unlock(&LOCK_index);
5768   DBUG_RETURN(error);
5769 }
5770 
5771 
5772 /**
5773   Find the position in the log-index-file for the given log name.
5774 
5775   @param[out] linfo The filename will be stored here, along with the
5776   byte offset of the next filename in the index file.
5777 
5778   @param need_lock_index If true, LOCK_index will be acquired;
5779   otherwise it should already be held by the caller.
5780 
5781   @note
5782     - Before calling this function, one has to call find_log_pos()
5783     to set up 'linfo'
5784     - Mutex needed because we need to make sure the file pointer does not move
5785     from under our feet
5786 
5787   @retval 0 ok
5788   @retval LOG_INFO_EOF End of log-index-file found
5789   @retval LOG_INFO_IO Got IO error while reading file
5790 */
find_next_log(LOG_INFO * linfo,bool need_lock_index)5791 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock_index)
5792 {
5793   int error= 0;
5794   size_t length;
5795   char fname[FN_REFLEN];
5796   char *full_fname= linfo->log_file_name;
5797 
5798   if (need_lock_index)
5799     mysql_mutex_lock(&LOCK_index);
5800   else
5801     mysql_mutex_assert_owner(&LOCK_index);
5802 
5803   if (!my_b_inited(&index_file))
5804   {
5805       error= LOG_INFO_IO;
5806       goto err;
5807   }
5808   /* As the file is flushed, we can't get an error here */
5809   my_b_seek(&index_file, linfo->index_file_offset);
5810 
5811   linfo->index_file_start_offset= linfo->index_file_offset;
5812   if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
5813   {
5814     error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
5815     goto err;
5816   }
5817 
5818   if (fname[0] != 0)
5819   {
5820     if(normalize_binlog_name(full_fname, fname, is_relay_log))
5821     {
5822       error= LOG_INFO_EOF;
5823       goto err;
5824     }
5825     length= strlen(full_fname);
5826   }
5827 
5828   linfo->index_file_offset= my_b_tell(&index_file);
5829 
5830 err:
5831   if (need_lock_index)
5832     mysql_mutex_unlock(&LOCK_index);
5833   return error;
5834 }
5835 
5836 /**
5837   Find the relay log name following the given name from relay log index file.
5838 
5839   @param[in|out] log_name  The name is full path name.
5840 
5841   @return return 0 if it finds next relay log. Otherwise return the error code.
5842 */
find_next_relay_log(char log_name[FN_REFLEN+1])5843 int MYSQL_BIN_LOG::find_next_relay_log(char log_name[FN_REFLEN+1])
5844 {
5845   LOG_INFO info;
5846   int error;
5847   char relative_path_name[FN_REFLEN+1];
5848 
5849   if (fn_format(relative_path_name, log_name+dirname_length(log_name),
5850                 mysql_data_home, "", 0)
5851       == NullS)
5852     return 1;
5853 
5854   mysql_mutex_lock(&LOCK_index);
5855 
5856   error= find_log_pos(&info, relative_path_name, false);
5857   if (error == 0)
5858   {
5859     error= find_next_log(&info, false);
5860     if (error == 0)
5861       strcpy(log_name, info.log_file_name);
5862   }
5863 
5864   mysql_mutex_unlock(&LOCK_index);
5865   return error;
5866 }
5867 
5868 /**
  Removes files, as part of a RESET MASTER or RESET SLAVE statement,
  by deleting all logs referred to in the index file. Then, it starts
  writing to a new log file.
5872 
5873   The new index file will only contain this file.
5874 
5875   @param thd Thread
5876 
5877   @note
5878     If not called from slave thread, write start event to new log
5879 
5880   @retval
5881     0	ok
5882   @retval
5883     1   error
5884 */
reset_logs(THD * thd,bool delete_only)5885 bool MYSQL_BIN_LOG::reset_logs(THD* thd, bool delete_only)
5886 {
5887   LOG_INFO linfo;
5888   bool error=0;
5889   int err;
5890   const char* save_name;
5891   DBUG_ENTER("reset_logs");
5892 
5893   /*
5894     Flush logs for storage engines, so that the last transaction
5895     is fsynced inside storage engines.
5896   */
5897   if (ha_flush_logs(NULL))
5898     DBUG_RETURN(1);
5899 
5900   ha_reset_logs(thd);
5901 
5902   /*
5903     We need to get both locks to be sure that no one is trying to
5904     write to the index log file.
5905   */
5906   mysql_mutex_lock(&LOCK_log);
5907   mysql_mutex_lock(&LOCK_index);
5908 
5909   global_sid_lock->wrlock();
5910 
5911   /* Save variables so that we can reopen the log */
5912   save_name=name;
5913   name=0;					// Protect against free
5914   close(LOG_CLOSE_TO_BE_OPENED, false/*need_lock_log=false*/,
5915         false/*need_lock_index=false*/);
5916 
5917   /*
5918     First delete all old log files and then update the index file.
5919     As we first delete the log files and do not use sort of logging,
5920     a crash may lead to an inconsistent state where the index has
5921     references to non-existent files.
5922 
5923     We need to invert the steps and use the purge_index_file methods
5924     in order to make the operation safe.
5925   */
5926 
5927   if ((err= find_log_pos(&linfo, NullS, false/*need_lock_index=false*/)) != 0)
5928   {
5929     uint errcode= purge_log_get_error_code(err);
5930     sql_print_error("Failed to locate old binlog or relay log files");
5931     my_message(errcode, ER(errcode), MYF(0));
5932     error= 1;
5933     goto err;
5934   }
5935 
5936   for (;;)
5937   {
5938     if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
5939     {
5940       if (my_errno() == ENOENT)
5941       {
5942         push_warning_printf(current_thd, Sql_condition::SL_WARNING,
5943                             ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
5944                             linfo.log_file_name);
5945         sql_print_information("Failed to delete file '%s'",
5946                               linfo.log_file_name);
5947         set_my_errno(0);
5948         error= 0;
5949       }
5950       else
5951       {
5952         push_warning_printf(current_thd, Sql_condition::SL_WARNING,
5953                             ER_BINLOG_PURGE_FATAL_ERR,
5954                             "a problem with deleting %s; "
5955                             "consider examining correspondence "
5956                             "of your binlog index file "
5957                             "to the actual binlog files",
5958                             linfo.log_file_name);
5959         error= 1;
5960         goto err;
5961       }
5962     }
5963     if (find_next_log(&linfo, false/*need_lock_index=false*/))
5964       break;
5965   }
5966 
5967   /* Start logging with a new file */
5968   close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED,
5969         false/*need_lock_log=false*/,
5970         false/*need_lock_index=false*/);
5971   if ((error= my_delete_allow_opened(index_file_name, MYF(0))))	// Reset (open will update)
5972   {
5973     if (my_errno() == ENOENT)
5974     {
5975       push_warning_printf(current_thd, Sql_condition::SL_WARNING,
5976                           ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
5977                           index_file_name);
5978       sql_print_information("Failed to delete file '%s'",
5979                             index_file_name);
5980       set_my_errno(0);
5981       error= 0;
5982     }
5983     else
5984     {
5985       push_warning_printf(current_thd, Sql_condition::SL_WARNING,
5986                           ER_BINLOG_PURGE_FATAL_ERR,
5987                           "a problem with deleting %s; "
5988                           "consider examining correspondence "
5989                           "of your binlog index file "
5990                           "to the actual binlog files",
5991                           index_file_name);
5992       error= 1;
5993       goto err;
5994     }
5995   }
5996 
5997 #ifdef HAVE_REPLICATION
5998   /*
5999     For relay logs we clear the gtid state associated per channel(i.e rli)
6000     in the purge_relay_logs()
6001   */
6002   if (!is_relay_log)
6003   {
6004     if(gtid_state->clear(thd))
6005     {
6006       error= 1;
6007       goto err;
6008     }
6009     // don't clear global_sid_map because it's used by the relay log too
6010     if (gtid_state->init() != 0)
6011       goto err;
6012   }
6013 #endif
6014 
6015   if (!delete_only)
6016   {
6017     if (!open_index_file(index_file_name, 0, false/*need_lock_index=false*/))
6018     if ((error= open_binlog(save_name, 0,
6019                             max_size, false,
6020                             false/*need_lock_index=false*/,
6021                             false/*need_sid_lock=false*/,
6022                             NULL)))
6023       goto err;
6024   }
6025   my_free((void *) save_name);
6026 
6027 err:
6028   if (error == 1)
6029     name= const_cast<char*>(save_name);
6030   global_sid_lock->unlock();
6031   mysql_mutex_unlock(&LOCK_index);
6032   mysql_mutex_unlock(&LOCK_log);
6033   DBUG_RETURN(error);
6034 }
6035 
6036 
6037 /**
6038   Set the name of crash safe index file.
6039 
6040   @retval
6041     0   ok
6042   @retval
6043     1   error
6044 */
set_crash_safe_index_file_name(const char * base_file_name)6045 int MYSQL_BIN_LOG::set_crash_safe_index_file_name(const char *base_file_name)
6046 {
6047   int error= 0;
6048   DBUG_ENTER("MYSQL_BIN_LOG::set_crash_safe_index_file_name");
6049   if (fn_format(crash_safe_index_file_name, base_file_name, mysql_data_home,
6050                 ".index_crash_safe", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
6051                                          MY_REPLACE_EXT)) == NULL)
6052   {
6053     error= 1;
6054     sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed "
6055                     "to set file name.");
6056   }
6057   DBUG_RETURN(error);
6058 }
6059 
6060 
6061 /**
6062   Open a (new) crash safe index file.
6063 
6064   @note
6065     The crash safe index file is a special file
6066     used for guaranteeing index file crash safe.
6067   @retval
6068     0   ok
6069   @retval
6070     1   error
6071 */
open_crash_safe_index_file()6072 int MYSQL_BIN_LOG::open_crash_safe_index_file()
6073 {
6074   int error= 0;
6075   File file= -1;
6076 
6077   DBUG_ENTER("MYSQL_BIN_LOG::open_crash_safe_index_file");
6078 
6079   if (!my_b_inited(&crash_safe_index_file))
6080   {
6081     if ((file= my_open(crash_safe_index_file_name, O_RDWR | O_CREAT | O_BINARY,
6082                        MYF(MY_WME))) < 0  ||
6083         init_io_cache(&crash_safe_index_file, file, IO_SIZE, WRITE_CACHE,
6084                       0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
6085     {
6086       error= 1;
6087       sql_print_error("MYSQL_BIN_LOG::open_crash_safe_index_file failed "
6088                       "to open temporary index file.");
6089     }
6090   }
6091   DBUG_RETURN(error);
6092 }
6093 
6094 
6095 /**
6096   Close the crash safe index file.
6097 
6098   @note
6099     The crash safe file is just closed, is not deleted.
6100     Because it is moved to index file later on.
6101   @retval
6102     0   ok
6103   @retval
6104     1   error
6105 */
close_crash_safe_index_file()6106 int MYSQL_BIN_LOG::close_crash_safe_index_file()
6107 {
6108   int error= 0;
6109 
6110   DBUG_ENTER("MYSQL_BIN_LOG::close_crash_safe_index_file");
6111 
6112   if (my_b_inited(&crash_safe_index_file))
6113   {
6114     end_io_cache(&crash_safe_index_file);
6115     error= my_close(crash_safe_index_file.file, MYF(0));
6116   }
6117   memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
6118 
6119   DBUG_RETURN(error);
6120 }
6121 
6122 
6123 /**
6124   Delete relay log files prior to rli->group_relay_log_name
6125   (i.e. all logs which are not involved in a non-finished group
6126   (transaction)), remove them from the index file and start on next
6127   relay log.
6128 
6129   IMPLEMENTATION
6130 
6131   - You must hold rli->data_lock before calling this function, since
6132     it writes group_relay_log_pos and similar fields of
6133     Relay_log_info.
6134   - Protects index file with LOCK_index
6135   - Delete relevant relay log files
6136   - Copy all file names after these ones to the front of the index file
  - If the OS has truncate, truncate the file, else fill it with '\n'
6138   - Read the next file name from the index file and store in rli->linfo
6139 
6140   @param rli	       Relay log information
6141   @param included     If false, all relay logs that are strictly before
6142                       rli->group_relay_log_name are deleted ; if true, the
6143                       latter is deleted too (i.e. all relay logs
6144                       read by the SQL slave thread are deleted).
6145 
6146   @note
    - This is only called from the slave SQL thread when it has read
    all commands from a relay log and wants to switch to a new relay log.
6149     - When this happens, we can be in an active transaction as
6150     a transaction can span over two relay logs
6151     (although it is always written as a single block to the master's binary
6152     log, hence cannot span over two master's binary logs).
6153 
6154   @retval
6155     0			ok
6156   @retval
6157     LOG_INFO_EOF	        End of log-index-file found
6158   @retval
6159     LOG_INFO_SEEK	Could not allocate IO cache
6160   @retval
6161     LOG_INFO_IO		Got IO error while reading file
6162 */
6163 
6164 #ifdef HAVE_REPLICATION
6165 
purge_first_log(Relay_log_info * rli,bool included)6166 int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
6167 {
6168   int error;
6169   char *to_purge_if_included= NULL;
6170   DBUG_ENTER("purge_first_log");
6171 
6172   assert(current_thd->system_thread == SYSTEM_THREAD_SLAVE_SQL);
6173   assert(is_relay_log);
6174   assert(is_open());
6175   assert(rli->slave_running == 1);
6176   assert(!strcmp(rli->linfo.log_file_name,rli->get_event_relay_log_name()));
6177 
6178   mysql_mutex_assert_owner(&rli->data_lock);
6179 
6180   mysql_mutex_lock(&LOCK_index);
6181   to_purge_if_included= my_strdup(key_memory_Relay_log_info_group_relay_log_name,
6182                                   rli->get_group_relay_log_name(), MYF(0));
6183 
6184   /*
6185     Read the next log file name from the index file and pass it back to
6186     the caller.
6187   */
6188   if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
6189                          false/*need_lock_index=false*/)) ||
6190      (error=find_next_log(&rli->linfo, false/*need_lock_index=false*/)))
6191   {
6192     char buff[22];
6193     sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
6194                     error,
6195                     llstr(rli->linfo.index_file_offset,buff),
6196                     rli->get_event_relay_log_name(),
6197                     included);
6198     goto err;
6199   }
6200 
6201   /*
6202     Reset rli's coordinates to the current log.
6203   */
6204   rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
6205   rli->set_event_relay_log_name(rli->linfo.log_file_name);
6206 
6207   /*
6208     If we removed the rli->group_relay_log_name file,
6209     we must update the rli->group* coordinates, otherwise do not touch it as the
6210     group's execution is not finished (e.g. COMMIT not executed)
6211   */
6212   if (included)
6213   {
6214     rli->set_group_relay_log_pos(BIN_LOG_HEADER_SIZE);
6215     rli->set_group_relay_log_name(rli->linfo.log_file_name);
6216     rli->notify_group_relay_log_name_update();
6217   }
6218   /*
6219     Store where we are in the new file for the execution thread.
6220     If we are in the middle of a transaction, then we
6221     should not store the position in the repository, instead in
6222     that case set a flag to true which indicates that a 'forced flush'
6223     is postponed due to transaction split across the relaylogs.
6224   */
6225   if (!rli->is_in_group())
6226     rli->flush_info(TRUE);
6227   else
6228     rli->force_flush_postponed_due_to_split_trans= true;
6229 
6230   DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););
6231 
6232   mysql_mutex_lock(&rli->log_space_lock);
6233   rli->relay_log.purge_logs(to_purge_if_included, included,
6234                             false/*need_lock_index=false*/,
6235                             false/*need_update_threads=false*/,
6236                             &rli->log_space_total, true);
6237   // Tell the I/O thread to take the relay_log_space_limit into account
6238   rli->ignore_log_space_limit= 0;
6239   mysql_mutex_unlock(&rli->log_space_lock);
6240 
6241   /*
6242     Ok to broadcast after the critical region as there is no risk of
6243     the mutex being destroyed by this thread later - this helps save
6244     context switches
6245   */
6246   mysql_cond_broadcast(&rli->log_space_cond);
6247 
6248   /*
6249    * Need to update the log pos because purge logs has been called
6250    * after fetching initially the log pos at the begining of the method.
6251    */
6252   if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
6253                          false/*need_lock_index=false*/)))
6254   {
6255     char buff[22];
6256     sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
6257                     error,
6258                     llstr(rli->linfo.index_file_offset,buff),
6259                     rli->get_group_relay_log_name(),
6260                     included);
6261     goto err;
6262   }
6263 
6264   /* If included was passed, rli->linfo should be the first entry. */
6265   assert(!included || rli->linfo.index_file_start_offset == 0);
6266 
6267 err:
6268   my_free(to_purge_if_included);
6269   mysql_mutex_unlock(&LOCK_index);
6270   DBUG_RETURN(error);
6271 }
6272 
6273 
6274 /**
6275   Remove logs from index file.
6276 
6277   - To make crash safe, we copy the content of index file
6278   from index_file_start_offset recored in log_info to
6279   crash safe index file firstly and then move the crash
6280   safe index file to index file.
6281 
6282   @param linfo                  Store here the found log file name and
6283                                 position to the NEXT log file name in
6284                                 the index file.
6285 
6286   @param need_update_threads    If we want to update the log coordinates
6287                                 of all threads. False for relay logs,
6288                                 true otherwise.
6289 
6290   @retval
6291     0    ok
6292   @retval
6293     LOG_INFO_IO    Got IO error while reading/writing file
6294 */
remove_logs_from_index(LOG_INFO * log_info,bool need_update_threads)6295 int MYSQL_BIN_LOG::remove_logs_from_index(LOG_INFO* log_info, bool need_update_threads)
6296 {
6297   if (open_crash_safe_index_file())
6298   {
6299     sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6300                     "open the crash safe index file.");
6301     goto err;
6302   }
6303 
6304   if (copy_file(&index_file, &crash_safe_index_file,
6305                 log_info->index_file_start_offset))
6306   {
6307     sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6308                     "copy index file to crash safe index file.");
6309     goto err;
6310   }
6311 
6312   if (close_crash_safe_index_file())
6313   {
6314     sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6315                     "close the crash safe index file.");
6316     goto err;
6317   }
6318   DBUG_EXECUTE_IF("fault_injection_copy_part_file", DBUG_SUICIDE(););
6319 
6320   if (move_crash_safe_index_file_to_index_file(false/*need_lock_index=false*/))
6321   {
6322     sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6323                     "move crash safe index file to index file.");
6324     goto err;
6325   }
6326 
6327   // now update offsets in index file for running threads
6328   if (need_update_threads)
6329     adjust_linfo_offsets(log_info->index_file_start_offset);
6330   return 0;
6331 
6332 err:
6333   return LOG_INFO_IO;
6334 }
6335 
/**
  Remove all logs before the given log from disk and from the index file.

  The names of the files to purge are first written to the purge index
  ("register") file, which is then synced; the index file is rewritten
  without them; and only then, if no error occurred, are the registered
  files deleted through purge_index_entry().

  @param to_log              Delete all log files listed before this file.
  @param included            If true, to_log is deleted too.
  @param need_lock_index     If true, LOCK_index is acquired and released
                             by this function; otherwise the caller must
                             already own it.
  @param need_update_threads If we want to update the log coordinates of
                             all threads. False for relay logs, true otherwise.
  @param decrease_log_space  If not null, decrement this variable of
                             the amount of log space freed
  @param auto_purge          True if this is an automatic purge. In that
                             case no warning is pushed when the purge stops
                             at an active or in-use log.

  @note
    If any of the logs before the deleted one is in use,
    only purge logs up to this one.

  @retval
    0			ok
  @retval
    LOG_INFO_EOF		to_log not found
    LOG_INFO_EMFILE             too many files opened
    LOG_INFO_FATAL              if any other than ENOENT error from
                                mysql_file_stat() or mysql_file_delete()
*/

int MYSQL_BIN_LOG::purge_logs(const char *to_log,
                              bool included,
                              bool need_lock_index,
                              bool need_update_threads,
                              ulonglong *decrease_log_space,
                              bool auto_purge)
{
  int error= 0, no_of_log_files_to_purge= 0, no_of_log_files_purged= 0;
  int no_of_threads_locking_log= 0;
  bool exit_loop= 0;
  LOG_INFO log_info;
  THD *thd= current_thd;
  DBUG_ENTER("purge_logs");
  DBUG_PRINT("info",("to_log= %s",to_log));

  if (need_lock_index)
    mysql_mutex_lock(&LOCK_index);
  else
    mysql_mutex_assert_owner(&LOCK_index);
  /* First make sure that to_log is listed in the index file at all. */
  if ((error=find_log_pos(&log_info, to_log, false/*need_lock_index=false*/)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
                    "listed in the index.", to_log);
    goto err;
  }

  /* Only used in the ER_WARN_PURGE_LOG_IN_USE warning below. */
  no_of_log_files_to_purge= log_info.entry_index;

  /* Start from a fresh register file, opened for writing. */
  if ((error= open_purge_index_file(TRUE)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
    goto err;
  }

  /*
    File name exists in index file; delete until we find this file
    or a file that is used.
  */
  if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
    goto err;

  /*
    Keep going while the current entry is not to_log. When to_log itself
    is reached, the body is executed once more only if 'included' is
    set; exit_loop then ends the loop after that iteration.
  */
  while ((compare_log_name(to_log,log_info.log_file_name) || (exit_loop=included)))
  {
    /* Stop at the active log; it is not registered for purging. */
    if(is_active(log_info.log_file_name))
    {
      if(!auto_purge)
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_WARN_PURGE_LOG_IS_ACTIVE,
                            ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
                            log_info.log_file_name);
      break;
    }

    /* Stop as well at the first log that some thread is still using. */
    if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
    {
      if(!auto_purge)
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_WARN_PURGE_LOG_IN_USE,
                            ER(ER_WARN_PURGE_LOG_IN_USE),
                            log_info.log_file_name,  no_of_threads_locking_log,
                            no_of_log_files_purged, no_of_log_files_to_purge);
      break;
    }
    no_of_log_files_purged++;

    /* Record the file name; the file itself is deleted later. */
    if ((error= register_purge_index_entry(log_info.log_file_name)))
    {
      sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
                      log_info.log_file_name);
      goto err;
    }

    if (find_next_log(&log_info, false/*need_lock_index=false*/) || exit_loop)
      break;
  }

  DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););

  /* The register file must be flushed and synced before the index changes. */
  if ((error= sync_purge_index_file()))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
    goto err;
  }

  /* We know how many files to delete. Update index file. */
  if ((error=remove_logs_from_index(&log_info, need_update_threads)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
    goto err;
  }

  // Update gtid_state->lost_gtids
  if (!is_relay_log)
  {
    global_sid_lock->wrlock();
    error= init_gtid_sets(NULL,
                          const_cast<Gtid_set *>(gtid_state->get_lost_gtids()),
                          opt_master_verify_checksum,
                          false/*false=don't need lock*/,
                          NULL/*trx_parser*/, NULL/*gtid_partial_trx*/);
    global_sid_lock->unlock();
    if (error)
      goto err;
  }

  DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););

err:

  /* Reached on success too: this is the common cleanup path. */
  int error_index= 0, close_error_index= 0;
  /* Read each entry from purge_index_file and delete the file. */
  if (!error && is_inited_purge_index_file() &&
      (error_index= purge_index_entry(thd, decrease_log_space, false/*need_lock_index=false*/)))
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
                    " that would be purged.");

  /* Always called, whether or not the register file was opened. */
  close_error_index= close_purge_index_file();

  DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););

  if (need_lock_index)
    mysql_mutex_unlock(&LOCK_index);

  /*
    Error codes from purge logs take precedence.
    Then error codes from purging the index entry.
    Finally, error codes from closing the purge index file.
  */
  error= error ? error : (error_index ? error_index :
                          close_error_index);

  DBUG_RETURN(error);
}
6494 
set_purge_index_file_name(const char * base_file_name)6495 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
6496 {
6497   int error= 0;
6498   DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
6499   if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
6500                 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
6501                               MY_REPLACE_EXT)) == NULL)
6502   {
6503     error= 1;
6504     sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
6505                       "file name.");
6506   }
6507   DBUG_RETURN(error);
6508 }
6509 
open_purge_index_file(bool destroy)6510 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
6511 {
6512   int error= 0;
6513   File file= -1;
6514 
6515   DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
6516 
6517   if (destroy)
6518     close_purge_index_file();
6519 
6520   if (!my_b_inited(&purge_index_file))
6521   {
6522     if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
6523                        MYF(MY_WME))) < 0  ||
6524         init_io_cache(&purge_index_file, file, IO_SIZE,
6525                       (destroy ? WRITE_CACHE : READ_CACHE),
6526                       0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
6527     {
6528       error= 1;
6529       sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
6530                       " file.");
6531     }
6532   }
6533   DBUG_RETURN(error);
6534 }
6535 
close_purge_index_file()6536 int MYSQL_BIN_LOG::close_purge_index_file()
6537 {
6538   int error= 0;
6539 
6540   DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
6541 
6542   if (my_b_inited(&purge_index_file))
6543   {
6544     end_io_cache(&purge_index_file);
6545     error= my_close(purge_index_file.file, MYF(0));
6546   }
6547   my_delete(purge_index_file_name, MYF(0));
6548   memset(&purge_index_file, 0, sizeof(purge_index_file));
6549 
6550   DBUG_RETURN(error);
6551 }
6552 
is_inited_purge_index_file()6553 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
6554 {
6555   DBUG_ENTER("MYSQL_BIN_LOG::is_inited_purge_index_file");
6556   DBUG_RETURN (my_b_inited(&purge_index_file));
6557 }
6558 
sync_purge_index_file()6559 int MYSQL_BIN_LOG::sync_purge_index_file()
6560 {
6561   int error= 0;
6562   DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
6563 
6564   if ((error= flush_io_cache(&purge_index_file)) ||
6565       (error= my_sync(purge_index_file.file, MYF(MY_WME))))
6566     DBUG_RETURN(error);
6567 
6568   DBUG_RETURN(error);
6569 }
6570 
register_purge_index_entry(const char * entry)6571 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
6572 {
6573   int error= 0;
6574   DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
6575 
6576   if ((error=my_b_write(&purge_index_file, (const uchar*)entry, strlen(entry))) ||
6577       (error=my_b_write(&purge_index_file, (const uchar*)"\n", 1)))
6578     DBUG_RETURN (error);
6579 
6580   DBUG_RETURN(error);
6581 }
6582 
register_create_index_entry(const char * entry)6583 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
6584 {
6585   DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
6586   DBUG_RETURN(register_purge_index_entry(entry));
6587 }
6588 
/**
  Delete the files whose names are recorded in the purge index (register)
  file.

  The register file is rewound for reading and processed line by line.
  For each name:
  - if the file cannot be stat'ed because it does not exist (ENOENT), a
    warning/information message is produced and the entry is skipped;
  - if find_log_pos() still finds the name in the index file, the file
    is left alone;
  - otherwise the file is deleted and, on success, its size is
    subtracted from *decrease_log_space.

  @param thd                 If not NULL, warnings are pushed to this
                             session; otherwise some of the problems are
                             only written to the server log.
  @param decrease_log_space  If not NULL, decremented by the size of each
                             file that was deleted.
  @param need_lock_index     Passed on to find_log_pos(). The call to
                             ha_binlog_index_purge_file() is only made
                             when this is false.

  @retval
    0                  ok
  @retval
    LOG_INFO_EMFILE    mysql_file_delete() failed with EMFILE
  @retval
    LOG_INFO_FATAL     any other than ENOENT error from mysql_file_stat()
                       or mysql_file_delete()
  @retval
    other non-zero     error from reinit_io_cache(), from reading the
                       register file, or from find_log_pos() (other than
                       LOG_INFO_EOF)
*/
int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *decrease_log_space,
                                     bool need_lock_index)
{
  MY_STAT s;
  int error= 0;
  LOG_INFO log_info;
  LOG_INFO check_log_info;

  DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");

  assert(my_b_inited(&purge_index_file));

  /* Switch the register file cache to read mode, starting at offset 0. */
  if ((error=reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0, 0)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
                    "for read");
    goto err;
  }

  for (;;)
  {
    size_t length;

    /* A length of 0 or 1 means that no file name was read. */
    if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
                          FN_REFLEN)) <= 1)
    {
      if (purge_index_file.error)
      {
        error= purge_index_file.error;
        sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
                        "register file.", error);
        goto err;
      }

      /* Reached EOF */
      break;
    }

    /* Get rid of the trailing '\n' */
    log_info.log_file_name[length-1]= 0;

    /* The stat result is needed later for the size of the deleted file. */
    if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s, MYF(0)))
    {
      if (my_errno() == ENOENT)
      {
        /*
          It's not fatal if we can't stat a log file that does not exist;
          If we could not stat, we won't delete.
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
                              log_info.log_file_name);
        }
        sql_print_information("Failed to execute mysql_file_stat on file '%s'",
			      log_info.log_file_name);
        set_my_errno(0);
      }
      else
      {
        /*
          Other than ENOENT are fatal
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with getting info on being purged %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              log_info.log_file_name);
        }
        else
        {
          sql_print_information("Failed to delete log file '%s'; "
                                "consider examining correspondence "
                                "of your binlog index file "
                                "to the actual binlog files",
                                log_info.log_file_name);
        }
        error= LOG_INFO_FATAL;
        goto err;
      }
    }
    else
    {
      /*
        The file exists. It is only deleted when the lookup below fails
        with LOG_INFO_EOF, i.e. the name is no longer in the index file;
        when the lookup succeeds nothing is done for this entry.
      */
      if ((error= find_log_pos(&check_log_info, log_info.log_file_name,
                               need_lock_index)))
      {
        if (error != LOG_INFO_EOF)
        {
          /* The index file could not be searched: give up with that error. */
          if (thd)
          {
            push_warning_printf(thd, Sql_condition::SL_WARNING,
                                ER_BINLOG_PURGE_FATAL_ERR,
                                "a problem with deleting %s and "
                                "reading the binlog index file",
                                log_info.log_file_name);
          }
          else
          {
            sql_print_information("Failed to delete file '%s' and "
                                  "read the binlog index file",
                                  log_info.log_file_name);
          }
          goto err;
        }

        /* LOG_INFO_EOF is the expected outcome here, not an error. */
        error= 0;
        if (!need_lock_index)
        {
          /*
            This is to avoid triggering an error in NDB.

            @todo: This is weird, what does NDB errors have to do with
            need_lock_index? Explain better or refactor /Sven
          */
          ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
        }

        DBUG_PRINT("info",("purging %s",log_info.log_file_name));
        if (!mysql_file_delete(key_file_binlog, log_info.log_file_name, MYF(0)))
        {
          DBUG_EXECUTE_IF("wait_in_purge_index_entry",
                          {
                              const char action[] = "now SIGNAL in_purge_index_entry WAIT_FOR go_ahead_sql";
                              assert(!debug_sync_set_action(thd, STRING_WITH_LEN(action)));
                              DBUG_SET("-d,wait_in_purge_index_entry");
                          };);

          /* Deleted: account for the space that was freed. */
          if (decrease_log_space)
            *decrease_log_space-= s.st_size;
        }
        else
        {
          if (my_errno() == ENOENT)
          {
            /* The file vanished between the stat and the delete: not fatal. */
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::SL_WARNING,
                                  ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
                                  log_info.log_file_name);
            }
            sql_print_information("Failed to delete file '%s'",
                                  log_info.log_file_name);
            set_my_errno(0);
          }
          else
          {
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::SL_WARNING,
                                  ER_BINLOG_PURGE_FATAL_ERR,
                                  "a problem with deleting %s; "
                                  "consider examining correspondence "
                                  "of your binlog index file "
                                  "to the actual binlog files",
                                  log_info.log_file_name);
            }
            else
            {
              sql_print_information("Failed to delete file '%s'; "
                                    "consider examining correspondence "
                                    "of your binlog index file "
                                    "to the actual binlog files",
                                    log_info.log_file_name);
            }
            /* EMFILE gets its own return code; everything else is fatal. */
            if (my_errno() == EMFILE)
            {
              DBUG_PRINT("info",
                         ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno()));
              error= LOG_INFO_EMFILE;
              goto err;
            }
            error= LOG_INFO_FATAL;
            goto err;
          }
        }
      }
    }
  }

err:
  DBUG_RETURN(error);
}
6776 
/**
  Remove all logs before the given file date from disk and from the
  index file.

  The index is walked from its first entry while LOCK_index is held.
  The walk stops at the active log, at the first log whose modification
  time is not older than purge_time, at the first log reported as in use
  by log_in_use(), or when find_next_log() fails. The name of the last log
  that qualified is then passed to purge_logs().

  @param purge_time	Delete all log files before given date.
  @param auto_purge     True if this is an automatic purge. When true,
                        no warnings are pushed for an active or in-use log.

  @note
    If any of the logs before the deleted one is in use,
    only purge logs up to this one.

  @note
    The THD used for warnings is current_thd, not a parameter.

  @retval
    0				ok
  @retval
    LOG_INFO_PURGE_NO_ROTATE	Binary file that can't be rotated
    LOG_INFO_FATAL              if any other than ENOENT error from
                                mysql_file_stat() or mysql_file_delete()
  @retval
    other nonzero               error returned by find_log_pos() or
                                purge_logs()
*/

int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time, bool auto_purge)
{
  int error;
  int no_of_threads_locking_log= 0, no_of_log_files_purged= 0;
  bool log_is_active= false, log_is_in_use= false;
  /*
    to_log:          name of the newest log found eligible for purging
                     (empty string when none qualified).
    copy_log_in_use: name of the in-use log that stopped the walk; only
                     written, and only read, when log_is_in_use is true.
  */
  char to_log[FN_REFLEN], copy_log_in_use[FN_REFLEN];
  LOG_INFO log_info;
  MY_STAT stat_area;
  THD *thd= current_thd;

  DBUG_ENTER("purge_logs_before_date");

  mysql_mutex_lock(&LOCK_index);
  to_log[0]= 0;

  /* Position log_info on the first entry of the index. */
  if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
    goto err;

  /* Never consider the active log; remember if that is why we stopped. */
  while (!(log_is_active= is_active(log_info.log_file_name)))
  {
    if (!mysql_file_stat(m_key_file_log,
                         log_info.log_file_name, &stat_area, MYF(0)))
    {
      if (my_errno() == ENOENT)
      {
        /*
          It's not fatal if we can't stat a log file that does not exist.
          The entry is skipped and the walk continues with the next log.
        */
        set_my_errno(0);
      }
      else
      {
        /*
          Other than ENOENT are fatal
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with getting info on being purged %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              log_info.log_file_name);
        }
        else
        {
          /*
            NOTE(review): the failing call here is mysql_file_stat(), not a
            delete, so the message text is misleading.
          */
          sql_print_information("Failed to delete log file '%s'",
                                log_info.log_file_name);
        }
        error= LOG_INFO_FATAL;
        goto err;
      }
    }
    /* check if the binary log file is older than the purge_time
       if yes check if it is in use, if not in use then add
       it in the list of binary log files to be purged.
    */
    else if (stat_area.st_mtime < purge_time)
    {
      if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
      {
        /* For an automatic purge the in-use log is silently left alone. */
        if (!auto_purge)
        {
          log_is_in_use= true;
          strcpy(copy_log_in_use, log_info.log_file_name);
        }
        break;
      }
      /* This log qualifies; it becomes the new upper bound of the purge. */
      strmake(to_log,
              log_info.log_file_name,
              sizeof(log_info.log_file_name) - 1);
      no_of_log_files_purged++;
    }
    else
      break;  /* First log that is not older than purge_time: stop. */
    if (find_next_log(&log_info, false/*need_lock_index=false*/))
      break;
  }

  if (log_is_active)
  {
    /*
      NOTE(review): thd is not checked for NULL here, unlike in the loop
      above; presumably a non-automatic purge always has a THD - confirm.
    */
    if(!auto_purge)
      push_warning_printf(thd, Sql_condition::SL_WARNING,
                          ER_WARN_PURGE_LOG_IS_ACTIVE,
                          ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
                          log_info.log_file_name);

  }

  if (log_is_in_use)
  {
    /*
      Count how many logs were requested for purging in total, continuing
      from the in-use log up to the current log file, so the warning can
      report both the purged count and the requested count.
    */
    int no_of_log_files_to_purge= no_of_log_files_purged+1;
    while (strcmp(log_file_name, log_info.log_file_name))
    {
      if (mysql_file_stat(m_key_file_log, log_info.log_file_name,
                          &stat_area, MYF(0)))
      {
        if (stat_area.st_mtime < purge_time)
          no_of_log_files_to_purge++;
        else
          break;
      }
      if (find_next_log(&log_info, false/*need_lock_index=false*/))
      {
        no_of_log_files_to_purge++;
        break;
      }
    }

    push_warning_printf(thd, Sql_condition::SL_WARNING,
                        ER_WARN_PURGE_LOG_IN_USE,
                        ER(ER_WARN_PURGE_LOG_IN_USE),
                        copy_log_in_use, no_of_threads_locking_log,
                        no_of_log_files_purged, no_of_log_files_to_purge);
  }

  /* Nothing qualified: report success without calling purge_logs(). */
  error= (to_log[0] ? purge_logs(to_log, true,
                                 false/*need_lock_index=false*/,
                                 true/*need_update_threads=true*/,
                                 (ulonglong *) 0, auto_purge) : 0);

err:
  mysql_mutex_unlock(&LOCK_index);
  DBUG_RETURN(error);
}
6923 #endif /* HAVE_REPLICATION */
6924 
6925 
6926 /**
6927   Create a new log file name.
6928 
6929   @param buf		buf of at least FN_REFLEN where new name is stored
6930 
6931   @note
6932     If file name will be longer then FN_REFLEN it will be truncated
6933 */
6934 
make_log_name(char * buf,const char * log_ident)6935 void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
6936 {
6937   size_t dir_len = dirname_length(log_file_name);
6938   if (dir_len >= FN_REFLEN)
6939     dir_len=FN_REFLEN-1;
6940   my_stpnmov(buf, log_file_name, dir_len);
6941   strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
6942 }
6943 
6944 
6945 /**
6946   Check if we are writing/reading to the given log file.
6947 */
6948 
is_active(const char * log_file_name_arg)6949 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
6950 {
6951   return !compare_log_name(log_file_name, log_file_name_arg);
6952 }
6953 
6954 
inc_prep_xids(THD * thd)6955 void MYSQL_BIN_LOG::inc_prep_xids(THD *thd)
6956 {
6957   DBUG_ENTER("MYSQL_BIN_LOG::inc_prep_xids");
6958 #ifndef NDEBUG
6959   int result= m_prep_xids.atomic_add(1);
6960   DBUG_PRINT("debug", ("m_prep_xids: %d", result + 1));
6961 #else
6962   (void) m_prep_xids.atomic_add(1);
6963 #endif
6964   thd->get_transaction()->m_flags.xid_written= true;
6965   DBUG_VOID_RETURN;
6966 }
6967 
6968 
dec_prep_xids(THD * thd)6969 void MYSQL_BIN_LOG::dec_prep_xids(THD *thd)
6970 {
6971   DBUG_ENTER("MYSQL_BIN_LOG::dec_prep_xids");
6972   int32 result= m_prep_xids.atomic_add(-1);
6973   DBUG_PRINT("debug", ("m_prep_xids: %d", result - 1));
6974   thd->get_transaction()->m_flags.xid_written= false;
6975   /* If the old value was 1, it is zero now. */
6976   if (result == 1)
6977   {
6978     mysql_mutex_lock(&LOCK_xids);
6979     mysql_cond_signal(&m_prep_xids_cond);
6980     mysql_mutex_unlock(&LOCK_xids);
6981   }
6982   DBUG_VOID_RETURN;
6983 }
6984 
6985 
6986 /*
6987   Wrappers around new_file_impl to avoid using argument
6988   to control locking. The argument 1) less readable 2) breaks
6989   incapsulation 3) allows external access to the class without
6990   a lock (which is not possible with private new_file_without_locking
6991   method).
6992 
6993   @retval
6994     nonzero - error
6995 
6996 */
6997 
new_file(Format_description_log_event * extra_description_event)6998 int MYSQL_BIN_LOG::new_file(Format_description_log_event *extra_description_event)
6999 {
7000   return new_file_impl(true/*need_lock_log=true*/, extra_description_event);
7001 }
7002 
7003 /*
7004   @retval
7005     nonzero - error
7006 */
new_file_without_locking(Format_description_log_event * extra_description_event)7007 int MYSQL_BIN_LOG::new_file_without_locking(Format_description_log_event *extra_description_event)
7008 {
7009   return new_file_impl(false/*need_lock_log=false*/, extra_description_event);
7010 }
7011 
7012 
/**
  Start writing to a new log file or reopen the old file.

  Sequence, as implemented below: wait until no prepared XIDs remain,
  take LOCK_index, flush storage engine logs, save GTIDs of the last
  binlog (binary log only), generate the new name, append a Rotate event
  to the current file, flush it, close the current log and index, then
  reopen the index and open the new log.

  @param need_lock_log If true, this function acquires LOCK_log;
  otherwise the caller should already have acquired it.
  @param extra_description_event Passed through to open_binlog() when the
  new file is opened.

  @retval 0 success, or the log was not open (nothing is done)
  @retval nonzero - error

  @note The new file name is stored last in the index file
*/
int MYSQL_BIN_LOG::new_file_impl(bool need_lock_log, Format_description_log_event *extra_description_event)
{
  int error= 0;
  /* Set on failures after which the log is closed (or the server aborted). */
  bool close_on_error= false;
  char new_name[FN_REFLEN], *new_name_ptr= NULL, *old_name, *file_to_open;

  DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
  if (!is_open())
  {
    DBUG_PRINT("info",("log is closed"));
    DBUG_RETURN(error);
  }

  if (need_lock_log)
    mysql_mutex_lock(&LOCK_log);
  else
    mysql_mutex_assert_owner(&LOCK_log);
  DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
                  DEBUG_SYNC(current_thd, "before_rotate_binlog"););
  mysql_mutex_lock(&LOCK_xids);
  /*
    We need to ensure that the number of prepared XIDs are 0.

    If m_prep_xids is not zero:
    - We wait for storage engine commit, hence decrease m_prep_xids
    - We keep the LOCK_log to block new transactions from being
      written to the binary log.
   */
  while (get_prep_xids() > 0)
  {
    DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
    mysql_cond_wait(&m_prep_xids_cond, &LOCK_xids);
  }
  mysql_mutex_unlock(&LOCK_xids);

  mysql_mutex_lock(&LOCK_index);

  mysql_mutex_assert_owner(&LOCK_log);
  mysql_mutex_assert_owner(&LOCK_index);


  /*
    The engine log flush is skipped when the "expire_logs_always" debug
    keyword is active. A failure here does not set close_on_error, so the
    log stays open and only the error code is returned.
  */
  if (DBUG_EVALUATE_IF("expire_logs_always", 0, 1)
      && (error= ha_flush_logs(NULL)))
    goto end;

  if (!is_relay_log)
  {
    /* Save set of GTIDs of the last binlog into table on binlog rotation */
    if ((error= gtid_state->save_gtids_of_last_binlog_into_table(true)))
    {
      /*
        NOTE(review): new_name_ptr is still NULL on this path, and the
        sql_print_error() call under 'end:' prints it with %s.
      */
      close_on_error= true;
      goto end;
    }
  }

  /*
    If user hasn't specified an extension, generate a new log name
    We have to do this here and not in open as we want to store the
    new file name in the current binary log file.
  */
  new_name_ptr= new_name;
  if ((error= generate_new_name(new_name, name)))
  {
    // Use the old name if generation of new name fails.
    strcpy(new_name, name);
    close_on_error= TRUE;
    goto end;
  }
  /*
    Make sure that the log_file is initialized before writing
    Rotate_log_event into it.
  */
  if (log_file.alloced_buffer)
  {
    /*
      We log the whole file name for log file as the user may decide
      to change base names at some point.
    */
    Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
                       is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
    /*
      The current relay-log's closing Rotate event must have checksum
      value computed with an algorithm of the last relay-logged FD event.
    */
    if (is_relay_log)
      (r.common_footer)->checksum_alg= relay_log_checksum_alg;
    assert(!is_relay_log || relay_log_checksum_alg !=
           binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
    /*
      The debug keyword "fault_injection_new_file_rotate_event" forces
      this branch without attempting the write.
    */
    if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event",
                        (error=1), FALSE) ||
       (error= r.write(&log_file)))
    {
      char errbuf[MYSYS_STRERROR_SIZE];
      DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno=2;);
      close_on_error= true;
      /*
        Reported under code ER_ERROR_ON_WRITE but with the message text
        of ER_CANT_OPEN_FILE, naming the current log and errno.
      */
      my_printf_error(ER_ERROR_ON_WRITE, ER(ER_CANT_OPEN_FILE),
                      MYF(ME_FATALERROR), name,
                      errno, my_strerror(errbuf, sizeof(errbuf), errno));
      goto end;
    }
    bytes_written += r.common_header->data_written;
  }

  if ((error= flush_io_cache(&log_file)))
  {
    close_on_error= true;
    goto end;
  }

  DEBUG_SYNC(current_thd, "after_rotate_event_appended");

  /*
    Detach the current name from the object before close() so that it
    survives; it is handed to open_binlog() below and freed afterwards.
  */
  old_name=name;
  name=0;				// Don't free name
  close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX,
        false/*need_lock_log=false*/,
        false/*need_lock_index=false*/);

  /* Apply a pending checksum algorithm change, if one was requested. */
  if (checksum_alg_reset != binary_log::BINLOG_CHECKSUM_ALG_UNDEF)
  {
    assert(!is_relay_log);
    assert(binlog_checksum_options != checksum_alg_reset);
    binlog_checksum_options= checksum_alg_reset;
  }
  /*
     Note that at this point, log_state != LOG_CLOSED (important for is_open()).
  */

  DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
  /*
     new_file() is only used for rotation (in FLUSH LOGS or because size >
     max_binlog_size or max_relay_log_size).
     If this is a binary log, the Format_description_log_event at the beginning of
     the new file should have created=0 (to distinguish with the
     Format_description_log_event written at server startup, which should
     trigger temp tables deletion on slaves.
  */

  /* reopen index binlog file, BUG#34582 */
  /* file_to_open tracks which file to name if one of the opens fails. */
  file_to_open= index_file_name;
  error= open_index_file(index_file_name, 0, false/*need_lock_index=false*/);
  if (!error)
  {
    /* reopen the binary log file. */
    file_to_open= new_name_ptr;
    error= open_binlog(old_name, new_name_ptr,
                       max_size, true/*null_created_arg=true*/,
                       false/*need_lock_index=false*/,
                       true/*need_sid_lock=true*/,
                       extra_description_event);
  }

  /* handle reopening errors */
  if (error)
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    /*
      NOTE(review): the return code of the open call is used here as if it
      were an errno value - confirm that is intended.
    */
    my_printf_error(ER_CANT_OPEN_FILE, ER(ER_CANT_OPEN_FILE),
                    MYF(ME_FATALERROR), file_to_open,
                    error, my_strerror(errbuf, sizeof(errbuf), error));
    close_on_error= true;
  }
  my_free(old_name);

end:

  if (error && close_on_error /* rotate, flush or reopen failed */)
  {
    /*
      Close whatever was left opened.

      We are keeping the behavior as it exists today, ie,
      we disable logging and move on (see: BUG#51014).

      TODO: as part of WL#1790 consider other approaches:
       - kill mysql (safety);
       - try multiple locations for opening a log file;
       - switch server to protected/readonly mode
       - ...
    */
    if (binlog_error_action == ABORT_SERVER)
    {
      exec_binlog_error_action_abort("Either disk is full or file system is"
                                     " read only while rotating the binlog."
                                     " Aborting the server.");
    }
    else
      sql_print_error("Could not open %s for logging (error %d). "
                      "Turning logging off for the whole duration "
                      "of the MySQL server process. To turn it on "
                      "again: fix the cause, shutdown the MySQL "
                      "server and restart it.",
                      new_name_ptr, errno);
    close(LOG_CLOSE_INDEX, false /*need_lock_log=false*/,
          false/*need_lock_index=false*/);
  }

  /* Release in reverse order of acquisition; LOCK_log only if taken here. */
  mysql_mutex_unlock(&LOCK_index);
  if (need_lock_log)
    mysql_mutex_unlock(&LOCK_log);
  DEBUG_SYNC(current_thd, "after_disable_binlog");
  DBUG_RETURN(error);
}
7225 
7226 
7227 #ifdef HAVE_REPLICATION
/**
  Called after an event has been written to the relay log by the IO
  thread.  This flushes and possibly syncs the file (according to the
  sync options), rotates the file if it has grown over the limit, and
  finally calls signal_update().

  @note The caller must hold LOCK_log and mi->data_lock before invoking
  this function; both are asserted below.

  @param mi Master_info for the IO thread.

  @retval false success
  @retval true error reported by new_file_without_locking() during rotation

  @note NOTE(review): a failure of flush_and_sync() only prevents the
  GTID bookkeeping and rotation; it is not reflected in the return value.
*/
bool MYSQL_BIN_LOG::after_append_to_relay_log(Master_info *mi)
{
  DBUG_ENTER("MYSQL_BIN_LOG::after_append_to_relay_log");
  DBUG_PRINT("info",("max_size: %lu",max_size));

  // Check pre-conditions
  mysql_mutex_assert_owner(&LOCK_log);
  mysql_mutex_assert_owner(&mi->data_lock);
  assert(is_relay_log);
  assert(current_thd->system_thread == SYSTEM_THREAD_SLAVE_IO);

  /*
    We allow the relay log rotation by relay log size
    only if the trx parser is not inside a transaction.
  */
  bool can_rotate= mi->transaction_parser.is_not_inside_transaction();

#ifndef NDEBUG
  /* Debug-only trace: the size limit is exceeded but rotation must wait. */
  if ((uint) my_b_append_tell(&log_file) >
      DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size) &&
      !can_rotate)
  {
    DBUG_PRINT("info",("Postponing the rotation by size waiting for "
                       "the end of the current transaction."));
  }
#endif

  // Flush and sync
  bool error= false;
  if (flush_and_sync(0) == 0 && can_rotate)
  {
    /*
      If the last event of the transaction has been flushed, we can add
      the GTID (if it is not empty) to the logged set, or else it will
      not be available in the Previous GTIDs of the next relay log file
      if we are going to rotate the relay log.
    */
    Gtid *last_gtid_queued= mi->get_last_gtid_queued();
    if (!last_gtid_queued->is_empty())
    {
      global_sid_lock->rdlock();
      mi->rli->add_logged_gtid(last_gtid_queued->sidno,
                               last_gtid_queued->gno);
      global_sid_lock->unlock();
      mi->clear_last_gtid_queued();
    }

    /*
      If relay log is too big, rotate. But only if not in the middle of a
      transaction when GTIDs are enabled.
      We now try to mimic the following master binlog behavior: "A transaction
      is written in one chunk to the binary log, so it is never split between
      several binary logs. Therefore, if you have big transactions, you might
      see binary log files larger than max_binlog_size."
    */
    if ((uint) my_b_append_tell(&log_file) >
        DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size))
    {
      error= new_file_without_locking(mi->get_mi_description_event());
    }
  }

  /* Signalled on every path, including when the flush failed. */
  signal_update();

  DBUG_RETURN(error);
}
7310 
7311 
append_event(Log_event * ev,Master_info * mi)7312 bool MYSQL_BIN_LOG::append_event(Log_event* ev, Master_info *mi)
7313 {
7314   DBUG_ENTER("MYSQL_BIN_LOG::append");
7315 
7316   // check preconditions
7317   assert(log_file.type == SEQ_READ_APPEND);
7318   assert(is_relay_log);
7319 
7320   // acquire locks
7321   mysql_mutex_lock(&LOCK_log);
7322 
7323   // write data
7324   bool error = false;
7325   if (ev->write(&log_file) == 0)
7326   {
7327     bytes_written+= ev->common_header->data_written;
7328     error= after_append_to_relay_log(mi);
7329   }
7330   else
7331     error= true;
7332 
7333   mysql_mutex_unlock(&LOCK_log);
7334   DBUG_RETURN(error);
7335 }
7336 
7337 
append_buffer(const char * buf,uint len,Master_info * mi)7338 bool MYSQL_BIN_LOG::append_buffer(const char* buf, uint len, Master_info *mi)
7339 {
7340   DBUG_ENTER("MYSQL_BIN_LOG::append_buffer");
7341 
7342   // check preconditions
7343   assert(log_file.type == SEQ_READ_APPEND);
7344   assert(is_relay_log);
7345   mysql_mutex_assert_owner(&LOCK_log);
7346 
7347   // write data
7348   bool error= false;
7349   if (my_b_append(&log_file,(uchar*) buf,len) == 0)
7350   {
7351     bytes_written += len;
7352     error= after_append_to_relay_log(mi);
7353   }
7354   else
7355     error= true;
7356 
7357   DBUG_RETURN(error);
7358 }
7359 #endif // ifdef HAVE_REPLICATION
7360 
flush_and_sync(const bool force)7361 bool MYSQL_BIN_LOG::flush_and_sync(const bool force)
7362 {
7363   mysql_mutex_assert_owner(&LOCK_log);
7364 
7365   if (flush_io_cache(&log_file))
7366     return 1;
7367 
7368   std::pair<bool, bool> result= sync_binlog_file(force);
7369 
7370   return result.first;
7371 }
7372 
start_union_events(THD * thd,query_id_t query_id_param)7373 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
7374 {
7375   assert(!thd->binlog_evt_union.do_union);
7376   thd->binlog_evt_union.do_union= TRUE;
7377   thd->binlog_evt_union.unioned_events= FALSE;
7378   thd->binlog_evt_union.unioned_events_trans= FALSE;
7379   thd->binlog_evt_union.first_query_id= query_id_param;
7380 }
7381 
stop_union_events(THD * thd)7382 void MYSQL_BIN_LOG::stop_union_events(THD *thd)
7383 {
7384   assert(thd->binlog_evt_union.do_union);
7385   thd->binlog_evt_union.do_union= FALSE;
7386 }
7387 
is_query_in_union(THD * thd,query_id_t query_id_param)7388 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
7389 {
7390   return (thd->binlog_evt_union.do_union &&
7391           query_id_param >= thd->binlog_evt_union.first_query_id);
7392 }
7393 
7394 /*
7395   Updates thd's position-of-next-event variables
7396   after a *real* write a file.
7397  */
update_thd_next_event_pos(THD * thd)7398 void MYSQL_BIN_LOG::update_thd_next_event_pos(THD* thd)
7399 {
7400   if (likely(thd != NULL))
7401   {
7402     thd->set_next_event_pos(log_file_name,
7403                             my_b_tell(&log_file));
7404   }
7405 }
7406 
7407 /*
7408   Moves the last bunch of rows from the pending Rows event to a cache (either
7409   transactional cache if is_transaction is @c true, or the non-transactional
7410   cache otherwise. Sets a new pending event.
7411 
7412   @param thd               a pointer to the user thread.
7413   @param evt               a pointer to the row event.
7414   @param is_transactional  @c true indicates a transactional cache,
7415                            otherwise @c false a non-transactional.
7416 */
7417 int
flush_and_set_pending_rows_event(THD * thd,Rows_log_event * event,bool is_transactional)7418 MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
7419                                                 Rows_log_event* event,
7420                                                 bool is_transactional)
7421 {
7422   DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
7423 #ifdef WITH_WSREP
7424   assert(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open());
7425 #else
7426   assert(mysql_bin_log.is_open());
7427 #endif /* WITH_WSREP */
7428   DBUG_PRINT("enter", ("event: 0x%lx", (long) event));
7429 
7430   int error= 0;
7431   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
7432 
7433   assert(cache_mngr);
7434 
7435   binlog_cache_data *cache_data=
7436     cache_mngr->get_binlog_cache_data(is_transactional);
7437 
7438   DBUG_PRINT("info", ("cache_mngr->pending(): 0x%lx", (long) cache_data->pending()));
7439 
7440   if (Rows_log_event* pending= cache_data->pending())
7441   {
7442     /*
7443       Write pending event to the cache.
7444     */
7445     if (cache_data->write_event(thd, pending))
7446     {
7447       set_write_error(thd, is_transactional);
7448       if (check_write_error(thd) && cache_data &&
7449           stmt_cannot_safely_rollback(thd))
7450         cache_data->set_incident();
7451       delete pending;
7452       cache_data->set_pending(NULL);
7453       DBUG_RETURN(1);
7454     }
7455 
7456     delete pending;
7457   }
7458 
7459   cache_data->set_pending(event);
7460 
7461   DBUG_RETURN(error);
7462 }
7463 
/**
  Write an event to the binary log.

  The event is written into the statement or transaction cache of the
  event's THD (selected by event_info->is_using_trans_cache()), preceded,
  when the statement is not logged in row format, by the Intvar, Rand and
  User_var events that describe its execution context.

  @param event_info  event to write; its thd member supplies the session.

  @retval false  the event was written, was absorbed by an active event
                 union, or was filtered out
  @retval true   error

  @note NOTE(review): 'error' starts as 1 and is only cleared inside the
  is_open() branch, so true is also returned when the log is not open.
*/

bool MYSQL_BIN_LOG::write_event(Log_event *event_info)
{
  THD *thd= event_info->thd;
  bool error= 1;
  DBUG_ENTER("MYSQL_BIN_LOG::write_event(Log_event *)");

  if (thd->binlog_evt_union.do_union)
  {
    /*
      In Stored function; Remember that function call caused an update.
      We will log the function call to the binary log on function exit
    */
    thd->binlog_evt_union.unioned_events= TRUE;
    thd->binlog_evt_union.unioned_events_trans |=
      event_info->is_using_trans_cache();
    DBUG_RETURN(0);
  }

  /*
    We only end the statement if we are in a top-level statement.  If
    we are inside a stored function, we do not end the statement since
    this will close all tables on the slave. But there can be a special case
    where we are inside a stored function/trigger and a SAVEPOINT is being
    set in side the stored function/trigger. This SAVEPOINT execution will
    force the pending event to be flushed without an STMT_END_F flag. This
    will result in a case where following DMLs will be considered as part of
    same statement and result in data loss on slave. Hence in this case we
    force the end_stmt to be true.
  */
  bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
                        SQLCOM_SAVEPOINT)? true:
    (thd->locked_tables_mode && thd->lex->requires_prelocking());
  if (thd->binlog_flush_pending_rows_event(end_stmt,
                                           event_info->is_using_trans_cache()))
    DBUG_RETURN(error);

  /*
     In most cases this is only called if 'is_open()' is true; in fact this is
     mostly called if is_open() *was* true a few instructions before, but it
     could have changed since.
  */
#ifdef WITH_WSREP
  /* applier and replayer can skip writing binlog events */
  if ((WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV)) ||
      is_open())
#else
  if (likely(is_open()))
#endif
  {
#ifdef HAVE_REPLICATION
    /*
      In the future we need to add to the following if tests like
      "do the involved tables match (to be implemented)
      binlog_[wild_]{do|ignore}_table?" (WL#1049)"
    */
    /*
      Skip the event (reporting success) when binary logging is off for
      this session, or when the event is subject to filtering and its
      database is rejected by binlog_filter. SAVEPOINT and ROLLBACK TO
      SAVEPOINT are never filtered by database.

      NOTE(review): the 'thd &&' test is inconsistent with the
      unconditional thd dereferences above and in the second operand.
    */
    const char *local_db= event_info->get_db();
    if ((thd && !(thd->variables.option_bits & OPTION_BIN_LOG)) ||
	(thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
         thd->lex->sql_command != SQLCOM_SAVEPOINT &&
         (!event_info->is_no_filter_event() &&
          !binlog_filter->db_ok(local_db))))
      DBUG_RETURN(0);
#endif /* HAVE_REPLICATION */

    assert(event_info->is_using_trans_cache() || event_info->is_using_stmt_cache());

    if (binlog_start_trans_and_stmt(thd, event_info))
      DBUG_RETURN(error);

    /* Pick the cache (transactional or statement) this event goes to. */
    bool is_trans_cache= event_info->is_using_trans_cache();
    binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
    binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);

    DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));

    /*
       No check for auto events flag here - this write method should
       never be called if auto-events are enabled.

       Write first log events which describe the 'run environment'
       of the SQL command. If row-based binlogging, Insert_id, Rand
       and other kind of "setting context" events are not needed.
    */
    if (thd)
    {
      if (!thd->is_current_stmt_binlog_format_row())
      {
        /* LAST_INSERT_ID() value from the previous statement, if used. */
        if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
        {
          Intvar_log_event e(thd,(uchar) binary_log::Intvar_event::LAST_INSERT_ID_EVENT,
                             thd->first_successful_insert_id_in_prev_stmt_for_binlog,
                             event_info->event_cache_type, event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
        }
        /* First auto-increment value generated by this statement. */
        if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
        {
          DBUG_PRINT("info",("number of auto_inc intervals: %u",
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             nb_elements()));
          Intvar_log_event e(thd, (uchar) binary_log::Intvar_event::INSERT_ID_EVENT,
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             minimum(), event_info->event_cache_type,
                             event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
        }
        /* Saved random seeds, when the statement used RAND(). */
        if (thd->rand_used)
        {
          Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
                           event_info->event_cache_type,
                           event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
        }
        /* One User_var event per user variable recorded for the statement. */
        if (!thd->user_var_events.empty())
        {
          for (size_t i= 0; i < thd->user_var_events.size(); i++)
          {
            BINLOG_USER_VAR_EVENT *user_var_event= thd->user_var_events[i];

            /* setting flags for user var log event */
            uchar flags= User_var_log_event::UNDEF_F;
            if (user_var_event->unsigned_flag)
              flags|= User_var_log_event::UNSIGNED_F;

            User_var_log_event e(thd,
                                 user_var_event->user_var_event->entry_name.ptr(),
                                 user_var_event->user_var_event->entry_name.length(),
                                 user_var_event->value,
                                 user_var_event->length,
                                 user_var_event->type,
                                 user_var_event->charset_number, flags,
                                 event_info->event_cache_type,
                                 event_info->event_logging_type);
            if (cache_data->write_event(thd, &e))
              goto err;
          }
        }
      }
    }

    /*
      Write the event.
    */
    if (cache_data->write_event(thd, event_info))
      goto err;

    /* Debug-only fault injection: behave as if the write had failed. */
    if (DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
      goto err;

    /*
      After writing the event, if the trx-cache was used and any unsafe
      change was written into it, the cache is marked as cannot safely
      roll back.
    */
    if (is_trans_cache && stmt_cannot_safely_rollback(thd))
      cache_mngr->trx_cache.set_cannot_rollback();

    error= 0;

err:
    /*
      Reached on both success and failure. On failure, record the write
      error and flag an incident on the cache when the statement cannot be
      rolled back safely.
    */
    if (error)
    {
      set_write_error(thd, is_trans_cache);
      if (check_write_error(thd) && cache_data &&
          stmt_cannot_safely_rollback(thd))
        cache_data->set_incident();
    }
  }

  DBUG_RETURN(error);
}
7641 
7642 /**
7643   The method executes rotation when LOCK_log is already acquired
7644   by the caller.
7645 
7646   @param force_rotate  caller can request the log rotation
7647   @param check_purge   is set to true if rotation took place
7648 
7649   @note
7650     If rotation fails, for instance the server was unable
7651     to create a new log file, we still try to write an
7652     incident event to the current log.
7653 
7654   @note The caller must hold LOCK_log when invoking this function.
7655 
7656   @retval
7657     nonzero - error in rotating routine.
7658 */
rotate(bool force_rotate,bool * check_purge)7659 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
7660 {
7661   int error= 0;
7662   DBUG_ENTER("MYSQL_BIN_LOG::rotate");
7663 #ifdef WITH_WSREP
7664   if (WSREP_ON && wsrep_to_isolation)
7665     {
7666       *check_purge= false;
7667       WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d",
7668 		  wsrep_to_isolation);
7669       DBUG_RETURN(0);
7670     }
7671 #endif
7672 
7673   assert(!is_relay_log);
7674   mysql_mutex_assert_owner(&LOCK_log);
7675 
7676   DEBUG_SYNC(current_thd,"stop_binlog_rotation_after_acquiring_lock_log");
7677 
7678   *check_purge= false;
7679 
7680   if (DBUG_EVALUATE_IF("force_rotate", 1, 0) || force_rotate ||
7681       (my_b_tell(&log_file) >= (my_off_t) max_size))
7682   {
7683     error= new_file_without_locking(NULL);
7684     *check_purge= true;
7685   }
7686   DBUG_RETURN(error);
7687 }
7688 
7689 /**
7690   The method executes logs purging routine.
7691 
7692   @retval
7693     nonzero - error in rotating routine.
7694 */
purge()7695 void MYSQL_BIN_LOG::purge()
7696 {
7697 #ifdef HAVE_REPLICATION
7698   if (expire_logs_days)
7699   {
7700     DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
7701     time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
7702     DBUG_EXECUTE_IF("expire_logs_always",
7703                     { purge_time= my_time(0);});
7704     if (purge_time >= 0)
7705     {
7706       /*
7707         Flush logs for storage engines, so that the last transaction
7708         is fsynced inside storage engines.
7709       */
7710       ha_flush_logs(NULL);
7711       purge_logs_before_date(purge_time, true);
7712     }
7713   }
7714 #endif
7715 }
7716 
7717 /**
7718   Execute a FLUSH LOGS statement.
7719 
7720   The method is a shortcut of @c rotate() and @c purge().
7721   LOCK_log is acquired prior to rotate and is released after it.
7722 
7723   @param force_rotate  caller can request the log rotation
7724 
7725   @retval
7726     nonzero - error in rotating routine.
7727 */
rotate_and_purge(THD * thd,bool force_rotate)7728 int MYSQL_BIN_LOG::rotate_and_purge(THD* thd, bool force_rotate)
7729 {
7730   int error= 0;
7731   DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
7732   bool check_purge= false;
7733 
7734   /*
7735     FLUSH BINARY LOGS command should ignore 'read-only' and 'super_read_only'
7736     options so that it can update 'mysql.gtid_executed' replication repository
7737     table.
7738   */
7739   thd->set_skip_readonly_check();
7740   /*
7741     Wait for handlerton to insert any pending information into the binlog.
7742     For e.g. ha_ndbcluster which updates the binlog asynchronously this is
7743     needed so that the user see its own commands in the binlog.
7744   */
7745   ha_binlog_wait(thd);
7746 
7747   assert(!is_relay_log);
7748   mysql_mutex_lock(&LOCK_log);
7749   error= rotate(force_rotate, &check_purge);
7750   /*
7751     NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
7752           the mutex. Otherwise causes various deadlocks.
7753   */
7754   mysql_mutex_unlock(&LOCK_log);
7755 
7756   if (!error && check_purge)
7757     purge();
7758 
7759   DBUG_RETURN(error);
7760 }
7761 
next_file_id()7762 uint MYSQL_BIN_LOG::next_file_id()
7763 {
7764   uint res;
7765   mysql_mutex_lock(&LOCK_log);
7766   res = file_id++;
7767   mysql_mutex_unlock(&LOCK_log);
7768   return res;
7769 }
7770 
7771 
get_gtid_executed(Sid_map * sid_map,Gtid_set * gtid_set)7772 int MYSQL_BIN_LOG::get_gtid_executed(Sid_map *sid_map, Gtid_set *gtid_set)
7773 {
7774   DBUG_ENTER("MYSQL_BIN_LOG::get_gtid_executed");
7775   int error= 0;
7776 
7777   mysql_mutex_lock(&mysql_bin_log.LOCK_commit);
7778   global_sid_lock->wrlock();
7779 
7780   enum_return_status return_status= global_sid_map->copy(sid_map);
7781   if (return_status != RETURN_STATUS_OK)
7782   {
7783     error= 1;
7784     goto end;
7785   }
7786 
7787   return_status= gtid_set->add_gtid_set(gtid_state->get_executed_gtids());
7788   if (return_status != RETURN_STATUS_OK)
7789     error= 1;
7790 
7791 end:
7792   global_sid_lock->unlock();
7793   mysql_mutex_unlock(&mysql_bin_log.LOCK_commit);
7794 
7795   DBUG_RETURN(error);
7796 }
7797 
7798 
7799 /**
7800   Auxiliary function to read a page from the cache and set the given
7801   buffer pointer to point to the beginning of the page and the given
7802   length pointer to point to the end of it.
7803 
7804   @param cache IO_CACHE to read from
7805   @param[OUT] buf_p Will be set to point to the beginning of the page.
7806   @param[OUT] buf_len_p Will be set to the length of the buffer.
7807 
7808   @retval false Success
7809   @retval true Error reading from the cache.
7810 */
read_cache_page(IO_CACHE * cache,uchar ** buf_p,uint32 * buf_len_p)7811 static bool read_cache_page(IO_CACHE *cache, uchar **buf_p, uint32 *buf_len_p)
7812 {
7813   assert(*buf_len_p == 0);
7814   cache->read_pos= cache->read_end;
7815   *buf_len_p= my_b_fill(cache);
7816   *buf_p= cache->read_pos;
7817   return cache->error ? true : false;
7818 }
7819 
7820 
/**
  Write the contents of the given IO_CACHE to the binary log.

  The cache will be reset as a READ_CACHE to be able to read the
  contents from it.

  The data will be post-processed: see class Binlog_event_writer for
  details.

  The cache is consumed page by page.  For each event, the
  LOG_EVENT_HEADER_LEN header bytes are first collected in a local buffer
  (they may be split across pages) and passed to the writer in one call;
  the event body is then passed to the writer in whatever pieces the
  cache pages provide.

  @param cache Events will be read from this IO_CACHE.
  @param writer Events will be written to this Binlog_event_writer.

  @retval true IO error.
  @retval false Success.

  @note A read failure reported by read_cache_page() only triggers an
  assert here; when assert() is compiled out it is not turned into an
  error return by this function.

  @see MYSQL_BIN_LOG::write_cache
*/
bool MYSQL_BIN_LOG::do_write_cache(IO_CACHE *cache, Binlog_event_writer *writer)
{
  DBUG_ENTER("MYSQL_BIN_LOG::do_write_cache");

  DBUG_EXECUTE_IF("simulate_do_write_cache_failure",
                  {
                    /*
                       see binlog_cache_data::write_event() that reacts on
                       @c simulate_disk_full_at_flush_pending.
                    */
                    DBUG_SET("-d,simulate_do_write_cache_failure");
                    DBUG_RETURN(true);
                  });

#ifndef NDEBUG
  /* Total number of bytes in the cache; checked when the end is reached. */
  uint64 expected_total_len= my_b_tell(cache);
#endif

  DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
                  {
                    DBUG_SET("+d,simulate_file_write_error");
                  });

  /* Switch the cache to read mode, positioned at offset 0. */
  if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
  {
    DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
                    {
                      DBUG_SET("-d,simulate_file_write_error");
                    });
    DBUG_RETURN(true);
  }

  /* Unconsumed part of the current cache page. */
  uchar *buf= cache->read_pos;
  uint32 buf_len= my_b_bytes_in_cache(cache);
  /*
    Zero while an event header is being collected; otherwise the writer is
    in the middle of an event body.
    NOTE(review): presumably write_event_part() maintains this as the number
    of bytes left in the current event - confirm in Binlog_event_writer.
  */
  uint32 event_len= 0;
  /* Staging area for a header that may arrive in several pieces. */
  uchar header[LOG_EVENT_HEADER_LEN];
  uint32 header_len= 0;

  /*
    Each iteration of this loop processes all or a part of
    1) an event header or 2) an event body from the IO_CACHE.
  */
  while (true)
  {
    /**
      Nothing in cache: try to refill, and if cache was ended here,
      return success.  This code is needed even on the first iteration
      of the loop, because reinit_io_cache may or may not fill the
      first page.
    */
    if (buf_len == 0)
    {
      if (read_cache_page(cache, &buf, &buf_len))
      {
        /**
          @todo: this can happen in case of disk corruption in the
          IO_CACHE.  We may have written a half transaction (even half
          event) to the binlog.  We should rollback the transaction
          and truncate the binlog.  /Sven
        */
        assert(0);
      }
      if (buf_len == 0)
      {
        /**
          @todo: this can happen in case of disk corruption in the
          IO_CACHE.  We may have written a half transaction (even half
          event) to the binlog.  We should rollback the transaction
          and truncate the binlog.  /Sven
        */
        /* In debug builds, verify that every cached byte was read. */
        assert(my_b_tell(cache) == expected_total_len);
        /* Reached the end of the cache */
        DBUG_RETURN(false);
      }
    }

    /* Write event header into binlog */
    if (event_len == 0)
    {
      /* data in the buf may be smaller than header size.*/
      uint32 header_incr =
        std::min<uint32>(LOG_EVENT_HEADER_LEN - header_len, buf_len);

      /* Append what is available to the partially collected header. */
      memcpy(header + header_len, buf, header_incr);
      header_len += header_incr;
      buf += header_incr;
      buf_len -= header_incr;

      /* An incomplete header is completed on the next loop iteration. */
      if (header_len == LOG_EVENT_HEADER_LEN)
      {
        // Flush event header.
        uchar *header_p= header;
        if (writer->write_event_part(&header_p, &header_len, &event_len))
          DBUG_RETURN(true);
        /* The writer must have consumed the whole header. */
        assert(header_len == 0);
      }
    }
    else
    {
      /* Write all or part of the event body to binlog */
      if (writer->write_event_part(&buf, &buf_len, &event_len))
        DBUG_RETURN(true);
    }
  }
}
7943 
7944 /**
7945   Writes an incident event to stmt_cache.
7946 
7947   @param ev Incident event to be written
7948   @param thd Thread variable
7949   @param need_lock_log If true, will acquire LOCK_log; otherwise the
7950   caller should already have acquired LOCK_log.
7951   @param err_msg Error message written to log file for the incident.
7952   @do_flush_and_sync If true, will call flush_and_sync(), rotate() and
7953   purge().
7954 
7955   @retval false error
7956   @retval true success
7957 */
write_incident(Incident_log_event * ev,THD * thd,bool need_lock_log,const char * err_msg,bool do_flush_and_sync)7958 bool MYSQL_BIN_LOG::write_incident(Incident_log_event *ev, THD *thd,
7959                                    bool need_lock_log, const char* err_msg,
7960                                    bool do_flush_and_sync)
7961 {
7962   uint error= 0;
7963   DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
7964   assert(err_msg);
7965 
7966   if (!is_open())
7967     DBUG_RETURN(error);
7968 
7969   // @todo make this work with the group log. /sven
7970   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
7971 
7972   /*
7973     thd->cache_mngr may be uninitialized when first transaction resulted in an
7974     incident. If there is no cache manager exists for the session, then we
7975     create one, so that a GTID is generated and is written prior to flushing
7976     the stmt_cache.
7977   */
7978   if (cache_mngr == NULL)
7979   {
7980     if (thd->binlog_setup_trx_data() ||
7981         DBUG_EVALUATE_IF("simulate_cache_creation_failure", 1, 0))
7982     {
7983       enum_gtid_mode gtid_mode= get_gtid_mode(GTID_MODE_LOCK_NONE);
7984       if (gtid_mode == GTID_MODE_ON || gtid_mode == GTID_MODE_ON_PERMISSIVE)
7985       {
7986         const char *mode= gtid_mode == GTID_MODE_ON ? "ON" : "ON_PERMISSIVE";
7987         std::ostringstream message;
7988 
7989         message << "Could not create IO cache while writing an incident event "
7990                    "to the binary log for query: '"<< thd->query().str <<
7991                    "'. Since GTID_MODE= " << mode <<", server is unable "
7992                    "to proceed with logging.";
7993         handle_binlog_flush_or_sync_error(thd, true, message.str().c_str());
7994         DBUG_RETURN(true);
7995       }
7996     }
7997     else
7998       cache_mngr= thd_get_cache_mngr(thd);
7999   }
8000 
8001 #ifndef NDEBUG
8002   if (DBUG_EVALUATE_IF("simulate_write_incident_event_into_binlog_directly",
8003                        1, 0) && !cache_mngr->stmt_cache.is_binlog_empty())
8004   {
8005     /* The stmt_cache contains corruption data, so we can reset it. */
8006     cache_mngr->stmt_cache.reset();
8007   }
8008 #endif
8009 
8010   /*
8011     If there is no binlog cache then we write incidents directly
8012     into the binlog. If caller needs GTIDs it has to setup the
8013     binlog cache (for the injector thread).
8014   */
8015   if (cache_mngr == NULL ||
8016       DBUG_EVALUATE_IF("simulate_write_incident_event_into_binlog_directly",
8017                        1, 0))
8018   {
8019     if (need_lock_log)
8020       mysql_mutex_lock(&LOCK_log);
8021     else
8022       mysql_mutex_assert_owner(&LOCK_log);
8023     /* Write an incident event into binlog directly. */
8024     error= ev->write(&log_file);
8025     /*
8026       Write an error to log. So that user might have a chance
8027       to be alerted and explore incident details.
8028     */
8029     if (!error)
8030       sql_print_error("%s An incident event has been written to the binary "
8031                       "log which will stop the slaves.", err_msg);
8032   }
8033   else // (cache_mngr != NULL)
8034   {
8035     if (!cache_mngr->stmt_cache.is_binlog_empty())
8036     {
8037       /* The stmt_cache contains corruption data, so we can reset it. */
8038       cache_mngr->stmt_cache.reset();
8039     }
8040     if (!cache_mngr->trx_cache.is_binlog_empty())
8041     {
8042       /* The trx_cache contains corruption data, so we can reset it. */
8043       cache_mngr->trx_cache.reset();
8044     }
8045     /*
8046       Write the incident event into stmt_cache, so that a GTID is generated and
8047       written for it prior to flushing the stmt_cache.
8048     */
8049     binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(false);
8050     if ((error= cache_data->write_event(thd, ev)))
8051     {
8052       sql_print_error("Failed to write an incident event into stmt_cache.");
8053       cache_mngr->stmt_cache.reset();
8054       DBUG_RETURN(error);
8055     }
8056 
8057     if (need_lock_log)
8058       mysql_mutex_lock(&LOCK_log);
8059     else
8060       mysql_mutex_assert_owner(&LOCK_log);
8061   }
8062 
8063   if (do_flush_and_sync)
8064   {
8065     if (!error && !(error= flush_and_sync()))
8066     {
8067       bool check_purge= false;
8068       update_binlog_end_pos();
8069       is_rotating_caused_by_incident= true;
8070       error= rotate(true, &check_purge);
8071       is_rotating_caused_by_incident= false;
8072       if (!error && check_purge)
8073         purge();
8074     }
8075   }
8076 
8077   if (need_lock_log)
8078     mysql_mutex_unlock(&LOCK_log);
8079 
8080   /*
8081     Write an error to log. So that user might have a chance
8082     to be alerted and explore incident details.
8083   */
8084   if (!error && cache_mngr != NULL)
8085     sql_print_error("%s An incident event has been written to the binary "
8086                     "log which will stop the slaves.", err_msg);
8087 
8088   DBUG_RETURN(error);
8089 }
8090 
write_stmt_directly(THD * thd,const char * stmt,size_t stmt_len,enum_sql_command sql_command)8091 bool MYSQL_BIN_LOG::write_stmt_directly(THD* thd, const char *stmt, size_t stmt_len,
8092                                        enum_sql_command sql_command)
8093 {
8094   bool ret= false;
8095   /* backup the original command */
8096   enum_sql_command save_sql_command= thd->lex->sql_command;
8097   thd->lex->sql_command= sql_command;
8098 
8099   if (thd->binlog_query(THD::STMT_QUERY_TYPE, stmt, stmt_len,
8100                         FALSE, FALSE, FALSE, 0) ||
8101       commit(thd, false) != TC_LOG::RESULT_SUCCESS)
8102   {
8103     ret= true;
8104   }
8105 
8106   thd->lex->sql_command= save_sql_command;
8107   return ret;
8108 }
8109 
8110 
8111 /**
8112   Creates an incident event and writes it to the binary log.
8113 
8114   @param thd  Thread variable
8115   @param ev   Incident event to be written
8116   @param err_msg Error message written to log file for the incident.
8117   @param lock If the binary lock should be locked or not
8118 
8119   @retval
8120     0    error
8121   @retval
8122     1    success
8123 */
write_incident(THD * thd,bool need_lock_log,const char * err_msg,bool do_flush_and_sync)8124 bool MYSQL_BIN_LOG::write_incident(THD *thd, bool need_lock_log,
8125                                    const char* err_msg,
8126                                    bool do_flush_and_sync)
8127 {
8128   DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
8129 
8130   if (!is_open())
8131     DBUG_RETURN(0);
8132 
8133   LEX_STRING write_error_msg= {(char*) err_msg, strlen(err_msg)};
8134   binary_log::Incident_event::enum_incident incident=
8135                               binary_log::Incident_event::INCIDENT_LOST_EVENTS;
8136   Incident_log_event ev(thd, incident, write_error_msg);
8137 
8138   DBUG_RETURN(write_incident(&ev, thd, need_lock_log, err_msg,
8139                              do_flush_and_sync));
8140 }
8141 
8142 
/**
  Write the contents of the statement or transaction cache to the binary log.

  Comparison with do_write_cache:

  - do_write_cache is a lower-level function that only performs the
    actual write.

  - write_cache is a higher-level function that calls do_write_cache
    and additionally performs some maintenance tasks, including:
    - report any errors that occurred
    - write incident event if needed
    - update gtid_state
    - update thd.binlog_next_event_pos

  @param thd Thread variable

  @param cache_data Events will be read from the IO_CACHE of this
  cache_data object.

  @param writer Events will be written to this Binlog_event_writer.

  @retval true IO error.  In this case thd->commit_error has been set to
  THD::CE_FLUSH_ERROR and write_error is set.
  @retval false Success.

  @note The caller must hold LOCK_log.
  @note We only come here if there is something in the cache.
  @note Whatever is in the cache is always a complete transaction.
  @note 'cache' needs to be reinitialized after this functions returns.
*/
bool MYSQL_BIN_LOG::write_cache(THD *thd, binlog_cache_data *cache_data,
                                Binlog_event_writer *writer)
{
  DBUG_ENTER("MYSQL_BIN_LOG::write_cache(THD *, binlog_cache_data *, bool)");
#ifdef WITH_WSREP
  /* Nothing is written when wsrep only emulates the binlog. */
  if (WSREP_EMULATE_BINLOG(thd)) DBUG_RETURN(0);
#endif /* WITH_WSREP */

  IO_CACHE *cache= &cache_data->cache_log;
  /* Sampled up front, before the cache is read. */
  bool incident= cache_data->has_incident();

  mysql_mutex_assert_owner(&LOCK_log);

  assert(is_open());
  if (likely(is_open()))                       // Should always be true
  {
    /*
      We only bother to write to the binary log if there is anything
      to write.

      @todo Is this check redundant? Probably this is only called if
      there is anything in the cache (see @note in comment above this
      function). Check if we can replace this by an assertion. /Sven
    */
    if (my_b_tell(cache) > 0)
    {
      /* Fault injection: write the cache, flush and sync, then crash. */
      DBUG_EXECUTE_IF("crash_before_writing_xid",
                      {
                        if ((write_error= do_write_cache(cache, writer)))
                          DBUG_PRINT("info", ("error writing binlog cache: %d",
                                              write_error));
                        flush_and_sync(true);
                        DBUG_PRINT("info", ("crashing before writing xid"));
                        DBUG_SUICIDE();
                      });
      if ((write_error= do_write_cache(cache, writer)))
        goto err;

      const char* err_msg= "Non-transactional changes did not get into "
                           "the binlog.";
      /* LOCK_log is already held, and flushing is left to the caller. */
      if (incident && write_incident(thd, false/*need_lock_log=false*/,
                                     err_msg,
                                     false/*do_flush_and_sync==false*/))
        goto err;

      DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE(););
      /*
        A failure while reading the cache is reported here, after the write;
        setting write_error first suppresses the second message at 'err'.
      */
      if (cache->error)				// Error on read
      {
        char errbuf[MYSYS_STRERROR_SIZE];
        sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name,
                        errno, my_strerror(errbuf, sizeof(errbuf), errno));
        write_error= true; // Don't give more errors
        goto err;
      }
    }
    update_thd_next_event_pos(thd);
  }

  DBUG_RETURN(false);

err:
  /* Report a write error only if none has been recorded already. */
  if (!write_error)
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    write_error= true;
    sql_print_error(ER(ER_ERROR_ON_WRITE), name,
                    errno, my_strerror(errbuf, sizeof(errbuf), errno));
  }

  /*
    If the flush has failed due to ENOSPC, set the flush_error flag.
  */
  if (cache->error && thd->is_error() && my_errno() == ENOSPC)
  {
    cache_data->set_flush_error(thd);
  }
  thd->commit_error= THD::CE_FLUSH_ERROR;

  DBUG_RETURN(true);
}
8252 
8253 
8254 /**
8255   Wait until we get a signal that the relay log has been updated.
8256 
8257   @param[in] thd        Thread variable
8258   @param[in] timeout    a pointer to a timespec;
8259                         NULL means to wait w/o timeout.
8260 
8261   @retval    0          if got signalled on update
8262   @retval    non-0      if wait timeout elapsed
8263 
8264   @note
8265     One must have a lock on LOCK_log before calling this function.
8266 */
8267 
wait_for_update_relay_log(THD * thd,const struct timespec * timeout)8268 int MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd, const struct timespec *timeout)
8269 {
8270   int ret= 0;
8271   PSI_stage_info old_stage;
8272   DBUG_ENTER("wait_for_update_relay_log");
8273 
8274   thd->ENTER_COND(&update_cond, &LOCK_log,
8275                   &stage_slave_has_read_all_relay_log,
8276                   &old_stage);
8277 
8278   if (!timeout)
8279     mysql_cond_wait(&update_cond, &LOCK_log);
8280   else
8281     ret= mysql_cond_timedwait(&update_cond, &LOCK_log,
8282                               const_cast<struct timespec *>(timeout));
8283   mysql_mutex_unlock(&LOCK_log);
8284   thd->EXIT_COND(&old_stage);
8285 
8286   DBUG_RETURN(ret);
8287 }
8288 
8289 /**
8290   Wait until we get a signal that the binary log has been updated.
8291   Applies to master only.
8292 
8293   NOTES
8294   @param[in] thd        a THD struct
8295   @param[in] timeout    a pointer to a timespec;
8296                         NULL means to wait w/o timeout.
8297   @retval    0          if got signalled on update
8298   @retval    non-0      if wait timeout elapsed
8299   @note
8300     LOCK_log must be taken before calling this function.
8301     LOCK_log is being released while the thread is waiting.
8302     LOCK_log is released by the caller.
8303 */
8304 
wait_for_update_bin_log(THD * thd,const struct timespec * timeout)8305 int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
8306                                            const struct timespec *timeout)
8307 {
8308   int ret= 0;
8309   DBUG_ENTER("wait_for_update_bin_log");
8310 
8311   if (!timeout)
8312     mysql_cond_wait(&update_cond, &LOCK_binlog_end_pos);
8313   else
8314     ret= mysql_cond_timedwait(&update_cond, &LOCK_binlog_end_pos,
8315                               const_cast<struct timespec *>(timeout));
8316   DBUG_RETURN(ret);
8317 }
8318 
8319 
8320 /**
8321   Close the log file.
8322 
8323   @param exiting     Bitmask for one or more of the following bits:
8324           - LOG_CLOSE_INDEX : if we should close the index file
8325           - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
8326                                      at once after close.
8327           - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
8328 
8329   @param need_lock_log If true, this function acquires LOCK_log;
8330   otherwise the caller should already have acquired it.
8331 
8332   @param need_lock_index If true, this function acquires LOCK_index;
8333   otherwise the caller should already have acquired it.
8334 
8335   @note
8336     One can do an open on the object at once after doing a close.
8337     The internal structures are not freed until cleanup() is called
8338 */
8339 
close(uint exiting,bool need_lock_log,bool need_lock_index)8340 void MYSQL_BIN_LOG::close(uint exiting, bool need_lock_log,
8341                           bool need_lock_index)
8342 {					// One can't set log_type here!
8343   DBUG_ENTER("MYSQL_BIN_LOG::close");
8344   DBUG_PRINT("enter",("exiting: %d", (int) exiting));
8345   if (need_lock_log)
8346     mysql_mutex_lock(&LOCK_log);
8347   else
8348     mysql_mutex_assert_owner(&LOCK_log);
8349 
8350   if (log_state.atomic_get() == LOG_OPENED)
8351   {
8352 #ifdef HAVE_REPLICATION
8353     if ((exiting & LOG_CLOSE_STOP_EVENT) != 0)
8354     {
8355       /**
8356         TODO(WL#7546): Change the implementation to Stop_event after write() is
8357         moved into libbinlogevents
8358       */
8359       Stop_log_event s;
8360       // the checksumming rule for relay-log case is similar to Rotate
8361         s.common_footer->checksum_alg= is_relay_log ? relay_log_checksum_alg :
8362                                        static_cast<enum_binlog_checksum_alg>
8363                                        (binlog_checksum_options);
8364         assert(!is_relay_log ||
8365                relay_log_checksum_alg != binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
8366       s.write(&log_file);
8367       bytes_written+= s.common_header->data_written;
8368       flush_io_cache(&log_file);
8369       update_binlog_end_pos();
8370     }
8371 #endif /* HAVE_REPLICATION */
8372 
8373     /* don't pwrite in a file opened with O_APPEND - it doesn't work */
8374     if (log_file.type == WRITE_CACHE)
8375     {
8376       my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
8377       my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
8378       uchar flags= 0;            // clearing LOG_EVENT_BINLOG_IN_USE_F
8379       mysql_file_pwrite(log_file.file, &flags, 1, offset, MYF(0));
8380       /*
8381         Restore position so that anything we have in the IO_cache is written
8382         to the correct position.
8383         We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
8384         original position on system that doesn't support pwrite().
8385       */
8386       mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
8387     }
8388 
8389     /* this will cleanup IO_CACHE, sync and close the file */
8390     if (log_state.atomic_get() == LOG_OPENED)
8391     {
8392       end_io_cache(&log_file);
8393 
8394       if (mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error)
8395       {
8396         char errbuf[MYSYS_STRERROR_SIZE];
8397         write_error= 1;
8398         sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno,
8399                         my_strerror(errbuf, sizeof(errbuf), errno));
8400       }
8401 
8402       if (mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error)
8403       {
8404         char errbuf[MYSYS_STRERROR_SIZE];
8405         write_error= 1;
8406         sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno,
8407                         my_strerror(errbuf, sizeof(errbuf), errno));
8408       }
8409     }
8410 
8411     log_state.atomic_set((exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED);
8412     my_free(name);
8413     name= NULL;
8414   }
8415 
8416   /*
8417     The following test is needed even if is_open() is not set, as we may have
8418     called a not complete close earlier and the index file is still open.
8419   */
8420 
8421   if (need_lock_index)
8422     mysql_mutex_lock(&LOCK_index);
8423   else
8424     mysql_mutex_assert_owner(&LOCK_index);
8425 
8426   if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
8427   {
8428     end_io_cache(&index_file);
8429     if (mysql_file_close(index_file.file, MYF(0)) < 0 && ! write_error)
8430     {
8431       char errbuf[MYSYS_STRERROR_SIZE];
8432       write_error= 1;
8433       sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name,
8434                       errno, my_strerror(errbuf, sizeof(errbuf), errno));
8435     }
8436   }
8437 
8438   if (need_lock_index)
8439     mysql_mutex_unlock(&LOCK_index);
8440 
8441   log_state.atomic_set((exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED);
8442   my_free(name);
8443   name= NULL;
8444 
8445   if (need_lock_log)
8446     mysql_mutex_unlock(&LOCK_log);
8447 
8448   DBUG_VOID_RETURN;
8449 }
8450 
harvest_bytes_written(Relay_log_info * rli,bool need_log_space_lock)8451 void MYSQL_BIN_LOG::harvest_bytes_written(Relay_log_info* rli, bool need_log_space_lock)
8452 {
8453 #ifndef NDEBUG
8454   char buf1[22],buf2[22];
8455 #endif
8456   DBUG_ENTER("harvest_bytes_written");
8457   if (need_log_space_lock)
8458     mysql_mutex_lock(&rli->log_space_lock);
8459   else
8460     mysql_mutex_assert_owner(&rli->log_space_lock);
8461   rli->log_space_total+= bytes_written;
8462   DBUG_PRINT("info",("relay_log_space: %s  bytes_written: %s",
8463         llstr(rli->log_space_total,buf1), llstr(bytes_written,buf2)));
8464   bytes_written=0;
8465   if (need_log_space_lock)
8466     mysql_mutex_unlock(&rli->log_space_lock);
8467   DBUG_VOID_RETURN;
8468 }
8469 
set_max_size(ulong max_size_arg)8470 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
8471 {
8472   /*
8473     We need to take locks, otherwise this may happen:
8474     new_file() is called, calls open(old_max_size), then before open() starts,
8475     set_max_size() sets max_size to max_size_arg, then open() starts and
8476     uses the old_max_size argument, so max_size_arg has been overwritten and
8477     it's like if the SET command was never run.
8478   */
8479   DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
8480   mysql_mutex_lock(&LOCK_log);
8481   if (is_open())
8482     max_size= max_size_arg;
8483   mysql_mutex_unlock(&LOCK_log);
8484   DBUG_VOID_RETURN;
8485 }
8486 
8487 /****** transaction coordinator log for 2pc - binlog() based solution ******/
8488 
8489 /**
8490   @todo
8491   keep in-memory list of prepared transactions
8492   (add to list in log(), remove on unlog())
8493   and copy it to the new binlog if rotated
8494   but let's check the behaviour of tc_log_page_waits first!
8495 */
8496 
open_binlog(const char * opt_name)8497 int MYSQL_BIN_LOG::open_binlog(const char *opt_name)
8498 {
8499   LOG_INFO log_info;
8500   int      error= 1;
8501 
8502   /*
8503     This function is used for 2pc transaction coordination.  Hence, it
8504     is never used for relay logs.
8505   */
8506   assert(!is_relay_log);
8507   assert(total_ha_2pc > 1 || (1 == total_ha_2pc && opt_bin_log));
8508   assert(opt_name && opt_name[0]);
8509 
8510   if (!my_b_inited(&index_file))
8511   {
8512     /* There was a failure to open the index file, can't open the binlog */
8513     cleanup();
8514     return 1;
8515   }
8516 
8517   if (using_heuristic_recover())
8518   {
8519     /* generate a new binlog to mask a corrupted one */
8520     mysql_mutex_lock(&LOCK_log);
8521     open_binlog(opt_name, 0, max_binlog_size, false,
8522                 true/*need_lock_index=true*/,
8523                 true/*need_sid_lock=true*/,
8524                 NULL);
8525     mysql_mutex_unlock(&LOCK_log);
8526     cleanup();
8527     return 1;
8528   }
8529 
8530   if ((error= find_log_pos(&log_info, NullS, true/*need_lock_index=true*/)))
8531   {
8532     if (error != LOG_INFO_EOF)
8533       sql_print_error("find_log_pos() failed (error: %d)", error);
8534     else
8535       error= 0;
8536     goto err;
8537   }
8538 
8539   {
8540     const char *errmsg;
8541     IO_CACHE    log;
8542     File        file;
8543     Log_event  *ev=0;
8544     Format_description_log_event fdle(BINLOG_VERSION);
8545     char        log_name[FN_REFLEN];
8546     my_off_t    valid_pos= 0;
8547     my_off_t    binlog_size;
8548     MY_STAT     s;
8549 
8550     if (! fdle.is_valid())
8551       goto err;
8552 
8553     do
8554     {
8555       strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
8556     } while (!(error= find_next_log(&log_info, true/*need_lock_index=true*/)));
8557 
8558     if (error !=  LOG_INFO_EOF)
8559     {
8560       sql_print_error("find_log_pos() failed (error: %d)", error);
8561       goto err;
8562     }
8563 
8564     if ((file= open_binlog_file(&log, log_name, &errmsg)) < 0)
8565     {
8566       sql_print_error("%s", errmsg);
8567       goto err;
8568     }
8569 
8570     my_stat(log_name, &s, MYF(0));
8571     binlog_size= s.st_size;
8572 
8573     /*
8574       If the binary log was not properly closed it means that the server
8575       may have crashed. In that case, we need to call MYSQL_BIN_LOG::recover
8576       to:
8577 
8578         a) collect logged XIDs;
8579         b) complete the 2PC of the pending XIDs;
8580         c) collect the last valid position.
8581 
8582       Therefore, we do need to iterate over the binary log, even if
8583       total_ha_2pc == 1, to find the last valid group of events written.
8584       Later we will take this value and truncate the log if need be.
8585     */
8586     if ((ev= Log_event::read_log_event(&log, 0, &fdle,
8587                                        opt_master_verify_checksum)) &&
8588         ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT &&
8589         (ev->common_header->flags & LOG_EVENT_BINLOG_IN_USE_F ||
8590          DBUG_EVALUATE_IF("eval_force_bin_log_recovery", true, false)))
8591     {
8592       sql_print_information("Recovering after a crash using %s", opt_name);
8593       valid_pos= my_b_tell(&log);
8594       error= recover(&log, (Format_description_log_event *)ev, &valid_pos);
8595     }
8596     else
8597       error=0;
8598 
8599     delete ev;
8600     end_io_cache(&log);
8601     mysql_file_close(file, MYF(MY_WME));
8602 
8603     if (error)
8604       goto err;
8605 
8606     /* Trim the crashed binlog file to last valid transaction
8607       or event (non-transaction) base on valid_pos. */
8608     if (valid_pos > 0)
8609     {
8610       if ((file= mysql_file_open(key_file_binlog, log_name,
8611                                  O_RDWR | O_BINARY, MYF(MY_WME))) < 0)
8612       {
8613         sql_print_error("Failed to open the crashed binlog file "
8614                         "when master server is recovering it.");
8615         return -1;
8616       }
8617 
8618       /* Change binlog file size to valid_pos */
8619       if (valid_pos < binlog_size)
8620       {
8621         if (my_chsize(file, valid_pos, 0, MYF(MY_WME)))
8622         {
8623           sql_print_error("Failed to trim the crashed binlog file "
8624                           "when master server is recovering it.");
8625           mysql_file_close(file, MYF(MY_WME));
8626           return -1;
8627         }
8628         else
8629         {
8630           sql_print_information("Crashed binlog file %s size is %llu, "
8631                                 "but recovered up to %llu. Binlog trimmed to %llu bytes.",
8632                                 log_name, binlog_size, valid_pos, valid_pos);
8633         }
8634       }
8635 
8636       /* Clear LOG_EVENT_BINLOG_IN_USE_F */
8637       my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
8638       uchar flags= 0;
8639       if (mysql_file_pwrite(file, &flags, 1, offset, MYF(0)) != 1)
8640       {
8641         sql_print_error("Failed to clear LOG_EVENT_BINLOG_IN_USE_F "
8642                         "for the crashed binlog file when master "
8643                         "server is recovering it.");
8644         mysql_file_close(file, MYF(MY_WME));
8645         return -1;
8646       }
8647 
8648       mysql_file_close(file, MYF(MY_WME));
8649     } //end if
8650   }
8651 
8652 err:
8653   return error;
8654 }
8655 
/**
  This is called on shutdown, after ha_panic.

  The body is empty: no work is performed by this function.
*/
void MYSQL_BIN_LOG::close()
{
}
8660 
8661 /*
8662   Prepare the transaction in the transaction coordinator.
8663 
8664   This function will prepare the transaction in the storage engines
8665   (by calling @c ha_prepare_low) what will write a prepare record
8666   to the log buffers.
8667 
8668   @retval 0    success
8669   @retval 1    error
8670 */
prepare(THD * thd,bool all)8671 int MYSQL_BIN_LOG::prepare(THD *thd, bool all)
8672 {
8673   DBUG_ENTER("MYSQL_BIN_LOG::prepare");
8674 
8675   assert(opt_bin_log);
8676   /*
8677     The applier thread explicitly overrides the value of sql_log_bin
8678     with the value of log_slave_updates.
8679   */
8680 #ifdef WITH_WSREP
8681   assert(thd->wsrep_applier || (thd->slave_thread ?
8682          opt_log_slave_updates : true));
8683 #else
8684   assert(thd->slave_thread ?
8685          opt_log_slave_updates : thd->variables.sql_log_bin);
8686 #endif /* WITH_WSREP */
8687 
8688   /*
8689     Set HA_IGNORE_DURABILITY to not flush the prepared record of the
8690     transaction to the log of storage engine (for example, InnoDB
8691     redo log) during the prepare phase. So that we can flush prepared
8692     records of transactions to the log of storage engine in a group
8693     right before flushing them to binary log during binlog group
8694     commit flush stage. Reset to HA_REGULAR_DURABILITY at the
8695     beginning of parsing next command.
8696   */
8697   thd->durability_property= HA_IGNORE_DURABILITY;
8698 
8699   int error= ha_prepare_low(thd, all);
8700 
8701   DBUG_RETURN(error);
8702 }
8703 
/**
  Commit the transaction in the transaction coordinator.

  This function will commit the session's transaction in the binary log
  and in the storage engines (by calling @c ha_commit_low). If the
  transaction was successfully logged (or not successfully unlogged)
  but the commit in the engines did not succeed, there is a risk of
  inconsistency between the engines and the binary log.

  For binary log group commit, the commit is separated into three
  parts:

  1. First part consists of filling the necessary caches and
     finalizing them (if they need to be finalized). After this,
     nothing is added to any of the caches.

  2. Second part executes an ordered flush and commit. This will be
     done using the group commit functionality in ordered_commit.

  3. Third part checks any errors resulting from the ordered commit
     and handles them appropriately.

  @param thd  Session whose transaction or statement is being committed.
  @param all  true to commit the session-scope transaction
              (Transaction_ctx::SESSION), false to commit only the
              statement scope (Transaction_ctx::STMT).

  @retval RESULT_SUCCESS   success
  @retval RESULT_ABORTED   error, transaction was neither logged nor committed
  @retval RESULT_INCONSISTENT  error, transaction was logged but not committed
*/
TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all)
{
  DBUG_ENTER("MYSQL_BIN_LOG::commit");
  DBUG_PRINT("info", ("query='%s'",
                      thd == current_thd ? thd->query().str : NULL));
  binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
  Transaction_ctx *trn_ctx= thd->get_transaction();
  /*
    Under WSREP, a wsrep XID contributes its seqno as the xid value;
    otherwise the regular my_xid of the transaction is used.
  */
#ifdef WITH_WSREP
  my_xid xid= (wsrep_is_wsrep_xid(trn_ctx->xid_state()->get_xid()) ?
               wsrep_xid_seqno(  (*trn_ctx->xid_state()->get_xid()) ) :
               trn_ctx->xid_state()->get_xid()->get_my_xid());
#else
  my_xid xid= trn_ctx->xid_state()->get_xid()->get_my_xid();
#endif /* WITH_WSREP */
  /* Set once something has been finalized in the respective cache. */
  bool stmt_stuff_logged= false;
  bool trx_stuff_logged= false;
  /*
    For a loggable XA PREPARE the engine commit is skipped: the flag
    guards the ha_commit_low() calls below and is passed on to
    ordered_commit().
  */
  bool skip_commit= is_loggable_xa_prepare(thd);

  DBUG_PRINT("enter", ("thd: 0x%llx, all: %s, xid: %llu, cache_mngr: 0x%llx",
                       (ulonglong) thd, YESNO(all), (ulonglong) xid,
                       (ulonglong) cache_mngr));

  /*
    No cache manager means nothing to log, but we still have to commit
    the transaction (unless the engine commit is to be skipped).
   */
  if (cache_mngr == NULL)
  {
    if (!skip_commit && ha_commit_low(thd, all))
      DBUG_RETURN(RESULT_ABORTED);
    DBUG_RETURN(RESULT_SUCCESS);
  }

  Transaction_ctx::enum_trx_scope trx_scope=  all ? Transaction_ctx::SESSION :
                                                    Transaction_ctx::STMT;

  DBUG_PRINT("debug", ("in_transaction: %s, no_2pc: %s, rw_ha_count: %d",
                       YESNO(thd->in_multi_stmt_transaction_mode()),
                       YESNO(trn_ctx->no_2pc(trx_scope)),
                       trn_ctx->rw_ha_count(trx_scope)));
  DBUG_PRINT("debug",
             ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
              YESNO(trn_ctx->cannot_safely_rollback(Transaction_ctx::SESSION)),
              YESNO(cache_mngr->trx_cache.is_binlog_empty())));
  DBUG_PRINT("debug",
             ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
              YESNO(trn_ctx->cannot_safely_rollback(Transaction_ctx::STMT)),
              YESNO(cache_mngr->stmt_cache.is_binlog_empty())));


  /*
    If there are no handlertons registered, there is nothing to
    commit. Note that DDLs are written earlier in this case (inside
    binlog_query).

    TODO: This can be a problem in those cases that there are no
    handlertons registered. DDLs are one example, but the other case
    is MyISAM. In this case, we could register a dummy handlerton to
    trigger the commit.

    Any statement that requires logging will call binlog_query before
    trans_commit_stmt, so an alternative is to use the condition
    "binlog_query called or stmt.ha_list != 0".
   */
  if (!all && !trn_ctx->is_active(trx_scope) &&
      cache_mngr->stmt_cache.is_binlog_empty())
    DBUG_RETURN(RESULT_SUCCESS);

  if (thd->lex->sql_command == SQLCOM_XA_COMMIT)
  {
    /* The Commit phase of the XA two phase logging. */

    bool one_phase= get_xa_opt(thd) == XA_ONE_PHASE;
    assert(all);
    assert(!skip_commit || one_phase);

    int err= 0;
    XID_STATE *xs= thd->get_transaction()->xid_state();
    /*
      XA COMMIT ONE PHASE statement which has not gone through the binary log
      prepare phase, has to end the active XA transaction with appropriate XA
      END followed by XA COMMIT ONE PHASE.

      The state of XA transaction is changed to PREPARED after the prepare
      phase, intermediately in ha_commit_trans code for the interest of
      binlogger. Hence check that the XA COMMIT ONE PHASE is set to 'PREPARE'
      and it has not already been written to binary log. For such transaction
      write the appropriate XA END statement.
    */
    if (!(is_loggable_xa_prepare(thd))
        && one_phase
        && !(xs->is_binlogged())
        && !cache_mngr->trx_cache.is_binlog_empty())
    {
      XA_prepare_log_event end_evt(thd, xs->get_xid(), one_phase);
      err= cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
      if (err)
      {
        DBUG_RETURN(RESULT_ABORTED);
      }
      /*
        Marking the trx cache as logged here makes the generic
        end-of-transaction finalize further below a no-op for this case.
      */
      trx_stuff_logged= true;
      thd->get_transaction()->xid_state()->set_binlogged();
    }
    /*
      With the debug keyword set the call is replaced by a constant
      'true', i.e. a simulated failure to log the XA COMMIT.
    */
    if (DBUG_EVALUATE_IF("simulate_xa_commit_log_failure", true,
                         do_binlog_xa_commit_rollback(thd, xs->get_xid(),
                                                      true)))
      DBUG_RETURN(RESULT_ABORTED);
  }

  /*
    If there is anything in the stmt cache, and GTIDs are enabled,
    then this is a single statement outside a transaction and it is
    impossible that there is anything in the trx cache.  Hence, we
    write any empty group(s) to the stmt cache.

    Otherwise, we write any empty group(s) to the trx cache at the end
    of the transaction.
  */
  if (!cache_mngr->stmt_cache.is_binlog_empty())
  {
    /*
      Commit parent identification of non-transactional query has
      been deferred until now, except for the mixed transaction case.
    */
    trn_ctx->store_commit_parent(m_dependency_tracker.get_max_committed_timestamp());
    if (cache_mngr->stmt_cache.finalize(thd))
      DBUG_RETURN(RESULT_ABORTED);
    stmt_stuff_logged= true;
  }

  /*
    We commit the transaction if:
     - We are not in a transaction and committing a statement, or
     - We are in a transaction and a full transaction is committed.
    Otherwise, we accumulate the changes.
  */
  if (!cache_mngr->trx_cache.is_binlog_empty() &&
      ending_trans(thd, all) && !trx_stuff_logged)
  {
    const bool real_trans=
      (all || !trn_ctx->is_active(Transaction_ctx::SESSION));

    /*
      We are committing an XA transaction if it is a "real" transaction
      and has an XID assigned (because some handlerton registered). A
      transaction is "real" if either 'all' is true or the 'all.ha_list'
      is empty.

      Note: This is kind of strange since registering the binlog
      handlerton will then make the transaction XA, which is not really
      true. This occurs for example if a MyISAM statement is executed
      with row-based replication on.
    */
    if (is_loggable_xa_prepare(thd))
    {
      /* The prepare phase of XA transaction two phase logging. */
      int err= 0;
      bool one_phase= get_xa_opt(thd) == XA_ONE_PHASE;

      assert(thd->lex->sql_command != SQLCOM_XA_COMMIT || one_phase);

      XID_STATE *xs= thd->get_transaction()->xid_state();
      XA_prepare_log_event end_evt(thd, xs->get_xid(), one_phase);

      assert(skip_commit);

      err= cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
      if (err ||
          (DBUG_EVALUATE_IF("simulate_xa_prepare_failure_in_cache_finalize",
                            true, false)))
      {
        DBUG_RETURN(RESULT_ABORTED);
      }
    }
    /*
      NOTE: the two preprocessor branches below supply alternative
      beginnings of the same 'else if' condition; its last operand and
      the closing parenthesis follow the #endif.
    */
#ifdef WITH_WSREP
    /* LOAD DATA splitting sub-transactions are not properly registered
       and we compensate here to get the XID event to be created
    */
    else if (real_trans && xid &&
             ((trn_ctx->rw_ha_count(trx_scope) > 1) ||
              (WSREP(thd) && thd->lex->sql_command == SQLCOM_LOAD)) &&
#else
    else if (real_trans && xid && trn_ctx->rw_ha_count(trx_scope) > 1 &&
#endif /* WITH_WSREP */
             !trn_ctx->no_2pc(trx_scope))
    {
      /* Terminate the transaction in the trx cache with an Xid event. */
      Xid_log_event end_evt(thd, xid);
      if (cache_mngr->trx_cache.finalize(thd, &end_evt))
        DBUG_RETURN(RESULT_ABORTED);
    }
    else
    {
      /* Otherwise terminate it with a "COMMIT" query event. */
      Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
                              true, FALSE, TRUE, 0, TRUE);
      if (cache_mngr->trx_cache.finalize(thd, &end_evt))
        DBUG_RETURN(RESULT_ABORTED);
    }
    trx_stuff_logged= true;
  }

  /*
    This is part of the stmt rollback.
  */
  if (!all)
    cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);

  /*
    Now all the events are written to the caches, so we will commit
    the transaction in the engines. This is done using the group
    commit logic in ordered_commit, which will return when the
    transaction is committed.

    If the commit in the engines fail, we still have something logged
    to the binary log so we have to report this as a "bad" failure
    (failed to commit, but logged something).
  */
  if (stmt_stuff_logged || trx_stuff_logged)
  {
#ifdef WITH_WSREP
    if (WSREP_ON && thd->wsrep_replicate_GTID &&
        wsrep_replicate_GTID(thd))
    {
      /* GTID replication failed */
      DBUG_RETURN(RESULT_ABORTED);
    }
#endif /* WITH_WSREP */
    /*
      Run the before_commit hook. On failure (real or simulated) the
      engines are rolled back, the GTID state is updated for a rollback,
      the binlog caches are reset and the error is reported.
    */
    if (RUN_HOOK(transaction,
                 before_commit,
                 (thd, all,
                  thd_get_cache_mngr(thd)->get_binlog_cache_log(true),
                  thd_get_cache_mngr(thd)->get_binlog_cache_log(false),
                  max<my_off_t>(max_binlog_cache_size,
                                max_binlog_stmt_cache_size))) ||
        DBUG_EVALUATE_IF("simulate_failure_in_before_commit_hook", true, false))
    {
      ha_rollback_low(thd, all);
      gtid_state->update_on_rollback(thd);
      thd_get_cache_mngr(thd)->reset();
      //Reset the thread OK status before changing the outcome.
      if (thd->get_stmt_da()->is_ok())
        thd->get_stmt_da()->reset_diagnostics_area();
      my_error(ER_RUN_HOOK_ERROR, MYF(0), "before_commit");
      DBUG_RETURN(RESULT_ABORTED);
    }
    /*
      Check whether the transaction should commit or abort given the
      plugin feedback.
    */
    if (thd->get_transaction()->get_rpl_transaction_ctx()->is_transaction_rollback() ||
        (DBUG_EVALUATE_IF("simulate_transaction_rollback_request", true, false)))
    {
      ha_rollback_low(thd, all);
      gtid_state->update_on_rollback(thd);
      thd_get_cache_mngr(thd)->reset();
      /* As above: clear an OK status before raising the error. */
      if (thd->get_stmt_da()->is_ok())
        thd->get_stmt_da()->reset_diagnostics_area();
      my_error(ER_TRANSACTION_ROLLBACK_DURING_COMMIT, MYF(0));
      DBUG_RETURN(RESULT_ABORTED);
    }

    if (ordered_commit(thd, all, skip_commit))
      DBUG_RETURN(RESULT_INCONSISTENT);

    /*
      Mark the flag m_is_binlogged to true only after we are done
      with checking all the error cases.
    */
    if (is_loggable_xa_prepare(thd))
      thd->get_transaction()->xid_state()->set_binlogged();
  }
  else if (!skip_commit)
  {
    /*
      Nothing was finalized in either cache, so only the engines are
      committed.
      NOTE(review): a failure here is reported as RESULT_INCONSISTENT
      although nothing was logged on this path, whereas the
      cache_mngr == NULL path above reports RESULT_ABORTED for the same
      kind of failure -- confirm this is intended.
    */
    if (ha_commit_low(thd, all))
      DBUG_RETURN(RESULT_INCONSISTENT);
  }

  DBUG_RETURN(RESULT_SUCCESS);
}
9008 
9009 
9010 /**
9011    Flush caches for session.
9012 
9013    @note @c set_trans_pos is called with a pointer to the file name
9014    that the binary log currently use and a rotation will change the
9015    contents of the variable.
9016 
9017    The position is used when calling the after_flush, after_commit,
9018    and after_rollback hooks, but these have been placed so that they
9019    occur before a rotation is executed.
9020 
9021    It is the responsibility of any plugin that use this position to
9022    copy it if they need it after the hook has returned.
9023 
9024    The current "global" transaction_counter is stepped and its new value
9025    is assigned to the transaction.
9026  */
9027 std::pair<int,my_off_t>
flush_thread_caches(THD * thd)9028 MYSQL_BIN_LOG::flush_thread_caches(THD *thd)
9029 {
9030   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
9031   my_off_t bytes= 0;
9032   bool wrote_xid= false;
9033   int error= cache_mngr->flush(thd, &bytes, &wrote_xid);
9034   if (!error && bytes > 0)
9035   {
9036     /*
9037       Note that set_trans_pos does not copy the file name. See
9038       this function documentation for more info.
9039     */
9040     thd->set_trans_pos(log_file_name, my_b_tell(&log_file));
9041     if (wrote_xid)
9042       inc_prep_xids(thd);
9043   }
9044   DBUG_PRINT("debug", ("bytes: %llu", bytes));
9045   return std::make_pair(error, bytes);
9046 }
9047 
9048 
9049 /**
9050   Execute the flush stage.
9051 
9052   @param total_bytes_var Pointer to variable that will be set to total
9053   number of bytes flushed, or NULL.
9054 
9055   @param rotate_var Pointer to variable that will be set to true if
9056   binlog rotation should be performed after releasing locks. If rotate
9057   is not necessary, the variable will not be touched.
9058 
9059   @return Error code on error, zero on success
9060  */
9061 
9062 int
process_flush_stage_queue(my_off_t * total_bytes_var,bool * rotate_var,THD ** out_queue_var)9063 MYSQL_BIN_LOG::process_flush_stage_queue(my_off_t *total_bytes_var,
9064                                          bool *rotate_var,
9065                                          THD **out_queue_var)
9066 {
9067   DBUG_ENTER("MYSQL_BIN_LOG::process_flush_stage_queue");
9068   #ifndef NDEBUG
9069   // number of flushes per group.
9070   int no_flushes= 0;
9071   #endif
9072   assert(total_bytes_var && rotate_var && out_queue_var);
9073   my_off_t total_bytes= 0;
9074   int flush_error= 1;
9075   mysql_mutex_assert_owner(&LOCK_log);
9076 
9077   /*
9078     Fetch the entire flush queue and empty it, so that the next batch
9079     has a leader. We must do this before invoking ha_flush_logs(...)
9080     for guaranteeing to flush prepared records of transactions before
9081     flushing them to binary log, which is required by crash recovery.
9082   */
9083   THD *first_seen= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
9084   assert(first_seen != NULL);
9085   /*
9086     We flush prepared records of transactions to the log of storage
9087     engine (for example, InnoDB redo log) in a group right before
9088     flushing them to binary log.
9089   */
9090   ha_flush_logs(NULL, true);
9091   DBUG_EXECUTE_IF("crash_after_flush_engine_log", DBUG_SUICIDE(););
9092   assign_automatic_gtids_to_flush_group(first_seen);
9093   /* Flush thread caches to binary log. */
9094   for (THD *head= first_seen ; head ; head = head->next_to_commit)
9095   {
9096     std::pair<int,my_off_t> result= flush_thread_caches(head);
9097     total_bytes+= result.second;
9098     if (flush_error == 1)
9099       flush_error= result.first;
9100 #ifndef NDEBUG
9101     no_flushes++;
9102 #endif
9103   }
9104 
9105   *out_queue_var= first_seen;
9106   *total_bytes_var= total_bytes;
9107   if (total_bytes > 0 && my_b_tell(&log_file) >= (my_off_t) max_size)
9108     *rotate_var= true;
9109 #ifndef NDEBUG
9110   DBUG_PRINT("info",("no_flushes:= %d", no_flushes));
9111   no_flushes= 0;
9112 #endif
9113   DBUG_RETURN(flush_error);
9114 }
9115 
/**
  Commit a sequence of sessions.

  This function commits an entire queue of sessions starting with the
  session in @c first. Every session whose transaction has the
  commit_low flag set is committed in the storage engines; a failing
  engine commit is recorded in that session's commit_error.

  It will also add the GTIDs of the transactions to gtid_executed and
  decrement the prepared XID counter for every session that wrote an
  XID.

  The caller must own LOCK_commit (asserted).

  @see MYSQL_BIN_LOG::ordered_commit

  @param thd The "master" thread
  @param first First thread in the queue of threads to commit
 */

void
MYSQL_BIN_LOG::process_commit_stage_queue(THD *thd, THD *first)
{
  mysql_mutex_assert_owner(&LOCK_commit);
#ifndef NDEBUG
  thd->get_transaction()->m_flags.ready_preempt= 1; // formality by the leader
#endif
  for (THD *head= first ; head ; head = head->next_to_commit)
  {
    DBUG_PRINT("debug", ("Thread ID: %u, commit_error: %d, flags.pending: %s",
                         head->thread_id(), head->commit_error,
                         YESNO(head->get_transaction()->m_flags.pending)));
    /*
      Sessions are not skipped here because of an earlier flush or sync
      error (see the note before the assert below). Each session is
      attached to and, if its commit_low flag is set, committed in the
      engines.
    */
#ifndef NDEBUG
    stage_manager.clear_preempt_status(head);
#endif
    /*
      Only sessions that were assigned a sequence number take part in
      the dependency tracking; the tracker is updated under its mutex.
    */
    if (head->get_transaction()->sequence_number != SEQ_UNINIT)
    {
      mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
      m_dependency_tracker.update_max_committed(head);
      mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);
    }
    /*
      Flush/Sync error should be ignored and continue
      to commit phase. And thd->commit_error cannot be
      COMMIT_ERROR at this moment.
    */
    assert(head->commit_error != THD::CE_COMMIT_ERROR);
    /*
      Scoped object constructed from the leader and the queued session;
      it lives until the end of this loop iteration, i.e. across the
      engine commit below.
    */
#ifndef EMBEDDED_LIBRARY
    Thd_backup_and_restore switch_thd(thd, head);
#endif /* !EMBEDDED_LIBRARY */
    bool all= head->get_transaction()->m_flags.real_commit;
    if (head->get_transaction()->m_flags.commit_low)
    {
      /* head is parked to have exited append() */
      assert(head->get_transaction()->m_flags.ready_preempt);
      /*
        storage engine commit
       */
      if (ha_commit_low(head, all, false))
        head->commit_error= THD::CE_COMMIT_ERROR;
    }
    DBUG_PRINT("debug", ("commit_error: %d, flags.pending: %s",
                         head->commit_error,
                         YESNO(head->get_transaction()->m_flags.pending)));
  }

  /*
    Handle the GTID of the threads.
    gtid_executed table is kept updated even though transactions fail to be
    logged. That's required by slave auto positioning.
  */
  gtid_state->update_commit_group(first);

  for (THD *head= first ; head ; head = head->next_to_commit)
  {
    /*
      Decrement the prepared XID counter after storage engine commit.
      We also need decrement the prepared XID when encountering a
      flush error or session attach error for avoiding 3-way deadlock
      among user thread, rotate thread and dump thread.
    */
    if (head->get_transaction()->m_flags.xid_written)
      dec_prep_xids(head);
  }
}
9205 
9206 /**
9207   Process after commit for a sequence of sessions.
9208 
9209   @param thd The "master" thread
9210   @param first First thread in the queue of threads to commit
9211  */
9212 
9213 void
process_after_commit_stage_queue(THD * thd,THD * first)9214 MYSQL_BIN_LOG::process_after_commit_stage_queue(THD *thd, THD *first)
9215 {
9216   for (THD *head= first; head; head= head->next_to_commit)
9217   {
9218     if (head->get_transaction()->m_flags.run_hooks &&
9219         head->commit_error != THD::CE_COMMIT_ERROR)
9220     {
9221 
9222       /*
9223         TODO: This hook here should probably move outside/below this
9224               if and be the only after_commit invocation left in the
9225               code.
9226       */
9227 #ifndef EMBEDDED_LIBRARY
9228       Thd_backup_and_restore switch_thd(thd, head);
9229 #endif /* !EMBEDDED_LIBRARY */
9230       bool all= head->get_transaction()->m_flags.real_commit;
9231       (void) RUN_HOOK(transaction, after_commit, (head, all));
9232       /*
9233         When after_commit finished for the transaction, clear the run_hooks flag.
9234         This allow other parts of the system to check if after_commit was called.
9235       */
9236       head->get_transaction()->m_flags.run_hooks= false;
9237     }
9238   }
9239 }
9240 
#ifndef NDEBUG
/**
  Names for the stages, used for debug tracing only (the table exists
  only when NDEBUG is not defined).

  The table is indexed with a Stage_manager::StageID value in
  MYSQL_BIN_LOG::change_stage. Presumably the order of the entries has
  to match the order of the StageID enumerators -- verify against the
  Stage_manager declaration when adding a stage.
*/
static const char* g_stage_name[] = {
  "FLUSH",
  "SYNC",
  "COMMIT",
};
#endif
9249 
9250 
9251 /**
9252   Enter a stage of the ordered commit procedure.
9253 
9254   Entering is stage is done by:
9255 
9256   - Atomically enqueueing a queue of processes (which is just one for
9257     the first phase).
9258 
9259   - If the queue was empty, the thread is the leader for that stage
9260     and it should process the entire queue for that stage.
9261 
9262   - If the queue was not empty, the thread is a follower and can go
9263     waiting for the commit to finish.
9264 
9265   The function will lock the stage mutex if it was designated the
9266   leader for the phase.
9267 
9268   @param thd    Session structure
9269   @param stage  The stage to enter
9270   @param queue  Queue of threads to enqueue for the stage
9271   @param stage_mutex Mutex for the stage
9272 
9273   @retval true  The thread should "bail out" and go waiting for the
9274                 commit to finish
9275   @retval false The thread is the leader for the stage and should do
9276                 the processing.
9277 */
9278 
9279 bool
change_stage(THD * thd,Stage_manager::StageID stage,THD * queue,mysql_mutex_t * leave_mutex,mysql_mutex_t * enter_mutex)9280 MYSQL_BIN_LOG::change_stage(THD *thd,
9281                             Stage_manager::StageID stage, THD *queue,
9282                             mysql_mutex_t *leave_mutex,
9283                             mysql_mutex_t *enter_mutex)
9284 {
9285   DBUG_ENTER("MYSQL_BIN_LOG::change_stage");
9286   DBUG_PRINT("enter", ("thd: 0x%llx, stage: %s, queue: 0x%llx",
9287                        (ulonglong) thd, g_stage_name[stage], (ulonglong) queue));
9288   assert(0 <= stage && stage < Stage_manager::STAGE_COUNTER);
9289   assert(enter_mutex);
9290   assert(queue);
9291   /*
9292     enroll_for will release the leave_mutex once the sessions are
9293     queued.
9294   */
9295   if (!stage_manager.enroll_for(stage, queue, leave_mutex))
9296   {
9297     assert(!thd_get_cache_mngr(thd)->dbug_any_finalized());
9298     DBUG_RETURN(true);
9299   }
9300 
9301   /*
9302     We do not lock the enter_mutex if it is LOCK_log when rotating binlog
9303     caused by logging incident log event, since it is already locked.
9304   */
9305   bool need_lock_enter_mutex=
9306     !(is_rotating_caused_by_incident && enter_mutex == &LOCK_log);
9307 
9308   if (need_lock_enter_mutex)
9309     mysql_mutex_lock(enter_mutex);
9310   else
9311     mysql_mutex_assert_owner(enter_mutex);
9312 
9313   DBUG_RETURN(false);
9314 }
9315 
9316 
9317 
9318 /**
9319   Flush the I/O cache to file.
9320 
9321   Flush the binary log to the binlog file if any byte where written
9322   and signal that the binary log file has been updated if the flush
9323   succeeds.
9324 */
9325 
9326 int
flush_cache_to_file(my_off_t * end_pos_var)9327 MYSQL_BIN_LOG::flush_cache_to_file(my_off_t *end_pos_var)
9328 {
9329   if (flush_io_cache(&log_file))
9330   {
9331     THD *thd= current_thd;
9332     thd->commit_error= THD::CE_FLUSH_ERROR;
9333     return ER_ERROR_ON_WRITE;
9334   }
9335   *end_pos_var= my_b_tell(&log_file);
9336   return 0;
9337 }
9338 
9339 
9340 /**
9341   Call fsync() to sync the file to disk.
9342 */
9343 std::pair<bool, bool>
sync_binlog_file(bool force)9344 MYSQL_BIN_LOG::sync_binlog_file(bool force)
9345 {
9346   bool synced= false;
9347   unsigned int sync_period= get_sync_period();
9348   if (force || (sync_period && ++sync_counter >= sync_period))
9349   {
9350     sync_counter= 0;
9351 
9352     /**
9353       On *pure non-transactional* workloads there is a small window
9354       in time where a concurrent rotate might be able to close
9355       the file before the sync is actually done. In that case,
9356       ignore the bad file descriptor errors.
9357 
9358       Transactional workloads (InnoDB) are not affected since the
9359       the rotation will not happen until all transactions have
9360       committed to the storage engine, thence decreased the XID
9361       counters.
9362 
9363       TODO: fix this properly even for non-transactional storage
9364             engines.
9365      */
9366     if (DBUG_EVALUATE_IF("simulate_error_during_sync_binlog_file", 1,
9367                          mysql_file_sync(log_file.file,
9368                                          MYF(MY_WME | MY_IGNORE_BADFD))))
9369     {
9370       THD *thd= current_thd;
9371       thd->commit_error= THD::CE_SYNC_ERROR;
9372       return std::make_pair(true, synced);
9373     }
9374     synced= true;
9375   }
9376   return std::make_pair(false, synced);
9377 }
9378 
9379 
9380 /**
9381    Helper function executed when leaving @c ordered_commit.
9382 
9383    This function contain the necessary code for fetching the error
9384    code, doing post-commit checks, and wrapping up the commit if
9385    necessary.
9386 
9387    It is typically called when enter_stage indicates that the thread
9388    should bail out, and also when the ultimate leader thread finishes
9389    executing @c ordered_commit.
9390 
9391    It is typically used in this manner:
9392    @code
9393    if (enter_stage(thd, Thread_queue::FLUSH_STAGE, thd, &LOCK_log))
9394      return finish_commit(thd);
9395    @endcode
9396 
9397    @return Error code if the session commit failed, or zero on
9398    success.
9399  */
9400 int
finish_commit(THD * thd)9401 MYSQL_BIN_LOG::finish_commit(THD *thd)
9402 {
9403   DBUG_ENTER("MYSQL_BIN_LOG::finish_commit");
9404   DEBUG_SYNC(thd, "reached_finish_commit");
9405   /*
9406     In some unlikely situations, it can happen that binary
9407     log is closed before the thread flushes it's cache.
9408     In that case, clear the caches before doing commit.
9409   */
9410   if (unlikely(!is_open()))
9411   {
9412     binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
9413     if (cache_mngr)
9414       cache_mngr->reset();
9415   }
9416   if (thd->get_transaction()->sequence_number != SEQ_UNINIT)
9417   {
9418     mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
9419     m_dependency_tracker.update_max_committed(thd);
9420     mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);
9421   }
9422   if (thd->get_transaction()->m_flags.commit_low)
9423   {
9424     const bool all= thd->get_transaction()->m_flags.real_commit;
9425     /*
9426       Now flush error and sync erros are ignored and we are continuing and
9427       committing. And at this time, commit_error cannot be COMMIT_ERROR.
9428     */
9429     assert(thd->commit_error != THD::CE_COMMIT_ERROR);
9430     /*
9431       storage engine commit
9432     */
9433     if (ha_commit_low(thd, all, false))
9434       thd->commit_error= THD::CE_COMMIT_ERROR;
9435     /*
9436       Decrement the prepared XID counter after storage engine commit
9437     */
9438     if (thd->get_transaction()->m_flags.xid_written)
9439       dec_prep_xids(thd);
9440     /*
9441       If commit succeeded, we call the after_commit hook
9442 
9443       TODO: This hook here should probably move outside/below this
9444             if and be the only after_commit invocation left in the
9445             code.
9446     */
9447     if ((thd->commit_error != THD::CE_COMMIT_ERROR) &&
9448         thd->get_transaction()->m_flags.run_hooks)
9449     {
9450       (void) RUN_HOOK(transaction, after_commit, (thd, all));
9451       thd->get_transaction()->m_flags.run_hooks= false;
9452     }
9453   }
9454   else if (thd->get_transaction()->m_flags.xid_written)
9455     dec_prep_xids(thd);
9456 
9457   /*
9458     If the ordered commit didn't updated the GTIDs for this thd yet
9459     at process_commit_stage_queue (i.e. --binlog-order-commits=0)
9460     the thd still has the ownership of a GTID and we must handle it.
9461   */
9462   if (!thd->owned_gtid.is_empty())
9463   {
9464     /*
9465       Gtid is added to gtid_state.executed_gtids and removed from owned_gtids
9466       on update_on_commit().
9467     */
9468     if (thd->commit_error == THD::CE_NONE)
9469     {
9470       gtid_state->update_on_commit(thd);
9471     }
9472     else
9473       gtid_state->update_on_rollback(thd);
9474   }
9475 
9476   DBUG_EXECUTE_IF("leaving_finish_commit",
9477                   {
9478                     const char act[]=
9479                       "now SIGNAL signal_leaving_finish_commit";
9480                     assert(!debug_sync_set_action(current_thd,
9481                                                   STRING_WITH_LEN(act)));
9482                   };);
9483 
9484   assert(thd->commit_error || !thd->get_transaction()->m_flags.run_hooks);
9485   assert(!thd_get_cache_mngr(thd)->dbug_any_finalized());
9486   DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
9487                         thd->thread_id(), thd->commit_error));
9488   /*
9489     flush or sync errors are handled by the leader of the group
9490     (using binlog_error_action). Hence treat only COMMIT_ERRORs as errors.
9491   */
9492   DBUG_RETURN(thd->commit_error == THD::CE_COMMIT_ERROR);
9493 }
9494 
9495 /**
9496    Auxiliary function used in ordered_commit.
9497 */
call_after_sync_hook(THD * queue_head)9498 static inline int call_after_sync_hook(THD *queue_head)
9499 {
9500   const char *log_file= NULL;
9501   my_off_t pos= 0;
9502 
9503   if (NO_HOOK(binlog_storage))
9504     return 0;
9505 
9506   assert(queue_head != NULL);
9507   for (THD *thd= queue_head; thd != NULL; thd= thd->next_to_commit)
9508     if (likely(thd->commit_error == THD::CE_NONE))
9509       thd->get_trans_fixed_pos(&log_file, &pos);
9510 
9511   if (DBUG_EVALUATE_IF("simulate_after_sync_hook_error", 1, 0) ||
9512       RUN_HOOK(binlog_storage, after_sync, (queue_head, log_file, pos)))
9513   {
9514     sql_print_error("Failed to run 'after_sync' hooks");
9515     return ER_ERROR_ON_WRITE;
9516   }
9517   return 0;
9518 }
9519 
9520 /**
9521   Helper function to handle flush or sync stage errors.
9522   If binlog_error_action= ABORT_SERVER, server will be aborted
9523   after reporting the error to the client.
9524   If binlog_error_action= IGNORE_ERROR, binlog will be closed
9525   for the reset of the life time of the server. close() call is protected
9526   with LOCK_log to avoid any parallel operations on binary log.
9527 
9528   @param thd Thread object that faced flush/sync error
9529   @param need_lock_log
9530                        > Indicates true if LOCk_log is needed before closing
9531                          binlog (happens when we are handling sync error)
9532                        > Indicates false if LOCK_log is already acquired
9533                          by the thread (happens when we are handling flush
9534                          error)
9535   @param message Message stating the reason of the failure
9536 
9537   @return void
9538 */
handle_binlog_flush_or_sync_error(THD * thd,bool need_lock_log,const char * message)9539 void MYSQL_BIN_LOG::handle_binlog_flush_or_sync_error(THD *thd,
9540                                                       bool need_lock_log,
9541                                                       const char* message)
9542 {
9543   char errmsg[MYSQL_ERRMSG_SIZE]= {0};
9544   if (!message)
9545     sprintf(errmsg, "An error occurred during %s stage of the commit. "
9546             "'binlog_error_action' is set to '%s'.",
9547             thd->commit_error== THD::CE_FLUSH_ERROR ? "flush" : "sync",
9548             binlog_error_action == ABORT_SERVER ? "ABORT_SERVER" : "IGNORE_ERROR");
9549   else
9550     strncpy(errmsg, message, MYSQL_ERRMSG_SIZE-1);
9551   if (binlog_error_action == ABORT_SERVER)
9552   {
9553     char err_buff[MYSQL_ERRMSG_SIZE + 25];
9554     sprintf(err_buff, "%s Server is being stopped.", errmsg);
9555     exec_binlog_error_action_abort(err_buff);
9556   }
9557   else
9558   {
9559     DEBUG_SYNC(thd, "before_binlog_closed_due_to_error");
9560     if (need_lock_log)
9561       mysql_mutex_lock(&LOCK_log);
9562     else
9563       mysql_mutex_assert_owner(&LOCK_log);
9564     /*
9565       It can happen that other group leader encountered
9566       error and already closed the binary log. So print
9567       error only if it is in open state. But we should
9568       call close() always just in case if the previous
9569       close did not close index file.
9570     */
9571     if (is_open())
9572     {
9573       sql_print_error("%s Hence turning logging off for the whole duration "
9574                       "of the MySQL server process. To turn it on again: fix "
9575                       "the cause, shutdown the MySQL server and restart it.",
9576                       errmsg);
9577     }
9578     close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, false/*need_lock_log=false*/,
9579           true/*need_lock_index=true*/);
9580     /*
9581       If there is a write error (flush/sync stage) and if
9582       binlog_error_action=IGNORE_ERROR, clear the error
9583       and allow the commit to happen in storage engine.
9584     */
9585     if (check_write_error(thd))
9586       thd->clear_error();
9587 
9588     if (need_lock_log)
9589       mysql_mutex_unlock(&LOCK_log);
9590     DEBUG_SYNC(thd, "after_binlog_closed_due_to_error");
9591   }
9592 }
/**
  Flush and commit the transaction.

  This will execute an ordered flush and commit of all outstanding
  transactions and is the main function for the binary log group
  commit logic. The function performs the ordered commit in two
  phases.

  The first phase flushes the caches to the binary log and under
  LOCK_log and marks all threads that were flushed as not pending.

  The second phase executes under LOCK_commit and commits all
  transactions in order.

  The procedure is:

  1. Queue ourselves for flushing.
  2. Grab the log lock, which might result in blocking if the mutex is
     already held by another thread.
  3. If we were not committed while waiting for the lock
     1. Fetch the queue
     2. For each thread in the queue:
        a. Attach to it
        b. Flush the caches, saving any error code
     3. Flush and sync (depending on the value of sync_binlog).
     4. Signal that the binary log was updated
  4. Release the log lock
  5. Grab the commit lock
     1. For each thread in the queue:
        a. If there were no error when flushing and the transaction shall be committed:
           - Commit the transaction, saving the result of executing the commit.
  6. Release the commit lock
  7. Call purge, if any of the committed thread requested a purge.
  8. Return with the saved error code

  @todo The use of @c skip_commit is a hack that we use since the @c
  TC_LOG Interface does not contain functions to handle
  savepoints. Once the binary log is eliminated as a handlerton and
  the @c TC_LOG interface is extended with savepoint handling, this
  parameter can be removed.

  @param thd Session to commit transaction for
  @param all   This is @c true if this is a real transaction commit, and
               @c false otherwise.
  @param skip_commit
               This is @c true if the call to @c ha_commit_low should
               be skipped (it is handled by the caller somehow) and @c
               false otherwise (the normal case).

  @return Zero on success, non-zero if the commit failed
          (thd->commit_error is THD::CE_COMMIT_ERROR). In the
          WSREP_EMULATE_BINLOG path the result of ha_commit_low() is
          returned as is.
 */
int MYSQL_BIN_LOG::ordered_commit(THD *thd, bool all, bool skip_commit)
{
  DBUG_ENTER("MYSQL_BIN_LOG::ordered_commit");
  int flush_error= 0, sync_error= 0;
  my_off_t total_bytes= 0;
  bool do_rotate= false;

#ifdef WITH_WSREP
  if (WSREP_EMULATE_BINLOG(thd))
  {
    /*
      Skip group commit, just do storage engine commit.
    */
    int rcode = ha_commit_low(thd, all);

    /* If there is a MyISAM statement inside an InnoDB transaction, we may
       have events in the stmt cache; they are discarded here.
    */
    binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
    if(!cache_mngr->stmt_cache.is_binlog_empty())
    {
      WSREP_DEBUG("stmt transaction inside MST, SQL: %s", WSREP_QUERY(thd));
      cache_mngr->stmt_cache.reset();
    }
    DBUG_RETURN(rcode);
  }
#endif /* WITH_WSREP */

  /*
    These values are used while flushing a transaction, so clear
    everything.

    Notes:

    - It would be good if we could keep transaction coordinator
      log-specific data out of the THD structure, but that is not the
      case right now.

    - Everything in the transaction structure is reset when calling
      ha_commit_low since that calls Transaction_ctx::cleanup.
  */
  thd->get_transaction()->m_flags.pending= true;
  thd->commit_error= THD::CE_NONE;
  thd->next_to_commit= NULL;
  thd->durability_property= HA_IGNORE_DURABILITY;
  thd->get_transaction()->m_flags.real_commit= all;
  thd->get_transaction()->m_flags.xid_written= false;
  thd->get_transaction()->m_flags.commit_low= !skip_commit;
  thd->get_transaction()->m_flags.run_hooks= !skip_commit;
#ifndef NDEBUG
  /*
     The group commit Leader may have to wait for follower whose transaction
     is not ready to be preempted. Initially the status is pessimistic.
     Preemption guarding logic is necessary only when !NDEBUG is set.
     It won't be required for the dbug-off case as long as the follower won't
     execute any thread-specific write access code in this method, which is
     the case as of current.
  */
  thd->get_transaction()->m_flags.ready_preempt= 0;
#endif

  DBUG_PRINT("enter", ("flags.pending: %s, commit_error: %d, thread_id: %u",
                       YESNO(thd->get_transaction()->m_flags.pending),
                       thd->commit_error, thd->thread_id()));

  DEBUG_SYNC(thd, "bgc_before_flush_stage");

  /*
    Stage #1: flushing transactions to binary log

    While flushing, we allow new threads to enter and will process
    them in due time. Once the queue was empty, we cannot reap
    anything more since it is possible that a thread entered and
    appointed itself leader for the flush phase.
  */

#ifdef HAVE_REPLICATION
  if (has_commit_order_manager(thd))
  {
    Slave_worker *worker= dynamic_cast<Slave_worker *>(thd->rli_slave);
    Commit_order_manager *mngr= worker->get_commit_order_manager();

    /* A failed wait is reported to the caller as a commit error. */
    if (mngr->wait_for_its_turn(worker, all))
    {
      thd->commit_error= THD::CE_COMMIT_ERROR;
      DBUG_RETURN(thd->commit_error);
    }

    if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log))
      DBUG_RETURN(finish_commit(thd));
  }
  else
#endif
  if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log))
  {
    DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
                          thd->thread_id(), thd->commit_error));
    DBUG_RETURN(finish_commit(thd));
  }

  /*
    change_stage() returned false, so this thread carries on and
    processes the flush stage queue itself.
  */
  THD *wait_queue= NULL, *final_queue= NULL;
  mysql_mutex_t *leave_mutex_before_commit_stage= NULL;
  my_off_t flush_end_pos= 0;
  bool update_binlog_end_pos_after_sync;
  if (unlikely(!is_open()))
  {
    final_queue= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
    leave_mutex_before_commit_stage= &LOCK_log;
    /*
      binary log is closed, flush stage and sync stage should be
      ignored. Binlog cache should be cleared, but instead of doing
      it here, do that work in 'finish_commit' function so that
      leader and followers thread caches will be cleared.
    */
    goto commit_stage;
  }
  DEBUG_SYNC(thd, "waiting_in_the_middle_of_flush_stage");
  flush_error= process_flush_stage_queue(&total_bytes, &do_rotate,
                                                 &wait_queue);

  if (flush_error == 0 && total_bytes > 0)
    flush_error= flush_cache_to_file(&flush_end_pos);
  DBUG_EXECUTE_IF("crash_after_flush_binlog", DBUG_SUICIDE(););

  /*
    With a sync period of 1 the binlog end position is published only
    after the sync stage (see below), otherwise right after the flush.
  */
  update_binlog_end_pos_after_sync= (get_sync_period() == 1);

  /*
    If the flush finished successfully, we can call the after_flush
    hook. Being invoked here, we have the guarantee that the hook is
    executed before the before/after_send_hooks on the dump thread
    preventing race conditions among these plug-ins.
  */
  if (flush_error == 0)
  {
    const char *file_name_ptr= log_file_name + dirname_length(log_file_name);
    assert(flush_end_pos != 0);
    if (RUN_HOOK(binlog_storage, after_flush,
                 (thd, file_name_ptr, flush_end_pos)))
    {
      sql_print_error("Failed to run 'after_flush' hooks");
      flush_error= ER_ERROR_ON_WRITE;
    }

    if (!update_binlog_end_pos_after_sync)
      update_binlog_end_pos();
    DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
  }

  if (flush_error)
  {
    /*
      Handle flush error (if any) after leader finishes its flush stage.
      LOCK_log is still held here, hence need_lock_log is false.
    */
    handle_binlog_flush_or_sync_error(thd, false /* need_lock_log */,
              (thd->commit_error == THD::CE_FLUSH_GNO_EXHAUSTED_ERROR)
              ? ER(ER_GNO_EXHAUSTED) : NULL);
  }

  DEBUG_SYNC(thd, "bgc_after_flush_stage_before_sync_stage");

  /*
    Stage #2: Syncing binary log file to disk
  */

  if (change_stage(thd, Stage_manager::SYNC_STAGE, wait_queue, &LOCK_log, &LOCK_sync))
  {
    DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
                          thd->thread_id(), thd->commit_error));
    DBUG_RETURN(finish_commit(thd));
  }

  /*
    Shall introduce a delay only if it is going to do sync
    in this ongoing SYNC stage. The "+1" used below in the
    if condition is to count the ongoing sync stage.
    When sync_binlog=0 (where we never do sync in BGC group),
    it is considered as a special case and delay will be executed
    for every group just like how it is done when sync_binlog= 1.
  */
  if (!flush_error && (sync_counter + 1 >= get_sync_period()))
    stage_manager.wait_count_or_timeout(opt_binlog_group_commit_sync_no_delay_count,
                                        opt_binlog_group_commit_sync_delay,
                                        Stage_manager::SYNC_STAGE);

  final_queue= stage_manager.fetch_queue_for(Stage_manager::SYNC_STAGE);

  if (flush_error == 0 && total_bytes > 0)
  {
    DEBUG_SYNC(thd, "before_sync_binlog_file");
    std::pair<bool, bool> result= sync_binlog_file(false);
    sync_error= result.first;
  }

  if (update_binlog_end_pos_after_sync)
  {
    THD *tmp_thd= final_queue;
    const char *binlog_file= NULL;
    my_off_t pos= 0;
    /* Find the last session of the queue; its position is published. */
    while (tmp_thd->next_to_commit != NULL)
      tmp_thd= tmp_thd->next_to_commit;
    if (flush_error == 0 && sync_error == 0)
    {
      tmp_thd->get_trans_fixed_pos(&binlog_file, &pos);
      update_binlog_end_pos(binlog_file, pos);
    }
  }

  DEBUG_SYNC(thd, "bgc_after_sync_stage_before_commit_stage");

  leave_mutex_before_commit_stage= &LOCK_sync;
  /*
    Stage #3: Commit all transactions in order.

    This stage is skipped if we do not need to order the commits and
    each thread have to execute the handlerton commit instead.

    However, since we are keeping the lock from the previous stage, we
    need to unlock it if we skip the stage.

    We must also step commit_clock before the ha_commit_low() is called
    either in ordered fashion (by the leader of this stage) or by the
    threads themselves.

    We are delaying the handling of sync error until
    all locks are released but we should not enter into
    commit stage if binlog_error_action is ABORT_SERVER.
  */
commit_stage:
  if (opt_binlog_order_commits &&
      (sync_error == 0 || binlog_error_action != ABORT_SERVER))
  {
    if (change_stage(thd, Stage_manager::COMMIT_STAGE,
                     final_queue, leave_mutex_before_commit_stage,
                     &LOCK_commit))
    {
      DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
                            thd->thread_id(), thd->commit_error));
      DBUG_RETURN(finish_commit(thd));
    }
    THD *commit_queue= stage_manager.fetch_queue_for(Stage_manager::COMMIT_STAGE);
    DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
                    DEBUG_SYNC(thd, "before_process_commit_stage_queue"););

    if (flush_error == 0 && sync_error == 0)
      sync_error= call_after_sync_hook(commit_queue);

    /*
      process_commit_stage_queue will call update_on_commit or
      update_on_rollback for the GTID owned by each thd in the queue.

      This will be done this way to guarantee that GTIDs are added to
      gtid_executed in order, to avoid creating unnecessary temporary
      gaps and keep gtid_executed as a single interval at all times.

      If we allow each thread to call update_on_commit only when they
      are at finish_commit, the GTID order cannot be guaranteed and
      temporary gaps may appear in gtid_executed. When this happen,
      the server would have to add and remove intervals from the
      Gtid_set, and adding and removing intervals requires a mutex,
      which would reduce performance.
    */
    process_commit_stage_queue(thd, commit_queue);
    mysql_mutex_unlock(&LOCK_commit);
    /*
      Process after_commit after LOCK_commit is released for avoiding
      3-way deadlock among user thread, rotate thread and dump thread.
    */
    process_after_commit_stage_queue(thd, commit_queue);
    final_queue= commit_queue;
  }
  else
  {
    /* Commit stage skipped: release the mutex kept from the previous stage. */
    if (leave_mutex_before_commit_stage)
      mysql_mutex_unlock(leave_mutex_before_commit_stage);
    if (flush_error == 0 && sync_error == 0)
      sync_error= call_after_sync_hook(final_queue);
  }

  /*
    Handle sync error after we release all locks in order to avoid deadlocks
  */
  if (sync_error)
    handle_binlog_flush_or_sync_error(thd, true /* need_lock_log */, NULL);

  /* Commit done so signal all waiting threads */
  stage_manager.signal_done(final_queue);

  /*
    Finish the commit before executing a rotate, or run the risk of a
    deadlock. We don't need the return value here since it is in
    thd->commit_error, which is returned below.
  */
  (void) finish_commit(thd);

  /*
    If we need to rotate, we do it without commit error.
    Otherwise the thd->commit_error will be possibly reset.
   */
  if (DBUG_EVALUATE_IF("force_rotate", 1, 0) ||
      (do_rotate && thd->commit_error == THD::CE_NONE &&
       !is_rotating_caused_by_incident))
  {
    /*
      Do not force the rotate as several consecutive groups may
      request unnecessary rotations.

      NOTE: Run purge_logs wo/ holding LOCK_log because it does not
      need the mutex. Otherwise causes various deadlocks.
    */

    DEBUG_SYNC(thd, "ready_to_do_rotation");
    bool check_purge= false;
    mysql_mutex_lock(&LOCK_log);
    /*
      If rotate fails then depends on binlog_error_action variable
      appropriate action will be taken inside rotate call.
    */
    int error= rotate(false, &check_purge);
    mysql_mutex_unlock(&LOCK_log);

    if (error)
      thd->commit_error= THD::CE_COMMIT_ERROR;
    else if (check_purge)
      purge();
  }
  /*
    flush or sync errors are handled above (using binlog_error_action).
    Hence treat only COMMIT_ERRORs as errors.
  */
  DBUG_RETURN(thd->commit_error == THD::CE_COMMIT_ERROR);
}
9973 
9974 
9975 /**
9976   MYSQLD server recovers from last crashed binlog.
9977 
9978   @param log           IO_CACHE of the crashed binlog.
9979   @param fdle          Format_description_log_event of the crashed binlog.
9980   @param valid_pos     The position of the last valid transaction or
9981                        event(non-transaction) of the crashed binlog.
9982 
9983   @retval
9984     0                  ok
9985   @retval
9986     1                  error
9987 */
recover(IO_CACHE * log,Format_description_log_event * fdle,my_off_t * valid_pos)9988 int MYSQL_BIN_LOG::recover(IO_CACHE *log, Format_description_log_event *fdle,
9989                             my_off_t *valid_pos)
9990 {
9991   Log_event  *ev;
9992   HASH xids;
9993   MEM_ROOT mem_root;
9994   /*
9995     The flag is used for handling the case that a transaction
9996     is partially written to the binlog.
9997   */
9998   bool in_transaction= FALSE;
9999   int memory_page_size= my_getpagesize();
10000 
10001 #ifdef WITH_WSREP
10002   /*
10003     Read current wsrep position from storage engines to have consistent
10004     end position for binlog scan.
10005   */
10006   wsrep_uuid_t uuid;
10007   wsrep_seqno_t seqno;
10008   if (WSREP_ON)
10009   {
10010     wsrep_get_SE_checkpoint(uuid, seqno);
10011     char uuid_str[40];
10012     wsrep_uuid_print(&uuid, uuid_str, sizeof(uuid_str));
10013     WSREP_INFO("Binlog recovery, found wsrep position %s:%lld", uuid_str,
10014                (long long)seqno);
10015   }
10016   const wsrep_seqno_t last_xid_seqno= (WSREP_ON) ? seqno :
10017                                                    WSREP_SEQNO_UNDEFINED;
10018   wsrep_seqno_t cur_xid_seqno= WSREP_SEQNO_UNDEFINED;
10019 #endif /* WITH_WSREP */
10020 
10021   if (! fdle->is_valid() ||
10022       my_hash_init(&xids, &my_charset_bin, memory_page_size/3, 0,
10023                    sizeof(my_xid), 0, 0, 0,
10024                    key_memory_binlog_recover_exec))
10025     goto err1;
10026 
10027   init_alloc_root(key_memory_binlog_recover_exec,
10028                   &mem_root, memory_page_size, memory_page_size);
10029 
10030   while ((ev= Log_event::read_log_event(log, 0, fdle, TRUE))
10031          && ev->is_valid())
10032   {
10033 #ifdef WITH_WSREP
10034     if (last_xid_seqno != WSREP_SEQNO_UNDEFINED &&
10035         last_xid_seqno == cur_xid_seqno)
10036     {
10037       delete ev;
10038       continue;
10039     }
10040 #endif
10041     if (ev->get_type_code() == binary_log::QUERY_EVENT &&
10042         !strcmp(((Query_log_event*)ev)->query, "BEGIN"))
10043       in_transaction= TRUE;
10044 
10045     if (ev->get_type_code() == binary_log::QUERY_EVENT &&
10046         !strcmp(((Query_log_event*)ev)->query, "COMMIT"))
10047     {
10048       assert(in_transaction == TRUE);
10049       in_transaction= FALSE;
10050     }
10051     else if (ev->get_type_code() == binary_log::XID_EVENT)
10052     {
10053       assert(in_transaction == TRUE);
10054       in_transaction= FALSE;
10055       Xid_log_event *xev=(Xid_log_event *)ev;
10056       uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
10057                                       sizeof(xev->xid));
10058       if (!x || my_hash_insert(&xids, x))
10059         goto err2;
10060 #ifdef WITH_WSREP
10061       cur_xid_seqno= xev->xid;
10062 #endif /* WITH_WSREP */
10063     }
10064 
10065     /*
10066       Recorded valid position for the crashed binlog file
10067       which did not contain incorrect events. The following
10068       positions increase the variable valid_pos:
10069 
10070       1 -
10071         ...
10072         <---> HERE IS VALID <--->
10073         GTID
10074         BEGIN
10075         ...
10076         COMMIT
10077         ...
10078 
10079       2 -
10080         ...
10081         <---> HERE IS VALID <--->
10082         GTID
10083         DDL/UTILITY
10084         ...
10085 
10086       In other words, the following positions do not increase
10087       the variable valid_pos:
10088 
10089       1 -
10090         GTID
10091         <---> HERE IS VALID <--->
10092         ...
10093 
10094       2 -
10095         GTID
10096         BEGIN
10097         <---> HERE IS VALID <--->
10098         ...
10099     */
10100     if (!log->error && !in_transaction &&
10101         !is_gtid_event(ev))
10102       *valid_pos= my_b_tell(log);
10103 
10104     delete ev;
10105   }
10106 
10107   /*
10108     Call ha_recover if and only if there is a registered engine that
10109     does 2PC, otherwise in DBUG builds calling ha_recover directly
10110     will result in an assert. (Production builds would be safe since
10111     ha_recover returns right away if total_ha_2pc <= opt_log_bin.)
10112    */
10113   if (total_ha_2pc > 1 && ha_recover(&xids))
10114     goto err2;
10115 
10116   free_root(&mem_root, MYF(0));
10117   my_hash_free(&xids);
10118   return 0;
10119 
10120 err2:
10121   free_root(&mem_root, MYF(0));
10122   my_hash_free(&xids);
10123 err1:
10124   sql_print_error("Crash recovery failed. Either correct the problem "
10125                   "(if it's, for example, out of memory error) and restart, "
10126                   "or delete (or rename) binary log and start mysqld with "
10127                   "--tc-heuristic-recover={commit|rollback}");
10128   return 1;
10129 }
10130 
report_missing_purged_gtids(const Gtid_set * slave_executed_gtid_set,const char ** errmsg)10131 void MYSQL_BIN_LOG::report_missing_purged_gtids(const Gtid_set* slave_executed_gtid_set,
10132                                          const char** errmsg)
10133 {
10134   DBUG_ENTER("MYSQL_BIN_LOG::report_missing_purged_gtids");
10135   THD *thd= current_thd;
10136   Gtid_set gtid_missing(gtid_state->get_lost_gtids()->get_sid_map());
10137   gtid_missing.add_gtid_set(gtid_state->get_lost_gtids());
10138   gtid_missing.remove_gtid_set(slave_executed_gtid_set);
10139 
10140   String tmp_uuid;
10141   uchar name[]= "slave_uuid";
10142 
10143   /* Protects thd->user_vars. */
10144   mysql_mutex_lock(&thd->LOCK_thd_data);
10145   user_var_entry *entry=
10146     (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
10147   if (entry && entry->length() > 0)
10148     tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
10149   mysql_mutex_unlock(&thd->LOCK_thd_data);
10150 
10151 
10152   char* missing_gtids= NULL;
10153   char* slave_executed_gtids= NULL;
10154   gtid_missing.to_string(&missing_gtids, false);
10155   slave_executed_gtid_set->to_string(&slave_executed_gtids, false);
10156 
10157   /*
10158      Log the information about the missing purged GTIDs to the error log
10159      if the message is less than MAX_LOG_BUFFER_SIZE.
10160   */
10161   std::ostringstream log_info;
10162   log_info << "The missing transactions are '"<< missing_gtids <<"'";
10163   const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
10164 
10165   /* Don't consider the "%s" in the format string. Subtract 2 from the
10166      total length */
10167   uint total_length= (strlen(log_msg) - 2 + log_info.str().length());
10168 
10169   DBUG_EXECUTE_IF("simulate_long_missing_gtids",
10170                   { total_length= MAX_LOG_BUFFER_SIZE + 1;});
10171 
10172   if (total_length > MAX_LOG_BUFFER_SIZE)
10173     log_info.str("To find the missing purged transactions, run \"SELECT"
10174                  " @@GLOBAL.GTID_PURGED\" on the master, then run \"SELECT"
10175                  " CONCAT(RECEIVED_TRANSACTION_SET, ',', @@GLOBAL.GTID_EXECUTED)"
10176                  " FROM PERFORMANCE_SCHEMA.replication_connection_status\" on"
10177                  " the slave, and then run \"SELECT GTID_SUBTRACT(<master_set>,"
10178                  " <slave_set>)\" on any server");
10179 
10180   sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
10181                     log_info.str().c_str());
10182 
10183   /*
10184      Send the information about the slave executed GTIDs and missing
10185      purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
10186   */
10187   std::ostringstream gtid_info;
10188   gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
10189             << "', and the missing transactions are '"<< missing_gtids <<"'";
10190   *errmsg= ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS);
10191 
10192   /* Don't consider the "%s" in the format string. Subtract 2 from the
10193      total length */
10194   total_length= (strlen(*errmsg) - 2 + gtid_info.str().length());
10195 
10196   DBUG_EXECUTE_IF("simulate_long_missing_gtids",
10197                   { total_length= MYSQL_ERRMSG_SIZE + 1;});
10198 
10199   if (total_length > MYSQL_ERRMSG_SIZE)
10200     gtid_info.str("The GTID sets and the missing purged transactions are too"
10201                   " long to print in this message. For more information,"
10202                   " please see the master's error log or the manual for"
10203                   " GTID_SUBTRACT");
10204 
10205   /* Buffer for formatting the message about the missing GTIDs. */
10206   static char buff[MYSQL_ERRMSG_SIZE];
10207   my_snprintf(buff, MYSQL_ERRMSG_SIZE, *errmsg, gtid_info.str().c_str());
10208   *errmsg= const_cast<const char*>(buff);
10209 
10210   my_free(missing_gtids);
10211   my_free(slave_executed_gtids);
10212   DBUG_VOID_RETURN;
10213 }
10214 
report_missing_gtids(const Gtid_set * previous_gtid_set,const Gtid_set * slave_executed_gtid_set,const char ** errmsg)10215 void MYSQL_BIN_LOG::report_missing_gtids(const Gtid_set* previous_gtid_set,
10216                                          const Gtid_set* slave_executed_gtid_set,
10217                                          const char** errmsg)
10218 {
10219   DBUG_ENTER("MYSQL_BIN_LOG::report_missing_gtids");
10220   THD *thd=current_thd;
10221   char* missing_gtids= NULL;
10222   char* slave_executed_gtids= NULL;
10223   Gtid_set gtid_missing(slave_executed_gtid_set->get_sid_map());
10224   gtid_missing.add_gtid_set(slave_executed_gtid_set);
10225   gtid_missing.remove_gtid_set(previous_gtid_set);
10226   gtid_missing.to_string(&missing_gtids, false);
10227   slave_executed_gtid_set->to_string(&slave_executed_gtids, false);
10228 
10229   String tmp_uuid;
10230   uchar name[]= "slave_uuid";
10231 
10232   /* Protects thd->user_vars. */
10233   mysql_mutex_lock(&thd->LOCK_thd_data);
10234 
10235   user_var_entry *entry=
10236     (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
10237   if (entry && entry->length() > 0)
10238     tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
10239   mysql_mutex_unlock(&thd->LOCK_thd_data);
10240 
10241   /*
10242      Log the information about the missing purged GTIDs to the error log
10243      if the message is less than MAX_LOG_BUFFER_SIZE.
10244   */
10245   std::ostringstream log_info;
10246   log_info << "If the binary log files have been deleted from disk,"
10247       " check the consistency of 'GTID_PURGED' variable."
10248       " The missing transactions are '"<< missing_gtids <<"'";
10249   const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
10250 
10251   /* Don't consider the "%s" in the format string. Subtract 2 from the
10252      total length */
10253   if ((strlen(log_msg) - 2 + log_info.str().length()) > MAX_LOG_BUFFER_SIZE)
10254     log_info.str("To find the missing purged transactions, run \"SELECT"
10255                  " @@GLOBAL.GTID_PURGED\" on the master, then run \"SELECT"
10256                  " CONCAT(RECEIVED_TRANSACTION_SET, ',', @@GLOBAL.GTID_EXECUTED)"
10257                  " FROM PERFORMANCE_SCHEMA.replication_connection_status\" on"
10258                  " the slave, and then run \"SELECT GTID_SUBTRACT(<master_set>,"
10259                  " <slave_set>)\" on any server");
10260 
10261   sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
10262                     log_info.str().c_str());
10263 
10264   /*
10265      Send the information about the slave executed GTIDs and missing
10266      purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
10267   */
10268   std::ostringstream gtid_info;
10269   gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
10270             << "', and the missing transactions are '"<< missing_gtids <<"'";
10271   *errmsg= ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS);
10272 
10273   /* Don't consider the "%s" in the format string. Subtract 2 from the
10274      total length */
10275   if ((strlen(*errmsg) - 2 + gtid_info.str().length()) > MYSQL_ERRMSG_SIZE)
10276     gtid_info.str("The GTID sets and the missing purged transactions are too"
10277                   " long to print in this message. For more information,"
10278                   " please see the master's error log or the manual for"
10279                   " GTID_SUBTRACT");
10280   /* Buffer for formatting the message about the missing GTIDs. */
10281   static char buff[MYSQL_ERRMSG_SIZE];
10282   my_snprintf(buff, MYSQL_ERRMSG_SIZE, *errmsg, gtid_info.str().c_str());
10283   *errmsg= const_cast<const char*>(buff);
10284 
10285   my_free(missing_gtids);
10286   my_free(slave_executed_gtids);
10287 
10288   DBUG_VOID_RETURN;
10289 }
10290 
is_binlog_cache_empty(bool is_transactional)10291 bool THD::is_binlog_cache_empty(bool is_transactional)
10292 {
10293   DBUG_ENTER("THD::is_binlog_cache_empty(bool)");
10294 
10295   // If opt_bin_log==0, it is not safe to call thd_get_cache_mngr
10296   // because binlog_hton has not been completely set up.
10297 #ifndef WITH_WSREP
10298   assert(opt_bin_log);
10299 #endif
10300   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
10301 
10302   // cache_mngr is NULL until we call thd->binlog_setup_trx_data, so
10303   // we assert that this has been done.
10304   assert(cache_mngr != NULL);
10305 
10306   binlog_cache_data *cache_data=
10307     cache_mngr->get_binlog_cache_data(is_transactional);
10308   assert(cache_data != NULL);
10309 
10310   DBUG_RETURN(cache_data->is_binlog_empty());
10311 }
10312 
10313 /*
10314   These functions are placed in this file since they need access to
10315   binlog_hton, which has internal linkage.
10316 */
10317 
binlog_setup_trx_data()10318 int THD::binlog_setup_trx_data()
10319 {
10320   DBUG_ENTER("THD::binlog_setup_trx_data");
10321   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
10322 
10323   if (cache_mngr)
10324     DBUG_RETURN(0);                             // Already set up
10325 
10326   IO_CACHE stmt_cache_log, trx_cache_log;
10327   memset(&stmt_cache_log, 0, sizeof(stmt_cache_log));
10328   memset(&trx_cache_log, 0, sizeof(trx_cache_log));
10329 
10330   cache_mngr= (binlog_cache_mngr*) my_malloc(key_memory_binlog_cache_mngr,
10331                                              sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
10332   if (!cache_mngr)
10333   {
10334     DBUG_RETURN(1);
10335   }
10336   if (open_cached_file(&stmt_cache_log, mysql_tmpdir,
10337                        LOG_PREFIX, binlog_stmt_cache_size, MYF(MY_WME)))
10338   {
10339     my_free(cache_mngr);
10340     DBUG_RETURN(1);                      // Didn't manage to set it up
10341   }
10342   if (open_cached_file(&trx_cache_log, mysql_tmpdir,
10343                        LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
10344   {
10345     close_cached_file(&stmt_cache_log);
10346     my_free(cache_mngr);
10347     DBUG_RETURN(1);
10348   }
10349   DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) cache_mngr));
10350   thd_set_ha_data(this, binlog_hton, cache_mngr);
10351 
10352   cache_mngr= new (thd_get_cache_mngr(this))
10353               binlog_cache_mngr(max_binlog_stmt_cache_size,
10354                                 &binlog_stmt_cache_use,
10355                                 &binlog_stmt_cache_disk_use,
10356                                 max_binlog_cache_size,
10357                                 &binlog_cache_use,
10358                                 &binlog_cache_disk_use,
10359                                 stmt_cache_log,
10360                                 trx_cache_log);
10361   DBUG_RETURN(0);
10362 }
10363 
10364 /**
10365 
10366 */
register_binlog_handler(THD * thd,bool trx)10367 void register_binlog_handler(THD *thd, bool trx)
10368 {
10369   DBUG_ENTER("register_binlog_handler");
10370   /*
10371     If this is the first call to this function while processing a statement,
10372     the transactional cache does not have a savepoint defined. So, in what
10373     follows:
10374       . an implicit savepoint is defined;
10375       . callbacks are registered;
10376       . binary log is set as read/write.
10377 
10378     The savepoint allows for truncating the trx-cache transactional changes
10379     fail. Callbacks are necessary to flush caches upon committing or rolling
10380     back a statement or a transaction. However, notifications do not happen
10381     if the binary log is set as read/write.
10382   */
10383   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
10384   if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
10385   {
10386     /*
10387       Set an implicit savepoint in order to be able to truncate a trx-cache.
10388     */
10389     my_off_t pos= 0;
10390     binlog_trans_log_savepos(thd, &pos);
10391     cache_mngr->trx_cache.set_prev_position(pos);
10392 
10393     /*
10394       Set callbacks in order to be able to call commmit or rollback.
10395     */
10396     if (trx)
10397       trans_register_ha(thd, TRUE, binlog_hton, NULL);
10398     trans_register_ha(thd, FALSE, binlog_hton, NULL);
10399 
10400     /*
10401       Set the binary log as read/write otherwise callbacks are not called.
10402     */
10403     thd->ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
10404   }
10405   DBUG_VOID_RETURN;
10406 }
10407 
10408 /**
10409   Function to start a statement and optionally a transaction for the
10410   binary log.
10411 
10412   This function does three things:
10413     - Starts a transaction if not in autocommit mode or if a BEGIN
10414       statement has been seen.
10415 
10416     - Start a statement transaction to allow us to truncate the cache.
10417 
10418     - Save the currrent binlog position so that we can roll back the
10419       statement by truncating the cache.
10420 
10421       We only update the saved position if the old one was undefined,
10422       the reason is that there are some cases (e.g., for CREATE-SELECT)
10423       where the position is saved twice (e.g., both in
10424       Query_result_create::prepare() and THD::binlog_write_table_map()), but
10425       we should use the first. This means that calls to this function
10426       can be used to start the statement before the first table map
10427       event, to include some extra events.
10428 
10429   Note however that IMMEDIATE_LOGGING implies that the statement is
10430   written without BEGIN/COMMIT.
10431 
10432   @param thd         Thread variable
10433   @param start_event The first event requested to be written into the
10434                      binary log
10435  */
binlog_start_trans_and_stmt(THD * thd,Log_event * start_event)10436 static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event)
10437 {
10438   DBUG_ENTER("binlog_start_trans_and_stmt");
10439 
10440   /*
10441     Initialize the cache manager if this was not done yet.
10442   */
10443   if (thd->binlog_setup_trx_data())
10444     DBUG_RETURN(1);
10445 
10446   /*
10447     Retrieve the appropriated cache.
10448   */
10449   bool is_transactional= start_event->is_using_trans_cache();
10450   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
10451   binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_transactional);
10452 
10453   /*
10454     If the event is requesting immediatly logging, there is no need to go
10455     further down and set savepoint and register callbacks.
10456   */
10457   if (start_event->is_using_immediate_logging())
10458     DBUG_RETURN(0);
10459 
10460   register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode());
10461 
10462   /*
10463     If the cache is empty log "BEGIN" at the beginning of every transaction.
10464     Here, a transaction is either a BEGIN..COMMIT/ROLLBACK block or a single
10465     statement in autocommit mode.
10466   */
10467   if (cache_data->is_binlog_empty())
10468   {
10469     static const char begin[]= "BEGIN";
10470     const char *query= NULL;
10471     char buf[XID::ser_buf_size];
10472     char xa_start[sizeof("XA START") + 1 + sizeof(buf)];
10473     XID_STATE *xs= thd->get_transaction()->xid_state();
10474     int qlen= sizeof(begin) - 1;
10475 
10476     if (is_transactional && xs->has_state(XID_STATE::XA_ACTIVE))
10477     {
10478       /*
10479         XA-prepare logging case.
10480       */
10481       qlen= sprintf(xa_start, "XA START %s", xs->get_xid()->serialize(buf));
10482       query= xa_start;
10483     }
10484     else
10485     {
10486       /*
10487         Regular transaction case.
10488       */
10489       query= begin;
10490     }
10491 
10492     Query_log_event qinfo(thd, query, qlen,
10493                           is_transactional, false, true, 0, true);
10494     if (cache_data->write_event(thd, &qinfo))
10495       DBUG_RETURN(1);
10496   }
10497 
10498   DBUG_RETURN(0);
10499 }
10500 
10501 /**
10502   This function writes a table map to the binary log.
10503   Note that in order to keep the signature uniform with related methods,
10504   we use a redundant parameter to indicate whether a transactional table
10505   was changed or not.
10506   Sometimes it will write a Rows_query_log_event into binary log before
10507   the table map too.
10508 
10509   @param table             a pointer to the table.
10510   @param is_transactional  @c true indicates a transactional table,
10511                            otherwise @c false a non-transactional.
10512   @param binlog_rows_query @c true indicates a Rows_query log event
10513                            will be binlogged before table map,
10514                            otherwise @c false indicates it will not
10515                            be binlogged.
10516   @return
10517     nonzero if an error pops up when writing the table map event
10518     or the Rows_query log event.
10519 */
binlog_write_table_map(TABLE * table,bool is_transactional,bool binlog_rows_query)10520 int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
10521                                 bool binlog_rows_query)
10522 {
10523   int error;
10524   DBUG_ENTER("THD::binlog_write_table_map");
10525   DBUG_PRINT("enter", ("table: 0x%lx  (%s: #%llu)",
10526                        (long) table, table->s->table_name.str,
10527                        table->s->table_map_id.id()));
10528 
10529   /* Pre-conditions */
10530 #ifdef WITH_WSREP
10531   assert(is_current_stmt_binlog_format_row() &&
10532          (WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open()));
10533 #else
10534   assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
10535 #endif /* WITH_WSREP */
10536   assert(table->s->table_map_id.is_valid());
10537 
10538   Table_map_log_event
10539     the_event(this, table, table->s->table_map_id, is_transactional);
10540 
10541   binlog_start_trans_and_stmt(this, &the_event);
10542 
10543   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
10544 
10545   binlog_cache_data *cache_data=
10546     cache_mngr->get_binlog_cache_data(is_transactional);
10547 
10548   if (binlog_rows_query && this->query().str)
10549   {
10550     /* Write the Rows_query_log_event into binlog before the table map */
10551     Rows_query_log_event
10552       rows_query_ev(this, this->query().str, this->query().length);
10553     if ((error= cache_data->write_event(this, &rows_query_ev)))
10554       DBUG_RETURN(error);
10555   }
10556 
10557   if ((error= cache_data->write_event(this, &the_event)))
10558     DBUG_RETURN(error);
10559 
10560   binlog_table_maps++;
10561   DBUG_RETURN(0);
10562 }
10563 
10564 /**
10565   This function retrieves a pending row event from a cache which is
10566   specified through the parameter @c is_transactional. Respectively, when it
10567   is @c true, the pending event is returned from the transactional cache.
10568   Otherwise from the non-transactional cache.
10569 
10570   @param is_transactional  @c true indicates a transactional cache,
10571                            otherwise @c false a non-transactional.
10572   @return
10573     The row event if any.
10574 */
10575 Rows_log_event*
binlog_get_pending_rows_event(bool is_transactional) const10576 THD::binlog_get_pending_rows_event(bool is_transactional) const
10577 {
10578   Rows_log_event* rows= NULL;
10579   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
10580 
10581   /*
10582     This is less than ideal, but here's the story: If there is no cache_mngr,
10583     prepare_pending_rows_event() has never been called (since the cache_mngr
10584     is set up there). In that case, we just return NULL.
10585    */
10586   if (cache_mngr)
10587   {
10588     binlog_cache_data *cache_data=
10589       cache_mngr->get_binlog_cache_data(is_transactional);
10590 
10591     rows= cache_data->pending();
10592   }
10593   return (rows);
10594 }
10595 
10596 /**
10597    @param db    db name c-string to be inserted into alphabetically sorted
10598                 THD::binlog_accessed_db_names list.
10599 
10600                 Note, that space for both the data and the node
10601                 struct are allocated in THD::main_mem_root.
10602                 The list lasts for the top-level query time and is reset
10603                 in @c THD::cleanup_after_query().
10604 */
10605 void
add_to_binlog_accessed_dbs(const char * db_param)10606 THD::add_to_binlog_accessed_dbs(const char *db_param)
10607 {
10608   char *after_db;
10609   /*
10610     binlog_accessed_db_names list is to maintain the database
10611     names which are referenced in a given command.
10612     Prior to bug 17806014 fix, 'main_mem_root' memory root used
10613     to store this list. The 'main_mem_root' scope is till the end
10614     of the query. Hence it caused increasing memory consumption
10615     problem in big procedures like the ones mentioned below.
10616     Eg: CALL p1() where p1 is having 1,00,000 create and drop tables.
10617     'main_mem_root' is freed only at the end of the command CALL p1()'s
10618     execution. But binlog_accessed_db_names list scope is only till the
10619     individual statements specified the procedure(create/drop statements).
10620     Hence the memory allocated in 'main_mem_root' was left uncleared
10621     until the p1's completion, even though it is not required after
10622     completion of individual statements.
10623 
10624     Instead of using 'main_mem_root' whose scope is complete query execution,
10625     now the memroot is changed to use 'thd->mem_root' whose scope is until the
10626     individual statement in CALL p1(). 'thd->mem_root' is set to 'execute_mem_root'
10627     in the context of procedure and it's scope is till the individual statement
10628     in CALL p1() and thd->memroot is equal to 'main_mem_root' in the context
10629     of a normal 'top level query'.
10630 
10631     Eg: a) create table t1(i int); => If this function is called while
10632            processing this statement, thd->memroot is equal to &main_mem_root
10633            which will be freed immediately after executing this statement.
10634         b) CALL p1() -> p1 contains create table t1(i int); => If this function
10635            is called while processing create table statement which is inside
10636            a stored procedure, then thd->memroot is equal to 'execute_mem_root'
10637            which will be freed immediately after executing this statement.
10638     In both a and b case, thd->memroot will be freed immediately and will not
10639     increase memory consumption.
10640 
10641     A special case(stored functions/triggers):
10642     Consider the following example:
10643     create function f1(i int) returns int
10644     begin
10645       insert into db1.t1 values (1);
10646       insert into db2.t1 values (2);
10647     end;
10648     When we are processing SELECT f1(), the list should contain db1, db2 names.
10649     Since thd->mem_root contains 'execute_mem_root' in the context of
10650     stored function, the mem root will be freed after adding db1 in
10651     the list and when we are processing the second statement and when we try
10652     to add 'db2' in the db1's list, it will lead to crash as db1's memory
10653     is already freed. To handle this special case, if in_sub_stmt is set
10654     (which is true incase of stored functions/triggers), we use &main_mem_root,
10655     if not set we will use thd->memroot which changes it's value to
10656     'execute_mem_root' or '&main_mem_root' depends on the context.
10657    */
10658   MEM_ROOT *db_mem_root= in_sub_stmt ? &main_mem_root : mem_root;
10659 
10660   if (!binlog_accessed_db_names)
10661     binlog_accessed_db_names= new (db_mem_root) List<char>;
10662 
10663   if (binlog_accessed_db_names->elements >  MAX_DBS_IN_EVENT_MTS)
10664   {
10665     push_warning_printf(this, Sql_condition::SL_WARNING,
10666                         ER_MTS_UPDATED_DBS_GREATER_MAX,
10667                         ER(ER_MTS_UPDATED_DBS_GREATER_MAX),
10668                         MAX_DBS_IN_EVENT_MTS);
10669     return;
10670   }
10671 
10672   after_db= strdup_root(db_mem_root, db_param);
10673 
10674   /*
10675      sorted insertion is implemented with first rearranging data
10676      (pointer to char*) of the links and final appending of the least
10677      ordered data to create a new link in the list.
10678   */
10679   if (binlog_accessed_db_names->elements != 0)
10680   {
10681     List_iterator<char> it(*get_binlog_accessed_db_names());
10682 
10683     while (it++)
10684     {
10685       char *swap= NULL;
10686       char **ref_cur_db= it.ref();
10687       int cmp= strcmp(after_db, *ref_cur_db);
10688 
10689       assert(!swap || cmp < 0);
10690 
10691       if (cmp == 0)
10692       {
10693         after_db= NULL;  /* dup to ignore */
10694         break;
10695       }
10696       else if (swap || cmp > 0)
10697       {
10698         swap= *ref_cur_db;
10699         *ref_cur_db= after_db;
10700         after_db= swap;
10701       }
10702     }
10703   }
10704   if (after_db)
10705     binlog_accessed_db_names->push_back(after_db, db_mem_root);
10706 }
10707 
10708 /*
10709   Tells if two (or more) tables have auto_increment columns and we want to
10710   lock those tables with a write lock.
10711 
10712   SYNOPSIS
10713     has_two_write_locked_tables_with_auto_increment
10714       tables        Table list
10715 
10716   NOTES:
10717     Call this function only when you have established the list of all tables
10718     which you'll want to update (including stored functions, triggers, views
10719     inside your statement).
10720 */
10721 
10722 static bool
has_write_table_with_auto_increment(TABLE_LIST * tables)10723 has_write_table_with_auto_increment(TABLE_LIST *tables)
10724 {
10725   for (TABLE_LIST *table= tables; table; table= table->next_global)
10726   {
10727     /* we must do preliminary checks as table->table may be NULL */
10728     if (!table->is_placeholder() &&
10729         table->table->found_next_number_field &&
10730         (table->lock_type >= TL_WRITE_ALLOW_WRITE))
10731       return 1;
10732   }
10733 
10734   return 0;
10735 }
10736 
10737 /*
10738    checks if we have select tables in the table list and write tables
10739    with auto-increment column.
10740 
10741   SYNOPSIS
10742    has_two_write_locked_tables_with_auto_increment_and_select
10743       tables        Table list
10744 
10745   RETURN VALUES
10746 
10747    -true if the table list has atleast one table with auto-increment column
10748 
10749 
10750          and atleast one table to select from.
10751    -false otherwise
10752 */
10753 
10754 static bool
has_write_table_with_auto_increment_and_select(TABLE_LIST * tables)10755 has_write_table_with_auto_increment_and_select(TABLE_LIST *tables)
10756 {
10757   bool has_select= false;
10758   bool has_auto_increment_tables = has_write_table_with_auto_increment(tables);
10759   for(TABLE_LIST *table= tables; table; table= table->next_global)
10760   {
10761      if (!table->is_placeholder() &&
10762         (table->lock_type <= TL_READ_NO_INSERT))
10763       {
10764         has_select= true;
10765         break;
10766       }
10767   }
10768   return(has_select && has_auto_increment_tables);
10769 }
10770 
10771 /*
10772   Tells if there is a table whose auto_increment column is a part
10773   of a compound primary key while is not the first column in
10774   the table definition.
10775 
10776   @param tables Table list
10777 
10778   @return true if the table exists, fais if does not.
10779 */
10780 
10781 static bool
has_write_table_auto_increment_not_first_in_pk(TABLE_LIST * tables)10782 has_write_table_auto_increment_not_first_in_pk(TABLE_LIST *tables)
10783 {
10784   for (TABLE_LIST *table= tables; table; table= table->next_global)
10785   {
10786     /* we must do preliminary checks as table->table may be NULL */
10787     if (!table->is_placeholder() &&
10788         table->table->found_next_number_field &&
10789         (table->lock_type >= TL_WRITE_ALLOW_WRITE)
10790         && table->table->s->next_number_keypart != 0)
10791       return 1;
10792   }
10793 
10794   return 0;
10795 }
10796 
10797 /*
10798   Function to check whether the table in query uses a fulltext parser
10799   plugin or not.
10800 
10801   @param s - table share pointer.
10802 
10803   @retval TRUE - The table uses fulltext parser plugin.
10804   @retval FALSE - Otherwise.
10805 */
fulltext_unsafe_set(TABLE_SHARE * s)10806 static bool inline fulltext_unsafe_set(TABLE_SHARE *s)
10807 {
10808   for (unsigned int i= 0 ; i < s->keys ; i++)
10809   {
10810     if ((s->key_info[i].flags & HA_USES_PARSER) && s->keys_in_use.is_set(i))
10811       return TRUE;
10812   }
10813   return FALSE;
10814 }
10815 #ifndef NDEBUG
get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)10816 const char * get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)
10817 {
10818    switch (locked_tables_mode)
10819    {
10820    case LTM_NONE:
10821      return "LTM_NONE";
10822    case LTM_LOCK_TABLES:
10823      return "LTM_LOCK_TABLES";
10824    case LTM_PRELOCKED:
10825      return "LTM_PRELOCKED";
10826    case LTM_PRELOCKED_UNDER_LOCK_TABLES:
10827      return "LTM_PRELOCKED_UNDER_LOCK_TABLES";
10828    default:
10829      return "Unknown table lock mode";
10830    }
10831 }
10832 #endif
10833 
10834 /**
10835   Decide on logging format to use for the statement and issue errors
10836   or warnings as needed.  The decision depends on the following
10837   parameters:
10838 
10839   - The logging mode, i.e., the value of binlog_format.  Can be
10840     statement, mixed, or row.
10841 
10842   - The type of statement.  There are three types of statements:
10843     "normal" safe statements; unsafe statements; and row injections.
10844     An unsafe statement is one that, if logged in statement format,
10845     might produce different results when replayed on the slave (e.g.,
10846     queries with a LIMIT clause).  A row injection is either a BINLOG
10847     statement, or a row event executed by the slave's SQL thread.
10848 
10849   - The capabilities of tables modified by the statement.  The
10850     *capabilities vector* for a table is a set of flags associated
10851     with the table.  Currently, it only includes two flags: *row
10852     capability flag* and *statement capability flag*.
10853 
10854     The row capability flag is set if and only if the engine can
10855     handle row-based logging. The statement capability flag is set if
10856     and only if the table can handle statement-based logging.
10857 
10858   Decision table for logging format
10859   ---------------------------------
10860 
10861   The following table summarizes how the format and generated
10862   warning/error depends on the tables' capabilities, the statement
10863   type, and the current binlog_format.
10864 
10865      Row capable        N NNNNNNNNN YYYYYYYYY YYYYYYYYY
10866      Statement capable  N YYYYYYYYY NNNNNNNNN YYYYYYYYY
10867 
10868      Statement type     * SSSUUUIII SSSUUUIII SSSUUUIII
10869 
10870      binlog_format      * SMRSMRSMR SMRSMRSMR SMRSMRSMR
10871 
10872      Logged format      - SS-S----- -RR-RR-RR SRRSRR-RR
10873      Warning/Error      1 --2732444 5--5--6-- ---7--6--
10874 
10875   Legend
10876   ------
10877 
10878   Row capable:    N - Some table not row-capable, Y - All tables row-capable
10879   Stmt capable:   N - Some table not stmt-capable, Y - All tables stmt-capable
10880   Statement type: (S)afe, (U)nsafe, or Row (I)njection
10881   binlog_format:  (S)TATEMENT, (M)IXED, or (R)OW
10882   Logged format:  (S)tatement or (R)ow
10883   Warning/Error:  Warnings and error messages are as follows:
10884 
10885   1. Error: Cannot execute statement: binlogging impossible since both
10886      row-incapable engines and statement-incapable engines are
10887      involved.
10888 
10889   2. Error: Cannot execute statement: binlogging impossible since
10890      BINLOG_FORMAT = ROW and at least one table uses a storage engine
10891      limited to statement-logging.
10892 
10893   3. Error: Cannot execute statement: binlogging of unsafe statement
10894      is impossible when storage engine is limited to statement-logging
10895      and BINLOG_FORMAT = MIXED.
10896 
10897   4. Error: Cannot execute row injection: binlogging impossible since
10898      at least one table uses a storage engine limited to
10899      statement-logging.
10900 
10901   5. Error: Cannot execute statement: binlogging impossible since
10902      BINLOG_FORMAT = STATEMENT and at least one table uses a storage
10903      engine limited to row-logging.
10904 
10905   6. Error: Cannot execute row injection: binlogging impossible since
10906      BINLOG_FORMAT = STATEMENT.
10907 
10908   7. Warning: Unsafe statement binlogged in statement format since
10909      BINLOG_FORMAT = STATEMENT.
10910 
10911   In addition, we can produce the following error (not depending on
10912   the variables of the decision diagram):
10913 
10914   8. Error: Cannot execute statement: binlogging impossible since more
10915      than one engine is involved and at least one engine is
10916      self-logging.
10917 
10918   9. Error: Do not allow users to modify a gtid_executed table
10919      explicitly by a XA transaction.
10920 
10921   For each error case above, the statement is prevented from being
10922   logged, we report an error, and roll back the statement.  For
10923   warnings, we set the thd->binlog_flags variable: the warning will be
10924   printed only if the statement is successfully logged.
10925 
10926   @see THD::binlog_query
10927 
10928   @param[in] thd    Client thread
10929   @param[in] tables Tables involved in the query
10930 
10931   @retval 0 No error; statement can be logged.
10932   @retval -1 One of the error conditions above applies (1, 2, 4, 5, 6 or 9).
10933 */
10934 
int THD::decide_logging_format(TABLE_LIST *tables)
{
  /*
    Overview: inspect every table used by the statement, collect the
    binlog capabilities of the storage engines involved, mark the
    statement unsafe where needed, and then either report an error,
    schedule an unsafe-statement warning, or request row format through
    set_current_stmt_binlog_format_row_if_mixed().  Returns 0 when the
    statement may proceed and -1 when an error was reported.
  */
  DBUG_ENTER("THD::decide_logging_format");
  DBUG_PRINT("info", ("query: %s", query().str));
  DBUG_PRINT("info", ("variables.binlog_format: %lu",
                      variables.binlog_format));
  DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags(): 0x%x",
                      lex->get_stmt_unsafe_flags()));

  DEBUG_SYNC(current_thd, "begin_decide_logging_format");

  reset_binlog_local_stmt_filter();

  /*
    We should not decide logging format if the binlog is closed or
    binlogging is off, or if the statement is filtered out from the
    binlog by filtering rules.
  */
#ifdef WITH_WSREP
  if ((WSREP_EMULATE_BINLOG_NNULL(this) ||
       (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG))) &&
      !(WSREP_BINLOG_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT    &&
        !binlog_filter->db_ok(m_db.str)))
#else
  if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) &&
      !(variables.binlog_format == BINLOG_FORMAT_STMT &&
        !binlog_filter->db_ok(m_db.str)))
#endif /* WITH_WSREP */
  {
    /*
      Compute one bit field with the union of all the engine
      capabilities, and one with the intersection of all the engine
      capabilities.
    */
    handler::Table_flags flags_write_some_set= 0;
    handler::Table_flags flags_access_some_set= 0;
    handler::Table_flags flags_write_all_set=
      HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE;

    /*
       Set when tables of different storage engines are about to be
       updated.  For example: Innodb and Falcon; Innodb and MyIsam.
    */
    my_bool multi_write_engine= FALSE;
    /*
       Set when tables of different storage engines are about to be
       accessed (read or written).  For example: Innodb and Falcon;
       Innodb and MyIsam.
    */
    my_bool multi_access_engine= FALSE;
    /*
       Identifies if a table is changed.
    */
    my_bool is_write= FALSE;
    /*
       A pointer to a previous table that was changed.
    */
    TABLE* prev_write_table= NULL;
    /*
       A pointer to a previous table that was accessed.
    */
    TABLE* prev_access_table= NULL;
    /*
      True if at least one written table is transactional.
    */
    bool write_to_some_transactional_table= false;
    /*
      True if at least one written table is non-transactional.
    */
    bool write_to_some_non_transactional_table= false;
    /*
       True if all non-transactional tables that have been updated
       are temporary.
    */
    bool write_all_non_transactional_are_tmp_tables= true;
    /**
      The number of tables used in the current statement,
      that should be replicated.
    */
    uint replicated_tables_count= 0;
    /**
      The number of tables written to in the current statement,
      that should not be replicated.
      A table should not be replicated when it is considered
      'local' to a MySQL instance.
      Currently, these tables are:
      - mysql.slow_log
      - mysql.general_log
      - mysql.slave_relay_log_info
      - mysql.slave_master_info
      - mysql.slave_worker_info
      - performance_schema.*
      - TODO: information_schema.*
      In practice, from this list, only performance_schema.* tables
      are written to by user queries.
    */
    uint non_replicated_tables_count= 0;
    /**
      Indicates whether we already reported a warning
      on modifying gtid_executed table.  Holds the (non-zero) result of
      warn_or_err_on_modify_gtid_table() once it has been called.
    */
    int warned_gtid_executed_table= 0;
#ifndef NDEBUG
    {
      DBUG_PRINT("debug", ("prelocked_mode: %s",
                           get_locked_tables_mode_name(locked_tables_mode)));
    }
#endif

    if (variables.binlog_format != BINLOG_FORMAT_ROW && tables)
    {
      /*
        DML statements that modify a table with an auto_increment column based on
        rows selected from a table are unsafe as the order in which the rows are
        fetched from the select tables cannot be determined and may differ on
        master and slave.
       */
      if (has_write_table_with_auto_increment_and_select(tables))
        lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_WRITE_AUTOINC_SELECT);

      if (has_write_table_auto_increment_not_first_in_pk(tables))
        lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST);

      /*
        A query that modifies an autoinc column in a sub-statement can make
        the master and slave inconsistent.
        We can solve these problems in mixed mode by switching to row
        binlogging if at least one updated table is used by a sub-statement.
       */
      if (lex->requires_prelocking() &&
          has_write_table_with_auto_increment(lex->first_not_own_table()))
        lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS);
    }

    /*
      Get the capabilities vector for all involved storage engines and
      mask out the flags for the binary log.
    */
    for (TABLE_LIST *table= tables; table; table= table->next_global)
    {
      if (table->is_placeholder())
        continue;

      handler::Table_flags const flags= table->table->file->ha_table_flags();

      DBUG_PRINT("info", ("table: %s; ha_table_flags: 0x%llx",
                          table->table_name, flags));

      if (table->table->no_replicate)
      {
        /* Ask gtid_state at most once per statement. */
        if (!warned_gtid_executed_table)
        {
          warned_gtid_executed_table=
            gtid_state->warn_or_err_on_modify_gtid_table(this, table);
          /*
            Do not allow users to modify the gtid_executed table
            explicitly by a XA transaction.
          */
          if (warned_gtid_executed_table == 2)
            DBUG_RETURN(-1);
        }
        /*
          The statement uses a table that is not replicated.
          The following properties about the table:
          - persistent / transient
          - transactional / non transactional
          - temporary / permanent
          - read or write
          - multiple engines involved because of this table
          are not relevant, as this table is completely ignored.
          Because the statement uses a non replicated table,
          using STATEMENT format in the binlog is impossible.
          Either this statement will be discarded entirely,
          or it will be logged (possibly partially) in ROW format.
        */
        lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_TABLE);

        /*
          A written non-replicated table is only counted and then skipped;
          a table that is merely read falls through to the analysis below.
        */
        if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
        {
          non_replicated_tables_count++;
          continue;
        }
      }

      replicated_tables_count++;

      my_bool trans= table->table->file->has_transactions();

      if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
      {
        write_to_some_transactional_table=
          write_to_some_transactional_table || trans;

        write_to_some_non_transactional_table=
          write_to_some_non_transactional_table || !trans;

        /* A different handlerton than the previous written table. */
        if (prev_write_table && prev_write_table->file->ht !=
            table->table->file->ht)
          multi_write_engine= TRUE;

        if (table->table->s->tmp_table)
          lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TEMP_TRANS_TABLE :
                                               LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE);
        else
          lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TRANS_TABLE :
                                               LEX::STMT_WRITES_NON_TRANS_TABLE);

        /*
         Non-transactional updates are allowed when row binlog format is
         used and all non-transactional tables are temporary.
         Binlog format is checked on THD::is_dml_gtid_compatible() method.
        */
        if (!trans)
          write_all_non_transactional_are_tmp_tables=
            write_all_non_transactional_are_tmp_tables &&
            table->table->s->tmp_table;

        flags_write_all_set &= flags;
        flags_write_some_set |= flags;
        is_write= TRUE;

        prev_write_table= table->table;

        /*
          It should be marked unsafe if a table which uses a fulltext parser
          plugin is modified. See also bug#48183.
        */
        if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN))
        {
          if (fulltext_unsafe_set(table->table->s))
            lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN);
        }
        /*
          INSERT...ON DUPLICATE KEY UPDATE on a table with more than one unique
          key can be unsafe. Check for it if the flag is not already set for
          the given statement.
        */
        if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS) &&
            lex->sql_command == SQLCOM_INSERT && lex->duplicates == DUP_UPDATE)
        {
          uint keys= table->table->s->keys, i= 0, unique_keys= 0;
          /* The scan stops as soon as a second unique key has been found. */
          for (KEY* keyinfo= table->table->s->key_info;
               i < keys && unique_keys <= 1; i++, keyinfo++)
          {
            if (keyinfo->flags & HA_NOSAME)
              unique_keys++;
          }
          if (unique_keys > 1 )
            lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS);
        }
      }
      /*
        When the statement uses MATCH, the fulltext check is applied to
        every accessed table, not only to the written ones.
      */
      if(lex->get_using_match())
      {
        if (fulltext_unsafe_set(table->table->s))
          lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN);
      }

      flags_access_some_set |= flags;

      /*
        Register the read access, except for CREATE TABLE without the
        TEMPORARY option.
      */
      if (lex->sql_command != SQLCOM_CREATE_TABLE ||
          (lex->sql_command == SQLCOM_CREATE_TABLE &&
          (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)))
      {
        if (table->table->s->tmp_table)
          lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TEMP_TRANS_TABLE :
                                               LEX::STMT_READS_TEMP_NON_TRANS_TABLE);
        else
          lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TRANS_TABLE :
                                               LEX::STMT_READS_NON_TRANS_TABLE);
      }

      /* A different handlerton than the previous accessed table. */
      if (prev_access_table && prev_access_table->file->ht !=
          table->table->file->ht)
         multi_access_engine= TRUE;

      prev_access_table= table->table;
    }
    assert(!is_write ||
           write_to_some_transactional_table ||
           write_to_some_non_transactional_table);
    /*
      write_all_non_transactional_are_tmp_tables still holds its initial
      value (true) when no non-transactional table was updated at all, so
      we fix its value here.
    */
    write_all_non_transactional_are_tmp_tables=
      write_all_non_transactional_are_tmp_tables &&
      write_to_some_non_transactional_table;

    DBUG_PRINT("info", ("flags_write_all_set: 0x%llx", flags_write_all_set));
    DBUG_PRINT("info", ("flags_write_some_set: 0x%llx", flags_write_some_set));
    DBUG_PRINT("info", ("flags_access_some_set: 0x%llx", flags_access_some_set));
    DBUG_PRINT("info", ("multi_write_engine: %d", multi_write_engine));
    DBUG_PRINT("info", ("multi_access_engine: %d", multi_access_engine));

    /* Non-zero once any error has been reported through my_error(). */
    int error= 0;
    /* Only read after being assigned from lex->get_stmt_unsafe_flags(). */
    int unsafe_flags;

    bool multi_stmt_trans= in_multi_stmt_transaction_mode();
    bool trans_table= trans_has_updated_trans_table(this);
    bool binlog_direct= variables.binlog_direct_non_trans_update;

    if (lex->is_mixed_stmt_unsafe(multi_stmt_trans, binlog_direct,
                                  trans_table, tx_isolation))
      lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MIXED_STATEMENT);
    else if (multi_stmt_trans && trans_table && !binlog_direct &&
             lex->stmt_accessed_table(LEX::STMT_WRITES_NON_TRANS_TABLE))
      lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS);

    /*
      If more than one engine is involved in the statement and at
      least one is doing its own logging (is *self-logging*), the
      statement cannot be logged atomically, so we generate an error
      rather than allowing the binlog to become corrupt.
      When the engines are only accessed (not all written), the statement
      is marked unsafe instead.
    */
    if (multi_write_engine &&
        (flags_write_some_set & HA_HAS_OWN_BINLOGGING))
      my_error((error= ER_BINLOG_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE),
               MYF(0));
    else if (multi_access_engine && flags_access_some_set & HA_HAS_OWN_BINLOGGING)
      lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE);

    /* XA is unsafe for statements */
    if (is_write &&
        !get_transaction()->xid_state()->has_state(XID_STATE::XA_NOTR))
      lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_XA);

    /* Debug hook: pretend every written engine is statement-only. */
    DBUG_EXECUTE_IF("make_stmt_only_engines",
                    {
                      flags_write_all_set= HA_BINLOG_STMT_CAPABLE;
                    };);

    /* both statement-only and row-only engines involved */
    if ((flags_write_all_set & (HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE)) == 0)
    {
      /*
        1. Error: Binary logging impossible since both row-incapable
           engines and statement-incapable engines are involved
      */
      my_error((error= ER_BINLOG_ROW_ENGINE_AND_STMT_ENGINE), MYF(0));
    }
    /* statement-only engines involved */
    else if ((flags_write_all_set & HA_BINLOG_ROW_CAPABLE) == 0)
    {
      if (lex->is_stmt_row_injection())
      {
        /*
          4. Error: Cannot execute row injection since table uses
             storage engine limited to statement-logging
        */
        my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0));
      }
      else if (WSREP_BINLOG_FORMAT(variables.binlog_format) == BINLOG_FORMAT_ROW &&
               sqlcom_can_generate_row_events(this->lex->sql_command))
      {
        /*
          2. Error: Cannot modify table that uses a storage engine
             limited to statement-logging when BINLOG_FORMAT = ROW
        */
        my_error((error= ER_BINLOG_ROW_MODE_AND_STMT_ENGINE), MYF(0));
      }
      else if (variables.binlog_format == BINLOG_FORMAT_MIXED &&
          ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
      {
        /*
          3. Error: Cannot execute statement: binlogging of unsafe
             statement is impossible when storage engine is limited to
             statement-logging and BINLOG_FORMAT = MIXED.
             One error is reported per unsafe reason that is set.
        */
        for (int unsafe_type= 0;
             unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
             unsafe_type++)
          if (unsafe_flags & (1 << unsafe_type))
            my_error((error= ER_BINLOG_UNSAFE_AND_STMT_ENGINE), MYF(0),
                     ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
      }
      else if (is_write && ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
      {
        /*
          7. Warning: Unsafe statement logged as statement due to
             binlog_format = STATEMENT
        */
        binlog_unsafe_warning_flags|= unsafe_flags;
        DBUG_PRINT("info", ("Scheduling warning to be issued by "
                            "binlog_query: '%s'",
                            ER(ER_BINLOG_UNSAFE_STATEMENT)));
        DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
                            binlog_unsafe_warning_flags));
      }
      /* log in statement format! */
    }
    /* no statement-only engines */
    else
    {
      /* binlog_format = STATEMENT */
      if (WSREP_BINLOG_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT)
       {
        if (lex->is_stmt_row_injection())
        {
          /*
            6. Error: Cannot execute row injection since
               BINLOG_FORMAT = STATEMENT
          */
          my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_MODE), MYF(0));
        }
        else if ((flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0 &&
                 sqlcom_can_generate_row_events(this->lex->sql_command))
        {
          /*
            5. Error: Cannot modify table that uses a storage engine
               limited to row-logging when binlog_format = STATEMENT
               (With WSREP the error is only raised when wsrep is not
               active for this thread or wsrep_exec_mode is LOCAL_STATE.)
          */
#ifdef WITH_WSREP
          if (!WSREP_NNULL(this) || wsrep_exec_mode == LOCAL_STATE)
          {
#endif /* WITH_WSREP */
          my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), "");
#ifdef WITH_WSREP
          }
#endif /* WITH_WSREP */
        }
        else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0)
        {
          /*
            7. Warning: Unsafe statement logged as statement due to
               binlog_format = STATEMENT
          */
          binlog_unsafe_warning_flags|= unsafe_flags;
          DBUG_PRINT("info", ("Scheduling warning to be issued by "
                              "binlog_query: '%s'",
                              ER(ER_BINLOG_UNSAFE_STATEMENT)));
          DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
                              binlog_unsafe_warning_flags));
        }
        /* log in statement format! */
      }
      /* No statement-only engines and binlog_format != STATEMENT.
         I.e., nothing prevents us from row logging if needed. */
      else
      {
        if (lex->is_stmt_unsafe() || lex->is_stmt_row_injection()
            || (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0)
        {
#ifndef NDEBUG
          int flags= lex->get_stmt_unsafe_flags();
          DBUG_PRINT("info", ("setting row format for unsafe statement"));
          for (int i= 0; i < Query_tables_list::BINLOG_STMT_UNSAFE_COUNT; i++)
          {
            if (flags & (1 << i))
              DBUG_PRINT("info", ("unsafe reason: %s",
                                  ER(Query_tables_list::binlog_stmt_unsafe_errcode[i])));
          }
          DBUG_PRINT("info", ("is_row_injection=%d",
                              lex->is_stmt_row_injection()));
          DBUG_PRINT("info", ("stmt_capable=%llu",
                              (flags_write_all_set & HA_BINLOG_STMT_CAPABLE)));
#endif
          /* log in row format! */
          set_current_stmt_binlog_format_row_if_mixed();
        }
      }
    }

    /*
      Handle statements that write to non-replicated tables:
      - nothing replicated is written: filter the statement out locally;
      - replicated tables are written too: this is only possible in row
        format, otherwise report an error.
    */
    if (non_replicated_tables_count > 0)
    {
      if ((replicated_tables_count == 0) || ! is_write)
      {
        DBUG_PRINT("info", ("decision: no logging, no replicated table affected"));
        set_binlog_local_stmt_filter();
      }
      else
      {
        if (! is_current_stmt_binlog_format_row())
        {
          my_error((error= ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES), MYF(0));
        }
        else
        {
          clear_binlog_local_stmt_filter();
        }
      }
    }
    else
    {
      clear_binlog_local_stmt_filter();
    }

    /* The GTID compatibility check is skipped once an error was reported. */
    if (!error &&
        !is_dml_gtid_compatible(write_to_some_transactional_table,
                                write_to_some_non_transactional_table,
                                write_all_non_transactional_are_tmp_tables))
      error= 1;

    if (error) {
      DBUG_PRINT("info", ("decision: no logging since an error was generated"));
      DBUG_RETURN(-1);
    }

    if (is_write &&
        lex->sql_command != SQLCOM_END /* rows-event applying by slave */)
    {
      /*
        Master side of DML in the STMT format events parallelization.
        All involved table databases are stored in an abc-ordered name list.
        In case the number of databases exceeds the MAX_DBS_IN_EVENT_MTS
        maximum, the list gathering breaks since it won't be sent to the
        slave.
      */
      for (TABLE_LIST *table= tables; table; table= table->next_global)
      {
        if (table->is_placeholder())
          continue;

        assert(table->table);

        if (table->table->file->referenced_by_foreign_key())
        {
          /*
             FK-referenced dbs can't be gathered currently. The following
             event will be marked for sequential execution on slave.
             The list is reset and a single empty name is stored instead.
          */
          binlog_accessed_db_names= NULL;
          add_to_binlog_accessed_dbs("");
          break;
        }
        if (!is_current_stmt_binlog_format_row())
          add_to_binlog_accessed_dbs(table->db);
      }
    }
    DBUG_PRINT("info", ("decision: logging in %s format",
                        is_current_stmt_binlog_format_row() ?
                        "ROW" : "STATEMENT"));

    if (variables.binlog_format == BINLOG_FORMAT_ROW &&
        (lex->sql_command == SQLCOM_UPDATE ||
         lex->sql_command == SQLCOM_UPDATE_MULTI ||
         lex->sql_command == SQLCOM_DELETE ||
         lex->sql_command == SQLCOM_DELETE_MULTI))
    {
      /* Comma-separated names of the written BLACKHOLE tables. */
      String table_names;
      /*
        Generate a warning for UPDATE/DELETE statements that modify a
        BLACKHOLE table, as row events are not logged in row format.
      */
      for (TABLE_LIST *table= tables; table; table= table->next_global)
      {
        if (table->is_placeholder())
          continue;
        if (table->table->file->ht->db_type == DB_TYPE_BLACKHOLE_DB &&
            table->lock_type >= TL_WRITE_ALLOW_WRITE)
        {
            table_names.append(table->table_name);
            table_names.append(",");
        }
      }
      if (!table_names.is_empty())
      {
        bool is_update= (lex->sql_command == SQLCOM_UPDATE ||
                         lex->sql_command == SQLCOM_UPDATE_MULTI);
        /*
          Replace the last ',' with '.' for table_names
        */
        table_names.replace(table_names.length()-1, 1, ".", 1);
        push_warning_printf(this, Sql_condition::SL_WARNING,
                            WARN_ON_BLOCKHOLE_IN_RBR,
                            ER(WARN_ON_BLOCKHOLE_IN_RBR),
                            is_update ? "UPDATE" : "DELETE",
                            table_names.c_ptr());
      }
    }
  }
  else
  {
    DBUG_PRINT("info", ("decision: no logging since "
                        "mysql_bin_log.is_open() = %d "
                        "and (options & OPTION_BIN_LOG) = 0x%llx "
                        "and binlog_format = %lu "
                        "and binlog_filter->db_ok(db) = %d",
                        mysql_bin_log.is_open(),
                        (variables.option_bits & OPTION_BIN_LOG),
                        variables.binlog_format,
                        binlog_filter->db_ok(m_db.str)));

    /*
      No format decision is made on this path, but tables flagged
      no_replicate are still passed to
      warn_or_err_on_modify_gtid_table(); the scan stops at the first
      table for which it returns non-zero.  The result does not change
      the return value of this function.
    */
    for (TABLE_LIST *table= tables; table; table= table->next_global)
    {
      if (!table->is_placeholder() && table->table->no_replicate &&
          gtid_state->warn_or_err_on_modify_gtid_table(this, table))
        break;
    }
  }

  DEBUG_SYNC(current_thd, "end_decide_logging_format");

  DBUG_RETURN(0);
}
11528 
11529 
11530 /**
11531   Given that a possible violation of gtid consistency has happened,
11532   checks if gtid-inconsistencies are forbidden by the current value of
11533   ENFORCE_GTID_CONSISTENCY and GTID_MODE. If forbidden, generates
11534   error or warning accordingly.
11535 
11536   @param thd The thread that has issued the GTID-violating statement.
11537 
11538   @param error_code The error code to use, if error or warning is to
11539   be generated.
11540 
11541   @retval false Error was generated.
11542   @retval true No error was generated (possibly a warning was generated).
11543 */
handle_gtid_consistency_violation(THD * thd,int error_code)11544 static bool handle_gtid_consistency_violation(THD *thd, int error_code)
11545 {
11546   DBUG_ENTER("handle_gtid_consistency_violation");
11547 
11548   enum_group_type gtid_next_type= thd->variables.gtid_next.type;
11549   global_sid_lock->rdlock();
11550   enum_gtid_consistency_mode gtid_consistency_mode=
11551     get_gtid_consistency_mode();
11552   enum_gtid_mode gtid_mode= get_gtid_mode(GTID_MODE_LOCK_SID);
11553 
11554   DBUG_PRINT("info", ("gtid_next.type=%d gtid_mode=%s "
11555                       "gtid_consistency_mode=%d error=%d query=%s",
11556                       gtid_next_type,
11557                       get_gtid_mode_string(gtid_mode),
11558                       gtid_consistency_mode,
11559                       error_code,
11560                       thd->query().str));
11561 
11562   /*
11563     GTID violations should generate error if:
11564     - GTID_MODE=ON or ON_PERMISSIVE and GTID_NEXT='AUTOMATIC' (since the
11565       transaction is expected to commit using a GTID), or
11566     - GTID_NEXT='UUID:NUMBER' (since the transaction is expected to
11567       commit usinga GTID), or
11568     - ENFORCE_GTID_CONSISTENCY=ON.
11569   */
11570   if ((gtid_next_type == AUTOMATIC_GROUP &&
11571        gtid_mode >= GTID_MODE_ON_PERMISSIVE) ||
11572       gtid_next_type == GTID_GROUP ||
11573       gtid_consistency_mode == GTID_CONSISTENCY_MODE_ON)
11574   {
11575     global_sid_lock->unlock();
11576     my_error(error_code, MYF(0));
11577     DBUG_RETURN(false);
11578   }
11579   else
11580   {
11581     /*
11582       If we are not generating an error, we must increase the counter
11583       of GTID-violating transactions.  This will prevent a concurrent
11584       client from executing a SET GTID_MODE or SET
11585       ENFORCE_GTID_CONSISTENCY statement that would be incompatible
11586       with this transaction.
11587 
11588       If the transaction had already been accounted as a gtid violating
11589       transaction, then don't increment the counters, just issue the
11590       warning below. This prevents calling
11591       begin_automatic_gtid_violating_transaction or
11592       begin_anonymous_gtid_violating_transaction multiple times for the
11593       same transaction, which would make the counter go out of sync.
11594     */
11595     if (!thd->has_gtid_consistency_violation)
11596     {
11597       if (gtid_next_type == AUTOMATIC_GROUP)
11598         gtid_state->begin_automatic_gtid_violating_transaction();
11599       else
11600       {
11601         assert(gtid_next_type == ANONYMOUS_GROUP);
11602         gtid_state->begin_anonymous_gtid_violating_transaction();
11603       }
11604 
11605       /*
11606         If a transaction generates multiple GTID violation conditions,
11607         it must still only update the counters once.  Hence we use
11608         this per-thread flag to keep track of whether the thread has a
11609         consistency or not.  This function must only be called if the
11610         transaction does not already have a GTID violation.
11611       */
11612       thd->has_gtid_consistency_violation= true;
11613     }
11614 
11615     global_sid_lock->unlock();
11616 
11617     // Generate warning if ENFORCE_GTID_CONSISTENCY = WARN.
11618     if (gtid_consistency_mode == GTID_CONSISTENCY_MODE_WARN)
11619     {
11620       // Need to print to log so that replication admin knows when users
11621       // have adjusted their workloads.
11622       sql_print_warning("%s", ER(error_code));
11623       // Need to print to client so that users can adjust their workload.
11624       push_warning(thd, Sql_condition::SL_WARNING, error_code, ER(error_code));
11625     }
11626     DBUG_RETURN(true);
11627   }
11628 }
11629 
11630 
is_ddl_gtid_compatible()11631 bool THD::is_ddl_gtid_compatible()
11632 {
11633   DBUG_ENTER("THD::is_ddl_gtid_compatible");
11634 
11635   // If @@session.sql_log_bin has been manually turned off (only
11636   // doable by SUPER), then no problem, we can execute any statement.
11637   if ((variables.option_bits & OPTION_BIN_LOG) == 0 ||
11638       mysql_bin_log.is_open() == false)
11639     DBUG_RETURN(true);
11640 
11641   DBUG_PRINT("info",
11642              ("SQLCOM_CREATE:%d CREATE-TMP:%d SELECT:%d SQLCOM_DROP:%d DROP-TMP:%d trx:%d",
11643               lex->sql_command == SQLCOM_CREATE_TABLE,
11644               (lex->sql_command == SQLCOM_CREATE_TABLE &&
11645                (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)),
11646               lex->select_lex->item_list.elements,
11647               lex->sql_command == SQLCOM_DROP_TABLE,
11648               (lex->sql_command == SQLCOM_DROP_TABLE && lex->drop_temporary),
11649               in_multi_stmt_transaction_mode()));
11650 
11651   if (lex->sql_command == SQLCOM_CREATE_TABLE &&
11652       !(lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) &&
11653       lex->select_lex->item_list.elements)
11654   {
11655     /*
11656       CREATE ... SELECT (without TEMPORARY) is unsafe because if
11657       binlog_format=row it will be logged as a CREATE TABLE followed
11658       by row events, re-executed non-atomically as two transactions,
11659       and then written to the slave's binary log as two separate
11660       transactions with the same GTID.
11661     */
11662     bool ret= handle_gtid_consistency_violation(
11663       this, ER_GTID_UNSAFE_CREATE_SELECT);
11664     DBUG_RETURN(ret);
11665   }
11666   else if ((lex->sql_command == SQLCOM_CREATE_TABLE &&
11667             (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) != 0) ||
11668            (lex->sql_command == SQLCOM_DROP_TABLE && lex->drop_temporary))
11669   {
11670     /*
11671       [CREATE|DROP] TEMPORARY TABLE is unsafe to execute
11672       inside a transaction because the table will stay and the
11673       transaction will be written to the slave's binary log with the
11674       GTID even if the transaction is rolled back.
11675       This includes the execution inside Functions and Triggers.
11676     */
11677     if (in_multi_stmt_transaction_mode() || in_sub_stmt)
11678     {
11679       bool ret= handle_gtid_consistency_violation(
11680         this, ER_GTID_UNSAFE_CREATE_DROP_TEMPORARY_TABLE_IN_TRANSACTION);
11681       DBUG_RETURN(ret);
11682     }
11683   }
11684   DBUG_RETURN(true);
11685 }
11686 
11687 
11688 bool
is_dml_gtid_compatible(bool some_transactional_table,bool some_non_transactional_table,bool non_transactional_tables_are_tmp)11689 THD::is_dml_gtid_compatible(bool some_transactional_table,
11690                             bool some_non_transactional_table,
11691                             bool non_transactional_tables_are_tmp)
11692 {
11693   DBUG_ENTER("THD::is_dml_gtid_compatible(bool, bool, bool)");
11694 
11695   // If @@session.sql_log_bin has been manually turned off (only
11696   // doable by SUPER), then no problem, we can execute any statement.
11697   if ((variables.option_bits & OPTION_BIN_LOG) == 0 ||
11698       mysql_bin_log.is_open() == false)
11699     DBUG_RETURN(true);
11700 
11701   /*
11702     Single non-transactional updates are allowed when not mixed
11703     together with transactional statements within a transaction.
11704     Furthermore, writing to transactional and non-transactional
11705     engines in a single statement is also disallowed.
11706     Multi-statement transactions on non-transactional tables are
11707     split into single-statement transactions when
11708     GTID_NEXT = "AUTOMATIC".
11709 
11710     Non-transactional updates are allowed when row binlog format is
11711     used and all non-transactional tables are temporary.
11712 
11713     The debug symbol "allow_gtid_unsafe_non_transactional_updates"
11714     disables the error.  This is useful because it allows us to run
11715     old tests that were not written with the restrictions of GTIDs in
11716     mind.
11717   */
11718   DBUG_PRINT("info", ("some_non_transactional_table=%d "
11719                       "some_transactional_table=%d "
11720                       "trans_has_updated_trans_table=%d "
11721                       "non_transactional_tables_are_tmp=%d "
11722                       "is_current_stmt_binlog_format_row=%d",
11723                       some_non_transactional_table,
11724                       some_transactional_table,
11725                       trans_has_updated_trans_table(this),
11726                       non_transactional_tables_are_tmp,
11727                       is_current_stmt_binlog_format_row()));
11728   if (some_non_transactional_table &&
11729       (some_transactional_table || trans_has_updated_trans_table(this)) &&
11730       !(non_transactional_tables_are_tmp &&
11731         is_current_stmt_binlog_format_row()) &&
11732       !DBUG_EVALUATE_IF("allow_gtid_unsafe_non_transactional_updates", 1, 0))
11733   {
11734     DBUG_RETURN(handle_gtid_consistency_violation(
11735       this, ER_GTID_UNSAFE_NON_TRANSACTIONAL_TABLE));
11736   }
11737 
11738   DBUG_RETURN(true);
11739 }
11740 
11741 /*
11742   Implementation of interface to write rows to the binary log through the
11743   thread.  The thread is responsible for writing the rows it has
11744   inserted/updated/deleted.
11745 */
11746 
11747 #ifndef MYSQL_CLIENT
11748 
11749 /*
11750   Template member function for ensuring that there is an rows log
11751   event of the apropriate type before proceeding.
11752 
11753   PRE CONDITION:
11754     - Events of type 'RowEventT' have the type code 'type_code'.
11755 
11756   POST CONDITION:
11757     If a non-NULL pointer is returned, the pending event for thread 'thd' will
11758     be an event of type 'RowEventT' (which have the type code 'type_code')
11759     will either empty or have enough space to hold 'needed' bytes.  In
11760     addition, the columns bitmap will be correct for the row, meaning that
11761     the pending event will be flushed if the columns in the event differ from
11762     the columns suppled to the function.
11763 
11764   RETURNS
11765     If no error, a non-NULL pending event (either one which already existed or
11766     the newly created one).
11767     If error, NULL.
11768  */
11769 
11770 template <class RowsEventT> Rows_log_event*
binlog_prepare_pending_rows_event(TABLE * table,uint32 serv_id,size_t needed,bool is_transactional,RowsEventT * hint MY_ATTRIBUTE ((unused)),const uchar * extra_row_info)11771 THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
11772                                        size_t needed,
11773                                        bool is_transactional,
11774 				       RowsEventT *hint MY_ATTRIBUTE((unused)),
11775                                        const uchar* extra_row_info)
11776 {
11777   DBUG_ENTER("binlog_prepare_pending_rows_event");
11778 
11779   /* Fetch the type code for the RowsEventT template parameter */
11780   int const general_type_code= RowsEventT::TYPE_CODE;
11781 
11782   Rows_log_event* pending= binlog_get_pending_rows_event(is_transactional);
11783 
11784   if (unlikely(pending && !pending->is_valid()))
11785     DBUG_RETURN(NULL);
11786 
11787   /*
11788     Check if the current event is non-NULL and a write-rows
11789     event. Also check if the table provided is mapped: if it is not,
11790     then we have switched to writing to a new table.
11791     If there is no pending event, we need to create one. If there is a pending
11792     event, but it's not about the same table id, or not of the same type
11793     (between Write, Update and Delete), or not the same affected columns, or
11794     going to be too big, flush this event to disk and create a new pending
11795     event.
11796   */
11797   if (!pending ||
11798       pending->server_id != serv_id ||
11799       pending->get_table_id() != table->s->table_map_id ||
11800       pending->get_general_type_code() != general_type_code ||
11801       pending->get_data_size() + needed > opt_binlog_rows_event_max_size ||
11802       pending->read_write_bitmaps_cmp(table) == FALSE ||
11803       !binlog_row_event_extra_data_eq(pending->get_extra_row_data(),
11804                                       extra_row_info))
11805   {
11806     /* Create a new RowsEventT... */
11807     Rows_log_event* const
11808 	ev= new RowsEventT(this, table, table->s->table_map_id,
11809                            is_transactional, extra_row_info);
11810     if (unlikely(!ev))
11811       DBUG_RETURN(NULL);
11812     ev->server_id= serv_id; // I don't like this, it's too easy to forget.
11813     /*
11814       flush the pending event and replace it with the newly created
11815       event...
11816     */
11817     if (unlikely(
11818         mysql_bin_log.flush_and_set_pending_rows_event(this, ev,
11819                                                        is_transactional)))
11820     {
11821       delete ev;
11822       DBUG_RETURN(NULL);
11823     }
11824 
11825     DBUG_RETURN(ev);               /* This is the new pending event */
11826   }
11827   DBUG_RETURN(pending);        /* This is the current pending event */
11828 }
11829 
11830 /* Declare in unnamed namespace. */
11831 namespace {
11832 
11833   /**
11834      Class to handle temporary allocation of memory for row data.
11835 
11836      The responsibilities of the class is to provide memory for
11837      packing one or two rows of packed data (depending on what
11838      constructor is called).
11839 
11840      In order to make the allocation more efficient for "simple" rows,
11841      i.e., rows that do not contain any blobs, a pointer to the
11842      allocated memory is of memory is stored in the table structure
11843      for simple rows.  If memory for a table containing a blob field
11844      is requested, only memory for that is allocated, and subsequently
11845      released when the object is destroyed.
11846 
11847    */
11848   class Row_data_memory {
11849   public:
11850     /**
11851       Build an object to keep track of a block-local piece of memory
11852       for storing a row of data.
11853 
11854       @param table
11855       Table where the pre-allocated memory is stored.
11856 
11857       @param length
11858       Length of data that is needed, if the record contain blobs.
11859      */
Row_data_memory(TABLE * table,size_t const len1)11860     Row_data_memory(TABLE *table, size_t const len1)
11861       : m_memory(0)
11862     {
11863 #ifndef NDEBUG
11864       m_alloc_checked= FALSE;
11865 #endif
11866       allocate_memory(table, len1);
11867       m_ptr[0]= has_memory() ? m_memory : 0;
11868       m_ptr[1]= 0;
11869     }
11870 
Row_data_memory(TABLE * table,size_t const len1,size_t const len2)11871     Row_data_memory(TABLE *table, size_t const len1, size_t const len2)
11872       : m_memory(0)
11873     {
11874 #ifndef NDEBUG
11875       m_alloc_checked= FALSE;
11876 #endif
11877       allocate_memory(table, len1 + len2);
11878       m_ptr[0]= has_memory() ? m_memory        : 0;
11879       m_ptr[1]= has_memory() ? m_memory + len1 : 0;
11880     }
11881 
~Row_data_memory()11882     ~Row_data_memory()
11883     {
11884       if (m_memory != 0 && m_release_memory_on_destruction)
11885         my_free(m_memory);
11886     }
11887 
11888     /**
11889        Is there memory allocated?
11890 
11891        @retval true There is memory allocated
11892        @retval false Memory allocation failed
11893      */
has_memory() const11894     bool has_memory() const {
11895 #ifndef NDEBUG
11896       m_alloc_checked= TRUE;
11897 #endif
11898       return m_memory != 0;
11899     }
11900 
slot(uint s)11901     uchar *slot(uint s)
11902     {
11903       assert(s < sizeof(m_ptr)/sizeof(*m_ptr));
11904       assert(m_ptr[s] != 0);
11905       assert(m_alloc_checked == TRUE);
11906       return m_ptr[s];
11907     }
11908 
11909   private:
allocate_memory(TABLE * const table,size_t const total_length)11910     void allocate_memory(TABLE *const table, size_t const total_length)
11911     {
11912       if (table->s->blob_fields == 0)
11913       {
11914         /*
11915           The maximum length of a packed record is less than this
11916           length. We use this value instead of the supplied length
11917           when allocating memory for records, since we don't know how
11918           the memory will be used in future allocations.
11919 
11920           Since table->s->reclength is for unpacked records, we have
11921           to add two bytes for each field, which can potentially be
11922           added to hold the length of a packed field.
11923         */
11924         size_t const maxlen= table->s->reclength + 2 * table->s->fields;
11925 
11926         /*
11927           Allocate memory for two records if memory hasn't been
11928           allocated. We allocate memory for two records so that it can
11929           be used when processing update rows as well.
11930         */
11931         if (table->write_row_record == 0)
11932           table->write_row_record=
11933             (uchar *) alloc_root(&table->mem_root, 2 * maxlen);
11934         m_memory= table->write_row_record;
11935         m_release_memory_on_destruction= FALSE;
11936       }
11937       else
11938       {
11939         m_memory= (uchar *) my_malloc(key_memory_Row_data_memory_memory,
11940                                       total_length, MYF(MY_WME));
11941         m_release_memory_on_destruction= TRUE;
11942       }
11943     }
11944 
11945 #ifndef NDEBUG
11946     mutable bool m_alloc_checked;
11947 #endif
11948     bool m_release_memory_on_destruction;
11949     uchar *m_memory;
11950     uchar *m_ptr[2];
11951   };
11952 
11953 } // namespace
11954 
binlog_write_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)11955 int THD::binlog_write_row(TABLE* table, bool is_trans,
11956                           uchar const *record,
11957                           const uchar* extra_row_info)
11958 {
11959 #ifdef WITH_WSREP
11960   assert(is_current_stmt_binlog_format_row() &&
11961 	 ((WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open())));
11962 #else
11963   assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
11964 #endif /* WITH_WSREP */
11965 
11966   /*
11967     Pack records into format for transfer. We are allocating more
11968     memory than needed, but that doesn't matter.
11969   */
11970   Row_data_memory memory(table, max_row_length(table, record));
11971   if (!memory.has_memory())
11972     return HA_ERR_OUT_OF_MEM;
11973 
11974   uchar *row_data= memory.slot(0);
11975 
11976   size_t const len= pack_row(table, table->write_set, row_data, record);
11977 
11978   Rows_log_event* const ev=
11979     binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
11980                                       static_cast<Write_rows_log_event*>(0),
11981                                       extra_row_info);
11982 
11983   if (unlikely(ev == 0))
11984     return HA_ERR_OUT_OF_MEM;
11985 
11986   return ev->add_row_data(row_data, len);
11987 }
11988 
binlog_update_row(TABLE * table,bool is_trans,const uchar * before_record,const uchar * after_record,const uchar * extra_row_info)11989 int THD::binlog_update_row(TABLE* table, bool is_trans,
11990                            const uchar *before_record,
11991                            const uchar *after_record,
11992                            const uchar* extra_row_info)
11993 {
11994 #ifdef WITH_WSREP
11995   assert(is_current_stmt_binlog_format_row() &&
11996          ((WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open())));
11997 #else
11998   assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
11999 #endif /* WITH_WSREP */
12000 
12001   int error= 0;
12002 
12003   /**
12004     Save a reference to the original read and write set bitmaps.
12005     We will need this to restore the bitmaps at the end.
12006    */
12007   MY_BITMAP *old_read_set= table->read_set;
12008   MY_BITMAP *old_write_set= table->write_set;
12009 
12010   /**
12011      This will remove spurious fields required during execution but
12012      not needed for binlogging. This is done according to the:
12013      binlog-row-image option.
12014    */
12015   binlog_prepare_row_images(table);
12016 
12017   size_t const before_maxlen = max_row_length(table, before_record);
12018   size_t const after_maxlen  = max_row_length(table, after_record);
12019 
12020   Row_data_memory row_data(table, before_maxlen, after_maxlen);
12021   if (!row_data.has_memory())
12022     return HA_ERR_OUT_OF_MEM;
12023 
12024   uchar *before_row= row_data.slot(0);
12025   uchar *after_row= row_data.slot(1);
12026 
12027   size_t const before_size= pack_row(table, table->read_set, before_row,
12028                                         before_record);
12029   size_t const after_size= pack_row(table, table->write_set, after_row,
12030                                        after_record);
12031 
12032   DBUG_DUMP("before_record", before_record, table->s->reclength);
12033   DBUG_DUMP("after_record",  after_record, table->s->reclength);
12034   DBUG_DUMP("before_row",    before_row, before_size);
12035   DBUG_DUMP("after_row",     after_row, after_size);
12036 
12037   Rows_log_event* const ev=
12038     binlog_prepare_pending_rows_event(table, server_id,
12039 				      before_size + after_size, is_trans,
12040 				      static_cast<Update_rows_log_event*>(0),
12041                                       extra_row_info);
12042 
12043   if (unlikely(ev == 0))
12044     return HA_ERR_OUT_OF_MEM;
12045 
12046   error= ev->add_row_data(before_row, before_size) ||
12047          ev->add_row_data(after_row, after_size);
12048 
12049   /* restore read/write set for the rest of execution */
12050   table->column_bitmaps_set_no_signal(old_read_set,
12051                                       old_write_set);
12052 
12053   bitmap_clear_all(&table->tmp_set);
12054 
12055   return error;
12056 }
12057 
binlog_delete_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)12058 int THD::binlog_delete_row(TABLE* table, bool is_trans,
12059                            uchar const *record,
12060                            const uchar* extra_row_info)
12061 {
12062 #ifdef WITH_WSREP
12063   assert(is_current_stmt_binlog_format_row() &&
12064          ((WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open())));
12065 #else
12066   assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
12067 #endif /* WITH_WSREP */
12068 
12069   int error= 0;
12070 
12071   /**
12072     Save a reference to the original read and write set bitmaps.
12073     We will need this to restore the bitmaps at the end.
12074    */
12075   MY_BITMAP *old_read_set= table->read_set;
12076   MY_BITMAP *old_write_set= table->write_set;
12077 
12078   /**
12079      This will remove spurious fields required during execution but
12080      not needed for binlogging. This is done according to the:
12081      binlog-row-image option.
12082    */
12083   binlog_prepare_row_images(table);
12084 
12085   /*
12086      Pack records into format for transfer. We are allocating more
12087      memory than needed, but that doesn't matter.
12088   */
12089   Row_data_memory memory(table, max_row_length(table, record));
12090   if (unlikely(!memory.has_memory()))
12091     return HA_ERR_OUT_OF_MEM;
12092 
12093   uchar *row_data= memory.slot(0);
12094 
12095   DBUG_DUMP("table->read_set", (uchar*) table->read_set->bitmap, (table->s->fields + 7) / 8);
12096   size_t const len= pack_row(table, table->read_set, row_data, record);
12097 
12098   Rows_log_event* const ev=
12099     binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
12100 				      static_cast<Delete_rows_log_event*>(0),
12101                                       extra_row_info);
12102 
12103   if (unlikely(ev == 0))
12104     return HA_ERR_OUT_OF_MEM;
12105 
12106   error= ev->add_row_data(row_data, len);
12107 
12108   /* restore read/write set for the rest of execution */
12109   table->column_bitmaps_set_no_signal(old_read_set,
12110                                       old_write_set);
12111 
12112   bitmap_clear_all(&table->tmp_set);
12113   return error;
12114 }
12115 
binlog_prepare_row_images(TABLE * table)12116 void THD::binlog_prepare_row_images(TABLE *table)
12117 {
12118   DBUG_ENTER("THD::binlog_prepare_row_images");
12119   /**
12120     Remove from read_set spurious columns. The write_set has been
12121     handled before in table->mark_columns_needed_for_update.
12122    */
12123 
12124   DBUG_PRINT_BITSET("debug", "table->read_set (before preparing): %s", table->read_set);
12125   THD *thd= table->in_use;
12126 
12127   /**
12128     if there is a primary key in the table (ie, user declared PK or a
12129     non-null unique index) and we dont want to ship the entire image,
12130     and the handler involved supports this.
12131    */
12132   if (table->s->primary_key < MAX_KEY &&
12133       (thd->variables.binlog_row_image < BINLOG_ROW_IMAGE_FULL) &&
12134       !ha_check_storage_engine_flag(table->s->db_type(), HTON_NO_BINLOG_ROW_OPT))
12135   {
12136     /**
12137       Just to be sure that tmp_set is currently not in use as
12138       the read_set already.
12139     */
12140     assert(table->read_set != &table->tmp_set);
12141     // Verify it's not used
12142     assert(bitmap_is_clear_all(&table->tmp_set));
12143 
12144     switch(thd->variables.binlog_row_image)
12145     {
12146       case BINLOG_ROW_IMAGE_MINIMAL:
12147         /* MINIMAL: Mark only PK */
12148         table->mark_columns_used_by_index_no_reset(table->s->primary_key,
12149                                                    &table->tmp_set);
12150         break;
12151       case BINLOG_ROW_IMAGE_NOBLOB:
12152         /**
12153           NOBLOB: Remove unnecessary BLOB fields from read_set
12154                   (the ones that are not part of PK).
12155          */
12156         bitmap_union(&table->tmp_set, table->read_set);
12157         for (Field **ptr=table->field ; *ptr ; ptr++)
12158         {
12159           Field *field= (*ptr);
12160           if ((field->type() == MYSQL_TYPE_BLOB) &&
12161               !(field->flags & PRI_KEY_FLAG))
12162             bitmap_clear_bit(&table->tmp_set, field->field_index);
12163         }
12164         break;
12165       default:
12166         assert(0); // impossible.
12167     }
12168 
12169     /* set the temporary read_set */
12170     table->column_bitmaps_set_no_signal(&table->tmp_set,
12171                                         table->write_set);
12172   }
12173 
12174   DBUG_PRINT_BITSET("debug", "table->read_set (after preparing): %s", table->read_set);
12175   DBUG_VOID_RETURN;
12176 }
12177 
12178 
binlog_flush_pending_rows_event(bool stmt_end,bool is_transactional)12179 int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional)
12180 {
12181   DBUG_ENTER("THD::binlog_flush_pending_rows_event");
12182   /*
12183     We shall flush the pending event even if we are not in row-based
12184     mode: it might be the case that we left row-based mode before
12185     flushing anything (e.g., if we have explicitly locked tables).
12186    */
12187 #ifdef WITH_WSREP
12188   if (!(WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open()))
12189 #else
12190  if (!mysql_bin_log.is_open())
12191 #endif /* WITH_WSREP */
12192     DBUG_RETURN(0);
12193 
12194   /*
12195     Mark the event as the last event of a statement if the stmt_end
12196     flag is set.
12197   */
12198   int error= 0;
12199   if (Rows_log_event *pending= binlog_get_pending_rows_event(is_transactional))
12200   {
12201     if (stmt_end)
12202     {
12203       pending->set_flags(Rows_log_event::STMT_END_F);
12204       binlog_table_maps= 0;
12205     }
12206 
12207     error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0,
12208                                                           is_transactional);
12209   }
12210 
12211   DBUG_RETURN(error);
12212 }
12213 
12214 
12215 /**
12216    binlog_row_event_extra_data_eq
12217 
12218    Comparator for two binlog row event extra data
12219    pointers.
12220 
12221    It compares their significant bytes.
12222 
12223    Null pointers are acceptable
12224 
12225    @param a
12226      first pointer
12227 
12228    @param b
12229      first pointer
12230 
12231    @return
12232      true if the referenced structures are equal
12233 */
12234 bool
binlog_row_event_extra_data_eq(const uchar * a,const uchar * b)12235 THD::binlog_row_event_extra_data_eq(const uchar* a,
12236                                     const uchar* b)
12237 {
12238   return ((a == b) ||
12239           ((a != NULL) &&
12240            (b != NULL) &&
12241            (a[EXTRA_ROW_INFO_LEN_OFFSET] ==
12242             b[EXTRA_ROW_INFO_LEN_OFFSET]) &&
12243            (memcmp(a, b,
12244                    a[EXTRA_ROW_INFO_LEN_OFFSET]) == 0)));
12245 }
12246 
12247 #if !defined(NDEBUG)
12248 static const char *
show_query_type(THD::enum_binlog_query_type qtype)12249 show_query_type(THD::enum_binlog_query_type qtype)
12250 {
12251   switch (qtype) {
12252   case THD::ROW_QUERY_TYPE:
12253     return "ROW";
12254   case THD::STMT_QUERY_TYPE:
12255     return "STMT";
12256   case THD::QUERY_TYPE_COUNT:
12257   default:
12258     assert(0 <= qtype && qtype < THD::QUERY_TYPE_COUNT);
12259   }
12260   static char buf[64];
12261   sprintf(buf, "UNKNOWN#%d", qtype);
12262   return buf;
12263 }
12264 #endif
12265 
12266 /**
12267   Auxiliary function to reset the limit unsafety warning suppression.
12268 */
reset_binlog_unsafe_suppression()12269 static void reset_binlog_unsafe_suppression()
12270 {
12271   DBUG_ENTER("reset_binlog_unsafe_suppression");
12272   unsafe_warning_suppression_is_activated= false;
12273   limit_unsafe_warning_count= 0;
12274   limit_unsafe_suppression_start_time= my_getsystime()/10000000;
12275   DBUG_VOID_RETURN;
12276 }
12277 
12278 /**
12279   Auxiliary function to print warning in the error log.
12280 */
print_unsafe_warning_to_log(int unsafe_type,char * buf,const char * query)12281 static void print_unsafe_warning_to_log(int unsafe_type, char* buf,
12282                                         const char* query)
12283 {
12284   DBUG_ENTER("print_unsafe_warning_in_log");
12285   sprintf(buf, ER(ER_BINLOG_UNSAFE_STATEMENT),
12286           ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
12287   sql_print_warning(ER(ER_MESSAGE_AND_STATEMENT), buf, query);
12288   DBUG_VOID_RETURN;
12289 }
12290 
12291 /**
12292   Auxiliary function to check if the warning for limit unsafety should be
12293   thrown or suppressed. Details of the implementation can be found in the
12294   comments inline.
12295 
12296   @params
12297    buf         - buffer to hold the warning message text
12298    unsafe_type - The type of unsafety.
12299    query       - The actual query statement.
12300 
12301   TODO: Remove this function and implement a general service for all warnings
12302   that would prevent flooding the error log. => switch to log_throttle class?
12303 */
do_unsafe_limit_checkout(char * buf,int unsafe_type,const char * query)12304 static void do_unsafe_limit_checkout(char* buf, int unsafe_type, const char* query)
12305 {
12306   ulonglong now;
12307   DBUG_ENTER("do_unsafe_limit_checkout");
12308   assert(unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT);
12309   limit_unsafe_warning_count++;
12310   /*
12311     INITIALIZING:
12312     If this is the first time this function is called with log warning
12313     enabled, the monitoring the unsafe warnings should start.
12314   */
12315   if (limit_unsafe_suppression_start_time == 0)
12316   {
12317     limit_unsafe_suppression_start_time= my_getsystime()/10000000;
12318     print_unsafe_warning_to_log(unsafe_type, buf, query);
12319   }
12320   else
12321   {
12322     if (!unsafe_warning_suppression_is_activated)
12323       print_unsafe_warning_to_log(unsafe_type, buf, query);
12324 
12325     if (limit_unsafe_warning_count >=
12326         LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT)
12327     {
12328       now= my_getsystime()/10000000;
12329       if (!unsafe_warning_suppression_is_activated)
12330       {
12331         /*
12332           ACTIVATION:
12333           We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT warnings in
12334           less than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT we activate the
12335           suppression.
12336         */
12337         if ((now-limit_unsafe_suppression_start_time) <=
12338                        LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
12339         {
12340           unsafe_warning_suppression_is_activated= true;
12341           DBUG_PRINT("info",("A warning flood has been detected and the limit \
12342 unsafety warning suppression has been activated."));
12343         }
12344         else
12345         {
12346           /*
12347            there is no flooding till now, therefore we restart the monitoring
12348           */
12349           limit_unsafe_suppression_start_time= my_getsystime()/10000000;
12350           limit_unsafe_warning_count= 0;
12351         }
12352       }
12353       else
12354       {
12355         /*
12356           Print the suppression note and the unsafe warning.
12357         */
12358         sql_print_information("The following warning was suppressed %d times \
12359 during the last %d seconds in the error log",
12360                               limit_unsafe_warning_count,
12361                               (int)
12362                               (now-limit_unsafe_suppression_start_time));
12363         print_unsafe_warning_to_log(unsafe_type, buf, query);
12364         /*
12365           DEACTIVATION: We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT
12366           warnings in more than  LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT, the
12367           suppression should be deactivated.
12368         */
12369         if ((now - limit_unsafe_suppression_start_time) >
12370             LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
12371         {
12372           reset_binlog_unsafe_suppression();
12373           DBUG_PRINT("info",("The limit unsafety warning supression has been \
12374 deactivated"));
12375         }
12376       }
12377       limit_unsafe_warning_count= 0;
12378     }
12379   }
12380   DBUG_VOID_RETURN;
12381 }
12382 
12383 /**
12384   Auxiliary method used by @c binlog_query() to raise warnings.
12385 
12386   The type of warning and the type of unsafeness is stored in
12387   THD::binlog_unsafe_warning_flags.
12388 */
issue_unsafe_warnings()12389 void THD::issue_unsafe_warnings()
12390 {
12391   char buf[MYSQL_ERRMSG_SIZE * 2];
12392   DBUG_ENTER("issue_unsafe_warnings");
12393   /*
12394     Ensure that binlog_unsafe_warning_flags is big enough to hold all
12395     bits.  This is actually a constant expression.
12396   */
12397   assert(LEX::BINLOG_STMT_UNSAFE_COUNT <=
12398          sizeof(binlog_unsafe_warning_flags) * CHAR_BIT);
12399 
12400   uint32 unsafe_type_flags= binlog_unsafe_warning_flags;
12401 
12402   /*
12403     For each unsafe_type, check if the statement is unsafe in this way
12404     and issue a warning.
12405   */
12406   for (int unsafe_type=0;
12407        unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
12408        unsafe_type++)
12409   {
12410     if ((unsafe_type_flags & (1 << unsafe_type)) != 0)
12411     {
12412       push_warning_printf(this, Sql_condition::SL_NOTE,
12413                           ER_BINLOG_UNSAFE_STATEMENT,
12414                           ER(ER_BINLOG_UNSAFE_STATEMENT),
12415                           ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
12416       if (log_error_verbosity > 1 && opt_log_unsafe_statements)
12417       {
12418         if (unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT)
12419           do_unsafe_limit_checkout( buf, unsafe_type, query().str);
12420         else //cases other than LIMIT unsafety
12421           print_unsafe_warning_to_log(unsafe_type, buf, query().str);
12422       }
12423     }
12424   }
12425   DBUG_VOID_RETURN;
12426 }
12427 
12428 /**
12429   Log the current query.
12430 
12431   The query will be logged in either row format or statement format
12432   depending on the value of @c current_stmt_binlog_format_row field and
12433   the value of the @c qtype parameter.
12434 
12435   This function must be called:
12436 
  - After all calls to ha_*_row() functions have been issued.
12438 
12439   - After any writes to system tables. Rationale: if system tables
12440     were written after a call to this function, and the master crashes
12441     after the call to this function and before writing the system
12442     tables, then the master and slave get out of sync.
12443 
12444   - Before tables are unlocked and closed.
12445 
12446   @see decide_logging_format
12447 
12448   @retval 0 Success
12449 
12450   @retval nonzero If there is a failure when writing the query (e.g.,
12451   write failure), then the error code is returned.
12452 */
binlog_query(THD::enum_binlog_query_type qtype,const char * query_arg,size_t query_len,bool is_trans,bool direct,bool suppress_use,int errcode)12453 int THD::binlog_query(THD::enum_binlog_query_type qtype, const char *query_arg,
12454                       size_t query_len, bool is_trans, bool direct,
12455                       bool suppress_use, int errcode)
12456 {
12457   DBUG_ENTER("THD::binlog_query");
12458   DBUG_PRINT("enter", ("qtype: %s  query: '%s'",
12459                        show_query_type(qtype), query_arg));
12460 #ifdef WITH_WSREP
12461   assert(query_arg && (WSREP_EMULATE_BINLOG_NNULL(this)
12462                             || mysql_bin_log.is_open()));
12463 #else
12464   assert(query_arg && mysql_bin_log.is_open());
12465 #endif /* WITH_WSREP */
12466 
12467   if (get_binlog_local_stmt_filter() == BINLOG_FILTER_SET)
12468   {
12469     /*
12470       The current statement is to be ignored, and not written to
12471       the binlog. Do not call issue_unsafe_warnings().
12472     */
12473     DBUG_RETURN(0);
12474   }
12475 
12476   /*
12477     If we are not in prelocked mode, mysql_unlock_tables() will be
12478     called after this binlog_query(), so we have to flush the pending
12479     rows event with the STMT_END_F set to unlock all tables at the
12480     slave side as well.
12481 
12482     If we are in prelocked mode, the flushing will be done inside the
12483     top-most close_thread_tables().
12484   */
12485   if (this->locked_tables_mode <= LTM_LOCK_TABLES)
12486     if (int error= binlog_flush_pending_rows_event(TRUE, is_trans))
12487       DBUG_RETURN(error);
12488 
12489   /*
12490     Warnings for unsafe statements logged in statement format are
12491     printed in three places instead of in decide_logging_format().
12492     This is because the warnings should be printed only if the statement
12493     is actually logged. When executing decide_logging_format(), we cannot
12494     know for sure if the statement will be logged:
12495 
12496     1 - sp_head::execute_procedure which prints out warnings for calls to
12497     stored procedures.
12498 
12499     2 - sp_head::execute_function which prints out warnings for calls
12500     involving functions.
12501 
12502     3 - THD::binlog_query (here) which prints warning for top level
12503     statements not covered by the two cases above: i.e., if not insided a
12504     procedure and a function.
12505 
12506     Besides, we should not try to print these warnings if it is not
12507     possible to write statements to the binary log as it happens when
12508     the execution is inside a function, or generaly speaking, when
12509     the variables.option_bits & OPTION_BIN_LOG is false.
12510   */
12511   if ((variables.option_bits & OPTION_BIN_LOG) &&
12512       sp_runtime_ctx == NULL && !binlog_evt_union.do_union)
12513     issue_unsafe_warnings();
12514 
12515   switch (qtype) {
12516     /*
12517       ROW_QUERY_TYPE means that the statement may be logged either in
12518       row format or in statement format.  If
12519       current_stmt_binlog_format is row, it means that the
12520       statement has already been logged in row format and hence shall
12521       not be logged again.
12522     */
12523   case THD::ROW_QUERY_TYPE:
12524     DBUG_PRINT("debug",
12525                ("is_current_stmt_binlog_format_row: %d",
12526                 is_current_stmt_binlog_format_row()));
12527     if (is_current_stmt_binlog_format_row())
12528       DBUG_RETURN(0);
12529     /* Fall through */
12530 
12531     /*
12532       STMT_QUERY_TYPE means that the query must be logged in statement
12533       format; it cannot be logged in row format.  This is typically
12534       used by DDL statements.  It is an error to use this query type
12535       if current_stmt_binlog_format_row is row.
12536 
12537       @todo Currently there are places that call this method with
12538       STMT_QUERY_TYPE and current_stmt_binlog_format is row.  Fix those
12539       places and add assert to ensure correct behavior. /Sven
12540     */
12541   case THD::STMT_QUERY_TYPE:
12542     /*
12543       The MYSQL_BIN_LOG::write() function will set the STMT_END_F flag and
12544       flush the pending rows event if necessary.
12545     */
12546     {
12547       Query_log_event qinfo(this, query_arg, query_len, is_trans, direct,
12548                             suppress_use, errcode);
12549       /*
12550         Binlog table maps will be irrelevant after a Query_log_event
12551         (they are just removed on the slave side) so after the query
12552         log event is written to the binary log, we pretend that no
12553         table maps were written.
12554        */
12555       int error= mysql_bin_log.write_event(&qinfo);
12556       binlog_table_maps= 0;
12557       DBUG_RETURN(error);
12558     }
12559     break;
12560 
12561   case THD::QUERY_TYPE_COUNT:
12562   default:
12563     assert(0 <= qtype && qtype < QUERY_TYPE_COUNT);
12564   }
12565   DBUG_RETURN(0);
12566 }
12567 
12568 #endif /* !defined(MYSQL_CLIENT) */
12569 #ifdef WITH_WSREP
get_trans_log(THD * thd,bool transaction)12570 IO_CACHE * get_trans_log(THD * thd, bool transaction)
12571 {
12572   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
12573   if (cache_mngr)
12574   {
12575     return cache_mngr->get_binlog_cache_log(transaction);
12576   }
12577   else
12578   {
12579     WSREP_DEBUG("binlog cache not initialized, conn :%u", thd->thread_id());
12580     return NULL;
12581   }
12582 }
12583 
wsrep_trans_cache_is_empty(THD * thd)12584 bool wsrep_trans_cache_is_empty(THD *thd)
12585 {
12586   binlog_cache_mngr *const cache_mngr=
12587       (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
12588   return (!cache_mngr || cache_mngr->trx_cache.is_binlog_empty());
12589 }
12590 
thd_binlog_flush_pending_rows_event(THD * thd,bool stmt_end)12591 void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end)
12592 {
12593   thd->binlog_flush_pending_rows_event(stmt_end);
12594 }
thd_binlog_trx_reset(THD * thd)12595 void thd_binlog_trx_reset(THD * thd)
12596 {
12597   /*
12598     todo: fix autocommit select to not call the caller
12599   */
12600   if (thd_get_ha_data(thd, binlog_hton) != NULL)
12601   {
12602     binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
12603     if (cache_mngr)
12604     {
12605       cache_mngr->trx_cache.reset();
12606       if (!cache_mngr->stmt_cache.is_binlog_empty())
12607       {
12608 	WSREP_DEBUG("pending events in stmt cache, sql: %s", WSREP_QUERY(thd));
12609 	cache_mngr->stmt_cache.reset();
12610       }
12611     }
12612   }
12613   thd->clear_binlog_table_maps();
12614 }
12615 
wsrep_thd_binlog_commit(THD * thd,bool all)12616 TC_LOG::enum_result wsrep_thd_binlog_commit(THD* thd, bool all)
12617 {
12618   /* binlog commit is called for wsrep replication to happen
12619      - applier and replayer can skip binlog commit
12620      - also if node is not joined, replication must be skipped
12621    */
12622   if (WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV) &&
12623       wsrep_ready_get())
12624     return mysql_bin_log.commit(thd, all);
12625   else
12626     return (ha_commit_low(thd, all) ?
12627             TC_LOG::RESULT_ABORTED : TC_LOG::RESULT_SUCCESS);
12628 }
12629 
wsrep_thd_binlog_rollback(THD * thd,bool all)12630 int wsrep_thd_binlog_rollback(THD* thd, bool all)
12631 {
12632   /* binlog rollback is called for wsrep replication to happen
12633      - applier and replayer can skip binlog commit
12634      - also if node is not joined, replication must be skipped
12635    */
12636   if (WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV) &&
12637       wsrep_ready_get())
12638     return mysql_bin_log.rollback(thd, all);
12639   else
12640     return ha_rollback_low(thd, all);
12641 }
12642 #endif /* WITH_WSREP */
12643 
/*
  Storage-engine descriptor for the binlog pseudo engine. It is
  initialized with the handlerton interface version only, and its address
  is passed as the second entry of the binlog plugin declaration.
*/
struct st_mysql_storage_engine binlog_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
12646 
12647 /** @} */
12648 
/*
  Plugin declaration that registers the binlog pseudo storage engine.

  The entries are positional. NOTE(review): the per-entry labels added
  below assume the usual st_mysql_plugin field order; confirm against
  plugin.h before relying on them.
*/
mysql_declare_plugin(binlog)
{
  MYSQL_STORAGE_ENGINE_PLUGIN, /* presumably the plugin type */
  &binlog_storage_engine,      /* descriptor defined just above */
  "binlog",                    /* presumably the plugin name */
  "MySQL AB",                  /* presumably the author */
  "This is a pseudo storage engine to represent the binlog in a transaction",
  PLUGIN_LICENSE_GPL,
  binlog_init, /* Plugin Init */
  binlog_deinit, /* Plugin Deinit */
  0x0100 /* 1.0 */,
  NULL,                       /* status variables                */
  NULL,                       /* system variables                */
  NULL,                       /* config options                  */
  0,                          /* presumably plugin flags; none set */
}
mysql_declare_plugin_end;
12666