1 /* Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software Foundation,
21    51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22 
23 
24 #include "my_global.h"
25 #include "log.h"
26 #include "binlog.h"
27 #include "log_event.h"
28 #include "rpl_filter.h"
29 #include "rpl_rli.h"
30 #include "sql_plugin.h"
31 #include "rpl_handler.h"
32 #include "rpl_info_factory.h"
33 #include "rpl_utility.h"
34 #include "debug_sync.h"
35 #include "global_threads.h"
36 #include "sql_show.h"
37 #include "sql_parse.h"
38 #include "sql_base.h"
39 #include "rpl_mi.h"
40 #include <list>
41 #include <string>
42 #include <sstream>
43 #include <my_stacktrace.h>
44 
using std::max;
using std::min;
using std::string;
using std::list;

/*
  Expands to the stringified name of flag F followed by a single space
  when (V) & (F) is non-zero, and to the empty string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
51 
52 /**
53   @defgroup Binary_Log Binary Log
54   @{
55  */
56 
/*
  A my_off_t with every bit set. binlog_trx_cache_data uses it as the
  "no position recorded" value for before_stmt_pos.
*/
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)

/*
  Constants required for the limit unsafe warnings suppression
 */
//seconds after which the limit unsafe warnings suppression will be activated
#define LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT 50
//number of limit unsafe warnings after which the suppression will be activated
#define LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT 50
//number of attempts Thread_excursion::try_to_attach_to() makes before it
//terminates the server
#define MAX_SESSION_ATTACH_TRIES 10

/*
  State of the limit unsafe warnings suppression.
  NOTE(review): the code that reads and updates these three variables is
  outside this part of the file -- confirm their exact semantics there.
*/
static ulonglong limit_unsafe_suppression_start_time= 0;
static bool unsafe_warning_suppression_is_activated= false;
static int limit_unsafe_warning_count= 0;

/* The binlog handlerton; assigned in binlog_init(). */
static handlerton *binlog_hton;
bool opt_binlog_order_commits= true;

/*
  NOTE(review): presumably the index file name and the base name of the
  binary log; they are only defined (as null pointers) here -- confirm
  where they are set.
*/
const char *log_bin_index= 0;
const char *log_bin_basename= 0;

/* The global binary log object, constructed with &sync_binlog_period. */
MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period);
79 
/*
  Forward declarations. Apart from binlog_init() itself and
  binlog_start_trans_and_stmt(), these are the callbacks that binlog_init()
  installs into the binlog handlerton.
*/
static int binlog_init(void *p);
static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event);
static int binlog_close_connection(handlerton *hton, THD *thd);
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
                                                      THD *thd);
static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);
static int binlog_clone_consistent_snapshot(handlerton *hton, THD *thd,
                                            THD *from_thd);

// The last published global binlog position
static char binlog_global_snapshot_file[FN_REFLEN];
static ulonglong binlog_global_snapshot_position;

// Binlog position variables for SHOW STATUS
static char binlog_snapshot_file[FN_REFLEN];
static ulonglong binlog_snapshot_position;

/*
  Status variable table exposing binlog_snapshot_file and
  binlog_snapshot_position; the {NullS, NullS, ...} entry ends the list.
*/
static SHOW_VAR binlog_status_vars_detail[]=
{
  {"snapshot_file",
    (char *)&binlog_snapshot_file, SHOW_CHAR},
  {"snapshot_position",
   (char *)&binlog_snapshot_position, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};
110 
111 /**
112   Print system time.
113  */
114 
print_system_time()115 static void print_system_time()
116 {
117 #ifdef __WIN__
118   SYSTEMTIME utc_time;
119   GetSystemTime(&utc_time);
120   const long hrs=  utc_time.wHour;
121   const long mins= utc_time.wMinute;
122   const long secs= utc_time.wSecond;
123 #else
124   /* Using time() instead of my_time() to avoid looping */
125   const time_t curr_time= time(NULL);
126   /* Calculate time of day */
127   const long tmins = curr_time / 60;
128   const long thrs  = tmins / 60;
129   const long hrs   = thrs  % 24;
130   const long mins  = tmins % 60;
131   const long secs  = curr_time % 60;
132 #endif
133   char hrs_buf[3]= "00";
134   char mins_buf[3]= "00";
135   char secs_buf[3]= "00";
136   int base= 10;
137   my_safe_itoa(base, hrs, &hrs_buf[2]);
138   my_safe_itoa(base, mins, &mins_buf[2]);
139   my_safe_itoa(base, secs, &secs_buf[2]);
140 
141   my_safe_printf_stderr("---------- %s:%s:%s UTC - ",
142                         hrs_buf, mins_buf, secs_buf);
143 }
144 
145 
146 /**
147   Helper class to perform a thread excursion.
148 
149   This class is used to temporarily switch to another session (THD
150   structure). It will set up thread specific "globals" correctly
151   so that the POSIX thread looks exactly like the session attached to.
152   However, PSI_thread info is not touched as it is required to show
153   the actual physial view in PFS instrumentation i.e., it should
154   depict as the real thread doing the work instead of thread it switched
155   to.
156 
157   On destruction, the original session (which is supplied to the
158   constructor) will be re-attached automatically. For example, with
159   this code, the value of @c current_thd will be the same before and
160   after execution of the code.
161 
162   @code
163   {
164     Thread_excursion excursion(current_thd);
165     for (int i = 0 ; i < count ; ++i)
166       excursion.attach_to(other_thd[i]);
167   }
168   @endcode
169 
170   @warning The class is not designed to be inherited from.
171  */
172 
173 class Thread_excursion
174 {
175 public:
Thread_excursion(THD * thd)176   Thread_excursion(THD *thd)
177     : m_original_thd(thd)
178   {
179   }
180 
~Thread_excursion()181   ~Thread_excursion() {
182 #ifndef EMBEDDED_LIBRARY
183     if (unlikely(setup_thread_globals(m_original_thd)))
184       DBUG_ASSERT(0);                           // Out of memory?!
185 #endif
186   }
187 
188   /**
189     Try to attach the POSIX thread to a session.
190     - This function attaches the POSIX thread to a session
191     in MAX_SESSION_ATTACH_TRIES tries when encountering
192     'out of memory' error, and terminates the server after
193     failed in MAX_SESSION_ATTACH_TRIES tries.
194 
195     @param[in] thd       The thd of a session
196    */
try_to_attach_to(THD * thd)197   void try_to_attach_to(THD *thd)
198   {
199     int i= 0;
200     /*
201       Attach the POSIX thread to a session in MAX_SESSION_ATTACH_TRIES
202       tries when encountering 'out of memory' error.
203     */
204     while (i < MAX_SESSION_ATTACH_TRIES)
205     {
206       /*
207         Currently attach_to(...) returns ER_OUTOFMEMORY or 0. So
208         we continue to attach the POSIX thread when encountering
209         the ER_OUTOFMEMORY error. Please take care other error
210         returned from attach_to(...) in future.
211       */
212       if (!attach_to(thd))
213       {
214         if (i > 0)
215           sql_print_warning("Server overcomes the temporary 'out of memory' "
216                             "in '%d' tries while attaching to session thread "
217                             "during the group commit phase.\n", i + 1);
218         break;
219       }
220       i++;
221     }
222     /*
223       Terminate the server after failed to attach the POSIX thread
224       to a session in MAX_SESSION_ATTACH_TRIES tries.
225     */
226     if (MAX_SESSION_ATTACH_TRIES == i)
227     {
228       print_system_time();
229       my_safe_printf_stderr("%s", "[Fatal] Out of memory while attaching to "
230                             "session thread during the group commit phase. "
231                             "Data consistency between master and slave can "
232                             "be guaranteed after server restarts.\n");
233       _exit(EXIT_FAILURE);
234     }
235   }
236 
237 private:
238 
239   /**
240     Attach the POSIX thread to a session.
241    */
attach_to(THD * thd)242   int attach_to(THD *thd)
243   {
244 #ifndef EMBEDDED_LIBRARY
245     if (DBUG_EVALUATE_IF("simulate_session_attach_error", 1, 0)
246         || unlikely(setup_thread_globals(thd)))
247     {
248       /*
249         Indirectly uses pthread_setspecific, which can only return
250         ENOMEM or EINVAL. Since store_globals are using correct keys,
251         the only alternative is out of memory.
252       */
253       return ER_OUTOFMEMORY;
254     }
255 #endif /* EMBEDDED_LIBRARY */
256     return 0;
257   }
258 
setup_thread_globals(THD * thd) const259   int setup_thread_globals(THD *thd) const {
260     int error= 0;
261     THD *original_thd= my_pthread_getspecific(THD*, THR_THD);
262     MEM_ROOT* original_mem_root= my_pthread_getspecific(MEM_ROOT*, THR_MALLOC);
263     if ((error= my_pthread_setspecific_ptr(THR_THD, thd)))
264       goto exit0;
265     if ((error= my_pthread_setspecific_ptr(THR_MALLOC, &thd->mem_root)))
266       goto exit1;
267     if ((error= set_mysys_var(thd->mysys_var)))
268       goto exit2;
269     goto exit0;
270 exit2:
271     error= my_pthread_setspecific_ptr(THR_MALLOC,  original_mem_root);
272 exit1:
273     error= my_pthread_setspecific_ptr(THR_THD,  original_thd);
274 exit0:
275     return error;
276   }
277 
278   THD *m_original_thd;
279 };
280 
281 
282 /**
283   Caches for non-transactional and transactional data before writing
284   it to the binary log.
285 
286   @todo All the access functions for the flags suggest that the
287   encapsuling is not done correctly, so try to move any logic that
288   requires access to the flags into the cache.
289 */
290 class binlog_cache_data
291 {
292 public:
293 
binlog_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)294   binlog_cache_data(bool trx_cache_arg,
295                     my_off_t max_binlog_cache_size_arg,
296                     ulong *ptr_binlog_cache_use_arg,
297                     ulong *ptr_binlog_cache_disk_use_arg)
298   : m_pending(0), saved_max_binlog_cache_size(max_binlog_cache_size_arg),
299     ptr_binlog_cache_use(ptr_binlog_cache_use_arg),
300     ptr_binlog_cache_disk_use(ptr_binlog_cache_disk_use_arg)
301   {
302     reset();
303     flags.transactional= trx_cache_arg;
304     cache_log.end_of_file= saved_max_binlog_cache_size;
305   }
306 
307   int finalize(THD *thd, Log_event *end_event);
308   int flush(THD *thd, my_off_t *bytes, bool *wrote_xid);
309   int write_event(THD *thd, Log_event *event);
310 
~binlog_cache_data()311   virtual ~binlog_cache_data()
312   {
313     DBUG_ASSERT(is_binlog_empty());
314     close_cached_file(&cache_log);
315   }
316 
is_binlog_empty() const317   bool is_binlog_empty() const
318   {
319     my_off_t pos= my_b_tell(&cache_log);
320     DBUG_PRINT("debug", ("%s_cache - pending: 0x%llx, bytes: %llu",
321                          (flags.transactional ? "trx" : "stmt"),
322                          (ulonglong) pending(), (ulonglong) pos));
323     return pending() == NULL && pos == 0;
324   }
325 
is_group_cache_empty() const326   bool is_group_cache_empty() const
327   {
328     return group_cache.is_empty();
329   }
330 
331 #ifndef DBUG_OFF
dbug_is_finalized() const332   bool dbug_is_finalized() const {
333     return flags.finalized;
334   }
335 #endif
336 
pending() const337   Rows_log_event *pending() const
338   {
339     return m_pending;
340   }
341 
set_pending(Rows_log_event * const pending)342   void set_pending(Rows_log_event *const pending)
343   {
344     m_pending= pending;
345   }
346 
set_incident(void)347   void set_incident(void)
348   {
349     flags.incident= true;
350   }
351 
has_incident(void) const352   bool has_incident(void) const
353   {
354     return flags.incident;
355   }
356 
357   /**
358     Sets the binlog_cache_data::Flags::flush_error flag if there
359     is an error while flushing cache to the file.
360 
361     @param thd  The client thread that is executing the transaction.
362   */
set_flush_error(THD * thd)363   void set_flush_error(THD *thd)
364   {
365     flags.flush_error= true;
366     if(is_trx_cache())
367     {
368       /*
369          If the cache is a transactional cache and if the write
370          has failed due to ENOSPC, then my_write() would have
371          set EE_WRITE error, so clear the error and create an
372          equivalent server error.
373       */
374       if (thd->is_error())
375         thd->clear_error();
376       char errbuf[MYSYS_STRERROR_SIZE];
377       my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), my_filename(cache_log.file),
378           errno, my_strerror(errbuf, sizeof(errbuf), errno));
379     }
380   }
381 
get_flush_error(void) const382   bool get_flush_error(void) const
383   {
384     return flags.flush_error;
385   }
386 
has_xid() const387   bool has_xid() const {
388     // There should only be an XID event if we are transactional
389     DBUG_ASSERT((flags.transactional && flags.with_xid) || !flags.with_xid);
390     return flags.with_xid;
391   }
392 
is_trx_cache() const393   bool is_trx_cache() const
394   {
395     return flags.transactional;
396   }
397 
get_byte_position() const398   my_off_t get_byte_position() const
399   {
400     return my_b_tell(&cache_log);
401   }
402 
reset()403   virtual void reset()
404   {
405     compute_statistics();
406     truncate(0);
407 
408     /*
409       If IOCACHE has a file associated, change its size to 0.
410       It is safer to do it here, since we are certain that one
411       asked the cache to go to position 0 with truncate.
412     */
413     if(cache_log.file != -1)
414     {
415       int error= 0;
416       if((error= my_chsize(cache_log.file, 0, 0, MYF(MY_WME))))
417         sql_print_warning("Unable to resize binlog IOCACHE auxilary file");
418 
419       DBUG_EXECUTE_IF("show_io_cache_size",
420                       {
421                         ulong file_size= my_seek(cache_log.file,
422                                                0L,MY_SEEK_END,MYF(MY_WME+MY_FAE));
423                         sql_print_error("New size:%ld", file_size);
424                       });
425     }
426 
427     flags.incident= false;
428     flags.with_xid= false;
429     flags.immediate= false;
430     flags.finalized= false;
431     flags.flush_error= false;
432     /*
433       The truncate function calls reinit_io_cache that calls my_b_flush_io_cache
434       which may increase disk_writes. This breaks the disk_writes use by the
435       binary log which aims to compute the ratio between in-memory cache usage
436       and disk cache usage. To avoid this undesirable behavior, we reset the
437       variable after truncating the cache.
438     */
439     cache_log.disk_writes= 0;
440     group_cache.clear();
441     DBUG_ASSERT(is_binlog_empty());
442   }
443 
444   /*
445     Sets the write position to point at the position given. If the
446     cache has swapped to a file, it reinitializes it, so that the
447     proper data is added to the IO_CACHE buffer. Otherwise, it just
448     does a my_b_seek.
449 
450     my_b_seek will not work if the cache has swapped, that's why
451     we do this workaround.
452 
453     @param[IN]  pos the new write position.
454     @param[IN]  use_reinit if the position should be reset resorting
455                 to reset_io_cache (which may issue a flush_io_cache
456                 inside)
457 
458     @return The previous write position.
459    */
reset_write_pos(my_off_t pos,bool use_reinit)460   my_off_t reset_write_pos(my_off_t pos, bool use_reinit)
461   {
462     DBUG_ENTER("reset_write_pos");
463     DBUG_ASSERT(cache_log.type == WRITE_CACHE);
464 
465     my_off_t oldpos= get_byte_position();
466 
467     if (use_reinit)
468       reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, 0);
469     else
470       my_b_seek(&cache_log, pos);
471 
472     DBUG_RETURN(oldpos);
473   }
474 
475   /*
476     Cache to store data before copying it to the binary log.
477   */
478   IO_CACHE cache_log;
479 
480   /**
481     The group cache for this cache.
482   */
483   Group_cache group_cache;
484 
485 protected:
486   /*
487     It truncates the cache to a certain position. This includes deleting the
488     pending event.
489    */
truncate(my_off_t pos)490   void truncate(my_off_t pos)
491   {
492     DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
493     remove_pending_event();
494     /*
495       Whenever there is an error while flushing cache to file,
496       the local cache will not be in a normal state and the same
497       cache cannot be used without facing an assert.
498       So, clear the cache if there is a flush error.
499     */
500     reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, get_flush_error());
501     cache_log.end_of_file= saved_max_binlog_cache_size;
502   }
503 
504   /**
505      Flush pending event to the cache buffer.
506    */
flush_pending_event(THD * thd)507   int flush_pending_event(THD *thd) {
508     if (m_pending)
509     {
510       m_pending->set_flags(Rows_log_event::STMT_END_F);
511       if (int error= write_event(thd, m_pending))
512         return error;
513       thd->clear_binlog_table_maps();
514     }
515     return 0;
516   }
517 
518   /**
519     Remove the pending event.
520    */
remove_pending_event()521   int remove_pending_event() {
522     delete m_pending;
523     m_pending= NULL;
524     return 0;
525   }
526   struct Flags {
527     /*
528       Defines if this is either a trx-cache or stmt-cache, respectively, a
529       transactional or non-transactional cache.
530     */
531     bool transactional:1;
532 
533     /*
534       This indicates that some events did not get into the cache and most likely
535       it is corrupted.
536     */
537     bool incident:1;
538 
539     /*
540       This indicates that the cache should be written without BEGIN/END.
541     */
542     bool immediate:1;
543 
544     /*
545       This flag indicates that the buffer was finalized and has to be
546       flushed to disk.
547      */
548     bool finalized:1;
549 
550     /*
551       This indicates that the cache contain an XID event.
552      */
553     bool with_xid:1;
554 
555     /*
556       This flag is set to 'true' when there is an error while flushing the
557       I/O cache to file.
558      */
559     bool flush_error:1;
560   } flags;
561 
562 private:
563   /*
564     Pending binrows event. This event is the event where the rows are currently
565     written.
566    */
567   Rows_log_event *m_pending;
568 
569   /**
570     This function computes binlog cache and disk usage.
571   */
compute_statistics()572   void compute_statistics()
573   {
574     if (!is_binlog_empty())
575     {
576       statistic_increment(*ptr_binlog_cache_use, &LOCK_status);
577       if (cache_log.disk_writes != 0)
578         statistic_increment(*ptr_binlog_cache_disk_use, &LOCK_status);
579     }
580   }
581 
582   /*
583     Stores the values of maximum size of the cache allowed when this cache
584     is configured. This corresponds to either
585       . max_binlog_cache_size or max_binlog_stmt_cache_size.
586   */
587   my_off_t saved_max_binlog_cache_size;
588 
589   /*
590     Stores a pointer to the status variable that keeps track of the in-memory
591     cache usage. This corresponds to either
592       . binlog_cache_use or binlog_stmt_cache_use.
593   */
594   ulong *ptr_binlog_cache_use;
595 
596   /*
597     Stores a pointer to the status variable that keeps track of the disk
598     cache usage. This corresponds to either
599       . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
600   */
601   ulong *ptr_binlog_cache_disk_use;
602 
603   binlog_cache_data& operator=(const binlog_cache_data& info);
604   binlog_cache_data(const binlog_cache_data& info);
605 };
606 
607 
608 class binlog_stmt_cache_data
609   : public binlog_cache_data
610 {
611 public:
binlog_stmt_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)612   binlog_stmt_cache_data(bool trx_cache_arg,
613                         my_off_t max_binlog_cache_size_arg,
614                         ulong *ptr_binlog_cache_use_arg,
615                         ulong *ptr_binlog_cache_disk_use_arg)
616     : binlog_cache_data(trx_cache_arg,
617                         max_binlog_cache_size_arg,
618                         ptr_binlog_cache_use_arg,
619                         ptr_binlog_cache_disk_use_arg)
620   {
621   }
622 
623   using binlog_cache_data::finalize;
624 
625   int finalize(THD *thd);
626 };
627 
628 
629 int
finalize(THD * thd)630 binlog_stmt_cache_data::finalize(THD *thd)
631 {
632   if (flags.immediate)
633   {
634     if (int error= finalize(thd, NULL))
635       return error;
636   }
637   else
638   {
639     Query_log_event
640       end_evt(thd, STRING_WITH_LEN("COMMIT"), false, false, true, 0, true);
641     if (int error= finalize(thd, &end_evt))
642       return error;
643   }
644   return 0;
645 }
646 
647 
648 class binlog_trx_cache_data : public binlog_cache_data
649 {
650 public:
binlog_trx_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)651   binlog_trx_cache_data(bool trx_cache_arg,
652                         my_off_t max_binlog_cache_size_arg,
653                         ulong *ptr_binlog_cache_use_arg,
654                         ulong *ptr_binlog_cache_disk_use_arg)
655   : binlog_cache_data(trx_cache_arg,
656                       max_binlog_cache_size_arg,
657                       ptr_binlog_cache_use_arg,
658                       ptr_binlog_cache_disk_use_arg),
659     m_cannot_rollback(FALSE), before_stmt_pos(MY_OFF_T_UNDEF)
660   {   }
661 
reset()662   void reset()
663   {
664     DBUG_ENTER("reset");
665     DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
666     m_cannot_rollback= FALSE;
667     before_stmt_pos= MY_OFF_T_UNDEF;
668     binlog_cache_data::reset();
669     DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
670     DBUG_VOID_RETURN;
671   }
672 
cannot_rollback() const673   bool cannot_rollback() const
674   {
675     return m_cannot_rollback;
676   }
677 
set_cannot_rollback()678   void set_cannot_rollback()
679   {
680     m_cannot_rollback= TRUE;
681   }
682 
get_prev_position() const683   my_off_t get_prev_position() const
684   {
685      return before_stmt_pos;
686   }
687 
set_prev_position(my_off_t pos)688   void set_prev_position(my_off_t pos)
689   {
690     DBUG_ENTER("set_prev_position");
691     DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
692     before_stmt_pos= pos;
693     DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
694     DBUG_VOID_RETURN;
695   }
696 
restore_prev_position()697   void restore_prev_position()
698   {
699     DBUG_ENTER("restore_prev_position");
700     DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
701     binlog_cache_data::truncate(before_stmt_pos);
702     before_stmt_pos= MY_OFF_T_UNDEF;
703     DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
704     DBUG_VOID_RETURN;
705   }
706 
restore_savepoint(my_off_t pos)707   void restore_savepoint(my_off_t pos)
708   {
709     DBUG_ENTER("restore_savepoint");
710     DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
711     binlog_cache_data::truncate(pos);
712     if (pos <= before_stmt_pos)
713       before_stmt_pos= MY_OFF_T_UNDEF;
714     DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
715     DBUG_VOID_RETURN;
716   }
717 
718   using binlog_cache_data::truncate;
719 
720   int truncate(THD *thd, bool all);
721 
722 private:
723   /*
724     It will be set TRUE if any statement which cannot be rolled back safely
725     is put in trx_cache.
726   */
727   bool m_cannot_rollback;
728 
729   /*
730     Binlog position before the start of the current statement.
731   */
732   my_off_t before_stmt_pos;
733 
734   binlog_trx_cache_data& operator=(const binlog_trx_cache_data& info);
735   binlog_trx_cache_data(const binlog_trx_cache_data& info);
736 };
737 
738 class binlog_cache_mngr {
739 public:
binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,ulong * ptr_binlog_stmt_cache_use_arg,ulong * ptr_binlog_stmt_cache_disk_use_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)740   binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,
741                     ulong *ptr_binlog_stmt_cache_use_arg,
742                     ulong *ptr_binlog_stmt_cache_disk_use_arg,
743                     my_off_t max_binlog_cache_size_arg,
744                     ulong *ptr_binlog_cache_use_arg,
745                     ulong *ptr_binlog_cache_disk_use_arg)
746   : stmt_cache(FALSE, max_binlog_stmt_cache_size_arg,
747                ptr_binlog_stmt_cache_use_arg,
748                ptr_binlog_stmt_cache_disk_use_arg),
749     trx_cache(TRUE, max_binlog_cache_size_arg,
750               ptr_binlog_cache_use_arg,
751               ptr_binlog_cache_disk_use_arg)
752   {  }
753 
get_binlog_cache_data(bool is_transactional)754   binlog_cache_data* get_binlog_cache_data(bool is_transactional)
755   {
756     if (is_transactional)
757       return &trx_cache;
758     else
759       return &stmt_cache;
760   }
761 
get_binlog_cache_log(bool is_transactional)762   IO_CACHE* get_binlog_cache_log(bool is_transactional)
763   {
764     return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
765   }
766 
767   /**
768     Convenience method to check if both caches are empty.
769    */
is_binlog_empty() const770   bool is_binlog_empty() const {
771     return stmt_cache.is_binlog_empty() && trx_cache.is_binlog_empty();
772   }
773 
774   /*
775     clear stmt_cache and trx_cache if they are not empty
776   */
reset()777   void reset()
778   {
779     if (!stmt_cache.is_binlog_empty())
780       stmt_cache.reset();
781     if (!trx_cache.is_binlog_empty())
782       trx_cache.reset();
783   }
784 
785 #ifndef DBUG_OFF
dbug_any_finalized() const786   bool dbug_any_finalized() const {
787     return stmt_cache.dbug_is_finalized() || trx_cache.dbug_is_finalized();
788   }
789 #endif
790 
791   /*
792     Convenience method to flush both caches to the binary log.
793 
794     @param bytes_written Pointer to variable that will be set to the
795                          number of bytes written for the flush.
796     @param wrote_xid     Pointer to variable that will be set to @c
797                          true if any XID event was written to the
798                          binary log. Otherwise, the variable will not
799                          be touched.
800     @return Error code on error, zero if no error.
801    */
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)802   int flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
803   {
804     my_off_t stmt_bytes= 0;
805     my_off_t trx_bytes= 0;
806     DBUG_ASSERT(stmt_cache.has_xid() == 0);
807     if (int error= stmt_cache.flush(thd, &stmt_bytes, wrote_xid))
808       return error;
809     if (int error= trx_cache.flush(thd, &trx_bytes, wrote_xid))
810       return error;
811     *bytes_written= stmt_bytes + trx_bytes;
812     return 0;
813   }
814 
815   binlog_stmt_cache_data stmt_cache;
816   binlog_trx_cache_data trx_cache;
817 
818   LOG_INFO binlog_info;
819 
820 private:
821 
822   binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
823   binlog_cache_mngr(const binlog_cache_mngr& info);
824 };
825 
826 
thd_get_cache_mngr(const THD * thd)827 static binlog_cache_mngr *thd_get_cache_mngr(const THD *thd)
828 {
829   /*
830     If opt_bin_log is not set, binlog_hton->slot == -1 and hence
831     thd_get_ha_data(thd, hton) segfaults.
832   */
833   DBUG_ASSERT(opt_bin_log);
834   return (binlog_cache_mngr *)thd_get_ha_data(thd, binlog_hton);
835 }
836 
837 
838 /**
839   Checks if the BINLOG_CACHE_SIZE's value is greater than MAX_BINLOG_CACHE_SIZE.
840   If this happens, the BINLOG_CACHE_SIZE is set to MAX_BINLOG_CACHE_SIZE.
841 */
check_binlog_cache_size(THD * thd)842 void check_binlog_cache_size(THD *thd)
843 {
844   if (binlog_cache_size > max_binlog_cache_size)
845   {
846     if (thd)
847     {
848       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
849                           ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX,
850                           ER(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
851                           (ulong) binlog_cache_size,
852                           (ulong) max_binlog_cache_size);
853     }
854     else
855     {
856       sql_print_warning(ER_DEFAULT(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
857                         (ulong) binlog_cache_size,
858                         (ulong) max_binlog_cache_size);
859     }
860     binlog_cache_size= max_binlog_cache_size;
861   }
862 }
863 
864 /**
865   Checks if the BINLOG_STMT_CACHE_SIZE's value is greater than MAX_BINLOG_STMT_CACHE_SIZE.
866   If this happens, the BINLOG_STMT_CACHE_SIZE is set to MAX_BINLOG_STMT_CACHE_SIZE.
867 */
check_binlog_stmt_cache_size(THD * thd)868 void check_binlog_stmt_cache_size(THD *thd)
869 {
870   if (binlog_stmt_cache_size > max_binlog_stmt_cache_size)
871   {
872     if (thd)
873     {
874       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
875                           ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX,
876                           ER(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
877                           (ulong) binlog_stmt_cache_size,
878                           (ulong) max_binlog_stmt_cache_size);
879     }
880     else
881     {
882       sql_print_warning(ER_DEFAULT(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
883                         (ulong) binlog_stmt_cache_size,
884                         (ulong) max_binlog_stmt_cache_size);
885     }
886     binlog_stmt_cache_size= max_binlog_stmt_cache_size;
887   }
888 }
889 
890 /**
891  Check whether binlog_hton has valid slot and enabled
892 */
binlog_enabled()893 bool binlog_enabled()
894 {
895 	return(binlog_hton && binlog_hton->slot != HA_SLOT_UNDEF);
896 }
897 
898  /*
899   Save position of binary log transaction cache.
900 
  SYNOPSIS
902     binlog_trans_log_savepos()
903 
904     thd      The thread to take the binlog data from
905     pos      Pointer to variable where the position will be stored
906 
907   DESCRIPTION
908 
909     Save the current position in the binary log transaction cache into
910     the variable pointed to by 'pos'
911  */
912 
913 static void
binlog_trans_log_savepos(THD * thd,my_off_t * pos)914 binlog_trans_log_savepos(THD *thd, my_off_t *pos)
915 {
916   DBUG_ENTER("binlog_trans_log_savepos");
917   DBUG_ASSERT(pos != NULL);
918   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
919   DBUG_ASSERT(mysql_bin_log.is_open());
920   *pos= cache_mngr->trx_cache.get_byte_position();
921   DBUG_PRINT("return", ("position: %lu", (ulong) *pos));
922   DBUG_VOID_RETURN;
923 }
924 
925 
926 /*
927   this function is mostly a placeholder.
928   conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
929   should be moved here.
930 */
931 
binlog_init(void * p)932 static int binlog_init(void *p)
933 {
934   binlog_hton= (handlerton *)p;
935   binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
936   binlog_hton->db_type=DB_TYPE_BINLOG;
937   binlog_hton->savepoint_offset= sizeof(my_off_t);
938   binlog_hton->close_connection= binlog_close_connection;
939   binlog_hton->savepoint_set= binlog_savepoint_set;
940   binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
941   binlog_hton->savepoint_rollback_can_release_mdl=
942                                      binlog_savepoint_rollback_can_release_mdl;
943   binlog_hton->commit= binlog_commit;
944   binlog_hton->rollback= binlog_rollback;
945   binlog_hton->prepare= binlog_prepare;
946   binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
947   binlog_hton->clone_consistent_snapshot= binlog_clone_consistent_snapshot;
948   binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
949   return 0;
950 }
951 
binlog_close_connection(handlerton * hton,THD * thd)952 static int binlog_close_connection(handlerton *hton, THD *thd)
953 {
954   DBUG_ENTER("binlog_close_connection");
955   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
956   DBUG_ASSERT(cache_mngr->is_binlog_empty());
957   DBUG_ASSERT(cache_mngr->trx_cache.is_group_cache_empty() &&
958               cache_mngr->stmt_cache.is_group_cache_empty());
959   DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) NULL));
960   thd_set_ha_data(thd, binlog_hton, NULL);
961   cache_mngr->~binlog_cache_mngr();
962   my_free(cache_mngr);
963   DBUG_RETURN(0);
964 }
965 
should_write_gtids(const THD * thd)966 static bool should_write_gtids(const THD *thd) {
967   /*
968     Return false in the situation where slave sql_thread is
969     trying to generate gtid's for binlog events received from master.
970 
971     Note that the check thd->variables.gtid_next.type == AUTOMATIC_GROUP
972     is used to ensure that a new gtid is generated for the transaction group,
973     instead of using SESSION.gtid_next value.
974   */
975   if (thd->rli_slave &&
976       thd->variables.gtid_next.type == AUTOMATIC_GROUP)
977     return false;
978   /*
979     Return true (allow gtids to be generated) in the scenario where
980     opt_gtid_deployment_step is false (Normal run after deployment procedure
981     is done).
982 
983     Return true in the scenario where slave sql_thread uses gtid received from
984     master. This is necessary in the situation where deployment is done on
985     master, but slave still in deployment mode (opt_gtid_deployment_step is true).
986   */
987   return (!opt_gtid_deployment_step || (thd->rli_slave &&
988           thd->variables.gtid_next.type != AUTOMATIC_GROUP));
989 
990 }
991 
/**
  Write an event into this cache, preceded by a GTID event when a new
  group is started.

  When gtid_mode is on and should_write_gtids() allows it, a logged group
  is registered in the group cache at the current byte position; if that
  appended a new group, a Gtid_log_event is written to the cache first.
  The event itself is optional: with ev == NULL only the GTID
  bookkeeping is done.

  @param thd  The session writing to the cache.
  @param ev   The event to write, or NULL.

  @return 0 on success, 1 if registering the group or writing an event
          failed.
*/
int binlog_cache_data::write_event(THD *thd, Log_event *ev)
{
  DBUG_ENTER("binlog_cache_data::write_event");

  if (gtid_mode > 0 && should_write_gtids(thd))
  {
    Group_cache::enum_add_group_status status=
      group_cache.add_logged_group(thd, get_byte_position());
    if (status == Group_cache::ERROR)
      DBUG_RETURN(1);
    else if (status == Group_cache::APPEND_NEW_GROUP)
    {
      /* A new group starts here: its GTID event must precede the data. */
      Gtid_log_event gtid_ev(thd, is_trx_cache());
      if (gtid_ev.write(&cache_log) != 0)
        DBUG_RETURN(1);
    }
  }

  if (ev != NULL)
  {
    /* Debug-only hooks that arm simulated write errors before the write. */
    DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
                  {DBUG_SET("+d,simulate_file_write_error");});

    DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
                  {
                  static int count= -1;
                  count++;
                  if(count % 4 == 3 && ev->get_type_code() == WRITE_ROWS_EVENT)
                    DBUG_SET("+d,simulate_temp_file_write_error");
                  });
    if (ev->write(&cache_log) != 0)
    {
      DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
                      {
                        DBUG_SET("-d,simulate_file_write_error");
                        DBUG_SET("-d,simulate_disk_full_at_flush_pending");
                        /*
                           after +d,simulate_file_write_error the local cache
                           is in unsane state. Since -d,simulate_file_write_error
                           revokes the first simulation do_write_cache()
                           can't be run without facing an assert.
                           So it's blocked with the following 2nd simulation:
                        */
                        DBUG_SET("+d,simulate_do_write_cache_failure");
                      });

      DBUG_EXECUTE_IF("simulate_temp_file_write_error",
                      {
                        DBUG_SET("-d,simulate_temp_file_write_error");
                      });
      /*
        If the flush has failed due to ENOSPC error, set the
        flush_error flag.
      */
      if (thd->is_error() && my_errno == ENOSPC)
      {
        set_flush_error(thd);
      }
      DBUG_RETURN(1);
    }
    /* Remember properties of the written event in the cache flags. */
    if (ev->get_type_code() == XID_EVENT)
      flags.with_xid= true;
    if (ev->is_using_immediate_logging())
      flags.immediate= true;
  }
  DBUG_RETURN(0);
}
1059 
1060 
1061 /**
1062   Checks if the given GTID exists in the Group_cache. If not, add it
1063   as an empty group.
1064 
1065   @todo Move this function into the cache class?
1066 
1067   @param thd THD object that owns the Group_cache
1068   @param cache_data binlog_cache_data object for the cache
1069   @param gtid GTID to check
1070 */
write_one_empty_group_to_cache(THD * thd,binlog_cache_data * cache_data,Gtid gtid)1071 static int write_one_empty_group_to_cache(THD *thd,
1072                                           binlog_cache_data *cache_data,
1073                                           Gtid gtid)
1074 {
1075   DBUG_ENTER("write_one_empty_group_to_cache");
1076   Group_cache *group_cache= &cache_data->group_cache;
1077   if (group_cache->contains_gtid(gtid))
1078     DBUG_RETURN(0);
1079   /*
1080     Apparently this code is not being called. We need to
1081     investigate if this is a bug or this code is not
1082     necessary. /Alfranio
1083 
1084     Empty groups are currently being handled in the function
1085     gtid_empty_group_log_and_cleanup().
1086   */
1087   DBUG_ASSERT(0); /*NOTREACHED*/
1088 #ifdef NON_ERROR_GTID
1089   IO_CACHE *cache= &cache_data->cache_log;
1090   Group_cache::enum_add_group_status status= group_cache->add_empty_group(gtid);
1091   if (status == Group_cache::ERROR)
1092     DBUG_RETURN(1);
1093   DBUG_ASSERT(status == Group_cache::APPEND_NEW_GROUP);
1094   Gtid_specification spec= { GTID_GROUP, gtid };
1095   Gtid_log_event gtid_ev(thd, cache_data->is_trx_cache(), &spec);
1096   if (gtid_ev.write(cache) != 0)
1097     DBUG_RETURN(1);
1098 #endif
1099   DBUG_RETURN(0);
1100 }
1101 
1102 /**
1103   Writes all GTIDs that the thread owns to the stmt/trx cache, if the
1104   GTID is not already in the cache.
1105 
1106   @todo Move this function into the cache class?
1107 
1108   @param thd THD object for the thread that owns the cache.
1109   @param cache_data The cache.
1110 */
write_empty_groups_to_cache(THD * thd,binlog_cache_data * cache_data)1111 static int write_empty_groups_to_cache(THD *thd, binlog_cache_data *cache_data)
1112 {
1113   DBUG_ENTER("write_empty_groups_to_cache");
1114   if (thd->owned_gtid.sidno == -1)
1115   {
1116 #ifdef HAVE_GTID_NEXT_LIST
1117     Gtid_set::Gtid_iterator git(&thd->owned_gtid_set);
1118     Gtid gtid= git.get();
1119     while (gtid.sidno != 0)
1120     {
1121       if (write_one_empty_group_to_cache(thd, cache_data, gtid) != 0)
1122         DBUG_RETURN(1);
1123       git.next();
1124       gtid= git.get();
1125     }
1126 #else
1127     DBUG_ASSERT(0);
1128 #endif
1129   }
1130   else if (thd->owned_gtid.sidno > 0)
1131     if (write_one_empty_group_to_cache(thd, cache_data, thd->owned_gtid) != 0)
1132       DBUG_RETURN(1);
1133   DBUG_RETURN(0);
1134 }
1135 
1136 
/**
  Prepare the GTID information of a cache right before the cache is
  written to the binary log.

  Nothing is done when gtid_mode is off or should_write_gtids() says no.
  Otherwise, while holding global_sid_lock for reading, an automatic
  group number is generated if gtid_next is AUTOMATIC_GROUP, and the
  GTIDs owned by the thread are added to the cache. For AUTOMATIC_GROUP,
  the Gtid_log_event at the start of the cache is then overwritten using
  the specification of the first cached group.

  @todo Move this function into the cache class?

  @param thd         The session whose cache is about to be written.
  @param cache_data  The cache to prepare.

  @return 0 on success, 1 on failure.
 */
static int
gtid_before_write_cache(THD* thd, binlog_cache_data* cache_data)
{
  DBUG_ENTER("gtid_before_write_cache");
  int error= 0;

  DBUG_ASSERT(thd->variables.gtid_next.type != UNDEFINED_GROUP);

  if (gtid_mode == 0 || !should_write_gtids(thd))
  {
    DBUG_RETURN(0);
  }

  Group_cache* group_cache= &cache_data->group_cache;

  /* Every exit path below must release this read lock. */
  global_sid_lock->rdlock();

  if (thd->variables.gtid_next.type == AUTOMATIC_GROUP)
  {
    if (group_cache->generate_automatic_gno(thd) !=
        RETURN_STATUS_OK)
    {
      global_sid_lock->unlock();
      DBUG_RETURN(1);
    }
  }
  if (write_empty_groups_to_cache(thd, cache_data) != 0)
  {
    global_sid_lock->unlock();
    DBUG_RETURN(1);
  }

  global_sid_lock->unlock();

  /*
    If an automatic group number was generated, change the first event
    into a "real" one.
  */
  if (thd->variables.gtid_next.type == AUTOMATIC_GROUP)
  {
    DBUG_ASSERT(group_cache->get_n_groups() == 1);
    Cached_group *cached_group= group_cache->get_unsafe_pointer(0);
    DBUG_ASSERT(cached_group->spec.type != AUTOMATIC_GROUP);
    Gtid_log_event gtid_ev(thd, cache_data->is_trx_cache(),
                           &cached_group->spec);
    /* A non-zero pos_in_file is taken to mean the cache uses a file. */
    bool using_file= cache_data->cache_log.pos_in_file > 0;

    DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
                  {
                  DBUG_SET("+d,simulate_temp_file_write_error");
                  });

    /* Move the write position to the start, remembering the old one. */
    my_off_t saved_position= cache_data->reset_write_pos(0, using_file);

    if (!cache_data->cache_log.error)
    {
      if (gtid_ev.write(&cache_data->cache_log))
        goto err;
      /* Restore the write position saved above. */
      cache_data->reset_write_pos(saved_position, using_file);
    }

    if (cache_data->cache_log.error)
      goto err;
  }

  DBUG_RETURN(error);

err:
  DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
                {
                DBUG_SET("-d,simulate_temp_file_write_error");
                });
  /*
    If the reinit_io_cache has failed, set the flush_error flag.
  */
  if (cache_data->cache_log.error)
  {
    cache_data->set_flush_error(thd);
  }
  DBUG_RETURN(1);

}
1223 
1224 /**
1225    The function logs an empty group with GTID and performs cleanup.
1226    Its logic wrt GTID is equivalent to one of binlog_commit().
1227    It's called at the end of statement execution in case binlog_commit()
1228    was skipped.
1229    Such cases are due ineffective binlogging incl an empty group
1230    re-execution.
1231 
1232    @param thd   The thread handle
1233 
1234    @return
1235     nonzero if an error pops up.
1236 */
gtid_empty_group_log_and_cleanup(THD * thd)1237 int gtid_empty_group_log_and_cleanup(THD *thd)
1238 {
1239   int ret= 1;
1240   binlog_cache_data* cache_data= NULL;
1241 
1242   DBUG_ENTER("gtid_empty_group_log_and_cleanup");
1243 
1244   Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE,
1245                           FALSE, TRUE, 0, TRUE);
1246   DBUG_ASSERT(!qinfo.is_using_immediate_logging());
1247 
1248   /*
1249     thd->cache_mngr is uninitialized on the first empty transaction.
1250   */
1251   if (thd->binlog_setup_trx_data())
1252     DBUG_RETURN(1);
1253   cache_data= &thd_get_cache_mngr(thd)->trx_cache;
1254   DBUG_PRINT("debug", ("Writing to trx_cache"));
1255   if (cache_data->write_event(thd, &qinfo) ||
1256       gtid_before_write_cache(thd, cache_data))
1257     goto err;
1258 
1259   ret= mysql_bin_log.commit(thd, true);
1260 
1261 err:
1262   DBUG_RETURN(ret);
1263 }
1264 
1265 /**
1266   This function finalizes the cache preparing for commit or rollback.
1267 
1268   The function just writes all the necessary events to the cache but
1269   does not flush the data to the binary log file. That is the role of
1270   the binlog_cache_data::flush function.
1271 
1272   @see binlog_cache_data::flush
1273 
1274   @param thd                The thread whose transaction should be flushed
1275   @param cache_data         Pointer to the cache
1276   @param end_ev             The end event either commit/rollback
1277 
1278   @return
1279     nonzero if an error pops up when flushing the cache.
1280 */
1281 int
finalize(THD * thd,Log_event * end_event)1282 binlog_cache_data::finalize(THD *thd, Log_event *end_event)
1283 {
1284   DBUG_ENTER("binlog_cache_data::finalize");
1285   if (!is_binlog_empty())
1286   {
1287     DBUG_ASSERT(!flags.finalized);
1288     if (int error= flush_pending_event(thd))
1289       DBUG_RETURN(error);
1290     if (int error= write_event(thd, end_event))
1291       DBUG_RETURN(error);
1292     flags.finalized= true;
1293     DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
1294   }
1295   DBUG_RETURN(0);
1296 }
1297 
1298 /**
1299   Flush caches to the binary log.
1300 
1301   If the cache is finalized, the cache will be flushed to the binary
1302   log file. If the cache is not finalized, nothing will be done.
1303 
1304   If flushing fails for any reason, an error will be reported and the
1305   cache will be reset. Flushing can fail in two circumstances:
1306 
1307   - It was not possible to write the cache to the file. In this case,
1308     it does not make sense to keep the cache.
1309 
1310   - The cache was successfully written to disk but post-flush actions
1311     (such as binary log rotation) failed. In this case, the cache is
1312     already written to disk and there is no reason to keep it.
1313 
1314   @see binlog_cache_data::finalize
1315  */
1316 int
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)1317 binlog_cache_data::flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
1318 {
1319   /*
1320     Doing a commit or a rollback including non-transactional tables,
1321     i.e., ending a transaction where we might write the transaction
1322     cache to the binary log.
1323 
1324     We can always end the statement when ending a transaction since
1325     transactions are not allowed inside stored functions. If they
1326     were, we would have to ensure that we're not ending a statement
1327     inside a stored function.
1328   */
1329   DBUG_ENTER("binlog_cache_data::flush");
1330   DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
1331   int error= 0;
1332   if (flags.finalized)
1333   {
1334     my_off_t bytes_in_cache= my_b_tell(&cache_log);
1335     DBUG_PRINT("debug", ("bytes_in_cache: %llu", bytes_in_cache));
1336     /*
1337       The cache is always reset since subsequent rollbacks of the
1338       transactions might trigger attempts to write to the binary log
1339       if the cache is not reset.
1340      */
1341     if (!(error= gtid_before_write_cache(thd, this)))
1342       error= mysql_bin_log.write_cache(thd, this);
1343     else
1344       thd->commit_error= THD::CE_FLUSH_ERROR;
1345 
1346     if (flags.with_xid && error == 0)
1347       *wrote_xid= true;
1348 
1349     /*
1350       Reset have to be after the if above, since it clears the
1351       with_xid flag
1352     */
1353     reset();
1354     if (bytes_written)
1355       *bytes_written= bytes_in_cache;
1356   }
1357   DBUG_ASSERT(!flags.finalized);
1358   DBUG_RETURN(error);
1359 }
1360 
1361 /**
1362   This function truncates the transactional cache upon committing or rolling
1363   back either a transaction or a statement.
1364 
1365   @param thd        The thread whose transaction should be flushed
1366   @param cache_mngr Pointer to the cache data to be flushed
1367   @param all        @c true means truncate the transaction, otherwise the
1368                     statement must be truncated.
1369 
1370   @return
1371     nonzero if an error pops up when truncating the transactional cache.
1372 */
1373 int
truncate(THD * thd,bool all)1374 binlog_trx_cache_data::truncate(THD *thd, bool all)
1375 {
1376   DBUG_ENTER("binlog_trx_cache_data::truncate");
1377   int error=0;
1378 
1379   DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1380                       FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1381                       FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1382                       all ? "all" : "stmt"));
1383 
1384   remove_pending_event();
1385 
1386   /*
1387     If rolling back an entire transaction or a single statement not
1388     inside a transaction, we reset the transaction cache.
1389   */
1390   if (ending_trans(thd, all))
1391   {
1392     if (has_incident())
1393       error= mysql_bin_log.write_incident(thd, true/*need_lock_log=true*/);
1394     reset();
1395   }
1396   /*
1397     If rolling back a statement in a transaction, we truncate the
1398     transaction cache to remove the statement.
1399   */
1400   else if (get_prev_position() != MY_OFF_T_UNDEF)
1401   {
1402     restore_prev_position();
1403     if (is_binlog_empty())
1404     {
1405       /*
1406         After restoring the previous position, we need to check if
1407         the cache is empty. In such case, the group cache needs to
1408         be cleaned up too because the GTID is removed too from the
1409         cache.
1410 
1411         So if any change happens again, the GTID must be rewritten
1412         and this will not happen if the group cache is not cleaned
1413         up.
1414 
1415         After integrating this with NDB, we need to check if the
1416         current approach is enough or the group cache needs to
1417         explicitly support rollback to savepoints.
1418       */
1419       group_cache.clear();
1420     }
1421   }
1422 
1423   thd->clear_binlog_table_maps();
1424 
1425   DBUG_RETURN(error);
1426 }
1427 
binlog_prepare(handlerton * hton,THD * thd,bool all)1428 static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1429 {
1430   /*
1431     do nothing.
1432     just pretend we can do 2pc, so that MySQL won't
1433     switch to 1pc.
1434     real work will be done in MYSQL_BIN_LOG::commit()
1435   */
1436   return 0;
1437 }
1438 
binlog_start_consistent_snapshot(handlerton * hton,THD * thd)1439 static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
1440 {
1441   int err= 0;
1442   DBUG_ENTER("binlog_start_consistent_snapshot");
1443 
1444   if ((err= thd->binlog_setup_trx_data()))
1445     DBUG_RETURN(err);
1446 
1447   binlog_cache_mngr * const cache_mngr= thd_get_cache_mngr(thd);
1448 
1449   /* Server layer calls us with LOCK_log locked, so this is safe. */
1450   mysql_bin_log.raw_get_current_log(&cache_mngr->binlog_info);
1451 
1452   trans_register_ha(thd, TRUE, hton);
1453 
1454   DBUG_RETURN(err);
1455 }
1456 
binlog_clone_consistent_snapshot(handlerton * hton,THD * thd,THD * from_thd)1457 static int binlog_clone_consistent_snapshot(handlerton *hton, THD *thd,
1458                                             THD *from_thd)
1459 {
1460   binlog_cache_mngr *from_cache_mngr;
1461   binlog_cache_mngr *cache_mngr;
1462   int err= 0;
1463   char log_file_name[FN_REFLEN];
1464   my_off_t pos;
1465 
1466   DBUG_ENTER("binlog_start_consistent_snapshot");
1467 
1468   from_cache_mngr= opt_bin_log ?
1469     (binlog_cache_mngr *) thd_get_cache_mngr(from_thd) : NULL;
1470 
1471   if (from_cache_mngr == NULL)
1472   {
1473     push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1474                         HA_ERR_UNSUPPORTED,
1475                         "WITH CONSISTENT SNAPSHOT FROM SESSION was ignored for "
1476                         "binary log, because the specified session does not "
1477                         "have a consistent snapshot of binary log "
1478                         "coordinates.");
1479     DBUG_RETURN(0);
1480   }
1481 
1482   if ((err= thd->binlog_setup_trx_data()))
1483     DBUG_RETURN(err);
1484 
1485   cache_mngr= thd_get_cache_mngr(thd);
1486 
1487   mysql_mutex_lock(&from_cache_mngr->binlog_info.lock);
1488 
1489   pos= from_cache_mngr->binlog_info.pos;
1490   strmake(log_file_name, from_cache_mngr->binlog_info.log_file_name,
1491           sizeof(log_file_name) - 1);
1492 
1493   mysql_mutex_unlock(&from_cache_mngr->binlog_info.lock);
1494 
1495   mysql_mutex_lock(&cache_mngr->binlog_info.lock);
1496 
1497   cache_mngr->binlog_info.pos = pos;
1498   strmake(cache_mngr->binlog_info.log_file_name, log_file_name,
1499           sizeof(cache_mngr->binlog_info.log_file_name) - 1);
1500 
1501   mysql_mutex_unlock(&cache_mngr->binlog_info.lock);
1502 
1503   trans_register_ha(thd, TRUE, hton);
1504 
1505   DBUG_RETURN(err);
1506 }
1507 
1508 /**
1509   This function is called once after each statement.
1510 
1511   @todo This function is currently not used any more and will
1512   eventually be eliminated. The real commit job is done in the
1513   MYSQL_BIN_LOG::commit function.
1514 
1515   @see MYSQL_BIN_LOG::commit
1516 
1517   @param hton  The binlog handlerton.
1518   @param thd   The client thread that executes the transaction.
1519   @param all   This is @c true if this is a real transaction commit, and
1520                @false otherwise.
1521 
1522   @see handlerton::commit
1523 */
binlog_commit(handlerton * hton,THD * thd,bool all)1524 static int binlog_commit(handlerton *hton, THD *thd, bool all)
1525 {
1526   DBUG_ENTER("binlog_commit");
1527   /*
1528     Nothing to do (any more) on commit.
1529    */
1530   DBUG_RETURN(0);
1531 }
1532 
1533 /**
1534   This function is called when a transaction or a statement is rolled back.
1535 
1536   @internal It is necessary to execute a rollback here if the
1537   transaction was rolled back because of executing a ROLLBACK TO
1538   SAVEPOINT command, but it is not used for normal rollback since
1539   MYSQL_BIN_LOG::rollback is called in that case.
1540 
1541   @todo Refactor code to introduce a <code>MYSQL_BIN_LOG::rollback(THD
1542   *thd, SAVEPOINT *sv)</code> function in @c TC_LOG and have that
1543   function execute the necessary work to rollback to a savepoint.
1544 
1545   @param hton  The binlog handlerton.
1546   @param thd   The client thread that executes the transaction.
1547   @param all   This is @c true if this is a real transaction rollback, and
1548                @false otherwise.
1549 
1550   @see handlerton::rollback
1551 */
binlog_rollback(handlerton * hton,THD * thd,bool all)1552 static int binlog_rollback(handlerton *hton, THD *thd, bool all)
1553 {
1554   DBUG_ENTER("binlog_rollback");
1555   int error= 0;
1556   if (thd->lex->sql_command == SQLCOM_ROLLBACK_TO_SAVEPOINT)
1557     error= mysql_bin_log.rollback(thd, all);
1558   DBUG_RETURN(error);
1559 }
1560 
1561 
1562 bool
append(THD * first)1563 Stage_manager::Mutex_queue::append(THD *first)
1564 {
1565   DBUG_ENTER("Stage_manager::Mutex_queue::append");
1566   lock();
1567   DBUG_PRINT("enter", ("first: 0x%llx", (ulonglong) first));
1568   DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1569                        (ulonglong) m_first, (ulonglong) &m_first,
1570                        (ulonglong) m_last));
1571   bool empty= (m_first == NULL);
1572   *m_last= first;
1573   DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1574                        (ulonglong) m_first, (ulonglong) &m_first,
1575                        (ulonglong) m_last));
1576   /*
1577     Go to the last THD instance of the list. We expect lists to be
1578     moderately short. If they are not, we need to track the end of
1579     the queue as well.
1580   */
1581   while (first->next_to_commit)
1582     first= first->next_to_commit;
1583   m_last= &first->next_to_commit;
1584   DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1585                         (ulonglong) m_first, (ulonglong) &m_first,
1586                         (ulonglong) m_last));
1587   DBUG_ASSERT(m_first || m_last == &m_first);
1588   DBUG_PRINT("return", ("empty: %s", YESNO(empty)));
1589   unlock();
1590   DBUG_RETURN(empty);
1591 }
1592 
1593 
1594 std::pair<bool, THD*>
pop_front()1595 Stage_manager::Mutex_queue::pop_front()
1596 {
1597   DBUG_ENTER("Stage_manager::Mutex_queue::pop_front");
1598   lock();
1599   THD *result= m_first;
1600   bool more= true;
1601   /*
1602     We do not set next_to_commit to NULL here since this is only used
1603     in the flush stage. We will have to call fetch_queue last here,
1604     and will then "cut" the linked list by setting the end of that
1605     queue to NULL.
1606   */
1607   if (result)
1608     m_first= result->next_to_commit;
1609   if (m_first == NULL)
1610   {
1611     more= false;
1612     m_last = &m_first;
1613   }
1614   DBUG_ASSERT(m_first || m_last == &m_first);
1615   unlock();
1616   DBUG_PRINT("return", ("result: 0x%llx, more: %s",
1617                         (ulonglong) result, YESNO(more)));
1618   DBUG_RETURN(std::make_pair(more, result));
1619 }
1620 
1621 
1622 bool
enroll_for(StageID stage,THD * thd,mysql_mutex_t * stage_mutex)1623 Stage_manager::enroll_for(StageID stage, THD *thd, mysql_mutex_t *stage_mutex)
1624 {
1625   // If the queue was empty: we're the leader for this batch
1626   DBUG_PRINT("debug", ("Enqueue 0x%llx to queue for stage %d",
1627                        (ulonglong) thd, stage));
1628   bool leader= m_queue[stage].append(thd);
1629 
1630   /*
1631     The stage mutex can be NULL if we are enrolling for the first
1632     stage.
1633   */
1634   if (stage_mutex)
1635     mysql_mutex_unlock(stage_mutex);
1636 
1637   /*
1638     If the queue was not empty, we're a follower and wait for the
1639     leader to process the queue. If we were holding a mutex, we have
1640     to release it before going to sleep.
1641   */
1642   if (!leader)
1643   {
1644     mysql_mutex_lock(&m_lock_done);
1645 #ifndef DBUG_OFF
1646     /*
1647       Leader can be awaiting all-clear to preempt follower's execution.
1648       With setting the status the follower ensures it won't execute anything
1649       including thread-specific code.
1650     */
1651     thd->transaction.flags.ready_preempt= 1;
1652     if (leader_await_preempt_status)
1653       mysql_cond_signal(&m_cond_preempt);
1654 #endif
1655     while (thd->transaction.flags.pending) {
1656       mysql_cond_wait(&m_cond_done, &m_lock_done);
1657     }
1658     mysql_mutex_unlock(&m_lock_done);
1659   }
1660   return leader;
1661 }
1662 
1663 
fetch_and_empty()1664 THD *Stage_manager::Mutex_queue::fetch_and_empty()
1665 {
1666   DBUG_ENTER("Stage_manager::Mutex_queue::fetch_and_empty");
1667   lock();
1668   DBUG_PRINT("enter", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1669                        (ulonglong) m_first, (ulonglong) &m_first,
1670                        (ulonglong) m_last));
1671   THD *result= m_first;
1672   m_first= NULL;
1673   m_last= &m_first;
1674   DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1675                        (ulonglong) m_first, (ulonglong) &m_first,
1676                        (ulonglong) m_last));
1677   DBUG_ASSERT(m_first || m_last == &m_first);
1678   DBUG_PRINT("return", ("result: 0x%llx", (ulonglong) result));
1679   unlock();
1680   DBUG_RETURN(result);
1681 }
1682 
1683 #ifndef DBUG_OFF
clear_preempt_status(THD * head)1684 void Stage_manager::clear_preempt_status(THD *head)
1685 {
1686   DBUG_ASSERT(head);
1687 
1688   mysql_mutex_lock(&m_lock_done);
1689   while(!head->transaction.flags.ready_preempt)
1690   {
1691     leader_await_preempt_status= true;
1692     mysql_cond_wait(&m_cond_preempt, &m_lock_done);
1693   }
1694   leader_await_preempt_status= false;
1695   mysql_mutex_unlock(&m_lock_done);
1696 }
1697 #endif
1698 
1699 /**
1700   Write a rollback record of the transaction to the binary log.
1701 
1702   For binary log group commit, the rollback is separated into three
1703   parts:
1704 
1705   1. First part consists of filling the necessary caches and
1706      finalizing them (if they need to be finalized). After a cache is
1707      finalized, nothing can be added to the cache.
1708 
  2. Second part executes an ordered flush and commit. This will be
1710      done using the group commit functionality in @c ordered_commit.
1711 
1712      Since we roll back the transaction early, we call @c
1713      ordered_commit with the @c skip_commit flag set. The @c
1714      ha_commit_low call inside @c ordered_commit will then not be
1715      called.
1716 
1717   3. Third part checks any errors resulting from the flush and handles
1718      them appropriately.
1719 
1720   @see MYSQL_BIN_LOG::ordered_commit
1721   @see ha_commit_low
1722   @see ha_rollback_low
1723 
1724   @param thd Session to commit
1725   @param all This is @c true if this is a real transaction rollback, and
1726              @false otherwise.
1727 
  @return Error code, or zero if there was no error.
1729  */
1730 
rollback(THD * thd,bool all)1731 int MYSQL_BIN_LOG::rollback(THD *thd, bool all)
1732 {
1733   int error= 0;
1734   bool stuff_logged= false;
1735 
1736   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
1737   DBUG_ENTER("MYSQL_BIN_LOG::rollback(THD *thd, bool all)");
1738   DBUG_PRINT("enter", ("all: %s, cache_mngr: 0x%llx, thd->is_error: %s",
1739                        YESNO(all), (ulonglong) cache_mngr, YESNO(thd->is_error())));
1740 
1741   /*
1742     We roll back the transaction in the engines early since this will
1743     release locks and allow other transactions to start executing.
1744 
1745     If we are executing a ROLLBACK TO SAVEPOINT, we should only clear
1746     the caches since this function is called as part of the engine
1747     rollback.
1748    */
1749   if (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT)
1750   {
1751     /*
1752       Reset binlog_snapshot_% variables for the current connection so that the
1753       current coordinates are shown after committing a consistent snapshot
1754       transaction.
1755     */
1756     if (cache_mngr != NULL)
1757     {
1758       mysql_mutex_lock(&cache_mngr->binlog_info.lock);
1759       cache_mngr->binlog_info.log_file_name[0]= '\0';
1760       mysql_mutex_unlock(&cache_mngr->binlog_info.lock);
1761     }
1762 
1763     if ((error= ha_rollback_low(thd, all)))
1764       goto end;
1765   }
1766 
1767   /*
1768     If there is no cache manager, or if there is nothing in the
1769     caches, there are no caches to roll back, so we're trivially done.
1770    */
1771   if (cache_mngr == NULL || cache_mngr->is_binlog_empty())
1772     goto end;
1773 
1774   DBUG_PRINT("debug",
1775              ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
1776               YESNO(thd->transaction.all.cannot_safely_rollback()),
1777               YESNO(cache_mngr->trx_cache.is_binlog_empty())));
1778   DBUG_PRINT("debug",
1779              ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
1780               YESNO(thd->transaction.stmt.cannot_safely_rollback()),
1781               YESNO(cache_mngr->stmt_cache.is_binlog_empty())));
1782 
1783   /*
1784     If an incident event is set we do not flush the content of the statement
1785     cache because it may be corrupted.
1786   */
1787   if (cache_mngr->stmt_cache.has_incident())
1788   {
1789     error= write_incident(thd, true/*need_lock_log=true*/);
1790     cache_mngr->stmt_cache.reset();
1791   }
1792   else if (!cache_mngr->stmt_cache.is_binlog_empty())
1793   {
1794     if ((error= cache_mngr->stmt_cache.finalize(thd)))
1795       goto end;
1796     stuff_logged= true;
1797   }
1798 
1799   if (ending_trans(thd, all))
1800   {
1801     if (trans_cannot_safely_rollback(thd))
1802     {
1803       /*
1804         If the transaction is being rolled back and contains changes that
1805         cannot be rolled back, the trx-cache's content is flushed.
1806       */
1807       Query_log_event
1808         end_evt(thd, STRING_WITH_LEN("ROLLBACK"), true, false, true, 0, true);
1809       error= cache_mngr->trx_cache.finalize(thd, &end_evt);
1810       stuff_logged= true;
1811     }
1812     else
1813     {
1814       /*
1815         If the transaction is being rolled back and its changes can be
1816         rolled back, the trx-cache's content is truncated.
1817       */
1818       error= cache_mngr->trx_cache.truncate(thd, all);
1819     }
1820   }
1821   else
1822   {
1823     /*
1824       If a statement is being rolled back, it is necessary to know
1825       exactly why a statement may not be safely rolled back as in
1826       some specific situations the trx-cache can be truncated.
1827 
1828       If a temporary table is created or dropped, the trx-cache is not
1829       truncated. Note that if the stmt-cache is used, there is nothing
1830       to truncate in the trx-cache.
1831 
1832       If a non-transactional table is updated and the binlog format is
1833       statement, the trx-cache is not truncated. The trx-cache is used
1834       when the direct option is off and a transactional table has been
1835       updated before the current statement in the context of the
1836       current transaction. Note that if the stmt-cache is used there is
1837       nothing to truncate in the trx-cache.
1838 
1839       If other binlog formats are used, updates to non-transactional
1840       tables are written to the stmt-cache and trx-cache can be safely
1841       truncated, if necessary.
1842     */
1843     if (thd->transaction.stmt.has_dropped_temp_table() ||
1844         thd->transaction.stmt.has_created_temp_table() ||
1845         (thd->transaction.stmt.has_modified_non_trans_table() &&
1846         thd->variables.binlog_format == BINLOG_FORMAT_STMT))
1847     {
1848       /*
1849         If the statement is being rolled back and dropped or created a
1850         temporary table or modified a non-transactional table and the
1851         statement-based replication is in use, the statement's changes
1852         in the trx-cache are preserved.
1853       */
1854       cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
1855     }
1856     else
1857     {
1858       /*
1859         Otherwise, the statement's changes in the trx-cache are
1860         truncated.
1861       */
1862       error= cache_mngr->trx_cache.truncate(thd, all);
1863     }
1864   }
1865 
1866   DBUG_PRINT("debug", ("error: %d", error));
1867   if (error == 0 && stuff_logged)
1868     error= ordered_commit(thd, all, /* skip_commit */ true);
1869 
1870   if (check_write_error(thd))
1871   {
1872     /*
1873       "all == true" means that a "rollback statement" triggered the error and
1874       this function was called. However, this must not happen as a rollback
1875       is written directly to the binary log. And in auto-commit mode, a single
1876       statement that is rolled back has the flag all == false.
1877     */
1878     DBUG_ASSERT(!all);
1879     /*
1880       We reach this point if the effect of a statement did not properly get into
1881       a cache and need to be rolled back.
1882     */
1883     error |= cache_mngr->trx_cache.truncate(thd, all);
1884   }
1885 
1886 end:
1887   /*
1888     When a statement errors out on auto-commit mode it is rollback
1889     implicitly, so the same should happen to its GTID.
1890   */
1891   if (!thd->in_active_multi_stmt_transaction())
1892     gtid_rollback(thd);
1893 
1894   DBUG_PRINT("return", ("error: %d", error));
1895   DBUG_RETURN(error);
1896 }
1897 
1898 /**
1899   @note
1900   How do we handle this (unlikely but legal) case:
1901   @verbatim
1902     [transaction] + [update to non-trans table] + [rollback to savepoint] ?
1903   @endverbatim
1904   The problem occurs when a savepoint is before the update to the
1905   non-transactional table. Then when there's a rollback to the savepoint, if we
1906   simply truncate the binlog cache, we lose the part of the binlog cache where
1907   the update is. If we want to not lose it, we need to write the SAVEPOINT
1908   command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
  is easy: it is simply written at the end of the binlog cache, but the former
  should be *inserted* at the place where the user called SAVEPOINT. The
1911   solution is that when the user calls SAVEPOINT, we write it to the binlog
1912   cache (so no need to later insert it). As transactions are never intermixed
1913   in the binary log (i.e. they are serialized), we won't have conflicts with
1914   savepoint names when using mysqlbinlog or in the slave SQL thread.
1915   Then when ROLLBACK TO SAVEPOINT is called, if we updated some
1916   non-transactional table, we don't truncate the binlog cache but instead write
1917   ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
1918   will chop the SAVEPOINT command from the binlog cache, which is good as in
1919   that case there is no need to have it in the binlog).
1920 */
1921 
binlog_savepoint_set(handlerton * hton,THD * thd,void * sv)1922 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
1923 {
1924   DBUG_ENTER("binlog_savepoint_set");
1925   int error= 1;
1926 
1927   String log_query;
1928   if (log_query.append(STRING_WITH_LEN("SAVEPOINT ")))
1929     DBUG_RETURN(error);
1930   else
1931     append_identifier(thd, &log_query, thd->lex->ident.str,
1932                       thd->lex->ident.length);
1933 
1934   int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
1935   Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
1936                         TRUE, FALSE, TRUE, errcode);
1937   /*
1938     We cannot record the position before writing the statement
1939     because a rollback to a savepoint (.e.g. consider it "S") would
1940     prevent the savepoint statement (i.e. "SAVEPOINT S") from being
1941     written to the binary log despite the fact that the server could
1942     still issue other rollback statements to the same savepoint (i.e.
1943     "S").
1944     Given that the savepoint is valid until the server releases it,
1945     ie, until the transaction commits or it is released explicitly,
1946     we need to log it anyway so that we don't have "ROLLBACK TO S"
1947     or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
1948     log.
1949   */
1950   if (!(error= mysql_bin_log.write_event(&qinfo)))
1951     binlog_trans_log_savepos(thd, (my_off_t*) sv);
1952 
1953   DBUG_RETURN(error);
1954 }
1955 
binlog_savepoint_rollback(handlerton * hton,THD * thd,void * sv)1956 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
1957 {
1958   DBUG_ENTER("binlog_savepoint_rollback");
1959   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
1960   my_off_t pos= *(my_off_t*) sv;
1961   DBUG_ASSERT(pos != ~(my_off_t) 0);
1962 
1963   /*
1964     Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
1965     non-transactional table. Otherwise, truncate the binlog cache starting
1966     from the SAVEPOINT command.
1967   */
1968   if (trans_cannot_safely_rollback(thd))
1969   {
1970     String log_query;
1971     if (log_query.append(STRING_WITH_LEN("ROLLBACK TO ")))
1972       DBUG_RETURN(1);
1973     else
1974     {
1975       /*
1976         Before writing identifier to the binlog, make sure to
1977         quote the identifier properly so as to prevent any SQL
1978         injection on the slave.
1979       */
1980       append_identifier(thd, &log_query, thd->lex->ident.str,
1981                         thd->lex->ident.length);
1982     }
1983 
1984     int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
1985     Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
1986                           TRUE, FALSE, TRUE, errcode);
1987     DBUG_RETURN(mysql_bin_log.write_event(&qinfo));
1988   }
1989   // Otherwise, we truncate the cache
1990   cache_mngr->trx_cache.restore_savepoint(pos);
1991   /*
1992     When a SAVEPOINT is executed inside a stored function/trigger we force the
1993     pending event to be flushed with a STMT_END_F flag and clear the table maps
1994     as well to ensure that following DMLs will have a clean state to start
1995     with. ROLLBACK inside a stored routine has to finalize possibly existing
1996     current row-based pending event with cleaning up table maps. That ensures
1997     that following DMLs will have a clean state to start with.
1998    */
1999   if (thd->in_sub_stmt)
2000     thd->clear_binlog_table_maps();
2001   if (cache_mngr->trx_cache.is_binlog_empty())
2002     cache_mngr->trx_cache.group_cache.clear();
2003   DBUG_RETURN(0);
2004 }
2005 
2006 /**
2007   Check whether binlog state allows to safely release MDL locks after
2008   rollback to savepoint.
2009 
2010   @param hton  The binlog handlerton.
2011   @param thd   The client thread that executes the transaction.
2012 
2013   @return true  - It is safe to release MDL locks.
2014           false - If it is not.
2015 */
binlog_savepoint_rollback_can_release_mdl(handlerton * hton,THD * thd)2016 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
2017                                                       THD *thd)
2018 {
2019   DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
2020   /*
2021     If we have not updated any non-transactional tables rollback
2022     to savepoint will simply truncate binlog cache starting from
2023     SAVEPOINT command. So it should be safe to release MDL acquired
2024     after SAVEPOINT command in this case.
2025   */
2026   DBUG_RETURN(!trans_cannot_safely_rollback(thd));
2027 }
2028 
2029 #ifdef HAVE_REPLICATION
2030 
2031 /*
2032   Adjust the position pointer in the binary log file for all running slaves
2033 
2034   SYNOPSIS
2035     adjust_linfo_offsets()
2036     purge_offset	Number of bytes removed from start of log index file
2037 
2038   NOTES
2039     - This is called when doing a PURGE when we delete lines from the
2040       index log file
2041 
2042   REQUIREMENTS
2043     - Before calling this function, we have to ensure that no threads are
2044       using any binary log file before purge_offset.a
2045 
2046   TODO
2047     - Inform the slave threads that they should sync the position
2048       in the binary log file with flush_relay_log_info.
2049       Now they sync is done for next read.
2050 */
2051 
adjust_linfo_offsets(my_off_t purge_offset)2052 static void adjust_linfo_offsets(my_off_t purge_offset)
2053 {
2054   mysql_mutex_lock(&LOCK_thread_count);
2055 
2056   Thread_iterator it= global_thread_list_begin();
2057   Thread_iterator end= global_thread_list_end();
2058   for (; it != end; ++it)
2059   {
2060     LOG_INFO* linfo;
2061     if ((linfo = (*it)->current_linfo))
2062     {
2063       mysql_mutex_lock(&linfo->lock);
2064       /*
2065 	Index file offset can be less that purge offset only if
2066 	we just started reading the index file. In that case
2067 	we have nothing to adjust
2068       */
2069       if (linfo->index_file_offset < purge_offset)
2070 	linfo->fatal = (linfo->index_file_offset != 0);
2071       else
2072 	linfo->index_file_offset -= purge_offset;
2073       mysql_mutex_unlock(&linfo->lock);
2074     }
2075   }
2076   mysql_mutex_unlock(&LOCK_thread_count);
2077 }
2078 
2079 
log_in_use(const char * log_name)2080 static int log_in_use(const char* log_name)
2081 {
2082   size_t log_name_len = strlen(log_name) + 1;
2083   int thread_count=0;
2084 #ifndef DBUG_OFF
2085   if (current_thd)
2086     DEBUG_SYNC(current_thd,"purge_logs_after_lock_index_before_thread_count");
2087 #endif
2088   mysql_mutex_lock(&LOCK_thread_count);
2089 
2090   Thread_iterator it= global_thread_list_begin();
2091   Thread_iterator end= global_thread_list_end();
2092   for (; it != end; ++it)
2093   {
2094     LOG_INFO* linfo;
2095     if ((linfo = (*it)->current_linfo))
2096     {
2097       mysql_mutex_lock(&linfo->lock);
2098       if(!strncmp(log_name, linfo->log_file_name, log_name_len))
2099       {
2100         thread_count++;
2101         sql_print_warning("file %s was not purged because it was being read "
2102                           "by thread number %llu", log_name,
2103                           (ulonglong)(*it)->thread_id);
2104       }
2105       mysql_mutex_unlock(&linfo->lock);
2106     }
2107   }
2108 
2109   mysql_mutex_unlock(&LOCK_thread_count);
2110   return thread_count;
2111 }
2112 
purge_error_message(THD * thd,int res)2113 static bool purge_error_message(THD* thd, int res)
2114 {
2115   uint errcode;
2116 
2117   if ((errcode= purge_log_get_error_code(res)) != 0)
2118   {
2119     my_message(errcode, ER(errcode), MYF(0));
2120     return TRUE;
2121   }
2122   my_ok(thd);
2123   return FALSE;
2124 }
2125 
2126 #endif /* HAVE_REPLICATION */
2127 
check_binlog_magic(IO_CACHE * log,const char ** errmsg)2128 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
2129 {
2130   char magic[4];
2131   DBUG_ASSERT(my_b_tell(log) == 0);
2132 
2133   if (my_b_read(log, (uchar*) magic, sizeof(magic)))
2134   {
2135     *errmsg = "I/O error reading the header from the binary log";
2136     sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
2137 		    log->error);
2138     return 1;
2139   }
2140   if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
2141   {
2142     *errmsg = "Binlog has bad magic number;  It's not a binary log file that can be used by this version of MySQL";
2143     return 1;
2144   }
2145   return 0;
2146 }
2147 
2148 
open_binlog_file(IO_CACHE * log,const char * log_file_name,const char ** errmsg)2149 File open_binlog_file(IO_CACHE *log, const char *log_file_name, const char **errmsg)
2150 {
2151   File file;
2152   DBUG_ENTER("open_binlog_file");
2153 
2154   if ((file= mysql_file_open(key_file_binlog,
2155                              log_file_name, O_RDONLY | O_BINARY | O_SHARE,
2156                              MYF(MY_WME))) < 0)
2157   {
2158     sql_print_error("Failed to open log (file '%s', errno %d)",
2159                     log_file_name, my_errno);
2160     *errmsg = "Could not open log file";
2161     goto err;
2162   }
2163   if (init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
2164                     MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
2165   {
2166     sql_print_error("Failed to create a cache on log (file '%s')",
2167                     log_file_name);
2168     *errmsg = "Could not open log file";
2169     goto err;
2170   }
2171   if (check_binlog_magic(log,errmsg))
2172     goto err;
2173   DBUG_RETURN(file);
2174 
2175 err:
2176   if (file >= 0)
2177   {
2178     mysql_file_close(file, MYF(0));
2179     end_io_cache(log);
2180   }
2181   DBUG_RETURN(-1);
2182 }
2183 
2184 /**
2185   This function checks if a transactional table was updated by the
2186   current transaction.
2187 
2188   @param thd The client thread that executed the current statement.
2189   @return
2190     @c true if a transactional table was updated, @c false otherwise.
2191 */
2192 bool
trans_has_updated_trans_table(const THD * thd)2193 trans_has_updated_trans_table(const THD* thd)
2194 {
2195   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
2196 
2197   return (cache_mngr ? !cache_mngr->trx_cache.is_binlog_empty() : 0);
2198 }
2199 
2200 /**
2201   This function checks if a transactional table was updated by the
2202   current statement.
2203 
2204   @param ha_list Registered storage engine handler list.
2205   @return
2206     @c true if a transactional table was updated, @c false otherwise.
2207 */
2208 bool
stmt_has_updated_trans_table(Ha_trx_info * ha_list)2209 stmt_has_updated_trans_table(Ha_trx_info* ha_list)
2210 {
2211   Ha_trx_info *ha_info;
2212 
2213   for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
2214   {
2215     if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
2216       return (TRUE);
2217   }
2218   return (FALSE);
2219 }
2220 
2221 /**
2222   This function checks if a transaction, either a multi-statement
2223   or a single statement transaction is about to commit or not.
2224 
2225   @param thd The client thread that executed the current statement.
2226   @param all Committing a transaction (i.e. TRUE) or a statement
2227              (i.e. FALSE).
2228   @return
2229     @c true if committing a transaction, otherwise @c false.
2230 */
ending_trans(THD * thd,const bool all)2231 bool ending_trans(THD* thd, const bool all)
2232 {
2233   return (all || ending_single_stmt_trans(thd, all));
2234 }
2235 
2236 /**
2237   This function checks if a single statement transaction is about
2238   to commit or not.
2239 
2240   @param thd The client thread that executed the current statement.
2241   @param all Committing a transaction (i.e. TRUE) or a statement
2242              (i.e. FALSE).
2243   @return
2244     @c true if committing a single statement transaction, otherwise
2245     @c false.
2246 */
ending_single_stmt_trans(THD * thd,const bool all)2247 bool ending_single_stmt_trans(THD* thd, const bool all)
2248 {
2249   return (!all && !thd->in_multi_stmt_transaction_mode());
2250 }
2251 
2252 /**
2253   This function checks if a transaction cannot be rolled back safely.
2254 
2255   @param thd The client thread that executed the current statement.
2256   @return
2257     @c true if cannot be safely rolled back, @c false otherwise.
2258 */
trans_cannot_safely_rollback(const THD * thd)2259 bool trans_cannot_safely_rollback(const THD* thd)
2260 {
2261   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
2262 
2263   return cache_mngr->trx_cache.cannot_rollback();
2264 }
2265 
2266 /**
2267   This function checks if current statement cannot be rollded back safely.
2268 
2269   @param thd The client thread that executed the current statement.
2270   @return
2271     @c true if cannot be safely rolled back, @c false otherwise.
2272 */
stmt_cannot_safely_rollback(const THD * thd)2273 bool stmt_cannot_safely_rollback(const THD* thd)
2274 {
2275   return thd->transaction.stmt.cannot_safely_rollback();
2276 }
2277 
2278 #ifndef EMBEDDED_LIBRARY
2279 /**
2280   Execute a PURGE BINARY LOGS TO <log> command.
2281 
2282   @param thd Pointer to THD object for the client thread executing the
2283   statement.
2284 
2285   @param to_log Name of the last log to purge.
2286 
2287   @retval FALSE success
2288   @retval TRUE failure
2289 */
purge_master_logs(THD * thd,const char * to_log)2290 bool purge_master_logs(THD* thd, const char* to_log)
2291 {
2292   char search_file_name[FN_REFLEN];
2293   if (!mysql_bin_log.is_open())
2294   {
2295     my_ok(thd);
2296     return FALSE;
2297   }
2298 
2299   mysql_bin_log.make_log_name(search_file_name, to_log);
2300   return purge_error_message(thd,
2301                              mysql_bin_log.purge_logs(search_file_name, false,
2302                                                       true/*need_lock_index=true*/,
2303                                                       true/*need_update_threads=true*/,
2304                                                       NULL, false));
2305 }
2306 
2307 
2308 /**
2309   Execute a PURGE BINARY LOGS BEFORE <date> command.
2310 
2311   @param thd Pointer to THD object for the client thread executing the
2312   statement.
2313 
2314   @param purge_time Date before which logs should be purged.
2315 
2316   @retval FALSE success
2317   @retval TRUE failure
2318 */
purge_master_logs_before_date(THD * thd,time_t purge_time)2319 bool purge_master_logs_before_date(THD* thd, time_t purge_time)
2320 {
2321   if (!mysql_bin_log.is_open())
2322   {
2323     my_ok(thd);
2324     return 0;
2325   }
2326   return purge_error_message(thd,
2327                              mysql_bin_log.purge_logs_before_date(purge_time,
2328                                                                   false));
2329 }
2330 #endif /* EMBEDDED_LIBRARY */
2331 
2332 /*
2333   Helper function to get the error code of the query to be binlogged.
2334  */
query_error_code(THD * thd,bool not_killed)2335 int query_error_code(THD *thd, bool not_killed)
2336 {
2337   int error;
2338 
2339   if (not_killed || (thd->killed == THD::KILL_BAD_DATA))
2340   {
2341     error= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0;
2342 
2343     /* thd->get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
2344        ER_QUERY_INTERRUPTED, So here we need to make sure that error
2345        is not set to these errors when specified not_killed by the
2346        caller.
2347     */
2348     if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED)
2349       error= 0;
2350   }
2351   else
2352   {
2353     /* killed status for DELAYED INSERT thread should never be used */
2354     DBUG_ASSERT(!(thd->system_thread & SYSTEM_THREAD_DELAYED_INSERT));
2355     error= thd->killed_errno();
2356   }
2357 
2358   return error;
2359 }
2360 
2361 
2362 /**
2363   Copy content of 'from' file from offset to 'to' file.
2364 
2365   - We do the copy outside of the IO_CACHE as the cache
2366   buffers would just make things slower and more complicated.
2367   In most cases the copy loop should only do one read.
2368 
2369   @param from          File to copy.
2370   @param to            File to copy to.
2371   @param offset        Offset in 'from' file.
2372 
2373 
2374   @retval
2375     0    ok
2376   @retval
2377     -1    error
2378 */
copy_file(IO_CACHE * from,IO_CACHE * to,my_off_t offset)2379 static bool copy_file(IO_CACHE *from, IO_CACHE *to, my_off_t offset)
2380 {
2381   int bytes_read;
2382   uchar io_buf[IO_SIZE*2];
2383   DBUG_ENTER("copy_file");
2384 
2385   mysql_file_seek(from->file, offset, MY_SEEK_SET, MYF(0));
2386   while(TRUE)
2387   {
2388     if ((bytes_read= (int) mysql_file_read(from->file, io_buf, sizeof(io_buf),
2389                                            MYF(MY_WME)))
2390         < 0)
2391       goto err;
2392     if (DBUG_EVALUATE_IF("fault_injection_copy_part_file", 1, 0))
2393       bytes_read= bytes_read/2;
2394     if (!bytes_read)
2395       break;                                    // end of file
2396     if (mysql_file_write(to->file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
2397       goto err;
2398   }
2399 
2400   DBUG_RETURN(0);
2401 
2402 err:
2403   DBUG_RETURN(1);
2404 }
2405 
2406 
2407 #ifdef HAVE_REPLICATION
2408 /**
2409    Load data's io cache specific hook to be executed
2410    before a chunk of data is being read into the cache's buffer
2411    The fuction instantianates and writes into the binlog
2412    replication events along LOAD DATA processing.
2413 
2414    @param file  pointer to io-cache
2415    @retval 0 success
2416    @retval 1 failure
2417 */
log_loaded_block(IO_CACHE * file)2418 int log_loaded_block(IO_CACHE* file)
2419 {
2420   DBUG_ENTER("log_loaded_block");
2421   LOAD_FILE_INFO *lf_info;
2422   uint block_len;
2423   /* buffer contains position where we started last read */
2424   uchar* buffer= (uchar*) my_b_get_buffer_start(file);
2425   uint max_event_size= current_thd->variables.max_allowed_packet;
2426   lf_info= (LOAD_FILE_INFO*) file->arg;
2427   if (lf_info->thd->is_current_stmt_binlog_format_row())
2428     DBUG_RETURN(0);
2429   if (lf_info->last_pos_in_file != HA_POS_ERROR &&
2430       lf_info->last_pos_in_file >= my_b_get_pos_in_file(file))
2431     DBUG_RETURN(0);
2432 
2433   for (block_len= (uint) (my_b_get_bytes_in_buffer(file)); block_len > 0;
2434        buffer += min(block_len, max_event_size),
2435        block_len -= min(block_len, max_event_size))
2436   {
2437     lf_info->last_pos_in_file= my_b_get_pos_in_file(file);
2438     if (lf_info->wrote_create_file)
2439     {
2440       Append_block_log_event a(lf_info->thd, lf_info->thd->db, buffer,
2441                                min(block_len, max_event_size),
2442                                lf_info->log_delayed);
2443       if (mysql_bin_log.write_event(&a))
2444         DBUG_RETURN(1);
2445     }
2446     else
2447     {
2448       Begin_load_query_log_event b(lf_info->thd, lf_info->thd->db,
2449                                    buffer,
2450                                    min(block_len, max_event_size),
2451                                    lf_info->log_delayed);
2452       if (mysql_bin_log.write_event(&b))
2453         DBUG_RETURN(1);
2454       lf_info->wrote_create_file= 1;
2455     }
2456   }
2457   DBUG_RETURN(0);
2458 }
2459 
/**
  Helper function for SHOW BINLOG/RELAYLOG EVENTS.

  When the given log is open, this locates the log file named in the
  statement (or the first log file when none is named), opens it, and
  sends the events read from the requested position onward to the client
  with Log_event::net_send(), honoring the statement's LIMIT offset and
  row count. When the log is not open, no events are sent and the call
  succeeds.

  While the file is being read, thd->current_linfo points at a LOG_INFO
  local to this function; it is reset before returning.

  @param thd         Client thread. Its lex must hold
                     SQLCOM_SHOW_BINLOG_EVENTS or
                     SQLCOM_SHOW_RELAYLOG_EVENTS (asserted).
  @param binary_log  The log whose events are shown.

  @retval FALSE success; EOF was sent with my_eof()
  @retval TRUE  failure; ER_ERROR_WHEN_EXECUTING_COMMAND was raised

  NOTE(review): the error is always raised with the command name
  "SHOW BINLOG EVENTS", also when the statement is SHOW RELAYLOG EVENTS.
*/
bool show_binlog_events(THD *thd, MYSQL_BIN_LOG *binary_log)
{
  Protocol *protocol= thd->protocol;
  /* NOTE(review): field_list is not referenced anywhere in this function. */
  List<Item> field_list;
  const char *errmsg = 0;
  bool ret = TRUE;
  IO_CACHE log;
  File file = -1;
  /* Saved so that the temporary increase below can be undone on exit. */
  int old_max_allowed_packet= thd->variables.max_allowed_packet;
  LOG_INFO linfo;

  DBUG_ENTER("show_binlog_events");

  DBUG_ASSERT(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS ||
              thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS);

  /*
    Default description event; it is replaced below if the file starts
    with its own Format_description_log_event.
  */
  Format_description_log_event *description_event= new
    Format_description_log_event(3); /* MySQL 4.0 by default */

  if (binary_log->is_open())
  {
    LEX_MASTER_INFO *lex_mi= &thd->lex->mi;
    SELECT_LEX_UNIT *unit= &thd->lex->unit;
    ha_rows event_count, limit_start, limit_end;
    /* Never start reading before the end of the binlog file header. */
    my_off_t pos = max<my_off_t>(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly
    char search_file_name[FN_REFLEN], *name;
    const char *log_file_name = lex_mi->log_file_name;
    mysql_mutex_t *log_lock = binary_log->get_log_lock();
    Log_event* ev;

    /* Take the LIMIT offset and row count from the current select. */
    unit->set_limit(thd->lex->current_select);
    limit_start= unit->offset_limit_cnt;
    limit_end= unit->select_limit_cnt;

    name= search_file_name;
    if (log_file_name)
      binary_log->make_log_name(search_file_name, log_file_name);
    else
      name=0;					// Find first log

    linfo.index_file_offset = 0;

    if (binary_log->find_log_pos(&linfo, name, true/*need_lock_index=true*/))
    {
      errmsg = "Could not find target log";
      goto err;
    }

    /*
      Publish linfo as this thread's current LOG_INFO. The pointer is
      cleared again at the end of the function, under the same mutex.
    */
    mysql_mutex_lock(&LOCK_thread_count);
    thd->current_linfo = &linfo;
    mysql_mutex_unlock(&LOCK_thread_count);

    /* On failure open_binlog_file() has already set errmsg. */
    if ((file=open_binlog_file(&log, linfo.log_file_name, &errmsg)) < 0)
      goto err;

    my_off_t end_pos;
    /*
      Acquire LOCK_log only for the duration to calculate the
      log's end position. LOCK_log should be acquired even while
      we are checking whether the log is active log or not.
    */
    mysql_mutex_lock(log_lock);
    if (binary_log->is_active(linfo.log_file_name))
    {
      LOG_INFO li;
      binary_log->get_current_log(&li, false /*LOCK_log is already acquired*/);
      end_pos= li.pos;
    }
    else
    {
      end_pos= my_b_filelength(&log);
    }
    mysql_mutex_unlock(log_lock);

    /*
      to account binlog event header size
    */
    thd->variables.max_allowed_packet += MAX_LOG_EVENT_HEADER;

    DEBUG_SYNC(thd, "after_show_binlog_event_found_file");

    /*
      open_binlog_file() sought to position 4.
      Read the first event in case it's a Format_description_log_event, to
      know the format. If there's no such event, we are 3.23 or 4.x. This
      code, like before, can't read 3.23 binlogs.
      This code will fail on a mixed relay log (one which has Format_desc then
      Rotate then Format_desc).
    */
    ev= Log_event::read_log_event(&log, (mysql_mutex_t*)0, description_event,
                                   opt_master_verify_checksum);
    if (ev)
    {
      if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
      {
        /* Adopt the file's own description event; it is freed at 'err'. */
        delete description_event;
        description_event= (Format_description_log_event*) ev;
      }
      else
        delete ev;
    }

    /* Move to the position the events are to be shown from. */
    my_b_seek(&log, pos);

    if (!description_event->is_valid())
    {
      errmsg="Invalid Format_description event; could be out of memory";
      goto err;
    }

    /*
      Read events one by one until read_log_event() returns no event, the
      LIMIT row count is reached, or the end position computed above is
      reached.
    */
    for (event_count = 0;
         (ev = Log_event::read_log_event(&log, (mysql_mutex_t*) 0,
                                         description_event,
                                         opt_master_verify_checksum)); )
    {
      DEBUG_SYNC(thd, "wait_in_show_binlog_events_loop");
      if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
        description_event->checksum_alg= ev->checksum_alg;

      /* Events before the LIMIT offset are read but not sent. */
      if (event_count >= limit_start &&
        ev->net_send(protocol, linfo.log_file_name, pos))
      {
	errmsg = "Net error";
	delete ev;
	goto err;
      }

      pos = my_b_tell(&log);
      delete ev;

      if (++event_count >= limit_end || pos >= end_pos)
	break;
    }

    /* The loop stopped early and the cache reports an error. */
    if (event_count < limit_end && log.error)
    {
      errmsg = "Wrong offset or I/O error";
      goto err;
    }

  }
  // Check that linfo is still on the function scope.
  DEBUG_SYNC(thd, "after_show_binlog_events");

  ret= FALSE;

  /* Common exit path for success and failure. */
err:
  delete description_event;
  if (file >= 0)
  {
    end_io_cache(&log);
    mysql_file_close(file, MYF(MY_WME));
  }

  if (errmsg)
    my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
             "SHOW BINLOG EVENTS", errmsg);
  else
    my_eof(thd);

  /* linfo is about to go out of scope: unpublish it. */
  mysql_mutex_lock(&LOCK_thread_count);
  thd->current_linfo = 0;
  mysql_mutex_unlock(&LOCK_thread_count);
  thd->variables.max_allowed_packet= old_max_allowed_packet;
  DBUG_RETURN(ret);
}
2627 
2628 /**
2629   Execute a SHOW BINLOG EVENTS statement.
2630 
2631   @param thd Pointer to THD object for the client thread executing the
2632   statement.
2633 
2634   @retval FALSE success
2635   @retval TRUE failure
2636 */
mysql_show_binlog_events(THD * thd)2637 bool mysql_show_binlog_events(THD* thd)
2638 {
2639   Protocol *protocol= thd->protocol;
2640   List<Item> field_list;
2641   DBUG_ENTER("mysql_show_binlog_events");
2642 
2643   DBUG_ASSERT(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS);
2644 
2645   Log_event::init_show_field_list(&field_list);
2646   if (protocol->send_result_set_metadata(&field_list,
2647                             Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
2648     DBUG_RETURN(TRUE);
2649 
2650   /*
2651     Wait for handlers to insert any pending information
2652     into the binlog.  For e.g. ndb which updates the binlog asynchronously
2653     this is needed so that the uses sees all its own commands in the binlog
2654   */
2655   ha_binlog_wait(thd);
2656 
2657   DBUG_RETURN(show_binlog_events(thd, &mysql_bin_log));
2658 }
2659 
2660 #endif /* HAVE_REPLICATION */
2661 
2662 
MYSQL_BIN_LOG(uint * sync_period)2663 MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
2664   :bytes_written(0), file_id(1), open_count(1),
2665    sync_period_ptr(sync_period), sync_counter(0),
2666    m_prep_xids(0),
2667    is_relay_log(0), signal_cnt(0),
2668    checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
2669    relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
2670    previous_gtid_set(0), snapshot_lock_acquired(false)
2671 {
2672   /*
2673     We don't want to initialize locks here as such initialization depends on
2674     safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
2675     called only in main(). Doing initialization here would make it happen
2676     before main().
2677   */
2678   index_file_name[0] = 0;
2679   memset(&index_file, 0, sizeof(index_file));
2680   memset(&purge_index_file, 0, sizeof(purge_index_file));
2681   memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
2682 }
2683 
2684 
2685 /* this is called only once */
2686 
cleanup()2687 void MYSQL_BIN_LOG::cleanup()
2688 {
2689   DBUG_ENTER("cleanup");
2690   if (inited)
2691   {
2692     inited= 0;
2693     close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, true /*need_lock_log=true*/,
2694           true /*need_lock_index=true*/);
2695     mysql_mutex_destroy(&LOCK_log);
2696     mysql_mutex_destroy(&LOCK_index);
2697     mysql_mutex_destroy(&LOCK_commit);
2698     mysql_mutex_destroy(&LOCK_sync);
2699     mysql_mutex_destroy(&LOCK_xids);
2700     mysql_cond_destroy(&update_cond);
2701     my_atomic_rwlock_destroy(&m_prep_xids_lock);
2702     mysql_cond_destroy(&m_prep_xids_cond);
2703     stage_manager.deinit();
2704   }
2705   DBUG_VOID_RETURN;
2706 }
2707 
2708 
init_pthread_objects()2709 void MYSQL_BIN_LOG::init_pthread_objects()
2710 {
2711   MYSQL_LOG::init_pthread_objects();
2712   mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
2713   mysql_mutex_init(m_key_LOCK_commit, &LOCK_commit, MY_MUTEX_INIT_FAST);
2714   mysql_mutex_init(m_key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
2715   mysql_mutex_init(m_key_LOCK_xids, &LOCK_xids, MY_MUTEX_INIT_FAST);
2716   mysql_cond_init(m_key_update_cond, &update_cond, 0);
2717   my_atomic_rwlock_init(&m_prep_xids_lock);
2718   mysql_cond_init(m_key_prep_xids_cond, &m_prep_xids_cond, NULL);
2719   stage_manager.init(
2720 #ifdef HAVE_PSI_INTERFACE
2721                    m_key_LOCK_flush_queue,
2722                    m_key_LOCK_sync_queue,
2723                    m_key_LOCK_commit_queue,
2724                    m_key_LOCK_done, m_key_COND_done
2725 #endif
2726                    );
2727 }
2728 
open_index_file(const char * index_file_name_arg,const char * log_name,bool need_lock_index)2729 bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
2730                                     const char *log_name, bool need_lock_index)
2731 {
2732   bool error= false;
2733   File index_file_nr= -1;
2734 
2735   if (need_lock_index)
2736     mysql_mutex_lock(&LOCK_index);
2737   else
2738     mysql_mutex_assert_owner(&LOCK_index);
2739 
2740   /*
2741     First open of this class instance
2742     Create an index file that will hold all file names uses for logging.
2743     Add new entries to the end of it.
2744   */
2745   myf opt= MY_UNPACK_FILENAME;
2746 
2747   if (my_b_inited(&index_file))
2748     goto end;
2749 
2750   if (!index_file_name_arg)
2751   {
2752     index_file_name_arg= log_name;    // Use same basename for index file
2753     opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
2754   }
2755   fn_format(index_file_name, index_file_name_arg, mysql_data_home,
2756             ".index", opt);
2757 
2758   if (set_crash_safe_index_file_name(index_file_name_arg))
2759   {
2760     sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed.");
2761     error= true;
2762     goto end;
2763   }
2764 
2765   /*
2766     We need move crash_safe_index_file to index_file if the index_file
2767     does not exist and crash_safe_index_file exists when mysqld server
2768     restarts.
2769   */
2770   if (my_access(index_file_name, F_OK) &&
2771       !my_access(crash_safe_index_file_name, F_OK) &&
2772       my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)))
2773   {
2774     sql_print_error("MYSQL_BIN_LOG::open_index_file failed to "
2775                     "move crash_safe_index_file to index file.");
2776     error= true;
2777     goto end;
2778   }
2779 
2780   if ((index_file_nr= mysql_file_open(m_key_file_log_index,
2781                                       index_file_name,
2782                                       O_RDWR | O_CREAT | O_BINARY,
2783                                       MYF(MY_WME))) < 0 ||
2784        mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
2785        init_io_cache(&index_file, index_file_nr,
2786                      IO_SIZE, READ_CACHE,
2787                      mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
2788                                      0, MYF(MY_WME | MY_WAIT_IF_FULL)) ||
2789       DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
2790   {
2791     /*
2792       TODO: all operations creating/deleting the index file or a log, should
2793       call my_sync_dir() or my_sync_dir_by_file() to be durable.
2794       TODO: file creation should be done with mysql_file_create()
2795       not mysql_file_open().
2796     */
2797     if (index_file_nr >= 0)
2798       mysql_file_close(index_file_nr, MYF(0));
2799     error= true;
2800     goto end;
2801   }
2802 
2803 #ifdef HAVE_REPLICATION
2804   /*
2805     Sync the index by purging any binary log file that is not registered.
2806     In other words, either purge binary log files that were removed from
2807     the index but not purged from the file system due to a crash or purge
2808     any binary log file that was created but not register in the index
2809     due to a crash.
2810   */
2811 
2812   if (set_purge_index_file_name(index_file_name_arg) ||
2813       open_purge_index_file(FALSE) ||
2814       purge_index_entry(NULL, NULL, false) ||
2815       close_purge_index_file() ||
2816       DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
2817   {
2818     sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
2819                     "file.");
2820     error= TRUE;
2821     goto end;
2822   }
2823 #endif
2824 end:
2825   if (need_lock_index)
2826     mysql_mutex_unlock(&LOCK_index);
2827   return error;
2828 }
2829 
2830 
2831 /**
2832   Reads GTIDs from the given binlog file.
2833 
2834   @param filename File to read from.
2835   @param all_gtids If not NULL, then the GTIDs from the
2836   Previous_gtids_log_event and from all Gtid_log_events are stored in
2837   this object.
2838   @param prev_gtids If not NULL, then the GTIDs from the
2839   Previous_gtids_log_events are stored in this object.
2840   @param first_gtid If not NULL, then the first GTID information from the
2841   file will be stored in this object.
2842   @param last_gtid If not NULL, then the last GTID information from the
2843   file will be stored in this object.
2844   @param sid_map The sid_map object to use in the rpl_sidno generation
2845   of the Gtid_log_event. If lock is needed in the sid_map, the caller
2846   must hold it.
2847   @param verify_checksum Set to true to verify event checksums.
2848 
2849   @retval GOT_GTIDS The file was successfully read and it contains
2850   both Gtid_log_events and Previous_gtids_log_events.
2851   @retval GOT_PREVIOUS_GTIDS The file was successfully read and it
2852   contains Previous_gtids_log_events but no Gtid_log_events.
2853   @retval NO_GTIDS The file was successfully read and it does not
2854   contain GTID events.
2855   @retval ERROR Out of memory, or the file contains GTID events
2856   when GTID_MODE = OFF, or the file is malformed (e.g., contains
2857   Gtid_log_events but no Previous_gtids_log_event).
2858   @retval TRUNCATED The file was truncated before the end of the
2859   first Previous_gtids_log_event.
2860 */
2861 enum enum_read_gtids_from_binlog_status
2862 { GOT_GTIDS, GOT_PREVIOUS_GTIDS, NO_GTIDS, ERROR, TRUNCATED };
2863 static enum_read_gtids_from_binlog_status
read_gtids_from_binlog(const char * filename,Gtid_set * all_gtids,Gtid_set * prev_gtids,Gtid * first_gtid,Gtid * last_gtid,Sid_map * sid_map,bool verify_checksum)2864 read_gtids_from_binlog(const char *filename, Gtid_set *all_gtids,
2865                        Gtid_set *prev_gtids, Gtid *first_gtid,
2866                        Gtid *last_gtid,
2867                        Sid_map* sid_map,
2868                        bool verify_checksum)
2869 {
2870   DBUG_ENTER("read_gtids_from_binlog");
2871   DBUG_PRINT("info", ("Opening file %s", filename));
2872 
2873   /*
2874     Create a Format_description_log_event that is used to read the
2875     first event of the log.
2876   */
2877   Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
2878   if (!fd_ev.is_valid())
2879     DBUG_RETURN(ERROR);
2880 
2881   File file;
2882   IO_CACHE log;
2883 
2884   /*
2885     We assert here that both all_gtids and prev_gtids, if specified,
2886     uses the same sid_map as the one passed as a parameter. This is just
2887     to ensure that, if the sid_map needed some lock and was locked by
2888     the caller, the lock applies to all the GTID sets this function is
2889     dealing with.
2890   */
2891 #ifndef DBUG_OFF
2892   if (all_gtids)
2893     DBUG_ASSERT(all_gtids->get_sid_map() == sid_map);
2894   if (prev_gtids)
2895     DBUG_ASSERT(prev_gtids->get_sid_map() == sid_map);
2896 #endif
2897 
2898   const char *errmsg= NULL;
2899   if ((file= open_binlog_file(&log, filename, &errmsg)) < 0)
2900   {
2901     sql_print_error("%s", errmsg);
2902     /*
2903       We need to revisit the recovery procedure for relay log
2904       files. Currently, it is called after this routine.
2905       /Alfranio
2906     */
2907     DBUG_RETURN(TRUNCATED);
2908   }
2909 
2910   /*
2911     Seek for Previous_gtids_log_event and Gtid_log_event events to
2912     gather information what has been processed so far.
2913   */
2914   my_b_seek(&log, BIN_LOG_HEADER_SIZE);
2915   Log_event *ev= NULL;
2916   enum_read_gtids_from_binlog_status ret= NO_GTIDS;
2917   bool done= false;
2918   bool seen_first_gtid= false;
2919   while (!done &&
2920          (ev= Log_event::read_log_event(&log, 0, fd_ev_p, verify_checksum)) !=
2921          NULL)
2922   {
2923     DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
2924     switch (ev->get_type_code())
2925     {
2926     case FORMAT_DESCRIPTION_EVENT:
2927       if (fd_ev_p != &fd_ev)
2928         delete fd_ev_p;
2929       fd_ev_p= (Format_description_log_event *)ev;
2930       break;
2931     case ROTATE_EVENT:
2932       // do nothing; just accept this event and go to next
2933       break;
2934     case PREVIOUS_GTIDS_LOG_EVENT:
2935     {
2936       if (gtid_mode == 0)
2937       {
2938         my_error(ER_FOUND_GTID_EVENT_WHEN_GTID_MODE_IS_OFF, MYF(0));
2939         ret= ERROR;
2940       }
2941       ret= GOT_PREVIOUS_GTIDS;
2942       // add events to sets
2943       Previous_gtids_log_event *prev_gtids_ev=
2944         (Previous_gtids_log_event *)ev;
2945       if (all_gtids != NULL && prev_gtids_ev->add_to_set(all_gtids) != 0)
2946         ret= ERROR, done= true;
2947       else if (prev_gtids != NULL && prev_gtids_ev->add_to_set(prev_gtids) != 0)
2948         ret= ERROR, done= true;
2949 #ifndef DBUG_OFF
2950       char* prev_buffer= prev_gtids_ev->get_str(NULL, NULL);
2951       DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
2952                           filename, prev_buffer));
2953       my_free(prev_buffer);
2954 #endif
2955       break;
2956     }
2957     case GTID_LOG_EVENT:
2958     {
2959       DBUG_EXECUTE_IF("inject_fault_bug16502579", {
2960                       DBUG_PRINT("debug", ("GTID_LOG_EVENT found. Injected ret=NO_GTIDS."));
2961                       ret=NO_GTIDS;
2962                       });
2963       if (ret != GOT_GTIDS)
2964       {
2965         if (ret != GOT_PREVIOUS_GTIDS)
2966         {
2967           /*
2968             Since this routine is run on startup, there may not be a
2969             THD instance. Therefore, ER(X) cannot be used.
2970            */
2971           const char* msg_fmt= (current_thd != NULL) ?
2972                                ER(ER_BINLOG_LOGICAL_CORRUPTION) :
2973                                ER_DEFAULT(ER_BINLOG_LOGICAL_CORRUPTION);
2974           my_printf_error(ER_BINLOG_LOGICAL_CORRUPTION,
2975                           msg_fmt, MYF(0),
2976                           filename,
2977                           "The first global transaction identifier was read, but "
2978                           "no other information regarding identifiers existing "
2979                           "on the previous log files was found.");
2980           ret= ERROR, done= true;
2981           break;
2982         }
2983         else
2984           ret= GOT_GTIDS;
2985       }
2986       /*
2987         When all_gtids, first_gtid and last_gtid are all NULL,
2988         we just check if the binary log contains at least one Gtid_log_event,
2989         so that we can distinguish the return values GOT_GTID and
2990         GOT_PREVIOUS_GTIDS. We don't need to read anything else from the
2991         binary log.
2992         If all_gtids or last_gtid is requested (i.e., NOT NULL), we should
2993         continue to read all gtids.
2994         If just first_gtid was requested, we will be done after storing this
2995         Gtid_log_event info on it.
2996       */
2997       if (all_gtids == NULL && first_gtid == NULL && last_gtid == NULL)
2998       {
2999         ret= GOT_GTIDS, done= true;
3000       }
3001       else
3002       {
3003         Gtid_log_event *gtid_ev= (Gtid_log_event *)ev;
3004         rpl_sidno sidno= gtid_ev->get_sidno(sid_map);
3005         if (sidno < 0)
3006           ret= ERROR, done= true;
3007         else
3008         {
3009           if (all_gtids)
3010           {
3011             if (all_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK)
3012               ret= ERROR, done= true;
3013             else if (all_gtids->_add_gtid(sidno, gtid_ev->get_gno()) !=
3014                      RETURN_STATUS_OK)
3015               ret= ERROR, done= true;
3016             DBUG_PRINT("info", ("Got Gtid from file '%s': Gtid(%d, %lld).",
3017                                 filename, sidno, gtid_ev->get_gno()));
3018           }
3019 
3020           /* If the first GTID was requested, stores it */
3021           if (first_gtid && !seen_first_gtid)
3022           {
3023             first_gtid->set(sidno, gtid_ev->get_gno());
3024             seen_first_gtid= true;
3025             /* If the first_gtid was the only thing requested, we are done */
3026             if (all_gtids == NULL && last_gtid == NULL)
3027               ret= GOT_GTIDS, done= true;
3028           }
3029 
3030           if (last_gtid)
3031             last_gtid->set(sidno, gtid_ev->get_gno());
3032         }
3033       }
3034       break;
3035     }
3036     case ANONYMOUS_GTID_LOG_EVENT:
3037     default:
3038       // if we found any other event type without finding a
3039       // previous_gtids_log_event, then the rest of this binlog
3040       // cannot contain gtids
3041       if (ret != GOT_GTIDS && ret != GOT_PREVIOUS_GTIDS)
3042         done= true;
3043       break;
3044     }
3045     if (ev != fd_ev_p)
3046       delete ev;
3047     DBUG_PRINT("info", ("done=%d", done));
3048   }
3049 
3050   if (log.error < 0)
3051   {
3052     // This is not a fatal error; the log may just be truncated.
3053 
3054     // @todo but what other errors could happen? IO error?
3055     sql_print_warning("Error reading GTIDs from binary log: %d", log.error);
3056   }
3057 
3058   if (fd_ev_p != &fd_ev)
3059   {
3060     delete fd_ev_p;
3061     fd_ev_p= &fd_ev;
3062   }
3063 
3064   mysql_file_close(file, MYF(MY_WME));
3065   end_io_cache(&log);
3066 
3067   DBUG_PRINT("info", ("returning %d", ret));
3068   DBUG_RETURN(ret);
3069 }
3070 
find_first_log_not_in_gtid_set(char * binlog_file_name,const Gtid_set * gtid_set,Gtid * first_gtid,const char ** errmsg)3071 bool MYSQL_BIN_LOG::find_first_log_not_in_gtid_set(char *binlog_file_name,
3072                                                    const Gtid_set *gtid_set,
3073                                                    Gtid *first_gtid,
3074                                                    const char **errmsg)
3075 {
3076   DBUG_ENTER("MYSQL_BIN_LOG::gtid_read_start_binlog");
3077   /*
3078     Gather the set of files to be accessed.
3079   */
3080   list<string> filename_list;
3081   LOG_INFO linfo;
3082   int error;
3083 
3084   list<string>::reverse_iterator rit;
3085   Gtid_set previous_gtid_set(gtid_set->get_sid_map());
3086 
3087   mysql_mutex_lock(&LOCK_index);
3088   for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/);
3089        !error; error= find_next_log(&linfo, false/*need_lock_index=false*/))
3090   {
3091     DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
3092     filename_list.push_back(string(linfo.log_file_name));
3093   }
3094   mysql_mutex_unlock(&LOCK_index);
3095   if (error != LOG_INFO_EOF)
3096   {
3097     *errmsg= "Failed to read the binary log index file while "
3098       "looking for the oldest binary log that contains any GTID "
3099       "that is not in the given gtid set";
3100     error= -1;
3101     goto end;
3102   }
3103 
3104   if (filename_list.empty())
3105   {
3106     *errmsg= "Could not find first log file name in binary log index file "
3107       "while looking for the oldest binary log that contains any GTID "
3108       "that is not in the given gtid set";
3109     error= -2;
3110     goto end;
3111   }
3112 
3113   /*
3114     Iterate over all the binary logs in reverse order, and read only
3115     the Previous_gtids_log_event, to find the first one, that is the
3116     subset of the given gtid set. Since every binary log begins with
3117     a Previous_gtids_log_event, that contains all GTIDs in all
3118     previous binary logs.
3119     We also ask for the first GTID in the binary log to know if we
3120     should send the FD event with the "created" field cleared or not.
3121   */
3122   DBUG_PRINT("info", ("Iterating backwards through binary logs, and reading "
3123                       "only the Previous_gtids_log_event, to find the first "
3124                       "one, that is the subset of the given gtid set."));
3125   rit= filename_list.rbegin();
3126   error= 0;
3127   while (rit != filename_list.rend())
3128   {
3129     previous_gtid_set.clear();
3130     const char *filename= rit->c_str();
3131     DBUG_PRINT("info", ("Read Previous_gtids_log_event from filename='%s'",
3132                         filename));
3133     switch (read_gtids_from_binlog(filename, NULL, &previous_gtid_set,
3134                                    first_gtid, NULL/* last_gtid */,
3135                                    previous_gtid_set.get_sid_map(),
3136                                    opt_master_verify_checksum))
3137     {
3138     case ERROR:
3139       *errmsg= "Error reading header of binary log while looking for "
3140         "the oldest binary log that contains any GTID that is not in "
3141         "the given gtid set";
3142       error= -3;
3143       goto end;
3144     case NO_GTIDS:
3145       *errmsg= "Found old binary log without GTIDs while looking for "
3146         "the oldest binary log that contains any GTID that is not in "
3147         "the given gtid set";
3148       error= -4;
3149       goto end;
3150     case GOT_GTIDS:
3151     case GOT_PREVIOUS_GTIDS:
3152       if (previous_gtid_set.is_subset(gtid_set))
3153       {
3154         strcpy(binlog_file_name, filename);
3155         /*
3156           Verify that the selected binlog is not the first binlog,
3157         */
3158         DBUG_EXECUTE_IF("slave_reconnect_with_gtid_set_executed",
3159                         DBUG_ASSERT(strcmp(filename_list.begin()->c_str(),
3160                                            binlog_file_name) != 0););
3161         goto end;
3162       }
3163     case TRUNCATED:
3164       break;
3165     }
3166 
3167     rit++;
3168   }
3169 
3170   if (rit == filename_list.rend())
3171   {
3172     report_missing_gtids(&previous_gtid_set, gtid_set, errmsg);
3173     error= -5;
3174   }
3175 
3176 end:
3177   if (error)
3178     DBUG_PRINT("error", ("'%s'", *errmsg));
3179   filename_list.clear();
3180   DBUG_PRINT("info", ("returning %d", error));
3181   DBUG_RETURN(error != 0 ? true : false);
3182 }
3183 
init_gtid_sets(Gtid_set * all_gtids,Gtid_set * lost_gtids,Gtid * last_gtid,bool verify_checksum,bool need_lock,bool is_server_starting)3184 bool MYSQL_BIN_LOG::init_gtid_sets(Gtid_set *all_gtids, Gtid_set *lost_gtids,
3185                                    Gtid *last_gtid, bool verify_checksum,
3186                                    bool need_lock, bool is_server_starting)
3187 {
3188   DBUG_ENTER("MYSQL_BIN_LOG::init_gtid_sets");
3189   DBUG_PRINT("info", ("lost_gtids=%p; so we are recovering a %s log",
3190                       lost_gtids, lost_gtids == NULL ? "relay" : "binary"));
3191 
3192   /*
3193     Acquires the necessary locks to ensure that logs are not either
3194     removed or updated when we are reading from it.
3195   */
3196   if (need_lock)
3197   {
3198     // We don't need LOCK_log if we are only going to read the initial
3199     // Prevoius_gtids_log_event and ignore the Gtid_log_events.
3200     if (all_gtids != NULL)
3201       mysql_mutex_lock(&LOCK_log);
3202     mysql_mutex_lock(&LOCK_index);
3203     global_sid_lock->wrlock();
3204   }
3205   else
3206   {
3207     if (all_gtids != NULL)
3208       mysql_mutex_assert_owner(&LOCK_log);
3209     mysql_mutex_assert_owner(&LOCK_index);
3210     global_sid_lock->assert_some_wrlock();
3211   }
3212 
3213   // Gather the set of files to be accessed.
3214   list<string> filename_list;
3215   LOG_INFO linfo;
3216   int error;
3217 
3218   list<string>::iterator it;
3219   list<string>::reverse_iterator rit;
3220   bool reached_first_file= false;
3221 
3222   /* Initialize the sid_map to be used in read_gtids_from_binlog */
3223   Sid_map *sid_map= NULL;
3224   if (all_gtids)
3225     sid_map= all_gtids->get_sid_map();
3226   else if (lost_gtids)
3227     sid_map= lost_gtids->get_sid_map();
3228 
3229   for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/); !error;
3230        error= find_next_log(&linfo, false/*need_lock_index=false*/))
3231   {
3232     DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
3233     filename_list.push_back(string(linfo.log_file_name));
3234   }
3235   if (error != LOG_INFO_EOF)
3236   {
3237     DBUG_PRINT("error", ("Error reading binlog index"));
3238     goto end;
3239   }
3240   /*
3241     On server starting, one new empty binlog file is created and
3242     its file name is put into index file before initializing
3243     GLOBAL.GTID_EXECUTED AND GLOBAL.GTID_PURGED, it is not the
3244     last binlog file before the server restarts, so we remove
3245     its file name from filename_list.
3246   */
3247   if (is_server_starting && !is_relay_log && !filename_list.empty())
3248     filename_list.pop_back();
3249 
3250   error= 0;
3251 
3252   if (all_gtids != NULL)
3253   {
3254     DBUG_PRINT("info", ("Iterating backwards through binary logs, looking for the last binary log that contains a Previous_gtids_log_event."));
3255     // Iterate over all files in reverse order until we find one that
3256     // contains a Previous_gtids_log_event.
3257     rit= filename_list.rbegin();
3258     bool got_gtids= false;
3259     reached_first_file= (rit == filename_list.rend());
3260     DBUG_PRINT("info", ("filename='%s' reached_first_file=%d",
3261                         rit->c_str(), reached_first_file));
3262     while ((!got_gtids || (last_gtid && last_gtid->empty()))
3263            && !reached_first_file)
3264     {
3265       const char *filename= rit->c_str();
3266       rit++;
3267       reached_first_file= (rit == filename_list.rend());
3268       DBUG_PRINT("info", ("filename='%s' got_gtids=%d reached_first_file=%d",
3269                           filename, got_gtids, reached_first_file));
3270       switch (read_gtids_from_binlog(filename, got_gtids ? NULL : all_gtids,
3271                                      reached_first_file ? lost_gtids : NULL,
3272                                      NULL/* first_gtid */, last_gtid,
3273                                      sid_map, verify_checksum))
3274       {
3275         case ERROR:
3276         {
3277           error= 1;
3278           goto end;
3279         }
3280         case GOT_GTIDS:
3281         case GOT_PREVIOUS_GTIDS:
3282         {
3283           got_gtids= true;
3284           break;
3285         }
3286         case NO_GTIDS:
3287         {
3288           /*
3289             If the binlog_gtid_simple_recovery is enabled, and the
3290             last binary log does not contain any GTID event, do not
3291             read any more binary logs, GLOBAL.GTID_EXECUTED and
3292             GLOBAL.GTID_PURGED should be empty in the case. Otherwise,
3293             initialize GTID_EXECUTED as usual.
3294           */
3295           if (binlog_gtid_simple_recovery && !is_relay_log)
3296           {
3297             DBUG_ASSERT(all_gtids->is_empty() && lost_gtids->is_empty());
3298             goto end;
3299           }
3300           /*FALLTHROUGH*/
3301         }
3302         case TRUNCATED:
3303         {
3304           break;
3305         }
3306       }
3307     }
3308   }
3309   if (lost_gtids != NULL && !reached_first_file)
3310   {
3311     DBUG_PRINT("info", ("Iterating forwards through binary logs, looking for the first binary log that contains a Previous_gtids_log_event."));
3312     for (it= filename_list.begin(); it != filename_list.end(); it++)
3313     {
3314       const char *filename= it->c_str();
3315       DBUG_PRINT("info", ("filename='%s'", filename));
3316       switch (read_gtids_from_binlog(filename, NULL, lost_gtids,
3317                                      NULL/* first_gtid */, NULL/* last_gtid */,
3318                                      sid_map, verify_checksum))
3319       {
3320         case ERROR:
3321         {
3322           error= 1;
3323           /*FALLTHROUGH*/
3324         }
3325         case GOT_GTIDS:
3326         {
3327           goto end;
3328         }
3329         case NO_GTIDS:
3330         {
3331           /*
3332             If the binlog_gtid_simple_recovery is enabled, and the
3333             first binary log does not contain any GTID event, do not
3334             read any more binary logs, GLOBAL.GTID_PURGED should be
3335             empty in the case.
3336           */
3337           if (binlog_gtid_simple_recovery && !is_relay_log)
3338           {
3339             DBUG_ASSERT(lost_gtids->is_empty());
3340             goto end;
3341           }
3342           /*FALLTHROUGH*/
3343         }
3344         case GOT_PREVIOUS_GTIDS:
3345         case TRUNCATED:
3346         {
3347           break;
3348         }
3349       }
3350     }
3351   }
3352 end:
3353   if (all_gtids)
3354     all_gtids->dbug_print("all_gtids");
3355   if (lost_gtids)
3356     lost_gtids->dbug_print("lost_gtids");
3357   if (need_lock)
3358   {
3359     global_sid_lock->unlock();
3360     mysql_mutex_unlock(&LOCK_index);
3361     if (all_gtids != NULL)
3362       mysql_mutex_unlock(&LOCK_log);
3363   }
3364   filename_list.clear();
3365   DBUG_PRINT("info", ("returning %d", error));
3366   DBUG_RETURN(error != 0 ? true : false);
3367 }
3368 
3369 
3370 /**
3371   Open a (new) binlog file.
3372 
3373   - Open the log file and the index file. Register the new
3374   file name in it
  - When calling this while the file is in use, you must hold both
  LOCK_log and LOCK_index.
3377 
3378   @retval
3379     0	ok
3380   @retval
3381     1	error
3382 */
3383 
open_binlog(const char * log_name,const char * new_name,enum cache_type io_cache_type_arg,ulong max_size_arg,bool null_created_arg,bool need_lock_log,bool need_lock_index,bool need_sid_lock,Format_description_log_event * extra_description_event)3384 bool MYSQL_BIN_LOG::open_binlog(const char *log_name,
3385                                 const char *new_name,
3386                                 enum cache_type io_cache_type_arg,
3387                                 ulong max_size_arg,
3388                                 bool null_created_arg,
3389                                 bool need_lock_log,
3390                                 bool need_lock_index,
3391                                 bool need_sid_lock,
3392                                 Format_description_log_event *extra_description_event)
3393 {
3394 
3395   // lock_index must be acquired *before* sid_lock.
3396   DBUG_ASSERT(need_sid_lock || !need_lock_index);
3397   DBUG_ENTER("MYSQL_BIN_LOG::open_binlog(const char *, ...)");
3398   DBUG_PRINT("enter",("name: %s", log_name));
3399 
3400   if (init_and_set_log_file_name(log_name, new_name, LOG_BIN,
3401                                  io_cache_type_arg))
3402   {
3403     sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
3404     DBUG_RETURN(1);
3405   }
3406 
3407 #ifdef HAVE_REPLICATION
3408   if (open_purge_index_file(TRUE) ||
3409       register_create_index_entry(log_file_name) ||
3410       sync_purge_index_file() ||
3411       DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
3412   {
3413     /**
3414       @todo: although this was introduced to appease valgrind
3415       when injecting emulated faults using fault_injection_registering_index
3416       it may be good to consider what actually happens when
3417       open_purge_index_file succeeds but register or sync fails.
3418 
3419       Perhaps we might need the code below in MYSQL_LOG_BIN::cleanup
3420       for "real life" purposes as well?
3421     */
3422     DBUG_EXECUTE_IF("fault_injection_registering_index", {
3423       if (my_b_inited(&purge_index_file))
3424       {
3425         end_io_cache(&purge_index_file);
3426         my_close(purge_index_file.file, MYF(0));
3427       }
3428     });
3429 
3430     sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
3431     DBUG_RETURN(1);
3432   }
3433   DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
3434 #endif
3435 
3436   write_error= 0;
3437 
3438   /* open the main log file */
3439   if (MYSQL_LOG::open(
3440 #ifdef HAVE_PSI_INTERFACE
3441                       m_key_file_log,
3442 #endif
3443                       log_name, LOG_BIN, new_name, io_cache_type_arg))
3444   {
3445 #ifdef HAVE_REPLICATION
3446     close_purge_index_file();
3447 #endif
3448     DBUG_RETURN(1);                            /* all warnings issued */
3449   }
3450 
3451   max_size= max_size_arg;
3452 
3453   open_count++;
3454 
3455   bool write_file_name_to_index_file=0;
3456 
3457   /* This must be before goto err. */
3458   Format_description_log_event s(BINLOG_VERSION);
3459 
3460   if (!my_b_filelength(&log_file))
3461   {
3462     /*
3463       The binary log file was empty (probably newly created)
3464       This is the normal case and happens when the user doesn't specify
3465       an extension for the binary log files.
3466       In this case we write a standard header to it.
3467     */
3468     if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
3469                         BIN_LOG_HEADER_SIZE))
3470       goto err;
3471     bytes_written+= BIN_LOG_HEADER_SIZE;
3472     write_file_name_to_index_file= 1;
3473   }
3474 
3475   /*
3476     don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
3477     as we won't be able to reset it later
3478   */
3479   if (io_cache_type == WRITE_CACHE)
3480     s.flags |= LOG_EVENT_BINLOG_IN_USE_F;
3481   s.checksum_alg= is_relay_log ?
3482     /* relay-log */
3483     /* inherit master's A descriptor if one has been received */
3484     (relay_log_checksum_alg=
3485      (relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF) ?
3486      relay_log_checksum_alg :
3487      /* otherwise use slave's local preference of RL events verification */
3488      (opt_slave_sql_verify_checksum == 0) ?
3489      (uint8) BINLOG_CHECKSUM_ALG_OFF : binlog_checksum_options):
3490     /* binlog */
3491     binlog_checksum_options;
3492   DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
3493   if (!s.is_valid())
3494     goto err;
3495   s.dont_set_created= null_created_arg;
3496   /* Set LOG_EVENT_RELAY_LOG_F flag for relay log's FD */
3497   if (is_relay_log)
3498     s.set_relay_log_event();
3499   if (s.write(&log_file))
3500     goto err;
3501   bytes_written+= s.data_written;
3502   /*
3503     We need to revisit this code and improve it.
3504     See further comments in the mysqld.
3505     /Alfranio
3506   */
3507   if (current_thd && gtid_mode > 0)
3508   {
3509     if (need_sid_lock)
3510       global_sid_lock->wrlock();
3511     else
3512       global_sid_lock->assert_some_wrlock();
3513     Previous_gtids_log_event prev_gtids_ev(previous_gtid_set);
3514     if (is_relay_log)
3515       prev_gtids_ev.set_relay_log_event();
3516     if (need_sid_lock)
3517       global_sid_lock->unlock();
3518     prev_gtids_ev.checksum_alg= s.checksum_alg;
3519     if (prev_gtids_ev.write(&log_file))
3520       goto err;
3521     bytes_written+= prev_gtids_ev.data_written;
3522   }
3523   if (extra_description_event &&
3524       extra_description_event->binlog_version>=4)
3525   {
3526     /*
3527       This is a relay log written to by the I/O slave thread.
3528       Write the event so that others can later know the format of this relay
3529       log.
3530       Note that this event is very close to the original event from the
3531       master (it has binlog version of the master, event types of the
3532       master), so this is suitable to parse the next relay log's event. It
3533       has been produced by
3534       Format_description_log_event::Format_description_log_event(char* buf,).
3535       Why don't we want to write the mi_description_event if this
3536       event is for format<4 (3.23 or 4.x): this is because in that case, the
3537       mi_description_event describes the data received from the
3538       master, but not the data written to the relay log (*conversion*),
3539       which is in format 4 (slave's).
3540     */
3541     /*
3542       Set 'created' to 0, so that in next relay logs this event does not
3543       trigger cleaning actions on the slave in
3544       Format_description_log_event::apply_event_impl().
3545     */
3546     extra_description_event->created= 0;
3547     /* Don't set log_pos in event header */
3548     extra_description_event->set_artificial_event();
3549 
3550     if (extra_description_event->write(&log_file))
3551       goto err;
3552     bytes_written+= extra_description_event->data_written;
3553   }
3554   if (flush_io_cache(&log_file) ||
3555       mysql_file_sync(log_file.file, MYF(MY_WME)))
3556     goto err;
3557 
3558   if (write_file_name_to_index_file)
3559   {
3560 #ifdef HAVE_REPLICATION
3561     DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
3562 #endif
3563 
3564     DBUG_ASSERT(my_b_inited(&index_file) != 0);
3565 
3566     /*
3567       The new log file name is appended into crash safe index file after
3568       all the content of index file is copyed into the crash safe index
3569       file. Then move the crash safe index file to index file.
3570     */
3571     DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
3572                     {DBUG_SET("+d,simulate_no_free_space_error");});
3573     if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
3574         add_log_to_index((uchar*) log_file_name, strlen(log_file_name),
3575                          need_lock_index))
3576     {
3577       DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
3578                       {
3579                         DBUG_SET("-d,simulate_file_write_error");
3580                         DBUG_SET("-d,simulate_no_free_space_error");
3581                         DBUG_SET("-d,simulate_disk_full_on_open_binlog");
3582                       });
3583       goto err;
3584     }
3585 
3586 #ifdef HAVE_REPLICATION
3587     DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
3588 #endif
3589   }
3590 
3591   log_state= LOG_OPENED;
3592 
3593 #ifdef HAVE_REPLICATION
3594   close_purge_index_file();
3595 #endif
3596 
3597   DBUG_RETURN(0);
3598 
3599 err:
3600 #ifdef HAVE_REPLICATION
3601   if (is_inited_purge_index_file())
3602     purge_index_entry(NULL, NULL, need_lock_index);
3603   close_purge_index_file();
3604 #endif
3605 
3606   if (binlog_error_action == ABORT_SERVER)
3607   {
3608     exec_binlog_error_action_abort("Either disk is full or file system is read "
3609                                    "only while opening the binlog. Aborting the"
3610                                    " server.");
3611   }
3612   else
3613   {
3614     sql_print_error("Could not use %s for logging (error %d). "
3615                     "Turning logging off for the whole duration of the MySQL "
3616                     "server process. To turn it on again: fix the cause, "
3617                     "shutdown the MySQL server and restart it.",
3618                     (new_name) ? new_name : name, errno);
3619     close(LOG_CLOSE_INDEX, need_lock_log, need_lock_index);
3620   }
3621   DBUG_RETURN(1);
3622 }
3623 
3624 
3625 /**
3626   Move crash safe index file to index file.
3627 
3628   @param need_lock_index If true, LOCK_index will be acquired;
3629   otherwise it should already be held.
3630 
3631   @retval 0 ok
3632   @retval -1 error
3633 */
move_crash_safe_index_file_to_index_file(bool need_lock_index)3634 int MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file(bool need_lock_index)
3635 {
3636   int error= 0;
3637   File fd= -1;
3638   DBUG_ENTER("MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file");
3639   int failure_trials= MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
3640   bool file_rename_status= false, file_delete_status= false;
3641   THD *thd= current_thd;
3642 
3643   if (need_lock_index)
3644     mysql_mutex_lock(&LOCK_index);
3645   else
3646     mysql_mutex_assert_owner(&LOCK_index);
3647 
3648   if (my_b_inited(&index_file))
3649   {
3650     end_io_cache(&index_file);
3651     if (mysql_file_close(index_file.file, MYF(0)) < 0)
3652     {
3653       error= -1;
3654       sql_print_error("While rebuilding index file %s: "
3655                       "Failed to close the index file.", index_file_name);
3656       /*
3657         Delete Crash safe index file here and recover the binlog.index
3658         state(index_file io_cache) from old binlog.index content.
3659        */
3660       mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
3661                         MYF(0));
3662 
3663       goto recoverable_err;
3664     }
3665 
3666     /*
3667       Sometimes an outsider can lock index files for temporary viewing
3668       purpose. For eg: MEB locks binlog.index/relaylog.index to view
3669       the content of the file. During that small period of time, deletion
3670       of the file is not possible on some platforms(Eg: Windows)
3671       Server should retry the delete operation for few times instead of panicking
3672       immediately.
3673     */
3674     while ((file_delete_status == false) && (failure_trials > 0))
3675     {
3676       if (DBUG_EVALUATE_IF("force_index_file_delete_failure", 1, 0)) break;
3677 
3678       DBUG_EXECUTE_IF("simulate_index_file_delete_failure",
3679                   {
3680                     /* This simulation causes the delete to fail */
3681                     static char first_char= index_file_name[0];
3682                     index_file_name[0]= 0;
3683                     sql_print_information("Retrying delete");
3684                     if (failure_trials == 1)
3685                       index_file_name[0]= first_char;
3686                   };);
3687       file_delete_status = !(mysql_file_delete(key_file_binlog_index,
3688                                                index_file_name, MYF(MY_WME)));
3689       --failure_trials;
3690       if (!file_delete_status)
3691       {
3692         my_sleep(1000);
3693         /* Clear the error before retrying. */
3694         if (failure_trials > 0)
3695           thd->clear_error();
3696       }
3697     }
3698 
3699     if (!file_delete_status)
3700     {
3701       error= -1;
3702       sql_print_error("While rebuilding index file %s: "
3703                       "Failed to delete the existing index file. It could be "
3704                       "that file is being used by some other process.",
3705                       index_file_name);
3706       /*
3707         Delete Crash safe file index file here and recover the binlog.index
3708         state(index_file io_cache) from old binlog.index content.
3709        */
3710       mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
3711                         MYF(0));
3712 
3713       goto recoverable_err;
3714     }
3715   }
3716 
3717   DBUG_EXECUTE_IF("crash_create_before_rename_index_file", DBUG_SUICIDE(););
3718   /*
3719     Sometimes an outsider can lock index files for temporary viewing
3720     purpose. For eg: MEB locks binlog.index/relaylog.index to view
3721     the content of the file. During that small period of time, rename
3722     of the file is not possible on some platforms(Eg: Windows)
3723     Server should retry the rename operation for few times instead of panicking
3724     immediately.
3725   */
3726   failure_trials = MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
3727   while ((file_rename_status == false) && (failure_trials > 0))
3728   {
3729     DBUG_EXECUTE_IF("simulate_crash_safe_index_file_rename_failure",
3730                 {
3731                   /* This simulation causes the rename to fail */
3732                   static char first_char= index_file_name[0];
3733                   index_file_name[0]= 0;
3734                   sql_print_information("Retrying rename");
3735                   if (failure_trials == 1)
3736                     index_file_name[0]= first_char;
3737                 };);
3738     file_rename_status =
3739         !(my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)));
3740     --failure_trials;
3741     if (!file_rename_status)
3742     {
3743       my_sleep(1000);
3744       /* Clear the error before retrying. */
3745       if (failure_trials > 0)
3746         thd->clear_error();
3747     }
3748   }
3749   if (!file_rename_status)
3750   {
3751     error= -1;
3752     sql_print_error("While rebuilding index file %s: "
3753                     "Failed to rename the new index file to the existing "
3754                     "index file.", index_file_name);
3755     goto fatal_err;
3756   }
3757   DBUG_EXECUTE_IF("crash_create_after_rename_index_file", DBUG_SUICIDE(););
3758 
3759 recoverable_err:
3760   if ((fd= mysql_file_open(key_file_binlog_index,
3761                            index_file_name,
3762                            O_RDWR | O_CREAT | O_BINARY,
3763                            MYF(MY_WME))) < 0 ||
3764            mysql_file_sync(fd, MYF(MY_WME)) ||
3765            init_io_cache(&index_file, fd, IO_SIZE, READ_CACHE,
3766                          mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(0)),
3767                                          0, MYF(MY_WME | MY_WAIT_IF_FULL)))
3768   {
3769     sql_print_error("After rebuilding the index file %s: "
3770                     "Failed to open the index file.", index_file_name);
3771     goto fatal_err;
3772   }
3773 
3774   if (need_lock_index)
3775     mysql_mutex_unlock(&LOCK_index);
3776   DBUG_RETURN(error);
3777 
3778 fatal_err:
3779   /*
3780     This situation is very very rare to happen (unless there is some serious
3781     memory related issues like OOM) and should be treated as fatal error.
3782     Hence it is better to bring down the server without respecting
3783     'binlog_error_action' value here.
3784   */
3785   exec_binlog_error_action_abort("MySQL server failed to update the "
3786                                  "binlog.index file's content properly. "
3787                                  "It might not be in sync with available "
3788                                  "binlogs and the binlog.index file state is in "
3789                                  "unrecoverable state. Aborting the server.");
3790   /*
3791     Server is aborted in the above function.
3792     This is dead code to make compiler happy.
3793    */
3794   DBUG_RETURN(error);
3795 }
3796 
3797 
3798 /**
3799   Append log file name to index file.
3800 
3801   - To make crash safe, we copy all the content of index file
3802   to crash safe index file firstly and then append the log
3803   file name to the crash safe index file. Finally move the
3804   crash safe index file to index file.
3805 
3806   @retval
3807     0   ok
3808   @retval
3809     -1   error
3810 */
add_log_to_index(uchar * log_name,int log_name_len,bool need_lock_index)3811 int MYSQL_BIN_LOG::add_log_to_index(uchar* log_name,
3812                                     int log_name_len, bool need_lock_index)
3813 {
3814   DBUG_ENTER("MYSQL_BIN_LOG::add_log_to_index");
3815 
3816   if (open_crash_safe_index_file())
3817   {
3818     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3819                     "open the crash safe index file.");
3820     goto err;
3821   }
3822 
3823   if (copy_file(&index_file, &crash_safe_index_file, 0))
3824   {
3825     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3826                     "copy index file to crash safe index file.");
3827     goto err;
3828   }
3829 
3830   if (my_b_write(&crash_safe_index_file, log_name, log_name_len) ||
3831       my_b_write(&crash_safe_index_file, (uchar*) "\n", 1) ||
3832       flush_io_cache(&crash_safe_index_file) ||
3833       mysql_file_sync(crash_safe_index_file.file, MYF(MY_WME)))
3834   {
3835     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3836                     "append log file name: %s, to crash "
3837                     "safe index file.", log_name);
3838     goto err;
3839   }
3840 
3841   if (close_crash_safe_index_file())
3842   {
3843     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3844                     "close the crash safe index file.");
3845     goto err;
3846   }
3847 
3848   if (move_crash_safe_index_file_to_index_file(need_lock_index))
3849   {
3850     sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3851                     "move crash safe index file to index file.");
3852     goto err;
3853   }
3854 
3855   DBUG_RETURN(0);
3856 
3857 err:
3858   DBUG_RETURN(-1);
3859 }
3860 
get_current_log(LOG_INFO * linfo,bool need_lock_log)3861 int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo, bool need_lock_log/*true*/)
3862 {
3863   if (need_lock_log)
3864     mysql_mutex_lock(&LOCK_log);
3865   int ret = raw_get_current_log(linfo);
3866   if (need_lock_log)
3867     mysql_mutex_unlock(&LOCK_log);
3868   return ret;
3869 }
3870 
raw_get_current_log(LOG_INFO * linfo)3871 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
3872 {
3873   strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
3874   linfo->pos = my_b_safe_tell(&log_file);
3875   return 0;
3876 }
3877 
check_write_error_code(uint error_code)3878 static bool check_write_error_code(uint error_code)
3879 {
3880   return error_code == ER_TRANS_CACHE_FULL ||
3881          error_code == ER_STMT_CACHE_FULL  ||
3882          error_code == ER_ERROR_ON_WRITE   ||
3883          error_code == ER_BINLOG_LOGGING_IMPOSSIBLE;
3884 }
3885 
check_write_error(THD * thd)3886 bool MYSQL_BIN_LOG::check_write_error(THD *thd)
3887 {
3888   DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
3889 
3890   if (!thd->is_error())
3891     DBUG_RETURN(false);
3892 
3893   bool checked= check_write_error_code(thd->get_stmt_da()->sql_errno());
3894 
3895   if (!checked)
3896   {
3897     /* Check all conditions for one that matches the expected error */
3898     const Sql_condition *err;
3899     Diagnostics_area::Sql_condition_iterator it=
3900       thd->get_stmt_da()->sql_conditions();
3901     while ((err= it++) != NULL && !checked)
3902     {
3903       checked= check_write_error_code(err->get_sql_errno());
3904     }
3905   }
3906   DBUG_PRINT("return", ("checked: %s", YESNO(checked)));
3907   DBUG_RETURN(checked);
3908 }
3909 
set_write_error(THD * thd,bool is_transactional)3910 void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
3911 {
3912   DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
3913 
3914   write_error= 1;
3915 
3916   if (check_write_error(thd))
3917     DBUG_VOID_RETURN;
3918 
3919   if (my_errno == EFBIG)
3920   {
3921     if (is_transactional)
3922     {
3923       my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(MY_WME));
3924     }
3925     else
3926     {
3927       my_message(ER_STMT_CACHE_FULL, ER(ER_STMT_CACHE_FULL), MYF(MY_WME));
3928     }
3929   }
3930   else
3931   {
3932     char errbuf[MYSYS_STRERROR_SIZE];
3933     my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name,
3934              errno, my_strerror(errbuf, sizeof(errbuf), errno));
3935   }
3936 
3937   DBUG_VOID_RETURN;
3938 }
3939 
3940 /**
3941   Find the position in the log-index-file for the given log name.
3942 
3943   @param[out] linfo The found log file name will be stored here, along
3944   with the byte offset of the next log file name in the index file.
3945   @param log_name Filename to find in the index file, or NULL if we
3946   want to read the first entry.
  @param need_lock_index If true, this function acquires LOCK_index;
  otherwise the lock should already be held by the caller.
3949 
3950   @note
3951     On systems without the truncate function the file will end with one or
3952     more empty lines.  These will be ignored when reading the file.
3953 
3954   @retval
3955     0			ok
3956   @retval
3957     LOG_INFO_EOF	        End of log-index-file found
3958   @retval
3959     LOG_INFO_IO		Got IO error while reading file
3960 */
3961 
find_log_pos(LOG_INFO * linfo,const char * log_name,bool need_lock_index)3962 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
3963                                 bool need_lock_index)
3964 {
3965   int error= 0;
3966   char *full_fname= linfo->log_file_name;
3967   char full_log_name[FN_REFLEN], fname[FN_REFLEN];
3968   uint log_name_len= 0, fname_len= 0;
3969   DBUG_ENTER("find_log_pos");
3970   full_log_name[0]= full_fname[0]= 0;
3971 
3972   /*
3973     Mutex needed because we need to make sure the file pointer does not
3974     move from under our feet
3975   */
3976   if (need_lock_index)
3977     mysql_mutex_lock(&LOCK_index);
3978   else
3979     mysql_mutex_assert_owner(&LOCK_index);
3980 
3981   if (!my_b_inited(&index_file))
3982   {
3983       error= LOG_INFO_IO;
3984       goto end;
3985   }
3986 
3987   // extend relative paths for log_name to be searched
3988   if (log_name)
3989   {
3990     if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
3991     {
3992       error= LOG_INFO_EOF;
3993       goto end;
3994     }
3995   }
3996 
3997   log_name_len= log_name ? (uint) strlen(full_log_name) : 0;
3998   DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
3999                        log_name ? log_name : "NULL", full_log_name));
4000 
4001   /* As the file is flushed, we can't get an error here */
4002   my_b_seek(&index_file, (my_off_t) 0);
4003 
4004   for (;;)
4005   {
4006     uint length;
4007     my_off_t offset= my_b_tell(&index_file);
4008 
4009     DBUG_EXECUTE_IF("simulate_find_log_pos_error",
4010                     error=  LOG_INFO_EOF; break;);
4011     /* If we get 0 or 1 characters, this is the end of the file */
4012     if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
4013     {
4014       /* Did not find the given entry; Return not found or error */
4015       error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
4016       break;
4017     }
4018 
4019     // extend relative paths and match against full path
4020     if (normalize_binlog_name(full_fname, fname, is_relay_log))
4021     {
4022       error= LOG_INFO_EOF;
4023       break;
4024     }
4025     fname_len= (uint) strlen(full_fname);
4026 
4027     // if the log entry matches, null string matching anything
4028     if (!log_name ||
4029        (log_name_len == fname_len-1 && full_fname[log_name_len] == '\n' &&
4030         !strncmp(full_fname, full_log_name, log_name_len)))
4031     {
4032       DBUG_PRINT("info", ("Found log file entry"));
4033       full_fname[fname_len-1]= 0;                      // remove last \n
4034       linfo->index_file_start_offset= offset;
4035       linfo->index_file_offset = my_b_tell(&index_file);
4036       break;
4037     }
4038     linfo->entry_index++;
4039   }
4040 
4041 end:
4042   if (need_lock_index)
4043     mysql_mutex_unlock(&LOCK_index);
4044   DBUG_RETURN(error);
4045 }
4046 
4047 
4048 /**
  Read the next log file name from the log-index-file, starting at the
  offset recorded in 'linfo'.
4050 
4051   @param[out] linfo The filename will be stored here, along with the
4052   byte offset of the next filename in the index file.
4053 
4054   @param need_lock_index If true, LOCK_index will be acquired;
4055   otherwise it should already be held by the caller.
4056 
4057   @note
4058     - Before calling this function, one has to call find_log_pos()
4059     to set up 'linfo'
4060     - Mutex needed because we need to make sure the file pointer does not move
4061     from under our feet
4062 
4063   @retval 0 ok
4064   @retval LOG_INFO_EOF End of log-index-file found
4065   @retval LOG_INFO_IO Got IO error while reading file
4066 */
find_next_log(LOG_INFO * linfo,bool need_lock_index)4067 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock_index)
4068 {
4069   int error= 0;
4070   uint length;
4071   char fname[FN_REFLEN];
4072   char *full_fname= linfo->log_file_name;
4073 
4074   if (need_lock_index)
4075     mysql_mutex_lock(&LOCK_index);
4076   else
4077     mysql_mutex_assert_owner(&LOCK_index);
4078 
4079   if (!my_b_inited(&index_file))
4080   {
4081       error= LOG_INFO_IO;
4082       goto err;
4083   }
4084   /* As the file is flushed, we can't get an error here */
4085   my_b_seek(&index_file, linfo->index_file_offset);
4086 
4087   linfo->index_file_start_offset= linfo->index_file_offset;
4088   if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
4089   {
4090     error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
4091     goto err;
4092   }
4093 
4094   if (fname[0] != 0)
4095   {
4096     if(normalize_binlog_name(full_fname, fname, is_relay_log))
4097     {
4098       error= LOG_INFO_EOF;
4099       goto err;
4100     }
4101     length= strlen(full_fname);
4102   }
4103 
4104   full_fname[length-1]= 0;                     // kill \n
4105   linfo->index_file_offset= my_b_tell(&index_file);
4106 
4107 err:
4108   if (need_lock_index)
4109     mysql_mutex_unlock(&LOCK_index);
4110   return error;
4111 }
4112 
4113 
4114 /**
4115   Removes files, as part of a RESET MASTER or RESET SLAVE statement,
  by deleting all logs referred to in the index file. Then, it starts
4117   writing to a new log file.
4118 
4119   The new index file will only contain this file.
4120 
4121   @param thd Thread
4122 
4123   @note
4124     If not called from slave thread, write start event to new log
4125 
4126   @retval
4127     0	ok
4128   @retval
4129     1   error
4130 */
reset_logs(THD * thd)4131 bool MYSQL_BIN_LOG::reset_logs(THD* thd)
4132 {
4133   LOG_INFO linfo;
4134   bool error=0;
4135   int err;
4136   const char* save_name;
4137   DBUG_ENTER("reset_logs");
4138 
4139   /*
4140     Flush logs for storage engines, so that the last transaction
4141     is fsynced inside storage engines.
4142   */
4143   if (ha_flush_logs(NULL))
4144     DBUG_RETURN(1);
4145 
4146   ha_reset_logs(thd);
4147 
4148   /*
4149     We need to get both locks to be sure that no one is trying to
4150     write to the index log file.
4151   */
4152   mysql_mutex_lock(&LOCK_log);
4153   mysql_mutex_lock(&LOCK_index);
4154 
4155   /*
4156     The following mutex is needed to ensure that no threads call
4157     'delete thd' as we would then risk missing a 'rollback' from this
4158     thread. If the transaction involved MyISAM tables, it should go
4159     into binlog even on rollback.
4160   */
4161   mysql_mutex_lock(&LOCK_thread_count);
4162 
4163   global_sid_lock->wrlock();
4164 
4165   /* Save variables so that we can reopen the log */
4166   save_name=name;
4167   name=0;					// Protect against free
4168   close(LOG_CLOSE_TO_BE_OPENED, false/*need_lock_log=false*/,
4169         false/*need_lock_index=false*/);
4170 
4171   /*
4172     First delete all old log files and then update the index file.
4173     As we first delete the log files and do not use sort of logging,
4174     a crash may lead to an inconsistent state where the index has
4175     references to non-existent files.
4176 
4177     We need to invert the steps and use the purge_index_file methods
4178     in order to make the operation safe.
4179   */
4180 
4181   if ((err= find_log_pos(&linfo, NullS, false/*need_lock_index=false*/)) != 0)
4182   {
4183     uint errcode= purge_log_get_error_code(err);
4184     sql_print_error("Failed to locate old binlog or relay log files");
4185     my_message(errcode, ER(errcode), MYF(0));
4186     error= 1;
4187     goto err;
4188   }
4189 
4190   for (;;)
4191   {
4192     if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
4193     {
4194       if (my_errno == ENOENT)
4195       {
4196         push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4197                             ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
4198                             linfo.log_file_name);
4199         sql_print_information("Failed to delete file '%s'",
4200                               linfo.log_file_name);
4201         my_errno= 0;
4202         error= 0;
4203       }
4204       else
4205       {
4206         push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4207                             ER_BINLOG_PURGE_FATAL_ERR,
4208                             "a problem with deleting %s; "
4209                             "consider examining correspondence "
4210                             "of your binlog index file "
4211                             "to the actual binlog files",
4212                             linfo.log_file_name);
4213         error= 1;
4214         goto err;
4215       }
4216     }
4217     if (find_next_log(&linfo, false/*need_lock_index=false*/))
4218       break;
4219   }
4220 
4221   /* Start logging with a new file */
4222   close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED,
4223         false/*need_lock_log=false*/,
4224         false/*need_lock_index=false*/);
4225   if ((error= my_delete_allow_opened(index_file_name, MYF(0))))	// Reset (open will update)
4226   {
4227     if (my_errno == ENOENT)
4228     {
4229       push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4230                           ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
4231                           index_file_name);
4232       sql_print_information("Failed to delete file '%s'",
4233                             index_file_name);
4234       my_errno= 0;
4235       error= 0;
4236     }
4237     else
4238     {
4239       push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4240                           ER_BINLOG_PURGE_FATAL_ERR,
4241                           "a problem with deleting %s; "
4242                           "consider examining correspondence "
4243                           "of your binlog index file "
4244                           "to the actual binlog files",
4245                           index_file_name);
4246       error= 1;
4247       goto err;
4248     }
4249   }
4250 
4251 #ifdef HAVE_REPLICATION
4252   if (is_relay_log)
4253   {
4254     DBUG_ASSERT(active_mi != NULL);
4255     DBUG_ASSERT(active_mi->rli != NULL);
4256     (const_cast<Gtid_set *>(active_mi->rli->get_gtid_set()))->clear();
4257   }
4258   else
4259   {
4260     gtid_state->clear();
4261     // don't clear global_sid_map because it's used by the relay log too
4262     if (gtid_state->init() != 0)
4263       goto err;
4264   }
4265 #endif
4266 
4267   if (!open_index_file(index_file_name, 0, false/*need_lock_index=false*/))
4268     if ((error= open_binlog(save_name, 0, io_cache_type,
4269                             max_size, false,
4270                             false/*need_lock_log=false*/,
4271                             false/*need_lock_index=false*/,
4272                             false/*need_sid_lock=false*/,
4273                             NULL)))
4274       goto err;
4275   my_free((void *) save_name);
4276 
4277 err:
4278   if (error == 1)
4279     name= const_cast<char*>(save_name);
4280   global_sid_lock->unlock();
4281   mysql_mutex_unlock(&LOCK_thread_count);
4282   mysql_mutex_unlock(&LOCK_index);
4283   mysql_mutex_unlock(&LOCK_log);
4284   DBUG_RETURN(error);
4285 }
4286 
4287 
4288 /**
4289   Set the name of crash safe index file.
4290 
4291   @retval
4292     0   ok
4293   @retval
4294     1   error
4295 */
set_crash_safe_index_file_name(const char * base_file_name)4296 int MYSQL_BIN_LOG::set_crash_safe_index_file_name(const char *base_file_name)
4297 {
4298   int error= 0;
4299   DBUG_ENTER("MYSQL_BIN_LOG::set_crash_safe_index_file_name");
4300   if (fn_format(crash_safe_index_file_name, base_file_name, mysql_data_home,
4301                 ".index_crash_safe", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
4302                                          MY_REPLACE_EXT)) == NULL)
4303   {
4304     error= 1;
4305     sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed "
4306                     "to set file name.");
4307   }
4308   DBUG_RETURN(error);
4309 }
4310 
4311 
4312 /**
4313   Open a (new) crash safe index file.
4314 
4315   @note
4316     The crash safe index file is a special file
4317     used for guaranteeing index file crash safe.
4318   @retval
4319     0   ok
4320   @retval
4321     1   error
4322 */
open_crash_safe_index_file()4323 int MYSQL_BIN_LOG::open_crash_safe_index_file()
4324 {
4325   int error= 0;
4326   File file= -1;
4327 
4328   DBUG_ENTER("MYSQL_BIN_LOG::open_crash_safe_index_file");
4329 
4330   if (!my_b_inited(&crash_safe_index_file))
4331   {
4332     if ((file= my_open(crash_safe_index_file_name, O_RDWR | O_CREAT | O_BINARY,
4333                        MYF(MY_WME | ME_WAITTANG))) < 0  ||
4334         init_io_cache(&crash_safe_index_file, file, IO_SIZE, WRITE_CACHE,
4335                       0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
4336     {
4337       error= 1;
4338       sql_print_error("MYSQL_BIN_LOG::open_crash_safe_index_file failed "
4339                       "to open temporary index file.");
4340     }
4341   }
4342   DBUG_RETURN(error);
4343 }
4344 
4345 
4346 /**
4347   Close the crash safe index file.
4348 
4349   @note
4350     The crash safe file is just closed, is not deleted.
4351     Because it is moved to index file later on.
4352   @retval
4353     0   ok
4354   @retval
4355     1   error
4356 */
close_crash_safe_index_file()4357 int MYSQL_BIN_LOG::close_crash_safe_index_file()
4358 {
4359   int error= 0;
4360 
4361   DBUG_ENTER("MYSQL_BIN_LOG::close_crash_safe_index_file");
4362 
4363   if (my_b_inited(&crash_safe_index_file))
4364   {
4365     end_io_cache(&crash_safe_index_file);
4366     error= my_close(crash_safe_index_file.file, MYF(0));
4367   }
4368   memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
4369 
4370   DBUG_RETURN(error);
4371 }
4372 
4373 
4374 /**
4375   Delete relay log files prior to rli->group_relay_log_name
4376   (i.e. all logs which are not involved in a non-finished group
4377   (transaction)), remove them from the index file and start on next
4378   relay log.
4379 
4380   IMPLEMENTATION
4381 
4382   - You must hold rli->data_lock before calling this function, since
4383     it writes group_relay_log_pos and similar fields of
4384     Relay_log_info.
4385   - Protects index file with LOCK_index
4386   - Delete relevant relay log files
4387   - Copy all file names after these ones to the front of the index file
  - If the OS has truncate, truncate the file, else fill it with '\n'
4389   - Read the next file name from the index file and store in rli->linfo
4390 
4391   @param rli	       Relay log information
4392   @param included     If false, all relay logs that are strictly before
4393                       rli->group_relay_log_name are deleted ; if true, the
4394                       latter is deleted too (i.e. all relay logs
4395                       read by the SQL slave thread are deleted).
4396 
4397   @note
    - This is only called from the slave SQL thread when it has read
    all commands from a relay log and wants to switch to a new relay log.
4400     - When this happens, we can be in an active transaction as
4401     a transaction can span over two relay logs
4402     (although it is always written as a single block to the master's binary
4403     log, hence cannot span over two master's binary logs).
4404 
4405   @retval
4406     0			ok
4407   @retval
4408     LOG_INFO_EOF	        End of log-index-file found
4409   @retval
4410     LOG_INFO_SEEK	Could not allocate IO cache
4411   @retval
4412     LOG_INFO_IO		Got IO error while reading file
4413 */
4414 
4415 #ifdef HAVE_REPLICATION
4416 
purge_first_log(Relay_log_info * rli,bool included)4417 int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
4418 {
4419   int error;
4420   char *to_purge_if_included= NULL;
4421   DBUG_ENTER("purge_first_log");
4422 
4423   DBUG_ASSERT(current_thd->system_thread == SYSTEM_THREAD_SLAVE_SQL);
4424   DBUG_ASSERT(is_relay_log);
4425   DBUG_ASSERT(is_open());
4426   DBUG_ASSERT(rli->slave_running == 1);
4427   DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->get_event_relay_log_name()));
4428 
4429   mysql_mutex_assert_owner(&rli->data_lock);
4430 
4431   mysql_mutex_lock(&LOCK_index);
4432   to_purge_if_included= my_strdup(rli->get_group_relay_log_name(), MYF(0));
4433 
4434   /*
4435     Read the next log file name from the index file and pass it back to
4436     the caller.
4437   */
4438   if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
4439                          false/*need_lock_index=false*/)) ||
4440      (error=find_next_log(&rli->linfo, false/*need_lock_index=false*/)))
4441   {
4442     char buff[22];
4443     sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
4444                     error,
4445                     llstr(rli->linfo.index_file_offset,buff),
4446                     rli->get_event_relay_log_name(),
4447                     included);
4448     goto err;
4449   }
4450 
4451   /*
4452     Reset rli's coordinates to the current log.
4453   */
4454   rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
4455   rli->set_event_relay_log_name(rli->linfo.log_file_name);
4456 
4457   /*
4458     If we removed the rli->group_relay_log_name file,
4459     we must update the rli->group* coordinates, otherwise do not touch it as the
4460     group's execution is not finished (e.g. COMMIT not executed)
4461   */
4462   if (included)
4463   {
4464     rli->set_group_relay_log_pos(BIN_LOG_HEADER_SIZE);
4465     rli->set_group_relay_log_name(rli->linfo.log_file_name);
4466     rli->notify_group_relay_log_name_update();
4467   }
4468   /*
4469     Store where we are in the new file for the execution thread.
4470     If we are in the middle of a group), then we should not store
4471     the position in the repository, instead in that case set a flag
4472     to true which indicates that a 'forced flush' is postponed due
4473     to transaction split across the relaylogs.
4474   */
4475   if (!rli->is_in_group())
4476     rli->flush_info(TRUE);
4477   else
4478     rli->force_flush_postponed_due_to_split_trans= true;
4479 
4480   DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););
4481 
4482   mysql_mutex_lock(&rli->log_space_lock);
4483   rli->relay_log.purge_logs(to_purge_if_included, included,
4484                             false/*need_lock_index=false*/,
4485                             false/*need_update_threads=false*/,
4486                             &rli->log_space_total, true);
4487   // Tell the I/O thread to take the relay_log_space_limit into account
4488   rli->ignore_log_space_limit= 0;
4489   mysql_mutex_unlock(&rli->log_space_lock);
4490 
4491   /*
4492     Ok to broadcast after the critical region as there is no risk of
4493     the mutex being destroyed by this thread later - this helps save
4494     context switches
4495   */
4496   mysql_cond_broadcast(&rli->log_space_cond);
4497 
4498   /*
4499    * Need to update the log pos because purge logs has been called
4500    * after fetching initially the log pos at the begining of the method.
4501    */
4502   if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
4503                          false/*need_lock_index=false*/)))
4504   {
4505     char buff[22];
4506     sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
4507                     error,
4508                     llstr(rli->linfo.index_file_offset,buff),
4509                     rli->get_group_relay_log_name(),
4510                     included);
4511     goto err;
4512   }
4513 
4514   /* If included was passed, rli->linfo should be the first entry. */
4515   DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);
4516 
4517 err:
4518   my_free(to_purge_if_included);
4519   mysql_mutex_unlock(&LOCK_index);
4520   DBUG_RETURN(error);
4521 }
4522 
4523 
4524 /**
4525   Remove logs from index file.
4526 
4527   - To make crash safe, we copy the content of index file
4528   from index_file_start_offset recored in log_info to
4529   crash safe index file firstly and then move the crash
4530   safe index file to index file.
4531 
4532   @param linfo                  Store here the found log file name and
4533                                 position to the NEXT log file name in
4534                                 the index file.
4535 
4536   @param need_update_threads    If we want to update the log coordinates
4537                                 of all threads. False for relay logs,
4538                                 true otherwise.
4539 
4540   @retval
4541     0    ok
4542   @retval
4543     LOG_INFO_IO    Got IO error while reading/writing file
4544 */
remove_logs_from_index(LOG_INFO * log_info,bool need_update_threads)4545 int MYSQL_BIN_LOG::remove_logs_from_index(LOG_INFO* log_info, bool need_update_threads)
4546 {
4547   if (open_crash_safe_index_file())
4548   {
4549     sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
4550                     "open the crash safe index file.");
4551     goto err;
4552   }
4553 
4554   if (copy_file(&index_file, &crash_safe_index_file,
4555                 log_info->index_file_start_offset))
4556   {
4557     sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
4558                     "copy index file to crash safe index file.");
4559     goto err;
4560   }
4561 
4562   if (close_crash_safe_index_file())
4563   {
4564     sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
4565                     "close the crash safe index file.");
4566     goto err;
4567   }
4568   DBUG_EXECUTE_IF("fault_injection_copy_part_file", DBUG_SUICIDE(););
4569 
4570   if (move_crash_safe_index_file_to_index_file(false/*need_lock_index=false*/))
4571   {
4572     sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
4573                     "move crash safe index file to index file.");
4574     goto err;
4575   }
4576 
4577   // now update offsets in index file for running threads
4578   if (need_update_threads)
4579     adjust_linfo_offsets(log_info->index_file_start_offset);
4580   return 0;
4581 
4582 err:
4583   return LOG_INFO_IO;
4584 }
4585 
/**
  Remove all logs before the given log from disk and from the index file.

  The names of the files to delete are first written to the purge index
  ("register") file and synced, then the index file is rewritten, and
  only after that are the registered files deleted from disk.

  @param to_log              Delete all log file name before this file.
  @param included            If true, to_log is deleted too.
  @param need_lock_index     If true, LOCK_index is acquired here and
                             released before returning; if false, the
                             caller must already own it (asserted).
  @param need_update_threads If we want to update the log coordinates of
                             all threads. False for relay logs, true otherwise.
  @param decrease_log_space  If not null, decrement this variable of
                             the amount of log space freed
  @param auto_purge          True if this is an automatic purge. When
                             true, the "log is active" / "log is in use"
                             warnings are not pushed.

  @note
    If any of the logs before the deleted one is in use,
    only purge logs up to this one.

  @retval
    0			ok
  @retval
    LOG_INFO_EOF		to_log not found
    LOG_INFO_EMFILE             too many files opened
    LOG_INFO_FATAL              if any other than ENOENT error from
                                mysql_file_stat() or mysql_file_delete()
*/

int MYSQL_BIN_LOG::purge_logs(const char *to_log,
                              bool included,
                              bool need_lock_index,
                              bool need_update_threads,
                              ulonglong *decrease_log_space,
                              bool auto_purge)
{
  int error= 0, no_of_log_files_to_purge= 0, no_of_log_files_purged= 0;
  int no_of_threads_locking_log= 0;
  bool exit_loop= 0;
  LOG_INFO log_info;
  THD *thd= current_thd;
  DBUG_ENTER("purge_logs");
  DBUG_PRINT("info",("to_log= %s",to_log));

  if (need_lock_index)
    mysql_mutex_lock(&LOCK_index);
  else
    mysql_mutex_assert_owner(&LOCK_index);
  /* First make sure that to_log is listed in the index at all. */
  if ((error=find_log_pos(&log_info, to_log, false/*need_lock_index=false*/)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
                    "listed in the index.", to_log);
    goto err;
  }

  /*
    Only used in the "log in use" warning below.
    NOTE(review): presumably entry_index is the position of to_log in
    the index file -- confirm against find_log_pos().
  */
  no_of_log_files_to_purge= log_info.entry_index;

  /* (Re)create the register file, opened for writing. */
  if ((error= open_purge_index_file(TRUE)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
    goto err;
  }

  /*
    File name exists in index file; delete until we find this file
    or a file that is used.
  */
  if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
    goto err;

  /*
    The loop runs for every entry that is not to_log. When to_log is
    reached, exit_loop is set to 'included': if that is true the body
    runs one last time for to_log itself, otherwise the loop ends.
  */
  while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)))
  {
    if(is_active(log_info.log_file_name))
    {
      if(!auto_purge)
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_WARN_PURGE_LOG_IS_ACTIVE,
                            ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
                            log_info.log_file_name);
      break;
    }

    if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
    {
      if(!auto_purge)
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_WARN_PURGE_LOG_IN_USE,
                            ER(ER_WARN_PURGE_LOG_IN_USE),
                            log_info.log_file_name,  no_of_threads_locking_log,
                            no_of_log_files_purged, no_of_log_files_to_purge);
      break;
    }
    no_of_log_files_purged++;

    /* Remember the file name; the file itself is deleted later. */
    if ((error= register_purge_index_entry(log_info.log_file_name)))
    {
      sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
                      log_info.log_file_name);
      goto err;
    }

    if (find_next_log(&log_info, false/*need_lock_index=false*/) || exit_loop)
      break;
  }

  DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););

  /* The register file must be on disk before the index is changed. */
  if ((error= sync_purge_index_file()))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
    goto err;
  }

  /* We know how many files to delete. Update index file. */
  if ((error=remove_logs_from_index(&log_info, need_update_threads)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
    goto err;
  }

  // Update gtid_state->lost_gtids
  if (gtid_mode > 0 && !is_relay_log)
  {
    global_sid_lock->wrlock();
    error= init_gtid_sets(NULL,
                       const_cast<Gtid_set *>(gtid_state->get_lost_gtids()),
                       NULL,
                       opt_master_verify_checksum,
                       false/*false=don't need lock*/);
    global_sid_lock->unlock();
    if (error)
      goto err;
  }

  DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););

  /* Reached on success as well as on failure. */
err:

  int error_index= 0, close_error_index= 0;
  /* Read each entry from purge_index_file and delete the file. */
  if (!error && is_inited_purge_index_file() &&
      (error_index= purge_index_entry(thd, decrease_log_space, false/*need_lock_index=false*/)))
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
                    " that would be purged.");

  /* Closes the register file (if open) and deletes it. */
  close_error_index= close_purge_index_file();

  DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););

  if (need_lock_index)
    mysql_mutex_unlock(&LOCK_index);

  /*
    Error codes from purge logs take precedence.
    Then error codes from purging the index entry.
    Finally, error codes from closing the purge index file.
  */
  error= error ? error : (error_index ? error_index :
                          close_error_index);

  DBUG_RETURN(error);
}
4744 
set_purge_index_file_name(const char * base_file_name)4745 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
4746 {
4747   int error= 0;
4748   DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
4749   if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
4750                 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
4751                               MY_REPLACE_EXT)) == NULL)
4752   {
4753     error= 1;
4754     sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
4755                       "file name.");
4756   }
4757   DBUG_RETURN(error);
4758 }
4759 
open_purge_index_file(bool destroy)4760 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
4761 {
4762   int error= 0;
4763   File file= -1;
4764 
4765   DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
4766 
4767   if (destroy)
4768     close_purge_index_file();
4769 
4770   if (!my_b_inited(&purge_index_file))
4771   {
4772     if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
4773                        MYF(MY_WME | ME_WAITTANG))) < 0  ||
4774         init_io_cache(&purge_index_file, file, IO_SIZE,
4775                       (destroy ? WRITE_CACHE : READ_CACHE),
4776                       0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
4777     {
4778       error= 1;
4779       sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
4780                       " file.");
4781     }
4782   }
4783   DBUG_RETURN(error);
4784 }
4785 
close_purge_index_file()4786 int MYSQL_BIN_LOG::close_purge_index_file()
4787 {
4788   int error= 0;
4789 
4790   DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
4791 
4792   if (my_b_inited(&purge_index_file))
4793   {
4794     end_io_cache(&purge_index_file);
4795     error= my_close(purge_index_file.file, MYF(0));
4796   }
4797   my_delete(purge_index_file_name, MYF(0));
4798   memset(&purge_index_file, 0, sizeof(purge_index_file));
4799 
4800   DBUG_RETURN(error);
4801 }
4802 
is_inited_purge_index_file()4803 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
4804 {
4805   DBUG_ENTER("MYSQL_BIN_LOG::is_inited_purge_index_file");
4806   DBUG_RETURN (my_b_inited(&purge_index_file));
4807 }
4808 
sync_purge_index_file()4809 int MYSQL_BIN_LOG::sync_purge_index_file()
4810 {
4811   int error= 0;
4812   DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
4813 
4814   if ((error= flush_io_cache(&purge_index_file)) ||
4815       (error= my_sync(purge_index_file.file, MYF(MY_WME))))
4816     DBUG_RETURN(error);
4817 
4818   DBUG_RETURN(error);
4819 }
4820 
register_purge_index_entry(const char * entry)4821 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
4822 {
4823   int error= 0;
4824   DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
4825 
4826   if ((error=my_b_write(&purge_index_file, (const uchar*)entry, strlen(entry))) ||
4827       (error=my_b_write(&purge_index_file, (const uchar*)"\n", 1)))
4828     DBUG_RETURN (error);
4829 
4830   DBUG_RETURN(error);
4831 }
4832 
register_create_index_entry(const char * entry)4833 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
4834 {
4835   DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
4836   DBUG_RETURN(register_purge_index_entry(entry));
4837 }
4838 
/**
  Delete from disk the files registered in the purge index file.

  The purge index file is re-read from its start, one file name per
  line. For every name:
  - if the file cannot be stat'ed because it does not exist, a warning
    is issued and processing continues;
  - if stat fails for any other reason, processing stops with
    LOG_INFO_FATAL;
  - if the file exists and is still listed in the index file, it is
    left alone;
  - if the file exists and is not listed in the index file any more
    (find_log_pos() returns LOG_INFO_EOF), it is deleted.

  @param thd                 If not NULL, warnings are pushed to this
                             thread; otherwise fatal problems are only
                             written to the error log.
  @param decrease_log_space  If not NULL, decremented by the size of
                             every file that was deleted.
  @param need_lock_index     Passed on to find_log_pos(). When false,
                             ha_binlog_index_purge_file() is also called
                             for every file about to be deleted.

  @retval
    0                 ok
  @retval
    LOG_INFO_FATAL    stat or delete failed with an error other than
                      ENOENT (and, for delete, other than EMFILE)
  @retval
    LOG_INFO_EMFILE   delete failed with EMFILE
  @retval
    other             error from reinit_io_cache(), from reading the
                      purge index file, or from find_log_pos()
*/
int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *decrease_log_space,
                                     bool need_lock_index)
{
  MY_STAT s;
  int error= 0;
  LOG_INFO log_info;
  LOG_INFO check_log_info;

  DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");

  DBUG_ASSERT(my_b_inited(&purge_index_file));

  /* Switch the cache to read mode, positioned at offset 0. */
  if ((error=reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0, 0)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
                    "for read");
    goto err;
  }

  for (;;)
  {
    uint length;

    /* A length of 0 or 1 means read error, EOF or an empty line. */
    if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
                          FN_REFLEN)) <= 1)
    {
      if (purge_index_file.error)
      {
        error= purge_index_file.error;
        sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
                        "register file.", error);
        goto err;
      }

      /* Reached EOF */
      break;
    }

    /* Get rid of the trailing '\n' */
    log_info.log_file_name[length-1]= 0;

    if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s, MYF(0)))
    {
      if (my_errno == ENOENT)
      {
        /*
          It's not fatal if we can't stat a log file that does not exist;
          If we could not stat, we won't delete.
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
                              log_info.log_file_name);
        }
        sql_print_information("Failed to execute mysql_file_stat on file '%s'",
			      log_info.log_file_name);
        my_errno= 0;
      }
      else
      {
        /*
          Other than ENOENT are fatal
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with getting info on being purged %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              log_info.log_file_name);
        }
        else
        {
          sql_print_information("Failed to delete log file '%s'; "
                                "consider examining correspondence "
                                "of your binlog index file "
                                "to the actual binlog files",
                                log_info.log_file_name);
        }
        error= LOG_INFO_FATAL;
        goto err;
      }
    }
    else
    {
      /*
        The file exists. It is only deleted when the lookup below fails
        with LOG_INFO_EOF, i.e. when the name is no longer listed in the
        index file. If the lookup succeeds nothing is done for this file.
      */
      if ((error= find_log_pos(&check_log_info, log_info.log_file_name,
                               need_lock_index)))
      {
        if (error != LOG_INFO_EOF)
        {
          if (thd)
          {
            push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                ER_BINLOG_PURGE_FATAL_ERR,
                                "a problem with deleting %s and "
                                "reading the binlog index file",
                                log_info.log_file_name);
          }
          else
          {
            sql_print_information("Failed to delete file '%s' and "
                                  "read the binlog index file",
                                  log_info.log_file_name);
          }
          goto err;
        }

        /* LOG_INFO_EOF is the expected outcome, not an error. */
        error= 0;
        if (!need_lock_index)
        {
          /*
            This is to avoid triggering an error in NDB.

            @todo: This is weird, what does NDB errors have to do with
            need_lock_index? Explain better or refactor /Sven
          */
          ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
        }

        DBUG_PRINT("info",("purging %s",log_info.log_file_name));
        if (!mysql_file_delete(key_file_binlog, log_info.log_file_name, MYF(0)))
        {
          DBUG_EXECUTE_IF("wait_in_purge_index_entry",
                          {
                              const char action[] = "now SIGNAL in_purge_index_entry WAIT_FOR go_ahead_sql";
                              DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(action)));
                              DBUG_SET("-d,wait_in_purge_index_entry");
                          };);

          /* s still holds the result of the stat done before the delete. */
          if (decrease_log_space)
            *decrease_log_space-= s.st_size;
        }
        else
        {
          if (my_errno == ENOENT)
          {
            /* The file vanished between stat and delete: warn and go on. */
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                  ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
                                  log_info.log_file_name);
            }
            sql_print_information("Failed to delete file '%s'",
                                  log_info.log_file_name);
            my_errno= 0;
          }
          else
          {
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                  ER_BINLOG_PURGE_FATAL_ERR,
                                  "a problem with deleting %s; "
                                  "consider examining correspondence "
                                  "of your binlog index file "
                                  "to the actual binlog files",
                                  log_info.log_file_name);
            }
            else
            {
              sql_print_information("Failed to delete file '%s'; "
                                    "consider examining correspondence "
                                    "of your binlog index file "
                                    "to the actual binlog files",
                                    log_info.log_file_name);
            }
            /* EMFILE gets its own return code, everything else is fatal. */
            if (my_errno == EMFILE)
            {
              DBUG_PRINT("info",
                         ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno));
              error= LOG_INFO_EMFILE;
              goto err;
            }
            error= LOG_INFO_FATAL;
            goto err;
          }
        }
      }
    }
  }

err:
  DBUG_RETURN(error);
}
5026 
5027 /**
5028   Purge old logs so that we have a maximum of max_nr_files logs.
5029 
5030   @param max_nr_files	Maximum number of logfiles to have
5031 
5032   @note
5033   If any of the logs before the deleted one is in use,
5034   only purge logs up to this one.
5035 
5036   @retval
5037   0				ok
5038   @retval
5039   LOG_INFO_PURGE_NO_ROTATE	Binary file that can't be rotated
5040   LOG_INFO_FATAL              if any other than ENOENT error from
5041   mysql_file_stat() or mysql_file_delete()
5042 */
5043 
purge_logs_maximum_number(ulong max_nr_files)5044 int MYSQL_BIN_LOG::purge_logs_maximum_number(ulong max_nr_files)
5045 {
5046   int error;
5047   char to_log[FN_REFLEN];
5048   LOG_INFO log_info;
5049   ulong current_number_of_logs= 1;
5050 
5051   DBUG_ENTER("purge_logs_maximum_number");
5052 
5053   mysql_mutex_lock(&LOCK_index);
5054   to_log[0]= 0;
5055 
5056   if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
5057     goto err;
5058 
5059   while (!find_next_log(&log_info, 0))
5060     current_number_of_logs++;
5061 
5062   if (current_number_of_logs <= max_nr_files)
5063   {
5064     error= 0;
5065     goto err; /* No logs to expire */
5066   }
5067 
5068   if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
5069     goto err;
5070 
5071   while (strcmp(log_file_name, log_info.log_file_name) &&
5072          !is_active(log_info.log_file_name) &&
5073          !log_in_use(log_info.log_file_name) &&
5074          current_number_of_logs > max_nr_files)
5075   {
5076     current_number_of_logs--;
5077     strmake(to_log,
5078             log_info.log_file_name,
5079             sizeof(log_info.log_file_name) - 1);
5080 
5081     if (find_next_log(&log_info, 0))
5082     {
5083       break;
5084     }
5085   }
5086 
5087   error= (to_log[0] ? purge_logs(to_log, true, false, true,
5088                                  (ulonglong *) 0, true) : 0);
5089 
5090 err:
5091   mysql_mutex_unlock(&LOCK_index);
5092   DBUG_RETURN(error);
5093 }
5094 
5095 /**
5096   Remove all logs before the given file date from disk and from the
5097   index file.
5098 
5099   @param thd		Thread pointer
5100   @param purge_time	Delete all log files before given date.
5101   @param auto_purge     True if this is an automatic purge.
5102 
5103   @note
5104     If any of the logs before the deleted one is in use,
5105     only purge logs up to this one.
5106 
5107   @retval
5108     0				ok
5109   @retval
5110     LOG_INFO_PURGE_NO_ROTATE	Binary file that can't be rotated
5111     LOG_INFO_FATAL              if any other than ENOENT error from
5112                                 mysql_file_stat() or mysql_file_delete()
5113 */
5114 
purge_logs_before_date(time_t purge_time,bool auto_purge)5115 int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time, bool auto_purge)
5116 {
5117   int error;
5118   int no_of_threads_locking_log= 0, no_of_log_files_purged= 0;
5119   bool log_is_active= false, log_is_in_use= false;
5120   char to_log[FN_REFLEN], copy_log_in_use[FN_REFLEN];
5121   LOG_INFO log_info;
5122   MY_STAT stat_area;
5123   THD *thd= current_thd;
5124 
5125   DBUG_ENTER("purge_logs_before_date");
5126 
5127   mysql_mutex_lock(&LOCK_index);
5128   to_log[0]= 0;
5129 
5130   if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
5131     goto err;
5132 
5133   while (!(log_is_active= is_active(log_info.log_file_name)))
5134   {
5135     if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
5136     {
5137       if (!auto_purge)
5138       {
5139         log_is_in_use= true;
5140         strcpy(copy_log_in_use, log_info.log_file_name);
5141       }
5142       break;
5143     }
5144     no_of_log_files_purged++;
5145 
5146     if (!mysql_file_stat(m_key_file_log,
5147                          log_info.log_file_name, &stat_area, MYF(0)))
5148     {
5149       if (my_errno == ENOENT)
5150       {
5151         /*
5152           It's not fatal if we can't stat a log file that does not exist.
5153         */
5154         my_errno= 0;
5155       }
5156       else
5157       {
5158         /*
5159           Other than ENOENT are fatal
5160         */
5161         if (thd)
5162         {
5163           push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
5164                               ER_BINLOG_PURGE_FATAL_ERR,
5165                               "a problem with getting info on being purged %s; "
5166                               "consider examining correspondence "
5167                               "of your binlog index file "
5168                               "to the actual binlog files",
5169                               log_info.log_file_name);
5170         }
5171         else
5172         {
5173           sql_print_information("Failed to delete log file '%s'",
5174                                 log_info.log_file_name);
5175         }
5176         error= LOG_INFO_FATAL;
5177         goto err;
5178       }
5179     }
5180     else
5181     {
5182       if (stat_area.st_mtime < purge_time)
5183         strmake(to_log,
5184                 log_info.log_file_name,
5185                 sizeof(log_info.log_file_name) - 1);
5186       else
5187         break;
5188     }
5189     if (find_next_log(&log_info, false/*need_lock_index=false*/))
5190       break;
5191   }
5192 
5193   if (log_is_active)
5194   {
5195     if(!auto_purge)
5196       push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
5197                           ER_WARN_PURGE_LOG_IS_ACTIVE,
5198                           ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
5199                           log_info.log_file_name);
5200 
5201   }
5202 
5203   if (log_is_in_use)
5204   {
5205     int no_of_log_files_to_purge= no_of_log_files_purged+1;
5206     while (strcmp(log_file_name, log_info.log_file_name))
5207     {
5208       if (mysql_file_stat(m_key_file_log, log_info.log_file_name,
5209                           &stat_area, MYF(0)))
5210       {
5211         if (stat_area.st_mtime < purge_time)
5212           no_of_log_files_to_purge++;
5213         else
5214           break;
5215       }
5216       if (find_next_log(&log_info, false/*need_lock_index=false*/))
5217       {
5218         no_of_log_files_to_purge++;
5219         break;
5220       }
5221     }
5222 
5223     push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
5224                         ER_WARN_PURGE_LOG_IN_USE,
5225                         ER(ER_WARN_PURGE_LOG_IN_USE),
5226                         copy_log_in_use, no_of_threads_locking_log,
5227                         no_of_log_files_purged, no_of_log_files_to_purge);
5228   }
5229 
5230   error= (to_log[0] ? purge_logs(to_log, true,
5231                                  false/*need_lock_index=false*/,
5232                                  true/*need_update_threads=true*/,
5233                                  (ulonglong *) 0, auto_purge) : 0);
5234 
5235 err:
5236   mysql_mutex_unlock(&LOCK_index);
5237   DBUG_RETURN(error);
5238 }
5239 #endif /* HAVE_REPLICATION */
5240 
5241 
5242 /**
5243   Create a new log file name.
5244 
5245   @param buf		buf of at least FN_REFLEN where new name is stored
5246 
5247   @note
5248     If file name will be longer then FN_REFLEN it will be truncated
5249 */
5250 
make_log_name(char * buf,const char * log_ident)5251 void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
5252 {
5253   uint dir_len = dirname_length(log_file_name);
5254   if (dir_len >= FN_REFLEN)
5255     dir_len=FN_REFLEN-1;
5256   strnmov(buf, log_file_name, dir_len);
5257   strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
5258 }
5259 
5260 
5261 /**
5262   Check if we are writing/reading to the given log file.
5263 */
5264 
is_active(const char * log_file_name_arg)5265 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
5266 {
5267   return !strcmp(log_file_name, log_file_name_arg);
5268 }
5269 
5270 
5271 /*
5272   Wrappers around new_file_impl to avoid using argument
5273   to control locking. The argument 1) less readable 2) breaks
5274   incapsulation 3) allows external access to the class without
5275   a lock (which is not possible with private new_file_without_locking
5276   method).
5277 
5278   @retval
5279     nonzero - error
5280 
5281 */
5282 
new_file(Format_description_log_event * extra_description_event)5283 int MYSQL_BIN_LOG::new_file(Format_description_log_event *extra_description_event)
5284 {
5285   return new_file_impl(true/*need_lock_log=true*/, extra_description_event);
5286 }
5287 
5288 /*
5289   @retval
5290     nonzero - error
5291 */
new_file_without_locking(Format_description_log_event * extra_description_event)5292 int MYSQL_BIN_LOG::new_file_without_locking(Format_description_log_event *extra_description_event)
5293 {
5294   return new_file_impl(false/*need_lock_log=false*/, extra_description_event);
5295 }
5296 
5297 
/**
  Start writing to a new log file or reopen the old file.

  The sequence is: take LOCK_log (optionally), wait until there are no
  prepared XIDs, take LOCK_index, flush storage engine logs, generate
  the new name, write a Rotate event to the current file, close the
  current log and index, reopen the index and open the new log.

  @param need_lock_log If true, this function acquires LOCK_log;
  otherwise the caller should already have acquired it.
  @param extra_description_event Passed unchanged to open_binlog() when
  the new file is opened.

  @retval 0 success; also returned, without doing anything, when the
            log is not open
  @retval nonzero - error

  @note The new file name is stored last in the index file
*/
int MYSQL_BIN_LOG::new_file_impl(bool need_lock_log, Format_description_log_event *extra_description_event)
{
  /*
    close_on_error marks the failures after which the log is closed
    (or the server aborted) in the "end" section below.
  */
  int error= 0, close_on_error= FALSE;
  char new_name[FN_REFLEN], *new_name_ptr, *old_name, *file_to_open;

  DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
  if (!is_open())
  {
    /* Nothing to rotate; no lock has been taken yet, error is still 0. */
    DBUG_PRINT("info",("log is closed"));
    DBUG_RETURN(error);
  }

  if (need_lock_log)
    mysql_mutex_lock(&LOCK_log);
  else
    mysql_mutex_assert_owner(&LOCK_log);
  DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
                  DEBUG_SYNC(current_thd, "before_rotate_binlog"););
  mysql_mutex_lock(&LOCK_xids);
  /*
    We need to ensure that the number of prepared XIDs are 0.

    If m_prep_xids is not zero:
    - We wait for storage engine commit, hence decrease m_prep_xids
    - We keep the LOCK_log to block new transactions from being
      written to the binary log.
   */
  while (get_prep_xids() > 0)
  {
    DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
    mysql_cond_wait(&m_prep_xids_cond, &LOCK_xids);
  }
  mysql_mutex_unlock(&LOCK_xids);

  /* LOCK_index is held from here until just before the function returns. */
  mysql_mutex_lock(&LOCK_index);

  /*
    Flush storage engine logs unless the "expire_logs_always" debug
    keyword is set. On failure we leave through "end" with
    close_on_error still FALSE, so the log is not closed.
  */
  if (DBUG_EVALUATE_IF("expire_logs_always", 0, 1)
      && (error= ha_flush_logs(NULL)))
    goto end;

  mysql_mutex_assert_owner(&LOCK_log);
  mysql_mutex_assert_owner(&LOCK_index);


  /*
    If user hasn't specified an extension, generate a new log name
    We have to do this here and not in open as we want to store the
    new file name in the current binary log file.
  */
  new_name_ptr= new_name;
  if ((error= generate_new_name(new_name, name)))
  {
    // Use the old name if generation of new name fails.
    strcpy(new_name, name);
    close_on_error= TRUE;
    goto end;
  }
  else
  {
    /*
      We log the whole file name for log file as the user may decide
      to change base names at some point.
    */
    Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
                       is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
    /*
      The current relay-log's closing Rotate event must have checksum
      value computed with an algorithm of the last relay-logged FD event.
    */
    if (is_relay_log)
      r.checksum_alg= relay_log_checksum_alg;
    DBUG_ASSERT(!is_relay_log || relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
    /*
      Write the Rotate event to the current log file. The debug keyword
      "fault_injection_new_file_rotate_event" simulates a write failure.
    */
    if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event", (error=close_on_error=TRUE), FALSE) ||
       (error= r.write(&log_file)))
    {
      char errbuf[MYSYS_STRERROR_SIZE];
      DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno=2;);
      close_on_error= TRUE;
      /*
        NOTE(review): the error code is ER_ERROR_ON_WRITE while the format
        string is the one of ER_CANT_OPEN_FILE.
      */
      my_printf_error(ER_ERROR_ON_WRITE, ER(ER_CANT_OPEN_FILE),
                      MYF(ME_FATALERROR), name,
                      errno, my_strerror(errbuf, sizeof(errbuf), errno));
      goto end;
    }
    bytes_written += r.data_written;
  }
  /*
    Update needs to be signalled even if there is no rotate event
    log rotation should give the waiting thread a signal to
    discover EOF and move on to the next log.
  */
  signal_update();

  /*
    Keep the old name aside and clear "name" before closing; old_name is
    handed to open_binlog() below and released with my_free() afterwards.
  */
  old_name=name;
  name=0;				// Don't free name
  close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX,
        false/*need_lock_log=false*/,
        false/*need_lock_index=false*/);

  /* Apply a checksum algorithm change that was requested, if any. */
  if (checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF)
  {
    DBUG_ASSERT(!is_relay_log);
    DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
    binlog_checksum_options= checksum_alg_reset;
  }
  /*
     Note that at this point, log_state != LOG_CLOSED (important for is_open()).
  */

  DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
  /*
     new_file() is only used for rotation (in FLUSH LOGS or because size >
     max_binlog_size or max_relay_log_size).
     If this is a binary log, the Format_description_log_event at the beginning of
     the new file should have created=0 (to distinguish with the
     Format_description_log_event written at server startup, which should
     trigger temp tables deletion on slaves.
  */

  /* reopen index binlog file, BUG#34582 */
  /* file_to_open tracks which file the error message below refers to. */
  file_to_open= index_file_name;
  error= open_index_file(index_file_name, 0, false/*need_lock_index=false*/);
  if (!error)
  {
    /* reopen the binary log file. */
    file_to_open= new_name_ptr;
    error= open_binlog(old_name, new_name_ptr, io_cache_type,
                       max_size, true/*null_created_arg=true*/,
                       false/*need_lock_log=false*/,
                       false/*need_lock_index=false*/,
                       true/*need_sid_lock=true*/,
                       extra_description_event);
  }

  /* handle reopening errors */
  if (error)
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    /*
      NOTE(review): "error" (the return value of the open call) is used
      here as the errno argument of the message; confirm it carries one.
    */
    my_printf_error(ER_CANT_OPEN_FILE, ER(ER_CANT_OPEN_FILE),
                    MYF(ME_FATALERROR), file_to_open,
                    error, my_strerror(errbuf, sizeof(errbuf), error));
    close_on_error= TRUE;
  }
  my_free(old_name);

end:

  /*
    Reached on success and on every failure after LOCK_index was taken.
    Paths that jump here early have not touched "name"/old_name.
  */
  if (error && close_on_error /* rotate or reopen failed */)
  {
    /*
      Close whatever was left opened.

      We are keeping the behavior as it exists today, ie,
      we disable logging and move on (see: BUG#51014).

      TODO: as part of WL#1790 consider other approaches:
       - kill mysql (safety);
       - try multiple locations for opening a log file;
       - switch server to protected/readonly mode
       - ...
    */
    if (binlog_error_action == ABORT_SERVER)
    {
      exec_binlog_error_action_abort("Either disk is full or file system is"
                                     " read only while rotating the binlog."
                                     " Aborting the server.");
    }
    else
      sql_print_error("Could not open %s for logging (error %d). "
                      "Turning logging off for the whole duration "
                      "of the MySQL server process. To turn it on "
                      "again: fix the cause, shutdown the MySQL "
                      "server and restart it.",
                      new_name_ptr, errno);
    close(LOG_CLOSE_INDEX, false /*need_lock_log=false*/,
          false/*need_lock_index=false*/);
  }

  /* Release the locks in reverse order of acquisition. */
  mysql_mutex_unlock(&LOCK_index);
  if (need_lock_log)
    mysql_mutex_unlock(&LOCK_log);

  DEBUG_SYNC(current_thd, "after_disable_binlog");
  DBUG_RETURN(error);
}
5493 
5494 
5495 #ifdef HAVE_REPLICATION
5496 /**
5497   Called after an event has been written to the relay log by the IO
5498   thread.  This flushes and possibly syncs the file (according to the
5499   sync options), rotates the file if it has grown over the limit, and
5500   finally calls signal_update().
5501 
5502   @note The caller must hold LOCK_log before invoking this function.
5503 
5504   @param mi Master_info for the IO thread.
5505   @param need_data_lock If true, mi->data_lock will be acquired if a
5506   rotation is needed.  Otherwise, mi->data_lock must be held by the
5507   caller.
5508 
5509   @retval false success
5510   @retval true error
5511 */
after_append_to_relay_log(Master_info * mi)5512 bool MYSQL_BIN_LOG::after_append_to_relay_log(Master_info *mi)
5513 {
5514   DBUG_ENTER("MYSQL_BIN_LOG::after_append_to_relay_log");
5515   DBUG_PRINT("info",("max_size: %lu",max_size));
5516 
5517   // Check pre-conditions
5518   mysql_mutex_assert_owner(&LOCK_log);
5519   mysql_mutex_assert_owner(&mi->data_lock);
5520   DBUG_ASSERT(is_relay_log);
5521   DBUG_ASSERT(current_thd->system_thread == SYSTEM_THREAD_SLAVE_IO);
5522 
5523   // Flush and sync
5524   bool error= false;
5525   if (flush_and_sync(0) == 0)
5526   {
5527     DBUG_EXECUTE_IF ("set_max_size_zero",
5528                      {max_size=0;});
5529     // If relay log is too big, rotate
5530     if ((uint) my_b_append_tell(&log_file) >
5531         DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size))
5532     {
5533       error= new_file_without_locking(mi->get_mi_description_event());
5534       DBUG_EXECUTE_IF ("set_max_size_zero",
5535                        {
5536                        max_size=1073741824;
5537                        DBUG_SET("-d,set_max_size_zero");
5538                        DBUG_SET("-d,flush_after_reading_gtid_event");
5539                        });
5540     }
5541   }
5542 
5543   signal_update();
5544 
5545   DBUG_RETURN(error);
5546 }
5547 
5548 
append_event(Log_event * ev,Master_info * mi)5549 bool MYSQL_BIN_LOG::append_event(Log_event* ev, Master_info *mi)
5550 {
5551   DBUG_ENTER("MYSQL_BIN_LOG::append");
5552 
5553   // check preconditions
5554   DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5555   DBUG_ASSERT(is_relay_log);
5556 
5557   // acquire locks
5558   mysql_mutex_lock(&LOCK_log);
5559 
5560   // write data
5561   bool error = false;
5562   if (ev->write(&log_file) == 0)
5563   {
5564     bytes_written+= ev->data_written;
5565     error= after_append_to_relay_log(mi);
5566   }
5567   else
5568     error= true;
5569 
5570   mysql_mutex_unlock(&LOCK_log);
5571   DBUG_RETURN(error);
5572 }
5573 
5574 
append_buffer(const char * buf,uint len,Master_info * mi)5575 bool MYSQL_BIN_LOG::append_buffer(const char* buf, uint len, Master_info *mi)
5576 {
5577   DBUG_ENTER("MYSQL_BIN_LOG::append_buffer");
5578 
5579   // check preconditions
5580   DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5581   DBUG_ASSERT(is_relay_log);
5582   mysql_mutex_assert_owner(&LOCK_log);
5583 
5584   // write data
5585   bool error= false;
5586   if (my_b_append(&log_file,(uchar*) buf,len) == 0)
5587   {
5588     bytes_written += len;
5589     error= after_append_to_relay_log(mi);
5590   }
5591   else
5592     error= true;
5593 
5594   DBUG_RETURN(error);
5595 }
5596 #endif // ifdef HAVE_REPLICATION
5597 
flush_and_sync(const bool force)5598 bool MYSQL_BIN_LOG::flush_and_sync(const bool force)
5599 {
5600   mysql_mutex_assert_owner(&LOCK_log);
5601 
5602   if (flush_io_cache(&log_file))
5603     return 1;
5604 
5605   std::pair<bool, bool> result= sync_binlog_file(force);
5606 
5607   return result.first;
5608 }
5609 
start_union_events(THD * thd,query_id_t query_id_param)5610 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
5611 {
5612   DBUG_ASSERT(!thd->binlog_evt_union.do_union);
5613   thd->binlog_evt_union.do_union= TRUE;
5614   thd->binlog_evt_union.unioned_events= FALSE;
5615   thd->binlog_evt_union.unioned_events_trans= FALSE;
5616   thd->binlog_evt_union.first_query_id= query_id_param;
5617 }
5618 
stop_union_events(THD * thd)5619 void MYSQL_BIN_LOG::stop_union_events(THD *thd)
5620 {
5621   DBUG_ASSERT(thd->binlog_evt_union.do_union);
5622   thd->binlog_evt_union.do_union= FALSE;
5623 }
5624 
is_query_in_union(THD * thd,query_id_t query_id_param)5625 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
5626 {
5627   return (thd->binlog_evt_union.do_union &&
5628           query_id_param >= thd->binlog_evt_union.first_query_id);
5629 }
5630 
5631 /*
5632   Updates thd's position-of-next-event variables
5633   after a *real* write a file.
5634  */
update_thd_next_event_pos(THD * thd)5635 void MYSQL_BIN_LOG::update_thd_next_event_pos(THD* thd)
5636 {
5637   if (likely(thd != NULL))
5638   {
5639     thd->set_next_event_pos(log_file_name,
5640                             my_b_tell(&log_file));
5641   }
5642 }
5643 
5644 /*
5645   Moves the last bunch of rows from the pending Rows event to a cache (either
5646   transactional cache if is_transaction is @c true, or the non-transactional
5647   cache otherwise. Sets a new pending event.
5648 
5649   @param thd               a pointer to the user thread.
5650   @param evt               a pointer to the row event.
5651   @param is_transactional  @c true indicates a transactional cache,
5652                            otherwise @c false a non-transactional.
5653 */
5654 int
flush_and_set_pending_rows_event(THD * thd,Rows_log_event * event,bool is_transactional)5655 MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
5656                                                 Rows_log_event* event,
5657                                                 bool is_transactional)
5658 {
5659   DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
5660   DBUG_ASSERT(mysql_bin_log.is_open());
5661   DBUG_PRINT("enter", ("event: 0x%lx", (long) event));
5662 
5663   int error= 0;
5664   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
5665 
5666   DBUG_ASSERT(cache_mngr);
5667 
5668   binlog_cache_data *cache_data=
5669     cache_mngr->get_binlog_cache_data(is_transactional);
5670 
5671   DBUG_PRINT("info", ("cache_mngr->pending(): 0x%lx", (long) cache_data->pending()));
5672 
5673   if (Rows_log_event* pending= cache_data->pending())
5674   {
5675     /*
5676       Write pending event to the cache.
5677     */
5678     if (cache_data->write_event(thd, pending))
5679     {
5680       set_write_error(thd, is_transactional);
5681       if (check_write_error(thd) && cache_data &&
5682           stmt_cannot_safely_rollback(thd))
5683         cache_data->set_incident();
5684       delete pending;
5685       cache_data->set_pending(NULL);
5686       DBUG_RETURN(1);
5687     }
5688 
5689     delete pending;
5690   }
5691 
5692   cache_data->set_pending(event);
5693 
5694   DBUG_RETURN(error);
5695 }
5696 
/**
  Write an event to the binary log.

  The event is written to the session's statement or transaction cache
  (selected by event_info->is_using_trans_cache()), preceded, for
  non-row-format statements, by the context events the statement needs
  (Intvar, Rand, User_var).

  @param event_info  the event to write; its thd member is the session
                     on whose behalf it is written

  @retval false  the event was written, or deliberately skipped (event
                 union mode, binlogging disabled for the session, or
                 filtered out by the database filter)
  @retval true   error; also returned when the log is not open, since
                 the result is only reset to success inside the
                 is_open() branch
*/

bool MYSQL_BIN_LOG::write_event(Log_event *event_info)
{
  THD *thd= event_info->thd;
  /* Pessimistic default: only cleared once the event has been written. */
  bool error= 1;
  DBUG_ENTER("MYSQL_BIN_LOG::write_event(Log_event *)");

  if (thd->binlog_evt_union.do_union)
  {
    /*
      In Stored function; Remember that function call caused an update.
      We will log the function call to the binary log on function exit
    */
    thd->binlog_evt_union.unioned_events= TRUE;
    thd->binlog_evt_union.unioned_events_trans |=
      event_info->is_using_trans_cache();
    DBUG_RETURN(0);
  }

  /*
    We only end the statement if we are in a top-level statement.  If
    we are inside a stored function, we do not end the statement since
    this will close all tables on the slave. But there can be a special case
    where we are inside a stored function/trigger and a SAVEPOINT is being
    set inside the stored function/trigger. This SAVEPOINT execution will
    force the pending event to be flushed without an STMT_END_F flag. This
    will result in a case where following DMLs will be considered as part of
    same statement and result in data loss on slave. Hence in this case we
    force the end_stmt to be true.
  */
  bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
                        SQLCOM_SAVEPOINT)? true:
    (thd->locked_tables_mode && thd->lex->requires_prelocking());
  /* Flush any pending rows event first; a failure here is an error. */
  if (thd->binlog_flush_pending_rows_event(end_stmt,
                                           event_info->is_using_trans_cache()))
    DBUG_RETURN(error);

  /*
     In most cases this is only called if 'is_open()' is true; in fact this is
     mostly called if is_open() *was* true a few instructions before, but it
     could have changed since.
  */
  if (likely(is_open()))
  {
#ifdef HAVE_REPLICATION
    /*
      In the future we need to add to the following if tests like
      "do the involved tables match (to be implemented)
      binlog_[wild_]{do|ignore}_table?" (WL#1049)"
    */
    /*
      Skip the event (reporting success) when binlogging is switched off
      for the session, or when the database filter rejects it. SAVEPOINT
      and ROLLBACK TO SAVEPOINT are never filtered by database.
      NOTE(review): the "thd &&" test is redundant, thd has already been
      dereferenced above.
    */
    const char *local_db= event_info->get_db();
    if ((thd && !(thd->variables.option_bits & OPTION_BIN_LOG)) ||
	(thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
         thd->lex->sql_command != SQLCOM_SAVEPOINT &&
         (!event_info->is_no_filter_event() &&
          !binlog_filter->db_ok(local_db))))
      DBUG_RETURN(0);
#endif /* HAVE_REPLICATION */

    DBUG_ASSERT(event_info->is_using_trans_cache() || event_info->is_using_stmt_cache());

    if (binlog_start_trans_and_stmt(thd, event_info))
      DBUG_RETURN(error);

    /* Pick the cache (transactional or statement) the event goes to. */
    bool is_trans_cache= event_info->is_using_trans_cache();
    binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
    binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);

    DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));

    /*
       No check for auto events flag here - this write method should
       never be called if auto-events are enabled.

       Write first log events which describe the 'run environment'
       of the SQL command. If row-based binlogging, Insert_id, Rand
       and other kind of "setting context" events are not needed.
    */
    if (thd)
    {
      if (!thd->is_current_stmt_binlog_format_row())
      {
        /* LAST_INSERT_ID() of the previous statement was used. */
        if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
        {
          Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
                             thd->first_successful_insert_id_in_prev_stmt_for_binlog,
                             event_info->event_cache_type, event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
          if (event_info->is_using_immediate_logging())
            thd->binlog_bytes_written+= e.data_written;
        }
        /* Auto-increment values were generated by this statement. */
        if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
        {
          DBUG_PRINT("info",("number of auto_inc intervals: %u",
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             nb_elements()));
          Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             minimum(), event_info->event_cache_type,
                             event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
          if (event_info->is_using_immediate_logging())
            thd->binlog_bytes_written+= e.data_written;
        }
        /* The statement used the random number generator. */
        if (thd->rand_used)
        {
          Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
                           event_info->event_cache_type,
                           event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
          if (event_info->is_using_immediate_logging())
            thd->binlog_bytes_written+= e.data_written;
        }
        /* One User_var event per user variable recorded for the statement. */
        if (thd->user_var_events.elements)
        {
          for (uint i= 0; i < thd->user_var_events.elements; i++)
          {
            BINLOG_USER_VAR_EVENT *user_var_event;
            get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);

            /* setting flags for user var log event */
            uchar flags= User_var_log_event::UNDEF_F;
            if (user_var_event->unsigned_flag)
              flags|= User_var_log_event::UNSIGNED_F;

            User_var_log_event e(thd,
                                 user_var_event->user_var_event->entry_name.ptr(),
                                 user_var_event->user_var_event->entry_name.length(),
                                 user_var_event->value,
                                 user_var_event->length,
                                 user_var_event->type,
                                 user_var_event->charset_number, flags,
                                 event_info->event_cache_type,
                                 event_info->event_logging_type);
            if (cache_data->write_event(thd, &e))
              goto err;
            if (event_info->is_using_immediate_logging())
              thd->binlog_bytes_written+= e.data_written;
          }
        }
      }
    }

    /*
      Write the event.
      The "injecting_fault_writing" debug keyword simulates a failure
      after the write itself succeeded.
    */
    if (cache_data->write_event(thd, event_info) ||
        DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
      goto err;
    if (event_info->is_using_immediate_logging())
      thd->binlog_bytes_written+= event_info->data_written;

    /*
      After writing the event, if the trx-cache was used and any unsafe
      change was written into it, the cache is marked as cannot safely
      roll back.
    */
    if (is_trans_cache && stmt_cannot_safely_rollback(thd))
      cache_mngr->trx_cache.set_cannot_rollback();

    error= 0;

err:
    /*
      Reached both on success (error == 0) and from every failed cache
      write above. On failure the write error is recorded and, if the
      statement cannot be safely rolled back, the cache is flagged with
      an incident.
    */
    if (error)
    {
      set_write_error(thd, is_trans_cache);
      if (check_write_error(thd) && cache_data &&
          stmt_cannot_safely_rollback(thd))
        cache_data->set_incident();
    }
  }

  DBUG_RETURN(error);
}
5877 
5878 /**
5879   The method executes rotation when LOCK_log is already acquired
5880   by the caller.
5881 
5882   @param force_rotate  caller can request the log rotation
5883   @param check_purge   is set to true if rotation took place
5884 
5885   @note
5886     If rotation fails, for instance the server was unable
5887     to create a new log file, we still try to write an
5888     incident event to the current log.
5889 
5890   @note The caller must hold LOCK_log when invoking this function.
5891 
5892   @retval
5893     nonzero - error in rotating routine.
5894 */
rotate(bool force_rotate,bool * check_purge)5895 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
5896 {
5897   int error= 0;
5898   DBUG_ENTER("MYSQL_BIN_LOG::rotate");
5899 
5900   DBUG_ASSERT(!is_relay_log);
5901   mysql_mutex_assert_owner(&LOCK_log);
5902 
5903   *check_purge= false;
5904 
5905   if (DBUG_EVALUATE_IF("force_rotate", 1, 0) || force_rotate ||
5906       (my_b_tell(&log_file) >= (my_off_t) max_size))
5907   {
5908     error= new_file_without_locking(NULL);
5909     *check_purge= true;
5910     publish_coordinates_for_global_status();
5911   }
5912   DBUG_RETURN(error);
5913 }
5914 
5915 /**
5916   The method executes logs purging routine.
5917 
5918   @retval
5919     nonzero - error in rotating routine.
5920 */
purge()5921 void MYSQL_BIN_LOG::purge()
5922 {
5923 #ifdef HAVE_REPLICATION
5924   if (expire_logs_days)
5925   {
5926     DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
5927     time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
5928     DBUG_EXECUTE_IF("expire_logs_always",
5929                     { purge_time= my_time(0);});
5930     if (purge_time >= 0)
5931     {
5932       /*
5933         Flush logs for storage engines, so that the last transaction
5934         is fsynced inside storage engines.
5935       */
5936       ha_flush_logs(NULL);
5937       purge_logs_before_date(purge_time, true);
5938     }
5939   }
5940   if (max_binlog_files)
5941   {
5942     purge_logs_maximum_number(max_binlog_files);
5943   }
5944 #endif
5945 }
5946 
5947 /**
5948   The method is a shortcut of @c rotate() and @c purge().
5949   LOCK_log is acquired prior to rotate and is released after it.
5950 
5951   @param force_rotate  caller can request the log rotation
5952 
5953   @retval
5954     nonzero - error in rotating routine.
5955 */
rotate_and_purge(THD * thd,bool force_rotate)5956 int MYSQL_BIN_LOG::rotate_and_purge(THD* thd, bool force_rotate)
5957 {
5958   int error= 0;
5959   DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
5960   bool check_purge= false;
5961 
5962   /*
5963     Wait for handlerton to insert any pending information into the binlog.
5964     For e.g. ha_ndbcluster which updates the binlog asynchronously this is
5965     needed so that the user see its own commands in the binlog.
5966   */
5967   ha_binlog_wait(thd);
5968 
5969   DBUG_ASSERT(!is_relay_log);
5970   mysql_mutex_lock(&LOCK_log);
5971   error= rotate(force_rotate, &check_purge);
5972   /*
5973     NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
5974           the mutex. Otherwise causes various deadlocks.
5975   */
5976   mysql_mutex_unlock(&LOCK_log);
5977 
5978   if (!error && check_purge)
5979     purge();
5980 
5981   DBUG_RETURN(error);
5982 }
5983 
next_file_id()5984 uint MYSQL_BIN_LOG::next_file_id()
5985 {
5986   uint res;
5987   mysql_mutex_lock(&LOCK_log);
5988   res = file_id++;
5989   mysql_mutex_unlock(&LOCK_log);
5990   return res;
5991 }
5992 
5993 
5994 /**
5995   Calculate checksum of possibly a part of an event containing at least
5996   the whole common header.
5997 
5998   @param    buf       the pointer to trans cache's buffer
5999   @param    off       the offset of the beginning of the event in the buffer
6000   @param    event_len no-checksum length of the event
6001   @param    length    the current size of the buffer
6002 
6003   @param    crc       [in-out] the checksum
6004 
6005   Event size in incremented by @c BINLOG_CHECKSUM_LEN.
6006 
6007   @return 0 or number of unprocessed yet bytes of the event excluding
6008             the checksum part.
6009 */
fix_log_event_crc(uchar * buf,uint off,uint event_len,uint length,ha_checksum * crc)6010   static ulong fix_log_event_crc(uchar *buf, uint off, uint event_len,
6011                                  uint length, ha_checksum *crc)
6012 {
6013   ulong ret;
6014   uchar *event_begin= buf + off;
6015   uint16 flags= uint2korr(event_begin + FLAGS_OFFSET);
6016 
6017   DBUG_ASSERT(length >= off + LOG_EVENT_HEADER_LEN); //at least common header in
6018   int2store(event_begin + FLAGS_OFFSET, flags);
6019   ret= length >= off + event_len ? 0 : off + event_len - length;
6020   *crc= my_checksum(*crc, event_begin, event_len - ret);
6021   return ret;
6022 }
6023 
6024 /*
6025   Write the contents of a cache to the binary log.
6026 
6027   SYNOPSIS
6028     do_write_cache()
6029     cache    Cache to write to the binary log
    lock_log True if the LOCK_log mutex should be acquired, false otherwise
6031 
6032   DESCRIPTION
6033     Write the contents of the cache to the binary log. The cache will
6034     be reset as a READ_CACHE to be able to read the contents from it.
6035 
6036     Reading from the trans cache with possible (per @c binlog_checksum_options)
6037     adding checksum value  and then fixing the length and the end_log_pos of
6038     events prior to fill in the binlog cache.
6039 */
6040 
int MYSQL_BIN_LOG::do_write_cache(THD *thd, IO_CACHE *cache)
{
  /*
    Copies the contents of 'cache' into log_file, rewriting each event
    header on the way: end_log_pos is made absolute and, when binlog
    checksums are enabled, the event length is grown by
    BINLOG_CHECKSUM_LEN and a checksum is appended after every event.

    Returns 0 on success and ER_ERROR_ON_WRITE when the cache cannot be
    re-initialised for reading or when a write to log_file fails.
  */
  DBUG_ENTER("MYSQL_BIN_LOG::do_write_cache(IO_CACHE *)");

  DBUG_EXECUTE_IF("simulate_do_write_cache_failure",
                  {
                    /*
                       see binlog_cache_data::write_event() that reacts on
                       @c simulate_disk_full_at_flush_pending.
                    */
                    DBUG_SET("-d,simulate_do_write_cache_failure");
                    DBUG_RETURN(ER_ERROR_ON_WRITE);
                  });

  /* Switch the cache to read mode, positioned at its beginning. */
  if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  /*
    length   - number of bytes available in the current read buffer
    group    - position of log_file when this call started
    carry    - number of bytes of a split event header saved in 'header'
    hdr_offs - offset, within the current buffer, of the next event header
  */
  uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
  ulong remains= 0; // part of unprocessed yet netto length of the event
  long val;
  // grows by BINLOG_CHECKSUM_LEN for each event processed when checksumming
  ulong end_log_pos_inc= 0;
  uchar header[LOG_EVENT_HEADER_LEN];
  ha_checksum crc= 0, crc_0= 0; // assignments to keep compiler happy
  my_bool do_checksum= (binlog_checksum_options != BINLOG_CHECKSUM_ALG_OFF);
  uchar buf[BINLOG_CHECKSUM_LEN];

  // while there is just one alg the following must hold:
  DBUG_ASSERT(!do_checksum ||
              binlog_checksum_options == BINLOG_CHECKSUM_ALG_CRC32);

  /*
    The events in the buffer have incorrect end_log_pos data
    (relative to beginning of group rather than absolute),
    so we'll recalculate them in situ so the binlog is always
    correct, even in the middle of a group. This is possible
    because we now know the start position of the group (the
    offset of this cache in the log, if you will); all we need
    to do is to find all event-headers, and add the position of
    the group to the end_log_pos of each event.  This is pretty
    straight forward, except that we read the cache in segments,
    so an event-header might end up on the cache-border and get
    split.
  */

  /*
    NOTE(review): the log position is narrowed to uint here; presumably
    this matches the 4-byte end_log_pos field written below - confirm.
  */
  group= (uint)my_b_tell(&log_file);
  DBUG_PRINT("debug", ("length: %llu, group: %llu",
                       (ulonglong) length, (ulonglong) group));
  hdr_offs= carry= 0;
  /* crc_0 keeps the initial checksum value; crc is reset to it per event. */
  if (do_checksum)
    crc= crc_0= my_checksum(0L, NULL, 0);

  /* Debug-only fault injection: start from a deliberately altered crc. */
  if (DBUG_EVALUATE_IF("fault_injection_crc_value", 1, 0))
    crc= crc - 1;

  /* One iteration per buffer-full of data read from the cache. */
  do
  {
    /*
      if we only got a partial header in the last iteration,
      get the other half now and process a full header.
    */
    if (unlikely(carry > 0))
    {
      DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);

      /* assemble both halves */
      memcpy(&header[carry], (char *)cache->read_pos,
             LOG_EVENT_HEADER_LEN - carry);

      /* fix end_log_pos */
      val=uint4korr(header + LOG_POS_OFFSET);
      val+= group +
        (end_log_pos_inc+= (do_checksum ? BINLOG_CHECKSUM_LEN : 0));
      int4store(&header[LOG_POS_OFFSET], val);

      if (do_checksum)
      {
        ulong len= uint4korr(header + EVENT_LEN_OFFSET);
        /* fix len */
        int4store(&header[EVENT_LEN_OFFSET], len + BINLOG_CHECKSUM_LEN);
      }

      /* write the first half of the split header */
      if (my_b_write(&log_file, header, carry))
        DBUG_RETURN(ER_ERROR_ON_WRITE);
      thd->binlog_bytes_written+= carry;

      /*
        copy fixed second half of header to cache so the correct
        version will be written later.
      */
      memcpy((char *)cache->read_pos, &header[carry],
             LOG_EVENT_HEADER_LEN - carry);

      /*
        next event header at ...
        (the stored length already includes the checksum when
        do_checksum is set, hence the subtraction)
      */
      hdr_offs= uint4korr(header + EVENT_LEN_OFFSET) - carry -
        (do_checksum ? BINLOG_CHECKSUM_LEN : 0);

      if (do_checksum)
      {
        DBUG_ASSERT(crc == crc_0 && remains == 0);
        /* start this event's checksum with the header half already written */
        crc= my_checksum(crc, header, carry);
        remains= uint4korr(header + EVENT_LEN_OFFSET) - carry -
          BINLOG_CHECKSUM_LEN;
      }
      carry= 0;
    }

    /* if there is anything to write, process it. */

    if (likely(length > 0))
    {
      /*
        process all event-headers in this (partial) cache.
        if next header is beyond current read-buffer,
        we'll get it later (though not necessarily in the
        very next iteration, just "eventually").
      */

      /*
        crc-calc the whole buffer: no event header starts inside it, so
        every byte belongs to the event that is still in progress.
      */
      if (do_checksum && hdr_offs >= length)
      {

        DBUG_ASSERT(remains != 0 && crc != crc_0);

        crc= my_checksum(crc, cache->read_pos, length);
        remains -= length;
        if (my_b_write(&log_file, cache->read_pos, length))
          DBUG_RETURN(ER_ERROR_ON_WRITE);
        if (remains == 0)
        {
          /* the event ends exactly at the buffer end: append its checksum */
          int4store(buf, crc);
          if (my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
            DBUG_RETURN(ER_ERROR_ON_WRITE);
          crc= crc_0;
        }
      }

      while (hdr_offs < length)
      {
        /*
          partial header only? save what we can get, process once
          we get the rest.
        */

        if (do_checksum)
        {
          if (remains != 0)
          {
            /*
              finish off with remains of the last event that crawls
              from previous into the current buffer
            */
            DBUG_ASSERT(crc != crc_0);
            crc= my_checksum(crc, cache->read_pos, hdr_offs);
            int4store(buf, crc);
            remains -= hdr_offs;
            DBUG_ASSERT(remains == 0);
            if (my_b_write(&log_file, cache->read_pos, hdr_offs) ||
                my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
              DBUG_RETURN(ER_ERROR_ON_WRITE);
            crc= crc_0;
          }
        }

        if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
        {
          /*
            The header is split across the buffer border: stash the part
            we have and shrink length so that the loop ends and the
            stashed bytes are not treated as part of this buffer.
          */
          carry= length - hdr_offs;
          memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
          length= hdr_offs;
        }
        else
        {
          /* we've got a full event-header, and it came in one piece */
          uchar *ev= (uchar *)cache->read_pos + hdr_offs;
          uint event_len= uint4korr(ev + EVENT_LEN_OFFSET); // netto len
          uchar *log_pos= ev + LOG_POS_OFFSET;

          /* fix end_log_pos */
          val= uint4korr(log_pos) + group +
            (end_log_pos_inc += (do_checksum ? BINLOG_CHECKSUM_LEN : 0));
          int4store(log_pos, val);

	  /* fix CRC */
	  if (do_checksum)
          {
            /* fix length */
            int4store(ev + EVENT_LEN_OFFSET, event_len + BINLOG_CHECKSUM_LEN);
            /*
              NOTE(review): fix_log_event_crc() is defined elsewhere;
              judging by its use below it presumably returns how many
              bytes of the event lie beyond this buffer (0 when the event
              is fully contained) - confirm against its definition.
            */
            remains= fix_log_event_crc(cache->read_pos, hdr_offs, event_len,
                                       length, &crc);
            DBUG_EXECUTE_IF("fail_binlog_write_1",
                            errno= 28; DBUG_RETURN(ER_ERROR_ON_WRITE););
            /* write the whole event, or only the part present in this buffer */
            if (my_b_write(&log_file, ev,
                           remains == 0 ? event_len : length - hdr_offs))
              DBUG_RETURN(ER_ERROR_ON_WRITE);
            if (remains == 0)
            {
              int4store(buf, crc);
              if (my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
                DBUG_RETURN(ER_ERROR_ON_WRITE);
              crc= crc_0; // crc is complete
            }
          }

          /* next event header at ... */
          hdr_offs += event_len; // incr by the netto len

          DBUG_ASSERT(!do_checksum || remains == 0 || hdr_offs >= length);
        }
      }

      /*
        Adjust hdr_offs. Note that it may still point beyond the segment
        read in the next iteration; if the current event is very long,
        it may take a couple of read-iterations (and subsequent adjustments
        of hdr_offs) for it to point into the then-current segment.
        If we have a split header (!carry), hdr_offs will be set at the
        beginning of the next iteration, overwriting the value we set here:
      */
      hdr_offs -= length;
    }

    /*
      Write the entire buf to the binary log file.
      Without checksums nothing has been written for this buffer yet; in
      the checksum case the data was already written event by event above.

      NOTE(review): thd->binlog_bytes_written is advanced only here and
      for the first half of a split header; the writes made in the
      checksum branches above are not added to it - confirm that this is
      intended.
    */
    if (!do_checksum)
    {
/*      DBUG_EXECUTE_IF("fail_binlog_write_1",
        errno= 28; DBUG_RETURN(ER_ERROR_ON_WRITE);); */
      if (my_b_write(&log_file, cache->read_pos, length))
        DBUG_RETURN(ER_ERROR_ON_WRITE);
      thd->binlog_bytes_written+= length;
    }
    cache->read_pos=cache->read_end;		// Mark buffer used up
  } while ((length= my_b_fill(cache)));

  /* At the end no header half and no event tail may be left pending. */
  DBUG_ASSERT(carry == 0);
  DBUG_ASSERT(!do_checksum || remains == 0);
  DBUG_ASSERT(!do_checksum || crc == crc_0);

  DBUG_RETURN(0); // All OK
}
6279 
/**
  Writes an incident event to the binary log.

  @param ev Incident event to be written
  @param need_lock_log If true, will acquire LOCK_log; otherwise the
  caller should already have acquired LOCK_log.
  @param do_flush_and_sync If true, will call flush_and_sync(), rotate() and
  purge().

  @retval false success, or the binary log is not open (nothing is written)
  @retval true  error
*/
write_incident(Incident_log_event * ev,bool need_lock_log,bool do_flush_and_sync)6292 bool MYSQL_BIN_LOG::write_incident(Incident_log_event *ev, bool need_lock_log,
6293                                    bool do_flush_and_sync)
6294 {
6295   uint error= 0;
6296   DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
6297 
6298   if (!is_open())
6299     DBUG_RETURN(error);
6300 
6301   if (need_lock_log)
6302     mysql_mutex_lock(&LOCK_log);
6303   else
6304     mysql_mutex_assert_owner(&LOCK_log);
6305 
6306   // @todo make this work with the group log. /sven
6307 
6308   error= ev->write(&log_file);
6309 
6310   if (do_flush_and_sync)
6311   {
6312     if (!error && !(error= flush_and_sync()))
6313     {
6314       bool check_purge= false;
6315       signal_update();
6316       error= rotate(true, &check_purge);
6317       if (!error && check_purge)
6318         purge();
6319     }
6320   }
6321 
6322   if (need_lock_log)
6323     mysql_mutex_unlock(&LOCK_log);
6324 
6325   DBUG_RETURN(error);
6326 }
6327 
write_dml_directly(THD * thd,const char * stmt,size_t stmt_len,enum_sql_command sql_command)6328 bool MYSQL_BIN_LOG::write_dml_directly(THD* thd, const char *stmt, size_t stmt_len,
6329                                        enum_sql_command sql_command)
6330 {
6331   bool ret= false;
6332   /* backup the original command */
6333   enum_sql_command save_sql_command= thd->lex->sql_command;
6334   thd->lex->sql_command= sql_command;
6335 
6336   if (thd->binlog_query(THD::STMT_QUERY_TYPE, stmt, stmt_len,
6337                         FALSE, FALSE, FALSE, 0) ||
6338       commit(thd, false) != TC_LOG::RESULT_SUCCESS)
6339   {
6340     ret= true;
6341   }
6342 
6343   thd->lex->sql_command= save_sql_command;
6344   return ret;
6345 }
6346 
6347 
/**
  Creates an incident event and writes it to the binary log.

  @param thd               Thread variable
  @param need_lock_log     If true, LOCK_log is acquired while writing;
                           otherwise the caller should already hold it.
  @param do_flush_and_sync If true, flush_and_sync(), rotate() and purge()
                           are called after the event has been written.

  @retval false success, or the binary log is not open
  @retval true  error
*/
write_incident(THD * thd,bool need_lock_log,bool do_flush_and_sync)6360 bool MYSQL_BIN_LOG::write_incident(THD *thd, bool need_lock_log,
6361                                    bool do_flush_and_sync)
6362 {
6363   DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
6364 
6365   if (!is_open())
6366     DBUG_RETURN(0);
6367 
6368   LEX_STRING const write_error_msg=
6369     { C_STRING_WITH_LEN("error writing to the binary log") };
6370   Incident incident= INCIDENT_LOST_EVENTS;
6371   Incident_log_event ev(thd, incident, write_error_msg);
6372 
6373   DBUG_RETURN(write_incident(&ev, need_lock_log, do_flush_and_sync));
6374 }
6375 
/**
  Write a cached log entry to the binary log.

  @param thd            Thread variable
  @param cache_data     The binlog cache to copy to the binlog. If it has
                        an incident flagged, an incident event is written
                        as well to notify that some non-transactional
                        changes did not get into the binlog.

  @retval false success
  @retval true  error

  @note
    We only come here if there is something in the cache.
  @note
    The thing in the cache is always a complete transaction.
  @note
    'cache' needs to be reinitialized after this function returns.
*/
6393 
write_cache(THD * thd,binlog_cache_data * cache_data)6394 bool MYSQL_BIN_LOG::write_cache(THD *thd, binlog_cache_data *cache_data)
6395 {
6396   DBUG_ENTER("MYSQL_BIN_LOG::write_cache(THD *, binlog_cache_data *, bool)");
6397 
6398   IO_CACHE *cache= &cache_data->cache_log;
6399   bool incident= cache_data->has_incident();
6400 
6401   DBUG_EXECUTE_IF("simulate_binlog_flush_error",
6402                   {
6403                     if (rand() % 3 == 0)
6404                     {
6405                       write_error=1;
6406                       thd->commit_error= THD::CE_FLUSH_ERROR;
6407                       DBUG_RETURN(0);
6408                     }
6409                   };);
6410 
6411   mysql_mutex_assert_owner(&LOCK_log);
6412 
6413   DBUG_ASSERT(is_open());
6414   if (likely(is_open()))                       // Should always be true
6415   {
6416     /*
6417       We only bother to write to the binary log if there is anything
6418       to write.
6419      */
6420     if (my_b_tell(cache) > 0)
6421     {
6422       DBUG_EXECUTE_IF("crash_before_writing_xid",
6423                       {
6424                         if ((write_error= do_write_cache(thd, cache)))
6425                           DBUG_PRINT("info", ("error writing binlog cache: %d",
6426                                                write_error));
6427                         flush_and_sync(true);
6428                         DBUG_PRINT("info", ("crashing before writing xid"));
6429                         DBUG_SUICIDE();
6430                       });
6431 
6432       if ((write_error= do_write_cache(thd, cache)))
6433         goto err;
6434 
6435       if (incident && write_incident(thd, false/*need_lock_log=false*/,
6436                                      false/*do_flush_and_sync==false*/))
6437         goto err;
6438 
6439       DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE(););
6440       if (cache->error)				// Error on read
6441       {
6442         char errbuf[MYSYS_STRERROR_SIZE];
6443         sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name,
6444                         errno, my_strerror(errbuf, sizeof(errbuf), errno));
6445         write_error=1;				// Don't give more errors
6446         goto err;
6447       }
6448 
6449       global_sid_lock->rdlock();
6450       if (gtid_state->update_on_flush(thd) != RETURN_STATUS_OK)
6451       {
6452         global_sid_lock->unlock();
6453         goto err;
6454       }
6455       global_sid_lock->unlock();
6456     }
6457     update_thd_next_event_pos(thd);
6458   }
6459 
6460   DBUG_RETURN(0);
6461 
6462 err:
6463   if (!write_error)
6464   {
6465     char errbuf[MYSYS_STRERROR_SIZE];
6466     write_error= 1;
6467     sql_print_error(ER(ER_ERROR_ON_WRITE), name,
6468                     errno, my_strerror(errbuf, sizeof(errbuf), errno));
6469   }
6470 
6471   /*
6472     If the flush has failed due to ENOSPC, set the flush_error flag.
6473   */
6474   if (cache->error && thd->is_error() && my_errno == ENOSPC)
6475   {
6476     cache_data->set_flush_error(thd);
6477   }
6478   thd->commit_error= THD::CE_FLUSH_ERROR;
6479 
6480   DBUG_RETURN(1);
6481 }
6482 
6483 
6484 /**
6485   Wait until we get a signal that the relay log has been updated.
6486 
6487   @param[in] thd        Thread variable
6488   @param[in] timeout    a pointer to a timespec;
6489                         NULL means to wait w/o timeout.
6490 
6491   @retval    0          if got signalled on update
6492   @retval    non-0      if wait timeout elapsed
6493 
6494   @note
6495     One must have a lock on LOCK_log before calling this function.
6496 */
6497 
wait_for_update_relay_log(THD * thd,const struct timespec * timeout)6498 int MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd, const struct timespec *timeout)
6499 {
6500   int ret= 0;
6501   PSI_stage_info old_stage;
6502   DBUG_ENTER("wait_for_update_relay_log");
6503 
6504   thd->ENTER_COND(&update_cond, &LOCK_log,
6505                   &stage_slave_has_read_all_relay_log,
6506                   &old_stage);
6507 
6508   if (!timeout)
6509     mysql_cond_wait(&update_cond, &LOCK_log);
6510   else
6511     ret= mysql_cond_timedwait(&update_cond, &LOCK_log,
6512                               const_cast<struct timespec *>(timeout));
6513   thd->EXIT_COND(&old_stage);
6514 
6515   DBUG_RETURN(ret);
6516 }
6517 
6518 /**
6519   Wait until we get a signal that the binary log has been updated.
6520   Applies to master only.
6521 
6522   NOTES
6523   @param[in] thd        a THD struct
6524   @param[in] timeout    a pointer to a timespec;
6525                         NULL means to wait w/o timeout.
6526   @retval    0          if got signalled on update
6527   @retval    non-0      if wait timeout elapsed
6528   @note
6529     LOCK_log must be taken before calling this function.
6530     LOCK_log is being released while the thread is waiting.
6531     LOCK_log is released by the caller.
6532 */
6533 
wait_for_update_bin_log(THD * thd,const struct timespec * timeout)6534 int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
6535                                            const struct timespec *timeout)
6536 {
6537   int ret= 0;
6538   DBUG_ENTER("wait_for_update_bin_log");
6539 
6540   if (!timeout)
6541     mysql_cond_wait(&update_cond, &LOCK_log);
6542   else
6543     ret= mysql_cond_timedwait(&update_cond, &LOCK_log,
6544                               const_cast<struct timespec *>(timeout));
6545   DBUG_RETURN(ret);
6546 }
6547 
6548 
6549 /**
6550   Close the log file.
6551 
6552   @param exiting     Bitmask for one or more of the following bits:
6553           - LOG_CLOSE_INDEX : if we should close the index file
6554           - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
6555                                      at once after close.
6556           - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
6557 
6558   @param need_lock_log If true, this function acquires LOCK_log;
6559   otherwise the caller should already have acquired it.
6560 
6561   @param need_lock_index If true, this function acquires LOCK_index;
6562   otherwise the caller should already have acquired it.
6563 
6564   @note
6565     One can do an open on the object at once after doing a close.
6566     The internal structures are not freed until cleanup() is called
6567 */
6568 
close(uint exiting,bool need_lock_log,bool need_lock_index)6569 void MYSQL_BIN_LOG::close(uint exiting, bool need_lock_log,
6570                           bool need_lock_index)
6571 {					// One can't set log_type here!
6572   DBUG_ENTER("MYSQL_BIN_LOG::close");
6573   DBUG_PRINT("enter",("exiting: %d", (int) exiting));
6574 
6575   if (need_lock_log)
6576     mysql_mutex_lock(&LOCK_log);
6577   else
6578     mysql_mutex_assert_owner(&LOCK_log);
6579 
6580   if (log_state == LOG_OPENED)
6581   {
6582 #ifdef HAVE_REPLICATION
6583     if ((exiting & LOG_CLOSE_STOP_EVENT) != 0)
6584     {
6585       Stop_log_event s;
6586       // the checksumming rule for relay-log case is similar to Rotate
6587         s.checksum_alg= is_relay_log ?
6588           relay_log_checksum_alg : binlog_checksum_options;
6589       DBUG_ASSERT(!is_relay_log ||
6590                   relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
6591       s.write(&log_file);
6592       bytes_written+= s.data_written;
6593       signal_update();
6594     }
6595 #endif /* HAVE_REPLICATION */
6596 
6597     /* don't pwrite in a file opened with O_APPEND - it doesn't work */
6598     if (log_file.type == WRITE_CACHE)
6599     {
6600       my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
6601       my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
6602       uchar flags= 0;            // clearing LOG_EVENT_BINLOG_IN_USE_F
6603       mysql_file_pwrite(log_file.file, &flags, 1, offset, MYF(0));
6604       /*
6605         Restore position so that anything we have in the IO_cache is written
6606         to the correct position.
6607         We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
6608         original position on system that doesn't support pwrite().
6609       */
6610       mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
6611     }
6612 
6613     /* this will cleanup IO_CACHE, sync and close the file */
6614     MYSQL_LOG::close(exiting);
6615   }
6616 
6617   /*
6618     The following test is needed even if is_open() is not set, as we may have
6619     called a not complete close earlier and the index file is still open.
6620   */
6621 
6622   if (need_lock_index)
6623     mysql_mutex_lock(&LOCK_index);
6624   else
6625     mysql_mutex_assert_owner(&LOCK_index);
6626 
6627   if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
6628   {
6629     end_io_cache(&index_file);
6630     if (mysql_file_close(index_file.file, MYF(0)) < 0 && ! write_error)
6631     {
6632       char errbuf[MYSYS_STRERROR_SIZE];
6633       write_error= 1;
6634       sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name,
6635                       errno, my_strerror(errbuf, sizeof(errbuf), errno));
6636     }
6637   }
6638 
6639   if (need_lock_index)
6640     mysql_mutex_unlock(&LOCK_index);
6641 
6642   log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
6643   my_free(name);
6644   name= NULL;
6645 
6646   if (need_lock_log)
6647     mysql_mutex_unlock(&LOCK_log);
6648 
6649   DBUG_VOID_RETURN;
6650 }
6651 
harvest_bytes_written(Relay_log_info * rli,bool need_log_space_lock)6652 void MYSQL_BIN_LOG::harvest_bytes_written(Relay_log_info* rli, bool need_log_space_lock)
6653 {
6654 #ifndef DBUG_OFF
6655   char buf1[22],buf2[22];
6656 #endif
6657   DBUG_ENTER("harvest_bytes_written");
6658   if (need_log_space_lock)
6659     mysql_mutex_lock(&rli->log_space_lock);
6660   else
6661     mysql_mutex_assert_owner(&rli->log_space_lock);
6662   rli->log_space_total+= bytes_written;
6663   DBUG_PRINT("info",("relay_log_space: %s  bytes_written: %s",
6664         llstr(rli->log_space_total,buf1), llstr(bytes_written,buf2)));
6665   bytes_written=0;
6666   if (need_log_space_lock)
6667     mysql_mutex_unlock(&rli->log_space_lock);
6668   DBUG_VOID_RETURN;
6669 }
6670 
set_max_size(ulong max_size_arg)6671 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
6672 {
6673   /*
6674     We need to take locks, otherwise this may happen:
6675     new_file() is called, calls open(old_max_size), then before open() starts,
6676     set_max_size() sets max_size to max_size_arg, then open() starts and
6677     uses the old_max_size argument, so max_size_arg has been overwritten and
6678     it's like if the SET command was never run.
6679   */
6680   DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
6681   mysql_mutex_lock(&LOCK_log);
6682   if (is_open())
6683     max_size= max_size_arg;
6684   mysql_mutex_unlock(&LOCK_log);
6685   DBUG_VOID_RETURN;
6686 }
6687 
6688 
signal_update()6689 void MYSQL_BIN_LOG::signal_update()
6690 {
6691   DBUG_ENTER("MYSQL_BIN_LOG::signal_update");
6692   signal_cnt++;
6693   mysql_cond_broadcast(&update_cond);
6694   DBUG_VOID_RETURN;
6695 }
6696 
6697 /****** transaction coordinator log for 2pc - binlog() based solution ******/
6698 
6699 /**
6700   @todo
6701   keep in-memory list of prepared transactions
6702   (add to list in log(), remove on unlog())
6703   and copy it to the new binlog if rotated
6704   but let's check the behaviour of tc_log_page_waits first!
6705 */
6706 
open_binlog(const char * opt_name)6707 int MYSQL_BIN_LOG::open_binlog(const char *opt_name)
6708 {
6709   LOG_INFO log_info;
6710   int      error= 1;
6711 
6712   /*
6713     This function is used for 2pc transaction coordination.  Hence, it
6714     is never used for relay logs.
6715   */
6716   DBUG_ASSERT(!is_relay_log);
6717   DBUG_ASSERT(total_ha_2pc > 1 || (1 == total_ha_2pc && opt_bin_log));
6718   DBUG_ASSERT(opt_name && opt_name[0]);
6719 
6720   if (!my_b_inited(&index_file))
6721   {
6722     /* There was a failure to open the index file, can't open the binlog */
6723     cleanup();
6724     return 1;
6725   }
6726 
6727   if (using_heuristic_recover())
6728   {
6729     /* generate a new binlog to mask a corrupted one */
6730     open_binlog(opt_name, 0, WRITE_CACHE, max_binlog_size, false,
6731                 true/*need_lock_log=true*/,
6732                 true/*need_lock_index=true*/,
6733                 true/*need_sid_lock=true*/,
6734                 NULL);
6735     cleanup();
6736     return 1;
6737   }
6738 
6739   if ((error= find_log_pos(&log_info, NullS, true/*need_lock_index=true*/)))
6740   {
6741     if (error != LOG_INFO_EOF)
6742       sql_print_error("find_log_pos() failed (error: %d)", error);
6743     else
6744       error= 0;
6745     goto err;
6746   }
6747 
6748   {
6749     const char *errmsg;
6750     IO_CACHE    log;
6751     File        file;
6752     Log_event  *ev=0;
6753     Format_description_log_event fdle(BINLOG_VERSION);
6754     char        log_name[FN_REFLEN];
6755     my_off_t    valid_pos= 0;
6756     my_off_t    binlog_size;
6757     MY_STAT     s;
6758 
6759     if (! fdle.is_valid())
6760       goto err;
6761 
6762     do
6763     {
6764       strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
6765     } while (!(error= find_next_log(&log_info, true/*need_lock_index=true*/)));
6766 
6767     if (error !=  LOG_INFO_EOF)
6768     {
6769       sql_print_error("find_log_pos() failed (error: %d)", error);
6770       goto err;
6771     }
6772 
6773     if ((file= open_binlog_file(&log, log_name, &errmsg)) < 0)
6774     {
6775       sql_print_error("%s", errmsg);
6776       goto err;
6777     }
6778 
6779     my_stat(log_name, &s, MYF(0));
6780     binlog_size= s.st_size;
6781 
6782     if ((ev= Log_event::read_log_event(&log, 0, &fdle,
6783                                        opt_master_verify_checksum)) &&
6784         ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
6785         ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
6786     {
6787       sql_print_information("Recovering after a crash using %s", opt_name);
6788       valid_pos= my_b_tell(&log);
6789       error= recover(&log, (Format_description_log_event *)ev, &valid_pos);
6790     }
6791     else
6792       error=0;
6793 
6794     delete ev;
6795     end_io_cache(&log);
6796     mysql_file_close(file, MYF(MY_WME));
6797 
6798     if (error)
6799       goto err;
6800 
6801     /* Trim the crashed binlog file to last valid transaction
6802       or event (non-transaction) base on valid_pos. */
6803     if (valid_pos > 0)
6804     {
6805       if ((file= mysql_file_open(key_file_binlog, log_name,
6806                                  O_RDWR | O_BINARY, MYF(MY_WME))) < 0)
6807       {
6808         sql_print_error("Failed to open the crashed binlog file "
6809                         "when master server is recovering it.");
6810         return -1;
6811       }
6812 
6813       /* Change binlog file size to valid_pos */
6814       if (valid_pos < binlog_size)
6815       {
6816         if (my_chsize(file, valid_pos, 0, MYF(MY_WME)))
6817         {
6818           sql_print_error("Failed to trim the crashed binlog file "
6819                           "when master server is recovering it.");
6820           mysql_file_close(file, MYF(MY_WME));
6821           return -1;
6822         }
6823         else
6824         {
6825           sql_print_information("Crashed binlog file %s size is %llu, "
6826                                 "but recovered up to %llu. Binlog trimmed to %llu bytes.",
6827                                 log_name, binlog_size, valid_pos, valid_pos);
6828         }
6829       }
6830 
6831       /* Clear LOG_EVENT_BINLOG_IN_USE_F */
6832       my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
6833       uchar flags= 0;
6834       if (mysql_file_pwrite(file, &flags, 1, offset, MYF(0)) != 1)
6835       {
6836         sql_print_error("Failed to clear LOG_EVENT_BINLOG_IN_USE_F "
6837                         "for the crashed binlog file when master "
6838                         "server is recovering it.");
6839         mysql_file_close(file, MYF(MY_WME));
6840         return -1;
6841       }
6842 
6843       mysql_file_close(file, MYF(MY_WME));
6844     } //end if
6845   }
6846 
6847 err:
6848   return error;
6849 }
6850 
6851 /** This is called on shutdown, after ha_panic. */
void MYSQL_BIN_LOG::close()
{
  /* No-op: this parameterless overload has an empty body. */
}
6855 
/**
  Prepare the transaction in the transaction coordinator.

  This function will prepare the transaction in the storage engines
  (by calling @c ha_prepare_low), which will write a prepare record
  to the log buffers.

  @retval 0    success
  @retval 1    error
*/
prepare(THD * thd,bool all)6866 int MYSQL_BIN_LOG::prepare(THD *thd, bool all)
6867 {
6868   DBUG_ENTER("MYSQL_BIN_LOG::prepare");
6869 
6870   int error= ha_prepare_low(thd, all);
6871 
6872   DBUG_RETURN(error);
6873 }
6874 
6875 /**
6876   Commit the transaction in the transaction coordinator.
6877 
6878   This function will commit the sessions transaction in the binary log
6879   and in the storage engines (by calling @c ha_commit_low). If the
6880   transaction was successfully logged (or not successfully unlogged)
  but the commit in the engines did not succeed, there is a risk of
6882   inconsistency between the engines and the binary log.
6883 
6884   For binary log group commit, the commit is separated into three
6885   parts:
6886 
6887   1. First part consists of filling the necessary caches and
6888      finalizing them (if they need to be finalized). After this,
6889      nothing is added to any of the caches.
6890 
6891   2. Second part execute an ordered flush and commit. This will be
6892      done using the group commit functionality in ordered_commit.
6893 
6894   3. Third part checks any errors resulting from the ordered commit
6895      and handles them appropriately.
6896 
6897   @retval 0    success
6898   @retval 1    error, transaction was neither logged nor committed
6899   @retval 2    error, transaction was logged but not committed
6900 */
commit(THD * thd,bool all)6901 TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all)
6902 {
6903   DBUG_ENTER("MYSQL_BIN_LOG::commit");
6904 
6905   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
6906   my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
6907   int error= RESULT_SUCCESS;
6908   bool stuff_logged= false;
6909   bool binlog_prot_acquired= false;
6910 
6911   DBUG_PRINT("enter", ("thd: 0x%llx, all: %s, xid: %llu, cache_mngr: 0x%llx",
6912                        (ulonglong) thd, YESNO(all), (ulonglong) xid,
6913                        (ulonglong) cache_mngr));
6914 
6915   /*
6916     No cache manager means nothing to log, but we still have to commit
6917     the transaction.
6918    */
6919   if (cache_mngr == NULL)
6920   {
6921     if (ha_commit_low(thd, all))
6922       DBUG_RETURN(RESULT_ABORTED);
6923     DBUG_RETURN(RESULT_SUCCESS);
6924   }
6925 
6926   /*
6927     Reset binlog_snapshot_% variables for the current connection so that the
6928     current coordinates are shown after committing a consistent snapshot
6929     transaction.
6930   */
6931   if (all)
6932   {
6933     mysql_mutex_lock(&cache_mngr->binlog_info.lock);
6934     cache_mngr->binlog_info.log_file_name[0]= '\0';
6935     mysql_mutex_unlock(&cache_mngr->binlog_info.lock);
6936   }
6937 
6938   THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
6939 
6940   DBUG_PRINT("debug", ("in_transaction: %s, no_2pc: %s, rw_ha_count: %d",
6941                        YESNO(thd->in_multi_stmt_transaction_mode()),
6942                        YESNO(trans->no_2pc),
6943                        trans->rw_ha_count));
6944   DBUG_PRINT("debug",
6945              ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
6946               YESNO(thd->transaction.all.cannot_safely_rollback()),
6947               YESNO(cache_mngr->trx_cache.is_binlog_empty())));
6948   DBUG_PRINT("debug",
6949              ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
6950               YESNO(thd->transaction.stmt.cannot_safely_rollback()),
6951               YESNO(cache_mngr->stmt_cache.is_binlog_empty())));
6952 
6953 
6954   /*
6955     If there are no handlertons registered, there is nothing to
6956     commit. Note that DDLs are written earlier in this case (inside
6957     binlog_query).
6958 
6959     TODO: This can be a problem in those cases that there are no
6960     handlertons registered. DDLs are one example, but the other case
6961     is MyISAM. In this case, we could register a dummy handlerton to
6962     trigger the commit.
6963 
6964     Any statement that requires logging will call binlog_query before
6965     trans_commit_stmt, so an alternative is to use the condition
6966     "binlog_query called or stmt.ha_list != 0".
6967    */
6968   if (!all && trans->ha_list == 0 &&
6969       cache_mngr->stmt_cache.is_binlog_empty())
6970     DBUG_RETURN(RESULT_SUCCESS);
6971 
6972   /*
6973     If there is anything in the stmt cache, and GTIDs are enabled,
6974     then this is a single statement outside a transaction and it is
6975     impossible that there is anything in the trx cache.  Hence, we
6976     write any empty group(s) to the stmt cache.
6977 
6978     Otherwise, we write any empty group(s) to the trx cache at the end
6979     of the transaction.
6980   */
6981   if (!cache_mngr->stmt_cache.is_binlog_empty())
6982   {
6983     error= write_empty_groups_to_cache(thd, &cache_mngr->stmt_cache);
6984     if (error == 0)
6985     {
6986       if (cache_mngr->stmt_cache.finalize(thd))
6987         DBUG_RETURN(RESULT_ABORTED);
6988       stuff_logged= true;
6989     }
6990   }
6991 
6992   /*
6993     We commit the transaction if:
6994      - We are not in a transaction and committing a statement, or
6995      - We are in a transaction and a full transaction is committed.
6996     Otherwise, we accumulate the changes.
6997   */
6998   if (!error && !cache_mngr->trx_cache.is_binlog_empty() &&
6999       ending_trans(thd, all))
7000   {
7001     const bool real_trans= (all || thd->transaction.all.ha_list == 0);
7002     /*
7003       We are committing an XA transaction if it is a "real" transaction
7004       and have an XID assigned (because some handlerton registered). A
7005       transaction is "real" if either 'all' is true or the 'all.ha_list'
7006       is empty.
7007 
7008       Note: This is kind of strange since registering the binlog
7009       handlerton will then make the transaction XA, which is not really
7010       true. This occurs for example if a MyISAM statement is executed
7011       with row-based replication on.
7012    */
7013     if (real_trans && xid && trans->rw_ha_count > 1 && !trans->no_2pc)
7014     {
7015       Xid_log_event end_evt(thd, xid);
7016       if (cache_mngr->trx_cache.finalize(thd, &end_evt))
7017         DBUG_RETURN(RESULT_ABORTED);
7018     }
7019     else
7020     {
7021       Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
7022                               true, FALSE, TRUE, 0, TRUE);
7023       if (cache_mngr->trx_cache.finalize(thd, &end_evt))
7024         DBUG_RETURN(RESULT_ABORTED);
7025     }
7026     stuff_logged= true;
7027   }
7028 
7029   /*
7030     This is part of the stmt rollback.
7031   */
7032   if (!all)
7033     cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
7034 
7035   DBUG_PRINT("debug", ("error: %d", error));
7036 
7037   if (error)
7038     DBUG_RETURN(RESULT_ABORTED);
7039 
7040   /*
7041     Now all the events are written to the caches, so we will commit
7042     the transaction in the engines. This is done using the group
7043     commit logic in ordered_commit, which will return when the
7044     transaction is committed.
7045 
7046     If the commit in the engines fail, we still have something logged
7047     to the binary log so we have to report this as a "bad" failure
7048     (failed to commit, but logged something).
7049   */
7050   if (stuff_logged)
7051   {
7052     int rc;
7053 
7054     /*
7055        Block binlog updates if there's an active BINLOG lock.
7056 
7057        We allow binlog lock owner to commit, assuming it knows what it does. We
7058        also check if protection has not been acquired earlier, which is possible
7059        in slave threads to protect master binlog coordinates.
7060     */
7061     if (!thd->backup_binlog_lock.is_acquired() &&
7062         !thd->backup_binlog_lock.is_protection_acquired())
7063     {
7064       const ulong timeout= thd->variables.lock_wait_timeout;
7065 
7066       DBUG_PRINT("debug", ("Acquiring binlog protection lock"));
7067       if (thd->backup_binlog_lock.acquire_protection(thd, MDL_EXPLICIT,
7068                                                       timeout))
7069       {
7070         cache_mngr->stmt_cache.reset();
7071         cache_mngr->trx_cache.reset();
7072 
7073         DBUG_RETURN(RESULT_ABORTED);
7074       }
7075 
7076       binlog_prot_acquired= true;
7077     }
7078 
7079     rc= ordered_commit(thd, all);
7080 
7081     if (binlog_prot_acquired)
7082     {
7083       DBUG_PRINT("debug", ("Releasing binlog protection lock"));
7084       thd->backup_binlog_lock.release_protection(thd);
7085     }
7086 
7087     if (rc)
7088       DBUG_RETURN(RESULT_INCONSISTENT);
7089   }
7090   else
7091   {
7092     if (ha_commit_low(thd, all))
7093       DBUG_RETURN(RESULT_INCONSISTENT);
7094   }
7095 
7096   DBUG_RETURN(error ? RESULT_INCONSISTENT : RESULT_SUCCESS);
7097 }
7098 
7099 
7100 /**
7101    Flush caches for session.
7102 
7103    @note @c set_trans_pos is called with a pointer to the file name
7104    that the binary log currently use and a rotation will change the
7105    contents of the variable.
7106 
7107    The position is used when calling the after_flush, after_commit,
7108    and after_rollback hooks, but these have been placed so that they
7109    occur before a rotation is executed.
7110 
7111    It is the responsibility of any plugin that use this position to
7112    copy it if they need it after the hook has returned.
7113  */
7114 std::pair<int,my_off_t>
flush_thread_caches(THD * thd)7115 MYSQL_BIN_LOG::flush_thread_caches(THD *thd)
7116 {
7117   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
7118   my_off_t bytes= 0;
7119   bool wrote_xid= false;
7120   int error= cache_mngr->flush(thd, &bytes, &wrote_xid);
7121   if (!error && bytes > 0)
7122   {
7123     /*
7124       Note that set_trans_pos does not copy the file name. See
7125       this function documentation for more info.
7126     */
7127     thd->set_trans_pos(log_file_name, my_b_tell(&log_file));
7128     if (wrote_xid)
7129       inc_prep_xids(thd);
7130   }
7131   DBUG_PRINT("debug", ("bytes: %llu", bytes));
7132   return std::make_pair(error, bytes);
7133 }
7134 
7135 
7136 /**
7137   Execute the flush stage.
7138 
7139   @param total_bytes_var Pointer to variable that will be set to total
7140   number of bytes flushed, or NULL.
7141 
7142   @param rotate_var Pointer to variable that will be set to true if
7143   binlog rotation should be performed after releasing locks. If rotate
7144   is not necessary, the variable will not be touched.
7145 
7146   @return Error code on error, zero on success
7147  */
7148 
7149 int
process_flush_stage_queue(my_off_t * total_bytes_var,bool * rotate_var,THD ** out_queue_var)7150 MYSQL_BIN_LOG::process_flush_stage_queue(my_off_t *total_bytes_var,
7151                                          bool *rotate_var,
7152                                          THD **out_queue_var)
7153 {
7154   DBUG_ASSERT(total_bytes_var && rotate_var && out_queue_var);
7155   my_off_t total_bytes= 0;
7156   int flush_error= 1;
7157   mysql_mutex_assert_owner(&LOCK_log);
7158 
7159   my_atomic_rwlock_rdlock(&opt_binlog_max_flush_queue_time_lock);
7160   const ulonglong max_udelay= my_atomic_load32(&opt_binlog_max_flush_queue_time);
7161   my_atomic_rwlock_rdunlock(&opt_binlog_max_flush_queue_time_lock);
7162   const ulonglong start_utime= max_udelay > 0 ? my_micro_time() : 0;
7163 
7164   /*
7165     First we read the queue until it either is empty or the difference
7166     between the time we started and the current time is too large.
7167 
7168     We remember the first thread we unqueued, because this will be the
7169     beginning of the out queue.
7170    */
7171   bool has_more= true;
7172   THD *first_seen= NULL;
7173   while ((max_udelay == 0 || my_micro_time() < start_utime + max_udelay) && has_more)
7174   {
7175     std::pair<bool,THD*> current= stage_manager.pop_front(Stage_manager::FLUSH_STAGE);
7176     std::pair<int,my_off_t> result= flush_thread_caches(current.second);
7177     has_more= current.first;
7178     total_bytes+= result.second;
7179     if (flush_error == 1)
7180       flush_error= result.first;
7181     if (first_seen == NULL)
7182       first_seen= current.second;
7183   }
7184 
7185   /*
7186     Either the queue is empty, or we ran out of time. If we ran out of
7187     time, we have to fetch the entire queue (and flush it) since
7188     otherwise the next batch will not have a leader.
7189    */
7190   if (has_more)
7191   {
7192     THD *queue= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
7193     for (THD *head= queue ; head ; head = head->next_to_commit)
7194     {
7195       std::pair<int,my_off_t> result= flush_thread_caches(head);
7196       total_bytes+= result.second;
7197       if (flush_error == 1)
7198         flush_error= result.first;
7199     }
7200     if (first_seen == NULL)
7201       first_seen= queue;
7202   }
7203 
7204   *out_queue_var= first_seen;
7205   *total_bytes_var= total_bytes;
7206   if (total_bytes > 0 && my_b_tell(&log_file) >= (my_off_t) max_size)
7207     *rotate_var= true;
7208   return flush_error;
7209 }
7210 
7211 
7212 /**
7213   Commit a sequence of sessions.
7214 
7215   This function commit an entire queue of sessions starting with the
7216   session in @c first. If there were an error in the flushing part of
7217   the ordered commit, the error code is passed in and all the threads
7218   are marked accordingly (but not committed).
7219 
7220   @see MYSQL_BIN_LOG::ordered_commit
7221 
7222   @param thd The "master" thread
7223   @param first First thread in the queue of threads to commit
7224  */
7225 
7226 void
process_commit_stage_queue(THD * thd,THD * first)7227 MYSQL_BIN_LOG::process_commit_stage_queue(THD *thd, THD *first)
7228 {
7229   mysql_mutex_assert_owner(&LOCK_commit);
7230   Thread_excursion excursion(thd);
7231 #ifndef DBUG_OFF
7232   thd->transaction.flags.ready_preempt= 1; // formality by the leader
7233 #endif
7234   for (THD *head= first ; head ; head = head->next_to_commit)
7235   {
7236     DBUG_PRINT("debug", ("Thread ID: %lu, commit_error: %d, flags.pending: %s",
7237                          head->thread_id, head->commit_error,
7238                          YESNO(head->transaction.flags.pending)));
7239     /*
7240       If flushing failed, set commit_error for the session, skip the
7241       transaction and proceed with the next transaction instead. This
7242       will mark all threads as failed, since the flush failed.
7243 
7244       If flush succeeded, attach to the session and commit it in the
7245       engines.
7246     */
7247 #ifndef DBUG_OFF
7248     stage_manager.clear_preempt_status(head);
7249 #endif
7250     /*
7251       Flush/Sync error should be ignored and continue
7252       to commit phase. And thd->commit_error cannot be
7253       COMMIT_ERROR at this moment.
7254     */
7255     DBUG_ASSERT(head->commit_error != THD::CE_COMMIT_ERROR);
7256     excursion.try_to_attach_to(head);
7257     bool all= head->transaction.flags.real_commit;
7258     if (head->transaction.flags.commit_low)
7259     {
7260       /* head is parked to have exited append() */
7261       DBUG_ASSERT(head->transaction.flags.ready_preempt);
7262       /*
7263         storage engine commit
7264       */
7265       if (ha_commit_low(head, all, false))
7266         head->commit_error= THD::CE_COMMIT_ERROR;
7267     }
7268     DBUG_PRINT("debug", ("commit_error: %d, flags.pending: %s",
7269                          head->commit_error,
7270                          YESNO(head->transaction.flags.pending)));
7271     /*
7272       Decrement the prepared XID counter after storage engine commit.
7273       We also need decrement the prepared XID when encountering a
7274       flush error or session attach error for avoiding 3-way deadlock
7275       among user thread, rotate thread and dump thread.
7276     */
7277     if (head->transaction.flags.xid_written)
7278       dec_prep_xids(head);
7279   }
7280 }
7281 
7282 /**
7283   Process after commit for a sequence of sessions.
7284 
7285   @param thd The "master" thread
7286   @param first First thread in the queue of threads to commit
7287  */
7288 
7289 void
process_after_commit_stage_queue(THD * thd,THD * first)7290 MYSQL_BIN_LOG::process_after_commit_stage_queue(THD *thd, THD *first)
7291 {
7292   Thread_excursion excursion(thd);
7293   for (THD *head= first; head; head= head->next_to_commit)
7294   {
7295     if (head->transaction.flags.run_hooks &&
7296         head->commit_error != THD::CE_COMMIT_ERROR)
7297     {
7298 
7299       /*
7300         TODO: This hook here should probably move outside/below this
7301               if and be the only after_commit invocation left in the
7302               code.
7303       */
7304       excursion.try_to_attach_to(head);
7305       bool all= head->transaction.flags.real_commit;
7306       (void) RUN_HOOK(transaction, after_commit, (head, all));
7307       /*
7308         When after_commit finished for the transaction, clear the run_hooks flag.
7309         This allow other parts of the system to check if after_commit was called.
7310       */
7311       head->transaction.flags.run_hooks= false;
7312     }
7313   }
7314 }
7315 
#ifndef DBUG_OFF
/**
  Names for the stages, used in debug trace output and indexed by
  Stage_manager::StageID. Both the strings and the table entries are
  constant.
*/
static const char *const g_stage_name[] = {
  "FLUSH",
  "SYNC",
  "COMMIT",
};
#endif
7324 
7325 
7326 /**
7327   Enter a stage of the ordered commit procedure.
7328 
7329   Entering is stage is done by:
7330 
7331   - Atomically enqueueing a queue of processes (which is just one for
7332     the first phase).
7333 
7334   - If the queue was empty, the thread is the leader for that stage
7335     and it should process the entire queue for that stage.
7336 
7337   - If the queue was not empty, the thread is a follower and can go
7338     waiting for the commit to finish.
7339 
7340   The function will lock the stage mutex if it was designated the
7341   leader for the phase.
7342 
7343   @param thd    Session structure
7344   @param stage  The stage to enter
7345   @param queue  Queue of threads to enqueue for the stage
7346   @param stage_mutex Mutex for the stage
7347 
7348   @retval true  The thread should "bail out" and go waiting for the
7349                 commit to finish
7350   @retval false The thread is the leader for the stage and should do
7351                 the processing.
7352 */
7353 
7354 bool
change_stage(THD * thd,Stage_manager::StageID stage,THD * queue,mysql_mutex_t * leave_mutex,mysql_mutex_t * enter_mutex)7355 MYSQL_BIN_LOG::change_stage(THD *thd,
7356                             Stage_manager::StageID stage, THD *queue,
7357                             mysql_mutex_t *leave_mutex,
7358                             mysql_mutex_t *enter_mutex)
7359 {
7360   DBUG_ENTER("MYSQL_BIN_LOG::change_stage");
7361   DBUG_PRINT("enter", ("thd: 0x%llx, stage: %s, queue: 0x%llx",
7362                        (ulonglong) thd, g_stage_name[stage], (ulonglong) queue));
7363   DBUG_ASSERT(0 <= stage && stage < Stage_manager::STAGE_COUNTER);
7364   DBUG_ASSERT(enter_mutex);
7365   DBUG_ASSERT(queue);
7366   /*
7367     enroll_for will release the leave_mutex once the sessions are
7368     queued.
7369   */
7370   if (!stage_manager.enroll_for(stage, queue, leave_mutex))
7371   {
7372     DBUG_ASSERT(!thd_get_cache_mngr(thd)->dbug_any_finalized());
7373     DBUG_RETURN(true);
7374   }
7375   mysql_mutex_lock(enter_mutex);
7376   DBUG_RETURN(false);
7377 }
7378 
7379 
7380 
7381 /**
7382   Flush the I/O cache to file.
7383 
7384   Flush the binary log to the binlog file if any byte where written
7385   and signal that the binary log file has been updated if the flush
7386   succeeds.
7387 */
7388 
7389 int
flush_cache_to_file(my_off_t * end_pos_var)7390 MYSQL_BIN_LOG::flush_cache_to_file(my_off_t *end_pos_var)
7391 {
7392   if (flush_io_cache(&log_file))
7393   {
7394     THD *thd= current_thd;
7395     thd->commit_error= THD::CE_FLUSH_ERROR;
7396     return ER_ERROR_ON_WRITE;
7397   }
7398   *end_pos_var= my_b_tell(&log_file);
7399   return 0;
7400 }
7401 
7402 
7403 /**
7404   Call fsync() to sync the file to disk.
7405 */
7406 std::pair<bool, bool>
sync_binlog_file(bool force)7407 MYSQL_BIN_LOG::sync_binlog_file(bool force)
7408 {
7409   bool synced= false;
7410   unsigned int sync_period= get_sync_period();
7411   if (force || (sync_period && ++sync_counter >= sync_period))
7412   {
7413     sync_counter= 0;
7414 
7415     /**
7416       On *pure non-transactional* workloads there is a small window
7417       in time where a concurrent rotate might be able to close
7418       the file before the sync is actually done. In that case,
7419       ignore the bad file descriptor errors.
7420 
7421       Transactional workloads (InnoDB) are not affected since the
7422       the rotation will not happen until all transactions have
7423       committed to the storage engine, thence decreased the XID
7424       counters.
7425 
7426       TODO: fix this properly even for non-transactional storage
7427             engines.
7428      */
7429     if (DBUG_EVALUATE_IF("simulate_error_during_sync_binlog_file", 1,
7430                          mysql_file_sync(log_file.file,
7431                                          MYF(MY_WME | MY_IGNORE_BADFD))))
7432     {
7433       THD *thd= current_thd;
7434       thd->commit_error= THD::CE_SYNC_ERROR;
7435       return std::make_pair(true, synced);
7436     }
7437     synced= true;
7438   }
7439   return std::make_pair(false, synced);
7440 }
7441 
7442 
7443 /**
7444    Helper function executed when leaving @c ordered_commit.
7445 
7446    This function contain the necessary code for fetching the error
7447    code, doing post-commit checks, and wrapping up the commit if
7448    necessary.
7449 
7450    It is typically called when enter_stage indicates that the thread
7451    should bail out, and also when the ultimate leader thread finishes
7452    executing @c ordered_commit.
7453 
7454    It is typically used in this manner:
7455    @code
7456    if (enter_stage(thd, Thread_queue::FLUSH_STAGE, thd, &LOCK_log))
7457      return finish_commit(thd);
7458    @endcode
7459 
7460    @return Error code if the session commit failed, or zero on
7461    success.
7462  */
7463 int
finish_commit(THD * thd)7464 MYSQL_BIN_LOG::finish_commit(THD *thd)
7465 {
7466   /*
7467     In some unlikely situations, it can happen that binary
7468     log is closed before the thread flushes it's cache.
7469     In that case, clear the caches before doing commit.
7470   */
7471   if (unlikely(!is_open()))
7472   {
7473     binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
7474     if (cache_mngr)
7475       cache_mngr->reset();
7476   }
7477   if (thd->transaction.flags.commit_low)
7478   {
7479     const bool all= thd->transaction.flags.real_commit;
7480     /*
7481       storage engine commit
7482     */
7483     DBUG_ASSERT(thd->commit_error != THD::CE_COMMIT_ERROR);
7484     if (thd->commit_error == THD::CE_NONE)
7485     {
7486       /*
7487         Acquire a shared lock to block commits if an X lock has been acquired by
7488         LOCK TABLES FOR BACKUP or START TRANSACTION WITH CONSISTENT SNAPSHOT. We
7489         only reach this code if binlog_order_commits=0.
7490       */
7491       DBUG_ASSERT(opt_binlog_order_commits == 0);
7492 
7493       slock();
7494 
7495       if (ha_commit_low(thd, all, false))
7496         thd->commit_error= THD::CE_COMMIT_ERROR;
7497 
7498       sunlock();
7499     }
7500     /*
7501       Decrement the prepared XID counter after storage engine commit
7502     */
7503     if (thd->transaction.flags.xid_written)
7504       dec_prep_xids(thd);
7505     /*
7506       If commit succeeded, we call the after_commit hook
7507 
7508       TODO: This hook here should probably move outside/below this
7509             if and be the only after_commit invocation left in the
7510             code.
7511     */
7512     if ((thd->commit_error != THD::CE_COMMIT_ERROR ) && thd->transaction.flags.run_hooks)
7513     {
7514       (void) RUN_HOOK(transaction, after_commit, (thd, all));
7515       thd->transaction.flags.run_hooks= false;
7516     }
7517   }
7518   else if (thd->transaction.flags.xid_written)
7519     dec_prep_xids(thd);
7520 
7521   /*
7522     Remove committed GTID from owned_gtids, it was already logged on
7523     MYSQL_BIN_LOG::write_cache().
7524   */
7525   global_sid_lock->rdlock();
7526   gtid_state->update_on_commit(thd);
7527   global_sid_lock->unlock();
7528 
7529   DBUG_ASSERT(thd->commit_error || !thd->transaction.flags.run_hooks);
7530   DBUG_ASSERT(!thd_get_cache_mngr(thd)->dbug_any_finalized());
7531   DBUG_PRINT("return", ("Thread ID: %lu, commit_error: %d",
7532                         thd->thread_id, thd->commit_error));
7533   /*
7534     flush or sync errors are handled by the leader of the group
7535     (using binlog_error_action). Hence treat only COMMIT_ERRORs as errors.
7536   */
7537   return (thd->commit_error == THD::CE_COMMIT_ERROR);
7538 }
7539 
7540 /**
7541   Helper function to handle flush or sync stage errors.
7542   If binlog_error_action= ABORT_SERVER, server will be aborted
7543   after reporting the error to the client.
7544   If binlog_error_action= IGNORE_ERROR, binlog will be closed
7545   for the life time of the server. close() call is protected
7546   with LOCK_log to avoid any parallel operations on binary log.
7547 
7548   @param thd Thread object that faced flush/sync error
7549   @param need_lock_log
7550                        > Indicates true if LOCk_log is needed before closing
7551                          binlog (happens when we are handling sync error)
7552                        > Indicates false if LOCK_log is already acquired
7553                          by the thread (happens when we are handling flush
7554                          error)
7555 
7556   @return void
7557 */
handle_binlog_flush_or_sync_error(THD * thd,bool need_lock_log)7558 void MYSQL_BIN_LOG::handle_binlog_flush_or_sync_error(THD *thd,
7559                                                       bool need_lock_log)
7560 {
7561   char errmsg[MYSQL_ERRMSG_SIZE];
7562   sprintf(errmsg, "An error occurred during %s stage of the commit. "
7563           "'binlog_error_action' is set to '%s'.",
7564           thd->commit_error== THD::CE_FLUSH_ERROR ? "flush" : "sync",
7565           binlog_error_action == ABORT_SERVER ? "ABORT_SERVER" : "IGNORE_ERROR");
7566   if (binlog_error_action == ABORT_SERVER)
7567   {
7568     static const char format_err[]= "%s Hence aborting the server.";
7569     char err_buff[MYSQL_ERRMSG_SIZE + sizeof(format_err)];
7570     snprintf(err_buff, sizeof(err_buff), format_err, errmsg);
7571     exec_binlog_error_action_abort(err_buff);
7572   }
7573   else
7574   {
7575     DEBUG_SYNC(thd, "before_binlog_closed_due_to_error");
7576     if (need_lock_log)
7577       mysql_mutex_lock(&LOCK_log);
7578     else
7579       mysql_mutex_assert_owner(&LOCK_log);
7580     /*
7581       It can happen that other group leader encountered
7582       error and already closed the binary log. So print
7583       error only if it is in open state. But we should
7584       call close() always just in case if the previous
7585       close did not close index file.
7586     */
7587     if (is_open())
7588     {
7589       sql_print_error("%s Hence turning logging off for the whole duration "
7590                       "of the MySQL server process. To turn it on again: fix "
7591                       "the cause, shutdown the MySQL server and restart it.",
7592                       errmsg);
7593     }
7594     close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, false/*need_lock_log=false*/,
7595           true/*need_lock_index=true*/);
7596     /*
7597       If there is a write error (flush/sync stage) and if
7598       binlog_error_action=IGNORE_ERROR, clear the error
7599       and allow the commit to happen in storage engine.
7600     */
7601     if (check_write_error(thd))
7602       thd->clear_error();
7603 
7604     if (need_lock_log)
7605       mysql_mutex_unlock(&LOCK_log);
7606     DEBUG_SYNC(thd, "after_binlog_closed_due_to_error");
7607   }
7608 }
7609 /**
7610   Flush and commit the transaction.
7611 
7612   This will execute an ordered flush and commit of all outstanding
7613   transactions and is the main function for the binary log group
7614   commit logic. The function performs the ordered commit in two
7615   phases.
7616 
7617   The first phase flushes the caches to the binary log and under
7618   LOCK_log and marks all threads that were flushed as not pending.
7619 
7620   The second phase executes under LOCK_commit and commits all
7621   transactions in order.
7622 
7623   The procedure is:
7624 
7625   1. Queue ourselves for flushing.
7626   2. Grab the log lock, which might result in blocking if the mutex is
7627      already held by another thread.
7628   3. If we were not committed while waiting for the lock
7629      1. Fetch the queue
7630      2. For each thread in the queue:
7631         a. Attach to it
7632         b. Flush the caches, saving any error code
7633      3. Flush and sync (depending on the value of sync_binlog).
7634      4. Signal that the binary log was updated
7635   4. Release the log lock
7636   5. Grab the commit lock
7637      1. For each thread in the queue:
7638         a. If there were no error when flushing and the transaction shall be committed:
7639            - Commit the transaction, saving the result of executing the commit.
7640   6. Release the commit lock
7641   7. Call purge, if any of the committed thread requested a purge.
7642   8. Return with the saved error code
7643 
7644   @todo The use of @c skip_commit is a hack that we use since the @c
7645   TC_LOG Interface does not contain functions to handle
7646   savepoints. Once the binary log is eliminated as a handlerton and
7647   the @c TC_LOG interface is extended with savepoint handling, this
7648   parameter can be removed.
7649 
7650   @param thd Session to commit transaction for
7651   @param all   This is @c true if this is a real transaction commit, and
7652                @c false otherwise.
7653   @param skip_commit
7654                This is @c true if the call to @c ha_commit_low should
7655                be skipped (it is handled by the caller somehow) and @c
7656                false otherwise (the normal case).
7657  */
ordered_commit(THD * thd,bool all,bool skip_commit)7658 int MYSQL_BIN_LOG::ordered_commit(THD *thd, bool all, bool skip_commit)
7659 {
7660   DBUG_ENTER("MYSQL_BIN_LOG::ordered_commit");
7661   int flush_error= 0, sync_error= 0;
7662   my_off_t total_bytes= 0;
7663   bool do_rotate= false;
7664 
7665   /*
7666     These values are used while flushing a transaction, so clear
7667     everything.
7668 
7669     Notes:
7670 
7671     - It would be good if we could keep transaction coordinator
7672       log-specific data out of the THD structure, but that is not the
7673       case right now.
7674 
7675     - Everything in the transaction structure is reset when calling
7676       ha_commit_low since that calls st_transaction::cleanup.
7677   */
7678   thd->transaction.flags.pending= true;
7679   thd->commit_error= THD::CE_NONE;
7680   thd->next_to_commit= NULL;
7681   thd->durability_property= HA_IGNORE_DURABILITY;
7682   thd->transaction.flags.real_commit= all;
7683   thd->transaction.flags.xid_written= false;
7684   thd->transaction.flags.commit_low= !skip_commit;
7685   thd->transaction.flags.run_hooks= !skip_commit;
7686 #ifndef DBUG_OFF
7687   /*
7688      The group commit Leader may have to wait for follower whose transaction
7689      is not ready to be preempted. Initially the status is pessimistic.
7690      Preemption guarding logics is necessary only when DBUG_ON is set.
7691      It won't be required for the dbug-off case as long as the follower won't
7692      execute any thread-specific write access code in this method, which is
7693      the case as of current.
7694   */
7695   thd->transaction.flags.ready_preempt= 0;
7696 #endif
7697 
7698   DBUG_PRINT("enter", ("flags.pending: %s, commit_error: %d, thread_id: %lu",
7699                        YESNO(thd->transaction.flags.pending),
7700                        thd->commit_error, thd->thread_id));
7701 
7702   /*
7703     Stage #1: flushing transactions to binary log
7704 
7705     While flushing, we allow new threads to enter and will process
7706     them in due time. Once the queue was empty, we cannot reap
7707     anything more since it is possible that a thread entered and
7708     appointed itself leader for the flush phase.
7709   */
7710   DEBUG_SYNC(thd, "waiting_to_enter_flush_stage");
7711   if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log))
7712   {
7713     DBUG_PRINT("return", ("Thread ID: %lu, commit_error: %d",
7714                           thd->thread_id, thd->commit_error));
7715     DBUG_RETURN(finish_commit(thd));
7716   }
7717 
7718   THD *wait_queue= NULL, *final_queue= NULL;
7719   mysql_mutex_t *leave_mutex_before_commit_stage= NULL;
7720   my_off_t flush_end_pos= 0;
7721   bool need_LOCK_log;
7722   if (unlikely(!is_open()))
7723   {
7724     final_queue= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
7725     leave_mutex_before_commit_stage= &LOCK_log;
7726     /*
7727       binary log is closed, flush stage and sync stage should be
7728       ignored. Binlog cache should be cleared, but instead of doing
7729       it here, do that work in 'finish_commit' function so that
7730       leader and followers thread caches will be cleared.
7731     */
7732     goto commit_stage;
7733   }
7734   DEBUG_SYNC(thd, "waiting_in_the_middle_of_flush_stage");
7735   flush_error= process_flush_stage_queue(&total_bytes, &do_rotate,
7736                                                  &wait_queue);
7737 
7738   if (flush_error == 0 && total_bytes > 0)
7739     flush_error= flush_cache_to_file(&flush_end_pos);
7740 
7741   /*
7742     If the flush finished successfully, we can call the after_flush
7743     hook. Being invoked here, we have the guarantee that the hook is
7744     executed before the before/after_send_hooks on the dump thread
7745     preventing race conditions among these plug-ins.
7746   */
7747   if (flush_error == 0)
7748   {
7749     const char *file_name_ptr= log_file_name + dirname_length(log_file_name);
7750     DBUG_ASSERT(flush_end_pos != 0);
7751     if (RUN_HOOK(binlog_storage, after_flush,
7752                  (thd, file_name_ptr, flush_end_pos)))
7753     {
7754       sql_print_error("Failed to run 'after_flush' hooks");
7755       flush_error= ER_ERROR_ON_WRITE;
7756     }
7757 
7758     signal_update();
7759     DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
7760   }
7761 
7762   if (flush_error)
7763   {
7764     /*
7765       Handle flush error (if any) after leader finishes it's flush stage.
7766     */
7767     handle_binlog_flush_or_sync_error(thd, false /* need_lock_log */);
7768   }
7769 
7770   publish_coordinates_for_global_status();
7771 
7772   /*
7773     Stage #2: Syncing binary log file to disk
7774   */
7775   need_LOCK_log= (get_sync_period() == 1);
7776 
7777   /*
7778     LOCK_log is not released when sync_binlog is 1. It guarantees that the
7779     events are not replicated by dump threads before they are synced to disk.
7780   */
7781   if (change_stage(thd, Stage_manager::SYNC_STAGE, wait_queue,
7782                    need_LOCK_log ? NULL : &LOCK_log, &LOCK_sync))
7783   {
7784     DBUG_PRINT("return", ("Thread ID: %lu, commit_error: %d",
7785                           thd->thread_id, thd->commit_error));
7786     DBUG_RETURN(finish_commit(thd));
7787   }
7788   final_queue= stage_manager.fetch_queue_for(Stage_manager::SYNC_STAGE);
7789   if (flush_error == 0 && total_bytes > 0)
7790   {
7791     DEBUG_SYNC(thd, "before_sync_binlog_file");
7792     std::pair<bool, bool> result= sync_binlog_file(false);
7793     sync_error= result.first;
7794   }
7795 
7796   if (need_LOCK_log)
7797     mysql_mutex_unlock(&LOCK_log);
7798   leave_mutex_before_commit_stage= &LOCK_sync;
7799   /*
7800     Stage #3: Commit all transactions in order.
7801 
7802     This stage is skipped if we do not need to order the commits and
7803     each thread have to execute the handlerton commit instead.
7804 
7805     Howver, since we are keeping the lock from the previous stage, we
7806     need to unlock it if we skip the stage.
7807    */
7808 commit_stage:
7809   /*
7810     We are delaying the handling of sync error until
7811     all locks are released but we should not enter into
7812     commit stage if binlog_error_action is ABORT_SERVER.
7813   */
7814   if (opt_binlog_order_commits &&
7815       (sync_error == 0 || binlog_error_action != ABORT_SERVER))
7816   {
7817     if (change_stage(thd, Stage_manager::COMMIT_STAGE,
7818                      final_queue, leave_mutex_before_commit_stage,
7819                      &LOCK_commit))
7820     {
7821       DBUG_PRINT("return", ("Thread ID: %lu, commit_error: %d",
7822                             thd->thread_id, thd->commit_error));
7823       DBUG_RETURN(finish_commit(thd));
7824     }
7825     THD *commit_queue= stage_manager.fetch_queue_for(Stage_manager::COMMIT_STAGE);
7826     DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
7827                     DEBUG_SYNC(thd, "before_process_commit_stage_queue"););
7828     process_commit_stage_queue(thd, commit_queue);
7829     mysql_mutex_unlock(&LOCK_commit);
7830     /*
7831       Process after_commit after LOCK_commit is released for avoiding
7832       3-way deadlock among user thread, rotate thread and dump thread.
7833     */
7834     process_after_commit_stage_queue(thd, commit_queue);
7835     final_queue= commit_queue;
7836   }
7837   else if (leave_mutex_before_commit_stage)
7838     mysql_mutex_unlock(leave_mutex_before_commit_stage);
7839 
7840   /*
7841     Handle sync error after we release all locks in order to avoid deadlocks
7842   */
7843   if (sync_error)
7844     handle_binlog_flush_or_sync_error(thd, true /* need_lock_log */);
7845 
7846   /* Commit done so signal all waiting threads */
7847   stage_manager.signal_done(final_queue);
7848 
7849   /*
7850     Finish the commit before executing a rotate, or run the risk of a
7851     deadlock. We don't need the return value here since it is in
7852     thd->commit_error, which is returned below.
7853   */
7854   (void) finish_commit(thd);
7855 
7856   /*
7857     If we need to rotate, we do it without commit error.
7858     Otherwise the thd->commit_error will be possibly reset.
7859    */
7860   if (DBUG_EVALUATE_IF("force_rotate", 1, 0) ||
7861       (do_rotate && thd->commit_error == THD::CE_NONE))
7862   {
7863     /*
7864       Do not force the rotate as several consecutive groups may
7865       request unnecessary rotations.
7866 
7867       NOTE: Run purge_logs wo/ holding LOCK_log because it does not
7868       need the mutex. Otherwise causes various deadlocks.
7869     */
7870 
7871     DEBUG_SYNC(thd, "ready_to_do_rotation");
7872     bool check_purge= false;
7873     mysql_mutex_lock(&LOCK_log);
7874     /*
7875       If rotate fails then depends on binlog_error_action variable
7876       appropriate action will be taken inside rotate call.
7877     */
7878     int error= rotate(false, &check_purge);
7879     mysql_mutex_unlock(&LOCK_log);
7880 
7881     if (error)
7882       thd->commit_error= THD::CE_COMMIT_ERROR;
7883     else if (check_purge)
7884       purge();
7885   }
7886   /*
7887     flush or sync errors are handled above (using binlog_error_action).
7888     Hence treat only COMMIT_ERRORs as errors.
7889   */
7890   DBUG_RETURN(thd->commit_error == THD::CE_COMMIT_ERROR);
7891 }
7892 
7893 
7894 /**
7895   MYSQLD server recovers from last crashed binlog.
7896 
7897   @param log           IO_CACHE of the crashed binlog.
7898   @param fdle          Format_description_log_event of the crashed binlog.
7899   @param valid_pos     The position of the last valid transaction or
7900                        event(non-transaction) of the crashed binlog.
7901 
7902   @retval
7903     0                  ok
7904   @retval
7905     1                  error
7906 */
recover(IO_CACHE * log,Format_description_log_event * fdle,my_off_t * valid_pos)7907 int MYSQL_BIN_LOG::recover(IO_CACHE *log, Format_description_log_event *fdle,
7908                             my_off_t *valid_pos)
7909 {
7910   Log_event  *ev;
7911   HASH xids;
7912   MEM_ROOT mem_root;
7913   /*
7914     The flag is used for handling the case that a transaction
7915     is partially written to the binlog.
7916   */
7917   bool in_transaction= FALSE;
7918 
7919   if (! fdle->is_valid() ||
7920       my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
7921                    sizeof(my_xid), 0, 0, MYF(0)))
7922     goto err1;
7923 
7924   init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE);
7925 
7926   while ((ev= Log_event::read_log_event(log, 0, fdle, TRUE))
7927          && ev->is_valid())
7928   {
7929     if (ev->get_type_code() == QUERY_EVENT &&
7930         !strcmp(((Query_log_event*)ev)->query, "BEGIN"))
7931       in_transaction= TRUE;
7932 
7933     if (ev->get_type_code() == QUERY_EVENT &&
7934         !strcmp(((Query_log_event*)ev)->query, "COMMIT"))
7935     {
7936       DBUG_ASSERT(in_transaction == TRUE);
7937       in_transaction= FALSE;
7938     }
7939     else if (ev->get_type_code() == XID_EVENT)
7940     {
7941       DBUG_ASSERT(in_transaction == TRUE);
7942       in_transaction= FALSE;
7943       Xid_log_event *xev=(Xid_log_event *)ev;
7944       uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
7945                                       sizeof(xev->xid));
7946       if (!x || my_hash_insert(&xids, x))
7947         goto err2;
7948     }
7949 
7950     /*
7951       Recorded valid position for the crashed binlog file
7952       which did not contain incorrect events. The following
7953       positions increase the variable valid_pos:
7954 
7955       1 -
7956         ...
7957         <---> HERE IS VALID <--->
7958         GTID
7959         BEGIN
7960         ...
7961         COMMIT
7962         ...
7963 
7964       2 -
7965         ...
7966         <---> HERE IS VALID <--->
7967         GTID
7968         DDL/UTILITY
7969         ...
7970 
7971       In other words, the following positions do not increase
7972       the variable valid_pos:
7973 
7974       1 -
7975         GTID
7976         <---> HERE IS VALID <--->
7977         ...
7978 
7979       2 -
7980         GTID
7981         BEGIN
7982         <---> HERE IS VALID <--->
7983         ...
7984     */
7985     if (!log->error && !in_transaction &&
7986         !is_gtid_event(ev))
7987       *valid_pos= my_b_tell(log);
7988 
7989     delete ev;
7990   }
7991 
7992   if (ha_recover(&xids))
7993     goto err2;
7994 
7995   free_root(&mem_root, MYF(0));
7996   my_hash_free(&xids);
7997   return 0;
7998 
7999 err2:
8000   free_root(&mem_root, MYF(0));
8001   my_hash_free(&xids);
8002 err1:
8003   sql_print_error("Crash recovery failed. Either correct the problem "
8004                   "(if it's, for example, out of memory error) and restart, "
8005                   "or delete (or rename) binary log and start mysqld with "
8006                   "--tc-heuristic-recover={commit|rollback}");
8007   return 1;
8008 }
8009 
8010 /*
8011   Copy out the non-directory part of binlog position filename for the
8012   `binlog_snapshot_file' status variable, same way as it is done for
8013   SHOW MASTER STATUS.
8014 */
set_binlog_snapshot_file(const char * src)8015 static void set_binlog_snapshot_file(const char *src)
8016 {
8017   mysql_mutex_assert_owner(&LOCK_status);
8018 
8019   int dir_len = dirname_length(src);
8020   strmake(binlog_snapshot_file, src + dir_len,
8021           sizeof(binlog_snapshot_file) - 1);
8022 }
8023 
8024 
8025 /** Copy the current binlog coordinates to the variables used for the
8026 not-in-consistent-snapshot case of SHOW STATUS */
publish_coordinates_for_global_status(void) const8027 void MYSQL_BIN_LOG::publish_coordinates_for_global_status(void) const
8028 {
8029   mysql_mutex_assert_owner(&LOCK_log);
8030 
8031   mysql_mutex_lock(&LOCK_status);
8032   strcpy(binlog_global_snapshot_file, log_file_name);
8033   binlog_global_snapshot_position= my_b_tell(&log_file);
8034   mysql_mutex_unlock(&LOCK_status);
8035 }
8036 
8037 
xlock(void)8038 void MYSQL_BIN_LOG::xlock(void)
8039 {
8040   mysql_mutex_lock(&LOCK_log);
8041 
8042   DBUG_ASSERT(!snapshot_lock_acquired);
8043 
8044   /*
8045     We must ensure that no writes to binlog and no commits to storage engines
8046     occur after function is called for START TRANSACTION FOR CONSISTENT
8047     SNAPSHOT. With binlog_order_commits=1 (the default) flushing to binlog is
8048     performed under the LOCK_log mutex and commits are done under the
8049     LOCK_commit mutex, both in the stage leader thread. So acquiring those 2
8050     mutexes is sufficient to guarantee atomicity.
8051 
8052     With binlog_order_commits=0 commits are performed in parallel by separate
8053     threads with each acquiring a shared lock on LOCK_consistent_snapshot.
8054 
8055     binlog_order_commits is a dynamic variable, so we have to keep track what
8056     primitives should be used in xunlock().
8057   */
8058   if (opt_binlog_order_commits)
8059   {
8060     mysql_mutex_lock(&LOCK_commit);
8061   }
8062   else
8063   {
8064     snapshot_lock_acquired= true;
8065     mysql_rwlock_wrlock(&LOCK_consistent_snapshot);
8066   }
8067 }
8068 
8069 
xunlock(void)8070 void MYSQL_BIN_LOG::xunlock(void)
8071 {
8072   if (!snapshot_lock_acquired)
8073   {
8074     mysql_mutex_unlock(&LOCK_commit);
8075   }
8076   else
8077   {
8078     mysql_rwlock_unlock(&LOCK_consistent_snapshot);
8079     snapshot_lock_acquired= false;
8080   }
8081 
8082   mysql_mutex_unlock(&LOCK_log);
8083 }
8084 
report_missing_purged_gtids(const Gtid_set * slave_executed_gtid_set,const char ** errmsg)8085 void MYSQL_BIN_LOG::report_missing_purged_gtids(const Gtid_set* slave_executed_gtid_set,
8086                                          const char** errmsg)
8087 {
8088   DBUG_ENTER("MYSQL_BIN_LOG::report_missing_purged_gtids");
8089   THD *thd= current_thd;
8090   Gtid_set gtid_missing(gtid_state->get_lost_gtids()->get_sid_map());
8091   gtid_missing.add_gtid_set(gtid_state->get_lost_gtids());
8092   gtid_missing.remove_gtid_set(slave_executed_gtid_set);
8093 
8094   String tmp_uuid;
8095   uchar name[]= "slave_uuid";
8096 
8097   /* Protects thd->user_vars. */
8098   mysql_mutex_lock(&thd->LOCK_thd_data);
8099   user_var_entry *entry=
8100     (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
8101   if (entry && entry->length() > 0)
8102     tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
8103   mysql_mutex_unlock(&thd->LOCK_thd_data);
8104 
8105 
8106   char* missing_gtids= NULL;
8107   char* slave_executed_gtids= NULL;
8108   gtid_missing.to_string(&missing_gtids, NULL);
8109   slave_executed_gtid_set->to_string(&slave_executed_gtids, NULL);
8110 
8111   /*
8112      Log the information about the missing purged GTIDs to the error log
8113      if the message is less than MAX_LOG_BUFFER_SIZE.
8114   */
8115   std::ostringstream log_info;
8116   log_info << "The missing transactions are '"<< missing_gtids <<"'";
8117   const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
8118 
8119   /* Don't consider the "%s" in the format string. Subtract 2 from the
8120      total length */
8121   int total_length= (strlen(log_msg) - 2 + log_info.str().length());
8122 
8123   DBUG_EXECUTE_IF("simulate_long_missing_gtids",
8124                   { total_length= MAX_LOG_BUFFER_SIZE + 1;});
8125 
8126   if (total_length > MAX_LOG_BUFFER_SIZE)
8127     log_info.str("To find the missing purged transactions, run \"SELECT"
8128                  " @@GLOBAL.GTID_PURGED\" on the master, then run \"SHOW"
8129                  " SLAVE STATUS\" on the slave for the Retrieved_Gtid_Set,"
8130                  " and then run \"SELECT GTID_SUBTRACT(<master_set>,"
8131                  " <slave_set>)\" on any server");
8132 
8133   sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
8134                     log_info.str().c_str());
8135 
8136   /*
8137      Send the information about the slave executed GTIDs and missing
8138      purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
8139   */
8140   std::ostringstream gtid_info;
8141   gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
8142             << "', and the missing transactions are '"<< missing_gtids <<"'";
8143   *errmsg= ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS);
8144 
8145   /* Don't consider the "%s" in the format string. Subtract 2 from the
8146      total length */
8147   total_length= (strlen(*errmsg) - 2 + gtid_info.str().length());
8148 
8149   DBUG_EXECUTE_IF("simulate_long_missing_gtids",
8150                   { total_length= MYSQL_ERRMSG_SIZE + 1;});
8151 
8152   if (total_length > MYSQL_ERRMSG_SIZE)
8153     gtid_info.str("The GTID sets and the missing purged transactions are too"
8154                   " long to print in this message. For more information,"
8155                   " please see the master's error log or the manual for"
8156                   " GTID_SUBTRACT");
8157 
8158   /* Buffer for formatting the message about the missing GTIDs. */
8159   static char buff[MYSQL_ERRMSG_SIZE];
8160   my_snprintf(buff, MYSQL_ERRMSG_SIZE, *errmsg, gtid_info.str().c_str());
8161   *errmsg= const_cast<const char*>(buff);
8162 
8163   my_free(missing_gtids);
8164   my_free(slave_executed_gtids);
8165   DBUG_VOID_RETURN;
8166 }
8167 
report_missing_gtids(const Gtid_set * previous_gtid_set,const Gtid_set * slave_executed_gtid_set,const char ** errmsg)8168 void MYSQL_BIN_LOG::report_missing_gtids(const Gtid_set* previous_gtid_set,
8169                                          const Gtid_set* slave_executed_gtid_set,
8170                                          const char** errmsg)
8171 {
8172   DBUG_ENTER("MYSQL_BIN_LOG::report_missing_gtids");
8173   THD *thd=current_thd;
8174   char* missing_gtids= NULL;
8175   char* slave_executed_gtids= NULL;
8176   Gtid_set gtid_missing(slave_executed_gtid_set->get_sid_map());
8177   gtid_missing.add_gtid_set(slave_executed_gtid_set);
8178   gtid_missing.remove_gtid_set(previous_gtid_set);
8179   gtid_missing.to_string(&missing_gtids, NULL);
8180   slave_executed_gtid_set->to_string(&slave_executed_gtids, NULL);
8181 
8182   String tmp_uuid;
8183   uchar name[]= "slave_uuid";
8184 
8185   /* Protects thd->user_vars. */
8186   mysql_mutex_lock(&thd->LOCK_thd_data);
8187 
8188   user_var_entry *entry=
8189     (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
8190   if (entry && entry->length() > 0)
8191     tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
8192   mysql_mutex_unlock(&thd->LOCK_thd_data);
8193 
8194   /*
8195      Log the information about the missing purged GTIDs to the error log
8196      if the message is less than MAX_LOG_BUFFER_SIZE.
8197   */
8198   std::ostringstream log_info;
8199   log_info << "If the binary log files have been deleted from disk,"
8200       " check the consistency of 'GTID_PURGED' variable."
8201       " The missing transactions are '"<< missing_gtids <<"'";
8202   const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
8203 
8204   /* Don't consider the "%s" in the format string. Subtract 2 from the
8205      total length */
8206   if ((strlen(log_msg) - 2 + log_info.str().length()) > MAX_LOG_BUFFER_SIZE)
8207     log_info.str("To find the missing purged transactions, run \"SELECT"
8208                  " @@GLOBAL.GTID_PURGED\" on the master, then run \"SHOW"
8209                  " SLAVE STATUS\" on the slave for the Retrieved_Gtid_Set,"
8210                  " and then run \"SELECT GTID_SUBTRACT(<master_set>,"
8211                  " <slave_set>)\" on any server");
8212 
8213   sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
8214                     log_info.str().c_str());
8215 
8216   /*
8217      Send the information about the slave executed GTIDs and missing
8218      purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
8219   */
8220   std::ostringstream gtid_info;
8221   gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
8222             << "', and the missing transactions are '"<< missing_gtids <<"'";
8223   *errmsg= ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS);
8224 
8225   /* Don't consider the "%s" in the format string. Subtract 2 from the
8226      total length */
8227   if ((strlen(*errmsg) - 2 + gtid_info.str().length()) > MYSQL_ERRMSG_SIZE)
8228     gtid_info.str("The GTID sets and the missing purged transactions are too"
8229                   " long to print in this message. For more information,"
8230                   " please see the master's error log or the manual for"
8231                   " GTID_SUBTRACT");
8232   /* Buffer for formatting the message about the missing GTIDs. */
8233   static char buff[MYSQL_ERRMSG_SIZE];
8234   my_snprintf(buff, MYSQL_ERRMSG_SIZE, *errmsg, gtid_info.str().c_str());
8235   *errmsg= const_cast<const char*>(buff);
8236 
8237   my_free(missing_gtids);
8238   my_free(slave_executed_gtids);
8239 
8240   DBUG_VOID_RETURN;
8241 }
8242 
get_group_cache(bool is_transactional)8243 Group_cache *THD::get_group_cache(bool is_transactional)
8244 {
8245   DBUG_ENTER("THD::get_group_cache(bool)");
8246 
8247   // If opt_bin_log==0, it is not safe to call thd_get_cache_mngr
8248   // because binlog_hton has not been completely set up.
8249   DBUG_ASSERT(opt_bin_log);
8250   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
8251 
8252   // cache_mngr is NULL until we call thd->binlog_setup_trx_data, so
8253   // we assert that this has been done.
8254   DBUG_ASSERT(cache_mngr != NULL);
8255 
8256   binlog_cache_data *cache_data=
8257     cache_mngr->get_binlog_cache_data(is_transactional);
8258   DBUG_ASSERT(cache_data != NULL);
8259 
8260   DBUG_RETURN(&cache_data->group_cache);
8261 }
8262 
8263 /*
8264   These functions are placed in this file since they need access to
8265   binlog_hton, which has internal linkage.
8266 */
8267 
binlog_setup_trx_data()8268 int THD::binlog_setup_trx_data()
8269 {
8270   DBUG_ENTER("THD::binlog_setup_trx_data");
8271   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
8272 
8273   if (cache_mngr)
8274     DBUG_RETURN(0);                             // Already set up
8275 
8276   cache_mngr= (binlog_cache_mngr*) my_malloc(sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
8277   if (!cache_mngr ||
8278       open_cached_file(&cache_mngr->stmt_cache.cache_log, mysql_tmpdir,
8279                        LOG_PREFIX, binlog_stmt_cache_size, MYF(MY_WME)) ||
8280       open_cached_file(&cache_mngr->trx_cache.cache_log, mysql_tmpdir,
8281                        LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
8282   {
8283     my_free(cache_mngr);
8284     DBUG_RETURN(1);                      // Didn't manage to set it up
8285   }
8286   DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) cache_mngr));
8287   thd_set_ha_data(this, binlog_hton, cache_mngr);
8288 
8289   cache_mngr= new (thd_get_cache_mngr(this))
8290               binlog_cache_mngr(max_binlog_stmt_cache_size,
8291                                 &binlog_stmt_cache_use,
8292                                 &binlog_stmt_cache_disk_use,
8293                                 max_binlog_cache_size,
8294                                 &binlog_cache_use,
8295                                 &binlog_cache_disk_use);
8296   DBUG_RETURN(0);
8297 }
8298 
8299 /**
8300 
8301 */
register_binlog_handler(THD * thd,bool trx)8302 void register_binlog_handler(THD *thd, bool trx)
8303 {
8304   DBUG_ENTER("register_binlog_handler");
8305   /*
8306     If this is the first call to this function while processing a statement,
8307     the transactional cache does not have a savepoint defined. So, in what
8308     follows:
8309       . an implicit savepoint is defined;
8310       . callbacks are registered;
8311       . binary log is set as read/write.
8312 
8313     The savepoint allows for truncating the trx-cache transactional changes
8314     fail. Callbacks are necessary to flush caches upon committing or rolling
8315     back a statement or a transaction. However, notifications do not happen
8316     if the binary log is set as read/write.
8317   */
8318   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
8319   if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
8320   {
8321     /*
8322       Set an implicit savepoint in order to be able to truncate a trx-cache.
8323     */
8324     my_off_t pos= 0;
8325     binlog_trans_log_savepos(thd, &pos);
8326     cache_mngr->trx_cache.set_prev_position(pos);
8327 
8328     /*
8329       Set callbacks in order to be able to call commmit or rollback.
8330     */
8331     if (trx)
8332       trans_register_ha(thd, TRUE, binlog_hton);
8333     trans_register_ha(thd, FALSE, binlog_hton);
8334 
8335     /*
8336       Set the binary log as read/write otherwise callbacks are not called.
8337     */
8338     thd->ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
8339   }
8340   DBUG_VOID_RETURN;
8341 }
8342 
8343 /**
8344   Function to start a statement and optionally a transaction for the
8345   binary log.
8346 
8347   This function does three things:
8348     - Starts a transaction if not in autocommit mode or if a BEGIN
8349       statement has been seen.
8350 
8351     - Start a statement transaction to allow us to truncate the cache.
8352 
8353     - Save the currrent binlog position so that we can roll back the
8354       statement by truncating the cache.
8355 
8356       We only update the saved position if the old one was undefined,
8357       the reason is that there are some cases (e.g., for CREATE-SELECT)
8358       where the position is saved twice (e.g., both in
8359       select_create::prepare() and THD::binlog_write_table_map()) , but
8360       we should use the first. This means that calls to this function
8361       can be used to start the statement before the first table map
8362       event, to include some extra events.
8363 
8364   Note however that IMMEDIATE_LOGGING implies that the statement is
8365   written without BEGIN/COMMIT.
8366 
8367   @param thd         Thread variable
8368   @param start_event The first event requested to be written into the
8369                      binary log
8370  */
binlog_start_trans_and_stmt(THD * thd,Log_event * start_event)8371 static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event)
8372 {
8373   DBUG_ENTER("binlog_start_trans_and_stmt");
8374 
8375   /*
8376     Initialize the cache manager if this was not done yet.
8377   */
8378   if (thd->binlog_setup_trx_data())
8379     DBUG_RETURN(1);
8380 
8381   /*
8382     Retrieve the appropriated cache.
8383   */
8384   bool is_transactional= start_event->is_using_trans_cache();
8385   binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
8386   binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_transactional);
8387 
8388   /*
8389     If the event is requesting immediatly logging, there is no need to go
8390     further down and set savepoint and register callbacks.
8391   */
8392   if (start_event->is_using_immediate_logging())
8393     DBUG_RETURN(0);
8394 
8395   register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode());
8396 
8397   /*
8398     If the cache is empty log "BEGIN" at the beginning of every transaction.
8399     Here, a transaction is either a BEGIN..COMMIT/ROLLBACK block or a single
8400     statement in autocommit mode.
8401   */
8402   if (cache_data->is_binlog_empty())
8403   {
8404     Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"),
8405                           is_transactional, FALSE, TRUE, 0, TRUE);
8406     if (cache_data->write_event(thd, &qinfo))
8407       DBUG_RETURN(1);
8408   }
8409 
8410   DBUG_RETURN(0);
8411 }
8412 
8413 /**
8414   This function writes a table map to the binary log.
8415   Note that in order to keep the signature uniform with related methods,
8416   we use a redundant parameter to indicate whether a transactional table
8417   was changed or not.
8418   Sometimes it will write a Rows_query_log_event into binary log before
8419   the table map too.
8420 
8421   @param table             a pointer to the table.
8422   @param is_transactional  @c true indicates a transactional table,
8423                            otherwise @c false a non-transactional.
8424   @param binlog_rows_query @c true indicates a Rows_query log event
8425                            will be binlogged before table map,
8426                            otherwise @c false indicates it will not
8427                            be binlogged.
8428   @return
8429     nonzero if an error pops up when writing the table map event
8430     or the Rows_query log event.
8431 */
binlog_write_table_map(TABLE * table,bool is_transactional,bool binlog_rows_query)8432 int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
8433                                 bool binlog_rows_query)
8434 {
8435   int error;
8436   DBUG_ENTER("THD::binlog_write_table_map");
8437   DBUG_PRINT("enter", ("table: 0x%lx  (%s: #%llu)",
8438                        (long) table, table->s->table_name.str,
8439                        table->s->table_map_id.id()));
8440 
8441   /* Pre-conditions */
8442   DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
8443   DBUG_ASSERT(table->s->table_map_id.is_valid());
8444 
8445   Table_map_log_event
8446     the_event(this, table, table->s->table_map_id, is_transactional);
8447 
8448   binlog_start_trans_and_stmt(this, &the_event);
8449 
8450   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
8451 
8452   binlog_cache_data *cache_data=
8453     cache_mngr->get_binlog_cache_data(is_transactional);
8454 
8455   if (binlog_rows_query && this->query())
8456   {
8457     /* Write the Rows_query_log_event into binlog before the table map */
8458     Rows_query_log_event
8459       rows_query_ev(this, this->query(), this->query_length());
8460     if ((error= cache_data->write_event(this, &rows_query_ev)))
8461       DBUG_RETURN(error);
8462   }
8463 
8464   if ((error= cache_data->write_event(this, &the_event)))
8465     DBUG_RETURN(error);
8466 
8467   binlog_table_maps++;
8468   DBUG_RETURN(0);
8469 }
8470 
8471 /**
8472   This function retrieves a pending row event from a cache which is
8473   specified through the parameter @c is_transactional. Respectively, when it
8474   is @c true, the pending event is returned from the transactional cache.
8475   Otherwise from the non-transactional cache.
8476 
8477   @param is_transactional  @c true indicates a transactional cache,
8478                            otherwise @c false a non-transactional.
8479   @return
8480     The row event if any.
8481 */
8482 Rows_log_event*
binlog_get_pending_rows_event(bool is_transactional) const8483 THD::binlog_get_pending_rows_event(bool is_transactional) const
8484 {
8485   Rows_log_event* rows= NULL;
8486   binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
8487 
8488   /*
8489     This is less than ideal, but here's the story: If there is no cache_mngr,
8490     prepare_pending_rows_event() has never been called (since the cache_mngr
8491     is set up there). In that case, we just return NULL.
8492    */
8493   if (cache_mngr)
8494   {
8495     binlog_cache_data *cache_data=
8496       cache_mngr->get_binlog_cache_data(is_transactional);
8497 
8498     rows= cache_data->pending();
8499   }
8500   return (rows);
8501 }
8502 
8503 /**
8504    @param db    db name c-string to be inserted into alphabetically sorted
8505                 THD::binlog_accessed_db_names list.
8506 
8507                 Note, that space for both the data and the node
8508                 struct are allocated in THD::main_mem_root.
8509                 The list lasts for the top-level query time and is reset
8510                 in @c THD::cleanup_after_query().
8511 */
8512 void
add_to_binlog_accessed_dbs(const char * db_param)8513 THD::add_to_binlog_accessed_dbs(const char *db_param)
8514 {
8515   char *after_db;
8516   /*
8517     binlog_accessed_db_names list is to maintain the database
8518     names which are referenced in a given command.
8519     Prior to bug 17806014 fix, 'main_mem_root' memory root used
8520     to store this list. The 'main_mem_root' scope is till the end
8521     of the query. Hence it caused increasing memory consumption
8522     problem in big procedures like the ones mentioned below.
8523     Eg: CALL p1() where p1 is having 1,00,000 create and drop tables.
8524     'main_mem_root' is freed only at the end of the command CALL p1()'s
8525     execution. But binlog_accessed_db_names list scope is only till the
8526     individual statements specified the procedure(create/drop statements).
8527     Hence the memory allocated in 'main_mem_root' was left uncleared
8528     until the p1's completion, even though it is not required after
8529     completion of individual statements.
8530 
8531     Instead of using 'main_mem_root' whose scope is complete query execution,
8532     now the memroot is changed to use 'thd->mem_root' whose scope is until the
8533     individual statement in CALL p1(). 'thd->mem_root' is set to 'execute_mem_root'
8534     in the context of procedure and it's scope is till the individual statement
8535     in CALL p1() and thd->memroot is equal to 'main_mem_root' in the context
8536     of a normal 'top level query'.
8537 
8538     Eg: a) create table t1(i int); => If this function is called while
8539            processing this statement, thd->memroot is equal to &main_mem_root
8540            which will be freed immediately after executing this statement.
8541         b) CALL p1() -> p1 contains create table t1(i int); => If this function
8542            is called while processing create table statement which is inside
8543            a stored procedure, then thd->memroot is equal to 'execute_mem_root'
8544            which will be freed immediately after executing this statement.
8545     In both a and b case, thd->memroot will be freed immediately and will not
8546     increase memory consumption.
8547 
8548     A special case(stored functions/triggers):
8549     Consider the following example:
8550     create function f1(i int) returns int
8551     begin
8552       insert into db1.t1 values (1);
8553       insert into db2.t1 values (2);
8554     end;
8555     When we are processing SELECT f1(), the list should contain db1, db2 names.
8556     Since thd->mem_root contains 'execute_mem_root' in the context of
8557     stored function, the mem root will be freed after adding db1 in
8558     the list and when we are processing the second statement and when we try
8559     to add 'db2' in the db1's list, it will lead to crash as db1's memory
8560     is already freed. To handle this special case, if in_sub_stmt is set
8561     (which is true incase of stored functions/triggers), we use &main_mem_root,
8562     if not set we will use thd->memroot which changes it's value to
8563     'execute_mem_root' or '&main_mem_root' depends on the context.
8564    */
8565   MEM_ROOT *db_mem_root= in_sub_stmt ? &main_mem_root : mem_root;
8566 
8567   if (!binlog_accessed_db_names)
8568     binlog_accessed_db_names= new (db_mem_root) List<char>;
8569 
8570   if (binlog_accessed_db_names->elements >  MAX_DBS_IN_EVENT_MTS)
8571   {
8572     push_warning_printf(this, Sql_condition::WARN_LEVEL_WARN,
8573                         ER_MTS_UPDATED_DBS_GREATER_MAX,
8574                         ER(ER_MTS_UPDATED_DBS_GREATER_MAX),
8575                         MAX_DBS_IN_EVENT_MTS);
8576     return;
8577   }
8578 
8579   after_db= strdup_root(db_mem_root, db_param);
8580 
8581   /*
8582      sorted insertion is implemented with first rearranging data
8583      (pointer to char*) of the links and final appending of the least
8584      ordered data to create a new link in the list.
8585   */
8586   if (binlog_accessed_db_names->elements != 0)
8587   {
8588     List_iterator<char> it(*get_binlog_accessed_db_names());
8589 
8590     while (it++)
8591     {
8592       char *swap= NULL;
8593       char **ref_cur_db= it.ref();
8594       int cmp= strcmp(after_db, *ref_cur_db);
8595 
8596       DBUG_ASSERT(!swap || cmp < 0);
8597 
8598       if (cmp == 0)
8599       {
8600         after_db= NULL;  /* dup to ignore */
8601         break;
8602       }
8603       else if (swap || cmp > 0)
8604       {
8605         swap= *ref_cur_db;
8606         *ref_cur_db= after_db;
8607         after_db= swap;
8608       }
8609     }
8610   }
8611   if (after_db)
8612     binlog_accessed_db_names->push_back(after_db, db_mem_root);
8613 }
8614 
8615 /*
8616   Tells if two (or more) tables have auto_increment columns and we want to
8617   lock those tables with a write lock.
8618 
8619   SYNOPSIS
8620     has_two_write_locked_tables_with_auto_increment
8621       tables        Table list
8622 
8623   NOTES:
8624     Call this function only when you have established the list of all tables
8625     which you'll want to update (including stored functions, triggers, views
8626     inside your statement).
8627 */
8628 
8629 static bool
has_write_table_with_auto_increment(TABLE_LIST * tables)8630 has_write_table_with_auto_increment(TABLE_LIST *tables)
8631 {
8632   for (TABLE_LIST *table= tables; table; table= table->next_global)
8633   {
8634     /* we must do preliminary checks as table->table may be NULL */
8635     if (!table->placeholder() &&
8636         table->table->found_next_number_field &&
8637         (table->lock_type >= TL_WRITE_ALLOW_WRITE))
8638       return 1;
8639   }
8640 
8641   return 0;
8642 }
8643 
8644 /*
8645    checks if we have select tables in the table list and write tables
8646    with auto-increment column.
8647 
8648   SYNOPSIS
8649    has_two_write_locked_tables_with_auto_increment_and_select
8650       tables        Table list
8651 
8652   RETURN VALUES
8653 
8654    -true if the table list has atleast one table with auto-increment column
8655 
8656 
8657          and atleast one table to select from.
8658    -false otherwise
8659 */
8660 
8661 static bool
has_write_table_with_auto_increment_and_select(TABLE_LIST * tables)8662 has_write_table_with_auto_increment_and_select(TABLE_LIST *tables)
8663 {
8664   bool has_select= false;
8665   bool has_auto_increment_tables = has_write_table_with_auto_increment(tables);
8666   for(TABLE_LIST *table= tables; table; table= table->next_global)
8667   {
8668      if (!table->placeholder() &&
8669         (table->lock_type <= TL_READ_NO_INSERT))
8670       {
8671         has_select= true;
8672         break;
8673       }
8674   }
8675   return(has_select && has_auto_increment_tables);
8676 }
8677 
8678 /*
8679   Tells if there is a table whose auto_increment column is a part
8680   of a compound primary key while is not the first column in
8681   the table definition.
8682 
8683   @param tables Table list
8684 
8685   @return true if the table exists, fais if does not.
8686 */
8687 
8688 static bool
has_write_table_auto_increment_not_first_in_pk(TABLE_LIST * tables)8689 has_write_table_auto_increment_not_first_in_pk(TABLE_LIST *tables)
8690 {
8691   for (TABLE_LIST *table= tables; table; table= table->next_global)
8692   {
8693     /* we must do preliminary checks as table->table may be NULL */
8694     if (!table->placeholder() &&
8695         table->table->found_next_number_field &&
8696         (table->lock_type >= TL_WRITE_ALLOW_WRITE)
8697         && table->table->s->next_number_keypart != 0)
8698       return 1;
8699   }
8700 
8701   return 0;
8702 }
8703 
8704 #ifndef DBUG_OFF
get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)8705 const char * get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)
8706 {
8707    switch (locked_tables_mode)
8708    {
8709    case LTM_NONE:
8710      return "LTM_NONE";
8711    case LTM_LOCK_TABLES:
8712      return "LTM_LOCK_TABLES";
8713    case LTM_PRELOCKED:
8714      return "LTM_PRELOCKED";
8715    case LTM_PRELOCKED_UNDER_LOCK_TABLES:
8716      return "LTM_PRELOCKED_UNDER_LOCK_TABLES";
8717    default:
8718      return "Unknown table lock mode";
8719    }
8720 }
8721 #endif
8722 
8723 
8724 /**
8725   Decide on logging format to use for the statement and issue errors
8726   or warnings as needed.  The decision depends on the following
8727   parameters:
8728 
8729   - The logging mode, i.e., the value of binlog_format.  Can be
8730     statement, mixed, or row.
8731 
8732   - The type of statement.  There are three types of statements:
8733     "normal" safe statements; unsafe statements; and row injections.
8734     An unsafe statement is one that, if logged in statement format,
8735     might produce different results when replayed on the slave (e.g.,
8736     INSERT DELAYED).  A row injection is either a BINLOG statement, or
8737     a row event executed by the slave's SQL thread.
8738 
8739   - The capabilities of tables modified by the statement.  The
8740     *capabilities vector* for a table is a set of flags associated
8741     with the table.  Currently, it only includes two flags: *row
8742     capability flag* and *statement capability flag*.
8743 
8744     The row capability flag is set if and only if the engine can
8745     handle row-based logging. The statement capability flag is set if
8746     and only if the table can handle statement-based logging.
8747 
8748   Decision table for logging format
8749   ---------------------------------
8750 
8751   The following table summarizes how the format and generated
8752   warning/error depends on the tables' capabilities, the statement
8753   type, and the current binlog_format.
8754 
8755      Row capable        N NNNNNNNNN YYYYYYYYY YYYYYYYYY
8756      Statement capable  N YYYYYYYYY NNNNNNNNN YYYYYYYYY
8757 
8758      Statement type     * SSSUUUIII SSSUUUIII SSSUUUIII
8759 
8760      binlog_format      * SMRSMRSMR SMRSMRSMR SMRSMRSMR
8761 
8762      Logged format      - SS-S----- -RR-RR-RR SRRSRR-RR
8763      Warning/Error      1 --2732444 5--5--6-- ---7--6--
8764 
8765   Legend
8766   ------
8767 
8768   Row capable:    N - Some table not row-capable, Y - All tables row-capable
8769   Stmt capable:   N - Some table not stmt-capable, Y - All tables stmt-capable
8770   Statement type: (S)afe, (U)nsafe, or Row (I)njection
8771   binlog_format:  (S)TATEMENT, (M)IXED, or (R)OW
8772   Logged format:  (S)tatement or (R)ow
8773   Warning/Error:  Warnings and error messages are as follows:
8774 
8775   1. Error: Cannot execute statement: binlogging impossible since both
8776      row-incapable engines and statement-incapable engines are
8777      involved.
8778 
8779   2. Error: Cannot execute statement: binlogging impossible since
8780      BINLOG_FORMAT = ROW and at least one table uses a storage engine
8781      limited to statement-logging.
8782 
8783   3. Error: Cannot execute statement: binlogging of unsafe statement
8784      is impossible when storage engine is limited to statement-logging
8785      and BINLOG_FORMAT = MIXED.
8786 
8787   4. Error: Cannot execute row injection: binlogging impossible since
8788      at least one table uses a storage engine limited to
8789      statement-logging.
8790 
8791   5. Error: Cannot execute statement: binlogging impossible since
8792      BINLOG_FORMAT = STATEMENT and at least one table uses a storage
8793      engine limited to row-logging.
8794 
8795   6. Error: Cannot execute row injection: binlogging impossible since
8796      BINLOG_FORMAT = STATEMENT.
8797 
8798   7. Warning: Unsafe statement binlogged in statement format since
8799      BINLOG_FORMAT = STATEMENT.
8800 
8801   In addition, we can produce the following error (not depending on
8802   the variables of the decision diagram):
8803 
8804   8. Error: Cannot execute statement: binlogging impossible since more
8805      than one engine is involved and at least one engine is
8806      self-logging.
8807 
  For each error case above, the statement is prevented from being
  logged, we report an error, and roll back the statement.  For
  warnings, we set the binlog_unsafe_warning_flags member: the warning
  will be printed only if the statement is successfully logged.
8812 
8813   @see THD::binlog_query
8814 
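  The client thread is the THD object this method is invoked on.

  @param[in] tables Tables involved in the query

  @retval 0 No error; statement can be logged.
  @retval -1 An error was reported: one of the error conditions above
             applies (1, 2, 3, 4, 5, 6, or 8), the statement writes to both
             replicated and non-replicated tables while not logged in row
             format, or the statement violates GTID consistency.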
8820 */
8821 
decide_logging_format(TABLE_LIST * tables)8822 int THD::decide_logging_format(TABLE_LIST *tables)
8823 {
8824   DBUG_ENTER("THD::decide_logging_format");
8825   DBUG_PRINT("info", ("query: %s", query()));
8826   DBUG_PRINT("info", ("variables.binlog_format: %lu",
8827                       variables.binlog_format));
8828   DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags(): 0x%x",
8829                       lex->get_stmt_unsafe_flags()));
8830 
8831   reset_binlog_local_stmt_filter();
8832 
8833   /*
8834     We should not decide logging format if the binlog is closed or
8835     binlogging is off, or if the statement is filtered out from the
8836     binlog by filtering rules.
8837   */
8838   if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) &&
8839       !(variables.binlog_format == BINLOG_FORMAT_STMT &&
8840         !binlog_filter->db_ok(db)))
8841   {
8842     /*
8843       Compute one bit field with the union of all the engine
8844       capabilities, and one with the intersection of all the engine
8845       capabilities.
8846     */
8847     handler::Table_flags flags_write_some_set= 0;
8848     handler::Table_flags flags_access_some_set= 0;
8849     handler::Table_flags flags_write_all_set=
8850       HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE;
8851 
8852     /*
8853        If different types of engines are about to be updated.
8854        For example: Innodb and Falcon; Innodb and MyIsam.
8855     */
8856     my_bool multi_write_engine= FALSE;
8857     /*
8858        If different types of engines are about to be accessed
8859        and any of them is about to be updated. For example:
8860        Innodb and Falcon; Innodb and MyIsam.
8861     */
8862     my_bool multi_access_engine= FALSE;
8863     /*
8864        bug 1313901 : Track if statement creates or drops a temporary table
8865                      and log in ROW if it does.
8866     */
8867     my_bool create_drop_temp_table= FALSE;
8868     /*
8869        Identifies if a table is changed.
8870     */
8871     my_bool is_write= FALSE;
8872     /*
8873        A pointer to a previous table that was changed.
8874     */
8875     TABLE* prev_write_table= NULL;
8876     /*
8877        A pointer to a previous table that was accessed.
8878     */
8879     TABLE* prev_access_table= NULL;
8880     /*
8881       True if at least one table is transactional.
8882     */
8883     bool write_to_some_transactional_table= false;
8884     /*
8885       True if at least one table is non-transactional.
8886     */
8887     bool write_to_some_non_transactional_table= false;
8888     /*
8889        True if all non-transactional tables that has been updated
8890        are temporary.
8891     */
8892     bool write_all_non_transactional_are_tmp_tables= true;
8893     /**
8894       The number of tables used in the current statement,
8895       that should be replicated.
8896     */
8897     uint replicated_tables_count= 0;
8898     /**
8899       The number of tables written to in the current statement,
8900       that should not be replicated.
8901       A table should not be replicated when it is considered
8902       'local' to a MySQL instance.
8903       Currently, these tables are:
8904       - mysql.slow_log
8905       - mysql.general_log
8906       - mysql.slave_relay_log_info
8907       - mysql.slave_master_info
8908       - mysql.slave_worker_info
8909       - performance_schema.*
8910       - TODO: information_schema.*
8911       In practice, from this list, only performance_schema.* tables
8912       are written to by user queries.
8913     */
8914     uint non_replicated_tables_count= 0;
8915 #ifndef DBUG_OFF
8916     {
8917       DBUG_PRINT("debug", ("prelocked_mode: %s",
8918                            get_locked_tables_mode_name(locked_tables_mode)));
8919     }
8920 #endif
8921 
8922     if (variables.binlog_format != BINLOG_FORMAT_ROW && tables)
8923     {
8924       /*
8925         DML statements that modify a table with an auto_increment column based on
8926         rows selected from a table are unsafe as the order in which the rows are
8927         fetched fron the select tables cannot be determined and may differ on
8928         master and slave.
8929        */
8930       if (has_write_table_with_auto_increment_and_select(tables))
8931         lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_WRITE_AUTOINC_SELECT);
8932 
8933       if (has_write_table_auto_increment_not_first_in_pk(tables))
8934         lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST);
8935 
8936       /*
8937         A query that modifies autoinc column in sub-statement can make the
8938         master and slave inconsistent.
8939         We can solve these problems in mixed mode by switching to binlogging
8940         if at least one updated table is used by sub-statement
8941        */
8942       if (lex->requires_prelocking() &&
8943           has_write_table_with_auto_increment(lex->first_not_own_table()))
8944         lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS);
8945     }
8946 
8947     /*
8948       Get the capabilities vector for all involved storage engines and
8949       mask out the flags for the binary log.
8950     */
8951     for (TABLE_LIST *table= tables; table; table= table->next_global)
8952     {
8953       if (table->placeholder())
8954       {
8955         /*
8956           bug 1313901 : Detect if this is a CREATE TEMPORARY or DROP of a
8957                         temporary table. This will be used later in determining
8958                         whether to log in ROW or STMT if MIXED replication is
8959                         being used.
8960         */
8961         if(!create_drop_temp_table &&
8962            !table->table &&
8963             ((lex->sql_command == SQLCOM_CREATE_TABLE &&
8964               (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)) ||
8965              ((lex->sql_command == SQLCOM_DROP_TABLE ||
8966                lex->sql_command == SQLCOM_TRUNCATE) &&
8967               find_temporary_table(this, table))))
8968         {
8969           create_drop_temp_table= TRUE;
8970         }
8971          continue;
8972       }
8973 
8974       handler::Table_flags const flags= table->table->file->ha_table_flags();
8975 
8976       DBUG_PRINT("info", ("table: %s; ha_table_flags: 0x%llx",
8977                           table->table_name, flags));
8978 
8979       if (table->table->no_replicate)
8980       {
8981         /*
8982           The statement uses a table that is not replicated.
8983           The following properties about the table:
8984           - persistent / transient
8985           - transactional / non transactional
8986           - temporary / permanent
8987           - read or write
8988           - multiple engines involved because of this table
8989           are not relevant, as this table is completely ignored.
8990           Because the statement uses a non replicated table,
8991           using STATEMENT format in the binlog is impossible.
8992           Either this statement will be discarded entirely,
8993           or it will be logged (possibly partially) in ROW format.
8994         */
8995         lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_TABLE);
8996 
8997         if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
8998         {
8999           non_replicated_tables_count++;
9000           continue;
9001         }
9002       }
9003 
9004       replicated_tables_count++;
9005 
9006       my_bool trans= table->table->file->has_transactions();
9007 
9008       if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
9009       {
9010         write_to_some_transactional_table=
9011           write_to_some_transactional_table || trans;
9012 
9013         write_to_some_non_transactional_table=
9014           write_to_some_non_transactional_table || !trans;
9015 
9016         if (prev_write_table && prev_write_table->file->ht !=
9017             table->table->file->ht)
9018           multi_write_engine= TRUE;
9019 
9020         if (table->table->s->tmp_table)
9021           lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TEMP_TRANS_TABLE :
9022                                                LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE);
9023         else
9024           lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TRANS_TABLE :
9025                                                LEX::STMT_WRITES_NON_TRANS_TABLE);
9026 
9027         /*
9028          Non-transactional updates are allowed when row binlog format is
9029          used and all non-transactional tables are temporary.
9030          Binlog format is checked on THD::is_dml_gtid_compatible() method.
9031         */
9032         if (!trans)
9033           write_all_non_transactional_are_tmp_tables=
9034             write_all_non_transactional_are_tmp_tables &&
9035             table->table->s->tmp_table;
9036 
9037         flags_write_all_set &= flags;
9038         flags_write_some_set |= flags;
9039         is_write= TRUE;
9040 
9041         prev_write_table= table->table;
9042 
9043         /*
9044           INSERT...ON DUPLICATE KEY UPDATE on a table with more than one unique keys
9045           can be unsafe. Check for it if the flag is already not marked for the
9046           given statement.
9047         */
9048         if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS) &&
9049             lex->sql_command == SQLCOM_INSERT &&
9050             /* Duplicate key update is not supported by INSERT DELAYED */
9051             get_command() != COM_DELAYED_INSERT && lex->duplicates == DUP_UPDATE)
9052         {
9053           uint keys= table->table->s->keys, i= 0, unique_keys= 0;
9054           for (KEY* keyinfo= table->table->s->key_info;
9055                i < keys && unique_keys <= 1; i++, keyinfo++)
9056           {
9057             if (keyinfo->flags & HA_NOSAME)
9058               unique_keys++;
9059           }
9060           if (unique_keys > 1 )
9061             lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS);
9062         }
9063       }
9064       flags_access_some_set |= flags;
9065 
9066       if (table->table->s->tmp_table)
9067         lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TEMP_TRANS_TABLE :
9068                                              LEX::STMT_READS_TEMP_NON_TRANS_TABLE);
9069       else
9070         lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TRANS_TABLE :
9071                                              LEX::STMT_READS_NON_TRANS_TABLE);
9072 
9073       if (prev_access_table && prev_access_table->file->ht !=
9074           table->table->file->ht)
9075          multi_access_engine= TRUE;
9076 
9077       prev_access_table= table->table;
9078     }
9079     DBUG_ASSERT(!is_write ||
9080                 write_to_some_transactional_table ||
9081                 write_to_some_non_transactional_table);
9082     /*
9083       write_all_non_transactional_are_tmp_tables may be true if any
9084       non-transactional table was not updated, so we fix its value here.
9085     */
9086     write_all_non_transactional_are_tmp_tables=
9087       write_all_non_transactional_are_tmp_tables &&
9088       write_to_some_non_transactional_table;
9089 
9090     DBUG_PRINT("info", ("flags_write_all_set: 0x%llx", flags_write_all_set));
9091     DBUG_PRINT("info", ("flags_write_some_set: 0x%llx", flags_write_some_set));
9092     DBUG_PRINT("info", ("flags_access_some_set: 0x%llx", flags_access_some_set));
9093     DBUG_PRINT("info", ("multi_write_engine: %d", multi_write_engine));
9094     DBUG_PRINT("info", ("multi_access_engine: %d", multi_access_engine));
9095 
9096     int error= 0;
9097     int unsafe_flags;
9098 
9099     bool multi_stmt_trans= in_multi_stmt_transaction_mode();
9100     bool trans_table= trans_has_updated_trans_table(this);
9101     bool binlog_direct= variables.binlog_direct_non_trans_update;
9102 
9103     if (lex->is_mixed_stmt_unsafe(multi_stmt_trans, binlog_direct,
9104                                   trans_table, tx_isolation))
9105       lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MIXED_STATEMENT);
9106     else if (multi_stmt_trans && trans_table && !binlog_direct &&
9107              lex->stmt_accessed_table(LEX::STMT_WRITES_NON_TRANS_TABLE))
9108       lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS);
9109 
9110     /*
9111       If more than one engine is involved in the statement and at
9112       least one is doing it's own logging (is *self-logging*), the
9113       statement cannot be logged atomically, so we generate an error
9114       rather than allowing the binlog to become corrupt.
9115     */
9116     if (multi_write_engine &&
9117         (flags_write_some_set & HA_HAS_OWN_BINLOGGING))
9118       my_error((error= ER_BINLOG_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE),
9119                MYF(0));
9120     else if (multi_access_engine && flags_access_some_set & HA_HAS_OWN_BINLOGGING)
9121       lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE);
9122 
9123     DBUG_EXECUTE_IF("make_stmt_only_engines",
9124                     {
9125                       flags_write_all_set= HA_BINLOG_STMT_CAPABLE;
9126                     };);
9127 
9128     /* both statement-only and row-only engines involved */
9129     if ((flags_write_all_set & (HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE)) == 0)
9130     {
9131       /*
9132         1. Error: Binary logging impossible since both row-incapable
9133            engines and statement-incapable engines are involved
9134       */
9135       my_error((error= ER_BINLOG_ROW_ENGINE_AND_STMT_ENGINE), MYF(0));
9136     }
9137     /* statement-only engines involved */
9138     else if ((flags_write_all_set & HA_BINLOG_ROW_CAPABLE) == 0)
9139     {
9140       if (lex->is_stmt_row_injection())
9141       {
9142         /*
9143           4. Error: Cannot execute row injection since table uses
9144              storage engine limited to statement-logging
9145         */
9146         my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0));
9147       }
9148       else if (variables.binlog_format == BINLOG_FORMAT_ROW &&
9149                sqlcom_can_generate_row_events(this->lex->sql_command))
9150       {
9151         /*
9152           2. Error: Cannot modify table that uses a storage engine
9153              limited to statement-logging when BINLOG_FORMAT = ROW
9154         */
9155         my_error((error= ER_BINLOG_ROW_MODE_AND_STMT_ENGINE), MYF(0));
9156       }
9157       else if (variables.binlog_format == BINLOG_FORMAT_MIXED &&
9158           ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
9159       {
9160         /*
9161           3. Error: Cannot execute statement: binlogging of unsafe
9162              statement is impossible when storage engine is limited to
9163              statement-logging and BINLOG_FORMAT = MIXED.
9164         */
9165         for (int unsafe_type= 0;
9166              unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
9167              unsafe_type++)
9168           if (unsafe_flags & (1 << unsafe_type))
9169             my_error((error= ER_BINLOG_UNSAFE_AND_STMT_ENGINE), MYF(0),
9170                      ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
9171       }
9172       else if (is_write && ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
9173       {
9174         /*
9175           7. Warning: Unsafe statement logged as statement due to
9176              binlog_format = STATEMENT
9177         */
9178         binlog_unsafe_warning_flags|= unsafe_flags;
9179         DBUG_PRINT("info", ("Scheduling warning to be issued by "
9180                             "binlog_query: '%s'",
9181                             ER(ER_BINLOG_UNSAFE_STATEMENT)));
9182         DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
9183                             binlog_unsafe_warning_flags));
9184       }
9185       /* log in statement format! */
9186     }
9187     /* no statement-only engines */
9188     else
9189     {
9190       /* binlog_format = STATEMENT */
9191       if (variables.binlog_format == BINLOG_FORMAT_STMT)
9192       {
9193         if (lex->is_stmt_row_injection())
9194         {
9195           /*
9196             6. Error: Cannot execute row injection since
9197                BINLOG_FORMAT = STATEMENT
9198           */
9199           my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_MODE), MYF(0));
9200         }
9201         else if ((flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0 &&
9202                  sqlcom_can_generate_row_events(this->lex->sql_command))
9203         {
9204           /*
9205             5. Error: Cannot modify table that uses a storage engine
9206                limited to row-logging when binlog_format = STATEMENT
9207           */
9208           my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), "");
9209         }
9210         else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0)
9211         {
9212           /*
9213             7. Warning: Unsafe statement logged as statement due to
9214                binlog_format = STATEMENT
9215           */
9216           binlog_unsafe_warning_flags|= unsafe_flags;
9217           DBUG_PRINT("info", ("Scheduling warning to be issued by "
9218                               "binlog_query: '%s'",
9219                               ER(ER_BINLOG_UNSAFE_STATEMENT)));
9220           DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
9221                               binlog_unsafe_warning_flags));
9222         }
9223         /* log in statement format! */
9224       }
9225       /* No statement-only engines and binlog_format != STATEMENT.
9226          I.e., nothing prevents us from row logging if needed. */
9227       else
9228       {
9229         if (lex->is_stmt_unsafe() || lex->is_stmt_row_injection()
9230             || (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0
9231             || (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0
9232             || lex->stmt_accessed_table(LEX::STMT_READS_TEMP_TRANS_TABLE)
9233             || lex->stmt_accessed_table(LEX::STMT_READS_TEMP_NON_TRANS_TABLE)
9234             || create_drop_temp_table)
9235         {
9236           /* log in row format! */
9237           set_current_stmt_binlog_format_row_if_mixed();
9238         }
9239       }
9240     }
9241 
9242     if (non_replicated_tables_count > 0)
9243     {
9244       if ((replicated_tables_count == 0) || ! is_write)
9245       {
9246         DBUG_PRINT("info", ("decision: no logging, no replicated table affected"));
9247         set_binlog_local_stmt_filter();
9248       }
9249       else
9250       {
9251         if (! is_current_stmt_binlog_format_row())
9252         {
9253           my_error((error= ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES), MYF(0));
9254         }
9255         else
9256         {
9257           clear_binlog_local_stmt_filter();
9258         }
9259       }
9260     }
9261     else
9262     {
9263       clear_binlog_local_stmt_filter();
9264     }
9265 
9266     if (!error && enforce_gtid_consistency &&
9267         !is_dml_gtid_compatible(write_to_some_transactional_table,
9268                                 write_to_some_non_transactional_table,
9269                                 write_all_non_transactional_are_tmp_tables))
9270       error= 1;
9271 
9272     if (error) {
9273       DBUG_PRINT("info", ("decision: no logging since an error was generated"));
9274       DBUG_RETURN(-1);
9275     }
9276 
9277     if (is_write &&
9278         lex->sql_command != SQLCOM_END /* rows-event applying by slave */)
9279     {
9280       /*
9281         Master side of DML in the STMT format events parallelization.
9282         All involving table db:s are stored in a abc-ordered name list.
9283         In case the number of databases exceeds MAX_DBS_IN_EVENT_MTS maximum
9284         the list gathering breaks since it won't be sent to the slave.
9285       */
9286       for (TABLE_LIST *table= tables; table; table= table->next_global)
9287       {
9288         if (table->placeholder())
9289           continue;
9290 
9291         DBUG_ASSERT(table->table);
9292 
9293         if (table->table->file->referenced_by_foreign_key())
9294         {
9295           /*
9296              FK-referenced dbs can't be gathered currently. The following
9297              event will be marked for sequential execution on slave.
9298           */
9299           binlog_accessed_db_names= NULL;
9300           add_to_binlog_accessed_dbs("");
9301           break;
9302         }
9303         if (!is_current_stmt_binlog_format_row())
9304           add_to_binlog_accessed_dbs(table->db);
9305       }
9306     }
9307     DBUG_PRINT("info", ("decision: logging in %s format",
9308                         is_current_stmt_binlog_format_row() ?
9309                         "ROW" : "STATEMENT"));
9310 
9311     if (variables.binlog_format == BINLOG_FORMAT_ROW &&
9312         (lex->sql_command == SQLCOM_UPDATE ||
9313          lex->sql_command == SQLCOM_UPDATE_MULTI ||
9314          lex->sql_command == SQLCOM_DELETE ||
9315          lex->sql_command == SQLCOM_DELETE_MULTI))
9316     {
9317       String table_names;
9318       /*
9319         Generate a warning for UPDATE/DELETE statements that modify a
9320         BLACKHOLE table, as row events are not logged in row format.
9321       */
9322       for (TABLE_LIST *table= tables; table; table= table->next_global)
9323       {
9324         if (table->placeholder())
9325           continue;
9326         if (table->table->file->ht->db_type == DB_TYPE_BLACKHOLE_DB &&
9327             table->lock_type >= TL_WRITE_ALLOW_WRITE)
9328         {
9329             table_names.append(table->table_name);
9330             table_names.append(",");
9331         }
9332       }
9333       if (!table_names.is_empty())
9334       {
9335         bool is_update= (lex->sql_command == SQLCOM_UPDATE ||
9336                          lex->sql_command == SQLCOM_UPDATE_MULTI);
9337         /*
9338           Replace the last ',' with '.' for table_names
9339         */
9340         table_names.replace(table_names.length()-1, 1, ".", 1);
9341         push_warning_printf(this, Sql_condition::WARN_LEVEL_WARN,
9342                             WARN_ON_BLOCKHOLE_IN_RBR,
9343                             ER(WARN_ON_BLOCKHOLE_IN_RBR),
9344                             is_update ? "UPDATE" : "DELETE",
9345                             table_names.c_ptr());
9346       }
9347     }
9348   }
9349 #ifndef DBUG_OFF
9350   else
9351     DBUG_PRINT("info", ("decision: no logging since "
9352                         "mysql_bin_log.is_open() = %d "
9353                         "and (options & OPTION_BIN_LOG) = 0x%llx "
9354                         "and binlog_format = %lu "
9355                         "and binlog_filter->db_ok(db) = %d",
9356                         mysql_bin_log.is_open(),
9357                         (variables.option_bits & OPTION_BIN_LOG),
9358                         variables.binlog_format,
9359                         binlog_filter->db_ok(db)));
9360 #endif
9361 
9362   DBUG_RETURN(0);
9363 }
9364 
9365 
is_ddl_gtid_compatible() const9366 bool THD::is_ddl_gtid_compatible() const
9367 {
9368   DBUG_ENTER("THD::is_ddl_gtid_compatible");
9369 
9370   // If @@session.sql_log_bin has been manually turned off (only
9371   // doable by SUPER), then no problem, we can execute any statement.
9372   if ((variables.option_bits & OPTION_BIN_LOG) == 0)
9373     DBUG_RETURN(true);
9374 
9375   if (lex->sql_command == SQLCOM_CREATE_TABLE &&
9376       !(lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) &&
9377       lex->select_lex.item_list.elements)
9378   {
9379     /*
9380       CREATE ... SELECT (without TEMPORARY) is unsafe because if
9381       binlog_format=row it will be logged as a CREATE TABLE followed
9382       by row events, re-executed non-atomically as two transactions,
9383       and then written to the slave's binary log as two separate
9384       transactions with the same GTID.
9385     */
9386     my_error(ER_GTID_UNSAFE_CREATE_SELECT, MYF(0));
9387     DBUG_RETURN(false);
9388   }
9389   if (lex->sql_command == SQLCOM_CREATE_TABLE &&
9390       (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) != 0)
9391   {
9392     /*
9393       In statement binary log format, CREATE TEMPORARY TABLE is unsafe
9394       to execute inside a transaction because the table will stay and the
9395       transaction will be written to the slave's binary log with the GTID even
9396       if the transaction is rolled back. This includes the execution inside
9397       functions and triggers.
9398       The same considerations apply for DROP TEMPORARY TABLE too, this is
9399       checked in mysql_rm_table instead.
9400     */
9401     if ((in_multi_stmt_transaction_mode() || in_sub_stmt)
9402         && variables.binlog_format == BINLOG_FORMAT_STMT)
9403     {
9404       my_error(ER_GTID_UNSAFE_CREATE_DROP_TEMPORARY_TABLE_IN_TRANSACTION,
9405                MYF(0));
9406       DBUG_RETURN(false);
9407     }
9408   }
9409   DBUG_RETURN(true);
9410 }
9411 
9412 
9413 bool
is_dml_gtid_compatible(bool transactional_table,bool non_transactional_table,bool non_transactional_tmp_tables) const9414 THD::is_dml_gtid_compatible(bool transactional_table,
9415                             bool non_transactional_table,
9416                             bool non_transactional_tmp_tables) const
9417 {
9418   DBUG_ENTER("THD::is_dml_gtid_compatible(bool, bool, bool)");
9419 
9420   // If @@session.sql_log_bin has been manually turned off (only
9421   // doable by SUPER), then no problem, we can execute any statement.
9422   if ((variables.option_bits & OPTION_BIN_LOG) == 0)
9423     DBUG_RETURN(true);
9424 
9425   /*
9426     Single non-transactional updates are allowed when not mixed
9427     together with transactional statements within a transaction.
9428     Furthermore, writing to transactional and non-transactional
9429     engines in a single statement is also disallowed.
9430     Multi-statement transactions on non-transactional tables are
9431     split into single-statement transactions when
9432     GTID_NEXT = "AUTOMATIC".
9433 
9434     Non-transactional updates are allowed when row binlog format is
9435     used and all non-transactional tables are temporary.
9436 
9437     The debug symbol "allow_gtid_unsafe_non_transactional_updates"
9438     disables the error.  This is useful because it allows us to run
9439     old tests that were not written with the restrictions of GTIDs in
9440     mind.
9441   */
9442   if (non_transactional_table &&
9443       (transactional_table || trans_has_updated_trans_table(this)) &&
9444       !(non_transactional_tmp_tables && is_current_stmt_binlog_format_row()) &&
9445       !DBUG_EVALUATE_IF("allow_gtid_unsafe_non_transactional_updates", 1, 0))
9446   {
9447     my_error(ER_GTID_UNSAFE_NON_TRANSACTIONAL_TABLE, MYF(0));
9448     DBUG_RETURN(false);
9449   }
9450 
9451   DBUG_RETURN(true);
9452 }
9453 
9454 /*
9455   Implementation of interface to write rows to the binary log through the
9456   thread.  The thread is responsible for writing the rows it has
9457   inserted/updated/deleted.
9458 */
9459 
9460 #ifndef MYSQL_CLIENT
9461 
9462 /*
9463   Template member function for ensuring that there is an rows log
9464   event of the apropriate type before proceeding.
9465 
9466   PRE CONDITION:
9467     - Events of type 'RowEventT' have the type code 'type_code'.
9468 
9469   POST CONDITION:
9470     If a non-NULL pointer is returned, the pending event for thread 'thd' will
9471     be an event of type 'RowEventT' (which have the type code 'type_code')
9472     will either empty or have enough space to hold 'needed' bytes.  In
9473     addition, the columns bitmap will be correct for the row, meaning that
9474     the pending event will be flushed if the columns in the event differ from
9475     the columns suppled to the function.
9476 
9477   RETURNS
9478     If no error, a non-NULL pending event (either one which already existed or
9479     the newly created one).
9480     If error, NULL.
9481  */
9482 
9483 template <class RowsEventT> Rows_log_event*
binlog_prepare_pending_rows_event(TABLE * table,uint32 serv_id,size_t needed,bool is_transactional,RowsEventT * hint MY_ATTRIBUTE ((unused)),const uchar * extra_row_info)9484 THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
9485                                        size_t needed,
9486                                        bool is_transactional,
9487 				       RowsEventT *hint MY_ATTRIBUTE((unused)),
9488                                        const uchar* extra_row_info)
9489 {
9490   DBUG_ENTER("binlog_prepare_pending_rows_event");
9491 
9492   /* Fetch the type code for the RowsEventT template parameter */
9493   int const general_type_code= RowsEventT::TYPE_CODE;
9494 
9495   Rows_log_event* pending= binlog_get_pending_rows_event(is_transactional);
9496 
9497   if (unlikely(pending && !pending->is_valid()))
9498     DBUG_RETURN(NULL);
9499 
9500   /*
9501     Check if the current event is non-NULL and a write-rows
9502     event. Also check if the table provided is mapped: if it is not,
9503     then we have switched to writing to a new table.
9504     If there is no pending event, we need to create one. If there is a pending
9505     event, but it's not about the same table id, or not of the same type
9506     (between Write, Update and Delete), or not the same affected columns, or
9507     going to be too big, flush this event to disk and create a new pending
9508     event.
9509   */
9510   if (!pending ||
9511       pending->server_id != serv_id ||
9512       pending->get_table_id() != table->s->table_map_id ||
9513       pending->get_general_type_code() != general_type_code ||
9514       pending->get_data_size() + needed > opt_binlog_rows_event_max_size ||
9515       pending->read_write_bitmaps_cmp(table) == FALSE ||
9516       !binlog_row_event_extra_data_eq(pending->get_extra_row_data(),
9517                                       extra_row_info))
9518   {
9519     /* Create a new RowsEventT... */
9520     Rows_log_event* const
9521 	ev= new RowsEventT(this, table, table->s->table_map_id,
9522                            is_transactional, extra_row_info);
9523     if (unlikely(!ev))
9524       DBUG_RETURN(NULL);
9525     ev->server_id= serv_id; // I don't like this, it's too easy to forget.
9526     /*
9527       flush the pending event and replace it with the newly created
9528       event...
9529     */
9530     if (unlikely(
9531         mysql_bin_log.flush_and_set_pending_rows_event(this, ev,
9532                                                        is_transactional)))
9533     {
9534       delete ev;
9535       DBUG_RETURN(NULL);
9536     }
9537 
9538     DBUG_RETURN(ev);               /* This is the new pending event */
9539   }
9540   DBUG_RETURN(pending);        /* This is the current pending event */
9541 }
9542 
9543 /* Declare in unnamed namespace. */
9544 CPP_UNNAMED_NS_START
9545 
9546   /**
9547      Class to handle temporary allocation of memory for row data.
9548 
9549      The responsibilities of the class is to provide memory for
9550      packing one or two rows of packed data (depending on what
9551      constructor is called).
9552 
9553      In order to make the allocation more efficient for "simple" rows,
9554      i.e., rows that do not contain any blobs, a pointer to the
9555      allocated memory is of memory is stored in the table structure
9556      for simple rows.  If memory for a table containing a blob field
9557      is requested, only memory for that is allocated, and subsequently
9558      released when the object is destroyed.
9559 
9560    */
9561   class Row_data_memory {
9562   public:
9563     /**
9564       Build an object to keep track of a block-local piece of memory
9565       for storing a row of data.
9566 
9567       @param table
9568       Table where the pre-allocated memory is stored.
9569 
9570       @param length
9571       Length of data that is needed, if the record contain blobs.
9572      */
Row_data_memory(TABLE * table,size_t const len1)9573     Row_data_memory(TABLE *table, size_t const len1)
9574       : m_memory(0)
9575     {
9576 #ifndef DBUG_OFF
9577       m_alloc_checked= FALSE;
9578 #endif
9579       allocate_memory(table, len1);
9580       m_ptr[0]= has_memory() ? m_memory : 0;
9581       m_ptr[1]= 0;
9582     }
9583 
Row_data_memory(TABLE * table,size_t const len1,size_t const len2)9584     Row_data_memory(TABLE *table, size_t const len1, size_t const len2)
9585       : m_memory(0)
9586     {
9587 #ifndef DBUG_OFF
9588       m_alloc_checked= FALSE;
9589 #endif
9590       allocate_memory(table, len1 + len2);
9591       m_ptr[0]= has_memory() ? m_memory        : 0;
9592       m_ptr[1]= has_memory() ? m_memory + len1 : 0;
9593     }
9594 
~Row_data_memory()9595     ~Row_data_memory()
9596     {
9597       if (m_memory != 0 && m_release_memory_on_destruction)
9598         my_free(m_memory);
9599     }
9600 
9601     /**
9602        Is there memory allocated?
9603 
9604        @retval true There is memory allocated
9605        @retval false Memory allocation failed
9606      */
has_memory() const9607     bool has_memory() const {
9608 #ifndef DBUG_OFF
9609       m_alloc_checked= TRUE;
9610 #endif
9611       return m_memory != 0;
9612     }
9613 
slot(uint s)9614     uchar *slot(uint s)
9615     {
9616       DBUG_ASSERT(s < sizeof(m_ptr)/sizeof(*m_ptr));
9617       DBUG_ASSERT(m_ptr[s] != 0);
9618       DBUG_ASSERT(m_alloc_checked == TRUE);
9619       return m_ptr[s];
9620     }
9621 
9622   private:
allocate_memory(TABLE * const table,size_t const total_length)9623     void allocate_memory(TABLE *const table, size_t const total_length)
9624     {
9625       if (table->s->blob_fields == 0)
9626       {
9627         /*
9628           The maximum length of a packed record is less than this
9629           length. We use this value instead of the supplied length
9630           when allocating memory for records, since we don't know how
9631           the memory will be used in future allocations.
9632 
9633           Since table->s->reclength is for unpacked records, we have
9634           to add two bytes for each field, which can potentially be
9635           added to hold the length of a packed field.
9636         */
9637         size_t const maxlen= table->s->reclength + 2 * table->s->fields;
9638 
9639         /*
9640           Allocate memory for two records if memory hasn't been
9641           allocated. We allocate memory for two records so that it can
9642           be used when processing update rows as well.
9643         */
9644         if (table->write_row_record == 0)
9645           table->write_row_record=
9646             (uchar *) alloc_root(&table->mem_root, 2 * maxlen);
9647         m_memory= table->write_row_record;
9648         m_release_memory_on_destruction= FALSE;
9649       }
9650       else
9651       {
9652         m_memory= (uchar *) my_malloc(total_length, MYF(MY_WME));
9653         m_release_memory_on_destruction= TRUE;
9654       }
9655     }
9656 
9657 #ifndef DBUG_OFF
9658     mutable bool m_alloc_checked;
9659 #endif
9660     bool m_release_memory_on_destruction;
9661     uchar *m_memory;
9662     uchar *m_ptr[2];
9663   };
9664 
9665 CPP_UNNAMED_NS_END
9666 
binlog_write_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)9667 int THD::binlog_write_row(TABLE* table, bool is_trans,
9668                           uchar const *record,
9669                           const uchar* extra_row_info)
9670 {
9671   DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
9672 
9673   /*
9674     Pack records into format for transfer. We are allocating more
9675     memory than needed, but that doesn't matter.
9676   */
9677   Row_data_memory memory(table, max_row_length(table, record));
9678   if (!memory.has_memory())
9679     return HA_ERR_OUT_OF_MEM;
9680 
9681   uchar *row_data= memory.slot(0);
9682 
9683   size_t const len= pack_row(table, table->write_set, row_data, record);
9684 
9685   Rows_log_event* const ev=
9686     binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
9687                                       static_cast<Write_rows_log_event*>(0),
9688                                       extra_row_info);
9689 
9690   if (unlikely(ev == 0))
9691     return HA_ERR_OUT_OF_MEM;
9692 
9693   return ev->add_row_data(row_data, len);
9694 }
9695 
binlog_update_row(TABLE * table,bool is_trans,const uchar * before_record,const uchar * after_record,const uchar * extra_row_info)9696 int THD::binlog_update_row(TABLE* table, bool is_trans,
9697                            const uchar *before_record,
9698                            const uchar *after_record,
9699                            const uchar* extra_row_info)
9700 {
9701   DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
9702   int error= 0;
9703 
9704   /**
9705     Save a reference to the original read and write set bitmaps.
9706     We will need this to restore the bitmaps at the end.
9707    */
9708   MY_BITMAP *old_read_set= table->read_set;
9709   MY_BITMAP *old_write_set= table->write_set;
9710 
9711   /**
9712      This will remove spurious fields required during execution but
9713      not needed for binlogging. This is done according to the:
9714      binlog-row-image option.
9715    */
9716   binlog_prepare_row_images(table);
9717 
9718   size_t const before_maxlen = max_row_length(table, before_record);
9719   size_t const after_maxlen  = max_row_length(table, after_record);
9720 
9721   Row_data_memory row_data(table, before_maxlen, after_maxlen);
9722   if (!row_data.has_memory())
9723     return HA_ERR_OUT_OF_MEM;
9724 
9725   uchar *before_row= row_data.slot(0);
9726   uchar *after_row= row_data.slot(1);
9727 
9728   size_t const before_size= pack_row(table, table->read_set, before_row,
9729                                         before_record);
9730   size_t const after_size= pack_row(table, table->write_set, after_row,
9731                                        after_record);
9732 
9733   /*
9734     Don't print debug messages when running valgrind since they can
9735     trigger false warnings.
9736    */
9737 #ifndef HAVE_purify
9738   DBUG_DUMP("before_record", before_record, table->s->reclength);
9739   DBUG_DUMP("after_record",  after_record, table->s->reclength);
9740   DBUG_DUMP("before_row",    before_row, before_size);
9741   DBUG_DUMP("after_row",     after_row, after_size);
9742 #endif
9743 
9744   Rows_log_event* const ev=
9745     binlog_prepare_pending_rows_event(table, server_id,
9746 				      before_size + after_size, is_trans,
9747 				      static_cast<Update_rows_log_event*>(0),
9748                                       extra_row_info);
9749 
9750   if (unlikely(ev == 0))
9751     return HA_ERR_OUT_OF_MEM;
9752 
9753   error= ev->add_row_data(before_row, before_size) ||
9754          ev->add_row_data(after_row, after_size);
9755 
9756   /* restore read/write set for the rest of execution */
9757   table->column_bitmaps_set_no_signal(old_read_set,
9758                                       old_write_set);
9759 
9760   return error;
9761 }
9762 
binlog_delete_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)9763 int THD::binlog_delete_row(TABLE* table, bool is_trans,
9764                            uchar const *record,
9765                            const uchar* extra_row_info)
9766 {
9767   DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
9768   int error= 0;
9769 
9770   /**
9771     Save a reference to the original read and write set bitmaps.
9772     We will need this to restore the bitmaps at the end.
9773    */
9774   MY_BITMAP *old_read_set= table->read_set;
9775   MY_BITMAP *old_write_set= table->write_set;
9776 
9777   /**
9778      This will remove spurious fields required during execution but
9779      not needed for binlogging. This is done according to the:
9780      binlog-row-image option.
9781    */
9782   binlog_prepare_row_images(table);
9783 
9784   /*
9785      Pack records into format for transfer. We are allocating more
9786      memory than needed, but that doesn't matter.
9787   */
9788   Row_data_memory memory(table, max_row_length(table, record));
9789   if (unlikely(!memory.has_memory()))
9790     return HA_ERR_OUT_OF_MEM;
9791 
9792   uchar *row_data= memory.slot(0);
9793 
9794   DBUG_DUMP("table->read_set", (uchar*) table->read_set->bitmap, (table->s->fields + 7) / 8);
9795   size_t const len= pack_row(table, table->read_set, row_data, record);
9796 
9797   Rows_log_event* const ev=
9798     binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
9799 				      static_cast<Delete_rows_log_event*>(0),
9800                                       extra_row_info);
9801 
9802   if (unlikely(ev == 0))
9803     return HA_ERR_OUT_OF_MEM;
9804 
9805   error= ev->add_row_data(row_data, len);
9806 
9807   /* restore read/write set for the rest of execution */
9808   table->column_bitmaps_set_no_signal(old_read_set,
9809                                       old_write_set);
9810 
9811   return error;
9812 }
9813 
binlog_prepare_row_images(TABLE * table)9814 void THD::binlog_prepare_row_images(TABLE *table)
9815 {
9816   DBUG_ENTER("THD::binlog_prepare_row_images");
9817   /**
9818     Remove from read_set spurious columns. The write_set has been
9819     handled before in table->mark_columns_needed_for_update.
9820    */
9821 
9822   DBUG_PRINT_BITSET("debug", "table->read_set (before preparing): %s", table->read_set);
9823   THD *thd= table->in_use;
9824 
9825   /**
9826     if there is a primary key in the table (ie, user declared PK or a
9827     non-null unique index) and we dont want to ship the entire image,
9828     and the handler involved supports this.
9829    */
9830   if (table->s->primary_key < MAX_KEY &&
9831       (thd->variables.binlog_row_image < BINLOG_ROW_IMAGE_FULL) &&
9832       !ha_check_storage_engine_flag(table->s->db_type(), HTON_NO_BINLOG_ROW_OPT))
9833   {
9834     /**
9835       Just to be sure that tmp_set is currently not in use as
9836       the read_set already.
9837     */
9838     DBUG_ASSERT(table->read_set != &table->tmp_set);
9839 
9840     bitmap_clear_all(&table->tmp_set);
9841 
9842     switch(thd->variables.binlog_row_image)
9843     {
9844       case BINLOG_ROW_IMAGE_MINIMAL:
9845         /* MINIMAL: Mark only PK */
9846         table->mark_columns_used_by_index_no_reset(table->s->primary_key,
9847                                                    &table->tmp_set);
9848         break;
9849       case BINLOG_ROW_IMAGE_NOBLOB:
9850         /**
9851           NOBLOB: Remove unnecessary BLOB fields from read_set
9852                   (the ones that are not part of PK).
9853          */
9854         bitmap_union(&table->tmp_set, table->read_set);
9855         for (Field **ptr=table->field ; *ptr ; ptr++)
9856         {
9857           Field *field= (*ptr);
9858           if ((field->type() == MYSQL_TYPE_BLOB) &&
9859               !(field->flags & PRI_KEY_FLAG))
9860             bitmap_clear_bit(&table->tmp_set, field->field_index);
9861         }
9862         break;
9863       default:
9864         DBUG_ASSERT(0); // impossible.
9865     }
9866 
9867     /* set the temporary read_set */
9868     table->column_bitmaps_set_no_signal(&table->tmp_set,
9869                                         table->write_set);
9870   }
9871 
9872   DBUG_PRINT_BITSET("debug", "table->read_set (after preparing): %s", table->read_set);
9873   DBUG_VOID_RETURN;
9874 }
9875 
9876 
binlog_flush_pending_rows_event(bool stmt_end,bool is_transactional)9877 int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional)
9878 {
9879   DBUG_ENTER("THD::binlog_flush_pending_rows_event");
9880   /*
9881     We shall flush the pending event even if we are not in row-based
9882     mode: it might be the case that we left row-based mode before
9883     flushing anything (e.g., if we have explicitly locked tables).
9884    */
9885   if (!mysql_bin_log.is_open())
9886     DBUG_RETURN(0);
9887 
9888   /*
9889     Mark the event as the last event of a statement if the stmt_end
9890     flag is set.
9891   */
9892   int error= 0;
9893   if (Rows_log_event *pending= binlog_get_pending_rows_event(is_transactional))
9894   {
9895     if (stmt_end)
9896     {
9897       pending->set_flags(Rows_log_event::STMT_END_F);
9898       binlog_table_maps= 0;
9899     }
9900 
9901     error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0,
9902                                                           is_transactional);
9903   }
9904 
9905   DBUG_RETURN(error);
9906 }
9907 
9908 
9909 /**
9910    binlog_row_event_extra_data_eq
9911 
9912    Comparator for two binlog row event extra data
9913    pointers.
9914 
9915    It compares their significant bytes.
9916 
9917    Null pointers are acceptable
9918 
9919    @param a
9920      first pointer
9921 
9922    @param b
9923      first pointer
9924 
9925    @return
9926      true if the referenced structures are equal
9927 */
9928 bool
binlog_row_event_extra_data_eq(const uchar * a,const uchar * b)9929 THD::binlog_row_event_extra_data_eq(const uchar* a,
9930                                     const uchar* b)
9931 {
9932   return ((a == b) ||
9933           ((a != NULL) &&
9934            (b != NULL) &&
9935            (a[EXTRA_ROW_INFO_LEN_OFFSET] ==
9936             b[EXTRA_ROW_INFO_LEN_OFFSET]) &&
9937            (memcmp(a, b,
9938                    a[EXTRA_ROW_INFO_LEN_OFFSET]) == 0)));
9939 }
9940 
9941 #if !defined(DBUG_OFF) && !defined(_lint)
9942 static const char *
show_query_type(THD::enum_binlog_query_type qtype)9943 show_query_type(THD::enum_binlog_query_type qtype)
9944 {
9945   switch (qtype) {
9946   case THD::ROW_QUERY_TYPE:
9947     return "ROW";
9948   case THD::STMT_QUERY_TYPE:
9949     return "STMT";
9950   case THD::QUERY_TYPE_COUNT:
9951   default:
9952     DBUG_ASSERT(0 <= qtype && qtype < THD::QUERY_TYPE_COUNT);
9953   }
9954   static char buf[64];
9955   sprintf(buf, "UNKNOWN#%d", qtype);
9956   return buf;
9957 }
9958 #endif
9959 
9960 /**
9961   Auxiliary function to reset the limit unsafety warning suppression.
9962 */
reset_binlog_unsafe_suppression()9963 static void reset_binlog_unsafe_suppression()
9964 {
9965   DBUG_ENTER("reset_binlog_unsafe_suppression");
9966   unsafe_warning_suppression_is_activated= false;
9967   limit_unsafe_warning_count= 0;
9968   limit_unsafe_suppression_start_time= my_getsystime()/10000000;
9969   DBUG_VOID_RETURN;
9970 }
9971 
9972 /**
9973   Auxiliary function to print warning in the error log.
9974 */
print_unsafe_warning_to_log(int unsafe_type,char * buf,char * query)9975 static void print_unsafe_warning_to_log(int unsafe_type, char* buf,
9976                                  char* query)
9977 {
9978   DBUG_ENTER("print_unsafe_warning_in_log");
9979   sprintf(buf, ER(ER_BINLOG_UNSAFE_STATEMENT),
9980           ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
9981   sql_print_warning(ER(ER_MESSAGE_AND_STATEMENT), buf, query);
9982   DBUG_VOID_RETURN;
9983 }
9984 
9985 /**
9986   Auxiliary function to check if the warning for limit unsafety should be
9987   thrown or suppressed. Details of the implementation can be found in the
9988   comments inline.
9989   SYNOPSIS:
9990   @params
9991    buf         - buffer to hold the warning message text
9992    unsafe_type - The type of unsafety.
9993    query       - The actual query statement.
9994 
9995   TODO: Remove this function and implement a general service for all warnings
9996   that would prevent flooding the error log.
9997 */
do_unsafe_limit_checkout(char * buf,int unsafe_type,char * query)9998 static void do_unsafe_limit_checkout(char* buf, int unsafe_type, char* query)
9999 {
10000   ulonglong now;
10001   DBUG_ENTER("do_unsafe_limit_checkout");
10002   DBUG_ASSERT(unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT);
10003   limit_unsafe_warning_count++;
10004   /*
10005     INITIALIZING:
10006     If this is the first time this function is called with log warning
10007     enabled, the monitoring the unsafe warnings should start.
10008   */
10009   if (limit_unsafe_suppression_start_time == 0)
10010   {
10011     limit_unsafe_suppression_start_time= my_getsystime()/10000000;
10012     print_unsafe_warning_to_log(unsafe_type, buf, query);
10013   }
10014   else
10015   {
10016     if (!unsafe_warning_suppression_is_activated)
10017       print_unsafe_warning_to_log(unsafe_type, buf, query);
10018 
10019     if (limit_unsafe_warning_count >=
10020         LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT)
10021     {
10022       now= my_getsystime()/10000000;
10023       if (!unsafe_warning_suppression_is_activated)
10024       {
10025         /*
10026           ACTIVATION:
10027           We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT warnings in
10028           less than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT we activate the
10029           suppression.
10030         */
10031         if ((now-limit_unsafe_suppression_start_time) <=
10032                        LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
10033         {
10034           unsafe_warning_suppression_is_activated= true;
10035           DBUG_PRINT("info",("A warning flood has been detected and the limit \
10036 unsafety warning suppression has been activated."));
10037         }
10038         else
10039         {
10040           /*
10041            there is no flooding till now, therefore we restart the monitoring
10042           */
10043           limit_unsafe_suppression_start_time= my_getsystime()/10000000;
10044           limit_unsafe_warning_count= 0;
10045         }
10046       }
10047       else
10048       {
10049         /*
10050           Print the suppression note and the unsafe warning.
10051         */
10052         sql_print_information("The following warning was suppressed %d times \
10053 during the last %d seconds in the error log",
10054                               limit_unsafe_warning_count,
10055                               (int)
10056                               (now-limit_unsafe_suppression_start_time));
10057         print_unsafe_warning_to_log(unsafe_type, buf, query);
10058         /*
10059           DEACTIVATION: We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT
10060           warnings in more than  LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT, the
10061           suppression should be deactivated.
10062         */
10063         if ((now - limit_unsafe_suppression_start_time) >
10064             LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
10065         {
10066           reset_binlog_unsafe_suppression();
10067           DBUG_PRINT("info",("The limit unsafety warning supression has been \
10068 deactivated"));
10069         }
10070       }
10071       limit_unsafe_warning_count= 0;
10072     }
10073   }
10074   DBUG_VOID_RETURN;
10075 }
10076 
10077 /**
10078   Auxiliary method used by @c binlog_query() to raise warnings.
10079 
10080   The type of warning and the type of unsafeness is stored in
10081   THD::binlog_unsafe_warning_flags.
10082 */
issue_unsafe_warnings()10083 void THD::issue_unsafe_warnings()
10084 {
10085   char buf[MYSQL_ERRMSG_SIZE * 2];
10086   DBUG_ENTER("issue_unsafe_warnings");
10087   /*
10088     Ensure that binlog_unsafe_warning_flags is big enough to hold all
10089     bits.  This is actually a constant expression.
10090   */
10091   DBUG_ASSERT(LEX::BINLOG_STMT_UNSAFE_COUNT <=
10092               sizeof(binlog_unsafe_warning_flags) * CHAR_BIT);
10093 
10094   uint32 unsafe_type_flags= binlog_unsafe_warning_flags;
10095 
10096   if ((unsafe_type_flags & (1U << LEX::BINLOG_STMT_UNSAFE_LIMIT)) != 0)
10097   {
10098     if ((lex->sql_command == SQLCOM_DELETE || lex->sql_command == SQLCOM_UPDATE) &&
10099         lex->select_lex.select_limit)
10100     {
10101       ORDER *order= (ORDER *) ((lex->select_lex.order_list.elements) ?
10102                                lex->select_lex.order_list.first : NULL);
10103       if ((lex->select_lex.select_limit &&
10104            lex->select_lex.select_limit->fixed &&
10105            lex->select_lex.select_limit->val_int() == 0) ||
10106           is_order_deterministic(lex->query_tables,
10107                                    lex->select_lex.where, order))
10108       {
10109         unsafe_type_flags&= ~(1U << LEX::BINLOG_STMT_UNSAFE_LIMIT);
10110       }
10111     }
10112     if ((lex->sql_command == SQLCOM_INSERT_SELECT ||
10113          lex->sql_command == SQLCOM_REPLACE_SELECT) &&
10114         order_deterministic)
10115     {
10116       unsafe_type_flags&= ~(1U << LEX::BINLOG_STMT_UNSAFE_LIMIT);
10117     }
10118 
10119   }
10120 
10121   /*
10122     For each unsafe_type, check if the statement is unsafe in this way
10123     and issue a warning.
10124   */
10125   for (int unsafe_type=0;
10126        unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
10127        unsafe_type++)
10128   {
10129     if ((unsafe_type_flags & (1 << unsafe_type)) != 0)
10130     {
10131       push_warning_printf(this, Sql_condition::WARN_LEVEL_NOTE,
10132                           ER_BINLOG_UNSAFE_STATEMENT,
10133                           ER(ER_BINLOG_UNSAFE_STATEMENT),
10134                           ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
10135       if (log_warnings && ((opt_log_warnings_suppress & (ULL(1) << log_warnings_suppress_1592)) == 0))
10136       {
10137         if (unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT)
10138           do_unsafe_limit_checkout( buf, unsafe_type, query());
10139         else //cases other than LIMIT unsafety
10140           print_unsafe_warning_to_log(unsafe_type, buf, query());
10141       }
10142     }
10143   }
10144   DBUG_VOID_RETURN;
10145 }
10146 
10147 /**
10148   Log the current query.
10149 
10150   The query will be logged in either row format or statement format
10151   depending on the value of @c current_stmt_binlog_format_row field and
10152   the value of the @c qtype parameter.
10153 
10154   This function must be called:
10155 
10156   - After the all calls to ha_*_row() functions have been issued.
10157 
10158   - After any writes to system tables. Rationale: if system tables
10159     were written after a call to this function, and the master crashes
10160     after the call to this function and before writing the system
10161     tables, then the master and slave get out of sync.
10162 
10163   - Before tables are unlocked and closed.
10164 
10165   @see decide_logging_format
10166 
10167   @retval 0 Success
10168 
10169   @retval nonzero If there is a failure when writing the query (e.g.,
10170   write failure), then the error code is returned.
10171 */
binlog_query(THD::enum_binlog_query_type qtype,char const * query_arg,ulong query_len,bool is_trans,bool direct,bool suppress_use,int errcode)10172 int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
10173                       ulong query_len, bool is_trans, bool direct,
10174                       bool suppress_use, int errcode)
10175 {
10176   DBUG_ENTER("THD::binlog_query");
10177   DBUG_PRINT("enter", ("qtype: %s  query: '%s'",
10178                        show_query_type(qtype), query_arg));
10179   DBUG_ASSERT(query_arg && mysql_bin_log.is_open());
10180 
10181   if (get_binlog_local_stmt_filter() == BINLOG_FILTER_SET)
10182   {
10183     /*
10184       The current statement is to be ignored, and not written to
10185       the binlog. Do not call issue_unsafe_warnings().
10186     */
10187     DBUG_RETURN(0);
10188   }
10189 
10190   /*
10191     If we are not in prelocked mode, mysql_unlock_tables() will be
10192     called after this binlog_query(), so we have to flush the pending
10193     rows event with the STMT_END_F set to unlock all tables at the
10194     slave side as well.
10195 
10196     If we are in prelocked mode, the flushing will be done inside the
10197     top-most close_thread_tables().
10198   */
10199   if (this->locked_tables_mode <= LTM_LOCK_TABLES)
10200     if (int error= binlog_flush_pending_rows_event(TRUE, is_trans))
10201       DBUG_RETURN(error);
10202 
10203   /*
10204     Warnings for unsafe statements logged in statement format are
10205     printed in three places instead of in decide_logging_format().
10206     This is because the warnings should be printed only if the statement
10207     is actually logged. When executing decide_logging_format(), we cannot
10208     know for sure if the statement will be logged:
10209 
10210     1 - sp_head::execute_procedure which prints out warnings for calls to
10211     stored procedures.
10212 
10213     2 - sp_head::execute_function which prints out warnings for calls
10214     involving functions.
10215 
10216     3 - THD::binlog_query (here) which prints warning for top level
10217     statements not covered by the two cases above: i.e., if not insided a
10218     procedure and a function.
10219 
10220     Besides, we should not try to print these warnings if it is not
10221     possible to write statements to the binary log as it happens when
10222     the execution is inside a function, or generaly speaking, when
10223     the variables.option_bits & OPTION_BIN_LOG is false.
10224   */
10225   if ((variables.option_bits & OPTION_BIN_LOG) &&
10226       sp_runtime_ctx == NULL && !binlog_evt_union.do_union)
10227   {
10228     issue_unsafe_warnings();
10229     order_deterministic= true;
10230   }
10231 
10232   switch (qtype) {
10233     /*
10234       ROW_QUERY_TYPE means that the statement may be logged either in
10235       row format or in statement format.  If
10236       current_stmt_binlog_format is row, it means that the
10237       statement has already been logged in row format and hence shall
10238       not be logged again.
10239     */
10240   case THD::ROW_QUERY_TYPE:
10241     DBUG_PRINT("debug",
10242                ("is_current_stmt_binlog_format_row: %d",
10243                 is_current_stmt_binlog_format_row()));
10244     if (is_current_stmt_binlog_format_row())
10245       DBUG_RETURN(0);
10246     /* Fall through */
10247 
10248     /*
10249       STMT_QUERY_TYPE means that the query must be logged in statement
10250       format; it cannot be logged in row format.  This is typically
10251       used by DDL statements.  It is an error to use this query type
10252       if current_stmt_binlog_format_row is row.
10253 
10254       @todo Currently there are places that call this method with
10255       STMT_QUERY_TYPE and current_stmt_binlog_format is row.  Fix those
10256       places and add assert to ensure correct behavior. /Sven
10257     */
10258   case THD::STMT_QUERY_TYPE:
10259     /*
10260       The MYSQL_LOG::write() function will set the STMT_END_F flag and
10261       flush the pending rows event if necessary.
10262     */
10263     {
10264       Query_log_event qinfo(this, query_arg, query_len, is_trans, direct,
10265                             suppress_use, errcode);
10266       /*
10267         Binlog table maps will be irrelevant after a Query_log_event
10268         (they are just removed on the slave side) so after the query
10269         log event is written to the binary log, we pretend that no
10270         table maps were written.
10271        */
10272       int error= mysql_bin_log.write_event(&qinfo);
10273       binlog_table_maps= 0;
10274       DBUG_RETURN(error);
10275     }
10276     break;
10277 
10278   case THD::QUERY_TYPE_COUNT:
10279   default:
10280     DBUG_ASSERT(0 <= qtype && qtype < QUERY_TYPE_COUNT);
10281   }
10282   DBUG_RETURN(0);
10283 }
10284 
10285 #endif /* !defined(MYSQL_CLIENT) */
10286 
show_binlog_vars(THD * thd,SHOW_VAR * var,char * buff)10287 static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff)
10288 {
10289   mysql_mutex_assert_owner(&LOCK_status);
10290 
10291   const binlog_cache_mngr *cache_mngr
10292     = (thd && opt_bin_log)
10293     ? static_cast<binlog_cache_mngr *>(thd_get_ha_data(thd, binlog_hton))
10294     : NULL;
10295 
10296   const bool have_snapshot= (cache_mngr &&
10297                        cache_mngr->binlog_info.log_file_name[0] != '\0');
10298 
10299   if (have_snapshot)
10300   {
10301     set_binlog_snapshot_file(cache_mngr->binlog_info.log_file_name);
10302     binlog_snapshot_position= cache_mngr->binlog_info.pos;
10303   }
10304   else if (mysql_bin_log.is_open())
10305   {
10306     set_binlog_snapshot_file(binlog_global_snapshot_file);
10307     binlog_snapshot_position= binlog_global_snapshot_position;
10308   }
10309   else
10310   {
10311     binlog_snapshot_file[0]= '\0';
10312     binlog_snapshot_position= 0;
10313   }
10314   var->type= SHOW_ARRAY;
10315   var->value= (char *)&binlog_status_vars_detail;
10316   return 0;
10317 }
10318 
/*
  Top-level status variable table for the binlog plugin; it is the
  table passed as the status variables entry of
  mysql_declare_plugin(binlog).

  The single real entry, "Binlog", is of type SHOW_FUNC and names
  show_binlog_vars() as its callback; that callback in turn exposes
  binlog_status_vars_detail as a SHOW_ARRAY.
*/
static SHOW_VAR binlog_status_vars_top[]= {
  {"Binlog", (char *) &show_binlog_vars, SHOW_FUNC},
  /* Entry with a NullS name; presumably the end-of-array marker. */
  {NullS, NullS, SHOW_LONG}
};
10323 
/*
  Storage engine descriptor for the binlog pseudo storage engine. Its
  address is passed as the plugin info pointer in
  mysql_declare_plugin(binlog); the only initialized member is the
  handlerton interface version.
*/
struct st_mysql_storage_engine binlog_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
10326 
10327 /** @} */
10328 
/*
  Plugin declaration that registers the binlog as a pseudo storage
  engine. The initializer is positional; the per-field notes below
  follow the usual struct st_mysql_plugin layout (NOTE(review): confirm
  against plugin.h for this server version).
*/
mysql_declare_plugin(binlog)
{
  MYSQL_STORAGE_ENGINE_PLUGIN, /* plugin type */
  &binlog_storage_engine, /* type-specific descriptor */
  "binlog", /* plugin name */
  "MySQL AB", /* author */
  "This is a pseudo storage engine to represent the binlog in a transaction",
  PLUGIN_LICENSE_GPL, /* license */
  binlog_init, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0100 /* 1.0 */,
  binlog_status_vars_top,     /* status variables                */
  NULL,                       /* system variables                */
  NULL,                       /* config options                  */
  0, /* presumably the plugin flags field; no flags set */
}
mysql_declare_plugin_end;
10346