1 /* Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22
23
24 #include "my_global.h"
25 #include "log.h"
26 #include "binlog.h"
27 #include "log_event.h"
28 #include "rpl_filter.h"
29 #include "rpl_rli.h"
30 #include "sql_plugin.h"
31 #include "rpl_handler.h"
32 #include "rpl_info_factory.h"
33 #include "rpl_utility.h"
34 #include "debug_sync.h"
35 #include "global_threads.h"
36 #include "sql_show.h"
37 #include "sql_parse.h"
38 #include "sql_base.h"
39 #include "rpl_mi.h"
40 #include <list>
41 #include <string>
42 #include <sstream>
43 #include <my_stacktrace.h>
44
/* Bring the standard-library names used in this file into scope. */
using std::max;
using std::min;
using std::string;
using std::list;

/*
  Expands to the stringified name of flag F followed by a single space when
  (V & F) is non-zero, and to the empty string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
51
52 /**
53 @defgroup Binary_Log Binary Log
54 @{
55 */
56
/*
  All-bits-set my_off_t value. binlog_trx_cache_data uses it as the
  "no position recorded" marker for its before-statement position.
*/
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)

/*
  Constants required for the limit unsafe warnings suppression.
*/
/* Seconds after which the limit unsafe warnings suppression is activated. */
#define LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT 50
/* Number of limit unsafe warnings after which the suppression is activated. */
#define LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT 50
/*
  Upper bound on the attach attempts made by
  Thread_excursion::try_to_attach_to() before the server is terminated.
*/
#define MAX_SESSION_ATTACH_TRIES 10

/*
  State for the limit unsafe warnings suppression; the code that reads and
  updates these is outside this part of the file.
*/
static ulonglong limit_unsafe_suppression_start_time= 0;
static bool unsafe_warning_suppression_is_activated= false;
static int limit_unsafe_warning_count= 0;

/* The binlog handlerton; assigned in binlog_init(). */
static handlerton *binlog_hton;
/* NOTE(review): presumably backs the binlog_order_commits option - confirm. */
bool opt_binlog_order_commits= true;

/* NOTE(review): presumably the binlog index / base file names - confirm. */
const char *log_bin_index= 0;
const char *log_bin_basename= 0;

/* The binary log object, constructed with the address of sync_binlog_period. */
MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period);
79
/*
  Forward declarations. Except for binlog_init() itself and
  binlog_start_trans_and_stmt(), these are the callbacks that binlog_init()
  installs into the binlog handlerton.
*/
static int binlog_init(void *p);
static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event);
static int binlog_close_connection(handlerton *hton, THD *thd);
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
                                                      THD *thd);
static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);
static int binlog_clone_consistent_snapshot(handlerton *hton, THD *thd,
                                            THD *from_thd);
93
// The last published global binlog position (file name and offset)
static char binlog_global_snapshot_file[FN_REFLEN];
static ulonglong binlog_global_snapshot_position;

// Binlog position variables for SHOW STATUS
static char binlog_snapshot_file[FN_REFLEN];
static ulonglong binlog_snapshot_position;

/*
  Status variable table exposing binlog_snapshot_file as "snapshot_file"
  and binlog_snapshot_position as "snapshot_position". The entry with a
  NullS name terminates the table.
*/
static SHOW_VAR binlog_status_vars_detail[]=
{
  {"snapshot_file",
   (char *)&binlog_snapshot_file, SHOW_CHAR},
  {"snapshot_position",
   (char *)&binlog_snapshot_position, SHOW_LONGLONG},
  {NullS, NullS, SHOW_LONG}
};
110
111 /**
112 Print system time.
113 */
114
print_system_time()115 static void print_system_time()
116 {
117 #ifdef __WIN__
118 SYSTEMTIME utc_time;
119 GetSystemTime(&utc_time);
120 const long hrs= utc_time.wHour;
121 const long mins= utc_time.wMinute;
122 const long secs= utc_time.wSecond;
123 #else
124 /* Using time() instead of my_time() to avoid looping */
125 const time_t curr_time= time(NULL);
126 /* Calculate time of day */
127 const long tmins = curr_time / 60;
128 const long thrs = tmins / 60;
129 const long hrs = thrs % 24;
130 const long mins = tmins % 60;
131 const long secs = curr_time % 60;
132 #endif
133 char hrs_buf[3]= "00";
134 char mins_buf[3]= "00";
135 char secs_buf[3]= "00";
136 int base= 10;
137 my_safe_itoa(base, hrs, &hrs_buf[2]);
138 my_safe_itoa(base, mins, &mins_buf[2]);
139 my_safe_itoa(base, secs, &secs_buf[2]);
140
141 my_safe_printf_stderr("---------- %s:%s:%s UTC - ",
142 hrs_buf, mins_buf, secs_buf);
143 }
144
145
146 /**
147 Helper class to perform a thread excursion.
148
149 This class is used to temporarily switch to another session (THD
150 structure). It will set up thread specific "globals" correctly
151 so that the POSIX thread looks exactly like the session attached to.
152 However, PSI_thread info is not touched as it is required to show
153 the actual physial view in PFS instrumentation i.e., it should
154 depict as the real thread doing the work instead of thread it switched
155 to.
156
157 On destruction, the original session (which is supplied to the
158 constructor) will be re-attached automatically. For example, with
159 this code, the value of @c current_thd will be the same before and
160 after execution of the code.
161
162 @code
163 {
164 Thread_excursion excursion(current_thd);
165 for (int i = 0 ; i < count ; ++i)
166 excursion.attach_to(other_thd[i]);
167 }
168 @endcode
169
170 @warning The class is not designed to be inherited from.
171 */
172
173 class Thread_excursion
174 {
175 public:
Thread_excursion(THD * thd)176 Thread_excursion(THD *thd)
177 : m_original_thd(thd)
178 {
179 }
180
~Thread_excursion()181 ~Thread_excursion() {
182 #ifndef EMBEDDED_LIBRARY
183 if (unlikely(setup_thread_globals(m_original_thd)))
184 DBUG_ASSERT(0); // Out of memory?!
185 #endif
186 }
187
188 /**
189 Try to attach the POSIX thread to a session.
190 - This function attaches the POSIX thread to a session
191 in MAX_SESSION_ATTACH_TRIES tries when encountering
192 'out of memory' error, and terminates the server after
193 failed in MAX_SESSION_ATTACH_TRIES tries.
194
195 @param[in] thd The thd of a session
196 */
try_to_attach_to(THD * thd)197 void try_to_attach_to(THD *thd)
198 {
199 int i= 0;
200 /*
201 Attach the POSIX thread to a session in MAX_SESSION_ATTACH_TRIES
202 tries when encountering 'out of memory' error.
203 */
204 while (i < MAX_SESSION_ATTACH_TRIES)
205 {
206 /*
207 Currently attach_to(...) returns ER_OUTOFMEMORY or 0. So
208 we continue to attach the POSIX thread when encountering
209 the ER_OUTOFMEMORY error. Please take care other error
210 returned from attach_to(...) in future.
211 */
212 if (!attach_to(thd))
213 {
214 if (i > 0)
215 sql_print_warning("Server overcomes the temporary 'out of memory' "
216 "in '%d' tries while attaching to session thread "
217 "during the group commit phase.\n", i + 1);
218 break;
219 }
220 i++;
221 }
222 /*
223 Terminate the server after failed to attach the POSIX thread
224 to a session in MAX_SESSION_ATTACH_TRIES tries.
225 */
226 if (MAX_SESSION_ATTACH_TRIES == i)
227 {
228 print_system_time();
229 my_safe_printf_stderr("%s", "[Fatal] Out of memory while attaching to "
230 "session thread during the group commit phase. "
231 "Data consistency between master and slave can "
232 "be guaranteed after server restarts.\n");
233 _exit(EXIT_FAILURE);
234 }
235 }
236
237 private:
238
239 /**
240 Attach the POSIX thread to a session.
241 */
attach_to(THD * thd)242 int attach_to(THD *thd)
243 {
244 #ifndef EMBEDDED_LIBRARY
245 if (DBUG_EVALUATE_IF("simulate_session_attach_error", 1, 0)
246 || unlikely(setup_thread_globals(thd)))
247 {
248 /*
249 Indirectly uses pthread_setspecific, which can only return
250 ENOMEM or EINVAL. Since store_globals are using correct keys,
251 the only alternative is out of memory.
252 */
253 return ER_OUTOFMEMORY;
254 }
255 #endif /* EMBEDDED_LIBRARY */
256 return 0;
257 }
258
setup_thread_globals(THD * thd) const259 int setup_thread_globals(THD *thd) const {
260 int error= 0;
261 THD *original_thd= my_pthread_getspecific(THD*, THR_THD);
262 MEM_ROOT* original_mem_root= my_pthread_getspecific(MEM_ROOT*, THR_MALLOC);
263 if ((error= my_pthread_setspecific_ptr(THR_THD, thd)))
264 goto exit0;
265 if ((error= my_pthread_setspecific_ptr(THR_MALLOC, &thd->mem_root)))
266 goto exit1;
267 if ((error= set_mysys_var(thd->mysys_var)))
268 goto exit2;
269 goto exit0;
270 exit2:
271 error= my_pthread_setspecific_ptr(THR_MALLOC, original_mem_root);
272 exit1:
273 error= my_pthread_setspecific_ptr(THR_THD, original_thd);
274 exit0:
275 return error;
276 }
277
278 THD *m_original_thd;
279 };
280
281
282 /**
283 Caches for non-transactional and transactional data before writing
284 it to the binary log.
285
286 @todo All the access functions for the flags suggest that the
287 encapsuling is not done correctly, so try to move any logic that
288 requires access to the flags into the cache.
289 */
290 class binlog_cache_data
291 {
292 public:
293
binlog_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)294 binlog_cache_data(bool trx_cache_arg,
295 my_off_t max_binlog_cache_size_arg,
296 ulong *ptr_binlog_cache_use_arg,
297 ulong *ptr_binlog_cache_disk_use_arg)
298 : m_pending(0), saved_max_binlog_cache_size(max_binlog_cache_size_arg),
299 ptr_binlog_cache_use(ptr_binlog_cache_use_arg),
300 ptr_binlog_cache_disk_use(ptr_binlog_cache_disk_use_arg)
301 {
302 reset();
303 flags.transactional= trx_cache_arg;
304 cache_log.end_of_file= saved_max_binlog_cache_size;
305 }
306
307 int finalize(THD *thd, Log_event *end_event);
308 int flush(THD *thd, my_off_t *bytes, bool *wrote_xid);
309 int write_event(THD *thd, Log_event *event);
310
~binlog_cache_data()311 virtual ~binlog_cache_data()
312 {
313 DBUG_ASSERT(is_binlog_empty());
314 close_cached_file(&cache_log);
315 }
316
is_binlog_empty() const317 bool is_binlog_empty() const
318 {
319 my_off_t pos= my_b_tell(&cache_log);
320 DBUG_PRINT("debug", ("%s_cache - pending: 0x%llx, bytes: %llu",
321 (flags.transactional ? "trx" : "stmt"),
322 (ulonglong) pending(), (ulonglong) pos));
323 return pending() == NULL && pos == 0;
324 }
325
is_group_cache_empty() const326 bool is_group_cache_empty() const
327 {
328 return group_cache.is_empty();
329 }
330
331 #ifndef DBUG_OFF
dbug_is_finalized() const332 bool dbug_is_finalized() const {
333 return flags.finalized;
334 }
335 #endif
336
pending() const337 Rows_log_event *pending() const
338 {
339 return m_pending;
340 }
341
set_pending(Rows_log_event * const pending)342 void set_pending(Rows_log_event *const pending)
343 {
344 m_pending= pending;
345 }
346
set_incident(void)347 void set_incident(void)
348 {
349 flags.incident= true;
350 }
351
has_incident(void) const352 bool has_incident(void) const
353 {
354 return flags.incident;
355 }
356
357 /**
358 Sets the binlog_cache_data::Flags::flush_error flag if there
359 is an error while flushing cache to the file.
360
361 @param thd The client thread that is executing the transaction.
362 */
set_flush_error(THD * thd)363 void set_flush_error(THD *thd)
364 {
365 flags.flush_error= true;
366 if(is_trx_cache())
367 {
368 /*
369 If the cache is a transactional cache and if the write
370 has failed due to ENOSPC, then my_write() would have
371 set EE_WRITE error, so clear the error and create an
372 equivalent server error.
373 */
374 if (thd->is_error())
375 thd->clear_error();
376 char errbuf[MYSYS_STRERROR_SIZE];
377 my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), my_filename(cache_log.file),
378 errno, my_strerror(errbuf, sizeof(errbuf), errno));
379 }
380 }
381
get_flush_error(void) const382 bool get_flush_error(void) const
383 {
384 return flags.flush_error;
385 }
386
has_xid() const387 bool has_xid() const {
388 // There should only be an XID event if we are transactional
389 DBUG_ASSERT((flags.transactional && flags.with_xid) || !flags.with_xid);
390 return flags.with_xid;
391 }
392
is_trx_cache() const393 bool is_trx_cache() const
394 {
395 return flags.transactional;
396 }
397
get_byte_position() const398 my_off_t get_byte_position() const
399 {
400 return my_b_tell(&cache_log);
401 }
402
reset()403 virtual void reset()
404 {
405 compute_statistics();
406 truncate(0);
407
408 /*
409 If IOCACHE has a file associated, change its size to 0.
410 It is safer to do it here, since we are certain that one
411 asked the cache to go to position 0 with truncate.
412 */
413 if(cache_log.file != -1)
414 {
415 int error= 0;
416 if((error= my_chsize(cache_log.file, 0, 0, MYF(MY_WME))))
417 sql_print_warning("Unable to resize binlog IOCACHE auxilary file");
418
419 DBUG_EXECUTE_IF("show_io_cache_size",
420 {
421 ulong file_size= my_seek(cache_log.file,
422 0L,MY_SEEK_END,MYF(MY_WME+MY_FAE));
423 sql_print_error("New size:%ld", file_size);
424 });
425 }
426
427 flags.incident= false;
428 flags.with_xid= false;
429 flags.immediate= false;
430 flags.finalized= false;
431 flags.flush_error= false;
432 /*
433 The truncate function calls reinit_io_cache that calls my_b_flush_io_cache
434 which may increase disk_writes. This breaks the disk_writes use by the
435 binary log which aims to compute the ratio between in-memory cache usage
436 and disk cache usage. To avoid this undesirable behavior, we reset the
437 variable after truncating the cache.
438 */
439 cache_log.disk_writes= 0;
440 group_cache.clear();
441 DBUG_ASSERT(is_binlog_empty());
442 }
443
444 /*
445 Sets the write position to point at the position given. If the
446 cache has swapped to a file, it reinitializes it, so that the
447 proper data is added to the IO_CACHE buffer. Otherwise, it just
448 does a my_b_seek.
449
450 my_b_seek will not work if the cache has swapped, that's why
451 we do this workaround.
452
453 @param[IN] pos the new write position.
454 @param[IN] use_reinit if the position should be reset resorting
455 to reset_io_cache (which may issue a flush_io_cache
456 inside)
457
458 @return The previous write position.
459 */
reset_write_pos(my_off_t pos,bool use_reinit)460 my_off_t reset_write_pos(my_off_t pos, bool use_reinit)
461 {
462 DBUG_ENTER("reset_write_pos");
463 DBUG_ASSERT(cache_log.type == WRITE_CACHE);
464
465 my_off_t oldpos= get_byte_position();
466
467 if (use_reinit)
468 reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, 0);
469 else
470 my_b_seek(&cache_log, pos);
471
472 DBUG_RETURN(oldpos);
473 }
474
475 /*
476 Cache to store data before copying it to the binary log.
477 */
478 IO_CACHE cache_log;
479
480 /**
481 The group cache for this cache.
482 */
483 Group_cache group_cache;
484
485 protected:
486 /*
487 It truncates the cache to a certain position. This includes deleting the
488 pending event.
489 */
truncate(my_off_t pos)490 void truncate(my_off_t pos)
491 {
492 DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
493 remove_pending_event();
494 /*
495 Whenever there is an error while flushing cache to file,
496 the local cache will not be in a normal state and the same
497 cache cannot be used without facing an assert.
498 So, clear the cache if there is a flush error.
499 */
500 reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, get_flush_error());
501 cache_log.end_of_file= saved_max_binlog_cache_size;
502 }
503
504 /**
505 Flush pending event to the cache buffer.
506 */
flush_pending_event(THD * thd)507 int flush_pending_event(THD *thd) {
508 if (m_pending)
509 {
510 m_pending->set_flags(Rows_log_event::STMT_END_F);
511 if (int error= write_event(thd, m_pending))
512 return error;
513 thd->clear_binlog_table_maps();
514 }
515 return 0;
516 }
517
518 /**
519 Remove the pending event.
520 */
remove_pending_event()521 int remove_pending_event() {
522 delete m_pending;
523 m_pending= NULL;
524 return 0;
525 }
526 struct Flags {
527 /*
528 Defines if this is either a trx-cache or stmt-cache, respectively, a
529 transactional or non-transactional cache.
530 */
531 bool transactional:1;
532
533 /*
534 This indicates that some events did not get into the cache and most likely
535 it is corrupted.
536 */
537 bool incident:1;
538
539 /*
540 This indicates that the cache should be written without BEGIN/END.
541 */
542 bool immediate:1;
543
544 /*
545 This flag indicates that the buffer was finalized and has to be
546 flushed to disk.
547 */
548 bool finalized:1;
549
550 /*
551 This indicates that the cache contain an XID event.
552 */
553 bool with_xid:1;
554
555 /*
556 This flag is set to 'true' when there is an error while flushing the
557 I/O cache to file.
558 */
559 bool flush_error:1;
560 } flags;
561
562 private:
563 /*
564 Pending binrows event. This event is the event where the rows are currently
565 written.
566 */
567 Rows_log_event *m_pending;
568
569 /**
570 This function computes binlog cache and disk usage.
571 */
compute_statistics()572 void compute_statistics()
573 {
574 if (!is_binlog_empty())
575 {
576 statistic_increment(*ptr_binlog_cache_use, &LOCK_status);
577 if (cache_log.disk_writes != 0)
578 statistic_increment(*ptr_binlog_cache_disk_use, &LOCK_status);
579 }
580 }
581
582 /*
583 Stores the values of maximum size of the cache allowed when this cache
584 is configured. This corresponds to either
585 . max_binlog_cache_size or max_binlog_stmt_cache_size.
586 */
587 my_off_t saved_max_binlog_cache_size;
588
589 /*
590 Stores a pointer to the status variable that keeps track of the in-memory
591 cache usage. This corresponds to either
592 . binlog_cache_use or binlog_stmt_cache_use.
593 */
594 ulong *ptr_binlog_cache_use;
595
596 /*
597 Stores a pointer to the status variable that keeps track of the disk
598 cache usage. This corresponds to either
599 . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
600 */
601 ulong *ptr_binlog_cache_disk_use;
602
603 binlog_cache_data& operator=(const binlog_cache_data& info);
604 binlog_cache_data(const binlog_cache_data& info);
605 };
606
607
608 class binlog_stmt_cache_data
609 : public binlog_cache_data
610 {
611 public:
binlog_stmt_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)612 binlog_stmt_cache_data(bool trx_cache_arg,
613 my_off_t max_binlog_cache_size_arg,
614 ulong *ptr_binlog_cache_use_arg,
615 ulong *ptr_binlog_cache_disk_use_arg)
616 : binlog_cache_data(trx_cache_arg,
617 max_binlog_cache_size_arg,
618 ptr_binlog_cache_use_arg,
619 ptr_binlog_cache_disk_use_arg)
620 {
621 }
622
623 using binlog_cache_data::finalize;
624
625 int finalize(THD *thd);
626 };
627
628
629 int
finalize(THD * thd)630 binlog_stmt_cache_data::finalize(THD *thd)
631 {
632 if (flags.immediate)
633 {
634 if (int error= finalize(thd, NULL))
635 return error;
636 }
637 else
638 {
639 Query_log_event
640 end_evt(thd, STRING_WITH_LEN("COMMIT"), false, false, true, 0, true);
641 if (int error= finalize(thd, &end_evt))
642 return error;
643 }
644 return 0;
645 }
646
647
648 class binlog_trx_cache_data : public binlog_cache_data
649 {
650 public:
binlog_trx_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)651 binlog_trx_cache_data(bool trx_cache_arg,
652 my_off_t max_binlog_cache_size_arg,
653 ulong *ptr_binlog_cache_use_arg,
654 ulong *ptr_binlog_cache_disk_use_arg)
655 : binlog_cache_data(trx_cache_arg,
656 max_binlog_cache_size_arg,
657 ptr_binlog_cache_use_arg,
658 ptr_binlog_cache_disk_use_arg),
659 m_cannot_rollback(FALSE), before_stmt_pos(MY_OFF_T_UNDEF)
660 { }
661
reset()662 void reset()
663 {
664 DBUG_ENTER("reset");
665 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
666 m_cannot_rollback= FALSE;
667 before_stmt_pos= MY_OFF_T_UNDEF;
668 binlog_cache_data::reset();
669 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
670 DBUG_VOID_RETURN;
671 }
672
cannot_rollback() const673 bool cannot_rollback() const
674 {
675 return m_cannot_rollback;
676 }
677
set_cannot_rollback()678 void set_cannot_rollback()
679 {
680 m_cannot_rollback= TRUE;
681 }
682
get_prev_position() const683 my_off_t get_prev_position() const
684 {
685 return before_stmt_pos;
686 }
687
set_prev_position(my_off_t pos)688 void set_prev_position(my_off_t pos)
689 {
690 DBUG_ENTER("set_prev_position");
691 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
692 before_stmt_pos= pos;
693 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
694 DBUG_VOID_RETURN;
695 }
696
restore_prev_position()697 void restore_prev_position()
698 {
699 DBUG_ENTER("restore_prev_position");
700 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
701 binlog_cache_data::truncate(before_stmt_pos);
702 before_stmt_pos= MY_OFF_T_UNDEF;
703 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
704 DBUG_VOID_RETURN;
705 }
706
restore_savepoint(my_off_t pos)707 void restore_savepoint(my_off_t pos)
708 {
709 DBUG_ENTER("restore_savepoint");
710 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
711 binlog_cache_data::truncate(pos);
712 if (pos <= before_stmt_pos)
713 before_stmt_pos= MY_OFF_T_UNDEF;
714 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
715 DBUG_VOID_RETURN;
716 }
717
718 using binlog_cache_data::truncate;
719
720 int truncate(THD *thd, bool all);
721
722 private:
723 /*
724 It will be set TRUE if any statement which cannot be rolled back safely
725 is put in trx_cache.
726 */
727 bool m_cannot_rollback;
728
729 /*
730 Binlog position before the start of the current statement.
731 */
732 my_off_t before_stmt_pos;
733
734 binlog_trx_cache_data& operator=(const binlog_trx_cache_data& info);
735 binlog_trx_cache_data(const binlog_trx_cache_data& info);
736 };
737
738 class binlog_cache_mngr {
739 public:
binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,ulong * ptr_binlog_stmt_cache_use_arg,ulong * ptr_binlog_stmt_cache_disk_use_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)740 binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,
741 ulong *ptr_binlog_stmt_cache_use_arg,
742 ulong *ptr_binlog_stmt_cache_disk_use_arg,
743 my_off_t max_binlog_cache_size_arg,
744 ulong *ptr_binlog_cache_use_arg,
745 ulong *ptr_binlog_cache_disk_use_arg)
746 : stmt_cache(FALSE, max_binlog_stmt_cache_size_arg,
747 ptr_binlog_stmt_cache_use_arg,
748 ptr_binlog_stmt_cache_disk_use_arg),
749 trx_cache(TRUE, max_binlog_cache_size_arg,
750 ptr_binlog_cache_use_arg,
751 ptr_binlog_cache_disk_use_arg)
752 { }
753
get_binlog_cache_data(bool is_transactional)754 binlog_cache_data* get_binlog_cache_data(bool is_transactional)
755 {
756 if (is_transactional)
757 return &trx_cache;
758 else
759 return &stmt_cache;
760 }
761
get_binlog_cache_log(bool is_transactional)762 IO_CACHE* get_binlog_cache_log(bool is_transactional)
763 {
764 return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
765 }
766
767 /**
768 Convenience method to check if both caches are empty.
769 */
is_binlog_empty() const770 bool is_binlog_empty() const {
771 return stmt_cache.is_binlog_empty() && trx_cache.is_binlog_empty();
772 }
773
774 /*
775 clear stmt_cache and trx_cache if they are not empty
776 */
reset()777 void reset()
778 {
779 if (!stmt_cache.is_binlog_empty())
780 stmt_cache.reset();
781 if (!trx_cache.is_binlog_empty())
782 trx_cache.reset();
783 }
784
785 #ifndef DBUG_OFF
dbug_any_finalized() const786 bool dbug_any_finalized() const {
787 return stmt_cache.dbug_is_finalized() || trx_cache.dbug_is_finalized();
788 }
789 #endif
790
791 /*
792 Convenience method to flush both caches to the binary log.
793
794 @param bytes_written Pointer to variable that will be set to the
795 number of bytes written for the flush.
796 @param wrote_xid Pointer to variable that will be set to @c
797 true if any XID event was written to the
798 binary log. Otherwise, the variable will not
799 be touched.
800 @return Error code on error, zero if no error.
801 */
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)802 int flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
803 {
804 my_off_t stmt_bytes= 0;
805 my_off_t trx_bytes= 0;
806 DBUG_ASSERT(stmt_cache.has_xid() == 0);
807 if (int error= stmt_cache.flush(thd, &stmt_bytes, wrote_xid))
808 return error;
809 if (int error= trx_cache.flush(thd, &trx_bytes, wrote_xid))
810 return error;
811 *bytes_written= stmt_bytes + trx_bytes;
812 return 0;
813 }
814
815 binlog_stmt_cache_data stmt_cache;
816 binlog_trx_cache_data trx_cache;
817
818 LOG_INFO binlog_info;
819
820 private:
821
822 binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
823 binlog_cache_mngr(const binlog_cache_mngr& info);
824 };
825
826
thd_get_cache_mngr(const THD * thd)827 static binlog_cache_mngr *thd_get_cache_mngr(const THD *thd)
828 {
829 /*
830 If opt_bin_log is not set, binlog_hton->slot == -1 and hence
831 thd_get_ha_data(thd, hton) segfaults.
832 */
833 DBUG_ASSERT(opt_bin_log);
834 return (binlog_cache_mngr *)thd_get_ha_data(thd, binlog_hton);
835 }
836
837
838 /**
839 Checks if the BINLOG_CACHE_SIZE's value is greater than MAX_BINLOG_CACHE_SIZE.
840 If this happens, the BINLOG_CACHE_SIZE is set to MAX_BINLOG_CACHE_SIZE.
841 */
check_binlog_cache_size(THD * thd)842 void check_binlog_cache_size(THD *thd)
843 {
844 if (binlog_cache_size > max_binlog_cache_size)
845 {
846 if (thd)
847 {
848 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
849 ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX,
850 ER(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
851 (ulong) binlog_cache_size,
852 (ulong) max_binlog_cache_size);
853 }
854 else
855 {
856 sql_print_warning(ER_DEFAULT(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
857 (ulong) binlog_cache_size,
858 (ulong) max_binlog_cache_size);
859 }
860 binlog_cache_size= max_binlog_cache_size;
861 }
862 }
863
864 /**
865 Checks if the BINLOG_STMT_CACHE_SIZE's value is greater than MAX_BINLOG_STMT_CACHE_SIZE.
866 If this happens, the BINLOG_STMT_CACHE_SIZE is set to MAX_BINLOG_STMT_CACHE_SIZE.
867 */
check_binlog_stmt_cache_size(THD * thd)868 void check_binlog_stmt_cache_size(THD *thd)
869 {
870 if (binlog_stmt_cache_size > max_binlog_stmt_cache_size)
871 {
872 if (thd)
873 {
874 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
875 ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX,
876 ER(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
877 (ulong) binlog_stmt_cache_size,
878 (ulong) max_binlog_stmt_cache_size);
879 }
880 else
881 {
882 sql_print_warning(ER_DEFAULT(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
883 (ulong) binlog_stmt_cache_size,
884 (ulong) max_binlog_stmt_cache_size);
885 }
886 binlog_stmt_cache_size= max_binlog_stmt_cache_size;
887 }
888 }
889
890 /**
891 Check whether binlog_hton has valid slot and enabled
892 */
binlog_enabled()893 bool binlog_enabled()
894 {
895 return(binlog_hton && binlog_hton->slot != HA_SLOT_UNDEF);
896 }
897
898 /*
899 Save position of binary log transaction cache.
900
901 SYNPOSIS
902 binlog_trans_log_savepos()
903
904 thd The thread to take the binlog data from
905 pos Pointer to variable where the position will be stored
906
907 DESCRIPTION
908
909 Save the current position in the binary log transaction cache into
910 the variable pointed to by 'pos'
911 */
912
913 static void
binlog_trans_log_savepos(THD * thd,my_off_t * pos)914 binlog_trans_log_savepos(THD *thd, my_off_t *pos)
915 {
916 DBUG_ENTER("binlog_trans_log_savepos");
917 DBUG_ASSERT(pos != NULL);
918 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
919 DBUG_ASSERT(mysql_bin_log.is_open());
920 *pos= cache_mngr->trx_cache.get_byte_position();
921 DBUG_PRINT("return", ("position: %lu", (ulong) *pos));
922 DBUG_VOID_RETURN;
923 }
924
925
926 /*
927 this function is mostly a placeholder.
928 conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
929 should be moved here.
930 */
931
binlog_init(void * p)932 static int binlog_init(void *p)
933 {
934 binlog_hton= (handlerton *)p;
935 binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
936 binlog_hton->db_type=DB_TYPE_BINLOG;
937 binlog_hton->savepoint_offset= sizeof(my_off_t);
938 binlog_hton->close_connection= binlog_close_connection;
939 binlog_hton->savepoint_set= binlog_savepoint_set;
940 binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
941 binlog_hton->savepoint_rollback_can_release_mdl=
942 binlog_savepoint_rollback_can_release_mdl;
943 binlog_hton->commit= binlog_commit;
944 binlog_hton->rollback= binlog_rollback;
945 binlog_hton->prepare= binlog_prepare;
946 binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
947 binlog_hton->clone_consistent_snapshot= binlog_clone_consistent_snapshot;
948 binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
949 return 0;
950 }
951
binlog_close_connection(handlerton * hton,THD * thd)952 static int binlog_close_connection(handlerton *hton, THD *thd)
953 {
954 DBUG_ENTER("binlog_close_connection");
955 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
956 DBUG_ASSERT(cache_mngr->is_binlog_empty());
957 DBUG_ASSERT(cache_mngr->trx_cache.is_group_cache_empty() &&
958 cache_mngr->stmt_cache.is_group_cache_empty());
959 DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) NULL));
960 thd_set_ha_data(thd, binlog_hton, NULL);
961 cache_mngr->~binlog_cache_mngr();
962 my_free(cache_mngr);
963 DBUG_RETURN(0);
964 }
965
should_write_gtids(const THD * thd)966 static bool should_write_gtids(const THD *thd) {
967 /*
968 Return false in the situation where slave sql_thread is
969 trying to generate gtid's for binlog events received from master.
970
971 Note that the check thd->variables.gtid_next.type == AUTOMATIC_GROUP
972 is used to ensure that a new gtid is generated for the transaction group,
973 instead of using SESSION.gtid_next value.
974 */
975 if (thd->rli_slave &&
976 thd->variables.gtid_next.type == AUTOMATIC_GROUP)
977 return false;
978 /*
979 Return true (allow gtids to be generated) in the scenario where
980 opt_gtid_deployment_step is false (Normal run after deployment procedure
981 is done).
982
983 Return true in the scenario where slave sql_thread uses gtid received from
984 master. This is necessary in the situation where deployment is done on
985 master, but slave still in deployment mode (opt_gtid_deployment_step is true).
986 */
987 return (!opt_gtid_deployment_step || (thd->rli_slave &&
988 thd->variables.gtid_next.type != AUTOMATIC_GROUP));
989
990 }
991
write_event(THD * thd,Log_event * ev)992 int binlog_cache_data::write_event(THD *thd, Log_event *ev)
993 {
994 DBUG_ENTER("binlog_cache_data::write_event");
995
996 if (gtid_mode > 0 && should_write_gtids(thd))
997 {
998 Group_cache::enum_add_group_status status=
999 group_cache.add_logged_group(thd, get_byte_position());
1000 if (status == Group_cache::ERROR)
1001 DBUG_RETURN(1);
1002 else if (status == Group_cache::APPEND_NEW_GROUP)
1003 {
1004 Gtid_log_event gtid_ev(thd, is_trx_cache());
1005 if (gtid_ev.write(&cache_log) != 0)
1006 DBUG_RETURN(1);
1007 }
1008 }
1009
1010 if (ev != NULL)
1011 {
1012 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
1013 {DBUG_SET("+d,simulate_file_write_error");});
1014
1015 DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
1016 {
1017 static int count= -1;
1018 count++;
1019 if(count % 4 == 3 && ev->get_type_code() == WRITE_ROWS_EVENT)
1020 DBUG_SET("+d,simulate_temp_file_write_error");
1021 });
1022 if (ev->write(&cache_log) != 0)
1023 {
1024 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
1025 {
1026 DBUG_SET("-d,simulate_file_write_error");
1027 DBUG_SET("-d,simulate_disk_full_at_flush_pending");
1028 /*
1029 after +d,simulate_file_write_error the local cache
1030 is in unsane state. Since -d,simulate_file_write_error
1031 revokes the first simulation do_write_cache()
1032 can't be run without facing an assert.
1033 So it's blocked with the following 2nd simulation:
1034 */
1035 DBUG_SET("+d,simulate_do_write_cache_failure");
1036 });
1037
1038 DBUG_EXECUTE_IF("simulate_temp_file_write_error",
1039 {
1040 DBUG_SET("-d,simulate_temp_file_write_error");
1041 });
1042 /*
1043 If the flush has failed due to ENOSPC error, set the
1044 flush_error flag.
1045 */
1046 if (thd->is_error() && my_errno == ENOSPC)
1047 {
1048 set_flush_error(thd);
1049 }
1050 DBUG_RETURN(1);
1051 }
1052 if (ev->get_type_code() == XID_EVENT)
1053 flags.with_xid= true;
1054 if (ev->is_using_immediate_logging())
1055 flags.immediate= true;
1056 }
1057 DBUG_RETURN(0);
1058 }
1059
1060
1061 /**
1062 Checks if the given GTID exists in the Group_cache. If not, add it
1063 as an empty group.
1064
1065 @todo Move this function into the cache class?
1066
1067 @param thd THD object that owns the Group_cache
1068 @param cache_data binlog_cache_data object for the cache
1069 @param gtid GTID to check
1070 */
write_one_empty_group_to_cache(THD * thd,binlog_cache_data * cache_data,Gtid gtid)1071 static int write_one_empty_group_to_cache(THD *thd,
1072 binlog_cache_data *cache_data,
1073 Gtid gtid)
1074 {
1075 DBUG_ENTER("write_one_empty_group_to_cache");
1076 Group_cache *group_cache= &cache_data->group_cache;
1077 if (group_cache->contains_gtid(gtid))
1078 DBUG_RETURN(0);
1079 /*
1080 Apparently this code is not being called. We need to
1081 investigate if this is a bug or this code is not
1082 necessary. /Alfranio
1083
1084 Empty groups are currently being handled in the function
1085 gtid_empty_group_log_and_cleanup().
1086 */
1087 DBUG_ASSERT(0); /*NOTREACHED*/
1088 #ifdef NON_ERROR_GTID
1089 IO_CACHE *cache= &cache_data->cache_log;
1090 Group_cache::enum_add_group_status status= group_cache->add_empty_group(gtid);
1091 if (status == Group_cache::ERROR)
1092 DBUG_RETURN(1);
1093 DBUG_ASSERT(status == Group_cache::APPEND_NEW_GROUP);
1094 Gtid_specification spec= { GTID_GROUP, gtid };
1095 Gtid_log_event gtid_ev(thd, cache_data->is_trx_cache(), &spec);
1096 if (gtid_ev.write(cache) != 0)
1097 DBUG_RETURN(1);
1098 #endif
1099 DBUG_RETURN(0);
1100 }
1101
1102 /**
1103 Writes all GTIDs that the thread owns to the stmt/trx cache, if the
1104 GTID is not already in the cache.
1105
1106 @todo Move this function into the cache class?
1107
1108 @param thd THD object for the thread that owns the cache.
1109 @param cache_data The cache.
1110 */
write_empty_groups_to_cache(THD * thd,binlog_cache_data * cache_data)1111 static int write_empty_groups_to_cache(THD *thd, binlog_cache_data *cache_data)
1112 {
1113 DBUG_ENTER("write_empty_groups_to_cache");
1114 if (thd->owned_gtid.sidno == -1)
1115 {
1116 #ifdef HAVE_GTID_NEXT_LIST
1117 Gtid_set::Gtid_iterator git(&thd->owned_gtid_set);
1118 Gtid gtid= git.get();
1119 while (gtid.sidno != 0)
1120 {
1121 if (write_one_empty_group_to_cache(thd, cache_data, gtid) != 0)
1122 DBUG_RETURN(1);
1123 git.next();
1124 gtid= git.get();
1125 }
1126 #else
1127 DBUG_ASSERT(0);
1128 #endif
1129 }
1130 else if (thd->owned_gtid.sidno > 0)
1131 if (write_one_empty_group_to_cache(thd, cache_data, thd->owned_gtid) != 0)
1132 DBUG_RETURN(1);
1133 DBUG_RETURN(0);
1134 }
1135
1136
1137 /**
1138
1139 @todo Move this function into the cache class?
1140 */
1141 static int
gtid_before_write_cache(THD * thd,binlog_cache_data * cache_data)1142 gtid_before_write_cache(THD* thd, binlog_cache_data* cache_data)
1143 {
1144 DBUG_ENTER("gtid_before_write_cache");
1145 int error= 0;
1146
1147 DBUG_ASSERT(thd->variables.gtid_next.type != UNDEFINED_GROUP);
1148
1149 if (gtid_mode == 0 || !should_write_gtids(thd))
1150 {
1151 DBUG_RETURN(0);
1152 }
1153
1154 Group_cache* group_cache= &cache_data->group_cache;
1155
1156 global_sid_lock->rdlock();
1157
1158 if (thd->variables.gtid_next.type == AUTOMATIC_GROUP)
1159 {
1160 if (group_cache->generate_automatic_gno(thd) !=
1161 RETURN_STATUS_OK)
1162 {
1163 global_sid_lock->unlock();
1164 DBUG_RETURN(1);
1165 }
1166 }
1167 if (write_empty_groups_to_cache(thd, cache_data) != 0)
1168 {
1169 global_sid_lock->unlock();
1170 DBUG_RETURN(1);
1171 }
1172
1173 global_sid_lock->unlock();
1174
1175 /*
1176 If an automatic group number was generated, change the first event
1177 into a "real" one.
1178 */
1179 if (thd->variables.gtid_next.type == AUTOMATIC_GROUP)
1180 {
1181 DBUG_ASSERT(group_cache->get_n_groups() == 1);
1182 Cached_group *cached_group= group_cache->get_unsafe_pointer(0);
1183 DBUG_ASSERT(cached_group->spec.type != AUTOMATIC_GROUP);
1184 Gtid_log_event gtid_ev(thd, cache_data->is_trx_cache(),
1185 &cached_group->spec);
1186 bool using_file= cache_data->cache_log.pos_in_file > 0;
1187
1188 DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
1189 {
1190 DBUG_SET("+d,simulate_temp_file_write_error");
1191 });
1192
1193 my_off_t saved_position= cache_data->reset_write_pos(0, using_file);
1194
1195 if (!cache_data->cache_log.error)
1196 {
1197 if (gtid_ev.write(&cache_data->cache_log))
1198 goto err;
1199 cache_data->reset_write_pos(saved_position, using_file);
1200 }
1201
1202 if (cache_data->cache_log.error)
1203 goto err;
1204 }
1205
1206 DBUG_RETURN(error);
1207
1208 err:
1209 DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
1210 {
1211 DBUG_SET("-d,simulate_temp_file_write_error");
1212 });
1213 /*
1214 If the reinit_io_cache has failed, set the flush_error flag.
1215 */
1216 if (cache_data->cache_log.error)
1217 {
1218 cache_data->set_flush_error(thd);
1219 }
1220 DBUG_RETURN(1);
1221
1222 }
1223
1224 /**
1225 The function logs an empty group with GTID and performs cleanup.
1226 Its logic wrt GTID is equivalent to one of binlog_commit().
1227 It's called at the end of statement execution in case binlog_commit()
1228 was skipped.
1229 Such cases are due ineffective binlogging incl an empty group
1230 re-execution.
1231
1232 @param thd The thread handle
1233
1234 @return
1235 nonzero if an error pops up.
1236 */
gtid_empty_group_log_and_cleanup(THD * thd)1237 int gtid_empty_group_log_and_cleanup(THD *thd)
1238 {
1239 int ret= 1;
1240 binlog_cache_data* cache_data= NULL;
1241
1242 DBUG_ENTER("gtid_empty_group_log_and_cleanup");
1243
1244 Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE,
1245 FALSE, TRUE, 0, TRUE);
1246 DBUG_ASSERT(!qinfo.is_using_immediate_logging());
1247
1248 /*
1249 thd->cache_mngr is uninitialized on the first empty transaction.
1250 */
1251 if (thd->binlog_setup_trx_data())
1252 DBUG_RETURN(1);
1253 cache_data= &thd_get_cache_mngr(thd)->trx_cache;
1254 DBUG_PRINT("debug", ("Writing to trx_cache"));
1255 if (cache_data->write_event(thd, &qinfo) ||
1256 gtid_before_write_cache(thd, cache_data))
1257 goto err;
1258
1259 ret= mysql_bin_log.commit(thd, true);
1260
1261 err:
1262 DBUG_RETURN(ret);
1263 }
1264
1265 /**
1266 This function finalizes the cache preparing for commit or rollback.
1267
1268 The function just writes all the necessary events to the cache but
1269 does not flush the data to the binary log file. That is the role of
1270 the binlog_cache_data::flush function.
1271
1272 @see binlog_cache_data::flush
1273
1274 @param thd The thread whose transaction should be flushed
1275 @param cache_data Pointer to the cache
1276 @param end_ev The end event either commit/rollback
1277
1278 @return
1279 nonzero if an error pops up when flushing the cache.
1280 */
1281 int
finalize(THD * thd,Log_event * end_event)1282 binlog_cache_data::finalize(THD *thd, Log_event *end_event)
1283 {
1284 DBUG_ENTER("binlog_cache_data::finalize");
1285 if (!is_binlog_empty())
1286 {
1287 DBUG_ASSERT(!flags.finalized);
1288 if (int error= flush_pending_event(thd))
1289 DBUG_RETURN(error);
1290 if (int error= write_event(thd, end_event))
1291 DBUG_RETURN(error);
1292 flags.finalized= true;
1293 DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
1294 }
1295 DBUG_RETURN(0);
1296 }
1297
1298 /**
1299 Flush caches to the binary log.
1300
1301 If the cache is finalized, the cache will be flushed to the binary
1302 log file. If the cache is not finalized, nothing will be done.
1303
1304 If flushing fails for any reason, an error will be reported and the
1305 cache will be reset. Flushing can fail in two circumstances:
1306
1307 - It was not possible to write the cache to the file. In this case,
1308 it does not make sense to keep the cache.
1309
1310 - The cache was successfully written to disk but post-flush actions
1311 (such as binary log rotation) failed. In this case, the cache is
1312 already written to disk and there is no reason to keep it.
1313
1314 @see binlog_cache_data::finalize
1315 */
1316 int
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)1317 binlog_cache_data::flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
1318 {
1319 /*
1320 Doing a commit or a rollback including non-transactional tables,
1321 i.e., ending a transaction where we might write the transaction
1322 cache to the binary log.
1323
1324 We can always end the statement when ending a transaction since
1325 transactions are not allowed inside stored functions. If they
1326 were, we would have to ensure that we're not ending a statement
1327 inside a stored function.
1328 */
1329 DBUG_ENTER("binlog_cache_data::flush");
1330 DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
1331 int error= 0;
1332 if (flags.finalized)
1333 {
1334 my_off_t bytes_in_cache= my_b_tell(&cache_log);
1335 DBUG_PRINT("debug", ("bytes_in_cache: %llu", bytes_in_cache));
1336 /*
1337 The cache is always reset since subsequent rollbacks of the
1338 transactions might trigger attempts to write to the binary log
1339 if the cache is not reset.
1340 */
1341 if (!(error= gtid_before_write_cache(thd, this)))
1342 error= mysql_bin_log.write_cache(thd, this);
1343 else
1344 thd->commit_error= THD::CE_FLUSH_ERROR;
1345
1346 if (flags.with_xid && error == 0)
1347 *wrote_xid= true;
1348
1349 /*
1350 Reset have to be after the if above, since it clears the
1351 with_xid flag
1352 */
1353 reset();
1354 if (bytes_written)
1355 *bytes_written= bytes_in_cache;
1356 }
1357 DBUG_ASSERT(!flags.finalized);
1358 DBUG_RETURN(error);
1359 }
1360
1361 /**
1362 This function truncates the transactional cache upon committing or rolling
1363 back either a transaction or a statement.
1364
1365 @param thd The thread whose transaction should be flushed
1366 @param cache_mngr Pointer to the cache data to be flushed
1367 @param all @c true means truncate the transaction, otherwise the
1368 statement must be truncated.
1369
1370 @return
1371 nonzero if an error pops up when truncating the transactional cache.
1372 */
1373 int
truncate(THD * thd,bool all)1374 binlog_trx_cache_data::truncate(THD *thd, bool all)
1375 {
1376 DBUG_ENTER("binlog_trx_cache_data::truncate");
1377 int error=0;
1378
1379 DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1380 FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1381 FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1382 all ? "all" : "stmt"));
1383
1384 remove_pending_event();
1385
1386 /*
1387 If rolling back an entire transaction or a single statement not
1388 inside a transaction, we reset the transaction cache.
1389 */
1390 if (ending_trans(thd, all))
1391 {
1392 if (has_incident())
1393 error= mysql_bin_log.write_incident(thd, true/*need_lock_log=true*/);
1394 reset();
1395 }
1396 /*
1397 If rolling back a statement in a transaction, we truncate the
1398 transaction cache to remove the statement.
1399 */
1400 else if (get_prev_position() != MY_OFF_T_UNDEF)
1401 {
1402 restore_prev_position();
1403 if (is_binlog_empty())
1404 {
1405 /*
1406 After restoring the previous position, we need to check if
1407 the cache is empty. In such case, the group cache needs to
1408 be cleaned up too because the GTID is removed too from the
1409 cache.
1410
1411 So if any change happens again, the GTID must be rewritten
1412 and this will not happen if the group cache is not cleaned
1413 up.
1414
1415 After integrating this with NDB, we need to check if the
1416 current approach is enough or the group cache needs to
1417 explicitly support rollback to savepoints.
1418 */
1419 group_cache.clear();
1420 }
1421 }
1422
1423 thd->clear_binlog_table_maps();
1424
1425 DBUG_RETURN(error);
1426 }
1427
binlog_prepare(handlerton * hton,THD * thd,bool all)1428 static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1429 {
1430 /*
1431 do nothing.
1432 just pretend we can do 2pc, so that MySQL won't
1433 switch to 1pc.
1434 real work will be done in MYSQL_BIN_LOG::commit()
1435 */
1436 return 0;
1437 }
1438
binlog_start_consistent_snapshot(handlerton * hton,THD * thd)1439 static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
1440 {
1441 int err= 0;
1442 DBUG_ENTER("binlog_start_consistent_snapshot");
1443
1444 if ((err= thd->binlog_setup_trx_data()))
1445 DBUG_RETURN(err);
1446
1447 binlog_cache_mngr * const cache_mngr= thd_get_cache_mngr(thd);
1448
1449 /* Server layer calls us with LOCK_log locked, so this is safe. */
1450 mysql_bin_log.raw_get_current_log(&cache_mngr->binlog_info);
1451
1452 trans_register_ha(thd, TRUE, hton);
1453
1454 DBUG_RETURN(err);
1455 }
1456
binlog_clone_consistent_snapshot(handlerton * hton,THD * thd,THD * from_thd)1457 static int binlog_clone_consistent_snapshot(handlerton *hton, THD *thd,
1458 THD *from_thd)
1459 {
1460 binlog_cache_mngr *from_cache_mngr;
1461 binlog_cache_mngr *cache_mngr;
1462 int err= 0;
1463 char log_file_name[FN_REFLEN];
1464 my_off_t pos;
1465
1466 DBUG_ENTER("binlog_start_consistent_snapshot");
1467
1468 from_cache_mngr= opt_bin_log ?
1469 (binlog_cache_mngr *) thd_get_cache_mngr(from_thd) : NULL;
1470
1471 if (from_cache_mngr == NULL)
1472 {
1473 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
1474 HA_ERR_UNSUPPORTED,
1475 "WITH CONSISTENT SNAPSHOT FROM SESSION was ignored for "
1476 "binary log, because the specified session does not "
1477 "have a consistent snapshot of binary log "
1478 "coordinates.");
1479 DBUG_RETURN(0);
1480 }
1481
1482 if ((err= thd->binlog_setup_trx_data()))
1483 DBUG_RETURN(err);
1484
1485 cache_mngr= thd_get_cache_mngr(thd);
1486
1487 mysql_mutex_lock(&from_cache_mngr->binlog_info.lock);
1488
1489 pos= from_cache_mngr->binlog_info.pos;
1490 strmake(log_file_name, from_cache_mngr->binlog_info.log_file_name,
1491 sizeof(log_file_name) - 1);
1492
1493 mysql_mutex_unlock(&from_cache_mngr->binlog_info.lock);
1494
1495 mysql_mutex_lock(&cache_mngr->binlog_info.lock);
1496
1497 cache_mngr->binlog_info.pos = pos;
1498 strmake(cache_mngr->binlog_info.log_file_name, log_file_name,
1499 sizeof(cache_mngr->binlog_info.log_file_name) - 1);
1500
1501 mysql_mutex_unlock(&cache_mngr->binlog_info.lock);
1502
1503 trans_register_ha(thd, TRUE, hton);
1504
1505 DBUG_RETURN(err);
1506 }
1507
1508 /**
1509 This function is called once after each statement.
1510
1511 @todo This function is currently not used any more and will
1512 eventually be eliminated. The real commit job is done in the
1513 MYSQL_BIN_LOG::commit function.
1514
1515 @see MYSQL_BIN_LOG::commit
1516
1517 @param hton The binlog handlerton.
1518 @param thd The client thread that executes the transaction.
1519 @param all This is @c true if this is a real transaction commit, and
1520 @false otherwise.
1521
1522 @see handlerton::commit
1523 */
binlog_commit(handlerton * hton,THD * thd,bool all)1524 static int binlog_commit(handlerton *hton, THD *thd, bool all)
1525 {
1526 DBUG_ENTER("binlog_commit");
1527 /*
1528 Nothing to do (any more) on commit.
1529 */
1530 DBUG_RETURN(0);
1531 }
1532
1533 /**
1534 This function is called when a transaction or a statement is rolled back.
1535
1536 @internal It is necessary to execute a rollback here if the
1537 transaction was rolled back because of executing a ROLLBACK TO
1538 SAVEPOINT command, but it is not used for normal rollback since
1539 MYSQL_BIN_LOG::rollback is called in that case.
1540
1541 @todo Refactor code to introduce a <code>MYSQL_BIN_LOG::rollback(THD
1542 *thd, SAVEPOINT *sv)</code> function in @c TC_LOG and have that
1543 function execute the necessary work to rollback to a savepoint.
1544
1545 @param hton The binlog handlerton.
1546 @param thd The client thread that executes the transaction.
1547 @param all This is @c true if this is a real transaction rollback, and
1548 @false otherwise.
1549
1550 @see handlerton::rollback
1551 */
binlog_rollback(handlerton * hton,THD * thd,bool all)1552 static int binlog_rollback(handlerton *hton, THD *thd, bool all)
1553 {
1554 DBUG_ENTER("binlog_rollback");
1555 int error= 0;
1556 if (thd->lex->sql_command == SQLCOM_ROLLBACK_TO_SAVEPOINT)
1557 error= mysql_bin_log.rollback(thd, all);
1558 DBUG_RETURN(error);
1559 }
1560
1561
1562 bool
append(THD * first)1563 Stage_manager::Mutex_queue::append(THD *first)
1564 {
1565 DBUG_ENTER("Stage_manager::Mutex_queue::append");
1566 lock();
1567 DBUG_PRINT("enter", ("first: 0x%llx", (ulonglong) first));
1568 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1569 (ulonglong) m_first, (ulonglong) &m_first,
1570 (ulonglong) m_last));
1571 bool empty= (m_first == NULL);
1572 *m_last= first;
1573 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1574 (ulonglong) m_first, (ulonglong) &m_first,
1575 (ulonglong) m_last));
1576 /*
1577 Go to the last THD instance of the list. We expect lists to be
1578 moderately short. If they are not, we need to track the end of
1579 the queue as well.
1580 */
1581 while (first->next_to_commit)
1582 first= first->next_to_commit;
1583 m_last= &first->next_to_commit;
1584 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1585 (ulonglong) m_first, (ulonglong) &m_first,
1586 (ulonglong) m_last));
1587 DBUG_ASSERT(m_first || m_last == &m_first);
1588 DBUG_PRINT("return", ("empty: %s", YESNO(empty)));
1589 unlock();
1590 DBUG_RETURN(empty);
1591 }
1592
1593
1594 std::pair<bool, THD*>
pop_front()1595 Stage_manager::Mutex_queue::pop_front()
1596 {
1597 DBUG_ENTER("Stage_manager::Mutex_queue::pop_front");
1598 lock();
1599 THD *result= m_first;
1600 bool more= true;
1601 /*
1602 We do not set next_to_commit to NULL here since this is only used
1603 in the flush stage. We will have to call fetch_queue last here,
1604 and will then "cut" the linked list by setting the end of that
1605 queue to NULL.
1606 */
1607 if (result)
1608 m_first= result->next_to_commit;
1609 if (m_first == NULL)
1610 {
1611 more= false;
1612 m_last = &m_first;
1613 }
1614 DBUG_ASSERT(m_first || m_last == &m_first);
1615 unlock();
1616 DBUG_PRINT("return", ("result: 0x%llx, more: %s",
1617 (ulonglong) result, YESNO(more)));
1618 DBUG_RETURN(std::make_pair(more, result));
1619 }
1620
1621
1622 bool
enroll_for(StageID stage,THD * thd,mysql_mutex_t * stage_mutex)1623 Stage_manager::enroll_for(StageID stage, THD *thd, mysql_mutex_t *stage_mutex)
1624 {
1625 // If the queue was empty: we're the leader for this batch
1626 DBUG_PRINT("debug", ("Enqueue 0x%llx to queue for stage %d",
1627 (ulonglong) thd, stage));
1628 bool leader= m_queue[stage].append(thd);
1629
1630 /*
1631 The stage mutex can be NULL if we are enrolling for the first
1632 stage.
1633 */
1634 if (stage_mutex)
1635 mysql_mutex_unlock(stage_mutex);
1636
1637 /*
1638 If the queue was not empty, we're a follower and wait for the
1639 leader to process the queue. If we were holding a mutex, we have
1640 to release it before going to sleep.
1641 */
1642 if (!leader)
1643 {
1644 mysql_mutex_lock(&m_lock_done);
1645 #ifndef DBUG_OFF
1646 /*
1647 Leader can be awaiting all-clear to preempt follower's execution.
1648 With setting the status the follower ensures it won't execute anything
1649 including thread-specific code.
1650 */
1651 thd->transaction.flags.ready_preempt= 1;
1652 if (leader_await_preempt_status)
1653 mysql_cond_signal(&m_cond_preempt);
1654 #endif
1655 while (thd->transaction.flags.pending) {
1656 mysql_cond_wait(&m_cond_done, &m_lock_done);
1657 }
1658 mysql_mutex_unlock(&m_lock_done);
1659 }
1660 return leader;
1661 }
1662
1663
fetch_and_empty()1664 THD *Stage_manager::Mutex_queue::fetch_and_empty()
1665 {
1666 DBUG_ENTER("Stage_manager::Mutex_queue::fetch_and_empty");
1667 lock();
1668 DBUG_PRINT("enter", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1669 (ulonglong) m_first, (ulonglong) &m_first,
1670 (ulonglong) m_last));
1671 THD *result= m_first;
1672 m_first= NULL;
1673 m_last= &m_first;
1674 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1675 (ulonglong) m_first, (ulonglong) &m_first,
1676 (ulonglong) m_last));
1677 DBUG_ASSERT(m_first || m_last == &m_first);
1678 DBUG_PRINT("return", ("result: 0x%llx", (ulonglong) result));
1679 unlock();
1680 DBUG_RETURN(result);
1681 }
1682
1683 #ifndef DBUG_OFF
clear_preempt_status(THD * head)1684 void Stage_manager::clear_preempt_status(THD *head)
1685 {
1686 DBUG_ASSERT(head);
1687
1688 mysql_mutex_lock(&m_lock_done);
1689 while(!head->transaction.flags.ready_preempt)
1690 {
1691 leader_await_preempt_status= true;
1692 mysql_cond_wait(&m_cond_preempt, &m_lock_done);
1693 }
1694 leader_await_preempt_status= false;
1695 mysql_mutex_unlock(&m_lock_done);
1696 }
1697 #endif
1698
1699 /**
1700 Write a rollback record of the transaction to the binary log.
1701
1702 For binary log group commit, the rollback is separated into three
1703 parts:
1704
1705 1. First part consists of filling the necessary caches and
1706 finalizing them (if they need to be finalized). After a cache is
1707 finalized, nothing can be added to the cache.
1708
1709 2. Second part execute an ordered flush and commit. This will be
1710 done using the group commit functionality in @c ordered_commit.
1711
1712 Since we roll back the transaction early, we call @c
1713 ordered_commit with the @c skip_commit flag set. The @c
1714 ha_commit_low call inside @c ordered_commit will then not be
1715 called.
1716
1717 3. Third part checks any errors resulting from the flush and handles
1718 them appropriately.
1719
1720 @see MYSQL_BIN_LOG::ordered_commit
1721 @see ha_commit_low
1722 @see ha_rollback_low
1723
1724 @param thd Session to commit
1725 @param all This is @c true if this is a real transaction rollback, and
1726 @false otherwise.
1727
1728 @return Error code, or zero if there were no error.
1729 */
1730
rollback(THD * thd,bool all)1731 int MYSQL_BIN_LOG::rollback(THD *thd, bool all)
1732 {
1733 int error= 0;
1734 bool stuff_logged= false;
1735
1736 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
1737 DBUG_ENTER("MYSQL_BIN_LOG::rollback(THD *thd, bool all)");
1738 DBUG_PRINT("enter", ("all: %s, cache_mngr: 0x%llx, thd->is_error: %s",
1739 YESNO(all), (ulonglong) cache_mngr, YESNO(thd->is_error())));
1740
1741 /*
1742 We roll back the transaction in the engines early since this will
1743 release locks and allow other transactions to start executing.
1744
1745 If we are executing a ROLLBACK TO SAVEPOINT, we should only clear
1746 the caches since this function is called as part of the engine
1747 rollback.
1748 */
1749 if (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT)
1750 {
1751 /*
1752 Reset binlog_snapshot_% variables for the current connection so that the
1753 current coordinates are shown after committing a consistent snapshot
1754 transaction.
1755 */
1756 if (cache_mngr != NULL)
1757 {
1758 mysql_mutex_lock(&cache_mngr->binlog_info.lock);
1759 cache_mngr->binlog_info.log_file_name[0]= '\0';
1760 mysql_mutex_unlock(&cache_mngr->binlog_info.lock);
1761 }
1762
1763 if ((error= ha_rollback_low(thd, all)))
1764 goto end;
1765 }
1766
1767 /*
1768 If there is no cache manager, or if there is nothing in the
1769 caches, there are no caches to roll back, so we're trivially done.
1770 */
1771 if (cache_mngr == NULL || cache_mngr->is_binlog_empty())
1772 goto end;
1773
1774 DBUG_PRINT("debug",
1775 ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
1776 YESNO(thd->transaction.all.cannot_safely_rollback()),
1777 YESNO(cache_mngr->trx_cache.is_binlog_empty())));
1778 DBUG_PRINT("debug",
1779 ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
1780 YESNO(thd->transaction.stmt.cannot_safely_rollback()),
1781 YESNO(cache_mngr->stmt_cache.is_binlog_empty())));
1782
1783 /*
1784 If an incident event is set we do not flush the content of the statement
1785 cache because it may be corrupted.
1786 */
1787 if (cache_mngr->stmt_cache.has_incident())
1788 {
1789 error= write_incident(thd, true/*need_lock_log=true*/);
1790 cache_mngr->stmt_cache.reset();
1791 }
1792 else if (!cache_mngr->stmt_cache.is_binlog_empty())
1793 {
1794 if ((error= cache_mngr->stmt_cache.finalize(thd)))
1795 goto end;
1796 stuff_logged= true;
1797 }
1798
1799 if (ending_trans(thd, all))
1800 {
1801 if (trans_cannot_safely_rollback(thd))
1802 {
1803 /*
1804 If the transaction is being rolled back and contains changes that
1805 cannot be rolled back, the trx-cache's content is flushed.
1806 */
1807 Query_log_event
1808 end_evt(thd, STRING_WITH_LEN("ROLLBACK"), true, false, true, 0, true);
1809 error= cache_mngr->trx_cache.finalize(thd, &end_evt);
1810 stuff_logged= true;
1811 }
1812 else
1813 {
1814 /*
1815 If the transaction is being rolled back and its changes can be
1816 rolled back, the trx-cache's content is truncated.
1817 */
1818 error= cache_mngr->trx_cache.truncate(thd, all);
1819 }
1820 }
1821 else
1822 {
1823 /*
1824 If a statement is being rolled back, it is necessary to know
1825 exactly why a statement may not be safely rolled back as in
1826 some specific situations the trx-cache can be truncated.
1827
1828 If a temporary table is created or dropped, the trx-cache is not
1829 truncated. Note that if the stmt-cache is used, there is nothing
1830 to truncate in the trx-cache.
1831
1832 If a non-transactional table is updated and the binlog format is
1833 statement, the trx-cache is not truncated. The trx-cache is used
1834 when the direct option is off and a transactional table has been
1835 updated before the current statement in the context of the
1836 current transaction. Note that if the stmt-cache is used there is
1837 nothing to truncate in the trx-cache.
1838
1839 If other binlog formats are used, updates to non-transactional
1840 tables are written to the stmt-cache and trx-cache can be safely
1841 truncated, if necessary.
1842 */
1843 if (thd->transaction.stmt.has_dropped_temp_table() ||
1844 thd->transaction.stmt.has_created_temp_table() ||
1845 (thd->transaction.stmt.has_modified_non_trans_table() &&
1846 thd->variables.binlog_format == BINLOG_FORMAT_STMT))
1847 {
1848 /*
1849 If the statement is being rolled back and dropped or created a
1850 temporary table or modified a non-transactional table and the
1851 statement-based replication is in use, the statement's changes
1852 in the trx-cache are preserved.
1853 */
1854 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
1855 }
1856 else
1857 {
1858 /*
1859 Otherwise, the statement's changes in the trx-cache are
1860 truncated.
1861 */
1862 error= cache_mngr->trx_cache.truncate(thd, all);
1863 }
1864 }
1865
1866 DBUG_PRINT("debug", ("error: %d", error));
1867 if (error == 0 && stuff_logged)
1868 error= ordered_commit(thd, all, /* skip_commit */ true);
1869
1870 if (check_write_error(thd))
1871 {
1872 /*
1873 "all == true" means that a "rollback statement" triggered the error and
1874 this function was called. However, this must not happen as a rollback
1875 is written directly to the binary log. And in auto-commit mode, a single
1876 statement that is rolled back has the flag all == false.
1877 */
1878 DBUG_ASSERT(!all);
1879 /*
1880 We reach this point if the effect of a statement did not properly get into
1881 a cache and need to be rolled back.
1882 */
1883 error |= cache_mngr->trx_cache.truncate(thd, all);
1884 }
1885
1886 end:
1887 /*
1888 When a statement errors out on auto-commit mode it is rollback
1889 implicitly, so the same should happen to its GTID.
1890 */
1891 if (!thd->in_active_multi_stmt_transaction())
1892 gtid_rollback(thd);
1893
1894 DBUG_PRINT("return", ("error: %d", error));
1895 DBUG_RETURN(error);
1896 }
1897
1898 /**
1899 @note
1900 How do we handle this (unlikely but legal) case:
1901 @verbatim
1902 [transaction] + [update to non-trans table] + [rollback to savepoint] ?
1903 @endverbatim
1904 The problem occurs when a savepoint is before the update to the
1905 non-transactional table. Then when there's a rollback to the savepoint, if we
1906 simply truncate the binlog cache, we lose the part of the binlog cache where
1907 the update is. If we want to not lose it, we need to write the SAVEPOINT
1908 command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
1909 is easy: it's just write at the end of the binlog cache, but the former
1910 should be *inserted* to the place where the user called SAVEPOINT. The
1911 solution is that when the user calls SAVEPOINT, we write it to the binlog
1912 cache (so no need to later insert it). As transactions are never intermixed
1913 in the binary log (i.e. they are serialized), we won't have conflicts with
1914 savepoint names when using mysqlbinlog or in the slave SQL thread.
1915 Then when ROLLBACK TO SAVEPOINT is called, if we updated some
1916 non-transactional table, we don't truncate the binlog cache but instead write
1917 ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
1918 will chop the SAVEPOINT command from the binlog cache, which is good as in
1919 that case there is no need to have it in the binlog).
1920 */
1921
binlog_savepoint_set(handlerton * hton,THD * thd,void * sv)1922 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
1923 {
1924 DBUG_ENTER("binlog_savepoint_set");
1925 int error= 1;
1926
1927 String log_query;
1928 if (log_query.append(STRING_WITH_LEN("SAVEPOINT ")))
1929 DBUG_RETURN(error);
1930 else
1931 append_identifier(thd, &log_query, thd->lex->ident.str,
1932 thd->lex->ident.length);
1933
1934 int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
1935 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
1936 TRUE, FALSE, TRUE, errcode);
1937 /*
1938 We cannot record the position before writing the statement
1939 because a rollback to a savepoint (.e.g. consider it "S") would
1940 prevent the savepoint statement (i.e. "SAVEPOINT S") from being
1941 written to the binary log despite the fact that the server could
1942 still issue other rollback statements to the same savepoint (i.e.
1943 "S").
1944 Given that the savepoint is valid until the server releases it,
1945 ie, until the transaction commits or it is released explicitly,
1946 we need to log it anyway so that we don't have "ROLLBACK TO S"
1947 or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
1948 log.
1949 */
1950 if (!(error= mysql_bin_log.write_event(&qinfo)))
1951 binlog_trans_log_savepos(thd, (my_off_t*) sv);
1952
1953 DBUG_RETURN(error);
1954 }
1955
binlog_savepoint_rollback(handlerton * hton,THD * thd,void * sv)1956 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
1957 {
1958 DBUG_ENTER("binlog_savepoint_rollback");
1959 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
1960 my_off_t pos= *(my_off_t*) sv;
1961 DBUG_ASSERT(pos != ~(my_off_t) 0);
1962
1963 /*
1964 Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
1965 non-transactional table. Otherwise, truncate the binlog cache starting
1966 from the SAVEPOINT command.
1967 */
1968 if (trans_cannot_safely_rollback(thd))
1969 {
1970 String log_query;
1971 if (log_query.append(STRING_WITH_LEN("ROLLBACK TO ")))
1972 DBUG_RETURN(1);
1973 else
1974 {
1975 /*
1976 Before writing identifier to the binlog, make sure to
1977 quote the identifier properly so as to prevent any SQL
1978 injection on the slave.
1979 */
1980 append_identifier(thd, &log_query, thd->lex->ident.str,
1981 thd->lex->ident.length);
1982 }
1983
1984 int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
1985 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
1986 TRUE, FALSE, TRUE, errcode);
1987 DBUG_RETURN(mysql_bin_log.write_event(&qinfo));
1988 }
1989 // Otherwise, we truncate the cache
1990 cache_mngr->trx_cache.restore_savepoint(pos);
1991 /*
1992 When a SAVEPOINT is executed inside a stored function/trigger we force the
1993 pending event to be flushed with a STMT_END_F flag and clear the table maps
1994 as well to ensure that following DMLs will have a clean state to start
1995 with. ROLLBACK inside a stored routine has to finalize possibly existing
1996 current row-based pending event with cleaning up table maps. That ensures
1997 that following DMLs will have a clean state to start with.
1998 */
1999 if (thd->in_sub_stmt)
2000 thd->clear_binlog_table_maps();
2001 if (cache_mngr->trx_cache.is_binlog_empty())
2002 cache_mngr->trx_cache.group_cache.clear();
2003 DBUG_RETURN(0);
2004 }
2005
2006 /**
2007 Check whether binlog state allows to safely release MDL locks after
2008 rollback to savepoint.
2009
2010 @param hton The binlog handlerton.
2011 @param thd The client thread that executes the transaction.
2012
2013 @return true - It is safe to release MDL locks.
2014 false - If it is not.
2015 */
binlog_savepoint_rollback_can_release_mdl(handlerton * hton,THD * thd)2016 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
2017 THD *thd)
2018 {
2019 DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
2020 /*
2021 If we have not updated any non-transactional tables rollback
2022 to savepoint will simply truncate binlog cache starting from
2023 SAVEPOINT command. So it should be safe to release MDL acquired
2024 after SAVEPOINT command in this case.
2025 */
2026 DBUG_RETURN(!trans_cannot_safely_rollback(thd));
2027 }
2028
2029 #ifdef HAVE_REPLICATION
2030
2031 /*
2032 Adjust the position pointer in the binary log file for all running slaves
2033
2034 SYNOPSIS
2035 adjust_linfo_offsets()
2036 purge_offset Number of bytes removed from start of log index file
2037
2038 NOTES
2039 - This is called when doing a PURGE when we delete lines from the
2040 index log file
2041
2042 REQUIREMENTS
2043 - Before calling this function, we have to ensure that no threads are
2044 using any binary log file before purge_offset.a
2045
2046 TODO
2047 - Inform the slave threads that they should sync the position
2048 in the binary log file with flush_relay_log_info.
2049 Now they sync is done for next read.
2050 */
2051
adjust_linfo_offsets(my_off_t purge_offset)2052 static void adjust_linfo_offsets(my_off_t purge_offset)
2053 {
2054 mysql_mutex_lock(&LOCK_thread_count);
2055
2056 Thread_iterator it= global_thread_list_begin();
2057 Thread_iterator end= global_thread_list_end();
2058 for (; it != end; ++it)
2059 {
2060 LOG_INFO* linfo;
2061 if ((linfo = (*it)->current_linfo))
2062 {
2063 mysql_mutex_lock(&linfo->lock);
2064 /*
2065 Index file offset can be less that purge offset only if
2066 we just started reading the index file. In that case
2067 we have nothing to adjust
2068 */
2069 if (linfo->index_file_offset < purge_offset)
2070 linfo->fatal = (linfo->index_file_offset != 0);
2071 else
2072 linfo->index_file_offset -= purge_offset;
2073 mysql_mutex_unlock(&linfo->lock);
2074 }
2075 }
2076 mysql_mutex_unlock(&LOCK_thread_count);
2077 }
2078
2079
log_in_use(const char * log_name)2080 static int log_in_use(const char* log_name)
2081 {
2082 size_t log_name_len = strlen(log_name) + 1;
2083 int thread_count=0;
2084 #ifndef DBUG_OFF
2085 if (current_thd)
2086 DEBUG_SYNC(current_thd,"purge_logs_after_lock_index_before_thread_count");
2087 #endif
2088 mysql_mutex_lock(&LOCK_thread_count);
2089
2090 Thread_iterator it= global_thread_list_begin();
2091 Thread_iterator end= global_thread_list_end();
2092 for (; it != end; ++it)
2093 {
2094 LOG_INFO* linfo;
2095 if ((linfo = (*it)->current_linfo))
2096 {
2097 mysql_mutex_lock(&linfo->lock);
2098 if(!strncmp(log_name, linfo->log_file_name, log_name_len))
2099 {
2100 thread_count++;
2101 sql_print_warning("file %s was not purged because it was being read "
2102 "by thread number %llu", log_name,
2103 (ulonglong)(*it)->thread_id);
2104 }
2105 mysql_mutex_unlock(&linfo->lock);
2106 }
2107 }
2108
2109 mysql_mutex_unlock(&LOCK_thread_count);
2110 return thread_count;
2111 }
2112
purge_error_message(THD * thd,int res)2113 static bool purge_error_message(THD* thd, int res)
2114 {
2115 uint errcode;
2116
2117 if ((errcode= purge_log_get_error_code(res)) != 0)
2118 {
2119 my_message(errcode, ER(errcode), MYF(0));
2120 return TRUE;
2121 }
2122 my_ok(thd);
2123 return FALSE;
2124 }
2125
2126 #endif /* HAVE_REPLICATION */
2127
check_binlog_magic(IO_CACHE * log,const char ** errmsg)2128 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
2129 {
2130 char magic[4];
2131 DBUG_ASSERT(my_b_tell(log) == 0);
2132
2133 if (my_b_read(log, (uchar*) magic, sizeof(magic)))
2134 {
2135 *errmsg = "I/O error reading the header from the binary log";
2136 sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
2137 log->error);
2138 return 1;
2139 }
2140 if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
2141 {
2142 *errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL";
2143 return 1;
2144 }
2145 return 0;
2146 }
2147
2148
open_binlog_file(IO_CACHE * log,const char * log_file_name,const char ** errmsg)2149 File open_binlog_file(IO_CACHE *log, const char *log_file_name, const char **errmsg)
2150 {
2151 File file;
2152 DBUG_ENTER("open_binlog_file");
2153
2154 if ((file= mysql_file_open(key_file_binlog,
2155 log_file_name, O_RDONLY | O_BINARY | O_SHARE,
2156 MYF(MY_WME))) < 0)
2157 {
2158 sql_print_error("Failed to open log (file '%s', errno %d)",
2159 log_file_name, my_errno);
2160 *errmsg = "Could not open log file";
2161 goto err;
2162 }
2163 if (init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
2164 MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
2165 {
2166 sql_print_error("Failed to create a cache on log (file '%s')",
2167 log_file_name);
2168 *errmsg = "Could not open log file";
2169 goto err;
2170 }
2171 if (check_binlog_magic(log,errmsg))
2172 goto err;
2173 DBUG_RETURN(file);
2174
2175 err:
2176 if (file >= 0)
2177 {
2178 mysql_file_close(file, MYF(0));
2179 end_io_cache(log);
2180 }
2181 DBUG_RETURN(-1);
2182 }
2183
2184 /**
2185 This function checks if a transactional table was updated by the
2186 current transaction.
2187
2188 @param thd The client thread that executed the current statement.
2189 @return
2190 @c true if a transactional table was updated, @c false otherwise.
2191 */
2192 bool
trans_has_updated_trans_table(const THD * thd)2193 trans_has_updated_trans_table(const THD* thd)
2194 {
2195 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
2196
2197 return (cache_mngr ? !cache_mngr->trx_cache.is_binlog_empty() : 0);
2198 }
2199
2200 /**
2201 This function checks if a transactional table was updated by the
2202 current statement.
2203
2204 @param ha_list Registered storage engine handler list.
2205 @return
2206 @c true if a transactional table was updated, @c false otherwise.
2207 */
2208 bool
stmt_has_updated_trans_table(Ha_trx_info * ha_list)2209 stmt_has_updated_trans_table(Ha_trx_info* ha_list)
2210 {
2211 Ha_trx_info *ha_info;
2212
2213 for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
2214 {
2215 if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
2216 return (TRUE);
2217 }
2218 return (FALSE);
2219 }
2220
2221 /**
2222 This function checks if a transaction, either a multi-statement
2223 or a single statement transaction is about to commit or not.
2224
2225 @param thd The client thread that executed the current statement.
2226 @param all Committing a transaction (i.e. TRUE) or a statement
2227 (i.e. FALSE).
2228 @return
2229 @c true if committing a transaction, otherwise @c false.
2230 */
ending_trans(THD * thd,const bool all)2231 bool ending_trans(THD* thd, const bool all)
2232 {
2233 return (all || ending_single_stmt_trans(thd, all));
2234 }
2235
2236 /**
2237 This function checks if a single statement transaction is about
2238 to commit or not.
2239
2240 @param thd The client thread that executed the current statement.
2241 @param all Committing a transaction (i.e. TRUE) or a statement
2242 (i.e. FALSE).
2243 @return
2244 @c true if committing a single statement transaction, otherwise
2245 @c false.
2246 */
ending_single_stmt_trans(THD * thd,const bool all)2247 bool ending_single_stmt_trans(THD* thd, const bool all)
2248 {
2249 return (!all && !thd->in_multi_stmt_transaction_mode());
2250 }
2251
2252 /**
2253 This function checks if a transaction cannot be rolled back safely.
2254
2255 @param thd The client thread that executed the current statement.
2256 @return
2257 @c true if cannot be safely rolled back, @c false otherwise.
2258 */
trans_cannot_safely_rollback(const THD * thd)2259 bool trans_cannot_safely_rollback(const THD* thd)
2260 {
2261 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
2262
2263 return cache_mngr->trx_cache.cannot_rollback();
2264 }
2265
2266 /**
2267 This function checks if current statement cannot be rollded back safely.
2268
2269 @param thd The client thread that executed the current statement.
2270 @return
2271 @c true if cannot be safely rolled back, @c false otherwise.
2272 */
stmt_cannot_safely_rollback(const THD * thd)2273 bool stmt_cannot_safely_rollback(const THD* thd)
2274 {
2275 return thd->transaction.stmt.cannot_safely_rollback();
2276 }
2277
2278 #ifndef EMBEDDED_LIBRARY
2279 /**
2280 Execute a PURGE BINARY LOGS TO <log> command.
2281
2282 @param thd Pointer to THD object for the client thread executing the
2283 statement.
2284
2285 @param to_log Name of the last log to purge.
2286
2287 @retval FALSE success
2288 @retval TRUE failure
2289 */
purge_master_logs(THD * thd,const char * to_log)2290 bool purge_master_logs(THD* thd, const char* to_log)
2291 {
2292 char search_file_name[FN_REFLEN];
2293 if (!mysql_bin_log.is_open())
2294 {
2295 my_ok(thd);
2296 return FALSE;
2297 }
2298
2299 mysql_bin_log.make_log_name(search_file_name, to_log);
2300 return purge_error_message(thd,
2301 mysql_bin_log.purge_logs(search_file_name, false,
2302 true/*need_lock_index=true*/,
2303 true/*need_update_threads=true*/,
2304 NULL, false));
2305 }
2306
2307
2308 /**
2309 Execute a PURGE BINARY LOGS BEFORE <date> command.
2310
2311 @param thd Pointer to THD object for the client thread executing the
2312 statement.
2313
2314 @param purge_time Date before which logs should be purged.
2315
2316 @retval FALSE success
2317 @retval TRUE failure
2318 */
purge_master_logs_before_date(THD * thd,time_t purge_time)2319 bool purge_master_logs_before_date(THD* thd, time_t purge_time)
2320 {
2321 if (!mysql_bin_log.is_open())
2322 {
2323 my_ok(thd);
2324 return 0;
2325 }
2326 return purge_error_message(thd,
2327 mysql_bin_log.purge_logs_before_date(purge_time,
2328 false));
2329 }
2330 #endif /* EMBEDDED_LIBRARY */
2331
2332 /*
2333 Helper function to get the error code of the query to be binlogged.
2334 */
query_error_code(THD * thd,bool not_killed)2335 int query_error_code(THD *thd, bool not_killed)
2336 {
2337 int error;
2338
2339 if (not_killed || (thd->killed == THD::KILL_BAD_DATA))
2340 {
2341 error= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0;
2342
2343 /* thd->get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
2344 ER_QUERY_INTERRUPTED, So here we need to make sure that error
2345 is not set to these errors when specified not_killed by the
2346 caller.
2347 */
2348 if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED)
2349 error= 0;
2350 }
2351 else
2352 {
2353 /* killed status for DELAYED INSERT thread should never be used */
2354 DBUG_ASSERT(!(thd->system_thread & SYSTEM_THREAD_DELAYED_INSERT));
2355 error= thd->killed_errno();
2356 }
2357
2358 return error;
2359 }
2360
2361
2362 /**
2363 Copy content of 'from' file from offset to 'to' file.
2364
2365 - We do the copy outside of the IO_CACHE as the cache
2366 buffers would just make things slower and more complicated.
2367 In most cases the copy loop should only do one read.
2368
2369 @param from File to copy.
2370 @param to File to copy to.
2371 @param offset Offset in 'from' file.
2372
2373
2374 @retval
2375 0 ok
2376 @retval
2377 -1 error
2378 */
copy_file(IO_CACHE * from,IO_CACHE * to,my_off_t offset)2379 static bool copy_file(IO_CACHE *from, IO_CACHE *to, my_off_t offset)
2380 {
2381 int bytes_read;
2382 uchar io_buf[IO_SIZE*2];
2383 DBUG_ENTER("copy_file");
2384
2385 mysql_file_seek(from->file, offset, MY_SEEK_SET, MYF(0));
2386 while(TRUE)
2387 {
2388 if ((bytes_read= (int) mysql_file_read(from->file, io_buf, sizeof(io_buf),
2389 MYF(MY_WME)))
2390 < 0)
2391 goto err;
2392 if (DBUG_EVALUATE_IF("fault_injection_copy_part_file", 1, 0))
2393 bytes_read= bytes_read/2;
2394 if (!bytes_read)
2395 break; // end of file
2396 if (mysql_file_write(to->file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
2397 goto err;
2398 }
2399
2400 DBUG_RETURN(0);
2401
2402 err:
2403 DBUG_RETURN(1);
2404 }
2405
2406
2407 #ifdef HAVE_REPLICATION
2408 /**
2409 Load data's io cache specific hook to be executed
2410 before a chunk of data is being read into the cache's buffer
2411 The fuction instantianates and writes into the binlog
2412 replication events along LOAD DATA processing.
2413
2414 @param file pointer to io-cache
2415 @retval 0 success
2416 @retval 1 failure
2417 */
log_loaded_block(IO_CACHE * file)2418 int log_loaded_block(IO_CACHE* file)
2419 {
2420 DBUG_ENTER("log_loaded_block");
2421 LOAD_FILE_INFO *lf_info;
2422 uint block_len;
2423 /* buffer contains position where we started last read */
2424 uchar* buffer= (uchar*) my_b_get_buffer_start(file);
2425 uint max_event_size= current_thd->variables.max_allowed_packet;
2426 lf_info= (LOAD_FILE_INFO*) file->arg;
2427 if (lf_info->thd->is_current_stmt_binlog_format_row())
2428 DBUG_RETURN(0);
2429 if (lf_info->last_pos_in_file != HA_POS_ERROR &&
2430 lf_info->last_pos_in_file >= my_b_get_pos_in_file(file))
2431 DBUG_RETURN(0);
2432
2433 for (block_len= (uint) (my_b_get_bytes_in_buffer(file)); block_len > 0;
2434 buffer += min(block_len, max_event_size),
2435 block_len -= min(block_len, max_event_size))
2436 {
2437 lf_info->last_pos_in_file= my_b_get_pos_in_file(file);
2438 if (lf_info->wrote_create_file)
2439 {
2440 Append_block_log_event a(lf_info->thd, lf_info->thd->db, buffer,
2441 min(block_len, max_event_size),
2442 lf_info->log_delayed);
2443 if (mysql_bin_log.write_event(&a))
2444 DBUG_RETURN(1);
2445 }
2446 else
2447 {
2448 Begin_load_query_log_event b(lf_info->thd, lf_info->thd->db,
2449 buffer,
2450 min(block_len, max_event_size),
2451 lf_info->log_delayed);
2452 if (mysql_bin_log.write_event(&b))
2453 DBUG_RETURN(1);
2454 lf_info->wrote_create_file= 1;
2455 }
2456 }
2457 DBUG_RETURN(0);
2458 }
2459
/**
  Helper function for SHOW BINLOG/RELAYLOG EVENTS.

  Locates the requested log file (or the first one if none was named),
  reads events starting at the requested position and sends them to the
  client, honouring the statement's LIMIT offset and count.  If the log
  is not open, no events are sent and the function succeeds.

  While the file is being read, thd->current_linfo points at a LOG_INFO
  local to this function; it is reset to 0 before returning.
  thd->variables.max_allowed_packet is temporarily increased by
  MAX_LOG_EVENT_HEADER and restored on exit.

  @param thd         The client thread executing the statement.
  @param binary_log  The log (binary or relay) whose events are shown.

  @retval FALSE  success; my_eof() was sent
  @retval TRUE   failure; ER_ERROR_WHEN_EXECUTING_COMMAND was raised
*/
bool show_binlog_events(THD *thd, MYSQL_BIN_LOG *binary_log)
{
  Protocol *protocol= thd->protocol;
  List<Item> field_list;
  const char *errmsg = 0;
  bool ret = TRUE;
  IO_CACHE log;
  File file = -1;
  /* Saved so that the temporary increase below can be undone at exit. */
  int old_max_allowed_packet= thd->variables.max_allowed_packet;
  LOG_INFO linfo;

  DBUG_ENTER("show_binlog_events");

  DBUG_ASSERT(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS ||
              thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS);

  Format_description_log_event *description_event= new
    Format_description_log_event(3); /* MySQL 4.0 by default */

  if (binary_log->is_open())
  {
    LEX_MASTER_INFO *lex_mi= &thd->lex->mi;
    SELECT_LEX_UNIT *unit= &thd->lex->unit;
    ha_rows event_count, limit_start, limit_end;
    /* Positions inside the file header are rounded up to the first event. */
    my_off_t pos = max<my_off_t>(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly
    char search_file_name[FN_REFLEN], *name;
    const char *log_file_name = lex_mi->log_file_name;
    mysql_mutex_t *log_lock = binary_log->get_log_lock();
    Log_event* ev;

    unit->set_limit(thd->lex->current_select);
    limit_start= unit->offset_limit_cnt;
    limit_end= unit->select_limit_cnt;

    name= search_file_name;
    if (log_file_name)
      binary_log->make_log_name(search_file_name, log_file_name);
    else
      name=0;					// Find first log

    linfo.index_file_offset = 0;

    if (binary_log->find_log_pos(&linfo, name, true/*need_lock_index=true*/))
    {
      errmsg = "Could not find target log";
      goto err;
    }

    /*
      Publish linfo through the THD while the file is in use.  Other code
      in this file (log_in_use(), adjust_linfo_offsets()) inspects
      current_linfo of every thread under LOCK_thread_count.
    */
    mysql_mutex_lock(&LOCK_thread_count);
    thd->current_linfo = &linfo;
    mysql_mutex_unlock(&LOCK_thread_count);

    if ((file=open_binlog_file(&log, linfo.log_file_name, &errmsg)) < 0)
      goto err;

    my_off_t end_pos;
    /*
      Acquire LOCK_log only for the duration to calculate the
      log's end position. LOCK_log should be acquired even while
      we are checking whether the log is active log or not.
    */
    mysql_mutex_lock(log_lock);
    if (binary_log->is_active(linfo.log_file_name))
    {
      LOG_INFO li;
      binary_log->get_current_log(&li, false /*LOCK_log is already acquired*/);
      end_pos= li.pos;
    }
    else
    {
      end_pos= my_b_filelength(&log);
    }
    mysql_mutex_unlock(log_lock);

    /*
      to account binlog event header size
    */
    thd->variables.max_allowed_packet += MAX_LOG_EVENT_HEADER;

    DEBUG_SYNC(thd, "after_show_binlog_event_found_file");

    /*
      open_binlog_file() sought to position 4.
      Read the first event in case it's a Format_description_log_event, to
      know the format. If there's no such event, we are 3.23 or 4.x. This
      code, like before, can't read 3.23 binlogs.
      This code will fail on a mixed relay log (one which has Format_desc then
      Rotate then Format_desc).
    */
    ev= Log_event::read_log_event(&log, (mysql_mutex_t*)0, description_event,
                                   opt_master_verify_checksum);
    if (ev)
    {
      if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
      {
        /* The event read from the file replaces the default description. */
        delete description_event;
        description_event= (Format_description_log_event*) ev;
      }
      else
        delete ev;
    }

    /* Continue from the position the user asked for. */
    my_b_seek(&log, pos);

    if (!description_event->is_valid())
    {
      errmsg="Invalid Format_description event; could be out of memory";
      goto err;
    }

    /*
      Read events one by one.  Events before limit_start are read but not
      sent.  The loop stops when limit_end events have been counted, when
      end_pos is reached, or when no further event can be read.
    */
    for (event_count = 0;
         (ev = Log_event::read_log_event(&log, (mysql_mutex_t*) 0,
                                         description_event,
                                         opt_master_verify_checksum)); )
    {
      DEBUG_SYNC(thd, "wait_in_show_binlog_events_loop");
      if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
        description_event->checksum_alg= ev->checksum_alg;

      if (event_count >= limit_start &&
          ev->net_send(protocol, linfo.log_file_name, pos))
      {
        errmsg = "Net error";
        delete ev;
        goto err;
      }

      pos = my_b_tell(&log);
      delete ev;

      if (++event_count >= limit_end || pos >= end_pos)
        break;
    }

    /* The loop ended early because of a read error, not because of LIMIT. */
    if (event_count < limit_end && log.error)
    {
      errmsg = "Wrong offset or I/O error";
      goto err;
    }

  }
  // Check that linfo is still on the function scope.
  DEBUG_SYNC(thd, "after_show_binlog_events");

  ret= FALSE;

err:
  /* Common exit path for both success and failure. */
  delete description_event;
  if (file >= 0)
  {
    end_io_cache(&log);
    mysql_file_close(file, MYF(MY_WME));
  }

  if (errmsg)
    my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
             "SHOW BINLOG EVENTS", errmsg);
  else
    my_eof(thd);

  /* linfo is about to go out of scope, so it must no longer be published. */
  mysql_mutex_lock(&LOCK_thread_count);
  thd->current_linfo = 0;
  mysql_mutex_unlock(&LOCK_thread_count);
  thd->variables.max_allowed_packet= old_max_allowed_packet;
  DBUG_RETURN(ret);
}
2627
2628 /**
2629 Execute a SHOW BINLOG EVENTS statement.
2630
2631 @param thd Pointer to THD object for the client thread executing the
2632 statement.
2633
2634 @retval FALSE success
2635 @retval TRUE failure
2636 */
mysql_show_binlog_events(THD * thd)2637 bool mysql_show_binlog_events(THD* thd)
2638 {
2639 Protocol *protocol= thd->protocol;
2640 List<Item> field_list;
2641 DBUG_ENTER("mysql_show_binlog_events");
2642
2643 DBUG_ASSERT(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS);
2644
2645 Log_event::init_show_field_list(&field_list);
2646 if (protocol->send_result_set_metadata(&field_list,
2647 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
2648 DBUG_RETURN(TRUE);
2649
2650 /*
2651 Wait for handlers to insert any pending information
2652 into the binlog. For e.g. ndb which updates the binlog asynchronously
2653 this is needed so that the uses sees all its own commands in the binlog
2654 */
2655 ha_binlog_wait(thd);
2656
2657 DBUG_RETURN(show_binlog_events(thd, &mysql_bin_log));
2658 }
2659
2660 #endif /* HAVE_REPLICATION */
2661
2662
/**
  Construct a MYSQL_BIN_LOG with its counters set to their initial
  values, an empty index file name and zero-filled index caches.

  No mutexes or condition variables are initialized here; see the
  comment in the body.

  @param sync_period  Pointer stored in sync_period_ptr; it is not
                      dereferenced by the constructor.
*/
MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
  :bytes_written(0), file_id(1), open_count(1),
   sync_period_ptr(sync_period), sync_counter(0),
   m_prep_xids(0),
   is_relay_log(0), signal_cnt(0),
   checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
   relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
   previous_gtid_set(0), snapshot_lock_acquired(false)
{
  /*
    We don't want to initialize locks here as such initialization depends on
    safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
    called only in main(). Doing initialization here would make it happen
    before main().
  */
  index_file_name[0] = 0;
  /* Zero-fill the three index IO_CACHE members. */
  memset(&index_file, 0, sizeof(index_file));
  memset(&purge_index_file, 0, sizeof(purge_index_file));
  memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
}
2683
2684
2685 /* this is called only once */
2686
cleanup()2687 void MYSQL_BIN_LOG::cleanup()
2688 {
2689 DBUG_ENTER("cleanup");
2690 if (inited)
2691 {
2692 inited= 0;
2693 close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, true /*need_lock_log=true*/,
2694 true /*need_lock_index=true*/);
2695 mysql_mutex_destroy(&LOCK_log);
2696 mysql_mutex_destroy(&LOCK_index);
2697 mysql_mutex_destroy(&LOCK_commit);
2698 mysql_mutex_destroy(&LOCK_sync);
2699 mysql_mutex_destroy(&LOCK_xids);
2700 mysql_cond_destroy(&update_cond);
2701 my_atomic_rwlock_destroy(&m_prep_xids_lock);
2702 mysql_cond_destroy(&m_prep_xids_cond);
2703 stage_manager.deinit();
2704 }
2705 DBUG_VOID_RETURN;
2706 }
2707
2708
/**
  Initialize the mutexes, condition variables and stage manager of this
  instance.

  The base class objects are initialized first through
  MYSQL_LOG::init_pthread_objects().
*/
void MYSQL_BIN_LOG::init_pthread_objects()
{
  MYSQL_LOG::init_pthread_objects();
  mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
  mysql_mutex_init(m_key_LOCK_commit, &LOCK_commit, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(m_key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(m_key_LOCK_xids, &LOCK_xids, MY_MUTEX_INIT_FAST);
  mysql_cond_init(m_key_update_cond, &update_cond, 0);
  my_atomic_rwlock_init(&m_prep_xids_lock);
  mysql_cond_init(m_key_prep_xids_cond, &m_prep_xids_cond, NULL);
  /*
    The instrumentation keys are passed to the stage manager only when
    the PSI interface is compiled in; otherwise init() takes no arguments.
  */
  stage_manager.init(
#ifdef HAVE_PSI_INTERFACE
                   m_key_LOCK_flush_queue,
                   m_key_LOCK_sync_queue,
                   m_key_LOCK_commit_queue,
                   m_key_LOCK_done, m_key_COND_done
#endif
                   );
}
2728
/**
  Open the index file of this log, creating it if it does not exist.

  If the index cache is already initialized the function returns
  immediately without error.  Otherwise it builds the index file name,
  recovers a leftover crash-safe index file if the real one is missing,
  opens the file and attaches a read cache to it.  With
  HAVE_REPLICATION it then runs the purge index to bring the file system
  and the index back in sync.

  @param index_file_name_arg  Name of the index file; if NULL, the name is
                              derived from @c log_name with the extension
                              replaced by ".index".
  @param log_name             Base name used when no index name is given.
  @param need_lock_index      If true, LOCK_index is acquired here and
                              released before returning; if false, the
                              caller must already own it.

  @retval false  success (or the index file was already open)
  @retval true   failure
*/
bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
                                    const char *log_name, bool need_lock_index)
{
  bool error= false;
  File index_file_nr= -1;

  if (need_lock_index)
    mysql_mutex_lock(&LOCK_index);
  else
    mysql_mutex_assert_owner(&LOCK_index);

  /*
    First open of this class instance
    Create an index file that will hold all file names uses for logging.
    Add new entries to the end of it.
  */
  myf opt= MY_UNPACK_FILENAME;

  /* Already open: nothing to do, report success. */
  if (my_b_inited(&index_file))
    goto end;

  if (!index_file_name_arg)
  {
    index_file_name_arg= log_name;    // Use same basename for index file
    opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
  }
  fn_format(index_file_name, index_file_name_arg, mysql_data_home,
            ".index", opt);

  if (set_crash_safe_index_file_name(index_file_name_arg))
  {
    sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed.");
    error= true;
    goto end;
  }

  /*
    We need move crash_safe_index_file to index_file if the index_file
    does not exist and crash_safe_index_file exists when mysqld server
    restarts.
  */
  if (my_access(index_file_name, F_OK) &&
      !my_access(crash_safe_index_file_name, F_OK) &&
      my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)))
  {
    sql_print_error("MYSQL_BIN_LOG::open_index_file failed to "
                    "move crash_safe_index_file to index file.");
    error= true;
    goto end;
  }

  /*
    Open (or create) the file, sync it, and attach a read cache whose
    initial offset is the end of the file.  The last operand is a
    debug-only fault injection point.
  */
  if ((index_file_nr= mysql_file_open(m_key_file_log_index,
                                      index_file_name,
                                      O_RDWR | O_CREAT | O_BINARY,
                                      MYF(MY_WME))) < 0 ||
       mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
       init_io_cache(&index_file, index_file_nr,
                     IO_SIZE, READ_CACHE,
                     mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
                     0, MYF(MY_WME | MY_WAIT_IF_FULL)) ||
      DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
  {
    /*
      TODO: all operations creating/deleting the index file or a log, should
      call my_sync_dir() or my_sync_dir_by_file() to be durable.
      TODO: file creation should be done with mysql_file_create()
      not mysql_file_open().
    */
    if (index_file_nr >= 0)
      mysql_file_close(index_file_nr, MYF(0));
    error= true;
    goto end;
  }

#ifdef HAVE_REPLICATION
  /*
    Sync the index by purging any binary log file that is not registered.
    In other words, either purge binary log files that were removed from
    the index but not purged from the file system due to a crash or purge
    any binary log file that was created but not register in the index
    due to a crash.
  */

  if (set_purge_index_file_name(index_file_name_arg) ||
      open_purge_index_file(FALSE) ||
      purge_index_entry(NULL, NULL, false) ||
      close_purge_index_file() ||
      DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
  {
    sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
                    "file.");
    error= TRUE;
    goto end;
  }
#endif
end:
  /* Release LOCK_index only if it was taken by this function. */
  if (need_lock_index)
    mysql_mutex_unlock(&LOCK_index);
  return error;
}
2829
2830
2831 /**
2832 Reads GTIDs from the given binlog file.
2833
2834 @param filename File to read from.
2835 @param all_gtids If not NULL, then the GTIDs from the
2836 Previous_gtids_log_event and from all Gtid_log_events are stored in
2837 this object.
2838 @param prev_gtids If not NULL, then the GTIDs from the
2839 Previous_gtids_log_events are stored in this object.
2840 @param first_gtid If not NULL, then the first GTID information from the
2841 file will be stored in this object.
2842 @param last_gtid If not NULL, then the last GTID information from the
2843 file will be stored in this object.
2844 @param sid_map The sid_map object to use in the rpl_sidno generation
2845 of the Gtid_log_event. If lock is needed in the sid_map, the caller
2846 must hold it.
2847 @param verify_checksum Set to true to verify event checksums.
2848
2849 @retval GOT_GTIDS The file was successfully read and it contains
2850 both Gtid_log_events and Previous_gtids_log_events.
2851 @retval GOT_PREVIOUS_GTIDS The file was successfully read and it
2852 contains Previous_gtids_log_events but no Gtid_log_events.
2853 @retval NO_GTIDS The file was successfully read and it does not
2854 contain GTID events.
2855 @retval ERROR Out of memory, or the file contains GTID events
2856 when GTID_MODE = OFF, or the file is malformed (e.g., contains
2857 Gtid_log_events but no Previous_gtids_log_event).
2858 @retval TRUNCATED The file was truncated before the end of the
2859 first Previous_gtids_log_event.
2860 */
2861 enum enum_read_gtids_from_binlog_status
2862 { GOT_GTIDS, GOT_PREVIOUS_GTIDS, NO_GTIDS, ERROR, TRUNCATED };
2863 static enum_read_gtids_from_binlog_status
read_gtids_from_binlog(const char * filename,Gtid_set * all_gtids,Gtid_set * prev_gtids,Gtid * first_gtid,Gtid * last_gtid,Sid_map * sid_map,bool verify_checksum)2864 read_gtids_from_binlog(const char *filename, Gtid_set *all_gtids,
2865 Gtid_set *prev_gtids, Gtid *first_gtid,
2866 Gtid *last_gtid,
2867 Sid_map* sid_map,
2868 bool verify_checksum)
2869 {
2870 DBUG_ENTER("read_gtids_from_binlog");
2871 DBUG_PRINT("info", ("Opening file %s", filename));
2872
2873 /*
2874 Create a Format_description_log_event that is used to read the
2875 first event of the log.
2876 */
2877 Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
2878 if (!fd_ev.is_valid())
2879 DBUG_RETURN(ERROR);
2880
2881 File file;
2882 IO_CACHE log;
2883
2884 /*
2885 We assert here that both all_gtids and prev_gtids, if specified,
2886 uses the same sid_map as the one passed as a parameter. This is just
2887 to ensure that, if the sid_map needed some lock and was locked by
2888 the caller, the lock applies to all the GTID sets this function is
2889 dealing with.
2890 */
2891 #ifndef DBUG_OFF
2892 if (all_gtids)
2893 DBUG_ASSERT(all_gtids->get_sid_map() == sid_map);
2894 if (prev_gtids)
2895 DBUG_ASSERT(prev_gtids->get_sid_map() == sid_map);
2896 #endif
2897
2898 const char *errmsg= NULL;
2899 if ((file= open_binlog_file(&log, filename, &errmsg)) < 0)
2900 {
2901 sql_print_error("%s", errmsg);
2902 /*
2903 We need to revisit the recovery procedure for relay log
2904 files. Currently, it is called after this routine.
2905 /Alfranio
2906 */
2907 DBUG_RETURN(TRUNCATED);
2908 }
2909
2910 /*
2911 Seek for Previous_gtids_log_event and Gtid_log_event events to
2912 gather information what has been processed so far.
2913 */
2914 my_b_seek(&log, BIN_LOG_HEADER_SIZE);
2915 Log_event *ev= NULL;
2916 enum_read_gtids_from_binlog_status ret= NO_GTIDS;
2917 bool done= false;
2918 bool seen_first_gtid= false;
2919 while (!done &&
2920 (ev= Log_event::read_log_event(&log, 0, fd_ev_p, verify_checksum)) !=
2921 NULL)
2922 {
2923 DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
2924 switch (ev->get_type_code())
2925 {
2926 case FORMAT_DESCRIPTION_EVENT:
2927 if (fd_ev_p != &fd_ev)
2928 delete fd_ev_p;
2929 fd_ev_p= (Format_description_log_event *)ev;
2930 break;
2931 case ROTATE_EVENT:
2932 // do nothing; just accept this event and go to next
2933 break;
2934 case PREVIOUS_GTIDS_LOG_EVENT:
2935 {
2936 if (gtid_mode == 0)
2937 {
2938 my_error(ER_FOUND_GTID_EVENT_WHEN_GTID_MODE_IS_OFF, MYF(0));
2939 ret= ERROR;
2940 }
2941 ret= GOT_PREVIOUS_GTIDS;
2942 // add events to sets
2943 Previous_gtids_log_event *prev_gtids_ev=
2944 (Previous_gtids_log_event *)ev;
2945 if (all_gtids != NULL && prev_gtids_ev->add_to_set(all_gtids) != 0)
2946 ret= ERROR, done= true;
2947 else if (prev_gtids != NULL && prev_gtids_ev->add_to_set(prev_gtids) != 0)
2948 ret= ERROR, done= true;
2949 #ifndef DBUG_OFF
2950 char* prev_buffer= prev_gtids_ev->get_str(NULL, NULL);
2951 DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
2952 filename, prev_buffer));
2953 my_free(prev_buffer);
2954 #endif
2955 break;
2956 }
2957 case GTID_LOG_EVENT:
2958 {
2959 DBUG_EXECUTE_IF("inject_fault_bug16502579", {
2960 DBUG_PRINT("debug", ("GTID_LOG_EVENT found. Injected ret=NO_GTIDS."));
2961 ret=NO_GTIDS;
2962 });
2963 if (ret != GOT_GTIDS)
2964 {
2965 if (ret != GOT_PREVIOUS_GTIDS)
2966 {
2967 /*
2968 Since this routine is run on startup, there may not be a
2969 THD instance. Therefore, ER(X) cannot be used.
2970 */
2971 const char* msg_fmt= (current_thd != NULL) ?
2972 ER(ER_BINLOG_LOGICAL_CORRUPTION) :
2973 ER_DEFAULT(ER_BINLOG_LOGICAL_CORRUPTION);
2974 my_printf_error(ER_BINLOG_LOGICAL_CORRUPTION,
2975 msg_fmt, MYF(0),
2976 filename,
2977 "The first global transaction identifier was read, but "
2978 "no other information regarding identifiers existing "
2979 "on the previous log files was found.");
2980 ret= ERROR, done= true;
2981 break;
2982 }
2983 else
2984 ret= GOT_GTIDS;
2985 }
2986 /*
2987 When all_gtids, first_gtid and last_gtid are all NULL,
2988 we just check if the binary log contains at least one Gtid_log_event,
2989 so that we can distinguish the return values GOT_GTID and
2990 GOT_PREVIOUS_GTIDS. We don't need to read anything else from the
2991 binary log.
2992 If all_gtids or last_gtid is requested (i.e., NOT NULL), we should
2993 continue to read all gtids.
2994 If just first_gtid was requested, we will be done after storing this
2995 Gtid_log_event info on it.
2996 */
2997 if (all_gtids == NULL && first_gtid == NULL && last_gtid == NULL)
2998 {
2999 ret= GOT_GTIDS, done= true;
3000 }
3001 else
3002 {
3003 Gtid_log_event *gtid_ev= (Gtid_log_event *)ev;
3004 rpl_sidno sidno= gtid_ev->get_sidno(sid_map);
3005 if (sidno < 0)
3006 ret= ERROR, done= true;
3007 else
3008 {
3009 if (all_gtids)
3010 {
3011 if (all_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK)
3012 ret= ERROR, done= true;
3013 else if (all_gtids->_add_gtid(sidno, gtid_ev->get_gno()) !=
3014 RETURN_STATUS_OK)
3015 ret= ERROR, done= true;
3016 DBUG_PRINT("info", ("Got Gtid from file '%s': Gtid(%d, %lld).",
3017 filename, sidno, gtid_ev->get_gno()));
3018 }
3019
3020 /* If the first GTID was requested, stores it */
3021 if (first_gtid && !seen_first_gtid)
3022 {
3023 first_gtid->set(sidno, gtid_ev->get_gno());
3024 seen_first_gtid= true;
3025 /* If the first_gtid was the only thing requested, we are done */
3026 if (all_gtids == NULL && last_gtid == NULL)
3027 ret= GOT_GTIDS, done= true;
3028 }
3029
3030 if (last_gtid)
3031 last_gtid->set(sidno, gtid_ev->get_gno());
3032 }
3033 }
3034 break;
3035 }
3036 case ANONYMOUS_GTID_LOG_EVENT:
3037 default:
3038 // if we found any other event type without finding a
3039 // previous_gtids_log_event, then the rest of this binlog
3040 // cannot contain gtids
3041 if (ret != GOT_GTIDS && ret != GOT_PREVIOUS_GTIDS)
3042 done= true;
3043 break;
3044 }
3045 if (ev != fd_ev_p)
3046 delete ev;
3047 DBUG_PRINT("info", ("done=%d", done));
3048 }
3049
3050 if (log.error < 0)
3051 {
3052 // This is not a fatal error; the log may just be truncated.
3053
3054 // @todo but what other errors could happen? IO error?
3055 sql_print_warning("Error reading GTIDs from binary log: %d", log.error);
3056 }
3057
3058 if (fd_ev_p != &fd_ev)
3059 {
3060 delete fd_ev_p;
3061 fd_ev_p= &fd_ev;
3062 }
3063
3064 mysql_file_close(file, MYF(MY_WME));
3065 end_io_cache(&log);
3066
3067 DBUG_PRINT("info", ("returning %d", ret));
3068 DBUG_RETURN(ret);
3069 }
3070
find_first_log_not_in_gtid_set(char * binlog_file_name,const Gtid_set * gtid_set,Gtid * first_gtid,const char ** errmsg)3071 bool MYSQL_BIN_LOG::find_first_log_not_in_gtid_set(char *binlog_file_name,
3072 const Gtid_set *gtid_set,
3073 Gtid *first_gtid,
3074 const char **errmsg)
3075 {
3076 DBUG_ENTER("MYSQL_BIN_LOG::gtid_read_start_binlog");
3077 /*
3078 Gather the set of files to be accessed.
3079 */
3080 list<string> filename_list;
3081 LOG_INFO linfo;
3082 int error;
3083
3084 list<string>::reverse_iterator rit;
3085 Gtid_set previous_gtid_set(gtid_set->get_sid_map());
3086
3087 mysql_mutex_lock(&LOCK_index);
3088 for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/);
3089 !error; error= find_next_log(&linfo, false/*need_lock_index=false*/))
3090 {
3091 DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
3092 filename_list.push_back(string(linfo.log_file_name));
3093 }
3094 mysql_mutex_unlock(&LOCK_index);
3095 if (error != LOG_INFO_EOF)
3096 {
3097 *errmsg= "Failed to read the binary log index file while "
3098 "looking for the oldest binary log that contains any GTID "
3099 "that is not in the given gtid set";
3100 error= -1;
3101 goto end;
3102 }
3103
3104 if (filename_list.empty())
3105 {
3106 *errmsg= "Could not find first log file name in binary log index file "
3107 "while looking for the oldest binary log that contains any GTID "
3108 "that is not in the given gtid set";
3109 error= -2;
3110 goto end;
3111 }
3112
3113 /*
3114 Iterate over all the binary logs in reverse order, and read only
3115 the Previous_gtids_log_event, to find the first one, that is the
3116 subset of the given gtid set. Since every binary log begins with
3117 a Previous_gtids_log_event, that contains all GTIDs in all
3118 previous binary logs.
3119 We also ask for the first GTID in the binary log to know if we
3120 should send the FD event with the "created" field cleared or not.
3121 */
3122 DBUG_PRINT("info", ("Iterating backwards through binary logs, and reading "
3123 "only the Previous_gtids_log_event, to find the first "
3124 "one, that is the subset of the given gtid set."));
3125 rit= filename_list.rbegin();
3126 error= 0;
3127 while (rit != filename_list.rend())
3128 {
3129 previous_gtid_set.clear();
3130 const char *filename= rit->c_str();
3131 DBUG_PRINT("info", ("Read Previous_gtids_log_event from filename='%s'",
3132 filename));
3133 switch (read_gtids_from_binlog(filename, NULL, &previous_gtid_set,
3134 first_gtid, NULL/* last_gtid */,
3135 previous_gtid_set.get_sid_map(),
3136 opt_master_verify_checksum))
3137 {
3138 case ERROR:
3139 *errmsg= "Error reading header of binary log while looking for "
3140 "the oldest binary log that contains any GTID that is not in "
3141 "the given gtid set";
3142 error= -3;
3143 goto end;
3144 case NO_GTIDS:
3145 *errmsg= "Found old binary log without GTIDs while looking for "
3146 "the oldest binary log that contains any GTID that is not in "
3147 "the given gtid set";
3148 error= -4;
3149 goto end;
3150 case GOT_GTIDS:
3151 case GOT_PREVIOUS_GTIDS:
3152 if (previous_gtid_set.is_subset(gtid_set))
3153 {
3154 strcpy(binlog_file_name, filename);
3155 /*
3156 Verify that the selected binlog is not the first binlog,
3157 */
3158 DBUG_EXECUTE_IF("slave_reconnect_with_gtid_set_executed",
3159 DBUG_ASSERT(strcmp(filename_list.begin()->c_str(),
3160 binlog_file_name) != 0););
3161 goto end;
3162 }
3163 case TRUNCATED:
3164 break;
3165 }
3166
3167 rit++;
3168 }
3169
3170 if (rit == filename_list.rend())
3171 {
3172 report_missing_gtids(&previous_gtid_set, gtid_set, errmsg);
3173 error= -5;
3174 }
3175
3176 end:
3177 if (error)
3178 DBUG_PRINT("error", ("'%s'", *errmsg));
3179 filename_list.clear();
3180 DBUG_PRINT("info", ("returning %d", error));
3181 DBUG_RETURN(error != 0 ? true : false);
3182 }
3183
init_gtid_sets(Gtid_set * all_gtids,Gtid_set * lost_gtids,Gtid * last_gtid,bool verify_checksum,bool need_lock,bool is_server_starting)3184 bool MYSQL_BIN_LOG::init_gtid_sets(Gtid_set *all_gtids, Gtid_set *lost_gtids,
3185 Gtid *last_gtid, bool verify_checksum,
3186 bool need_lock, bool is_server_starting)
3187 {
3188 DBUG_ENTER("MYSQL_BIN_LOG::init_gtid_sets");
3189 DBUG_PRINT("info", ("lost_gtids=%p; so we are recovering a %s log",
3190 lost_gtids, lost_gtids == NULL ? "relay" : "binary"));
3191
3192 /*
3193 Acquires the necessary locks to ensure that logs are not either
3194 removed or updated when we are reading from it.
3195 */
3196 if (need_lock)
3197 {
3198 // We don't need LOCK_log if we are only going to read the initial
3199 // Prevoius_gtids_log_event and ignore the Gtid_log_events.
3200 if (all_gtids != NULL)
3201 mysql_mutex_lock(&LOCK_log);
3202 mysql_mutex_lock(&LOCK_index);
3203 global_sid_lock->wrlock();
3204 }
3205 else
3206 {
3207 if (all_gtids != NULL)
3208 mysql_mutex_assert_owner(&LOCK_log);
3209 mysql_mutex_assert_owner(&LOCK_index);
3210 global_sid_lock->assert_some_wrlock();
3211 }
3212
3213 // Gather the set of files to be accessed.
3214 list<string> filename_list;
3215 LOG_INFO linfo;
3216 int error;
3217
3218 list<string>::iterator it;
3219 list<string>::reverse_iterator rit;
3220 bool reached_first_file= false;
3221
3222 /* Initialize the sid_map to be used in read_gtids_from_binlog */
3223 Sid_map *sid_map= NULL;
3224 if (all_gtids)
3225 sid_map= all_gtids->get_sid_map();
3226 else if (lost_gtids)
3227 sid_map= lost_gtids->get_sid_map();
3228
3229 for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/); !error;
3230 error= find_next_log(&linfo, false/*need_lock_index=false*/))
3231 {
3232 DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
3233 filename_list.push_back(string(linfo.log_file_name));
3234 }
3235 if (error != LOG_INFO_EOF)
3236 {
3237 DBUG_PRINT("error", ("Error reading binlog index"));
3238 goto end;
3239 }
3240 /*
3241 On server starting, one new empty binlog file is created and
3242 its file name is put into index file before initializing
3243 GLOBAL.GTID_EXECUTED AND GLOBAL.GTID_PURGED, it is not the
3244 last binlog file before the server restarts, so we remove
3245 its file name from filename_list.
3246 */
3247 if (is_server_starting && !is_relay_log && !filename_list.empty())
3248 filename_list.pop_back();
3249
3250 error= 0;
3251
3252 if (all_gtids != NULL)
3253 {
3254 DBUG_PRINT("info", ("Iterating backwards through binary logs, looking for the last binary log that contains a Previous_gtids_log_event."));
3255 // Iterate over all files in reverse order until we find one that
3256 // contains a Previous_gtids_log_event.
3257 rit= filename_list.rbegin();
3258 bool got_gtids= false;
3259 reached_first_file= (rit == filename_list.rend());
3260 DBUG_PRINT("info", ("filename='%s' reached_first_file=%d",
3261 rit->c_str(), reached_first_file));
3262 while ((!got_gtids || (last_gtid && last_gtid->empty()))
3263 && !reached_first_file)
3264 {
3265 const char *filename= rit->c_str();
3266 rit++;
3267 reached_first_file= (rit == filename_list.rend());
3268 DBUG_PRINT("info", ("filename='%s' got_gtids=%d reached_first_file=%d",
3269 filename, got_gtids, reached_first_file));
3270 switch (read_gtids_from_binlog(filename, got_gtids ? NULL : all_gtids,
3271 reached_first_file ? lost_gtids : NULL,
3272 NULL/* first_gtid */, last_gtid,
3273 sid_map, verify_checksum))
3274 {
3275 case ERROR:
3276 {
3277 error= 1;
3278 goto end;
3279 }
3280 case GOT_GTIDS:
3281 case GOT_PREVIOUS_GTIDS:
3282 {
3283 got_gtids= true;
3284 break;
3285 }
3286 case NO_GTIDS:
3287 {
3288 /*
3289 If the binlog_gtid_simple_recovery is enabled, and the
3290 last binary log does not contain any GTID event, do not
3291 read any more binary logs, GLOBAL.GTID_EXECUTED and
3292 GLOBAL.GTID_PURGED should be empty in the case. Otherwise,
3293 initialize GTID_EXECUTED as usual.
3294 */
3295 if (binlog_gtid_simple_recovery && !is_relay_log)
3296 {
3297 DBUG_ASSERT(all_gtids->is_empty() && lost_gtids->is_empty());
3298 goto end;
3299 }
3300 /*FALLTHROUGH*/
3301 }
3302 case TRUNCATED:
3303 {
3304 break;
3305 }
3306 }
3307 }
3308 }
3309 if (lost_gtids != NULL && !reached_first_file)
3310 {
3311 DBUG_PRINT("info", ("Iterating forwards through binary logs, looking for the first binary log that contains a Previous_gtids_log_event."));
3312 for (it= filename_list.begin(); it != filename_list.end(); it++)
3313 {
3314 const char *filename= it->c_str();
3315 DBUG_PRINT("info", ("filename='%s'", filename));
3316 switch (read_gtids_from_binlog(filename, NULL, lost_gtids,
3317 NULL/* first_gtid */, NULL/* last_gtid */,
3318 sid_map, verify_checksum))
3319 {
3320 case ERROR:
3321 {
3322 error= 1;
3323 /*FALLTHROUGH*/
3324 }
3325 case GOT_GTIDS:
3326 {
3327 goto end;
3328 }
3329 case NO_GTIDS:
3330 {
3331 /*
3332 If the binlog_gtid_simple_recovery is enabled, and the
3333 first binary log does not contain any GTID event, do not
3334 read any more binary logs, GLOBAL.GTID_PURGED should be
3335 empty in the case.
3336 */
3337 if (binlog_gtid_simple_recovery && !is_relay_log)
3338 {
3339 DBUG_ASSERT(lost_gtids->is_empty());
3340 goto end;
3341 }
3342 /*FALLTHROUGH*/
3343 }
3344 case GOT_PREVIOUS_GTIDS:
3345 case TRUNCATED:
3346 {
3347 break;
3348 }
3349 }
3350 }
3351 }
3352 end:
3353 if (all_gtids)
3354 all_gtids->dbug_print("all_gtids");
3355 if (lost_gtids)
3356 lost_gtids->dbug_print("lost_gtids");
3357 if (need_lock)
3358 {
3359 global_sid_lock->unlock();
3360 mysql_mutex_unlock(&LOCK_index);
3361 if (all_gtids != NULL)
3362 mysql_mutex_unlock(&LOCK_log);
3363 }
3364 filename_list.clear();
3365 DBUG_PRINT("info", ("returning %d", error));
3366 DBUG_RETURN(error != 0 ? true : false);
3367 }
3368
3369
/**
  Open a (new) binlog file.

  - Open the log file and the index file. Register the new
    file name in it
  - When calling this while the file is in use, you must hold
    LOCK_log and LOCK_index.

  The file receives, in order: the binlog magic header (only if the file
  is empty), a Format_description_log_event, a Previous_gtids_log_event
  (only when there is a current THD and gtid_mode > 0) and, optionally,
  extra_description_event. The file is then flushed and synced, and its
  name is added to the index file if the file was empty on entry.

  @param log_name          Passed to init_and_set_log_file_name() and
                           MYSQL_LOG::open().
  @param new_name          Passed to the same functions; also used in the
                           error message when not NULL.
  @param io_cache_type_arg Cache type passed to the same functions.
  @param max_size_arg      Stored in max_size.
  @param null_created_arg  Copied to the dont_set_created field of the
                           Format_description_log_event that is written.
  @param need_lock_log     Forwarded to close() on the error path.
  @param need_lock_index   Forwarded to add_log_to_index(),
                           purge_index_entry() and close().
  @param need_sid_lock     If true, the write lock on global_sid_lock is
                           taken while the Previous_gtids_log_event is
                           constructed; otherwise it is asserted to be held.
  @param extra_description_event If not NULL and its binlog_version is at
                           least 4, it is written to the log after its
                           'created' field has been cleared and it has been
                           marked artificial.

  @retval
    0	ok
  @retval
    1	error
*/

bool MYSQL_BIN_LOG::open_binlog(const char *log_name,
                                const char *new_name,
                                enum cache_type io_cache_type_arg,
                                ulong max_size_arg,
                                bool null_created_arg,
                                bool need_lock_log,
                                bool need_lock_index,
                                bool need_sid_lock,
                                Format_description_log_event *extra_description_event)
{

  // lock_index must be acquired *before* sid_lock.
  DBUG_ASSERT(need_sid_lock || !need_lock_index);
  DBUG_ENTER("MYSQL_BIN_LOG::open_binlog(const char *, ...)");
  DBUG_PRINT("enter",("name: %s", log_name));

  if (init_and_set_log_file_name(log_name, new_name, LOG_BIN,
                                 io_cache_type_arg))
  {
    sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
    DBUG_RETURN(1);
  }

#ifdef HAVE_REPLICATION
  /*
    Record the new file name in the purge index file before the log file
    itself is opened; the error path below uses that file to clean up.
  */
  if (open_purge_index_file(TRUE) ||
      register_create_index_entry(log_file_name) ||
      sync_purge_index_file() ||
      DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
  {
    /**
      @todo: although this was introduced to appease valgrind
      when injecting emulated faults using fault_injection_registering_index
      it may be good to consider what actually happens when
      open_purge_index_file succeeds but register or sync fails.

      Perhaps we might need the code below in MYSQL_LOG_BIN::cleanup
      for "real life" purposes as well?
    */
    DBUG_EXECUTE_IF("fault_injection_registering_index", {
      if (my_b_inited(&purge_index_file))
      {
        end_io_cache(&purge_index_file);
        my_close(purge_index_file.file, MYF(0));
      }
    });

    sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
    DBUG_RETURN(1);
  }
  DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
#endif

  write_error= 0;

  /* open the main log file */
  if (MYSQL_LOG::open(
#ifdef HAVE_PSI_INTERFACE
                      m_key_file_log,
#endif
                      log_name, LOG_BIN, new_name, io_cache_type_arg))
  {
#ifdef HAVE_REPLICATION
    close_purge_index_file();
#endif
    DBUG_RETURN(1);                            /* all warnings issued */
  }

  max_size= max_size_arg;

  open_count++;

  /* Set only when the file was empty on entry, i.e. newly created. */
  bool write_file_name_to_index_file=0;

  /* This must be before goto err. */
  Format_description_log_event s(BINLOG_VERSION);

  if (!my_b_filelength(&log_file))
  {
    /*
      The binary log file was empty (probably newly created)
      This is the normal case and happens when the user doesn't specify
      an extension for the binary log files.
      In this case we write a standard header to it.
    */
    if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
                        BIN_LOG_HEADER_SIZE))
      goto err;
    bytes_written+= BIN_LOG_HEADER_SIZE;
    write_file_name_to_index_file= 1;
  }

  /*
    don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
    as we won't be able to reset it later
  */
  if (io_cache_type == WRITE_CACHE)
    s.flags |= LOG_EVENT_BINLOG_IN_USE_F;
  /*
    Choose the checksum algorithm of the format description event. For a
    relay log the chosen value is also stored back in
    relay_log_checksum_alg.
  */
  s.checksum_alg= is_relay_log ?
    /* relay-log */
    /* inherit master's A descriptor if one has been received */
    (relay_log_checksum_alg=
     (relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF) ?
     relay_log_checksum_alg :
     /* otherwise use slave's local preference of RL events verification */
     (opt_slave_sql_verify_checksum == 0) ?
     (uint8) BINLOG_CHECKSUM_ALG_OFF : binlog_checksum_options):
    /* binlog */
    binlog_checksum_options;
  DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
  if (!s.is_valid())
    goto err;
  s.dont_set_created= null_created_arg;
  /* Set LOG_EVENT_RELAY_LOG_F flag for relay log's FD */
  if (is_relay_log)
    s.set_relay_log_event();
  if (s.write(&log_file))
    goto err;
  bytes_written+= s.data_written;
  /*
    We need to revisit this code and improve it.
    See further comments in the mysqld.
    /Alfranio
  */
  if (current_thd && gtid_mode > 0)
  {
    /*
      global_sid_lock is held only while the event is built from
      previous_gtid_set; the write to the log happens after it has been
      released (when it was taken here).
    */
    if (need_sid_lock)
      global_sid_lock->wrlock();
    else
      global_sid_lock->assert_some_wrlock();
    Previous_gtids_log_event prev_gtids_ev(previous_gtid_set);
    if (is_relay_log)
      prev_gtids_ev.set_relay_log_event();
    if (need_sid_lock)
      global_sid_lock->unlock();
    /* Use the same checksum algorithm as the format description event. */
    prev_gtids_ev.checksum_alg= s.checksum_alg;
    if (prev_gtids_ev.write(&log_file))
      goto err;
    bytes_written+= prev_gtids_ev.data_written;
  }
  if (extra_description_event &&
      extra_description_event->binlog_version>=4)
  {
    /*
      This is a relay log written to by the I/O slave thread.
      Write the event so that others can later know the format of this relay
      log.
      Note that this event is very close to the original event from the
      master (it has binlog version of the master, event types of the
      master), so this is suitable to parse the next relay log's event. It
      has been produced by
      Format_description_log_event::Format_description_log_event(char* buf,).
      Why don't we want to write the mi_description_event if this
      event is for format<4 (3.23 or 4.x): this is because in that case, the
      mi_description_event describes the data received from the
      master, but not the data written to the relay log (*conversion*),
      which is in format 4 (slave's).
    */
    /*
      Set 'created' to 0, so that in next relay logs this event does not
      trigger cleaning actions on the slave in
      Format_description_log_event::apply_event_impl().
    */
    extra_description_event->created= 0;
    /* Don't set log_pos in event header */
    extra_description_event->set_artificial_event();

    if (extra_description_event->write(&log_file))
      goto err;
    bytes_written+= extra_description_event->data_written;
  }
  /* Make the header events durable before the index file is updated. */
  if (flush_io_cache(&log_file) ||
      mysql_file_sync(log_file.file, MYF(MY_WME)))
    goto err;

  if (write_file_name_to_index_file)
  {
#ifdef HAVE_REPLICATION
    DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
#endif

    DBUG_ASSERT(my_b_inited(&index_file) != 0);

    /*
      The new log file name is appended into crash safe index file after
      all the content of index file is copied into the crash safe index
      file. Then move the crash safe index file to index file.
    */
    DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
                    {DBUG_SET("+d,simulate_no_free_space_error");});
    if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
        add_log_to_index((uchar*) log_file_name, strlen(log_file_name),
                         need_lock_index))
    {
      /* Switch the simulated-failure keywords off again before bailing out. */
      DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
                      {
                        DBUG_SET("-d,simulate_file_write_error");
                        DBUG_SET("-d,simulate_no_free_space_error");
                        DBUG_SET("-d,simulate_disk_full_on_open_binlog");
                      });
      goto err;
    }

#ifdef HAVE_REPLICATION
    DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
#endif
  }

  log_state= LOG_OPENED;

#ifdef HAVE_REPLICATION
  close_purge_index_file();
#endif

  DBUG_RETURN(0);

err:
  /*
    Common failure path for everything after the log file was opened:
    process the purge index file (if it is open), then either abort the
    server or report the error and close the log, depending on
    binlog_error_action.
  */
#ifdef HAVE_REPLICATION
  if (is_inited_purge_index_file())
    purge_index_entry(NULL, NULL, need_lock_index);
  close_purge_index_file();
#endif

  if (binlog_error_action == ABORT_SERVER)
  {
    exec_binlog_error_action_abort("Either disk is full or file system is read "
                                   "only while opening the binlog. Aborting the"
                                   " server.");
  }
  else
  {
    sql_print_error("Could not use %s for logging (error %d). "
                    "Turning logging off for the whole duration of the MySQL "
                    "server process. To turn it on again: fix the cause, "
                    "shutdown the MySQL server and restart it.",
                    (new_name) ? new_name : name, errno);
    close(LOG_CLOSE_INDEX, need_lock_log, need_lock_index);
  }
  DBUG_RETURN(1);
}
3623
3624
3625 /**
3626 Move crash safe index file to index file.
3627
3628 @param need_lock_index If true, LOCK_index will be acquired;
3629 otherwise it should already be held.
3630
3631 @retval 0 ok
3632 @retval -1 error
3633 */
move_crash_safe_index_file_to_index_file(bool need_lock_index)3634 int MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file(bool need_lock_index)
3635 {
3636 int error= 0;
3637 File fd= -1;
3638 DBUG_ENTER("MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file");
3639 int failure_trials= MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
3640 bool file_rename_status= false, file_delete_status= false;
3641 THD *thd= current_thd;
3642
3643 if (need_lock_index)
3644 mysql_mutex_lock(&LOCK_index);
3645 else
3646 mysql_mutex_assert_owner(&LOCK_index);
3647
3648 if (my_b_inited(&index_file))
3649 {
3650 end_io_cache(&index_file);
3651 if (mysql_file_close(index_file.file, MYF(0)) < 0)
3652 {
3653 error= -1;
3654 sql_print_error("While rebuilding index file %s: "
3655 "Failed to close the index file.", index_file_name);
3656 /*
3657 Delete Crash safe index file here and recover the binlog.index
3658 state(index_file io_cache) from old binlog.index content.
3659 */
3660 mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
3661 MYF(0));
3662
3663 goto recoverable_err;
3664 }
3665
3666 /*
3667 Sometimes an outsider can lock index files for temporary viewing
3668 purpose. For eg: MEB locks binlog.index/relaylog.index to view
3669 the content of the file. During that small period of time, deletion
3670 of the file is not possible on some platforms(Eg: Windows)
3671 Server should retry the delete operation for few times instead of panicking
3672 immediately.
3673 */
3674 while ((file_delete_status == false) && (failure_trials > 0))
3675 {
3676 if (DBUG_EVALUATE_IF("force_index_file_delete_failure", 1, 0)) break;
3677
3678 DBUG_EXECUTE_IF("simulate_index_file_delete_failure",
3679 {
3680 /* This simulation causes the delete to fail */
3681 static char first_char= index_file_name[0];
3682 index_file_name[0]= 0;
3683 sql_print_information("Retrying delete");
3684 if (failure_trials == 1)
3685 index_file_name[0]= first_char;
3686 };);
3687 file_delete_status = !(mysql_file_delete(key_file_binlog_index,
3688 index_file_name, MYF(MY_WME)));
3689 --failure_trials;
3690 if (!file_delete_status)
3691 {
3692 my_sleep(1000);
3693 /* Clear the error before retrying. */
3694 if (failure_trials > 0)
3695 thd->clear_error();
3696 }
3697 }
3698
3699 if (!file_delete_status)
3700 {
3701 error= -1;
3702 sql_print_error("While rebuilding index file %s: "
3703 "Failed to delete the existing index file. It could be "
3704 "that file is being used by some other process.",
3705 index_file_name);
3706 /*
3707 Delete Crash safe file index file here and recover the binlog.index
3708 state(index_file io_cache) from old binlog.index content.
3709 */
3710 mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
3711 MYF(0));
3712
3713 goto recoverable_err;
3714 }
3715 }
3716
3717 DBUG_EXECUTE_IF("crash_create_before_rename_index_file", DBUG_SUICIDE(););
3718 /*
3719 Sometimes an outsider can lock index files for temporary viewing
3720 purpose. For eg: MEB locks binlog.index/relaylog.index to view
3721 the content of the file. During that small period of time, rename
3722 of the file is not possible on some platforms(Eg: Windows)
3723 Server should retry the rename operation for few times instead of panicking
3724 immediately.
3725 */
3726 failure_trials = MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
3727 while ((file_rename_status == false) && (failure_trials > 0))
3728 {
3729 DBUG_EXECUTE_IF("simulate_crash_safe_index_file_rename_failure",
3730 {
3731 /* This simulation causes the rename to fail */
3732 static char first_char= index_file_name[0];
3733 index_file_name[0]= 0;
3734 sql_print_information("Retrying rename");
3735 if (failure_trials == 1)
3736 index_file_name[0]= first_char;
3737 };);
3738 file_rename_status =
3739 !(my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)));
3740 --failure_trials;
3741 if (!file_rename_status)
3742 {
3743 my_sleep(1000);
3744 /* Clear the error before retrying. */
3745 if (failure_trials > 0)
3746 thd->clear_error();
3747 }
3748 }
3749 if (!file_rename_status)
3750 {
3751 error= -1;
3752 sql_print_error("While rebuilding index file %s: "
3753 "Failed to rename the new index file to the existing "
3754 "index file.", index_file_name);
3755 goto fatal_err;
3756 }
3757 DBUG_EXECUTE_IF("crash_create_after_rename_index_file", DBUG_SUICIDE(););
3758
3759 recoverable_err:
3760 if ((fd= mysql_file_open(key_file_binlog_index,
3761 index_file_name,
3762 O_RDWR | O_CREAT | O_BINARY,
3763 MYF(MY_WME))) < 0 ||
3764 mysql_file_sync(fd, MYF(MY_WME)) ||
3765 init_io_cache(&index_file, fd, IO_SIZE, READ_CACHE,
3766 mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(0)),
3767 0, MYF(MY_WME | MY_WAIT_IF_FULL)))
3768 {
3769 sql_print_error("After rebuilding the index file %s: "
3770 "Failed to open the index file.", index_file_name);
3771 goto fatal_err;
3772 }
3773
3774 if (need_lock_index)
3775 mysql_mutex_unlock(&LOCK_index);
3776 DBUG_RETURN(error);
3777
3778 fatal_err:
3779 /*
3780 This situation is very very rare to happen (unless there is some serious
3781 memory related issues like OOM) and should be treated as fatal error.
3782 Hence it is better to bring down the server without respecting
3783 'binlog_error_action' value here.
3784 */
3785 exec_binlog_error_action_abort("MySQL server failed to update the "
3786 "binlog.index file's content properly. "
3787 "It might not be in sync with available "
3788 "binlogs and the binlog.index file state is in "
3789 "unrecoverable state. Aborting the server.");
3790 /*
3791 Server is aborted in the above function.
3792 This is dead code to make compiler happy.
3793 */
3794 DBUG_RETURN(error);
3795 }
3796
3797
3798 /**
3799 Append log file name to index file.
3800
3801 - To make crash safe, we copy all the content of index file
3802 to crash safe index file firstly and then append the log
3803 file name to the crash safe index file. Finally move the
3804 crash safe index file to index file.
3805
3806 @retval
3807 0 ok
3808 @retval
3809 -1 error
3810 */
add_log_to_index(uchar * log_name,int log_name_len,bool need_lock_index)3811 int MYSQL_BIN_LOG::add_log_to_index(uchar* log_name,
3812 int log_name_len, bool need_lock_index)
3813 {
3814 DBUG_ENTER("MYSQL_BIN_LOG::add_log_to_index");
3815
3816 if (open_crash_safe_index_file())
3817 {
3818 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3819 "open the crash safe index file.");
3820 goto err;
3821 }
3822
3823 if (copy_file(&index_file, &crash_safe_index_file, 0))
3824 {
3825 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3826 "copy index file to crash safe index file.");
3827 goto err;
3828 }
3829
3830 if (my_b_write(&crash_safe_index_file, log_name, log_name_len) ||
3831 my_b_write(&crash_safe_index_file, (uchar*) "\n", 1) ||
3832 flush_io_cache(&crash_safe_index_file) ||
3833 mysql_file_sync(crash_safe_index_file.file, MYF(MY_WME)))
3834 {
3835 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3836 "append log file name: %s, to crash "
3837 "safe index file.", log_name);
3838 goto err;
3839 }
3840
3841 if (close_crash_safe_index_file())
3842 {
3843 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3844 "close the crash safe index file.");
3845 goto err;
3846 }
3847
3848 if (move_crash_safe_index_file_to_index_file(need_lock_index))
3849 {
3850 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3851 "move crash safe index file to index file.");
3852 goto err;
3853 }
3854
3855 DBUG_RETURN(0);
3856
3857 err:
3858 DBUG_RETURN(-1);
3859 }
3860
get_current_log(LOG_INFO * linfo,bool need_lock_log)3861 int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo, bool need_lock_log/*true*/)
3862 {
3863 if (need_lock_log)
3864 mysql_mutex_lock(&LOCK_log);
3865 int ret = raw_get_current_log(linfo);
3866 if (need_lock_log)
3867 mysql_mutex_unlock(&LOCK_log);
3868 return ret;
3869 }
3870
raw_get_current_log(LOG_INFO * linfo)3871 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
3872 {
3873 strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
3874 linfo->pos = my_b_safe_tell(&log_file);
3875 return 0;
3876 }
3877
check_write_error_code(uint error_code)3878 static bool check_write_error_code(uint error_code)
3879 {
3880 return error_code == ER_TRANS_CACHE_FULL ||
3881 error_code == ER_STMT_CACHE_FULL ||
3882 error_code == ER_ERROR_ON_WRITE ||
3883 error_code == ER_BINLOG_LOGGING_IMPOSSIBLE;
3884 }
3885
check_write_error(THD * thd)3886 bool MYSQL_BIN_LOG::check_write_error(THD *thd)
3887 {
3888 DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
3889
3890 if (!thd->is_error())
3891 DBUG_RETURN(false);
3892
3893 bool checked= check_write_error_code(thd->get_stmt_da()->sql_errno());
3894
3895 if (!checked)
3896 {
3897 /* Check all conditions for one that matches the expected error */
3898 const Sql_condition *err;
3899 Diagnostics_area::Sql_condition_iterator it=
3900 thd->get_stmt_da()->sql_conditions();
3901 while ((err= it++) != NULL && !checked)
3902 {
3903 checked= check_write_error_code(err->get_sql_errno());
3904 }
3905 }
3906 DBUG_PRINT("return", ("checked: %s", YESNO(checked)));
3907 DBUG_RETURN(checked);
3908 }
3909
set_write_error(THD * thd,bool is_transactional)3910 void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
3911 {
3912 DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
3913
3914 write_error= 1;
3915
3916 if (check_write_error(thd))
3917 DBUG_VOID_RETURN;
3918
3919 if (my_errno == EFBIG)
3920 {
3921 if (is_transactional)
3922 {
3923 my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(MY_WME));
3924 }
3925 else
3926 {
3927 my_message(ER_STMT_CACHE_FULL, ER(ER_STMT_CACHE_FULL), MYF(MY_WME));
3928 }
3929 }
3930 else
3931 {
3932 char errbuf[MYSYS_STRERROR_SIZE];
3933 my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name,
3934 errno, my_strerror(errbuf, sizeof(errbuf), errno));
3935 }
3936
3937 DBUG_VOID_RETURN;
3938 }
3939
3940 /**
3941 Find the position in the log-index-file for the given log name.
3942
3943 @param[out] linfo The found log file name will be stored here, along
3944 with the byte offset of the next log file name in the index file.
3945 @param log_name Filename to find in the index file, or NULL if we
3946 want to read the first entry.
  @param need_lock_index If true, this function acquires LOCK_index;
  otherwise the lock should already be held by the caller.
3949
3950 @note
3951 On systems without the truncate function the file will end with one or
3952 more empty lines. These will be ignored when reading the file.
3953
3954 @retval
3955 0 ok
3956 @retval
3957 LOG_INFO_EOF End of log-index-file found
3958 @retval
3959 LOG_INFO_IO Got IO error while reading file
3960 */
3961
find_log_pos(LOG_INFO * linfo,const char * log_name,bool need_lock_index)3962 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
3963 bool need_lock_index)
3964 {
3965 int error= 0;
3966 char *full_fname= linfo->log_file_name;
3967 char full_log_name[FN_REFLEN], fname[FN_REFLEN];
3968 uint log_name_len= 0, fname_len= 0;
3969 DBUG_ENTER("find_log_pos");
3970 full_log_name[0]= full_fname[0]= 0;
3971
3972 /*
3973 Mutex needed because we need to make sure the file pointer does not
3974 move from under our feet
3975 */
3976 if (need_lock_index)
3977 mysql_mutex_lock(&LOCK_index);
3978 else
3979 mysql_mutex_assert_owner(&LOCK_index);
3980
3981 if (!my_b_inited(&index_file))
3982 {
3983 error= LOG_INFO_IO;
3984 goto end;
3985 }
3986
3987 // extend relative paths for log_name to be searched
3988 if (log_name)
3989 {
3990 if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
3991 {
3992 error= LOG_INFO_EOF;
3993 goto end;
3994 }
3995 }
3996
3997 log_name_len= log_name ? (uint) strlen(full_log_name) : 0;
3998 DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
3999 log_name ? log_name : "NULL", full_log_name));
4000
4001 /* As the file is flushed, we can't get an error here */
4002 my_b_seek(&index_file, (my_off_t) 0);
4003
4004 for (;;)
4005 {
4006 uint length;
4007 my_off_t offset= my_b_tell(&index_file);
4008
4009 DBUG_EXECUTE_IF("simulate_find_log_pos_error",
4010 error= LOG_INFO_EOF; break;);
4011 /* If we get 0 or 1 characters, this is the end of the file */
4012 if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
4013 {
4014 /* Did not find the given entry; Return not found or error */
4015 error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
4016 break;
4017 }
4018
4019 // extend relative paths and match against full path
4020 if (normalize_binlog_name(full_fname, fname, is_relay_log))
4021 {
4022 error= LOG_INFO_EOF;
4023 break;
4024 }
4025 fname_len= (uint) strlen(full_fname);
4026
4027 // if the log entry matches, null string matching anything
4028 if (!log_name ||
4029 (log_name_len == fname_len-1 && full_fname[log_name_len] == '\n' &&
4030 !strncmp(full_fname, full_log_name, log_name_len)))
4031 {
4032 DBUG_PRINT("info", ("Found log file entry"));
4033 full_fname[fname_len-1]= 0; // remove last \n
4034 linfo->index_file_start_offset= offset;
4035 linfo->index_file_offset = my_b_tell(&index_file);
4036 break;
4037 }
4038 linfo->entry_index++;
4039 }
4040
4041 end:
4042 if (need_lock_index)
4043 mysql_mutex_unlock(&LOCK_index);
4044 DBUG_RETURN(error);
4045 }
4046
4047
4048 /**
  Read the log file name that follows the position stored in 'linfo'
  in the log-index-file.
4050
4051 @param[out] linfo The filename will be stored here, along with the
4052 byte offset of the next filename in the index file.
4053
4054 @param need_lock_index If true, LOCK_index will be acquired;
4055 otherwise it should already be held by the caller.
4056
4057 @note
4058 - Before calling this function, one has to call find_log_pos()
4059 to set up 'linfo'
4060 - Mutex needed because we need to make sure the file pointer does not move
4061 from under our feet
4062
4063 @retval 0 ok
4064 @retval LOG_INFO_EOF End of log-index-file found
4065 @retval LOG_INFO_IO Got IO error while reading file
4066 */
find_next_log(LOG_INFO * linfo,bool need_lock_index)4067 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock_index)
4068 {
4069 int error= 0;
4070 uint length;
4071 char fname[FN_REFLEN];
4072 char *full_fname= linfo->log_file_name;
4073
4074 if (need_lock_index)
4075 mysql_mutex_lock(&LOCK_index);
4076 else
4077 mysql_mutex_assert_owner(&LOCK_index);
4078
4079 if (!my_b_inited(&index_file))
4080 {
4081 error= LOG_INFO_IO;
4082 goto err;
4083 }
4084 /* As the file is flushed, we can't get an error here */
4085 my_b_seek(&index_file, linfo->index_file_offset);
4086
4087 linfo->index_file_start_offset= linfo->index_file_offset;
4088 if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
4089 {
4090 error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
4091 goto err;
4092 }
4093
4094 if (fname[0] != 0)
4095 {
4096 if(normalize_binlog_name(full_fname, fname, is_relay_log))
4097 {
4098 error= LOG_INFO_EOF;
4099 goto err;
4100 }
4101 length= strlen(full_fname);
4102 }
4103
4104 full_fname[length-1]= 0; // kill \n
4105 linfo->index_file_offset= my_b_tell(&index_file);
4106
4107 err:
4108 if (need_lock_index)
4109 mysql_mutex_unlock(&LOCK_index);
4110 return error;
4111 }
4112
4113
4114 /**
  Removes files, as part of a RESET MASTER or RESET SLAVE statement,
  by deleting all logs referred to in the index file. Then, it starts
  writing to a new log file.
4118
4119 The new index file will only contain this file.
4120
4121 @param thd Thread
4122
4123 @note
4124 If not called from slave thread, write start event to new log
4125
4126 @retval
4127 0 ok
4128 @retval
4129 1 error
4130 */
reset_logs(THD * thd)4131 bool MYSQL_BIN_LOG::reset_logs(THD* thd)
4132 {
4133 LOG_INFO linfo;
4134 bool error=0;
4135 int err;
4136 const char* save_name;
4137 DBUG_ENTER("reset_logs");
4138
4139 /*
4140 Flush logs for storage engines, so that the last transaction
4141 is fsynced inside storage engines.
4142 */
4143 if (ha_flush_logs(NULL))
4144 DBUG_RETURN(1);
4145
4146 ha_reset_logs(thd);
4147
4148 /*
4149 We need to get both locks to be sure that no one is trying to
4150 write to the index log file.
4151 */
4152 mysql_mutex_lock(&LOCK_log);
4153 mysql_mutex_lock(&LOCK_index);
4154
4155 /*
4156 The following mutex is needed to ensure that no threads call
4157 'delete thd' as we would then risk missing a 'rollback' from this
4158 thread. If the transaction involved MyISAM tables, it should go
4159 into binlog even on rollback.
4160 */
4161 mysql_mutex_lock(&LOCK_thread_count);
4162
4163 global_sid_lock->wrlock();
4164
4165 /* Save variables so that we can reopen the log */
4166 save_name=name;
4167 name=0; // Protect against free
4168 close(LOG_CLOSE_TO_BE_OPENED, false/*need_lock_log=false*/,
4169 false/*need_lock_index=false*/);
4170
4171 /*
4172 First delete all old log files and then update the index file.
4173 As we first delete the log files and do not use sort of logging,
4174 a crash may lead to an inconsistent state where the index has
4175 references to non-existent files.
4176
4177 We need to invert the steps and use the purge_index_file methods
4178 in order to make the operation safe.
4179 */
4180
4181 if ((err= find_log_pos(&linfo, NullS, false/*need_lock_index=false*/)) != 0)
4182 {
4183 uint errcode= purge_log_get_error_code(err);
4184 sql_print_error("Failed to locate old binlog or relay log files");
4185 my_message(errcode, ER(errcode), MYF(0));
4186 error= 1;
4187 goto err;
4188 }
4189
4190 for (;;)
4191 {
4192 if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
4193 {
4194 if (my_errno == ENOENT)
4195 {
4196 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4197 ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
4198 linfo.log_file_name);
4199 sql_print_information("Failed to delete file '%s'",
4200 linfo.log_file_name);
4201 my_errno= 0;
4202 error= 0;
4203 }
4204 else
4205 {
4206 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4207 ER_BINLOG_PURGE_FATAL_ERR,
4208 "a problem with deleting %s; "
4209 "consider examining correspondence "
4210 "of your binlog index file "
4211 "to the actual binlog files",
4212 linfo.log_file_name);
4213 error= 1;
4214 goto err;
4215 }
4216 }
4217 if (find_next_log(&linfo, false/*need_lock_index=false*/))
4218 break;
4219 }
4220
4221 /* Start logging with a new file */
4222 close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED,
4223 false/*need_lock_log=false*/,
4224 false/*need_lock_index=false*/);
4225 if ((error= my_delete_allow_opened(index_file_name, MYF(0)))) // Reset (open will update)
4226 {
4227 if (my_errno == ENOENT)
4228 {
4229 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4230 ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
4231 index_file_name);
4232 sql_print_information("Failed to delete file '%s'",
4233 index_file_name);
4234 my_errno= 0;
4235 error= 0;
4236 }
4237 else
4238 {
4239 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4240 ER_BINLOG_PURGE_FATAL_ERR,
4241 "a problem with deleting %s; "
4242 "consider examining correspondence "
4243 "of your binlog index file "
4244 "to the actual binlog files",
4245 index_file_name);
4246 error= 1;
4247 goto err;
4248 }
4249 }
4250
4251 #ifdef HAVE_REPLICATION
4252 if (is_relay_log)
4253 {
4254 DBUG_ASSERT(active_mi != NULL);
4255 DBUG_ASSERT(active_mi->rli != NULL);
4256 (const_cast<Gtid_set *>(active_mi->rli->get_gtid_set()))->clear();
4257 }
4258 else
4259 {
4260 gtid_state->clear();
4261 // don't clear global_sid_map because it's used by the relay log too
4262 if (gtid_state->init() != 0)
4263 goto err;
4264 }
4265 #endif
4266
4267 if (!open_index_file(index_file_name, 0, false/*need_lock_index=false*/))
4268 if ((error= open_binlog(save_name, 0, io_cache_type,
4269 max_size, false,
4270 false/*need_lock_log=false*/,
4271 false/*need_lock_index=false*/,
4272 false/*need_sid_lock=false*/,
4273 NULL)))
4274 goto err;
4275 my_free((void *) save_name);
4276
4277 err:
4278 if (error == 1)
4279 name= const_cast<char*>(save_name);
4280 global_sid_lock->unlock();
4281 mysql_mutex_unlock(&LOCK_thread_count);
4282 mysql_mutex_unlock(&LOCK_index);
4283 mysql_mutex_unlock(&LOCK_log);
4284 DBUG_RETURN(error);
4285 }
4286
4287
4288 /**
4289 Set the name of crash safe index file.
4290
4291 @retval
4292 0 ok
4293 @retval
4294 1 error
4295 */
set_crash_safe_index_file_name(const char * base_file_name)4296 int MYSQL_BIN_LOG::set_crash_safe_index_file_name(const char *base_file_name)
4297 {
4298 int error= 0;
4299 DBUG_ENTER("MYSQL_BIN_LOG::set_crash_safe_index_file_name");
4300 if (fn_format(crash_safe_index_file_name, base_file_name, mysql_data_home,
4301 ".index_crash_safe", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
4302 MY_REPLACE_EXT)) == NULL)
4303 {
4304 error= 1;
4305 sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed "
4306 "to set file name.");
4307 }
4308 DBUG_RETURN(error);
4309 }
4310
4311
4312 /**
4313 Open a (new) crash safe index file.
4314
4315 @note
4316 The crash safe index file is a special file
4317 used for guaranteeing index file crash safe.
4318 @retval
4319 0 ok
4320 @retval
4321 1 error
4322 */
open_crash_safe_index_file()4323 int MYSQL_BIN_LOG::open_crash_safe_index_file()
4324 {
4325 int error= 0;
4326 File file= -1;
4327
4328 DBUG_ENTER("MYSQL_BIN_LOG::open_crash_safe_index_file");
4329
4330 if (!my_b_inited(&crash_safe_index_file))
4331 {
4332 if ((file= my_open(crash_safe_index_file_name, O_RDWR | O_CREAT | O_BINARY,
4333 MYF(MY_WME | ME_WAITTANG))) < 0 ||
4334 init_io_cache(&crash_safe_index_file, file, IO_SIZE, WRITE_CACHE,
4335 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
4336 {
4337 error= 1;
4338 sql_print_error("MYSQL_BIN_LOG::open_crash_safe_index_file failed "
4339 "to open temporary index file.");
4340 }
4341 }
4342 DBUG_RETURN(error);
4343 }
4344
4345
4346 /**
4347 Close the crash safe index file.
4348
4349 @note
4350 The crash safe file is just closed, is not deleted.
4351 Because it is moved to index file later on.
4352 @retval
4353 0 ok
4354 @retval
4355 1 error
4356 */
close_crash_safe_index_file()4357 int MYSQL_BIN_LOG::close_crash_safe_index_file()
4358 {
4359 int error= 0;
4360
4361 DBUG_ENTER("MYSQL_BIN_LOG::close_crash_safe_index_file");
4362
4363 if (my_b_inited(&crash_safe_index_file))
4364 {
4365 end_io_cache(&crash_safe_index_file);
4366 error= my_close(crash_safe_index_file.file, MYF(0));
4367 }
4368 memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
4369
4370 DBUG_RETURN(error);
4371 }
4372
4373
4374 /**
4375 Delete relay log files prior to rli->group_relay_log_name
4376 (i.e. all logs which are not involved in a non-finished group
4377 (transaction)), remove them from the index file and start on next
4378 relay log.
4379
4380 IMPLEMENTATION
4381
4382 - You must hold rli->data_lock before calling this function, since
4383 it writes group_relay_log_pos and similar fields of
4384 Relay_log_info.
4385 - Protects index file with LOCK_index
4386 - Delete relevant relay log files
4387 - Copy all file names after these ones to the front of the index file
  - If the OS has truncate, truncate the file, else fill it with '\n'
4389 - Read the next file name from the index file and store in rli->linfo
4390
4391 @param rli Relay log information
4392 @param included If false, all relay logs that are strictly before
4393 rli->group_relay_log_name are deleted ; if true, the
4394 latter is deleted too (i.e. all relay logs
4395 read by the SQL slave thread are deleted).
4396
4397 @note
4398 - This is only called from the slave SQL thread when it has read
4399 all commands from a relay log and want to switch to a new relay log.
4400 - When this happens, we can be in an active transaction as
4401 a transaction can span over two relay logs
4402 (although it is always written as a single block to the master's binary
4403 log, hence cannot span over two master's binary logs).
4404
4405 @retval
4406 0 ok
4407 @retval
4408 LOG_INFO_EOF End of log-index-file found
4409 @retval
4410 LOG_INFO_SEEK Could not allocate IO cache
4411 @retval
4412 LOG_INFO_IO Got IO error while reading file
4413 */
4414
4415 #ifdef HAVE_REPLICATION
4416
int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
{
  int error;
  char *to_purge_if_included= NULL;
  DBUG_ENTER("purge_first_log");

  DBUG_ASSERT(current_thd->system_thread == SYSTEM_THREAD_SLAVE_SQL);
  DBUG_ASSERT(is_relay_log);
  DBUG_ASSERT(is_open());
  DBUG_ASSERT(rli->slave_running == 1);
  DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->get_event_relay_log_name()));

  mysql_mutex_assert_owner(&rli->data_lock);

  mysql_mutex_lock(&LOCK_index);
  /*
    Keep a private copy of the group relay log name: the rli coordinates
    are rewritten below, before the purge itself is issued.
    NOTE(review): the my_strdup() result is not checked for NULL before
    it is handed to purge_logs() - confirm how allocation failure is
    meant to be handled.
  */
  to_purge_if_included= my_strdup(rli->get_group_relay_log_name(), MYF(0));

  /*
    Read the next log file name from the index file and pass it back to
    the caller.
  */
  if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
                         false/*need_lock_index=false*/)) ||
     (error=find_next_log(&rli->linfo, false/*need_lock_index=false*/)))
  {
    char buff[22];
    sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
                    error,
                    llstr(rli->linfo.index_file_offset,buff),
                    rli->get_event_relay_log_name(),
                    included);
    goto err;
  }

  /*
    Reset rli's coordinates to the current log.
  */
  rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
  rli->set_event_relay_log_name(rli->linfo.log_file_name);

  /*
    If we removed the rli->group_relay_log_name file,
    we must update the rli->group* coordinates, otherwise do not touch it as the
    group's execution is not finished (e.g. COMMIT not executed)
  */
  if (included)
  {
    rli->set_group_relay_log_pos(BIN_LOG_HEADER_SIZE);
    rli->set_group_relay_log_name(rli->linfo.log_file_name);
    rli->notify_group_relay_log_name_update();
  }
  /*
    Store where we are in the new file for the execution thread.
    If we are in the middle of a group, then we should not store
    the position in the repository, instead in that case set a flag
    to true which indicates that a 'forced flush' is postponed due
    to transaction split across the relaylogs.
  */
  if (!rli->is_in_group())
    rli->flush_info(TRUE);
  else
    rli->force_flush_postponed_due_to_split_trans= true;

  DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););

  /*
    Purge under log_space_lock, since purge_logs() is given
    &rli->log_space_total to update.
    NOTE(review): the return value of purge_logs() is ignored here.
  */
  mysql_mutex_lock(&rli->log_space_lock);
  rli->relay_log.purge_logs(to_purge_if_included, included,
                            false/*need_lock_index=false*/,
                            false/*need_update_threads=false*/,
                            &rli->log_space_total, true);
  // Tell the I/O thread to take the relay_log_space_limit into account
  rli->ignore_log_space_limit= 0;
  mysql_mutex_unlock(&rli->log_space_lock);

  /*
    Ok to broadcast after the critical region as there is no risk of
    the mutex being destroyed by this thread later - this helps save
    context switches
  */
  mysql_cond_broadcast(&rli->log_space_cond);

  /*
   * Need to update the log pos because purge logs has been called
   * after fetching initially the log pos at the beginning of the method.
   */
  if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
                         false/*need_lock_index=false*/)))
  {
    char buff[22];
    /*
      NOTE(review): the lookup above used the event relay log name, but
      the message prints the group relay log name - confirm which one is
      intended.
    */
    sql_print_error("next log error: %d  offset: %s  log: %s included: %d",
                    error,
                    llstr(rli->linfo.index_file_offset,buff),
                    rli->get_group_relay_log_name(),
                    included);
    goto err;
  }

  /* If included was passed, rli->linfo should be the first entry. */
  DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);

err:
  /* Common exit: release the private name copy and the index lock. */
  my_free(to_purge_if_included);
  mysql_mutex_unlock(&LOCK_index);
  DBUG_RETURN(error);
}
4522
4523
4524 /**
4525 Remove logs from index file.
4526
  - To make this crash safe, we first copy the content of the index
    file, starting from the index_file_start_offset recorded in
    log_info, to the crash safe index file, and then move the crash
    safe index file over the index file.
4531
  @param log_info              Its index_file_start_offset is the offset
                               in the index file from which the content
                               is copied.
4535
4536 @param need_update_threads If we want to update the log coordinates
4537 of all threads. False for relay logs,
4538 true otherwise.
4539
4540 @retval
4541 0 ok
4542 @retval
4543 LOG_INFO_IO Got IO error while reading/writing file
4544 */
remove_logs_from_index(LOG_INFO * log_info,bool need_update_threads)4545 int MYSQL_BIN_LOG::remove_logs_from_index(LOG_INFO* log_info, bool need_update_threads)
4546 {
4547 if (open_crash_safe_index_file())
4548 {
4549 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
4550 "open the crash safe index file.");
4551 goto err;
4552 }
4553
4554 if (copy_file(&index_file, &crash_safe_index_file,
4555 log_info->index_file_start_offset))
4556 {
4557 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
4558 "copy index file to crash safe index file.");
4559 goto err;
4560 }
4561
4562 if (close_crash_safe_index_file())
4563 {
4564 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
4565 "close the crash safe index file.");
4566 goto err;
4567 }
4568 DBUG_EXECUTE_IF("fault_injection_copy_part_file", DBUG_SUICIDE(););
4569
4570 if (move_crash_safe_index_file_to_index_file(false/*need_lock_index=false*/))
4571 {
4572 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
4573 "move crash safe index file to index file.");
4574 goto err;
4575 }
4576
4577 // now update offsets in index file for running threads
4578 if (need_update_threads)
4579 adjust_linfo_offsets(log_info->index_file_start_offset);
4580 return 0;
4581
4582 err:
4583 return LOG_INFO_IO;
4584 }
4585
4586 /**
4587 Remove all logs before the given log from disk and from the index file.
4588
  @param to_log               Delete all log file names before this file.
  @param included             If true, to_log is deleted too.
  @param need_lock_index      If true, LOCK_index is acquired by this
                              function; otherwise it should already be
                              held by the caller.
  @param need_update_threads  If we want to update the log coordinates of
                              all threads. False for relay logs, true otherwise.
  @param decrease_log_space   If not null, decrement this variable by
                              the amount of log space freed
  @param auto_purge           True if this is an automatic purge.
4597
4598 @note
4599 If any of the logs before the deleted one is in use,
4600 only purge logs up to this one.
4601
4602 @retval
4603 0 ok
4604 @retval
4605 LOG_INFO_EOF to_log not found
4606 LOG_INFO_EMFILE too many files opened
4607 LOG_INFO_FATAL if any other than ENOENT error from
4608 mysql_file_stat() or mysql_file_delete()
4609 */
4610
int MYSQL_BIN_LOG::purge_logs(const char *to_log,
                              bool included,
                              bool need_lock_index,
                              bool need_update_threads,
                              ulonglong *decrease_log_space,
                              bool auto_purge)
{
  int error= 0, no_of_log_files_to_purge= 0, no_of_log_files_purged= 0;
  int no_of_threads_locking_log= 0;
  bool exit_loop= 0;
  LOG_INFO log_info;
  THD *thd= current_thd;
  DBUG_ENTER("purge_logs");
  DBUG_PRINT("info",("to_log= %s",to_log));

  if (need_lock_index)
    mysql_mutex_lock(&LOCK_index);
  else
    mysql_mutex_assert_owner(&LOCK_index);
  /* First make sure to_log is listed in the index at all. */
  if ((error=find_log_pos(&log_info, to_log, false/*need_lock_index=false*/)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
                    "listed in the index.", to_log);
    goto err;
  }

  /*
    find_log_pos() increments entry_index for every entry it skips, so
    this is the number of entries that precede to_log.
  */
  no_of_log_files_to_purge= log_info.entry_index;

  /* Start from a fresh purge register file (TRUE = destroy the old one). */
  if ((error= open_purge_index_file(TRUE)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
    goto err;
  }

  /*
    File name exists in index file; delete until we find this file
    or a file that is used.
  */
  if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
    goto err;

  /*
    Walk the index from its first entry. The loop body runs for every
    entry that is not to_log; when to_log itself is reached the body
    runs once more only if 'included' is set, and the assignment to
    exit_loop makes that iteration the last one.
  */
  while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)))
  {
    /* Stop at the active log; warn only for an explicit purge. */
    if(is_active(log_info.log_file_name))
    {
      if(!auto_purge)
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_WARN_PURGE_LOG_IS_ACTIVE,
                            ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
                            log_info.log_file_name);
      break;
    }

    /* Stop at the first log that some thread is still using. */
    if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
    {
      if(!auto_purge)
        push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                            ER_WARN_PURGE_LOG_IN_USE,
                            ER(ER_WARN_PURGE_LOG_IN_USE),
                            log_info.log_file_name,  no_of_threads_locking_log,
                            no_of_log_files_purged, no_of_log_files_to_purge);
      break;
    }
    no_of_log_files_purged++;

    /* Record the file in the purge register; it is deleted further down. */
    if ((error= register_purge_index_entry(log_info.log_file_name)))
    {
      sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
                      log_info.log_file_name);
      goto err;
    }

    if (find_next_log(&log_info, false/*need_lock_index=false*/) || exit_loop)
      break;
  }

  DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););

  /* Flush and sync the register before the index file is modified. */
  if ((error= sync_purge_index_file()))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
    goto err;
  }

  /* We know how many files to delete. Update index file. */
  if ((error=remove_logs_from_index(&log_info, need_update_threads)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
    goto err;
  }

  // Update gtid_state->lost_gtids
  if (gtid_mode > 0 && !is_relay_log)
  {
    global_sid_lock->wrlock();
    error= init_gtid_sets(NULL,
                          const_cast<Gtid_set *>(gtid_state->get_lost_gtids()),
                          NULL,
                          opt_master_verify_checksum,
                          false/*false=don't need lock*/);
    global_sid_lock->unlock();
    if (error)
      goto err;
  }

  DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););

err:

  /*
    Common exit, reached both on success and on failure. The registered
    files are deleted only when no error occurred so far; the purge
    register file itself is always closed.
  */
  int error_index= 0, close_error_index= 0;
  /* Read each entry from purge_index_file and delete the file. */
  if (!error && is_inited_purge_index_file() &&
      (error_index= purge_index_entry(thd, decrease_log_space, false/*need_lock_index=false*/)))
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
                    " that would be purged.");

  close_error_index= close_purge_index_file();

  DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););

  if (need_lock_index)
    mysql_mutex_unlock(&LOCK_index);

  /*
    Error codes from purge logs take precedence.
    Then error codes from purging the index entry.
    Finally, error codes from closing the purge index file.
  */
  error= error ? error : (error_index ? error_index :
                          close_error_index);

  DBUG_RETURN(error);
}
4744
set_purge_index_file_name(const char * base_file_name)4745 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
4746 {
4747 int error= 0;
4748 DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
4749 if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
4750 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
4751 MY_REPLACE_EXT)) == NULL)
4752 {
4753 error= 1;
4754 sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
4755 "file name.");
4756 }
4757 DBUG_RETURN(error);
4758 }
4759
open_purge_index_file(bool destroy)4760 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
4761 {
4762 int error= 0;
4763 File file= -1;
4764
4765 DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
4766
4767 if (destroy)
4768 close_purge_index_file();
4769
4770 if (!my_b_inited(&purge_index_file))
4771 {
4772 if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
4773 MYF(MY_WME | ME_WAITTANG))) < 0 ||
4774 init_io_cache(&purge_index_file, file, IO_SIZE,
4775 (destroy ? WRITE_CACHE : READ_CACHE),
4776 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
4777 {
4778 error= 1;
4779 sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
4780 " file.");
4781 }
4782 }
4783 DBUG_RETURN(error);
4784 }
4785
close_purge_index_file()4786 int MYSQL_BIN_LOG::close_purge_index_file()
4787 {
4788 int error= 0;
4789
4790 DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
4791
4792 if (my_b_inited(&purge_index_file))
4793 {
4794 end_io_cache(&purge_index_file);
4795 error= my_close(purge_index_file.file, MYF(0));
4796 }
4797 my_delete(purge_index_file_name, MYF(0));
4798 memset(&purge_index_file, 0, sizeof(purge_index_file));
4799
4800 DBUG_RETURN(error);
4801 }
4802
is_inited_purge_index_file()4803 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
4804 {
4805 DBUG_ENTER("MYSQL_BIN_LOG::is_inited_purge_index_file");
4806 DBUG_RETURN (my_b_inited(&purge_index_file));
4807 }
4808
sync_purge_index_file()4809 int MYSQL_BIN_LOG::sync_purge_index_file()
4810 {
4811 int error= 0;
4812 DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
4813
4814 if ((error= flush_io_cache(&purge_index_file)) ||
4815 (error= my_sync(purge_index_file.file, MYF(MY_WME))))
4816 DBUG_RETURN(error);
4817
4818 DBUG_RETURN(error);
4819 }
4820
register_purge_index_entry(const char * entry)4821 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
4822 {
4823 int error= 0;
4824 DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
4825
4826 if ((error=my_b_write(&purge_index_file, (const uchar*)entry, strlen(entry))) ||
4827 (error=my_b_write(&purge_index_file, (const uchar*)"\n", 1)))
4828 DBUG_RETURN (error);
4829
4830 DBUG_RETURN(error);
4831 }
4832
register_create_index_entry(const char * entry)4833 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
4834 {
4835 DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
4836 DBUG_RETURN(register_purge_index_entry(entry));
4837 }
4838
/**
  Delete the log files listed in the purge index (register) file.

  The register is re-read from its start. For every entry the file is
  stat'ed; a file that exists is deleted only when find_log_pos() does
  not find it in the index file (LOG_INFO_EOF). A missing file (ENOENT)
  only produces a warning and processing continues with the next entry.

  @param thd                 Session that receives warnings; may be NULL,
                             in which case some problems are only written
                             to the error log.
  @param decrease_log_space  If not NULL, decremented by the stat'ed size
                             of every file that was deleted.
  @param need_lock_index     Passed on to find_log_pos(); when false,
                             ha_binlog_index_purge_file() is also called
                             for each file before it is deleted.

  @retval 0                 ok
  @retval LOG_INFO_EMFILE   deleting a file failed with EMFILE
  @retval LOG_INFO_FATAL    stat or delete failed with an error other
                            than ENOENT (and, for delete, EMFILE)
  @retval other             error from reinit_io_cache(), from reading
                            the register file, or from find_log_pos()
*/
int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *decrease_log_space,
                                     bool need_lock_index)
{
  MY_STAT s;
  int error= 0;
  LOG_INFO log_info;
  LOG_INFO check_log_info;

  DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");

  DBUG_ASSERT(my_b_inited(&purge_index_file));

  /* Switch the register cache to reading, positioned at offset 0. */
  if ((error=reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0, 0)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
                    "for read");
    goto err;
  }

  for (;;)
  {
    uint length;

    /* A read of 0 or 1 characters is either a read error or EOF. */
    if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
                          FN_REFLEN)) <= 1)
    {
      if (purge_index_file.error)
      {
        error= purge_index_file.error;
        sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
                        "register file.", error);
        goto err;
      }

      /* Reached EOF */
      break;
    }

    /* Get rid of the trailing '\n' */
    log_info.log_file_name[length-1]= 0;

    /* The stat result in 's' later provides the size of the freed file. */
    if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s, MYF(0)))
    {
      if (my_errno == ENOENT)
      {
        /*
          It's not fatal if we can't stat a log file that does not exist;
          If we could not stat, we won't delete.
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
                              log_info.log_file_name);
        }
        sql_print_information("Failed to execute mysql_file_stat on file '%s'",
			      log_info.log_file_name);
        my_errno= 0;
      }
      else
      {
        /*
          Other than ENOENT are fatal
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with getting info on being purged %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              log_info.log_file_name);
        }
        else
        {
          sql_print_information("Failed to delete log file '%s'; "
                                "consider examining correspondence "
                                "of your binlog index file "
                                "to the actual binlog files",
                                log_info.log_file_name);
        }
        error= LOG_INFO_FATAL;
        goto err;
      }
    }
    else
    {
      /*
        The file exists. It is deleted only when it is no longer listed
        in the index file: a successful lookup (return 0) skips the whole
        block below and leaves the file alone.
      */
      if ((error= find_log_pos(&check_log_info, log_info.log_file_name,
                               need_lock_index)))
      {
        /* Anything but "not found" means the index could not be read. */
        if (error != LOG_INFO_EOF)
        {
          if (thd)
          {
            push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                ER_BINLOG_PURGE_FATAL_ERR,
                                "a problem with deleting %s and "
                                "reading the binlog index file",
                                log_info.log_file_name);
          }
          else
          {
            sql_print_information("Failed to delete file '%s' and "
                                  "read the binlog index file",
                                  log_info.log_file_name);
          }
          goto err;
        }

        /* LOG_INFO_EOF is the expected outcome here, not an error. */
        error= 0;
        if (!need_lock_index)
        {
          /*
            This is to avoid triggering an error in NDB.

            @todo: This is weird, what does NDB errors have to do with
            need_lock_index? Explain better or refactor /Sven
          */
          ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
        }

        DBUG_PRINT("info",("purging %s",log_info.log_file_name));
        if (!mysql_file_delete(key_file_binlog, log_info.log_file_name, MYF(0)))
        {
          DBUG_EXECUTE_IF("wait_in_purge_index_entry",
                          {
                              const char action[] = "now SIGNAL in_purge_index_entry WAIT_FOR go_ahead_sql";
                              DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(action)));
                              DBUG_SET("-d,wait_in_purge_index_entry");
                          };);

          /* Deleted: account for the space that was freed. */
          if (decrease_log_space)
            *decrease_log_space-= s.st_size;
        }
        else
        {
          if (my_errno == ENOENT)
          {
            /* The file vanished between stat and delete: warn only. */
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                  ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
                                  log_info.log_file_name);
            }
            sql_print_information("Failed to delete file '%s'",
                                  log_info.log_file_name);
            my_errno= 0;
          }
          else
          {
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                                  ER_BINLOG_PURGE_FATAL_ERR,
                                  "a problem with deleting %s; "
                                  "consider examining correspondence "
                                  "of your binlog index file "
                                  "to the actual binlog files",
                                  log_info.log_file_name);
            }
            else
            {
              sql_print_information("Failed to delete file '%s'; "
                                    "consider examining correspondence "
                                    "of your binlog index file "
                                    "to the actual binlog files",
                                    log_info.log_file_name);
            }
            /* EMFILE gets its own return code; the rest is fatal. */
            if (my_errno == EMFILE)
            {
              DBUG_PRINT("info",
                         ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno));
              error= LOG_INFO_EMFILE;
              goto err;
            }
            error= LOG_INFO_FATAL;
            goto err;
          }
        }
      }
    }
  }

err:
  DBUG_RETURN(error);
}
5026
5027 /**
5028 Purge old logs so that we have a maximum of max_nr_files logs.
5029
5030 @param max_nr_files Maximum number of logfiles to have
5031
5032 @note
5033 If any of the logs before the deleted one is in use,
5034 only purge logs up to this one.
5035
5036 @retval
5037 0 ok
5038 @retval
5039 LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
5040 LOG_INFO_FATAL if any other than ENOENT error from
5041 mysql_file_stat() or mysql_file_delete()
5042 */
5043
purge_logs_maximum_number(ulong max_nr_files)5044 int MYSQL_BIN_LOG::purge_logs_maximum_number(ulong max_nr_files)
5045 {
5046 int error;
5047 char to_log[FN_REFLEN];
5048 LOG_INFO log_info;
5049 ulong current_number_of_logs= 1;
5050
5051 DBUG_ENTER("purge_logs_maximum_number");
5052
5053 mysql_mutex_lock(&LOCK_index);
5054 to_log[0]= 0;
5055
5056 if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
5057 goto err;
5058
5059 while (!find_next_log(&log_info, 0))
5060 current_number_of_logs++;
5061
5062 if (current_number_of_logs <= max_nr_files)
5063 {
5064 error= 0;
5065 goto err; /* No logs to expire */
5066 }
5067
5068 if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
5069 goto err;
5070
5071 while (strcmp(log_file_name, log_info.log_file_name) &&
5072 !is_active(log_info.log_file_name) &&
5073 !log_in_use(log_info.log_file_name) &&
5074 current_number_of_logs > max_nr_files)
5075 {
5076 current_number_of_logs--;
5077 strmake(to_log,
5078 log_info.log_file_name,
5079 sizeof(log_info.log_file_name) - 1);
5080
5081 if (find_next_log(&log_info, 0))
5082 {
5083 break;
5084 }
5085 }
5086
5087 error= (to_log[0] ? purge_logs(to_log, true, false, true,
5088 (ulonglong *) 0, true) : 0);
5089
5090 err:
5091 mysql_mutex_unlock(&LOCK_index);
5092 DBUG_RETURN(error);
5093 }
5094
/**
  Remove all logs before the given file date from disk and from the
  index file.

  LOCK_index is held for the whole call. The thread used for pushing
  warnings is current_thd; it is not a parameter of this function.

  @param purge_time     Delete all log files before given date.
  @param auto_purge     True if this is an automatic purge.

  @note
    If any of the logs before the deleted one is in use,
    only purge logs up to this one. For a non-automatic purge a warning
    is pushed in that case, and also when the walk reaches the active log.

  @retval
    0                           ok
  @retval
    LOG_INFO_FATAL              if any other than ENOENT error from
                                mysql_file_stat()
  @retval
    nonzero                     error code propagated from find_log_pos()
                                or purge_logs()
*/

int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time, bool auto_purge)
{
  int error;
  int no_of_threads_locking_log= 0, no_of_log_files_purged= 0;
  bool log_is_active= false, log_is_in_use= false;
  /*
    to_log: name of the last log selected for purging (empty if none).
    copy_log_in_use: name of the in-use log that stopped the walk; only
    filled in for a non-automatic purge, where it is used in a warning.
  */
  char to_log[FN_REFLEN], copy_log_in_use[FN_REFLEN];
  LOG_INFO log_info;
  MY_STAT stat_area;
  THD *thd= current_thd;

  DBUG_ENTER("purge_logs_before_date");

  mysql_mutex_lock(&LOCK_index);
  to_log[0]= 0;

  if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
    goto err;

  /* Walk the index from the first entry until the active log is reached. */
  while (!(log_is_active= is_active(log_info.log_file_name)))
  {
    /* A nonzero result from log_in_use() stops the walk at this log. */
    if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
    {
      if (!auto_purge)
      {
        log_is_in_use= true;
        strcpy(copy_log_in_use, log_info.log_file_name);
      }
      break;
    }
    /*
      Counted before the file is examined, so the counter also includes a
      file that turns out to be missing or too recent. It is only reported
      in the "log in use" warning below.
    */
    no_of_log_files_purged++;

    if (!mysql_file_stat(m_key_file_log,
                         log_info.log_file_name, &stat_area, MYF(0)))
    {
      if (my_errno == ENOENT)
      {
        /*
          It's not fatal if we can't stat a log file that does not exist.
        */
        my_errno= 0;
      }
      else
      {
        /*
          Other than ENOENT are fatal
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with getting info on being purged %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              log_info.log_file_name);
        }
        else
        {
          /* No session to attach a warning to: report in the server log. */
          sql_print_information("Failed to delete log file '%s'",
                                log_info.log_file_name);
        }
        error= LOG_INFO_FATAL;
        goto err;
      }
    }
    else
    {
      /*
        Remember this log as the purge target if it is older than
        purge_time; the first log that is not older ends the walk.
      */
      if (stat_area.st_mtime < purge_time)
        strmake(to_log,
                log_info.log_file_name,
                sizeof(log_info.log_file_name) - 1);
      else
        break;
    }
    if (find_next_log(&log_info, false/*need_lock_index=false*/))
      break;
  }

  if (log_is_active)
  {
    /* Only an explicit (non-automatic) purge warns about the active log. */
    if(!auto_purge)
      push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                          ER_WARN_PURGE_LOG_IS_ACTIVE,
                          ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
                          log_info.log_file_name);

  }

  /* log_is_in_use is only ever set when auto_purge is false (see above). */
  if (log_is_in_use)
  {
    /*
      Continue from the in-use log up to the current log to count how many
      files would have qualified for purging, for the warning text only.
    */
    int no_of_log_files_to_purge= no_of_log_files_purged+1;
    while (strcmp(log_file_name, log_info.log_file_name))
    {
      if (mysql_file_stat(m_key_file_log, log_info.log_file_name,
                          &stat_area, MYF(0)))
      {
        if (stat_area.st_mtime < purge_time)
          no_of_log_files_to_purge++;
        else
          break;
      }
      if (find_next_log(&log_info, false/*need_lock_index=false*/))
      {
        no_of_log_files_to_purge++;
        break;
      }
    }

    push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                        ER_WARN_PURGE_LOG_IN_USE,
                        ER(ER_WARN_PURGE_LOG_IN_USE),
                        copy_log_in_use, no_of_threads_locking_log,
                        no_of_log_files_purged, no_of_log_files_to_purge);
  }

  /* Purge only if at least one log was selected during the walk. */
  error= (to_log[0] ? purge_logs(to_log, true,
                                 false/*need_lock_index=false*/,
                                 true/*need_update_threads=true*/,
                                 (ulonglong *) 0, auto_purge) : 0);

err:
  mysql_mutex_unlock(&LOCK_index);
  DBUG_RETURN(error);
}
5239 #endif /* HAVE_REPLICATION */
5240
5241
5242 /**
5243 Create a new log file name.
5244
5245 @param buf buf of at least FN_REFLEN where new name is stored
5246
5247 @note
5248 If file name will be longer then FN_REFLEN it will be truncated
5249 */
5250
make_log_name(char * buf,const char * log_ident)5251 void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
5252 {
5253 uint dir_len = dirname_length(log_file_name);
5254 if (dir_len >= FN_REFLEN)
5255 dir_len=FN_REFLEN-1;
5256 strnmov(buf, log_file_name, dir_len);
5257 strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
5258 }
5259
5260
5261 /**
5262 Check if we are writing/reading to the given log file.
5263 */
5264
is_active(const char * log_file_name_arg)5265 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
5266 {
5267 return !strcmp(log_file_name, log_file_name_arg);
5268 }
5269
5270
5271 /*
5272 Wrappers around new_file_impl to avoid using argument
5273 to control locking. The argument 1) less readable 2) breaks
5274 incapsulation 3) allows external access to the class without
5275 a lock (which is not possible with private new_file_without_locking
5276 method).
5277
5278 @retval
5279 nonzero - error
5280
5281 */
5282
new_file(Format_description_log_event * extra_description_event)5283 int MYSQL_BIN_LOG::new_file(Format_description_log_event *extra_description_event)
5284 {
5285 return new_file_impl(true/*need_lock_log=true*/, extra_description_event);
5286 }
5287
5288 /*
5289 @retval
5290 nonzero - error
5291 */
new_file_without_locking(Format_description_log_event * extra_description_event)5292 int MYSQL_BIN_LOG::new_file_without_locking(Format_description_log_event *extra_description_event)
5293 {
5294 return new_file_impl(false/*need_lock_log=false*/, extra_description_event);
5295 }
5296
5297
/**
  Start writing to a new log file or reopen the old file.

  Returns immediately with 0, without taking any lock, when the log is
  not open. Otherwise the sequence is: take (or assert) LOCK_log, wait
  under LOCK_xids until get_prep_xids() is 0, take LOCK_index, write a
  Rotate event to the current file, close it, then reopen the index file
  and open the new log file.

  @param need_lock_log If true, this function acquires LOCK_log;
  otherwise the caller should already have acquired it.
  @param extra_description_event Passed through unchanged to
  open_binlog().

  @retval 0 success
  @retval nonzero - error

  @note The new file name is stored last in the index file
*/
int MYSQL_BIN_LOG::new_file_impl(bool need_lock_log, Format_description_log_event *extra_description_event)
{
  /*
    close_on_error marks failures after which the log is closed (or the
    server aborted) in the "end" section below.
  */
  int error= 0, close_on_error= FALSE;
  char new_name[FN_REFLEN], *new_name_ptr, *old_name, *file_to_open;

  DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
  /* Checked before any lock is taken. */
  if (!is_open())
  {
    DBUG_PRINT("info",("log is closed"));
    DBUG_RETURN(error);
  }

  if (need_lock_log)
    mysql_mutex_lock(&LOCK_log);
  else
    mysql_mutex_assert_owner(&LOCK_log);
  DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
                  DEBUG_SYNC(current_thd, "before_rotate_binlog"););
  mysql_mutex_lock(&LOCK_xids);
  /*
    We need to ensure that the number of prepared XIDs are 0.

    If m_prep_xids is not zero:
    - We wait for storage engine commit, hence decrease m_prep_xids
    - We keep the LOCK_log to block new transactions from being
      written to the binary log.
   */
  while (get_prep_xids() > 0)
  {
    DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
    mysql_cond_wait(&m_prep_xids_cond, &LOCK_xids);
  }
  mysql_mutex_unlock(&LOCK_xids);

  mysql_mutex_lock(&LOCK_index);

  /*
    ha_flush_logs() is skipped when the "expire_logs_always" debug keyword
    is set. A failure here leaves close_on_error FALSE, so the log is not
    closed in the "end" section.
  */
  if (DBUG_EVALUATE_IF("expire_logs_always", 0, 1)
      && (error= ha_flush_logs(NULL)))
    goto end;

  mysql_mutex_assert_owner(&LOCK_log);
  mysql_mutex_assert_owner(&LOCK_index);


  /*
    If user hasn't specified an extension, generate a new log name
    We have to do this here and not in open as we want to store the
    new file name in the current binary log file.
  */
  new_name_ptr= new_name;
  if ((error= generate_new_name(new_name, name)))
  {
    // Use the old name if generation of new name fails.
    strcpy(new_name, name);
    close_on_error= TRUE;
    goto end;
  }
  else
  {
    /*
      We log the whole file name for log file as the user may decide
      to change base names at some point.
    */
    Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
                       is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
    /*
      The current relay-log's closing Rotate event must have checksum
      value computed with an algorithm of the last relay-logged FD event.
    */
    if (is_relay_log)
      r.checksum_alg= relay_log_checksum_alg;
    DBUG_ASSERT(!is_relay_log || relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
    /*
      The debug keyword "fault_injection_new_file_rotate_event" simulates a
      failed write of the Rotate event.
    */
    if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event", (error=close_on_error=TRUE), FALSE) ||
       (error= r.write(&log_file)))
    {
      char errbuf[MYSYS_STRERROR_SIZE];
      DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno=2;);
      close_on_error= TRUE;
      /*
        NOTE(review): the error number is ER_ERROR_ON_WRITE while the
        message format is that of ER_CANT_OPEN_FILE - confirm this
        combination is intended.
      */
      my_printf_error(ER_ERROR_ON_WRITE, ER(ER_CANT_OPEN_FILE),
                      MYF(ME_FATALERROR), name,
                      errno, my_strerror(errbuf, sizeof(errbuf), errno));
      goto end;
    }
    bytes_written += r.data_written;
  }
  /*
    Update needs to be signalled even if there is no rotate event
    log rotation should give the waiting thread a signal to
    discover EOF and move on to the next log.
  */
  signal_update();

  /*
    Keep the old name alive across close(); it is passed to open_binlog()
    below and freed afterwards.
  */
  old_name=name;
  name=0;				// Don't free name
  close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX,
        false/*need_lock_log=false*/,
        false/*need_lock_index=false*/);

  if (checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF)
  {
    DBUG_ASSERT(!is_relay_log);
    DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
    binlog_checksum_options= checksum_alg_reset;
  }
  /*
     Note that at this point, log_state != LOG_CLOSED (important for is_open()).
  */

  DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
  /*
     new_file() is only used for rotation (in FLUSH LOGS or because size >
     max_binlog_size or max_relay_log_size).
     If this is a binary log, the Format_description_log_event at the beginning of
     the new file should have created=0 (to distinguish with the
     Format_description_log_event written at server startup, which should
     trigger temp tables deletion on slaves.
  */

  /* reopen index binlog file, BUG#34582 */
  /* file_to_open tracks which file to name in the error message below. */
  file_to_open= index_file_name;
  error= open_index_file(index_file_name, 0, false/*need_lock_index=false*/);
  if (!error)
  {
    /* reopen the binary log file. */
    file_to_open= new_name_ptr;
    error= open_binlog(old_name, new_name_ptr, io_cache_type,
                       max_size, true/*null_created_arg=true*/,
                       false/*need_lock_log=false*/,
                       false/*need_lock_index=false*/,
                       true/*need_sid_lock=true*/,
                       extra_description_event);
  }

  /* handle reopening errors */
  if (error)
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    my_printf_error(ER_CANT_OPEN_FILE, ER(ER_CANT_OPEN_FILE),
                    MYF(ME_FATALERROR), file_to_open,
                    error, my_strerror(errbuf, sizeof(errbuf), error));
    close_on_error= TRUE;
  }
  my_free(old_name);

end:

  if (error && close_on_error /* rotate or reopen failed */)
  {
    /*
      Close whatever was left opened.

      We are keeping the behavior as it exists today, ie,
      we disable logging and move on (see: BUG#51014).

      TODO: as part of WL#1790 consider other approaches:
       - kill mysql (safety);
       - try multiple locations for opening a log file;
       - switch server to protected/readonly mode
       - ...
    */
    if (binlog_error_action == ABORT_SERVER)
    {
      exec_binlog_error_action_abort("Either disk is full or file system is"
                                     " read only while rotating the binlog."
                                     " Aborting the server.");
    }
    else
      /*
        NOTE(review): the value printed as "error %d" is errno, not the
        local variable error.
      */
      sql_print_error("Could not open %s for logging (error %d). "
                      "Turning logging off for the whole duration "
                      "of the MySQL server process. To turn it on "
                      "again: fix the cause, shutdown the MySQL "
                      "server and restart it.",
                      new_name_ptr, errno);
    close(LOG_CLOSE_INDEX, false /*need_lock_log=false*/,
          false/*need_lock_index=false*/);
  }

  mysql_mutex_unlock(&LOCK_index);
  /* LOCK_log is released only if it was acquired by this function. */
  if (need_lock_log)
    mysql_mutex_unlock(&LOCK_log);

  DEBUG_SYNC(current_thd, "after_disable_binlog");
  DBUG_RETURN(error);
}
5493
5494
5495 #ifdef HAVE_REPLICATION
5496 /**
5497 Called after an event has been written to the relay log by the IO
5498 thread. This flushes and possibly syncs the file (according to the
5499 sync options), rotates the file if it has grown over the limit, and
5500 finally calls signal_update().
5501
5502 @note The caller must hold LOCK_log before invoking this function.
5503
5504 @param mi Master_info for the IO thread.
5505 @param need_data_lock If true, mi->data_lock will be acquired if a
5506 rotation is needed. Otherwise, mi->data_lock must be held by the
5507 caller.
5508
5509 @retval false success
5510 @retval true error
5511 */
after_append_to_relay_log(Master_info * mi)5512 bool MYSQL_BIN_LOG::after_append_to_relay_log(Master_info *mi)
5513 {
5514 DBUG_ENTER("MYSQL_BIN_LOG::after_append_to_relay_log");
5515 DBUG_PRINT("info",("max_size: %lu",max_size));
5516
5517 // Check pre-conditions
5518 mysql_mutex_assert_owner(&LOCK_log);
5519 mysql_mutex_assert_owner(&mi->data_lock);
5520 DBUG_ASSERT(is_relay_log);
5521 DBUG_ASSERT(current_thd->system_thread == SYSTEM_THREAD_SLAVE_IO);
5522
5523 // Flush and sync
5524 bool error= false;
5525 if (flush_and_sync(0) == 0)
5526 {
5527 DBUG_EXECUTE_IF ("set_max_size_zero",
5528 {max_size=0;});
5529 // If relay log is too big, rotate
5530 if ((uint) my_b_append_tell(&log_file) >
5531 DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size))
5532 {
5533 error= new_file_without_locking(mi->get_mi_description_event());
5534 DBUG_EXECUTE_IF ("set_max_size_zero",
5535 {
5536 max_size=1073741824;
5537 DBUG_SET("-d,set_max_size_zero");
5538 DBUG_SET("-d,flush_after_reading_gtid_event");
5539 });
5540 }
5541 }
5542
5543 signal_update();
5544
5545 DBUG_RETURN(error);
5546 }
5547
5548
append_event(Log_event * ev,Master_info * mi)5549 bool MYSQL_BIN_LOG::append_event(Log_event* ev, Master_info *mi)
5550 {
5551 DBUG_ENTER("MYSQL_BIN_LOG::append");
5552
5553 // check preconditions
5554 DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5555 DBUG_ASSERT(is_relay_log);
5556
5557 // acquire locks
5558 mysql_mutex_lock(&LOCK_log);
5559
5560 // write data
5561 bool error = false;
5562 if (ev->write(&log_file) == 0)
5563 {
5564 bytes_written+= ev->data_written;
5565 error= after_append_to_relay_log(mi);
5566 }
5567 else
5568 error= true;
5569
5570 mysql_mutex_unlock(&LOCK_log);
5571 DBUG_RETURN(error);
5572 }
5573
5574
append_buffer(const char * buf,uint len,Master_info * mi)5575 bool MYSQL_BIN_LOG::append_buffer(const char* buf, uint len, Master_info *mi)
5576 {
5577 DBUG_ENTER("MYSQL_BIN_LOG::append_buffer");
5578
5579 // check preconditions
5580 DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5581 DBUG_ASSERT(is_relay_log);
5582 mysql_mutex_assert_owner(&LOCK_log);
5583
5584 // write data
5585 bool error= false;
5586 if (my_b_append(&log_file,(uchar*) buf,len) == 0)
5587 {
5588 bytes_written += len;
5589 error= after_append_to_relay_log(mi);
5590 }
5591 else
5592 error= true;
5593
5594 DBUG_RETURN(error);
5595 }
5596 #endif // ifdef HAVE_REPLICATION
5597
flush_and_sync(const bool force)5598 bool MYSQL_BIN_LOG::flush_and_sync(const bool force)
5599 {
5600 mysql_mutex_assert_owner(&LOCK_log);
5601
5602 if (flush_io_cache(&log_file))
5603 return 1;
5604
5605 std::pair<bool, bool> result= sync_binlog_file(force);
5606
5607 return result.first;
5608 }
5609
start_union_events(THD * thd,query_id_t query_id_param)5610 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
5611 {
5612 DBUG_ASSERT(!thd->binlog_evt_union.do_union);
5613 thd->binlog_evt_union.do_union= TRUE;
5614 thd->binlog_evt_union.unioned_events= FALSE;
5615 thd->binlog_evt_union.unioned_events_trans= FALSE;
5616 thd->binlog_evt_union.first_query_id= query_id_param;
5617 }
5618
stop_union_events(THD * thd)5619 void MYSQL_BIN_LOG::stop_union_events(THD *thd)
5620 {
5621 DBUG_ASSERT(thd->binlog_evt_union.do_union);
5622 thd->binlog_evt_union.do_union= FALSE;
5623 }
5624
is_query_in_union(THD * thd,query_id_t query_id_param)5625 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
5626 {
5627 return (thd->binlog_evt_union.do_union &&
5628 query_id_param >= thd->binlog_evt_union.first_query_id);
5629 }
5630
5631 /*
5632 Updates thd's position-of-next-event variables
5633 after a *real* write a file.
5634 */
update_thd_next_event_pos(THD * thd)5635 void MYSQL_BIN_LOG::update_thd_next_event_pos(THD* thd)
5636 {
5637 if (likely(thd != NULL))
5638 {
5639 thd->set_next_event_pos(log_file_name,
5640 my_b_tell(&log_file));
5641 }
5642 }
5643
5644 /*
5645 Moves the last bunch of rows from the pending Rows event to a cache (either
5646 transactional cache if is_transaction is @c true, or the non-transactional
5647 cache otherwise. Sets a new pending event.
5648
5649 @param thd a pointer to the user thread.
5650 @param evt a pointer to the row event.
5651 @param is_transactional @c true indicates a transactional cache,
5652 otherwise @c false a non-transactional.
5653 */
5654 int
flush_and_set_pending_rows_event(THD * thd,Rows_log_event * event,bool is_transactional)5655 MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
5656 Rows_log_event* event,
5657 bool is_transactional)
5658 {
5659 DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
5660 DBUG_ASSERT(mysql_bin_log.is_open());
5661 DBUG_PRINT("enter", ("event: 0x%lx", (long) event));
5662
5663 int error= 0;
5664 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
5665
5666 DBUG_ASSERT(cache_mngr);
5667
5668 binlog_cache_data *cache_data=
5669 cache_mngr->get_binlog_cache_data(is_transactional);
5670
5671 DBUG_PRINT("info", ("cache_mngr->pending(): 0x%lx", (long) cache_data->pending()));
5672
5673 if (Rows_log_event* pending= cache_data->pending())
5674 {
5675 /*
5676 Write pending event to the cache.
5677 */
5678 if (cache_data->write_event(thd, pending))
5679 {
5680 set_write_error(thd, is_transactional);
5681 if (check_write_error(thd) && cache_data &&
5682 stmt_cannot_safely_rollback(thd))
5683 cache_data->set_incident();
5684 delete pending;
5685 cache_data->set_pending(NULL);
5686 DBUG_RETURN(1);
5687 }
5688
5689 delete pending;
5690 }
5691
5692 cache_data->set_pending(event);
5693
5694 DBUG_RETURN(error);
5695 }
5696
/**
  Write an event to the binary log.

  The event is written to the session's binlog cache selected by
  event_info->is_using_trans_cache(), not directly to the log file. When
  the statement is not logged in row format, the Intvar, Rand and
  User_var "context" events are written to the same cache before it.

  @param event_info  Event to write; its thd member supplies the session.

  @retval false  The event was written, was filtered out, or was only
                 recorded as part of a union (see binlog_evt_union).
  @retval true   Error. Also returned when the log is not open.
*/

bool MYSQL_BIN_LOG::write_event(Log_event *event_info)
{
  THD *thd= event_info->thd;
  /* Pessimistic default; cleared only after the event has been written. */
  bool error= 1;
  DBUG_ENTER("MYSQL_BIN_LOG::write_event(Log_event *)");

  /*
    thd is dereferenced unconditionally here, so the later "thd &&" and
    "if (thd)" tests are always true when they are reached.
  */
  if (thd->binlog_evt_union.do_union)
  {
    /*
      In Stored function; Remember that function call caused an update.
      We will log the function call to the binary log on function exit
    */
    thd->binlog_evt_union.unioned_events= TRUE;
    thd->binlog_evt_union.unioned_events_trans |=
      event_info->is_using_trans_cache();
    DBUG_RETURN(0);
  }

  /*
    We only end the statement if we are in a top-level statement.  If
    we are inside a stored function, we do not end the statement since
    this will close all tables on the slave. But there can be a special case
    where we are inside a stored function/trigger and a SAVEPOINT is being
    set inside the stored function/trigger. This SAVEPOINT execution will
    force the pending event to be flushed without an STMT_END_F flag. This
    will result in a case where following DMLs will be considered as part of
    same statement and result in data loss on slave. Hence in this case we
    force the end_stmt to be true.
  */
  bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
                        SQLCOM_SAVEPOINT)? true:
    (thd->locked_tables_mode && thd->lex->requires_prelocking());
  if (thd->binlog_flush_pending_rows_event(end_stmt,
                                           event_info->is_using_trans_cache()))
    DBUG_RETURN(error);

  /*
     In most cases this is only called if 'is_open()' is true; in fact this is
     mostly called if is_open() *was* true a few instructions before, but it
     could have changed since.
  */
  if (likely(is_open()))
  {
#ifdef HAVE_REPLICATION
    /*
      In the future we need to add to the following if tests like
      "do the involved tables match (to be implemented)
      binlog_[wild_]{do|ignore}_table?" (WL#1049)"

      The event is silently skipped (return 0) when binary logging is
      switched off for the session, or when the database filter rejects
      it. SAVEPOINT and ROLLBACK TO SAVEPOINT are never filtered by
      database.
    */
    const char *local_db= event_info->get_db();
    if ((thd && !(thd->variables.option_bits & OPTION_BIN_LOG)) ||
	(thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
         thd->lex->sql_command != SQLCOM_SAVEPOINT &&
         (!event_info->is_no_filter_event() &&
          !binlog_filter->db_ok(local_db))))
      DBUG_RETURN(0);
#endif /* HAVE_REPLICATION */

    DBUG_ASSERT(event_info->is_using_trans_cache() || event_info->is_using_stmt_cache());

    if (binlog_start_trans_and_stmt(thd, event_info))
      DBUG_RETURN(error);

    /*
      All "goto err" jumps below come after these declarations, so the
      err label can safely use them.
    */
    bool is_trans_cache= event_info->is_using_trans_cache();
    binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
    binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);

    DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));

    /*
       No check for auto events flag here - this write method should
       never be called if auto-events are enabled.

       Write first log events which describe the 'run environment'
       of the SQL command. If row-based binlogging, Insert_id, Rand
       and other kind of "setting context" events are not needed.

       Each context event is written with the cache type and logging type
       of event_info. Its size is added to thd->binlog_bytes_written only
       when event_info uses immediate logging.
    */
    if (thd)
    {
      if (!thd->is_current_stmt_binlog_format_row())
      {
        if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
        {
          Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
                             thd->first_successful_insert_id_in_prev_stmt_for_binlog,
                             event_info->event_cache_type, event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
          if (event_info->is_using_immediate_logging())
            thd->binlog_bytes_written+= e.data_written;
        }
        if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
        {
          DBUG_PRINT("info",("number of auto_inc intervals: %u",
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             nb_elements()));
          Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             minimum(), event_info->event_cache_type,
                             event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
          if (event_info->is_using_immediate_logging())
            thd->binlog_bytes_written+= e.data_written;
        }
        if (thd->rand_used)
        {
          Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
                           event_info->event_cache_type,
                           event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
          if (event_info->is_using_immediate_logging())
            thd->binlog_bytes_written+= e.data_written;
        }
        if (thd->user_var_events.elements)
        {
          /* One User_var event per entry recorded in thd->user_var_events. */
          for (uint i= 0; i < thd->user_var_events.elements; i++)
          {
            BINLOG_USER_VAR_EVENT *user_var_event;
            get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);

            /* setting flags for user var log event */
            uchar flags= User_var_log_event::UNDEF_F;
            if (user_var_event->unsigned_flag)
              flags|= User_var_log_event::UNSIGNED_F;

            User_var_log_event e(thd,
                                 user_var_event->user_var_event->entry_name.ptr(),
                                 user_var_event->user_var_event->entry_name.length(),
                                 user_var_event->value,
                                 user_var_event->length,
                                 user_var_event->type,
                                 user_var_event->charset_number, flags,
                                 event_info->event_cache_type,
                                 event_info->event_logging_type);
            if (cache_data->write_event(thd, &e))
              goto err;
            if (event_info->is_using_immediate_logging())
              thd->binlog_bytes_written+= e.data_written;
          }
        }
      }
    }

    /*
      Write the event.

      The debug keyword "injecting_fault_writing" forces the error path
      even when the write itself succeeded.
    */
    if (cache_data->write_event(thd, event_info) ||
        DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
      goto err;
    if (event_info->is_using_immediate_logging())
      thd->binlog_bytes_written+= event_info->data_written;

    /*
      After writing the event, if the trx-cache was used and any unsafe
      change was written into it, the cache is marked as cannot safely
      roll back.
    */
    if (is_trans_cache && stmt_cannot_safely_rollback(thd))
      cache_mngr->trx_cache.set_cannot_rollback();

    error= 0;

err:
    /*
      Reached both on success (error == 0) and through "goto err". On
      failure the write error is recorded and, if the statement cannot be
      safely rolled back, the cache is flagged with an incident.
    */
    if (error)
    {
      set_write_error(thd, is_trans_cache);
      if (check_write_error(thd) && cache_data &&
          stmt_cannot_safely_rollback(thd))
        cache_data->set_incident();
    }
  }

  DBUG_RETURN(error);
}
5877
5878 /**
5879 The method executes rotation when LOCK_log is already acquired
5880 by the caller.
5881
5882 @param force_rotate caller can request the log rotation
5883 @param check_purge is set to true if rotation took place
5884
5885 @note
5886 If rotation fails, for instance the server was unable
5887 to create a new log file, we still try to write an
5888 incident event to the current log.
5889
5890 @note The caller must hold LOCK_log when invoking this function.
5891
5892 @retval
5893 nonzero - error in rotating routine.
5894 */
rotate(bool force_rotate,bool * check_purge)5895 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
5896 {
5897 int error= 0;
5898 DBUG_ENTER("MYSQL_BIN_LOG::rotate");
5899
5900 DBUG_ASSERT(!is_relay_log);
5901 mysql_mutex_assert_owner(&LOCK_log);
5902
5903 *check_purge= false;
5904
5905 if (DBUG_EVALUATE_IF("force_rotate", 1, 0) || force_rotate ||
5906 (my_b_tell(&log_file) >= (my_off_t) max_size))
5907 {
5908 error= new_file_without_locking(NULL);
5909 *check_purge= true;
5910 publish_coordinates_for_global_status();
5911 }
5912 DBUG_RETURN(error);
5913 }
5914
5915 /**
5916 The method executes logs purging routine.
5917
5918 @retval
5919 nonzero - error in rotating routine.
5920 */
purge()5921 void MYSQL_BIN_LOG::purge()
5922 {
5923 #ifdef HAVE_REPLICATION
5924 if (expire_logs_days)
5925 {
5926 DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
5927 time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
5928 DBUG_EXECUTE_IF("expire_logs_always",
5929 { purge_time= my_time(0);});
5930 if (purge_time >= 0)
5931 {
5932 /*
5933 Flush logs for storage engines, so that the last transaction
5934 is fsynced inside storage engines.
5935 */
5936 ha_flush_logs(NULL);
5937 purge_logs_before_date(purge_time, true);
5938 }
5939 }
5940 if (max_binlog_files)
5941 {
5942 purge_logs_maximum_number(max_binlog_files);
5943 }
5944 #endif
5945 }
5946
5947 /**
5948 The method is a shortcut of @c rotate() and @c purge().
5949 LOCK_log is acquired prior to rotate and is released after it.
5950
5951 @param force_rotate caller can request the log rotation
5952
5953 @retval
5954 nonzero - error in rotating routine.
5955 */
rotate_and_purge(THD * thd,bool force_rotate)5956 int MYSQL_BIN_LOG::rotate_and_purge(THD* thd, bool force_rotate)
5957 {
5958 int error= 0;
5959 DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
5960 bool check_purge= false;
5961
5962 /*
5963 Wait for handlerton to insert any pending information into the binlog.
5964 For e.g. ha_ndbcluster which updates the binlog asynchronously this is
5965 needed so that the user see its own commands in the binlog.
5966 */
5967 ha_binlog_wait(thd);
5968
5969 DBUG_ASSERT(!is_relay_log);
5970 mysql_mutex_lock(&LOCK_log);
5971 error= rotate(force_rotate, &check_purge);
5972 /*
5973 NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
5974 the mutex. Otherwise causes various deadlocks.
5975 */
5976 mysql_mutex_unlock(&LOCK_log);
5977
5978 if (!error && check_purge)
5979 purge();
5980
5981 DBUG_RETURN(error);
5982 }
5983
next_file_id()5984 uint MYSQL_BIN_LOG::next_file_id()
5985 {
5986 uint res;
5987 mysql_mutex_lock(&LOCK_log);
5988 res = file_id++;
5989 mysql_mutex_unlock(&LOCK_log);
5990 return res;
5991 }
5992
5993
5994 /**
5995 Calculate checksum of possibly a part of an event containing at least
5996 the whole common header.
5997
5998 @param buf the pointer to trans cache's buffer
5999 @param off the offset of the beginning of the event in the buffer
6000 @param event_len no-checksum length of the event
6001 @param length the current size of the buffer
6002
6003 @param crc [in-out] the checksum
6004
6005 Event size in incremented by @c BINLOG_CHECKSUM_LEN.
6006
6007 @return 0 or number of unprocessed yet bytes of the event excluding
6008 the checksum part.
6009 */
fix_log_event_crc(uchar * buf,uint off,uint event_len,uint length,ha_checksum * crc)6010 static ulong fix_log_event_crc(uchar *buf, uint off, uint event_len,
6011 uint length, ha_checksum *crc)
6012 {
6013 ulong ret;
6014 uchar *event_begin= buf + off;
6015 uint16 flags= uint2korr(event_begin + FLAGS_OFFSET);
6016
6017 DBUG_ASSERT(length >= off + LOG_EVENT_HEADER_LEN); //at least common header in
6018 int2store(event_begin + FLAGS_OFFSET, flags);
6019 ret= length >= off + event_len ? 0 : off + event_len - length;
6020 *crc= my_checksum(*crc, event_begin, event_len - ret);
6021 return ret;
6022 }
6023
6024 /*
6025 Write the contents of a cache to the binary log.
6026
6027 SYNOPSIS
6028 do_write_cache()
6029 cache Cache to write to the binary log
      lock_log          True if the LOCK_log mutex should be acquired, false otherwise
6031
6032 DESCRIPTION
6033 Write the contents of the cache to the binary log. The cache will
6034 be reset as a READ_CACHE to be able to read the contents from it.
6035
6036 Reading from the trans cache with possible (per @c binlog_checksum_options)
6037 adding checksum value and then fixing the length and the end_log_pos of
6038 events prior to fill in the binlog cache.
6039 */
6040
do_write_cache(THD * thd,IO_CACHE * cache)6041 int MYSQL_BIN_LOG::do_write_cache(THD *thd, IO_CACHE *cache)
6042 {
6043 DBUG_ENTER("MYSQL_BIN_LOG::do_write_cache(IO_CACHE *)");
6044
6045 DBUG_EXECUTE_IF("simulate_do_write_cache_failure",
6046 {
6047 /*
6048 see binlog_cache_data::write_event() that reacts on
6049 @c simulate_disk_full_at_flush_pending.
6050 */
6051 DBUG_SET("-d,simulate_do_write_cache_failure");
6052 DBUG_RETURN(ER_ERROR_ON_WRITE);
6053 });
6054
6055 if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
6056 DBUG_RETURN(ER_ERROR_ON_WRITE);
6057 uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
6058 ulong remains= 0; // part of unprocessed yet netto length of the event
6059 long val;
6060 ulong end_log_pos_inc= 0; // each event processed adds BINLOG_CHECKSUM_LEN 2 t
6061 uchar header[LOG_EVENT_HEADER_LEN];
6062 ha_checksum crc= 0, crc_0= 0; // assignments to keep compiler happy
6063 my_bool do_checksum= (binlog_checksum_options != BINLOG_CHECKSUM_ALG_OFF);
6064 uchar buf[BINLOG_CHECKSUM_LEN];
6065
6066 // while there is just one alg the following must hold:
6067 DBUG_ASSERT(!do_checksum ||
6068 binlog_checksum_options == BINLOG_CHECKSUM_ALG_CRC32);
6069
6070 /*
6071 The events in the buffer have incorrect end_log_pos data
6072 (relative to beginning of group rather than absolute),
6073 so we'll recalculate them in situ so the binlog is always
6074 correct, even in the middle of a group. This is possible
6075 because we now know the start position of the group (the
6076 offset of this cache in the log, if you will); all we need
6077 to do is to find all event-headers, and add the position of
6078 the group to the end_log_pos of each event. This is pretty
6079 straight forward, except that we read the cache in segments,
6080 so an event-header might end up on the cache-border and get
6081 split.
6082 */
6083
6084 group= (uint)my_b_tell(&log_file);
6085 DBUG_PRINT("debug", ("length: %llu, group: %llu",
6086 (ulonglong) length, (ulonglong) group));
6087 hdr_offs= carry= 0;
6088 if (do_checksum)
6089 crc= crc_0= my_checksum(0L, NULL, 0);
6090
6091 if (DBUG_EVALUATE_IF("fault_injection_crc_value", 1, 0))
6092 crc= crc - 1;
6093
6094 do
6095 {
6096 /*
6097 if we only got a partial header in the last iteration,
6098 get the other half now and process a full header.
6099 */
6100 if (unlikely(carry > 0))
6101 {
6102 DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);
6103
6104 /* assemble both halves */
6105 memcpy(&header[carry], (char *)cache->read_pos,
6106 LOG_EVENT_HEADER_LEN - carry);
6107
6108 /* fix end_log_pos */
6109 val=uint4korr(header + LOG_POS_OFFSET);
6110 val+= group +
6111 (end_log_pos_inc+= (do_checksum ? BINLOG_CHECKSUM_LEN : 0));
6112 int4store(&header[LOG_POS_OFFSET], val);
6113
6114 if (do_checksum)
6115 {
6116 ulong len= uint4korr(header + EVENT_LEN_OFFSET);
6117 /* fix len */
6118 int4store(&header[EVENT_LEN_OFFSET], len + BINLOG_CHECKSUM_LEN);
6119 }
6120
6121 /* write the first half of the split header */
6122 if (my_b_write(&log_file, header, carry))
6123 DBUG_RETURN(ER_ERROR_ON_WRITE);
6124 thd->binlog_bytes_written+= carry;
6125
6126 /*
6127 copy fixed second half of header to cache so the correct
6128 version will be written later.
6129 */
6130 memcpy((char *)cache->read_pos, &header[carry],
6131 LOG_EVENT_HEADER_LEN - carry);
6132
6133 /* next event header at ... */
6134 hdr_offs= uint4korr(header + EVENT_LEN_OFFSET) - carry -
6135 (do_checksum ? BINLOG_CHECKSUM_LEN : 0);
6136
6137 if (do_checksum)
6138 {
6139 DBUG_ASSERT(crc == crc_0 && remains == 0);
6140 crc= my_checksum(crc, header, carry);
6141 remains= uint4korr(header + EVENT_LEN_OFFSET) - carry -
6142 BINLOG_CHECKSUM_LEN;
6143 }
6144 carry= 0;
6145 }
6146
6147 /* if there is anything to write, process it. */
6148
6149 if (likely(length > 0))
6150 {
6151 /*
6152 process all event-headers in this (partial) cache.
6153 if next header is beyond current read-buffer,
6154 we'll get it later (though not necessarily in the
6155 very next iteration, just "eventually").
6156 */
6157
6158 /* crc-calc the whole buffer */
6159 if (do_checksum && hdr_offs >= length)
6160 {
6161
6162 DBUG_ASSERT(remains != 0 && crc != crc_0);
6163
6164 crc= my_checksum(crc, cache->read_pos, length);
6165 remains -= length;
6166 if (my_b_write(&log_file, cache->read_pos, length))
6167 DBUG_RETURN(ER_ERROR_ON_WRITE);
6168 if (remains == 0)
6169 {
6170 int4store(buf, crc);
6171 if (my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
6172 DBUG_RETURN(ER_ERROR_ON_WRITE);
6173 crc= crc_0;
6174 }
6175 }
6176
6177 while (hdr_offs < length)
6178 {
6179 /*
6180 partial header only? save what we can get, process once
6181 we get the rest.
6182 */
6183
6184 if (do_checksum)
6185 {
6186 if (remains != 0)
6187 {
6188 /*
6189 finish off with remains of the last event that crawls
6190 from previous into the current buffer
6191 */
6192 DBUG_ASSERT(crc != crc_0);
6193 crc= my_checksum(crc, cache->read_pos, hdr_offs);
6194 int4store(buf, crc);
6195 remains -= hdr_offs;
6196 DBUG_ASSERT(remains == 0);
6197 if (my_b_write(&log_file, cache->read_pos, hdr_offs) ||
6198 my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
6199 DBUG_RETURN(ER_ERROR_ON_WRITE);
6200 crc= crc_0;
6201 }
6202 }
6203
6204 if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
6205 {
6206 carry= length - hdr_offs;
6207 memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
6208 length= hdr_offs;
6209 }
6210 else
6211 {
6212 /* we've got a full event-header, and it came in one piece */
6213 uchar *ev= (uchar *)cache->read_pos + hdr_offs;
6214 uint event_len= uint4korr(ev + EVENT_LEN_OFFSET); // netto len
6215 uchar *log_pos= ev + LOG_POS_OFFSET;
6216
6217 /* fix end_log_pos */
6218 val= uint4korr(log_pos) + group +
6219 (end_log_pos_inc += (do_checksum ? BINLOG_CHECKSUM_LEN : 0));
6220 int4store(log_pos, val);
6221
6222 /* fix CRC */
6223 if (do_checksum)
6224 {
6225 /* fix length */
6226 int4store(ev + EVENT_LEN_OFFSET, event_len + BINLOG_CHECKSUM_LEN);
6227 remains= fix_log_event_crc(cache->read_pos, hdr_offs, event_len,
6228 length, &crc);
6229 DBUG_EXECUTE_IF("fail_binlog_write_1",
6230 errno= 28; DBUG_RETURN(ER_ERROR_ON_WRITE););
6231 if (my_b_write(&log_file, ev,
6232 remains == 0 ? event_len : length - hdr_offs))
6233 DBUG_RETURN(ER_ERROR_ON_WRITE);
6234 if (remains == 0)
6235 {
6236 int4store(buf, crc);
6237 if (my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
6238 DBUG_RETURN(ER_ERROR_ON_WRITE);
6239 crc= crc_0; // crc is complete
6240 }
6241 }
6242
6243 /* next event header at ... */
6244 hdr_offs += event_len; // incr by the netto len
6245
6246 DBUG_ASSERT(!do_checksum || remains == 0 || hdr_offs >= length);
6247 }
6248 }
6249
6250 /*
6251 Adjust hdr_offs. Note that it may still point beyond the segment
6252 read in the next iteration; if the current event is very long,
6253 it may take a couple of read-iterations (and subsequent adjustments
6254 of hdr_offs) for it to point into the then-current segment.
6255 If we have a split header (!carry), hdr_offs will be set at the
6256 beginning of the next iteration, overwriting the value we set here:
6257 */
6258 hdr_offs -= length;
6259 }
6260
6261 /* Write the entire buf to the binary log file */
6262 if (!do_checksum)
6263 {
6264 /* DBUG_EXECUTE_IF("fail_binlog_write_1",
6265 errno= 28; DBUG_RETURN(ER_ERROR_ON_WRITE);); */
6266 if (my_b_write(&log_file, cache->read_pos, length))
6267 DBUG_RETURN(ER_ERROR_ON_WRITE);
6268 thd->binlog_bytes_written+= length;
6269 }
6270 cache->read_pos=cache->read_end; // Mark buffer used up
6271 } while ((length= my_b_fill(cache)));
6272
6273 DBUG_ASSERT(carry == 0);
6274 DBUG_ASSERT(!do_checksum || remains == 0);
6275 DBUG_ASSERT(!do_checksum || crc == crc_0);
6276
6277 DBUG_RETURN(0); // All OK
6278 }
6279
6280 /**
6281 Writes an incident event to the binary log.
6282
6283 @param ev Incident event to be written
6284 @param need_lock_log If true, will acquire LOCK_log; otherwise the
6285 caller should already have acquired LOCK_log.
6286 @do_flush_and_sync If true, will call flush_and_sync(), rotate() and
6287 purge().
6288
6289 @retval false error
6290 @retval true success
6291 */
write_incident(Incident_log_event * ev,bool need_lock_log,bool do_flush_and_sync)6292 bool MYSQL_BIN_LOG::write_incident(Incident_log_event *ev, bool need_lock_log,
6293 bool do_flush_and_sync)
6294 {
6295 uint error= 0;
6296 DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
6297
6298 if (!is_open())
6299 DBUG_RETURN(error);
6300
6301 if (need_lock_log)
6302 mysql_mutex_lock(&LOCK_log);
6303 else
6304 mysql_mutex_assert_owner(&LOCK_log);
6305
6306 // @todo make this work with the group log. /sven
6307
6308 error= ev->write(&log_file);
6309
6310 if (do_flush_and_sync)
6311 {
6312 if (!error && !(error= flush_and_sync()))
6313 {
6314 bool check_purge= false;
6315 signal_update();
6316 error= rotate(true, &check_purge);
6317 if (!error && check_purge)
6318 purge();
6319 }
6320 }
6321
6322 if (need_lock_log)
6323 mysql_mutex_unlock(&LOCK_log);
6324
6325 DBUG_RETURN(error);
6326 }
6327
write_dml_directly(THD * thd,const char * stmt,size_t stmt_len,enum_sql_command sql_command)6328 bool MYSQL_BIN_LOG::write_dml_directly(THD* thd, const char *stmt, size_t stmt_len,
6329 enum_sql_command sql_command)
6330 {
6331 bool ret= false;
6332 /* backup the original command */
6333 enum_sql_command save_sql_command= thd->lex->sql_command;
6334 thd->lex->sql_command= sql_command;
6335
6336 if (thd->binlog_query(THD::STMT_QUERY_TYPE, stmt, stmt_len,
6337 FALSE, FALSE, FALSE, 0) ||
6338 commit(thd, false) != TC_LOG::RESULT_SUCCESS)
6339 {
6340 ret= true;
6341 }
6342
6343 thd->lex->sql_command= save_sql_command;
6344 return ret;
6345 }
6346
6347
6348 /**
6349 Creates an incident event and writes it to the binary log.
6350
6351 @param thd Thread variable
6352 @param ev Incident event to be written
6353 @param lock If the binary lock should be locked or not
6354
6355 @retval
6356 0 error
6357 @retval
6358 1 success
6359 */
write_incident(THD * thd,bool need_lock_log,bool do_flush_and_sync)6360 bool MYSQL_BIN_LOG::write_incident(THD *thd, bool need_lock_log,
6361 bool do_flush_and_sync)
6362 {
6363 DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
6364
6365 if (!is_open())
6366 DBUG_RETURN(0);
6367
6368 LEX_STRING const write_error_msg=
6369 { C_STRING_WITH_LEN("error writing to the binary log") };
6370 Incident incident= INCIDENT_LOST_EVENTS;
6371 Incident_log_event ev(thd, incident, write_error_msg);
6372
6373 DBUG_RETURN(write_incident(&ev, need_lock_log, do_flush_and_sync));
6374 }
6375
/**
  Write a cached log entry to the binary log.

  @param thd         Thread variable
  @param cache_data  The cache to copy to the binlog. If it reports
                     has_incident(), an incident event is written after
                     the cache contents.

  @retval 0 success (also returned when the cache is empty)
  @retval 1 error; write_error is set and thd->commit_error is set to
            THD::CE_FLUSH_ERROR

  @note
    The caller must hold LOCK_log (asserted).
  @note
    We only come here if there is something in the cache.
  @note
    The thing in the cache is always a complete transaction.
  @note
    'cache' needs to be reinitialized after this functions returns.
*/

bool MYSQL_BIN_LOG::write_cache(THD *thd, binlog_cache_data *cache_data)
{
  DBUG_ENTER("MYSQL_BIN_LOG::write_cache(THD *, binlog_cache_data *, bool)");

  IO_CACHE *cache= &cache_data->cache_log;
  bool incident= cache_data->has_incident();

  /* debug-only fault injection: fail roughly one call in three */
  DBUG_EXECUTE_IF("simulate_binlog_flush_error",
                  {
                    if (rand() % 3 == 0)
                    {
                      write_error=1;
                      thd->commit_error= THD::CE_FLUSH_ERROR;
                      DBUG_RETURN(0);
                    }
                  };);

  mysql_mutex_assert_owner(&LOCK_log);

  DBUG_ASSERT(is_open());
  if (likely(is_open()))                       // Should always be true
  {
    /*
      We only bother to write to the binary log if there is anything
      to write.
     */
    if (my_b_tell(cache) > 0)
    {
      DBUG_EXECUTE_IF("crash_before_writing_xid",
                      {
                        if ((write_error= do_write_cache(thd, cache)))
                          DBUG_PRINT("info", ("error writing binlog cache: %d",
                                              write_error));
                        flush_and_sync(true);
                        DBUG_PRINT("info", ("crashing before writing xid"));
                        DBUG_SUICIDE();
                      });

      if ((write_error= do_write_cache(thd, cache)))
        goto err;

      /* LOCK_log is already held, and flushing is left to the caller */
      if (incident && write_incident(thd, false/*need_lock_log=false*/,
                                     false/*do_flush_and_sync==false*/))
        goto err;

      DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE(););
      if (cache->error)				// Error on read
      {
        char errbuf[MYSYS_STRERROR_SIZE];
        sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name,
                        errno, my_strerror(errbuf, sizeof(errbuf), errno));
        write_error=1;				// Don't give more errors
        goto err;
      }

      /* update_on_flush() is called with global_sid_lock read-locked */
      global_sid_lock->rdlock();
      if (gtid_state->update_on_flush(thd) != RETURN_STATUS_OK)
      {
        global_sid_lock->unlock();
        goto err;
      }
      global_sid_lock->unlock();
    }
    update_thd_next_event_pos(thd);
  }

  DBUG_RETURN(0);

err:
  /* report the failure once: skipped when write_error is already set */
  if (!write_error)
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    write_error= 1;
    sql_print_error(ER(ER_ERROR_ON_WRITE), name,
                    errno, my_strerror(errbuf, sizeof(errbuf), errno));
  }

  /*
    If the flush has failed due to ENOSPC, set the flush_error flag.
  */
  if (cache->error && thd->is_error() && my_errno == ENOSPC)
  {
    cache_data->set_flush_error(thd);
  }
  thd->commit_error= THD::CE_FLUSH_ERROR;

  DBUG_RETURN(1);
}
6482
6483
6484 /**
6485 Wait until we get a signal that the relay log has been updated.
6486
6487 @param[in] thd Thread variable
6488 @param[in] timeout a pointer to a timespec;
6489 NULL means to wait w/o timeout.
6490
6491 @retval 0 if got signalled on update
6492 @retval non-0 if wait timeout elapsed
6493
6494 @note
6495 One must have a lock on LOCK_log before calling this function.
6496 */
6497
wait_for_update_relay_log(THD * thd,const struct timespec * timeout)6498 int MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd, const struct timespec *timeout)
6499 {
6500 int ret= 0;
6501 PSI_stage_info old_stage;
6502 DBUG_ENTER("wait_for_update_relay_log");
6503
6504 thd->ENTER_COND(&update_cond, &LOCK_log,
6505 &stage_slave_has_read_all_relay_log,
6506 &old_stage);
6507
6508 if (!timeout)
6509 mysql_cond_wait(&update_cond, &LOCK_log);
6510 else
6511 ret= mysql_cond_timedwait(&update_cond, &LOCK_log,
6512 const_cast<struct timespec *>(timeout));
6513 thd->EXIT_COND(&old_stage);
6514
6515 DBUG_RETURN(ret);
6516 }
6517
6518 /**
6519 Wait until we get a signal that the binary log has been updated.
6520 Applies to master only.
6521
6522 NOTES
6523 @param[in] thd a THD struct
6524 @param[in] timeout a pointer to a timespec;
6525 NULL means to wait w/o timeout.
6526 @retval 0 if got signalled on update
6527 @retval non-0 if wait timeout elapsed
6528 @note
6529 LOCK_log must be taken before calling this function.
6530 LOCK_log is being released while the thread is waiting.
6531 LOCK_log is released by the caller.
6532 */
6533
wait_for_update_bin_log(THD * thd,const struct timespec * timeout)6534 int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
6535 const struct timespec *timeout)
6536 {
6537 int ret= 0;
6538 DBUG_ENTER("wait_for_update_bin_log");
6539
6540 if (!timeout)
6541 mysql_cond_wait(&update_cond, &LOCK_log);
6542 else
6543 ret= mysql_cond_timedwait(&update_cond, &LOCK_log,
6544 const_cast<struct timespec *>(timeout));
6545 DBUG_RETURN(ret);
6546 }
6547
6548
/**
  Close the log file.

  When the log is in state LOG_OPENED this optionally writes a stop
  event, clears the "in use" flag byte in the file header (only for a
  WRITE_CACHE log file) and closes the file via MYSQL_LOG::close().
  The index file is closed only if LOG_CLOSE_INDEX is requested.
  Finally log_state is updated and @c name is freed.

  @param exiting     Bitmask for one or more of the following bits:
          - LOG_CLOSE_INDEX : if we should close the index file
          - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
                                     at once after close.
          - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log

  @param need_lock_log If true, this function acquires LOCK_log;
  otherwise the caller should already have acquired it.

  @param need_lock_index If true, this function acquires LOCK_index;
  otherwise the caller should already have acquired it.

  @note
    One can do an open on the object at once after doing a close.
    The internal structures are not freed until cleanup() is called

  @note
    NOTE(review): the results of writing the stop event and of the
    pwrite/seek on the header are not checked here; this looks like a
    deliberate best effort - confirm.
*/

void MYSQL_BIN_LOG::close(uint exiting, bool need_lock_log,
                          bool need_lock_index)
{					// One can't set log_type here!
  DBUG_ENTER("MYSQL_BIN_LOG::close");
  DBUG_PRINT("enter",("exiting: %d", (int) exiting));

  if (need_lock_log)
    mysql_mutex_lock(&LOCK_log);
  else
    mysql_mutex_assert_owner(&LOCK_log);

  if (log_state == LOG_OPENED)
  {
#ifdef HAVE_REPLICATION
    if ((exiting & LOG_CLOSE_STOP_EVENT) != 0)
    {
      Stop_log_event s;
      // the checksumming rule for relay-log case is similar to Rotate
        s.checksum_alg= is_relay_log ?
          relay_log_checksum_alg : binlog_checksum_options;
      DBUG_ASSERT(!is_relay_log ||
                  relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
      s.write(&log_file);
      bytes_written+= s.data_written;
      signal_update();
    }
#endif /* HAVE_REPLICATION */

    /* don't pwrite in a file opened with O_APPEND - it doesn't work */
    if (log_file.type == WRITE_CACHE)
    {
      /* the flags field of the first event, right after the file header */
      my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
      my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
      uchar flags= 0;            // clearing LOG_EVENT_BINLOG_IN_USE_F
      mysql_file_pwrite(log_file.file, &flags, 1, offset, MYF(0));
      /*
        Restore position so that anything we have in the IO_cache is written
        to the correct position.
        We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
        original position on system that doesn't support pwrite().
      */
      mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
    }

    /* this will cleanup IO_CACHE, sync and close the file */
    MYSQL_LOG::close(exiting);
  }

  /*
    The following test is needed even if is_open() is not set, as we may have
    called a not complete close earlier and the index file is still open.
  */

  if (need_lock_index)
    mysql_mutex_lock(&LOCK_index);
  else
    mysql_mutex_assert_owner(&LOCK_index);

  if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
  {
    end_io_cache(&index_file);
    /* report a failed close once: skipped when write_error is already set */
    if (mysql_file_close(index_file.file, MYF(0)) < 0 && ! write_error)
    {
      char errbuf[MYSYS_STRERROR_SIZE];
      write_error= 1;
      sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name,
                      errno, my_strerror(errbuf, sizeof(errbuf), errno));
    }
  }

  if (need_lock_index)
    mysql_mutex_unlock(&LOCK_index);

  /* the state is updated and the name released on every call */
  log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
  my_free(name);
  name= NULL;

  if (need_lock_log)
    mysql_mutex_unlock(&LOCK_log);

  DBUG_VOID_RETURN;
}
6651
harvest_bytes_written(Relay_log_info * rli,bool need_log_space_lock)6652 void MYSQL_BIN_LOG::harvest_bytes_written(Relay_log_info* rli, bool need_log_space_lock)
6653 {
6654 #ifndef DBUG_OFF
6655 char buf1[22],buf2[22];
6656 #endif
6657 DBUG_ENTER("harvest_bytes_written");
6658 if (need_log_space_lock)
6659 mysql_mutex_lock(&rli->log_space_lock);
6660 else
6661 mysql_mutex_assert_owner(&rli->log_space_lock);
6662 rli->log_space_total+= bytes_written;
6663 DBUG_PRINT("info",("relay_log_space: %s bytes_written: %s",
6664 llstr(rli->log_space_total,buf1), llstr(bytes_written,buf2)));
6665 bytes_written=0;
6666 if (need_log_space_lock)
6667 mysql_mutex_unlock(&rli->log_space_lock);
6668 DBUG_VOID_RETURN;
6669 }
6670
set_max_size(ulong max_size_arg)6671 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
6672 {
6673 /*
6674 We need to take locks, otherwise this may happen:
6675 new_file() is called, calls open(old_max_size), then before open() starts,
6676 set_max_size() sets max_size to max_size_arg, then open() starts and
6677 uses the old_max_size argument, so max_size_arg has been overwritten and
6678 it's like if the SET command was never run.
6679 */
6680 DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
6681 mysql_mutex_lock(&LOCK_log);
6682 if (is_open())
6683 max_size= max_size_arg;
6684 mysql_mutex_unlock(&LOCK_log);
6685 DBUG_VOID_RETURN;
6686 }
6687
6688
signal_update()6689 void MYSQL_BIN_LOG::signal_update()
6690 {
6691 DBUG_ENTER("MYSQL_BIN_LOG::signal_update");
6692 signal_cnt++;
6693 mysql_cond_broadcast(&update_cond);
6694 DBUG_VOID_RETURN;
6695 }
6696
6697 /****** transaction coordinator log for 2pc - binlog() based solution ******/
6698
6699 /**
6700 @todo
6701 keep in-memory list of prepared transactions
6702 (add to list in log(), remove on unlog())
6703 and copy it to the new binlog if rotated
6704 but let's check the behaviour of tc_log_page_waits first!
6705 */
6706
open_binlog(const char * opt_name)6707 int MYSQL_BIN_LOG::open_binlog(const char *opt_name)
6708 {
6709 LOG_INFO log_info;
6710 int error= 1;
6711
6712 /*
6713 This function is used for 2pc transaction coordination. Hence, it
6714 is never used for relay logs.
6715 */
6716 DBUG_ASSERT(!is_relay_log);
6717 DBUG_ASSERT(total_ha_2pc > 1 || (1 == total_ha_2pc && opt_bin_log));
6718 DBUG_ASSERT(opt_name && opt_name[0]);
6719
6720 if (!my_b_inited(&index_file))
6721 {
6722 /* There was a failure to open the index file, can't open the binlog */
6723 cleanup();
6724 return 1;
6725 }
6726
6727 if (using_heuristic_recover())
6728 {
6729 /* generate a new binlog to mask a corrupted one */
6730 open_binlog(opt_name, 0, WRITE_CACHE, max_binlog_size, false,
6731 true/*need_lock_log=true*/,
6732 true/*need_lock_index=true*/,
6733 true/*need_sid_lock=true*/,
6734 NULL);
6735 cleanup();
6736 return 1;
6737 }
6738
6739 if ((error= find_log_pos(&log_info, NullS, true/*need_lock_index=true*/)))
6740 {
6741 if (error != LOG_INFO_EOF)
6742 sql_print_error("find_log_pos() failed (error: %d)", error);
6743 else
6744 error= 0;
6745 goto err;
6746 }
6747
6748 {
6749 const char *errmsg;
6750 IO_CACHE log;
6751 File file;
6752 Log_event *ev=0;
6753 Format_description_log_event fdle(BINLOG_VERSION);
6754 char log_name[FN_REFLEN];
6755 my_off_t valid_pos= 0;
6756 my_off_t binlog_size;
6757 MY_STAT s;
6758
6759 if (! fdle.is_valid())
6760 goto err;
6761
6762 do
6763 {
6764 strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
6765 } while (!(error= find_next_log(&log_info, true/*need_lock_index=true*/)));
6766
6767 if (error != LOG_INFO_EOF)
6768 {
6769 sql_print_error("find_log_pos() failed (error: %d)", error);
6770 goto err;
6771 }
6772
6773 if ((file= open_binlog_file(&log, log_name, &errmsg)) < 0)
6774 {
6775 sql_print_error("%s", errmsg);
6776 goto err;
6777 }
6778
6779 my_stat(log_name, &s, MYF(0));
6780 binlog_size= s.st_size;
6781
6782 if ((ev= Log_event::read_log_event(&log, 0, &fdle,
6783 opt_master_verify_checksum)) &&
6784 ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
6785 ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
6786 {
6787 sql_print_information("Recovering after a crash using %s", opt_name);
6788 valid_pos= my_b_tell(&log);
6789 error= recover(&log, (Format_description_log_event *)ev, &valid_pos);
6790 }
6791 else
6792 error=0;
6793
6794 delete ev;
6795 end_io_cache(&log);
6796 mysql_file_close(file, MYF(MY_WME));
6797
6798 if (error)
6799 goto err;
6800
6801 /* Trim the crashed binlog file to last valid transaction
6802 or event (non-transaction) base on valid_pos. */
6803 if (valid_pos > 0)
6804 {
6805 if ((file= mysql_file_open(key_file_binlog, log_name,
6806 O_RDWR | O_BINARY, MYF(MY_WME))) < 0)
6807 {
6808 sql_print_error("Failed to open the crashed binlog file "
6809 "when master server is recovering it.");
6810 return -1;
6811 }
6812
6813 /* Change binlog file size to valid_pos */
6814 if (valid_pos < binlog_size)
6815 {
6816 if (my_chsize(file, valid_pos, 0, MYF(MY_WME)))
6817 {
6818 sql_print_error("Failed to trim the crashed binlog file "
6819 "when master server is recovering it.");
6820 mysql_file_close(file, MYF(MY_WME));
6821 return -1;
6822 }
6823 else
6824 {
6825 sql_print_information("Crashed binlog file %s size is %llu, "
6826 "but recovered up to %llu. Binlog trimmed to %llu bytes.",
6827 log_name, binlog_size, valid_pos, valid_pos);
6828 }
6829 }
6830
6831 /* Clear LOG_EVENT_BINLOG_IN_USE_F */
6832 my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
6833 uchar flags= 0;
6834 if (mysql_file_pwrite(file, &flags, 1, offset, MYF(0)) != 1)
6835 {
6836 sql_print_error("Failed to clear LOG_EVENT_BINLOG_IN_USE_F "
6837 "for the crashed binlog file when master "
6838 "server is recovering it.");
6839 mysql_file_close(file, MYF(MY_WME));
6840 return -1;
6841 }
6842
6843 mysql_file_close(file, MYF(MY_WME));
6844 } //end if
6845 }
6846
6847 err:
6848 return error;
6849 }
6850
/**
  This is called on shutdown, after ha_panic.

  The body is empty: this overload performs no work.
*/
void MYSQL_BIN_LOG::close()
{
}
6855
6856 /*
6857 Prepare the transaction in the transaction coordinator.
6858
6859 This function will prepare the transaction in the storage engines
6860 (by calling @c ha_prepare_low) what will write a prepare record
6861 to the log buffers.
6862
6863 @retval 0 success
6864 @retval 1 error
6865 */
prepare(THD * thd,bool all)6866 int MYSQL_BIN_LOG::prepare(THD *thd, bool all)
6867 {
6868 DBUG_ENTER("MYSQL_BIN_LOG::prepare");
6869
6870 int error= ha_prepare_low(thd, all);
6871
6872 DBUG_RETURN(error);
6873 }
6874
/**
  Commit the transaction in the transaction coordinator.

  This function will commit the sessions transaction in the binary log
  and in the storage engines (by calling @c ha_commit_low). If the
  transaction was successfully logged (or not successfully unlogged)
  but the commit in the engines did not succed, there is a risk of
  inconsistency between the engines and the binary log.

  For binary log group commit, the commit is separated into three
  parts:

  1. First part consists of filling the necessary caches and
     finalizing them (if they need to be finalized). After this,
     nothing is added to any of the caches.

  2. Second part execute an ordered flush and commit. This will be
     done using the group commit functionality in ordered_commit.

  3. Third part checks any errors resulting from the ordered commit
     and handles them appropriately.

  @param thd  Session whose transaction or statement is committed
  @param all  true to commit the whole transaction (thd->transaction.all),
              false to commit only the statement (thd->transaction.stmt)

  @retval RESULT_SUCCESS       success
  @retval RESULT_ABORTED       error before anything was handed to
                               ordered_commit: a cache could not be
                               filled or finalized, the binlog protection
                               lock could not be acquired, or (when the
                               session has no cache manager)
                               ha_commit_low failed
  @retval RESULT_INCONSISTENT  ordered_commit failed, or ha_commit_low
                               failed when there was nothing to log
*/
TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all)
{
  DBUG_ENTER("MYSQL_BIN_LOG::commit");

  binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
  my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
  int error= RESULT_SUCCESS;
  /* set once a cache has been finalized and so must go through ordered_commit */
  bool stuff_logged= false;
  /* true only if this call acquired the protection and must release it */
  bool binlog_prot_acquired= false;

  DBUG_PRINT("enter", ("thd: 0x%llx, all: %s, xid: %llu, cache_mngr: 0x%llx",
                       (ulonglong) thd, YESNO(all), (ulonglong) xid,
                       (ulonglong) cache_mngr));

  /*
    No cache manager means nothing to log, but we still have to commit
    the transaction.
   */
  if (cache_mngr == NULL)
  {
    if (ha_commit_low(thd, all))
      DBUG_RETURN(RESULT_ABORTED);
    DBUG_RETURN(RESULT_SUCCESS);
  }

  /*
    Reset binlog_snapshot_% variables for the current connection so that the
    current coordinates are shown after committing a consistent snapshot
    transaction.
  */
  if (all)
  {
    mysql_mutex_lock(&cache_mngr->binlog_info.lock);
    cache_mngr->binlog_info.log_file_name[0]= '\0';
    mysql_mutex_unlock(&cache_mngr->binlog_info.lock);
  }

  THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;

  DBUG_PRINT("debug", ("in_transaction: %s, no_2pc: %s, rw_ha_count: %d",
                       YESNO(thd->in_multi_stmt_transaction_mode()),
                       YESNO(trans->no_2pc),
                       trans->rw_ha_count));
  DBUG_PRINT("debug",
             ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
              YESNO(thd->transaction.all.cannot_safely_rollback()),
              YESNO(cache_mngr->trx_cache.is_binlog_empty())));
  DBUG_PRINT("debug",
             ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
              YESNO(thd->transaction.stmt.cannot_safely_rollback()),
              YESNO(cache_mngr->stmt_cache.is_binlog_empty())));


  /*
    If there are no handlertons registered, there is nothing to
    commit. Note that DDLs are written earlier in this case (inside
    binlog_query).

    TODO: This can be a problem in those cases that there are no
    handlertons registered. DDLs are one example, but the other case
    is MyISAM. In this case, we could register a dummy handlerton to
    trigger the commit.

    Any statement that requires logging will call binlog_query before
    trans_commit_stmt, so an alternative is to use the condition
    "binlog_query called or stmt.ha_list != 0".
   */
  if (!all && trans->ha_list == 0 &&
      cache_mngr->stmt_cache.is_binlog_empty())
    DBUG_RETURN(RESULT_SUCCESS);

  /*
    If there is anything in the stmt cache, and GTIDs are enabled,
    then this is a single statement outside a transaction and it is
    impossible that there is anything in the trx cache.  Hence, we
    write any empty group(s) to the stmt cache.

    Otherwise, we write any empty group(s) to the trx cache at the end
    of the transaction.
  */
  if (!cache_mngr->stmt_cache.is_binlog_empty())
  {
    error= write_empty_groups_to_cache(thd, &cache_mngr->stmt_cache);
    if (error == 0)
    {
      if (cache_mngr->stmt_cache.finalize(thd))
        DBUG_RETURN(RESULT_ABORTED);
      stuff_logged= true;
    }
  }

  /*
    We commit the transaction if:
     - We are not in a transaction and committing a statement, or
     - We are in a transaction and a full transaction is committed.
    Otherwise, we accumulate the changes.
  */
  if (!error && !cache_mngr->trx_cache.is_binlog_empty() &&
      ending_trans(thd, all))
  {
    const bool real_trans= (all || thd->transaction.all.ha_list == 0);
    /*
      We are committing an XA transaction if it is a "real" transaction
      and have an XID assigned (because some handlerton registered). A
      transaction is "real" if either 'all' is true or the 'all.ha_list'
      is empty.

      Note: This is kind of strange since registering the binlog
      handlerton will then make the transaction XA, which is not really
      true. This occurs for example if a MyISAM statement is executed
      with row-based replication on.
   */
    if (real_trans && xid && trans->rw_ha_count > 1 && !trans->no_2pc)
    {
      Xid_log_event end_evt(thd, xid);
      if (cache_mngr->trx_cache.finalize(thd, &end_evt))
        DBUG_RETURN(RESULT_ABORTED);
    }
    else
    {
      /* otherwise the transaction is terminated with a "COMMIT" query event */
      Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
                              true, FALSE, TRUE, 0, TRUE);
      if (cache_mngr->trx_cache.finalize(thd, &end_evt))
        DBUG_RETURN(RESULT_ABORTED);
    }
    stuff_logged= true;
  }

  /*
    This is part of the stmt rollback.
  */
  if (!all)
    cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);

  DBUG_PRINT("debug", ("error: %d", error));

  if (error)
    DBUG_RETURN(RESULT_ABORTED);

  /*
    Now all the events are written to the caches, so we will commit
    the transaction in the engines. This is done using the group
    commit logic in ordered_commit, which will return when the
    transaction is committed.

    If the commit in the engines fail, we still have something logged
    to the binary log so we have to report this as a "bad" failure
    (failed to commit, but logged something).
  */
  if (stuff_logged)
  {
    int rc;

    /*
      Block binlog updates if there's an active BINLOG lock.

      We allow binlog lock owner to commit, assuming it knows what it does. We
      also check if protection has not been acquired earlier, which is possible
      in slave threads to protect master binlog coordinates.
    */
    if (!thd->backup_binlog_lock.is_acquired() &&
        !thd->backup_binlog_lock.is_protection_acquired())
    {
      const ulong timeout= thd->variables.lock_wait_timeout;

      DBUG_PRINT("debug", ("Acquiring binlog protection lock"));
      if (thd->backup_binlog_lock.acquire_protection(thd, MDL_EXPLICIT,
                                                     timeout))
      {
        /* could not get the protection: drop what was cached and give up */
        cache_mngr->stmt_cache.reset();
        cache_mngr->trx_cache.reset();

        DBUG_RETURN(RESULT_ABORTED);
      }

      binlog_prot_acquired= true;
    }

    rc= ordered_commit(thd, all);

    if (binlog_prot_acquired)
    {
      DBUG_PRINT("debug", ("Releasing binlog protection lock"));
      thd->backup_binlog_lock.release_protection(thd);
    }

    if (rc)
      DBUG_RETURN(RESULT_INCONSISTENT);
  }
  else
  {
    /* nothing was logged: commit directly in the engines */
    if (ha_commit_low(thd, all))
      DBUG_RETURN(RESULT_INCONSISTENT);
  }

  /*
    error is always 0 here: a non-zero value already returned
    RESULT_ABORTED above and nothing assigns it afterwards.
  */
  DBUG_RETURN(error ? RESULT_INCONSISTENT : RESULT_SUCCESS);
}
7098
7099
7100 /**
7101 Flush caches for session.
7102
7103 @note @c set_trans_pos is called with a pointer to the file name
7104 that the binary log currently use and a rotation will change the
7105 contents of the variable.
7106
7107 The position is used when calling the after_flush, after_commit,
7108 and after_rollback hooks, but these have been placed so that they
7109 occur before a rotation is executed.
7110
7111 It is the responsibility of any plugin that use this position to
7112 copy it if they need it after the hook has returned.
7113 */
7114 std::pair<int,my_off_t>
flush_thread_caches(THD * thd)7115 MYSQL_BIN_LOG::flush_thread_caches(THD *thd)
7116 {
7117 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
7118 my_off_t bytes= 0;
7119 bool wrote_xid= false;
7120 int error= cache_mngr->flush(thd, &bytes, &wrote_xid);
7121 if (!error && bytes > 0)
7122 {
7123 /*
7124 Note that set_trans_pos does not copy the file name. See
7125 this function documentation for more info.
7126 */
7127 thd->set_trans_pos(log_file_name, my_b_tell(&log_file));
7128 if (wrote_xid)
7129 inc_prep_xids(thd);
7130 }
7131 DBUG_PRINT("debug", ("bytes: %llu", bytes));
7132 return std::make_pair(error, bytes);
7133 }
7134
7135
7136 /**
7137 Execute the flush stage.
7138
7139 @param total_bytes_var Pointer to variable that will be set to total
7140 number of bytes flushed, or NULL.
7141
7142 @param rotate_var Pointer to variable that will be set to true if
7143 binlog rotation should be performed after releasing locks. If rotate
7144 is not necessary, the variable will not be touched.
7145
7146 @return Error code on error, zero on success
7147 */
7148
7149 int
process_flush_stage_queue(my_off_t * total_bytes_var,bool * rotate_var,THD ** out_queue_var)7150 MYSQL_BIN_LOG::process_flush_stage_queue(my_off_t *total_bytes_var,
7151 bool *rotate_var,
7152 THD **out_queue_var)
7153 {
7154 DBUG_ASSERT(total_bytes_var && rotate_var && out_queue_var);
7155 my_off_t total_bytes= 0;
7156 int flush_error= 1;
7157 mysql_mutex_assert_owner(&LOCK_log);
7158
7159 my_atomic_rwlock_rdlock(&opt_binlog_max_flush_queue_time_lock);
7160 const ulonglong max_udelay= my_atomic_load32(&opt_binlog_max_flush_queue_time);
7161 my_atomic_rwlock_rdunlock(&opt_binlog_max_flush_queue_time_lock);
7162 const ulonglong start_utime= max_udelay > 0 ? my_micro_time() : 0;
7163
7164 /*
7165 First we read the queue until it either is empty or the difference
7166 between the time we started and the current time is too large.
7167
7168 We remember the first thread we unqueued, because this will be the
7169 beginning of the out queue.
7170 */
7171 bool has_more= true;
7172 THD *first_seen= NULL;
7173 while ((max_udelay == 0 || my_micro_time() < start_utime + max_udelay) && has_more)
7174 {
7175 std::pair<bool,THD*> current= stage_manager.pop_front(Stage_manager::FLUSH_STAGE);
7176 std::pair<int,my_off_t> result= flush_thread_caches(current.second);
7177 has_more= current.first;
7178 total_bytes+= result.second;
7179 if (flush_error == 1)
7180 flush_error= result.first;
7181 if (first_seen == NULL)
7182 first_seen= current.second;
7183 }
7184
7185 /*
7186 Either the queue is empty, or we ran out of time. If we ran out of
7187 time, we have to fetch the entire queue (and flush it) since
7188 otherwise the next batch will not have a leader.
7189 */
7190 if (has_more)
7191 {
7192 THD *queue= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
7193 for (THD *head= queue ; head ; head = head->next_to_commit)
7194 {
7195 std::pair<int,my_off_t> result= flush_thread_caches(head);
7196 total_bytes+= result.second;
7197 if (flush_error == 1)
7198 flush_error= result.first;
7199 }
7200 if (first_seen == NULL)
7201 first_seen= queue;
7202 }
7203
7204 *out_queue_var= first_seen;
7205 *total_bytes_var= total_bytes;
7206 if (total_bytes > 0 && my_b_tell(&log_file) >= (my_off_t) max_size)
7207 *rotate_var= true;
7208 return flush_error;
7209 }
7210
7211
7212 /**
7213 Commit a sequence of sessions.
7214
7215 This function commit an entire queue of sessions starting with the
7216 session in @c first. If there were an error in the flushing part of
7217 the ordered commit, the error code is passed in and all the threads
7218 are marked accordingly (but not committed).
7219
7220 @see MYSQL_BIN_LOG::ordered_commit
7221
7222 @param thd The "master" thread
7223 @param first First thread in the queue of threads to commit
7224 */
7225
7226 void
process_commit_stage_queue(THD * thd,THD * first)7227 MYSQL_BIN_LOG::process_commit_stage_queue(THD *thd, THD *first)
7228 {
7229 mysql_mutex_assert_owner(&LOCK_commit);
7230 Thread_excursion excursion(thd);
7231 #ifndef DBUG_OFF
7232 thd->transaction.flags.ready_preempt= 1; // formality by the leader
7233 #endif
7234 for (THD *head= first ; head ; head = head->next_to_commit)
7235 {
7236 DBUG_PRINT("debug", ("Thread ID: %lu, commit_error: %d, flags.pending: %s",
7237 head->thread_id, head->commit_error,
7238 YESNO(head->transaction.flags.pending)));
7239 /*
7240 If flushing failed, set commit_error for the session, skip the
7241 transaction and proceed with the next transaction instead. This
7242 will mark all threads as failed, since the flush failed.
7243
7244 If flush succeeded, attach to the session and commit it in the
7245 engines.
7246 */
7247 #ifndef DBUG_OFF
7248 stage_manager.clear_preempt_status(head);
7249 #endif
7250 /*
7251 Flush/Sync error should be ignored and continue
7252 to commit phase. And thd->commit_error cannot be
7253 COMMIT_ERROR at this moment.
7254 */
7255 DBUG_ASSERT(head->commit_error != THD::CE_COMMIT_ERROR);
7256 excursion.try_to_attach_to(head);
7257 bool all= head->transaction.flags.real_commit;
7258 if (head->transaction.flags.commit_low)
7259 {
7260 /* head is parked to have exited append() */
7261 DBUG_ASSERT(head->transaction.flags.ready_preempt);
7262 /*
7263 storage engine commit
7264 */
7265 if (ha_commit_low(head, all, false))
7266 head->commit_error= THD::CE_COMMIT_ERROR;
7267 }
7268 DBUG_PRINT("debug", ("commit_error: %d, flags.pending: %s",
7269 head->commit_error,
7270 YESNO(head->transaction.flags.pending)));
7271 /*
7272 Decrement the prepared XID counter after storage engine commit.
7273 We also need decrement the prepared XID when encountering a
7274 flush error or session attach error for avoiding 3-way deadlock
7275 among user thread, rotate thread and dump thread.
7276 */
7277 if (head->transaction.flags.xid_written)
7278 dec_prep_xids(head);
7279 }
7280 }
7281
7282 /**
7283 Process after commit for a sequence of sessions.
7284
7285 @param thd The "master" thread
7286 @param first First thread in the queue of threads to commit
7287 */
7288
7289 void
process_after_commit_stage_queue(THD * thd,THD * first)7290 MYSQL_BIN_LOG::process_after_commit_stage_queue(THD *thd, THD *first)
7291 {
7292 Thread_excursion excursion(thd);
7293 for (THD *head= first; head; head= head->next_to_commit)
7294 {
7295 if (head->transaction.flags.run_hooks &&
7296 head->commit_error != THD::CE_COMMIT_ERROR)
7297 {
7298
7299 /*
7300 TODO: This hook here should probably move outside/below this
7301 if and be the only after_commit invocation left in the
7302 code.
7303 */
7304 excursion.try_to_attach_to(head);
7305 bool all= head->transaction.flags.real_commit;
7306 (void) RUN_HOOK(transaction, after_commit, (head, all));
7307 /*
7308 When after_commit finished for the transaction, clear the run_hooks flag.
7309 This allow other parts of the system to check if after_commit was called.
7310 */
7311 head->transaction.flags.run_hooks= false;
7312 }
7313 }
7314 }
7315
#ifndef DBUG_OFF
/**
  Names for the stages, used only in debug trace output.

  The table is indexed with a Stage_manager::StageID value (see the
  DBUG_PRINT in MYSQL_BIN_LOG::change_stage), so the order of the
  entries presumably has to match the order of that enumeration --
  verify against the Stage_manager declaration when adding a stage.
*/
static const char* g_stage_name[] = {
  "FLUSH",
  "SYNC",
  "COMMIT",
};
#endif
7324
7325
7326 /**
7327 Enter a stage of the ordered commit procedure.
7328
7329 Entering is stage is done by:
7330
7331 - Atomically enqueueing a queue of processes (which is just one for
7332 the first phase).
7333
7334 - If the queue was empty, the thread is the leader for that stage
7335 and it should process the entire queue for that stage.
7336
7337 - If the queue was not empty, the thread is a follower and can go
7338 waiting for the commit to finish.
7339
7340 The function will lock the stage mutex if it was designated the
7341 leader for the phase.
7342
7343 @param thd Session structure
7344 @param stage The stage to enter
7345 @param queue Queue of threads to enqueue for the stage
7346 @param stage_mutex Mutex for the stage
7347
7348 @retval true The thread should "bail out" and go waiting for the
7349 commit to finish
7350 @retval false The thread is the leader for the stage and should do
7351 the processing.
7352 */
7353
7354 bool
change_stage(THD * thd,Stage_manager::StageID stage,THD * queue,mysql_mutex_t * leave_mutex,mysql_mutex_t * enter_mutex)7355 MYSQL_BIN_LOG::change_stage(THD *thd,
7356 Stage_manager::StageID stage, THD *queue,
7357 mysql_mutex_t *leave_mutex,
7358 mysql_mutex_t *enter_mutex)
7359 {
7360 DBUG_ENTER("MYSQL_BIN_LOG::change_stage");
7361 DBUG_PRINT("enter", ("thd: 0x%llx, stage: %s, queue: 0x%llx",
7362 (ulonglong) thd, g_stage_name[stage], (ulonglong) queue));
7363 DBUG_ASSERT(0 <= stage && stage < Stage_manager::STAGE_COUNTER);
7364 DBUG_ASSERT(enter_mutex);
7365 DBUG_ASSERT(queue);
7366 /*
7367 enroll_for will release the leave_mutex once the sessions are
7368 queued.
7369 */
7370 if (!stage_manager.enroll_for(stage, queue, leave_mutex))
7371 {
7372 DBUG_ASSERT(!thd_get_cache_mngr(thd)->dbug_any_finalized());
7373 DBUG_RETURN(true);
7374 }
7375 mysql_mutex_lock(enter_mutex);
7376 DBUG_RETURN(false);
7377 }
7378
7379
7380
7381 /**
7382 Flush the I/O cache to file.
7383
7384 Flush the binary log to the binlog file if any byte where written
7385 and signal that the binary log file has been updated if the flush
7386 succeeds.
7387 */
7388
7389 int
flush_cache_to_file(my_off_t * end_pos_var)7390 MYSQL_BIN_LOG::flush_cache_to_file(my_off_t *end_pos_var)
7391 {
7392 if (flush_io_cache(&log_file))
7393 {
7394 THD *thd= current_thd;
7395 thd->commit_error= THD::CE_FLUSH_ERROR;
7396 return ER_ERROR_ON_WRITE;
7397 }
7398 *end_pos_var= my_b_tell(&log_file);
7399 return 0;
7400 }
7401
7402
7403 /**
7404 Call fsync() to sync the file to disk.
7405 */
7406 std::pair<bool, bool>
sync_binlog_file(bool force)7407 MYSQL_BIN_LOG::sync_binlog_file(bool force)
7408 {
7409 bool synced= false;
7410 unsigned int sync_period= get_sync_period();
7411 if (force || (sync_period && ++sync_counter >= sync_period))
7412 {
7413 sync_counter= 0;
7414
7415 /**
7416 On *pure non-transactional* workloads there is a small window
7417 in time where a concurrent rotate might be able to close
7418 the file before the sync is actually done. In that case,
7419 ignore the bad file descriptor errors.
7420
7421 Transactional workloads (InnoDB) are not affected since the
7422 the rotation will not happen until all transactions have
7423 committed to the storage engine, thence decreased the XID
7424 counters.
7425
7426 TODO: fix this properly even for non-transactional storage
7427 engines.
7428 */
7429 if (DBUG_EVALUATE_IF("simulate_error_during_sync_binlog_file", 1,
7430 mysql_file_sync(log_file.file,
7431 MYF(MY_WME | MY_IGNORE_BADFD))))
7432 {
7433 THD *thd= current_thd;
7434 thd->commit_error= THD::CE_SYNC_ERROR;
7435 return std::make_pair(true, synced);
7436 }
7437 synced= true;
7438 }
7439 return std::make_pair(false, synced);
7440 }
7441
7442
7443 /**
7444 Helper function executed when leaving @c ordered_commit.
7445
7446 This function contain the necessary code for fetching the error
7447 code, doing post-commit checks, and wrapping up the commit if
7448 necessary.
7449
7450 It is typically called when enter_stage indicates that the thread
7451 should bail out, and also when the ultimate leader thread finishes
7452 executing @c ordered_commit.
7453
7454 It is typically used in this manner:
7455 @code
7456 if (enter_stage(thd, Thread_queue::FLUSH_STAGE, thd, &LOCK_log))
7457 return finish_commit(thd);
7458 @endcode
7459
7460 @return Error code if the session commit failed, or zero on
7461 success.
7462 */
7463 int
finish_commit(THD * thd)7464 MYSQL_BIN_LOG::finish_commit(THD *thd)
7465 {
7466 /*
7467 In some unlikely situations, it can happen that binary
7468 log is closed before the thread flushes it's cache.
7469 In that case, clear the caches before doing commit.
7470 */
7471 if (unlikely(!is_open()))
7472 {
7473 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
7474 if (cache_mngr)
7475 cache_mngr->reset();
7476 }
7477 if (thd->transaction.flags.commit_low)
7478 {
7479 const bool all= thd->transaction.flags.real_commit;
7480 /*
7481 storage engine commit
7482 */
7483 DBUG_ASSERT(thd->commit_error != THD::CE_COMMIT_ERROR);
7484 if (thd->commit_error == THD::CE_NONE)
7485 {
7486 /*
7487 Acquire a shared lock to block commits if an X lock has been acquired by
7488 LOCK TABLES FOR BACKUP or START TRANSACTION WITH CONSISTENT SNAPSHOT. We
7489 only reach this code if binlog_order_commits=0.
7490 */
7491 DBUG_ASSERT(opt_binlog_order_commits == 0);
7492
7493 slock();
7494
7495 if (ha_commit_low(thd, all, false))
7496 thd->commit_error= THD::CE_COMMIT_ERROR;
7497
7498 sunlock();
7499 }
7500 /*
7501 Decrement the prepared XID counter after storage engine commit
7502 */
7503 if (thd->transaction.flags.xid_written)
7504 dec_prep_xids(thd);
7505 /*
7506 If commit succeeded, we call the after_commit hook
7507
7508 TODO: This hook here should probably move outside/below this
7509 if and be the only after_commit invocation left in the
7510 code.
7511 */
7512 if ((thd->commit_error != THD::CE_COMMIT_ERROR ) && thd->transaction.flags.run_hooks)
7513 {
7514 (void) RUN_HOOK(transaction, after_commit, (thd, all));
7515 thd->transaction.flags.run_hooks= false;
7516 }
7517 }
7518 else if (thd->transaction.flags.xid_written)
7519 dec_prep_xids(thd);
7520
7521 /*
7522 Remove committed GTID from owned_gtids, it was already logged on
7523 MYSQL_BIN_LOG::write_cache().
7524 */
7525 global_sid_lock->rdlock();
7526 gtid_state->update_on_commit(thd);
7527 global_sid_lock->unlock();
7528
7529 DBUG_ASSERT(thd->commit_error || !thd->transaction.flags.run_hooks);
7530 DBUG_ASSERT(!thd_get_cache_mngr(thd)->dbug_any_finalized());
7531 DBUG_PRINT("return", ("Thread ID: %lu, commit_error: %d",
7532 thd->thread_id, thd->commit_error));
7533 /*
7534 flush or sync errors are handled by the leader of the group
7535 (using binlog_error_action). Hence treat only COMMIT_ERRORs as errors.
7536 */
7537 return (thd->commit_error == THD::CE_COMMIT_ERROR);
7538 }
7539
7540 /**
7541 Helper function to handle flush or sync stage errors.
7542 If binlog_error_action= ABORT_SERVER, server will be aborted
7543 after reporting the error to the client.
7544 If binlog_error_action= IGNORE_ERROR, binlog will be closed
7545 for the life time of the server. close() call is protected
7546 with LOCK_log to avoid any parallel operations on binary log.
7547
7548 @param thd Thread object that faced flush/sync error
7549 @param need_lock_log
7550 > Indicates true if LOCk_log is needed before closing
7551 binlog (happens when we are handling sync error)
7552 > Indicates false if LOCK_log is already acquired
7553 by the thread (happens when we are handling flush
7554 error)
7555
7556 @return void
7557 */
handle_binlog_flush_or_sync_error(THD * thd,bool need_lock_log)7558 void MYSQL_BIN_LOG::handle_binlog_flush_or_sync_error(THD *thd,
7559 bool need_lock_log)
7560 {
7561 char errmsg[MYSQL_ERRMSG_SIZE];
7562 sprintf(errmsg, "An error occurred during %s stage of the commit. "
7563 "'binlog_error_action' is set to '%s'.",
7564 thd->commit_error== THD::CE_FLUSH_ERROR ? "flush" : "sync",
7565 binlog_error_action == ABORT_SERVER ? "ABORT_SERVER" : "IGNORE_ERROR");
7566 if (binlog_error_action == ABORT_SERVER)
7567 {
7568 static const char format_err[]= "%s Hence aborting the server.";
7569 char err_buff[MYSQL_ERRMSG_SIZE + sizeof(format_err)];
7570 snprintf(err_buff, sizeof(err_buff), format_err, errmsg);
7571 exec_binlog_error_action_abort(err_buff);
7572 }
7573 else
7574 {
7575 DEBUG_SYNC(thd, "before_binlog_closed_due_to_error");
7576 if (need_lock_log)
7577 mysql_mutex_lock(&LOCK_log);
7578 else
7579 mysql_mutex_assert_owner(&LOCK_log);
7580 /*
7581 It can happen that other group leader encountered
7582 error and already closed the binary log. So print
7583 error only if it is in open state. But we should
7584 call close() always just in case if the previous
7585 close did not close index file.
7586 */
7587 if (is_open())
7588 {
7589 sql_print_error("%s Hence turning logging off for the whole duration "
7590 "of the MySQL server process. To turn it on again: fix "
7591 "the cause, shutdown the MySQL server and restart it.",
7592 errmsg);
7593 }
7594 close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, false/*need_lock_log=false*/,
7595 true/*need_lock_index=true*/);
7596 /*
7597 If there is a write error (flush/sync stage) and if
7598 binlog_error_action=IGNORE_ERROR, clear the error
7599 and allow the commit to happen in storage engine.
7600 */
7601 if (check_write_error(thd))
7602 thd->clear_error();
7603
7604 if (need_lock_log)
7605 mysql_mutex_unlock(&LOCK_log);
7606 DEBUG_SYNC(thd, "after_binlog_closed_due_to_error");
7607 }
7608 }
/**
  Flush and commit the transaction.

  This will execute an ordered flush and commit of all outstanding
  transactions and is the main function for the binary log group
  commit logic. The function performs the ordered commit in three
  stages.

  The flush stage writes the caches to the binary log under LOCK_log.

  The sync stage syncs the binary log file to disk under LOCK_sync
  (LOCK_log is kept as well while the sync period is 1).

  The commit stage executes under LOCK_commit and commits all
  transactions in order. It is only run when binlog_order_commits is
  set; otherwise each session commits on its own in finish_commit.

  The procedure is:

  1. Queue ourselves for flushing.
  2. Grab the log lock, which might result in blocking if the mutex is
     already held by another thread.
  3. If we were not committed while waiting for the lock
     1. Fetch the queue
     2. For each thread in the queue:
        a. Attach to it
        b. Flush the caches, saving any error code
     3. Flush and sync (depending on the value of sync_binlog).
     4. Signal that the binary log was updated
  4. Release the log lock
  5. Grab the commit lock
     1. For each thread in the queue:
        a. If there were no error when flushing and the transaction shall be committed:
           - Commit the transaction, saving the result of executing the commit.
  6. Release the commit lock
  7. Rotate the binary log if the flush stage requested it, and call
     purge if the rotation asks for it.
  8. Return with the saved error code

  @todo The use of @c skip_commit is a hack that we use since the @c
  TC_LOG Interface does not contain functions to handle
  savepoints. Once the binary log is eliminated as a handlerton and
  the @c TC_LOG interface is extended with savepoint handling, this
  parameter can be removed.

  @param thd Session to commit transaction for
  @param all   This is @c true if this is a real transaction commit, and
               @c false otherwise.
  @param skip_commit
               This is @c true if the call to @c ha_commit_low should
               be skipped (it is handled by the caller somehow) and @c
               false otherwise (the normal case).

  @return Non-zero if thd->commit_error is THD::CE_COMMIT_ERROR when the
          function leaves, zero otherwise. Flush and sync errors are
          handled through binlog_error_action and are not returned.
*/
int MYSQL_BIN_LOG::ordered_commit(THD *thd, bool all, bool skip_commit)
{
  DBUG_ENTER("MYSQL_BIN_LOG::ordered_commit");
  int flush_error= 0, sync_error= 0;
  my_off_t total_bytes= 0;
  bool do_rotate= false;

  /*
    These values are used while flushing a transaction, so clear
    everything.

    Notes:

    - It would be good if we could keep transaction coordinator
      log-specific data out of the THD structure, but that is not the
      case right now.

    - Everything in the transaction structure is reset when calling
      ha_commit_low since that calls st_transaction::cleanup.
  */
  thd->transaction.flags.pending= true;
  thd->commit_error= THD::CE_NONE;
  thd->next_to_commit= NULL;
  thd->durability_property= HA_IGNORE_DURABILITY;
  thd->transaction.flags.real_commit= all;
  thd->transaction.flags.xid_written= false;
  thd->transaction.flags.commit_low= !skip_commit;
  thd->transaction.flags.run_hooks= !skip_commit;
#ifndef DBUG_OFF
  /*
    The group commit Leader may have to wait for follower whose transaction
    is not ready to be preempted. Initially the status is pessimistic.
    Preemption guarding logic is necessary only when DBUG_ON is set.
    It won't be required for the dbug-off case as long as the follower won't
    execute any thread-specific write access code in this method, which is
    the case as of current.
  */
  thd->transaction.flags.ready_preempt= 0;
#endif

  DBUG_PRINT("enter", ("flags.pending: %s, commit_error: %d, thread_id: %lu",
                       YESNO(thd->transaction.flags.pending),
                       thd->commit_error, thd->thread_id));

  /*
    Stage #1: flushing transactions to binary log

    While flushing, we allow new threads to enter and will process
    them in due time. Once the queue was empty, we cannot reap
    anything more since it is possible that a thread entered and
    appointed itself leader for the flush phase.
  */
  DEBUG_SYNC(thd, "waiting_to_enter_flush_stage");
  if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log))
  {
    /* Follower: the leader does the work, we only finish our commit. */
    DBUG_PRINT("return", ("Thread ID: %lu, commit_error: %d",
                          thd->thread_id, thd->commit_error));
    DBUG_RETURN(finish_commit(thd));
  }

  /* From here on this thread is the flush stage leader and holds LOCK_log. */
  THD *wait_queue= NULL, *final_queue= NULL;
  mysql_mutex_t *leave_mutex_before_commit_stage= NULL;
  my_off_t flush_end_pos= 0;
  bool need_LOCK_log;
  if (unlikely(!is_open()))
  {
    final_queue= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
    leave_mutex_before_commit_stage= &LOCK_log;
    /*
      binary log is closed, flush stage and sync stage should be
      ignored. Binlog cache should be cleared, but instead of doing
      it here, do that work in 'finish_commit' function so that
      leader and followers thread caches will be cleared.
    */
    goto commit_stage;
  }
  DEBUG_SYNC(thd, "waiting_in_the_middle_of_flush_stage");
  flush_error= process_flush_stage_queue(&total_bytes, &do_rotate,
                                         &wait_queue);

  if (flush_error == 0 && total_bytes > 0)
    flush_error= flush_cache_to_file(&flush_end_pos);

  /*
    If the flush finished successfully, we can call the after_flush
    hook. Being invoked here, we have the guarantee that the hook is
    executed before the before/after_send_hooks on the dump thread
    preventing race conditions among these plug-ins.
  */
  if (flush_error == 0)
  {
    /* The hook receives the file name without its directory part. */
    const char *file_name_ptr= log_file_name + dirname_length(log_file_name);
    DBUG_ASSERT(flush_end_pos != 0);
    if (RUN_HOOK(binlog_storage, after_flush,
                 (thd, file_name_ptr, flush_end_pos)))
    {
      sql_print_error("Failed to run 'after_flush' hooks");
      flush_error= ER_ERROR_ON_WRITE;
    }

    signal_update();
    DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
  }

  if (flush_error)
  {
    /*
      Handle flush error (if any) after leader finishes its flush stage.
      LOCK_log is still held here, hence need_lock_log is false.
    */
    handle_binlog_flush_or_sync_error(thd, false /* need_lock_log */);
  }

  publish_coordinates_for_global_status();

  /*
    Stage #2: Syncing binary log file to disk
  */
  need_LOCK_log= (get_sync_period() == 1);

  /*
    LOCK_log is not released when sync_binlog is 1. It guarantees that the
    events are not replicated by dump threads before they are synced to disk.
  */
  if (change_stage(thd, Stage_manager::SYNC_STAGE, wait_queue,
                   need_LOCK_log ? NULL : &LOCK_log, &LOCK_sync))
  {
    DBUG_PRINT("return", ("Thread ID: %lu, commit_error: %d",
                          thd->thread_id, thd->commit_error));
    DBUG_RETURN(finish_commit(thd));
  }
  final_queue= stage_manager.fetch_queue_for(Stage_manager::SYNC_STAGE);
  if (flush_error == 0 && total_bytes > 0)
  {
    DEBUG_SYNC(thd, "before_sync_binlog_file");
    std::pair<bool, bool> result= sync_binlog_file(false);
    sync_error= result.first;
  }

  /* LOCK_log was kept across the sync only when the sync period is 1. */
  if (need_LOCK_log)
    mysql_mutex_unlock(&LOCK_log);
  leave_mutex_before_commit_stage= &LOCK_sync;
  /*
    Stage #3: Commit all transactions in order.

    This stage is skipped if we do not need to order the commits and
    each thread have to execute the handlerton commit instead.

    However, since we are keeping the lock from the previous stage, we
    need to unlock it if we skip the stage.
  */
commit_stage:
  /*
    We are delaying the handling of sync error until
    all locks are released but we should not enter into
    commit stage if binlog_error_action is ABORT_SERVER.
  */
  if (opt_binlog_order_commits &&
      (sync_error == 0 || binlog_error_action != ABORT_SERVER))
  {
    if (change_stage(thd, Stage_manager::COMMIT_STAGE,
                     final_queue, leave_mutex_before_commit_stage,
                     &LOCK_commit))
    {
      DBUG_PRINT("return", ("Thread ID: %lu, commit_error: %d",
                            thd->thread_id, thd->commit_error));
      DBUG_RETURN(finish_commit(thd));
    }
    THD *commit_queue= stage_manager.fetch_queue_for(Stage_manager::COMMIT_STAGE);
    DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
                    DEBUG_SYNC(thd, "before_process_commit_stage_queue"););
    process_commit_stage_queue(thd, commit_queue);
    mysql_mutex_unlock(&LOCK_commit);
    /*
      Process after_commit after LOCK_commit is released for avoiding
      3-way deadlock among user thread, rotate thread and dump thread.
    */
    process_after_commit_stage_queue(thd, commit_queue);
    final_queue= commit_queue;
  }
  else if (leave_mutex_before_commit_stage)
    mysql_mutex_unlock(leave_mutex_before_commit_stage);

  /*
    Handle sync error after we release all locks in order to avoid deadlocks
  */
  if (sync_error)
    handle_binlog_flush_or_sync_error(thd, true /* need_lock_log */);

  /* Commit done so signal all waiting threads */
  stage_manager.signal_done(final_queue);

  /*
    Finish the commit before executing a rotate, or run the risk of a
    deadlock. We don't need the return value here since it is in
    thd->commit_error, which is returned below.
  */
  (void) finish_commit(thd);

  /*
    If we need to rotate, we do it without commit error.
    Otherwise the thd->commit_error will be possibly reset.
  */
  if (DBUG_EVALUATE_IF("force_rotate", 1, 0) ||
      (do_rotate && thd->commit_error == THD::CE_NONE))
  {
    /*
      Do not force the rotate as several consecutive groups may
      request unnecessary rotations.

      NOTE: Run purge_logs wo/ holding LOCK_log because it does not
      need the mutex. Otherwise causes various deadlocks.
    */

    DEBUG_SYNC(thd, "ready_to_do_rotation");
    bool check_purge= false;
    mysql_mutex_lock(&LOCK_log);
    /*
      If rotate fails then depends on binlog_error_action variable
      appropriate action will be taken inside rotate call.
    */
    int error= rotate(false, &check_purge);
    mysql_mutex_unlock(&LOCK_log);

    if (error)
      thd->commit_error= THD::CE_COMMIT_ERROR;
    else if (check_purge)
      purge();
  }
  /*
    flush or sync errors are handled above (using binlog_error_action).
    Hence treat only COMMIT_ERRORs as errors.
  */
  DBUG_RETURN(thd->commit_error == THD::CE_COMMIT_ERROR);
}
7892
7893
7894 /**
7895 MYSQLD server recovers from last crashed binlog.
7896
7897 @param log IO_CACHE of the crashed binlog.
7898 @param fdle Format_description_log_event of the crashed binlog.
7899 @param valid_pos The position of the last valid transaction or
7900 event(non-transaction) of the crashed binlog.
7901
7902 @retval
7903 0 ok
7904 @retval
7905 1 error
7906 */
recover(IO_CACHE * log,Format_description_log_event * fdle,my_off_t * valid_pos)7907 int MYSQL_BIN_LOG::recover(IO_CACHE *log, Format_description_log_event *fdle,
7908 my_off_t *valid_pos)
7909 {
7910 Log_event *ev;
7911 HASH xids;
7912 MEM_ROOT mem_root;
7913 /*
7914 The flag is used for handling the case that a transaction
7915 is partially written to the binlog.
7916 */
7917 bool in_transaction= FALSE;
7918
7919 if (! fdle->is_valid() ||
7920 my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
7921 sizeof(my_xid), 0, 0, MYF(0)))
7922 goto err1;
7923
7924 init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE);
7925
7926 while ((ev= Log_event::read_log_event(log, 0, fdle, TRUE))
7927 && ev->is_valid())
7928 {
7929 if (ev->get_type_code() == QUERY_EVENT &&
7930 !strcmp(((Query_log_event*)ev)->query, "BEGIN"))
7931 in_transaction= TRUE;
7932
7933 if (ev->get_type_code() == QUERY_EVENT &&
7934 !strcmp(((Query_log_event*)ev)->query, "COMMIT"))
7935 {
7936 DBUG_ASSERT(in_transaction == TRUE);
7937 in_transaction= FALSE;
7938 }
7939 else if (ev->get_type_code() == XID_EVENT)
7940 {
7941 DBUG_ASSERT(in_transaction == TRUE);
7942 in_transaction= FALSE;
7943 Xid_log_event *xev=(Xid_log_event *)ev;
7944 uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
7945 sizeof(xev->xid));
7946 if (!x || my_hash_insert(&xids, x))
7947 goto err2;
7948 }
7949
7950 /*
7951 Recorded valid position for the crashed binlog file
7952 which did not contain incorrect events. The following
7953 positions increase the variable valid_pos:
7954
7955 1 -
7956 ...
7957 <---> HERE IS VALID <--->
7958 GTID
7959 BEGIN
7960 ...
7961 COMMIT
7962 ...
7963
7964 2 -
7965 ...
7966 <---> HERE IS VALID <--->
7967 GTID
7968 DDL/UTILITY
7969 ...
7970
7971 In other words, the following positions do not increase
7972 the variable valid_pos:
7973
7974 1 -
7975 GTID
7976 <---> HERE IS VALID <--->
7977 ...
7978
7979 2 -
7980 GTID
7981 BEGIN
7982 <---> HERE IS VALID <--->
7983 ...
7984 */
7985 if (!log->error && !in_transaction &&
7986 !is_gtid_event(ev))
7987 *valid_pos= my_b_tell(log);
7988
7989 delete ev;
7990 }
7991
7992 if (ha_recover(&xids))
7993 goto err2;
7994
7995 free_root(&mem_root, MYF(0));
7996 my_hash_free(&xids);
7997 return 0;
7998
7999 err2:
8000 free_root(&mem_root, MYF(0));
8001 my_hash_free(&xids);
8002 err1:
8003 sql_print_error("Crash recovery failed. Either correct the problem "
8004 "(if it's, for example, out of memory error) and restart, "
8005 "or delete (or rename) binary log and start mysqld with "
8006 "--tc-heuristic-recover={commit|rollback}");
8007 return 1;
8008 }
8009
8010 /*
8011 Copy out the non-directory part of binlog position filename for the
8012 `binlog_snapshot_file' status variable, same way as it is done for
8013 SHOW MASTER STATUS.
8014 */
set_binlog_snapshot_file(const char * src)8015 static void set_binlog_snapshot_file(const char *src)
8016 {
8017 mysql_mutex_assert_owner(&LOCK_status);
8018
8019 int dir_len = dirname_length(src);
8020 strmake(binlog_snapshot_file, src + dir_len,
8021 sizeof(binlog_snapshot_file) - 1);
8022 }
8023
8024
8025 /** Copy the current binlog coordinates to the variables used for the
8026 not-in-consistent-snapshot case of SHOW STATUS */
publish_coordinates_for_global_status(void) const8027 void MYSQL_BIN_LOG::publish_coordinates_for_global_status(void) const
8028 {
8029 mysql_mutex_assert_owner(&LOCK_log);
8030
8031 mysql_mutex_lock(&LOCK_status);
8032 strcpy(binlog_global_snapshot_file, log_file_name);
8033 binlog_global_snapshot_position= my_b_tell(&log_file);
8034 mysql_mutex_unlock(&LOCK_status);
8035 }
8036
8037
xlock(void)8038 void MYSQL_BIN_LOG::xlock(void)
8039 {
8040 mysql_mutex_lock(&LOCK_log);
8041
8042 DBUG_ASSERT(!snapshot_lock_acquired);
8043
8044 /*
8045 We must ensure that no writes to binlog and no commits to storage engines
8046 occur after function is called for START TRANSACTION FOR CONSISTENT
8047 SNAPSHOT. With binlog_order_commits=1 (the default) flushing to binlog is
8048 performed under the LOCK_log mutex and commits are done under the
8049 LOCK_commit mutex, both in the stage leader thread. So acquiring those 2
8050 mutexes is sufficient to guarantee atomicity.
8051
8052 With binlog_order_commits=0 commits are performed in parallel by separate
8053 threads with each acquiring a shared lock on LOCK_consistent_snapshot.
8054
8055 binlog_order_commits is a dynamic variable, so we have to keep track what
8056 primitives should be used in xunlock().
8057 */
8058 if (opt_binlog_order_commits)
8059 {
8060 mysql_mutex_lock(&LOCK_commit);
8061 }
8062 else
8063 {
8064 snapshot_lock_acquired= true;
8065 mysql_rwlock_wrlock(&LOCK_consistent_snapshot);
8066 }
8067 }
8068
8069
xunlock(void)8070 void MYSQL_BIN_LOG::xunlock(void)
8071 {
8072 if (!snapshot_lock_acquired)
8073 {
8074 mysql_mutex_unlock(&LOCK_commit);
8075 }
8076 else
8077 {
8078 mysql_rwlock_unlock(&LOCK_consistent_snapshot);
8079 snapshot_lock_acquired= false;
8080 }
8081
8082 mysql_mutex_unlock(&LOCK_log);
8083 }
8084
report_missing_purged_gtids(const Gtid_set * slave_executed_gtid_set,const char ** errmsg)8085 void MYSQL_BIN_LOG::report_missing_purged_gtids(const Gtid_set* slave_executed_gtid_set,
8086 const char** errmsg)
8087 {
8088 DBUG_ENTER("MYSQL_BIN_LOG::report_missing_purged_gtids");
8089 THD *thd= current_thd;
8090 Gtid_set gtid_missing(gtid_state->get_lost_gtids()->get_sid_map());
8091 gtid_missing.add_gtid_set(gtid_state->get_lost_gtids());
8092 gtid_missing.remove_gtid_set(slave_executed_gtid_set);
8093
8094 String tmp_uuid;
8095 uchar name[]= "slave_uuid";
8096
8097 /* Protects thd->user_vars. */
8098 mysql_mutex_lock(&thd->LOCK_thd_data);
8099 user_var_entry *entry=
8100 (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
8101 if (entry && entry->length() > 0)
8102 tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
8103 mysql_mutex_unlock(&thd->LOCK_thd_data);
8104
8105
8106 char* missing_gtids= NULL;
8107 char* slave_executed_gtids= NULL;
8108 gtid_missing.to_string(&missing_gtids, NULL);
8109 slave_executed_gtid_set->to_string(&slave_executed_gtids, NULL);
8110
8111 /*
8112 Log the information about the missing purged GTIDs to the error log
8113 if the message is less than MAX_LOG_BUFFER_SIZE.
8114 */
8115 std::ostringstream log_info;
8116 log_info << "The missing transactions are '"<< missing_gtids <<"'";
8117 const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
8118
8119 /* Don't consider the "%s" in the format string. Subtract 2 from the
8120 total length */
8121 int total_length= (strlen(log_msg) - 2 + log_info.str().length());
8122
8123 DBUG_EXECUTE_IF("simulate_long_missing_gtids",
8124 { total_length= MAX_LOG_BUFFER_SIZE + 1;});
8125
8126 if (total_length > MAX_LOG_BUFFER_SIZE)
8127 log_info.str("To find the missing purged transactions, run \"SELECT"
8128 " @@GLOBAL.GTID_PURGED\" on the master, then run \"SHOW"
8129 " SLAVE STATUS\" on the slave for the Retrieved_Gtid_Set,"
8130 " and then run \"SELECT GTID_SUBTRACT(<master_set>,"
8131 " <slave_set>)\" on any server");
8132
8133 sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
8134 log_info.str().c_str());
8135
8136 /*
8137 Send the information about the slave executed GTIDs and missing
8138 purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
8139 */
8140 std::ostringstream gtid_info;
8141 gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
8142 << "', and the missing transactions are '"<< missing_gtids <<"'";
8143 *errmsg= ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS);
8144
8145 /* Don't consider the "%s" in the format string. Subtract 2 from the
8146 total length */
8147 total_length= (strlen(*errmsg) - 2 + gtid_info.str().length());
8148
8149 DBUG_EXECUTE_IF("simulate_long_missing_gtids",
8150 { total_length= MYSQL_ERRMSG_SIZE + 1;});
8151
8152 if (total_length > MYSQL_ERRMSG_SIZE)
8153 gtid_info.str("The GTID sets and the missing purged transactions are too"
8154 " long to print in this message. For more information,"
8155 " please see the master's error log or the manual for"
8156 " GTID_SUBTRACT");
8157
8158 /* Buffer for formatting the message about the missing GTIDs. */
8159 static char buff[MYSQL_ERRMSG_SIZE];
8160 my_snprintf(buff, MYSQL_ERRMSG_SIZE, *errmsg, gtid_info.str().c_str());
8161 *errmsg= const_cast<const char*>(buff);
8162
8163 my_free(missing_gtids);
8164 my_free(slave_executed_gtids);
8165 DBUG_VOID_RETURN;
8166 }
8167
report_missing_gtids(const Gtid_set * previous_gtid_set,const Gtid_set * slave_executed_gtid_set,const char ** errmsg)8168 void MYSQL_BIN_LOG::report_missing_gtids(const Gtid_set* previous_gtid_set,
8169 const Gtid_set* slave_executed_gtid_set,
8170 const char** errmsg)
8171 {
8172 DBUG_ENTER("MYSQL_BIN_LOG::report_missing_gtids");
8173 THD *thd=current_thd;
8174 char* missing_gtids= NULL;
8175 char* slave_executed_gtids= NULL;
8176 Gtid_set gtid_missing(slave_executed_gtid_set->get_sid_map());
8177 gtid_missing.add_gtid_set(slave_executed_gtid_set);
8178 gtid_missing.remove_gtid_set(previous_gtid_set);
8179 gtid_missing.to_string(&missing_gtids, NULL);
8180 slave_executed_gtid_set->to_string(&slave_executed_gtids, NULL);
8181
8182 String tmp_uuid;
8183 uchar name[]= "slave_uuid";
8184
8185 /* Protects thd->user_vars. */
8186 mysql_mutex_lock(&thd->LOCK_thd_data);
8187
8188 user_var_entry *entry=
8189 (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
8190 if (entry && entry->length() > 0)
8191 tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
8192 mysql_mutex_unlock(&thd->LOCK_thd_data);
8193
8194 /*
8195 Log the information about the missing purged GTIDs to the error log
8196 if the message is less than MAX_LOG_BUFFER_SIZE.
8197 */
8198 std::ostringstream log_info;
8199 log_info << "If the binary log files have been deleted from disk,"
8200 " check the consistency of 'GTID_PURGED' variable."
8201 " The missing transactions are '"<< missing_gtids <<"'";
8202 const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
8203
8204 /* Don't consider the "%s" in the format string. Subtract 2 from the
8205 total length */
8206 if ((strlen(log_msg) - 2 + log_info.str().length()) > MAX_LOG_BUFFER_SIZE)
8207 log_info.str("To find the missing purged transactions, run \"SELECT"
8208 " @@GLOBAL.GTID_PURGED\" on the master, then run \"SHOW"
8209 " SLAVE STATUS\" on the slave for the Retrieved_Gtid_Set,"
8210 " and then run \"SELECT GTID_SUBTRACT(<master_set>,"
8211 " <slave_set>)\" on any server");
8212
8213 sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
8214 log_info.str().c_str());
8215
8216 /*
8217 Send the information about the slave executed GTIDs and missing
8218 purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
8219 */
8220 std::ostringstream gtid_info;
8221 gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
8222 << "', and the missing transactions are '"<< missing_gtids <<"'";
8223 *errmsg= ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS);
8224
8225 /* Don't consider the "%s" in the format string. Subtract 2 from the
8226 total length */
8227 if ((strlen(*errmsg) - 2 + gtid_info.str().length()) > MYSQL_ERRMSG_SIZE)
8228 gtid_info.str("The GTID sets and the missing purged transactions are too"
8229 " long to print in this message. For more information,"
8230 " please see the master's error log or the manual for"
8231 " GTID_SUBTRACT");
8232 /* Buffer for formatting the message about the missing GTIDs. */
8233 static char buff[MYSQL_ERRMSG_SIZE];
8234 my_snprintf(buff, MYSQL_ERRMSG_SIZE, *errmsg, gtid_info.str().c_str());
8235 *errmsg= const_cast<const char*>(buff);
8236
8237 my_free(missing_gtids);
8238 my_free(slave_executed_gtids);
8239
8240 DBUG_VOID_RETURN;
8241 }
8242
get_group_cache(bool is_transactional)8243 Group_cache *THD::get_group_cache(bool is_transactional)
8244 {
8245 DBUG_ENTER("THD::get_group_cache(bool)");
8246
8247 // If opt_bin_log==0, it is not safe to call thd_get_cache_mngr
8248 // because binlog_hton has not been completely set up.
8249 DBUG_ASSERT(opt_bin_log);
8250 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
8251
8252 // cache_mngr is NULL until we call thd->binlog_setup_trx_data, so
8253 // we assert that this has been done.
8254 DBUG_ASSERT(cache_mngr != NULL);
8255
8256 binlog_cache_data *cache_data=
8257 cache_mngr->get_binlog_cache_data(is_transactional);
8258 DBUG_ASSERT(cache_data != NULL);
8259
8260 DBUG_RETURN(&cache_data->group_cache);
8261 }
8262
8263 /*
8264 These functions are placed in this file since they need access to
8265 binlog_hton, which has internal linkage.
8266 */
8267
binlog_setup_trx_data()8268 int THD::binlog_setup_trx_data()
8269 {
8270 DBUG_ENTER("THD::binlog_setup_trx_data");
8271 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
8272
8273 if (cache_mngr)
8274 DBUG_RETURN(0); // Already set up
8275
8276 cache_mngr= (binlog_cache_mngr*) my_malloc(sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
8277 if (!cache_mngr ||
8278 open_cached_file(&cache_mngr->stmt_cache.cache_log, mysql_tmpdir,
8279 LOG_PREFIX, binlog_stmt_cache_size, MYF(MY_WME)) ||
8280 open_cached_file(&cache_mngr->trx_cache.cache_log, mysql_tmpdir,
8281 LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
8282 {
8283 my_free(cache_mngr);
8284 DBUG_RETURN(1); // Didn't manage to set it up
8285 }
8286 DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) cache_mngr));
8287 thd_set_ha_data(this, binlog_hton, cache_mngr);
8288
8289 cache_mngr= new (thd_get_cache_mngr(this))
8290 binlog_cache_mngr(max_binlog_stmt_cache_size,
8291 &binlog_stmt_cache_use,
8292 &binlog_stmt_cache_disk_use,
8293 max_binlog_cache_size,
8294 &binlog_cache_use,
8295 &binlog_cache_disk_use);
8296 DBUG_RETURN(0);
8297 }
8298
8299 /**
8300
8301 */
register_binlog_handler(THD * thd,bool trx)8302 void register_binlog_handler(THD *thd, bool trx)
8303 {
8304 DBUG_ENTER("register_binlog_handler");
8305 /*
8306 If this is the first call to this function while processing a statement,
8307 the transactional cache does not have a savepoint defined. So, in what
8308 follows:
8309 . an implicit savepoint is defined;
8310 . callbacks are registered;
8311 . binary log is set as read/write.
8312
8313 The savepoint allows for truncating the trx-cache transactional changes
8314 fail. Callbacks are necessary to flush caches upon committing or rolling
8315 back a statement or a transaction. However, notifications do not happen
8316 if the binary log is set as read/write.
8317 */
8318 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
8319 if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
8320 {
8321 /*
8322 Set an implicit savepoint in order to be able to truncate a trx-cache.
8323 */
8324 my_off_t pos= 0;
8325 binlog_trans_log_savepos(thd, &pos);
8326 cache_mngr->trx_cache.set_prev_position(pos);
8327
8328 /*
8329 Set callbacks in order to be able to call commmit or rollback.
8330 */
8331 if (trx)
8332 trans_register_ha(thd, TRUE, binlog_hton);
8333 trans_register_ha(thd, FALSE, binlog_hton);
8334
8335 /*
8336 Set the binary log as read/write otherwise callbacks are not called.
8337 */
8338 thd->ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
8339 }
8340 DBUG_VOID_RETURN;
8341 }
8342
8343 /**
8344 Function to start a statement and optionally a transaction for the
8345 binary log.
8346
8347 This function does three things:
8348 - Starts a transaction if not in autocommit mode or if a BEGIN
8349 statement has been seen.
8350
8351 - Start a statement transaction to allow us to truncate the cache.
8352
8353 - Save the currrent binlog position so that we can roll back the
8354 statement by truncating the cache.
8355
8356 We only update the saved position if the old one was undefined,
8357 the reason is that there are some cases (e.g., for CREATE-SELECT)
8358 where the position is saved twice (e.g., both in
8359 select_create::prepare() and THD::binlog_write_table_map()) , but
8360 we should use the first. This means that calls to this function
8361 can be used to start the statement before the first table map
8362 event, to include some extra events.
8363
8364 Note however that IMMEDIATE_LOGGING implies that the statement is
8365 written without BEGIN/COMMIT.
8366
8367 @param thd Thread variable
8368 @param start_event The first event requested to be written into the
8369 binary log
8370 */
binlog_start_trans_and_stmt(THD * thd,Log_event * start_event)8371 static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event)
8372 {
8373 DBUG_ENTER("binlog_start_trans_and_stmt");
8374
8375 /*
8376 Initialize the cache manager if this was not done yet.
8377 */
8378 if (thd->binlog_setup_trx_data())
8379 DBUG_RETURN(1);
8380
8381 /*
8382 Retrieve the appropriated cache.
8383 */
8384 bool is_transactional= start_event->is_using_trans_cache();
8385 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
8386 binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_transactional);
8387
8388 /*
8389 If the event is requesting immediatly logging, there is no need to go
8390 further down and set savepoint and register callbacks.
8391 */
8392 if (start_event->is_using_immediate_logging())
8393 DBUG_RETURN(0);
8394
8395 register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode());
8396
8397 /*
8398 If the cache is empty log "BEGIN" at the beginning of every transaction.
8399 Here, a transaction is either a BEGIN..COMMIT/ROLLBACK block or a single
8400 statement in autocommit mode.
8401 */
8402 if (cache_data->is_binlog_empty())
8403 {
8404 Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"),
8405 is_transactional, FALSE, TRUE, 0, TRUE);
8406 if (cache_data->write_event(thd, &qinfo))
8407 DBUG_RETURN(1);
8408 }
8409
8410 DBUG_RETURN(0);
8411 }
8412
8413 /**
8414 This function writes a table map to the binary log.
8415 Note that in order to keep the signature uniform with related methods,
8416 we use a redundant parameter to indicate whether a transactional table
8417 was changed or not.
8418 Sometimes it will write a Rows_query_log_event into binary log before
8419 the table map too.
8420
8421 @param table a pointer to the table.
8422 @param is_transactional @c true indicates a transactional table,
8423 otherwise @c false a non-transactional.
8424 @param binlog_rows_query @c true indicates a Rows_query log event
8425 will be binlogged before table map,
8426 otherwise @c false indicates it will not
8427 be binlogged.
8428 @return
8429 nonzero if an error pops up when writing the table map event
8430 or the Rows_query log event.
8431 */
binlog_write_table_map(TABLE * table,bool is_transactional,bool binlog_rows_query)8432 int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
8433 bool binlog_rows_query)
8434 {
8435 int error;
8436 DBUG_ENTER("THD::binlog_write_table_map");
8437 DBUG_PRINT("enter", ("table: 0x%lx (%s: #%llu)",
8438 (long) table, table->s->table_name.str,
8439 table->s->table_map_id.id()));
8440
8441 /* Pre-conditions */
8442 DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
8443 DBUG_ASSERT(table->s->table_map_id.is_valid());
8444
8445 Table_map_log_event
8446 the_event(this, table, table->s->table_map_id, is_transactional);
8447
8448 binlog_start_trans_and_stmt(this, &the_event);
8449
8450 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
8451
8452 binlog_cache_data *cache_data=
8453 cache_mngr->get_binlog_cache_data(is_transactional);
8454
8455 if (binlog_rows_query && this->query())
8456 {
8457 /* Write the Rows_query_log_event into binlog before the table map */
8458 Rows_query_log_event
8459 rows_query_ev(this, this->query(), this->query_length());
8460 if ((error= cache_data->write_event(this, &rows_query_ev)))
8461 DBUG_RETURN(error);
8462 }
8463
8464 if ((error= cache_data->write_event(this, &the_event)))
8465 DBUG_RETURN(error);
8466
8467 binlog_table_maps++;
8468 DBUG_RETURN(0);
8469 }
8470
8471 /**
8472 This function retrieves a pending row event from a cache which is
8473 specified through the parameter @c is_transactional. Respectively, when it
8474 is @c true, the pending event is returned from the transactional cache.
8475 Otherwise from the non-transactional cache.
8476
8477 @param is_transactional @c true indicates a transactional cache,
8478 otherwise @c false a non-transactional.
8479 @return
8480 The row event if any.
8481 */
8482 Rows_log_event*
binlog_get_pending_rows_event(bool is_transactional) const8483 THD::binlog_get_pending_rows_event(bool is_transactional) const
8484 {
8485 Rows_log_event* rows= NULL;
8486 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
8487
8488 /*
8489 This is less than ideal, but here's the story: If there is no cache_mngr,
8490 prepare_pending_rows_event() has never been called (since the cache_mngr
8491 is set up there). In that case, we just return NULL.
8492 */
8493 if (cache_mngr)
8494 {
8495 binlog_cache_data *cache_data=
8496 cache_mngr->get_binlog_cache_data(is_transactional);
8497
8498 rows= cache_data->pending();
8499 }
8500 return (rows);
8501 }
8502
8503 /**
8504 @param db db name c-string to be inserted into alphabetically sorted
8505 THD::binlog_accessed_db_names list.
8506
8507 Note, that space for both the data and the node
8508 struct are allocated in THD::main_mem_root.
8509 The list lasts for the top-level query time and is reset
8510 in @c THD::cleanup_after_query().
8511 */
8512 void
add_to_binlog_accessed_dbs(const char * db_param)8513 THD::add_to_binlog_accessed_dbs(const char *db_param)
8514 {
8515 char *after_db;
8516 /*
8517 binlog_accessed_db_names list is to maintain the database
8518 names which are referenced in a given command.
8519 Prior to bug 17806014 fix, 'main_mem_root' memory root used
8520 to store this list. The 'main_mem_root' scope is till the end
8521 of the query. Hence it caused increasing memory consumption
8522 problem in big procedures like the ones mentioned below.
8523 Eg: CALL p1() where p1 is having 1,00,000 create and drop tables.
8524 'main_mem_root' is freed only at the end of the command CALL p1()'s
8525 execution. But binlog_accessed_db_names list scope is only till the
8526 individual statements specified the procedure(create/drop statements).
8527 Hence the memory allocated in 'main_mem_root' was left uncleared
8528 until the p1's completion, even though it is not required after
8529 completion of individual statements.
8530
8531 Instead of using 'main_mem_root' whose scope is complete query execution,
8532 now the memroot is changed to use 'thd->mem_root' whose scope is until the
8533 individual statement in CALL p1(). 'thd->mem_root' is set to 'execute_mem_root'
8534 in the context of procedure and it's scope is till the individual statement
8535 in CALL p1() and thd->memroot is equal to 'main_mem_root' in the context
8536 of a normal 'top level query'.
8537
8538 Eg: a) create table t1(i int); => If this function is called while
8539 processing this statement, thd->memroot is equal to &main_mem_root
8540 which will be freed immediately after executing this statement.
8541 b) CALL p1() -> p1 contains create table t1(i int); => If this function
8542 is called while processing create table statement which is inside
8543 a stored procedure, then thd->memroot is equal to 'execute_mem_root'
8544 which will be freed immediately after executing this statement.
8545 In both a and b case, thd->memroot will be freed immediately and will not
8546 increase memory consumption.
8547
8548 A special case(stored functions/triggers):
8549 Consider the following example:
8550 create function f1(i int) returns int
8551 begin
8552 insert into db1.t1 values (1);
8553 insert into db2.t1 values (2);
8554 end;
8555 When we are processing SELECT f1(), the list should contain db1, db2 names.
8556 Since thd->mem_root contains 'execute_mem_root' in the context of
8557 stored function, the mem root will be freed after adding db1 in
8558 the list and when we are processing the second statement and when we try
8559 to add 'db2' in the db1's list, it will lead to crash as db1's memory
8560 is already freed. To handle this special case, if in_sub_stmt is set
8561 (which is true incase of stored functions/triggers), we use &main_mem_root,
8562 if not set we will use thd->memroot which changes it's value to
8563 'execute_mem_root' or '&main_mem_root' depends on the context.
8564 */
8565 MEM_ROOT *db_mem_root= in_sub_stmt ? &main_mem_root : mem_root;
8566
8567 if (!binlog_accessed_db_names)
8568 binlog_accessed_db_names= new (db_mem_root) List<char>;
8569
8570 if (binlog_accessed_db_names->elements > MAX_DBS_IN_EVENT_MTS)
8571 {
8572 push_warning_printf(this, Sql_condition::WARN_LEVEL_WARN,
8573 ER_MTS_UPDATED_DBS_GREATER_MAX,
8574 ER(ER_MTS_UPDATED_DBS_GREATER_MAX),
8575 MAX_DBS_IN_EVENT_MTS);
8576 return;
8577 }
8578
8579 after_db= strdup_root(db_mem_root, db_param);
8580
8581 /*
8582 sorted insertion is implemented with first rearranging data
8583 (pointer to char*) of the links and final appending of the least
8584 ordered data to create a new link in the list.
8585 */
8586 if (binlog_accessed_db_names->elements != 0)
8587 {
8588 List_iterator<char> it(*get_binlog_accessed_db_names());
8589
8590 while (it++)
8591 {
8592 char *swap= NULL;
8593 char **ref_cur_db= it.ref();
8594 int cmp= strcmp(after_db, *ref_cur_db);
8595
8596 DBUG_ASSERT(!swap || cmp < 0);
8597
8598 if (cmp == 0)
8599 {
8600 after_db= NULL; /* dup to ignore */
8601 break;
8602 }
8603 else if (swap || cmp > 0)
8604 {
8605 swap= *ref_cur_db;
8606 *ref_cur_db= after_db;
8607 after_db= swap;
8608 }
8609 }
8610 }
8611 if (after_db)
8612 binlog_accessed_db_names->push_back(after_db, db_mem_root);
8613 }
8614
8615 /*
8616 Tells if two (or more) tables have auto_increment columns and we want to
8617 lock those tables with a write lock.
8618
8619 SYNOPSIS
8620 has_two_write_locked_tables_with_auto_increment
8621 tables Table list
8622
8623 NOTES:
8624 Call this function only when you have established the list of all tables
8625 which you'll want to update (including stored functions, triggers, views
8626 inside your statement).
8627 */
8628
8629 static bool
has_write_table_with_auto_increment(TABLE_LIST * tables)8630 has_write_table_with_auto_increment(TABLE_LIST *tables)
8631 {
8632 for (TABLE_LIST *table= tables; table; table= table->next_global)
8633 {
8634 /* we must do preliminary checks as table->table may be NULL */
8635 if (!table->placeholder() &&
8636 table->table->found_next_number_field &&
8637 (table->lock_type >= TL_WRITE_ALLOW_WRITE))
8638 return 1;
8639 }
8640
8641 return 0;
8642 }
8643
8644 /*
8645 checks if we have select tables in the table list and write tables
8646 with auto-increment column.
8647
8648 SYNOPSIS
8649 has_two_write_locked_tables_with_auto_increment_and_select
8650 tables Table list
8651
8652 RETURN VALUES
8653
8654 -true if the table list has atleast one table with auto-increment column
8655
8656
8657 and atleast one table to select from.
8658 -false otherwise
8659 */
8660
8661 static bool
has_write_table_with_auto_increment_and_select(TABLE_LIST * tables)8662 has_write_table_with_auto_increment_and_select(TABLE_LIST *tables)
8663 {
8664 bool has_select= false;
8665 bool has_auto_increment_tables = has_write_table_with_auto_increment(tables);
8666 for(TABLE_LIST *table= tables; table; table= table->next_global)
8667 {
8668 if (!table->placeholder() &&
8669 (table->lock_type <= TL_READ_NO_INSERT))
8670 {
8671 has_select= true;
8672 break;
8673 }
8674 }
8675 return(has_select && has_auto_increment_tables);
8676 }
8677
8678 /*
8679 Tells if there is a table whose auto_increment column is a part
8680 of a compound primary key while is not the first column in
8681 the table definition.
8682
8683 @param tables Table list
8684
8685 @return true if the table exists, fais if does not.
8686 */
8687
8688 static bool
has_write_table_auto_increment_not_first_in_pk(TABLE_LIST * tables)8689 has_write_table_auto_increment_not_first_in_pk(TABLE_LIST *tables)
8690 {
8691 for (TABLE_LIST *table= tables; table; table= table->next_global)
8692 {
8693 /* we must do preliminary checks as table->table may be NULL */
8694 if (!table->placeholder() &&
8695 table->table->found_next_number_field &&
8696 (table->lock_type >= TL_WRITE_ALLOW_WRITE)
8697 && table->table->s->next_number_keypart != 0)
8698 return 1;
8699 }
8700
8701 return 0;
8702 }
8703
8704 #ifndef DBUG_OFF
get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)8705 const char * get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)
8706 {
8707 switch (locked_tables_mode)
8708 {
8709 case LTM_NONE:
8710 return "LTM_NONE";
8711 case LTM_LOCK_TABLES:
8712 return "LTM_LOCK_TABLES";
8713 case LTM_PRELOCKED:
8714 return "LTM_PRELOCKED";
8715 case LTM_PRELOCKED_UNDER_LOCK_TABLES:
8716 return "LTM_PRELOCKED_UNDER_LOCK_TABLES";
8717 default:
8718 return "Unknown table lock mode";
8719 }
8720 }
8721 #endif
8722
8723
8724 /**
8725 Decide on logging format to use for the statement and issue errors
8726 or warnings as needed. The decision depends on the following
8727 parameters:
8728
8729 - The logging mode, i.e., the value of binlog_format. Can be
8730 statement, mixed, or row.
8731
8732 - The type of statement. There are three types of statements:
8733 "normal" safe statements; unsafe statements; and row injections.
8734 An unsafe statement is one that, if logged in statement format,
8735 might produce different results when replayed on the slave (e.g.,
8736 INSERT DELAYED). A row injection is either a BINLOG statement, or
8737 a row event executed by the slave's SQL thread.
8738
8739 - The capabilities of tables modified by the statement. The
8740 *capabilities vector* for a table is a set of flags associated
8741 with the table. Currently, it only includes two flags: *row
8742 capability flag* and *statement capability flag*.
8743
8744 The row capability flag is set if and only if the engine can
8745 handle row-based logging. The statement capability flag is set if
8746 and only if the table can handle statement-based logging.
8747
8748 Decision table for logging format
8749 ---------------------------------
8750
8751 The following table summarizes how the format and generated
8752 warning/error depends on the tables' capabilities, the statement
8753 type, and the current binlog_format.
8754
8755 Row capable N NNNNNNNNN YYYYYYYYY YYYYYYYYY
8756 Statement capable N YYYYYYYYY NNNNNNNNN YYYYYYYYY
8757
8758 Statement type * SSSUUUIII SSSUUUIII SSSUUUIII
8759
8760 binlog_format * SMRSMRSMR SMRSMRSMR SMRSMRSMR
8761
8762 Logged format - SS-S----- -RR-RR-RR SRRSRR-RR
8763 Warning/Error 1 --2732444 5--5--6-- ---7--6--
8764
8765 Legend
8766 ------
8767
8768 Row capable: N - Some table not row-capable, Y - All tables row-capable
8769 Stmt capable: N - Some table not stmt-capable, Y - All tables stmt-capable
8770 Statement type: (S)afe, (U)nsafe, or Row (I)njection
8771 binlog_format: (S)TATEMENT, (M)IXED, or (R)OW
8772 Logged format: (S)tatement or (R)ow
8773 Warning/Error: Warnings and error messages are as follows:
8774
8775 1. Error: Cannot execute statement: binlogging impossible since both
8776 row-incapable engines and statement-incapable engines are
8777 involved.
8778
8779 2. Error: Cannot execute statement: binlogging impossible since
8780 BINLOG_FORMAT = ROW and at least one table uses a storage engine
8781 limited to statement-logging.
8782
8783 3. Error: Cannot execute statement: binlogging of unsafe statement
8784 is impossible when storage engine is limited to statement-logging
8785 and BINLOG_FORMAT = MIXED.
8786
8787 4. Error: Cannot execute row injection: binlogging impossible since
8788 at least one table uses a storage engine limited to
8789 statement-logging.
8790
8791 5. Error: Cannot execute statement: binlogging impossible since
8792 BINLOG_FORMAT = STATEMENT and at least one table uses a storage
8793 engine limited to row-logging.
8794
8795 6. Error: Cannot execute row injection: binlogging impossible since
8796 BINLOG_FORMAT = STATEMENT.
8797
8798 7. Warning: Unsafe statement binlogged in statement format since
8799 BINLOG_FORMAT = STATEMENT.
8800
8801 In addition, we can produce the following error (not depending on
8802 the variables of the decision diagram):
8803
8804 8. Error: Cannot execute statement: binlogging impossible since more
8805 than one engine is involved and at least one engine is
8806 self-logging.
8807
8808 For each error case above, the statement is prevented from being
8809 logged, we report an error, and roll back the statement. For
8810 warnings, we set the thd->binlog_flags variable: the warning will be
8811 printed only if the statement is successfully logged.
8812
8813 @see THD::binlog_query
8814
8815 @param[in] thd Client thread
8816 @param[in] tables Tables involved in the query
8817
8818 @retval 0 No error; statement can be logged.
8819 @retval -1 One of the error conditions above applies (1, 2, 4, 5, or 6).
8820 */
8821
decide_logging_format(TABLE_LIST * tables)8822 int THD::decide_logging_format(TABLE_LIST *tables)
8823 {
8824 DBUG_ENTER("THD::decide_logging_format");
8825 DBUG_PRINT("info", ("query: %s", query()));
8826 DBUG_PRINT("info", ("variables.binlog_format: %lu",
8827 variables.binlog_format));
8828 DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags(): 0x%x",
8829 lex->get_stmt_unsafe_flags()));
8830
8831 reset_binlog_local_stmt_filter();
8832
8833 /*
8834 We should not decide logging format if the binlog is closed or
8835 binlogging is off, or if the statement is filtered out from the
8836 binlog by filtering rules.
8837 */
8838 if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) &&
8839 !(variables.binlog_format == BINLOG_FORMAT_STMT &&
8840 !binlog_filter->db_ok(db)))
8841 {
8842 /*
8843 Compute one bit field with the union of all the engine
8844 capabilities, and one with the intersection of all the engine
8845 capabilities.
8846 */
8847 handler::Table_flags flags_write_some_set= 0;
8848 handler::Table_flags flags_access_some_set= 0;
8849 handler::Table_flags flags_write_all_set=
8850 HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE;
8851
8852 /*
8853 If different types of engines are about to be updated.
8854 For example: Innodb and Falcon; Innodb and MyIsam.
8855 */
8856 my_bool multi_write_engine= FALSE;
8857 /*
8858 If different types of engines are about to be accessed
8859 and any of them is about to be updated. For example:
8860 Innodb and Falcon; Innodb and MyIsam.
8861 */
8862 my_bool multi_access_engine= FALSE;
8863 /*
8864 bug 1313901 : Track if statement creates or drops a temporary table
8865 and log in ROW if it does.
8866 */
8867 my_bool create_drop_temp_table= FALSE;
8868 /*
8869 Identifies if a table is changed.
8870 */
8871 my_bool is_write= FALSE;
8872 /*
8873 A pointer to a previous table that was changed.
8874 */
8875 TABLE* prev_write_table= NULL;
8876 /*
8877 A pointer to a previous table that was accessed.
8878 */
8879 TABLE* prev_access_table= NULL;
8880 /*
8881 True if at least one table is transactional.
8882 */
8883 bool write_to_some_transactional_table= false;
8884 /*
8885 True if at least one table is non-transactional.
8886 */
8887 bool write_to_some_non_transactional_table= false;
8888 /*
8889 True if all non-transactional tables that has been updated
8890 are temporary.
8891 */
8892 bool write_all_non_transactional_are_tmp_tables= true;
8893 /**
8894 The number of tables used in the current statement,
8895 that should be replicated.
8896 */
8897 uint replicated_tables_count= 0;
8898 /**
8899 The number of tables written to in the current statement,
8900 that should not be replicated.
8901 A table should not be replicated when it is considered
8902 'local' to a MySQL instance.
8903 Currently, these tables are:
8904 - mysql.slow_log
8905 - mysql.general_log
8906 - mysql.slave_relay_log_info
8907 - mysql.slave_master_info
8908 - mysql.slave_worker_info
8909 - performance_schema.*
8910 - TODO: information_schema.*
8911 In practice, from this list, only performance_schema.* tables
8912 are written to by user queries.
8913 */
8914 uint non_replicated_tables_count= 0;
8915 #ifndef DBUG_OFF
8916 {
8917 DBUG_PRINT("debug", ("prelocked_mode: %s",
8918 get_locked_tables_mode_name(locked_tables_mode)));
8919 }
8920 #endif
8921
8922 if (variables.binlog_format != BINLOG_FORMAT_ROW && tables)
8923 {
8924 /*
8925 DML statements that modify a table with an auto_increment column based on
8926 rows selected from a table are unsafe as the order in which the rows are
8927 fetched fron the select tables cannot be determined and may differ on
8928 master and slave.
8929 */
8930 if (has_write_table_with_auto_increment_and_select(tables))
8931 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_WRITE_AUTOINC_SELECT);
8932
8933 if (has_write_table_auto_increment_not_first_in_pk(tables))
8934 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST);
8935
8936 /*
8937 A query that modifies autoinc column in sub-statement can make the
8938 master and slave inconsistent.
8939 We can solve these problems in mixed mode by switching to binlogging
8940 if at least one updated table is used by sub-statement
8941 */
8942 if (lex->requires_prelocking() &&
8943 has_write_table_with_auto_increment(lex->first_not_own_table()))
8944 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS);
8945 }
8946
8947 /*
8948 Get the capabilities vector for all involved storage engines and
8949 mask out the flags for the binary log.
8950 */
8951 for (TABLE_LIST *table= tables; table; table= table->next_global)
8952 {
8953 if (table->placeholder())
8954 {
8955 /*
8956 bug 1313901 : Detect if this is a CREATE TEMPORARY or DROP of a
8957 temporary table. This will be used later in determining
8958 whether to log in ROW or STMT if MIXED replication is
8959 being used.
8960 */
8961 if(!create_drop_temp_table &&
8962 !table->table &&
8963 ((lex->sql_command == SQLCOM_CREATE_TABLE &&
8964 (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)) ||
8965 ((lex->sql_command == SQLCOM_DROP_TABLE ||
8966 lex->sql_command == SQLCOM_TRUNCATE) &&
8967 find_temporary_table(this, table))))
8968 {
8969 create_drop_temp_table= TRUE;
8970 }
8971 continue;
8972 }
8973
8974 handler::Table_flags const flags= table->table->file->ha_table_flags();
8975
8976 DBUG_PRINT("info", ("table: %s; ha_table_flags: 0x%llx",
8977 table->table_name, flags));
8978
8979 if (table->table->no_replicate)
8980 {
8981 /*
8982 The statement uses a table that is not replicated.
8983 The following properties about the table:
8984 - persistent / transient
8985 - transactional / non transactional
8986 - temporary / permanent
8987 - read or write
8988 - multiple engines involved because of this table
8989 are not relevant, as this table is completely ignored.
8990 Because the statement uses a non replicated table,
8991 using STATEMENT format in the binlog is impossible.
8992 Either this statement will be discarded entirely,
8993 or it will be logged (possibly partially) in ROW format.
8994 */
8995 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_TABLE);
8996
8997 if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
8998 {
8999 non_replicated_tables_count++;
9000 continue;
9001 }
9002 }
9003
9004 replicated_tables_count++;
9005
9006 my_bool trans= table->table->file->has_transactions();
9007
9008 if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
9009 {
9010 write_to_some_transactional_table=
9011 write_to_some_transactional_table || trans;
9012
9013 write_to_some_non_transactional_table=
9014 write_to_some_non_transactional_table || !trans;
9015
9016 if (prev_write_table && prev_write_table->file->ht !=
9017 table->table->file->ht)
9018 multi_write_engine= TRUE;
9019
9020 if (table->table->s->tmp_table)
9021 lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TEMP_TRANS_TABLE :
9022 LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE);
9023 else
9024 lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TRANS_TABLE :
9025 LEX::STMT_WRITES_NON_TRANS_TABLE);
9026
9027 /*
9028 Non-transactional updates are allowed when row binlog format is
9029 used and all non-transactional tables are temporary.
9030 Binlog format is checked on THD::is_dml_gtid_compatible() method.
9031 */
9032 if (!trans)
9033 write_all_non_transactional_are_tmp_tables=
9034 write_all_non_transactional_are_tmp_tables &&
9035 table->table->s->tmp_table;
9036
9037 flags_write_all_set &= flags;
9038 flags_write_some_set |= flags;
9039 is_write= TRUE;
9040
9041 prev_write_table= table->table;
9042
9043 /*
9044 INSERT...ON DUPLICATE KEY UPDATE on a table with more than one unique keys
9045 can be unsafe. Check for it if the flag is already not marked for the
9046 given statement.
9047 */
9048 if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS) &&
9049 lex->sql_command == SQLCOM_INSERT &&
9050 /* Duplicate key update is not supported by INSERT DELAYED */
9051 get_command() != COM_DELAYED_INSERT && lex->duplicates == DUP_UPDATE)
9052 {
9053 uint keys= table->table->s->keys, i= 0, unique_keys= 0;
9054 for (KEY* keyinfo= table->table->s->key_info;
9055 i < keys && unique_keys <= 1; i++, keyinfo++)
9056 {
9057 if (keyinfo->flags & HA_NOSAME)
9058 unique_keys++;
9059 }
9060 if (unique_keys > 1 )
9061 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS);
9062 }
9063 }
9064 flags_access_some_set |= flags;
9065
9066 if (table->table->s->tmp_table)
9067 lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TEMP_TRANS_TABLE :
9068 LEX::STMT_READS_TEMP_NON_TRANS_TABLE);
9069 else
9070 lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TRANS_TABLE :
9071 LEX::STMT_READS_NON_TRANS_TABLE);
9072
9073 if (prev_access_table && prev_access_table->file->ht !=
9074 table->table->file->ht)
9075 multi_access_engine= TRUE;
9076
9077 prev_access_table= table->table;
9078 }
9079 DBUG_ASSERT(!is_write ||
9080 write_to_some_transactional_table ||
9081 write_to_some_non_transactional_table);
9082 /*
9083 write_all_non_transactional_are_tmp_tables may be true if any
9084 non-transactional table was not updated, so we fix its value here.
9085 */
9086 write_all_non_transactional_are_tmp_tables=
9087 write_all_non_transactional_are_tmp_tables &&
9088 write_to_some_non_transactional_table;
9089
9090 DBUG_PRINT("info", ("flags_write_all_set: 0x%llx", flags_write_all_set));
9091 DBUG_PRINT("info", ("flags_write_some_set: 0x%llx", flags_write_some_set));
9092 DBUG_PRINT("info", ("flags_access_some_set: 0x%llx", flags_access_some_set));
9093 DBUG_PRINT("info", ("multi_write_engine: %d", multi_write_engine));
9094 DBUG_PRINT("info", ("multi_access_engine: %d", multi_access_engine));
9095
9096 int error= 0;
9097 int unsafe_flags;
9098
9099 bool multi_stmt_trans= in_multi_stmt_transaction_mode();
9100 bool trans_table= trans_has_updated_trans_table(this);
9101 bool binlog_direct= variables.binlog_direct_non_trans_update;
9102
9103 if (lex->is_mixed_stmt_unsafe(multi_stmt_trans, binlog_direct,
9104 trans_table, tx_isolation))
9105 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MIXED_STATEMENT);
9106 else if (multi_stmt_trans && trans_table && !binlog_direct &&
9107 lex->stmt_accessed_table(LEX::STMT_WRITES_NON_TRANS_TABLE))
9108 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS);
9109
9110 /*
9111 If more than one engine is involved in the statement and at
9112 least one is doing it's own logging (is *self-logging*), the
9113 statement cannot be logged atomically, so we generate an error
9114 rather than allowing the binlog to become corrupt.
9115 */
9116 if (multi_write_engine &&
9117 (flags_write_some_set & HA_HAS_OWN_BINLOGGING))
9118 my_error((error= ER_BINLOG_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE),
9119 MYF(0));
9120 else if (multi_access_engine && flags_access_some_set & HA_HAS_OWN_BINLOGGING)
9121 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE);
9122
9123 DBUG_EXECUTE_IF("make_stmt_only_engines",
9124 {
9125 flags_write_all_set= HA_BINLOG_STMT_CAPABLE;
9126 };);
9127
9128 /* both statement-only and row-only engines involved */
9129 if ((flags_write_all_set & (HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE)) == 0)
9130 {
9131 /*
9132 1. Error: Binary logging impossible since both row-incapable
9133 engines and statement-incapable engines are involved
9134 */
9135 my_error((error= ER_BINLOG_ROW_ENGINE_AND_STMT_ENGINE), MYF(0));
9136 }
9137 /* statement-only engines involved */
9138 else if ((flags_write_all_set & HA_BINLOG_ROW_CAPABLE) == 0)
9139 {
9140 if (lex->is_stmt_row_injection())
9141 {
9142 /*
9143 4. Error: Cannot execute row injection since table uses
9144 storage engine limited to statement-logging
9145 */
9146 my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0));
9147 }
9148 else if (variables.binlog_format == BINLOG_FORMAT_ROW &&
9149 sqlcom_can_generate_row_events(this->lex->sql_command))
9150 {
9151 /*
9152 2. Error: Cannot modify table that uses a storage engine
9153 limited to statement-logging when BINLOG_FORMAT = ROW
9154 */
9155 my_error((error= ER_BINLOG_ROW_MODE_AND_STMT_ENGINE), MYF(0));
9156 }
9157 else if (variables.binlog_format == BINLOG_FORMAT_MIXED &&
9158 ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
9159 {
9160 /*
9161 3. Error: Cannot execute statement: binlogging of unsafe
9162 statement is impossible when storage engine is limited to
9163 statement-logging and BINLOG_FORMAT = MIXED.
9164 */
9165 for (int unsafe_type= 0;
9166 unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
9167 unsafe_type++)
9168 if (unsafe_flags & (1 << unsafe_type))
9169 my_error((error= ER_BINLOG_UNSAFE_AND_STMT_ENGINE), MYF(0),
9170 ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
9171 }
9172 else if (is_write && ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
9173 {
9174 /*
9175 7. Warning: Unsafe statement logged as statement due to
9176 binlog_format = STATEMENT
9177 */
9178 binlog_unsafe_warning_flags|= unsafe_flags;
9179 DBUG_PRINT("info", ("Scheduling warning to be issued by "
9180 "binlog_query: '%s'",
9181 ER(ER_BINLOG_UNSAFE_STATEMENT)));
9182 DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
9183 binlog_unsafe_warning_flags));
9184 }
9185 /* log in statement format! */
9186 }
9187 /* no statement-only engines */
9188 else
9189 {
9190 /* binlog_format = STATEMENT */
9191 if (variables.binlog_format == BINLOG_FORMAT_STMT)
9192 {
9193 if (lex->is_stmt_row_injection())
9194 {
9195 /*
9196 6. Error: Cannot execute row injection since
9197 BINLOG_FORMAT = STATEMENT
9198 */
9199 my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_MODE), MYF(0));
9200 }
9201 else if ((flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0 &&
9202 sqlcom_can_generate_row_events(this->lex->sql_command))
9203 {
9204 /*
9205 5. Error: Cannot modify table that uses a storage engine
9206 limited to row-logging when binlog_format = STATEMENT
9207 */
9208 my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), "");
9209 }
9210 else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0)
9211 {
9212 /*
9213 7. Warning: Unsafe statement logged as statement due to
9214 binlog_format = STATEMENT
9215 */
9216 binlog_unsafe_warning_flags|= unsafe_flags;
9217 DBUG_PRINT("info", ("Scheduling warning to be issued by "
9218 "binlog_query: '%s'",
9219 ER(ER_BINLOG_UNSAFE_STATEMENT)));
9220 DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
9221 binlog_unsafe_warning_flags));
9222 }
9223 /* log in statement format! */
9224 }
9225 /* No statement-only engines and binlog_format != STATEMENT.
9226 I.e., nothing prevents us from row logging if needed. */
9227 else
9228 {
9229 if (lex->is_stmt_unsafe() || lex->is_stmt_row_injection()
9230 || (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0
9231 || (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0
9232 || lex->stmt_accessed_table(LEX::STMT_READS_TEMP_TRANS_TABLE)
9233 || lex->stmt_accessed_table(LEX::STMT_READS_TEMP_NON_TRANS_TABLE)
9234 || create_drop_temp_table)
9235 {
9236 /* log in row format! */
9237 set_current_stmt_binlog_format_row_if_mixed();
9238 }
9239 }
9240 }
9241
9242 if (non_replicated_tables_count > 0)
9243 {
9244 if ((replicated_tables_count == 0) || ! is_write)
9245 {
9246 DBUG_PRINT("info", ("decision: no logging, no replicated table affected"));
9247 set_binlog_local_stmt_filter();
9248 }
9249 else
9250 {
9251 if (! is_current_stmt_binlog_format_row())
9252 {
9253 my_error((error= ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES), MYF(0));
9254 }
9255 else
9256 {
9257 clear_binlog_local_stmt_filter();
9258 }
9259 }
9260 }
9261 else
9262 {
9263 clear_binlog_local_stmt_filter();
9264 }
9265
9266 if (!error && enforce_gtid_consistency &&
9267 !is_dml_gtid_compatible(write_to_some_transactional_table,
9268 write_to_some_non_transactional_table,
9269 write_all_non_transactional_are_tmp_tables))
9270 error= 1;
9271
9272 if (error) {
9273 DBUG_PRINT("info", ("decision: no logging since an error was generated"));
9274 DBUG_RETURN(-1);
9275 }
9276
9277 if (is_write &&
9278 lex->sql_command != SQLCOM_END /* rows-event applying by slave */)
9279 {
9280 /*
9281 Master side of DML in the STMT format events parallelization.
9282 All involving table db:s are stored in a abc-ordered name list.
9283 In case the number of databases exceeds MAX_DBS_IN_EVENT_MTS maximum
9284 the list gathering breaks since it won't be sent to the slave.
9285 */
9286 for (TABLE_LIST *table= tables; table; table= table->next_global)
9287 {
9288 if (table->placeholder())
9289 continue;
9290
9291 DBUG_ASSERT(table->table);
9292
9293 if (table->table->file->referenced_by_foreign_key())
9294 {
9295 /*
9296 FK-referenced dbs can't be gathered currently. The following
9297 event will be marked for sequential execution on slave.
9298 */
9299 binlog_accessed_db_names= NULL;
9300 add_to_binlog_accessed_dbs("");
9301 break;
9302 }
9303 if (!is_current_stmt_binlog_format_row())
9304 add_to_binlog_accessed_dbs(table->db);
9305 }
9306 }
9307 DBUG_PRINT("info", ("decision: logging in %s format",
9308 is_current_stmt_binlog_format_row() ?
9309 "ROW" : "STATEMENT"));
9310
9311 if (variables.binlog_format == BINLOG_FORMAT_ROW &&
9312 (lex->sql_command == SQLCOM_UPDATE ||
9313 lex->sql_command == SQLCOM_UPDATE_MULTI ||
9314 lex->sql_command == SQLCOM_DELETE ||
9315 lex->sql_command == SQLCOM_DELETE_MULTI))
9316 {
9317 String table_names;
9318 /*
9319 Generate a warning for UPDATE/DELETE statements that modify a
9320 BLACKHOLE table, as row events are not logged in row format.
9321 */
9322 for (TABLE_LIST *table= tables; table; table= table->next_global)
9323 {
9324 if (table->placeholder())
9325 continue;
9326 if (table->table->file->ht->db_type == DB_TYPE_BLACKHOLE_DB &&
9327 table->lock_type >= TL_WRITE_ALLOW_WRITE)
9328 {
9329 table_names.append(table->table_name);
9330 table_names.append(",");
9331 }
9332 }
9333 if (!table_names.is_empty())
9334 {
9335 bool is_update= (lex->sql_command == SQLCOM_UPDATE ||
9336 lex->sql_command == SQLCOM_UPDATE_MULTI);
9337 /*
9338 Replace the last ',' with '.' for table_names
9339 */
9340 table_names.replace(table_names.length()-1, 1, ".", 1);
9341 push_warning_printf(this, Sql_condition::WARN_LEVEL_WARN,
9342 WARN_ON_BLOCKHOLE_IN_RBR,
9343 ER(WARN_ON_BLOCKHOLE_IN_RBR),
9344 is_update ? "UPDATE" : "DELETE",
9345 table_names.c_ptr());
9346 }
9347 }
9348 }
9349 #ifndef DBUG_OFF
9350 else
9351 DBUG_PRINT("info", ("decision: no logging since "
9352 "mysql_bin_log.is_open() = %d "
9353 "and (options & OPTION_BIN_LOG) = 0x%llx "
9354 "and binlog_format = %lu "
9355 "and binlog_filter->db_ok(db) = %d",
9356 mysql_bin_log.is_open(),
9357 (variables.option_bits & OPTION_BIN_LOG),
9358 variables.binlog_format,
9359 binlog_filter->db_ok(db)));
9360 #endif
9361
9362 DBUG_RETURN(0);
9363 }
9364
9365
is_ddl_gtid_compatible() const9366 bool THD::is_ddl_gtid_compatible() const
9367 {
9368 DBUG_ENTER("THD::is_ddl_gtid_compatible");
9369
9370 // If @@session.sql_log_bin has been manually turned off (only
9371 // doable by SUPER), then no problem, we can execute any statement.
9372 if ((variables.option_bits & OPTION_BIN_LOG) == 0)
9373 DBUG_RETURN(true);
9374
9375 if (lex->sql_command == SQLCOM_CREATE_TABLE &&
9376 !(lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) &&
9377 lex->select_lex.item_list.elements)
9378 {
9379 /*
9380 CREATE ... SELECT (without TEMPORARY) is unsafe because if
9381 binlog_format=row it will be logged as a CREATE TABLE followed
9382 by row events, re-executed non-atomically as two transactions,
9383 and then written to the slave's binary log as two separate
9384 transactions with the same GTID.
9385 */
9386 my_error(ER_GTID_UNSAFE_CREATE_SELECT, MYF(0));
9387 DBUG_RETURN(false);
9388 }
9389 if (lex->sql_command == SQLCOM_CREATE_TABLE &&
9390 (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) != 0)
9391 {
9392 /*
9393 In statement binary log format, CREATE TEMPORARY TABLE is unsafe
9394 to execute inside a transaction because the table will stay and the
9395 transaction will be written to the slave's binary log with the GTID even
9396 if the transaction is rolled back. This includes the execution inside
9397 functions and triggers.
9398 The same considerations apply for DROP TEMPORARY TABLE too, this is
9399 checked in mysql_rm_table instead.
9400 */
9401 if ((in_multi_stmt_transaction_mode() || in_sub_stmt)
9402 && variables.binlog_format == BINLOG_FORMAT_STMT)
9403 {
9404 my_error(ER_GTID_UNSAFE_CREATE_DROP_TEMPORARY_TABLE_IN_TRANSACTION,
9405 MYF(0));
9406 DBUG_RETURN(false);
9407 }
9408 }
9409 DBUG_RETURN(true);
9410 }
9411
9412
9413 bool
is_dml_gtid_compatible(bool transactional_table,bool non_transactional_table,bool non_transactional_tmp_tables) const9414 THD::is_dml_gtid_compatible(bool transactional_table,
9415 bool non_transactional_table,
9416 bool non_transactional_tmp_tables) const
9417 {
9418 DBUG_ENTER("THD::is_dml_gtid_compatible(bool, bool, bool)");
9419
9420 // If @@session.sql_log_bin has been manually turned off (only
9421 // doable by SUPER), then no problem, we can execute any statement.
9422 if ((variables.option_bits & OPTION_BIN_LOG) == 0)
9423 DBUG_RETURN(true);
9424
9425 /*
9426 Single non-transactional updates are allowed when not mixed
9427 together with transactional statements within a transaction.
9428 Furthermore, writing to transactional and non-transactional
9429 engines in a single statement is also disallowed.
9430 Multi-statement transactions on non-transactional tables are
9431 split into single-statement transactions when
9432 GTID_NEXT = "AUTOMATIC".
9433
9434 Non-transactional updates are allowed when row binlog format is
9435 used and all non-transactional tables are temporary.
9436
9437 The debug symbol "allow_gtid_unsafe_non_transactional_updates"
9438 disables the error. This is useful because it allows us to run
9439 old tests that were not written with the restrictions of GTIDs in
9440 mind.
9441 */
9442 if (non_transactional_table &&
9443 (transactional_table || trans_has_updated_trans_table(this)) &&
9444 !(non_transactional_tmp_tables && is_current_stmt_binlog_format_row()) &&
9445 !DBUG_EVALUATE_IF("allow_gtid_unsafe_non_transactional_updates", 1, 0))
9446 {
9447 my_error(ER_GTID_UNSAFE_NON_TRANSACTIONAL_TABLE, MYF(0));
9448 DBUG_RETURN(false);
9449 }
9450
9451 DBUG_RETURN(true);
9452 }
9453
9454 /*
9455 Implementation of interface to write rows to the binary log through the
9456 thread. The thread is responsible for writing the rows it has
9457 inserted/updated/deleted.
9458 */
9459
9460 #ifndef MYSQL_CLIENT
9461
9462 /*
9463 Template member function for ensuring that there is an rows log
9464 event of the apropriate type before proceeding.
9465
9466 PRE CONDITION:
9467 - Events of type 'RowEventT' have the type code 'type_code'.
9468
9469 POST CONDITION:
9470 If a non-NULL pointer is returned, the pending event for thread 'thd' will
9471 be an event of type 'RowEventT' (which have the type code 'type_code')
9472 will either empty or have enough space to hold 'needed' bytes. In
9473 addition, the columns bitmap will be correct for the row, meaning that
9474 the pending event will be flushed if the columns in the event differ from
9475 the columns suppled to the function.
9476
9477 RETURNS
9478 If no error, a non-NULL pending event (either one which already existed or
9479 the newly created one).
9480 If error, NULL.
9481 */
9482
9483 template <class RowsEventT> Rows_log_event*
binlog_prepare_pending_rows_event(TABLE * table,uint32 serv_id,size_t needed,bool is_transactional,RowsEventT * hint MY_ATTRIBUTE ((unused)),const uchar * extra_row_info)9484 THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
9485 size_t needed,
9486 bool is_transactional,
9487 RowsEventT *hint MY_ATTRIBUTE((unused)),
9488 const uchar* extra_row_info)
9489 {
9490 DBUG_ENTER("binlog_prepare_pending_rows_event");
9491
9492 /* Fetch the type code for the RowsEventT template parameter */
9493 int const general_type_code= RowsEventT::TYPE_CODE;
9494
9495 Rows_log_event* pending= binlog_get_pending_rows_event(is_transactional);
9496
9497 if (unlikely(pending && !pending->is_valid()))
9498 DBUG_RETURN(NULL);
9499
9500 /*
9501 Check if the current event is non-NULL and a write-rows
9502 event. Also check if the table provided is mapped: if it is not,
9503 then we have switched to writing to a new table.
9504 If there is no pending event, we need to create one. If there is a pending
9505 event, but it's not about the same table id, or not of the same type
9506 (between Write, Update and Delete), or not the same affected columns, or
9507 going to be too big, flush this event to disk and create a new pending
9508 event.
9509 */
9510 if (!pending ||
9511 pending->server_id != serv_id ||
9512 pending->get_table_id() != table->s->table_map_id ||
9513 pending->get_general_type_code() != general_type_code ||
9514 pending->get_data_size() + needed > opt_binlog_rows_event_max_size ||
9515 pending->read_write_bitmaps_cmp(table) == FALSE ||
9516 !binlog_row_event_extra_data_eq(pending->get_extra_row_data(),
9517 extra_row_info))
9518 {
9519 /* Create a new RowsEventT... */
9520 Rows_log_event* const
9521 ev= new RowsEventT(this, table, table->s->table_map_id,
9522 is_transactional, extra_row_info);
9523 if (unlikely(!ev))
9524 DBUG_RETURN(NULL);
9525 ev->server_id= serv_id; // I don't like this, it's too easy to forget.
9526 /*
9527 flush the pending event and replace it with the newly created
9528 event...
9529 */
9530 if (unlikely(
9531 mysql_bin_log.flush_and_set_pending_rows_event(this, ev,
9532 is_transactional)))
9533 {
9534 delete ev;
9535 DBUG_RETURN(NULL);
9536 }
9537
9538 DBUG_RETURN(ev); /* This is the new pending event */
9539 }
9540 DBUG_RETURN(pending); /* This is the current pending event */
9541 }
9542
9543 /* Declare in unnamed namespace. */
9544 CPP_UNNAMED_NS_START
9545
9546 /**
9547 Class to handle temporary allocation of memory for row data.
9548
9549 The responsibilities of the class is to provide memory for
9550 packing one or two rows of packed data (depending on what
9551 constructor is called).
9552
9553 In order to make the allocation more efficient for "simple" rows,
9554 i.e., rows that do not contain any blobs, a pointer to the
9555 allocated memory is of memory is stored in the table structure
9556 for simple rows. If memory for a table containing a blob field
9557 is requested, only memory for that is allocated, and subsequently
9558 released when the object is destroyed.
9559
9560 */
9561 class Row_data_memory {
9562 public:
9563 /**
9564 Build an object to keep track of a block-local piece of memory
9565 for storing a row of data.
9566
9567 @param table
9568 Table where the pre-allocated memory is stored.
9569
9570 @param length
9571 Length of data that is needed, if the record contain blobs.
9572 */
Row_data_memory(TABLE * table,size_t const len1)9573 Row_data_memory(TABLE *table, size_t const len1)
9574 : m_memory(0)
9575 {
9576 #ifndef DBUG_OFF
9577 m_alloc_checked= FALSE;
9578 #endif
9579 allocate_memory(table, len1);
9580 m_ptr[0]= has_memory() ? m_memory : 0;
9581 m_ptr[1]= 0;
9582 }
9583
Row_data_memory(TABLE * table,size_t const len1,size_t const len2)9584 Row_data_memory(TABLE *table, size_t const len1, size_t const len2)
9585 : m_memory(0)
9586 {
9587 #ifndef DBUG_OFF
9588 m_alloc_checked= FALSE;
9589 #endif
9590 allocate_memory(table, len1 + len2);
9591 m_ptr[0]= has_memory() ? m_memory : 0;
9592 m_ptr[1]= has_memory() ? m_memory + len1 : 0;
9593 }
9594
~Row_data_memory()9595 ~Row_data_memory()
9596 {
9597 if (m_memory != 0 && m_release_memory_on_destruction)
9598 my_free(m_memory);
9599 }
9600
9601 /**
9602 Is there memory allocated?
9603
9604 @retval true There is memory allocated
9605 @retval false Memory allocation failed
9606 */
has_memory() const9607 bool has_memory() const {
9608 #ifndef DBUG_OFF
9609 m_alloc_checked= TRUE;
9610 #endif
9611 return m_memory != 0;
9612 }
9613
slot(uint s)9614 uchar *slot(uint s)
9615 {
9616 DBUG_ASSERT(s < sizeof(m_ptr)/sizeof(*m_ptr));
9617 DBUG_ASSERT(m_ptr[s] != 0);
9618 DBUG_ASSERT(m_alloc_checked == TRUE);
9619 return m_ptr[s];
9620 }
9621
9622 private:
allocate_memory(TABLE * const table,size_t const total_length)9623 void allocate_memory(TABLE *const table, size_t const total_length)
9624 {
9625 if (table->s->blob_fields == 0)
9626 {
9627 /*
9628 The maximum length of a packed record is less than this
9629 length. We use this value instead of the supplied length
9630 when allocating memory for records, since we don't know how
9631 the memory will be used in future allocations.
9632
9633 Since table->s->reclength is for unpacked records, we have
9634 to add two bytes for each field, which can potentially be
9635 added to hold the length of a packed field.
9636 */
9637 size_t const maxlen= table->s->reclength + 2 * table->s->fields;
9638
9639 /*
9640 Allocate memory for two records if memory hasn't been
9641 allocated. We allocate memory for two records so that it can
9642 be used when processing update rows as well.
9643 */
9644 if (table->write_row_record == 0)
9645 table->write_row_record=
9646 (uchar *) alloc_root(&table->mem_root, 2 * maxlen);
9647 m_memory= table->write_row_record;
9648 m_release_memory_on_destruction= FALSE;
9649 }
9650 else
9651 {
9652 m_memory= (uchar *) my_malloc(total_length, MYF(MY_WME));
9653 m_release_memory_on_destruction= TRUE;
9654 }
9655 }
9656
9657 #ifndef DBUG_OFF
9658 mutable bool m_alloc_checked;
9659 #endif
9660 bool m_release_memory_on_destruction;
9661 uchar *m_memory;
9662 uchar *m_ptr[2];
9663 };
9664
9665 CPP_UNNAMED_NS_END
9666
binlog_write_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)9667 int THD::binlog_write_row(TABLE* table, bool is_trans,
9668 uchar const *record,
9669 const uchar* extra_row_info)
9670 {
9671 DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
9672
9673 /*
9674 Pack records into format for transfer. We are allocating more
9675 memory than needed, but that doesn't matter.
9676 */
9677 Row_data_memory memory(table, max_row_length(table, record));
9678 if (!memory.has_memory())
9679 return HA_ERR_OUT_OF_MEM;
9680
9681 uchar *row_data= memory.slot(0);
9682
9683 size_t const len= pack_row(table, table->write_set, row_data, record);
9684
9685 Rows_log_event* const ev=
9686 binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
9687 static_cast<Write_rows_log_event*>(0),
9688 extra_row_info);
9689
9690 if (unlikely(ev == 0))
9691 return HA_ERR_OUT_OF_MEM;
9692
9693 return ev->add_row_data(row_data, len);
9694 }
9695
binlog_update_row(TABLE * table,bool is_trans,const uchar * before_record,const uchar * after_record,const uchar * extra_row_info)9696 int THD::binlog_update_row(TABLE* table, bool is_trans,
9697 const uchar *before_record,
9698 const uchar *after_record,
9699 const uchar* extra_row_info)
9700 {
9701 DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
9702 int error= 0;
9703
9704 /**
9705 Save a reference to the original read and write set bitmaps.
9706 We will need this to restore the bitmaps at the end.
9707 */
9708 MY_BITMAP *old_read_set= table->read_set;
9709 MY_BITMAP *old_write_set= table->write_set;
9710
9711 /**
9712 This will remove spurious fields required during execution but
9713 not needed for binlogging. This is done according to the:
9714 binlog-row-image option.
9715 */
9716 binlog_prepare_row_images(table);
9717
9718 size_t const before_maxlen = max_row_length(table, before_record);
9719 size_t const after_maxlen = max_row_length(table, after_record);
9720
9721 Row_data_memory row_data(table, before_maxlen, after_maxlen);
9722 if (!row_data.has_memory())
9723 return HA_ERR_OUT_OF_MEM;
9724
9725 uchar *before_row= row_data.slot(0);
9726 uchar *after_row= row_data.slot(1);
9727
9728 size_t const before_size= pack_row(table, table->read_set, before_row,
9729 before_record);
9730 size_t const after_size= pack_row(table, table->write_set, after_row,
9731 after_record);
9732
9733 /*
9734 Don't print debug messages when running valgrind since they can
9735 trigger false warnings.
9736 */
9737 #ifndef HAVE_purify
9738 DBUG_DUMP("before_record", before_record, table->s->reclength);
9739 DBUG_DUMP("after_record", after_record, table->s->reclength);
9740 DBUG_DUMP("before_row", before_row, before_size);
9741 DBUG_DUMP("after_row", after_row, after_size);
9742 #endif
9743
9744 Rows_log_event* const ev=
9745 binlog_prepare_pending_rows_event(table, server_id,
9746 before_size + after_size, is_trans,
9747 static_cast<Update_rows_log_event*>(0),
9748 extra_row_info);
9749
9750 if (unlikely(ev == 0))
9751 return HA_ERR_OUT_OF_MEM;
9752
9753 error= ev->add_row_data(before_row, before_size) ||
9754 ev->add_row_data(after_row, after_size);
9755
9756 /* restore read/write set for the rest of execution */
9757 table->column_bitmaps_set_no_signal(old_read_set,
9758 old_write_set);
9759
9760 return error;
9761 }
9762
binlog_delete_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)9763 int THD::binlog_delete_row(TABLE* table, bool is_trans,
9764 uchar const *record,
9765 const uchar* extra_row_info)
9766 {
9767 DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
9768 int error= 0;
9769
9770 /**
9771 Save a reference to the original read and write set bitmaps.
9772 We will need this to restore the bitmaps at the end.
9773 */
9774 MY_BITMAP *old_read_set= table->read_set;
9775 MY_BITMAP *old_write_set= table->write_set;
9776
9777 /**
9778 This will remove spurious fields required during execution but
9779 not needed for binlogging. This is done according to the:
9780 binlog-row-image option.
9781 */
9782 binlog_prepare_row_images(table);
9783
9784 /*
9785 Pack records into format for transfer. We are allocating more
9786 memory than needed, but that doesn't matter.
9787 */
9788 Row_data_memory memory(table, max_row_length(table, record));
9789 if (unlikely(!memory.has_memory()))
9790 return HA_ERR_OUT_OF_MEM;
9791
9792 uchar *row_data= memory.slot(0);
9793
9794 DBUG_DUMP("table->read_set", (uchar*) table->read_set->bitmap, (table->s->fields + 7) / 8);
9795 size_t const len= pack_row(table, table->read_set, row_data, record);
9796
9797 Rows_log_event* const ev=
9798 binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
9799 static_cast<Delete_rows_log_event*>(0),
9800 extra_row_info);
9801
9802 if (unlikely(ev == 0))
9803 return HA_ERR_OUT_OF_MEM;
9804
9805 error= ev->add_row_data(row_data, len);
9806
9807 /* restore read/write set for the rest of execution */
9808 table->column_bitmaps_set_no_signal(old_read_set,
9809 old_write_set);
9810
9811 return error;
9812 }
9813
binlog_prepare_row_images(TABLE * table)9814 void THD::binlog_prepare_row_images(TABLE *table)
9815 {
9816 DBUG_ENTER("THD::binlog_prepare_row_images");
9817 /**
9818 Remove from read_set spurious columns. The write_set has been
9819 handled before in table->mark_columns_needed_for_update.
9820 */
9821
9822 DBUG_PRINT_BITSET("debug", "table->read_set (before preparing): %s", table->read_set);
9823 THD *thd= table->in_use;
9824
9825 /**
9826 if there is a primary key in the table (ie, user declared PK or a
9827 non-null unique index) and we dont want to ship the entire image,
9828 and the handler involved supports this.
9829 */
9830 if (table->s->primary_key < MAX_KEY &&
9831 (thd->variables.binlog_row_image < BINLOG_ROW_IMAGE_FULL) &&
9832 !ha_check_storage_engine_flag(table->s->db_type(), HTON_NO_BINLOG_ROW_OPT))
9833 {
9834 /**
9835 Just to be sure that tmp_set is currently not in use as
9836 the read_set already.
9837 */
9838 DBUG_ASSERT(table->read_set != &table->tmp_set);
9839
9840 bitmap_clear_all(&table->tmp_set);
9841
9842 switch(thd->variables.binlog_row_image)
9843 {
9844 case BINLOG_ROW_IMAGE_MINIMAL:
9845 /* MINIMAL: Mark only PK */
9846 table->mark_columns_used_by_index_no_reset(table->s->primary_key,
9847 &table->tmp_set);
9848 break;
9849 case BINLOG_ROW_IMAGE_NOBLOB:
9850 /**
9851 NOBLOB: Remove unnecessary BLOB fields from read_set
9852 (the ones that are not part of PK).
9853 */
9854 bitmap_union(&table->tmp_set, table->read_set);
9855 for (Field **ptr=table->field ; *ptr ; ptr++)
9856 {
9857 Field *field= (*ptr);
9858 if ((field->type() == MYSQL_TYPE_BLOB) &&
9859 !(field->flags & PRI_KEY_FLAG))
9860 bitmap_clear_bit(&table->tmp_set, field->field_index);
9861 }
9862 break;
9863 default:
9864 DBUG_ASSERT(0); // impossible.
9865 }
9866
9867 /* set the temporary read_set */
9868 table->column_bitmaps_set_no_signal(&table->tmp_set,
9869 table->write_set);
9870 }
9871
9872 DBUG_PRINT_BITSET("debug", "table->read_set (after preparing): %s", table->read_set);
9873 DBUG_VOID_RETURN;
9874 }
9875
9876
binlog_flush_pending_rows_event(bool stmt_end,bool is_transactional)9877 int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional)
9878 {
9879 DBUG_ENTER("THD::binlog_flush_pending_rows_event");
9880 /*
9881 We shall flush the pending event even if we are not in row-based
9882 mode: it might be the case that we left row-based mode before
9883 flushing anything (e.g., if we have explicitly locked tables).
9884 */
9885 if (!mysql_bin_log.is_open())
9886 DBUG_RETURN(0);
9887
9888 /*
9889 Mark the event as the last event of a statement if the stmt_end
9890 flag is set.
9891 */
9892 int error= 0;
9893 if (Rows_log_event *pending= binlog_get_pending_rows_event(is_transactional))
9894 {
9895 if (stmt_end)
9896 {
9897 pending->set_flags(Rows_log_event::STMT_END_F);
9898 binlog_table_maps= 0;
9899 }
9900
9901 error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0,
9902 is_transactional);
9903 }
9904
9905 DBUG_RETURN(error);
9906 }
9907
9908
9909 /**
9910 binlog_row_event_extra_data_eq
9911
9912 Comparator for two binlog row event extra data
9913 pointers.
9914
9915 It compares their significant bytes.
9916
9917 Null pointers are acceptable
9918
9919 @param a
9920 first pointer
9921
9922 @param b
9923 first pointer
9924
9925 @return
9926 true if the referenced structures are equal
9927 */
9928 bool
binlog_row_event_extra_data_eq(const uchar * a,const uchar * b)9929 THD::binlog_row_event_extra_data_eq(const uchar* a,
9930 const uchar* b)
9931 {
9932 return ((a == b) ||
9933 ((a != NULL) &&
9934 (b != NULL) &&
9935 (a[EXTRA_ROW_INFO_LEN_OFFSET] ==
9936 b[EXTRA_ROW_INFO_LEN_OFFSET]) &&
9937 (memcmp(a, b,
9938 a[EXTRA_ROW_INFO_LEN_OFFSET]) == 0)));
9939 }
9940
9941 #if !defined(DBUG_OFF) && !defined(_lint)
9942 static const char *
show_query_type(THD::enum_binlog_query_type qtype)9943 show_query_type(THD::enum_binlog_query_type qtype)
9944 {
9945 switch (qtype) {
9946 case THD::ROW_QUERY_TYPE:
9947 return "ROW";
9948 case THD::STMT_QUERY_TYPE:
9949 return "STMT";
9950 case THD::QUERY_TYPE_COUNT:
9951 default:
9952 DBUG_ASSERT(0 <= qtype && qtype < THD::QUERY_TYPE_COUNT);
9953 }
9954 static char buf[64];
9955 sprintf(buf, "UNKNOWN#%d", qtype);
9956 return buf;
9957 }
9958 #endif
9959
9960 /**
9961 Auxiliary function to reset the limit unsafety warning suppression.
9962 */
reset_binlog_unsafe_suppression()9963 static void reset_binlog_unsafe_suppression()
9964 {
9965 DBUG_ENTER("reset_binlog_unsafe_suppression");
9966 unsafe_warning_suppression_is_activated= false;
9967 limit_unsafe_warning_count= 0;
9968 limit_unsafe_suppression_start_time= my_getsystime()/10000000;
9969 DBUG_VOID_RETURN;
9970 }
9971
9972 /**
9973 Auxiliary function to print warning in the error log.
9974 */
print_unsafe_warning_to_log(int unsafe_type,char * buf,char * query)9975 static void print_unsafe_warning_to_log(int unsafe_type, char* buf,
9976 char* query)
9977 {
9978 DBUG_ENTER("print_unsafe_warning_in_log");
9979 sprintf(buf, ER(ER_BINLOG_UNSAFE_STATEMENT),
9980 ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
9981 sql_print_warning(ER(ER_MESSAGE_AND_STATEMENT), buf, query);
9982 DBUG_VOID_RETURN;
9983 }
9984
/**
  Auxiliary function to check if the warning for limit unsafety should be
  thrown or suppressed.

  Every call increments limit_unsafe_warning_count. The very first call
  (start time still 0) records the start time and prints the warning.
  Later calls print the warning only while suppression is inactive; once
  the counter reaches LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT the
  elapsed time since the start time decides whether suppression is
  activated, monitoring is restarted, or suppression is deactivated.
  Details of each step are in the comments inline.

  The state lives in file-level variables
  (limit_unsafe_suppression_start_time, limit_unsafe_warning_count,
  unsafe_warning_suppression_is_activated).

  @param buf          buffer to hold the warning message text
  @param unsafe_type  The type of unsafety; asserted to be
                      LEX::BINLOG_STMT_UNSAFE_LIMIT.
  @param query        The actual query statement.

  TODO: Remove this function and implement a general service for all warnings
  that would prevent flooding the error log.
*/
static void do_unsafe_limit_checkout(char* buf, int unsafe_type, char* query)
{
  ulonglong now;
  DBUG_ENTER("do_unsafe_limit_checkout");
  DBUG_ASSERT(unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT);
  limit_unsafe_warning_count++;
  /*
    INITIALIZING:
    If this is the first time this function is called with log warning
    enabled, the monitoring of the unsafe warnings should start.
  */
  if (limit_unsafe_suppression_start_time == 0)
  {
    limit_unsafe_suppression_start_time= my_getsystime()/10000000;
    print_unsafe_warning_to_log(unsafe_type, buf, query);
  }
  else
  {
    /* While suppression is off every warning still reaches the log. */
    if (!unsafe_warning_suppression_is_activated)
      print_unsafe_warning_to_log(unsafe_type, buf, query);

    /* The time-based decisions are only taken once per threshold batch. */
    if (limit_unsafe_warning_count >=
        LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT)
    {
      now= my_getsystime()/10000000;
      if (!unsafe_warning_suppression_is_activated)
      {
        /*
          ACTIVATION:
          We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT warnings
          within LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT, so we activate
          the suppression.
        */
        if ((now-limit_unsafe_suppression_start_time) <=
            LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
        {
          unsafe_warning_suppression_is_activated= true;
          DBUG_PRINT("info",("A warning flood has been detected and the limit \
unsafety warning suppression has been activated."));
        }
        else
        {
          /*
            There is no flooding till now, therefore we restart the
            monitoring.
          */
          limit_unsafe_suppression_start_time= my_getsystime()/10000000;
          limit_unsafe_warning_count= 0;
        }
      }
      else
      {
        /*
          Suppression is active: print the suppression note followed by
          one instance of the unsafe warning.
        */
        sql_print_information("The following warning was suppressed %d times \
during the last %d seconds in the error log",
                              limit_unsafe_warning_count,
                              (int)
                              (now-limit_unsafe_suppression_start_time));
        print_unsafe_warning_to_log(unsafe_type, buf, query);
        /*
          DEACTIVATION: We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT
          warnings in more than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT, the
          suppression should be deactivated.
        */
        if ((now - limit_unsafe_suppression_start_time) >
            LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
        {
          reset_binlog_unsafe_suppression();
          DBUG_PRINT("info",("The limit unsafety warning supression has been \
deactivated"));
        }
      }
      /* Start counting the next batch, whichever branch was taken above. */
      limit_unsafe_warning_count= 0;
    }
  }
  DBUG_VOID_RETURN;
}
10076
10077 /**
10078 Auxiliary method used by @c binlog_query() to raise warnings.
10079
10080 The type of warning and the type of unsafeness is stored in
10081 THD::binlog_unsafe_warning_flags.
10082 */
issue_unsafe_warnings()10083 void THD::issue_unsafe_warnings()
10084 {
10085 char buf[MYSQL_ERRMSG_SIZE * 2];
10086 DBUG_ENTER("issue_unsafe_warnings");
10087 /*
10088 Ensure that binlog_unsafe_warning_flags is big enough to hold all
10089 bits. This is actually a constant expression.
10090 */
10091 DBUG_ASSERT(LEX::BINLOG_STMT_UNSAFE_COUNT <=
10092 sizeof(binlog_unsafe_warning_flags) * CHAR_BIT);
10093
10094 uint32 unsafe_type_flags= binlog_unsafe_warning_flags;
10095
10096 if ((unsafe_type_flags & (1U << LEX::BINLOG_STMT_UNSAFE_LIMIT)) != 0)
10097 {
10098 if ((lex->sql_command == SQLCOM_DELETE || lex->sql_command == SQLCOM_UPDATE) &&
10099 lex->select_lex.select_limit)
10100 {
10101 ORDER *order= (ORDER *) ((lex->select_lex.order_list.elements) ?
10102 lex->select_lex.order_list.first : NULL);
10103 if ((lex->select_lex.select_limit &&
10104 lex->select_lex.select_limit->fixed &&
10105 lex->select_lex.select_limit->val_int() == 0) ||
10106 is_order_deterministic(lex->query_tables,
10107 lex->select_lex.where, order))
10108 {
10109 unsafe_type_flags&= ~(1U << LEX::BINLOG_STMT_UNSAFE_LIMIT);
10110 }
10111 }
10112 if ((lex->sql_command == SQLCOM_INSERT_SELECT ||
10113 lex->sql_command == SQLCOM_REPLACE_SELECT) &&
10114 order_deterministic)
10115 {
10116 unsafe_type_flags&= ~(1U << LEX::BINLOG_STMT_UNSAFE_LIMIT);
10117 }
10118
10119 }
10120
10121 /*
10122 For each unsafe_type, check if the statement is unsafe in this way
10123 and issue a warning.
10124 */
10125 for (int unsafe_type=0;
10126 unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
10127 unsafe_type++)
10128 {
10129 if ((unsafe_type_flags & (1 << unsafe_type)) != 0)
10130 {
10131 push_warning_printf(this, Sql_condition::WARN_LEVEL_NOTE,
10132 ER_BINLOG_UNSAFE_STATEMENT,
10133 ER(ER_BINLOG_UNSAFE_STATEMENT),
10134 ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
10135 if (log_warnings && ((opt_log_warnings_suppress & (ULL(1) << log_warnings_suppress_1592)) == 0))
10136 {
10137 if (unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT)
10138 do_unsafe_limit_checkout( buf, unsafe_type, query());
10139 else //cases other than LIMIT unsafety
10140 print_unsafe_warning_to_log(unsafe_type, buf, query());
10141 }
10142 }
10143 }
10144 DBUG_VOID_RETURN;
10145 }
10146
10147 /**
10148 Log the current query.
10149
10150 The query will be logged in either row format or statement format
10151 depending on the value of @c current_stmt_binlog_format_row field and
10152 the value of the @c qtype parameter.
10153
10154 This function must be called:
10155
10156 - After the all calls to ha_*_row() functions have been issued.
10157
10158 - After any writes to system tables. Rationale: if system tables
10159 were written after a call to this function, and the master crashes
10160 after the call to this function and before writing the system
10161 tables, then the master and slave get out of sync.
10162
10163 - Before tables are unlocked and closed.
10164
10165 @see decide_logging_format
10166
10167 @retval 0 Success
10168
10169 @retval nonzero If there is a failure when writing the query (e.g.,
10170 write failure), then the error code is returned.
10171 */
binlog_query(THD::enum_binlog_query_type qtype,char const * query_arg,ulong query_len,bool is_trans,bool direct,bool suppress_use,int errcode)10172 int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
10173 ulong query_len, bool is_trans, bool direct,
10174 bool suppress_use, int errcode)
10175 {
10176 DBUG_ENTER("THD::binlog_query");
10177 DBUG_PRINT("enter", ("qtype: %s query: '%s'",
10178 show_query_type(qtype), query_arg));
10179 DBUG_ASSERT(query_arg && mysql_bin_log.is_open());
10180
10181 if (get_binlog_local_stmt_filter() == BINLOG_FILTER_SET)
10182 {
10183 /*
10184 The current statement is to be ignored, and not written to
10185 the binlog. Do not call issue_unsafe_warnings().
10186 */
10187 DBUG_RETURN(0);
10188 }
10189
10190 /*
10191 If we are not in prelocked mode, mysql_unlock_tables() will be
10192 called after this binlog_query(), so we have to flush the pending
10193 rows event with the STMT_END_F set to unlock all tables at the
10194 slave side as well.
10195
10196 If we are in prelocked mode, the flushing will be done inside the
10197 top-most close_thread_tables().
10198 */
10199 if (this->locked_tables_mode <= LTM_LOCK_TABLES)
10200 if (int error= binlog_flush_pending_rows_event(TRUE, is_trans))
10201 DBUG_RETURN(error);
10202
10203 /*
10204 Warnings for unsafe statements logged in statement format are
10205 printed in three places instead of in decide_logging_format().
10206 This is because the warnings should be printed only if the statement
10207 is actually logged. When executing decide_logging_format(), we cannot
10208 know for sure if the statement will be logged:
10209
10210 1 - sp_head::execute_procedure which prints out warnings for calls to
10211 stored procedures.
10212
10213 2 - sp_head::execute_function which prints out warnings for calls
10214 involving functions.
10215
10216 3 - THD::binlog_query (here) which prints warning for top level
10217 statements not covered by the two cases above: i.e., if not insided a
10218 procedure and a function.
10219
10220 Besides, we should not try to print these warnings if it is not
10221 possible to write statements to the binary log as it happens when
10222 the execution is inside a function, or generaly speaking, when
10223 the variables.option_bits & OPTION_BIN_LOG is false.
10224 */
10225 if ((variables.option_bits & OPTION_BIN_LOG) &&
10226 sp_runtime_ctx == NULL && !binlog_evt_union.do_union)
10227 {
10228 issue_unsafe_warnings();
10229 order_deterministic= true;
10230 }
10231
10232 switch (qtype) {
10233 /*
10234 ROW_QUERY_TYPE means that the statement may be logged either in
10235 row format or in statement format. If
10236 current_stmt_binlog_format is row, it means that the
10237 statement has already been logged in row format and hence shall
10238 not be logged again.
10239 */
10240 case THD::ROW_QUERY_TYPE:
10241 DBUG_PRINT("debug",
10242 ("is_current_stmt_binlog_format_row: %d",
10243 is_current_stmt_binlog_format_row()));
10244 if (is_current_stmt_binlog_format_row())
10245 DBUG_RETURN(0);
10246 /* Fall through */
10247
10248 /*
10249 STMT_QUERY_TYPE means that the query must be logged in statement
10250 format; it cannot be logged in row format. This is typically
10251 used by DDL statements. It is an error to use this query type
10252 if current_stmt_binlog_format_row is row.
10253
10254 @todo Currently there are places that call this method with
10255 STMT_QUERY_TYPE and current_stmt_binlog_format is row. Fix those
10256 places and add assert to ensure correct behavior. /Sven
10257 */
10258 case THD::STMT_QUERY_TYPE:
10259 /*
10260 The MYSQL_LOG::write() function will set the STMT_END_F flag and
10261 flush the pending rows event if necessary.
10262 */
10263 {
10264 Query_log_event qinfo(this, query_arg, query_len, is_trans, direct,
10265 suppress_use, errcode);
10266 /*
10267 Binlog table maps will be irrelevant after a Query_log_event
10268 (they are just removed on the slave side) so after the query
10269 log event is written to the binary log, we pretend that no
10270 table maps were written.
10271 */
10272 int error= mysql_bin_log.write_event(&qinfo);
10273 binlog_table_maps= 0;
10274 DBUG_RETURN(error);
10275 }
10276 break;
10277
10278 case THD::QUERY_TYPE_COUNT:
10279 default:
10280 DBUG_ASSERT(0 <= qtype && qtype < QUERY_TYPE_COUNT);
10281 }
10282 DBUG_RETURN(0);
10283 }
10284
10285 #endif /* !defined(MYSQL_CLIENT) */
10286
show_binlog_vars(THD * thd,SHOW_VAR * var,char * buff)10287 static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff)
10288 {
10289 mysql_mutex_assert_owner(&LOCK_status);
10290
10291 const binlog_cache_mngr *cache_mngr
10292 = (thd && opt_bin_log)
10293 ? static_cast<binlog_cache_mngr *>(thd_get_ha_data(thd, binlog_hton))
10294 : NULL;
10295
10296 const bool have_snapshot= (cache_mngr &&
10297 cache_mngr->binlog_info.log_file_name[0] != '\0');
10298
10299 if (have_snapshot)
10300 {
10301 set_binlog_snapshot_file(cache_mngr->binlog_info.log_file_name);
10302 binlog_snapshot_position= cache_mngr->binlog_info.pos;
10303 }
10304 else if (mysql_bin_log.is_open())
10305 {
10306 set_binlog_snapshot_file(binlog_global_snapshot_file);
10307 binlog_snapshot_position= binlog_global_snapshot_position;
10308 }
10309 else
10310 {
10311 binlog_snapshot_file[0]= '\0';
10312 binlog_snapshot_position= 0;
10313 }
10314 var->type= SHOW_ARRAY;
10315 var->value= (char *)&binlog_status_vars_detail;
10316 return 0;
10317 }
10318
/*
  Status variables exported by the binlog plugin: a single "Binlog" entry
  of type SHOW_FUNC whose value is computed by show_binlog_vars().
  NOTE(review): the trailing NullS entry looks like the usual end-of-array
  marker for SHOW_VAR lists - confirm.
*/
static SHOW_VAR binlog_status_vars_top[]= {
  {"Binlog", (char *) &show_binlog_vars, SHOW_FUNC},
  {NullS, NullS, SHOW_LONG}
};
10323
/*
  Storage engine descriptor for the binlog plugin; it only carries the
  handlerton interface version and is referenced from the plugin
  declaration.
*/
struct st_mysql_storage_engine binlog_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
10326
10327 /** @} */
10328
/*
  Plugin declaration for "binlog", a pseudo storage engine that represents
  the binlog in a transaction. It is initialised by binlog_init(), has no
  deinit hook, and exports binlog_status_vars_top as its status variables.
*/
mysql_declare_plugin(binlog)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &binlog_storage_engine,
  "binlog",
  "MySQL AB",
  "This is a pseudo storage engine to represent the binlog in a transaction",
  PLUGIN_LICENSE_GPL,
  binlog_init, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0100 /* 1.0 */,
  binlog_status_vars_top,     /* status variables                */
  NULL,                       /* system variables                */
  NULL,                       /* config options                  */
  0,                          /* NOTE(review): presumably the plugin flags field - confirm */
}
mysql_declare_plugin_end;
10346