1 /* Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22
23
24 #include "my_global.h"
25 #include "log.h"
26 #include "binlog.h"
27 #include "log_event.h"
28 #include "rpl_filter.h"
29 #include "rpl_rli.h"
30 #include "sql_plugin.h"
31 #include "rpl_handler.h"
32 #include "rpl_info_factory.h"
33 #include "rpl_utility.h"
34 #include "debug_sync.h"
35 #include "global_threads.h"
36 #include "sql_show.h"
37 #include "sql_parse.h"
38 #include "rpl_mi.h"
39 #include <list>
40 #include <string>
41 #include <sstream>
42 #include <my_stacktrace.h>
43
44 using std::max;
45 using std::min;
46 using std::string;
47 using std::list;
48
/*
  Expands to the spelling of the argument F as a string literal followed by
  a single space when (V) & (F) is non-zero, and to the empty string
  otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
50
51 /**
52 @defgroup Binary_Log Binary Log
53 @{
54 */
55
/*
  Bitwise complement of zero in my_off_t. In this file it is the value
  stored in binlog_trx_cache_data::before_stmt_pos when no statement start
  position is recorded.
*/
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)

/*
  Constants required for the limit unsafe warnings suppression
*/
//seconds after which the limit unsafe warnings suppression will be activated
#define LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT 50
//number of limit unsafe warnings after which the suppression will be activated
#define LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT 50
//number of attach attempts Thread_excursion::try_to_attach_to() makes before
//it terminates the server
#define MAX_SESSION_ATTACH_TRIES 10

/*
  State of the limit unsafe warnings suppression.
  NOTE(review): presumably the start of the current counting window, whether
  suppression is currently on, and the number of warnings counted so far --
  confirm against the code that reads and writes these variables.
*/
static ulonglong limit_unsafe_suppression_start_time= 0;
static bool unsafe_warning_suppression_is_activated= false;
static int limit_unsafe_warning_count= 0;

/* Handlerton of the binary log; assigned by binlog_init(). */
static handlerton *binlog_hton;
/* NOTE(review): looks like the backing variable of a server option -- confirm. */
bool opt_binlog_order_commits= true;

/*
  NOTE(review): presumably the index file name and the base name of the
  binary log files -- confirm where they are assigned.
*/
const char *log_bin_index= 0;
const char *log_bin_basename= 0;

/* The binary log object, constructed with the address of sync_binlog_period. */
MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period);

/*
  Forward declarations. binlog_init() installs the close_connection,
  savepoint, commit, rollback and prepare functions declared here as the
  callbacks of the binlog handlerton.
*/
static int binlog_init(void *p);
static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event);
static int binlog_close_connection(handlerton *hton, THD *thd);
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
                                                      THD *thd);
static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
89
90
91 /**
92 Print system time.
93 */
94
print_system_time()95 static void print_system_time()
96 {
97 #ifdef __WIN__
98 SYSTEMTIME utc_time;
99 GetSystemTime(&utc_time);
100 const long hrs= utc_time.wHour;
101 const long mins= utc_time.wMinute;
102 const long secs= utc_time.wSecond;
103 #else
104 /* Using time() instead of my_time() to avoid looping */
105 const time_t curr_time= time(NULL);
106 /* Calculate time of day */
107 const long tmins = curr_time / 60;
108 const long thrs = tmins / 60;
109 const long hrs = thrs % 24;
110 const long mins = tmins % 60;
111 const long secs = curr_time % 60;
112 #endif
113 char hrs_buf[3]= "00";
114 char mins_buf[3]= "00";
115 char secs_buf[3]= "00";
116 int base= 10;
117 my_safe_itoa(base, hrs, &hrs_buf[2]);
118 my_safe_itoa(base, mins, &mins_buf[2]);
119 my_safe_itoa(base, secs, &secs_buf[2]);
120
121 my_safe_printf_stderr("---------- %s:%s:%s UTC - ",
122 hrs_buf, mins_buf, secs_buf);
123 }
124
125
126 /**
127 Helper class to perform a thread excursion.
128
129 This class is used to temporarily switch to another session (THD
130 structure). It will set up thread specific "globals" correctly
131 so that the POSIX thread looks exactly like the session attached to.
132 However, PSI_thread info is not touched as it is required to show
133 the actual physial view in PFS instrumentation i.e., it should
134 depict as the real thread doing the work instead of thread it switched
135 to.
136
137 On destruction, the original session (which is supplied to the
138 constructor) will be re-attached automatically. For example, with
139 this code, the value of @c current_thd will be the same before and
140 after execution of the code.
141
142 @code
143 {
144 Thread_excursion excursion(current_thd);
145 for (int i = 0 ; i < count ; ++i)
146 excursion.attach_to(other_thd[i]);
147 }
148 @endcode
149
150 @warning The class is not designed to be inherited from.
151 */
152
153 class Thread_excursion
154 {
155 public:
Thread_excursion(THD * thd)156 Thread_excursion(THD *thd)
157 : m_original_thd(thd)
158 {
159 }
160
~Thread_excursion()161 ~Thread_excursion() {
162 #ifndef EMBEDDED_LIBRARY
163 if (unlikely(setup_thread_globals(m_original_thd)))
164 DBUG_ASSERT(0); // Out of memory?!
165 #endif
166 }
167
168 /**
169 Try to attach the POSIX thread to a session.
170 - This function attaches the POSIX thread to a session
171 in MAX_SESSION_ATTACH_TRIES tries when encountering
172 'out of memory' error, and terminates the server after
173 failed in MAX_SESSION_ATTACH_TRIES tries.
174
175 @param[in] thd The thd of a session
176 */
try_to_attach_to(THD * thd)177 void try_to_attach_to(THD *thd)
178 {
179 int i= 0;
180 /*
181 Attach the POSIX thread to a session in MAX_SESSION_ATTACH_TRIES
182 tries when encountering 'out of memory' error.
183 */
184 while (i < MAX_SESSION_ATTACH_TRIES)
185 {
186 /*
187 Currently attach_to(...) returns ER_OUTOFMEMORY or 0. So
188 we continue to attach the POSIX thread when encountering
189 the ER_OUTOFMEMORY error. Please take care other error
190 returned from attach_to(...) in future.
191 */
192 if (!attach_to(thd))
193 {
194 if (i > 0)
195 sql_print_warning("Server overcomes the temporary 'out of memory' "
196 "in '%d' tries while attaching to session thread "
197 "during the group commit phase.\n", i + 1);
198 break;
199 }
200 i++;
201 }
202 /*
203 Terminate the server after failed to attach the POSIX thread
204 to a session in MAX_SESSION_ATTACH_TRIES tries.
205 */
206 if (MAX_SESSION_ATTACH_TRIES == i)
207 {
208 print_system_time();
209 my_safe_printf_stderr("%s", "[Fatal] Out of memory while attaching to "
210 "session thread during the group commit phase. "
211 "Data consistency between master and slave can "
212 "be guaranteed after server restarts.\n");
213 _exit(EXIT_FAILURE);
214 }
215 }
216
217 private:
218
219 /**
220 Attach the POSIX thread to a session.
221 */
attach_to(THD * thd)222 int attach_to(THD *thd)
223 {
224 #ifndef EMBEDDED_LIBRARY
225 if (DBUG_EVALUATE_IF("simulate_session_attach_error", 1, 0)
226 || unlikely(setup_thread_globals(thd)))
227 {
228 /*
229 Indirectly uses pthread_setspecific, which can only return
230 ENOMEM or EINVAL. Since store_globals are using correct keys,
231 the only alternative is out of memory.
232 */
233 return ER_OUTOFMEMORY;
234 }
235 #endif /* EMBEDDED_LIBRARY */
236 return 0;
237 }
238
setup_thread_globals(THD * thd) const239 int setup_thread_globals(THD *thd) const {
240 int error= 0;
241 THD *original_thd= my_pthread_getspecific(THD*, THR_THD);
242 MEM_ROOT* original_mem_root= my_pthread_getspecific(MEM_ROOT*, THR_MALLOC);
243 if ((error= my_pthread_setspecific_ptr(THR_THD, thd)))
244 goto exit0;
245 if ((error= my_pthread_setspecific_ptr(THR_MALLOC, &thd->mem_root)))
246 goto exit1;
247 if ((error= set_mysys_var(thd->mysys_var)))
248 goto exit2;
249 goto exit0;
250 exit2:
251 error= my_pthread_setspecific_ptr(THR_MALLOC, original_mem_root);
252 exit1:
253 error= my_pthread_setspecific_ptr(THR_THD, original_thd);
254 exit0:
255 return error;
256 }
257
258 THD *m_original_thd;
259 };
260
261
262 /**
263 Caches for non-transactional and transactional data before writing
264 it to the binary log.
265
266 @todo All the access functions for the flags suggest that the
267 encapsuling is not done correctly, so try to move any logic that
268 requires access to the flags into the cache.
269 */
270 class binlog_cache_data
271 {
272 public:
273
binlog_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)274 binlog_cache_data(bool trx_cache_arg,
275 my_off_t max_binlog_cache_size_arg,
276 ulong *ptr_binlog_cache_use_arg,
277 ulong *ptr_binlog_cache_disk_use_arg)
278 : m_pending(0), saved_max_binlog_cache_size(max_binlog_cache_size_arg),
279 ptr_binlog_cache_use(ptr_binlog_cache_use_arg),
280 ptr_binlog_cache_disk_use(ptr_binlog_cache_disk_use_arg)
281 {
282 reset();
283 flags.transactional= trx_cache_arg;
284 cache_log.end_of_file= saved_max_binlog_cache_size;
285 }
286
287 int finalize(THD *thd, Log_event *end_event);
288 int flush(THD *thd, my_off_t *bytes, bool *wrote_xid);
289 int write_event(THD *thd, Log_event *event);
290
~binlog_cache_data()291 virtual ~binlog_cache_data()
292 {
293 DBUG_ASSERT(is_binlog_empty());
294 close_cached_file(&cache_log);
295 }
296
is_binlog_empty() const297 bool is_binlog_empty() const
298 {
299 my_off_t pos= my_b_tell(&cache_log);
300 DBUG_PRINT("debug", ("%s_cache - pending: 0x%llx, bytes: %llu",
301 (flags.transactional ? "trx" : "stmt"),
302 (ulonglong) pending(), (ulonglong) pos));
303 return pending() == NULL && pos == 0;
304 }
305
is_group_cache_empty() const306 bool is_group_cache_empty() const
307 {
308 return group_cache.is_empty();
309 }
310
311 #ifndef DBUG_OFF
dbug_is_finalized() const312 bool dbug_is_finalized() const {
313 return flags.finalized;
314 }
315 #endif
316
pending() const317 Rows_log_event *pending() const
318 {
319 return m_pending;
320 }
321
set_pending(Rows_log_event * const pending)322 void set_pending(Rows_log_event *const pending)
323 {
324 m_pending= pending;
325 }
326
set_incident(void)327 void set_incident(void)
328 {
329 flags.incident= true;
330 }
331
has_incident(void) const332 bool has_incident(void) const
333 {
334 return flags.incident;
335 }
336
337 /**
338 Sets the binlog_cache_data::Flags::flush_error flag if there
339 is an error while flushing cache to the file.
340
341 @param thd The client thread that is executing the transaction.
342 */
set_flush_error(THD * thd)343 void set_flush_error(THD *thd)
344 {
345 flags.flush_error= true;
346 if(is_trx_cache())
347 {
348 /*
349 If the cache is a transactional cache and if the write
350 has failed due to ENOSPC, then my_write() would have
351 set EE_WRITE error, so clear the error and create an
352 equivalent server error.
353 */
354 if (thd->is_error())
355 thd->clear_error();
356 char errbuf[MYSYS_STRERROR_SIZE];
357 my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), my_filename(cache_log.file),
358 errno, my_strerror(errbuf, sizeof(errbuf), errno));
359 }
360 }
361
get_flush_error(void) const362 bool get_flush_error(void) const
363 {
364 return flags.flush_error;
365 }
366
has_xid() const367 bool has_xid() const {
368 // There should only be an XID event if we are transactional
369 DBUG_ASSERT((flags.transactional && flags.with_xid) || !flags.with_xid);
370 return flags.with_xid;
371 }
372
is_trx_cache() const373 bool is_trx_cache() const
374 {
375 return flags.transactional;
376 }
377
get_byte_position() const378 my_off_t get_byte_position() const
379 {
380 return my_b_tell(&cache_log);
381 }
382
reset()383 virtual void reset()
384 {
385 compute_statistics();
386 truncate(0);
387
388 /*
389 If IOCACHE has a file associated, change its size to 0.
390 It is safer to do it here, since we are certain that one
391 asked the cache to go to position 0 with truncate.
392 */
393 if(cache_log.file != -1)
394 {
395 int error= 0;
396 if((error= my_chsize(cache_log.file, 0, 0, MYF(MY_WME))))
397 sql_print_warning("Unable to resize binlog IOCACHE auxilary file");
398
399 DBUG_EXECUTE_IF("show_io_cache_size",
400 {
401 ulong file_size= my_seek(cache_log.file,
402 0L,MY_SEEK_END,MYF(MY_WME+MY_FAE));
403 sql_print_error("New size:%ld", file_size);
404 });
405 }
406
407 flags.incident= false;
408 flags.with_xid= false;
409 flags.immediate= false;
410 flags.finalized= false;
411 flags.flush_error= false;
412 /*
413 The truncate function calls reinit_io_cache that calls my_b_flush_io_cache
414 which may increase disk_writes. This breaks the disk_writes use by the
415 binary log which aims to compute the ratio between in-memory cache usage
416 and disk cache usage. To avoid this undesirable behavior, we reset the
417 variable after truncating the cache.
418 */
419 cache_log.disk_writes= 0;
420 group_cache.clear();
421 DBUG_ASSERT(is_binlog_empty());
422 }
423
424 /*
425 Sets the write position to point at the position given. If the
426 cache has swapped to a file, it reinitializes it, so that the
427 proper data is added to the IO_CACHE buffer. Otherwise, it just
428 does a my_b_seek.
429
430 my_b_seek will not work if the cache has swapped, that's why
431 we do this workaround.
432
433 @param[IN] pos the new write position.
434 @param[IN] use_reinit if the position should be reset resorting
435 to reset_io_cache (which may issue a flush_io_cache
436 inside)
437
438 @return The previous write position.
439 */
reset_write_pos(my_off_t pos,bool use_reinit)440 my_off_t reset_write_pos(my_off_t pos, bool use_reinit)
441 {
442 DBUG_ENTER("reset_write_pos");
443 DBUG_ASSERT(cache_log.type == WRITE_CACHE);
444
445 my_off_t oldpos= get_byte_position();
446
447 if (use_reinit)
448 reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, 0);
449 else
450 my_b_seek(&cache_log, pos);
451
452 DBUG_RETURN(oldpos);
453 }
454
455 /*
456 Cache to store data before copying it to the binary log.
457 */
458 IO_CACHE cache_log;
459
460 /**
461 The group cache for this cache.
462 */
463 Group_cache group_cache;
464
465 protected:
466 /*
467 It truncates the cache to a certain position. This includes deleting the
468 pending event.
469 */
truncate(my_off_t pos)470 void truncate(my_off_t pos)
471 {
472 DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
473 remove_pending_event();
474 /*
475 Whenever there is an error while flushing cache to file,
476 the local cache will not be in a normal state and the same
477 cache cannot be used without facing an assert.
478 So, clear the cache if there is a flush error.
479 */
480 reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, get_flush_error());
481 cache_log.end_of_file= saved_max_binlog_cache_size;
482 }
483
484 /**
485 Flush pending event to the cache buffer.
486 */
flush_pending_event(THD * thd)487 int flush_pending_event(THD *thd) {
488 if (m_pending)
489 {
490 m_pending->set_flags(Rows_log_event::STMT_END_F);
491 if (int error= write_event(thd, m_pending))
492 return error;
493 thd->clear_binlog_table_maps();
494 }
495 return 0;
496 }
497
498 /**
499 Remove the pending event.
500 */
remove_pending_event()501 int remove_pending_event() {
502 delete m_pending;
503 m_pending= NULL;
504 return 0;
505 }
506 struct Flags {
507 /*
508 Defines if this is either a trx-cache or stmt-cache, respectively, a
509 transactional or non-transactional cache.
510 */
511 bool transactional:1;
512
513 /*
514 This indicates that some events did not get into the cache and most likely
515 it is corrupted.
516 */
517 bool incident:1;
518
519 /*
520 This indicates that the cache should be written without BEGIN/END.
521 */
522 bool immediate:1;
523
524 /*
525 This flag indicates that the buffer was finalized and has to be
526 flushed to disk.
527 */
528 bool finalized:1;
529
530 /*
531 This indicates that the cache contain an XID event.
532 */
533 bool with_xid:1;
534
535 /*
536 This flag is set to 'true' when there is an error while flushing the
537 I/O cache to file.
538 */
539 bool flush_error:1;
540 } flags;
541
542 private:
543 /*
544 Pending binrows event. This event is the event where the rows are currently
545 written.
546 */
547 Rows_log_event *m_pending;
548
549 /**
550 This function computes binlog cache and disk usage.
551 */
compute_statistics()552 void compute_statistics()
553 {
554 if (!is_binlog_empty())
555 {
556 statistic_increment(*ptr_binlog_cache_use, &LOCK_status);
557 if (cache_log.disk_writes != 0)
558 statistic_increment(*ptr_binlog_cache_disk_use, &LOCK_status);
559 }
560 }
561
562 /*
563 Stores the values of maximum size of the cache allowed when this cache
564 is configured. This corresponds to either
565 . max_binlog_cache_size or max_binlog_stmt_cache_size.
566 */
567 my_off_t saved_max_binlog_cache_size;
568
569 /*
570 Stores a pointer to the status variable that keeps track of the in-memory
571 cache usage. This corresponds to either
572 . binlog_cache_use or binlog_stmt_cache_use.
573 */
574 ulong *ptr_binlog_cache_use;
575
576 /*
577 Stores a pointer to the status variable that keeps track of the disk
578 cache usage. This corresponds to either
579 . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
580 */
581 ulong *ptr_binlog_cache_disk_use;
582
583 binlog_cache_data& operator=(const binlog_cache_data& info);
584 binlog_cache_data(const binlog_cache_data& info);
585 };
586
587
588 class binlog_stmt_cache_data
589 : public binlog_cache_data
590 {
591 public:
binlog_stmt_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)592 binlog_stmt_cache_data(bool trx_cache_arg,
593 my_off_t max_binlog_cache_size_arg,
594 ulong *ptr_binlog_cache_use_arg,
595 ulong *ptr_binlog_cache_disk_use_arg)
596 : binlog_cache_data(trx_cache_arg,
597 max_binlog_cache_size_arg,
598 ptr_binlog_cache_use_arg,
599 ptr_binlog_cache_disk_use_arg)
600 {
601 }
602
603 using binlog_cache_data::finalize;
604
605 int finalize(THD *thd);
606 };
607
608
609 int
finalize(THD * thd)610 binlog_stmt_cache_data::finalize(THD *thd)
611 {
612 if (flags.immediate)
613 {
614 if (int error= finalize(thd, NULL))
615 return error;
616 }
617 else
618 {
619 Query_log_event
620 end_evt(thd, STRING_WITH_LEN("COMMIT"), false, false, true, 0, true);
621 if (int error= finalize(thd, &end_evt))
622 return error;
623 }
624 return 0;
625 }
626
627
628 class binlog_trx_cache_data : public binlog_cache_data
629 {
630 public:
binlog_trx_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)631 binlog_trx_cache_data(bool trx_cache_arg,
632 my_off_t max_binlog_cache_size_arg,
633 ulong *ptr_binlog_cache_use_arg,
634 ulong *ptr_binlog_cache_disk_use_arg)
635 : binlog_cache_data(trx_cache_arg,
636 max_binlog_cache_size_arg,
637 ptr_binlog_cache_use_arg,
638 ptr_binlog_cache_disk_use_arg),
639 m_cannot_rollback(FALSE), before_stmt_pos(MY_OFF_T_UNDEF)
640 { }
641
reset()642 void reset()
643 {
644 DBUG_ENTER("reset");
645 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
646 m_cannot_rollback= FALSE;
647 before_stmt_pos= MY_OFF_T_UNDEF;
648 binlog_cache_data::reset();
649 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
650 DBUG_VOID_RETURN;
651 }
652
cannot_rollback() const653 bool cannot_rollback() const
654 {
655 return m_cannot_rollback;
656 }
657
set_cannot_rollback()658 void set_cannot_rollback()
659 {
660 m_cannot_rollback= TRUE;
661 }
662
get_prev_position() const663 my_off_t get_prev_position() const
664 {
665 return before_stmt_pos;
666 }
667
set_prev_position(my_off_t pos)668 void set_prev_position(my_off_t pos)
669 {
670 DBUG_ENTER("set_prev_position");
671 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
672 before_stmt_pos= pos;
673 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
674 DBUG_VOID_RETURN;
675 }
676
restore_prev_position()677 void restore_prev_position()
678 {
679 DBUG_ENTER("restore_prev_position");
680 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
681 binlog_cache_data::truncate(before_stmt_pos);
682 before_stmt_pos= MY_OFF_T_UNDEF;
683 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
684 DBUG_VOID_RETURN;
685 }
686
restore_savepoint(my_off_t pos)687 void restore_savepoint(my_off_t pos)
688 {
689 DBUG_ENTER("restore_savepoint");
690 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
691 binlog_cache_data::truncate(pos);
692 if (pos <= before_stmt_pos)
693 before_stmt_pos= MY_OFF_T_UNDEF;
694 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
695 DBUG_VOID_RETURN;
696 }
697
698 using binlog_cache_data::truncate;
699
700 int truncate(THD *thd, bool all);
701
702 private:
703 /*
704 It will be set TRUE if any statement which cannot be rolled back safely
705 is put in trx_cache.
706 */
707 bool m_cannot_rollback;
708
709 /*
710 Binlog position before the start of the current statement.
711 */
712 my_off_t before_stmt_pos;
713
714 binlog_trx_cache_data& operator=(const binlog_trx_cache_data& info);
715 binlog_trx_cache_data(const binlog_trx_cache_data& info);
716 };
717
718 class binlog_cache_mngr {
719 public:
binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,ulong * ptr_binlog_stmt_cache_use_arg,ulong * ptr_binlog_stmt_cache_disk_use_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)720 binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,
721 ulong *ptr_binlog_stmt_cache_use_arg,
722 ulong *ptr_binlog_stmt_cache_disk_use_arg,
723 my_off_t max_binlog_cache_size_arg,
724 ulong *ptr_binlog_cache_use_arg,
725 ulong *ptr_binlog_cache_disk_use_arg)
726 : stmt_cache(FALSE, max_binlog_stmt_cache_size_arg,
727 ptr_binlog_stmt_cache_use_arg,
728 ptr_binlog_stmt_cache_disk_use_arg),
729 trx_cache(TRUE, max_binlog_cache_size_arg,
730 ptr_binlog_cache_use_arg,
731 ptr_binlog_cache_disk_use_arg)
732 { }
733
get_binlog_cache_data(bool is_transactional)734 binlog_cache_data* get_binlog_cache_data(bool is_transactional)
735 {
736 if (is_transactional)
737 return &trx_cache;
738 else
739 return &stmt_cache;
740 }
741
get_binlog_cache_log(bool is_transactional)742 IO_CACHE* get_binlog_cache_log(bool is_transactional)
743 {
744 return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
745 }
746
747 /**
748 Convenience method to check if both caches are empty.
749 */
is_binlog_empty() const750 bool is_binlog_empty() const {
751 return stmt_cache.is_binlog_empty() && trx_cache.is_binlog_empty();
752 }
753
754 /*
755 clear stmt_cache and trx_cache if they are not empty
756 */
reset()757 void reset()
758 {
759 if (!stmt_cache.is_binlog_empty())
760 stmt_cache.reset();
761 if (!trx_cache.is_binlog_empty())
762 trx_cache.reset();
763 }
764
765 #ifndef DBUG_OFF
dbug_any_finalized() const766 bool dbug_any_finalized() const {
767 return stmt_cache.dbug_is_finalized() || trx_cache.dbug_is_finalized();
768 }
769 #endif
770
771 /*
772 Convenience method to flush both caches to the binary log.
773
774 @param bytes_written Pointer to variable that will be set to the
775 number of bytes written for the flush.
776 @param wrote_xid Pointer to variable that will be set to @c
777 true if any XID event was written to the
778 binary log. Otherwise, the variable will not
779 be touched.
780 @return Error code on error, zero if no error.
781 */
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)782 int flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
783 {
784 my_off_t stmt_bytes= 0;
785 my_off_t trx_bytes= 0;
786 DBUG_ASSERT(stmt_cache.has_xid() == 0);
787 if (int error= stmt_cache.flush(thd, &stmt_bytes, wrote_xid))
788 return error;
789 if (int error= trx_cache.flush(thd, &trx_bytes, wrote_xid))
790 return error;
791 *bytes_written= stmt_bytes + trx_bytes;
792 return 0;
793 }
794
795 binlog_stmt_cache_data stmt_cache;
796 binlog_trx_cache_data trx_cache;
797
798 private:
799
800 binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
801 binlog_cache_mngr(const binlog_cache_mngr& info);
802 };
803
804
thd_get_cache_mngr(const THD * thd)805 static binlog_cache_mngr *thd_get_cache_mngr(const THD *thd)
806 {
807 /*
808 If opt_bin_log is not set, binlog_hton->slot == -1 and hence
809 thd_get_ha_data(thd, hton) segfaults.
810 */
811 DBUG_ASSERT(opt_bin_log);
812 return (binlog_cache_mngr *)thd_get_ha_data(thd, binlog_hton);
813 }
814
815
816 /**
817 Checks if the BINLOG_CACHE_SIZE's value is greater than MAX_BINLOG_CACHE_SIZE.
818 If this happens, the BINLOG_CACHE_SIZE is set to MAX_BINLOG_CACHE_SIZE.
819 */
check_binlog_cache_size(THD * thd)820 void check_binlog_cache_size(THD *thd)
821 {
822 if (binlog_cache_size > max_binlog_cache_size)
823 {
824 if (thd)
825 {
826 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
827 ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX,
828 ER(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
829 (ulong) binlog_cache_size,
830 (ulong) max_binlog_cache_size);
831 }
832 else
833 {
834 sql_print_warning(ER_DEFAULT(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
835 (ulong) binlog_cache_size,
836 (ulong) max_binlog_cache_size);
837 }
838 binlog_cache_size= max_binlog_cache_size;
839 }
840 }
841
842 /**
843 Checks if the BINLOG_STMT_CACHE_SIZE's value is greater than MAX_BINLOG_STMT_CACHE_SIZE.
844 If this happens, the BINLOG_STMT_CACHE_SIZE is set to MAX_BINLOG_STMT_CACHE_SIZE.
845 */
check_binlog_stmt_cache_size(THD * thd)846 void check_binlog_stmt_cache_size(THD *thd)
847 {
848 if (binlog_stmt_cache_size > max_binlog_stmt_cache_size)
849 {
850 if (thd)
851 {
852 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
853 ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX,
854 ER(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
855 (ulong) binlog_stmt_cache_size,
856 (ulong) max_binlog_stmt_cache_size);
857 }
858 else
859 {
860 sql_print_warning(ER_DEFAULT(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
861 (ulong) binlog_stmt_cache_size,
862 (ulong) max_binlog_stmt_cache_size);
863 }
864 binlog_stmt_cache_size= max_binlog_stmt_cache_size;
865 }
866 }
867
868 /**
869 Check whether binlog_hton has valid slot and enabled
870 */
binlog_enabled()871 bool binlog_enabled()
872 {
873 return(binlog_hton && binlog_hton->slot != HA_SLOT_UNDEF);
874 }
875
876 /*
877 Save position of binary log transaction cache.
878
879 SYNPOSIS
880 binlog_trans_log_savepos()
881
882 thd The thread to take the binlog data from
883 pos Pointer to variable where the position will be stored
884
885 DESCRIPTION
886
887 Save the current position in the binary log transaction cache into
888 the variable pointed to by 'pos'
889 */
890
891 static void
binlog_trans_log_savepos(THD * thd,my_off_t * pos)892 binlog_trans_log_savepos(THD *thd, my_off_t *pos)
893 {
894 DBUG_ENTER("binlog_trans_log_savepos");
895 DBUG_ASSERT(pos != NULL);
896 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
897 DBUG_ASSERT(mysql_bin_log.is_open());
898 *pos= cache_mngr->trx_cache.get_byte_position();
899 DBUG_PRINT("return", ("position: %lu", (ulong) *pos));
900 DBUG_VOID_RETURN;
901 }
902
903
904 /*
905 this function is mostly a placeholder.
906 conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
907 should be moved here.
908 */
909
binlog_init(void * p)910 static int binlog_init(void *p)
911 {
912 binlog_hton= (handlerton *)p;
913 binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
914 binlog_hton->db_type=DB_TYPE_BINLOG;
915 binlog_hton->savepoint_offset= sizeof(my_off_t);
916 binlog_hton->close_connection= binlog_close_connection;
917 binlog_hton->savepoint_set= binlog_savepoint_set;
918 binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
919 binlog_hton->savepoint_rollback_can_release_mdl=
920 binlog_savepoint_rollback_can_release_mdl;
921 binlog_hton->commit= binlog_commit;
922 binlog_hton->rollback= binlog_rollback;
923 binlog_hton->prepare= binlog_prepare;
924 binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
925 return 0;
926 }
927
binlog_close_connection(handlerton * hton,THD * thd)928 static int binlog_close_connection(handlerton *hton, THD *thd)
929 {
930 DBUG_ENTER("binlog_close_connection");
931 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
932 DBUG_ASSERT(cache_mngr->is_binlog_empty());
933 DBUG_ASSERT(cache_mngr->trx_cache.is_group_cache_empty() &&
934 cache_mngr->stmt_cache.is_group_cache_empty());
935 DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) NULL));
936 thd_set_ha_data(thd, binlog_hton, NULL);
937 cache_mngr->~binlog_cache_mngr();
938 my_free(cache_mngr);
939 DBUG_RETURN(0);
940 }
941
write_event(THD * thd,Log_event * ev)942 int binlog_cache_data::write_event(THD *thd, Log_event *ev)
943 {
944 DBUG_ENTER("binlog_cache_data::write_event");
945
946 if (gtid_mode > 0)
947 {
948 Group_cache::enum_add_group_status status=
949 group_cache.add_logged_group(thd, get_byte_position());
950 if (status == Group_cache::ERROR)
951 DBUG_RETURN(1);
952 else if (status == Group_cache::APPEND_NEW_GROUP)
953 {
954 Gtid_log_event gtid_ev(thd, is_trx_cache());
955 if (gtid_ev.write(&cache_log) != 0)
956 DBUG_RETURN(1);
957 }
958 }
959
960 if (ev != NULL)
961 {
962 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
963 {DBUG_SET("+d,simulate_file_write_error");});
964
965 DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
966 {
967 static int count= -1;
968 count++;
969 if(count % 4 == 3 && ev->get_type_code() == WRITE_ROWS_EVENT)
970 DBUG_SET("+d,simulate_temp_file_write_error");
971 });
972 if (ev->write(&cache_log) != 0)
973 {
974 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
975 {
976 DBUG_SET("-d,simulate_file_write_error");
977 DBUG_SET("-d,simulate_disk_full_at_flush_pending");
978 /*
979 after +d,simulate_file_write_error the local cache
980 is in unsane state. Since -d,simulate_file_write_error
981 revokes the first simulation do_write_cache()
982 can't be run without facing an assert.
983 So it's blocked with the following 2nd simulation:
984 */
985 DBUG_SET("+d,simulate_do_write_cache_failure");
986 });
987
988 DBUG_EXECUTE_IF("simulate_temp_file_write_error",
989 {
990 DBUG_SET("-d,simulate_temp_file_write_error");
991 });
992 /*
993 If the flush has failed due to ENOSPC error, set the
994 flush_error flag.
995 */
996 if (thd->is_error() && my_errno == ENOSPC)
997 {
998 set_flush_error(thd);
999 }
1000 DBUG_RETURN(1);
1001 }
1002 if (ev->get_type_code() == XID_EVENT)
1003 flags.with_xid= true;
1004 if (ev->is_using_immediate_logging())
1005 flags.immediate= true;
1006 }
1007 DBUG_RETURN(0);
1008 }
1009
1010
1011 /**
1012 Checks if the given GTID exists in the Group_cache. If not, add it
1013 as an empty group.
1014
1015 @todo Move this function into the cache class?
1016
1017 @param thd THD object that owns the Group_cache
1018 @param cache_data binlog_cache_data object for the cache
1019 @param gtid GTID to check
1020 */
write_one_empty_group_to_cache(THD * thd,binlog_cache_data * cache_data,Gtid gtid)1021 static int write_one_empty_group_to_cache(THD *thd,
1022 binlog_cache_data *cache_data,
1023 Gtid gtid)
1024 {
1025 DBUG_ENTER("write_one_empty_group_to_cache");
1026 Group_cache *group_cache= &cache_data->group_cache;
1027 if (group_cache->contains_gtid(gtid))
1028 DBUG_RETURN(0);
1029 /*
1030 Apparently this code is not being called. We need to
1031 investigate if this is a bug or this code is not
1032 necessary. /Alfranio
1033
1034 Empty groups are currently being handled in the function
1035 gtid_empty_group_log_and_cleanup().
1036 */
1037 DBUG_ASSERT(0); /*NOTREACHED*/
1038 #ifdef NON_ERROR_GTID
1039 IO_CACHE *cache= &cache_data->cache_log;
1040 Group_cache::enum_add_group_status status= group_cache->add_empty_group(gtid);
1041 if (status == Group_cache::ERROR)
1042 DBUG_RETURN(1);
1043 DBUG_ASSERT(status == Group_cache::APPEND_NEW_GROUP);
1044 Gtid_specification spec= { GTID_GROUP, gtid };
1045 Gtid_log_event gtid_ev(thd, cache_data->is_trx_cache(), &spec);
1046 if (gtid_ev.write(cache) != 0)
1047 DBUG_RETURN(1);
1048 #endif
1049 DBUG_RETURN(0);
1050 }
1051
1052 /**
1053 Writes all GTIDs that the thread owns to the stmt/trx cache, if the
1054 GTID is not already in the cache.
1055
1056 @todo Move this function into the cache class?
1057
1058 @param thd THD object for the thread that owns the cache.
1059 @param cache_data The cache.
1060 */
write_empty_groups_to_cache(THD * thd,binlog_cache_data * cache_data)1061 static int write_empty_groups_to_cache(THD *thd, binlog_cache_data *cache_data)
1062 {
1063 DBUG_ENTER("write_empty_groups_to_cache");
1064 if (thd->owned_gtid.sidno == -1)
1065 {
1066 #ifdef HAVE_GTID_NEXT_LIST
1067 Gtid_set::Gtid_iterator git(&thd->owned_gtid_set);
1068 Gtid gtid= git.get();
1069 while (gtid.sidno != 0)
1070 {
1071 if (write_one_empty_group_to_cache(thd, cache_data, gtid) != 0)
1072 DBUG_RETURN(1);
1073 git.next();
1074 gtid= git.get();
1075 }
1076 #else
1077 DBUG_ASSERT(0);
1078 #endif
1079 }
1080 else if (thd->owned_gtid.sidno > 0)
1081 if (write_one_empty_group_to_cache(thd, cache_data, thd->owned_gtid) != 0)
1082 DBUG_RETURN(1);
1083 DBUG_RETURN(0);
1084 }
1085
1086
/**
  Prepare the GTID information of a cache before the cache is written out.

  Does nothing when gtid_mode is 0. Otherwise, under the read lock of
  global_sid_lock, an automatic group number is generated if gtid_next is
  AUTOMATIC_GROUP, and the GTIDs owned by the thread are written to the
  cache via write_empty_groups_to_cache(). For AUTOMATIC_GROUP the
  Gtid_log_event built from the first cached group is then written at
  position 0 of the cache, after which the previous write position is
  restored.

  @todo Move this function into the cache class?

  @param thd         Thread that owns the cache.
  @param cache_data  The cache being prepared.

  @return 0 on success, 1 on failure. On failure the flush error flag of
          the cache is set when cache_log.error is set.
*/
static int
gtid_before_write_cache(THD* thd, binlog_cache_data* cache_data)
{
  DBUG_ENTER("gtid_before_write_cache");
  int error= 0;

  DBUG_ASSERT(thd->variables.gtid_next.type != UNDEFINED_GROUP);

  /* Nothing to prepare when GTIDs are disabled. */
  if (gtid_mode == 0)
    DBUG_RETURN(0);

  Group_cache* group_cache= &cache_data->group_cache;

  /* Held for reading until every exit path below has unlocked it. */
  global_sid_lock->rdlock();

  if (thd->variables.gtid_next.type == AUTOMATIC_GROUP)
  {
    if (group_cache->generate_automatic_gno(thd) !=
        RETURN_STATUS_OK)
    {
      global_sid_lock->unlock();
      DBUG_RETURN(1);
    }
  }
  if (write_empty_groups_to_cache(thd, cache_data) != 0)
  {
    global_sid_lock->unlock();
    DBUG_RETURN(1);
  }

  global_sid_lock->unlock();

  /*
    If an automatic group number was generated, change the first event
    into a "real" one.
  */
  if (thd->variables.gtid_next.type == AUTOMATIC_GROUP)
  {
    DBUG_ASSERT(group_cache->get_n_groups() == 1);
    Cached_group *cached_group= group_cache->get_unsafe_pointer(0);
    DBUG_ASSERT(cached_group->spec.type != AUTOMATIC_GROUP);
    Gtid_log_event gtid_ev(thd, cache_data->is_trx_cache(),
                           &cached_group->spec);
    /* A non-zero pos_in_file is taken to mean the cache spilled to a file. */
    bool using_file= cache_data->cache_log.pos_in_file > 0;

    /* Test hook: make the following temp-file write fail. */
    DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
                  {
                    DBUG_SET("+d,simulate_temp_file_write_error");
                  });

    /*
      Rewind to the start of the cache so the GTID event overwrites the
      first event. NOTE(review): reset_write_pos() presumably returns the
      previous write position - confirm against its definition.
    */
    my_off_t saved_position= cache_data->reset_write_pos(0, using_file);

    if (!cache_data->cache_log.error)
    {
      if (gtid_ev.write(&cache_data->cache_log))
        goto err;
      /* Move back to where the cache ended before the rewrite. */
      cache_data->reset_write_pos(saved_position, using_file);
    }

    /* Catches a failure of either reset_write_pos() call above. */
    if (cache_data->cache_log.error)
      goto err;
  }

  DBUG_RETURN(error);

err:
  /* Undo the test hook enabled above. */
  DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
                {
                  DBUG_SET("-d,simulate_temp_file_write_error");
                });
  /*
    If the reinit_io_cache has failed, set the flush_error flag.
  */
  if (cache_data->cache_log.error)
  {
    cache_data->set_flush_error(thd);
  }
  DBUG_RETURN(1);

}
1171
1172 /**
1173 The function logs an empty group with GTID and performs cleanup.
1174 Its logic wrt GTID is equivalent to one of binlog_commit().
1175 It's called at the end of statement execution in case binlog_commit()
1176 was skipped.
1177 Such cases are due ineffective binlogging incl an empty group
1178 re-execution.
1179
1180 @param thd The thread handle
1181
1182 @return
1183 nonzero if an error pops up.
1184 */
gtid_empty_group_log_and_cleanup(THD * thd)1185 int gtid_empty_group_log_and_cleanup(THD *thd)
1186 {
1187 int ret= 1;
1188 binlog_cache_data* cache_data= NULL;
1189
1190 DBUG_ENTER("gtid_empty_group_log_and_cleanup");
1191
1192 Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE,
1193 FALSE, TRUE, 0, TRUE);
1194 DBUG_ASSERT(!qinfo.is_using_immediate_logging());
1195
1196 /*
1197 thd->cache_mngr is uninitialized on the first empty transaction.
1198 */
1199 if (thd->binlog_setup_trx_data())
1200 DBUG_RETURN(1);
1201 cache_data= &thd_get_cache_mngr(thd)->trx_cache;
1202 DBUG_PRINT("debug", ("Writing to trx_cache"));
1203 if (cache_data->write_event(thd, &qinfo) ||
1204 gtid_before_write_cache(thd, cache_data))
1205 goto err;
1206
1207 ret= mysql_bin_log.commit(thd, true);
1208
1209 err:
1210 DBUG_RETURN(ret);
1211 }
1212
1213 /**
1214 This function finalizes the cache preparing for commit or rollback.
1215
1216 The function just writes all the necessary events to the cache but
1217 does not flush the data to the binary log file. That is the role of
1218 the binlog_cache_data::flush function.
1219
1220 @see binlog_cache_data::flush
1221
1222 @param thd The thread whose transaction should be flushed
1223 @param cache_data Pointer to the cache
1224 @param end_ev The end event either commit/rollback
1225
1226 @return
1227 nonzero if an error pops up when flushing the cache.
1228 */
1229 int
finalize(THD * thd,Log_event * end_event)1230 binlog_cache_data::finalize(THD *thd, Log_event *end_event)
1231 {
1232 DBUG_ENTER("binlog_cache_data::finalize");
1233 if (!is_binlog_empty())
1234 {
1235 DBUG_ASSERT(!flags.finalized);
1236 if (int error= flush_pending_event(thd))
1237 DBUG_RETURN(error);
1238 if (int error= write_event(thd, end_event))
1239 DBUG_RETURN(error);
1240 flags.finalized= true;
1241 DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
1242 }
1243 DBUG_RETURN(0);
1244 }
1245
1246 /**
1247 Flush caches to the binary log.
1248
1249 If the cache is finalized, the cache will be flushed to the binary
1250 log file. If the cache is not finalized, nothing will be done.
1251
1252 If flushing fails for any reason, an error will be reported and the
1253 cache will be reset. Flushing can fail in two circumstances:
1254
1255 - It was not possible to write the cache to the file. In this case,
1256 it does not make sense to keep the cache.
1257
1258 - The cache was successfully written to disk but post-flush actions
1259 (such as binary log rotation) failed. In this case, the cache is
1260 already written to disk and there is no reason to keep it.
1261
1262 @see binlog_cache_data::finalize
1263 */
1264 int
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)1265 binlog_cache_data::flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
1266 {
1267 /*
1268 Doing a commit or a rollback including non-transactional tables,
1269 i.e., ending a transaction where we might write the transaction
1270 cache to the binary log.
1271
1272 We can always end the statement when ending a transaction since
1273 transactions are not allowed inside stored functions. If they
1274 were, we would have to ensure that we're not ending a statement
1275 inside a stored function.
1276 */
1277 DBUG_ENTER("binlog_cache_data::flush");
1278 DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
1279 int error= 0;
1280 if (flags.finalized)
1281 {
1282 my_off_t bytes_in_cache= my_b_tell(&cache_log);
1283 DBUG_PRINT("debug", ("bytes_in_cache: %llu", bytes_in_cache));
1284 /*
1285 The cache is always reset since subsequent rollbacks of the
1286 transactions might trigger attempts to write to the binary log
1287 if the cache is not reset.
1288 */
1289 if (!(error= gtid_before_write_cache(thd, this)))
1290 error= mysql_bin_log.write_cache(thd, this);
1291 else
1292 thd->commit_error= THD::CE_FLUSH_ERROR;
1293
1294 if (flags.with_xid && error == 0)
1295 *wrote_xid= true;
1296
1297 /*
1298 Reset have to be after the if above, since it clears the
1299 with_xid flag
1300 */
1301 reset();
1302 if (bytes_written)
1303 *bytes_written= bytes_in_cache;
1304 }
1305 DBUG_ASSERT(!flags.finalized);
1306 DBUG_RETURN(error);
1307 }
1308
1309 /**
1310 This function truncates the transactional cache upon committing or rolling
1311 back either a transaction or a statement.
1312
1313 @param thd The thread whose transaction should be flushed
1314 @param cache_mngr Pointer to the cache data to be flushed
1315 @param all @c true means truncate the transaction, otherwise the
1316 statement must be truncated.
1317
1318 @return
1319 nonzero if an error pops up when truncating the transactional cache.
1320 */
1321 int
truncate(THD * thd,bool all)1322 binlog_trx_cache_data::truncate(THD *thd, bool all)
1323 {
1324 DBUG_ENTER("binlog_trx_cache_data::truncate");
1325 int error=0;
1326
1327 DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1328 FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1329 FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1330 all ? "all" : "stmt"));
1331
1332 remove_pending_event();
1333
1334 /*
1335 If rolling back an entire transaction or a single statement not
1336 inside a transaction, we reset the transaction cache.
1337 */
1338 if (ending_trans(thd, all))
1339 {
1340 if (has_incident())
1341 error= mysql_bin_log.write_incident(thd, true/*need_lock_log=true*/);
1342 reset();
1343 }
1344 /*
1345 If rolling back a statement in a transaction, we truncate the
1346 transaction cache to remove the statement.
1347 */
1348 else if (get_prev_position() != MY_OFF_T_UNDEF)
1349 {
1350 restore_prev_position();
1351 if (is_binlog_empty())
1352 {
1353 /*
1354 After restoring the previous position, we need to check if
1355 the cache is empty. In such case, the group cache needs to
1356 be cleaned up too because the GTID is removed too from the
1357 cache.
1358
1359 So if any change happens again, the GTID must be rewritten
1360 and this will not happen if the group cache is not cleaned
1361 up.
1362
1363 After integrating this with NDB, we need to check if the
1364 current approach is enough or the group cache needs to
1365 explicitly support rollback to savepoints.
1366 */
1367 group_cache.clear();
1368 }
1369 }
1370
1371 thd->clear_binlog_table_maps();
1372
1373 DBUG_RETURN(error);
1374 }
1375
binlog_prepare(handlerton * hton,THD * thd,bool all)1376 static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1377 {
1378 /*
1379 do nothing.
1380 just pretend we can do 2pc, so that MySQL won't
1381 switch to 1pc.
1382 real work will be done in MYSQL_BIN_LOG::commit()
1383 */
1384 return 0;
1385 }
1386
1387 /**
1388 This function is called once after each statement.
1389
1390 @todo This function is currently not used any more and will
1391 eventually be eliminated. The real commit job is done in the
1392 MYSQL_BIN_LOG::commit function.
1393
1394 @see MYSQL_BIN_LOG::commit
1395
1396 @param hton The binlog handlerton.
1397 @param thd The client thread that executes the transaction.
1398 @param all This is @c true if this is a real transaction commit, and
1399 @false otherwise.
1400
1401 @see handlerton::commit
1402 */
binlog_commit(handlerton * hton,THD * thd,bool all)1403 static int binlog_commit(handlerton *hton, THD *thd, bool all)
1404 {
1405 DBUG_ENTER("binlog_commit");
1406 /*
1407 Nothing to do (any more) on commit.
1408 */
1409 DBUG_RETURN(0);
1410 }
1411
1412 /**
1413 This function is called when a transaction or a statement is rolled back.
1414
1415 @internal It is necessary to execute a rollback here if the
1416 transaction was rolled back because of executing a ROLLBACK TO
1417 SAVEPOINT command, but it is not used for normal rollback since
1418 MYSQL_BIN_LOG::rollback is called in that case.
1419
1420 @todo Refactor code to introduce a <code>MYSQL_BIN_LOG::rollback(THD
1421 *thd, SAVEPOINT *sv)</code> function in @c TC_LOG and have that
1422 function execute the necessary work to rollback to a savepoint.
1423
1424 @param hton The binlog handlerton.
1425 @param thd The client thread that executes the transaction.
1426 @param all This is @c true if this is a real transaction rollback, and
1427 @false otherwise.
1428
1429 @see handlerton::rollback
1430 */
binlog_rollback(handlerton * hton,THD * thd,bool all)1431 static int binlog_rollback(handlerton *hton, THD *thd, bool all)
1432 {
1433 DBUG_ENTER("binlog_rollback");
1434 int error= 0;
1435 if (thd->lex->sql_command == SQLCOM_ROLLBACK_TO_SAVEPOINT)
1436 error= mysql_bin_log.rollback(thd, all);
1437 DBUG_RETURN(error);
1438 }
1439
1440
1441 bool
append(THD * first)1442 Stage_manager::Mutex_queue::append(THD *first)
1443 {
1444 DBUG_ENTER("Stage_manager::Mutex_queue::append");
1445 lock();
1446 DBUG_PRINT("enter", ("first: 0x%llx", (ulonglong) first));
1447 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1448 (ulonglong) m_first, (ulonglong) &m_first,
1449 (ulonglong) m_last));
1450 bool empty= (m_first == NULL);
1451 *m_last= first;
1452 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1453 (ulonglong) m_first, (ulonglong) &m_first,
1454 (ulonglong) m_last));
1455 /*
1456 Go to the last THD instance of the list. We expect lists to be
1457 moderately short. If they are not, we need to track the end of
1458 the queue as well.
1459 */
1460 while (first->next_to_commit)
1461 first= first->next_to_commit;
1462 m_last= &first->next_to_commit;
1463 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1464 (ulonglong) m_first, (ulonglong) &m_first,
1465 (ulonglong) m_last));
1466 DBUG_ASSERT(m_first || m_last == &m_first);
1467 DBUG_PRINT("return", ("empty: %s", YESNO(empty)));
1468 unlock();
1469 DBUG_RETURN(empty);
1470 }
1471
1472
1473 std::pair<bool, THD*>
pop_front()1474 Stage_manager::Mutex_queue::pop_front()
1475 {
1476 DBUG_ENTER("Stage_manager::Mutex_queue::pop_front");
1477 lock();
1478 THD *result= m_first;
1479 bool more= true;
1480 /*
1481 We do not set next_to_commit to NULL here since this is only used
1482 in the flush stage. We will have to call fetch_queue last here,
1483 and will then "cut" the linked list by setting the end of that
1484 queue to NULL.
1485 */
1486 if (result)
1487 m_first= result->next_to_commit;
1488 if (m_first == NULL)
1489 {
1490 more= false;
1491 m_last = &m_first;
1492 }
1493 DBUG_ASSERT(m_first || m_last == &m_first);
1494 unlock();
1495 DBUG_PRINT("return", ("result: 0x%llx, more: %s",
1496 (ulonglong) result, YESNO(more)));
1497 DBUG_RETURN(std::make_pair(more, result));
1498 }
1499
1500
1501 bool
enroll_for(StageID stage,THD * thd,mysql_mutex_t * stage_mutex)1502 Stage_manager::enroll_for(StageID stage, THD *thd, mysql_mutex_t *stage_mutex)
1503 {
1504 // If the queue was empty: we're the leader for this batch
1505 DBUG_PRINT("debug", ("Enqueue 0x%llx to queue for stage %d",
1506 (ulonglong) thd, stage));
1507 bool leader= m_queue[stage].append(thd);
1508
1509 /*
1510 The stage mutex can be NULL if we are enrolling for the first
1511 stage.
1512 */
1513 if (stage_mutex)
1514 mysql_mutex_unlock(stage_mutex);
1515
1516 /*
1517 If the queue was not empty, we're a follower and wait for the
1518 leader to process the queue. If we were holding a mutex, we have
1519 to release it before going to sleep.
1520 */
1521 if (!leader)
1522 {
1523 mysql_mutex_lock(&m_lock_done);
1524 #ifndef DBUG_OFF
1525 /*
1526 Leader can be awaiting all-clear to preempt follower's execution.
1527 With setting the status the follower ensures it won't execute anything
1528 including thread-specific code.
1529 */
1530 thd->transaction.flags.ready_preempt= 1;
1531 if (leader_await_preempt_status)
1532 mysql_cond_signal(&m_cond_preempt);
1533 #endif
1534 while (thd->transaction.flags.pending)
1535 mysql_cond_wait(&m_cond_done, &m_lock_done);
1536 mysql_mutex_unlock(&m_lock_done);
1537 }
1538 return leader;
1539 }
1540
1541
fetch_and_empty()1542 THD *Stage_manager::Mutex_queue::fetch_and_empty()
1543 {
1544 DBUG_ENTER("Stage_manager::Mutex_queue::fetch_and_empty");
1545 lock();
1546 DBUG_PRINT("enter", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1547 (ulonglong) m_first, (ulonglong) &m_first,
1548 (ulonglong) m_last));
1549 THD *result= m_first;
1550 m_first= NULL;
1551 m_last= &m_first;
1552 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
1553 (ulonglong) m_first, (ulonglong) &m_first,
1554 (ulonglong) m_last));
1555 DBUG_ASSERT(m_first || m_last == &m_first);
1556 DBUG_PRINT("return", ("result: 0x%llx", (ulonglong) result));
1557 unlock();
1558 DBUG_RETURN(result);
1559 }
1560
1561 #ifndef DBUG_OFF
clear_preempt_status(THD * head)1562 void Stage_manager::clear_preempt_status(THD *head)
1563 {
1564 DBUG_ASSERT(head);
1565
1566 mysql_mutex_lock(&m_lock_done);
1567 while(!head->transaction.flags.ready_preempt)
1568 {
1569 leader_await_preempt_status= true;
1570 mysql_cond_wait(&m_cond_preempt, &m_lock_done);
1571 }
1572 leader_await_preempt_status= false;
1573 mysql_mutex_unlock(&m_lock_done);
1574 }
1575 #endif
1576
/**
  Write a rollback record of the transaction to the binary log.

  For binary log group commit, the rollback is separated into three
  parts:

  1. First part consists of filling the necessary caches and
     finalizing them (if they need to be finalized). After a cache is
     finalized, nothing can be added to the cache.

  2. Second part execute an ordered flush and commit. This will be
     done using the group commit functionality in @c ordered_commit.

     Since we roll back the transaction early, we call @c
     ordered_commit with the @c skip_commit flag set. The @c
     ha_commit_low call inside @c ordered_commit will then not be
     called.

  3. Third part checks any errors resulting from the flush and handles
     them appropriately.

  @see MYSQL_BIN_LOG::ordered_commit
  @see ha_commit_low
  @see ha_rollback_low

  @param thd Session whose transaction or statement is rolled back
  @param all This is @c true if this is a real transaction rollback, and
             @c false otherwise.

  @return Error code, or zero if there were no error.
*/

int MYSQL_BIN_LOG::rollback(THD *thd, bool all)
{
  int error= 0;
  /* Set once a cache has been finalized and therefore must be flushed. */
  bool stuff_logged= false;

  binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
  DBUG_ENTER("MYSQL_BIN_LOG::rollback(THD *thd, bool all)");
  DBUG_PRINT("enter", ("all: %s, cache_mngr: 0x%llx, thd->is_error: %s",
                       YESNO(all), (ulonglong) cache_mngr, YESNO(thd->is_error())));

  /*
    We roll back the transaction in the engines early since this will
    release locks and allow other transactions to start executing.

    If we are executing a ROLLBACK TO SAVEPOINT, we should only clear
    the caches since this function is called as part of the engine
    rollback.
  */
  if (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT)
    if ((error= ha_rollback_low(thd, all)))
      goto end;

  /*
    If there is no cache manager, or if there is nothing in the
    caches, there are no caches to roll back, so we're trivially done.
    From here on cache_mngr is known to be non-NULL.
  */
  if (cache_mngr == NULL || cache_mngr->is_binlog_empty())
    goto end;

  DBUG_PRINT("debug",
             ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
              YESNO(thd->transaction.all.cannot_safely_rollback()),
              YESNO(cache_mngr->trx_cache.is_binlog_empty())));
  DBUG_PRINT("debug",
             ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
              YESNO(thd->transaction.stmt.cannot_safely_rollback()),
              YESNO(cache_mngr->stmt_cache.is_binlog_empty())));

  /*
    If an incident event is set we do not flush the content of the statement
    cache because it may be corrupted. Otherwise a non-empty statement
    cache is finalized so that it gets flushed below.
  */
  if (cache_mngr->stmt_cache.has_incident())
  {
    error= write_incident(thd, true/*need_lock_log=true*/);
    cache_mngr->stmt_cache.reset();
  }
  else if (!cache_mngr->stmt_cache.is_binlog_empty())
  {
    if ((error= cache_mngr->stmt_cache.finalize(thd)))
      goto end;
    stuff_logged= true;
  }

  if (ending_trans(thd, all))
  {
    if (trans_cannot_safely_rollback(thd))
    {
      /*
        If the transaction is being rolled back and contains changes that
        cannot be rolled back, the trx-cache's content is flushed, with a
        "ROLLBACK" query event as its end event.
      */
      Query_log_event
        end_evt(thd, STRING_WITH_LEN("ROLLBACK"), true, false, true, 0, true);
      error= cache_mngr->trx_cache.finalize(thd, &end_evt);
      stuff_logged= true;
    }
    else
    {
      /*
        If the transaction is being rolled back and its changes can be
        rolled back, the trx-cache's content is truncated.
      */
      error= cache_mngr->trx_cache.truncate(thd, all);
    }
  }
  else
  {
    /*
      If a statement is being rolled back, it is necessary to know
      exactly why a statement may not be safely rolled back as in
      some specific situations the trx-cache can be truncated.

      If a temporary table is created or dropped, the trx-cache is not
      truncated. Note that if the stmt-cache is used, there is nothing
      to truncate in the trx-cache.

      If a non-transactional table is updated and the binlog format is
      statement, the trx-cache is not truncated. The trx-cache is used
      when the direct option is off and a transactional table has been
      updated before the current statement in the context of the
      current transaction. Note that if the stmt-cache is used there is
      nothing to truncate in the trx-cache.

      If other binlog formats are used, updates to non-transactional
      tables are written to the stmt-cache and trx-cache can be safely
      truncated, if necessary.
    */
    if (thd->transaction.stmt.has_dropped_temp_table() ||
        thd->transaction.stmt.has_created_temp_table() ||
        (thd->transaction.stmt.has_modified_non_trans_table() &&
        thd->variables.binlog_format == BINLOG_FORMAT_STMT))
    {
      /*
        If the statement is being rolled back and dropped or created a
        temporary table or modified a non-transactional table and the
        statement-based replication is in use, the statement's changes
        in the trx-cache are preserved. Marking the previous position as
        undefined is what keeps a later truncate() from cutting them.
      */
      cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
    }
    else
    {
      /*
        Otherwise, the statement's changes in the trx-cache are
        truncated.
      */
      error= cache_mngr->trx_cache.truncate(thd, all);
    }
  }

  /* Flush only when nothing failed so far and a cache was finalized. */
  DBUG_PRINT("debug", ("error: %d", error));
  if (error == 0 && stuff_logged)
    error= ordered_commit(thd, all, /* skip_commit */ true);

  /*
    NOTE(review): check_write_error() presumably reports that writing to
    a cache failed for this session - confirm against its definition.
  */
  if (check_write_error(thd))
  {
    /*
      "all == true" means that a "rollback statement" triggered the error and
      this function was called. However, this must not happen as a rollback
      is written directly to the binary log. And in auto-commit mode, a single
      statement that is rolled back has the flag all == false.
    */
    DBUG_ASSERT(!all);
    /*
      We reach this point if the effect of a statement did not properly get into
      a cache and need to be rolled back. The result is OR-ed in so that an
      earlier error is not lost.
    */
    error |= cache_mngr->trx_cache.truncate(thd, all);
  }

end:
  /*
    When a statement errors out in auto-commit mode it is rolled back
    implicitly, so the same should happen to its GTID.
  */
  if (!thd->in_active_multi_stmt_transaction())
    gtid_rollback(thd);

  DBUG_PRINT("return", ("error: %d", error));
  DBUG_RETURN(error);
}
1761
1762 /**
1763 @note
1764 How do we handle this (unlikely but legal) case:
1765 @verbatim
1766 [transaction] + [update to non-trans table] + [rollback to savepoint] ?
1767 @endverbatim
1768 The problem occurs when a savepoint is before the update to the
1769 non-transactional table. Then when there's a rollback to the savepoint, if we
1770 simply truncate the binlog cache, we lose the part of the binlog cache where
1771 the update is. If we want to not lose it, we need to write the SAVEPOINT
1772 command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
  is easy: it is simply written at the end of the binlog cache, but the former
1774 should be *inserted* to the place where the user called SAVEPOINT. The
1775 solution is that when the user calls SAVEPOINT, we write it to the binlog
1776 cache (so no need to later insert it). As transactions are never intermixed
1777 in the binary log (i.e. they are serialized), we won't have conflicts with
1778 savepoint names when using mysqlbinlog or in the slave SQL thread.
1779 Then when ROLLBACK TO SAVEPOINT is called, if we updated some
1780 non-transactional table, we don't truncate the binlog cache but instead write
1781 ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
1782 will chop the SAVEPOINT command from the binlog cache, which is good as in
1783 that case there is no need to have it in the binlog).
1784 */
1785
binlog_savepoint_set(handlerton * hton,THD * thd,void * sv)1786 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
1787 {
1788 DBUG_ENTER("binlog_savepoint_set");
1789 int error= 1;
1790
1791 String log_query;
1792 if (log_query.append(STRING_WITH_LEN("SAVEPOINT ")))
1793 DBUG_RETURN(error);
1794 else
1795 append_identifier(thd, &log_query, thd->lex->ident.str,
1796 thd->lex->ident.length);
1797
1798 int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
1799 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
1800 TRUE, FALSE, TRUE, errcode);
1801 /*
1802 We cannot record the position before writing the statement
1803 because a rollback to a savepoint (.e.g. consider it "S") would
1804 prevent the savepoint statement (i.e. "SAVEPOINT S") from being
1805 written to the binary log despite the fact that the server could
1806 still issue other rollback statements to the same savepoint (i.e.
1807 "S").
1808 Given that the savepoint is valid until the server releases it,
1809 ie, until the transaction commits or it is released explicitly,
1810 we need to log it anyway so that we don't have "ROLLBACK TO S"
1811 or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
1812 log.
1813 */
1814 if (!(error= mysql_bin_log.write_event(&qinfo)))
1815 binlog_trans_log_savepos(thd, (my_off_t*) sv);
1816
1817 DBUG_RETURN(error);
1818 }
1819
binlog_savepoint_rollback(handlerton * hton,THD * thd,void * sv)1820 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
1821 {
1822 DBUG_ENTER("binlog_savepoint_rollback");
1823 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
1824 my_off_t pos= *(my_off_t*) sv;
1825 DBUG_ASSERT(pos != ~(my_off_t) 0);
1826
1827 /*
1828 Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
1829 non-transactional table. Otherwise, truncate the binlog cache starting
1830 from the SAVEPOINT command.
1831 */
1832 if (trans_cannot_safely_rollback(thd))
1833 {
1834 String log_query;
1835 if (log_query.append(STRING_WITH_LEN("ROLLBACK TO ")))
1836 DBUG_RETURN(1);
1837 else
1838 {
1839 /*
1840 Before writing identifier to the binlog, make sure to
1841 quote the identifier properly so as to prevent any SQL
1842 injection on the slave.
1843 */
1844 append_identifier(thd, &log_query, thd->lex->ident.str,
1845 thd->lex->ident.length);
1846 }
1847
1848 int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
1849 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
1850 TRUE, FALSE, TRUE, errcode);
1851 DBUG_RETURN(mysql_bin_log.write_event(&qinfo));
1852 }
1853 // Otherwise, we truncate the cache
1854 cache_mngr->trx_cache.restore_savepoint(pos);
1855 /*
1856 When a SAVEPOINT is executed inside a stored function/trigger we force the
1857 pending event to be flushed with a STMT_END_F flag and clear the table maps
1858 as well to ensure that following DMLs will have a clean state to start
1859 with. ROLLBACK inside a stored routine has to finalize possibly existing
1860 current row-based pending event with cleaning up table maps. That ensures
1861 that following DMLs will have a clean state to start with.
1862 */
1863 if (thd->in_sub_stmt)
1864 thd->clear_binlog_table_maps();
1865 if (cache_mngr->trx_cache.is_binlog_empty())
1866 cache_mngr->trx_cache.group_cache.clear();
1867 DBUG_RETURN(0);
1868 }
1869
1870 /**
1871 Check whether binlog state allows to safely release MDL locks after
1872 rollback to savepoint.
1873
1874 @param hton The binlog handlerton.
1875 @param thd The client thread that executes the transaction.
1876
1877 @return true - It is safe to release MDL locks.
1878 false - If it is not.
1879 */
binlog_savepoint_rollback_can_release_mdl(handlerton * hton,THD * thd)1880 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
1881 THD *thd)
1882 {
1883 DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
1884 /*
1885 If we have not updated any non-transactional tables rollback
1886 to savepoint will simply truncate binlog cache starting from
1887 SAVEPOINT command. So it should be safe to release MDL acquired
1888 after SAVEPOINT command in this case.
1889 */
1890 DBUG_RETURN(!trans_cannot_safely_rollback(thd));
1891 }
1892
1893 #ifdef HAVE_REPLICATION
1894
1895 /*
1896 Adjust the position pointer in the binary log file for all running slaves
1897
1898 SYNOPSIS
1899 adjust_linfo_offsets()
1900 purge_offset Number of bytes removed from start of log index file
1901
1902 NOTES
1903 - This is called when doing a PURGE when we delete lines from the
1904 index log file
1905
1906 REQUIREMENTS
1907 - Before calling this function, we have to ensure that no threads are
1908 using any binary log file before purge_offset.a
1909
1910 TODO
1911 - Inform the slave threads that they should sync the position
1912 in the binary log file with flush_relay_log_info.
1913 Now they sync is done for next read.
1914 */
1915
adjust_linfo_offsets(my_off_t purge_offset)1916 static void adjust_linfo_offsets(my_off_t purge_offset)
1917 {
1918 mysql_mutex_lock(&LOCK_thread_count);
1919
1920 Thread_iterator it= global_thread_list_begin();
1921 Thread_iterator end= global_thread_list_end();
1922 for (; it != end; ++it)
1923 {
1924 LOG_INFO* linfo;
1925 if ((linfo = (*it)->current_linfo))
1926 {
1927 mysql_mutex_lock(&linfo->lock);
1928 /*
1929 Index file offset can be less that purge offset only if
1930 we just started reading the index file. In that case
1931 we have nothing to adjust
1932 */
1933 if (linfo->index_file_offset < purge_offset)
1934 linfo->fatal = (linfo->index_file_offset != 0);
1935 else
1936 linfo->index_file_offset -= purge_offset;
1937 mysql_mutex_unlock(&linfo->lock);
1938 }
1939 }
1940 mysql_mutex_unlock(&LOCK_thread_count);
1941 }
1942
1943
log_in_use(const char * log_name)1944 static int log_in_use(const char* log_name)
1945 {
1946 size_t log_name_len = strlen(log_name) + 1;
1947 int thread_count=0;
1948 #ifndef DBUG_OFF
1949 if (current_thd)
1950 DEBUG_SYNC(current_thd,"purge_logs_after_lock_index_before_thread_count");
1951 #endif
1952 mysql_mutex_lock(&LOCK_thread_count);
1953
1954 Thread_iterator it= global_thread_list_begin();
1955 Thread_iterator end= global_thread_list_end();
1956 for (; it != end; ++it)
1957 {
1958 LOG_INFO* linfo;
1959 if ((linfo = (*it)->current_linfo))
1960 {
1961 mysql_mutex_lock(&linfo->lock);
1962 if(!strncmp(log_name, linfo->log_file_name, log_name_len))
1963 {
1964 thread_count++;
1965 sql_print_warning("file %s was not purged because it was being read"
1966 "by thread number %llu", log_name,
1967 (ulonglong)(*it)->thread_id);
1968 }
1969 mysql_mutex_unlock(&linfo->lock);
1970 }
1971 }
1972
1973 mysql_mutex_unlock(&LOCK_thread_count);
1974 return thread_count;
1975 }
1976
purge_error_message(THD * thd,int res)1977 static bool purge_error_message(THD* thd, int res)
1978 {
1979 uint errcode;
1980
1981 if ((errcode= purge_log_get_error_code(res)) != 0)
1982 {
1983 my_message(errcode, ER(errcode), MYF(0));
1984 return TRUE;
1985 }
1986 my_ok(thd);
1987 return FALSE;
1988 }
1989
1990 #endif /* HAVE_REPLICATION */
1991
check_binlog_magic(IO_CACHE * log,const char ** errmsg)1992 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
1993 {
1994 char magic[4];
1995 DBUG_ASSERT(my_b_tell(log) == 0);
1996
1997 if (my_b_read(log, (uchar*) magic, sizeof(magic)))
1998 {
1999 *errmsg = "I/O error reading the header from the binary log";
2000 sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno,
2001 log->error);
2002 return 1;
2003 }
2004 if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
2005 {
2006 *errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL";
2007 return 1;
2008 }
2009 return 0;
2010 }
2011
2012
open_binlog_file(IO_CACHE * log,const char * log_file_name,const char ** errmsg)2013 File open_binlog_file(IO_CACHE *log, const char *log_file_name, const char **errmsg)
2014 {
2015 File file;
2016 DBUG_ENTER("open_binlog_file");
2017
2018 if ((file= mysql_file_open(key_file_binlog,
2019 log_file_name, O_RDONLY | O_BINARY | O_SHARE,
2020 MYF(MY_WME))) < 0)
2021 {
2022 sql_print_error("Failed to open log (file '%s', errno %d)",
2023 log_file_name, my_errno);
2024 *errmsg = "Could not open log file";
2025 goto err;
2026 }
2027 if (init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
2028 MYF(MY_WME|MY_DONT_CHECK_FILESIZE)))
2029 {
2030 sql_print_error("Failed to create a cache on log (file '%s')",
2031 log_file_name);
2032 *errmsg = "Could not open log file";
2033 goto err;
2034 }
2035 if (check_binlog_magic(log,errmsg))
2036 goto err;
2037 DBUG_RETURN(file);
2038
2039 err:
2040 if (file >= 0)
2041 {
2042 mysql_file_close(file, MYF(0));
2043 end_io_cache(log);
2044 }
2045 DBUG_RETURN(-1);
2046 }
2047
2048 /**
2049 This function checks if a transactional table was updated by the
2050 current transaction.
2051
2052 @param thd The client thread that executed the current statement.
2053 @return
2054 @c true if a transactional table was updated, @c false otherwise.
2055 */
2056 bool
trans_has_updated_trans_table(const THD * thd)2057 trans_has_updated_trans_table(const THD* thd)
2058 {
2059 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
2060
2061 return (cache_mngr ? !cache_mngr->trx_cache.is_binlog_empty() : 0);
2062 }
2063
2064 /**
2065 This function checks if a transactional table was updated by the
2066 current statement.
2067
2068 @param ha_list Registered storage engine handler list.
2069 @return
2070 @c true if a transactional table was updated, @c false otherwise.
2071 */
2072 bool
stmt_has_updated_trans_table(Ha_trx_info * ha_list)2073 stmt_has_updated_trans_table(Ha_trx_info* ha_list)
2074 {
2075 Ha_trx_info *ha_info;
2076
2077 for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
2078 {
2079 if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
2080 return (TRUE);
2081 }
2082 return (FALSE);
2083 }
2084
2085 /**
2086 This function checks if a transaction, either a multi-statement
2087 or a single statement transaction is about to commit or not.
2088
2089 @param thd The client thread that executed the current statement.
2090 @param all Committing a transaction (i.e. TRUE) or a statement
2091 (i.e. FALSE).
2092 @return
2093 @c true if committing a transaction, otherwise @c false.
2094 */
ending_trans(THD * thd,const bool all)2095 bool ending_trans(THD* thd, const bool all)
2096 {
2097 return (all || ending_single_stmt_trans(thd, all));
2098 }
2099
2100 /**
2101 This function checks if a single statement transaction is about
2102 to commit or not.
2103
2104 @param thd The client thread that executed the current statement.
2105 @param all Committing a transaction (i.e. TRUE) or a statement
2106 (i.e. FALSE).
2107 @return
2108 @c true if committing a single statement transaction, otherwise
2109 @c false.
2110 */
ending_single_stmt_trans(THD * thd,const bool all)2111 bool ending_single_stmt_trans(THD* thd, const bool all)
2112 {
2113 return (!all && !thd->in_multi_stmt_transaction_mode());
2114 }
2115
2116 /**
2117 This function checks if a transaction cannot be rolled back safely.
2118
2119 @param thd The client thread that executed the current statement.
2120 @return
2121 @c true if cannot be safely rolled back, @c false otherwise.
2122 */
trans_cannot_safely_rollback(const THD * thd)2123 bool trans_cannot_safely_rollback(const THD* thd)
2124 {
2125 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
2126
2127 return cache_mngr->trx_cache.cannot_rollback();
2128 }
2129
2130 /**
2131 This function checks if current statement cannot be rollded back safely.
2132
2133 @param thd The client thread that executed the current statement.
2134 @return
2135 @c true if cannot be safely rolled back, @c false otherwise.
2136 */
stmt_cannot_safely_rollback(const THD * thd)2137 bool stmt_cannot_safely_rollback(const THD* thd)
2138 {
2139 return thd->transaction.stmt.cannot_safely_rollback();
2140 }
2141
2142 #ifndef EMBEDDED_LIBRARY
2143 /**
2144 Execute a PURGE BINARY LOGS TO <log> command.
2145
2146 @param thd Pointer to THD object for the client thread executing the
2147 statement.
2148
2149 @param to_log Name of the last log to purge.
2150
2151 @retval FALSE success
2152 @retval TRUE failure
2153 */
purge_master_logs(THD * thd,const char * to_log)2154 bool purge_master_logs(THD* thd, const char* to_log)
2155 {
2156 char search_file_name[FN_REFLEN];
2157 if (!mysql_bin_log.is_open())
2158 {
2159 my_ok(thd);
2160 return FALSE;
2161 }
2162
2163 mysql_bin_log.make_log_name(search_file_name, to_log);
2164 return purge_error_message(thd,
2165 mysql_bin_log.purge_logs(search_file_name, false,
2166 true/*need_lock_index=true*/,
2167 true/*need_update_threads=true*/,
2168 NULL, false));
2169 }
2170
2171
2172 /**
2173 Execute a PURGE BINARY LOGS BEFORE <date> command.
2174
2175 @param thd Pointer to THD object for the client thread executing the
2176 statement.
2177
2178 @param purge_time Date before which logs should be purged.
2179
2180 @retval FALSE success
2181 @retval TRUE failure
2182 */
purge_master_logs_before_date(THD * thd,time_t purge_time)2183 bool purge_master_logs_before_date(THD* thd, time_t purge_time)
2184 {
2185 if (!mysql_bin_log.is_open())
2186 {
2187 my_ok(thd);
2188 return 0;
2189 }
2190 return purge_error_message(thd,
2191 mysql_bin_log.purge_logs_before_date(purge_time,
2192 false));
2193 }
2194 #endif /* EMBEDDED_LIBRARY */
2195
2196 /*
2197 Helper function to get the error code of the query to be binlogged.
2198 */
query_error_code(THD * thd,bool not_killed)2199 int query_error_code(THD *thd, bool not_killed)
2200 {
2201 int error;
2202
2203 if (not_killed || (thd->killed == THD::KILL_BAD_DATA))
2204 {
2205 error= thd->is_error() ? thd->get_stmt_da()->sql_errno() : 0;
2206
2207 /* thd->get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
2208 ER_QUERY_INTERRUPTED, So here we need to make sure that error
2209 is not set to these errors when specified not_killed by the
2210 caller.
2211 */
2212 if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED)
2213 error= 0;
2214 }
2215 else
2216 {
2217 /* killed status for DELAYED INSERT thread should never be used */
2218 DBUG_ASSERT(!(thd->system_thread & SYSTEM_THREAD_DELAYED_INSERT));
2219 error= thd->killed_errno();
2220 }
2221
2222 return error;
2223 }
2224
2225
2226 /**
2227 Copy content of 'from' file from offset to 'to' file.
2228
2229 - We do the copy outside of the IO_CACHE as the cache
2230 buffers would just make things slower and more complicated.
2231 In most cases the copy loop should only do one read.
2232
2233 @param from File to copy.
2234 @param to File to copy to.
2235 @param offset Offset in 'from' file.
2236
2237
2238 @retval
2239 0 ok
2240 @retval
2241 -1 error
2242 */
copy_file(IO_CACHE * from,IO_CACHE * to,my_off_t offset)2243 static bool copy_file(IO_CACHE *from, IO_CACHE *to, my_off_t offset)
2244 {
2245 int bytes_read;
2246 uchar io_buf[IO_SIZE*2];
2247 DBUG_ENTER("copy_file");
2248
2249 mysql_file_seek(from->file, offset, MY_SEEK_SET, MYF(0));
2250 while(TRUE)
2251 {
2252 if ((bytes_read= (int) mysql_file_read(from->file, io_buf, sizeof(io_buf),
2253 MYF(MY_WME)))
2254 < 0)
2255 goto err;
2256 if (DBUG_EVALUATE_IF("fault_injection_copy_part_file", 1, 0))
2257 bytes_read= bytes_read/2;
2258 if (!bytes_read)
2259 break; // end of file
2260 if (mysql_file_write(to->file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
2261 goto err;
2262 }
2263
2264 DBUG_RETURN(0);
2265
2266 err:
2267 DBUG_RETURN(1);
2268 }
2269
2270
2271 #ifdef HAVE_REPLICATION
2272 /**
2273 Load data's io cache specific hook to be executed
2274 before a chunk of data is being read into the cache's buffer
2275 The fuction instantianates and writes into the binlog
2276 replication events along LOAD DATA processing.
2277
2278 @param file pointer to io-cache
2279 @retval 0 success
2280 @retval 1 failure
2281 */
log_loaded_block(IO_CACHE * file)2282 int log_loaded_block(IO_CACHE* file)
2283 {
2284 DBUG_ENTER("log_loaded_block");
2285 LOAD_FILE_INFO *lf_info;
2286 uint block_len;
2287 /* buffer contains position where we started last read */
2288 uchar* buffer= (uchar*) my_b_get_buffer_start(file);
2289 uint max_event_size= current_thd->variables.max_allowed_packet;
2290 lf_info= (LOAD_FILE_INFO*) file->arg;
2291 if (lf_info->thd->is_current_stmt_binlog_format_row())
2292 DBUG_RETURN(0);
2293 if (lf_info->last_pos_in_file != HA_POS_ERROR &&
2294 lf_info->last_pos_in_file >= my_b_get_pos_in_file(file))
2295 DBUG_RETURN(0);
2296
2297 for (block_len= (uint) (my_b_get_bytes_in_buffer(file)); block_len > 0;
2298 buffer += min(block_len, max_event_size),
2299 block_len -= min(block_len, max_event_size))
2300 {
2301 lf_info->last_pos_in_file= my_b_get_pos_in_file(file);
2302 if (lf_info->wrote_create_file)
2303 {
2304 Append_block_log_event a(lf_info->thd, lf_info->thd->db, buffer,
2305 min(block_len, max_event_size),
2306 lf_info->log_delayed);
2307 if (mysql_bin_log.write_event(&a))
2308 DBUG_RETURN(1);
2309 }
2310 else
2311 {
2312 Begin_load_query_log_event b(lf_info->thd, lf_info->thd->db,
2313 buffer,
2314 min(block_len, max_event_size),
2315 lf_info->log_delayed);
2316 if (mysql_bin_log.write_event(&b))
2317 DBUG_RETURN(1);
2318 lf_info->wrote_create_file= 1;
2319 }
2320 }
2321 DBUG_RETURN(0);
2322 }
2323
/*
  Sends the events of one log file of binary_log to the client, one
  row per event, honouring the statement's start position and LIMIT.

  - Asserts that the statement is SHOW BINLOG EVENTS or SHOW RELAYLOG
    EVENTS.
  - If binary_log is not open, no rows are sent and the function ends
    with my_eof() and returns FALSE.
  - The log file is the one named in the statement (thd->lex->mi), or
    the first entry of the index when no name was given. Reading
    starts at max(BIN_LOG_HEADER_SIZE, requested position).
  - While the file is being read, thd->current_linfo points to the
    local LOG_INFO; it is reset to 0 before returning.
  - thd->variables.max_allowed_packet is raised by
    MAX_LOG_EVENT_HEADER while reading and restored before returning.

  Returns FALSE on success and TRUE on failure; on failure
  ER_ERROR_WHEN_EXECUTING_COMMAND is raised with a short reason.
*/
2324 /* Helper function for SHOW BINLOG/RELAYLOG EVENTS */
show_binlog_events(THD * thd,MYSQL_BIN_LOG * binary_log)2325 bool show_binlog_events(THD *thd, MYSQL_BIN_LOG *binary_log)
2326 {
2327 Protocol *protocol= thd->protocol;
/* NOTE(review): field_list is declared but not referenced in this function. */
2328 List<Item> field_list;
2329 const char *errmsg = 0;
2330 bool ret = TRUE;
2331 IO_CACHE log;
2332 File file = -1;
/*
  NOTE(review): the saved value is held in an int; confirm this is wide
  enough for the type of variables.max_allowed_packet.
*/
2333 int old_max_allowed_packet= thd->variables.max_allowed_packet;
2334 LOG_INFO linfo;
2335
2336 DBUG_ENTER("show_binlog_events");
2337
2338 DBUG_ASSERT(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS ||
2339 thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS);
2340
/*
  Description event used to decode the first event of the file; it is
  replaced below if the file starts with its own format description.
*/
2341 Format_description_log_event *description_event= new
2342 Format_description_log_event(3); /* MySQL 4.0 by default */
2343
2344 if (binary_log->is_open())
2345 {
2346 LEX_MASTER_INFO *lex_mi= &thd->lex->mi;
2347 SELECT_LEX_UNIT *unit= &thd->lex->unit;
2348 ha_rows event_count, limit_start, limit_end;
2349 my_off_t pos = max<my_off_t>(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly
2350 char search_file_name[FN_REFLEN], *name;
2351 const char *log_file_name = lex_mi->log_file_name;
2352 mysql_mutex_t *log_lock = binary_log->get_log_lock();
2353 Log_event* ev;
2354
/* LIMIT offset and row count of the statement. */
2355 unit->set_limit(thd->lex->current_select);
2356 limit_start= unit->offset_limit_cnt;
2357 limit_end= unit->select_limit_cnt;
2358
2359 name= search_file_name;
2360 if (log_file_name)
2361 binary_log->make_log_name(search_file_name, log_file_name);
2362 else
2363 name=0; // Find first log
2364
2365 linfo.index_file_offset = 0;
2366
2367 if (binary_log->find_log_pos(&linfo, name, true/*need_lock_index=true*/))
2368 {
2369 errmsg = "Could not find target log";
2370 goto err;
2371 }
2372
/*
  Publish the LOG_INFO of this thread under LOCK_thread_count; code
  that walks the global thread list reads current_linfo under the
  same lock.
*/
2373 mysql_mutex_lock(&LOCK_thread_count);
2374 thd->current_linfo = &linfo;
2375 mysql_mutex_unlock(&LOCK_thread_count);
2376
2377 if ((file=open_binlog_file(&log, linfo.log_file_name, &errmsg)) < 0)
2378 goto err;
2379
2380 my_off_t end_pos;
2381 /*
2382 Acquire LOCK_log only for the duration to calculate the
2383 log's end position. LOCK_log should be acquired even while
2384 we are checking whether the log is active log or not.
2385 */
2386 mysql_mutex_lock(log_lock);
2387 if (binary_log->is_active(linfo.log_file_name))
2388 {
2389 LOG_INFO li;
2390 binary_log->get_current_log(&li, false /*LOCK_log is already acquired*/);
2391 end_pos= li.pos;
2392 }
2393 else
2394 {
2395 end_pos= my_b_filelength(&log);
2396 }
2397 mysql_mutex_unlock(log_lock);
2398
2399 /*
2400 to account binlog event header size
2401 */
2402 thd->variables.max_allowed_packet += MAX_LOG_EVENT_HEADER;
2403
2404 DEBUG_SYNC(thd, "after_show_binlog_event_found_file");
2405
2406 /*
2407 open_binlog_file() sought to position 4.
2408 Read the first event in case it's a Format_description_log_event, to
2409 know the format. If there's no such event, we are 3.23 or 4.x. This
2410 code, like before, can't read 3.23 binlogs.
2411 This code will fail on a mixed relay log (one which has Format_desc then
2412 Rotate then Format_desc).
2413 */
2414 ev= Log_event::read_log_event(&log, (mysql_mutex_t*)0, description_event,
2415 opt_master_verify_checksum);
2416 if (ev)
2417 {
2418 if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
2419 {
2420 delete description_event;
2421 description_event= (Format_description_log_event*) ev;
2422 }
2423 else
2424 delete ev;
2425 }
2426
/* Move to the position at which the listing starts. */
2427 my_b_seek(&log, pos);
2428
2429 if (!description_event->is_valid())
2430 {
2431 errmsg="Invalid Format_description event; could be out of memory";
2432 goto err;
2433 }
2434
2435 for (event_count = 0;
2436 (ev = Log_event::read_log_event(&log, (mysql_mutex_t*) 0,
2437 description_event,
2438 opt_master_verify_checksum)); )
2439 {
2440 DEBUG_SYNC(thd, "wait_in_show_binlog_events_loop");
2441 if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
2442 description_event->checksum_alg= ev->checksum_alg;
2443
/*
  Events before the LIMIT offset are read and counted but not sent;
  pos is the start position of the event being sent.
*/
2444 if (event_count >= limit_start &&
2445 ev->net_send(protocol, linfo.log_file_name, pos))
2446 {
2447 errmsg = "Net error";
2448 delete ev;
2449 goto err;
2450 }
2451
2452 pos = my_b_tell(&log);
2453 delete ev;
2454
/* Stop at the LIMIT row count or at the end position computed above. */
2455 if (++event_count >= limit_end || pos >= end_pos)
2456 break;
2457 }
2458
/*
  The loop also ends when no event could be read; that is reported as
  an error only if the cache has its error field set and the LIMIT
  was not reached.
*/
2459 if (event_count < limit_end && log.error)
2460 {
2461 errmsg = "Wrong offset or I/O error";
2462 goto err;
2463 }
2464
2465 }
2466 // Check that linfo is still on the function scope.
2467 DEBUG_SYNC(thd, "after_show_binlog_events");
2468
2469 ret= FALSE;
2470
/* Common exit path: reached on success as well as through goto err. */
2471 err:
2472 delete description_event;
2473 if (file >= 0)
2474 {
2475 end_io_cache(&log);
2476 mysql_file_close(file, MYF(MY_WME));
2477 }
2478
/*
  NOTE(review): the command name in the error is always
  "SHOW BINLOG EVENTS", although the assertion at the top of the
  function also admits SHOW RELAYLOG EVENTS.
*/
2479 if (errmsg)
2480 my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
2481 "SHOW BINLOG EVENTS", errmsg);
2482 else
2483 my_eof(thd);
2484
/* Unpublish the local LOG_INFO before it goes out of scope. */
2485 mysql_mutex_lock(&LOCK_thread_count);
2486 thd->current_linfo = 0;
2487 mysql_mutex_unlock(&LOCK_thread_count);
2488 thd->variables.max_allowed_packet= old_max_allowed_packet;
2489 DBUG_RETURN(ret);
2490 }
2491
2492 /**
2493 Execute a SHOW BINLOG EVENTS statement.
2494
2495 @param thd Pointer to THD object for the client thread executing the
2496 statement.
2497
2498 @retval FALSE success
2499 @retval TRUE failure
2500 */
mysql_show_binlog_events(THD * thd)2501 bool mysql_show_binlog_events(THD* thd)
2502 {
2503 Protocol *protocol= thd->protocol;
2504 List<Item> field_list;
2505 DBUG_ENTER("mysql_show_binlog_events");
2506
2507 DBUG_ASSERT(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS);
2508
2509 Log_event::init_show_field_list(&field_list);
2510 if (protocol->send_result_set_metadata(&field_list,
2511 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
2512 DBUG_RETURN(TRUE);
2513
2514 /*
2515 Wait for handlers to insert any pending information
2516 into the binlog. For e.g. ndb which updates the binlog asynchronously
2517 this is needed so that the uses sees all its own commands in the binlog
2518 */
2519 ha_binlog_wait(thd);
2520
2521 DBUG_RETURN(show_binlog_events(thd, &mysql_bin_log));
2522 }
2523
2524 #endif /* HAVE_REPLICATION */
2525
2526
MYSQL_BIN_LOG(uint * sync_period)2527 MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
2528 :bytes_written(0), file_id(1), open_count(1),
2529 sync_period_ptr(sync_period), sync_counter(0),
2530 m_prep_xids(0),
2531 is_relay_log(0), signal_cnt(0),
2532 checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
2533 relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
2534 previous_gtid_set(0)
2535 {
2536 /*
2537 We don't want to initialize locks here as such initialization depends on
2538 safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
2539 called only in main(). Doing initialization here would make it happen
2540 before main().
2541 */
2542 index_file_name[0] = 0;
2543 memset(&index_file, 0, sizeof(index_file));
2544 memset(&purge_index_file, 0, sizeof(purge_index_file));
2545 memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
2546 }
2547
2548
2549 /* this is called only once */
2550
cleanup()2551 void MYSQL_BIN_LOG::cleanup()
2552 {
2553 DBUG_ENTER("cleanup");
2554 if (inited)
2555 {
2556 inited= 0;
2557 close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, true /*need_lock_log=true*/,
2558 true /*need_lock_index=true*/);
2559 mysql_mutex_destroy(&LOCK_log);
2560 mysql_mutex_destroy(&LOCK_index);
2561 mysql_mutex_destroy(&LOCK_commit);
2562 mysql_mutex_destroy(&LOCK_sync);
2563 mysql_mutex_destroy(&LOCK_xids);
2564 mysql_cond_destroy(&update_cond);
2565 my_atomic_rwlock_destroy(&m_prep_xids_lock);
2566 mysql_cond_destroy(&m_prep_xids_cond);
2567 stage_manager.deinit();
2568 }
2569 DBUG_VOID_RETURN;
2570 }
2571
2572
init_pthread_objects()2573 void MYSQL_BIN_LOG::init_pthread_objects()
2574 {
2575 MYSQL_LOG::init_pthread_objects();
2576 mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
2577 mysql_mutex_init(m_key_LOCK_commit, &LOCK_commit, MY_MUTEX_INIT_FAST);
2578 mysql_mutex_init(m_key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
2579 mysql_mutex_init(m_key_LOCK_xids, &LOCK_xids, MY_MUTEX_INIT_FAST);
2580 mysql_cond_init(m_key_update_cond, &update_cond, 0);
2581 my_atomic_rwlock_init(&m_prep_xids_lock);
2582 mysql_cond_init(m_key_prep_xids_cond, &m_prep_xids_cond, NULL);
2583 stage_manager.init(
2584 #ifdef HAVE_PSI_INTERFACE
2585 m_key_LOCK_flush_queue,
2586 m_key_LOCK_sync_queue,
2587 m_key_LOCK_commit_queue,
2588 m_key_LOCK_done, m_key_COND_done
2589 #endif
2590 );
2591 }
2592
/*
  Opens the index file of this log, creating it if it does not exist,
  and initializes the index_file cache on it.

  index_file_name_arg  Name to use for the index file. If NULL, the
                       name is derived from log_name with its
                       extension replaced by ".index".
  log_name             Log base name; used only when
                       index_file_name_arg is NULL.
  need_lock_index      If true, LOCK_index is taken for the duration
                       of the call; otherwise the caller must already
                       own it (asserted).

  Returns false on success, and also when the index_file cache is
  already initialized, in which case nothing is done. Returns true on
  failure.
*/
open_index_file(const char * index_file_name_arg,const char * log_name,bool need_lock_index)2593 bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
2594 const char *log_name, bool need_lock_index)
2595 {
2596 bool error= false;
2597 File index_file_nr= -1;
2598
2599 if (need_lock_index)
2600 mysql_mutex_lock(&LOCK_index);
2601 else
2602 mysql_mutex_assert_owner(&LOCK_index);
2603
2604 /*
2605 First open of this class instance
2606 Create an index file that will hold all file names uses for logging.
2607 Add new entries to the end of it.
2608 */
2609 myf opt= MY_UNPACK_FILENAME;
2610
/* Already open: leave with error still false. */
2611 if (my_b_inited(&index_file))
2612 goto end;
2613
2614 if (!index_file_name_arg)
2615 {
2616 index_file_name_arg= log_name; // Use same basename for index file
2617 opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
2618 }
2619 fn_format(index_file_name, index_file_name_arg, mysql_data_home,
2620 ".index", opt);
2621
2622 if (set_crash_safe_index_file_name(index_file_name_arg))
2623 {
2624 sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed.");
2625 error= true;
2626 goto end;
2627 }
2628
2629 /*
2630 We need move crash_safe_index_file to index_file if the index_file
2631 does not exist and crash_safe_index_file exists when mysqld server
2632 restarts.
2633 */
2634 if (my_access(index_file_name, F_OK) &&
2635 !my_access(crash_safe_index_file_name, F_OK) &&
2636 my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)))
2637 {
2638 sql_print_error("MYSQL_BIN_LOG::open_index_file failed to "
2639 "move crash_safe_index_file to index file.");
2640 error= true;
2641 goto end;
2642 }
2643
/*
  Open (or create) the file, sync it, and set up a read cache whose
  start offset is the value returned by seeking to the end of the
  file. The steps are chained with ||, so the first failing one skips
  the rest.
*/
2644 if ((index_file_nr= mysql_file_open(m_key_file_log_index,
2645 index_file_name,
2646 O_RDWR | O_CREAT | O_BINARY,
2647 MYF(MY_WME))) < 0 ||
2648 mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
2649 init_io_cache(&index_file, index_file_nr,
2650 IO_SIZE, READ_CACHE,
2651 mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
2652 0, MYF(MY_WME | MY_WAIT_IF_FULL)) ||
2653 DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
2654 {
2655 /*
2656 TODO: all operations creating/deleting the index file or a log, should
2657 call my_sync_dir() or my_sync_dir_by_file() to be durable.
2658 TODO: file creation should be done with mysql_file_create()
2659 not mysql_file_open().
2660 */
/*
  Only the descriptor is released here; end_io_cache() is not called
  on index_file in this branch.
*/
2661 if (index_file_nr >= 0)
2662 mysql_file_close(index_file_nr, MYF(0));
2663 error= true;
2664 goto end;
2665 }
2666
2667 #ifdef HAVE_REPLICATION
2668 /*
2669 Sync the index by purging any binary log file that is not registered.
2670 In other words, either purge binary log files that were removed from
2671 the index but not purged from the file system due to a crash or purge
2672 any binary log file that was created but not register in the index
2673 due to a crash.
2674 */
2675
/*
  NOTE(review): if any step below fails, the function returns true
  while the index_file cache set up above stays initialized; confirm
  that callers close it on this path.
*/
2676 if (set_purge_index_file_name(index_file_name_arg) ||
2677 open_purge_index_file(FALSE) ||
2678 purge_index_entry(NULL, NULL, false) ||
2679 close_purge_index_file() ||
2680 DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
2681 {
2682 sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
2683 "file.");
2684 error= TRUE;
2685 goto end;
2686 }
2687 #endif
/* Single exit point: release LOCK_index only if it was taken here. */
2688 end:
2689 if (need_lock_index)
2690 mysql_mutex_unlock(&LOCK_index);
2691 return error;
2692 }
2693
2694
2695 /**
2696 Reads GTIDs from the given binlog file.
2697
2698 @param filename File to read from.
2699 @param all_gtids If not NULL, then the GTIDs from the
2700 Previous_gtids_log_event and from all Gtid_log_events are stored in
2701 this object.
2702 @param prev_gtids If not NULL, then the GTIDs from the
2703 Previous_gtids_log_events are stored in this object.
2704 @param first_gtid If not NULL, then the first GTID information from the
2705 file will be stored in this object.
2706 @param last_gtid If not NULL, then the last GTID information from the
2707 file will be stored in this object.
2708 @param sid_map The sid_map object to use in the rpl_sidno generation
2709 of the Gtid_log_event. If lock is needed in the sid_map, the caller
2710 must hold it.
2711 @param verify_checksum Set to true to verify event checksums.
2712
2713 @retval GOT_GTIDS The file was successfully read and it contains
2714 both Gtid_log_events and Previous_gtids_log_events.
2715 @retval GOT_PREVIOUS_GTIDS The file was successfully read and it
2716 contains Previous_gtids_log_events but no Gtid_log_events.
2717 @retval NO_GTIDS The file was successfully read and it does not
2718 contain GTID events.
2719 @retval ERROR Out of memory, or the file contains GTID events
2720 when GTID_MODE = OFF, or the file is malformed (e.g., contains
2721 Gtid_log_events but no Previous_gtids_log_event).
2722 @retval TRUNCATED The file was truncated before the end of the
2723 first Previous_gtids_log_event.
2724 */
/*
  Status values returned by read_gtids_from_binlog(); see the @retval
  entries in the comment preceding this declaration.
*/
2725 enum enum_read_gtids_from_binlog_status
2726 { GOT_GTIDS, GOT_PREVIOUS_GTIDS, NO_GTIDS, ERROR, TRUNCATED };
2727 static enum_read_gtids_from_binlog_status
read_gtids_from_binlog(const char * filename,Gtid_set * all_gtids,Gtid_set * prev_gtids,Gtid * first_gtid,Gtid * last_gtid,Sid_map * sid_map,bool verify_checksum)2728 read_gtids_from_binlog(const char *filename, Gtid_set *all_gtids,
2729 Gtid_set *prev_gtids, Gtid *first_gtid,
2730 Gtid *last_gtid,
2731 Sid_map* sid_map,
2732 bool verify_checksum)
2733 {
2734 DBUG_ENTER("read_gtids_from_binlog");
2735 DBUG_PRINT("info", ("Opening file %s", filename));
2736
2737 /*
2738 Create a Format_description_log_event that is used to read the
2739 first event of the log.
2740 */
2741 Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
2742 if (!fd_ev.is_valid())
2743 DBUG_RETURN(ERROR);
2744
2745 File file;
2746 IO_CACHE log;
2747
2748 /*
2749 We assert here that both all_gtids and prev_gtids, if specified,
2750 uses the same sid_map as the one passed as a parameter. This is just
2751 to ensure that, if the sid_map needed some lock and was locked by
2752 the caller, the lock applies to all the GTID sets this function is
2753 dealing with.
2754 */
2755 #ifndef DBUG_OFF
2756 if (all_gtids)
2757 DBUG_ASSERT(all_gtids->get_sid_map() == sid_map);
2758 if (prev_gtids)
2759 DBUG_ASSERT(prev_gtids->get_sid_map() == sid_map);
2760 #endif
2761
/*
  A file that cannot be opened by open_binlog_file() is reported as
  TRUNCATED, not as ERROR.
*/
2762 const char *errmsg= NULL;
2763 if ((file= open_binlog_file(&log, filename, &errmsg)) < 0)
2764 {
2765 sql_print_error("%s", errmsg);
2766 /*
2767 We need to revisit the recovery procedure for relay log
2768 files. Currently, it is called after this routine.
2769 /Alfranio
2770 */
2771 DBUG_RETURN(TRUNCATED);
2772 }
2773
2774 /*
2775 Seek for Previous_gtids_log_event and Gtid_log_event events to
2776 gather information what has been processed so far.
2777 */
2778 my_b_seek(&log, BIN_LOG_HEADER_SIZE);
2779 Log_event *ev= NULL;
2780 enum_read_gtids_from_binlog_status ret= NO_GTIDS;
2781 bool done= false;
2782 bool seen_first_gtid= false;
/*
  Read events until one of the cases sets done or no further event
  can be read.
*/
2783 while (!done &&
2784 (ev= Log_event::read_log_event(&log, 0, fd_ev_p, verify_checksum)) !=
2785 NULL)
2786 {
2787 DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
2788 switch (ev->get_type_code())
2789 {
2790 case FORMAT_DESCRIPTION_EVENT:
/*
  Use the description read from the file for the following events.
  A previously adopted one is freed; the stack object fd_ev never is.
*/
2791 if (fd_ev_p != &fd_ev)
2792 delete fd_ev_p;
2793 fd_ev_p= (Format_description_log_event *)ev;
2794 break;
2795 case ROTATE_EVENT:
2796 // do nothing; just accept this event and go to next
2797 break;
2798 case PREVIOUS_GTIDS_LOG_EVENT:
2799 {
/*
  NOTE(review): the ret= ERROR assigned in this branch is overwritten
  unconditionally by ret= GOT_PREVIOUS_GTIDS right after it, and done
  is not set, so apart from raising the error with my_error() the
  event is processed as usual. The function comment lists ERROR for
  "GTID events when GTID_MODE = OFF"; confirm which behaviour is
  intended before changing either.
*/
2800 if (gtid_mode == 0)
2801 {
2802 my_error(ER_FOUND_GTID_EVENT_WHEN_GTID_MODE_IS_OFF, MYF(0));
2803 ret= ERROR;
2804 }
2805 ret= GOT_PREVIOUS_GTIDS;
2806 // add events to sets
2807 Previous_gtids_log_event *prev_gtids_ev=
2808 (Previous_gtids_log_event *)ev;
2809 if (all_gtids != NULL && prev_gtids_ev->add_to_set(all_gtids) != 0)
2810 ret= ERROR, done= true;
2811 else if (prev_gtids != NULL && prev_gtids_ev->add_to_set(prev_gtids) != 0)
2812 ret= ERROR, done= true;
2813 #ifndef DBUG_OFF
2814 char* prev_buffer= prev_gtids_ev->get_str(NULL, NULL);
2815 DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
2816 filename, prev_buffer));
2817 my_free(prev_buffer);
2818 #endif
2819 break;
2820 }
2821 case GTID_LOG_EVENT:
2822 {
2823 DBUG_EXECUTE_IF("inject_fault_bug16502579", {
2824 DBUG_PRINT("debug", ("GTID_LOG_EVENT found. Injected ret=NO_GTIDS."));
2825 ret=NO_GTIDS;
2826 });
/*
  First Gtid_log_event of the file: it is accepted only if a
  Previous_gtids_log_event was seen before it.
*/
2827 if (ret != GOT_GTIDS)
2828 {
2829 if (ret != GOT_PREVIOUS_GTIDS)
2830 {
2831 /*
2832 Since this routine is run on startup, there may not be a
2833 THD instance. Therefore, ER(X) cannot be used.
2834 */
2835 const char* msg_fmt= (current_thd != NULL) ?
2836 ER(ER_BINLOG_LOGICAL_CORRUPTION) :
2837 ER_DEFAULT(ER_BINLOG_LOGICAL_CORRUPTION);
2838 my_printf_error(ER_BINLOG_LOGICAL_CORRUPTION,
2839 msg_fmt, MYF(0),
2840 filename,
2841 "The first global transaction identifier was read, but "
2842 "no other information regarding identifiers existing "
2843 "on the previous log files was found.");
2844 ret= ERROR, done= true;
2845 break;
2846 }
2847 else
2848 ret= GOT_GTIDS;
2849 }
2850 /*
2851 When all_gtids, first_gtid and last_gtid are all NULL,
2852 we just check if the binary log contains at least one Gtid_log_event,
2853 so that we can distinguish the return values GOT_GTID and
2854 GOT_PREVIOUS_GTIDS. We don't need to read anything else from the
2855 binary log.
2856 If all_gtids or last_gtid is requested (i.e., NOT NULL), we should
2857 continue to read all gtids.
2858 If just first_gtid was requested, we will be done after storing this
2859 Gtid_log_event info on it.
2860 */
2861 if (all_gtids == NULL && first_gtid == NULL && last_gtid == NULL)
2862 {
2863 ret= GOT_GTIDS, done= true;
2864 }
2865 else
2866 {
2867 Gtid_log_event *gtid_ev= (Gtid_log_event *)ev;
2868 rpl_sidno sidno= gtid_ev->get_sidno(sid_map);
2869 if (sidno < 0)
2870 ret= ERROR, done= true;
2871 else
2872 {
2873 if (all_gtids)
2874 {
2875 if (all_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK)
2876 ret= ERROR, done= true;
2877 else if (all_gtids->_add_gtid(sidno, gtid_ev->get_gno()) !=
2878 RETURN_STATUS_OK)
2879 ret= ERROR, done= true;
2880 DBUG_PRINT("info", ("Got Gtid from file '%s': Gtid(%d, %lld).",
2881 filename, sidno, gtid_ev->get_gno()));
2882 }
2883
2884 /* If the first GTID was requested, stores it */
2885 if (first_gtid && !seen_first_gtid)
2886 {
2887 first_gtid->set(sidno, gtid_ev->get_gno());
2888 seen_first_gtid= true;
2889 /* If the first_gtid was the only thing requested, we are done */
2890 if (all_gtids == NULL && last_gtid == NULL)
2891 ret= GOT_GTIDS, done= true;
2892 }
2893
/* Overwritten on every Gtid_log_event, so the last one read remains. */
2894 if (last_gtid)
2895 last_gtid->set(sidno, gtid_ev->get_gno());
2896 }
2897 }
2898 break;
2899 }
/* ANONYMOUS_GTID_LOG_EVENT is handled like any other event type. */
2900 case ANONYMOUS_GTID_LOG_EVENT:
2901 default:
2902 // if we found any other event type without finding a
2903 // previous_gtids_log_event, then the rest of this binlog
2904 // cannot contain gtids
2905 if (ret != GOT_GTIDS && ret != GOT_PREVIOUS_GTIDS)
2906 done= true;
2907 break;
2908 }
/*
  The event is freed here unless it has just been adopted as the
  current format description.
*/
2909 if (ev != fd_ev_p)
2910 delete ev;
2911 DBUG_PRINT("info", ("done=%d", done));
2912 }
2913
/* A negative cache error is only logged; ret is left unchanged. */
2914 if (log.error < 0)
2915 {
2916 // This is not a fatal error; the log may just be truncated.
2917
2918 // @todo but what other errors could happen? IO error?
2919 sql_print_warning("Error reading GTIDs from binary log: %d", log.error);
2920 }
2921
/* Free a format description adopted from the file, if any. */
2922 if (fd_ev_p != &fd_ev)
2923 {
2924 delete fd_ev_p;
2925 fd_ev_p= &fd_ev;
2926 }
2927
2928 mysql_file_close(file, MYF(MY_WME));
2929 end_io_cache(&log);
2930
2931 DBUG_PRINT("info", ("returning %d", ret));
2932 DBUG_RETURN(ret);
2933 }
2934
find_first_log_not_in_gtid_set(char * binlog_file_name,const Gtid_set * gtid_set,Gtid * first_gtid,const char ** errmsg)2935 bool MYSQL_BIN_LOG::find_first_log_not_in_gtid_set(char *binlog_file_name,
2936 const Gtid_set *gtid_set,
2937 Gtid *first_gtid,
2938 const char **errmsg)
2939 {
2940 DBUG_ENTER("MYSQL_BIN_LOG::gtid_read_start_binlog");
2941 /*
2942 Gather the set of files to be accessed.
2943 */
2944 list<string> filename_list;
2945 LOG_INFO linfo;
2946 int error;
2947
2948 list<string>::reverse_iterator rit;
2949 Gtid_set previous_gtid_set(gtid_set->get_sid_map());
2950
2951 mysql_mutex_lock(&LOCK_index);
2952 for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/);
2953 !error; error= find_next_log(&linfo, false/*need_lock_index=false*/))
2954 {
2955 DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
2956 filename_list.push_back(string(linfo.log_file_name));
2957 }
2958 mysql_mutex_unlock(&LOCK_index);
2959 if (error != LOG_INFO_EOF)
2960 {
2961 *errmsg= "Failed to read the binary log index file while "
2962 "looking for the oldest binary log that contains any GTID "
2963 "that is not in the given gtid set";
2964 error= -1;
2965 goto end;
2966 }
2967
2968 if (filename_list.empty())
2969 {
2970 *errmsg= "Could not find first log file name in binary log index file "
2971 "while looking for the oldest binary log that contains any GTID "
2972 "that is not in the given gtid set";
2973 error= -2;
2974 goto end;
2975 }
2976
2977 /*
2978 Iterate over all the binary logs in reverse order, and read only
2979 the Previous_gtids_log_event, to find the first one, that is the
2980 subset of the given gtid set. Since every binary log begins with
2981 a Previous_gtids_log_event, that contains all GTIDs in all
2982 previous binary logs.
2983 We also ask for the first GTID in the binary log to know if we
2984 should send the FD event with the "created" field cleared or not.
2985 */
2986 DBUG_PRINT("info", ("Iterating backwards through binary logs, and reading "
2987 "only the Previous_gtids_log_event, to find the first "
2988 "one, that is the subset of the given gtid set."));
2989 rit= filename_list.rbegin();
2990 error= 0;
2991 while (rit != filename_list.rend())
2992 {
2993 previous_gtid_set.clear();
2994 const char *filename= rit->c_str();
2995 DBUG_PRINT("info", ("Read Previous_gtids_log_event from filename='%s'",
2996 filename));
2997 switch (read_gtids_from_binlog(filename, NULL, &previous_gtid_set,
2998 first_gtid, NULL/* last_gtid */,
2999 previous_gtid_set.get_sid_map(),
3000 opt_master_verify_checksum))
3001 {
3002 case ERROR:
3003 *errmsg= "Error reading header of binary log while looking for "
3004 "the oldest binary log that contains any GTID that is not in "
3005 "the given gtid set";
3006 error= -3;
3007 goto end;
3008 case NO_GTIDS:
3009 *errmsg= "Found old binary log without GTIDs while looking for "
3010 "the oldest binary log that contains any GTID that is not in "
3011 "the given gtid set";
3012 error= -4;
3013 goto end;
3014 case GOT_GTIDS:
3015 case GOT_PREVIOUS_GTIDS:
3016 if (previous_gtid_set.is_subset(gtid_set))
3017 {
3018 strcpy(binlog_file_name, filename);
3019 /*
3020 Verify that the selected binlog is not the first binlog,
3021 */
3022 DBUG_EXECUTE_IF("slave_reconnect_with_gtid_set_executed",
3023 DBUG_ASSERT(strcmp(filename_list.begin()->c_str(),
3024 binlog_file_name) != 0););
3025 goto end;
3026 }
3027 case TRUNCATED:
3028 break;
3029 }
3030
3031 rit++;
3032 }
3033
3034 if (rit == filename_list.rend())
3035 {
3036 report_missing_gtids(&previous_gtid_set, gtid_set, errmsg);
3037 error= -5;
3038 }
3039
3040 end:
3041 if (error)
3042 DBUG_PRINT("error", ("'%s'", *errmsg));
3043 filename_list.clear();
3044 DBUG_PRINT("info", ("returning %d", error));
3045 DBUG_RETURN(error != 0 ? true : false);
3046 }
3047
init_gtid_sets(Gtid_set * all_gtids,Gtid_set * lost_gtids,Gtid * last_gtid,bool verify_checksum,bool need_lock,bool is_server_starting)3048 bool MYSQL_BIN_LOG::init_gtid_sets(Gtid_set *all_gtids, Gtid_set *lost_gtids,
3049 Gtid *last_gtid, bool verify_checksum,
3050 bool need_lock, bool is_server_starting)
3051 {
3052 DBUG_ENTER("MYSQL_BIN_LOG::init_gtid_sets");
3053 DBUG_PRINT("info", ("lost_gtids=%p; so we are recovering a %s log",
3054 lost_gtids, lost_gtids == NULL ? "relay" : "binary"));
3055
3056 /*
3057 Acquires the necessary locks to ensure that logs are not either
3058 removed or updated when we are reading from it.
3059 */
3060 if (need_lock)
3061 {
3062 // We don't need LOCK_log if we are only going to read the initial
3063 // Prevoius_gtids_log_event and ignore the Gtid_log_events.
3064 if (all_gtids != NULL)
3065 mysql_mutex_lock(&LOCK_log);
3066 mysql_mutex_lock(&LOCK_index);
3067 global_sid_lock->wrlock();
3068 }
3069 else
3070 {
3071 if (all_gtids != NULL)
3072 mysql_mutex_assert_owner(&LOCK_log);
3073 mysql_mutex_assert_owner(&LOCK_index);
3074 global_sid_lock->assert_some_wrlock();
3075 }
3076
3077 // Gather the set of files to be accessed.
3078 list<string> filename_list;
3079 LOG_INFO linfo;
3080 int error;
3081
3082 list<string>::iterator it;
3083 list<string>::reverse_iterator rit;
3084 bool reached_first_file= false;
3085
3086 /* Initialize the sid_map to be used in read_gtids_from_binlog */
3087 Sid_map *sid_map= NULL;
3088 if (all_gtids)
3089 sid_map= all_gtids->get_sid_map();
3090 else if (lost_gtids)
3091 sid_map= lost_gtids->get_sid_map();
3092
3093 for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/); !error;
3094 error= find_next_log(&linfo, false/*need_lock_index=false*/))
3095 {
3096 DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
3097 filename_list.push_back(string(linfo.log_file_name));
3098 }
3099 if (error != LOG_INFO_EOF)
3100 {
3101 DBUG_PRINT("error", ("Error reading binlog index"));
3102 goto end;
3103 }
3104 /*
3105 On server starting, one new empty binlog file is created and
3106 its file name is put into index file before initializing
3107 GLOBAL.GTID_EXECUTED AND GLOBAL.GTID_PURGED, it is not the
3108 last binlog file before the server restarts, so we remove
3109 its file name from filename_list.
3110 */
3111 if (is_server_starting && !is_relay_log && !filename_list.empty())
3112 filename_list.pop_back();
3113
3114 error= 0;
3115
3116 if (all_gtids != NULL)
3117 {
3118 DBUG_PRINT("info", ("Iterating backwards through binary logs, looking for the last binary log that contains a Previous_gtids_log_event."));
3119 // Iterate over all files in reverse order until we find one that
3120 // contains a Previous_gtids_log_event.
3121 rit= filename_list.rbegin();
3122 bool got_gtids= false;
3123 reached_first_file= (rit == filename_list.rend());
3124 DBUG_PRINT("info", ("filename='%s' reached_first_file=%d",
3125 rit->c_str(), reached_first_file));
3126 while ((!got_gtids || (last_gtid && last_gtid->empty()))
3127 && !reached_first_file)
3128 {
3129 const char *filename= rit->c_str();
3130 rit++;
3131 reached_first_file= (rit == filename_list.rend());
3132 DBUG_PRINT("info", ("filename='%s' got_gtids=%d reached_first_file=%d",
3133 filename, got_gtids, reached_first_file));
3134 switch (read_gtids_from_binlog(filename, got_gtids ? NULL : all_gtids,
3135 reached_first_file ? lost_gtids : NULL,
3136 NULL/* first_gtid */, last_gtid,
3137 sid_map, verify_checksum))
3138 {
3139 case ERROR:
3140 {
3141 error= 1;
3142 goto end;
3143 }
3144 case GOT_GTIDS:
3145 case GOT_PREVIOUS_GTIDS:
3146 {
3147 got_gtids= true;
3148 break;
3149 }
3150 case NO_GTIDS:
3151 {
3152 /*
3153 If the binlog_gtid_simple_recovery is enabled, and the
3154 last binary log does not contain any GTID event, do not
3155 read any more binary logs, GLOBAL.GTID_EXECUTED and
3156 GLOBAL.GTID_PURGED should be empty in the case. Otherwise,
3157 initialize GTID_EXECUTED as usual.
3158 */
3159 if (binlog_gtid_simple_recovery && !is_relay_log)
3160 {
3161 DBUG_ASSERT(all_gtids->is_empty() && lost_gtids->is_empty());
3162 goto end;
3163 }
3164 /*FALLTHROUGH*/
3165 }
3166 case TRUNCATED:
3167 {
3168 break;
3169 }
3170 }
3171 }
3172 }
3173 if (lost_gtids != NULL && !reached_first_file)
3174 {
3175 DBUG_PRINT("info", ("Iterating forwards through binary logs, looking for the first binary log that contains a Previous_gtids_log_event."));
3176 for (it= filename_list.begin(); it != filename_list.end(); it++)
3177 {
3178 const char *filename= it->c_str();
3179 DBUG_PRINT("info", ("filename='%s'", filename));
3180 switch (read_gtids_from_binlog(filename, NULL, lost_gtids,
3181 NULL/* first_gtid */, NULL/* last_gtid */,
3182 sid_map, verify_checksum))
3183 {
3184 case ERROR:
3185 {
3186 error= 1;
3187 /*FALLTHROUGH*/
3188 }
3189 case GOT_GTIDS:
3190 {
3191 goto end;
3192 }
3193 case NO_GTIDS:
3194 {
3195 /*
3196 If the binlog_gtid_simple_recovery is enabled, and the
3197 first binary log does not contain any GTID event, do not
3198 read any more binary logs, GLOBAL.GTID_PURGED should be
3199 empty in the case.
3200 */
3201 if (binlog_gtid_simple_recovery && !is_relay_log)
3202 {
3203 DBUG_ASSERT(lost_gtids->is_empty());
3204 goto end;
3205 }
3206 /*FALLTHROUGH*/
3207 }
3208 case GOT_PREVIOUS_GTIDS:
3209 case TRUNCATED:
3210 {
3211 break;
3212 }
3213 }
3214 }
3215 }
3216 end:
3217 if (all_gtids)
3218 all_gtids->dbug_print("all_gtids");
3219 if (lost_gtids)
3220 lost_gtids->dbug_print("lost_gtids");
3221 if (need_lock)
3222 {
3223 global_sid_lock->unlock();
3224 mysql_mutex_unlock(&LOCK_index);
3225 if (all_gtids != NULL)
3226 mysql_mutex_unlock(&LOCK_log);
3227 }
3228 filename_list.clear();
3229 DBUG_PRINT("info", ("returning %d", error));
3230 DBUG_RETURN(error != 0 ? true : false);
3231 }
3232
3233
3234 /**
3235 Open a (new) binlog file.
3236
3237 - Open the log file and the index file. Register the new
3238 file name in it
3239 - When calling this when the file is in use, you must have a locks
3240 on LOCK_log and LOCK_index.
3241
3242 @retval
3243 0 ok
3244 @retval
3245 1 error
3246 */
3247
open_binlog(const char * log_name,const char * new_name,enum cache_type io_cache_type_arg,ulong max_size_arg,bool null_created_arg,bool need_lock_log,bool need_lock_index,bool need_sid_lock,Format_description_log_event * extra_description_event)3248 bool MYSQL_BIN_LOG::open_binlog(const char *log_name,
3249 const char *new_name,
3250 enum cache_type io_cache_type_arg,
3251 ulong max_size_arg,
3252 bool null_created_arg,
3253 bool need_lock_log,
3254 bool need_lock_index,
3255 bool need_sid_lock,
3256 Format_description_log_event *extra_description_event)
3257 {
3258
3259 // lock_index must be acquired *before* sid_lock.
3260 DBUG_ASSERT(need_sid_lock || !need_lock_index);
3261 DBUG_ENTER("MYSQL_BIN_LOG::open_binlog(const char *, ...)");
3262 DBUG_PRINT("enter",("name: %s", log_name));
3263
3264 if (init_and_set_log_file_name(log_name, new_name, LOG_BIN,
3265 io_cache_type_arg))
3266 {
3267 sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
3268 DBUG_RETURN(1);
3269 }
3270
3271 #ifdef HAVE_REPLICATION
3272 if (open_purge_index_file(TRUE) ||
3273 register_create_index_entry(log_file_name) ||
3274 sync_purge_index_file() ||
3275 DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
3276 {
3277 /**
3278 @todo: although this was introduced to appease valgrind
3279 when injecting emulated faults using fault_injection_registering_index
3280 it may be good to consider what actually happens when
3281 open_purge_index_file succeeds but register or sync fails.
3282
3283 Perhaps we might need the code below in MYSQL_LOG_BIN::cleanup
3284 for "real life" purposes as well?
3285 */
3286 DBUG_EXECUTE_IF("fault_injection_registering_index", {
3287 if (my_b_inited(&purge_index_file))
3288 {
3289 end_io_cache(&purge_index_file);
3290 my_close(purge_index_file.file, MYF(0));
3291 }
3292 });
3293
3294 sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
3295 DBUG_RETURN(1);
3296 }
3297 DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
3298 #endif
3299
3300 write_error= 0;
3301
3302 /* open the main log file */
3303 if (MYSQL_LOG::open(
3304 #ifdef HAVE_PSI_INTERFACE
3305 m_key_file_log,
3306 #endif
3307 log_name, LOG_BIN, new_name, io_cache_type_arg))
3308 {
3309 #ifdef HAVE_REPLICATION
3310 close_purge_index_file();
3311 #endif
3312 DBUG_RETURN(1); /* all warnings issued */
3313 }
3314
3315 max_size= max_size_arg;
3316
3317 open_count++;
3318
3319 bool write_file_name_to_index_file=0;
3320
3321 /* This must be before goto err. */
3322 Format_description_log_event s(BINLOG_VERSION);
3323
3324 if (!my_b_filelength(&log_file))
3325 {
3326 /*
3327 The binary log file was empty (probably newly created)
3328 This is the normal case and happens when the user doesn't specify
3329 an extension for the binary log files.
3330 In this case we write a standard header to it.
3331 */
3332 if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
3333 BIN_LOG_HEADER_SIZE))
3334 goto err;
3335 bytes_written+= BIN_LOG_HEADER_SIZE;
3336 write_file_name_to_index_file= 1;
3337 }
3338
3339 /*
3340 don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
3341 as we won't be able to reset it later
3342 */
3343 if (io_cache_type == WRITE_CACHE)
3344 s.flags |= LOG_EVENT_BINLOG_IN_USE_F;
3345 s.checksum_alg= is_relay_log ?
3346 /* relay-log */
3347 /* inherit master's A descriptor if one has been received */
3348 (relay_log_checksum_alg=
3349 (relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF) ?
3350 relay_log_checksum_alg :
3351 /* otherwise use slave's local preference of RL events verification */
3352 (opt_slave_sql_verify_checksum == 0) ?
3353 (uint8) BINLOG_CHECKSUM_ALG_OFF : binlog_checksum_options):
3354 /* binlog */
3355 binlog_checksum_options;
3356 DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
3357 if (!s.is_valid())
3358 goto err;
3359 s.dont_set_created= null_created_arg;
3360 /* Set LOG_EVENT_RELAY_LOG_F flag for relay log's FD */
3361 if (is_relay_log)
3362 s.set_relay_log_event();
3363 if (s.write(&log_file))
3364 goto err;
3365 bytes_written+= s.data_written;
3366 /*
3367 We need to revisit this code and improve it.
3368 See further comments in the mysqld.
3369 /Alfranio
3370 */
3371 if (current_thd && gtid_mode > 0)
3372 {
3373 if (need_sid_lock)
3374 global_sid_lock->wrlock();
3375 else
3376 global_sid_lock->assert_some_wrlock();
3377 Previous_gtids_log_event prev_gtids_ev(previous_gtid_set);
3378 if (is_relay_log)
3379 prev_gtids_ev.set_relay_log_event();
3380 if (need_sid_lock)
3381 global_sid_lock->unlock();
3382 prev_gtids_ev.checksum_alg= s.checksum_alg;
3383 if (prev_gtids_ev.write(&log_file))
3384 goto err;
3385 bytes_written+= prev_gtids_ev.data_written;
3386 }
3387 if (extra_description_event &&
3388 extra_description_event->binlog_version>=4)
3389 {
3390 /*
3391 This is a relay log written to by the I/O slave thread.
3392 Write the event so that others can later know the format of this relay
3393 log.
3394 Note that this event is very close to the original event from the
3395 master (it has binlog version of the master, event types of the
3396 master), so this is suitable to parse the next relay log's event. It
3397 has been produced by
3398 Format_description_log_event::Format_description_log_event(char* buf,).
3399 Why don't we want to write the mi_description_event if this
3400 event is for format<4 (3.23 or 4.x): this is because in that case, the
3401 mi_description_event describes the data received from the
3402 master, but not the data written to the relay log (*conversion*),
3403 which is in format 4 (slave's).
3404 */
3405 /*
3406 Set 'created' to 0, so that in next relay logs this event does not
3407 trigger cleaning actions on the slave in
3408 Format_description_log_event::apply_event_impl().
3409 */
3410 extra_description_event->created= 0;
3411 /* Don't set log_pos in event header */
3412 extra_description_event->set_artificial_event();
3413
3414 if (extra_description_event->write(&log_file))
3415 goto err;
3416 bytes_written+= extra_description_event->data_written;
3417 }
3418 if (flush_io_cache(&log_file) ||
3419 mysql_file_sync(log_file.file, MYF(MY_WME)))
3420 goto err;
3421
3422 if (write_file_name_to_index_file)
3423 {
3424 #ifdef HAVE_REPLICATION
3425 DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
3426 #endif
3427
3428 DBUG_ASSERT(my_b_inited(&index_file) != 0);
3429
3430 /*
3431 The new log file name is appended into crash safe index file after
3432 all the content of index file is copyed into the crash safe index
3433 file. Then move the crash safe index file to index file.
3434 */
3435 DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
3436 {DBUG_SET("+d,simulate_no_free_space_error");});
3437 if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
3438 add_log_to_index((uchar*) log_file_name, strlen(log_file_name),
3439 need_lock_index))
3440 {
3441 DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
3442 {
3443 DBUG_SET("-d,simulate_file_write_error");
3444 DBUG_SET("-d,simulate_no_free_space_error");
3445 DBUG_SET("-d,simulate_disk_full_on_open_binlog");
3446 });
3447 goto err;
3448 }
3449
3450 #ifdef HAVE_REPLICATION
3451 DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
3452 #endif
3453 }
3454
3455 log_state= LOG_OPENED;
3456
3457 #ifdef HAVE_REPLICATION
3458 close_purge_index_file();
3459 #endif
3460
3461 DBUG_RETURN(0);
3462
3463 err:
3464 #ifdef HAVE_REPLICATION
3465 if (is_inited_purge_index_file())
3466 purge_index_entry(NULL, NULL, need_lock_index);
3467 close_purge_index_file();
3468 #endif
3469
3470 if (binlog_error_action == ABORT_SERVER)
3471 {
3472 exec_binlog_error_action_abort("Either disk is full or file system is read "
3473 "only while opening the binlog. Aborting the"
3474 " server.");
3475 }
3476 else
3477 {
3478 sql_print_error("Could not use %s for logging (error %d). "
3479 "Turning logging off for the whole duration of the MySQL "
3480 "server process. To turn it on again: fix the cause, "
3481 "shutdown the MySQL server and restart it.",
3482 (new_name) ? new_name : name, errno);
3483 close(LOG_CLOSE_INDEX, need_lock_log, need_lock_index);
3484 }
3485 DBUG_RETURN(1);
3486 }
3487
3488
3489 /**
3490 Move crash safe index file to index file.
3491
3492 @param need_lock_index If true, LOCK_index will be acquired;
3493 otherwise it should already be held.
3494
3495 @retval 0 ok
3496 @retval -1 error
3497 */
move_crash_safe_index_file_to_index_file(bool need_lock_index)3498 int MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file(bool need_lock_index)
3499 {
3500 int error= 0;
3501 File fd= -1;
3502 DBUG_ENTER("MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file");
3503 int failure_trials= MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
3504 bool file_rename_status= false, file_delete_status= false;
3505 THD *thd= current_thd;
3506
3507 if (need_lock_index)
3508 mysql_mutex_lock(&LOCK_index);
3509 else
3510 mysql_mutex_assert_owner(&LOCK_index);
3511
3512 if (my_b_inited(&index_file))
3513 {
3514 end_io_cache(&index_file);
3515 if (mysql_file_close(index_file.file, MYF(0)) < 0)
3516 {
3517 error= -1;
3518 sql_print_error("While rebuilding index file %s: "
3519 "Failed to close the index file.", index_file_name);
3520 /*
3521 Delete Crash safe index file here and recover the binlog.index
3522 state(index_file io_cache) from old binlog.index content.
3523 */
3524 mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
3525 MYF(0));
3526
3527 goto recoverable_err;
3528 }
3529
3530 /*
3531 Sometimes an outsider can lock index files for temporary viewing
3532 purpose. For eg: MEB locks binlog.index/relaylog.index to view
3533 the content of the file. During that small period of time, deletion
3534 of the file is not possible on some platforms(Eg: Windows)
3535 Server should retry the delete operation for few times instead of panicking
3536 immediately.
3537 */
3538 while ((file_delete_status == false) && (failure_trials > 0))
3539 {
3540 if (DBUG_EVALUATE_IF("force_index_file_delete_failure", 1, 0)) break;
3541
3542 DBUG_EXECUTE_IF("simulate_index_file_delete_failure",
3543 {
3544 /* This simulation causes the delete to fail */
3545 static char first_char= index_file_name[0];
3546 index_file_name[0]= 0;
3547 sql_print_information("Retrying delete");
3548 if (failure_trials == 1)
3549 index_file_name[0]= first_char;
3550 };);
3551 file_delete_status = !(mysql_file_delete(key_file_binlog_index,
3552 index_file_name, MYF(MY_WME)));
3553 --failure_trials;
3554 if (!file_delete_status)
3555 {
3556 my_sleep(1000);
3557 /* Clear the error before retrying. */
3558 if (failure_trials > 0)
3559 thd->clear_error();
3560 }
3561 }
3562
3563 if (!file_delete_status)
3564 {
3565 error= -1;
3566 sql_print_error("While rebuilding index file %s: "
3567 "Failed to delete the existing index file. It could be "
3568 "that file is being used by some other process.",
3569 index_file_name);
3570 /*
3571 Delete Crash safe file index file here and recover the binlog.index
3572 state(index_file io_cache) from old binlog.index content.
3573 */
3574 mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
3575 MYF(0));
3576
3577 goto recoverable_err;
3578 }
3579 }
3580
3581 DBUG_EXECUTE_IF("crash_create_before_rename_index_file", DBUG_SUICIDE(););
3582 /*
3583 Sometimes an outsider can lock index files for temporary viewing
3584 purpose. For eg: MEB locks binlog.index/relaylog.index to view
3585 the content of the file. During that small period of time, rename
3586 of the file is not possible on some platforms(Eg: Windows)
3587 Server should retry the rename operation for few times instead of panicking
3588 immediately.
3589 */
3590 failure_trials = MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
3591 while ((file_rename_status == false) && (failure_trials > 0))
3592 {
3593 DBUG_EXECUTE_IF("simulate_crash_safe_index_file_rename_failure",
3594 {
3595 /* This simulation causes the rename to fail */
3596 static char first_char= index_file_name[0];
3597 index_file_name[0]= 0;
3598 sql_print_information("Retrying rename");
3599 if (failure_trials == 1)
3600 index_file_name[0]= first_char;
3601 };);
3602 file_rename_status =
3603 !(my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)));
3604 --failure_trials;
3605 if (!file_rename_status)
3606 {
3607 my_sleep(1000);
3608 /* Clear the error before retrying. */
3609 if (failure_trials > 0)
3610 thd->clear_error();
3611 }
3612 }
3613 if (!file_rename_status)
3614 {
3615 error= -1;
3616 sql_print_error("While rebuilding index file %s: "
3617 "Failed to rename the new index file to the existing "
3618 "index file.", index_file_name);
3619 goto fatal_err;
3620 }
3621 DBUG_EXECUTE_IF("crash_create_after_rename_index_file", DBUG_SUICIDE(););
3622
3623 recoverable_err:
3624 if ((fd= mysql_file_open(key_file_binlog_index,
3625 index_file_name,
3626 O_RDWR | O_CREAT | O_BINARY,
3627 MYF(MY_WME))) < 0 ||
3628 mysql_file_sync(fd, MYF(MY_WME)) ||
3629 init_io_cache(&index_file, fd, IO_SIZE, READ_CACHE,
3630 mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(0)),
3631 0, MYF(MY_WME | MY_WAIT_IF_FULL)))
3632 {
3633 sql_print_error("After rebuilding the index file %s: "
3634 "Failed to open the index file.", index_file_name);
3635 goto fatal_err;
3636 }
3637
3638 if (need_lock_index)
3639 mysql_mutex_unlock(&LOCK_index);
3640 DBUG_RETURN(error);
3641
3642 fatal_err:
3643 /*
3644 This situation is very very rare to happen (unless there is some serious
3645 memory related issues like OOM) and should be treated as fatal error.
3646 Hence it is better to bring down the server without respecting
3647 'binlog_error_action' value here.
3648 */
3649 exec_binlog_error_action_abort("MySQL server failed to update the "
3650 "binlog.index file's content properly. "
3651 "It might not be in sync with available "
3652 "binlogs and the binlog.index file state is in "
3653 "unrecoverable state. Aborting the server.");
3654 /*
3655 Server is aborted in the above function.
3656 This is dead code to make compiler happy.
3657 */
3658 DBUG_RETURN(error);
3659 }
3660
3661
3662 /**
3663 Append log file name to index file.
3664
3665 - To make crash safe, we copy all the content of index file
3666 to crash safe index file firstly and then append the log
3667 file name to the crash safe index file. Finally move the
3668 crash safe index file to index file.
3669
3670 @retval
3671 0 ok
3672 @retval
3673 -1 error
3674 */
add_log_to_index(uchar * log_name,int log_name_len,bool need_lock_index)3675 int MYSQL_BIN_LOG::add_log_to_index(uchar* log_name,
3676 int log_name_len, bool need_lock_index)
3677 {
3678 DBUG_ENTER("MYSQL_BIN_LOG::add_log_to_index");
3679
3680 if (open_crash_safe_index_file())
3681 {
3682 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3683 "open the crash safe index file.");
3684 goto err;
3685 }
3686
3687 if (copy_file(&index_file, &crash_safe_index_file, 0))
3688 {
3689 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3690 "copy index file to crash safe index file.");
3691 goto err;
3692 }
3693
3694 if (my_b_write(&crash_safe_index_file, log_name, log_name_len) ||
3695 my_b_write(&crash_safe_index_file, (uchar*) "\n", 1) ||
3696 flush_io_cache(&crash_safe_index_file) ||
3697 mysql_file_sync(crash_safe_index_file.file, MYF(MY_WME)))
3698 {
3699 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3700 "append log file name: %s, to crash "
3701 "safe index file.", log_name);
3702 goto err;
3703 }
3704
3705 if (close_crash_safe_index_file())
3706 {
3707 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3708 "close the crash safe index file.");
3709 goto err;
3710 }
3711
3712 if (move_crash_safe_index_file_to_index_file(need_lock_index))
3713 {
3714 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
3715 "move crash safe index file to index file.");
3716 goto err;
3717 }
3718
3719 DBUG_RETURN(0);
3720
3721 err:
3722 DBUG_RETURN(-1);
3723 }
3724
get_current_log(LOG_INFO * linfo,bool need_lock_log)3725 int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo, bool need_lock_log/*true*/)
3726 {
3727 if (need_lock_log)
3728 mysql_mutex_lock(&LOCK_log);
3729 int ret = raw_get_current_log(linfo);
3730 if (need_lock_log)
3731 mysql_mutex_unlock(&LOCK_log);
3732 return ret;
3733 }
3734
raw_get_current_log(LOG_INFO * linfo)3735 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
3736 {
3737 strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
3738 linfo->pos = my_b_safe_tell(&log_file);
3739 return 0;
3740 }
3741
check_write_error(THD * thd)3742 bool MYSQL_BIN_LOG::check_write_error(THD *thd)
3743 {
3744 DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
3745
3746 bool checked= FALSE;
3747
3748 if (!thd->is_error())
3749 DBUG_RETURN(checked);
3750
3751 switch (thd->get_stmt_da()->sql_errno())
3752 {
3753 case ER_TRANS_CACHE_FULL:
3754 case ER_STMT_CACHE_FULL:
3755 case ER_ERROR_ON_WRITE:
3756 case ER_BINLOG_LOGGING_IMPOSSIBLE:
3757 checked= TRUE;
3758 break;
3759 }
3760 DBUG_PRINT("return", ("checked: %s", YESNO(checked)));
3761 DBUG_RETURN(checked);
3762 }
3763
set_write_error(THD * thd,bool is_transactional)3764 void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
3765 {
3766 DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
3767
3768 write_error= 1;
3769
3770 if (check_write_error(thd))
3771 DBUG_VOID_RETURN;
3772
3773 if (my_errno == EFBIG)
3774 {
3775 if (is_transactional)
3776 {
3777 my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(MY_WME));
3778 }
3779 else
3780 {
3781 my_message(ER_STMT_CACHE_FULL, ER(ER_STMT_CACHE_FULL), MYF(MY_WME));
3782 }
3783 }
3784 else
3785 {
3786 char errbuf[MYSYS_STRERROR_SIZE];
3787 my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name,
3788 errno, my_strerror(errbuf, sizeof(errbuf), errno));
3789 }
3790
3791 DBUG_VOID_RETURN;
3792 }
3793
3794 /**
3795 Find the position in the log-index-file for the given log name.
3796
3797 @param[out] linfo The found log file name will be stored here, along
3798 with the byte offset of the next log file name in the index file.
3799 @param log_name Filename to find in the index file, or NULL if we
3800 want to read the first entry.
3801 @param need_lock_index If false, this function acquires LOCK_index;
3802 otherwise the lock should already be held by the caller.
3803
3804 @note
3805 On systems without the truncate function the file will end with one or
3806 more empty lines. These will be ignored when reading the file.
3807
3808 @retval
3809 0 ok
3810 @retval
3811 LOG_INFO_EOF End of log-index-file found
3812 @retval
3813 LOG_INFO_IO Got IO error while reading file
3814 */
3815
find_log_pos(LOG_INFO * linfo,const char * log_name,bool need_lock_index)3816 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
3817 bool need_lock_index)
3818 {
3819 int error= 0;
3820 char *full_fname= linfo->log_file_name;
3821 char full_log_name[FN_REFLEN], fname[FN_REFLEN];
3822 uint log_name_len= 0, fname_len= 0;
3823 DBUG_ENTER("find_log_pos");
3824 full_log_name[0]= full_fname[0]= 0;
3825
3826 /*
3827 Mutex needed because we need to make sure the file pointer does not
3828 move from under our feet
3829 */
3830 if (need_lock_index)
3831 mysql_mutex_lock(&LOCK_index);
3832 else
3833 mysql_mutex_assert_owner(&LOCK_index);
3834
3835 if (!my_b_inited(&index_file))
3836 {
3837 error= LOG_INFO_IO;
3838 goto end;
3839 }
3840
3841 // extend relative paths for log_name to be searched
3842 if (log_name)
3843 {
3844 if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
3845 {
3846 error= LOG_INFO_EOF;
3847 goto end;
3848 }
3849 }
3850
3851 log_name_len= log_name ? (uint) strlen(full_log_name) : 0;
3852 DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
3853 log_name ? log_name : "NULL", full_log_name));
3854
3855 /* As the file is flushed, we can't get an error here */
3856 my_b_seek(&index_file, (my_off_t) 0);
3857
3858 for (;;)
3859 {
3860 uint length;
3861 my_off_t offset= my_b_tell(&index_file);
3862
3863 DBUG_EXECUTE_IF("simulate_find_log_pos_error",
3864 error= LOG_INFO_EOF; break;);
3865 /* If we get 0 or 1 characters, this is the end of the file */
3866 if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
3867 {
3868 /* Did not find the given entry; Return not found or error */
3869 error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
3870 break;
3871 }
3872
3873 // extend relative paths and match against full path
3874 if (normalize_binlog_name(full_fname, fname, is_relay_log))
3875 {
3876 error= LOG_INFO_EOF;
3877 break;
3878 }
3879 fname_len= (uint) strlen(full_fname);
3880
3881 // if the log entry matches, null string matching anything
3882 if (!log_name ||
3883 (log_name_len == fname_len-1 && full_fname[log_name_len] == '\n' &&
3884 !strncmp(full_fname, full_log_name, log_name_len)))
3885 {
3886 DBUG_PRINT("info", ("Found log file entry"));
3887 full_fname[fname_len-1]= 0; // remove last \n
3888 linfo->index_file_start_offset= offset;
3889 linfo->index_file_offset = my_b_tell(&index_file);
3890 break;
3891 }
3892 linfo->entry_index++;
3893 }
3894
3895 end:
3896 if (need_lock_index)
3897 mysql_mutex_unlock(&LOCK_index);
3898 DBUG_RETURN(error);
3899 }
3900
3901
3902 /**
3903 Find the position in the log-index-file for the given log name.
3904
3905 @param[out] linfo The filename will be stored here, along with the
3906 byte offset of the next filename in the index file.
3907
3908 @param need_lock_index If true, LOCK_index will be acquired;
3909 otherwise it should already be held by the caller.
3910
3911 @note
3912 - Before calling this function, one has to call find_log_pos()
3913 to set up 'linfo'
3914 - Mutex needed because we need to make sure the file pointer does not move
3915 from under our feet
3916
3917 @retval 0 ok
3918 @retval LOG_INFO_EOF End of log-index-file found
3919 @retval LOG_INFO_IO Got IO error while reading file
3920 */
find_next_log(LOG_INFO * linfo,bool need_lock_index)3921 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock_index)
3922 {
3923 int error= 0;
3924 uint length;
3925 char fname[FN_REFLEN];
3926 char *full_fname= linfo->log_file_name;
3927
3928 if (need_lock_index)
3929 mysql_mutex_lock(&LOCK_index);
3930 else
3931 mysql_mutex_assert_owner(&LOCK_index);
3932
3933 if (!my_b_inited(&index_file))
3934 {
3935 error= LOG_INFO_IO;
3936 goto err;
3937 }
3938 /* As the file is flushed, we can't get an error here */
3939 my_b_seek(&index_file, linfo->index_file_offset);
3940
3941 linfo->index_file_start_offset= linfo->index_file_offset;
3942 if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
3943 {
3944 error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
3945 goto err;
3946 }
3947
3948 if (fname[0] != 0)
3949 {
3950 if(normalize_binlog_name(full_fname, fname, is_relay_log))
3951 {
3952 error= LOG_INFO_EOF;
3953 goto err;
3954 }
3955 length= strlen(full_fname);
3956 }
3957
3958 full_fname[length-1]= 0; // kill \n
3959 linfo->index_file_offset= my_b_tell(&index_file);
3960
3961 err:
3962 if (need_lock_index)
3963 mysql_mutex_unlock(&LOCK_index);
3964 return error;
3965 }
3966
3967
3968 /**
3969 Removes files, as part of a RESET MASTER or RESET SLAVE statement,
3970 by deleting all logs referred to in the index file. Then, it starts
3971 writing to a new log file.
3972
3973 The new index file will only contain this file.
3974
3975 @param thd Thread
3976
3977 @note
3978 If not called from slave thread, write start event to new log
3979
3980 @retval
3981 0 ok
3982 @retval
3983 1 error
3984 */
reset_logs(THD * thd)3985 bool MYSQL_BIN_LOG::reset_logs(THD* thd)
3986 {
3987 LOG_INFO linfo;
3988 bool error=0;
3989 int err;
3990 const char* save_name;
3991 DBUG_ENTER("reset_logs");
3992
3993 /*
3994 Flush logs for storage engines, so that the last transaction
3995 is fsynced inside storage engines.
3996 */
3997 if (ha_flush_logs(NULL))
3998 DBUG_RETURN(1);
3999
4000 ha_reset_logs(thd);
4001
4002 /*
4003 We need to get both locks to be sure that no one is trying to
4004 write to the index log file.
4005 */
4006 mysql_mutex_lock(&LOCK_log);
4007 mysql_mutex_lock(&LOCK_index);
4008
4009 /*
4010 The following mutex is needed to ensure that no threads call
4011 'delete thd' as we would then risk missing a 'rollback' from this
4012 thread. If the transaction involved MyISAM tables, it should go
4013 into binlog even on rollback.
4014 */
4015 mysql_mutex_lock(&LOCK_thread_count);
4016
4017 global_sid_lock->wrlock();
4018
4019 /* Save variables so that we can reopen the log */
4020 save_name=name;
4021 name=0; // Protect against free
4022 close(LOG_CLOSE_TO_BE_OPENED, false/*need_lock_log=false*/,
4023 false/*need_lock_index=false*/);
4024
4025 /*
4026 First delete all old log files and then update the index file.
4027 As we first delete the log files and do not use sort of logging,
4028 a crash may lead to an inconsistent state where the index has
4029 references to non-existent files.
4030
4031 We need to invert the steps and use the purge_index_file methods
4032 in order to make the operation safe.
4033 */
4034
4035 if ((err= find_log_pos(&linfo, NullS, false/*need_lock_index=false*/)) != 0)
4036 {
4037 uint errcode= purge_log_get_error_code(err);
4038 sql_print_error("Failed to locate old binlog or relay log files");
4039 my_message(errcode, ER(errcode), MYF(0));
4040 error= 1;
4041 goto err;
4042 }
4043
4044 for (;;)
4045 {
4046 if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
4047 {
4048 if (my_errno == ENOENT)
4049 {
4050 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4051 ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
4052 linfo.log_file_name);
4053 sql_print_information("Failed to delete file '%s'",
4054 linfo.log_file_name);
4055 my_errno= 0;
4056 error= 0;
4057 }
4058 else
4059 {
4060 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4061 ER_BINLOG_PURGE_FATAL_ERR,
4062 "a problem with deleting %s; "
4063 "consider examining correspondence "
4064 "of your binlog index file "
4065 "to the actual binlog files",
4066 linfo.log_file_name);
4067 error= 1;
4068 goto err;
4069 }
4070 }
4071 if (find_next_log(&linfo, false/*need_lock_index=false*/))
4072 break;
4073 }
4074
4075 /* Start logging with a new file */
4076 close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED,
4077 false/*need_lock_log=false*/,
4078 false/*need_lock_index=false*/);
4079 if ((error= my_delete_allow_opened(index_file_name, MYF(0)))) // Reset (open will update)
4080 {
4081 if (my_errno == ENOENT)
4082 {
4083 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4084 ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
4085 index_file_name);
4086 sql_print_information("Failed to delete file '%s'",
4087 index_file_name);
4088 my_errno= 0;
4089 error= 0;
4090 }
4091 else
4092 {
4093 push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
4094 ER_BINLOG_PURGE_FATAL_ERR,
4095 "a problem with deleting %s; "
4096 "consider examining correspondence "
4097 "of your binlog index file "
4098 "to the actual binlog files",
4099 index_file_name);
4100 error= 1;
4101 goto err;
4102 }
4103 }
4104
4105 #ifdef HAVE_REPLICATION
4106 if (is_relay_log)
4107 {
4108 DBUG_ASSERT(active_mi != NULL);
4109 DBUG_ASSERT(active_mi->rli != NULL);
4110 (const_cast<Gtid_set *>(active_mi->rli->get_gtid_set()))->clear();
4111 }
4112 else
4113 {
4114 gtid_state->clear();
4115 // don't clear global_sid_map because it's used by the relay log too
4116 if (gtid_state->init() != 0)
4117 goto err;
4118 }
4119 #endif
4120
4121 if (!open_index_file(index_file_name, 0, false/*need_lock_index=false*/))
4122 if ((error= open_binlog(save_name, 0, io_cache_type,
4123 max_size, false,
4124 false/*need_lock_log=false*/,
4125 false/*need_lock_index=false*/,
4126 false/*need_sid_lock=false*/,
4127 NULL)))
4128 goto err;
4129 my_free((void *) save_name);
4130
4131 err:
4132 if (error == 1)
4133 name= const_cast<char*>(save_name);
4134 global_sid_lock->unlock();
4135 mysql_mutex_unlock(&LOCK_thread_count);
4136 mysql_mutex_unlock(&LOCK_index);
4137 mysql_mutex_unlock(&LOCK_log);
4138 DBUG_RETURN(error);
4139 }
4140
4141
4142 /**
4143 Set the name of crash safe index file.
4144
4145 @retval
4146 0 ok
4147 @retval
4148 1 error
4149 */
set_crash_safe_index_file_name(const char * base_file_name)4150 int MYSQL_BIN_LOG::set_crash_safe_index_file_name(const char *base_file_name)
4151 {
4152 int error= 0;
4153 DBUG_ENTER("MYSQL_BIN_LOG::set_crash_safe_index_file_name");
4154 if (fn_format(crash_safe_index_file_name, base_file_name, mysql_data_home,
4155 ".index_crash_safe", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
4156 MY_REPLACE_EXT)) == NULL)
4157 {
4158 error= 1;
4159 sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed "
4160 "to set file name.");
4161 }
4162 DBUG_RETURN(error);
4163 }
4164
4165
4166 /**
4167 Open a (new) crash safe index file.
4168
4169 @note
4170 The crash safe index file is a special file
4171 used for guaranteeing index file crash safe.
4172 @retval
4173 0 ok
4174 @retval
4175 1 error
4176 */
open_crash_safe_index_file()4177 int MYSQL_BIN_LOG::open_crash_safe_index_file()
4178 {
4179 int error= 0;
4180 File file= -1;
4181
4182 DBUG_ENTER("MYSQL_BIN_LOG::open_crash_safe_index_file");
4183
4184 if (!my_b_inited(&crash_safe_index_file))
4185 {
4186 if ((file= my_open(crash_safe_index_file_name, O_RDWR | O_CREAT | O_BINARY,
4187 MYF(MY_WME | ME_WAITTANG))) < 0 ||
4188 init_io_cache(&crash_safe_index_file, file, IO_SIZE, WRITE_CACHE,
4189 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
4190 {
4191 error= 1;
4192 sql_print_error("MYSQL_BIN_LOG::open_crash_safe_index_file failed "
4193 "to open temporary index file.");
4194 }
4195 }
4196 DBUG_RETURN(error);
4197 }
4198
4199
4200 /**
4201 Close the crash safe index file.
4202
4203 @note
4204 The crash safe file is just closed, is not deleted.
4205 Because it is moved to index file later on.
4206 @retval
4207 0 ok
4208 @retval
4209 1 error
4210 */
close_crash_safe_index_file()4211 int MYSQL_BIN_LOG::close_crash_safe_index_file()
4212 {
4213 int error= 0;
4214
4215 DBUG_ENTER("MYSQL_BIN_LOG::close_crash_safe_index_file");
4216
4217 if (my_b_inited(&crash_safe_index_file))
4218 {
4219 end_io_cache(&crash_safe_index_file);
4220 error= my_close(crash_safe_index_file.file, MYF(0));
4221 }
4222 memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
4223
4224 DBUG_RETURN(error);
4225 }
4226
4227
4228 /**
4229 Delete relay log files prior to rli->group_relay_log_name
4230 (i.e. all logs which are not involved in a non-finished group
4231 (transaction)), remove them from the index file and start on next
4232 relay log.
4233
4234 IMPLEMENTATION
4235
4236 - You must hold rli->data_lock before calling this function, since
4237 it writes group_relay_log_pos and similar fields of
4238 Relay_log_info.
4239 - Protects index file with LOCK_index
4240 - Delete relevant relay log files
4241 - Copy all file names after these ones to the front of the index file
4242 - If the OS has truncate, truncate the file, else fill it with '\n'
4243 - Read the next file name from the index file and store in rli->linfo
4244
4245 @param rli Relay log information
4246 @param included If false, all relay logs that are strictly before
4247 rli->group_relay_log_name are deleted ; if true, the
4248 latter is deleted too (i.e. all relay logs
4249 read by the SQL slave thread are deleted).
4250
4251 @note
4252 - This is only called from the slave SQL thread when it has read
4253 all commands from a relay log and want to switch to a new relay log.
4254 - When this happens, we can be in an active transaction as
4255 a transaction can span over two relay logs
4256 (although it is always written as a single block to the master's binary
4257 log, hence cannot span over two master's binary logs).
4258
4259 @retval
4260 0 ok
4261 @retval
4262 LOG_INFO_EOF End of log-index-file found
4263 @retval
4264 LOG_INFO_SEEK Could not allocate IO cache
4265 @retval
4266 LOG_INFO_IO Got IO error while reading file
4267 */
4268
4269 #ifdef HAVE_REPLICATION
4270
purge_first_log(Relay_log_info * rli,bool included)4271 int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
4272 {
4273 int error;
4274 char *to_purge_if_included= NULL;
4275 DBUG_ENTER("purge_first_log");
4276
4277 DBUG_ASSERT(current_thd->system_thread == SYSTEM_THREAD_SLAVE_SQL);
4278 DBUG_ASSERT(is_relay_log);
4279 DBUG_ASSERT(is_open());
4280 DBUG_ASSERT(rli->slave_running == 1);
4281 DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->get_event_relay_log_name()));
4282
4283 mysql_mutex_assert_owner(&rli->data_lock);
4284
4285 mysql_mutex_lock(&LOCK_index);
4286 to_purge_if_included= my_strdup(rli->get_group_relay_log_name(), MYF(0));
4287
4288 /*
4289 Read the next log file name from the index file and pass it back to
4290 the caller.
4291 */
4292 if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
4293 false/*need_lock_index=false*/)) ||
4294 (error=find_next_log(&rli->linfo, false/*need_lock_index=false*/)))
4295 {
4296 char buff[22];
4297 sql_print_error("next log error: %d offset: %s log: %s included: %d",
4298 error,
4299 llstr(rli->linfo.index_file_offset,buff),
4300 rli->get_event_relay_log_name(),
4301 included);
4302 goto err;
4303 }
4304
4305 /*
4306 Reset rli's coordinates to the current log.
4307 */
4308 rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
4309 rli->set_event_relay_log_name(rli->linfo.log_file_name);
4310
4311 /*
4312 If we removed the rli->group_relay_log_name file,
4313 we must update the rli->group* coordinates, otherwise do not touch it as the
4314 group's execution is not finished (e.g. COMMIT not executed)
4315 */
4316 if (included)
4317 {
4318 rli->set_group_relay_log_pos(BIN_LOG_HEADER_SIZE);
4319 rli->set_group_relay_log_name(rli->linfo.log_file_name);
4320 rli->notify_group_relay_log_name_update();
4321 }
4322 /*
4323 Store where we are in the new file for the execution thread.
4324 If we are in the middle of a group), then we should not store
4325 the position in the repository, instead in that case set a flag
4326 to true which indicates that a 'forced flush' is postponed due
4327 to transaction split across the relaylogs.
4328 */
4329 if (!rli->is_in_group())
4330 rli->flush_info(TRUE);
4331 else
4332 rli->force_flush_postponed_due_to_split_trans= true;
4333
4334 DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););
4335
4336 mysql_mutex_lock(&rli->log_space_lock);
4337 rli->relay_log.purge_logs(to_purge_if_included, included,
4338 false/*need_lock_index=false*/,
4339 false/*need_update_threads=false*/,
4340 &rli->log_space_total, true);
4341 // Tell the I/O thread to take the relay_log_space_limit into account
4342 rli->ignore_log_space_limit= 0;
4343 mysql_mutex_unlock(&rli->log_space_lock);
4344
4345 /*
4346 Ok to broadcast after the critical region as there is no risk of
4347 the mutex being destroyed by this thread later - this helps save
4348 context switches
4349 */
4350 mysql_cond_broadcast(&rli->log_space_cond);
4351
4352 /*
4353 * Need to update the log pos because purge logs has been called
4354 * after fetching initially the log pos at the begining of the method.
4355 */
4356 if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
4357 false/*need_lock_index=false*/)))
4358 {
4359 char buff[22];
4360 sql_print_error("next log error: %d offset: %s log: %s included: %d",
4361 error,
4362 llstr(rli->linfo.index_file_offset,buff),
4363 rli->get_group_relay_log_name(),
4364 included);
4365 goto err;
4366 }
4367
4368 /* If included was passed, rli->linfo should be the first entry. */
4369 DBUG_ASSERT(!included || rli->linfo.index_file_start_offset == 0);
4370
4371 err:
4372 my_free(to_purge_if_included);
4373 mysql_mutex_unlock(&LOCK_index);
4374 DBUG_RETURN(error);
4375 }
4376
4377
4378 /**
4379 Remove logs from index file.
4380
4381 - To make crash safe, we copy the content of index file
4382 from index_file_start_offset recored in log_info to
4383 crash safe index file firstly and then move the crash
4384 safe index file to index file.
4385
4386 @param linfo Store here the found log file name and
4387 position to the NEXT log file name in
4388 the index file.
4389
4390 @param need_update_threads If we want to update the log coordinates
4391 of all threads. False for relay logs,
4392 true otherwise.
4393
4394 @retval
4395 0 ok
4396 @retval
4397 LOG_INFO_IO Got IO error while reading/writing file
4398 */
remove_logs_from_index(LOG_INFO * log_info,bool need_update_threads)4399 int MYSQL_BIN_LOG::remove_logs_from_index(LOG_INFO* log_info, bool need_update_threads)
4400 {
4401 if (open_crash_safe_index_file())
4402 {
4403 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
4404 "open the crash safe index file.");
4405 goto err;
4406 }
4407
4408 if (copy_file(&index_file, &crash_safe_index_file,
4409 log_info->index_file_start_offset))
4410 {
4411 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
4412 "copy index file to crash safe index file.");
4413 goto err;
4414 }
4415
4416 if (close_crash_safe_index_file())
4417 {
4418 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
4419 "close the crash safe index file.");
4420 goto err;
4421 }
4422 DBUG_EXECUTE_IF("fault_injection_copy_part_file", DBUG_SUICIDE(););
4423
4424 if (move_crash_safe_index_file_to_index_file(false/*need_lock_index=false*/))
4425 {
4426 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
4427 "move crash safe index file to index file.");
4428 goto err;
4429 }
4430
4431 // now update offsets in index file for running threads
4432 if (need_update_threads)
4433 adjust_linfo_offsets(log_info->index_file_start_offset);
4434 return 0;
4435
4436 err:
4437 return LOG_INFO_IO;
4438 }
4439
4440 /**
4441 Remove all logs before the given log from disk and from the index file.
4442
4443 @param to_log Delete all log file name before this file.
4444 @param included If true, to_log is deleted too.
4445 @param need_lock_index
4446 @param need_update_threads If we want to update the log coordinates of
4447 all threads. False for relay logs, true otherwise.
4448 @param freed_log_space If not null, decrement this variable of
4449 the amount of log space freed
4450 @param auto_purge True if this is an automatic purge.
4451
4452 @note
4453 If any of the logs before the deleted one is in use,
4454 only purge logs up to this one.
4455
4456 @retval
4457 0 ok
4458 @retval
4459 LOG_INFO_EOF to_log not found
4460 LOG_INFO_EMFILE too many files opened
4461 LOG_INFO_FATAL if any other than ENOENT error from
4462 mysql_file_stat() or mysql_file_delete()
4463 */
4464
purge_logs(const char * to_log,bool included,bool need_lock_index,bool need_update_threads,ulonglong * decrease_log_space,bool auto_purge)4465 int MYSQL_BIN_LOG::purge_logs(const char *to_log,
4466 bool included,
4467 bool need_lock_index,
4468 bool need_update_threads,
4469 ulonglong *decrease_log_space,
4470 bool auto_purge)
4471 {
4472 int error= 0, no_of_log_files_to_purge= 0, no_of_log_files_purged= 0;
4473 int no_of_threads_locking_log= 0;
4474 bool exit_loop= 0;
4475 LOG_INFO log_info;
4476 THD *thd= current_thd;
4477 DBUG_ENTER("purge_logs");
4478 DBUG_PRINT("info",("to_log= %s",to_log));
4479
4480 if (need_lock_index)
4481 mysql_mutex_lock(&LOCK_index);
4482 else
4483 mysql_mutex_assert_owner(&LOCK_index);
4484 if ((error=find_log_pos(&log_info, to_log, false/*need_lock_index=false*/)))
4485 {
4486 sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
4487 "listed in the index.", to_log);
4488 goto err;
4489 }
4490
4491 no_of_log_files_to_purge= log_info.entry_index;
4492
4493 if ((error= open_purge_index_file(TRUE)))
4494 {
4495 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
4496 goto err;
4497 }
4498
4499 /*
4500 File name exists in index file; delete until we find this file
4501 or a file that is used.
4502 */
4503 if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
4504 goto err;
4505
4506 while ((strcmp(to_log,log_info.log_file_name) || (exit_loop=included)))
4507 {
4508 if(is_active(log_info.log_file_name))
4509 {
4510 if(!auto_purge)
4511 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4512 ER_WARN_PURGE_LOG_IS_ACTIVE,
4513 ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
4514 log_info.log_file_name);
4515 break;
4516 }
4517
4518 if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
4519 {
4520 if(!auto_purge)
4521 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4522 ER_WARN_PURGE_LOG_IN_USE,
4523 ER(ER_WARN_PURGE_LOG_IN_USE),
4524 log_info.log_file_name, no_of_threads_locking_log,
4525 no_of_log_files_purged, no_of_log_files_to_purge);
4526 break;
4527 }
4528 no_of_log_files_purged++;
4529
4530 if ((error= register_purge_index_entry(log_info.log_file_name)))
4531 {
4532 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
4533 log_info.log_file_name);
4534 goto err;
4535 }
4536
4537 if (find_next_log(&log_info, false/*need_lock_index=false*/) || exit_loop)
4538 break;
4539 }
4540
4541 DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););
4542
4543 if ((error= sync_purge_index_file()))
4544 {
4545 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
4546 goto err;
4547 }
4548
4549 /* We know how many files to delete. Update index file. */
4550 if ((error=remove_logs_from_index(&log_info, need_update_threads)))
4551 {
4552 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
4553 goto err;
4554 }
4555
4556 // Update gtid_state->lost_gtids
4557 if (gtid_mode > 0 && !is_relay_log)
4558 {
4559 global_sid_lock->wrlock();
4560 error= init_gtid_sets(NULL,
4561 const_cast<Gtid_set *>(gtid_state->get_lost_gtids()),
4562 NULL,
4563 opt_master_verify_checksum,
4564 false/*false=don't need lock*/);
4565 global_sid_lock->unlock();
4566 if (error)
4567 goto err;
4568 }
4569
4570 DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););
4571
4572 err:
4573
4574 int error_index= 0, close_error_index= 0;
4575 /* Read each entry from purge_index_file and delete the file. */
4576 if (!error && is_inited_purge_index_file() &&
4577 (error_index= purge_index_entry(thd, decrease_log_space, false/*need_lock_index=false*/)))
4578 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
4579 " that would be purged.");
4580
4581 close_error_index= close_purge_index_file();
4582
4583 DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););
4584
4585 if (need_lock_index)
4586 mysql_mutex_unlock(&LOCK_index);
4587
4588 /*
4589 Error codes from purge logs take precedence.
4590 Then error codes from purging the index entry.
4591 Finally, error codes from closing the purge index file.
4592 */
4593 error= error ? error : (error_index ? error_index :
4594 close_error_index);
4595
4596 DBUG_RETURN(error);
4597 }
4598
set_purge_index_file_name(const char * base_file_name)4599 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
4600 {
4601 int error= 0;
4602 DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
4603 if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
4604 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
4605 MY_REPLACE_EXT)) == NULL)
4606 {
4607 error= 1;
4608 sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
4609 "file name.");
4610 }
4611 DBUG_RETURN(error);
4612 }
4613
open_purge_index_file(bool destroy)4614 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
4615 {
4616 int error= 0;
4617 File file= -1;
4618
4619 DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
4620
4621 if (destroy)
4622 close_purge_index_file();
4623
4624 if (!my_b_inited(&purge_index_file))
4625 {
4626 if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
4627 MYF(MY_WME | ME_WAITTANG))) < 0 ||
4628 init_io_cache(&purge_index_file, file, IO_SIZE,
4629 (destroy ? WRITE_CACHE : READ_CACHE),
4630 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
4631 {
4632 error= 1;
4633 sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
4634 " file.");
4635 }
4636 }
4637 DBUG_RETURN(error);
4638 }
4639
close_purge_index_file()4640 int MYSQL_BIN_LOG::close_purge_index_file()
4641 {
4642 int error= 0;
4643
4644 DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
4645
4646 if (my_b_inited(&purge_index_file))
4647 {
4648 end_io_cache(&purge_index_file);
4649 error= my_close(purge_index_file.file, MYF(0));
4650 }
4651 my_delete(purge_index_file_name, MYF(0));
4652 memset(&purge_index_file, 0, sizeof(purge_index_file));
4653
4654 DBUG_RETURN(error);
4655 }
4656
is_inited_purge_index_file()4657 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
4658 {
4659 DBUG_ENTER("MYSQL_BIN_LOG::is_inited_purge_index_file");
4660 DBUG_RETURN (my_b_inited(&purge_index_file));
4661 }
4662
sync_purge_index_file()4663 int MYSQL_BIN_LOG::sync_purge_index_file()
4664 {
4665 int error= 0;
4666 DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
4667
4668 if ((error= flush_io_cache(&purge_index_file)) ||
4669 (error= my_sync(purge_index_file.file, MYF(MY_WME))))
4670 DBUG_RETURN(error);
4671
4672 DBUG_RETURN(error);
4673 }
4674
register_purge_index_entry(const char * entry)4675 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
4676 {
4677 int error= 0;
4678 DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
4679
4680 if ((error=my_b_write(&purge_index_file, (const uchar*)entry, strlen(entry))) ||
4681 (error=my_b_write(&purge_index_file, (const uchar*)"\n", 1)))
4682 DBUG_RETURN (error);
4683
4684 DBUG_RETURN(error);
4685 }
4686
register_create_index_entry(const char * entry)4687 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
4688 {
4689 DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
4690 DBUG_RETURN(register_purge_index_entry(entry));
4691 }
4692
purge_index_entry(THD * thd,ulonglong * decrease_log_space,bool need_lock_index)4693 int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *decrease_log_space,
4694 bool need_lock_index)
4695 {
4696 MY_STAT s;
4697 int error= 0;
4698 LOG_INFO log_info;
4699 LOG_INFO check_log_info;
4700
4701 DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");
4702
4703 DBUG_ASSERT(my_b_inited(&purge_index_file));
4704
4705 if ((error=reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0, 0)))
4706 {
4707 sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
4708 "for read");
4709 goto err;
4710 }
4711
4712 for (;;)
4713 {
4714 uint length;
4715
4716 if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
4717 FN_REFLEN)) <= 1)
4718 {
4719 if (purge_index_file.error)
4720 {
4721 error= purge_index_file.error;
4722 sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
4723 "register file.", error);
4724 goto err;
4725 }
4726
4727 /* Reached EOF */
4728 break;
4729 }
4730
4731 /* Get rid of the trailing '\n' */
4732 log_info.log_file_name[length-1]= 0;
4733
4734 if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s, MYF(0)))
4735 {
4736 if (my_errno == ENOENT)
4737 {
4738 /*
4739 It's not fatal if we can't stat a log file that does not exist;
4740 If we could not stat, we won't delete.
4741 */
4742 if (thd)
4743 {
4744 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4745 ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
4746 log_info.log_file_name);
4747 }
4748 sql_print_information("Failed to execute mysql_file_stat on file '%s'",
4749 log_info.log_file_name);
4750 my_errno= 0;
4751 }
4752 else
4753 {
4754 /*
4755 Other than ENOENT are fatal
4756 */
4757 if (thd)
4758 {
4759 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4760 ER_BINLOG_PURGE_FATAL_ERR,
4761 "a problem with getting info on being purged %s; "
4762 "consider examining correspondence "
4763 "of your binlog index file "
4764 "to the actual binlog files",
4765 log_info.log_file_name);
4766 }
4767 else
4768 {
4769 sql_print_information("Failed to delete log file '%s'; "
4770 "consider examining correspondence "
4771 "of your binlog index file "
4772 "to the actual binlog files",
4773 log_info.log_file_name);
4774 }
4775 error= LOG_INFO_FATAL;
4776 goto err;
4777 }
4778 }
4779 else
4780 {
4781 if ((error= find_log_pos(&check_log_info, log_info.log_file_name,
4782 need_lock_index)))
4783 {
4784 if (error != LOG_INFO_EOF)
4785 {
4786 if (thd)
4787 {
4788 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4789 ER_BINLOG_PURGE_FATAL_ERR,
4790 "a problem with deleting %s and "
4791 "reading the binlog index file",
4792 log_info.log_file_name);
4793 }
4794 else
4795 {
4796 sql_print_information("Failed to delete file '%s' and "
4797 "read the binlog index file",
4798 log_info.log_file_name);
4799 }
4800 goto err;
4801 }
4802
4803 error= 0;
4804 if (!need_lock_index)
4805 {
4806 /*
4807 This is to avoid triggering an error in NDB.
4808
4809 @todo: This is weird, what does NDB errors have to do with
4810 need_lock_index? Explain better or refactor /Sven
4811 */
4812 ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
4813 }
4814
4815 DBUG_PRINT("info",("purging %s",log_info.log_file_name));
4816 if (!mysql_file_delete(key_file_binlog, log_info.log_file_name, MYF(0)))
4817 {
4818 DBUG_EXECUTE_IF("wait_in_purge_index_entry",
4819 {
4820 const char action[] = "now SIGNAL in_purge_index_entry WAIT_FOR go_ahead_sql";
4821 DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(action)));
4822 DBUG_SET("-d,wait_in_purge_index_entry");
4823 };);
4824
4825 if (decrease_log_space)
4826 *decrease_log_space-= s.st_size;
4827 }
4828 else
4829 {
4830 if (my_errno == ENOENT)
4831 {
4832 if (thd)
4833 {
4834 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4835 ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
4836 log_info.log_file_name);
4837 }
4838 sql_print_information("Failed to delete file '%s'",
4839 log_info.log_file_name);
4840 my_errno= 0;
4841 }
4842 else
4843 {
4844 if (thd)
4845 {
4846 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4847 ER_BINLOG_PURGE_FATAL_ERR,
4848 "a problem with deleting %s; "
4849 "consider examining correspondence "
4850 "of your binlog index file "
4851 "to the actual binlog files",
4852 log_info.log_file_name);
4853 }
4854 else
4855 {
4856 sql_print_information("Failed to delete file '%s'; "
4857 "consider examining correspondence "
4858 "of your binlog index file "
4859 "to the actual binlog files",
4860 log_info.log_file_name);
4861 }
4862 if (my_errno == EMFILE)
4863 {
4864 DBUG_PRINT("info",
4865 ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno));
4866 error= LOG_INFO_EMFILE;
4867 goto err;
4868 }
4869 error= LOG_INFO_FATAL;
4870 goto err;
4871 }
4872 }
4873 }
4874 }
4875 }
4876
4877 err:
4878 DBUG_RETURN(error);
4879 }
4880
4881 /**
4882 Remove all logs before the given file date from disk and from the
4883 index file.
4884
4885 @param thd Thread pointer
4886 @param purge_time Delete all log files before given date.
4887 @param auto_purge True if this is an automatic purge.
4888
4889 @note
4890 If any of the logs before the deleted one is in use,
4891 only purge logs up to this one.
4892
4893 @retval
4894 0 ok
4895 @retval
4896 LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
4897 LOG_INFO_FATAL if any other than ENOENT error from
4898 mysql_file_stat() or mysql_file_delete()
4899 */
4900
purge_logs_before_date(time_t purge_time,bool auto_purge)4901 int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time, bool auto_purge)
4902 {
4903 int error;
4904 int no_of_threads_locking_log= 0, no_of_log_files_purged= 0;
4905 bool log_is_active= false, log_is_in_use= false;
4906 char to_log[FN_REFLEN], copy_log_in_use[FN_REFLEN];
4907 LOG_INFO log_info;
4908 MY_STAT stat_area;
4909 THD *thd= current_thd;
4910
4911 DBUG_ENTER("purge_logs_before_date");
4912
4913 mysql_mutex_lock(&LOCK_index);
4914 to_log[0]= 0;
4915
4916 if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
4917 goto err;
4918
4919 while (!(log_is_active= is_active(log_info.log_file_name)))
4920 {
4921 if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
4922 {
4923 if (!auto_purge)
4924 {
4925 log_is_in_use= true;
4926 strcpy(copy_log_in_use, log_info.log_file_name);
4927 }
4928 break;
4929 }
4930 no_of_log_files_purged++;
4931
4932 if (!mysql_file_stat(m_key_file_log,
4933 log_info.log_file_name, &stat_area, MYF(0)))
4934 {
4935 if (my_errno == ENOENT)
4936 {
4937 /*
4938 It's not fatal if we can't stat a log file that does not exist.
4939 */
4940 my_errno= 0;
4941 }
4942 else
4943 {
4944 /*
4945 Other than ENOENT are fatal
4946 */
4947 if (thd)
4948 {
4949 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4950 ER_BINLOG_PURGE_FATAL_ERR,
4951 "a problem with getting info on being purged %s; "
4952 "consider examining correspondence "
4953 "of your binlog index file "
4954 "to the actual binlog files",
4955 log_info.log_file_name);
4956 }
4957 else
4958 {
4959 sql_print_information("Failed to delete log file '%s'",
4960 log_info.log_file_name);
4961 }
4962 error= LOG_INFO_FATAL;
4963 goto err;
4964 }
4965 }
4966 else
4967 {
4968 if (stat_area.st_mtime < purge_time)
4969 strmake(to_log,
4970 log_info.log_file_name,
4971 sizeof(log_info.log_file_name) - 1);
4972 else
4973 break;
4974 }
4975 if (find_next_log(&log_info, false/*need_lock_index=false*/))
4976 break;
4977 }
4978
4979 if (log_is_active)
4980 {
4981 if(!auto_purge)
4982 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
4983 ER_WARN_PURGE_LOG_IS_ACTIVE,
4984 ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
4985 log_info.log_file_name);
4986
4987 }
4988
4989 if (log_is_in_use)
4990 {
4991 int no_of_log_files_to_purge= no_of_log_files_purged+1;
4992 while (strcmp(log_file_name, log_info.log_file_name))
4993 {
4994 if (mysql_file_stat(m_key_file_log, log_info.log_file_name,
4995 &stat_area, MYF(0)))
4996 {
4997 if (stat_area.st_mtime < purge_time)
4998 no_of_log_files_to_purge++;
4999 else
5000 break;
5001 }
5002 if (find_next_log(&log_info, false/*need_lock_index=false*/))
5003 {
5004 no_of_log_files_to_purge++;
5005 break;
5006 }
5007 }
5008
5009 push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
5010 ER_WARN_PURGE_LOG_IN_USE,
5011 ER(ER_WARN_PURGE_LOG_IN_USE),
5012 copy_log_in_use, no_of_threads_locking_log,
5013 no_of_log_files_purged, no_of_log_files_to_purge);
5014 }
5015
5016 error= (to_log[0] ? purge_logs(to_log, true,
5017 false/*need_lock_index=false*/,
5018 true/*need_update_threads=true*/,
5019 (ulonglong *) 0, auto_purge) : 0);
5020
5021 err:
5022 mysql_mutex_unlock(&LOCK_index);
5023 DBUG_RETURN(error);
5024 }
5025 #endif /* HAVE_REPLICATION */
5026
5027
5028 /**
5029 Create a new log file name.
5030
5031 @param buf buf of at least FN_REFLEN where new name is stored
5032
5033 @note
5034 If file name will be longer then FN_REFLEN it will be truncated
5035 */
5036
make_log_name(char * buf,const char * log_ident)5037 void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
5038 {
5039 uint dir_len = dirname_length(log_file_name);
5040 if (dir_len >= FN_REFLEN)
5041 dir_len=FN_REFLEN-1;
5042 strnmov(buf, log_file_name, dir_len);
5043 strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
5044 }
5045
5046
5047 /**
5048 Check if we are writing/reading to the given log file.
5049 */
5050
is_active(const char * log_file_name_arg)5051 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
5052 {
5053 return !strcmp(log_file_name, log_file_name_arg);
5054 }
5055
5056
5057 /*
5058 Wrappers around new_file_impl to avoid using argument
5059 to control locking. The argument 1) less readable 2) breaks
5060 incapsulation 3) allows external access to the class without
5061 a lock (which is not possible with private new_file_without_locking
5062 method).
5063
5064 @retval
5065 nonzero - error
5066
5067 */
5068
new_file(Format_description_log_event * extra_description_event)5069 int MYSQL_BIN_LOG::new_file(Format_description_log_event *extra_description_event)
5070 {
5071 return new_file_impl(true/*need_lock_log=true*/, extra_description_event);
5072 }
5073
5074 /*
5075 @retval
5076 nonzero - error
5077 */
new_file_without_locking(Format_description_log_event * extra_description_event)5078 int MYSQL_BIN_LOG::new_file_without_locking(Format_description_log_event *extra_description_event)
5079 {
5080 return new_file_impl(false/*need_lock_log=false*/, extra_description_event);
5081 }
5082
5083
/**
  Start writing to a new log file or reopen the old file.

  Outline of what the function does:
    - returns immediately (with 0) if the log is not open;
    - takes LOCK_log (if requested), waits under LOCK_xids until
      get_prep_xids() drops to 0, then takes LOCK_index;
    - flushes storage engine logs via ha_flush_logs();
    - generates the next file name and writes a Rotate_log_event
      carrying that name to the current log file;
    - closes the current log and index, then reopens the index file and
      opens the new log file;
    - on a rotate/reopen failure either invokes the ABORT_SERVER action
      or logs an error, and then closes the log.

  @param need_lock_log If true, this function acquires LOCK_log;
  otherwise the caller should already have acquired it.
  @param extra_description_event Passed unchanged to open_binlog() when
  the new file is opened.

  @retval 0 success (also returned when the log is not open)
  @retval nonzero - error

  @note The new file name is stored last in the index file
*/
int MYSQL_BIN_LOG::new_file_impl(bool need_lock_log, Format_description_log_event *extra_description_event)
{
  /*
    close_on_error marks failures after which the log has to be closed
    (see the handling below the 'end' label).
  */
  int error= 0, close_on_error= FALSE;
  char new_name[FN_REFLEN], *new_name_ptr, *old_name, *file_to_open;

  DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
  /* Note: this check is made before any of the mutexes below is taken. */
  if (!is_open())
  {
    DBUG_PRINT("info",("log is closed"));
    DBUG_RETURN(error);
  }

  if (need_lock_log)
    mysql_mutex_lock(&LOCK_log);
  else
    mysql_mutex_assert_owner(&LOCK_log);
  DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
                  DEBUG_SYNC(current_thd, "before_rotate_binlog"););
  mysql_mutex_lock(&LOCK_xids);
  /*
    We need to ensure that the number of prepared XIDs are 0.

    If m_prep_xids is not zero:
    - We wait for storage engine commit, hence decrease m_prep_xids
    - We keep the LOCK_log to block new transactions from being
      written to the binary log.
  */
  while (get_prep_xids() > 0)
  {
    DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
    mysql_cond_wait(&m_prep_xids_cond, &LOCK_xids);
  }
  mysql_mutex_unlock(&LOCK_xids);

  mysql_mutex_lock(&LOCK_index);

  /*
    Flush the storage engine logs, unless the "expire_logs_always"
    debug keyword is active. A flush failure leaves close_on_error
    unset, so the log stays open and only the error code is returned.
  */
  if (DBUG_EVALUATE_IF("expire_logs_always", 0, 1)
      && (error= ha_flush_logs(NULL)))
    goto end;

  mysql_mutex_assert_owner(&LOCK_log);
  mysql_mutex_assert_owner(&LOCK_index);


  /*
    If user hasn't specified an extension, generate a new log name
    We have to do this here and not in open as we want to store the
    new file name in the current binary log file.
  */
  new_name_ptr= new_name;
  if ((error= generate_new_name(new_name, name)))
  {
    // Use the old name if generation of new name fails.
    strcpy(new_name, name);
    close_on_error= TRUE;
    goto end;
  }
  else
  {
    /*
      We log the whole file name for log file as the user may decide
      to change base names at some point.
    */
    Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
                       is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
    /*
      The current relay-log's closing Rotate event must have checksum
      value computed with an algorithm of the last relay-logged FD event.
    */
    if (is_relay_log)
      r.checksum_alg= relay_log_checksum_alg;
    DBUG_ASSERT(!is_relay_log || relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
    if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event", (error=close_on_error=TRUE), FALSE) ||
       (error= r.write(&log_file)))
    {
      char errbuf[MYSYS_STRERROR_SIZE];
      DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno=2;);
      close_on_error= TRUE;
      /*
        NOTE(review): the error code is ER_ERROR_ON_WRITE but the message
        text is the ER_CANT_OPEN_FILE format, and the file reported is the
        current 'name', not the new one - confirm this is intended.
      */
      my_printf_error(ER_ERROR_ON_WRITE, ER(ER_CANT_OPEN_FILE),
                      MYF(ME_FATALERROR), name,
                      errno, my_strerror(errbuf, sizeof(errbuf), errno));
      goto end;
    }
    bytes_written += r.data_written;
  }
  /*
    Update needs to be signalled even if there is no rotate event
    log rotation should give the waiting thread a signal to
    discover EOF and move on to the next log.
  */
  signal_update();

  /*
    Detach 'name' before close() so that the string survives the close;
    it is handed to open_binlog() below and released with my_free()
    afterwards.
  */
  old_name=name;
  name=0;				// Don't free name
  close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX,
        false/*need_lock_log=false*/,
        false/*need_lock_index=false*/);

  /* Apply a checksum algorithm change that was requested for this rotation. */
  if (checksum_alg_reset != BINLOG_CHECKSUM_ALG_UNDEF)
  {
    DBUG_ASSERT(!is_relay_log);
    DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
    binlog_checksum_options= checksum_alg_reset;
  }
  /*
     Note that at this point, log_state != LOG_CLOSED (important for is_open()).
  */

  DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
  /*
     new_file() is only used for rotation (in FLUSH LOGS or because size >
     max_binlog_size or max_relay_log_size).
     If this is a binary log, the Format_description_log_event at the beginning of
     the new file should have created=0 (to distinguish with the
     Format_description_log_event written at server startup, which should
     trigger temp tables deletion on slaves).
  */

  /* reopen index binlog file, BUG#34582 */
  /* file_to_open tracks which file is named in the error message below. */
  file_to_open= index_file_name;
  error= open_index_file(index_file_name, 0, false/*need_lock_index=false*/);
  if (!error)
  {
    /* reopen the binary log file. */
    file_to_open= new_name_ptr;
    error= open_binlog(old_name, new_name_ptr, io_cache_type,
                       max_size, true/*null_created_arg=true*/,
                       false/*need_lock_log=false*/,
                       false/*need_lock_index=false*/,
                       true/*need_sid_lock=true*/,
                       extra_description_event);
  }

  /* handle reopening errors */
  if (error)
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    my_printf_error(ER_CANT_OPEN_FILE, ER(ER_CANT_OPEN_FILE),
                    MYF(ME_FATALERROR), file_to_open,
                    error, my_strerror(errbuf, sizeof(errbuf), error));
    close_on_error= TRUE;
  }
  my_free(old_name);

end:

  if (error && close_on_error /* rotate or reopen failed */)
  {
    /*
      Close whatever was left opened.

      We are keeping the behavior as it exists today, ie,
      we disable logging and move on (see: BUG#51014).

      TODO: as part of WL#1790 consider other approaches:
       - kill mysql (safety);
       - try multiple locations for opening a log file;
       - switch server to protected/readonly mode
       - ...
    */
    if (binlog_error_action == ABORT_SERVER)
    {
      exec_binlog_error_action_abort("Either disk is full or file system is"
                                     " read only while rotating the binlog."
                                     " Aborting the server.");
    }
    else
      sql_print_error("Could not open %s for logging (error %d). "
                      "Turning logging off for the whole duration "
                      "of the MySQL server process. To turn it on "
                      "again: fix the cause, shutdown the MySQL "
                      "server and restart it.",
                      new_name_ptr, errno);
    /* Not part of the else branch: executed for both error actions. */
    close(LOG_CLOSE_INDEX, false /*need_lock_log=false*/,
          false/*need_lock_index=false*/);
  }

  /* Release in reverse order; LOCK_log only if it was taken here. */
  mysql_mutex_unlock(&LOCK_index);
  if (need_lock_log)
    mysql_mutex_unlock(&LOCK_log);

  DEBUG_SYNC(current_thd, "after_disable_binlog");
  DBUG_RETURN(error);
}
5279
5280
5281 #ifdef HAVE_REPLICATION
5282 /**
5283 Called after an event has been written to the relay log by the IO
5284 thread. This flushes and possibly syncs the file (according to the
5285 sync options), rotates the file if it has grown over the limit, and
5286 finally calls signal_update().
5287
5288 @note The caller must hold LOCK_log before invoking this function.
5289
5290 @param mi Master_info for the IO thread.
5291 @param need_data_lock If true, mi->data_lock will be acquired if a
5292 rotation is needed. Otherwise, mi->data_lock must be held by the
5293 caller.
5294
5295 @retval false success
5296 @retval true error
5297 */
after_append_to_relay_log(Master_info * mi)5298 bool MYSQL_BIN_LOG::after_append_to_relay_log(Master_info *mi)
5299 {
5300 DBUG_ENTER("MYSQL_BIN_LOG::after_append_to_relay_log");
5301 DBUG_PRINT("info",("max_size: %lu",max_size));
5302
5303 // Check pre-conditions
5304 mysql_mutex_assert_owner(&LOCK_log);
5305 mysql_mutex_assert_owner(&mi->data_lock);
5306 DBUG_ASSERT(is_relay_log);
5307 DBUG_ASSERT(current_thd->system_thread == SYSTEM_THREAD_SLAVE_IO);
5308
5309 // Flush and sync
5310 bool error= false;
5311 if (flush_and_sync(0) == 0)
5312 {
5313 DBUG_EXECUTE_IF ("set_max_size_zero",
5314 {max_size=0;});
5315 // If relay log is too big, rotate
5316 if ((uint) my_b_append_tell(&log_file) >
5317 DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size))
5318 {
5319 error= new_file_without_locking(mi->get_mi_description_event());
5320 DBUG_EXECUTE_IF ("set_max_size_zero",
5321 {
5322 max_size=1073741824;
5323 DBUG_SET("-d,set_max_size_zero");
5324 DBUG_SET("-d,flush_after_reading_gtid_event");
5325 });
5326 }
5327 }
5328
5329 signal_update();
5330
5331 DBUG_RETURN(error);
5332 }
5333
5334
append_event(Log_event * ev,Master_info * mi)5335 bool MYSQL_BIN_LOG::append_event(Log_event* ev, Master_info *mi)
5336 {
5337 DBUG_ENTER("MYSQL_BIN_LOG::append");
5338
5339 // check preconditions
5340 DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5341 DBUG_ASSERT(is_relay_log);
5342
5343 // acquire locks
5344 mysql_mutex_lock(&LOCK_log);
5345
5346 // write data
5347 bool error = false;
5348 if (ev->write(&log_file) == 0)
5349 {
5350 bytes_written+= ev->data_written;
5351 error= after_append_to_relay_log(mi);
5352 }
5353 else
5354 error= true;
5355
5356 mysql_mutex_unlock(&LOCK_log);
5357 DBUG_RETURN(error);
5358 }
5359
5360
append_buffer(const char * buf,uint len,Master_info * mi)5361 bool MYSQL_BIN_LOG::append_buffer(const char* buf, uint len, Master_info *mi)
5362 {
5363 DBUG_ENTER("MYSQL_BIN_LOG::append_buffer");
5364
5365 // check preconditions
5366 DBUG_ASSERT(log_file.type == SEQ_READ_APPEND);
5367 DBUG_ASSERT(is_relay_log);
5368 mysql_mutex_assert_owner(&LOCK_log);
5369
5370 // write data
5371 bool error= false;
5372 if (my_b_append(&log_file,(uchar*) buf,len) == 0)
5373 {
5374 bytes_written += len;
5375 error= after_append_to_relay_log(mi);
5376 }
5377 else
5378 error= true;
5379
5380 DBUG_RETURN(error);
5381 }
5382 #endif // ifdef HAVE_REPLICATION
5383
flush_and_sync(const bool force)5384 bool MYSQL_BIN_LOG::flush_and_sync(const bool force)
5385 {
5386 mysql_mutex_assert_owner(&LOCK_log);
5387
5388 if (flush_io_cache(&log_file))
5389 return 1;
5390
5391 std::pair<bool, bool> result= sync_binlog_file(force);
5392
5393 return result.first;
5394 }
5395
start_union_events(THD * thd,query_id_t query_id_param)5396 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
5397 {
5398 DBUG_ASSERT(!thd->binlog_evt_union.do_union);
5399 thd->binlog_evt_union.do_union= TRUE;
5400 thd->binlog_evt_union.unioned_events= FALSE;
5401 thd->binlog_evt_union.unioned_events_trans= FALSE;
5402 thd->binlog_evt_union.first_query_id= query_id_param;
5403 }
5404
stop_union_events(THD * thd)5405 void MYSQL_BIN_LOG::stop_union_events(THD *thd)
5406 {
5407 DBUG_ASSERT(thd->binlog_evt_union.do_union);
5408 thd->binlog_evt_union.do_union= FALSE;
5409 }
5410
is_query_in_union(THD * thd,query_id_t query_id_param)5411 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
5412 {
5413 return (thd->binlog_evt_union.do_union &&
5414 query_id_param >= thd->binlog_evt_union.first_query_id);
5415 }
5416
5417 /*
5418 Updates thd's position-of-next-event variables
5419 after a *real* write a file.
5420 */
update_thd_next_event_pos(THD * thd)5421 void MYSQL_BIN_LOG::update_thd_next_event_pos(THD* thd)
5422 {
5423 if (likely(thd != NULL))
5424 {
5425 thd->set_next_event_pos(log_file_name,
5426 my_b_tell(&log_file));
5427 }
5428 }
5429
5430 /*
5431 Moves the last bunch of rows from the pending Rows event to a cache (either
5432 transactional cache if is_transaction is @c true, or the non-transactional
5433 cache otherwise. Sets a new pending event.
5434
5435 @param thd a pointer to the user thread.
5436 @param evt a pointer to the row event.
5437 @param is_transactional @c true indicates a transactional cache,
5438 otherwise @c false a non-transactional.
5439 */
5440 int
flush_and_set_pending_rows_event(THD * thd,Rows_log_event * event,bool is_transactional)5441 MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
5442 Rows_log_event* event,
5443 bool is_transactional)
5444 {
5445 DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
5446 DBUG_ASSERT(mysql_bin_log.is_open());
5447 DBUG_PRINT("enter", ("event: 0x%lx", (long) event));
5448
5449 int error= 0;
5450 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
5451
5452 DBUG_ASSERT(cache_mngr);
5453
5454 binlog_cache_data *cache_data=
5455 cache_mngr->get_binlog_cache_data(is_transactional);
5456
5457 DBUG_PRINT("info", ("cache_mngr->pending(): 0x%lx", (long) cache_data->pending()));
5458
5459 if (Rows_log_event* pending= cache_data->pending())
5460 {
5461 /*
5462 Write pending event to the cache.
5463 */
5464 if (cache_data->write_event(thd, pending))
5465 {
5466 set_write_error(thd, is_transactional);
5467 if (check_write_error(thd) && cache_data &&
5468 stmt_cannot_safely_rollback(thd))
5469 cache_data->set_incident();
5470 delete pending;
5471 cache_data->set_pending(NULL);
5472 DBUG_RETURN(1);
5473 }
5474
5475 delete pending;
5476 }
5477
5478 cache_data->set_pending(event);
5479
5480 DBUG_RETURN(error);
5481 }
5482
/**
  Write an event to the binary log.

  The event is not written to the log file here: it is written to the
  session's statement or transaction binlog cache (selected by
  event_info->is_using_trans_cache()).  For statements that are not
  logged in row format, context events (Intvar, Rand, User_var) are
  written to the same cache before the event itself.

  @param event_info  Event to write; event_info->thd is the session used.

  @retval false  success; also returned when nothing is written because
                 the session is in "union events" mode or the event is
                 skipped by the OPTION_BIN_LOG / database filter checks
  @retval true   error: flushing the pending rows event failed, the log
                 is not open, starting the binlog transaction/statement
                 failed, or a cache write failed
*/

bool MYSQL_BIN_LOG::write_event(Log_event *event_info)
{
  THD *thd= event_info->thd;
  /* Pessimistic default: cleared only after the event has been cached. */
  bool error= 1;
  DBUG_ENTER("MYSQL_BIN_LOG::write_event(Log_event *)");

  if (thd->binlog_evt_union.do_union)
  {
    /*
      In Stored function; Remember that function call caused an update.
      We will log the function call to the binary log on function exit
    */
    thd->binlog_evt_union.unioned_events= TRUE;
    thd->binlog_evt_union.unioned_events_trans |=
      event_info->is_using_trans_cache();
    DBUG_RETURN(0);
  }

  /*
    We only end the statement if we are in a top-level statement.  If
    we are inside a stored function, we do not end the statement since
    this will close all tables on the slave. But there can be a special case
    where we are inside a stored function/trigger and a SAVEPOINT is being
    set in side the stored function/trigger. This SAVEPOINT execution will
    force the pending event to be flushed without an STMT_END_F flag. This
    will result in a case where following DMLs will be considered as part of
    same statement and result in data loss on slave. Hence in this case we
    force the end_stmt to be true.
  */
  bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
                        SQLCOM_SAVEPOINT)? true:
    (thd->locked_tables_mode && thd->lex->requires_prelocking());
  if (thd->binlog_flush_pending_rows_event(end_stmt,
                                           event_info->is_using_trans_cache()))
    DBUG_RETURN(error);

  /*
     In most cases this is only called if 'is_open()' is true; in fact this is
     mostly called if is_open() *was* true a few instructions before, but it
     could have changed since.
  */
  if (likely(is_open()))
  {
#ifdef HAVE_REPLICATION
    /*
      In the future we need to add to the following if tests like
      "do the involved tables match (to be implemented)
      binlog_[wild_]{do|ignore}_table?" (WL#1049)"
    */
    /*
      NOTE(review): 'thd' is tested for NULL here and further below, but
      it has already been dereferenced unconditionally above.
    */
    const char *local_db= event_info->get_db();
    if ((thd && !(thd->variables.option_bits & OPTION_BIN_LOG)) ||
        (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
         thd->lex->sql_command != SQLCOM_SAVEPOINT &&
         (!event_info->is_no_filter_event() &&
          !binlog_filter->db_ok(local_db))))
      DBUG_RETURN(0);
#endif /* HAVE_REPLICATION */

    DBUG_ASSERT(event_info->is_using_trans_cache() || event_info->is_using_stmt_cache());

    if (binlog_start_trans_and_stmt(thd, event_info))
      DBUG_RETURN(error);

    /* Pick the cache (transactional or statement) this event goes to. */
    bool is_trans_cache= event_info->is_using_trans_cache();
    binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
    binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);

    DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));

    /*
       No check for auto events flag here - this write method should
       never be called if auto-events are enabled.

       Write first log events which describe the 'run environment'
       of the SQL command. If row-based binlogging, Insert_id, Rand
       and other kind of "setting context" events are not needed.
    */
    if (thd)
    {
      if (!thd->is_current_stmt_binlog_format_row())
      {
        /* LAST_INSERT_ID value the statement depends on. */
        if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
        {
          Intvar_log_event e(thd,(uchar) LAST_INSERT_ID_EVENT,
                             thd->first_successful_insert_id_in_prev_stmt_for_binlog,
                             event_info->event_cache_type, event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
        }
        /* Smallest auto-increment value of the intervals of this statement. */
        if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
        {
          DBUG_PRINT("info",("number of auto_inc intervals: %u",
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             nb_elements()));
          Intvar_log_event e(thd, (uchar) INSERT_ID_EVENT,
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             minimum(), event_info->event_cache_type,
                             event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
        }
        /* Saved random seeds, if the statement used RAND. */
        if (thd->rand_used)
        {
          Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
                           event_info->event_cache_type,
                           event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
        }
        /* One User_var event per entry recorded in thd->user_var_events. */
        if (thd->user_var_events.elements)
        {
          for (uint i= 0; i < thd->user_var_events.elements; i++)
          {
            BINLOG_USER_VAR_EVENT *user_var_event;
            get_dynamic(&thd->user_var_events,(uchar*) &user_var_event, i);

            /* setting flags for user var log event */
            uchar flags= User_var_log_event::UNDEF_F;
            if (user_var_event->unsigned_flag)
              flags|= User_var_log_event::UNSIGNED_F;

            User_var_log_event e(thd,
                                 user_var_event->user_var_event->entry_name.ptr(),
                                 user_var_event->user_var_event->entry_name.length(),
                                 user_var_event->value,
                                 user_var_event->length,
                                 user_var_event->type,
                                 user_var_event->charset_number, flags,
                                 event_info->event_cache_type,
                                 event_info->event_logging_type);
            if (cache_data->write_event(thd, &e))
              goto err;
          }
        }
      }
    }

    /*
      Write the event.
    */
    if (cache_data->write_event(thd, event_info) ||
        DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
      goto err;

    /*
      After writing the event, if the trx-cache was used and any unsafe
      change was written into it, the cache is marked as cannot safely
      roll back.
    */
    if (is_trans_cache && stmt_cannot_safely_rollback(thd))
      cache_mngr->trx_cache.set_cannot_rollback();

    error= 0;

err:
    /*
      Reached both on success (error == 0) and from every failed cache
      write above. On failure the write error is recorded for the
      session and, if the statement cannot safely be rolled back, the
      cache is flagged with an incident.
    */
    if (error)
    {
      set_write_error(thd, is_trans_cache);
      if (check_write_error(thd) && cache_data &&
          stmt_cannot_safely_rollback(thd))
        cache_data->set_incident();
    }
  }

  DBUG_RETURN(error);
}
5653
5654 /**
5655 The method executes rotation when LOCK_log is already acquired
5656 by the caller.
5657
5658 @param force_rotate caller can request the log rotation
5659 @param check_purge is set to true if rotation took place
5660
5661 @note
5662 If rotation fails, for instance the server was unable
5663 to create a new log file, we still try to write an
5664 incident event to the current log.
5665
5666 @note The caller must hold LOCK_log when invoking this function.
5667
5668 @retval
5669 nonzero - error in rotating routine.
5670 */
rotate(bool force_rotate,bool * check_purge)5671 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
5672 {
5673 int error= 0;
5674 DBUG_ENTER("MYSQL_BIN_LOG::rotate");
5675
5676 DBUG_ASSERT(!is_relay_log);
5677 mysql_mutex_assert_owner(&LOCK_log);
5678
5679 *check_purge= false;
5680
5681 if (DBUG_EVALUATE_IF("force_rotate", 1, 0) || force_rotate ||
5682 (my_b_tell(&log_file) >= (my_off_t) max_size))
5683 {
5684 error= new_file_without_locking(NULL);
5685 *check_purge= true;
5686 }
5687 DBUG_RETURN(error);
5688 }
5689
5690 /**
5691 The method executes logs purging routine.
5692
5693 @retval
5694 nonzero - error in rotating routine.
5695 */
purge()5696 void MYSQL_BIN_LOG::purge()
5697 {
5698 #ifdef HAVE_REPLICATION
5699 if (expire_logs_days)
5700 {
5701 DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
5702 time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
5703 DBUG_EXECUTE_IF("expire_logs_always",
5704 { purge_time= my_time(0);});
5705 if (purge_time >= 0)
5706 {
5707 /*
5708 Flush logs for storage engines, so that the last transaction
5709 is fsynced inside storage engines.
5710 */
5711 ha_flush_logs(NULL);
5712 purge_logs_before_date(purge_time, true);
5713 }
5714 }
5715 #endif
5716 }
5717
5718 /**
5719 The method is a shortcut of @c rotate() and @c purge().
5720 LOCK_log is acquired prior to rotate and is released after it.
5721
5722 @param force_rotate caller can request the log rotation
5723
5724 @retval
5725 nonzero - error in rotating routine.
5726 */
rotate_and_purge(THD * thd,bool force_rotate)5727 int MYSQL_BIN_LOG::rotate_and_purge(THD* thd, bool force_rotate)
5728 {
5729 int error= 0;
5730 DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
5731 bool check_purge= false;
5732
5733 /*
5734 Wait for handlerton to insert any pending information into the binlog.
5735 For e.g. ha_ndbcluster which updates the binlog asynchronously this is
5736 needed so that the user see its own commands in the binlog.
5737 */
5738 ha_binlog_wait(thd);
5739
5740 DBUG_ASSERT(!is_relay_log);
5741 mysql_mutex_lock(&LOCK_log);
5742 error= rotate(force_rotate, &check_purge);
5743 /*
5744 NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
5745 the mutex. Otherwise causes various deadlocks.
5746 */
5747 mysql_mutex_unlock(&LOCK_log);
5748
5749 if (!error && check_purge)
5750 purge();
5751
5752 DBUG_RETURN(error);
5753 }
5754
next_file_id()5755 uint MYSQL_BIN_LOG::next_file_id()
5756 {
5757 uint res;
5758 mysql_mutex_lock(&LOCK_log);
5759 res = file_id++;
5760 mysql_mutex_unlock(&LOCK_log);
5761 return res;
5762 }
5763
5764
5765 /**
5766 Calculate checksum of possibly a part of an event containing at least
5767 the whole common header.
5768
5769 @param buf the pointer to trans cache's buffer
5770 @param off the offset of the beginning of the event in the buffer
5771 @param event_len no-checksum length of the event
5772 @param length the current size of the buffer
5773
5774 @param crc [in-out] the checksum
5775
5776 Event size in incremented by @c BINLOG_CHECKSUM_LEN.
5777
5778 @return 0 or number of unprocessed yet bytes of the event excluding
5779 the checksum part.
5780 */
fix_log_event_crc(uchar * buf,uint off,uint event_len,uint length,ha_checksum * crc)5781 static ulong fix_log_event_crc(uchar *buf, uint off, uint event_len,
5782 uint length, ha_checksum *crc)
5783 {
5784 ulong ret;
5785 uchar *event_begin= buf + off;
5786 uint16 flags= uint2korr(event_begin + FLAGS_OFFSET);
5787
5788 DBUG_ASSERT(length >= off + LOG_EVENT_HEADER_LEN); //at least common header in
5789 int2store(event_begin + FLAGS_OFFSET, flags);
5790 ret= length >= off + event_len ? 0 : off + event_len - length;
5791 *crc= my_checksum(*crc, event_begin, event_len - ret);
5792 return ret;
5793 }
5794
/*
  Write the contents of a cache to the binary log.

  SYNOPSIS
    do_write_cache()
    cache    Cache to write to the binary log

  DESCRIPTION
    Write the contents of the cache to the binary log. The cache will
    be reset as a READ_CACHE to be able to read the contents from it.

    The cache is read buffer by buffer. For every event found, the
    end_log_pos in its header is fixed up, and - if checksums are enabled
    per @c binlog_checksum_options - the event length is increased by
    BINLOG_CHECKSUM_LEN and a checksum is written after the event.

    This function takes no lock itself.

  RETURN
    0                  All OK
    ER_ERROR_ON_WRITE  the cache could not be reinitialized for reading
                       or a write to the log file failed
*/

int MYSQL_BIN_LOG::do_write_cache(IO_CACHE *cache)
{
  DBUG_ENTER("MYSQL_BIN_LOG::do_write_cache(IO_CACHE *)");

  DBUG_EXECUTE_IF("simulate_do_write_cache_failure",
                  {
                    /*
                       see binlog_cache_data::write_event() that reacts on
                       @c simulate_disk_full_at_flush_pending.
                    */
                    DBUG_SET("-d,simulate_do_write_cache_failure");
                    DBUG_RETURN(ER_ERROR_ON_WRITE);
                  });

  if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
    DBUG_RETURN(ER_ERROR_ON_WRITE);
  /*
    length   - number of bytes of the current cache buffer still to process
    group    - position in the log file where this cache starts
    carry    - number of bytes of a split event header saved in header[]
    hdr_offs - offset of the next event header within the current buffer
  */
  uint length= my_b_bytes_in_cache(cache), group, carry, hdr_offs;
  ulong remains= 0; // part of unprocessed yet netto length of the event
  long val;
  ulong end_log_pos_inc= 0; // grows by BINLOG_CHECKSUM_LEN per processed event when checksumming
  uchar header[LOG_EVENT_HEADER_LEN];
  ha_checksum crc= 0, crc_0= 0; // assignments to keep compiler happy
  my_bool do_checksum= (binlog_checksum_options != BINLOG_CHECKSUM_ALG_OFF);
  uchar buf[BINLOG_CHECKSUM_LEN];

  // while there is just one alg the following must hold:
  DBUG_ASSERT(!do_checksum ||
              binlog_checksum_options == BINLOG_CHECKSUM_ALG_CRC32);

  /*
    The events in the buffer have incorrect end_log_pos data
    (relative to beginning of group rather than absolute),
    so we'll recalculate them in situ so the binlog is always
    correct, even in the middle of a group. This is possible
    because we now know the start position of the group (the
    offset of this cache in the log, if you will); all we need
    to do is to find all event-headers, and add the position of
    the group to the end_log_pos of each event.  This is pretty
    straight forward, except that we read the cache in segments,
    so an event-header might end up on the cache-border and get
    split.
  */

  /*
    NOTE(review): the file position is narrowed to uint here, so
    positions beyond the range of uint would be truncated - confirm the
    log size limits make this impossible.
  */
  group= (uint)my_b_tell(&log_file);
  DBUG_PRINT("debug", ("length: %llu, group: %llu",
                       (ulonglong) length, (ulonglong) group));
  hdr_offs= carry= 0;
  /* crc_0 is the initial checksum value; crc is reset to it after each event. */
  if (do_checksum)
    crc= crc_0= my_checksum(0L, NULL, 0);

  if (DBUG_EVALUATE_IF("fault_injection_crc_value", 1, 0))
    crc= crc - 1;

  /* One iteration per cache buffer; my_b_fill() fetches the next one. */
  do
  {
    /*
      if we only got a partial header in the last iteration,
      get the other half now and process a full header.
    */
    if (unlikely(carry > 0))
    {
      DBUG_ASSERT(carry < LOG_EVENT_HEADER_LEN);

      /* assemble both halves */
      memcpy(&header[carry], (char *)cache->read_pos,
             LOG_EVENT_HEADER_LEN - carry);

      /* fix end_log_pos */
      val=uint4korr(header + LOG_POS_OFFSET);
      val+= group +
        (end_log_pos_inc+= (do_checksum ? BINLOG_CHECKSUM_LEN : 0));
      int4store(&header[LOG_POS_OFFSET], val);

      if (do_checksum)
      {
        ulong len= uint4korr(header + EVENT_LEN_OFFSET);
        /* fix len */
        int4store(&header[EVENT_LEN_OFFSET], len + BINLOG_CHECKSUM_LEN);
      }

      /* write the first half of the split header */
      if (my_b_write(&log_file, header, carry))
        DBUG_RETURN(ER_ERROR_ON_WRITE);

      /*
        copy fixed second half of header to cache so the correct
        version will be written later.
      */
      memcpy((char *)cache->read_pos, &header[carry],
             LOG_EVENT_HEADER_LEN - carry);

      /* next event header at ... */
      hdr_offs= uint4korr(header + EVENT_LEN_OFFSET) - carry -
        (do_checksum ? BINLOG_CHECKSUM_LEN : 0);

      if (do_checksum)
      {
        DBUG_ASSERT(crc == crc_0 && remains == 0);
        /*
          Only the first 'carry' header bytes are checksummed here; the
          rest of the event is in the current buffer and is accounted
          for through 'remains'.
        */
        crc= my_checksum(crc, header, carry);
        remains= uint4korr(header + EVENT_LEN_OFFSET) - carry -
          BINLOG_CHECKSUM_LEN;
      }
      carry= 0;
    }

    /* if there is anything to write, process it. */

    if (likely(length > 0))
    {
      /*
        process all event-headers in this (partial) cache.
        if next header is beyond current read-buffer,
        we'll get it later (though not necessarily in the
        very next iteration, just "eventually").
      */

      /* crc-calc the whole buffer */
      if (do_checksum && hdr_offs >= length)
      {

        DBUG_ASSERT(remains != 0 && crc != crc_0);

        crc= my_checksum(crc, cache->read_pos, length);
        remains -= length;
        if (my_b_write(&log_file, cache->read_pos, length))
          DBUG_RETURN(ER_ERROR_ON_WRITE);
        /* The event ended exactly at the end of this buffer: emit its checksum. */
        if (remains == 0)
        {
          int4store(buf, crc);
          if (my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
            DBUG_RETURN(ER_ERROR_ON_WRITE);
          crc= crc_0;
        }
      }

      while (hdr_offs < length)
      {
        /*
          partial header only? save what we can get, process once
          we get the rest.
        */

        if (do_checksum)
        {
          if (remains != 0)
          {
            /*
              finish off with remains of the last event that crawls
              from previous into the current buffer
            */
            DBUG_ASSERT(crc != crc_0);
            crc= my_checksum(crc, cache->read_pos, hdr_offs);
            int4store(buf, crc);
            remains -= hdr_offs;
            DBUG_ASSERT(remains == 0);
            if (my_b_write(&log_file, cache->read_pos, hdr_offs) ||
                my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
              DBUG_RETURN(ER_ERROR_ON_WRITE);
            crc= crc_0;
          }
        }

        if (hdr_offs + LOG_EVENT_HEADER_LEN > length)
        {
          /*
            Split header: keep the available bytes in header[] and
            shorten 'length' so that they are not treated as part of
            this buffer; this also terminates the while loop.
          */
          carry= length - hdr_offs;
          memcpy(header, (char *)cache->read_pos + hdr_offs, carry);
          length= hdr_offs;
        }
        else
        {
          /* we've got a full event-header, and it came in one piece */
          uchar *ev= (uchar *)cache->read_pos + hdr_offs;
          uint event_len= uint4korr(ev + EVENT_LEN_OFFSET); // netto len
          uchar *log_pos= ev + LOG_POS_OFFSET;

          /* fix end_log_pos */
          val= uint4korr(log_pos) + group +
            (end_log_pos_inc += (do_checksum ? BINLOG_CHECKSUM_LEN : 0));
          int4store(log_pos, val);

	  /* fix CRC */
	  if (do_checksum)
          {
            /* fix length */
            int4store(ev + EVENT_LEN_OFFSET, event_len + BINLOG_CHECKSUM_LEN);
            remains= fix_log_event_crc(cache->read_pos, hdr_offs, event_len,
                                       length, &crc);
            /*
              Write the whole event if it is completely in this buffer,
              otherwise only the part that is.
            */
            if (my_b_write(&log_file, ev,
                           remains == 0 ? event_len : length - hdr_offs))
              DBUG_RETURN(ER_ERROR_ON_WRITE);
            if (remains == 0)
            {
              int4store(buf, crc);
              if (my_b_write(&log_file, buf, BINLOG_CHECKSUM_LEN))
                DBUG_RETURN(ER_ERROR_ON_WRITE);
              crc= crc_0; // crc is complete
            }
          }

          /* next event header at ... */
          hdr_offs += event_len; // incr by the netto len

          DBUG_ASSERT(!do_checksum || remains == 0 || hdr_offs >= length);
        }
      }

      /*
        Adjust hdr_offs. Note that it may still point beyond the segment
        read in the next iteration; if the current event is very long,
        it may take a couple of read-iterations (and subsequent adjustments
        of hdr_offs) for it to point into the then-current segment.
        If we have a split header (carry > 0), hdr_offs will be set at the
        beginning of the next iteration, overwriting the value we set here:
      */
      hdr_offs -= length;
    }

    /*
      Without checksums nothing of this buffer has been written yet
      (only the headers were fixed in place): write the entire buffer,
      minus any carried header bytes, to the binary log file now.
    */
    if (!do_checksum)
      if (my_b_write(&log_file, cache->read_pos, length))
        DBUG_RETURN(ER_ERROR_ON_WRITE);
    cache->read_pos=cache->read_end;		// Mark buffer used up
  } while ((length= my_b_fill(cache)));

  /* No split header and no unfinished event/checksum may be left over. */
  DBUG_ASSERT(carry == 0);
  DBUG_ASSERT(!do_checksum || remains == 0);
  DBUG_ASSERT(!do_checksum || crc == crc_0);

  DBUG_RETURN(0); // All OK
}
6042
6043 /**
6044 Writes an incident event to the binary log.
6045
6046 @param ev Incident event to be written
6047 @param need_lock_log If true, will acquire LOCK_log; otherwise the
6048 caller should already have acquired LOCK_log.
6049 @do_flush_and_sync If true, will call flush_and_sync(), rotate() and
6050 purge().
6051
6052 @retval false error
6053 @retval true success
6054 */
write_incident(Incident_log_event * ev,bool need_lock_log,bool do_flush_and_sync)6055 bool MYSQL_BIN_LOG::write_incident(Incident_log_event *ev, bool need_lock_log,
6056 bool do_flush_and_sync)
6057 {
6058 uint error= 0;
6059 DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
6060
6061 if (!is_open())
6062 DBUG_RETURN(error);
6063
6064 if (need_lock_log)
6065 mysql_mutex_lock(&LOCK_log);
6066 else
6067 mysql_mutex_assert_owner(&LOCK_log);
6068
6069 // @todo make this work with the group log. /sven
6070
6071 error= ev->write(&log_file);
6072
6073 if (do_flush_and_sync)
6074 {
6075 if (!error && !(error= flush_and_sync()))
6076 {
6077 bool check_purge= false;
6078 signal_update();
6079 error= rotate(true, &check_purge);
6080 if (!error && check_purge)
6081 purge();
6082 }
6083 }
6084
6085 if (need_lock_log)
6086 mysql_mutex_unlock(&LOCK_log);
6087
6088 DBUG_RETURN(error);
6089 }
6090
write_dml_directly(THD * thd,const char * stmt,size_t stmt_len,enum_sql_command sql_command)6091 bool MYSQL_BIN_LOG::write_dml_directly(THD* thd, const char *stmt, size_t stmt_len,
6092 enum_sql_command sql_command)
6093 {
6094 bool ret= false;
6095 /* backup the original command */
6096 enum_sql_command save_sql_command= thd->lex->sql_command;
6097 thd->lex->sql_command= sql_command;
6098
6099 if (thd->binlog_query(THD::STMT_QUERY_TYPE, stmt, stmt_len,
6100 FALSE, FALSE, FALSE, 0) ||
6101 commit(thd, false) != TC_LOG::RESULT_SUCCESS)
6102 {
6103 ret= true;
6104 }
6105
6106 thd->lex->sql_command= save_sql_command;
6107 return ret;
6108 }
6109
6110
6111 /**
6112 Creates an incident event and writes it to the binary log.
6113
6114 @param thd Thread variable
6115 @param ev Incident event to be written
6116 @param lock If the binary lock should be locked or not
6117
6118 @retval
6119 0 error
6120 @retval
6121 1 success
6122 */
write_incident(THD * thd,bool need_lock_log,bool do_flush_and_sync)6123 bool MYSQL_BIN_LOG::write_incident(THD *thd, bool need_lock_log,
6124 bool do_flush_and_sync)
6125 {
6126 DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
6127
6128 if (!is_open())
6129 DBUG_RETURN(0);
6130
6131 LEX_STRING const write_error_msg=
6132 { C_STRING_WITH_LEN("error writing to the binary log") };
6133 Incident incident= INCIDENT_LOST_EVENTS;
6134 Incident_log_event ev(thd, incident, write_error_msg);
6135
6136 DBUG_RETURN(write_incident(&ev, need_lock_log, do_flush_and_sync));
6137 }
6138
6139 /**
6140 Write a cached log entry to the binary log.
6141
6142 @param thd Thread variable
6143 @param cache The cache to copy to the binlog
6144 @param incident Defines if an incident event should be created to
6145 notify that some non-transactional changes did
6146 not get into the binlog.
6147 @param prepared Defines if a transaction is part of a 2-PC.
6148
6149 @note
6150 We only come here if there is something in the cache.
6151 @note
6152 The thing in the cache is always a complete transaction.
6153 @note
6154 'cache' needs to be reinitialized after this functions returns.
6155 */
6156
write_cache(THD * thd,binlog_cache_data * cache_data)6157 bool MYSQL_BIN_LOG::write_cache(THD *thd, binlog_cache_data *cache_data)
6158 {
6159 DBUG_ENTER("MYSQL_BIN_LOG::write_cache(THD *, binlog_cache_data *, bool)");
6160
6161 IO_CACHE *cache= &cache_data->cache_log;
6162 bool incident= cache_data->has_incident();
6163
6164 DBUG_EXECUTE_IF("simulate_binlog_flush_error",
6165 {
6166 if (rand() % 3 == 0)
6167 {
6168 write_error=1;
6169 thd->commit_error= THD::CE_FLUSH_ERROR;
6170 DBUG_RETURN(0);
6171 }
6172 };);
6173
6174 mysql_mutex_assert_owner(&LOCK_log);
6175
6176 DBUG_ASSERT(is_open());
6177 if (likely(is_open())) // Should always be true
6178 {
6179 /*
6180 We only bother to write to the binary log if there is anything
6181 to write.
6182 */
6183 if (my_b_tell(cache) > 0)
6184 {
6185 DBUG_EXECUTE_IF("crash_before_writing_xid",
6186 {
6187 if ((write_error= do_write_cache(cache)))
6188 DBUG_PRINT("info", ("error writing binlog cache: %d",
6189 write_error));
6190 flush_and_sync(true);
6191 DBUG_PRINT("info", ("crashing before writing xid"));
6192 DBUG_SUICIDE();
6193 });
6194
6195 if ((write_error= do_write_cache(cache)))
6196 goto err;
6197
6198 if (incident && write_incident(thd, false/*need_lock_log=false*/,
6199 false/*do_flush_and_sync==false*/))
6200 goto err;
6201
6202 DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE(););
6203 if (cache->error) // Error on read
6204 {
6205 char errbuf[MYSYS_STRERROR_SIZE];
6206 sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name,
6207 errno, my_strerror(errbuf, sizeof(errbuf), errno));
6208 write_error=1; // Don't give more errors
6209 goto err;
6210 }
6211
6212 global_sid_lock->rdlock();
6213 if (gtid_state->update_on_flush(thd) != RETURN_STATUS_OK)
6214 {
6215 global_sid_lock->unlock();
6216 goto err;
6217 }
6218 global_sid_lock->unlock();
6219 }
6220 update_thd_next_event_pos(thd);
6221 }
6222
6223 DBUG_RETURN(0);
6224
6225 err:
6226 if (!write_error)
6227 {
6228 char errbuf[MYSYS_STRERROR_SIZE];
6229 write_error= 1;
6230 sql_print_error(ER(ER_ERROR_ON_WRITE), name,
6231 errno, my_strerror(errbuf, sizeof(errbuf), errno));
6232 }
6233
6234 /*
6235 If the flush has failed due to ENOSPC, set the flush_error flag.
6236 */
6237 if (cache->error && thd->is_error() && my_errno == ENOSPC)
6238 {
6239 cache_data->set_flush_error(thd);
6240 }
6241 thd->commit_error= THD::CE_FLUSH_ERROR;
6242
6243 DBUG_RETURN(1);
6244 }
6245
6246
6247 /**
6248 Wait until we get a signal that the relay log has been updated.
6249
6250 @param[in] thd Thread variable
6251 @param[in] timeout a pointer to a timespec;
6252 NULL means to wait w/o timeout.
6253
6254 @retval 0 if got signalled on update
6255 @retval non-0 if wait timeout elapsed
6256
6257 @note
6258 One must have a lock on LOCK_log before calling this function.
6259 */
6260
wait_for_update_relay_log(THD * thd,const struct timespec * timeout)6261 int MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd, const struct timespec *timeout)
6262 {
6263 int ret= 0;
6264 PSI_stage_info old_stage;
6265 DBUG_ENTER("wait_for_update_relay_log");
6266
6267 thd->ENTER_COND(&update_cond, &LOCK_log,
6268 &stage_slave_has_read_all_relay_log,
6269 &old_stage);
6270
6271 if (!timeout)
6272 mysql_cond_wait(&update_cond, &LOCK_log);
6273 else
6274 ret= mysql_cond_timedwait(&update_cond, &LOCK_log,
6275 const_cast<struct timespec *>(timeout));
6276 thd->EXIT_COND(&old_stage);
6277
6278 DBUG_RETURN(ret);
6279 }
6280
6281 /**
6282 Wait until we get a signal that the binary log has been updated.
6283 Applies to master only.
6284
6285 NOTES
6286 @param[in] thd a THD struct
6287 @param[in] timeout a pointer to a timespec;
6288 NULL means to wait w/o timeout.
6289 @retval 0 if got signalled on update
6290 @retval non-0 if wait timeout elapsed
6291 @note
6292 LOCK_log must be taken before calling this function.
6293 LOCK_log is being released while the thread is waiting.
6294 LOCK_log is released by the caller.
6295 */
6296
wait_for_update_bin_log(THD * thd,const struct timespec * timeout)6297 int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
6298 const struct timespec *timeout)
6299 {
6300 int ret= 0;
6301 DBUG_ENTER("wait_for_update_bin_log");
6302
6303 if (!timeout)
6304 mysql_cond_wait(&update_cond, &LOCK_log);
6305 else
6306 ret= mysql_cond_timedwait(&update_cond, &LOCK_log,
6307 const_cast<struct timespec *>(timeout));
6308 DBUG_RETURN(ret);
6309 }
6310
6311
6312 /**
6313 Close the log file.
6314
6315 @param exiting Bitmask for one or more of the following bits:
6316 - LOG_CLOSE_INDEX : if we should close the index file
6317 - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
6318 at once after close.
6319 - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
6320
6321 @param need_lock_log If true, this function acquires LOCK_log;
6322 otherwise the caller should already have acquired it.
6323
6324 @param need_lock_index If true, this function acquires LOCK_index;
6325 otherwise the caller should already have acquired it.
6326
6327 @note
6328 One can do an open on the object at once after doing a close.
6329 The internal structures are not freed until cleanup() is called
6330 */
6331
close(uint exiting,bool need_lock_log,bool need_lock_index)6332 void MYSQL_BIN_LOG::close(uint exiting, bool need_lock_log,
6333 bool need_lock_index)
6334 { // One can't set log_type here!
6335 DBUG_ENTER("MYSQL_BIN_LOG::close");
6336 DBUG_PRINT("enter",("exiting: %d", (int) exiting));
6337
6338 if (need_lock_log)
6339 mysql_mutex_lock(&LOCK_log);
6340 else
6341 mysql_mutex_assert_owner(&LOCK_log);
6342
6343 if (log_state == LOG_OPENED)
6344 {
6345 #ifdef HAVE_REPLICATION
6346 if ((exiting & LOG_CLOSE_STOP_EVENT) != 0)
6347 {
6348 Stop_log_event s;
6349 // the checksumming rule for relay-log case is similar to Rotate
6350 s.checksum_alg= is_relay_log ?
6351 relay_log_checksum_alg : binlog_checksum_options;
6352 DBUG_ASSERT(!is_relay_log ||
6353 relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
6354 s.write(&log_file);
6355 bytes_written+= s.data_written;
6356 signal_update();
6357 }
6358 #endif /* HAVE_REPLICATION */
6359
6360 /* don't pwrite in a file opened with O_APPEND - it doesn't work */
6361 if (log_file.type == WRITE_CACHE)
6362 {
6363 my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
6364 my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
6365 uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F
6366 mysql_file_pwrite(log_file.file, &flags, 1, offset, MYF(0));
6367 /*
6368 Restore position so that anything we have in the IO_cache is written
6369 to the correct position.
6370 We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
6371 original position on system that doesn't support pwrite().
6372 */
6373 mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
6374 }
6375
6376 /* this will cleanup IO_CACHE, sync and close the file */
6377 MYSQL_LOG::close(exiting);
6378 }
6379
6380 /*
6381 The following test is needed even if is_open() is not set, as we may have
6382 called a not complete close earlier and the index file is still open.
6383 */
6384
6385 if (need_lock_index)
6386 mysql_mutex_lock(&LOCK_index);
6387 else
6388 mysql_mutex_assert_owner(&LOCK_index);
6389
6390 if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
6391 {
6392 end_io_cache(&index_file);
6393 if (mysql_file_close(index_file.file, MYF(0)) < 0 && ! write_error)
6394 {
6395 char errbuf[MYSYS_STRERROR_SIZE];
6396 write_error= 1;
6397 sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name,
6398 errno, my_strerror(errbuf, sizeof(errbuf), errno));
6399 }
6400 }
6401
6402 if (need_lock_index)
6403 mysql_mutex_unlock(&LOCK_index);
6404
6405 log_state= (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
6406 my_free(name);
6407 name= NULL;
6408
6409 if (need_lock_log)
6410 mysql_mutex_unlock(&LOCK_log);
6411
6412 DBUG_VOID_RETURN;
6413 }
6414
harvest_bytes_written(Relay_log_info * rli,bool need_log_space_lock)6415 void MYSQL_BIN_LOG::harvest_bytes_written(Relay_log_info* rli, bool need_log_space_lock)
6416 {
6417 #ifndef DBUG_OFF
6418 char buf1[22],buf2[22];
6419 #endif
6420 DBUG_ENTER("harvest_bytes_written");
6421 if (need_log_space_lock)
6422 mysql_mutex_lock(&rli->log_space_lock);
6423 else
6424 mysql_mutex_assert_owner(&rli->log_space_lock);
6425 rli->log_space_total+= bytes_written;
6426 DBUG_PRINT("info",("relay_log_space: %s bytes_written: %s",
6427 llstr(rli->log_space_total,buf1), llstr(bytes_written,buf2)));
6428 bytes_written=0;
6429 if (need_log_space_lock)
6430 mysql_mutex_unlock(&rli->log_space_lock);
6431 DBUG_VOID_RETURN;
6432 }
6433
set_max_size(ulong max_size_arg)6434 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
6435 {
6436 /*
6437 We need to take locks, otherwise this may happen:
6438 new_file() is called, calls open(old_max_size), then before open() starts,
6439 set_max_size() sets max_size to max_size_arg, then open() starts and
6440 uses the old_max_size argument, so max_size_arg has been overwritten and
6441 it's like if the SET command was never run.
6442 */
6443 DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
6444 mysql_mutex_lock(&LOCK_log);
6445 if (is_open())
6446 max_size= max_size_arg;
6447 mysql_mutex_unlock(&LOCK_log);
6448 DBUG_VOID_RETURN;
6449 }
6450
6451
signal_update()6452 void MYSQL_BIN_LOG::signal_update()
6453 {
6454 DBUG_ENTER("MYSQL_BIN_LOG::signal_update");
6455 signal_cnt++;
6456 mysql_cond_broadcast(&update_cond);
6457 DBUG_VOID_RETURN;
6458 }
6459
6460 /****** transaction coordinator log for 2pc - binlog() based solution ******/
6461
6462 /**
6463 @todo
6464 keep in-memory list of prepared transactions
6465 (add to list in log(), remove on unlog())
6466 and copy it to the new binlog if rotated
6467 but let's check the behaviour of tc_log_page_waits first!
6468 */
6469
open_binlog(const char * opt_name)6470 int MYSQL_BIN_LOG::open_binlog(const char *opt_name)
6471 {
6472 LOG_INFO log_info;
6473 int error= 1;
6474
6475 /*
6476 This function is used for 2pc transaction coordination. Hence, it
6477 is never used for relay logs.
6478 */
6479 DBUG_ASSERT(!is_relay_log);
6480 DBUG_ASSERT(total_ha_2pc > 1 || (1 == total_ha_2pc && opt_bin_log));
6481 DBUG_ASSERT(opt_name && opt_name[0]);
6482
6483 if (!my_b_inited(&index_file))
6484 {
6485 /* There was a failure to open the index file, can't open the binlog */
6486 cleanup();
6487 return 1;
6488 }
6489
6490 if (using_heuristic_recover())
6491 {
6492 /* generate a new binlog to mask a corrupted one */
6493 open_binlog(opt_name, 0, WRITE_CACHE, max_binlog_size, false,
6494 true/*need_lock_log=true*/,
6495 true/*need_lock_index=true*/,
6496 true/*need_sid_lock=true*/,
6497 NULL);
6498 cleanup();
6499 return 1;
6500 }
6501
6502 if ((error= find_log_pos(&log_info, NullS, true/*need_lock_index=true*/)))
6503 {
6504 if (error != LOG_INFO_EOF)
6505 sql_print_error("find_log_pos() failed (error: %d)", error);
6506 else
6507 error= 0;
6508 goto err;
6509 }
6510
6511 {
6512 const char *errmsg;
6513 IO_CACHE log;
6514 File file;
6515 Log_event *ev=0;
6516 Format_description_log_event fdle(BINLOG_VERSION);
6517 char log_name[FN_REFLEN];
6518 my_off_t valid_pos= 0;
6519 my_off_t binlog_size;
6520 MY_STAT s;
6521
6522 if (! fdle.is_valid())
6523 goto err;
6524
6525 do
6526 {
6527 strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
6528 } while (!(error= find_next_log(&log_info, true/*need_lock_index=true*/)));
6529
6530 if (error != LOG_INFO_EOF)
6531 {
6532 sql_print_error("find_log_pos() failed (error: %d)", error);
6533 goto err;
6534 }
6535
6536 if ((file= open_binlog_file(&log, log_name, &errmsg)) < 0)
6537 {
6538 sql_print_error("%s", errmsg);
6539 goto err;
6540 }
6541
6542 my_stat(log_name, &s, MYF(0));
6543 binlog_size= s.st_size;
6544
6545 if ((ev= Log_event::read_log_event(&log, 0, &fdle,
6546 opt_master_verify_checksum)) &&
6547 ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
6548 ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
6549 {
6550 sql_print_information("Recovering after a crash using %s", opt_name);
6551 valid_pos= my_b_tell(&log);
6552 error= recover(&log, (Format_description_log_event *)ev, &valid_pos);
6553 }
6554 else
6555 error=0;
6556
6557 delete ev;
6558 end_io_cache(&log);
6559 mysql_file_close(file, MYF(MY_WME));
6560
6561 if (error)
6562 goto err;
6563
6564 /* Trim the crashed binlog file to last valid transaction
6565 or event (non-transaction) base on valid_pos. */
6566 if (valid_pos > 0)
6567 {
6568 if ((file= mysql_file_open(key_file_binlog, log_name,
6569 O_RDWR | O_BINARY, MYF(MY_WME))) < 0)
6570 {
6571 sql_print_error("Failed to open the crashed binlog file "
6572 "when master server is recovering it.");
6573 return -1;
6574 }
6575
6576 /* Change binlog file size to valid_pos */
6577 if (valid_pos < binlog_size)
6578 {
6579 if (my_chsize(file, valid_pos, 0, MYF(MY_WME)))
6580 {
6581 sql_print_error("Failed to trim the crashed binlog file "
6582 "when master server is recovering it.");
6583 mysql_file_close(file, MYF(MY_WME));
6584 return -1;
6585 }
6586 else
6587 {
6588 sql_print_information("Crashed binlog file %s size is %llu, "
6589 "but recovered up to %llu. Binlog trimmed to %llu bytes.",
6590 log_name, binlog_size, valid_pos, valid_pos);
6591 }
6592 }
6593
6594 /* Clear LOG_EVENT_BINLOG_IN_USE_F */
6595 my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
6596 uchar flags= 0;
6597 if (mysql_file_pwrite(file, &flags, 1, offset, MYF(0)) != 1)
6598 {
6599 sql_print_error("Failed to clear LOG_EVENT_BINLOG_IN_USE_F "
6600 "for the crashed binlog file when master "
6601 "server is recovering it.");
6602 mysql_file_close(file, MYF(MY_WME));
6603 return -1;
6604 }
6605
6606 mysql_file_close(file, MYF(MY_WME));
6607 } //end if
6608 }
6609
6610 err:
6611 return error;
6612 }
6613
6614 /** This is called on shutdown, after ha_panic. */
close()6615 void MYSQL_BIN_LOG::close()
6616 {
6617 }
6618
6619 /*
6620 Prepare the transaction in the transaction coordinator.
6621
6622 This function will prepare the transaction in the storage engines
6623 (by calling @c ha_prepare_low) what will write a prepare record
6624 to the log buffers.
6625
6626 @retval 0 success
6627 @retval 1 error
6628 */
prepare(THD * thd,bool all)6629 int MYSQL_BIN_LOG::prepare(THD *thd, bool all)
6630 {
6631 DBUG_ENTER("MYSQL_BIN_LOG::prepare");
6632
6633 int error= ha_prepare_low(thd, all);
6634
6635 DBUG_RETURN(error);
6636 }
6637
6638 /**
6639 Commit the transaction in the transaction coordinator.
6640
6641 This function will commit the sessions transaction in the binary log
6642 and in the storage engines (by calling @c ha_commit_low). If the
6643 transaction was successfully logged (or not successfully unlogged)
6644 but the commit in the engines did not succed, there is a risk of
6645 inconsistency between the engines and the binary log.
6646
6647 For binary log group commit, the commit is separated into three
6648 parts:
6649
6650 1. First part consists of filling the necessary caches and
6651 finalizing them (if they need to be finalized). After this,
6652 nothing is added to any of the caches.
6653
6654 2. Second part execute an ordered flush and commit. This will be
6655 done using the group commit functionality in ordered_commit.
6656
6657 3. Third part checks any errors resulting from the ordered commit
6658 and handles them appropriately.
6659
6660 @retval 0 success
6661 @retval 1 error, transaction was neither logged nor committed
6662 @retval 2 error, transaction was logged but not committed
6663 */
commit(THD * thd,bool all)6664 TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all)
6665 {
6666 DBUG_ENTER("MYSQL_BIN_LOG::commit");
6667
6668 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
6669 my_xid xid= thd->transaction.xid_state.xid.get_my_xid();
6670 int error= RESULT_SUCCESS;
6671 bool stuff_logged= false;
6672
6673 DBUG_PRINT("enter", ("thd: 0x%llx, all: %s, xid: %llu, cache_mngr: 0x%llx",
6674 (ulonglong) thd, YESNO(all), (ulonglong) xid,
6675 (ulonglong) cache_mngr));
6676
6677 /*
6678 No cache manager means nothing to log, but we still have to commit
6679 the transaction.
6680 */
6681 if (cache_mngr == NULL)
6682 {
6683 if (ha_commit_low(thd, all))
6684 DBUG_RETURN(RESULT_ABORTED);
6685 DBUG_RETURN(RESULT_SUCCESS);
6686 }
6687
6688 THD_TRANS *trans= all ? &thd->transaction.all : &thd->transaction.stmt;
6689
6690 DBUG_PRINT("debug", ("in_transaction: %s, no_2pc: %s, rw_ha_count: %d",
6691 YESNO(thd->in_multi_stmt_transaction_mode()),
6692 YESNO(trans->no_2pc),
6693 trans->rw_ha_count));
6694 DBUG_PRINT("debug",
6695 ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
6696 YESNO(thd->transaction.all.cannot_safely_rollback()),
6697 YESNO(cache_mngr->trx_cache.is_binlog_empty())));
6698 DBUG_PRINT("debug",
6699 ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
6700 YESNO(thd->transaction.stmt.cannot_safely_rollback()),
6701 YESNO(cache_mngr->stmt_cache.is_binlog_empty())));
6702
6703
6704 /*
6705 If there are no handlertons registered, there is nothing to
6706 commit. Note that DDLs are written earlier in this case (inside
6707 binlog_query).
6708
6709 TODO: This can be a problem in those cases that there are no
6710 handlertons registered. DDLs are one example, but the other case
6711 is MyISAM. In this case, we could register a dummy handlerton to
6712 trigger the commit.
6713
6714 Any statement that requires logging will call binlog_query before
6715 trans_commit_stmt, so an alternative is to use the condition
6716 "binlog_query called or stmt.ha_list != 0".
6717 */
6718 if (!all && trans->ha_list == 0 &&
6719 cache_mngr->stmt_cache.is_binlog_empty())
6720 DBUG_RETURN(RESULT_SUCCESS);
6721
6722 /*
6723 If there is anything in the stmt cache, and GTIDs are enabled,
6724 then this is a single statement outside a transaction and it is
6725 impossible that there is anything in the trx cache. Hence, we
6726 write any empty group(s) to the stmt cache.
6727
6728 Otherwise, we write any empty group(s) to the trx cache at the end
6729 of the transaction.
6730 */
6731 if (!cache_mngr->stmt_cache.is_binlog_empty())
6732 {
6733 error= write_empty_groups_to_cache(thd, &cache_mngr->stmt_cache);
6734 if (error == 0)
6735 {
6736 if (cache_mngr->stmt_cache.finalize(thd))
6737 DBUG_RETURN(RESULT_ABORTED);
6738 stuff_logged= true;
6739 }
6740 }
6741
6742 /*
6743 We commit the transaction if:
6744 - We are not in a transaction and committing a statement, or
6745 - We are in a transaction and a full transaction is committed.
6746 Otherwise, we accumulate the changes.
6747 */
6748 if (!error && !cache_mngr->trx_cache.is_binlog_empty() &&
6749 ending_trans(thd, all))
6750 {
6751 const bool real_trans= (all || thd->transaction.all.ha_list == 0);
6752 /*
6753 We are committing an XA transaction if it is a "real" transaction
6754 and have an XID assigned (because some handlerton registered). A
6755 transaction is "real" if either 'all' is true or the 'all.ha_list'
6756 is empty.
6757
6758 Note: This is kind of strange since registering the binlog
6759 handlerton will then make the transaction XA, which is not really
6760 true. This occurs for example if a MyISAM statement is executed
6761 with row-based replication on.
6762 */
6763 if (real_trans && xid && trans->rw_ha_count > 1 && !trans->no_2pc)
6764 {
6765 Xid_log_event end_evt(thd, xid);
6766 if (cache_mngr->trx_cache.finalize(thd, &end_evt))
6767 DBUG_RETURN(RESULT_ABORTED);
6768 }
6769 else
6770 {
6771 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
6772 true, FALSE, TRUE, 0, TRUE);
6773 if (cache_mngr->trx_cache.finalize(thd, &end_evt))
6774 DBUG_RETURN(RESULT_ABORTED);
6775 }
6776 stuff_logged= true;
6777 }
6778
6779 /*
6780 This is part of the stmt rollback.
6781 */
6782 if (!all)
6783 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
6784
6785 DBUG_PRINT("debug", ("error: %d", error));
6786
6787 if (error)
6788 DBUG_RETURN(RESULT_ABORTED);
6789
6790 /*
6791 Now all the events are written to the caches, so we will commit
6792 the transaction in the engines. This is done using the group
6793 commit logic in ordered_commit, which will return when the
6794 transaction is committed.
6795
6796 If the commit in the engines fail, we still have something logged
6797 to the binary log so we have to report this as a "bad" failure
6798 (failed to commit, but logged something).
6799 */
6800 if (stuff_logged)
6801 {
6802 if (ordered_commit(thd, all))
6803 DBUG_RETURN(RESULT_INCONSISTENT);
6804 }
6805 else
6806 {
6807 if (ha_commit_low(thd, all))
6808 DBUG_RETURN(RESULT_INCONSISTENT);
6809 }
6810
6811 DBUG_RETURN(error ? RESULT_INCONSISTENT : RESULT_SUCCESS);
6812 }
6813
6814
6815 /**
6816 Flush caches for session.
6817
6818 @note @c set_trans_pos is called with a pointer to the file name
6819 that the binary log currently use and a rotation will change the
6820 contents of the variable.
6821
6822 The position is used when calling the after_flush, after_commit,
6823 and after_rollback hooks, but these have been placed so that they
6824 occur before a rotation is executed.
6825
6826 It is the responsibility of any plugin that use this position to
6827 copy it if they need it after the hook has returned.
6828 */
6829 std::pair<int,my_off_t>
flush_thread_caches(THD * thd)6830 MYSQL_BIN_LOG::flush_thread_caches(THD *thd)
6831 {
6832 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
6833 my_off_t bytes= 0;
6834 bool wrote_xid= false;
6835 int error= cache_mngr->flush(thd, &bytes, &wrote_xid);
6836 if (!error && bytes > 0)
6837 {
6838 /*
6839 Note that set_trans_pos does not copy the file name. See
6840 this function documentation for more info.
6841 */
6842 thd->set_trans_pos(log_file_name, my_b_tell(&log_file));
6843 if (wrote_xid)
6844 inc_prep_xids(thd);
6845 }
6846 DBUG_PRINT("debug", ("bytes: %llu", bytes));
6847 return std::make_pair(error, bytes);
6848 }
6849
6850
6851 /**
6852 Execute the flush stage.
6853
6854 @param total_bytes_var Pointer to variable that will be set to total
6855 number of bytes flushed, or NULL.
6856
6857 @param rotate_var Pointer to variable that will be set to true if
6858 binlog rotation should be performed after releasing locks. If rotate
6859 is not necessary, the variable will not be touched.
6860
6861 @return Error code on error, zero on success
6862 */
6863
6864 int
process_flush_stage_queue(my_off_t * total_bytes_var,bool * rotate_var,THD ** out_queue_var)6865 MYSQL_BIN_LOG::process_flush_stage_queue(my_off_t *total_bytes_var,
6866 bool *rotate_var,
6867 THD **out_queue_var)
6868 {
6869 DBUG_ASSERT(total_bytes_var && rotate_var && out_queue_var);
6870 my_off_t total_bytes= 0;
6871 int flush_error= 1;
6872 mysql_mutex_assert_owner(&LOCK_log);
6873
6874 my_atomic_rwlock_rdlock(&opt_binlog_max_flush_queue_time_lock);
6875 const ulonglong max_udelay= my_atomic_load32(&opt_binlog_max_flush_queue_time);
6876 my_atomic_rwlock_rdunlock(&opt_binlog_max_flush_queue_time_lock);
6877 const ulonglong start_utime= max_udelay > 0 ? my_micro_time() : 0;
6878
6879 /*
6880 First we read the queue until it either is empty or the difference
6881 between the time we started and the current time is too large.
6882
6883 We remember the first thread we unqueued, because this will be the
6884 beginning of the out queue.
6885 */
6886 bool has_more= true;
6887 THD *first_seen= NULL;
6888 while ((max_udelay == 0 || my_micro_time() < start_utime + max_udelay) && has_more)
6889 {
6890 std::pair<bool,THD*> current= stage_manager.pop_front(Stage_manager::FLUSH_STAGE);
6891 std::pair<int,my_off_t> result= flush_thread_caches(current.second);
6892 has_more= current.first;
6893 total_bytes+= result.second;
6894 if (flush_error == 1)
6895 flush_error= result.first;
6896 if (first_seen == NULL)
6897 first_seen= current.second;
6898 }
6899
6900 /*
6901 Either the queue is empty, or we ran out of time. If we ran out of
6902 time, we have to fetch the entire queue (and flush it) since
6903 otherwise the next batch will not have a leader.
6904 */
6905 if (has_more)
6906 {
6907 THD *queue= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
6908 for (THD *head= queue ; head ; head = head->next_to_commit)
6909 {
6910 std::pair<int,my_off_t> result= flush_thread_caches(head);
6911 total_bytes+= result.second;
6912 if (flush_error == 1)
6913 flush_error= result.first;
6914 }
6915 if (first_seen == NULL)
6916 first_seen= queue;
6917 }
6918
6919 *out_queue_var= first_seen;
6920 *total_bytes_var= total_bytes;
6921 if (total_bytes > 0 && my_b_tell(&log_file) >= (my_off_t) max_size)
6922 *rotate_var= true;
6923 return flush_error;
6924 }
6925
6926
6927 /**
6928 Commit a sequence of sessions.
6929
6930 This function commit an entire queue of sessions starting with the
6931 session in @c first. If there were an error in the flushing part of
6932 the ordered commit, the error code is passed in and all the threads
6933 are marked accordingly (but not committed).
6934
6935 @see MYSQL_BIN_LOG::ordered_commit
6936
6937 @param thd The "master" thread
6938 @param first First thread in the queue of threads to commit
6939 */
6940
6941 void
process_commit_stage_queue(THD * thd,THD * first)6942 MYSQL_BIN_LOG::process_commit_stage_queue(THD *thd, THD *first)
6943 {
6944 mysql_mutex_assert_owner(&LOCK_commit);
6945 Thread_excursion excursion(thd);
6946 #ifndef DBUG_OFF
6947 thd->transaction.flags.ready_preempt= 1; // formality by the leader
6948 #endif
6949 for (THD *head= first ; head ; head = head->next_to_commit)
6950 {
6951 DBUG_PRINT("debug", ("Thread ID: %lu, commit_error: %d, flags.pending: %s",
6952 head->thread_id, head->commit_error,
6953 YESNO(head->transaction.flags.pending)));
6954 /*
6955 If flushing failed, set commit_error for the session, skip the
6956 transaction and proceed with the next transaction instead. This
6957 will mark all threads as failed, since the flush failed.
6958
6959 If flush succeeded, attach to the session and commit it in the
6960 engines.
6961 */
6962 #ifndef DBUG_OFF
6963 stage_manager.clear_preempt_status(head);
6964 #endif
6965 /*
6966 Flush/Sync error should be ignored and continue
6967 to commit phase. And thd->commit_error cannot be
6968 COMMIT_ERROR at this moment.
6969 */
6970 DBUG_ASSERT(head->commit_error != THD::CE_COMMIT_ERROR);
6971 excursion.try_to_attach_to(head);
6972 bool all= head->transaction.flags.real_commit;
6973 if (head->transaction.flags.commit_low)
6974 {
6975 /* head is parked to have exited append() */
6976 DBUG_ASSERT(head->transaction.flags.ready_preempt);
6977 /*
6978 storage engine commit
6979 */
6980 if (ha_commit_low(head, all, false))
6981 head->commit_error= THD::CE_COMMIT_ERROR;
6982 }
6983 DBUG_PRINT("debug", ("commit_error: %d, flags.pending: %s",
6984 head->commit_error,
6985 YESNO(head->transaction.flags.pending)));
6986 /*
6987 Decrement the prepared XID counter after storage engine commit.
6988 We also need decrement the prepared XID when encountering a
6989 flush error or session attach error for avoiding 3-way deadlock
6990 among user thread, rotate thread and dump thread.
6991 */
6992 if (head->transaction.flags.xid_written)
6993 dec_prep_xids(head);
6994 }
6995 }
6996
6997 /**
6998 Process after commit for a sequence of sessions.
6999
7000 @param thd The "master" thread
7001 @param first First thread in the queue of threads to commit
7002 */
7003
7004 void
process_after_commit_stage_queue(THD * thd,THD * first)7005 MYSQL_BIN_LOG::process_after_commit_stage_queue(THD *thd, THD *first)
7006 {
7007 Thread_excursion excursion(thd);
7008 for (THD *head= first; head; head= head->next_to_commit)
7009 {
7010 if (head->transaction.flags.run_hooks &&
7011 head->commit_error != THD::CE_COMMIT_ERROR)
7012 {
7013
7014 /*
7015 TODO: This hook here should probably move outside/below this
7016 if and be the only after_commit invocation left in the
7017 code.
7018 */
7019 excursion.try_to_attach_to(head);
7020 bool all= head->transaction.flags.real_commit;
7021 (void) RUN_HOOK(transaction, after_commit, (head, all));
7022 /*
7023 When after_commit finished for the transaction, clear the run_hooks flag.
7024 This allow other parts of the system to check if after_commit was called.
7025 */
7026 head->transaction.flags.run_hooks= false;
7027 }
7028 }
7029 }
7030
#ifndef DBUG_OFF
/**
  Printable names of the ordered-commit stages, used in debug traces.
  Entries are indexed by stage number.
*/
static const char* g_stage_name[] = { "FLUSH", "SYNC", "COMMIT" };
#endif
7039
7040
7041 /**
7042 Enter a stage of the ordered commit procedure.
7043
7044 Entering is stage is done by:
7045
7046 - Atomically enqueueing a queue of processes (which is just one for
7047 the first phase).
7048
7049 - If the queue was empty, the thread is the leader for that stage
7050 and it should process the entire queue for that stage.
7051
7052 - If the queue was not empty, the thread is a follower and can go
7053 waiting for the commit to finish.
7054
7055 The function will lock the stage mutex if it was designated the
7056 leader for the phase.
7057
7058 @param thd Session structure
7059 @param stage The stage to enter
7060 @param queue Queue of threads to enqueue for the stage
7061 @param stage_mutex Mutex for the stage
7062
7063 @retval true The thread should "bail out" and go waiting for the
7064 commit to finish
7065 @retval false The thread is the leader for the stage and should do
7066 the processing.
7067 */
7068
7069 bool
change_stage(THD * thd,Stage_manager::StageID stage,THD * queue,mysql_mutex_t * leave_mutex,mysql_mutex_t * enter_mutex)7070 MYSQL_BIN_LOG::change_stage(THD *thd,
7071 Stage_manager::StageID stage, THD *queue,
7072 mysql_mutex_t *leave_mutex,
7073 mysql_mutex_t *enter_mutex)
7074 {
7075 DBUG_ENTER("MYSQL_BIN_LOG::change_stage");
7076 DBUG_PRINT("enter", ("thd: 0x%llx, stage: %s, queue: 0x%llx",
7077 (ulonglong) thd, g_stage_name[stage], (ulonglong) queue));
7078 DBUG_ASSERT(0 <= stage && stage < Stage_manager::STAGE_COUNTER);
7079 DBUG_ASSERT(enter_mutex);
7080 DBUG_ASSERT(queue);
7081 /*
7082 enroll_for will release the leave_mutex once the sessions are
7083 queued.
7084 */
7085 if (!stage_manager.enroll_for(stage, queue, leave_mutex))
7086 {
7087 DBUG_ASSERT(!thd_get_cache_mngr(thd)->dbug_any_finalized());
7088 DBUG_RETURN(true);
7089 }
7090 mysql_mutex_lock(enter_mutex);
7091 DBUG_RETURN(false);
7092 }
7093
7094
7095
7096 /**
7097 Flush the I/O cache to file.
7098
7099 Flush the binary log to the binlog file if any byte where written
7100 and signal that the binary log file has been updated if the flush
7101 succeeds.
7102 */
7103
7104 int
flush_cache_to_file(my_off_t * end_pos_var)7105 MYSQL_BIN_LOG::flush_cache_to_file(my_off_t *end_pos_var)
7106 {
7107 if (flush_io_cache(&log_file))
7108 {
7109 THD *thd= current_thd;
7110 thd->commit_error= THD::CE_FLUSH_ERROR;
7111 return ER_ERROR_ON_WRITE;
7112 }
7113 *end_pos_var= my_b_tell(&log_file);
7114 return 0;
7115 }
7116
7117
7118 /**
7119 Call fsync() to sync the file to disk.
7120 */
7121 std::pair<bool, bool>
sync_binlog_file(bool force)7122 MYSQL_BIN_LOG::sync_binlog_file(bool force)
7123 {
7124 bool synced= false;
7125 unsigned int sync_period= get_sync_period();
7126 if (force || (sync_period && ++sync_counter >= sync_period))
7127 {
7128 sync_counter= 0;
7129
7130 /**
7131 On *pure non-transactional* workloads there is a small window
7132 in time where a concurrent rotate might be able to close
7133 the file before the sync is actually done. In that case,
7134 ignore the bad file descriptor errors.
7135
7136 Transactional workloads (InnoDB) are not affected since the
7137 the rotation will not happen until all transactions have
7138 committed to the storage engine, thence decreased the XID
7139 counters.
7140
7141 TODO: fix this properly even for non-transactional storage
7142 engines.
7143 */
7144 if (DBUG_EVALUATE_IF("simulate_error_during_sync_binlog_file", 1,
7145 mysql_file_sync(log_file.file,
7146 MYF(MY_WME | MY_IGNORE_BADFD))))
7147 {
7148 THD *thd= current_thd;
7149 thd->commit_error= THD::CE_SYNC_ERROR;
7150 return std::make_pair(true, synced);
7151 }
7152 synced= true;
7153 }
7154 return std::make_pair(false, synced);
7155 }
7156
7157
7158 /**
7159 Helper function executed when leaving @c ordered_commit.
7160
7161 This function contain the necessary code for fetching the error
7162 code, doing post-commit checks, and wrapping up the commit if
7163 necessary.
7164
7165 It is typically called when enter_stage indicates that the thread
7166 should bail out, and also when the ultimate leader thread finishes
7167 executing @c ordered_commit.
7168
7169 It is typically used in this manner:
7170 @code
7171 if (enter_stage(thd, Thread_queue::FLUSH_STAGE, thd, &LOCK_log))
7172 return finish_commit(thd);
7173 @endcode
7174
7175 @return Error code if the session commit failed, or zero on
7176 success.
7177 */
7178 int
finish_commit(THD * thd)7179 MYSQL_BIN_LOG::finish_commit(THD *thd)
7180 {
7181 /*
7182 In some unlikely situations, it can happen that binary
7183 log is closed before the thread flushes it's cache.
7184 In that case, clear the caches before doing commit.
7185 */
7186 if (unlikely(!is_open()))
7187 {
7188 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
7189 if (cache_mngr)
7190 cache_mngr->reset();
7191 }
7192 if (thd->transaction.flags.commit_low)
7193 {
7194 const bool all= thd->transaction.flags.real_commit;
7195 /*
7196 storage engine commit
7197 */
7198 DBUG_ASSERT(thd->commit_error != THD::CE_COMMIT_ERROR);
7199 if (ha_commit_low(thd, all, false))
7200 thd->commit_error= THD::CE_COMMIT_ERROR;
7201 /*
7202 Decrement the prepared XID counter after storage engine commit
7203 */
7204 if (thd->transaction.flags.xid_written)
7205 dec_prep_xids(thd);
7206 /*
7207 If commit succeeded, we call the after_commit hook
7208
7209 TODO: This hook here should probably move outside/below this
7210 if and be the only after_commit invocation left in the
7211 code.
7212 */
7213 if ((thd->commit_error != THD::CE_COMMIT_ERROR ) && thd->transaction.flags.run_hooks)
7214 {
7215 (void) RUN_HOOK(transaction, after_commit, (thd, all));
7216 thd->transaction.flags.run_hooks= false;
7217 }
7218 }
7219 else if (thd->transaction.flags.xid_written)
7220 dec_prep_xids(thd);
7221
7222 /*
7223 Remove committed GTID from owned_gtids, it was already logged on
7224 MYSQL_BIN_LOG::write_cache().
7225 */
7226 global_sid_lock->rdlock();
7227 gtid_state->update_on_commit(thd);
7228 global_sid_lock->unlock();
7229
7230 DBUG_ASSERT(thd->commit_error || !thd->transaction.flags.run_hooks);
7231 DBUG_ASSERT(!thd_get_cache_mngr(thd)->dbug_any_finalized());
7232 DBUG_PRINT("return", ("Thread ID: %lu, commit_error: %d",
7233 thd->thread_id, thd->commit_error));
7234 /*
7235 flush or sync errors are handled by the leader of the group
7236 (using binlog_error_action). Hence treat only COMMIT_ERRORs as errors.
7237 */
7238 return (thd->commit_error == THD::CE_COMMIT_ERROR);
7239 }
7240
7241 /**
7242 Helper function to handle flush or sync stage errors.
7243 If binlog_error_action= ABORT_SERVER, server will be aborted
7244 after reporting the error to the client.
7245 If binlog_error_action= IGNORE_ERROR, binlog will be closed
7246 for the life time of the server. close() call is protected
7247 with LOCK_log to avoid any parallel operations on binary log.
7248
7249 @param thd Thread object that faced flush/sync error
7250 @param need_lock_log
7251 > Indicates true if LOCk_log is needed before closing
7252 binlog (happens when we are handling sync error)
7253 > Indicates false if LOCK_log is already acquired
7254 by the thread (happens when we are handling flush
7255 error)
7256
7257 @return void
7258 */
handle_binlog_flush_or_sync_error(THD * thd,bool need_lock_log)7259 void MYSQL_BIN_LOG::handle_binlog_flush_or_sync_error(THD *thd,
7260 bool need_lock_log)
7261 {
7262 char errmsg[MYSQL_ERRMSG_SIZE];
7263 sprintf(errmsg, "An error occurred during %s stage of the commit. "
7264 "'binlog_error_action' is set to '%s'.",
7265 thd->commit_error== THD::CE_FLUSH_ERROR ? "flush" : "sync",
7266 binlog_error_action == ABORT_SERVER ? "ABORT_SERVER" : "IGNORE_ERROR");
7267 if (binlog_error_action == ABORT_SERVER)
7268 {
7269 char err_buff[MYSQL_ERRMSG_SIZE];
7270 sprintf(err_buff, "%s Hence aborting the server.", errmsg);
7271 exec_binlog_error_action_abort(err_buff);
7272 }
7273 else
7274 {
7275 DEBUG_SYNC(thd, "before_binlog_closed_due_to_error");
7276 if (need_lock_log)
7277 mysql_mutex_lock(&LOCK_log);
7278 else
7279 mysql_mutex_assert_owner(&LOCK_log);
7280 /*
7281 It can happen that other group leader encountered
7282 error and already closed the binary log. So print
7283 error only if it is in open state. But we should
7284 call close() always just in case if the previous
7285 close did not close index file.
7286 */
7287 if (is_open())
7288 {
7289 sql_print_error("%s Hence turning logging off for the whole duration "
7290 "of the MySQL server process. To turn it on again: fix "
7291 "the cause, shutdown the MySQL server and restart it.",
7292 errmsg);
7293 }
7294 close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, false/*need_lock_log=false*/,
7295 true/*need_lock_index=true*/);
7296 /*
7297 If there is a write error (flush/sync stage) and if
7298 binlog_error_action=IGNORE_ERROR, clear the error
7299 and allow the commit to happen in storage engine.
7300 */
7301 if (check_write_error(thd))
7302 thd->clear_error();
7303
7304 if (need_lock_log)
7305 mysql_mutex_unlock(&LOCK_log);
7306 DEBUG_SYNC(thd, "after_binlog_closed_due_to_error");
7307 }
7308 }
7309 /**
7310 Flush and commit the transaction.
7311
7312 This will execute an ordered flush and commit of all outstanding
7313 transactions and is the main function for the binary log group
7314 commit logic. The function performs the ordered commit in two
7315 phases.
7316
7317 The first phase flushes the caches to the binary log and under
7318 LOCK_log and marks all threads that were flushed as not pending.
7319
7320 The second phase executes under LOCK_commit and commits all
7321 transactions in order.
7322
7323 The procedure is:
7324
7325 1. Queue ourselves for flushing.
7326 2. Grab the log lock, which might result is blocking if the mutex is
7327 already held by another thread.
7328 3. If we were not committed while waiting for the lock
7329 1. Fetch the queue
7330 2. For each thread in the queue:
7331 a. Attach to it
7332 b. Flush the caches, saving any error code
7333 3. Flush and sync (depending on the value of sync_binlog).
7334 4. Signal that the binary log was updated
7335 4. Release the log lock
7336 5. Grab the commit lock
7337 1. For each thread in the queue:
7338 a. If there were no error when flushing and the transaction shall be committed:
7339 - Commit the transaction, saving the result of executing the commit.
7340 6. Release the commit lock
7341 7. Call purge, if any of the committed thread requested a purge.
7342 8. Return with the saved error code
7343
7344 @todo The use of @c skip_commit is a hack that we use since the @c
7345 TC_LOG Interface does not contain functions to handle
7346 savepoints. Once the binary log is eliminated as a handlerton and
7347 the @c TC_LOG interface is extended with savepoint handling, this
7348 parameter can be removed.
7349
7350 @param thd Session to commit transaction for
7351 @param all This is @c true if this is a real transaction commit, and
7352 @c false otherwise.
7353 @param skip_commit
7354 This is @c true if the call to @c ha_commit_low should
7355 be skipped (it is handled by the caller somehow) and @c
7356 false otherwise (the normal case).
7357 */
ordered_commit(THD * thd,bool all,bool skip_commit)7358 int MYSQL_BIN_LOG::ordered_commit(THD *thd, bool all, bool skip_commit)
7359 {
7360 DBUG_ENTER("MYSQL_BIN_LOG::ordered_commit");
7361 int flush_error= 0, sync_error= 0;
7362 my_off_t total_bytes= 0;
7363 bool do_rotate= false;
7364
7365 /*
7366 These values are used while flushing a transaction, so clear
7367 everything.
7368
7369 Notes:
7370
7371 - It would be good if we could keep transaction coordinator
7372 log-specific data out of the THD structure, but that is not the
7373 case right now.
7374
7375 - Everything in the transaction structure is reset when calling
7376 ha_commit_low since that calls st_transaction::cleanup.
7377 */
7378 thd->transaction.flags.pending= true;
7379 thd->commit_error= THD::CE_NONE;
7380 thd->next_to_commit= NULL;
7381 thd->durability_property= HA_IGNORE_DURABILITY;
7382 thd->transaction.flags.real_commit= all;
7383 thd->transaction.flags.xid_written= false;
7384 thd->transaction.flags.commit_low= !skip_commit;
7385 thd->transaction.flags.run_hooks= !skip_commit;
7386 #ifndef DBUG_OFF
7387 /*
7388 The group commit Leader may have to wait for follower whose transaction
7389 is not ready to be preempted. Initially the status is pessimistic.
7390 Preemption guarding logics is necessary only when DBUG_ON is set.
7391 It won't be required for the dbug-off case as long as the follower won't
7392 execute any thread-specific write access code in this method, which is
7393 the case as of current.
7394 */
7395 thd->transaction.flags.ready_preempt= 0;
7396 #endif
7397
7398 DBUG_PRINT("enter", ("flags.pending: %s, commit_error: %d, thread_id: %lu",
7399 YESNO(thd->transaction.flags.pending),
7400 thd->commit_error, thd->thread_id));
7401
7402 /*
7403 Stage #1: flushing transactions to binary log
7404
7405 While flushing, we allow new threads to enter and will process
7406 them in due time. Once the queue was empty, we cannot reap
7407 anything more since it is possible that a thread entered and
7408 appointed itself leader for the flush phase.
7409 */
7410 DEBUG_SYNC(thd, "waiting_to_enter_flush_stage");
7411 if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log))
7412 {
7413 DBUG_PRINT("return", ("Thread ID: %lu, commit_error: %d",
7414 thd->thread_id, thd->commit_error));
7415 DBUG_RETURN(finish_commit(thd));
7416 }
7417
7418 THD *wait_queue= NULL, *final_queue= NULL;
7419 mysql_mutex_t *leave_mutex_before_commit_stage= NULL;
7420 my_off_t flush_end_pos= 0;
7421 bool need_LOCK_log;
7422 if (unlikely(!is_open()))
7423 {
7424 final_queue= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
7425 leave_mutex_before_commit_stage= &LOCK_log;
7426 /*
7427 binary log is closed, flush stage and sync stage should be
7428 ignored. Binlog cache should be cleared, but instead of doing
7429 it here, do that work in 'finish_commit' function so that
7430 leader and followers thread caches will be cleared.
7431 */
7432 goto commit_stage;
7433 }
7434 DEBUG_SYNC(thd, "waiting_in_the_middle_of_flush_stage");
7435 flush_error= process_flush_stage_queue(&total_bytes, &do_rotate,
7436 &wait_queue);
7437
7438 if (flush_error == 0 && total_bytes > 0)
7439 flush_error= flush_cache_to_file(&flush_end_pos);
7440
7441 /*
7442 If the flush finished successfully, we can call the after_flush
7443 hook. Being invoked here, we have the guarantee that the hook is
7444 executed before the before/after_send_hooks on the dump thread
7445 preventing race conditions among these plug-ins.
7446 */
7447 if (flush_error == 0)
7448 {
7449 const char *file_name_ptr= log_file_name + dirname_length(log_file_name);
7450 DBUG_ASSERT(flush_end_pos != 0);
7451 if (RUN_HOOK(binlog_storage, after_flush,
7452 (thd, file_name_ptr, flush_end_pos)))
7453 {
7454 sql_print_error("Failed to run 'after_flush' hooks");
7455 flush_error= ER_ERROR_ON_WRITE;
7456 }
7457
7458 signal_update();
7459 DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
7460 }
7461
7462 if (flush_error)
7463 {
7464 /*
7465 Handle flush error (if any) after leader finishes it's flush stage.
7466 */
7467 handle_binlog_flush_or_sync_error(thd, false /* need_lock_log */);
7468 }
7469
7470 /*
7471 Stage #2: Syncing binary log file to disk
7472 */
7473 need_LOCK_log= (get_sync_period() == 1);
7474
7475 /*
7476 LOCK_log is not released when sync_binlog is 1. It guarantees that the
7477 events are not be replicated by dump threads before they are synced to disk.
7478 */
7479 if (change_stage(thd, Stage_manager::SYNC_STAGE, wait_queue,
7480 need_LOCK_log ? NULL : &LOCK_log, &LOCK_sync))
7481 {
7482 DBUG_PRINT("return", ("Thread ID: %lu, commit_error: %d",
7483 thd->thread_id, thd->commit_error));
7484 DBUG_RETURN(finish_commit(thd));
7485 }
7486 final_queue= stage_manager.fetch_queue_for(Stage_manager::SYNC_STAGE);
7487 if (flush_error == 0 && total_bytes > 0)
7488 {
7489 DEBUG_SYNC(thd, "before_sync_binlog_file");
7490 std::pair<bool, bool> result= sync_binlog_file(false);
7491 sync_error= result.first;
7492 }
7493
7494 if (need_LOCK_log)
7495 mysql_mutex_unlock(&LOCK_log);
7496 leave_mutex_before_commit_stage= &LOCK_sync;
7497 /*
7498 Stage #3: Commit all transactions in order.
7499
7500 This stage is skipped if we do not need to order the commits and
7501 each thread have to execute the handlerton commit instead.
7502
7503 Howver, since we are keeping the lock from the previous stage, we
7504 need to unlock it if we skip the stage.
7505 */
7506 commit_stage:
7507 /*
7508 We are delaying the handling of sync error until
7509 all locks are released but we should not enter into
7510 commit stage if binlog_error_action is ABORT_SERVER.
7511 */
7512 if (opt_binlog_order_commits &&
7513 (sync_error == 0 || binlog_error_action != ABORT_SERVER))
7514 {
7515 if (change_stage(thd, Stage_manager::COMMIT_STAGE,
7516 final_queue, leave_mutex_before_commit_stage,
7517 &LOCK_commit))
7518 {
7519 DBUG_PRINT("return", ("Thread ID: %lu, commit_error: %d",
7520 thd->thread_id, thd->commit_error));
7521 DBUG_RETURN(finish_commit(thd));
7522 }
7523 THD *commit_queue= stage_manager.fetch_queue_for(Stage_manager::COMMIT_STAGE);
7524 DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
7525 DEBUG_SYNC(thd, "before_process_commit_stage_queue"););
7526 process_commit_stage_queue(thd, commit_queue);
7527 mysql_mutex_unlock(&LOCK_commit);
7528 /*
7529 Process after_commit after LOCK_commit is released for avoiding
7530 3-way deadlock among user thread, rotate thread and dump thread.
7531 */
7532 process_after_commit_stage_queue(thd, commit_queue);
7533 final_queue= commit_queue;
7534 }
7535 else if (leave_mutex_before_commit_stage)
7536 mysql_mutex_unlock(leave_mutex_before_commit_stage);
7537
7538 /*
7539 Handle sync error after we release all locks in order to avoid deadlocks
7540 */
7541 if (sync_error)
7542 handle_binlog_flush_or_sync_error(thd, true /* need_lock_log */);
7543
7544 /* Commit done so signal all waiting threads */
7545 stage_manager.signal_done(final_queue);
7546
7547 /*
7548 Finish the commit before executing a rotate, or run the risk of a
7549 deadlock. We don't need the return value here since it is in
7550 thd->commit_error, which is returned below.
7551 */
7552 (void) finish_commit(thd);
7553
7554 /*
7555 If we need to rotate, we do it without commit error.
7556 Otherwise the thd->commit_error will be possibly reset.
7557 */
7558 if (DBUG_EVALUATE_IF("force_rotate", 1, 0) ||
7559 (do_rotate && thd->commit_error == THD::CE_NONE))
7560 {
7561 /*
7562 Do not force the rotate as several consecutive groups may
7563 request unnecessary rotations.
7564
7565 NOTE: Run purge_logs wo/ holding LOCK_log because it does not
7566 need the mutex. Otherwise causes various deadlocks.
7567 */
7568
7569 DEBUG_SYNC(thd, "ready_to_do_rotation");
7570 bool check_purge= false;
7571 mysql_mutex_lock(&LOCK_log);
7572 /*
7573 If rotate fails then depends on binlog_error_action variable
7574 appropriate action will be taken inside rotate call.
7575 */
7576 int error= rotate(false, &check_purge);
7577 mysql_mutex_unlock(&LOCK_log);
7578
7579 if (error)
7580 thd->commit_error= THD::CE_COMMIT_ERROR;
7581 else if (check_purge)
7582 purge();
7583 }
7584 /*
7585 flush or sync errors are handled above (using binlog_error_action).
7586 Hence treat only COMMIT_ERRORs as errors.
7587 */
7588 DBUG_RETURN(thd->commit_error == THD::CE_COMMIT_ERROR);
7589 }
7590
7591
7592 /**
7593 MYSQLD server recovers from last crashed binlog.
7594
7595 @param log IO_CACHE of the crashed binlog.
7596 @param fdle Format_description_log_event of the crashed binlog.
7597 @param valid_pos The position of the last valid transaction or
7598 event(non-transaction) of the crashed binlog.
7599
7600 @retval
7601 0 ok
7602 @retval
7603 1 error
7604 */
recover(IO_CACHE * log,Format_description_log_event * fdle,my_off_t * valid_pos)7605 int MYSQL_BIN_LOG::recover(IO_CACHE *log, Format_description_log_event *fdle,
7606 my_off_t *valid_pos)
7607 {
7608 Log_event *ev;
7609 HASH xids;
7610 MEM_ROOT mem_root;
7611 /*
7612 The flag is used for handling the case that a transaction
7613 is partially written to the binlog.
7614 */
7615 bool in_transaction= FALSE;
7616
7617 if (! fdle->is_valid() ||
7618 my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
7619 sizeof(my_xid), 0, 0, MYF(0)))
7620 goto err1;
7621
7622 init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE);
7623
7624 while ((ev= Log_event::read_log_event(log, 0, fdle, TRUE))
7625 && ev->is_valid())
7626 {
7627 if (ev->get_type_code() == QUERY_EVENT &&
7628 !strcmp(((Query_log_event*)ev)->query, "BEGIN"))
7629 in_transaction= TRUE;
7630
7631 if (ev->get_type_code() == QUERY_EVENT &&
7632 !strcmp(((Query_log_event*)ev)->query, "COMMIT"))
7633 {
7634 DBUG_ASSERT(in_transaction == TRUE);
7635 in_transaction= FALSE;
7636 }
7637 else if (ev->get_type_code() == XID_EVENT)
7638 {
7639 DBUG_ASSERT(in_transaction == TRUE);
7640 in_transaction= FALSE;
7641 Xid_log_event *xev=(Xid_log_event *)ev;
7642 uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
7643 sizeof(xev->xid));
7644 if (!x || my_hash_insert(&xids, x))
7645 goto err2;
7646 }
7647
7648 /*
7649 Recorded valid position for the crashed binlog file
7650 which did not contain incorrect events. The following
7651 positions increase the variable valid_pos:
7652
7653 1 -
7654 ...
7655 <---> HERE IS VALID <--->
7656 GTID
7657 BEGIN
7658 ...
7659 COMMIT
7660 ...
7661
7662 2 -
7663 ...
7664 <---> HERE IS VALID <--->
7665 GTID
7666 DDL/UTILITY
7667 ...
7668
7669 In other words, the following positions do not increase
7670 the variable valid_pos:
7671
7672 1 -
7673 GTID
7674 <---> HERE IS VALID <--->
7675 ...
7676
7677 2 -
7678 GTID
7679 BEGIN
7680 <---> HERE IS VALID <--->
7681 ...
7682 */
7683 if (!log->error && !in_transaction &&
7684 !is_gtid_event(ev))
7685 *valid_pos= my_b_tell(log);
7686
7687 delete ev;
7688 }
7689
7690 if (ha_recover(&xids))
7691 goto err2;
7692
7693 free_root(&mem_root, MYF(0));
7694 my_hash_free(&xids);
7695 return 0;
7696
7697 err2:
7698 free_root(&mem_root, MYF(0));
7699 my_hash_free(&xids);
7700 err1:
7701 sql_print_error("Crash recovery failed. Either correct the problem "
7702 "(if it's, for example, out of memory error) and restart, "
7703 "or delete (or rename) binary log and start mysqld with "
7704 "--tc-heuristic-recover={commit|rollback}");
7705 return 1;
7706 }
7707
report_missing_purged_gtids(const Gtid_set * slave_executed_gtid_set,const char ** errmsg)7708 void MYSQL_BIN_LOG::report_missing_purged_gtids(const Gtid_set* slave_executed_gtid_set,
7709 const char** errmsg)
7710 {
7711 DBUG_ENTER("MYSQL_BIN_LOG::report_missing_purged_gtids");
7712 THD *thd= current_thd;
7713 Gtid_set gtid_missing(gtid_state->get_lost_gtids()->get_sid_map());
7714 gtid_missing.add_gtid_set(gtid_state->get_lost_gtids());
7715 gtid_missing.remove_gtid_set(slave_executed_gtid_set);
7716
7717 String tmp_uuid;
7718 uchar name[]= "slave_uuid";
7719
7720 /* Protects thd->user_vars. */
7721 mysql_mutex_lock(&thd->LOCK_thd_data);
7722 user_var_entry *entry=
7723 (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
7724 if (entry && entry->length() > 0)
7725 tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
7726 mysql_mutex_unlock(&thd->LOCK_thd_data);
7727
7728
7729 char* missing_gtids= NULL;
7730 char* slave_executed_gtids= NULL;
7731 gtid_missing.to_string(&missing_gtids, NULL);
7732 slave_executed_gtid_set->to_string(&slave_executed_gtids, NULL);
7733
7734 /*
7735 Log the information about the missing purged GTIDs to the error log
7736 if the message is less than MAX_LOG_BUFFER_SIZE.
7737 */
7738 std::ostringstream log_info;
7739 log_info << "The missing transactions are '"<< missing_gtids <<"'";
7740 const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
7741
7742 /* Don't consider the "%s" in the format string. Subtract 2 from the
7743 total length */
7744 int total_length= (strlen(log_msg) - 2 + log_info.str().length());
7745
7746 DBUG_EXECUTE_IF("simulate_long_missing_gtids",
7747 { total_length= MAX_LOG_BUFFER_SIZE + 1;});
7748
7749 if (total_length > MAX_LOG_BUFFER_SIZE)
7750 log_info.str("To find the missing purged transactions, run \"SELECT"
7751 " @@GLOBAL.GTID_PURGED\" on the master, then run \"SHOW"
7752 " SLAVE STATUS\" on the slave for the Retrieved_Gtid_Set,"
7753 " and then run \"SELECT GTID_SUBTRACT(<master_set>,"
7754 " <slave_set>)\" on any server");
7755
7756 sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
7757 log_info.str().c_str());
7758
7759 /*
7760 Send the information about the slave executed GTIDs and missing
7761 purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
7762 */
7763 std::ostringstream gtid_info;
7764 gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
7765 << "', and the missing transactions are '"<< missing_gtids <<"'";
7766 *errmsg= ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS);
7767
7768 /* Don't consider the "%s" in the format string. Subtract 2 from the
7769 total length */
7770 total_length= (strlen(*errmsg) - 2 + gtid_info.str().length());
7771
7772 DBUG_EXECUTE_IF("simulate_long_missing_gtids",
7773 { total_length= MYSQL_ERRMSG_SIZE + 1;});
7774
7775 if (total_length > MYSQL_ERRMSG_SIZE)
7776 gtid_info.str("The GTID sets and the missing purged transactions are too"
7777 " long to print in this message. For more information,"
7778 " please see the master's error log or the manual for"
7779 " GTID_SUBTRACT");
7780
7781 /* Buffer for formatting the message about the missing GTIDs. */
7782 static char buff[MYSQL_ERRMSG_SIZE];
7783 my_snprintf(buff, MYSQL_ERRMSG_SIZE, *errmsg, gtid_info.str().c_str());
7784 *errmsg= const_cast<const char*>(buff);
7785
7786 my_free(missing_gtids);
7787 my_free(slave_executed_gtids);
7788 DBUG_VOID_RETURN;
7789 }
7790
report_missing_gtids(const Gtid_set * previous_gtid_set,const Gtid_set * slave_executed_gtid_set,const char ** errmsg)7791 void MYSQL_BIN_LOG::report_missing_gtids(const Gtid_set* previous_gtid_set,
7792 const Gtid_set* slave_executed_gtid_set,
7793 const char** errmsg)
7794 {
7795 DBUG_ENTER("MYSQL_BIN_LOG::report_missing_gtids");
7796 THD *thd=current_thd;
7797 char* missing_gtids= NULL;
7798 char* slave_executed_gtids= NULL;
7799 Gtid_set gtid_missing(slave_executed_gtid_set->get_sid_map());
7800 gtid_missing.add_gtid_set(slave_executed_gtid_set);
7801 gtid_missing.remove_gtid_set(previous_gtid_set);
7802 gtid_missing.to_string(&missing_gtids, NULL);
7803 slave_executed_gtid_set->to_string(&slave_executed_gtids, NULL);
7804
7805 String tmp_uuid;
7806 uchar name[]= "slave_uuid";
7807
7808 /* Protects thd->user_vars. */
7809 mysql_mutex_lock(&thd->LOCK_thd_data);
7810
7811 user_var_entry *entry=
7812 (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
7813 if (entry && entry->length() > 0)
7814 tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
7815 mysql_mutex_unlock(&thd->LOCK_thd_data);
7816
7817 /*
7818 Log the information about the missing purged GTIDs to the error log
7819 if the message is less than MAX_LOG_BUFFER_SIZE.
7820 */
7821 std::ostringstream log_info;
7822 log_info << "If the binary log files have been deleted from disk,"
7823 " check the consistency of 'GTID_PURGED' variable."
7824 " The missing transactions are '"<< missing_gtids <<"'";
7825 const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
7826
7827 /* Don't consider the "%s" in the format string. Subtract 2 from the
7828 total length */
7829 if ((strlen(log_msg) - 2 + log_info.str().length()) > MAX_LOG_BUFFER_SIZE)
7830 log_info.str("To find the missing purged transactions, run \"SELECT"
7831 " @@GLOBAL.GTID_PURGED\" on the master, then run \"SHOW"
7832 " SLAVE STATUS\" on the slave for the Retrieved_Gtid_Set,"
7833 " and then run \"SELECT GTID_SUBTRACT(<master_set>,"
7834 " <slave_set>)\" on any server");
7835
7836 sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
7837 log_info.str().c_str());
7838
7839 /*
7840 Send the information about the slave executed GTIDs and missing
7841 purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
7842 */
7843 std::ostringstream gtid_info;
7844 gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
7845 << "', and the missing transactions are '"<< missing_gtids <<"'";
7846 *errmsg= ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS);
7847
7848 /* Don't consider the "%s" in the format string. Subtract 2 from the
7849 total length */
7850 if ((strlen(*errmsg) - 2 + gtid_info.str().length()) > MYSQL_ERRMSG_SIZE)
7851 gtid_info.str("The GTID sets and the missing purged transactions are too"
7852 " long to print in this message. For more information,"
7853 " please see the master's error log or the manual for"
7854 " GTID_SUBTRACT");
7855 /* Buffer for formatting the message about the missing GTIDs. */
7856 static char buff[MYSQL_ERRMSG_SIZE];
7857 my_snprintf(buff, MYSQL_ERRMSG_SIZE, *errmsg, gtid_info.str().c_str());
7858 *errmsg= const_cast<const char*>(buff);
7859
7860 my_free(missing_gtids);
7861 my_free(slave_executed_gtids);
7862
7863 DBUG_VOID_RETURN;
7864 }
get_group_cache(bool is_transactional)7865 Group_cache *THD::get_group_cache(bool is_transactional)
7866 {
7867 DBUG_ENTER("THD::get_group_cache(bool)");
7868
7869 // If opt_bin_log==0, it is not safe to call thd_get_cache_mngr
7870 // because binlog_hton has not been completely set up.
7871 DBUG_ASSERT(opt_bin_log);
7872 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
7873
7874 // cache_mngr is NULL until we call thd->binlog_setup_trx_data, so
7875 // we assert that this has been done.
7876 DBUG_ASSERT(cache_mngr != NULL);
7877
7878 binlog_cache_data *cache_data=
7879 cache_mngr->get_binlog_cache_data(is_transactional);
7880 DBUG_ASSERT(cache_data != NULL);
7881
7882 DBUG_RETURN(&cache_data->group_cache);
7883 }
7884
7885 /*
7886 These functions are placed in this file since they need access to
7887 binlog_hton, which has internal linkage.
7888 */
7889
binlog_setup_trx_data()7890 int THD::binlog_setup_trx_data()
7891 {
7892 DBUG_ENTER("THD::binlog_setup_trx_data");
7893 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
7894
7895 if (cache_mngr)
7896 DBUG_RETURN(0); // Already set up
7897
7898 cache_mngr= (binlog_cache_mngr*) my_malloc(sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
7899 if (!cache_mngr ||
7900 open_cached_file(&cache_mngr->stmt_cache.cache_log, mysql_tmpdir,
7901 LOG_PREFIX, binlog_stmt_cache_size, MYF(MY_WME)) ||
7902 open_cached_file(&cache_mngr->trx_cache.cache_log, mysql_tmpdir,
7903 LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
7904 {
7905 my_free(cache_mngr);
7906 DBUG_RETURN(1); // Didn't manage to set it up
7907 }
7908 DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) cache_mngr));
7909 thd_set_ha_data(this, binlog_hton, cache_mngr);
7910
7911 cache_mngr= new (thd_get_cache_mngr(this))
7912 binlog_cache_mngr(max_binlog_stmt_cache_size,
7913 &binlog_stmt_cache_use,
7914 &binlog_stmt_cache_disk_use,
7915 max_binlog_cache_size,
7916 &binlog_cache_use,
7917 &binlog_cache_disk_use);
7918 DBUG_RETURN(0);
7919 }
7920
7921 /**
7922
7923 */
register_binlog_handler(THD * thd,bool trx)7924 void register_binlog_handler(THD *thd, bool trx)
7925 {
7926 DBUG_ENTER("register_binlog_handler");
7927 /*
7928 If this is the first call to this function while processing a statement,
7929 the transactional cache does not have a savepoint defined. So, in what
7930 follows:
7931 . an implicit savepoint is defined;
7932 . callbacks are registered;
7933 . binary log is set as read/write.
7934
7935 The savepoint allows for truncating the trx-cache transactional changes
7936 fail. Callbacks are necessary to flush caches upon committing or rolling
7937 back a statement or a transaction. However, notifications do not happen
7938 if the binary log is set as read/write.
7939 */
7940 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
7941 if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
7942 {
7943 /*
7944 Set an implicit savepoint in order to be able to truncate a trx-cache.
7945 */
7946 my_off_t pos= 0;
7947 binlog_trans_log_savepos(thd, &pos);
7948 cache_mngr->trx_cache.set_prev_position(pos);
7949
7950 /*
7951 Set callbacks in order to be able to call commmit or rollback.
7952 */
7953 if (trx)
7954 trans_register_ha(thd, TRUE, binlog_hton);
7955 trans_register_ha(thd, FALSE, binlog_hton);
7956
7957 /*
7958 Set the binary log as read/write otherwise callbacks are not called.
7959 */
7960 thd->ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
7961 }
7962 DBUG_VOID_RETURN;
7963 }
7964
7965 /**
7966 Function to start a statement and optionally a transaction for the
7967 binary log.
7968
7969 This function does three things:
7970 - Starts a transaction if not in autocommit mode or if a BEGIN
7971 statement has been seen.
7972
7973 - Start a statement transaction to allow us to truncate the cache.
7974
7975 - Save the currrent binlog position so that we can roll back the
7976 statement by truncating the cache.
7977
7978 We only update the saved position if the old one was undefined,
7979 the reason is that there are some cases (e.g., for CREATE-SELECT)
7980 where the position is saved twice (e.g., both in
7981 select_create::prepare() and THD::binlog_write_table_map()) , but
7982 we should use the first. This means that calls to this function
7983 can be used to start the statement before the first table map
7984 event, to include some extra events.
7985
7986 Note however that IMMEDIATE_LOGGING implies that the statement is
7987 written without BEGIN/COMMIT.
7988
7989 @param thd Thread variable
7990 @param start_event The first event requested to be written into the
7991 binary log
7992 */
binlog_start_trans_and_stmt(THD * thd,Log_event * start_event)7993 static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event)
7994 {
7995 DBUG_ENTER("binlog_start_trans_and_stmt");
7996
7997 /*
7998 Initialize the cache manager if this was not done yet.
7999 */
8000 if (thd->binlog_setup_trx_data())
8001 DBUG_RETURN(1);
8002
8003 /*
8004 Retrieve the appropriated cache.
8005 */
8006 bool is_transactional= start_event->is_using_trans_cache();
8007 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
8008 binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_transactional);
8009
8010 /*
8011 If the event is requesting immediatly logging, there is no need to go
8012 further down and set savepoint and register callbacks.
8013 */
8014 if (start_event->is_using_immediate_logging())
8015 DBUG_RETURN(0);
8016
8017 register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode());
8018
8019 /*
8020 If the cache is empty log "BEGIN" at the beginning of every transaction.
8021 Here, a transaction is either a BEGIN..COMMIT/ROLLBACK block or a single
8022 statement in autocommit mode.
8023 */
8024 if (cache_data->is_binlog_empty())
8025 {
8026 Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"),
8027 is_transactional, FALSE, TRUE, 0, TRUE);
8028 if (cache_data->write_event(thd, &qinfo))
8029 DBUG_RETURN(1);
8030 }
8031
8032 DBUG_RETURN(0);
8033 }
8034
8035 /**
8036 This function writes a table map to the binary log.
8037 Note that in order to keep the signature uniform with related methods,
8038 we use a redundant parameter to indicate whether a transactional table
8039 was changed or not.
8040 Sometimes it will write a Rows_query_log_event into binary log before
8041 the table map too.
8042
8043 @param table a pointer to the table.
8044 @param is_transactional @c true indicates a transactional table,
8045 otherwise @c false a non-transactional.
8046 @param binlog_rows_query @c true indicates a Rows_query log event
8047 will be binlogged before table map,
8048 otherwise @c false indicates it will not
8049 be binlogged.
8050 @return
8051 nonzero if an error pops up when writing the table map event
8052 or the Rows_query log event.
8053 */
binlog_write_table_map(TABLE * table,bool is_transactional,bool binlog_rows_query)8054 int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
8055 bool binlog_rows_query)
8056 {
8057 int error;
8058 DBUG_ENTER("THD::binlog_write_table_map");
8059 DBUG_PRINT("enter", ("table: 0x%lx (%s: #%llu)",
8060 (long) table, table->s->table_name.str,
8061 table->s->table_map_id.id()));
8062
8063 /* Pre-conditions */
8064 DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
8065 DBUG_ASSERT(table->s->table_map_id.is_valid());
8066
8067 Table_map_log_event
8068 the_event(this, table, table->s->table_map_id, is_transactional);
8069
8070 binlog_start_trans_and_stmt(this, &the_event);
8071
8072 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
8073
8074 binlog_cache_data *cache_data=
8075 cache_mngr->get_binlog_cache_data(is_transactional);
8076
8077 if (binlog_rows_query && this->query())
8078 {
8079 /* Write the Rows_query_log_event into binlog before the table map */
8080 Rows_query_log_event
8081 rows_query_ev(this, this->query(), this->query_length());
8082 if ((error= cache_data->write_event(this, &rows_query_ev)))
8083 DBUG_RETURN(error);
8084 }
8085
8086 if ((error= cache_data->write_event(this, &the_event)))
8087 DBUG_RETURN(error);
8088
8089 binlog_table_maps++;
8090 DBUG_RETURN(0);
8091 }
8092
8093 /**
8094 This function retrieves a pending row event from a cache which is
8095 specified through the parameter @c is_transactional. Respectively, when it
8096 is @c true, the pending event is returned from the transactional cache.
8097 Otherwise from the non-transactional cache.
8098
8099 @param is_transactional @c true indicates a transactional cache,
8100 otherwise @c false a non-transactional.
8101 @return
8102 The row event if any.
8103 */
8104 Rows_log_event*
binlog_get_pending_rows_event(bool is_transactional) const8105 THD::binlog_get_pending_rows_event(bool is_transactional) const
8106 {
8107 Rows_log_event* rows= NULL;
8108 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
8109
8110 /*
8111 This is less than ideal, but here's the story: If there is no cache_mngr,
8112 prepare_pending_rows_event() has never been called (since the cache_mngr
8113 is set up there). In that case, we just return NULL.
8114 */
8115 if (cache_mngr)
8116 {
8117 binlog_cache_data *cache_data=
8118 cache_mngr->get_binlog_cache_data(is_transactional);
8119
8120 rows= cache_data->pending();
8121 }
8122 return (rows);
8123 }
8124
8125 /**
8126 @param db db name c-string to be inserted into alphabetically sorted
8127 THD::binlog_accessed_db_names list.
8128
8129 Note, that space for both the data and the node
8130 struct are allocated in THD::main_mem_root.
8131 The list lasts for the top-level query time and is reset
8132 in @c THD::cleanup_after_query().
8133 */
8134 void
add_to_binlog_accessed_dbs(const char * db_param)8135 THD::add_to_binlog_accessed_dbs(const char *db_param)
8136 {
8137 char *after_db;
8138 /*
8139 binlog_accessed_db_names list is to maintain the database
8140 names which are referenced in a given command.
8141 Prior to bug 17806014 fix, 'main_mem_root' memory root used
8142 to store this list. The 'main_mem_root' scope is till the end
8143 of the query. Hence it caused increasing memory consumption
8144 problem in big procedures like the ones mentioned below.
8145 Eg: CALL p1() where p1 is having 1,00,000 create and drop tables.
8146 'main_mem_root' is freed only at the end of the command CALL p1()'s
8147 execution. But binlog_accessed_db_names list scope is only till the
8148 individual statements specified the procedure(create/drop statements).
8149 Hence the memory allocated in 'main_mem_root' was left uncleared
8150 until the p1's completion, even though it is not required after
8151 completion of individual statements.
8152
8153 Instead of using 'main_mem_root' whose scope is complete query execution,
8154 now the memroot is changed to use 'thd->mem_root' whose scope is until the
8155 individual statement in CALL p1(). 'thd->mem_root' is set to 'execute_mem_root'
8156 in the context of procedure and it's scope is till the individual statement
8157 in CALL p1() and thd->memroot is equal to 'main_mem_root' in the context
8158 of a normal 'top level query'.
8159
8160 Eg: a) create table t1(i int); => If this function is called while
8161 processing this statement, thd->memroot is equal to &main_mem_root
8162 which will be freed immediately after executing this statement.
8163 b) CALL p1() -> p1 contains create table t1(i int); => If this function
8164 is called while processing create table statement which is inside
8165 a stored procedure, then thd->memroot is equal to 'execute_mem_root'
8166 which will be freed immediately after executing this statement.
8167 In both a and b case, thd->memroot will be freed immediately and will not
8168 increase memory consumption.
8169
8170 A special case(stored functions/triggers):
8171 Consider the following example:
8172 create function f1(i int) returns int
8173 begin
8174 insert into db1.t1 values (1);
8175 insert into db2.t1 values (2);
8176 end;
8177 When we are processing SELECT f1(), the list should contain db1, db2 names.
8178 Since thd->mem_root contains 'execute_mem_root' in the context of
8179 stored function, the mem root will be freed after adding db1 in
8180 the list and when we are processing the second statement and when we try
8181 to add 'db2' in the db1's list, it will lead to crash as db1's memory
8182 is already freed. To handle this special case, if in_sub_stmt is set
8183 (which is true incase of stored functions/triggers), we use &main_mem_root,
8184 if not set we will use thd->memroot which changes it's value to
8185 'execute_mem_root' or '&main_mem_root' depends on the context.
8186 */
8187 MEM_ROOT *db_mem_root= in_sub_stmt ? &main_mem_root : mem_root;
8188
8189 if (!binlog_accessed_db_names)
8190 binlog_accessed_db_names= new (db_mem_root) List<char>;
8191
8192 if (binlog_accessed_db_names->elements > MAX_DBS_IN_EVENT_MTS)
8193 {
8194 push_warning_printf(this, Sql_condition::WARN_LEVEL_WARN,
8195 ER_MTS_UPDATED_DBS_GREATER_MAX,
8196 ER(ER_MTS_UPDATED_DBS_GREATER_MAX),
8197 MAX_DBS_IN_EVENT_MTS);
8198 return;
8199 }
8200
8201 after_db= strdup_root(db_mem_root, db_param);
8202
8203 /*
8204 sorted insertion is implemented with first rearranging data
8205 (pointer to char*) of the links and final appending of the least
8206 ordered data to create a new link in the list.
8207 */
8208 if (binlog_accessed_db_names->elements != 0)
8209 {
8210 List_iterator<char> it(*get_binlog_accessed_db_names());
8211
8212 while (it++)
8213 {
8214 char *swap= NULL;
8215 char **ref_cur_db= it.ref();
8216 int cmp= strcmp(after_db, *ref_cur_db);
8217
8218 DBUG_ASSERT(!swap || cmp < 0);
8219
8220 if (cmp == 0)
8221 {
8222 after_db= NULL; /* dup to ignore */
8223 break;
8224 }
8225 else if (swap || cmp > 0)
8226 {
8227 swap= *ref_cur_db;
8228 *ref_cur_db= after_db;
8229 after_db= swap;
8230 }
8231 }
8232 }
8233 if (after_db)
8234 binlog_accessed_db_names->push_back(after_db, db_mem_root);
8235 }
8236
8237 /*
8238 Tells if two (or more) tables have auto_increment columns and we want to
8239 lock those tables with a write lock.
8240
8241 SYNOPSIS
8242 has_two_write_locked_tables_with_auto_increment
8243 tables Table list
8244
8245 NOTES:
8246 Call this function only when you have established the list of all tables
8247 which you'll want to update (including stored functions, triggers, views
8248 inside your statement).
8249 */
8250
8251 static bool
has_write_table_with_auto_increment(TABLE_LIST * tables)8252 has_write_table_with_auto_increment(TABLE_LIST *tables)
8253 {
8254 for (TABLE_LIST *table= tables; table; table= table->next_global)
8255 {
8256 /* we must do preliminary checks as table->table may be NULL */
8257 if (!table->placeholder() &&
8258 table->table->found_next_number_field &&
8259 (table->lock_type >= TL_WRITE_ALLOW_WRITE))
8260 return 1;
8261 }
8262
8263 return 0;
8264 }
8265
8266 /*
8267 checks if we have select tables in the table list and write tables
8268 with auto-increment column.
8269
8270 SYNOPSIS
8271 has_two_write_locked_tables_with_auto_increment_and_select
8272 tables Table list
8273
8274 RETURN VALUES
8275
8276 -true if the table list has atleast one table with auto-increment column
8277
8278
8279 and atleast one table to select from.
8280 -false otherwise
8281 */
8282
8283 static bool
has_write_table_with_auto_increment_and_select(TABLE_LIST * tables)8284 has_write_table_with_auto_increment_and_select(TABLE_LIST *tables)
8285 {
8286 bool has_select= false;
8287 bool has_auto_increment_tables = has_write_table_with_auto_increment(tables);
8288 for(TABLE_LIST *table= tables; table; table= table->next_global)
8289 {
8290 if (!table->placeholder() &&
8291 (table->lock_type <= TL_READ_NO_INSERT))
8292 {
8293 has_select= true;
8294 break;
8295 }
8296 }
8297 return(has_select && has_auto_increment_tables);
8298 }
8299
8300 /*
8301 Tells if there is a table whose auto_increment column is a part
8302 of a compound primary key while is not the first column in
8303 the table definition.
8304
8305 @param tables Table list
8306
8307 @return true if the table exists, fais if does not.
8308 */
8309
8310 static bool
has_write_table_auto_increment_not_first_in_pk(TABLE_LIST * tables)8311 has_write_table_auto_increment_not_first_in_pk(TABLE_LIST *tables)
8312 {
8313 for (TABLE_LIST *table= tables; table; table= table->next_global)
8314 {
8315 /* we must do preliminary checks as table->table may be NULL */
8316 if (!table->placeholder() &&
8317 table->table->found_next_number_field &&
8318 (table->lock_type >= TL_WRITE_ALLOW_WRITE)
8319 && table->table->s->next_number_keypart != 0)
8320 return 1;
8321 }
8322
8323 return 0;
8324 }
8325
8326 #ifndef DBUG_OFF
get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)8327 const char * get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)
8328 {
8329 switch (locked_tables_mode)
8330 {
8331 case LTM_NONE:
8332 return "LTM_NONE";
8333 case LTM_LOCK_TABLES:
8334 return "LTM_LOCK_TABLES";
8335 case LTM_PRELOCKED:
8336 return "LTM_PRELOCKED";
8337 case LTM_PRELOCKED_UNDER_LOCK_TABLES:
8338 return "LTM_PRELOCKED_UNDER_LOCK_TABLES";
8339 default:
8340 return "Unknown table lock mode";
8341 }
8342 }
8343 #endif
8344
8345
8346 /**
8347 Decide on logging format to use for the statement and issue errors
8348 or warnings as needed. The decision depends on the following
8349 parameters:
8350
8351 - The logging mode, i.e., the value of binlog_format. Can be
8352 statement, mixed, or row.
8353
8354 - The type of statement. There are three types of statements:
8355 "normal" safe statements; unsafe statements; and row injections.
8356 An unsafe statement is one that, if logged in statement format,
8357 might produce different results when replayed on the slave (e.g.,
8358 INSERT DELAYED). A row injection is either a BINLOG statement, or
8359 a row event executed by the slave's SQL thread.
8360
8361 - The capabilities of tables modified by the statement. The
8362 *capabilities vector* for a table is a set of flags associated
8363 with the table. Currently, it only includes two flags: *row
8364 capability flag* and *statement capability flag*.
8365
8366 The row capability flag is set if and only if the engine can
8367 handle row-based logging. The statement capability flag is set if
8368 and only if the table can handle statement-based logging.
8369
8370 Decision table for logging format
8371 ---------------------------------
8372
8373 The following table summarizes how the format and generated
8374 warning/error depends on the tables' capabilities, the statement
8375 type, and the current binlog_format.
8376
8377 Row capable N NNNNNNNNN YYYYYYYYY YYYYYYYYY
8378 Statement capable N YYYYYYYYY NNNNNNNNN YYYYYYYYY
8379
8380 Statement type * SSSUUUIII SSSUUUIII SSSUUUIII
8381
8382 binlog_format * SMRSMRSMR SMRSMRSMR SMRSMRSMR
8383
8384 Logged format - SS-S----- -RR-RR-RR SRRSRR-RR
8385 Warning/Error 1 --2732444 5--5--6-- ---7--6--
8386
8387 Legend
8388 ------
8389
8390 Row capable: N - Some table not row-capable, Y - All tables row-capable
8391 Stmt capable: N - Some table not stmt-capable, Y - All tables stmt-capable
8392 Statement type: (S)afe, (U)nsafe, or Row (I)njection
8393 binlog_format: (S)TATEMENT, (M)IXED, or (R)OW
8394 Logged format: (S)tatement or (R)ow
8395 Warning/Error: Warnings and error messages are as follows:
8396
8397 1. Error: Cannot execute statement: binlogging impossible since both
8398 row-incapable engines and statement-incapable engines are
8399 involved.
8400
8401 2. Error: Cannot execute statement: binlogging impossible since
8402 BINLOG_FORMAT = ROW and at least one table uses a storage engine
8403 limited to statement-logging.
8404
8405 3. Error: Cannot execute statement: binlogging of unsafe statement
8406 is impossible when storage engine is limited to statement-logging
8407 and BINLOG_FORMAT = MIXED.
8408
8409 4. Error: Cannot execute row injection: binlogging impossible since
8410 at least one table uses a storage engine limited to
8411 statement-logging.
8412
8413 5. Error: Cannot execute statement: binlogging impossible since
8414 BINLOG_FORMAT = STATEMENT and at least one table uses a storage
8415 engine limited to row-logging.
8416
8417 6. Error: Cannot execute row injection: binlogging impossible since
8418 BINLOG_FORMAT = STATEMENT.
8419
8420 7. Warning: Unsafe statement binlogged in statement format since
8421 BINLOG_FORMAT = STATEMENT.
8422
8423 In addition, we can produce the following error (not depending on
8424 the variables of the decision diagram):
8425
8426 8. Error: Cannot execute statement: binlogging impossible since more
8427 than one engine is involved and at least one engine is
8428 self-logging.
8429
8430 For each error case above, the statement is prevented from being
8431 logged, we report an error, and roll back the statement. For
  warnings, we set the thd->binlog_unsafe_warning_flags variable: the
  warning will be printed only if the statement is successfully logged.
8434
8435 @see THD::binlog_query
8436
8437 @param[in] thd Client thread
8438 @param[in] tables Tables involved in the query
8439
8440 @retval 0 No error; statement can be logged.
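  @retval -1 One of the error conditions above applies (1, 2, 3, 4, 5, 6,
             or 8), or the statement is rejected for another reason, e.g.
             it is not GTID-compatible or it writes to both replicated and
             non-replicated tables without row format.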
8442 */
8443
decide_logging_format(TABLE_LIST * tables)8444 int THD::decide_logging_format(TABLE_LIST *tables)
8445 {
8446 DBUG_ENTER("THD::decide_logging_format");
8447 DBUG_PRINT("info", ("query: %s", query()));
8448 DBUG_PRINT("info", ("variables.binlog_format: %lu",
8449 variables.binlog_format));
8450 DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags(): 0x%x",
8451 lex->get_stmt_unsafe_flags()));
8452
8453 reset_binlog_local_stmt_filter();
8454
8455 /*
8456 We should not decide logging format if the binlog is closed or
8457 binlogging is off, or if the statement is filtered out from the
8458 binlog by filtering rules.
8459 */
8460 if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) &&
8461 !(variables.binlog_format == BINLOG_FORMAT_STMT &&
8462 !binlog_filter->db_ok(db)))
8463 {
8464 /*
8465 Compute one bit field with the union of all the engine
8466 capabilities, and one with the intersection of all the engine
8467 capabilities.
8468 */
8469 handler::Table_flags flags_write_some_set= 0;
8470 handler::Table_flags flags_access_some_set= 0;
8471 handler::Table_flags flags_write_all_set=
8472 HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE;
8473
8474 /*
8475 If different types of engines are about to be updated.
8476 For example: Innodb and Falcon; Innodb and MyIsam.
8477 */
8478 my_bool multi_write_engine= FALSE;
8479 /*
8480 If different types of engines are about to be accessed
8481 and any of them is about to be updated. For example:
8482 Innodb and Falcon; Innodb and MyIsam.
8483 */
8484 my_bool multi_access_engine= FALSE;
8485 /*
8486 Identifies if a table is changed.
8487 */
8488 my_bool is_write= FALSE;
8489 /*
8490 A pointer to a previous table that was changed.
8491 */
8492 TABLE* prev_write_table= NULL;
8493 /*
8494 A pointer to a previous table that was accessed.
8495 */
8496 TABLE* prev_access_table= NULL;
8497 /*
8498 True if at least one table is transactional.
8499 */
8500 bool write_to_some_transactional_table= false;
8501 /*
8502 True if at least one table is non-transactional.
8503 */
8504 bool write_to_some_non_transactional_table= false;
8505 /*
8506 True if all non-transactional tables that has been updated
8507 are temporary.
8508 */
8509 bool write_all_non_transactional_are_tmp_tables= true;
8510 /**
8511 The number of tables used in the current statement,
8512 that should be replicated.
8513 */
8514 uint replicated_tables_count= 0;
8515 /**
8516 The number of tables written to in the current statement,
8517 that should not be replicated.
8518 A table should not be replicated when it is considered
8519 'local' to a MySQL instance.
8520 Currently, these tables are:
8521 - mysql.slow_log
8522 - mysql.general_log
8523 - mysql.slave_relay_log_info
8524 - mysql.slave_master_info
8525 - mysql.slave_worker_info
8526 - performance_schema.*
8527 - TODO: information_schema.*
8528 In practice, from this list, only performance_schema.* tables
8529 are written to by user queries.
8530 */
8531 uint non_replicated_tables_count= 0;
8532 #ifndef DBUG_OFF
8533 {
8534 DBUG_PRINT("debug", ("prelocked_mode: %s",
8535 get_locked_tables_mode_name(locked_tables_mode)));
8536 }
8537 #endif
8538
8539 if (variables.binlog_format != BINLOG_FORMAT_ROW && tables)
8540 {
8541 /*
8542 DML statements that modify a table with an auto_increment column based on
8543 rows selected from a table are unsafe as the order in which the rows are
        fetched from the select tables cannot be determined and may differ on
8545 master and slave.
8546 */
8547 if (has_write_table_with_auto_increment_and_select(tables))
8548 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_WRITE_AUTOINC_SELECT);
8549
8550 if (has_write_table_auto_increment_not_first_in_pk(tables))
8551 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST);
8552
8553 /*
8554 A query that modifies autoinc column in sub-statement can make the
8555 master and slave inconsistent.
8556 We can solve these problems in mixed mode by switching to binlogging
8557 if at least one updated table is used by sub-statement
8558 */
8559 if (lex->requires_prelocking() &&
8560 has_write_table_with_auto_increment(lex->first_not_own_table()))
8561 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS);
8562 }
8563
8564 /*
8565 Get the capabilities vector for all involved storage engines and
8566 mask out the flags for the binary log.
8567 */
8568 for (TABLE_LIST *table= tables; table; table= table->next_global)
8569 {
8570 if (table->placeholder())
8571 continue;
8572
8573 handler::Table_flags const flags= table->table->file->ha_table_flags();
8574
8575 DBUG_PRINT("info", ("table: %s; ha_table_flags: 0x%llx",
8576 table->table_name, flags));
8577
8578 if (table->table->no_replicate)
8579 {
8580 /*
8581 The statement uses a table that is not replicated.
8582 The following properties about the table:
8583 - persistent / transient
8584 - transactional / non transactional
8585 - temporary / permanent
8586 - read or write
8587 - multiple engines involved because of this table
8588 are not relevant, as this table is completely ignored.
8589 Because the statement uses a non replicated table,
8590 using STATEMENT format in the binlog is impossible.
8591 Either this statement will be discarded entirely,
8592 or it will be logged (possibly partially) in ROW format.
8593 */
8594 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_TABLE);
8595
8596 if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
8597 {
8598 non_replicated_tables_count++;
8599 continue;
8600 }
8601 }
8602
8603 replicated_tables_count++;
8604
8605 my_bool trans= table->table->file->has_transactions();
8606
8607 if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
8608 {
8609 write_to_some_transactional_table=
8610 write_to_some_transactional_table || trans;
8611
8612 write_to_some_non_transactional_table=
8613 write_to_some_non_transactional_table || !trans;
8614
8615 if (prev_write_table && prev_write_table->file->ht !=
8616 table->table->file->ht)
8617 multi_write_engine= TRUE;
8618
8619 if (table->table->s->tmp_table)
8620 lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TEMP_TRANS_TABLE :
8621 LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE);
8622 else
8623 lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TRANS_TABLE :
8624 LEX::STMT_WRITES_NON_TRANS_TABLE);
8625
8626 /*
8627 Non-transactional updates are allowed when row binlog format is
8628 used and all non-transactional tables are temporary.
8629 Binlog format is checked on THD::is_dml_gtid_compatible() method.
8630 */
8631 if (!trans)
8632 write_all_non_transactional_are_tmp_tables=
8633 write_all_non_transactional_are_tmp_tables &&
8634 table->table->s->tmp_table;
8635
8636 flags_write_all_set &= flags;
8637 flags_write_some_set |= flags;
8638 is_write= TRUE;
8639
8640 prev_write_table= table->table;
8641
8642 /*
8643 INSERT...ON DUPLICATE KEY UPDATE on a table with more than one unique keys
8644 can be unsafe. Check for it if the flag is already not marked for the
8645 given statement.
8646 */
8647 if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS) &&
8648 lex->sql_command == SQLCOM_INSERT &&
8649 /* Duplicate key update is not supported by INSERT DELAYED */
8650 get_command() != COM_DELAYED_INSERT && lex->duplicates == DUP_UPDATE)
8651 {
8652 uint keys= table->table->s->keys, i= 0, unique_keys= 0;
8653 for (KEY* keyinfo= table->table->s->key_info;
8654 i < keys && unique_keys <= 1; i++, keyinfo++)
8655 {
8656 if (keyinfo->flags & HA_NOSAME)
8657 unique_keys++;
8658 }
8659 if (unique_keys > 1 )
8660 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS);
8661 }
8662 }
8663 flags_access_some_set |= flags;
8664
8665 if (lex->sql_command != SQLCOM_CREATE_TABLE ||
8666 (lex->sql_command == SQLCOM_CREATE_TABLE &&
8667 (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)))
8668 {
8669 if (table->table->s->tmp_table)
8670 lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TEMP_TRANS_TABLE :
8671 LEX::STMT_READS_TEMP_NON_TRANS_TABLE);
8672 else
8673 lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TRANS_TABLE :
8674 LEX::STMT_READS_NON_TRANS_TABLE);
8675 }
8676
8677 if (prev_access_table && prev_access_table->file->ht !=
8678 table->table->file->ht)
8679 multi_access_engine= TRUE;
8680
8681 prev_access_table= table->table;
8682 }
8683 DBUG_ASSERT(!is_write ||
8684 write_to_some_transactional_table ||
8685 write_to_some_non_transactional_table);
8686 /*
8687 write_all_non_transactional_are_tmp_tables may be true if any
8688 non-transactional table was not updated, so we fix its value here.
8689 */
8690 write_all_non_transactional_are_tmp_tables=
8691 write_all_non_transactional_are_tmp_tables &&
8692 write_to_some_non_transactional_table;
8693
8694 DBUG_PRINT("info", ("flags_write_all_set: 0x%llx", flags_write_all_set));
8695 DBUG_PRINT("info", ("flags_write_some_set: 0x%llx", flags_write_some_set));
8696 DBUG_PRINT("info", ("flags_access_some_set: 0x%llx", flags_access_some_set));
8697 DBUG_PRINT("info", ("multi_write_engine: %d", multi_write_engine));
8698 DBUG_PRINT("info", ("multi_access_engine: %d", multi_access_engine));
8699
8700 int error= 0;
8701 int unsafe_flags;
8702
8703 bool multi_stmt_trans= in_multi_stmt_transaction_mode();
8704 bool trans_table= trans_has_updated_trans_table(this);
8705 bool binlog_direct= variables.binlog_direct_non_trans_update;
8706
8707 if (lex->is_mixed_stmt_unsafe(multi_stmt_trans, binlog_direct,
8708 trans_table, tx_isolation))
8709 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MIXED_STATEMENT);
8710 else if (multi_stmt_trans && trans_table && !binlog_direct &&
8711 lex->stmt_accessed_table(LEX::STMT_WRITES_NON_TRANS_TABLE))
8712 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS);
8713
8714 /*
8715 If more than one engine is involved in the statement and at
8716 least one is doing it's own logging (is *self-logging*), the
8717 statement cannot be logged atomically, so we generate an error
8718 rather than allowing the binlog to become corrupt.
8719 */
8720 if (multi_write_engine &&
8721 (flags_write_some_set & HA_HAS_OWN_BINLOGGING))
8722 my_error((error= ER_BINLOG_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE),
8723 MYF(0));
8724 else if (multi_access_engine && flags_access_some_set & HA_HAS_OWN_BINLOGGING)
8725 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE);
8726
8727 DBUG_EXECUTE_IF("make_stmt_only_engines",
8728 {
8729 flags_write_all_set= HA_BINLOG_STMT_CAPABLE;
8730 };);
8731
8732 /* both statement-only and row-only engines involved */
8733 if ((flags_write_all_set & (HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE)) == 0)
8734 {
8735 /*
8736 1. Error: Binary logging impossible since both row-incapable
8737 engines and statement-incapable engines are involved
8738 */
8739 my_error((error= ER_BINLOG_ROW_ENGINE_AND_STMT_ENGINE), MYF(0));
8740 }
8741 /* statement-only engines involved */
8742 else if ((flags_write_all_set & HA_BINLOG_ROW_CAPABLE) == 0)
8743 {
8744 if (lex->is_stmt_row_injection())
8745 {
8746 /*
8747 4. Error: Cannot execute row injection since table uses
8748 storage engine limited to statement-logging
8749 */
8750 my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0));
8751 }
8752 else if (variables.binlog_format == BINLOG_FORMAT_ROW &&
8753 sqlcom_can_generate_row_events(this->lex->sql_command))
8754 {
8755 /*
8756 2. Error: Cannot modify table that uses a storage engine
8757 limited to statement-logging when BINLOG_FORMAT = ROW
8758 */
8759 my_error((error= ER_BINLOG_ROW_MODE_AND_STMT_ENGINE), MYF(0));
8760 }
8761 else if (variables.binlog_format == BINLOG_FORMAT_MIXED &&
8762 ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
8763 {
8764 /*
8765 3. Error: Cannot execute statement: binlogging of unsafe
8766 statement is impossible when storage engine is limited to
8767 statement-logging and BINLOG_FORMAT = MIXED.
8768 */
8769 for (int unsafe_type= 0;
8770 unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
8771 unsafe_type++)
8772 if (unsafe_flags & (1 << unsafe_type))
8773 my_error((error= ER_BINLOG_UNSAFE_AND_STMT_ENGINE), MYF(0),
8774 ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
8775 }
8776 else if (is_write && ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
8777 {
8778 /*
8779 7. Warning: Unsafe statement logged as statement due to
8780 binlog_format = STATEMENT
8781 */
8782 binlog_unsafe_warning_flags|= unsafe_flags;
8783 DBUG_PRINT("info", ("Scheduling warning to be issued by "
8784 "binlog_query: '%s'",
8785 ER(ER_BINLOG_UNSAFE_STATEMENT)));
8786 DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
8787 binlog_unsafe_warning_flags));
8788 }
8789 /* log in statement format! */
8790 }
8791 /* no statement-only engines */
8792 else
8793 {
8794 /* binlog_format = STATEMENT */
8795 if (variables.binlog_format == BINLOG_FORMAT_STMT)
8796 {
8797 if (lex->is_stmt_row_injection())
8798 {
8799 /*
8800 6. Error: Cannot execute row injection since
8801 BINLOG_FORMAT = STATEMENT
8802 */
8803 my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_MODE), MYF(0));
8804 }
8805 else if ((flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0 &&
8806 sqlcom_can_generate_row_events(this->lex->sql_command))
8807 {
8808 /*
8809 5. Error: Cannot modify table that uses a storage engine
8810 limited to row-logging when binlog_format = STATEMENT
8811 */
8812 my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), "");
8813 }
8814 else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0)
8815 {
8816 /*
8817 7. Warning: Unsafe statement logged as statement due to
8818 binlog_format = STATEMENT
8819 */
8820 binlog_unsafe_warning_flags|= unsafe_flags;
8821 DBUG_PRINT("info", ("Scheduling warning to be issued by "
8822 "binlog_query: '%s'",
8823 ER(ER_BINLOG_UNSAFE_STATEMENT)));
8824 DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
8825 binlog_unsafe_warning_flags));
8826 }
8827 /* log in statement format! */
8828 }
8829 /* No statement-only engines and binlog_format != STATEMENT.
8830 I.e., nothing prevents us from row logging if needed. */
8831 else
8832 {
8833 if (lex->is_stmt_unsafe() || lex->is_stmt_row_injection()
8834 || (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0)
8835 {
8836 /* log in row format! */
8837 set_current_stmt_binlog_format_row_if_mixed();
8838 }
8839 }
8840 }
8841
8842 if (non_replicated_tables_count > 0)
8843 {
8844 if ((replicated_tables_count == 0) || ! is_write)
8845 {
8846 DBUG_PRINT("info", ("decision: no logging, no replicated table affected"));
8847 set_binlog_local_stmt_filter();
8848 }
8849 else
8850 {
8851 if (! is_current_stmt_binlog_format_row())
8852 {
8853 my_error((error= ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES), MYF(0));
8854 }
8855 else
8856 {
8857 clear_binlog_local_stmt_filter();
8858 }
8859 }
8860 }
8861 else
8862 {
8863 clear_binlog_local_stmt_filter();
8864 }
8865
8866 if (!error && enforce_gtid_consistency &&
8867 !is_dml_gtid_compatible(write_to_some_transactional_table,
8868 write_to_some_non_transactional_table,
8869 write_all_non_transactional_are_tmp_tables))
8870 error= 1;
8871
8872 if (error) {
8873 DBUG_PRINT("info", ("decision: no logging since an error was generated"));
8874 DBUG_RETURN(-1);
8875 }
8876
8877 if (is_write &&
8878 lex->sql_command != SQLCOM_END /* rows-event applying by slave */)
8879 {
8880 /*
8881 Master side of DML in the STMT format events parallelization.
8882 All involving table db:s are stored in a abc-ordered name list.
8883 In case the number of databases exceeds MAX_DBS_IN_EVENT_MTS maximum
8884 the list gathering breaks since it won't be sent to the slave.
8885 */
8886 for (TABLE_LIST *table= tables; table; table= table->next_global)
8887 {
8888 if (table->placeholder())
8889 continue;
8890
8891 DBUG_ASSERT(table->table);
8892
8893 if (table->table->file->referenced_by_foreign_key())
8894 {
8895 /*
8896 FK-referenced dbs can't be gathered currently. The following
8897 event will be marked for sequential execution on slave.
8898 */
8899 binlog_accessed_db_names= NULL;
8900 add_to_binlog_accessed_dbs("");
8901 break;
8902 }
8903 if (!is_current_stmt_binlog_format_row())
8904 add_to_binlog_accessed_dbs(table->db);
8905 }
8906 }
8907 DBUG_PRINT("info", ("decision: logging in %s format",
8908 is_current_stmt_binlog_format_row() ?
8909 "ROW" : "STATEMENT"));
8910
8911 if (variables.binlog_format == BINLOG_FORMAT_ROW &&
8912 (lex->sql_command == SQLCOM_UPDATE ||
8913 lex->sql_command == SQLCOM_UPDATE_MULTI ||
8914 lex->sql_command == SQLCOM_DELETE ||
8915 lex->sql_command == SQLCOM_DELETE_MULTI))
8916 {
8917 String table_names;
8918 /*
8919 Generate a warning for UPDATE/DELETE statements that modify a
8920 BLACKHOLE table, as row events are not logged in row format.
8921 */
8922 for (TABLE_LIST *table= tables; table; table= table->next_global)
8923 {
8924 if (table->placeholder())
8925 continue;
8926 if (table->table->file->ht->db_type == DB_TYPE_BLACKHOLE_DB &&
8927 table->lock_type >= TL_WRITE_ALLOW_WRITE)
8928 {
8929 table_names.append(table->table_name);
8930 table_names.append(",");
8931 }
8932 }
8933 if (!table_names.is_empty())
8934 {
8935 bool is_update= (lex->sql_command == SQLCOM_UPDATE ||
8936 lex->sql_command == SQLCOM_UPDATE_MULTI);
8937 /*
8938 Replace the last ',' with '.' for table_names
8939 */
8940 table_names.replace(table_names.length()-1, 1, ".", 1);
8941 push_warning_printf(this, Sql_condition::WARN_LEVEL_WARN,
8942 WARN_ON_BLOCKHOLE_IN_RBR,
8943 ER(WARN_ON_BLOCKHOLE_IN_RBR),
8944 is_update ? "UPDATE" : "DELETE",
8945 table_names.c_ptr());
8946 }
8947 }
8948 }
8949 #ifndef DBUG_OFF
8950 else
8951 DBUG_PRINT("info", ("decision: no logging since "
8952 "mysql_bin_log.is_open() = %d "
8953 "and (options & OPTION_BIN_LOG) = 0x%llx "
8954 "and binlog_format = %lu "
8955 "and binlog_filter->db_ok(db) = %d",
8956 mysql_bin_log.is_open(),
8957 (variables.option_bits & OPTION_BIN_LOG),
8958 variables.binlog_format,
8959 binlog_filter->db_ok(db)));
8960 #endif
8961
8962 DBUG_RETURN(0);
8963 }
8964
8965
is_ddl_gtid_compatible() const8966 bool THD::is_ddl_gtid_compatible() const
8967 {
8968 DBUG_ENTER("THD::is_ddl_gtid_compatible");
8969
8970 // If @@session.sql_log_bin has been manually turned off (only
8971 // doable by SUPER), then no problem, we can execute any statement.
8972 if ((variables.option_bits & OPTION_BIN_LOG) == 0)
8973 DBUG_RETURN(true);
8974
8975 if (lex->sql_command == SQLCOM_CREATE_TABLE &&
8976 !(lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) &&
8977 lex->select_lex.item_list.elements)
8978 {
8979 /*
8980 CREATE ... SELECT (without TEMPORARY) is unsafe because if
8981 binlog_format=row it will be logged as a CREATE TABLE followed
8982 by row events, re-executed non-atomically as two transactions,
8983 and then written to the slave's binary log as two separate
8984 transactions with the same GTID.
8985 */
8986 my_error(ER_GTID_UNSAFE_CREATE_SELECT, MYF(0));
8987 DBUG_RETURN(false);
8988 }
8989 if ((lex->sql_command == SQLCOM_CREATE_TABLE &&
8990 (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) != 0) ||
8991 (lex->sql_command == SQLCOM_DROP_TABLE && lex->drop_temporary))
8992 {
8993 /*
8994 [CREATE|DROP] TEMPORARY TABLE is unsafe to execute
8995 inside a transaction because the table will stay and the
8996 transaction will be written to the slave's binary log with the
8997 GTID even if the transaction is rolled back.
8998 This includes the execution inside Functions and Triggers.
8999 */
9000 if (in_multi_stmt_transaction_mode() || in_sub_stmt)
9001 {
9002 my_error(ER_GTID_UNSAFE_CREATE_DROP_TEMPORARY_TABLE_IN_TRANSACTION,
9003 MYF(0));
9004 DBUG_RETURN(false);
9005 }
9006 }
9007 DBUG_RETURN(true);
9008 }
9009
9010
9011 bool
is_dml_gtid_compatible(bool transactional_table,bool non_transactional_table,bool non_transactional_tmp_tables) const9012 THD::is_dml_gtid_compatible(bool transactional_table,
9013 bool non_transactional_table,
9014 bool non_transactional_tmp_tables) const
9015 {
9016 DBUG_ENTER("THD::is_dml_gtid_compatible(bool, bool, bool)");
9017
9018 // If @@session.sql_log_bin has been manually turned off (only
9019 // doable by SUPER), then no problem, we can execute any statement.
9020 if ((variables.option_bits & OPTION_BIN_LOG) == 0)
9021 DBUG_RETURN(true);
9022
9023 /*
9024 Single non-transactional updates are allowed when not mixed
9025 together with transactional statements within a transaction.
9026 Furthermore, writing to transactional and non-transactional
9027 engines in a single statement is also disallowed.
9028 Multi-statement transactions on non-transactional tables are
9029 split into single-statement transactions when
9030 GTID_NEXT = "AUTOMATIC".
9031
9032 Non-transactional updates are allowed when row binlog format is
9033 used and all non-transactional tables are temporary.
9034
9035 The debug symbol "allow_gtid_unsafe_non_transactional_updates"
9036 disables the error. This is useful because it allows us to run
9037 old tests that were not written with the restrictions of GTIDs in
9038 mind.
9039 */
9040 if (non_transactional_table &&
9041 (transactional_table || trans_has_updated_trans_table(this)) &&
9042 !(non_transactional_tmp_tables && is_current_stmt_binlog_format_row()) &&
9043 !DBUG_EVALUATE_IF("allow_gtid_unsafe_non_transactional_updates", 1, 0))
9044 {
9045 my_error(ER_GTID_UNSAFE_NON_TRANSACTIONAL_TABLE, MYF(0));
9046 DBUG_RETURN(false);
9047 }
9048
9049 DBUG_RETURN(true);
9050 }
9051
9052 /*
9053 Implementation of interface to write rows to the binary log through the
9054 thread. The thread is responsible for writing the rows it has
9055 inserted/updated/deleted.
9056 */
9057
9058 #ifndef MYSQL_CLIENT
9059
9060 /*
9061 Template member function for ensuring that there is an rows log
9062 event of the apropriate type before proceeding.
9063
9064 PRE CONDITION:
9065 - Events of type 'RowEventT' have the type code 'type_code'.
9066
9067 POST CONDITION:
9068 If a non-NULL pointer is returned, the pending event for thread 'thd' will
9069 be an event of type 'RowEventT' (which have the type code 'type_code')
9070 will either empty or have enough space to hold 'needed' bytes. In
9071 addition, the columns bitmap will be correct for the row, meaning that
9072 the pending event will be flushed if the columns in the event differ from
9073 the columns suppled to the function.
9074
9075 RETURNS
9076 If no error, a non-NULL pending event (either one which already existed or
9077 the newly created one).
9078 If error, NULL.
9079 */
9080
9081 template <class RowsEventT> Rows_log_event*
binlog_prepare_pending_rows_event(TABLE * table,uint32 serv_id,size_t needed,bool is_transactional,RowsEventT * hint MY_ATTRIBUTE ((unused)),const uchar * extra_row_info)9082 THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
9083 size_t needed,
9084 bool is_transactional,
9085 RowsEventT *hint MY_ATTRIBUTE((unused)),
9086 const uchar* extra_row_info)
9087 {
9088 DBUG_ENTER("binlog_prepare_pending_rows_event");
9089
9090 /* Fetch the type code for the RowsEventT template parameter */
9091 int const general_type_code= RowsEventT::TYPE_CODE;
9092
9093 Rows_log_event* pending= binlog_get_pending_rows_event(is_transactional);
9094
9095 if (unlikely(pending && !pending->is_valid()))
9096 DBUG_RETURN(NULL);
9097
9098 /*
9099 Check if the current event is non-NULL and a write-rows
9100 event. Also check if the table provided is mapped: if it is not,
9101 then we have switched to writing to a new table.
9102 If there is no pending event, we need to create one. If there is a pending
9103 event, but it's not about the same table id, or not of the same type
9104 (between Write, Update and Delete), or not the same affected columns, or
9105 going to be too big, flush this event to disk and create a new pending
9106 event.
9107 */
9108 if (!pending ||
9109 pending->server_id != serv_id ||
9110 pending->get_table_id() != table->s->table_map_id ||
9111 pending->get_general_type_code() != general_type_code ||
9112 pending->get_data_size() + needed > opt_binlog_rows_event_max_size ||
9113 pending->read_write_bitmaps_cmp(table) == FALSE ||
9114 !binlog_row_event_extra_data_eq(pending->get_extra_row_data(),
9115 extra_row_info))
9116 {
9117 /* Create a new RowsEventT... */
9118 Rows_log_event* const
9119 ev= new RowsEventT(this, table, table->s->table_map_id,
9120 is_transactional, extra_row_info);
9121 if (unlikely(!ev))
9122 DBUG_RETURN(NULL);
9123 ev->server_id= serv_id; // I don't like this, it's too easy to forget.
9124 /*
9125 flush the pending event and replace it with the newly created
9126 event...
9127 */
9128 if (unlikely(
9129 mysql_bin_log.flush_and_set_pending_rows_event(this, ev,
9130 is_transactional)))
9131 {
9132 delete ev;
9133 DBUG_RETURN(NULL);
9134 }
9135
9136 DBUG_RETURN(ev); /* This is the new pending event */
9137 }
9138 DBUG_RETURN(pending); /* This is the current pending event */
9139 }
9140
9141 /* Declare in unnamed namespace. */
9142 CPP_UNNAMED_NS_START
9143
9144 /**
9145 Class to handle temporary allocation of memory for row data.
9146
9147 The responsibilities of the class is to provide memory for
9148 packing one or two rows of packed data (depending on what
9149 constructor is called).
9150
9151 In order to make the allocation more efficient for "simple" rows,
9152 i.e., rows that do not contain any blobs, a pointer to the
9153 allocated memory is of memory is stored in the table structure
9154 for simple rows. If memory for a table containing a blob field
9155 is requested, only memory for that is allocated, and subsequently
9156 released when the object is destroyed.
9157
9158 */
9159 class Row_data_memory {
9160 public:
9161 /**
9162 Build an object to keep track of a block-local piece of memory
9163 for storing a row of data.
9164
9165 @param table
9166 Table where the pre-allocated memory is stored.
9167
9168 @param length
9169 Length of data that is needed, if the record contain blobs.
9170 */
Row_data_memory(TABLE * table,size_t const len1)9171 Row_data_memory(TABLE *table, size_t const len1)
9172 : m_memory(0)
9173 {
9174 #ifndef DBUG_OFF
9175 m_alloc_checked= FALSE;
9176 #endif
9177 allocate_memory(table, len1);
9178 m_ptr[0]= has_memory() ? m_memory : 0;
9179 m_ptr[1]= 0;
9180 }
9181
Row_data_memory(TABLE * table,size_t const len1,size_t const len2)9182 Row_data_memory(TABLE *table, size_t const len1, size_t const len2)
9183 : m_memory(0)
9184 {
9185 #ifndef DBUG_OFF
9186 m_alloc_checked= FALSE;
9187 #endif
9188 allocate_memory(table, len1 + len2);
9189 m_ptr[0]= has_memory() ? m_memory : 0;
9190 m_ptr[1]= has_memory() ? m_memory + len1 : 0;
9191 }
9192
~Row_data_memory()9193 ~Row_data_memory()
9194 {
9195 if (m_memory != 0 && m_release_memory_on_destruction)
9196 my_free(m_memory);
9197 }
9198
9199 /**
9200 Is there memory allocated?
9201
9202 @retval true There is memory allocated
9203 @retval false Memory allocation failed
9204 */
has_memory() const9205 bool has_memory() const {
9206 #ifndef DBUG_OFF
9207 m_alloc_checked= TRUE;
9208 #endif
9209 return m_memory != 0;
9210 }
9211
slot(uint s)9212 uchar *slot(uint s)
9213 {
9214 DBUG_ASSERT(s < sizeof(m_ptr)/sizeof(*m_ptr));
9215 DBUG_ASSERT(m_ptr[s] != 0);
9216 DBUG_ASSERT(m_alloc_checked == TRUE);
9217 return m_ptr[s];
9218 }
9219
9220 private:
allocate_memory(TABLE * const table,size_t const total_length)9221 void allocate_memory(TABLE *const table, size_t const total_length)
9222 {
9223 if (table->s->blob_fields == 0)
9224 {
9225 /*
9226 The maximum length of a packed record is less than this
9227 length. We use this value instead of the supplied length
9228 when allocating memory for records, since we don't know how
9229 the memory will be used in future allocations.
9230
9231 Since table->s->reclength is for unpacked records, we have
9232 to add two bytes for each field, which can potentially be
9233 added to hold the length of a packed field.
9234 */
9235 size_t const maxlen= table->s->reclength + 2 * table->s->fields;
9236
9237 /*
9238 Allocate memory for two records if memory hasn't been
9239 allocated. We allocate memory for two records so that it can
9240 be used when processing update rows as well.
9241 */
9242 if (table->write_row_record == 0)
9243 table->write_row_record=
9244 (uchar *) alloc_root(&table->mem_root, 2 * maxlen);
9245 m_memory= table->write_row_record;
9246 m_release_memory_on_destruction= FALSE;
9247 }
9248 else
9249 {
9250 m_memory= (uchar *) my_malloc(total_length, MYF(MY_WME));
9251 m_release_memory_on_destruction= TRUE;
9252 }
9253 }
9254
9255 #ifndef DBUG_OFF
9256 mutable bool m_alloc_checked;
9257 #endif
9258 bool m_release_memory_on_destruction;
9259 uchar *m_memory;
9260 uchar *m_ptr[2];
9261 };
9262
9263 CPP_UNNAMED_NS_END
9264
binlog_write_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)9265 int THD::binlog_write_row(TABLE* table, bool is_trans,
9266 uchar const *record,
9267 const uchar* extra_row_info)
9268 {
9269 DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
9270
9271 /*
9272 Pack records into format for transfer. We are allocating more
9273 memory than needed, but that doesn't matter.
9274 */
9275 Row_data_memory memory(table, max_row_length(table, record));
9276 if (!memory.has_memory())
9277 return HA_ERR_OUT_OF_MEM;
9278
9279 uchar *row_data= memory.slot(0);
9280
9281 size_t const len= pack_row(table, table->write_set, row_data, record);
9282
9283 Rows_log_event* const ev=
9284 binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
9285 static_cast<Write_rows_log_event*>(0),
9286 extra_row_info);
9287
9288 if (unlikely(ev == 0))
9289 return HA_ERR_OUT_OF_MEM;
9290
9291 return ev->add_row_data(row_data, len);
9292 }
9293
binlog_update_row(TABLE * table,bool is_trans,const uchar * before_record,const uchar * after_record,const uchar * extra_row_info)9294 int THD::binlog_update_row(TABLE* table, bool is_trans,
9295 const uchar *before_record,
9296 const uchar *after_record,
9297 const uchar* extra_row_info)
9298 {
9299 DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
9300 int error= 0;
9301
9302 /**
9303 Save a reference to the original read and write set bitmaps.
9304 We will need this to restore the bitmaps at the end.
9305 */
9306 MY_BITMAP *old_read_set= table->read_set;
9307 MY_BITMAP *old_write_set= table->write_set;
9308
9309 /**
9310 This will remove spurious fields required during execution but
9311 not needed for binlogging. This is done according to the:
9312 binlog-row-image option.
9313 */
9314 binlog_prepare_row_images(table);
9315
9316 size_t const before_maxlen = max_row_length(table, before_record);
9317 size_t const after_maxlen = max_row_length(table, after_record);
9318
9319 Row_data_memory row_data(table, before_maxlen, after_maxlen);
9320 if (!row_data.has_memory())
9321 return HA_ERR_OUT_OF_MEM;
9322
9323 uchar *before_row= row_data.slot(0);
9324 uchar *after_row= row_data.slot(1);
9325
9326 size_t const before_size= pack_row(table, table->read_set, before_row,
9327 before_record);
9328 size_t const after_size= pack_row(table, table->write_set, after_row,
9329 after_record);
9330
9331 /*
9332 Don't print debug messages when running valgrind since they can
9333 trigger false warnings.
9334 */
9335 #ifndef HAVE_purify
9336 DBUG_DUMP("before_record", before_record, table->s->reclength);
9337 DBUG_DUMP("after_record", after_record, table->s->reclength);
9338 DBUG_DUMP("before_row", before_row, before_size);
9339 DBUG_DUMP("after_row", after_row, after_size);
9340 #endif
9341
9342 Rows_log_event* const ev=
9343 binlog_prepare_pending_rows_event(table, server_id,
9344 before_size + after_size, is_trans,
9345 static_cast<Update_rows_log_event*>(0),
9346 extra_row_info);
9347
9348 if (unlikely(ev == 0))
9349 return HA_ERR_OUT_OF_MEM;
9350
9351 error= ev->add_row_data(before_row, before_size) ||
9352 ev->add_row_data(after_row, after_size);
9353
9354 /* restore read/write set for the rest of execution */
9355 table->column_bitmaps_set_no_signal(old_read_set,
9356 old_write_set);
9357
9358 return error;
9359 }
9360
binlog_delete_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)9361 int THD::binlog_delete_row(TABLE* table, bool is_trans,
9362 uchar const *record,
9363 const uchar* extra_row_info)
9364 {
9365 DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
9366 int error= 0;
9367
9368 /**
9369 Save a reference to the original read and write set bitmaps.
9370 We will need this to restore the bitmaps at the end.
9371 */
9372 MY_BITMAP *old_read_set= table->read_set;
9373 MY_BITMAP *old_write_set= table->write_set;
9374
9375 /**
9376 This will remove spurious fields required during execution but
9377 not needed for binlogging. This is done according to the:
9378 binlog-row-image option.
9379 */
9380 binlog_prepare_row_images(table);
9381
9382 /*
9383 Pack records into format for transfer. We are allocating more
9384 memory than needed, but that doesn't matter.
9385 */
9386 Row_data_memory memory(table, max_row_length(table, record));
9387 if (unlikely(!memory.has_memory()))
9388 return HA_ERR_OUT_OF_MEM;
9389
9390 uchar *row_data= memory.slot(0);
9391
9392 DBUG_DUMP("table->read_set", (uchar*) table->read_set->bitmap, (table->s->fields + 7) / 8);
9393 size_t const len= pack_row(table, table->read_set, row_data, record);
9394
9395 Rows_log_event* const ev=
9396 binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
9397 static_cast<Delete_rows_log_event*>(0),
9398 extra_row_info);
9399
9400 if (unlikely(ev == 0))
9401 return HA_ERR_OUT_OF_MEM;
9402
9403 error= ev->add_row_data(row_data, len);
9404
9405 /* restore read/write set for the rest of execution */
9406 table->column_bitmaps_set_no_signal(old_read_set,
9407 old_write_set);
9408
9409 return error;
9410 }
9411
binlog_prepare_row_images(TABLE * table)9412 void THD::binlog_prepare_row_images(TABLE *table)
9413 {
9414 DBUG_ENTER("THD::binlog_prepare_row_images");
9415 /**
9416 Remove from read_set spurious columns. The write_set has been
9417 handled before in table->mark_columns_needed_for_update.
9418 */
9419
9420 DBUG_PRINT_BITSET("debug", "table->read_set (before preparing): %s", table->read_set);
9421 THD *thd= table->in_use;
9422
9423 /**
9424 if there is a primary key in the table (ie, user declared PK or a
9425 non-null unique index) and we dont want to ship the entire image,
9426 and the handler involved supports this.
9427 */
9428 if (table->s->primary_key < MAX_KEY &&
9429 (thd->variables.binlog_row_image < BINLOG_ROW_IMAGE_FULL) &&
9430 !ha_check_storage_engine_flag(table->s->db_type(), HTON_NO_BINLOG_ROW_OPT))
9431 {
9432 /**
9433 Just to be sure that tmp_set is currently not in use as
9434 the read_set already.
9435 */
9436 DBUG_ASSERT(table->read_set != &table->tmp_set);
9437
9438 bitmap_clear_all(&table->tmp_set);
9439
9440 switch(thd->variables.binlog_row_image)
9441 {
9442 case BINLOG_ROW_IMAGE_MINIMAL:
9443 /* MINIMAL: Mark only PK */
9444 table->mark_columns_used_by_index_no_reset(table->s->primary_key,
9445 &table->tmp_set);
9446 break;
9447 case BINLOG_ROW_IMAGE_NOBLOB:
9448 /**
9449 NOBLOB: Remove unnecessary BLOB fields from read_set
9450 (the ones that are not part of PK).
9451 */
9452 bitmap_union(&table->tmp_set, table->read_set);
9453 for (Field **ptr=table->field ; *ptr ; ptr++)
9454 {
9455 Field *field= (*ptr);
9456 if ((field->type() == MYSQL_TYPE_BLOB) &&
9457 !(field->flags & PRI_KEY_FLAG))
9458 bitmap_clear_bit(&table->tmp_set, field->field_index);
9459 }
9460 break;
9461 default:
9462 DBUG_ASSERT(0); // impossible.
9463 }
9464
9465 /* set the temporary read_set */
9466 table->column_bitmaps_set_no_signal(&table->tmp_set,
9467 table->write_set);
9468 }
9469
9470 DBUG_PRINT_BITSET("debug", "table->read_set (after preparing): %s", table->read_set);
9471 DBUG_VOID_RETURN;
9472 }
9473
9474
binlog_flush_pending_rows_event(bool stmt_end,bool is_transactional)9475 int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional)
9476 {
9477 DBUG_ENTER("THD::binlog_flush_pending_rows_event");
9478 /*
9479 We shall flush the pending event even if we are not in row-based
9480 mode: it might be the case that we left row-based mode before
9481 flushing anything (e.g., if we have explicitly locked tables).
9482 */
9483 if (!mysql_bin_log.is_open())
9484 DBUG_RETURN(0);
9485
9486 /*
9487 Mark the event as the last event of a statement if the stmt_end
9488 flag is set.
9489 */
9490 int error= 0;
9491 if (Rows_log_event *pending= binlog_get_pending_rows_event(is_transactional))
9492 {
9493 if (stmt_end)
9494 {
9495 pending->set_flags(Rows_log_event::STMT_END_F);
9496 binlog_table_maps= 0;
9497 }
9498
9499 error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0,
9500 is_transactional);
9501 }
9502
9503 DBUG_RETURN(error);
9504 }
9505
9506
9507 /**
9508 binlog_row_event_extra_data_eq
9509
9510 Comparator for two binlog row event extra data
9511 pointers.
9512
9513 It compares their significant bytes.
9514
9515 Null pointers are acceptable
9516
9517 @param a
9518 first pointer
9519
9520 @param b
9521 first pointer
9522
9523 @return
9524 true if the referenced structures are equal
9525 */
9526 bool
binlog_row_event_extra_data_eq(const uchar * a,const uchar * b)9527 THD::binlog_row_event_extra_data_eq(const uchar* a,
9528 const uchar* b)
9529 {
9530 return ((a == b) ||
9531 ((a != NULL) &&
9532 (b != NULL) &&
9533 (a[EXTRA_ROW_INFO_LEN_OFFSET] ==
9534 b[EXTRA_ROW_INFO_LEN_OFFSET]) &&
9535 (memcmp(a, b,
9536 a[EXTRA_ROW_INFO_LEN_OFFSET]) == 0)));
9537 }
9538
9539 #if !defined(DBUG_OFF) && !defined(_lint)
9540 static const char *
show_query_type(THD::enum_binlog_query_type qtype)9541 show_query_type(THD::enum_binlog_query_type qtype)
9542 {
9543 switch (qtype) {
9544 case THD::ROW_QUERY_TYPE:
9545 return "ROW";
9546 case THD::STMT_QUERY_TYPE:
9547 return "STMT";
9548 case THD::QUERY_TYPE_COUNT:
9549 default:
9550 DBUG_ASSERT(0 <= qtype && qtype < THD::QUERY_TYPE_COUNT);
9551 }
9552 static char buf[64];
9553 sprintf(buf, "UNKNOWN#%d", qtype);
9554 return buf;
9555 }
9556 #endif
9557
9558 /**
9559 Auxiliary function to reset the limit unsafety warning suppression.
9560 */
reset_binlog_unsafe_suppression()9561 static void reset_binlog_unsafe_suppression()
9562 {
9563 DBUG_ENTER("reset_binlog_unsafe_suppression");
9564 unsafe_warning_suppression_is_activated= false;
9565 limit_unsafe_warning_count= 0;
9566 limit_unsafe_suppression_start_time= my_getsystime()/10000000;
9567 DBUG_VOID_RETURN;
9568 }
9569
9570 /**
9571 Auxiliary function to print warning in the error log.
9572 */
print_unsafe_warning_to_log(int unsafe_type,char * buf,char * query)9573 static void print_unsafe_warning_to_log(int unsafe_type, char* buf,
9574 char* query)
9575 {
9576 DBUG_ENTER("print_unsafe_warning_in_log");
9577 sprintf(buf, ER(ER_BINLOG_UNSAFE_STATEMENT),
9578 ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
9579 sql_print_warning(ER(ER_MESSAGE_AND_STATEMENT), buf, query);
9580 DBUG_VOID_RETURN;
9581 }
9582
9583 /**
9584 Auxiliary function to check if the warning for limit unsafety should be
9585 thrown or suppressed. Details of the implementation can be found in the
9586 comments inline.
9587 SYNOPSIS:
9588 @params
9589 buf - buffer to hold the warning message text
9590 unsafe_type - The type of unsafety.
9591 query - The actual query statement.
9592
9593 TODO: Remove this function and implement a general service for all warnings
9594 that would prevent flooding the error log.
9595 */
do_unsafe_limit_checkout(char * buf,int unsafe_type,char * query)9596 static void do_unsafe_limit_checkout(char* buf, int unsafe_type, char* query)
9597 {
9598 ulonglong now;
9599 DBUG_ENTER("do_unsafe_limit_checkout");
9600 DBUG_ASSERT(unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT);
9601 limit_unsafe_warning_count++;
9602 /*
9603 INITIALIZING:
9604 If this is the first time this function is called with log warning
9605 enabled, the monitoring the unsafe warnings should start.
9606 */
9607 if (limit_unsafe_suppression_start_time == 0)
9608 {
9609 limit_unsafe_suppression_start_time= my_getsystime()/10000000;
9610 print_unsafe_warning_to_log(unsafe_type, buf, query);
9611 }
9612 else
9613 {
9614 if (!unsafe_warning_suppression_is_activated)
9615 print_unsafe_warning_to_log(unsafe_type, buf, query);
9616
9617 if (limit_unsafe_warning_count >=
9618 LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT)
9619 {
9620 now= my_getsystime()/10000000;
9621 if (!unsafe_warning_suppression_is_activated)
9622 {
9623 /*
9624 ACTIVATION:
9625 We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT warnings in
9626 less than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT we activate the
9627 suppression.
9628 */
9629 if ((now-limit_unsafe_suppression_start_time) <=
9630 LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
9631 {
9632 unsafe_warning_suppression_is_activated= true;
9633 DBUG_PRINT("info",("A warning flood has been detected and the limit \
9634 unsafety warning suppression has been activated."));
9635 }
9636 else
9637 {
9638 /*
9639 there is no flooding till now, therefore we restart the monitoring
9640 */
9641 limit_unsafe_suppression_start_time= my_getsystime()/10000000;
9642 limit_unsafe_warning_count= 0;
9643 }
9644 }
9645 else
9646 {
9647 /*
9648 Print the suppression note and the unsafe warning.
9649 */
9650 sql_print_information("The following warning was suppressed %d times \
9651 during the last %d seconds in the error log",
9652 limit_unsafe_warning_count,
9653 (int)
9654 (now-limit_unsafe_suppression_start_time));
9655 print_unsafe_warning_to_log(unsafe_type, buf, query);
9656 /*
9657 DEACTIVATION: We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT
9658 warnings in more than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT, the
9659 suppression should be deactivated.
9660 */
9661 if ((now - limit_unsafe_suppression_start_time) >
9662 LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
9663 {
9664 reset_binlog_unsafe_suppression();
9665 DBUG_PRINT("info",("The limit unsafety warning supression has been \
9666 deactivated"));
9667 }
9668 }
9669 limit_unsafe_warning_count= 0;
9670 }
9671 }
9672 DBUG_VOID_RETURN;
9673 }
9674
9675 /**
9676 Auxiliary method used by @c binlog_query() to raise warnings.
9677
9678 The type of warning and the type of unsafeness is stored in
9679 THD::binlog_unsafe_warning_flags.
9680 */
issue_unsafe_warnings()9681 void THD::issue_unsafe_warnings()
9682 {
9683 char buf[MYSQL_ERRMSG_SIZE * 2];
9684 DBUG_ENTER("issue_unsafe_warnings");
9685 /*
9686 Ensure that binlog_unsafe_warning_flags is big enough to hold all
9687 bits. This is actually a constant expression.
9688 */
9689 DBUG_ASSERT(LEX::BINLOG_STMT_UNSAFE_COUNT <=
9690 sizeof(binlog_unsafe_warning_flags) * CHAR_BIT);
9691
9692 uint32 unsafe_type_flags= binlog_unsafe_warning_flags;
9693
9694 /*
9695 For each unsafe_type, check if the statement is unsafe in this way
9696 and issue a warning.
9697 */
9698 for (int unsafe_type=0;
9699 unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
9700 unsafe_type++)
9701 {
9702 if ((unsafe_type_flags & (1 << unsafe_type)) != 0)
9703 {
9704 push_warning_printf(this, Sql_condition::WARN_LEVEL_NOTE,
9705 ER_BINLOG_UNSAFE_STATEMENT,
9706 ER(ER_BINLOG_UNSAFE_STATEMENT),
9707 ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
9708 if (log_warnings)
9709 {
9710 if (unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT)
9711 do_unsafe_limit_checkout( buf, unsafe_type, query());
9712 else //cases other than LIMIT unsafety
9713 print_unsafe_warning_to_log(unsafe_type, buf, query());
9714 }
9715 }
9716 }
9717 DBUG_VOID_RETURN;
9718 }
9719
9720 /**
9721 Log the current query.
9722
9723 The query will be logged in either row format or statement format
9724 depending on the value of @c current_stmt_binlog_format_row field and
9725 the value of the @c qtype parameter.
9726
9727 This function must be called:
9728
9729 - After the all calls to ha_*_row() functions have been issued.
9730
9731 - After any writes to system tables. Rationale: if system tables
9732 were written after a call to this function, and the master crashes
9733 after the call to this function and before writing the system
9734 tables, then the master and slave get out of sync.
9735
9736 - Before tables are unlocked and closed.
9737
9738 @see decide_logging_format
9739
9740 @retval 0 Success
9741
9742 @retval nonzero If there is a failure when writing the query (e.g.,
9743 write failure), then the error code is returned.
9744 */
binlog_query(THD::enum_binlog_query_type qtype,char const * query_arg,ulong query_len,bool is_trans,bool direct,bool suppress_use,int errcode)9745 int THD::binlog_query(THD::enum_binlog_query_type qtype, char const *query_arg,
9746 ulong query_len, bool is_trans, bool direct,
9747 bool suppress_use, int errcode)
9748 {
9749 DBUG_ENTER("THD::binlog_query");
9750 DBUG_PRINT("enter", ("qtype: %s query: '%s'",
9751 show_query_type(qtype), query_arg));
9752 DBUG_ASSERT(query_arg && mysql_bin_log.is_open());
9753
9754 if (get_binlog_local_stmt_filter() == BINLOG_FILTER_SET)
9755 {
9756 /*
9757 The current statement is to be ignored, and not written to
9758 the binlog. Do not call issue_unsafe_warnings().
9759 */
9760 DBUG_RETURN(0);
9761 }
9762
9763 /*
9764 If we are not in prelocked mode, mysql_unlock_tables() will be
9765 called after this binlog_query(), so we have to flush the pending
9766 rows event with the STMT_END_F set to unlock all tables at the
9767 slave side as well.
9768
9769 If we are in prelocked mode, the flushing will be done inside the
9770 top-most close_thread_tables().
9771 */
9772 if (this->locked_tables_mode <= LTM_LOCK_TABLES)
9773 if (int error= binlog_flush_pending_rows_event(TRUE, is_trans))
9774 DBUG_RETURN(error);
9775
9776 /*
9777 Warnings for unsafe statements logged in statement format are
9778 printed in three places instead of in decide_logging_format().
9779 This is because the warnings should be printed only if the statement
9780 is actually logged. When executing decide_logging_format(), we cannot
9781 know for sure if the statement will be logged:
9782
9783 1 - sp_head::execute_procedure which prints out warnings for calls to
9784 stored procedures.
9785
9786 2 - sp_head::execute_function which prints out warnings for calls
9787 involving functions.
9788
9789 3 - THD::binlog_query (here) which prints warning for top level
9790 statements not covered by the two cases above: i.e., if not insided a
9791 procedure and a function.
9792
9793 Besides, we should not try to print these warnings if it is not
9794 possible to write statements to the binary log as it happens when
9795 the execution is inside a function, or generaly speaking, when
9796 the variables.option_bits & OPTION_BIN_LOG is false.
9797 */
9798 if ((variables.option_bits & OPTION_BIN_LOG) &&
9799 sp_runtime_ctx == NULL && !binlog_evt_union.do_union)
9800 issue_unsafe_warnings();
9801
9802 switch (qtype) {
9803 /*
9804 ROW_QUERY_TYPE means that the statement may be logged either in
9805 row format or in statement format. If
9806 current_stmt_binlog_format is row, it means that the
9807 statement has already been logged in row format and hence shall
9808 not be logged again.
9809 */
9810 case THD::ROW_QUERY_TYPE:
9811 DBUG_PRINT("debug",
9812 ("is_current_stmt_binlog_format_row: %d",
9813 is_current_stmt_binlog_format_row()));
9814 if (is_current_stmt_binlog_format_row())
9815 DBUG_RETURN(0);
9816 /* Fall through */
9817
9818 /*
9819 STMT_QUERY_TYPE means that the query must be logged in statement
9820 format; it cannot be logged in row format. This is typically
9821 used by DDL statements. It is an error to use this query type
9822 if current_stmt_binlog_format_row is row.
9823
9824 @todo Currently there are places that call this method with
9825 STMT_QUERY_TYPE and current_stmt_binlog_format is row. Fix those
9826 places and add assert to ensure correct behavior. /Sven
9827 */
9828 case THD::STMT_QUERY_TYPE:
9829 /*
9830 The MYSQL_LOG::write() function will set the STMT_END_F flag and
9831 flush the pending rows event if necessary.
9832 */
9833 {
9834 Query_log_event qinfo(this, query_arg, query_len, is_trans, direct,
9835 suppress_use, errcode);
9836 /*
9837 Binlog table maps will be irrelevant after a Query_log_event
9838 (they are just removed on the slave side) so after the query
9839 log event is written to the binary log, we pretend that no
9840 table maps were written.
9841 */
9842 int error= mysql_bin_log.write_event(&qinfo);
9843 binlog_table_maps= 0;
9844 DBUG_RETURN(error);
9845 }
9846 break;
9847
9848 case THD::QUERY_TYPE_COUNT:
9849 default:
9850 DBUG_ASSERT(0 <= qtype && qtype < QUERY_TYPE_COUNT);
9851 }
9852 DBUG_RETURN(0);
9853 }
9854
9855 #endif /* !defined(MYSQL_CLIENT) */
9856
9857 struct st_mysql_storage_engine binlog_storage_engine=
9858 { MYSQL_HANDLERTON_INTERFACE_VERSION };
9859
9860 /** @} */
9861
/*
  Plugin descriptor for the binlog pseudo storage engine.

  The entries are positional, so their order must not be changed. The
  descriptor points at binlog_storage_engine, uses binlog_init as its
  init callback and supplies no deinit callback, status variables or
  system variables.
*/
mysql_declare_plugin(binlog)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &binlog_storage_engine,
  "binlog",
  "MySQL AB",
  "This is a pseudo storage engine to represent the binlog in a transaction",
  PLUGIN_LICENSE_GPL,
  binlog_init, /* Plugin Init */
  NULL, /* Plugin Deinit */
  0x0100 /* 1.0 */,
  NULL, /* status variables */
  NULL, /* system variables */
  NULL, /* config options */
  /*
    NOTE(review): unlabeled trailing entry; presumably the plugin flags
    word -- confirm against the st_mysql_plugin definition.
  */
  0,
}
mysql_declare_plugin_end;
9879