1 /* Copyright (c) 2009, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22
23 #include "binlog.h"
24
25 #include "my_stacktrace.h" // my_safe_print_system_time
26 #include "debug_sync.h" // DEBUG_SYNC
27 #include "log.h" // sql_print_warning
28 #include "log_event.h" // Rows_log_event
29 #include "mysqld_thd_manager.h" // Global_THD_manager
30 #include "rpl_handler.h" // RUN_HOOK
31 #include "rpl_mi.h" // Master_info
32 #include "rpl_rli.h" // Relay_log_info
33 #include "rpl_rli_pdb.h" // Slave_worker
34 #include "rpl_slave_commit_order_manager.h" // Commit_order_manager
35 #include "rpl_trx_boundary_parser.h" // Transaction_boundary_parser
36 #include "rpl_context.h"
37 #include "sql_class.h" // THD
38 #include "sql_parse.h" // sqlcom_can_generate_row_events
39 #include "sql_show.h" // append_identifier
40
41 #include "pfs_file_provider.h"
42 #include "mysql/psi/mysql_file.h"
43
44 #include <pfs_transaction_provider.h>
45 #include <mysql/psi/mysql_transaction.h>
46 #include "xa.h"
47
48 #include <list>
49 #include <string>
50 #include <sstream>
51
52 #ifdef WITH_WSREP
53 #include "wsrep_xid.h"
54 #endif /* WITH_WSREP */
55
56 using std::max;
57 using std::min;
58 using std::string;
59 using std::list;
60 using binary_log::checksum_crc32;
61 #define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
62
63 #define LOG_PREFIX "ML"
64
65 /**
66 @defgroup Binary_Log Binary Log
67 @{
68 */
69
70 #define MY_OFF_T_UNDEF (~(my_off_t)0UL)
71
72 /*
73 Constants required for the limit unsafe warnings suppression
74 */
75 //seconds after which the limit unsafe warnings suppression will be activated
76 #define LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT 50
77 //number of limit unsafe warnings after which the suppression will be activated
78 #define LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT 50
79 #define MAX_SESSION_ATTACH_TRIES 10
80
81 static ulonglong limit_unsafe_suppression_start_time= 0;
82 static bool unsafe_warning_suppression_is_activated= false;
83 static int limit_unsafe_warning_count= 0;
84
85 #ifndef WITH_WSREP
86 static handlerton *binlog_hton;
87 #else
88 handlerton *binlog_hton; // we need it in wsrep_binlog.cc
89 #endif
90 bool opt_binlog_order_commits= true;
91
92 const char *log_bin_index= 0;
93 const char *log_bin_basename= 0;
94
95 MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period, WRITE_CACHE);
96
97 static int binlog_init(void *p);
98 static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event);
99 static int binlog_close_connection(handlerton *hton, THD *thd);
100 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
101 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
102 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
103 THD *thd);
104 static int binlog_commit(handlerton *hton, THD *thd, bool all);
105 static int binlog_rollback(handlerton *hton, THD *thd, bool all);
106 static int binlog_prepare(handlerton *hton, THD *thd, bool all);
107 static int binlog_xa_commit(handlerton *hton, XID *xid);
108 static int binlog_xa_rollback(handlerton *hton, XID *xid);
109 static void exec_binlog_error_action_abort(const char* err_string);
110
111 /**
112 Helper class to switch to a new thread and then go back to the previous one,
113 when the object is destroyed using RAII.
114
115 This class is used to temporarily switch to another session (THD
116 structure). It will set up thread specific "globals" correctly
117 so that the POSIX thread looks exactly like the session attached to.
118 However, PSI_thread info is not touched as it is required to show
119 the actual physical view in PFS instrumentation i.e., it should
120 depict as the real thread doing the work instead of thread it switched
121 to.
122
123 On destruction, the original session (which is supplied to the
124 constructor) will be re-attached automatically. For example, with
125 this code, the value of @c current_thd will be the same before and
126 after execution of the code.
127
128 @code
129 {
130 for (int i = 0 ; i < count ; ++i)
131 {
132 // here we are attached to current_thd
133 // [...]
134 Thd_backup_and_restore switch_thd(current_thd, other_thd[i]);
135 // [...]
136 // here we are attached to other_thd[i]
137 // [...]
138 }
139 // here we are attached to current_thd
140 }
141 @endcode
142
143 @warning The class is not designed to be inherited from.
144 */
145
146 #ifndef EMBEDDED_LIBRARY
147
148 class Thd_backup_and_restore
149 {
150 public:
151 /**
152 Try to attach the POSIX thread to a session.
153 - This function attaches the POSIX thread to a session
154 in MAX_SESSION_ATTACH_TRIES tries when encountering
155 'out of memory' error, and terminates the server after
156 failed in MAX_SESSION_ATTACH_TRIES tries.
157
158 @param[in] backup_thd The thd to restore to when object is destructed.
159 @param[in] new_thd The thd to attach to.
160 */
161
Thd_backup_and_restore(THD * backup_thd,THD * new_thd)162 Thd_backup_and_restore(THD *backup_thd, THD *new_thd)
163 : m_backup_thd(backup_thd), m_new_thd(new_thd),
164 m_new_thd_old_real_id(new_thd->real_id)
165 {
166 assert(m_backup_thd != NULL && m_new_thd != NULL);
167 // Reset the state of the current thd.
168 m_backup_thd->restore_globals();
169 int i= 0;
170 /*
171 Attach the POSIX thread to a session in MAX_SESSION_ATTACH_TRIES
172 tries when encountering 'out of memory' error.
173 */
174 while (i < MAX_SESSION_ATTACH_TRIES)
175 {
176 /*
177 Currently attach_to(...) returns ER_OUTOFMEMORY or 0. So
178 we continue to attach the POSIX thread when encountering
179 the ER_OUTOFMEMORY error. Please take care other error
180 returned from attach_to(...) in future.
181 */
182 if (!attach_to(new_thd))
183 {
184 if (i > 0)
185 sql_print_warning("Server overcomes the temporary 'out of memory' "
186 "in '%d' tries while attaching to session thread "
187 "during the group commit phase.\n", i + 1);
188 break;
189 }
190 /* Sleep 1 microsecond per try to avoid temporary 'out of memory' */
191 my_sleep(1);
192 i++;
193 }
194 /*
195 Terminate the server after failed to attach the POSIX thread
196 to a session in MAX_SESSION_ATTACH_TRIES tries.
197 */
198 if (MAX_SESSION_ATTACH_TRIES == i)
199 {
200 my_safe_print_system_time();
201 my_safe_printf_stderr("%s", "[Fatal] Out of memory while attaching to "
202 "session thread during the group commit phase. "
203 "Data consistency between master and slave can "
204 "be guaranteed after server restarts.\n");
205 _exit(MYSQLD_FAILURE_EXIT);
206 }
207 }
208
209 /**
210 Restores to previous thd.
211 */
~Thd_backup_and_restore()212 ~Thd_backup_and_restore()
213 {
214 /*
215 Restore the global variables of the thd we previously attached to,
216 to its original state. In other words, detach the m_new_thd.
217 */
218 m_new_thd->restore_globals();
219 m_new_thd->real_id= m_new_thd_old_real_id;
220
221 // Reset the global variables to the original state.
222 if (unlikely(m_backup_thd->store_globals()))
223 assert(0); // Out of memory?!
224 }
225
226 private:
227
228 /**
229 Attach the POSIX thread to a session.
230 */
attach_to(THD * thd)231 int attach_to(THD *thd)
232 {
233 if (DBUG_EVALUATE_IF("simulate_session_attach_error", 1, 0)
234 || unlikely(thd->store_globals()))
235 {
236 /*
237 Indirectly uses pthread_setspecific, which can only return
238 ENOMEM or EINVAL. Since store_globals are using correct keys,
239 the only alternative is out of memory.
240 */
241 return ER_OUTOFMEMORY;
242 }
243 return 0;
244 }
245
246 THD *m_backup_thd;
247 THD *m_new_thd;
248 my_thread_t m_new_thd_old_real_id;
249 };
250
251 #endif /* !EMBEDDED_LIBRARY */
252
253 /**
254 Caches for non-transactional and transactional data before writing
255 it to the binary log.
256
257 @todo All the access functions for the flags suggest that the
258 encapsuling is not done correctly, so try to move any logic that
259 requires access to the flags into the cache.
260 */
261 class binlog_cache_data
262 {
263 public:
264
binlog_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & cache_log_arg)265 binlog_cache_data(bool trx_cache_arg,
266 my_off_t max_binlog_cache_size_arg,
267 ulong *ptr_binlog_cache_use_arg,
268 ulong *ptr_binlog_cache_disk_use_arg,
269 const IO_CACHE &cache_log_arg)
270 : cache_log(cache_log_arg),
271 m_pending(0),
272 saved_max_binlog_cache_size(max_binlog_cache_size_arg),
273 ptr_binlog_cache_use(ptr_binlog_cache_use_arg),
274 ptr_binlog_cache_disk_use(ptr_binlog_cache_disk_use_arg)
275 {
276 reset();
277 flags.transactional= trx_cache_arg;
278 cache_log.end_of_file= saved_max_binlog_cache_size;
279 }
280
281 int finalize(THD *thd, Log_event *end_event);
282 int finalize(THD *thd, Log_event *end_event, XID_STATE *xs);
283 int flush(THD *thd, my_off_t *bytes, bool *wrote_xid);
284 int write_event(THD *thd, Log_event *event);
285
~binlog_cache_data()286 virtual ~binlog_cache_data()
287 {
288 assert(is_binlog_empty());
289 close_cached_file(&cache_log);
290 }
291
is_binlog_empty() const292 bool is_binlog_empty() const
293 {
294 my_off_t pos= my_b_tell(&cache_log);
295 DBUG_PRINT("debug", ("%s_cache - pending: 0x%llx, bytes: %llu",
296 (flags.transactional ? "trx" : "stmt"),
297 (ulonglong) pending(), (ulonglong) pos));
298 return pending() == NULL && pos == 0;
299 }
300
is_finalized() const301 bool is_finalized() const {
302 return flags.finalized;
303 }
304
pending() const305 Rows_log_event *pending() const
306 {
307 return m_pending;
308 }
309
set_pending(Rows_log_event * const pending)310 void set_pending(Rows_log_event *const pending)
311 {
312 m_pending= pending;
313 }
314
set_incident(void)315 void set_incident(void)
316 {
317 flags.incident= true;
318 }
319
has_incident(void) const320 bool has_incident(void) const
321 {
322 return flags.incident;
323 }
324
325 /**
326 Sets the binlog_cache_data::Flags::flush_error flag if there
327 is an error while flushing cache to the file.
328
329 @param thd The client thread that is executing the transaction.
330 */
set_flush_error(THD * thd)331 void set_flush_error(THD *thd)
332 {
333 flags.flush_error= true;
334 if(is_trx_cache())
335 {
336 /*
337 If the cache is a transactional cache and if the write
338 has failed due to ENOSPC, then my_write() would have
339 set EE_WRITE error, so clear the error and create an
340 equivalent server error.
341 */
342 if (thd->is_error())
343 thd->clear_error();
344 char errbuf[MYSYS_STRERROR_SIZE];
345 my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), my_filename(cache_log.file),
346 errno, my_strerror(errbuf, sizeof(errbuf), errno));
347 }
348 }
349
get_flush_error(void) const350 bool get_flush_error(void) const
351 {
352 return flags.flush_error;
353 }
354
has_xid() const355 bool has_xid() const {
356 // There should only be an XID event if we are transactional
357 assert((flags.transactional && flags.with_xid) || !flags.with_xid);
358 return flags.with_xid;
359 }
360
is_trx_cache() const361 bool is_trx_cache() const
362 {
363 return flags.transactional;
364 }
365
get_byte_position() const366 my_off_t get_byte_position() const
367 {
368 return my_b_tell(&cache_log);
369 }
370
cache_state_rollback(my_off_t pos_to_rollback)371 void cache_state_rollback(my_off_t pos_to_rollback)
372 {
373 if (pos_to_rollback)
374 {
375 std::map<my_off_t,cache_state>::iterator it;
376 it = cache_state_map.find(pos_to_rollback);
377 if (it != cache_state_map.end())
378 {
379 flags.with_rbr= it->second.with_rbr;
380 flags.with_sbr= it->second.with_sbr;
381 flags.with_start= it->second.with_start;
382 flags.with_end= it->second.with_end;
383 flags.with_content= it->second.with_content;
384 }
385 else
386 assert(it == cache_state_map.end());
387 }
388 // Rolling back to pos == 0 means cleaning up the cache.
389 else
390 {
391 flags.with_rbr= false;
392 flags.with_sbr= false;
393 flags.with_start= false;
394 flags.with_end= false;
395 flags.with_content= false;
396 }
397 }
398
cache_state_checkpoint(my_off_t pos_to_checkpoint)399 void cache_state_checkpoint(my_off_t pos_to_checkpoint)
400 {
401 // We only need to store the cache state for pos > 0
402 if (pos_to_checkpoint)
403 {
404 cache_state state;
405 state.with_rbr= flags.with_rbr;
406 state.with_sbr= flags.with_sbr;
407 state.with_start= flags.with_start;
408 state.with_end= flags.with_end;
409 state.with_content= flags.with_content;
410 cache_state_map[pos_to_checkpoint]= state;
411 }
412 }
413
reset()414 virtual void reset()
415 {
416 compute_statistics();
417 truncate(0);
418
419 /*
420 If IOCACHE has a file associated, change its size to 0.
421 It is safer to do it here, since we are certain that one
422 asked the cache to go to position 0 with truncate.
423 */
424 if(cache_log.file != -1)
425 {
426 int error= 0;
427 if((error= my_chsize(cache_log.file, 0, 0, MYF(MY_WME))))
428 sql_print_warning("Unable to resize binlog IOCACHE auxilary file");
429
430 DBUG_EXECUTE_IF("show_io_cache_size",
431 {
432 my_off_t file_size= my_seek(cache_log.file,
433 0L,MY_SEEK_END,MYF(MY_WME+MY_FAE));
434 sql_print_error("New size:%llu",
435 static_cast<ulonglong>(file_size));
436 });
437 }
438
439 flags.incident= false;
440 flags.with_xid= false;
441 flags.immediate= false;
442 flags.finalized= false;
443 flags.with_sbr= false;
444 flags.with_rbr= false;
445 flags.with_start= false;
446 flags.with_end= false;
447 flags.with_content= false;
448 flags.flush_error= false;
449
450 /*
451 The truncate function calls reinit_io_cache that calls my_b_flush_io_cache
452 which may increase disk_writes. This breaks the disk_writes use by the
453 binary log which aims to compute the ratio between in-memory cache usage
454 and disk cache usage. To avoid this undesirable behavior, we reset the
455 variable after truncating the cache.
456 */
457 cache_log.disk_writes= 0;
458 cache_state_map.clear();
459 assert(is_binlog_empty());
460 }
461
462 /*
463 Sets the write position to point at the position given. If the
464 cache has swapped to a file, it reinitializes it, so that the
465 proper data is added to the IO_CACHE buffer. Otherwise, it just
466 does a my_b_seek.
467
468 my_b_seek will not work if the cache has swapped, that's why
469 we do this workaround.
470
471 @param[IN] pos the new write position.
472 @param[IN] use_reinit if the position should be reset resorting
473 to reset_io_cache (which may issue a flush_io_cache
474 inside)
475
476 @return The previous write position.
477 */
reset_write_pos(my_off_t pos,bool use_reinit)478 my_off_t reset_write_pos(my_off_t pos, bool use_reinit)
479 {
480 DBUG_ENTER("reset_write_pos");
481 assert(cache_log.type == WRITE_CACHE);
482
483 my_off_t oldpos= get_byte_position();
484
485 if (use_reinit)
486 reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, 0);
487 else
488 my_b_seek(&cache_log, pos);
489
490 DBUG_RETURN(oldpos);
491 }
492
493 /*
494 Cache to store data before copying it to the binary log.
495 */
496 IO_CACHE cache_log;
497
498 /**
499 Returns information about the cache content with respect to
500 the binlog_format of the events.
501
502 This will be used to set a flag on GTID_LOG_EVENT stating that the
503 transaction may have SBR statements or not, but the binlog dump
504 will show this flag as "rbr_only" when it is not set. That's why
505 an empty transaction should return true below, or else an empty
506 transaction would be assumed as "rbr_only" even not having RBR
507 events.
508
509 When dumping a binary log content using mysqlbinlog client program,
510 for any transaction assumed as "rbr_only" it will be printed a
511 statement changing the transaction isolation level to READ COMMITTED.
512 It doesn't make sense to have an empty transaction "requiring" this
513 isolation level change.
514
515 @return true The cache have SBR events or is empty.
516 @return false The cache contains a transaction with no SBR events.
517 */
may_have_sbr_stmts()518 bool may_have_sbr_stmts()
519 {
520 return flags.with_sbr || !flags.with_rbr;
521 }
522
523 /**
524 Check if the binlog cache contains an empty transaction, which has
525 two binlog events "BEGIN" and "COMMIT".
526
527 @return true The binlog cache contains an empty transaction.
528 @return false Otherwise.
529 */
has_empty_transaction()530 bool has_empty_transaction()
531 {
532 /*
533 The empty transaction has two events in trx/stmt binlog cache
534 and no changes (no SBR changing content and no RBR events).
535 Other transaction should not have two events. So we can identify
536 if this is an empty transaction by the event counter and the
537 cache flags.
538 */
539 if (flags.with_start && // Has transaction start statement
540 flags.with_end && // Has transaction end statement
541 !flags.with_sbr && // No statements changing content
542 !flags.with_rbr && // No rows changing content
543 !flags.immediate && // Not a DDL
544 !flags.with_xid && // Not a XID transaction and not an atomic DDL Query
545 !flags.with_content)// Does not have any content
546 {
547 assert(!flags.with_sbr); // No statements changing content
548 assert(!flags.with_rbr); // No rows changing content
549 assert(!flags.immediate);// Not a DDL
550 assert(!flags.with_xid); // Not a XID trx and not an atomic DDL Query
551
552 return true;
553 }
554 return false;
555 }
556
557 /**
558 Check if the binlog cache is empty or contains an empty transaction,
559 which has two binlog events "BEGIN" and "COMMIT".
560
561 @return true The binlog cache is empty or contains an empty transaction.
562 @return false Otherwise.
563 */
is_empty_or_has_empty_transaction()564 bool is_empty_or_has_empty_transaction()
565 {
566 return is_binlog_empty() || has_empty_transaction();
567 }
568
569 protected:
570 /*
571 This structure should have all cache variables/flags that should be restored
572 when a ROLLBACK TO SAVEPOINT statement be executed.
573 */
574 struct cache_state
575 {
576 bool with_sbr;
577 bool with_rbr;
578 bool with_start;
579 bool with_end;
580 bool with_content;
581 };
582 /*
583 For every SAVEPOINT used, we will store a cache_state for the current
584 binlog cache position. So, if a ROLLBACK TO SAVEPOINT is used, we can
585 restore the cache_state values after truncating the binlog cache.
586 */
587 std::map<my_off_t, cache_state> cache_state_map;
588
589 /*
590 It truncates the cache to a certain position. This includes deleting the
591 pending event.
592 */
truncate(my_off_t pos)593 void truncate(my_off_t pos)
594 {
595 DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
596 remove_pending_event();
597 /*
598 Whenever there is an error while flushing cache to file,
599 the local cache will not be in a normal state and the same
600 cache cannot be used without facing an assert.
601 So, clear the cache if there is a flush error.
602 */
603 reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, get_flush_error());
604 cache_log.end_of_file= saved_max_binlog_cache_size;
605 }
606
607 /**
608 Flush pending event to the cache buffer.
609 */
flush_pending_event(THD * thd)610 int flush_pending_event(THD *thd) {
611 if (m_pending)
612 {
613 m_pending->set_flags(Rows_log_event::STMT_END_F);
614 if (int error= write_event(thd, m_pending))
615 return error;
616 thd->clear_binlog_table_maps();
617 }
618 return 0;
619 }
620
621 /**
622 Remove the pending event.
623 */
remove_pending_event()624 int remove_pending_event() {
625 delete m_pending;
626 m_pending= NULL;
627 return 0;
628 }
629 struct Flags {
630 /*
631 Defines if this is either a trx-cache or stmt-cache, respectively, a
632 transactional or non-transactional cache.
633 */
634 bool transactional:1;
635
636 /*
637 This indicates that some events did not get into the cache and most likely
638 it is corrupted.
639 */
640 bool incident:1;
641
642 /*
643 This indicates that the cache should be written without BEGIN/END.
644 */
645 bool immediate:1;
646
647 /*
648 This flag indicates that the buffer was finalized and has to be
649 flushed to disk.
650 */
651 bool finalized:1;
652
653 /*
654 This indicates that the cache contain an XID event.
655 */
656 bool with_xid:1;
657
658 /*
659 This indicates that the cache contain statements changing content.
660 */
661 bool with_sbr:1;
662
663 /*
664 This indicates that the cache contain RBR event changing content.
665 */
666 bool with_rbr:1;
667
668 /*
669 This indicates that the cache contain s transaction start statement.
670 */
671 bool with_start:1;
672
673 /*
674 This indicates that the cache contain a transaction end event.
675 */
676 bool with_end:1;
677
678 /*
679 This indicates that the cache contain content other than START/END.
680 */
681 bool with_content:1;
682
683 /*
684 This flag is set to 'true' when there is an error while flushing the
685 I/O cache to file.
686 */
687 bool flush_error:1;
688 } flags;
689
690 private:
691 /*
692 Pending binrows event. This event is the event where the rows are currently
693 written.
694 */
695 Rows_log_event *m_pending;
696
697 /**
698 This function computes binlog cache and disk usage.
699 */
compute_statistics()700 void compute_statistics()
701 {
702 if (!is_binlog_empty())
703 {
704 (*ptr_binlog_cache_use)++;
705 if (cache_log.disk_writes != 0)
706 (*ptr_binlog_cache_disk_use)++;
707 }
708 }
709
710 /*
711 Stores the values of maximum size of the cache allowed when this cache
712 is configured. This corresponds to either
713 . max_binlog_cache_size or max_binlog_stmt_cache_size.
714 */
715 my_off_t saved_max_binlog_cache_size;
716
717 /*
718 Stores a pointer to the status variable that keeps track of the in-memory
719 cache usage. This corresponds to either
720 . binlog_cache_use or binlog_stmt_cache_use.
721 */
722 ulong *ptr_binlog_cache_use;
723
724 /*
725 Stores a pointer to the status variable that keeps track of the disk
726 cache usage. This corresponds to either
727 . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
728 */
729 ulong *ptr_binlog_cache_disk_use;
730
731 binlog_cache_data& operator=(const binlog_cache_data& info);
732 binlog_cache_data(const binlog_cache_data& info);
733 };
734
735
736 class binlog_stmt_cache_data
737 : public binlog_cache_data
738 {
739 public:
binlog_stmt_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & cache_log)740 binlog_stmt_cache_data(bool trx_cache_arg,
741 my_off_t max_binlog_cache_size_arg,
742 ulong *ptr_binlog_cache_use_arg,
743 ulong *ptr_binlog_cache_disk_use_arg,
744 const IO_CACHE &cache_log)
745 : binlog_cache_data(trx_cache_arg,
746 max_binlog_cache_size_arg,
747 ptr_binlog_cache_use_arg,
748 ptr_binlog_cache_disk_use_arg,
749 cache_log)
750 {
751 }
752
753 using binlog_cache_data::finalize;
754
755 int finalize(THD *thd);
756 };
757
758
759 int
finalize(THD * thd)760 binlog_stmt_cache_data::finalize(THD *thd)
761 {
762 if (flags.immediate)
763 {
764 if (int error= finalize(thd, NULL))
765 return error;
766 }
767 else
768 {
769 Query_log_event
770 end_evt(thd, STRING_WITH_LEN("COMMIT"), false, false, true, 0, true);
771 if (int error= finalize(thd, &end_evt))
772 return error;
773 }
774 return 0;
775 }
776
777
778 class binlog_trx_cache_data : public binlog_cache_data
779 {
780 public:
binlog_trx_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & cache_log)781 binlog_trx_cache_data(bool trx_cache_arg,
782 my_off_t max_binlog_cache_size_arg,
783 ulong *ptr_binlog_cache_use_arg,
784 ulong *ptr_binlog_cache_disk_use_arg,
785 const IO_CACHE &cache_log)
786 : binlog_cache_data(trx_cache_arg,
787 max_binlog_cache_size_arg,
788 ptr_binlog_cache_use_arg,
789 ptr_binlog_cache_disk_use_arg,
790 cache_log),
791 m_cannot_rollback(FALSE), before_stmt_pos(MY_OFF_T_UNDEF)
792 { }
793
reset()794 void reset()
795 {
796 DBUG_ENTER("reset");
797 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
798 m_cannot_rollback= FALSE;
799 before_stmt_pos= MY_OFF_T_UNDEF;
800 binlog_cache_data::reset();
801 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
802 DBUG_VOID_RETURN;
803 }
804
cannot_rollback() const805 bool cannot_rollback() const
806 {
807 return m_cannot_rollback;
808 }
809
set_cannot_rollback()810 void set_cannot_rollback()
811 {
812 m_cannot_rollback= TRUE;
813 }
814
get_prev_position() const815 my_off_t get_prev_position() const
816 {
817 return before_stmt_pos;
818 }
819
set_prev_position(my_off_t pos)820 void set_prev_position(my_off_t pos)
821 {
822 DBUG_ENTER("set_prev_position");
823 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
824 before_stmt_pos= pos;
825 cache_state_checkpoint(before_stmt_pos);
826 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
827 DBUG_VOID_RETURN;
828 }
829
restore_prev_position()830 void restore_prev_position()
831 {
832 DBUG_ENTER("restore_prev_position");
833 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
834 binlog_cache_data::truncate(before_stmt_pos);
835 cache_state_rollback(before_stmt_pos);
836 before_stmt_pos= MY_OFF_T_UNDEF;
837 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
838 DBUG_VOID_RETURN;
839 }
840
restore_savepoint(my_off_t pos)841 void restore_savepoint(my_off_t pos)
842 {
843 DBUG_ENTER("restore_savepoint");
844 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
845 binlog_cache_data::truncate(pos);
846 if (pos <= before_stmt_pos)
847 before_stmt_pos= MY_OFF_T_UNDEF;
848 cache_state_rollback(pos);
849 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
850 DBUG_VOID_RETURN;
851 }
852
853 using binlog_cache_data::truncate;
854
855 int truncate(THD *thd, bool all);
856
857 private:
858 /*
859 It will be set TRUE if any statement which cannot be rolled back safely
860 is put in trx_cache.
861 */
862 bool m_cannot_rollback;
863
864 /*
865 Binlog position before the start of the current statement.
866 */
867 my_off_t before_stmt_pos;
868
869 binlog_trx_cache_data& operator=(const binlog_trx_cache_data& info);
870 binlog_trx_cache_data(const binlog_trx_cache_data& info);
871 };
872
873 class binlog_cache_mngr {
874 public:
binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,ulong * ptr_binlog_stmt_cache_use_arg,ulong * ptr_binlog_stmt_cache_disk_use_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & stmt_cache_log,const IO_CACHE & trx_cache_log)875 binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,
876 ulong *ptr_binlog_stmt_cache_use_arg,
877 ulong *ptr_binlog_stmt_cache_disk_use_arg,
878 my_off_t max_binlog_cache_size_arg,
879 ulong *ptr_binlog_cache_use_arg,
880 ulong *ptr_binlog_cache_disk_use_arg,
881 const IO_CACHE &stmt_cache_log,
882 const IO_CACHE &trx_cache_log)
883 : stmt_cache(FALSE, max_binlog_stmt_cache_size_arg,
884 ptr_binlog_stmt_cache_use_arg,
885 ptr_binlog_stmt_cache_disk_use_arg,
886 stmt_cache_log),
887 trx_cache(TRUE, max_binlog_cache_size_arg,
888 ptr_binlog_cache_use_arg,
889 ptr_binlog_cache_disk_use_arg,
890 trx_cache_log),
891 has_logged_xid(NULL)
892 { }
893
get_binlog_cache_data(bool is_transactional)894 binlog_cache_data* get_binlog_cache_data(bool is_transactional)
895 {
896 if (is_transactional)
897 return &trx_cache;
898 else
899 return &stmt_cache;
900 }
901
get_binlog_cache_log(bool is_transactional)902 IO_CACHE* get_binlog_cache_log(bool is_transactional)
903 {
904 return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
905 }
906
907 /**
908 Convenience method to check if both caches are empty.
909 */
is_binlog_empty() const910 bool is_binlog_empty() const {
911 return stmt_cache.is_binlog_empty() && trx_cache.is_binlog_empty();
912 }
913
914 /*
915 clear stmt_cache and trx_cache if they are not empty
916 */
reset()917 void reset()
918 {
919 if (!stmt_cache.is_binlog_empty())
920 stmt_cache.reset();
921 if (!trx_cache.is_binlog_empty())
922 trx_cache.reset();
923 }
924
925 #ifndef NDEBUG
dbug_any_finalized() const926 bool dbug_any_finalized() const {
927 return stmt_cache.is_finalized() || trx_cache.is_finalized();
928 }
929 #endif
930
931 /*
932 Convenience method to flush both caches to the binary log.
933
934 @param bytes_written Pointer to variable that will be set to the
935 number of bytes written for the flush.
936 @param wrote_xid Pointer to variable that will be set to @c
937 true if any XID event was written to the
938 binary log. Otherwise, the variable will not
939 be touched.
940 @return Error code on error, zero if no error.
941 */
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)942 int flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
943 {
944 my_off_t stmt_bytes= 0;
945 my_off_t trx_bytes= 0;
946 assert(stmt_cache.has_xid() == 0);
947 int error= stmt_cache.flush(thd, &stmt_bytes, wrote_xid);
948 if (error)
949 return error;
950 DEBUG_SYNC(thd, "after_flush_stm_cache_before_flush_trx_cache");
951 if (int error= trx_cache.flush(thd, &trx_bytes, wrote_xid))
952 return error;
953 *bytes_written= stmt_bytes + trx_bytes;
954 return 0;
955 }
956
957 /**
958 Check if at least one of transacaction and statement binlog caches
959 contains an empty transaction, other one is empty or contains an
960 empty transaction.
961
962 @return true At least one of transacaction and statement binlog
963 caches an empty transaction, other one is emptry
964 or contains an empty transaction.
965 @return false Otherwise.
966 */
has_empty_transaction()967 bool has_empty_transaction()
968 {
969 return (trx_cache.is_empty_or_has_empty_transaction() &&
970 stmt_cache.is_empty_or_has_empty_transaction() &&
971 !is_binlog_empty());
972 }
973
974 binlog_stmt_cache_data stmt_cache;
975 binlog_trx_cache_data trx_cache;
976 /*
977 The bool flag is for preventing do_binlog_xa_commit_rollback()
978 execution twice which can happen for "external" xa commit/rollback.
979 */
980 bool has_logged_xid;
981 private:
982
983 binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
984 binlog_cache_mngr(const binlog_cache_mngr& info);
985 };
986
987
thd_get_cache_mngr(const THD * thd)988 static binlog_cache_mngr *thd_get_cache_mngr(const THD *thd)
989 {
990 /*
991 If opt_bin_log is not set, binlog_hton->slot == -1 and hence
992 thd_get_ha_data(thd, hton) segfaults.
993 */
994 #ifndef WITH_WSREP
995 assert(opt_bin_log);
996 #endif
997 return (binlog_cache_mngr *)thd_get_ha_data(thd, binlog_hton);
998 }
999
1000
1001 /**
1002 Checks if the BINLOG_CACHE_SIZE's value is greater than MAX_BINLOG_CACHE_SIZE.
1003 If this happens, the BINLOG_CACHE_SIZE is set to MAX_BINLOG_CACHE_SIZE.
1004 */
check_binlog_cache_size(THD * thd)1005 void check_binlog_cache_size(THD *thd)
1006 {
1007 if (binlog_cache_size > max_binlog_cache_size)
1008 {
1009 if (thd)
1010 {
1011 push_warning_printf(thd, Sql_condition::SL_WARNING,
1012 ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX,
1013 ER(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
1014 (ulong) binlog_cache_size,
1015 (ulong) max_binlog_cache_size);
1016 }
1017 else
1018 {
1019 sql_print_warning(ER_DEFAULT(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
1020 binlog_cache_size,
1021 (ulong) max_binlog_cache_size);
1022 }
1023 binlog_cache_size= static_cast<ulong>(max_binlog_cache_size);
1024 }
1025 }
1026
1027 /**
1028 Checks if the BINLOG_STMT_CACHE_SIZE's value is greater than MAX_BINLOG_STMT_CACHE_SIZE.
1029 If this happens, the BINLOG_STMT_CACHE_SIZE is set to MAX_BINLOG_STMT_CACHE_SIZE.
1030 */
check_binlog_stmt_cache_size(THD * thd)1031 void check_binlog_stmt_cache_size(THD *thd)
1032 {
1033 if (binlog_stmt_cache_size > max_binlog_stmt_cache_size)
1034 {
1035 if (thd)
1036 {
1037 push_warning_printf(thd, Sql_condition::SL_WARNING,
1038 ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX,
1039 ER(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
1040 (ulong) binlog_stmt_cache_size,
1041 (ulong) max_binlog_stmt_cache_size);
1042 }
1043 else
1044 {
1045 sql_print_warning(ER_DEFAULT(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
1046 binlog_stmt_cache_size,
1047 (ulong) max_binlog_stmt_cache_size);
1048 }
1049 binlog_stmt_cache_size= static_cast<ulong>(max_binlog_stmt_cache_size);
1050 }
1051 }
1052
1053 /**
1054 Check whether binlog_hton has valid slot and enabled
1055 */
binlog_enabled()1056 bool binlog_enabled()
1057 {
1058 return(binlog_hton && binlog_hton->slot != HA_SLOT_UNDEF);
1059 }
1060
1061 /*
1062 Save position of binary log transaction cache.
1063
1064 SYNPOSIS
1065 binlog_trans_log_savepos()
1066
1067 thd The thread to take the binlog data from
1068 pos Pointer to variable where the position will be stored
1069
1070 DESCRIPTION
1071
1072 Save the current position in the binary log transaction cache into
1073 the variable pointed to by 'pos'
1074 */
1075
1076 static void
binlog_trans_log_savepos(THD * thd,my_off_t * pos)1077 binlog_trans_log_savepos(THD *thd, my_off_t *pos)
1078 {
1079 DBUG_ENTER("binlog_trans_log_savepos");
1080 assert(pos != NULL);
1081 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
1082 #ifdef WITH_WSREP
1083 assert((WSREP_EMULATE_BINLOG(thd)) || mysql_bin_log.is_open());
1084 #else
1085 assert(mysql_bin_log.is_open());
1086 #endif /* WITH_WSREP */
1087 *pos= cache_mngr->trx_cache.get_byte_position();
1088 DBUG_PRINT("return", ("position: %lu", (ulong) *pos));
1089 cache_mngr->trx_cache.cache_state_checkpoint(*pos);
1090 DBUG_VOID_RETURN;
1091 }
1092
binlog_dummy_recover(handlerton * hton,XID * xid,uint len)1093 static int binlog_dummy_recover(handlerton *hton, XID *xid, uint len)
1094 {
1095 return 0;
1096 }
1097
1098 /**
1099 Auxiliary class to copy serialized events to the binary log and
1100 correct some of the fields that are not known until just before
1101 writing the event.
1102
1103 This class allows feeding events in parts, so it is practical to use
1104 in do_write_cache() which reads events from an IO_CACHE where events
1105 may span mutiple cache pages.
1106
1107 The following fields are fixed before writing the event:
1108 - end_log_pos is set
1109 - the checksum is computed if checksums are enabled
1110 - the length is incremented by the checksum size if checksums are enabled
1111 */
1112 class Binlog_event_writer
1113 {
1114 IO_CACHE *output_cache;
1115 bool have_checksum;
1116 ha_checksum initial_checksum;
1117 ha_checksum checksum;
1118 uint32 end_log_pos;
1119
1120 public:
1121 /**
1122 Constructs a new Binlog_event_writer. Should be called once before
1123 starting to flush the transaction or statement cache to the
1124 binlog.
1125
1126 @param output_cache_arg IO_CACHE to write to.
1127 @param have_checksum_al
1128 */
Binlog_event_writer(IO_CACHE * output_cache_arg)1129 Binlog_event_writer(IO_CACHE *output_cache_arg)
1130 : output_cache(output_cache_arg),
1131 have_checksum(binlog_checksum_options !=
1132 binary_log::BINLOG_CHECKSUM_ALG_OFF),
1133 initial_checksum(my_checksum(0L, NULL, 0)),
1134 checksum(initial_checksum),
1135 end_log_pos(my_b_tell(output_cache))
1136 {
1137 // Simulate checksum error
1138 if (DBUG_EVALUATE_IF("fault_injection_crc_value", 1, 0))
1139 checksum--;
1140 }
1141
1142 /**
1143 Write part of an event to disk.
1144
1145 @param buf_p[IN,OUT] Points to buffer with data to write. The
1146 caller must set this initially, and it will be increased by the
1147 number of bytes written.
1148
1149 @param buf_len_p[IN,OUT] Points to the remaining length of the
1150 buffer, i.e., from buf_p to the end of the buffer. The caller
1151 must set this initially, and it will be decreased by the number of
1152 written bytes.
1153
1154 @param event_len_p[IN,OUT] Points to the remaining length of the
1155 event, i.e., the size of the event minus what was already written.
1156 This must be initialized to zero by the caller, must be remembered
1157 by the caller between calls, and is updated by this function: when
1158 an event begins it is set to the length of the event, and for each
1159 call it is decreased by the number of written bytes.
1160
1161 It is allowed that buf_len_p is less than event_len_p (i.e., event
1162 is only partial) and that event_len_p is less than buf_len_p
1163 (i.e., there is more than this event in the buffer). This
1164 function will write as much as is available of one event, but
1165 never more than one. It is required that buf_len_p >=
1166 LOG_EVENT_HEADER_LEN.
1167
1168 @retval true Error, i.e., my_b_write failed.
1169 @retval false Success.
1170 */
write_event_part(uchar ** buf_p,uint32 * buf_len_p,uint32 * event_len_p)1171 bool write_event_part(uchar **buf_p, uint32 *buf_len_p, uint32 *event_len_p)
1172 {
1173 DBUG_ENTER("Binlog_event_writer::write_event_part");
1174
1175 if (*buf_len_p == 0)
1176 DBUG_RETURN(false);
1177
1178 // This is the beginning of an event
1179 if (*event_len_p == 0)
1180 {
1181 // Caller must ensure that the first part of the event contains
1182 // a full event header.
1183 assert(*buf_len_p >= LOG_EVENT_HEADER_LEN);
1184
1185 // Read event length
1186 *event_len_p= uint4korr(*buf_p + EVENT_LEN_OFFSET);
1187
1188 // Increase end_log_pos
1189 end_log_pos+= *event_len_p;
1190
1191 // Change event length if checksum is enabled
1192 if (have_checksum)
1193 {
1194 int4store(*buf_p + EVENT_LEN_OFFSET,
1195 *event_len_p + BINLOG_CHECKSUM_LEN);
1196 // end_log_pos is shifted by the checksum length
1197 end_log_pos+= BINLOG_CHECKSUM_LEN;
1198 }
1199
1200 // Store end_log_pos
1201 int4store(*buf_p + LOG_POS_OFFSET, end_log_pos);
1202 }
1203
1204 // write the buffer
1205 uint32 write_bytes= std::min<uint32>(*buf_len_p, *event_len_p);
1206 assert(write_bytes > 0);
1207 if (my_b_write(output_cache, *buf_p, write_bytes))
1208 DBUG_RETURN(true);
1209
1210 // update the checksum
1211 if (have_checksum)
1212 checksum= my_checksum(checksum, *buf_p, write_bytes);
1213
1214 // Step positions.
1215 *buf_p+= write_bytes;
1216 *buf_len_p-= write_bytes;
1217 *event_len_p-= write_bytes;
1218
1219 if (have_checksum)
1220 {
1221 // store checksum
1222 if (*event_len_p == 0)
1223 {
1224 char checksum_buf[BINLOG_CHECKSUM_LEN];
1225 int4store(checksum_buf, checksum);
1226 if (my_b_write(output_cache, checksum_buf, BINLOG_CHECKSUM_LEN))
1227 DBUG_RETURN(true);
1228 checksum= initial_checksum;
1229 }
1230 }
1231
1232 DBUG_RETURN(false);
1233 }
1234
1235 /**
1236 Write a full event to disk.
1237
1238 This is a wrapper around write_event_part, which handles the
1239 special case where you have a complete event in the buffer.
1240
1241 @param buf Buffer to write.
1242 @param buf_len Number of bytes to write.
1243
1244 @retval true Error, i.e., my_b_write failed.
1245 @retval false Success.
1246 */
write_full_event(uchar * buf,uint32 buf_len)1247 bool write_full_event(uchar *buf, uint32 buf_len)
1248 {
1249 uint32 event_len_unused= 0;
1250 bool ret= write_event_part(&buf, &buf_len, &event_len_unused);
1251 assert(buf_len == 0);
1252 assert(event_len_unused == 0);
1253 return ret;
1254 }
1255
1256 };
1257
1258
1259 /*
1260 this function is mostly a placeholder.
1261 conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
1262 should be moved here.
1263 */
1264
binlog_init(void * p)1265 static int binlog_init(void *p)
1266 {
1267 binlog_hton= (handlerton *)p;
1268 #ifdef WITH_WSREP
1269 if (WSREP_ON)
1270 binlog_hton->state= SHOW_OPTION_YES;
1271 else
1272 {
1273 #endif /* WITH_WSREP */
1274 binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
1275 #ifdef WITH_WSREP
1276 }
1277 #endif /* WITH_WSREP */
1278 binlog_hton->db_type=DB_TYPE_BINLOG;
1279 binlog_hton->savepoint_offset= sizeof(my_off_t);
1280 binlog_hton->close_connection= binlog_close_connection;
1281 binlog_hton->savepoint_set= binlog_savepoint_set;
1282 binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
1283 binlog_hton->savepoint_rollback_can_release_mdl=
1284 binlog_savepoint_rollback_can_release_mdl;
1285 binlog_hton->commit= binlog_commit;
1286 binlog_hton->commit_by_xid= binlog_xa_commit;
1287 binlog_hton->rollback= binlog_rollback;
1288 binlog_hton->rollback_by_xid= binlog_xa_rollback;
1289 binlog_hton->prepare= binlog_prepare;
1290 binlog_hton->recover=binlog_dummy_recover;
1291 binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
1292 return 0;
1293 }
1294
1295 #ifdef WITH_WSREP
1296 #include "wsrep_binlog.h"
1297 #endif /* WITH_WSREP */
1298
binlog_deinit(void * p)1299 static int binlog_deinit(void *p)
1300 {
1301 /* Using binlog as TC after the binlog has been unloaded, won't work */
1302 if (tc_log == &mysql_bin_log)
1303 tc_log= NULL;
1304 binlog_hton= NULL;
1305 return 0;
1306 }
1307
1308
binlog_close_connection(handlerton * hton,THD * thd)1309 static int binlog_close_connection(handlerton *hton, THD *thd)
1310 {
1311 DBUG_ENTER("binlog_close_connection");
1312 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
1313 #ifdef WITH_WSREP
1314 if (!cache_mngr->is_binlog_empty()) {
1315 IO_CACHE* cache= get_trans_log(thd, true);
1316 uchar *buf= NULL;
1317 size_t len= 0;
1318 wsrep_write_cache_buf(cache, &buf, &len);
1319 WSREP_WARN("binlog trx cache not empty (%llu bytes) @ connection close %llu",
1320 (unsigned long long) len, (unsigned long long) thd->thread_id());
1321 if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
1322
1323 cache = cache_mngr->get_binlog_cache_log(false);
1324 wsrep_write_cache_buf(cache, &buf, &len);
1325 WSREP_WARN("binlog stmt cache not empty (%llu bytes) @ connection close %llu",
1326 (unsigned long long) len, (unsigned long long) thd->thread_id());
1327 if (len > 0) wsrep_dump_rbr_buf(thd, buf, len);
1328 }
1329 #endif /* WITH_WSREP */
1330 assert(cache_mngr->is_binlog_empty());
1331 DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) NULL));
1332 thd_set_ha_data(thd, binlog_hton, NULL);
1333 cache_mngr->~binlog_cache_mngr();
1334 my_free(cache_mngr);
1335 DBUG_RETURN(0);
1336 }
1337
write_event(THD * thd,Log_event * ev)1338 int binlog_cache_data::write_event(THD *thd, Log_event *ev)
1339 {
1340 DBUG_ENTER("binlog_cache_data::write_event");
1341
1342 if (ev != NULL)
1343 {
1344 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
1345 {DBUG_SET("+d,simulate_file_write_error");});
1346
1347 DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
1348 {
1349 static int count= -1;
1350 count++;
1351 if(count %4 == 3 && ev->get_type_code() ==
1352 binary_log::WRITE_ROWS_EVENT)
1353 DBUG_SET("+d,simulate_temp_file_write_error");
1354 });
1355 if (ev->write(&cache_log) != 0)
1356 {
1357 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
1358 {
1359 DBUG_SET("-d,simulate_file_write_error");
1360 DBUG_SET("-d,simulate_disk_full_at_flush_pending");
1361 /*
1362 after +d,simulate_file_write_error the local cache
1363 is in unsane state. Since -d,simulate_file_write_error
1364 revokes the first simulation do_write_cache()
1365 can't be run without facing an assert.
1366 So it's blocked with the following 2nd simulation:
1367 */
1368 DBUG_SET("+d,simulate_do_write_cache_failure");
1369 });
1370
1371 DBUG_EXECUTE_IF("simulate_temp_file_write_error",
1372 {
1373 DBUG_SET("-d,simulate_temp_file_write_error");
1374 });
1375 /*
1376 If the flush has failed due to ENOSPC error, set the
1377 flush_error flag.
1378 */
1379 if (thd->is_error() && my_errno() == ENOSPC)
1380 {
1381 set_flush_error(thd);
1382 }
1383 DBUG_RETURN(1);
1384 }
1385 if (ev->get_type_code() == binary_log::XID_EVENT)
1386 flags.with_xid= true;
1387 if (ev->is_using_immediate_logging())
1388 flags.immediate= true;
1389 /* With respect to the event type being written */
1390 if (ev->is_sbr_logging_format())
1391 flags.with_sbr= true;
1392 if (ev->is_rbr_logging_format())
1393 flags.with_rbr= true;
1394 #ifndef EMBEDDED_LIBRARY
1395 /* With respect to empty transactions */
1396 if (ev->starts_group())
1397 flags.with_start= true;
1398 if (ev->ends_group())
1399 flags.with_end= true;
1400 if ((!ev->starts_group() && !ev->ends_group())
1401 ||ev->get_type_code() == binary_log::VIEW_CHANGE_EVENT)
1402 flags.with_content= true;
1403 #endif
1404 }
1405 DBUG_RETURN(0);
1406 }
1407
assign_automatic_gtids_to_flush_group(THD * first_seen)1408 bool MYSQL_BIN_LOG::assign_automatic_gtids_to_flush_group(THD *first_seen)
1409 {
1410 DBUG_ENTER("MYSQL_BIN_LOG::assign_automatic_gtids_to_flush_group");
1411 bool error= false;
1412 bool is_global_sid_locked= false;
1413 rpl_sidno locked_sidno= 0;
1414
1415 for (THD *head= first_seen ; head ; head = head->next_to_commit)
1416 {
1417 assert(head->variables.gtid_next.type != UNDEFINED_GROUP);
1418
1419 /* Generate GTID */
1420 if (head->variables.gtid_next.type == AUTOMATIC_GROUP)
1421 {
1422 if (!is_global_sid_locked)
1423 {
1424 global_sid_lock->rdlock();
1425 is_global_sid_locked= true;
1426 }
1427 if (gtid_state->generate_automatic_gtid(head,
1428 head->get_transaction()->get_rpl_transaction_ctx()->get_sidno(),
1429 head->get_transaction()->get_rpl_transaction_ctx()->get_gno(),
1430 &locked_sidno)
1431 != RETURN_STATUS_OK)
1432 {
1433 head->commit_error= THD::CE_FLUSH_GNO_EXHAUSTED_ERROR;
1434 error= true;
1435 }
1436 }
1437 else
1438 {
1439 DBUG_PRINT("info", ("thd->variables.gtid_next.type=%d "
1440 "thd->owned_gtid.sidno=%d",
1441 head->variables.gtid_next.type,
1442 head->owned_gtid.sidno));
1443 if (head->variables.gtid_next.type == GTID_GROUP)
1444 assert(head->owned_gtid.sidno > 0);
1445 else
1446 {
1447 assert(head->variables.gtid_next.type == ANONYMOUS_GROUP);
1448 assert(head->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS);
1449 }
1450 }
1451 }
1452
1453 if (locked_sidno > 0)
1454 gtid_state->unlock_sidno(locked_sidno);
1455
1456 if (is_global_sid_locked)
1457 global_sid_lock->unlock();
1458
1459 DBUG_RETURN(error);
1460 }
1461
1462
1463 /**
1464 Write the Gtid_log_event to the binary log (prior to writing the
1465 statement or transaction cache).
1466
1467 @param thd Thread that is committing.
1468 @param cache_data The cache that is flushing.
1469 @param writer The event will be written to this Binlog_event_writer object.
1470
1471 @retval false Success.
1472 @retval true Error.
1473 */
write_gtid(THD * thd,binlog_cache_data * cache_data,Binlog_event_writer * writer)1474 bool MYSQL_BIN_LOG::write_gtid(THD *thd, binlog_cache_data *cache_data,
1475 Binlog_event_writer *writer)
1476 {
1477 DBUG_ENTER("MYSQL_BIN_LOG::write_gtid");
1478
1479 /*
1480 The GTID for the THD was assigned at
1481 assign_automatic_gtids_to_flush_group()
1482 */
1483 assert(thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS ||
1484 thd->owned_gtid.sidno > 0);
1485
1486 int64 sequence_number, last_committed;
1487 /* Generate logical timestamps for MTS */
1488 m_dependency_tracker.get_dependency(thd, sequence_number, last_committed);
1489
1490 /*
1491 In case both the transaction cache and the statement cache are
1492 non-empty, both will be flushed in sequence and logged as
1493 different transactions. Then the second transaction must only
1494 be executed after the first one has committed. Therefore, we
1495 need to set last_committed for the second transaction equal to
1496 last_committed for the first transaction. This is done in
1497 binlog_cache_data::flush. binlog_cache_data::flush uses the
1498 condition trn_ctx->last_committed==SEQ_UNINIT to detect this
1499 situation, hence the need to set it here.
1500 */
1501 thd->get_transaction()->last_committed= SEQ_UNINIT;
1502
1503
1504 /*
1505 Generate and write the Gtid_log_event.
1506 */
1507 Gtid_log_event gtid_event(thd, cache_data->is_trx_cache(),
1508 last_committed, sequence_number,
1509 cache_data->may_have_sbr_stmts());
1510 uchar buf[Gtid_log_event::MAX_EVENT_LENGTH];
1511 uint32 buf_len= gtid_event.write_to_memory(buf);
1512 bool ret= writer->write_full_event(buf, buf_len);
1513
1514 DBUG_RETURN(ret);
1515 }
1516
1517
gtid_end_transaction(THD * thd)1518 int MYSQL_BIN_LOG::gtid_end_transaction(THD *thd)
1519 {
1520 DBUG_ENTER("MYSQL_BIN_LOG::gtid_end_transaction");
1521
1522 DBUG_PRINT("info", ("query=%s", thd->query().str));
1523
1524 if (thd->owned_gtid.sidno > 0)
1525 {
1526 assert(thd->variables.gtid_next.type == GTID_GROUP);
1527
1528 if (!opt_bin_log || (thd->slave_thread && !opt_log_slave_updates))
1529 {
1530 /*
1531 If the binary log is disabled for this thread (either by
1532 log_bin=0 or sql_log_bin=0 or by log_slave_updates=0 for a
1533 slave thread), then the statement must not be written to the
1534 binary log. In this case, we just save the GTID into the
1535 table directly.
1536
1537 (This only happens for DDL, since DML will save the GTID into
1538 table and release ownership inside ha_commit_trans.)
1539 */
1540 if (gtid_state->save(thd) != 0)
1541 {
1542 gtid_state->update_on_rollback(thd);
1543 DBUG_RETURN(1);
1544 }
1545 else
1546 gtid_state->update_on_commit(thd);
1547 }
1548 else
1549 {
1550 /*
1551 If statement is supposed to be written to binlog, we write it
1552 to the binary log. Inserting into table and releasing
1553 ownership will be done in the binlog commit handler.
1554 */
1555
1556 /*
1557 thd->cache_mngr may be uninitialized if the first transaction
1558 executed by the client is empty.
1559 */
1560 if (thd->binlog_setup_trx_data())
1561 DBUG_RETURN(1);
1562 binlog_cache_data *cache_data= &thd_get_cache_mngr(thd)->trx_cache;
1563
1564 // Generate BEGIN event
1565 Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE,
1566 FALSE, TRUE, 0, TRUE);
1567 assert(!qinfo.is_using_immediate_logging());
1568
1569 #ifdef WITH_WSREP
1570 if (WSREP_ON && thd->slave_thread && !thd->wsrep_applier)
1571 {
1572 thd->wsrep_replicate_GTID= true;
1573 }
1574 #endif /* WITH_WSREP */
1575 /*
1576 Write BEGIN event and then commit (which will generate commit
1577 event and Gtid_log_event)
1578 */
1579 DBUG_PRINT("debug", ("Writing to trx_cache"));
1580 if (cache_data->write_event(thd, &qinfo) ||
1581 mysql_bin_log.commit(thd, true))
1582 DBUG_RETURN(1);
1583 }
1584 }
1585 else if (thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS ||
1586 /*
1587 A transaction with an empty owned gtid should call
1588 end_gtid_violating_transaction(...) to clear the
1589 flag thd->has_gtid_consistency_violatoin in case
1590 it is set. It missed the clear in ordered_commit,
1591 because its binlog transaction cache is empty.
1592 */
1593 thd->has_gtid_consistency_violation)
1594
1595 {
1596 gtid_state->update_on_commit(thd);
1597 }
1598 else if (thd->variables.gtid_next.type == GTID_GROUP &&
1599 thd->owned_gtid.is_empty())
1600 {
1601 assert(thd->has_gtid_consistency_violation == false);
1602 gtid_state->update_on_commit(thd);
1603 }
1604
1605 DBUG_RETURN(0);
1606 }
1607
1608 /**
1609 This function finalizes the cache preparing for commit or rollback.
1610
1611 The function just writes all the necessary events to the cache but
1612 does not flush the data to the binary log file. That is the role of
1613 the binlog_cache_data::flush function.
1614
1615 @see binlog_cache_data::flush
1616
1617 @param thd The thread whose transaction should be flushed
1618 @param cache_data Pointer to the cache
1619 @param end_ev The end event either commit/rollback
1620
1621 @return
1622 nonzero if an error pops up when flushing the cache.
1623 */
1624 int
finalize(THD * thd,Log_event * end_event)1625 binlog_cache_data::finalize(THD *thd, Log_event *end_event)
1626 {
1627 DBUG_ENTER("binlog_cache_data::finalize");
1628 if (!is_binlog_empty())
1629 {
1630 assert(!flags.finalized);
1631 if (int error= flush_pending_event(thd))
1632 DBUG_RETURN(error);
1633 if (int error= write_event(thd, end_event))
1634 DBUG_RETURN(error);
1635 flags.finalized= true;
1636 DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
1637 }
1638 DBUG_RETURN(0);
1639 }
1640
1641
1642 /**
1643 The method writes XA END query to XA-prepared transaction's cache
1644 and calls the "basic" finalize().
1645
1646 @return error code, 0 success
1647 */
1648
finalize(THD * thd,Log_event * end_event,XID_STATE * xs)1649 int binlog_cache_data::finalize(THD *thd, Log_event *end_event, XID_STATE *xs)
1650 {
1651 int error= 0;
1652 char buf[XID::ser_buf_size];
1653 char query[sizeof("XA END") + 1 + sizeof(buf)];
1654 int qlen= sprintf(query, "XA END %s", xs->get_xid()->serialize(buf));
1655 Query_log_event qev(thd, query, qlen, true, false, true, 0);
1656
1657 if ((error= write_event(thd, &qev)))
1658 return error;
1659
1660 return finalize(thd, end_event);
1661 }
1662
1663
/**
  Flush caches to the binary log.

  If the cache is finalized, the cache will be flushed to the binary
  log file. If the cache is not finalized, nothing will be done.

  Once a finalized cache has been processed it is always reset, whether
  or not the flush succeeded. Flushing can fail in two circumstances:

  - It was not possible to write the cache to the file. In this case,
    it does not make sense to keep the cache.

  - The cache was successfully written to disk but post-flush actions
    (such as binary log rotation) failed. In this case, the cache is
    already written to disk and there is no reason to keep it.

  @see binlog_cache_data::finalize

  @param thd                 The thread whose cache is flushed.
  @param[out] bytes_written  If not NULL and the cache was finalized, set
                             to the cache position (my_b_tell) sampled
                             before flushing, even when the flush failed.
  @param[out] wrote_xid      Set to true when the cache carried an XID
                             event and the flush succeeded; otherwise left
                             untouched. It is dereferenced without a NULL
                             check in that case.

  @return 0 on success or when the cache was not finalized, nonzero when
          thd->commit_error was already set to a flush error, or when
          writing the GTID event or the cache failed.
*/
int
binlog_cache_data::flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
{
  /*
    Doing a commit or a rollback including non-transactional tables,
    i.e., ending a transaction where we might write the transaction
    cache to the binary log.

    We can always end the statement when ending a transaction since
    transactions are not allowed inside stored functions. If they
    were, we would have to ensure that we're not ending a statement
    inside a stored function.
  */

  DBUG_ENTER("binlog_cache_data::flush");
  DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
  int error= 0;
  if (flags.finalized)
  {
    /* Sampled up front: reset() below empties the cache. */
    my_off_t bytes_in_cache= my_b_tell(&cache_log);
    Transaction_ctx *trn_ctx= thd->get_transaction();

    DBUG_PRINT("debug", ("bytes_in_cache: %llu", bytes_in_cache));

    trn_ctx->sequence_number= mysql_bin_log.m_dependency_tracker.step();
    /*
      In case of two caches the transaction is split into two groups.
      The 2nd group is considered to be a successor of the 1st rather
      than to have a common commit parent with it.
      Notice that due to a simple method of detection that the current is
      the 2nd cache being flushed, the very first few transactions may be logged
      sequentially (a next one is tagged as if a preceding one is its
      commit parent).
    */
    if (trn_ctx->last_committed == SEQ_UNINIT)
      trn_ctx->last_committed= trn_ctx->sequence_number - 1;

    /*
      The GTID is written prior to flushing the statement cache, if
      the transaction has written to the statement cache; and prior to
      flushing the transaction cache if the transaction has written to
      the transaction cache.  If GTIDs are enabled, then transactional
      and non-transactional updates cannot be mixed, so at most one of
      the caches can be non-empty, so just one GTID will be
      generated. If GTIDs are disabled, then no GTID is generated at
      all; if both the transactional cache and the statement cache are
      non-empty then we get two Anonymous_gtid_log_events, which is
      correct.
    */
    Binlog_event_writer writer(mysql_bin_log.get_log_file());

    /*
      The GTID ownership process might set the commit_error. Note that
      'error' is computed before the debug injection below, so an error
      injected there does not by itself skip the writes.
    */
    error= (thd->commit_error == THD::CE_FLUSH_ERROR ||
            thd->commit_error == THD::CE_FLUSH_GNO_EXHAUSTED_ERROR);

    DBUG_EXECUTE_IF("simulate_binlog_flush_error",
                    {
                      if (rand() % 3 == 0)
                      {
                        thd->commit_error= THD::CE_FLUSH_ERROR;
                      }
                    };);

    /* First the GTID event, then the cache itself; stop at the first error. */
    if (!error)
      if ((error= mysql_bin_log.write_gtid(thd, this, &writer)))
        thd->commit_error= THD::CE_FLUSH_ERROR;
    if (!error)
      error= mysql_bin_log.write_cache(thd, this, &writer);

    if (flags.with_xid && error == 0)
      *wrote_xid= true;

    /*
      Reset have to be after the if above, since it clears the
      with_xid flag
    */
    reset();
    if (bytes_written)
      *bytes_written= bytes_in_cache;
  }
  /* Holds on both paths: either it was never set, or reset() ran above. */
  assert(!flags.finalized);
  DBUG_RETURN(error);
}
1765
1766 /**
1767 This function truncates the transactional cache upon committing or rolling
1768 back either a transaction or a statement.
1769
1770 @param thd The thread whose transaction should be flushed
1771 @param cache_mngr Pointer to the cache data to be flushed
1772 @param all @c true means truncate the transaction, otherwise the
1773 statement must be truncated.
1774
1775 @return
1776 nonzero if an error pops up when truncating the transactional cache.
1777 */
1778 int
truncate(THD * thd,bool all)1779 binlog_trx_cache_data::truncate(THD *thd, bool all)
1780 {
1781 DBUG_ENTER("binlog_trx_cache_data::truncate");
1782 int error=0;
1783
1784 DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1785 FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1786 FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1787 all ? "all" : "stmt"));
1788
1789 remove_pending_event();
1790
1791 /*
1792 If rolling back an entire transaction or a single statement not
1793 inside a transaction, we reset the transaction cache.
1794 */
1795 if (ending_trans(thd, all))
1796 {
1797 if (has_incident())
1798 {
1799 const char* err_msg= "Error happend while resetting the transaction "
1800 "cache for a rolled back transaction or a single "
1801 "statement not inside a transaction.";
1802 error= mysql_bin_log.write_incident(thd, true/*need_lock_log=true*/,
1803 err_msg);
1804 }
1805 reset();
1806 }
1807 /*
1808 If rolling back a statement in a transaction, we truncate the
1809 transaction cache to remove the statement.
1810 */
1811 else if (get_prev_position() != MY_OFF_T_UNDEF)
1812 restore_prev_position();
1813
1814 thd->clear_binlog_table_maps();
1815
1816 DBUG_RETURN(error);
1817 }
1818
1819
get_xa_opt(THD * thd)1820 inline enum xa_option_words get_xa_opt(THD *thd)
1821 {
1822 enum xa_option_words xa_opt= XA_NONE;
1823 switch(thd->lex->sql_command)
1824 {
1825 case SQLCOM_XA_COMMIT:
1826 xa_opt= static_cast<Sql_cmd_xa_commit*>(thd->lex->m_sql_cmd)->get_xa_opt();
1827 break;
1828 default:
1829 break;
1830 }
1831
1832 return xa_opt;
1833 }
1834
1835
1836 /**
1837 Predicate function yields true when XA transaction is
1838 being logged having a proper state ready for prepare or
1839 commit in one phase.
1840
1841 @param thd THD pointer of running transaction
1842 @return true When the being prepared transaction should be binlogged,
1843 false otherwise.
1844 */
1845
is_loggable_xa_prepare(THD * thd)1846 inline bool is_loggable_xa_prepare(THD *thd)
1847 {
1848 /*
1849 simulate_commit_failure is doing a trick with XID_STATE while
1850 the ongoing transaction is not XA, and therefore to be errored out,
1851 asserted below. In that case because of the
1852 latter fact the function returns @c false.
1853 */
1854 DBUG_EXECUTE_IF("simulate_commit_failure",
1855 {
1856 XID_STATE *xs= thd->get_transaction()->xid_state();
1857 assert((thd->is_error() &&
1858 xs->get_state() == XID_STATE::XA_IDLE) ||
1859 xs->get_state() == XID_STATE::XA_NOTR);
1860 });
1861
1862 return DBUG_EVALUATE_IF("simulate_commit_failure",
1863 false,
1864 thd->get_transaction()->xid_state()->
1865 has_state(XID_STATE::XA_IDLE));
1866 }
1867
binlog_prepare(handlerton * hton,THD * thd,bool all)1868 static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1869 {
1870 DBUG_ENTER("binlog_prepare");
1871 if (!all)
1872 {
1873 thd->get_transaction()->store_commit_parent(mysql_bin_log.
1874 m_dependency_tracker.get_max_committed_timestamp());
1875
1876 }
1877
1878 DBUG_RETURN(all && is_loggable_xa_prepare(thd) ?
1879 mysql_bin_log.commit(thd, true) : 0);
1880 }
1881
1882
1883 /**
1884 Logging XA commit/rollback of a prepared transaction.
1885
1886 The function is called at XA-commit or XA-rollback logging via
1887 two paths: the recovered-or-slave-applier or immediately through
1888 the XA-prepared transaction connection itself.
1889 It fills in appropiate event in the statement cache whenever
1890 xid state is marked with is_binlogged() flag that indicates
1891 the prepared part of the transaction must've been logged.
1892
1893 About early returns from the function.
1894 In the recovered-or-slave-applier case the function may be called
1895 for the 2nd time, which has_logged_xid monitors.
1896 ONE_PHASE option to XA-COMMIT is handled to skip
1897 writing XA-commit event now.
1898 And the final early return check is for the read-only XA that is
1899 not to be logged.
1900
1901 @param thd THD handle
1902 @param xid a pointer to XID object that is serialized
1903 @param commit when @c true XA-COMMIT is to be logged,
1904 and @c false when it's XA-ROLLBACK.
1905 @return error code, 0 success
1906 */
1907
do_binlog_xa_commit_rollback(THD * thd,XID * xid,bool commit)1908 inline int do_binlog_xa_commit_rollback(THD *thd, XID *xid, bool commit)
1909 {
1910 assert(thd->lex->sql_command == SQLCOM_XA_COMMIT ||
1911 thd->lex->sql_command == SQLCOM_XA_ROLLBACK);
1912
1913 XID_STATE *xid_state= thd->get_transaction()->xid_state();
1914 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
1915
1916 if (cache_mngr != NULL && cache_mngr->has_logged_xid)
1917 return 0;
1918
1919 if (get_xa_opt(thd) == XA_ONE_PHASE)
1920 return 0;
1921 if (!xid_state->is_binlogged())
1922 return 0; // nothing was really logged at prepare
1923 if (thd->is_error() && DBUG_EVALUATE_IF("simulate_xa_rm_error", 0, 1))
1924 return 0; // don't binlog if there are some errors.
1925
1926 assert(!xid->is_null() ||
1927 !(thd->variables.option_bits & OPTION_BIN_LOG));
1928
1929 char buf[XID::ser_buf_size];
1930 char query[(sizeof("XA ROLLBACK")) + 1 + sizeof(buf)];
1931 int qlen= sprintf(query, "XA %s %s", commit ? "COMMIT" : "ROLLBACK",
1932 xid->serialize(buf));
1933 Query_log_event qinfo(thd, query, qlen, false, true, true, 0, false);
1934 return mysql_bin_log.write_event(&qinfo);
1935 }
1936
1937
1938 /**
1939 Logging XA commit/rollback of a prepared transaction in the case
1940 it was disconnected and resumed (recovered), or executed by a slave applier.
1941
1942 @param thd THD handle
1943 @param xid a pointer to XID object
1944 @param commit when @c true XA-COMMIT is logged, otherwise XA-ROLLBACK
1945
1946 @return error code, 0 success
1947 */
1948
binlog_xa_commit_or_rollback(THD * thd,XID * xid,bool commit)1949 inline int binlog_xa_commit_or_rollback(THD *thd, XID *xid, bool commit)
1950 {
1951 int error= 0;
1952
1953 #ifndef NDEBUG
1954 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
1955 assert(!cache_mngr || !cache_mngr->has_logged_xid);
1956 #endif
1957 if (!(error= do_binlog_xa_commit_rollback(thd, xid, commit)))
1958 {
1959 /*
1960 Error can't be propagated naturally via result.
1961 A grand-caller has to access to it through thd's da.
1962 todo:
1963 Bug #20488921 ERROR PROPAGATION DOES FULLY WORK IN XA
1964 stands in the way of implementing a failure simulation
1965 for XA PREPARE/COMMIT/ROLLBACK.
1966 */
1967 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
1968
1969 if (cache_mngr)
1970 cache_mngr->has_logged_xid= true;
1971 if (commit)
1972 (void) mysql_bin_log.commit(thd, true);
1973 else
1974 (void) mysql_bin_log.rollback(thd, true);
1975 if (cache_mngr)
1976 cache_mngr->has_logged_xid= false;
1977 }
1978 return error;
1979 }
1980
1981
binlog_xa_commit(handlerton * hton,XID * xid)1982 static int binlog_xa_commit(handlerton *hton, XID *xid)
1983 {
1984 (void) binlog_xa_commit_or_rollback(current_thd, xid, true);
1985
1986 return 0;
1987 }
1988
1989
binlog_xa_rollback(handlerton * hton,XID * xid)1990 static int binlog_xa_rollback(handlerton *hton, XID *xid)
1991 {
1992 (void) binlog_xa_commit_or_rollback(current_thd, xid, false);
1993
1994 return 0;
1995 }
1996
1997 /**
1998 When a fatal error occurs due to which binary logging becomes impossible and
1999 the user specified binlog_error_action= ABORT_SERVER the following function is
2000 invoked. This function pushes the appropriate error message to client and logs
2001 the same to server error log and then aborts the server.
2002
2003 @param err_string Error string which specifies the exact error
2004 message from the caller.
2005
2006 @retval
2007 none
2008 */
exec_binlog_error_action_abort(const char * err_string)2009 static void exec_binlog_error_action_abort(const char* err_string)
2010 {
2011 THD *thd= current_thd;
2012 /*
2013 When the code enters here it means that there was an error at higher layer
2014 and my_error function could have been invoked to let the client know what
2015 went wrong during the execution.
2016
2017 But these errors will not let the client know that the server is going to
2018 abort. Even if we add an additional my_error function call at this point
2019 client will be able to see only the first error message that was set
2020 during the very first invocation of my_error function call.
2021
2022 The advantage of having multiple my_error function calls are visible when
2023 the server is up and running and user issues SHOW WARNINGS or SHOW ERROR
2024 calls. In this special scenario server will be immediately aborted and
2025 user will not be able execute the above SHOW commands.
2026
2027 Hence we clear the previous errors and push one critical error message to
2028 clients.
2029 */
2030 if (thd)
2031 {
2032 if (thd->is_error())
2033 thd->clear_error();
2034 /*
2035 Adding ME_ERRORLOG flag will ensure that the error is sent to both
2036 client and to the server error log as well.
2037 */
2038 my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(ME_ERRORLOG + ME_FATALERROR),
2039 err_string);
2040 thd->send_statement_status();
2041 }
2042 else
2043 sql_print_error("%s",err_string);
2044 abort();
2045 }
2046
2047
2048
2049 /**
2050 This function is called once after each statement.
2051
2052 @todo This function is currently not used any more and will
2053 eventually be eliminated. The real commit job is done in the
2054 MYSQL_BIN_LOG::commit function.
2055
2056 @see MYSQL_BIN_LOG::commit
2057
2058 @param hton The binlog handlerton.
2059 @param thd The client thread that executes the transaction.
2060 @param all This is @c true if this is a real transaction commit, and
2061 @c false otherwise.
2062
2063 @see handlerton::commit
2064 */
binlog_commit(handlerton * hton,THD * thd,bool all)2065 static int binlog_commit(handlerton *hton, THD *thd, bool all)
2066 {
2067 DBUG_ENTER("binlog_commit");
2068 /*
2069 Nothing to do (any more) on commit.
2070 */
2071 DBUG_RETURN(0);
2072 }
2073
2074 /**
2075 This function is called when a transaction or a statement is rolled back.
2076
2077 @internal It is necessary to execute a rollback here if the
2078 transaction was rolled back because of executing a ROLLBACK TO
2079 SAVEPOINT command, but it is not used for normal rollback since
2080 MYSQL_BIN_LOG::rollback is called in that case.
2081
2082 @todo Refactor code to introduce a <code>MYSQL_BIN_LOG::rollback(THD
2083 *thd, SAVEPOINT *sv)</code> function in @c TC_LOG and have that
2084 function execute the necessary work to rollback to a savepoint.
2085
2086 @param hton The binlog handlerton.
2087 @param thd The client thread that executes the transaction.
2088 @param all This is @c true if this is a real transaction rollback, and
2089 @c false otherwise.
2090
2091 @see handlerton::rollback
2092 */
binlog_rollback(handlerton * hton,THD * thd,bool all)2093 static int binlog_rollback(handlerton *hton, THD *thd, bool all)
2094 {
2095 DBUG_ENTER("binlog_rollback");
2096 int error= 0;
2097 #ifdef WITH_WSREP
2098 if (thd->lex->sql_command == SQLCOM_ROLLBACK_TO_SAVEPOINT &&
2099 thd->wsrep_conflict_state != ABORTING)
2100 #else
2101 if (thd->lex->sql_command == SQLCOM_ROLLBACK_TO_SAVEPOINT)
2102 #endif /* WITH_WSREP */
2103 error= mysql_bin_log.rollback(thd, all);
2104 DBUG_RETURN(error);
2105 }
2106
2107
2108 bool
append(THD * first)2109 Stage_manager::Mutex_queue::append(THD *first)
2110 {
2111 DBUG_ENTER("Stage_manager::Mutex_queue::append");
2112 lock();
2113 DBUG_PRINT("enter", ("first: 0x%llx", (ulonglong) first));
2114 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2115 (ulonglong) m_first, (ulonglong) &m_first,
2116 (ulonglong) m_last));
2117 int32 count= 1;
2118 bool empty= (m_first == NULL);
2119 *m_last= first;
2120 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2121 (ulonglong) m_first, (ulonglong) &m_first,
2122 (ulonglong) m_last));
2123 /*
2124 Go to the last THD instance of the list. We expect lists to be
2125 moderately short. If they are not, we need to track the end of
2126 the queue as well.
2127 */
2128
2129 while (first->next_to_commit)
2130 {
2131 count++;
2132 first= first->next_to_commit;
2133 }
2134 my_atomic_add32(&m_size, count);
2135
2136 m_last= &first->next_to_commit;
2137 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2138 (ulonglong) m_first, (ulonglong) &m_first,
2139 (ulonglong) m_last));
2140 assert(m_first || m_last == &m_first);
2141 DBUG_PRINT("return", ("empty: %s", YESNO(empty)));
2142 unlock();
2143 DBUG_RETURN(empty);
2144 }
2145
2146
2147 std::pair<bool, THD*>
pop_front()2148 Stage_manager::Mutex_queue::pop_front()
2149 {
2150 DBUG_ENTER("Stage_manager::Mutex_queue::pop_front");
2151 lock();
2152 THD *result= m_first;
2153 bool more= true;
2154 /*
2155 We do not set next_to_commit to NULL here since this is only used
2156 in the flush stage. We will have to call fetch_queue last here,
2157 and will then "cut" the linked list by setting the end of that
2158 queue to NULL.
2159 */
2160 if (result)
2161 m_first= result->next_to_commit;
2162 if (m_first == NULL)
2163 {
2164 more= false;
2165 m_last = &m_first;
2166 }
2167 assert(my_atomic_load32(&m_size) > 0);
2168 my_atomic_add32(&m_size, -1);
2169 assert(m_first || m_last == &m_first);
2170 unlock();
2171 DBUG_PRINT("return", ("result: 0x%llx, more: %s",
2172 (ulonglong) result, YESNO(more)));
2173 DBUG_RETURN(std::make_pair(more, result));
2174 }
2175
2176
2177 bool
enroll_for(StageID stage,THD * thd,mysql_mutex_t * stage_mutex)2178 Stage_manager::enroll_for(StageID stage, THD *thd, mysql_mutex_t *stage_mutex)
2179 {
2180 // If the queue was empty: we're the leader for this batch
2181 DBUG_PRINT("debug", ("Enqueue 0x%llx to queue for stage %d",
2182 (ulonglong) thd, stage));
2183 bool leader= m_queue[stage].append(thd);
2184
2185 #ifdef HAVE_REPLICATION
2186 if (stage == FLUSH_STAGE && has_commit_order_manager(thd))
2187 {
2188 Slave_worker *worker= dynamic_cast<Slave_worker *>(thd->rli_slave);
2189 Commit_order_manager *mngr= worker->get_commit_order_manager();
2190
2191 mngr->unregister_trx(worker);
2192 }
2193 #endif
2194
2195 /*
2196 We do not need to unlock the stage_mutex if it is LOCK_log when rotating
2197 binlog caused by logging incident log event, since it should be held
2198 always during rotation.
2199 */
2200 bool need_unlock_stage_mutex=
2201 !(mysql_bin_log.is_rotating_caused_by_incident &&
2202 stage_mutex == mysql_bin_log.get_log_lock());
2203
2204 /*
2205 The stage mutex can be NULL if we are enrolling for the first
2206 stage.
2207 */
2208 if (stage_mutex && need_unlock_stage_mutex)
2209 mysql_mutex_unlock(stage_mutex);
2210
2211 #ifndef NDEBUG
2212 DBUG_PRINT("info", ("This is a leader thread: %d (0=n 1=y)", leader));
2213
2214 DEBUG_SYNC(thd, "after_enrolling_for_stage");
2215
2216 switch (stage)
2217 {
2218 case Stage_manager::FLUSH_STAGE:
2219 DEBUG_SYNC(thd, "bgc_after_enrolling_for_flush_stage");
2220 break;
2221 case Stage_manager::SYNC_STAGE:
2222 DEBUG_SYNC(thd, "bgc_after_enrolling_for_sync_stage");
2223 break;
2224 case Stage_manager::COMMIT_STAGE:
2225 DEBUG_SYNC(thd, "bgc_after_enrolling_for_commit_stage");
2226 break;
2227 default:
2228 // not reached
2229 assert(0);
2230 }
2231
2232 DBUG_EXECUTE_IF("assert_leader", assert(leader););
2233 DBUG_EXECUTE_IF("assert_follower", assert(!leader););
2234 #endif
2235
2236 /*
2237 If the queue was not empty, we're a follower and wait for the
2238 leader to process the queue. If we were holding a mutex, we have
2239 to release it before going to sleep.
2240 */
2241 if (!leader)
2242 {
2243 mysql_mutex_lock(&m_lock_done);
2244 #ifndef NDEBUG
2245 /*
2246 Leader can be awaiting all-clear to preempt follower's execution.
2247 With setting the status the follower ensures it won't execute anything
2248 including thread-specific code.
2249 */
2250 thd->get_transaction()->m_flags.ready_preempt= 1;
2251 if (leader_await_preempt_status)
2252 mysql_cond_signal(&m_cond_preempt);
2253 #endif
2254 while (thd->get_transaction()->m_flags.pending)
2255 mysql_cond_wait(&m_cond_done, &m_lock_done);
2256 mysql_mutex_unlock(&m_lock_done);
2257 }
2258 return leader;
2259 }
2260
2261
fetch_and_empty()2262 THD *Stage_manager::Mutex_queue::fetch_and_empty()
2263 {
2264 DBUG_ENTER("Stage_manager::Mutex_queue::fetch_and_empty");
2265 lock();
2266 DBUG_PRINT("enter", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2267 (ulonglong) m_first, (ulonglong) &m_first,
2268 (ulonglong) m_last));
2269 THD *result= m_first;
2270 m_first= NULL;
2271 m_last= &m_first;
2272 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2273 (ulonglong) m_first, (ulonglong) &m_first,
2274 (ulonglong) m_last));
2275 DBUG_PRINT("info", ("fetched queue of %d transactions", my_atomic_load32(&m_size)));
2276 DBUG_PRINT("return", ("result: 0x%llx", (ulonglong) result));
2277 assert(my_atomic_load32(&m_size) >= 0);
2278 my_atomic_store32(&m_size, 0);
2279 unlock();
2280 DBUG_RETURN(result);
2281 }
2282
wait_count_or_timeout(ulong count,long usec,StageID stage)2283 void Stage_manager::wait_count_or_timeout(ulong count, long usec, StageID stage)
2284 {
2285 long to_wait=
2286 DBUG_EVALUATE_IF("bgc_set_infinite_delay", LONG_MAX, usec);
2287 /*
2288 For testing purposes while waiting for inifinity
2289 to arrive, we keep checking the queue size at regular,
2290 small intervals. Otherwise, waiting 0.1 * infinite
2291 is too long.
2292 */
2293 long delta=
2294 DBUG_EVALUATE_IF("bgc_set_infinite_delay", 100000,
2295 max<long>(1, (to_wait * 0.1)));
2296
2297 while (to_wait > 0 && (count == 0 || static_cast<ulong>(m_queue[stage].get_size()) < count))
2298 {
2299 #ifndef NDEBUG
2300 if (current_thd)
2301 DEBUG_SYNC(current_thd, "bgc_wait_count_or_timeout");
2302 #endif
2303 my_sleep(delta);
2304 to_wait -= delta;
2305 }
2306 }
2307
signal_done(THD * queue)2308 void Stage_manager::signal_done(THD *queue)
2309 {
2310 mysql_mutex_lock(&m_lock_done);
2311 for (THD *thd= queue ; thd ; thd = thd->next_to_commit)
2312 thd->get_transaction()->m_flags.pending= false;
2313 mysql_mutex_unlock(&m_lock_done);
2314 mysql_cond_broadcast(&m_cond_done);
2315 }
2316
2317 #ifndef NDEBUG
clear_preempt_status(THD * head)2318 void Stage_manager::clear_preempt_status(THD *head)
2319 {
2320 assert(head);
2321
2322 mysql_mutex_lock(&m_lock_done);
2323 while(!head->get_transaction()->m_flags.ready_preempt)
2324 {
2325 leader_await_preempt_status= true;
2326 mysql_cond_wait(&m_cond_preempt, &m_lock_done);
2327 }
2328 leader_await_preempt_status= false;
2329 mysql_mutex_unlock(&m_lock_done);
2330 }
2331 #endif
2332
2333 /**
2334 Write a rollback record of the transaction to the binary log.
2335
2336 For binary log group commit, the rollback is separated into three
2337 parts:
2338
2339 1. First part consists of filling the necessary caches and
2340 finalizing them (if they need to be finalized). After a cache is
2341 finalized, nothing can be added to the cache.
2342
2343 2. Second part execute an ordered flush and commit. This will be
2344 done using the group commit functionality in @c ordered_commit.
2345
2346 Since we roll back the transaction early, we call @c
2347 ordered_commit with the @c skip_commit flag set. The @c
2348 ha_commit_low call inside @c ordered_commit will then not be
2349 called.
2350
2351 3. Third part checks any errors resulting from the flush and handles
2352 them appropriately.
2353
2354 @see MYSQL_BIN_LOG::ordered_commit
2355 @see ha_commit_low
2356 @see ha_rollback_low
2357
2358 @param thd Session to commit
2359 @param all This is @c true if this is a real transaction rollback, and
2360 @c false otherwise.
2361
2362 @return Error code, or zero if there were no error.
2363 */
2364
rollback(THD * thd,bool all)2365 int MYSQL_BIN_LOG::rollback(THD *thd, bool all)
2366 {
/*
  Overview of the control flow below:
    - XA ROLLBACK: log the XA ROLLBACK event first; the engine rollback is
      deferred to the `end' label.
    - Any other command except ROLLBACK TO SAVEPOINT: roll back in the
      engines right away.
    - If a cache manager exists and the caches are not empty: handle the
      statement cache, then the transaction cache, and call
      ordered_commit() with skip_commit set if something was finalized.
  `error' carries the first failure to the `end' label; when the
  before_commit hook fails the function returns RESULT_ABORTED directly.
*/
2367 int error= 0;
2368 bool stuff_logged= false;
2369 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
2370
2371 DBUG_ENTER("MYSQL_BIN_LOG::rollback(THD *thd, bool all)");
2372 DBUG_PRINT("enter", ("all: %s, cache_mngr: 0x%llx, thd->is_error: %s",
2373 YESNO(all), (ulonglong) cache_mngr,
2374 YESNO(thd->is_error())));
2375 /*
2376 Defer XA-transaction rollback until its XA-rollback event is recorded.
2377 When we are executing a ROLLBACK TO SAVEPOINT, we
2378 should only clear the caches since this function is called as part
2379 of the engine rollback.
2380 In other cases we roll back the transaction in the engines early
2381 since this will release locks and allow other transactions to
2382 start executing.
2383 */
2384 if (thd->lex->sql_command == SQLCOM_XA_ROLLBACK)
2385 {
2386 XID_STATE *xs= thd->get_transaction()->xid_state();
2387
2388 assert(all || !xs->is_binlogged() ||
2389 (!xs->is_in_recovery() && thd->is_error()));
2390 /*
2391 Whenever cache_mngr is not initialized, the xa prepared
2392 transaction's binary logging status must not be set, unless the
2393 transaction is rolled back through an external connection which
2394 has binlogging switched off.
2395 */
2396 assert(cache_mngr || !xs->is_binlogged()
2397 || !(is_open() && thd->variables.option_bits & OPTION_BIN_LOG));
2398
2399 if ((error= do_binlog_xa_commit_rollback(thd, xs->get_xid(), false)))
2400 goto end;
/*
  The cache manager is looked up again after the XA ROLLBACK event was
  handled. NOTE(review): presumably writing the event can create the
  cache manager when it did not exist yet -- confirm.
*/
2401 cache_mngr= thd_get_cache_mngr(thd);
2402 }
2403 #ifdef WITH_WSREP
2404 /*
2405 BF aborted THD may have dangling sql_command set to SQLCOM_ROLLBACK_TO_SAVEPOINT,
2406 don't care about it, as we have to BF abort this one
2407 */
2408 else if (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT ||
2409 thd->wsrep_conflict_state == ABORTING)
2410 #else
2411 else if (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT)
2412 #endif /* WITH_WSREP */
2413 if ((error= ha_rollback_low(thd, all)))
2414 goto end;
2415
2416 /*
2417 If there is no cache manager, or if there is nothing in the
2418 caches, there are no caches to roll back, so we're trivially done
2419 unless XA-ROLLBACK that yet to run rollback_low().
2420 */
2421 if (cache_mngr == NULL || cache_mngr->is_binlog_empty())
2422 {
2423 goto end;
2424 }
2425
2426 DBUG_PRINT("debug",
2427 ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
2428 YESNO(thd->get_transaction()->cannot_safely_rollback(
2429 Transaction_ctx::SESSION)),
2430 YESNO(cache_mngr->trx_cache.is_binlog_empty())));
2431 DBUG_PRINT("debug",
2432 ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
2433 YESNO(thd->get_transaction()->cannot_safely_rollback(
2434 Transaction_ctx::STMT)),
2435 YESNO(cache_mngr->stmt_cache.is_binlog_empty())));
2436
2437 /*
2438 If an incident event is set we do not flush the content of the statement
2439 cache because it may be corrupted.
2440 */
2441 if (cache_mngr->stmt_cache.has_incident())
2442 {
2443 const char* err_msg= "The content of the statement cache is corrupted "
2444 "while writing a rollback record of the transaction "
2445 "to the binary log.";
2446 error= write_incident(thd, true/*need_lock_log=true*/, err_msg);
2447 cache_mngr->stmt_cache.reset();
2448 }
2449 else if (!cache_mngr->stmt_cache.is_binlog_empty())
2450 {
2451 if (thd->lex->sql_command == SQLCOM_CREATE_TABLE &&
2452 thd->lex->select_lex->item_list.elements && /* With select */
2453 !(thd->lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) &&
2454 thd->is_current_stmt_binlog_format_row())
2455 {
2456 /*
2457 In row based binlog format, we reset the binlog statement cache
2458 when rolling back a single statement 'CREATE...SELECT' transaction,
2459 since the 'CREATE TABLE' event was put in the binlog statement cache.
2460 */
2461 cache_mngr->stmt_cache.reset();
2462 }
2463 else
2464 {
2465 if ((error= cache_mngr->stmt_cache.finalize(thd)))
2466 goto end;
2467 stuff_logged= true;
2468 }
2469 }
2470
2471 if (ending_trans(thd, all))
2472 {
2473 if (trans_cannot_safely_rollback(thd))
2474 {
2475 const char xa_rollback_str[]= "XA ROLLBACK";
2476 /*
2477 sizeof(xa_rollback_str) and XID::ser_buf_size both allocate `\0',
2478 so one of the two is used for necessary in the xa case `space' char
2479 */
2480 char query[sizeof(xa_rollback_str) + XID::ser_buf_size]= "ROLLBACK";
2481 XID_STATE *xs= thd->get_transaction()->xid_state();
2482
2483 if (thd->lex->sql_command == SQLCOM_XA_ROLLBACK)
2484 {
2485 /* this block is relevant only for not prepared yet and "local" xa trx */
2486 assert(thd->get_transaction()->xid_state()->
2487 has_state(XID_STATE::XA_IDLE));
2488 assert(!cache_mngr->has_logged_xid);
2489
/*
  Overwrite the default "ROLLBACK" text with "XA ROLLBACK " and serialize
  the XID right behind it: sizeof(xa_rollback_str) is the offset just past
  the trailing space written by sprintf().
*/
2490 sprintf(query, "%s ", xa_rollback_str);
2491 xs->get_xid()->serialize(query + sizeof(xa_rollback_str));
2492 }
2493 /*
2494 If the transaction is being rolled back and contains changes that
2495 cannot be rolled back, the trx-cache's content is flushed.
2496 */
2497 Query_log_event
2498 end_evt(thd, query, strlen(query), true, false, true, 0, true);
2499 error= thd->lex->sql_command != SQLCOM_XA_ROLLBACK ?
2500 cache_mngr->trx_cache.finalize(thd, &end_evt) :
2501 cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
2502 stuff_logged= true;
2503 }
2504 else
2505 {
2506 /*
2507 If the transaction is being rolled back and its changes can be
2508 rolled back, the trx-cache's content is truncated.
2509 */
2510 error= cache_mngr->trx_cache.truncate(thd, all);
2511 }
2512 }
2513 else
2514 {
2515 /*
2516 If a statement is being rolled back, it is necessary to know
2517 exactly why a statement may not be safely rolled back as in
2518 some specific situations the trx-cache can be truncated.
2519
2520 If a temporary table is created or dropped, the trx-cache is not
2521 truncated. Note that if the stmt-cache is used, there is nothing
2522 to truncate in the trx-cache.
2523
2524 If a non-transactional table is updated and the binlog format is
2525 statement, the trx-cache is not truncated. The trx-cache is used
2526 when the direct option is off and a transactional table has been
2527 updated before the current statement in the context of the
2528 current transaction. Note that if the stmt-cache is used there is
2529 nothing to truncate in the trx-cache.
2530
2531 If other binlog formats are used, updates to non-transactional
2532 tables are written to the stmt-cache and trx-cache can be safely
2533 truncated, if necessary.
2534 */
2535 if (thd->get_transaction()->has_dropped_temp_table(
2536 Transaction_ctx::STMT) ||
2537 thd->get_transaction()->has_created_temp_table(
2538 Transaction_ctx::STMT) ||
2539 (thd->get_transaction()->has_modified_non_trans_table(
2540 Transaction_ctx::STMT) &&
2541 thd->variables.binlog_format == BINLOG_FORMAT_STMT))
2542 {
2543 /*
2544 If the statement is being rolled back and dropped or created a
2545 temporary table or modified a non-transactional table and the
2546 statement-based replication is in use, the statement's changes
2547 in the trx-cache are preserved.
2548 */
2549 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
2550 }
2551 else
2552 {
2553 /*
2554 Otherwise, the statement's changes in the trx-cache are
2555 truncated.
2556 */
2557 error= cache_mngr->trx_cache.truncate(thd, all);
2558 }
2559 }
/*
  The commit parent is stored only when one of the caches was finalized
  above (stuff_logged), independently of `error'.
*/
2560 if (stuff_logged)
2561 {
2562 Transaction_ctx *trn_ctx= thd->get_transaction();
2563 trn_ctx->store_commit_parent(m_dependency_tracker.get_max_committed_timestamp());
2564 }
2565
2566 DBUG_PRINT("debug", ("error: %d", error));
2567 if (error == 0 && stuff_logged)
2568 {
/*
  A failing before_commit hook returns RESULT_ABORTED right here, so the
  code after the `end' label is not executed on that path.
*/
2569 if (RUN_HOOK(transaction,
2570 before_commit,
2571 (thd, all,
2572 thd_get_cache_mngr(thd)->get_binlog_cache_log(true),
2573 thd_get_cache_mngr(thd)->get_binlog_cache_log(false),
2574 max<my_off_t>(max_binlog_cache_size,
2575 max_binlog_stmt_cache_size))))
2576 {
2577 //Reset the thread OK status before changing the outcome.
2578 if (thd->get_stmt_da()->is_ok())
2579 thd->get_stmt_da()->reset_diagnostics_area();
2580 my_error(ER_RUN_HOOK_ERROR, MYF(0), "before_commit");
2581 DBUG_RETURN(RESULT_ABORTED);
2582 }
2583 #ifndef NDEBUG
2584 /*
2585 XA rollback is always accepted.
2586 */
2587 if (thd->get_transaction()->get_rpl_transaction_ctx()->is_transaction_rollback())
2588 assert(0);
2589 #endif
2590
2591 error= ordered_commit(thd, all, /* skip_commit */ true);
2592 }
2593
2594 #ifdef WITH_WSREP
2595 if (!WSREP_EMULATE_BINLOG(thd) && check_write_error(thd))
2596 #else
2597 if (check_write_error(thd))
2598 #endif
2599 {
2600 /*
2601 We reach this point if the effect of a statement did not properly get into
2602 a cache and need to be rolled back.
2603 */
2604 error|= cache_mngr->trx_cache.truncate(thd, all);
2605 }
2606
2607 end:
2608 /* Deferred xa rollback to engines */
2609 if (!error && thd->lex->sql_command == SQLCOM_XA_ROLLBACK)
2610 {
2611 error= ha_rollback_low(thd, all);
2612 /* Successful XA-rollback commits the new gtid_state */
2613 if (!error && !thd->is_error())
2614 gtid_state->update_on_commit(thd);
2615 }
2616 /*
2617 When a statement errors out on auto-commit mode it is rollback
2618 implicitly, so the same should happen to its GTID.
2619 */
2620 if (!thd->in_active_multi_stmt_transaction())
2621 gtid_state->update_on_rollback(thd);
2622
2623 /*
2624 TODO: some errors are overwritten, which may cause problem,
2625 fix it later.
2626 */
2627 DBUG_PRINT("return", ("error: %d", error));
2628 DBUG_RETURN(error);
2629 }
2630
2631 /**
2632 @note
2633 How do we handle this (unlikely but legal) case:
2634 @verbatim
2635 [transaction] + [update to non-trans table] + [rollback to savepoint] ?
2636 @endverbatim
2637 The problem occurs when a savepoint is before the update to the
2638 non-transactional table. Then when there's a rollback to the savepoint, if we
2639 simply truncate the binlog cache, we lose the part of the binlog cache where
2640 the update is. If we want to not lose it, we need to write the SAVEPOINT
2641 command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
2642 is easy: it's just write at the end of the binlog cache, but the former
2643 should be *inserted* to the place where the user called SAVEPOINT. The
2644 solution is that when the user calls SAVEPOINT, we write it to the binlog
2645 cache (so no need to later insert it). As transactions are never intermixed
2646 in the binary log (i.e. they are serialized), we won't have conflicts with
2647 savepoint names when using mysqlbinlog or in the slave SQL thread.
2648 Then when ROLLBACK TO SAVEPOINT is called, if we updated some
2649 non-transactional table, we don't truncate the binlog cache but instead write
2650 ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
2651 will chop the SAVEPOINT command from the binlog cache, which is good as in
2652 that case there is no need to have it in the binlog).
2653 */
2654
binlog_savepoint_set(handlerton * hton,THD * thd,void * sv)2655 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
2656 {
2657 DBUG_ENTER("binlog_savepoint_set");
2658 int error= 1;
2659 #ifdef WITH_WSREP
2660 /*
2661 Clear table maps before writing SAVEPOINT event. This enforces
2662 recreation of table map events for the following row event.
2663 */
2664 thd->clear_binlog_table_maps();
2665 #endif /* WITH_WSREP */
2666 String log_query;
2667 if (log_query.append(STRING_WITH_LEN("SAVEPOINT ")))
2668 DBUG_RETURN(error);
2669 else
2670 append_identifier(thd, &log_query, thd->lex->ident.str,
2671 thd->lex->ident.length);
2672
2673 int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
2674 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2675 TRUE, FALSE, TRUE, errcode);
2676 /*
2677 We cannot record the position before writing the statement
2678 because a rollback to a savepoint (.e.g. consider it "S") would
2679 prevent the savepoint statement (i.e. "SAVEPOINT S") from being
2680 written to the binary log despite the fact that the server could
2681 still issue other rollback statements to the same savepoint (i.e.
2682 "S").
2683 Given that the savepoint is valid until the server releases it,
2684 ie, until the transaction commits or it is released explicitly,
2685 we need to log it anyway so that we don't have "ROLLBACK TO S"
2686 or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
2687 log.
2688 */
2689 if (!(error= mysql_bin_log.write_event(&qinfo)))
2690 binlog_trans_log_savepos(thd, (my_off_t*) sv);
2691
2692 DBUG_RETURN(error);
2693 }
2694
binlog_savepoint_rollback(handlerton * hton,THD * thd,void * sv)2695 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
2696 {
2697 DBUG_ENTER("binlog_savepoint_rollback");
2698 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
2699 my_off_t pos= *(my_off_t*) sv;
2700 assert(pos != ~(my_off_t) 0);
2701
2702 /*
2703 Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
2704 non-transactional table. Otherwise, truncate the binlog cache starting
2705 from the SAVEPOINT command.
2706 */
2707 if (trans_cannot_safely_rollback(thd))
2708 {
2709 String log_query;
2710 if (log_query.append(STRING_WITH_LEN("ROLLBACK TO ")))
2711 DBUG_RETURN(1);
2712 else
2713 {
2714 /*
2715 Before writing identifier to the binlog, make sure to
2716 quote the identifier properly so as to prevent any SQL
2717 injection on the slave.
2718 */
2719 append_identifier(thd, &log_query, thd->lex->ident.str,
2720 thd->lex->ident.length);
2721 }
2722
2723 int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
2724 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2725 TRUE, FALSE, TRUE, errcode);
2726 DBUG_RETURN(mysql_bin_log.write_event(&qinfo));
2727 }
2728 // Otherwise, we truncate the cache
2729 cache_mngr->trx_cache.restore_savepoint(pos);
2730 /*
2731 When a SAVEPOINT is executed inside a stored function/trigger we force the
2732 pending event to be flushed with a STMT_END_F flag and clear the table maps
2733 as well to ensure that following DMLs will have a clean state to start
2734 with. ROLLBACK inside a stored routine has to finalize possibly existing
2735 current row-based pending event with cleaning up table maps. That ensures
2736 that following DMLs will have a clean state to start with.
2737 */
2738 if (thd->in_sub_stmt)
2739 thd->clear_binlog_table_maps();
2740 DBUG_RETURN(0);
2741 }
2742
2743 /**
2744 Converts an error code used by the log purging routines (on both the
2745 master and the slave side) into a user-level error code.
2746 Called from @c purge_error_message(), @c MYSQL_BIN_LOG::reset_logs()
2747
2748 @param res an error code as used by purging routines
2749
2750 @return the user level error code ER_*
2751 */
purge_log_get_error_code(int res)2752 static uint purge_log_get_error_code(int res)
2753 {
2754 uint errcode= 0;
2755
2756 switch (res) {
2757 case 0: break;
2758 case LOG_INFO_EOF: errcode= ER_UNKNOWN_TARGET_BINLOG; break;
2759 case LOG_INFO_IO: errcode= ER_IO_ERR_LOG_INDEX_READ; break;
2760 case LOG_INFO_INVALID:errcode= ER_BINLOG_PURGE_PROHIBITED; break;
2761 case LOG_INFO_SEEK: errcode= ER_FSEEK_FAIL; break;
2762 case LOG_INFO_MEM: errcode= ER_OUT_OF_RESOURCES; break;
2763 case LOG_INFO_FATAL: errcode= ER_BINLOG_PURGE_FATAL_ERR; break;
2764 case LOG_INFO_IN_USE: errcode= ER_LOG_IN_USE; break;
2765 case LOG_INFO_EMFILE: errcode= ER_BINLOG_PURGE_EMFILE; break;
2766 default: errcode= ER_LOG_PURGE_UNKNOWN_ERR; break;
2767 }
2768
2769 return errcode;
2770 }
2771
2772 /**
2773 Check whether binlog state allows to safely release MDL locks after
2774 rollback to savepoint.
2775
2776 @param hton The binlog handlerton.
2777 @param thd The client thread that executes the transaction.
2778
2779 @return true - It is safe to release MDL locks.
2780 false - If it is not.
2781 */
binlog_savepoint_rollback_can_release_mdl(handlerton * hton,THD * thd)2782 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
2783 THD *thd)
2784 {
2785 DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
2786 /**
2787 If we have not updated any non-transactional tables rollback
2788 to savepoint will simply truncate binlog cache starting from
2789 SAVEPOINT command. So it should be safe to release MDL acquired
2790 after SAVEPOINT command in this case.
2791 */
2792 DBUG_RETURN(!trans_cannot_safely_rollback(thd));
2793 }
2794
2795 #ifdef HAVE_REPLICATION
2796 /**
2797 Adjust log offset in the binary log file for all running slaves
2798 This class implements call back function for do_for_all_thd().
2799 It is called for each thd in thd list to adjust offset.
2800 */
2801 class Adjust_offset : public Do_THD_Impl
2802 {
2803 public:
Adjust_offset(my_off_t value)2804 Adjust_offset(my_off_t value) : m_purge_offset(value) {}
operator ()(THD * thd)2805 virtual void operator()(THD *thd)
2806 {
2807 LOG_INFO* linfo;
2808 mysql_mutex_lock(&thd->LOCK_thd_data);
2809 if ((linfo= thd->current_linfo))
2810 {
2811 /*
2812 Index file offset can be less that purge offset only if
2813 we just started reading the index file. In that case
2814 we have nothing to adjust.
2815 */
2816 if (linfo->index_file_offset < m_purge_offset)
2817 linfo->fatal = (linfo->index_file_offset != 0);
2818 else
2819 linfo->index_file_offset -= m_purge_offset;
2820 }
2821 mysql_mutex_unlock(&thd->LOCK_thd_data);
2822 }
2823 private:
2824 my_off_t m_purge_offset;
2825 };
2826
2827 /*
2828 Adjust the position pointer in the binary log file for all running slaves.
2829
2830 SYNOPSIS
2831 adjust_linfo_offsets()
2832 purge_offset Number of bytes removed from start of log index file
2833
2834 NOTES
2835 - This is called when doing a PURGE when we delete lines from the
2836 index log file.
2837
2838 REQUIREMENTS
2839 - Before calling this function, we have to ensure that no threads are
2840 using any binary log file before purge_offset.
2841
2842 TODO
2843 - Inform the slave threads that they should sync the position
2844 in the binary log file with flush_relay_log_info.
2845 Currently the sync is only done on the next read.
2846 */
adjust_linfo_offsets(my_off_t purge_offset)2847 static void adjust_linfo_offsets(my_off_t purge_offset)
2848 {
2849 Adjust_offset adjust_offset(purge_offset);
2850 Global_THD_manager::get_instance()->do_for_all_thd(&adjust_offset);
2851 }
2852
2853 /**
2854 This class implements Call back function for do_for_all_thd().
2855 It is called for each thd in thd list to count
2856 threads using bin log file
2857 */
2858
2859 class Log_in_use : public Do_THD_Impl
2860 {
2861 public:
Log_in_use(const char * value)2862 Log_in_use(const char* value) : m_log_name(value), m_count(0)
2863 {
2864 m_log_name_len = strlen(m_log_name) + 1;
2865 }
operator ()(THD * thd)2866 virtual void operator()(THD *thd)
2867 {
2868 LOG_INFO* linfo;
2869 mysql_mutex_lock(&thd->LOCK_thd_data);
2870 if ((linfo = thd->current_linfo))
2871 {
2872 if(!strncmp(m_log_name, linfo->log_file_name, m_log_name_len))
2873 {
2874 sql_print_warning("file %s was not purged because it was being read"
2875 "by thread number %u", m_log_name, thd->thread_id());
2876 m_count++;
2877 }
2878 }
2879 mysql_mutex_unlock(&thd->LOCK_thd_data);
2880 }
get_count()2881 int get_count() { return m_count; }
2882 private:
2883 const char* m_log_name;
2884 size_t m_log_name_len;
2885 int m_count;
2886 };
2887
log_in_use(const char * log_name)2888 static int log_in_use(const char* log_name)
2889 {
2890 Log_in_use log_in_use(log_name);
2891 #ifndef NDEBUG
2892 if (current_thd)
2893 DEBUG_SYNC(current_thd,"purge_logs_after_lock_index_before_thread_count");
2894 #endif
2895 Global_THD_manager::get_instance()->do_for_all_thd(&log_in_use);
2896 return log_in_use.get_count();
2897 }
2898
purge_error_message(THD * thd,int res)2899 static bool purge_error_message(THD* thd, int res)
2900 {
2901 uint errcode;
2902
2903 if ((errcode= purge_log_get_error_code(res)) != 0)
2904 {
2905 my_message(errcode, ER(errcode), MYF(0));
2906 return TRUE;
2907 }
2908 my_ok(thd);
2909 return FALSE;
2910 }
2911
2912 #endif /* HAVE_REPLICATION */
2913
check_binlog_magic(IO_CACHE * log,const char ** errmsg)2914 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
2915 {
2916 char magic[4];
2917 assert(my_b_tell(log) == 0);
2918
2919 if (my_b_read(log, (uchar*) magic, sizeof(magic)))
2920 {
2921 *errmsg = "I/O error reading the header from the binary log";
2922 sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno(),
2923 log->error);
2924 return 1;
2925 }
2926 if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
2927 {
2928 *errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL";
2929 return 1;
2930 }
2931 return 0;
2932 }
2933
2934
open_binlog_file(IO_CACHE * log,const char * log_file_name,const char ** errmsg)2935 File open_binlog_file(IO_CACHE *log, const char *log_file_name, const char **errmsg)
2936 {
2937 File file;
2938 DBUG_ENTER("open_binlog_file");
2939
2940 if ((file= mysql_file_open(key_file_binlog,
2941 log_file_name, O_RDONLY | O_BINARY | O_SHARE,
2942 MYF(MY_WME))) < 0)
2943 {
2944 sql_print_error("Failed to open log (file '%s', errno %d)",
2945 log_file_name, my_errno());
2946 *errmsg = "Could not open log file";
2947 goto err;
2948 }
2949 if (init_io_cache_ext(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
2950 MYF(MY_WME|MY_DONT_CHECK_FILESIZE), key_file_binlog_cache))
2951 {
2952 sql_print_error("Failed to create a cache on log (file '%s')",
2953 log_file_name);
2954 *errmsg = "Could not open log file";
2955 goto err;
2956 }
2957 if (check_binlog_magic(log,errmsg))
2958 goto err;
2959 DBUG_RETURN(file);
2960
2961 err:
2962 if (file >= 0)
2963 {
2964 mysql_file_close(file, MYF(0));
2965 end_io_cache(log);
2966 }
2967 DBUG_RETURN(-1);
2968 }
2969
is_transaction_empty(THD * thd)2970 bool is_transaction_empty(THD *thd)
2971 {
2972 DBUG_ENTER("is_transaction_empty");
2973 int rw_ha_count= check_trx_rw_engines(thd, Transaction_ctx::SESSION);
2974 rw_ha_count+= check_trx_rw_engines(thd, Transaction_ctx::STMT);
2975 DBUG_RETURN(rw_ha_count == 0);
2976 }
2977
check_trx_rw_engines(THD * thd,Transaction_ctx::enum_trx_scope trx_scope)2978 int check_trx_rw_engines(THD *thd, Transaction_ctx::enum_trx_scope trx_scope)
2979 {
2980 DBUG_ENTER("check_trx_rw_engines");
2981
2982 int rw_ha_count= 0;
2983 Ha_trx_info *ha_list=
2984 (Ha_trx_info *)thd->get_transaction()->ha_trx_info(trx_scope);
2985
2986 for (Ha_trx_info *ha_info= ha_list; ha_info; ha_info= ha_info->next()) {
2987 if (ha_info->is_trx_read_write())
2988 ++rw_ha_count;
2989 }
2990 DBUG_RETURN(rw_ha_count);
2991 }
2992
is_empty_transaction_in_binlog_cache(const THD * thd)2993 bool is_empty_transaction_in_binlog_cache(const THD* thd)
2994 {
2995 DBUG_ENTER("is_empty_transaction_in_binlog_cache");
2996
2997 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
2998 if (cache_mngr != NULL && cache_mngr->has_empty_transaction())
2999 {
3000 DBUG_RETURN(true);
3001 }
3002
3003 DBUG_RETURN(false);
3004 }
3005
3006
3007 /**
3008 This function checks if a transactional table was updated by the
3009 current transaction.
3010
3011 @param thd The client thread that executed the current statement.
3012 @return
3013 @c true if a transactional table was updated, @c false otherwise.
3014 */
3015 bool
trans_has_updated_trans_table(const THD * thd)3016 trans_has_updated_trans_table(const THD* thd)
3017 {
3018 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
3019
3020 return (cache_mngr ? !cache_mngr->trx_cache.is_binlog_empty() : 0);
3021 }
3022
3023 /**
3024 This function checks if a transactional table was updated by the
3025 current statement.
3026
3027 @param ha_list Registered storage engine handler list.
3028 @return
3029 @c true if a transactional table was updated, @c false otherwise.
3030 */
3031 bool
stmt_has_updated_trans_table(Ha_trx_info * ha_list)3032 stmt_has_updated_trans_table(Ha_trx_info* ha_list)
3033 {
3034 const Ha_trx_info *ha_info;
3035 for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
3036 {
3037 if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
3038 return (TRUE);
3039 }
3040 return (FALSE);
3041 }
3042
3043 bool
trans_has_noop_dml(Ha_trx_info * ha_list)3044 trans_has_noop_dml(Ha_trx_info* ha_list)
3045 {
3046 const Ha_trx_info *ha_info;
3047 for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
3048 {
3049 if (ha_info->is_trx_noop_read_write())
3050 return (TRUE);
3051 }
3052 return (FALSE);
3053 }
3054
3055 /**
3056 This function checks if a transaction, either a multi-statement
3057 or a single statement transaction is about to commit or not.
3058
3059 @param thd The client thread that executed the current statement.
3060 @param all Committing a transaction (i.e. TRUE) or a statement
3061 (i.e. FALSE).
3062 @return
3063 @c true if committing a transaction, otherwise @c false.
3064 */
ending_trans(THD * thd,const bool all)3065 bool ending_trans(THD* thd, const bool all)
3066 {
3067 return (all || ending_single_stmt_trans(thd, all));
3068 }
3069
3070 /**
3071 This function checks if a single statement transaction is about
3072 to commit or not.
3073
3074 @param thd The client thread that executed the current statement.
3075 @param all Committing a transaction (i.e. TRUE) or a statement
3076 (i.e. FALSE).
3077 @return
3078 @c true if committing a single statement transaction, otherwise
3079 @c false.
3080 */
ending_single_stmt_trans(THD * thd,const bool all)3081 bool ending_single_stmt_trans(THD* thd, const bool all)
3082 {
3083 return (!all && !thd->in_multi_stmt_transaction_mode());
3084 }
3085
3086 /**
3087 This function checks if a transaction cannot be rolled back safely.
3088
3089 @param thd The client thread that executed the current statement.
3090 @return
3091 @c true if cannot be safely rolled back, @c false otherwise.
3092 */
trans_cannot_safely_rollback(const THD * thd)3093 bool trans_cannot_safely_rollback(const THD* thd)
3094 {
3095 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
3096
3097 return cache_mngr->trx_cache.cannot_rollback();
3098 }
3099
3100 /**
3101 This function checks if current statement cannot be rollded back safely.
3102
3103 @param thd The client thread that executed the current statement.
3104 @return
3105 @c true if cannot be safely rolled back, @c false otherwise.
3106 */
stmt_cannot_safely_rollback(const THD * thd)3107 bool stmt_cannot_safely_rollback(const THD* thd)
3108 {
3109 return thd->get_transaction()->cannot_safely_rollback(Transaction_ctx::STMT);
3110 }
3111
3112 #ifndef EMBEDDED_LIBRARY
3113 /**
3114 Execute a PURGE BINARY LOGS TO <log> command.
3115
3116 @param thd Pointer to THD object for the client thread executing the
3117 statement.
3118
3119 @param to_log Name of the last log to purge.
3120
3121 @retval FALSE success
3122 @retval TRUE failure
3123 */
purge_master_logs(THD * thd,const char * to_log)3124 bool purge_master_logs(THD* thd, const char* to_log)
3125 {
3126 char search_file_name[FN_REFLEN];
3127 if (!mysql_bin_log.is_open())
3128 {
3129 my_ok(thd);
3130 return FALSE;
3131 }
3132
3133 mysql_bin_log.make_log_name(search_file_name, to_log);
3134 return purge_error_message(thd,
3135 mysql_bin_log.purge_logs(search_file_name, false,
3136 true/*need_lock_index=true*/,
3137 true/*need_update_threads=true*/,
3138 NULL, false));
3139 }
3140
3141
3142 /**
3143 Execute a PURGE BINARY LOGS BEFORE <date> command.
3144
3145 @param thd Pointer to THD object for the client thread executing the
3146 statement.
3147
3148 @param purge_time Date before which logs should be purged.
3149
3150 @retval FALSE success
3151 @retval TRUE failure
3152 */
purge_master_logs_before_date(THD * thd,time_t purge_time)3153 bool purge_master_logs_before_date(THD* thd, time_t purge_time)
3154 {
3155 if (!mysql_bin_log.is_open())
3156 {
3157 my_ok(thd);
3158 return 0;
3159 }
3160 return purge_error_message(thd,
3161 mysql_bin_log.purge_logs_before_date(purge_time,
3162 false));
3163 }
3164 #endif /* EMBEDDED_LIBRARY */
3165
3166 /*
3167 Helper function to get the error code of the query to be binlogged.
3168 */
query_error_code(THD * thd,bool not_killed)3169 int query_error_code(THD *thd, bool not_killed)
3170 {
3171 int error;
3172
3173 if (not_killed || (thd->killed == THD::KILL_BAD_DATA))
3174 {
3175 error= thd->is_error() ? thd->get_stmt_da()->mysql_errno() : 0;
3176
3177 /* thd->get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
3178 ER_QUERY_INTERRUPTED, So here we need to make sure that error
3179 is not set to these errors when specified not_killed by the
3180 caller.
3181 */
3182 if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED)
3183 error= 0;
3184 }
3185 else
3186 error= thd->killed_errno();
3187
3188 return error;
3189 }
3190
3191
3192 /**
3193 Copy content of 'from' file from offset to 'to' file.
3194
3195 - We do the copy outside of the IO_CACHE as the cache
3196 buffers would just make things slower and more complicated.
3197 In most cases the copy loop should only do one read.
3198
3199 @param from File to copy.
3200 @param to File to copy to.
3201 @param offset Offset in 'from' file.
3202
3203
3204 @retval
3205 0 ok
3206 @retval
3207 -1 error
3208 */
copy_file(IO_CACHE * from,IO_CACHE * to,my_off_t offset)3209 static bool copy_file(IO_CACHE *from, IO_CACHE *to, my_off_t offset)
3210 {
3211 int bytes_read;
3212 uchar io_buf[IO_SIZE*2];
3213 DBUG_ENTER("copy_file");
3214
3215 mysql_file_seek(from->file, offset, MY_SEEK_SET, MYF(0));
3216 while(TRUE)
3217 {
3218 if ((bytes_read= (int) mysql_file_read(from->file, io_buf, sizeof(io_buf),
3219 MYF(MY_WME)))
3220 < 0)
3221 goto err;
3222 if (DBUG_EVALUATE_IF("fault_injection_copy_part_file", 1, 0))
3223 bytes_read= bytes_read/2;
3224 if (!bytes_read)
3225 break; // end of file
3226 if (mysql_file_write(to->file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
3227 goto err;
3228 }
3229
3230 DBUG_RETURN(0);
3231
3232 err:
3233 DBUG_RETURN(1);
3234 }
3235
3236
3237 #ifdef HAVE_REPLICATION
3238 /**
3239 Load data's io cache specific hook to be executed
3240 before a chunk of data is being read into the cache's buffer
3241 The fuction instantianates and writes into the binlog
3242 replication events along LOAD DATA processing.
3243
3244 @param file pointer to io-cache
3245 @retval 0 success
3246 @retval 1 failure
3247 */
log_loaded_block(IO_CACHE * file)3248 int log_loaded_block(IO_CACHE* file)
3249 {
3250 DBUG_ENTER("log_loaded_block");
3251 LOAD_FILE_INFO *lf_info;
3252 uint block_len;
3253 /* buffer contains position where we started last read */
3254 uchar* buffer= (uchar*) my_b_get_buffer_start(file);
3255 uint max_event_size= current_thd->variables.max_allowed_packet;
3256 lf_info= (LOAD_FILE_INFO*) file->arg;
3257 if (lf_info->thd->is_current_stmt_binlog_format_row())
3258 DBUG_RETURN(0);
3259 if (lf_info->last_pos_in_file != HA_POS_ERROR &&
3260 lf_info->last_pos_in_file >= my_b_get_pos_in_file(file))
3261 DBUG_RETURN(0);
3262
3263 for (block_len= (uint) (my_b_get_bytes_in_buffer(file)); block_len > 0;
3264 buffer += min(block_len, max_event_size),
3265 block_len -= min(block_len, max_event_size))
3266 {
3267 lf_info->last_pos_in_file= my_b_get_pos_in_file(file);
3268 if (lf_info->wrote_create_file)
3269 {
3270 Append_block_log_event a(lf_info->thd, lf_info->thd->db().str, buffer,
3271 min(block_len, max_event_size),
3272 lf_info->log_delayed);
3273 if (mysql_bin_log.write_event(&a))
3274 DBUG_RETURN(1);
3275 }
3276 else
3277 {
3278 Begin_load_query_log_event b(lf_info->thd, lf_info->thd->db().str,
3279 buffer,
3280 min(block_len, max_event_size),
3281 lf_info->log_delayed);
3282 if (mysql_bin_log.write_event(&b))
3283 DBUG_RETURN(1);
3284 lf_info->wrote_create_file= 1;
3285 }
3286 }
3287 DBUG_RETURN(0);
3288 }
3289
/*
  Helper function for SHOW BINLOG/RELAYLOG EVENTS.

  Locates the requested log file (or the first one when none was named),
  opens it, and sends the events found from the requested position on to
  the client, honouring the statement's LIMIT offset and row count.

  While the file is being read, thd->current_linfo points at the local
  LOG_INFO; it is reset before returning.

  @param thd         Session executing the statement.
  @param binary_log  Log object to read events from.

  @retval FALSE  success (EOF sent to the client)
  @retval TRUE   failure (ER_ERROR_WHEN_EXECUTING_COMMAND raised)
*/
bool show_binlog_events(THD *thd, MYSQL_BIN_LOG *binary_log)
{
  Protocol *protocol= thd->get_protocol();
  List<Item> field_list;
  const char *errmsg = 0;
  bool ret = TRUE;
  IO_CACHE log;
  File file = -1;
  /* Saved so that the temporary increase below can be undone on exit. */
  int old_max_allowed_packet= thd->variables.max_allowed_packet;
  LOG_INFO linfo;

  DBUG_ENTER("show_binlog_events");

  assert(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS ||
         thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS);

  Format_description_log_event *description_event= new
    Format_description_log_event(3); /* MySQL 4.0 by default */

  /* A closed log yields an empty result rather than an error. */
  if (binary_log->is_open())
  {
    LEX_MASTER_INFO *lex_mi= &thd->lex->mi;
    SELECT_LEX_UNIT *unit= thd->lex->unit;
    ha_rows event_count, limit_start, limit_end;
    my_off_t pos = max<my_off_t>(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly
    char search_file_name[FN_REFLEN], *name;
    const char *log_file_name = lex_mi->log_file_name;
    mysql_mutex_t *log_lock = binary_log->get_log_lock();
    Log_event* ev;

    unit->set_limit(thd->lex->current_select());
    limit_start= unit->offset_limit_cnt;
    limit_end= unit->select_limit_cnt;

    name= search_file_name;
    if (log_file_name)
      binary_log->make_log_name(search_file_name, log_file_name);
    else
      name=0;					// Find first log

    linfo.index_file_offset = 0;

    if (binary_log->find_log_pos(&linfo, name, true/*need_lock_index=true*/))
    {
      errmsg = "Could not find target log";
      goto err;
    }

    /* Publish the LOG_INFO on the session under LOCK_thd_data. */
    mysql_mutex_lock(&thd->LOCK_thd_data);
    thd->current_linfo = &linfo;
    mysql_mutex_unlock(&thd->LOCK_thd_data);

    if ((file=open_binlog_file(&log, linfo.log_file_name, &errmsg)) < 0)
      goto err;

    my_off_t end_pos;
    /*
      Acquire LOCK_log only for the duration to calculate the
      log's end position. LOCK_log should be acquired even while
      we are checking whether the log is active log or not.
    */
    mysql_mutex_lock(log_lock);
    if (binary_log->is_active(linfo.log_file_name))
    {
      LOG_INFO li;
      binary_log->get_current_log(&li, false /*LOCK_log is already acquired*/);
      end_pos= li.pos;
    }
    else
    {
      end_pos= my_b_filelength(&log);
    }
    mysql_mutex_unlock(log_lock);

    /*
      to account binlog event header size
    */
    thd->variables.max_allowed_packet += MAX_LOG_EVENT_HEADER;

    DEBUG_SYNC(thd, "after_show_binlog_event_found_file");

    /*
      open_binlog_file() sought to position 4.
      Read the first event in case it's a Format_description_log_event, to
      know the format. If there's no such event, we are 3.23 or 4.x. This
      code, like before, can't read 3.23 binlogs.
      This code will fail on a mixed relay log (one which has Format_desc then
      Rotate then Format_desc).
    */
    ev= Log_event::read_log_event(&log, (mysql_mutex_t*)0, description_event,
                                   opt_master_verify_checksum);
    if (ev)
    {
      if (ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT)
      {
        /* The event read from the file replaces the default description. */
        delete description_event;
        description_event= (Format_description_log_event*) ev;
      }
      else
        delete ev;
    }

    /* Move to the position the user asked for (at least the header size). */
    my_b_seek(&log, pos);

    if (!description_event->is_valid())
    {
      errmsg="Invalid Format_description event; could be out of memory";
      goto err;
    }

    for (event_count = 0;
         (ev = Log_event::read_log_event(&log, (mysql_mutex_t*) 0,
                                         description_event,
                                         opt_master_verify_checksum)); )
    {
      DEBUG_SYNC(thd, "wait_in_show_binlog_events_loop");
      /* Follow the checksum algorithm of any later format description. */
      if (ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT)
        description_event->common_footer->checksum_alg=
                           ev->common_footer->checksum_alg;
      /* Events before the LIMIT offset are read but not sent. */
      if (event_count >= limit_start &&
	  ev->net_send(protocol, linfo.log_file_name, pos))
      {
	errmsg = "Net error";
	delete ev;
	goto err;
      }

      pos = my_b_tell(&log);
      delete ev;

      /* Stop at the LIMIT row count or at the end position computed above. */
      if (++event_count >= limit_end || pos >= end_pos)
	break;
    }

    /* The loop ended early because of a read failure, not the limits. */
    if (event_count < limit_end && log.error)
    {
      errmsg = "Wrong offset or I/O error";
      goto err;
    }

  }
  // Check that linfo is still on the function scope.
  DEBUG_SYNC(thd, "after_show_binlog_events");

  ret= FALSE;

err:
  /* Common exit path: reached both on success and on failure. */
  delete description_event;
  if (file >= 0)
  {
    end_io_cache(&log);
    mysql_file_close(file, MYF(MY_WME));
  }

  if (errmsg)
  {
    if(thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS)
      my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
             "SHOW RELAYLOG EVENTS", errmsg);
    else
      my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
             "SHOW BINLOG EVENTS", errmsg);
  }
  else
    my_eof(thd);

  /* linfo is a local; detach it from the session before it goes away. */
  mysql_mutex_lock(&thd->LOCK_thd_data);
  thd->current_linfo = 0;
  mysql_mutex_unlock(&thd->LOCK_thd_data);
  thd->variables.max_allowed_packet= old_max_allowed_packet;
  DBUG_RETURN(ret);
}
3463
3464 /**
3465 Execute a SHOW BINLOG EVENTS statement.
3466
3467 @param thd Pointer to THD object for the client thread executing the
3468 statement.
3469
3470 @retval FALSE success
3471 @retval TRUE failure
3472 */
mysql_show_binlog_events(THD * thd)3473 bool mysql_show_binlog_events(THD* thd)
3474 {
3475 List<Item> field_list;
3476 DBUG_ENTER("mysql_show_binlog_events");
3477
3478 assert(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS);
3479
3480 Log_event::init_show_field_list(&field_list);
3481 if (thd->send_result_metadata(&field_list,
3482 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
3483 DBUG_RETURN(TRUE);
3484
3485 /*
3486 Wait for handlers to insert any pending information
3487 into the binlog. For e.g. ndb which updates the binlog asynchronously
3488 this is needed so that the uses sees all its own commands in the binlog
3489 */
3490 ha_binlog_wait(thd);
3491
3492 DBUG_RETURN(show_binlog_events(thd, &mysql_bin_log));
3493 }
3494
3495 #endif /* HAVE_REPLICATION */
3496
3497
/**
  Construct a log object in the closed state.

  Only plain members are set up here; mutexes and condition variables
  are created later by init_pthread_objects().

  @param sync_period        Pointer stored in sync_period_ptr.
  @param io_cache_type_arg  Cache type stored in io_cache_type.
*/
MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period,
                             enum cache_type io_cache_type_arg)
  :name(NULL), write_error(false), inited(false),
   io_cache_type(io_cache_type_arg),
#ifdef HAVE_PSI_INTERFACE
   m_key_LOCK_log(key_LOG_LOCK_log),
#endif
   bytes_written(0), file_id(1), open_count(1),
   sync_period_ptr(sync_period), sync_counter(0),
   is_relay_log(0), signal_cnt(0),
   checksum_alg_reset(binary_log::BINLOG_CHECKSUM_ALG_UNDEF),
   relay_log_checksum_alg(binary_log::BINLOG_CHECKSUM_ALG_UNDEF),
   previous_gtid_set_relaylog(0), is_rotating_caused_by_incident(false)
{
  log_state.atomic_set(LOG_CLOSED);
  /*
    We don't want to initialize locks here as such initialization depends on
    safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
    called only in main(). Doing initialization here would make it happen
    before main().
  */
  m_prep_xids.atomic_set(0);
  /* Start with all file/cache structures zeroed and no index file name. */
  memset(&log_file, 0, sizeof(log_file));
  index_file_name[0] = 0;
  memset(&index_file, 0, sizeof(index_file));
  memset(&purge_index_file, 0, sizeof(purge_index_file));
  memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
}
3526
3527
3528 /* this is called only once */
3529
cleanup()3530 void MYSQL_BIN_LOG::cleanup()
3531 {
3532 DBUG_ENTER("cleanup");
3533 if (inited)
3534 {
3535 inited= 0;
3536 close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, true /*need_lock_log=true*/,
3537 true /*need_lock_index=true*/);
3538 mysql_mutex_destroy(&LOCK_log);
3539 mysql_mutex_destroy(&LOCK_index);
3540 mysql_mutex_destroy(&LOCK_commit);
3541 mysql_mutex_destroy(&LOCK_sync);
3542 mysql_mutex_destroy(&LOCK_binlog_end_pos);
3543 mysql_mutex_destroy(&LOCK_xids);
3544 mysql_cond_destroy(&update_cond);
3545 mysql_cond_destroy(&m_prep_xids_cond);
3546 stage_manager.deinit();
3547 }
3548 DBUG_VOID_RETURN;
3549 }
3550
3551
/**
  Create the mutexes and condition variables of the log object and
  initialise the stage manager.

  Must be called on an object that has not been initialised yet
  (asserted); marks the object as initialised so that cleanup() will
  destroy what is created here.
*/
void MYSQL_BIN_LOG::init_pthread_objects()
{
  assert(inited == 0);
  inited= 1;
  mysql_mutex_init(m_key_LOCK_log, &LOCK_log, MY_MUTEX_INIT_SLOW);
  mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
  mysql_mutex_init(m_key_LOCK_commit, &LOCK_commit, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(m_key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
                   MY_MUTEX_INIT_FAST);
  mysql_mutex_init(m_key_LOCK_xids, &LOCK_xids, MY_MUTEX_INIT_FAST);
  mysql_cond_init(m_key_update_cond, &update_cond);
  mysql_cond_init(m_key_prep_xids_cond, &m_prep_xids_cond);
  /* The instrumentation keys are only passed when PSI is compiled in. */
  stage_manager.init(
#ifdef HAVE_PSI_INTERFACE
                   m_key_LOCK_flush_queue,
                   m_key_LOCK_sync_queue,
                   m_key_LOCK_commit_queue,
                   m_key_LOCK_done, m_key_COND_done
#endif
                   );
}
3574
3575
3576 /**
3577 Check if a string is a valid number.
3578
3579 @param str String to test
3580 @param res Store value here
3581 @param allow_wildcards Set to 1 if we should ignore '%' and '_'
3582
3583 @note
3584 For the moment the allow_wildcards argument is not used
3585 Should be moved to some other file.
3586
3587 @retval
3588 1 String is a number
3589 @retval
3590 0 String is not a number
3591 */
3592
is_number(const char * str,ulong * res,bool allow_wildcards)3593 static bool is_number(const char *str,
3594 ulong *res, bool allow_wildcards)
3595 {
3596 int flag;
3597 const char *start;
3598 DBUG_ENTER("is_number");
3599
3600 flag=0; start=str;
3601 while (*str++ == ' ') ;
3602 if (*--str == '-' || *str == '+')
3603 str++;
3604 while (my_isdigit(files_charset_info,*str) ||
3605 (allow_wildcards && (*str == wild_many || *str == wild_one)))
3606 {
3607 flag=1;
3608 str++;
3609 }
3610 if (*str == '.')
3611 {
3612 for (str++ ;
3613 my_isdigit(files_charset_info,*str) ||
3614 (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
3615 str++, flag=1) ;
3616 }
3617 if (*str != 0 || flag == 0)
3618 DBUG_RETURN(0);
3619 if (res)
3620 *res=atol(start);
3621 DBUG_RETURN(1); /* Number ok */
3622 } /* is_number */
3623
3624
3625 /*
3626 Maximum unique log filename extension.
3627 Note: setting to 0x7FFFFFFF due to atol windows
3628 overflow/truncate.
3629 */
3630 #define MAX_LOG_UNIQUE_FN_EXT 0x7FFFFFFF
3631
3632 /*
3633 Number of warnings that will be printed to error log
3634 before extension number is exhausted.
3635 */
3636 #define LOG_WARN_UNIQUE_FN_EXT_LEFT 1000
3637
/**
  Find a unique filename for 'filename.#'.

  Set '#' to the highest existing log file extension plus one.

  The directory part of @c name is scanned for entries called
  "<basename>.<number>"; the new extension is written into @c name in
  place, formatted with "%06lu".

  This function will return nonzero if: (i) the generated name
  exceeds FN_REFLEN; (ii) if the number of extensions is exhausted;
  or (iii) some other error happened while examining the filesystem.

  @param name  In/out buffer holding the log file name without extension;
               the extension is appended to it.

  @return
    nonzero if not possible to get unique filename.
*/

static int find_uniq_filename(char *name)
{
  uint                  i;
  char                  buff[FN_REFLEN], ext_buf[FN_REFLEN];
  struct st_my_dir     *dir_info;
  struct fileinfo *file_info;
  ulong                 max_found= 0, next= 0, number= 0;
  size_t		buf_length, length;
  char			*start, *end;
  int                   error= 0;
  DBUG_ENTER("find_uniq_filename");

  /* Split into directory (copied to buff) and base name (start..end). */
  length= dirname_part(buff, name, &buf_length);
  start=  name + length;
  end=    strend(start);

  /* Turn the base name into the "<basename>." prefix to match against. */
  *end='.';
  length= (size_t) (end - start + 1);

  if ((DBUG_EVALUATE_IF("error_unique_log_filename", 1, 
      !(dir_info= my_dir(buff,MYF(MY_DONT_SORT))))))
  {						// This shouldn't happen
    my_stpcpy(end,".1");				// use name+1
    DBUG_RETURN(1);
  }
  /* Remember the largest numeric extension among the matching entries. */
  file_info= dir_info->dir_entry;
  for (i= dir_info->number_off_files ; i-- ; file_info++)
  {
    if (strncmp(file_info->name, start, length) == 0 &&
	is_number(file_info->name+length, &number,0))
    {
      set_if_bigger(max_found, number);
    }
  }
  my_dirend(dir_info);

  /* check if reached the maximum possible extension number */
  if (max_found == MAX_LOG_UNIQUE_FN_EXT)
  {
    sql_print_error("Log filename extension number exhausted: %06lu. \
Please fix this by archiving old logs and \
updating the index files.", max_found);
    error= 1;
    goto end;
  }

  next= max_found + 1;
  /* Format into a scratch buffer first so the length can be checked. */
  if (sprintf(ext_buf, "%06lu", next)<0)
  {
    error= 1;
    goto end;
  }
  /* Step past the dot: end now points where the number will be written. */
  *end++='.';

  /* 
    Check if the generated extension size + the file name exceeds the
    buffer size used. If one did not check this, then the filename might be
    truncated, resulting in error.
   */
  if (((strlen(ext_buf) + (end - name)) >= FN_REFLEN))
  {
    sql_print_error("Log filename too large: %s%s (%zu). \
Please fix this by archiving old logs and updating the \
index files.", name, ext_buf, (strlen(ext_buf) + (end - name)));
    error= 1;
    goto end;
  }

  if (sprintf(end, "%06lu", next)<0)
  {
    error= 1;
    goto end;
  }

  /* print warning if reaching the end of available extensions. */
  if ((next > (MAX_LOG_UNIQUE_FN_EXT - LOG_WARN_UNIQUE_FN_EXT_LEFT)))
    sql_print_warning("Next log extension: %lu. \
Remaining log filename extensions: %lu. \
Please consider archiving some logs.", next, (MAX_LOG_UNIQUE_FN_EXT - next));

end:
  DBUG_RETURN(error);
}
3734
3735
generate_new_name(char * new_name,const char * log_name)3736 int MYSQL_BIN_LOG::generate_new_name(char *new_name, const char *log_name)
3737 {
3738 fn_format(new_name, log_name, mysql_data_home, "", 4);
3739 if (!fn_ext(log_name)[0])
3740 {
3741 if (find_uniq_filename(new_name))
3742 {
3743 my_printf_error(ER_NO_UNIQUE_LOGFILE, ER(ER_NO_UNIQUE_LOGFILE),
3744 MYF(ME_FATALERROR), log_name);
3745 sql_print_error(ER(ER_NO_UNIQUE_LOGFILE), log_name);
3746 return 1;
3747 }
3748 }
3749 return 0;
3750 }
3751
3752
3753 /**
3754 @todo
3755 The following should be using fn_format(); We just need to
3756 first change fn_format() to cut the file name if it's too long.
3757 */
generate_name(const char * log_name,const char * suffix,char * buff)3758 const char *MYSQL_BIN_LOG::generate_name(const char *log_name,
3759 const char *suffix,
3760 char *buff)
3761 {
3762 if (!log_name || !log_name[0])
3763 {
3764 strmake(buff, default_logfile_name, FN_REFLEN - strlen(suffix) - 1);
3765 return (const char *)
3766 fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
3767 }
3768 // get rid of extension to avoid problems
3769
3770 char *p= fn_ext(log_name);
3771 uint length= (uint) (p - log_name);
3772 strmake(buff, log_name, min<size_t>(length, FN_REFLEN-1));
3773 return (const char*)buff;
3774 }
3775
3776
init_and_set_log_file_name(const char * log_name,const char * new_name)3777 bool MYSQL_BIN_LOG::init_and_set_log_file_name(const char *log_name,
3778 const char *new_name)
3779 {
3780 if (new_name && !my_stpcpy(log_file_name, new_name))
3781 return TRUE;
3782 else if (!new_name && generate_new_name(log_file_name, log_name))
3783 return TRUE;
3784
3785 return FALSE;
3786 }
3787
3788
3789 /**
3790 Open the logfile and init IO_CACHE.
3791
3792 @param log_name The name of the log to open
3793 @param new_name The new name for the logfile.
3794 NULL forces generate_new_name() to be called.
3795
3796 @return true if error, false otherwise.
3797 */
3798
open(PSI_file_key log_file_key,const char * log_name,const char * new_name)3799 bool MYSQL_BIN_LOG::open(
3800 #ifdef HAVE_PSI_INTERFACE
3801 PSI_file_key log_file_key,
3802 #endif
3803 const char *log_name,
3804 const char *new_name)
3805 {
3806 File file= -1;
3807 my_off_t pos= 0;
3808 int open_flags= O_CREAT | O_BINARY;
3809 DBUG_ENTER("MYSQL_BIN_LOG::open");
3810
3811 write_error= 0;
3812
3813 if (!(name= my_strdup(key_memory_MYSQL_LOG_name,
3814 log_name, MYF(MY_WME))))
3815 {
3816 name= (char *)log_name; // for the error message
3817 goto err;
3818 }
3819
3820 if (init_and_set_log_file_name(name, new_name) ||
3821 DBUG_EVALUATE_IF("fault_injection_init_name", 1, 0))
3822 goto err;
3823
3824 if (io_cache_type == SEQ_READ_APPEND)
3825 open_flags |= O_RDWR | O_APPEND;
3826 else
3827 open_flags |= O_WRONLY;
3828
3829 db[0]= 0;
3830
3831 #ifdef HAVE_PSI_INTERFACE
3832 /* Keep the key for reopen */
3833 m_log_file_key= log_file_key;
3834 #endif
3835
3836 if ((file= mysql_file_open(log_file_key,
3837 log_file_name, open_flags,
3838 MYF(MY_WME))) < 0)
3839 goto err;
3840
3841 if ((pos= mysql_file_tell(file, MYF(MY_WME))) == MY_FILEPOS_ERROR)
3842 {
3843 if (my_errno() == ESPIPE)
3844 pos= 0;
3845 else
3846 goto err;
3847 }
3848
3849 if (init_io_cache(&log_file, file, IO_SIZE, io_cache_type, pos, 0,
3850 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
3851 goto err;
3852
3853 log_state.atomic_set(LOG_OPENED);
3854 DBUG_RETURN(0);
3855
3856 err:
3857 if (binlog_error_action == ABORT_SERVER)
3858 {
3859 exec_binlog_error_action_abort("Either disk is full or file system is read "
3860 "only while opening the binlog. Aborting the"
3861 " server.");
3862 }
3863 else
3864 sql_print_error("Could not open %s for logging (error %d). "
3865 "Turning logging off for the whole duration "
3866 "of the MySQL server process. To turn it on "
3867 "again: fix the cause, shutdown the MySQL "
3868 "server and restart it.",
3869 name, errno);
3870 if (file >= 0)
3871 mysql_file_close(file, MYF(0));
3872 end_io_cache(&log_file);
3873 my_free(name);
3874 name= NULL;
3875 log_state.atomic_set(LOG_CLOSED);
3876 DBUG_RETURN(1);
3877 }
3878
3879
open_index_file(const char * index_file_name_arg,const char * log_name,bool need_lock_index)3880 bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
3881 const char *log_name, bool need_lock_index)
3882 {
3883 bool error= false;
3884 File index_file_nr= -1;
3885 if (need_lock_index)
3886 mysql_mutex_lock(&LOCK_index);
3887 else
3888 mysql_mutex_assert_owner(&LOCK_index);
3889
3890 /*
3891 First open of this class instance
3892 Create an index file that will hold all file names uses for logging.
3893 Add new entries to the end of it.
3894 */
3895 myf opt= MY_UNPACK_FILENAME;
3896
3897 if (my_b_inited(&index_file))
3898 goto end;
3899
3900 if (!index_file_name_arg)
3901 {
3902 index_file_name_arg= log_name; // Use same basename for index file
3903 opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
3904 }
3905 fn_format(index_file_name, index_file_name_arg, mysql_data_home,
3906 ".index", opt);
3907
3908 if (set_crash_safe_index_file_name(index_file_name_arg))
3909 {
3910 sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed.");
3911 error= true;
3912 goto end;
3913 }
3914
3915 /*
3916 We need move crash_safe_index_file to index_file if the index_file
3917 does not exist and crash_safe_index_file exists when mysqld server
3918 restarts.
3919 */
3920 if (my_access(index_file_name, F_OK) &&
3921 !my_access(crash_safe_index_file_name, F_OK) &&
3922 my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)))
3923 {
3924 sql_print_error("MYSQL_BIN_LOG::open_index_file failed to "
3925 "move crash_safe_index_file to index file.");
3926 error= true;
3927 goto end;
3928 }
3929
3930 if ((index_file_nr= mysql_file_open(m_key_file_log_index,
3931 index_file_name,
3932 O_RDWR | O_CREAT | O_BINARY,
3933 MYF(MY_WME))) < 0 ||
3934 mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
3935 init_io_cache_ext(&index_file, index_file_nr,
3936 IO_SIZE, READ_CACHE,
3937 mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
3938 0, MYF(MY_WME | MY_WAIT_IF_FULL),
3939 m_key_file_log_index_cache) ||
3940 DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
3941 {
3942 /*
3943 TODO: all operations creating/deleting the index file or a log, should
3944 call my_sync_dir() or my_sync_dir_by_file() to be durable.
3945 TODO: file creation should be done with mysql_file_create()
3946 not mysql_file_open().
3947 */
3948 if (index_file_nr >= 0)
3949 mysql_file_close(index_file_nr, MYF(0));
3950 error= true;
3951 goto end;
3952 }
3953
3954 #ifdef HAVE_REPLICATION
3955 /*
3956 Sync the index by purging any binary log file that is not registered.
3957 In other words, either purge binary log files that were removed from
3958 the index but not purged from the file system due to a crash or purge
3959 any binary log file that was created but not register in the index
3960 due to a crash.
3961 */
3962
3963 if (set_purge_index_file_name(index_file_name_arg) ||
3964 open_purge_index_file(FALSE) ||
3965 purge_index_entry(NULL, NULL, false) ||
3966 close_purge_index_file() ||
3967 DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
3968 {
3969 sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
3970 "file.");
3971 error= true;
3972 goto end;
3973 }
3974 #endif
3975
3976 end:
3977 if (need_lock_index)
3978 mysql_mutex_unlock(&LOCK_index);
3979 return error;
3980 }
3981
3982 /**
3983 Add the GTIDs from the given relaylog file and also
3984 update the IO thread transaction parser.
3985
3986 @param filename Relaylog file to read from.
3987 @param retrieved_set Gtid_set to store the GTIDs found on the relaylog file.
3988 @param verify_checksum Set to true to verify event checksums.
3989 @param trx_parser The transaction boundary parser to be used in order to
3990 only add a GTID to the gtid_set after ensuring the transaction is fully
3991 stored on the relay log.
3992 @param gtid_partial_trx The gtid of the last incomplete transaction
3993 found in the relay log.
3994
3995 @retval false The file was successfully read and all GTIDs from
3996 Previous_gtids and Gtid_log_event from complete transactions were added to
3997 the retrieved_set.
3998 @retval true There was an error during the procedure.
3999 */
4000 static bool
read_gtids_and_update_trx_parser_from_relaylog(const char * filename,Gtid_set * retrieved_gtids,bool verify_checksum,Transaction_boundary_parser * trx_parser,Gtid * gtid_partial_trx)4001 read_gtids_and_update_trx_parser_from_relaylog(
4002 const char *filename,
4003 Gtid_set *retrieved_gtids,
4004 bool verify_checksum,
4005 Transaction_boundary_parser *trx_parser,
4006 Gtid *gtid_partial_trx)
4007 {
4008 DBUG_ENTER("read_gtids_and_update_trx_parser_from_relaylog");
4009 DBUG_PRINT("info", ("Opening file %s", filename));
4010
4011 assert(retrieved_gtids != NULL);
4012 assert(trx_parser != NULL);
4013 #ifndef NDEBUG
4014 unsigned long event_counter= 0;
4015 #endif
4016
4017 /*
4018 Create a Format_description_log_event that is used to read the
4019 first event of the log.
4020 */
4021 Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
4022 if (!fd_ev.is_valid())
4023 DBUG_RETURN(true);
4024
4025 File file;
4026 IO_CACHE log;
4027
4028 const char *errmsg= NULL;
4029 if ((file= open_binlog_file(&log, filename, &errmsg)) < 0)
4030 {
4031 sql_print_error("%s", errmsg);
4032 /*
4033 As read_gtids_from_binlog() will not throw error on truncated
4034 relaylog files, we should do the same here in order to keep the
4035 current behavior.
4036 */
4037 DBUG_RETURN(false);
4038 }
4039
4040 /*
4041 Seek for Previous_gtids_log_event and Gtid_log_event events to
4042 gather information what has been processed so far.
4043 */
4044 my_b_seek(&log, BIN_LOG_HEADER_SIZE);
4045 Log_event *ev= NULL;
4046 bool error= false;
4047 bool seen_prev_gtids= false;
4048 ulong data_len= 0;
4049
4050 while (!error &&
4051 (ev= Log_event::read_log_event(&log, 0, fd_ev_p, verify_checksum)) !=
4052 NULL)
4053 {
4054 DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
4055 #ifndef NDEBUG
4056 event_counter++;
4057 #endif
4058
4059 data_len= uint4korr(ev->temp_buf + EVENT_LEN_OFFSET);
4060 if (trx_parser->feed_event(ev->temp_buf, data_len, fd_ev_p, false))
4061 {
4062 /*
4063 The transaction boundary parser found an error while parsing a
4064 sequence of events from the relaylog. As we don't know if the
4065 parsing has started from a reliable point (it might started in
4066 a relay log file that begins with the rest of a transaction
4067 that started in a previous relay log file), it is better to do
4068 nothing in this case. The boundary parser will fix itself once
4069 finding an event that represent a transaction boundary.
4070
4071 Suppose the following relaylog:
4072
4073 rl-bin.000011 | rl-bin.000012 | rl-bin.000013 | rl-bin-000014
4074 ---------------+---------------+---------------+---------------
4075 PREV_GTIDS | PREV_GTIDS | PREV_GTIDS | PREV_GTIDS
4076 (empty) | (UUID:1-2) | (UUID:1-2) | (UUID:1-2)
4077 ---------------+---------------+---------------+---------------
4078 XID | QUERY(INSERT) | QUERY(INSERT) | XID
4079 ---------------+---------------+---------------+---------------
4080 GTID(UUID:2) |
4081 ---------------+
4082 QUERY(CREATE |
4083 TABLE t1 ...) |
4084 ---------------+
4085 GTID(UUID:3) |
4086 ---------------+
4087 QUERY(BEGIN) |
4088 ---------------+
4089
4090 As it is impossible to determine the current Retrieved_Gtid_Set by only
4091 looking to the PREVIOUS_GTIDS on the last relay log file, and scanning
4092 events on it, we tried to find a relay log file that contains at least
4093 one GTID event during the backwards search.
4094
4095 In the example, we will find a GTID only in rl-bin.000011, as the
4096 UUID:3 transaction was spanned across 4 relay log files.
4097
4098 The transaction spanning can be caused by "FLUSH RELAY LOGS" commands
4099 on slave while it is queuing the transaction.
4100
4101 So, in order to correctly add UUID:3 into Retrieved_Gtid_Set, we need
4102 to parse the relay log starting on the file we found the last GTID
4103 queued to know if the transaction was fully retrieved or not.
4104
4105 Start scanning rl-bin.000011 after resetting the transaction parser
4106 will generate an error, as XID event is only expected inside a DML,
4107 but in this case, we can ignore this error and reset the parser.
4108 */
4109 trx_parser->reset();
4110 /*
4111 We also have to discard the GTID of the partial transaction that was
4112 not finished if there is one. This is needed supposing that an
4113 incomplete transaction was replicated with a GTID.
4114
4115 GTID(1), QUERY(BEGIN), QUERY(INSERT), ANONYMOUS_GTID, QUERY(DROP ...)
4116
4117 In the example above, without cleaning the gtid_partial_trx,
4118 the GTID(1) would be added to the Retrieved_Gtid_Set after the
4119 QUERY(DROP ...) event.
4120
4121 GTID(1), QUERY(BEGIN), QUERY(INSERT), GTID(2), QUERY(DROP ...)
4122
4123 In the example above the GTID(1) will also be discarded as the
4124 GTID(1) transaction is not complete.
4125 */
4126 if (!gtid_partial_trx->is_empty())
4127 {
4128 DBUG_PRINT("info", ("Discarding Gtid(%d, %lld) as the transaction "
4129 "wasn't complete and we found an error in the"
4130 "transaction boundary parser.",
4131 gtid_partial_trx->sidno,
4132 gtid_partial_trx->gno));
4133 gtid_partial_trx->clear();
4134 }
4135 }
4136
4137 switch (ev->get_type_code())
4138 {
4139 case binary_log::FORMAT_DESCRIPTION_EVENT:
4140 if (fd_ev_p != &fd_ev)
4141 delete fd_ev_p;
4142 fd_ev_p= (Format_description_log_event *)ev;
4143 break;
4144 case binary_log::ROTATE_EVENT:
4145 // do nothing; just accept this event and go to next
4146 break;
4147 case binary_log::PREVIOUS_GTIDS_LOG_EVENT:
4148 {
4149 seen_prev_gtids= true;
4150 // add events to sets
4151 Previous_gtids_log_event *prev_gtids_ev= (Previous_gtids_log_event *)ev;
4152 if (prev_gtids_ev->add_to_set(retrieved_gtids) != 0)
4153 {
4154 error= true;
4155 break;
4156 }
4157 #ifndef NDEBUG
4158 char* prev_buffer= prev_gtids_ev->get_str(NULL, NULL);
4159 DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
4160 filename, prev_buffer));
4161 my_free(prev_buffer);
4162 #endif
4163 break;
4164 }
4165 case binary_log::GTID_LOG_EVENT:
4166 {
4167 /* If we didn't find any PREVIOUS_GTIDS in this file */
4168 if (!seen_prev_gtids)
4169 {
4170 my_error(ER_BINLOG_LOGICAL_CORRUPTION, MYF(0), filename,
4171 "The first global transaction identifier was read, but "
4172 "no other information regarding identifiers existing "
4173 "on the previous log files was found.");
4174 error= true;
4175 break;
4176 }
4177
4178 Gtid_log_event *gtid_ev= (Gtid_log_event *)ev;
4179 rpl_sidno sidno= gtid_ev->get_sidno(retrieved_gtids->get_sid_map());
4180 if (sidno < 0)
4181 {
4182 error= true;
4183 break;
4184 }
4185 else
4186 {
4187 if (retrieved_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK)
4188 {
4189 error= true;
4190 break;
4191 }
4192 else
4193 {
4194 /*
4195 As are updating the transaction boundary parser while reading
4196 GTIDs from relay log files to fill the Retrieved_Gtid_Set, we
4197 should not add the GTID here as we don't know if the transaction
4198 is complete on the relay log yet.
4199 */
4200 gtid_partial_trx->set(sidno, gtid_ev->get_gno());
4201 }
4202 DBUG_PRINT("info", ("Found Gtid in relaylog file '%s': Gtid(%d, %lld).",
4203 filename, sidno, gtid_ev->get_gno()));
4204 }
4205 break;
4206 }
4207 case binary_log::ANONYMOUS_GTID_LOG_EVENT:
4208 default:
4209 /*
4210 If we reached the end of a transaction after storing it's GTID
4211 in gtid_partial_trx variable, it is time to add this GTID to the
4212 retrieved_gtids set because the transaction is complete and there is no
4213 need for asking this transaction again.
4214 */
4215 if (trx_parser->is_not_inside_transaction())
4216 {
4217 if (!gtid_partial_trx->is_empty())
4218 {
4219 DBUG_PRINT("info", ("Adding Gtid to Retrieved_Gtid_Set as the "
4220 "transaction was completed at "
4221 "relaylog file '%s': Gtid(%d, %lld).",
4222 filename, gtid_partial_trx->sidno,
4223 gtid_partial_trx->gno));
4224 retrieved_gtids->_add_gtid(gtid_partial_trx->sidno,
4225 gtid_partial_trx->gno);
4226 gtid_partial_trx->clear();
4227 }
4228 }
4229 break;
4230 }
4231 if (ev != fd_ev_p)
4232 delete ev;
4233 }
4234
4235 if (log.error < 0)
4236 {
4237 // This is not a fatal error; the log may just be truncated.
4238 // @todo but what other errors could happen? IO error?
4239 sql_print_warning("Error reading GTIDs from relaylog: %d", log.error);
4240 }
4241
4242 if (fd_ev_p != &fd_ev)
4243 {
4244 delete fd_ev_p;
4245 fd_ev_p= &fd_ev;
4246 }
4247
4248 mysql_file_close(file, MYF(MY_WME));
4249 end_io_cache(&log);
4250
4251 #ifndef NDEBUG
4252 sql_print_information("%lu events read in relaylog file '%s' for updating "
4253 "Retrieved_Gtid_Set and/or IO thread transaction "
4254 "parser state.",
4255 event_counter, filename);
4256 #endif
4257
4258 DBUG_RETURN(error);
4259 }
4260
4261 /**
4262 Reads GTIDs from the given binlog file.
4263
4264 @param filename File to read from.
4265 @param all_gtids If not NULL, then the GTIDs from the
4266 Previous_gtids_log_event and from all Gtid_log_events are stored in
4267 this object.
4268 @param prev_gtids If not NULL, then the GTIDs from the
4269 Previous_gtids_log_events are stored in this object.
4270 @param first_gtid If not NULL, then the first GTID information from the
4271 file will be stored in this object.
4272 @param sid_map The sid_map object to use in the rpl_sidno generation
4273 of the Gtid_log_event. If lock is needed in the sid_map, the caller
4274 must hold it.
4275 @param verify_checksum Set to true to verify event checksums.
4276
4277 @retval GOT_GTIDS The file was successfully read and it contains
4278 both Gtid_log_events and Previous_gtids_log_events.
4279 This is only possible if either all_gtids or first_gtid are not null.
4280 @retval GOT_PREVIOUS_GTIDS The file was successfully read and it
4281 contains Previous_gtids_log_events but no Gtid_log_events.
4282 For binary logs, if no all_gtids and no first_gtid are specified,
4283 this function will be done right after reading the PREVIOUS_GTIDS
4284 regardless of the rest of the content of the binary log file.
4285 @retval NO_GTIDS The file was successfully read and it does not
4286 contain GTID events.
4287 @retval ERROR Out of memory, or IO error, or malformed event
4288 structure, or the file is malformed (e.g., contains Gtid_log_events
4289 but no Previous_gtids_log_event).
4290 @retval TRUNCATED The file was truncated before the end of the
4291 first Previous_gtids_log_event.
4292 */
4293 enum enum_read_gtids_from_binlog_status
4294 { GOT_GTIDS, GOT_PREVIOUS_GTIDS, NO_GTIDS, ERROR, TRUNCATED };
4295 static enum_read_gtids_from_binlog_status
read_gtids_from_binlog(const char * filename,Gtid_set * all_gtids,Gtid_set * prev_gtids,Gtid * first_gtid,Sid_map * sid_map,bool verify_checksum,bool is_relay_log)4296 read_gtids_from_binlog(const char *filename, Gtid_set *all_gtids,
4297 Gtid_set *prev_gtids, Gtid *first_gtid,
4298 Sid_map* sid_map,
4299 bool verify_checksum, bool is_relay_log)
4300 {
4301 DBUG_ENTER("read_gtids_from_binlog");
4302 DBUG_PRINT("info", ("Opening file %s", filename));
4303
4304 /*
4305 Create a Format_description_log_event that is used to read the
4306 first event of the log.
4307 */
4308 Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
4309 if (!fd_ev.is_valid())
4310 DBUG_RETURN(ERROR);
4311
4312 File file;
4313 IO_CACHE log;
4314
4315 #ifndef NDEBUG
4316 unsigned long event_counter= 0;
4317 /*
4318 We assert here that both all_gtids and prev_gtids, if specified,
4319 uses the same sid_map as the one passed as a parameter. This is just
4320 to ensure that, if the sid_map needed some lock and was locked by
4321 the caller, the lock applies to all the GTID sets this function is
4322 dealing with.
4323 */
4324 if (all_gtids)
4325 assert(all_gtids->get_sid_map() == sid_map);
4326 if (prev_gtids)
4327 assert(prev_gtids->get_sid_map() == sid_map);
4328 #endif
4329
4330 const char *errmsg= NULL;
4331 if ((file= open_binlog_file(&log, filename, &errmsg)) < 0)
4332 {
4333 sql_print_error("%s", errmsg);
4334 /*
4335 We need to revisit the recovery procedure for relay log
4336 files. Currently, it is called after this routine.
4337 /Alfranio
4338 */
4339 DBUG_RETURN(TRUNCATED);
4340 }
4341
4342 /*
4343 Seek for Previous_gtids_log_event and Gtid_log_event events to
4344 gather information what has been processed so far.
4345 */
4346 my_b_seek(&log, BIN_LOG_HEADER_SIZE);
4347 Log_event *ev= NULL;
4348 enum_read_gtids_from_binlog_status ret= NO_GTIDS;
4349 bool done= false;
4350 bool seen_first_gtid= false;
4351 while (!done &&
4352 (ev= Log_event::read_log_event(&log, 0, fd_ev_p, verify_checksum)) !=
4353 NULL)
4354 {
4355 #ifndef NDEBUG
4356 event_counter++;
4357 #endif
4358 DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
4359 switch (ev->get_type_code())
4360 {
4361 case binary_log::FORMAT_DESCRIPTION_EVENT:
4362 if (fd_ev_p != &fd_ev)
4363 delete fd_ev_p;
4364 fd_ev_p= (Format_description_log_event *)ev;
4365 break;
4366 case binary_log::ROTATE_EVENT:
4367 // do nothing; just accept this event and go to next
4368 break;
4369 case binary_log::PREVIOUS_GTIDS_LOG_EVENT:
4370 {
4371 ret= GOT_PREVIOUS_GTIDS;
4372 // add events to sets
4373 Previous_gtids_log_event *prev_gtids_ev=
4374 (Previous_gtids_log_event *)ev;
4375 if (all_gtids != NULL && prev_gtids_ev->add_to_set(all_gtids) != 0)
4376 ret= ERROR, done= true;
4377 else if (prev_gtids != NULL && prev_gtids_ev->add_to_set(prev_gtids) != 0)
4378 ret= ERROR, done= true;
4379 #ifndef NDEBUG
4380 char* prev_buffer= prev_gtids_ev->get_str(NULL, NULL);
4381 DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
4382 filename, prev_buffer));
4383 my_free(prev_buffer);
4384 #endif
4385 /*
4386 If this is not a relay log, the previous_gtids were asked and no
4387 all_gtids neither first_gtid were asked, it is fine to consider the
4388 job as done.
4389 */
4390 if (!is_relay_log && prev_gtids != NULL &&
4391 all_gtids == NULL && first_gtid == NULL)
4392 done= true;
4393 DBUG_EXECUTE_IF("inject_fault_bug16502579", {
4394 DBUG_PRINT("debug", ("PREVIOUS_GTIDS_LOG_EVENT found. "
4395 "Injected ret=NO_GTIDS."));
4396 if (ret == GOT_PREVIOUS_GTIDS)
4397 {
4398 ret=NO_GTIDS;
4399 done= false;
4400 }
4401 });
4402 break;
4403 }
4404 case binary_log::GTID_LOG_EVENT:
4405 {
4406 if (ret != GOT_GTIDS)
4407 {
4408 if (ret != GOT_PREVIOUS_GTIDS)
4409 {
4410 /*
4411 Since this routine is run on startup, there may not be a
4412 THD instance. Therefore, ER(X) cannot be used.
4413 */
4414 const char* msg_fmt= (current_thd != NULL) ?
4415 ER(ER_BINLOG_LOGICAL_CORRUPTION) :
4416 ER_DEFAULT(ER_BINLOG_LOGICAL_CORRUPTION);
4417 my_printf_error(ER_BINLOG_LOGICAL_CORRUPTION,
4418 msg_fmt, MYF(0),
4419 filename,
4420 "The first global transaction identifier was read, but "
4421 "no other information regarding identifiers existing "
4422 "on the previous log files was found.");
4423 ret= ERROR, done= true;
4424 break;
4425 }
4426 else
4427 ret= GOT_GTIDS;
4428 }
4429 /*
4430 When this is a relaylog, we just check if the relay log contains at
4431 least one Gtid_log_event, so that we can distinguish the return values
4432 GOT_GTID and GOT_PREVIOUS_GTIDS. We don't need to read anything else
4433 from the relay log.
4434 When this is a binary log, if all_gtids is requested (i.e., NOT NULL),
4435 we should continue to read all gtids. If just first_gtid was requested,
4436 we will be done after storing this Gtid_log_event info on it.
4437 */
4438 if (is_relay_log)
4439 {
4440 ret= GOT_GTIDS, done= true;
4441 }
4442 else
4443 {
4444 Gtid_log_event *gtid_ev= (Gtid_log_event *)ev;
4445 rpl_sidno sidno= gtid_ev->get_sidno(sid_map);
4446 if (sidno < 0)
4447 ret= ERROR, done= true;
4448 else
4449 {
4450 if (all_gtids)
4451 {
4452 if (all_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK)
4453 ret= ERROR, done= true;
4454 all_gtids->_add_gtid(sidno, gtid_ev->get_gno());
4455 DBUG_PRINT("info", ("Got Gtid from file '%s': Gtid(%d, %lld).",
4456 filename, sidno, gtid_ev->get_gno()));
4457 }
4458
4459 /* If the first GTID was requested, stores it */
4460 if (first_gtid && !seen_first_gtid)
4461 {
4462 first_gtid->set(sidno, gtid_ev->get_gno());
4463 seen_first_gtid= true;
4464 /* If the first_gtid was the only thing requested, we are done */
4465 if (all_gtids == NULL)
4466 ret= GOT_GTIDS, done= true;
4467 }
4468 }
4469 }
4470 break;
4471 }
4472 case binary_log::ANONYMOUS_GTID_LOG_EVENT:
4473 {
4474 /*
4475 When this is a relaylog, we just check if it contains
4476 at least one Anonymous_gtid_log_event after initialization
4477 (FDs, Rotates and PREVIOUS_GTIDS), so that we can distinguish the
4478 return values GOT_GTID and GOT_PREVIOUS_GTIDS.
4479 We don't need to read anything else from the relay log.
4480 */
4481 if (is_relay_log)
4482 {
4483 ret= GOT_GTIDS;
4484 done= true;
4485 break;
4486 }
4487 assert(prev_gtids == NULL ? true : all_gtids != NULL ||
4488 first_gtid != NULL);
4489 }
4490 // Fall through.
4491 default:
4492 // if we found any other event type without finding a
4493 // previous_gtids_log_event, then the rest of this binlog
4494 // cannot contain gtids
4495 if (ret != GOT_GTIDS && ret != GOT_PREVIOUS_GTIDS)
4496 done= true;
4497 /*
4498 The GTIDs of the relaylog files will be handled later
4499 because of the possibility of transactions be spanned
4500 along distinct relaylog files.
4501 So, if we found an ordinary event without finding the
4502 GTID but we already found the PREVIOUS_GTIDS, this probably
4503 means that the event is from a transaction that started on
4504 previous relaylog file.
4505 */
4506 if (ret == GOT_PREVIOUS_GTIDS && is_relay_log)
4507 done= true;
4508 break;
4509 }
4510 if (ev != fd_ev_p)
4511 delete ev;
4512 DBUG_PRINT("info", ("done=%d", done));
4513 }
4514
4515 if (log.error < 0)
4516 {
4517 // This is not a fatal error; the log may just be truncated.
4518
4519 // @todo but what other errors could happen? IO error?
4520 sql_print_warning("Error reading GTIDs from binary log: %d", log.error);
4521 }
4522
4523 if (fd_ev_p != &fd_ev)
4524 {
4525 delete fd_ev_p;
4526 fd_ev_p= &fd_ev;
4527 }
4528
4529 mysql_file_close(file, MYF(MY_WME));
4530 end_io_cache(&log);
4531
4532 if (all_gtids)
4533 all_gtids->dbug_print("all_gtids");
4534 else
4535 DBUG_PRINT("info", ("all_gtids==NULL"));
4536 if (prev_gtids)
4537 prev_gtids->dbug_print("prev_gtids");
4538 else
4539 DBUG_PRINT("info", ("prev_gtids==NULL"));
4540 if (first_gtid == NULL)
4541 DBUG_PRINT("info", ("first_gtid==NULL"));
4542 else if (first_gtid->sidno == 0)
4543 DBUG_PRINT("info", ("first_gtid.sidno==0"));
4544 else
4545 first_gtid->dbug_print(sid_map, "first_gtid");
4546
4547 DBUG_PRINT("info", ("returning %d", ret));
4548 #ifndef NDEBUG
4549 if (!is_relay_log && prev_gtids != NULL &&
4550 all_gtids == NULL && first_gtid == NULL)
4551 sql_print_information("Read %lu events from binary log file '%s' to "
4552 "determine the GTIDs purged from binary logs.",
4553 event_counter, filename);
4554 #endif
4555 DBUG_RETURN(ret);
4556 }
4557
find_first_log_not_in_gtid_set(char * binlog_file_name,const Gtid_set * gtid_set,Gtid * first_gtid,const char ** errmsg)4558 bool MYSQL_BIN_LOG::find_first_log_not_in_gtid_set(char *binlog_file_name,
4559 const Gtid_set *gtid_set,
4560 Gtid *first_gtid,
4561 const char **errmsg)
4562 {
4563 DBUG_ENTER("MYSQL_BIN_LOG::gtid_read_start_binlog");
4564 /*
4565 Gather the set of files to be accessed.
4566 */
4567 list<string> filename_list;
4568 LOG_INFO linfo;
4569 int error;
4570
4571 list<string>::reverse_iterator rit;
4572 Gtid_set binlog_previous_gtid_set(gtid_set->get_sid_map());
4573
4574 mysql_mutex_lock(&LOCK_index);
4575 for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/);
4576 !error; error= find_next_log(&linfo, false/*need_lock_index=false*/))
4577 {
4578 DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
4579 filename_list.push_back(string(linfo.log_file_name));
4580 }
4581 mysql_mutex_unlock(&LOCK_index);
4582 if (error != LOG_INFO_EOF)
4583 {
4584 *errmsg= "Failed to read the binary log index file while "
4585 "looking for the oldest binary log that contains any GTID "
4586 "that is not in the given gtid set";
4587 error= -1;
4588 goto end;
4589 }
4590
4591 if (filename_list.empty())
4592 {
4593 *errmsg= "Could not find first log file name in binary log index file "
4594 "while looking for the oldest binary log that contains any GTID "
4595 "that is not in the given gtid set";
4596 error= -2;
4597 goto end;
4598 }
4599
4600 /*
4601 Iterate over all the binary logs in reverse order, and read only
4602 the Previous_gtids_log_event, to find the first one, that is the
4603 subset of the given gtid set. Since every binary log begins with
4604 a Previous_gtids_log_event, that contains all GTIDs in all
4605 previous binary logs.
4606 We also ask for the first GTID in the binary log to know if we
4607 should send the FD event with the "created" field cleared or not.
4608 */
4609 DBUG_PRINT("info", ("Iterating backwards through binary logs, and reading "
4610 "only the Previous_gtids_log_event, to find the first "
4611 "one, that is the subset of the given gtid set."));
4612 rit= filename_list.rbegin();
4613 error= 0;
4614 while (rit != filename_list.rend())
4615 {
4616 binlog_previous_gtid_set.clear();
4617 const char *filename= rit->c_str();
4618 DBUG_PRINT("info", ("Read Previous_gtids_log_event from filename='%s'",
4619 filename));
4620 switch (read_gtids_from_binlog(filename, NULL, &binlog_previous_gtid_set,
4621 first_gtid,
4622 binlog_previous_gtid_set.get_sid_map(),
4623 opt_master_verify_checksum, is_relay_log))
4624 {
4625 case ERROR:
4626 *errmsg= "Error reading header of binary log while looking for "
4627 "the oldest binary log that contains any GTID that is not in "
4628 "the given gtid set";
4629 error= -3;
4630 goto end;
4631 case NO_GTIDS:
4632 *errmsg= "Found old binary log without GTIDs while looking for "
4633 "the oldest binary log that contains any GTID that is not in "
4634 "the given gtid set";
4635 error= -4;
4636 goto end;
4637 case GOT_GTIDS:
4638 case GOT_PREVIOUS_GTIDS:
4639 if (binlog_previous_gtid_set.is_subset(gtid_set))
4640 {
4641 strcpy(binlog_file_name, filename);
4642 /*
4643 Verify that the selected binlog is not the first binlog,
4644 */
4645 DBUG_EXECUTE_IF("slave_reconnect_with_gtid_set_executed",
4646 assert(strcmp(filename_list.begin()->c_str(),
4647 binlog_file_name) != 0););
4648 goto end;
4649 }
4650 case TRUNCATED:
4651 break;
4652 }
4653
4654 rit++;
4655 }
4656
4657 if (rit == filename_list.rend())
4658 {
4659 report_missing_gtids(&binlog_previous_gtid_set, gtid_set, errmsg);
4660 error= -5;
4661 }
4662
4663 end:
4664 if (error)
4665 DBUG_PRINT("error", ("'%s'", *errmsg));
4666 filename_list.clear();
4667 DBUG_PRINT("info", ("returning %d", error));
4668 DBUG_RETURN(error != 0 ? true : false);
4669 }
4670
init_gtid_sets(Gtid_set * all_gtids,Gtid_set * lost_gtids,bool verify_checksum,bool need_lock,Transaction_boundary_parser * trx_parser,Gtid * gtid_partial_trx,bool is_server_starting)4671 bool MYSQL_BIN_LOG::init_gtid_sets(Gtid_set *all_gtids, Gtid_set *lost_gtids,
4672 bool verify_checksum, bool need_lock,
4673 Transaction_boundary_parser *trx_parser,
4674 Gtid *gtid_partial_trx,
4675 bool is_server_starting)
4676 {
4677 DBUG_ENTER("MYSQL_BIN_LOG::init_gtid_sets");
4678 DBUG_PRINT("info", ("lost_gtids=%p; so we are recovering a %s log; is_relay_log=%d",
4679 lost_gtids, lost_gtids == NULL ? "relay" : "binary",
4680 is_relay_log));
4681
4682 /*
4683 If this is a relay log, we must have the IO thread Master_info trx_parser
4684 in order to correctly feed it with relay log events.
4685 */
4686 #ifndef NDEBUG
4687 if (is_relay_log)
4688 {
4689 assert(trx_parser != NULL);
4690 assert(lost_gtids == NULL);
4691 }
4692 #endif
4693
4694 /*
4695 Acquires the necessary locks to ensure that logs are not either
4696 removed or updated when we are reading from it.
4697 */
4698 if (need_lock)
4699 {
    // We don't need LOCK_log if we are only going to read the initial
    // Previous_gtids_log_event and ignore the Gtid_log_events.
4702 if (all_gtids != NULL)
4703 mysql_mutex_lock(&LOCK_log);
4704 mysql_mutex_lock(&LOCK_index);
4705 global_sid_lock->wrlock();
4706 }
4707 else
4708 {
4709 if (all_gtids != NULL)
4710 mysql_mutex_assert_owner(&LOCK_log);
4711 mysql_mutex_assert_owner(&LOCK_index);
4712 global_sid_lock->assert_some_wrlock();
4713 }
4714
4715 // Gather the set of files to be accessed.
4716 list<string> filename_list;
4717 LOG_INFO linfo;
4718 int error;
4719
4720 list<string>::iterator it;
4721 list<string>::reverse_iterator rit;
4722 bool reached_first_file= false;
4723
4724 /* Initialize the sid_map to be used in read_gtids_from_binlog */
4725 Sid_map *sid_map= NULL;
4726 if (all_gtids)
4727 sid_map= all_gtids->get_sid_map();
4728 else if (lost_gtids)
4729 sid_map= lost_gtids->get_sid_map();
4730
4731 for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/); !error;
4732 error= find_next_log(&linfo, false/*need_lock_index=false*/))
4733 {
4734 DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
4735 filename_list.push_back(string(linfo.log_file_name));
4736 }
4737 if (error != LOG_INFO_EOF)
4738 {
4739 DBUG_PRINT("error", ("Error reading %s index",
4740 is_relay_log ? "relaylog" : "binlog"));
4741 goto end;
4742 }
4743 /*
4744 On server starting, one new empty binlog file is created and
4745 its file name is put into index file before initializing
4746 GLOBAL.GTID_EXECUTED AND GLOBAL.GTID_PURGED, it is not the
4747 last binlog file before the server restarts, so we remove
4748 its file name from filename_list.
4749 */
4750 if (is_server_starting && !is_relay_log && !filename_list.empty())
4751 filename_list.pop_back();
4752
4753 error= 0;
4754
4755 if (all_gtids != NULL)
4756 {
4757 DBUG_PRINT("info", ("Iterating backwards through %s logs, "
4758 "looking for the last %s log that contains "
4759 "a Previous_gtids_log_event.",
4760 is_relay_log ? "relay" : "binary",
4761 is_relay_log ? "relay" : "binary"));
4762 // Iterate over all files in reverse order until we find one that
4763 // contains a Previous_gtids_log_event.
4764 rit= filename_list.rbegin();
4765 bool can_stop_reading= false;
4766 reached_first_file= (rit == filename_list.rend());
4767 DBUG_PRINT("info", ("filename='%s' reached_first_file=%d",
4768 reached_first_file ? "" : rit->c_str(),
4769 reached_first_file));
4770 while (!can_stop_reading && !reached_first_file)
4771 {
4772 const char *filename= rit->c_str();
4773 assert(rit != filename_list.rend());
4774 rit++;
4775 reached_first_file= (rit == filename_list.rend());
4776 DBUG_PRINT("info", ("filename='%s' can_stop_reading=%d "
4777 "reached_first_file=%d, ",
4778 filename, can_stop_reading, reached_first_file));
4779 switch (read_gtids_from_binlog(filename, all_gtids,
4780 reached_first_file ? lost_gtids : NULL,
4781 NULL/* first_gtid */,
4782 sid_map, verify_checksum, is_relay_log))
4783 {
4784 case ERROR:
4785 {
4786 error= 1;
4787 goto end;
4788 }
4789 case GOT_GTIDS:
4790 {
4791 can_stop_reading= true;
4792 break;
4793 }
4794 case GOT_PREVIOUS_GTIDS:
4795 {
4796 /*
4797 If this is a binlog file, it is enough to have GOT_PREVIOUS_GTIDS.
4798 If this is a relaylog file, we need to find at least one GTID to
4799 start parsing the relay log to add GTID of transactions that might
4800 have spanned in distinct relaylog files.
4801 */
4802 if (!is_relay_log)
4803 can_stop_reading= true;
4804 break;
4805 }
4806 case NO_GTIDS:
4807 {
4808 /*
4809 Mysql server iterates backwards through binary logs, looking for
4810 the last binary log that contains a Previous_gtids_log_event for
4811 gathering the set of gtid_executed on server start. This may take
4812 very long time if it has many binary logs and almost all of them
4813 are out of filesystem cache. So if the binlog_gtid_simple_recovery
4814 is enabled, and the last binary log does not contain any GTID
4815 event, do not read any more binary logs, GLOBAL.GTID_EXECUTED and
4816 GLOBAL.GTID_PURGED should be empty in the case.
4817 */
4818 if (binlog_gtid_simple_recovery && is_server_starting &&
4819 !is_relay_log)
4820 {
4821 assert(all_gtids->is_empty());
4822 assert(lost_gtids->is_empty());
4823 goto end;
4824 }
4825 /*FALLTHROUGH*/
4826 }
4827 case TRUNCATED:
4828 {
4829 break;
4830 }
4831 }
4832 }
4833
4834 /*
4835 If we use GTIDs and have partial transactions on the relay log,
4836 must check if it ends on next relay log files.
4837 We also need to feed the boundary parser with the rest of the
4838 relay log to put it in the correct state before receiving new
4839 events from the master in the case of GTID auto positioning be
4840 disabled.
4841 */
4842 if (is_relay_log && filename_list.size() > 0)
4843 {
4844 /*
4845 Suppose the following relaylog:
4846
4847 rl-bin.000001 | rl-bin.000002 | rl-bin.000003 | rl-bin-000004
4848 ---------------+---------------+---------------+---------------
4849 PREV_GTIDS | PREV_GTIDS | PREV_GTIDS | PREV_GTIDS
4850 (empty) | (UUID:1) | (UUID:1) | (UUID:1)
4851 ---------------+---------------+---------------+---------------
4852 GTID(UUID:1) | QUERY(INSERT) | QUERY(INSERT) | XID
4853 ---------------+---------------+---------------+---------------
4854 QUERY(CREATE |
4855 TABLE t1 ...) |
4856 ---------------+
4857 GTID(UUID:2) |
4858 ---------------+
4859 QUERY(BEGIN) |
4860 ---------------+
4861
4862 As it is impossible to determine the current Retrieved_Gtid_Set by only
4863 looking to the PREVIOUS_GTIDS on the last relay log file, and scanning
4864 events on it, we tried to find a relay log file that contains at least
4865 one GTID event during the backwards search.
4866
4867 In the example, we will find a GTID only in rl-bin.000001, as the
4868 UUID:2 transaction was spanned across 4 relay log files.
4869
4870 The transaction spanning can be caused by "FLUSH RELAY LOGS" commands
4871 on slave while it is queuing the transaction.
4872
4873 So, in order to correctly add UUID:2 into Retrieved_Gtid_Set, we need
4874 to parse the relay log starting on the file we found the last GTID
4875 queued to know if the transaction was fully retrieved or not.
4876 */
4877
4878 /*
4879 Adjust the reverse iterator to point to the relaylog file we
4880 need to start parsing, as it was incremented after generating
4881 the relay log file name.
4882 */
4883 assert(rit != filename_list.rbegin());
4884 rit--;
4885 assert(rit != filename_list.rend());
4886 /* Reset the transaction parser before feeding it with events */
4887 trx_parser->reset();
4888 gtid_partial_trx->clear();
4889
4890 DBUG_PRINT("info", ("Iterating forwards through relay logs, "
4891 "updating the Retrieved_Gtid_Set and updating "
4892 "IO thread trx parser before start."));
4893 for (it= find(filename_list.begin(), filename_list.end(), *rit);
4894 it != filename_list.end(); it++)
4895 {
4896 const char *filename= it->c_str();
4897 DBUG_PRINT("info", ("filename='%s'", filename));
4898 if (read_gtids_and_update_trx_parser_from_relaylog(filename, all_gtids,
4899 true, trx_parser,
4900 gtid_partial_trx))
4901 {
4902 error= 1;
4903 goto end;
4904 }
4905 }
4906 }
4907 }
4908 if (lost_gtids != NULL && !reached_first_file)
4909 {
4910 /*
4911 This branch is only reacheable by a binary log. The relay log
4912 don't need to get lost_gtids information.
4913
4914 A 5.6 server sets GTID_PURGED by rotating the binary log.
4915
4916 A 5.6 server that had recently enabled GTIDs and set GTID_PURGED
4917 would have a sequence of binary logs like:
4918
4919 master-bin.N : No PREVIOUS_GTIDS (GTID wasn't enabled)
4920 master-bin.N+1: Has an empty PREVIOUS_GTIDS and a ROTATE
4921 (GTID was enabled on startup)
4922 master-bin.N+2: Has a PREVIOUS_GTIDS with the content set by a
4923 SET @@GLOBAL.GTID_PURGED + has GTIDs of some
4924 transactions.
4925
4926 If this 5.6 server be upgraded to 5.7 keeping its binary log files,
4927 this routine will have to find the first binary log that contains a
4928 PREVIOUS_GTIDS + a GTID event to ensure that the content of the
4929 GTID_PURGED will be correctly set (assuming binlog_gtid_simple_recovery
4930 is not enabled).
4931 */
4932 DBUG_PRINT("info", ("Iterating forwards through binary logs, looking for "
4933 "the first binary log that contains both a "
4934 "Previous_gtids_log_event and a Gtid_log_event."));
4935 assert(!is_relay_log);
4936 for (it= filename_list.begin(); it != filename_list.end(); it++)
4937 {
4938 /*
4939 We should pass a first_gtid to read_gtids_from_binlog when
4940 binlog_gtid_simple_recovery is disabled, or else it will return
4941 right after reading the PREVIOUS_GTIDS event to avoid stall on
4942 reading the whole binary log.
4943 */
4944 Gtid first_gtid= {0, 0};
4945 const char *filename= it->c_str();
4946 DBUG_PRINT("info", ("filename='%s'", filename));
4947 switch (read_gtids_from_binlog(filename, NULL, lost_gtids,
4948 binlog_gtid_simple_recovery ? NULL :
4949 &first_gtid,
4950 sid_map, verify_checksum, is_relay_log))
4951 {
4952 case ERROR:
4953 {
4954 error= 1;
4955 /*FALLTHROUGH*/
4956 }
4957 case GOT_GTIDS:
4958 {
4959 goto end;
4960 }
4961 case NO_GTIDS:
4962 case GOT_PREVIOUS_GTIDS:
4963 {
4964 /*
4965 Mysql server iterates forwards through binary logs, looking for
4966 the first binary log that contains both Previous_gtids_log_event
4967 and gtid_log_event for gathering the set of gtid_purged on server
4968 start. It also iterates forwards through binary logs, looking for
4969 the first binary log that contains both Previous_gtids_log_event
4970 and gtid_log_event for gathering the set of gtid_purged when
4971 purging binary logs. This may take very long time if it has many
4972 binary logs and almost all of them are out of filesystem cache.
4973 So if the binlog_gtid_simple_recovery is enabled, we just
4974 initialize GLOBAL.GTID_PURGED from the first binary log, do not
4975 read any more binary logs.
4976 */
4977 if (binlog_gtid_simple_recovery)
4978 goto end;
4979 /*FALLTHROUGH*/
4980 }
4981 case TRUNCATED:
4982 {
4983 break;
4984 }
4985 }
4986 }
4987 }
4988 end:
4989 if (all_gtids)
4990 all_gtids->dbug_print("all_gtids");
4991 if (lost_gtids)
4992 lost_gtids->dbug_print("lost_gtids");
4993 if (need_lock)
4994 {
4995 global_sid_lock->unlock();
4996 mysql_mutex_unlock(&LOCK_index);
4997 if (all_gtids != NULL)
4998 mysql_mutex_unlock(&LOCK_log);
4999 }
5000 filename_list.clear();
5001 DBUG_PRINT("info", ("returning %d", error));
5002 DBUG_RETURN(error != 0 ? true : false);
5003 }
5004
5005
5006 /**
5007 Open a (new) binlog file.
5008
5009 - Open the log file and the index file. Register the new
5010 file name in it
5011 - When calling this when the file is in use, you must have a locks
5012 on LOCK_log and LOCK_index.
5013
5014 @retval
5015 0 ok
5016 @retval
5017 1 error
5018 */
5019
open_binlog(const char * log_name,const char * new_name,ulong max_size_arg,bool null_created_arg,bool need_lock_index,bool need_sid_lock,Format_description_log_event * extra_description_event)5020 bool MYSQL_BIN_LOG::open_binlog(const char *log_name,
5021 const char *new_name,
5022 ulong max_size_arg,
5023 bool null_created_arg,
5024 bool need_lock_index,
5025 bool need_sid_lock,
5026 Format_description_log_event *extra_description_event)
5027 {
5028 // lock_index must be acquired *before* sid_lock.
5029 assert(need_sid_lock || !need_lock_index);
5030 DBUG_ENTER("MYSQL_BIN_LOG::open_binlog(const char *, ...)");
5031 DBUG_PRINT("enter",("base filename: %s", log_name));
5032
5033 mysql_mutex_assert_owner(get_log_lock());
5034
5035 if (init_and_set_log_file_name(log_name, new_name))
5036 {
5037 sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
5038 DBUG_RETURN(1);
5039 }
5040
5041 DBUG_PRINT("info", ("generated filename: %s", log_file_name));
5042
5043 DEBUG_SYNC(current_thd, "after_log_file_name_initialized");
5044
5045 #ifdef HAVE_REPLICATION
5046 if (open_purge_index_file(TRUE) ||
5047 register_create_index_entry(log_file_name) ||
5048 sync_purge_index_file() ||
5049 DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
5050 {
5051 /**
5052 @todo: although this was introduced to appease valgrind
5053 when injecting emulated faults using fault_injection_registering_index
5054 it may be good to consider what actually happens when
5055 open_purge_index_file succeeds but register or sync fails.
5056
5057 Perhaps we might need the code below in MYSQL_BIN_LOG::cleanup
5058 for "real life" purposes as well?
5059 */
5060 DBUG_EXECUTE_IF("fault_injection_registering_index", {
5061 if (my_b_inited(&purge_index_file))
5062 {
5063 end_io_cache(&purge_index_file);
5064 my_close(purge_index_file.file, MYF(0));
5065 }
5066 });
5067
5068 sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
5069 DBUG_RETURN(1);
5070 }
5071 DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
5072 #endif
5073
5074 write_error= 0;
5075
5076 /* open the main log file */
5077 if (open(
5078 #ifdef HAVE_PSI_INTERFACE
5079 m_key_file_log,
5080 #endif
5081 log_name, new_name))
5082 {
5083 #ifdef HAVE_REPLICATION
5084 close_purge_index_file();
5085 #endif
5086 DBUG_RETURN(1); /* all warnings issued */
5087 }
5088
5089 max_size= max_size_arg;
5090
5091 open_count++;
5092
5093 bool write_file_name_to_index_file=0;
5094
5095 /* This must be before goto err. */
5096 #ifndef NDEBUG
5097 binary_log_debug::debug_pretend_version_50034_in_binlog=
5098 DBUG_EVALUATE_IF("pretend_version_50034_in_binlog", true, false);
5099 #endif
5100 Format_description_log_event s(BINLOG_VERSION);
5101
5102 if (!my_b_filelength(&log_file))
5103 {
5104 /*
5105 The binary log file was empty (probably newly created)
5106 This is the normal case and happens when the user doesn't specify
5107 an extension for the binary log files.
5108 In this case we write a standard header to it.
5109 */
5110 if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
5111 BIN_LOG_HEADER_SIZE))
5112 goto err;
5113 bytes_written+= BIN_LOG_HEADER_SIZE;
5114 write_file_name_to_index_file= 1;
5115 }
5116
5117 /*
5118 don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
5119 as we won't be able to reset it later
5120 */
5121 if (io_cache_type == WRITE_CACHE)
5122 {
5123 s.common_header->flags|= LOG_EVENT_BINLOG_IN_USE_F;
5124 }
5125
5126 if (is_relay_log)
5127 {
5128 /* relay-log */
5129 if (relay_log_checksum_alg == binary_log::BINLOG_CHECKSUM_ALG_UNDEF)
5130 {
5131 /* inherit master's A descriptor if one has been received */
5132 if (opt_slave_sql_verify_checksum == 0)
5133 /* otherwise use slave's local preference of RL events verification */
5134 relay_log_checksum_alg= binary_log::BINLOG_CHECKSUM_ALG_OFF;
5135 else
5136 relay_log_checksum_alg= static_cast<enum_binlog_checksum_alg>
5137 (binlog_checksum_options);
5138 }
5139 s.common_footer->checksum_alg= relay_log_checksum_alg;
5140 }
5141 else
5142 /* binlog */
5143 s.common_footer->checksum_alg= static_cast<enum_binlog_checksum_alg>
5144 (binlog_checksum_options);
5145
5146 assert((s.common_footer)->checksum_alg !=
5147 binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
5148 if (!s.is_valid())
5149 goto err;
5150 s.dont_set_created= null_created_arg;
5151 /* Set LOG_EVENT_RELAY_LOG_F flag for relay log's FD */
5152 if (is_relay_log)
5153 s.set_relay_log_event();
5154 if (s.write(&log_file))
5155 goto err;
5156 bytes_written+= s.common_header->data_written;
5157 /*
5158 We need to revisit this code and improve it.
5159 See further comments in the mysqld.
5160 /Alfranio
5161 */
5162 if (current_thd)
5163 {
5164 Gtid_set logged_gtids_binlog(global_sid_map, global_sid_lock);
5165 Gtid_set* previous_logged_gtids;
5166
5167 if (is_relay_log)
5168 previous_logged_gtids= previous_gtid_set_relaylog;
5169 else
5170 previous_logged_gtids= &logged_gtids_binlog;
5171
5172 if (need_sid_lock)
5173 global_sid_lock->wrlock();
5174 else
5175 global_sid_lock->assert_some_wrlock();
5176
5177 if (!is_relay_log)
5178 {
5179 const Gtid_set *executed_gtids= gtid_state->get_executed_gtids();
5180 const Gtid_set *gtids_only_in_table=
5181 gtid_state->get_gtids_only_in_table();
5182 /* logged_gtids_binlog= executed_gtids - gtids_only_in_table */
5183 if (logged_gtids_binlog.add_gtid_set(executed_gtids) !=
5184 RETURN_STATUS_OK)
5185 {
5186 if (need_sid_lock)
5187 global_sid_lock->unlock();
5188 goto err;
5189 }
5190 logged_gtids_binlog.remove_gtid_set(gtids_only_in_table);
5191 }
5192 DBUG_PRINT("info",("Generating PREVIOUS_GTIDS for %s file.",
5193 is_relay_log ? "relaylog" : "binlog"));
5194 Previous_gtids_log_event prev_gtids_ev(previous_logged_gtids);
5195 if (is_relay_log)
5196 prev_gtids_ev.set_relay_log_event();
5197 if (need_sid_lock)
5198 global_sid_lock->unlock();
5199 prev_gtids_ev.common_footer->checksum_alg=
5200 (s.common_footer)->checksum_alg;
5201 if (prev_gtids_ev.write(&log_file))
5202 goto err;
5203 bytes_written+= prev_gtids_ev.common_header->data_written;
5204 }
5205 else // !(current_thd)
5206 {
5207 /*
5208 If the slave was configured before server restart, the server will
5209 generate a new relay log file without having current_thd, but this
5210 new relay log file must have a PREVIOUS_GTIDS event as we now
5211 generate the PREVIOUS_GTIDS event always.
5212
5213 This is only needed for relay log files because the server will add
5214 the PREVIOUS_GTIDS of binary logs (when current_thd==NULL) after
5215 server's GTID initialization.
5216
5217 During server's startup at mysqld_main(), from the binary/relay log
5218 initialization point of view, it will:
5219 1) Call init_server_components() that will generate a new binary log
5220 file but won't write the PREVIOUS_GTIDS event yet;
5221 2) Initialize server's GTIDs;
5222 3) Write the binary log PREVIOUS_GTIDS;
5223 4) Call init_slave() in where the new relay log file will be created
5224 after initializing relay log's Retrieved_Gtid_Set;
5225 */
5226 if (is_relay_log)
5227 {
5228 if (need_sid_lock)
5229 global_sid_lock->wrlock();
5230 else
5231 global_sid_lock->assert_some_wrlock();
5232
5233 DBUG_PRINT("info",("Generating PREVIOUS_GTIDS for relaylog file."));
5234 Previous_gtids_log_event prev_gtids_ev(previous_gtid_set_relaylog);
5235 prev_gtids_ev.set_relay_log_event();
5236
5237 if (need_sid_lock)
5238 global_sid_lock->unlock();
5239
5240 prev_gtids_ev.common_footer->checksum_alg=
5241 (s.common_footer)->checksum_alg;
5242 if (prev_gtids_ev.write(&log_file))
5243 goto err;
5244 bytes_written+= prev_gtids_ev.common_header->data_written;
5245 }
5246 }
5247 if (extra_description_event &&
5248 extra_description_event->binlog_version>=4)
5249 {
5250 /*
5251 This is a relay log written to by the I/O slave thread.
5252 Write the event so that others can later know the format of this relay
5253 log.
5254 Note that this event is very close to the original event from the
5255 master (it has binlog version of the master, event types of the
5256 master), so this is suitable to parse the next relay log's event. It
5257 has been produced by
5258 Format_description_log_event::Format_description_log_event(char* buf,).
5259 Why don't we want to write the mi_description_event if this
5260 event is for format<4 (3.23 or 4.x): this is because in that case, the
5261 mi_description_event describes the data received from the
5262 master, but not the data written to the relay log (*conversion*),
5263 which is in format 4 (slave's).
5264 */
5265 /*
5266 Set 'created' to 0, so that in next relay logs this event does not
5267 trigger cleaning actions on the slave in
5268 Format_description_log_event::apply_event_impl().
5269 */
5270 extra_description_event->created= 0;
5271 /* Don't set log_pos in event header */
5272 extra_description_event->set_artificial_event();
5273
5274 if (extra_description_event->write(&log_file))
5275 goto err;
5276 bytes_written+= extra_description_event->common_header->data_written;
5277 }
5278 if (flush_io_cache(&log_file) ||
5279 mysql_file_sync(log_file.file, MYF(MY_WME)))
5280 goto err;
5281
5282 if (write_file_name_to_index_file)
5283 {
5284 #ifdef HAVE_REPLICATION
5285 DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
5286 #endif
5287
5288 assert(my_b_inited(&index_file) != 0);
5289
5290 /*
5291 The new log file name is appended into crash safe index file after
5292 all the content of index file is copyed into the crash safe index
5293 file. Then move the crash safe index file to index file.
5294 */
5295 DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
5296 {DBUG_SET("+d,simulate_no_free_space_error");});
5297 if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
5298 add_log_to_index((uchar*) log_file_name, strlen(log_file_name),
5299 need_lock_index))
5300 {
5301 DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
5302 {
5303 DBUG_SET("-d,simulate_file_write_error");
5304 DBUG_SET("-d,simulate_no_free_space_error");
5305 DBUG_SET("-d,simulate_disk_full_on_open_binlog");
5306 });
5307 goto err;
5308 }
5309
5310 #ifdef HAVE_REPLICATION
5311 DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
5312 #endif
5313 }
5314
5315 log_state.atomic_set(LOG_OPENED);
5316 /*
5317 At every rotate memorize the last transaction counter state to use it as
5318 offset at logging the transaction logical timestamps.
5319 */
5320 mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
5321 m_dependency_tracker.rotate();
5322 mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);
5323
5324 #ifdef HAVE_REPLICATION
5325 close_purge_index_file();
5326 #endif
5327
5328 update_binlog_end_pos();
5329 DBUG_RETURN(0);
5330
5331 err:
5332 #ifdef HAVE_REPLICATION
5333 if (is_inited_purge_index_file())
5334 purge_index_entry(NULL, NULL, need_lock_index);
5335 close_purge_index_file();
5336 #endif
5337 if (binlog_error_action == ABORT_SERVER)
5338 {
5339 exec_binlog_error_action_abort("Either disk is full or file system is read "
5340 "only while opening the binlog. Aborting the"
5341 " server.");
5342 }
5343 else
5344 {
5345 sql_print_error("Could not use %s for logging (error %d). "
5346 "Turning logging off for the whole duration of the MySQL "
5347 "server process. To turn it on again: fix the cause, "
5348 "shutdown the MySQL server and restart it.",
5349 (new_name) ? new_name : name, errno);
5350 close(LOG_CLOSE_INDEX, false, need_lock_index);
5351 }
5352 DBUG_RETURN(1);
5353 }
5354
5355
5356 /**
5357 Move crash safe index file to index file.
5358
5359 @param need_lock_index If true, LOCK_index will be acquired;
5360 otherwise it should already be held.
5361
5362 @retval 0 ok
5363 @retval -1 error
5364 */
move_crash_safe_index_file_to_index_file(bool need_lock_index)5365 int MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file(bool need_lock_index)
5366 {
5367 int error= 0;
5368 File fd= -1;
5369 DBUG_ENTER("MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file");
5370 int failure_trials= MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
5371 bool file_rename_status= false, file_delete_status= false;
5372 THD *thd= current_thd;
5373
5374 if (need_lock_index)
5375 mysql_mutex_lock(&LOCK_index);
5376 else
5377 mysql_mutex_assert_owner(&LOCK_index);
5378
5379 if (my_b_inited(&index_file))
5380 {
5381 end_io_cache(&index_file);
5382 if (mysql_file_close(index_file.file, MYF(0)) < 0)
5383 {
5384 error= -1;
5385 sql_print_error("While rebuilding index file %s: "
5386 "Failed to close the index file.", index_file_name);
5387 /*
5388 Delete Crash safe index file here and recover the binlog.index
5389 state(index_file io_cache) from old binlog.index content.
5390 */
5391 mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
5392 MYF(0));
5393
5394 goto recoverable_err;
5395 }
5396
5397 /*
5398 Sometimes an outsider can lock index files for temporary viewing
5399 purpose. For eg: MEB locks binlog.index/relaylog.index to view
5400 the content of the file. During that small period of time, deletion
5401 of the file is not possible on some platforms(Eg: Windows)
5402 Server should retry the delete operation for few times instead of panicking
5403 immediately.
5404 */
5405 while ((file_delete_status == false) && (failure_trials > 0))
5406 {
5407 if (DBUG_EVALUATE_IF("force_index_file_delete_failure", 1, 0)) break;
5408
5409 DBUG_EXECUTE_IF("simulate_index_file_delete_failure",
5410 {
5411 /* This simulation causes the delete to fail */
5412 static char first_char= index_file_name[0];
5413 index_file_name[0]= 0;
5414 sql_print_information("Retrying delete");
5415 if (failure_trials == 1)
5416 index_file_name[0]= first_char;
5417 };);
5418 file_delete_status = !(mysql_file_delete(key_file_binlog_index,
5419 index_file_name, MYF(MY_WME)));
5420 --failure_trials;
5421 if (!file_delete_status)
5422 {
5423 my_sleep(1000);
5424 /* Clear the error before retrying. */
5425 if (failure_trials > 0)
5426 thd->clear_error();
5427 }
5428 }
5429
5430 if (!file_delete_status)
5431 {
5432 error= -1;
5433 sql_print_error("While rebuilding index file %s: "
5434 "Failed to delete the existing index file. It could be "
5435 "that file is being used by some other process.",
5436 index_file_name);
5437 /*
5438 Delete Crash safe file index file here and recover the binlog.index
5439 state(index_file io_cache) from old binlog.index content.
5440 */
5441 mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
5442 MYF(0));
5443
5444 goto recoverable_err;
5445 }
5446 }
5447
5448 DBUG_EXECUTE_IF("crash_create_before_rename_index_file", DBUG_SUICIDE(););
5449 /*
5450 Sometimes an outsider can lock index files for temporary viewing
5451 purpose. For eg: MEB locks binlog.index/relaylog.index to view
5452 the content of the file. During that small period of time, rename
5453 of the file is not possible on some platforms(Eg: Windows)
5454 Server should retry the rename operation for few times instead of panicking
5455 immediately.
5456 */
5457 failure_trials = MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
5458 while ((file_rename_status == false) && (failure_trials > 0))
5459 {
5460 DBUG_EXECUTE_IF("simulate_crash_safe_index_file_rename_failure",
5461 {
5462 /* This simulation causes the rename to fail */
5463 static char first_char= index_file_name[0];
5464 index_file_name[0]= 0;
5465 sql_print_information("Retrying rename");
5466 if (failure_trials == 1)
5467 index_file_name[0]= first_char;
5468 };);
5469 file_rename_status =
5470 !(my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)));
5471 --failure_trials;
5472 if (!file_rename_status)
5473 {
5474 my_sleep(1000);
5475 /* Clear the error before retrying. */
5476 if (failure_trials > 0)
5477 thd->clear_error();
5478 }
5479 }
5480 if (!file_rename_status)
5481 {
5482 error= -1;
5483 sql_print_error("While rebuilding index file %s: "
5484 "Failed to rename the new index file to the existing "
5485 "index file.", index_file_name);
5486 goto fatal_err;
5487 }
5488 DBUG_EXECUTE_IF("crash_create_after_rename_index_file", DBUG_SUICIDE(););
5489
5490 recoverable_err:
5491 if ((fd= mysql_file_open(key_file_binlog_index,
5492 index_file_name,
5493 O_RDWR | O_CREAT | O_BINARY,
5494 MYF(MY_WME))) < 0 ||
5495 mysql_file_sync(fd, MYF(MY_WME)) ||
5496 init_io_cache_ext(&index_file, fd, IO_SIZE, READ_CACHE,
5497 mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(0)),
5498 0, MYF(MY_WME | MY_WAIT_IF_FULL),
5499 key_file_binlog_index_cache))
5500 {
5501 sql_print_error("After rebuilding the index file %s: "
5502 "Failed to open the index file.", index_file_name);
5503 goto fatal_err;
5504 }
5505
5506 if (need_lock_index)
5507 mysql_mutex_unlock(&LOCK_index);
5508 DBUG_RETURN(error);
5509
5510 fatal_err:
5511 /*
5512 This situation is very very rare to happen (unless there is some serious
5513 memory related issues like OOM) and should be treated as fatal error.
5514 Hence it is better to bring down the server without respecting
5515 'binlog_error_action' value here.
5516 */
5517 exec_binlog_error_action_abort("MySQL server failed to update the "
5518 "binlog.index file's content properly. "
5519 "It might not be in sync with available "
5520 "binlogs and the binlog.index file state is in "
5521 "unrecoverable state. Aborting the server.");
5522 /*
5523 Server is aborted in the above function.
5524 This is dead code to make compiler happy.
5525 */
5526 DBUG_RETURN(error);
5527 }
5528
5529
5530 /**
5531 Append log file name to index file.
5532
5533 - To make crash safe, we copy all the content of index file
5534 to crash safe index file firstly and then append the log
5535 file name to the crash safe index file. Finally move the
5536 crash safe index file to index file.
5537
5538 @retval
5539 0 ok
5540 @retval
5541 -1 error
5542 */
add_log_to_index(uchar * log_name,size_t log_name_len,bool need_lock_index)5543 int MYSQL_BIN_LOG::add_log_to_index(uchar* log_name,
5544 size_t log_name_len, bool need_lock_index)
5545 {
5546 DBUG_ENTER("MYSQL_BIN_LOG::add_log_to_index");
5547
5548 if (open_crash_safe_index_file())
5549 {
5550 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5551 "open the crash safe index file.");
5552 goto err;
5553 }
5554
5555 if (copy_file(&index_file, &crash_safe_index_file, 0))
5556 {
5557 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5558 "copy index file to crash safe index file.");
5559 goto err;
5560 }
5561
5562 if (my_b_write(&crash_safe_index_file, log_name, log_name_len) ||
5563 my_b_write(&crash_safe_index_file, (uchar*) "\n", 1) ||
5564 flush_io_cache(&crash_safe_index_file) ||
5565 mysql_file_sync(crash_safe_index_file.file, MYF(MY_WME)))
5566 {
5567 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5568 "append log file name: %s, to crash "
5569 "safe index file.", log_name);
5570 goto err;
5571 }
5572
5573 if (close_crash_safe_index_file())
5574 {
5575 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5576 "close the crash safe index file.");
5577 goto err;
5578 }
5579
5580 if (move_crash_safe_index_file_to_index_file(need_lock_index))
5581 {
5582 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5583 "move crash safe index file to index file.");
5584 goto err;
5585 }
5586
5587 DBUG_RETURN(0);
5588
5589 err:
5590 DBUG_RETURN(-1);
5591 }
5592
get_current_log(LOG_INFO * linfo,bool need_lock_log)5593 int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo, bool need_lock_log/*true*/)
5594 {
5595 if (need_lock_log)
5596 mysql_mutex_lock(&LOCK_log);
5597 int ret = raw_get_current_log(linfo);
5598 if (need_lock_log)
5599 mysql_mutex_unlock(&LOCK_log);
5600 return ret;
5601 }
5602
raw_get_current_log(LOG_INFO * linfo)5603 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
5604 {
5605 strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
5606 linfo->pos = my_b_safe_tell(&log_file);
5607 return 0;
5608 }
5609
check_write_error(THD * thd)5610 bool MYSQL_BIN_LOG::check_write_error(THD *thd)
5611 {
5612 DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
5613
5614 bool checked= FALSE;
5615
5616 if (!thd->is_error())
5617 DBUG_RETURN(checked);
5618
5619 switch (thd->get_stmt_da()->mysql_errno())
5620 {
5621 case ER_TRANS_CACHE_FULL:
5622 case ER_STMT_CACHE_FULL:
5623 case ER_ERROR_ON_WRITE:
5624 case ER_BINLOG_LOGGING_IMPOSSIBLE:
5625 checked= TRUE;
5626 break;
5627 }
5628 DBUG_PRINT("return", ("checked: %s", YESNO(checked)));
5629 DBUG_RETURN(checked);
5630 }
5631
set_write_error(THD * thd,bool is_transactional)5632 void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
5633 {
5634 DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
5635
5636 write_error= 1;
5637
5638 if (check_write_error(thd))
5639 DBUG_VOID_RETURN;
5640
5641 if (my_errno() == EFBIG)
5642 {
5643 if (is_transactional)
5644 {
5645 my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(MY_WME));
5646 }
5647 else
5648 {
5649 my_message(ER_STMT_CACHE_FULL, ER(ER_STMT_CACHE_FULL), MYF(MY_WME));
5650 }
5651 }
5652 else
5653 {
5654 char errbuf[MYSYS_STRERROR_SIZE];
5655 my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name,
5656 errno, my_strerror(errbuf, sizeof(errbuf), errno));
5657 }
5658
5659 DBUG_VOID_RETURN;
5660 }
5661
compare_log_name(const char * log_1,const char * log_2)5662 static int compare_log_name(const char* log_1, const char* log_2)
5663 {
5664 const char * log_1_basename= log_1 + dirname_length(log_1);
5665 const char * log_2_basename= log_2 + dirname_length(log_2);
5666
5667 return strcmp(log_1_basename,log_2_basename);
5668 }
5669
5670 /**
5671 Find the position in the log-index-file for the given log name.
5672
5673 @param[out] linfo The found log file name will be stored here, along
5674 with the byte offset of the next log file name in the index file.
5675 @param log_name Filename to find in the index file, or NULL if we
5676 want to read the first entry.
5677 @param need_lock_index If false, this function acquires LOCK_index;
5678 otherwise the lock should already be held by the caller.
5679
5680 @note
5681 On systems without the truncate function the file will end with one or
5682 more empty lines. These will be ignored when reading the file.
5683
5684 @retval
5685 0 ok
5686 @retval
5687 LOG_INFO_EOF End of log-index-file found
5688 @retval
5689 LOG_INFO_IO Got IO error while reading file
5690 */
5691
find_log_pos(LOG_INFO * linfo,const char * log_name,bool need_lock_index)5692 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
5693 bool need_lock_index)
5694 {
5695 int error= 0;
5696 char *full_fname= linfo->log_file_name;
5697 char full_log_name[FN_REFLEN], fname[FN_REFLEN];
5698 DBUG_ENTER("find_log_pos");
5699 full_log_name[0]= full_fname[0]= 0;
5700
5701 /*
5702 Mutex needed because we need to make sure the file pointer does not
5703 move from under our feet
5704 */
5705 if (need_lock_index)
5706 mysql_mutex_lock(&LOCK_index);
5707 else
5708 mysql_mutex_assert_owner(&LOCK_index);
5709
5710 if (!my_b_inited(&index_file))
5711 {
5712 error= LOG_INFO_IO;
5713 goto end;
5714 }
5715
5716 // extend relative paths for log_name to be searched
5717 if (log_name)
5718 {
5719 if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
5720 {
5721 error= LOG_INFO_EOF;
5722 goto end;
5723 }
5724 }
5725
5726 DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
5727 log_name ? log_name : "NULL", full_log_name));
5728
5729 /* As the file is flushed, we can't get an error here */
5730 my_b_seek(&index_file, (my_off_t) 0);
5731
5732 for (;;)
5733 {
5734 size_t length;
5735 my_off_t offset= my_b_tell(&index_file);
5736
5737 DBUG_EXECUTE_IF("simulate_find_log_pos_error",
5738 error= LOG_INFO_EOF; break;);
5739 /* If we get 0 or 1 characters, this is the end of the file */
5740 if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
5741 {
5742 /* Did not find the given entry; Return not found or error */
5743 error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
5744 break;
5745 }
5746
5747 // extend relative paths and match against full path
5748 if (normalize_binlog_name(full_fname, fname, is_relay_log))
5749 {
5750 error= LOG_INFO_EOF;
5751 break;
5752 }
5753 // if the log entry matches, null string matching anything
5754 if (!log_name ||
5755 !compare_log_name(full_fname,full_log_name))
5756 {
5757 DBUG_PRINT("info", ("Found log file entry"));
5758 linfo->index_file_start_offset= offset;
5759 linfo->index_file_offset = my_b_tell(&index_file);
5760 break;
5761 }
5762 linfo->entry_index++;
5763 }
5764
5765 end:
5766 if (need_lock_index)
5767 mysql_mutex_unlock(&LOCK_index);
5768 DBUG_RETURN(error);
5769 }
5770
5771
5772 /**
5773 Find the position in the log-index-file for the given log name.
5774
5775 @param[out] linfo The filename will be stored here, along with the
5776 byte offset of the next filename in the index file.
5777
5778 @param need_lock_index If true, LOCK_index will be acquired;
5779 otherwise it should already be held by the caller.
5780
5781 @note
5782 - Before calling this function, one has to call find_log_pos()
5783 to set up 'linfo'
5784 - Mutex needed because we need to make sure the file pointer does not move
5785 from under our feet
5786
5787 @retval 0 ok
5788 @retval LOG_INFO_EOF End of log-index-file found
5789 @retval LOG_INFO_IO Got IO error while reading file
5790 */
find_next_log(LOG_INFO * linfo,bool need_lock_index)5791 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock_index)
5792 {
5793 int error= 0;
5794 size_t length;
5795 char fname[FN_REFLEN];
5796 char *full_fname= linfo->log_file_name;
5797
5798 if (need_lock_index)
5799 mysql_mutex_lock(&LOCK_index);
5800 else
5801 mysql_mutex_assert_owner(&LOCK_index);
5802
5803 if (!my_b_inited(&index_file))
5804 {
5805 error= LOG_INFO_IO;
5806 goto err;
5807 }
5808 /* As the file is flushed, we can't get an error here */
5809 my_b_seek(&index_file, linfo->index_file_offset);
5810
5811 linfo->index_file_start_offset= linfo->index_file_offset;
5812 if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
5813 {
5814 error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
5815 goto err;
5816 }
5817
5818 if (fname[0] != 0)
5819 {
5820 if(normalize_binlog_name(full_fname, fname, is_relay_log))
5821 {
5822 error= LOG_INFO_EOF;
5823 goto err;
5824 }
5825 length= strlen(full_fname);
5826 }
5827
5828 linfo->index_file_offset= my_b_tell(&index_file);
5829
5830 err:
5831 if (need_lock_index)
5832 mysql_mutex_unlock(&LOCK_index);
5833 return error;
5834 }
5835
5836 /**
5837 Find the relay log name following the given name from relay log index file.
5838
5839 @param[in|out] log_name The name is full path name.
5840
5841 @return return 0 if it finds next relay log. Otherwise return the error code.
5842 */
find_next_relay_log(char log_name[FN_REFLEN+1])5843 int MYSQL_BIN_LOG::find_next_relay_log(char log_name[FN_REFLEN+1])
5844 {
5845 LOG_INFO info;
5846 int error;
5847 char relative_path_name[FN_REFLEN+1];
5848
5849 if (fn_format(relative_path_name, log_name+dirname_length(log_name),
5850 mysql_data_home, "", 0)
5851 == NullS)
5852 return 1;
5853
5854 mysql_mutex_lock(&LOCK_index);
5855
5856 error= find_log_pos(&info, relative_path_name, false);
5857 if (error == 0)
5858 {
5859 error= find_next_log(&info, false);
5860 if (error == 0)
5861 strcpy(log_name, info.log_file_name);
5862 }
5863
5864 mysql_mutex_unlock(&LOCK_index);
5865 return error;
5866 }
5867
5868 /**
5869 Removes files, as part of a RESET MASTER or RESET SLAVE statement,
5870 by deleting all logs refered to in the index file. Then, it starts
5871 writing to a new log file.
5872
5873 The new index file will only contain this file.
5874
5875 @param thd Thread
5876
5877 @note
5878 If not called from slave thread, write start event to new log
5879
5880 @retval
5881 0 ok
5882 @retval
5883 1 error
5884 */
reset_logs(THD * thd,bool delete_only)5885 bool MYSQL_BIN_LOG::reset_logs(THD* thd, bool delete_only)
5886 {
5887 LOG_INFO linfo;
5888 bool error=0;
5889 int err;
5890 const char* save_name;
5891 DBUG_ENTER("reset_logs");
5892
5893 /*
5894 Flush logs for storage engines, so that the last transaction
5895 is fsynced inside storage engines.
5896 */
5897 if (ha_flush_logs(NULL))
5898 DBUG_RETURN(1);
5899
5900 ha_reset_logs(thd);
5901
5902 /*
5903 We need to get both locks to be sure that no one is trying to
5904 write to the index log file.
5905 */
5906 mysql_mutex_lock(&LOCK_log);
5907 mysql_mutex_lock(&LOCK_index);
5908
5909 global_sid_lock->wrlock();
5910
5911 /* Save variables so that we can reopen the log */
5912 save_name=name;
5913 name=0; // Protect against free
5914 close(LOG_CLOSE_TO_BE_OPENED, false/*need_lock_log=false*/,
5915 false/*need_lock_index=false*/);
5916
5917 /*
5918 First delete all old log files and then update the index file.
5919 As we first delete the log files and do not use sort of logging,
5920 a crash may lead to an inconsistent state where the index has
5921 references to non-existent files.
5922
5923 We need to invert the steps and use the purge_index_file methods
5924 in order to make the operation safe.
5925 */
5926
5927 if ((err= find_log_pos(&linfo, NullS, false/*need_lock_index=false*/)) != 0)
5928 {
5929 uint errcode= purge_log_get_error_code(err);
5930 sql_print_error("Failed to locate old binlog or relay log files");
5931 my_message(errcode, ER(errcode), MYF(0));
5932 error= 1;
5933 goto err;
5934 }
5935
5936 for (;;)
5937 {
5938 if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
5939 {
5940 if (my_errno() == ENOENT)
5941 {
5942 push_warning_printf(current_thd, Sql_condition::SL_WARNING,
5943 ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
5944 linfo.log_file_name);
5945 sql_print_information("Failed to delete file '%s'",
5946 linfo.log_file_name);
5947 set_my_errno(0);
5948 error= 0;
5949 }
5950 else
5951 {
5952 push_warning_printf(current_thd, Sql_condition::SL_WARNING,
5953 ER_BINLOG_PURGE_FATAL_ERR,
5954 "a problem with deleting %s; "
5955 "consider examining correspondence "
5956 "of your binlog index file "
5957 "to the actual binlog files",
5958 linfo.log_file_name);
5959 error= 1;
5960 goto err;
5961 }
5962 }
5963 if (find_next_log(&linfo, false/*need_lock_index=false*/))
5964 break;
5965 }
5966
5967 /* Start logging with a new file */
5968 close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED,
5969 false/*need_lock_log=false*/,
5970 false/*need_lock_index=false*/);
5971 if ((error= my_delete_allow_opened(index_file_name, MYF(0)))) // Reset (open will update)
5972 {
5973 if (my_errno() == ENOENT)
5974 {
5975 push_warning_printf(current_thd, Sql_condition::SL_WARNING,
5976 ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
5977 index_file_name);
5978 sql_print_information("Failed to delete file '%s'",
5979 index_file_name);
5980 set_my_errno(0);
5981 error= 0;
5982 }
5983 else
5984 {
5985 push_warning_printf(current_thd, Sql_condition::SL_WARNING,
5986 ER_BINLOG_PURGE_FATAL_ERR,
5987 "a problem with deleting %s; "
5988 "consider examining correspondence "
5989 "of your binlog index file "
5990 "to the actual binlog files",
5991 index_file_name);
5992 error= 1;
5993 goto err;
5994 }
5995 }
5996
5997 #ifdef HAVE_REPLICATION
5998 /*
5999 For relay logs we clear the gtid state associated per channel(i.e rli)
6000 in the purge_relay_logs()
6001 */
6002 if (!is_relay_log)
6003 {
6004 if(gtid_state->clear(thd))
6005 {
6006 error= 1;
6007 goto err;
6008 }
6009 // don't clear global_sid_map because it's used by the relay log too
6010 if (gtid_state->init() != 0)
6011 goto err;
6012 }
6013 #endif
6014
6015 if (!delete_only)
6016 {
6017 if (!open_index_file(index_file_name, 0, false/*need_lock_index=false*/))
6018 if ((error= open_binlog(save_name, 0,
6019 max_size, false,
6020 false/*need_lock_index=false*/,
6021 false/*need_sid_lock=false*/,
6022 NULL)))
6023 goto err;
6024 }
6025 my_free((void *) save_name);
6026
6027 err:
6028 if (error == 1)
6029 name= const_cast<char*>(save_name);
6030 global_sid_lock->unlock();
6031 mysql_mutex_unlock(&LOCK_index);
6032 mysql_mutex_unlock(&LOCK_log);
6033 DBUG_RETURN(error);
6034 }
6035
6036
6037 /**
6038 Set the name of crash safe index file.
6039
6040 @retval
6041 0 ok
6042 @retval
6043 1 error
6044 */
set_crash_safe_index_file_name(const char * base_file_name)6045 int MYSQL_BIN_LOG::set_crash_safe_index_file_name(const char *base_file_name)
6046 {
6047 int error= 0;
6048 DBUG_ENTER("MYSQL_BIN_LOG::set_crash_safe_index_file_name");
6049 if (fn_format(crash_safe_index_file_name, base_file_name, mysql_data_home,
6050 ".index_crash_safe", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
6051 MY_REPLACE_EXT)) == NULL)
6052 {
6053 error= 1;
6054 sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed "
6055 "to set file name.");
6056 }
6057 DBUG_RETURN(error);
6058 }
6059
6060
6061 /**
6062 Open a (new) crash safe index file.
6063
6064 @note
6065 The crash safe index file is a special file
6066 used for guaranteeing index file crash safe.
6067 @retval
6068 0 ok
6069 @retval
6070 1 error
6071 */
open_crash_safe_index_file()6072 int MYSQL_BIN_LOG::open_crash_safe_index_file()
6073 {
6074 int error= 0;
6075 File file= -1;
6076
6077 DBUG_ENTER("MYSQL_BIN_LOG::open_crash_safe_index_file");
6078
6079 if (!my_b_inited(&crash_safe_index_file))
6080 {
6081 if ((file= my_open(crash_safe_index_file_name, O_RDWR | O_CREAT | O_BINARY,
6082 MYF(MY_WME))) < 0 ||
6083 init_io_cache(&crash_safe_index_file, file, IO_SIZE, WRITE_CACHE,
6084 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
6085 {
6086 error= 1;
6087 sql_print_error("MYSQL_BIN_LOG::open_crash_safe_index_file failed "
6088 "to open temporary index file.");
6089 }
6090 }
6091 DBUG_RETURN(error);
6092 }
6093
6094
6095 /**
6096 Close the crash safe index file.
6097
6098 @note
6099 The crash safe file is just closed, is not deleted.
6100 Because it is moved to index file later on.
6101 @retval
6102 0 ok
6103 @retval
6104 1 error
6105 */
close_crash_safe_index_file()6106 int MYSQL_BIN_LOG::close_crash_safe_index_file()
6107 {
6108 int error= 0;
6109
6110 DBUG_ENTER("MYSQL_BIN_LOG::close_crash_safe_index_file");
6111
6112 if (my_b_inited(&crash_safe_index_file))
6113 {
6114 end_io_cache(&crash_safe_index_file);
6115 error= my_close(crash_safe_index_file.file, MYF(0));
6116 }
6117 memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
6118
6119 DBUG_RETURN(error);
6120 }
6121
6122
6123 /**
6124 Delete relay log files prior to rli->group_relay_log_name
6125 (i.e. all logs which are not involved in a non-finished group
6126 (transaction)), remove them from the index file and start on next
6127 relay log.
6128
6129 IMPLEMENTATION
6130
6131 - You must hold rli->data_lock before calling this function, since
6132 it writes group_relay_log_pos and similar fields of
6133 Relay_log_info.
6134 - Protects index file with LOCK_index
6135 - Delete relevant relay log files
6136 - Copy all file names after these ones to the front of the index file
    - If the OS has truncate, truncate the file, else fill it with '\n'
6138 - Read the next file name from the index file and store in rli->linfo
6139
6140 @param rli Relay log information
6141 @param included If false, all relay logs that are strictly before
6142 rli->group_relay_log_name are deleted ; if true, the
6143 latter is deleted too (i.e. all relay logs
6144 read by the SQL slave thread are deleted).
6145
6146 @note
6147 - This is only called from the slave SQL thread when it has read
6148 all commands from a relay log and want to switch to a new relay log.
6149 - When this happens, we can be in an active transaction as
6150 a transaction can span over two relay logs
6151 (although it is always written as a single block to the master's binary
6152 log, hence cannot span over two master's binary logs).
6153
6154 @retval
6155 0 ok
6156 @retval
6157 LOG_INFO_EOF End of log-index-file found
6158 @retval
6159 LOG_INFO_SEEK Could not allocate IO cache
6160 @retval
6161 LOG_INFO_IO Got IO error while reading file
6162 */
6163
6164 #ifdef HAVE_REPLICATION
6165
purge_first_log(Relay_log_info * rli,bool included)6166 int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
6167 {
6168 int error;
6169 char *to_purge_if_included= NULL;
6170 DBUG_ENTER("purge_first_log");
6171
6172 assert(current_thd->system_thread == SYSTEM_THREAD_SLAVE_SQL);
6173 assert(is_relay_log);
6174 assert(is_open());
6175 assert(rli->slave_running == 1);
6176 assert(!strcmp(rli->linfo.log_file_name,rli->get_event_relay_log_name()));
6177
6178 mysql_mutex_assert_owner(&rli->data_lock);
6179
6180 mysql_mutex_lock(&LOCK_index);
6181 to_purge_if_included= my_strdup(key_memory_Relay_log_info_group_relay_log_name,
6182 rli->get_group_relay_log_name(), MYF(0));
6183
6184 /*
6185 Read the next log file name from the index file and pass it back to
6186 the caller.
6187 */
6188 if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
6189 false/*need_lock_index=false*/)) ||
6190 (error=find_next_log(&rli->linfo, false/*need_lock_index=false*/)))
6191 {
6192 char buff[22];
6193 sql_print_error("next log error: %d offset: %s log: %s included: %d",
6194 error,
6195 llstr(rli->linfo.index_file_offset,buff),
6196 rli->get_event_relay_log_name(),
6197 included);
6198 goto err;
6199 }
6200
6201 /*
6202 Reset rli's coordinates to the current log.
6203 */
6204 rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
6205 rli->set_event_relay_log_name(rli->linfo.log_file_name);
6206
6207 /*
6208 If we removed the rli->group_relay_log_name file,
6209 we must update the rli->group* coordinates, otherwise do not touch it as the
6210 group's execution is not finished (e.g. COMMIT not executed)
6211 */
6212 if (included)
6213 {
6214 rli->set_group_relay_log_pos(BIN_LOG_HEADER_SIZE);
6215 rli->set_group_relay_log_name(rli->linfo.log_file_name);
6216 rli->notify_group_relay_log_name_update();
6217 }
6218 /*
6219 Store where we are in the new file for the execution thread.
6220 If we are in the middle of a transaction, then we
6221 should not store the position in the repository, instead in
6222 that case set a flag to true which indicates that a 'forced flush'
6223 is postponed due to transaction split across the relaylogs.
6224 */
6225 if (!rli->is_in_group())
6226 rli->flush_info(TRUE);
6227 else
6228 rli->force_flush_postponed_due_to_split_trans= true;
6229
6230 DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););
6231
6232 mysql_mutex_lock(&rli->log_space_lock);
6233 rli->relay_log.purge_logs(to_purge_if_included, included,
6234 false/*need_lock_index=false*/,
6235 false/*need_update_threads=false*/,
6236 &rli->log_space_total, true);
6237 // Tell the I/O thread to take the relay_log_space_limit into account
6238 rli->ignore_log_space_limit= 0;
6239 mysql_mutex_unlock(&rli->log_space_lock);
6240
6241 /*
6242 Ok to broadcast after the critical region as there is no risk of
6243 the mutex being destroyed by this thread later - this helps save
6244 context switches
6245 */
6246 mysql_cond_broadcast(&rli->log_space_cond);
6247
6248 /*
6249 * Need to update the log pos because purge logs has been called
6250 * after fetching initially the log pos at the begining of the method.
6251 */
6252 if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
6253 false/*need_lock_index=false*/)))
6254 {
6255 char buff[22];
6256 sql_print_error("next log error: %d offset: %s log: %s included: %d",
6257 error,
6258 llstr(rli->linfo.index_file_offset,buff),
6259 rli->get_group_relay_log_name(),
6260 included);
6261 goto err;
6262 }
6263
6264 /* If included was passed, rli->linfo should be the first entry. */
6265 assert(!included || rli->linfo.index_file_start_offset == 0);
6266
6267 err:
6268 my_free(to_purge_if_included);
6269 mysql_mutex_unlock(&LOCK_index);
6270 DBUG_RETURN(error);
6271 }
6272
6273
6274 /**
6275 Remove logs from index file.
6276
6277 - To make crash safe, we copy the content of index file
6278 from index_file_start_offset recored in log_info to
6279 crash safe index file firstly and then move the crash
6280 safe index file to index file.
6281
6282 @param linfo Store here the found log file name and
6283 position to the NEXT log file name in
6284 the index file.
6285
6286 @param need_update_threads If we want to update the log coordinates
6287 of all threads. False for relay logs,
6288 true otherwise.
6289
6290 @retval
6291 0 ok
6292 @retval
6293 LOG_INFO_IO Got IO error while reading/writing file
6294 */
remove_logs_from_index(LOG_INFO * log_info,bool need_update_threads)6295 int MYSQL_BIN_LOG::remove_logs_from_index(LOG_INFO* log_info, bool need_update_threads)
6296 {
6297 if (open_crash_safe_index_file())
6298 {
6299 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6300 "open the crash safe index file.");
6301 goto err;
6302 }
6303
6304 if (copy_file(&index_file, &crash_safe_index_file,
6305 log_info->index_file_start_offset))
6306 {
6307 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6308 "copy index file to crash safe index file.");
6309 goto err;
6310 }
6311
6312 if (close_crash_safe_index_file())
6313 {
6314 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6315 "close the crash safe index file.");
6316 goto err;
6317 }
6318 DBUG_EXECUTE_IF("fault_injection_copy_part_file", DBUG_SUICIDE(););
6319
6320 if (move_crash_safe_index_file_to_index_file(false/*need_lock_index=false*/))
6321 {
6322 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6323 "move crash safe index file to index file.");
6324 goto err;
6325 }
6326
6327 // now update offsets in index file for running threads
6328 if (need_update_threads)
6329 adjust_linfo_offsets(log_info->index_file_start_offset);
6330 return 0;
6331
6332 err:
6333 return LOG_INFO_IO;
6334 }
6335
/**
  Remove all logs before the given log from disk and from the index file.

  The names of the files to remove are first written to the purge index
  (register) file, then the index file is rewritten, and only then are the
  registered files deleted.

  @param to_log               Delete all log file name before this file.
  @param included             If true, to_log is deleted too.
  @param need_lock_index      If true, LOCK_index is acquired here;
                              otherwise the caller must already own it.
  @param need_update_threads  If we want to update the log coordinates of
                              all threads. False for relay logs, true otherwise.
  @param decrease_log_space   If not null, decrement this variable of
                              the amount of log space freed
  @param auto_purge           True if this is an automatic purge; in that
                              case no warnings are pushed for active or
                              in-use logs.

  @note
    If any of the logs before the deleted one is in use,
    only purge logs up to this one.

  @retval
    0				ok
  @retval
    LOG_INFO_EOF		to_log not found
    LOG_INFO_EMFILE             too many files opened
    LOG_INFO_FATAL              if any other than ENOENT error from
                                mysql_file_stat() or mysql_file_delete()
*/

int MYSQL_BIN_LOG::purge_logs(const char *to_log,
                              bool included,
                              bool need_lock_index,
                              bool need_update_threads,
                              ulonglong *decrease_log_space,
                              bool auto_purge)
{
  int error= 0, no_of_log_files_to_purge= 0, no_of_log_files_purged= 0;
  int no_of_threads_locking_log= 0;
  bool exit_loop= 0;
  LOG_INFO log_info;
  THD *thd= current_thd;
  DBUG_ENTER("purge_logs");
  DBUG_PRINT("info",("to_log= %s",to_log));

  if (need_lock_index)
    mysql_mutex_lock(&LOCK_index);
  else
    mysql_mutex_assert_owner(&LOCK_index);
  /* First make sure that to_log is listed in the index file at all. */
  if ((error=find_log_pos(&log_info, to_log, false/*need_lock_index=false*/)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
                    "listed in the index.", to_log);
    goto err;
  }

  /* Only used in the ER_WARN_PURGE_LOG_IN_USE warning below. */
  no_of_log_files_to_purge= log_info.entry_index;

  /* Start from a fresh register file opened for writing. */
  if ((error= open_purge_index_file(TRUE)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
    goto err;
  }

  /*
    File name exists in index file; delete until we find this file
    or a file that is used.
  */
  if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
    goto err;

  /*
    The right-hand side of the condition is only evaluated once
    compare_log_name() returns 0 for the current entry: it copies 'included'
    into exit_loop, so with included == true the body runs one last time
    for that entry and then leaves through the exit_loop check at the
    bottom.
  */
  while ((compare_log_name(to_log,log_info.log_file_name) || (exit_loop=included)))
  {
    /* Stop at the log that is_active() reports as the current one. */
    if(is_active(log_info.log_file_name))
    {
      if(!auto_purge)
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_WARN_PURGE_LOG_IS_ACTIVE,
                            ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
                            log_info.log_file_name);
      break;
    }

    /* Stop as soon as log_in_use() reports a nonzero count for this file. */
    if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
    {
      if(!auto_purge)
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_WARN_PURGE_LOG_IN_USE,
                            ER(ER_WARN_PURGE_LOG_IN_USE),
                            log_info.log_file_name,  no_of_threads_locking_log,
                            no_of_log_files_purged, no_of_log_files_to_purge);
      break;
    }
    no_of_log_files_purged++;

    /* Record the file name; the file itself is deleted after label err. */
    if ((error= register_purge_index_entry(log_info.log_file_name)))
    {
      sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
                      log_info.log_file_name);
      goto err;
    }

    if (find_next_log(&log_info, false/*need_lock_index=false*/) || exit_loop)
      break;
  }

  DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););

  if ((error= sync_purge_index_file()))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
    goto err;
  }

  /* We know how many files to delete. Update index file. */
  if ((error=remove_logs_from_index(&log_info, need_update_threads)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
    goto err;
  }

  // Update gtid_state->lost_gtids
  if (!is_relay_log)
  {
    global_sid_lock->wrlock();
    error= init_gtid_sets(NULL,
                          const_cast<Gtid_set *>(gtid_state->get_lost_gtids()),
                          opt_master_verify_checksum,
                          false/*false=don't need lock*/,
                          NULL/*trx_parser*/, NULL/*gtid_partial_trx*/);
    global_sid_lock->unlock();
    if (error)
      goto err;
  }

  DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););

err:
  /* The code below runs on the success path as well as after a goto. */

  int error_index= 0, close_error_index= 0;
  /* Read each entry from purge_index_file and delete the file. */
  if (!error && is_inited_purge_index_file() &&
      (error_index= purge_index_entry(thd, decrease_log_space, false/*need_lock_index=false*/)))
    sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
                    " that would be purged.");

  /* Always close the register file, even when nothing was purged. */
  close_error_index= close_purge_index_file();

  DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););

  if (need_lock_index)
    mysql_mutex_unlock(&LOCK_index);

  /*
    Error codes from purge logs take precedence.
    Then error codes from purging the index entry.
    Finally, error codes from closing the purge index file.
  */
  error= error ? error : (error_index ? error_index :
                          close_error_index);

  DBUG_RETURN(error);
}
6494
set_purge_index_file_name(const char * base_file_name)6495 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
6496 {
6497 int error= 0;
6498 DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
6499 if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
6500 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
6501 MY_REPLACE_EXT)) == NULL)
6502 {
6503 error= 1;
6504 sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
6505 "file name.");
6506 }
6507 DBUG_RETURN(error);
6508 }
6509
open_purge_index_file(bool destroy)6510 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
6511 {
6512 int error= 0;
6513 File file= -1;
6514
6515 DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
6516
6517 if (destroy)
6518 close_purge_index_file();
6519
6520 if (!my_b_inited(&purge_index_file))
6521 {
6522 if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
6523 MYF(MY_WME))) < 0 ||
6524 init_io_cache(&purge_index_file, file, IO_SIZE,
6525 (destroy ? WRITE_CACHE : READ_CACHE),
6526 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
6527 {
6528 error= 1;
6529 sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
6530 " file.");
6531 }
6532 }
6533 DBUG_RETURN(error);
6534 }
6535
close_purge_index_file()6536 int MYSQL_BIN_LOG::close_purge_index_file()
6537 {
6538 int error= 0;
6539
6540 DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
6541
6542 if (my_b_inited(&purge_index_file))
6543 {
6544 end_io_cache(&purge_index_file);
6545 error= my_close(purge_index_file.file, MYF(0));
6546 }
6547 my_delete(purge_index_file_name, MYF(0));
6548 memset(&purge_index_file, 0, sizeof(purge_index_file));
6549
6550 DBUG_RETURN(error);
6551 }
6552
is_inited_purge_index_file()6553 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
6554 {
6555 DBUG_ENTER("MYSQL_BIN_LOG::is_inited_purge_index_file");
6556 DBUG_RETURN (my_b_inited(&purge_index_file));
6557 }
6558
sync_purge_index_file()6559 int MYSQL_BIN_LOG::sync_purge_index_file()
6560 {
6561 int error= 0;
6562 DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
6563
6564 if ((error= flush_io_cache(&purge_index_file)) ||
6565 (error= my_sync(purge_index_file.file, MYF(MY_WME))))
6566 DBUG_RETURN(error);
6567
6568 DBUG_RETURN(error);
6569 }
6570
register_purge_index_entry(const char * entry)6571 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
6572 {
6573 int error= 0;
6574 DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
6575
6576 if ((error=my_b_write(&purge_index_file, (const uchar*)entry, strlen(entry))) ||
6577 (error=my_b_write(&purge_index_file, (const uchar*)"\n", 1)))
6578 DBUG_RETURN (error);
6579
6580 DBUG_RETURN(error);
6581 }
6582
register_create_index_entry(const char * entry)6583 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
6584 {
6585 DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
6586 DBUG_RETURN(register_purge_index_entry(entry));
6587 }
6588
/**
  Delete the files listed in the purge index (register) file.

  The register file is re-initialized as a read cache and read line by line.
  A listed file is deleted only when it can be stat()ed and find_log_pos()
  reports LOG_INFO_EOF for it; a file for which find_log_pos() returns 0 is
  left alone. A file that does not exist (ENOENT) produces a warning and is
  skipped.

  @param thd                 Used for pushing warnings; when NULL, some of
                             the problems are only written to the error log.
  @param decrease_log_space  If not NULL, decremented by the st_size of each
                             file that was deleted.
  @param need_lock_index     Passed on to find_log_pos(). When false,
                             ha_binlog_index_purge_file() is also called for
                             each file before it is deleted.

  @retval 0                ok
  @retval LOG_INFO_EMFILE  mysql_file_delete() failed with EMFILE
  @retval LOG_INFO_FATAL   any other non-ENOENT failure of mysql_file_stat()
                           or mysql_file_delete()
  @retval other            error from reinit_io_cache(), from reading the
                           register file, or from find_log_pos()
*/
int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *decrease_log_space,
                                     bool need_lock_index)
{
  MY_STAT s;
  int error= 0;
  LOG_INFO log_info;
  LOG_INFO check_log_info;

  DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");

  assert(my_b_inited(&purge_index_file));

  if ((error=reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0, 0)))
  {
    sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
                    "for read");
    goto err;
  }

  for (;;)
  {
    size_t length;

    /* At most one character read means end of file or a read error. */
    if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
                          FN_REFLEN)) <= 1)
    {
      if (purge_index_file.error)
      {
        error= purge_index_file.error;
        sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
                        "register file.", error);
        goto err;
      }

      /* Reached EOF */
      break;
    }

    /* Get rid of the trailing '\n' */
    log_info.log_file_name[length-1]= 0;

    /* The stat result 's' supplies the size subtracted further down. */
    if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s, MYF(0)))
    {
      if (my_errno() == ENOENT)
      {
        /*
          It's not fatal if we can't stat a log file that does not exist;
          If we could not stat, we won't delete.
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
                              log_info.log_file_name);
        }
        sql_print_information("Failed to execute mysql_file_stat on file '%s'",
			      log_info.log_file_name);
        set_my_errno(0);
      }
      else
      {
        /*
          Other than ENOENT are fatal
        */
        if (thd)
        {
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with getting info on being purged %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              log_info.log_file_name);
        }
        else
        {
          sql_print_information("Failed to delete log file '%s'; "
                                "consider examining correspondence "
                                "of your binlog index file "
                                "to the actual binlog files",
                                log_info.log_file_name);
        }
        error= LOG_INFO_FATAL;
        goto err;
      }
    }
    else
    {
      /*
        The delete logic below sits inside the "find_log_pos() failed"
        branch: the file is only removed when the lookup ends with
        LOG_INFO_EOF. Any other lookup error aborts the purge.
      */
      if ((error= find_log_pos(&check_log_info, log_info.log_file_name,
                               need_lock_index)))
      {
        if (error != LOG_INFO_EOF)
        {
          if (thd)
          {
            push_warning_printf(thd, Sql_condition::SL_WARNING,
                                ER_BINLOG_PURGE_FATAL_ERR,
                                "a problem with deleting %s and "
                                "reading the binlog index file",
                                log_info.log_file_name);
          }
          else
          {
            sql_print_information("Failed to delete file '%s' and "
                                  "read the binlog index file",
                                  log_info.log_file_name);
          }
          goto err;
        }

        /* LOG_INFO_EOF is the expected outcome here, not a failure. */
        error= 0;
        if (!need_lock_index)
        {
          /*
            This is to avoid triggering an error in NDB.

            @todo: This is weird, what does NDB errors have to do with
            need_lock_index? Explain better or refactor /Sven
          */
          ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
        }

        DBUG_PRINT("info",("purging %s",log_info.log_file_name));
        if (!mysql_file_delete(key_file_binlog, log_info.log_file_name, MYF(0)))
        {
          DBUG_EXECUTE_IF("wait_in_purge_index_entry",
                          {
                              const char action[] = "now SIGNAL in_purge_index_entry WAIT_FOR go_ahead_sql";
                              assert(!debug_sync_set_action(thd, STRING_WITH_LEN(action)));
                              DBUG_SET("-d,wait_in_purge_index_entry");
                          };);

          /* Account for the space freed, using the size obtained by stat. */
          if (decrease_log_space)
            *decrease_log_space-= s.st_size;
        }
        else
        {
          if (my_errno() == ENOENT)
          {
            /* The file vanished between stat and delete: warn and go on. */
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::SL_WARNING,
                                  ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
                                  log_info.log_file_name);
            }
            sql_print_information("Failed to delete file '%s'",
                                  log_info.log_file_name);
            set_my_errno(0);
          }
          else
          {
            if (thd)
            {
              push_warning_printf(thd, Sql_condition::SL_WARNING,
                                  ER_BINLOG_PURGE_FATAL_ERR,
                                  "a problem with deleting %s; "
                                  "consider examining correspondence "
                                  "of your binlog index file "
                                  "to the actual binlog files",
                                  log_info.log_file_name);
            }
            else
            {
              sql_print_information("Failed to delete file '%s'; "
                                    "consider examining correspondence "
                                    "of your binlog index file "
                                    "to the actual binlog files",
                                    log_info.log_file_name);
            }
            /* EMFILE gets its own return code; everything else is fatal. */
            if (my_errno() == EMFILE)
            {
              DBUG_PRINT("info",
                         ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno()));
              error= LOG_INFO_EMFILE;
              goto err;
            }
            error= LOG_INFO_FATAL;
            goto err;
          }
        }
      }
    }
  }

err:
  DBUG_RETURN(error);
}
6776
6777 /**
6778 Remove all logs before the given file date from disk and from the
6779 index file.
6780
6781 @param thd Thread pointer
6782 @param purge_time Delete all log files before given date.
6783 @param auto_purge True if this is an automatic purge.
6784
6785 @note
6786 If any of the logs before the deleted one is in use,
6787 only purge logs up to this one.
6788
6789 @retval
6790 0 ok
6791 @retval
6792 LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
6793 LOG_INFO_FATAL if any other than ENOENT error from
6794 mysql_file_stat() or mysql_file_delete()
6795 */
6796
purge_logs_before_date(time_t purge_time,bool auto_purge)6797 int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time, bool auto_purge)
6798 {
6799 int error;
6800 int no_of_threads_locking_log= 0, no_of_log_files_purged= 0;
6801 bool log_is_active= false, log_is_in_use= false;
6802 char to_log[FN_REFLEN], copy_log_in_use[FN_REFLEN];
6803 LOG_INFO log_info;
6804 MY_STAT stat_area;
6805 THD *thd= current_thd;
6806
6807 DBUG_ENTER("purge_logs_before_date");
6808
6809 mysql_mutex_lock(&LOCK_index);
6810 to_log[0]= 0;
6811
6812 if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
6813 goto err;
6814
6815 while (!(log_is_active= is_active(log_info.log_file_name)))
6816 {
6817 if (!mysql_file_stat(m_key_file_log,
6818 log_info.log_file_name, &stat_area, MYF(0)))
6819 {
6820 if (my_errno() == ENOENT)
6821 {
6822 /*
6823 It's not fatal if we can't stat a log file that does not exist.
6824 */
6825 set_my_errno(0);
6826 }
6827 else
6828 {
6829 /*
6830 Other than ENOENT are fatal
6831 */
6832 if (thd)
6833 {
6834 push_warning_printf(thd, Sql_condition::SL_WARNING,
6835 ER_BINLOG_PURGE_FATAL_ERR,
6836 "a problem with getting info on being purged %s; "
6837 "consider examining correspondence "
6838 "of your binlog index file "
6839 "to the actual binlog files",
6840 log_info.log_file_name);
6841 }
6842 else
6843 {
6844 sql_print_information("Failed to delete log file '%s'",
6845 log_info.log_file_name);
6846 }
6847 error= LOG_INFO_FATAL;
6848 goto err;
6849 }
6850 }
6851 /* check if the binary log file is older than the purge_time
6852 if yes check if it is in use, if not in use then add
6853 it in the list of binary log files to be purged.
6854 */
6855 else if (stat_area.st_mtime < purge_time)
6856 {
6857 if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
6858 {
6859 if (!auto_purge)
6860 {
6861 log_is_in_use= true;
6862 strcpy(copy_log_in_use, log_info.log_file_name);
6863 }
6864 break;
6865 }
6866 strmake(to_log,
6867 log_info.log_file_name,
6868 sizeof(log_info.log_file_name) - 1);
6869 no_of_log_files_purged++;
6870 }
6871 else
6872 break;
6873 if (find_next_log(&log_info, false/*need_lock_index=false*/))
6874 break;
6875 }
6876
6877 if (log_is_active)
6878 {
6879 if(!auto_purge)
6880 push_warning_printf(thd, Sql_condition::SL_WARNING,
6881 ER_WARN_PURGE_LOG_IS_ACTIVE,
6882 ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
6883 log_info.log_file_name);
6884
6885 }
6886
6887 if (log_is_in_use)
6888 {
6889 int no_of_log_files_to_purge= no_of_log_files_purged+1;
6890 while (strcmp(log_file_name, log_info.log_file_name))
6891 {
6892 if (mysql_file_stat(m_key_file_log, log_info.log_file_name,
6893 &stat_area, MYF(0)))
6894 {
6895 if (stat_area.st_mtime < purge_time)
6896 no_of_log_files_to_purge++;
6897 else
6898 break;
6899 }
6900 if (find_next_log(&log_info, false/*need_lock_index=false*/))
6901 {
6902 no_of_log_files_to_purge++;
6903 break;
6904 }
6905 }
6906
6907 push_warning_printf(thd, Sql_condition::SL_WARNING,
6908 ER_WARN_PURGE_LOG_IN_USE,
6909 ER(ER_WARN_PURGE_LOG_IN_USE),
6910 copy_log_in_use, no_of_threads_locking_log,
6911 no_of_log_files_purged, no_of_log_files_to_purge);
6912 }
6913
6914 error= (to_log[0] ? purge_logs(to_log, true,
6915 false/*need_lock_index=false*/,
6916 true/*need_update_threads=true*/,
6917 (ulonglong *) 0, auto_purge) : 0);
6918
6919 err:
6920 mysql_mutex_unlock(&LOCK_index);
6921 DBUG_RETURN(error);
6922 }
6923 #endif /* HAVE_REPLICATION */
6924
6925
6926 /**
6927 Create a new log file name.
6928
6929 @param buf buf of at least FN_REFLEN where new name is stored
6930
6931 @note
6932 If file name will be longer then FN_REFLEN it will be truncated
6933 */
6934
make_log_name(char * buf,const char * log_ident)6935 void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
6936 {
6937 size_t dir_len = dirname_length(log_file_name);
6938 if (dir_len >= FN_REFLEN)
6939 dir_len=FN_REFLEN-1;
6940 my_stpnmov(buf, log_file_name, dir_len);
6941 strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
6942 }
6943
6944
6945 /**
6946 Check if we are writing/reading to the given log file.
6947 */
6948
is_active(const char * log_file_name_arg)6949 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
6950 {
6951 return !compare_log_name(log_file_name, log_file_name_arg);
6952 }
6953
6954
inc_prep_xids(THD * thd)6955 void MYSQL_BIN_LOG::inc_prep_xids(THD *thd)
6956 {
6957 DBUG_ENTER("MYSQL_BIN_LOG::inc_prep_xids");
6958 #ifndef NDEBUG
6959 int result= m_prep_xids.atomic_add(1);
6960 DBUG_PRINT("debug", ("m_prep_xids: %d", result + 1));
6961 #else
6962 (void) m_prep_xids.atomic_add(1);
6963 #endif
6964 thd->get_transaction()->m_flags.xid_written= true;
6965 DBUG_VOID_RETURN;
6966 }
6967
6968
dec_prep_xids(THD * thd)6969 void MYSQL_BIN_LOG::dec_prep_xids(THD *thd)
6970 {
6971 DBUG_ENTER("MYSQL_BIN_LOG::dec_prep_xids");
6972 int32 result= m_prep_xids.atomic_add(-1);
6973 DBUG_PRINT("debug", ("m_prep_xids: %d", result - 1));
6974 thd->get_transaction()->m_flags.xid_written= false;
6975 /* If the old value was 1, it is zero now. */
6976 if (result == 1)
6977 {
6978 mysql_mutex_lock(&LOCK_xids);
6979 mysql_cond_signal(&m_prep_xids_cond);
6980 mysql_mutex_unlock(&LOCK_xids);
6981 }
6982 DBUG_VOID_RETURN;
6983 }
6984
6985
/*
  Wrappers around new_file_impl to avoid using an argument
  to control locking. Such an argument 1) is less readable, 2) breaks
  encapsulation, and 3) allows external access to the class without
  a lock (which is not possible with the private new_file_without_locking
  method).

  @retval
    nonzero - error

*/
6997
new_file(Format_description_log_event * extra_description_event)6998 int MYSQL_BIN_LOG::new_file(Format_description_log_event *extra_description_event)
6999 {
7000 return new_file_impl(true/*need_lock_log=true*/, extra_description_event);
7001 }
7002
7003 /*
7004 @retval
7005 nonzero - error
7006 */
new_file_without_locking(Format_description_log_event * extra_description_event)7007 int MYSQL_BIN_LOG::new_file_without_locking(Format_description_log_event *extra_description_event)
7008 {
7009 return new_file_impl(false/*need_lock_log=false*/, extra_description_event);
7010 }
7011
7012
/**
  Start writing to a new log file or reopen the old file.

  The sequence is: take LOCK_log (optionally), wait under LOCK_xids until
  no prepared XIDs remain, take LOCK_index, flush storage engine logs,
  save GTIDs (binary log only), generate the new name, append a
  Rotate_log_event to the current file, flush it, close the current
  log and index, then reopen the index and open the new log file.

  @param need_lock_log If true, this function acquires LOCK_log;
  otherwise the caller should already have acquired it.
  @param extra_description_event Passed through unchanged to open_binlog()
  when the new file is opened.

  @retval 0 success (also returned immediately when the log is not open)
  @retval nonzero - error

  @note The new file name is stored last in the index file
*/
int MYSQL_BIN_LOG::new_file_impl(bool need_lock_log, Format_description_log_event *extra_description_event)
{
  int error= 0;
  /* When set together with error, the log is closed at the 'end' label. */
  bool close_on_error= false;
  char new_name[FN_REFLEN], *new_name_ptr= NULL, *old_name, *file_to_open;

  DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
  /* Nothing to rotate when the log is closed; no lock is taken here. */
  if (!is_open())
  {
    DBUG_PRINT("info",("log is closed"));
    DBUG_RETURN(error);
  }

  if (need_lock_log)
    mysql_mutex_lock(&LOCK_log);
  else
    mysql_mutex_assert_owner(&LOCK_log);
  DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
                  DEBUG_SYNC(current_thd, "before_rotate_binlog"););
  mysql_mutex_lock(&LOCK_xids);
  /*
    We need to ensure that the number of prepared XIDs are 0.

    If m_prep_xids is not zero:
    - We wait for storage engine commit, hence decrease m_prep_xids
    - We keep the LOCK_log to block new transactions from being
      written to the binary log.
   */
  while (get_prep_xids() > 0)
  {
    /*
      NOTE(review): the same sync point name is used again further down,
      just before the files are reopened.
    */
    DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
    mysql_cond_wait(&m_prep_xids_cond, &LOCK_xids);
  }
  mysql_mutex_unlock(&LOCK_xids);

  mysql_mutex_lock(&LOCK_index);

  mysql_mutex_assert_owner(&LOCK_log);
  mysql_mutex_assert_owner(&LOCK_index);


  /*
    Flush storage engine logs unless the "expire_logs_always" debug
    keyword is set. A failure here leaves close_on_error false, so the
    log stays open and only the error code is returned.
  */
  if (DBUG_EVALUATE_IF("expire_logs_always", 0, 1)
      && (error= ha_flush_logs(NULL)))
    goto end;

  if (!is_relay_log)
  {
    /* Save set of GTIDs of the last binlog into table on binlog rotation */
    if ((error= gtid_state->save_gtids_of_last_binlog_into_table(true)))
    {
      /*
        NOTE(review): new_name_ptr is still NULL on this path, yet the
        sql_print_error() call under 'end' formats it with %s.
      */
      close_on_error= true;
      goto end;
    }
  }

  /*
    If user hasn't specified an extension, generate a new log name
    We have to do this here and not in open as we want to store the
    new file name in the current binary log file.
  */
  new_name_ptr= new_name;
  if ((error= generate_new_name(new_name, name)))
  {
    // Use the old name if generation of new name fails.
    // NOTE(review): unbounded copy into new_name[FN_REFLEN]; assumes
    // 'name' fits -- confirm.
    strcpy(new_name, name);
    close_on_error= TRUE;
    goto end;
  }
  /*
    Make sure that the log_file is initialized before writing
    Rotate_log_event into it.
  */
  if (log_file.alloced_buffer)
  {
    /*
      We log the whole file name for log file as the user may decide
      to change base names at some point.
    */
    Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
                       is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
    /*
      The current relay-log's closing Rotate event must have checksum
      value computed with an algorithm of the last relay-logged FD event.
    */
    if (is_relay_log)
      (r.common_footer)->checksum_alg= relay_log_checksum_alg;
    assert(!is_relay_log || relay_log_checksum_alg !=
           binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
    /*
      The debug keyword forces error=1 and short-circuits the real write.
    */
    if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event",
                        (error=1), FALSE) ||
       (error= r.write(&log_file)))
    {
      char errbuf[MYSYS_STRERROR_SIZE];
      DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno=2;);
      close_on_error= true;
      /*
        NOTE(review): the error code is ER_ERROR_ON_WRITE but the message
        text is that of ER_CANT_OPEN_FILE, and the file reported is the
        base 'name' -- confirm this pairing is intended.
      */
      my_printf_error(ER_ERROR_ON_WRITE, ER(ER_CANT_OPEN_FILE),
                      MYF(ME_FATALERROR), name,
                      errno, my_strerror(errbuf, sizeof(errbuf), errno));
      goto end;
    }
    bytes_written += r.common_header->data_written;
  }

  if ((error= flush_io_cache(&log_file)))
  {
    close_on_error= true;
    goto end;
  }

  DEBUG_SYNC(current_thd, "after_rotate_event_appended");

  /*
    Detach 'name' before close() so that it is not freed there; ownership
    moves to old_name, which is released with my_free() after the reopen.
  */
  old_name=name;
  name=0;				// Don't free name
  close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX,
        false/*need_lock_log=false*/,
        false/*need_lock_index=false*/);

  /* Apply a pending checksum algorithm change now that the file is closed. */
  if (checksum_alg_reset != binary_log::BINLOG_CHECKSUM_ALG_UNDEF)
  {
    assert(!is_relay_log);
    assert(binlog_checksum_options != checksum_alg_reset);
    binlog_checksum_options= checksum_alg_reset;
  }
  /*
     Note that at this point, log_state != LOG_CLOSED (important for is_open()).
  */

  DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
  /*
     new_file() is only used for rotation (in FLUSH LOGS or because size >
     max_binlog_size or max_relay_log_size).
     If this is a binary log, the Format_description_log_event at the beginning of
     the new file should have created=0 (to distinguish with the
     Format_description_log_event written at server startup, which should
     trigger temp tables deletion on slaves.
  */

  /* reopen index binlog file, BUG#34582 */
  /* file_to_open tracks which file the error message below should name. */
  file_to_open= index_file_name;
  error= open_index_file(index_file_name, 0, false/*need_lock_index=false*/);
  if (!error)
  {
    /* reopen the binary log file. */
    file_to_open= new_name_ptr;
    error= open_binlog(old_name, new_name_ptr,
                       max_size, true/*null_created_arg=true*/,
                       false/*need_lock_index=false*/,
                       true/*need_sid_lock=true*/,
                       extra_description_event);
  }

  /* handle reopening errors */
  if (error)
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    my_printf_error(ER_CANT_OPEN_FILE, ER(ER_CANT_OPEN_FILE),
                    MYF(ME_FATALERROR), file_to_open,
                    error, my_strerror(errbuf, sizeof(errbuf), error));
    close_on_error= true;
  }
  /* Reached only when no earlier 'goto end' was taken, so old_name is set. */
  my_free(old_name);

end:

  if (error && close_on_error /* rotate, flush or reopen failed */)
  {
    /*
      Close whatever was left opened.

      We are keeping the behavior as it exists today, ie,
      we disable logging and move on (see: BUG#51014).

      TODO: as part of WL#1790 consider other approaches:
       - kill mysql (safety);
       - try multiple locations for opening a log file;
       - switch server to protected/readonly mode
       - ...
    */
    if (binlog_error_action == ABORT_SERVER)
    {
      exec_binlog_error_action_abort("Either disk is full or file system is"
                                     " read only while rotating the binlog."
                                     " Aborting the server.");
    }
    else
      /* NOTE(review): reports errno here, not the local 'error' value. */
      sql_print_error("Could not open %s for logging (error %d). "
                      "Turning logging off for the whole duration "
                      "of the MySQL server process. To turn it on "
                      "again: fix the cause, shutdown the MySQL "
                      "server and restart it.",
                      new_name_ptr, errno);
    close(LOG_CLOSE_INDEX, false /*need_lock_log=false*/,
          false/*need_lock_index=false*/);
  }

  /* Release in reverse order of acquisition: LOCK_index, then LOCK_log. */
  mysql_mutex_unlock(&LOCK_index);
  if (need_lock_log)
    mysql_mutex_unlock(&LOCK_log);
  DEBUG_SYNC(current_thd, "after_disable_binlog");
  DBUG_RETURN(error);
}
7225
7226
7227 #ifdef HAVE_REPLICATION
7228 /**
7229 Called after an event has been written to the relay log by the IO
7230 thread. This flushes and possibly syncs the file (according to the
7231 sync options), rotates the file if it has grown over the limit, and
7232 finally calls signal_update().
7233
7234 @note The caller must hold LOCK_log before invoking this function.
7235
7236 @param mi Master_info for the IO thread.
7237 @param need_data_lock If true, mi->data_lock will be acquired if a
7238 rotation is needed. Otherwise, mi->data_lock must be held by the
7239 caller.
7240
7241 @retval false success
7242 @retval true error
7243 */
after_append_to_relay_log(Master_info * mi)7244 bool MYSQL_BIN_LOG::after_append_to_relay_log(Master_info *mi)
7245 {
7246 DBUG_ENTER("MYSQL_BIN_LOG::after_append_to_relay_log");
7247 DBUG_PRINT("info",("max_size: %lu",max_size));
7248
7249 // Check pre-conditions
7250 mysql_mutex_assert_owner(&LOCK_log);
7251 mysql_mutex_assert_owner(&mi->data_lock);
7252 assert(is_relay_log);
7253 assert(current_thd->system_thread == SYSTEM_THREAD_SLAVE_IO);
7254
7255 /*
7256 We allow the relay log rotation by relay log size
7257 only if the trx parser is not inside a transaction.
7258 */
7259 bool can_rotate= mi->transaction_parser.is_not_inside_transaction();
7260
7261 #ifndef NDEBUG
7262 if ((uint) my_b_append_tell(&log_file) >
7263 DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size) &&
7264 !can_rotate)
7265 {
7266 DBUG_PRINT("info",("Postponing the rotation by size waiting for "
7267 "the end of the current transaction."));
7268 }
7269 #endif
7270
7271 // Flush and sync
7272 bool error= false;
7273 if (flush_and_sync(0) == 0 && can_rotate)
7274 {
7275 /*
7276 If the last event of the transaction has been flushed, we can add
7277 the GTID (if it is not empty) to the logged set, or else it will
7278 not be available in the Previous GTIDs of the next relay log file
7279 if we are going to rotate the relay log.
7280 */
7281 Gtid *last_gtid_queued= mi->get_last_gtid_queued();
7282 if (!last_gtid_queued->is_empty())
7283 {
7284 global_sid_lock->rdlock();
7285 mi->rli->add_logged_gtid(last_gtid_queued->sidno,
7286 last_gtid_queued->gno);
7287 global_sid_lock->unlock();
7288 mi->clear_last_gtid_queued();
7289 }
7290
7291 /*
7292 If relay log is too big, rotate. But only if not in the middle of a
7293 transaction when GTIDs are enabled.
7294 We now try to mimic the following master binlog behavior: "A transaction
7295 is written in one chunk to the binary log, so it is never split between
7296 several binary logs. Therefore, if you have big transactions, you might
7297 see binary log files larger than max_binlog_size."
7298 */
7299 if ((uint) my_b_append_tell(&log_file) >
7300 DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size))
7301 {
7302 error= new_file_without_locking(mi->get_mi_description_event());
7303 }
7304 }
7305
7306 signal_update();
7307
7308 DBUG_RETURN(error);
7309 }
7310
7311
append_event(Log_event * ev,Master_info * mi)7312 bool MYSQL_BIN_LOG::append_event(Log_event* ev, Master_info *mi)
7313 {
7314 DBUG_ENTER("MYSQL_BIN_LOG::append");
7315
7316 // check preconditions
7317 assert(log_file.type == SEQ_READ_APPEND);
7318 assert(is_relay_log);
7319
7320 // acquire locks
7321 mysql_mutex_lock(&LOCK_log);
7322
7323 // write data
7324 bool error = false;
7325 if (ev->write(&log_file) == 0)
7326 {
7327 bytes_written+= ev->common_header->data_written;
7328 error= after_append_to_relay_log(mi);
7329 }
7330 else
7331 error= true;
7332
7333 mysql_mutex_unlock(&LOCK_log);
7334 DBUG_RETURN(error);
7335 }
7336
7337
append_buffer(const char * buf,uint len,Master_info * mi)7338 bool MYSQL_BIN_LOG::append_buffer(const char* buf, uint len, Master_info *mi)
7339 {
7340 DBUG_ENTER("MYSQL_BIN_LOG::append_buffer");
7341
7342 // check preconditions
7343 assert(log_file.type == SEQ_READ_APPEND);
7344 assert(is_relay_log);
7345 mysql_mutex_assert_owner(&LOCK_log);
7346
7347 // write data
7348 bool error= false;
7349 if (my_b_append(&log_file,(uchar*) buf,len) == 0)
7350 {
7351 bytes_written += len;
7352 error= after_append_to_relay_log(mi);
7353 }
7354 else
7355 error= true;
7356
7357 DBUG_RETURN(error);
7358 }
7359 #endif // ifdef HAVE_REPLICATION
7360
flush_and_sync(const bool force)7361 bool MYSQL_BIN_LOG::flush_and_sync(const bool force)
7362 {
7363 mysql_mutex_assert_owner(&LOCK_log);
7364
7365 if (flush_io_cache(&log_file))
7366 return 1;
7367
7368 std::pair<bool, bool> result= sync_binlog_file(force);
7369
7370 return result.first;
7371 }
7372
start_union_events(THD * thd,query_id_t query_id_param)7373 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
7374 {
7375 assert(!thd->binlog_evt_union.do_union);
7376 thd->binlog_evt_union.do_union= TRUE;
7377 thd->binlog_evt_union.unioned_events= FALSE;
7378 thd->binlog_evt_union.unioned_events_trans= FALSE;
7379 thd->binlog_evt_union.first_query_id= query_id_param;
7380 }
7381
stop_union_events(THD * thd)7382 void MYSQL_BIN_LOG::stop_union_events(THD *thd)
7383 {
7384 assert(thd->binlog_evt_union.do_union);
7385 thd->binlog_evt_union.do_union= FALSE;
7386 }
7387
is_query_in_union(THD * thd,query_id_t query_id_param)7388 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
7389 {
7390 return (thd->binlog_evt_union.do_union &&
7391 query_id_param >= thd->binlog_evt_union.first_query_id);
7392 }
7393
7394 /*
7395 Updates thd's position-of-next-event variables
7396 after a *real* write a file.
7397 */
update_thd_next_event_pos(THD * thd)7398 void MYSQL_BIN_LOG::update_thd_next_event_pos(THD* thd)
7399 {
7400 if (likely(thd != NULL))
7401 {
7402 thd->set_next_event_pos(log_file_name,
7403 my_b_tell(&log_file));
7404 }
7405 }
7406
7407 /*
7408 Moves the last bunch of rows from the pending Rows event to a cache (either
7409 transactional cache if is_transaction is @c true, or the non-transactional
7410 cache otherwise. Sets a new pending event.
7411
7412 @param thd a pointer to the user thread.
7413 @param evt a pointer to the row event.
7414 @param is_transactional @c true indicates a transactional cache,
7415 otherwise @c false a non-transactional.
7416 */
7417 int
flush_and_set_pending_rows_event(THD * thd,Rows_log_event * event,bool is_transactional)7418 MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
7419 Rows_log_event* event,
7420 bool is_transactional)
7421 {
7422 DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
7423 #ifdef WITH_WSREP
7424 assert(WSREP_EMULATE_BINLOG(thd) || mysql_bin_log.is_open());
7425 #else
7426 assert(mysql_bin_log.is_open());
7427 #endif /* WITH_WSREP */
7428 DBUG_PRINT("enter", ("event: 0x%lx", (long) event));
7429
7430 int error= 0;
7431 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
7432
7433 assert(cache_mngr);
7434
7435 binlog_cache_data *cache_data=
7436 cache_mngr->get_binlog_cache_data(is_transactional);
7437
7438 DBUG_PRINT("info", ("cache_mngr->pending(): 0x%lx", (long) cache_data->pending()));
7439
7440 if (Rows_log_event* pending= cache_data->pending())
7441 {
7442 /*
7443 Write pending event to the cache.
7444 */
7445 if (cache_data->write_event(thd, pending))
7446 {
7447 set_write_error(thd, is_transactional);
7448 if (check_write_error(thd) && cache_data &&
7449 stmt_cannot_safely_rollback(thd))
7450 cache_data->set_incident();
7451 delete pending;
7452 cache_data->set_pending(NULL);
7453 DBUG_RETURN(1);
7454 }
7455
7456 delete pending;
7457 }
7458
7459 cache_data->set_pending(event);
7460
7461 DBUG_RETURN(error);
7462 }
7463
/**
  Write an event to the binary log.

  The event is not written to the log file directly: it is written to the
  session's binlog cache (transactional or statement cache, chosen by
  event_info->is_using_trans_cache()), preceded in statement format by
  the context events (Intvar, Rand, User_var) the statement depends on.

  @param event_info  The event to write; its thd member supplies the
                     session.

  @retval false  Success, or nothing had to be written (function call is
                 being unioned, binlogging is disabled for the session,
                 the database is filtered out, or the log is not open and
                 the pending rows event was flushed successfully).
  @retval true   Flushing the pending rows event, starting the
                 transaction/statement, or writing to the cache failed.
*/

bool MYSQL_BIN_LOG::write_event(Log_event *event_info)
{
  THD *thd= event_info->thd;
  /* Pessimistic default; cleared only after the event reached the cache. */
  bool error= 1;
  DBUG_ENTER("MYSQL_BIN_LOG::write_event(Log_event *)");

  /*
    NOTE(review): thd is dereferenced here unconditionally, while later
    code tests "thd &&" / "if (thd)". Those later checks look redundant --
    confirm thd can never be NULL.
  */
  if (thd->binlog_evt_union.do_union)
  {
    /*
      In Stored function; Remember that function call caused an update.
      We will log the function call to the binary log on function exit
    */
    thd->binlog_evt_union.unioned_events= TRUE;
    thd->binlog_evt_union.unioned_events_trans |=
      event_info->is_using_trans_cache();
    DBUG_RETURN(0);
  }

  /*
    We only end the statement if we are in a top-level statement.  If
    we are inside a stored function, we do not end the statement since
    this will close all tables on the slave. But there can be a special case
    where we are inside a stored function/trigger and a SAVEPOINT is being
    set in side the stored function/trigger. This SAVEPOINT execution will
    force the pending event to be flushed without an STMT_END_F flag. This
    will result in a case where following DMLs will be considered as part of
    same statement and result in data loss on slave. Hence in this case we
    force the end_stmt to be true.
  */
  bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
                        SQLCOM_SAVEPOINT)? true:
    (thd->locked_tables_mode && thd->lex->requires_prelocking());
  if (thd->binlog_flush_pending_rows_event(end_stmt,
                                           event_info->is_using_trans_cache()))
    DBUG_RETURN(error);

  /*
     In most cases this is only called if 'is_open()' is true; in fact this is
     mostly called if is_open() *was* true a few instructions before, but it
     could have changed since.
  */
#ifdef WITH_WSREP
  /* applier and replayer can skip writing binlog events */
  if ((WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV)) ||
      is_open())
#else
  if (likely(is_open()))
#endif
  {
#ifdef HAVE_REPLICATION
    /*
      In the future we need to add to the following if tests like
      "do the involved tables match (to be implemented)
      binlog_[wild_]{do|ignore}_table?" (WL#1049)"
    */
    /*
      Skip silently (return success) when binlogging is off for the
      session, or when the event's database is rejected by the binlog
      filter. SAVEPOINT / ROLLBACK TO SAVEPOINT statements and events
      flagged as no-filter bypass the database filter.
    */
    const char *local_db= event_info->get_db();
    if ((thd && !(thd->variables.option_bits & OPTION_BIN_LOG)) ||
	(thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
         thd->lex->sql_command != SQLCOM_SAVEPOINT &&
         (!event_info->is_no_filter_event() &&
          !binlog_filter->db_ok(local_db))))
      DBUG_RETURN(0);
#endif /* HAVE_REPLICATION */

    assert(event_info->is_using_trans_cache() || event_info->is_using_stmt_cache());

    if (binlog_start_trans_and_stmt(thd, event_info))
      DBUG_RETURN(error);

    /* Pick the cache (trx or stmt) that this event is destined for. */
    bool is_trans_cache= event_info->is_using_trans_cache();
    binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
    binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);

    DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));

    /*
       No check for auto events flag here - this write method should
       never be called if auto-events are enabled.

       Write first log events which describe the 'run environment'
       of the SQL command. If row-based binlogging, Insert_id, Rand
       and other kind of "setting context" events are not needed.
    */
    if (thd)
    {
      if (!thd->is_current_stmt_binlog_format_row())
      {
        /* LAST_INSERT_ID() value the statement depends on. */
        if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
        {
          Intvar_log_event e(thd,(uchar) binary_log::Intvar_event::LAST_INSERT_ID_EVENT,
                             thd->first_successful_insert_id_in_prev_stmt_for_binlog,
                             event_info->event_cache_type, event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
        }
        /* First auto-increment value generated by the statement. */
        if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
        {
          DBUG_PRINT("info",("number of auto_inc intervals: %u",
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             nb_elements()));
          Intvar_log_event e(thd, (uchar) binary_log::Intvar_event::INSERT_ID_EVENT,
                             thd->auto_inc_intervals_in_cur_stmt_for_binlog.
                             minimum(), event_info->event_cache_type,
                             event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
        }
        /* Random seeds, when the statement used RAND(). */
        if (thd->rand_used)
        {
          Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
                           event_info->event_cache_type,
                           event_info->event_logging_type);
          if (cache_data->write_event(thd, &e))
            goto err;
        }
        /* One User_var event per user variable recorded for the statement. */
        if (!thd->user_var_events.empty())
        {
          for (size_t i= 0; i < thd->user_var_events.size(); i++)
          {
            BINLOG_USER_VAR_EVENT *user_var_event= thd->user_var_events[i];

            /* setting flags for user var log event */
            uchar flags= User_var_log_event::UNDEF_F;
            if (user_var_event->unsigned_flag)
              flags|= User_var_log_event::UNSIGNED_F;

            User_var_log_event e(thd,
                                 user_var_event->user_var_event->entry_name.ptr(),
                                 user_var_event->user_var_event->entry_name.length(),
                                 user_var_event->value,
                                 user_var_event->length,
                                 user_var_event->type,
                                 user_var_event->charset_number, flags,
                                 event_info->event_cache_type,
                                 event_info->event_logging_type);
            if (cache_data->write_event(thd, &e))
              goto err;
          }
        }
      }
    }

    /*
      Write the event.
    */
    if (cache_data->write_event(thd, event_info))
      goto err;

    /* Debug-only fault injection: pretend the write failed. */
    if (DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
      goto err;

    /*
      After writing the event, if the trx-cache was used and any unsafe
      change was written into it, the cache is marked as cannot safely
      roll back.
    */
    if (is_trans_cache && stmt_cannot_safely_rollback(thd))
      cache_mngr->trx_cache.set_cannot_rollback();

    error= 0;

err:
    /*
      Reached on both success and failure; on failure record the write
      error and, if the statement cannot be safely rolled back, flag an
      incident on the cache.
    */
    if (error)
    {
      set_write_error(thd, is_trans_cache);
      if (check_write_error(thd) && cache_data &&
          stmt_cannot_safely_rollback(thd))
        cache_data->set_incident();
    }
  }

  /*
    NOTE(review): when the log is not open, error is still 1 here, so the
    function reports failure although nothing was attempted -- confirm
    that callers expect this.
  */
  DBUG_RETURN(error);
}
7641
7642 /**
7643 The method executes rotation when LOCK_log is already acquired
7644 by the caller.
7645
7646 @param force_rotate caller can request the log rotation
7647 @param check_purge is set to true if rotation took place
7648
7649 @note
7650 If rotation fails, for instance the server was unable
7651 to create a new log file, we still try to write an
7652 incident event to the current log.
7653
7654 @note The caller must hold LOCK_log when invoking this function.
7655
7656 @retval
7657 nonzero - error in rotating routine.
7658 */
rotate(bool force_rotate,bool * check_purge)7659 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
7660 {
7661 int error= 0;
7662 DBUG_ENTER("MYSQL_BIN_LOG::rotate");
7663 #ifdef WITH_WSREP
7664 if (WSREP_ON && wsrep_to_isolation)
7665 {
7666 *check_purge= false;
7667 WSREP_DEBUG("avoiding binlog rotate due to TO isolation: %d",
7668 wsrep_to_isolation);
7669 DBUG_RETURN(0);
7670 }
7671 #endif
7672
7673 assert(!is_relay_log);
7674 mysql_mutex_assert_owner(&LOCK_log);
7675
7676 DEBUG_SYNC(current_thd,"stop_binlog_rotation_after_acquiring_lock_log");
7677
7678 *check_purge= false;
7679
7680 if (DBUG_EVALUATE_IF("force_rotate", 1, 0) || force_rotate ||
7681 (my_b_tell(&log_file) >= (my_off_t) max_size))
7682 {
7683 error= new_file_without_locking(NULL);
7684 *check_purge= true;
7685 }
7686 DBUG_RETURN(error);
7687 }
7688
7689 /**
7690 The method executes logs purging routine.
7691
7692 @retval
7693 nonzero - error in rotating routine.
7694 */
purge()7695 void MYSQL_BIN_LOG::purge()
7696 {
7697 #ifdef HAVE_REPLICATION
7698 if (expire_logs_days)
7699 {
7700 DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
7701 time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
7702 DBUG_EXECUTE_IF("expire_logs_always",
7703 { purge_time= my_time(0);});
7704 if (purge_time >= 0)
7705 {
7706 /*
7707 Flush logs for storage engines, so that the last transaction
7708 is fsynced inside storage engines.
7709 */
7710 ha_flush_logs(NULL);
7711 purge_logs_before_date(purge_time, true);
7712 }
7713 }
7714 #endif
7715 }
7716
7717 /**
7718 Execute a FLUSH LOGS statement.
7719
7720 The method is a shortcut of @c rotate() and @c purge().
7721 LOCK_log is acquired prior to rotate and is released after it.
7722
7723 @param force_rotate caller can request the log rotation
7724
7725 @retval
7726 nonzero - error in rotating routine.
7727 */
rotate_and_purge(THD * thd,bool force_rotate)7728 int MYSQL_BIN_LOG::rotate_and_purge(THD* thd, bool force_rotate)
7729 {
7730 int error= 0;
7731 DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
7732 bool check_purge= false;
7733
7734 /*
7735 FLUSH BINARY LOGS command should ignore 'read-only' and 'super_read_only'
7736 options so that it can update 'mysql.gtid_executed' replication repository
7737 table.
7738 */
7739 thd->set_skip_readonly_check();
7740 /*
7741 Wait for handlerton to insert any pending information into the binlog.
7742 For e.g. ha_ndbcluster which updates the binlog asynchronously this is
7743 needed so that the user see its own commands in the binlog.
7744 */
7745 ha_binlog_wait(thd);
7746
7747 assert(!is_relay_log);
7748 mysql_mutex_lock(&LOCK_log);
7749 error= rotate(force_rotate, &check_purge);
7750 /*
7751 NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
7752 the mutex. Otherwise causes various deadlocks.
7753 */
7754 mysql_mutex_unlock(&LOCK_log);
7755
7756 if (!error && check_purge)
7757 purge();
7758
7759 DBUG_RETURN(error);
7760 }
7761
next_file_id()7762 uint MYSQL_BIN_LOG::next_file_id()
7763 {
7764 uint res;
7765 mysql_mutex_lock(&LOCK_log);
7766 res = file_id++;
7767 mysql_mutex_unlock(&LOCK_log);
7768 return res;
7769 }
7770
7771
get_gtid_executed(Sid_map * sid_map,Gtid_set * gtid_set)7772 int MYSQL_BIN_LOG::get_gtid_executed(Sid_map *sid_map, Gtid_set *gtid_set)
7773 {
7774 DBUG_ENTER("MYSQL_BIN_LOG::get_gtid_executed");
7775 int error= 0;
7776
7777 mysql_mutex_lock(&mysql_bin_log.LOCK_commit);
7778 global_sid_lock->wrlock();
7779
7780 enum_return_status return_status= global_sid_map->copy(sid_map);
7781 if (return_status != RETURN_STATUS_OK)
7782 {
7783 error= 1;
7784 goto end;
7785 }
7786
7787 return_status= gtid_set->add_gtid_set(gtid_state->get_executed_gtids());
7788 if (return_status != RETURN_STATUS_OK)
7789 error= 1;
7790
7791 end:
7792 global_sid_lock->unlock();
7793 mysql_mutex_unlock(&mysql_bin_log.LOCK_commit);
7794
7795 DBUG_RETURN(error);
7796 }
7797
7798
7799 /**
7800 Auxiliary function to read a page from the cache and set the given
7801 buffer pointer to point to the beginning of the page and the given
7802 length pointer to point to the end of it.
7803
7804 @param cache IO_CACHE to read from
7805 @param[OUT] buf_p Will be set to point to the beginning of the page.
7806 @param[OUT] buf_len_p Will be set to the length of the buffer.
7807
7808 @retval false Success
7809 @retval true Error reading from the cache.
7810 */
read_cache_page(IO_CACHE * cache,uchar ** buf_p,uint32 * buf_len_p)7811 static bool read_cache_page(IO_CACHE *cache, uchar **buf_p, uint32 *buf_len_p)
7812 {
7813 assert(*buf_len_p == 0);
7814 cache->read_pos= cache->read_end;
7815 *buf_len_p= my_b_fill(cache);
7816 *buf_p= cache->read_pos;
7817 return cache->error ? true : false;
7818 }
7819
7820
7821 /**
7822 Write the contents of the given IO_CACHE to the binary log.
7823
7824 The cache will be reset as a READ_CACHE to be able to read the
7825 contents from it.
7826
7827 The data will be post-processed: see class Binlog_event_writer for
7828 details.
7829
7830 @param cache Events will be read from this IO_CACHE.
7831 @param writer Events will be written to this Binlog_event_writer.
7832
7833 @retval true IO error.
7834 @retval false Success.
7835
7836 @see MYSQL_BIN_LOG::write_cache
7837 */
do_write_cache(IO_CACHE * cache,Binlog_event_writer * writer)7838 bool MYSQL_BIN_LOG::do_write_cache(IO_CACHE *cache, Binlog_event_writer *writer)
7839 {
7840 DBUG_ENTER("MYSQL_BIN_LOG::do_write_cache");
7841
7842 DBUG_EXECUTE_IF("simulate_do_write_cache_failure",
7843 {
7844 /*
7845 see binlog_cache_data::write_event() that reacts on
7846 @c simulate_disk_full_at_flush_pending.
7847 */
7848 DBUG_SET("-d,simulate_do_write_cache_failure");
7849 DBUG_RETURN(true);
7850 });
7851
7852 #ifndef NDEBUG
7853 uint64 expected_total_len= my_b_tell(cache);
7854 #endif
7855
7856 DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
7857 {
7858 DBUG_SET("+d,simulate_file_write_error");
7859 });
7860
7861 if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
7862 {
7863 DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
7864 {
7865 DBUG_SET("-d,simulate_file_write_error");
7866 });
7867 DBUG_RETURN(true);
7868 }
7869
7870 uchar *buf= cache->read_pos;
7871 uint32 buf_len= my_b_bytes_in_cache(cache);
7872 uint32 event_len= 0;
7873 uchar header[LOG_EVENT_HEADER_LEN];
7874 uint32 header_len= 0;
7875
7876 /*
7877 Each iteration of this loop processes all or a part of
7878 1) an event header or 2) an event body from the IO_CACHE.
7879 */
7880 while (true)
7881 {
7882 /**
7883 Nothing in cache: try to refill, and if cache was ended here,
7884 return success. This code is needed even on the first iteration
7885 of the loop, because reinit_io_cache may or may not fill the
7886 first page.
7887 */
7888 if (buf_len == 0)
7889 {
7890 if (read_cache_page(cache, &buf, &buf_len))
7891 {
7892 /**
7893 @todo: this can happen in case of disk corruption in the
7894 IO_CACHE. We may have written a half transaction (even half
7895 event) to the binlog. We should rollback the transaction
7896 and truncate the binlog. /Sven
7897 */
7898 assert(0);
7899 }
7900 if (buf_len == 0)
7901 {
7902 /**
7903 @todo: this can happen in case of disk corruption in the
7904 IO_CACHE. We may have written a half transaction (even half
7905 event) to the binlog. We should rollback the transaction
7906 and truncate the binlog. /Sven
7907 */
7908 assert(my_b_tell(cache) == expected_total_len);
7909 /* Arrive the end of the cache */
7910 DBUG_RETURN(false);
7911 }
7912 }
7913
7914 /* Write event header into binlog */
7915 if (event_len == 0)
7916 {
7917 /* data in the buf may be smaller than header size.*/
7918 uint32 header_incr =
7919 std::min<uint32>(LOG_EVENT_HEADER_LEN - header_len, buf_len);
7920
7921 memcpy(header + header_len, buf, header_incr);
7922 header_len += header_incr;
7923 buf += header_incr;
7924 buf_len -= header_incr;
7925
7926 if (header_len == LOG_EVENT_HEADER_LEN)
7927 {
7928 // Flush event header.
7929 uchar *header_p= header;
7930 if (writer->write_event_part(&header_p, &header_len, &event_len))
7931 DBUG_RETURN(true);
7932 assert(header_len == 0);
7933 }
7934 }
7935 else
7936 {
7937 /* Write all or part of the event body to binlog */
7938 if (writer->write_event_part(&buf, &buf_len, &event_len))
7939 DBUG_RETURN(true);
7940 }
7941 }
7942 }
7943
7944 /**
7945 Writes an incident event to stmt_cache.
7946
7947 @param ev Incident event to be written
7948 @param thd Thread variable
7949 @param need_lock_log If true, will acquire LOCK_log; otherwise the
7950 caller should already have acquired LOCK_log.
7951 @param err_msg Error message written to log file for the incident.
7952 @do_flush_and_sync If true, will call flush_and_sync(), rotate() and
7953 purge().
7954
7955 @retval false error
7956 @retval true success
7957 */
write_incident(Incident_log_event * ev,THD * thd,bool need_lock_log,const char * err_msg,bool do_flush_and_sync)7958 bool MYSQL_BIN_LOG::write_incident(Incident_log_event *ev, THD *thd,
7959 bool need_lock_log, const char* err_msg,
7960 bool do_flush_and_sync)
7961 {
7962 uint error= 0;
7963 DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
7964 assert(err_msg);
7965
7966 if (!is_open())
7967 DBUG_RETURN(error);
7968
7969 // @todo make this work with the group log. /sven
7970 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
7971
7972 /*
7973 thd->cache_mngr may be uninitialized when first transaction resulted in an
7974 incident. If there is no cache manager exists for the session, then we
7975 create one, so that a GTID is generated and is written prior to flushing
7976 the stmt_cache.
7977 */
7978 if (cache_mngr == NULL)
7979 {
7980 if (thd->binlog_setup_trx_data() ||
7981 DBUG_EVALUATE_IF("simulate_cache_creation_failure", 1, 0))
7982 {
7983 enum_gtid_mode gtid_mode= get_gtid_mode(GTID_MODE_LOCK_NONE);
7984 if (gtid_mode == GTID_MODE_ON || gtid_mode == GTID_MODE_ON_PERMISSIVE)
7985 {
7986 const char *mode= gtid_mode == GTID_MODE_ON ? "ON" : "ON_PERMISSIVE";
7987 std::ostringstream message;
7988
7989 message << "Could not create IO cache while writing an incident event "
7990 "to the binary log for query: '"<< thd->query().str <<
7991 "'. Since GTID_MODE= " << mode <<", server is unable "
7992 "to proceed with logging.";
7993 handle_binlog_flush_or_sync_error(thd, true, message.str().c_str());
7994 DBUG_RETURN(true);
7995 }
7996 }
7997 else
7998 cache_mngr= thd_get_cache_mngr(thd);
7999 }
8000
8001 #ifndef NDEBUG
8002 if (DBUG_EVALUATE_IF("simulate_write_incident_event_into_binlog_directly",
8003 1, 0) && !cache_mngr->stmt_cache.is_binlog_empty())
8004 {
8005 /* The stmt_cache contains corruption data, so we can reset it. */
8006 cache_mngr->stmt_cache.reset();
8007 }
8008 #endif
8009
8010 /*
8011 If there is no binlog cache then we write incidents directly
8012 into the binlog. If caller needs GTIDs it has to setup the
8013 binlog cache (for the injector thread).
8014 */
8015 if (cache_mngr == NULL ||
8016 DBUG_EVALUATE_IF("simulate_write_incident_event_into_binlog_directly",
8017 1, 0))
8018 {
8019 if (need_lock_log)
8020 mysql_mutex_lock(&LOCK_log);
8021 else
8022 mysql_mutex_assert_owner(&LOCK_log);
8023 /* Write an incident event into binlog directly. */
8024 error= ev->write(&log_file);
8025 /*
8026 Write an error to log. So that user might have a chance
8027 to be alerted and explore incident details.
8028 */
8029 if (!error)
8030 sql_print_error("%s An incident event has been written to the binary "
8031 "log which will stop the slaves.", err_msg);
8032 }
8033 else // (cache_mngr != NULL)
8034 {
8035 if (!cache_mngr->stmt_cache.is_binlog_empty())
8036 {
8037 /* The stmt_cache contains corruption data, so we can reset it. */
8038 cache_mngr->stmt_cache.reset();
8039 }
8040 if (!cache_mngr->trx_cache.is_binlog_empty())
8041 {
8042 /* The trx_cache contains corruption data, so we can reset it. */
8043 cache_mngr->trx_cache.reset();
8044 }
8045 /*
8046 Write the incident event into stmt_cache, so that a GTID is generated and
8047 written for it prior to flushing the stmt_cache.
8048 */
8049 binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(false);
8050 if ((error= cache_data->write_event(thd, ev)))
8051 {
8052 sql_print_error("Failed to write an incident event into stmt_cache.");
8053 cache_mngr->stmt_cache.reset();
8054 DBUG_RETURN(error);
8055 }
8056
8057 if (need_lock_log)
8058 mysql_mutex_lock(&LOCK_log);
8059 else
8060 mysql_mutex_assert_owner(&LOCK_log);
8061 }
8062
8063 if (do_flush_and_sync)
8064 {
8065 if (!error && !(error= flush_and_sync()))
8066 {
8067 bool check_purge= false;
8068 update_binlog_end_pos();
8069 is_rotating_caused_by_incident= true;
8070 error= rotate(true, &check_purge);
8071 is_rotating_caused_by_incident= false;
8072 if (!error && check_purge)
8073 purge();
8074 }
8075 }
8076
8077 if (need_lock_log)
8078 mysql_mutex_unlock(&LOCK_log);
8079
8080 /*
8081 Write an error to log. So that user might have a chance
8082 to be alerted and explore incident details.
8083 */
8084 if (!error && cache_mngr != NULL)
8085 sql_print_error("%s An incident event has been written to the binary "
8086 "log which will stop the slaves.", err_msg);
8087
8088 DBUG_RETURN(error);
8089 }
8090
write_stmt_directly(THD * thd,const char * stmt,size_t stmt_len,enum_sql_command sql_command)8091 bool MYSQL_BIN_LOG::write_stmt_directly(THD* thd, const char *stmt, size_t stmt_len,
8092 enum_sql_command sql_command)
8093 {
8094 bool ret= false;
8095 /* backup the original command */
8096 enum_sql_command save_sql_command= thd->lex->sql_command;
8097 thd->lex->sql_command= sql_command;
8098
8099 if (thd->binlog_query(THD::STMT_QUERY_TYPE, stmt, stmt_len,
8100 FALSE, FALSE, FALSE, 0) ||
8101 commit(thd, false) != TC_LOG::RESULT_SUCCESS)
8102 {
8103 ret= true;
8104 }
8105
8106 thd->lex->sql_command= save_sql_command;
8107 return ret;
8108 }
8109
8110
8111 /**
8112 Creates an incident event and writes it to the binary log.
8113
8114 @param thd Thread variable
8115 @param ev Incident event to be written
8116 @param err_msg Error message written to log file for the incident.
8117 @param lock If the binary lock should be locked or not
8118
8119 @retval
8120 0 error
8121 @retval
8122 1 success
8123 */
write_incident(THD * thd,bool need_lock_log,const char * err_msg,bool do_flush_and_sync)8124 bool MYSQL_BIN_LOG::write_incident(THD *thd, bool need_lock_log,
8125 const char* err_msg,
8126 bool do_flush_and_sync)
8127 {
8128 DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
8129
8130 if (!is_open())
8131 DBUG_RETURN(0);
8132
8133 LEX_STRING write_error_msg= {(char*) err_msg, strlen(err_msg)};
8134 binary_log::Incident_event::enum_incident incident=
8135 binary_log::Incident_event::INCIDENT_LOST_EVENTS;
8136 Incident_log_event ev(thd, incident, write_error_msg);
8137
8138 DBUG_RETURN(write_incident(&ev, thd, need_lock_log, err_msg,
8139 do_flush_and_sync));
8140 }
8141
8142
8143 /**
8144 Write the contents of the statement or transaction cache to the binary log.
8145
8146 Comparison with do_write_cache:
8147
8148 - do_write_cache is a lower-level function that only performs the
8149 actual write.
8150
8151 - write_cache is a higher-level function that calls do_write_cache
8152 and additionally performs some maintenance tasks, including:
8153 - report any errors that occurred
8154 - write incident event if needed
8155 - update gtid_state
8156 - update thd.binlog_next_event_pos
8157
8158 @param thd Thread variable
8159
8160 @param cache_data Events will be read from the IO_CACHE of this
8161 cache_data object.
8162
8163 @param writer Events will be written to this Binlog_event_writer.
8164
8165 @retval true IO error.
8166 @retval false Success.
8167
8168 @note We only come here if there is something in the cache.
8169 @note Whatever is in the cache is always a complete transaction.
  @note 'cache' needs to be reinitialized after this function returns.
8171 */
bool MYSQL_BIN_LOG::write_cache(THD *thd, binlog_cache_data *cache_data,
                                Binlog_event_writer *writer)
{
  /*
    NOTE(review): the trace string below names the third parameter as
    'bool', while the actual parameter is a Binlog_event_writer pointer.
  */
  DBUG_ENTER("MYSQL_BIN_LOG::write_cache(THD *, binlog_cache_data *, bool)");
#ifdef WITH_WSREP
  /* Nothing is written, and success is returned, in this mode. */
  if (WSREP_EMULATE_BINLOG(thd)) DBUG_RETURN(0);
#endif /* WITH_WSREP */

  IO_CACHE *cache= &cache_data->cache_log;
  /* Sampled before the cache is written out. */
  bool incident= cache_data->has_incident();

  /* The caller must already hold LOCK_log. */
  mysql_mutex_assert_owner(&LOCK_log);

  assert(is_open());
  if (likely(is_open()))                       // Should always be true
  {
    /*
      We only bother to write to the binary log if there is anything
      to write.

      @todo Is this check redundant? Probably this is only called if
      there is anything in the cache (see @note in comment above this
      function). Check if we can replace this by an assertion. /Sven
     */
    if (my_b_tell(cache) > 0)
    {
      /*
        Debug injection: write the cache, flush and sync, and then kill
        the server on purpose.
      */
      DBUG_EXECUTE_IF("crash_before_writing_xid",
                      {
                        if ((write_error= do_write_cache(cache, writer)))
                          DBUG_PRINT("info", ("error writing binlog cache: %d",
                                              write_error));
                        flush_and_sync(true);
                        DBUG_PRINT("info", ("crashing before writing xid"));
                        DBUG_SUICIDE();
                      });
      if ((write_error= do_write_cache(cache, writer)))
        goto err;

      /*
        If the cache was flagged with an incident, an incident event is
        written after its contents. LOCK_log is already held and no
        flush/sync is requested here.
      */
      const char* err_msg= "Non-transactional changes did not get into "
                           "the binlog.";
      if (incident && write_incident(thd, false/*need_lock_log=false*/,
                                     err_msg,
                                     false/*do_flush_and_sync==false*/))
        goto err;

      DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE(););
      if (cache->error)				// Error on read
      {
        char errbuf[MYSYS_STRERROR_SIZE];
        sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name,
                        errno, my_strerror(errbuf, sizeof(errbuf), errno));
        write_error= true; // Don't give more errors
        goto err;
      }
    }
    update_thd_next_event_pos(thd);
  }

  DBUG_RETURN(false);

err:
  /*
    Report a write error only if none has been recorded yet; write_error
    is already set when do_write_cache() failed or a read error was logged.
  */
  if (!write_error)
  {
    char errbuf[MYSYS_STRERROR_SIZE];
    write_error= true;
    sql_print_error(ER(ER_ERROR_ON_WRITE), name,
                    errno, my_strerror(errbuf, sizeof(errbuf), errno));
  }

  /*
    If the flush has failed due to ENOSPC, set the flush_error flag.
  */
  if (cache->error && thd->is_error() && my_errno() == ENOSPC)
  {
    cache_data->set_flush_error(thd);
  }
  /* Every failure leaving through this label is marked as a flush error. */
  thd->commit_error= THD::CE_FLUSH_ERROR;

  DBUG_RETURN(true);
}
8252
8253
8254 /**
8255 Wait until we get a signal that the relay log has been updated.
8256
8257 @param[in] thd Thread variable
8258 @param[in] timeout a pointer to a timespec;
8259 NULL means to wait w/o timeout.
8260
8261 @retval 0 if got signalled on update
8262 @retval non-0 if wait timeout elapsed
8263
8264 @note
8265 One must have a lock on LOCK_log before calling this function.
8266 */
8267
wait_for_update_relay_log(THD * thd,const struct timespec * timeout)8268 int MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd, const struct timespec *timeout)
8269 {
8270 int ret= 0;
8271 PSI_stage_info old_stage;
8272 DBUG_ENTER("wait_for_update_relay_log");
8273
8274 thd->ENTER_COND(&update_cond, &LOCK_log,
8275 &stage_slave_has_read_all_relay_log,
8276 &old_stage);
8277
8278 if (!timeout)
8279 mysql_cond_wait(&update_cond, &LOCK_log);
8280 else
8281 ret= mysql_cond_timedwait(&update_cond, &LOCK_log,
8282 const_cast<struct timespec *>(timeout));
8283 mysql_mutex_unlock(&LOCK_log);
8284 thd->EXIT_COND(&old_stage);
8285
8286 DBUG_RETURN(ret);
8287 }
8288
8289 /**
8290 Wait until we get a signal that the binary log has been updated.
8291 Applies to master only.
8292
8293 NOTES
8294 @param[in] thd a THD struct
8295 @param[in] timeout a pointer to a timespec;
8296 NULL means to wait w/o timeout.
8297 @retval 0 if got signalled on update
8298 @retval non-0 if wait timeout elapsed
8299 @note
8300 LOCK_log must be taken before calling this function.
8301 LOCK_log is being released while the thread is waiting.
8302 LOCK_log is released by the caller.
8303 */
8304
wait_for_update_bin_log(THD * thd,const struct timespec * timeout)8305 int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
8306 const struct timespec *timeout)
8307 {
8308 int ret= 0;
8309 DBUG_ENTER("wait_for_update_bin_log");
8310
8311 if (!timeout)
8312 mysql_cond_wait(&update_cond, &LOCK_binlog_end_pos);
8313 else
8314 ret= mysql_cond_timedwait(&update_cond, &LOCK_binlog_end_pos,
8315 const_cast<struct timespec *>(timeout));
8316 DBUG_RETURN(ret);
8317 }
8318
8319
8320 /**
8321 Close the log file.
8322
8323 @param exiting Bitmask for one or more of the following bits:
8324 - LOG_CLOSE_INDEX : if we should close the index file
8325 - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
8326 at once after close.
8327 - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
8328
8329 @param need_lock_log If true, this function acquires LOCK_log;
8330 otherwise the caller should already have acquired it.
8331
8332 @param need_lock_index If true, this function acquires LOCK_index;
8333 otherwise the caller should already have acquired it.
8334
8335 @note
8336 One can do an open on the object at once after doing a close.
8337 The internal structures are not freed until cleanup() is called
8338 */
8339
void MYSQL_BIN_LOG::close(uint exiting, bool need_lock_log,
                          bool need_lock_index)
{					// One can't set log_type here!
  DBUG_ENTER("MYSQL_BIN_LOG::close");
  DBUG_PRINT("enter",("exiting: %d", (int) exiting));
  if (need_lock_log)
    mysql_mutex_lock(&LOCK_log);
  else
    mysql_mutex_assert_owner(&LOCK_log);

  /* The log file itself is only touched when the log is currently open. */
  if (log_state.atomic_get() == LOG_OPENED)
  {
#ifdef HAVE_REPLICATION
    if ((exiting & LOG_CLOSE_STOP_EVENT) != 0)
    {
      /**
        TODO(WL#7546): Change the implementation to Stop_event after write() is
        moved into libbinlogevents
      */
      Stop_log_event s;
      // the checksumming rule for relay-log case is similar to Rotate
        s.common_footer->checksum_alg= is_relay_log ? relay_log_checksum_alg :
                                       static_cast<enum_binlog_checksum_alg>
                                       (binlog_checksum_options);
      assert(!is_relay_log ||
             relay_log_checksum_alg != binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
      /*
        NOTE(review): the results of s.write() and flush_io_cache() are
        not checked here.
      */
      s.write(&log_file);
      bytes_written+= s.common_header->data_written;
      flush_io_cache(&log_file);
      update_binlog_end_pos();
    }
#endif /* HAVE_REPLICATION */

    /* don't pwrite in a file opened with O_APPEND - it doesn't work */
    if (log_file.type == WRITE_CACHE)
    {
      /* Overwrite the one-byte flags field at this offset with zero. */
      my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
      my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
      uchar flags= 0;            // clearing LOG_EVENT_BINLOG_IN_USE_F
      /* NOTE(review): the result of this pwrite is not checked. */
      mysql_file_pwrite(log_file.file, &flags, 1, offset, MYF(0));
      /*
        Restore position so that anything we have in the IO_cache is written
        to the correct position.
        We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
        original position on system that doesn't support pwrite().
      */
      mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
    }

    /* this will cleanup IO_CACHE, sync and close the file */
    /* This repeats the state check of the enclosing if. */
    if (log_state.atomic_get() == LOG_OPENED)
    {
      end_io_cache(&log_file);

      /* Only the first failure is logged; write_error suppresses the rest. */
      if (mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error)
      {
        char errbuf[MYSYS_STRERROR_SIZE];
        write_error= 1;
        sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno,
                        my_strerror(errbuf, sizeof(errbuf), errno));
      }

      if (mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error)
      {
        char errbuf[MYSYS_STRERROR_SIZE];
        write_error= 1;
        sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno,
                        my_strerror(errbuf, sizeof(errbuf), errno));
      }
    }

    /* Publish the new state before LOCK_index is taken below. */
    log_state.atomic_set((exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED);
    my_free(name);
    name= NULL;
  }

  /*
    The following test is needed even if is_open() is not set, as we may have
    called a not complete close earlier and the index file is still open.
  */

  if (need_lock_index)
    mysql_mutex_lock(&LOCK_index);
  else
    mysql_mutex_assert_owner(&LOCK_index);

  if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
  {
    end_io_cache(&index_file);
    if (mysql_file_close(index_file.file, MYF(0)) < 0 && ! write_error)
    {
      char errbuf[MYSYS_STRERROR_SIZE];
      write_error= 1;
      sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name,
                      errno, my_strerror(errbuf, sizeof(errbuf), errno));
    }
  }

  if (need_lock_index)
    mysql_mutex_unlock(&LOCK_index);

  /*
    Done unconditionally, so it also covers the case where the log was not
    in state LOG_OPENED on entry. When the branch above already ran, name
    is NULL at this point.
    NOTE(review): presumably my_free() tolerates NULL -- confirm.
  */
  log_state.atomic_set((exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED);
  my_free(name);
  name= NULL;

  if (need_lock_log)
    mysql_mutex_unlock(&LOCK_log);

  DBUG_VOID_RETURN;
}
8450
harvest_bytes_written(Relay_log_info * rli,bool need_log_space_lock)8451 void MYSQL_BIN_LOG::harvest_bytes_written(Relay_log_info* rli, bool need_log_space_lock)
8452 {
8453 #ifndef NDEBUG
8454 char buf1[22],buf2[22];
8455 #endif
8456 DBUG_ENTER("harvest_bytes_written");
8457 if (need_log_space_lock)
8458 mysql_mutex_lock(&rli->log_space_lock);
8459 else
8460 mysql_mutex_assert_owner(&rli->log_space_lock);
8461 rli->log_space_total+= bytes_written;
8462 DBUG_PRINT("info",("relay_log_space: %s bytes_written: %s",
8463 llstr(rli->log_space_total,buf1), llstr(bytes_written,buf2)));
8464 bytes_written=0;
8465 if (need_log_space_lock)
8466 mysql_mutex_unlock(&rli->log_space_lock);
8467 DBUG_VOID_RETURN;
8468 }
8469
set_max_size(ulong max_size_arg)8470 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
8471 {
8472 /*
8473 We need to take locks, otherwise this may happen:
8474 new_file() is called, calls open(old_max_size), then before open() starts,
8475 set_max_size() sets max_size to max_size_arg, then open() starts and
8476 uses the old_max_size argument, so max_size_arg has been overwritten and
8477 it's like if the SET command was never run.
8478 */
8479 DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
8480 mysql_mutex_lock(&LOCK_log);
8481 if (is_open())
8482 max_size= max_size_arg;
8483 mysql_mutex_unlock(&LOCK_log);
8484 DBUG_VOID_RETURN;
8485 }
8486
8487 /****** transaction coordinator log for 2pc - binlog() based solution ******/
8488
8489 /**
8490 @todo
8491 keep in-memory list of prepared transactions
8492 (add to list in log(), remove on unlog())
8493 and copy it to the new binlog if rotated
8494 but let's check the behaviour of tc_log_page_waits first!
8495 */
8496
/**
  Opens the binary log in its role as transaction coordinator log and, if
  the last binlog file listed in the index is still flagged as in use,
  runs recovery on it and trims it to the last valid position.

  @param opt_name  Name used in the "Recovering after a crash" message and
                   passed to the multi-argument open_binlog() overload in
                   the heuristic-recover case; must be a non-empty string.

  @retval 0   success (this includes an index with no entries, i.e.
              find_log_pos() returning LOG_INFO_EOF)
  @retval 1   the index file is not initialized, or heuristic recover is
              in use (a new binlog is generated first in that case)
  @retval -1  reopening, trimming or clearing the in-use flag of the
              crashed binlog file failed
  @return     otherwise the non-zero error reported by find_log_pos(),
              find_next_log() or recover()
*/
int MYSQL_BIN_LOG::open_binlog(const char *opt_name)
{
  LOG_INFO log_info;
  int      error= 1;

  /*
    This function is used for 2pc transaction coordination.  Hence, it
    is never used for relay logs.
  */
  assert(!is_relay_log);
  assert(total_ha_2pc > 1 || (1 == total_ha_2pc && opt_bin_log));
  assert(opt_name && opt_name[0]);

  if (!my_b_inited(&index_file))
  {
    /* There was a failure to open the index file, can't open the binlog */
    cleanup();
    return 1;
  }

  if (using_heuristic_recover())
  {
    /* generate a new binlog to mask a corrupted one */
    mysql_mutex_lock(&LOCK_log);
    /* NOTE(review): the result of this open_binlog() call is not checked. */
    open_binlog(opt_name, 0, max_binlog_size, false,
                true/*need_lock_index=true*/,
                true/*need_sid_lock=true*/,
                NULL);
    mysql_mutex_unlock(&LOCK_log);
    cleanup();
    return 1;
  }

  /* LOG_INFO_EOF from find_log_pos() is turned into success (error= 0). */
  if ((error= find_log_pos(&log_info, NullS, true/*need_lock_index=true*/)))
  {
    if (error != LOG_INFO_EOF)
      sql_print_error("find_log_pos() failed (error: %d)", error);
    else
      error= 0;
    goto err;
  }

  {
    const char *errmsg;
    IO_CACHE    log;
    File        file;
    Log_event  *ev=0;
    Format_description_log_event fdle(BINLOG_VERSION);
    char        log_name[FN_REFLEN];
    my_off_t    valid_pos= 0;
    my_off_t    binlog_size;
    MY_STAT     s;

    /*
      NOTE(review): error is 0 at this point (find_log_pos() succeeded),
      so an invalid fdle makes this function return 0 -- confirm that
      this is intended.
    */
    if (! fdle.is_valid())
      goto err;

    /*
      Walk through the entries with find_next_log() until it reports an
      error; log_name ends up holding the last entry that was found.
    */
    do
    {
      strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
    } while (!(error= find_next_log(&log_info, true/*need_lock_index=true*/)));

    /*
      NOTE(review): the message below says find_log_pos(), although the
      error comes from find_next_log().
    */
    if (error !=  LOG_INFO_EOF)
    {
      sql_print_error("find_log_pos() failed (error: %d)", error);
      goto err;
    }

    /* error still holds LOG_INFO_EOF here, which is what gets returned. */
    if ((file= open_binlog_file(&log, log_name, &errmsg)) < 0)
    {
      sql_print_error("%s", errmsg);
      goto err;
    }

    /*
      NOTE(review): the result of my_stat() is not checked; if it can fail
      here, s.st_size would be read without having been set -- confirm.
    */
    my_stat(log_name, &s, MYF(0));
    binlog_size= s.st_size;

    /*
      If the binary log was not properly closed it means that the server
      may have crashed. In that case, we need to call MYSQL_BIN_LOG::recover
      to:

        a) collect logged XIDs;
        b) complete the 2PC of the pending XIDs;
        c) collect the last valid position.

      Therefore, we do need to iterate over the binary log, even if
      total_ha_2pc == 1, to find the last valid group of events written.
      Later we will take this value and truncate the log if need be.
    */
    if ((ev= Log_event::read_log_event(&log, 0, &fdle,
                                       opt_master_verify_checksum)) &&
        ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT &&
        (ev->common_header->flags & LOG_EVENT_BINLOG_IN_USE_F ||
         DBUG_EVALUATE_IF("eval_force_bin_log_recovery", true, false)))
    {
      sql_print_information("Recovering after a crash using %s", opt_name);
      valid_pos= my_b_tell(&log);
      error= recover(&log, (Format_description_log_event *)ev, &valid_pos);
    }
    else
      error=0;

    /* The read handle is released whether or not recovery was run. */
    delete ev;
    end_io_cache(&log);
    mysql_file_close(file, MYF(MY_WME));

    if (error)
      goto err;

    /* Trim the crashed binlog file to last valid transaction
      or event (non-transaction) base on valid_pos. */
    /* valid_pos stays 0 unless the recovery branch above was taken. */
    if (valid_pos > 0)
    {
      if ((file= mysql_file_open(key_file_binlog, log_name,
                                 O_RDWR | O_BINARY, MYF(MY_WME))) < 0)
      {
        sql_print_error("Failed to open the crashed binlog file "
                        "when master server is recovering it.");
        return -1;
      }

      /* Change binlog file size to valid_pos */
      if (valid_pos < binlog_size)
      {
        if (my_chsize(file, valid_pos, 0, MYF(MY_WME)))
        {
          sql_print_error("Failed to trim the crashed binlog file "
                          "when master server is recovering it.");
          mysql_file_close(file, MYF(MY_WME));
          return -1;
        }
        else
        {
          sql_print_information("Crashed binlog file %s size is %llu, "
                                "but recovered up to %llu. Binlog trimmed to %llu bytes.",
                                log_name, binlog_size, valid_pos, valid_pos);
        }
      }

      /* Clear LOG_EVENT_BINLOG_IN_USE_F */
      my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
      uchar flags= 0;
      if (mysql_file_pwrite(file, &flags, 1, offset, MYF(0)) != 1)
      {
        sql_print_error("Failed to clear LOG_EVENT_BINLOG_IN_USE_F "
                        "for the crashed binlog file when master "
                        "server is recovering it.");
        mysql_file_close(file, MYF(MY_WME));
        return -1;
      }

      mysql_file_close(file, MYF(MY_WME));
    } //end if
  }

err:
  return error;
}
8655
/**
  This is called on shutdown, after ha_panic.

  The body is intentionally empty: this overload performs no action.
*/
void MYSQL_BIN_LOG::close()
{
}
8660
8661 /*
8662 Prepare the transaction in the transaction coordinator.
8663
8664 This function will prepare the transaction in the storage engines
8665 (by calling @c ha_prepare_low) what will write a prepare record
8666 to the log buffers.
8667
8668 @retval 0 success
8669 @retval 1 error
8670 */
prepare(THD * thd,bool all)8671 int MYSQL_BIN_LOG::prepare(THD *thd, bool all)
8672 {
8673 DBUG_ENTER("MYSQL_BIN_LOG::prepare");
8674
8675 assert(opt_bin_log);
8676 /*
8677 The applier thread explicitly overrides the value of sql_log_bin
8678 with the value of log_slave_updates.
8679 */
8680 #ifdef WITH_WSREP
8681 assert(thd->wsrep_applier || (thd->slave_thread ?
8682 opt_log_slave_updates : true));
8683 #else
8684 assert(thd->slave_thread ?
8685 opt_log_slave_updates : thd->variables.sql_log_bin);
8686 #endif /* WITH_WSREP */
8687
8688 /*
8689 Set HA_IGNORE_DURABILITY to not flush the prepared record of the
8690 transaction to the log of storage engine (for example, InnoDB
8691 redo log) during the prepare phase. So that we can flush prepared
8692 records of transactions to the log of storage engine in a group
8693 right before flushing them to binary log during binlog group
8694 commit flush stage. Reset to HA_REGULAR_DURABILITY at the
8695 beginning of parsing next command.
8696 */
8697 thd->durability_property= HA_IGNORE_DURABILITY;
8698
8699 int error= ha_prepare_low(thd, all);
8700
8701 DBUG_RETURN(error);
8702 }
8703
8704 /**
8705 Commit the transaction in the transaction coordinator.
8706
8707 This function will commit the sessions transaction in the binary log
8708 and in the storage engines (by calling @c ha_commit_low). If the
8709 transaction was successfully logged (or not successfully unlogged)
  but the commit in the engines did not succeed, there is a risk of
8711 inconsistency between the engines and the binary log.
8712
8713 For binary log group commit, the commit is separated into three
8714 parts:
8715
8716 1. First part consists of filling the necessary caches and
8717 finalizing them (if they need to be finalized). After this,
8718 nothing is added to any of the caches.
8719
8720 2. Second part execute an ordered flush and commit. This will be
8721 done using the group commit functionality in ordered_commit.
8722
8723 3. Third part checks any errors resulting from the ordered commit
8724 and handles them appropriately.
8725
8726 @retval RESULT_SUCCESS success
8727 @retval RESULT_ABORTED error, transaction was neither logged nor committed
8728 @retval RESULT_INCONSISTENT error, transaction was logged but not committed
8729 */
commit(THD * thd,bool all)8730 TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all)
8731 {
8732 DBUG_ENTER("MYSQL_BIN_LOG::commit");
8733 DBUG_PRINT("info", ("query='%s'",
8734 thd == current_thd ? thd->query().str : NULL));
8735 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
8736 Transaction_ctx *trn_ctx= thd->get_transaction();
8737 #ifdef WITH_WSREP
8738 my_xid xid= (wsrep_is_wsrep_xid(trn_ctx->xid_state()->get_xid()) ?
8739 wsrep_xid_seqno( (*trn_ctx->xid_state()->get_xid()) ) :
8740 trn_ctx->xid_state()->get_xid()->get_my_xid());
8741 #else
8742 my_xid xid= trn_ctx->xid_state()->get_xid()->get_my_xid();
8743 #endif /* WITH_WSREP */
8744 bool stmt_stuff_logged= false;
8745 bool trx_stuff_logged= false;
8746 bool skip_commit= is_loggable_xa_prepare(thd);
8747
8748 DBUG_PRINT("enter", ("thd: 0x%llx, all: %s, xid: %llu, cache_mngr: 0x%llx",
8749 (ulonglong) thd, YESNO(all), (ulonglong) xid,
8750 (ulonglong) cache_mngr));
8751
8752 /*
8753 No cache manager means nothing to log, but we still have to commit
8754 the transaction.
8755 */
8756 if (cache_mngr == NULL)
8757 {
8758 if (!skip_commit && ha_commit_low(thd, all))
8759 DBUG_RETURN(RESULT_ABORTED);
8760 DBUG_RETURN(RESULT_SUCCESS);
8761 }
8762
8763 Transaction_ctx::enum_trx_scope trx_scope= all ? Transaction_ctx::SESSION :
8764 Transaction_ctx::STMT;
8765
8766 DBUG_PRINT("debug", ("in_transaction: %s, no_2pc: %s, rw_ha_count: %d",
8767 YESNO(thd->in_multi_stmt_transaction_mode()),
8768 YESNO(trn_ctx->no_2pc(trx_scope)),
8769 trn_ctx->rw_ha_count(trx_scope)));
8770 DBUG_PRINT("debug",
8771 ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
8772 YESNO(trn_ctx->cannot_safely_rollback(Transaction_ctx::SESSION)),
8773 YESNO(cache_mngr->trx_cache.is_binlog_empty())));
8774 DBUG_PRINT("debug",
8775 ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
8776 YESNO(trn_ctx->cannot_safely_rollback(Transaction_ctx::STMT)),
8777 YESNO(cache_mngr->stmt_cache.is_binlog_empty())));
8778
8779
8780 /*
8781 If there are no handlertons registered, there is nothing to
8782 commit. Note that DDLs are written earlier in this case (inside
8783 binlog_query).
8784
8785 TODO: This can be a problem in those cases that there are no
8786 handlertons registered. DDLs are one example, but the other case
8787 is MyISAM. In this case, we could register a dummy handlerton to
8788 trigger the commit.
8789
8790 Any statement that requires logging will call binlog_query before
8791 trans_commit_stmt, so an alternative is to use the condition
8792 "binlog_query called or stmt.ha_list != 0".
8793 */
8794 if (!all && !trn_ctx->is_active(trx_scope) &&
8795 cache_mngr->stmt_cache.is_binlog_empty())
8796 DBUG_RETURN(RESULT_SUCCESS);
8797
8798 if (thd->lex->sql_command == SQLCOM_XA_COMMIT)
8799 {
8800 /* The Commit phase of the XA two phase logging. */
8801
8802 bool one_phase= get_xa_opt(thd) == XA_ONE_PHASE;
8803 assert(all);
8804 assert(!skip_commit || one_phase);
8805
8806 int err= 0;
8807 XID_STATE *xs= thd->get_transaction()->xid_state();
8808 /*
8809 XA COMMIT ONE PHASE statement which has not gone through the binary log
8810 prepare phase, has to end the active XA transaction with appropriate XA
8811 END followed by XA COMMIT ONE PHASE.
8812
8813 The state of XA transaction is changed to PREPARED after the prepare
8814 phase, intermediately in ha_commit_trans code for the interest of
8815 binlogger. Hence check that the XA COMMIT ONE PHASE is set to 'PREPARE'
8816 and it has not already been written to binary log. For such transaction
8817 write the appropriate XA END statement.
8818 */
8819 if (!(is_loggable_xa_prepare(thd))
8820 && one_phase
8821 && !(xs->is_binlogged())
8822 && !cache_mngr->trx_cache.is_binlog_empty())
8823 {
8824 XA_prepare_log_event end_evt(thd, xs->get_xid(), one_phase);
8825 err= cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
8826 if (err)
8827 {
8828 DBUG_RETURN(RESULT_ABORTED);
8829 }
8830 trx_stuff_logged= true;
8831 thd->get_transaction()->xid_state()->set_binlogged();
8832 }
8833 if (DBUG_EVALUATE_IF("simulate_xa_commit_log_failure", true,
8834 do_binlog_xa_commit_rollback(thd, xs->get_xid(),
8835 true)))
8836 DBUG_RETURN(RESULT_ABORTED);
8837 }
8838
8839 /*
8840 If there is anything in the stmt cache, and GTIDs are enabled,
8841 then this is a single statement outside a transaction and it is
8842 impossible that there is anything in the trx cache. Hence, we
8843 write any empty group(s) to the stmt cache.
8844
8845 Otherwise, we write any empty group(s) to the trx cache at the end
8846 of the transaction.
8847 */
8848 if (!cache_mngr->stmt_cache.is_binlog_empty())
8849 {
8850 /*
8851 Commit parent identification of non-transactional query has
8852 been deferred until now, except for the mixed transaction case.
8853 */
8854 trn_ctx->store_commit_parent(m_dependency_tracker.get_max_committed_timestamp());
8855 if (cache_mngr->stmt_cache.finalize(thd))
8856 DBUG_RETURN(RESULT_ABORTED);
8857 stmt_stuff_logged= true;
8858 }
8859
8860 /*
8861 We commit the transaction if:
8862 - We are not in a transaction and committing a statement, or
8863 - We are in a transaction and a full transaction is committed.
8864 Otherwise, we accumulate the changes.
8865 */
8866 if (!cache_mngr->trx_cache.is_binlog_empty() &&
8867 ending_trans(thd, all) && !trx_stuff_logged)
8868 {
8869 const bool real_trans=
8870 (all || !trn_ctx->is_active(Transaction_ctx::SESSION));
8871
8872 /*
8873 We are committing an XA transaction if it is a "real" transaction
8874 and has an XID assigned (because some handlerton registered). A
8875 transaction is "real" if either 'all' is true or the 'all.ha_list'
8876 is empty.
8877
8878 Note: This is kind of strange since registering the binlog
8879 handlerton will then make the transaction XA, which is not really
8880 true. This occurs for example if a MyISAM statement is executed
8881 with row-based replication on.
8882 */
8883 if (is_loggable_xa_prepare(thd))
8884 {
8885 /* The prepare phase of XA transaction two phase logging. */
8886 int err= 0;
8887 bool one_phase= get_xa_opt(thd) == XA_ONE_PHASE;
8888
8889 assert(thd->lex->sql_command != SQLCOM_XA_COMMIT || one_phase);
8890
8891 XID_STATE *xs= thd->get_transaction()->xid_state();
8892 XA_prepare_log_event end_evt(thd, xs->get_xid(), one_phase);
8893
8894 assert(skip_commit);
8895
8896 err= cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
8897 if (err ||
8898 (DBUG_EVALUATE_IF("simulate_xa_prepare_failure_in_cache_finalize",
8899 true, false)))
8900 {
8901 DBUG_RETURN(RESULT_ABORTED);
8902 }
8903 }
8904 #ifdef WITH_WSREP
8905 /* LOAD DATA splitting sub-transactions are not properly registered
8906 and we compensate here to get the XID event to be created
8907 */
8908 else if (real_trans && xid &&
8909 ((trn_ctx->rw_ha_count(trx_scope) > 1) ||
8910 (WSREP(thd) && thd->lex->sql_command == SQLCOM_LOAD)) &&
8911 #else
8912 else if (real_trans && xid && trn_ctx->rw_ha_count(trx_scope) > 1 &&
8913 #endif /* WITH_WSREP */
8914 !trn_ctx->no_2pc(trx_scope))
8915 {
8916 Xid_log_event end_evt(thd, xid);
8917 if (cache_mngr->trx_cache.finalize(thd, &end_evt))
8918 DBUG_RETURN(RESULT_ABORTED);
8919 }
8920 else
8921 {
8922 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
8923 true, FALSE, TRUE, 0, TRUE);
8924 if (cache_mngr->trx_cache.finalize(thd, &end_evt))
8925 DBUG_RETURN(RESULT_ABORTED);
8926 }
8927 trx_stuff_logged= true;
8928 }
8929
8930 /*
8931 This is part of the stmt rollback.
8932 */
8933 if (!all)
8934 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
8935
8936 /*
8937 Now all the events are written to the caches, so we will commit
8938 the transaction in the engines. This is done using the group
8939 commit logic in ordered_commit, which will return when the
8940 transaction is committed.
8941
8942 If the commit in the engines fail, we still have something logged
8943 to the binary log so we have to report this as a "bad" failure
8944 (failed to commit, but logged something).
8945 */
8946 if (stmt_stuff_logged || trx_stuff_logged)
8947 {
8948 #ifdef WITH_WSREP
8949 if (WSREP_ON && thd->wsrep_replicate_GTID &&
8950 wsrep_replicate_GTID(thd))
8951 {
8952 /* GTID replication failed */
8953 DBUG_RETURN(RESULT_ABORTED);
8954 }
8955 #endif /* WITH_WSREP */
8956 if (RUN_HOOK(transaction,
8957 before_commit,
8958 (thd, all,
8959 thd_get_cache_mngr(thd)->get_binlog_cache_log(true),
8960 thd_get_cache_mngr(thd)->get_binlog_cache_log(false),
8961 max<my_off_t>(max_binlog_cache_size,
8962 max_binlog_stmt_cache_size))) ||
8963 DBUG_EVALUATE_IF("simulate_failure_in_before_commit_hook", true, false))
8964 {
8965 ha_rollback_low(thd, all);
8966 gtid_state->update_on_rollback(thd);
8967 thd_get_cache_mngr(thd)->reset();
8968 //Reset the thread OK status before changing the outcome.
8969 if (thd->get_stmt_da()->is_ok())
8970 thd->get_stmt_da()->reset_diagnostics_area();
8971 my_error(ER_RUN_HOOK_ERROR, MYF(0), "before_commit");
8972 DBUG_RETURN(RESULT_ABORTED);
8973 }
8974 /*
8975 Check whether the transaction should commit or abort given the
8976 plugin feedback.
8977 */
8978 if (thd->get_transaction()->get_rpl_transaction_ctx()->is_transaction_rollback() ||
8979 (DBUG_EVALUATE_IF("simulate_transaction_rollback_request", true, false)))
8980 {
8981 ha_rollback_low(thd, all);
8982 gtid_state->update_on_rollback(thd);
8983 thd_get_cache_mngr(thd)->reset();
8984 if (thd->get_stmt_da()->is_ok())
8985 thd->get_stmt_da()->reset_diagnostics_area();
8986 my_error(ER_TRANSACTION_ROLLBACK_DURING_COMMIT, MYF(0));
8987 DBUG_RETURN(RESULT_ABORTED);
8988 }
8989
8990 if (ordered_commit(thd, all, skip_commit))
8991 DBUG_RETURN(RESULT_INCONSISTENT);
8992
8993 /*
8994 Mark the flag m_is_binlogged to true only after we are done
8995 with checking all the error cases.
8996 */
8997 if (is_loggable_xa_prepare(thd))
8998 thd->get_transaction()->xid_state()->set_binlogged();
8999 }
9000 else if (!skip_commit)
9001 {
9002 if (ha_commit_low(thd, all))
9003 DBUG_RETURN(RESULT_INCONSISTENT);
9004 }
9005
9006 DBUG_RETURN(RESULT_SUCCESS);
9007 }
9008
9009
9010 /**
9011 Flush caches for session.
9012
  @note @c set_trans_pos is called with a pointer to the file name
  that the binary log currently uses, and a rotation will change the
  contents of that variable.
9016
9017 The position is used when calling the after_flush, after_commit,
9018 and after_rollback hooks, but these have been placed so that they
9019 occur before a rotation is executed.
9020
  It is the responsibility of any plugin that uses this position to
  copy it if the plugin needs it after the hook has returned.
9023
9024 The current "global" transaction_counter is stepped and its new value
9025 is assigned to the transaction.
9026 */
9027 std::pair<int,my_off_t>
flush_thread_caches(THD * thd)9028 MYSQL_BIN_LOG::flush_thread_caches(THD *thd)
9029 {
9030 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
9031 my_off_t bytes= 0;
9032 bool wrote_xid= false;
9033 int error= cache_mngr->flush(thd, &bytes, &wrote_xid);
9034 if (!error && bytes > 0)
9035 {
9036 /*
9037 Note that set_trans_pos does not copy the file name. See
9038 this function documentation for more info.
9039 */
9040 thd->set_trans_pos(log_file_name, my_b_tell(&log_file));
9041 if (wrote_xid)
9042 inc_prep_xids(thd);
9043 }
9044 DBUG_PRINT("debug", ("bytes: %llu", bytes));
9045 return std::make_pair(error, bytes);
9046 }
9047
9048
/**
  Execute the flush stage.

  @param total_bytes_var Pointer to variable that will be set to the total
  number of bytes flushed. Must not be NULL.

  @param rotate_var Pointer to variable that will be set to true if
  binlog rotation should be performed after releasing locks. If rotation
  is not necessary, the variable will not be touched. Must not be NULL.

  @param out_queue_var Pointer to variable that will be set to the head of
  the queue of sessions fetched from the flush stage. Must not be NULL.

  @return Error code on error, zero on success
*/
9061
9062 int
process_flush_stage_queue(my_off_t * total_bytes_var,bool * rotate_var,THD ** out_queue_var)9063 MYSQL_BIN_LOG::process_flush_stage_queue(my_off_t *total_bytes_var,
9064 bool *rotate_var,
9065 THD **out_queue_var)
9066 {
9067 DBUG_ENTER("MYSQL_BIN_LOG::process_flush_stage_queue");
9068 #ifndef NDEBUG
9069 // number of flushes per group.
9070 int no_flushes= 0;
9071 #endif
9072 assert(total_bytes_var && rotate_var && out_queue_var);
9073 my_off_t total_bytes= 0;
9074 int flush_error= 1;
9075 mysql_mutex_assert_owner(&LOCK_log);
9076
9077 /*
9078 Fetch the entire flush queue and empty it, so that the next batch
9079 has a leader. We must do this before invoking ha_flush_logs(...)
9080 for guaranteeing to flush prepared records of transactions before
9081 flushing them to binary log, which is required by crash recovery.
9082 */
9083 THD *first_seen= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
9084 assert(first_seen != NULL);
9085 /*
9086 We flush prepared records of transactions to the log of storage
9087 engine (for example, InnoDB redo log) in a group right before
9088 flushing them to binary log.
9089 */
9090 ha_flush_logs(NULL, true);
9091 DBUG_EXECUTE_IF("crash_after_flush_engine_log", DBUG_SUICIDE(););
9092 assign_automatic_gtids_to_flush_group(first_seen);
9093 /* Flush thread caches to binary log. */
9094 for (THD *head= first_seen ; head ; head = head->next_to_commit)
9095 {
9096 std::pair<int,my_off_t> result= flush_thread_caches(head);
9097 total_bytes+= result.second;
9098 if (flush_error == 1)
9099 flush_error= result.first;
9100 #ifndef NDEBUG
9101 no_flushes++;
9102 #endif
9103 }
9104
9105 *out_queue_var= first_seen;
9106 *total_bytes_var= total_bytes;
9107 if (total_bytes > 0 && my_b_tell(&log_file) >= (my_off_t) max_size)
9108 *rotate_var= true;
9109 #ifndef NDEBUG
9110 DBUG_PRINT("info",("no_flushes:= %d", no_flushes));
9111 no_flushes= 0;
9112 #endif
9113 DBUG_RETURN(flush_error);
9114 }
9115
/**
  Commit a sequence of sessions.

  This function commits an entire queue of sessions starting with the
  session in @c first. Flush and sync errors do not cause a session to
  be skipped here: every session that is marked for a storage engine
  commit is committed, and a failure of that commit is recorded in the
  session's commit_error.

  It will also add the GTIDs of the transactions to gtid_executed and
  decrement the prepared XID counter for each session that wrote an XID.

  @see MYSQL_BIN_LOG::ordered_commit

  @param thd The "master" thread
  @param first First thread in the queue of threads to commit
*/
9131
9132 void
process_commit_stage_queue(THD * thd,THD * first)9133 MYSQL_BIN_LOG::process_commit_stage_queue(THD *thd, THD *first)
9134 {
9135 mysql_mutex_assert_owner(&LOCK_commit);
9136 #ifndef NDEBUG
9137 thd->get_transaction()->m_flags.ready_preempt= 1; // formality by the leader
9138 #endif
9139 for (THD *head= first ; head ; head = head->next_to_commit)
9140 {
9141 DBUG_PRINT("debug", ("Thread ID: %u, commit_error: %d, flags.pending: %s",
9142 head->thread_id(), head->commit_error,
9143 YESNO(head->get_transaction()->m_flags.pending)));
9144 /*
9145 If flushing failed, set commit_error for the session, skip the
9146 transaction and proceed with the next transaction instead. This
9147 will mark all threads as failed, since the flush failed.
9148
9149 If flush succeeded, attach to the session and commit it in the
9150 engines.
9151 */
9152 #ifndef NDEBUG
9153 stage_manager.clear_preempt_status(head);
9154 #endif
9155 if (head->get_transaction()->sequence_number != SEQ_UNINIT)
9156 {
9157 mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
9158 m_dependency_tracker.update_max_committed(head);
9159 mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);
9160 }
9161 /*
9162 Flush/Sync error should be ignored and continue
9163 to commit phase. And thd->commit_error cannot be
9164 COMMIT_ERROR at this moment.
9165 */
9166 assert(head->commit_error != THD::CE_COMMIT_ERROR);
9167 #ifndef EMBEDDED_LIBRARY
9168 Thd_backup_and_restore switch_thd(thd, head);
9169 #endif /* !EMBEDDED_LIBRARY */
9170 bool all= head->get_transaction()->m_flags.real_commit;
9171 if (head->get_transaction()->m_flags.commit_low)
9172 {
9173 /* head is parked to have exited append() */
9174 assert(head->get_transaction()->m_flags.ready_preempt);
9175 /*
9176 storage engine commit
9177 */
9178 if (ha_commit_low(head, all, false))
9179 head->commit_error= THD::CE_COMMIT_ERROR;
9180 }
9181 DBUG_PRINT("debug", ("commit_error: %d, flags.pending: %s",
9182 head->commit_error,
9183 YESNO(head->get_transaction()->m_flags.pending)));
9184 }
9185
9186 /*
9187 Handle the GTID of the threads.
9188 gtid_executed table is kept updated even though transactions fail to be
9189 logged. That's required by slave auto positioning.
9190 */
9191 gtid_state->update_commit_group(first);
9192
9193 for (THD *head= first ; head ; head = head->next_to_commit)
9194 {
9195 /*
9196 Decrement the prepared XID counter after storage engine commit.
9197 We also need decrement the prepared XID when encountering a
9198 flush error or session attach error for avoiding 3-way deadlock
9199 among user thread, rotate thread and dump thread.
9200 */
9201 if (head->get_transaction()->m_flags.xid_written)
9202 dec_prep_xids(head);
9203 }
9204 }
9205
9206 /**
9207 Process after commit for a sequence of sessions.
9208
9209 @param thd The "master" thread
9210 @param first First thread in the queue of threads to commit
9211 */
9212
9213 void
process_after_commit_stage_queue(THD * thd,THD * first)9214 MYSQL_BIN_LOG::process_after_commit_stage_queue(THD *thd, THD *first)
9215 {
9216 for (THD *head= first; head; head= head->next_to_commit)
9217 {
9218 if (head->get_transaction()->m_flags.run_hooks &&
9219 head->commit_error != THD::CE_COMMIT_ERROR)
9220 {
9221
9222 /*
9223 TODO: This hook here should probably move outside/below this
9224 if and be the only after_commit invocation left in the
9225 code.
9226 */
9227 #ifndef EMBEDDED_LIBRARY
9228 Thd_backup_and_restore switch_thd(thd, head);
9229 #endif /* !EMBEDDED_LIBRARY */
9230 bool all= head->get_transaction()->m_flags.real_commit;
9231 (void) RUN_HOOK(transaction, after_commit, (head, all));
9232 /*
9233 When after_commit finished for the transaction, clear the run_hooks flag.
9234 This allow other parts of the system to check if after_commit was called.
9235 */
9236 head->get_transaction()->m_flags.run_hooks= false;
9237 }
9238 }
9239 }
9240
#ifndef NDEBUG
/**
  Printable names of the ordered-commit stages, indexed by stage id.
  Only used for debug tracing. Both the strings and the table itself are
  read-only.
*/
static const char *const g_stage_name[] = {
  "FLUSH",
  "SYNC",
  "COMMIT",
};
#endif
9249
9250
/**
  Enter a stage of the ordered commit procedure.

  Entering a stage is done by:

  - Atomically enqueueing a queue of sessions (which is just one for
    the first stage).

  - If the queue was empty, the thread is the leader for that stage
    and it should process the entire queue for that stage.

  - If the queue was not empty, the thread is a follower and can go
    waiting for the commit to finish.

  The function will lock @c enter_mutex if the thread was designated the
  leader for the stage. The one exception is when @c enter_mutex is
  LOCK_log and the binary log is being rotated because of an incident
  event: the mutex is already held in that case and is not locked again.

  @param thd         Session structure
  @param stage       The stage to enter
  @param queue       Queue of threads to enqueue for the stage
  @param leave_mutex Mutex of the stage being left; it is released once
                     the sessions are queued. The callers in
                     ordered_commit pass NULL when entering the flush
                     stage.
  @param enter_mutex Mutex of the stage being entered. Must not be NULL.

  @retval true  The thread should "bail out" and go waiting for the
                commit to finish
  @retval false The thread is the leader for the stage and should do
                the processing.
*/
9278
9279 bool
change_stage(THD * thd,Stage_manager::StageID stage,THD * queue,mysql_mutex_t * leave_mutex,mysql_mutex_t * enter_mutex)9280 MYSQL_BIN_LOG::change_stage(THD *thd,
9281 Stage_manager::StageID stage, THD *queue,
9282 mysql_mutex_t *leave_mutex,
9283 mysql_mutex_t *enter_mutex)
9284 {
9285 DBUG_ENTER("MYSQL_BIN_LOG::change_stage");
9286 DBUG_PRINT("enter", ("thd: 0x%llx, stage: %s, queue: 0x%llx",
9287 (ulonglong) thd, g_stage_name[stage], (ulonglong) queue));
9288 assert(0 <= stage && stage < Stage_manager::STAGE_COUNTER);
9289 assert(enter_mutex);
9290 assert(queue);
9291 /*
9292 enroll_for will release the leave_mutex once the sessions are
9293 queued.
9294 */
9295 if (!stage_manager.enroll_for(stage, queue, leave_mutex))
9296 {
9297 assert(!thd_get_cache_mngr(thd)->dbug_any_finalized());
9298 DBUG_RETURN(true);
9299 }
9300
9301 /*
9302 We do not lock the enter_mutex if it is LOCK_log when rotating binlog
9303 caused by logging incident log event, since it is already locked.
9304 */
9305 bool need_lock_enter_mutex=
9306 !(is_rotating_caused_by_incident && enter_mutex == &LOCK_log);
9307
9308 if (need_lock_enter_mutex)
9309 mysql_mutex_lock(enter_mutex);
9310 else
9311 mysql_mutex_assert_owner(enter_mutex);
9312
9313 DBUG_RETURN(false);
9314 }
9315
9316
9317
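/**
  Flush the I/O cache to file.

  Flush the I/O cache of the binary log to the binlog file. On success
  the resulting end position of the log is stored in @c end_pos_var and
  zero is returned; publishing the new end position is left to the
  caller. On failure the commit_error of the current thread is set to
  CE_FLUSH_ERROR and ER_ERROR_ON_WRITE is returned.
*/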
9325
9326 int
flush_cache_to_file(my_off_t * end_pos_var)9327 MYSQL_BIN_LOG::flush_cache_to_file(my_off_t *end_pos_var)
9328 {
9329 if (flush_io_cache(&log_file))
9330 {
9331 THD *thd= current_thd;
9332 thd->commit_error= THD::CE_FLUSH_ERROR;
9333 return ER_ERROR_ON_WRITE;
9334 }
9335 *end_pos_var= my_b_tell(&log_file);
9336 return 0;
9337 }
9338
9339
9340 /**
9341 Call fsync() to sync the file to disk.
9342 */
9343 std::pair<bool, bool>
sync_binlog_file(bool force)9344 MYSQL_BIN_LOG::sync_binlog_file(bool force)
9345 {
9346 bool synced= false;
9347 unsigned int sync_period= get_sync_period();
9348 if (force || (sync_period && ++sync_counter >= sync_period))
9349 {
9350 sync_counter= 0;
9351
9352 /**
9353 On *pure non-transactional* workloads there is a small window
9354 in time where a concurrent rotate might be able to close
9355 the file before the sync is actually done. In that case,
9356 ignore the bad file descriptor errors.
9357
9358 Transactional workloads (InnoDB) are not affected since the
9359 the rotation will not happen until all transactions have
9360 committed to the storage engine, thence decreased the XID
9361 counters.
9362
9363 TODO: fix this properly even for non-transactional storage
9364 engines.
9365 */
9366 if (DBUG_EVALUATE_IF("simulate_error_during_sync_binlog_file", 1,
9367 mysql_file_sync(log_file.file,
9368 MYF(MY_WME | MY_IGNORE_BADFD))))
9369 {
9370 THD *thd= current_thd;
9371 thd->commit_error= THD::CE_SYNC_ERROR;
9372 return std::make_pair(true, synced);
9373 }
9374 synced= true;
9375 }
9376 return std::make_pair(false, synced);
9377 }
9378
9379
/**
  Helper function executed when leaving @c ordered_commit.

  This function contains the necessary code for fetching the error
  code, doing post-commit checks, and wrapping up the commit if
  necessary.

  It is typically called when @c change_stage indicates that the thread
  should bail out, and also when the ultimate leader thread finishes
  executing @c ordered_commit.

  It is typically used in this manner:
  @code
  if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log))
    return finish_commit(thd);
  @endcode

  @return Nonzero if the session commit failed (a storage engine commit
          error), or zero on success. Flush and sync errors are not
          reported through this return value.
*/
9400 int
finish_commit(THD * thd)9401 MYSQL_BIN_LOG::finish_commit(THD *thd)
9402 {
9403 DBUG_ENTER("MYSQL_BIN_LOG::finish_commit");
9404 DEBUG_SYNC(thd, "reached_finish_commit");
9405 /*
9406 In some unlikely situations, it can happen that binary
9407 log is closed before the thread flushes it's cache.
9408 In that case, clear the caches before doing commit.
9409 */
9410 if (unlikely(!is_open()))
9411 {
9412 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
9413 if (cache_mngr)
9414 cache_mngr->reset();
9415 }
9416 if (thd->get_transaction()->sequence_number != SEQ_UNINIT)
9417 {
9418 mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
9419 m_dependency_tracker.update_max_committed(thd);
9420 mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);
9421 }
9422 if (thd->get_transaction()->m_flags.commit_low)
9423 {
9424 const bool all= thd->get_transaction()->m_flags.real_commit;
9425 /*
9426 Now flush error and sync erros are ignored and we are continuing and
9427 committing. And at this time, commit_error cannot be COMMIT_ERROR.
9428 */
9429 assert(thd->commit_error != THD::CE_COMMIT_ERROR);
9430 /*
9431 storage engine commit
9432 */
9433 if (ha_commit_low(thd, all, false))
9434 thd->commit_error= THD::CE_COMMIT_ERROR;
9435 /*
9436 Decrement the prepared XID counter after storage engine commit
9437 */
9438 if (thd->get_transaction()->m_flags.xid_written)
9439 dec_prep_xids(thd);
9440 /*
9441 If commit succeeded, we call the after_commit hook
9442
9443 TODO: This hook here should probably move outside/below this
9444 if and be the only after_commit invocation left in the
9445 code.
9446 */
9447 if ((thd->commit_error != THD::CE_COMMIT_ERROR) &&
9448 thd->get_transaction()->m_flags.run_hooks)
9449 {
9450 (void) RUN_HOOK(transaction, after_commit, (thd, all));
9451 thd->get_transaction()->m_flags.run_hooks= false;
9452 }
9453 }
9454 else if (thd->get_transaction()->m_flags.xid_written)
9455 dec_prep_xids(thd);
9456
9457 /*
9458 If the ordered commit didn't updated the GTIDs for this thd yet
9459 at process_commit_stage_queue (i.e. --binlog-order-commits=0)
9460 the thd still has the ownership of a GTID and we must handle it.
9461 */
9462 if (!thd->owned_gtid.is_empty())
9463 {
9464 /*
9465 Gtid is added to gtid_state.executed_gtids and removed from owned_gtids
9466 on update_on_commit().
9467 */
9468 if (thd->commit_error == THD::CE_NONE)
9469 {
9470 gtid_state->update_on_commit(thd);
9471 }
9472 else
9473 gtid_state->update_on_rollback(thd);
9474 }
9475
9476 DBUG_EXECUTE_IF("leaving_finish_commit",
9477 {
9478 const char act[]=
9479 "now SIGNAL signal_leaving_finish_commit";
9480 assert(!debug_sync_set_action(current_thd,
9481 STRING_WITH_LEN(act)));
9482 };);
9483
9484 assert(thd->commit_error || !thd->get_transaction()->m_flags.run_hooks);
9485 assert(!thd_get_cache_mngr(thd)->dbug_any_finalized());
9486 DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
9487 thd->thread_id(), thd->commit_error));
9488 /*
9489 flush or sync errors are handled by the leader of the group
9490 (using binlog_error_action). Hence treat only COMMIT_ERRORs as errors.
9491 */
9492 DBUG_RETURN(thd->commit_error == THD::CE_COMMIT_ERROR);
9493 }
9494
9495 /**
9496 Auxiliary function used in ordered_commit.
9497 */
call_after_sync_hook(THD * queue_head)9498 static inline int call_after_sync_hook(THD *queue_head)
9499 {
9500 const char *log_file= NULL;
9501 my_off_t pos= 0;
9502
9503 if (NO_HOOK(binlog_storage))
9504 return 0;
9505
9506 assert(queue_head != NULL);
9507 for (THD *thd= queue_head; thd != NULL; thd= thd->next_to_commit)
9508 if (likely(thd->commit_error == THD::CE_NONE))
9509 thd->get_trans_fixed_pos(&log_file, &pos);
9510
9511 if (DBUG_EVALUATE_IF("simulate_after_sync_hook_error", 1, 0) ||
9512 RUN_HOOK(binlog_storage, after_sync, (queue_head, log_file, pos)))
9513 {
9514 sql_print_error("Failed to run 'after_sync' hooks");
9515 return ER_ERROR_ON_WRITE;
9516 }
9517 return 0;
9518 }
9519
/**
  Helper function to handle flush or sync stage errors.
  If binlog_error_action= ABORT_SERVER, server will be aborted
  after reporting the error to the client.
  If binlog_error_action= IGNORE_ERROR, binlog will be closed
  for the rest of the life time of the server. close() call is protected
  with LOCK_log to avoid any parallel operations on binary log.

  @param thd Thread object that faced flush/sync error
  @param need_lock_log
                       > Indicates true if LOCK_log is needed before closing
                         binlog (happens when we are handling sync error)
                       > Indicates false if LOCK_log is already acquired
                         by the thread (happens when we are handling flush
                         error)
  @param message Message stating the reason of the failure, or NULL to
                 have a default message built from the commit error

  @return void
*/
handle_binlog_flush_or_sync_error(THD * thd,bool need_lock_log,const char * message)9539 void MYSQL_BIN_LOG::handle_binlog_flush_or_sync_error(THD *thd,
9540 bool need_lock_log,
9541 const char* message)
9542 {
9543 char errmsg[MYSQL_ERRMSG_SIZE]= {0};
9544 if (!message)
9545 sprintf(errmsg, "An error occurred during %s stage of the commit. "
9546 "'binlog_error_action' is set to '%s'.",
9547 thd->commit_error== THD::CE_FLUSH_ERROR ? "flush" : "sync",
9548 binlog_error_action == ABORT_SERVER ? "ABORT_SERVER" : "IGNORE_ERROR");
9549 else
9550 strncpy(errmsg, message, MYSQL_ERRMSG_SIZE-1);
9551 if (binlog_error_action == ABORT_SERVER)
9552 {
9553 char err_buff[MYSQL_ERRMSG_SIZE + 25];
9554 sprintf(err_buff, "%s Server is being stopped.", errmsg);
9555 exec_binlog_error_action_abort(err_buff);
9556 }
9557 else
9558 {
9559 DEBUG_SYNC(thd, "before_binlog_closed_due_to_error");
9560 if (need_lock_log)
9561 mysql_mutex_lock(&LOCK_log);
9562 else
9563 mysql_mutex_assert_owner(&LOCK_log);
9564 /*
9565 It can happen that other group leader encountered
9566 error and already closed the binary log. So print
9567 error only if it is in open state. But we should
9568 call close() always just in case if the previous
9569 close did not close index file.
9570 */
9571 if (is_open())
9572 {
9573 sql_print_error("%s Hence turning logging off for the whole duration "
9574 "of the MySQL server process. To turn it on again: fix "
9575 "the cause, shutdown the MySQL server and restart it.",
9576 errmsg);
9577 }
9578 close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, false/*need_lock_log=false*/,
9579 true/*need_lock_index=true*/);
9580 /*
9581 If there is a write error (flush/sync stage) and if
9582 binlog_error_action=IGNORE_ERROR, clear the error
9583 and allow the commit to happen in storage engine.
9584 */
9585 if (check_write_error(thd))
9586 thd->clear_error();
9587
9588 if (need_lock_log)
9589 mysql_mutex_unlock(&LOCK_log);
9590 DEBUG_SYNC(thd, "after_binlog_closed_due_to_error");
9591 }
9592 }
9593 /**
9594 Flush and commit the transaction.
9595
9596 This will execute an ordered flush and commit of all outstanding
9597 transactions and is the main function for the binary log group
9598 commit logic. The function performs the ordered commit in two
9599 phases.
9600
  The first phase flushes the caches to the binary log under
  LOCK_log and marks all threads that were flushed as not pending.
9603
9604 The second phase executes under LOCK_commit and commits all
9605 transactions in order.
9606
9607 The procedure is:
9608
9609 1. Queue ourselves for flushing.
  2. Grab the log lock, which might result in blocking if the mutex is
     already held by another thread.
9612 3. If we were not committed while waiting for the lock
9613 1. Fetch the queue
9614 2. For each thread in the queue:
9615 a. Attach to it
9616 b. Flush the caches, saving any error code
9617 3. Flush and sync (depending on the value of sync_binlog).
9618 4. Signal that the binary log was updated
9619 4. Release the log lock
9620 5. Grab the commit lock
9621 1. For each thread in the queue:
9622 a. If there were no error when flushing and the transaction shall be committed:
9623 - Commit the transaction, saving the result of executing the commit.
9624 6. Release the commit lock
9625 7. Call purge, if any of the committed thread requested a purge.
9626 8. Return with the saved error code
9627
9628 @todo The use of @c skip_commit is a hack that we use since the @c
9629 TC_LOG Interface does not contain functions to handle
9630 savepoints. Once the binary log is eliminated as a handlerton and
9631 the @c TC_LOG interface is extended with savepoint handling, this
9632 parameter can be removed.
9633
9634 @param thd Session to commit transaction for
9635 @param all This is @c true if this is a real transaction commit, and
9636 @c false otherwise.
9637 @param skip_commit
9638 This is @c true if the call to @c ha_commit_low should
9639 be skipped (it is handled by the caller somehow) and @c
9640 false otherwise (the normal case).
9641 */
ordered_commit(THD * thd,bool all,bool skip_commit)9642 int MYSQL_BIN_LOG::ordered_commit(THD *thd, bool all, bool skip_commit)
9643 {
9644 DBUG_ENTER("MYSQL_BIN_LOG::ordered_commit");
9645 int flush_error= 0, sync_error= 0;
9646 my_off_t total_bytes= 0;
9647 bool do_rotate= false;
9648
9649 #ifdef WITH_WSREP
9650 if (WSREP_EMULATE_BINLOG(thd))
9651 {
9652 /*
9653 Skip group commit, just do storage engine commit.
9654 */
9655 int rcode = ha_commit_low(thd, all);
9656
9657 /* if there is myisam statement inside innodb transaction, we may
9658 have events in stmt cache
9659 */
9660 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
9661 if(!cache_mngr->stmt_cache.is_binlog_empty())
9662 {
9663 WSREP_DEBUG("stmt transaction inside MST, SQL: %s", WSREP_QUERY(thd));
9664 cache_mngr->stmt_cache.reset();
9665 }
9666 DBUG_RETURN(rcode);
9667 }
9668 #endif /* WITH_WSREP */
9669
9670 /*
9671 These values are used while flushing a transaction, so clear
9672 everything.
9673
9674 Notes:
9675
9676 - It would be good if we could keep transaction coordinator
9677 log-specific data out of the THD structure, but that is not the
9678 case right now.
9679
9680 - Everything in the transaction structure is reset when calling
9681 ha_commit_low since that calls Transaction_ctx::cleanup.
9682 */
9683 thd->get_transaction()->m_flags.pending= true;
9684 thd->commit_error= THD::CE_NONE;
9685 thd->next_to_commit= NULL;
9686 thd->durability_property= HA_IGNORE_DURABILITY;
9687 thd->get_transaction()->m_flags.real_commit= all;
9688 thd->get_transaction()->m_flags.xid_written= false;
9689 thd->get_transaction()->m_flags.commit_low= !skip_commit;
9690 thd->get_transaction()->m_flags.run_hooks= !skip_commit;
9691 #ifndef NDEBUG
9692 /*
9693 The group commit Leader may have to wait for follower whose transaction
9694 is not ready to be preempted. Initially the status is pessimistic.
9695 Preemption guarding logics is necessary only when !NDEBUG is set.
9696 It won't be required for the dbug-off case as long as the follower won't
9697 execute any thread-specific write access code in this method, which is
9698 the case as of current.
9699 */
9700 thd->get_transaction()->m_flags.ready_preempt= 0;
9701 #endif
9702
9703 DBUG_PRINT("enter", ("flags.pending: %s, commit_error: %d, thread_id: %u",
9704 YESNO(thd->get_transaction()->m_flags.pending),
9705 thd->commit_error, thd->thread_id()));
9706
9707 DEBUG_SYNC(thd, "bgc_before_flush_stage");
9708
9709 /*
9710 Stage #1: flushing transactions to binary log
9711
9712 While flushing, we allow new threads to enter and will process
9713 them in due time. Once the queue was empty, we cannot reap
9714 anything more since it is possible that a thread entered and
9715 appointed itself leader for the flush phase.
9716 */
9717
9718 #ifdef HAVE_REPLICATION
9719 if (has_commit_order_manager(thd))
9720 {
9721 Slave_worker *worker= dynamic_cast<Slave_worker *>(thd->rli_slave);
9722 Commit_order_manager *mngr= worker->get_commit_order_manager();
9723
9724 if (mngr->wait_for_its_turn(worker, all))
9725 {
9726 thd->commit_error= THD::CE_COMMIT_ERROR;
9727 DBUG_RETURN(thd->commit_error);
9728 }
9729
9730 if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log))
9731 DBUG_RETURN(finish_commit(thd));
9732 }
9733 else
9734 #endif
9735 if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log))
9736 {
9737 DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
9738 thd->thread_id(), thd->commit_error));
9739 DBUG_RETURN(finish_commit(thd));
9740 }
9741
9742 THD *wait_queue= NULL, *final_queue= NULL;
9743 mysql_mutex_t *leave_mutex_before_commit_stage= NULL;
9744 my_off_t flush_end_pos= 0;
9745 bool update_binlog_end_pos_after_sync;
9746 if (unlikely(!is_open()))
9747 {
9748 final_queue= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
9749 leave_mutex_before_commit_stage= &LOCK_log;
9750 /*
9751 binary log is closed, flush stage and sync stage should be
9752 ignored. Binlog cache should be cleared, but instead of doing
9753 it here, do that work in 'finish_commit' function so that
9754 leader and followers thread caches will be cleared.
9755 */
9756 goto commit_stage;
9757 }
9758 DEBUG_SYNC(thd, "waiting_in_the_middle_of_flush_stage");
9759 flush_error= process_flush_stage_queue(&total_bytes, &do_rotate,
9760 &wait_queue);
9761
9762 if (flush_error == 0 && total_bytes > 0)
9763 flush_error= flush_cache_to_file(&flush_end_pos);
9764 DBUG_EXECUTE_IF("crash_after_flush_binlog", DBUG_SUICIDE(););
9765
9766 update_binlog_end_pos_after_sync= (get_sync_period() == 1);
9767
9768 /*
9769 If the flush finished successfully, we can call the after_flush
9770 hook. Being invoked here, we have the guarantee that the hook is
9771 executed before the before/after_send_hooks on the dump thread
9772 preventing race conditions among these plug-ins.
9773 */
9774 if (flush_error == 0)
9775 {
9776 const char *file_name_ptr= log_file_name + dirname_length(log_file_name);
9777 assert(flush_end_pos != 0);
9778 if (RUN_HOOK(binlog_storage, after_flush,
9779 (thd, file_name_ptr, flush_end_pos)))
9780 {
9781 sql_print_error("Failed to run 'after_flush' hooks");
9782 flush_error= ER_ERROR_ON_WRITE;
9783 }
9784
9785 if (!update_binlog_end_pos_after_sync)
9786 update_binlog_end_pos();
9787 DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
9788 }
9789
9790 if (flush_error)
9791 {
9792 /*
9793 Handle flush error (if any) after leader finishes it's flush stage.
9794 */
9795 handle_binlog_flush_or_sync_error(thd, false /* need_lock_log */,
9796 (thd->commit_error == THD::CE_FLUSH_GNO_EXHAUSTED_ERROR)
9797 ? ER(ER_GNO_EXHAUSTED) : NULL);
9798 }
9799
9800 DEBUG_SYNC(thd, "bgc_after_flush_stage_before_sync_stage");
9801
9802 /*
9803 Stage #2: Syncing binary log file to disk
9804 */
9805
9806 if (change_stage(thd, Stage_manager::SYNC_STAGE, wait_queue, &LOCK_log, &LOCK_sync))
9807 {
9808 DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
9809 thd->thread_id(), thd->commit_error));
9810 DBUG_RETURN(finish_commit(thd));
9811 }
9812
9813 /*
9814 Shall introduce a delay only if it is going to do sync
9815 in this ongoing SYNC stage. The "+1" used below in the
9816 if condition is to count the ongoing sync stage.
9817 When sync_binlog=0 (where we never do sync in BGC group),
9818 it is considered as a special case and delay will be executed
9819 for every group just like how it is done when sync_binlog= 1.
9820 */
9821 if (!flush_error && (sync_counter + 1 >= get_sync_period()))
9822 stage_manager.wait_count_or_timeout(opt_binlog_group_commit_sync_no_delay_count,
9823 opt_binlog_group_commit_sync_delay,
9824 Stage_manager::SYNC_STAGE);
9825
9826 final_queue= stage_manager.fetch_queue_for(Stage_manager::SYNC_STAGE);
9827
9828 if (flush_error == 0 && total_bytes > 0)
9829 {
9830 DEBUG_SYNC(thd, "before_sync_binlog_file");
9831 std::pair<bool, bool> result= sync_binlog_file(false);
9832 sync_error= result.first;
9833 }
9834
9835 if (update_binlog_end_pos_after_sync)
9836 {
9837 THD *tmp_thd= final_queue;
9838 const char *binlog_file= NULL;
9839 my_off_t pos= 0;
9840 while (tmp_thd->next_to_commit != NULL)
9841 tmp_thd= tmp_thd->next_to_commit;
9842 if (flush_error == 0 && sync_error == 0)
9843 {
9844 tmp_thd->get_trans_fixed_pos(&binlog_file, &pos);
9845 update_binlog_end_pos(binlog_file, pos);
9846 }
9847 }
9848
9849 DEBUG_SYNC(thd, "bgc_after_sync_stage_before_commit_stage");
9850
9851 leave_mutex_before_commit_stage= &LOCK_sync;
9852 /*
9853 Stage #3: Commit all transactions in order.
9854
    This stage is skipped if we do not need to order the commits and
    each thread has to execute the handlerton commit instead.

    However, since we are keeping the lock from the previous stage, we
    need to unlock it if we skip the stage.

    We must also step commit_clock before the ha_commit_low() is called
    either in ordered fashion (by the leader of this stage) or by the
    threads themselves.
9864
9865 We are delaying the handling of sync error until
9866 all locks are released but we should not enter into
9867 commit stage if binlog_error_action is ABORT_SERVER.
9868 */
9869 commit_stage:
9870 if (opt_binlog_order_commits &&
9871 (sync_error == 0 || binlog_error_action != ABORT_SERVER))
9872 {
9873 if (change_stage(thd, Stage_manager::COMMIT_STAGE,
9874 final_queue, leave_mutex_before_commit_stage,
9875 &LOCK_commit))
9876 {
9877 DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
9878 thd->thread_id(), thd->commit_error));
9879 DBUG_RETURN(finish_commit(thd));
9880 }
9881 THD *commit_queue= stage_manager.fetch_queue_for(Stage_manager::COMMIT_STAGE);
9882 DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
9883 DEBUG_SYNC(thd, "before_process_commit_stage_queue"););
9884
9885 if (flush_error == 0 && sync_error == 0)
9886 sync_error= call_after_sync_hook(commit_queue);
9887
9888 /*
9889 process_commit_stage_queue will call update_on_commit or
9890 update_on_rollback for the GTID owned by each thd in the queue.
9891
9892 This will be done this way to guarantee that GTIDs are added to
9893 gtid_executed in order, to avoid creating unnecessary temporary
9894 gaps and keep gtid_executed as a single interval at all times.
9895
9896 If we allow each thread to call update_on_commit only when they
9897 are at finish_commit, the GTID order cannot be guaranteed and
9898 temporary gaps may appear in gtid_executed. When this happen,
9899 the server would have to add and remove intervals from the
9900 Gtid_set, and adding and removing intervals requires a mutex,
9901 which would reduce performance.
9902 */
9903 process_commit_stage_queue(thd, commit_queue);
9904 mysql_mutex_unlock(&LOCK_commit);
9905 /*
9906 Process after_commit after LOCK_commit is released for avoiding
9907 3-way deadlock among user thread, rotate thread and dump thread.
9908 */
9909 process_after_commit_stage_queue(thd, commit_queue);
9910 final_queue= commit_queue;
9911 }
9912 else
9913 {
9914 if (leave_mutex_before_commit_stage)
9915 mysql_mutex_unlock(leave_mutex_before_commit_stage);
9916 if (flush_error == 0 && sync_error == 0)
9917 sync_error= call_after_sync_hook(final_queue);
9918 }
9919
9920 /*
9921 Handle sync error after we release all locks in order to avoid deadlocks
9922 */
9923 if (sync_error)
9924 handle_binlog_flush_or_sync_error(thd, true /* need_lock_log */, NULL);
9925
9926 /* Commit done so signal all waiting threads */
9927 stage_manager.signal_done(final_queue);
9928
9929 /*
9930 Finish the commit before executing a rotate, or run the risk of a
9931 deadlock. We don't need the return value here since it is in
9932 thd->commit_error, which is returned below.
9933 */
9934 (void) finish_commit(thd);
9935
9936 /*
9937 If we need to rotate, we do it without commit error.
9938 Otherwise the thd->commit_error will be possibly reset.
9939 */
9940 if (DBUG_EVALUATE_IF("force_rotate", 1, 0) ||
9941 (do_rotate && thd->commit_error == THD::CE_NONE &&
9942 !is_rotating_caused_by_incident))
9943 {
9944 /*
9945 Do not force the rotate as several consecutive groups may
9946 request unnecessary rotations.
9947
9948 NOTE: Run purge_logs wo/ holding LOCK_log because it does not
9949 need the mutex. Otherwise causes various deadlocks.
9950 */
9951
9952 DEBUG_SYNC(thd, "ready_to_do_rotation");
9953 bool check_purge= false;
9954 mysql_mutex_lock(&LOCK_log);
9955 /*
9956 If rotate fails then depends on binlog_error_action variable
9957 appropriate action will be taken inside rotate call.
9958 */
9959 int error= rotate(false, &check_purge);
9960 mysql_mutex_unlock(&LOCK_log);
9961
9962 if (error)
9963 thd->commit_error= THD::CE_COMMIT_ERROR;
9964 else if (check_purge)
9965 purge();
9966 }
9967 /*
9968 flush or sync errors are handled above (using binlog_error_action).
9969 Hence treat only COMMIT_ERRORs as errors.
9970 */
9971 DBUG_RETURN(thd->commit_error == THD::CE_COMMIT_ERROR);
9972 }
9973
9974
9975 /**
9976 MYSQLD server recovers from last crashed binlog.
9977
9978 @param log IO_CACHE of the crashed binlog.
9979 @param fdle Format_description_log_event of the crashed binlog.
9980 @param valid_pos The position of the last valid transaction or
9981 event(non-transaction) of the crashed binlog.
9982
9983 @retval
9984 0 ok
9985 @retval
9986 1 error
9987 */
recover(IO_CACHE * log,Format_description_log_event * fdle,my_off_t * valid_pos)9988 int MYSQL_BIN_LOG::recover(IO_CACHE *log, Format_description_log_event *fdle,
9989 my_off_t *valid_pos)
9990 {
9991 Log_event *ev;
9992 HASH xids;
9993 MEM_ROOT mem_root;
9994 /*
9995 The flag is used for handling the case that a transaction
9996 is partially written to the binlog.
9997 */
9998 bool in_transaction= FALSE;
9999 int memory_page_size= my_getpagesize();
10000
10001 #ifdef WITH_WSREP
10002 /*
10003 Read current wsrep position from storage engines to have consistent
10004 end position for binlog scan.
10005 */
10006 wsrep_uuid_t uuid;
10007 wsrep_seqno_t seqno;
10008 if (WSREP_ON)
10009 {
10010 wsrep_get_SE_checkpoint(uuid, seqno);
10011 char uuid_str[40];
10012 wsrep_uuid_print(&uuid, uuid_str, sizeof(uuid_str));
10013 WSREP_INFO("Binlog recovery, found wsrep position %s:%lld", uuid_str,
10014 (long long)seqno);
10015 }
10016 const wsrep_seqno_t last_xid_seqno= (WSREP_ON) ? seqno :
10017 WSREP_SEQNO_UNDEFINED;
10018 wsrep_seqno_t cur_xid_seqno= WSREP_SEQNO_UNDEFINED;
10019 #endif /* WITH_WSREP */
10020
10021 if (! fdle->is_valid() ||
10022 my_hash_init(&xids, &my_charset_bin, memory_page_size/3, 0,
10023 sizeof(my_xid), 0, 0, 0,
10024 key_memory_binlog_recover_exec))
10025 goto err1;
10026
10027 init_alloc_root(key_memory_binlog_recover_exec,
10028 &mem_root, memory_page_size, memory_page_size);
10029
10030 while ((ev= Log_event::read_log_event(log, 0, fdle, TRUE))
10031 && ev->is_valid())
10032 {
10033 #ifdef WITH_WSREP
10034 if (last_xid_seqno != WSREP_SEQNO_UNDEFINED &&
10035 last_xid_seqno == cur_xid_seqno)
10036 {
10037 delete ev;
10038 continue;
10039 }
10040 #endif
10041 if (ev->get_type_code() == binary_log::QUERY_EVENT &&
10042 !strcmp(((Query_log_event*)ev)->query, "BEGIN"))
10043 in_transaction= TRUE;
10044
10045 if (ev->get_type_code() == binary_log::QUERY_EVENT &&
10046 !strcmp(((Query_log_event*)ev)->query, "COMMIT"))
10047 {
10048 assert(in_transaction == TRUE);
10049 in_transaction= FALSE;
10050 }
10051 else if (ev->get_type_code() == binary_log::XID_EVENT)
10052 {
10053 assert(in_transaction == TRUE);
10054 in_transaction= FALSE;
10055 Xid_log_event *xev=(Xid_log_event *)ev;
10056 uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
10057 sizeof(xev->xid));
10058 if (!x || my_hash_insert(&xids, x))
10059 goto err2;
10060 #ifdef WITH_WSREP
10061 cur_xid_seqno= xev->xid;
10062 #endif /* WITH_WSREP */
10063 }
10064
10065 /*
10066 Recorded valid position for the crashed binlog file
10067 which did not contain incorrect events. The following
10068 positions increase the variable valid_pos:
10069
10070 1 -
10071 ...
10072 <---> HERE IS VALID <--->
10073 GTID
10074 BEGIN
10075 ...
10076 COMMIT
10077 ...
10078
10079 2 -
10080 ...
10081 <---> HERE IS VALID <--->
10082 GTID
10083 DDL/UTILITY
10084 ...
10085
10086 In other words, the following positions do not increase
10087 the variable valid_pos:
10088
10089 1 -
10090 GTID
10091 <---> HERE IS VALID <--->
10092 ...
10093
10094 2 -
10095 GTID
10096 BEGIN
10097 <---> HERE IS VALID <--->
10098 ...
10099 */
10100 if (!log->error && !in_transaction &&
10101 !is_gtid_event(ev))
10102 *valid_pos= my_b_tell(log);
10103
10104 delete ev;
10105 }
10106
10107 /*
10108 Call ha_recover if and only if there is a registered engine that
10109 does 2PC, otherwise in DBUG builds calling ha_recover directly
10110 will result in an assert. (Production builds would be safe since
10111 ha_recover returns right away if total_ha_2pc <= opt_log_bin.)
10112 */
10113 if (total_ha_2pc > 1 && ha_recover(&xids))
10114 goto err2;
10115
10116 free_root(&mem_root, MYF(0));
10117 my_hash_free(&xids);
10118 return 0;
10119
10120 err2:
10121 free_root(&mem_root, MYF(0));
10122 my_hash_free(&xids);
10123 err1:
10124 sql_print_error("Crash recovery failed. Either correct the problem "
10125 "(if it's, for example, out of memory error) and restart, "
10126 "or delete (or rename) binary log and start mysqld with "
10127 "--tc-heuristic-recover={commit|rollback}");
10128 return 1;
10129 }
10130
report_missing_purged_gtids(const Gtid_set * slave_executed_gtid_set,const char ** errmsg)10131 void MYSQL_BIN_LOG::report_missing_purged_gtids(const Gtid_set* slave_executed_gtid_set,
10132 const char** errmsg)
10133 {
10134 DBUG_ENTER("MYSQL_BIN_LOG::report_missing_purged_gtids");
10135 THD *thd= current_thd;
10136 Gtid_set gtid_missing(gtid_state->get_lost_gtids()->get_sid_map());
10137 gtid_missing.add_gtid_set(gtid_state->get_lost_gtids());
10138 gtid_missing.remove_gtid_set(slave_executed_gtid_set);
10139
10140 String tmp_uuid;
10141 uchar name[]= "slave_uuid";
10142
10143 /* Protects thd->user_vars. */
10144 mysql_mutex_lock(&thd->LOCK_thd_data);
10145 user_var_entry *entry=
10146 (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
10147 if (entry && entry->length() > 0)
10148 tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
10149 mysql_mutex_unlock(&thd->LOCK_thd_data);
10150
10151
10152 char* missing_gtids= NULL;
10153 char* slave_executed_gtids= NULL;
10154 gtid_missing.to_string(&missing_gtids, false);
10155 slave_executed_gtid_set->to_string(&slave_executed_gtids, false);
10156
10157 /*
10158 Log the information about the missing purged GTIDs to the error log
10159 if the message is less than MAX_LOG_BUFFER_SIZE.
10160 */
10161 std::ostringstream log_info;
10162 log_info << "The missing transactions are '"<< missing_gtids <<"'";
10163 const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
10164
10165 /* Don't consider the "%s" in the format string. Subtract 2 from the
10166 total length */
10167 uint total_length= (strlen(log_msg) - 2 + log_info.str().length());
10168
10169 DBUG_EXECUTE_IF("simulate_long_missing_gtids",
10170 { total_length= MAX_LOG_BUFFER_SIZE + 1;});
10171
10172 if (total_length > MAX_LOG_BUFFER_SIZE)
10173 log_info.str("To find the missing purged transactions, run \"SELECT"
10174 " @@GLOBAL.GTID_PURGED\" on the master, then run \"SELECT"
10175 " CONCAT(RECEIVED_TRANSACTION_SET, ',', @@GLOBAL.GTID_EXECUTED)"
10176 " FROM PERFORMANCE_SCHEMA.replication_connection_status\" on"
10177 " the slave, and then run \"SELECT GTID_SUBTRACT(<master_set>,"
10178 " <slave_set>)\" on any server");
10179
10180 sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
10181 log_info.str().c_str());
10182
10183 /*
10184 Send the information about the slave executed GTIDs and missing
10185 purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
10186 */
10187 std::ostringstream gtid_info;
10188 gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
10189 << "', and the missing transactions are '"<< missing_gtids <<"'";
10190 *errmsg= ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS);
10191
10192 /* Don't consider the "%s" in the format string. Subtract 2 from the
10193 total length */
10194 total_length= (strlen(*errmsg) - 2 + gtid_info.str().length());
10195
10196 DBUG_EXECUTE_IF("simulate_long_missing_gtids",
10197 { total_length= MYSQL_ERRMSG_SIZE + 1;});
10198
10199 if (total_length > MYSQL_ERRMSG_SIZE)
10200 gtid_info.str("The GTID sets and the missing purged transactions are too"
10201 " long to print in this message. For more information,"
10202 " please see the master's error log or the manual for"
10203 " GTID_SUBTRACT");
10204
10205 /* Buffer for formatting the message about the missing GTIDs. */
10206 static char buff[MYSQL_ERRMSG_SIZE];
10207 my_snprintf(buff, MYSQL_ERRMSG_SIZE, *errmsg, gtid_info.str().c_str());
10208 *errmsg= const_cast<const char*>(buff);
10209
10210 my_free(missing_gtids);
10211 my_free(slave_executed_gtids);
10212 DBUG_VOID_RETURN;
10213 }
10214
report_missing_gtids(const Gtid_set * previous_gtid_set,const Gtid_set * slave_executed_gtid_set,const char ** errmsg)10215 void MYSQL_BIN_LOG::report_missing_gtids(const Gtid_set* previous_gtid_set,
10216 const Gtid_set* slave_executed_gtid_set,
10217 const char** errmsg)
10218 {
10219 DBUG_ENTER("MYSQL_BIN_LOG::report_missing_gtids");
10220 THD *thd=current_thd;
10221 char* missing_gtids= NULL;
10222 char* slave_executed_gtids= NULL;
10223 Gtid_set gtid_missing(slave_executed_gtid_set->get_sid_map());
10224 gtid_missing.add_gtid_set(slave_executed_gtid_set);
10225 gtid_missing.remove_gtid_set(previous_gtid_set);
10226 gtid_missing.to_string(&missing_gtids, false);
10227 slave_executed_gtid_set->to_string(&slave_executed_gtids, false);
10228
10229 String tmp_uuid;
10230 uchar name[]= "slave_uuid";
10231
10232 /* Protects thd->user_vars. */
10233 mysql_mutex_lock(&thd->LOCK_thd_data);
10234
10235 user_var_entry *entry=
10236 (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
10237 if (entry && entry->length() > 0)
10238 tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
10239 mysql_mutex_unlock(&thd->LOCK_thd_data);
10240
10241 /*
10242 Log the information about the missing purged GTIDs to the error log
10243 if the message is less than MAX_LOG_BUFFER_SIZE.
10244 */
10245 std::ostringstream log_info;
10246 log_info << "If the binary log files have been deleted from disk,"
10247 " check the consistency of 'GTID_PURGED' variable."
10248 " The missing transactions are '"<< missing_gtids <<"'";
10249 const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
10250
10251 /* Don't consider the "%s" in the format string. Subtract 2 from the
10252 total length */
10253 if ((strlen(log_msg) - 2 + log_info.str().length()) > MAX_LOG_BUFFER_SIZE)
10254 log_info.str("To find the missing purged transactions, run \"SELECT"
10255 " @@GLOBAL.GTID_PURGED\" on the master, then run \"SELECT"
10256 " CONCAT(RECEIVED_TRANSACTION_SET, ',', @@GLOBAL.GTID_EXECUTED)"
10257 " FROM PERFORMANCE_SCHEMA.replication_connection_status\" on"
10258 " the slave, and then run \"SELECT GTID_SUBTRACT(<master_set>,"
10259 " <slave_set>)\" on any server");
10260
10261 sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
10262 log_info.str().c_str());
10263
10264 /*
10265 Send the information about the slave executed GTIDs and missing
10266 purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
10267 */
10268 std::ostringstream gtid_info;
10269 gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
10270 << "', and the missing transactions are '"<< missing_gtids <<"'";
10271 *errmsg= ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS);
10272
10273 /* Don't consider the "%s" in the format string. Subtract 2 from the
10274 total length */
10275 if ((strlen(*errmsg) - 2 + gtid_info.str().length()) > MYSQL_ERRMSG_SIZE)
10276 gtid_info.str("The GTID sets and the missing purged transactions are too"
10277 " long to print in this message. For more information,"
10278 " please see the master's error log or the manual for"
10279 " GTID_SUBTRACT");
10280 /* Buffer for formatting the message about the missing GTIDs. */
10281 static char buff[MYSQL_ERRMSG_SIZE];
10282 my_snprintf(buff, MYSQL_ERRMSG_SIZE, *errmsg, gtid_info.str().c_str());
10283 *errmsg= const_cast<const char*>(buff);
10284
10285 my_free(missing_gtids);
10286 my_free(slave_executed_gtids);
10287
10288 DBUG_VOID_RETURN;
10289 }
10290
is_binlog_cache_empty(bool is_transactional)10291 bool THD::is_binlog_cache_empty(bool is_transactional)
10292 {
10293 DBUG_ENTER("THD::is_binlog_cache_empty(bool)");
10294
10295 // If opt_bin_log==0, it is not safe to call thd_get_cache_mngr
10296 // because binlog_hton has not been completely set up.
10297 #ifndef WITH_WSREP
10298 assert(opt_bin_log);
10299 #endif
10300 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
10301
10302 // cache_mngr is NULL until we call thd->binlog_setup_trx_data, so
10303 // we assert that this has been done.
10304 assert(cache_mngr != NULL);
10305
10306 binlog_cache_data *cache_data=
10307 cache_mngr->get_binlog_cache_data(is_transactional);
10308 assert(cache_data != NULL);
10309
10310 DBUG_RETURN(cache_data->is_binlog_empty());
10311 }
10312
10313 /*
10314 These functions are placed in this file since they need access to
10315 binlog_hton, which has internal linkage.
10316 */
10317
binlog_setup_trx_data()10318 int THD::binlog_setup_trx_data()
10319 {
10320 DBUG_ENTER("THD::binlog_setup_trx_data");
10321 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
10322
10323 if (cache_mngr)
10324 DBUG_RETURN(0); // Already set up
10325
10326 IO_CACHE stmt_cache_log, trx_cache_log;
10327 memset(&stmt_cache_log, 0, sizeof(stmt_cache_log));
10328 memset(&trx_cache_log, 0, sizeof(trx_cache_log));
10329
10330 cache_mngr= (binlog_cache_mngr*) my_malloc(key_memory_binlog_cache_mngr,
10331 sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
10332 if (!cache_mngr)
10333 {
10334 DBUG_RETURN(1);
10335 }
10336 if (open_cached_file(&stmt_cache_log, mysql_tmpdir,
10337 LOG_PREFIX, binlog_stmt_cache_size, MYF(MY_WME)))
10338 {
10339 my_free(cache_mngr);
10340 DBUG_RETURN(1); // Didn't manage to set it up
10341 }
10342 if (open_cached_file(&trx_cache_log, mysql_tmpdir,
10343 LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
10344 {
10345 close_cached_file(&stmt_cache_log);
10346 my_free(cache_mngr);
10347 DBUG_RETURN(1);
10348 }
10349 DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) cache_mngr));
10350 thd_set_ha_data(this, binlog_hton, cache_mngr);
10351
10352 cache_mngr= new (thd_get_cache_mngr(this))
10353 binlog_cache_mngr(max_binlog_stmt_cache_size,
10354 &binlog_stmt_cache_use,
10355 &binlog_stmt_cache_disk_use,
10356 max_binlog_cache_size,
10357 &binlog_cache_use,
10358 &binlog_cache_disk_use,
10359 stmt_cache_log,
10360 trx_cache_log);
10361 DBUG_RETURN(0);
10362 }
10363
10364 /**
10365
10366 */
register_binlog_handler(THD * thd,bool trx)10367 void register_binlog_handler(THD *thd, bool trx)
10368 {
10369 DBUG_ENTER("register_binlog_handler");
10370 /*
10371 If this is the first call to this function while processing a statement,
10372 the transactional cache does not have a savepoint defined. So, in what
10373 follows:
10374 . an implicit savepoint is defined;
10375 . callbacks are registered;
10376 . binary log is set as read/write.
10377
10378 The savepoint allows for truncating the trx-cache transactional changes
10379 fail. Callbacks are necessary to flush caches upon committing or rolling
10380 back a statement or a transaction. However, notifications do not happen
10381 if the binary log is set as read/write.
10382 */
10383 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
10384 if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
10385 {
10386 /*
10387 Set an implicit savepoint in order to be able to truncate a trx-cache.
10388 */
10389 my_off_t pos= 0;
10390 binlog_trans_log_savepos(thd, &pos);
10391 cache_mngr->trx_cache.set_prev_position(pos);
10392
10393 /*
10394 Set callbacks in order to be able to call commmit or rollback.
10395 */
10396 if (trx)
10397 trans_register_ha(thd, TRUE, binlog_hton, NULL);
10398 trans_register_ha(thd, FALSE, binlog_hton, NULL);
10399
10400 /*
10401 Set the binary log as read/write otherwise callbacks are not called.
10402 */
10403 thd->ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
10404 }
10405 DBUG_VOID_RETURN;
10406 }
10407
10408 /**
10409 Function to start a statement and optionally a transaction for the
10410 binary log.
10411
10412 This function does three things:
10413 - Starts a transaction if not in autocommit mode or if a BEGIN
10414 statement has been seen.
10415
10416 - Start a statement transaction to allow us to truncate the cache.
10417
10418 - Save the currrent binlog position so that we can roll back the
10419 statement by truncating the cache.
10420
10421 We only update the saved position if the old one was undefined,
10422 the reason is that there are some cases (e.g., for CREATE-SELECT)
10423 where the position is saved twice (e.g., both in
10424 Query_result_create::prepare() and THD::binlog_write_table_map()), but
10425 we should use the first. This means that calls to this function
10426 can be used to start the statement before the first table map
10427 event, to include some extra events.
10428
10429 Note however that IMMEDIATE_LOGGING implies that the statement is
10430 written without BEGIN/COMMIT.
10431
10432 @param thd Thread variable
10433 @param start_event The first event requested to be written into the
10434 binary log
10435 */
binlog_start_trans_and_stmt(THD * thd,Log_event * start_event)10436 static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event)
10437 {
10438 DBUG_ENTER("binlog_start_trans_and_stmt");
10439
10440 /*
10441 Initialize the cache manager if this was not done yet.
10442 */
10443 if (thd->binlog_setup_trx_data())
10444 DBUG_RETURN(1);
10445
10446 /*
10447 Retrieve the appropriated cache.
10448 */
10449 bool is_transactional= start_event->is_using_trans_cache();
10450 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
10451 binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_transactional);
10452
10453 /*
10454 If the event is requesting immediatly logging, there is no need to go
10455 further down and set savepoint and register callbacks.
10456 */
10457 if (start_event->is_using_immediate_logging())
10458 DBUG_RETURN(0);
10459
10460 register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode());
10461
10462 /*
10463 If the cache is empty log "BEGIN" at the beginning of every transaction.
10464 Here, a transaction is either a BEGIN..COMMIT/ROLLBACK block or a single
10465 statement in autocommit mode.
10466 */
10467 if (cache_data->is_binlog_empty())
10468 {
10469 static const char begin[]= "BEGIN";
10470 const char *query= NULL;
10471 char buf[XID::ser_buf_size];
10472 char xa_start[sizeof("XA START") + 1 + sizeof(buf)];
10473 XID_STATE *xs= thd->get_transaction()->xid_state();
10474 int qlen= sizeof(begin) - 1;
10475
10476 if (is_transactional && xs->has_state(XID_STATE::XA_ACTIVE))
10477 {
10478 /*
10479 XA-prepare logging case.
10480 */
10481 qlen= sprintf(xa_start, "XA START %s", xs->get_xid()->serialize(buf));
10482 query= xa_start;
10483 }
10484 else
10485 {
10486 /*
10487 Regular transaction case.
10488 */
10489 query= begin;
10490 }
10491
10492 Query_log_event qinfo(thd, query, qlen,
10493 is_transactional, false, true, 0, true);
10494 if (cache_data->write_event(thd, &qinfo))
10495 DBUG_RETURN(1);
10496 }
10497
10498 DBUG_RETURN(0);
10499 }
10500
10501 /**
10502 This function writes a table map to the binary log.
10503 Note that in order to keep the signature uniform with related methods,
10504 we use a redundant parameter to indicate whether a transactional table
10505 was changed or not.
10506 Sometimes it will write a Rows_query_log_event into binary log before
10507 the table map too.
10508
10509 @param table a pointer to the table.
10510 @param is_transactional @c true indicates a transactional table,
10511 otherwise @c false a non-transactional.
10512 @param binlog_rows_query @c true indicates a Rows_query log event
10513 will be binlogged before table map,
10514 otherwise @c false indicates it will not
10515 be binlogged.
10516 @return
10517 nonzero if an error pops up when writing the table map event
10518 or the Rows_query log event.
10519 */
binlog_write_table_map(TABLE * table,bool is_transactional,bool binlog_rows_query)10520 int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
10521 bool binlog_rows_query)
10522 {
10523 int error;
10524 DBUG_ENTER("THD::binlog_write_table_map");
10525 DBUG_PRINT("enter", ("table: 0x%lx (%s: #%llu)",
10526 (long) table, table->s->table_name.str,
10527 table->s->table_map_id.id()));
10528
10529 /* Pre-conditions */
10530 #ifdef WITH_WSREP
10531 assert(is_current_stmt_binlog_format_row() &&
10532 (WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open()));
10533 #else
10534 assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
10535 #endif /* WITH_WSREP */
10536 assert(table->s->table_map_id.is_valid());
10537
10538 Table_map_log_event
10539 the_event(this, table, table->s->table_map_id, is_transactional);
10540
10541 binlog_start_trans_and_stmt(this, &the_event);
10542
10543 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
10544
10545 binlog_cache_data *cache_data=
10546 cache_mngr->get_binlog_cache_data(is_transactional);
10547
10548 if (binlog_rows_query && this->query().str)
10549 {
10550 /* Write the Rows_query_log_event into binlog before the table map */
10551 Rows_query_log_event
10552 rows_query_ev(this, this->query().str, this->query().length);
10553 if ((error= cache_data->write_event(this, &rows_query_ev)))
10554 DBUG_RETURN(error);
10555 }
10556
10557 if ((error= cache_data->write_event(this, &the_event)))
10558 DBUG_RETURN(error);
10559
10560 binlog_table_maps++;
10561 DBUG_RETURN(0);
10562 }
10563
10564 /**
10565 This function retrieves a pending row event from a cache which is
10566 specified through the parameter @c is_transactional. Respectively, when it
10567 is @c true, the pending event is returned from the transactional cache.
10568 Otherwise from the non-transactional cache.
10569
10570 @param is_transactional @c true indicates a transactional cache,
10571 otherwise @c false a non-transactional.
10572 @return
10573 The row event if any.
10574 */
10575 Rows_log_event*
binlog_get_pending_rows_event(bool is_transactional) const10576 THD::binlog_get_pending_rows_event(bool is_transactional) const
10577 {
10578 Rows_log_event* rows= NULL;
10579 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
10580
10581 /*
10582 This is less than ideal, but here's the story: If there is no cache_mngr,
10583 prepare_pending_rows_event() has never been called (since the cache_mngr
10584 is set up there). In that case, we just return NULL.
10585 */
10586 if (cache_mngr)
10587 {
10588 binlog_cache_data *cache_data=
10589 cache_mngr->get_binlog_cache_data(is_transactional);
10590
10591 rows= cache_data->pending();
10592 }
10593 return (rows);
10594 }
10595
10596 /**
10597 @param db db name c-string to be inserted into alphabetically sorted
10598 THD::binlog_accessed_db_names list.
10599
10600 Note, that space for both the data and the node
10601 struct are allocated in THD::main_mem_root.
10602 The list lasts for the top-level query time and is reset
10603 in @c THD::cleanup_after_query().
10604 */
10605 void
add_to_binlog_accessed_dbs(const char * db_param)10606 THD::add_to_binlog_accessed_dbs(const char *db_param)
10607 {
10608 char *after_db;
10609 /*
10610 binlog_accessed_db_names list is to maintain the database
10611 names which are referenced in a given command.
10612 Prior to bug 17806014 fix, 'main_mem_root' memory root used
10613 to store this list. The 'main_mem_root' scope is till the end
10614 of the query. Hence it caused increasing memory consumption
10615 problem in big procedures like the ones mentioned below.
10616 Eg: CALL p1() where p1 is having 1,00,000 create and drop tables.
10617 'main_mem_root' is freed only at the end of the command CALL p1()'s
10618 execution. But binlog_accessed_db_names list scope is only till the
10619 individual statements specified the procedure(create/drop statements).
10620 Hence the memory allocated in 'main_mem_root' was left uncleared
10621 until the p1's completion, even though it is not required after
10622 completion of individual statements.
10623
10624 Instead of using 'main_mem_root' whose scope is complete query execution,
10625 now the memroot is changed to use 'thd->mem_root' whose scope is until the
10626 individual statement in CALL p1(). 'thd->mem_root' is set to 'execute_mem_root'
10627 in the context of procedure and it's scope is till the individual statement
10628 in CALL p1() and thd->memroot is equal to 'main_mem_root' in the context
10629 of a normal 'top level query'.
10630
10631 Eg: a) create table t1(i int); => If this function is called while
10632 processing this statement, thd->memroot is equal to &main_mem_root
10633 which will be freed immediately after executing this statement.
10634 b) CALL p1() -> p1 contains create table t1(i int); => If this function
10635 is called while processing create table statement which is inside
10636 a stored procedure, then thd->memroot is equal to 'execute_mem_root'
10637 which will be freed immediately after executing this statement.
10638 In both a and b case, thd->memroot will be freed immediately and will not
10639 increase memory consumption.
10640
10641 A special case(stored functions/triggers):
10642 Consider the following example:
10643 create function f1(i int) returns int
10644 begin
10645 insert into db1.t1 values (1);
10646 insert into db2.t1 values (2);
10647 end;
10648 When we are processing SELECT f1(), the list should contain db1, db2 names.
10649 Since thd->mem_root contains 'execute_mem_root' in the context of
10650 stored function, the mem root will be freed after adding db1 in
10651 the list and when we are processing the second statement and when we try
10652 to add 'db2' in the db1's list, it will lead to crash as db1's memory
10653 is already freed. To handle this special case, if in_sub_stmt is set
10654 (which is true incase of stored functions/triggers), we use &main_mem_root,
10655 if not set we will use thd->memroot which changes it's value to
10656 'execute_mem_root' or '&main_mem_root' depends on the context.
10657 */
10658 MEM_ROOT *db_mem_root= in_sub_stmt ? &main_mem_root : mem_root;
10659
10660 if (!binlog_accessed_db_names)
10661 binlog_accessed_db_names= new (db_mem_root) List<char>;
10662
10663 if (binlog_accessed_db_names->elements > MAX_DBS_IN_EVENT_MTS)
10664 {
10665 push_warning_printf(this, Sql_condition::SL_WARNING,
10666 ER_MTS_UPDATED_DBS_GREATER_MAX,
10667 ER(ER_MTS_UPDATED_DBS_GREATER_MAX),
10668 MAX_DBS_IN_EVENT_MTS);
10669 return;
10670 }
10671
10672 after_db= strdup_root(db_mem_root, db_param);
10673
10674 /*
10675 sorted insertion is implemented with first rearranging data
10676 (pointer to char*) of the links and final appending of the least
10677 ordered data to create a new link in the list.
10678 */
10679 if (binlog_accessed_db_names->elements != 0)
10680 {
10681 List_iterator<char> it(*get_binlog_accessed_db_names());
10682
10683 while (it++)
10684 {
10685 char *swap= NULL;
10686 char **ref_cur_db= it.ref();
10687 int cmp= strcmp(after_db, *ref_cur_db);
10688
10689 assert(!swap || cmp < 0);
10690
10691 if (cmp == 0)
10692 {
10693 after_db= NULL; /* dup to ignore */
10694 break;
10695 }
10696 else if (swap || cmp > 0)
10697 {
10698 swap= *ref_cur_db;
10699 *ref_cur_db= after_db;
10700 after_db= swap;
10701 }
10702 }
10703 }
10704 if (after_db)
10705 binlog_accessed_db_names->push_back(after_db, db_mem_root);
10706 }
10707
10708 /*
10709 Tells if two (or more) tables have auto_increment columns and we want to
10710 lock those tables with a write lock.
10711
10712 SYNOPSIS
10713 has_two_write_locked_tables_with_auto_increment
10714 tables Table list
10715
10716 NOTES:
10717 Call this function only when you have established the list of all tables
10718 which you'll want to update (including stored functions, triggers, views
10719 inside your statement).
10720 */
10721
10722 static bool
has_write_table_with_auto_increment(TABLE_LIST * tables)10723 has_write_table_with_auto_increment(TABLE_LIST *tables)
10724 {
10725 for (TABLE_LIST *table= tables; table; table= table->next_global)
10726 {
10727 /* we must do preliminary checks as table->table may be NULL */
10728 if (!table->is_placeholder() &&
10729 table->table->found_next_number_field &&
10730 (table->lock_type >= TL_WRITE_ALLOW_WRITE))
10731 return 1;
10732 }
10733
10734 return 0;
10735 }
10736
10737 /*
10738 checks if we have select tables in the table list and write tables
10739 with auto-increment column.
10740
10741 SYNOPSIS
10742 has_two_write_locked_tables_with_auto_increment_and_select
10743 tables Table list
10744
10745 RETURN VALUES
10746
10747 -true if the table list has atleast one table with auto-increment column
10748
10749
10750 and atleast one table to select from.
10751 -false otherwise
10752 */
10753
10754 static bool
has_write_table_with_auto_increment_and_select(TABLE_LIST * tables)10755 has_write_table_with_auto_increment_and_select(TABLE_LIST *tables)
10756 {
10757 bool has_select= false;
10758 bool has_auto_increment_tables = has_write_table_with_auto_increment(tables);
10759 for(TABLE_LIST *table= tables; table; table= table->next_global)
10760 {
10761 if (!table->is_placeholder() &&
10762 (table->lock_type <= TL_READ_NO_INSERT))
10763 {
10764 has_select= true;
10765 break;
10766 }
10767 }
10768 return(has_select && has_auto_increment_tables);
10769 }
10770
10771 /*
10772 Tells if there is a table whose auto_increment column is a part
10773 of a compound primary key while is not the first column in
10774 the table definition.
10775
10776 @param tables Table list
10777
10778 @return true if the table exists, fais if does not.
10779 */
10780
10781 static bool
has_write_table_auto_increment_not_first_in_pk(TABLE_LIST * tables)10782 has_write_table_auto_increment_not_first_in_pk(TABLE_LIST *tables)
10783 {
10784 for (TABLE_LIST *table= tables; table; table= table->next_global)
10785 {
10786 /* we must do preliminary checks as table->table may be NULL */
10787 if (!table->is_placeholder() &&
10788 table->table->found_next_number_field &&
10789 (table->lock_type >= TL_WRITE_ALLOW_WRITE)
10790 && table->table->s->next_number_keypart != 0)
10791 return 1;
10792 }
10793
10794 return 0;
10795 }
10796
10797 /*
10798 Function to check whether the table in query uses a fulltext parser
10799 plugin or not.
10800
10801 @param s - table share pointer.
10802
10803 @retval TRUE - The table uses fulltext parser plugin.
10804 @retval FALSE - Otherwise.
10805 */
fulltext_unsafe_set(TABLE_SHARE * s)10806 static bool inline fulltext_unsafe_set(TABLE_SHARE *s)
10807 {
10808 for (unsigned int i= 0 ; i < s->keys ; i++)
10809 {
10810 if ((s->key_info[i].flags & HA_USES_PARSER) && s->keys_in_use.is_set(i))
10811 return TRUE;
10812 }
10813 return FALSE;
10814 }
10815 #ifndef NDEBUG
get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)10816 const char * get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)
10817 {
10818 switch (locked_tables_mode)
10819 {
10820 case LTM_NONE:
10821 return "LTM_NONE";
10822 case LTM_LOCK_TABLES:
10823 return "LTM_LOCK_TABLES";
10824 case LTM_PRELOCKED:
10825 return "LTM_PRELOCKED";
10826 case LTM_PRELOCKED_UNDER_LOCK_TABLES:
10827 return "LTM_PRELOCKED_UNDER_LOCK_TABLES";
10828 default:
10829 return "Unknown table lock mode";
10830 }
10831 }
10832 #endif
10833
10834 /**
10835 Decide on logging format to use for the statement and issue errors
10836 or warnings as needed. The decision depends on the following
10837 parameters:
10838
10839 - The logging mode, i.e., the value of binlog_format. Can be
10840 statement, mixed, or row.
10841
10842 - The type of statement. There are three types of statements:
10843 "normal" safe statements; unsafe statements; and row injections.
10844 An unsafe statement is one that, if logged in statement format,
10845 might produce different results when replayed on the slave (e.g.,
10846 queries with a LIMIT clause). A row injection is either a BINLOG
10847 statement, or a row event executed by the slave's SQL thread.
10848
10849 - The capabilities of tables modified by the statement. The
10850 *capabilities vector* for a table is a set of flags associated
10851 with the table. Currently, it only includes two flags: *row
10852 capability flag* and *statement capability flag*.
10853
10854 The row capability flag is set if and only if the engine can
10855 handle row-based logging. The statement capability flag is set if
10856 and only if the table can handle statement-based logging.
10857
10858 Decision table for logging format
10859 ---------------------------------
10860
10861 The following table summarizes how the format and generated
10862 warning/error depends on the tables' capabilities, the statement
10863 type, and the current binlog_format.
10864
10865 Row capable N NNNNNNNNN YYYYYYYYY YYYYYYYYY
10866 Statement capable N YYYYYYYYY NNNNNNNNN YYYYYYYYY
10867
10868 Statement type * SSSUUUIII SSSUUUIII SSSUUUIII
10869
10870 binlog_format * SMRSMRSMR SMRSMRSMR SMRSMRSMR
10871
10872 Logged format - SS-S----- -RR-RR-RR SRRSRR-RR
10873 Warning/Error 1 --2732444 5--5--6-- ---7--6--
10874
10875 Legend
10876 ------
10877
10878 Row capable: N - Some table not row-capable, Y - All tables row-capable
10879 Stmt capable: N - Some table not stmt-capable, Y - All tables stmt-capable
10880 Statement type: (S)afe, (U)nsafe, or Row (I)njection
10881 binlog_format: (S)TATEMENT, (M)IXED, or (R)OW
10882 Logged format: (S)tatement or (R)ow
10883 Warning/Error: Warnings and error messages are as follows:
10884
10885 1. Error: Cannot execute statement: binlogging impossible since both
10886 row-incapable engines and statement-incapable engines are
10887 involved.
10888
10889 2. Error: Cannot execute statement: binlogging impossible since
10890 BINLOG_FORMAT = ROW and at least one table uses a storage engine
10891 limited to statement-logging.
10892
10893 3. Error: Cannot execute statement: binlogging of unsafe statement
10894 is impossible when storage engine is limited to statement-logging
10895 and BINLOG_FORMAT = MIXED.
10896
10897 4. Error: Cannot execute row injection: binlogging impossible since
10898 at least one table uses a storage engine limited to
10899 statement-logging.
10900
10901 5. Error: Cannot execute statement: binlogging impossible since
10902 BINLOG_FORMAT = STATEMENT and at least one table uses a storage
10903 engine limited to row-logging.
10904
10905 6. Error: Cannot execute row injection: binlogging impossible since
10906 BINLOG_FORMAT = STATEMENT.
10907
10908 7. Warning: Unsafe statement binlogged in statement format since
10909 BINLOG_FORMAT = STATEMENT.
10910
10911 In addition, we can produce the following error (not depending on
10912 the variables of the decision diagram):
10913
10914 8. Error: Cannot execute statement: binlogging impossible since more
10915 than one engine is involved and at least one engine is
10916 self-logging.
10917
10918 9. Error: Do not allow users to modify a gtid_executed table
10919 explicitly by a XA transaction.
10920
10921 For each error case above, the statement is prevented from being
10922 logged, we report an error, and roll back the statement. For
10923 warnings, we set the thd->binlog_flags variable: the warning will be
10924 printed only if the statement is successfully logged.
10925
10926 @see THD::binlog_query
10927
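  @param[in] tables Tables involved in the query. The client thread is
                    the THD object this method is called on.

  @retval 0  No error. The statement can be logged, or no logging
             decision was needed (binlog closed, binlogging disabled,
             or statement filtered out).
  @retval -1 An error was reported: one of the error conditions above
             (1, 2, 3, 4, 5, 6, 8 or 9), a write to both replicated and
             non-replicated tables that cannot be logged in row format,
             or a statement that is not GTID-compatible.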
10933 */
10934
decide_logging_format(TABLE_LIST * tables)10935 int THD::decide_logging_format(TABLE_LIST *tables)
10936 {
10937 DBUG_ENTER("THD::decide_logging_format");
10938 DBUG_PRINT("info", ("query: %s", query().str));
10939 DBUG_PRINT("info", ("variables.binlog_format: %lu",
10940 variables.binlog_format));
10941 DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags(): 0x%x",
10942 lex->get_stmt_unsafe_flags()));
10943
10944 DEBUG_SYNC(current_thd, "begin_decide_logging_format");
10945
10946 reset_binlog_local_stmt_filter();
10947
10948 /*
10949 We should not decide logging format if the binlog is closed or
10950 binlogging is off, or if the statement is filtered out from the
10951 binlog by filtering rules.
10952 */
10953 #ifdef WITH_WSREP
10954 if ((WSREP_EMULATE_BINLOG_NNULL(this) ||
10955 (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG))) &&
10956 !(WSREP_BINLOG_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT &&
10957 !binlog_filter->db_ok(m_db.str)))
10958 #else
10959 if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) &&
10960 !(variables.binlog_format == BINLOG_FORMAT_STMT &&
10961 !binlog_filter->db_ok(m_db.str)))
10962 #endif /* WITH_WSREP */
10963 {
10964 /*
10965 Compute one bit field with the union of all the engine
10966 capabilities, and one with the intersection of all the engine
10967 capabilities.
10968 */
10969 handler::Table_flags flags_write_some_set= 0;
10970 handler::Table_flags flags_access_some_set= 0;
10971 handler::Table_flags flags_write_all_set=
10972 HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE;
10973
10974 /*
10975 If different types of engines are about to be updated.
10976 For example: Innodb and Falcon; Innodb and MyIsam.
10977 */
10978 my_bool multi_write_engine= FALSE;
10979 /*
10980 If different types of engines are about to be accessed
10981 and any of them is about to be updated. For example:
10982 Innodb and Falcon; Innodb and MyIsam.
10983 */
10984 my_bool multi_access_engine= FALSE;
10985 /*
10986 Identifies if a table is changed.
10987 */
10988 my_bool is_write= FALSE;
10989 /*
10990 A pointer to a previous table that was changed.
10991 */
10992 TABLE* prev_write_table= NULL;
10993 /*
10994 A pointer to a previous table that was accessed.
10995 */
10996 TABLE* prev_access_table= NULL;
10997 /*
10998 True if at least one table is transactional.
10999 */
11000 bool write_to_some_transactional_table= false;
11001 /*
11002 True if at least one table is non-transactional.
11003 */
11004 bool write_to_some_non_transactional_table= false;
11005 /*
11006 True if all non-transactional tables that has been updated
11007 are temporary.
11008 */
11009 bool write_all_non_transactional_are_tmp_tables= true;
11010 /**
11011 The number of tables used in the current statement,
11012 that should be replicated.
11013 */
11014 uint replicated_tables_count= 0;
11015 /**
11016 The number of tables written to in the current statement,
11017 that should not be replicated.
11018 A table should not be replicated when it is considered
11019 'local' to a MySQL instance.
11020 Currently, these tables are:
11021 - mysql.slow_log
11022 - mysql.general_log
11023 - mysql.slave_relay_log_info
11024 - mysql.slave_master_info
11025 - mysql.slave_worker_info
11026 - performance_schema.*
11027 - TODO: information_schema.*
11028 In practice, from this list, only performance_schema.* tables
11029 are written to by user queries.
11030 */
11031 uint non_replicated_tables_count= 0;
11032 /**
11033 Indicate whether we alreadly reported a warning
11034 on modifying gtid_executed table.
11035 */
11036 int warned_gtid_executed_table= 0;
11037 #ifndef NDEBUG
11038 {
11039 DBUG_PRINT("debug", ("prelocked_mode: %s",
11040 get_locked_tables_mode_name(locked_tables_mode)));
11041 }
11042 #endif
11043
11044 if (variables.binlog_format != BINLOG_FORMAT_ROW && tables)
11045 {
11046 /*
11047 DML statements that modify a table with an auto_increment column based on
11048 rows selected from a table are unsafe as the order in which the rows are
11049 fetched fron the select tables cannot be determined and may differ on
11050 master and slave.
11051 */
11052 if (has_write_table_with_auto_increment_and_select(tables))
11053 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_WRITE_AUTOINC_SELECT);
11054
11055 if (has_write_table_auto_increment_not_first_in_pk(tables))
11056 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST);
11057
11058 /*
11059 A query that modifies autoinc column in sub-statement can make the
11060 master and slave inconsistent.
11061 We can solve these problems in mixed mode by switching to binlogging
11062 if at least one updated table is used by sub-statement
11063 */
11064 if (lex->requires_prelocking() &&
11065 has_write_table_with_auto_increment(lex->first_not_own_table()))
11066 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS);
11067 }
11068
11069 /*
11070 Get the capabilities vector for all involved storage engines and
11071 mask out the flags for the binary log.
11072 */
11073 for (TABLE_LIST *table= tables; table; table= table->next_global)
11074 {
11075 if (table->is_placeholder())
11076 continue;
11077
11078 handler::Table_flags const flags= table->table->file->ha_table_flags();
11079
11080 DBUG_PRINT("info", ("table: %s; ha_table_flags: 0x%llx",
11081 table->table_name, flags));
11082
11083 if (table->table->no_replicate)
11084 {
11085 if (!warned_gtid_executed_table)
11086 {
11087 warned_gtid_executed_table=
11088 gtid_state->warn_or_err_on_modify_gtid_table(this, table);
11089 /*
11090 Do not allow users to modify the gtid_executed table
11091 explicitly by a XA transaction.
11092 */
11093 if (warned_gtid_executed_table == 2)
11094 DBUG_RETURN(-1);
11095 }
11096 /*
11097 The statement uses a table that is not replicated.
11098 The following properties about the table:
11099 - persistent / transient
11100 - transactional / non transactional
11101 - temporary / permanent
11102 - read or write
11103 - multiple engines involved because of this table
11104 are not relevant, as this table is completely ignored.
11105 Because the statement uses a non replicated table,
11106 using STATEMENT format in the binlog is impossible.
11107 Either this statement will be discarded entirely,
11108 or it will be logged (possibly partially) in ROW format.
11109 */
11110 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_TABLE);
11111
11112 if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
11113 {
11114 non_replicated_tables_count++;
11115 continue;
11116 }
11117 }
11118
11119 replicated_tables_count++;
11120
11121 my_bool trans= table->table->file->has_transactions();
11122
11123 if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
11124 {
11125 write_to_some_transactional_table=
11126 write_to_some_transactional_table || trans;
11127
11128 write_to_some_non_transactional_table=
11129 write_to_some_non_transactional_table || !trans;
11130
11131 if (prev_write_table && prev_write_table->file->ht !=
11132 table->table->file->ht)
11133 multi_write_engine= TRUE;
11134
11135 if (table->table->s->tmp_table)
11136 lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TEMP_TRANS_TABLE :
11137 LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE);
11138 else
11139 lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TRANS_TABLE :
11140 LEX::STMT_WRITES_NON_TRANS_TABLE);
11141
11142 /*
11143 Non-transactional updates are allowed when row binlog format is
11144 used and all non-transactional tables are temporary.
11145 Binlog format is checked on THD::is_dml_gtid_compatible() method.
11146 */
11147 if (!trans)
11148 write_all_non_transactional_are_tmp_tables=
11149 write_all_non_transactional_are_tmp_tables &&
11150 table->table->s->tmp_table;
11151
11152 flags_write_all_set &= flags;
11153 flags_write_some_set |= flags;
11154 is_write= TRUE;
11155
11156 prev_write_table= table->table;
11157
11158 /*
11159 It should be marked unsafe if a table which uses a fulltext parser
11160 plugin is modified. See also bug#48183.
11161 */
11162 if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN))
11163 {
11164 if (fulltext_unsafe_set(table->table->s))
11165 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN);
11166 }
11167 /*
11168 INSERT...ON DUPLICATE KEY UPDATE on a table with more than one unique keys
11169 can be unsafe. Check for it if the flag is already not marked for the
11170 given statement.
11171 */
11172 if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS) &&
11173 lex->sql_command == SQLCOM_INSERT && lex->duplicates == DUP_UPDATE)
11174 {
11175 uint keys= table->table->s->keys, i= 0, unique_keys= 0;
11176 for (KEY* keyinfo= table->table->s->key_info;
11177 i < keys && unique_keys <= 1; i++, keyinfo++)
11178 {
11179 if (keyinfo->flags & HA_NOSAME)
11180 unique_keys++;
11181 }
11182 if (unique_keys > 1 )
11183 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS);
11184 }
11185 }
11186 if(lex->get_using_match())
11187 {
11188 if (fulltext_unsafe_set(table->table->s))
11189 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN);
11190 }
11191
11192 flags_access_some_set |= flags;
11193
11194 if (lex->sql_command != SQLCOM_CREATE_TABLE ||
11195 (lex->sql_command == SQLCOM_CREATE_TABLE &&
11196 (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)))
11197 {
11198 if (table->table->s->tmp_table)
11199 lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TEMP_TRANS_TABLE :
11200 LEX::STMT_READS_TEMP_NON_TRANS_TABLE);
11201 else
11202 lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TRANS_TABLE :
11203 LEX::STMT_READS_NON_TRANS_TABLE);
11204 }
11205
11206 if (prev_access_table && prev_access_table->file->ht !=
11207 table->table->file->ht)
11208 multi_access_engine= TRUE;
11209
11210 prev_access_table= table->table;
11211 }
11212 assert(!is_write ||
11213 write_to_some_transactional_table ||
11214 write_to_some_non_transactional_table);
11215 /*
11216 write_all_non_transactional_are_tmp_tables may be true if any
11217 non-transactional table was not updated, so we fix its value here.
11218 */
11219 write_all_non_transactional_are_tmp_tables=
11220 write_all_non_transactional_are_tmp_tables &&
11221 write_to_some_non_transactional_table;
11222
11223 DBUG_PRINT("info", ("flags_write_all_set: 0x%llx", flags_write_all_set));
11224 DBUG_PRINT("info", ("flags_write_some_set: 0x%llx", flags_write_some_set));
11225 DBUG_PRINT("info", ("flags_access_some_set: 0x%llx", flags_access_some_set));
11226 DBUG_PRINT("info", ("multi_write_engine: %d", multi_write_engine));
11227 DBUG_PRINT("info", ("multi_access_engine: %d", multi_access_engine));
11228
11229 int error= 0;
11230 int unsafe_flags;
11231
11232 bool multi_stmt_trans= in_multi_stmt_transaction_mode();
11233 bool trans_table= trans_has_updated_trans_table(this);
11234 bool binlog_direct= variables.binlog_direct_non_trans_update;
11235
11236 if (lex->is_mixed_stmt_unsafe(multi_stmt_trans, binlog_direct,
11237 trans_table, tx_isolation))
11238 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MIXED_STATEMENT);
11239 else if (multi_stmt_trans && trans_table && !binlog_direct &&
11240 lex->stmt_accessed_table(LEX::STMT_WRITES_NON_TRANS_TABLE))
11241 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS);
11242
11243 /*
11244 If more than one engine is involved in the statement and at
11245 least one is doing it's own logging (is *self-logging*), the
11246 statement cannot be logged atomically, so we generate an error
11247 rather than allowing the binlog to become corrupt.
11248 */
11249 if (multi_write_engine &&
11250 (flags_write_some_set & HA_HAS_OWN_BINLOGGING))
11251 my_error((error= ER_BINLOG_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE),
11252 MYF(0));
11253 else if (multi_access_engine && flags_access_some_set & HA_HAS_OWN_BINLOGGING)
11254 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE);
11255
11256 /* XA is unsafe for statements */
11257 if (is_write &&
11258 !get_transaction()->xid_state()->has_state(XID_STATE::XA_NOTR))
11259 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_XA);
11260
11261 DBUG_EXECUTE_IF("make_stmt_only_engines",
11262 {
11263 flags_write_all_set= HA_BINLOG_STMT_CAPABLE;
11264 };);
11265
11266 /* both statement-only and row-only engines involved */
11267 if ((flags_write_all_set & (HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE)) == 0)
11268 {
11269 /*
11270 1. Error: Binary logging impossible since both row-incapable
11271 engines and statement-incapable engines are involved
11272 */
11273 my_error((error= ER_BINLOG_ROW_ENGINE_AND_STMT_ENGINE), MYF(0));
11274 }
11275 /* statement-only engines involved */
11276 else if ((flags_write_all_set & HA_BINLOG_ROW_CAPABLE) == 0)
11277 {
11278 if (lex->is_stmt_row_injection())
11279 {
11280 /*
11281 4. Error: Cannot execute row injection since table uses
11282 storage engine limited to statement-logging
11283 */
11284 my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0));
11285 }
11286 else if (WSREP_BINLOG_FORMAT(variables.binlog_format) == BINLOG_FORMAT_ROW &&
11287 sqlcom_can_generate_row_events(this->lex->sql_command))
11288 {
11289 /*
11290 2. Error: Cannot modify table that uses a storage engine
11291 limited to statement-logging when BINLOG_FORMAT = ROW
11292 */
11293 my_error((error= ER_BINLOG_ROW_MODE_AND_STMT_ENGINE), MYF(0));
11294 }
11295 else if (variables.binlog_format == BINLOG_FORMAT_MIXED &&
11296 ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
11297 {
11298 /*
11299 3. Error: Cannot execute statement: binlogging of unsafe
11300 statement is impossible when storage engine is limited to
11301 statement-logging and BINLOG_FORMAT = MIXED.
11302 */
11303 for (int unsafe_type= 0;
11304 unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
11305 unsafe_type++)
11306 if (unsafe_flags & (1 << unsafe_type))
11307 my_error((error= ER_BINLOG_UNSAFE_AND_STMT_ENGINE), MYF(0),
11308 ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
11309 }
11310 else if (is_write && ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
11311 {
11312 /*
11313 7. Warning: Unsafe statement logged as statement due to
11314 binlog_format = STATEMENT
11315 */
11316 binlog_unsafe_warning_flags|= unsafe_flags;
11317 DBUG_PRINT("info", ("Scheduling warning to be issued by "
11318 "binlog_query: '%s'",
11319 ER(ER_BINLOG_UNSAFE_STATEMENT)));
11320 DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
11321 binlog_unsafe_warning_flags));
11322 }
11323 /* log in statement format! */
11324 }
11325 /* no statement-only engines */
11326 else
11327 {
11328 /* binlog_format = STATEMENT */
11329 if (WSREP_BINLOG_FORMAT(variables.binlog_format) == BINLOG_FORMAT_STMT)
11330 {
11331 if (lex->is_stmt_row_injection())
11332 {
11333 /*
11334 6. Error: Cannot execute row injection since
11335 BINLOG_FORMAT = STATEMENT
11336 */
11337 my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_MODE), MYF(0));
11338 }
11339 else if ((flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0 &&
11340 sqlcom_can_generate_row_events(this->lex->sql_command))
11341 {
11342 /*
11343 5. Error: Cannot modify table that uses a storage engine
11344 limited to row-logging when binlog_format = STATEMENT
11345 */
11346 #ifdef WITH_WSREP
11347 if (!WSREP_NNULL(this) || wsrep_exec_mode == LOCAL_STATE)
11348 {
11349 #endif /* WITH_WSREP */
11350 my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), "");
11351 #ifdef WITH_WSREP
11352 }
11353 #endif /* WITH_WSREP */
11354 }
11355 else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0)
11356 {
11357 /*
11358 7. Warning: Unsafe statement logged as statement due to
11359 binlog_format = STATEMENT
11360 */
11361 binlog_unsafe_warning_flags|= unsafe_flags;
11362 DBUG_PRINT("info", ("Scheduling warning to be issued by "
11363 "binlog_query: '%s'",
11364 ER(ER_BINLOG_UNSAFE_STATEMENT)));
11365 DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
11366 binlog_unsafe_warning_flags));
11367 }
11368 /* log in statement format! */
11369 }
11370 /* No statement-only engines and binlog_format != STATEMENT.
11371 I.e., nothing prevents us from row logging if needed. */
11372 else
11373 {
11374 if (lex->is_stmt_unsafe() || lex->is_stmt_row_injection()
11375 || (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0)
11376 {
11377 #ifndef NDEBUG
11378 int flags= lex->get_stmt_unsafe_flags();
11379 DBUG_PRINT("info", ("setting row format for unsafe statement"));
11380 for (int i= 0; i < Query_tables_list::BINLOG_STMT_UNSAFE_COUNT; i++)
11381 {
11382 if (flags & (1 << i))
11383 DBUG_PRINT("info", ("unsafe reason: %s",
11384 ER(Query_tables_list::binlog_stmt_unsafe_errcode[i])));
11385 }
11386 DBUG_PRINT("info", ("is_row_injection=%d",
11387 lex->is_stmt_row_injection()));
11388 DBUG_PRINT("info", ("stmt_capable=%llu",
11389 (flags_write_all_set & HA_BINLOG_STMT_CAPABLE)));
11390 #endif
11391 /* log in row format! */
11392 set_current_stmt_binlog_format_row_if_mixed();
11393 }
11394 }
11395 }
11396
11397 if (non_replicated_tables_count > 0)
11398 {
11399 if ((replicated_tables_count == 0) || ! is_write)
11400 {
11401 DBUG_PRINT("info", ("decision: no logging, no replicated table affected"));
11402 set_binlog_local_stmt_filter();
11403 }
11404 else
11405 {
11406 if (! is_current_stmt_binlog_format_row())
11407 {
11408 my_error((error= ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES), MYF(0));
11409 }
11410 else
11411 {
11412 clear_binlog_local_stmt_filter();
11413 }
11414 }
11415 }
11416 else
11417 {
11418 clear_binlog_local_stmt_filter();
11419 }
11420
11421 if (!error &&
11422 !is_dml_gtid_compatible(write_to_some_transactional_table,
11423 write_to_some_non_transactional_table,
11424 write_all_non_transactional_are_tmp_tables))
11425 error= 1;
11426
11427 if (error) {
11428 DBUG_PRINT("info", ("decision: no logging since an error was generated"));
11429 DBUG_RETURN(-1);
11430 }
11431
11432 if (is_write &&
11433 lex->sql_command != SQLCOM_END /* rows-event applying by slave */)
11434 {
11435 /*
11436 Master side of DML in the STMT format events parallelization.
11437 All involving table db:s are stored in a abc-ordered name list.
11438 In case the number of databases exceeds MAX_DBS_IN_EVENT_MTS maximum
11439 the list gathering breaks since it won't be sent to the slave.
11440 */
11441 for (TABLE_LIST *table= tables; table; table= table->next_global)
11442 {
11443 if (table->is_placeholder())
11444 continue;
11445
11446 assert(table->table);
11447
11448 if (table->table->file->referenced_by_foreign_key())
11449 {
11450 /*
11451 FK-referenced dbs can't be gathered currently. The following
11452 event will be marked for sequential execution on slave.
11453 */
11454 binlog_accessed_db_names= NULL;
11455 add_to_binlog_accessed_dbs("");
11456 break;
11457 }
11458 if (!is_current_stmt_binlog_format_row())
11459 add_to_binlog_accessed_dbs(table->db);
11460 }
11461 }
11462 DBUG_PRINT("info", ("decision: logging in %s format",
11463 is_current_stmt_binlog_format_row() ?
11464 "ROW" : "STATEMENT"));
11465
11466 if (variables.binlog_format == BINLOG_FORMAT_ROW &&
11467 (lex->sql_command == SQLCOM_UPDATE ||
11468 lex->sql_command == SQLCOM_UPDATE_MULTI ||
11469 lex->sql_command == SQLCOM_DELETE ||
11470 lex->sql_command == SQLCOM_DELETE_MULTI))
11471 {
11472 String table_names;
11473 /*
11474 Generate a warning for UPDATE/DELETE statements that modify a
11475 BLACKHOLE table, as row events are not logged in row format.
11476 */
11477 for (TABLE_LIST *table= tables; table; table= table->next_global)
11478 {
11479 if (table->is_placeholder())
11480 continue;
11481 if (table->table->file->ht->db_type == DB_TYPE_BLACKHOLE_DB &&
11482 table->lock_type >= TL_WRITE_ALLOW_WRITE)
11483 {
11484 table_names.append(table->table_name);
11485 table_names.append(",");
11486 }
11487 }
11488 if (!table_names.is_empty())
11489 {
11490 bool is_update= (lex->sql_command == SQLCOM_UPDATE ||
11491 lex->sql_command == SQLCOM_UPDATE_MULTI);
11492 /*
11493 Replace the last ',' with '.' for table_names
11494 */
11495 table_names.replace(table_names.length()-1, 1, ".", 1);
11496 push_warning_printf(this, Sql_condition::SL_WARNING,
11497 WARN_ON_BLOCKHOLE_IN_RBR,
11498 ER(WARN_ON_BLOCKHOLE_IN_RBR),
11499 is_update ? "UPDATE" : "DELETE",
11500 table_names.c_ptr());
11501 }
11502 }
11503 }
11504 else
11505 {
11506 DBUG_PRINT("info", ("decision: no logging since "
11507 "mysql_bin_log.is_open() = %d "
11508 "and (options & OPTION_BIN_LOG) = 0x%llx "
11509 "and binlog_format = %lu "
11510 "and binlog_filter->db_ok(db) = %d",
11511 mysql_bin_log.is_open(),
11512 (variables.option_bits & OPTION_BIN_LOG),
11513 variables.binlog_format,
11514 binlog_filter->db_ok(m_db.str)));
11515
11516 for (TABLE_LIST *table= tables; table; table= table->next_global)
11517 {
11518 if (!table->is_placeholder() && table->table->no_replicate &&
11519 gtid_state->warn_or_err_on_modify_gtid_table(this, table))
11520 break;
11521 }
11522 }
11523
11524 DEBUG_SYNC(current_thd, "end_decide_logging_format");
11525
11526 DBUG_RETURN(0);
11527 }
11528
11529
11530 /**
11531 Given that a possible violation of gtid consistency has happened,
11532 checks if gtid-inconsistencies are forbidden by the current value of
11533 ENFORCE_GTID_CONSISTENCY and GTID_MODE. If forbidden, generates
11534 error or warning accordingly.
11535
11536 @param thd The thread that has issued the GTID-violating statement.
11537
11538 @param error_code The error code to use, if error or warning is to
11539 be generated.
11540
11541 @retval false Error was generated.
11542 @retval true No error was generated (possibly a warning was generated).
11543 */
handle_gtid_consistency_violation(THD * thd,int error_code)11544 static bool handle_gtid_consistency_violation(THD *thd, int error_code)
11545 {
11546 DBUG_ENTER("handle_gtid_consistency_violation");
11547
11548 enum_group_type gtid_next_type= thd->variables.gtid_next.type;
11549 global_sid_lock->rdlock();
11550 enum_gtid_consistency_mode gtid_consistency_mode=
11551 get_gtid_consistency_mode();
11552 enum_gtid_mode gtid_mode= get_gtid_mode(GTID_MODE_LOCK_SID);
11553
11554 DBUG_PRINT("info", ("gtid_next.type=%d gtid_mode=%s "
11555 "gtid_consistency_mode=%d error=%d query=%s",
11556 gtid_next_type,
11557 get_gtid_mode_string(gtid_mode),
11558 gtid_consistency_mode,
11559 error_code,
11560 thd->query().str));
11561
11562 /*
11563 GTID violations should generate error if:
11564 - GTID_MODE=ON or ON_PERMISSIVE and GTID_NEXT='AUTOMATIC' (since the
11565 transaction is expected to commit using a GTID), or
11566 - GTID_NEXT='UUID:NUMBER' (since the transaction is expected to
11567 commit usinga GTID), or
11568 - ENFORCE_GTID_CONSISTENCY=ON.
11569 */
11570 if ((gtid_next_type == AUTOMATIC_GROUP &&
11571 gtid_mode >= GTID_MODE_ON_PERMISSIVE) ||
11572 gtid_next_type == GTID_GROUP ||
11573 gtid_consistency_mode == GTID_CONSISTENCY_MODE_ON)
11574 {
11575 global_sid_lock->unlock();
11576 my_error(error_code, MYF(0));
11577 DBUG_RETURN(false);
11578 }
11579 else
11580 {
11581 /*
11582 If we are not generating an error, we must increase the counter
11583 of GTID-violating transactions. This will prevent a concurrent
11584 client from executing a SET GTID_MODE or SET
11585 ENFORCE_GTID_CONSISTENCY statement that would be incompatible
11586 with this transaction.
11587
11588 If the transaction had already been accounted as a gtid violating
11589 transaction, then don't increment the counters, just issue the
11590 warning below. This prevents calling
11591 begin_automatic_gtid_violating_transaction or
11592 begin_anonymous_gtid_violating_transaction multiple times for the
11593 same transaction, which would make the counter go out of sync.
11594 */
11595 if (!thd->has_gtid_consistency_violation)
11596 {
11597 if (gtid_next_type == AUTOMATIC_GROUP)
11598 gtid_state->begin_automatic_gtid_violating_transaction();
11599 else
11600 {
11601 assert(gtid_next_type == ANONYMOUS_GROUP);
11602 gtid_state->begin_anonymous_gtid_violating_transaction();
11603 }
11604
11605 /*
11606 If a transaction generates multiple GTID violation conditions,
11607 it must still only update the counters once. Hence we use
11608 this per-thread flag to keep track of whether the thread has a
11609 consistency or not. This function must only be called if the
11610 transaction does not already have a GTID violation.
11611 */
11612 thd->has_gtid_consistency_violation= true;
11613 }
11614
11615 global_sid_lock->unlock();
11616
11617 // Generate warning if ENFORCE_GTID_CONSISTENCY = WARN.
11618 if (gtid_consistency_mode == GTID_CONSISTENCY_MODE_WARN)
11619 {
11620 // Need to print to log so that replication admin knows when users
11621 // have adjusted their workloads.
11622 sql_print_warning("%s", ER(error_code));
11623 // Need to print to client so that users can adjust their workload.
11624 push_warning(thd, Sql_condition::SL_WARNING, error_code, ER(error_code));
11625 }
11626 DBUG_RETURN(true);
11627 }
11628 }
11629
11630
is_ddl_gtid_compatible()11631 bool THD::is_ddl_gtid_compatible()
11632 {
11633 DBUG_ENTER("THD::is_ddl_gtid_compatible");
11634
11635 // If @@session.sql_log_bin has been manually turned off (only
11636 // doable by SUPER), then no problem, we can execute any statement.
11637 if ((variables.option_bits & OPTION_BIN_LOG) == 0 ||
11638 mysql_bin_log.is_open() == false)
11639 DBUG_RETURN(true);
11640
11641 DBUG_PRINT("info",
11642 ("SQLCOM_CREATE:%d CREATE-TMP:%d SELECT:%d SQLCOM_DROP:%d DROP-TMP:%d trx:%d",
11643 lex->sql_command == SQLCOM_CREATE_TABLE,
11644 (lex->sql_command == SQLCOM_CREATE_TABLE &&
11645 (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)),
11646 lex->select_lex->item_list.elements,
11647 lex->sql_command == SQLCOM_DROP_TABLE,
11648 (lex->sql_command == SQLCOM_DROP_TABLE && lex->drop_temporary),
11649 in_multi_stmt_transaction_mode()));
11650
11651 if (lex->sql_command == SQLCOM_CREATE_TABLE &&
11652 !(lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) &&
11653 lex->select_lex->item_list.elements)
11654 {
11655 /*
11656 CREATE ... SELECT (without TEMPORARY) is unsafe because if
11657 binlog_format=row it will be logged as a CREATE TABLE followed
11658 by row events, re-executed non-atomically as two transactions,
11659 and then written to the slave's binary log as two separate
11660 transactions with the same GTID.
11661 */
11662 bool ret= handle_gtid_consistency_violation(
11663 this, ER_GTID_UNSAFE_CREATE_SELECT);
11664 DBUG_RETURN(ret);
11665 }
11666 else if ((lex->sql_command == SQLCOM_CREATE_TABLE &&
11667 (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) != 0) ||
11668 (lex->sql_command == SQLCOM_DROP_TABLE && lex->drop_temporary))
11669 {
11670 /*
11671 [CREATE|DROP] TEMPORARY TABLE is unsafe to execute
11672 inside a transaction because the table will stay and the
11673 transaction will be written to the slave's binary log with the
11674 GTID even if the transaction is rolled back.
11675 This includes the execution inside Functions and Triggers.
11676 */
11677 if (in_multi_stmt_transaction_mode() || in_sub_stmt)
11678 {
11679 bool ret= handle_gtid_consistency_violation(
11680 this, ER_GTID_UNSAFE_CREATE_DROP_TEMPORARY_TABLE_IN_TRANSACTION);
11681 DBUG_RETURN(ret);
11682 }
11683 }
11684 DBUG_RETURN(true);
11685 }
11686
11687
11688 bool
is_dml_gtid_compatible(bool some_transactional_table,bool some_non_transactional_table,bool non_transactional_tables_are_tmp)11689 THD::is_dml_gtid_compatible(bool some_transactional_table,
11690 bool some_non_transactional_table,
11691 bool non_transactional_tables_are_tmp)
11692 {
11693 DBUG_ENTER("THD::is_dml_gtid_compatible(bool, bool, bool)");
11694
11695 // If @@session.sql_log_bin has been manually turned off (only
11696 // doable by SUPER), then no problem, we can execute any statement.
11697 if ((variables.option_bits & OPTION_BIN_LOG) == 0 ||
11698 mysql_bin_log.is_open() == false)
11699 DBUG_RETURN(true);
11700
11701 /*
11702 Single non-transactional updates are allowed when not mixed
11703 together with transactional statements within a transaction.
11704 Furthermore, writing to transactional and non-transactional
11705 engines in a single statement is also disallowed.
11706 Multi-statement transactions on non-transactional tables are
11707 split into single-statement transactions when
11708 GTID_NEXT = "AUTOMATIC".
11709
11710 Non-transactional updates are allowed when row binlog format is
11711 used and all non-transactional tables are temporary.
11712
11713 The debug symbol "allow_gtid_unsafe_non_transactional_updates"
11714 disables the error. This is useful because it allows us to run
11715 old tests that were not written with the restrictions of GTIDs in
11716 mind.
11717 */
11718 DBUG_PRINT("info", ("some_non_transactional_table=%d "
11719 "some_transactional_table=%d "
11720 "trans_has_updated_trans_table=%d "
11721 "non_transactional_tables_are_tmp=%d "
11722 "is_current_stmt_binlog_format_row=%d",
11723 some_non_transactional_table,
11724 some_transactional_table,
11725 trans_has_updated_trans_table(this),
11726 non_transactional_tables_are_tmp,
11727 is_current_stmt_binlog_format_row()));
11728 if (some_non_transactional_table &&
11729 (some_transactional_table || trans_has_updated_trans_table(this)) &&
11730 !(non_transactional_tables_are_tmp &&
11731 is_current_stmt_binlog_format_row()) &&
11732 !DBUG_EVALUATE_IF("allow_gtid_unsafe_non_transactional_updates", 1, 0))
11733 {
11734 DBUG_RETURN(handle_gtid_consistency_violation(
11735 this, ER_GTID_UNSAFE_NON_TRANSACTIONAL_TABLE));
11736 }
11737
11738 DBUG_RETURN(true);
11739 }
11740
11741 /*
11742 Implementation of interface to write rows to the binary log through the
11743 thread. The thread is responsible for writing the rows it has
11744 inserted/updated/deleted.
11745 */
11746
11747 #ifndef MYSQL_CLIENT
11748
11749 /*
11750 Template member function for ensuring that there is an rows log
11751 event of the apropriate type before proceeding.
11752
11753 PRE CONDITION:
11754 - Events of type 'RowEventT' have the type code 'type_code'.
11755
11756 POST CONDITION:
11757 If a non-NULL pointer is returned, the pending event for thread 'thd' will
11758 be an event of type 'RowEventT' (which have the type code 'type_code')
11759 will either empty or have enough space to hold 'needed' bytes. In
11760 addition, the columns bitmap will be correct for the row, meaning that
11761 the pending event will be flushed if the columns in the event differ from
11762 the columns suppled to the function.
11763
11764 RETURNS
11765 If no error, a non-NULL pending event (either one which already existed or
11766 the newly created one).
11767 If error, NULL.
11768 */
11769
11770 template <class RowsEventT> Rows_log_event*
binlog_prepare_pending_rows_event(TABLE * table,uint32 serv_id,size_t needed,bool is_transactional,RowsEventT * hint MY_ATTRIBUTE ((unused)),const uchar * extra_row_info)11771 THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
11772 size_t needed,
11773 bool is_transactional,
11774 RowsEventT *hint MY_ATTRIBUTE((unused)),
11775 const uchar* extra_row_info)
11776 {
11777 DBUG_ENTER("binlog_prepare_pending_rows_event");
11778
11779 /* Fetch the type code for the RowsEventT template parameter */
11780 int const general_type_code= RowsEventT::TYPE_CODE;
11781
11782 Rows_log_event* pending= binlog_get_pending_rows_event(is_transactional);
11783
11784 if (unlikely(pending && !pending->is_valid()))
11785 DBUG_RETURN(NULL);
11786
11787 /*
11788 Check if the current event is non-NULL and a write-rows
11789 event. Also check if the table provided is mapped: if it is not,
11790 then we have switched to writing to a new table.
11791 If there is no pending event, we need to create one. If there is a pending
11792 event, but it's not about the same table id, or not of the same type
11793 (between Write, Update and Delete), or not the same affected columns, or
11794 going to be too big, flush this event to disk and create a new pending
11795 event.
11796 */
11797 if (!pending ||
11798 pending->server_id != serv_id ||
11799 pending->get_table_id() != table->s->table_map_id ||
11800 pending->get_general_type_code() != general_type_code ||
11801 pending->get_data_size() + needed > opt_binlog_rows_event_max_size ||
11802 pending->read_write_bitmaps_cmp(table) == FALSE ||
11803 !binlog_row_event_extra_data_eq(pending->get_extra_row_data(),
11804 extra_row_info))
11805 {
11806 /* Create a new RowsEventT... */
11807 Rows_log_event* const
11808 ev= new RowsEventT(this, table, table->s->table_map_id,
11809 is_transactional, extra_row_info);
11810 if (unlikely(!ev))
11811 DBUG_RETURN(NULL);
11812 ev->server_id= serv_id; // I don't like this, it's too easy to forget.
11813 /*
11814 flush the pending event and replace it with the newly created
11815 event...
11816 */
11817 if (unlikely(
11818 mysql_bin_log.flush_and_set_pending_rows_event(this, ev,
11819 is_transactional)))
11820 {
11821 delete ev;
11822 DBUG_RETURN(NULL);
11823 }
11824
11825 DBUG_RETURN(ev); /* This is the new pending event */
11826 }
11827 DBUG_RETURN(pending); /* This is the current pending event */
11828 }
11829
11830 /* Declare in unnamed namespace. */
11831 namespace {
11832
11833 /**
11834 Class to handle temporary allocation of memory for row data.
11835
11836 The responsibilities of the class is to provide memory for
11837 packing one or two rows of packed data (depending on what
11838 constructor is called).
11839
11840 In order to make the allocation more efficient for "simple" rows,
11841 i.e., rows that do not contain any blobs, a pointer to the
11842 allocated memory is of memory is stored in the table structure
11843 for simple rows. If memory for a table containing a blob field
11844 is requested, only memory for that is allocated, and subsequently
11845 released when the object is destroyed.
11846
11847 */
11848 class Row_data_memory {
11849 public:
11850 /**
11851 Build an object to keep track of a block-local piece of memory
11852 for storing a row of data.
11853
11854 @param table
11855 Table where the pre-allocated memory is stored.
11856
11857 @param length
11858 Length of data that is needed, if the record contain blobs.
11859 */
Row_data_memory(TABLE * table,size_t const len1)11860 Row_data_memory(TABLE *table, size_t const len1)
11861 : m_memory(0)
11862 {
11863 #ifndef NDEBUG
11864 m_alloc_checked= FALSE;
11865 #endif
11866 allocate_memory(table, len1);
11867 m_ptr[0]= has_memory() ? m_memory : 0;
11868 m_ptr[1]= 0;
11869 }
11870
Row_data_memory(TABLE * table,size_t const len1,size_t const len2)11871 Row_data_memory(TABLE *table, size_t const len1, size_t const len2)
11872 : m_memory(0)
11873 {
11874 #ifndef NDEBUG
11875 m_alloc_checked= FALSE;
11876 #endif
11877 allocate_memory(table, len1 + len2);
11878 m_ptr[0]= has_memory() ? m_memory : 0;
11879 m_ptr[1]= has_memory() ? m_memory + len1 : 0;
11880 }
11881
~Row_data_memory()11882 ~Row_data_memory()
11883 {
11884 if (m_memory != 0 && m_release_memory_on_destruction)
11885 my_free(m_memory);
11886 }
11887
11888 /**
11889 Is there memory allocated?
11890
11891 @retval true There is memory allocated
11892 @retval false Memory allocation failed
11893 */
has_memory() const11894 bool has_memory() const {
11895 #ifndef NDEBUG
11896 m_alloc_checked= TRUE;
11897 #endif
11898 return m_memory != 0;
11899 }
11900
slot(uint s)11901 uchar *slot(uint s)
11902 {
11903 assert(s < sizeof(m_ptr)/sizeof(*m_ptr));
11904 assert(m_ptr[s] != 0);
11905 assert(m_alloc_checked == TRUE);
11906 return m_ptr[s];
11907 }
11908
11909 private:
allocate_memory(TABLE * const table,size_t const total_length)11910 void allocate_memory(TABLE *const table, size_t const total_length)
11911 {
11912 if (table->s->blob_fields == 0)
11913 {
11914 /*
11915 The maximum length of a packed record is less than this
11916 length. We use this value instead of the supplied length
11917 when allocating memory for records, since we don't know how
11918 the memory will be used in future allocations.
11919
11920 Since table->s->reclength is for unpacked records, we have
11921 to add two bytes for each field, which can potentially be
11922 added to hold the length of a packed field.
11923 */
11924 size_t const maxlen= table->s->reclength + 2 * table->s->fields;
11925
11926 /*
11927 Allocate memory for two records if memory hasn't been
11928 allocated. We allocate memory for two records so that it can
11929 be used when processing update rows as well.
11930 */
11931 if (table->write_row_record == 0)
11932 table->write_row_record=
11933 (uchar *) alloc_root(&table->mem_root, 2 * maxlen);
11934 m_memory= table->write_row_record;
11935 m_release_memory_on_destruction= FALSE;
11936 }
11937 else
11938 {
11939 m_memory= (uchar *) my_malloc(key_memory_Row_data_memory_memory,
11940 total_length, MYF(MY_WME));
11941 m_release_memory_on_destruction= TRUE;
11942 }
11943 }
11944
11945 #ifndef NDEBUG
11946 mutable bool m_alloc_checked;
11947 #endif
11948 bool m_release_memory_on_destruction;
11949 uchar *m_memory;
11950 uchar *m_ptr[2];
11951 };
11952
11953 } // namespace
11954
binlog_write_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)11955 int THD::binlog_write_row(TABLE* table, bool is_trans,
11956 uchar const *record,
11957 const uchar* extra_row_info)
11958 {
11959 #ifdef WITH_WSREP
11960 assert(is_current_stmt_binlog_format_row() &&
11961 ((WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open())));
11962 #else
11963 assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
11964 #endif /* WITH_WSREP */
11965
11966 /*
11967 Pack records into format for transfer. We are allocating more
11968 memory than needed, but that doesn't matter.
11969 */
11970 Row_data_memory memory(table, max_row_length(table, record));
11971 if (!memory.has_memory())
11972 return HA_ERR_OUT_OF_MEM;
11973
11974 uchar *row_data= memory.slot(0);
11975
11976 size_t const len= pack_row(table, table->write_set, row_data, record);
11977
11978 Rows_log_event* const ev=
11979 binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
11980 static_cast<Write_rows_log_event*>(0),
11981 extra_row_info);
11982
11983 if (unlikely(ev == 0))
11984 return HA_ERR_OUT_OF_MEM;
11985
11986 return ev->add_row_data(row_data, len);
11987 }
11988
binlog_update_row(TABLE * table,bool is_trans,const uchar * before_record,const uchar * after_record,const uchar * extra_row_info)11989 int THD::binlog_update_row(TABLE* table, bool is_trans,
11990 const uchar *before_record,
11991 const uchar *after_record,
11992 const uchar* extra_row_info)
11993 {
11994 #ifdef WITH_WSREP
11995 assert(is_current_stmt_binlog_format_row() &&
11996 ((WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open())));
11997 #else
11998 assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
11999 #endif /* WITH_WSREP */
12000
12001 int error= 0;
12002
12003 /**
12004 Save a reference to the original read and write set bitmaps.
12005 We will need this to restore the bitmaps at the end.
12006 */
12007 MY_BITMAP *old_read_set= table->read_set;
12008 MY_BITMAP *old_write_set= table->write_set;
12009
12010 /**
12011 This will remove spurious fields required during execution but
12012 not needed for binlogging. This is done according to the:
12013 binlog-row-image option.
12014 */
12015 binlog_prepare_row_images(table);
12016
12017 size_t const before_maxlen = max_row_length(table, before_record);
12018 size_t const after_maxlen = max_row_length(table, after_record);
12019
12020 Row_data_memory row_data(table, before_maxlen, after_maxlen);
12021 if (!row_data.has_memory())
12022 return HA_ERR_OUT_OF_MEM;
12023
12024 uchar *before_row= row_data.slot(0);
12025 uchar *after_row= row_data.slot(1);
12026
12027 size_t const before_size= pack_row(table, table->read_set, before_row,
12028 before_record);
12029 size_t const after_size= pack_row(table, table->write_set, after_row,
12030 after_record);
12031
12032 DBUG_DUMP("before_record", before_record, table->s->reclength);
12033 DBUG_DUMP("after_record", after_record, table->s->reclength);
12034 DBUG_DUMP("before_row", before_row, before_size);
12035 DBUG_DUMP("after_row", after_row, after_size);
12036
12037 Rows_log_event* const ev=
12038 binlog_prepare_pending_rows_event(table, server_id,
12039 before_size + after_size, is_trans,
12040 static_cast<Update_rows_log_event*>(0),
12041 extra_row_info);
12042
12043 if (unlikely(ev == 0))
12044 return HA_ERR_OUT_OF_MEM;
12045
12046 error= ev->add_row_data(before_row, before_size) ||
12047 ev->add_row_data(after_row, after_size);
12048
12049 /* restore read/write set for the rest of execution */
12050 table->column_bitmaps_set_no_signal(old_read_set,
12051 old_write_set);
12052
12053 bitmap_clear_all(&table->tmp_set);
12054
12055 return error;
12056 }
12057
binlog_delete_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)12058 int THD::binlog_delete_row(TABLE* table, bool is_trans,
12059 uchar const *record,
12060 const uchar* extra_row_info)
12061 {
12062 #ifdef WITH_WSREP
12063 assert(is_current_stmt_binlog_format_row() &&
12064 ((WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open())));
12065 #else
12066 assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
12067 #endif /* WITH_WSREP */
12068
12069 int error= 0;
12070
12071 /**
12072 Save a reference to the original read and write set bitmaps.
12073 We will need this to restore the bitmaps at the end.
12074 */
12075 MY_BITMAP *old_read_set= table->read_set;
12076 MY_BITMAP *old_write_set= table->write_set;
12077
12078 /**
12079 This will remove spurious fields required during execution but
12080 not needed for binlogging. This is done according to the:
12081 binlog-row-image option.
12082 */
12083 binlog_prepare_row_images(table);
12084
12085 /*
12086 Pack records into format for transfer. We are allocating more
12087 memory than needed, but that doesn't matter.
12088 */
12089 Row_data_memory memory(table, max_row_length(table, record));
12090 if (unlikely(!memory.has_memory()))
12091 return HA_ERR_OUT_OF_MEM;
12092
12093 uchar *row_data= memory.slot(0);
12094
12095 DBUG_DUMP("table->read_set", (uchar*) table->read_set->bitmap, (table->s->fields + 7) / 8);
12096 size_t const len= pack_row(table, table->read_set, row_data, record);
12097
12098 Rows_log_event* const ev=
12099 binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
12100 static_cast<Delete_rows_log_event*>(0),
12101 extra_row_info);
12102
12103 if (unlikely(ev == 0))
12104 return HA_ERR_OUT_OF_MEM;
12105
12106 error= ev->add_row_data(row_data, len);
12107
12108 /* restore read/write set for the rest of execution */
12109 table->column_bitmaps_set_no_signal(old_read_set,
12110 old_write_set);
12111
12112 bitmap_clear_all(&table->tmp_set);
12113 return error;
12114 }
12115
binlog_prepare_row_images(TABLE * table)12116 void THD::binlog_prepare_row_images(TABLE *table)
12117 {
12118 DBUG_ENTER("THD::binlog_prepare_row_images");
12119 /**
12120 Remove from read_set spurious columns. The write_set has been
12121 handled before in table->mark_columns_needed_for_update.
12122 */
12123
12124 DBUG_PRINT_BITSET("debug", "table->read_set (before preparing): %s", table->read_set);
12125 THD *thd= table->in_use;
12126
12127 /**
12128 if there is a primary key in the table (ie, user declared PK or a
12129 non-null unique index) and we dont want to ship the entire image,
12130 and the handler involved supports this.
12131 */
12132 if (table->s->primary_key < MAX_KEY &&
12133 (thd->variables.binlog_row_image < BINLOG_ROW_IMAGE_FULL) &&
12134 !ha_check_storage_engine_flag(table->s->db_type(), HTON_NO_BINLOG_ROW_OPT))
12135 {
12136 /**
12137 Just to be sure that tmp_set is currently not in use as
12138 the read_set already.
12139 */
12140 assert(table->read_set != &table->tmp_set);
12141 // Verify it's not used
12142 assert(bitmap_is_clear_all(&table->tmp_set));
12143
12144 switch(thd->variables.binlog_row_image)
12145 {
12146 case BINLOG_ROW_IMAGE_MINIMAL:
12147 /* MINIMAL: Mark only PK */
12148 table->mark_columns_used_by_index_no_reset(table->s->primary_key,
12149 &table->tmp_set);
12150 break;
12151 case BINLOG_ROW_IMAGE_NOBLOB:
12152 /**
12153 NOBLOB: Remove unnecessary BLOB fields from read_set
12154 (the ones that are not part of PK).
12155 */
12156 bitmap_union(&table->tmp_set, table->read_set);
12157 for (Field **ptr=table->field ; *ptr ; ptr++)
12158 {
12159 Field *field= (*ptr);
12160 if ((field->type() == MYSQL_TYPE_BLOB) &&
12161 !(field->flags & PRI_KEY_FLAG))
12162 bitmap_clear_bit(&table->tmp_set, field->field_index);
12163 }
12164 break;
12165 default:
12166 assert(0); // impossible.
12167 }
12168
12169 /* set the temporary read_set */
12170 table->column_bitmaps_set_no_signal(&table->tmp_set,
12171 table->write_set);
12172 }
12173
12174 DBUG_PRINT_BITSET("debug", "table->read_set (after preparing): %s", table->read_set);
12175 DBUG_VOID_RETURN;
12176 }
12177
12178
binlog_flush_pending_rows_event(bool stmt_end,bool is_transactional)12179 int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional)
12180 {
12181 DBUG_ENTER("THD::binlog_flush_pending_rows_event");
12182 /*
12183 We shall flush the pending event even if we are not in row-based
12184 mode: it might be the case that we left row-based mode before
12185 flushing anything (e.g., if we have explicitly locked tables).
12186 */
12187 #ifdef WITH_WSREP
12188 if (!(WSREP_EMULATE_BINLOG_NNULL(this) || mysql_bin_log.is_open()))
12189 #else
12190 if (!mysql_bin_log.is_open())
12191 #endif /* WITH_WSREP */
12192 DBUG_RETURN(0);
12193
12194 /*
12195 Mark the event as the last event of a statement if the stmt_end
12196 flag is set.
12197 */
12198 int error= 0;
12199 if (Rows_log_event *pending= binlog_get_pending_rows_event(is_transactional))
12200 {
12201 if (stmt_end)
12202 {
12203 pending->set_flags(Rows_log_event::STMT_END_F);
12204 binlog_table_maps= 0;
12205 }
12206
12207 error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0,
12208 is_transactional);
12209 }
12210
12211 DBUG_RETURN(error);
12212 }
12213
12214
12215 /**
12216 binlog_row_event_extra_data_eq
12217
12218 Comparator for two binlog row event extra data
12219 pointers.
12220
12221 It compares their significant bytes.
12222
12223 Null pointers are acceptable
12224
12225 @param a
12226 first pointer
12227
12228 @param b
12229 first pointer
12230
12231 @return
12232 true if the referenced structures are equal
12233 */
12234 bool
binlog_row_event_extra_data_eq(const uchar * a,const uchar * b)12235 THD::binlog_row_event_extra_data_eq(const uchar* a,
12236 const uchar* b)
12237 {
12238 return ((a == b) ||
12239 ((a != NULL) &&
12240 (b != NULL) &&
12241 (a[EXTRA_ROW_INFO_LEN_OFFSET] ==
12242 b[EXTRA_ROW_INFO_LEN_OFFSET]) &&
12243 (memcmp(a, b,
12244 a[EXTRA_ROW_INFO_LEN_OFFSET]) == 0)));
12245 }
12246
12247 #if !defined(NDEBUG)
12248 static const char *
show_query_type(THD::enum_binlog_query_type qtype)12249 show_query_type(THD::enum_binlog_query_type qtype)
12250 {
12251 switch (qtype) {
12252 case THD::ROW_QUERY_TYPE:
12253 return "ROW";
12254 case THD::STMT_QUERY_TYPE:
12255 return "STMT";
12256 case THD::QUERY_TYPE_COUNT:
12257 default:
12258 assert(0 <= qtype && qtype < THD::QUERY_TYPE_COUNT);
12259 }
12260 static char buf[64];
12261 sprintf(buf, "UNKNOWN#%d", qtype);
12262 return buf;
12263 }
12264 #endif
12265
12266 /**
12267 Auxiliary function to reset the limit unsafety warning suppression.
12268 */
reset_binlog_unsafe_suppression()12269 static void reset_binlog_unsafe_suppression()
12270 {
12271 DBUG_ENTER("reset_binlog_unsafe_suppression");
12272 unsafe_warning_suppression_is_activated= false;
12273 limit_unsafe_warning_count= 0;
12274 limit_unsafe_suppression_start_time= my_getsystime()/10000000;
12275 DBUG_VOID_RETURN;
12276 }
12277
12278 /**
12279 Auxiliary function to print warning in the error log.
12280 */
print_unsafe_warning_to_log(int unsafe_type,char * buf,const char * query)12281 static void print_unsafe_warning_to_log(int unsafe_type, char* buf,
12282 const char* query)
12283 {
12284 DBUG_ENTER("print_unsafe_warning_in_log");
12285 sprintf(buf, ER(ER_BINLOG_UNSAFE_STATEMENT),
12286 ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
12287 sql_print_warning(ER(ER_MESSAGE_AND_STATEMENT), buf, query);
12288 DBUG_VOID_RETURN;
12289 }
12290
12291 /**
12292 Auxiliary function to check if the warning for limit unsafety should be
12293 thrown or suppressed. Details of the implementation can be found in the
12294 comments inline.
12295
12296 @params
12297 buf - buffer to hold the warning message text
12298 unsafe_type - The type of unsafety.
12299 query - The actual query statement.
12300
12301 TODO: Remove this function and implement a general service for all warnings
12302 that would prevent flooding the error log. => switch to log_throttle class?
12303 */
do_unsafe_limit_checkout(char * buf,int unsafe_type,const char * query)12304 static void do_unsafe_limit_checkout(char* buf, int unsafe_type, const char* query)
12305 {
12306 ulonglong now;
12307 DBUG_ENTER("do_unsafe_limit_checkout");
12308 assert(unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT);
12309 limit_unsafe_warning_count++;
12310 /*
12311 INITIALIZING:
12312 If this is the first time this function is called with log warning
12313 enabled, the monitoring the unsafe warnings should start.
12314 */
12315 if (limit_unsafe_suppression_start_time == 0)
12316 {
12317 limit_unsafe_suppression_start_time= my_getsystime()/10000000;
12318 print_unsafe_warning_to_log(unsafe_type, buf, query);
12319 }
12320 else
12321 {
12322 if (!unsafe_warning_suppression_is_activated)
12323 print_unsafe_warning_to_log(unsafe_type, buf, query);
12324
12325 if (limit_unsafe_warning_count >=
12326 LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT)
12327 {
12328 now= my_getsystime()/10000000;
12329 if (!unsafe_warning_suppression_is_activated)
12330 {
12331 /*
12332 ACTIVATION:
12333 We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT warnings in
12334 less than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT we activate the
12335 suppression.
12336 */
12337 if ((now-limit_unsafe_suppression_start_time) <=
12338 LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
12339 {
12340 unsafe_warning_suppression_is_activated= true;
12341 DBUG_PRINT("info",("A warning flood has been detected and the limit \
12342 unsafety warning suppression has been activated."));
12343 }
12344 else
12345 {
12346 /*
12347 there is no flooding till now, therefore we restart the monitoring
12348 */
12349 limit_unsafe_suppression_start_time= my_getsystime()/10000000;
12350 limit_unsafe_warning_count= 0;
12351 }
12352 }
12353 else
12354 {
12355 /*
12356 Print the suppression note and the unsafe warning.
12357 */
12358 sql_print_information("The following warning was suppressed %d times \
12359 during the last %d seconds in the error log",
12360 limit_unsafe_warning_count,
12361 (int)
12362 (now-limit_unsafe_suppression_start_time));
12363 print_unsafe_warning_to_log(unsafe_type, buf, query);
12364 /*
12365 DEACTIVATION: We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT
12366 warnings in more than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT, the
12367 suppression should be deactivated.
12368 */
12369 if ((now - limit_unsafe_suppression_start_time) >
12370 LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
12371 {
12372 reset_binlog_unsafe_suppression();
12373 DBUG_PRINT("info",("The limit unsafety warning supression has been \
12374 deactivated"));
12375 }
12376 }
12377 limit_unsafe_warning_count= 0;
12378 }
12379 }
12380 DBUG_VOID_RETURN;
12381 }
12382
12383 /**
12384 Auxiliary method used by @c binlog_query() to raise warnings.
12385
12386 The type of warning and the type of unsafeness is stored in
12387 THD::binlog_unsafe_warning_flags.
12388 */
issue_unsafe_warnings()12389 void THD::issue_unsafe_warnings()
12390 {
12391 char buf[MYSQL_ERRMSG_SIZE * 2];
12392 DBUG_ENTER("issue_unsafe_warnings");
12393 /*
12394 Ensure that binlog_unsafe_warning_flags is big enough to hold all
12395 bits. This is actually a constant expression.
12396 */
12397 assert(LEX::BINLOG_STMT_UNSAFE_COUNT <=
12398 sizeof(binlog_unsafe_warning_flags) * CHAR_BIT);
12399
12400 uint32 unsafe_type_flags= binlog_unsafe_warning_flags;
12401
12402 /*
12403 For each unsafe_type, check if the statement is unsafe in this way
12404 and issue a warning.
12405 */
12406 for (int unsafe_type=0;
12407 unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
12408 unsafe_type++)
12409 {
12410 if ((unsafe_type_flags & (1 << unsafe_type)) != 0)
12411 {
12412 push_warning_printf(this, Sql_condition::SL_NOTE,
12413 ER_BINLOG_UNSAFE_STATEMENT,
12414 ER(ER_BINLOG_UNSAFE_STATEMENT),
12415 ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
12416 if (log_error_verbosity > 1 && opt_log_unsafe_statements)
12417 {
12418 if (unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT)
12419 do_unsafe_limit_checkout( buf, unsafe_type, query().str);
12420 else //cases other than LIMIT unsafety
12421 print_unsafe_warning_to_log(unsafe_type, buf, query().str);
12422 }
12423 }
12424 }
12425 DBUG_VOID_RETURN;
12426 }
12427
12428 /**
12429 Log the current query.
12430
12431 The query will be logged in either row format or statement format
12432 depending on the value of @c current_stmt_binlog_format_row field and
12433 the value of the @c qtype parameter.
12434
12435 This function must be called:
12436
12437 - After the all calls to ha_*_row() functions have been issued.
12438
12439 - After any writes to system tables. Rationale: if system tables
12440 were written after a call to this function, and the master crashes
12441 after the call to this function and before writing the system
12442 tables, then the master and slave get out of sync.
12443
12444 - Before tables are unlocked and closed.
12445
12446 @see decide_logging_format
12447
12448 @retval 0 Success
12449
12450 @retval nonzero If there is a failure when writing the query (e.g.,
12451 write failure), then the error code is returned.
12452 */
binlog_query(THD::enum_binlog_query_type qtype,const char * query_arg,size_t query_len,bool is_trans,bool direct,bool suppress_use,int errcode)12453 int THD::binlog_query(THD::enum_binlog_query_type qtype, const char *query_arg,
12454 size_t query_len, bool is_trans, bool direct,
12455 bool suppress_use, int errcode)
12456 {
12457 DBUG_ENTER("THD::binlog_query");
12458 DBUG_PRINT("enter", ("qtype: %s query: '%s'",
12459 show_query_type(qtype), query_arg));
12460 #ifdef WITH_WSREP
12461 assert(query_arg && (WSREP_EMULATE_BINLOG_NNULL(this)
12462 || mysql_bin_log.is_open()));
12463 #else
12464 assert(query_arg && mysql_bin_log.is_open());
12465 #endif /* WITH_WSREP */
12466
12467 if (get_binlog_local_stmt_filter() == BINLOG_FILTER_SET)
12468 {
12469 /*
12470 The current statement is to be ignored, and not written to
12471 the binlog. Do not call issue_unsafe_warnings().
12472 */
12473 DBUG_RETURN(0);
12474 }
12475
12476 /*
12477 If we are not in prelocked mode, mysql_unlock_tables() will be
12478 called after this binlog_query(), so we have to flush the pending
12479 rows event with the STMT_END_F set to unlock all tables at the
12480 slave side as well.
12481
12482 If we are in prelocked mode, the flushing will be done inside the
12483 top-most close_thread_tables().
12484 */
12485 if (this->locked_tables_mode <= LTM_LOCK_TABLES)
12486 if (int error= binlog_flush_pending_rows_event(TRUE, is_trans))
12487 DBUG_RETURN(error);
12488
12489 /*
12490 Warnings for unsafe statements logged in statement format are
12491 printed in three places instead of in decide_logging_format().
12492 This is because the warnings should be printed only if the statement
12493 is actually logged. When executing decide_logging_format(), we cannot
12494 know for sure if the statement will be logged:
12495
12496 1 - sp_head::execute_procedure which prints out warnings for calls to
12497 stored procedures.
12498
12499 2 - sp_head::execute_function which prints out warnings for calls
12500 involving functions.
12501
12502 3 - THD::binlog_query (here) which prints warning for top level
12503 statements not covered by the two cases above: i.e., if not insided a
12504 procedure and a function.
12505
12506 Besides, we should not try to print these warnings if it is not
12507 possible to write statements to the binary log as it happens when
12508 the execution is inside a function, or generaly speaking, when
12509 the variables.option_bits & OPTION_BIN_LOG is false.
12510 */
12511 if ((variables.option_bits & OPTION_BIN_LOG) &&
12512 sp_runtime_ctx == NULL && !binlog_evt_union.do_union)
12513 issue_unsafe_warnings();
12514
12515 switch (qtype) {
12516 /*
12517 ROW_QUERY_TYPE means that the statement may be logged either in
12518 row format or in statement format. If
12519 current_stmt_binlog_format is row, it means that the
12520 statement has already been logged in row format and hence shall
12521 not be logged again.
12522 */
12523 case THD::ROW_QUERY_TYPE:
12524 DBUG_PRINT("debug",
12525 ("is_current_stmt_binlog_format_row: %d",
12526 is_current_stmt_binlog_format_row()));
12527 if (is_current_stmt_binlog_format_row())
12528 DBUG_RETURN(0);
12529 /* Fall through */
12530
12531 /*
12532 STMT_QUERY_TYPE means that the query must be logged in statement
12533 format; it cannot be logged in row format. This is typically
12534 used by DDL statements. It is an error to use this query type
12535 if current_stmt_binlog_format_row is row.
12536
12537 @todo Currently there are places that call this method with
12538 STMT_QUERY_TYPE and current_stmt_binlog_format is row. Fix those
12539 places and add assert to ensure correct behavior. /Sven
12540 */
12541 case THD::STMT_QUERY_TYPE:
12542 /*
12543 The MYSQL_BIN_LOG::write() function will set the STMT_END_F flag and
12544 flush the pending rows event if necessary.
12545 */
12546 {
12547 Query_log_event qinfo(this, query_arg, query_len, is_trans, direct,
12548 suppress_use, errcode);
12549 /*
12550 Binlog table maps will be irrelevant after a Query_log_event
12551 (they are just removed on the slave side) so after the query
12552 log event is written to the binary log, we pretend that no
12553 table maps were written.
12554 */
12555 int error= mysql_bin_log.write_event(&qinfo);
12556 binlog_table_maps= 0;
12557 DBUG_RETURN(error);
12558 }
12559 break;
12560
12561 case THD::QUERY_TYPE_COUNT:
12562 default:
12563 assert(0 <= qtype && qtype < QUERY_TYPE_COUNT);
12564 }
12565 DBUG_RETURN(0);
12566 }
12567
12568 #endif /* !defined(MYSQL_CLIENT) */
12569 #ifdef WITH_WSREP
get_trans_log(THD * thd,bool transaction)12570 IO_CACHE * get_trans_log(THD * thd, bool transaction)
12571 {
12572 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
12573 if (cache_mngr)
12574 {
12575 return cache_mngr->get_binlog_cache_log(transaction);
12576 }
12577 else
12578 {
12579 WSREP_DEBUG("binlog cache not initialized, conn :%u", thd->thread_id());
12580 return NULL;
12581 }
12582 }
12583
wsrep_trans_cache_is_empty(THD * thd)12584 bool wsrep_trans_cache_is_empty(THD *thd)
12585 {
12586 binlog_cache_mngr *const cache_mngr=
12587 (binlog_cache_mngr*) thd_get_ha_data(thd, binlog_hton);
12588 return (!cache_mngr || cache_mngr->trx_cache.is_binlog_empty());
12589 }
12590
thd_binlog_flush_pending_rows_event(THD * thd,bool stmt_end)12591 void thd_binlog_flush_pending_rows_event(THD *thd, bool stmt_end)
12592 {
12593 thd->binlog_flush_pending_rows_event(stmt_end);
12594 }
thd_binlog_trx_reset(THD * thd)12595 void thd_binlog_trx_reset(THD * thd)
12596 {
12597 /*
12598 todo: fix autocommit select to not call the caller
12599 */
12600 if (thd_get_ha_data(thd, binlog_hton) != NULL)
12601 {
12602 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
12603 if (cache_mngr)
12604 {
12605 cache_mngr->trx_cache.reset();
12606 if (!cache_mngr->stmt_cache.is_binlog_empty())
12607 {
12608 WSREP_DEBUG("pending events in stmt cache, sql: %s", WSREP_QUERY(thd));
12609 cache_mngr->stmt_cache.reset();
12610 }
12611 }
12612 }
12613 thd->clear_binlog_table_maps();
12614 }
12615
wsrep_thd_binlog_commit(THD * thd,bool all)12616 TC_LOG::enum_result wsrep_thd_binlog_commit(THD* thd, bool all)
12617 {
12618 /* binlog commit is called for wsrep replication to happen
12619 - applier and replayer can skip binlog commit
12620 - also if node is not joined, replication must be skipped
12621 */
12622 if (WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV) &&
12623 wsrep_ready_get())
12624 return mysql_bin_log.commit(thd, all);
12625 else
12626 return (ha_commit_low(thd, all) ?
12627 TC_LOG::RESULT_ABORTED : TC_LOG::RESULT_SUCCESS);
12628 }
12629
wsrep_thd_binlog_rollback(THD * thd,bool all)12630 int wsrep_thd_binlog_rollback(THD* thd, bool all)
12631 {
12632 /* binlog rollback is called for wsrep replication to happen
12633 - applier and replayer can skip binlog commit
12634 - also if node is not joined, replication must be skipped
12635 */
12636 if (WSREP_EMULATE_BINLOG(thd) && (thd->wsrep_exec_mode != REPL_RECV) &&
12637 wsrep_ready_get())
12638 return mysql_bin_log.rollback(thd, all);
12639 else
12640 return ha_rollback_low(thd, all);
12641 }
12642 #endif /* WITH_WSREP */
12643
/*
  Storage engine descriptor for the binlog pseudo engine. It is
  initialised with the handlerton interface version only, and its address
  is the second entry of the mysql_declare_plugin(binlog) descriptor.
*/
struct st_mysql_storage_engine binlog_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
12646
12647 /** @} */
12648
/*
  Plugin descriptor that registers the binlog as a storage engine plugin.
  The entries are positional; NOTE(review): the meaning of the entries that
  had no comment originally is inferred from their values - confirm
  against the st_mysql_plugin definition.
*/
mysql_declare_plugin(binlog)
{
  MYSQL_STORAGE_ENGINE_PLUGIN, /* presumably the plugin type */
  &binlog_storage_engine,      /* descriptor defined just above */
  "binlog",                    /* presumably the plugin name */
  "MySQL AB",                  /* presumably the author */
  "This is a pseudo storage engine to represent the binlog in a transaction",
  PLUGIN_LICENSE_GPL,          /* presumably the license */
  binlog_init, /* Plugin Init */
  binlog_deinit, /* Plugin Deinit */
  0x0100 /* 1.0 */,
  NULL,                       /* status variables                */
  NULL,                       /* system variables                */
  NULL,                       /* config options                  */
  0,                          /* presumably flags - TODO confirm */
}
mysql_declare_plugin_end;
12666