1 /* Copyright (c) 2009, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22
23 #include "binlog.h"
24
25 #include "my_stacktrace.h" // my_safe_print_system_time
26 #include "debug_sync.h" // DEBUG_SYNC
27 #include "log.h" // sql_print_warning
28 #include "log_event.h" // Rows_log_event
29 #include "mysqld_thd_manager.h" // Global_THD_manager
30 #include "rpl_handler.h" // RUN_HOOK
31 #include "rpl_mi.h" // Master_info
32 #include "rpl_rli.h" // Relay_log_info
33 #include "rpl_rli_pdb.h" // Slave_worker
34 #include "rpl_slave_commit_order_manager.h" // Commit_order_manager
35 #include "rpl_trx_boundary_parser.h" // Transaction_boundary_parser
36 #include "rpl_context.h"
37 #include "sql_class.h" // THD
38 #include "sql_parse.h" // sqlcom_can_generate_row_events
39 #include "sql_show.h" // append_identifier
40 #include "sql_base.h" // find_temporary_table
41
42 #include "pfs_file_provider.h"
43 #include "mysql/psi/mysql_file.h"
44
45 #include <pfs_transaction_provider.h>
46 #include <mysql/psi/mysql_transaction.h>
47 #include "xa.h"
48
49 #include <list>
50 #include <string>
51 #include "my_rnd.h"
52 #include <sstream>
53
54 using std::max;
55 using std::min;
56 using std::string;
57 using std::list;
58 using binary_log::checksum_crc32;
/*
  Expands to the stringified name of F followed by a space when (V)&(F) is
  non-zero, and to an empty string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")

// NOTE(review): no use of LOG_PREFIX is visible in this part of the file.
#define LOG_PREFIX "ML"

/**
  @defgroup Binary_Log Binary Log
  @{
*/

/*
  Bitwise complement of a zero my_off_t. binlog_trx_cache_data uses it as
  the "no position recorded" value of before_stmt_pos.
*/
#define MY_OFF_T_UNDEF (~(my_off_t)0UL)

/*
  Constants required for the limit unsafe warnings suppression
*/
//seconds after which the limit unsafe warnings suppression will be activated
#define LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT 50
//number of limit unsafe warnings after which the suppression will be activated
#define LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT 50
// Upper bound on the attach attempts made by Thd_backup_and_restore.
#define MAX_SESSION_ATTACH_TRIES 10
78
/*
  Bookkeeping for the limit unsafe warnings suppression.
  NOTE(review): the code that updates these is outside this part of the
  file; the meaning is inferred from the names and the constants above.
*/
static ulonglong limit_unsafe_suppression_start_time= 0;
static bool unsafe_warning_suppression_is_activated= false;
static int limit_unsafe_warning_count= 0;

// Handlerton passed to thd_get_ha_data() by thd_get_cache_mngr().
static handlerton *binlog_hton;
bool opt_binlog_order_commits= true;

const char *log_bin_index= 0;
const char *log_bin_basename= 0;

MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period, WRITE_CACHE);

/*
  Forward declarations of file-local functions. Their definitions are not
  within this part of the file.
*/
static int binlog_init(void *p);
static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event);
static int binlog_close_connection(handlerton *hton, THD *thd);
static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
                                                      THD *thd);
static int binlog_commit(handlerton *hton, THD *thd, bool all);
static int binlog_rollback(handlerton *hton, THD *thd, bool all);
static int binlog_prepare(handlerton *hton, THD *thd, bool all);
static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd);
static int binlog_clone_consistent_snapshot(handlerton *hton, THD *thd,
                                            THD *from_thd);
static int binlog_xa_commit(handlerton *hton, XID *xid);
static int binlog_xa_rollback(handlerton *hton, XID *xid);

static void exec_binlog_error_action_abort(const char* err_string);

// The last published global binlog position
static char binlog_global_snapshot_file[FN_REFLEN];
static ulonglong binlog_global_snapshot_position;

// Binlog position variables for SHOW STATUS
static char binlog_snapshot_file[FN_REFLEN];
static ulonglong binlog_snapshot_position;
static std::string binlog_snapshot_gtid_executed;

/*
  Status variable table exposing binlog_snapshot_file and
  binlog_snapshot_position. The entry with a NullS name ends the table.
*/
static SHOW_VAR binlog_status_vars_detail[]=
{
  {"snapshot_file",
   (char *)&binlog_snapshot_file, SHOW_CHAR, SHOW_SCOPE_GLOBAL},
  {"snapshot_position",
   (char *)&binlog_snapshot_position, SHOW_LONGLONG, SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}
};
126
127 /**
128 Helper class to switch to a new thread and then go back to the previous one,
129 when the object is destroyed using RAII.
130
131 This class is used to temporarily switch to another session (THD
132 structure). It will set up thread specific "globals" correctly
133 so that the POSIX thread looks exactly like the session attached to.
  However, PSI_thread info is not touched, because PFS instrumentation
  must show the actual physical view, i.e., it should depict the real
  thread doing the work rather than the thread it switched
  to.
138
139 On destruction, the original session (which is supplied to the
140 constructor) will be re-attached automatically. For example, with
141 this code, the value of @c current_thd will be the same before and
142 after execution of the code.
143
144 @code
145 {
146 for (int i = 0 ; i < count ; ++i)
147 {
148 // here we are attached to current_thd
149 // [...]
150 Thd_backup_and_restore switch_thd(current_thd, other_thd[i]);
151 // [...]
152 // here we are attached to other_thd[i]
153 // [...]
154 }
155 // here we are attached to current_thd
156 }
157 @endcode
158
159 @warning The class is not designed to be inherited from.
160 */
161
162 #ifndef EMBEDDED_LIBRARY
163
164 class Thd_backup_and_restore
165 {
166 public:
167 /**
168 Try to attach the POSIX thread to a session.
169 - This function attaches the POSIX thread to a session
170 in MAX_SESSION_ATTACH_TRIES tries when encountering
171 'out of memory' error, and terminates the server after
172 failed in MAX_SESSION_ATTACH_TRIES tries.
173
174 @param[in] backup_thd The thd to restore to when object is destructed.
175 @param[in] new_thd The thd to attach to.
176 */
177
Thd_backup_and_restore(THD * backup_thd,THD * new_thd)178 Thd_backup_and_restore(THD *backup_thd, THD *new_thd)
179 : m_backup_thd(backup_thd), m_new_thd(new_thd),
180 m_new_thd_old_real_id(new_thd->real_id)
181 {
182 assert(m_backup_thd != NULL && m_new_thd != NULL);
183 // Reset the state of the current thd.
184 m_backup_thd->restore_globals();
185 int i= 0;
186 /*
187 Attach the POSIX thread to a session in MAX_SESSION_ATTACH_TRIES
188 tries when encountering 'out of memory' error.
189 */
190 while (i < MAX_SESSION_ATTACH_TRIES)
191 {
192 /*
193 Currently attach_to(...) returns ER_OUTOFMEMORY or 0. So
194 we continue to attach the POSIX thread when encountering
195 the ER_OUTOFMEMORY error. Please take care other error
196 returned from attach_to(...) in future.
197 */
198 if (!attach_to(new_thd))
199 {
200 if (i > 0)
201 sql_print_warning("Server overcomes the temporary 'out of memory' "
202 "in '%d' tries while attaching to session thread "
203 "during the group commit phase.\n", i + 1);
204 break;
205 }
206 /* Sleep 1 microsecond per try to avoid temporary 'out of memory' */
207 my_sleep(1);
208 i++;
209 }
210 /*
211 Terminate the server after failed to attach the POSIX thread
212 to a session in MAX_SESSION_ATTACH_TRIES tries.
213 */
214 if (MAX_SESSION_ATTACH_TRIES == i)
215 {
216 my_safe_print_system_time();
217 my_safe_printf_stderr("%s", "[Fatal] Out of memory while attaching to "
218 "session thread during the group commit phase. "
219 "Data consistency between master and slave can "
220 "be guaranteed after server restarts.\n");
221 _exit(MYSQLD_FAILURE_EXIT);
222 }
223 }
224
225 /**
226 Restores to previous thd.
227 */
~Thd_backup_and_restore()228 ~Thd_backup_and_restore()
229 {
230 /*
231 Restore the global variables of the thd we previously attached to,
232 to its original state. In other words, detach the m_new_thd.
233 */
234 m_new_thd->restore_globals();
235 m_new_thd->real_id= m_new_thd_old_real_id;
236
237 // Reset the global variables to the original state.
238 if (unlikely(m_backup_thd->store_globals()))
239 assert(0); // Out of memory?!
240 }
241
242 private:
243
244 /**
245 Attach the POSIX thread to a session.
246 */
attach_to(THD * thd)247 int attach_to(THD *thd)
248 {
249 if (DBUG_EVALUATE_IF("simulate_session_attach_error", 1, 0)
250 || unlikely(thd->store_globals()))
251 {
252 /*
253 Indirectly uses pthread_setspecific, which can only return
254 ENOMEM or EINVAL. Since store_globals are using correct keys,
255 the only alternative is out of memory.
256 */
257 return ER_OUTOFMEMORY;
258 }
259 return 0;
260 }
261
262 THD *m_backup_thd;
263 THD *m_new_thd;
264 my_thread_t m_new_thd_old_real_id;
265 };
266
267 #endif /* !EMBEDDED_LIBRARY */
268
269 /**
270 Caches for non-transactional and transactional data before writing
271 it to the binary log.
272
273 @todo All the access functions for the flags suggest that the
274 encapsuling is not done correctly, so try to move any logic that
275 requires access to the flags into the cache.
276 */
277 class binlog_cache_data
278 {
279 public:
280
binlog_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & cache_log_arg)281 binlog_cache_data(bool trx_cache_arg,
282 my_off_t max_binlog_cache_size_arg,
283 ulong *ptr_binlog_cache_use_arg,
284 ulong *ptr_binlog_cache_disk_use_arg,
285 const IO_CACHE &cache_log_arg)
286 : cache_log(cache_log_arg),
287 m_pending(0),
288 saved_max_binlog_cache_size(max_binlog_cache_size_arg),
289 ptr_binlog_cache_use(ptr_binlog_cache_use_arg),
290 ptr_binlog_cache_disk_use(ptr_binlog_cache_disk_use_arg)
291 {
292 reset();
293 flags.transactional= trx_cache_arg;
294 cache_log.end_of_file= saved_max_binlog_cache_size;
295 }
296
297 int finalize(THD *thd, Log_event *end_event);
298 int finalize(THD *thd, Log_event *end_event, XID_STATE *xs);
299 int flush(THD *thd, my_off_t *bytes, bool *wrote_xid);
300 int write_event(THD *thd, Log_event *event);
301
~binlog_cache_data()302 virtual ~binlog_cache_data()
303 {
304 assert(is_binlog_empty());
305 close_cached_file(&cache_log);
306 }
307
is_binlog_empty() const308 bool is_binlog_empty() const
309 {
310 my_off_t pos= my_b_tell(&cache_log);
311 DBUG_PRINT("debug", ("%s_cache - pending: 0x%llx, bytes: %llu",
312 (flags.transactional ? "trx" : "stmt"),
313 (ulonglong) pending(), (ulonglong) pos));
314 return pending() == NULL && pos == 0;
315 }
316
is_finalized() const317 bool is_finalized() const {
318 return flags.finalized;
319 }
320
pending() const321 Rows_log_event *pending() const
322 {
323 return m_pending;
324 }
325
set_pending(Rows_log_event * const pending)326 void set_pending(Rows_log_event *const pending)
327 {
328 m_pending= pending;
329 }
330
set_incident(void)331 void set_incident(void)
332 {
333 flags.incident= true;
334 }
335
has_incident(void) const336 bool has_incident(void) const
337 {
338 return flags.incident;
339 }
340
341 /**
342 Sets the binlog_cache_data::Flags::flush_error flag if there
343 is an error while flushing cache to the file.
344
345 @param thd The client thread that is executing the transaction.
346 */
set_flush_error(THD * thd)347 void set_flush_error(THD *thd)
348 {
349 flags.flush_error= true;
350 if(is_trx_cache())
351 {
352 /*
353 If the cache is a transactional cache and if the write
354 has failed due to ENOSPC, then my_write() would have
355 set EE_WRITE error, so clear the error and create an
356 equivalent server error.
357 */
358 if (thd->is_error())
359 thd->clear_error();
360 char errbuf[MYSYS_STRERROR_SIZE];
361 my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), my_filename(cache_log.file),
362 errno, my_strerror(errbuf, sizeof(errbuf), errno));
363 }
364 }
365
get_flush_error(void) const366 bool get_flush_error(void) const
367 {
368 return flags.flush_error;
369 }
370
has_xid() const371 bool has_xid() const {
372 // There should only be an XID event if we are transactional
373 assert((flags.transactional && flags.with_xid) || !flags.with_xid);
374 return flags.with_xid;
375 }
376
is_trx_cache() const377 bool is_trx_cache() const
378 {
379 return flags.transactional;
380 }
381
get_byte_position() const382 my_off_t get_byte_position() const
383 {
384 return my_b_tell(&cache_log);
385 }
386
cache_state_rollback(my_off_t pos_to_rollback)387 void cache_state_rollback(my_off_t pos_to_rollback)
388 {
389 if (pos_to_rollback)
390 {
391 std::map<my_off_t,cache_state>::iterator it;
392 it = cache_state_map.find(pos_to_rollback);
393 if (it != cache_state_map.end())
394 {
395 flags.with_rbr= it->second.with_rbr;
396 flags.with_sbr= it->second.with_sbr;
397 flags.with_start= it->second.with_start;
398 flags.with_end= it->second.with_end;
399 flags.with_content= it->second.with_content;
400 }
401 else
402 assert(it == cache_state_map.end());
403 }
404 // Rolling back to pos == 0 means cleaning up the cache.
405 else
406 {
407 flags.with_rbr= false;
408 flags.with_sbr= false;
409 flags.with_start= false;
410 flags.with_end= false;
411 flags.with_content= false;
412 }
413 }
414
cache_state_checkpoint(my_off_t pos_to_checkpoint)415 void cache_state_checkpoint(my_off_t pos_to_checkpoint)
416 {
417 // We only need to store the cache state for pos > 0
418 if (pos_to_checkpoint)
419 {
420 cache_state state;
421 state.with_rbr= flags.with_rbr;
422 state.with_sbr= flags.with_sbr;
423 state.with_start= flags.with_start;
424 state.with_end= flags.with_end;
425 state.with_content= flags.with_content;
426 cache_state_map[pos_to_checkpoint]= state;
427 }
428 }
429
reset()430 virtual void reset()
431 {
432 compute_statistics();
433 truncate(0);
434
435 /*
436 If IOCACHE has a file associated, change its size to 0.
437 It is safer to do it here, since we are certain that one
438 asked the cache to go to position 0 with truncate.
439 */
440 if(cache_log.file != -1)
441 {
442 int error= 0;
443 if((error= my_chsize(cache_log.file, 0, 0, MYF(MY_WME))))
444 sql_print_warning("Unable to resize binlog IOCACHE auxilary file");
445
446 DBUG_EXECUTE_IF("show_io_cache_size",
447 {
448 my_off_t file_size= my_seek(cache_log.file,
449 0L,MY_SEEK_END,MYF(MY_WME+MY_FAE));
450 sql_print_error("New size:%llu",
451 static_cast<ulonglong>(file_size));
452 });
453 }
454
455 flags.incident= false;
456 flags.with_xid= false;
457 flags.immediate= false;
458 flags.finalized= false;
459 flags.with_sbr= false;
460 flags.with_rbr= false;
461 flags.with_start= false;
462 flags.with_end= false;
463 flags.with_content= false;
464 flags.flush_error= false;
465
466 /*
467 The truncate function calls reinit_io_cache that calls my_b_flush_io_cache
468 which may increase disk_writes. This breaks the disk_writes use by the
469 binary log which aims to compute the ratio between in-memory cache usage
470 and disk cache usage. To avoid this undesirable behavior, we reset the
471 variable after truncating the cache.
472 */
473 cache_log.disk_writes= 0;
474 cache_state_map.clear();
475 assert(is_binlog_empty());
476 }
477
478 /*
479 Sets the write position to point at the position given. If the
480 cache has swapped to a file, it reinitializes it, so that the
481 proper data is added to the IO_CACHE buffer. Otherwise, it just
482 does a my_b_seek.
483
484 my_b_seek will not work if the cache has swapped, that's why
485 we do this workaround.
486
487 @param[IN] pos the new write position.
488 @param[IN] use_reinit if the position should be reset resorting
489 to reset_io_cache (which may issue a flush_io_cache
490 inside)
491
492 @return The previous write position.
493 */
reset_write_pos(my_off_t pos,bool use_reinit)494 my_off_t reset_write_pos(my_off_t pos, bool use_reinit)
495 {
496 DBUG_ENTER("reset_write_pos");
497 assert(cache_log.type == WRITE_CACHE);
498
499 my_off_t oldpos= get_byte_position();
500
501 if (use_reinit)
502 {
503 MY_ATTRIBUTE((unused)) int reinit_res=
504 reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, 0);
505 assert(reinit_res == 0);
506 }
507 else
508 my_b_seek(&cache_log, pos);
509
510 DBUG_RETURN(oldpos);
511 }
512
513 /*
514 Cache to store data before copying it to the binary log.
515 */
516 IO_CACHE cache_log;
517
518 /**
519 Returns information about the cache content with respect to
520 the binlog_format of the events.
521
522 This will be used to set a flag on GTID_LOG_EVENT stating that the
523 transaction may have SBR statements or not, but the binlog dump
524 will show this flag as "rbr_only" when it is not set. That's why
525 an empty transaction should return true below, or else an empty
526 transaction would be assumed as "rbr_only" even not having RBR
527 events.
528
529 When dumping a binary log content using mysqlbinlog client program,
530 for any transaction assumed as "rbr_only" it will be printed a
531 statement changing the transaction isolation level to READ COMMITTED.
532 It doesn't make sense to have an empty transaction "requiring" this
533 isolation level change.
534
535 @return true The cache have SBR events or is empty.
536 @return false The cache contains a transaction with no SBR events.
537 */
may_have_sbr_stmts()538 bool may_have_sbr_stmts()
539 {
540 return flags.with_sbr || !flags.with_rbr;
541 }
542
543 /**
544 Check if the binlog cache contains an empty transaction, which has
545 two binlog events "BEGIN" and "COMMIT".
546
547 @return true The binlog cache contains an empty transaction.
548 @return false Otherwise.
549 */
has_empty_transaction()550 bool has_empty_transaction()
551 {
552 /*
553 The empty transaction has two events in trx/stmt binlog cache
554 and no changes (no SBR changing content and no RBR events).
555 Other transaction should not have two events. So we can identify
556 if this is an empty transaction by the event counter and the
557 cache flags.
558 */
559 if (flags.with_start && // Has transaction start statement
560 flags.with_end && // Has transaction end statement
561 !flags.with_sbr && // No statements changing content
562 !flags.with_rbr && // No rows changing content
563 !flags.immediate && // Not a DDL
564 !flags.with_xid && // Not a XID transaction and not an atomic DDL Query
565 !flags.with_content)// Does not have any content
566 {
567 assert(!flags.with_sbr); // No statements changing content
568 assert(!flags.with_rbr); // No rows changing content
569 assert(!flags.immediate);// Not a DDL
570 assert(!flags.with_xid); // Not a XID trx and not an atomic DDL Query
571
572 return true;
573 }
574 return false;
575 }
576
577 /**
578 Check if the binlog cache is empty or contains an empty transaction,
579 which has two binlog events "BEGIN" and "COMMIT".
580
581 @return true The binlog cache is empty or contains an empty transaction.
582 @return false Otherwise.
583 */
is_empty_or_has_empty_transaction()584 bool is_empty_or_has_empty_transaction()
585 {
586 return is_binlog_empty() || has_empty_transaction();
587 }
588
589 protected:
590 /*
591 This structure should have all cache variables/flags that should be restored
592 when a ROLLBACK TO SAVEPOINT statement be executed.
593 */
594 struct cache_state
595 {
596 bool with_sbr;
597 bool with_rbr;
598 bool with_start;
599 bool with_end;
600 bool with_content;
601 };
602 /*
603 For every SAVEPOINT used, we will store a cache_state for the current
604 binlog cache position. So, if a ROLLBACK TO SAVEPOINT is used, we can
605 restore the cache_state values after truncating the binlog cache.
606 */
607 std::map<my_off_t, cache_state> cache_state_map;
608
609 /*
610 It truncates the cache to a certain position. This includes deleting the
611 pending event.
612 */
truncate(my_off_t pos)613 void truncate(my_off_t pos)
614 {
615 DBUG_PRINT("info", ("truncating to position %lu", (ulong) pos));
616 remove_pending_event();
617 /*
618 Whenever there is an error while flushing cache to file,
619 the local cache will not be in a normal state and the same
620 cache cannot be used without facing an assert.
621 So, clear the cache if there is a flush error.
622 */
623 MY_ATTRIBUTE((unused)) int reinit_res=
624 reinit_io_cache(&cache_log, WRITE_CACHE, pos, 0, get_flush_error());
625 assert(reinit_res == 0);
626 cache_log.end_of_file= saved_max_binlog_cache_size;
627 }
628
629 /**
630 Flush pending event to the cache buffer.
631 */
flush_pending_event(THD * thd)632 int flush_pending_event(THD *thd) {
633 if (m_pending)
634 {
635 m_pending->set_flags(Rows_log_event::STMT_END_F);
636 if (int error= write_event(thd, m_pending))
637 return error;
638 thd->clear_binlog_table_maps();
639 }
640 return 0;
641 }
642
643 /**
644 Remove the pending event.
645 */
remove_pending_event()646 int remove_pending_event() {
647 delete m_pending;
648 m_pending= NULL;
649 return 0;
650 }
651 struct Flags {
652 /*
653 Defines if this is either a trx-cache or stmt-cache, respectively, a
654 transactional or non-transactional cache.
655 */
656 bool transactional:1;
657
658 /*
659 This indicates that some events did not get into the cache and most likely
660 it is corrupted.
661 */
662 bool incident:1;
663
664 /*
665 This indicates that the cache should be written without BEGIN/END.
666 */
667 bool immediate:1;
668
669 /*
670 This flag indicates that the buffer was finalized and has to be
671 flushed to disk.
672 */
673 bool finalized:1;
674
675 /*
676 This indicates that the cache contain an XID event.
677 */
678 bool with_xid:1;
679
680 /*
681 This indicates that the cache contain statements changing content.
682 */
683 bool with_sbr:1;
684
685 /*
686 This indicates that the cache contain RBR event changing content.
687 */
688 bool with_rbr:1;
689
690 /*
691 This indicates that the cache contain s transaction start statement.
692 */
693 bool with_start:1;
694
695 /*
696 This indicates that the cache contain a transaction end event.
697 */
698 bool with_end:1;
699
700 /*
701 This indicates that the cache contain content other than START/END.
702 */
703 bool with_content:1;
704
705 /*
706 This flag is set to 'true' when there is an error while flushing the
707 I/O cache to file.
708 */
709 bool flush_error:1;
710 } flags;
711
712 private:
713 /*
714 Pending binrows event. This event is the event where the rows are currently
715 written.
716 */
717 Rows_log_event *m_pending;
718
719 /**
720 This function computes binlog cache and disk usage.
721 */
compute_statistics()722 void compute_statistics()
723 {
724 if (!is_binlog_empty())
725 {
726 (*ptr_binlog_cache_use)++;
727 if (cache_log.disk_writes != 0)
728 (*ptr_binlog_cache_disk_use)++;
729 }
730 }
731
732 /*
733 Stores the values of maximum size of the cache allowed when this cache
734 is configured. This corresponds to either
735 . max_binlog_cache_size or max_binlog_stmt_cache_size.
736 */
737 my_off_t saved_max_binlog_cache_size;
738
739 /*
740 Stores a pointer to the status variable that keeps track of the in-memory
741 cache usage. This corresponds to either
742 . binlog_cache_use or binlog_stmt_cache_use.
743 */
744 ulong *ptr_binlog_cache_use;
745
746 /*
747 Stores a pointer to the status variable that keeps track of the disk
748 cache usage. This corresponds to either
749 . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
750 */
751 ulong *ptr_binlog_cache_disk_use;
752
753 binlog_cache_data& operator=(const binlog_cache_data& info);
754 binlog_cache_data(const binlog_cache_data& info);
755 };
756
757
758 class binlog_stmt_cache_data
759 : public binlog_cache_data
760 {
761 public:
binlog_stmt_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & cache_log)762 binlog_stmt_cache_data(bool trx_cache_arg,
763 my_off_t max_binlog_cache_size_arg,
764 ulong *ptr_binlog_cache_use_arg,
765 ulong *ptr_binlog_cache_disk_use_arg,
766 const IO_CACHE &cache_log)
767 : binlog_cache_data(trx_cache_arg,
768 max_binlog_cache_size_arg,
769 ptr_binlog_cache_use_arg,
770 ptr_binlog_cache_disk_use_arg,
771 cache_log)
772 {
773 }
774
775 using binlog_cache_data::finalize;
776
777 int finalize(THD *thd);
778 };
779
780
781 int
finalize(THD * thd)782 binlog_stmt_cache_data::finalize(THD *thd)
783 {
784 if (flags.immediate)
785 {
786 if (int error= finalize(thd, NULL))
787 return error;
788 }
789 else
790 {
791 Query_log_event
792 end_evt(thd, STRING_WITH_LEN("COMMIT"), false, false, true, 0, true);
793 if (int error= finalize(thd, &end_evt))
794 return error;
795 }
796 return 0;
797 }
798
799
800 class binlog_trx_cache_data : public binlog_cache_data
801 {
802 public:
binlog_trx_cache_data(bool trx_cache_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & cache_log)803 binlog_trx_cache_data(bool trx_cache_arg,
804 my_off_t max_binlog_cache_size_arg,
805 ulong *ptr_binlog_cache_use_arg,
806 ulong *ptr_binlog_cache_disk_use_arg,
807 const IO_CACHE &cache_log)
808 : binlog_cache_data(trx_cache_arg,
809 max_binlog_cache_size_arg,
810 ptr_binlog_cache_use_arg,
811 ptr_binlog_cache_disk_use_arg,
812 cache_log),
813 m_cannot_rollback(FALSE), before_stmt_pos(MY_OFF_T_UNDEF)
814 { }
815
reset()816 void reset()
817 {
818 DBUG_ENTER("reset");
819 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
820 m_cannot_rollback= FALSE;
821 before_stmt_pos= MY_OFF_T_UNDEF;
822 binlog_cache_data::reset();
823 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
824 DBUG_VOID_RETURN;
825 }
826
cannot_rollback() const827 bool cannot_rollback() const
828 {
829 return m_cannot_rollback;
830 }
831
set_cannot_rollback()832 void set_cannot_rollback()
833 {
834 m_cannot_rollback= TRUE;
835 }
836
get_prev_position() const837 my_off_t get_prev_position() const
838 {
839 return before_stmt_pos;
840 }
841
set_prev_position(my_off_t pos)842 void set_prev_position(my_off_t pos)
843 {
844 DBUG_ENTER("set_prev_position");
845 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
846 before_stmt_pos= pos;
847 cache_state_checkpoint(before_stmt_pos);
848 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
849 DBUG_VOID_RETURN;
850 }
851
restore_prev_position()852 void restore_prev_position()
853 {
854 DBUG_ENTER("restore_prev_position");
855 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
856 binlog_cache_data::truncate(before_stmt_pos);
857 cache_state_rollback(before_stmt_pos);
858 before_stmt_pos= MY_OFF_T_UNDEF;
859 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
860 DBUG_VOID_RETURN;
861 }
862
restore_savepoint(my_off_t pos)863 void restore_savepoint(my_off_t pos)
864 {
865 DBUG_ENTER("restore_savepoint");
866 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
867 binlog_cache_data::truncate(pos);
868 if (pos <= before_stmt_pos)
869 before_stmt_pos= MY_OFF_T_UNDEF;
870 cache_state_rollback(pos);
871 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong) before_stmt_pos));
872 DBUG_VOID_RETURN;
873 }
874
875 using binlog_cache_data::truncate;
876
877 int truncate(THD *thd, bool all);
878
879 private:
880 /*
881 It will be set TRUE if any statement which cannot be rolled back safely
882 is put in trx_cache.
883 */
884 bool m_cannot_rollback;
885
886 /*
887 Binlog position before the start of the current statement.
888 */
889 my_off_t before_stmt_pos;
890
891 binlog_trx_cache_data& operator=(const binlog_trx_cache_data& info);
892 binlog_trx_cache_data(const binlog_trx_cache_data& info);
893 };
894
895 class binlog_cache_mngr {
896 public:
binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,ulong * ptr_binlog_stmt_cache_use_arg,ulong * ptr_binlog_stmt_cache_disk_use_arg,my_off_t max_binlog_cache_size_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg,const IO_CACHE & stmt_cache_log,const IO_CACHE & trx_cache_log)897 binlog_cache_mngr(my_off_t max_binlog_stmt_cache_size_arg,
898 ulong *ptr_binlog_stmt_cache_use_arg,
899 ulong *ptr_binlog_stmt_cache_disk_use_arg,
900 my_off_t max_binlog_cache_size_arg,
901 ulong *ptr_binlog_cache_use_arg,
902 ulong *ptr_binlog_cache_disk_use_arg,
903 const IO_CACHE &stmt_cache_log,
904 const IO_CACHE &trx_cache_log)
905 : stmt_cache(FALSE, max_binlog_stmt_cache_size_arg,
906 ptr_binlog_stmt_cache_use_arg,
907 ptr_binlog_stmt_cache_disk_use_arg,
908 stmt_cache_log),
909 trx_cache(TRUE, max_binlog_cache_size_arg,
910 ptr_binlog_cache_use_arg,
911 ptr_binlog_cache_disk_use_arg,
912 trx_cache_log),
913 has_logged_xid(NULL)
914 { }
915
get_binlog_cache_data(bool is_transactional)916 binlog_cache_data* get_binlog_cache_data(bool is_transactional)
917 {
918 if (is_transactional)
919 return &trx_cache;
920 else
921 return &stmt_cache;
922 }
923
get_binlog_cache_log(bool is_transactional)924 IO_CACHE* get_binlog_cache_log(bool is_transactional)
925 {
926 return (is_transactional ? &trx_cache.cache_log : &stmt_cache.cache_log);
927 }
928
929 /**
930 Convenience method to check if both caches are empty.
931 */
is_binlog_empty() const932 bool is_binlog_empty() const {
933 return stmt_cache.is_binlog_empty() && trx_cache.is_binlog_empty();
934 }
935
936 /*
937 clear stmt_cache and trx_cache if they are not empty
938 */
reset()939 void reset()
940 {
941 if (!stmt_cache.is_binlog_empty())
942 stmt_cache.reset();
943 if (!trx_cache.is_binlog_empty())
944 trx_cache.reset();
945 }
946
947 #ifndef NDEBUG
dbug_any_finalized() const948 bool dbug_any_finalized() const {
949 return stmt_cache.is_finalized() || trx_cache.is_finalized();
950 }
951 #endif
952
953 /*
954 Convenience method to flush both caches to the binary log.
955
956 @param bytes_written Pointer to variable that will be set to the
957 number of bytes written for the flush.
958 @param wrote_xid Pointer to variable that will be set to @c
959 true if any XID event was written to the
960 binary log. Otherwise, the variable will not
961 be touched.
962 @return Error code on error, zero if no error.
963 */
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)964 int flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
965 {
966 my_off_t stmt_bytes= 0;
967 my_off_t trx_bytes= 0;
968 assert(stmt_cache.has_xid() == 0);
969 int error= stmt_cache.flush(thd, &stmt_bytes, wrote_xid);
970 if (error)
971 return error;
972 DEBUG_SYNC(thd, "after_flush_stm_cache_before_flush_trx_cache");
973 if (int error= trx_cache.flush(thd, &trx_bytes, wrote_xid))
974 return error;
975 *bytes_written= stmt_bytes + trx_bytes;
976 return 0;
977 }
978
979 /**
980 Check if at least one of transacaction and statement binlog caches
981 contains an empty transaction, other one is empty or contains an
982 empty transaction.
983
984 @return true At least one of transacaction and statement binlog
985 caches an empty transaction, other one is emptry
986 or contains an empty transaction.
987 @return false Otherwise.
988 */
has_empty_transaction()989 bool has_empty_transaction()
990 {
991 return (trx_cache.is_empty_or_has_empty_transaction() &&
992 stmt_cache.is_empty_or_has_empty_transaction() &&
993 !is_binlog_empty());
994 }
995
996 /**
997 Check if manager contains consistent snapshot of log coordinates
998 and gtid_executed.
999
1000 @return true Consistent snapshot available
1001 @return false Otherwise
1002 */
has_consistent_snapshot() const1003 bool has_consistent_snapshot() const
1004 {
1005 /**
1006 snapshot_gtid_executed can be empty string
1007 if gtid_mode=OFF.
1008 */
1009
1010 return binlog_info.log_file_name[0] != '\0';
1011 }
1012
1013 /**
1014 Removes consistent snapshot from cache.
1015 */
drop_consistent_snapshot()1016 void drop_consistent_snapshot()
1017 {
1018 binlog_info.log_file_name[0]= '\0';
1019 snapshot_gtid_executed.clear();
1020 }
1021
1022 binlog_stmt_cache_data stmt_cache;
1023 binlog_trx_cache_data trx_cache;
1024
1025 LOG_INFO binlog_info;
1026 std::string snapshot_gtid_executed;
1027
1028 /*
1029 The bool flag is for preventing do_binlog_xa_commit_rollback()
1030 execution twice which can happen for "external" xa commit/rollback.
1031 */
1032 bool has_logged_xid;
1033 private:
1034
1035 binlog_cache_mngr& operator=(const binlog_cache_mngr& info);
1036 binlog_cache_mngr(const binlog_cache_mngr& info);
1037 };
1038
1039
thd_get_cache_mngr(const THD * thd)1040 static binlog_cache_mngr *thd_get_cache_mngr(const THD *thd)
1041 {
1042 /*
1043 If opt_bin_log is not set, binlog_hton->slot == -1 and hence
1044 thd_get_ha_data(thd, hton) segfaults.
1045 */
1046 assert(opt_bin_log);
1047 return (binlog_cache_mngr *)thd_get_ha_data(thd, binlog_hton);
1048 }
1049
1050
1051 /**
1052 Checks if the BINLOG_CACHE_SIZE's value is greater than MAX_BINLOG_CACHE_SIZE.
1053 If this happens, the BINLOG_CACHE_SIZE is set to MAX_BINLOG_CACHE_SIZE.
1054 */
check_binlog_cache_size(THD * thd)1055 void check_binlog_cache_size(THD *thd)
1056 {
1057 if (binlog_cache_size > max_binlog_cache_size)
1058 {
1059 if (thd)
1060 {
1061 push_warning_printf(thd, Sql_condition::SL_WARNING,
1062 ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX,
1063 ER(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
1064 (ulong) binlog_cache_size,
1065 (ulong) max_binlog_cache_size);
1066 }
1067 else
1068 {
1069 sql_print_warning(ER_DEFAULT(ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
1070 binlog_cache_size,
1071 (ulong) max_binlog_cache_size);
1072 }
1073 binlog_cache_size= static_cast<ulong>(max_binlog_cache_size);
1074 }
1075 }
1076
1077 /**
1078 Checks if the BINLOG_STMT_CACHE_SIZE's value is greater than MAX_BINLOG_STMT_CACHE_SIZE.
1079 If this happens, the BINLOG_STMT_CACHE_SIZE is set to MAX_BINLOG_STMT_CACHE_SIZE.
1080 */
check_binlog_stmt_cache_size(THD * thd)1081 void check_binlog_stmt_cache_size(THD *thd)
1082 {
1083 if (binlog_stmt_cache_size > max_binlog_stmt_cache_size)
1084 {
1085 if (thd)
1086 {
1087 push_warning_printf(thd, Sql_condition::SL_WARNING,
1088 ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX,
1089 ER(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
1090 (ulong) binlog_stmt_cache_size,
1091 (ulong) max_binlog_stmt_cache_size);
1092 }
1093 else
1094 {
1095 sql_print_warning(ER_DEFAULT(ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
1096 binlog_stmt_cache_size,
1097 (ulong) max_binlog_stmt_cache_size);
1098 }
1099 binlog_stmt_cache_size= static_cast<ulong>(max_binlog_stmt_cache_size);
1100 }
1101 }
1102
1103 /**
1104 Check whether binlog_hton has valid slot and enabled
1105 */
binlog_enabled()1106 bool binlog_enabled()
1107 {
1108 return(binlog_hton && binlog_hton->slot != HA_SLOT_UNDEF);
1109 }
1110
1111 /*
1112 Save position of binary log transaction cache.
1113
1114 SYNPOSIS
1115 binlog_trans_log_savepos()
1116
1117 thd The thread to take the binlog data from
1118 pos Pointer to variable where the position will be stored
1119
1120 DESCRIPTION
1121
1122 Save the current position in the binary log transaction cache into
1123 the variable pointed to by 'pos'
1124 */
1125
1126 static void
binlog_trans_log_savepos(THD * thd,my_off_t * pos)1127 binlog_trans_log_savepos(THD *thd, my_off_t *pos)
1128 {
1129 DBUG_ENTER("binlog_trans_log_savepos");
1130 assert(pos != NULL);
1131 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
1132 assert(mysql_bin_log.is_open());
1133 *pos= cache_mngr->trx_cache.get_byte_position();
1134 DBUG_PRINT("return", ("position: %lu", (ulong) *pos));
1135 cache_mngr->trx_cache.cache_state_checkpoint(*pos);
1136 DBUG_VOID_RETURN;
1137 }
1138
binlog_dummy_recover(handlerton * hton,XID * xid,uint len)1139 static int binlog_dummy_recover(handlerton *hton, XID *xid, uint len)
1140 {
1141 return 0;
1142 }
1143
1144 /**
1145 Auxiliary class to copy serialized events to the binary log and
1146 correct some of the fields that are not known until just before
1147 writing the event.
1148
1149 This class allows feeding events in parts, so it is practical to use
1150 in do_write_cache() which reads events from an IO_CACHE where events
1151 may span mutiple cache pages.
1152
1153 The following fields are fixed before writing the event:
1154 - end_log_pos is set
1155 - the checksum is computed if checksums are enabled
1156 - the length is incremented by the checksum size if checksums are enabled
1157 */
1158 class Binlog_event_writer
1159 {
1160 IO_CACHE *output_cache;
1161 bool have_checksum;
1162 ha_checksum initial_checksum;
1163 ha_checksum checksum;
1164 uint32 end_log_pos;
1165 THD *thd;
1166
1167 public:
1168 /**
1169 Constructs a new Binlog_event_writer. Should be called once before
1170 starting to flush the transaction or statement cache to the
1171 binlog.
1172
1173 @param output_cache_arg IO_CACHE to write to.
1174 @param thd_arg THD to account written binlog byte statistics to
1175 @param have_checksum_al
1176 */
Binlog_event_writer(IO_CACHE * output_cache_arg,THD * thd_arg)1177 Binlog_event_writer(IO_CACHE *output_cache_arg, THD *thd_arg)
1178 : output_cache(output_cache_arg),
1179 have_checksum(binlog_checksum_options !=
1180 binary_log::BINLOG_CHECKSUM_ALG_OFF),
1181 initial_checksum(my_checksum(0L, NULL, 0)),
1182 checksum(initial_checksum),
1183 end_log_pos(my_b_tell(output_cache)),
1184 thd(thd_arg)
1185 {
1186 // Simulate checksum error
1187 if (DBUG_EVALUATE_IF("fault_injection_crc_value", 1, 0))
1188 checksum--;
1189 }
1190
1191 Event_encrypter event_encrypter;
1192
1193 /**
1194 Write part of an event to disk.
1195
1196 @param buf_p[IN,OUT] Points to buffer with data to write. The
1197 caller must set this initially, and it will be increased by the
1198 number of bytes written.
1199
1200 @param buf_len_p[IN,OUT] Points to the remaining length of the
1201 buffer, i.e., from buf_p to the end of the buffer. The caller
1202 must set this initially, and it will be decreased by the number of
1203 written bytes.
1204
1205 @param event_len_p[IN,OUT] Points to the remaining length of the
1206 event, i.e., the size of the event minus what was already written.
1207 This must be initialized to zero by the caller, must be remembered
1208 by the caller between calls, and is updated by this function: when
1209 an event begins it is set to the length of the event, and for each
1210 call it is decreased by the number of written bytes.
1211
1212 It is allowed that buf_len_p is less than event_len_p (i.e., event
1213 is only partial) and that event_len_p is less than buf_len_p
1214 (i.e., there is more than this event in the buffer). This
1215 function will write as much as is available of one event, but
1216 never more than one. It is required that buf_len_p >=
1217 LOG_EVENT_HEADER_LEN.
1218
1219 @retval true Error, i.e., my_b_write failed.
1220 @retval false Success.
1221 */
write_event_part(uchar ** buf_p,uint32 * buf_len_p,uint32 * event_len_p)1222 bool write_event_part(uchar **buf_p, uint32 *buf_len_p, uint32 *event_len_p)
1223 {
1224 DBUG_ENTER("Binlog_event_writer::write_event_part");
1225
1226 if (*buf_len_p == 0)
1227 DBUG_RETURN(false);
1228
1229 size_t len= *event_len_p;
1230 uchar *pos= *buf_p;
1231
1232 bool is_header= (*event_len_p == 0);
1233
1234 // This is the beginning of an event
1235 if (*event_len_p == 0)
1236 {
1237 // Caller must ensure that the first part of the event contains
1238 // a full event header.
1239 assert(*buf_len_p >= LOG_EVENT_HEADER_LEN);
1240
1241 // Read event length
1242 *event_len_p= uint4korr(*buf_p + EVENT_LEN_OFFSET);
1243
1244 // Increase end_log_pos
1245 end_log_pos+= *event_len_p;
1246
1247 // Change event length if checksum is enabled
1248 if (have_checksum)
1249 {
1250 int4store(*buf_p + EVENT_LEN_OFFSET,
1251 *event_len_p + BINLOG_CHECKSUM_LEN);
1252 // end_log_pos is shifted by the checksum length
1253 end_log_pos+= BINLOG_CHECKSUM_LEN;
1254 }
1255
1256 // Store end_log_pos
1257 int4store(*buf_p + LOG_POS_OFFSET, end_log_pos);
1258 assert(output_cache == mysql_bin_log.get_log_file());
1259
1260 len= *event_len_p;
1261
1262 if (event_encrypter.is_encryption_enabled())
1263 {
1264 uint32 write_bytes= std::min<uint32>(*buf_len_p, *event_len_p);
1265 len= write_bytes;
1266 assert(write_bytes > 0);
1267
1268 // update the checksum
1269 if (have_checksum)
1270 checksum= my_checksum(checksum, *buf_p, write_bytes);
1271
1272 if (event_encrypter.init(output_cache, pos, len))
1273 DBUG_RETURN(true);
1274 }
1275 }
1276
1277 // write the buffer
1278 uint32 write_bytes= std::min<uint32>(*buf_len_p, len);
1279 assert(write_bytes > 0);
1280 if (event_encrypter.encrypt_and_write(output_cache, pos, write_bytes))
1281 DBUG_RETURN(true);
1282
1283 if (event_encrypter.is_encryption_enabled() && is_header)
1284 write_bytes+=4;
1285 else if (have_checksum)
1286 checksum= my_checksum(checksum, *buf_p, write_bytes);
1287
1288 // Step positions.
1289 *buf_p+= write_bytes;
1290 *buf_len_p-= write_bytes;
1291 *event_len_p-= write_bytes;
1292 thd->binlog_bytes_written+= write_bytes;
1293
1294 if (*event_len_p == 0)
1295 {
1296 // store checksum
1297 if (have_checksum)
1298 {
1299 uchar checksum_buf[BINLOG_CHECKSUM_LEN];
1300 int4store(checksum_buf, checksum);
1301 if (event_encrypter.encrypt_and_write(output_cache, checksum_buf, BINLOG_CHECKSUM_LEN))
1302 DBUG_RETURN(true);
1303 thd->binlog_bytes_written+= BINLOG_CHECKSUM_LEN;
1304 checksum= initial_checksum;
1305 }
1306 if (event_encrypter.is_encryption_enabled() && event_encrypter.finish(output_cache))
1307 DBUG_RETURN(true);
1308 }
1309
1310 DBUG_RETURN(false);
1311 }
1312
1313 /**
1314 Write a full event to disk.
1315
1316 This is a wrapper around write_event_part, which handles the
1317 special case where you have a complete event in the buffer.
1318
1319 @param buf Buffer to write.
1320 @param buf_len Number of bytes to write.
1321
1322 @retval true Error, i.e., my_b_write failed.
1323 @retval false Success.
1324 */
write_full_event(uchar * buf,uint32 buf_len)1325 bool write_full_event(uchar *buf, uint32 buf_len)
1326 {
1327 uint32 event_len_unused= 0;
1328 bool ret= write_event_part(&buf, &buf_len, &event_len_unused);
1329 assert(buf_len == 0);
1330 assert(event_len_unused == 0);
1331 return ret;
1332 }
1333
1334 };
1335
1336
1337 /*
1338 this function is mostly a placeholder.
1339 conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
1340 should be moved here.
1341 */
1342
binlog_init(void * p)1343 static int binlog_init(void *p)
1344 {
1345 binlog_hton= (handlerton *)p;
1346 binlog_hton->state=opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
1347 binlog_hton->db_type=DB_TYPE_BINLOG;
1348 binlog_hton->savepoint_offset= sizeof(my_off_t);
1349 binlog_hton->close_connection= binlog_close_connection;
1350 binlog_hton->savepoint_set= binlog_savepoint_set;
1351 binlog_hton->savepoint_rollback= binlog_savepoint_rollback;
1352 binlog_hton->savepoint_rollback_can_release_mdl=
1353 binlog_savepoint_rollback_can_release_mdl;
1354 binlog_hton->commit= binlog_commit;
1355 binlog_hton->commit_by_xid= binlog_xa_commit;
1356 binlog_hton->rollback= binlog_rollback;
1357 binlog_hton->rollback_by_xid= binlog_xa_rollback;
1358 binlog_hton->prepare= binlog_prepare;
1359 binlog_hton->start_consistent_snapshot= binlog_start_consistent_snapshot;
1360 binlog_hton->clone_consistent_snapshot= binlog_clone_consistent_snapshot;
1361 binlog_hton->recover=binlog_dummy_recover;
1362 binlog_hton->flags= HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
1363 return 0;
1364 }
1365
1366
binlog_deinit(void * p)1367 static int binlog_deinit(void *p)
1368 {
1369 /* Using binlog as TC after the binlog has been unloaded, won't work */
1370 if (tc_log == &mysql_bin_log)
1371 tc_log= NULL;
1372 binlog_hton= NULL;
1373 return 0;
1374 }
1375
1376
binlog_close_connection(handlerton * hton,THD * thd)1377 static int binlog_close_connection(handlerton *hton, THD *thd)
1378 {
1379 DBUG_ENTER("binlog_close_connection");
1380 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
1381 assert(cache_mngr->is_binlog_empty());
1382 DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) NULL));
1383 thd_set_ha_data(thd, binlog_hton, NULL);
1384 cache_mngr->~binlog_cache_mngr();
1385 my_free(cache_mngr);
1386 DBUG_RETURN(0);
1387 }
1388
write_event(THD * thd,Log_event * ev)1389 int binlog_cache_data::write_event(THD *thd, Log_event *ev)
1390 {
1391 DBUG_ENTER("binlog_cache_data::write_event");
1392
1393 if (ev != NULL)
1394 {
1395 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
1396 {DBUG_SET("+d,simulate_file_write_error");});
1397
1398 DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
1399 {
1400 static int count= -1;
1401 count++;
1402 if(count %4 == 3 && ev->get_type_code() ==
1403 binary_log::WRITE_ROWS_EVENT)
1404 DBUG_SET("+d,simulate_temp_file_write_error");
1405 });
1406 if (ev->write(&cache_log) != 0)
1407 {
1408 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
1409 {
1410 DBUG_SET("-d,simulate_file_write_error");
1411 DBUG_SET("-d,simulate_disk_full_at_flush_pending");
1412 /*
1413 after +d,simulate_file_write_error the local cache
1414 is in unsane state. Since -d,simulate_file_write_error
1415 revokes the first simulation do_write_cache()
1416 can't be run without facing an assert.
1417 So it's blocked with the following 2nd simulation:
1418 */
1419 DBUG_SET("+d,simulate_do_write_cache_failure");
1420 });
1421
1422 DBUG_EXECUTE_IF("simulate_temp_file_write_error",
1423 {
1424 DBUG_SET("-d,simulate_temp_file_write_error");
1425 });
1426 /*
1427 If the flush has failed due to ENOSPC error, set the
1428 flush_error flag.
1429 */
1430 if (thd->is_error() && my_errno() == ENOSPC)
1431 {
1432 set_flush_error(thd);
1433 }
1434 DBUG_RETURN(1);
1435 }
1436 if (ev->get_type_code() == binary_log::XID_EVENT)
1437 flags.with_xid= true;
1438 if (ev->is_using_immediate_logging())
1439 flags.immediate= true;
1440 /* With respect to the event type being written */
1441 if (ev->is_sbr_logging_format())
1442 flags.with_sbr= true;
1443 if (ev->is_rbr_logging_format())
1444 flags.with_rbr= true;
1445 #ifndef EMBEDDED_LIBRARY
1446 /* With respect to empty transactions */
1447 if (ev->starts_group())
1448 flags.with_start= true;
1449 if (ev->ends_group())
1450 flags.with_end= true;
1451 if ((!ev->starts_group() && !ev->ends_group())
1452 ||ev->get_type_code() == binary_log::VIEW_CHANGE_EVENT)
1453 flags.with_content= true;
1454 #endif
1455 }
1456 DBUG_RETURN(0);
1457 }
1458
assign_automatic_gtids_to_flush_group(THD * first_seen)1459 bool MYSQL_BIN_LOG::assign_automatic_gtids_to_flush_group(THD *first_seen)
1460 {
1461 DBUG_ENTER("MYSQL_BIN_LOG::assign_automatic_gtids_to_flush_group");
1462 bool error= false;
1463 bool is_global_sid_locked= false;
1464 rpl_sidno locked_sidno= 0;
1465
1466 for (THD *head= first_seen ; head ; head = head->next_to_commit)
1467 {
1468 assert(head->variables.gtid_next.type != UNDEFINED_GROUP);
1469
1470 /* Generate GTID */
1471 if (head->variables.gtid_next.type == AUTOMATIC_GROUP)
1472 {
1473 if (!is_global_sid_locked)
1474 {
1475 global_sid_lock->rdlock();
1476 is_global_sid_locked= true;
1477 }
1478 if (gtid_state->generate_automatic_gtid(head,
1479 head->get_transaction()->get_rpl_transaction_ctx()->get_sidno(),
1480 head->get_transaction()->get_rpl_transaction_ctx()->get_gno(),
1481 &locked_sidno)
1482 != RETURN_STATUS_OK)
1483 {
1484 head->commit_error= THD::CE_FLUSH_GNO_EXHAUSTED_ERROR;
1485 error= true;
1486 }
1487 }
1488 else
1489 {
1490 DBUG_PRINT("info", ("thd->variables.gtid_next.type=%d "
1491 "thd->owned_gtid.sidno=%d",
1492 head->variables.gtid_next.type,
1493 head->owned_gtid.sidno));
1494 if (head->variables.gtid_next.type == GTID_GROUP)
1495 assert(head->owned_gtid.sidno > 0);
1496 else
1497 {
1498 assert(head->variables.gtid_next.type == ANONYMOUS_GROUP);
1499 assert(head->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS);
1500 }
1501 }
1502 }
1503
1504 if (locked_sidno > 0)
1505 gtid_state->unlock_sidno(locked_sidno);
1506
1507 if (is_global_sid_locked)
1508 global_sid_lock->unlock();
1509
1510 DBUG_RETURN(error);
1511 }
1512
1513
1514 /**
1515 Write the Gtid_log_event to the binary log (prior to writing the
1516 statement or transaction cache).
1517
1518 @param thd Thread that is committing.
1519 @param cache_data The cache that is flushing.
1520 @param writer The event will be written to this Binlog_event_writer object.
1521
1522 @retval false Success.
1523 @retval true Error.
1524 */
write_gtid(THD * thd,binlog_cache_data * cache_data,Binlog_event_writer * writer)1525 bool MYSQL_BIN_LOG::write_gtid(THD *thd, binlog_cache_data *cache_data,
1526 Binlog_event_writer *writer)
1527 {
1528 DBUG_ENTER("MYSQL_BIN_LOG::write_gtid");
1529
1530 /*
1531 The GTID for the THD was assigned at
1532 assign_automatic_gtids_to_flush_group()
1533 */
1534 assert(thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS ||
1535 thd->owned_gtid.sidno > 0);
1536
1537 int64 sequence_number, last_committed;
1538 /* Generate logical timestamps for MTS */
1539 m_dependency_tracker.get_dependency(thd, sequence_number, last_committed);
1540
1541 /*
1542 In case both the transaction cache and the statement cache are
1543 non-empty, both will be flushed in sequence and logged as
1544 different transactions. Then the second transaction must only
1545 be executed after the first one has committed. Therefore, we
1546 need to set last_committed for the second transaction equal to
1547 last_committed for the first transaction. This is done in
1548 binlog_cache_data::flush. binlog_cache_data::flush uses the
1549 condition trn_ctx->last_committed==SEQ_UNINIT to detect this
1550 situation, hence the need to set it here.
1551 */
1552 thd->get_transaction()->last_committed= SEQ_UNINIT;
1553
1554
1555 /*
1556 Generate and write the Gtid_log_event.
1557 */
1558 Gtid_log_event gtid_event(thd, cache_data->is_trx_cache(),
1559 last_committed, sequence_number,
1560 cache_data->may_have_sbr_stmts());
1561 uchar buf[Gtid_log_event::MAX_EVENT_LENGTH];
1562 uint32 buf_len= gtid_event.write_to_memory(buf);
1563 bool ret= writer->write_full_event(buf, buf_len);
1564
1565 DBUG_RETURN(ret);
1566 }
1567
1568
gtid_end_transaction(THD * thd)1569 int MYSQL_BIN_LOG::gtid_end_transaction(THD *thd)
1570 {
1571 DBUG_ENTER("MYSQL_BIN_LOG::gtid_end_transaction");
1572
1573 DBUG_PRINT("info", ("query=%s", thd->query().str));
1574
1575 if (thd->owned_gtid.sidno > 0)
1576 {
1577 assert(thd->variables.gtid_next.type == GTID_GROUP);
1578
1579 if (!opt_bin_log || (thd->slave_thread && !opt_log_slave_updates))
1580 {
1581 /*
1582 If the binary log is disabled for this thread (either by
1583 log_bin=0 or sql_log_bin=0 or by log_slave_updates=0 for a
1584 slave thread), then the statement must not be written to the
1585 binary log. In this case, we just save the GTID into the
1586 table directly.
1587
1588 (This only happens for DDL, since DML will save the GTID into
1589 table and release ownership inside ha_commit_trans.)
1590 */
1591 if (gtid_state->save(thd) != 0)
1592 {
1593 gtid_state->update_on_rollback(thd);
1594 DBUG_RETURN(1);
1595 }
1596 else
1597 gtid_state->update_on_commit(thd);
1598 }
1599 else
1600 {
1601 /*
1602 If statement is supposed to be written to binlog, we write it
1603 to the binary log. Inserting into table and releasing
1604 ownership will be done in the binlog commit handler.
1605 */
1606
1607 /*
1608 thd->cache_mngr may be uninitialized if the first transaction
1609 executed by the client is empty.
1610 */
1611 if (thd->binlog_setup_trx_data())
1612 DBUG_RETURN(1);
1613 binlog_cache_data *cache_data= &thd_get_cache_mngr(thd)->trx_cache;
1614
1615 // Generate BEGIN event
1616 Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), TRUE,
1617 FALSE, TRUE, 0, TRUE);
1618 assert(!qinfo.is_using_immediate_logging());
1619
1620 /*
1621 Write BEGIN event and then commit (which will generate commit
1622 event and Gtid_log_event)
1623 */
1624 DBUG_PRINT("debug", ("Writing to trx_cache"));
1625 if (cache_data->write_event(thd, &qinfo) ||
1626 mysql_bin_log.commit(thd, true))
1627 DBUG_RETURN(1);
1628 }
1629 }
1630 else if (thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS ||
1631 /*
1632 A transaction with an empty owned gtid should call
1633 end_gtid_violating_transaction(...) to clear the
1634 flag thd->has_gtid_consistency_violatoin in case
1635 it is set. It missed the clear in ordered_commit,
1636 because its binlog transaction cache is empty.
1637 */
1638 thd->has_gtid_consistency_violation)
1639
1640 {
1641 gtid_state->update_on_commit(thd);
1642 }
1643 else if (thd->variables.gtid_next.type == GTID_GROUP &&
1644 thd->owned_gtid.is_empty())
1645 {
1646 assert(thd->has_gtid_consistency_violation == false);
1647 gtid_state->update_on_commit(thd);
1648 }
1649
1650 DBUG_RETURN(0);
1651 }
1652
1653 /**
1654 This function finalizes the cache preparing for commit or rollback.
1655
1656 The function just writes all the necessary events to the cache but
1657 does not flush the data to the binary log file. That is the role of
1658 the binlog_cache_data::flush function.
1659
1660 @see binlog_cache_data::flush
1661
1662 @param thd The thread whose transaction should be flushed
1663 @param cache_data Pointer to the cache
1664 @param end_ev The end event either commit/rollback
1665
1666 @return
1667 nonzero if an error pops up when flushing the cache.
1668 */
1669 int
finalize(THD * thd,Log_event * end_event)1670 binlog_cache_data::finalize(THD *thd, Log_event *end_event)
1671 {
1672 DBUG_ENTER("binlog_cache_data::finalize");
1673 if (!is_binlog_empty())
1674 {
1675 assert(!flags.finalized);
1676 if (int error= flush_pending_event(thd))
1677 DBUG_RETURN(error);
1678 if (int error= write_event(thd, end_event))
1679 DBUG_RETURN(error);
1680 flags.finalized= true;
1681 DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
1682 }
1683 DBUG_RETURN(0);
1684 }
1685
1686
1687 /**
1688 The method writes XA END query to XA-prepared transaction's cache
1689 and calls the "basic" finalize().
1690
1691 @return error code, 0 success
1692 */
1693
finalize(THD * thd,Log_event * end_event,XID_STATE * xs)1694 int binlog_cache_data::finalize(THD *thd, Log_event *end_event, XID_STATE *xs)
1695 {
1696 int error= 0;
1697 char buf[XID::ser_buf_size];
1698 char query[sizeof("XA END") + 1 + sizeof(buf)];
1699 int qlen= sprintf(query, "XA END %s", xs->get_xid()->serialize(buf));
1700 Query_log_event qev(thd, query, qlen, true, false, true, 0);
1701
1702 if ((error= write_event(thd, &qev)))
1703 return error;
1704
1705 return finalize(thd, end_event);
1706 }
1707
1708
1709 /**
1710 Flush caches to the binary log.
1711
1712 If the cache is finalized, the cache will be flushed to the binary
1713 log file. If the cache is not finalized, nothing will be done.
1714
1715 If flushing fails for any reason, an error will be reported and the
1716 cache will be reset. Flushing can fail in two circumstances:
1717
1718 - It was not possible to write the cache to the file. In this case,
1719 it does not make sense to keep the cache.
1720
1721 - The cache was successfully written to disk but post-flush actions
1722 (such as binary log rotation) failed. In this case, the cache is
1723 already written to disk and there is no reason to keep it.
1724
1725 @see binlog_cache_data::finalize
1726 */
1727 int
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)1728 binlog_cache_data::flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid)
1729 {
1730 /*
1731 Doing a commit or a rollback including non-transactional tables,
1732 i.e., ending a transaction where we might write the transaction
1733 cache to the binary log.
1734
1735 We can always end the statement when ending a transaction since
1736 transactions are not allowed inside stored functions. If they
1737 were, we would have to ensure that we're not ending a statement
1738 inside a stored function.
1739 */
1740 DBUG_ENTER("binlog_cache_data::flush");
1741 DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
1742 int error= 0;
1743 if (flags.finalized)
1744 {
1745 my_off_t bytes_in_cache= my_b_tell(&cache_log);
1746 Transaction_ctx *trn_ctx= thd->get_transaction();
1747
1748 DBUG_PRINT("debug", ("bytes_in_cache: %llu", bytes_in_cache));
1749
1750 trn_ctx->sequence_number= mysql_bin_log.m_dependency_tracker.step();
1751 /*
1752 In case of two caches the transaction is split into two groups.
1753 The 2nd group is considered to be a successor of the 1st rather
1754 than to have a common commit parent with it.
1755 Notice that due to a simple method of detection that the current is
1756 the 2nd cache being flushed, the very first few transactions may be logged
1757 sequentially (a next one is tagged as if a preceding one is its
1758 commit parent).
1759 */
1760 if (trn_ctx->last_committed == SEQ_UNINIT)
1761 trn_ctx->last_committed= trn_ctx->sequence_number - 1;
1762
1763 /*
1764 The GTID is written prior to flushing the statement cache, if
1765 the transaction has written to the statement cache; and prior to
1766 flushing the transaction cache if the transaction has written to
1767 the transaction cache. If GTIDs are enabled, then transactional
1768 and non-transactional updates cannot be mixed, so at most one of
1769 the caches can be non-empty, so just one GTID will be
1770 generated. If GTIDs are disabled, then no GTID is generated at
1771 all; if both the transactional cache and the statement cache are
1772 non-empty then we get two Anonymous_gtid_log_events, which is
1773 correct.
1774 */
1775 Binlog_event_writer writer(mysql_bin_log.get_log_file(), thd);
1776
1777 if (mysql_bin_log.get_crypto_data()->is_enabled())
1778 writer.event_encrypter.enable_encryption(mysql_bin_log.get_crypto_data());
1779
1780 /* The GTID ownership process might set the commit_error */
1781 error= (thd->commit_error == THD::CE_FLUSH_ERROR ||
1782 thd->commit_error == THD::CE_FLUSH_GNO_EXHAUSTED_ERROR);
1783
1784 DBUG_EXECUTE_IF("simulate_binlog_flush_error",
1785 {
1786 if (rand() % 3 == 0)
1787 {
1788 thd->commit_error= THD::CE_FLUSH_ERROR;
1789 }
1790 };);
1791
1792 if (!error)
1793 if ((error= mysql_bin_log.write_gtid(thd, this, &writer)))
1794 thd->commit_error= THD::CE_FLUSH_ERROR;
1795 if (!error)
1796 error= mysql_bin_log.write_cache(thd, this, &writer);
1797
1798 if (flags.with_xid && error == 0)
1799 *wrote_xid= true;
1800
1801 /*
1802 Reset have to be after the if above, since it clears the
1803 with_xid flag
1804 */
1805 reset();
1806 if (bytes_written)
1807 *bytes_written= bytes_in_cache;
1808 }
1809 assert(!flags.finalized);
1810 DBUG_RETURN(error);
1811 }
1812
1813 /**
1814 This function truncates the transactional cache upon committing or rolling
1815 back either a transaction or a statement.
1816
1817 @param thd The thread whose transaction should be flushed
1818 @param cache_mngr Pointer to the cache data to be flushed
1819 @param all @c true means truncate the transaction, otherwise the
1820 statement must be truncated.
1821
1822 @return
1823 nonzero if an error pops up when truncating the transactional cache.
1824 */
1825 int
truncate(THD * thd,bool all)1826 binlog_trx_cache_data::truncate(THD *thd, bool all)
1827 {
1828 DBUG_ENTER("binlog_trx_cache_data::truncate");
1829 int error=0;
1830
1831 DBUG_PRINT("info", ("thd->options={ %s %s}, transaction: %s",
1832 FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
1833 FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
1834 all ? "all" : "stmt"));
1835
1836 remove_pending_event();
1837
1838 /*
1839 If rolling back an entire transaction or a single statement not
1840 inside a transaction, we reset the transaction cache.
1841 */
1842 if (ending_trans(thd, all))
1843 {
1844 if (has_incident())
1845 {
1846 const char* err_msg= "Error happend while resetting the transaction "
1847 "cache for a rolled back transaction or a single "
1848 "statement not inside a transaction.";
1849 error= mysql_bin_log.write_incident(thd, true/*need_lock_log=true*/,
1850 err_msg);
1851 }
1852 reset();
1853 }
1854 /*
1855 If rolling back a statement in a transaction, we truncate the
1856 transaction cache to remove the statement.
1857 */
1858 else if (get_prev_position() != MY_OFF_T_UNDEF)
1859 restore_prev_position();
1860
1861 thd->clear_binlog_table_maps();
1862
1863 DBUG_RETURN(error);
1864 }
1865
1866
get_xa_opt(THD * thd)1867 inline enum xa_option_words get_xa_opt(THD *thd)
1868 {
1869 enum xa_option_words xa_opt= XA_NONE;
1870 switch(thd->lex->sql_command)
1871 {
1872 case SQLCOM_XA_COMMIT:
1873 xa_opt= static_cast<Sql_cmd_xa_commit*>(thd->lex->m_sql_cmd)->get_xa_opt();
1874 break;
1875 default:
1876 break;
1877 }
1878
1879 return xa_opt;
1880 }
1881
1882
1883 /**
1884 Predicate function yields true when XA transaction is
1885 being logged having a proper state ready for prepare or
1886 commit in one phase.
1887
1888 @param thd THD pointer of running transaction
1889 @return true When the being prepared transaction should be binlogged,
1890 false otherwise.
1891 */
1892
is_loggable_xa_prepare(THD * thd)1893 inline bool is_loggable_xa_prepare(THD *thd)
1894 {
1895 /*
1896 simulate_commit_failure is doing a trick with XID_STATE while
1897 the ongoing transaction is not XA, and therefore to be errored out,
1898 asserted below. In that case because of the
1899 latter fact the function returns @c false.
1900 */
1901 DBUG_EXECUTE_IF("simulate_commit_failure",
1902 {
1903 XID_STATE *xs= thd->get_transaction()->xid_state();
1904 assert((thd->is_error() &&
1905 xs->get_state() == XID_STATE::XA_IDLE) ||
1906 xs->get_state() == XID_STATE::XA_NOTR);
1907 });
1908
1909 return DBUG_EVALUATE_IF("simulate_commit_failure",
1910 false,
1911 thd->get_transaction()->xid_state()->
1912 has_state(XID_STATE::XA_IDLE));
1913 }
1914
binlog_prepare(handlerton * hton,THD * thd,bool all)1915 static int binlog_prepare(handlerton *hton, THD *thd, bool all)
1916 {
1917 DBUG_ENTER("binlog_prepare");
1918 if (!all)
1919 {
1920 thd->get_transaction()->store_commit_parent(mysql_bin_log.
1921 m_dependency_tracker.get_max_committed_timestamp());
1922
1923 }
1924
1925 DBUG_RETURN(all && is_loggable_xa_prepare(thd) ?
1926 mysql_bin_log.commit(thd, true) : 0);
1927 }
1928
1929
1930 /**
1931 Logging XA commit/rollback of a prepared transaction.
1932
1933 The function is called at XA-commit or XA-rollback logging via
1934 two paths: the recovered-or-slave-applier or immediately through
1935 the XA-prepared transaction connection itself.
1936 It fills in appropiate event in the statement cache whenever
1937 xid state is marked with is_binlogged() flag that indicates
1938 the prepared part of the transaction must've been logged.
1939
1940 About early returns from the function.
1941 In the recovered-or-slave-applier case the function may be called
1942 for the 2nd time, which has_logged_xid monitors.
1943 ONE_PHASE option to XA-COMMIT is handled to skip
1944 writing XA-commit event now.
1945 And the final early return check is for the read-only XA that is
1946 not to be logged.
1947
1948 @param thd THD handle
1949 @param xid a pointer to XID object that is serialized
1950 @param commit when @c true XA-COMMIT is to be logged,
1951 and @c false when it's XA-ROLLBACK.
1952 @return error code, 0 success
1953 */
1954
do_binlog_xa_commit_rollback(THD * thd,XID * xid,bool commit)1955 inline int do_binlog_xa_commit_rollback(THD *thd, XID *xid, bool commit)
1956 {
1957 assert(thd->lex->sql_command == SQLCOM_XA_COMMIT ||
1958 thd->lex->sql_command == SQLCOM_XA_ROLLBACK);
1959
1960 XID_STATE *xid_state= thd->get_transaction()->xid_state();
1961 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
1962
1963 if (cache_mngr != NULL && cache_mngr->has_logged_xid)
1964 return 0;
1965
1966 if (get_xa_opt(thd) == XA_ONE_PHASE)
1967 return 0;
1968 if (!xid_state->is_binlogged())
1969 return 0; // nothing was really logged at prepare
1970 if (thd->is_error() && DBUG_EVALUATE_IF("simulate_xa_rm_error", 0, 1))
1971 return 0; // don't binlog if there are some errors.
1972
1973 assert(!xid->is_null() ||
1974 !(thd->variables.option_bits & OPTION_BIN_LOG));
1975
1976 char buf[XID::ser_buf_size];
1977 char query[(sizeof("XA ROLLBACK")) + 1 + sizeof(buf)];
1978 int qlen= sprintf(query, "XA %s %s", commit ? "COMMIT" : "ROLLBACK",
1979 xid->serialize(buf));
1980 Query_log_event qinfo(thd, query, qlen, false, true, true, 0, false);
1981 return mysql_bin_log.write_event(&qinfo);
1982 }
1983
binlog_start_consistent_snapshot(handlerton * hton,THD * thd)1984 static int binlog_start_consistent_snapshot(handlerton *hton, THD *thd)
1985 {
1986 int err= 0;
1987 LOG_INFO li;
1988 DBUG_ENTER("binlog_start_consistent_snapshot");
1989
1990 if ((err= thd->binlog_setup_trx_data()))
1991 DBUG_RETURN(err);
1992
1993 binlog_cache_mngr * const cache_mngr= thd_get_cache_mngr(thd);
1994
1995 /* Server layer calls us with LOCK_log locked, so this is safe. */
1996 mysql_bin_log.raw_get_current_log(&cache_mngr->binlog_info);
1997 gtid_state->get_snapshot_gtid_executed(cache_mngr->snapshot_gtid_executed);
1998
1999 trans_register_ha(thd, true, hton, NULL);
2000
2001 DBUG_RETURN(err);
2002 }
2003
binlog_clone_consistent_snapshot(handlerton * hton,THD * thd,THD * from_thd)2004 static int binlog_clone_consistent_snapshot(handlerton *hton, THD *thd,
2005 THD *from_thd)
2006 {
2007 binlog_cache_mngr *from_cache_mngr;
2008 binlog_cache_mngr *cache_mngr;
2009 int err= 0;
2010 char log_file_name[FN_REFLEN];
2011 my_off_t pos;
2012
2013 DBUG_ENTER("binlog_start_consistent_snapshot");
2014
2015 from_cache_mngr= opt_bin_log ?
2016 (binlog_cache_mngr *) thd_get_cache_mngr(from_thd) : NULL;
2017
2018 if (from_cache_mngr == NULL)
2019 {
2020 push_warning_printf(thd, Sql_condition::SL_WARNING,
2021 HA_ERR_UNSUPPORTED,
2022 "WITH CONSISTENT SNAPSHOT FROM SESSION was ignored for "
2023 "binary log, because the specified session does not "
2024 "have a consistent snapshot of binary log "
2025 "coordinates.");
2026 DBUG_RETURN(0);
2027 }
2028
2029 if ((err= thd->binlog_setup_trx_data()))
2030 DBUG_RETURN(err);
2031
2032 cache_mngr= thd_get_cache_mngr(thd);
2033
2034 pos= from_cache_mngr->binlog_info.pos;
2035 strmake(log_file_name, from_cache_mngr->binlog_info.log_file_name,
2036 sizeof(log_file_name) - 1);
2037
2038 mysql_mutex_lock(&thd->LOCK_thd_data);
2039
2040 cache_mngr->snapshot_gtid_executed= from_cache_mngr->snapshot_gtid_executed;
2041 cache_mngr->binlog_info.pos = pos;
2042 strmake(cache_mngr->binlog_info.log_file_name, log_file_name,
2043 sizeof(cache_mngr->binlog_info.log_file_name) - 1);
2044
2045 mysql_mutex_unlock(&thd->LOCK_thd_data);
2046
2047 trans_register_ha(thd, true, hton, NULL);
2048
2049 DBUG_RETURN(err);
2050 }
2051
2052
2053 /**
2054 Logging XA commit/rollback of a prepared transaction in the case
2055 it was disconnected and resumed (recovered), or executed by a slave applier.
2056
2057 @param thd THD handle
2058 @param xid a pointer to XID object
2059 @param commit when @c true XA-COMMIT is logged, otherwise XA-ROLLBACK
2060
2061 @return error code, 0 success
2062 */
2063
binlog_xa_commit_or_rollback(THD * thd,XID * xid,bool commit)2064 inline int binlog_xa_commit_or_rollback(THD *thd, XID *xid, bool commit)
2065 {
2066 int error= 0;
2067
2068 #ifndef NDEBUG
2069 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
2070 assert(!cache_mngr || !cache_mngr->has_logged_xid);
2071 #endif
2072 if (!(error= do_binlog_xa_commit_rollback(thd, xid, commit)))
2073 {
2074 /*
2075 Error can't be propagated naturally via result.
2076 A grand-caller has to access to it through thd's da.
2077 todo:
2078 Bug #20488921 ERROR PROPAGATION DOES FULLY WORK IN XA
2079 stands in the way of implementing a failure simulation
2080 for XA PREPARE/COMMIT/ROLLBACK.
2081 */
2082 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
2083
2084 if (cache_mngr)
2085 cache_mngr->has_logged_xid= true;
2086 if (commit)
2087 (void) mysql_bin_log.commit(thd, true);
2088 else
2089 (void) mysql_bin_log.rollback(thd, true);
2090 if (cache_mngr)
2091 cache_mngr->has_logged_xid= false;
2092 }
2093 return error;
2094 }
2095
2096
binlog_xa_commit(handlerton * hton,XID * xid)2097 static int binlog_xa_commit(handlerton *hton, XID *xid)
2098 {
2099 (void) binlog_xa_commit_or_rollback(current_thd, xid, true);
2100
2101 return 0;
2102 }
2103
2104
binlog_xa_rollback(handlerton * hton,XID * xid)2105 static int binlog_xa_rollback(handlerton *hton, XID *xid)
2106 {
2107 (void) binlog_xa_commit_or_rollback(current_thd, xid, false);
2108
2109 return 0;
2110 }
2111
2112 /**
2113 When a fatal error occurs due to which binary logging becomes impossible and
2114 the user specified binlog_error_action= ABORT_SERVER the following function is
2115 invoked. This function pushes the appropriate error message to client and logs
2116 the same to server error log and then aborts the server.
2117
2118 @param err_string Error string which specifies the exact error
2119 message from the caller.
2120
2121 @retval
2122 none
2123 */
exec_binlog_error_action_abort(const char * err_string)2124 static void exec_binlog_error_action_abort(const char* err_string)
2125 {
2126 THD *thd= current_thd;
2127 /*
2128 When the code enters here it means that there was an error at higher layer
2129 and my_error function could have been invoked to let the client know what
2130 went wrong during the execution.
2131
2132 But these errors will not let the client know that the server is going to
2133 abort. Even if we add an additional my_error function call at this point
2134 client will be able to see only the first error message that was set
2135 during the very first invocation of my_error function call.
2136
2137 The advantage of having multiple my_error function calls are visible when
2138 the server is up and running and user issues SHOW WARNINGS or SHOW ERROR
2139 calls. In this special scenario server will be immediately aborted and
2140 user will not be able execute the above SHOW commands.
2141
2142 Hence we clear the previous errors and push one critical error message to
2143 clients.
2144 */
2145 if (thd)
2146 {
2147 if (thd->is_error())
2148 thd->clear_error();
2149 /*
2150 Adding ME_ERRORLOG flag will ensure that the error is sent to both
2151 client and to the server error log as well.
2152 */
2153 my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(ME_ERRORLOG + ME_FATALERROR),
2154 err_string);
2155 thd->send_statement_status();
2156 }
2157 else
2158 sql_print_error("%s",err_string);
2159 abort();
2160 }
2161
2162
2163
2164 /**
2165 This function is called once after each statement.
2166
2167 @todo This function is currently not used any more and will
2168 eventually be eliminated. The real commit job is done in the
2169 MYSQL_BIN_LOG::commit function.
2170
2171 @see MYSQL_BIN_LOG::commit
2172
2173 @param hton The binlog handlerton.
2174 @param thd The client thread that executes the transaction.
2175 @param all This is @c true if this is a real transaction commit, and
2176 @false otherwise.
2177
2178 @see handlerton::commit
2179 */
binlog_commit(handlerton * hton,THD * thd,bool all)2180 static int binlog_commit(handlerton *hton, THD *thd, bool all)
2181 {
2182 DBUG_ENTER("binlog_commit");
2183 /*
2184 Nothing to do (any more) on commit.
2185 */
2186 DBUG_RETURN(0);
2187 }
2188
2189 /**
2190 This function is called when a transaction or a statement is rolled back.
2191
2192 @internal It is necessary to execute a rollback here if the
2193 transaction was rolled back because of executing a ROLLBACK TO
2194 SAVEPOINT command, but it is not used for normal rollback since
2195 MYSQL_BIN_LOG::rollback is called in that case.
2196
2197 @todo Refactor code to introduce a <code>MYSQL_BIN_LOG::rollback(THD
2198 *thd, SAVEPOINT *sv)</code> function in @c TC_LOG and have that
2199 function execute the necessary work to rollback to a savepoint.
2200
2201 @param hton The binlog handlerton.
2202 @param thd The client thread that executes the transaction.
2203 @param all This is @c true if this is a real transaction rollback, and
2204 @false otherwise.
2205
2206 @see handlerton::rollback
2207 */
binlog_rollback(handlerton * hton,THD * thd,bool all)2208 static int binlog_rollback(handlerton *hton, THD *thd, bool all)
2209 {
2210 DBUG_ENTER("binlog_rollback");
2211 int error= 0;
2212 if (thd->lex->sql_command == SQLCOM_ROLLBACK_TO_SAVEPOINT)
2213 error= mysql_bin_log.rollback(thd, all);
2214 DBUG_RETURN(error);
2215 }
2216
2217
2218 bool
append(THD * first)2219 Stage_manager::Mutex_queue::append(THD *first)
2220 {
2221 DBUG_ENTER("Stage_manager::Mutex_queue::append");
2222 lock();
2223 DBUG_PRINT("enter", ("first: 0x%llx", (ulonglong) first));
2224 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2225 (ulonglong) m_first, (ulonglong) &m_first,
2226 (ulonglong) m_last));
2227 int32 count= 1;
2228 bool empty= (m_first == NULL);
2229 *m_last= first;
2230 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2231 (ulonglong) m_first, (ulonglong) &m_first,
2232 (ulonglong) m_last));
2233 /*
2234 Go to the last THD instance of the list. We expect lists to be
2235 moderately short. If they are not, we need to track the end of
2236 the queue as well.
2237 */
2238
2239 while (first->next_to_commit)
2240 {
2241 count++;
2242 first= first->next_to_commit;
2243 }
2244 my_atomic_add32(&m_size, count);
2245
2246 m_last= &first->next_to_commit;
2247 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2248 (ulonglong) m_first, (ulonglong) &m_first,
2249 (ulonglong) m_last));
2250 assert(m_first || m_last == &m_first);
2251 DBUG_PRINT("return", ("empty: %s", YESNO(empty)));
2252 unlock();
2253 DBUG_RETURN(empty);
2254 }
2255
2256
2257 std::pair<bool, THD*>
pop_front()2258 Stage_manager::Mutex_queue::pop_front()
2259 {
2260 DBUG_ENTER("Stage_manager::Mutex_queue::pop_front");
2261 lock();
2262 THD *result= m_first;
2263 bool more= true;
2264 /*
2265 We do not set next_to_commit to NULL here since this is only used
2266 in the flush stage. We will have to call fetch_queue last here,
2267 and will then "cut" the linked list by setting the end of that
2268 queue to NULL.
2269 */
2270 if (result)
2271 m_first= result->next_to_commit;
2272 if (m_first == NULL)
2273 {
2274 more= false;
2275 m_last = &m_first;
2276 }
2277 assert(my_atomic_load32(&m_size) > 0);
2278 my_atomic_add32(&m_size, -1);
2279 assert(m_first || m_last == &m_first);
2280 unlock();
2281 DBUG_PRINT("return", ("result: 0x%llx, more: %s",
2282 (ulonglong) result, YESNO(more)));
2283 DBUG_RETURN(std::make_pair(more, result));
2284 }
2285
2286
2287 bool
enroll_for(StageID stage,THD * thd,mysql_mutex_t * stage_mutex)2288 Stage_manager::enroll_for(StageID stage, THD *thd, mysql_mutex_t *stage_mutex)
2289 {
2290 // If the queue was empty: we're the leader for this batch
2291 DBUG_PRINT("debug", ("Enqueue 0x%llx to queue for stage %d",
2292 (ulonglong) thd, stage));
2293 bool leader= m_queue[stage].append(thd);
2294
2295 #ifdef HAVE_REPLICATION
2296 if (stage == FLUSH_STAGE && has_commit_order_manager(thd))
2297 {
2298 Slave_worker *worker= dynamic_cast<Slave_worker *>(thd->rli_slave);
2299 Commit_order_manager *mngr= worker->get_commit_order_manager();
2300
2301 mngr->unregister_trx(worker);
2302 }
2303 #endif
2304
2305 /*
2306 We do not need to unlock the stage_mutex if it is LOCK_log when rotating
2307 binlog caused by logging incident log event, since it should be held
2308 always during rotation.
2309 */
2310 bool need_unlock_stage_mutex=
2311 !(mysql_bin_log.is_rotating_caused_by_incident &&
2312 stage_mutex == mysql_bin_log.get_log_lock());
2313
2314 /*
2315 The stage mutex can be NULL if we are enrolling for the first
2316 stage.
2317 */
2318 if (stage_mutex && need_unlock_stage_mutex)
2319 mysql_mutex_unlock(stage_mutex);
2320
2321 #ifndef NDEBUG
2322 DBUG_PRINT("info", ("This is a leader thread: %d (0=n 1=y)", leader));
2323
2324 DEBUG_SYNC(thd, "after_enrolling_for_stage");
2325
2326 switch (stage)
2327 {
2328 case Stage_manager::FLUSH_STAGE:
2329 DEBUG_SYNC(thd, "bgc_after_enrolling_for_flush_stage");
2330 break;
2331 case Stage_manager::SYNC_STAGE:
2332 DEBUG_SYNC(thd, "bgc_after_enrolling_for_sync_stage");
2333 break;
2334 case Stage_manager::COMMIT_STAGE:
2335 DEBUG_SYNC(thd, "bgc_after_enrolling_for_commit_stage");
2336 break;
2337 default:
2338 // not reached
2339 assert(0);
2340 }
2341
2342 DBUG_EXECUTE_IF("assert_leader", assert(leader););
2343 DBUG_EXECUTE_IF("assert_follower", assert(!leader););
2344 #endif
2345
2346 /*
2347 If the queue was not empty, we're a follower and wait for the
2348 leader to process the queue. If we were holding a mutex, we have
2349 to release it before going to sleep.
2350 */
2351 if (!leader)
2352 {
2353 mysql_mutex_lock(&m_lock_done);
2354 #ifndef NDEBUG
2355 /*
2356 Leader can be awaiting all-clear to preempt follower's execution.
2357 With setting the status the follower ensures it won't execute anything
2358 including thread-specific code.
2359 */
2360 thd->get_transaction()->m_flags.ready_preempt= 1;
2361 if (leader_await_preempt_status)
2362 mysql_cond_signal(&m_cond_preempt);
2363 #endif
2364 while (thd->get_transaction()->m_flags.pending)
2365 mysql_cond_wait(&m_cond_done, &m_lock_done);
2366 mysql_mutex_unlock(&m_lock_done);
2367 }
2368 return leader;
2369 }
2370
2371
fetch_and_empty()2372 THD *Stage_manager::Mutex_queue::fetch_and_empty()
2373 {
2374 DBUG_ENTER("Stage_manager::Mutex_queue::fetch_and_empty");
2375 lock();
2376 DBUG_PRINT("enter", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2377 (ulonglong) m_first, (ulonglong) &m_first,
2378 (ulonglong) m_last));
2379 THD *result= m_first;
2380 m_first= NULL;
2381 m_last= &m_first;
2382 DBUG_PRINT("info", ("m_first: 0x%llx, &m_first: 0x%llx, m_last: 0x%llx",
2383 (ulonglong) m_first, (ulonglong) &m_first,
2384 (ulonglong) m_last));
2385 DBUG_PRINT("info", ("fetched queue of %d transactions", my_atomic_load32(&m_size)));
2386 DBUG_PRINT("return", ("result: 0x%llx", (ulonglong) result));
2387 assert(my_atomic_load32(&m_size) >= 0);
2388 my_atomic_store32(&m_size, 0);
2389 unlock();
2390 DBUG_RETURN(result);
2391 }
2392
wait_count_or_timeout(ulong count,long usec,StageID stage)2393 void Stage_manager::wait_count_or_timeout(ulong count, long usec, StageID stage)
2394 {
2395 long to_wait=
2396 DBUG_EVALUATE_IF("bgc_set_infinite_delay", LONG_MAX, usec);
2397 /*
2398 For testing purposes while waiting for inifinity
2399 to arrive, we keep checking the queue size at regular,
2400 small intervals. Otherwise, waiting 0.1 * infinite
2401 is too long.
2402 */
2403 long delta=
2404 DBUG_EVALUATE_IF("bgc_set_infinite_delay", 100000,
2405 max<long>(1, (to_wait * 0.1)));
2406
2407 while (to_wait > 0 && (count == 0 || static_cast<ulong>(m_queue[stage].get_size()) < count))
2408 {
2409 #ifndef NDEBUG
2410 if (current_thd)
2411 DEBUG_SYNC(current_thd, "bgc_wait_count_or_timeout");
2412 #endif
2413 my_sleep(delta);
2414 to_wait -= delta;
2415 }
2416 }
2417
signal_done(THD * queue)2418 void Stage_manager::signal_done(THD *queue)
2419 {
2420 mysql_mutex_lock(&m_lock_done);
2421 for (THD *thd= queue ; thd ; thd = thd->next_to_commit)
2422 thd->get_transaction()->m_flags.pending= false;
2423 mysql_mutex_unlock(&m_lock_done);
2424 mysql_cond_broadcast(&m_cond_done);
2425 }
2426
2427 #ifndef NDEBUG
clear_preempt_status(THD * head)2428 void Stage_manager::clear_preempt_status(THD *head)
2429 {
2430 assert(head);
2431
2432 mysql_mutex_lock(&m_lock_done);
2433 while(!head->get_transaction()->m_flags.ready_preempt)
2434 {
2435 leader_await_preempt_status= true;
2436 mysql_cond_wait(&m_cond_preempt, &m_lock_done);
2437 }
2438 leader_await_preempt_status= false;
2439 mysql_mutex_unlock(&m_lock_done);
2440 }
2441 #endif
2442
2443 /**
2444 Write a rollback record of the transaction to the binary log.
2445
2446 For binary log group commit, the rollback is separated into three
2447 parts:
2448
2449 1. First part consists of filling the necessary caches and
2450 finalizing them (if they need to be finalized). After a cache is
2451 finalized, nothing can be added to the cache.
2452
2453 2. Second part execute an ordered flush and commit. This will be
2454 done using the group commit functionality in @c ordered_commit.
2455
2456 Since we roll back the transaction early, we call @c
2457 ordered_commit with the @c skip_commit flag set. The @c
2458 ha_commit_low call inside @c ordered_commit will then not be
2459 called.
2460
2461 3. Third part checks any errors resulting from the flush and handles
2462 them appropriately.
2463
2464 @see MYSQL_BIN_LOG::ordered_commit
2465 @see ha_commit_low
2466 @see ha_rollback_low
2467
2468 @param thd Session to commit
2469 @param all This is @c true if this is a real transaction rollback, and
2470 @false otherwise.
2471
2472 @return Error code, or zero if there were no error.
2473 */
2474
rollback(THD * thd,bool all)2475 int MYSQL_BIN_LOG::rollback(THD *thd, bool all)
2476 {
2477 int error= 0;
2478 bool stuff_logged= false;
2479 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
2480
2481 DBUG_ENTER("MYSQL_BIN_LOG::rollback(THD *thd, bool all)");
2482 DBUG_PRINT("enter", ("all: %s, cache_mngr: 0x%llx, thd->is_error: %s",
2483 YESNO(all), (ulonglong) cache_mngr,
2484 YESNO(thd->is_error())));
2485 /*
2486 Defer XA-transaction rollback until its XA-rollback event is recorded.
2487 When we are executing a ROLLBACK TO SAVEPOINT, we
2488 should only clear the caches since this function is called as part
2489 of the engine rollback.
2490 In other cases we roll back the transaction in the engines early
2491 since this will release locks and allow other transactions to
2492 start executing.
2493 */
2494 if (thd->lex->sql_command == SQLCOM_XA_ROLLBACK)
2495 {
2496 XID_STATE *xs= thd->get_transaction()->xid_state();
2497
2498 assert(all || !xs->is_binlogged() ||
2499 (!xs->is_in_recovery() && thd->is_error()));
2500 /*
2501 Whenever cache_mngr is not initialized, the xa prepared
2502 transaction's binary logging status must not be set, unless the
2503 transaction is rolled back through an external connection which
2504 has binlogging switched off.
2505 */
2506 assert(cache_mngr || !xs->is_binlogged()
2507 || !(is_open() && thd->variables.option_bits & OPTION_BIN_LOG));
2508
2509 if ((error= do_binlog_xa_commit_rollback(thd, xs->get_xid(), false)))
2510 goto end;
2511 cache_mngr= thd_get_cache_mngr(thd);
2512 }
2513 else if (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT)
2514 {
2515 /*
2516 Reset binlog_snapshot_% variables for the current connection so that the
2517 current coordinates are shown after committing a consistent snapshot
2518 transaction.
2519 */
2520 if (cache_mngr != NULL)
2521 {
2522 mysql_mutex_lock(&thd->LOCK_thd_data);
2523 cache_mngr->drop_consistent_snapshot();
2524 mysql_mutex_unlock(&thd->LOCK_thd_data);
2525 }
2526
2527 if ((error= ha_rollback_low(thd, all)))
2528 goto end;
2529 }
2530
2531 /*
2532 If there is no cache manager, or if there is nothing in the
2533 caches, there are no caches to roll back, so we're trivially done
2534 unless XA-ROLLBACK that yet to run rollback_low().
2535 */
2536 if (cache_mngr == NULL || cache_mngr->is_binlog_empty())
2537 {
2538 goto end;
2539 }
2540
2541 DBUG_PRINT("debug",
2542 ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
2543 YESNO(thd->get_transaction()->cannot_safely_rollback(
2544 Transaction_ctx::SESSION)),
2545 YESNO(cache_mngr->trx_cache.is_binlog_empty())));
2546 DBUG_PRINT("debug",
2547 ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
2548 YESNO(thd->get_transaction()->cannot_safely_rollback(
2549 Transaction_ctx::STMT)),
2550 YESNO(cache_mngr->stmt_cache.is_binlog_empty())));
2551
2552 /*
2553 If an incident event is set we do not flush the content of the statement
2554 cache because it may be corrupted.
2555 */
2556 if (cache_mngr->stmt_cache.has_incident())
2557 {
2558 const char* err_msg= "The content of the statement cache is corrupted "
2559 "while writing a rollback record of the transaction "
2560 "to the binary log.";
2561 error= write_incident(thd, true/*need_lock_log=true*/, err_msg);
2562 cache_mngr->stmt_cache.reset();
2563 }
2564 else if (!cache_mngr->stmt_cache.is_binlog_empty())
2565 {
2566 if (thd->lex->sql_command == SQLCOM_CREATE_TABLE &&
2567 thd->lex->select_lex->item_list.elements && /* With select */
2568 !(thd->lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) &&
2569 thd->is_current_stmt_binlog_format_row())
2570 {
2571 /*
2572 In row based binlog format, we reset the binlog statement cache
2573 when rolling back a single statement 'CREATE...SELECT' transaction,
2574 since the 'CREATE TABLE' event was put in the binlog statement cache.
2575 */
2576 cache_mngr->stmt_cache.reset();
2577 }
2578 else
2579 {
2580 if ((error= cache_mngr->stmt_cache.finalize(thd)))
2581 goto end;
2582 stuff_logged= true;
2583 }
2584 }
2585
2586 if (ending_trans(thd, all))
2587 {
2588 if (trans_cannot_safely_rollback(thd))
2589 {
2590 const char xa_rollback_str[]= "XA ROLLBACK";
2591 /*
2592 sizeof(xa_rollback_str) and XID::ser_buf_size both allocate `\0',
2593 so one of the two is used for necessary in the xa case `space' char
2594 */
2595 char query[sizeof(xa_rollback_str) + XID::ser_buf_size]= "ROLLBACK";
2596 XID_STATE *xs= thd->get_transaction()->xid_state();
2597
2598 if (thd->lex->sql_command == SQLCOM_XA_ROLLBACK)
2599 {
2600 /* this block is relevant only for not prepared yet and "local" xa trx */
2601 assert(thd->get_transaction()->xid_state()->
2602 has_state(XID_STATE::XA_IDLE));
2603 assert(!cache_mngr->has_logged_xid);
2604
2605 sprintf(query, "%s ", xa_rollback_str);
2606 xs->get_xid()->serialize(query + sizeof(xa_rollback_str));
2607 }
2608 /*
2609 If the transaction is being rolled back and contains changes that
2610 cannot be rolled back, the trx-cache's content is flushed.
2611 */
2612 Query_log_event
2613 end_evt(thd, query, strlen(query), true, false, true, 0, true);
2614 error= thd->lex->sql_command != SQLCOM_XA_ROLLBACK ?
2615 cache_mngr->trx_cache.finalize(thd, &end_evt) :
2616 cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
2617 stuff_logged= true;
2618 }
2619 else
2620 {
2621 /*
2622 If the transaction is being rolled back and its changes can be
2623 rolled back, the trx-cache's content is truncated.
2624 */
2625 error= cache_mngr->trx_cache.truncate(thd, all);
2626 }
2627 }
2628 else
2629 {
2630 /*
2631 If a statement is being rolled back, it is necessary to know
2632 exactly why a statement may not be safely rolled back as in
2633 some specific situations the trx-cache can be truncated.
2634
2635 If a temporary table is created or dropped, the trx-cache is not
2636 truncated. Note that if the stmt-cache is used, there is nothing
2637 to truncate in the trx-cache.
2638
2639 If a non-transactional table is updated and the binlog format is
2640 statement, the trx-cache is not truncated. The trx-cache is used
2641 when the direct option is off and a transactional table has been
2642 updated before the current statement in the context of the
2643 current transaction. Note that if the stmt-cache is used there is
2644 nothing to truncate in the trx-cache.
2645
2646 If other binlog formats are used, updates to non-transactional
2647 tables are written to the stmt-cache and trx-cache can be safely
2648 truncated, if necessary.
2649 */
2650 if (thd->get_transaction()->has_dropped_temp_table(
2651 Transaction_ctx::STMT) ||
2652 thd->get_transaction()->has_created_temp_table(
2653 Transaction_ctx::STMT) ||
2654 (thd->get_transaction()->has_modified_non_trans_table(
2655 Transaction_ctx::STMT) &&
2656 thd->variables.binlog_format == BINLOG_FORMAT_STMT))
2657 {
2658 /*
2659 If the statement is being rolled back and dropped or created a
2660 temporary table or modified a non-transactional table and the
2661 statement-based replication is in use, the statement's changes
2662 in the trx-cache are preserved.
2663 */
2664 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
2665 }
2666 else
2667 {
2668 /*
2669 Otherwise, the statement's changes in the trx-cache are
2670 truncated.
2671 */
2672 error= cache_mngr->trx_cache.truncate(thd, all);
2673 }
2674 }
2675 if (stuff_logged)
2676 {
2677 Transaction_ctx *trn_ctx= thd->get_transaction();
2678 trn_ctx->store_commit_parent(m_dependency_tracker.get_max_committed_timestamp());
2679 }
2680
2681 DBUG_PRINT("debug", ("error: %d", error));
2682 if (error == 0 && stuff_logged)
2683 {
2684 if (RUN_HOOK(transaction,
2685 before_commit,
2686 (thd, all,
2687 thd_get_cache_mngr(thd)->get_binlog_cache_log(true),
2688 thd_get_cache_mngr(thd)->get_binlog_cache_log(false),
2689 max<my_off_t>(max_binlog_cache_size,
2690 max_binlog_stmt_cache_size))))
2691 {
2692 //Reset the thread OK status before changing the outcome.
2693 if (thd->get_stmt_da()->is_ok())
2694 thd->get_stmt_da()->reset_diagnostics_area();
2695 my_error(ER_RUN_HOOK_ERROR, MYF(0), "before_commit");
2696 DBUG_RETURN(RESULT_ABORTED);
2697 }
2698 #ifndef NDEBUG
2699 /*
2700 XA rollback is always accepted.
2701 */
2702 if (thd->get_transaction()->get_rpl_transaction_ctx()->is_transaction_rollback())
2703 assert(0);
2704 #endif
2705
2706 error= prepare_ordered_commit(thd, all, /* skip_commit */ true);
2707 if (!error)
2708 error= ordered_commit(thd);
2709 }
2710
2711 if (check_write_error(thd))
2712 {
2713 /*
2714 We reach this point if the effect of a statement did not properly get into
2715 a cache and need to be rolled back.
2716 */
2717 error|= cache_mngr->trx_cache.truncate(thd, all);
2718 }
2719
2720 end:
2721 /* Deferred xa rollback to engines */
2722 if (!error && thd->lex->sql_command == SQLCOM_XA_ROLLBACK)
2723 {
2724 error= ha_rollback_low(thd, all);
2725 /* Successful XA-rollback commits the new gtid_state */
2726 if (!error && !thd->is_error())
2727 gtid_state->update_on_commit(thd);
2728 }
2729 /*
2730 When a statement errors out on auto-commit mode it is rollback
2731 implicitly, so the same should happen to its GTID.
2732 */
2733 if (!thd->in_active_multi_stmt_transaction())
2734 gtid_state->update_on_rollback(thd);
2735
2736 /*
2737 TODO: some errors are overwritten, which may cause problem,
2738 fix it later.
2739 */
2740 DBUG_PRINT("return", ("error: %d", error));
2741 DBUG_RETURN(error);
2742 }
2743
2744 /**
2745 @note
2746 How do we handle this (unlikely but legal) case:
2747 @verbatim
2748 [transaction] + [update to non-trans table] + [rollback to savepoint] ?
2749 @endverbatim
2750 The problem occurs when a savepoint is before the update to the
2751 non-transactional table. Then when there's a rollback to the savepoint, if we
2752 simply truncate the binlog cache, we lose the part of the binlog cache where
2753 the update is. If we want to not lose it, we need to write the SAVEPOINT
2754 command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
2755 is easy: it's just write at the end of the binlog cache, but the former
2756 should be *inserted* to the place where the user called SAVEPOINT. The
2757 solution is that when the user calls SAVEPOINT, we write it to the binlog
2758 cache (so no need to later insert it). As transactions are never intermixed
2759 in the binary log (i.e. they are serialized), we won't have conflicts with
2760 savepoint names when using mysqlbinlog or in the slave SQL thread.
2761 Then when ROLLBACK TO SAVEPOINT is called, if we updated some
2762 non-transactional table, we don't truncate the binlog cache but instead write
2763 ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
2764 will chop the SAVEPOINT command from the binlog cache, which is good as in
2765 that case there is no need to have it in the binlog).
2766 */
2767
binlog_savepoint_set(handlerton * hton,THD * thd,void * sv)2768 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv)
2769 {
2770 DBUG_ENTER("binlog_savepoint_set");
2771 int error= 1;
2772
2773 String log_query;
2774 if (log_query.append(STRING_WITH_LEN("SAVEPOINT ")))
2775 DBUG_RETURN(error);
2776 else
2777 append_identifier(thd, &log_query, thd->lex->ident.str,
2778 thd->lex->ident.length);
2779
2780 int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
2781 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2782 TRUE, FALSE, TRUE, errcode);
2783 /*
2784 We cannot record the position before writing the statement
2785 because a rollback to a savepoint (.e.g. consider it "S") would
2786 prevent the savepoint statement (i.e. "SAVEPOINT S") from being
2787 written to the binary log despite the fact that the server could
2788 still issue other rollback statements to the same savepoint (i.e.
2789 "S").
2790 Given that the savepoint is valid until the server releases it,
2791 ie, until the transaction commits or it is released explicitly,
2792 we need to log it anyway so that we don't have "ROLLBACK TO S"
2793 or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
2794 log.
2795 */
2796 if (!(error= mysql_bin_log.write_event(&qinfo)))
2797 binlog_trans_log_savepos(thd, (my_off_t*) sv);
2798
2799 DBUG_RETURN(error);
2800 }
2801
binlog_savepoint_rollback(handlerton * hton,THD * thd,void * sv)2802 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv)
2803 {
2804 DBUG_ENTER("binlog_savepoint_rollback");
2805 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
2806 my_off_t pos= *(my_off_t*) sv;
2807 assert(pos != ~(my_off_t) 0);
2808
2809 /*
2810 Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
2811 non-transactional table. Otherwise, truncate the binlog cache starting
2812 from the SAVEPOINT command.
2813 */
2814 if (trans_cannot_safely_rollback(thd))
2815 {
2816 String log_query;
2817 if (log_query.append(STRING_WITH_LEN("ROLLBACK TO ")))
2818 DBUG_RETURN(1);
2819 else
2820 {
2821 /*
2822 Before writing identifier to the binlog, make sure to
2823 quote the identifier properly so as to prevent any SQL
2824 injection on the slave.
2825 */
2826 append_identifier(thd, &log_query, thd->lex->ident.str,
2827 thd->lex->ident.length);
2828 }
2829
2830 int errcode= query_error_code(thd, thd->killed == THD::NOT_KILLED);
2831 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(),
2832 TRUE, FALSE, TRUE, errcode);
2833 DBUG_RETURN(mysql_bin_log.write_event(&qinfo));
2834 }
2835 // Otherwise, we truncate the cache
2836 cache_mngr->trx_cache.restore_savepoint(pos);
2837 /*
2838 When a SAVEPOINT is executed inside a stored function/trigger we force the
2839 pending event to be flushed with a STMT_END_F flag and clear the table maps
2840 as well to ensure that following DMLs will have a clean state to start
2841 with. ROLLBACK inside a stored routine has to finalize possibly existing
2842 current row-based pending event with cleaning up table maps. That ensures
2843 that following DMLs will have a clean state to start with.
2844 */
2845 if (thd->in_sub_stmt)
2846 thd->clear_binlog_table_maps();
2847 DBUG_RETURN(0);
2848 }
2849
2850 /**
2851 purge logs, master and slave sides both, related error code
2852 convertor.
2853 Called from @c purge_error_message(), @c MYSQL_BIN_LOG::reset_logs()
2854
2855 @param res an error code as used by purging routines
2856
2857 @return the user level error code ER_*
2858 */
purge_log_get_error_code(int res)2859 static uint purge_log_get_error_code(int res)
2860 {
2861 uint errcode= 0;
2862
2863 switch (res) {
2864 case 0: break;
2865 case LOG_INFO_EOF: errcode= ER_UNKNOWN_TARGET_BINLOG; break;
2866 case LOG_INFO_IO: errcode= ER_IO_ERR_LOG_INDEX_READ; break;
2867 case LOG_INFO_INVALID:errcode= ER_BINLOG_PURGE_PROHIBITED; break;
2868 case LOG_INFO_SEEK: errcode= ER_FSEEK_FAIL; break;
2869 case LOG_INFO_MEM: errcode= ER_OUT_OF_RESOURCES; break;
2870 case LOG_INFO_FATAL: errcode= ER_BINLOG_PURGE_FATAL_ERR; break;
2871 case LOG_INFO_IN_USE: errcode= ER_LOG_IN_USE; break;
2872 case LOG_INFO_EMFILE: errcode= ER_BINLOG_PURGE_EMFILE; break;
2873 default: errcode= ER_LOG_PURGE_UNKNOWN_ERR; break;
2874 }
2875
2876 return errcode;
2877 }
2878
2879 /**
2880 Check whether binlog state allows to safely release MDL locks after
2881 rollback to savepoint.
2882
2883 @param hton The binlog handlerton.
2884 @param thd The client thread that executes the transaction.
2885
2886 @return true - It is safe to release MDL locks.
2887 false - If it is not.
2888 */
binlog_savepoint_rollback_can_release_mdl(handlerton * hton,THD * thd)2889 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
2890 THD *thd)
2891 {
2892 DBUG_ENTER("binlog_savepoint_rollback_can_release_mdl");
2893 /**
2894 If we have not updated any non-transactional tables rollback
2895 to savepoint will simply truncate binlog cache starting from
2896 SAVEPOINT command. So it should be safe to release MDL acquired
2897 after SAVEPOINT command in this case.
2898 */
2899 DBUG_RETURN(!trans_cannot_safely_rollback(thd));
2900 }
2901
2902 #ifdef HAVE_REPLICATION
2903 /**
2904 Adjust log offset in the binary log file for all running slaves
2905 This class implements call back function for do_for_all_thd().
2906 It is called for each thd in thd list to adjust offset.
2907 */
2908 class Adjust_offset : public Do_THD_Impl
2909 {
2910 public:
Adjust_offset(my_off_t value)2911 Adjust_offset(my_off_t value) : m_purge_offset(value) {}
operator ()(THD * thd)2912 virtual void operator()(THD *thd)
2913 {
2914 LOG_INFO* linfo;
2915 mysql_mutex_lock(&thd->LOCK_thd_data);
2916 if ((linfo= thd->current_linfo))
2917 {
2918 /*
2919 Index file offset can be less that purge offset only if
2920 we just started reading the index file. In that case
2921 we have nothing to adjust.
2922 */
2923 if (linfo->index_file_offset < m_purge_offset)
2924 linfo->fatal = (linfo->index_file_offset != 0);
2925 else
2926 linfo->index_file_offset -= m_purge_offset;
2927 }
2928 mysql_mutex_unlock(&thd->LOCK_thd_data);
2929 }
2930 private:
2931 my_off_t m_purge_offset;
2932 };
2933
2934 /*
2935 Adjust the position pointer in the binary log file for all running slaves.
2936
2937 SYNOPSIS
2938 adjust_linfo_offsets()
2939 purge_offset Number of bytes removed from start of log index file
2940
2941 NOTES
2942 - This is called when doing a PURGE when we delete lines from the
2943 index log file.
2944
2945 REQUIREMENTS
2946 - Before calling this function, we have to ensure that no threads are
2947 using any binary log file before purge_offset.
2948
2949 TODO
    - Inform the slave threads that they should sync the position
      in the binary log file with flush_relay_log_info.
      Currently the sync is only done on the next read.
2953 */
adjust_linfo_offsets(my_off_t purge_offset)2954 static void adjust_linfo_offsets(my_off_t purge_offset)
2955 {
2956 Adjust_offset adjust_offset(purge_offset);
2957 Global_THD_manager::get_instance()->do_for_all_thd(&adjust_offset);
2958 }
2959
2960 /**
2961 This class implements Call back function for do_for_all_thd().
2962 It is called for each thd in thd list to count
2963 threads using bin log file
2964 */
2965
2966 class Log_in_use : public Do_THD_Impl
2967 {
2968 public:
Log_in_use(const char * value)2969 Log_in_use(const char* value) : m_log_name(value), m_count(0)
2970 {
2971 m_log_name_len = strlen(m_log_name) + 1;
2972 }
operator ()(THD * thd)2973 virtual void operator()(THD *thd)
2974 {
2975 LOG_INFO* linfo;
2976 mysql_mutex_lock(&thd->LOCK_thd_data);
2977 if ((linfo = thd->current_linfo))
2978 {
2979 if(!strncmp(m_log_name, linfo->log_file_name, m_log_name_len))
2980 {
2981 sql_print_warning("file %s was not purged because it was being read "
2982 "by thread number %u", m_log_name, thd->thread_id());
2983 m_count++;
2984 }
2985 }
2986 mysql_mutex_unlock(&thd->LOCK_thd_data);
2987 }
get_count()2988 int get_count() { return m_count; }
2989 private:
2990 const char* m_log_name;
2991 size_t m_log_name_len;
2992 int m_count;
2993 };
2994
log_in_use(const char * log_name)2995 static int log_in_use(const char* log_name)
2996 {
2997 Log_in_use log_in_use(log_name);
2998 #ifndef NDEBUG
2999 if (current_thd)
3000 DEBUG_SYNC(current_thd,"purge_logs_after_lock_index_before_thread_count");
3001 #endif
3002 Global_THD_manager::get_instance()->do_for_all_thd(&log_in_use);
3003 return log_in_use.get_count();
3004 }
3005
purge_error_message(THD * thd,int res)3006 static bool purge_error_message(THD* thd, int res)
3007 {
3008 uint errcode;
3009
3010 if ((errcode= purge_log_get_error_code(res)) != 0)
3011 {
3012 my_message(errcode, ER(errcode), MYF(0));
3013 return TRUE;
3014 }
3015 my_ok(thd);
3016 return FALSE;
3017 }
3018
3019 #endif /* HAVE_REPLICATION */
3020
check_binlog_magic(IO_CACHE * log,const char ** errmsg)3021 int check_binlog_magic(IO_CACHE* log, const char** errmsg)
3022 {
3023 char magic[4];
3024 assert(my_b_tell(log) == 0);
3025
3026 if (my_b_read(log, (uchar*) magic, sizeof(magic)))
3027 {
3028 *errmsg = "I/O error reading the header from the binary log";
3029 sql_print_error("%s, errno=%d, io cache code=%d", *errmsg, my_errno(),
3030 log->error);
3031 return 1;
3032 }
3033 if (memcmp(magic, BINLOG_MAGIC, sizeof(magic)))
3034 {
3035 *errmsg = "Binlog has bad magic number; It's not a binary log file that can be used by this version of MySQL";
3036 return 1;
3037 }
3038 return 0;
3039 }
3040
3041
open_binlog_file(IO_CACHE * log,const char * log_file_name,const char ** errmsg)3042 File open_binlog_file(IO_CACHE *log, const char *log_file_name, const char **errmsg)
3043 {
3044 File file;
3045 DBUG_ENTER("open_binlog_file");
3046
3047 if ((file= mysql_file_open(key_file_binlog,
3048 log_file_name, O_RDONLY | O_BINARY | O_SHARE,
3049 MYF(MY_WME))) < 0)
3050 {
3051 sql_print_error("Failed to open log (file '%s', errno %d)",
3052 log_file_name, my_errno());
3053 *errmsg = "Could not open log file";
3054 goto err;
3055 }
3056 if (init_io_cache_ext(log, file, IO_SIZE*2, READ_CACHE, 0, 0,
3057 MYF(MY_WME|MY_DONT_CHECK_FILESIZE), key_file_binlog_cache))
3058 {
3059 sql_print_error("Failed to create a cache on log (file '%s')",
3060 log_file_name);
3061 *errmsg = "Could not open log file";
3062 goto err;
3063 }
3064 if (check_binlog_magic(log,errmsg))
3065 goto err;
3066 DBUG_RETURN(file);
3067
3068 err:
3069 if (file >= 0)
3070 {
3071 mysql_file_close(file, MYF(0));
3072 end_io_cache(log);
3073 }
3074 DBUG_RETURN(-1);
3075 }
3076
is_transaction_empty(THD * thd)3077 bool is_transaction_empty(THD *thd)
3078 {
3079 DBUG_ENTER("is_transaction_empty");
3080 int rw_ha_count= check_trx_rw_engines(thd, Transaction_ctx::SESSION);
3081 rw_ha_count+= check_trx_rw_engines(thd, Transaction_ctx::STMT);
3082 DBUG_RETURN(rw_ha_count == 0);
3083 }
3084
check_trx_rw_engines(THD * thd,Transaction_ctx::enum_trx_scope trx_scope)3085 int check_trx_rw_engines(THD *thd, Transaction_ctx::enum_trx_scope trx_scope)
3086 {
3087 DBUG_ENTER("check_trx_rw_engines");
3088
3089 int rw_ha_count= 0;
3090 Ha_trx_info *ha_list=
3091 (Ha_trx_info *)thd->get_transaction()->ha_trx_info(trx_scope);
3092
3093 for (Ha_trx_info *ha_info= ha_list; ha_info; ha_info= ha_info->next()) {
3094 if (ha_info->is_trx_read_write())
3095 ++rw_ha_count;
3096 }
3097 DBUG_RETURN(rw_ha_count);
3098 }
3099
is_empty_transaction_in_binlog_cache(const THD * thd)3100 bool is_empty_transaction_in_binlog_cache(const THD* thd)
3101 {
3102 DBUG_ENTER("is_empty_transaction_in_binlog_cache");
3103
3104 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
3105 if (cache_mngr != NULL && cache_mngr->has_empty_transaction())
3106 {
3107 DBUG_RETURN(true);
3108 }
3109
3110 DBUG_RETURN(false);
3111 }
3112
3113
3114 /**
3115 This function checks if a transactional table was updated by the
3116 current transaction.
3117
3118 @param thd The client thread that executed the current statement.
3119 @return
3120 @c true if a transactional table was updated, @c false otherwise.
3121 */
3122 bool
trans_has_updated_trans_table(const THD * thd)3123 trans_has_updated_trans_table(const THD* thd)
3124 {
3125 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
3126
3127 return (cache_mngr ? !cache_mngr->trx_cache.is_binlog_empty() : 0);
3128 }
3129
3130 /**
3131 This function checks if a transactional table was updated by the
3132 current statement.
3133
3134 @param ha_list Registered storage engine handler list.
3135 @return
3136 @c true if a transactional table was updated, @c false otherwise.
3137 */
3138 bool
stmt_has_updated_trans_table(Ha_trx_info * ha_list)3139 stmt_has_updated_trans_table(Ha_trx_info* ha_list)
3140 {
3141 const Ha_trx_info *ha_info;
3142 for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
3143 {
3144 if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
3145 return (TRUE);
3146 }
3147 return (FALSE);
3148 }
3149
3150 bool
trans_has_noop_dml(Ha_trx_info * ha_list)3151 trans_has_noop_dml(Ha_trx_info* ha_list)
3152 {
3153 const Ha_trx_info *ha_info;
3154 for (ha_info= ha_list; ha_info; ha_info= ha_info->next())
3155 {
3156 if (ha_info->is_trx_noop_read_write())
3157 return (TRUE);
3158 }
3159 return (FALSE);
3160 }
3161
3162 /**
3163 This function checks if a transaction, either a multi-statement
3164 or a single statement transaction is about to commit or not.
3165
3166 @param thd The client thread that executed the current statement.
3167 @param all Committing a transaction (i.e. TRUE) or a statement
3168 (i.e. FALSE).
3169 @return
3170 @c true if committing a transaction, otherwise @c false.
3171 */
ending_trans(THD * thd,const bool all)3172 bool ending_trans(THD* thd, const bool all)
3173 {
3174 return (all || ending_single_stmt_trans(thd, all));
3175 }
3176
3177 /**
3178 This function checks if a single statement transaction is about
3179 to commit or not.
3180
3181 @param thd The client thread that executed the current statement.
3182 @param all Committing a transaction (i.e. TRUE) or a statement
3183 (i.e. FALSE).
3184 @return
3185 @c true if committing a single statement transaction, otherwise
3186 @c false.
3187 */
ending_single_stmt_trans(THD * thd,const bool all)3188 bool ending_single_stmt_trans(THD* thd, const bool all)
3189 {
3190 return (!all && !thd->in_multi_stmt_transaction_mode());
3191 }
3192
3193 /**
3194 This function checks if a transaction cannot be rolled back safely.
3195
3196 @param thd The client thread that executed the current statement.
3197 @return
3198 @c true if cannot be safely rolled back, @c false otherwise.
3199 */
trans_cannot_safely_rollback(const THD * thd)3200 bool trans_cannot_safely_rollback(const THD* thd)
3201 {
3202 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
3203
3204 return cache_mngr->trx_cache.cannot_rollback();
3205 }
3206
3207 /**
  This function checks if the current statement cannot be rolled back safely.
3209
3210 @param thd The client thread that executed the current statement.
3211 @return
3212 @c true if cannot be safely rolled back, @c false otherwise.
3213 */
stmt_cannot_safely_rollback(const THD * thd)3214 bool stmt_cannot_safely_rollback(const THD* thd)
3215 {
3216 return thd->get_transaction()->cannot_safely_rollback(Transaction_ctx::STMT);
3217 }
3218
3219 #ifndef EMBEDDED_LIBRARY
3220 /**
3221 Execute a PURGE BINARY LOGS TO <log> command.
3222
3223 @param thd Pointer to THD object for the client thread executing the
3224 statement.
3225
3226 @param to_log Name of the last log to purge.
3227
3228 @retval FALSE success
3229 @retval TRUE failure
3230 */
purge_master_logs(THD * thd,const char * to_log)3231 bool purge_master_logs(THD* thd, const char* to_log)
3232 {
3233 char search_file_name[FN_REFLEN];
3234 if (!mysql_bin_log.is_open())
3235 {
3236 my_ok(thd);
3237 return FALSE;
3238 }
3239
3240 mysql_bin_log.make_log_name(search_file_name, to_log);
3241 return purge_error_message(thd,
3242 mysql_bin_log.purge_logs(search_file_name, false,
3243 true/*need_lock_index=true*/,
3244 true/*need_update_threads=true*/,
3245 NULL, false));
3246 }
3247
3248
3249 /**
3250 Execute a PURGE BINARY LOGS BEFORE <date> command.
3251
3252 @param thd Pointer to THD object for the client thread executing the
3253 statement.
3254
3255 @param purge_time Date before which logs should be purged.
3256
3257 @retval FALSE success
3258 @retval TRUE failure
3259 */
purge_master_logs_before_date(THD * thd,time_t purge_time)3260 bool purge_master_logs_before_date(THD* thd, time_t purge_time)
3261 {
3262 if (!mysql_bin_log.is_open())
3263 {
3264 my_ok(thd);
3265 return 0;
3266 }
3267 return purge_error_message(thd,
3268 mysql_bin_log.purge_logs_before_date(purge_time,
3269 false));
3270 }
3271 #endif /* EMBEDDED_LIBRARY */
3272
3273 /*
3274 Helper function to get the error code of the query to be binlogged.
3275 */
query_error_code(THD * thd,bool not_killed)3276 int query_error_code(THD *thd, bool not_killed)
3277 {
3278 int error;
3279
3280 if (not_killed || (thd->killed == THD::KILL_BAD_DATA))
3281 {
3282 error= thd->is_error() ? thd->get_stmt_da()->mysql_errno() : 0;
3283
3284 /* thd->get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
3285 ER_QUERY_INTERRUPTED, So here we need to make sure that error
3286 is not set to these errors when specified not_killed by the
3287 caller.
3288 */
3289 if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED)
3290 error= 0;
3291 }
3292 else
3293 error= thd->killed_errno();
3294
3295 return error;
3296 }
3297
3298
3299 /**
3300 Copy content of 'from' file from offset to 'to' file.
3301
3302 - We do the copy outside of the IO_CACHE as the cache
3303 buffers would just make things slower and more complicated.
3304 In most cases the copy loop should only do one read.
3305
3306 @param from File to copy.
3307 @param to File to copy to.
3308 @param offset Offset in 'from' file.
3309
3310
  @retval
    0    ok
  @retval
    1    error
3315 */
copy_file(IO_CACHE * from,IO_CACHE * to,my_off_t offset)3316 static bool copy_file(IO_CACHE *from, IO_CACHE *to, my_off_t offset)
3317 {
3318 int bytes_read;
3319 uchar io_buf[IO_SIZE*2];
3320 DBUG_ENTER("copy_file");
3321
3322 mysql_file_seek(from->file, offset, MY_SEEK_SET, MYF(0));
3323 while(TRUE)
3324 {
3325 if ((bytes_read= (int) mysql_file_read(from->file, io_buf, sizeof(io_buf),
3326 MYF(MY_WME)))
3327 < 0)
3328 goto err;
3329 if (DBUG_EVALUATE_IF("fault_injection_copy_part_file", 1, 0))
3330 bytes_read= bytes_read/2;
3331 if (!bytes_read)
3332 break; // end of file
3333 if (mysql_file_write(to->file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
3334 goto err;
3335 }
3336
3337 DBUG_RETURN(0);
3338
3339 err:
3340 DBUG_RETURN(1);
3341 }
3342
3343
3344 #ifdef HAVE_REPLICATION
3345 /**
3346 Load data's io cache specific hook to be executed
3347 before a chunk of data is being read into the cache's buffer
  The function instantiates replication events and writes them into
  the binlog during LOAD DATA processing.
3350
3351 @param file pointer to io-cache
3352 @retval 0 success
3353 @retval 1 failure
3354 */
log_loaded_block(IO_CACHE * file)3355 int log_loaded_block(IO_CACHE* file)
3356 {
3357 DBUG_ENTER("log_loaded_block");
3358 LOAD_FILE_INFO *lf_info;
3359 uint block_len;
3360 /* buffer contains position where we started last read */
3361 uchar* buffer= (uchar*) my_b_get_buffer_start(file);
3362 uint max_event_size= current_thd->variables.max_allowed_packet;
3363 lf_info= (LOAD_FILE_INFO*) file->arg;
3364 if (lf_info->thd->is_current_stmt_binlog_format_row())
3365 DBUG_RETURN(0);
3366 if (lf_info->last_pos_in_file != HA_POS_ERROR &&
3367 lf_info->last_pos_in_file >= my_b_get_pos_in_file(file))
3368 DBUG_RETURN(0);
3369
3370 for (block_len= (uint) (my_b_get_bytes_in_buffer(file)); block_len > 0;
3371 buffer += min(block_len, max_event_size),
3372 block_len -= min(block_len, max_event_size))
3373 {
3374 lf_info->last_pos_in_file= my_b_get_pos_in_file(file);
3375 if (lf_info->wrote_create_file)
3376 {
3377 Append_block_log_event a(lf_info->thd, lf_info->thd->db().str, buffer,
3378 min(block_len, max_event_size),
3379 lf_info->log_delayed);
3380 if (mysql_bin_log.write_event(&a))
3381 DBUG_RETURN(1);
3382 }
3383 else
3384 {
3385 Begin_load_query_log_event b(lf_info->thd, lf_info->thd->db().str,
3386 buffer,
3387 min(block_len, max_event_size),
3388 lf_info->log_delayed);
3389 if (mysql_bin_log.write_event(&b))
3390 DBUG_RETURN(1);
3391 lf_info->wrote_create_file= 1;
3392 }
3393 }
3394 DBUG_RETURN(0);
3395 }
3396
3397 /* Helper function for SHOW BINLOG/RELAYLOG EVENTS */
show_binlog_events(THD * thd,MYSQL_BIN_LOG * binary_log)3398 bool show_binlog_events(THD *thd, MYSQL_BIN_LOG *binary_log)
3399 {
3400 Protocol *protocol= thd->get_protocol();
3401 List<Item> field_list;
3402 const char *errmsg = 0;
3403 bool ret = TRUE;
3404 IO_CACHE log;
3405 File file = -1;
3406 int old_max_allowed_packet= thd->variables.max_allowed_packet;
3407 LOG_INFO linfo;
3408
3409 DBUG_ENTER("show_binlog_events");
3410
3411 assert(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS ||
3412 thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS);
3413
3414 Format_description_log_event *description_event= new
3415 Format_description_log_event(3); /* MySQL 4.0 by default */
3416
3417 if (binary_log->is_open())
3418 {
3419 LEX_MASTER_INFO *lex_mi= &thd->lex->mi;
3420 SELECT_LEX_UNIT *unit= thd->lex->unit;
3421 ha_rows event_count, limit_start, limit_end;
3422 my_off_t pos = max<my_off_t>(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly
3423 char search_file_name[FN_REFLEN], *name;
3424 const char *log_file_name = lex_mi->log_file_name;
3425 mysql_mutex_t *log_lock = binary_log->get_log_lock();
3426 Log_event* ev;
3427
3428 unit->set_limit(thd->lex->current_select());
3429 limit_start= unit->offset_limit_cnt;
3430 limit_end= unit->select_limit_cnt;
3431
3432 name= search_file_name;
3433 if (log_file_name)
3434 binary_log->make_log_name(search_file_name, log_file_name);
3435 else
3436 name=0; // Find first log
3437
3438 linfo.index_file_offset = 0;
3439
3440 if (binary_log->find_log_pos(&linfo, name, true/*need_lock_index=true*/))
3441 {
3442 errmsg = "Could not find target log";
3443 goto err;
3444 }
3445
3446 mysql_mutex_lock(&thd->LOCK_thd_data);
3447 thd->current_linfo = &linfo;
3448 mysql_mutex_unlock(&thd->LOCK_thd_data);
3449
3450 if ((file=open_binlog_file(&log, linfo.log_file_name, &errmsg)) < 0)
3451 goto err;
3452
3453 my_off_t end_pos;
3454 /*
3455 Acquire LOCK_log only for the duration to calculate the
3456 log's end position. LOCK_log should be acquired even while
3457 we are checking whether the log is active log or not.
3458 */
3459 mysql_mutex_lock(log_lock);
3460 if (binary_log->is_active(linfo.log_file_name))
3461 {
3462 LOG_INFO li;
3463 binary_log->get_current_log(&li, false /*LOCK_log is already acquired*/);
3464 end_pos= li.pos;
3465 }
3466 else
3467 {
3468 end_pos= my_b_filelength(&log);
3469 }
3470 mysql_mutex_unlock(log_lock);
3471
3472 /*
3473 to account binlog event header size
3474 */
3475 thd->variables.max_allowed_packet += MAX_LOG_EVENT_HEADER;
3476
3477 DEBUG_SYNC(thd, "after_show_binlog_event_found_file");
3478
3479 /*
3480 open_binlog_file() sought to position 4.
3481 Read the first event in case it's a Format_description_log_event, to
3482 know the format. If there's no such event, we are 3.23 or 4.x. This
3483 code, like before, can't read 3.23 binlogs.
3484 Also read the second event, in case it's a Start_encryption_log_event.
3485 This code will fail on a mixed relay log (one which has Format_desc then
3486 Rotate then Format_desc).
3487 */
3488
3489 my_off_t scan_pos= BIN_LOG_HEADER_SIZE;
3490 while (scan_pos < pos)
3491 {
3492 ev= Log_event::read_log_event(&log, (mysql_mutex_t*)0, description_event,
3493 opt_master_verify_checksum);
3494 scan_pos= my_b_tell(&log);
3495 if (ev == NULL || (ev->get_type_code() != binary_log::FORMAT_DESCRIPTION_EVENT &&
3496 !ev->is_valid()))
3497 {
3498 errmsg = "Wrong offset or I/O error";
3499 goto err;
3500 }
3501 if (ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT)
3502 {
3503 delete description_event;
3504 description_event= (Format_description_log_event*) ev;
3505 if (!description_event->is_valid())
3506 {
3507 errmsg="Invalid Format_description event; could be out of memory";
3508 goto err;
3509 }
3510 }
3511 else
3512 {
3513 if (ev->get_type_code() == binary_log::START_ENCRYPTION_EVENT)
3514 {
3515 if (description_event->start_decryption(static_cast<Start_encryption_log_event*>(ev)))
3516 {
3517 delete ev;
3518 errmsg= "Could not initialize decryption of binlog.";
3519 goto err;
3520 }
3521 }
3522 delete ev;
3523 break;
3524 }
3525 }
3526
3527 my_b_seek(&log, pos);
3528
3529 for (event_count = 0;
3530 (ev = Log_event::read_log_event(&log, (mysql_mutex_t*) 0,
3531 description_event,
3532 opt_master_verify_checksum)); )
3533 {
3534 DEBUG_SYNC(thd, "wait_in_show_binlog_events_loop");
3535 if (event_count >= limit_start &&
3536 ev->net_send(protocol, linfo.log_file_name, pos))
3537 {
3538 errmsg = "Net error";
3539 delete ev;
3540 goto err;
3541 }
3542
3543 if (ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT)
3544 {
3545 Format_description_log_event* new_fdle=
3546 static_cast<Format_description_log_event*>(ev);
3547 new_fdle->copy_crypto_data(*description_event);
3548 delete description_event;
3549 description_event= new_fdle;
3550 }
3551 else
3552 {
3553 if (ev->get_type_code() == binary_log::START_ENCRYPTION_EVENT)
3554 {
3555 if (description_event->start_decryption(static_cast<Start_encryption_log_event*>(ev)))
3556 {
3557 errmsg= "Error starting decryption";
3558 delete ev;
3559 goto err;
3560 }
3561 }
3562 delete ev;
3563 }
3564
3565 pos = my_b_tell(&log);
3566
3567 if (++event_count >= limit_end || pos >= end_pos)
3568 break;
3569 }
3570
3571 if (event_count < limit_end && log.error)
3572 {
3573 errmsg = "Wrong offset or I/O error";
3574 goto err;
3575 }
3576
3577 }
3578 // Check that linfo is still on the function scope.
3579 DEBUG_SYNC(thd, "after_show_binlog_events");
3580
3581 ret= FALSE;
3582
3583 err:
3584 delete description_event;
3585 if (file >= 0)
3586 {
3587 end_io_cache(&log);
3588 mysql_file_close(file, MYF(MY_WME));
3589 }
3590
3591 if (errmsg)
3592 {
3593 if(thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS)
3594 my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
3595 "SHOW RELAYLOG EVENTS", errmsg);
3596 else
3597 my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0),
3598 "SHOW BINLOG EVENTS", errmsg);
3599 }
3600 else
3601 my_eof(thd);
3602
3603 mysql_mutex_lock(&thd->LOCK_thd_data);
3604 thd->current_linfo = 0;
3605 mysql_mutex_unlock(&thd->LOCK_thd_data);
3606 thd->variables.max_allowed_packet= old_max_allowed_packet;
3607 DBUG_RETURN(ret);
3608 }
3609
3610 /**
3611 Execute a SHOW BINLOG EVENTS statement.
3612
3613 @param thd Pointer to THD object for the client thread executing the
3614 statement.
3615
3616 @retval FALSE success
3617 @retval TRUE failure
3618 */
mysql_show_binlog_events(THD * thd)3619 bool mysql_show_binlog_events(THD* thd)
3620 {
3621 List<Item> field_list;
3622 DBUG_ENTER("mysql_show_binlog_events");
3623
3624 assert(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS);
3625
3626 Log_event::init_show_field_list(&field_list);
3627 if (thd->send_result_metadata(&field_list,
3628 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
3629 DBUG_RETURN(TRUE);
3630
3631 /*
3632 Wait for handlers to insert any pending information
3633 into the binlog. For e.g. ndb which updates the binlog asynchronously
3634 this is needed so that the uses sees all its own commands in the binlog
3635 */
3636 ha_binlog_wait(thd);
3637
3638 DBUG_RETURN(show_binlog_events(thd, &mysql_bin_log));
3639 }
3640
3641 #endif /* HAVE_REPLICATION */
3642
3643
/*
  Construct a closed binary log object.

  sync_period is stored as a pointer (sync_period_ptr) and
  io_cache_type_arg is stored as the cache type. Counters start at zero,
  file_id and open_count at one, and both checksum algorithm members at
  BINLOG_CHECKSUM_ALG_UNDEF. Mutexes and condition variables are NOT
  initialised here; see the comment in the body.
*/
MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period,
                             enum cache_type io_cache_type_arg)
  :name(NULL), write_error(false), inited(false),
   io_cache_type(io_cache_type_arg),
#ifdef HAVE_PSI_INTERFACE
   m_key_LOCK_log(key_LOG_LOCK_log),
#endif
   bytes_written(0), binlog_space_total(0), file_id(1),
   open_count(1), sync_period_ptr(sync_period), sync_counter(0),
   is_relay_log(0), signal_cnt(0),
   checksum_alg_reset(binary_log::BINLOG_CHECKSUM_ALG_UNDEF),
   relay_log_checksum_alg(binary_log::BINLOG_CHECKSUM_ALG_UNDEF),
   previous_gtid_set_relaylog(0), snapshot_lock_acquired(false),
   is_rotating_caused_by_incident(false)
{
  log_state.atomic_set(LOG_CLOSED);
  /*
    We don't want to initialize locks here as such initialization depends on
    safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
    called only in main(). Doing initialization here would make it happen
    before main().
  */
  m_prep_xids.atomic_set(0);
  /* Zero the log file cache and start with an empty index file name. */
  memset(&log_file, 0, sizeof(log_file));
  index_file_name[0] = 0;
  /* Zero the three index-related caches. */
  memset(&index_file, 0, sizeof(index_file));
  memset(&purge_index_file, 0, sizeof(purge_index_file));
  memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
}
3673
3674
3675 /* this is called only once */
3676
cleanup()3677 void MYSQL_BIN_LOG::cleanup()
3678 {
3679 DBUG_ENTER("cleanup");
3680 if (inited)
3681 {
3682 inited= 0;
3683 close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, true /*need_lock_log=true*/,
3684 true /*need_lock_index=true*/);
3685 mysql_mutex_destroy(&LOCK_log);
3686 mysql_mutex_destroy(&LOCK_index);
3687 mysql_mutex_destroy(&LOCK_commit);
3688 mysql_mutex_destroy(&LOCK_sync);
3689 mysql_mutex_destroy(&LOCK_binlog_end_pos);
3690 mysql_mutex_destroy(&LOCK_xids);
3691 mysql_cond_destroy(&update_cond);
3692 mysql_cond_destroy(&m_prep_xids_cond);
3693 stage_manager.deinit();
3694 }
3695 DBUG_VOID_RETURN;
3696 }
3697
3698
/*
  Initialise the mutexes and condition variables of the binary log and
  its stage manager, and mark the object as initialised.

  Must be called while `inited` is still 0 (asserted). cleanup() destroys
  the objects created here.
*/
void MYSQL_BIN_LOG::init_pthread_objects()
{
  assert(inited == 0);
  inited= 1;
  /* LOCK_log and LOCK_index use the SLOW mutex kind, the rest FAST. */
  mysql_mutex_init(m_key_LOCK_log, &LOCK_log, MY_MUTEX_INIT_SLOW);
  mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
  mysql_mutex_init(m_key_LOCK_commit, &LOCK_commit, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(m_key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
                   MY_MUTEX_INIT_FAST);
  mysql_mutex_init(m_key_LOCK_xids, &LOCK_xids, MY_MUTEX_INIT_FAST);
  mysql_cond_init(m_key_update_cond, &update_cond);
  mysql_cond_init(m_key_prep_xids_cond, &m_prep_xids_cond);
  /* The instrumentation keys are passed only when PSI is compiled in. */
  stage_manager.init(
#ifdef HAVE_PSI_INTERFACE
                   m_key_LOCK_flush_queue,
                   m_key_LOCK_sync_queue,
                   m_key_LOCK_commit_queue,
                   m_key_LOCK_done, m_key_COND_done
#endif
                   );
}
3721
3722
3723 /**
3724 Check if a string is a valid number.
3725
3726 @param str String to test
3727 @param res Store value here
3728 @param allow_wildcards Set to 1 if we should ignore '%' and '_'
3729
3730 @note
3731 For the moment the allow_wildcards argument is not used
3732 Should be moved to some other file.
3733
3734 @retval
3735 1 String is a number
3736 @retval
3737 0 String is not a number
3738 */
3739
is_number(const char * str,ulong * res,bool allow_wildcards)3740 static bool is_number(const char *str,
3741 ulong *res, bool allow_wildcards)
3742 {
3743 int flag;
3744 const char *start;
3745 DBUG_ENTER("is_number");
3746
3747 flag=0; start=str;
3748 while (*str++ == ' ') ;
3749 if (*--str == '-' || *str == '+')
3750 str++;
3751 while (my_isdigit(files_charset_info,*str) ||
3752 (allow_wildcards && (*str == wild_many || *str == wild_one)))
3753 {
3754 flag=1;
3755 str++;
3756 }
3757 if (*str == '.')
3758 {
3759 for (str++ ;
3760 my_isdigit(files_charset_info,*str) ||
3761 (allow_wildcards && (*str == wild_many || *str == wild_one)) ;
3762 str++, flag=1) ;
3763 }
3764 if (*str != 0 || flag == 0)
3765 DBUG_RETURN(0);
3766 if (res)
3767 *res=atol(start);
3768 DBUG_RETURN(1); /* Number ok */
3769 } /* is_number */
3770
3771
3772 /*
3773 Number of warnings that will be printed to error log
3774 before extension number is exhausted.
3775 */
3776 #define LOG_WARN_UNIQUE_FN_EXT_LEFT 1000
3777
3778 /**
3779 Find a unique filename for 'filename.#'.
3780
3781 Set '#' to the highest existing log file extension plus one.
3782
3783 This function will return nonzero if: (i) the generated name
3784 exceeds FN_REFLEN; (ii) if the number of extensions is exhausted;
3785 or (iii) some other error happened while examining the filesystem.
3786
3787 @return
3788 nonzero if not possible to get unique filename.
3789 */
3790
find_uniq_filename(char * name)3791 static int find_uniq_filename(char *name)
3792 {
3793 uint i;
3794 char buff[FN_REFLEN], ext_buf[FN_REFLEN];
3795 struct st_my_dir *dir_info;
3796 struct fileinfo *file_info;
3797 ulong max_found= 0, next= 0, number= 0;
3798 size_t buf_length, length;
3799 char *start, *end;
3800 int error= 0;
3801 DBUG_ENTER("find_uniq_filename");
3802
3803 length= dirname_part(buff, name, &buf_length);
3804 start= name + length;
3805 end= strend(start);
3806
3807 *end='.';
3808 length= (size_t) (end - start + 1);
3809
3810 if ((DBUG_EVALUATE_IF("error_unique_log_filename", 1,
3811 !(dir_info= my_dir(buff,MYF(MY_DONT_SORT))))))
3812 { // This shouldn't happen
3813 my_stpcpy(end,".1"); // use name+1
3814 DBUG_RETURN(1);
3815 }
3816 file_info= dir_info->dir_entry;
3817 for (i= dir_info->number_off_files ; i-- ; file_info++)
3818 {
3819 if (strncmp(file_info->name, start, length) == 0 &&
3820 is_number(file_info->name+length, &number,0))
3821 {
3822 set_if_bigger(max_found, number);
3823 }
3824 }
3825 my_dirend(dir_info);
3826
3827 /* check if reached the maximum possible extension number */
3828 if (max_found == MAX_LOG_UNIQUE_FN_EXT)
3829 {
3830 sql_print_error("Log filename extension number exhausted: %06lu. \
3831 Please fix this by archiving old logs and \
3832 updating the index files.", max_found);
3833 error= 1;
3834 goto end;
3835 }
3836
3837 next= max_found + 1;
3838 if (sprintf(ext_buf, "%06lu", next)<0)
3839 {
3840 error= 1;
3841 goto end;
3842 }
3843 *end++='.';
3844
3845 /*
3846 Check if the generated extension size + the file name exceeds the
3847 buffer size used. If one did not check this, then the filename might be
3848 truncated, resulting in error.
3849 */
3850 if (((strlen(ext_buf) + (end - name)) >= FN_REFLEN))
3851 {
3852 sql_print_error("Log filename too large: %s%s (%zu). \
3853 Please fix this by archiving old logs and updating the \
3854 index files.", name, ext_buf, (strlen(ext_buf) + (end - name)));
3855 error= 1;
3856 goto end;
3857 }
3858
3859 if (sprintf(end, "%06lu", next)<0)
3860 {
3861 error= 1;
3862 goto end;
3863 }
3864
3865 /* print warning if reaching the end of available extensions. */
3866 if ((next > (MAX_LOG_UNIQUE_FN_EXT - LOG_WARN_UNIQUE_FN_EXT_LEFT)))
3867 sql_print_warning("Next log extension: %lu. \
3868 Remaining log filename extensions: %lu. \
3869 Please consider archiving some logs.", next, (MAX_LOG_UNIQUE_FN_EXT - next));
3870
3871 end:
3872 DBUG_RETURN(error);
3873 }
3874
generate_new_name(char * new_name,const char * log_name)3875 int MYSQL_BIN_LOG::generate_new_name(char *new_name, const char *log_name)
3876 {
3877 fn_format(new_name, log_name, mysql_data_home, "", 4);
3878 if (!fn_ext(log_name)[0])
3879 {
3880 if (find_uniq_filename(new_name))
3881 {
3882 my_printf_error(ER_NO_UNIQUE_LOGFILE, ER(ER_NO_UNIQUE_LOGFILE),
3883 MYF(ME_FATALERROR), log_name);
3884 sql_print_error(ER(ER_NO_UNIQUE_LOGFILE), log_name);
3885 return 1;
3886 }
3887 }
3888 return 0;
3889 }
3890
3891 /**
3892 @todo
3893 The following should be using fn_format(); We just need to
3894 first change fn_format() to cut the file name if it's too long.
3895 */
generate_name(const char * log_name,const char * suffix,char * buff)3896 const char *MYSQL_BIN_LOG::generate_name(const char *log_name,
3897 const char *suffix,
3898 char *buff)
3899 {
3900 if (!log_name || !log_name[0])
3901 {
3902 strmake(buff, default_logfile_name, FN_REFLEN - strlen(suffix) - 1);
3903 return (const char *)
3904 fn_format(buff, buff, "", suffix, MYF(MY_REPLACE_EXT|MY_REPLACE_DIR));
3905 }
3906 // get rid of extension to avoid problems
3907
3908 char *p= fn_ext(log_name);
3909 uint length= (uint) (p - log_name);
3910 strmake(buff, log_name, min<size_t>(length, FN_REFLEN-1));
3911 return (const char*)buff;
3912 }
3913
3914
init_and_set_log_file_name(const char * log_name,const char * new_name)3915 bool MYSQL_BIN_LOG::init_and_set_log_file_name(const char *log_name,
3916 const char *new_name)
3917 {
3918 if (new_name && !my_stpcpy(log_file_name, new_name))
3919 return TRUE;
3920 else if (!new_name && generate_new_name(log_file_name, log_name))
3921 return TRUE;
3922
3923 return FALSE;
3924 }
3925
3926
3927 /**
3928 Open the logfile and init IO_CACHE.
3929
3930 @param log_name The name of the log to open
3931 @param new_name The new name for the logfile.
3932 NULL forces generate_new_name() to be called.
3933
3934 @return true if error, false otherwise.
3935 */
3936
open(PSI_file_key log_file_key,const char * log_name,const char * new_name)3937 bool MYSQL_BIN_LOG::open(
3938 #ifdef HAVE_PSI_INTERFACE
3939 PSI_file_key log_file_key,
3940 #endif
3941 const char *log_name,
3942 const char *new_name)
3943 {
3944 File file= -1;
3945 my_off_t pos= 0;
3946 int open_flags= O_CREAT | O_BINARY;
3947 DBUG_ENTER("MYSQL_BIN_LOG::open");
3948
3949 write_error= 0;
3950
3951 if (!(name= my_strdup(key_memory_MYSQL_LOG_name,
3952 log_name, MYF(MY_WME))))
3953 {
3954 name= (char *)log_name; // for the error message
3955 goto err;
3956 }
3957
3958 if (init_and_set_log_file_name(name, new_name) ||
3959 DBUG_EVALUATE_IF("fault_injection_init_name", 1, 0))
3960 goto err;
3961
3962 if (io_cache_type == SEQ_READ_APPEND)
3963 open_flags |= O_RDWR | O_APPEND;
3964 else
3965 open_flags |= O_WRONLY;
3966
3967 db[0]= 0;
3968
3969 #ifdef HAVE_PSI_INTERFACE
3970 /* Keep the key for reopen */
3971 m_log_file_key= log_file_key;
3972 #endif
3973
3974 if ((file= mysql_file_open(log_file_key,
3975 log_file_name, open_flags,
3976 MYF(MY_WME))) < 0)
3977 goto err;
3978
3979 if ((pos= mysql_file_tell(file, MYF(MY_WME))) == MY_FILEPOS_ERROR)
3980 {
3981 if (my_errno() == ESPIPE)
3982 pos= 0;
3983 else
3984 goto err;
3985 }
3986
3987 if (init_io_cache(&log_file, file, IO_SIZE, io_cache_type, pos, 0,
3988 MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
3989 goto err;
3990
3991 log_state.atomic_set(LOG_OPENED);
3992 DBUG_RETURN(0);
3993
3994 err:
3995 if (binlog_error_action == ABORT_SERVER)
3996 {
3997 exec_binlog_error_action_abort("Either disk is full or file system is read "
3998 "only while opening the binlog. Aborting the"
3999 " server.");
4000 }
4001 else
4002 sql_print_error("Could not open %s for logging (error %d). "
4003 "Turning logging off for the whole duration "
4004 "of the MySQL server process. To turn it on "
4005 "again: fix the cause, shutdown the MySQL "
4006 "server and restart it.",
4007 name, errno);
4008 if (file >= 0)
4009 mysql_file_close(file, MYF(0));
4010 end_io_cache(&log_file);
4011 my_free(name);
4012 name= NULL;
4013 log_state.atomic_set(LOG_CLOSED);
4014 DBUG_RETURN(1);
4015 }
4016
4017
open_index_file(const char * index_file_name_arg,const char * log_name,bool need_lock_index)4018 bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
4019 const char *log_name, bool need_lock_index)
4020 {
4021 bool error= false;
4022 File index_file_nr= -1;
4023 if (need_lock_index)
4024 mysql_mutex_lock(&LOCK_index);
4025 else
4026 mysql_mutex_assert_owner(&LOCK_index);
4027
4028 /*
4029 First open of this class instance
4030 Create an index file that will hold all file names uses for logging.
4031 Add new entries to the end of it.
4032 */
4033 myf opt= MY_UNPACK_FILENAME;
4034
4035 if (my_b_inited(&index_file))
4036 goto end;
4037
4038 if (!index_file_name_arg)
4039 {
4040 index_file_name_arg= log_name; // Use same basename for index file
4041 opt= MY_UNPACK_FILENAME | MY_REPLACE_EXT;
4042 }
4043 fn_format(index_file_name, index_file_name_arg, mysql_data_home,
4044 ".index", opt);
4045
4046 if (set_crash_safe_index_file_name(index_file_name_arg))
4047 {
4048 sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed.");
4049 error= true;
4050 goto end;
4051 }
4052
4053 /*
4054 We need move crash_safe_index_file to index_file if the index_file
4055 does not exist and crash_safe_index_file exists when mysqld server
4056 restarts.
4057 */
4058 if (my_access(index_file_name, F_OK) &&
4059 !my_access(crash_safe_index_file_name, F_OK) &&
4060 my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)))
4061 {
4062 sql_print_error("MYSQL_BIN_LOG::open_index_file failed to "
4063 "move crash_safe_index_file to index file.");
4064 error= true;
4065 goto end;
4066 }
4067
4068 if ((index_file_nr= mysql_file_open(m_key_file_log_index,
4069 index_file_name,
4070 O_RDWR | O_CREAT | O_BINARY,
4071 MYF(MY_WME))) < 0 ||
4072 mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
4073 init_io_cache_ext(&index_file, index_file_nr,
4074 IO_SIZE, READ_CACHE,
4075 mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
4076 0, MYF(MY_WME | MY_WAIT_IF_FULL),
4077 m_key_file_log_index_cache) ||
4078 DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0))
4079 {
4080 /*
4081 TODO: all operations creating/deleting the index file or a log, should
4082 call my_sync_dir() or my_sync_dir_by_file() to be durable.
4083 TODO: file creation should be done with mysql_file_create()
4084 not mysql_file_open().
4085 */
4086 if (index_file_nr >= 0)
4087 mysql_file_close(index_file_nr, MYF(0));
4088 error= true;
4089 goto end;
4090 }
4091
4092 #ifdef HAVE_REPLICATION
4093 /*
4094 Sync the index by purging any binary log file that is not registered.
4095 In other words, either purge binary log files that were removed from
4096 the index but not purged from the file system due to a crash or purge
4097 any binary log file that was created but not register in the index
4098 due to a crash.
4099 */
4100
4101 if (set_purge_index_file_name(index_file_name_arg) ||
4102 open_purge_index_file(FALSE) ||
4103 purge_index_entry(NULL, NULL, false) ||
4104 close_purge_index_file() ||
4105 DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0))
4106 {
4107 sql_print_error("MYSQL_BIN_LOG::open_index_file failed to sync the index "
4108 "file.");
4109 error= true;
4110 goto end;
4111 }
4112 #endif
4113
4114 end:
4115 if (need_lock_index)
4116 mysql_mutex_unlock(&LOCK_index);
4117 return error;
4118 }
4119
4120 /**
4121 Add the GTIDs from the given relaylog file and also
4122 update the IO thread transaction parser.
4123
4124 @param filename Relaylog file to read from.
4125 @param retrieved_set Gtid_set to store the GTIDs found on the relaylog file.
4126 @param verify_checksum Set to true to verify event checksums.
4127 @param trx_parser The transaction boundary parser to be used in order to
4128 only add a GTID to the gtid_set after ensuring the transaction is fully
4129 stored on the relay log.
4130 @param gtid_partial_trx The gtid of the last incomplete transaction
4131 found in the relay log.
4132
4133 @retval false The file was successfully read and all GTIDs from
4134 Previous_gtids and Gtid_log_event from complete transactions were added to
4135 the retrieved_set.
4136 @retval true There was an error during the procedure.
4137 */
4138 static bool
read_gtids_and_update_trx_parser_from_relaylog(const char * filename,Gtid_set * retrieved_gtids,bool verify_checksum,Transaction_boundary_parser * trx_parser,Gtid * gtid_partial_trx)4139 read_gtids_and_update_trx_parser_from_relaylog(
4140 const char *filename,
4141 Gtid_set *retrieved_gtids,
4142 bool verify_checksum,
4143 Transaction_boundary_parser *trx_parser,
4144 Gtid *gtid_partial_trx)
4145 {
4146 DBUG_ENTER("read_gtids_and_update_trx_parser_from_relaylog");
4147 DBUG_PRINT("info", ("Opening file %s", filename));
4148
4149 assert(retrieved_gtids != NULL);
4150 assert(trx_parser != NULL);
4151 #ifndef NDEBUG
4152 unsigned long event_counter= 0;
4153 #endif
4154
4155 /*
4156 Create a Format_description_log_event that is used to read the
4157 first event of the log.
4158 */
4159 Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
4160 if (!fd_ev.is_valid())
4161 DBUG_RETURN(true);
4162
4163 File file;
4164 IO_CACHE log;
4165
4166 const char *errmsg= NULL;
4167 if ((file= open_binlog_file(&log, filename, &errmsg)) < 0)
4168 {
4169 sql_print_error("%s", errmsg);
4170 /*
4171 As read_gtids_from_binlog() will not throw error on truncated
4172 relaylog files, we should do the same here in order to keep the
4173 current behavior.
4174 */
4175 DBUG_RETURN(false);
4176 }
4177
4178 fd_ev_p->reset_crypto();
4179
4180 /*
4181 Seek for Previous_gtids_log_event and Gtid_log_event events to
4182 gather information what has been processed so far.
4183 */
4184 my_b_seek(&log, BIN_LOG_HEADER_SIZE);
4185 Log_event *ev= NULL;
4186 bool error= false;
4187 bool seen_prev_gtids= false;
4188 ulong data_len= 0;
4189
4190 while (!error &&
4191 (ev= Log_event::read_log_event(&log, 0, fd_ev_p, verify_checksum)) !=
4192 NULL)
4193 {
4194 DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
4195 #ifndef NDEBUG
4196 event_counter++;
4197 #endif
4198
4199 data_len= uint4korr(ev->temp_buf + EVENT_LEN_OFFSET);
4200 if (trx_parser->feed_event(ev->temp_buf, data_len, fd_ev_p, false))
4201 {
4202 /*
4203 The transaction boundary parser found an error while parsing a
4204 sequence of events from the relaylog. As we don't know if the
4205 parsing has started from a reliable point (it might started in
4206 a relay log file that begins with the rest of a transaction
4207 that started in a previous relay log file), it is better to do
4208 nothing in this case. The boundary parser will fix itself once
4209 finding an event that represent a transaction boundary.
4210
4211 Suppose the following relaylog:
4212
4213 rl-bin.000011 | rl-bin.000012 | rl-bin.000013 | rl-bin-000014
4214 ---------------+---------------+---------------+---------------
4215 PREV_GTIDS | PREV_GTIDS | PREV_GTIDS | PREV_GTIDS
4216 (empty) | (UUID:1-2) | (UUID:1-2) | (UUID:1-2)
4217 ---------------+---------------+---------------+---------------
4218 XID | QUERY(INSERT) | QUERY(INSERT) | XID
4219 ---------------+---------------+---------------+---------------
4220 GTID(UUID:2) |
4221 ---------------+
4222 QUERY(CREATE |
4223 TABLE t1 ...) |
4224 ---------------+
4225 GTID(UUID:3) |
4226 ---------------+
4227 QUERY(BEGIN) |
4228 ---------------+
4229
4230 As it is impossible to determine the current Retrieved_Gtid_Set by only
4231 looking to the PREVIOUS_GTIDS on the last relay log file, and scanning
4232 events on it, we tried to find a relay log file that contains at least
4233 one GTID event during the backwards search.
4234
4235 In the example, we will find a GTID only in rl-bin.000011, as the
4236 UUID:3 transaction was spanned across 4 relay log files.
4237
4238 The transaction spanning can be caused by "FLUSH RELAY LOGS" commands
4239 on slave while it is queuing the transaction.
4240
4241 So, in order to correctly add UUID:3 into Retrieved_Gtid_Set, we need
4242 to parse the relay log starting on the file we found the last GTID
4243 queued to know if the transaction was fully retrieved or not.
4244
4245 Start scanning rl-bin.000011 after resetting the transaction parser
4246 will generate an error, as XID event is only expected inside a DML,
4247 but in this case, we can ignore this error and reset the parser.
4248 */
4249 trx_parser->reset();
4250 /*
4251 We also have to discard the GTID of the partial transaction that was
4252 not finished if there is one. This is needed supposing that an
4253 incomplete transaction was replicated with a GTID.
4254
4255 GTID(1), QUERY(BEGIN), QUERY(INSERT), ANONYMOUS_GTID, QUERY(DROP ...)
4256
4257 In the example above, without cleaning the gtid_partial_trx,
4258 the GTID(1) would be added to the Retrieved_Gtid_Set after the
4259 QUERY(DROP ...) event.
4260
4261 GTID(1), QUERY(BEGIN), QUERY(INSERT), GTID(2), QUERY(DROP ...)
4262
4263 In the example above the GTID(1) will also be discarded as the
4264 GTID(1) transaction is not complete.
4265 */
4266 if (!gtid_partial_trx->is_empty())
4267 {
4268 DBUG_PRINT("info", ("Discarding Gtid(%d, %lld) as the transaction "
4269 "wasn't complete and we found an error in the"
4270 "transaction boundary parser.",
4271 gtid_partial_trx->sidno,
4272 gtid_partial_trx->gno));
4273 gtid_partial_trx->clear();
4274 }
4275 }
4276
4277 Format_description_log_event *new_fd_ev_p= NULL;
4278 switch (ev->get_type_code())
4279 {
4280 case binary_log::FORMAT_DESCRIPTION_EVENT:
4281 new_fd_ev_p= static_cast<Format_description_log_event*>(ev);
4282 new_fd_ev_p->copy_crypto_data(*fd_ev_p);
4283 if (fd_ev_p != &fd_ev)
4284 delete fd_ev_p;
4285 fd_ev_p= new_fd_ev_p;
4286 break;
4287 case binary_log::ROTATE_EVENT:
4288 // do nothing; just accept this event and go to next
4289 break;
4290 case binary_log::PREVIOUS_GTIDS_LOG_EVENT:
4291 {
4292 seen_prev_gtids= true;
4293 // add events to sets
4294 Previous_gtids_log_event *prev_gtids_ev= (Previous_gtids_log_event *)ev;
4295 if (prev_gtids_ev->add_to_set(retrieved_gtids) != 0)
4296 {
4297 error= true;
4298 break;
4299 }
4300 #ifndef NDEBUG
4301 char* prev_buffer= prev_gtids_ev->get_str(NULL, NULL);
4302 DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
4303 filename, prev_buffer));
4304 my_free(prev_buffer);
4305 #endif
4306 break;
4307 }
4308 case binary_log::GTID_LOG_EVENT:
4309 {
4310 /* If we didn't find any PREVIOUS_GTIDS in this file */
4311 if (!seen_prev_gtids)
4312 {
4313 my_error(ER_BINLOG_LOGICAL_CORRUPTION, MYF(0), filename,
4314 "The first global transaction identifier was read, but "
4315 "no other information regarding identifiers existing "
4316 "on the previous log files was found.");
4317 error= true;
4318 break;
4319 }
4320
4321 Gtid_log_event *gtid_ev= (Gtid_log_event *)ev;
4322 rpl_sidno sidno= gtid_ev->get_sidno(retrieved_gtids->get_sid_map());
4323 if (sidno < 0)
4324 {
4325 error= true;
4326 break;
4327 }
4328 else
4329 {
4330 if (retrieved_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK)
4331 {
4332 error= true;
4333 break;
4334 }
4335 else
4336 {
4337 /*
4338 As are updating the transaction boundary parser while reading
4339 GTIDs from relay log files to fill the Retrieved_Gtid_Set, we
4340 should not add the GTID here as we don't know if the transaction
4341 is complete on the relay log yet.
4342 */
4343 gtid_partial_trx->set(sidno, gtid_ev->get_gno());
4344 }
4345 DBUG_PRINT("info", ("Found Gtid in relaylog file '%s': Gtid(%d, %lld).",
4346 filename, sidno, gtid_ev->get_gno()));
4347 }
4348 break;
4349 }
4350 case binary_log::START_ENCRYPTION_EVENT:
4351 if (fd_ev_p->start_decryption((Start_encryption_log_event*) ev))
4352 sql_print_warning("Error initializing decryption while reading GTIDs from relaylog");
4353 break;
4354 case binary_log::ANONYMOUS_GTID_LOG_EVENT:
4355 default:
4356 /*
4357 If we reached the end of a transaction after storing it's GTID
4358 in gtid_partial_trx variable, it is time to add this GTID to the
4359 retrieved_gtids set because the transaction is complete and there is no
4360 need for asking this transaction again.
4361 */
4362 if (trx_parser->is_not_inside_transaction())
4363 {
4364 if (!gtid_partial_trx->is_empty())
4365 {
4366 DBUG_PRINT("info", ("Adding Gtid to Retrieved_Gtid_Set as the "
4367 "transaction was completed at "
4368 "relaylog file '%s': Gtid(%d, %lld).",
4369 filename, gtid_partial_trx->sidno,
4370 gtid_partial_trx->gno));
4371 retrieved_gtids->_add_gtid(gtid_partial_trx->sidno,
4372 gtid_partial_trx->gno);
4373 gtid_partial_trx->clear();
4374 }
4375 }
4376 break;
4377 }
4378 if (ev != fd_ev_p)
4379 delete ev;
4380 }
4381
4382 if (log.error < 0)
4383 {
4384 // This is not a fatal error; the log may just be truncated.
4385 // @todo but what other errors could happen? IO error?
4386 sql_print_warning("Error reading GTIDs from relaylog: %d", log.error);
4387 }
4388
4389 if (fd_ev_p != &fd_ev)
4390 {
4391 delete fd_ev_p;
4392 fd_ev_p= &fd_ev;
4393 }
4394
4395 mysql_file_close(file, MYF(MY_WME));
4396 end_io_cache(&log);
4397
4398 #ifndef NDEBUG
4399 sql_print_information("%lu events read in relaylog file '%s' for updating "
4400 "Retrieved_Gtid_Set and/or IO thread transaction "
4401 "parser state.",
4402 event_counter, filename);
4403 #endif
4404
4405 DBUG_RETURN(error);
4406 }
4407
4408 /**
4409 Reads GTIDs from the given binlog file.
4410
4411 @param filename File to read from.
4412 @param all_gtids If not NULL, then the GTIDs from the
4413 Previous_gtids_log_event and from all Gtid_log_events are stored in
4414 this object.
4415 @param prev_gtids If not NULL, then the GTIDs from the
4416 Previous_gtids_log_events are stored in this object.
4417 @param first_gtid If not NULL, then the first GTID information from the
4418 file will be stored in this object.
4419 @param sid_map The sid_map object to use in the rpl_sidno generation
4420 of the Gtid_log_event. If lock is needed in the sid_map, the caller
4421 must hold it.
4422 @param verify_checksum Set to true to verify event checksums.
4423
4424 @retval GOT_GTIDS The file was successfully read and it contains
4425 both Gtid_log_events and Previous_gtids_log_events.
4426 This is only possible if either all_gtids or first_gtid are not null.
4427 @retval GOT_PREVIOUS_GTIDS The file was successfully read and it
4428 contains Previous_gtids_log_events but no Gtid_log_events.
4429 For binary logs, if no all_gtids and no first_gtid are specified,
4430 this function will be done right after reading the PREVIOUS_GTIDS
4431 regardless of the rest of the content of the binary log file.
4432 @retval NO_GTIDS The file was successfully read and it does not
4433 contain GTID events.
4434 @retval ERROR Out of memory, or IO error, or malformed event
4435 structure, or the file is malformed (e.g., contains Gtid_log_events
4436 but no Previous_gtids_log_event).
4437 @retval TRUNCATED The file was truncated before the end of the
4438 first Previous_gtids_log_event.
4439 */
4440 enum enum_read_gtids_from_binlog_status
4441 { GOT_GTIDS, GOT_PREVIOUS_GTIDS, NO_GTIDS, ERROR, TRUNCATED };
4442 static enum_read_gtids_from_binlog_status
read_gtids_from_binlog(const char * filename,Gtid_set * all_gtids,Gtid_set * prev_gtids,Gtid * first_gtid,Sid_map * sid_map,bool verify_checksum,bool is_relay_log)4443 read_gtids_from_binlog(const char *filename, Gtid_set *all_gtids,
4444 Gtid_set *prev_gtids, Gtid *first_gtid,
4445 Sid_map* sid_map,
4446 bool verify_checksum, bool is_relay_log)
4447 {
4448 DBUG_ENTER("read_gtids_from_binlog");
4449 DBUG_PRINT("info", ("Opening file %s", filename));
4450
4451 /*
4452 Create a Format_description_log_event that is used to read the
4453 first event of the log.
4454 */
4455 Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
4456 if (!fd_ev.is_valid())
4457 DBUG_RETURN(ERROR);
4458
4459 File file;
4460 IO_CACHE log;
4461
4462 #ifndef NDEBUG
4463 unsigned long event_counter= 0;
4464 /*
4465 We assert here that both all_gtids and prev_gtids, if specified,
4466 uses the same sid_map as the one passed as a parameter. This is just
4467 to ensure that, if the sid_map needed some lock and was locked by
4468 the caller, the lock applies to all the GTID sets this function is
4469 dealing with.
4470 */
4471 if (all_gtids)
4472 assert(all_gtids->get_sid_map() == sid_map);
4473 if (prev_gtids)
4474 assert(prev_gtids->get_sid_map() == sid_map);
4475 #endif
4476
4477 const char *errmsg= NULL;
4478 if ((file= open_binlog_file(&log, filename, &errmsg)) < 0)
4479 {
4480 sql_print_error("%s", errmsg);
4481 /*
4482 We need to revisit the recovery procedure for relay log
4483 files. Currently, it is called after this routine.
4484 /Alfranio
4485 */
4486 DBUG_RETURN(TRUNCATED);
4487 }
4488
4489 fd_ev_p->reset_crypto();
4490
4491 /*
4492 Seek for Previous_gtids_log_event and Gtid_log_event events to
4493 gather information what has been processed so far.
4494 */
4495 my_b_seek(&log, BIN_LOG_HEADER_SIZE);
4496 Log_event *ev= NULL;
4497 enum_read_gtids_from_binlog_status ret= NO_GTIDS;
4498 bool done= false;
4499 bool seen_first_gtid= false;
4500 while (!done &&
4501 (ev= Log_event::read_log_event(&log, 0, fd_ev_p, verify_checksum)) !=
4502 NULL)
4503 {
4504 #ifndef NDEBUG
4505 event_counter++;
4506 #endif
4507 DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
4508 Format_description_log_event *new_fd_ev_p= NULL;
4509 switch (ev->get_type_code())
4510 {
4511 case binary_log::FORMAT_DESCRIPTION_EVENT:
4512 new_fd_ev_p= static_cast<Format_description_log_event*>(ev);
4513 new_fd_ev_p->copy_crypto_data(*fd_ev_p);
4514 if (fd_ev_p != &fd_ev)
4515 delete fd_ev_p;
4516 fd_ev_p= new_fd_ev_p;
4517 break;
4518 case binary_log::ROTATE_EVENT:
4519 // do nothing; just accept this event and go to next
4520 break;
4521 case binary_log::PREVIOUS_GTIDS_LOG_EVENT:
4522 {
4523 ret= GOT_PREVIOUS_GTIDS;
4524 // add events to sets
4525 Previous_gtids_log_event *prev_gtids_ev=
4526 (Previous_gtids_log_event *)ev;
4527 if (all_gtids != NULL && prev_gtids_ev->add_to_set(all_gtids) != 0)
4528 ret= ERROR, done= true;
4529 else if (prev_gtids != NULL && prev_gtids_ev->add_to_set(prev_gtids) != 0)
4530 ret= ERROR, done= true;
4531 #ifndef NDEBUG
4532 char* prev_buffer= prev_gtids_ev->get_str(NULL, NULL);
4533 DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
4534 filename, prev_buffer));
4535 my_free(prev_buffer);
4536 #endif
4537 /*
4538 If this is not a relay log, the previous_gtids were asked and no
4539 all_gtids neither first_gtid were asked, it is fine to consider the
4540 job as done.
4541 */
4542 if (!is_relay_log && prev_gtids != NULL &&
4543 all_gtids == NULL && first_gtid == NULL)
4544 done= true;
4545 DBUG_EXECUTE_IF("inject_fault_bug16502579", {
4546 DBUG_PRINT("debug", ("PREVIOUS_GTIDS_LOG_EVENT found. "
4547 "Injected ret=NO_GTIDS."));
4548 if (ret == GOT_PREVIOUS_GTIDS)
4549 {
4550 ret=NO_GTIDS;
4551 done= false;
4552 }
4553 });
4554 break;
4555 }
4556 case binary_log::GTID_LOG_EVENT:
4557 {
4558 if (ret != GOT_GTIDS)
4559 {
4560 if (ret != GOT_PREVIOUS_GTIDS)
4561 {
4562 /*
4563 Since this routine is run on startup, there may not be a
4564 THD instance. Therefore, ER(X) cannot be used.
4565 */
4566 const char* msg_fmt= (current_thd != NULL) ?
4567 ER(ER_BINLOG_LOGICAL_CORRUPTION) :
4568 ER_DEFAULT(ER_BINLOG_LOGICAL_CORRUPTION);
4569 my_printf_error(ER_BINLOG_LOGICAL_CORRUPTION,
4570 msg_fmt, MYF(0),
4571 filename,
4572 "The first global transaction identifier was read, but "
4573 "no other information regarding identifiers existing "
4574 "on the previous log files was found.");
4575 ret= ERROR, done= true;
4576 break;
4577 }
4578 else
4579 ret= GOT_GTIDS;
4580 }
4581 /*
4582 When this is a relaylog, we just check if the relay log contains at
4583 least one Gtid_log_event, so that we can distinguish the return values
4584 GOT_GTID and GOT_PREVIOUS_GTIDS. We don't need to read anything else
4585 from the relay log.
4586 When this is a binary log, if all_gtids is requested (i.e., NOT NULL),
4587 we should continue to read all gtids. If just first_gtid was requested,
4588 we will be done after storing this Gtid_log_event info on it.
4589 */
4590 if (is_relay_log)
4591 {
4592 ret= GOT_GTIDS, done= true;
4593 }
4594 else
4595 {
4596 Gtid_log_event *gtid_ev= (Gtid_log_event *)ev;
4597 rpl_sidno sidno= gtid_ev->get_sidno(sid_map);
4598 if (sidno < 0)
4599 ret= ERROR, done= true;
4600 else
4601 {
4602 if (all_gtids)
4603 {
4604 if (all_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK)
4605 ret= ERROR, done= true;
4606 all_gtids->_add_gtid(sidno, gtid_ev->get_gno());
4607 DBUG_PRINT("info", ("Got Gtid from file '%s': Gtid(%d, %lld).",
4608 filename, sidno, gtid_ev->get_gno()));
4609 }
4610
4611 /* If the first GTID was requested, stores it */
4612 if (first_gtid && !seen_first_gtid)
4613 {
4614 first_gtid->set(sidno, gtid_ev->get_gno());
4615 seen_first_gtid= true;
4616 /* If the first_gtid was the only thing requested, we are done */
4617 if (all_gtids == NULL)
4618 ret= GOT_GTIDS, done= true;
4619 }
4620 }
4621 }
4622 break;
4623 }
4624 case binary_log::START_ENCRYPTION_EVENT:
4625 {
4626 if (fd_ev_p->start_decryption(static_cast<Start_encryption_log_event*>(ev)))
4627 sql_print_warning("Error initializing decryption while reading GTIDs from binary log");
4628 // in case start_decryption fails next call to read_log_event will fail too
4629 // this failure will be handled outside the loop
4630 break;
4631 }
4632
4633 case binary_log::ANONYMOUS_GTID_LOG_EVENT:
4634 {
4635 /*
4636 When this is a relaylog, we just check if it contains
4637 at least one Anonymous_gtid_log_event after initialization
4638 (FDs, Rotates and PREVIOUS_GTIDS), so that we can distinguish the
4639 return values GOT_GTID and GOT_PREVIOUS_GTIDS.
4640 We don't need to read anything else from the relay log.
4641 */
4642 if (is_relay_log)
4643 {
4644 ret= GOT_GTIDS;
4645 done= true;
4646 break;
4647 }
4648 assert(prev_gtids == NULL ? true : all_gtids != NULL ||
4649 first_gtid != NULL);
4650 }
4651 // Fall through.
4652 default:
4653 // if we found any other event type without finding a
4654 // previous_gtids_log_event, then the rest of this binlog
4655 // cannot contain gtids
4656 if (ret != GOT_GTIDS && ret != GOT_PREVIOUS_GTIDS)
4657 done= true;
4658 /*
4659 The GTIDs of the relaylog files will be handled later
4660 because of the possibility of transactions be spanned
4661 along distinct relaylog files.
4662 So, if we found an ordinary event without finding the
4663 GTID but we already found the PREVIOUS_GTIDS, this probably
4664 means that the event is from a transaction that started on
4665 previous relaylog file.
4666 */
4667 if (ret == GOT_PREVIOUS_GTIDS && is_relay_log)
4668 done= true;
4669 break;
4670 }
4671 if (ev != fd_ev_p)
4672 delete ev;
4673 DBUG_PRINT("info", ("done=%d", done));
4674 }
4675
4676 if (log.error < 0)
4677 {
4678 // This is not a fatal error; the log may just be truncated.
4679
4680 // @todo but what other errors could happen? IO error?
4681 sql_print_warning("Error reading GTIDs from binary log: %d", log.error);
4682 }
4683
4684 if (fd_ev_p != &fd_ev)
4685 {
4686 delete fd_ev_p;
4687 fd_ev_p= &fd_ev;
4688 }
4689
4690 mysql_file_close(file, MYF(MY_WME));
4691 end_io_cache(&log);
4692
4693 if (all_gtids)
4694 all_gtids->dbug_print("all_gtids");
4695 else
4696 DBUG_PRINT("info", ("all_gtids==NULL"));
4697 if (prev_gtids)
4698 prev_gtids->dbug_print("prev_gtids");
4699 else
4700 DBUG_PRINT("info", ("prev_gtids==NULL"));
4701 if (first_gtid == NULL)
4702 DBUG_PRINT("info", ("first_gtid==NULL"));
4703 else if (first_gtid->sidno == 0)
4704 DBUG_PRINT("info", ("first_gtid.sidno==0"));
4705 else
4706 first_gtid->dbug_print(sid_map, "first_gtid");
4707
4708 DBUG_PRINT("info", ("returning %d", ret));
4709 #ifndef NDEBUG
4710 if (!is_relay_log && prev_gtids != NULL &&
4711 all_gtids == NULL && first_gtid == NULL)
4712 sql_print_information("Read %lu events from binary log file '%s' to "
4713 "determine the GTIDs purged from binary logs.",
4714 event_counter, filename);
4715 #endif
4716 DBUG_RETURN(ret);
4717 }
4718
find_first_log_not_in_gtid_set(char * binlog_file_name,const Gtid_set * gtid_set,Gtid * first_gtid,std::string & errmsg)4719 bool MYSQL_BIN_LOG::find_first_log_not_in_gtid_set(char *binlog_file_name,
4720 const Gtid_set *gtid_set,
4721 Gtid *first_gtid,
4722 std::string &errmsg)
4723 {
4724 DBUG_ENTER("MYSQL_BIN_LOG::gtid_read_start_binlog");
4725 /*
4726 Gather the set of files to be accessed.
4727 */
4728 list<string> filename_list;
4729 LOG_INFO linfo;
4730 int error;
4731
4732 list<string>::reverse_iterator rit;
4733 Gtid_set binlog_previous_gtid_set(gtid_set->get_sid_map());
4734
4735 mysql_mutex_lock(&LOCK_index);
4736 for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/);
4737 !error; error= find_next_log(&linfo, false/*need_lock_index=false*/))
4738 {
4739 DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
4740 filename_list.push_back(string(linfo.log_file_name));
4741 }
4742 mysql_mutex_unlock(&LOCK_index);
4743 if (error != LOG_INFO_EOF)
4744 {
4745 errmsg.assign(
4746 "Failed to read the binary log index file while "
4747 "looking for the oldest binary log that contains any GTID "
4748 "that is not in the given gtid set");
4749 error= -1;
4750 goto end;
4751 }
4752
4753 if (filename_list.empty())
4754 {
4755 errmsg.assign(
4756 "Could not find first log file name in binary log index file "
4757 "while looking for the oldest binary log that contains any GTID "
4758 "that is not in the given gtid set");
4759 error= -2;
4760 goto end;
4761 }
4762
4763 /*
4764 Iterate over all the binary logs in reverse order, and read only
4765 the Previous_gtids_log_event, to find the first one, that is the
4766 subset of the given gtid set. Since every binary log begins with
4767 a Previous_gtids_log_event, that contains all GTIDs in all
4768 previous binary logs.
4769 We also ask for the first GTID in the binary log to know if we
4770 should send the FD event with the "created" field cleared or not.
4771 */
4772 DBUG_PRINT("info", ("Iterating backwards through binary logs, and reading "
4773 "only the Previous_gtids_log_event, to find the first "
4774 "one, that is the subset of the given gtid set."));
4775 rit= filename_list.rbegin();
4776 error= 0;
4777 while (rit != filename_list.rend())
4778 {
4779 binlog_previous_gtid_set.clear();
4780 const char *filename= rit->c_str();
4781 DBUG_PRINT("info", ("Read Previous_gtids_log_event from filename='%s'",
4782 filename));
4783 switch (read_gtids_from_binlog(filename, NULL, &binlog_previous_gtid_set,
4784 first_gtid,
4785 binlog_previous_gtid_set.get_sid_map(),
4786 opt_master_verify_checksum, is_relay_log))
4787 {
4788 case ERROR:
4789 errmsg.assign(
4790 "Error reading header of binary log while looking for "
4791 "the oldest binary log that contains any GTID that is not in "
4792 "the given gtid set");
4793 error= -3;
4794 goto end;
4795 case NO_GTIDS:
4796 errmsg.assign(
4797 "Found old binary log without GTIDs while looking for "
4798 "the oldest binary log that contains any GTID that is not in "
4799 "the given gtid set");
4800 error= -4;
4801 goto end;
4802 case GOT_GTIDS:
4803 case GOT_PREVIOUS_GTIDS:
4804 if (binlog_previous_gtid_set.is_subset(gtid_set))
4805 {
4806 strcpy(binlog_file_name, filename);
4807 /*
4808 Verify that the selected binlog is not the first binlog,
4809 */
4810 DBUG_EXECUTE_IF("slave_reconnect_with_gtid_set_executed",
4811 assert(strcmp(filename_list.begin()->c_str(),
4812 binlog_file_name) != 0););
4813 goto end;
4814 }
4815 case TRUNCATED:
4816 break;
4817 }
4818
4819 rit++;
4820 }
4821
4822 if (rit == filename_list.rend())
4823 {
4824 report_missing_gtids(&binlog_previous_gtid_set, gtid_set, errmsg);
4825 error= -5;
4826 }
4827
4828 end:
4829 if (error)
4830 DBUG_PRINT("error", ("'%s'", errmsg.c_str()));
4831 filename_list.clear();
4832 DBUG_PRINT("info", ("returning %d", error));
4833 DBUG_RETURN(error != 0 ? true : false);
4834 }
4835
/**
  Collect GTID information by scanning the log files listed in the index.

  The function works in up to three phases:
    1. If all_gtids is given, iterate backwards over the files, calling
       read_gtids_from_binlog() on each, until a file is found that is
       sufficient to stop at (see the switch below). lost_gtids is only
       passed to the reader for the first (oldest) file.
    2. For a relay log, iterate forwards from the file where phase 1
       stopped, feeding every file to
       read_gtids_and_update_trx_parser_from_relaylog().
    3. If lost_gtids is given and phase 1 did not reach the first file,
       iterate forwards from the first file until lost_gtids can be
       determined.

  @param all_gtids          Set passed to the readers in phases 1 and 2;
                            when NULL those phases are skipped.
  @param lost_gtids         Set passed to read_gtids_from_binlog() for the
                            oldest file; asserted to be NULL for a relay log.
  @param verify_checksum    Forwarded to read_gtids_from_binlog().
  @param need_lock          If true, LOCK_log (only when all_gtids != NULL),
                            LOCK_index and the write lock on global_sid_lock
                            are acquired here and released before returning;
                            otherwise they are asserted to be held already.
  @param trx_parser         Parser that is reset and then fed with relay log
                            events in phase 2; asserted non-NULL for a relay
                            log.
  @param gtid_partial_trx   Gtid that is cleared and then passed to
                            read_gtids_and_update_trx_parser_from_relaylog()
                            in phase 2.
  @param is_server_starting If true and this is a binary log, the last file
                            name in the index is ignored, and the
                            binlog_gtid_simple_recovery shortcut in phase 1
                            may apply.

  @retval false success
  @retval true  error
*/
bool MYSQL_BIN_LOG::init_gtid_sets(Gtid_set *all_gtids, Gtid_set *lost_gtids,
                                   bool verify_checksum, bool need_lock,
                                   Transaction_boundary_parser *trx_parser,
                                   Gtid *gtid_partial_trx,
                                   bool is_server_starting)
{
  DBUG_ENTER("MYSQL_BIN_LOG::init_gtid_sets");
  DBUG_PRINT("info", ("lost_gtids=%p; so we are recovering a %s log; is_relay_log=%d",
                      lost_gtids, lost_gtids == NULL ? "relay" : "binary",
                      is_relay_log));

  /*
    If this is a relay log, we must have the IO thread Master_info trx_parser
    in order to correctly feed it with relay log events.
  */
#ifndef NDEBUG
  if (is_relay_log)
  {
    assert(trx_parser != NULL);
    assert(lost_gtids == NULL);
  }
#endif

  /*
    Acquire the necessary locks to ensure that logs are neither
    removed nor updated while we are reading from them.
  */
  if (need_lock)
  {
    // We don't need LOCK_log if we are only going to read the initial
    // Previous_gtids_log_event and ignore the Gtid_log_events.
    if (all_gtids != NULL)
      mysql_mutex_lock(&LOCK_log);
    mysql_mutex_lock(&LOCK_index);
    global_sid_lock->wrlock();
  }
  else
  {
    // The caller claims to hold the locks already: verify that.
    if (all_gtids != NULL)
      mysql_mutex_assert_owner(&LOCK_log);
    mysql_mutex_assert_owner(&LOCK_index);
    global_sid_lock->assert_some_wrlock();
  }

  // Gather the set of files to be accessed.
  list<string> filename_list;
  LOG_INFO linfo;
  int error;

  list<string>::iterator it;
  list<string>::reverse_iterator rit;
  bool reached_first_file= false;

  /* Initialize the sid_map to be used in read_gtids_from_binlog */
  Sid_map *sid_map= NULL;
  if (all_gtids)
    sid_map= all_gtids->get_sid_map();
  else if (lost_gtids)
    sid_map= lost_gtids->get_sid_map();

  // Walk the index from its first entry, collecting every file name.
  for (error= find_log_pos(&linfo, NULL, false/*need_lock_index=false*/); !error;
       error= find_next_log(&linfo, false/*need_lock_index=false*/))
  {
    DBUG_PRINT("info", ("read log filename '%s'", linfo.log_file_name));
    filename_list.push_back(string(linfo.log_file_name));
  }
  // Any terminating status other than LOG_INFO_EOF is treated as an error.
  if (error != LOG_INFO_EOF)
  {
    DBUG_PRINT("error", ("Error reading %s index",
                         is_relay_log ? "relaylog" : "binlog"));
    goto end;
  }
  /*
    On server starting, one new empty binlog file is created and
    its file name is put into index file before initializing
    GLOBAL.GTID_EXECUTED AND GLOBAL.GTID_PURGED, it is not the
    last binlog file before the server restarts, so we remove
    its file name from filename_list.
  */
  if (is_server_starting && !is_relay_log && !filename_list.empty())
    filename_list.pop_back();

  // The index was read successfully: LOG_INFO_EOF is not an error.
  error= 0;

  if (all_gtids != NULL)
  {
    DBUG_PRINT("info", ("Iterating backwards through %s logs, "
                        "looking for the last %s log that contains "
                        "a Previous_gtids_log_event.",
                        is_relay_log ? "relay" : "binary",
                        is_relay_log ? "relay" : "binary"));
    // Iterate over all files in reverse order until we find one that
    // contains a Previous_gtids_log_event.
    rit= filename_list.rbegin();
    bool can_stop_reading= false;
    reached_first_file= (rit == filename_list.rend());
    DBUG_PRINT("info", ("filename='%s' reached_first_file=%d",
                        reached_first_file ? "" : rit->c_str(),
                        reached_first_file));
    while (!can_stop_reading && !reached_first_file)
    {
      const char *filename= rit->c_str();
      assert(rit != filename_list.rend());
      /*
        Advance before reading, so that reached_first_file tells whether
        'filename' is the oldest file. As a consequence, rit points one
        position past the file being read.
      */
      rit++;
      reached_first_file= (rit == filename_list.rend());
      DBUG_PRINT("info", ("filename='%s' can_stop_reading=%d "
                          "reached_first_file=%d, ",
                          filename, can_stop_reading, reached_first_file));
      // lost_gtids is only handed to the reader for the oldest file.
      switch (read_gtids_from_binlog(filename, all_gtids,
                                     reached_first_file ? lost_gtids : NULL,
                                     NULL/* first_gtid */,
                                     sid_map, verify_checksum, is_relay_log))
      {
        case ERROR:
        {
          error= 1;
          goto end;
        }
        case GOT_GTIDS:
        {
          can_stop_reading= true;
          break;
        }
        case GOT_PREVIOUS_GTIDS:
        {
          /*
            If this is a binlog file, it is enough to have GOT_PREVIOUS_GTIDS.
            If this is a relaylog file, we need to find at least one GTID to
            start parsing the relay log to add GTID of transactions that might
            have spanned in distinct relaylog files.
          */
          if (!is_relay_log)
            can_stop_reading= true;
          break;
        }
        case NO_GTIDS:
        {
          /*
            Mysql server iterates backwards through binary logs, looking for
            the last binary log that contains a Previous_gtids_log_event for
            gathering the set of gtid_executed on server start. This may take
            very long time if it has many binary logs and almost all of them
            are out of filesystem cache. So if the binlog_gtid_simple_recovery
            is enabled, and the last binary log does not contain any GTID
            event, do not read any more binary logs, GLOBAL.GTID_EXECUTED and
            GLOBAL.GTID_PURGED should be empty in the case.
          */
          if (binlog_gtid_simple_recovery && is_server_starting &&
              !is_relay_log)
          {
            assert(all_gtids->is_empty());
            assert(lost_gtids->is_empty());
            goto end;
          }
          /*FALLTHROUGH*/
        }
        case TRUNCATED:
        {
          // Nothing conclusive in this file: keep iterating backwards.
          break;
        }
      }
    }

    /*
      If we use GTIDs and have partial transactions on the relay log,
      must check if it ends on next relay log files.
      We also need to feed the boundary parser with the rest of the
      relay log to put it in the correct state before receiving new
      events from the master in the case of GTID auto positioning be
      disabled.
    */
    if (is_relay_log && filename_list.size() > 0)
    {
      /*
        Suppose the following relaylog:

         rl-bin.000001 | rl-bin.000002 | rl-bin.000003 | rl-bin-000004
        ---------------+---------------+---------------+---------------
         PREV_GTIDS    | PREV_GTIDS    | PREV_GTIDS    | PREV_GTIDS
         (empty)       | (UUID:1)      | (UUID:1)      | (UUID:1)
        ---------------+---------------+---------------+---------------
         GTID(UUID:1)  | QUERY(INSERT) | QUERY(INSERT) | XID
        ---------------+---------------+---------------+---------------
         QUERY(CREATE  |
         TABLE t1 ...) |
        ---------------+
         GTID(UUID:2)  |
        ---------------+
         QUERY(BEGIN)  |
        ---------------+

        As it is impossible to determine the current Retrieved_Gtid_Set by only
        looking to the PREVIOUS_GTIDS on the last relay log file, and scanning
        events on it, we tried to find a relay log file that contains at least
        one GTID event during the backwards search.

        In the example, we will find a GTID only in rl-bin.000001, as the
        UUID:2 transaction was spanned across 4 relay log files.

        The transaction spanning can be caused by "FLUSH RELAY LOGS" commands
        on slave while it is queuing the transaction.

        So, in order to correctly add UUID:2 into Retrieved_Gtid_Set, we need
        to parse the relay log starting on the file we found the last GTID
        queued to know if the transaction was fully retrieved or not.
      */

      /*
        Adjust the reverse iterator to point to the relaylog file we
        need to start parsing, as it was incremented after generating
        the relay log file name.
      */
      assert(rit != filename_list.rbegin());
      rit--;
      assert(rit != filename_list.rend());
      /* Reset the transaction parser before feeding it with events */
      trx_parser->reset();
      gtid_partial_trx->clear();

      DBUG_PRINT("info", ("Iterating forwards through relay logs, "
                          "updating the Retrieved_Gtid_Set and updating "
                          "IO thread trx parser before start."));
      // Locate the same file with a forward iterator and read to the end.
      for (it= find(filename_list.begin(), filename_list.end(), *rit);
           it != filename_list.end(); it++)
      {
        const char *filename= it->c_str();
        DBUG_PRINT("info", ("filename='%s'", filename));
        if (read_gtids_and_update_trx_parser_from_relaylog(filename, all_gtids,
                                                           true, trx_parser,
                                                           gtid_partial_trx))
        {
          error= 1;
          goto end;
        }
      }
    }
  }
  if (lost_gtids != NULL && !reached_first_file)
  {
    /*
      This branch is only reachable by a binary log. The relay log
      doesn't need to get lost_gtids information.

      A 5.6 server sets GTID_PURGED by rotating the binary log.

      A 5.6 server that had recently enabled GTIDs and set GTID_PURGED
      would have a sequence of binary logs like:

      master-bin.N  : No PREVIOUS_GTIDS (GTID wasn't enabled)
      master-bin.N+1: Has an empty PREVIOUS_GTIDS and a ROTATE
                      (GTID was enabled on startup)
      master-bin.N+2: Has a PREVIOUS_GTIDS with the content set by a
                      SET @@GLOBAL.GTID_PURGED + has GTIDs of some
                      transactions.

      If this 5.6 server be upgraded to 5.7 keeping its binary log files,
      this routine will have to find the first binary log that contains a
      PREVIOUS_GTIDS + a GTID event to ensure that the content of the
      GTID_PURGED will be correctly set (assuming binlog_gtid_simple_recovery
      is not enabled).
    */
    DBUG_PRINT("info", ("Iterating forwards through binary logs, looking for "
                        "the first binary log that contains both a "
                        "Previous_gtids_log_event and a Gtid_log_event."));
    assert(!is_relay_log);
    for (it= filename_list.begin(); it != filename_list.end(); it++)
    {
      /*
        We should pass a first_gtid to read_gtids_from_binlog when
        binlog_gtid_simple_recovery is disabled, or else it will return
        right after reading the PREVIOUS_GTIDS event to avoid stall on
        reading the whole binary log.
      */
      Gtid first_gtid= {0, 0};
      const char *filename= it->c_str();
      DBUG_PRINT("info", ("filename='%s'", filename));
      switch (read_gtids_from_binlog(filename, NULL, lost_gtids,
                                     binlog_gtid_simple_recovery ? NULL :
                                                                   &first_gtid,
                                     sid_map, verify_checksum, is_relay_log))
      {
        case ERROR:
        {
          error= 1;
          /*FALLTHROUGH*/
        }
        case GOT_GTIDS:
        {
          // Either an error or the file we were looking for: stop here.
          goto end;
        }
        case NO_GTIDS:
        case GOT_PREVIOUS_GTIDS:
        {
          /*
            Mysql server iterates forwards through binary logs, looking for
            the first binary log that contains both Previous_gtids_log_event
            and gtid_log_event for gathering the set of gtid_purged on server
            start. It also iterates forwards through binary logs, looking for
            the first binary log that contains both Previous_gtids_log_event
            and gtid_log_event for gathering the set of gtid_purged when
            purging binary logs. This may take very long time if it has many
            binary logs and almost all of them are out of filesystem cache.
            So if the binlog_gtid_simple_recovery is enabled, we just
            initialize GLOBAL.GTID_PURGED from the first binary log, do not
            read any more binary logs.
          */
          if (binlog_gtid_simple_recovery)
            goto end;
          /*FALLTHROUGH*/
        }
        case TRUNCATED:
        {
          // Try the next (newer) binary log.
          break;
        }
      }
    }
  }
end:
  if (all_gtids)
    all_gtids->dbug_print("all_gtids");
  if (lost_gtids)
    lost_gtids->dbug_print("lost_gtids");
  // Release the locks in the reverse order of their acquisition.
  if (need_lock)
  {
    global_sid_lock->unlock();
    mysql_mutex_unlock(&LOCK_index);
    if (all_gtids != NULL)
      mysql_mutex_unlock(&LOCK_log);
  }
  filename_list.clear();
  DBUG_PRINT("info", ("returning %d", error));
  DBUG_RETURN(error != 0 ? true : false);
}
5169
5170
/**
  Open a (new) binlog file.

  - Open the log file and the index file. Register the new
    file name in it
  - When calling this when the file is in use, you must have locks
    on LOCK_log and LOCK_index.

  After the file is opened, the following is written to it, in this order:
  the magic header (only if the file is empty), a
  Format_description_log_event, a Start_encryption_log_event (only if
  encrypt_binlog is set), a Previous_gtids_log_event (see the conditions in
  the body) and, optionally, extra_description_event. The file is then
  flushed and synced and, if it was empty when opened, its name is appended
  to the index file.

  @param log_name                Base name, passed to
                                 init_and_set_log_file_name() and open().
  @param new_name                Passed to init_and_set_log_file_name() and
                                 open(); may be NULL (it is tested for NULL
                                 in the error message below).
  @param max_size_arg            Value stored into max_size.
  @param null_created_arg        Value stored into the 'dont_set_created'
                                 member of the Format_description_log_event.
  @param need_lock_index         Forwarded to add_log_to_index(),
                                 purge_index_entry() and close().
  @param need_sid_lock           If true, the write lock on global_sid_lock
                                 is taken here while the PREVIOUS_GTIDS
                                 event is generated; otherwise it is asserted
                                 to be held already.
  @param extra_description_event If non-NULL and its binlog_version is >= 4,
                                 the event is written to the new file.

  @retval
    0	ok
  @retval
    1	error
*/

bool MYSQL_BIN_LOG::open_binlog(const char *log_name,
                                const char *new_name,
                                ulong max_size_arg,
                                bool null_created_arg,
                                bool need_lock_index,
                                bool need_sid_lock,
                                Format_description_log_event *extra_description_event)
{
  // lock_index must be acquired *before* sid_lock.
  // Hence acquiring LOCK_index here is only allowed if sid_lock is also
  // acquired here (i.e. it is not already held by the caller).
  assert(need_sid_lock || !need_lock_index);
  DBUG_ENTER("MYSQL_BIN_LOG::open_binlog(const char *, ...)");
  DBUG_PRINT("enter",("base filename: %s", log_name));
  // Only used to build error messages.
  const char *log_to_encrypt= is_relay_log ? "relay_log" : "binlog";

  mysql_mutex_assert_owner(get_log_lock());

  if (init_and_set_log_file_name(log_name, new_name))
  {
    sql_print_error("MYSQL_BIN_LOG::open failed to generate new file name.");
    DBUG_RETURN(1);
  }

  DBUG_PRINT("info", ("generated filename: %s", log_file_name));

#ifdef HAVE_REPLICATION
  // Record the file about to be created in the purge index file first.
  if (open_purge_index_file(TRUE) ||
      register_create_index_entry(log_file_name) ||
      sync_purge_index_file() ||
      DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0))
  {
    /**
      @todo: although this was introduced to appease valgrind
      when injecting emulated faults using fault_injection_registering_index
      it may be good to consider what actually happens when
      open_purge_index_file succeeds but register or sync fails.

      Perhaps we might need the code below in MYSQL_BIN_LOG::cleanup
      for "real life" purposes as well?
    */
    DBUG_EXECUTE_IF("fault_injection_registering_index", {
      if (my_b_inited(&purge_index_file))
      {
        end_io_cache(&purge_index_file);
        my_close(purge_index_file.file, MYF(0));
      }
    });

    sql_print_error("MYSQL_BIN_LOG::open failed to sync the index file.");
    DBUG_RETURN(1);
  }
  DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index", DBUG_SUICIDE(););
#endif

  write_error= 0;

  /* open the main log file */
  if (open(
#ifdef HAVE_PSI_INTERFACE
           m_key_file_log,
#endif
           log_name, new_name))
  {
#ifdef HAVE_REPLICATION
    close_purge_index_file();
#endif
    DBUG_RETURN(1);                            /* all warnings issued */
  }

  max_size= max_size_arg;

  open_count++;

  // Set when the file was empty on open, i.e. it still has to be registered
  // in the index file.
  bool write_file_name_to_index_file=0;

  /* This must be before goto err. */
#ifndef NDEBUG
  binary_log_debug::debug_pretend_version_50034_in_binlog=
    DBUG_EVALUATE_IF("pretend_version_50034_in_binlog", true, false);
#endif
  Format_description_log_event s(BINLOG_VERSION);

  if (!my_b_filelength(&log_file))
  {
    /*
      The binary log file was empty (probably newly created)
      This is the normal case and happens when the user doesn't specify
      an extension for the binary log files.
      In this case we write a standard header to it.
    */
    if (my_b_safe_write(&log_file, (uchar*) BINLOG_MAGIC,
                        BIN_LOG_HEADER_SIZE))
      goto err;
    bytes_written+= BIN_LOG_HEADER_SIZE;
    write_file_name_to_index_file= 1;
  }

  /*
    don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache
    as we won't be able to reset it later
  */
  if (io_cache_type == WRITE_CACHE)
  {
    s.common_header->flags|= LOG_EVENT_BINLOG_IN_USE_F;
  }

  // Choose the checksum algorithm advertised in the format description.
  if (is_relay_log)
  {
    /* relay-log */
    if (relay_log_checksum_alg == binary_log::BINLOG_CHECKSUM_ALG_UNDEF)
    {
      /* inherit master's A descriptor if one has been received */
      if (opt_slave_sql_verify_checksum == 0)
        /* otherwise use slave's local preference of RL events verification */
        relay_log_checksum_alg= binary_log::BINLOG_CHECKSUM_ALG_OFF;
      else
        relay_log_checksum_alg= static_cast<enum_binlog_checksum_alg>
                                (binlog_checksum_options);
    }
    s.common_footer->checksum_alg= relay_log_checksum_alg;
  }
  else
    /* binlog */
    s.common_footer->checksum_alg= static_cast<enum_binlog_checksum_alg>
                                     (binlog_checksum_options);

  // crypto is disabled before the format description is written; it is
  // initialized again below only when encrypt_binlog is set.
  crypto.disable();
  assert((s.common_footer)->checksum_alg !=
         binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
  if (!s.is_valid())
    goto err;
  s.dont_set_created= null_created_arg;
  /* Set LOG_EVENT_RELAY_LOG_F flag for relay log's FD */
  if (is_relay_log)
    s.set_relay_log_event();
  if (s.write(&log_file))
    goto err;
  bytes_written+= s.common_header->data_written;

  if (encrypt_binlog)
  {
    if (crypto.load_latest_binlog_key())
    {
      sql_print_error("Failed to fetch or create percona_binlog key from/in keyring and thus "
                      "failed to initialize %s encryption. Have you enabled "
                      "keyring plugin?", log_to_encrypt);
      goto err;
    }
    DBUG_EXECUTE_IF("check_consecutive_binlog_key_versions",
                    { static uint next_key_version = 1;
                      assert(crypto.get_key_version() == next_key_version++);});

    uchar nonce[Binlog_crypt_data::BINLOG_NONCE_LENGTH];
    memset(nonce, 0, Binlog_crypt_data::BINLOG_NONCE_LENGTH);
    if (my_rand_buffer(nonce, sizeof(nonce)))
      goto err;

    // The Start_encryption event carries the key version and the nonce, and
    // uses the same checksum algorithm as the format description event.
    Start_encryption_log_event sele(1, crypto.get_key_version(), nonce);
    sele.common_footer->checksum_alg= s.common_footer->checksum_alg;
    if (write_to_file(&sele))
    {
      sql_print_error("Failed to write Start_encryption event to binary log and thus "
                      "failed to initialize %s encryption.", log_to_encrypt);
      goto err;
    }
    bytes_written+= sele.common_header->data_written;

    if (crypto.init_with_loaded_key(sele.crypto_scheme, nonce))
    {
      sql_print_error("Failed to initialize %s encryption.", log_to_encrypt);
      goto err;
    }
  }

  /*
    We need to revisit this code and improve it.
    See further comments in the mysqld.
    /Alfranio
  */
  if (current_thd)
  {
    Gtid_set logged_gtids_binlog(global_sid_map, global_sid_lock);
    Gtid_set* previous_logged_gtids;

    // Relay logs use the previously stored relay log set; binary logs use
    // the set computed below.
    if (is_relay_log)
      previous_logged_gtids= previous_gtid_set_relaylog;
    else
      previous_logged_gtids= &logged_gtids_binlog;

    if (need_sid_lock)
      global_sid_lock->wrlock();
    else
      global_sid_lock->assert_some_wrlock();

    if (!is_relay_log)
    {
      const Gtid_set *executed_gtids= gtid_state->get_executed_gtids();
      const Gtid_set *gtids_only_in_table=
        gtid_state->get_gtids_only_in_table();
      /* logged_gtids_binlog= executed_gtids - gtids_only_in_table */
      if (logged_gtids_binlog.add_gtid_set(executed_gtids) !=
          RETURN_STATUS_OK)
      {
        // Do not leave with the lock taken above still held.
        if (need_sid_lock)
          global_sid_lock->unlock();
        goto err;
      }
      logged_gtids_binlog.remove_gtid_set(gtids_only_in_table);
    }
    DBUG_PRINT("info",("Generating PREVIOUS_GTIDS for %s file.",
                       is_relay_log ? "relaylog" : "binlog"));
    Previous_gtids_log_event prev_gtids_ev(previous_logged_gtids);
    if (is_relay_log)
      prev_gtids_ev.set_relay_log_event();
    if (need_sid_lock)
      global_sid_lock->unlock();
    prev_gtids_ev.common_footer->checksum_alg=
                                   (s.common_footer)->checksum_alg;
    if (write_to_file(&prev_gtids_ev))
      goto err;
    bytes_written+= prev_gtids_ev.common_header->data_written;
  }
  else // !(current_thd)
  {
    /*
      If the slave was configured before server restart, the server will
      generate a new relay log file without having current_thd, but this
      new relay log file must have a PREVIOUS_GTIDS event as we now
      generate the PREVIOUS_GTIDS event always.

      This is only needed for relay log files because the server will add
      the PREVIOUS_GTIDS of binary logs (when current_thd==NULL) after
      server's GTID initialization.

      During server's startup at mysqld_main(), from the binary/relay log
      initialization point of view, it will:
      1) Call init_server_components() that will generate a new binary log
         file but won't write the PREVIOUS_GTIDS event yet;
      2) Initialize server's GTIDs;
      3) Write the binary log PREVIOUS_GTIDS;
      4) Call init_slave() in where the new relay log file will be created
         after initializing relay log's Retrieved_Gtid_Set;
    */
    if (is_relay_log)
    {
      if (need_sid_lock)
        global_sid_lock->wrlock();
      else
        global_sid_lock->assert_some_wrlock();

      DBUG_PRINT("info",("Generating PREVIOUS_GTIDS for relaylog file."));
      Previous_gtids_log_event prev_gtids_ev(previous_gtid_set_relaylog);
      prev_gtids_ev.set_relay_log_event();

      if (need_sid_lock)
        global_sid_lock->unlock();

      prev_gtids_ev.common_footer->checksum_alg=
                                     (s.common_footer)->checksum_alg;
      if (write_to_file(&prev_gtids_ev))
        goto err;
      bytes_written+= prev_gtids_ev.common_header->data_written;
    }
  }
  if (extra_description_event &&
      extra_description_event->binlog_version>=4)
  {
    /*
      This is a relay log written to by the I/O slave thread.
      Write the event so that others can later know the format of this relay
      log.
      Note that this event is very close to the original event from the
      master (it has binlog version of the master, event types of the
      master), so this is suitable to parse the next relay log's event. It
      has been produced by
      Format_description_log_event::Format_description_log_event(char* buf,).
      Why don't we want to write the mi_description_event if this
      event is for format<4 (3.23 or 4.x): this is because in that case, the
      mi_description_event describes the data received from the
      master, but not the data written to the relay log (*conversion*),
      which is in format 4 (slave's).
    */
    /*
      Set 'created' to 0, so that in next relay logs this event does not
      trigger cleaning actions on the slave in
      Format_description_log_event::apply_event_impl().
    */
    extra_description_event->created= 0;
    /* Don't set log_pos in event header */
    extra_description_event->set_artificial_event();

    if (write_to_file(extra_description_event))
      goto err;
    bytes_written+= extra_description_event->common_header->data_written;
  }
  // Make everything written so far durable before touching the index file.
  if (flush_io_cache(&log_file) ||
      mysql_file_sync(log_file.file, MYF(MY_WME)))
    goto err;

  if (write_file_name_to_index_file)
  {
#ifdef HAVE_REPLICATION
    DBUG_EXECUTE_IF("crash_create_critical_before_update_index", DBUG_SUICIDE(););
#endif

    assert(my_b_inited(&index_file) != 0);

    /*
      The new log file name is appended into crash safe index file after
      all the content of index file is copied into the crash safe index
      file. Then move the crash safe index file to index file.
    */
    DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
                    {DBUG_SET("+d,simulate_no_free_space_error");});
    if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
        add_log_to_index((uchar*) log_file_name, strlen(log_file_name),
                         need_lock_index))
    {
      DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
                      {
                        DBUG_SET("-d,simulate_file_write_error");
                        DBUG_SET("-d,simulate_no_free_space_error");
                        DBUG_SET("-d,simulate_disk_full_on_open_binlog");
                      });
      goto err;
    }

#ifdef HAVE_REPLICATION
    DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
#endif
  }

  log_state.atomic_set(LOG_OPENED);
  /*
    At every rotate memorize the last transaction counter state to use it as
    offset at logging the transaction logical timestamps.
  */
  mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
  m_dependency_tracker.rotate();
  mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);

#ifdef HAVE_REPLICATION
  close_purge_index_file();
#endif

  update_binlog_end_pos();
  DBUG_RETURN(0);

err:
  /*
    Common failure path for everything after the log file was opened:
    process and close the purge index file, then either abort the server
    or report the error and close the log, depending on binlog_error_action.
  */
#ifdef HAVE_REPLICATION
  if (is_inited_purge_index_file())
    purge_index_entry(NULL, NULL, need_lock_index);
  close_purge_index_file();
#endif
  if (binlog_error_action == ABORT_SERVER)
  {
    std::string err_msg= "Either disk is full or file system is read only ";
    if (encrypt_binlog)
      err_msg+= "or encryption failed ";
    err_msg+= "while opening the ";
    err_msg+= log_to_encrypt;
    err_msg+= ". Aborting the server.";

    exec_binlog_error_action_abort(err_msg.c_str());
  }
  else
  {
    sql_print_error("Could not use %s for logging (error %d). "
                    "Turning logging off for the whole duration of the MySQL "
                    "server process. To turn it on again: fix the cause, "
                    "shutdown the MySQL server and restart it.",
                    (new_name) ? new_name : name, errno);
    close(LOG_CLOSE_INDEX, false, need_lock_index);
  }
  DBUG_RETURN(1);
}
5560
5561
5562 /**
5563 Move crash safe index file to index file.
5564
5565 @param need_lock_index If true, LOCK_index will be acquired;
5566 otherwise it should already be held.
5567
5568 @retval 0 ok
5569 @retval -1 error
5570 */
move_crash_safe_index_file_to_index_file(bool need_lock_index)5571 int MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file(bool need_lock_index)
5572 {
5573 int error= 0;
5574 File fd= -1;
5575 DBUG_ENTER("MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file");
5576 int failure_trials= MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
5577 bool file_rename_status= false, file_delete_status= false;
5578 THD *thd= current_thd;
5579
5580 if (need_lock_index)
5581 mysql_mutex_lock(&LOCK_index);
5582 else
5583 mysql_mutex_assert_owner(&LOCK_index);
5584
5585 if (my_b_inited(&index_file))
5586 {
5587 end_io_cache(&index_file);
5588 if (mysql_file_close(index_file.file, MYF(0)) < 0)
5589 {
5590 error= -1;
5591 sql_print_error("While rebuilding index file %s: "
5592 "Failed to close the index file.", index_file_name);
5593 /*
5594 Delete Crash safe index file here and recover the binlog.index
5595 state(index_file io_cache) from old binlog.index content.
5596 */
5597 mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
5598 MYF(0));
5599
5600 goto recoverable_err;
5601 }
5602
5603 /*
5604 Sometimes an outsider can lock index files for temporary viewing
5605 purpose. For eg: MEB locks binlog.index/relaylog.index to view
5606 the content of the file. During that small period of time, deletion
5607 of the file is not possible on some platforms(Eg: Windows)
5608 Server should retry the delete operation for few times instead of panicking
5609 immediately.
5610 */
5611 while ((file_delete_status == false) && (failure_trials > 0))
5612 {
5613 if (DBUG_EVALUATE_IF("force_index_file_delete_failure", 1, 0)) break;
5614
5615 DBUG_EXECUTE_IF("simulate_index_file_delete_failure",
5616 {
5617 /* This simulation causes the delete to fail */
5618 static char first_char= index_file_name[0];
5619 index_file_name[0]= 0;
5620 sql_print_information("Retrying delete");
5621 if (failure_trials == 1)
5622 index_file_name[0]= first_char;
5623 };);
5624 file_delete_status = !(mysql_file_delete(key_file_binlog_index,
5625 index_file_name, MYF(MY_WME)));
5626 --failure_trials;
5627 if (!file_delete_status)
5628 {
5629 my_sleep(1000);
5630 /* Clear the error before retrying. */
5631 if (failure_trials > 0)
5632 thd->clear_error();
5633 }
5634 }
5635
5636 if (!file_delete_status)
5637 {
5638 error= -1;
5639 sql_print_error("While rebuilding index file %s: "
5640 "Failed to delete the existing index file. It could be "
5641 "that file is being used by some other process.",
5642 index_file_name);
5643 /*
5644 Delete Crash safe file index file here and recover the binlog.index
5645 state(index_file io_cache) from old binlog.index content.
5646 */
5647 mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
5648 MYF(0));
5649
5650 goto recoverable_err;
5651 }
5652 }
5653
5654 DBUG_EXECUTE_IF("crash_create_before_rename_index_file", DBUG_SUICIDE(););
5655 /*
5656 Sometimes an outsider can lock index files for temporary viewing
5657 purpose. For eg: MEB locks binlog.index/relaylog.index to view
5658 the content of the file. During that small period of time, rename
5659 of the file is not possible on some platforms(Eg: Windows)
5660 Server should retry the rename operation for few times instead of panicking
5661 immediately.
5662 */
5663 failure_trials = MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
5664 while ((file_rename_status == false) && (failure_trials > 0))
5665 {
5666 DBUG_EXECUTE_IF("simulate_crash_safe_index_file_rename_failure",
5667 {
5668 /* This simulation causes the rename to fail */
5669 static char first_char= index_file_name[0];
5670 index_file_name[0]= 0;
5671 sql_print_information("Retrying rename");
5672 if (failure_trials == 1)
5673 index_file_name[0]= first_char;
5674 };);
5675 file_rename_status =
5676 !(my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)));
5677 --failure_trials;
5678 if (!file_rename_status)
5679 {
5680 my_sleep(1000);
5681 /* Clear the error before retrying. */
5682 if (failure_trials > 0)
5683 thd->clear_error();
5684 }
5685 }
5686 if (!file_rename_status)
5687 {
5688 error= -1;
5689 sql_print_error("While rebuilding index file %s: "
5690 "Failed to rename the new index file to the existing "
5691 "index file.", index_file_name);
5692 goto fatal_err;
5693 }
5694 DBUG_EXECUTE_IF("crash_create_after_rename_index_file", DBUG_SUICIDE(););
5695
5696 recoverable_err:
5697 if ((fd= mysql_file_open(key_file_binlog_index,
5698 index_file_name,
5699 O_RDWR | O_CREAT | O_BINARY,
5700 MYF(MY_WME))) < 0 ||
5701 mysql_file_sync(fd, MYF(MY_WME)) ||
5702 init_io_cache_ext(&index_file, fd, IO_SIZE, READ_CACHE,
5703 mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(0)),
5704 0, MYF(MY_WME | MY_WAIT_IF_FULL),
5705 key_file_binlog_index_cache))
5706 {
5707 sql_print_error("After rebuilding the index file %s: "
5708 "Failed to open the index file.", index_file_name);
5709 goto fatal_err;
5710 }
5711
5712 if (need_lock_index)
5713 mysql_mutex_unlock(&LOCK_index);
5714 DBUG_RETURN(error);
5715
5716 fatal_err:
5717 /*
5718 This situation is very very rare to happen (unless there is some serious
5719 memory related issues like OOM) and should be treated as fatal error.
5720 Hence it is better to bring down the server without respecting
5721 'binlog_error_action' value here.
5722 */
5723 exec_binlog_error_action_abort("MySQL server failed to update the "
5724 "binlog.index file's content properly. "
5725 "It might not be in sync with available "
5726 "binlogs and the binlog.index file state is in "
5727 "unrecoverable state. Aborting the server.");
5728 /*
5729 Server is aborted in the above function.
5730 This is dead code to make compiler happy.
5731 */
5732 DBUG_RETURN(error);
5733 }
5734
5735
5736 /**
5737 Append log file name to index file.
5738
5739 - To make crash safe, we copy all the content of index file
5740 to crash safe index file firstly and then append the log
5741 file name to the crash safe index file. Finally move the
5742 crash safe index file to index file.
5743
5744 @retval
5745 0 ok
5746 @retval
5747 -1 error
5748 */
add_log_to_index(uchar * log_name,size_t log_name_len,bool need_lock_index)5749 int MYSQL_BIN_LOG::add_log_to_index(uchar* log_name,
5750 size_t log_name_len, bool need_lock_index)
5751 {
5752 DBUG_ENTER("MYSQL_BIN_LOG::add_log_to_index");
5753
5754 if (open_crash_safe_index_file())
5755 {
5756 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5757 "open the crash safe index file.");
5758 goto err;
5759 }
5760
5761 if (copy_file(&index_file, &crash_safe_index_file, 0))
5762 {
5763 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5764 "copy index file to crash safe index file.");
5765 goto err;
5766 }
5767
5768 if (my_b_write(&crash_safe_index_file, log_name, log_name_len) ||
5769 my_b_write(&crash_safe_index_file, (uchar*) "\n", 1) ||
5770 flush_io_cache(&crash_safe_index_file) ||
5771 mysql_file_sync(crash_safe_index_file.file, MYF(MY_WME)))
5772 {
5773 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5774 "append log file name: %s, to crash "
5775 "safe index file.", log_name);
5776 goto err;
5777 }
5778
5779 if (close_crash_safe_index_file())
5780 {
5781 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5782 "close the crash safe index file.");
5783 goto err;
5784 }
5785
5786 if (move_crash_safe_index_file_to_index_file(need_lock_index))
5787 {
5788 sql_print_error("MYSQL_BIN_LOG::add_log_to_index failed to "
5789 "move crash safe index file to index file.");
5790 goto err;
5791 }
5792
5793 DBUG_RETURN(0);
5794
5795 err:
5796 DBUG_RETURN(-1);
5797 }
5798
get_current_log(LOG_INFO * linfo,bool need_lock_log)5799 int MYSQL_BIN_LOG::get_current_log(LOG_INFO* linfo, bool need_lock_log/*true*/)
5800 {
5801 if (need_lock_log)
5802 mysql_mutex_lock(&LOCK_log);
5803 int ret = raw_get_current_log(linfo);
5804 if (need_lock_log)
5805 mysql_mutex_unlock(&LOCK_log);
5806 return ret;
5807 }
5808
raw_get_current_log(LOG_INFO * linfo)5809 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO* linfo)
5810 {
5811 strmake(linfo->log_file_name, log_file_name, sizeof(linfo->log_file_name)-1);
5812 linfo->pos = my_b_safe_tell(&log_file);
5813 return 0;
5814 }
5815
check_write_error_code(uint error_code)5816 static bool check_write_error_code(uint error_code)
5817 {
5818 return error_code == ER_TRANS_CACHE_FULL ||
5819 error_code == ER_STMT_CACHE_FULL ||
5820 error_code == ER_ERROR_ON_WRITE ||
5821 error_code == ER_BINLOG_LOGGING_IMPOSSIBLE;
5822 }
5823
check_write_error(THD * thd)5824 bool MYSQL_BIN_LOG::check_write_error(THD *thd)
5825 {
5826 DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
5827
5828 if (!thd->is_error())
5829 DBUG_RETURN(false);
5830
5831 bool checked= check_write_error_code(thd->get_stmt_da()->mysql_errno());
5832
5833 if (!checked)
5834 {
5835 /* Check all conditions for one that matches the expected error */
5836 const Sql_condition *err;
5837 Diagnostics_area::Sql_condition_iterator it=
5838 thd->get_stmt_da()->sql_conditions();
5839 while ((err= it++) != NULL && !checked)
5840 {
5841 checked= check_write_error_code(err->mysql_errno());
5842 }
5843 }
5844 DBUG_PRINT("return", ("checked: %s", YESNO(checked)));
5845 DBUG_RETURN(checked);
5846 }
5847
set_write_error(THD * thd,bool is_transactional)5848 void MYSQL_BIN_LOG::set_write_error(THD *thd, bool is_transactional)
5849 {
5850 DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
5851
5852 write_error= 1;
5853
5854 if (check_write_error(thd))
5855 DBUG_VOID_RETURN;
5856
5857 if (my_errno() == EFBIG)
5858 {
5859 if (is_transactional)
5860 {
5861 my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(MY_WME));
5862 }
5863 else
5864 {
5865 my_message(ER_STMT_CACHE_FULL, ER(ER_STMT_CACHE_FULL), MYF(MY_WME));
5866 }
5867 }
5868 else
5869 {
5870 char errbuf[MYSYS_STRERROR_SIZE];
5871 my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name,
5872 errno, my_strerror(errbuf, sizeof(errbuf), errno));
5873 }
5874
5875 DBUG_VOID_RETURN;
5876 }
5877
compare_log_name(const char * log_1,const char * log_2)5878 static int compare_log_name(const char* log_1, const char* log_2)
5879 {
5880 const char * log_1_basename= log_1 + dirname_length(log_1);
5881 const char * log_2_basename= log_2 + dirname_length(log_2);
5882
5883 return strcmp(log_1_basename,log_2_basename);
5884 }
5885
5886 /**
5887 Find the position in the log-index-file for the given log name.
5888
5889 @param[out] linfo The found log file name will be stored here, along
5890 with the byte offset of the next log file name in the index file.
5891 @param log_name Filename to find in the index file, or NULL if we
5892 want to read the first entry.
5893 @param need_lock_index If false, this function acquires LOCK_index;
5894 otherwise the lock should already be held by the caller.
5895
5896 @note
5897 On systems without the truncate function the file will end with one or
5898 more empty lines. These will be ignored when reading the file.
5899
5900 @retval
5901 0 ok
5902 @retval
5903 LOG_INFO_EOF End of log-index-file found
5904 @retval
5905 LOG_INFO_IO Got IO error while reading file
5906 */
5907
find_log_pos(LOG_INFO * linfo,const char * log_name,bool need_lock_index)5908 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
5909 bool need_lock_index)
5910 {
5911 int error= 0;
5912 char *full_fname= linfo->log_file_name;
5913 char full_log_name[FN_REFLEN], fname[FN_REFLEN];
5914 DBUG_ENTER("find_log_pos");
5915 full_log_name[0]= full_fname[0]= 0;
5916
5917 /*
5918 Mutex needed because we need to make sure the file pointer does not
5919 move from under our feet
5920 */
5921 if (need_lock_index)
5922 mysql_mutex_lock(&LOCK_index);
5923 else
5924 mysql_mutex_assert_owner(&LOCK_index);
5925
5926 if (!my_b_inited(&index_file))
5927 {
5928 error= LOG_INFO_IO;
5929 goto end;
5930 }
5931
5932 // extend relative paths for log_name to be searched
5933 if (log_name)
5934 {
5935 if(normalize_binlog_name(full_log_name, log_name, is_relay_log))
5936 {
5937 error= LOG_INFO_EOF;
5938 goto end;
5939 }
5940 }
5941
5942 DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
5943 log_name ? log_name : "NULL", full_log_name));
5944
5945 /* As the file is flushed, we can't get an error here */
5946 my_b_seek(&index_file, (my_off_t) 0);
5947
5948 for (;;)
5949 {
5950 size_t length;
5951 my_off_t offset= my_b_tell(&index_file);
5952
5953 DBUG_EXECUTE_IF("simulate_find_log_pos_error",
5954 error= LOG_INFO_EOF; break;);
5955 /* If we get 0 or 1 characters, this is the end of the file */
5956 if ((length= my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
5957 {
5958 /* Did not find the given entry; Return not found or error */
5959 error= !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
5960 break;
5961 }
5962
5963 // extend relative paths and match against full path
5964 if (normalize_binlog_name(full_fname, fname, is_relay_log))
5965 {
5966 error= LOG_INFO_EOF;
5967 break;
5968 }
5969 // if the log entry matches, null string matching anything
5970 if (!log_name ||
5971 !compare_log_name(full_fname,full_log_name))
5972 {
5973 DBUG_PRINT("info", ("Found log file entry"));
5974 linfo->index_file_start_offset= offset;
5975 linfo->index_file_offset = my_b_tell(&index_file);
5976 break;
5977 }
5978 linfo->entry_index++;
5979 }
5980
5981 end:
5982 if (need_lock_index)
5983 mysql_mutex_unlock(&LOCK_index);
5984 DBUG_RETURN(error);
5985 }
5986
5987
5988 /**
5989 Find the position in the log-index-file for the given log name.
5990
5991 @param[out] linfo The filename will be stored here, along with the
5992 byte offset of the next filename in the index file.
5993
5994 @param need_lock_index If true, LOCK_index will be acquired;
5995 otherwise it should already be held by the caller.
5996
5997 @note
5998 - Before calling this function, one has to call find_log_pos()
5999 to set up 'linfo'
6000 - Mutex needed because we need to make sure the file pointer does not move
6001 from under our feet
6002
6003 @retval 0 ok
6004 @retval LOG_INFO_EOF End of log-index-file found
6005 @retval LOG_INFO_IO Got IO error while reading file
6006 */
find_next_log(LOG_INFO * linfo,bool need_lock_index)6007 int MYSQL_BIN_LOG::find_next_log(LOG_INFO* linfo, bool need_lock_index)
6008 {
6009 int error= 0;
6010 size_t length;
6011 char fname[FN_REFLEN];
6012 char *full_fname= linfo->log_file_name;
6013
6014 if (need_lock_index)
6015 mysql_mutex_lock(&LOCK_index);
6016 else
6017 mysql_mutex_assert_owner(&LOCK_index);
6018
6019 if (!my_b_inited(&index_file))
6020 {
6021 error= LOG_INFO_IO;
6022 goto err;
6023 }
6024 /* As the file is flushed, we can't get an error here */
6025 my_b_seek(&index_file, linfo->index_file_offset);
6026
6027 linfo->index_file_start_offset= linfo->index_file_offset;
6028 if ((length=my_b_gets(&index_file, fname, FN_REFLEN)) <= 1)
6029 {
6030 error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
6031 goto err;
6032 }
6033
6034 if (fname[0] != 0)
6035 {
6036 if(normalize_binlog_name(full_fname, fname, is_relay_log))
6037 {
6038 error= LOG_INFO_EOF;
6039 goto err;
6040 }
6041 length= strlen(full_fname);
6042 }
6043
6044 linfo->index_file_offset= my_b_tell(&index_file);
6045
6046 err:
6047 if (need_lock_index)
6048 mysql_mutex_unlock(&LOCK_index);
6049 return error;
6050 }
6051
6052 /**
6053 Find the relay log name following the given name from relay log index file.
6054
6055 @param[in|out] log_name The name is full path name.
6056
6057 @return return 0 if it finds next relay log. Otherwise return the error code.
6058 */
find_next_relay_log(char log_name[FN_REFLEN+1])6059 int MYSQL_BIN_LOG::find_next_relay_log(char log_name[FN_REFLEN+1])
6060 {
6061 LOG_INFO info;
6062 int error;
6063 char relative_path_name[FN_REFLEN+1];
6064
6065 if (fn_format(relative_path_name, log_name+dirname_length(log_name),
6066 mysql_data_home, "", 0)
6067 == NullS)
6068 return 1;
6069
6070 mysql_mutex_lock(&LOCK_index);
6071
6072 error= find_log_pos(&info, relative_path_name, false);
6073 if (error == 0)
6074 {
6075 error= find_next_log(&info, false);
6076 if (error == 0)
6077 strcpy(log_name, info.log_file_name);
6078 }
6079
6080 mysql_mutex_unlock(&LOCK_index);
6081 return error;
6082 }
6083
6084 /**
6085 Removes files, as part of a RESET MASTER or RESET SLAVE statement,
6086 by deleting all logs refered to in the index file. Then, it starts
6087 writing to a new log file.
6088
6089 The new index file will only contain this file.
6090
6091 @param thd Thread
6092
6093 @note
6094 If not called from slave thread, write start event to new log
6095
6096 @retval
6097 0 ok
6098 @retval
6099 1 error
6100 */
reset_logs(THD * thd,bool delete_only)6101 bool MYSQL_BIN_LOG::reset_logs(THD* thd, bool delete_only)
6102 {
6103 LOG_INFO linfo;
6104 bool error=0;
6105 int err;
6106 const char* save_name;
6107 DBUG_ENTER("reset_logs");
6108
6109 /*
6110 Flush logs for storage engines, so that the last transaction
6111 is fsynced inside storage engines.
6112 */
6113 if (ha_flush_logs(NULL))
6114 DBUG_RETURN(1);
6115
6116 ha_reset_logs(thd);
6117
6118 /*
6119 We need to get both locks to be sure that no one is trying to
6120 write to the index log file.
6121 */
6122 mysql_mutex_lock(&LOCK_log);
6123 mysql_mutex_lock(&LOCK_index);
6124
6125 global_sid_lock->wrlock();
6126
6127 /* Save variables so that we can reopen the log */
6128 save_name=name;
6129 name=0; // Protect against free
6130 close(LOG_CLOSE_TO_BE_OPENED, false/*need_lock_log=false*/,
6131 false/*need_lock_index=false*/);
6132
6133 /*
6134 First delete all old log files and then update the index file.
6135 As we first delete the log files and do not use sort of logging,
6136 a crash may lead to an inconsistent state where the index has
6137 references to non-existent files.
6138
6139 We need to invert the steps and use the purge_index_file methods
6140 in order to make the operation safe.
6141 */
6142
6143 if ((err= find_log_pos(&linfo, NullS, false/*need_lock_index=false*/)) != 0)
6144 {
6145 uint errcode= purge_log_get_error_code(err);
6146 sql_print_error("Failed to locate old binlog or relay log files");
6147 my_message(errcode, ER(errcode), MYF(0));
6148 error= 1;
6149 goto err;
6150 }
6151
6152 for (;;)
6153 {
6154 if ((error= my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0)
6155 {
6156 if (my_errno() == ENOENT)
6157 {
6158 push_warning_printf(current_thd, Sql_condition::SL_WARNING,
6159 ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
6160 linfo.log_file_name);
6161 sql_print_information("Failed to delete file '%s'",
6162 linfo.log_file_name);
6163 set_my_errno(0);
6164 error= 0;
6165 }
6166 else
6167 {
6168 push_warning_printf(current_thd, Sql_condition::SL_WARNING,
6169 ER_BINLOG_PURGE_FATAL_ERR,
6170 "a problem with deleting %s; "
6171 "consider examining correspondence "
6172 "of your binlog index file "
6173 "to the actual binlog files",
6174 linfo.log_file_name);
6175 error= 1;
6176 goto err;
6177 }
6178 }
6179 if (find_next_log(&linfo, false/*need_lock_index=false*/))
6180 break;
6181 }
6182
6183 /* Start logging with a new file */
6184 close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED,
6185 false/*need_lock_log=false*/,
6186 false/*need_lock_index=false*/);
6187 if ((error= my_delete_allow_opened(index_file_name, MYF(0)))) // Reset (open will update)
6188 {
6189 if (my_errno() == ENOENT)
6190 {
6191 push_warning_printf(current_thd, Sql_condition::SL_WARNING,
6192 ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
6193 index_file_name);
6194 sql_print_information("Failed to delete file '%s'",
6195 index_file_name);
6196 set_my_errno(0);
6197 error= 0;
6198 }
6199 else
6200 {
6201 push_warning_printf(current_thd, Sql_condition::SL_WARNING,
6202 ER_BINLOG_PURGE_FATAL_ERR,
6203 "a problem with deleting %s; "
6204 "consider examining correspondence "
6205 "of your binlog index file "
6206 "to the actual binlog files",
6207 index_file_name);
6208 error= 1;
6209 goto err;
6210 }
6211 }
6212
6213 #ifdef HAVE_REPLICATION
6214 /*
6215 For relay logs we clear the gtid state associated per channel(i.e rli)
6216 in the purge_relay_logs()
6217 */
6218 if (!is_relay_log)
6219 {
6220 if(gtid_state->clear(thd))
6221 {
6222 error= 1;
6223 goto err;
6224 }
6225 // don't clear global_sid_map because it's used by the relay log too
6226 if (gtid_state->init() != 0)
6227 goto err;
6228 }
6229 #endif
6230
6231 if (!delete_only)
6232 {
6233 if (!open_index_file(index_file_name, 0, false/*need_lock_index=false*/))
6234 if ((error= open_binlog(save_name, 0,
6235 max_size, false,
6236 false/*need_lock_index=false*/,
6237 false/*need_sid_lock=false*/,
6238 NULL)))
6239 goto err;
6240 }
6241 my_free((void *) save_name);
6242
6243 err:
6244 if (error == 1)
6245 name= const_cast<char*>(save_name);
6246 global_sid_lock->unlock();
6247 #ifdef HAVE_REPLICATION
6248 count_binlog_space(false);
6249 #endif
6250 mysql_mutex_unlock(&LOCK_index);
6251 mysql_mutex_unlock(&LOCK_log);
6252 DBUG_RETURN(error);
6253 }
6254
6255
6256 /**
6257 Set the name of crash safe index file.
6258
6259 @retval
6260 0 ok
6261 @retval
6262 1 error
6263 */
set_crash_safe_index_file_name(const char * base_file_name)6264 int MYSQL_BIN_LOG::set_crash_safe_index_file_name(const char *base_file_name)
6265 {
6266 int error= 0;
6267 DBUG_ENTER("MYSQL_BIN_LOG::set_crash_safe_index_file_name");
6268 if (fn_format(crash_safe_index_file_name, base_file_name, mysql_data_home,
6269 ".index_crash_safe", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
6270 MY_REPLACE_EXT)) == NULL)
6271 {
6272 error= 1;
6273 sql_print_error("MYSQL_BIN_LOG::set_crash_safe_index_file_name failed "
6274 "to set file name.");
6275 }
6276 DBUG_RETURN(error);
6277 }
6278
6279
6280 /**
6281 Open a (new) crash safe index file.
6282
6283 @note
6284 The crash safe index file is a special file
6285 used for guaranteeing index file crash safe.
6286 @retval
6287 0 ok
6288 @retval
6289 1 error
6290 */
open_crash_safe_index_file()6291 int MYSQL_BIN_LOG::open_crash_safe_index_file()
6292 {
6293 int error= 0;
6294 File file= -1;
6295
6296 DBUG_ENTER("MYSQL_BIN_LOG::open_crash_safe_index_file");
6297
6298 if (!my_b_inited(&crash_safe_index_file))
6299 {
6300 if ((file= my_open(crash_safe_index_file_name, O_RDWR | O_CREAT | O_BINARY,
6301 MYF(MY_WME))) < 0 ||
6302 init_io_cache(&crash_safe_index_file, file, IO_SIZE, WRITE_CACHE,
6303 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
6304 {
6305 error= 1;
6306 sql_print_error("MYSQL_BIN_LOG::open_crash_safe_index_file failed "
6307 "to open temporary index file.");
6308 }
6309 }
6310 DBUG_RETURN(error);
6311 }
6312
6313
6314 /**
6315 Close the crash safe index file.
6316
6317 @note
6318 The crash safe file is just closed, is not deleted.
6319 Because it is moved to index file later on.
6320 @retval
6321 0 ok
6322 @retval
6323 1 error
6324 */
close_crash_safe_index_file()6325 int MYSQL_BIN_LOG::close_crash_safe_index_file()
6326 {
6327 int error= 0;
6328
6329 DBUG_ENTER("MYSQL_BIN_LOG::close_crash_safe_index_file");
6330
6331 if (my_b_inited(&crash_safe_index_file))
6332 {
6333 end_io_cache(&crash_safe_index_file);
6334 error= my_close(crash_safe_index_file.file, MYF(0));
6335 }
6336 memset(&crash_safe_index_file, 0, sizeof(crash_safe_index_file));
6337
6338 DBUG_RETURN(error);
6339 }
6340
6341
6342 /**
6343 Delete relay log files prior to rli->group_relay_log_name
6344 (i.e. all logs which are not involved in a non-finished group
6345 (transaction)), remove them from the index file and start on next
6346 relay log.
6347
6348 IMPLEMENTATION
6349
6350 - You must hold rli->data_lock before calling this function, since
6351 it writes group_relay_log_pos and similar fields of
6352 Relay_log_info.
6353 - Protects index file with LOCK_index
6354 - Delete relevant relay log files
6355 - Copy all file names after these ones to the front of the index file
6356 - If the OS has truncate, truncate the file, else fill it with \n'
6357 - Read the next file name from the index file and store in rli->linfo
6358
6359 @param rli Relay log information
6360 @param included If false, all relay logs that are strictly before
6361 rli->group_relay_log_name are deleted ; if true, the
6362 latter is deleted too (i.e. all relay logs
6363 read by the SQL slave thread are deleted).
6364
6365 @note
6366 - This is only called from the slave SQL thread when it has read
6367 all commands from a relay log and want to switch to a new relay log.
6368 - When this happens, we can be in an active transaction as
6369 a transaction can span over two relay logs
6370 (although it is always written as a single block to the master's binary
6371 log, hence cannot span over two master's binary logs).
6372
6373 @retval
6374 0 ok
6375 @retval
6376 LOG_INFO_EOF End of log-index-file found
6377 @retval
6378 LOG_INFO_SEEK Could not allocate IO cache
6379 @retval
6380 LOG_INFO_IO Got IO error while reading file
6381 */
6382
6383 #ifdef HAVE_REPLICATION
6384
purge_first_log(Relay_log_info * rli,bool included)6385 int MYSQL_BIN_LOG::purge_first_log(Relay_log_info* rli, bool included)
6386 {
6387 int error;
6388 char *to_purge_if_included= NULL;
6389 DBUG_ENTER("purge_first_log");
6390
6391 assert(current_thd->system_thread == SYSTEM_THREAD_SLAVE_SQL);
6392 assert(is_relay_log);
6393 assert(is_open());
6394 assert(rli->slave_running == 1);
6395 assert(!strcmp(rli->linfo.log_file_name,rli->get_event_relay_log_name()));
6396
6397 mysql_mutex_assert_owner(&rli->data_lock);
6398
6399 mysql_mutex_lock(&LOCK_index);
6400 to_purge_if_included= my_strdup(key_memory_Relay_log_info_group_relay_log_name,
6401 rli->get_group_relay_log_name(), MYF(0));
6402
6403 /*
6404 Read the next log file name from the index file and pass it back to
6405 the caller.
6406 */
6407 if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
6408 false/*need_lock_index=false*/)) ||
6409 (error=find_next_log(&rli->linfo, false/*need_lock_index=false*/)))
6410 {
6411 char buff[22];
6412 sql_print_error("next log error: %d offset: %s log: %s included: %d",
6413 error,
6414 llstr(rli->linfo.index_file_offset,buff),
6415 rli->get_event_relay_log_name(),
6416 included);
6417 goto err;
6418 }
6419
6420 /*
6421 Reset rli's coordinates to the current log.
6422 */
6423 rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
6424 rli->set_event_relay_log_name(rli->linfo.log_file_name);
6425
6426 /*
6427 If we removed the rli->group_relay_log_name file,
6428 we must update the rli->group* coordinates, otherwise do not touch it as the
6429 group's execution is not finished (e.g. COMMIT not executed)
6430 */
6431 if (included)
6432 {
6433 rli->set_group_relay_log_pos(BIN_LOG_HEADER_SIZE);
6434 rli->set_group_relay_log_name(rli->linfo.log_file_name);
6435 rli->notify_group_relay_log_name_update();
6436 }
6437 /*
6438 Store where we are in the new file for the execution thread.
6439 If we are in the middle of a transaction, then we
6440 should not store the position in the repository, instead in
6441 that case set a flag to true which indicates that a 'forced flush'
6442 is postponed due to transaction split across the relaylogs.
6443 */
6444 if (!rli->is_in_group())
6445 rli->flush_info(TRUE);
6446 else
6447 rli->force_flush_postponed_due_to_split_trans= true;
6448
6449 DBUG_EXECUTE_IF("crash_before_purge_logs", DBUG_SUICIDE(););
6450
6451 mysql_mutex_lock(&rli->log_space_lock);
6452 rli->relay_log.purge_logs(to_purge_if_included, included,
6453 false/*need_lock_index=false*/,
6454 false/*need_update_threads=false*/,
6455 &rli->log_space_total, true);
6456 // Tell the I/O thread to take the relay_log_space_limit into account
6457 rli->ignore_log_space_limit= 0;
6458 mysql_mutex_unlock(&rli->log_space_lock);
6459
6460 /*
6461 Ok to broadcast after the critical region as there is no risk of
6462 the mutex being destroyed by this thread later - this helps save
6463 context switches
6464 */
6465 mysql_cond_broadcast(&rli->log_space_cond);
6466
6467 /*
6468 * Need to update the log pos because purge logs has been called
6469 * after fetching initially the log pos at the begining of the method.
6470 */
6471 if((error=find_log_pos(&rli->linfo, rli->get_event_relay_log_name(),
6472 false/*need_lock_index=false*/)))
6473 {
6474 char buff[22];
6475 sql_print_error("next log error: %d offset: %s log: %s included: %d",
6476 error,
6477 llstr(rli->linfo.index_file_offset,buff),
6478 rli->get_group_relay_log_name(),
6479 included);
6480 goto err;
6481 }
6482
6483 /* If included was passed, rli->linfo should be the first entry. */
6484 assert(!included || rli->linfo.index_file_start_offset == 0);
6485
6486 err:
6487 my_free(to_purge_if_included);
6488 mysql_mutex_unlock(&LOCK_index);
6489 DBUG_RETURN(error);
6490 }
6491
6492
6493 /**
6494 Remove logs from index file.
6495
6496 - To make crash safe, we copy the content of index file
6497 from index_file_start_offset recored in log_info to
6498 crash safe index file firstly and then move the crash
6499 safe index file to index file.
6500
6501 @param linfo Store here the found log file name and
6502 position to the NEXT log file name in
6503 the index file.
6504
6505 @param need_update_threads If we want to update the log coordinates
6506 of all threads. False for relay logs,
6507 true otherwise.
6508
6509 @retval
6510 0 ok
6511 @retval
6512 LOG_INFO_IO Got IO error while reading/writing file
6513 */
remove_logs_from_index(LOG_INFO * log_info,bool need_update_threads)6514 int MYSQL_BIN_LOG::remove_logs_from_index(LOG_INFO* log_info, bool need_update_threads)
6515 {
6516 if (open_crash_safe_index_file())
6517 {
6518 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6519 "open the crash safe index file.");
6520 goto err;
6521 }
6522
6523 if (copy_file(&index_file, &crash_safe_index_file,
6524 log_info->index_file_start_offset))
6525 {
6526 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6527 "copy index file to crash safe index file.");
6528 goto err;
6529 }
6530
6531 if (close_crash_safe_index_file())
6532 {
6533 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6534 "close the crash safe index file.");
6535 goto err;
6536 }
6537 DBUG_EXECUTE_IF("fault_injection_copy_part_file", DBUG_SUICIDE(););
6538
6539 if (move_crash_safe_index_file_to_index_file(false/*need_lock_index=false*/))
6540 {
6541 sql_print_error("MYSQL_BIN_LOG::remove_logs_from_index failed to "
6542 "move crash safe index file to index file.");
6543 goto err;
6544 }
6545
6546 // now update offsets in index file for running threads
6547 if (need_update_threads)
6548 adjust_linfo_offsets(log_info->index_file_start_offset);
6549 return 0;
6550
6551 err:
6552 return LOG_INFO_IO;
6553 }
6554
6555 /**
6556 Remove all logs before the given log from disk and from the index file.
6557
6558 @param to_log Delete all log file name before this file.
6559 @param included If true, to_log is deleted too.
6560 @param need_lock_index
6561 @param need_update_threads If we want to update the log coordinates of
6562 all threads. False for relay logs, true otherwise.
6563 @param freed_log_space If not null, decrement this variable of
6564 the amount of log space freed
6565 @param auto_purge True if this is an automatic purge.
6566
6567 @note
6568 If any of the logs before the deleted one is in use,
6569 only purge logs up to this one.
6570
6571 @retval
6572 0 ok
6573 @retval
6574 LOG_INFO_EOF to_log not found
6575 LOG_INFO_EMFILE too many files opened
6576 LOG_INFO_FATAL if any other than ENOENT error from
6577 mysql_file_stat() or mysql_file_delete()
6578 */
6579
purge_logs(const char * to_log,bool included,bool need_lock_index,bool need_update_threads,ulonglong * decrease_log_space,bool auto_purge)6580 int MYSQL_BIN_LOG::purge_logs(const char *to_log,
6581 bool included,
6582 bool need_lock_index,
6583 bool need_update_threads,
6584 ulonglong *decrease_log_space,
6585 bool auto_purge)
6586 {
6587 int error= 0, no_of_log_files_to_purge= 0, no_of_log_files_purged= 0;
6588 int no_of_threads_locking_log= 0;
6589 bool exit_loop= 0;
6590 LOG_INFO log_info;
6591 THD *thd= current_thd;
6592 DBUG_ENTER("purge_logs");
6593 DBUG_PRINT("info",("to_log= %s",to_log));
6594
6595 if (need_lock_index)
6596 mysql_mutex_lock(&LOCK_index);
6597 else
6598 mysql_mutex_assert_owner(&LOCK_index);
6599 if ((error=find_log_pos(&log_info, to_log, false/*need_lock_index=false*/)))
6600 {
6601 sql_print_error("MYSQL_BIN_LOG::purge_logs was called with file %s not "
6602 "listed in the index.", to_log);
6603 goto err;
6604 }
6605
6606 no_of_log_files_to_purge= log_info.entry_index;
6607
6608 if ((error= open_purge_index_file(TRUE)))
6609 {
6610 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to sync the index file.");
6611 goto err;
6612 }
6613
6614 /*
6615 File name exists in index file; delete until we find this file
6616 or a file that is used.
6617 */
6618 if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
6619 goto err;
6620
6621 while ((compare_log_name(to_log,log_info.log_file_name) || (exit_loop=included)))
6622 {
6623 if(is_active(log_info.log_file_name))
6624 {
6625 if(!auto_purge)
6626 push_warning_printf(thd, Sql_condition::SL_WARNING,
6627 ER_WARN_PURGE_LOG_IS_ACTIVE,
6628 ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
6629 log_info.log_file_name);
6630 break;
6631 }
6632
6633 if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
6634 {
6635 if(!auto_purge)
6636 push_warning_printf(thd, Sql_condition::SL_WARNING,
6637 ER_WARN_PURGE_LOG_IN_USE,
6638 ER(ER_WARN_PURGE_LOG_IN_USE),
6639 log_info.log_file_name, no_of_threads_locking_log,
6640 no_of_log_files_purged, no_of_log_files_to_purge);
6641 break;
6642 }
6643 no_of_log_files_purged++;
6644
6645 if ((error= register_purge_index_entry(log_info.log_file_name)))
6646 {
6647 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to copy %s to register file.",
6648 log_info.log_file_name);
6649 goto err;
6650 }
6651
6652 if (find_next_log(&log_info, false/*need_lock_index=false*/) || exit_loop)
6653 break;
6654 }
6655
6656 DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););
6657
6658 if ((error= sync_purge_index_file()))
6659 {
6660 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to flush register file.");
6661 goto err;
6662 }
6663
6664 /* We know how many files to delete. Update index file. */
6665 if ((error=remove_logs_from_index(&log_info, need_update_threads)))
6666 {
6667 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to update the index file");
6668 goto err;
6669 }
6670
6671 // Update gtid_state->lost_gtids
6672 if (!is_relay_log)
6673 {
6674 global_sid_lock->wrlock();
6675 error= init_gtid_sets(NULL,
6676 const_cast<Gtid_set *>(gtid_state->get_lost_gtids()),
6677 opt_master_verify_checksum,
6678 false/*false=don't need lock*/,
6679 NULL/*trx_parser*/, NULL/*gtid_partial_trx*/);
6680 global_sid_lock->unlock();
6681 if (error)
6682 goto err;
6683 }
6684
6685 DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););
6686
6687 err:
6688
6689 int error_index= 0, close_error_index= 0;
6690 /* Read each entry from purge_index_file and delete the file. */
6691 if (!error && is_inited_purge_index_file() &&
6692 (error_index= purge_index_entry(thd, decrease_log_space, false/*need_lock_index=false*/)))
6693 sql_print_error("MYSQL_BIN_LOG::purge_logs failed to process registered files"
6694 " that would be purged.");
6695
6696 close_error_index= close_purge_index_file();
6697
6698 DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index", DBUG_SUICIDE(););
6699
6700 count_binlog_space(false);
6701 if (need_lock_index)
6702 mysql_mutex_unlock(&LOCK_index);
6703
6704 /*
6705 Error codes from purge logs take precedence.
6706 Then error codes from purging the index entry.
6707 Finally, error codes from closing the purge index file.
6708 */
6709 error= error ? error : (error_index ? error_index :
6710 close_error_index);
6711
6712 DBUG_RETURN(error);
6713 }
6714
set_purge_index_file_name(const char * base_file_name)6715 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name)
6716 {
6717 int error= 0;
6718 DBUG_ENTER("MYSQL_BIN_LOG::set_purge_index_file_name");
6719 if (fn_format(purge_index_file_name, base_file_name, mysql_data_home,
6720 ".~rec~", MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH |
6721 MY_REPLACE_EXT)) == NULL)
6722 {
6723 error= 1;
6724 sql_print_error("MYSQL_BIN_LOG::set_purge_index_file_name failed to set "
6725 "file name.");
6726 }
6727 DBUG_RETURN(error);
6728 }
6729
open_purge_index_file(bool destroy)6730 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy)
6731 {
6732 int error= 0;
6733 File file= -1;
6734
6735 DBUG_ENTER("MYSQL_BIN_LOG::open_purge_index_file");
6736
6737 if (destroy)
6738 close_purge_index_file();
6739
6740 if (!my_b_inited(&purge_index_file))
6741 {
6742 if ((file= my_open(purge_index_file_name, O_RDWR | O_CREAT | O_BINARY,
6743 MYF(MY_WME))) < 0 ||
6744 init_io_cache(&purge_index_file, file, IO_SIZE,
6745 (destroy ? WRITE_CACHE : READ_CACHE),
6746 0, 0, MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)))
6747 {
6748 error= 1;
6749 sql_print_error("MYSQL_BIN_LOG::open_purge_index_file failed to open register "
6750 " file.");
6751 }
6752 }
6753 DBUG_RETURN(error);
6754 }
6755
close_purge_index_file()6756 int MYSQL_BIN_LOG::close_purge_index_file()
6757 {
6758 int error= 0;
6759
6760 DBUG_ENTER("MYSQL_BIN_LOG::close_purge_index_file");
6761
6762 if (my_b_inited(&purge_index_file))
6763 {
6764 end_io_cache(&purge_index_file);
6765 error= my_close(purge_index_file.file, MYF(0));
6766 }
6767 my_delete(purge_index_file_name, MYF(0));
6768 memset(&purge_index_file, 0, sizeof(purge_index_file));
6769
6770 DBUG_RETURN(error);
6771 }
6772
is_inited_purge_index_file()6773 bool MYSQL_BIN_LOG::is_inited_purge_index_file()
6774 {
6775 DBUG_ENTER("MYSQL_BIN_LOG::is_inited_purge_index_file");
6776 DBUG_RETURN (my_b_inited(&purge_index_file));
6777 }
6778
sync_purge_index_file()6779 int MYSQL_BIN_LOG::sync_purge_index_file()
6780 {
6781 int error= 0;
6782 DBUG_ENTER("MYSQL_BIN_LOG::sync_purge_index_file");
6783
6784 if ((error= flush_io_cache(&purge_index_file)) ||
6785 (error= my_sync(purge_index_file.file, MYF(MY_WME))))
6786 DBUG_RETURN(error);
6787
6788 DBUG_RETURN(error);
6789 }
6790
register_purge_index_entry(const char * entry)6791 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry)
6792 {
6793 int error= 0;
6794 DBUG_ENTER("MYSQL_BIN_LOG::register_purge_index_entry");
6795
6796 if ((error=my_b_write(&purge_index_file, (const uchar*)entry, strlen(entry))) ||
6797 (error=my_b_write(&purge_index_file, (const uchar*)"\n", 1)))
6798 DBUG_RETURN (error);
6799
6800 DBUG_RETURN(error);
6801 }
6802
register_create_index_entry(const char * entry)6803 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry)
6804 {
6805 DBUG_ENTER("MYSQL_BIN_LOG::register_create_index_entry");
6806 DBUG_RETURN(register_purge_index_entry(entry));
6807 }
6808
purge_index_entry(THD * thd,ulonglong * decrease_log_space,bool need_lock_index)6809 int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *decrease_log_space,
6810 bool need_lock_index)
6811 {
6812 MY_STAT s;
6813 int error= 0;
6814 LOG_INFO log_info;
6815 LOG_INFO check_log_info;
6816
6817 DBUG_ENTER("MYSQL_BIN_LOG:purge_index_entry");
6818
6819 assert(my_b_inited(&purge_index_file));
6820
6821 if ((error=reinit_io_cache(&purge_index_file, READ_CACHE, 0, 0, 0)))
6822 {
6823 sql_print_error("MYSQL_BIN_LOG::purge_index_entry failed to reinit register file "
6824 "for read");
6825 goto err;
6826 }
6827
6828 for (;;)
6829 {
6830 size_t length;
6831
6832 if ((length=my_b_gets(&purge_index_file, log_info.log_file_name,
6833 FN_REFLEN)) <= 1)
6834 {
6835 if (purge_index_file.error)
6836 {
6837 error= purge_index_file.error;
6838 sql_print_error("MYSQL_BIN_LOG::purge_index_entry error %d reading from "
6839 "register file.", error);
6840 goto err;
6841 }
6842
6843 /* Reached EOF */
6844 break;
6845 }
6846
6847 /* Get rid of the trailing '\n' */
6848 log_info.log_file_name[length-1]= 0;
6849
6850 if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s, MYF(0)))
6851 {
6852 if (my_errno() == ENOENT)
6853 {
6854 /*
6855 It's not fatal if we can't stat a log file that does not exist;
6856 If we could not stat, we won't delete.
6857 */
6858 if (thd)
6859 {
6860 push_warning_printf(thd, Sql_condition::SL_WARNING,
6861 ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
6862 log_info.log_file_name);
6863 }
6864 sql_print_information("Failed to execute mysql_file_stat on file '%s'",
6865 log_info.log_file_name);
6866 set_my_errno(0);
6867 }
6868 else
6869 {
6870 /*
6871 Other than ENOENT are fatal
6872 */
6873 if (thd)
6874 {
6875 push_warning_printf(thd, Sql_condition::SL_WARNING,
6876 ER_BINLOG_PURGE_FATAL_ERR,
6877 "a problem with getting info on being purged %s; "
6878 "consider examining correspondence "
6879 "of your binlog index file "
6880 "to the actual binlog files",
6881 log_info.log_file_name);
6882 }
6883 else
6884 {
6885 sql_print_information("Failed to delete log file '%s'; "
6886 "consider examining correspondence "
6887 "of your binlog index file "
6888 "to the actual binlog files",
6889 log_info.log_file_name);
6890 }
6891 error= LOG_INFO_FATAL;
6892 goto err;
6893 }
6894 }
6895 else
6896 {
6897 if ((error= find_log_pos(&check_log_info, log_info.log_file_name,
6898 need_lock_index)))
6899 {
6900 if (error != LOG_INFO_EOF)
6901 {
6902 if (thd)
6903 {
6904 push_warning_printf(thd, Sql_condition::SL_WARNING,
6905 ER_BINLOG_PURGE_FATAL_ERR,
6906 "a problem with deleting %s and "
6907 "reading the binlog index file",
6908 log_info.log_file_name);
6909 }
6910 else
6911 {
6912 sql_print_information("Failed to delete file '%s' and "
6913 "read the binlog index file",
6914 log_info.log_file_name);
6915 }
6916 goto err;
6917 }
6918
6919 error= 0;
6920 if (!need_lock_index)
6921 {
6922 /*
6923 This is to avoid triggering an error in NDB.
6924
6925 @todo: This is weird, what does NDB errors have to do with
6926 need_lock_index? Explain better or refactor /Sven
6927 */
6928 ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
6929 }
6930
6931 DBUG_PRINT("info",("purging %s",log_info.log_file_name));
6932 if (!mysql_file_delete(key_file_binlog, log_info.log_file_name, MYF(0)))
6933 {
6934 DBUG_EXECUTE_IF("wait_in_purge_index_entry",
6935 {
6936 const char action[] = "now SIGNAL in_purge_index_entry WAIT_FOR go_ahead_sql";
6937 assert(!debug_sync_set_action(thd, STRING_WITH_LEN(action)));
6938 DBUG_SET("-d,wait_in_purge_index_entry");
6939 };);
6940
6941 if (decrease_log_space)
6942 *decrease_log_space-= s.st_size;
6943 }
6944 else
6945 {
6946 if (my_errno() == ENOENT)
6947 {
6948 if (thd)
6949 {
6950 push_warning_printf(thd, Sql_condition::SL_WARNING,
6951 ER_LOG_PURGE_NO_FILE, ER(ER_LOG_PURGE_NO_FILE),
6952 log_info.log_file_name);
6953 }
6954 sql_print_information("Failed to delete file '%s'",
6955 log_info.log_file_name);
6956 set_my_errno(0);
6957 }
6958 else
6959 {
6960 if (thd)
6961 {
6962 push_warning_printf(thd, Sql_condition::SL_WARNING,
6963 ER_BINLOG_PURGE_FATAL_ERR,
6964 "a problem with deleting %s; "
6965 "consider examining correspondence "
6966 "of your binlog index file "
6967 "to the actual binlog files",
6968 log_info.log_file_name);
6969 }
6970 else
6971 {
6972 sql_print_information("Failed to delete file '%s'; "
6973 "consider examining correspondence "
6974 "of your binlog index file "
6975 "to the actual binlog files",
6976 log_info.log_file_name);
6977 }
6978 if (my_errno() == EMFILE)
6979 {
6980 DBUG_PRINT("info",
6981 ("my_errno: %d, set ret = LOG_INFO_EMFILE", my_errno()));
6982 error= LOG_INFO_EMFILE;
6983 goto err;
6984 }
6985 error= LOG_INFO_FATAL;
6986 goto err;
6987 }
6988 }
6989 }
6990 }
6991 }
6992
6993 err:
6994 DBUG_RETURN(error);
6995 }
6996
6997 /**
6998 Count a total size of binary logs (except the active one) to the variable
6999 binlog_space_total.
7000
7001 @param need_lock_index If true, this function acquires LOCK_index;
7002 otherwise the caller should already have acquired it.
7003
7004 @retval
7005 0 ok
7006 @retval
7007 LOG_INFO_FATAL if any other than ENOENT error from
7008 mysql_file_stat() or mysql_file_delete()
7009 LOG_INFO_EOF End of log-index-file found
7010 LOG_INFO_IO Got IO error while reading log-index-file
7011 */
7012
count_binlog_space(bool need_lock_index)7013 int MYSQL_BIN_LOG::count_binlog_space(bool need_lock_index) {
7014 DBUG_ENTER("count_binlog_space");
7015 if (is_relay_log)
7016 DBUG_RETURN(0);
7017
7018 if (need_lock_index)
7019 mysql_mutex_lock(&LOCK_index);
7020 else
7021 mysql_mutex_assert_owner(&LOCK_index);
7022
7023 int error;
7024 LOG_INFO log_info;
7025 binlog_space_total = 0;
7026 if ((error = find_log_pos(&log_info, NullS, false /*need_lock_index=false*/)))
7027 goto done;
7028
7029 MY_STAT stat_area;
7030 while (!(is_active(log_info.log_file_name))) {
7031 if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &stat_area,
7032 MYF(0))) {
7033 if (my_errno() == ENOENT) {
7034 /*
7035 It's not fatal if we can't stat a log file that does not exist.
7036 */
7037 set_my_errno(0);
7038 } else {
7039 error = LOG_INFO_FATAL;
7040 goto done;
7041 }
7042 } else {
7043 binlog_space_total += stat_area.st_size;
7044 }
7045 if (find_next_log(&log_info, false /*need_lock_index=false*/)) break;
7046 }
7047
7048 error = 0;
7049
7050 done:
7051 if (need_lock_index) mysql_mutex_unlock(&LOCK_index);
7052 DBUG_RETURN(error);
7053 }
7054
7055 /**
7056 Purge old logs so that we have a total size lower than binlog_space_limit.
7057
7058 @param need_lock_index If true, this function acquires LOCK_index;
7059 otherwise the caller should already have acquired it.
7060
7061 @note
7062 If any of the logs before the deleted one is in use,
7063 only purge logs up to this one.
7064
7065 @retval
7066 0 ok
7067 @retval
7068 LOG_INFO_FATAL if any other than ENOENT error from
7069 mysql_file_stat() or mysql_file_delete()
7070 LOG_INFO_EOF End of log-index-file found
7071 LOG_INFO_IO Got IO error while reading log-index-file
7072 */
7073
purge_logs_by_size(bool need_lock_index)7074 int MYSQL_BIN_LOG::purge_logs_by_size(bool need_lock_index) {
7075 DBUG_ENTER("purge_logs_by_size");
7076
7077 if (is_relay_log || !binlog_space_limit)
7078 DBUG_RETURN(0);
7079
7080 if (need_lock_index)
7081 mysql_mutex_lock(&LOCK_index);
7082 else
7083 mysql_mutex_assert_owner(&LOCK_index);
7084
7085 int error = 0;
7086 LOG_INFO log_info;
7087 my_off_t binlog_pos= my_b_tell(&log_file);
7088 count_binlog_space(false);
7089
7090 if (!binlog_space_total ||
7091 binlog_space_total + binlog_pos <= binlog_space_limit)
7092 goto done;
7093
7094 if ((error = find_log_pos(&log_info, NullS, false /*need_lock_index=false*/)))
7095 goto done;
7096
7097 MY_STAT stat_area;
7098 char to_log[FN_REFLEN];
7099 to_log[0] = 0;
7100 while (!is_active(log_info.log_file_name)) {
7101 if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &stat_area,
7102 MYF(0))) {
7103 if (my_errno() == ENOENT) {
7104 /*
7105 It's not fatal if we can't stat a log file that does not exist.
7106 */
7107 set_my_errno(0);
7108 } else {
7109 /*
7110 Other than ENOENT are fatal
7111 */
7112 THD *thd = current_thd;
7113 if (thd) {
7114 push_warning_printf(thd, Sql_condition::SL_WARNING,
7115 ER_BINLOG_PURGE_FATAL_ERR,
7116 "a problem with getting info on being purged %s; "
7117 "consider examining correspondence "
7118 "of your binlog index file "
7119 "to the actual binlog files",
7120 log_info.log_file_name);
7121 } else {
7122 sql_print_information("Failed to stat log file '%s'",
7123 log_info.log_file_name);
7124 }
7125 error = LOG_INFO_FATAL;
7126 goto done;
7127 }
7128 }
7129 /* check if a total size of binary logs is bigger than binlog_space_limit
7130 if yes check if it is in use, if not in use then add
7131 it in the list of binary log files to be purged.
7132 */
7133 else if (binlog_space_total + binlog_pos > binlog_space_limit) {
7134 if ((log_in_use(log_info.log_file_name)))
7135 break;
7136 DBUG_PRINT("info", ("purge_logs_by_size binlog_space_total=%llu "
7137 "binlog_pos=%llu sum=%llu\n", binlog_space_total,
7138 binlog_pos, binlog_space_total+binlog_pos));
7139 if (binlog_space_total >= (ulonglong)stat_area.st_size)
7140 binlog_space_total -= stat_area.st_size;
7141 else
7142 break;
7143 strmake(to_log, log_info.log_file_name,
7144 sizeof(log_info.log_file_name) - 1);
7145 } else
7146 break;
7147 if (find_next_log(&log_info, false /*need_lock_index=false*/)) break;
7148 }
7149
7150 error = (to_log[0] ? purge_logs(to_log, true, false /*need_lock_index=false*/,
7151 true /*need_update_threads=true*/,
7152 NULL, true)
7153 : 0);
7154
7155 done:
7156 if (need_lock_index) mysql_mutex_unlock(&LOCK_index);
7157 DBUG_RETURN(error);
7158 }
7159
7160 /**
7161 Purge old logs so that we have a maximum of max_nr_files logs.
7162
7163 @param max_nr_files Maximum number of logfiles to have
7164
7165 @note
7166 If any of the logs before the deleted one is in use,
7167 only purge logs up to this one.
7168
7169 @retval
7170 0 ok
7171 @retval
7172 LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
7173 LOG_INFO_FATAL if any other than ENOENT error from
7174 mysql_file_stat() or mysql_file_delete()
7175 */
7176
purge_logs_maximum_number(ulong max_nr_files)7177 int MYSQL_BIN_LOG::purge_logs_maximum_number(ulong max_nr_files)
7178 {
7179 int error;
7180 char to_log[FN_REFLEN];
7181 LOG_INFO log_info;
7182 ulong current_number_of_logs= 1;
7183
7184 DBUG_ENTER("purge_logs_maximum_number");
7185
7186 mysql_mutex_lock(&LOCK_index);
7187 to_log[0]= 0;
7188
7189 if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
7190 goto err;
7191
7192 while (!find_next_log(&log_info, 0))
7193 current_number_of_logs++;
7194
7195 if (current_number_of_logs <= max_nr_files)
7196 {
7197 error= 0;
7198 goto err; /* No logs to expire */
7199 }
7200
7201 if ((error=find_log_pos(&log_info, NullS, 0 /*no mutex*/)))
7202 goto err;
7203
7204 while (strcmp(log_file_name, log_info.log_file_name) &&
7205 !is_active(log_info.log_file_name) &&
7206 !log_in_use(log_info.log_file_name) &&
7207 current_number_of_logs > max_nr_files)
7208 {
7209 current_number_of_logs--;
7210 strmake(to_log,
7211 log_info.log_file_name,
7212 sizeof(log_info.log_file_name) - 1);
7213
7214 if (find_next_log(&log_info, 0))
7215 {
7216 break;
7217 }
7218 }
7219
7220 error= (to_log[0] ? purge_logs(to_log, true, false, true,
7221 (ulonglong *) 0, true) : 0);
7222
7223 err:
7224 mysql_mutex_unlock(&LOCK_index);
7225 DBUG_RETURN(error);
7226 }
7227
7228 /**
7229 Remove all logs before the given file date from disk and from the
7230 index file.
7231
7232 @param thd Thread pointer
7233 @param purge_time Delete all log files before given date.
7234 @param auto_purge True if this is an automatic purge.
7235
7236 @note
7237 If any of the logs before the deleted one is in use,
7238 only purge logs up to this one.
7239
7240 @retval
7241 0 ok
7242 @retval
7243 LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
7244 LOG_INFO_FATAL if any other than ENOENT error from
7245 mysql_file_stat() or mysql_file_delete()
7246 */
7247
purge_logs_before_date(time_t purge_time,bool auto_purge)7248 int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time, bool auto_purge)
7249 {
7250 int error;
7251 int no_of_threads_locking_log= 0, no_of_log_files_purged= 0;
7252 bool log_is_active= false, log_is_in_use= false;
7253 char to_log[FN_REFLEN], copy_log_in_use[FN_REFLEN];
7254 LOG_INFO log_info;
7255 MY_STAT stat_area;
7256 THD *thd= current_thd;
7257
7258 DBUG_ENTER("purge_logs_before_date");
7259
7260 mysql_mutex_lock(&LOCK_index);
7261 to_log[0]= 0;
7262
7263 if ((error=find_log_pos(&log_info, NullS, false/*need_lock_index=false*/)))
7264 goto err;
7265
7266 while (!(log_is_active= is_active(log_info.log_file_name)))
7267 {
7268 if (!mysql_file_stat(m_key_file_log,
7269 log_info.log_file_name, &stat_area, MYF(0)))
7270 {
7271 if (my_errno() == ENOENT)
7272 {
7273 /*
7274 It's not fatal if we can't stat a log file that does not exist.
7275 */
7276 set_my_errno(0);
7277 }
7278 else
7279 {
7280 /*
7281 Other than ENOENT are fatal
7282 */
7283 if (thd)
7284 {
7285 push_warning_printf(thd, Sql_condition::SL_WARNING,
7286 ER_BINLOG_PURGE_FATAL_ERR,
7287 "a problem with getting info on being purged %s; "
7288 "consider examining correspondence "
7289 "of your binlog index file "
7290 "to the actual binlog files",
7291 log_info.log_file_name);
7292 }
7293 else
7294 {
7295 sql_print_information("Failed to delete log file '%s'",
7296 log_info.log_file_name);
7297 }
7298 error= LOG_INFO_FATAL;
7299 goto err;
7300 }
7301 }
7302 /* check if the binary log file is older than the purge_time
7303 if yes check if it is in use, if not in use then add
7304 it in the list of binary log files to be purged.
7305 */
7306 else if (stat_area.st_mtime < purge_time)
7307 {
7308 if ((no_of_threads_locking_log= log_in_use(log_info.log_file_name)))
7309 {
7310 if (!auto_purge)
7311 {
7312 log_is_in_use= true;
7313 strcpy(copy_log_in_use, log_info.log_file_name);
7314 }
7315 break;
7316 }
7317 strmake(to_log,
7318 log_info.log_file_name,
7319 sizeof(log_info.log_file_name) - 1);
7320 no_of_log_files_purged++;
7321 }
7322 else
7323 break;
7324 if (find_next_log(&log_info, false/*need_lock_index=false*/))
7325 break;
7326 }
7327
7328 if (log_is_active)
7329 {
7330 if(!auto_purge)
7331 push_warning_printf(thd, Sql_condition::SL_WARNING,
7332 ER_WARN_PURGE_LOG_IS_ACTIVE,
7333 ER(ER_WARN_PURGE_LOG_IS_ACTIVE),
7334 log_info.log_file_name);
7335
7336 }
7337
7338 if (log_is_in_use)
7339 {
7340 int no_of_log_files_to_purge= no_of_log_files_purged+1;
7341 while (strcmp(log_file_name, log_info.log_file_name))
7342 {
7343 if (mysql_file_stat(m_key_file_log, log_info.log_file_name,
7344 &stat_area, MYF(0)))
7345 {
7346 if (stat_area.st_mtime < purge_time)
7347 no_of_log_files_to_purge++;
7348 else
7349 break;
7350 }
7351 if (find_next_log(&log_info, false/*need_lock_index=false*/))
7352 {
7353 no_of_log_files_to_purge++;
7354 break;
7355 }
7356 }
7357
7358 push_warning_printf(thd, Sql_condition::SL_WARNING,
7359 ER_WARN_PURGE_LOG_IN_USE,
7360 ER(ER_WARN_PURGE_LOG_IN_USE),
7361 copy_log_in_use, no_of_threads_locking_log,
7362 no_of_log_files_purged, no_of_log_files_to_purge);
7363 }
7364
7365 error= (to_log[0] ? purge_logs(to_log, true,
7366 false/*need_lock_index=false*/,
7367 true/*need_update_threads=true*/,
7368 (ulonglong *) 0, auto_purge) : 0);
7369
7370 err:
7371 mysql_mutex_unlock(&LOCK_index);
7372 DBUG_RETURN(error);
7373 }
7374 #endif /* HAVE_REPLICATION */
7375
7376
7377 /**
7378 Create a new log file name.
7379
7380 @param buf buf of at least FN_REFLEN where new name is stored
7381
7382 @note
  If the file name would be longer than FN_REFLEN, it is truncated.
7384 */
7385
make_log_name(char * buf,const char * log_ident)7386 void MYSQL_BIN_LOG::make_log_name(char* buf, const char* log_ident)
7387 {
7388 size_t dir_len = dirname_length(log_file_name);
7389 if (dir_len >= FN_REFLEN)
7390 dir_len=FN_REFLEN-1;
7391 my_stpnmov(buf, log_file_name, dir_len);
7392 strmake(buf+dir_len, log_ident, FN_REFLEN - dir_len -1);
7393 }
7394
7395
7396 /**
7397 Check if we are writing/reading to the given log file.
7398 */
7399
is_active(const char * log_file_name_arg)7400 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg)
7401 {
7402 return !compare_log_name(log_file_name, log_file_name_arg);
7403 }
7404
7405
inc_prep_xids(THD * thd)7406 void MYSQL_BIN_LOG::inc_prep_xids(THD *thd)
7407 {
7408 DBUG_ENTER("MYSQL_BIN_LOG::inc_prep_xids");
7409 #ifndef NDEBUG
7410 int result= m_prep_xids.atomic_add(1);
7411 DBUG_PRINT("debug", ("m_prep_xids: %d", result + 1));
7412 #else
7413 (void) m_prep_xids.atomic_add(1);
7414 #endif
7415 thd->get_transaction()->m_flags.xid_written= true;
7416 DBUG_VOID_RETURN;
7417 }
7418
7419
dec_prep_xids(THD * thd)7420 void MYSQL_BIN_LOG::dec_prep_xids(THD *thd)
7421 {
7422 DBUG_ENTER("MYSQL_BIN_LOG::dec_prep_xids");
7423 int32 result= m_prep_xids.atomic_add(-1);
7424 DBUG_PRINT("debug", ("m_prep_xids: %d", result - 1));
7425 thd->get_transaction()->m_flags.xid_written= false;
7426 /* If the old value was 1, it is zero now. */
7427 if (result == 1)
7428 {
7429 mysql_mutex_lock(&LOCK_xids);
7430 mysql_cond_signal(&m_prep_xids_cond);
7431 mysql_mutex_unlock(&LOCK_xids);
7432 }
7433 DBUG_VOID_RETURN;
7434 }
7435
write_to_file(Log_event * event)7436 int MYSQL_BIN_LOG::write_to_file(Log_event* event)
7437 {
7438 if (crypto.is_enabled())
7439 event->event_encrypter.enable_encryption(&crypto);
7440 return event->write(&log_file);
7441 }
7442
7443 /*
  Wrappers around new_file_impl() that avoid exposing an argument to
  control locking. Such an argument 1) is less readable, 2) breaks
  encapsulation, and 3) allows external access to the class without
  a lock (which is not possible when new_file_without_locking() is a
  private method).
7449
7450 @retval
7451 nonzero - error
7452
7453 */
7454
new_file(Format_description_log_event * extra_description_event)7455 int MYSQL_BIN_LOG::new_file(Format_description_log_event *extra_description_event)
7456 {
7457 return new_file_impl(true/*need_lock_log=true*/, extra_description_event);
7458 }
7459
7460 /*
7461 @retval
7462 nonzero - error
7463 */
new_file_without_locking(Format_description_log_event * extra_description_event)7464 int MYSQL_BIN_LOG::new_file_without_locking(Format_description_log_event *extra_description_event)
7465 {
7466 return new_file_impl(false/*need_lock_log=false*/, extra_description_event);
7467 }
7468
7469
7470 /**
7471 Start writing to a new log file or reopen the old file.
7472
7473 @param need_lock_log If true, this function acquires LOCK_log;
7474 otherwise the caller should already have acquired it.
7475
7476 @retval 0 success
7477 @retval nonzero - error
7478
7479 @note The new file name is stored last in the index file
7480 */
new_file_impl(bool need_lock_log,Format_description_log_event * extra_description_event)7481 int MYSQL_BIN_LOG::new_file_impl(bool need_lock_log, Format_description_log_event *extra_description_event)
7482 {
7483 int error= 0;
7484 bool close_on_error= false;
7485 char new_name[FN_REFLEN], *new_name_ptr= NULL, *old_name, *file_to_open;
7486
7487 DBUG_ENTER("MYSQL_BIN_LOG::new_file_impl");
7488 if (!is_open())
7489 {
7490 DBUG_PRINT("info",("log is closed"));
7491 DBUG_RETURN(error);
7492 }
7493
7494 if (need_lock_log)
7495 mysql_mutex_lock(&LOCK_log);
7496 else
7497 mysql_mutex_assert_owner(&LOCK_log);
7498 DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
7499 DEBUG_SYNC(current_thd, "before_rotate_binlog"););
7500 mysql_mutex_lock(&LOCK_xids);
7501 /*
7502 We need to ensure that the number of prepared XIDs are 0.
7503
7504 If m_prep_xids is not zero:
7505 - We wait for storage engine commit, hence decrease m_prep_xids
7506 - We keep the LOCK_log to block new transactions from being
7507 written to the binary log.
7508 */
7509 while (get_prep_xids() > 0)
7510 {
7511 DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
7512 mysql_cond_wait(&m_prep_xids_cond, &LOCK_xids);
7513 }
7514 mysql_mutex_unlock(&LOCK_xids);
7515
7516 mysql_mutex_lock(&LOCK_index);
7517
7518 mysql_mutex_assert_owner(&LOCK_log);
7519 mysql_mutex_assert_owner(&LOCK_index);
7520
7521
7522 if (DBUG_EVALUATE_IF("expire_logs_always", 0, 1)
7523 && (error= ha_flush_logs(NULL)))
7524 goto end;
7525
7526 if (!is_relay_log)
7527 {
7528 /* Save set of GTIDs of the last binlog into table on binlog rotation */
7529 if ((error= gtid_state->save_gtids_of_last_binlog_into_table(true)))
7530 {
7531 close_on_error= true;
7532 goto end;
7533 }
7534 }
7535
7536 /*
7537 If user hasn't specified an extension, generate a new log name
7538 We have to do this here and not in open as we want to store the
7539 new file name in the current binary log file.
7540 */
7541 new_name_ptr= new_name;
7542 if ((error= generate_new_name(new_name, name)))
7543 {
7544 // Use the old name if generation of new name fails.
7545 strcpy(new_name, name);
7546 close_on_error= TRUE;
7547 goto end;
7548 }
7549 /*
7550 Make sure that the log_file is initialized before writing
7551 Rotate_log_event into it.
7552 */
7553 if (log_file.alloced_buffer)
7554 {
7555 /*
7556 We log the whole file name for log file as the user may decide
7557 to change base names at some point.
7558 */
7559 Rotate_log_event r(new_name+dirname_length(new_name), 0, LOG_EVENT_OFFSET,
7560 is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
7561 /*
7562 The current relay-log's closing Rotate event must have checksum
7563 value computed with an algorithm of the last relay-logged FD event.
7564 */
7565 if (is_relay_log)
7566 (r.common_footer)->checksum_alg= relay_log_checksum_alg;
7567 assert(!is_relay_log || relay_log_checksum_alg !=
7568 binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
7569 if(DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event",
7570 (error=1), FALSE) ||
7571 (error= write_to_file(&r)))
7572 {
7573 char errbuf[MYSYS_STRERROR_SIZE];
7574 DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno=2;);
7575 close_on_error= true;
7576 my_printf_error(ER_ERROR_ON_WRITE, ER(ER_CANT_OPEN_FILE),
7577 MYF(ME_FATALERROR), name,
7578 errno, my_strerror(errbuf, sizeof(errbuf), errno));
7579 goto end;
7580 }
7581 bytes_written += r.common_header->data_written;
7582 }
7583
7584 if ((error= flush_io_cache(&log_file)))
7585 {
7586 close_on_error= true;
7587 goto end;
7588 }
7589
7590 DEBUG_SYNC(current_thd, "after_rotate_event_appended");
7591
7592 old_name=name;
7593 name=0; // Don't free name
7594 close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX,
7595 false/*need_lock_log=false*/,
7596 false/*need_lock_index=false*/);
7597
7598 if (checksum_alg_reset != binary_log::BINLOG_CHECKSUM_ALG_UNDEF)
7599 {
7600 assert(!is_relay_log);
7601 assert(binlog_checksum_options != checksum_alg_reset);
7602 binlog_checksum_options= checksum_alg_reset;
7603 }
7604 /*
7605 Note that at this point, log_state != LOG_CLOSED (important for is_open()).
7606 */
7607
7608 DEBUG_SYNC(current_thd, "before_rotate_binlog_file");
7609 /*
7610 new_file() is only used for rotation (in FLUSH LOGS or because size >
7611 max_binlog_size or max_relay_log_size).
7612 If this is a binary log, the Format_description_log_event at the beginning of
7613 the new file should have created=0 (to distinguish with the
7614 Format_description_log_event written at server startup, which should
7615 trigger temp tables deletion on slaves.
7616 */
7617
7618 /* reopen index binlog file, BUG#34582 */
7619 file_to_open= index_file_name;
7620 error= open_index_file(index_file_name, 0, false/*need_lock_index=false*/);
7621 if (!error)
7622 {
7623 /* reopen the binary log file. */
7624 file_to_open= new_name_ptr;
7625 error= open_binlog(old_name, new_name_ptr,
7626 max_size, true/*null_created_arg=true*/,
7627 false/*need_lock_index=false*/,
7628 true/*need_sid_lock=true*/,
7629 extra_description_event);
7630 }
7631
7632 /* handle reopening errors */
7633 if (error)
7634 {
7635 char errbuf[MYSYS_STRERROR_SIZE];
7636 my_printf_error(ER_CANT_OPEN_FILE, ER(ER_CANT_OPEN_FILE),
7637 MYF(ME_FATALERROR), file_to_open,
7638 error, my_strerror(errbuf, sizeof(errbuf), error));
7639 close_on_error= true;
7640 }
7641 my_free(old_name);
7642
7643 end:
7644
7645 if (error && close_on_error /* rotate, flush or reopen failed */)
7646 {
7647 /*
7648 Close whatever was left opened.
7649
7650 We are keeping the behavior as it exists today, ie,
7651 we disable logging and move on (see: BUG#51014).
7652
7653 TODO: as part of WL#1790 consider other approaches:
7654 - kill mysql (safety);
7655 - try multiple locations for opening a log file;
7656 - switch server to protected/readonly mode
7657 - ...
7658 */
7659 if (binlog_error_action == ABORT_SERVER)
7660 {
7661 exec_binlog_error_action_abort("Either disk is full or file system is"
7662 " read only while rotating the binlog."
7663 " Aborting the server.");
7664 }
7665 else
7666 sql_print_error("Could not open %s for logging (error %d). "
7667 "Turning logging off for the whole duration "
7668 "of the MySQL server process. To turn it on "
7669 "again: fix the cause, shutdown the MySQL "
7670 "server and restart it.",
7671 new_name_ptr, errno);
7672 close(LOG_CLOSE_INDEX, false /*need_lock_log=false*/,
7673 false/*need_lock_index=false*/);
7674 }
7675
7676 mysql_mutex_unlock(&LOCK_index);
7677 if (need_lock_log)
7678 mysql_mutex_unlock(&LOCK_log);
7679 DEBUG_SYNC(current_thd, "after_disable_binlog");
7680 DBUG_RETURN(error);
7681 }
7682
7683
7684 #ifdef HAVE_REPLICATION
7685 /**
7686 Called after an event has been written to the relay log by the IO
7687 thread. This flushes and possibly syncs the file (according to the
7688 sync options), rotates the file if it has grown over the limit, and
7689 finally calls signal_update().
7690
7691 @note The caller must hold LOCK_log before invoking this function.
7692
7693 @param mi Master_info for the IO thread.
7694 @param need_data_lock If true, mi->data_lock will be acquired if a
7695 rotation is needed. Otherwise, mi->data_lock must be held by the
7696 caller.
7697
7698 @retval false success
7699 @retval true error
7700 */
after_append_to_relay_log(Master_info * mi)7701 bool MYSQL_BIN_LOG::after_append_to_relay_log(Master_info *mi)
7702 {
7703 DBUG_ENTER("MYSQL_BIN_LOG::after_append_to_relay_log");
7704 DBUG_PRINT("info",("max_size: %lu",max_size));
7705
7706 // Check pre-conditions
7707 mysql_mutex_assert_owner(&LOCK_log);
7708 mysql_mutex_assert_owner(&mi->data_lock);
7709 assert(is_relay_log);
7710 assert(current_thd->system_thread == SYSTEM_THREAD_SLAVE_IO);
7711
7712 /*
7713 We allow the relay log rotation by relay log size
7714 only if the trx parser is not inside a transaction.
7715 */
7716 bool can_rotate= mi->transaction_parser.is_not_inside_transaction();
7717
7718 #ifndef NDEBUG
7719 if ((uint) my_b_append_tell(&log_file) >
7720 DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size) &&
7721 !can_rotate)
7722 {
7723 DBUG_PRINT("info",("Postponing the rotation by size waiting for "
7724 "the end of the current transaction."));
7725 }
7726 #endif
7727
7728 // Flush and sync
7729 bool error= false;
7730 if (flush_and_sync(0) == 0 && can_rotate)
7731 {
7732 /*
7733 If the last event of the transaction has been flushed, we can add
7734 the GTID (if it is not empty) to the logged set, or else it will
7735 not be available in the Previous GTIDs of the next relay log file
7736 if we are going to rotate the relay log.
7737 */
7738 Gtid *last_gtid_queued= mi->get_last_gtid_queued();
7739 if (!last_gtid_queued->is_empty())
7740 {
7741 global_sid_lock->rdlock();
7742 mi->rli->add_logged_gtid(last_gtid_queued->sidno,
7743 last_gtid_queued->gno);
7744 global_sid_lock->unlock();
7745 mi->clear_last_gtid_queued();
7746 }
7747
7748 /*
7749 If relay log is too big, rotate. But only if not in the middle of a
7750 transaction when GTIDs are enabled.
7751 We now try to mimic the following master binlog behavior: "A transaction
7752 is written in one chunk to the binary log, so it is never split between
7753 several binary logs. Therefore, if you have big transactions, you might
7754 see binary log files larger than max_binlog_size."
7755 */
7756 if ((uint) my_b_append_tell(&log_file) >
7757 DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size))
7758 {
7759 error= new_file_without_locking(mi->get_mi_description_event());
7760 }
7761 }
7762
7763 signal_update();
7764
7765 DBUG_RETURN(error);
7766 }
7767
7768
append_event(Log_event * ev,Master_info * mi)7769 bool MYSQL_BIN_LOG::append_event(Log_event* ev, Master_info *mi)
7770 {
7771 DBUG_ENTER("MYSQL_BIN_LOG::append");
7772
7773 // check preconditions
7774 assert(log_file.type == SEQ_READ_APPEND);
7775 assert(is_relay_log);
7776
7777 // acquire locks
7778 mysql_mutex_lock(&LOCK_log);
7779
7780 // write data
7781 bool error = false;
7782 if (write_to_file(ev) == 0)
7783 {
7784 bytes_written+= ev->common_header->data_written;
7785 error= after_append_to_relay_log(mi);
7786 }
7787 else
7788 error= true;
7789
7790 mysql_mutex_unlock(&LOCK_log);
7791 DBUG_RETURN(error);
7792 }
7793
append_buffer(uchar * buf,size_t len,Master_info * mi)7794 bool MYSQL_BIN_LOG::append_buffer(uchar* buf, size_t len, Master_info *mi)
7795 {
7796 DBUG_ENTER("MYSQL_BIN_LOG::append_buffer");
7797
7798 // check preconditions
7799 assert(log_file.type == SEQ_READ_APPEND);
7800 assert(is_relay_log);
7801 mysql_mutex_assert_owner(&LOCK_log);
7802
7803 // write data
7804 uchar *ebuf= NULL;
7805
7806 if (crypto.is_enabled())
7807 {
7808 ebuf= reinterpret_cast<uchar*>(my_malloc(PSI_NOT_INSTRUMENTED, len, MYF(MY_WME)));
7809 if (!ebuf ||
7810 encrypt_event(my_b_append_tell(&log_file), crypto, buf, ebuf, len))
7811 {
7812 if (ebuf != NULL)
7813 my_free(ebuf);
7814 DBUG_RETURN(true);
7815 }
7816
7817 buf= ebuf;
7818 }
7819
7820 if (my_b_append(&log_file,(uchar*) buf,len))
7821 {
7822 if (ebuf != NULL)
7823 my_free(ebuf);
7824 DBUG_RETURN(true);
7825 }
7826
7827 if (ebuf != NULL)
7828 my_free(ebuf);
7829
7830 bytes_written += len;
7831 DBUG_RETURN(after_append_to_relay_log(mi));
7832 }
7833 #endif // ifdef HAVE_REPLICATION
7834
flush_and_sync(const bool force)7835 bool MYSQL_BIN_LOG::flush_and_sync(const bool force)
7836 {
7837 mysql_mutex_assert_owner(&LOCK_log);
7838
7839 if (flush_io_cache(&log_file))
7840 return 1;
7841
7842 std::pair<bool, bool> result= sync_binlog_file(force);
7843
7844 return result.first;
7845 }
7846
start_union_events(THD * thd,query_id_t query_id_param)7847 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param)
7848 {
7849 assert(!thd->binlog_evt_union.do_union);
7850 thd->binlog_evt_union.do_union= TRUE;
7851 thd->binlog_evt_union.unioned_events= FALSE;
7852 thd->binlog_evt_union.unioned_events_trans= FALSE;
7853 thd->binlog_evt_union.first_query_id= query_id_param;
7854 }
7855
stop_union_events(THD * thd)7856 void MYSQL_BIN_LOG::stop_union_events(THD *thd)
7857 {
7858 assert(thd->binlog_evt_union.do_union);
7859 thd->binlog_evt_union.do_union= FALSE;
7860 }
7861
is_query_in_union(THD * thd,query_id_t query_id_param)7862 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param)
7863 {
7864 return (thd->binlog_evt_union.do_union &&
7865 query_id_param >= thd->binlog_evt_union.first_query_id);
7866 }
7867
7868 /*
7869 Updates thd's position-of-next-event variables
  after a *real* write to a file.
7871 */
update_thd_next_event_pos(THD * thd)7872 void MYSQL_BIN_LOG::update_thd_next_event_pos(THD* thd)
7873 {
7874 if (likely(thd != NULL))
7875 {
7876 thd->set_next_event_pos(log_file_name,
7877 my_b_tell(&log_file));
7878 }
7879 }
7880
7881 /*
7882 Moves the last bunch of rows from the pending Rows event to a cache (either
7883 transactional cache if is_transaction is @c true, or the non-transactional
7884 cache otherwise. Sets a new pending event.
7885
7886 @param thd a pointer to the user thread.
7887 @param evt a pointer to the row event.
7888 @param is_transactional @c true indicates a transactional cache,
7889 otherwise @c false a non-transactional.
7890 */
7891 int
flush_and_set_pending_rows_event(THD * thd,Rows_log_event * event,bool is_transactional)7892 MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
7893 Rows_log_event* event,
7894 bool is_transactional)
7895 {
7896 DBUG_ENTER("MYSQL_BIN_LOG::flush_and_set_pending_rows_event(event)");
7897 assert(mysql_bin_log.is_open());
7898 DBUG_PRINT("enter", ("event: 0x%lx", (long) event));
7899
7900 int error= 0;
7901 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(thd);
7902
7903 assert(cache_mngr);
7904
7905 binlog_cache_data *cache_data=
7906 cache_mngr->get_binlog_cache_data(is_transactional);
7907
7908 DBUG_PRINT("info", ("cache_mngr->pending(): 0x%lx", (long) cache_data->pending()));
7909
7910 if (Rows_log_event* pending= cache_data->pending())
7911 {
7912 /*
7913 Write pending event to the cache.
7914 */
7915 if (cache_data->write_event(thd, pending))
7916 {
7917 set_write_error(thd, is_transactional);
7918 if (check_write_error(thd) && cache_data &&
7919 stmt_cannot_safely_rollback(thd))
7920 cache_data->set_incident();
7921 delete pending;
7922 cache_data->set_pending(NULL);
7923 DBUG_RETURN(1);
7924 }
7925
7926 delete pending;
7927 }
7928
7929 cache_data->set_pending(event);
7930
7931 DBUG_RETURN(error);
7932 }
7933
7934 /**
7935 Write an event to the binary log.
7936 */
7937
write_event(Log_event * event_info)7938 bool MYSQL_BIN_LOG::write_event(Log_event *event_info)
7939 {
7940 THD *thd= event_info->thd;
7941 bool error= 1;
7942 DBUG_ENTER("MYSQL_BIN_LOG::write_event(Log_event *)");
7943
7944 if (thd->binlog_evt_union.do_union)
7945 {
7946 /*
7947 In Stored function; Remember that function call caused an update.
7948 We will log the function call to the binary log on function exit
7949 */
7950 thd->binlog_evt_union.unioned_events= TRUE;
7951 thd->binlog_evt_union.unioned_events_trans |=
7952 event_info->is_using_trans_cache();
7953 DBUG_RETURN(0);
7954 }
7955
7956 /*
7957 We only end the statement if we are in a top-level statement. If
7958 we are inside a stored function, we do not end the statement since
7959 this will close all tables on the slave. But there can be a special case
7960 where we are inside a stored function/trigger and a SAVEPOINT is being
7961 set in side the stored function/trigger. This SAVEPOINT execution will
7962 force the pending event to be flushed without an STMT_END_F flag. This
7963 will result in a case where following DMLs will be considered as part of
7964 same statement and result in data loss on slave. Hence in this case we
7965 force the end_stmt to be true.
7966 */
7967 bool const end_stmt= (thd->in_sub_stmt && thd->lex->sql_command ==
7968 SQLCOM_SAVEPOINT)? true:
7969 (thd->locked_tables_mode && thd->lex->requires_prelocking());
7970 if (thd->binlog_flush_pending_rows_event(end_stmt,
7971 event_info->is_using_trans_cache()))
7972 DBUG_RETURN(error);
7973
7974 /*
7975 In most cases this is only called if 'is_open()' is true; in fact this is
7976 mostly called if is_open() *was* true a few instructions before, but it
7977 could have changed since.
7978 */
7979 if (likely(is_open()))
7980 {
7981 #ifdef HAVE_REPLICATION
7982 /*
7983 In the future we need to add to the following if tests like
7984 "do the involved tables match (to be implemented)
7985 binlog_[wild_]{do|ignore}_table?" (WL#1049)"
7986 */
7987 const char *local_db= event_info->get_db();
7988 if ((thd && !(thd->variables.option_bits & OPTION_BIN_LOG)) ||
7989 (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
7990 thd->lex->sql_command != SQLCOM_SAVEPOINT &&
7991 (!event_info->is_no_filter_event() &&
7992 !binlog_filter->db_ok(local_db))))
7993 DBUG_RETURN(0);
7994 #endif /* HAVE_REPLICATION */
7995
7996 assert(event_info->is_using_trans_cache() || event_info->is_using_stmt_cache());
7997
7998 if (binlog_start_trans_and_stmt(thd, event_info))
7999 DBUG_RETURN(error);
8000
8001 bool is_trans_cache= event_info->is_using_trans_cache();
8002 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
8003 binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_trans_cache);
8004
8005 DBUG_PRINT("info",("event type: %d",event_info->get_type_code()));
8006
8007 /*
8008 No check for auto events flag here - this write method should
8009 never be called if auto-events are enabled.
8010
8011 Write first log events which describe the 'run environment'
8012 of the SQL command. If row-based binlogging, Insert_id, Rand
8013 and other kind of "setting context" events are not needed.
8014 */
8015 if (thd)
8016 {
8017 if (!thd->is_current_stmt_binlog_format_row())
8018 {
8019 if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt)
8020 {
8021 Intvar_log_event e(thd,(uchar) binary_log::Intvar_event::LAST_INSERT_ID_EVENT,
8022 thd->first_successful_insert_id_in_prev_stmt_for_binlog,
8023 event_info->event_cache_type, event_info->event_logging_type);
8024 if (cache_data->write_event(thd, &e))
8025 goto err;
8026 if (event_info->is_using_immediate_logging())
8027 thd->binlog_bytes_written+= e.header()->data_written;
8028 }
8029 if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0)
8030 {
8031 DBUG_PRINT("info",("number of auto_inc intervals: %u",
8032 thd->auto_inc_intervals_in_cur_stmt_for_binlog.
8033 nb_elements()));
8034 Intvar_log_event e(thd, (uchar) binary_log::Intvar_event::INSERT_ID_EVENT,
8035 thd->auto_inc_intervals_in_cur_stmt_for_binlog.
8036 minimum(), event_info->event_cache_type,
8037 event_info->event_logging_type);
8038 if (cache_data->write_event(thd, &e))
8039 goto err;
8040 if (event_info->is_using_immediate_logging())
8041 thd->binlog_bytes_written+= e.header()->data_written;
8042 }
8043 if (thd->rand_used)
8044 {
8045 Rand_log_event e(thd,thd->rand_saved_seed1,thd->rand_saved_seed2,
8046 event_info->event_cache_type,
8047 event_info->event_logging_type);
8048 if (cache_data->write_event(thd, &e))
8049 goto err;
8050 if (event_info->is_using_immediate_logging())
8051 thd->binlog_bytes_written+= e.header()->data_written;
8052 }
8053 if (!thd->user_var_events.empty())
8054 {
8055 for (size_t i= 0; i < thd->user_var_events.size(); i++)
8056 {
8057 BINLOG_USER_VAR_EVENT *user_var_event= thd->user_var_events[i];
8058
8059 /* setting flags for user var log event */
8060 uchar flags= User_var_log_event::UNDEF_F;
8061 if (user_var_event->unsigned_flag)
8062 flags|= User_var_log_event::UNSIGNED_F;
8063
8064 User_var_log_event e(thd,
8065 user_var_event->user_var_event->entry_name.ptr(),
8066 user_var_event->user_var_event->entry_name.length(),
8067 user_var_event->value,
8068 user_var_event->length,
8069 user_var_event->type,
8070 user_var_event->charset_number, flags,
8071 event_info->event_cache_type,
8072 event_info->event_logging_type);
8073 if (cache_data->write_event(thd, &e))
8074 goto err;
8075 if (event_info->is_using_immediate_logging())
8076 thd->binlog_bytes_written+= e.header()->data_written;
8077 }
8078 }
8079 }
8080 }
8081
8082 /*
8083 Write the event.
8084 */
8085 if (cache_data->write_event(thd, event_info))
8086 goto err;
8087
8088 if (DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
8089 goto err;
8090 if (event_info->is_using_immediate_logging())
8091 thd->binlog_bytes_written+= event_info->common_header->data_written;
8092
8093 /*
8094 After writing the event, if the trx-cache was used and any unsafe
8095 change was written into it, the cache is marked as cannot safely
8096 roll back.
8097 */
8098 if (is_trans_cache && stmt_cannot_safely_rollback(thd))
8099 cache_mngr->trx_cache.set_cannot_rollback();
8100
8101 error= 0;
8102
8103 err:
8104 if (error)
8105 {
8106 set_write_error(thd, is_trans_cache);
8107 if (check_write_error(thd) && cache_data &&
8108 stmt_cannot_safely_rollback(thd))
8109 cache_data->set_incident();
8110 }
8111 }
8112
8113 DBUG_RETURN(error);
8114 }
8115
8116 /**
8117 The method executes rotation when LOCK_log is already acquired
8118 by the caller.
8119
8120 @param force_rotate caller can request the log rotation
8121 @param check_purge is set to true if rotation took place
8122
8123 @note
8124 If rotation fails, for instance the server was unable
8125 to create a new log file, we still try to write an
8126 incident event to the current log.
8127
8128 @note The caller must hold LOCK_log when invoking this function.
8129
8130 @retval
8131 nonzero - error in rotating routine.
8132 */
rotate(bool force_rotate,bool * check_purge)8133 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool* check_purge)
8134 {
8135 int error= 0;
8136 DBUG_ENTER("MYSQL_BIN_LOG::rotate");
8137
8138 assert(!is_relay_log);
8139 mysql_mutex_assert_owner(&LOCK_log);
8140
8141 DEBUG_SYNC(current_thd,"stop_binlog_rotation_after_acquiring_lock_log");
8142
8143 *check_purge= false;
8144
8145 if (DBUG_EVALUATE_IF("force_rotate", 1, 0) || force_rotate ||
8146 (my_b_tell(&log_file) >= (my_off_t) max_size))
8147 {
8148 error= new_file_without_locking(NULL);
8149 *check_purge= true;
8150 publish_coordinates_for_global_status();
8151 }
8152 DBUG_RETURN(error);
8153 }
8154
8155 /**
8156 The method executes logs purging routine.
8157
8158 @retval
8159 nonzero - error in rotating routine.
8160 */
purge()8161 void MYSQL_BIN_LOG::purge()
8162 {
8163 #ifdef HAVE_REPLICATION
8164 if (expire_logs_days)
8165 {
8166 DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
8167 time_t purge_time= my_time(0) - expire_logs_days*24*60*60;
8168 DBUG_EXECUTE_IF("expire_logs_always",
8169 { purge_time= my_time(0);});
8170 if (purge_time >= 0)
8171 {
8172 /*
8173 Flush logs for storage engines, so that the last transaction
8174 is fsynced inside storage engines.
8175 */
8176 ha_flush_logs(NULL);
8177 purge_logs_before_date(purge_time, true);
8178 }
8179 }
8180 if (max_binlog_files)
8181 {
8182 purge_logs_maximum_number(max_binlog_files);
8183 }
8184 if (binlog_space_limit)
8185 {
8186 purge_logs_by_size(true);
8187 }
8188 #endif
8189 }
8190
8191 /**
8192 Execute a FLUSH LOGS statement.
8193
8194 The method is a shortcut of @c rotate() and @c purge().
8195 LOCK_log is acquired prior to rotate and is released after it.
8196
8197 @param force_rotate caller can request the log rotation
8198
8199 @retval
8200 nonzero - error in rotating routine.
8201 */
rotate_and_purge(THD * thd,bool force_rotate)8202 int MYSQL_BIN_LOG::rotate_and_purge(THD* thd, bool force_rotate)
8203 {
8204 int error= 0;
8205 DBUG_ENTER("MYSQL_BIN_LOG::rotate_and_purge");
8206 bool check_purge= false;
8207
8208 /*
8209 FLUSH BINARY LOGS command should ignore 'read-only' and 'super_read_only'
8210 options so that it can update 'mysql.gtid_executed' replication repository
8211 table.
8212 */
8213 thd->set_skip_readonly_check();
8214 /*
8215 Wait for handlerton to insert any pending information into the binlog.
8216 For e.g. ha_ndbcluster which updates the binlog asynchronously this is
8217 needed so that the user see its own commands in the binlog.
8218 */
8219 ha_binlog_wait(thd);
8220
8221 assert(!is_relay_log);
8222 mysql_mutex_lock(&LOCK_log);
8223 error= rotate(force_rotate, &check_purge);
8224 /*
8225 NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
8226 the mutex. Otherwise causes various deadlocks.
8227 */
8228 mysql_mutex_unlock(&LOCK_log);
8229
8230 if (!error && check_purge)
8231 purge();
8232
8233 DBUG_RETURN(error);
8234 }
8235
next_file_id()8236 uint MYSQL_BIN_LOG::next_file_id()
8237 {
8238 uint res;
8239 mysql_mutex_lock(&LOCK_log);
8240 res = file_id++;
8241 mysql_mutex_unlock(&LOCK_log);
8242 return res;
8243 }
8244
8245
get_gtid_executed(Sid_map * sid_map,Gtid_set * gtid_set)8246 int MYSQL_BIN_LOG::get_gtid_executed(Sid_map *sid_map, Gtid_set *gtid_set)
8247 {
8248 DBUG_ENTER("MYSQL_BIN_LOG::get_gtid_executed");
8249 int error= 0;
8250
8251 mysql_mutex_lock(&mysql_bin_log.LOCK_commit);
8252 global_sid_lock->wrlock();
8253
8254 enum_return_status return_status= global_sid_map->copy(sid_map);
8255 if (return_status != RETURN_STATUS_OK)
8256 {
8257 error= 1;
8258 goto end;
8259 }
8260
8261 return_status= gtid_set->add_gtid_set(gtid_state->get_executed_gtids());
8262 if (return_status != RETURN_STATUS_OK)
8263 error= 1;
8264
8265 end:
8266 global_sid_lock->unlock();
8267 mysql_mutex_unlock(&mysql_bin_log.LOCK_commit);
8268
8269 DBUG_RETURN(error);
8270 }
8271
8272
8273 /**
8274 Auxiliary function to read a page from the cache and set the given
8275 buffer pointer to point to the beginning of the page and the given
8276 length pointer to point to the end of it.
8277
8278 @param cache IO_CACHE to read from
8279 @param[OUT] buf_p Will be set to point to the beginning of the page.
8280 @param[OUT] buf_len_p Will be set to the length of the buffer.
8281
8282 @retval false Success
8283 @retval true Error reading from the cache.
8284 */
read_cache_page(IO_CACHE * cache,uchar ** buf_p,uint32 * buf_len_p)8285 static bool read_cache_page(IO_CACHE *cache, uchar **buf_p, uint32 *buf_len_p)
8286 {
8287 assert(*buf_len_p == 0);
8288 cache->read_pos= cache->read_end;
8289 *buf_len_p= my_b_fill(cache);
8290 *buf_p= cache->read_pos;
8291 return cache->error ? true : false;
8292 }
8293
8294
8295 /**
8296 Write the contents of the given IO_CACHE to the binary log.
8297
8298 The cache will be reset as a READ_CACHE to be able to read the
8299 contents from it.
8300
8301 The data will be post-processed: see class Binlog_event_writer for
8302 details.
8303
8304 @param cache Events will be read from this IO_CACHE.
8305 @param writer Events will be written to this Binlog_event_writer.
8306
8307 @retval true IO error.
8308 @retval false Success.
8309
8310 @see MYSQL_BIN_LOG::write_cache
8311 */
do_write_cache(IO_CACHE * cache,Binlog_event_writer * writer)8312 bool MYSQL_BIN_LOG::do_write_cache(IO_CACHE *cache, Binlog_event_writer *writer)
8313 {
8314 DBUG_ENTER("MYSQL_BIN_LOG::do_write_cache");
8315
8316 DBUG_EXECUTE_IF("simulate_do_write_cache_failure",
8317 {
8318 /*
8319 see binlog_cache_data::write_event() that reacts on
8320 @c simulate_disk_full_at_flush_pending.
8321 */
8322 DBUG_SET("-d,simulate_do_write_cache_failure");
8323 DBUG_RETURN(true);
8324 });
8325
8326 #ifndef NDEBUG
8327 uint64 expected_total_len= my_b_tell(cache);
8328 #endif
8329
8330 DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
8331 {
8332 DBUG_SET("+d,simulate_file_write_error");
8333 });
8334
8335 int reinit_err= reinit_io_cache(cache, READ_CACHE, 0, 0, 0);
8336 DBUG_EXECUTE_IF("simulate_tmpdir_partition_full",
8337 {
8338 DBUG_SET("-d,simulate_file_write_error");
8339 });
8340 if (reinit_err)
8341 DBUG_RETURN(true);
8342
8343 uchar *buf= cache->read_pos;
8344 uint32 buf_len= my_b_bytes_in_cache(cache);
8345 uint32 event_len= 0;
8346 uchar header[LOG_EVENT_HEADER_LEN];
8347 uint32 header_len= 0;
8348
8349 /*
8350 Each iteration of this loop processes all or a part of
8351 1) an event header or 2) an event body from the IO_CACHE.
8352 */
8353 while (true)
8354 {
8355 /**
8356 Nothing in cache: try to refill, and if cache was ended here,
8357 return success. This code is needed even on the first iteration
8358 of the loop, because reinit_io_cache may or may not fill the
8359 first page.
8360 */
8361 if (buf_len == 0)
8362 {
8363 if (read_cache_page(cache, &buf, &buf_len))
8364 {
8365 /**
8366 @todo: this can happen in case of disk corruption in the
8367 IO_CACHE. We may have written a half transaction (even half
8368 event) to the binlog. We should rollback the transaction
8369 and truncate the binlog. /Sven
8370 */
8371 assert(0);
8372 }
8373 if (buf_len == 0)
8374 {
8375 /**
8376 @todo: this can happen in case of disk corruption in the
8377 IO_CACHE. We may have written a half transaction (even half
8378 event) to the binlog. We should rollback the transaction
8379 and truncate the binlog. /Sven
8380 */
8381 assert(my_b_tell(cache) == expected_total_len);
8382 /* Arrive the end of the cache */
8383 DBUG_RETURN(false);
8384 }
8385 }
8386
8387 /* Write event header into binlog */
8388 if (event_len == 0)
8389 {
8390 /* data in the buf may be smaller than header size.*/
8391 uint32 header_incr =
8392 std::min<uint32>(LOG_EVENT_HEADER_LEN - header_len, buf_len);
8393
8394 memcpy(header + header_len, buf, header_incr);
8395 header_len += header_incr;
8396 buf += header_incr;
8397 buf_len -= header_incr;
8398
8399 if (header_len == LOG_EVENT_HEADER_LEN)
8400 {
8401 // Flush event header.
8402 uchar *header_p= header;
8403 if (writer->write_event_part(&header_p, &header_len, &event_len))
8404 DBUG_RETURN(true);
8405 assert(header_len == 0);
8406 }
8407 }
8408 else
8409 {
8410 /* Write all or part of the event body to binlog */
8411 if (writer->write_event_part(&buf, &buf_len, &event_len))
8412 DBUG_RETURN(true);
8413 }
8414 }
8415 }
8416
8417 /**
8418 Writes an incident event to stmt_cache.
8419
8420 @param ev Incident event to be written
8421 @param thd Thread variable
8422 @param need_lock_log If true, will acquire LOCK_log; otherwise the
8423 caller should already have acquired LOCK_log.
8424 @param err_msg Error message written to log file for the incident.
8425 @do_flush_and_sync If true, will call flush_and_sync(), rotate() and
8426 purge().
8427
8428 @retval false error
8429 @retval true success
8430 */
write_incident(Incident_log_event * ev,THD * thd,bool need_lock_log,const char * err_msg,bool do_flush_and_sync)8431 bool MYSQL_BIN_LOG::write_incident(Incident_log_event *ev, THD *thd,
8432 bool need_lock_log, const char* err_msg,
8433 bool do_flush_and_sync)
8434 {
8435 uint error= 0;
8436 DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
8437 assert(err_msg);
8438
8439 if (!is_open())
8440 DBUG_RETURN(error);
8441
8442 // @todo make this work with the group log. /sven
8443 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
8444
8445 /*
8446 thd->cache_mngr may be uninitialized when first transaction resulted in an
8447 incident. If there is no cache manager exists for the session, then we
8448 create one, so that a GTID is generated and is written prior to flushing
8449 the stmt_cache.
8450 */
8451 if (cache_mngr == NULL)
8452 {
8453 if (thd->binlog_setup_trx_data() ||
8454 DBUG_EVALUATE_IF("simulate_cache_creation_failure", 1, 0))
8455 {
8456 enum_gtid_mode gtid_mode= get_gtid_mode(GTID_MODE_LOCK_NONE);
8457 if (gtid_mode == GTID_MODE_ON || gtid_mode == GTID_MODE_ON_PERMISSIVE)
8458 {
8459 const char *mode= gtid_mode == GTID_MODE_ON ? "ON" : "ON_PERMISSIVE";
8460 std::ostringstream message;
8461
8462 message << "Could not create IO cache while writing an incident event "
8463 "to the binary log for query: '"<< thd->query().str <<
8464 "'. Since GTID_MODE= " << mode <<", server is unable "
8465 "to proceed with logging.";
8466 handle_binlog_flush_or_sync_error(thd, true, message.str().c_str());
8467 DBUG_RETURN(true);
8468 }
8469 }
8470 else
8471 cache_mngr= thd_get_cache_mngr(thd);
8472 }
8473
8474 #ifndef NDEBUG
8475 if (DBUG_EVALUATE_IF("simulate_write_incident_event_into_binlog_directly",
8476 1, 0) && !cache_mngr->stmt_cache.is_binlog_empty())
8477 {
8478 /* The stmt_cache contains corruption data, so we can reset it. */
8479 cache_mngr->stmt_cache.reset();
8480 }
8481 #endif
8482
8483 /*
8484 If there is no binlog cache then we write incidents directly
8485 into the binlog. If caller needs GTIDs it has to setup the
8486 binlog cache (for the injector thread).
8487 */
8488 if (cache_mngr == NULL ||
8489 DBUG_EVALUATE_IF("simulate_write_incident_event_into_binlog_directly",
8490 1, 0))
8491 {
8492 if (need_lock_log)
8493 mysql_mutex_lock(&LOCK_log);
8494 else
8495 mysql_mutex_assert_owner(&LOCK_log);
8496 /* Write an incident event into binlog directly. */
8497 error= write_to_file(ev);
8498 /*
8499 Write an error to log. So that user might have a chance
8500 to be alerted and explore incident details.
8501 */
8502 if (!error)
8503 sql_print_error("%s An incident event has been written to the binary "
8504 "log which will stop the slaves.", err_msg);
8505 }
8506 else // (cache_mngr != NULL)
8507 {
8508 if (!cache_mngr->stmt_cache.is_binlog_empty())
8509 {
8510 /* The stmt_cache contains corruption data, so we can reset it. */
8511 cache_mngr->stmt_cache.reset();
8512 }
8513 if (!cache_mngr->trx_cache.is_binlog_empty())
8514 {
8515 /* The trx_cache contains corruption data, so we can reset it. */
8516 cache_mngr->trx_cache.reset();
8517 }
8518 /*
8519 Write the incident event into stmt_cache, so that a GTID is generated and
8520 written for it prior to flushing the stmt_cache.
8521 */
8522 binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(false);
8523 if ((error= cache_data->write_event(thd, ev)))
8524 {
8525 sql_print_error("Failed to write an incident event into stmt_cache.");
8526 cache_mngr->stmt_cache.reset();
8527 DBUG_RETURN(error);
8528 }
8529
8530 if (need_lock_log)
8531 mysql_mutex_lock(&LOCK_log);
8532 else
8533 mysql_mutex_assert_owner(&LOCK_log);
8534 }
8535
8536 if (do_flush_and_sync)
8537 {
8538 if (!error && !(error= flush_and_sync()))
8539 {
8540 bool check_purge= false;
8541 update_binlog_end_pos();
8542 is_rotating_caused_by_incident= true;
8543 error= rotate(true, &check_purge);
8544 is_rotating_caused_by_incident= false;
8545 if (!error && check_purge)
8546 purge();
8547 }
8548 }
8549
8550 if (need_lock_log)
8551 mysql_mutex_unlock(&LOCK_log);
8552
8553 /*
8554 Write an error to log. So that user might have a chance
8555 to be alerted and explore incident details.
8556 */
8557 if (!error && cache_mngr != NULL)
8558 sql_print_error("%s An incident event has been written to the binary "
8559 "log which will stop the slaves.", err_msg);
8560
8561 DBUG_RETURN(error);
8562 }
8563
write_stmt_directly(THD * thd,const char * stmt,size_t stmt_len,enum_sql_command sql_command)8564 bool MYSQL_BIN_LOG::write_stmt_directly(THD* thd, const char *stmt, size_t stmt_len,
8565 enum_sql_command sql_command)
8566 {
8567 bool ret= false;
8568 /* backup the original command */
8569 enum_sql_command save_sql_command= thd->lex->sql_command;
8570 thd->lex->sql_command= sql_command;
8571
8572 if (thd->binlog_query(THD::STMT_QUERY_TYPE, stmt, stmt_len,
8573 FALSE, FALSE, FALSE, 0) ||
8574 commit(thd, false) != TC_LOG::RESULT_SUCCESS)
8575 {
8576 ret= true;
8577 }
8578
8579 thd->lex->sql_command= save_sql_command;
8580 return ret;
8581 }
8582
8583
8584 /**
8585 Creates an incident event and writes it to the binary log.
8586
8587 @param thd Thread variable
8588 @param ev Incident event to be written
8589 @param err_msg Error message written to log file for the incident.
8590 @param lock If the binary lock should be locked or not
8591
8592 @retval
8593 0 error
8594 @retval
8595 1 success
8596 */
write_incident(THD * thd,bool need_lock_log,const char * err_msg,bool do_flush_and_sync)8597 bool MYSQL_BIN_LOG::write_incident(THD *thd, bool need_lock_log,
8598 const char* err_msg,
8599 bool do_flush_and_sync)
8600 {
8601 DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
8602
8603 if (!is_open())
8604 DBUG_RETURN(0);
8605
8606 LEX_STRING write_error_msg= {(char*) err_msg, strlen(err_msg)};
8607 binary_log::Incident_event::enum_incident incident=
8608 binary_log::Incident_event::INCIDENT_LOST_EVENTS;
8609 Incident_log_event ev(thd, incident, write_error_msg);
8610
8611 DBUG_RETURN(write_incident(&ev, thd, need_lock_log, err_msg,
8612 do_flush_and_sync));
8613 }
8614
8615
8616 /**
8617 Write the contents of the statement or transaction cache to the binary log.
8618
8619 Comparison with do_write_cache:
8620
8621 - do_write_cache is a lower-level function that only performs the
8622 actual write.
8623
8624 - write_cache is a higher-level function that calls do_write_cache
8625 and additionally performs some maintenance tasks, including:
8626 - report any errors that occurred
8627 - write incident event if needed
8628 - update gtid_state
8629 - update thd.binlog_next_event_pos
8630
8631 @param thd Thread variable
8632
8633 @param cache_data Events will be read from the IO_CACHE of this
8634 cache_data object.
8635
8636 @param writer Events will be written to this Binlog_event_writer.
8637
8638 @retval true IO error.
8639 @retval false Success.
8640
8641 @note We only come here if there is something in the cache.
8642 @note Whatever is in the cache is always a complete transaction.
8643 @note 'cache' needs to be reinitialized after this functions returns.
8644 */
write_cache(THD * thd,binlog_cache_data * cache_data,Binlog_event_writer * writer)8645 bool MYSQL_BIN_LOG::write_cache(THD *thd, binlog_cache_data *cache_data,
8646 Binlog_event_writer *writer)
8647 {
8648 DBUG_ENTER("MYSQL_BIN_LOG::write_cache(THD *, binlog_cache_data *, bool)");
8649
8650 IO_CACHE *cache= &cache_data->cache_log;
8651 bool incident= cache_data->has_incident();
8652
8653 mysql_mutex_assert_owner(&LOCK_log);
8654
8655 assert(is_open());
8656 if (likely(is_open())) // Should always be true
8657 {
8658 /*
8659 We only bother to write to the binary log if there is anything
8660 to write.
8661
8662 @todo Is this check redundant? Probably this is only called if
8663 there is anything in the cache (see @note in comment above this
8664 function). Check if we can replace this by an assertion. /Sven
8665 */
8666 if (my_b_tell(cache) > 0)
8667 {
8668 DBUG_EXECUTE_IF("crash_before_writing_xid",
8669 {
8670 if ((write_error= do_write_cache(cache, writer)))
8671 DBUG_PRINT("info", ("error writing binlog cache: %d",
8672 write_error));
8673 flush_and_sync(true);
8674 DBUG_PRINT("info", ("crashing before writing xid"));
8675 DBUG_SUICIDE();
8676 });
8677 if ((write_error= do_write_cache(cache, writer)))
8678 goto err;
8679
8680 const char* err_msg= "Non-transactional changes did not get into "
8681 "the binlog.";
8682 if (incident && write_incident(thd, false/*need_lock_log=false*/,
8683 err_msg,
8684 false/*do_flush_and_sync==false*/))
8685 goto err;
8686
8687 DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE(););
8688 if (cache->error) // Error on read
8689 {
8690 char errbuf[MYSYS_STRERROR_SIZE];
8691 sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name,
8692 errno, my_strerror(errbuf, sizeof(errbuf), errno));
8693 write_error= true; // Don't give more errors
8694 goto err;
8695 }
8696 }
8697 update_thd_next_event_pos(thd);
8698 }
8699
8700 DBUG_RETURN(false);
8701
8702 err:
8703 if (!write_error)
8704 {
8705 char errbuf[MYSYS_STRERROR_SIZE];
8706 write_error= true;
8707 sql_print_error(ER(ER_ERROR_ON_WRITE), name,
8708 errno, my_strerror(errbuf, sizeof(errbuf), errno));
8709 }
8710
8711 /*
8712 If the flush has failed due to ENOSPC, set the flush_error flag.
8713 */
8714 if (cache->error && thd->is_error() && my_errno() == ENOSPC)
8715 {
8716 cache_data->set_flush_error(thd);
8717 }
8718 thd->commit_error= THD::CE_FLUSH_ERROR;
8719
8720 DBUG_RETURN(true);
8721 }
8722
8723
8724 /**
8725 Wait until we get a signal that the relay log has been updated.
8726
8727 @param[in] thd Thread variable
8728 @param[in] timeout a pointer to a timespec;
8729 NULL means to wait w/o timeout.
8730
8731 @retval 0 if got signalled on update
8732 @retval non-0 if wait timeout elapsed
8733
8734 @note
8735 One must have a lock on LOCK_log before calling this function.
8736 */
8737
wait_for_update_relay_log(THD * thd,const struct timespec * timeout)8738 int MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd, const struct timespec *timeout)
8739 {
8740 int ret= 0;
8741 PSI_stage_info old_stage;
8742 DBUG_ENTER("wait_for_update_relay_log");
8743
8744 thd->ENTER_COND(&update_cond, &LOCK_log,
8745 &stage_slave_has_read_all_relay_log,
8746 &old_stage);
8747
8748 if (!timeout)
8749 mysql_cond_wait(&update_cond, &LOCK_log);
8750 else
8751 ret= mysql_cond_timedwait(&update_cond, &LOCK_log,
8752 const_cast<struct timespec *>(timeout));
8753 mysql_mutex_unlock(&LOCK_log);
8754 thd->EXIT_COND(&old_stage);
8755
8756 DBUG_RETURN(ret);
8757 }
8758
8759 /**
8760 Wait until we get a signal that the binary log has been updated.
8761 Applies to master only.
8762
8763 NOTES
8764 @param[in] thd a THD struct
8765 @param[in] timeout a pointer to a timespec;
8766 NULL means to wait w/o timeout.
8767 @retval 0 if got signalled on update
8768 @retval non-0 if wait timeout elapsed
8769 @note
8770 LOCK_log must be taken before calling this function.
8771 LOCK_log is being released while the thread is waiting.
8772 LOCK_log is released by the caller.
8773 */
8774
wait_for_update_bin_log(THD * thd,const struct timespec * timeout)8775 int MYSQL_BIN_LOG::wait_for_update_bin_log(THD* thd,
8776 const struct timespec *timeout)
8777 {
8778 int ret= 0;
8779 DBUG_ENTER("wait_for_update_bin_log");
8780
8781 if (!timeout)
8782 mysql_cond_wait(&update_cond, &LOCK_binlog_end_pos);
8783 else
8784 ret= mysql_cond_timedwait(&update_cond, &LOCK_binlog_end_pos,
8785 const_cast<struct timespec *>(timeout));
8786 DBUG_RETURN(ret);
8787 }
8788
8789
8790 /**
8791 Close the log file.
8792
8793 @param exiting Bitmask for one or more of the following bits:
8794 - LOG_CLOSE_INDEX : if we should close the index file
8795 - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
8796 at once after close.
8797 - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
8798
8799 @param need_lock_log If true, this function acquires LOCK_log;
8800 otherwise the caller should already have acquired it.
8801
8802 @param need_lock_index If true, this function acquires LOCK_index;
8803 otherwise the caller should already have acquired it.
8804
8805 @note
8806 One can do an open on the object at once after doing a close.
8807 The internal structures are not freed until cleanup() is called
8808 */
8809
close(uint exiting,bool need_lock_log,bool need_lock_index)8810 void MYSQL_BIN_LOG::close(uint exiting, bool need_lock_log,
8811 bool need_lock_index)
8812 { // One can't set log_type here!
8813 DBUG_ENTER("MYSQL_BIN_LOG::close");
8814 DBUG_PRINT("enter",("exiting: %d", (int) exiting));
8815 if (need_lock_log)
8816 mysql_mutex_lock(&LOCK_log);
8817 else
8818 mysql_mutex_assert_owner(&LOCK_log);
8819
8820 if (log_state.atomic_get() == LOG_OPENED)
8821 {
8822 #ifdef HAVE_REPLICATION
8823 if ((exiting & LOG_CLOSE_STOP_EVENT) != 0)
8824 {
8825 /**
8826 TODO(WL#7546): Change the implementation to Stop_event after write() is
8827 moved into libbinlogevents
8828 */
8829 Stop_log_event s;
8830 // the checksumming rule for relay-log case is similar to Rotate
8831 s.common_footer->checksum_alg= is_relay_log ? relay_log_checksum_alg :
8832 static_cast<enum_binlog_checksum_alg>
8833 (binlog_checksum_options);
8834 assert(!is_relay_log ||
8835 relay_log_checksum_alg != binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
8836 write_to_file(&s);
8837 bytes_written+= s.common_header->data_written;
8838 flush_io_cache(&log_file);
8839 update_binlog_end_pos();
8840 }
8841 #endif /* HAVE_REPLICATION */
8842
8843 /* don't pwrite in a file opened with O_APPEND - it doesn't work */
8844 if (log_file.type == WRITE_CACHE)
8845 {
8846 my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
8847 my_off_t org_position= mysql_file_tell(log_file.file, MYF(0));
8848 uchar flags= 0; // clearing LOG_EVENT_BINLOG_IN_USE_F
8849 mysql_file_pwrite(log_file.file, &flags, 1, offset, MYF(0));
8850 /*
8851 Restore position so that anything we have in the IO_cache is written
8852 to the correct position.
8853 We need the seek here, as mysql_file_pwrite() is not guaranteed to keep the
8854 original position on system that doesn't support pwrite().
8855 */
8856 mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
8857 }
8858
8859 /* this will cleanup IO_CACHE, sync and close the file */
8860 if (log_state.atomic_get() == LOG_OPENED)
8861 {
8862 end_io_cache(&log_file);
8863
8864 if (mysql_file_sync(log_file.file, MYF(MY_WME)) && ! write_error)
8865 {
8866 char errbuf[MYSYS_STRERROR_SIZE];
8867 write_error= 1;
8868 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno,
8869 my_strerror(errbuf, sizeof(errbuf), errno));
8870 }
8871
8872 if (mysql_file_close(log_file.file, MYF(MY_WME)) && ! write_error)
8873 {
8874 char errbuf[MYSYS_STRERROR_SIZE];
8875 write_error= 1;
8876 sql_print_error(ER_DEFAULT(ER_ERROR_ON_WRITE), name, errno,
8877 my_strerror(errbuf, sizeof(errbuf), errno));
8878 }
8879 }
8880
8881 log_state.atomic_set((exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED);
8882 my_free(name);
8883 name= NULL;
8884 }
8885
8886 /*
8887 The following test is needed even if is_open() is not set, as we may have
8888 called a not complete close earlier and the index file is still open.
8889 */
8890
8891 if (need_lock_index)
8892 mysql_mutex_lock(&LOCK_index);
8893 else
8894 mysql_mutex_assert_owner(&LOCK_index);
8895
8896 if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file))
8897 {
8898 end_io_cache(&index_file);
8899 if (mysql_file_close(index_file.file, MYF(0)) < 0 && ! write_error)
8900 {
8901 char errbuf[MYSYS_STRERROR_SIZE];
8902 write_error= 1;
8903 sql_print_error(ER(ER_ERROR_ON_WRITE), index_file_name,
8904 errno, my_strerror(errbuf, sizeof(errbuf), errno));
8905 }
8906 }
8907
8908 if (need_lock_index)
8909 mysql_mutex_unlock(&LOCK_index);
8910
8911 log_state.atomic_set((exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED);
8912 my_free(name);
8913 name= NULL;
8914
8915 if (need_lock_log)
8916 mysql_mutex_unlock(&LOCK_log);
8917
8918 DBUG_VOID_RETURN;
8919 }
8920
harvest_bytes_written(Relay_log_info * rli,bool need_log_space_lock)8921 void MYSQL_BIN_LOG::harvest_bytes_written(Relay_log_info* rli, bool need_log_space_lock)
8922 {
8923 #ifndef NDEBUG
8924 char buf1[22],buf2[22];
8925 #endif
8926 DBUG_ENTER("harvest_bytes_written");
8927 if (need_log_space_lock)
8928 mysql_mutex_lock(&rli->log_space_lock);
8929 else
8930 mysql_mutex_assert_owner(&rli->log_space_lock);
8931 rli->log_space_total+= bytes_written;
8932 DBUG_PRINT("info",("relay_log_space: %s bytes_written: %s",
8933 llstr(rli->log_space_total,buf1), llstr(bytes_written,buf2)));
8934 bytes_written=0;
8935 if (need_log_space_lock)
8936 mysql_mutex_unlock(&rli->log_space_lock);
8937 DBUG_VOID_RETURN;
8938 }
8939
set_max_size(ulong max_size_arg)8940 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg)
8941 {
8942 /*
8943 We need to take locks, otherwise this may happen:
8944 new_file() is called, calls open(old_max_size), then before open() starts,
8945 set_max_size() sets max_size to max_size_arg, then open() starts and
8946 uses the old_max_size argument, so max_size_arg has been overwritten and
8947 it's like if the SET command was never run.
8948 */
8949 DBUG_ENTER("MYSQL_BIN_LOG::set_max_size");
8950 mysql_mutex_lock(&LOCK_log);
8951 if (is_open())
8952 max_size= max_size_arg;
8953 mysql_mutex_unlock(&LOCK_log);
8954 DBUG_VOID_RETURN;
8955 }
8956
8957 /****** transaction coordinator log for 2pc - binlog() based solution ******/
8958
8959 /**
8960 @todo
8961 keep in-memory list of prepared transactions
8962 (add to list in log(), remove on unlog())
8963 and copy it to the new binlog if rotated
8964 but let's check the behaviour of tc_log_page_waits first!
8965 */
8966
open_binlog(const char * opt_name)8967 int MYSQL_BIN_LOG::open_binlog(const char *opt_name)
8968 {
8969 LOG_INFO log_info;
8970 int error= 1;
8971
8972 /*
8973 This function is used for 2pc transaction coordination. Hence, it
8974 is never used for relay logs.
8975 */
8976 assert(!is_relay_log);
8977 assert(total_ha_2pc > 1 || (1 == total_ha_2pc && opt_bin_log));
8978 assert(opt_name && opt_name[0]);
8979
8980 if (!my_b_inited(&index_file))
8981 {
8982 /* There was a failure to open the index file, can't open the binlog */
8983 cleanup();
8984 return 1;
8985 }
8986
8987 if (using_heuristic_recover())
8988 {
8989 /* generate a new binlog to mask a corrupted one */
8990 mysql_mutex_lock(&LOCK_log);
8991 open_binlog(opt_name, 0, max_binlog_size, false,
8992 true/*need_lock_index=true*/,
8993 true/*need_sid_lock=true*/,
8994 NULL);
8995 mysql_mutex_unlock(&LOCK_log);
8996 cleanup();
8997 return 1;
8998 }
8999
9000 if ((error= find_log_pos(&log_info, NullS, true/*need_lock_index=true*/)))
9001 {
9002 if (error != LOG_INFO_EOF)
9003 sql_print_error("find_log_pos() failed (error: %d)", error);
9004 else
9005 error= 0;
9006 goto err;
9007 }
9008
9009 {
9010 const char *errmsg;
9011 IO_CACHE log;
9012 File file;
9013 Log_event *ev=0;
9014 Format_description_log_event fdle(BINLOG_VERSION);
9015 char log_name[FN_REFLEN];
9016 my_off_t valid_pos= 0;
9017 my_off_t binlog_size;
9018 MY_STAT s;
9019
9020 if (! fdle.is_valid())
9021 goto err;
9022
9023 do
9024 {
9025 strmake(log_name, log_info.log_file_name, sizeof(log_name)-1);
9026 } while (!(error= find_next_log(&log_info, true/*need_lock_index=true*/)));
9027
9028 if (error != LOG_INFO_EOF)
9029 {
9030 sql_print_error("find_log_pos() failed (error: %d)", error);
9031 goto err;
9032 }
9033
9034 if ((file= open_binlog_file(&log, log_name, &errmsg)) < 0)
9035 {
9036 sql_print_error("%s", errmsg);
9037 goto err;
9038 }
9039
9040 my_stat(log_name, &s, MYF(0));
9041 binlog_size= s.st_size;
9042
9043 /*
9044 If the binary log was not properly closed it means that the server
9045 may have crashed. In that case, we need to call MYSQL_BIN_LOG::recover
9046 to:
9047
9048 a) collect logged XIDs;
9049 b) complete the 2PC of the pending XIDs;
9050 c) collect the last valid position.
9051
9052 Therefore, we do need to iterate over the binary log, even if
9053 total_ha_2pc == 1, to find the last valid group of events written.
9054 Later we will take this value and truncate the log if need be.
9055 */
9056 if ((ev= Log_event::read_log_event(&log, 0, &fdle,
9057 opt_master_verify_checksum)) &&
9058 ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT &&
9059 (ev->common_header->flags & LOG_EVENT_BINLOG_IN_USE_F ||
9060 DBUG_EVALUATE_IF("eval_force_bin_log_recovery", true, false)))
9061 {
9062 sql_print_information("Recovering after a crash using %s", opt_name);
9063 valid_pos= my_b_tell(&log);
9064 error= recover(&log, (Format_description_log_event *)ev, &valid_pos);
9065 }
9066 else
9067 error=0;
9068
9069 delete ev;
9070 end_io_cache(&log);
9071 mysql_file_close(file, MYF(MY_WME));
9072
9073 if (error)
9074 goto err;
9075
9076 /* Trim the crashed binlog file to last valid transaction
9077 or event (non-transaction) base on valid_pos. */
9078 if (valid_pos > 0)
9079 {
9080 if ((file= mysql_file_open(key_file_binlog, log_name,
9081 O_RDWR | O_BINARY, MYF(MY_WME))) < 0)
9082 {
9083 sql_print_error("Failed to open the crashed binlog file "
9084 "when master server is recovering it.");
9085 return -1;
9086 }
9087
9088 /* Change binlog file size to valid_pos */
9089 if (valid_pos < binlog_size)
9090 {
9091 if (my_chsize(file, valid_pos, 0, MYF(MY_WME)))
9092 {
9093 sql_print_error("Failed to trim the crashed binlog file "
9094 "when master server is recovering it.");
9095 mysql_file_close(file, MYF(MY_WME));
9096 return -1;
9097 }
9098 else
9099 {
9100 sql_print_information("Crashed binlog file %s size is %llu, "
9101 "but recovered up to %llu. Binlog trimmed to %llu bytes.",
9102 log_name, binlog_size, valid_pos, valid_pos);
9103 }
9104 }
9105
9106 /* Clear LOG_EVENT_BINLOG_IN_USE_F */
9107 my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
9108 uchar flags= 0;
9109 if (mysql_file_pwrite(file, &flags, 1, offset, MYF(0)) != 1)
9110 {
9111 sql_print_error("Failed to clear LOG_EVENT_BINLOG_IN_USE_F "
9112 "for the crashed binlog file when master "
9113 "server is recovering it.");
9114 mysql_file_close(file, MYF(MY_WME));
9115 return -1;
9116 }
9117
9118 mysql_file_close(file, MYF(MY_WME));
9119 } //end if
9120 }
9121
9122 err:
9123 return error;
9124 }
9125
/**
  This is called on shutdown, after ha_panic.

  The body is empty, so this overload performs no work.
*/
void MYSQL_BIN_LOG::close()
{
}
9130
9131 /*
9132 Prepare the transaction in the transaction coordinator.
9133
9134 This function will prepare the transaction in the storage engines
9135 (by calling @c ha_prepare_low) what will write a prepare record
9136 to the log buffers.
9137
9138 @retval 0 success
9139 @retval 1 error
9140 */
prepare(THD * thd,bool all)9141 int MYSQL_BIN_LOG::prepare(THD *thd, bool all)
9142 {
9143 DBUG_ENTER("MYSQL_BIN_LOG::prepare");
9144
9145 assert(opt_bin_log);
9146 /*
9147 The applier thread explicitly overrides the value of sql_log_bin
9148 with the value of log_slave_updates.
9149 We may also end up here in some cases if we have a transaction with two
9150 active transactional storage engines, such as is the case if this is a
9151 replication applier and log_slave_updates=0.
9152 */
9153 assert((thd->slave_thread ?
9154 opt_log_slave_updates : thd->variables.sql_log_bin) ||
9155 total_ha_2pc > 1);
9156
9157 /*
9158 Set HA_IGNORE_DURABILITY to not flush the prepared record of the
9159 transaction to the log of storage engine (for example, InnoDB
9160 redo log) during the prepare phase. So that we can flush prepared
9161 records of transactions to the log of storage engine in a group
9162 right before flushing them to binary log during binlog group
9163 commit flush stage. Reset to HA_REGULAR_DURABILITY at the
9164 beginning of parsing next command.
9165 */
9166 thd->durability_property= HA_IGNORE_DURABILITY;
9167
9168 int error= ha_prepare_low(thd, all);
9169
9170 DBUG_RETURN(error);
9171 }
9172
/**
  Commit the transaction in the transaction coordinator.

  This function will commit the sessions transaction in the binary log
  and in the storage engines (by calling @c ha_commit_low). If the
  transaction was successfully logged (or not successfully unlogged)
  but the commit in the engines did not succeed, there is a risk of
  inconsistency between the engines and the binary log.

  For binary log group commit, the commit is separated into three
  parts:

  1. First part consists of filling the necessary caches and
     finalizing them (if they need to be finalized). After this,
     nothing is added to any of the caches.

  2. Second part execute an ordered flush and commit. This will be
     done using the group commit functionality in ordered_commit.

  3. Third part checks any errors resulting from the ordered commit
     and handles them appropriately.

  @param thd  Session whose transaction or statement is being committed.
  @param all  true selects the session scope (Transaction_ctx::SESSION),
              false selects the statement scope (Transaction_ctx::STMT).

  @retval RESULT_SUCCESS   success
  @retval RESULT_ABORTED   error, transaction was neither logged nor committed
  @retval RESULT_INCONSISTENT error, transaction was logged but not committed
*/
TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all)
{
  DBUG_ENTER("MYSQL_BIN_LOG::commit");
  DBUG_PRINT("info", ("query='%s'",
                      thd == current_thd ? thd->query().str : NULL));
  binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
  Transaction_ctx *trn_ctx= thd->get_transaction();
  my_xid xid= trn_ctx->xid_state()->get_xid()->get_my_xid();
  /* Set once something has been finalized in the statement cache. */
  bool stmt_stuff_logged= false;
  /* Set once something has been finalized in the transaction cache. */
  bool trx_stuff_logged= false;
  /* True only if this call acquired the backup binlog protection lock. */
  bool binlog_prot_acquired= false;
  /* When true, the engine commit is not performed by this call. */
  bool skip_commit= is_loggable_xa_prepare(thd);

  DBUG_PRINT("enter", ("thd: 0x%llx, all: %s, xid: %llu, cache_mngr: 0x%llx",
                       (ulonglong) thd, YESNO(all), (ulonglong) xid,
                       (ulonglong) cache_mngr));

  /*
    No cache manager means nothing to log, but we still have to commit
    the transaction.
  */
  if (cache_mngr == NULL)
  {
    if (!skip_commit && ha_commit_low(thd, all))
      DBUG_RETURN(RESULT_ABORTED);
    DBUG_RETURN(RESULT_SUCCESS);
  }

  /*
    Reset binlog_snapshot_% variables for the current connection so that the
    current coordinates are shown after committing a consistent snapshot
    transaction.
  */
  if (all)
  {
    mysql_mutex_lock(&thd->LOCK_thd_data);
    cache_mngr->drop_consistent_snapshot();
    mysql_mutex_unlock(&thd->LOCK_thd_data);
  }

  Transaction_ctx::enum_trx_scope trx_scope=  all ? Transaction_ctx::SESSION :
                                                    Transaction_ctx::STMT;

  DBUG_PRINT("debug", ("in_transaction: %s, no_2pc: %s, rw_ha_count: %d",
                       YESNO(thd->in_multi_stmt_transaction_mode()),
                       YESNO(trn_ctx->no_2pc(trx_scope)),
                       trn_ctx->rw_ha_count(trx_scope)));
  DBUG_PRINT("debug",
             ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
              YESNO(trn_ctx->cannot_safely_rollback(Transaction_ctx::SESSION)),
              YESNO(cache_mngr->trx_cache.is_binlog_empty())));
  DBUG_PRINT("debug",
             ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
              YESNO(trn_ctx->cannot_safely_rollback(Transaction_ctx::STMT)),
              YESNO(cache_mngr->stmt_cache.is_binlog_empty())));


  /*
    If there are no handlertons registered, there is nothing to
    commit. Note that DDLs are written earlier in this case (inside
    binlog_query).

    TODO: This can be a problem in those cases that there are no
    handlertons registered. DDLs are one example, but the other case
    is MyISAM. In this case, we could register a dummy handlerton to
    trigger the commit.

    Any statement that requires logging will call binlog_query before
    trans_commit_stmt, so an alternative is to use the condition
    "binlog_query called or stmt.ha_list != 0".
   */
  if (!all && !trn_ctx->is_active(trx_scope) &&
      cache_mngr->stmt_cache.is_binlog_empty())
    DBUG_RETURN(RESULT_SUCCESS);

  if (thd->lex->sql_command == SQLCOM_XA_COMMIT)
  {
    /* The Commit phase of the XA two phase logging. */

    bool one_phase= get_xa_opt(thd) == XA_ONE_PHASE;
    assert(all);
    assert(!skip_commit || one_phase);

    int err= 0;
    XID_STATE *xs= thd->get_transaction()->xid_state();
    /*
      XA COMMIT ONE PHASE statement which has not gone through the binary log
      prepare phase, has to end the active XA transaction with appropriate XA
      END followed by XA COMMIT ONE PHASE.

      The state of XA transaction is changed to PREPARED after the prepare
      phase, intermediately in ha_commit_trans code for the interest of
      binlogger. Hence check that the XA COMMIT ONE PHASE is set to 'PREPARE'
      and it has not already been written to binary log. For such transaction
      write the appropriate XA END statement.
    */
    if (!(is_loggable_xa_prepare(thd))
        && one_phase
        && !(xs->is_binlogged())
        && !cache_mngr->trx_cache.is_binlog_empty())
    {
      XA_prepare_log_event end_evt(thd, xs->get_xid(), one_phase);
      err= cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
      if (err)
      {
        DBUG_RETURN(RESULT_ABORTED);
      }
      trx_stuff_logged= true;
      thd->get_transaction()->xid_state()->set_binlogged();
    }
    /*
      With the debug keyword set, the failure is simulated and
      do_binlog_xa_commit_rollback() is not called at all.
    */
    if (DBUG_EVALUATE_IF("simulate_xa_commit_log_failure", true,
                         do_binlog_xa_commit_rollback(thd, xs->get_xid(),
                                                      true)))
      DBUG_RETURN(RESULT_ABORTED);
  }

  /*
    If there is anything in the stmt cache, and GTIDs are enabled,
    then this is a single statement outside a transaction and it is
    impossible that there is anything in the trx cache.  Hence, we
    write any empty group(s) to the stmt cache.

    Otherwise, we write any empty group(s) to the trx cache at the end
    of the transaction.
  */
  if (!cache_mngr->stmt_cache.is_binlog_empty())
  {
    /*
      Commit parent identification of non-transactional query has
      been deferred until now, except for the mixed transaction case.
    */
    trn_ctx->store_commit_parent(m_dependency_tracker.get_max_committed_timestamp());
    if (cache_mngr->stmt_cache.finalize(thd))
      DBUG_RETURN(RESULT_ABORTED);
    stmt_stuff_logged= true;
  }

  /*
    We commit the transaction if:
     - We are not in a transaction and committing a statement, or
     - We are in a transaction and a full transaction is committed.
    Otherwise, we accumulate the changes.
  */
  if (!cache_mngr->trx_cache.is_binlog_empty() &&
      ending_trans(thd, all) && !trx_stuff_logged)
  {
    const bool real_trans=
      (all || !trn_ctx->is_active(Transaction_ctx::SESSION));

    /*
      We are committing an XA transaction if it is a "real" transaction
      and has an XID assigned (because some handlerton registered). A
      transaction is "real" if either 'all' is true or the 'all.ha_list'
      is empty.

      Note: This is kind of strange since registering the binlog
      handlerton will then make the transaction XA, which is not really
      true. This occurs for example if a MyISAM statement is executed
      with row-based replication on.
    */
    if (is_loggable_xa_prepare(thd))
    {
      /* The prepare phase of XA transaction two phase logging. */
      int err= 0;
      bool one_phase= get_xa_opt(thd) == XA_ONE_PHASE;

      assert(thd->lex->sql_command != SQLCOM_XA_COMMIT || one_phase);

      XID_STATE *xs= thd->get_transaction()->xid_state();
      XA_prepare_log_event end_evt(thd, xs->get_xid(), one_phase);

      assert(skip_commit);

      err= cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
      if (err ||
          (DBUG_EVALUATE_IF("simulate_xa_prepare_failure_in_cache_finalize",
                            true, false)))
      {
        DBUG_RETURN(RESULT_ABORTED);
      }
    }
    else if (real_trans && xid && trn_ctx->rw_ha_count(trx_scope) > 1 &&
             !trn_ctx->no_2pc(trx_scope))
    {
      /* Terminate the cached transaction with an XID event. */
      Xid_log_event end_evt(thd, xid);
      if (cache_mngr->trx_cache.finalize(thd, &end_evt))
        DBUG_RETURN(RESULT_ABORTED);
    }
    else
    {
      /* Terminate the cached transaction with a "COMMIT" query event. */
      Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"),
                              true, FALSE, TRUE, 0, TRUE);
      if (cache_mngr->trx_cache.finalize(thd, &end_evt))
        DBUG_RETURN(RESULT_ABORTED);
    }
    trx_stuff_logged= true;
  }

  /*
    This is part of the stmt rollback.
  */
  if (!all)
    cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);

  /*
    Now all the events are written to the caches, so we will commit
    the transaction in the engines. This is done using the group
    commit logic in ordered_commit, which will return when the
    transaction is committed.

    If the commit in the engines fail, we still have something logged
    to the binary log so we have to report this as a "bad" failure
    (failed to commit, but logged something).
  */
  if (stmt_stuff_logged || trx_stuff_logged)
  {
    if (RUN_HOOK(transaction,
                 before_commit,
                 (thd, all,
                  thd_get_cache_mngr(thd)->get_binlog_cache_log(true),
                  thd_get_cache_mngr(thd)->get_binlog_cache_log(false),
                  max<my_off_t>(max_binlog_cache_size,
                                max_binlog_stmt_cache_size))) ||
        DBUG_EVALUATE_IF("simulate_failure_in_before_commit_hook", true, false))
    {
      /* Hook failed: roll back in the engines and discard the caches. */
      ha_rollback_low(thd, all);
      gtid_state->update_on_rollback(thd);
      thd_get_cache_mngr(thd)->reset();
      //Reset the thread OK status before changing the outcome.
      if (thd->get_stmt_da()->is_ok())
        thd->get_stmt_da()->reset_diagnostics_area();
      my_error(ER_RUN_HOOK_ERROR, MYF(0), "before_commit");
      DBUG_RETURN(RESULT_ABORTED);
    }
    /*
      Check whether the transaction should commit or abort given the
      plugin feedback.
    */
    if (thd->get_transaction()->get_rpl_transaction_ctx()->is_transaction_rollback() ||
        (DBUG_EVALUATE_IF("simulate_transaction_rollback_request", true, false)))
    {
      ha_rollback_low(thd, all);
      gtid_state->update_on_rollback(thd);
      thd_get_cache_mngr(thd)->reset();
      if (thd->get_stmt_da()->is_ok())
        thd->get_stmt_da()->reset_diagnostics_area();
      my_error(ER_TRANSACTION_ROLLBACK_DURING_COMMIT, MYF(0));
      DBUG_RETURN(RESULT_ABORTED);
    }

    int rc= prepare_ordered_commit(thd, all, skip_commit);
    if (rc)
      DBUG_RETURN(RESULT_INCONSISTENT);

    /*
      Block binlog updates if there's an active BINLOG lock.

      We allow binlog lock owner to commit, assuming it knows what it does. We
      also check if protection has not been acquired earlier, which is possible
      in slave threads to protect master binlog coordinates.
    */
    if (!thd->backup_binlog_lock.is_acquired() &&
        !thd->backup_binlog_lock.is_protection_acquired())
    {
      const ulong timeout= thd->variables.lock_wait_timeout;

      DBUG_PRINT("debug", ("Acquiring binlog protection lock"));

#ifdef HAVE_REPLICATION
      /*
        Debug-only injection point: for the worker with id 0, on every
        second pass through this code, wait for
        signal.lock_binlog_for_backup and then signal
        finished_delay_slave_worker_0.
      */
      DBUG_EXECUTE_IF("delay_slave_worker_0", {
        if (has_commit_order_manager(thd))
        {
          Slave_worker *worker= dynamic_cast<Slave_worker *>(thd->rli_slave);

          if (worker->id == 0)
          {
            static bool skip_first_query= true;
            if (!skip_first_query)
            {
              static const char act[]= "now WAIT_FOR signal.lock_binlog_for_backup";
              assert(!debug_sync_set_action(thd, STRING_WITH_LEN(act)));

              static const char act2[]= "now SIGNAL finished_delay_slave_worker_0";
              assert(opt_debug_sync_timeout > 0);
              assert(!debug_sync_set_action(thd, STRING_WITH_LEN(act2)));

              DBUG_SET("-d,delay_slave_worker_0");
            }
            skip_first_query= !skip_first_query;
          }
        }
      });
#endif

      if (thd->backup_binlog_lock.acquire_protection(thd, MDL_EXPLICIT,
                                                     timeout))
      {
        /* Could not get the protection lock: drop what was cached. */
        cache_mngr->stmt_cache.reset();
        cache_mngr->trx_cache.reset();

        DBUG_RETURN(RESULT_ABORTED);
      }

      binlog_prot_acquired= true;
    }

    rc= ordered_commit(thd);

    /* Release the protection lock only if it was taken by this call. */
    if (binlog_prot_acquired)
    {
      DBUG_PRINT("debug", ("Releasing binlog protection lock"));
      thd->backup_binlog_lock.release_protection(thd);
    }

    if (rc)
      DBUG_RETURN(RESULT_INCONSISTENT);

    /*
      Mark the flag m_is_binlogged to true only after we are done
      with checking all the error cases.
    */
    if (is_loggable_xa_prepare(thd))
      thd->get_transaction()->xid_state()->set_binlogged();
  }
  else if (!skip_commit)
  {
    /*
      We only set engine binlog position in ordered_commit path flush phase
      and not all transactions go through them (such as table copy in DDL).
      So in cases where a DDL statement implicitly commits earlier transaction
      and starting a new one, the new transaction could be "leaking" the
      engine binlog pos. In order to avoid that and accidentally overwrite
      binlog position with previous location, we reset it here.
    */
    thd->set_trans_pos(NULL, 0);
    if (ha_commit_low(thd, all))
      DBUG_RETURN(RESULT_INCONSISTENT);
  }

  DBUG_RETURN(RESULT_SUCCESS);
}
9540
9541
9542 /**
9543 Flush caches for session.
9544
9545 @note @c set_trans_pos is called with a pointer to the file name
9546 that the binary log currently use and a rotation will change the
9547 contents of the variable.
9548
9549 The position is used when calling the after_flush, after_commit,
9550 and after_rollback hooks, but these have been placed so that they
9551 occur before a rotation is executed.
9552
9553 It is the responsibility of any plugin that use this position to
9554 copy it if they need it after the hook has returned.
9555
9556 The current "global" transaction_counter is stepped and its new value
9557 is assigned to the transaction.
9558 */
9559 std::pair<int,my_off_t>
flush_thread_caches(THD * thd)9560 MYSQL_BIN_LOG::flush_thread_caches(THD *thd)
9561 {
9562 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
9563 my_off_t bytes= 0;
9564 bool wrote_xid= false;
9565 int error= cache_mngr->flush(thd, &bytes, &wrote_xid);
9566 if (!error && bytes > 0)
9567 {
9568 /*
9569 Note that set_trans_pos does not copy the file name. See
9570 this function documentation for more info.
9571 */
9572 thd->set_trans_pos(log_file_name, my_b_tell(&log_file));
9573 if (wrote_xid)
9574 inc_prep_xids(thd);
9575 }
9576 DBUG_PRINT("debug", ("bytes: %llu", bytes));
9577 return std::make_pair(error, bytes);
9578 }
9579
9580
9581 /**
9582 Execute the flush stage.
9583
9584 @param total_bytes_var Pointer to variable that will be set to total
9585 number of bytes flushed, or NULL.
9586
9587 @param rotate_var Pointer to variable that will be set to true if
9588 binlog rotation should be performed after releasing locks. If rotate
9589 is not necessary, the variable will not be touched.
9590
9591 @return Error code on error, zero on success
9592 */
9593
9594 int
process_flush_stage_queue(my_off_t * total_bytes_var,bool * rotate_var,THD ** out_queue_var)9595 MYSQL_BIN_LOG::process_flush_stage_queue(my_off_t *total_bytes_var,
9596 bool *rotate_var,
9597 THD **out_queue_var)
9598 {
9599 DBUG_ENTER("MYSQL_BIN_LOG::process_flush_stage_queue");
9600 #ifndef NDEBUG
9601 // number of flushes per group.
9602 int no_flushes= 0;
9603 #endif
9604 assert(total_bytes_var && rotate_var && out_queue_var);
9605 my_off_t total_bytes= 0;
9606 int flush_error= 1;
9607 mysql_mutex_assert_owner(&LOCK_log);
9608
9609 /*
9610 Fetch the entire flush queue and empty it, so that the next batch
9611 has a leader. We must do this before invoking ha_flush_logs(...)
9612 for guaranteeing to flush prepared records of transactions before
9613 flushing them to binary log, which is required by crash recovery.
9614 */
9615 THD *first_seen= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
9616 assert(first_seen != NULL);
9617 /*
9618 We flush prepared records of transactions to the log of storage
9619 engine (for example, InnoDB redo log) in a group right before
9620 flushing them to binary log.
9621 */
9622 ha_flush_logs(NULL, true);
9623 DBUG_EXECUTE_IF("crash_after_flush_engine_log", DBUG_SUICIDE(););
9624 assign_automatic_gtids_to_flush_group(first_seen);
9625 /* Flush thread caches to binary log. */
9626 for (THD *head= first_seen ; head ; head = head->next_to_commit)
9627 {
9628 std::pair<int,my_off_t> result= flush_thread_caches(head);
9629 total_bytes+= result.second;
9630 if (flush_error == 1)
9631 flush_error= result.first;
9632 #ifndef NDEBUG
9633 no_flushes++;
9634 #endif
9635 }
9636
9637 *out_queue_var= first_seen;
9638 *total_bytes_var= total_bytes;
9639 if (total_bytes > 0 && my_b_tell(&log_file) >= (my_off_t) max_size)
9640 *rotate_var= true;
9641 #ifndef NDEBUG
9642 DBUG_PRINT("info",("no_flushes:= %d", no_flushes));
9643 no_flushes= 0;
9644 #endif
9645 DBUG_RETURN(flush_error);
9646 }
9647
9648 /**
9649 Commit a sequence of sessions.
9650
9651 This function commit an entire queue of sessions starting with the
9652 session in @c first. If there were an error in the flushing part of
9653 the ordered commit, the error code is passed in and all the threads
9654 are marked accordingly (but not committed).
9655
9656 It will also add the GTIDs of the transactions to gtid_executed.
9657
9658 @see MYSQL_BIN_LOG::ordered_commit
9659
9660 @param thd The "master" thread
9661 @param first First thread in the queue of threads to commit
9662 */
9663
9664 void
process_commit_stage_queue(THD * thd,THD * first)9665 MYSQL_BIN_LOG::process_commit_stage_queue(THD *thd, THD *first)
9666 {
9667 mysql_mutex_assert_owner(&LOCK_commit);
9668 #ifndef NDEBUG
9669 thd->get_transaction()->m_flags.ready_preempt= 1; // formality by the leader
9670 #endif
9671 for (THD *head= first ; head ; head = head->next_to_commit)
9672 {
9673 DBUG_PRINT("debug", ("Thread ID: %u, commit_error: %d, flags.pending: %s",
9674 head->thread_id(), head->commit_error,
9675 YESNO(head->get_transaction()->m_flags.pending)));
9676 /*
9677 If flushing failed, set commit_error for the session, skip the
9678 transaction and proceed with the next transaction instead. This
9679 will mark all threads as failed, since the flush failed.
9680
9681 If flush succeeded, attach to the session and commit it in the
9682 engines.
9683 */
9684 #ifndef NDEBUG
9685 stage_manager.clear_preempt_status(head);
9686 #endif
9687 if (head->get_transaction()->sequence_number != SEQ_UNINIT)
9688 {
9689 mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
9690 m_dependency_tracker.update_max_committed(head);
9691 mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);
9692 }
9693 /*
9694 Flush/Sync error should be ignored and continue
9695 to commit phase. And thd->commit_error cannot be
9696 COMMIT_ERROR at this moment.
9697 */
9698 assert(head->commit_error != THD::CE_COMMIT_ERROR);
9699 #ifndef EMBEDDED_LIBRARY
9700 Thd_backup_and_restore switch_thd(thd, head);
9701 #endif /* !EMBEDDED_LIBRARY */
9702 bool all= head->get_transaction()->m_flags.real_commit;
9703 if (head->get_transaction()->m_flags.commit_low)
9704 {
9705 /* head is parked to have exited append() */
9706 assert(head->get_transaction()->m_flags.ready_preempt);
9707 /*
9708 storage engine commit
9709 */
9710 if (ha_commit_low(head, all, false))
9711 head->commit_error= THD::CE_COMMIT_ERROR;
9712 }
9713 DBUG_PRINT("debug", ("commit_error: %d, flags.pending: %s",
9714 head->commit_error,
9715 YESNO(head->get_transaction()->m_flags.pending)));
9716 }
9717
9718 /*
9719 Handle the GTID of the threads.
9720 gtid_executed table is kept updated even though transactions fail to be
9721 logged. That's required by slave auto positioning.
9722 */
9723 gtid_state->update_commit_group(first);
9724
9725 for (THD *head= first ; head ; head = head->next_to_commit)
9726 {
9727 /*
9728 Decrement the prepared XID counter after storage engine commit.
9729 We also need decrement the prepared XID when encountering a
9730 flush error or session attach error for avoiding 3-way deadlock
9731 among user thread, rotate thread and dump thread.
9732 */
9733 if (head->get_transaction()->m_flags.xid_written)
9734 dec_prep_xids(head);
9735 }
9736 }
9737
9738 /**
9739 Process after commit for a sequence of sessions.
9740
9741 @param thd The "master" thread
9742 @param first First thread in the queue of threads to commit
9743 */
9744
9745 void
process_after_commit_stage_queue(THD * thd,THD * first)9746 MYSQL_BIN_LOG::process_after_commit_stage_queue(THD *thd, THD *first)
9747 {
9748 for (THD *head= first; head; head= head->next_to_commit)
9749 {
9750 if (head->get_transaction()->m_flags.run_hooks &&
9751 head->commit_error != THD::CE_COMMIT_ERROR)
9752 {
9753
9754 /*
9755 TODO: This hook here should probably move outside/below this
9756 if and be the only after_commit invocation left in the
9757 code.
9758 */
9759 #ifndef EMBEDDED_LIBRARY
9760 Thd_backup_and_restore switch_thd(thd, head);
9761 #endif /* !EMBEDDED_LIBRARY */
9762 bool all= head->get_transaction()->m_flags.real_commit;
9763 (void) RUN_HOOK(transaction, after_commit, (head, all));
9764 /*
9765 When after_commit finished for the transaction, clear the run_hooks flag.
9766 This allow other parts of the system to check if after_commit was called.
9767 */
9768 head->get_transaction()->m_flags.run_hooks= false;
9769 }
9770 }
9771 }
9772
#ifndef NDEBUG
/**
  Names for the stages.

  Compiled only when NDEBUG is not defined. Indexed by the stage id in the
  debug trace of MYSQL_BIN_LOG::change_stage.
  NOTE(review): the order presumably mirrors Stage_manager::StageID -
  confirm against that enum when adding a stage.
*/
static const char* g_stage_name[] = {
  "FLUSH",
  "SYNC",
  "COMMIT",
};
#endif
9781
9782
9783 /**
9784 Enter a stage of the ordered commit procedure.
9785
9786 Entering is stage is done by:
9787
9788 - Atomically enqueueing a queue of processes (which is just one for
9789 the first phase).
9790
9791 - If the queue was empty, the thread is the leader for that stage
9792 and it should process the entire queue for that stage.
9793
9794 - If the queue was not empty, the thread is a follower and can go
9795 waiting for the commit to finish.
9796
9797 The function will lock the stage mutex if it was designated the
9798 leader for the phase.
9799
9800 @param thd Session structure
9801 @param stage The stage to enter
9802 @param queue Queue of threads to enqueue for the stage
9803 @param stage_mutex Mutex for the stage
9804
9805 @retval true The thread should "bail out" and go waiting for the
9806 commit to finish
9807 @retval false The thread is the leader for the stage and should do
9808 the processing.
9809 */
9810
9811 bool
change_stage(THD * thd,Stage_manager::StageID stage,THD * queue,mysql_mutex_t * leave_mutex,mysql_mutex_t * enter_mutex)9812 MYSQL_BIN_LOG::change_stage(THD *thd,
9813 Stage_manager::StageID stage, THD *queue,
9814 mysql_mutex_t *leave_mutex,
9815 mysql_mutex_t *enter_mutex)
9816 {
9817 DBUG_ENTER("MYSQL_BIN_LOG::change_stage");
9818 DBUG_PRINT("enter", ("thd: 0x%llx, stage: %s, queue: 0x%llx",
9819 (ulonglong) thd, g_stage_name[stage], (ulonglong) queue));
9820 assert(0 <= stage && stage < Stage_manager::STAGE_COUNTER);
9821 assert(enter_mutex);
9822 assert(queue);
9823 /*
9824 enroll_for will release the leave_mutex once the sessions are
9825 queued.
9826 */
9827 if (!stage_manager.enroll_for(stage, queue, leave_mutex))
9828 {
9829 assert(!thd_get_cache_mngr(thd)->dbug_any_finalized());
9830 DBUG_RETURN(true);
9831 }
9832
9833 /*
9834 We do not lock the enter_mutex if it is LOCK_log when rotating binlog
9835 caused by logging incident log event, since it is already locked.
9836 */
9837 bool need_lock_enter_mutex=
9838 !(is_rotating_caused_by_incident && enter_mutex == &LOCK_log);
9839
9840 if (need_lock_enter_mutex)
9841 mysql_mutex_lock(enter_mutex);
9842 else
9843 mysql_mutex_assert_owner(enter_mutex);
9844
9845 DBUG_RETURN(false);
9846 }
9847
9848
9849
9850 /**
9851 Flush the I/O cache to file.
9852
  Flush the binary log to the binlog file if any bytes were written
  and signal that the binary log file has been updated if the flush
  succeeds.
9856 */
9857
9858 int
flush_cache_to_file(my_off_t * end_pos_var)9859 MYSQL_BIN_LOG::flush_cache_to_file(my_off_t *end_pos_var)
9860 {
9861 if (flush_io_cache(&log_file))
9862 {
9863 THD *thd= current_thd;
9864 thd->commit_error= THD::CE_FLUSH_ERROR;
9865 return ER_ERROR_ON_WRITE;
9866 }
9867 *end_pos_var= my_b_tell(&log_file);
9868 return 0;
9869 }
9870
9871
9872 /**
9873 Call fsync() to sync the file to disk.
9874 */
9875 std::pair<bool, bool>
sync_binlog_file(bool force)9876 MYSQL_BIN_LOG::sync_binlog_file(bool force)
9877 {
9878 bool synced= false;
9879 unsigned int sync_period= get_sync_period();
9880 if (force || (sync_period && ++sync_counter >= sync_period))
9881 {
9882 sync_counter= 0;
9883
9884 /**
9885 On *pure non-transactional* workloads there is a small window
9886 in time where a concurrent rotate might be able to close
9887 the file before the sync is actually done. In that case,
9888 ignore the bad file descriptor errors.
9889
9890 Transactional workloads (InnoDB) are not affected since the
9891 the rotation will not happen until all transactions have
9892 committed to the storage engine, thence decreased the XID
9893 counters.
9894
9895 TODO: fix this properly even for non-transactional storage
9896 engines.
9897 */
9898 if (DBUG_EVALUATE_IF("simulate_error_during_sync_binlog_file", 1,
9899 mysql_file_sync(log_file.file,
9900 MYF(MY_WME | MY_IGNORE_BADFD))))
9901 {
9902 THD *thd= current_thd;
9903 thd->commit_error= THD::CE_SYNC_ERROR;
9904 return std::make_pair(true, synced);
9905 }
9906 synced= true;
9907 }
9908 return std::make_pair(false, synced);
9909 }
9910
9911
9912 /**
9913 Helper function executed when leaving @c ordered_commit.
9914
  This function contains the necessary code for fetching the error
  code, doing post-commit checks, and wrapping up the commit if
  necessary.
9918
  It is typically called when change_stage indicates that the thread
  should bail out, and also when the ultimate leader thread finishes
  executing @c ordered_commit.

  It is typically used in this manner:
  @code
  if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log))
    return finish_commit(thd);
  @endcode
9928
9929 @return Error code if the session commit failed, or zero on
9930 success.
9931 */
9932 int
finish_commit(THD * thd)9933 MYSQL_BIN_LOG::finish_commit(THD *thd)
9934 {
9935 DBUG_ENTER("MYSQL_BIN_LOG::finish_commit");
9936 DEBUG_SYNC(thd, "reached_finish_commit");
9937 /*
9938 In some unlikely situations, it can happen that binary
9939 log is closed before the thread flushes it's cache.
9940 In that case, clear the caches before doing commit.
9941 */
9942 if (unlikely(!is_open()))
9943 {
9944 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
9945 if (cache_mngr)
9946 cache_mngr->reset();
9947 }
9948 if (thd->get_transaction()->sequence_number != SEQ_UNINIT)
9949 {
9950 mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
9951 m_dependency_tracker.update_max_committed(thd);
9952 mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);
9953 }
9954 if (thd->get_transaction()->m_flags.commit_low)
9955 {
9956 const bool all= thd->get_transaction()->m_flags.real_commit;
9957 /*
9958 Now flush error and sync erros are ignored and we are continuing and
9959 committing. And at this time, commit_error cannot be COMMIT_ERROR.
9960 */
9961 assert(thd->commit_error != THD::CE_COMMIT_ERROR);
9962
9963 /*
9964 Acquire a shared lock to block commits if an X lock has been acquired by
9965 LOCK TABLES FOR BACKUP or START TRANSACTION WITH CONSISTENT SNAPSHOT. We
9966 only reach this code if binlog_order_commits=0.
9967 */
9968 assert(opt_binlog_order_commits == 0);
9969
9970 slock();
9971
9972 /*
9973 storage engine commit
9974 */
9975 if (ha_commit_low(thd, all, false))
9976 thd->commit_error= THD::CE_COMMIT_ERROR;
9977
9978 sunlock();
9979 /*
9980 Decrement the prepared XID counter after storage engine commit
9981 */
9982 if (thd->get_transaction()->m_flags.xid_written)
9983 dec_prep_xids(thd);
9984 /*
9985 If commit succeeded, we call the after_commit hook
9986
9987 TODO: This hook here should probably move outside/below this
9988 if and be the only after_commit invocation left in the
9989 code.
9990 */
9991 if ((thd->commit_error != THD::CE_COMMIT_ERROR) &&
9992 thd->get_transaction()->m_flags.run_hooks)
9993 {
9994 (void) RUN_HOOK(transaction, after_commit, (thd, all));
9995 thd->get_transaction()->m_flags.run_hooks= false;
9996 }
9997 }
9998 else if (thd->get_transaction()->m_flags.xid_written)
9999 dec_prep_xids(thd);
10000
10001 /*
10002 If the ordered commit didn't updated the GTIDs for this thd yet
10003 at process_commit_stage_queue (i.e. --binlog-order-commits=0)
10004 the thd still has the ownership of a GTID and we must handle it.
10005 */
10006 if (!thd->owned_gtid.is_empty())
10007 {
10008 /*
10009 Gtid is added to gtid_state.executed_gtids and removed from owned_gtids
10010 on update_on_commit().
10011 */
10012 if (thd->commit_error == THD::CE_NONE)
10013 {
10014 gtid_state->update_on_commit(thd);
10015 }
10016 else
10017 gtid_state->update_on_rollback(thd);
10018 }
10019
10020 DBUG_EXECUTE_IF("leaving_finish_commit",
10021 {
10022 const char act[]=
10023 "now SIGNAL signal_leaving_finish_commit";
10024 assert(!debug_sync_set_action(current_thd,
10025 STRING_WITH_LEN(act)));
10026 };);
10027
10028 assert(thd->commit_error || !thd->get_transaction()->m_flags.run_hooks);
10029 assert(!thd_get_cache_mngr(thd)->dbug_any_finalized());
10030 DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
10031 thd->thread_id(), thd->commit_error));
10032 /*
10033 flush or sync errors are handled by the leader of the group
10034 (using binlog_error_action). Hence treat only COMMIT_ERRORs as errors.
10035 */
10036 DBUG_RETURN(thd->commit_error == THD::CE_COMMIT_ERROR);
10037 }
10038
10039 /**
10040 Auxiliary function used in ordered_commit.
10041 */
call_after_sync_hook(THD * queue_head)10042 static inline int call_after_sync_hook(THD *queue_head)
10043 {
10044 const char *log_file= NULL;
10045 my_off_t pos= 0;
10046
10047 if (NO_HOOK(binlog_storage))
10048 return 0;
10049
10050 assert(queue_head != NULL);
10051 for (THD *thd= queue_head; thd != NULL; thd= thd->next_to_commit)
10052 if (likely(thd->commit_error == THD::CE_NONE))
10053 thd->get_trans_fixed_pos(&log_file, &pos);
10054
10055 if (DBUG_EVALUATE_IF("simulate_after_sync_hook_error", 1, 0) ||
10056 RUN_HOOK(binlog_storage, after_sync, (queue_head, log_file, pos)))
10057 {
10058 sql_print_error("Failed to run 'after_sync' hooks");
10059 return ER_ERROR_ON_WRITE;
10060 }
10061 return 0;
10062 }
10063
10064 /**
10065 Helper function to handle flush or sync stage errors.
10066 If binlog_error_action= ABORT_SERVER, server will be aborted
10067 after reporting the error to the client.
  If binlog_error_action= IGNORE_ERROR, binlog will be closed
  for the rest of the lifetime of the server. close() call is protected
  with LOCK_log to avoid any parallel operations on binary log.
10071
10072 @param thd Thread object that faced flush/sync error
10073 @param need_lock_log
  > Indicates true if LOCK_log is needed before closing
  binlog (happens when we are handling sync error)
10076 > Indicates false if LOCK_log is already acquired
10077 by the thread (happens when we are handling flush
10078 error)
10079 @param message Message stating the reason of the failure
10080
10081 @return void
10082 */
handle_binlog_flush_or_sync_error(THD * thd,bool need_lock_log,const char * message)10083 void MYSQL_BIN_LOG::handle_binlog_flush_or_sync_error(THD *thd,
10084 bool need_lock_log,
10085 const char* message)
10086 {
10087 char errmsg[MYSQL_ERRMSG_SIZE]= {0};
10088 if (!message)
10089 sprintf(errmsg, "An error occurred during %s stage of the commit. "
10090 "'binlog_error_action' is set to '%s'.",
10091 thd->commit_error== THD::CE_FLUSH_ERROR ? "flush" : "sync",
10092 binlog_error_action == ABORT_SERVER ? "ABORT_SERVER" : "IGNORE_ERROR");
10093 else
10094 strncpy(errmsg, message, MYSQL_ERRMSG_SIZE-1);
10095 if (binlog_error_action == ABORT_SERVER)
10096 {
10097 char err_buff[MYSQL_ERRMSG_SIZE + 25];
10098 sprintf(err_buff, "%s Server is being stopped.", errmsg);
10099 exec_binlog_error_action_abort(err_buff);
10100 }
10101 else
10102 {
10103 DEBUG_SYNC(thd, "before_binlog_closed_due_to_error");
10104 if (need_lock_log)
10105 mysql_mutex_lock(&LOCK_log);
10106 else
10107 mysql_mutex_assert_owner(&LOCK_log);
10108 /*
10109 It can happen that other group leader encountered
10110 error and already closed the binary log. So print
10111 error only if it is in open state. But we should
10112 call close() always just in case if the previous
10113 close did not close index file.
10114 */
10115 if (is_open())
10116 {
10117 sql_print_error("%s Hence turning logging off for the whole duration "
10118 "of the MySQL server process. To turn it on again: fix "
10119 "the cause, shutdown the MySQL server and restart it.",
10120 errmsg);
10121 }
10122 close(LOG_CLOSE_INDEX|LOG_CLOSE_STOP_EVENT, false/*need_lock_log=false*/,
10123 true/*need_lock_index=true*/);
10124 /*
10125 If there is a write error (flush/sync stage) and if
10126 binlog_error_action=IGNORE_ERROR, clear the error
10127 and allow the commit to happen in storage engine.
10128 */
10129 if (check_write_error(thd)) { /* we have DA_ERROR */
10130 thd->clear_error(); /* sets thd->get_stmt_da()->status() to DA_EMPTY */
10131 /* For SQLCOM_COMMIT, ROLLBACK, ROLLBACK TO SAVEPOINT, there is already
10132 my_ok() in mysql_execute_command. Doing double my_ok() is not allowed. So
10133 we avoid that here */
10134 if (thd_sql_command(thd) != SQLCOM_COMMIT &&
10135 thd_sql_command(thd) != SQLCOM_ROLLBACK &&
10136 thd_sql_command(thd) != SQLCOM_ROLLBACK_TO_SAVEPOINT) {
10137 my_ok(thd); /* sets thd->get_stmt_da()->status() to DA_OK */
10138 }
10139 }
10140
10141 if (need_lock_log)
10142 mysql_mutex_unlock(&LOCK_log);
10143 DEBUG_SYNC(thd, "after_binlog_closed_due_to_error");
10144 }
10145 }
10146 /**
10147 Flush and commit the transaction.
10148
10149 This will execute an ordered flush and commit of all outstanding
10150 transactions and is the main function for the binary log group
10151 commit logic. The function performs the ordered commit in two
10152 phases.
10153
10154 The first phase flushes the caches to the binary log and under
10155 LOCK_log and marks all threads that were flushed as not pending.
10156
10157 The second phase executes under LOCK_commit and commits all
10158 transactions in order.
10159
10160 The procedure is:
10161
10162 1. Queue ourselves for flushing.
  2. Grab the log lock, which might result in blocking if the mutex is
     already held by another thread.
10165 3. If we were not committed while waiting for the lock
10166 1. Fetch the queue
10167 2. For each thread in the queue:
10168 a. Attach to it
10169 b. Flush the caches, saving any error code
10170 3. Flush and sync (depending on the value of sync_binlog).
10171 4. Signal that the binary log was updated
10172 4. Release the log lock
10173 5. Grab the commit lock
10174 1. For each thread in the queue:
10175 a. If there were no error when flushing and the transaction shall be committed:
10176 - Commit the transaction, saving the result of executing the commit.
10177 6. Release the commit lock
10178 7. Call purge, if any of the committed thread requested a purge.
10179 8. Return with the saved error code
10180
10181 @todo The use of @c skip_commit is a hack that we use since the @c
10182 TC_LOG Interface does not contain functions to handle
10183 savepoints. Once the binary log is eliminated as a handlerton and
10184 the @c TC_LOG interface is extended with savepoint handling, this
10185 parameter can be removed.
10186
10187 @param thd Session to commit transaction for
10188 @param all This is @c true if this is a real transaction commit, and
10189 @c false otherwise.
10190 @param skip_commit
10191 This is @c true if the call to @c ha_commit_low should
10192 be skipped (it is handled by the caller somehow) and @c
10193 false otherwise (the normal case).
10194 */
prepare_ordered_commit(THD * thd,bool all,bool skip_commit)10195 int MYSQL_BIN_LOG::prepare_ordered_commit(THD *thd, bool all,
10196 bool skip_commit)
10197 {
10198 DBUG_ENTER("MYSQL_BIN_LOG::prepare_ordered_commit");
10199
10200 /*
10201 These values are used while flushing a transaction, so clear
10202 everything.
10203
10204 Notes:
10205
10206 - It would be good if we could keep transaction coordinator
10207 log-specific data out of the THD structure, but that is not the
10208 case right now.
10209
10210 - Everything in the transaction structure is reset when calling
10211 ha_commit_low since that calls Transaction_ctx::cleanup.
10212 */
10213 thd->get_transaction()->m_flags.pending= true;
10214 thd->commit_error= THD::CE_NONE;
10215 thd->next_to_commit= NULL;
10216 thd->durability_property= HA_IGNORE_DURABILITY;
10217 thd->get_transaction()->m_flags.real_commit= all;
10218 thd->get_transaction()->m_flags.xid_written= false;
10219 thd->get_transaction()->m_flags.commit_low= !skip_commit;
10220 thd->get_transaction()->m_flags.run_hooks= !skip_commit;
10221 #ifndef NDEBUG
10222 /*
10223 The group commit Leader may have to wait for follower whose transaction
10224 is not ready to be preempted. Initially the status is pessimistic.
10225 Preemption guarding logics is necessary only when !NDEBUG is set.
10226 It won't be required for the dbug-off case as long as the follower won't
10227 execute any thread-specific write access code in this method, which is
10228 the case as of current.
10229 */
10230 thd->get_transaction()->m_flags.ready_preempt= 0;
10231 #endif
10232
10233 DBUG_PRINT("enter", ("flags.pending: %s, commit_error: %d, thread_id: %u",
10234 YESNO(thd->get_transaction()->m_flags.pending),
10235 thd->commit_error, thd->thread_id()));
10236
10237 DEBUG_SYNC(thd, "bgc_before_flush_stage");
10238
10239 /*
10240 Stage #1: flushing transactions to binary log
10241
10242 While flushing, we allow new threads to enter and will process
10243 them in due time. Once the queue was empty, we cannot reap
10244 anything more since it is possible that a thread entered and
10245 appointed itself leader for the flush phase.
10246 */
10247
10248 #ifdef HAVE_REPLICATION
10249 if (has_commit_order_manager(thd))
10250 {
10251 Slave_worker *worker= dynamic_cast<Slave_worker *>(thd->rli_slave);
10252 Commit_order_manager *mngr= worker->get_commit_order_manager();
10253
10254 if (mngr->wait_for_its_turn(worker, all))
10255 {
10256 thd->commit_error= THD::CE_COMMIT_ERROR;
10257 DBUG_RETURN(thd->commit_error);
10258 }
10259 }
10260 #endif
10261
10262 DBUG_RETURN(0); /* no error */
10263 }
10264
10265
/**
  Run the group commit for @c thd through the flush, sync and commit stages.

  Each stage is entered through change_stage(). When change_stage() returns
  true the thread did not become the leader of that stage and this function
  returns the result of finish_commit() right away. Otherwise the thread
  processes the queue of the stage under the stage mutex (LOCK_log for
  flush, LOCK_sync for sync, LOCK_commit for commit).

  Flush and sync errors are passed to handle_binlog_flush_or_sync_error().
  After the waiting threads have been signalled, the binary log may be
  rotated and purged.

  @param thd Session executing the commit

  @return Non-zero if thd->commit_error is THD::CE_COMMIT_ERROR on exit,
          zero otherwise.
*/
int MYSQL_BIN_LOG::ordered_commit(THD *thd)
{
  DBUG_ENTER("MYSQL_BIN_LOG::ordered_commit");
  int flush_error= 0, sync_error= 0;
  my_off_t total_bytes= 0;
  bool do_rotate= false;

  /* Enter the flush stage; no mutex is handed over for the first stage. */
  if (change_stage(thd, Stage_manager::FLUSH_STAGE, thd, NULL, &LOCK_log))
  {
    DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
                          thd->thread_id(), thd->commit_error));
    DBUG_RETURN(finish_commit(thd));
  }

  THD *wait_queue= NULL, *final_queue= NULL;
  mysql_mutex_t *leave_mutex_before_commit_stage= NULL;
  my_off_t flush_end_pos= 0;
  /*
    Deliberately not initialized here: it is assigned after the flush and
    is not read on the path that jumps straight to commit_stage.
  */
  bool update_binlog_end_pos_after_sync;
  if (unlikely(!is_open()))
  {
    final_queue= stage_manager.fetch_queue_for(Stage_manager::FLUSH_STAGE);
    leave_mutex_before_commit_stage= &LOCK_log;
    /*
      binary log is closed, flush stage and sync stage should be
      ignored. Binlog cache should be cleared, but instead of doing
      it here, do that work in 'finish_commit' function so that
      leader and followers thread caches will be cleared.
    */
    goto commit_stage;
  }
  DEBUG_SYNC(thd, "waiting_in_the_middle_of_flush_stage");
  flush_error= process_flush_stage_queue(&total_bytes, &do_rotate,
                                         &wait_queue);

  /* Only touch the file when the queue produced data and no error. */
  if (flush_error == 0 && total_bytes > 0)
    flush_error= flush_cache_to_file(&flush_end_pos);
  DBUG_EXECUTE_IF("crash_after_flush_binlog", DBUG_SUICIDE(););

  /*
    With a sync period of 1 the binlog end position is published only
    after the sync stage; otherwise it is published right after the flush.
  */
  update_binlog_end_pos_after_sync= (get_sync_period() == 1);

  /*
    If the flush finished successfully, we can call the after_flush
    hook. Being invoked here, we have the guarantee that the hook is
    executed before the before/after_send_hooks on the dump thread
    preventing race conditions among these plug-ins.
  */
  if (flush_error == 0)
  {
    const char *file_name_ptr= log_file_name + dirname_length(log_file_name);
    assert(flush_end_pos != 0);
    if (RUN_HOOK(binlog_storage, after_flush,
                 (thd, file_name_ptr, flush_end_pos)))
    {
      sql_print_error("Failed to run 'after_flush' hooks");
      flush_error= ER_ERROR_ON_WRITE;
    }

    if (!update_binlog_end_pos_after_sync)
      update_binlog_end_pos();
    DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
  }

  if (flush_error)
  {
    /*
      Handle flush error (if any) after leader finishes its flush stage.
      LOCK_log is still held here, hence need_lock_log is false.
    */
    handle_binlog_flush_or_sync_error(thd, false /* need_lock_log */,
                                      (thd->commit_error == THD::CE_FLUSH_GNO_EXHAUSTED_ERROR)
                                      ? ER(ER_GNO_EXHAUSTED) : NULL);
  }

  publish_coordinates_for_global_status();

  DEBUG_SYNC(thd, "bgc_after_flush_stage_before_sync_stage");

  /*
    Stage #2: Syncing binary log file to disk
  */

  if (change_stage(thd, Stage_manager::SYNC_STAGE, wait_queue, &LOCK_log, &LOCK_sync))
  {
    DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
                          thd->thread_id(), thd->commit_error));
    DBUG_RETURN(finish_commit(thd));
  }

  /*
    Shall introduce a delay only if it is going to do sync
    in this ongoing SYNC stage. The "+1" used below in the
    if condition is to count the ongoing sync stage.
    When sync_binlog=0 (where we never do sync in BGC group),
    it is considered as a special case and delay will be executed
    for every group just like how it is done when sync_binlog= 1.
  */
  if (!flush_error && (sync_counter + 1 >= get_sync_period()))
    stage_manager.wait_count_or_timeout(opt_binlog_group_commit_sync_no_delay_count,
                                        opt_binlog_group_commit_sync_delay,
                                        Stage_manager::SYNC_STAGE);

  final_queue= stage_manager.fetch_queue_for(Stage_manager::SYNC_STAGE);

  if (flush_error == 0 && total_bytes > 0)
  {
    DEBUG_SYNC(thd, "before_sync_binlog_file");
    /* Only the error flag of the (error, synced) pair is needed here. */
    std::pair<bool, bool> result= sync_binlog_file(false);
    sync_error= result.first;
  }

  if (update_binlog_end_pos_after_sync)
  {
    THD *tmp_thd= final_queue;
    const char *binlog_file= NULL;
    my_off_t pos= 0;
    /* Walk to the last session of the queue; its position is published. */
    while (tmp_thd->next_to_commit != NULL)
      tmp_thd= tmp_thd->next_to_commit;
    if (flush_error == 0 && sync_error == 0)
    {
      tmp_thd->get_trans_fixed_pos(&binlog_file, &pos);
      update_binlog_end_pos(binlog_file, pos);
    }
  }

  DEBUG_SYNC(thd, "bgc_after_sync_stage_before_commit_stage");

  leave_mutex_before_commit_stage= &LOCK_sync;
  /*
    Stage #3: Commit all transactions in order.

    This stage is skipped if we do not need to order the commits and
    each thread has to execute the handlerton commit instead.

    However, since we are keeping the lock from the previous stage, we
    need to unlock it if we skip the stage.

    We must also step commit_clock before the ha_commit_low() is called
    either in ordered fashion (by the leader of this stage) or by the
    threads themselves.

    We are delaying the handling of sync error until
    all locks are released but we should not enter into
    commit stage if binlog_error_action is ABORT_SERVER.
  */
commit_stage:
  if (opt_binlog_order_commits &&
      (sync_error == 0 || binlog_error_action != ABORT_SERVER))
  {
    if (change_stage(thd, Stage_manager::COMMIT_STAGE,
                     final_queue, leave_mutex_before_commit_stage,
                     &LOCK_commit))
    {
      DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d",
                            thd->thread_id(), thd->commit_error));
      DBUG_RETURN(finish_commit(thd));
    }
    THD *commit_queue= stage_manager.fetch_queue_for(Stage_manager::COMMIT_STAGE);
    DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
                    DEBUG_SYNC(thd, "before_process_commit_stage_queue"););

    /* A failing after_sync hook is reported through sync_error. */
    if (flush_error == 0 && sync_error == 0)
      sync_error= call_after_sync_hook(commit_queue);

    /*
      process_commit_stage_queue will call update_on_commit or
      update_on_rollback for the GTID owned by each thd in the queue.

      This will be done this way to guarantee that GTIDs are added to
      gtid_executed in order, to avoid creating unnecessary temporary
      gaps and keep gtid_executed as a single interval at all times.

      If we allow each thread to call update_on_commit only when they
      are at finish_commit, the GTID order cannot be guaranteed and
      temporary gaps may appear in gtid_executed. When this happens,
      the server would have to add and remove intervals from the
      Gtid_set, and adding and removing intervals requires a mutex,
      which would reduce performance.
    */
    process_commit_stage_queue(thd, commit_queue);
    mysql_mutex_unlock(&LOCK_commit);
    /*
      Process after_commit after LOCK_commit is released for avoiding
      3-way deadlock among user thread, rotate thread and dump thread.
    */
    process_after_commit_stage_queue(thd, commit_queue);
    final_queue= commit_queue;
  }
  else
  {
    /* Commit stage skipped: release the mutex kept from the previous stage. */
    if (leave_mutex_before_commit_stage)
      mysql_mutex_unlock(leave_mutex_before_commit_stage);
    if (flush_error == 0 && sync_error == 0)
      sync_error= call_after_sync_hook(final_queue);
  }

  /*
    Handle sync error after we release all locks in order to avoid deadlocks
  */
  if (sync_error)
    handle_binlog_flush_or_sync_error(thd, true /* need_lock_log */, NULL);

  /* Commit done so signal all waiting threads */
  stage_manager.signal_done(final_queue);

  /*
    Finish the commit before executing a rotate, or run the risk of a
    deadlock. We don't need the return value here since it is in
    thd->commit_error, which is returned below.
  */
  (void) finish_commit(thd);

  /*
    If we need to rotate, we do it without commit error.
    Otherwise the thd->commit_error will be possibly reset.
  */
  if (DBUG_EVALUATE_IF("force_rotate", 1, 0) ||
      (do_rotate && thd->commit_error == THD::CE_NONE &&
       !is_rotating_caused_by_incident))
  {
    /*
      Do not force the rotate as several consecutive groups may
      request unnecessary rotations.

      NOTE: Run purge_logs wo/ holding LOCK_log because it does not
      need the mutex. Otherwise causes various deadlocks.
    */

    DEBUG_SYNC(thd, "ready_to_do_rotation");
    bool check_purge= false;
    mysql_mutex_lock(&LOCK_log);
    /*
      If rotate fails then depends on binlog_error_action variable
      appropriate action will be taken inside rotate call.
    */
    int error= rotate(false, &check_purge);
    mysql_mutex_unlock(&LOCK_log);

    if (error)
      thd->commit_error= THD::CE_COMMIT_ERROR;
    else if (check_purge)
      purge();
  }

#ifdef HAVE_REPLICATION
  /*
    Purge by size when a limit is configured and the accounted total plus
    the current position in the active log file exceeds it.
  */
  if (binlog_space_limit && binlog_space_total &&
      binlog_space_total + my_b_tell(&log_file) > binlog_space_limit)
    purge_logs_by_size(true);
#endif

  /*
    flush or sync errors are handled above (using binlog_error_action).
    Hence treat only COMMIT_ERRORs as errors.
  */
  DBUG_RETURN(thd->commit_error == THD::CE_COMMIT_ERROR);
}
10520
10521
10522 /**
10523 MYSQLD server recovers from last crashed binlog.
10524
10525 @param log IO_CACHE of the crashed binlog.
10526 @param fdle Format_description_log_event of the crashed binlog.
10527 @param valid_pos The position of the last valid transaction or
10528 event(non-transaction) of the crashed binlog.
10529
10530 @retval
10531 0 ok
10532 @retval
10533 1 error
10534 */
recover(IO_CACHE * log,Format_description_log_event * fdle,my_off_t * valid_pos)10535 int MYSQL_BIN_LOG::recover(IO_CACHE *log, Format_description_log_event *fdle,
10536 my_off_t *valid_pos)
10537 {
10538 Log_event *ev;
10539 HASH xids;
10540 MEM_ROOT mem_root;
10541 /*
10542 The flag is used for handling the case that a transaction
10543 is partially written to the binlog.
10544 */
10545 bool in_transaction= FALSE;
10546 int memory_page_size= my_getpagesize();
10547
10548 if (! fdle->is_valid() ||
10549 my_hash_init(&xids, &my_charset_bin, memory_page_size/3, 0,
10550 sizeof(my_xid), 0, 0, 0,
10551 key_memory_binlog_recover_exec))
10552 goto err1;
10553
10554 init_alloc_root(key_memory_binlog_recover_exec,
10555 &mem_root, memory_page_size, memory_page_size);
10556
10557 while ((ev= Log_event::read_log_event(log, 0, fdle, TRUE))
10558 && ev->is_valid())
10559 {
10560 if (ev->get_type_code() == binary_log::QUERY_EVENT &&
10561 !strcmp(((Query_log_event*)ev)->query, "BEGIN"))
10562 in_transaction= TRUE;
10563
10564 if (ev->get_type_code() == binary_log::QUERY_EVENT &&
10565 !strcmp(((Query_log_event*)ev)->query, "COMMIT"))
10566 {
10567 assert(in_transaction == TRUE);
10568 in_transaction= FALSE;
10569 }
10570 else if (ev->get_type_code() == binary_log::XID_EVENT)
10571 {
10572 assert(in_transaction == TRUE);
10573 in_transaction= FALSE;
10574 Xid_log_event *xev=(Xid_log_event *)ev;
10575 uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
10576 sizeof(xev->xid));
10577 if (!x || my_hash_insert(&xids, x))
10578 goto err2;
10579 }
10580 else if (ev->get_type_code() == binary_log::START_ENCRYPTION_EVENT &&
10581 fdle->start_decryption(static_cast<Start_encryption_log_event*>(ev)))
10582 {
10583 sql_print_warning("Error initializing decryption while crash_recovery.");
10584 goto err2;
10585 }
10586
10587 /*
10588 Recorded valid position for the crashed binlog file
10589 which did not contain incorrect events. The following
10590 positions increase the variable valid_pos:
10591
10592 1 -
10593 ...
10594 <---> HERE IS VALID <--->
10595 GTID
10596 BEGIN
10597 ...
10598 COMMIT
10599 ...
10600
10601 2 -
10602 ...
10603 <---> HERE IS VALID <--->
10604 GTID
10605 DDL/UTILITY
10606 ...
10607
10608 In other words, the following positions do not increase
10609 the variable valid_pos:
10610
10611 1 -
10612 GTID
10613 <---> HERE IS VALID <--->
10614 ...
10615
10616 2 -
10617 GTID
10618 BEGIN
10619 <---> HERE IS VALID <--->
10620 ...
10621 */
10622 if (!log->error && !in_transaction &&
10623 !is_gtid_event(ev))
10624 *valid_pos= my_b_tell(log);
10625
10626 delete ev;
10627 }
10628
10629 /*
10630 Call ha_recover if and only if there is a registered engine that
10631 does 2PC, otherwise in DBUG builds calling ha_recover directly
10632 will result in an assert. (Production builds would be safe since
10633 ha_recover returns right away if total_ha_2pc <= opt_log_bin.)
10634 */
10635 if (total_ha_2pc > 1 && ha_recover(&xids))
10636 goto err2;
10637
10638 free_root(&mem_root, MYF(0));
10639 my_hash_free(&xids);
10640 return 0;
10641
10642 err2:
10643 free_root(&mem_root, MYF(0));
10644 my_hash_free(&xids);
10645 err1:
10646 sql_print_error("Crash recovery failed. Either correct the problem "
10647 "(if it's, for example, out of memory error) and restart, "
10648 "or delete (or rename) binary log and start mysqld with "
10649 "--tc-heuristic-recover={commit|rollback}");
10650 return 1;
10651 }
10652
10653 /*
10654 Copy out the non-directory part of binlog position filename for the
10655 `binlog_snapshot_file' status variable, same way as it is done for
10656 SHOW MASTER STATUS.
10657 */
set_binlog_snapshot_file(const char * src)10658 static void set_binlog_snapshot_file(const char *src)
10659 {
10660 mysql_mutex_assert_owner(&LOCK_status);
10661
10662 int dir_len = dirname_length(src);
10663 strmake(binlog_snapshot_file, src + dir_len,
10664 sizeof(binlog_snapshot_file) - 1);
10665 }
10666
10667 /** Copy the current binlog coordinates to the variables used for the
10668 not-in-consistent-snapshot case of SHOW STATUS */
publish_coordinates_for_global_status(void) const10669 void MYSQL_BIN_LOG::publish_coordinates_for_global_status(void) const
10670 {
10671 mysql_mutex_assert_owner(&LOCK_log);
10672
10673 mysql_mutex_lock(&LOCK_status);
10674 strcpy(binlog_global_snapshot_file, log_file_name);
10675 binlog_global_snapshot_position=
10676 my_b_inited(&log_file) ? my_b_tell(&log_file) : 0;
10677 mysql_mutex_unlock(&LOCK_status);
10678 }
10679
10680
xlock(void)10681 void MYSQL_BIN_LOG::xlock(void)
10682 {
10683 mysql_mutex_lock(&LOCK_log);
10684
10685 assert(!snapshot_lock_acquired);
10686
10687 /*
10688 We must ensure that no writes to binlog and no commits to storage engines
10689 occur after function is called for START TRANSACTION FOR CONSISTENT
10690 SNAPSHOT. With binlog_order_commits=1 (the default) flushing to binlog is
10691 performed under the LOCK_log mutex and commits are done under the
10692 LOCK_commit mutex, both in the stage leader thread. So acquiring those 2
10693 mutexes is sufficient to guarantee atomicity.
10694
10695 With binlog_order_commits=0 commits are performed in parallel by separate
10696 threads with each acquiring a shared lock on LOCK_consistent_snapshot.
10697
10698 binlog_order_commits is a dynamic variable, so we have to keep track what
10699 primitives should be used in xunlock().
10700 */
10701 if (opt_binlog_order_commits)
10702 {
10703 mysql_mutex_lock(&LOCK_commit);
10704 }
10705 else
10706 {
10707 snapshot_lock_acquired= true;
10708 mysql_rwlock_wrlock(&LOCK_consistent_snapshot);
10709 }
10710 }
10711
10712
xunlock(void)10713 void MYSQL_BIN_LOG::xunlock(void)
10714 {
10715 if (!snapshot_lock_acquired)
10716 {
10717 mysql_mutex_unlock(&LOCK_commit);
10718 }
10719 else
10720 {
10721 mysql_rwlock_unlock(&LOCK_consistent_snapshot);
10722 snapshot_lock_acquired= false;
10723 }
10724
10725 mysql_mutex_unlock(&LOCK_log);
10726 }
10727
report_missing_purged_gtids(const Gtid_set * slave_executed_gtid_set,std::string & errmsg)10728 void MYSQL_BIN_LOG::report_missing_purged_gtids(
10729 const Gtid_set *slave_executed_gtid_set, std::string &errmsg)
10730 {
10731 DBUG_ENTER("MYSQL_BIN_LOG::report_missing_purged_gtids");
10732 THD *thd= current_thd;
10733 Gtid_set gtid_missing(gtid_state->get_lost_gtids()->get_sid_map());
10734 gtid_missing.add_gtid_set(gtid_state->get_lost_gtids());
10735 gtid_missing.remove_gtid_set(slave_executed_gtid_set);
10736
10737 String tmp_uuid;
10738 uchar name[]= "slave_uuid";
10739
10740 /* Protects thd->user_vars. */
10741 mysql_mutex_lock(&thd->LOCK_thd_data);
10742 user_var_entry *entry=
10743 (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
10744 if (entry && entry->length() > 0)
10745 tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
10746 mysql_mutex_unlock(&thd->LOCK_thd_data);
10747
10748
10749 char* missing_gtids= NULL;
10750 char* slave_executed_gtids= NULL;
10751 gtid_missing.to_string(&missing_gtids);
10752 slave_executed_gtid_set->to_string(&slave_executed_gtids);
10753
10754 /*
10755 Log the information about the missing purged GTIDs to the error log
10756 if the message is less than MAX_LOG_BUFFER_SIZE.
10757 */
10758 std::ostringstream log_info;
10759 log_info << "The missing transactions are '"<< missing_gtids <<"'";
10760 const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
10761
10762 /* Don't consider the "%s" in the format string. Subtract 2 from the
10763 total length */
10764 uint total_length= (strlen(log_msg) - 2 + log_info.str().length());
10765
10766 DBUG_EXECUTE_IF("simulate_long_missing_gtids",
10767 { total_length= MAX_LOG_BUFFER_SIZE + 1;});
10768
10769 if (total_length > MAX_LOG_BUFFER_SIZE)
10770 log_info.str("To find the missing purged transactions, run \"SELECT"
10771 " @@GLOBAL.GTID_PURGED\" on the master, then run \"SELECT"
10772 " CONCAT(RECEIVED_TRANSACTION_SET, ',', @@GLOBAL.GTID_EXECUTED)"
10773 " FROM PERFORMANCE_SCHEMA.replication_connection_status\" on"
10774 " the slave, and then run \"SELECT GTID_SUBTRACT(<master_set>,"
10775 " <slave_set>)\" on any server");
10776
10777 sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
10778 log_info.str().c_str());
10779
10780 /*
10781 Send the information about the slave executed GTIDs and missing
10782 purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
10783 */
10784 std::ostringstream gtid_info;
10785 gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
10786 << "', and the missing transactions are '"<< missing_gtids <<"'";
10787 errmsg.assign(ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS));
10788
10789 /* Don't consider the "%s" in the format string. Subtract 2 from the
10790 total length */
10791 total_length= (errmsg.length() - 2 + gtid_info.str().length());
10792
10793 DBUG_EXECUTE_IF("simulate_long_missing_gtids",
10794 { total_length= MYSQL_ERRMSG_SIZE + 1;});
10795
10796 if (total_length > MYSQL_ERRMSG_SIZE)
10797 gtid_info.str("The GTID sets and the missing purged transactions are too"
10798 " long to print in this message. For more information,"
10799 " please see the master's error log or the manual for"
10800 " GTID_SUBTRACT");
10801
10802 /* Buffer for formatting the message about the missing GTIDs. */
10803 char buff[MYSQL_ERRMSG_SIZE];
10804 my_snprintf(buff, MYSQL_ERRMSG_SIZE, errmsg.c_str(), gtid_info.str().c_str());
10805 errmsg.assign(const_cast<const char*>(buff));
10806
10807 my_free(missing_gtids);
10808 my_free(slave_executed_gtids);
10809 DBUG_VOID_RETURN;
10810 }
10811
report_missing_gtids(const Gtid_set * previous_gtid_set,const Gtid_set * slave_executed_gtid_set,std::string & errmsg)10812 void MYSQL_BIN_LOG::report_missing_gtids(const Gtid_set* previous_gtid_set,
10813 const Gtid_set* slave_executed_gtid_set,
10814 std::string& errmsg)
10815 {
10816 DBUG_ENTER("MYSQL_BIN_LOG::report_missing_gtids");
10817 THD *thd=current_thd;
10818 char* missing_gtids= NULL;
10819 char* slave_executed_gtids= NULL;
10820 Gtid_set gtid_missing(slave_executed_gtid_set->get_sid_map());
10821 gtid_missing.add_gtid_set(slave_executed_gtid_set);
10822 gtid_missing.remove_gtid_set(previous_gtid_set);
10823 gtid_missing.to_string(&missing_gtids);
10824 slave_executed_gtid_set->to_string(&slave_executed_gtids);
10825
10826 String tmp_uuid;
10827 uchar name[]= "slave_uuid";
10828
10829 /* Protects thd->user_vars. */
10830 mysql_mutex_lock(&thd->LOCK_thd_data);
10831
10832 user_var_entry *entry=
10833 (user_var_entry*) my_hash_search(&thd->user_vars, name, sizeof(name)-1);
10834 if (entry && entry->length() > 0)
10835 tmp_uuid.copy(entry->ptr(), entry->length(), NULL);
10836 mysql_mutex_unlock(&thd->LOCK_thd_data);
10837
10838 /*
10839 Log the information about the missing purged GTIDs to the error log
10840 if the message is less than MAX_LOG_BUFFER_SIZE.
10841 */
10842 std::ostringstream log_info;
10843 log_info << "If the binary log files have been deleted from disk,"
10844 " check the consistency of 'GTID_PURGED' variable."
10845 " The missing transactions are '"<< missing_gtids <<"'";
10846 const char* log_msg= ER(ER_FOUND_MISSING_GTIDS);
10847
10848 /* Don't consider the "%s" in the format string. Subtract 2 from the
10849 total length */
10850 if ((strlen(log_msg) - 2 + log_info.str().length()) > MAX_LOG_BUFFER_SIZE)
10851 log_info.str("To find the missing purged transactions, run \"SELECT"
10852 " @@GLOBAL.GTID_PURGED\" on the master, then run \"SELECT"
10853 " CONCAT(RECEIVED_TRANSACTION_SET, ',', @@GLOBAL.GTID_EXECUTED)"
10854 " FROM PERFORMANCE_SCHEMA.replication_connection_status\" on"
10855 " the slave, and then run \"SELECT GTID_SUBTRACT(<master_set>,"
10856 " <slave_set>)\" on any server");
10857
10858 sql_print_warning(ER_THD(thd, ER_FOUND_MISSING_GTIDS), tmp_uuid.ptr(),
10859 log_info.str().c_str());
10860
10861 /*
10862 Send the information about the slave executed GTIDs and missing
10863 purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
10864 */
10865 std::ostringstream gtid_info;
10866 gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
10867 << "', and the missing transactions are '"<< missing_gtids <<"'";
10868 errmsg.assign(ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS));
10869
10870 /* Don't consider the "%s" in the format string. Subtract 2 from the
10871 total length */
10872 if ((errmsg.length() - 2 + gtid_info.str().length()) > MYSQL_ERRMSG_SIZE)
10873 gtid_info.str("The GTID sets and the missing purged transactions are too"
10874 " long to print in this message. For more information,"
10875 " please see the master's error log or the manual for"
10876 " GTID_SUBTRACT");
10877 /* Buffer for formatting the message about the missing GTIDs. */
10878 char buff[MYSQL_ERRMSG_SIZE];
10879 my_snprintf(buff, MYSQL_ERRMSG_SIZE, errmsg.c_str(), gtid_info.str().c_str());
10880 errmsg.assign(const_cast<const char*>(buff));
10881
10882 my_free(missing_gtids);
10883 my_free(slave_executed_gtids);
10884
10885 DBUG_VOID_RETURN;
10886 }
10887
is_binlog_cache_empty(bool is_transactional)10888 bool THD::is_binlog_cache_empty(bool is_transactional)
10889 {
10890 DBUG_ENTER("THD::is_binlog_cache_empty(bool)");
10891
10892 // If opt_bin_log==0, it is not safe to call thd_get_cache_mngr
10893 // because binlog_hton has not been completely set up.
10894 assert(opt_bin_log);
10895 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
10896
10897 // cache_mngr is NULL until we call thd->binlog_setup_trx_data, so
10898 // we assert that this has been done.
10899 assert(cache_mngr != NULL);
10900
10901 binlog_cache_data *cache_data=
10902 cache_mngr->get_binlog_cache_data(is_transactional);
10903 assert(cache_data != NULL);
10904
10905 DBUG_RETURN(cache_data->is_binlog_empty());
10906 }
10907
10908 /*
10909 These functions are placed in this file since they need access to
10910 binlog_hton, which has internal linkage.
10911 */
10912
binlog_setup_trx_data()10913 int THD::binlog_setup_trx_data()
10914 {
10915 DBUG_ENTER("THD::binlog_setup_trx_data");
10916 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(this);
10917
10918 if (cache_mngr)
10919 DBUG_RETURN(0); // Already set up
10920
10921 IO_CACHE stmt_cache_log, trx_cache_log;
10922 memset(&stmt_cache_log, 0, sizeof(stmt_cache_log));
10923 memset(&trx_cache_log, 0, sizeof(trx_cache_log));
10924
10925 cache_mngr= (binlog_cache_mngr*) my_malloc(key_memory_binlog_cache_mngr,
10926 sizeof(binlog_cache_mngr), MYF(MY_ZEROFILL));
10927 if (!cache_mngr)
10928 {
10929 DBUG_RETURN(1);
10930 }
10931 if (open_cached_file(&stmt_cache_log, mysql_tmpdir,
10932 LOG_PREFIX, binlog_stmt_cache_size, MYF(MY_WME)))
10933 {
10934 my_free(cache_mngr);
10935 DBUG_RETURN(1); // Didn't manage to set it up
10936 }
10937 if (open_cached_file(&trx_cache_log, mysql_tmpdir,
10938 LOG_PREFIX, binlog_cache_size, MYF(MY_WME)))
10939 {
10940 close_cached_file(&stmt_cache_log);
10941 my_free(cache_mngr);
10942 DBUG_RETURN(1);
10943 }
10944 DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot, (ulonglong) cache_mngr));
10945 thd_set_ha_data(this, binlog_hton, cache_mngr);
10946
10947 cache_mngr= new (thd_get_cache_mngr(this))
10948 binlog_cache_mngr(max_binlog_stmt_cache_size,
10949 &binlog_stmt_cache_use,
10950 &binlog_stmt_cache_disk_use,
10951 max_binlog_cache_size,
10952 &binlog_cache_use,
10953 &binlog_cache_disk_use,
10954 stmt_cache_log,
10955 trx_cache_log);
10956 DBUG_RETURN(0);
10957 }
10958
10959 /**
10960
10961 */
register_binlog_handler(THD * thd,bool trx)10962 void register_binlog_handler(THD *thd, bool trx)
10963 {
10964 DBUG_ENTER("register_binlog_handler");
10965 /*
10966 If this is the first call to this function while processing a statement,
10967 the transactional cache does not have a savepoint defined. So, in what
10968 follows:
10969 . an implicit savepoint is defined;
10970 . callbacks are registered;
10971 . binary log is set as read/write.
10972
10973 The savepoint allows for truncating the trx-cache transactional changes
10974 fail. Callbacks are necessary to flush caches upon committing or rolling
10975 back a statement or a transaction. However, notifications do not happen
10976 if the binary log is set as read/write.
10977 */
10978 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
10979 if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF)
10980 {
10981 /*
10982 Set an implicit savepoint in order to be able to truncate a trx-cache.
10983 */
10984 my_off_t pos= 0;
10985 binlog_trans_log_savepos(thd, &pos);
10986 cache_mngr->trx_cache.set_prev_position(pos);
10987
10988 /*
10989 Set callbacks in order to be able to call commmit or rollback.
10990 */
10991 if (trx)
10992 trans_register_ha(thd, TRUE, binlog_hton, NULL);
10993 trans_register_ha(thd, FALSE, binlog_hton, NULL);
10994
10995 /*
10996 Set the binary log as read/write otherwise callbacks are not called.
10997 */
10998 thd->ha_data[binlog_hton->slot].ha_info[0].set_trx_read_write();
10999 }
11000 DBUG_VOID_RETURN;
11001 }
11002
11003 /**
11004 Function to start a statement and optionally a transaction for the
11005 binary log.
11006
11007 This function does three things:
11008 - Starts a transaction if not in autocommit mode or if a BEGIN
11009 statement has been seen.
11010
11011 - Start a statement transaction to allow us to truncate the cache.
11012
11013 - Save the currrent binlog position so that we can roll back the
11014 statement by truncating the cache.
11015
11016 We only update the saved position if the old one was undefined,
11017 the reason is that there are some cases (e.g., for CREATE-SELECT)
11018 where the position is saved twice (e.g., both in
11019 Query_result_create::prepare() and THD::binlog_write_table_map()), but
11020 we should use the first. This means that calls to this function
11021 can be used to start the statement before the first table map
11022 event, to include some extra events.
11023
11024 Note however that IMMEDIATE_LOGGING implies that the statement is
11025 written without BEGIN/COMMIT.
11026
11027 @param thd Thread variable
11028 @param start_event The first event requested to be written into the
11029 binary log
11030 */
binlog_start_trans_and_stmt(THD * thd,Log_event * start_event)11031 static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event)
11032 {
11033 DBUG_ENTER("binlog_start_trans_and_stmt");
11034
11035 /*
11036 Initialize the cache manager if this was not done yet.
11037 */
11038 if (thd->binlog_setup_trx_data())
11039 DBUG_RETURN(1);
11040
11041 /*
11042 Retrieve the appropriated cache.
11043 */
11044 bool is_transactional= start_event->is_using_trans_cache();
11045 binlog_cache_mngr *cache_mngr= thd_get_cache_mngr(thd);
11046 binlog_cache_data *cache_data= cache_mngr->get_binlog_cache_data(is_transactional);
11047
11048 /*
11049 If the event is requesting immediatly logging, there is no need to go
11050 further down and set savepoint and register callbacks.
11051 */
11052 if (start_event->is_using_immediate_logging())
11053 DBUG_RETURN(0);
11054
11055 register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode());
11056
11057 /*
11058 If the cache is empty log "BEGIN" at the beginning of every transaction.
11059 Here, a transaction is either a BEGIN..COMMIT/ROLLBACK block or a single
11060 statement in autocommit mode.
11061 */
11062 if (cache_data->is_binlog_empty())
11063 {
11064 static const char begin[]= "BEGIN";
11065 const char *query= NULL;
11066 char buf[XID::ser_buf_size];
11067 char xa_start[sizeof("XA START") + 1 + sizeof(buf)];
11068 XID_STATE *xs= thd->get_transaction()->xid_state();
11069 int qlen= sizeof(begin) - 1;
11070
11071 if (is_transactional && xs->has_state(XID_STATE::XA_ACTIVE))
11072 {
11073 /*
11074 XA-prepare logging case.
11075 */
11076 qlen= sprintf(xa_start, "XA START %s", xs->get_xid()->serialize(buf));
11077 query= xa_start;
11078 }
11079 else
11080 {
11081 /*
11082 Regular transaction case.
11083 */
11084 query= begin;
11085 }
11086
11087 Query_log_event qinfo(thd, query, qlen,
11088 is_transactional, false, true, 0, true);
11089 if (cache_data->write_event(thd, &qinfo))
11090 DBUG_RETURN(1);
11091 }
11092
11093 DBUG_RETURN(0);
11094 }
11095
11096 /**
11097 This function writes a table map to the binary log.
11098 Note that in order to keep the signature uniform with related methods,
11099 we use a redundant parameter to indicate whether a transactional table
11100 was changed or not.
11101 Sometimes it will write a Rows_query_log_event into binary log before
11102 the table map too.
11103
11104 @param table a pointer to the table.
11105 @param is_transactional @c true indicates a transactional table,
11106 otherwise @c false a non-transactional.
11107 @param binlog_rows_query @c true indicates a Rows_query log event
11108 will be binlogged before table map,
11109 otherwise @c false indicates it will not
11110 be binlogged.
11111 @return
11112 nonzero if an error pops up when writing the table map event
11113 or the Rows_query log event.
11114 */
binlog_write_table_map(TABLE * table,bool is_transactional,bool binlog_rows_query)11115 int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
11116 bool binlog_rows_query)
11117 {
11118 int error;
11119 DBUG_ENTER("THD::binlog_write_table_map");
11120 DBUG_PRINT("enter", ("table: 0x%lx (%s: #%llu)",
11121 (long) table, table->s->table_name.str,
11122 table->s->table_map_id.id()));
11123
11124 /* Pre-conditions */
11125 assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
11126 assert(table->s->table_map_id.is_valid());
11127
11128 Table_map_log_event
11129 the_event(this, table, table->s->table_map_id, is_transactional);
11130
11131 binlog_start_trans_and_stmt(this, &the_event);
11132
11133 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
11134
11135 binlog_cache_data *cache_data=
11136 cache_mngr->get_binlog_cache_data(is_transactional);
11137
11138 if (binlog_rows_query && this->query().str)
11139 {
11140 /* Write the Rows_query_log_event into binlog before the table map */
11141 Rows_query_log_event
11142 rows_query_ev(this, this->query().str, this->query().length);
11143 if ((error= cache_data->write_event(this, &rows_query_ev)))
11144 DBUG_RETURN(error);
11145 }
11146
11147 if ((error= cache_data->write_event(this, &the_event)))
11148 DBUG_RETURN(error);
11149
11150 binlog_table_maps++;
11151 DBUG_RETURN(0);
11152 }
11153
11154 /**
11155 This function retrieves a pending row event from a cache which is
11156 specified through the parameter @c is_transactional. Respectively, when it
11157 is @c true, the pending event is returned from the transactional cache.
11158 Otherwise from the non-transactional cache.
11159
11160 @param is_transactional @c true indicates a transactional cache,
11161 otherwise @c false a non-transactional.
11162 @return
11163 The row event if any.
11164 */
11165 Rows_log_event*
binlog_get_pending_rows_event(bool is_transactional) const11166 THD::binlog_get_pending_rows_event(bool is_transactional) const
11167 {
11168 Rows_log_event* rows= NULL;
11169 binlog_cache_mngr *const cache_mngr= thd_get_cache_mngr(this);
11170
11171 /*
11172 This is less than ideal, but here's the story: If there is no cache_mngr,
11173 prepare_pending_rows_event() has never been called (since the cache_mngr
11174 is set up there). In that case, we just return NULL.
11175 */
11176 if (cache_mngr)
11177 {
11178 binlog_cache_data *cache_data=
11179 cache_mngr->get_binlog_cache_data(is_transactional);
11180
11181 rows= cache_data->pending();
11182 }
11183 return (rows);
11184 }
11185
11186 /**
11187 @param db db name c-string to be inserted into alphabetically sorted
11188 THD::binlog_accessed_db_names list.
11189
11190 Note, that space for both the data and the node
11191 struct are allocated in THD::main_mem_root.
11192 The list lasts for the top-level query time and is reset
11193 in @c THD::cleanup_after_query().
11194 */
11195 void
add_to_binlog_accessed_dbs(const char * db_param)11196 THD::add_to_binlog_accessed_dbs(const char *db_param)
11197 {
11198 char *after_db;
11199 /*
11200 binlog_accessed_db_names list is to maintain the database
11201 names which are referenced in a given command.
11202 Prior to bug 17806014 fix, 'main_mem_root' memory root used
11203 to store this list. The 'main_mem_root' scope is till the end
11204 of the query. Hence it caused increasing memory consumption
11205 problem in big procedures like the ones mentioned below.
11206 Eg: CALL p1() where p1 is having 1,00,000 create and drop tables.
11207 'main_mem_root' is freed only at the end of the command CALL p1()'s
11208 execution. But binlog_accessed_db_names list scope is only till the
11209 individual statements specified the procedure(create/drop statements).
11210 Hence the memory allocated in 'main_mem_root' was left uncleared
11211 until the p1's completion, even though it is not required after
11212 completion of individual statements.
11213
11214 Instead of using 'main_mem_root' whose scope is complete query execution,
11215 now the memroot is changed to use 'thd->mem_root' whose scope is until the
11216 individual statement in CALL p1(). 'thd->mem_root' is set to 'execute_mem_root'
11217 in the context of procedure and it's scope is till the individual statement
11218 in CALL p1() and thd->memroot is equal to 'main_mem_root' in the context
11219 of a normal 'top level query'.
11220
11221 Eg: a) create table t1(i int); => If this function is called while
11222 processing this statement, thd->memroot is equal to &main_mem_root
11223 which will be freed immediately after executing this statement.
11224 b) CALL p1() -> p1 contains create table t1(i int); => If this function
11225 is called while processing create table statement which is inside
11226 a stored procedure, then thd->memroot is equal to 'execute_mem_root'
11227 which will be freed immediately after executing this statement.
11228 In both a and b case, thd->memroot will be freed immediately and will not
11229 increase memory consumption.
11230
11231 A special case(stored functions/triggers):
11232 Consider the following example:
11233 create function f1(i int) returns int
11234 begin
11235 insert into db1.t1 values (1);
11236 insert into db2.t1 values (2);
11237 end;
11238 When we are processing SELECT f1(), the list should contain db1, db2 names.
11239 Since thd->mem_root contains 'execute_mem_root' in the context of
11240 stored function, the mem root will be freed after adding db1 in
11241 the list and when we are processing the second statement and when we try
11242 to add 'db2' in the db1's list, it will lead to crash as db1's memory
11243 is already freed. To handle this special case, if in_sub_stmt is set
11244 (which is true incase of stored functions/triggers), we use &main_mem_root,
11245 if not set we will use thd->memroot which changes it's value to
11246 'execute_mem_root' or '&main_mem_root' depends on the context.
11247 */
11248 MEM_ROOT *db_mem_root= in_sub_stmt ? &main_mem_root : mem_root;
11249
11250 if (!binlog_accessed_db_names)
11251 binlog_accessed_db_names= new (db_mem_root) List<char>;
11252
11253 if (binlog_accessed_db_names->elements > MAX_DBS_IN_EVENT_MTS)
11254 {
11255 push_warning_printf(this, Sql_condition::SL_WARNING,
11256 ER_MTS_UPDATED_DBS_GREATER_MAX,
11257 ER(ER_MTS_UPDATED_DBS_GREATER_MAX),
11258 MAX_DBS_IN_EVENT_MTS);
11259 return;
11260 }
11261
11262 after_db= strdup_root(db_mem_root, db_param);
11263
11264 /*
11265 sorted insertion is implemented with first rearranging data
11266 (pointer to char*) of the links and final appending of the least
11267 ordered data to create a new link in the list.
11268 */
11269 if (binlog_accessed_db_names->elements != 0)
11270 {
11271 List_iterator<char> it(*get_binlog_accessed_db_names());
11272
11273 while (it++)
11274 {
11275 char *swap= NULL;
11276 char **ref_cur_db= it.ref();
11277 int cmp= strcmp(after_db, *ref_cur_db);
11278
11279 assert(!swap || cmp < 0);
11280
11281 if (cmp == 0)
11282 {
11283 after_db= NULL; /* dup to ignore */
11284 break;
11285 }
11286 else if (swap || cmp > 0)
11287 {
11288 swap= *ref_cur_db;
11289 *ref_cur_db= after_db;
11290 after_db= swap;
11291 }
11292 }
11293 }
11294 if (after_db)
11295 binlog_accessed_db_names->push_back(after_db, db_mem_root);
11296 }
11297
11298 /*
11299 Tells if two (or more) tables have auto_increment columns and we want to
11300 lock those tables with a write lock.
11301
11302 SYNOPSIS
11303 has_two_write_locked_tables_with_auto_increment
11304 tables Table list
11305
11306 NOTES:
11307 Call this function only when you have established the list of all tables
11308 which you'll want to update (including stored functions, triggers, views
11309 inside your statement).
11310 */
11311
11312 static bool
has_write_table_with_auto_increment(TABLE_LIST * tables)11313 has_write_table_with_auto_increment(TABLE_LIST *tables)
11314 {
11315 for (TABLE_LIST *table= tables; table; table= table->next_global)
11316 {
11317 /* we must do preliminary checks as table->table may be NULL */
11318 if (!table->is_placeholder() &&
11319 table->table->found_next_number_field &&
11320 (table->lock_type >= TL_WRITE_ALLOW_WRITE))
11321 return 1;
11322 }
11323
11324 return 0;
11325 }
11326
11327 /*
11328 checks if we have select tables in the table list and write tables
11329 with auto-increment column.
11330
11331 SYNOPSIS
11332 has_two_write_locked_tables_with_auto_increment_and_select
11333 tables Table list
11334
11335 RETURN VALUES
11336
11337 -true if the table list has atleast one table with auto-increment column
11338
11339
11340 and atleast one table to select from.
11341 -false otherwise
11342 */
11343
11344 static bool
has_write_table_with_auto_increment_and_select(TABLE_LIST * tables)11345 has_write_table_with_auto_increment_and_select(TABLE_LIST *tables)
11346 {
11347 bool has_select= false;
11348 bool has_auto_increment_tables = has_write_table_with_auto_increment(tables);
11349 for(TABLE_LIST *table= tables; table; table= table->next_global)
11350 {
11351 if (!table->is_placeholder() &&
11352 (table->lock_type <= TL_READ_NO_INSERT))
11353 {
11354 has_select= true;
11355 break;
11356 }
11357 }
11358 return(has_select && has_auto_increment_tables);
11359 }
11360
11361 /*
11362 Tells if there is a table whose auto_increment column is a part
11363 of a compound primary key while is not the first column in
11364 the table definition.
11365
11366 @param tables Table list
11367
11368 @return true if the table exists, fais if does not.
11369 */
11370
11371 static bool
has_write_table_auto_increment_not_first_in_pk(TABLE_LIST * tables)11372 has_write_table_auto_increment_not_first_in_pk(TABLE_LIST *tables)
11373 {
11374 for (TABLE_LIST *table= tables; table; table= table->next_global)
11375 {
11376 /* we must do preliminary checks as table->table may be NULL */
11377 if (!table->is_placeholder() &&
11378 table->table->found_next_number_field &&
11379 (table->lock_type >= TL_WRITE_ALLOW_WRITE)
11380 && table->table->s->next_number_keypart != 0)
11381 return 1;
11382 }
11383
11384 return 0;
11385 }
11386
11387 /*
11388 Function to check whether the table in query uses a fulltext parser
11389 plugin or not.
11390
11391 @param s - table share pointer.
11392
11393 @retval TRUE - The table uses fulltext parser plugin.
11394 @retval FALSE - Otherwise.
11395 */
fulltext_unsafe_set(TABLE_SHARE * s)11396 static bool inline fulltext_unsafe_set(TABLE_SHARE *s)
11397 {
11398 for (unsigned int i= 0 ; i < s->keys ; i++)
11399 {
11400 if ((s->key_info[i].flags & HA_USES_PARSER) && s->keys_in_use.is_set(i))
11401 return TRUE;
11402 }
11403 return FALSE;
11404 }
11405 #ifndef NDEBUG
get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)11406 const char * get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)
11407 {
11408 switch (locked_tables_mode)
11409 {
11410 case LTM_NONE:
11411 return "LTM_NONE";
11412 case LTM_LOCK_TABLES:
11413 return "LTM_LOCK_TABLES";
11414 case LTM_PRELOCKED:
11415 return "LTM_PRELOCKED";
11416 case LTM_PRELOCKED_UNDER_LOCK_TABLES:
11417 return "LTM_PRELOCKED_UNDER_LOCK_TABLES";
11418 default:
11419 return "Unknown table lock mode";
11420 }
11421 }
11422 #endif
11423
11424 /**
11425 Decide on logging format to use for the statement and issue errors
11426 or warnings as needed. The decision depends on the following
11427 parameters:
11428
11429 - The logging mode, i.e., the value of binlog_format. Can be
11430 statement, mixed, or row.
11431
11432 - The type of statement. There are three types of statements:
11433 "normal" safe statements; unsafe statements; and row injections.
11434 An unsafe statement is one that, if logged in statement format,
11435 might produce different results when replayed on the slave (e.g.,
11436 queries with a LIMIT clause). A row injection is either a BINLOG
11437 statement, or a row event executed by the slave's SQL thread.
11438
11439 - The capabilities of tables modified by the statement. The
11440 *capabilities vector* for a table is a set of flags associated
11441 with the table. Currently, it only includes two flags: *row
11442 capability flag* and *statement capability flag*.
11443
11444 The row capability flag is set if and only if the engine can
11445 handle row-based logging. The statement capability flag is set if
11446 and only if the table can handle statement-based logging.
11447
11448 Decision table for logging format
11449 ---------------------------------
11450
11451 The following table summarizes how the format and generated
11452 warning/error depends on the tables' capabilities, the statement
11453 type, and the current binlog_format.
11454
11455 Row capable N NNNNNNNNN YYYYYYYYY YYYYYYYYY
11456 Statement capable N YYYYYYYYY NNNNNNNNN YYYYYYYYY
11457
11458 Statement type * SSSUUUIII SSSUUUIII SSSUUUIII
11459
11460 binlog_format * SMRSMRSMR SMRSMRSMR SMRSMRSMR
11461
11462 Logged format - SS-S----- -RR-RR-RR SRRSRR-RR
11463 Warning/Error 1 --2732444 5--5--6-- ---7--6--
11464
11465 Legend
11466 ------
11467
11468 Row capable: N - Some table not row-capable, Y - All tables row-capable
11469 Stmt capable: N - Some table not stmt-capable, Y - All tables stmt-capable
11470 Statement type: (S)afe, (U)nsafe, or Row (I)njection
11471 binlog_format: (S)TATEMENT, (M)IXED, or (R)OW
11472 Logged format: (S)tatement or (R)ow
11473 Warning/Error: Warnings and error messages are as follows:
11474
11475 1. Error: Cannot execute statement: binlogging impossible since both
11476 row-incapable engines and statement-incapable engines are
11477 involved.
11478
11479 2. Error: Cannot execute statement: binlogging impossible since
11480 BINLOG_FORMAT = ROW and at least one table uses a storage engine
11481 limited to statement-logging.
11482
11483 3. Error: Cannot execute statement: binlogging of unsafe statement
11484 is impossible when storage engine is limited to statement-logging
11485 and BINLOG_FORMAT = MIXED.
11486
11487 4. Error: Cannot execute row injection: binlogging impossible since
11488 at least one table uses a storage engine limited to
11489 statement-logging.
11490
11491 5. Error: Cannot execute statement: binlogging impossible since
11492 BINLOG_FORMAT = STATEMENT and at least one table uses a storage
11493 engine limited to row-logging.
11494
11495 6. Error: Cannot execute row injection: binlogging impossible since
11496 BINLOG_FORMAT = STATEMENT.
11497
11498 7. Warning: Unsafe statement binlogged in statement format since
11499 BINLOG_FORMAT = STATEMENT.
11500
11501 In addition, we can produce the following error (not depending on
11502 the variables of the decision diagram):
11503
11504 8. Error: Cannot execute statement: binlogging impossible since more
11505 than one engine is involved and at least one engine is
11506 self-logging.
11507
11508 9. Error: Do not allow users to modify a gtid_executed table
11509 explicitly by a XA transaction.
11510
11511 For each error case above, the statement is prevented from being
11512 logged, we report an error, and roll back the statement. For
11513 warnings, we set the thd->binlog_flags variable: the warning will be
11514 printed only if the statement is successfully logged.
11515
11516 @see THD::binlog_query
11517
11518 @param[in] thd Client thread
11519 @param[in] tables Tables involved in the query
11520 @param[in] use_cached_table_flags use cached value of
11521 handler::cached_table_flags. Do not use cached value and force recalculation
11522 in case of 'false'.
11523
11524 @retval 0 No error; statement can be logged.
11525 @retval -1 One of the error conditions above applies (1, 2, 4, 5, 6 or 9).
11526 */
11527
decide_logging_format(TABLE_LIST * tables,bool use_cached_table_flags)11528 int THD::decide_logging_format(TABLE_LIST *tables, bool use_cached_table_flags)
11529 {
11530 DBUG_ENTER("THD::decide_logging_format");
11531 DBUG_PRINT("info", ("query: %s", query().str));
11532 DBUG_PRINT("info", ("variables.binlog_format: %lu",
11533 variables.binlog_format));
11534 DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags(): 0x%x",
11535 lex->get_stmt_unsafe_flags()));
11536
11537 DEBUG_SYNC(current_thd, "begin_decide_logging_format");
11538
11539 reset_binlog_local_stmt_filter();
11540
11541 /*
11542 We should not decide logging format if the binlog is closed or
11543 binlogging is off, or if the statement is filtered out from the
11544 binlog by filtering rules.
11545 */
11546 if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) &&
11547 !(variables.binlog_format == BINLOG_FORMAT_STMT &&
11548 !binlog_filter->db_ok(m_db.str)))
11549 {
11550 /*
11551 Compute one bit field with the union of all the engine
11552 capabilities, and one with the intersection of all the engine
11553 capabilities.
11554 */
11555 handler::Table_flags flags_write_some_set= 0;
11556 handler::Table_flags flags_access_some_set= 0;
11557 handler::Table_flags flags_write_all_set=
11558 HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE;
11559
11560 /*
11561 If different types of engines are about to be updated.
11562 For example: Innodb and Falcon; Innodb and MyIsam.
11563 */
11564 my_bool multi_write_engine= FALSE;
11565 /*
11566 If different types of engines are about to be accessed
11567 and any of them is about to be updated. For example:
11568 Innodb and Falcon; Innodb and MyIsam.
11569 */
11570 my_bool multi_access_engine= FALSE;
11571 /*
11572 72475 : Track if statement creates or drops a temporary table
11573 and log in ROW if it does.
11574 */
11575 bool create_drop_temp_table= false;
11576 /*
11577 Identifies if a table is changed.
11578 */
11579 my_bool is_write= FALSE;
11580 /*
11581 A pointer to a previous table that was changed.
11582 */
11583 TABLE* prev_write_table= NULL;
11584 /*
11585 A pointer to a previous table that was accessed.
11586 */
11587 TABLE* prev_access_table= NULL;
11588 /*
11589 True if at least one table is transactional.
11590 */
11591 bool write_to_some_transactional_table= false;
11592 /*
11593 True if at least one table is non-transactional.
11594 */
11595 bool write_to_some_non_transactional_table= false;
11596 /*
11597 True if all non-transactional tables that has been updated
11598 are temporary.
11599 */
11600 bool write_all_non_transactional_are_tmp_tables= true;
11601 /**
11602 The number of tables used in the current statement,
11603 that should be replicated.
11604 */
11605 uint replicated_tables_count= 0;
11606 /**
11607 The number of tables written to in the current statement,
11608 that should not be replicated.
11609 A table should not be replicated when it is considered
11610 'local' to a MySQL instance.
11611 Currently, these tables are:
11612 - mysql.slow_log
11613 - mysql.general_log
11614 - mysql.slave_relay_log_info
11615 - mysql.slave_master_info
11616 - mysql.slave_worker_info
11617 - performance_schema.*
11618 - TODO: information_schema.*
11619 In practice, from this list, only performance_schema.* tables
11620 are written to by user queries.
11621 */
11622 uint non_replicated_tables_count= 0;
11623 /**
11624 Indicate whether we already reported a warning
11625 on modifying gtid_executed table.
11626 */
11627 int warned_gtid_executed_table= 0;
11628 #ifndef NDEBUG
11629 {
11630 DBUG_PRINT("debug", ("prelocked_mode: %s",
11631 get_locked_tables_mode_name(locked_tables_mode)));
11632 }
11633 #endif
11634
11635 if (variables.binlog_format != BINLOG_FORMAT_ROW && tables)
11636 {
11637 /*
11638 DML statements that modify a table with an auto_increment column based on
11639 rows selected from a table are unsafe as the order in which the rows are
11640 fetched from the select tables cannot be determined and may differ on
11641 master and slave.
11642 */
11643 if (has_write_table_with_auto_increment_and_select(tables))
11644 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_WRITE_AUTOINC_SELECT);
11645
11646 if (has_write_table_auto_increment_not_first_in_pk(tables))
11647 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST);
11648
11649 /*
11650 A query that modifies autoinc column in sub-statement can make the
11651 master and slave inconsistent.
11652 We can solve these problems in mixed mode by switching to binlogging
11653 if at least one updated table is used by sub-statement
11654 */
11655 if (lex->requires_prelocking() &&
11656 has_write_table_with_auto_increment(lex->first_not_own_table()))
11657 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS);
11658 }
11659
11660 /*
11661 Get the capabilities vector for all involved storage engines and
11662 mask out the flags for the binary log.
11663 */
11664 for (TABLE_LIST *table= tables; table; table= table->next_global)
11665 {
11666 if (table->is_placeholder())
11667 {
11668 /*
11669 bug 72475 : Detect if this is a CREATE TEMPORARY or DROP of a
11670 temporary table. This will be used later in determining whether to
11671 log in ROW or STMT if MIXED replication is being used.
11672 */
11673 if(!create_drop_temp_table &&
11674 !table->table &&
11675 ((lex->sql_command == SQLCOM_CREATE_TABLE &&
11676 (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)) ||
11677 ((lex->sql_command == SQLCOM_DROP_TABLE ||
11678 lex->sql_command == SQLCOM_TRUNCATE) &&
11679 find_temporary_table(this, table))))
11680 {
11681 create_drop_temp_table= true;
11682 }
11683 continue;
11684 }
11685
11686 handler::Table_flags const flags= table->table->file->ha_table_flags(!use_cached_table_flags);
11687
11688 DBUG_PRINT("info", ("table: %s; ha_table_flags: 0x%llx",
11689 table->table_name, flags));
11690
11691 if (table->table->no_replicate)
11692 {
11693 if (!warned_gtid_executed_table)
11694 {
11695 warned_gtid_executed_table=
11696 gtid_state->warn_or_err_on_modify_gtid_table(this, table);
11697 /*
11698 Do not allow users to modify the gtid_executed table
11699 explicitly by a XA transaction.
11700 */
11701 if (warned_gtid_executed_table == 2)
11702 DBUG_RETURN(-1);
11703 }
11704 /*
11705 The statement uses a table that is not replicated.
11706 The following properties about the table:
11707 - persistent / transient
11708 - transactional / non transactional
11709 - temporary / permanent
11710 - read or write
11711 - multiple engines involved because of this table
11712 are not relevant, as this table is completely ignored.
11713 Because the statement uses a non replicated table,
11714 using STATEMENT format in the binlog is impossible.
11715 Either this statement will be discarded entirely,
11716 or it will be logged (possibly partially) in ROW format.
11717 */
11718 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_TABLE);
11719
11720 if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
11721 {
11722 non_replicated_tables_count++;
11723 continue;
11724 }
11725 }
11726
11727 replicated_tables_count++;
11728
11729 my_bool trans= table->table->file->has_transactions();
11730
11731 if (table->lock_type >= TL_WRITE_ALLOW_WRITE)
11732 {
11733 write_to_some_transactional_table=
11734 write_to_some_transactional_table || trans;
11735
11736 write_to_some_non_transactional_table=
11737 write_to_some_non_transactional_table || !trans;
11738
11739 if (prev_write_table && prev_write_table->file->ht !=
11740 table->table->file->ht)
11741 multi_write_engine= TRUE;
11742
11743 if (table->table->s->tmp_table)
11744 lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TEMP_TRANS_TABLE :
11745 LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE);
11746 else
11747 lex->set_stmt_accessed_table(trans ? LEX::STMT_WRITES_TRANS_TABLE :
11748 LEX::STMT_WRITES_NON_TRANS_TABLE);
11749
11750 /*
11751 Non-transactional updates are allowed when row binlog format is
11752 used and all non-transactional tables are temporary.
11753 Binlog format is checked on THD::is_dml_gtid_compatible() method.
11754 */
11755 if (!trans)
11756 write_all_non_transactional_are_tmp_tables=
11757 write_all_non_transactional_are_tmp_tables &&
11758 table->table->s->tmp_table;
11759
11760 flags_write_all_set &= flags;
11761 flags_write_some_set |= flags;
11762 is_write= TRUE;
11763
11764 prev_write_table= table->table;
11765
11766 /*
11767 It should be marked unsafe if a table which uses a fulltext parser
11768 plugin is modified. See also bug#48183.
11769 */
11770 if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN))
11771 {
11772 if (fulltext_unsafe_set(table->table->s))
11773 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN);
11774 }
11775 /*
11776 INSERT...ON DUPLICATE KEY UPDATE on a table with more than one unique keys
11777 can be unsafe. Check for it if the flag is already not marked for the
11778 given statement.
11779 */
11780 if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS) &&
11781 lex->sql_command == SQLCOM_INSERT && lex->duplicates == DUP_UPDATE)
11782 {
11783 uint keys= table->table->s->keys, i= 0, unique_keys= 0;
11784 for (KEY* keyinfo= table->table->s->key_info;
11785 i < keys && unique_keys <= 1; i++, keyinfo++)
11786 {
11787 if (keyinfo->flags & HA_NOSAME)
11788 unique_keys++;
11789 }
11790 if (unique_keys > 1 )
11791 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS);
11792 }
11793 }
11794 if(lex->get_using_match())
11795 {
11796 if (fulltext_unsafe_set(table->table->s))
11797 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN);
11798 }
11799
11800 flags_access_some_set |= flags;
11801
11802 if (table->table->s->tmp_table)
11803 lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TEMP_TRANS_TABLE :
11804 LEX::STMT_READS_TEMP_NON_TRANS_TABLE);
11805 else
11806 lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TRANS_TABLE :
11807 LEX::STMT_READS_NON_TRANS_TABLE);
11808
11809 if (prev_access_table && prev_access_table->file->ht !=
11810 table->table->file->ht)
11811 multi_access_engine= TRUE;
11812
11813 prev_access_table= table->table;
11814 }
11815 assert(!is_write ||
11816 write_to_some_transactional_table ||
11817 write_to_some_non_transactional_table);
11818 /*
11819 write_all_non_transactional_are_tmp_tables may be true if any
11820 non-transactional table was not updated, so we fix its value here.
11821 */
11822 write_all_non_transactional_are_tmp_tables=
11823 write_all_non_transactional_are_tmp_tables &&
11824 write_to_some_non_transactional_table;
11825
11826 DBUG_PRINT("info", ("flags_write_all_set: 0x%llx", flags_write_all_set));
11827 DBUG_PRINT("info", ("flags_write_some_set: 0x%llx", flags_write_some_set));
11828 DBUG_PRINT("info", ("flags_access_some_set: 0x%llx", flags_access_some_set));
11829 DBUG_PRINT("info", ("multi_write_engine: %d", multi_write_engine));
11830 DBUG_PRINT("info", ("multi_access_engine: %d", multi_access_engine));
11831
11832 int error= 0;
11833 int unsafe_flags;
11834
11835 bool multi_stmt_trans= in_multi_stmt_transaction_mode();
11836 bool trans_table= trans_has_updated_trans_table(this);
11837 bool binlog_direct= variables.binlog_direct_non_trans_update;
11838
11839 if (lex->is_mixed_stmt_unsafe(multi_stmt_trans, binlog_direct,
11840 trans_table, tx_isolation))
11841 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MIXED_STATEMENT);
11842 else if (multi_stmt_trans && trans_table && !binlog_direct &&
11843 lex->stmt_accessed_table(LEX::STMT_WRITES_NON_TRANS_TABLE))
11844 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS);
11845
11846 /*
11847 If more than one engine is involved in the statement and at
11848 least one is doing it's own logging (is *self-logging*), the
11849 statement cannot be logged atomically, so we generate an error
11850 rather than allowing the binlog to become corrupt.
11851 */
11852 if (multi_write_engine &&
11853 (flags_write_some_set & HA_HAS_OWN_BINLOGGING))
11854 my_error((error= ER_BINLOG_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE),
11855 MYF(0));
11856 else if (multi_access_engine && flags_access_some_set & HA_HAS_OWN_BINLOGGING)
11857 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE);
11858
11859 /* XA is unsafe for statements */
11860 if (is_write &&
11861 !get_transaction()->xid_state()->has_state(XID_STATE::XA_NOTR))
11862 lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_XA);
11863
11864 DBUG_EXECUTE_IF("make_stmt_only_engines",
11865 {
11866 flags_write_all_set= HA_BINLOG_STMT_CAPABLE;
11867 };);
11868
11869 /* both statement-only and row-only engines involved */
11870 if ((flags_write_all_set & (HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE)) == 0)
11871 {
11872 /*
11873 1. Error: Binary logging impossible since both row-incapable
11874 engines and statement-incapable engines are involved
11875 */
11876 my_error((error= ER_BINLOG_ROW_ENGINE_AND_STMT_ENGINE), MYF(0));
11877 }
11878 /* statement-only engines involved */
11879 else if ((flags_write_all_set & HA_BINLOG_ROW_CAPABLE) == 0)
11880 {
11881 if (lex->is_stmt_row_injection())
11882 {
11883 /*
11884 4. Error: Cannot execute row injection since table uses
11885 storage engine limited to statement-logging
11886 */
11887 my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0));
11888 }
11889 else if (variables.binlog_format == BINLOG_FORMAT_ROW &&
11890 sqlcom_can_generate_row_events(this->lex->sql_command))
11891 {
11892 /*
11893 2. Error: Cannot modify table that uses a storage engine
11894 limited to statement-logging when BINLOG_FORMAT = ROW
11895 */
11896 my_error((error= ER_BINLOG_ROW_MODE_AND_STMT_ENGINE), MYF(0));
11897 }
11898 else if (variables.binlog_format == BINLOG_FORMAT_MIXED &&
11899 ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
11900 {
11901 /*
11902 3. Error: Cannot execute statement: binlogging of unsafe
11903 statement is impossible when storage engine is limited to
11904 statement-logging and BINLOG_FORMAT = MIXED.
11905 */
11906 for (int unsafe_type= 0;
11907 unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
11908 unsafe_type++)
11909 if (unsafe_flags & (1 << unsafe_type))
11910 my_error((error= ER_BINLOG_UNSAFE_AND_STMT_ENGINE), MYF(0),
11911 ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
11912 }
11913 else if (is_write && ((unsafe_flags= lex->get_stmt_unsafe_flags()) != 0))
11914 {
11915 /*
11916 7. Warning: Unsafe statement logged as statement due to
11917 binlog_format = STATEMENT
11918 */
11919 binlog_unsafe_warning_flags|= unsafe_flags;
11920 DBUG_PRINT("info", ("Scheduling warning to be issued by "
11921 "binlog_query: '%s'",
11922 ER(ER_BINLOG_UNSAFE_STATEMENT)));
11923 DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
11924 binlog_unsafe_warning_flags));
11925 }
11926 /* log in statement format! */
11927 }
11928 /* no statement-only engines */
11929 else
11930 {
11931 /* binlog_format = STATEMENT */
11932 if (variables.binlog_format == BINLOG_FORMAT_STMT)
11933 {
11934 if (lex->is_stmt_row_injection())
11935 {
11936 /*
11937 6. Error: Cannot execute row injection since
11938 BINLOG_FORMAT = STATEMENT
11939 */
11940 my_error((error= ER_BINLOG_ROW_INJECTION_AND_STMT_MODE), MYF(0));
11941 }
11942 else if ((flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0 &&
11943 sqlcom_can_generate_row_events(this->lex->sql_command))
11944 {
11945 /*
11946 5. Error: Cannot modify table that uses a storage engine
11947 limited to row-logging when binlog_format = STATEMENT
11948 */
11949 my_error((error= ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), "");
11950 }
11951 else if (is_write && (unsafe_flags= lex->get_stmt_unsafe_flags()) != 0)
11952 {
11953 /*
11954 7. Warning: Unsafe statement logged as statement due to
11955 binlog_format = STATEMENT
11956 */
11957 binlog_unsafe_warning_flags|= unsafe_flags;
11958 DBUG_PRINT("info", ("Scheduling warning to be issued by "
11959 "binlog_query: '%s'",
11960 ER(ER_BINLOG_UNSAFE_STATEMENT)));
11961 DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
11962 binlog_unsafe_warning_flags));
11963 }
11964 /* log in statement format! */
11965 }
11966 /* No statement-only engines and binlog_format != STATEMENT.
11967 I.e., nothing prevents us from row logging if needed. */
11968 else
11969 {
11970 if (lex->is_stmt_unsafe() || lex->is_stmt_row_injection()
11971 || (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0
11972 || (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0
11973 || lex->stmt_accessed_table(LEX::STMT_READS_TEMP_TRANS_TABLE)
11974 || lex->stmt_accessed_table(LEX::STMT_READS_TEMP_NON_TRANS_TABLE)
11975 || create_drop_temp_table)
11976 {
11977 #ifndef NDEBUG
11978 int flags= lex->get_stmt_unsafe_flags();
11979 DBUG_PRINT("info", ("setting row format for unsafe statement"));
11980 for (int i= 0; i < Query_tables_list::BINLOG_STMT_UNSAFE_COUNT; i++)
11981 {
11982 if (flags & (1 << i))
11983 DBUG_PRINT("info", ("unsafe reason: %s",
11984 ER(Query_tables_list::binlog_stmt_unsafe_errcode[i])));
11985 }
11986 DBUG_PRINT("info", ("is_row_injection=%d",
11987 lex->is_stmt_row_injection()));
11988 DBUG_PRINT("info", ("stmt_capable=%llu",
11989 (flags_write_all_set & HA_BINLOG_STMT_CAPABLE)));
11990 #endif
11991 /* log in row format! */
11992 set_current_stmt_binlog_format_row_if_mixed();
11993 }
11994 }
11995 }
11996
11997 if (non_replicated_tables_count > 0)
11998 {
11999 if ((replicated_tables_count == 0) || ! is_write)
12000 {
12001 DBUG_PRINT("info", ("decision: no logging, no replicated table affected"));
12002 set_binlog_local_stmt_filter();
12003 }
12004 else
12005 {
12006 if (! is_current_stmt_binlog_format_row())
12007 {
12008 my_error((error= ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES), MYF(0));
12009 }
12010 else
12011 {
12012 clear_binlog_local_stmt_filter();
12013 }
12014 }
12015 }
12016 else
12017 {
12018 clear_binlog_local_stmt_filter();
12019 }
12020
12021 if (!error &&
12022 !is_dml_gtid_compatible(write_to_some_transactional_table,
12023 write_to_some_non_transactional_table,
12024 write_all_non_transactional_are_tmp_tables))
12025 error= 1;
12026
12027 if (error) {
12028 DBUG_PRINT("info", ("decision: no logging since an error was generated"));
12029 DBUG_RETURN(-1);
12030 }
12031
12032 if (is_write &&
12033 lex->sql_command != SQLCOM_END /* rows-event applying by slave */)
12034 {
12035 /*
12036 Master side of DML in the STMT format events parallelization.
12037 All involving table db:s are stored in a abc-ordered name list.
12038 In case the number of databases exceeds MAX_DBS_IN_EVENT_MTS maximum
12039 the list gathering breaks since it won't be sent to the slave.
12040 */
12041 for (TABLE_LIST *table= tables; table; table= table->next_global)
12042 {
12043 if (table->is_placeholder())
12044 continue;
12045
12046 assert(table->table);
12047
12048 if (table->table->file->referenced_by_foreign_key())
12049 {
12050 /*
12051 FK-referenced dbs can't be gathered currently. The following
12052 event will be marked for sequential execution on slave.
12053 */
12054 binlog_accessed_db_names= NULL;
12055 add_to_binlog_accessed_dbs("");
12056 break;
12057 }
12058 if (!is_current_stmt_binlog_format_row())
12059 add_to_binlog_accessed_dbs(table->db);
12060 }
12061 }
12062 DBUG_PRINT("info", ("decision: logging in %s format",
12063 is_current_stmt_binlog_format_row() ?
12064 "ROW" : "STATEMENT"));
12065
12066 if (variables.binlog_format == BINLOG_FORMAT_ROW &&
12067 (lex->sql_command == SQLCOM_UPDATE ||
12068 lex->sql_command == SQLCOM_UPDATE_MULTI ||
12069 lex->sql_command == SQLCOM_DELETE ||
12070 lex->sql_command == SQLCOM_DELETE_MULTI))
12071 {
12072 String table_names;
12073 /*
12074 Generate a warning for UPDATE/DELETE statements that modify a
12075 BLACKHOLE table, as row events are not logged in row format.
12076 */
12077 for (TABLE_LIST *table= tables; table; table= table->next_global)
12078 {
12079 if (table->is_placeholder())
12080 continue;
12081 if (table->table->file->ht->db_type == DB_TYPE_BLACKHOLE_DB &&
12082 table->lock_type >= TL_WRITE_ALLOW_WRITE)
12083 {
12084 table_names.append(table->table_name);
12085 table_names.append(",");
12086 }
12087 }
12088 if (!table_names.is_empty())
12089 {
12090 bool is_update= (lex->sql_command == SQLCOM_UPDATE ||
12091 lex->sql_command == SQLCOM_UPDATE_MULTI);
12092 /*
12093 Replace the last ',' with '.' for table_names
12094 */
12095 table_names.replace(table_names.length()-1, 1, ".", 1);
12096 push_warning_printf(this, Sql_condition::SL_WARNING,
12097 WARN_ON_BLOCKHOLE_IN_RBR,
12098 ER(WARN_ON_BLOCKHOLE_IN_RBR),
12099 is_update ? "UPDATE" : "DELETE",
12100 table_names.c_ptr());
12101 }
12102 }
12103 }
12104 else
12105 {
12106 DBUG_PRINT("info", ("decision: no logging since "
12107 "mysql_bin_log.is_open() = %d "
12108 "and (options & OPTION_BIN_LOG) = 0x%llx "
12109 "and binlog_format = %lu "
12110 "and binlog_filter->db_ok(db) = %d",
12111 mysql_bin_log.is_open(),
12112 (variables.option_bits & OPTION_BIN_LOG),
12113 variables.binlog_format,
12114 binlog_filter->db_ok(m_db.str)));
12115
12116 for (TABLE_LIST *table= tables; table; table= table->next_global)
12117 {
12118 if (!table->is_placeholder() && table->table->no_replicate &&
12119 gtid_state->warn_or_err_on_modify_gtid_table(this, table))
12120 break;
12121 }
12122 }
12123
12124 DEBUG_SYNC(current_thd, "end_decide_logging_format");
12125
12126 DBUG_RETURN(0);
12127 }
12128
12129
12130 /**
12131 Given that a possible violation of gtid consistency has happened,
12132 checks if gtid-inconsistencies are forbidden by the current value of
12133 ENFORCE_GTID_CONSISTENCY and GTID_MODE. If forbidden, generates
12134 error or warning accordingly.
12135
12136 @param thd The thread that has issued the GTID-violating statement.
12137
12138 @param error_code The error code to use, if error or warning is to
12139 be generated.
12140
12141 @retval false Error was generated.
12142 @retval true No error was generated (possibly a warning was generated).
12143 */
handle_gtid_consistency_violation(THD * thd,int error_code)12144 bool handle_gtid_consistency_violation(THD *thd, int error_code)
12145 {
12146 DBUG_ENTER("handle_gtid_consistency_violation");
12147
12148 enum_group_type gtid_next_type= thd->variables.gtid_next.type;
12149 global_sid_lock->rdlock();
12150 enum_gtid_consistency_mode gtid_consistency_mode=
12151 get_gtid_consistency_mode();
12152 enum_gtid_mode gtid_mode= get_gtid_mode(GTID_MODE_LOCK_SID);
12153
12154 DBUG_PRINT("info", ("gtid_next.type=%d gtid_mode=%s "
12155 "gtid_consistency_mode=%d error=%d query=%s",
12156 gtid_next_type,
12157 get_gtid_mode_string(gtid_mode),
12158 gtid_consistency_mode,
12159 error_code,
12160 thd->query().str));
12161
12162 /*
12163 GTID violations should generate error if:
12164 - GTID_MODE=ON or ON_PERMISSIVE and GTID_NEXT='AUTOMATIC' (since the
12165 transaction is expected to commit using a GTID), or
12166 - GTID_NEXT='UUID:NUMBER' (since the transaction is expected to
12167 commit usinga GTID), or
12168 - ENFORCE_GTID_CONSISTENCY=ON.
12169 */
12170 if ((gtid_next_type == AUTOMATIC_GROUP &&
12171 gtid_mode >= GTID_MODE_ON_PERMISSIVE) ||
12172 gtid_next_type == GTID_GROUP ||
12173 gtid_consistency_mode == GTID_CONSISTENCY_MODE_ON)
12174 {
12175 global_sid_lock->unlock();
12176 my_error(error_code, MYF(0));
12177 DBUG_RETURN(false);
12178 }
12179 else
12180 {
12181 /*
12182 If we are not generating an error, we must increase the counter
12183 of GTID-violating transactions. This will prevent a concurrent
12184 client from executing a SET GTID_MODE or SET
12185 ENFORCE_GTID_CONSISTENCY statement that would be incompatible
12186 with this transaction.
12187
12188 If the transaction had already been accounted as a gtid violating
12189 transaction, then don't increment the counters, just issue the
12190 warning below. This prevents calling
12191 begin_automatic_gtid_violating_transaction or
12192 begin_anonymous_gtid_violating_transaction multiple times for the
12193 same transaction, which would make the counter go out of sync.
12194 */
12195 if (!thd->has_gtid_consistency_violation)
12196 {
12197 if (gtid_next_type == AUTOMATIC_GROUP)
12198 gtid_state->begin_automatic_gtid_violating_transaction();
12199 else
12200 {
12201 assert(gtid_next_type == ANONYMOUS_GROUP);
12202 gtid_state->begin_anonymous_gtid_violating_transaction();
12203 }
12204
12205 /*
12206 If a transaction generates multiple GTID violation conditions,
12207 it must still only update the counters once. Hence we use
12208 this per-thread flag to keep track of whether the thread has a
12209 consistency or not. This function must only be called if the
12210 transaction does not already have a GTID violation.
12211 */
12212 thd->has_gtid_consistency_violation= true;
12213 }
12214
12215 global_sid_lock->unlock();
12216
12217 // Generate warning if ENFORCE_GTID_CONSISTENCY = WARN.
12218 if (gtid_consistency_mode == GTID_CONSISTENCY_MODE_WARN)
12219 {
12220 // Need to print to log so that replication admin knows when users
12221 // have adjusted their workloads.
12222 sql_print_warning("%s", ER(error_code));
12223 // Need to print to client so that users can adjust their workload.
12224 push_warning(thd, Sql_condition::SL_WARNING, error_code, ER(error_code));
12225 }
12226 DBUG_RETURN(true);
12227 }
12228 }
12229
12230
is_ddl_gtid_compatible()12231 bool THD::is_ddl_gtid_compatible()
12232 {
12233 DBUG_ENTER("THD::is_ddl_gtid_compatible");
12234
12235 // If @@session.sql_log_bin has been manually turned off (only
12236 // doable by SUPER), then no problem, we can execute any statement.
12237 if ((variables.option_bits & OPTION_BIN_LOG) == 0 ||
12238 mysql_bin_log.is_open() == false)
12239 DBUG_RETURN(true);
12240
12241 DBUG_PRINT("info",
12242 ("SQLCOM_CREATE:%d CREATE-TMP:%d SELECT:%d SQLCOM_DROP:%d DROP-TMP:%d trx:%d",
12243 lex->sql_command == SQLCOM_CREATE_TABLE,
12244 (lex->sql_command == SQLCOM_CREATE_TABLE &&
12245 (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE)),
12246 lex->select_lex->item_list.elements,
12247 lex->sql_command == SQLCOM_DROP_TABLE,
12248 (lex->sql_command == SQLCOM_DROP_TABLE && lex->drop_temporary),
12249 in_multi_stmt_transaction_mode()));
12250
12251 if (lex->sql_command == SQLCOM_CREATE_TABLE &&
12252 !(lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) &&
12253 lex->select_lex->item_list.elements)
12254 {
12255 /*
12256 CREATE ... SELECT (without TEMPORARY) is unsafe because if
12257 binlog_format=row it will be logged as a CREATE TABLE followed
12258 by row events, re-executed non-atomically as two transactions,
12259 and then written to the slave's binary log as two separate
12260 transactions with the same GTID.
12261 */
12262 bool ret= handle_gtid_consistency_violation(
12263 this, ER_GTID_UNSAFE_CREATE_SELECT);
12264 DBUG_RETURN(ret);
12265 }
12266 else if ((lex->sql_command == SQLCOM_CREATE_TABLE &&
12267 (lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) != 0))
12268 {
12269 /*
12270 In statement binary log format, CREATE TEMPORARY TABLE is unsafe
12271 to execute inside a transaction because the table will stay and the
12272 transaction will be written to the slave's binary log with the GTID even
12273 if the transaction is rolled back. This includes the execution inside
12274 functions and triggers.
12275 The same considerations apply for DROP TEMPORARY TABLE too, this is
12276 checked in mysql_rm_table instead.
12277 */
12278 if ((in_multi_stmt_transaction_mode() || in_sub_stmt)
12279 && variables.binlog_format == BINLOG_FORMAT_STMT)
12280 {
12281 bool ret= handle_gtid_consistency_violation(
12282 this, ER_GTID_UNSAFE_CREATE_DROP_TEMPORARY_TABLE_IN_TRANSACTION);
12283 DBUG_RETURN(ret);
12284 }
12285 }
12286 DBUG_RETURN(true);
12287 }
12288
12289
12290 bool
is_dml_gtid_compatible(bool some_transactional_table,bool some_non_transactional_table,bool non_transactional_tables_are_tmp)12291 THD::is_dml_gtid_compatible(bool some_transactional_table,
12292 bool some_non_transactional_table,
12293 bool non_transactional_tables_are_tmp)
12294 {
12295 DBUG_ENTER("THD::is_dml_gtid_compatible(bool, bool, bool)");
12296
12297 // If @@session.sql_log_bin has been manually turned off (only
12298 // doable by SUPER), then no problem, we can execute any statement.
12299 if ((variables.option_bits & OPTION_BIN_LOG) == 0 ||
12300 mysql_bin_log.is_open() == false)
12301 DBUG_RETURN(true);
12302
12303 /*
12304 Single non-transactional updates are allowed when not mixed
12305 together with transactional statements within a transaction.
12306 Furthermore, writing to transactional and non-transactional
12307 engines in a single statement is also disallowed.
12308 Multi-statement transactions on non-transactional tables are
12309 split into single-statement transactions when
12310 GTID_NEXT = "AUTOMATIC".
12311
12312 Non-transactional updates are allowed when row binlog format is
12313 used and all non-transactional tables are temporary.
12314
12315 The debug symbol "allow_gtid_unsafe_non_transactional_updates"
12316 disables the error. This is useful because it allows us to run
12317 old tests that were not written with the restrictions of GTIDs in
12318 mind.
12319 */
12320 DBUG_PRINT("info", ("some_non_transactional_table=%d "
12321 "some_transactional_table=%d "
12322 "trans_has_updated_trans_table=%d "
12323 "non_transactional_tables_are_tmp=%d "
12324 "is_current_stmt_binlog_format_row=%d",
12325 some_non_transactional_table,
12326 some_transactional_table,
12327 trans_has_updated_trans_table(this),
12328 non_transactional_tables_are_tmp,
12329 is_current_stmt_binlog_format_row()));
12330 if (some_non_transactional_table &&
12331 (some_transactional_table || trans_has_updated_trans_table(this)) &&
12332 !(non_transactional_tables_are_tmp &&
12333 is_current_stmt_binlog_format_row()) &&
12334 !DBUG_EVALUATE_IF("allow_gtid_unsafe_non_transactional_updates", 1, 0))
12335 {
12336 DBUG_RETURN(handle_gtid_consistency_violation(
12337 this, ER_GTID_UNSAFE_NON_TRANSACTIONAL_TABLE));
12338 }
12339
12340 DBUG_RETURN(true);
12341 }
12342
12343 /*
12344 Implementation of interface to write rows to the binary log through the
12345 thread. The thread is responsible for writing the rows it has
12346 inserted/updated/deleted.
12347 */
12348
12349 #ifndef MYSQL_CLIENT
12350
12351 /*
12352 Template member function for ensuring that there is an rows log
12353 event of the apropriate type before proceeding.
12354
12355 PRE CONDITION:
12356 - Events of type 'RowEventT' have the type code 'type_code'.
12357
12358 POST CONDITION:
12359 If a non-NULL pointer is returned, the pending event for thread 'thd' will
12360 be an event of type 'RowEventT' (which have the type code 'type_code')
12361 will either empty or have enough space to hold 'needed' bytes. In
12362 addition, the columns bitmap will be correct for the row, meaning that
12363 the pending event will be flushed if the columns in the event differ from
12364 the columns suppled to the function.
12365
12366 RETURNS
12367 If no error, a non-NULL pending event (either one which already existed or
12368 the newly created one).
12369 If error, NULL.
12370 */
12371
12372 template <class RowsEventT> Rows_log_event*
binlog_prepare_pending_rows_event(TABLE * table,uint32 serv_id,size_t needed,bool is_transactional,RowsEventT * hint MY_ATTRIBUTE ((unused)),const uchar * extra_row_info)12373 THD::binlog_prepare_pending_rows_event(TABLE* table, uint32 serv_id,
12374 size_t needed,
12375 bool is_transactional,
12376 RowsEventT *hint MY_ATTRIBUTE((unused)),
12377 const uchar* extra_row_info)
12378 {
12379 DBUG_ENTER("binlog_prepare_pending_rows_event");
12380
12381 /* Fetch the type code for the RowsEventT template parameter */
12382 int const general_type_code= RowsEventT::TYPE_CODE;
12383
12384 Rows_log_event* pending= binlog_get_pending_rows_event(is_transactional);
12385
12386 if (unlikely(pending && !pending->is_valid()))
12387 DBUG_RETURN(NULL);
12388
12389 /*
12390 Check if the current event is non-NULL and a write-rows
12391 event. Also check if the table provided is mapped: if it is not,
12392 then we have switched to writing to a new table.
12393 If there is no pending event, we need to create one. If there is a pending
12394 event, but it's not about the same table id, or not of the same type
12395 (between Write, Update and Delete), or not the same affected columns, or
12396 going to be too big, flush this event to disk and create a new pending
12397 event.
12398 */
12399 if (!pending ||
12400 pending->server_id != serv_id ||
12401 pending->get_table_id() != table->s->table_map_id ||
12402 pending->get_general_type_code() != general_type_code ||
12403 pending->get_data_size() + needed > opt_binlog_rows_event_max_size ||
12404 pending->read_write_bitmaps_cmp(table) == FALSE ||
12405 !binlog_row_event_extra_data_eq(pending->get_extra_row_data(),
12406 extra_row_info))
12407 {
12408 /* Create a new RowsEventT... */
12409 Rows_log_event* const
12410 ev= new RowsEventT(this, table, table->s->table_map_id,
12411 is_transactional, extra_row_info);
12412 if (unlikely(!ev))
12413 DBUG_RETURN(NULL);
12414 ev->server_id= serv_id; // I don't like this, it's too easy to forget.
12415 /*
12416 flush the pending event and replace it with the newly created
12417 event...
12418 */
12419 if (unlikely(
12420 mysql_bin_log.flush_and_set_pending_rows_event(this, ev,
12421 is_transactional)))
12422 {
12423 delete ev;
12424 DBUG_RETURN(NULL);
12425 }
12426
12427 DBUG_RETURN(ev); /* This is the new pending event */
12428 }
12429 DBUG_RETURN(pending); /* This is the current pending event */
12430 }
12431
12432 /* Declare in unnamed namespace. */
12433 namespace {
12434
12435 /**
12436 Class to handle temporary allocation of memory for row data.
12437
12438 The responsibility of the class is to provide memory for
12439 packing one or two rows of packed data (depending on what
12440 constructor is called).
12441
12442 In order to make the allocation more efficient for "simple" rows,
12443 i.e., rows that do not contain any blobs, a pointer to the
12444 allocated memory is stored in the table structure
12445 for simple rows. If memory for a table containing a blob field
12446 is requested, only memory for that is allocated, and subsequently
12447 released when the object is destroyed.
12448
12449 */
12450 class Row_data_memory {
12451 public:
12452 /**
12453 Build an object to keep track of a block-local piece of memory
12454 for storing a row of data.
12455
12456 @param table
12457 Table where the pre-allocated memory is stored.
12458
12459 @param length
12460 Length of data that is needed, if the record contain blobs.
12461 */
Row_data_memory(TABLE * table,size_t const len1)12462 Row_data_memory(TABLE *table, size_t const len1)
12463 : m_memory(0)
12464 {
12465 #ifndef NDEBUG
12466 m_alloc_checked= FALSE;
12467 #endif
12468 allocate_memory(table, len1);
12469 m_ptr[0]= has_memory() ? m_memory : 0;
12470 m_ptr[1]= 0;
12471 }
12472
Row_data_memory(TABLE * table,size_t const len1,size_t const len2)12473 Row_data_memory(TABLE *table, size_t const len1, size_t const len2)
12474 : m_memory(0)
12475 {
12476 #ifndef NDEBUG
12477 m_alloc_checked= FALSE;
12478 #endif
12479 allocate_memory(table, len1 + len2);
12480 m_ptr[0]= has_memory() ? m_memory : 0;
12481 m_ptr[1]= has_memory() ? m_memory + len1 : 0;
12482 }
12483
~Row_data_memory()12484 ~Row_data_memory()
12485 {
12486 if (m_memory != 0 && m_release_memory_on_destruction)
12487 my_free(m_memory);
12488 }
12489
12490 /**
12491 Is there memory allocated?
12492
12493 @retval true There is memory allocated
12494 @retval false Memory allocation failed
12495 */
has_memory() const12496 bool has_memory() const {
12497 #ifndef NDEBUG
12498 m_alloc_checked= TRUE;
12499 #endif
12500 return m_memory != 0;
12501 }
12502
slot(uint s)12503 uchar *slot(uint s)
12504 {
12505 assert(s < sizeof(m_ptr)/sizeof(*m_ptr));
12506 assert(m_ptr[s] != 0);
12507 assert(m_alloc_checked == TRUE);
12508 return m_ptr[s];
12509 }
12510
12511 private:
allocate_memory(TABLE * const table,size_t const total_length)12512 void allocate_memory(TABLE *const table, size_t const total_length)
12513 {
12514 if (table->s->blob_fields == 0)
12515 {
12516 /*
12517 The maximum length of a packed record is less than this
12518 length. We use this value instead of the supplied length
12519 when allocating memory for records, since we don't know how
12520 the memory will be used in future allocations.
12521
12522 Since table->s->reclength is for unpacked records, we have
12523 to add two bytes for each field, which can potentially be
12524 added to hold the length of a packed field.
12525 */
12526 size_t const maxlen= table->s->reclength + 2 * table->s->fields;
12527
12528 /*
12529 Allocate memory for two records if memory hasn't been
12530 allocated. We allocate memory for two records so that it can
12531 be used when processing update rows as well.
12532 */
12533 if (table->write_row_record == 0)
12534 table->write_row_record=
12535 (uchar *) alloc_root(&table->mem_root, 2 * maxlen);
12536 m_memory= table->write_row_record;
12537 m_release_memory_on_destruction= FALSE;
12538 }
12539 else
12540 {
12541 m_memory= (uchar *) my_malloc(key_memory_Row_data_memory_memory,
12542 total_length, MYF(MY_WME));
12543 m_release_memory_on_destruction= TRUE;
12544 }
12545 }
12546
12547 #ifndef NDEBUG
12548 mutable bool m_alloc_checked;
12549 #endif
12550 bool m_release_memory_on_destruction;
12551 uchar *m_memory;
12552 uchar *m_ptr[2];
12553 };
12554
12555 } // namespace
12556
binlog_write_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)12557 int THD::binlog_write_row(TABLE* table, bool is_trans,
12558 uchar const *record,
12559 const uchar* extra_row_info)
12560 {
12561 assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
12562
12563 /*
12564 Pack records into format for transfer. We are allocating more
12565 memory than needed, but that doesn't matter.
12566 */
12567 Row_data_memory memory(table, max_row_length(table, record));
12568 if (!memory.has_memory())
12569 return HA_ERR_OUT_OF_MEM;
12570
12571 uchar *row_data= memory.slot(0);
12572
12573 size_t const len= pack_row(table, table->write_set, row_data, record);
12574
12575 Rows_log_event* const ev=
12576 binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
12577 static_cast<Write_rows_log_event*>(0),
12578 extra_row_info);
12579
12580 if (unlikely(ev == 0))
12581 return HA_ERR_OUT_OF_MEM;
12582
12583 return ev->add_row_data(row_data, len);
12584 }
12585
binlog_update_row(TABLE * table,bool is_trans,const uchar * before_record,const uchar * after_record,const uchar * extra_row_info)12586 int THD::binlog_update_row(TABLE* table, bool is_trans,
12587 const uchar *before_record,
12588 const uchar *after_record,
12589 const uchar* extra_row_info)
12590 {
12591 assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
12592 int error= 0;
12593
12594 /**
12595 Save a reference to the original read and write set bitmaps.
12596 We will need this to restore the bitmaps at the end.
12597 */
12598 MY_BITMAP *old_read_set= table->read_set;
12599 MY_BITMAP *old_write_set= table->write_set;
12600
12601 /**
12602 This will remove spurious fields required during execution but
12603 not needed for binlogging. This is done according to the:
12604 binlog-row-image option.
12605 */
12606 binlog_prepare_row_images(table);
12607
12608 size_t const before_maxlen = max_row_length(table, before_record);
12609 size_t const after_maxlen = max_row_length(table, after_record);
12610
12611 Row_data_memory row_data(table, before_maxlen, after_maxlen);
12612 if (!row_data.has_memory())
12613 return HA_ERR_OUT_OF_MEM;
12614
12615 uchar *before_row= row_data.slot(0);
12616 uchar *after_row= row_data.slot(1);
12617
12618 size_t const before_size= pack_row(table, table->read_set, before_row,
12619 before_record);
12620 size_t const after_size= pack_row(table, table->write_set, after_row,
12621 after_record);
12622
12623 DBUG_DUMP("before_record", before_record, table->s->reclength);
12624 DBUG_DUMP("after_record", after_record, table->s->reclength);
12625 DBUG_DUMP("before_row", before_row, before_size);
12626 DBUG_DUMP("after_row", after_row, after_size);
12627
12628 Rows_log_event* const ev=
12629 binlog_prepare_pending_rows_event(table, server_id,
12630 before_size + after_size, is_trans,
12631 static_cast<Update_rows_log_event*>(0),
12632 extra_row_info);
12633
12634 if (unlikely(ev == 0))
12635 return HA_ERR_OUT_OF_MEM;
12636
12637 error= ev->add_row_data(before_row, before_size) ||
12638 ev->add_row_data(after_row, after_size);
12639
12640 /* restore read/write set for the rest of execution */
12641 table->column_bitmaps_set_no_signal(old_read_set,
12642 old_write_set);
12643
12644 bitmap_clear_all(&table->tmp_set);
12645
12646 return error;
12647 }
12648
binlog_delete_row(TABLE * table,bool is_trans,uchar const * record,const uchar * extra_row_info)12649 int THD::binlog_delete_row(TABLE* table, bool is_trans,
12650 uchar const *record,
12651 const uchar* extra_row_info)
12652 {
12653 assert(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
12654 int error= 0;
12655
12656 /**
12657 Save a reference to the original read and write set bitmaps.
12658 We will need this to restore the bitmaps at the end.
12659 */
12660 MY_BITMAP *old_read_set= table->read_set;
12661 MY_BITMAP *old_write_set= table->write_set;
12662
12663 /**
12664 This will remove spurious fields required during execution but
12665 not needed for binlogging. This is done according to the:
12666 binlog-row-image option.
12667 */
12668 binlog_prepare_row_images(table);
12669
12670 /*
12671 Pack records into format for transfer. We are allocating more
12672 memory than needed, but that doesn't matter.
12673 */
12674 Row_data_memory memory(table, max_row_length(table, record));
12675 if (unlikely(!memory.has_memory()))
12676 return HA_ERR_OUT_OF_MEM;
12677
12678 uchar *row_data= memory.slot(0);
12679
12680 DBUG_DUMP("table->read_set", (uchar*) table->read_set->bitmap, (table->s->fields + 7) / 8);
12681 size_t const len= pack_row(table, table->read_set, row_data, record);
12682
12683 Rows_log_event* const ev=
12684 binlog_prepare_pending_rows_event(table, server_id, len, is_trans,
12685 static_cast<Delete_rows_log_event*>(0),
12686 extra_row_info);
12687
12688 if (unlikely(ev == 0))
12689 return HA_ERR_OUT_OF_MEM;
12690
12691 error= ev->add_row_data(row_data, len);
12692
12693 /* restore read/write set for the rest of execution */
12694 table->column_bitmaps_set_no_signal(old_read_set,
12695 old_write_set);
12696
12697 bitmap_clear_all(&table->tmp_set);
12698 return error;
12699 }
12700
binlog_prepare_row_images(TABLE * table)12701 void THD::binlog_prepare_row_images(TABLE *table)
12702 {
12703 DBUG_ENTER("THD::binlog_prepare_row_images");
12704 /**
12705 Remove from read_set spurious columns. The write_set has been
12706 handled before in table->mark_columns_needed_for_update.
12707 */
12708
12709 DBUG_PRINT_BITSET("debug", "table->read_set (before preparing): %s", table->read_set);
12710 THD *thd= table->in_use;
12711
12712 /**
12713 if there is a primary key in the table (ie, user declared PK or a
12714 non-null unique index) and we dont want to ship the entire image,
12715 and the handler involved supports this.
12716 */
12717 if (table->s->primary_key < MAX_KEY &&
12718 (thd->variables.binlog_row_image < BINLOG_ROW_IMAGE_FULL) &&
12719 !ha_check_storage_engine_flag(table->s->db_type(), HTON_NO_BINLOG_ROW_OPT))
12720 {
12721 /**
12722 Just to be sure that tmp_set is currently not in use as
12723 the read_set already.
12724 */
12725 assert(table->read_set != &table->tmp_set);
12726 // Verify it's not used
12727 assert(bitmap_is_clear_all(&table->tmp_set));
12728
12729 switch(thd->variables.binlog_row_image)
12730 {
12731 case BINLOG_ROW_IMAGE_MINIMAL:
12732 /* MINIMAL: Mark only PK */
12733 table->mark_columns_used_by_index_no_reset(table->s->primary_key,
12734 &table->tmp_set);
12735 break;
12736 case BINLOG_ROW_IMAGE_NOBLOB:
12737 /**
12738 NOBLOB: Remove unnecessary BLOB fields from read_set
12739 (the ones that are not part of PK).
12740 */
12741 bitmap_union(&table->tmp_set, table->read_set);
12742 for (Field **ptr=table->field ; *ptr ; ptr++)
12743 {
12744 Field *field= (*ptr);
12745 if ((field->type() == MYSQL_TYPE_BLOB) &&
12746 !(field->flags & PRI_KEY_FLAG))
12747 bitmap_clear_bit(&table->tmp_set, field->field_index);
12748 }
12749 break;
12750 default:
12751 assert(0); // impossible.
12752 }
12753
12754 /* set the temporary read_set */
12755 table->column_bitmaps_set_no_signal(&table->tmp_set,
12756 table->write_set);
12757 }
12758
12759 DBUG_PRINT_BITSET("debug", "table->read_set (after preparing): %s", table->read_set);
12760 DBUG_VOID_RETURN;
12761 }
12762
12763
binlog_flush_pending_rows_event(bool stmt_end,bool is_transactional)12764 int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional)
12765 {
12766 DBUG_ENTER("THD::binlog_flush_pending_rows_event");
12767 /*
12768 We shall flush the pending event even if we are not in row-based
12769 mode: it might be the case that we left row-based mode before
12770 flushing anything (e.g., if we have explicitly locked tables).
12771 */
12772 if (!mysql_bin_log.is_open())
12773 DBUG_RETURN(0);
12774
12775 /*
12776 Mark the event as the last event of a statement if the stmt_end
12777 flag is set.
12778 */
12779 int error= 0;
12780 if (Rows_log_event *pending= binlog_get_pending_rows_event(is_transactional))
12781 {
12782 if (stmt_end)
12783 {
12784 pending->set_flags(Rows_log_event::STMT_END_F);
12785 binlog_table_maps= 0;
12786 }
12787
12788 error= mysql_bin_log.flush_and_set_pending_rows_event(this, 0,
12789 is_transactional);
12790 }
12791
12792 DBUG_RETURN(error);
12793 }
12794
12795
12796 /**
12797 binlog_row_event_extra_data_eq
12798
12799 Comparator for two binlog row event extra data
12800 pointers.
12801
12802 It compares their significant bytes.
12803
12804 Null pointers are acceptable
12805
12806 @param a
12807 first pointer
12808
12809 @param b
12810 first pointer
12811
12812 @return
12813 true if the referenced structures are equal
12814 */
12815 bool
binlog_row_event_extra_data_eq(const uchar * a,const uchar * b)12816 THD::binlog_row_event_extra_data_eq(const uchar* a,
12817 const uchar* b)
12818 {
12819 return ((a == b) ||
12820 ((a != NULL) &&
12821 (b != NULL) &&
12822 (a[EXTRA_ROW_INFO_LEN_OFFSET] ==
12823 b[EXTRA_ROW_INFO_LEN_OFFSET]) &&
12824 (memcmp(a, b,
12825 a[EXTRA_ROW_INFO_LEN_OFFSET]) == 0)));
12826 }
12827
12828 #if !defined(NDEBUG)
12829 static const char *
show_query_type(THD::enum_binlog_query_type qtype)12830 show_query_type(THD::enum_binlog_query_type qtype)
12831 {
12832 switch (qtype) {
12833 case THD::ROW_QUERY_TYPE:
12834 return "ROW";
12835 case THD::STMT_QUERY_TYPE:
12836 return "STMT";
12837 case THD::QUERY_TYPE_COUNT:
12838 default:
12839 assert(0 <= qtype && qtype < THD::QUERY_TYPE_COUNT);
12840 }
12841 static char buf[64];
12842 sprintf(buf, "UNKNOWN#%d", qtype);
12843 return buf;
12844 }
12845 #endif
12846
12847 /**
12848 Auxiliary function to reset the limit unsafety warning suppression.
12849 */
reset_binlog_unsafe_suppression()12850 static void reset_binlog_unsafe_suppression()
12851 {
12852 DBUG_ENTER("reset_binlog_unsafe_suppression");
12853 unsafe_warning_suppression_is_activated= false;
12854 limit_unsafe_warning_count= 0;
12855 limit_unsafe_suppression_start_time= my_getsystime()/10000000;
12856 DBUG_VOID_RETURN;
12857 }
12858
12859 /**
12860 Auxiliary function to print warning in the error log.
12861 */
print_unsafe_warning_to_log(int unsafe_type,char * buf,const char * query)12862 static void print_unsafe_warning_to_log(int unsafe_type, char* buf,
12863 const char* query)
12864 {
12865 DBUG_ENTER("print_unsafe_warning_in_log");
12866 sprintf(buf, ER(ER_BINLOG_UNSAFE_STATEMENT),
12867 ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
12868 sql_print_warning(ER(ER_MESSAGE_AND_STATEMENT), buf, query);
12869 DBUG_VOID_RETURN;
12870 }
12871
/**
  Auxiliary function to check if the warning for limit unsafety should be
  thrown or suppressed. Details of the implementation can be found in the
  comments inline.

  The function keeps its state in the file-level variables
  limit_unsafe_warning_count, limit_unsafe_suppression_start_time and
  unsafe_warning_suppression_is_activated.

  @param buf          buffer to hold the warning message text
  @param unsafe_type  The type of unsafety; must be
                      LEX::BINLOG_STMT_UNSAFE_LIMIT (asserted).
  @param query        The actual query statement.

  TODO: Remove this function and implement a general service for all warnings
  that would prevent flooding the error log. => switch to log_throttle class?
*/
static void do_unsafe_limit_checkout(char* buf, int unsafe_type, const char* query)
{
  ulonglong now;
  DBUG_ENTER("do_unsafe_limit_checkout");
  assert(unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT);
  /* Every call counts as one warning, whether it is printed or not. */
  limit_unsafe_warning_count++;
  /*
    INITIALIZING:
    If this is the first time this function is called with log warning
    enabled, the monitoring of the unsafe warnings should start.
  */
  if (limit_unsafe_suppression_start_time == 0)
  {
    limit_unsafe_suppression_start_time= my_getsystime()/10000000;
    print_unsafe_warning_to_log(unsafe_type, buf, query);
  }
  else
  {
    /* While suppression is off, every warning goes to the log. */
    if (!unsafe_warning_suppression_is_activated)
      print_unsafe_warning_to_log(unsafe_type, buf, query);

    /* The state is only re-evaluated once the threshold count is reached. */
    if (limit_unsafe_warning_count >=
        LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT)
    {
      /*
        The difference to the start time is reported below as seconds.
      */
      now= my_getsystime()/10000000;
      if (!unsafe_warning_suppression_is_activated)
      {
        /*
          ACTIVATION:
          We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT warnings in
          less than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT we activate the
          suppression.
        */
        if ((now-limit_unsafe_suppression_start_time) <=
            LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
        {
          unsafe_warning_suppression_is_activated= true;
          DBUG_PRINT("info",("A warning flood has been detected and the limit \
unsafety warning suppression has been activated."));
        }
        else
        {
          /*
            there is no flooding till now, therefore we restart the monitoring
          */
          limit_unsafe_suppression_start_time= my_getsystime()/10000000;
          limit_unsafe_warning_count= 0;
        }
      }
      else
      {
        /*
          Print the suppression note and the unsafe warning.
        */
        sql_print_information("The following warning was suppressed %d times \
during the last %d seconds in the error log",
                              limit_unsafe_warning_count,
                              (int)
                              (now-limit_unsafe_suppression_start_time));
        print_unsafe_warning_to_log(unsafe_type, buf, query);
        /*
          DEACTIVATION: We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT
          warnings in more than  LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT, the
          suppression should be deactivated.
        */
        if ((now - limit_unsafe_suppression_start_time) >
            LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT)
        {
          reset_binlog_unsafe_suppression();
          DBUG_PRINT("info",("The limit unsafety warning supression has been \
deactivated"));
        }
      }
      /* Start counting towards the next threshold from zero. */
      limit_unsafe_warning_count= 0;
    }
  }
  DBUG_VOID_RETURN;
}
12963
12964 /**
12965 Auxiliary method used by @c binlog_query() to raise warnings.
12966
12967 The type of warning and the type of unsafeness is stored in
12968 THD::binlog_unsafe_warning_flags.
12969 */
issue_unsafe_warnings()12970 void THD::issue_unsafe_warnings()
12971 {
12972 char buf[MYSQL_ERRMSG_SIZE * 2];
12973 DBUG_ENTER("issue_unsafe_warnings");
12974 /*
12975 Ensure that binlog_unsafe_warning_flags is big enough to hold all
12976 bits. This is actually a constant expression.
12977 */
12978 assert(LEX::BINLOG_STMT_UNSAFE_COUNT <=
12979 sizeof(binlog_unsafe_warning_flags) * CHAR_BIT);
12980
12981 uint32 unsafe_type_flags= binlog_unsafe_warning_flags;
12982
12983 if ((unsafe_type_flags & (1U << LEX::BINLOG_STMT_UNSAFE_LIMIT)) != 0)
12984 {
12985 if ((lex->sql_command == SQLCOM_DELETE
12986 || lex->sql_command == SQLCOM_UPDATE) &&
12987 lex->select_lex->select_limit)
12988 {
12989 ORDER *order= (ORDER *) ((lex->select_lex->order_list.elements) ?
12990 lex->select_lex->order_list.first : NULL);
12991 if ((lex->select_lex->select_limit &&
12992 lex->select_lex->select_limit->fixed &&
12993 lex->select_lex->select_limit->val_int() == 0) ||
12994 is_order_deterministic(lex->query_tables,
12995 lex->select_lex->where_cond(), order))
12996 {
12997 unsafe_type_flags&= ~(1U << LEX::BINLOG_STMT_UNSAFE_LIMIT);
12998 }
12999 }
13000 if ((lex->sql_command == SQLCOM_INSERT_SELECT ||
13001 lex->sql_command == SQLCOM_REPLACE_SELECT) &&
13002 order_deterministic)
13003 {
13004 unsafe_type_flags&= ~(1U << LEX::BINLOG_STMT_UNSAFE_LIMIT);
13005 }
13006
13007 }
13008
13009 /*
13010 For each unsafe_type, check if the statement is unsafe in this way
13011 and issue a warning.
13012 */
13013 for (int unsafe_type=0;
13014 unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
13015 unsafe_type++)
13016 {
13017 if ((unsafe_type_flags & (1 << unsafe_type)) != 0)
13018 {
13019 push_warning_printf(this, Sql_condition::SL_NOTE,
13020 ER_BINLOG_UNSAFE_STATEMENT,
13021 ER(ER_BINLOG_UNSAFE_STATEMENT),
13022 ER(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
13023 if (log_error_verbosity > 1 && opt_log_unsafe_statements)
13024 {
13025 if (unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT)
13026 do_unsafe_limit_checkout( buf, unsafe_type, query().str);
13027 else //cases other than LIMIT unsafety
13028 print_unsafe_warning_to_log(unsafe_type, buf, query().str);
13029 }
13030 }
13031 }
13032 DBUG_VOID_RETURN;
13033 }
13034
13035 /**
13036 Log the current query.
13037
13038 The query will be logged in either row format or statement format
13039 depending on the value of @c current_stmt_binlog_format_row field and
13040 the value of the @c qtype parameter.
13041
13042 This function must be called:
13043
    - After all calls to ha_*_row() functions have been issued.
13045
13046 - After any writes to system tables. Rationale: if system tables
13047 were written after a call to this function, and the master crashes
13048 after the call to this function and before writing the system
13049 tables, then the master and slave get out of sync.
13050
13051 - Before tables are unlocked and closed.
13052
13053 @see decide_logging_format
13054
13055 @retval 0 Success
13056
13057 @retval nonzero If there is a failure when writing the query (e.g.,
13058 write failure), then the error code is returned.
13059 */
binlog_query(THD::enum_binlog_query_type qtype,const char * query_arg,size_t query_len,bool is_trans,bool direct,bool suppress_use,int errcode)13060 int THD::binlog_query(THD::enum_binlog_query_type qtype, const char *query_arg,
13061 size_t query_len, bool is_trans, bool direct,
13062 bool suppress_use, int errcode)
13063 {
13064 DBUG_ENTER("THD::binlog_query");
13065 DBUG_PRINT("enter", ("qtype: %s query: '%s'",
13066 show_query_type(qtype), query_arg));
13067 assert(query_arg && mysql_bin_log.is_open());
13068
13069 if (get_binlog_local_stmt_filter() == BINLOG_FILTER_SET)
13070 {
13071 /*
13072 The current statement is to be ignored, and not written to
13073 the binlog. Do not call issue_unsafe_warnings().
13074 */
13075 DBUG_RETURN(0);
13076 }
13077
13078 /*
13079 If we are not in prelocked mode, mysql_unlock_tables() will be
13080 called after this binlog_query(), so we have to flush the pending
13081 rows event with the STMT_END_F set to unlock all tables at the
13082 slave side as well.
13083
13084 If we are in prelocked mode, the flushing will be done inside the
13085 top-most close_thread_tables().
13086 */
13087 if (this->locked_tables_mode <= LTM_LOCK_TABLES)
13088 if (int error= binlog_flush_pending_rows_event(TRUE, is_trans))
13089 DBUG_RETURN(error);
13090
13091 /*
13092 Warnings for unsafe statements logged in statement format are
13093 printed in three places instead of in decide_logging_format().
13094 This is because the warnings should be printed only if the statement
13095 is actually logged. When executing decide_logging_format(), we cannot
13096 know for sure if the statement will be logged:
13097
13098 1 - sp_head::execute_procedure which prints out warnings for calls to
13099 stored procedures.
13100
13101 2 - sp_head::execute_function which prints out warnings for calls
13102 involving functions.
13103
13104 3 - THD::binlog_query (here) which prints warning for top level
13105 statements not covered by the two cases above: i.e., if not insided a
13106 procedure and a function.
13107
13108 Besides, we should not try to print these warnings if it is not
13109 possible to write statements to the binary log as it happens when
13110 the execution is inside a function, or generaly speaking, when
13111 the variables.option_bits & OPTION_BIN_LOG is false.
13112 */
13113 if ((variables.option_bits & OPTION_BIN_LOG) &&
13114 sp_runtime_ctx == NULL && !binlog_evt_union.do_union)
13115 {
13116 issue_unsafe_warnings();
13117 order_deterministic= true;
13118 }
13119
13120 switch (qtype) {
13121 /*
13122 ROW_QUERY_TYPE means that the statement may be logged either in
13123 row format or in statement format. If
13124 current_stmt_binlog_format is row, it means that the
13125 statement has already been logged in row format and hence shall
13126 not be logged again.
13127 */
13128 case THD::ROW_QUERY_TYPE:
13129 DBUG_PRINT("debug",
13130 ("is_current_stmt_binlog_format_row: %d",
13131 is_current_stmt_binlog_format_row()));
13132 if (is_current_stmt_binlog_format_row())
13133 DBUG_RETURN(0);
13134 /* Fall through */
13135
13136 /*
13137 STMT_QUERY_TYPE means that the query must be logged in statement
13138 format; it cannot be logged in row format. This is typically
13139 used by DDL statements. It is an error to use this query type
13140 if current_stmt_binlog_format_row is row.
13141
13142 @todo Currently there are places that call this method with
13143 STMT_QUERY_TYPE and current_stmt_binlog_format is row. Fix those
13144 places and add assert to ensure correct behavior. /Sven
13145 */
13146 case THD::STMT_QUERY_TYPE:
13147 /*
13148 The MYSQL_BIN_LOG::write() function will set the STMT_END_F flag and
13149 flush the pending rows event if necessary.
13150 */
13151 {
13152 Query_log_event qinfo(this, query_arg, query_len, is_trans, direct,
13153 suppress_use, errcode);
13154 /*
13155 Binlog table maps will be irrelevant after a Query_log_event
13156 (they are just removed on the slave side) so after the query
13157 log event is written to the binary log, we pretend that no
13158 table maps were written.
13159 */
13160 int error= mysql_bin_log.write_event(&qinfo);
13161 binlog_table_maps= 0;
13162 DBUG_RETURN(error);
13163 }
13164 break;
13165
13166 case THD::QUERY_TYPE_COUNT:
13167 default:
13168 assert(0 <= qtype && qtype < QUERY_TYPE_COUNT);
13169 }
13170 DBUG_RETURN(0);
13171 }
13172
13173 #endif /* !defined(MYSQL_CLIENT) */
13174
get_cache_mngr(THD * thd)13175 static const binlog_cache_mngr *get_cache_mngr(THD *thd)
13176 {
13177 const binlog_cache_mngr *cache_mngr
13178 = (thd && opt_bin_log)
13179 ? static_cast<binlog_cache_mngr *>(thd_get_ha_data(thd, binlog_hton))
13180 : NULL;
13181
13182 return cache_mngr;
13183 }
13184
show_binlog_vars(THD * thd,SHOW_VAR * var,char * buff)13185 static int show_binlog_vars(THD *thd, SHOW_VAR *var, char *buff)
13186 {
13187 mysql_mutex_assert_owner(&LOCK_status);
13188
13189 const binlog_cache_mngr *cache_mngr= get_cache_mngr(thd);
13190
13191 if (cache_mngr && cache_mngr->has_consistent_snapshot())
13192 {
13193 set_binlog_snapshot_file(cache_mngr->binlog_info.log_file_name);
13194 binlog_snapshot_position= cache_mngr->binlog_info.pos;
13195 }
13196 else if (mysql_bin_log.is_open())
13197 {
13198 set_binlog_snapshot_file(binlog_global_snapshot_file);
13199 binlog_snapshot_position= binlog_global_snapshot_position;
13200 }
13201 else
13202 {
13203 binlog_snapshot_file[0]= '\0';
13204 binlog_snapshot_position= 0;
13205 }
13206
13207 var->type= SHOW_ARRAY;
13208 var->value= (char *)&binlog_status_vars_detail;
13209 return 0;
13210 }
13211
show_binlog_snapshot_gtid_executed(THD * thd,SHOW_VAR * var,char * buff)13212 static int show_binlog_snapshot_gtid_executed(THD *thd, SHOW_VAR *var,
13213 char *buff)
13214 {
13215 mysql_mutex_assert_owner(&LOCK_status);
13216
13217 const binlog_cache_mngr *cache_mngr= get_cache_mngr(thd);
13218
13219 if (cache_mngr && cache_mngr->has_consistent_snapshot())
13220 {
13221 binlog_snapshot_gtid_executed= cache_mngr->snapshot_gtid_executed;
13222 }
13223 else if (mysql_bin_log.is_open())
13224 {
13225 binlog_snapshot_gtid_executed= "not-in-consistent-snapshot";
13226 }
13227 else
13228 {
13229 binlog_snapshot_gtid_executed.clear();
13230 }
13231
13232 var->type= SHOW_CHAR;
13233 var->value= const_cast<char *>(binlog_snapshot_gtid_executed.c_str());
13234 return 0;
13235 }
13236
/*
  Top-level status variables exported by the binlog plugin (it is the
  status variable array named in the plugin declaration below).

  Both entries are SHOW_FUNC callbacks with global scope:
  show_binlog_vars for "Binlog" and
  show_binlog_snapshot_gtid_executed for
  "Binlog_snapshot_gtid_executed". The NullS entry terminates the array.
*/
static SHOW_VAR binlog_status_vars_top[]= {
  {"Binlog", (char *)&show_binlog_vars, SHOW_FUNC, SHOW_SCOPE_GLOBAL},
  {"Binlog_snapshot_gtid_executed",
   (char *)&show_binlog_snapshot_gtid_executed, SHOW_FUNC,
   SHOW_SCOPE_GLOBAL},
  {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}};
13243
/*
  Storage engine descriptor for the binlog pseudo engine; referenced
  by the plugin declaration below. It only carries the handlerton
  interface version.
*/
struct st_mysql_storage_engine binlog_storage_engine=
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
13246
13247 /** @} */
13248
/*
  Plugin declaration registering the binlog as a storage engine
  plugin, with binlog_init/binlog_deinit as its init/deinit hooks and
  binlog_status_vars_top as its status variables.
*/
mysql_declare_plugin(binlog)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,
  &binlog_storage_engine,
  "binlog",
  "MySQL AB",
  "This is a pseudo storage engine to represent the binlog in a transaction",
  PLUGIN_LICENSE_GPL,
  binlog_init, /* Plugin Init */
  binlog_deinit, /* Plugin Deinit */
  0x0100 /* 1.0 */,
  binlog_status_vars_top,     /* status variables                */
  NULL,                       /* system variables                */
  NULL,                       /* config options                  */
  0,  /* NOTE(review): presumably the plugin flags field - confirm against plugin.h */
}
mysql_declare_plugin_end;
13266