1 /* Copyright (c) 2009, 2020, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 #include "sql/binlog.h"
24
25 #include "my_config.h"
26
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32
33 #include "lex_string.h"
34 #include "map_helpers.h"
35 #include "my_alloc.h"
36 #include "my_loglevel.h"
37 #include "my_macros.h"
38 #include "my_systime.h"
39 #include "my_thread.h"
40 #include "sql/check_stack.h"
41 #include "sql/clone_handler.h"
42 #include "sql_string.h"
43 #include "template_utils.h"
44 #ifdef HAVE_UNISTD_H
45 #include <unistd.h>
46 #endif
47 #include <algorithm>
48 #include <list>
49 #include <map>
50 #include <new>
51 #include <queue>
52 #include <sstream>
53 #include <string>
54
55 #include "dur_prop.h"
56 #include "libbinlogevents/include/compression/base.h"
57 #include "libbinlogevents/include/compression/iterator.h"
58 #include "libbinlogevents/include/control_events.h"
59 #include "libbinlogevents/include/debug_vars.h"
60 #include "libbinlogevents/include/rows_event.h"
61 #include "libbinlogevents/include/statement_events.h"
62 #include "libbinlogevents/include/table_id.h"
63 #include "mf_wcomp.h" // wild_one, wild_many
64 #include "mutex_lock.h" // Mutex_lock
65 #include "my_base.h"
66 #include "my_bitmap.h"
67 #include "my_byteorder.h"
68 #include "my_compiler.h"
69 #include "my_dbug.h"
70 #include "my_dir.h"
71 #include "my_sqlcommand.h"
72 #include "my_stacktrace.h" // my_safe_print_system_time
73 #include "my_thread_local.h"
74 #include "mysql/components/services/log_builtins.h"
75 #include "mysql/plugin.h"
76 #include "mysql/psi/mysql_file.h"
77 #include "mysql/service_mysql_alloc.h"
78 #include "mysql/thread_type.h"
79 #include "mysqld_error.h"
80 #include "partition_info.h"
81 #include "prealloced_array.h"
82 #include "sql/binlog/global.h"
83 #include "sql/binlog/tools/iterators.h"
84 #include "sql/binlog_ostream.h"
85 #include "sql/binlog_reader.h"
86 #include "sql/create_field.h"
87 #include "sql/current_thd.h"
88 #include "sql/debug_sync.h" // DEBUG_SYNC
89 #include "sql/derror.h" // ER_THD
90 #include "sql/discrete_interval.h"
91 #include "sql/field.h"
92 #include "sql/handler.h"
93 #include "sql/item_func.h" // user_var_entry
94 #include "sql/key.h"
95 #include "sql/log.h"
96 #include "sql/log_event.h" // Rows_log_event
97 #include "sql/mysqld.h" // sync_binlog_period ...
98 #include "sql/mysqld_thd_manager.h" // Global_THD_manager
99 #include "sql/protocol.h"
100 #include "sql/psi_memory_key.h"
101 #include "sql/query_options.h"
102 #include "sql/rpl_filter.h"
103 #include "sql/rpl_gtid.h"
104 #include "sql/rpl_handler.h" // RUN_HOOK
105 #include "sql/rpl_mi.h" // Master_info
106 #include "sql/rpl_record.h"
107 #include "sql/rpl_rli.h" // Relay_log_info
108 #include "sql/rpl_rli_pdb.h" // Slave_worker
109 #include "sql/rpl_slave.h"
110 #include "sql/rpl_slave_commit_order_manager.h" // Commit_order_manager
111 #include "sql/rpl_transaction_ctx.h"
112 #include "sql/rpl_trx_boundary_parser.h" // Transaction_boundary_parser
113 #include "sql/rpl_utility.h"
114 #include "sql/sql_backup_lock.h" // is_instance_backup_locked
115 #include "sql/sql_base.h" // find_temporary_table
116 #include "sql/sql_bitmap.h"
117 #include "sql/sql_class.h" // THD
118 #include "sql/sql_const.h"
119 #include "sql/sql_data_change.h"
120 #include "sql/sql_error.h"
121 #include "sql/sql_lex.h"
122 #include "sql/sql_list.h"
123 #include "sql/sql_parse.h" // sqlcom_can_generate_row_events
124 #include "sql/sql_show.h" // append_identifier
125 #include "sql/system_variables.h"
126 #include "sql/table.h"
127 #include "sql/transaction_info.h"
128 #include "sql/xa.h"
129 #include "sql_partition.h"
130 #include "thr_lock.h"
131
132 class Item;
133
134 using binary_log::checksum_crc32;
135 using std::list;
136 using std::max;
137 using std::min;
138 using std::string;
139
// Yields the spelling of FLAG followed by one space when VALUE and FLAG share
// a set bit, and the empty string otherwise. FLAG is stringized as written.
#define FLAGSTR(VALUE, FLAG) (((VALUE) & (FLAG)) ? #FLAG " " : "")
// Yields the literal "yes" when COND is true and "no" otherwise.
#define YESNO(COND) ((COND) ? "yes" : "no")
142
143 /**
144 @defgroup Binary_Log Binary Log
145 @{
146 */
147
148 #define MY_OFF_T_UNDEF (~(my_off_t)0UL)
149
150 /*
151 Constants required for the limit unsafe warnings suppression
152 */
153 // seconds after which the limit unsafe warnings suppression will be activated
154 #define LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT 50
155 // number of limit unsafe warnings after which the suppression will be activated
156 #define LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT 50
157
158 static ulonglong limit_unsafe_suppression_start_time = 0;
159 static bool unsafe_warning_suppression_is_activated = false;
160 static int limit_unsafe_warning_count = 0;
161
162 static handlerton *binlog_hton;
163 bool opt_binlog_order_commits = true;
164
165 const char *log_bin_index = nullptr;
166 const char *log_bin_basename = nullptr;
167
168 /* Size for IO_CACHE buffer for binlog & relay log */
169 ulong rpl_read_size;
170
171 MYSQL_BIN_LOG mysql_bin_log(&sync_binlog_period);
172
173 static int binlog_init(void *p);
174 static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event);
175 static int binlog_close_connection(handlerton *hton, THD *thd);
176 static int binlog_savepoint_set(handlerton *hton, THD *thd, void *sv);
177 static int binlog_savepoint_rollback(handlerton *hton, THD *thd, void *sv);
178 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *hton,
179 THD *thd);
180 static int binlog_commit(handlerton *hton, THD *thd, bool all);
181 static int binlog_rollback(handlerton *hton, THD *thd, bool all);
182 static int binlog_prepare(handlerton *hton, THD *thd, bool all);
183 static xa_status_code binlog_xa_commit(handlerton *hton, XID *xid);
184 static xa_status_code binlog_xa_rollback(handlerton *hton, XID *xid);
185 static void exec_binlog_error_action_abort(const char *err_string);
186 static bool binlog_recover(Binlog_file_reader *binlog_file_reader,
187 my_off_t *valid_pos);
188 static void binlog_prepare_row_images(const THD *thd, TABLE *table);
189 static bool is_loggable_xa_prepare(THD *thd);
190
normalize_binlog_name(char * to,const char * from,bool is_relay_log)191 bool normalize_binlog_name(char *to, const char *from, bool is_relay_log) {
192 DBUG_TRACE;
193 bool error = false;
194 char buff[FN_REFLEN];
195 char *ptr = const_cast<char *>(from);
196 char *opt_name = is_relay_log ? opt_relay_logname : opt_bin_logname;
197
198 DBUG_ASSERT(from);
199
200 /* opt_name is not null and not empty and from is a relative path */
201 if (opt_name && opt_name[0] && from && !test_if_hard_path(from)) {
202 // take the path from opt_name
203 // take the filename from from
204 char log_dirpart[FN_REFLEN], log_dirname[FN_REFLEN];
205 size_t log_dirpart_len, log_dirname_len;
206 dirname_part(log_dirpart, opt_name, &log_dirpart_len);
207 dirname_part(log_dirname, from, &log_dirname_len);
208
209 /* log may be empty => relay-log or log-bin did not
210 hold paths, just filename pattern */
211 if (log_dirpart_len > 0) {
212 /* create the new path name */
213 if (fn_format(buff, from + log_dirname_len, log_dirpart, "",
214 MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH)) == nullptr) {
215 error = true;
216 goto end;
217 }
218
219 ptr = buff;
220 }
221 }
222
223 DBUG_ASSERT(ptr);
224 if (ptr) {
225 size_t length = strlen(ptr);
226
227 // Strips the CR+LF at the end of log name and \0-terminates it.
228 if (length && ptr[length - 1] == '\n') {
229 ptr[length - 1] = 0;
230 length--;
231 if (length && ptr[length - 1] == '\r') {
232 ptr[length - 1] = 0;
233 length--;
234 }
235 }
236 if (!length) {
237 error = true;
238 goto end;
239 }
240 strmake(to, ptr, length);
241 }
242 end:
243 return error;
244 }
245
246 /**
247 Logical binlog file which wraps and hides the detail of lower layer storage
248 implementation. Binlog code just use this class to control real storage
249 */
250 class MYSQL_BIN_LOG::Binlog_ofile : public Basic_ostream {
251 public:
~Binlog_ofile()252 ~Binlog_ofile() override {
253 DBUG_TRACE;
254 close();
255 return;
256 }
257
258 /**
259 Opens the binlog file. It opens the lower layer storage.
260
261 @param[in] log_file_key The PSI_file_key for this stream
262 @param[in] binlog_name The file to be opened
263 @param[in] flags The flags used by IO_CACHE.
264 @param[in] existing True if opening the file, false if creating a new one.
265
266 @retval false Success
267 @retval true Error
268 */
open(PSI_file_key log_file_key,const char * binlog_name,myf flags,bool existing=false)269 bool open(
270 #ifdef HAVE_PSI_INTERFACE
271 PSI_file_key log_file_key,
272 #endif
273 const char *binlog_name, myf flags, bool existing = false) {
274 DBUG_TRACE;
275 DBUG_ASSERT(m_pipeline_head == nullptr);
276
277 #ifndef DBUG_OFF
278 {
279 #ifndef HAVE_PSI_INTERFACE
280 PSI_file_key log_file_key = PSI_NOT_INSTRUMENTED;
281 #endif
282 MY_STAT info;
283 if (!mysql_file_stat(log_file_key, binlog_name, &info, MYF(0))) {
284 DBUG_ASSERT(existing == !(my_errno() == ENOENT));
285 set_my_errno(0);
286 }
287 }
288 #endif
289
290 std::unique_ptr<IO_CACHE_ostream> file_ostream(new IO_CACHE_ostream);
291 if (file_ostream->open(log_file_key, binlog_name, flags)) return true;
292
293 m_pipeline_head = std::move(file_ostream);
294
295 /* Setup encryption for new files if needed */
296 if (!existing && rpl_encryption.is_enabled()) {
297 std::unique_ptr<Binlog_encryption_ostream> encrypted_ostream(
298 new Binlog_encryption_ostream());
299 if (encrypted_ostream->open(std::move(m_pipeline_head))) return true;
300 m_encrypted_header_size = encrypted_ostream->get_header_size();
301 m_pipeline_head = std::move(encrypted_ostream);
302 }
303
304 return false;
305 }
306
307 /**
308 Opens an existing binlog file. It opens the lower layer storage reusing the
309 existing file password if needed.
310
311 @param[in] log_file_key The PSI_file_key for this stream
312 @param[in] binlog_name The file to be opened
313 @param[in] flags The flags used by IO_CACHE.
314
315 @retval std::unique_ptr A Binlog_ofile object pointer.
316 @retval nullptr Error.
317 */
open_existing(PSI_file_key log_file_key,const char * binlog_name,myf flags)318 static std::unique_ptr<Binlog_ofile> open_existing(
319 #ifdef HAVE_PSI_INTERFACE
320 PSI_file_key log_file_key,
321 #endif
322 const char *binlog_name, myf flags) {
323 DBUG_TRACE;
324 std::unique_ptr<Rpl_encryption_header> header;
325 unsigned char magic[BINLOG_MAGIC_SIZE];
326
327 /* Open a simple istream to read the magic from the file */
328 IO_CACHE_istream istream;
329 if (istream.open(key_file_binlog, key_file_binlog_cache, binlog_name,
330 MYF(MY_WME | MY_DONT_CHECK_FILESIZE), rpl_read_size))
331 return nullptr;
332 if (istream.read(magic, BINLOG_MAGIC_SIZE) != BINLOG_MAGIC_SIZE)
333 return nullptr;
334
335 DBUG_ASSERT(Rpl_encryption_header::ENCRYPTION_MAGIC_SIZE ==
336 BINLOG_MAGIC_SIZE);
337 /* Identify the file type by the magic to get the encryption header */
338 if (memcmp(magic, Rpl_encryption_header::ENCRYPTION_MAGIC,
339 BINLOG_MAGIC_SIZE) == 0) {
340 header = Rpl_encryption_header::get_header(&istream);
341 if (header == nullptr) return nullptr;
342 } else if (memcmp(magic, BINLOG_MAGIC, BINLOG_MAGIC_SIZE) != 0) {
343 return nullptr;
344 }
345
346 /* Open the binlog_ofile */
347 std::unique_ptr<Binlog_ofile> ret_ofile(new Binlog_ofile);
348 if (ret_ofile->open(
349 #ifdef HAVE_PSI_INTERFACE
350 log_file_key,
351 #endif
352 binlog_name, flags, true)) {
353 return nullptr;
354 }
355
356 if (header != nullptr) {
357 /* Add the encryption stream on top of IO_CACHE */
358 std::unique_ptr<Binlog_encryption_ostream> encrypted_ostream(
359 new Binlog_encryption_ostream);
360 ret_ofile->m_encrypted_header_size = header->get_header_size();
361 encrypted_ostream->open(std::move(ret_ofile->m_pipeline_head),
362 std::move(header));
363 ret_ofile->m_pipeline_head = std::move(encrypted_ostream);
364 ret_ofile->set_encrypted();
365 }
366 return ret_ofile;
367 }
368
close()369 void close() {
370 m_pipeline_head.reset(nullptr);
371 m_position = 0;
372 m_encrypted_header_size = 0;
373 }
374
375 /**
376 Writes data into storage and maintains binlog position.
377
378 @param[in] buffer the data will be written
379 @param[in] length the length of the data
380
381 @retval false Success
382 @retval true Error
383 */
write(const unsigned char * buffer,my_off_t length)384 bool write(const unsigned char *buffer, my_off_t length) override {
385 DBUG_ASSERT(m_pipeline_head != nullptr);
386
387 if (m_pipeline_head->write(buffer, length)) return true;
388
389 m_position += length;
390 return false;
391 }
392
393 /**
394 Updates some bytes in the binlog file. If is only used for clearing
395 LOG_EVENT_BINLOG_IN_USE_F.
396
397 @param[in] buffer the data will be written
398 @param[in] length the length of the data
399 @param[in] offset the offset of the bytes will be updated
400
401 @retval false Success
402 @retval true Error
403 */
update(const unsigned char * buffer,my_off_t length,my_off_t offset)404 bool update(const unsigned char *buffer, my_off_t length, my_off_t offset) {
405 DBUG_ASSERT(m_pipeline_head != nullptr);
406 return m_pipeline_head->seek(offset) ||
407 m_pipeline_head->write(buffer, length);
408 }
409
410 /**
411 Truncates some data at the end of the binlog file.
412
413 @param[in] offset where the binlog file will be truncated to.
414
415 @retval false Success
416 @retval true Error
417 */
truncate(my_off_t offset)418 bool truncate(my_off_t offset) {
419 DBUG_ASSERT(m_pipeline_head != nullptr);
420
421 if (m_pipeline_head->truncate(offset)) return true;
422 m_position = offset;
423 return false;
424 }
425
flush()426 bool flush() { return m_pipeline_head->flush(); }
sync()427 bool sync() { return m_pipeline_head->sync(); }
flush_and_sync()428 bool flush_and_sync() { return flush() || sync(); }
position()429 my_off_t position() { return m_position; }
is_empty()430 bool is_empty() { return position() == 0; }
is_open()431 bool is_open() { return m_pipeline_head != nullptr; }
432 /**
433 Returns the encrypted header size of the binary log file.
434
435 @retval 0 The file is not encrypted.
436 @retval >0 The encryption header size.
437 */
get_encrypted_header_size()438 int get_encrypted_header_size() { return m_encrypted_header_size; }
439 /**
440 Returns the real file size.
441
442 While position() returns the "file size" from the plain binary log events
443 stream point of view, this function considers the encryption header when it
444 exists.
445
446 @return The real file size considering the encryption header.
447 */
get_real_file_size()448 my_off_t get_real_file_size() { return m_position + m_encrypted_header_size; }
449 /**
450 Get the pipeline head.
451
452 @retval Returns the pipeline head or nullptr.
453 */
get_pipeline_head()454 std::unique_ptr<Truncatable_ostream> get_pipeline_head() {
455 return std::move(m_pipeline_head);
456 }
457 /**
458 Check if the log file is encrypted.
459
460 @retval True if the log file is encrypted.
461 @retval False if the log file is not encrypted.
462 */
is_encrypted()463 bool is_encrypted() { return m_encrypted; }
464 /**
465 Set that the log file is encrypted.
466 */
set_encrypted()467 void set_encrypted() { m_encrypted = true; }
468
469 private:
470 my_off_t m_position = 0;
471 int m_encrypted_header_size = 0;
472 std::unique_ptr<Truncatable_ostream> m_pipeline_head;
473 bool m_encrypted = false;
474 };
475
476 /**
477 Helper class to switch to a new thread and then go back to the previous one,
478 when the object is destroyed using RAII.
479
480 This class is used to temporarily switch to another session (THD
481 structure). It will set up thread specific "globals" correctly
482 so that the POSIX thread looks exactly like the session attached to.
483 However, PSI_thread info is not touched as it is required to show
484 the actual physial view in PFS instrumentation i.e., it should
485 depict as the real thread doing the work instead of thread it switched
486 to.
487
488 On destruction, the original session (which is supplied to the
489 constructor) will be re-attached automatically. For example, with
490 this code, the value of @c current_thd will be the same before and
491 after execution of the code.
492
493 @code
494 {
495 for (int i = 0 ; i < count ; ++i)
496 {
497 // here we are attached to current_thd
498 // [...]
499 Thd_backup_and_restore switch_thd(current_thd, other_thd[i]);
500 // [...]
501 // here we are attached to other_thd[i]
502 // [...]
503 }
504 // here we are attached to current_thd
505 }
506 @endcode
507
508 @warning The class is not designed to be inherited from.
509 */
510
511 class Thd_backup_and_restore {
512 public:
513 /**
514 Try to attach the POSIX thread to a session.
515
516 @param[in] backup_thd The thd to restore to when object is destructed.
517 @param[in] new_thd The thd to attach to.
518 */
519
Thd_backup_and_restore(THD * backup_thd,THD * new_thd)520 Thd_backup_and_restore(THD *backup_thd, THD *new_thd)
521 : m_backup_thd(backup_thd),
522 m_new_thd(new_thd),
523 m_new_thd_old_real_id(new_thd->real_id),
524 m_new_thd_old_thread_stack(new_thd->thread_stack) {
525 DBUG_ASSERT(m_backup_thd != nullptr && m_new_thd != nullptr);
526 // Reset the state of the current thd.
527 m_backup_thd->restore_globals();
528
529 m_new_thd->thread_stack = m_backup_thd->thread_stack;
530 m_new_thd->store_globals();
531 }
532
533 /**
534 Restores to previous thd.
535 */
~Thd_backup_and_restore()536 ~Thd_backup_and_restore() {
537 /*
538 Restore the global variables of the thd we previously attached to,
539 to its original state. In other words, detach the m_new_thd.
540 */
541 m_new_thd->restore_globals();
542 m_new_thd->real_id = m_new_thd_old_real_id;
543 m_new_thd->thread_stack = m_new_thd_old_thread_stack;
544
545 // Reset the global variables to the original state.
546 m_backup_thd->store_globals();
547 }
548
549 private:
550 THD *m_backup_thd;
551 THD *m_new_thd;
552 my_thread_t m_new_thd_old_real_id;
553 const char *m_new_thd_old_thread_stack;
554 };
555
556 /**
557 Caches for non-transactional and transactional data before writing
558 it to the binary log.
559
560 @todo All the access functions for the flags suggest that the
561 encapsuling is not done correctly, so try to move any logic that
562 requires access to the flags into the cache.
563 */
564 class binlog_cache_data {
565 public:
binlog_cache_data(bool trx_cache_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)566 binlog_cache_data(bool trx_cache_arg, ulong *ptr_binlog_cache_use_arg,
567 ulong *ptr_binlog_cache_disk_use_arg)
568 : m_pending(nullptr),
569 ptr_binlog_cache_use(ptr_binlog_cache_use_arg),
570 ptr_binlog_cache_disk_use(ptr_binlog_cache_disk_use_arg) {
571 flags.transactional = trx_cache_arg;
572 }
573
open(my_off_t cache_size,my_off_t max_cache_size)574 bool open(my_off_t cache_size, my_off_t max_cache_size) {
575 return m_cache.open(cache_size, max_cache_size);
576 }
577
get_cache()578 Binlog_cache_storage *get_cache() { return &m_cache; }
579 int finalize(THD *thd, Log_event *end_event);
580 int finalize(THD *thd, Log_event *end_event, XID_STATE *xs);
581 int flush(THD *thd, my_off_t *bytes, bool *wrote_xid);
582 int write_event(Log_event *event);
get_event_counter()583 size_t get_event_counter() { return event_counter; }
get_compressed_size()584 size_t get_compressed_size() { return m_compressed_size; }
get_decompressed_size()585 size_t get_decompressed_size() { return m_decompressed_size; }
get_compression_type()586 binary_log::transaction::compression::type get_compression_type() {
587 return m_compression_type;
588 }
589
set_compressed_size(size_t s)590 void set_compressed_size(size_t s) { m_compressed_size = s; }
set_decompressed_size(size_t s)591 void set_decompressed_size(size_t s) { m_decompressed_size = s; }
set_compression_type(binary_log::transaction::compression::type t)592 void set_compression_type(binary_log::transaction::compression::type t) {
593 m_compression_type = t;
594 }
595
~binlog_cache_data()596 virtual ~binlog_cache_data() {
597 DBUG_ASSERT(is_binlog_empty());
598 m_cache.close();
599 }
600
is_binlog_empty() const601 bool is_binlog_empty() const {
602 DBUG_PRINT("debug", ("%s_cache - pending: 0x%llx, bytes: %llu",
603 (flags.transactional ? "trx" : "stmt"),
604 (ulonglong)pending(), (ulonglong)m_cache.length()));
605 return pending() == nullptr && m_cache.is_empty();
606 }
607
is_finalized() const608 bool is_finalized() const { return flags.finalized; }
609
pending() const610 Rows_log_event *pending() const { return m_pending; }
611
set_pending(Rows_log_event * const pending)612 void set_pending(Rows_log_event *const pending) { m_pending = pending; }
613
set_incident(void)614 void set_incident(void) { flags.incident = true; }
615
has_incident(void) const616 bool has_incident(void) const { return flags.incident; }
617
has_xid() const618 bool has_xid() const {
619 // There should only be an XID event if we are transactional
620 DBUG_ASSERT((flags.transactional && flags.with_xid) || !flags.with_xid);
621 return flags.with_xid;
622 }
623
is_trx_cache() const624 bool is_trx_cache() const { return flags.transactional; }
625
get_byte_position() const626 my_off_t get_byte_position() const { return m_cache.length(); }
627
cache_state_checkpoint(my_off_t pos_to_checkpoint)628 void cache_state_checkpoint(my_off_t pos_to_checkpoint) {
629 // We only need to store the cache state for pos > 0
630 if (pos_to_checkpoint) {
631 cache_state state;
632 state.with_rbr = flags.with_rbr;
633 state.with_sbr = flags.with_sbr;
634 state.with_start = flags.with_start;
635 state.with_end = flags.with_end;
636 state.with_content = flags.with_content;
637 state.event_counter = event_counter;
638 cache_state_map[pos_to_checkpoint] = state;
639 }
640 }
641
cache_state_rollback(my_off_t pos_to_rollback)642 void cache_state_rollback(my_off_t pos_to_rollback) {
643 if (pos_to_rollback) {
644 std::map<my_off_t, cache_state>::iterator it;
645 it = cache_state_map.find(pos_to_rollback);
646 if (it != cache_state_map.end()) {
647 flags.with_rbr = it->second.with_rbr;
648 flags.with_sbr = it->second.with_sbr;
649 flags.with_start = it->second.with_start;
650 flags.with_end = it->second.with_end;
651 flags.with_content = it->second.with_content;
652 event_counter = it->second.event_counter;
653 } else
654 DBUG_ASSERT(it == cache_state_map.end());
655 }
656 // Rolling back to pos == 0 means cleaning up the cache.
657 else {
658 flags.with_rbr = false;
659 flags.with_sbr = false;
660 flags.with_start = false;
661 flags.with_end = false;
662 flags.with_content = false;
663 event_counter = 0;
664 }
665 }
666
667 /**
668 Reset the cache to unused state when the transaction is finished. It
669 drops all data in the cache and clears the flags of the transaction state.
670 */
reset()671 virtual void reset() {
672 compute_statistics();
673 remove_pending_event();
674
675 if (m_cache.reset()) {
676 LogErr(WARNING_LEVEL, ER_BINLOG_CANT_RESIZE_CACHE);
677 }
678
679 flags.incident = false;
680 flags.with_xid = false;
681 flags.immediate = false;
682 flags.finalized = false;
683 flags.with_sbr = false;
684 flags.with_rbr = false;
685 flags.with_start = false;
686 flags.with_end = false;
687 flags.with_content = false;
688
689 /*
690 The truncate function calls reinit_io_cache that calls my_b_flush_io_cache
691 which may increase disk_writes. This breaks the disk_writes use by the
692 binary log which aims to compute the ratio between in-memory cache usage
693 and disk cache usage. To avoid this undesirable behavior, we reset the
694 variable after truncating the cache.
695 */
696 cache_state_map.clear();
697 event_counter = 0;
698 m_compressed_size = 0;
699 m_decompressed_size = 0;
700 m_compression_type = binary_log::transaction::compression::NONE;
701 DBUG_ASSERT(is_binlog_empty());
702 }
703
704 /**
705 Returns information about the cache content with respect to
706 the binlog_format of the events.
707
708 This will be used to set a flag on GTID_LOG_EVENT stating that the
709 transaction may have SBR statements or not, but the binlog dump
710 will show this flag as "rbr_only" when it is not set. That's why
711 an empty transaction should return true below, or else an empty
712 transaction would be assumed as "rbr_only" even not having RBR
713 events.
714
715 When dumping a binary log content using mysqlbinlog client program,
716 for any transaction assumed as "rbr_only" it will be printed a
717 statement changing the transaction isolation level to READ COMMITTED.
718 It doesn't make sense to have an empty transaction "requiring" this
719 isolation level change.
720
721 @return true The cache have SBR events or is empty.
722 @return false The cache contains a transaction with no SBR events.
723 */
may_have_sbr_stmts()724 bool may_have_sbr_stmts() { return flags.with_sbr || !flags.with_rbr; }
725
726 /**
727 Check if the binlog cache contains an empty transaction, which has
728 two binlog events "BEGIN" and "COMMIT".
729
730 @return true The binlog cache contains an empty transaction.
731 @return false Otherwise.
732 */
has_empty_transaction()733 bool has_empty_transaction() {
734 /*
735 The empty transaction has two events in trx/stmt binlog cache
736 and no changes: one is a transaction start and other is a transaction
737 end (there should be no SBR changing content and no RBR events).
738 */
739 if (flags.with_start && // Has transaction start statement
740 flags.with_end && // Has transaction end statement
741 !flags.with_content) // Has no other content than START/END
742 {
743 DBUG_ASSERT(event_counter == 2); // Two events in the cache only
744 DBUG_ASSERT(!flags.with_sbr); // No statements changing content
745 DBUG_ASSERT(!flags.with_rbr); // No rows changing content
746 DBUG_ASSERT(!flags.immediate); // Not a DDL
747 DBUG_ASSERT(
748 !flags.with_xid); // Not a XID trx and not an atomic DDL Query
749 return true;
750 }
751 return false;
752 }
753
754 /**
755 Check if the binlog cache is empty or contains an empty transaction,
756 which has two binlog events "BEGIN" and "COMMIT".
757
758 @return true The binlog cache is empty or contains an empty transaction.
759 @return false Otherwise.
760 */
is_empty_or_has_empty_transaction()761 bool is_empty_or_has_empty_transaction() {
762 return is_binlog_empty() || has_empty_transaction();
763 }
764
765 protected:
766 /*
767 This structure should have all cache variables/flags that should be restored
768 when a ROLLBACK TO SAVEPOINT statement be executed.
769 */
770 struct cache_state {
771 bool with_sbr;
772 bool with_rbr;
773 bool with_start;
774 bool with_end;
775 bool with_content;
776 size_t event_counter;
777 };
778 /*
779 For every SAVEPOINT used, we will store a cache_state for the current
780 binlog cache position. So, if a ROLLBACK TO SAVEPOINT is used, we can
781 restore the cache_state values after truncating the binlog cache.
782 */
783 std::map<my_off_t, cache_state> cache_state_map;
784 /*
785 In order to compute the transaction size (because of possible extra checksum
786 bytes), we need to keep track of how many events are in the binlog cache.
787 */
788 size_t event_counter = 0;
789
790 size_t m_compressed_size = 0;
791 size_t m_decompressed_size = 0;
792 binary_log::transaction::compression::type m_compression_type =
793 binary_log::transaction::compression::type::NONE;
794 /*
795 It truncates the cache to a certain position. This includes deleting the
796 pending event. It corresponds to rollback statement or rollback to
797 a savepoint. It doesn't change transaction state.
798 */
truncate(my_off_t pos)799 void truncate(my_off_t pos) {
800 DBUG_PRINT("info", ("truncating to position %lu", (ulong)pos));
801 remove_pending_event();
802
803 // TODO: check the return value.
804 (void)m_cache.truncate(pos);
805 }
806
807 /**
808 Flush pending event to the cache buffer.
809 */
flush_pending_event(THD * thd)810 int flush_pending_event(THD *thd) {
811 if (m_pending) {
812 m_pending->set_flags(Rows_log_event::STMT_END_F);
813 if (int error = write_event(m_pending)) return error;
814 thd->clear_binlog_table_maps();
815 }
816 return 0;
817 }
818
819 /**
820 Remove the pending event.
821 */
remove_pending_event()822 int remove_pending_event() {
823 delete m_pending;
824 m_pending = nullptr;
825 return 0;
826 }
827 struct Flags {
828 /*
829 Defines if this is either a trx-cache or stmt-cache, respectively, a
830 transactional or non-transactional cache.
831 */
832 bool transactional : 1;
833
834 /*
835 This indicates that some events did not get into the cache and most likely
836 it is corrupted.
837 */
838 bool incident : 1;
839
840 /*
841 This indicates that the cache should be written without BEGIN/END.
842 */
843 bool immediate : 1;
844
845 /*
846 This flag indicates that the buffer was finalized and has to be
847 flushed to disk.
848 */
849 bool finalized : 1;
850
851 /*
852 This indicates that either the cache contain an XID event, or it's
853 an atomic DDL Query-log-event. In the latter case the flag is set up
854 on the statement level, namely when the Query-log-event is cached
855 at time the DDL transaction is not committing.
856 The flag therefore gets reset when the cache is cleaned due to
857 the statement rollback, e.g in case of a DDL post-caching execution
858 error.
859 Any statement scope flag among other things must consider its
860 reset policy when the statement is rolled back.
861 */
862 bool with_xid : 1;
863
864 /*
865 This indicates that the cache contain statements changing content.
866 */
867 bool with_sbr : 1;
868
869 /*
870 This indicates that the cache contain RBR event changing content.
871 */
872 bool with_rbr : 1;
873
874 /*
875 This indicates that the cache contain s transaction start statement.
876 */
877 bool with_start : 1;
878
879 /*
880 This indicates that the cache contain a transaction end event.
881 */
882 bool with_end : 1;
883
884 /*
885 This indicates that the cache contain content other than START/END.
886 */
887 bool with_content : 1;
888 } flags;
889
890 virtual bool compress(THD *);
891
892 private:
893 /*
894 Storage for byte data. This binlog_cache_data will serialize
895 events into bytes and put them into m_cache.
896 */
897 Binlog_cache_storage m_cache;
898
899 /*
900 Pending binrows event. This event is the event where the rows are currently
901 written.
902 */
903 Rows_log_event *m_pending;
904
905 /**
906 This function computes binlog cache and disk usage.
907 */
compute_statistics()908 void compute_statistics() {
909 if (!is_binlog_empty()) {
910 (*ptr_binlog_cache_use)++;
911 if (m_cache.disk_writes() != 0) (*ptr_binlog_cache_disk_use)++;
912 }
913 }
914
915 /*
916 Stores a pointer to the status variable that keeps track of the in-memory
917 cache usage. This corresponds to either
918 . binlog_cache_use or binlog_stmt_cache_use.
919 */
920 ulong *ptr_binlog_cache_use;
921
922 /*
923 Stores a pointer to the status variable that keeps track of the disk
924 cache usage. This corresponds to either
925 . binlog_cache_disk_use or binlog_stmt_cache_disk_use.
926 */
927 ulong *ptr_binlog_cache_disk_use;
928
929 binlog_cache_data &operator=(const binlog_cache_data &info);
930 binlog_cache_data(const binlog_cache_data &info);
931 };
932
933 class binlog_stmt_cache_data : public binlog_cache_data {
934 public:
binlog_stmt_cache_data(bool trx_cache_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)935 binlog_stmt_cache_data(bool trx_cache_arg, ulong *ptr_binlog_cache_use_arg,
936 ulong *ptr_binlog_cache_disk_use_arg)
937 : binlog_cache_data(trx_cache_arg, ptr_binlog_cache_use_arg,
938 ptr_binlog_cache_disk_use_arg) {}
939
940 using binlog_cache_data::finalize;
941
942 int finalize(THD *thd);
943 };
944
finalize(THD * thd)945 int binlog_stmt_cache_data::finalize(THD *thd) {
946 if (flags.immediate) {
947 if (int error = finalize(thd, nullptr)) return error;
948 } else {
949 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"), false, false, true,
950 0, true);
951 if (int error = finalize(thd, &end_evt)) return error;
952 }
953 return 0;
954 }
955
956 class binlog_trx_cache_data : public binlog_cache_data {
957 public:
binlog_trx_cache_data(bool trx_cache_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)958 binlog_trx_cache_data(bool trx_cache_arg, ulong *ptr_binlog_cache_use_arg,
959 ulong *ptr_binlog_cache_disk_use_arg)
960 : binlog_cache_data(trx_cache_arg, ptr_binlog_cache_use_arg,
961 ptr_binlog_cache_disk_use_arg),
962 m_cannot_rollback(false),
963 before_stmt_pos(MY_OFF_T_UNDEF) {}
964
reset()965 void reset() {
966 DBUG_TRACE;
967 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong)before_stmt_pos));
968 m_cannot_rollback = false;
969 before_stmt_pos = MY_OFF_T_UNDEF;
970 binlog_cache_data::reset();
971 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong)before_stmt_pos));
972 return;
973 }
974
cannot_rollback() const975 bool cannot_rollback() const { return m_cannot_rollback; }
976
set_cannot_rollback()977 void set_cannot_rollback() { m_cannot_rollback = true; }
978
get_prev_position() const979 my_off_t get_prev_position() const { return before_stmt_pos; }
980
set_prev_position(my_off_t pos)981 void set_prev_position(my_off_t pos) {
982 DBUG_TRACE;
983 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong)before_stmt_pos));
984 before_stmt_pos = pos;
985 cache_state_checkpoint(before_stmt_pos);
986 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong)before_stmt_pos));
987 return;
988 }
989
restore_prev_position()990 void restore_prev_position() {
991 DBUG_TRACE;
992 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong)before_stmt_pos));
993 binlog_cache_data::truncate(before_stmt_pos);
994 cache_state_rollback(before_stmt_pos);
995 before_stmt_pos = MY_OFF_T_UNDEF;
996 /*
997 Binlog statement rollback clears with_xid now as the atomic DDL statement
998 marker which can be set as early as at event creation and caching.
999 */
1000 flags.with_xid = false;
1001 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong)before_stmt_pos));
1002 return;
1003 }
1004
restore_savepoint(my_off_t pos)1005 void restore_savepoint(my_off_t pos) {
1006 DBUG_TRACE;
1007 DBUG_PRINT("enter", ("before_stmt_pos: %llu", (ulonglong)before_stmt_pos));
1008 binlog_cache_data::truncate(pos);
1009 if (pos <= before_stmt_pos) before_stmt_pos = MY_OFF_T_UNDEF;
1010 cache_state_rollback(pos);
1011 DBUG_PRINT("return", ("before_stmt_pos: %llu", (ulonglong)before_stmt_pos));
1012 return;
1013 }
1014
1015 using binlog_cache_data::truncate;
1016
1017 int truncate(THD *thd, bool all);
1018
1019 private:
1020 /*
1021 It will be set true if any statement which cannot be rolled back safely
1022 is put in trx_cache.
1023 */
1024 bool m_cannot_rollback;
1025
1026 /*
1027 Binlog position before the start of the current statement.
1028 */
1029 my_off_t before_stmt_pos;
1030
1031 binlog_trx_cache_data &operator=(const binlog_trx_cache_data &info);
1032 binlog_trx_cache_data(const binlog_trx_cache_data &info);
1033 };
1034
1035 class binlog_cache_mngr {
1036 public:
binlog_cache_mngr(ulong * ptr_binlog_stmt_cache_use_arg,ulong * ptr_binlog_stmt_cache_disk_use_arg,ulong * ptr_binlog_cache_use_arg,ulong * ptr_binlog_cache_disk_use_arg)1037 binlog_cache_mngr(ulong *ptr_binlog_stmt_cache_use_arg,
1038 ulong *ptr_binlog_stmt_cache_disk_use_arg,
1039 ulong *ptr_binlog_cache_use_arg,
1040 ulong *ptr_binlog_cache_disk_use_arg)
1041 : stmt_cache(false, ptr_binlog_stmt_cache_use_arg,
1042 ptr_binlog_stmt_cache_disk_use_arg),
1043 trx_cache(true, ptr_binlog_cache_use_arg,
1044 ptr_binlog_cache_disk_use_arg),
1045 has_logged_xid(false) {}
1046
init()1047 bool init() {
1048 return stmt_cache.open(binlog_stmt_cache_size,
1049 max_binlog_stmt_cache_size) ||
1050 trx_cache.open(binlog_cache_size, max_binlog_cache_size);
1051 }
1052
get_binlog_cache_data(bool is_transactional)1053 binlog_cache_data *get_binlog_cache_data(bool is_transactional) {
1054 if (is_transactional)
1055 return &trx_cache;
1056 else
1057 return &stmt_cache;
1058 }
1059
get_stmt_cache()1060 Binlog_cache_storage *get_stmt_cache() { return stmt_cache.get_cache(); }
get_trx_cache()1061 Binlog_cache_storage *get_trx_cache() { return trx_cache.get_cache(); }
1062 /**
1063 Convenience method to check if both caches are empty.
1064 */
is_binlog_empty() const1065 bool is_binlog_empty() const {
1066 return stmt_cache.is_binlog_empty() && trx_cache.is_binlog_empty();
1067 }
1068
1069 /*
1070 clear stmt_cache and trx_cache if they are not empty
1071 */
reset()1072 void reset() {
1073 if (!stmt_cache.is_binlog_empty()) stmt_cache.reset();
1074 if (!trx_cache.is_binlog_empty()) trx_cache.reset();
1075 }
1076
1077 #ifndef DBUG_OFF
dbug_any_finalized() const1078 bool dbug_any_finalized() const {
1079 return stmt_cache.is_finalized() || trx_cache.is_finalized();
1080 }
1081 #endif
1082
1083 /*
1084 Convenience method to flush both caches to the binary log.
1085
1086 @param bytes_written Pointer to variable that will be set to the
1087 number of bytes written for the flush.
1088 @param wrote_xid Pointer to variable that will be set to @c
1089 true if any XID event was written to the
1090 binary log. Otherwise, the variable will not
1091 be touched.
1092 @return Error code on error, zero if no error.
1093 */
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)1094 int flush(THD *thd, my_off_t *bytes_written, bool *wrote_xid) {
1095 my_off_t stmt_bytes = 0;
1096 my_off_t trx_bytes = 0;
1097 DBUG_ASSERT(stmt_cache.has_xid() == 0);
1098 int error = stmt_cache.flush(thd, &stmt_bytes, wrote_xid);
1099 if (error) return error;
1100 DEBUG_SYNC(thd, "after_flush_stm_cache_before_flush_trx_cache");
1101 error = trx_cache.flush(thd, &trx_bytes, wrote_xid);
1102 if (error) return error;
1103 *bytes_written = stmt_bytes + trx_bytes;
1104 return 0;
1105 }
1106
1107 /**
1108 Check if at least one of transacaction and statement binlog caches
1109 contains an empty transaction, other one is empty or contains an
1110 empty transaction.
1111
1112 @return true At least one of transacaction and statement binlog
1113 caches an empty transaction, other one is emptry
1114 or contains an empty transaction.
1115 @return false Otherwise.
1116 */
has_empty_transaction()1117 bool has_empty_transaction() {
1118 return (trx_cache.is_empty_or_has_empty_transaction() &&
1119 stmt_cache.is_empty_or_has_empty_transaction() &&
1120 !is_binlog_empty());
1121 }
1122
1123 binlog_stmt_cache_data stmt_cache;
1124 binlog_trx_cache_data trx_cache;
1125 /*
1126 The bool flag is for preventing do_binlog_xa_commit_rollback()
1127 execution twice which can happen for "external" xa commit/rollback.
1128 */
1129 bool has_logged_xid;
1130
1131 private:
1132 binlog_cache_mngr &operator=(const binlog_cache_mngr &info);
1133 binlog_cache_mngr(const binlog_cache_mngr &info);
1134 };
1135
thd_get_cache_mngr(const THD * thd)1136 static binlog_cache_mngr *thd_get_cache_mngr(const THD *thd) {
1137 /*
1138 If opt_bin_log is not set, binlog_hton->slot == -1 and hence
1139 thd_get_ha_data(thd, hton) segfaults.
1140 */
1141 DBUG_ASSERT(opt_bin_log);
1142 return (binlog_cache_mngr *)thd_get_ha_data(thd, binlog_hton);
1143 }
1144
1145 /**
1146 Checks if the BINLOG_CACHE_SIZE's value is greater than MAX_BINLOG_CACHE_SIZE.
1147 If this happens, the BINLOG_CACHE_SIZE is set to MAX_BINLOG_CACHE_SIZE.
1148 */
check_binlog_cache_size(THD * thd)1149 void check_binlog_cache_size(THD *thd) {
1150 if (binlog_cache_size > max_binlog_cache_size) {
1151 if (thd) {
1152 push_warning_printf(
1153 thd, Sql_condition::SL_WARNING, ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX,
1154 ER_THD(thd, ER_BINLOG_CACHE_SIZE_GREATER_THAN_MAX),
1155 (ulong)binlog_cache_size, (ulong)max_binlog_cache_size);
1156 } else {
1157 LogErr(WARNING_LEVEL, ER_BINLOG_CACHE_SIZE_TOO_LARGE, binlog_cache_size,
1158 (ulong)max_binlog_cache_size);
1159 }
1160 binlog_cache_size = static_cast<ulong>(max_binlog_cache_size);
1161 }
1162 }
1163
1164 /**
1165 Checks if the BINLOG_STMT_CACHE_SIZE's value is greater than
1166 MAX_BINLOG_STMT_CACHE_SIZE. If this happens, the BINLOG_STMT_CACHE_SIZE is set
1167 to MAX_BINLOG_STMT_CACHE_SIZE.
1168 */
check_binlog_stmt_cache_size(THD * thd)1169 void check_binlog_stmt_cache_size(THD *thd) {
1170 if (binlog_stmt_cache_size > max_binlog_stmt_cache_size) {
1171 if (thd) {
1172 push_warning_printf(
1173 thd, Sql_condition::SL_WARNING,
1174 ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX,
1175 ER_THD(thd, ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX),
1176 (ulong)binlog_stmt_cache_size, (ulong)max_binlog_stmt_cache_size);
1177 } else {
1178 LogErr(WARNING_LEVEL, ER_BINLOG_STMT_CACHE_SIZE_TOO_LARGE,
1179 binlog_stmt_cache_size, (ulong)max_binlog_stmt_cache_size);
1180 }
1181 binlog_stmt_cache_size = static_cast<ulong>(max_binlog_stmt_cache_size);
1182 }
1183 }
1184
1185 /**
1186 Check whether binlog_hton has valid slot and enabled
1187 */
binlog_enabled()1188 bool binlog_enabled() {
1189 return (binlog_hton && binlog_hton->slot != HA_SLOT_UNDEF);
1190 }
1191
1192 /*
1193 Save position of binary log transaction cache.
1194
1195 SYNPOSIS
1196 binlog_trans_log_savepos()
1197
1198 thd The thread to take the binlog data from
1199 pos Pointer to variable where the position will be stored
1200
1201 DESCRIPTION
1202
1203 Save the current position in the binary log transaction cache into
1204 the variable pointed to by 'pos'
1205 */
1206
binlog_trans_log_savepos(THD * thd,my_off_t * pos)1207 static void binlog_trans_log_savepos(THD *thd, my_off_t *pos) {
1208 DBUG_TRACE;
1209 DBUG_ASSERT(pos != nullptr);
1210 binlog_cache_mngr *const cache_mngr = thd_get_cache_mngr(thd);
1211 DBUG_ASSERT(mysql_bin_log.is_open());
1212 *pos = cache_mngr->trx_cache.get_byte_position();
1213 DBUG_PRINT("return", ("position: %lu", (ulong)*pos));
1214 cache_mngr->trx_cache.cache_state_checkpoint(*pos);
1215 }
1216
binlog_dummy_recover(handlerton *,XA_recover_txn *,uint,MEM_ROOT *)1217 static int binlog_dummy_recover(handlerton *, XA_recover_txn *, uint,
1218 MEM_ROOT *) {
1219 return 0;
1220 }
1221
1222 /**
1223 Auxiliary class to copy serialized events to the binary log and
1224 correct some of the fields that are not known until just before
1225 writing the event.
1226
1227 This class allows feeding events in parts, so it is practical to use
1228 in do_write_cache() which reads events from an IO_CACHE where events
1229 may span mutiple cache pages.
1230
1231 The following fields are fixed before writing the event:
1232 - end_log_pos is set
1233 - the checksum is computed if checksums are enabled
1234 - the length is incremented by the checksum size if checksums are enabled
1235 */
1236 class Binlog_event_writer : public Basic_ostream {
1237 MYSQL_BIN_LOG::Binlog_ofile *m_binlog_file;
1238 bool have_checksum;
1239 ha_checksum initial_checksum;
1240 ha_checksum checksum;
1241 uint32 end_log_pos;
1242 uchar header[LOG_EVENT_HEADER_LEN];
1243 my_off_t header_len = 0;
1244 uint32 event_len = 0;
1245
1246 public:
1247 /**
1248 Constructs a new Binlog_event_writer. Should be called once before
1249 starting to flush the transaction or statement cache to the
1250 binlog.
1251
1252 @param binlog_file to write to.
1253 */
Binlog_event_writer(MYSQL_BIN_LOG::Binlog_ofile * binlog_file)1254 Binlog_event_writer(MYSQL_BIN_LOG::Binlog_ofile *binlog_file)
1255 : m_binlog_file(binlog_file),
1256 have_checksum(binlog_checksum_options !=
1257 binary_log::BINLOG_CHECKSUM_ALG_OFF),
1258 initial_checksum(my_checksum(0L, nullptr, 0)),
1259 checksum(initial_checksum),
1260 end_log_pos(binlog_file->position()) {
1261 // Simulate checksum error
1262 if (DBUG_EVALUATE_IF("fault_injection_crc_value", 1, 0)) checksum--;
1263 }
1264
update_header()1265 void update_header() {
1266 event_len = uint4korr(header + EVENT_LEN_OFFSET);
1267
1268 // Increase end_log_pos
1269 end_log_pos += event_len;
1270
1271 // Update event length if it has checksum
1272 if (have_checksum) {
1273 int4store(header + EVENT_LEN_OFFSET, event_len + BINLOG_CHECKSUM_LEN);
1274 end_log_pos += BINLOG_CHECKSUM_LEN;
1275 }
1276
1277 // Store end_log_pos
1278 int4store(header + LOG_POS_OFFSET, end_log_pos);
1279 // update the checksum
1280 if (have_checksum) checksum = my_checksum(checksum, header, header_len);
1281 }
1282
write(const unsigned char * buffer,my_off_t length)1283 bool write(const unsigned char *buffer, my_off_t length) {
1284 DBUG_TRACE;
1285
1286 while (length > 0) {
1287 /* Write event header into binlog */
1288 if (event_len == 0) {
1289 /* data in the buf may be smaller than header size.*/
1290 uint32 header_incr =
1291 std::min<uint32>(LOG_EVENT_HEADER_LEN - header_len, length);
1292
1293 memcpy(header + header_len, buffer, header_incr);
1294 header_len += header_incr;
1295 buffer += header_incr;
1296 length -= header_incr;
1297
1298 if (header_len == LOG_EVENT_HEADER_LEN) {
1299 update_header();
1300 if (m_binlog_file->write(header, header_len)) return true;
1301
1302 event_len -= header_len;
1303 header_len = 0;
1304 }
1305 } else {
1306 my_off_t write_bytes = std::min<uint64>(length, event_len);
1307
1308 if (m_binlog_file->write(buffer, write_bytes)) return true;
1309
1310 // update the checksum
1311 if (have_checksum)
1312 checksum = my_checksum(checksum, buffer, write_bytes);
1313
1314 event_len -= write_bytes;
1315 length -= write_bytes;
1316 buffer += write_bytes;
1317
1318 // The whole event is copied, now add the checksum
1319 if (have_checksum && event_len == 0) {
1320 uchar checksum_buf[BINLOG_CHECKSUM_LEN];
1321
1322 int4store(checksum_buf, checksum);
1323 if (m_binlog_file->write(checksum_buf, BINLOG_CHECKSUM_LEN))
1324 return true;
1325 checksum = initial_checksum;
1326 }
1327 }
1328 }
1329 return false;
1330 }
1331 /**
1332 Returns true if per event checksum is enabled.
1333 */
is_checksum_enabled()1334 bool is_checksum_enabled() { return have_checksum; }
1335 };
1336
1337 /*
1338 this function is mostly a placeholder.
1339 conceptually, binlog initialization (now mostly done in MYSQL_BIN_LOG::open)
1340 should be moved here.
1341 */
1342
binlog_init(void * p)1343 static int binlog_init(void *p) {
1344 binlog_hton = (handlerton *)p;
1345 binlog_hton->state = opt_bin_log ? SHOW_OPTION_YES : SHOW_OPTION_NO;
1346 binlog_hton->db_type = DB_TYPE_BINLOG;
1347 binlog_hton->savepoint_offset = sizeof(my_off_t);
1348 binlog_hton->close_connection = binlog_close_connection;
1349 binlog_hton->savepoint_set = binlog_savepoint_set;
1350 binlog_hton->savepoint_rollback = binlog_savepoint_rollback;
1351 binlog_hton->savepoint_rollback_can_release_mdl =
1352 binlog_savepoint_rollback_can_release_mdl;
1353 binlog_hton->commit = binlog_commit;
1354 binlog_hton->commit_by_xid = binlog_xa_commit;
1355 binlog_hton->rollback = binlog_rollback;
1356 binlog_hton->rollback_by_xid = binlog_xa_rollback;
1357 binlog_hton->prepare = binlog_prepare;
1358 binlog_hton->recover = binlog_dummy_recover;
1359 binlog_hton->flags = HTON_NOT_USER_SELECTABLE | HTON_HIDDEN;
1360 return 0;
1361 }
1362
binlog_deinit(void *)1363 static int binlog_deinit(void *) {
1364 /* Using binlog as TC after the binlog has been unloaded, won't work */
1365 if (tc_log == &mysql_bin_log) tc_log = nullptr;
1366 binlog_hton = nullptr;
1367 return 0;
1368 }
1369
binlog_close_connection(handlerton *,THD * thd)1370 static int binlog_close_connection(handlerton *, THD *thd) {
1371 DBUG_TRACE;
1372 binlog_cache_mngr *const cache_mngr = thd_get_cache_mngr(thd);
1373 DBUG_ASSERT(cache_mngr->is_binlog_empty());
1374 DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot,
1375 (ulonglong) nullptr));
1376 thd_set_ha_data(thd, binlog_hton, nullptr);
1377 cache_mngr->~binlog_cache_mngr();
1378 my_free(cache_mngr);
1379 return 0;
1380 }
1381
write_event(Log_event * ev)1382 int binlog_cache_data::write_event(Log_event *ev) {
1383 DBUG_TRACE;
1384
1385 if (ev != nullptr) {
1386 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending",
1387 { DBUG_SET("+d,simulate_file_write_error"); });
1388
1389 if (binary_event_serialize(ev, &m_cache)) {
1390 DBUG_EXECUTE_IF("simulate_disk_full_at_flush_pending", {
1391 DBUG_SET("-d,simulate_file_write_error");
1392 DBUG_SET("-d,simulate_disk_full_at_flush_pending");
1393 /*
1394 after +d,simulate_file_write_error the local cache
1395 is in unsane state. Since -d,simulate_file_write_error
1396 revokes the first simulation do_write_cache()
1397 can't be run without facing an assert.
1398 So it's blocked with the following 2nd simulation:
1399 */
1400 DBUG_SET("+d,simulate_do_write_cache_failure");
1401 });
1402 return 1;
1403 }
1404 if (ev->get_type_code() == binary_log::XID_EVENT) flags.with_xid = true;
1405 if (ev->is_using_immediate_logging()) flags.immediate = true;
1406 /* DDL gets marked as xid-requiring at its caching. */
1407 if (is_atomic_ddl_event(ev)) flags.with_xid = true;
1408 /* With respect to the event type being written */
1409 if (ev->is_sbr_logging_format()) flags.with_sbr = true;
1410 if (ev->is_rbr_logging_format()) flags.with_rbr = true;
1411 /* With respect to empty transactions */
1412 if (ev->starts_group()) flags.with_start = true;
1413 if (ev->ends_group()) flags.with_end = true;
1414 if (!ev->starts_group() && !ev->ends_group()) flags.with_content = true;
1415 event_counter++;
1416 DBUG_PRINT("debug",
1417 ("event_counter= %lu", static_cast<ulong>(event_counter)));
1418 }
1419 return 0;
1420 }
1421
assign_automatic_gtids_to_flush_group(THD * first_seen)1422 bool MYSQL_BIN_LOG::assign_automatic_gtids_to_flush_group(THD *first_seen) {
1423 DBUG_TRACE;
1424 bool error = false;
1425 bool is_global_sid_locked = false;
1426 rpl_sidno locked_sidno = 0;
1427
1428 for (THD *head = first_seen; head; head = head->next_to_commit) {
1429 DBUG_ASSERT(head->variables.gtid_next.type != UNDEFINED_GTID);
1430
1431 /* Generate GTID */
1432 if (head->variables.gtid_next.type == AUTOMATIC_GTID) {
1433 if (!is_global_sid_locked) {
1434 global_sid_lock->rdlock();
1435 is_global_sid_locked = true;
1436 }
1437 if (gtid_state->generate_automatic_gtid(
1438 head,
1439 head->get_transaction()->get_rpl_transaction_ctx()->get_sidno(),
1440 head->get_transaction()->get_rpl_transaction_ctx()->get_gno(),
1441 &locked_sidno) != RETURN_STATUS_OK) {
1442 head->commit_error = THD::CE_FLUSH_ERROR;
1443 error = true;
1444 }
1445 } else {
1446 DBUG_PRINT("info",
1447 ("thd->variables.gtid_next.type=%d "
1448 "thd->owned_gtid.sidno=%d",
1449 head->variables.gtid_next.type, head->owned_gtid.sidno));
1450 if (head->variables.gtid_next.type == ASSIGNED_GTID)
1451 DBUG_ASSERT(head->owned_gtid.sidno > 0);
1452 else {
1453 DBUG_ASSERT(head->variables.gtid_next.type == ANONYMOUS_GTID);
1454 DBUG_ASSERT(head->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS);
1455 }
1456 }
1457 }
1458
1459 if (locked_sidno > 0) gtid_state->unlock_sidno(locked_sidno);
1460
1461 if (is_global_sid_locked) global_sid_lock->unlock();
1462
1463 return error;
1464 }
1465
1466 /**
1467 Write the Gtid_log_event to the binary log (prior to writing the
1468 statement or transaction cache).
1469
1470 @param thd Thread that is committing.
1471 @param cache_data The cache that is flushing.
1472 @param writer The event will be written to this Binlog_event_writer object.
1473
1474 @retval false Success.
1475 @retval true Error.
1476 */
write_transaction(THD * thd,binlog_cache_data * cache_data,Binlog_event_writer * writer)1477 bool MYSQL_BIN_LOG::write_transaction(THD *thd, binlog_cache_data *cache_data,
1478 Binlog_event_writer *writer) {
1479 DBUG_TRACE;
1480
1481 /*
1482 The GTID for the THD was assigned at
1483 assign_automatic_gtids_to_flush_group()
1484 */
1485 DBUG_ASSERT(thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS ||
1486 thd->owned_gtid.sidno > 0);
1487
1488 int64 sequence_number, last_committed;
1489 /* Generate logical timestamps for MTS */
1490 m_dependency_tracker.get_dependency(thd, sequence_number, last_committed);
1491
1492 /*
1493 In case both the transaction cache and the statement cache are
1494 non-empty, both will be flushed in sequence and logged as
1495 different transactions. Then the second transaction must only
1496 be executed after the first one has committed. Therefore, we
1497 need to set last_committed for the second transaction equal to
1498 last_committed for the first transaction. This is done in
1499 binlog_cache_data::flush. binlog_cache_data::flush uses the
1500 condition trn_ctx->last_committed==SEQ_UNINIT to detect this
1501 situation, hence the need to set it here.
1502 */
1503 thd->get_transaction()->last_committed = SEQ_UNINIT;
1504
1505 /*
1506 For delayed replication and also for the purpose of lag monitoring,
1507 we assume that the commit timestamp of the transaction is the time of
1508 executing this code (the time of writing the Gtid_log_event to the binary
1509 log).
1510 */
1511 ulonglong immediate_commit_timestamp = my_micro_time();
1512
1513 /*
1514 When the original_commit_timestamp session variable is set to a value
1515 other than UNDEFINED_COMMIT_TIMESTAMP, it means that either the timestamp
1516 is known ( > 0 ) or the timestamp is not known ( == 0 ).
1517 */
1518 ulonglong original_commit_timestamp =
1519 thd->variables.original_commit_timestamp;
1520 /*
1521 When original_commit_timestamp == UNDEFINED_COMMIT_TIMESTAMP, we assume
1522 that:
1523 a) it is not known if this thread is a slave applier ( = 0 );
1524 b) this is a new transaction ( = immediate_commit_timestamp);
1525 */
1526 if (original_commit_timestamp == UNDEFINED_COMMIT_TIMESTAMP) {
1527 /*
1528 When applying a transaction using replication, assume that the
1529 original commit timestamp is not known (the transaction wasn't
1530 originated on the current server).
1531 */
1532 if (thd->slave_thread || thd->is_binlog_applier()) {
1533 original_commit_timestamp = 0;
1534 } else
1535 /* Assume that this transaction is original from this server */
1536 {
1537 DBUG_EXECUTE_IF("rpl_invalid_gtid_timestamp",
1538 // add one our to the commit timestamps
1539 immediate_commit_timestamp += 3600000000;);
1540 original_commit_timestamp = immediate_commit_timestamp;
1541 }
1542 } else {
1543 // Clear the session variable to have cleared states for next transaction.
1544 thd->variables.original_commit_timestamp = UNDEFINED_COMMIT_TIMESTAMP;
1545 }
1546
1547 if (thd->slave_thread) {
1548 // log warning if the replication timestamps are invalid
1549 if (original_commit_timestamp > immediate_commit_timestamp &&
1550 !thd->rli_slave->get_c_rli()->gtid_timestamps_warning_logged) {
1551 LogErr(WARNING_LEVEL, ER_INVALID_REPLICATION_TIMESTAMPS);
1552 thd->rli_slave->get_c_rli()->gtid_timestamps_warning_logged = true;
1553 } else {
1554 if (thd->rli_slave->get_c_rli()->gtid_timestamps_warning_logged &&
1555 original_commit_timestamp <= immediate_commit_timestamp) {
1556 LogErr(WARNING_LEVEL, ER_RPL_TIMESTAMPS_RETURNED_TO_NORMAL);
1557 thd->rli_slave->get_c_rli()->gtid_timestamps_warning_logged = false;
1558 }
1559 }
1560 }
1561
1562 uint32_t trx_immediate_server_version =
1563 do_server_version_int(::server_version);
1564 // Clear the session variable to have cleared states for next transaction.
1565 thd->variables.immediate_server_version = UNDEFINED_SERVER_VERSION;
1566 DBUG_EXECUTE_IF("fixed_server_version",
1567 trx_immediate_server_version = 888888;);
1568 DBUG_EXECUTE_IF("gr_fixed_server_version",
1569 trx_immediate_server_version = 777777;);
1570
1571 /*
1572 When the original_server_version session variable is set to a value
1573 other than UNDEFINED_SERVER_VERSION, it means that either the
1574 server version is known or the server_version is not known
1575 (UNKNOWN_SERVER_VERSION).
1576 */
1577 uint32_t trx_original_server_version = thd->variables.original_server_version;
1578
1579 /*
1580 When original_server_version == UNDEFINED_SERVER_VERSION, we assume
1581 that:
1582 a) it is not known if this thread is a slave applier ( = 0 );
1583 b) this is a new transaction ( = ::server_version);
1584 */
1585 if (trx_original_server_version == UNDEFINED_SERVER_VERSION) {
1586 /*
1587 When applying a transaction using replication, assume that the
1588 original server version is not known (the transaction wasn't
1589 originated on the current server).
1590 */
1591 if (thd->slave_thread || thd->is_binlog_applier()) {
1592 trx_original_server_version = UNKNOWN_SERVER_VERSION;
1593 } else
1594 /* Assume that this transaction is original from this server */
1595 {
1596 trx_original_server_version = trx_immediate_server_version;
1597 }
1598 } else {
1599 // Clear the session variable to have cleared states for next transaction.
1600 thd->variables.original_server_version = UNDEFINED_SERVER_VERSION;
1601 }
1602 Gtid_log_event gtid_event(
1603 thd, cache_data->is_trx_cache(), last_committed, sequence_number,
1604 cache_data->may_have_sbr_stmts(), original_commit_timestamp,
1605 immediate_commit_timestamp, trx_original_server_version,
1606 trx_immediate_server_version);
1607
1608 // Set the transaction length, based on cache info
1609 gtid_event.set_trx_length_by_cache_size(cache_data->get_byte_position(),
1610 writer->is_checksum_enabled(),
1611 cache_data->get_event_counter());
1612
1613 DBUG_PRINT("debug", ("cache_data->get_byte_position()= %llu",
1614 cache_data->get_byte_position()));
1615 DBUG_PRINT("debug", ("cache_data->get_event_counter()= %lu",
1616 static_cast<ulong>(cache_data->get_event_counter())));
1617 DBUG_PRINT("debug", ("writer->is_checksum_enabled()= %s",
1618 YESNO(writer->is_checksum_enabled())));
1619 DBUG_PRINT("debug", ("gtid_event.get_event_length()= %lu",
1620 static_cast<ulong>(gtid_event.get_event_length())));
1621 DBUG_PRINT("info",
1622 ("transaction_length= %llu", gtid_event.transaction_length));
1623
1624 bool ret = gtid_event.write(writer);
1625 if (ret) goto end;
1626
1627 /*
1628 finally write the transaction data, if it was not compressed
1629 and written as part of the gtid event already
1630 */
1631 ret = mysql_bin_log.write_cache(thd, cache_data, writer);
1632
1633 if (!ret) {
1634 // update stats if monitoring is active
1635 binlog::global_context.monitoring_context()
1636 .transaction_compression()
1637 .update(binlog::monitoring::log_type::BINARY,
1638 cache_data->get_compression_type(), thd->owned_gtid,
1639 gtid_event.immediate_commit_timestamp,
1640 cache_data->get_compressed_size(),
1641 cache_data->get_decompressed_size());
1642 }
1643
1644 end:
1645 return ret;
1646 }
1647
gtid_end_transaction(THD * thd)1648 int MYSQL_BIN_LOG::gtid_end_transaction(THD *thd) {
1649 DBUG_TRACE;
1650
1651 DBUG_PRINT("info", ("query=%s", thd->query().str));
1652
1653 if (thd->owned_gtid.sidno > 0) {
1654 DBUG_ASSERT(thd->variables.gtid_next.type == ASSIGNED_GTID);
1655
1656 if (!opt_bin_log || (thd->slave_thread && !opt_log_slave_updates)) {
1657 /*
1658 If the binary log is disabled for this thread (either by
1659 log_bin=0 or sql_log_bin=0 or by log_slave_updates=0 for a
1660 slave thread), then the statement must not be written to the
1661 binary log. In this case, we just save the GTID into the
1662 table directly.
1663
1664 (This only happens for DDL, since DML will save the GTID into
1665 table and release ownership inside ha_commit_trans.)
1666 */
1667 if (gtid_state->save(thd) != 0) {
1668 gtid_state->update_on_rollback(thd);
1669 return 1;
1670 } else if (!has_commit_order_manager(thd)) {
1671 /*
1672 The gtid_state->save implicitly performs the commit, in the following
1673 stack:
1674 Gtid_state::save ->
1675 Gtid_table_persistor::save ->
1676 Gtid_table_access_context::deinit ->
1677 System_table_access::close_table ->
1678 ha_commit_trans ->
1679 Relay_log_info::pre_commit ->
1680 Slave_worker::commit_positions(THD*) ->
1681 Slave_worker::commit_positions(THD*,Log_event*,...) ->
1682 Slave_worker::flush_info ->
1683 Rpl_info_handler::flush_info ->
1684 Rpl_info_table::do_flush_info ->
1685 Rpl_info_table_access::close_table ->
1686 System_table_access::close_table ->
1687 ha_commit_trans ->
1688 MYSQL_BIN_LOG::commit ->
1689 ha_commit_low
1690
1691 If slave-preserve-commit-order is disabled, it does not call
1692 update_on_commit from this stack. The reason is as follows:
1693
1694 In the normal case of MYSQL_BIN_LOG::commit, where the transaction is
1695 going to be written to the binary log, it invokes
1696 MYSQL_BIN_LOG::ordered_commit, which updates the GTID state (the call
1697 gtid_state->update_commit_group(first) in process_commit_stage_queue).
1698 However, when MYSQL_BIN_LOG::commit is invoked from this stack, it is
1699 because the transaction is not going to be written to the binary log,
1700 and then MYSQL_BIN_LOG::commit has a special case that calls
1701 ha_commit_low directly, skipping ordered_commit. Therefore, the GTID
1702 state is not updated in this stack.
1703
1704 On the other hand, if slave-preserve-commit-order is enabled, the
1705 logic that orders commit carries out a subset of the binlog group
1706 commit from within ha_commit_low, and this includes updating the GTID
1707 state. In particular, there is the following call stack under
1708 ha_commit_low:
1709
1710 ha_commit_low ->
1711 Commit_order_manager::wait_and_finish ->
1712 Commit_order_manager::finish ->
1713 Commit_order_manager::flush_engine_and_signal_threads ->
1714 Gtid_state::update_commit_group
1715
1716 Therefore, it is necessary to call update_on_commit only in case we
1717 are not using slave-preserve-commit-order here.
1718 */
1719 gtid_state->update_on_commit(thd);
1720 }
1721 } else {
1722 /*
1723 If statement is supposed to be written to binlog, we write it
1724 to the binary log. Inserting into table and releasing
1725 ownership will be done in the binlog commit handler.
1726 */
1727
1728 /*
1729 thd->cache_mngr may be uninitialized if the first transaction
1730 executed by the client is empty.
1731 */
1732 if (thd->binlog_setup_trx_data()) return 1;
1733 binlog_cache_data *cache_data = &thd_get_cache_mngr(thd)->trx_cache;
1734
1735 // Generate BEGIN event
1736 Query_log_event qinfo(thd, STRING_WITH_LEN("BEGIN"), true, false, true, 0,
1737 true);
1738 DBUG_ASSERT(!qinfo.is_using_immediate_logging());
1739
1740 /*
1741 Write BEGIN event and then commit (which will generate commit
1742 event and Gtid_log_event)
1743 */
1744 DBUG_PRINT("debug", ("Writing to trx_cache"));
1745 if (cache_data->write_event(&qinfo) || mysql_bin_log.commit(thd, true))
1746 return 1;
1747 }
1748 } else if (thd->owned_gtid.sidno == THD::OWNED_SIDNO_ANONYMOUS ||
1749 /*
1750 A transaction with an empty owned gtid should call
1751 end_gtid_violating_transaction(...) to clear the
1752 flag thd->has_gtid_consistency_violatoin in case
1753 it is set. It missed the clear in ordered_commit,
1754 because its binlog transaction cache is empty.
1755 */
1756 thd->has_gtid_consistency_violation)
1757
1758 {
1759 gtid_state->update_on_commit(thd);
1760 } else if (thd->variables.gtid_next.type == ASSIGNED_GTID &&
1761 thd->owned_gtid_is_empty()) {
1762 DBUG_ASSERT(thd->has_gtid_consistency_violation == false);
1763 gtid_state->update_on_commit(thd);
1764 }
1765
1766 return 0;
1767 }
1768
reencrypt_logs()1769 bool MYSQL_BIN_LOG::reencrypt_logs() {
1770 DBUG_TRACE;
1771
1772 if (!is_open()) return false;
1773
1774 std::string error_message;
1775 /* Gather the set of files to be accessed. */
1776 list<string> filename_list;
1777 LOG_INFO linfo;
1778 int error = 0;
1779 list<string>::reverse_iterator rit;
1780
1781 /* Read binary/relay log file names from index file. */
1782 mysql_mutex_lock(&LOCK_index);
1783 for (error = find_log_pos(&linfo, nullptr, false); !error;
1784 error = find_next_log(&linfo, false)) {
1785 filename_list.push_back(string(linfo.log_file_name));
1786 }
1787 mysql_mutex_unlock(&LOCK_index);
1788 if (error != LOG_INFO_EOF ||
1789 DBUG_EVALUATE_IF("fail_to_open_index_file", true, false)) {
1790 error_message.assign("I/O error reading index file '");
1791 error_message.append(index_file_name);
1792 error_message.append("'");
1793 goto err;
1794 }
1795
1796 rit = filename_list.rbegin();
1797 /* Skip the last binary/relay log. */
1798 if (rit != filename_list.rend()) rit++;
1799 /* Iterate backwards through binary/relay logs. */
1800 while (rit != filename_list.rend()) {
1801 const char *filename = rit->c_str();
1802 DBUG_EXECUTE_IF("purge_logs_during_reencryption", {
1803 purge_logs(filename, true, true /*need_lock_index=true*/,
1804 true /*need_update_threads=true*/, nullptr, false);
1805 });
1806 MUTEX_LOCK(lock, &LOCK_index);
1807 std::unique_ptr<Binlog_ofile> ofile(
1808 Binlog_ofile::open_existing(key_file_binlog, filename, MYF(MY_WME)));
1809
1810 if (ofile == nullptr ||
1811 DBUG_EVALUATE_IF("fail_to_open_log_file", true, false) ||
1812 DBUG_EVALUATE_IF("fail_to_read_index_file", true, false)) {
1813 /* If we can not open the log file, check if it exists in index file. */
1814 error = find_log_pos(&linfo, filename, false);
1815 DBUG_EXECUTE_IF("fail_to_read_index_file", error = LOG_INFO_IO;);
1816 if (error == LOG_INFO_EOF) {
1817 /* If it does not exist in index file, re-encryption has finished. */
1818 if (current_thd->is_error()) current_thd->clear_error();
1819 break;
1820 } else if (error == 0) {
1821 /* If it exists in index file, failed to open the log file. */
1822 error_message.assign("Failed to open log file '");
1823 error_message.append(filename);
1824 error_message.append("'");
1825 goto err;
1826 } else if (error == LOG_INFO_IO) {
1827 /* Failed to read index file. */
1828 error_message.assign("I/O error reading index file '");
1829 error_message.append(index_file_name);
1830 error_message.append("'");
1831 goto err;
1832 }
1833 }
1834
1835 if (ofile->is_encrypted()) {
1836 std::unique_ptr<Truncatable_ostream> pipeline_head =
1837 ofile->get_pipeline_head();
1838 std::unique_ptr<Binlog_encryption_ostream> binlog_encryption_ostream(
1839 down_cast<Binlog_encryption_ostream *>(pipeline_head.release()));
1840
1841 auto ret_value = binlog_encryption_ostream->reencrypt();
1842 if (ret_value.first) {
1843 error_message.assign("Failed to re-encrypt log file '");
1844 error_message.append(filename);
1845 error_message.append("': ");
1846 error_message.append(ret_value.second.c_str());
1847 goto err;
1848 }
1849 }
1850
1851 rit++;
1852 }
1853
1854 filename_list.clear();
1855
1856 return false;
1857
1858 err:
1859 if (current_thd->is_error()) current_thd->clear_error();
1860 my_error(ER_BINLOG_MASTER_KEY_ROTATION_FAIL_TO_REENCRYPT_LOG, MYF(0),
1861 error_message.c_str());
1862 filename_list.clear();
1863
1864 return true;
1865 }
1866
compress(THD * thd)1867 bool binlog_cache_data::compress(THD *thd) {
1868 DBUG_TRACE;
1869 auto error{false};
1870 auto ctype{binary_log::transaction::compression::type::NONE};
1871 auto uncompressed_size{m_cache.length()};
1872 auto size{uncompressed_size};
1873 auto &cctx{thd->rpl_thd_ctx.transaction_compression_ctx()};
1874 binary_log::transaction::compression::Compressor *compressor{nullptr};
1875
1876 // no compression enabled (ctype == NONE at this point)
1877 if (thd->variables.binlog_trx_compression == false) goto end;
1878
1879 // do not compress if there are incident events
1880 DBUG_EXECUTE_IF("binlog_compression_inject_incident", set_incident(););
1881 if (has_incident()) goto end;
1882
1883 // do not compress if there are non-transactional changes
1884 if (thd->get_transaction()->has_modified_non_trans_table(
1885 Transaction_ctx::STMT) ||
1886 thd->get_transaction()->has_modified_non_trans_table(
1887 Transaction_ctx::SESSION))
1888 goto end;
1889
1890 // do not compress if has SBR
1891 if (may_have_sbr_stmts()) goto end;
1892
1893 // Unable to get a reference to a compressor, fallback to
1894 // non compressed
1895 if ((compressor = cctx.get_compressor(thd)) == nullptr) goto end;
1896
1897 // compression is enabled and all pre-conditions checked.
1898 // now compress
1899 else {
1900 std::size_t old_capacity{0};
1901 unsigned char *buffer{nullptr};
1902 unsigned char *old_buffer{nullptr};
1903 Transaction_payload_log_event tple{thd};
1904 Compressed_ostream stream;
1905 PSI_stage_info old_stage;
1906
1907 // set the thread stage to compressing transaction
1908 thd->enter_stage(&stage_binlog_transaction_compress, &old_stage, __func__,
1909 __FILE__, __LINE__);
1910 // do we have enough compression buffer ? If not swap with a larger one
1911 std::tie(buffer, std::ignore, old_capacity) = compressor->get_buffer();
1912 if (old_capacity < size) {
1913 old_buffer = buffer;
1914 auto new_buffer = (unsigned char *)malloc(size);
1915 if (new_buffer)
1916 compressor->set_buffer(new_buffer, size);
1917 else {
1918 /* purecov: begin inspected */
1919 // OOM
1920 error = true;
1921 goto compression_end;
1922 /* purecov: end */
1923 }
1924 }
1925
1926 ctype = compressor->compression_type_code();
1927
1928 compressor->open();
1929
1930 // inject the compressor in the output stream
1931 stream.set_compressor(compressor);
1932
1933 // FIXME: innefficient, we should not copy caches around
1934 // This should be fixed when we revamp the capture
1935 // cache handling (and make this more geared towards
1936 // possible enhancements, such as streaming the changes)
1937 // Also, if the cache actually spills to disk, this may
1938 // the impact may be amplified, since reiniting the
1939 // causes a flush to disk
1940 if ((error = m_cache.copy_to(&stream))) goto compression_end;
1941
1942 compressor->close();
1943
1944 if ((error = m_cache.truncate(0))) goto compression_end;
1945 // Since we deleted all events from the cache, we also need to
1946 // reset event_counter.
1947 event_counter = 0;
1948
1949 // fill in the new transport event
1950 std::tie(buffer, size, std::ignore) = compressor->get_buffer();
1951 tple.set_payload((const char *)buffer);
1952 tple.set_payload_size(size);
1953 tple.set_compression_type(ctype);
1954 tple.set_uncompressed_size(uncompressed_size);
1955
1956 // write back the new cache contents
1957 error = write_event(&tple);
1958
1959 compression_end:
1960 // revert back to the default buffer, so that we don't overuse memory
1961 if (old_buffer) {
1962 std::tie(buffer, std::ignore, std::ignore) = compressor->get_buffer();
1963 compressor->set_buffer(old_buffer, old_capacity);
1964 free(buffer);
1965 }
1966
1967 // revert the stage if needed
1968 if (old_stage.m_key != 0) THD_STAGE_INFO(thd, old_stage);
1969 }
1970
1971 end:
1972 if (!error) {
1973 set_compression_type(ctype);
1974 set_compressed_size(m_cache.length());
1975 set_decompressed_size(uncompressed_size);
1976 }
1977 return error;
1978 }
1979
1980 /**
1981 This function finalizes the cache preparing for commit or rollback.
1982
1983 The function just writes all the necessary events to the cache but
1984 does not flush the data to the binary log file. That is the role of
1985 the binlog_cache_data::flush function.
1986
1987 @see binlog_cache_data::flush
1988
1989 @param thd The thread whose transaction should be flushed
1990 @param end_event The end event either commit/rollback
1991
1992 @return
1993 nonzero if an error pops up when flushing the cache.
1994 */
finalize(THD * thd,Log_event * end_event)1995 int binlog_cache_data::finalize(THD *thd, Log_event *end_event) {
1996 DBUG_TRACE;
1997 if (!is_binlog_empty()) {
1998 DBUG_ASSERT(!flags.finalized);
1999 if (int error = flush_pending_event(thd)) return error;
2000 if (int error = write_event(end_event)) return error;
2001 if (int error = this->compress(thd)) return error;
2002 DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
2003 flags.finalized = true;
2004 }
2005 return 0;
2006 }
2007
2008 /**
2009 The method writes XA END query to XA-prepared transaction's cache
2010 and calls the "basic" finalize().
2011
2012 @return error code, 0 success
2013 */
2014
finalize(THD * thd,Log_event * end_event,XID_STATE * xs)2015 int binlog_cache_data::finalize(THD *thd, Log_event *end_event, XID_STATE *xs) {
2016 int error = 0;
2017 char buf[XID::ser_buf_size];
2018 char query[sizeof("XA END") + 1 + sizeof(buf)];
2019 int qlen = sprintf(query, "XA END %s", xs->get_xid()->serialize(buf));
2020 Query_log_event qev(thd, query, qlen, true, false, true, 0);
2021
2022 if ((error = write_event(&qev))) return error;
2023
2024 return finalize(thd, end_event);
2025 }
2026
2027 /**
2028 Flush caches to the binary log.
2029
2030 If the cache is finalized, the cache will be flushed to the binary
2031 log file. If the cache is not finalized, nothing will be done.
2032
2033 If flushing fails for any reason, an error will be reported and the
2034 cache will be reset. Flushing can fail in two circumstances:
2035
2036 - It was not possible to write the cache to the file. In this case,
2037 it does not make sense to keep the cache.
2038
2039 - The cache was successfully written to disk but post-flush actions
2040 (such as binary log rotation) failed. In this case, the cache is
2041 already written to disk and there is no reason to keep it.
2042
2043 @see binlog_cache_data::finalize
2044 */
flush(THD * thd,my_off_t * bytes_written,bool * wrote_xid)2045 int binlog_cache_data::flush(THD *thd, my_off_t *bytes_written,
2046 bool *wrote_xid) {
2047 /*
2048 Doing a commit or a rollback including non-transactional tables,
2049 i.e., ending a transaction where we might write the transaction
2050 cache to the binary log.
2051
2052 We can always end the statement when ending a transaction since
2053 transactions are not allowed inside stored functions. If they
2054 were, we would have to ensure that we're not ending a statement
2055 inside a stored function.
2056 */
2057 DBUG_TRACE;
2058 DBUG_PRINT("debug", ("flags.finalized: %s", YESNO(flags.finalized)));
2059 int error = 0;
2060 if (flags.finalized) {
2061 my_off_t bytes_in_cache = m_cache.length();
2062 Transaction_ctx *trn_ctx = thd->get_transaction();
2063
2064 DBUG_PRINT("debug", ("bytes_in_cache: %llu", bytes_in_cache));
2065
2066 trn_ctx->sequence_number = mysql_bin_log.m_dependency_tracker.step();
2067
2068 /*
2069 In case of two caches the transaction is split into two groups.
2070 The 2nd group is considered to be a successor of the 1st rather
2071 than to have a common commit parent with it.
2072 Notice that due to a simple method of detection that the current is
2073 the 2nd cache being flushed, the very first few transactions may be logged
2074 sequentially (a next one is tagged as if a preceding one is its
2075 commit parent).
2076 */
2077 if (trn_ctx->last_committed == SEQ_UNINIT)
2078 trn_ctx->last_committed = trn_ctx->sequence_number - 1;
2079
2080 /*
2081 The GTID is written prior to flushing the statement cache, if
2082 the transaction has written to the statement cache; and prior to
2083 flushing the transaction cache if the transaction has written to
2084 the transaction cache. If GTIDs are enabled, then transactional
2085 and non-transactional updates cannot be mixed, so at most one of
2086 the caches can be non-empty, so just one GTID will be
2087 generated. If GTIDs are disabled, then no GTID is generated at
2088 all; if both the transactional cache and the statement cache are
2089 non-empty then we get two Anonymous_gtid_log_events, which is
2090 correct.
2091 */
2092 Binlog_event_writer writer(mysql_bin_log.get_binlog_file());
2093
2094 /* The GTID ownership process might set the commit_error */
2095 error = (thd->commit_error == THD::CE_FLUSH_ERROR);
2096
2097 DBUG_EXECUTE_IF("simulate_binlog_flush_error", {
2098 if (rand() % 3 == 0) {
2099 thd->commit_error = THD::CE_FLUSH_ERROR;
2100 }
2101 };);
2102
2103 DBUG_EXECUTE_IF("fault_injection_reinit_io_cache_while_flushing_to_file",
2104 { DBUG_SET("+d,fault_injection_reinit_io_cache"); });
2105
2106 if (!error)
2107 if ((error = mysql_bin_log.write_transaction(thd, this, &writer)))
2108 thd->commit_error = THD::CE_FLUSH_ERROR;
2109
2110 DBUG_EXECUTE_IF("fault_injection_reinit_io_cache_while_flushing_to_file",
2111 { DBUG_SET("-d,fault_injection_reinit_io_cache"); });
2112
2113 if (flags.with_xid && error == 0) *wrote_xid = true;
2114
2115 /*
2116 Reset have to be after the if above, since it clears the
2117 with_xid flag
2118 */
2119 reset();
2120 if (bytes_written) *bytes_written = bytes_in_cache;
2121 }
2122 DBUG_ASSERT(!flags.finalized);
2123 return error;
2124 }
2125
2126 /**
2127 This function truncates the transactional cache upon committing or rolling
2128 back either a transaction or a statement.
2129
2130 @param thd The thread whose transaction should be flushed
2131 @param all @c true means truncate the transaction, otherwise the
2132 statement must be truncated.
2133
2134 @return
2135 nonzero if an error pops up when truncating the transactional cache.
2136 */
truncate(THD * thd,bool all)2137 int binlog_trx_cache_data::truncate(THD *thd, bool all) {
2138 DBUG_TRACE;
2139 int error = 0;
2140
2141 DBUG_PRINT("info",
2142 ("thd->options={ %s %s}, transaction: %s",
2143 FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
2144 FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
2145 all ? "all" : "stmt"));
2146
2147 remove_pending_event();
2148
2149 /*
2150 If rolling back an entire transaction or a single statement not
2151 inside a transaction, we reset the transaction cache.
2152 Even though formally the atomic DDL statement may not end multi-statement
2153 transaction the cache needs full resetting as there must
2154 be no other data in it but belonging to the DDL.
2155 */
2156 if (ending_trans(thd, all)) {
2157 if (has_incident()) {
2158 const char *err_msg =
2159 "Error happend while resetting the transaction "
2160 "cache for a rolled back transaction or a single "
2161 "statement not inside a transaction.";
2162 error = mysql_bin_log.write_incident(thd, true /*need_lock_log=true*/,
2163 err_msg);
2164 }
2165 reset();
2166 }
2167 /*
2168 If rolling back a statement in a transaction, we truncate the
2169 transaction cache to remove the statement.
2170 */
2171 else if (get_prev_position() != MY_OFF_T_UNDEF)
2172 restore_prev_position();
2173
2174 thd->clear_binlog_table_maps();
2175
2176 return error;
2177 }
2178
get_xa_opt(THD * thd)2179 inline enum xa_option_words get_xa_opt(THD *thd) {
2180 enum xa_option_words xa_opt = XA_NONE;
2181 switch (thd->lex->sql_command) {
2182 case SQLCOM_XA_COMMIT:
2183 xa_opt =
2184 static_cast<Sql_cmd_xa_commit *>(thd->lex->m_sql_cmd)->get_xa_opt();
2185 break;
2186 default:
2187 break;
2188 }
2189
2190 return xa_opt;
2191 }
2192
2193 /**
2194 Predicate function yields true when XA transaction is
2195 being logged having a proper state ready for prepare or
2196 commit in one phase.
2197
2198 @param thd THD pointer of running transaction
2199 @return true When the being prepared transaction should be binlogged,
2200 false otherwise.
2201 */
2202
is_loggable_xa_prepare(THD * thd)2203 inline bool is_loggable_xa_prepare(THD *thd) {
2204 /*
2205 simulate_commit_failure is doing a trick with XID_STATE while
2206 the ongoing transaction is not XA, and therefore to be errored out,
2207 asserted below. In that case because of the
2208 latter fact the function returns @c false.
2209 */
2210 DBUG_EXECUTE_IF("simulate_commit_failure", {
2211 XID_STATE *xs = thd->get_transaction()->xid_state();
2212 DBUG_ASSERT((thd->is_error() && xs->get_state() == XID_STATE::XA_IDLE) ||
2213 xs->get_state() == XID_STATE::XA_NOTR);
2214 });
2215
2216 return DBUG_EVALUATE_IF(
2217 "simulate_commit_failure", false,
2218 thd->get_transaction()->xid_state()->has_state(XID_STATE::XA_IDLE));
2219 }
2220
binlog_prepare(handlerton *,THD * thd,bool all)2221 static int binlog_prepare(handlerton *, THD *thd, bool all) {
2222 DBUG_TRACE;
2223 if (!all) {
2224 thd->get_transaction()->store_commit_parent(
2225 mysql_bin_log.m_dependency_tracker.get_max_committed_timestamp());
2226 }
2227
2228 return all && is_loggable_xa_prepare(thd) ? mysql_bin_log.commit(thd, true)
2229 : 0;
2230 }
2231
2232 /**
2233 Logging XA commit/rollback of a prepared transaction.
2234
2235 The function is called at XA-commit or XA-rollback logging via
2236 two paths: the recovered-or-slave-applier or immediately through
2237 the XA-prepared transaction connection itself.
2238 It fills in appropiate event in the statement cache whenever
2239 xid state is marked with is_binlogged() flag that indicates
2240 the prepared part of the transaction must've been logged.
2241
2242 About early returns from the function.
2243 In the recovered-or-slave-applier case the function may be called
2244 for the 2nd time, which has_logged_xid monitors.
2245 ONE_PHASE option to XA-COMMIT is handled to skip
2246 writing XA-commit event now.
2247 And the final early return check is for the read-only XA that is
2248 not to be logged.
2249
2250 @param thd THD handle
2251 @param xid a pointer to XID object that is serialized
2252 @param commit when @c true XA-COMMIT is to be logged,
2253 and @c false when it's XA-ROLLBACK.
2254 @return error code, 0 success
2255 */
2256
do_binlog_xa_commit_rollback(THD * thd,XID * xid,bool commit)2257 inline int do_binlog_xa_commit_rollback(THD *thd, XID *xid, bool commit) {
2258 DBUG_ASSERT(thd->lex->sql_command == SQLCOM_XA_COMMIT ||
2259 thd->lex->sql_command == SQLCOM_XA_ROLLBACK);
2260
2261 XID_STATE *xid_state = thd->get_transaction()->xid_state();
2262 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(thd);
2263
2264 if (cache_mngr != nullptr && cache_mngr->has_logged_xid) return 0;
2265
2266 if (get_xa_opt(thd) == XA_ONE_PHASE) return 0;
2267 if (!xid_state->is_binlogged())
2268 return 0; // nothing was really logged at prepare
2269 if (thd->is_error() && DBUG_EVALUATE_IF("simulate_xa_rm_error", 0, 1))
2270 return 0; // don't binlog if there are some errors.
2271
2272 DBUG_ASSERT(!xid->is_null() ||
2273 !(thd->variables.option_bits & OPTION_BIN_LOG));
2274
2275 char buf[XID::ser_buf_size];
2276 char query[(sizeof("XA ROLLBACK")) + 1 + sizeof(buf)];
2277 int qlen = sprintf(query, "XA %s %s", commit ? "COMMIT" : "ROLLBACK",
2278 xid->serialize(buf));
2279 Query_log_event qinfo(thd, query, qlen, false, true, true, 0, false);
2280 return mysql_bin_log.write_event(&qinfo);
2281 }
2282
2283 /**
2284 Logging XA commit/rollback of a prepared transaction in the case
2285 it was disconnected and resumed (recovered), or executed by a slave applier.
2286
2287 @param thd THD handle
2288 @param xid a pointer to XID object
2289 @param commit when @c true XA-COMMIT is logged, otherwise XA-ROLLBACK
2290
2291 @return error code, 0 success
2292 */
2293
binlog_xa_commit_or_rollback(THD * thd,XID * xid,bool commit)2294 inline xa_status_code binlog_xa_commit_or_rollback(THD *thd, XID *xid,
2295 bool commit) {
2296 int error = 0;
2297
2298 #ifndef DBUG_OFF
2299 {
2300 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(thd);
2301 DBUG_ASSERT(!cache_mngr || !cache_mngr->has_logged_xid);
2302 }
2303 #endif
2304 if (!(error = do_binlog_xa_commit_rollback(thd, xid, commit))) {
2305 /*
2306 Error can't be propagated naturally via result.
2307 A grand-caller has to access to it through thd's da.
2308 todo:
2309 Bug #20488921 ERROR PROPAGATION DOES FULLY WORK IN XA
2310 stands in the way of implementing a failure simulation
2311 for XA PREPARE/COMMIT/ROLLBACK.
2312 */
2313 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(thd);
2314
2315 if (cache_mngr) cache_mngr->has_logged_xid = true;
2316 if (commit)
2317 error = mysql_bin_log.commit(thd, true);
2318 else
2319 error = mysql_bin_log.rollback(thd, true);
2320 if (cache_mngr) cache_mngr->has_logged_xid = false;
2321 }
2322
2323 return error == TC_LOG::RESULT_SUCCESS ? XA_OK : XAER_RMERR;
2324 }
2325
binlog_xa_commit(handlerton *,XID * xid)2326 static xa_status_code binlog_xa_commit(handlerton *, XID *xid) {
2327 return binlog_xa_commit_or_rollback(current_thd, xid, true);
2328 }
2329
binlog_xa_rollback(handlerton *,XID * xid)2330 static xa_status_code binlog_xa_rollback(handlerton *, XID *xid) {
2331 return binlog_xa_commit_or_rollback(current_thd, xid, false);
2332 }
2333
2334 /**
2335 When a fatal error occurs due to which binary logging becomes impossible and
2336 the user specified binlog_error_action= ABORT_SERVER the following function is
2337 invoked. This function pushes the appropriate error message to client and logs
2338 the same to server error log and then aborts the server.
2339
2340 @param err_string Error string which specifies the exact error
2341 message from the caller.
2342
2343 @retval
2344 none
2345 */
exec_binlog_error_action_abort(const char * err_string)2346 static void exec_binlog_error_action_abort(const char *err_string) {
2347 THD *thd = current_thd;
2348 /*
2349 When the code enters here it means that there was an error at higher layer
2350 and my_error function could have been invoked to let the client know what
2351 went wrong during the execution.
2352
2353 But these errors will not let the client know that the server is going to
2354 abort. Even if we add an additional my_error function call at this point
2355 client will be able to see only the first error message that was set
2356 during the very first invocation of my_error function call.
2357
2358 The advantage of having multiple my_error function calls are visible when
2359 the server is up and running and user issues SHOW WARNINGS or SHOW ERROR
2360 calls. In this special scenario server will be immediately aborted and
2361 user will not be able execute the above SHOW commands.
2362
2363 Hence we clear the previous errors and push one critical error message to
2364 clients.
2365 */
2366 if (thd) {
2367 if (thd->is_error()) thd->clear_error();
2368 /*
2369 Send error to both client and to the server error log.
2370 */
2371 my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(ME_FATALERROR), err_string);
2372 }
2373
2374 LogErr(ERROR_LEVEL, ER_BINLOG_LOGGING_NOT_POSSIBLE, err_string);
2375 flush_error_log_messages();
2376
2377 if (thd) thd->send_statement_status();
2378 abort();
2379 }
2380
2381 /**
2382 This function is called once after each statement.
2383
2384 @todo This function is currently not used any more and will
2385 eventually be eliminated. The real commit job is done in the
2386 MYSQL_BIN_LOG::commit function.
2387
2388 @see MYSQL_BIN_LOG::commit
2389
2390 @see handlerton::commit
2391 */
binlog_commit(handlerton *,THD *,bool)2392 static int binlog_commit(handlerton *, THD *, bool) {
2393 DBUG_TRACE;
2394 /*
2395 Nothing to do (any more) on commit.
2396 */
2397 return 0;
2398 }
2399
2400 /**
2401 This function is called when a transaction or a statement is rolled back.
2402
2403 @internal It is necessary to execute a rollback here if the
2404 transaction was rolled back because of executing a ROLLBACK TO
2405 SAVEPOINT command, but it is not used for normal rollback since
2406 MYSQL_BIN_LOG::rollback is called in that case.
2407
2408 @todo Refactor code to introduce a <code>MYSQL_BIN_LOG::rollback(THD
2409 *thd, SAVEPOINT *sv)</code> function in @c TC_LOG and have that
2410 function execute the necessary work to rollback to a savepoint.
2411
2412 @param thd The client thread that executes the transaction.
2413 @param all This is @c true if this is a real transaction rollback, and
2414 @false otherwise.
2415
2416 @see handlerton::rollback
2417 */
binlog_rollback(handlerton *,THD * thd,bool all)2418 static int binlog_rollback(handlerton *, THD *thd, bool all) {
2419 DBUG_TRACE;
2420 int error = 0;
2421 if (thd->lex->sql_command == SQLCOM_ROLLBACK_TO_SAVEPOINT)
2422 error = mysql_bin_log.rollback(thd, all);
2423 return error;
2424 }
2425
2426 /**
2427 Write a rollback record of the transaction to the binary log.
2428
2429 For binary log group commit, the rollback is separated into three
2430 parts:
2431
2432 1. First part consists of filling the necessary caches and
2433 finalizing them (if they need to be finalized). After a cache is
2434 finalized, nothing can be added to the cache.
2435
2436 2. Second part executes an ordered flush and commit. This will be
2437 done using the group commit functionality in @c ordered_commit.
2438
2439 Since we roll back the transaction early, we call @c
2440 ordered_commit with the @c skip_commit flag set. The @c
2441 ha_commit_low call inside @c ordered_commit will then not be
2442 called.
2443
2444 3. Third part checks any errors resulting from the flush and handles
2445 them appropriately.
2446
2447 @see MYSQL_BIN_LOG::ordered_commit
2448 @see ha_commit_low
2449 @see ha_rollback_low
2450
2451 @param thd Session to commit
2452 @param all This is @c true if this is a real transaction rollback, and
2453 @c false otherwise.
2454
2455 @return Error code, or zero if there were no error.
2456 */
2457
rollback(THD * thd,bool all)2458 int MYSQL_BIN_LOG::rollback(THD *thd, bool all) {
2459 int error = 0;
2460 bool stuff_logged = false;
2461 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(thd);
2462 bool is_empty = false;
2463
2464 DBUG_TRACE;
2465 DBUG_PRINT("enter",
2466 ("all: %s, cache_mngr: 0x%llx, thd->is_error: %s", YESNO(all),
2467 (ulonglong)cache_mngr, YESNO(thd->is_error())));
2468 /*
2469 Defer XA-transaction rollback until its XA-rollback event is recorded.
2470 When we are executing a ROLLBACK TO SAVEPOINT, we
2471 should only clear the caches since this function is called as part
2472 of the engine rollback.
2473 In other cases we roll back the transaction in the engines early
2474 since this will release locks and allow other transactions to
2475 start executing.
2476 */
2477 if (thd->lex->sql_command == SQLCOM_XA_ROLLBACK) {
2478 XID_STATE *xs = thd->get_transaction()->xid_state();
2479
2480 DBUG_ASSERT(all || !xs->is_binlogged() ||
2481 (!xs->is_in_recovery() && thd->is_error()));
2482 /*
2483 Whenever cache_mngr is not initialized, the xa prepared
2484 transaction's binary logging status must not be set, unless the
2485 transaction is rolled back through an external connection which
2486 has binlogging switched off.
2487 */
2488 DBUG_ASSERT(cache_mngr || !xs->is_binlogged() ||
2489 !(is_open() && thd->variables.option_bits & OPTION_BIN_LOG));
2490
2491 is_empty = !xs->is_binlogged();
2492 if ((error = do_binlog_xa_commit_rollback(thd, xs->get_xid(), false)))
2493 goto end;
2494 cache_mngr = thd_get_cache_mngr(thd);
2495 } else if (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT)
2496 if ((error = ha_rollback_low(thd, all))) goto end;
2497
2498 /*
2499 If there is no cache manager, or if there is nothing in the
2500 caches, there are no caches to roll back, so we're trivially done
2501 unless XA-ROLLBACK that yet to run rollback_low().
2502 */
2503 if (cache_mngr == nullptr || cache_mngr->is_binlog_empty()) {
2504 goto end;
2505 }
2506
2507 DBUG_PRINT("debug", ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
2508 YESNO(thd->get_transaction()->cannot_safely_rollback(
2509 Transaction_ctx::SESSION)),
2510 YESNO(cache_mngr->trx_cache.is_binlog_empty())));
2511 DBUG_PRINT("debug",
2512 ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
2513 YESNO(thd->get_transaction()->cannot_safely_rollback(
2514 Transaction_ctx::STMT)),
2515 YESNO(cache_mngr->stmt_cache.is_binlog_empty())));
2516
2517 /*
2518 If an incident event is set we do not flush the content of the statement
2519 cache because it may be corrupted.
2520 */
2521 if (cache_mngr->stmt_cache.has_incident()) {
2522 const char *err_msg =
2523 "The content of the statement cache is corrupted "
2524 "while writing a rollback record of the transaction "
2525 "to the binary log.";
2526 error = write_incident(thd, true /*need_lock_log=true*/, err_msg);
2527 cache_mngr->stmt_cache.reset();
2528 } else if (!cache_mngr->stmt_cache.is_binlog_empty()) {
2529 if (thd->lex->sql_command == SQLCOM_CREATE_TABLE &&
2530 thd->lex->select_lex->get_fields_list()->elements && /* With select */
2531 !(thd->lex->create_info->options & HA_LEX_CREATE_TMP_TABLE) &&
2532 thd->is_current_stmt_binlog_format_row()) {
2533 /*
2534 In row based binlog format, we reset the binlog statement cache
2535 when rolling back a single statement 'CREATE...SELECT' transaction,
2536 since the 'CREATE TABLE' event was put in the binlog statement cache.
2537 */
2538 cache_mngr->stmt_cache.reset();
2539 } else {
2540 if ((error = cache_mngr->stmt_cache.finalize(thd))) goto end;
2541 stuff_logged = true;
2542 }
2543 }
2544
2545 if (ending_trans(thd, all)) {
2546 if (trans_cannot_safely_rollback(thd)) {
2547 const char xa_rollback_str[] = "XA ROLLBACK";
2548 /*
2549 sizeof(xa_rollback_str) and XID::ser_buf_size both allocate `\0',
2550 so one of the two is used for necessary in the xa case `space' char
2551 */
2552 char query[sizeof(xa_rollback_str) + XID::ser_buf_size] = "ROLLBACK";
2553 XID_STATE *xs = thd->get_transaction()->xid_state();
2554
2555 if (thd->lex->sql_command == SQLCOM_XA_ROLLBACK) {
2556 /* this block is relevant only for not prepared yet and "local" xa trx
2557 */
2558 DBUG_ASSERT(
2559 thd->get_transaction()->xid_state()->has_state(XID_STATE::XA_IDLE));
2560 DBUG_ASSERT(!cache_mngr->has_logged_xid);
2561
2562 sprintf(query, "%s ", xa_rollback_str);
2563 xs->get_xid()->serialize(query + sizeof(xa_rollback_str));
2564 }
2565 /*
2566 If the transaction is being rolled back and contains changes that
2567 cannot be rolled back, the trx-cache's content is flushed.
2568 */
2569 Query_log_event end_evt(thd, query, strlen(query), true, false, true, 0,
2570 true);
2571 error = thd->lex->sql_command != SQLCOM_XA_ROLLBACK
2572 ? cache_mngr->trx_cache.finalize(thd, &end_evt)
2573 : cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
2574 stuff_logged = true;
2575 } else {
2576 /*
2577 If the transaction is being rolled back and its changes can be
2578 rolled back, the trx-cache's content is truncated.
2579 */
2580 error = cache_mngr->trx_cache.truncate(thd, all);
2581
2582 DBUG_EXECUTE_IF("ensure_binlog_cache_is_reset", {
2583 /* Assert that binlog cache is reset at rollback time. */
2584 DBUG_ASSERT(binlog_cache_is_reset);
2585 binlog_cache_is_reset = false;
2586 };);
2587 }
2588 } else {
2589 /*
2590 If a statement is being rolled back, it is necessary to know
2591 exactly why a statement may not be safely rolled back as in
2592 some specific situations the trx-cache can be truncated.
2593
2594 If a temporary table is created or dropped, the trx-cache is not
2595 truncated. Note that if the stmt-cache is used, there is nothing
2596 to truncate in the trx-cache.
2597
2598 If a non-transactional table is updated and the binlog format is
2599 statement, the trx-cache is not truncated. The trx-cache is used
2600 when the direct option is off and a transactional table has been
2601 updated before the current statement in the context of the
2602 current transaction. Note that if the stmt-cache is used there is
2603 nothing to truncate in the trx-cache.
2604
2605 If other binlog formats are used, updates to non-transactional
2606 tables are written to the stmt-cache and trx-cache can be safely
2607 truncated, if necessary.
2608 */
2609 if (thd->get_transaction()->has_dropped_temp_table(Transaction_ctx::STMT) ||
2610 thd->get_transaction()->has_created_temp_table(Transaction_ctx::STMT) ||
2611 (thd->get_transaction()->has_modified_non_trans_table(
2612 Transaction_ctx::STMT) &&
2613 thd->variables.binlog_format == BINLOG_FORMAT_STMT)) {
2614 /*
2615 If the statement is being rolled back and dropped or created a
2616 temporary table or modified a non-transactional table and the
2617 statement-based replication is in use, the statement's changes
2618 in the trx-cache are preserved.
2619 */
2620 cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
2621 } else {
2622 /*
2623 Otherwise, the statement's changes in the trx-cache are
2624 truncated.
2625 */
2626 error = cache_mngr->trx_cache.truncate(thd, all);
2627 }
2628 }
2629 if (stuff_logged) {
2630 Transaction_ctx *trn_ctx = thd->get_transaction();
2631 trn_ctx->store_commit_parent(
2632 m_dependency_tracker.get_max_committed_timestamp());
2633 }
2634
2635 DBUG_PRINT("debug", ("error: %d", error));
2636 if (error == 0 && stuff_logged) {
2637 if (RUN_HOOK(
2638 transaction, before_commit,
2639 (thd, all, thd_get_cache_mngr(thd)->get_trx_cache(),
2640 thd_get_cache_mngr(thd)->get_stmt_cache(),
2641 max<my_off_t>(max_binlog_cache_size, max_binlog_stmt_cache_size),
2642 false))) {
2643 // Reset the thread OK status before changing the outcome.
2644 if (thd->get_stmt_da()->is_ok())
2645 thd->get_stmt_da()->reset_diagnostics_area();
2646 my_error(ER_RUN_HOOK_ERROR, MYF(0), "before_commit");
2647 return RESULT_ABORTED;
2648 }
2649 #ifndef DBUG_OFF
2650 /*
2651 XA rollback is always accepted.
2652 */
2653 if (thd->get_transaction()
2654 ->get_rpl_transaction_ctx()
2655 ->is_transaction_rollback())
2656 DBUG_ASSERT(0);
2657 #endif
2658
2659 error = ordered_commit(thd, all, /* skip_commit */ true);
2660 }
2661
2662 if (check_write_error(thd)) {
2663 /*
2664 "all == true" means that a "rollback statement" triggered the error and
2665 this function was called. However, this must not happen as a rollback
2666 is written directly to the binary log. And in auto-commit mode, a single
2667 statement that is rolled back has the flag all == false.
2668 */
2669 DBUG_ASSERT(!all);
2670 /*
2671 We reach this point if the effect of a statement did not properly get into
2672 a cache and need to be rolled back.
2673 */
2674 error |= cache_mngr->trx_cache.truncate(thd, all);
2675 }
2676
2677 end:
2678 /* Deferred xa rollback to engines */
2679 if (!error && thd->lex->sql_command == SQLCOM_XA_ROLLBACK) {
2680 error = ha_rollback_low(thd, all);
2681 if (!error && !thd->is_error()) {
2682 /*
2683 XA-rollback ignores the gtid_state, if the transaciton
2684 is empty.
2685 */
2686 if (is_empty && !thd->slave_thread) gtid_state->update_on_rollback(thd);
2687 /*
2688 XA-rollback commits the new gtid_state, if transaction
2689 is not empty.
2690 */
2691 else {
2692 gtid_state->update_on_commit(thd);
2693 /*
2694 Inform hook listeners that a XA ROLLBACK did commit, that
2695 is, did log a transaction to the binary log.
2696 */
2697 (void)RUN_HOOK(transaction, after_commit, (thd, all));
2698 }
2699 }
2700 }
2701 /*
2702 When a statement errors out on auto-commit mode it is rollback
2703 implicitly, so the same should happen to its GTID.
2704 */
2705 if (!thd->in_active_multi_stmt_transaction())
2706 gtid_state->update_on_rollback(thd);
2707
2708 /*
2709 TODO: some errors are overwritten, which may cause problem,
2710 fix it later.
2711 */
2712 DBUG_PRINT("return", ("error: %d", error));
2713 return error;
2714 }
2715
2716 /**
2717 @note
2718 How do we handle this (unlikely but legal) case:
2719 @verbatim
2720 [transaction] + [update to non-trans table] + [rollback to savepoint] ?
2721 @endverbatim
2722 The problem occurs when a savepoint is before the update to the
2723 non-transactional table. Then when there's a rollback to the savepoint, if we
2724 simply truncate the binlog cache, we lose the part of the binlog cache where
2725 the update is. If we want to not lose it, we need to write the SAVEPOINT
2726 command and the ROLLBACK TO SAVEPOINT command to the binlog cache. The latter
  is easy: it's just a write at the end of the binlog cache, but the former
2728 should be *inserted* to the place where the user called SAVEPOINT. The
2729 solution is that when the user calls SAVEPOINT, we write it to the binlog
2730 cache (so no need to later insert it). As transactions are never intermixed
2731 in the binary log (i.e. they are serialized), we won't have conflicts with
2732 savepoint names when using mysqlbinlog or in the slave SQL thread.
2733 Then when ROLLBACK TO SAVEPOINT is called, if we updated some
2734 non-transactional table, we don't truncate the binlog cache but instead write
2735 ROLLBACK TO SAVEPOINT to it; otherwise we truncate the binlog cache (which
2736 will chop the SAVEPOINT command from the binlog cache, which is good as in
2737 that case there is no need to have it in the binlog).
2738 */
2739
binlog_savepoint_set(handlerton *,THD * thd,void * sv)2740 static int binlog_savepoint_set(handlerton *, THD *thd, void *sv) {
2741 DBUG_TRACE;
2742 int error = 1;
2743
2744 String log_query;
2745 if (log_query.append(STRING_WITH_LEN("SAVEPOINT ")))
2746 return error;
2747 else
2748 append_identifier(thd, &log_query, thd->lex->ident.str,
2749 thd->lex->ident.length);
2750
2751 int errcode = query_error_code(thd, thd->killed == THD::NOT_KILLED);
2752 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(), true,
2753 false, true, errcode);
2754 /*
2755 We cannot record the position before writing the statement
2756 because a rollback to a savepoint (.e.g. consider it "S") would
2757 prevent the savepoint statement (i.e. "SAVEPOINT S") from being
2758 written to the binary log despite the fact that the server could
2759 still issue other rollback statements to the same savepoint (i.e.
2760 "S").
2761 Given that the savepoint is valid until the server releases it,
2762 ie, until the transaction commits or it is released explicitly,
2763 we need to log it anyway so that we don't have "ROLLBACK TO S"
2764 or "RELEASE S" without the preceding "SAVEPOINT S" in the binary
2765 log.
2766 */
2767 if (!(error = mysql_bin_log.write_event(&qinfo)))
2768 binlog_trans_log_savepos(thd, (my_off_t *)sv);
2769
2770 return error;
2771 }
2772
binlog_savepoint_rollback(handlerton *,THD * thd,void * sv)2773 static int binlog_savepoint_rollback(handlerton *, THD *thd, void *sv) {
2774 DBUG_TRACE;
2775 binlog_cache_mngr *const cache_mngr = thd_get_cache_mngr(thd);
2776 my_off_t pos = *(my_off_t *)sv;
2777 DBUG_ASSERT(pos != ~(my_off_t)0);
2778
2779 /*
2780 Write ROLLBACK TO SAVEPOINT to the binlog cache if we have updated some
2781 non-transactional table. Otherwise, truncate the binlog cache starting
2782 from the SAVEPOINT command.
2783 */
2784 if (trans_cannot_safely_rollback(thd)) {
2785 String log_query;
2786 if (log_query.append(STRING_WITH_LEN("ROLLBACK TO ")))
2787 return 1;
2788 else {
2789 /*
2790 Before writing identifier to the binlog, make sure to
2791 quote the identifier properly so as to prevent any SQL
2792 injection on the slave.
2793 */
2794 append_identifier(thd, &log_query, thd->lex->ident.str,
2795 thd->lex->ident.length);
2796 }
2797
2798 int errcode = query_error_code(thd, thd->killed == THD::NOT_KILLED);
2799 Query_log_event qinfo(thd, log_query.c_ptr_safe(), log_query.length(), true,
2800 false, true, errcode);
2801 return mysql_bin_log.write_event(&qinfo);
2802 }
2803 // Otherwise, we truncate the cache
2804 cache_mngr->trx_cache.restore_savepoint(pos);
2805 /*
2806 When a SAVEPOINT is executed inside a stored function/trigger we force the
2807 pending event to be flushed with a STMT_END_F flag and clear the table maps
2808 as well to ensure that following DMLs will have a clean state to start
2809 with. ROLLBACK inside a stored routine has to finalize possibly existing
2810 current row-based pending event with cleaning up table maps. That ensures
2811 that following DMLs will have a clean state to start with.
2812 */
2813 if (thd->in_sub_stmt) thd->clear_binlog_table_maps();
2814 return 0;
2815 }
2816
2817 /**
2818 purge logs, master and slave sides both, related error code
2819 convertor.
2820 Called from @c purge_error_message(), @c MYSQL_BIN_LOG::reset_logs()
2821
2822 @param res an error code as used by purging routines
2823
2824 @return the user level error code ER_*
2825 */
purge_log_get_error_code(int res)2826 static uint purge_log_get_error_code(int res) {
2827 uint errcode = 0;
2828
2829 switch (res) {
2830 case 0:
2831 break;
2832 case LOG_INFO_EOF:
2833 errcode = ER_UNKNOWN_TARGET_BINLOG;
2834 break;
2835 case LOG_INFO_IO:
2836 errcode = ER_IO_ERR_LOG_INDEX_READ;
2837 break;
2838 case LOG_INFO_INVALID:
2839 errcode = ER_BINLOG_PURGE_PROHIBITED;
2840 break;
2841 case LOG_INFO_SEEK:
2842 errcode = ER_FSEEK_FAIL;
2843 break;
2844 case LOG_INFO_MEM:
2845 errcode = ER_OUT_OF_RESOURCES;
2846 break;
2847 case LOG_INFO_FATAL:
2848 errcode = ER_BINLOG_PURGE_FATAL_ERR;
2849 break;
2850 case LOG_INFO_IN_USE:
2851 errcode = ER_LOG_IN_USE;
2852 break;
2853 case LOG_INFO_EMFILE:
2854 errcode = ER_BINLOG_PURGE_EMFILE;
2855 break;
2856 default:
2857 errcode = ER_LOG_PURGE_UNKNOWN_ERR;
2858 break;
2859 }
2860
2861 return errcode;
2862 }
2863
2864 /**
2865 Check whether binlog state allows to safely release MDL locks after
2866 rollback to savepoint.
2867
2868 @param thd The client thread that executes the transaction.
2869
2870 @return true - It is safe to release MDL locks.
2871 false - If it is not.
2872 */
binlog_savepoint_rollback_can_release_mdl(handlerton *,THD * thd)2873 static bool binlog_savepoint_rollback_can_release_mdl(handlerton *, THD *thd) {
2874 DBUG_TRACE;
2875 /**
2876 If we have not updated any non-transactional tables rollback
2877 to savepoint will simply truncate binlog cache starting from
2878 SAVEPOINT command. So it should be safe to release MDL acquired
2879 after SAVEPOINT command in this case.
2880 */
2881 return !trans_cannot_safely_rollback(thd);
2882 }
2883
2884 /**
2885 Adjust log offset in the binary log file for all running slaves
2886 This class implements call back function for do_for_all_thd().
2887 It is called for each thd in thd list to adjust offset.
2888 */
2889 class Adjust_offset : public Do_THD_Impl {
2890 public:
Adjust_offset(my_off_t value)2891 Adjust_offset(my_off_t value) : m_purge_offset(value) {}
operator ()(THD * thd)2892 virtual void operator()(THD *thd) {
2893 LOG_INFO *linfo;
2894 mysql_mutex_lock(&thd->LOCK_thd_data);
2895 if ((linfo = thd->current_linfo)) {
2896 /*
2897 Index file offset can be less that purge offset only if
2898 we just started reading the index file. In that case
2899 we have nothing to adjust.
2900 */
2901 if (linfo->index_file_offset < m_purge_offset)
2902 linfo->fatal = (linfo->index_file_offset != 0);
2903 else
2904 linfo->index_file_offset -= m_purge_offset;
2905 }
2906 mysql_mutex_unlock(&thd->LOCK_thd_data);
2907 }
2908
2909 private:
2910 my_off_t m_purge_offset;
2911 };
2912
2913 /*
2914 Adjust the position pointer in the binary log file for all running slaves.
2915
2916 SYNOPSIS
2917 adjust_linfo_offsets()
2918 purge_offset Number of bytes removed from start of log index file
2919
2920 NOTES
2921 - This is called when doing a PURGE when we delete lines from the
2922 index log file.
2923
2924 REQUIREMENTS
2925 - Before calling this function, we have to ensure that no threads are
2926 using any binary log file before purge_offset.
2927
2928 TODO
2929 - Inform the slave threads that they should sync the position
2930 in the binary log file with flush_relay_log_info.
2931 Now they sync is done for next read.
2932 */
adjust_linfo_offsets(my_off_t purge_offset)2933 static void adjust_linfo_offsets(my_off_t purge_offset) {
2934 Adjust_offset adjust_offset(purge_offset);
2935 Global_THD_manager::get_instance()->do_for_all_thd(&adjust_offset);
2936 }
2937
2938 /**
2939 This class implements Call back function for do_for_all_thd().
2940 It is called for each thd in thd list to count
2941 threads using bin log file
2942 */
2943
2944 class Log_in_use : public Do_THD_Impl {
2945 public:
Log_in_use(const char * value)2946 Log_in_use(const char *value) : m_log_name(value), m_count(0) {
2947 m_log_name_len = strlen(m_log_name) + 1;
2948 }
operator ()(THD * thd)2949 virtual void operator()(THD *thd) {
2950 LOG_INFO *linfo;
2951 mysql_mutex_lock(&thd->LOCK_thd_data);
2952 if ((linfo = thd->current_linfo)) {
2953 if (!strncmp(m_log_name, linfo->log_file_name, m_log_name_len)) {
2954 LogErr(WARNING_LEVEL, ER_BINLOG_FILE_BEING_READ_NOT_PURGED, m_log_name,
2955 thd->thread_id());
2956 m_count++;
2957 }
2958 }
2959 mysql_mutex_unlock(&thd->LOCK_thd_data);
2960 }
get_count()2961 int get_count() { return m_count; }
2962
2963 private:
2964 const char *m_log_name;
2965 size_t m_log_name_len;
2966 int m_count;
2967 };
2968
log_in_use(const char * log_name)2969 static int log_in_use(const char *log_name) {
2970 Log_in_use log_in_use(log_name);
2971 #ifndef DBUG_OFF
2972 if (current_thd)
2973 DEBUG_SYNC(current_thd, "purge_logs_after_lock_index_before_thread_count");
2974 #endif
2975 Global_THD_manager::get_instance()->do_for_all_thd(&log_in_use);
2976 return log_in_use.get_count();
2977 }
2978
purge_error_message(THD * thd,int res)2979 static bool purge_error_message(THD *thd, int res) {
2980 uint errcode;
2981
2982 if ((errcode = purge_log_get_error_code(res)) != 0) {
2983 my_error(errcode, MYF(0));
2984 return true;
2985 }
2986 my_ok(thd);
2987 return false;
2988 }
2989
is_transaction_empty(THD * thd)2990 bool is_transaction_empty(THD *thd) {
2991 DBUG_TRACE;
2992 int rw_ha_count = check_trx_rw_engines(thd, Transaction_ctx::SESSION);
2993 rw_ha_count += check_trx_rw_engines(thd, Transaction_ctx::STMT);
2994 return rw_ha_count == 0;
2995 }
2996
check_trx_rw_engines(THD * thd,Transaction_ctx::enum_trx_scope trx_scope)2997 int check_trx_rw_engines(THD *thd, Transaction_ctx::enum_trx_scope trx_scope) {
2998 DBUG_TRACE;
2999
3000 int rw_ha_count = 0;
3001 Ha_trx_info *ha_list =
3002 (Ha_trx_info *)thd->get_transaction()->ha_trx_info(trx_scope);
3003
3004 for (Ha_trx_info *ha_info = ha_list; ha_info; ha_info = ha_info->next()) {
3005 if (ha_info->is_trx_read_write()) ++rw_ha_count;
3006 }
3007 return rw_ha_count;
3008 }
3009
is_empty_transaction_in_binlog_cache(const THD * thd)3010 bool is_empty_transaction_in_binlog_cache(const THD *thd) {
3011 DBUG_TRACE;
3012
3013 binlog_cache_mngr *const cache_mngr = thd_get_cache_mngr(thd);
3014 if (cache_mngr != nullptr && cache_mngr->has_empty_transaction()) {
3015 return true;
3016 }
3017
3018 return false;
3019 }
3020
3021 /**
3022 This function checks if a transactional table was updated by the
3023 current transaction.
3024
3025 @param thd The client thread that executed the current statement.
3026 @return
3027 @c true if a transactional table was updated, @c false otherwise.
3028 */
trans_has_updated_trans_table(const THD * thd)3029 bool trans_has_updated_trans_table(const THD *thd) {
3030 binlog_cache_mngr *const cache_mngr = thd_get_cache_mngr(thd);
3031
3032 return (cache_mngr ? !cache_mngr->trx_cache.is_binlog_empty() : 0);
3033 }
3034
3035 /**
3036 This function checks if a transactional table was updated by the
3037 current statement.
3038
3039 @param ha_list Registered storage engine handler list.
3040 @return
3041 @c true if a transactional table was updated, @c false otherwise.
3042 */
stmt_has_updated_trans_table(Ha_trx_info * ha_list)3043 bool stmt_has_updated_trans_table(Ha_trx_info *ha_list) {
3044 const Ha_trx_info *ha_info;
3045 for (ha_info = ha_list; ha_info; ha_info = ha_info->next()) {
3046 if (ha_info->is_trx_read_write() && ha_info->ht() != binlog_hton)
3047 return (true);
3048 }
3049 return (false);
3050 }
3051
3052 /**
3053 This function checks if a transaction, either a multi-statement
3054 or a single statement transaction is about to commit or not.
3055
3056 @param thd The client thread that executed the current statement.
3057 @param all Committing a transaction (i.e. true) or a statement
3058 (i.e. false).
3059 @return
3060 @c true if committing a transaction, otherwise @c false.
3061 */
ending_trans(THD * thd,const bool all)3062 bool ending_trans(THD *thd, const bool all) {
3063 return (all || ending_single_stmt_trans(thd, all));
3064 }
3065
3066 /**
3067 This function checks if a single statement transaction is about
3068 to commit or not.
3069
3070 @param thd The client thread that executed the current statement.
3071 @param all Committing a transaction (i.e. true) or a statement
3072 (i.e. false).
3073 @return
3074 @c true if committing a single statement transaction, otherwise
3075 @c false.
3076 */
ending_single_stmt_trans(THD * thd,const bool all)3077 bool ending_single_stmt_trans(THD *thd, const bool all) {
3078 return (!all && !thd->in_multi_stmt_transaction_mode());
3079 }
3080
3081 /**
3082 This function checks if a transaction cannot be rolled back safely.
3083
3084 @param thd The client thread that executed the current statement.
3085 @return
3086 @c true if cannot be safely rolled back, @c false otherwise.
3087 */
trans_cannot_safely_rollback(const THD * thd)3088 bool trans_cannot_safely_rollback(const THD *thd) {
3089 binlog_cache_mngr *const cache_mngr = thd_get_cache_mngr(thd);
3090
3091 return cache_mngr->trx_cache.cannot_rollback();
3092 }
3093
3094 /**
3095 This function checks if current statement cannot be rollded back safely.
3096
3097 @param thd The client thread that executed the current statement.
3098 @return
3099 @c true if cannot be safely rolled back, @c false otherwise.
3100 */
stmt_cannot_safely_rollback(const THD * thd)3101 bool stmt_cannot_safely_rollback(const THD *thd) {
3102 return thd->get_transaction()->cannot_safely_rollback(Transaction_ctx::STMT);
3103 }
3104
3105 /**
3106 Execute a PURGE BINARY LOGS TO @<log@> command.
3107
3108 @param thd Pointer to THD object for the client thread executing the
3109 statement.
3110
3111 @param to_log Name of the last log to purge.
3112
3113 @retval false success
3114 @retval true failure
3115 */
purge_master_logs(THD * thd,const char * to_log)3116 bool purge_master_logs(THD *thd, const char *to_log) {
3117 char search_file_name[FN_REFLEN];
3118 if (!mysql_bin_log.is_open()) {
3119 my_ok(thd);
3120 return false;
3121 }
3122
3123 mysql_bin_log.make_log_name(search_file_name, to_log);
3124 return purge_error_message(
3125 thd, mysql_bin_log.purge_logs(
3126 search_file_name, false, true /*need_lock_index=true*/,
3127 true /*need_update_threads=true*/, nullptr, false));
3128 }
3129
3130 /**
3131 Execute a PURGE BINARY LOGS BEFORE @<date@> command.
3132
3133 @param thd Pointer to THD object for the client thread executing the
3134 statement.
3135
3136 @param purge_time Date before which logs should be purged.
3137
3138 @retval false success
3139 @retval true failure
3140 */
purge_master_logs_before_date(THD * thd,time_t purge_time)3141 bool purge_master_logs_before_date(THD *thd, time_t purge_time) {
3142 if (!mysql_bin_log.is_open()) {
3143 my_ok(thd);
3144 return false;
3145 }
3146 return purge_error_message(
3147 thd, mysql_bin_log.purge_logs_before_date(purge_time, false));
3148 }
3149
3150 /*
3151 Helper function to get the error code of the query to be binlogged.
3152 */
query_error_code(const THD * thd,bool not_killed)3153 int query_error_code(const THD *thd, bool not_killed) {
3154 int error;
3155
3156 if (not_killed) {
3157 error = thd->is_error() ? thd->get_stmt_da()->mysql_errno() : 0;
3158
3159 /* thd->get_stmt_da()->sql_errno() might be ER_SERVER_SHUTDOWN or
3160 ER_QUERY_INTERRUPTED, So here we need to make sure that error
3161 is not set to these errors when specified not_killed by the
3162 caller.
3163 */
3164 if (error == ER_SERVER_SHUTDOWN || error == ER_QUERY_INTERRUPTED) error = 0;
3165 } else
3166 error = thd->killed;
3167
3168 return error;
3169 }
3170
3171 /**
3172 Copy content of 'from' file from offset to 'to' file.
3173
3174 - We do the copy outside of the IO_CACHE as the cache
3175 buffers would just make things slower and more complicated.
3176 In most cases the copy loop should only do one read.
3177
3178 @param from File to copy.
3179 @param to File to copy to.
3180 @param offset Offset in 'from' file.
3181
3182
3183 @retval
3184 0 ok
3185 @retval
3186 -1 error
3187 */
copy_file(IO_CACHE * from,IO_CACHE * to,my_off_t offset)3188 static bool copy_file(IO_CACHE *from, IO_CACHE *to, my_off_t offset) {
3189 int bytes_read;
3190 uchar io_buf[IO_SIZE * 2];
3191 DBUG_TRACE;
3192
3193 mysql_file_seek(from->file, offset, MY_SEEK_SET, MYF(0));
3194 while (true) {
3195 if ((bytes_read = (int)mysql_file_read(from->file, io_buf, sizeof(io_buf),
3196 MYF(MY_WME))) < 0)
3197 goto err;
3198 if (DBUG_EVALUATE_IF("fault_injection_copy_part_file", 1, 0))
3199 bytes_read = bytes_read / 2;
3200 if (!bytes_read) break; // end of file
3201 if (mysql_file_write(to->file, io_buf, bytes_read, MYF(MY_WME | MY_NABP)))
3202 goto err;
3203 }
3204
3205 return false;
3206
3207 err:
3208 return true;
3209 }
3210
3211 /**
3212 Load data's io cache specific hook to be executed
3213 before a chunk of data is being read into the cache's buffer
3214 The fuction instantianates and writes into the binlog
3215 replication events along LOAD DATA processing.
3216
3217 @param file pointer to io-cache
3218 @retval 0 success
3219 @retval 1 failure
3220 */
log_loaded_block(IO_CACHE * file)3221 int log_loaded_block(IO_CACHE *file) {
3222 DBUG_TRACE;
3223 LOAD_FILE_INFO *lf_info;
3224 uint block_len;
3225 /* buffer contains position where we started last read */
3226 uchar *buffer = (uchar *)my_b_get_buffer_start(file);
3227 uint max_event_size = current_thd->variables.max_allowed_packet;
3228 lf_info = (LOAD_FILE_INFO *)file->arg;
3229 if (lf_info->thd->is_current_stmt_binlog_format_row()) return 0;
3230 if (lf_info->last_pos_in_file != HA_POS_ERROR &&
3231 lf_info->last_pos_in_file >= my_b_get_pos_in_file(file))
3232 return 0;
3233
3234 for (block_len = (uint)(my_b_get_bytes_in_buffer(file)); block_len > 0;
3235 buffer += min(block_len, max_event_size),
3236 block_len -= min(block_len, max_event_size)) {
3237 lf_info->last_pos_in_file = my_b_get_pos_in_file(file);
3238 if (lf_info->logged_data_file) {
3239 Append_block_log_event a(lf_info->thd, lf_info->thd->db().str, buffer,
3240 min(block_len, max_event_size),
3241 lf_info->log_delayed);
3242 if (mysql_bin_log.write_event(&a)) return 1;
3243 } else {
3244 Begin_load_query_log_event b(lf_info->thd, lf_info->thd->db().str, buffer,
3245 min(block_len, max_event_size),
3246 lf_info->log_delayed);
3247 if (mysql_bin_log.write_event(&b)) return 1;
3248 lf_info->logged_data_file = true;
3249 }
3250 }
3251 return 0;
3252 }
3253
/**
  Helper function for SHOW BINLOG/RELAYLOG EVENTS.

  Locates the requested log file (or the first one when no name was given),
  opens it at the requested position and sends the events to the client,
  honouring the LIMIT offset/count of the statement. If the log is not
  open, no events are sent and only EOF is returned.

  @tparam BINLOG_FILE_READER  Reader type used to open and read the log
                              file.

  @param thd         Session executing the statement; must be running
                     SHOW BINLOG EVENTS or SHOW RELAYLOG EVENTS.
  @param binary_log  Log object whose events are shown.

  @retval false  success, EOF was sent
  @retval true   failure, ER_ERROR_WHEN_EXECUTING_COMMAND was raised
*/
template <class BINLOG_FILE_READER>
bool show_binlog_events(THD *thd, MYSQL_BIN_LOG *binary_log) {
  Protocol *protocol = thd->get_protocol();
  List<Item> field_list;
  /* Non-empty errmsg is the error indicator for the whole function. */
  std::string errmsg;
  LOG_INFO linfo;

  DBUG_TRACE;

  DBUG_ASSERT(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS ||
              thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS);

  if (binary_log->is_open()) {
    LEX_MASTER_INFO *lex_mi = &thd->lex->mi;
    SELECT_LEX_UNIT *unit = thd->lex->unit;
    ha_rows event_count, limit_start, limit_end;
    /* Never start before the end of the file header. */
    my_off_t pos =
        max<my_off_t>(BIN_LOG_HEADER_SIZE, lex_mi->pos);  // user-friendly
    char search_file_name[FN_REFLEN], *name;
    const char *log_file_name = lex_mi->log_file_name;
    Log_event *ev = nullptr;

    unit->set_limit(thd, thd->lex->current_select());
    limit_start = unit->offset_limit_cnt;
    limit_end = unit->select_limit_cnt;

    name = search_file_name;
    if (log_file_name)
      binary_log->make_log_name(search_file_name, log_file_name);
    else
      name = nullptr;  // Find first log

    linfo.index_file_offset = 0;

    if (binary_log->find_log_pos(&linfo, name, true /*need_lock_index=true*/)) {
      errmsg = "Could not find target log";
      goto err;
    }

    /*
      Publish linfo in the THD under LOCK_thd_data; it is reset to nullptr
      under the same mutex before this function returns.
    */
    mysql_mutex_lock(&thd->LOCK_thd_data);
    thd->current_linfo = &linfo;
    mysql_mutex_unlock(&thd->LOCK_thd_data);

    BINLOG_FILE_READER binlog_file_reader(
        opt_master_verify_checksum,
        std::max(thd->variables.max_allowed_packet,
                 binlog_row_event_max_size + MAX_LOG_EVENT_HEADER));

    if (binlog_file_reader.open(linfo.log_file_name, pos)) {
      errmsg = binlog_file_reader.get_error_str();
      goto err;
    }

    /*
      Adjust the pos to the correct starting offset of an event after the
      specified position if it is an invalid starting offset.
    */
    pos = binlog_file_reader.position();

    /*
      For 'in-active' binlog file, it is safe to read all events in it. But
      for 'active' binlog file, it is only safe to read the events before
      get_binlog_end_pos().

      Binlog rotation may happen after calling is_active(). In this case,
      end_pos will NOT be set to 0 while the file is actually not 'active'.
      It is safe, since 'end_pos' still expresses a correct position.
    */
    my_off_t end_pos = binary_log->get_binlog_end_pos();
    if (!binary_log->is_active(linfo.log_file_name)) end_pos = 0;

    DEBUG_SYNC(thd, "after_show_binlog_event_found_file");

    /**
      Relaylog_file_reader and Binlog_file_reader are typedefs to
      Basic_binlog_file_reader whereas Relaylog_file_reader uses
      a Relaylog_ifile in the template instantiation and
      Binlog_file_reader uses a Binlog_ifile in the template
      instantiation.

      Binlog_ifile and Relaylog_ifile differ only in the open()
      member function and they both derive from Basic_binlog_ifile.

      Therefore, it is OK to cast to Binlog_file_reader here.

      TODO: in the future investigate if some refactoring is needed
      here. Perhaps make the Iterator itself templated.
    */
    binlog::tools::Iterator it(
        reinterpret_cast<Binlog_file_reader *>(&binlog_file_reader));

    /*
      Unpacked events shall copy their part of the buffer from uncompressed
      buffer (the container, i.e., the buffer iterator goes out of scope
      once the events are inflated and put in a vector). However, it is
      unclear if the *buffer* from which events are deserialized is still
      needed for the purposes of displaying events in SHOW BINLOG/RELAYLOG
      EVENTS.
    */
    my_off_t last_log_pos = 0;
    /* Each event handed out by the iterator is deleted in this loop. */
    for (event_count = 0, ev = it.begin(); ev != it.end();) {
      DEBUG_SYNC(thd, "wait_in_show_binlog_events_loop");
      /* Events before the LIMIT offset are read but not sent. */
      if (event_count >= limit_start &&
          ev->net_send(protocol, linfo.log_file_name, pos)) {
        /* purecov: begin inspected */
        errmsg = "Net error";
        delete ev;
        ev = nullptr;
        goto err;
        /* purecov: end */
      }
      last_log_pos = ev->common_header->log_pos;
      delete ev;
      ev = nullptr;
      pos = binlog_file_reader.position();

      if (++event_count == limit_end) break;
      if ((ev = it.next()) == it.end()) break;
      if (it.has_error()) break;
      /*
        Stop once the reader has reached end_pos of the active file, unless
        the event just fetched carries the same log_pos as the previous one.
      */
      if (end_pos > 0 && pos >= end_pos &&
          (ev->common_header->log_pos != last_log_pos)) {
        delete ev;
        ev = nullptr;
        break;
      }
    }

    /* A fatal reader error takes precedence over an iterator error. */
    if (binlog_file_reader.has_fatal_error())
      errmsg = binlog_file_reader.get_error_str();
    else if (it.has_error())
      errmsg = it.get_error_message(); /* purecov: inspected */
    else
      errmsg = "";
  }
  // Check that linfo is still on the function scope.
  DEBUG_SYNC(thd, "after_show_binlog_events");

err:
  if (!errmsg.empty()) {
    if (thd->lex->sql_command == SQLCOM_SHOW_RELAYLOG_EVENTS)
      my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), "SHOW RELAYLOG EVENTS",
               errmsg.c_str());
    else
      my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), "SHOW BINLOG EVENTS",
               errmsg.c_str());
  } else
    my_eof(thd);

  /* linfo is about to go out of scope: unpublish it from the THD. */
  mysql_mutex_lock(&thd->LOCK_thd_data);
  thd->current_linfo = nullptr;
  mysql_mutex_unlock(&thd->LOCK_thd_data);
  return !errmsg.empty();
}
3408
show_binlog_events(THD * thd,MYSQL_BIN_LOG * binary_log)3409 bool show_binlog_events(THD *thd, MYSQL_BIN_LOG *binary_log) {
3410 if (binary_log->is_relay_log)
3411 return show_binlog_events<Relaylog_file_reader>(thd, binary_log);
3412 return show_binlog_events<Binlog_file_reader>(thd, binary_log);
3413 }
3414
3415 /**
3416 Execute a SHOW BINLOG EVENTS statement.
3417
3418 @param thd Pointer to THD object for the client thread executing the
3419 statement.
3420
3421 @retval false success
3422 @retval true failure
3423 */
mysql_show_binlog_events(THD * thd)3424 bool mysql_show_binlog_events(THD *thd) {
3425 List<Item> field_list;
3426 DBUG_TRACE;
3427
3428 DBUG_ASSERT(thd->lex->sql_command == SQLCOM_SHOW_BINLOG_EVENTS);
3429
3430 Log_event::init_show_field_list(&field_list);
3431 if (thd->send_result_metadata(&field_list,
3432 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
3433 return true;
3434
3435 /*
3436 Wait for handlers to insert any pending information
3437 into the binlog. For e.g. ndb which updates the binlog asynchronously
3438 this is needed so that the uses sees all its own commands in the binlog
3439 */
3440 ha_binlog_wait(thd);
3441
3442 return show_binlog_events(thd, &mysql_bin_log);
3443 }
3444
/**
  Construct the log object with all members in their initial state and an
  empty index file name. Mutexes and condition variables are NOT created
  here; see the comment in the body.

  @param sync_period  Pointer kept in sync_period_ptr.
  @param relay_log    Kept in is_relay_log.
*/
MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period, bool relay_log)
    : name(nullptr),
      write_error(false),
      inited(false),
      m_binlog_file(new Binlog_ofile()),
      m_key_LOCK_log(key_LOG_LOCK_log),
      bytes_written(0),
      file_id(1),
      sync_period_ptr(sync_period),
      sync_counter(0),
      is_relay_log(relay_log),
      checksum_alg_reset(binary_log::BINLOG_CHECKSUM_ALG_UNDEF),
      relay_log_checksum_alg(binary_log::BINLOG_CHECKSUM_ALG_UNDEF),
      previous_gtid_set_relaylog(nullptr),
      is_rotating_caused_by_incident(false) {
  /*
    We don't want to initialize locks here as such initialization depends on
    safe_mutex (when using safe_mutex) which depends on MY_INIT(), which is
    called only in main(). Doing initialization here would make it happen
    before main().
  */
  index_file_name[0] = 0;
}
3468
~MYSQL_BIN_LOG()3469 MYSQL_BIN_LOG::~MYSQL_BIN_LOG() { delete m_binlog_file; }
3470
/**
  Tear down the log object. This is called only once.

  If init_pthread_objects() has been run (inited is set), the log and its
  index are closed and the mutexes and condition variables are destroyed;
  for a non-relay log the Commit_stage_manager is deinitialized as well.
  The output file object is deleted in every case.
*/

void MYSQL_BIN_LOG::cleanup() {
  DBUG_TRACE;
  if (inited) {
    inited = false;
    close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT, true /*need_lock_log=true*/,
          true /*need_lock_index=true*/);
    /* The log is closed (above) before the synchronization objects go. */
    mysql_mutex_destroy(&LOCK_log);
    mysql_mutex_destroy(&LOCK_index);
    mysql_mutex_destroy(&LOCK_commit);
    mysql_mutex_destroy(&LOCK_sync);
    mysql_mutex_destroy(&LOCK_binlog_end_pos);
    mysql_mutex_destroy(&LOCK_xids);
    mysql_cond_destroy(&update_cond);
    mysql_cond_destroy(&m_prep_xids_cond);
    if (!is_relay_log) {
      Commit_stage_manager::get_instance().deinit();
    }
  }

  /* Reset the pointer so that the destructor's delete becomes a no-op. */
  delete m_binlog_file;
  m_binlog_file = nullptr;
}
3495
/**
  Create the mutexes and condition variables of the log object and mark it
  as inited. Must not be called on an object that is already inited.

  For a non-relay log the Commit_stage_manager singleton is initialized
  too, with this object's instrumentation keys.
*/
void MYSQL_BIN_LOG::init_pthread_objects() {
  DBUG_ASSERT(inited == 0);
  inited = true;

  mysql_mutex_init(m_key_LOCK_log, &LOCK_log, MY_MUTEX_INIT_SLOW);
  mysql_mutex_init(m_key_LOCK_index, &LOCK_index, MY_MUTEX_INIT_SLOW);
  mysql_mutex_init(m_key_LOCK_commit, &LOCK_commit, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(m_key_LOCK_sync, &LOCK_sync, MY_MUTEX_INIT_FAST);
  mysql_mutex_init(m_key_LOCK_binlog_end_pos, &LOCK_binlog_end_pos,
                   MY_MUTEX_INIT_FAST);
  mysql_mutex_init(m_key_LOCK_xids, &LOCK_xids, MY_MUTEX_INIT_FAST);
  mysql_cond_init(m_key_update_cond, &update_cond);
  mysql_cond_init(m_key_prep_xids_cond, &m_prep_xids_cond);
  /* cleanup() deinitializes the manager under the same condition. */
  if (!is_relay_log) {
    Commit_stage_manager::get_instance().init(
        m_key_LOCK_flush_queue, m_key_LOCK_sync_queue, m_key_LOCK_commit_queue,
        m_key_LOCK_done, m_key_COND_done);
  }
}
3515
3516 /**
3517 Check if a string is a valid number.
3518
3519 @param str String to test
3520 @param res Store value here
3521 @param allow_wildcards Set to 1 if we should ignore '%' and '_'
3522
3523 @note
3524 For the moment the allow_wildcards argument is not used
3525 Should be moved to some other file.
3526
3527 @retval
3528 1 String is a number
3529 @retval
3530 0 String is not a number
3531 */
3532
is_number(const char * str,ulong * res,bool allow_wildcards)3533 static bool is_number(const char *str, ulong *res, bool allow_wildcards) {
3534 int flag;
3535 const char *start;
3536 DBUG_TRACE;
3537
3538 flag = 0;
3539 start = str;
3540 while (*str++ == ' ')
3541 ;
3542 if (*--str == '-' || *str == '+') str++;
3543 while (my_isdigit(files_charset_info, *str) ||
3544 (allow_wildcards && (*str == wild_many || *str == wild_one))) {
3545 flag = 1;
3546 str++;
3547 }
3548 if (*str == '.') {
3549 for (str++; my_isdigit(files_charset_info, *str) ||
3550 (allow_wildcards && (*str == wild_many || *str == wild_one));
3551 str++, flag = 1)
3552 ;
3553 }
3554 if (*str != 0 || flag == 0) return false;
3555 if (res) *res = atol(start);
3556 return true; /* Number ok */
3557 } /* is_number */
3558
3559 /**
3560 Find a unique filename for 'filename.#'.
3561
3562 Set '#' to the highest existing log file extension plus one.
3563
3564 This function will return nonzero if: (i) the generated name
3565 exceeds FN_REFLEN; (ii) if the number of extensions is exhausted;
3566 or (iii) some other error happened while examining the filesystem.
3567
3568 @return
3569 nonzero if not possible to get unique filename.
3570 */
3571
find_uniq_filename(char * name,uint32 new_index_number)3572 static int find_uniq_filename(char *name, uint32 new_index_number) {
3573 uint i;
3574 char buff[FN_REFLEN], ext_buf[FN_REFLEN];
3575 MY_DIR *dir_info = nullptr;
3576 struct fileinfo *file_info;
3577 ulong max_found = 0, next = 0, number = 0;
3578 size_t buf_length, length;
3579 char *start, *end;
3580 int error = 0;
3581 DBUG_TRACE;
3582
3583 length = dirname_part(buff, name, &buf_length);
3584 start = name + length;
3585 end = strend(start);
3586
3587 *end = '.';
3588 length = (size_t)(end - start + 1);
3589
3590 if ((DBUG_EVALUATE_IF(
3591 "error_unique_log_filename", 1,
3592 !(dir_info =
3593 my_dir(buff, MYF(MY_DONT_SORT)))))) { // This shouldn't happen
3594 my_stpcpy(end, ".1"); // use name+1
3595 return 1;
3596 }
3597 file_info = dir_info->dir_entry;
3598 for (i = dir_info->number_off_files; i--; file_info++) {
3599 if (strncmp(file_info->name, start, length) == 0 &&
3600 is_number(file_info->name + length, &number, false)) {
3601 max_found = std::max(max_found, number);
3602 }
3603 }
3604 my_dirend(dir_info);
3605
3606 /* check if reached the maximum possible extension number */
3607 if (max_found >= MAX_LOG_UNIQUE_FN_EXT) {
3608 LogErr(ERROR_LEVEL, ER_BINLOG_FILE_EXTENSION_NUMBER_EXHAUSTED, max_found);
3609 error = 1;
3610 goto end;
3611 }
3612
3613 if (new_index_number > 0) {
3614 /*
3615 If "new_index_number" was specified, this means we are handling a
3616 "RESET MASTER TO" command and the binary log was already purged
3617 so max_found should be 0.
3618 */
3619 DBUG_ASSERT(max_found == 0);
3620 next = new_index_number;
3621 } else
3622 next = max_found + 1;
3623 if (sprintf(ext_buf, "%06lu", next) < 0) {
3624 error = 1;
3625 goto end;
3626 }
3627 *end++ = '.';
3628
3629 /*
3630 Check if the generated extension size + the file name exceeds the
3631 buffer size used. If one did not check this, then the filename might be
3632 truncated, resulting in error.
3633 */
3634 if (((strlen(ext_buf) + (end - name)) >= FN_REFLEN)) {
3635 LogErr(ERROR_LEVEL, ER_BINLOG_FILE_NAME_TOO_LONG, name, ext_buf,
3636 (strlen(ext_buf) + (end - name)));
3637 error = 1;
3638 goto end;
3639 }
3640
3641 if (sprintf(end, "%06lu", next) < 0) {
3642 error = 1;
3643 goto end;
3644 }
3645
3646 /* print warning if reaching the end of available extensions. */
3647 if (next > MAX_ALLOWED_FN_EXT_RESET_MASTER)
3648 LogErr(WARNING_LEVEL, ER_BINLOG_FILE_EXTENSION_NUMBER_RUNNING_LOW, next,
3649 (MAX_LOG_UNIQUE_FN_EXT - next));
3650
3651 end:
3652 return error;
3653 }
3654
generate_new_name(char * new_name,const char * log_name,uint32 new_index_number)3655 int MYSQL_BIN_LOG::generate_new_name(char *new_name, const char *log_name,
3656 uint32 new_index_number) {
3657 fn_format(new_name, log_name, mysql_data_home, "", 4);
3658 if (!fn_ext(log_name)[0]) {
3659 if (find_uniq_filename(new_name, new_index_number)) {
3660 if (current_thd != nullptr)
3661 my_printf_error(ER_NO_UNIQUE_LOGFILE,
3662 ER_THD(current_thd, ER_NO_UNIQUE_LOGFILE),
3663 MYF(ME_FATALERROR), log_name);
3664 LogErr(ERROR_LEVEL, ER_FAILED_TO_GENERATE_UNIQUE_LOGFILE, log_name);
3665 return 1;
3666 }
3667 }
3668 return 0;
3669 }
3670
3671 /**
3672 @todo
3673 The following should be using fn_format(); We just need to
3674 first change fn_format() to cut the file name if it's too long.
3675 */
generate_name(const char * log_name,const char * suffix,char * buff)3676 const char *MYSQL_BIN_LOG::generate_name(const char *log_name,
3677 const char *suffix, char *buff) {
3678 if (!log_name || !log_name[0]) {
3679 if (is_relay_log || log_bin_supplied)
3680 strmake(buff, default_logfile_name, FN_REFLEN - strlen(suffix) - 1);
3681 else
3682 strmake(buff, default_binlogfile_name, FN_REFLEN - strlen(suffix) - 1);
3683
3684 return (const char *)fn_format(buff, buff, "", suffix,
3685 MYF(MY_REPLACE_EXT | MY_REPLACE_DIR));
3686 }
3687 // get rid of extension to avoid problems
3688
3689 const char *p = fn_ext(log_name);
3690 uint length = (uint)(p - log_name);
3691 strmake(buff, log_name, min<size_t>(length, FN_REFLEN - 1));
3692 return (const char *)buff;
3693 }
3694
init_and_set_log_file_name(const char * log_name,const char * new_name,uint32 new_index_number)3695 bool MYSQL_BIN_LOG::init_and_set_log_file_name(const char *log_name,
3696 const char *new_name,
3697 uint32 new_index_number) {
3698 if (new_name && !my_stpcpy(log_file_name, new_name))
3699 return true;
3700 else if (!new_name &&
3701 generate_new_name(log_file_name, log_name, new_index_number))
3702 return true;
3703
3704 return false;
3705 }
3706
3707 /**
3708 Open the logfile and init IO_CACHE.
3709
3710 @param log_file_key The file instrumentation key for this file
3711 @param log_name The name of the log to open
3712 @param new_name The new name for the logfile.
3713 NULL forces generate_new_name() to be called.
3714 @param new_index_number The binary log file index number to start from
3715 after the RESET MASTER TO command is called.
3716
3717 @return true if error, false otherwise.
3718 */
3719
open(PSI_file_key log_file_key,const char * log_name,const char * new_name,uint32 new_index_number)3720 bool MYSQL_BIN_LOG::open(PSI_file_key log_file_key, const char *log_name,
3721 const char *new_name, uint32 new_index_number) {
3722 DBUG_TRACE;
3723 bool ret = false;
3724
3725 write_error = false;
3726 myf flags = MY_WME | MY_NABP | MY_WAIT_IF_FULL;
3727 if (is_relay_log) flags = flags | MY_REPORT_WAITING_IF_FULL;
3728
3729 if (!(name = my_strdup(key_memory_MYSQL_LOG_name, log_name, MYF(MY_WME)))) {
3730 goto err;
3731 }
3732
3733 if (init_and_set_log_file_name(name, new_name, new_index_number) ||
3734 DBUG_EVALUATE_IF("fault_injection_init_name", 1, 0))
3735 goto err;
3736
3737 db[0] = 0;
3738
3739 /* Keep the key for reopen */
3740 m_log_file_key = log_file_key;
3741
3742 /*
3743 LOCK_sync guarantees that no thread is calling m_binlog_file to sync data
3744 to disk when another thread is opening the new file
3745 (FLUSH LOG or RESET MASTER).
3746 */
3747 if (!is_relay_log) mysql_mutex_lock(&LOCK_sync);
3748
3749 ret = m_binlog_file->open(log_file_key, log_file_name, flags);
3750
3751 if (!is_relay_log) mysql_mutex_unlock(&LOCK_sync);
3752
3753 if (ret) goto err;
3754
3755 atomic_log_state = LOG_OPENED;
3756 return false;
3757
3758 err:
3759 if (binlog_error_action == ABORT_SERVER) {
3760 exec_binlog_error_action_abort(
3761 "Either disk is full, file system is read only or "
3762 "there was an encryption error while opening the binlog. "
3763 "Aborting the server.");
3764 } else
3765 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_OPEN_FOR_LOGGING, log_name, errno);
3766
3767 my_free(name);
3768 name = nullptr;
3769 atomic_log_state = LOG_CLOSED;
3770 return true;
3771 }
3772
open_index_file(const char * index_file_name_arg,const char * log_name,bool need_lock_index)3773 bool MYSQL_BIN_LOG::open_index_file(const char *index_file_name_arg,
3774 const char *log_name,
3775 bool need_lock_index) {
3776 bool error = false;
3777 File index_file_nr = -1;
3778 if (need_lock_index)
3779 mysql_mutex_lock(&LOCK_index);
3780 else
3781 mysql_mutex_assert_owner(&LOCK_index);
3782
3783 /*
3784 First open of this class instance
3785 Create an index file that will hold all file names uses for logging.
3786 Add new entries to the end of it.
3787 */
3788 myf opt = MY_UNPACK_FILENAME;
3789
3790 if (my_b_inited(&index_file)) goto end;
3791
3792 if (!index_file_name_arg) {
3793 index_file_name_arg = log_name; // Use same basename for index file
3794 opt = MY_UNPACK_FILENAME | MY_REPLACE_EXT;
3795 }
3796 fn_format(index_file_name, index_file_name_arg, mysql_data_home, ".index",
3797 opt);
3798
3799 if (set_crash_safe_index_file_name(index_file_name_arg)) {
3800 error = true;
3801 goto end;
3802 }
3803
3804 /*
3805 We need move crash_safe_index_file to index_file if the index_file
3806 does not exist and crash_safe_index_file exists when mysqld server
3807 restarts.
3808 */
3809 if (my_access(index_file_name, F_OK) &&
3810 !my_access(crash_safe_index_file_name, F_OK) &&
3811 my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME))) {
3812 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_MOVE_TMP_TO_INDEX,
3813 "MYSQL_BIN_LOG::open_index_file");
3814 error = true;
3815 goto end;
3816 }
3817
3818 if ((index_file_nr = mysql_file_open(m_key_file_log_index, index_file_name,
3819 O_RDWR | O_CREAT, MYF(MY_WME))) < 0 ||
3820 mysql_file_sync(index_file_nr, MYF(MY_WME)) ||
3821 init_io_cache_ext(&index_file, index_file_nr, IO_SIZE, READ_CACHE,
3822 mysql_file_seek(index_file_nr, 0L, MY_SEEK_END, MYF(0)),
3823 false, MYF(MY_WME | MY_WAIT_IF_FULL),
3824 m_key_file_log_index_cache) ||
3825 DBUG_EVALUATE_IF("fault_injection_openning_index", 1, 0)) {
3826 /*
3827 TODO: all operations creating/deleting the index file or a log, should
3828 call my_sync_dir() or my_sync_dir_by_file() to be durable.
3829 TODO: file creation should be done with mysql_file_create()
3830 not mysql_file_open().
3831 */
3832 if (index_file_nr >= 0) mysql_file_close(index_file_nr, MYF(0));
3833 error = true;
3834 goto end;
3835 }
3836
3837 /*
3838 Sync the index by purging any binary log file that is not registered.
3839 In other words, either purge binary log files that were removed from
3840 the index but not purged from the file system due to a crash or purge
3841 any binary log file that was created but not register in the index
3842 due to a crash.
3843 */
3844
3845 if (set_purge_index_file_name(index_file_name_arg) ||
3846 open_purge_index_file(false) ||
3847 purge_index_entry(nullptr, nullptr, false) || close_purge_index_file() ||
3848 DBUG_EVALUATE_IF("fault_injection_recovering_index", 1, 0)) {
3849 LogErr(ERROR_LEVEL, ER_BINLOG_FAILED_TO_SYNC_INDEX_FILE);
3850 error = true;
3851 goto end;
3852 }
3853
3854 end:
3855 if (need_lock_index) mysql_mutex_unlock(&LOCK_index);
3856 return error;
3857 }
3858
3859 /**
3860 Add the GTIDs from the given relaylog file and also
3861 update the IO thread transaction parser.
3862
3863 @param filename Relaylog file to read from.
3864 @param retrieved_gtids Gtid_set to store the GTIDs found on the relaylog file.
3865 @param verify_checksum Set to true to verify event checksums.
3866 @param trx_parser The transaction boundary parser to be used in order to
3867 only add a GTID to the gtid_set after ensuring the transaction is fully
3868 stored on the relay log.
3869 @param partial_trx The trx_monitoring_info of the last incomplete transaction
3870 found in the relay log.
3871
3872 @retval false The file was successfully read and all GTIDs from
3873 Previous_gtids and Gtid_log_event from complete transactions were added to
3874 the retrieved_set.
3875 @retval true There was an error during the procedure.
3876 */
read_gtids_and_update_trx_parser_from_relaylog(const char * filename,Gtid_set * retrieved_gtids,bool verify_checksum,Transaction_boundary_parser * trx_parser,Gtid_monitoring_info * partial_trx)3877 static bool read_gtids_and_update_trx_parser_from_relaylog(
3878 const char *filename, Gtid_set *retrieved_gtids, bool verify_checksum,
3879 Transaction_boundary_parser *trx_parser,
3880 Gtid_monitoring_info *partial_trx) {
3881 DBUG_TRACE;
3882 DBUG_PRINT("info", ("Opening file %s", filename));
3883
3884 DBUG_ASSERT(retrieved_gtids != nullptr);
3885 DBUG_ASSERT(trx_parser != nullptr);
3886 #ifndef DBUG_OFF
3887 unsigned long event_counter = 0;
3888 #endif
3889 bool error = false;
3890
3891 Relaylog_file_reader relaylog_file_reader(verify_checksum);
3892 if (relaylog_file_reader.open(filename)) {
3893 LogErr(ERROR_LEVEL, ER_BINLOG_FILE_OPEN_FAILED,
3894 relaylog_file_reader.get_error_str());
3895
3896 /*
3897 As read_gtids_from_binlog() will not throw error on truncated
3898 relaylog files, we should do the same here in order to keep the
3899 current behavior.
3900 */
3901 if (relaylog_file_reader.get_error_type() ==
3902 Binlog_read_error::CANNOT_GET_FILE_PASSWORD)
3903 error = true;
3904 return error;
3905 }
3906
3907 Log_event *ev = nullptr;
3908 bool seen_prev_gtids = false;
3909 ulong data_len = 0;
3910
3911 while (!error && (ev = relaylog_file_reader.read_event_object()) != nullptr) {
3912 DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
3913 #ifndef DBUG_OFF
3914 event_counter++;
3915 #endif
3916
3917 data_len = uint4korr(ev->temp_buf + EVENT_LEN_OFFSET);
3918
3919 bool info_error{false};
3920 binary_log::Log_event_basic_info log_event_info;
3921 std::tie(info_error, log_event_info) = extract_log_event_basic_info(
3922 ev->temp_buf, data_len,
3923 relaylog_file_reader.format_description_event());
3924
3925 if (info_error || trx_parser->feed_event(log_event_info, false)) {
3926 /*
3927 The transaction boundary parser found an error while parsing a
3928 sequence of events from the relaylog. As we don't know if the
3929 parsing has started from a reliable point (it might started in
3930 a relay log file that begins with the rest of a transaction
3931 that started in a previous relay log file), it is better to do
3932 nothing in this case. The boundary parser will fix itself once
3933 finding an event that represent a transaction boundary.
3934
3935 Suppose the following relaylog:
3936
3937 rl-bin.000011 | rl-bin.000012 | rl-bin.000013 | rl-bin-000014
3938 ---------------+---------------+---------------+---------------
3939 PREV_GTIDS | PREV_GTIDS | PREV_GTIDS | PREV_GTIDS
3940 (empty) | (UUID:1-2) | (UUID:1-2) | (UUID:1-2)
3941 ---------------+---------------+---------------+---------------
3942 XID | QUERY(INSERT) | QUERY(INSERT) | XID
3943 ---------------+---------------+---------------+---------------
3944 GTID(UUID:2) |
3945 ---------------+
3946 QUERY(CREATE |
3947 TABLE t1 ...) |
3948 ---------------+
3949 GTID(UUID:3) |
3950 ---------------+
3951 QUERY(BEGIN) |
3952 ---------------+
3953
3954 As it is impossible to determine the current Retrieved_Gtid_Set by only
3955 looking to the PREVIOUS_GTIDS on the last relay log file, and scanning
3956 events on it, we tried to find a relay log file that contains at least
3957 one GTID event during the backwards search.
3958
3959 In the example, we will find a GTID only in rl-bin.000011, as the
3960 UUID:3 transaction was spanned across 4 relay log files.
3961
3962 The transaction spanning can be caused by "FLUSH RELAY LOGS" commands
3963 on slave while it is queuing the transaction.
3964
3965 So, in order to correctly add UUID:3 into Retrieved_Gtid_Set, we need
3966 to parse the relay log starting on the file we found the last GTID
3967 queued to know if the transaction was fully retrieved or not.
3968
3969 Start scanning rl-bin.000011 after resetting the transaction parser
3970 will generate an error, as XID event is only expected inside a DML,
3971 but in this case, we can ignore this error and reset the parser.
3972 */
3973 trx_parser->reset();
3974 /*
3975 We also have to discard the GTID of the partial transaction that was
3976 not finished if there is one. This is needed supposing that an
3977 incomplete transaction was replicated with a GTID.
3978
3979 GTID(1), QUERY(BEGIN), QUERY(INSERT), ANONYMOUS_GTID, QUERY(DROP ...)
3980
3981 In the example above, without cleaning the partial_trx,
3982 the GTID(1) would be added to the Retrieved_Gtid_Set after the
3983 QUERY(DROP ...) event.
3984
3985 GTID(1), QUERY(BEGIN), QUERY(INSERT), GTID(2), QUERY(DROP ...)
3986
3987 In the example above the GTID(1) will also be discarded as the
3988 GTID(1) transaction is not complete.
3989 */
3990 if (partial_trx->is_processing_trx_set()) {
3991 DBUG_PRINT("info", ("Discarding Gtid(%d, %lld) as the transaction "
3992 "wasn't complete and we found an error in the"
3993 "transaction boundary parser.",
3994 partial_trx->get_processing_trx_gtid()->sidno,
3995 partial_trx->get_processing_trx_gtid()->gno));
3996 partial_trx->clear_processing_trx();
3997 }
3998 }
3999
4000 switch (ev->get_type_code()) {
4001 case binary_log::FORMAT_DESCRIPTION_EVENT:
4002 case binary_log::ROTATE_EVENT:
4003 // do nothing; just accept this event and go to next
4004 break;
4005 case binary_log::PREVIOUS_GTIDS_LOG_EVENT: {
4006 seen_prev_gtids = true;
4007 // add events to sets
4008 Previous_gtids_log_event *prev_gtids_ev =
4009 (Previous_gtids_log_event *)ev;
4010 if (prev_gtids_ev->add_to_set(retrieved_gtids) != 0) {
4011 error = true;
4012 break;
4013 }
4014 #ifndef DBUG_OFF
4015 char *prev_buffer = prev_gtids_ev->get_str(nullptr, nullptr);
4016 DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
4017 filename, prev_buffer));
4018 my_free(prev_buffer);
4019 #endif
4020 break;
4021 }
4022 case binary_log::GTID_LOG_EVENT: {
4023 /* If we didn't find any PREVIOUS_GTIDS in this file */
4024 if (!seen_prev_gtids) {
4025 my_error(ER_BINLOG_LOGICAL_CORRUPTION, MYF(0), filename,
4026 "The first global transaction identifier was read, but "
4027 "no other information regarding identifiers existing "
4028 "on the previous log files was found.");
4029 error = true;
4030 break;
4031 }
4032
4033 Gtid_log_event *gtid_ev = (Gtid_log_event *)ev;
4034 rpl_sidno sidno = gtid_ev->get_sidno(retrieved_gtids->get_sid_map());
4035 ulonglong immediate_commit_timestamp =
4036 gtid_ev->immediate_commit_timestamp;
4037 longlong original_commit_timestamp = gtid_ev->original_commit_timestamp;
4038
4039 if (sidno < 0) {
4040 error = true;
4041 break;
4042 } else {
4043 if (retrieved_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK) {
4044 error = true;
4045 break;
4046 } else {
4047 Gtid gtid = {sidno, gtid_ev->get_gno()};
4048 /*
4049 As are updating the transaction boundary parser while reading
4050 GTIDs from relay log files to fill the Retrieved_Gtid_Set, we
4051 should not add the GTID here as we don't know if the transaction
4052 is complete on the relay log yet.
4053 */
4054 partial_trx->start(gtid, original_commit_timestamp,
4055 immediate_commit_timestamp);
4056 }
4057 DBUG_PRINT("info",
4058 ("Found Gtid in relaylog file '%s': Gtid(%d, %lld).",
4059 filename, sidno, gtid_ev->get_gno()));
4060 }
4061 break;
4062 }
4063 case binary_log::ANONYMOUS_GTID_LOG_EVENT:
4064 default:
4065 /*
4066 If we reached the end of a transaction after storing it's GTID
4067 in partial_trx structure, it is time to add this GTID to the
4068 retrieved_gtids set because the transaction is complete and there is
4069 no need for asking this transaction again.
4070 */
4071 if (trx_parser->is_not_inside_transaction()) {
4072 if (partial_trx->is_processing_trx_set()) {
4073 const Gtid *fully_retrieved_gtid;
4074 fully_retrieved_gtid = partial_trx->get_processing_trx_gtid();
4075 DBUG_PRINT("info", ("Adding Gtid to Retrieved_Gtid_Set as the "
4076 "transaction was completed at "
4077 "relaylog file '%s': Gtid(%d, %lld).",
4078 filename, fully_retrieved_gtid->sidno,
4079 fully_retrieved_gtid->gno));
4080 retrieved_gtids->_add_gtid(*fully_retrieved_gtid);
4081 /*
4082 We don't need to update the last queued structure here. We just
4083 want to have the information about the partial transaction left in
4084 the relay log.
4085 */
4086 partial_trx->clear();
4087 }
4088 }
4089 break;
4090 }
4091 delete ev;
4092 }
4093
4094 if (relaylog_file_reader.has_fatal_error()) {
4095 // This is not a fatal error; the log may just be truncated.
4096 // @todo but what other errors could happen? IO error?
4097 LogErr(WARNING_LEVEL, ER_BINLOG_ERROR_READING_GTIDS_FROM_RELAY_LOG, -1);
4098 }
4099
4100 #ifndef DBUG_OFF
4101 LogErr(INFORMATION_LEVEL, ER_BINLOG_EVENTS_READ_FROM_RELAY_LOG_INFO,
4102 event_counter, filename);
4103 #endif
4104
4105 return error;
4106 }
4107
/** Result of reading the GTID information of one log file. */
enum enum_read_gtids_from_binlog_status {
  /** Both Previous_gtids_log_event and Gtid_log_event were found. */
  GOT_GTIDS = 0,
  /** A Previous_gtids_log_event was found, but no Gtid_log_event. */
  GOT_PREVIOUS_GTIDS = 1,
  /** The file was read and contains no GTID events. */
  NO_GTIDS = 2,
  /** Out of memory, IO error, malformed event or malformed file. */
  ERROR = 3,
  /** The file ends before the end of the first Previous_gtids_log_event. */
  TRUNCATED = 4
};
4115 /**
4116 Reads GTIDs from the given binlog file.
4117
4118 @param filename File to read from.
4119 @param all_gtids If not NULL, then the GTIDs from the
4120 Previous_gtids_log_event and from all Gtid_log_events are stored in
4121 this object.
4122 @param prev_gtids If not NULL, then the GTIDs from the
4123 Previous_gtids_log_events are stored in this object.
4124 @param first_gtid If not NULL, then the first GTID information from the
4125 file will be stored in this object.
4126 @param sid_map The sid_map object to use in the rpl_sidno generation
4127 of the Gtid_log_event. If lock is needed in the sid_map, the caller
4128 must hold it.
4129 @param verify_checksum Set to true to verify event checksums.
4130 @param is_relay_log Set to true, if filename is a Relay Log, false if it is a
4131 Binary Log.
4132 @retval GOT_GTIDS The file was successfully read and it contains
4133 both Gtid_log_events and Previous_gtids_log_events.
4134 This is only possible if either all_gtids or first_gtid are not null.
4135 @retval GOT_PREVIOUS_GTIDS The file was successfully read and it
4136 contains Previous_gtids_log_events but no Gtid_log_events.
4137 For binary logs, if no all_gtids and no first_gtid are specified,
4138 this function will be done right after reading the PREVIOUS_GTIDS
4139 regardless of the rest of the content of the binary log file.
4140 @retval NO_GTIDS The file was successfully read and it does not
4141 contain GTID events.
4142 @retval ERROR Out of memory, or IO error, or malformed event
4143 structure, or the file is malformed (e.g., contains Gtid_log_events
4144 but no Previous_gtids_log_event).
4145 @retval TRUNCATED The file was truncated before the end of the
4146 first Previous_gtids_log_event.
4147 */
read_gtids_from_binlog(const char * filename,Gtid_set * all_gtids,Gtid_set * prev_gtids,Gtid * first_gtid,Sid_map * sid_map,bool verify_checksum,bool is_relay_log)4148 static enum_read_gtids_from_binlog_status read_gtids_from_binlog(
4149 const char *filename, Gtid_set *all_gtids, Gtid_set *prev_gtids,
4150 Gtid *first_gtid, Sid_map *sid_map, bool verify_checksum,
4151 bool is_relay_log) {
4152 DBUG_TRACE;
4153 DBUG_PRINT("info", ("Opening file %s", filename));
4154
4155 #ifndef DBUG_OFF
4156 unsigned long event_counter = 0;
4157 /*
4158 We assert here that both all_gtids and prev_gtids, if specified,
4159 uses the same sid_map as the one passed as a parameter. This is just
4160 to ensure that, if the sid_map needed some lock and was locked by
4161 the caller, the lock applies to all the GTID sets this function is
4162 dealing with.
4163 */
4164 if (all_gtids) DBUG_ASSERT(all_gtids->get_sid_map() == sid_map);
4165 if (prev_gtids) DBUG_ASSERT(prev_gtids->get_sid_map() == sid_map);
4166 #endif
4167
4168 Binlog_file_reader binlog_file_reader(verify_checksum);
4169 if (binlog_file_reader.open(filename)) {
4170 LogErr(ERROR_LEVEL, ER_BINLOG_FILE_OPEN_FAILED,
4171 binlog_file_reader.get_error_str());
4172 /*
4173 We need to revisit the recovery procedure for relay log
4174 files. Currently, it is called after this routine.
4175 /Alfranio
4176 */
4177 if (binlog_file_reader.get_error_type() ==
4178 Binlog_read_error::CANNOT_GET_FILE_PASSWORD)
4179 return ERROR;
4180 return TRUNCATED;
4181 }
4182
4183 Log_event *ev = nullptr;
4184 enum_read_gtids_from_binlog_status ret = NO_GTIDS;
4185 bool done = false;
4186 bool seen_first_gtid = false;
4187 while (!done && (ev = binlog_file_reader.read_event_object()) != nullptr) {
4188 #ifndef DBUG_OFF
4189 event_counter++;
4190 #endif
4191 DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
4192 switch (ev->get_type_code()) {
4193 case binary_log::FORMAT_DESCRIPTION_EVENT:
4194 case binary_log::ROTATE_EVENT:
4195 // do nothing; just accept this event and go to next
4196 break;
4197 case binary_log::PREVIOUS_GTIDS_LOG_EVENT: {
4198 ret = GOT_PREVIOUS_GTIDS;
4199 // add events to sets
4200 Previous_gtids_log_event *prev_gtids_ev =
4201 (Previous_gtids_log_event *)ev;
4202 if (all_gtids != nullptr && prev_gtids_ev->add_to_set(all_gtids) != 0)
4203 ret = ERROR, done = true;
4204 else if (prev_gtids != nullptr &&
4205 prev_gtids_ev->add_to_set(prev_gtids) != 0)
4206 ret = ERROR, done = true;
4207 #ifndef DBUG_OFF
4208 char *prev_buffer = prev_gtids_ev->get_str(nullptr, nullptr);
4209 DBUG_PRINT("info", ("Got Previous_gtids from file '%s': Gtid_set='%s'.",
4210 filename, prev_buffer));
4211 my_free(prev_buffer);
4212 #endif
4213 /*
4214 If this is not a relay log, the previous_gtids were asked and no
4215 all_gtids neither first_gtid were asked, it is fine to consider the
4216 job as done.
4217 */
4218 if (!is_relay_log && prev_gtids != nullptr && all_gtids == nullptr &&
4219 first_gtid == nullptr)
4220 done = true;
4221 DBUG_EXECUTE_IF("inject_fault_bug16502579", {
4222 DBUG_PRINT("debug", ("PREVIOUS_GTIDS_LOG_EVENT found. "
4223 "Injected ret=NO_GTIDS."));
4224 if (ret == GOT_PREVIOUS_GTIDS) {
4225 ret = NO_GTIDS;
4226 done = false;
4227 }
4228 });
4229 break;
4230 }
4231 case binary_log::GTID_LOG_EVENT: {
4232 if (ret != GOT_GTIDS) {
4233 if (ret != GOT_PREVIOUS_GTIDS) {
4234 /*
4235 Since this routine is run on startup, there may not be a
4236 THD instance. Therefore, ER(X) cannot be used.
4237 */
4238 const char *msg_fmt =
4239 (current_thd != nullptr)
4240 ? ER_THD(current_thd, ER_BINLOG_LOGICAL_CORRUPTION)
4241 : ER_DEFAULT(ER_BINLOG_LOGICAL_CORRUPTION);
4242 my_printf_error(
4243 ER_BINLOG_LOGICAL_CORRUPTION, msg_fmt, MYF(0), filename,
4244 "The first global transaction identifier was read, but "
4245 "no other information regarding identifiers existing "
4246 "on the previous log files was found.");
4247 ret = ERROR, done = true;
4248 break;
4249 } else
4250 ret = GOT_GTIDS;
4251 }
4252 /*
4253 When this is a relaylog, we just check if the relay log contains at
4254 least one Gtid_log_event, so that we can distinguish the return values
4255 GOT_GTID and GOT_PREVIOUS_GTIDS. We don't need to read anything else
4256 from the relay log.
4257 When this is a binary log, if all_gtids is requested (i.e., NOT NULL),
4258 we should continue to read all gtids. If just first_gtid was
4259 requested, we will be done after storing this Gtid_log_event info on
4260 it.
4261 */
4262 if (is_relay_log) {
4263 ret = GOT_GTIDS, done = true;
4264 } else {
4265 Gtid_log_event *gtid_ev = (Gtid_log_event *)ev;
4266 rpl_sidno sidno = gtid_ev->get_sidno(sid_map);
4267 if (sidno < 0)
4268 ret = ERROR, done = true;
4269 else {
4270 if (all_gtids) {
4271 if (all_gtids->ensure_sidno(sidno) != RETURN_STATUS_OK)
4272 ret = ERROR, done = true;
4273 all_gtids->_add_gtid(sidno, gtid_ev->get_gno());
4274 DBUG_PRINT("info", ("Got Gtid from file '%s': Gtid(%d, %lld).",
4275 filename, sidno, gtid_ev->get_gno()));
4276 }
4277
4278 /* If the first GTID was requested, stores it */
4279 if (first_gtid && !seen_first_gtid) {
4280 first_gtid->set(sidno, gtid_ev->get_gno());
4281 seen_first_gtid = true;
4282 /* If the first_gtid was the only thing requested, we are done */
4283 if (all_gtids == nullptr) ret = GOT_GTIDS, done = true;
4284 }
4285 }
4286 }
4287 break;
4288 }
4289 case binary_log::ANONYMOUS_GTID_LOG_EVENT: {
4290 /*
4291 When this is a relaylog, we just check if it contains
4292 at least one Anonymous_gtid_log_event after initialization
4293 (FDs, Rotates and PREVIOUS_GTIDS), so that we can distinguish the
4294 return values GOT_GTID and GOT_PREVIOUS_GTIDS.
4295 We don't need to read anything else from the relay log.
4296 */
4297 if (is_relay_log) {
4298 ret = GOT_GTIDS;
4299 done = true;
4300 break;
4301 }
4302 DBUG_ASSERT(prev_gtids == nullptr
4303 ? true
4304 : all_gtids != nullptr || first_gtid != nullptr);
4305 }
4306 // Fall through.
4307 default:
4308 // if we found any other event type without finding a
4309 // previous_gtids_log_event, then the rest of this binlog
4310 // cannot contain gtids
4311 if (ret != GOT_GTIDS && ret != GOT_PREVIOUS_GTIDS) done = true;
4312 /*
4313 The GTIDs of the relaylog files will be handled later
4314 because of the possibility of transactions be spanned
4315 along distinct relaylog files.
4316 So, if we found an ordinary event without finding the
4317 GTID but we already found the PREVIOUS_GTIDS, this probably
4318 means that the event is from a transaction that started on
4319 previous relaylog file.
4320 */
4321 if (ret == GOT_PREVIOUS_GTIDS && is_relay_log) done = true;
4322 break;
4323 }
4324 delete ev;
4325 DBUG_PRINT("info", ("done=%d", done));
4326 }
4327
4328 if (binlog_file_reader.has_fatal_error()) {
4329 // This is not a fatal error; the log may just be truncated.
4330
4331 // @todo but what other errors could happen? IO error?
4332 LogErr(WARNING_LEVEL, ER_BINLOG_ERROR_READING_GTIDS_FROM_BINARY_LOG, -1);
4333 }
4334
4335 if (all_gtids)
4336 all_gtids->dbug_print("all_gtids");
4337 else
4338 DBUG_PRINT("info", ("all_gtids==NULL"));
4339 if (prev_gtids)
4340 prev_gtids->dbug_print("prev_gtids");
4341 else
4342 DBUG_PRINT("info", ("prev_gtids==NULL"));
4343 if (first_gtid == nullptr)
4344 DBUG_PRINT("info", ("first_gtid==NULL"));
4345 else if (first_gtid->sidno == 0)
4346 DBUG_PRINT("info", ("first_gtid.sidno==0"));
4347 else
4348 first_gtid->dbug_print(sid_map, "first_gtid");
4349
4350 DBUG_PRINT("info", ("returning %d", ret));
4351 #ifndef DBUG_OFF
4352 if (!is_relay_log && prev_gtids != nullptr && all_gtids == nullptr &&
4353 first_gtid == nullptr)
4354 LogErr(INFORMATION_LEVEL, ER_BINLOG_EVENTS_READ_FROM_BINLOG_INFO,
4355 event_counter, filename);
4356 #endif
4357 return ret;
4358 }
4359
find_first_log_not_in_gtid_set(char * binlog_file_name,const Gtid_set * gtid_set,Gtid * first_gtid,const char ** errmsg)4360 bool MYSQL_BIN_LOG::find_first_log_not_in_gtid_set(char *binlog_file_name,
4361 const Gtid_set *gtid_set,
4362 Gtid *first_gtid,
4363 const char **errmsg) {
4364 DBUG_TRACE;
4365 LOG_INFO linfo;
4366 auto log_index = this->get_log_index();
4367 std::list<std::string> filename_list = log_index.second;
4368 int error = log_index.first;
4369 list<string>::reverse_iterator rit;
4370 Gtid_set binlog_previous_gtid_set{gtid_set->get_sid_map()};
4371
4372 if (error != LOG_INFO_EOF) {
4373 *errmsg =
4374 "Failed to read the binary log index file while "
4375 "looking for the oldest binary log that contains any GTID "
4376 "that is not in the given gtid set";
4377 error = -1;
4378 goto end;
4379 }
4380
4381 if (filename_list.empty()) {
4382 *errmsg =
4383 "Could not find first log file name in binary log index file "
4384 "while looking for the oldest binary log that contains any GTID "
4385 "that is not in the given gtid set";
4386 error = -2;
4387 goto end;
4388 }
4389
4390 /*
4391 Iterate over all the binary logs in reverse order, and read only
4392 the Previous_gtids_log_event, to find the first one, that is the
4393 subset of the given gtid set. Since every binary log begins with
4394 a Previous_gtids_log_event, that contains all GTIDs in all
4395 previous binary logs.
4396 We also ask for the first GTID in the binary log to know if we
4397 should send the FD event with the "created" field cleared or not.
4398 */
4399 DBUG_PRINT("info", ("Iterating backwards through binary logs, and reading "
4400 "only the Previous_gtids_log_event, to find the first "
4401 "one, that is the subset of the given gtid set."));
4402 rit = filename_list.rbegin();
4403 error = 0;
4404 while (rit != filename_list.rend()) {
4405 binlog_previous_gtid_set.clear();
4406 const char *filename = rit->c_str();
4407 DBUG_PRINT("info",
4408 ("Read Previous_gtids_log_event from filename='%s'", filename));
4409 switch (read_gtids_from_binlog(filename, nullptr, &binlog_previous_gtid_set,
4410 first_gtid,
4411 binlog_previous_gtid_set.get_sid_map(),
4412 opt_master_verify_checksum, is_relay_log)) {
4413 case ERROR:
4414 *errmsg =
4415 "Error reading header of binary log while looking for "
4416 "the oldest binary log that contains any GTID that is not in "
4417 "the given gtid set";
4418 error = -3;
4419 goto end;
4420 case NO_GTIDS:
4421 *errmsg =
4422 "Found old binary log without GTIDs while looking for "
4423 "the oldest binary log that contains any GTID that is not in "
4424 "the given gtid set";
4425 error = -4;
4426 goto end;
4427 case GOT_GTIDS:
4428 case GOT_PREVIOUS_GTIDS:
4429 if (binlog_previous_gtid_set.is_subset(gtid_set)) {
4430 strcpy(binlog_file_name, filename);
4431 /*
4432 Verify that the selected binlog is not the first binlog,
4433 */
4434 DBUG_EXECUTE_IF("slave_reconnect_with_gtid_set_executed",
4435 DBUG_ASSERT(strcmp(filename_list.begin()->c_str(),
4436 binlog_file_name) != 0););
4437 goto end;
4438 }
4439 case TRUNCATED:
4440 break;
4441 }
4442
4443 rit++;
4444 }
4445
4446 if (rit == filename_list.rend()) {
4447 report_missing_gtids(&binlog_previous_gtid_set, gtid_set, errmsg);
4448 error = -5;
4449 }
4450
4451 end:
4452 if (error) DBUG_PRINT("error", ("'%s'", *errmsg));
4453 filename_list.clear();
4454 DBUG_PRINT("info", ("returning %d", error));
4455 return error != 0 ? true : false;
4456 }
4457
/**
  Populate GTID sets by reading the binary or relay log files listed in the
  log index.

  Two scans may be performed:
   - If all_gtids is not NULL, the files are read backwards (newest first)
     until read_gtids_from_binlog() reports enough GTID information; for a
     relay log this is followed by a forward pass that feeds the transaction
     boundary parser.
   - If lost_gtids is not NULL and the backward scan did not reach the first
     file, the files are read forwards (oldest first) to fill lost_gtids.

  @param[out] all_gtids   If not NULL, receives the GTIDs found by the
                          backward scan. Must not be NULL for a relay log,
                          because its Sid_map supplies the lock that is used.
  @param[out] lost_gtids  If not NULL, receives the previous-GTIDs
                          information of the first file. Asserted to be NULL
                          for a relay log.
  @param verify_checksum  Forwarded to read_gtids_from_binlog().
  @param need_lock        If true, LOCK_log (only when all_gtids is not
                          NULL), LOCK_index and the sid lock (write mode) are
                          acquired here and released before returning;
                          otherwise the caller is asserted to hold them.
  @param trx_parser       Relay log only: parser that is reset and then fed
                          with the relay log contents.
  @param partial_trx      Relay log only: cleared and then updated while the
                          relay log is parsed.
  @param is_server_starting If true and this is a binary log, the last entry
                          of the index is ignored and the
                          binlog_gtid_simple_recovery shortcut may apply.

  @retval false Success.
  @retval true  Error. NOTE(review): when the index cannot be read the
                return value is derived from the code returned by
                get_log_index(); this assumes that code is non-zero.
*/
bool MYSQL_BIN_LOG::init_gtid_sets(Gtid_set *all_gtids, Gtid_set *lost_gtids,
                                   bool verify_checksum, bool need_lock,
                                   Transaction_boundary_parser *trx_parser,
                                   Gtid_monitoring_info *partial_trx,
                                   bool is_server_starting) {
  DBUG_TRACE;
  DBUG_PRINT(
      "info",
      ("lost_gtids=%p; so we are recovering a %s log; is_relay_log=%d",
       lost_gtids, lost_gtids == nullptr ? "relay" : "binary", is_relay_log));

  /*
    A relay log uses the lock of the Sid_map that belongs to all_gtids; a
    binary log uses the global sid lock.
  */
  Checkable_rwlock *sid_lock =
      is_relay_log ? all_gtids->get_sid_map()->get_sid_lock() : global_sid_lock;
  /*
    If this is a relay log, we must have the IO thread Master_info trx_parser
    in order to correctly feed it with relay log events.
  */
#ifndef DBUG_OFF
  if (is_relay_log) {
    DBUG_ASSERT(trx_parser != nullptr);
    DBUG_ASSERT(lost_gtids == nullptr);
  }
#endif

  /*
    Acquire the necessary locks to ensure that logs are neither
    removed nor updated while we are reading from them.
    Acquisition order: LOCK_log, LOCK_index, sid_lock. They are released in
    the opposite order at the 'end' label.
  */
  if (need_lock) {
    // We don't need LOCK_log if we are only going to read the initial
    // Previous_gtids_log_event and ignore the Gtid_log_events.
    if (all_gtids != nullptr) mysql_mutex_lock(&LOCK_log);
    mysql_mutex_lock(&LOCK_index);
    sid_lock->wrlock();
  } else {
    if (all_gtids != nullptr) mysql_mutex_assert_owner(&LOCK_log);
    mysql_mutex_assert_owner(&LOCK_index);
    sid_lock->assert_some_wrlock();
  }

  /*
    Initialize the sid_map to be used in read_gtids_from_binlog. It stays
    NULL when neither all_gtids nor lost_gtids was given.
  */
  Sid_map *sid_map = nullptr;
  if (all_gtids)
    sid_map = all_gtids->get_sid_map();
  else if (lost_gtids)
    sid_map = lost_gtids->get_sid_map();

  // Gather the set of files to be accessed.
  auto log_index = this->get_log_index(false);
  std::list<std::string> filename_list = log_index.second;
  int error = log_index.first;
  // Iterators are declared here so that no 'goto end' skips a declaration.
  list<string>::iterator it;
  list<string>::reverse_iterator rit;
  bool reached_first_file = false;

  if (error != LOG_INFO_EOF) {
    DBUG_PRINT("error", ("Error reading %s index",
                         is_relay_log ? "relaylog" : "binlog"));
    goto end;
  }
  /*
    On server starting, one new empty binlog file is created and
    its file name is put into index file before initializing
    GLOBAL.GTID_EXECUTED AND GLOBAL.GTID_PURGED, it is not the
    last binlog file before the server restarts, so we remove
    its file name from filename_list.
  */
  if (is_server_starting && !is_relay_log && !filename_list.empty())
    filename_list.pop_back();

  error = 0;
  if (all_gtids != nullptr) {
    DBUG_PRINT("info", ("Iterating backwards through %s logs, "
                        "looking for the last %s log that contains "
                        "a Previous_gtids_log_event.",
                        is_relay_log ? "relay" : "binary",
                        is_relay_log ? "relay" : "binary"));
    // Iterate over all files in reverse order until we find one that
    // contains a Previous_gtids_log_event.
    rit = filename_list.rbegin();
    bool can_stop_reading = false;
    // An empty list counts as "first file already reached".
    reached_first_file = (rit == filename_list.rend());
    DBUG_PRINT("info",
               ("filename='%s' reached_first_file=%d",
                reached_first_file ? "" : rit->c_str(), reached_first_file));
    while (!can_stop_reading && !reached_first_file) {
      const char *filename = rit->c_str();
      DBUG_ASSERT(rit != filename_list.rend());
      /*
        Advance before reading, so that reached_first_file already tells
        whether 'filename' is the oldest file; only in that case lost_gtids
        is handed to read_gtids_from_binlog().
      */
      rit++;
      reached_first_file = (rit == filename_list.rend());
      DBUG_PRINT("info", ("filename='%s' can_stop_reading=%d "
                          "reached_first_file=%d, ",
                          filename, can_stop_reading, reached_first_file));
      switch (read_gtids_from_binlog(
          filename, all_gtids, reached_first_file ? lost_gtids : nullptr,
          nullptr /* first_gtid */, sid_map, verify_checksum, is_relay_log)) {
        case ERROR: {
          error = 1;
          goto end;
        }
        case GOT_GTIDS: {
          can_stop_reading = true;
          break;
        }
        case GOT_PREVIOUS_GTIDS: {
          /*
            If this is a binlog file, it is enough to have GOT_PREVIOUS_GTIDS.
            If this is a relaylog file, we need to find at least one GTID to
            start parsing the relay log to add GTID of transactions that might
            have spanned in distinct relaylog files.
          */
          if (!is_relay_log) can_stop_reading = true;
          break;
        }
        case NO_GTIDS: {
          /*
            Mysql server iterates backwards through binary logs, looking for
            the last binary log that contains a Previous_gtids_log_event for
            gathering the set of gtid_executed on server start. This may take
            very long time if it has many binary logs and almost all of them
            are out of filesystem cache. So if the binlog_gtid_simple_recovery
            is enabled, and the last binary log does not contain any GTID
            event, do not read any more binary logs, GLOBAL.GTID_EXECUTED and
            GLOBAL.GTID_PURGED should be empty in the case.
          */
          if (binlog_gtid_simple_recovery && is_server_starting &&
              !is_relay_log) {
            DBUG_ASSERT(all_gtids->is_empty());
            DBUG_ASSERT(lost_gtids->is_empty());
            goto end;
          }
          /*FALLTHROUGH*/
        }
        case TRUNCATED: {
          break;
        }
      }
    }

    /*
      If we use GTIDs and have partial transactions on the relay log,
      must check if it ends on next relay log files.
      We also need to feed the boundary parser with the rest of the
      relay log to put it in the correct state before receiving new
      events from the master in the case of GTID auto positioning be
      disabled.
    */
    if (is_relay_log && filename_list.size() > 0) {
      /*
        Suppose the following relaylog:

         rl-bin.000001 | rl-bin.000002 | rl-bin.000003 | rl-bin-000004
        ---------------+---------------+---------------+---------------
         PREV_GTIDS    | PREV_GTIDS    | PREV_GTIDS    | PREV_GTIDS
         (empty)       | (UUID:1)      | (UUID:1)      | (UUID:1)
        ---------------+---------------+---------------+---------------
         GTID(UUID:1)  | QUERY(INSERT) | QUERY(INSERT) | XID
        ---------------+---------------+---------------+---------------
         QUERY(CREATE  |
         TABLE t1 ...) |
        ---------------+
         GTID(UUID:2)  |
        ---------------+
         QUERY(BEGIN)  |
        ---------------+

        As it is impossible to determine the current Retrieved_Gtid_Set by only
        looking to the PREVIOUS_GTIDS on the last relay log file, and scanning
        events on it, we tried to find a relay log file that contains at least
        one GTID event during the backwards search.

        In the example, we will find a GTID only in rl-bin.000001, as the
        UUID:2 transaction was spanned across 4 relay log files.

        The transaction spanning can be caused by "FLUSH RELAY LOGS" commands
        on slave while it is queuing the transaction.

        So, in order to correctly add UUID:2 into Retrieved_Gtid_Set, we need
        to parse the relay log starting on the file we found the last GTID
        queued to know if the transaction was fully retrieved or not.
      */

      /*
        Adjust the reverse iterator to point to the relaylog file we
        need to start parsing, as it was incremented after generating
        the relay log file name.
      */
      DBUG_ASSERT(rit != filename_list.rbegin());
      rit--;
      DBUG_ASSERT(rit != filename_list.rend());
      /* Reset the transaction parser before feeding it with events */
      trx_parser->reset();
      partial_trx->clear();

      DBUG_PRINT("info", ("Iterating forwards through relay logs, "
                          "updating the Retrieved_Gtid_Set and updating "
                          "IO thread trx parser before start."));
      /*
        Translate the reverse iterator into a forward one by looking up the
        file name, then parse from that file up to the newest one.
      */
      for (it = find(filename_list.begin(), filename_list.end(), *rit);
           it != filename_list.end(); it++) {
        const char *filename = it->c_str();
        DBUG_PRINT("info", ("filename='%s'", filename));
        if (read_gtids_and_update_trx_parser_from_relaylog(
                filename, all_gtids, true, trx_parser, partial_trx)) {
          error = 1;
          goto end;
        }
      }
    }
  }
  if (lost_gtids != nullptr && !reached_first_file) {
    /*
      This branch is only reachable by a binary log. The relay log
      doesn't need to get lost_gtids information.

      A 5.6 server sets GTID_PURGED by rotating the binary log.

      A 5.6 server that had recently enabled GTIDs and set GTID_PURGED
      would have a sequence of binary logs like:

      master-bin.N  : No PREVIOUS_GTIDS (GTID wasn't enabled)
      master-bin.N+1: Has an empty PREVIOUS_GTIDS and a ROTATE
                      (GTID was enabled on startup)
      master-bin.N+2: Has a PREVIOUS_GTIDS with the content set by a
                      SET @@GLOBAL.GTID_PURGED + has GTIDs of some
                      transactions.

      If this 5.6 server be upgraded to 5.7 keeping its binary log files,
      this routine will have to find the first binary log that contains a
      PREVIOUS_GTIDS + a GTID event to ensure that the content of the
      GTID_PURGED will be correctly set (assuming binlog_gtid_simple_recovery
      is not enabled).
    */
    DBUG_PRINT("info", ("Iterating forwards through binary logs, looking for "
                        "the first binary log that contains both a "
                        "Previous_gtids_log_event and a Gtid_log_event."));
    DBUG_ASSERT(!is_relay_log);
    for (it = filename_list.begin(); it != filename_list.end(); it++) {
      /*
        We should pass a first_gtid to read_gtids_from_binlog when
        binlog_gtid_simple_recovery is disabled, or else it will return
        right after reading the PREVIOUS_GTIDS event to avoid stall on
        reading the whole binary log.
      */
      Gtid first_gtid = {0, 0};
      const char *filename = it->c_str();
      DBUG_PRINT("info", ("filename='%s'", filename));
      switch (read_gtids_from_binlog(
          filename, nullptr, lost_gtids,
          binlog_gtid_simple_recovery ? nullptr : &first_gtid, sid_map,
          verify_checksum, is_relay_log)) {
        case ERROR: {
          error = 1;
          /*FALLTHROUGH*/
        }
        case GOT_GTIDS: {
          /* Either failed or found what we were looking for: stop. */
          goto end;
        }
        case NO_GTIDS:
        case GOT_PREVIOUS_GTIDS: {
          /*
            Mysql server iterates forwards through binary logs, looking for
            the first binary log that contains both Previous_gtids_log_event
            and gtid_log_event for gathering the set of gtid_purged on server
            start. It also iterates forwards through binary logs, looking for
            the first binary log that contains both Previous_gtids_log_event
            and gtid_log_event for gathering the set of gtid_purged when
            purging binary logs. This may take very long time if it has many
            binary logs and almost all of them are out of filesystem cache.
            So if the binlog_gtid_simple_recovery is enabled, we just
            initialize GLOBAL.GTID_PURGED from the first binary log, do not
            read any more binary logs.
          */
          if (binlog_gtid_simple_recovery) goto end;
          /*FALLTHROUGH*/
        }
        case TRUNCATED: {
          break;
        }
      }
    }
  }
end:
  if (all_gtids) all_gtids->dbug_print("all_gtids");
  if (lost_gtids) lost_gtids->dbug_print("lost_gtids");
  /* Release in the reverse order of acquisition. */
  if (need_lock) {
    sid_lock->unlock();
    mysql_mutex_unlock(&LOCK_index);
    if (all_gtids != nullptr) mysql_mutex_unlock(&LOCK_log);
  }
  filename_list.clear();
  DBUG_PRINT("info", ("returning %d", error));
  return error != 0 ? true : false;
}
4751
/**
  Open a (new) binlog file.

  - Open the log file and the index file. Register the new
    file name in it
  - When calling this when the file is in use, you must have locks
    on LOCK_log and LOCK_index.

  The steps performed are: generate the file name, record it in the purge
  index file, open the log file, write the magic header (only if the file is
  empty), write a Format_description_log_event and a
  Previous_gtids_log_event (see the body for when the latter is written),
  optionally write the extra description event, flush and sync, and finally
  add the file name to the index file (only if the magic header was written
  by this call).

  @param log_name          Base name, passed to init_and_set_log_file_name()
                           and open().
  @param new_name          Passed to init_and_set_log_file_name() and open();
                           also used in the error message if not NULL.
  @param max_size_arg      Stored in max_size.
  @param null_created_arg  Stored in the dont_set_created member of the
                           Format_description_log_event that is written.
  @param need_lock_index   Forwarded to add_log_to_index(),
                           purge_index_entry() and close().
  @param need_sid_lock     If true, the sid lock is write-locked here while
                           the Previous_gtids_log_event is built; otherwise
                           the caller is asserted to hold it.
  @param extra_description_event If not NULL, this event is marked as
                           artificial, gets 'created' set to 0 and is written
                           to the file after the previous-GTIDs event.
  @param new_index_number  Passed to init_and_set_log_file_name() and open().

  @retval false ok
  @retval true  error
*/

bool MYSQL_BIN_LOG::open_binlog(
    const char *log_name, const char *new_name, ulong max_size_arg,
    bool null_created_arg, bool need_lock_index, bool need_sid_lock,
    Format_description_log_event *extra_description_event,
    uint32 new_index_number) {
  // lock_index must be acquired *before* sid_lock. Therefore a caller that
  // asks this function to take LOCK_index must also let it take the sid lock.
  DBUG_ASSERT(need_sid_lock || !need_lock_index);
  DBUG_TRACE;
  DBUG_PRINT("enter", ("base filename: %s", log_name));

  mysql_mutex_assert_owner(get_log_lock());

  if (init_and_set_log_file_name(log_name, new_name, new_index_number)) {
    LogErr(ERROR_LEVEL, ER_BINLOG_CANT_GENERATE_NEW_FILE_NAME);
    return true;
  }

  DBUG_PRINT("info", ("generated filename: %s", log_file_name));

  DEBUG_SYNC(current_thd, "after_log_file_name_initialized");

  /*
    Record the new file name in the purge index file and sync it before the
    log file itself is created.
  */
  if (open_purge_index_file(true) ||
      register_create_index_entry(log_file_name) || sync_purge_index_file() ||
      DBUG_EVALUATE_IF("fault_injection_registering_index", 1, 0)) {
    /**
      @todo: although this was introduced to appease valgrind
      when injecting emulated faults using fault_injection_registering_index
      it may be good to consider what actually happens when
      open_purge_index_file succeeds but register or sync fails.

      Perhaps we might need the code below in MYSQL_BIN_LOG::cleanup
      for "real life" purposes as well?
    */
    DBUG_EXECUTE_IF("fault_injection_registering_index", {
      if (my_b_inited(&purge_index_file)) {
        end_io_cache(&purge_index_file);
        my_close(purge_index_file.file, MYF(0));
      }
    });

    LogErr(ERROR_LEVEL, ER_BINLOG_FAILED_TO_SYNC_INDEX_FILE_IN_OPEN);
    return true;
  }
  DBUG_EXECUTE_IF("crash_create_non_critical_before_update_index",
                  DBUG_SUICIDE(););

  write_error = false;

  /* open the main log file */
  if (open(m_key_file_log, log_name, new_name, new_index_number)) {
    close_purge_index_file();
    return true; /* all warnings issued */
  }

  max_size = max_size_arg;

  /* Set only when this call writes the magic header into an empty file. */
  bool write_file_name_to_index_file = false;

  /* This must be before goto err. */
#ifndef DBUG_OFF
  binary_log_debug::debug_pretend_version_50034_in_binlog =
      DBUG_EVALUATE_IF("pretend_version_50034_in_binlog", true, false);
#endif
  Format_description_log_event s;

  if (m_binlog_file->is_empty()) {
    /*
      The binary log file was empty (probably newly created)
      This is the normal case and happens when the user doesn't specify
      an extension for the binary log files.
      In this case we write a standard header to it.
    */
    if (m_binlog_file->write(pointer_cast<const uchar *>(BINLOG_MAGIC),
                             BIN_LOG_HEADER_SIZE))
      goto err;
    bytes_written += BIN_LOG_HEADER_SIZE;
    write_file_name_to_index_file = true;
  }

  /*
    don't set LOG_EVENT_BINLOG_IN_USE_F for the relay log
  */
  if (!is_relay_log) {
    s.common_header->flags |= LOG_EVENT_BINLOG_IN_USE_F;
  }

  if (is_relay_log) {
    /* relay-log */
    if (relay_log_checksum_alg == binary_log::BINLOG_CHECKSUM_ALG_UNDEF) {
      /*
        The relay log checksum algorithm is still undefined at this point
        (per the original note, it would otherwise have been inherited from
        the master's descriptor event), so choose it from local settings.
      */
      if (opt_slave_sql_verify_checksum == 0)
        /* use slave's local preference of RL events verification */
        relay_log_checksum_alg = binary_log::BINLOG_CHECKSUM_ALG_OFF;
      else
        relay_log_checksum_alg =
            static_cast<enum_binlog_checksum_alg>(binlog_checksum_options);
    }
  }

  if (!s.is_valid()) goto err;
  s.dont_set_created = null_created_arg;
  /* Set LOG_EVENT_RELAY_LOG_F flag for relay log's FD */
  if (is_relay_log) s.set_relay_log_event();
  if (write_event_to_binlog(&s)) goto err;
  /*
    We need to revisit this code and improve it.
    See further comments in the mysqld.
    /Alfranio
  */
  if (current_thd) {
    Checkable_rwlock *sid_lock = nullptr;
    Gtid_set logged_gtids_binlog(global_sid_map, global_sid_lock);
    Gtid_set *previous_logged_gtids;

    /*
      Select the set to be written as PREVIOUS_GTIDS and the lock that
      protects it: the relay log's own set, or a local set computed below
      for the binary log.
    */
    if (is_relay_log) {
      previous_logged_gtids = previous_gtid_set_relaylog;
      sid_lock = previous_gtid_set_relaylog->get_sid_map()->get_sid_lock();
    } else {
      previous_logged_gtids = &logged_gtids_binlog;
      sid_lock = global_sid_lock;
    }

    if (need_sid_lock)
      sid_lock->wrlock();
    else
      sid_lock->assert_some_wrlock();

    if (!is_relay_log) {
      const Gtid_set *executed_gtids = gtid_state->get_executed_gtids();
      const Gtid_set *gtids_only_in_table =
          gtid_state->get_gtids_only_in_table();
      /* logged_gtids_binlog= executed_gtids - gtids_only_in_table */
      if (logged_gtids_binlog.add_gtid_set(executed_gtids) !=
          RETURN_STATUS_OK) {
        /* Do not leave with the lock taken above still held. */
        if (need_sid_lock) sid_lock->unlock();
        goto err;
      }
      logged_gtids_binlog.remove_gtid_set(gtids_only_in_table);
    }
    DBUG_PRINT("info", ("Generating PREVIOUS_GTIDS for %s file.",
                        is_relay_log ? "relaylog" : "binlog"));
    Previous_gtids_log_event prev_gtids_ev(previous_logged_gtids);
    if (is_relay_log) prev_gtids_ev.set_relay_log_event();
    /* The event is built; the lock is released before writing it. */
    if (need_sid_lock) sid_lock->unlock();
    if (write_event_to_binlog(&prev_gtids_ev)) goto err;
  } else  // !(current_thd)
  {
    /*
      If the slave was configured before server restart, the server will
      generate a new relay log file without having current_thd, but this
      new relay log file must have a PREVIOUS_GTIDS event as we now
      generate the PREVIOUS_GTIDS event always.

      This is only needed for relay log files because the server will add
      the PREVIOUS_GTIDS of binary logs (when current_thd==NULL) after
      server's GTID initialization.

      During server's startup at mysqld_main(), from the binary/relay log
      initialization point of view, it will:
      1) Call init_server_components() that will generate a new binary log
         file but won't write the PREVIOUS_GTIDS event yet;
      2) Initialize server's GTIDs;
      3) Write the binary log PREVIOUS_GTIDS;
      4) Call init_slave() in where the new relay log file will be created
         after initializing relay log's Retrieved_Gtid_Set;
    */
    if (is_relay_log) {
      Sid_map *previous_gtid_sid_map =
          previous_gtid_set_relaylog->get_sid_map();
      Checkable_rwlock *sid_lock = previous_gtid_sid_map->get_sid_lock();

      if (need_sid_lock)
        sid_lock->wrlock();
      else
        sid_lock->assert_some_wrlock(); /* purecov: inspected */

      DBUG_PRINT("info", ("Generating PREVIOUS_GTIDS for relaylog file."));
      Previous_gtids_log_event prev_gtids_ev(previous_gtid_set_relaylog);
      prev_gtids_ev.set_relay_log_event();

      if (need_sid_lock) sid_lock->unlock();

      if (write_event_to_binlog(&prev_gtids_ev)) goto err;
    }
  }
  if (extra_description_event) {
    /*
      This is a relay log written to by the I/O slave thread.
      Write the event so that others can later know the format of this relay
      log.
      Note that this event is very close to the original event from the
      master (it has binlog version of the master, event types of the
      master), so this is suitable to parse the next relay log's event. It
      has been produced by
      Format_description_log_event::Format_description_log_event(char* buf,).
      Why don't we want to write the mi_description_event if this
      event is for format<4 (3.23 or 4.x): this is because in that case, the
      mi_description_event describes the data received from the
      master, but not the data written to the relay log (*conversion*),
      which is in format 4 (slave's).
    */
    /*
      Set 'created' to 0, so that in next relay logs this event does not
      trigger cleaning actions on the slave in
      Format_description_log_event::apply_event_impl().
    */
    extra_description_event->created = 0;
    /* Don't set log_pos in event header */
    extra_description_event->set_artificial_event();

    if (binary_event_serialize(extra_description_event, m_binlog_file))
      goto err;
    bytes_written += extra_description_event->common_header->data_written;
  }
  if (m_binlog_file->flush_and_sync()) goto err;

  if (write_file_name_to_index_file) {
    DBUG_EXECUTE_IF("crash_create_critical_before_update_index",
                    DBUG_SUICIDE(););
    DBUG_ASSERT(my_b_inited(&index_file) != 0);

    /*
      The new log file name is appended into crash safe index file after
      all the content of index file is copied into the crash safe index
      file. Then move the crash safe index file to index file.
    */
    DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog",
                    { DBUG_SET("+d,simulate_no_free_space_error"); });
    if (DBUG_EVALUATE_IF("fault_injection_updating_index", 1, 0) ||
        add_log_to_index((uchar *)log_file_name, strlen(log_file_name),
                         need_lock_index)) {
      DBUG_EXECUTE_IF("simulate_disk_full_on_open_binlog", {
        DBUG_SET("-d,simulate_file_write_error");
        DBUG_SET("-d,simulate_no_free_space_error");
        DBUG_SET("-d,simulate_disk_full_on_open_binlog");
      });
      goto err;
    }

    DBUG_EXECUTE_IF("crash_create_after_update_index", DBUG_SUICIDE(););
  }

  atomic_log_state = LOG_OPENED;
  /*
    At every rotate memorize the last transaction counter state to use it as
    offset at logging the transaction logical timestamps.
  */
  mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
  m_dependency_tracker.rotate();
  mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);

  close_purge_index_file();

  update_binlog_end_pos();
  return false;

err:
  /*
    Common failure path for everything after the log file was opened: handle
    the entries recorded in the purge index file, close it, and then either
    abort the server or log the problem and close the log, depending on
    binlog_error_action.
  */
  if (is_inited_purge_index_file())
    purge_index_entry(nullptr, nullptr, need_lock_index);
  close_purge_index_file();
  if (binlog_error_action == ABORT_SERVER) {
    exec_binlog_error_action_abort(
        "Either disk is full, file system is read only or "
        "there was an encryption error while opening the binlog. "
        "Aborting the server.");
  } else {
    LogErr(ERROR_LEVEL, ER_BINLOG_CANT_USE_FOR_LOGGING,
           (new_name) ? new_name : name, errno);
    close(LOG_CLOSE_INDEX, false, need_lock_index);
  }
  return true;
}
5038
5039 /**
5040 Move crash safe index file to index file.
5041
5042 @param need_lock_index If true, LOCK_index will be acquired;
5043 otherwise it should already be held.
5044
5045 @retval 0 ok
5046 @retval -1 error
5047 */
move_crash_safe_index_file_to_index_file(bool need_lock_index)5048 int MYSQL_BIN_LOG::move_crash_safe_index_file_to_index_file(
5049 bool need_lock_index) {
5050 int error = 0;
5051 File fd = -1;
5052 DBUG_TRACE;
5053 int failure_trials = MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
5054 bool file_rename_status = false, file_delete_status = false;
5055 THD *thd = current_thd;
5056
5057 if (need_lock_index)
5058 mysql_mutex_lock(&LOCK_index);
5059 else
5060 mysql_mutex_assert_owner(&LOCK_index);
5061
5062 if (my_b_inited(&index_file)) {
5063 end_io_cache(&index_file);
5064 if (mysql_file_close(index_file.file, MYF(0)) < 0) {
5065 error = -1;
5066 LogErr(ERROR_LEVEL, ER_BINLOG_FAILED_TO_CLOSE_INDEX_FILE_WHILE_REBUILDING,
5067 index_file_name);
5068 /*
5069 Delete Crash safe index file here and recover the binlog.index
5070 state(index_file io_cache) from old binlog.index content.
5071 */
5072 mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
5073 MYF(0));
5074
5075 goto recoverable_err;
5076 }
5077
5078 /*
5079 Sometimes an outsider can lock index files for temporary viewing
5080 purpose. For eg: MEB locks binlog.index/relaylog.index to view
5081 the content of the file. During that small period of time, deletion
5082 of the file is not possible on some platforms(Eg: Windows)
5083 Server should retry the delete operation for few times instead of
5084 panicking immediately.
5085 */
5086 while ((file_delete_status == false) && (failure_trials > 0)) {
5087 if (DBUG_EVALUATE_IF("force_index_file_delete_failure", 1, 0)) break;
5088
5089 DBUG_EXECUTE_IF("simulate_index_file_delete_failure", {
5090 /* This simulation causes the delete to fail */
5091 static char first_char = index_file_name[0];
5092 index_file_name[0] = 0;
5093 sql_print_information("Retrying delete");
5094 if (failure_trials == 1) index_file_name[0] = first_char;
5095 };);
5096 file_delete_status = !(mysql_file_delete(key_file_binlog_index,
5097 index_file_name, MYF(MY_WME)));
5098 --failure_trials;
5099 if (!file_delete_status) {
5100 my_sleep(1000);
5101 /* Clear the error before retrying. */
5102 if (failure_trials > 0) thd->clear_error();
5103 }
5104 }
5105
5106 if (!file_delete_status) {
5107 error = -1;
5108 LogErr(ERROR_LEVEL,
5109 ER_BINLOG_FAILED_TO_DELETE_INDEX_FILE_WHILE_REBUILDING,
5110 index_file_name);
5111 /*
5112 Delete Crash safe file index file here and recover the binlog.index
5113 state(index_file io_cache) from old binlog.index content.
5114 */
5115 mysql_file_delete(key_file_binlog_index, crash_safe_index_file_name,
5116 MYF(0));
5117
5118 goto recoverable_err;
5119 }
5120 }
5121
5122 DBUG_EXECUTE_IF("crash_create_before_rename_index_file", DBUG_SUICIDE(););
5123 /*
5124 Sometimes an outsider can lock index files for temporary viewing
5125 purpose. For eg: MEB locks binlog.index/relaylog.index to view
5126 the content of the file. During that small period of time, rename
5127 of the file is not possible on some platforms(Eg: Windows)
5128 Server should retry the rename operation for few times instead of panicking
5129 immediately.
5130 */
5131 failure_trials = MYSQL_BIN_LOG::MAX_RETRIES_FOR_DELETE_RENAME_FAILURE;
5132 while ((file_rename_status == false) && (failure_trials > 0)) {
5133 DBUG_EXECUTE_IF("simulate_crash_safe_index_file_rename_failure", {
5134 /* This simulation causes the rename to fail */
5135 static char first_char = index_file_name[0];
5136 index_file_name[0] = 0;
5137 sql_print_information("Retrying rename");
5138 if (failure_trials == 1) index_file_name[0] = first_char;
5139 };);
5140 file_rename_status =
5141 !(my_rename(crash_safe_index_file_name, index_file_name, MYF(MY_WME)));
5142 --failure_trials;
5143 if (!file_rename_status) {
5144 my_sleep(1000);
5145 /* Clear the error before retrying. */
5146 if (failure_trials > 0) thd->clear_error();
5147 }
5148 }
5149 if (!file_rename_status) {
5150 error = -1;
5151 LogErr(ERROR_LEVEL, ER_BINLOG_FAILED_TO_RENAME_INDEX_FILE_WHILE_REBUILDING,
5152 index_file_name);
5153 goto fatal_err;
5154 }
5155 DBUG_EXECUTE_IF("crash_create_after_rename_index_file", DBUG_SUICIDE(););
5156
5157 recoverable_err:
5158 if ((fd = mysql_file_open(key_file_binlog_index, index_file_name,
5159 O_RDWR | O_CREAT, MYF(MY_WME))) < 0 ||
5160 mysql_file_sync(fd, MYF(MY_WME)) ||
5161 init_io_cache_ext(&index_file, fd, IO_SIZE, READ_CACHE,
5162 mysql_file_seek(fd, 0L, MY_SEEK_END, MYF(0)), false,
5163 MYF(MY_WME | MY_WAIT_IF_FULL),
5164 key_file_binlog_index_cache)) {
5165 LogErr(ERROR_LEVEL, ER_BINLOG_FAILED_TO_OPEN_INDEX_FILE_AFTER_REBUILDING,
5166 index_file_name);
5167 goto fatal_err;
5168 }
5169
5170 if (need_lock_index) mysql_mutex_unlock(&LOCK_index);
5171 return error;
5172
5173 fatal_err:
5174 /*
5175 This situation is very very rare to happen (unless there is some serious
5176 memory related issues like OOM) and should be treated as fatal error.
5177 Hence it is better to bring down the server without respecting
5178 'binlog_error_action' value here.
5179 */
5180 exec_binlog_error_action_abort(
5181 "MySQL server failed to update the "
5182 "binlog.index file's content properly. "
5183 "It might not be in sync with available "
5184 "binlogs and the binlog.index file state is in "
5185 "unrecoverable state. Aborting the server.");
5186 /*
5187 Server is aborted in the above function.
5188 This is dead code to make compiler happy.
5189 */
5190 return error;
5191 }
5192
5193 /**
5194 Append log file name to index file.
5195
5196 - To make crash safe, we copy all the content of index file
5197 to crash safe index file firstly and then append the log
5198 file name to the crash safe index file. Finally move the
5199 crash safe index file to index file.
5200
5201 @retval
5202 0 ok
5203 @retval
5204 -1 error
5205 */
add_log_to_index(uchar * log_name,size_t log_name_len,bool need_lock_index)5206 int MYSQL_BIN_LOG::add_log_to_index(uchar *log_name, size_t log_name_len,
5207 bool need_lock_index) {
5208 DBUG_TRACE;
5209
5210 if (open_crash_safe_index_file()) {
5211 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_OPEN_TMP_INDEX,
5212 "MYSQL_BIN_LOG::add_log_to_index");
5213 goto err;
5214 }
5215
5216 if (copy_file(&index_file, &crash_safe_index_file, 0)) {
5217 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_COPY_INDEX_TO_TMP,
5218 "MYSQL_BIN_LOG::add_log_to_index");
5219 goto err;
5220 }
5221
5222 if (my_b_write(&crash_safe_index_file, log_name, log_name_len) ||
5223 my_b_write(&crash_safe_index_file, pointer_cast<const uchar *>("\n"),
5224 1) ||
5225 flush_io_cache(&crash_safe_index_file) ||
5226 mysql_file_sync(crash_safe_index_file.file, MYF(MY_WME))) {
5227 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_APPEND_LOG_TO_TMP_INDEX, log_name);
5228 goto err;
5229 }
5230
5231 if (close_crash_safe_index_file()) {
5232 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_CLOSE_TMP_INDEX,
5233 "MYSQL_BIN_LOG::add_log_to_index");
5234 goto err;
5235 }
5236
5237 if (move_crash_safe_index_file_to_index_file(need_lock_index)) {
5238 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_MOVE_TMP_TO_INDEX,
5239 "MYSQL_BIN_LOG::add_log_to_index");
5240 goto err;
5241 }
5242
5243 return 0;
5244
5245 err:
5246 return -1;
5247 }
5248
get_current_log(LOG_INFO * linfo,bool need_lock_log)5249 int MYSQL_BIN_LOG::get_current_log(LOG_INFO *linfo,
5250 bool need_lock_log /*true*/) {
5251 if (need_lock_log) mysql_mutex_lock(&LOCK_log);
5252 int ret = raw_get_current_log(linfo);
5253 if (need_lock_log) mysql_mutex_unlock(&LOCK_log);
5254 return ret;
5255 }
5256
raw_get_current_log(LOG_INFO * linfo)5257 int MYSQL_BIN_LOG::raw_get_current_log(LOG_INFO *linfo) {
5258 strmake(linfo->log_file_name, log_file_name,
5259 sizeof(linfo->log_file_name) - 1);
5260 linfo->pos = m_binlog_file->position();
5261 linfo->encrypted_header_size = m_binlog_file->get_encrypted_header_size();
5262 return 0;
5263 }
5264
check_write_error(const THD * thd)5265 bool MYSQL_BIN_LOG::check_write_error(const THD *thd) {
5266 DBUG_TRACE;
5267
5268 bool checked = false;
5269
5270 if (!thd->is_error()) return checked;
5271
5272 switch (thd->get_stmt_da()->mysql_errno()) {
5273 case ER_TRANS_CACHE_FULL:
5274 case ER_STMT_CACHE_FULL:
5275 case ER_ERROR_ON_WRITE:
5276 case ER_BINLOG_LOGGING_IMPOSSIBLE:
5277 checked = true;
5278 break;
5279 }
5280 DBUG_PRINT("return", ("checked: %s", YESNO(checked)));
5281 return checked;
5282 }
5283
report_cache_write_error(THD * thd,bool is_transactional)5284 void MYSQL_BIN_LOG::report_cache_write_error(THD *thd, bool is_transactional) {
5285 DBUG_TRACE;
5286
5287 write_error = true;
5288
5289 if (check_write_error(thd)) return;
5290
5291 if (my_errno() == EFBIG) {
5292 if (is_transactional) {
5293 my_error(ER_TRANS_CACHE_FULL, MYF(MY_WME));
5294 } else {
5295 my_error(ER_STMT_CACHE_FULL, MYF(MY_WME));
5296 }
5297 } else {
5298 char errbuf[MYSYS_STRERROR_SIZE];
5299 my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name, errno,
5300 my_strerror(errbuf, sizeof(errbuf), errno));
5301 }
5302 }
5303
compare_log_name(const char * log_1,const char * log_2)5304 static int compare_log_name(const char *log_1, const char *log_2) {
5305 const char *log_1_basename = log_1 + dirname_length(log_1);
5306 const char *log_2_basename = log_2 + dirname_length(log_2);
5307
5308 return strcmp(log_1_basename, log_2_basename);
5309 }
5310
5311 /**
5312 Find the position in the log-index-file for the given log name.
5313
5314 @param[out] linfo The found log file name will be stored here, along
5315 with the byte offset of the next log file name in the index file.
5316 @param log_name Filename to find in the index file, or NULL if we
5317 want to read the first entry.
  @param need_lock_index If true, this function acquires LOCK_index;
  otherwise the lock should already be held by the caller.
5320
5321 @note
5322 On systems without the truncate function the file will end with one or
5323 more empty lines. These will be ignored when reading the file.
5324
5325 @retval
5326 0 ok
5327 @retval
5328 LOG_INFO_EOF End of log-index-file found
5329 @retval
5330 LOG_INFO_IO Got IO error while reading file
5331 */
5332
find_log_pos(LOG_INFO * linfo,const char * log_name,bool need_lock_index)5333 int MYSQL_BIN_LOG::find_log_pos(LOG_INFO *linfo, const char *log_name,
5334 bool need_lock_index) {
5335 int error = 0;
5336 char *full_fname = linfo->log_file_name;
5337 char full_log_name[FN_REFLEN], fname[FN_REFLEN];
5338 DBUG_TRACE;
5339 full_log_name[0] = full_fname[0] = 0;
5340
5341 /*
5342 Mutex needed because we need to make sure the file pointer does not
5343 move from under our feet
5344 */
5345 if (need_lock_index)
5346 mysql_mutex_lock(&LOCK_index);
5347 else
5348 mysql_mutex_assert_owner(&LOCK_index);
5349
5350 if (!my_b_inited(&index_file)) {
5351 error = LOG_INFO_IO;
5352 goto end;
5353 }
5354
5355 // extend relative paths for log_name to be searched
5356 if (log_name) {
5357 if (normalize_binlog_name(full_log_name, log_name, is_relay_log)) {
5358 error = LOG_INFO_EOF;
5359 goto end;
5360 }
5361 }
5362
5363 DBUG_PRINT("enter", ("log_name: %s, full_log_name: %s",
5364 log_name ? log_name : "NULL", full_log_name));
5365
5366 /* As the file is flushed, we can't get an error here */
5367 my_b_seek(&index_file, (my_off_t)0);
5368
5369 for (;;) {
5370 size_t length;
5371 my_off_t offset = my_b_tell(&index_file);
5372
5373 DBUG_EXECUTE_IF("simulate_find_log_pos_error", error = LOG_INFO_EOF;
5374 break;);
5375 /* If we get 0 or 1 characters, this is the end of the file */
5376 if ((length = my_b_gets(&index_file, fname, FN_REFLEN)) <= 1) {
5377 /* Did not find the given entry; Return not found or error */
5378 error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
5379 break;
5380 }
5381
5382 // extend relative paths and match against full path
5383 if (normalize_binlog_name(full_fname, fname, is_relay_log)) {
5384 error = LOG_INFO_EOF;
5385 break;
5386 }
5387 // if the log entry matches, null string matching anything
5388 if (!log_name || !compare_log_name(full_fname, full_log_name)) {
5389 DBUG_PRINT("info", ("Found log file entry"));
5390 linfo->index_file_start_offset = offset;
5391 linfo->index_file_offset = my_b_tell(&index_file);
5392 break;
5393 }
5394 linfo->entry_index++;
5395 }
5396
5397 end:
5398 if (need_lock_index) mysql_mutex_unlock(&LOCK_index);
5399 return error;
5400 }
5401
5402 /**
  Read the next log file name from the log-index-file, continuing from the
  position stored in linfo.
5404
5405 @param[out] linfo The filename will be stored here, along with the
5406 byte offset of the next filename in the index file.
5407
5408 @param need_lock_index If true, LOCK_index will be acquired;
5409 otherwise it should already be held by the caller.
5410
5411 @note
5412 - Before calling this function, one has to call find_log_pos()
5413 to set up 'linfo'
5414 - Mutex needed because we need to make sure the file pointer does not move
5415 from under our feet
5416
5417 @retval 0 ok
5418 @retval LOG_INFO_EOF End of log-index-file found
5419 @retval LOG_INFO_IO Got IO error while reading file
5420 */
find_next_log(LOG_INFO * linfo,bool need_lock_index)5421 int MYSQL_BIN_LOG::find_next_log(LOG_INFO *linfo, bool need_lock_index) {
5422 int error = 0;
5423 size_t length;
5424 char fname[FN_REFLEN];
5425 char *full_fname = linfo->log_file_name;
5426
5427 if (need_lock_index)
5428 mysql_mutex_lock(&LOCK_index);
5429 else
5430 mysql_mutex_assert_owner(&LOCK_index);
5431
5432 if (!my_b_inited(&index_file)) {
5433 error = LOG_INFO_IO;
5434 goto err;
5435 }
5436 /* As the file is flushed, we can't get an error here */
5437 my_b_seek(&index_file, linfo->index_file_offset);
5438
5439 linfo->index_file_start_offset = linfo->index_file_offset;
5440 if ((length = my_b_gets(&index_file, fname, FN_REFLEN)) <= 1) {
5441 error = !index_file.error ? LOG_INFO_EOF : LOG_INFO_IO;
5442 goto err;
5443 }
5444
5445 if (fname[0] != 0) {
5446 if (normalize_binlog_name(full_fname, fname, is_relay_log)) {
5447 error = LOG_INFO_EOF;
5448 goto err;
5449 }
5450 length = strlen(full_fname);
5451 }
5452
5453 linfo->index_file_offset = my_b_tell(&index_file);
5454
5455 err:
5456 if (need_lock_index) mysql_mutex_unlock(&LOCK_index);
5457 return error;
5458 }
5459
5460 /**
5461 Find the relay log name following the given name from relay log index file.
5462
5463 @param[in,out] log_name The name is full path name.
5464
5465 @return return 0 if it finds next relay log. Otherwise return the error code.
5466 */
find_next_relay_log(char log_name[FN_REFLEN+1])5467 int MYSQL_BIN_LOG::find_next_relay_log(char log_name[FN_REFLEN + 1]) {
5468 LOG_INFO info;
5469 int error;
5470 char relative_path_name[FN_REFLEN + 1];
5471
5472 if (fn_format(relative_path_name, log_name + dirname_length(log_name),
5473 mysql_data_home, "", 0) == NullS)
5474 return 1;
5475
5476 mysql_mutex_lock(&LOCK_index);
5477
5478 error = find_log_pos(&info, relative_path_name, false);
5479 if (error == 0) {
5480 error = find_next_log(&info, false);
5481 if (error == 0) strcpy(log_name, info.log_file_name);
5482 }
5483
5484 mysql_mutex_unlock(&LOCK_index);
5485 return error;
5486 }
5487
get_log_index(bool need_lock_index)5488 std::pair<int, std::list<std::string>> MYSQL_BIN_LOG::get_log_index(
5489 bool need_lock_index) {
5490 DBUG_TRACE;
5491 LOG_INFO log_info;
5492
5493 if (need_lock_index)
5494 mysql_mutex_lock(&LOCK_index);
5495 else
5496 mysql_mutex_assert_owner(&LOCK_index);
5497
5498 std::list<std::string> filename_list;
5499 int error = 0;
5500 for (error =
5501 this->find_log_pos(&log_info, nullptr, false /*need_lock_index*/);
5502 error == 0;
5503 error = this->find_next_log(&log_info, false /*need_lock_index*/)) {
5504 filename_list.push_back(std::string(log_info.log_file_name));
5505 }
5506
5507 if (need_lock_index) mysql_mutex_unlock(&LOCK_index);
5508
5509 return std::make_pair(error, filename_list);
5510 }
5511
5512 /**
5513 Removes files, as part of a RESET MASTER or RESET SLAVE statement,
5514 by deleting all logs referred to in the index file and the index
5515 file. Then, it creates a new index file and a new log file.
5516
5517 The new index file will only contain the new log file.
5518
5519 @param thd Thread
5520 @param delete_only If true, do not create a new index file and
5521 a new log file.
5522
5523 @note
5524 If not called from slave thread, write start event to new log
5525
5526 @retval
5527 0 ok
5528 @retval
5529 1 error
5530 */
bool MYSQL_BIN_LOG::reset_logs(THD *thd, bool delete_only) {
  LOG_INFO linfo;
  bool error = false;
  int err;
  // Holds the log name while `name` is temporarily set to nullptr below.
  const char *save_name = nullptr;
  Checkable_rwlock *sid_lock = nullptr;
  DBUG_TRACE;

  /*
    Flush logs for storage engines, so that the last transaction
    is persisted inside storage engines.

    The "log reset" flag is set on the session only around ha_flush_logs()
    and is cleared again on both the failure and the success path.
  */
  DBUG_ASSERT(!thd->is_log_reset());
  thd->set_log_reset();
  if (ha_flush_logs()) {
    thd->clear_log_reset();
    return true;
  }
  thd->clear_log_reset();

  ha_reset_logs(thd);

  /*
    We need to get both locks to be sure that no one is trying to
    write to the index log file.
  */
  mysql_mutex_lock(&LOCK_log);
  mysql_mutex_lock(&LOCK_index);

  // Relay logs use the sid lock of their own sid map, binary logs the
  // global one. It is write-locked until the end of the function.
  if (is_relay_log)
    sid_lock = previous_gtid_set_relaylog->get_sid_map()->get_sid_lock();
  else
    sid_lock = global_sid_lock;
  sid_lock->wrlock();

  /* Save variables so that we can reopen the log */
  save_name = name;
  name = nullptr;  // Protect against free
  close(LOG_CLOSE_TO_BE_OPENED, false /*need_lock_log=false*/,
        false /*need_lock_index=false*/);

  /*
    First delete all old log files and then update the index file.
    As we first delete the log files and do not use sort of logging,
    a crash may lead to an inconsistent state where the index has
    references to non-existent files.

    We need to invert the steps and use the purge_index_file methods
    in order to make the operation safe.
  */

  // Position linfo on the first entry of the index file.
  if ((err = find_log_pos(&linfo, NullS, false /*need_lock_index=false*/)) !=
      0) {
    uint errcode = purge_log_get_error_code(err);
    LogErr(ERROR_LEVEL, ER_BINLOG_CANT_LOCATE_OLD_BINLOG_OR_RELAY_LOG_FILES);
    my_error(errcode, MYF(0));
    error = true;
    goto err;
  }

  // Delete every file listed in the index, one entry per iteration.
  for (;;) {
    if ((error = my_delete_allow_opened(linfo.log_file_name, MYF(0))) != 0) {
      if (my_errno() == ENOENT) {
        // A file that is already gone only produces a warning.
        push_warning_printf(
            current_thd, Sql_condition::SL_WARNING, ER_LOG_PURGE_NO_FILE,
            ER_THD(current_thd, ER_LOG_PURGE_NO_FILE), linfo.log_file_name);
        LogErr(INFORMATION_LEVEL, ER_BINLOG_CANT_DELETE_FILE,
               linfo.log_file_name);
        set_my_errno(0);
        error = false;
      } else {
        // Any other deletion failure aborts the reset.
        push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                            ER_BINLOG_PURGE_FATAL_ERR,
                            "a problem with deleting %s; "
                            "consider examining correspondence "
                            "of your binlog index file "
                            "to the actual binlog files",
                            linfo.log_file_name);
        error = true;
        goto err;
      }
    }
    // Any non-zero result from find_next_log() ends the loop.
    if (find_next_log(&linfo, false /*need_lock_index=false*/)) break;
  }

  /* Start logging with a new file */
  close(LOG_CLOSE_INDEX | LOG_CLOSE_TO_BE_OPENED, false /*need_lock_log=false*/,
        false /*need_lock_index=false*/);
  // Delete the index file itself, with the same ENOENT tolerance as above.
  if ((error = my_delete_allow_opened(index_file_name,
                                      MYF(0))))  // Reset (open will update)
  {
    if (my_errno() == ENOENT) {
      push_warning_printf(
          current_thd, Sql_condition::SL_WARNING, ER_LOG_PURGE_NO_FILE,
          ER_THD(current_thd, ER_LOG_PURGE_NO_FILE), index_file_name);
      LogErr(INFORMATION_LEVEL, ER_BINLOG_CANT_DELETE_FILE, index_file_name);
      set_my_errno(0);
      error = false;
    } else {
      push_warning_printf(current_thd, Sql_condition::SL_WARNING,
                          ER_BINLOG_PURGE_FATAL_ERR,
                          "a problem with deleting %s; "
                          "consider examining correspondence "
                          "of your binlog index file "
                          "to the actual binlog files",
                          index_file_name);
      error = true;
      goto err;
    }
  }
  DBUG_EXECUTE_IF("wait_for_kill_gtid_state_clear", {
    const char action[] = "now WAIT_FOR kill_gtid_state_clear";
    DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(action)));
  };);

  /*
    For relay logs we clear the gtid state associated per channel(i.e rli)
    in the purge_relay_logs()
  */
  if (!is_relay_log) {
    if (gtid_state->clear(thd)) {
      error = true;
    }
    /*
      Don't clear global_sid_map because gtid_state->clear() above didn't
      touch the owned_gtids GTID set.
    */
    // Because of the short-circuit, init() is skipped when clear() failed.
    error = error || gtid_state->init();
  }

  if (!delete_only) {
    // NOTE(review): when open_index_file() returns non-zero, open_binlog()
    // is not called and `error` is left unchanged, so that failure is not
    // reflected in the return value - confirm this is intended.
    if (!open_index_file(index_file_name, nullptr,
                         false /*need_lock_index=false*/))
      error = open_binlog(save_name, nullptr, max_size, false,
                          false /*need_lock_index=false*/,
                          false /*need_sid_lock=false*/, nullptr,
                          thd->lex->next_binlog_file_nr) ||
              error;
  }
  /* String has been duplicated, free old file-name */
  if (name != nullptr) {
    my_free(const_cast<char *>(save_name));
    save_name = nullptr;
  }

err:
  // If `name` is still unset, hand the saved name back to it.
  if (name == nullptr)
    name = const_cast<char *>(save_name);  // restore old file-name
  // Release in the reverse order of acquisition.
  sid_lock->unlock();
  mysql_mutex_unlock(&LOCK_index);
  mysql_mutex_unlock(&LOCK_log);
  return error;
}
5684
5685 /**
5686 Set the name of crash safe index file.
5687
5688 @retval
5689 0 ok
5690 @retval
5691 1 error
5692 */
set_crash_safe_index_file_name(const char * base_file_name)5693 int MYSQL_BIN_LOG::set_crash_safe_index_file_name(const char *base_file_name) {
5694 int error = 0;
5695 DBUG_TRACE;
5696 if (fn_format(crash_safe_index_file_name, base_file_name, mysql_data_home,
5697 ".index_crash_safe",
5698 MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH | MY_REPLACE_EXT)) ==
5699 nullptr) {
5700 error = 1;
5701 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_SET_TMP_INDEX_NAME);
5702 }
5703 return error;
5704 }
5705
5706 /**
5707 Open a (new) crash safe index file.
5708
5709 @note
5710 The crash safe index file is a special file
5711 used for guaranteeing index file crash safe.
5712 @retval
5713 0 ok
5714 @retval
5715 1 error
5716 */
open_crash_safe_index_file()5717 int MYSQL_BIN_LOG::open_crash_safe_index_file() {
5718 int error = 0;
5719 File file = -1;
5720
5721 DBUG_TRACE;
5722
5723 if (!my_b_inited(&crash_safe_index_file)) {
5724 myf flags = MY_WME | MY_NABP | MY_WAIT_IF_FULL;
5725 if (is_relay_log) flags = flags | MY_REPORT_WAITING_IF_FULL;
5726
5727 if ((file = my_open(crash_safe_index_file_name, O_RDWR | O_CREAT,
5728 MYF(MY_WME))) < 0 ||
5729 init_io_cache(&crash_safe_index_file, file, IO_SIZE, WRITE_CACHE, 0,
5730 false, flags)) {
5731 error = 1;
5732 LogErr(ERROR_LEVEL, ER_BINLOG_FAILED_TO_OPEN_TEMPORARY_INDEX_FILE);
5733 }
5734 }
5735 return error;
5736 }
5737
5738 /**
5739 Close the crash safe index file.
5740
5741 @note
5742 The crash safe file is just closed, is not deleted.
5743 Because it is moved to index file later on.
5744 @retval
5745 0 ok
5746 @retval
5747 1 error
5748 */
close_crash_safe_index_file()5749 int MYSQL_BIN_LOG::close_crash_safe_index_file() {
5750 int error = 0;
5751
5752 DBUG_TRACE;
5753
5754 if (my_b_inited(&crash_safe_index_file)) {
5755 end_io_cache(&crash_safe_index_file);
5756 error = my_close(crash_safe_index_file.file, MYF(0));
5757 }
5758 crash_safe_index_file = IO_CACHE();
5759
5760 return error;
5761 }
5762
5763 /**
5764 Remove logs from index file.
5765
  - To make this crash safe, we first copy the content of the index file,
  starting at the index_file_start_offset recorded in log_info, to the
  crash safe index file, and then move the crash safe index file over
  the index file.
5770
5771 @param log_info Store here the found log file name and
5772 position to the NEXT log file name in
5773 the index file.
5774
5775 @param need_update_threads If we want to update the log coordinates
5776 of all threads. False for relay logs,
5777 true otherwise.
5778
5779 @retval
5780 0 ok
5781 @retval
5782 LOG_INFO_IO Got IO error while reading/writing file
5783 */
remove_logs_from_index(LOG_INFO * log_info,bool need_update_threads)5784 int MYSQL_BIN_LOG::remove_logs_from_index(LOG_INFO *log_info,
5785 bool need_update_threads) {
5786 if (open_crash_safe_index_file()) {
5787 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_OPEN_TMP_INDEX,
5788 "MYSQL_BIN_LOG::remove_logs_from_index");
5789 goto err;
5790 }
5791
5792 if (copy_file(&index_file, &crash_safe_index_file,
5793 log_info->index_file_start_offset)) {
5794 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_COPY_INDEX_TO_TMP,
5795 "MYSQL_BIN_LOG::remove_logs_from_index");
5796 goto err;
5797 }
5798
5799 if (close_crash_safe_index_file()) {
5800 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_CLOSE_TMP_INDEX,
5801 "MYSQL_BIN_LOG::remove_logs_from_index");
5802 goto err;
5803 }
5804 DBUG_EXECUTE_IF("fault_injection_copy_part_file", DBUG_SUICIDE(););
5805
5806 if (move_crash_safe_index_file_to_index_file(
5807 false /*need_lock_index=false*/)) {
5808 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_MOVE_TMP_TO_INDEX,
5809 "MYSQL_BIN_LOG::remove_logs_from_index");
5810 goto err;
5811 }
5812
5813 // now update offsets in index file for running threads
5814 if (need_update_threads)
5815 adjust_linfo_offsets(log_info->index_file_start_offset);
5816 return 0;
5817
5818 err:
5819 return LOG_INFO_IO;
5820 }
5821
5822 /**
5823 Remove all logs before the given log from disk and from the index file.
5824
5825 @param to_log Delete all log file name before this file.
5826 @param included If true, to_log is deleted too.
  @param need_lock_index Set to true, if the lock_index of the binary log
  shall be acquired, false if the caller is already the owner of the lock_index.
  @param need_update_threads If we want to update the log coordinates of
  all threads. False for relay logs, true otherwise.
  @param decrease_log_space If not null, decrement this variable by
  the amount of log space freed
5833 @param auto_purge True if this is an automatic purge.
5834
5835 @note
5836 If any of the logs before the deleted one is in use,
5837 only purge logs up to this one.
5838
5839 @retval 0 ok
5840 @retval LOG_INFO_EOF to_log not found
5841 @retval LOG_INFO_EMFILE too many files opened
5842 @retval LOG_INFO_FATAL if any other than ENOENT error from
5843 mysql_file_stat() or mysql_file_delete()
5844 */
5845
int MYSQL_BIN_LOG::purge_logs(const char *to_log, bool included,
                              bool need_lock_index, bool need_update_threads,
                              ulonglong *decrease_log_space, bool auto_purge) {
  int error = 0, no_of_log_files_to_purge = 0, no_of_log_files_purged = 0;
  int no_of_threads_locking_log = 0;
  // Set when the current entry is to_log itself and `included` is true, so
  // that the loop below stops right after handling that entry.
  bool exit_loop = false;
  LOG_INFO log_info;
  THD *thd = current_thd;
  DBUG_TRACE;
  DBUG_PRINT("info", ("to_log= %s", to_log));

  if (need_lock_index)
    mysql_mutex_lock(&LOCK_index);
  else
    mysql_mutex_assert_owner(&LOCK_index);
  // First pass: make sure to_log is listed in the index file.
  if ((error =
           find_log_pos(&log_info, to_log, false /*need_lock_index=false*/))) {
    LogErr(ERROR_LEVEL, ER_BINLOG_PURGE_LOGS_CALLED_WITH_FILE_NOT_IN_INDEX,
           to_log);
    goto err;
  }

  // entry_index is incremented by find_log_pos() for every entry it skips
  // before the match; it is used here as the number of files to purge.
  no_of_log_files_to_purge = log_info.entry_index;

  // Open the purge index (register) file in "destroy" mode.
  if ((error = open_purge_index_file(true))) {
    LogErr(ERROR_LEVEL, ER_BINLOG_PURGE_LOGS_CANT_SYNC_INDEX_FILE);
    goto err;
  }

  /*
    File name exists in index file; delete until we find this file
    or a file that is used.
  */
  // Second pass: restart from the first entry of the index file.
  if ((error = find_log_pos(&log_info, NullS, false /*need_lock_index=false*/)))
    goto err;

  // Continue while the current entry differs from to_log; when it is to_log,
  // continue for one last iteration only if `included` is true.
  while ((compare_log_name(to_log, log_info.log_file_name) ||
          (exit_loop = included))) {
    // Stop at the active log; warn unless this is an automatic purge.
    if (is_active(log_info.log_file_name)) {
      if (!auto_purge)
        push_warning_printf(
            thd, Sql_condition::SL_WARNING, ER_WARN_PURGE_LOG_IS_ACTIVE,
            ER_THD(thd, ER_WARN_PURGE_LOG_IS_ACTIVE), log_info.log_file_name);
      break;
    }

    // Stop at a log that log_in_use() reports as being in use.
    if ((no_of_threads_locking_log = log_in_use(log_info.log_file_name))) {
      if (!auto_purge)
        push_warning_printf(thd, Sql_condition::SL_WARNING,
                            ER_WARN_PURGE_LOG_IN_USE,
                            ER_THD(thd, ER_WARN_PURGE_LOG_IN_USE),
                            log_info.log_file_name, no_of_threads_locking_log,
                            no_of_log_files_purged, no_of_log_files_to_purge);
      break;
    }
    no_of_log_files_purged++;

    // Record the file name in the purge index file; the files themselves
    // are deleted later, at the `err` label.
    if ((error = register_purge_index_entry(log_info.log_file_name))) {
      LogErr(ERROR_LEVEL, ER_BINLOG_PURGE_LOGS_CANT_COPY_TO_REGISTER_FILE,
             log_info.log_file_name);
      goto err;
    }

    if (find_next_log(&log_info, false /*need_lock_index=false*/) || exit_loop)
      break;
  }

  DBUG_EXECUTE_IF("crash_purge_before_update_index", DBUG_SUICIDE(););

  // Flush and sync the purge index file before the index file is changed.
  if ((error = sync_purge_index_file())) {
    LogErr(ERROR_LEVEL, ER_BINLOG_PURGE_LOGS_CANT_FLUSH_REGISTER_FILE);
    goto err;
  }

  /* We know how many files to delete. Update index file. */
  if ((error = remove_logs_from_index(&log_info, need_update_threads))) {
    LogErr(ERROR_LEVEL, ER_BINLOG_PURGE_LOGS_CANT_UPDATE_INDEX_FILE);
    goto err;
  }

  // Update gtid_state->lost_gtids
  if (!is_relay_log) {
    global_sid_lock->wrlock();
    error = init_gtid_sets(
        nullptr, const_cast<Gtid_set *>(gtid_state->get_lost_gtids()),
        opt_master_verify_checksum, false /*false=don't need lock*/,
        nullptr /*trx_parser*/, nullptr /*partial_trx*/);
    global_sid_lock->unlock();
    if (error) goto err;
  }

  DBUG_EXECUTE_IF("crash_purge_critical_after_update_index", DBUG_SUICIDE(););

err:
  // Reached on success as well as on failure.

  int error_index = 0, close_error_index = 0;
  /* Read each entry from purge_index_file and delete the file. */
  // Skipped when an error was recorded above or the purge index file is
  // not open.
  if (!error && is_inited_purge_index_file() &&
      (error_index = purge_index_entry(thd, decrease_log_space,
                                       false /*need_lock_index=false*/)))
    LogErr(ERROR_LEVEL, ER_BINLOG_PURGE_LOGS_FAILED_TO_PURGE_LOG);

  // Always close the purge index file, whatever happened before.
  close_error_index = close_purge_index_file();

  DBUG_EXECUTE_IF("crash_purge_non_critical_after_update_index",
                  DBUG_SUICIDE(););

  if (need_lock_index) mysql_mutex_unlock(&LOCK_index);

  /*
    Error codes from purge logs take precedence.
    Then error codes from purging the index entry.
    Finally, error codes from closing the purge index file.
  */
  error = error ? error : (error_index ? error_index : close_error_index);

  return error;
}
5964
set_purge_index_file_name(const char * base_file_name)5965 int MYSQL_BIN_LOG::set_purge_index_file_name(const char *base_file_name) {
5966 int error = 0;
5967 DBUG_TRACE;
5968 if (fn_format(
5969 purge_index_file_name, base_file_name, mysql_data_home, ".~rec~",
5970 MYF(MY_UNPACK_FILENAME | MY_SAFE_PATH | MY_REPLACE_EXT)) == nullptr) {
5971 error = 1;
5972 LogErr(ERROR_LEVEL, ER_BINLOG_FAILED_TO_SET_PURGE_INDEX_FILE_NAME);
5973 }
5974 return error;
5975 }
5976
open_purge_index_file(bool destroy)5977 int MYSQL_BIN_LOG::open_purge_index_file(bool destroy) {
5978 int error = 0;
5979 File file = -1;
5980
5981 DBUG_TRACE;
5982
5983 if (destroy) close_purge_index_file();
5984
5985 if (!my_b_inited(&purge_index_file)) {
5986 myf flags = MY_WME | MY_NABP | MY_WAIT_IF_FULL;
5987 if (is_relay_log) flags = flags | MY_REPORT_WAITING_IF_FULL;
5988
5989 if ((file = my_open(purge_index_file_name, O_RDWR | O_CREAT, MYF(MY_WME))) <
5990 0 ||
5991 init_io_cache(&purge_index_file, file, IO_SIZE,
5992 (destroy ? WRITE_CACHE : READ_CACHE), 0, false, flags)) {
5993 error = 1;
5994 LogErr(ERROR_LEVEL, ER_BINLOG_FAILED_TO_OPEN_REGISTER_FILE);
5995 }
5996 }
5997 return error;
5998 }
5999
close_purge_index_file()6000 int MYSQL_BIN_LOG::close_purge_index_file() {
6001 int error = 0;
6002
6003 DBUG_TRACE;
6004
6005 if (my_b_inited(&purge_index_file)) {
6006 end_io_cache(&purge_index_file);
6007 error = my_close(purge_index_file.file, MYF(0));
6008 }
6009 my_delete(purge_index_file_name, MYF(0));
6010 new (&purge_index_file) IO_CACHE();
6011
6012 return error;
6013 }
6014
is_inited_purge_index_file()6015 bool MYSQL_BIN_LOG::is_inited_purge_index_file() {
6016 DBUG_TRACE;
6017 return my_b_inited(&purge_index_file);
6018 }
6019
sync_purge_index_file()6020 int MYSQL_BIN_LOG::sync_purge_index_file() {
6021 int error = 0;
6022 DBUG_TRACE;
6023
6024 if ((error = flush_io_cache(&purge_index_file)) ||
6025 (error = my_sync(purge_index_file.file, MYF(MY_WME))))
6026 return error;
6027
6028 return error;
6029 }
6030
register_purge_index_entry(const char * entry)6031 int MYSQL_BIN_LOG::register_purge_index_entry(const char *entry) {
6032 int error = 0;
6033 DBUG_TRACE;
6034
6035 if ((error = my_b_write(&purge_index_file, (const uchar *)entry,
6036 strlen(entry))) ||
6037 (error = my_b_write(&purge_index_file, (const uchar *)"\n", 1)))
6038 return error;
6039
6040 return error;
6041 }
6042
register_create_index_entry(const char * entry)6043 int MYSQL_BIN_LOG::register_create_index_entry(const char *entry) {
6044 DBUG_TRACE;
6045 return register_purge_index_entry(entry);
6046 }
6047
/**
  Delete from disk the log files listed in the purge index file.

  The purge index file cache is switched to read mode and read line by line.
  For every listed file:
    - if it cannot be stat'ed because it does not exist, a warning is raised
      and the entry is skipped;
    - if stat fails for any other reason, the function stops with
      LOG_INFO_FATAL;
    - otherwise the file is looked up in the binlog index file and is deleted
      only when the lookup ends with LOG_INFO_EOF (the file is no longer
      listed there). A file that is still listed is left untouched.

  @param thd                 Session that receives warnings; may be null, in
                             which case some problems are only written to
                             the error log.
  @param decrease_log_space  If not null, decremented by the size of every
                             file that was deleted successfully.
  @param need_lock_index     Passed on to find_log_pos(). When false,
                             ha_binlog_index_purge_file() is also called for
                             every file about to be deleted.

  @retval 0                ok
  @retval LOG_INFO_EMFILE  deleting a file failed with EMFILE
  @retval LOG_INFO_FATAL   stat or delete failed with an error other than
                           ENOENT (and, for delete, other than EMFILE)
  @retval other            non-zero value from reinit_io_cache(), the error
                           of the purge index file cache, or a
                           find_log_pos() error other than LOG_INFO_EOF
*/
int MYSQL_BIN_LOG::purge_index_entry(THD *thd, ulonglong *decrease_log_space,
                                     bool need_lock_index) {
  MY_STAT s;
  int error = 0;
  // log_info holds the entry read from the purge index file;
  // check_log_info receives the result of the lookup in the index file.
  LOG_INFO log_info;
  LOG_INFO check_log_info;

  DBUG_TRACE;

  DBUG_ASSERT(my_b_inited(&purge_index_file));

  // Switch the cache to read mode, starting at offset 0.
  if ((error =
           reinit_io_cache(&purge_index_file, READ_CACHE, 0, false, false))) {
    LogErr(ERROR_LEVEL, ER_BINLOG_FAILED_TO_REINIT_REGISTER_FILE);
    goto err;
  }

  for (;;) {
    size_t length;

    // 0 or 1 characters read means end of file, or a read error.
    if ((length = my_b_gets(&purge_index_file, log_info.log_file_name,
                            FN_REFLEN)) <= 1) {
      if (purge_index_file.error) {
        error = purge_index_file.error;
        LogErr(ERROR_LEVEL, ER_BINLOG_FAILED_TO_READ_REGISTER_FILE, error);
        goto err;
      }

      /* Reached EOF */
      break;
    }

    /* Get rid of the trailing '\n' */
    log_info.log_file_name[length - 1] = 0;

    if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &s, MYF(0))) {
      if (my_errno() == ENOENT) {
        /*
          It's not fatal if we can't stat a log file that does not exist;
          If we could not stat, we won't delete.
        */
        if (thd) {
          push_warning_printf(
              thd, Sql_condition::SL_WARNING, ER_LOG_PURGE_NO_FILE,
              ER_THD(thd, ER_LOG_PURGE_NO_FILE), log_info.log_file_name);
        }
        LogErr(INFORMATION_LEVEL, ER_CANT_STAT_FILE, log_info.log_file_name);
        set_my_errno(0);
      } else {
        /*
          Other than ENOENT are fatal
        */
        if (thd) {
          push_warning_printf(thd, Sql_condition::SL_WARNING,
                              ER_BINLOG_PURGE_FATAL_ERR,
                              "a problem with getting info on being purged %s; "
                              "consider examining correspondence "
                              "of your binlog index file "
                              "to the actual binlog files",
                              log_info.log_file_name);
        } else {
          LogErr(INFORMATION_LEVEL,
                 ER_BINLOG_CANT_DELETE_LOG_FILE_DOES_INDEX_MATCH_FILES,
                 log_info.log_file_name);
        }
        error = LOG_INFO_FATAL;
        goto err;
      }
    } else {
      // The file exists. A non-zero lookup result is expected here:
      // LOG_INFO_EOF means the file is no longer listed in the index file
      // and may be deleted. A zero result (still listed) skips the file.
      if ((error = find_log_pos(&check_log_info, log_info.log_file_name,
                                need_lock_index))) {
        if (error != LOG_INFO_EOF) {
          // The index file could not be read: give up with that error.
          if (thd) {
            push_warning_printf(thd, Sql_condition::SL_WARNING,
                                ER_BINLOG_PURGE_FATAL_ERR,
                                "a problem with deleting %s and "
                                "reading the binlog index file",
                                log_info.log_file_name);
          } else {
            LogErr(INFORMATION_LEVEL,
                   ER_BINLOG_CANT_DELETE_FILE_AND_READ_BINLOG_INDEX,
                   log_info.log_file_name);
          }
          goto err;
        }

        // LOG_INFO_EOF is not an error for this function.
        error = 0;
        if (!need_lock_index) {
          /*
            This is to avoid triggering an error in NDB.

            @todo: This is weird, what does NDB errors have to do with
            need_lock_index? Explain better or refactor /Sven
          */
          ha_binlog_index_purge_file(current_thd, log_info.log_file_name);
        }

        DBUG_PRINT("info", ("purging %s", log_info.log_file_name));
        if (!mysql_file_delete(key_file_binlog, log_info.log_file_name,
                               MYF(0))) {
          DBUG_EXECUTE_IF("wait_in_purge_index_entry", {
            const char action[] =
                "now SIGNAL in_purge_index_entry WAIT_FOR go_ahead_sql";
            DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(action)));
            DBUG_SET("-d,wait_in_purge_index_entry");
          };);

          // Deleted: account for the freed space using the size from stat.
          if (decrease_log_space) *decrease_log_space -= s.st_size;
        } else {
          if (my_errno() == ENOENT) {
            // The file vanished between stat and delete: warn and go on.
            if (thd) {
              push_warning_printf(
                  thd, Sql_condition::SL_WARNING, ER_LOG_PURGE_NO_FILE,
                  ER_THD(thd, ER_LOG_PURGE_NO_FILE), log_info.log_file_name);
            }
            LogErr(INFORMATION_LEVEL, ER_BINLOG_CANT_DELETE_FILE,
                   log_info.log_file_name);
            set_my_errno(0);
          } else {
            // Any other deletion failure stops the purge.
            if (thd) {
              push_warning_printf(thd, Sql_condition::SL_WARNING,
                                  ER_BINLOG_PURGE_FATAL_ERR,
                                  "a problem with deleting %s; "
                                  "consider examining correspondence "
                                  "of your binlog index file "
                                  "to the actual binlog files",
                                  log_info.log_file_name);
            } else {
              LogErr(INFORMATION_LEVEL,
                     ER_BINLOG_CANT_DELETE_LOG_FILE_DOES_INDEX_MATCH_FILES,
                     log_info.log_file_name);
            }
            // EMFILE gets its own return code; everything else is fatal.
            if (my_errno() == EMFILE) {
              DBUG_PRINT("info", ("my_errno: %d, set ret = LOG_INFO_EMFILE",
                                  my_errno()));
              error = LOG_INFO_EMFILE;
              goto err;
            }
            error = LOG_INFO_FATAL;
            goto err;
          }
        }
      }
    }
  }

err:
  return error;
}
6197
6198 /**
6199 Remove all logs before the given file date from disk and from the
6200 index file.
6201
6202 @param purge_time Delete all log files before given date.
6203 @param auto_purge True if this is an automatic purge.
6204
6205 @note
6206 If any of the logs before the deleted one is in use,
6207 only purge logs up to this one.
6208
6209 @retval
6210 0 ok
6211 @retval
6212 LOG_INFO_PURGE_NO_ROTATE Binary file that can't be rotated
6213 LOG_INFO_FATAL if any other than ENOENT error from
6214 mysql_file_stat() or mysql_file_delete()
6215 */
6216
purge_logs_before_date(time_t purge_time,bool auto_purge)6217 int MYSQL_BIN_LOG::purge_logs_before_date(time_t purge_time, bool auto_purge) {
6218 int error;
6219 int no_of_threads_locking_log = 0, no_of_log_files_purged = 0;
6220 bool log_is_active = false, log_is_in_use = false;
6221 char to_log[FN_REFLEN], copy_log_in_use[FN_REFLEN];
6222 LOG_INFO log_info;
6223 MY_STAT stat_area;
6224 THD *thd = current_thd;
6225
6226 DBUG_TRACE;
6227
6228 mysql_mutex_lock(&LOCK_index);
6229 to_log[0] = 0;
6230
6231 if ((error = find_log_pos(&log_info, NullS, false /*need_lock_index=false*/)))
6232 goto err;
6233
6234 while (!(log_is_active = is_active(log_info.log_file_name))) {
6235 if (!mysql_file_stat(m_key_file_log, log_info.log_file_name, &stat_area,
6236 MYF(0))) {
6237 if (my_errno() == ENOENT) {
6238 /*
6239 It's not fatal if we can't stat a log file that does not exist.
6240 */
6241 set_my_errno(0);
6242 } else {
6243 /*
6244 Other than ENOENT are fatal
6245 */
6246 if (thd) {
6247 push_warning_printf(thd, Sql_condition::SL_WARNING,
6248 ER_BINLOG_PURGE_FATAL_ERR,
6249 "a problem with getting info on being purged %s; "
6250 "consider examining correspondence "
6251 "of your binlog index file "
6252 "to the actual binlog files",
6253 log_info.log_file_name);
6254 } else {
6255 LogErr(INFORMATION_LEVEL, ER_BINLOG_FAILED_TO_DELETE_LOG_FILE,
6256 log_info.log_file_name);
6257 }
6258 error = LOG_INFO_FATAL;
6259 goto err;
6260 }
6261 }
6262 /* check if the binary log file is older than the purge_time
6263 if yes check if it is in use, if not in use then add
6264 it in the list of binary log files to be purged.
6265 */
6266 else if (stat_area.st_mtime < purge_time) {
6267 if ((no_of_threads_locking_log = log_in_use(log_info.log_file_name))) {
6268 if (!auto_purge) {
6269 log_is_in_use = true;
6270 strcpy(copy_log_in_use, log_info.log_file_name);
6271 }
6272 break;
6273 }
6274 strmake(to_log, log_info.log_file_name,
6275 sizeof(log_info.log_file_name) - 1);
6276 no_of_log_files_purged++;
6277 } else
6278 break;
6279 if (find_next_log(&log_info, false /*need_lock_index=false*/)) break;
6280 }
6281
6282 if (log_is_active) {
6283 if (!auto_purge)
6284 push_warning_printf(
6285 thd, Sql_condition::SL_WARNING, ER_WARN_PURGE_LOG_IS_ACTIVE,
6286 ER_THD(thd, ER_WARN_PURGE_LOG_IS_ACTIVE), log_info.log_file_name);
6287 }
6288
6289 if (log_is_in_use) {
6290 int no_of_log_files_to_purge = no_of_log_files_purged + 1;
6291 while (strcmp(log_file_name, log_info.log_file_name)) {
6292 if (mysql_file_stat(m_key_file_log, log_info.log_file_name, &stat_area,
6293 MYF(0))) {
6294 if (stat_area.st_mtime < purge_time)
6295 no_of_log_files_to_purge++;
6296 else
6297 break;
6298 }
6299 if (find_next_log(&log_info, false /*need_lock_index=false*/)) {
6300 no_of_log_files_to_purge++;
6301 break;
6302 }
6303 }
6304
6305 push_warning_printf(thd, Sql_condition::SL_WARNING,
6306 ER_WARN_PURGE_LOG_IN_USE,
6307 ER_THD(thd, ER_WARN_PURGE_LOG_IN_USE), copy_log_in_use,
6308 no_of_threads_locking_log, no_of_log_files_purged,
6309 no_of_log_files_to_purge);
6310 }
6311
6312 error = (to_log[0] ? purge_logs(to_log, true, false /*need_lock_index=false*/,
6313 true /*need_update_threads=true*/,
6314 (ulonglong *)nullptr, auto_purge)
6315 : 0);
6316
6317 err:
6318 mysql_mutex_unlock(&LOCK_index);
6319 return error;
6320 }
6321
6322 /**
6323 Create a new log file name.
6324
6325 @param[out] buf Buffer allocated with at least FN_REFLEN bytes where
6326 new name is stored.
6327 @param log_ident Identity of the binary/relay log.
6328
6329 @note
  If the file name would be longer than FN_REFLEN, it is truncated.
6331 */
6332
make_log_name(char * buf,const char * log_ident)6333 void MYSQL_BIN_LOG::make_log_name(char *buf, const char *log_ident) {
6334 size_t dir_len = dirname_length(log_file_name);
6335 if (dir_len >= FN_REFLEN) dir_len = FN_REFLEN - 1;
6336 my_stpnmov(buf, log_file_name, dir_len);
6337 strmake(buf + dir_len, log_ident, FN_REFLEN - dir_len - 1);
6338 }
6339
6340 /**
6341 Check if we are writing/reading to the given log file.
6342 */
6343
is_active(const char * log_file_name_arg)6344 bool MYSQL_BIN_LOG::is_active(const char *log_file_name_arg) {
6345 return !compare_log_name(log_file_name, log_file_name_arg);
6346 }
6347
inc_prep_xids(THD * thd)6348 void MYSQL_BIN_LOG::inc_prep_xids(THD *thd) {
6349 DBUG_TRACE;
6350 #ifndef DBUG_OFF
6351 int result = ++m_atomic_prep_xids;
6352 DBUG_PRINT("debug", ("m_atomic_prep_xids: %d", result));
6353 #else
6354 m_atomic_prep_xids++;
6355 #endif
6356 thd->get_transaction()->m_flags.xid_written = true;
6357 }
6358
dec_prep_xids(THD * thd)6359 void MYSQL_BIN_LOG::dec_prep_xids(THD *thd) {
6360 DBUG_TRACE;
6361 int32 result = --m_atomic_prep_xids;
6362 DBUG_PRINT("debug", ("m_atomic_prep_xids: %d", result));
6363 thd->get_transaction()->m_flags.xid_written = false;
6364 if (result == 0) {
6365 mysql_mutex_lock(&LOCK_xids);
6366 mysql_cond_signal(&m_prep_xids_cond);
6367 mysql_mutex_unlock(&LOCK_xids);
6368 }
6369 }
6370
6371 /*
  Wrappers around new_file_impl to avoid using an argument
  to control locking. Such an argument 1) is less readable, 2) breaks
  encapsulation, and 3) allows external access to the class without
  a lock (which is not possible with the private new_file_without_locking
  method).
6377
6378 @retval
6379 nonzero - error
6380
6381 */
6382
new_file(Format_description_log_event * extra_description_event)6383 int MYSQL_BIN_LOG::new_file(
6384 Format_description_log_event *extra_description_event) {
6385 return new_file_impl(true /*need_lock_log=true*/, extra_description_event);
6386 }
6387
6388 /*
6389 @retval
6390 nonzero - error
6391 */
new_file_without_locking(Format_description_log_event * extra_description_event)6392 int MYSQL_BIN_LOG::new_file_without_locking(
6393 Format_description_log_event *extra_description_event) {
6394 return new_file_impl(false /*need_lock_log=false*/, extra_description_event);
6395 }
6396
6397 /**
6398 Start writing to a new log file or reopen the old file.
6399
6400 @param need_lock_log If true, this function acquires LOCK_log;
6401 otherwise the caller should already have acquired it.
6402
6403 @param extra_description_event The master's FDE to be written by the I/O
6404 thread while creating a new relay log file. This should be NULL for
6405 binary log files.
6406
6407 @retval 0 success
6408 @retval nonzero - error
6409
6410 @note The new file name is stored last in the index file
6411 */
new_file_impl(bool need_lock_log,Format_description_log_event * extra_description_event)6412 int MYSQL_BIN_LOG::new_file_impl(
6413 bool need_lock_log, Format_description_log_event *extra_description_event) {
6414 int error = 0;
6415 bool close_on_error = false;
6416 char new_name[FN_REFLEN], *new_name_ptr = nullptr, *old_name, *file_to_open;
6417 const size_t ERR_CLOSE_MSG_LEN = 1024;
6418 char close_on_error_msg[ERR_CLOSE_MSG_LEN];
6419 memset(close_on_error_msg, 0, sizeof close_on_error_msg);
6420
6421 DBUG_TRACE;
6422 if (!is_open()) {
6423 DBUG_PRINT("info", ("log is closed"));
6424 return error;
6425 }
6426
6427 if (need_lock_log)
6428 mysql_mutex_lock(&LOCK_log);
6429 else
6430 mysql_mutex_assert_owner(&LOCK_log);
6431 DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
6432 DEBUG_SYNC(current_thd, "before_rotate_binlog"););
6433 mysql_mutex_lock(&LOCK_xids);
6434 /*
6435 We need to ensure that the number of prepared XIDs are 0.
6436
6437 If m_atomic_prep_xids is not zero:
6438 - We wait for storage engine commit, hence decrease m_atomic_prep_xids
6439 - We keep the LOCK_log to block new transactions from being
6440 written to the binary log.
6441 */
6442 while (get_prep_xids() > 0) {
6443 mysql_cond_wait(&m_prep_xids_cond, &LOCK_xids);
6444 }
6445 mysql_mutex_unlock(&LOCK_xids);
6446
6447 mysql_mutex_lock(&LOCK_index);
6448
6449 mysql_mutex_assert_owner(&LOCK_log);
6450 mysql_mutex_assert_owner(&LOCK_index);
6451
6452 if (DBUG_EVALUATE_IF("expire_logs_always", 0, 1) &&
6453 (error = ha_flush_logs())) {
6454 goto end;
6455 }
6456
6457 if (!is_relay_log) {
6458 /* Save set of GTIDs of the last binlog into table on binlog rotation */
6459 if ((error = gtid_state->save_gtids_of_last_binlog_into_table())) {
6460 if (error == ER_RPL_GTID_TABLE_CANNOT_OPEN) {
6461 close_on_error =
6462 m_binlog_file->get_real_file_size() >=
6463 static_cast<my_off_t>(max_size) ||
6464 DBUG_EVALUATE_IF("simulate_max_binlog_size", true, false);
6465
6466 if (!close_on_error) {
6467 LogErr(ERROR_LEVEL, ER_BINLOG_UNABLE_TO_ROTATE_GTID_TABLE_READONLY,
6468 "Current binlog file was flushed to disk and will be kept in "
6469 "use.");
6470 } else {
6471 snprintf(close_on_error_msg, sizeof close_on_error_msg,
6472 ER_THD(current_thd, ER_RPL_GTID_TABLE_CANNOT_OPEN), "mysql",
6473 "gtid_executed");
6474
6475 if (binlog_error_action != ABORT_SERVER)
6476 LogErr(WARNING_LEVEL,
6477 ER_BINLOG_UNABLE_TO_ROTATE_GTID_TABLE_READONLY,
6478 "Binary logging going to be disabled.");
6479 }
6480
6481 DBUG_EXECUTE_IF("gtid_executed_readonly",
6482 { DBUG_SET("-d,gtid_executed_readonly"); });
6483 DBUG_EXECUTE_IF("simulate_max_binlog_size",
6484 { DBUG_SET("-d,simulate_max_binlog_size"); });
6485 } else {
6486 close_on_error = true;
6487 snprintf(close_on_error_msg, sizeof close_on_error_msg, "%s",
6488 ER_THD(current_thd, ER_OOM_SAVE_GTIDS));
6489 }
6490 goto end;
6491 }
6492 }
6493
6494 /*
6495 If user hasn't specified an extension, generate a new log name
6496 We have to do this here and not in open as we want to store the
6497 new file name in the current binary log file.
6498 */
6499 new_name_ptr = new_name;
6500 if ((error = generate_new_name(new_name, name))) {
6501 // Use the old name if generation of new name fails.
6502 strcpy(new_name, name);
6503 close_on_error = true;
6504 snprintf(close_on_error_msg, sizeof close_on_error_msg,
6505 ER_THD(current_thd, ER_NO_UNIQUE_LOGFILE), name);
6506 if (strlen(close_on_error_msg)) {
6507 close_on_error_msg[strlen(close_on_error_msg) - 1] = '\0';
6508 }
6509 goto end;
6510 }
6511
6512 /*
6513 Make sure that the log_file is initialized before writing
6514 Rotate_log_event into it.
6515 */
6516 if (m_binlog_file->is_open()) {
6517 /*
6518 We log the whole file name for log file as the user may decide
6519 to change base names at some point.
6520 */
6521 Rotate_log_event r(new_name + dirname_length(new_name), 0, LOG_EVENT_OFFSET,
6522 is_relay_log ? Rotate_log_event::RELAY_LOG : 0);
6523
6524 if (DBUG_EVALUATE_IF("fault_injection_new_file_rotate_event", (error = 1),
6525 false) ||
6526 (error = write_event_to_binlog(&r))) {
6527 char errbuf[MYSYS_STRERROR_SIZE];
6528 DBUG_EXECUTE_IF("fault_injection_new_file_rotate_event", errno = 2;);
6529 close_on_error = true;
6530 snprintf(close_on_error_msg, sizeof close_on_error_msg,
6531 ER_THD(current_thd, ER_ERROR_ON_WRITE), name, errno,
6532 my_strerror(errbuf, sizeof(errbuf), errno));
6533 my_printf_error(ER_ERROR_ON_WRITE, ER_THD(current_thd, ER_ERROR_ON_WRITE),
6534 MYF(ME_FATALERROR), name, errno,
6535 my_strerror(errbuf, sizeof(errbuf), errno));
6536 goto end;
6537 }
6538
6539 if ((error = m_binlog_file->flush())) {
6540 close_on_error = true;
6541 snprintf(close_on_error_msg, sizeof close_on_error_msg, "%s",
6542 "Either disk is full or file system is read only");
6543 goto end;
6544 }
6545 }
6546
6547 DEBUG_SYNC(current_thd, "after_rotate_event_appended");
6548
6549 old_name = name;
6550 name = nullptr; // Don't free name
6551 close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX, false /*need_lock_log=false*/,
6552 false /*need_lock_index=false*/);
6553
6554 if (checksum_alg_reset != binary_log::BINLOG_CHECKSUM_ALG_UNDEF) {
6555 DBUG_ASSERT(!is_relay_log);
6556 DBUG_ASSERT(binlog_checksum_options != checksum_alg_reset);
6557 binlog_checksum_options = checksum_alg_reset;
6558 }
6559 /*
6560 Note that at this point, atomic_log_state != LOG_CLOSED
6561 (important for is_open()).
6562 */
6563
6564 DEBUG_SYNC(current_thd, "binlog_rotate_between_close_and_open");
6565 /*
6566 new_file() is only used for rotation (in FLUSH LOGS or because size >
6567 max_binlog_size or max_relay_log_size).
6568 If this is a binary log, the Format_description_log_event at the beginning
6569 of the new file should have created=0 (to distinguish with the
6570 Format_description_log_event written at server startup, which should
6571 trigger temp tables deletion on slaves.
6572 */
6573
6574 /* reopen index binlog file, BUG#34582 */
6575 file_to_open = index_file_name;
6576 error = open_index_file(index_file_name, nullptr,
6577 false /*need_lock_index=false*/);
6578 if (!error) {
6579 /* reopen the binary log file. */
6580 file_to_open = new_name_ptr;
6581 error = open_binlog(old_name, new_name_ptr, max_size,
6582 true /*null_created_arg=true*/,
6583 false /*need_lock_index=false*/,
6584 true /*need_sid_lock=true*/, extra_description_event);
6585 }
6586
6587 /* handle reopening errors */
6588 if (error) {
6589 char errbuf[MYSYS_STRERROR_SIZE];
6590 my_printf_error(ER_CANT_OPEN_FILE, ER_THD(current_thd, ER_CANT_OPEN_FILE),
6591 MYF(ME_FATALERROR), file_to_open, error,
6592 my_strerror(errbuf, sizeof(errbuf), error));
6593 close_on_error = true;
6594 snprintf(close_on_error_msg, sizeof close_on_error_msg,
6595 ER_THD(current_thd, ER_CANT_OPEN_FILE), file_to_open, error,
6596 my_strerror(errbuf, sizeof(errbuf), error));
6597 }
6598 my_free(old_name);
6599
6600 end:
6601
6602 if (error && close_on_error /* rotate, flush or reopen failed */) {
6603 /*
6604 Close whatever was left opened.
6605
6606 We are keeping the behavior as it exists today, ie,
6607 we disable logging and move on (see: BUG#51014).
6608
6609 TODO: as part of WL#1790 consider other approaches:
6610 - kill mysql (safety);
6611 - try multiple locations for opening a log file;
6612 - switch server to protected/readonly mode
6613 - ...
6614 */
6615 if (binlog_error_action == ABORT_SERVER) {
6616 char abort_msg[ERR_CLOSE_MSG_LEN + 48];
6617 memset(abort_msg, 0, sizeof abort_msg);
6618 snprintf(abort_msg, sizeof abort_msg,
6619 "%s, while rotating the binlog. "
6620 "Aborting the server",
6621 close_on_error_msg);
6622 exec_binlog_error_action_abort(abort_msg);
6623 } else
6624 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_OPEN_FOR_LOGGING,
6625 new_name_ptr != nullptr ? new_name_ptr : "new file", errno);
6626
6627 close(LOG_CLOSE_INDEX, false /*need_lock_log=false*/,
6628 false /*need_lock_index=false*/);
6629 }
6630
6631 mysql_mutex_unlock(&LOCK_index);
6632 if (need_lock_log) mysql_mutex_unlock(&LOCK_log);
6633
6634 DEBUG_SYNC(current_thd, "after_disable_binlog");
6635 return error;
6636 }
6637
6638 /**
6639 Called after an event has been written to the relay log by the IO
6640 thread. This flushes and possibly syncs the file (according to the
6641 sync options), rotates the file if it has grown over the limit, and
6642 finally calls signal_update().
6643
6644 @note The caller must hold LOCK_log before invoking this function.
6645
6646 @param mi Master_info for the IO thread.
6647
6648 @retval false success
6649 @retval true error
6650 */
after_write_to_relay_log(Master_info * mi)6651 bool MYSQL_BIN_LOG::after_write_to_relay_log(Master_info *mi) {
6652 DBUG_TRACE;
6653 DBUG_PRINT("info", ("max_size: %lu", max_size));
6654
6655 // Check pre-conditions
6656 mysql_mutex_assert_owner(&LOCK_log);
6657 DBUG_ASSERT(is_relay_log);
6658
6659 /*
6660 We allow the relay log rotation by relay log size
6661 only if the trx parser is not inside a transaction.
6662 */
6663 bool can_rotate = mi->transaction_parser.is_not_inside_transaction();
6664
6665 #ifndef DBUG_OFF
6666 if (m_binlog_file->get_real_file_size() >
6667 DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size) &&
6668 !can_rotate) {
6669 DBUG_PRINT("info", ("Postponing the rotation by size waiting for "
6670 "the end of the current transaction."));
6671 }
6672 #endif
6673
6674 // Flush and sync
6675 bool error = flush_and_sync(false);
6676 if (error) {
6677 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
6678 ER_THD(current_thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
6679 "failed to flush event to relay log file");
6680 truncate_relaylog_file(mi, atomic_binlog_end_pos);
6681 } else {
6682 if (can_rotate) {
6683 mysql_mutex_lock(&mi->data_lock);
6684 /*
6685 If the last event of the transaction has been flushed, we can add
6686 the GTID (if it is not empty) to the logged set, or else it will
6687 not be available in the Previous GTIDs of the next relay log file
6688 if we are going to rotate the relay log.
6689 */
6690 const Gtid *last_gtid_queued = mi->get_queueing_trx_gtid();
6691 if (!last_gtid_queued->is_empty()) {
6692 mi->rli->get_sid_lock()->rdlock();
6693 DBUG_SIGNAL_WAIT_FOR(current_thd, "updating_received_transaction_set",
6694 "reached_updating_received_transaction_set",
6695 "continue_updating_received_transaction_set");
6696 mi->rli->add_logged_gtid(last_gtid_queued->sidno,
6697 last_gtid_queued->gno);
6698 mi->rli->get_sid_lock()->unlock();
6699 }
6700
6701 if (mi->is_queueing_trx()) {
6702 mi->finished_queueing();
6703
6704 Trx_monitoring_info processing;
6705 Trx_monitoring_info last;
6706 mi->get_gtid_monitoring_info()->copy_info_to(&processing, &last);
6707
6708 // update the compression information
6709 binlog::global_context.monitoring_context()
6710 .transaction_compression()
6711 .update(binlog::monitoring::log_type::RELAY, last.compression_type,
6712 last.gtid, last.end_time, last.compressed_bytes,
6713 last.uncompressed_bytes,
6714 mi->rli->get_gtid_set()->get_sid_map());
6715 }
6716 mysql_mutex_unlock(&mi->data_lock);
6717
6718 /*
6719 If relay log is too big, rotate. But only if not in the middle of a
6720 transaction when GTIDs are enabled.
6721
6722 Also rotate, if a deffered flush request has been placed.
6723
6724 We now try to mimic the following master binlog behavior: "A transaction
6725 is written in one chunk to the binary log, so it is never split between
6726 several binary logs. Therefore, if you have big transactions, you might
6727 see binary log files larger than max_binlog_size."
6728 */
6729 if (m_binlog_file->get_real_file_size() >
6730 DBUG_EVALUATE_IF("rotate_slave_debug_group", 500, max_size) ||
6731 mi->is_rotate_requested()) {
6732 error = new_file_without_locking(mi->get_mi_description_event());
6733 mi->clear_rotate_requests();
6734 }
6735 }
6736 }
6737
6738 lock_binlog_end_pos();
6739 mi->rli->ign_master_log_name_end[0] = 0;
6740 update_binlog_end_pos(false /*need_lock*/);
6741 harvest_bytes_written(mi->rli, true /*need_log_space_lock=true*/);
6742 unlock_binlog_end_pos();
6743
6744 return error;
6745 }
6746
write_event(Log_event * ev,Master_info * mi)6747 bool MYSQL_BIN_LOG::write_event(Log_event *ev, Master_info *mi) {
6748 DBUG_TRACE;
6749
6750 DBUG_EXECUTE_IF("fail_to_write_ignored_event_to_relay_log", { return true; });
6751 // check preconditions
6752 DBUG_ASSERT(is_relay_log);
6753
6754 mysql_mutex_assert_owner(&LOCK_log);
6755
6756 // write data
6757 bool error = false;
6758 if (!binary_event_serialize(ev, m_binlog_file)) {
6759 bytes_written += ev->common_header->data_written;
6760 error = after_write_to_relay_log(mi);
6761 } else {
6762 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
6763 ER_THD(current_thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
6764 "failed to write event to the relay log file");
6765 truncate_relaylog_file(mi, atomic_binlog_end_pos);
6766 error = true;
6767 }
6768
6769 return error;
6770 }
6771
write_buffer(const char * buf,uint len,Master_info * mi)6772 bool MYSQL_BIN_LOG::write_buffer(const char *buf, uint len, Master_info *mi) {
6773 DBUG_TRACE;
6774
6775 // check preconditions
6776 DBUG_ASSERT(is_relay_log);
6777 mysql_mutex_assert_owner(&LOCK_log);
6778
6779 // write data
6780 bool error = false;
6781 if (m_binlog_file->write(pointer_cast<const uchar *>(buf), len) == 0) {
6782 bytes_written += len;
6783 error = after_write_to_relay_log(mi);
6784 } else {
6785 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
6786 ER_THD(current_thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
6787 "failed to write event to the relay log file");
6788 truncate_relaylog_file(mi, atomic_binlog_end_pos);
6789 error = true;
6790 }
6791
6792 return error;
6793 }
6794
flush()6795 bool MYSQL_BIN_LOG::flush() {
6796 return m_binlog_file->is_open() && m_binlog_file->flush();
6797 }
6798
flush_and_sync(const bool force)6799 bool MYSQL_BIN_LOG::flush_and_sync(const bool force) {
6800 mysql_mutex_assert_owner(&LOCK_log);
6801
6802 if (m_binlog_file->flush()) return true;
6803
6804 std::pair<bool, bool> result = sync_binlog_file(force);
6805
6806 return result.first;
6807 }
6808
start_union_events(THD * thd,query_id_t query_id_param)6809 void MYSQL_BIN_LOG::start_union_events(THD *thd, query_id_t query_id_param) {
6810 DBUG_ASSERT(!thd->binlog_evt_union.do_union);
6811 thd->binlog_evt_union.do_union = true;
6812 thd->binlog_evt_union.unioned_events = false;
6813 thd->binlog_evt_union.unioned_events_trans = false;
6814 thd->binlog_evt_union.first_query_id = query_id_param;
6815 }
6816
stop_union_events(THD * thd)6817 void MYSQL_BIN_LOG::stop_union_events(THD *thd) {
6818 DBUG_ASSERT(thd->binlog_evt_union.do_union);
6819 thd->binlog_evt_union.do_union = false;
6820 }
6821
is_query_in_union(THD * thd,query_id_t query_id_param)6822 bool MYSQL_BIN_LOG::is_query_in_union(THD *thd, query_id_t query_id_param) {
6823 return (thd->binlog_evt_union.do_union &&
6824 query_id_param >= thd->binlog_evt_union.first_query_id);
6825 }
6826
6827 /*
  Updates thd's position-of-next-event variables
  after a *real* write to a file.
6830 */
update_thd_next_event_pos(THD * thd)6831 void MYSQL_BIN_LOG::update_thd_next_event_pos(THD *thd) {
6832 if (likely(thd != nullptr)) {
6833 thd->set_next_event_pos(log_file_name, m_binlog_file->position());
6834 }
6835 }
6836
6837 /*
  Moves the last bunch of rows from the pending Rows event to a cache (either
  the transactional cache if is_transactional is @c true, or the
  non-transactional cache otherwise). Sets a new pending event.

  @param thd               a pointer to the user thread.
  @param event             a pointer to the row event.
  @param is_transactional  @c true indicates a transactional cache,
                           otherwise @c false a non-transactional.
6846 */
flush_and_set_pending_rows_event(THD * thd,Rows_log_event * event,bool is_transactional)6847 int MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
6848 Rows_log_event *event,
6849 bool is_transactional) {
6850 DBUG_TRACE;
6851 DBUG_ASSERT(mysql_bin_log.is_open());
6852 DBUG_PRINT("enter", ("event: %p", event));
6853
6854 int error = 0;
6855 binlog_cache_mngr *const cache_mngr = thd_get_cache_mngr(thd);
6856
6857 DBUG_ASSERT(cache_mngr);
6858
6859 binlog_cache_data *cache_data =
6860 cache_mngr->get_binlog_cache_data(is_transactional);
6861
6862 DBUG_PRINT("info", ("cache_mngr->pending(): %p", cache_data->pending()));
6863
6864 if (Rows_log_event *pending = cache_data->pending()) {
6865 /*
6866 Write pending event to the cache.
6867 */
6868 if (cache_data->write_event(pending)) {
6869 report_cache_write_error(thd, is_transactional);
6870 if (check_write_error(thd) && cache_data &&
6871 stmt_cannot_safely_rollback(thd))
6872 cache_data->set_incident();
6873 delete pending;
6874 cache_data->set_pending(nullptr);
6875 return 1;
6876 }
6877
6878 delete pending;
6879 }
6880
6881 cache_data->set_pending(event);
6882
6883 return error;
6884 }
6885
6886 /**
6887 Write an event to the binary log cache.
6888 */
6889
write_event(Log_event * event_info)6890 bool MYSQL_BIN_LOG::write_event(Log_event *event_info) {
6891 THD *thd = event_info->thd;
6892 bool error = true;
6893 DBUG_TRACE;
6894
6895 if (thd->binlog_evt_union.do_union) {
6896 /*
6897 In Stored function; Remember that function call caused an update.
6898 We will log the function call to the binary log on function exit
6899 */
6900 thd->binlog_evt_union.unioned_events = true;
6901 thd->binlog_evt_union.unioned_events_trans |=
6902 event_info->is_using_trans_cache();
6903 return false;
6904 }
6905
6906 /*
6907 We only end the statement if we are in a top-level statement. If
6908 we are inside a stored function, we do not end the statement since
6909 this will close all tables on the slave. But there can be a special case
6910 where we are inside a stored function/trigger and a SAVEPOINT is being
6911 set in side the stored function/trigger. This SAVEPOINT execution will
6912 force the pending event to be flushed without an STMT_END_F flag. This
6913 will result in a case where following DMLs will be considered as part of
6914 same statement and result in data loss on slave. Hence in this case we
6915 force the end_stmt to be true.
6916 */
6917 bool const end_stmt =
6918 (thd->in_sub_stmt && thd->lex->sql_command == SQLCOM_SAVEPOINT)
6919 ? true
6920 : (thd->locked_tables_mode && thd->lex->requires_prelocking());
6921 if (thd->binlog_flush_pending_rows_event(end_stmt,
6922 event_info->is_using_trans_cache()))
6923 return error;
6924
6925 /*
6926 In most cases this is only called if 'is_open()' is true; in fact this is
6927 mostly called if is_open() *was* true a few instructions before, but it
6928 could have changed since.
6929 */
6930 if (likely(is_open())) {
6931 /*
6932 In the future we need to add to the following if tests like
6933 "do the involved tables match (to be implemented)
6934 binlog_[wild_]{do|ignore}_table?" (WL#1049)"
6935 */
6936 const char *local_db = event_info->get_db();
6937 if ((thd && !(thd->variables.option_bits & OPTION_BIN_LOG)) ||
6938 (thd->lex->sql_command != SQLCOM_ROLLBACK_TO_SAVEPOINT &&
6939 thd->lex->sql_command != SQLCOM_SAVEPOINT &&
6940 (!event_info->is_no_filter_event() &&
6941 !binlog_filter->db_ok(local_db))))
6942 return false;
6943
6944 DBUG_ASSERT(event_info->is_using_trans_cache() ||
6945 event_info->is_using_stmt_cache());
6946
6947 if (binlog_start_trans_and_stmt(thd, event_info)) return error;
6948
6949 bool is_trans_cache = event_info->is_using_trans_cache();
6950 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(thd);
6951 binlog_cache_data *cache_data =
6952 cache_mngr->get_binlog_cache_data(is_trans_cache);
6953
6954 DBUG_PRINT("info", ("event type: %d", event_info->get_type_code()));
6955
6956 /*
6957 No check for auto events flag here - this write method should
6958 never be called if auto-events are enabled.
6959
6960 Write first log events which describe the 'run environment'
6961 of the SQL command. If row-based binlogging, Insert_id, Rand
6962 and other kind of "setting context" events are not needed.
6963 */
6964 if (thd) {
6965 if (!thd->is_current_stmt_binlog_format_row()) {
6966 if (thd->stmt_depends_on_first_successful_insert_id_in_prev_stmt) {
6967 Intvar_log_event e(
6968 thd, (uchar)binary_log::Intvar_event::LAST_INSERT_ID_EVENT,
6969 thd->first_successful_insert_id_in_prev_stmt_for_binlog,
6970 event_info->event_cache_type, event_info->event_logging_type);
6971 if (cache_data->write_event(&e)) goto err;
6972 }
6973 if (thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements() > 0) {
6974 DBUG_PRINT(
6975 "info",
6976 ("number of auto_inc intervals: %u",
6977 thd->auto_inc_intervals_in_cur_stmt_for_binlog.nb_elements()));
6978 Intvar_log_event e(
6979 thd, (uchar)binary_log::Intvar_event::INSERT_ID_EVENT,
6980 thd->auto_inc_intervals_in_cur_stmt_for_binlog.minimum(),
6981 event_info->event_cache_type, event_info->event_logging_type);
6982 if (cache_data->write_event(&e)) goto err;
6983 }
6984 if (thd->rand_used) {
6985 Rand_log_event e(thd, thd->rand_saved_seed1, thd->rand_saved_seed2,
6986 event_info->event_cache_type,
6987 event_info->event_logging_type);
6988 if (cache_data->write_event(&e)) goto err;
6989 }
6990 if (!thd->user_var_events.empty()) {
6991 for (size_t i = 0; i < thd->user_var_events.size(); i++) {
6992 Binlog_user_var_event *user_var_event = thd->user_var_events[i];
6993
6994 /* setting flags for user var log event */
6995 uchar flags = User_var_log_event::UNDEF_F;
6996 if (user_var_event->unsigned_flag)
6997 flags |= User_var_log_event::UNSIGNED_F;
6998
6999 User_var_log_event e(
7000 thd, user_var_event->user_var_event->entry_name.ptr(),
7001 user_var_event->user_var_event->entry_name.length(),
7002 user_var_event->value, user_var_event->length,
7003 user_var_event->type, user_var_event->charset_number, flags,
7004 event_info->event_cache_type, event_info->event_logging_type);
7005 if (cache_data->write_event(&e)) goto err;
7006 }
7007 }
7008 }
7009 }
7010
7011 /*
7012 Write the event.
7013 */
7014 if (cache_data->write_event(event_info)) goto err;
7015
7016 if (DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0)) goto err;
7017
7018 /*
7019 After writing the event, if the trx-cache was used and any unsafe
7020 change was written into it, the cache is marked as cannot safely
7021 roll back.
7022 */
7023 if (is_trans_cache && stmt_cannot_safely_rollback(thd))
7024 cache_mngr->trx_cache.set_cannot_rollback();
7025
7026 error = false;
7027
7028 err:
7029 if (error) {
7030 report_cache_write_error(thd, is_trans_cache);
7031 if (check_write_error(thd) && cache_data &&
7032 stmt_cannot_safely_rollback(thd))
7033 cache_data->set_incident();
7034 }
7035 }
7036
7037 return error;
7038 }
7039
7040 /**
7041 The method executes rotation when LOCK_log is already acquired
7042 by the caller.
7043
7044 @param force_rotate caller can request the log rotation
  @param[out] check_purge set to true if a rotation was attempted
                          (whether or not it succeeded), false otherwise
7046
7047 @note
7048 If rotation fails, for instance the server was unable
7049 to create a new log file, we still try to write an
7050 incident event to the current log.
7051
7052 @note The caller must hold LOCK_log when invoking this function.
7053
7054 @retval
7055 nonzero - error in rotating routine.
7056 */
rotate(bool force_rotate,bool * check_purge)7057 int MYSQL_BIN_LOG::rotate(bool force_rotate, bool *check_purge) {
7058 int error = 0;
7059 DBUG_TRACE;
7060
7061 DBUG_ASSERT(!is_relay_log);
7062 mysql_mutex_assert_owner(&LOCK_log);
7063
7064 *check_purge = false;
7065
7066 if (DBUG_EVALUATE_IF("force_rotate", 1, 0) || force_rotate ||
7067 (m_binlog_file->get_real_file_size() >= (my_off_t)max_size) ||
7068 DBUG_EVALUATE_IF("simulate_max_binlog_size", true, false)) {
7069 error = new_file_without_locking(nullptr);
7070 *check_purge = true;
7071 }
7072 return error;
7073 }
7074
7075 /**
7076 The method executes logs purging routine.
7077 */
purge()7078 void MYSQL_BIN_LOG::purge() {
7079 if (expire_logs_days || binlog_expire_logs_seconds) {
7080 DEBUG_SYNC(current_thd, "at_purge_logs_before_date");
7081 time_t purge_time = 0;
7082
7083 if (binlog_expire_logs_seconds) {
7084 purge_time = my_time(0) - binlog_expire_logs_seconds;
7085 } else
7086 purge_time = my_time(0) - expire_logs_days * 24 * 60 * 60;
7087
7088 DBUG_EXECUTE_IF("expire_logs_always", { purge_time = my_time(0); });
7089 if (purge_time >= 0) {
7090 Is_instance_backup_locked_result is_instance_locked =
7091 is_instance_backup_locked(current_thd);
7092
7093 if (is_instance_locked == Is_instance_backup_locked_result::OOM) {
7094 exec_binlog_error_action_abort(
7095 "Out of memory happened while checking if "
7096 "instance was locked for backup");
7097 }
7098 if (is_instance_locked == Is_instance_backup_locked_result::NOT_LOCKED) {
7099 /*
7100 Flush logs for storage engines, so that the last transaction
7101 is persisted inside storage engines.
7102 */
7103 ha_flush_logs();
7104 purge_logs_before_date(purge_time, true);
7105 }
7106 }
7107 }
7108 }
7109
7110 /**
7111 Execute a FLUSH LOGS statement.
7112
7113 The method is a shortcut of @c rotate() and @c purge().
7114 LOCK_log is acquired prior to rotate and is released after it.
7115
7116 @param thd Current session.
7117 @param force_rotate caller can request the log rotation
7118
7119 @retval
7120 nonzero - error in rotating routine.
7121 */
rotate_and_purge(THD * thd,bool force_rotate)7122 int MYSQL_BIN_LOG::rotate_and_purge(THD *thd, bool force_rotate) {
7123 int error = 0;
7124 DBUG_TRACE;
7125 bool check_purge = false;
7126
7127 /*
7128 FLUSH BINARY LOGS command should ignore 'read-only' and 'super_read_only'
7129 options so that it can update 'mysql.gtid_executed' replication repository
7130 table.
7131 */
7132 thd->set_skip_readonly_check();
7133 /*
7134 Wait for handlerton to insert any pending information into the binlog.
7135 For e.g. ha_ndbcluster which updates the binlog asynchronously this is
7136 needed so that the user see its own commands in the binlog.
7137 */
7138 ha_binlog_wait(thd);
7139
7140 DBUG_ASSERT(!is_relay_log);
7141 mysql_mutex_lock(&LOCK_log);
7142 error = rotate(force_rotate, &check_purge);
7143 /*
7144 NOTE: Run purge_logs wo/ holding LOCK_log because it does not need
7145 the mutex. Otherwise causes various deadlocks.
7146 */
7147 mysql_mutex_unlock(&LOCK_log);
7148
7149 if (!error && check_purge) purge();
7150
7151 return error;
7152 }
7153
next_file_id()7154 uint MYSQL_BIN_LOG::next_file_id() {
7155 uint res;
7156 mysql_mutex_lock(&LOCK_log);
7157 res = file_id++;
7158 mysql_mutex_unlock(&LOCK_log);
7159 return res;
7160 }
7161
get_gtid_executed(Sid_map * sid_map,Gtid_set * gtid_set)7162 int MYSQL_BIN_LOG::get_gtid_executed(Sid_map *sid_map, Gtid_set *gtid_set) {
7163 DBUG_TRACE;
7164 int error = 0;
7165
7166 mysql_mutex_lock(&mysql_bin_log.LOCK_commit);
7167 global_sid_lock->wrlock();
7168
7169 enum_return_status return_status = global_sid_map->copy(sid_map);
7170 if (return_status != RETURN_STATUS_OK) {
7171 error = 1;
7172 goto end;
7173 }
7174
7175 return_status = gtid_set->add_gtid_set(gtid_state->get_executed_gtids());
7176 if (return_status != RETURN_STATUS_OK) error = 1;
7177
7178 end:
7179 global_sid_lock->unlock();
7180 mysql_mutex_unlock(&mysql_bin_log.LOCK_commit);
7181
7182 return error;
7183 }
7184
7185 /**
7186 Write the contents of the given IO_CACHE to the binary log.
7187
7188 The cache will be reset as a READ_CACHE to be able to read the
7189 contents from it.
7190
7191 The data will be post-processed: see class Binlog_event_writer for
7192 details.
7193
7194 @param cache Events will be read from this IO_CACHE.
7195 @param writer Events will be written to this Binlog_event_writer.
7196
7197 @retval true IO error.
7198 @retval false Success.
7199
7200 @see MYSQL_BIN_LOG::write_cache
7201 */
do_write_cache(Binlog_cache_storage * cache,Binlog_event_writer * writer)7202 bool MYSQL_BIN_LOG::do_write_cache(Binlog_cache_storage *cache,
7203 Binlog_event_writer *writer) {
7204 DBUG_TRACE;
7205
7206 DBUG_EXECUTE_IF("simulate_do_write_cache_failure", {
7207 /*
7208 see binlog_cache_data::write_event() that reacts on
7209 @c simulate_disk_full_at_flush_pending.
7210 */
7211 DBUG_SET("-d,simulate_do_write_cache_failure");
7212 return true;
7213 });
7214
7215 #ifndef DBUG_OFF
7216 uint64 expected_total_len = cache->length();
7217 DBUG_PRINT("info", ("bytes in cache= %" PRIu64, expected_total_len));
7218 #endif
7219
7220 bool error = false;
7221 if (cache->copy_to(writer, &error)) {
7222 if (error) report_binlog_write_error();
7223 return true;
7224 }
7225 return false;
7226 }
7227
7228 /**
7229 Writes an incident event to stmt_cache.
7230
7231 @param ev Incident event to be written
7232 @param thd Thread variable
7233 @param need_lock_log If true, will acquire LOCK_log; otherwise the
7234 caller should already have acquired LOCK_log.
7235 @param err_msg Error message written to log file for the incident.
7236 @param do_flush_and_sync If true, will call flush_and_sync(), rotate() and
7237 purge().
7238
7239 @retval false error
7240 @retval true success
7241 */
write_incident(Incident_log_event * ev,THD * thd,bool need_lock_log,const char * err_msg,bool do_flush_and_sync)7242 bool MYSQL_BIN_LOG::write_incident(Incident_log_event *ev, THD *thd,
7243 bool need_lock_log, const char *err_msg,
7244 bool do_flush_and_sync) {
7245 uint error = 0;
7246 DBUG_TRACE;
7247 DBUG_ASSERT(err_msg);
7248
7249 if (!is_open()) return error;
7250
7251 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(thd);
7252
7253 /*
7254 thd->cache_mngr may be uninitialized when first transaction resulted in an
7255 incident. If there is no cache manager exists for the session, then we
7256 create one, so that a GTID is generated and is written prior to flushing
7257 the stmt_cache.
7258 */
7259 if (cache_mngr == NULL ||
7260 DBUG_EVALUATE_IF("simulate_cache_creation_failure", 1, 0)) {
7261 if (thd->binlog_setup_trx_data() ||
7262 DBUG_EVALUATE_IF("simulate_cache_creation_failure", 1, 0)) {
7263 auto gtid_mode = global_gtid_mode.get();
7264 if (gtid_mode == Gtid_mode::ON || gtid_mode == Gtid_mode::ON_PERMISSIVE) {
7265 std::ostringstream message;
7266
7267 message << "Could not create IO cache while writing an incident event "
7268 "to the binary log. Since GTID_MODE = "
7269 << gtid_mode
7270 << ", server is unable to proceed with logging. Query: '";
7271 /**
7272 The reason for the error may be that the query was
7273 huge. Better cut it to not run into resource problems.
7274 */
7275 message.write(thd->query().str, MYSQL_ERRMSG_SIZE);
7276 message << "'.";
7277
7278 handle_binlog_flush_or_sync_error(thd, true, message.str().c_str());
7279 return true;
7280 }
7281 } else
7282 cache_mngr = thd_get_cache_mngr(thd);
7283 }
7284
7285 #ifndef DBUG_OFF
7286 if (DBUG_EVALUATE_IF("simulate_write_incident_event_into_binlog_directly", 1,
7287 0) &&
7288 !cache_mngr->stmt_cache.is_binlog_empty()) {
7289 /* The stmt_cache contains corruption data, so we can reset it. */
7290 cache_mngr->stmt_cache.reset();
7291 }
7292 #endif
7293
7294 /*
7295 If there is no binlog cache then we write incidents directly
7296 into the binlog. If caller needs GTIDs it has to setup the
7297 binlog cache (for the injector thread).
7298 */
7299 if (cache_mngr == nullptr ||
7300 DBUG_EVALUATE_IF("simulate_write_incident_event_into_binlog_directly", 1,
7301 0)) {
7302 if (need_lock_log)
7303 mysql_mutex_lock(&LOCK_log);
7304 else
7305 mysql_mutex_assert_owner(&LOCK_log);
7306 /* Write an incident event into binlog directly. */
7307 error = write_event_to_binlog(ev);
7308 /*
7309 Write an error to log. So that user might have a chance
7310 to be alerted and explore incident details.
7311 */
7312 if (!error)
7313 LogErr(ERROR_LEVEL, ER_BINLOG_LOGGING_INCIDENT_TO_STOP_SLAVES, err_msg);
7314 } else // (cache_mngr != NULL)
7315 {
7316 if (!cache_mngr->stmt_cache.is_binlog_empty()) {
7317 /* The stmt_cache contains corruption data, so we can reset it. */
7318 cache_mngr->stmt_cache.reset();
7319 }
7320 if (!cache_mngr->trx_cache.is_binlog_empty()) {
7321 /* The trx_cache contains corruption data, so we can reset it. */
7322 cache_mngr->trx_cache.reset();
7323 }
7324 /*
7325 Write the incident event into stmt_cache, so that a GTID is generated and
7326 written for it prior to flushing the stmt_cache.
7327 */
7328 binlog_cache_data *cache_data = cache_mngr->get_binlog_cache_data(false);
7329 if ((error = cache_data->write_event(ev))) {
7330 LogErr(ERROR_LEVEL, ER_BINLOG_EVENT_WRITE_TO_STMT_CACHE_FAILED);
7331 cache_mngr->stmt_cache.reset();
7332 return error;
7333 }
7334
7335 if (need_lock_log)
7336 mysql_mutex_lock(&LOCK_log);
7337 else
7338 mysql_mutex_assert_owner(&LOCK_log);
7339 }
7340
7341 if (do_flush_and_sync) {
7342 if (!error && !(error = flush_and_sync())) {
7343 bool check_purge = false;
7344 update_binlog_end_pos();
7345 is_rotating_caused_by_incident = true;
7346 error = rotate(true, &check_purge);
7347 is_rotating_caused_by_incident = false;
7348 if (!error && check_purge) purge();
7349 }
7350 }
7351
7352 if (need_lock_log) mysql_mutex_unlock(&LOCK_log);
7353
7354 /*
7355 Write an error to log. So that user might have a chance
7356 to be alerted and explore incident details.
7357 */
7358 if (!error && cache_mngr != nullptr)
7359 LogErr(ERROR_LEVEL, ER_BINLOG_LOGGING_INCIDENT_TO_STOP_SLAVES, err_msg);
7360
7361 return error;
7362 }
7363
write_dml_directly(THD * thd,const char * stmt,size_t stmt_len,enum_sql_command sql_command)7364 bool MYSQL_BIN_LOG::write_dml_directly(THD *thd, const char *stmt,
7365 size_t stmt_len,
7366 enum_sql_command sql_command) {
7367 bool ret = false;
7368 /* backup the original command */
7369 enum_sql_command save_sql_command = thd->lex->sql_command;
7370 thd->lex->sql_command = sql_command;
7371
7372 if (thd->binlog_query(THD::STMT_QUERY_TYPE, stmt, stmt_len, false, false,
7373 false, 0) ||
7374 commit(thd, false) != TC_LOG::RESULT_SUCCESS) {
7375 ret = true;
7376 }
7377
7378 thd->lex->sql_command = save_sql_command;
7379 return ret;
7380 }
7381
7382 /**
7383 Creates an incident event and writes it to the binary log.
7384
7385 @param thd Thread variable
7386 @param need_lock_log If the binary lock should be locked or not
7387 @param err_msg Error message written to log file for the incident.
7388 @param do_flush_and_sync If true, will call flush_and_sync(), rotate() and
7389 purge().
7390
7391 @retval
7392 0 error
7393 @retval
7394 1 success
7395 */
write_incident(THD * thd,bool need_lock_log,const char * err_msg,bool do_flush_and_sync)7396 bool MYSQL_BIN_LOG::write_incident(THD *thd, bool need_lock_log,
7397 const char *err_msg,
7398 bool do_flush_and_sync) {
7399 DBUG_TRACE;
7400
7401 if (!is_open()) return false;
7402
7403 LEX_CSTRING write_error_msg = {err_msg, strlen(err_msg)};
7404 binary_log::Incident_event::enum_incident incident =
7405 binary_log::Incident_event::INCIDENT_LOST_EVENTS;
7406 Incident_log_event ev(thd, incident, write_error_msg);
7407
7408 return write_incident(&ev, thd, need_lock_log, err_msg, do_flush_and_sync);
7409 }
7410
7411 /*
7412 Write the event into current binlog directly without going though a session
7413 binlog cache. It will update the event's log_pos and set checksum accordingly.
7414 binary_event_serialize can be called directly if log_pos should not be
7415 updated.
7416 */
write_event_to_binlog(Log_event * ev)7417 inline bool MYSQL_BIN_LOG::write_event_to_binlog(Log_event *ev) {
7418 ev->common_footer->checksum_alg =
7419 is_relay_log
7420 ? relay_log_checksum_alg
7421 : static_cast<enum_binlog_checksum_alg>(binlog_checksum_options);
7422 DBUG_ASSERT(ev->common_footer->checksum_alg !=
7423 binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
7424
7425 /*
7426 Stores current position into log_pos, it is used to calculate correcty
7427 end_log_pos by adding data_written in Log_event::write_header().
7428 */
7429 ev->common_header->log_pos = m_binlog_file->position();
7430
7431 if (binary_event_serialize(ev, m_binlog_file)) return true;
7432
7433 add_bytes_written(ev->common_header->data_written);
7434 return false;
7435 }
7436
7437 /* Write the event into current binlog and flush and sync */
write_event_to_binlog_and_sync(Log_event * ev)7438 bool MYSQL_BIN_LOG::write_event_to_binlog_and_sync(Log_event *ev) {
7439 if (write_event_to_binlog(ev) || m_binlog_file->flush() ||
7440 m_binlog_file->sync())
7441 return true;
7442
7443 update_binlog_end_pos();
7444 return false;
7445 }
7446
7447 /**
7448 Write the contents of the statement or transaction cache to the binary log.
7449
7450 Comparison with do_write_cache:
7451
7452 - do_write_cache is a lower-level function that only performs the
7453 actual write.
7454
7455 - write_cache is a higher-level function that calls do_write_cache
7456 and additionally performs some maintenance tasks, including:
7457 - report any errors that occurred
7458 - write incident event if needed
7459 - update gtid_state
7460 - update thd.binlog_next_event_pos
7461
7462 @param thd Thread variable
7463
7464 @param cache_data Events will be read from the IO_CACHE of this
7465 cache_data object.
7466
7467 @param writer Events will be written to this Binlog_event_writer.
7468
7469 @retval true IO error.
7470 @retval false Success.
7471
7472 @note We only come here if there is something in the cache.
7473 @note Whatever is in the cache is always a complete transaction.
7474 @note 'cache' needs to be reinitialized after this functions returns.
7475 */
write_cache(THD * thd,binlog_cache_data * cache_data,Binlog_event_writer * writer)7476 bool MYSQL_BIN_LOG::write_cache(THD *thd, binlog_cache_data *cache_data,
7477 Binlog_event_writer *writer) {
7478 DBUG_TRACE;
7479
7480 Binlog_cache_storage *cache = cache_data->get_cache();
7481 bool incident = cache_data->has_incident();
7482
7483 mysql_mutex_assert_owner(&LOCK_log);
7484
7485 DBUG_ASSERT(is_open());
7486 if (likely(is_open())) // Should always be true
7487 {
7488 /*
7489 We only bother to write to the binary log if there is anything
7490 to write.
7491
7492 @todo Is this check redundant? Probably this is only called if
7493 there is anything in the cache (see @note in comment above this
7494 function). Check if we can replace this by an assertion. /Sven
7495 */
7496 if (!cache->is_empty()) {
7497 DBUG_EXECUTE_IF("crash_before_writing_xid", {
7498 if (do_write_cache(cache, writer))
7499 DBUG_PRINT("info", ("error writing binlog cache: %d", write_error));
7500 flush_and_sync(true);
7501 DBUG_PRINT("info", ("crashing before writing xid"));
7502 DBUG_SUICIDE();
7503 });
7504 if (do_write_cache(cache, writer)) goto err;
7505
7506 const char *err_msg =
7507 "Non-transactional changes did not get into "
7508 "the binlog.";
7509 if (incident &&
7510 write_incident(thd, false /*need_lock_log=false*/, err_msg,
7511 false /*do_flush_and_sync==false*/)) {
7512 report_binlog_write_error();
7513 goto err;
7514 }
7515 DBUG_EXECUTE_IF("half_binlogged_transaction", DBUG_SUICIDE(););
7516 }
7517 update_thd_next_event_pos(thd);
7518 }
7519
7520 return false;
7521
7522 err:
7523 thd->commit_error = THD::CE_FLUSH_ERROR;
7524
7525 return true;
7526 }
7527
report_binlog_write_error()7528 void MYSQL_BIN_LOG::report_binlog_write_error() {
7529 char errbuf[MYSYS_STRERROR_SIZE];
7530
7531 write_error = true;
7532 LogErr(ERROR_LEVEL, ER_FAILED_TO_WRITE_TO_FILE, name, errno,
7533 my_strerror(errbuf, sizeof(errbuf), errno));
7534 }
7535
7536 /**
7537 Wait until we get a signal that the binary log has been updated.
7538 Applies to master only.
7539
7540 NOTES
7541 @param[in] timeout a pointer to a timespec;
7542 NULL means to wait w/o timeout.
7543 @retval 0 if got signalled on update
7544 @retval non-0 if wait timeout elapsed
7545 @note
7546 LOCK_binlog_end_pos must be taken before calling this function.
7547 LOCK_binlog_end_pos is being released while the thread is waiting.
7548 LOCK_binlog_end_pos is released by the caller.
7549 */
7550
wait_for_update(const struct timespec * timeout)7551 int MYSQL_BIN_LOG::wait_for_update(const struct timespec *timeout) {
7552 int ret = 0;
7553 DBUG_TRACE;
7554
7555 if (!timeout)
7556 mysql_cond_wait(&update_cond, &LOCK_binlog_end_pos);
7557 else
7558 ret = mysql_cond_timedwait(&update_cond, &LOCK_binlog_end_pos,
7559 const_cast<struct timespec *>(timeout));
7560 return ret;
7561 }
7562
7563 /**
7564 Close the log file.
7565
7566 @param exiting Bitmask for one or more of the following bits:
7567 - LOG_CLOSE_INDEX : if we should close the index file
7568 - LOG_CLOSE_TO_BE_OPENED : if we intend to call open
7569 at once after close.
7570 - LOG_CLOSE_STOP_EVENT : write a 'stop' event to the log
7571
7572 @param need_lock_log If true, this function acquires LOCK_log;
7573 otherwise the caller should already have acquired it.
7574
7575 @param need_lock_index If true, this function acquires LOCK_index;
7576 otherwise the caller should already have acquired it.
7577
7578 @note
7579 One can do an open on the object at once after doing a close.
7580 The internal structures are not freed until cleanup() is called
7581 */
7582
close(uint exiting,bool need_lock_log,bool need_lock_index)7583 void MYSQL_BIN_LOG::close(
7584 uint exiting, bool need_lock_log,
7585 bool need_lock_index) { // One can't set log_type here!
7586 DBUG_TRACE;
7587 DBUG_PRINT("enter", ("exiting: %d", (int)exiting));
7588 if (need_lock_log)
7589 mysql_mutex_lock(&LOCK_log);
7590 else
7591 mysql_mutex_assert_owner(&LOCK_log);
7592
7593 if (atomic_log_state == LOG_OPENED) {
7594 if ((exiting & LOG_CLOSE_STOP_EVENT) != 0) {
7595 /**
7596 TODO(WL#7546): Change the implementation to Stop_event after write() is
7597 moved into libbinlogevents
7598 */
7599 Stop_log_event s;
7600 // the checksumming rule for relay-log case is similar to Rotate
7601 s.common_footer->checksum_alg =
7602 is_relay_log
7603 ? relay_log_checksum_alg
7604 : static_cast<enum_binlog_checksum_alg>(binlog_checksum_options);
7605 DBUG_ASSERT(!is_relay_log || relay_log_checksum_alg !=
7606 binary_log::BINLOG_CHECKSUM_ALG_UNDEF);
7607 if (!write_event_to_binlog(&s) && !m_binlog_file->flush())
7608 update_binlog_end_pos();
7609 }
7610
7611 /* The following update should not be done in relay log files */
7612 if (!is_relay_log) {
7613 my_off_t offset = BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
7614 uchar flags = 0; // clearing LOG_EVENT_BINLOG_IN_USE_F
7615 (void)m_binlog_file->update(&flags, 1, offset);
7616 }
7617
7618 if (m_binlog_file->flush_and_sync() && !write_error) {
7619 report_binlog_write_error();
7620 }
7621
7622 /*
7623 LOCK_sync to guarantee that no thread is calling m_binlog_file
7624 to sync data to disk when another thread is closing m_binlog_file.
7625 */
7626 if (!is_relay_log) mysql_mutex_lock(&LOCK_sync);
7627 m_binlog_file->close();
7628 if (!is_relay_log) mysql_mutex_unlock(&LOCK_sync);
7629
7630 atomic_log_state =
7631 (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
7632 my_free(name);
7633 name = nullptr;
7634 }
7635
7636 /*
7637 The following test is needed even if is_open() is not set, as we may have
7638 called a not complete close earlier and the index file is still open.
7639 */
7640
7641 if (need_lock_index)
7642 mysql_mutex_lock(&LOCK_index);
7643 else
7644 mysql_mutex_assert_owner(&LOCK_index);
7645
7646 if ((exiting & LOG_CLOSE_INDEX) && my_b_inited(&index_file)) {
7647 end_io_cache(&index_file);
7648 if (mysql_file_close(index_file.file, MYF(0)) < 0 && !write_error) {
7649 report_binlog_write_error();
7650 }
7651 }
7652
7653 if (need_lock_index) mysql_mutex_unlock(&LOCK_index);
7654
7655 atomic_log_state =
7656 (exiting & LOG_CLOSE_TO_BE_OPENED) ? LOG_TO_BE_OPENED : LOG_CLOSED;
7657 my_free(name);
7658 name = nullptr;
7659
7660 if (need_lock_log) mysql_mutex_unlock(&LOCK_log);
7661 }
7662
harvest_bytes_written(Relay_log_info * rli,bool need_log_space_lock)7663 void MYSQL_BIN_LOG::harvest_bytes_written(Relay_log_info *rli,
7664 bool need_log_space_lock) {
7665 #ifndef DBUG_OFF
7666 char buf1[22], buf2[22];
7667 #endif
7668
7669 DBUG_TRACE;
7670 if (need_log_space_lock)
7671 mysql_mutex_lock(&rli->log_space_lock);
7672 else
7673 mysql_mutex_assert_owner(&rli->log_space_lock);
7674 rli->log_space_total += bytes_written;
7675 DBUG_PRINT("info",
7676 ("relay_log_space: %s bytes_written: %s",
7677 llstr(rli->log_space_total, buf1), llstr(bytes_written, buf2)));
7678 bytes_written = 0;
7679 if (need_log_space_lock) mysql_mutex_unlock(&rli->log_space_lock);
7680 }
7681
set_max_size(ulong max_size_arg)7682 void MYSQL_BIN_LOG::set_max_size(ulong max_size_arg) {
7683 /*
7684 We need to take locks, otherwise this may happen:
7685 new_file() is called, calls open(old_max_size), then before open() starts,
7686 set_max_size() sets max_size to max_size_arg, then open() starts and
7687 uses the old_max_size argument, so max_size_arg has been overwritten and
7688 it's like if the SET command was never run.
7689 */
7690 DBUG_TRACE;
7691 mysql_mutex_lock(&LOCK_log);
7692 if (is_open()) max_size = max_size_arg;
7693 mysql_mutex_unlock(&LOCK_log);
7694 }
7695
7696 /****** transaction coordinator log for 2pc - binlog() based solution ******/
7697
7698 /**
7699 @todo
7700 keep in-memory list of prepared transactions
7701 (add to list in log(), remove on unlog())
7702 and copy it to the new binlog if rotated
7703 but let's check the behaviour of tc_log_page_waits first!
7704 */
7705
open_binlog(const char * opt_name)7706 int MYSQL_BIN_LOG::open_binlog(const char *opt_name) {
7707 LOG_INFO log_info;
7708 int error = 1;
7709
7710 /*
7711 This function is used for 2pc transaction coordination. Hence, it
7712 is never used for relay logs.
7713 */
7714 DBUG_ASSERT(!is_relay_log);
7715 DBUG_ASSERT(total_ha_2pc > 1 || (1 == total_ha_2pc && opt_bin_log));
7716 DBUG_ASSERT(opt_name && opt_name[0]);
7717
7718 if (!my_b_inited(&index_file)) {
7719 /* There was a failure to open the index file, can't open the binlog */
7720 cleanup();
7721 return 1;
7722 }
7723
7724 if (using_heuristic_recover()) {
7725 /* generate a new binlog to mask a corrupted one */
7726 mysql_mutex_lock(&LOCK_log);
7727 open_binlog(opt_name, nullptr, max_binlog_size, false,
7728 true /*need_lock_index=true*/, true /*need_sid_lock=true*/,
7729 nullptr);
7730 mysql_mutex_unlock(&LOCK_log);
7731 cleanup();
7732 return 1;
7733 }
7734
7735 if ((error = find_log_pos(&log_info, NullS, true /*need_lock_index=true*/))) {
7736 if (error != LOG_INFO_EOF)
7737 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_FIND_LOG_IN_INDEX, error);
7738 else
7739 error = 0;
7740 goto err;
7741 }
7742
7743 {
7744 Log_event *ev = nullptr;
7745 char log_name[FN_REFLEN];
7746 my_off_t valid_pos = 0;
7747 my_off_t binlog_size = 0;
7748
7749 do {
7750 strmake(log_name, log_info.log_file_name, sizeof(log_name) - 1);
7751 } while (
7752 !(error = find_next_log(&log_info, true /*need_lock_index=true*/)));
7753
7754 if (error != LOG_INFO_EOF) {
7755 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_FIND_LOG_IN_INDEX, error);
7756 goto err;
7757 }
7758
7759 Binlog_file_reader binlog_file_reader(opt_master_verify_checksum);
7760 if (binlog_file_reader.open(log_name)) {
7761 LogErr(ERROR_LEVEL, ER_BINLOG_FILE_OPEN_FAILED,
7762 binlog_file_reader.get_error_str());
7763 goto err;
7764 }
7765
7766 /*
7767 If the binary log was not properly closed it means that the server
7768 may have crashed. In that case, we need to call
7769 MYSQL_BIN_LOG::binlog_recover
7770 to:
7771
7772 a) collect logged XIDs;
7773 b) complete the 2PC of the pending XIDs;
7774 c) collect the last valid position.
7775
7776 Therefore, we do need to iterate over the binary log, even if
7777 total_ha_2pc == 1, to find the last valid group of events written.
7778 Later we will take this value and truncate the log if need be.
7779 */
7780 if ((ev = binlog_file_reader.read_event_object()) &&
7781 ev->get_type_code() == binary_log::FORMAT_DESCRIPTION_EVENT &&
7782 (ev->common_header->flags & LOG_EVENT_BINLOG_IN_USE_F ||
7783 DBUG_EVALUATE_IF("eval_force_bin_log_recovery", true, false))) {
7784 LogErr(INFORMATION_LEVEL, ER_BINLOG_RECOVERING_AFTER_CRASH_USING,
7785 opt_name);
7786 valid_pos = binlog_file_reader.position();
7787 error = binlog_recover(&binlog_file_reader, &valid_pos);
7788 binlog_size = binlog_file_reader.ifile()->length();
7789 } else
7790 error = 0;
7791
7792 delete ev;
7793
7794 if (error) goto err;
7795
7796 /* Trim the crashed binlog file to last valid transaction
7797 or event (non-transaction) base on valid_pos. */
7798 if (valid_pos > 0) {
7799 std::unique_ptr<Binlog_ofile> ofile(
7800 Binlog_ofile::open_existing(key_file_binlog, log_name, MYF(MY_WME)));
7801
7802 if (!ofile) {
7803 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_OPEN_CRASHED_BINLOG);
7804 return -1;
7805 }
7806
7807 /* Change binlog file size to valid_pos */
7808 if (valid_pos < binlog_size) {
7809 if (ofile->truncate(valid_pos)) {
7810 LogErr(ERROR_LEVEL, ER_BINLOG_CANT_TRIM_CRASHED_BINLOG);
7811 return -1;
7812 }
7813 LogErr(INFORMATION_LEVEL, ER_BINLOG_CRASHED_BINLOG_TRIMMED, log_name,
7814 binlog_size, valid_pos, valid_pos);
7815 }
7816
7817 /* Clear LOG_EVENT_BINLOG_IN_USE_F */
7818 uchar flags = 0;
7819 if (ofile->update(&flags, 1, BIN_LOG_HEADER_SIZE + FLAGS_OFFSET)) {
7820 LogErr(ERROR_LEVEL,
7821 ER_BINLOG_CANT_CLEAR_IN_USE_FLAG_FOR_CRASHED_BINLOG);
7822 return -1;
7823 }
7824 } // end if (valid_pos > 0)
7825 }
7826
7827 err:
7828 return error;
7829 }
7830
7831 /**
7832 Truncate the active relay log file in the specified position.
7833
7834 @param mi Master_info of the channel going to truncate the relay log file.
7835 @param truncate_pos The position to truncate the active relay log file.
7836 @return False on success and true on failure.
7837 */
truncate_relaylog_file(Master_info * mi,my_off_t truncate_pos)7838 bool MYSQL_BIN_LOG::truncate_relaylog_file(Master_info *mi,
7839 my_off_t truncate_pos) {
7840 DBUG_TRACE;
7841 DBUG_ASSERT(is_relay_log);
7842 mysql_mutex_assert_owner(&LOCK_log);
7843 Relay_log_info *rli = mi->rli;
7844 bool error = false;
7845
7846 /*
7847 If the relay log was closed by an error (binlog_error_action=IGNORE_ERROR)
7848 this truncate function should produce no result as the relay log is already
7849 in really bad shape.
7850 */
7851 if (!is_open()) {
7852 return false;
7853 }
7854
7855 my_off_t relaylog_file_size = m_binlog_file->position();
7856
7857 if (truncate_pos > 0 && truncate_pos < relaylog_file_size) {
7858 if (m_binlog_file->truncate(truncate_pos)) {
7859 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
7860 ER_THD(current_thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
7861 "failed to truncate relay log file");
7862 error = true;
7863 } else {
7864 LogErr(INFORMATION_LEVEL, ER_SLAVE_RELAY_LOG_TRUNCATE_INFO, log_file_name,
7865 relaylog_file_size, truncate_pos);
7866
7867 // Re-init the SQL thread IO_CACHE
7868 DBUG_ASSERT(strcmp(rli->get_event_relay_log_name(), log_file_name) ||
7869 rli->get_event_relay_log_pos() <= truncate_pos);
7870 rli->notify_relay_log_truncated();
7871 }
7872 }
7873 return error;
7874 }
7875
7876 /** This is called on shutdown, after ha_panic. */
close()7877 void MYSQL_BIN_LOG::close() {}
7878
7879 /*
7880 Prepare the transaction in the transaction coordinator.
7881
7882 This function will prepare the transaction in the storage engines
7883 (by calling @c ha_prepare_low) what will write a prepare record
7884 to the log buffers.
7885
7886 @retval 0 success
7887 @retval 1 error
7888 */
prepare(THD * thd,bool all)7889 int MYSQL_BIN_LOG::prepare(THD *thd, bool all) {
7890 DBUG_TRACE;
7891
7892 DBUG_ASSERT(opt_bin_log);
7893 /*
7894 The applier thread explicitly overrides the value of sql_log_bin
7895 with the value of log_slave_updates.
7896 */
7897 DBUG_ASSERT(thd->slave_thread ? opt_log_slave_updates
7898 : thd->variables.sql_log_bin);
7899
7900 /*
7901 Set HA_IGNORE_DURABILITY to not flush the prepared record of the
7902 transaction to the log of storage engine (for example, InnoDB
7903 redo log) during the prepare phase. So that we can flush prepared
7904 records of transactions to the log of storage engine in a group
7905 right before flushing them to binary log during binlog group
7906 commit flush stage. Reset to HA_REGULAR_DURABILITY at the
7907 beginning of parsing next command.
7908 */
7909 thd->durability_property = HA_IGNORE_DURABILITY;
7910
7911 int error = ha_prepare_low(thd, all);
7912
7913 return error;
7914 }
7915
7916 /**
7917 Commit the transaction in the transaction coordinator.
7918
7919 This function will commit the sessions transaction in the binary log
7920 and in the storage engines (by calling @c ha_commit_low). If the
7921 transaction was successfully logged (or not successfully unlogged)
7922 but the commit in the engines did not succeed, there is a risk of
7923 inconsistency between the engines and the binary log.
7924
7925 For binary log group commit, the commit is separated into three
7926 parts:
7927
7928 1. First part consists of filling the necessary caches and
7929 finalizing them (if they need to be finalized). After this,
7930 nothing is added to any of the caches.
7931
7932 2. Second part execute an ordered flush and commit. This will be
7933 done using the group commit functionality in ordered_commit.
7934
7935 3. Third part checks any errors resulting from the ordered commit
7936 and handles them appropriately.
7937
7938 @retval RESULT_SUCCESS success
7939 @retval RESULT_ABORTED error, transaction was neither logged nor committed
7940 @retval RESULT_INCONSISTENT error, transaction was logged but not committed
7941 */
commit(THD * thd,bool all)7942 TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all) {
7943 DBUG_TRACE;
7944 DBUG_PRINT("info",
7945 ("query='%s'", thd == current_thd ? thd->query().str : nullptr));
7946 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(thd);
7947 Transaction_ctx *trn_ctx = thd->get_transaction();
7948 my_xid xid = trn_ctx->xid_state()->get_xid()->get_my_xid();
7949 bool stmt_stuff_logged = false;
7950 bool trx_stuff_logged = false;
7951 bool skip_commit = is_loggable_xa_prepare(thd);
7952 bool is_atomic_ddl = false;
7953
7954 DBUG_PRINT("enter", ("thd: 0x%llx, all: %s, xid: %llu, cache_mngr: 0x%llx",
7955 (ulonglong)thd, YESNO(all), (ulonglong)xid,
7956 (ulonglong)cache_mngr));
7957
7958 /*
7959 No cache manager means nothing to log, but we still have to commit
7960 the transaction.
7961 */
7962 if (cache_mngr == nullptr) {
7963 if (!skip_commit && ha_commit_low(thd, all)) return RESULT_ABORTED;
7964 return RESULT_SUCCESS;
7965 }
7966
7967 Transaction_ctx::enum_trx_scope trx_scope =
7968 all ? Transaction_ctx::SESSION : Transaction_ctx::STMT;
7969
7970 DBUG_PRINT("debug", ("in_transaction: %s, no_2pc: %s, rw_ha_count: %d",
7971 YESNO(thd->in_multi_stmt_transaction_mode()),
7972 YESNO(trn_ctx->no_2pc(trx_scope)),
7973 trn_ctx->rw_ha_count(trx_scope)));
7974 DBUG_PRINT("debug",
7975 ("all.cannot_safely_rollback(): %s, trx_cache_empty: %s",
7976 YESNO(trn_ctx->cannot_safely_rollback(Transaction_ctx::SESSION)),
7977 YESNO(cache_mngr->trx_cache.is_binlog_empty())));
7978 DBUG_PRINT("debug",
7979 ("stmt.cannot_safely_rollback(): %s, stmt_cache_empty: %s",
7980 YESNO(trn_ctx->cannot_safely_rollback(Transaction_ctx::STMT)),
7981 YESNO(cache_mngr->stmt_cache.is_binlog_empty())));
7982
7983 /*
7984 If there are no handlertons registered, there is nothing to
7985 commit. Note that DDLs are written earlier in this case (inside
7986 binlog_query).
7987
7988 TODO: This can be a problem in those cases that there are no
7989 handlertons registered. DDLs are one example, but the other case
7990 is MyISAM. In this case, we could register a dummy handlerton to
7991 trigger the commit.
7992
7993 Any statement that requires logging will call binlog_query before
7994 trans_commit_stmt, so an alternative is to use the condition
7995 "binlog_query called or stmt.ha_list != 0".
7996 */
7997 if (!all && !trn_ctx->is_active(trx_scope) &&
7998 cache_mngr->stmt_cache.is_binlog_empty())
7999 return RESULT_SUCCESS;
8000
8001 if (thd->lex->sql_command == SQLCOM_XA_COMMIT) {
8002 /* The Commit phase of the XA two phase logging. */
8003
8004 #ifndef DBUG_OFF
8005 bool one_phase = get_xa_opt(thd) == XA_ONE_PHASE;
8006 DBUG_ASSERT(all || (thd->slave_thread && one_phase));
8007 DBUG_ASSERT(!skip_commit || one_phase);
8008 #endif
8009
8010 XID_STATE *xs = thd->get_transaction()->xid_state();
8011 if (DBUG_EVALUATE_IF(
8012 "simulate_xa_commit_log_failure", true,
8013 do_binlog_xa_commit_rollback(thd, xs->get_xid(), true)))
8014 return RESULT_ABORTED;
8015 }
8016
8017 if (!cache_mngr->stmt_cache.is_binlog_empty()) {
8018 /*
8019 Commit parent identification of non-transactional query has
8020 been deferred until now, except for the mixed transaction case.
8021 */
8022 trn_ctx->store_commit_parent(
8023 m_dependency_tracker.get_max_committed_timestamp());
8024 if (cache_mngr->stmt_cache.finalize(thd)) return RESULT_ABORTED;
8025 stmt_stuff_logged = true;
8026 }
8027
8028 /*
8029 We commit the transaction if:
8030 - We are not in a transaction and committing a statement, or
8031 - We are in a transaction and a full transaction is committed.
8032 Otherwise, we accumulate the changes.
8033 */
8034 if (!cache_mngr->trx_cache.is_binlog_empty() && ending_trans(thd, all) &&
8035 !trx_stuff_logged) {
8036 const bool real_trans =
8037 (all || !trn_ctx->is_active(Transaction_ctx::SESSION));
8038
8039 bool one_phase = get_xa_opt(thd) == XA_ONE_PHASE;
8040 bool is_loggable_xa = is_loggable_xa_prepare(thd);
8041 XID_STATE *xs = thd->get_transaction()->xid_state();
8042
8043 /*
8044 Log and finalize transaction cache regarding XA PREPARE/XA COMMIT ONE
8045 PHASE if one of the following statements is true:
8046 - If it is a loggable XA transaction in prepare state;
8047 - If it is a transaction being commited with 'XA COMMIT ONE PHASE',
8048 statement and is not an empty transaction when GTID_NEXT is set to a
8049 manual GTID.
8050
8051 For other XA COMMIT ONE PHASE statements that already have been finalized
8052 or are finalizing empty transactions when GTID_NEXT is set to a manual
8053 GTID, just let the execution flow get into the final 'else' branch and log
8054 a final 'COMMIT;' statement.
8055 */
8056 if (is_loggable_xa || // XA transaction in prepare state
8057 (thd->lex->sql_command == SQLCOM_XA_COMMIT && // Is a 'XA COMMIT
8058 one_phase && // ONE PHASE'
8059 xs != nullptr && // and it has not yet
8060 !xs->is_binlogged() && // been logged
8061 (thd->owned_gtid.sidno <= 0 || // and GTID_NEXT is NOT set to a
8062 // manual GTID
8063 !xs->has_state(XID_STATE::XA_NOTR)))) // and the transaction is NOT
8064 // empty and NOT finalized in
8065 // 'trans_xa_commit'
8066 {
8067 /* The prepare phase of XA transaction two phase logging. */
8068 int err = 0;
8069
8070 DBUG_ASSERT(thd->lex->sql_command != SQLCOM_XA_COMMIT || one_phase);
8071
8072 XA_prepare_log_event end_evt(thd, xs->get_xid(), one_phase);
8073
8074 DBUG_ASSERT(!is_loggable_xa || skip_commit);
8075
8076 err = cache_mngr->trx_cache.finalize(thd, &end_evt, xs);
8077 if (err) return RESULT_ABORTED;
8078 if (is_loggable_xa)
8079 if (DBUG_EVALUATE_IF("simulate_xa_prepare_failure_in_cache_finalize",
8080 true, false))
8081 return RESULT_ABORTED;
8082 }
8083 /*
8084 If is atomic DDL, finalize cache for DDL and no further logging is needed.
8085 */
8086 else if ((is_atomic_ddl = cache_mngr->trx_cache.has_xid())) {
8087 if (cache_mngr->trx_cache.finalize(thd, nullptr)) return RESULT_ABORTED;
8088 }
8089 /*
8090 We are committing a 2PC transaction if it is a "real" transaction
8091 and has an XID assigned (because some handlerton registered). A
8092 transaction is "real" if either 'all' is true or
8093 'trn_ctx->is_active(Transaction_ctx::SESSION)' is not true.
8094
8095 Note: This is kind of strange since registering the binlog
8096 handlerton will then make the transaction 2PC, which is not really
8097 true. This occurs for example if a MyISAM statement is executed
8098 with row-based replication on.
8099 */
8100 else if (real_trans && xid && trn_ctx->rw_ha_count(trx_scope) > 1 &&
8101 !trn_ctx->no_2pc(trx_scope)) {
8102 Xid_log_event end_evt(thd, xid);
8103 if (cache_mngr->trx_cache.finalize(thd, &end_evt)) return RESULT_ABORTED;
8104 }
8105 /*
8106 No further action needed and no special case applies, log a final
8107 'COMMIT' statement and finalize the transaction cache.
8108
8109 Empty transactions finalized with 'XA COMMIT ONE PHASE' will be covered
8110 by this branch.
8111 */
8112 else {
8113 Query_log_event end_evt(thd, STRING_WITH_LEN("COMMIT"), true, false, true,
8114 0, true);
8115 if (cache_mngr->trx_cache.finalize(thd, &end_evt)) return RESULT_ABORTED;
8116 }
8117 trx_stuff_logged = true;
8118 }
8119
8120 /*
8121 This is part of the stmt rollback.
8122 */
8123 if (!all) cache_mngr->trx_cache.set_prev_position(MY_OFF_T_UNDEF);
8124
8125 /*
8126 Now all the events are written to the caches, so we will commit
8127 the transaction in the engines. This is done using the group
8128 commit logic in ordered_commit, which will return when the
8129 transaction is committed.
8130
8131 If the commit in the engines fail, we still have something logged
8132 to the binary log so we have to report this as a "bad" failure
8133 (failed to commit, but logged something).
8134 */
8135 if (stmt_stuff_logged || trx_stuff_logged) {
8136 if (RUN_HOOK(
8137 transaction, before_commit,
8138 (thd, all, thd_get_cache_mngr(thd)->get_trx_cache(),
8139 thd_get_cache_mngr(thd)->get_stmt_cache(),
8140 max<my_off_t>(max_binlog_cache_size, max_binlog_stmt_cache_size),
8141 is_atomic_ddl)) ||
8142 DBUG_EVALUATE_IF("simulate_failure_in_before_commit_hook", true,
8143 false)) {
8144 ha_rollback_low(thd, all);
8145 gtid_state->update_on_rollback(thd);
8146 thd_get_cache_mngr(thd)->reset();
8147 // Reset the thread OK status before changing the outcome.
8148 if (thd->get_stmt_da()->is_ok())
8149 thd->get_stmt_da()->reset_diagnostics_area();
8150 my_error(ER_RUN_HOOK_ERROR, MYF(0), "before_commit");
8151 return RESULT_ABORTED;
8152 }
8153 /*
8154 Check whether the transaction should commit or abort given the
8155 plugin feedback.
8156 */
8157 if (thd->get_transaction()
8158 ->get_rpl_transaction_ctx()
8159 ->is_transaction_rollback() ||
8160 (DBUG_EVALUATE_IF("simulate_transaction_rollback_request", true,
8161 false))) {
8162 ha_rollback_low(thd, all);
8163 gtid_state->update_on_rollback(thd);
8164 thd_get_cache_mngr(thd)->reset();
8165 if (thd->get_stmt_da()->is_ok())
8166 thd->get_stmt_da()->reset_diagnostics_area();
8167 my_error(ER_TRANSACTION_ROLLBACK_DURING_COMMIT, MYF(0));
8168 return RESULT_ABORTED;
8169 }
8170
8171 if (ordered_commit(thd, all, skip_commit)) return RESULT_INCONSISTENT;
8172
8173 DBUG_EXECUTE_IF("ensure_binlog_cache_is_reset", {
8174 /* Assert that binlog cache is reset at commit time. */
8175 DBUG_ASSERT(binlog_cache_is_reset);
8176 binlog_cache_is_reset = false;
8177 };);
8178
8179 /*
8180 Mark the flag m_is_binlogged to true only after we are done
8181 with checking all the error cases.
8182 */
8183 if (is_loggable_xa_prepare(thd)) {
8184 thd->get_transaction()->xid_state()->set_binlogged();
8185 /*
8186 Inform hook listeners that a XA PREPARE did commit, that
8187 is, did log a transaction to the binary log.
8188 */
8189 (void)RUN_HOOK(transaction, after_commit, (thd, all));
8190 }
8191 } else if (!skip_commit) {
8192 if (ha_commit_low(thd, all)) return RESULT_INCONSISTENT;
8193 }
8194
8195 return RESULT_SUCCESS;
8196 }
8197
8198 /**
8199 Flush caches for session.
8200
8201 @note @c set_trans_pos is called with a pointer to the file name
8202 that the binary log currently use and a rotation will change the
8203 contents of the variable.
8204
8205 The position is used when calling the after_flush, after_commit,
8206 and after_rollback hooks, but these have been placed so that they
8207 occur before a rotation is executed.
8208
8209 It is the responsibility of any plugin that use this position to
8210 copy it if they need it after the hook has returned.
8211
8212 The current "global" transaction_counter is stepped and its new value
8213 is assigned to the transaction.
8214 */
flush_thread_caches(THD * thd)8215 std::pair<int, my_off_t> MYSQL_BIN_LOG::flush_thread_caches(THD *thd) {
8216 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(thd);
8217 my_off_t bytes = 0;
8218 bool wrote_xid = false;
8219 int error = cache_mngr->flush(thd, &bytes, &wrote_xid);
8220 if (!error && bytes > 0) {
8221 /*
8222 Note that set_trans_pos does not copy the file name. See
8223 this function documentation for more info.
8224 */
8225 thd->set_trans_pos(log_file_name, m_binlog_file->position());
8226 if (wrote_xid) inc_prep_xids(thd);
8227 }
8228 DBUG_PRINT("debug", ("bytes: %llu", bytes));
8229 return std::make_pair(error, bytes);
8230 }
8231
init_thd_variables(THD * thd,bool all,bool skip_commit)8232 void MYSQL_BIN_LOG::init_thd_variables(THD *thd, bool all, bool skip_commit) {
8233 /*
8234 These values are used while committing a transaction, so clear
8235 everything.
8236
8237 Notes:
8238
8239 - It would be good if we could keep transaction coordinator
8240 log-specific data out of the THD structure, but that is not the
8241 case right now.
8242
8243 - Everything in the transaction structure is reset when calling
8244 ha_commit_low since that calls Transaction_ctx::cleanup.
8245 */
8246 thd->tx_commit_pending = true;
8247 thd->commit_error = THD::CE_NONE;
8248 thd->next_to_commit = nullptr;
8249 thd->durability_property = HA_IGNORE_DURABILITY;
8250 thd->get_transaction()->m_flags.real_commit = all;
8251 thd->get_transaction()->m_flags.xid_written = false;
8252 thd->get_transaction()->m_flags.commit_low = !skip_commit;
8253 thd->get_transaction()->m_flags.run_hooks = !skip_commit;
8254 #ifndef DBUG_OFF
8255 /*
8256 The group commit Leader may have to wait for follower whose transaction
8257 is not ready to be preempted. Initially the status is pessimistic.
8258 Preemption guarding logics is necessary only when !DBUG_OFF is set.
8259 It won't be required for the dbug-off case as long as the follower won't
8260 execute any thread-specific write access code in this method, which is
8261 the case as of current.
8262 */
8263 thd->get_transaction()->m_flags.ready_preempt = 0;
8264 #endif
8265 }
8266
fetch_and_process_flush_stage_queue(const bool check_and_skip_flush_logs)8267 THD *MYSQL_BIN_LOG::fetch_and_process_flush_stage_queue(
8268 const bool check_and_skip_flush_logs) {
8269 /*
8270 Fetch the entire flush queue and empty it, so that the next batch
8271 has a leader. We must do this before invoking ha_flush_logs(...)
8272 for guaranteeing to flush prepared records of transactions before
8273 flushing them to binary log, which is required by crash recovery.
8274 */
8275 Commit_stage_manager::get_instance().lock_queue(
8276 Commit_stage_manager::BINLOG_FLUSH_STAGE);
8277
8278 THD *first_seen =
8279 Commit_stage_manager::get_instance().fetch_queue_skip_acquire_lock(
8280 Commit_stage_manager::BINLOG_FLUSH_STAGE);
8281 DBUG_ASSERT(first_seen != nullptr);
8282
8283 THD *commit_order_thd =
8284 Commit_stage_manager::get_instance().fetch_queue_skip_acquire_lock(
8285 Commit_stage_manager::COMMIT_ORDER_FLUSH_STAGE);
8286
8287 Commit_stage_manager::get_instance().unlock_queue(
8288 Commit_stage_manager::BINLOG_FLUSH_STAGE);
8289
8290 if (!check_and_skip_flush_logs ||
8291 (check_and_skip_flush_logs && commit_order_thd != nullptr)) {
8292 /*
8293 We flush prepared records of transactions to the log of storage
8294 engine (for example, InnoDB redo log) in a group right before
8295 flushing them to binary log.
8296 */
8297 ha_flush_logs(true);
8298 }
8299
8300 /*
8301 The transactions are flushed to the disk and so threads
8302 executing slave preserve commit order can be unblocked.
8303 */
8304 Commit_stage_manager::get_instance()
8305 .process_final_stage_for_ordered_commit_group(commit_order_thd);
8306 return first_seen;
8307 }
8308
process_flush_stage_queue(my_off_t * total_bytes_var,bool * rotate_var,THD ** out_queue_var)8309 int MYSQL_BIN_LOG::process_flush_stage_queue(my_off_t *total_bytes_var,
8310 bool *rotate_var,
8311 THD **out_queue_var) {
8312 DBUG_TRACE;
8313 #ifndef DBUG_OFF
8314 // number of flushes per group.
8315 int no_flushes = 0;
8316 #endif
8317 DBUG_ASSERT(total_bytes_var && rotate_var && out_queue_var);
8318 my_off_t total_bytes = 0;
8319 int flush_error = 1;
8320 mysql_mutex_assert_owner(&LOCK_log);
8321
8322 THD *first_seen = fetch_and_process_flush_stage_queue();
8323 DBUG_EXECUTE_IF("crash_after_flush_engine_log", DBUG_SUICIDE(););
8324 assign_automatic_gtids_to_flush_group(first_seen);
8325 /* Flush thread caches to binary log. */
8326 for (THD *head = first_seen; head; head = head->next_to_commit) {
8327 std::pair<int, my_off_t> result = flush_thread_caches(head);
8328 total_bytes += result.second;
8329 if (flush_error == 1) flush_error = result.first;
8330 #ifndef DBUG_OFF
8331 no_flushes++;
8332 #endif
8333 }
8334
8335 *out_queue_var = first_seen;
8336 *total_bytes_var = total_bytes;
8337 if (total_bytes > 0 &&
8338 (m_binlog_file->get_real_file_size() >= (my_off_t)max_size ||
8339 DBUG_EVALUATE_IF("simulate_max_binlog_size", true, false)))
8340 *rotate_var = true;
8341 #ifndef DBUG_OFF
8342 DBUG_PRINT("info", ("no_flushes:= %d", no_flushes));
8343 no_flushes = 0;
8344 #endif
8345 return flush_error;
8346 }
8347
8348 /**
8349 Commit a sequence of sessions.
8350
8351 This function commit an entire queue of sessions starting with the
8352 session in @c first. If there were an error in the flushing part of
8353 the ordered commit, the error code is passed in and all the threads
8354 are marked accordingly (but not committed).
8355
8356 It will also add the GTIDs of the transactions to gtid_executed.
8357
8358 @see MYSQL_BIN_LOG::ordered_commit
8359
8360 @param thd The "master" thread
8361 @param first First thread in the queue of threads to commit
8362 */
8363
process_commit_stage_queue(THD * thd,THD * first)8364 void MYSQL_BIN_LOG::process_commit_stage_queue(THD *thd, THD *first) {
8365 mysql_mutex_assert_owner(&LOCK_commit);
8366 #ifndef DBUG_OFF
8367 thd->get_transaction()->m_flags.ready_preempt =
8368 true; // formality by the leader
8369 #endif
8370 for (THD *head = first; head; head = head->next_to_commit) {
8371 DBUG_PRINT("debug", ("Thread ID: %u, commit_error: %d, commit_pending: %s",
8372 head->thread_id(), head->commit_error,
8373 YESNO(head->tx_commit_pending)));
8374 DBUG_EXECUTE_IF(
8375 "block_leader_after_delete",
8376 if (thd != head) { DBUG_SET("+d,after_delete_wait"); };);
8377 /*
8378 If flushing failed, set commit_error for the session, skip the
8379 transaction and proceed with the next transaction instead. This
8380 will mark all threads as failed, since the flush failed.
8381
8382 If flush succeeded, attach to the session and commit it in the
8383 engines.
8384 */
8385 #ifndef DBUG_OFF
8386 Commit_stage_manager::get_instance().clear_preempt_status(head);
8387 #endif
8388 if (head->get_transaction()->sequence_number != SEQ_UNINIT) {
8389 mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
8390 m_dependency_tracker.update_max_committed(head);
8391 mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);
8392 }
8393 /*
8394 Flush/Sync error should be ignored and continue
8395 to commit phase. And thd->commit_error cannot be
8396 COMMIT_ERROR at this moment.
8397 */
8398 DBUG_ASSERT(head->commit_error != THD::CE_COMMIT_ERROR);
8399 Thd_backup_and_restore switch_thd(thd, head);
8400 bool all = head->get_transaction()->m_flags.real_commit;
8401 if (head->get_transaction()->m_flags.commit_low) {
8402 /* head is parked to have exited append() */
8403 DBUG_ASSERT(head->get_transaction()->m_flags.ready_preempt);
8404 /*
8405 storage engine commit
8406 */
8407 if (ha_commit_low(head, all, false))
8408 head->commit_error = THD::CE_COMMIT_ERROR;
8409 }
8410 DBUG_PRINT("debug", ("commit_error: %d, commit_pending: %s",
8411 head->commit_error, YESNO(head->tx_commit_pending)));
8412 }
8413
8414 /*
8415 Handle the GTID of the threads.
8416 gtid_executed table is kept updated even though transactions fail to be
8417 logged. That's required by slave auto positioning.
8418 */
8419 gtid_state->update_commit_group(first);
8420
8421 for (THD *head = first; head; head = head->next_to_commit) {
8422 /*
8423 Decrement the prepared XID counter after storage engine commit.
8424 We also need decrement the prepared XID when encountering a
8425 flush error or session attach error for avoiding 3-way deadlock
8426 among user thread, rotate thread and dump thread.
8427 */
8428 if (head->get_transaction()->m_flags.xid_written) dec_prep_xids(head);
8429 }
8430 }
8431
8432 /**
8433 Process after commit for a sequence of sessions.
8434
8435 @param thd The "master" thread
8436 @param first First thread in the queue of threads to commit
8437 */
8438
process_after_commit_stage_queue(THD * thd,THD * first)8439 void MYSQL_BIN_LOG::process_after_commit_stage_queue(THD *thd, THD *first) {
8440 for (THD *head = first; head; head = head->next_to_commit) {
8441 if (head->get_transaction()->m_flags.run_hooks &&
8442 head->commit_error != THD::CE_COMMIT_ERROR) {
8443 /*
8444 TODO: This hook here should probably move outside/below this
8445 if and be the only after_commit invocation left in the
8446 code.
8447 */
8448 Thd_backup_and_restore switch_thd(thd, head);
8449 bool all = head->get_transaction()->m_flags.real_commit;
8450 (void)RUN_HOOK(transaction, after_commit, (head, all));
8451 /*
8452 When after_commit finished for the transaction, clear the run_hooks
8453 flag. This allow other parts of the system to check if after_commit was
8454 called.
8455 */
8456 head->get_transaction()->m_flags.run_hooks = false;
8457 }
8458 }
8459 }
8460
#ifndef DBUG_OFF
/** Printable labels for the commit stages, used by debug tracing only. */
static const char *g_stage_name[] = {"FLUSH", "SYNC", "COMMIT"};
#endif
8469
change_stage(THD * thd MY_ATTRIBUTE ((unused)),Commit_stage_manager::StageID stage,THD * queue,mysql_mutex_t * leave_mutex,mysql_mutex_t * enter_mutex)8470 bool MYSQL_BIN_LOG::change_stage(THD *thd MY_ATTRIBUTE((unused)),
8471 Commit_stage_manager::StageID stage,
8472 THD *queue, mysql_mutex_t *leave_mutex,
8473 mysql_mutex_t *enter_mutex) {
8474 DBUG_TRACE;
8475 DBUG_PRINT("enter", ("thd: 0x%llx, stage: %s, queue: 0x%llx", (ulonglong)thd,
8476 g_stage_name[stage], (ulonglong)queue));
8477 DBUG_ASSERT(0 <= stage && stage < Commit_stage_manager::STAGE_COUNTER);
8478 DBUG_ASSERT(enter_mutex);
8479 DBUG_ASSERT(queue);
8480 /*
8481 enroll_for will release the leave_mutex once the sessions are
8482 queued.
8483 */
8484 if (!Commit_stage_manager::get_instance().enroll_for(
8485 stage, queue, leave_mutex, enter_mutex)) {
8486 DBUG_ASSERT(!thd_get_cache_mngr(thd)->dbug_any_finalized());
8487 return true;
8488 }
8489
8490 return false;
8491 }
8492
8493 /**
8494 Flush the I/O cache to file.
8495
8496 Flush the binary log to the binlog file if any byte where written
8497 and signal that the binary log file has been updated if the flush
8498 succeeds.
8499 */
8500
flush_cache_to_file(my_off_t * end_pos_var)8501 int MYSQL_BIN_LOG::flush_cache_to_file(my_off_t *end_pos_var) {
8502 if (m_binlog_file->flush()) {
8503 THD *thd = current_thd;
8504 thd->commit_error = THD::CE_FLUSH_ERROR;
8505 return ER_ERROR_ON_WRITE;
8506 }
8507 *end_pos_var = m_binlog_file->position();
8508 return 0;
8509 }
8510
8511 /**
8512 Call fsync() to sync the file to disk.
8513 */
sync_binlog_file(bool force)8514 std::pair<bool, bool> MYSQL_BIN_LOG::sync_binlog_file(bool force) {
8515 bool synced = false;
8516 unsigned int sync_period = get_sync_period();
8517 if (force || (sync_period && ++sync_counter >= sync_period)) {
8518 sync_counter = 0;
8519
8520 /*
8521 There is a chance that binlog file could be closed by 'RESET MASTER' or
8522 or 'FLUSH LOGS' just after the leader releases LOCK_log and before it
8523 acquires LOCK_sync log. So it should check if m_binlog_file is opened.
8524 */
8525 if (DBUG_EVALUATE_IF("simulate_error_during_sync_binlog_file", 1,
8526 m_binlog_file->is_open() && m_binlog_file->sync())) {
8527 THD *thd = current_thd;
8528 thd->commit_error = THD::CE_SYNC_ERROR;
8529 return std::make_pair(true, synced);
8530 }
8531 synced = true;
8532 }
8533 return std::make_pair(false, synced);
8534 }
8535
8536 /**
8537 Helper function executed when leaving @c ordered_commit.
8538
8539 This function contain the necessary code for fetching the error
8540 code, doing post-commit checks, and wrapping up the commit if
8541 necessary.
8542
8543 It is typically called when enter_stage indicates that the thread
8544 should bail out, and also when the ultimate leader thread finishes
8545 executing @c ordered_commit.
8546
8547 It is typically used in this manner:
8548 @code
8549 if (enter_stage(thd, Thread_queue::BINLOG_FLUSH_STAGE, thd, &LOCK_log))
8550 return finish_commit(thd);
8551 @endcode
8552
8553 @return Error code if the session commit failed, or zero on
8554 success.
8555 */
finish_commit(THD * thd)8556 int MYSQL_BIN_LOG::finish_commit(THD *thd) {
8557 DBUG_TRACE;
8558 DEBUG_SYNC(thd, "reached_finish_commit");
8559 /*
8560 In some unlikely situations, it can happen that binary
8561 log is closed before the thread flushes it's cache.
8562 In that case, clear the caches before doing commit.
8563 */
8564 if (unlikely(!is_open())) {
8565 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(thd);
8566 if (cache_mngr) cache_mngr->reset();
8567 }
8568
8569 if (thd->get_transaction()->sequence_number != SEQ_UNINIT) {
8570 mysql_mutex_lock(&LOCK_slave_trans_dep_tracker);
8571 m_dependency_tracker.update_max_committed(thd);
8572 mysql_mutex_unlock(&LOCK_slave_trans_dep_tracker);
8573 }
8574 if (thd->get_transaction()->m_flags.commit_low) {
8575 const bool all = thd->get_transaction()->m_flags.real_commit;
8576 /*
8577 Now flush error and sync erros are ignored and we are continuing and
8578 committing. And at this time, commit_error cannot be COMMIT_ERROR.
8579 */
8580 DBUG_ASSERT(thd->commit_error != THD::CE_COMMIT_ERROR);
8581 /*
8582 storage engine commit
8583 */
8584 if (ha_commit_low(thd, all, false))
8585 thd->commit_error = THD::CE_COMMIT_ERROR;
8586 /*
8587 Decrement the prepared XID counter after storage engine commit
8588 */
8589 if (thd->get_transaction()->m_flags.xid_written) dec_prep_xids(thd);
8590 /*
8591 If commit succeeded, we call the after_commit hook
8592
8593 TODO: This hook here should probably move outside/below this
8594 if and be the only after_commit invocation left in the
8595 code.
8596 */
8597 if ((thd->commit_error != THD::CE_COMMIT_ERROR) &&
8598 thd->get_transaction()->m_flags.run_hooks) {
8599 (void)RUN_HOOK(transaction, after_commit, (thd, all));
8600 thd->get_transaction()->m_flags.run_hooks = false;
8601 }
8602 } else if (thd->get_transaction()->m_flags.xid_written)
8603 dec_prep_xids(thd);
8604
8605 /*
8606 If the ordered commit didn't updated the GTIDs for this thd yet
8607 at process_commit_stage_queue (i.e. --binlog-order-commits=0)
8608 the thd still has the ownership of a GTID and we must handle it.
8609 */
8610 if (!thd->owned_gtid_is_empty()) {
8611 /*
8612 Gtid is added to gtid_state.executed_gtids and removed from owned_gtids
8613 on update_on_commit().
8614 */
8615 if (thd->commit_error == THD::CE_NONE) {
8616 gtid_state->update_on_commit(thd);
8617 } else
8618 gtid_state->update_on_rollback(thd);
8619 }
8620
8621 DBUG_EXECUTE_IF("leaving_finish_commit", {
8622 const char act[] = "now SIGNAL signal_leaving_finish_commit";
8623 DBUG_ASSERT(!debug_sync_set_action(current_thd, STRING_WITH_LEN(act)));
8624 };);
8625
8626 DBUG_ASSERT(thd->commit_error || !thd->get_transaction()->m_flags.run_hooks);
8627 DBUG_ASSERT(!thd_get_cache_mngr(thd)->dbug_any_finalized());
8628 DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d", thd->thread_id(),
8629 thd->commit_error));
8630 /*
8631 flush or sync errors are handled by the leader of the group
8632 (using binlog_error_action). Hence treat only COMMIT_ERRORs as errors.
8633 */
8634 return thd->commit_error == THD::CE_COMMIT_ERROR;
8635 }
8636
8637 /**
8638 Auxiliary function used in ordered_commit.
8639 */
call_after_sync_hook(THD * queue_head)8640 static inline int call_after_sync_hook(THD *queue_head) {
8641 const char *log_file = nullptr;
8642 my_off_t pos = 0;
8643
8644 if (NO_HOOK(binlog_storage)) return 0;
8645
8646 DBUG_ASSERT(queue_head != nullptr);
8647 for (THD *thd = queue_head; thd != nullptr; thd = thd->next_to_commit)
8648 if (likely(thd->commit_error == THD::CE_NONE))
8649 thd->get_trans_fixed_pos(&log_file, &pos);
8650
8651 if (DBUG_EVALUATE_IF("simulate_after_sync_hook_error", 1, 0) ||
8652 RUN_HOOK(binlog_storage, after_sync, (queue_head, log_file, pos))) {
8653 LogErr(ERROR_LEVEL, ER_BINLOG_FAILED_TO_RUN_AFTER_SYNC_HOOK);
8654 return ER_ERROR_ON_WRITE;
8655 }
8656 return 0;
8657 }
8658
8659 /**
8660 Helper function to handle flush or sync stage errors.
8661 If binlog_error_action= ABORT_SERVER, server will be aborted
8662 after reporting the error to the client.
8663 If binlog_error_action= IGNORE_ERROR, binlog will be closed
8664 for the reset of the life time of the server. close() call is protected
8665 with LOCK_log to avoid any parallel operations on binary log.
8666
8667 @param thd Thread object that faced flush/sync error
8668 @param need_lock_log
8669 > Indicates true if LOCk_log is needed before closing
8670 binlog (happens when we are handling sync error)
8671 > Indicates false if LOCK_log is already acquired
8672 by the thread (happens when we are handling flush
8673 error)
8674 @param message Message stating the reason of the failure
8675 */
handle_binlog_flush_or_sync_error(THD * thd,bool need_lock_log,const char * message)8676 void MYSQL_BIN_LOG::handle_binlog_flush_or_sync_error(THD *thd,
8677 bool need_lock_log,
8678 const char *message) {
8679 char errmsg[MYSQL_ERRMSG_SIZE] = {0};
8680 if (message == nullptr)
8681 sprintf(
8682 errmsg,
8683 "An error occurred during %s stage of the commit. "
8684 "'binlog_error_action' is set to '%s'.",
8685 thd->commit_error == THD::CE_FLUSH_ERROR ? "flush" : "sync",
8686 binlog_error_action == ABORT_SERVER ? "ABORT_SERVER" : "IGNORE_ERROR");
8687 else
8688 strncpy(errmsg, message, MYSQL_ERRMSG_SIZE - 1);
8689 if (binlog_error_action == ABORT_SERVER) {
8690 char err_buff[MYSQL_ERRMSG_SIZE + 27];
8691 sprintf(err_buff, "%s Hence aborting the server.", errmsg);
8692 exec_binlog_error_action_abort(err_buff);
8693 } else {
8694 DEBUG_SYNC(thd, "before_binlog_closed_due_to_error");
8695 if (need_lock_log)
8696 mysql_mutex_lock(&LOCK_log);
8697 else
8698 mysql_mutex_assert_owner(&LOCK_log);
8699 /*
8700 It can happen that other group leader encountered
8701 error and already closed the binary log. So print
8702 error only if it is in open state. But we should
8703 call close() always just in case if the previous
8704 close did not close index file.
8705 */
8706 if (is_open()) {
8707 LogErr(ERROR_LEVEL, ER_TURNING_LOGGING_OFF_FOR_THE_DURATION, errmsg);
8708 }
8709 close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT, false /*need_lock_log=false*/,
8710 true /*need_lock_index=true*/);
8711 /*
8712 If there is a write error (flush/sync stage) and if
8713 binlog_error_action=IGNORE_ERROR, clear the error
8714 and allow the commit to happen in storage engine.
8715 */
8716 if (check_write_error(thd) &&
8717 DBUG_EVALUATE_IF("simulate_cache_creation_failure", false, true))
8718 thd->clear_error();
8719
8720 if (need_lock_log) mysql_mutex_unlock(&LOCK_log);
8721 DEBUG_SYNC(thd, "after_binlog_closed_due_to_error");
8722 }
8723 }
8724
ordered_commit(THD * thd,bool all,bool skip_commit)8725 int MYSQL_BIN_LOG::ordered_commit(THD *thd, bool all, bool skip_commit) {
8726 DBUG_TRACE;
8727 int flush_error = 0, sync_error = 0;
8728 my_off_t total_bytes = 0;
8729 bool do_rotate = false;
8730
8731 DBUG_EXECUTE_IF("crash_commit_before_log", DBUG_SUICIDE(););
8732 init_thd_variables(thd, all, skip_commit);
8733 DBUG_PRINT("enter", ("commit_pending: %s, commit_error: %d, thread_id: %u",
8734 YESNO(thd->tx_commit_pending), thd->commit_error,
8735 thd->thread_id()));
8736
8737 DEBUG_SYNC(thd, "bgc_before_flush_stage");
8738
8739 /*
8740 Stage #0: ensure slave threads commit order as they appear in the slave's
8741 relay log for transactions flushing to binary log.
8742
8743 This will make thread wait until its turn to commit.
8744 Commit_order_manager maintains it own queue and its own order for the
8745 commit. So Stage#0 doesn't maintain separate StageID.
8746 */
8747 if (Commit_order_manager::wait_for_its_turn_before_flush_stage(thd) ||
8748 ending_trans(thd, all) ||
8749 Commit_order_manager::get_rollback_status(thd)) {
8750 if (Commit_order_manager::wait(thd)) {
8751 return thd->commit_error;
8752 }
8753 }
8754
8755 /*
8756 Stage #1: flushing transactions to binary log
8757
8758 While flushing, we allow new threads to enter and will process
8759 them in due time. Once the queue was empty, we cannot reap
8760 anything more since it is possible that a thread entered and
8761 appointed itself leader for the flush phase.
8762 */
8763
8764 if (change_stage(thd, Commit_stage_manager::BINLOG_FLUSH_STAGE, thd, nullptr,
8765 &LOCK_log)) {
8766 DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d", thd->thread_id(),
8767 thd->commit_error));
8768 return finish_commit(thd);
8769 }
8770
8771 THD *wait_queue = nullptr, *final_queue = nullptr;
8772 mysql_mutex_t *leave_mutex_before_commit_stage = nullptr;
8773 my_off_t flush_end_pos = 0;
8774 bool update_binlog_end_pos_after_sync;
8775 if (unlikely(!is_open())) {
8776 final_queue = fetch_and_process_flush_stage_queue(true);
8777 leave_mutex_before_commit_stage = &LOCK_log;
8778 /*
8779 binary log is closed, flush stage and sync stage should be
8780 ignored. Binlog cache should be cleared, but instead of doing
8781 it here, do that work in 'finish_commit' function so that
8782 leader and followers thread caches will be cleared.
8783 */
8784 goto commit_stage;
8785 }
8786 DEBUG_SYNC(thd, "waiting_in_the_middle_of_flush_stage");
8787 flush_error =
8788 process_flush_stage_queue(&total_bytes, &do_rotate, &wait_queue);
8789
8790 if (flush_error == 0 && total_bytes > 0)
8791 flush_error = flush_cache_to_file(&flush_end_pos);
8792 DBUG_EXECUTE_IF("crash_after_flush_binlog", DBUG_SUICIDE(););
8793
8794 update_binlog_end_pos_after_sync = (get_sync_period() == 1);
8795
8796 /*
8797 If the flush finished successfully, we can call the after_flush
8798 hook. Being invoked here, we have the guarantee that the hook is
8799 executed before the before/after_send_hooks on the dump thread
8800 preventing race conditions among these plug-ins.
8801 */
8802 if (flush_error == 0) {
8803 const char *file_name_ptr = log_file_name + dirname_length(log_file_name);
8804 DBUG_ASSERT(flush_end_pos != 0);
8805 if (RUN_HOOK(binlog_storage, after_flush,
8806 (thd, file_name_ptr, flush_end_pos))) {
8807 LogErr(ERROR_LEVEL, ER_BINLOG_FAILED_TO_RUN_AFTER_FLUSH_HOOK);
8808 flush_error = ER_ERROR_ON_WRITE;
8809 }
8810
8811 if (!update_binlog_end_pos_after_sync) update_binlog_end_pos();
8812
8813 DBUG_EXECUTE_IF("crash_commit_after_log", DBUG_SUICIDE(););
8814 }
8815
8816 if (flush_error) {
8817 /*
8818 Handle flush error (if any) after leader finishes it's flush stage.
8819 */
8820 handle_binlog_flush_or_sync_error(thd, false /* need_lock_log */, nullptr);
8821 }
8822
8823 DEBUG_SYNC(thd, "bgc_after_flush_stage_before_sync_stage");
8824
8825 /*
8826 Stage #2: Syncing binary log file to disk
8827 */
8828
8829 if (change_stage(thd, Commit_stage_manager::SYNC_STAGE, wait_queue, &LOCK_log,
8830 &LOCK_sync)) {
8831 DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d", thd->thread_id(),
8832 thd->commit_error));
8833 return finish_commit(thd);
8834 }
8835
8836 /*
8837 Shall introduce a delay only if it is going to do sync
8838 in this ongoing SYNC stage. The "+1" used below in the
8839 if condition is to count the ongoing sync stage.
8840 When sync_binlog=0 (where we never do sync in BGC group),
8841 it is considered as a special case and delay will be executed
8842 for every group just like how it is done when sync_binlog= 1.
8843 */
8844 if (!flush_error && (sync_counter + 1 >= get_sync_period()))
8845 Commit_stage_manager::get_instance().wait_count_or_timeout(
8846 opt_binlog_group_commit_sync_no_delay_count,
8847 opt_binlog_group_commit_sync_delay, Commit_stage_manager::SYNC_STAGE);
8848
8849 final_queue = Commit_stage_manager::get_instance().fetch_queue_acquire_lock(
8850 Commit_stage_manager::SYNC_STAGE);
8851
8852 if (flush_error == 0 && total_bytes > 0) {
8853 DEBUG_SYNC(thd, "before_sync_binlog_file");
8854 std::pair<bool, bool> result = sync_binlog_file(false);
8855 sync_error = result.first;
8856 }
8857
8858 if (update_binlog_end_pos_after_sync) {
8859 THD *tmp_thd = final_queue;
8860 const char *binlog_file = nullptr;
8861 my_off_t pos = 0;
8862 while (tmp_thd->next_to_commit != nullptr)
8863 tmp_thd = tmp_thd->next_to_commit;
8864 if (flush_error == 0 && sync_error == 0) {
8865 tmp_thd->get_trans_fixed_pos(&binlog_file, &pos);
8866 update_binlog_end_pos(binlog_file, pos);
8867 }
8868 }
8869
8870 DEBUG_SYNC(thd, "bgc_after_sync_stage_before_commit_stage");
8871
8872 leave_mutex_before_commit_stage = &LOCK_sync;
8873 /*
8874 Stage #3: Commit all transactions in order.
8875
8876 This stage is skipped if we do not need to order the commits and
8877 each thread have to execute the handlerton commit instead.
8878
    However, since we are keeping the lock from the previous stage, we
8880 need to unlock it if we skip the stage.
8881
8882 We must also step commit_clock before the ha_commit_low() is called
    either in ordered fashion(by the leader of this stage) or by the thread
8884 themselves.
8885
8886 We are delaying the handling of sync error until
8887 all locks are released but we should not enter into
8888 commit stage if binlog_error_action is ABORT_SERVER.
8889 */
8890 commit_stage:
8891 /* Clone needs binlog commit order. */
8892 if ((opt_binlog_order_commits || Clone_handler::need_commit_order()) &&
8893 (sync_error == 0 || binlog_error_action != ABORT_SERVER)) {
8894 if (change_stage(thd, Commit_stage_manager::COMMIT_STAGE, final_queue,
8895 leave_mutex_before_commit_stage, &LOCK_commit)) {
8896 DBUG_PRINT("return", ("Thread ID: %u, commit_error: %d", thd->thread_id(),
8897 thd->commit_error));
8898 return finish_commit(thd);
8899 }
8900 THD *commit_queue =
8901 Commit_stage_manager::get_instance().fetch_queue_acquire_lock(
8902 Commit_stage_manager::COMMIT_STAGE);
8903 DBUG_EXECUTE_IF("semi_sync_3-way_deadlock",
8904 DEBUG_SYNC(thd, "before_process_commit_stage_queue"););
8905
8906 if (flush_error == 0 && sync_error == 0)
8907 sync_error = call_after_sync_hook(commit_queue);
8908
8909 /*
8910 process_commit_stage_queue will call update_on_commit or
8911 update_on_rollback for the GTID owned by each thd in the queue.
8912
8913 This will be done this way to guarantee that GTIDs are added to
8914 gtid_executed in order, to avoid creating unnecessary temporary
8915 gaps and keep gtid_executed as a single interval at all times.
8916
8917 If we allow each thread to call update_on_commit only when they
8918 are at finish_commit, the GTID order cannot be guaranteed and
8919 temporary gaps may appear in gtid_executed. When this happen,
8920 the server would have to add and remove intervals from the
8921 Gtid_set, and adding and removing intervals requires a mutex,
8922 which would reduce performance.
8923 */
8924 process_commit_stage_queue(thd, commit_queue);
8925 mysql_mutex_unlock(&LOCK_commit);
8926 /*
8927 Process after_commit after LOCK_commit is released for avoiding
8928 3-way deadlock among user thread, rotate thread and dump thread.
8929 */
8930 process_after_commit_stage_queue(thd, commit_queue);
8931 final_queue = commit_queue;
8932 } else {
8933 if (leave_mutex_before_commit_stage)
8934 mysql_mutex_unlock(leave_mutex_before_commit_stage);
8935 if (flush_error == 0 && sync_error == 0)
8936 sync_error = call_after_sync_hook(final_queue);
8937 }
8938
8939 /*
8940 Handle sync error after we release all locks in order to avoid deadlocks
8941 */
8942 if (sync_error)
8943 handle_binlog_flush_or_sync_error(thd, true /* need_lock_log */, nullptr);
8944
8945 DEBUG_SYNC(thd, "before_signal_done");
8946 /* Commit done so signal all waiting threads */
8947 Commit_stage_manager::get_instance().signal_done(final_queue);
8948 DBUG_EXECUTE_IF("block_leader_after_delete", {
8949 const char action[] = "now SIGNAL leader_proceed";
8950 DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(action)));
8951 };);
8952
8953 /*
8954 Finish the commit before executing a rotate, or run the risk of a
8955 deadlock. We don't need the return value here since it is in
8956 thd->commit_error, which is returned below.
8957 */
8958 (void)finish_commit(thd);
8959 DEBUG_SYNC(thd, "bgc_after_commit_stage_before_rotation");
8960
8961 /*
8962 If we need to rotate, we do it without commit error.
8963 Otherwise the thd->commit_error will be possibly reset.
8964 */
8965 if (DBUG_EVALUATE_IF("force_rotate", 1, 0) ||
8966 (do_rotate && thd->commit_error == THD::CE_NONE &&
8967 !is_rotating_caused_by_incident)) {
8968 /*
8969 Do not force the rotate as several consecutive groups may
8970 request unnecessary rotations.
8971
8972 NOTE: Run purge_logs wo/ holding LOCK_log because it does not
8973 need the mutex. Otherwise causes various deadlocks.
8974 */
8975
8976 DEBUG_SYNC(thd, "ready_to_do_rotation");
8977 bool check_purge = false;
8978 mysql_mutex_lock(&LOCK_log);
8979 /*
8980 If rotate fails then depends on binlog_error_action variable
8981 appropriate action will be taken inside rotate call.
8982 */
8983 int error = rotate(false, &check_purge);
8984 mysql_mutex_unlock(&LOCK_log);
8985
8986 if (error)
8987 thd->commit_error = THD::CE_COMMIT_ERROR;
8988 else if (check_purge)
8989 purge();
8990 }
8991 /*
8992 flush or sync errors are handled above (using binlog_error_action).
8993 Hence treat only COMMIT_ERRORs as errors.
8994 */
8995 return thd->commit_error == THD::CE_COMMIT_ERROR;
8996 }
8997
8998 /**
8999 MYSQLD server recovers from last crashed binlog.
9000
9001 @param[in] binlog_file_reader Binlog_file_reader of the crashed binlog.
9002 @param[out] valid_pos The position of the last valid transaction or
9003 event(non-transaction) of the crashed binlog.
9004 valid_pos must be non-NULL.
9005
9006 After a crash, storage engines may contain transactions that are
9007 prepared but not committed (in theory any engine, in practice
9008 InnoDB). This function uses the binary log as the source of truth
9009 to determine which of these transactions should be committed and
9010 which should be rolled back.
9011
9012 The function collects the XIDs of all transactions that are
9013 completely written to the binary log into a hash, and passes this
9014 hash to the storage engines through the ha_recover function in the
9015 handler interface. This tells the storage engines to commit all
9016 prepared transactions that are in the set, and to roll back all
9017 prepared transactions that are not in the set.
9018
9019 To compute the hash, this function iterates over the last binary log
9020 only (i.e. it assumes that 'log' is the last binary log). It
9021 instantiates each event. For XID-events (i.e. commit to InnoDB), it
9022 extracts the xid from the event and stores it in the hash.
9023
9024 It is enough to iterate over only the last binary log because when
9025 the binary log is rotated we force engines to commit (and we fsync
9026 the old binary log).
9027
9028 @retval false Success
9029 @retval true Out of memory, or storage engine returns error.
9030 */
binlog_recover(Binlog_file_reader * binlog_file_reader,my_off_t * valid_pos)9031 static bool binlog_recover(Binlog_file_reader *binlog_file_reader,
9032 my_off_t *valid_pos) {
9033 bool res = false;
9034 binlog::tools::Iterator it(binlog_file_reader);
9035 it.set_copy_event_buffer();
9036
9037 /*
9038 The flag is used for handling the case that a transaction
9039 is partially written to the binlog.
9040 */
9041 bool in_transaction = false;
9042 int memory_page_size = my_getpagesize();
9043 {
9044 MEM_ROOT mem_root(key_memory_binlog_recover_exec, memory_page_size);
9045 mem_root_unordered_set<my_xid> xids(&mem_root);
9046
9047 /*
9048 now process events in the queue. Queue is dynamically changed
9049 everytime we process an event. This may be a bit suboptimal
9050 since it adds an indirection, but it helps to generalize the
9051 usage of the transaction payload event (which unfolds into
9052 several events into the queue when it is processed).
9053 */
9054 for (Log_event *ev = it.begin(); !res && (ev != it.end()); ev = it.next()) {
9055 switch (ev->get_type_code()) {
9056 // may be begin, middle or end of a transaction
9057 case binary_log::QUERY_EVENT: {
9058 // starts a transaction
9059 if (!strcmp(((Query_log_event *)ev)->query, "BEGIN"))
9060 in_transaction = true;
9061
9062 // ends a transaction
9063 if (!strcmp(((Query_log_event *)ev)->query, "COMMIT")) {
9064 DBUG_ASSERT(in_transaction == true);
9065 in_transaction = false;
9066 }
9067 // starts and ends a transaction
9068 if (is_atomic_ddl_event(ev)) {
9069 DBUG_ASSERT(in_transaction == false);
9070 auto qev = dynamic_cast<Query_log_event *>(ev);
9071 DBUG_ASSERT(qev != nullptr);
9072 res = (qev == nullptr || !xids.insert(qev->ddl_xid).second);
9073 }
9074 break;
9075 }
9076 // ends a transaction
9077 case binary_log::XID_EVENT: {
9078 DBUG_ASSERT(in_transaction == true);
9079 in_transaction = false;
9080 Xid_log_event *xev = dynamic_cast<Xid_log_event *>(ev);
9081 DBUG_ASSERT(xev != nullptr);
9082 res = (xev == nullptr || !xids.insert(xev->xid).second);
9083 break;
9084 }
9085 default: {
9086 break;
9087 }
9088 }
9089
9090 /*
9091 Recorded valid position for the crashed binlog file
9092 which did not contain incorrect events. The following
9093 positions increase the variable valid_pos:
9094
9095 1 -
9096 ...
9097 <---> HERE IS VALID <--->
9098 GTID
9099 BEGIN
9100 ...
9101 COMMIT
9102 ...
9103
9104 2 -
9105 ...
9106 <---> HERE IS VALID <--->
9107 GTID
9108 DDL/UTILITY
9109 ...
9110
9111 In other words, the following positions do not increase
9112 the variable valid_pos:
9113
9114 1 -
9115 GTID
9116 <---> HERE IS VALID <--->
9117 ...
9118
9119 2 -
9120 GTID
9121 BEGIN
9122 <---> HERE IS VALID <--->
9123 ...
9124 */
9125 if (!in_transaction && !is_gtid_event(ev))
9126 *valid_pos = binlog_file_reader->position();
9127
9128 delete ev;
9129 ev = nullptr;
9130 res = it.has_error();
9131 }
9132
9133 /*
9134 Call ha_recover if and only if there is a registered engine that
9135 does 2PC, otherwise in DBUG builds calling ha_recover directly
9136 will result in an assert. (Production builds would be safe since
9137 ha_recover returns right away if total_ha_2pc <= opt_log_bin.)
9138 */
9139 res = res || (total_ha_2pc > 1 && ha_recover(&xids));
9140 }
9141
9142 if (res) LogErr(ERROR_LEVEL, ER_BINLOG_CRASH_RECOVERY_FAILED);
9143 return res;
9144 }
9145
report_missing_purged_gtids(const Gtid_set * slave_executed_gtid_set,const char ** errmsg)9146 void MYSQL_BIN_LOG::report_missing_purged_gtids(
9147 const Gtid_set *slave_executed_gtid_set, const char **errmsg) {
9148 DBUG_TRACE;
9149 THD *thd = current_thd;
9150 Gtid_set gtid_missing(gtid_state->get_lost_gtids()->get_sid_map());
9151 gtid_missing.add_gtid_set(gtid_state->get_lost_gtids());
9152 gtid_missing.remove_gtid_set(slave_executed_gtid_set);
9153
9154 String tmp_uuid;
9155
9156 /* Protects thd->user_vars. */
9157 mysql_mutex_lock(¤t_thd->LOCK_thd_data);
9158 const auto it = current_thd->user_vars.find("slave_uuid");
9159 if (it != current_thd->user_vars.end() && it->second->length() > 0) {
9160 tmp_uuid.copy(it->second->ptr(), it->second->length(), NULL);
9161 }
9162 mysql_mutex_unlock(¤t_thd->LOCK_thd_data);
9163
9164 char *missing_gtids = NULL;
9165 char *slave_executed_gtids = NULL;
9166 gtid_missing.to_string(&missing_gtids, false);
9167 slave_executed_gtid_set->to_string(&slave_executed_gtids, false);
9168
9169 /*
9170 Log the information about the missing purged GTIDs to the error log.
9171 */
9172 std::ostringstream log_info;
9173 log_info << "The missing transactions are '" << missing_gtids << "'";
9174
9175 LogErr(WARNING_LEVEL, ER_FOUND_MISSING_GTIDS, tmp_uuid.ptr(),
9176 log_info.str().c_str());
9177
9178 /*
9179 Send the information about the slave executed GTIDs and missing
9180 purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
9181 */
9182 std::ostringstream gtid_info;
9183 gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
9184 << "', and the missing transactions are '" << missing_gtids << "'";
9185 *errmsg = ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS);
9186
9187 /* Don't consider the "%s" in the format string. Subtract 2 from the
9188 total length */
9189 int total_length = (strlen(*errmsg) - 2 + gtid_info.str().length());
9190
9191 DBUG_EXECUTE_IF("simulate_long_missing_gtids",
9192 { total_length = MYSQL_ERRMSG_SIZE + 1; });
9193
9194 if (total_length > MYSQL_ERRMSG_SIZE)
9195 gtid_info.str(
9196 "The GTID sets and the missing purged transactions are too"
9197 " long to print in this message. For more information,"
9198 " please see the master's error log or the manual for"
9199 " GTID_SUBTRACT");
9200
9201 /* Buffer for formatting the message about the missing GTIDs. */
9202 static char buff[MYSQL_ERRMSG_SIZE];
9203 snprintf(buff, MYSQL_ERRMSG_SIZE, *errmsg, gtid_info.str().c_str());
9204 *errmsg = const_cast<const char *>(buff);
9205
9206 my_free(missing_gtids);
9207 my_free(slave_executed_gtids);
9208 }
9209
report_missing_gtids(const Gtid_set * previous_gtid_set,const Gtid_set * slave_executed_gtid_set,const char ** errmsg)9210 void MYSQL_BIN_LOG::report_missing_gtids(
9211 const Gtid_set *previous_gtid_set, const Gtid_set *slave_executed_gtid_set,
9212 const char **errmsg) {
9213 DBUG_TRACE;
9214 THD *thd = current_thd;
9215 char *missing_gtids = NULL;
9216 char *slave_executed_gtids = NULL;
9217 Gtid_set gtid_missing(slave_executed_gtid_set->get_sid_map());
9218 gtid_missing.add_gtid_set(slave_executed_gtid_set);
9219 gtid_missing.remove_gtid_set(previous_gtid_set);
9220 gtid_missing.to_string(&missing_gtids, false);
9221 slave_executed_gtid_set->to_string(&slave_executed_gtids, false);
9222
9223 String tmp_uuid;
9224
9225 /* Protects thd->user_vars. */
9226 mysql_mutex_lock(¤t_thd->LOCK_thd_data);
9227 const auto it = current_thd->user_vars.find("slave_uuid");
9228 if (it != current_thd->user_vars.end() && it->second->length() > 0) {
9229 tmp_uuid.copy(it->second->ptr(), it->second->length(), NULL);
9230 }
9231 mysql_mutex_unlock(¤t_thd->LOCK_thd_data);
9232
9233 /*
9234 Log the information about the missing purged GTIDs to the error log.
9235 */
9236 std::ostringstream log_info;
9237 log_info << "If the binary log files have been deleted from disk,"
9238 " check the consistency of 'GTID_PURGED' variable."
9239 " The missing transactions are '"
9240 << missing_gtids << "'";
9241 LogErr(WARNING_LEVEL, ER_FOUND_MISSING_GTIDS, tmp_uuid.ptr(),
9242 log_info.str().c_str());
9243 /*
9244 Send the information about the slave executed GTIDs and missing
9245 purged GTIDs to slave if the message is less than MYSQL_ERRMSG_SIZE.
9246 */
9247 std::ostringstream gtid_info;
9248 gtid_info << "The GTID set sent by the slave is '" << slave_executed_gtids
9249 << "', and the missing transactions are '" << missing_gtids << "'";
9250 *errmsg = ER_THD(thd, ER_MASTER_HAS_PURGED_REQUIRED_GTIDS);
9251
9252 /* Don't consider the "%s" in the format string. Subtract 2 from the
9253 total length */
9254 if ((strlen(*errmsg) - 2 + gtid_info.str().length()) > MYSQL_ERRMSG_SIZE)
9255 gtid_info.str(
9256 "The GTID sets and the missing purged transactions are too"
9257 " long to print in this message. For more information,"
9258 " please see the master's error log or the manual for"
9259 " GTID_SUBTRACT");
9260 /* Buffer for formatting the message about the missing GTIDs. */
9261 static char buff[MYSQL_ERRMSG_SIZE];
9262 snprintf(buff, MYSQL_ERRMSG_SIZE, *errmsg, gtid_info.str().c_str());
9263 *errmsg = const_cast<const char *>(buff);
9264 my_free(missing_gtids);
9265 my_free(slave_executed_gtids);
9266 }
9267
update_binlog_end_pos(bool need_lock)9268 void MYSQL_BIN_LOG::update_binlog_end_pos(bool need_lock) {
9269 if (need_lock)
9270 lock_binlog_end_pos();
9271 else
9272 mysql_mutex_assert_owner(&LOCK_binlog_end_pos);
9273 atomic_binlog_end_pos = m_binlog_file->position();
9274 signal_update();
9275 if (need_lock) unlock_binlog_end_pos();
9276 }
9277
update_binlog_end_pos(const char * file,my_off_t pos)9278 inline void MYSQL_BIN_LOG::update_binlog_end_pos(const char *file,
9279 my_off_t pos) {
9280 lock_binlog_end_pos();
9281 if (is_active(file) && (pos > atomic_binlog_end_pos))
9282 atomic_binlog_end_pos = pos;
9283 signal_update();
9284 unlock_binlog_end_pos();
9285 }
9286
is_binlog_cache_empty(bool is_transactional) const9287 bool THD::is_binlog_cache_empty(bool is_transactional) const {
9288 DBUG_TRACE;
9289
9290 // If opt_bin_log==0, it is not safe to call thd_get_cache_mngr
9291 // because binlog_hton has not been completely set up.
9292 DBUG_ASSERT(opt_bin_log);
9293 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(this);
9294
9295 // cache_mngr is NULL until we call thd->binlog_setup_trx_data, so
9296 // we assert that this has been done.
9297 DBUG_ASSERT(cache_mngr != nullptr);
9298
9299 binlog_cache_data *cache_data =
9300 cache_mngr->get_binlog_cache_data(is_transactional);
9301 DBUG_ASSERT(cache_data != nullptr);
9302
9303 return cache_data->is_binlog_empty();
9304 }
9305
9306 /*
9307 These functions are placed in this file since they need access to
9308 binlog_hton, which has internal linkage.
9309 */
9310
binlog_setup_trx_data()9311 int THD::binlog_setup_trx_data() {
9312 DBUG_TRACE;
9313 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(this);
9314
9315 if (cache_mngr) return 0; // Already set up
9316
9317 cache_mngr = (binlog_cache_mngr *)my_malloc(key_memory_binlog_cache_mngr,
9318 sizeof(binlog_cache_mngr),
9319 MYF(MY_ZEROFILL));
9320 if (!cache_mngr) {
9321 return 1; // Didn't manage to set it up
9322 }
9323
9324 cache_mngr = new (cache_mngr)
9325 binlog_cache_mngr(&binlog_stmt_cache_use, &binlog_stmt_cache_disk_use,
9326 &binlog_cache_use, &binlog_cache_disk_use);
9327 if (cache_mngr->init()) {
9328 cache_mngr->~binlog_cache_mngr();
9329 my_free(cache_mngr);
9330 return 1;
9331 }
9332
9333 DBUG_PRINT("debug", ("Set ha_data slot %d to 0x%llx", binlog_hton->slot,
9334 (ulonglong)cache_mngr));
9335 thd_set_ha_data(this, binlog_hton, cache_mngr);
9336
9337 return 0;
9338 }
9339
9340 /**
9341
9342 */
register_binlog_handler(THD * thd,bool trx)9343 void register_binlog_handler(THD *thd, bool trx) {
9344 DBUG_TRACE;
9345 /*
9346 If this is the first call to this function while processing a statement,
9347 the transactional cache does not have a savepoint defined. So, in what
9348 follows:
9349 . an implicit savepoint is defined;
9350 . callbacks are registered;
9351 . binary log is set as read/write.
9352
9353 The savepoint allows for truncating the trx-cache transactional changes
9354 fail. Callbacks are necessary to flush caches upon committing or rolling
9355 back a statement or a transaction. However, notifications do not happen
9356 if the binary log is set as read/write.
9357 */
9358 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(thd);
9359 if (cache_mngr->trx_cache.get_prev_position() == MY_OFF_T_UNDEF) {
9360 /*
9361 Set an implicit savepoint in order to be able to truncate a trx-cache.
9362 */
9363 my_off_t pos = 0;
9364 binlog_trans_log_savepos(thd, &pos);
9365 cache_mngr->trx_cache.set_prev_position(pos);
9366
9367 /*
9368 Set callbacks in order to be able to call commmit or rollback.
9369 */
9370 if (trx) trans_register_ha(thd, true, binlog_hton, nullptr);
9371 trans_register_ha(thd, false, binlog_hton, nullptr);
9372
9373 /*
9374 Set the binary log as read/write otherwise callbacks are not called.
9375 */
9376 thd->get_ha_data(binlog_hton->slot)->ha_info[0].set_trx_read_write();
9377 }
9378 }
9379
9380 /**
9381 Function to start a statement and optionally a transaction for the
9382 binary log.
9383
9384 This function does three things:
9385 - Starts a transaction if not in autocommit mode or if a BEGIN
9386 statement has been seen.
9387
9388 - Start a statement transaction to allow us to truncate the cache.
9389
9390 - Save the currrent binlog position so that we can roll back the
9391 statement by truncating the cache.
9392
9393 We only update the saved position if the old one was undefined,
9394 the reason is that there are some cases (e.g., for CREATE-SELECT)
9395 where the position is saved twice (e.g., both in
9396 Query_result_create::prepare() and THD::binlog_write_table_map()), but
9397 we should use the first. This means that calls to this function
9398 can be used to start the statement before the first table map
9399 event, to include some extra events.
9400
9401 Note however that IMMEDIATE_LOGGING implies that the statement is
9402 written without BEGIN/COMMIT.
9403
9404 @param thd Thread variable
9405 @param start_event The first event requested to be written into the
9406 binary log
9407 */
binlog_start_trans_and_stmt(THD * thd,Log_event * start_event)9408 static int binlog_start_trans_and_stmt(THD *thd, Log_event *start_event) {
9409 DBUG_TRACE;
9410
9411 /*
9412 Initialize the cache manager if this was not done yet.
9413 */
9414 if (thd->binlog_setup_trx_data()) return 1;
9415
9416 /*
9417 Retrieve the appropriated cache.
9418 */
9419 bool is_transactional = start_event->is_using_trans_cache();
9420 binlog_cache_mngr *cache_mngr = thd_get_cache_mngr(thd);
9421 binlog_cache_data *cache_data =
9422 cache_mngr->get_binlog_cache_data(is_transactional);
9423
9424 /*
9425 If the event is requesting immediatly logging, there is no need to go
9426 further down and set savepoint and register callbacks.
9427 */
9428 if (start_event->is_using_immediate_logging()) return 0;
9429
9430 register_binlog_handler(thd, thd->in_multi_stmt_transaction_mode());
9431
9432 /* Transactional DDL is logged traditionally without BEGIN. */
9433 if (is_atomic_ddl_event(start_event)) return 0;
9434
9435 /*
9436 If the cache is empty log "BEGIN" at the beginning of every transaction.
9437 Here, a transaction is either a BEGIN..COMMIT/ROLLBACK block or a single
9438 statement in autocommit mode.
9439 */
9440 if (cache_data->is_binlog_empty()) {
9441 static const char begin[] = "BEGIN";
9442 const char *query = nullptr;
9443 char buf[XID::ser_buf_size];
9444 char xa_start[sizeof("XA START") + 1 + sizeof(buf)];
9445 XID_STATE *xs = thd->get_transaction()->xid_state();
9446 int qlen = sizeof(begin) - 1;
9447
9448 if (is_transactional && xs->has_state(XID_STATE::XA_ACTIVE)) {
9449 /*
9450 XA-prepare logging case.
9451 */
9452 qlen = sprintf(xa_start, "XA START %s", xs->get_xid()->serialize(buf));
9453 query = xa_start;
9454 } else {
9455 /*
9456 Regular transaction case.
9457 */
9458 query = begin;
9459 }
9460
9461 Query_log_event qinfo(thd, query, qlen, is_transactional, false, true, 0,
9462 true);
9463 if (cache_data->write_event(&qinfo)) return 1;
9464 }
9465
9466 return 0;
9467 }
9468
9469 /**
9470 This function writes a table map to the binary log.
9471 Note that in order to keep the signature uniform with related methods,
9472 we use a redundant parameter to indicate whether a transactional table
9473 was changed or not.
9474 Sometimes it will write a Rows_query_log_event into binary log before
9475 the table map too.
9476
9477 @param table a pointer to the table.
9478 @param is_transactional @c true indicates a transactional table,
9479 otherwise @c false a non-transactional.
9480 @param binlog_rows_query @c true indicates a Rows_query log event
9481 will be binlogged before table map,
9482 otherwise @c false indicates it will not
9483 be binlogged.
9484 @return
9485 nonzero if an error pops up when writing the table map event
9486 or the Rows_query log event.
9487 */
binlog_write_table_map(TABLE * table,bool is_transactional,bool binlog_rows_query)9488 int THD::binlog_write_table_map(TABLE *table, bool is_transactional,
9489 bool binlog_rows_query) {
9490 int error;
9491 DBUG_TRACE;
9492 DBUG_PRINT("enter", ("table: %p (%s: #%llu)", table, table->s->table_name.str,
9493 table->s->table_map_id.id()));
9494
9495 /* Pre-conditions */
9496 DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
9497 DBUG_ASSERT(table->s->table_map_id.is_valid());
9498
9499 Table_map_log_event the_event(this, table, table->s->table_map_id,
9500 is_transactional);
9501
9502 binlog_start_trans_and_stmt(this, &the_event);
9503
9504 binlog_cache_mngr *const cache_mngr = thd_get_cache_mngr(this);
9505
9506 binlog_cache_data *cache_data =
9507 cache_mngr->get_binlog_cache_data(is_transactional);
9508
9509 if (binlog_rows_query && this->query().str) {
9510 /* Write the Rows_query_log_event into binlog before the table map */
9511 Rows_query_log_event rows_query_ev(this, this->query().str,
9512 this->query().length);
9513 if ((error = cache_data->write_event(&rows_query_ev))) return error;
9514 }
9515
9516 if ((error = cache_data->write_event(&the_event))) return error;
9517
9518 binlog_table_maps++;
9519 return 0;
9520 }
9521
9522 /**
9523 This function retrieves a pending row event from a cache which is
9524 specified through the parameter @c is_transactional. Respectively, when it
9525 is @c true, the pending event is returned from the transactional cache.
9526 Otherwise from the non-transactional cache.
9527
9528 @param is_transactional @c true indicates a transactional cache,
9529 otherwise @c false a non-transactional.
9530 @return
9531 The row event if any.
9532 */
binlog_get_pending_rows_event(bool is_transactional) const9533 Rows_log_event *THD::binlog_get_pending_rows_event(
9534 bool is_transactional) const {
9535 Rows_log_event *rows = nullptr;
9536 binlog_cache_mngr *const cache_mngr = thd_get_cache_mngr(this);
9537
9538 /*
9539 This is less than ideal, but here's the story: If there is no cache_mngr,
9540 prepare_pending_rows_event() has never been called (since the cache_mngr
9541 is set up there). In that case, we just return NULL.
9542 */
9543 if (cache_mngr) {
9544 binlog_cache_data *cache_data =
9545 cache_mngr->get_binlog_cache_data(is_transactional);
9546
9547 rows = cache_data->pending();
9548 }
9549 return (rows);
9550 }
9551
9552 /**
9553 @param db_param db name c-string to be inserted into alphabetically sorted
9554 THD::binlog_accessed_db_names list.
9555
9556 Note, that space for both the data and the node
9557 struct are allocated in THD::main_mem_root.
9558 The list lasts for the top-level query time and is reset
9559 in @c THD::cleanup_after_query().
9560 */
add_to_binlog_accessed_dbs(const char * db_param)9561 void THD::add_to_binlog_accessed_dbs(const char *db_param) {
9562 char *after_db;
9563 /*
9564 binlog_accessed_db_names list is to maintain the database
9565 names which are referenced in a given command.
9566 Prior to bug 17806014 fix, 'main_mem_root' memory root used
9567 to store this list. The 'main_mem_root' scope is till the end
9568 of the query. Hence it caused increasing memory consumption
9569 problem in big procedures like the ones mentioned below.
9570 Eg: CALL p1() where p1 is having 1,00,000 create and drop tables.
9571 'main_mem_root' is freed only at the end of the command CALL p1()'s
9572 execution. But binlog_accessed_db_names list scope is only till the
9573 individual statements specified the procedure(create/drop statements).
9574 Hence the memory allocated in 'main_mem_root' was left uncleared
9575 until the p1's completion, even though it is not required after
9576 completion of individual statements.
9577
9578 Instead of using 'main_mem_root' whose scope is complete query execution,
9579 now the memroot is changed to use 'thd->mem_root' whose scope is until the
9580 individual statement in CALL p1(). 'thd->mem_root' is set to
9581 'execute_mem_root' in the context of procedure and it's scope is till the
9582 individual statement in CALL p1() and thd->memroot is equal to
9583 'main_mem_root' in the context of a normal 'top level query'.
9584
9585 Eg: a) create table t1(i int); => If this function is called while
9586 processing this statement, thd->memroot is equal to &main_mem_root
9587 which will be freed immediately after executing this statement.
9588 b) CALL p1() -> p1 contains create table t1(i int); => If this function
9589 is called while processing create table statement which is inside
9590 a stored procedure, then thd->memroot is equal to 'execute_mem_root'
9591 which will be freed immediately after executing this statement.
9592 In both a and b case, thd->memroot will be freed immediately and will not
9593 increase memory consumption.
9594
9595 A special case(stored functions/triggers):
9596 Consider the following example:
9597 create function f1(i int) returns int
9598 begin
9599 insert into db1.t1 values (1);
9600 insert into db2.t1 values (2);
9601 end;
9602 When we are processing SELECT f1(), the list should contain db1, db2 names.
9603 Since thd->mem_root contains 'execute_mem_root' in the context of
9604 stored function, the mem root will be freed after adding db1 in
9605 the list and when we are processing the second statement and when we try
9606 to add 'db2' in the db1's list, it will lead to crash as db1's memory
9607 is already freed. To handle this special case, if in_sub_stmt is set
9608 (which is true incase of stored functions/triggers), we use &main_mem_root,
9609 if not set we will use thd->memroot which changes it's value to
9610 'execute_mem_root' or '&main_mem_root' depends on the context.
9611 */
9612 MEM_ROOT *db_mem_root = in_sub_stmt ? &main_mem_root : mem_root;
9613
9614 if (!binlog_accessed_db_names)
9615 binlog_accessed_db_names = new (db_mem_root) List<char>;
9616
9617 if (binlog_accessed_db_names->elements > MAX_DBS_IN_EVENT_MTS) {
9618 push_warning_printf(
9619 this, Sql_condition::SL_WARNING, ER_MTS_UPDATED_DBS_GREATER_MAX,
9620 ER_THD(this, ER_MTS_UPDATED_DBS_GREATER_MAX), MAX_DBS_IN_EVENT_MTS);
9621 return;
9622 }
9623
9624 after_db = strdup_root(db_mem_root, db_param);
9625
9626 /*
9627 sorted insertion is implemented with first rearranging data
9628 (pointer to char*) of the links and final appending of the least
9629 ordered data to create a new link in the list.
9630 */
9631 if (binlog_accessed_db_names->elements != 0) {
9632 List_iterator<char> it(*get_binlog_accessed_db_names());
9633
9634 while (it++) {
9635 char *swap = nullptr;
9636 char **ref_cur_db = it.ref();
9637 int cmp = strcmp(after_db, *ref_cur_db);
9638
9639 DBUG_ASSERT(!swap || cmp < 0);
9640
9641 if (cmp == 0) {
9642 after_db = nullptr; /* dup to ignore */
9643 break;
9644 } else if (swap || cmp > 0) {
9645 swap = *ref_cur_db;
9646 *ref_cur_db = after_db;
9647 after_db = swap;
9648 }
9649 }
9650 }
9651 if (after_db) binlog_accessed_db_names->push_back(after_db, db_mem_root);
9652 }
9653
9654 /*
9655 Tells if two (or more) tables have auto_increment columns and we want to
9656 lock those tables with a write lock.
9657
9658 SYNOPSIS
9659 has_two_write_locked_tables_with_auto_increment
9660 tables Table list
9661
9662 NOTES:
9663 Call this function only when you have established the list of all tables
9664 which you'll want to update (including stored functions, triggers, views
9665 inside your statement).
9666 */
9667
has_write_table_with_auto_increment(TABLE_LIST * tables)9668 static bool has_write_table_with_auto_increment(TABLE_LIST *tables) {
9669 for (TABLE_LIST *table = tables; table; table = table->next_global) {
9670 /* we must do preliminary checks as table->table may be NULL */
9671 if (!table->is_placeholder() && table->table->found_next_number_field &&
9672 (table->lock_descriptor().type >= TL_WRITE_ALLOW_WRITE))
9673 return true;
9674 }
9675
9676 return false;
9677 }
9678
9679 /*
9680 checks if we have select tables in the table list and write tables
9681 with auto-increment column.
9682
9683 SYNOPSIS
9684 has_two_write_locked_tables_with_auto_increment_and_select
9685 tables Table list
9686
9687 RETURN VALUES
9688
9689 -true if the table list has atleast one table with auto-increment column
9690
9691
9692 and atleast one table to select from.
9693 -false otherwise
9694 */
9695
has_write_table_with_auto_increment_and_select(TABLE_LIST * tables)9696 static bool has_write_table_with_auto_increment_and_select(TABLE_LIST *tables) {
9697 bool has_select = false;
9698 bool has_auto_increment_tables = has_write_table_with_auto_increment(tables);
9699 for (TABLE_LIST *table = tables; table; table = table->next_global) {
9700 if (!table->is_placeholder() &&
9701 (table->lock_descriptor().type <= TL_READ_NO_INSERT)) {
9702 has_select = true;
9703 break;
9704 }
9705 }
9706 return (has_select && has_auto_increment_tables);
9707 }
9708
9709 /*
9710 Tells if there is a table whose auto_increment column is a part
9711 of a compound primary key while is not the first column in
9712 the table definition.
9713
9714 @param tables Table list
9715
9716 @return true if the table exists, fais if does not.
9717 */
9718
has_write_table_auto_increment_not_first_in_pk(TABLE_LIST * tables)9719 static bool has_write_table_auto_increment_not_first_in_pk(TABLE_LIST *tables) {
9720 for (TABLE_LIST *table = tables; table; table = table->next_global) {
9721 /* we must do preliminary checks as table->table may be NULL */
9722 if (!table->is_placeholder() && table->table->found_next_number_field &&
9723 (table->lock_descriptor().type >= TL_WRITE_ALLOW_WRITE) &&
9724 table->table->s->next_number_keypart != 0)
9725 return true;
9726 }
9727
9728 return false;
9729 }
9730
9731 /**
9732 Checks if a table has a column with a non-deterministic DEFAULT expression.
9733 */
has_nondeterministic_default(const TABLE * table)9734 static bool has_nondeterministic_default(const TABLE *table) {
9735 return std::any_of(
9736 table->field, table->field + table->s->fields, [](const Field *field) {
9737 return field->m_default_val_expr != nullptr &&
9738 field->m_default_val_expr->get_stmt_unsafe_flags() != 0;
9739 });
9740 }
9741
9742 /**
9743 Checks if a TABLE_LIST contains a table that has been opened for writing, and
9744 that has a column with a non-deterministic DEFAULT expression.
9745 */
has_write_table_with_nondeterministic_default(const TABLE_LIST * tables)9746 static bool has_write_table_with_nondeterministic_default(
9747 const TABLE_LIST *tables) {
9748 for (const TABLE_LIST *table = tables; table != nullptr;
9749 table = table->next_global) {
9750 /* we must do preliminary checks as table->table may be NULL */
9751 if (!table->is_placeholder() &&
9752 table->lock_descriptor().type >= TL_WRITE_ALLOW_WRITE &&
9753 has_nondeterministic_default(table->table))
9754 return true;
9755 }
9756 return false;
9757 }
9758
9759 /*
9760 Function to check whether the table in query uses a fulltext parser
9761 plugin or not.
9762
9763 @param s - table share pointer.
9764
9765 @retval true - The table uses fulltext parser plugin.
9766 @retval false - Otherwise.
9767 */
fulltext_unsafe_set(TABLE_SHARE * s)9768 static bool inline fulltext_unsafe_set(TABLE_SHARE *s) {
9769 for (unsigned int i = 0; i < s->keys; i++) {
9770 if ((s->key_info[i].flags & HA_USES_PARSER) && s->keys_in_use.is_set(i))
9771 return true;
9772 }
9773 return false;
9774 }
9775 #ifndef DBUG_OFF
get_locked_tables_mode_name(enum_locked_tables_mode locked_tables_mode)9776 const char *get_locked_tables_mode_name(
9777 enum_locked_tables_mode locked_tables_mode) {
9778 switch (locked_tables_mode) {
9779 case LTM_NONE:
9780 return "LTM_NONE";
9781 case LTM_LOCK_TABLES:
9782 return "LTM_LOCK_TABLES";
9783 case LTM_PRELOCKED:
9784 return "LTM_PRELOCKED";
9785 case LTM_PRELOCKED_UNDER_LOCK_TABLES:
9786 return "LTM_PRELOCKED_UNDER_LOCK_TABLES";
9787 default:
9788 return "Unknown table lock mode";
9789 }
9790 }
9791 #endif
9792
/**
  Decide on logging format to use for the statement and issue errors
  or warnings as needed.  The decision depends on the following
  parameters:

  - The logging mode, i.e., the value of binlog_format.  Can be
    statement, mixed, or row.

  - The type of statement.  There are three types of statements:
    "normal" safe statements; unsafe statements; and row injections.
    An unsafe statement is one that, if logged in statement format,
    might produce different results when replayed on the slave (e.g.,
    queries with a LIMIT clause).  A row injection is either a BINLOG
    statement, or a row event executed by the slave's SQL thread.

  - The capabilities of tables modified by the statement.  The
    *capabilities vector* for a table is a set of flags associated
    with the table.  Currently, it only includes two flags: *row
    capability flag* and *statement capability flag*.

    The row capability flag is set if and only if the engine can
    handle row-based logging. The statement capability flag is set if
    and only if the table can handle statement-based logging.

  Decision table for logging format
  ---------------------------------

  The following table summarizes how the format and generated
  warning/error depends on the tables' capabilities, the statement
  type, and the current binlog_format.

     Row capable        N NNNNNNNNN YYYYYYYYY YYYYYYYYY
     Statement capable  N YYYYYYYYY NNNNNNNNN YYYYYYYYY

     Statement type     * SSSUUUIII SSSUUUIII SSSUUUIII

     binlog_format      * SMRSMRSMR SMRSMRSMR SMRSMRSMR

     Logged format      - SS-S----- -RR-RR-RR SRRSRR-RR
     Warning/Error      1 --2732444 5--5--6-- ---7--6--

  Legend
  ------

  Row capable:    N - Some table not row-capable, Y - All tables row-capable
  Stmt capable:   N - Some table not stmt-capable, Y - All tables stmt-capable
  Statement type: (S)afe, (U)nsafe, or Row (I)njection
  binlog_format:  (S)TATEMENT, (M)IXED, or (R)OW
  Logged format:  (S)tatement or (R)ow
  Warning/Error:  Warnings and error messages are as follows:

  1. Error: Cannot execute statement: binlogging impossible since both
     row-incapable engines and statement-incapable engines are
     involved.

  2. Error: Cannot execute statement: binlogging impossible since
     BINLOG_FORMAT = ROW and at least one table uses a storage engine
     limited to statement-logging.

  3. Error: Cannot execute statement: binlogging of unsafe statement
     is impossible when storage engine is limited to statement-logging
     and BINLOG_FORMAT = MIXED.

  4. Error: Cannot execute row injection: binlogging impossible since
     at least one table uses a storage engine limited to
     statement-logging.

  5. Error: Cannot execute statement: binlogging impossible since
     BINLOG_FORMAT = STATEMENT and at least one table uses a storage
     engine limited to row-logging.

  6. Error: Cannot execute row injection: binlogging impossible since
     BINLOG_FORMAT = STATEMENT.

  7. Warning: Unsafe statement binlogged in statement format since
     BINLOG_FORMAT = STATEMENT.

  In addition, we can produce the following errors (not depending on
  the variables of the decision diagram):

  8. Error: Cannot execute statement: binlogging impossible since more
     than one engine is involved and at least one engine is
     self-logging.

  9. Error: Do not allow users to modify a gtid_executed table
     explicitly by a XA transaction.

  For each error case above, the statement is prevented from being
  logged, we report an error, and roll back the statement.  For
  warnings, we set the binlog_unsafe_warning_flags member: the warning
  will be printed only if the statement is successfully logged.

  @see THD::binlog_query

  @param[in] tables Tables involved in the query

  @retval 0 No error; statement can be logged.
  @retval -1 An error was reported: one of the error conditions above
  (1, 2, 3, 4, 5, 6, 8 or 9), ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES, or
  is_dml_gtid_compatible() returned false.
*/

int THD::decide_logging_format(TABLE_LIST *tables) {
  DBUG_TRACE;
  DBUG_PRINT("info", ("query: %s", query().str));
  DBUG_PRINT("info", ("variables.binlog_format: %lu", variables.binlog_format));
  DBUG_PRINT("info", ("lex->get_stmt_unsafe_flags(): 0x%x",
                      lex->get_stmt_unsafe_flags()));

#if defined(ENABLED_DEBUG_SYNC)
  if (!is_attachable_ro_transaction_active())
    DEBUG_SYNC(this, "begin_decide_logging_format");
#endif

  reset_binlog_local_stmt_filter();

  /*
    We should not decide logging format if the binlog is closed or
    binlogging is off, or if the statement is filtered out from the
    binlog by filtering rules.
  */
  if (mysql_bin_log.is_open() && (variables.option_bits & OPTION_BIN_LOG) &&
      !(variables.binlog_format == BINLOG_FORMAT_STMT &&
        !binlog_filter->db_ok(m_db.str))) {
    /*
      Compute one bit field with the union of all the engine
      capabilities, and one with the intersection of all the engine
      capabilities.
    */
    handler::Table_flags flags_write_some_set = 0;
    handler::Table_flags flags_access_some_set = 0;
    handler::Table_flags flags_write_all_set =
        HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE;

    /*
      If different types of engines are about to be updated.
      For example: Innodb and Falcon; Innodb and MyIsam.
    */
    bool multi_write_engine = false;
    /*
      If different types of engines are about to be accessed
      and any of them is about to be updated. For example:
      Innodb and Falcon; Innodb and MyIsam.
    */
    bool multi_access_engine = false;
    /*
      Track if statement creates or drops a temporary table
      and log in ROW if it does.
    */
    bool is_create_drop_temp_table = false;
    /*
      Identifies if a table is changed.
    */
    bool is_write = false;
    /*
      A pointer to a previous table that was changed.
    */
    TABLE *prev_write_table = nullptr;
    /*
      A pointer to a previous table that was accessed.
    */
    TABLE *prev_access_table = nullptr;
    /*
      True if at least one table is transactional.
    */
    bool write_to_some_transactional_table = false;
    /*
      True if at least one table is non-transactional.
    */
    bool write_to_some_non_transactional_table = false;
    /*
      True if all non-transactional tables that have been updated
      are temporary.
    */
    bool write_all_non_transactional_are_tmp_tables = true;
    /**
      The number of tables used in the current statement,
      that should be replicated.
    */
    uint replicated_tables_count = 0;
    /**
      The number of tables written to in the current statement,
      that should not be replicated.
      A table should not be replicated when it is considered
      'local' to a MySQL instance.
      Currently, these tables are:
      - mysql.slow_log
      - mysql.general_log
      - mysql.slave_relay_log_info
      - mysql.slave_master_info
      - mysql.slave_worker_info
      - performance_schema.*
      - TODO: information_schema.*
      In practice, from this list, only performance_schema.* tables
      are written to by user queries.
    */
    uint non_replicated_tables_count = 0;
    /**
      Indicate whether we already reported a warning
      on modifying gtid_executed table.
    */
    int warned_gtid_executed_table = 0;
#ifndef DBUG_OFF
    {
      DBUG_PRINT("debug", ("prelocked_mode: %s",
                           get_locked_tables_mode_name(locked_tables_mode)));
    }
#endif

    if (variables.binlog_format != BINLOG_FORMAT_ROW && tables) {
      /*
        DML statements that modify a table with an auto_increment column based
        on rows selected from a table are unsafe as the order in which the rows
        are fetched from the select tables cannot be determined and may differ
        on master and slave.
      */
      if (has_write_table_with_auto_increment_and_select(tables))
        lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_WRITE_AUTOINC_SELECT);

      if (has_write_table_auto_increment_not_first_in_pk(tables))
        lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_NOT_FIRST);

      /*
        A query that modifies autoinc column in sub-statement can make the
        master and slave inconsistent.
        We can solve these problems in mixed mode by switching to binlogging
        if at least one updated table is used by sub-statement
      */
      if (lex->requires_prelocking() &&
          has_write_table_with_auto_increment(lex->first_not_own_table()))
        lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_AUTOINC_COLUMNS);

      /*
        A query that modifies a table with a non-deterministic column default
        expression in a substatement, can make the master and the slave
        inconsistent. Switch to row logging in mixed mode, and raise a warning
        in statement mode.
      */
      if (lex->requires_prelocking() &&
          has_write_table_with_nondeterministic_default(
              lex->first_not_own_table()))
        lex->set_stmt_unsafe(
            LEX::BINLOG_STMT_UNSAFE_DEFAULT_EXPRESSION_IN_SUBSTATEMENT);
    }

    /*
      Get the capabilities vector for all involved storage engines and
      mask out the flags for the binary log.
    */
    for (TABLE_LIST *table = tables; table; table = table->next_global) {
      if (table->is_placeholder()) {
        /*
          Detect if this is a CREATE TEMPORARY or DROP of a
          temporary table. This will be used later in determining whether to
          log in ROW or STMT if MIXED replication is being used.
        */
        if (!is_create_drop_temp_table && !table->table &&
            ((lex->sql_command == SQLCOM_CREATE_TABLE &&
              (lex->create_info->options & HA_LEX_CREATE_TMP_TABLE)) ||
             ((lex->sql_command == SQLCOM_DROP_TABLE ||
               lex->sql_command == SQLCOM_TRUNCATE) &&
              find_temporary_table(this, table)))) {
          is_create_drop_temp_table = true;
        }
        /* Placeholders take no further part in the bookkeeping below. */
        continue;
      }
      handler::Table_flags const flags = table->table->file->ha_table_flags();

      DBUG_PRINT("info", ("table: %s; ha_table_flags: 0x%llx",
                          table->table_name, flags));

      if (table->table->no_replicate) {
        /*
          warn_or_err_on_modify_gtid_table() is called until it returns a
          non-zero value for the first time; after that it is not called
          again for this statement.
        */
        if (!warned_gtid_executed_table) {
          warned_gtid_executed_table =
              gtid_state->warn_or_err_on_modify_gtid_table(this, table);
          /*
            Do not allow users to modify the gtid_executed table
            explicitly by a XA transaction.
          */
          if (warned_gtid_executed_table == 2) return -1;
        }
        /*
          The statement uses a table that is not replicated.
          The following properties about the table:
          - persistent / transient
          - transactional / non transactional
          - temporary / permanent
          - read or write
          - multiple engines involved because of this table
          are not relevant, as this table is completely ignored.
          Because the statement uses a non replicated table,
          using STATEMENT format in the binlog is impossible.
          Either this statement will be discarded entirely,
          or it will be logged (possibly partially) in ROW format.
        */
        lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_SYSTEM_TABLE);

        /*
          A written non-replicated table is only counted; it is excluded
          from all engine and capability bookkeeping below.
        */
        if (table->lock_descriptor().type >= TL_WRITE_ALLOW_WRITE) {
          non_replicated_tables_count++;
          continue;
        }
      }

      replicated_tables_count++;

      bool trans = table->table->file->has_transactions();

      if (table->lock_descriptor().type >= TL_WRITE_ALLOW_WRITE) {
        write_to_some_transactional_table =
            write_to_some_transactional_table || trans;

        write_to_some_non_transactional_table =
            write_to_some_non_transactional_table || !trans;

        /* A different handlerton than the previously written table. */
        if (prev_write_table &&
            prev_write_table->file->ht != table->table->file->ht)
          multi_write_engine = true;

        if (table->table->s->tmp_table)
          lex->set_stmt_accessed_table(
              trans ? LEX::STMT_WRITES_TEMP_TRANS_TABLE
                    : LEX::STMT_WRITES_TEMP_NON_TRANS_TABLE);
        else
          lex->set_stmt_accessed_table(trans
                                           ? LEX::STMT_WRITES_TRANS_TABLE
                                           : LEX::STMT_WRITES_NON_TRANS_TABLE);

        /*
          Non-transactional updates are allowed when row binlog format is
          used and all non-transactional tables are temporary.
          Binlog format is checked on THD::is_dml_gtid_compatible() method.
        */
        if (!trans)
          write_all_non_transactional_are_tmp_tables =
              write_all_non_transactional_are_tmp_tables &&
              table->table->s->tmp_table;

        flags_write_all_set &= flags;
        flags_write_some_set |= flags;
        is_write = true;

        prev_write_table = table->table;

        /*
          It should be marked unsafe if a table which uses a fulltext parser
          plugin is modified. See also bug#48183.
        */
        if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN)) {
          if (fulltext_unsafe_set(table->table->s))
            lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN);
        }
        /*
          INSERT...ON DUPLICATE KEY UPDATE on a table with more than one unique
          keys can be unsafe. Check for it if the flag is already not marked for
          the given statement.
        */
        if (!lex->is_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS) &&
            lex->sql_command == SQLCOM_INSERT &&
            lex->duplicates == DUP_UPDATE) {
          uint keys = table->table->s->keys, i = 0, unique_keys = 0;
          /* The scan stops as soon as a second HA_NOSAME key is found. */
          for (KEY *keyinfo = table->table->s->key_info;
               i < keys && unique_keys <= 1; i++, keyinfo++) {
            if (keyinfo->flags & HA_NOSAME) unique_keys++;
          }
          if (unique_keys > 1)
            lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_INSERT_TWO_KEYS);
        }
      }
      /*
        When get_using_match() is set, the fulltext parser check is applied
        to every remaining table, not only to the written ones.
      */
      if (lex->get_using_match()) {
        if (fulltext_unsafe_set(table->table->s))
          lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_FULLTEXT_PLUGIN);
      }

      flags_access_some_set |= flags;

      /*
        Record the table as read, except for the written table of a
        non-temporary CREATE TABLE statement.
      */
      if (lex->sql_command != SQLCOM_CREATE_TABLE ||
          (lex->sql_command == SQLCOM_CREATE_TABLE &&
           ((lex->create_info->options & HA_LEX_CREATE_TMP_TABLE) ||
            (table->lock_descriptor().type < TL_WRITE_ALLOW_WRITE)))) {
        if (table->table->s->tmp_table)
          lex->set_stmt_accessed_table(
              trans ? LEX::STMT_READS_TEMP_TRANS_TABLE
                    : LEX::STMT_READS_TEMP_NON_TRANS_TABLE);
        else
          lex->set_stmt_accessed_table(trans ? LEX::STMT_READS_TRANS_TABLE
                                             : LEX::STMT_READS_NON_TRANS_TABLE);
      }

      /* A different handlerton than the previously accessed table. */
      if (prev_access_table &&
          prev_access_table->file->ht != table->table->file->ht)
        multi_access_engine = true;

      prev_access_table = table->table;
    }
    DBUG_ASSERT(!is_write || write_to_some_transactional_table ||
                write_to_some_non_transactional_table);
    /*
      write_all_non_transactional_are_tmp_tables is initialized to true, so
      it may still be true even though no non-transactional table was
      updated; we fix its value here.
    */
    write_all_non_transactional_are_tmp_tables =
        write_all_non_transactional_are_tmp_tables &&
        write_to_some_non_transactional_table;

    DBUG_PRINT("info", ("flags_write_all_set: 0x%llx", flags_write_all_set));
    DBUG_PRINT("info", ("flags_write_some_set: 0x%llx", flags_write_some_set));
    DBUG_PRINT("info",
               ("flags_access_some_set: 0x%llx", flags_access_some_set));
    DBUG_PRINT("info", ("multi_write_engine: %d", multi_write_engine));
    DBUG_PRINT("info", ("multi_access_engine: %d", multi_access_engine));

    /* Non-zero once any error has been reported through my_error(). */
    int error = 0;
    int unsafe_flags;

    /*
      With transactional data dictionary, CREATE TABLE runs as one statement
      in a multi-statement transaction internally. Revert this for the
      purposes of determining mixed statement safety.
    */
    const bool multi_stmt_trans = lex->sql_command != SQLCOM_CREATE_TABLE &&
                                  in_multi_stmt_transaction_mode();
    bool trans_table = trans_has_updated_trans_table(this);
    bool binlog_direct = variables.binlog_direct_non_trans_update;

    if (lex->is_mixed_stmt_unsafe(multi_stmt_trans, binlog_direct, trans_table,
                                  tx_isolation))
      lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_MIXED_STATEMENT);
    else if (multi_stmt_trans && trans_table && !binlog_direct &&
             lex->stmt_accessed_table(LEX::STMT_WRITES_NON_TRANS_TABLE))
      lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_NONTRANS_AFTER_TRANS);

    /*
      If more than one engine is involved in the statement and at
      least one is doing its own logging (is *self-logging*), the
      statement cannot be logged atomically, so we generate an error
      rather than allowing the binlog to become corrupt.
    */
    if (multi_write_engine && (flags_write_some_set & HA_HAS_OWN_BINLOGGING))
      my_error((error = ER_BINLOG_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE),
               MYF(0));
    else if (multi_access_engine &&
             flags_access_some_set & HA_HAS_OWN_BINLOGGING)
      lex->set_stmt_unsafe(
          LEX::BINLOG_STMT_UNSAFE_MULTIPLE_ENGINES_AND_SELF_LOGGING_ENGINE);

    /* XA is unsafe for statements */
    if (is_write &&
        !get_transaction()->xid_state()->has_state(XID_STATE::XA_NOTR))
      lex->set_stmt_unsafe(LEX::BINLOG_STMT_UNSAFE_XA);

    /* Debug-only hook that forces the statement-only-engine code path. */
    DBUG_EXECUTE_IF("make_stmt_only_engines",
                    { flags_write_all_set = HA_BINLOG_STMT_CAPABLE; };);

    /* both statement-only and row-only engines involved */
    if ((flags_write_all_set &
         (HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE)) == 0) {
      /*
        1. Error: Binary logging impossible since both row-incapable
           engines and statement-incapable engines are involved
      */
      my_error((error = ER_BINLOG_ROW_ENGINE_AND_STMT_ENGINE), MYF(0));
    }
    /* statement-only engines involved */
    else if ((flags_write_all_set & HA_BINLOG_ROW_CAPABLE) == 0) {
      if (lex->is_stmt_row_injection()) {
        /*
          4. Error: Cannot execute row injection since table uses
             storage engine limited to statement-logging
        */
        my_error((error = ER_BINLOG_ROW_INJECTION_AND_STMT_ENGINE), MYF(0));
      } else if (variables.binlog_format == BINLOG_FORMAT_ROW &&
                 sqlcom_can_generate_row_events(this->lex->sql_command)) {
        /*
          2. Error: Cannot modify table that uses a storage engine
             limited to statement-logging when BINLOG_FORMAT = ROW
        */
        my_error((error = ER_BINLOG_ROW_MODE_AND_STMT_ENGINE), MYF(0));
      } else if (variables.binlog_format == BINLOG_FORMAT_MIXED &&
                 ((unsafe_flags = lex->get_stmt_unsafe_flags()) != 0)) {
        /*
          3. Error: Cannot execute statement: binlogging of unsafe
             statement is impossible when storage engine is limited to
             statement-logging and BINLOG_FORMAT = MIXED.

          One error is reported for each unsafe reason set in unsafe_flags.
        */
        for (int unsafe_type = 0; unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
             unsafe_type++)
          if (unsafe_flags & (1 << unsafe_type))
            my_error(
                (error = ER_BINLOG_UNSAFE_AND_STMT_ENGINE), MYF(0),
                ER_THD_NONCONST(current_thd,
                                LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
      } else if (is_write &&
                 ((unsafe_flags = lex->get_stmt_unsafe_flags()) != 0)) {
        /*
          7. Warning: Unsafe statement logged as statement due to
             binlog_format = STATEMENT
        */
        binlog_unsafe_warning_flags |= unsafe_flags;
        DBUG_PRINT("info", ("Scheduling warning to be issued by "
                            "binlog_query: '%s'",
                            ER_THD(current_thd, ER_BINLOG_UNSAFE_STATEMENT)));
        DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
                            binlog_unsafe_warning_flags));
      }
      /* log in statement format! */
    }
    /* no statement-only engines */
    else {
      /* binlog_format = STATEMENT */
      if (variables.binlog_format == BINLOG_FORMAT_STMT) {
        if (lex->is_stmt_row_injection()) {
          /*
            6. Error: Cannot execute row injection since
               BINLOG_FORMAT = STATEMENT
          */
          my_error((error = ER_BINLOG_ROW_INJECTION_AND_STMT_MODE), MYF(0));
        } else if ((flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0 &&
                   sqlcom_can_generate_row_events(this->lex->sql_command)) {
          /*
            5. Error: Cannot modify table that uses a storage engine
               limited to row-logging when binlog_format = STATEMENT
          */
          my_error((error = ER_BINLOG_STMT_MODE_AND_ROW_ENGINE), MYF(0), "");
        } else if (is_write &&
                   (unsafe_flags = lex->get_stmt_unsafe_flags()) != 0) {
          /*
            7. Warning: Unsafe statement logged as statement due to
               binlog_format = STATEMENT
          */
          binlog_unsafe_warning_flags |= unsafe_flags;
          DBUG_PRINT("info", ("Scheduling warning to be issued by "
                              "binlog_query: '%s'",
                              ER_THD(current_thd, ER_BINLOG_UNSAFE_STATEMENT)));
          DBUG_PRINT("info", ("binlog_unsafe_warning_flags: 0x%x",
                              binlog_unsafe_warning_flags));
        }
        /* log in statement format! */
      }
      /* No statement-only engines and binlog_format != STATEMENT.
         I.e., nothing prevents us from row logging if needed. */
      else {
        if (lex->is_stmt_unsafe() || lex->is_stmt_row_injection() ||
            lex->is_stmt_unsafe_with_mixed_mode() ||
            (flags_write_all_set & HA_BINLOG_STMT_CAPABLE) == 0 ||
            lex->stmt_accessed_table(LEX::STMT_READS_TEMP_TRANS_TABLE) ||
            lex->stmt_accessed_table(LEX::STMT_READS_TEMP_NON_TRANS_TABLE) ||
            is_create_drop_temp_table) {
#ifndef DBUG_OFF
          int flags = lex->get_stmt_unsafe_flags();
          DBUG_PRINT("info", ("setting row format for unsafe statement"));
          for (int i = 0; i < Query_tables_list::BINLOG_STMT_UNSAFE_COUNT;
               i++) {
            if (flags & (1 << i))
              DBUG_PRINT(
                  "info",
                  ("unsafe reason: %s",
                   ER_THD_NONCONST(
                       current_thd,
                       Query_tables_list::binlog_stmt_unsafe_errcode[i])));
          }
          DBUG_PRINT("info",
                     ("is_row_injection=%d", lex->is_stmt_row_injection()));
          DBUG_PRINT("info", ("stmt_capable=%llu",
                              (flags_write_all_set & HA_BINLOG_STMT_CAPABLE)));
          DBUG_PRINT("info", ("lex->is_stmt_unsafe_with_mixed_mode = %d",
                              lex->is_stmt_unsafe_with_mixed_mode()));
#endif
          /* log in row format! */
          set_current_stmt_binlog_format_row_if_mixed();
        }
      }
    }

    /*
      Decide what to do when non-replicated tables were written: filter the
      statement out of the binlog if nothing replicated is written, fail if
      replicated tables are written too and the format is not row, and
      otherwise log normally.
    */
    if (non_replicated_tables_count > 0) {
      if ((replicated_tables_count == 0) || !is_write) {
        DBUG_PRINT("info",
                   ("decision: no logging, no replicated table affected"));
        set_binlog_local_stmt_filter();
      } else {
        if (!is_current_stmt_binlog_format_row()) {
          my_error((error = ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES), MYF(0));
        } else {
          clear_binlog_local_stmt_filter();
        }
      }
    } else {
      clear_binlog_local_stmt_filter();
    }

    /* The GTID compatibility check is skipped if an error was reported. */
    if (!error &&
        !is_dml_gtid_compatible(write_to_some_transactional_table,
                                write_to_some_non_transactional_table,
                                write_all_non_transactional_are_tmp_tables))
      error = 1;

    if (error) {
      DBUG_PRINT("info", ("decision: no logging since an error was generated"));
      return -1;
    }

    if (is_write &&
        lex->sql_command != SQLCOM_END /* rows-event applying by slave */) {
      /*
        Master side of DML in the STMT format events parallelization.
        All involving table db:s are stored in a abc-ordered name list.
        In case the number of databases exceeds MAX_DBS_IN_EVENT_MTS maximum
        the list gathering breaks since it won't be sent to the slave.
      */
      for (TABLE_LIST *table = tables; table; table = table->next_global) {
        if (table->is_placeholder()) continue;

        DBUG_ASSERT(table->table);

        if (table->table->s->is_referenced_by_foreign_key()) {
          /*
             FK-referenced dbs can't be gathered currently. The following
             event will be marked for sequential execution on slave.
          */
          binlog_accessed_db_names = nullptr;
          add_to_binlog_accessed_dbs("");
          break;
        }
        if (!is_current_stmt_binlog_format_row())
          add_to_binlog_accessed_dbs(table->db);
      }
    }
    DBUG_PRINT("info",
               ("decision: logging in %s format",
                is_current_stmt_binlog_format_row() ? "ROW" : "STATEMENT"));

    if (variables.binlog_format == BINLOG_FORMAT_ROW &&
        (lex->sql_command == SQLCOM_UPDATE ||
         lex->sql_command == SQLCOM_UPDATE_MULTI ||
         lex->sql_command == SQLCOM_DELETE ||
         lex->sql_command == SQLCOM_DELETE_MULTI)) {
      String table_names;
      /*
        Generate a warning for UPDATE/DELETE statements that modify a
        BLACKHOLE table, as row events are not logged in row format.
      */
      for (TABLE_LIST *table = tables; table; table = table->next_global) {
        if (table->is_placeholder()) continue;
        if (table->table->file->ht->db_type == DB_TYPE_BLACKHOLE_DB &&
            table->lock_descriptor().type >= TL_WRITE_ALLOW_WRITE) {
          table_names.append(table->table_name);
          table_names.append(",");
        }
      }
      if (!table_names.is_empty()) {
        bool is_update = (lex->sql_command == SQLCOM_UPDATE ||
                          lex->sql_command == SQLCOM_UPDATE_MULTI);
        /*
          Replace the last ',' with '.' for table_names
        */
        table_names.replace(table_names.length() - 1, 1, ".", 1);
        push_warning_printf(
            this, Sql_condition::SL_WARNING, WARN_ON_BLOCKHOLE_IN_RBR,
            ER_THD(this, WARN_ON_BLOCKHOLE_IN_RBR),
            is_update ? "UPDATE" : "DELETE", table_names.c_ptr());
      }
    }
  } else {
    DBUG_PRINT(
        "info",
        ("decision: no logging since "
         "mysql_bin_log.is_open() = %d "
         "and (options & OPTION_BIN_LOG) = 0x%llx "
         "and binlog_format = %lu "
         "and binlog_filter->db_ok(db) = %d",
         mysql_bin_log.is_open(), (variables.option_bits & OPTION_BIN_LOG),
         variables.binlog_format, binlog_filter->db_ok(m_db.str)));

    /*
      No logging format is decided on this path, but
      warn_or_err_on_modify_gtid_table() is still called for non-replicated
      tables, stopping at the first one for which it returns non-zero. Its
      return value does not affect the result of this function here.
    */
    for (TABLE_LIST *table = tables; table; table = table->next_global) {
      if (!table->is_placeholder() && table->table->no_replicate &&
          gtid_state->warn_or_err_on_modify_gtid_table(this, table))
        break;
    }
  }

#if defined(ENABLED_DEBUG_SYNC)
  if (!is_attachable_ro_transaction_active())
    DEBUG_SYNC(this, "end_decide_logging_format");
#endif

  return 0;
}
10477
10478 /**
10479 Given that a possible violation of gtid consistency has happened,
10480 checks if gtid-inconsistencies are forbidden by the current value of
10481 ENFORCE_GTID_CONSISTENCY and GTID_MODE. If forbidden, generates
10482 error or warning accordingly.
10483
10484 @param thd The thread that has issued the GTID-violating statement.
10485
10486 @param error_code The error code to use, if error or warning is to
10487 be generated.
10488
10489 @param log_error_code The error code to use, if error message is to
10490 be logged.
10491
10492 @retval false Error was generated.
10493 @retval true No error was generated (possibly a warning was generated).
10494 */
handle_gtid_consistency_violation(THD * thd,int error_code,int log_error_code)10495 static bool handle_gtid_consistency_violation(THD *thd, int error_code,
10496 int log_error_code) {
10497 DBUG_TRACE;
10498
10499 enum_gtid_type gtid_next_type = thd->variables.gtid_next.type;
10500 global_sid_lock->rdlock();
10501 enum_gtid_consistency_mode gtid_consistency_mode =
10502 get_gtid_consistency_mode();
10503 auto gtid_mode = global_gtid_mode.get();
10504
10505 DBUG_PRINT("info", ("gtid_next.type=%d gtid_mode=%s "
10506 "gtid_consistency_mode=%d error=%d query=%s",
10507 gtid_next_type, Gtid_mode::to_string(gtid_mode),
10508 gtid_consistency_mode, error_code, thd->query().str));
10509
10510 /*
10511 GTID violations should generate error if:
10512 - GTID_MODE=ON or ON_PERMISSIVE and GTID_NEXT='AUTOMATIC' (since the
10513 transaction is expected to commit using a GTID), or
10514 - GTID_NEXT='UUID:NUMBER' (since the transaction is expected to
10515 commit usinga GTID), or
10516 - ENFORCE_GTID_CONSISTENCY=ON.
10517 */
10518 if ((gtid_next_type == AUTOMATIC_GTID &&
10519 gtid_mode >= Gtid_mode::ON_PERMISSIVE) ||
10520 gtid_next_type == ASSIGNED_GTID ||
10521 gtid_consistency_mode == GTID_CONSISTENCY_MODE_ON) {
10522 global_sid_lock->unlock();
10523 my_error(error_code, MYF(0));
10524 return false;
10525 } else {
10526 /*
10527 If we are not generating an error, we must increase the counter
10528 of GTID-violating transactions. This will prevent a concurrent
10529 client from executing a SET GTID_MODE or SET
10530 ENFORCE_GTID_CONSISTENCY statement that would be incompatible
10531 with this transaction.
10532
10533 If the transaction had already been accounted as a gtid violating
10534 transaction, then don't increment the counters, just issue the
10535 warning below. This prevents calling
10536 begin_automatic_gtid_violating_transaction or
10537 begin_anonymous_gtid_violating_transaction multiple times for the
10538 same transaction, which would make the counter go out of sync.
10539 */
10540 if (!thd->has_gtid_consistency_violation) {
10541 if (gtid_next_type == AUTOMATIC_GTID)
10542 gtid_state->begin_automatic_gtid_violating_transaction();
10543 else {
10544 DBUG_ASSERT(gtid_next_type == ANONYMOUS_GTID);
10545 gtid_state->begin_anonymous_gtid_violating_transaction();
10546 }
10547
10548 /*
10549 If a transaction generates multiple GTID violation conditions,
10550 it must still only update the counters once. Hence we use
10551 this per-thread flag to keep track of whether the thread has a
10552 consistency or not. This function must only be called if the
10553 transaction does not already have a GTID violation.
10554 */
10555 thd->has_gtid_consistency_violation = true;
10556 }
10557
10558 global_sid_lock->unlock();
10559
10560 // Generate warning if ENFORCE_GTID_CONSISTENCY = WARN.
10561 if (gtid_consistency_mode == GTID_CONSISTENCY_MODE_WARN) {
10562 // Need to print to log so that replication admin knows when users
10563 // have adjusted their workloads.
10564 LogErr(WARNING_LEVEL, log_error_code);
10565 // Need to print to client so that users can adjust their workload.
10566 push_warning(thd, Sql_condition::SL_WARNING, error_code,
10567 ER_THD_NONCONST(thd, error_code));
10568 }
10569 return true;
10570 }
10571 }
10572
is_ddl_gtid_compatible()10573 bool THD::is_ddl_gtid_compatible() {
10574 DBUG_TRACE;
10575
10576 // If @@session.sql_log_bin has been manually turned off (only
10577 // doable by SUPER), then no problem, we can execute any statement.
10578 if ((variables.option_bits & OPTION_BIN_LOG) == 0 ||
10579 mysql_bin_log.is_open() == false)
10580 return true;
10581
10582 DBUG_PRINT("info",
10583 ("SQLCOM_CREATE:%d CREATE-TMP:%d SELECT:%d SQLCOM_DROP:%d "
10584 "DROP-TMP:%d trx:%d",
10585 lex->sql_command == SQLCOM_CREATE_TABLE,
10586 (lex->sql_command == SQLCOM_CREATE_TABLE &&
10587 (lex->create_info->options & HA_LEX_CREATE_TMP_TABLE)),
10588 lex->select_lex->fields_list.elements,
10589 lex->sql_command == SQLCOM_DROP_TABLE,
10590 (lex->sql_command == SQLCOM_DROP_TABLE && lex->drop_temporary),
10591 in_multi_stmt_transaction_mode()));
10592
10593 if (lex->sql_command == SQLCOM_CREATE_TABLE &&
10594 !(lex->create_info->options & HA_LEX_CREATE_TMP_TABLE) &&
10595 lex->select_lex->get_fields_list()->elements) {
10596 if (!(get_default_handlerton(this, lex->create_info->db_type)->flags &
10597 HTON_SUPPORTS_ATOMIC_DDL)) {
10598 /*
10599 CREATE ... SELECT (without TEMPORARY) for engines not supporting atomic
10600 DDL is unsafe because if binlog_format=row it will be logged as a CREATE
10601 TABLE followed by row events, re-executed non-atomically as two
10602 transactions, and then written to the slave's binary log as two separate
10603 transactions with the same GTID.
10604 */
10605 bool ret = handle_gtid_consistency_violation(
10606 this, ER_GTID_UNSAFE_CREATE_SELECT,
10607 ER_RPL_GTID_UNSAFE_STMT_CREATE_SELECT);
10608 return ret;
10609 }
10610 } else if ((lex->sql_command == SQLCOM_CREATE_TABLE &&
10611 (lex->create_info->options & HA_LEX_CREATE_TMP_TABLE) != 0) ||
10612 (lex->sql_command == SQLCOM_DROP_TABLE && lex->drop_temporary)) {
10613 /*
10614 When @@session.binlog_format=statement, [CREATE|DROP] TEMPORARY TABLE
10615 is unsafe to execute inside a transaction or Procedure, because the
10616 [CREATE|DROP] statement on the temporary table will be executed and
10617 written into binary log with a GTID even if the transaction or
10618 Procedure is rolled back.
10619 */
10620 if (variables.binlog_format == BINLOG_FORMAT_STMT &&
10621 (in_multi_stmt_transaction_mode() || in_sub_stmt)) {
10622 bool ret = handle_gtid_consistency_violation(
10623 this, ER_CLIENT_GTID_UNSAFE_CREATE_DROP_TEMP_TABLE_IN_TRX_IN_SBR,
10624 ER_SERVER_GTID_UNSAFE_CREATE_DROP_TEMP_TABLE_IN_TRX_IN_SBR);
10625 return ret;
10626 }
10627 }
10628 return true;
10629 }
10630
is_dml_gtid_compatible(bool some_transactional_table,bool some_non_transactional_table,bool non_transactional_tables_are_tmp)10631 bool THD::is_dml_gtid_compatible(bool some_transactional_table,
10632 bool some_non_transactional_table,
10633 bool non_transactional_tables_are_tmp) {
10634 DBUG_TRACE;
10635
10636 // If @@session.sql_log_bin has been manually turned off (only
10637 // doable by SUPER), then no problem, we can execute any statement.
10638 if ((variables.option_bits & OPTION_BIN_LOG) == 0 ||
10639 mysql_bin_log.is_open() == false)
10640 return true;
10641
10642 /*
10643 Single non-transactional updates are allowed when not mixed
10644 together with transactional statements within a transaction.
10645 Furthermore, writing to transactional and non-transactional
10646 engines in a single statement is also disallowed.
10647 Multi-statement transactions on non-transactional tables are
10648 split into single-statement transactions when
10649 GTID_NEXT = "AUTOMATIC".
10650
10651 Non-transactional updates are allowed when row binlog format is
10652 used and all non-transactional tables are temporary.
10653
10654 The debug symbol "allow_gtid_unsafe_non_transactional_updates"
10655 disables the error. This is useful because it allows us to run
10656 old tests that were not written with the restrictions of GTIDs in
10657 mind.
10658 */
10659 DBUG_PRINT("info", ("some_non_transactional_table=%d "
10660 "some_transactional_table=%d "
10661 "trans_has_updated_trans_table=%d "
10662 "non_transactional_tables_are_tmp=%d "
10663 "is_current_stmt_binlog_format_row=%d",
10664 some_non_transactional_table, some_transactional_table,
10665 trans_has_updated_trans_table(this),
10666 non_transactional_tables_are_tmp,
10667 is_current_stmt_binlog_format_row()));
10668 if (some_non_transactional_table &&
10669 (some_transactional_table || trans_has_updated_trans_table(this)) &&
10670 !(non_transactional_tables_are_tmp &&
10671 is_current_stmt_binlog_format_row()) &&
10672 !DBUG_EVALUATE_IF("allow_gtid_unsafe_non_transactional_updates", 1, 0)) {
10673 return handle_gtid_consistency_violation(
10674 this, ER_GTID_UNSAFE_NON_TRANSACTIONAL_TABLE,
10675 ER_RPL_GTID_UNSAFE_STMT_ON_NON_TRANS_TABLE);
10676 }
10677
10678 return true;
10679 }
10680
10681 /*
10682 Implementation of interface to write rows to the binary log through the
10683 thread. The thread is responsible for writing the rows it has
10684 inserted/updated/deleted.
10685 */
10686
10687 /*
10688 Template member function for ensuring that there is an rows log
10689 event of the apropriate type before proceeding.
10690
10691 PRE CONDITION:
10692 - Events of type 'RowEventT' have the type code 'type_code'.
10693
10694 POST CONDITION:
10695 If a non-NULL pointer is returned, the pending event for thread 'thd' will
10696 be an event of type 'RowEventT' (which have the type code 'type_code')
10697 will either empty or have enough space to hold 'needed' bytes. In
10698 addition, the columns bitmap will be correct for the row, meaning that
10699 the pending event will be flushed if the columns in the event differ from
10700 the columns suppled to the function.
10701
10702 RETURNS
10703 If no error, a non-NULL pending event (either one which already existed or
10704 the newly created one).
10705 If error, NULL.
10706 */
10707
10708 template <class RowsEventT>
binlog_prepare_pending_rows_event(TABLE * table,uint32 serv_id,size_t needed,bool is_transactional,const unsigned char * extra_row_info,uint32 source_part_id)10709 Rows_log_event *THD::binlog_prepare_pending_rows_event(
10710 TABLE *table, uint32 serv_id, size_t needed, bool is_transactional,
10711 const unsigned char *extra_row_info, uint32 source_part_id) {
10712 DBUG_TRACE;
10713
10714 DBUG_EXECUTE_IF("simulate_null_pending_rows_event", { return nullptr; });
10715
10716 /* Fetch the type code for the RowsEventT template parameter */
10717 int const general_type_code = RowsEventT::TYPE_CODE;
10718
10719 partition_info *part_info = table->part_info;
10720 auto part_id = get_rpl_part_id(part_info);
10721
10722 Rows_log_event *pending = binlog_get_pending_rows_event(is_transactional);
10723
10724 if (unlikely(pending && !pending->is_valid())) return nullptr;
10725
10726 /*
10727 Check if the current event is non-NULL and a write-rows
10728 event. Also check if the table provided is mapped: if it is not,
10729 then we have switched to writing to a new table.
10730 If there is no pending event, we need to create one. If there is a pending
10731 event, but it's not about the same table id, or not of the same type
10732 (between Write, Update and Delete), or not the same affected columns, or
10733 going to be too big, flush this event to disk and create a new pending
10734 event.
10735
10736 We do not need to check that the pending event and the new event
10737 have the same setting for partial json updates, because
10738 partialness of json can only be changed outside transactions.
10739 */
10740 if (!pending || pending->server_id != serv_id ||
10741 pending->get_table_id() != table->s->table_map_id ||
10742 pending->get_general_type_code() != general_type_code ||
10743 pending->get_data_size() + needed > binlog_row_event_max_size ||
10744 pending->read_write_bitmaps_cmp(table) == false ||
10745 !(pending->m_extra_row_info.compare_extra_row_info(
10746 extra_row_info, part_id, source_part_id))) {
10747 /* Create a new RowsEventT... */
10748 Rows_log_event *const ev = new RowsEventT(
10749 this, table, table->s->table_map_id, is_transactional, extra_row_info);
10750 if (unlikely(!ev)) return nullptr;
10751 ev->server_id = serv_id; // I don't like this, it's too easy to forget.
10752 /*
10753 flush the pending event and replace it with the newly created
10754 event...
10755 */
10756 if (unlikely(mysql_bin_log.flush_and_set_pending_rows_event(
10757 this, ev, is_transactional))) {
10758 delete ev;
10759 return nullptr;
10760 }
10761
10762 return ev; /* This is the new pending event */
10763 }
10764 return pending; /* This is the current pending event */
10765 }
10766
10767 /* Declare in unnamed namespace. */
10768 namespace {
10769
10770 /**
10771 Class to handle temporary allocation of memory for row data.
10772
10773 The responsibilities of the class is to provide memory for
10774 packing one or two rows of packed data (depending on what
10775 constructor is called).
10776
10777 In order to make the allocation more efficient for rows without blobs,
10778 a pointer to the allocated memory is stored in the table structure
10779 for such rows. If memory for a table containing a blob field
10780 is requested, only memory for that is allocated, and subsequently
10781 released when the object is destroyed.
10782
10783 */
10784 class Row_data_memory {
10785 public:
10786 /**
10787 Build an object to keep track of a block-local piece of memory
10788 for storing a row of data.
10789
10790 @param table
10791 Table where the pre-allocated memory is stored.
10792
10793 @param data
10794 Pointer to the table record.
10795 */
Row_data_memory(TABLE * table,const uchar * data)10796 Row_data_memory(TABLE *table, const uchar *data) : m_memory(nullptr) {
10797 #ifndef DBUG_OFF
10798 m_alloc_checked = false;
10799 #endif
10800 allocate_memory(table, max_row_length(table, data));
10801 m_ptr[0] = has_memory() ? m_memory : nullptr;
10802 m_ptr[1] = nullptr;
10803 }
10804
Row_data_memory(TABLE * table,const uchar * data1,const uchar * data2,ulonglong value_options=0)10805 Row_data_memory(TABLE *table, const uchar *data1, const uchar *data2,
10806 ulonglong value_options = 0)
10807 : m_memory(nullptr) {
10808 #ifndef DBUG_OFF
10809 m_alloc_checked = false;
10810 #endif
10811 size_t len1 = max_row_length(table, data1);
10812 size_t len2 = max_row_length(table, data2, value_options);
10813 allocate_memory(table, len1 + len2);
10814 m_ptr[0] = has_memory() ? m_memory : nullptr;
10815 m_ptr[1] = has_memory() ? m_memory + len1 : nullptr;
10816 }
10817
~Row_data_memory()10818 ~Row_data_memory() {
10819 if (m_memory != nullptr && m_release_memory_on_destruction)
10820 my_free(m_memory);
10821 }
10822
10823 /**
10824 Is there memory allocated?
10825
10826 @retval true There is memory allocated
10827 @retval false Memory allocation failed
10828 */
has_memory() const10829 bool has_memory() const {
10830 #ifndef DBUG_OFF
10831 m_alloc_checked = true;
10832 #endif
10833 return m_memory != nullptr;
10834 }
10835
slot(uint s)10836 uchar *slot(uint s) {
10837 DBUG_ASSERT(s < sizeof(m_ptr) / sizeof(*m_ptr));
10838 DBUG_ASSERT(m_ptr[s] != nullptr);
10839 DBUG_ASSERT(m_alloc_checked == true);
10840 return m_ptr[s];
10841 }
10842
10843 private:
10844 /**
10845 Compute an upper bound on the amount of memory needed.
10846
10847 This may return an over-approximation.
10848
10849 @param table The table
10850 @param data The server's row record.
10851 @param value_options The value of @@global.binlog_row_value_options
10852 */
max_row_length(TABLE * table,const uchar * data,ulonglong value_options=0)10853 size_t max_row_length(TABLE *table, const uchar *data,
10854 ulonglong value_options = 0) {
10855 TABLE_SHARE *table_s = table->s;
10856 Replicated_columns_view fields{table, Replicated_columns_view::OUTBOUND};
10857 /*
10858 The server stores rows using "records". A record is a sequence of bytes
10859 which contains values or pointers to values for all fields (columns). The
10860 server uses table_s->reclength bytes for a row record.
10861
10862 The layout of a record is roughly:
10863
10864 - N+1+B bits, packed into CEIL((N+1+B)/8) bytes, where N is the number of
10865 nullable columns in the table, and B is the sum of the number of bits of
10866 all BIT columns.
10867
10868 - A sequence of serialized fields, each corresponding to a non-BIT,
10869 non-NULL column in the table.
10870
10871 For variable-length columns, the first component of the serialized field
10872 is a length, stored using 1, 2, 3, or 4 bytes depending on the maximum
10873 length for the data type.
10874
10875 For most data types, the next component of the serialized field is the
10876 actual data. But for for VARCHAR, VARBINARY, TEXT, BLOB, and JSON, the
10877 next component of the serialized field is a serialized pointer,
10878 i.e. sizeof(pointer) bytes, which point to another memory area where the
10879 actual data is stored.
10880
10881 The layout of a row image in the binary log is roughly:
10882
10883 - If this is an after-image and partial JSON is enabled, 1 byte containing
10884 value_options. If the PARTIAL_JSON bit of value_options is set, this is
10885 followed by P bits (the "partial_bits"), packed into CEIL(P) bytes,
10886 where P is the number of JSON columns in the table.
10887
10888 - M bits (the "null_bits"), packed into CEIL(M) bytes, where M is the
10889 number of columns in the image.
10890
10891 - A sequence of serialized fields, each corresponding to a non-NULL column
10892 in the row image.
10893
10894 For variable-length columns, the first component of the serialized field
10895 is a length, stored using 1, 2, 3, or 4 bytes depending on the maximum
10896 length for the data type.
10897
10898 For most data types, the next component of the serialized field is the
10899 actual field data. But for JSON fields where the corresponding bit of
10900 the partial_bits is 1, this is a sequence of diffs instead.
10901
10902 Now we try to use table_s->reclength to estimate how much memory to
10903 allocate for a row image in the binlog. Due to the differences this will
10904 only be an upper bound. Notice the differences:
10905
10906 - The binlog may only include a subset of the fields (the row image),
10907 whereas reclength contains space for all fields.
10908
10909 - BIT columns are not packed together with NULL bits in the binlog, so up
10910 to 1 more byte per BIT column may be needed.
10911
10912 - The binlog has a null bit even for non-nullable fields, whereas the
10913 reclength only contains space nullable fields, so the binlog may need up
10914 to CEIL(table_s->fields/8) more bytes.
10915
10916 - The binlog only has a null bit for fields in the image, whereas the
10917 reclength contains space for all fields.
10918
10919 - The binlog contains the full blob whereas the record only contains
10920 sizeof(pointer) bytes.
10921
10922 - The binlog contains value_options and partial_bits. So this may use up
10923 to 1+CEIL(table_s->fields/8) more bytes.
10924
10925 - The binlog may contain partial JSON. This is guaranteed to be smaller
10926 than the size of the full value.
10927
10928 - There may exist columns that, due to their nature, are not replicated,
10929 for instance, hidden generated columns used for functional indexes.
10930
10931 For those data types that are not stored using a pointer, the size of the
10932 field in the binary log is at most 2 bytes more than what the field
10933 contributes to in table_s->reclength, because those data types use at most
10934 1 byte for the length and waste less than a byte on extra padding and
10935 extra bits in null_bits or BIT columns.
10936
10937 For those data types that are stored using a pointer, the size of the
10938 field in the binary log is at most 2 bytes more than what the field
10939 contributes to in table_s->reclength, plus the size of the data. The size
10940 of the pointer is at least 4 on all supported platforms, so it is bigger
10941 than what is used by partial_bits, value_format, or any waste due to extra
10942 padding and extra bits in null_bits.
10943 */
10944 size_t length = table_s->reclength + 2 * (fields.filtered_size());
10945
10946 for (uint i = 0; i < table_s->blob_fields; i++) {
10947 if (fields.is_excluded(table_s->blob_field[i])) continue;
10948
10949 Field *field = table->field[table_s->blob_field[i]];
10950 Field_blob *field_blob = down_cast<Field_blob *>(field);
10951
10952 if (field_blob->type() == MYSQL_TYPE_JSON &&
10953 (value_options & PARTIAL_JSON_UPDATES) != 0) {
10954 Field_json *field_json = down_cast<Field_json *>(field_blob);
10955 length += field_json->get_diff_vector_and_length(value_options);
10956 } else
10957 length +=
10958 field_blob->get_length(data + field_blob->offset(table->record[0]));
10959 }
10960 return length;
10961 }
10962
allocate_memory(TABLE * const table,const size_t total_length)10963 void allocate_memory(TABLE *const table, const size_t total_length) {
10964 if (table->s->blob_fields == 0) {
10965 /*
10966 The maximum length of a packed record is less than this
10967 length. We use this value instead of the supplied length
10968 when allocating memory for records, since we don't know how
10969 the memory will be used in future allocations.
10970
10971 Since table->s->reclength is for unpacked records, we have
10972 to add two bytes for each field, which can potentially be
10973 added to hold the length of a packed field.
10974 */
10975 size_t const maxlen = table->s->reclength + 2 * table->s->fields;
10976
10977 /*
10978 Allocate memory for two records if memory hasn't been
10979 allocated. We allocate memory for two records so that it can
10980 be used when processing update rows as well.
10981 */
10982 if (table->write_row_record == nullptr)
10983 table->write_row_record = (uchar *)table->mem_root.Alloc(2 * maxlen);
10984 m_memory = table->write_row_record;
10985 m_release_memory_on_destruction = false;
10986 } else {
10987 m_memory = (uchar *)my_malloc(key_memory_Row_data_memory_memory,
10988 total_length, MYF(MY_WME));
10989 m_release_memory_on_destruction = true;
10990 }
10991 }
10992
10993 #ifndef DBUG_OFF
10994 mutable bool m_alloc_checked;
10995 #endif
10996 bool m_release_memory_on_destruction;
10997 uchar *m_memory;
10998 uchar *m_ptr[2];
10999 };
11000
11001 } // namespace
11002
binlog_write_row(TABLE * table,bool is_trans,uchar const * record,const unsigned char * extra_row_info)11003 int THD::binlog_write_row(TABLE *table, bool is_trans, uchar const *record,
11004 const unsigned char *extra_row_info) {
11005 DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
11006
11007 /*
11008 Pack records into format for transfer. We are allocating more
11009 memory than needed, but that doesn't matter.
11010 */
11011 Row_data_memory memory(table, record);
11012 if (!memory.has_memory()) return HA_ERR_OUT_OF_MEM;
11013
11014 uchar *row_data = memory.slot(0);
11015
11016 size_t const len = pack_row(table, table->write_set, row_data, record,
11017 enum_row_image_type::WRITE_AI);
11018
11019 Rows_log_event *const ev =
11020 binlog_prepare_pending_rows_event<Write_rows_log_event>(
11021 table, server_id, len, is_trans, extra_row_info);
11022
11023 if (unlikely(ev == nullptr)) return HA_ERR_OUT_OF_MEM;
11024
11025 return ev->add_row_data(row_data, len);
11026 }
11027
binlog_update_row(TABLE * table,bool is_trans,const uchar * before_record,const uchar * after_record,const unsigned char * extra_row_info)11028 int THD::binlog_update_row(TABLE *table, bool is_trans,
11029 const uchar *before_record,
11030 const uchar *after_record,
11031 const unsigned char *extra_row_info) {
11032 DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
11033 int error = 0;
11034
11035 /**
11036 Save a reference to the original read and write set bitmaps.
11037 We will need this to restore the bitmaps at the end.
11038 */
11039 MY_BITMAP *old_read_set = table->read_set;
11040 MY_BITMAP *old_write_set = table->write_set;
11041
11042 /**
11043 This will remove spurious fields required during execution but
11044 not needed for binlogging. This is done according to the:
11045 binlog-row-image option.
11046 */
11047 binlog_prepare_row_images(this, table);
11048
11049 Row_data_memory row_data(table, before_record, after_record,
11050 variables.binlog_row_value_options);
11051 if (!row_data.has_memory()) return HA_ERR_OUT_OF_MEM;
11052
11053 uchar *before_row = row_data.slot(0);
11054 uchar *after_row = row_data.slot(1);
11055
11056 size_t const before_size =
11057 pack_row(table, table->read_set, before_row, before_record,
11058 enum_row_image_type::UPDATE_BI);
11059 size_t const after_size = pack_row(
11060 table, table->write_set, after_row, after_record,
11061 enum_row_image_type::UPDATE_AI, variables.binlog_row_value_options);
11062
11063 DBUG_DUMP("before_record", before_record, table->s->reclength);
11064 DBUG_DUMP("after_record", after_record, table->s->reclength);
11065 DBUG_DUMP("before_row", before_row, before_size);
11066 DBUG_DUMP("after_row", after_row, after_size);
11067
11068 partition_info *part_info = table->part_info;
11069 uint32 source_part_id = binary_log::Rows_event::Extra_row_info::UNDEFINED;
11070 if (part_info) {
11071 uint32 new_part_id = binary_log::Rows_event::Extra_row_info::UNDEFINED;
11072 longlong func_value = 0;
11073 get_parts_for_update(before_record, after_record, table->record[0],
11074 part_info, &source_part_id, &new_part_id, &func_value);
11075 }
11076
11077 Rows_log_event *const ev =
11078 binlog_prepare_pending_rows_event<Update_rows_log_event>(
11079 table, server_id, before_size + after_size, is_trans, extra_row_info,
11080 source_part_id);
11081
11082 if (unlikely(ev == nullptr)) return HA_ERR_OUT_OF_MEM;
11083
11084 if (part_info) {
11085 ev->m_extra_row_info.set_source_partition_id(source_part_id);
11086 }
11087
11088 error = ev->add_row_data(before_row, before_size) ||
11089 ev->add_row_data(after_row, after_size);
11090
11091 /* restore read/write set for the rest of execution */
11092 table->column_bitmaps_set_no_signal(old_read_set, old_write_set);
11093
11094 bitmap_clear_all(&table->tmp_set);
11095
11096 return error;
11097 }
11098
binlog_delete_row(TABLE * table,bool is_trans,uchar const * record,const unsigned char * extra_row_info)11099 int THD::binlog_delete_row(TABLE *table, bool is_trans, uchar const *record,
11100 const unsigned char *extra_row_info) {
11101 DBUG_ASSERT(is_current_stmt_binlog_format_row() && mysql_bin_log.is_open());
11102 int error = 0;
11103
11104 /**
11105 Save a reference to the original read and write set bitmaps.
11106 We will need this to restore the bitmaps at the end.
11107 */
11108 MY_BITMAP *old_read_set = table->read_set;
11109 MY_BITMAP *old_write_set = table->write_set;
11110
11111 /**
11112 This will remove spurious fields required during execution but
11113 not needed for binlogging. This is done according to the:
11114 binlog-row-image option.
11115 */
11116 binlog_prepare_row_images(this, table);
11117
11118 /*
11119 Pack records into format for transfer. We are allocating more
11120 memory than needed, but that doesn't matter.
11121 */
11122 Row_data_memory memory(table, record);
11123 if (unlikely(!memory.has_memory())) return HA_ERR_OUT_OF_MEM;
11124
11125 uchar *row_data = memory.slot(0);
11126
11127 DBUG_DUMP("table->read_set", (uchar *)table->read_set->bitmap,
11128 (table->s->fields + 7) / 8);
11129 size_t const len = pack_row(table, table->read_set, row_data, record,
11130 enum_row_image_type::DELETE_BI);
11131
11132 Rows_log_event *const ev =
11133 binlog_prepare_pending_rows_event<Delete_rows_log_event>(
11134 table, server_id, len, is_trans, extra_row_info);
11135
11136 if (unlikely(ev == nullptr)) return HA_ERR_OUT_OF_MEM;
11137
11138 error = ev->add_row_data(row_data, len);
11139
11140 /* restore read/write set for the rest of execution */
11141 table->column_bitmaps_set_no_signal(old_read_set, old_write_set);
11142
11143 bitmap_clear_all(&table->tmp_set);
11144 return error;
11145 }
11146
binlog_prepare_row_images(const THD * thd,TABLE * table)11147 void binlog_prepare_row_images(const THD *thd, TABLE *table) {
11148 DBUG_TRACE;
11149 /**
11150 Remove from read_set spurious columns. The write_set has been
11151 handled before in table->mark_columns_needed_for_update.
11152 */
11153
11154 DBUG_PRINT_BITSET("debug", "table->read_set (before preparing): %s",
11155 table->read_set);
11156
11157 /**
11158 if there is a primary key in the table (ie, user declared PK or a
11159 non-null unique index) and we dont want to ship the entire image,
11160 and the handler involved supports this.
11161 */
11162 if (table->s->primary_key < MAX_KEY &&
11163 (thd->variables.binlog_row_image < BINLOG_ROW_IMAGE_FULL) &&
11164 !ha_check_storage_engine_flag(table->s->db_type(),
11165 HTON_NO_BINLOG_ROW_OPT)) {
11166 /**
11167 Just to be sure that tmp_set is currently not in use as
11168 the read_set already.
11169 */
11170 DBUG_ASSERT(table->read_set != &table->tmp_set);
11171 // Verify it's not used
11172 DBUG_ASSERT(bitmap_is_clear_all(&table->tmp_set));
11173
11174 switch (thd->variables.binlog_row_image) {
11175 case BINLOG_ROW_IMAGE_MINIMAL:
11176 /* MINIMAL: Mark only PK */
11177 table->mark_columns_used_by_index_no_reset(table->s->primary_key,
11178 &table->tmp_set);
11179 break;
11180 case BINLOG_ROW_IMAGE_NOBLOB:
11181 /**
11182 NOBLOB: Remove unnecessary BLOB fields from read_set
11183 (the ones that are not part of PK).
11184 */
11185 bitmap_union(&table->tmp_set, table->read_set);
11186 for (Field **ptr = table->field; *ptr; ptr++) {
11187 Field *field = (*ptr);
11188 if ((field->type() == MYSQL_TYPE_BLOB) &&
11189 !field->is_flag_set(PRI_KEY_FLAG))
11190 bitmap_clear_bit(&table->tmp_set, field->field_index());
11191 }
11192 break;
11193 default:
11194 DBUG_ASSERT(0); // impossible.
11195 }
11196
11197 /* set the temporary read_set */
11198 table->column_bitmaps_set_no_signal(&table->tmp_set, table->write_set);
11199 }
11200
11201 DBUG_PRINT_BITSET("debug", "table->read_set (after preparing): %s",
11202 table->read_set);
11203 }
11204
binlog_flush_pending_rows_event(bool stmt_end,bool is_transactional)11205 int THD::binlog_flush_pending_rows_event(bool stmt_end, bool is_transactional) {
11206 DBUG_TRACE;
11207 /*
11208 We shall flush the pending event even if we are not in row-based
11209 mode: it might be the case that we left row-based mode before
11210 flushing anything (e.g., if we have explicitly locked tables).
11211 */
11212 if (!mysql_bin_log.is_open()) return 0;
11213
11214 /*
11215 Mark the event as the last event of a statement if the stmt_end
11216 flag is set.
11217 */
11218 int error = 0;
11219 if (Rows_log_event *pending =
11220 binlog_get_pending_rows_event(is_transactional)) {
11221 if (stmt_end) {
11222 pending->set_flags(Rows_log_event::STMT_END_F);
11223 binlog_table_maps = 0;
11224 }
11225
11226 error = mysql_bin_log.flush_and_set_pending_rows_event(this, nullptr,
11227 is_transactional);
11228 }
11229
11230 return error;
11231 }
11232
11233 #if !defined(DBUG_OFF)
show_query_type(THD::enum_binlog_query_type qtype)11234 static const char *show_query_type(THD::enum_binlog_query_type qtype) {
11235 switch (qtype) {
11236 case THD::ROW_QUERY_TYPE:
11237 return "ROW";
11238 case THD::STMT_QUERY_TYPE:
11239 return "STMT";
11240 case THD::QUERY_TYPE_COUNT:
11241 default:
11242 DBUG_ASSERT(0 <= qtype && qtype < THD::QUERY_TYPE_COUNT);
11243 }
11244 static char buf[64];
11245 sprintf(buf, "UNKNOWN#%d", qtype);
11246 return buf;
11247 }
11248 #endif
11249
11250 /**
11251 Auxiliary function to reset the limit unsafety warning suppression.
11252 */
reset_binlog_unsafe_suppression()11253 static void reset_binlog_unsafe_suppression() {
11254 DBUG_TRACE;
11255 unsafe_warning_suppression_is_activated = false;
11256 limit_unsafe_warning_count = 0;
11257 limit_unsafe_suppression_start_time = my_getsystime() / 10000000;
11258 }
11259
11260 /**
11261 Auxiliary function to print warning in the error log.
11262 */
print_unsafe_warning_to_log(int unsafe_type,char * buf,const char * query)11263 static void print_unsafe_warning_to_log(int unsafe_type, char *buf,
11264 const char *query) {
11265 DBUG_TRACE;
11266 sprintf(buf, ER_DEFAULT(ER_BINLOG_UNSAFE_STATEMENT),
11267 ER_DEFAULT_NONCONST(LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
11268 LogErr(WARNING_LEVEL, ER_BINLOG_UNSAFE_MESSAGE_AND_STATEMENT, buf, query);
11269 }
11270
11271 /**
11272 Auxiliary function to check if the warning for limit unsafety should be
11273 thrown or suppressed. Details of the implementation can be found in the
11274 comments inline.
11275
11276 @param buf Buffer to hold the warning message text
11277 @param unsafe_type The type of unsafety.
11278 @param query The actual query statement.
11279
11280 TODO: Remove this function and implement a general service for all warnings
11281 that would prevent flooding the error log. => switch to log_throttle class?
11282 */
do_unsafe_limit_checkout(char * buf,int unsafe_type,const char * query)11283 static void do_unsafe_limit_checkout(char *buf, int unsafe_type,
11284 const char *query) {
11285 ulonglong now;
11286 DBUG_TRACE;
11287 DBUG_ASSERT(unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT);
11288 limit_unsafe_warning_count++;
11289 /*
11290 INITIALIZING:
11291 If this is the first time this function is called with log warning
11292 enabled, the monitoring the unsafe warnings should start.
11293 */
11294 if (limit_unsafe_suppression_start_time == 0) {
11295 limit_unsafe_suppression_start_time = my_getsystime() / 10000000;
11296 print_unsafe_warning_to_log(unsafe_type, buf, query);
11297 } else {
11298 if (!unsafe_warning_suppression_is_activated)
11299 print_unsafe_warning_to_log(unsafe_type, buf, query);
11300
11301 if (limit_unsafe_warning_count >=
11302 LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT) {
11303 now = my_getsystime() / 10000000;
11304 if (!unsafe_warning_suppression_is_activated) {
11305 /*
11306 ACTIVATION:
11307 We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT warnings in
11308 less than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT we activate the
11309 suppression.
11310 */
11311 if ((now - limit_unsafe_suppression_start_time) <=
11312 LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT) {
11313 unsafe_warning_suppression_is_activated = true;
11314 DBUG_PRINT("info", ("A warning flood has been detected and the limit \
11315 unsafety warning suppression has been activated."));
11316 } else {
11317 /*
11318 there is no flooding till now, therefore we restart the monitoring
11319 */
11320 limit_unsafe_suppression_start_time = my_getsystime() / 10000000;
11321 limit_unsafe_warning_count = 0;
11322 }
11323 } else {
11324 /*
11325 Print the suppression note and the unsafe warning.
11326 */
11327 LogErr(INFORMATION_LEVEL, ER_BINLOG_WARNING_SUPPRESSED,
11328 limit_unsafe_warning_count,
11329 (int)(now - limit_unsafe_suppression_start_time));
11330 print_unsafe_warning_to_log(unsafe_type, buf, query);
11331 /*
11332 DEACTIVATION: We got LIMIT_UNSAFE_WARNING_ACTIVATION_THRESHOLD_COUNT
11333 warnings in more than LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT, the
11334 suppression should be deactivated.
11335 */
11336 if ((now - limit_unsafe_suppression_start_time) >
11337 LIMIT_UNSAFE_WARNING_ACTIVATION_TIMEOUT) {
11338 reset_binlog_unsafe_suppression();
11339 DBUG_PRINT("info", ("The limit unsafety warning supression has been \
11340 deactivated"));
11341 }
11342 }
11343 limit_unsafe_warning_count = 0;
11344 }
11345 }
11346 }
11347
11348 /**
11349 Auxiliary method used by @c binlog_query() to raise warnings.
11350
11351 The type of warning and the type of unsafeness is stored in
11352 THD::binlog_unsafe_warning_flags.
11353 */
issue_unsafe_warnings()11354 void THD::issue_unsafe_warnings() {
11355 char buf[MYSQL_ERRMSG_SIZE * 2];
11356 DBUG_TRACE;
11357 /*
11358 Ensure that binlog_unsafe_warning_flags is big enough to hold all
11359 bits. This is actually a constant expression.
11360 */
11361 DBUG_ASSERT(LEX::BINLOG_STMT_UNSAFE_COUNT <=
11362 sizeof(binlog_unsafe_warning_flags) * CHAR_BIT);
11363
11364 uint32 unsafe_type_flags = binlog_unsafe_warning_flags;
11365
11366 /*
11367 For each unsafe_type, check if the statement is unsafe in this way
11368 and issue a warning.
11369 */
11370 for (int unsafe_type = 0; unsafe_type < LEX::BINLOG_STMT_UNSAFE_COUNT;
11371 unsafe_type++) {
11372 if ((unsafe_type_flags & (1 << unsafe_type)) != 0) {
11373 push_warning_printf(
11374 this, Sql_condition::SL_NOTE, ER_BINLOG_UNSAFE_STATEMENT,
11375 ER_THD(this, ER_BINLOG_UNSAFE_STATEMENT),
11376 ER_THD_NONCONST(this, LEX::binlog_stmt_unsafe_errcode[unsafe_type]));
11377 if (log_error_verbosity > 1 && opt_log_unsafe_statements) {
11378 if (unsafe_type == LEX::BINLOG_STMT_UNSAFE_LIMIT)
11379 do_unsafe_limit_checkout(buf, unsafe_type, query().str);
11380 else // cases other than LIMIT unsafety
11381 print_unsafe_warning_to_log(unsafe_type, buf, query().str);
11382 }
11383 }
11384 }
11385 }
11386
11387 /**
11388 Log the current query.
11389
11390 The query will be logged in either row format or statement format
11391 depending on the value of @c current_stmt_binlog_format_row field and
11392 the value of the @c qtype parameter.
11393
11394 This function must be called:
11395
11396 - After the all calls to ha_*_row() functions have been issued.
11397
11398 - After any writes to system tables. Rationale: if system tables
11399 were written after a call to this function, and the master crashes
11400 after the call to this function and before writing the system
11401 tables, then the master and slave get out of sync.
11402
11403 - Before tables are unlocked and closed.
11404
11405 @see decide_logging_format
11406
11407 @retval 0 Success
11408
11409 @retval nonzero If there is a failure when writing the query (e.g.,
11410 write failure), then the error code is returned.
11411 */
binlog_query(THD::enum_binlog_query_type qtype,const char * query_arg,size_t query_len,bool is_trans,bool direct,bool suppress_use,int errcode)11412 int THD::binlog_query(THD::enum_binlog_query_type qtype, const char *query_arg,
11413 size_t query_len, bool is_trans, bool direct,
11414 bool suppress_use, int errcode) {
11415 DBUG_TRACE;
11416 DBUG_PRINT("enter",
11417 ("qtype: %s query: '%s'", show_query_type(qtype), query_arg));
11418 DBUG_ASSERT(query_arg && mysql_bin_log.is_open());
11419
11420 if (get_binlog_local_stmt_filter() == BINLOG_FILTER_SET) {
11421 /*
11422 The current statement is to be ignored, and not written to
11423 the binlog. Do not call issue_unsafe_warnings().
11424 */
11425 return 0;
11426 }
11427
11428 /*
11429 If we are not in prelocked mode, mysql_unlock_tables() will be
11430 called after this binlog_query(), so we have to flush the pending
11431 rows event with the STMT_END_F set to unlock all tables at the
11432 slave side as well.
11433
11434 If we are in prelocked mode, the flushing will be done inside the
11435 top-most close_thread_tables().
11436 */
11437 if (this->locked_tables_mode <= LTM_LOCK_TABLES)
11438 if (int error = binlog_flush_pending_rows_event(true, is_trans))
11439 return error;
11440
11441 /*
11442 Warnings for unsafe statements logged in statement format are
11443 printed in three places instead of in decide_logging_format().
11444 This is because the warnings should be printed only if the statement
11445 is actually logged. When executing decide_logging_format(), we cannot
11446 know for sure if the statement will be logged:
11447
11448 1 - sp_head::execute_procedure which prints out warnings for calls to
11449 stored procedures.
11450
11451 2 - sp_head::execute_function which prints out warnings for calls
11452 involving functions.
11453
11454 3 - THD::binlog_query (here) which prints warning for top level
11455 statements not covered by the two cases above: i.e., if not insided a
11456 procedure and a function.
11457
11458 Besides, we should not try to print these warnings if it is not
11459 possible to write statements to the binary log as it happens when
11460 the execution is inside a function, or generaly speaking, when
11461 the variables.option_bits & OPTION_BIN_LOG is false.
11462 */
11463 if ((variables.option_bits & OPTION_BIN_LOG) && sp_runtime_ctx == nullptr &&
11464 !binlog_evt_union.do_union)
11465 issue_unsafe_warnings();
11466
11467 switch (qtype) {
11468 /*
11469 ROW_QUERY_TYPE means that the statement may be logged either in
11470 row format or in statement format. If
11471 current_stmt_binlog_format is row, it means that the
11472 statement has already been logged in row format and hence shall
11473 not be logged again.
11474 */
11475 case THD::ROW_QUERY_TYPE:
11476 DBUG_PRINT("debug", ("is_current_stmt_binlog_format_row: %d",
11477 is_current_stmt_binlog_format_row()));
11478 if (is_current_stmt_binlog_format_row()) return 0;
11479 /* Fall through */
11480
11481 /*
11482 STMT_QUERY_TYPE means that the query must be logged in statement
11483 format; it cannot be logged in row format. This is typically
11484 used by DDL statements. It is an error to use this query type
11485 if current_stmt_binlog_format_row is row.
11486
11487 @todo Currently there are places that call this method with
11488 STMT_QUERY_TYPE and current_stmt_binlog_format is row. Fix those
11489 places and add assert to ensure correct behavior. /Sven
11490 */
11491 case THD::STMT_QUERY_TYPE:
11492 /*
11493 The MYSQL_BIN_LOG::write() function will set the STMT_END_F flag and
11494 flush the pending rows event if necessary.
11495 */
11496 {
11497 Query_log_event qinfo(this, query_arg, query_len, is_trans, direct,
11498 suppress_use, errcode);
11499 /*
11500 Binlog table maps will be irrelevant after a Query_log_event
11501 (they are just removed on the slave side) so after the query
11502 log event is written to the binary log, we pretend that no
11503 table maps were written.
11504 */
11505 int error = mysql_bin_log.write_event(&qinfo);
11506 binlog_table_maps = 0;
11507 return error;
11508 }
11509 break;
11510
11511 case THD::QUERY_TYPE_COUNT:
11512 default:
11513 DBUG_ASSERT(0 <= qtype && qtype < QUERY_TYPE_COUNT);
11514 }
11515 return 0;
11516 }
11517
11518 struct st_mysql_storage_engine binlog_storage_engine = {
11519 MYSQL_HANDLERTON_INTERFACE_VERSION};
11520
11521 /** @} */
11522
/*
  Plugin declaration for the binlog pseudo storage engine.
  NOTE(review): the roles given for the first six fields and for the last
  one are inferred from their values; confirm against the plugin
  descriptor definition.
*/
mysql_declare_plugin(binlog){
    MYSQL_STORAGE_ENGINE_PLUGIN, /* plugin type */
    &binlog_storage_engine,      /* type-specific descriptor */
    "binlog",                    /* plugin name */
    PLUGIN_AUTHOR_ORACLE,        /* author */
    "This is a pseudo storage engine to represent the binlog in a transaction",
    PLUGIN_LICENSE_GPL, /* license */
    binlog_init,        /* Plugin Init */
    nullptr,            /* Plugin Check uninstall */
    binlog_deinit,      /* Plugin Deinit */
    0x0100,             /* version 1.0 */
    nullptr,            /* status variables */
    nullptr,            /* system variables */
    nullptr,            /* config options */
    0,                  /* presumably flags: none set */
} mysql_declare_plugin_end;
11539