1 /* Copyright (c) 2000, 2017, Oracle and/or its affiliates. 2 Copyright (c) 2009, 2020, MariaDB Corporation. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; version 2 of the License. 7 8 This program is distributed in the hope that it will be useful, 9 but WITHOUT ANY WARRANTY; without even the implied warranty of 10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 GNU General Public License for more details. 12 13 You should have received a copy of the GNU General Public License 14 along with this program; if not, write to the Free Software 15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ 16 17 18 /** 19 @addtogroup Replication 20 @{ 21 22 @file 23 24 @brief Code to run the io thread and the sql thread on the 25 replication slave. 26 */ 27 28 #include "mariadb.h" 29 #include "sql_priv.h" 30 #include "slave.h" 31 #include "sql_parse.h" // execute_init_command 32 #include "sql_table.h" // mysql_rm_table 33 #include "rpl_mi.h" 34 #include "rpl_rli.h" 35 #include "sql_repl.h" 36 #include "rpl_filter.h" 37 #include "repl_failsafe.h" 38 #include "transaction.h" 39 #include <thr_alarm.h> 40 #include <my_dir.h> 41 #include <sql_common.h> 42 #include <errmsg.h> 43 #include <ssl_compat.h> 44 #include "unireg.h" 45 #include <mysys_err.h> 46 #include <signal.h> 47 #include <mysql.h> 48 #include <myisam.h> 49 50 #include "sql_base.h" // close_thread_tables 51 #include "tztime.h" // struct Time_zone 52 #include "log_event.h" // Rotate_log_event, 53 // Create_file_log_event, 54 // Format_description_log_event 55 #include "wsrep_mysqld.h" 56 #ifdef WITH_WSREP 57 #include "wsrep_trans_observer.h" 58 #endif 59 60 #ifdef HAVE_REPLICATION 61 62 #include "rpl_tblmap.h" 63 #include "debug_sync.h" 64 #include "rpl_parallel.h" 65 #include "sql_show.h" 66 #include "semisync_slave.h" 67 #include "sql_manager.h" 68 69 
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")

#define MAX_SLAVE_RETRY_PAUSE 5
/*
  a parameter of sql_slave_killed() to defer the killed status
*/
#define SLAVE_WAIT_GROUP_DONE 60
bool use_slave_mask = 0;
MY_BITMAP slave_error_mask;
char slave_skip_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
uint *slave_transaction_retry_errors;
uint slave_transaction_retry_error_length= 0;
char slave_transaction_retry_error_names[SHOW_VAR_FUNC_BUFF_SIZE];

char* slave_load_tmpdir = 0;
Master_info *active_mi= 0;
Master_info_index *master_info_index;
my_bool replicate_same_server_id;
ulonglong relay_log_space_limit = 0;
ulonglong opt_read_binlog_speed_limit = 0;

const char *relay_log_index= 0;
const char *relay_log_basename= 0;

LEX_CSTRING default_master_connection_name= { (char*) "", 0 };

/*
  When slave thread exits, we need to remember the temporary tables so we
  can re-use them on slave start.

  TODO: move the vars below under Master_info
*/

int disconnect_slave_event_count = 0, abort_slave_event_count = 0;

static pthread_key(Master_info*, RPL_MASTER_INFO);

/*
  First index of reconnect_messages[]: which step failed and caused the
  reconnect attempt. SLAVE_RECON_ACT_MAX is the number of actions.
*/
enum enum_slave_reconnect_actions
{
  SLAVE_RECON_ACT_REG= 0,
  SLAVE_RECON_ACT_DUMP= 1,
  SLAVE_RECON_ACT_EVENT= 2,
  SLAVE_RECON_ACT_MAX
};

/*
  Second index of reconnect_messages[]: which message is wanted for the
  given action. SLAVE_RECON_MSG_MAX is the number of messages per action.
*/
enum enum_slave_reconnect_messages
{
  SLAVE_RECON_MSG_WAIT= 0,
  SLAVE_RECON_MSG_KILLED_WAITING= 1,
  SLAVE_RECON_MSG_AFTER= 2,
  SLAVE_RECON_MSG_FAILED= 3,
  SLAVE_RECON_MSG_COMMAND= 4,
  SLAVE_RECON_MSG_KILLED_AFTER= 5,
  SLAVE_RECON_MSG_MAX
};

/*
  Message table indexed as
    reconnect_messages[enum_slave_reconnect_actions]
                      [enum_slave_reconnect_messages]

  The SLAVE_RECON_MSG_FAILED entries are format strings taking
  ('%s', %llu, %s) arguments. The backslash-newline continuations below are
  inside string literals, so the continued line must start in column 0 to
  avoid embedding extra spaces in the message.
*/
static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]=
{
  {
    "Waiting to reconnect after a failed registration on master",
    "Slave I/O thread killed while waiting to reconnect after a failed \
registration on master",
    "Reconnecting after a failed registration on master",
    "failed registering on master, reconnecting to try again, \
log '%s' at position %llu%s",
    "COM_REGISTER_SLAVE",
    "Slave I/O thread killed during or after reconnect"
  },
  {
    "Waiting to reconnect after a failed binlog dump request",
    "Slave I/O thread killed while retrying master dump",
    "Reconnecting after a failed binlog dump request",
    "failed dump request, reconnecting to try again, log '%s' at position %llu%s",
    "COM_BINLOG_DUMP",
    "Slave I/O thread killed during or after reconnect"
  },
  {
    "Waiting to reconnect after a failed master event read",
    "Slave I/O thread killed while waiting to reconnect after a failed read",
    "Reconnecting after a failed master event read",
    "Slave I/O thread: Failed reading log event, reconnecting to retry, \
log '%s' at position %llu%s",
    "",
    "Slave I/O thread killed during or after a reconnect done to recover from \
failed read"
  }
};


/* Kind of slave thread being initialized (see init_slave_thread()). */
typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL} SLAVE_THD_TYPE;

/* Forward declarations of file-local helpers. */
static int process_io_rotate(Master_info* mi, Rotate_log_event* rev);
static int process_io_create_file(Master_info* mi, Create_file_log_event* cev);
static bool wait_for_relay_log_space(Relay_log_info* rli);
static bool io_slave_killed(Master_info* mi);
static bool sql_slave_killed(rpl_group_info *rgi);
static int init_slave_thread(THD*, Master_info *, SLAVE_THD_TYPE);
static void make_slave_skip_errors_printable(void);
static void make_slave_transaction_retry_errors_printable(void);
static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
static int safe_reconnect(THD*, MYSQL*, Master_info*, bool);
static int connect_to_master(THD*, MYSQL*, Master_info*, bool, bool);
static Log_event* next_event(rpl_group_info* rgi, ulonglong *event_size);
static int queue_event(Master_info* mi,const char* buf,ulong event_len);
static int terminate_slave_thread(THD *, mysql_mutex_t *, mysql_cond_t *,
                                  volatile uint *, bool);
static bool check_io_slave_killed(Master_info *mi, const char *info);
static bool send_show_master_info_data(THD *, Master_info *, bool, String *);
/*
  Function to set the slave's max_allowed_packet based on the value
  of slave_max_allowed_packet.

  Both arguments must be non-NULL (asserted in debug builds).

    @in_param    thd    Thread handler for slave
    @in_param    mysql  MySQL connection handle
*/

static void set_slave_max_allowed_packet(THD *thd, MYSQL *mysql)
{
  DBUG_ENTER("set_slave_max_allowed_packet");
  // thd and mysql must be valid
  DBUG_ASSERT(thd && mysql);

  thd->variables.max_allowed_packet= slave_max_allowed_packet;
  thd->net.max_packet_size= slave_max_allowed_packet;
  /*
    Adding MAX_LOG_EVENT_HEADER to the max_packet_size on the I/O
    thread and the mysql->option max_allowed_packet, since a
    replication event can become this much larger than
    the corresponding packet (query) sent from client to master.
  */
  thd->net.max_packet_size+= MAX_LOG_EVENT_HEADER;
  /*
    Skipping the setting of mysql->net.max_packet size to slave
    max_allowed_packet since this is done during mysql_real_connect.
  */
  mysql->options.max_allowed_packet=
    slave_max_allowed_packet+MAX_LOG_EVENT_HEADER;
  DBUG_VOID_RETURN;
}

/*
  Find out which replications threads are running

  SYNOPSIS
    init_thread_mask()
    mask                Return value here
    mi                  master_info for slave
    inverse             If set, returns which threads are not running

  IMPLEMENTATION
    Get a bit mask for which threads are running so that we can later restart
    these threads.
221 222 RETURN 223 mask If inverse == 0, running threads 224 If inverse == 1, stopped threads 225 */ 226 227 void init_thread_mask(int* mask,Master_info* mi,bool inverse) 228 { 229 bool set_io = mi->slave_running, set_sql = mi->rli.slave_running; 230 int tmp_mask=0; 231 DBUG_ENTER("init_thread_mask"); 232 233 if (set_io) 234 tmp_mask |= SLAVE_IO; 235 if (set_sql) 236 tmp_mask |= SLAVE_SQL; 237 if (inverse) 238 tmp_mask^= (SLAVE_IO | SLAVE_SQL); 239 *mask = tmp_mask; 240 DBUG_VOID_RETURN; 241 } 242 243 244 /* 245 lock_slave_threads() against other threads doing STOP, START or RESET SLAVE 246 247 */ 248 249 void Master_info::lock_slave_threads() 250 { 251 DBUG_ENTER("lock_slave_threads"); 252 mysql_mutex_lock(&start_stop_lock); 253 DBUG_VOID_RETURN; 254 } 255 256 257 /* 258 unlock_slave_threads() 259 */ 260 261 void Master_info::unlock_slave_threads() 262 { 263 DBUG_ENTER("unlock_slave_threads"); 264 mysql_mutex_unlock(&start_stop_lock); 265 DBUG_VOID_RETURN; 266 } 267 268 #ifdef HAVE_PSI_INTERFACE 269 static PSI_thread_key key_thread_slave_io, key_thread_slave_sql; 270 271 static PSI_thread_info all_slave_threads[]= 272 { 273 { &key_thread_slave_io, "slave_io", PSI_FLAG_GLOBAL}, 274 { &key_thread_slave_sql, "slave_sql", PSI_FLAG_GLOBAL} 275 }; 276 277 static void init_slave_psi_keys(void) 278 { 279 const char* category= "sql"; 280 int count; 281 282 if (PSI_server == NULL) 283 return; 284 285 count= array_elements(all_slave_threads); 286 PSI_server->register_thread(category, all_slave_threads, count); 287 } 288 #endif /* HAVE_PSI_INTERFACE */ 289 290 291 /* 292 Note: This definition needs to be kept in sync with the one in 293 mysql_system_tables.sql which is used by mysql_create_db. 
294 */ 295 static const char gtid_pos_table_definition1[]= 296 "CREATE TABLE "; 297 static const char gtid_pos_table_definition2[]= 298 " (domain_id INT UNSIGNED NOT NULL, " 299 "sub_id BIGINT UNSIGNED NOT NULL, " 300 "server_id INT UNSIGNED NOT NULL, " 301 "seq_no BIGINT UNSIGNED NOT NULL, " 302 "PRIMARY KEY (domain_id, sub_id)) CHARSET=latin1 " 303 "COMMENT='Replication slave GTID position' " 304 "ENGINE="; 305 306 /* 307 Build a query string 308 CREATE TABLE mysql.gtid_slave_pos_<engine> ... ENGINE=<engine> 309 */ 310 static bool 311 build_gtid_pos_create_query(THD *thd, String *query, 312 LEX_CSTRING *table_name, 313 LEX_CSTRING *engine_name) 314 { 315 bool err= false; 316 err|= query->append(gtid_pos_table_definition1); 317 err|= append_identifier(thd, query, table_name); 318 err|= query->append(gtid_pos_table_definition2); 319 err|= append_identifier(thd, query, engine_name); 320 return err; 321 } 322 323 324 static int 325 gtid_pos_table_creation(THD *thd, plugin_ref engine, LEX_CSTRING *table_name) 326 { 327 int err; 328 StringBuffer<sizeof(gtid_pos_table_definition1) + 329 sizeof(gtid_pos_table_definition1) + 330 2*FN_REFLEN> query; 331 332 if (build_gtid_pos_create_query(thd, &query, table_name, plugin_name(engine))) 333 { 334 my_error(ER_OUT_OF_RESOURCES, MYF(0)); 335 return 1; 336 } 337 338 thd->set_db(&MYSQL_SCHEMA_NAME); 339 thd->clear_error(); 340 ulonglong thd_saved_option= thd->variables.option_bits; 341 /* This query shuold not be binlogged. */ 342 thd->variables.option_bits&= ~(ulonglong)OPTION_BIN_LOG; 343 thd->set_query_and_id(query.c_ptr(), query.length(), thd->charset(), 344 next_query_id()); 345 Parser_state parser_state; 346 err= parser_state.init(thd, thd->query(), thd->query_length()); 347 if (err) 348 goto end; 349 mysql_parse(thd, thd->query(), thd->query_length(), &parser_state, 350 FALSE, FALSE); 351 if (unlikely(thd->is_error())) 352 err= 1; 353 /* The warning is relevant to 10.3 and earlier. 
*/ 354 sql_print_warning("The automatically created table '%s' name may not be " 355 "entirely in lowercase. The table name will be converted " 356 "to lowercase to any future upgrade to 10.4.0 and later " 357 "version where it will be auto-created at once " 358 "in lowercase.", 359 table_name->str); 360 end: 361 thd->variables.option_bits= thd_saved_option; 362 thd->reset_query(); 363 return err; 364 } 365 366 static THD *new_bg_THD() 367 { 368 THD *thd= new THD(next_thread_id()); 369 thd->thread_stack= (char*) &thd; 370 thd->store_globals(); 371 thd->system_thread = SYSTEM_THREAD_SLAVE_BACKGROUND; 372 thd->security_ctx->skip_grants(); 373 thd->set_command(COM_DAEMON); 374 thd->variables.wsrep_on= 0; 375 return thd; 376 } 377 378 static void bg_gtid_delete_pending(void *) 379 { 380 THD *thd= new_bg_THD(); 381 382 rpl_slave_state::list_element *list; 383 list= rpl_global_gtid_slave_state->gtid_grab_pending_delete_list(); 384 rpl_global_gtid_slave_state->gtid_delete_pending(thd, &list); 385 if (list) 386 rpl_global_gtid_slave_state->put_back_list(list); 387 delete thd; 388 } 389 390 static void bg_gtid_pos_auto_create(void *hton) 391 { 392 THD *thd= NULL; 393 int UNINIT_VAR(err); 394 plugin_ref engine= NULL, *auto_engines; 395 rpl_slave_state::gtid_pos_table *entry; 396 StringBuffer<FN_REFLEN> loc_table_name; 397 LEX_CSTRING table_name; 398 399 /* 400 Check that the plugin is still in @@gtid_pos_auto_engines, and lock 401 it. 402 */ 403 mysql_mutex_lock(&LOCK_global_system_variables); 404 for (auto_engines= opt_gtid_pos_auto_plugins; 405 auto_engines && *auto_engines; 406 ++auto_engines) 407 { 408 if (plugin_hton(*auto_engines) == hton) 409 { 410 engine= my_plugin_lock(NULL, *auto_engines); 411 break; 412 } 413 } 414 mysql_mutex_unlock(&LOCK_global_system_variables); 415 if (!engine) 416 { 417 /* The engine is gone from @@gtid_pos_auto_engines, so no action. */ 418 goto end; 419 } 420 421 /* Find the entry for the table to auto-create. 
*/ 422 mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state); 423 entry= rpl_global_gtid_slave_state-> 424 gtid_pos_tables.load(std::memory_order_relaxed); 425 while (entry) 426 { 427 if (entry->table_hton == hton && 428 entry->state == rpl_slave_state::GTID_POS_CREATE_REQUESTED) 429 break; 430 entry= entry->next; 431 } 432 if (entry) 433 { 434 entry->state = rpl_slave_state::GTID_POS_CREATE_IN_PROGRESS; 435 err= loc_table_name.append(entry->table_name.str, entry->table_name.length); 436 } 437 mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); 438 if (!entry) 439 goto end; 440 if (err) 441 { 442 sql_print_error("Out of memory while trying to auto-create GTID position table"); 443 goto end; 444 } 445 table_name.str= loc_table_name.c_ptr_safe(); 446 table_name.length= loc_table_name.length(); 447 448 thd= new_bg_THD(); 449 err= gtid_pos_table_creation(thd, engine, &table_name); 450 if (err) 451 { 452 sql_print_error("Error auto-creating GTID position table `mysql.%s`: %s Error_code: %d", 453 table_name.str, thd->get_stmt_da()->message(), 454 thd->get_stmt_da()->sql_errno()); 455 thd->clear_error(); 456 goto end; 457 } 458 459 /* Now enable the entry for the auto-created table. 
*/ 460 mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state); 461 entry= rpl_global_gtid_slave_state-> 462 gtid_pos_tables.load(std::memory_order_relaxed); 463 while (entry) 464 { 465 if (entry->table_hton == hton && 466 entry->state == rpl_slave_state::GTID_POS_CREATE_IN_PROGRESS) 467 { 468 entry->state= rpl_slave_state::GTID_POS_AVAILABLE; 469 break; 470 } 471 entry= entry->next; 472 } 473 mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); 474 475 end: 476 delete thd; 477 if (engine) 478 plugin_unlock(NULL, engine); 479 } 480 481 static bool slave_background_thread_gtid_loaded; 482 483 static void bg_rpl_load_gtid_slave_state(void *) 484 { 485 THD *thd= new_bg_THD(); 486 thd_proc_info(thd, "Loading slave GTID position from table"); 487 if (rpl_load_gtid_slave_state(thd)) 488 sql_print_warning("Failed to load slave replication state from table " 489 "%s.%s: %u: %s", "mysql", 490 rpl_gtid_slave_state_table_name.str, 491 thd->get_stmt_da()->sql_errno(), 492 thd->get_stmt_da()->message()); 493 494 // hijacking global_rpl_thread_pool cond here - it's only once on startup 495 mysql_mutex_lock(&global_rpl_thread_pool.LOCK_rpl_thread_pool); 496 slave_background_thread_gtid_loaded= true; 497 mysql_cond_signal(&global_rpl_thread_pool.COND_rpl_thread_pool); 498 mysql_mutex_unlock(&global_rpl_thread_pool.LOCK_rpl_thread_pool); 499 delete thd; 500 } 501 502 static void bg_slave_kill(void *victim) 503 { 504 THD *to_kill= (THD *)victim; 505 to_kill->awake(KILL_CONNECTION); 506 mysql_mutex_lock(&to_kill->LOCK_wakeup_ready); 507 to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED; 508 mysql_cond_broadcast(&to_kill->COND_wakeup_ready); 509 mysql_mutex_unlock(&to_kill->LOCK_wakeup_ready); 510 } 511 512 void slave_background_kill_request(THD *to_kill) 513 { 514 if (to_kill->rgi_slave->killed_for_retry) 515 return; // Already deadlock killed. 
516 to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_PENDING; 517 mysql_manager_submit(bg_slave_kill, to_kill); 518 } 519 520 /* 521 This function must only be called from a slave SQL thread (or worker thread), 522 to ensure that the table_entry will not go away before we can lock the 523 LOCK_slave_state. 524 */ 525 void slave_background_gtid_pos_create_request( 526 rpl_slave_state::gtid_pos_table *table_entry) 527 { 528 if (table_entry->state != rpl_slave_state::GTID_POS_AUTO_CREATE) 529 return; 530 mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state); 531 if (table_entry->state != rpl_slave_state::GTID_POS_AUTO_CREATE) 532 { 533 mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); 534 return; 535 } 536 table_entry->state= rpl_slave_state::GTID_POS_CREATE_REQUESTED; 537 mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); 538 539 mysql_manager_submit(bg_gtid_pos_auto_create, table_entry->table_hton); 540 } 541 542 543 /* 544 Request the manager thread to delete no longer used rows from the 545 mysql.gtid_slave_pos* tables. 
546 */ 547 void slave_background_gtid_pending_delete_request(void) 548 { 549 mysql_manager_submit(bg_gtid_delete_pending, NULL); 550 } 551 552 553 /* Initialize slave structures */ 554 555 int init_slave() 556 { 557 DBUG_ENTER("init_slave"); 558 int error= 0; 559 560 #ifdef HAVE_PSI_INTERFACE 561 init_slave_psi_keys(); 562 #endif 563 564 if (global_rpl_thread_pool.init(opt_slave_parallel_threads)) 565 return 1; 566 567 slave_background_thread_gtid_loaded= false; 568 mysql_manager_submit(bg_rpl_load_gtid_slave_state, NULL); 569 570 // hijacking global_rpl_thread_pool cond here - it's only once on startup 571 mysql_mutex_lock(&global_rpl_thread_pool.LOCK_rpl_thread_pool); 572 while (!slave_background_thread_gtid_loaded) 573 mysql_cond_wait(&global_rpl_thread_pool.COND_rpl_thread_pool, 574 &global_rpl_thread_pool.LOCK_rpl_thread_pool); 575 mysql_mutex_unlock(&global_rpl_thread_pool.LOCK_rpl_thread_pool); 576 577 /* 578 This is called when mysqld starts. Before client connections are 579 accepted. However bootstrap may conflict with us if it does START SLAVE. 580 So it's safer to take the lock. 581 */ 582 583 if (pthread_key_create(&RPL_MASTER_INFO, NULL)) 584 goto err; 585 586 master_info_index= new Master_info_index; 587 if (!master_info_index || master_info_index->init_all_master_info()) 588 { 589 sql_print_error("Failed to initialize multi master structures"); 590 DBUG_RETURN(1); 591 } 592 if (!(active_mi= new Master_info(&default_master_connection_name, 593 relay_log_recovery)) || 594 active_mi->error()) 595 { 596 delete active_mi; 597 active_mi= 0; 598 sql_print_error("Failed to allocate memory for the Master Info structure"); 599 goto err; 600 } 601 602 if (master_info_index->add_master_info(active_mi, FALSE)) 603 { 604 delete active_mi; 605 active_mi= 0; 606 goto err; 607 } 608 609 /* 610 If master_host is not specified, try to read it from the master_info file. 611 If master_host is specified, create the master_info file if it doesn't 612 exists. 
613 */ 614 615 if (init_master_info(active_mi,master_info_file,relay_log_info_file, 616 1, (SLAVE_IO | SLAVE_SQL))) 617 { 618 sql_print_error("Failed to initialize the master info structure"); 619 goto err; 620 } 621 622 /* If server id is not set, start_slave_thread() will say it */ 623 624 if (active_mi->host[0] && !opt_skip_slave_start) 625 { 626 int error; 627 THD *thd= new THD(next_thread_id()); 628 thd->thread_stack= (char*) &thd; 629 thd->store_globals(); 630 631 error= start_slave_threads(0, /* No active thd */ 632 1 /* need mutex */, 633 1 /* wait for start*/, 634 active_mi, 635 master_info_file, 636 relay_log_info_file, 637 SLAVE_IO | SLAVE_SQL); 638 639 thd->reset_globals(); 640 delete thd; 641 if (unlikely(error)) 642 { 643 sql_print_error("Failed to create slave threads"); 644 goto err; 645 } 646 } 647 648 end: 649 DBUG_RETURN(error); 650 651 err: 652 error= 1; 653 goto end; 654 } 655 656 /* 657 Updates the master info based on the information stored in the 658 relay info and ignores relay logs previously retrieved by the IO 659 thread, which thus starts fetching again based on to the 660 group_master_log_pos and group_master_log_name. Eventually, the old 661 relay logs will be purged by the normal purge mechanism. 662 663 In the feature, we should improve this routine in order to avoid throwing 664 away logs that are safely stored in the disk. Note also that this recovery 665 routine relies on the correctness of the relay-log.info and only tolerates 666 coordinate problems in master.info. 667 668 In this function, there is no need for a mutex as the caller 669 (i.e. init_slave) already has one acquired. 
670 671 Specifically, the following structures are updated: 672 673 1 - mi->master_log_pos <-- rli->group_master_log_pos 674 2 - mi->master_log_name <-- rli->group_master_log_name 675 3 - It moves the relay log to the new relay log file, by 676 rli->group_relay_log_pos <-- BIN_LOG_HEADER_SIZE; 677 rli->event_relay_log_pos <-- BIN_LOG_HEADER_SIZE; 678 rli->group_relay_log_name <-- rli->relay_log.get_log_fname(); 679 rli->event_relay_log_name <-- rli->relay_log.get_log_fname(); 680 681 If there is an error, it returns (1), otherwise returns (0). 682 */ 683 int init_recovery(Master_info* mi, const char** errmsg) 684 { 685 DBUG_ENTER("init_recovery"); 686 687 Relay_log_info *rli= &mi->rli; 688 if (rli->group_master_log_name[0]) 689 { 690 mi->master_log_pos= MY_MAX(BIN_LOG_HEADER_SIZE, 691 rli->group_master_log_pos); 692 strmake_buf(mi->master_log_name, rli->group_master_log_name); 693 694 sql_print_warning("Recovery from master pos %ld and file %s.", 695 (ulong) mi->master_log_pos, mi->master_log_name); 696 697 strmake_buf(rli->group_relay_log_name, rli->relay_log.get_log_fname()); 698 strmake_buf(rli->event_relay_log_name, rli->relay_log.get_log_fname()); 699 700 rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE; 701 } 702 703 DBUG_RETURN(0); 704 } 705 706 707 /** 708 Convert slave skip errors bitmap into a printable string. 709 */ 710 711 static void make_slave_skip_errors_printable(void) 712 { 713 /* 714 To be safe, we want 10 characters of room in the buffer for a number 715 plus terminators. Also, we need some space for constant strings. 716 10 characters must be sufficient for a number plus {',' | '...'} 717 plus a NUL terminator. That is a max 6 digit number. 718 */ 719 const size_t MIN_ROOM= 10; 720 DBUG_ENTER("make_slave_skip_errors_printable"); 721 DBUG_ASSERT(sizeof(slave_skip_error_names) > MIN_ROOM); 722 DBUG_ASSERT(MAX_SLAVE_ERROR <= 999999); // 6 digits 723 724 /* Make @@slave_skip_errors show the nice human-readable value. 
*/ 725 opt_slave_skip_errors= slave_skip_error_names; 726 727 if (!use_slave_mask || bitmap_is_clear_all(&slave_error_mask)) 728 { 729 /* purecov: begin tested */ 730 memcpy(slave_skip_error_names, STRING_WITH_LEN("OFF")); 731 /* purecov: end */ 732 } 733 else if (bitmap_is_set_all(&slave_error_mask)) 734 { 735 /* purecov: begin tested */ 736 memcpy(slave_skip_error_names, STRING_WITH_LEN("ALL")); 737 /* purecov: end */ 738 } 739 else 740 { 741 char *buff= slave_skip_error_names; 742 char *bend= buff + sizeof(slave_skip_error_names) - MIN_ROOM; 743 int errnum; 744 745 for (errnum= 0; errnum < MAX_SLAVE_ERROR; errnum++) 746 { 747 if (bitmap_is_set(&slave_error_mask, errnum)) 748 { 749 if (buff >= bend) 750 break; /* purecov: tested */ 751 buff= int10_to_str(errnum, buff, 10); 752 *buff++= ','; 753 } 754 } 755 if (buff != slave_skip_error_names) 756 buff--; // Remove last ',' 757 if (errnum < MAX_SLAVE_ERROR) 758 { 759 /* Couldn't show all errors */ 760 buff= strmov(buff, "..."); /* purecov: tested */ 761 } 762 *buff=0; 763 } 764 DBUG_PRINT("init", ("error_names: '%s'", slave_skip_error_names)); 765 DBUG_VOID_RETURN; 766 } 767 768 /* 769 Init function to set up array for errors that should be skipped for slave 770 771 SYNOPSIS 772 init_slave_skip_errors() 773 arg List of errors numbers to skip, separated with ',' 774 775 NOTES 776 Called from get_options() in mysqld.cc on start-up 777 */ 778 779 bool init_slave_skip_errors(const char* arg) 780 { 781 const char *p; 782 DBUG_ENTER("init_slave_skip_errors"); 783 784 if (!arg || !*arg) // No errors defined 785 goto end; 786 787 if (unlikely(my_bitmap_init(&slave_error_mask,0,MAX_SLAVE_ERROR,0))) 788 DBUG_RETURN(1); 789 790 use_slave_mask= 1; 791 for (;my_isspace(system_charset_info,*arg);++arg) 792 /* empty */; 793 if (!my_strnncoll(system_charset_info,(uchar*)arg,4,(const uchar*)"all",4)) 794 { 795 bitmap_set_all(&slave_error_mask); 796 goto end; 797 } 798 for (p= arg ; *p; ) 799 { 800 long err_code; 801 if (!(p= 
str2int(p, 10, 0, LONG_MAX, &err_code))) 802 break; 803 if (err_code < MAX_SLAVE_ERROR) 804 bitmap_set_bit(&slave_error_mask,(uint)err_code); 805 while (!my_isdigit(system_charset_info,*p) && *p) 806 p++; 807 } 808 809 end: 810 make_slave_skip_errors_printable(); 811 DBUG_RETURN(0); 812 } 813 814 /** 815 Make printable version if slave_transaction_retry_errors 816 This is never empty as at least ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT 817 will be there 818 */ 819 820 static void make_slave_transaction_retry_errors_printable(void) 821 { 822 /* 823 To be safe, we want 10 characters of room in the buffer for a number 824 plus terminators. Also, we need some space for constant strings. 825 10 characters must be sufficient for a number plus {',' | '...'} 826 plus a NUL terminator. That is a max 6 digit number. 827 */ 828 const size_t MIN_ROOM= 10; 829 char *buff= slave_transaction_retry_error_names; 830 char *bend= buff + sizeof(slave_transaction_retry_error_names) - MIN_ROOM; 831 uint i; 832 DBUG_ENTER("make_slave_transaction_retry_errors_printable"); 833 DBUG_ASSERT(sizeof(slave_transaction_retry_error_names) > MIN_ROOM); 834 835 /* Make @@slave_transaction_retry_errors show a human-readable value */ 836 opt_slave_transaction_retry_errors= slave_transaction_retry_error_names; 837 838 for (i= 0; i < slave_transaction_retry_error_length && buff < bend; i++) 839 { 840 buff= int10_to_str(slave_transaction_retry_errors[i], buff, 10); 841 *buff++= ','; 842 } 843 if (buff != slave_transaction_retry_error_names) 844 buff--; // Remove last ',' 845 if (i < slave_transaction_retry_error_length) 846 { 847 /* Couldn't show all errors */ 848 buff= strmov(buff, "..."); /* purecov: tested */ 849 } 850 *buff=0; 851 DBUG_PRINT("exit", ("error_names: '%s'", 852 slave_transaction_retry_error_names)); 853 DBUG_VOID_RETURN; 854 } 855 856 857 #define DEFAULT_SLAVE_RETRY_ERRORS 9 858 859 bool init_slave_transaction_retry_errors(const char* arg) 860 { 861 const char *p; 862 long err_code; 
863 uint i; 864 DBUG_ENTER("init_slave_transaction_retry_errors"); 865 866 /* Handle empty strings */ 867 if (!arg) 868 arg= ""; 869 870 slave_transaction_retry_error_length= DEFAULT_SLAVE_RETRY_ERRORS; 871 for (;my_isspace(system_charset_info,*arg);++arg) 872 /* empty */; 873 for (p= arg; *p; ) 874 { 875 if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code))) 876 break; 877 slave_transaction_retry_error_length++; 878 while (!my_isdigit(system_charset_info,*p) && *p) 879 p++; 880 } 881 882 if (unlikely(!(slave_transaction_retry_errors= 883 (uint *) my_once_alloc(sizeof(int) * 884 slave_transaction_retry_error_length, 885 MYF(MY_WME))))) 886 DBUG_RETURN(1); 887 888 /* 889 Temporary error codes: 890 currently, InnoDB deadlock detected by InnoDB or lock 891 wait timeout (innodb_lock_wait_timeout exceeded 892 */ 893 slave_transaction_retry_errors[0]= ER_NET_READ_ERROR; 894 slave_transaction_retry_errors[1]= ER_NET_READ_INTERRUPTED; 895 slave_transaction_retry_errors[2]= ER_NET_ERROR_ON_WRITE; 896 slave_transaction_retry_errors[3]= ER_NET_WRITE_INTERRUPTED; 897 slave_transaction_retry_errors[4]= ER_LOCK_WAIT_TIMEOUT; 898 slave_transaction_retry_errors[5]= ER_LOCK_DEADLOCK; 899 slave_transaction_retry_errors[6]= ER_CONNECT_TO_FOREIGN_DATA_SOURCE; 900 slave_transaction_retry_errors[7]= 2013; /* CR_SERVER_LOST */ 901 slave_transaction_retry_errors[8]= 12701; /* ER_SPIDER_REMOTE_SERVER_GONE_AWAY_NUM */ 902 903 /* Add user codes after this */ 904 for (p= arg, i= DEFAULT_SLAVE_RETRY_ERRORS; *p; ) 905 { 906 if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code))) 907 break; 908 if (err_code > 0) 909 slave_transaction_retry_errors[i++]= (uint) err_code; 910 while (!my_isdigit(system_charset_info,*p) && *p) 911 p++; 912 } 913 slave_transaction_retry_error_length= i; 914 915 make_slave_transaction_retry_errors_printable(); 916 DBUG_RETURN(0); 917 } 918 919 920 int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock) 921 { 922 DBUG_ENTER("terminate_slave_threads"); 923 924 
if (!mi->inited) 925 DBUG_RETURN(0); /* successfully do nothing */ 926 int error,force_all = (thread_mask & SLAVE_FORCE_ALL); 927 int retval= 0; 928 mysql_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock; 929 mysql_mutex_t *log_lock= mi->rli.relay_log.get_log_lock(); 930 931 if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL)) 932 { 933 DBUG_PRINT("info",("Terminating SQL thread")); 934 if (mi->using_parallel() && mi->rli.abort_slave && mi->rli.stop_for_until) 935 { 936 mi->rli.stop_for_until= false; 937 mi->rli.parallel.stop_during_until(); 938 } 939 else 940 mi->rli.abort_slave=1; 941 if (unlikely((error= terminate_slave_thread(mi->rli.sql_driver_thd, 942 sql_lock, 943 &mi->rli.stop_cond, 944 &mi->rli.slave_running, 945 skip_lock))) && 946 !force_all) 947 DBUG_RETURN(error); 948 retval= error; 949 950 mysql_mutex_lock(log_lock); 951 952 DBUG_PRINT("info",("Flushing relay-log info file.")); 953 if (current_thd) 954 THD_STAGE_INFO(current_thd, stage_flushing_relay_log_info_file); 955 if (mi->rli.flush() || my_sync(mi->rli.info_fd, MYF(MY_WME))) 956 retval= ER_ERROR_DURING_FLUSH_LOGS; 957 958 mysql_mutex_unlock(log_lock); 959 } 960 if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL)) 961 { 962 DBUG_PRINT("info",("Terminating IO thread")); 963 mi->abort_slave=1; 964 if (unlikely((error= terminate_slave_thread(mi->io_thd, io_lock, 965 &mi->stop_cond, 966 &mi->slave_running, 967 skip_lock))) && 968 !force_all) 969 DBUG_RETURN(error); 970 if (!retval) 971 retval= error; 972 973 mysql_mutex_lock(log_lock); 974 975 DBUG_PRINT("info",("Flushing relay log and master info file.")); 976 if (current_thd) 977 THD_STAGE_INFO(current_thd, stage_flushing_relay_log_and_master_info_repository); 978 if (likely(mi->fd >= 0)) 979 { 980 if (flush_master_info(mi, TRUE, FALSE) || my_sync(mi->fd, MYF(MY_WME))) 981 retval= ER_ERROR_DURING_FLUSH_LOGS; 982 } 983 if (mi->rli.relay_log.is_open() && 984 my_sync(mi->rli.relay_log.get_log_file()->file, MYF(MY_WME))) 985 retval= 
ER_ERROR_DURING_FLUSH_LOGS; 986 987 mysql_mutex_unlock(log_lock); 988 } 989 DBUG_RETURN(retval); 990 } 991 992 993 /** 994 Wait for a slave thread to terminate. 995 996 This function is called after requesting the thread to terminate 997 (by setting @c abort_slave member of @c Relay_log_info or @c 998 Master_info structure to 1). Termination of the thread is 999 controlled with the the predicate <code>*slave_running</code>. 1000 1001 Function will acquire @c term_lock before waiting on the condition 1002 unless @c skip_lock is true in which case the mutex should be owned 1003 by the caller of this function and will remain acquired after 1004 return from the function. 1005 1006 @param term_lock 1007 Associated lock to use when waiting for @c term_cond 1008 1009 @param term_cond 1010 Condition that is signalled when the thread has terminated 1011 1012 @param slave_running 1013 Pointer to predicate to check for slave thread termination 1014 1015 @param skip_lock 1016 If @c true the lock will not be acquired before waiting on 1017 the condition. In this case, it is assumed that the calling 1018 function acquires the lock before calling this function. 1019 1020 @retval 0 All OK ER_SLAVE_NOT_RUNNING otherwise. 1021 1022 @note If the executing thread has to acquire term_lock (skip_lock 1023 is false), the negative running status does not represent 1024 any issue therefore no error is reported. 
*/
static int
terminate_slave_thread(THD *thd,
                       mysql_mutex_t *term_lock,
                       mysql_cond_t *term_cond,
                       volatile uint *slave_running,
                       bool skip_lock)
{
  DBUG_ENTER("terminate_slave_thread");
  /* Either take term_lock here, or verify that the caller already owns it. */
  if (!skip_lock)
  {
    mysql_mutex_lock(term_lock);
  }
  else
  {
    mysql_mutex_assert_owner(term_lock);
  }
  if (!*slave_running)
  {
    if (!skip_lock)
    {
      /*
        term_lock (the run_lock) was acquired locally: in that case a
        "not running" status is not a problem, so release the lock and
        report success.
      */
      mysql_mutex_unlock(term_lock);
      DBUG_RETURN(0);
    }
    else
    {
      /* The caller holds the lock and expected a running thread. */
      DBUG_RETURN(ER_SLAVE_NOT_RUNNING);
    }
  }
  DBUG_ASSERT(thd != 0);
  THD_CHECK_SENTRY(thd);

  /*
    It is critical to test if the slave is running. Otherwise, we might
    be referencing freed memory trying to kick it
  */

  while (*slave_running)                        // Should always be true
  {
    int error __attribute__((unused));
    DBUG_PRINT("loop", ("killing slave thread"));

    /* Both THD mutexes are held around the signal + awake_no_mutex() call. */
    mysql_mutex_lock(&thd->LOCK_thd_kill);
    mysql_mutex_lock(&thd->LOCK_thd_data);
#ifndef DONT_USE_THR_ALARM
    /*
      Error codes from pthread_kill are:
      EINVAL: invalid signal number (can't happen)
      ESRCH: thread already killed (can happen, should be ignored)
    */
    int err __attribute__((unused))= pthread_kill(thd->real_id, thr_client_alarm);
    DBUG_ASSERT(err != EINVAL);
#endif
    thd->awake_no_mutex(NOT_KILLED);

    mysql_mutex_unlock(&thd->LOCK_thd_kill);
    mysql_mutex_unlock(&thd->LOCK_thd_data);

    /*
      There is a small chance that slave thread might miss the first
      alarm. To protect against it, resend the signal until it reacts.
      Each round waits on term_cond for at most 2 seconds before the
      loop condition is re-checked.
    */
    struct timespec abstime;
    set_timespec(abstime,2);
    error= mysql_cond_timedwait(term_cond, term_lock, &abstime);
    DBUG_ASSERT(error == ETIMEDOUT || error == 0);
  }

  DBUG_ASSERT(*slave_running == 0);

  /* Only release term_lock if it was acquired by this function. */
  if (!skip_lock)
    mysql_mutex_unlock(term_lock);
  DBUG_RETURN(0);
}


/*
  Create one slave thread and optionally wait until it has started.

  @param thread_key     PSI key passed to mysql_thread_create() (only
                        present when HAVE_PSI_INTERFACE is defined)
  @param h_func         thread entry point; it receives mi as argument
  @param start_lock     if not NULL, locked on entry and unlocked on
                        every return path
  @param cond_lock      mutex used with start_cond; the wait is done
                        only if both start_cond and cond_lock are given
  @param start_cond     if not NULL, broadcast on the early error
                        returns and waited on for the thread start
  @param slave_running  if *slave_running is non-zero nothing is started
  @param slave_run_id   counter sampled before the thread is created;
                        the wait ends once its value has changed
  @param mi             Master_info handed to the new thread

  @retval 0                    thread created (and, if requested, the
                               run id was seen to change)
  @retval ER_BAD_SLAVE         global server_id is 0
  @retval ER_SLAVE_MUST_STOP   *slave_running was already set
  @retval ER_SLAVE_THREAD      mysql_thread_create() failed
*/
int start_slave_thread(
#ifdef HAVE_PSI_INTERFACE
                       PSI_thread_key thread_key,
#endif
                       pthread_handler h_func, mysql_mutex_t *start_lock,
                       mysql_mutex_t *cond_lock,
                       mysql_cond_t *start_cond,
                       volatile uint *slave_running,
                       volatile ulong *slave_run_id,
                       Master_info* mi)
{
  pthread_t th;
  ulong start_id;
  int error;
  DBUG_ENTER("start_slave_thread");

  DBUG_ASSERT(mi->inited);

  if (start_lock)
    mysql_mutex_lock(start_lock);
  if (!global_system_variables.server_id)
  {
    if (start_cond)
      mysql_cond_broadcast(start_cond);
    if (start_lock)
      mysql_mutex_unlock(start_lock);
    sql_print_error("Server id not set, will not start slave");
    DBUG_RETURN(ER_BAD_SLAVE);
  }

  if (*slave_running)
  {
    if (start_cond)
      mysql_cond_broadcast(start_cond);
    if (start_lock)
      mysql_mutex_unlock(start_lock);
    DBUG_RETURN(ER_SLAVE_MUST_STOP);
  }
  /* Remember the current run id so that a change can be detected below. */
  start_id= *slave_run_id;
  DBUG_PRINT("info",("Creating new slave thread"));
  if (unlikely((error= mysql_thread_create(thread_key,
                                           &th, &connection_attrib, h_func,
                                           (void*)mi))))
  {
    sql_print_error("Can't create slave thread (errno= %d).", error);
    if (start_lock)
      mysql_mutex_unlock(start_lock);
    DBUG_RETURN(ER_SLAVE_THREAD);
  }

  /*
    In the following loop we can't check for thd->killed as we have to
    wait until THD structures for the slave thread are created
    before we can return.
    This should be ok as there is no major work done in the slave
    threads before they signal that we can stop waiting.
  */

  if (start_cond && cond_lock) // caller has cond_lock
  {
    THD* thd = current_thd;
    while (start_id == *slave_run_id)
    {
      DBUG_PRINT("sleep",("Waiting for slave thread to start"));
      PSI_stage_info saved_stage= {0, "", 0};
      thd->ENTER_COND(start_cond, cond_lock,
                      & stage_waiting_for_slave_thread_to_start,
                      & saved_stage);
      /*
        It is not sufficient to test this at loop bottom. We must test
        it after registering the mutex in enter_cond(). If the kill
        happens after testing of thd->killed and before the mutex is
        registered, we could otherwise go waiting though thd->killed is
        set.

        NOTE(review): no test of thd->killed is present in this loop
        (see the comment before the loop); the paragraph above looks
        like a leftover from an earlier version - confirm.
      */
      mysql_cond_wait(start_cond, cond_lock);
      thd->EXIT_COND(& saved_stage);
      mysql_mutex_lock(cond_lock); // re-acquire it as exit_cond() released
    }
  }
  if (start_lock)
    mysql_mutex_unlock(start_lock);
  DBUG_RETURN(0);
}


/*
  start_slave_threads()

  Start the IO and/or SQL thread of one connection, as selected by the
  SLAVE_IO and SLAVE_SQL bits of thread_mask.

  need_slave_mutex   if true, the run_lock of each thread is passed to
                     start_slave_thread() as its start_lock
  wait_for_start     if true, the start_cond/run_lock pair of each
                     thread is passed so that start_slave_thread() waits
                     for the thread to start
  master_info_fname,
  slave_info_fname   not referenced in this function body

  Returns 0 on success, otherwise the error from rpl_load_gtid_state()
  or start_slave_thread(). If the SQL thread fails to start,
  terminate_slave_threads() is called for the IO thread bit of
  thread_mask.

  NOTES
    SLAVE_FORCE_ALL is not implemented here on purpose since it does not make
    sense to do that for starting a slave--we always care if it actually
    started the threads that were not previously running
*/

int start_slave_threads(THD *thd,
                        bool need_slave_mutex, bool wait_for_start,
                        Master_info* mi, const char* master_info_fname,
                        const char* slave_info_fname, int thread_mask)
{
  mysql_mutex_t *lock_io=0, *lock_sql=0, *lock_cond_io=0, *lock_cond_sql=0;
  mysql_cond_t* cond_io=0, *cond_sql=0;
  int error=0;
  const char *errmsg;
  DBUG_ENTER("start_slave_threads");

  if (need_slave_mutex)
  {
    lock_io = &mi->run_lock;
    lock_sql = &mi->rli.run_lock;
  }
  if (wait_for_start)
  {
    cond_io = &mi->start_cond;
    cond_sql = &mi->rli.start_cond;
    lock_cond_io = &mi->run_lock;
    lock_cond_sql = &mi->rli.run_lock;
  }

  /*
    If we are using GTID and both SQL and IO threads are stopped, then get
    rid of all relay logs.

    Relay logs are not very useful when using GTID, except as a buffer
    between the fetch in the IO thread and the apply in SQL thread. However
    while one of the threads is running, they are in use and cannot be
    removed.
  */
  if (mi->using_gtid != Master_info::USE_GTID_NO &&
      !mi->slave_running && !mi->rli.slave_running)
  {
    /*
      purge_relay_logs() clears the mi->rli.group_master_log_pos.
      So save and restore them, like we do in CHANGE MASTER.
      (We are not going to use them for GTID, but it might be worth to
      keep them in case connection with GTID fails and user wants to go
      back and continue with previous old-style replication coordinates).

      NOTE(review): neither the return value of purge_relay_logs() nor
      errmsg is examined here - confirm that this is intended.
    */
    mi->master_log_pos = MY_MAX(BIN_LOG_HEADER_SIZE,
                                mi->rli.group_master_log_pos);
    strmake(mi->master_log_name, mi->rli.group_master_log_name,
            sizeof(mi->master_log_name)-1);
    purge_relay_logs(&mi->rli, thd, 0, &errmsg);
    mi->rli.group_master_log_pos= mi->master_log_pos;
    strmake(mi->rli.group_master_log_name, mi->master_log_name,
            sizeof(mi->rli.group_master_log_name)-1);

    error= rpl_load_gtid_state(&mi->gtid_current_pos, mi->using_gtid ==
                                             Master_info::USE_GTID_CURRENT_POS);
    mi->events_queued_since_last_gtid= 0;
    mi->gtid_reconnect_event_skip_count= 0;

    mi->rli.restart_gtid_pos.reset();
  }

  /* The IO thread is started first, the SQL thread only if that succeeded. */
  if (likely(!error) && likely((thread_mask & SLAVE_IO)))
    error= start_slave_thread(
#ifdef HAVE_PSI_INTERFACE
                              key_thread_slave_io,
#endif
                              handle_slave_io, lock_io, lock_cond_io,
                              cond_io,
                              &mi->slave_running, &mi->slave_run_id,
                              mi);
  if (likely(!error) && likely(thread_mask & SLAVE_SQL))
  {
    error= start_slave_thread(
#ifdef HAVE_PSI_INTERFACE
                              key_thread_slave_sql,
#endif
                              handle_slave_sql, lock_sql, lock_cond_sql,
                              cond_sql,
                              &mi->rli.slave_running, &mi->rli.slave_run_id,
                              mi);
    /* SQL thread failed to start: stop the IO thread if it was requested. */
    if (unlikely(error))
      terminate_slave_threads(mi, thread_mask & SLAVE_IO, !need_slave_mutex);
  }
  DBUG_RETURN(error);
}


/*
  Kill slaves preparing for shutdown

  Under LOCK_active_mi, calls master_info_index->free_connections();
  afterwards the global parallel-replication thread pool is deactivated.
*/

void slave_prepare_for_shutdown()
{
  mysql_mutex_lock(&LOCK_active_mi);
  master_info_index->free_connections();
  mysql_mutex_unlock(&LOCK_active_mi);
  // It's safe to destruct worker pool now when
  // all driver threads are gone.
  global_rpl_thread_pool.deactivate();
}

/*
  Release slave threads at time of executing shutdown.

  Deletes master_info_index and resets master_info_index and active_mi
  to 0 under LOCK_active_mi, then destroys the global replication thread
  pool and frees all replication filters.
*/

void end_slave()
{
  DBUG_ENTER("end_slave");

  /*
    This is called when the server terminates, in close_connections().
    It terminates slave threads. However, some CHANGE MASTER etc may still be
    running presently. If a START SLAVE was in progress, the mutex lock below
    will make us wait until slave threads have started, and START SLAVE
    returns, then we terminate them here.

    We can also be called by cleanup(), which only happens if some
    startup parameter to the server was wrong.
  */
  mysql_mutex_lock(&LOCK_active_mi);
  /*
    master_info_index should not have any threads anymore as they were
    killed as part of slave_prepare_for_shutdown()
  */
  delete master_info_index;
  master_info_index= 0;
  active_mi= 0;
  mysql_mutex_unlock(&LOCK_active_mi);

  global_rpl_thread_pool.destroy();
  free_all_rpl_filters();
  DBUG_VOID_RETURN;
}

/*
  Tell whether the IO thread of mi has been asked to stop: true if
  mi->abort_slave is set or the IO thread's THD is marked killed.
*/
static bool io_slave_killed(Master_info* mi)
{
  DBUG_ENTER("io_slave_killed");

  DBUG_ASSERT(mi->slave_running); // tracking buffer overrun
  DBUG_RETURN(mi->abort_slave || mi->io_thd->killed);
}

/**
   The function analyzes a possible killed status and makes
   a decision whether to accept it or not.
   Normally upon accepting the sql thread goes to shutdown.
   In the event of deferring the decision, the
   @c rgi->last_event_start_time waiting timer is set to force the killed
   status be accepted upon its expiration (after SLAVE_WAIT_GROUP_DONE
   seconds).

   @param rgi   pointer to the rpl_group_info instance; the
                Relay_log_info (rgi->rli) and the THD (rgi->thd) are
                taken from it

   @return TRUE the killed status is recognized, FALSE a possible killed
           status is deferred.
*/
static bool sql_slave_killed(rpl_group_info *rgi)
{
  bool ret= FALSE;
  Relay_log_info *rli= rgi->rli;
  THD *thd= rgi->thd;
  DBUG_ENTER("sql_slave_killed");

  DBUG_ASSERT(rli->sql_driver_thd == thd);
  DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun
  if (rli->sql_driver_thd->killed || rli->abort_slave)
  {
    /*
      The transaction should always be binlogged if OPTION_KEEP_LOG is
      set (it implies that something can not be rolled back). And such
      case should be regarded similarly as modifying a
      non-transactional table because retrying of the transaction will
      lead to an error or inconsistency as well.

      Example: OPTION_KEEP_LOG is set if a temporary table is created
      or dropped.

      Note that transaction.all.modified_non_trans_table may be 1
      if last statement was a single row transaction without begin/end.
      Testing this flag must always be done in connection with
      rli->is_in_group().
    */

    if ((thd->transaction.all.modified_non_trans_table ||
         (thd->variables.option_bits & OPTION_KEEP_LOG)) &&
        rli->is_in_group())
    {
      char msg_stopped[]=
        "... Slave SQL Thread stopped with incomplete event group "
        "having non-transactional changes. "
        "If the group consists solely of row-based events, you can try "
        "to restart the slave with --slave-exec-mode=IDEMPOTENT, which "
        "ignores duplicate key, key not found, and similar errors (see "
        "documentation for details).";

      DBUG_PRINT("info", ("modified_non_trans_table: %d OPTION_BEGIN: %d "
                          "OPTION_KEEP_LOG: %d is_in_group: %d",
                          thd->transaction.all.modified_non_trans_table,
                          MY_TEST(thd->variables.option_bits & OPTION_BEGIN),
                          MY_TEST(thd->variables.option_bits & OPTION_KEEP_LOG),
                          rli->is_in_group()));

      if (rli->abort_slave)
      {
        DBUG_PRINT("info",
                   ("Request to stop slave SQL Thread received while "
                    "applying a group that has non-transactional "
                    "changes; waiting for completion of the group ... "));

        /*
          Slave sql thread shutdown in face of unfinished group
          modified Non-trans table is handled via a timer. The slave
          may eventually give out to complete the current group and in
          that case there might be issues at consequent slave restart,
          see the error message. WL#2975 offers a robust solution
          requiring to store the last executed event's coordinates
          along with the group's coordinates instead of waiting with
          @c last_event_start_time the timer.
        */

        /* Start the timer on the first deferred stop request. */
        if (rgi->last_event_start_time == 0)
          rgi->last_event_start_time= my_time(0);
        /* Keep deferring until SLAVE_WAIT_GROUP_DONE seconds have elapsed. */
        ret= difftime(my_time(0), rgi->last_event_start_time) <=
          SLAVE_WAIT_GROUP_DONE ? FALSE : TRUE;

        DBUG_EXECUTE_IF("stop_slave_middle_group",
                        DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
                                        ret= TRUE;);); // time is over

        if (ret == 0)
        {
          rli->report(WARNING_LEVEL, 0, rgi->gtid_info(),
                      "Request to stop slave SQL Thread received while "
                      "applying a group that has non-transactional "
                      "changes; waiting for completion of the group ... ");
        }
        else
        {
          rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(),
                      ER_THD(thd, ER_SLAVE_FATAL_ERROR), msg_stopped);
        }
      }
      else
      {
        /* The THD was killed (no abort_slave request): accept immediately. */
        ret= TRUE;
        rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(),
                    ER_THD(thd, ER_SLAVE_FATAL_ERROR),
                    msg_stopped);
      }
    }
    else
    {
      ret= TRUE;
    }
  }
  /* Once the killed status is accepted the deferral timer is reset. */
  if (ret)
    rgi->last_event_start_time= 0;

  DBUG_RETURN(ret);
}


/*
  skip_load_data_infile()

  Requests the file "/dev/null" through net_request_file(), reads and
  discards the reply, then writes a command packet with command byte 0.
  All return values are deliberately ignored.

  NOTES
    This is used to tell a 3.23 master to break send_file()
*/

void skip_load_data_infile(NET *net)
{
  DBUG_ENTER("skip_load_data_infile");

  (void)net_request_file(net, "/dev/null");
  (void)my_net_read(net);                               // discard response
  (void)net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0); // ok
  DBUG_VOID_RETURN;
}


/*
  Write a command packet with command byte 251 whose payload is fname
  (without the terminating NUL). Returns the result of
  net_write_command().
*/
bool net_request_file(NET* net, const char* fname)
{
  DBUG_ENTER("net_request_file");
  DBUG_RETURN(net_write_command(net, 251, (uchar*) fname, strlen(fname),
                                (uchar*) "", 0));
}

/*
  From other comments and tests in code, it looks like
  sometimes Query_log_event and Load_log_event can have db == 0
  (see rewrite_db() above for example)
  (cases where this happens are unclear; it may be when the master is 3.23).
1491 */ 1492 1493 const char *print_slave_db_safe(const char* db) 1494 { 1495 DBUG_ENTER("*print_slave_db_safe"); 1496 1497 DBUG_RETURN((db ? db : "")); 1498 } 1499 1500 #endif /* HAVE_REPLICATION */ 1501 1502 int init_strvar_from_file(char *var, int max_size, IO_CACHE *f, 1503 const char *default_val) 1504 { 1505 size_t length; 1506 DBUG_ENTER("init_strvar_from_file"); 1507 1508 if ((length=my_b_gets(f,var, max_size))) 1509 { 1510 char* last_p = var + length -1; 1511 if (*last_p == '\n') 1512 *last_p = 0; // if we stopped on newline, kill it 1513 else 1514 { 1515 /* 1516 If we truncated a line or stopped on last char, remove all chars 1517 up to and including newline. 1518 */ 1519 int c; 1520 while (((c=my_b_get(f)) != '\n' && c != my_b_EOF)) ; 1521 } 1522 DBUG_RETURN(0); 1523 } 1524 else if (default_val) 1525 { 1526 strmake(var, default_val, max_size-1); 1527 DBUG_RETURN(0); 1528 } 1529 DBUG_RETURN(1); 1530 } 1531 1532 /* 1533 when moving these functions to mysys, don't forget to 1534 remove slave.cc from libmysqld/CMakeLists.txt 1535 */ 1536 int init_intvar_from_file(int* var, IO_CACHE* f, int default_val) 1537 { 1538 char buf[32]; 1539 DBUG_ENTER("init_intvar_from_file"); 1540 1541 1542 if (my_b_gets(f, buf, sizeof(buf))) 1543 { 1544 *var = atoi(buf); 1545 DBUG_RETURN(0); 1546 } 1547 else if (default_val) 1548 { 1549 *var = default_val; 1550 DBUG_RETURN(0); 1551 } 1552 DBUG_RETURN(1); 1553 } 1554 1555 int init_floatvar_from_file(float* var, IO_CACHE* f, float default_val) 1556 { 1557 char buf[16]; 1558 DBUG_ENTER("init_floatvar_from_file"); 1559 1560 1561 if (my_b_gets(f, buf, sizeof(buf))) 1562 { 1563 if (sscanf(buf, "%f", var) != 1) 1564 DBUG_RETURN(1); 1565 else 1566 DBUG_RETURN(0); 1567 } 1568 else if (default_val != 0.0) 1569 { 1570 *var = default_val; 1571 DBUG_RETURN(0); 1572 } 1573 DBUG_RETURN(1); 1574 } 1575 1576 1577 /** 1578 A master info read method 1579 1580 This function is called from @c init_master_info() along with 1581 relatives to restore 
some of @c active_mi members. 1582 Particularly, this function is responsible for restoring 1583 IGNORE_SERVER_IDS list of servers whose events the slave is 1584 going to ignore (to not log them in the relay log). 1585 Items being read are supposed to be decimal output of values of a 1586 type shorter or equal of @c long and separated by the single space. 1587 It also used to restore DO_DOMAIN_IDS & IGNORE_DOMAIN_IDS lists. 1588 1589 @param arr @c DYNAMIC_ARRAY pointer to storage for servers id 1590 @param f @c IO_CACHE pointer to the source file 1591 1592 @retval 0 All OK 1593 @retval non-zero An error 1594 */ 1595 1596 int init_dynarray_intvar_from_file(DYNAMIC_ARRAY* arr, IO_CACHE* f) 1597 { 1598 int ret= 0; 1599 char buf[16 * (sizeof(long)*4 + 1)]; // static buffer to use most of times 1600 char *buf_act= buf; // actual buffer can be dynamic if static is short 1601 char *token, *last; 1602 uint num_items; // number of items of `arr' 1603 size_t read_size; 1604 DBUG_ENTER("init_dynarray_intvar_from_file"); 1605 1606 if ((read_size= my_b_gets(f, buf_act, sizeof(buf))) == 0) 1607 { 1608 DBUG_RETURN(0); // no line in master.info 1609 } 1610 if (read_size + 1 == sizeof(buf) && buf[sizeof(buf) - 2] != '\n') 1611 { 1612 /* 1613 short read happend; allocate sufficient memory and make the 2nd read 1614 */ 1615 char buf_work[(sizeof(long)*3 + 1)*16]; 1616 memcpy(buf_work, buf, sizeof(buf_work)); 1617 num_items= atoi(strtok_r(buf_work, " ", &last)); 1618 size_t snd_size; 1619 /* 1620 max size lower bound approximate estimation bases on the formula: 1621 (the items number + items themselves) * 1622 (decimal size + space) - 1 + `\n' + '\0' 1623 */ 1624 size_t max_size= (1 + num_items) * (sizeof(long)*3 + 1) + 1; 1625 buf_act= (char*) my_malloc(max_size, MYF(MY_WME)); 1626 memcpy(buf_act, buf, read_size); 1627 snd_size= my_b_gets(f, buf_act + read_size, max_size - read_size); 1628 if (snd_size == 0 || 1629 ((snd_size + 1 == max_size - read_size) && buf_act[max_size - 2] != 
'\n')) 1630 { 1631 /* 1632 failure to make the 2nd read or short read again 1633 */ 1634 ret= 1; 1635 goto err; 1636 } 1637 } 1638 token= strtok_r(buf_act, " ", &last); 1639 if (token == NULL) 1640 { 1641 ret= 1; 1642 goto err; 1643 } 1644 num_items= atoi(token); 1645 for (uint i=0; i < num_items; i++) 1646 { 1647 token= strtok_r(NULL, " ", &last); 1648 if (token == NULL) 1649 { 1650 ret= 1; 1651 goto err; 1652 } 1653 else 1654 { 1655 ulong val= atol(token); 1656 insert_dynamic(arr, (uchar *) &val); 1657 } 1658 } 1659 err: 1660 if (buf_act != buf) 1661 my_free(buf_act); 1662 DBUG_RETURN(ret); 1663 } 1664 1665 #ifdef HAVE_REPLICATION 1666 1667 /* 1668 Check if the error is caused by network. 1669 @param[in] errorno Number of the error. 1670 RETURNS: 1671 TRUE network error 1672 FALSE not network error 1673 */ 1674 1675 bool is_network_error(uint errorno) 1676 { 1677 if (errorno == CR_CONNECTION_ERROR || 1678 errorno == CR_CONN_HOST_ERROR || 1679 errorno == CR_SERVER_GONE_ERROR || 1680 errorno == CR_SERVER_LOST || 1681 errorno == ER_CON_COUNT_ERROR || 1682 errorno == ER_CONNECTION_KILLED || 1683 errorno == ER_NEW_ABORTING_CONNECTION || 1684 errorno == ER_NET_READ_INTERRUPTED || 1685 errorno == ER_SERVER_SHUTDOWN) 1686 return TRUE; 1687 #ifdef WITH_WSREP 1688 if (errorno == ER_UNKNOWN_COM_ERROR) 1689 return TRUE; 1690 #endif 1691 1692 return FALSE; 1693 } 1694 1695 1696 /* 1697 Note that we rely on the master's version (3.23, 4.0.14 etc) instead of 1698 relying on the binlog's version. This is not perfect: imagine an upgrade 1699 of the master without waiting that all slaves are in sync with the master; 1700 then a slave could be fooled about the binlog's format. This is what happens 1701 when people upgrade a 3.23 master to 4.0 without doing RESET MASTER: 4.0 1702 slaves are fooled. So we do this only to distinguish between 3.23 and more 1703 recent masters (it's too late to change things for 3.23). 
1704 1705 RETURNS 1706 0 ok 1707 1 error 1708 2 transient network problem, the caller should try to reconnect 1709 */ 1710 1711 static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi) 1712 { 1713 char err_buff[MAX_SLAVE_ERRMSG], err_buff2[MAX_SLAVE_ERRMSG]; 1714 const char* errmsg= 0; 1715 int err_code= 0; 1716 MYSQL_RES *master_res= 0; 1717 MYSQL_ROW master_row; 1718 uint version= mysql_get_server_version(mysql) / 10000; 1719 DBUG_ENTER("get_master_version_and_clock"); 1720 1721 /* 1722 Free old description_event_for_queue (that is needed if we are in 1723 a reconnection). 1724 */ 1725 delete mi->rli.relay_log.description_event_for_queue; 1726 mi->rli.relay_log.description_event_for_queue= 0; 1727 1728 if (!my_isdigit(&my_charset_bin,*mysql->server_version)) 1729 { 1730 errmsg= err_buff2; 1731 snprintf(err_buff2, sizeof(err_buff2), 1732 "Master reported unrecognized MySQL version: %s", 1733 mysql->server_version); 1734 err_code= ER_SLAVE_FATAL_ERROR; 1735 sprintf(err_buff, ER_DEFAULT(err_code), err_buff2); 1736 } 1737 else 1738 { 1739 /* 1740 Note the following switch will bug when we have MySQL branch 30 ;) 1741 */ 1742 switch (version) { 1743 case 0: 1744 case 1: 1745 case 2: 1746 errmsg= err_buff2; 1747 snprintf(err_buff2, sizeof(err_buff2), 1748 "Master reported unrecognized MySQL version: %s", 1749 mysql->server_version); 1750 err_code= ER_SLAVE_FATAL_ERROR; 1751 sprintf(err_buff, ER_DEFAULT(err_code), err_buff2); 1752 break; 1753 case 3: 1754 mi->rli.relay_log.description_event_for_queue= new 1755 Format_description_log_event(1, mysql->server_version); 1756 break; 1757 case 4: 1758 mi->rli.relay_log.description_event_for_queue= new 1759 Format_description_log_event(3, mysql->server_version); 1760 break; 1761 default: 1762 /* 1763 Master is MySQL >=5.0. 
Give a default Format_desc event, so that we can 1764 take the early steps (like tests for "is this a 3.23 master") which we 1765 have to take before we receive the real master's Format_desc which will 1766 override this one. Note that the Format_desc we create below is garbage 1767 (it has the format of the *slave*); it's only good to help know if the 1768 master is 3.23, 4.0, etc. 1769 */ 1770 mi->rli.relay_log.description_event_for_queue= new 1771 Format_description_log_event(4, mysql->server_version); 1772 break; 1773 } 1774 } 1775 1776 /* 1777 This does not mean that a 5.0 slave will be able to read a 6.0 master; but 1778 as we don't know yet, we don't want to forbid this for now. If a 5.0 slave 1779 can't read a 6.0 master, this will show up when the slave can't read some 1780 events sent by the master, and there will be error messages. 1781 */ 1782 1783 if (errmsg) 1784 goto err; 1785 1786 /* as we are here, we tried to allocate the event */ 1787 if (!mi->rli.relay_log.description_event_for_queue) 1788 { 1789 errmsg= "default Format_description_log_event"; 1790 err_code= ER_SLAVE_CREATE_EVENT_FAILURE; 1791 sprintf(err_buff, ER_DEFAULT(err_code), errmsg); 1792 goto err; 1793 } 1794 1795 /* 1796 FD_q's (A) is set initially from RL's (A): FD_q.(A) := RL.(A). 1797 It's necessary to adjust FD_q.(A) at this point because in the following 1798 course FD_q is going to be dumped to RL. 1799 Generally FD_q is derived from a received FD_m (roughly FD_q := FD_m) 1800 in queue_event and the master's (A) is installed. 1801 At one step with the assignment the Relay-Log's checksum alg is set to 1802 a new value: RL.(A) := FD_q.(A). If the slave service is stopped 1803 the last time assigned RL.(A) will be passed over to the restarting 1804 service (to the current execution point). 1805 RL.A is a "codec" to verify checksum in queue_event() almost all the time 1806 the first fake Rotate event. 
1807 Starting from this point IO thread will executes the following checksum 1808 warmup sequence of actions: 1809 1810 FD_q.A := RL.A, 1811 A_m^0 := master.@@global.binlog_checksum, 1812 {queue_event(R_f): verifies(R_f, A_m^0)}, 1813 {queue_event(FD_m): verifies(FD_m, FD_m.A), dump(FD_q), rotate(RL), 1814 FD_q := FD_m, RL.A := FD_q.A)} 1815 1816 See legends definition on MYSQL_BIN_LOG::relay_log_checksum_alg 1817 docs lines (binlog.h). 1818 In above A_m^0 - the value of master's 1819 @@binlog_checksum determined in the upcoming handshake (stored in 1820 mi->checksum_alg_before_fd). 1821 1822 1823 After the warm-up sequence IO gets to "normal" checksum verification mode 1824 to use RL.A in 1825 1826 {queue_event(E_m): verifies(E_m, RL.A)} 1827 1828 until it has received a new FD_m. 1829 */ 1830 mi->rli.relay_log.description_event_for_queue->checksum_alg= 1831 mi->rli.relay_log.relay_log_checksum_alg; 1832 1833 DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg != 1834 BINLOG_CHECKSUM_ALG_UNDEF); 1835 DBUG_ASSERT(mi->rli.relay_log.relay_log_checksum_alg != 1836 BINLOG_CHECKSUM_ALG_UNDEF); 1837 /* 1838 Compare the master and slave's clock. Do not die if master's clock is 1839 unavailable (very old master not supporting UNIX_TIMESTAMP()?). 
1840 */ 1841 1842 #ifdef ENABLED_DEBUG_SYNC 1843 DBUG_EXECUTE_IF("dbug.before_get_UNIX_TIMESTAMP", 1844 { 1845 const char act[]= 1846 "now " 1847 "wait_for signal.get_unix_timestamp"; 1848 DBUG_ASSERT(debug_sync_service); 1849 DBUG_ASSERT(!debug_sync_set_action(current_thd, 1850 STRING_WITH_LEN(act))); 1851 };); 1852 #endif 1853 1854 master_res= NULL; 1855 if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()")) && 1856 (master_res= mysql_store_result(mysql)) && 1857 (master_row= mysql_fetch_row(master_res))) 1858 { 1859 mysql_mutex_lock(&mi->data_lock); 1860 mi->clock_diff_with_master= 1861 (long) (time((time_t*) 0) - strtoul(master_row[0], 0, 10)); 1862 mysql_mutex_unlock(&mi->data_lock); 1863 } 1864 else if (check_io_slave_killed(mi, NULL)) 1865 goto slave_killed_err; 1866 else if (is_network_error(mysql_errno(mysql))) 1867 { 1868 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL, 1869 "Get master clock failed with error: %s", mysql_error(mysql)); 1870 goto network_err; 1871 } 1872 else 1873 { 1874 mysql_mutex_lock(&mi->data_lock); 1875 mi->clock_diff_with_master= 0; /* The "most sensible" value */ 1876 mysql_mutex_unlock(&mi->data_lock); 1877 sql_print_warning("\"SELECT UNIX_TIMESTAMP()\" failed on master, " 1878 "do not trust column Seconds_Behind_Master of SHOW " 1879 "SLAVE STATUS. Error: %s (%d)", 1880 mysql_error(mysql), mysql_errno(mysql)); 1881 } 1882 if (master_res) 1883 { 1884 mysql_free_result(master_res); 1885 master_res= NULL; 1886 } 1887 1888 /* 1889 Check that the master's server id and ours are different. Because if they 1890 are equal (which can result from a simple copy of master's datadir to slave, 1891 thus copying some my.cnf), replication will work but all events will be 1892 skipped. 1893 Do not die if SHOW VARIABLES LIKE 'SERVER_ID' fails on master (very old 1894 master?). 1895 Note: we could have put a @@SERVER_ID in the previous SELECT 1896 UNIX_TIMESTAMP() instead, but this would not have worked on 3.23 masters. 
1897 */ 1898 #ifdef ENABLED_DEBUG_SYNC 1899 DBUG_EXECUTE_IF("dbug.before_get_SERVER_ID", 1900 { 1901 const char act[]= 1902 "now " 1903 "wait_for signal.get_server_id"; 1904 DBUG_ASSERT(debug_sync_service); 1905 DBUG_ASSERT(!debug_sync_set_action(current_thd, 1906 STRING_WITH_LEN(act))); 1907 };); 1908 #endif 1909 master_res= NULL; 1910 master_row= NULL; 1911 if (!mysql_real_query(mysql, 1912 STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_ID'")) && 1913 (master_res= mysql_store_result(mysql)) && 1914 (master_row= mysql_fetch_row(master_res))) 1915 { 1916 if ((global_system_variables.server_id == 1917 (mi->master_id= strtoul(master_row[1], 0, 10))) && 1918 !mi->rli.replicate_same_server_id) 1919 { 1920 errmsg= "The slave I/O thread stops because master and slave have equal \ 1921 MySQL server ids; these ids must be different for replication to work (or \ 1922 the --replicate-same-server-id option must be used on slave but this does \ 1923 not always make sense; please check the manual before using it)."; 1924 err_code= ER_SLAVE_FATAL_ERROR; 1925 sprintf(err_buff, ER_DEFAULT(err_code), errmsg); 1926 goto err; 1927 } 1928 } 1929 else if (mysql_errno(mysql)) 1930 { 1931 if (check_io_slave_killed(mi, NULL)) 1932 goto slave_killed_err; 1933 else if (is_network_error(mysql_errno(mysql))) 1934 { 1935 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL, 1936 "Get master SERVER_ID failed with error: %s", mysql_error(mysql)); 1937 goto network_err; 1938 } 1939 /* Fatal error */ 1940 errmsg= "The slave I/O thread stops because a fatal error is encountered \ 1941 when it try to get the value of SERVER_ID variable from master."; 1942 err_code= mysql_errno(mysql); 1943 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 1944 goto err; 1945 } 1946 else if (!master_row && master_res) 1947 { 1948 mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE, NULL, 1949 "Unknown system variable 'SERVER_ID' on master, \ 1950 maybe it is a *VERY OLD MASTER*."); 1951 } 1952 if 
(master_res) 1953 { 1954 mysql_free_result(master_res); 1955 master_res= NULL; 1956 } 1957 if (mi->master_id == 0 && mi->ignore_server_ids.elements > 0) 1958 { 1959 errmsg= "Slave configured with server id filtering could not detect the master server id."; 1960 err_code= ER_SLAVE_FATAL_ERROR; 1961 sprintf(err_buff, ER_DEFAULT(err_code), errmsg); 1962 goto err; 1963 } 1964 1965 /* 1966 Check that the master's global character_set_server and ours are the same. 1967 Not fatal if query fails (old master?). 1968 Note that we don't check for equality of global character_set_client and 1969 collation_connection (neither do we prevent their setting in 1970 set_var.cc). That's because from what I (Guilhem) have tested, the global 1971 values of these 2 are never used (new connections don't use them). 1972 We don't test equality of global collation_database either as it's is 1973 going to be deprecated (made read-only) in 4.1 very soon. 1974 The test is only relevant if master < 5.0.3 (we'll test only if it's older 1975 than the 5 branch; < 5.0.3 was alpha...), as >= 5.0.3 master stores 1976 charset info in each binlog event. 1977 We don't do it for 3.23 because masters <3.23.50 hang on 1978 SELECT @@unknown_var (BUG#7965 - see changelog of 3.23.50). So finally we 1979 test only if master is 4.x. 1980 */ 1981 1982 /* redundant with rest of code but safer against later additions */ 1983 if (version == 3) 1984 goto err; 1985 1986 if (version == 4) 1987 { 1988 master_res= NULL; 1989 if (!mysql_real_query(mysql, 1990 STRING_WITH_LEN("SELECT @@GLOBAL.COLLATION_SERVER")) && 1991 (master_res= mysql_store_result(mysql)) && 1992 (master_row= mysql_fetch_row(master_res))) 1993 { 1994 if (strcmp(master_row[0], global_system_variables.collation_server->name)) 1995 { 1996 errmsg= "The slave I/O thread stops because master and slave have \ 1997 different values for the COLLATION_SERVER global variable. 
The values must \ 1998 be equal for the Statement-format replication to work"; 1999 err_code= ER_SLAVE_FATAL_ERROR; 2000 sprintf(err_buff, ER_DEFAULT(err_code), errmsg); 2001 goto err; 2002 } 2003 } 2004 else if (check_io_slave_killed(mi, NULL)) 2005 goto slave_killed_err; 2006 else if (is_network_error(mysql_errno(mysql))) 2007 { 2008 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL, 2009 "Get master COLLATION_SERVER failed with error: %s", mysql_error(mysql)); 2010 goto network_err; 2011 } 2012 else if (mysql_errno(mysql) != ER_UNKNOWN_SYSTEM_VARIABLE) 2013 { 2014 /* Fatal error */ 2015 errmsg= "The slave I/O thread stops because a fatal error is encountered \ 2016 when it try to get the value of COLLATION_SERVER global variable from master."; 2017 err_code= mysql_errno(mysql); 2018 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 2019 goto err; 2020 } 2021 else 2022 mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE, NULL, 2023 "Unknown system variable 'COLLATION_SERVER' on master, \ 2024 maybe it is a *VERY OLD MASTER*. *NOTE*: slave may experience \ 2025 inconsistency if replicated data deals with collation."); 2026 2027 if (master_res) 2028 { 2029 mysql_free_result(master_res); 2030 master_res= NULL; 2031 } 2032 } 2033 2034 /* 2035 Perform analogous check for time zone. Theoretically we also should 2036 perform check here to verify that SYSTEM time zones are the same on 2037 slave and master, but we can't rely on value of @@system_time_zone 2038 variable (it is time zone abbreviation) since it determined at start 2039 time and so could differ for slave and master even if they are really 2040 in the same system time zone. So we are omiting this check and just 2041 relying on documentation. Also according to Monty there are many users 2042 who are using replication between servers in various time zones. Hence 2043 such check will broke everything for them. 
(And now everything will 2044 work for them because by default both their master and slave will have 2045 'SYSTEM' time zone). 2046 This check is only necessary for 4.x masters (and < 5.0.4 masters but 2047 those were alpha). 2048 */ 2049 if (version == 4) 2050 { 2051 master_res= NULL; 2052 if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.TIME_ZONE")) && 2053 (master_res= mysql_store_result(mysql)) && 2054 (master_row= mysql_fetch_row(master_res))) 2055 { 2056 if (strcmp(master_row[0], 2057 global_system_variables.time_zone->get_name()->ptr())) 2058 { 2059 errmsg= "The slave I/O thread stops because master and slave have \ 2060 different values for the TIME_ZONE global variable. The values must \ 2061 be equal for the Statement-format replication to work"; 2062 err_code= ER_SLAVE_FATAL_ERROR; 2063 sprintf(err_buff, ER_DEFAULT(err_code), errmsg); 2064 goto err; 2065 } 2066 } 2067 else if (check_io_slave_killed(mi, NULL)) 2068 goto slave_killed_err; 2069 else if (is_network_error(err_code= mysql_errno(mysql))) 2070 { 2071 mi->report(ERROR_LEVEL, err_code, NULL, 2072 "Get master TIME_ZONE failed with error: %s", 2073 mysql_error(mysql)); 2074 goto network_err; 2075 } 2076 else if (err_code == ER_UNKNOWN_SYSTEM_VARIABLE) 2077 { 2078 /* We use ERROR_LEVEL to get the error logged to file */ 2079 mi->report(ERROR_LEVEL, err_code, NULL, 2080 2081 "MySQL master doesn't have a TIME_ZONE variable. 
Note that" 2082 "if your timezone is not same between master and slave, your " 2083 "slave may get wrong data into timestamp columns"); 2084 } 2085 else 2086 { 2087 /* Fatal error */ 2088 errmsg= "The slave I/O thread stops because a fatal error is encountered \ 2089 when it try to get the value of TIME_ZONE global variable from master."; 2090 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 2091 goto err; 2092 } 2093 if (master_res) 2094 { 2095 mysql_free_result(master_res); 2096 master_res= NULL; 2097 } 2098 } 2099 2100 if (mi->heartbeat_period != 0.0) 2101 { 2102 const char query_format[]= "SET @master_heartbeat_period= %llu"; 2103 char query[sizeof(query_format) + 32]; 2104 /* 2105 the period is an ulonglong of nano-secs. 2106 */ 2107 my_snprintf(query, sizeof(query), query_format, 2108 (ulonglong) (mi->heartbeat_period*1000000000UL)); 2109 2110 DBUG_EXECUTE_IF("simulate_slave_heartbeat_network_error", 2111 { static ulong dbug_count= 0; 2112 if (++dbug_count < 3) 2113 goto heartbeat_network_error; 2114 }); 2115 if (mysql_real_query(mysql, query, (ulong)strlen(query))) 2116 { 2117 if (check_io_slave_killed(mi, NULL)) 2118 goto slave_killed_err; 2119 2120 if (is_network_error(mysql_errno(mysql))) 2121 { 2122 IF_DBUG(heartbeat_network_error: , ) 2123 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL, 2124 "SET @master_heartbeat_period to master failed with error: %s", 2125 mysql_error(mysql)); 2126 mysql_free_result(mysql_store_result(mysql)); 2127 goto network_err; 2128 } 2129 else 2130 { 2131 /* Fatal error */ 2132 errmsg= "The slave I/O thread stops because a fatal error is encountered " 2133 "when it tries to SET @master_heartbeat_period on master."; 2134 err_code= ER_SLAVE_FATAL_ERROR; 2135 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 2136 mysql_free_result(mysql_store_result(mysql)); 2137 goto err; 2138 } 2139 } 2140 mysql_free_result(mysql_store_result(mysql)); 2141 } 2142 2143 /* 2144 Querying if master is capable to 
checksum and notifying it about own 2145 CRC-awareness. The master's side instant value of @@global.binlog_checksum 2146 is stored in the dump thread's uservar area as well as cached locally 2147 to become known in consensus by master and slave. 2148 */ 2149 DBUG_EXECUTE_IF("simulate_slave_unaware_checksum", 2150 mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_OFF; 2151 goto past_checksum;); 2152 { 2153 int rc; 2154 const char query[]= "SET @master_binlog_checksum= @@global.binlog_checksum"; 2155 master_res= NULL; 2156 mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF; //initially undefined 2157 /* 2158 @c checksum_alg_before_fd is queried from master in this block. 2159 If master is old checksum-unaware the value stays undefined. 2160 Once the first FD will be received its alg descriptor will replace 2161 the being queried one. 2162 */ 2163 rc= mysql_real_query(mysql, query,(ulong)strlen(query)); 2164 if (rc != 0) 2165 { 2166 if (check_io_slave_killed(mi, NULL)) 2167 goto slave_killed_err; 2168 2169 if (mysql_errno(mysql) == ER_UNKNOWN_SYSTEM_VARIABLE) 2170 { 2171 /* Ignore this expected error if not a high error level */ 2172 if (global_system_variables.log_warnings > 1) 2173 { 2174 // this is tolerable as OM -> NS is supported 2175 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL, 2176 "Notifying master by %s failed with " 2177 "error: %s", query, mysql_error(mysql)); 2178 } 2179 } 2180 else 2181 { 2182 if (is_network_error(mysql_errno(mysql))) 2183 { 2184 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL, 2185 "Notifying master by %s failed with " 2186 "error: %s", query, mysql_error(mysql)); 2187 mysql_free_result(mysql_store_result(mysql)); 2188 goto network_err; 2189 } 2190 else 2191 { 2192 errmsg= "The slave I/O thread stops because a fatal error is encountered " 2193 "when it tried to SET @master_binlog_checksum on master."; 2194 err_code= ER_SLAVE_FATAL_ERROR; 2195 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 2196 
mysql_free_result(mysql_store_result(mysql)); 2197 goto err; 2198 } 2199 } 2200 } 2201 else 2202 { 2203 mysql_free_result(mysql_store_result(mysql)); 2204 if (!mysql_real_query(mysql, 2205 STRING_WITH_LEN("SELECT @master_binlog_checksum")) && 2206 (master_res= mysql_store_result(mysql)) && 2207 (master_row= mysql_fetch_row(master_res)) && 2208 (master_row[0] != NULL)) 2209 { 2210 mi->checksum_alg_before_fd= (enum_binlog_checksum_alg) 2211 (find_type(master_row[0], &binlog_checksum_typelib, 1) - 1); 2212 // valid outcome is either of 2213 DBUG_ASSERT(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_OFF || 2214 mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_CRC32); 2215 } 2216 else if (check_io_slave_killed(mi, NULL)) 2217 goto slave_killed_err; 2218 else if (is_network_error(mysql_errno(mysql))) 2219 { 2220 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL, 2221 "Get master BINLOG_CHECKSUM failed with error: %s", mysql_error(mysql)); 2222 goto network_err; 2223 } 2224 else 2225 { 2226 errmsg= "The slave I/O thread stops because a fatal error is encountered " 2227 "when it tried to SELECT @master_binlog_checksum."; 2228 err_code= ER_SLAVE_FATAL_ERROR; 2229 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 2230 mysql_free_result(mysql_store_result(mysql)); 2231 goto err; 2232 } 2233 } 2234 if (master_res) 2235 { 2236 mysql_free_result(master_res); 2237 master_res= NULL; 2238 } 2239 } 2240 2241 #ifndef DBUG_OFF 2242 past_checksum: 2243 #endif 2244 2245 /* 2246 Request the master to filter away events with the @@skip_replication flag 2247 set, if we are running with 2248 --replicate-events-marked-for-skip=FILTER_ON_MASTER. 
2249 */ 2250 if (opt_replicate_events_marked_for_skip == RPL_SKIP_FILTER_ON_MASTER) 2251 { 2252 if (unlikely(mysql_real_query(mysql, 2253 STRING_WITH_LEN("SET skip_replication=1")))) 2254 { 2255 err_code= mysql_errno(mysql); 2256 if (is_network_error(err_code)) 2257 { 2258 mi->report(ERROR_LEVEL, err_code, NULL, 2259 "Setting master-side filtering of @@skip_replication failed " 2260 "with error: %s", mysql_error(mysql)); 2261 goto network_err; 2262 } 2263 else if (err_code == ER_UNKNOWN_SYSTEM_VARIABLE) 2264 { 2265 /* 2266 The master is older than the slave and does not support the 2267 @@skip_replication feature. 2268 This is not a problem, as such master will not generate events with 2269 the @@skip_replication flag set in the first place. We will still 2270 do slave-side filtering of such events though, to handle the (rare) 2271 case of downgrading a master and receiving old events generated from 2272 before the downgrade with the @@skip_replication flag set. 2273 */ 2274 DBUG_PRINT("info", ("Old master does not support master-side filtering " 2275 "of @@skip_replication events.")); 2276 } 2277 else 2278 { 2279 /* Fatal error */ 2280 errmsg= "The slave I/O thread stops because a fatal error is " 2281 "encountered when it tries to request filtering of events marked " 2282 "with the @@skip_replication flag."; 2283 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 2284 goto err; 2285 } 2286 } 2287 } 2288 2289 /* Announce MariaDB slave capabilities. 
*/ 2290 DBUG_EXECUTE_IF("simulate_slave_capability_none", goto after_set_capability;); 2291 { 2292 int rc= DBUG_EVALUATE_IF("simulate_slave_capability_old_53", 2293 mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability=" 2294 STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_ANNOTATE))), 2295 mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability=" 2296 STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_MINE)))); 2297 if (unlikely(rc)) 2298 { 2299 err_code= mysql_errno(mysql); 2300 if (is_network_error(err_code)) 2301 { 2302 mi->report(ERROR_LEVEL, err_code, NULL, 2303 "Setting @mariadb_slave_capability failed with error: %s", 2304 mysql_error(mysql)); 2305 goto network_err; 2306 } 2307 else 2308 { 2309 /* Fatal error */ 2310 errmsg= "The slave I/O thread stops because a fatal error is " 2311 "encountered when it tries to set @mariadb_slave_capability."; 2312 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 2313 goto err; 2314 } 2315 } 2316 } 2317 #ifndef DBUG_OFF 2318 after_set_capability: 2319 #endif 2320 2321 if (mi->using_gtid != Master_info::USE_GTID_NO) 2322 { 2323 /* Request dump to start from slave replication GTID state. */ 2324 int rc; 2325 char str_buf[256]; 2326 String query_str(str_buf, sizeof(str_buf), system_charset_info); 2327 query_str.length(0); 2328 2329 /* 2330 Read the master @@GLOBAL.gtid_domain_id variable. 2331 This is mostly to check that master is GTID aware, but we could later 2332 perhaps use it to check that different multi-source masters are correctly 2333 configured with distinct domain_id. 
2334 */ 2335 if (mysql_real_query(mysql, 2336 STRING_WITH_LEN("SELECT @@GLOBAL.gtid_domain_id")) || 2337 !(master_res= mysql_store_result(mysql)) || 2338 !(master_row= mysql_fetch_row(master_res))) 2339 { 2340 err_code= mysql_errno(mysql); 2341 if (is_network_error(err_code)) 2342 { 2343 mi->report(ERROR_LEVEL, err_code, NULL, 2344 "Get master @@GLOBAL.gtid_domain_id failed with error: %s", 2345 mysql_error(mysql)); 2346 goto network_err; 2347 } 2348 else 2349 { 2350 errmsg= "The slave I/O thread stops because master does not support " 2351 "MariaDB global transaction id. A fatal error is encountered when " 2352 "it tries to SELECT @@GLOBAL.gtid_domain_id."; 2353 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 2354 goto err; 2355 } 2356 } 2357 mysql_free_result(master_res); 2358 master_res= NULL; 2359 2360 query_str.append(STRING_WITH_LEN("SET @slave_connect_state='"), 2361 system_charset_info); 2362 if (mi->gtid_current_pos.append_to_string(&query_str)) 2363 { 2364 err_code= ER_OUTOFMEMORY; 2365 errmsg= "The slave I/O thread stops because a fatal out-of-memory " 2366 "error is encountered when it tries to compute @slave_connect_state."; 2367 sprintf(err_buff, "%s Error: Out of memory", errmsg); 2368 goto err; 2369 } 2370 query_str.append(STRING_WITH_LEN("'"), system_charset_info); 2371 2372 rc= mysql_real_query(mysql, query_str.ptr(), query_str.length()); 2373 if (unlikely(rc)) 2374 { 2375 err_code= mysql_errno(mysql); 2376 if (is_network_error(err_code)) 2377 { 2378 mi->report(ERROR_LEVEL, err_code, NULL, 2379 "Setting @slave_connect_state failed with error: %s", 2380 mysql_error(mysql)); 2381 goto network_err; 2382 } 2383 else 2384 { 2385 /* Fatal error */ 2386 errmsg= "The slave I/O thread stops because a fatal error is " 2387 "encountered when it tries to set @slave_connect_state."; 2388 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 2389 goto err; 2390 } 2391 } 2392 2393 query_str.length(0); 2394 if 
(query_str.append(STRING_WITH_LEN("SET @slave_gtid_strict_mode="), 2395 system_charset_info) || 2396 query_str.append_ulonglong(opt_gtid_strict_mode != false)) 2397 { 2398 err_code= ER_OUTOFMEMORY; 2399 errmsg= "The slave I/O thread stops because a fatal out-of-memory " 2400 "error is encountered when it tries to set @slave_gtid_strict_mode."; 2401 sprintf(err_buff, "%s Error: Out of memory", errmsg); 2402 goto err; 2403 } 2404 2405 rc= mysql_real_query(mysql, query_str.ptr(), query_str.length()); 2406 if (unlikely(rc)) 2407 { 2408 err_code= mysql_errno(mysql); 2409 if (is_network_error(err_code)) 2410 { 2411 mi->report(ERROR_LEVEL, err_code, NULL, 2412 "Setting @slave_gtid_strict_mode failed with error: %s", 2413 mysql_error(mysql)); 2414 goto network_err; 2415 } 2416 else 2417 { 2418 /* Fatal error */ 2419 errmsg= "The slave I/O thread stops because a fatal error is " 2420 "encountered when it tries to set @slave_gtid_strict_mode."; 2421 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 2422 goto err; 2423 } 2424 } 2425 2426 query_str.length(0); 2427 if (query_str.append(STRING_WITH_LEN("SET @slave_gtid_ignore_duplicates="), 2428 system_charset_info) || 2429 query_str.append_ulonglong(opt_gtid_ignore_duplicates != false)) 2430 { 2431 err_code= ER_OUTOFMEMORY; 2432 errmsg= "The slave I/O thread stops because a fatal out-of-memory error " 2433 "is encountered when it tries to set @slave_gtid_ignore_duplicates."; 2434 sprintf(err_buff, "%s Error: Out of memory", errmsg); 2435 goto err; 2436 } 2437 2438 rc= mysql_real_query(mysql, query_str.ptr(), query_str.length()); 2439 if (unlikely(rc)) 2440 { 2441 err_code= mysql_errno(mysql); 2442 if (is_network_error(err_code)) 2443 { 2444 mi->report(ERROR_LEVEL, err_code, NULL, 2445 "Setting @slave_gtid_ignore_duplicates failed with " 2446 "error: %s", mysql_error(mysql)); 2447 goto network_err; 2448 } 2449 else 2450 { 2451 /* Fatal error */ 2452 errmsg= "The slave I/O thread stops because a fatal error is " 2453 
"encountered when it tries to set @slave_gtid_ignore_duplicates."; 2454 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 2455 goto err; 2456 } 2457 } 2458 2459 if (mi->rli.until_condition == Relay_log_info::UNTIL_GTID) 2460 { 2461 query_str.length(0); 2462 query_str.append(STRING_WITH_LEN("SET @slave_until_gtid='"), 2463 system_charset_info); 2464 if (mi->rli.until_gtid_pos.append_to_string(&query_str)) 2465 { 2466 err_code= ER_OUTOFMEMORY; 2467 errmsg= "The slave I/O thread stops because a fatal out-of-memory " 2468 "error is encountered when it tries to compute @slave_until_gtid."; 2469 sprintf(err_buff, "%s Error: Out of memory", errmsg); 2470 goto err; 2471 } 2472 query_str.append(STRING_WITH_LEN("'"), system_charset_info); 2473 2474 rc= mysql_real_query(mysql, query_str.ptr(), query_str.length()); 2475 if (unlikely(rc)) 2476 { 2477 err_code= mysql_errno(mysql); 2478 if (is_network_error(err_code)) 2479 { 2480 mi->report(ERROR_LEVEL, err_code, NULL, 2481 "Setting @slave_until_gtid failed with error: %s", 2482 mysql_error(mysql)); 2483 goto network_err; 2484 } 2485 else 2486 { 2487 /* Fatal error */ 2488 errmsg= "The slave I/O thread stops because a fatal error is " 2489 "encountered when it tries to set @slave_until_gtid."; 2490 sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql)); 2491 goto err; 2492 } 2493 } 2494 } 2495 } 2496 else 2497 { 2498 /* 2499 If we are not using GTID to connect this time, then instead request 2500 the corresponding GTID position from the master, so that the user 2501 can reconnect the next time using MASTER_GTID_POS=AUTO. 
2502 */ 2503 char quote_buf[2*sizeof(mi->master_log_name)+1]; 2504 char str_buf[28+2*sizeof(mi->master_log_name)+10]; 2505 String query(str_buf, sizeof(str_buf), system_charset_info); 2506 query.length(0); 2507 2508 query.append("SELECT binlog_gtid_pos('"); 2509 escape_quotes_for_mysql(&my_charset_bin, quote_buf, sizeof(quote_buf), 2510 mi->master_log_name, strlen(mi->master_log_name)); 2511 query.append(quote_buf); 2512 query.append("',"); 2513 query.append_ulonglong(mi->master_log_pos); 2514 query.append(")"); 2515 2516 if (!mysql_real_query(mysql, query.c_ptr_safe(), query.length()) && 2517 (master_res= mysql_store_result(mysql)) && 2518 (master_row= mysql_fetch_row(master_res)) && 2519 (master_row[0] != NULL)) 2520 { 2521 rpl_global_gtid_slave_state->load(mi->io_thd, master_row[0], 2522 strlen(master_row[0]), false, false); 2523 } 2524 else if (check_io_slave_killed(mi, NULL)) 2525 goto slave_killed_err; 2526 else if (is_network_error(mysql_errno(mysql))) 2527 { 2528 mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL, 2529 "Get master GTID position failed with error: %s", mysql_error(mysql)); 2530 goto network_err; 2531 } 2532 else 2533 { 2534 /* 2535 ToDo: If the master does not have the binlog_gtid_pos() function, it 2536 just means that it is an old master with no GTID support, so we should 2537 do nothing. 2538 2539 However, if binlog_gtid_pos() exists, but fails or returns NULL, then 2540 it means that the requested position is not valid. We could use this 2541 to catch attempts to replicate from within the middle of an event, 2542 avoiding strange failures or possible corruption. 
2543 */ 2544 } 2545 if (master_res) 2546 { 2547 mysql_free_result(master_res); 2548 master_res= NULL; 2549 } 2550 } 2551 2552 err: 2553 if (errmsg) 2554 { 2555 if (master_res) 2556 mysql_free_result(master_res); 2557 DBUG_ASSERT(err_code != 0); 2558 mi->report(ERROR_LEVEL, err_code, NULL, "%s", err_buff); 2559 DBUG_RETURN(1); 2560 } 2561 2562 DBUG_RETURN(0); 2563 2564 network_err: 2565 if (master_res) 2566 mysql_free_result(master_res); 2567 DBUG_RETURN(2); 2568 2569 slave_killed_err: 2570 if (master_res) 2571 mysql_free_result(master_res); 2572 DBUG_RETURN(2); 2573 } 2574 2575 2576 static bool wait_for_relay_log_space(Relay_log_info* rli) 2577 { 2578 bool slave_killed=0; 2579 bool ignore_log_space_limit; 2580 Master_info* mi = rli->mi; 2581 PSI_stage_info old_stage; 2582 THD* thd = mi->io_thd; 2583 DBUG_ENTER("wait_for_relay_log_space"); 2584 2585 mysql_mutex_lock(&rli->log_space_lock); 2586 thd->ENTER_COND(&rli->log_space_cond, 2587 &rli->log_space_lock, 2588 &stage_waiting_for_relay_log_space, 2589 &old_stage); 2590 while (rli->log_space_limit < rli->log_space_total && 2591 !(slave_killed=io_slave_killed(mi)) && 2592 !rli->ignore_log_space_limit) 2593 mysql_cond_wait(&rli->log_space_cond, &rli->log_space_lock); 2594 2595 ignore_log_space_limit= rli->ignore_log_space_limit; 2596 rli->ignore_log_space_limit= 0; 2597 2598 thd->EXIT_COND(&old_stage); 2599 2600 /* 2601 Makes the IO thread read only one event at a time 2602 until the SQL thread is able to purge the relay 2603 logs, freeing some space. 2604 2605 Therefore, once the SQL thread processes this next 2606 event, it goes to sleep (no more events in the queue), 2607 sets ignore_log_space_limit=true and wakes the IO thread. 2608 However, this event may have been enough already for 2609 the SQL thread to purge some log files, freeing 2610 rli->log_space_total . 
2611 2612 This guarantees that the SQL and IO thread move 2613 forward only one event at a time (to avoid deadlocks), 2614 when the relay space limit is reached. It also 2615 guarantees that when the SQL thread is prepared to 2616 rotate (to be able to purge some logs), the IO thread 2617 will know about it and will rotate. 2618 2619 NOTE: The ignore_log_space_limit is only set when the SQL 2620 thread sleeps waiting for events. 2621 2622 */ 2623 2624 if (ignore_log_space_limit) 2625 { 2626 #ifndef DBUG_OFF 2627 { 2628 DBUG_PRINT("info", ("log_space_limit=%llu log_space_total=%llu " 2629 "ignore_log_space_limit=%d " 2630 "sql_force_rotate_relay=%d", 2631 rli->log_space_limit, uint64(rli->log_space_total), 2632 (int) rli->ignore_log_space_limit, 2633 (int) rli->sql_force_rotate_relay)); 2634 } 2635 #endif 2636 if (rli->sql_force_rotate_relay) 2637 { 2638 mysql_mutex_lock(&mi->data_lock); 2639 rotate_relay_log(rli->mi); 2640 mysql_mutex_unlock(&mi->data_lock); 2641 rli->sql_force_rotate_relay= false; 2642 } 2643 } 2644 2645 DBUG_RETURN(slave_killed); 2646 } 2647 2648 2649 /* 2650 Builds a Rotate from the ignored events' info and writes it to relay log. 2651 2652 SYNOPSIS 2653 write_ignored_events_info_to_relay_log() 2654 thd pointer to I/O thread's thd 2655 mi 2656 2657 DESCRIPTION 2658 Slave I/O thread, going to die, must leave a durable trace of the 2659 ignored events' end position for the use of the slave SQL thread, by 2660 calling this function. Only that thread can call it (see assertion). 
2661 */ 2662 static void write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi) 2663 { 2664 Relay_log_info *rli= &mi->rli; 2665 mysql_mutex_t *log_lock= rli->relay_log.get_log_lock(); 2666 DBUG_ENTER("write_ignored_events_info_to_relay_log"); 2667 2668 DBUG_ASSERT(thd == mi->io_thd); 2669 mysql_mutex_lock(log_lock); 2670 if (rli->ign_master_log_name_end[0] || rli->ign_gtids.count()) 2671 { 2672 Rotate_log_event *rev= NULL; 2673 Gtid_list_log_event *glev= NULL; 2674 if (rli->ign_master_log_name_end[0]) 2675 { 2676 rev= new Rotate_log_event(rli->ign_master_log_name_end, 2677 0, rli->ign_master_log_pos_end, 2678 Rotate_log_event::DUP_NAME); 2679 rli->ign_master_log_name_end[0]= 0; 2680 if (unlikely(!(bool)rev)) 2681 mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE, NULL, 2682 ER_THD(thd, ER_SLAVE_CREATE_EVENT_FAILURE), 2683 "Rotate_event (out of memory?)," 2684 " SHOW SLAVE STATUS may be inaccurate"); 2685 } 2686 if (rli->ign_gtids.count()) 2687 { 2688 DBUG_ASSERT(!rli->is_in_group()); // Ensure no active transaction 2689 glev= new Gtid_list_log_event(&rli->ign_gtids, 2690 Gtid_list_log_event::FLAG_IGN_GTIDS); 2691 rli->ign_gtids.reset(); 2692 if (unlikely(!(bool)glev)) 2693 mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE, NULL, 2694 ER_THD(thd, ER_SLAVE_CREATE_EVENT_FAILURE), 2695 "Gtid_list_event (out of memory?)," 2696 " gtid_slave_pos may be inaccurate"); 2697 } 2698 2699 /* Can unlock before writing as slave SQL thd will soon see our event. 
*/ 2700 mysql_mutex_unlock(log_lock); 2701 if (rev) 2702 { 2703 DBUG_PRINT("info",("writing a Rotate event to track down ignored events")); 2704 rev->server_id= 0; // don't be ignored by slave SQL thread 2705 if (unlikely(rli->relay_log.append(rev))) 2706 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL, 2707 ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE), 2708 "failed to write a Rotate event" 2709 " to the relay log, SHOW SLAVE STATUS may be" 2710 " inaccurate"); 2711 delete rev; 2712 } 2713 if (glev) 2714 { 2715 DBUG_PRINT("info",("writing a Gtid_list event to track down ignored events")); 2716 glev->server_id= 0; // don't be ignored by slave SQL thread 2717 glev->set_artificial_event(); // Don't mess up Exec_Master_Log_Pos 2718 if (unlikely(rli->relay_log.append(glev))) 2719 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL, 2720 ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE), 2721 "failed to write a Gtid_list event to the relay log, " 2722 "gtid_slave_pos may be inaccurate"); 2723 delete glev; 2724 } 2725 if (likely (rev || glev)) 2726 { 2727 rli->relay_log.harvest_bytes_written(&rli->log_space_total); 2728 if (flush_master_info(mi, TRUE, TRUE)) 2729 sql_print_error("Failed to flush master info file"); 2730 } 2731 } 2732 else 2733 mysql_mutex_unlock(log_lock); 2734 DBUG_VOID_RETURN; 2735 } 2736 2737 2738 int register_slave_on_master(MYSQL* mysql, Master_info *mi, 2739 bool *suppress_warnings) 2740 { 2741 uchar buf[1024], *pos= buf; 2742 size_t report_host_len=0, report_user_len=0, report_password_len=0; 2743 DBUG_ENTER("register_slave_on_master"); 2744 2745 *suppress_warnings= FALSE; 2746 if (report_host) 2747 report_host_len= strlen(report_host); 2748 if (report_host_len > HOSTNAME_LENGTH) 2749 { 2750 sql_print_warning("The length of report_host is %zu. 
" 2751 "It is larger than the max length(%d), so this " 2752 "slave cannot be registered to the master.", 2753 report_host_len, HOSTNAME_LENGTH); 2754 DBUG_RETURN(0); 2755 } 2756 2757 if (report_user) 2758 report_user_len= strlen(report_user); 2759 if (report_user_len > USERNAME_LENGTH) 2760 { 2761 sql_print_warning("The length of report_user is %zu. " 2762 "It is larger than the max length(%d), so this " 2763 "slave cannot be registered to the master.", 2764 report_user_len, USERNAME_LENGTH); 2765 DBUG_RETURN(0); 2766 } 2767 2768 if (report_password) 2769 report_password_len= strlen(report_password); 2770 if (report_password_len > MAX_PASSWORD_LENGTH) 2771 { 2772 sql_print_warning("The length of report_password is %zu. " 2773 "It is larger than the max length(%d), so this " 2774 "slave cannot be registered to the master.", 2775 report_password_len, MAX_PASSWORD_LENGTH); 2776 DBUG_RETURN(0); 2777 } 2778 2779 int4store(pos, global_system_variables.server_id); pos+= 4; 2780 pos= net_store_data(pos, (uchar*) report_host, report_host_len); 2781 pos= net_store_data(pos, (uchar*) report_user, report_user_len); 2782 pos= net_store_data(pos, (uchar*) report_password, report_password_len); 2783 int2store(pos, (uint16) report_port); pos+= 2; 2784 /* 2785 Fake rpl_recovery_rank, which was removed in BUG#13963, 2786 so that this server can register itself on old servers, 2787 see BUG#49259. 
2788 */ 2789 int4store(pos, /* rpl_recovery_rank */ 0); pos+= 4; 2790 /* The master will fill in master_id */ 2791 int4store(pos, 0); pos+= 4; 2792 2793 if (simple_command(mysql, COM_REGISTER_SLAVE, buf, (ulong) (pos- buf), 0)) 2794 { 2795 if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED) 2796 { 2797 *suppress_warnings= TRUE; // Suppress reconnect warning 2798 } 2799 else if (!check_io_slave_killed(mi, NULL)) 2800 { 2801 char buf[256]; 2802 my_snprintf(buf, sizeof(buf), "%s (Errno: %d)", mysql_error(mysql), 2803 mysql_errno(mysql)); 2804 mi->report(ERROR_LEVEL, ER_SLAVE_MASTER_COM_FAILURE, NULL, 2805 ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_REGISTER_SLAVE", buf); 2806 } 2807 DBUG_RETURN(1); 2808 } 2809 DBUG_RETURN(0); 2810 } 2811 2812 2813 /** 2814 Execute a SHOW SLAVE STATUS statement. 2815 2816 @param thd Pointer to THD object for the client thread executing the 2817 statement. 2818 2819 @param mi Pointer to Master_info object for the IO thread. 2820 2821 @retval FALSE success 2822 @retval TRUE failure 2823 */ 2824 2825 bool show_master_info(THD *thd, Master_info *mi, bool full) 2826 { 2827 DBUG_ENTER("show_master_info"); 2828 String gtid_pos; 2829 List<Item> field_list; 2830 2831 if (full && rpl_global_gtid_slave_state->tostring(>id_pos, NULL, 0)) 2832 DBUG_RETURN(TRUE); 2833 show_master_info_get_fields(thd, &field_list, full, gtid_pos.length()); 2834 if (thd->protocol->send_result_set_metadata(&field_list, 2835 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) 2836 DBUG_RETURN(TRUE); 2837 if (send_show_master_info_data(thd, mi, full, >id_pos)) 2838 DBUG_RETURN(TRUE); 2839 my_eof(thd); 2840 DBUG_RETURN(FALSE); 2841 } 2842 2843 void show_master_info_get_fields(THD *thd, List<Item> *field_list, 2844 bool full, size_t gtid_pos_length) 2845 { 2846 Master_info *mi; 2847 MEM_ROOT *mem_root= thd->mem_root; 2848 DBUG_ENTER("show_master_info_get_fields"); 2849 2850 if (full) 2851 { 2852 field_list->push_back(new (mem_root) 2853 Item_empty_string(thd, "Connection_name", 2854 
MAX_CONNECTION_NAME), 2855 mem_root); 2856 field_list->push_back(new (mem_root) 2857 Item_empty_string(thd, "Slave_SQL_State", 30), 2858 mem_root); 2859 } 2860 2861 field_list->push_back(new (mem_root) 2862 Item_empty_string(thd, "Slave_IO_State", 30), 2863 mem_root); 2864 field_list->push_back(new (mem_root) 2865 Item_empty_string(thd, "Master_Host", sizeof(mi->host)), 2866 mem_root); 2867 field_list->push_back(new (mem_root) 2868 Item_empty_string(thd, "Master_User", sizeof(mi->user)), 2869 mem_root); 2870 field_list->push_back(new (mem_root) 2871 Item_return_int(thd, "Master_Port", 7, MYSQL_TYPE_LONG), 2872 mem_root); 2873 field_list->push_back(new (mem_root) 2874 Item_return_int(thd, "Connect_Retry", 10, 2875 MYSQL_TYPE_LONG), 2876 mem_root); 2877 field_list->push_back(new (mem_root) 2878 Item_empty_string(thd, "Master_Log_File", FN_REFLEN), 2879 mem_root); 2880 field_list->push_back(new (mem_root) 2881 Item_return_int(thd, "Read_Master_Log_Pos", 10, 2882 MYSQL_TYPE_LONGLONG), 2883 mem_root); 2884 field_list->push_back(new (mem_root) 2885 Item_empty_string(thd, "Relay_Log_File", FN_REFLEN), 2886 mem_root); 2887 field_list->push_back(new (mem_root) 2888 Item_return_int(thd, "Relay_Log_Pos", 10, 2889 MYSQL_TYPE_LONGLONG), 2890 mem_root); 2891 field_list->push_back(new (mem_root) 2892 Item_empty_string(thd, "Relay_Master_Log_File", 2893 FN_REFLEN), 2894 mem_root); 2895 field_list->push_back(new (mem_root) 2896 Item_empty_string(thd, "Slave_IO_Running", 3), 2897 mem_root); 2898 field_list->push_back(new (mem_root) 2899 Item_empty_string(thd, "Slave_SQL_Running", 3), 2900 mem_root); 2901 field_list->push_back(new (mem_root) 2902 Item_empty_string(thd, "Replicate_Do_DB", 20), 2903 mem_root); 2904 field_list->push_back(new (mem_root) 2905 Item_empty_string(thd, "Replicate_Ignore_DB", 20), 2906 mem_root); 2907 field_list->push_back(new (mem_root) 2908 Item_empty_string(thd, "Replicate_Do_Table", 20), 2909 mem_root); 2910 field_list->push_back(new (mem_root) 2911 
Item_empty_string(thd, "Replicate_Ignore_Table", 23), 2912 mem_root); 2913 field_list->push_back(new (mem_root) 2914 Item_empty_string(thd, "Replicate_Wild_Do_Table", 24), 2915 mem_root); 2916 field_list->push_back(new (mem_root) 2917 Item_empty_string(thd, "Replicate_Wild_Ignore_Table", 2918 28), 2919 mem_root); 2920 field_list->push_back(new (mem_root) 2921 Item_return_int(thd, "Last_Errno", 4, MYSQL_TYPE_LONG), 2922 mem_root); 2923 field_list->push_back(new (mem_root) 2924 Item_empty_string(thd, "Last_Error", 20), 2925 mem_root); 2926 field_list->push_back(new (mem_root) 2927 Item_return_int(thd, "Skip_Counter", 10, 2928 MYSQL_TYPE_LONG), 2929 mem_root); 2930 field_list->push_back(new (mem_root) 2931 Item_return_int(thd, "Exec_Master_Log_Pos", 10, 2932 MYSQL_TYPE_LONGLONG), 2933 mem_root); 2934 field_list->push_back(new (mem_root) 2935 Item_return_int(thd, "Relay_Log_Space", 10, 2936 MYSQL_TYPE_LONGLONG), 2937 mem_root); 2938 field_list->push_back(new (mem_root) 2939 Item_empty_string(thd, "Until_Condition", 6), 2940 mem_root); 2941 field_list->push_back(new (mem_root) 2942 Item_empty_string(thd, "Until_Log_File", FN_REFLEN), 2943 mem_root); 2944 field_list->push_back(new (mem_root) 2945 Item_return_int(thd, "Until_Log_Pos", 10, 2946 MYSQL_TYPE_LONGLONG), 2947 mem_root); 2948 field_list->push_back(new (mem_root) 2949 Item_empty_string(thd, "Master_SSL_Allowed", 7), 2950 mem_root); 2951 field_list->push_back(new (mem_root) 2952 Item_empty_string(thd, "Master_SSL_CA_File", 2953 sizeof(mi->ssl_ca)), 2954 mem_root); 2955 field_list->push_back(new (mem_root) 2956 Item_empty_string(thd, "Master_SSL_CA_Path", 2957 sizeof(mi->ssl_capath)), 2958 mem_root); 2959 field_list->push_back(new (mem_root) 2960 Item_empty_string(thd, "Master_SSL_Cert", 2961 sizeof(mi->ssl_cert)), 2962 mem_root); 2963 field_list->push_back(new (mem_root) 2964 Item_empty_string(thd, "Master_SSL_Cipher", 2965 sizeof(mi->ssl_cipher)), 2966 mem_root); 2967 field_list->push_back(new (mem_root) 2968 
Item_empty_string(thd, "Master_SSL_Key", 2969 sizeof(mi->ssl_key)), 2970 mem_root); 2971 field_list->push_back(new (mem_root) 2972 Item_return_int(thd, "Seconds_Behind_Master", 10, 2973 MYSQL_TYPE_LONGLONG), 2974 mem_root); 2975 field_list->push_back(new (mem_root) 2976 Item_empty_string(thd, "Master_SSL_Verify_Server_Cert", 2977 3), 2978 mem_root); 2979 field_list->push_back(new (mem_root) 2980 Item_return_int(thd, "Last_IO_Errno", 4, 2981 MYSQL_TYPE_LONG), 2982 mem_root); 2983 field_list->push_back(new (mem_root) 2984 Item_empty_string(thd, "Last_IO_Error", 20), 2985 mem_root); 2986 field_list->push_back(new (mem_root) 2987 Item_return_int(thd, "Last_SQL_Errno", 4, 2988 MYSQL_TYPE_LONG), 2989 mem_root); 2990 field_list->push_back(new (mem_root) 2991 Item_empty_string(thd, "Last_SQL_Error", 20), 2992 mem_root); 2993 field_list->push_back(new (mem_root) 2994 Item_empty_string(thd, "Replicate_Ignore_Server_Ids", 2995 FN_REFLEN), 2996 mem_root); 2997 field_list->push_back(new (mem_root) 2998 Item_return_int(thd, "Master_Server_Id", sizeof(ulong), 2999 MYSQL_TYPE_LONG), 3000 mem_root); 3001 field_list->push_back(new (mem_root) 3002 Item_empty_string(thd, "Master_SSL_Crl", 3003 sizeof(mi->ssl_crl)), 3004 mem_root); 3005 field_list->push_back(new (mem_root) 3006 Item_empty_string(thd, "Master_SSL_Crlpath", 3007 sizeof(mi->ssl_crlpath)), 3008 mem_root); 3009 field_list->push_back(new (mem_root) 3010 Item_empty_string(thd, "Using_Gtid", 3011 sizeof("Current_Pos")-1), 3012 mem_root); 3013 field_list->push_back(new (mem_root) 3014 Item_empty_string(thd, "Gtid_IO_Pos", 30), 3015 mem_root); 3016 field_list->push_back(new (mem_root) 3017 Item_empty_string(thd, "Replicate_Do_Domain_Ids", 3018 FN_REFLEN), 3019 mem_root); 3020 field_list->push_back(new (mem_root) 3021 Item_empty_string(thd, "Replicate_Ignore_Domain_Ids", 3022 FN_REFLEN), 3023 mem_root); 3024 field_list->push_back(new (mem_root) 3025 Item_empty_string(thd, "Parallel_Mode", 3026 sizeof("conservative")-1), 3027 
mem_root); 3028 field_list->push_back(new (mem_root) 3029 Item_return_int(thd, "SQL_Delay", 10, 3030 MYSQL_TYPE_LONG)); 3031 field_list->push_back(new (mem_root) 3032 Item_return_int(thd, "SQL_Remaining_Delay", 8, 3033 MYSQL_TYPE_LONG)); 3034 field_list->push_back(new (mem_root) 3035 Item_empty_string(thd, "Slave_SQL_Running_State", 3036 20)); 3037 field_list->push_back(new (mem_root) 3038 Item_return_int(thd, "Slave_DDL_Groups", 20, 3039 MYSQL_TYPE_LONGLONG), 3040 mem_root); 3041 field_list->push_back(new (mem_root) 3042 Item_return_int(thd, "Slave_Non_Transactional_Groups", 20, 3043 MYSQL_TYPE_LONGLONG), 3044 mem_root); 3045 field_list->push_back(new (mem_root) 3046 Item_return_int(thd, "Slave_Transactional_Groups", 20, 3047 MYSQL_TYPE_LONGLONG), 3048 mem_root); 3049 3050 if (full) 3051 { 3052 field_list->push_back(new (mem_root) 3053 Item_return_int(thd, "Retried_transactions", 10, 3054 MYSQL_TYPE_LONG), 3055 mem_root); 3056 field_list->push_back(new (mem_root) 3057 Item_return_int(thd, "Max_relay_log_size", 10, 3058 MYSQL_TYPE_LONGLONG), 3059 mem_root); 3060 field_list->push_back(new (mem_root) 3061 Item_return_int(thd, "Executed_log_entries", 10, 3062 MYSQL_TYPE_LONG), 3063 mem_root); 3064 field_list->push_back(new (mem_root) 3065 Item_return_int(thd, "Slave_received_heartbeats", 10, 3066 MYSQL_TYPE_LONG), 3067 mem_root); 3068 field_list->push_back(new (mem_root) 3069 Item_float(thd, "Slave_heartbeat_period", 0.0, 3, 10), 3070 mem_root); 3071 field_list->push_back(new (mem_root) 3072 Item_empty_string(thd, "Gtid_Slave_Pos", 3073 (uint)gtid_pos_length), 3074 mem_root); 3075 } 3076 DBUG_VOID_RETURN; 3077 } 3078 3079 /* Text for Slave_IO_Running */ 3080 static const char *slave_running[]= { "No", "Connecting", "Preparing", "Yes" }; 3081 3082 static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full, 3083 String *gtid_pos) 3084 { 3085 DBUG_ENTER("send_show_master_info_data"); 3086 3087 if (mi->host[0]) 3088 { 3089 DBUG_PRINT("info",("host is set: 
'%s'", mi->host)); 3090 String *packet= &thd->packet; 3091 Protocol *protocol= thd->protocol; 3092 Rpl_filter *rpl_filter= mi->rpl_filter; 3093 StringBuffer<256> tmp; 3094 3095 protocol->prepare_for_resend(); 3096 3097 /* 3098 slave_running can be accessed without run_lock but not other 3099 non-volotile members like mi->io_thd, which is guarded by the mutex. 3100 */ 3101 if (full) 3102 protocol->store(mi->connection_name.str, mi->connection_name.length, 3103 &my_charset_bin); 3104 mysql_mutex_lock(&mi->run_lock); 3105 if (full) 3106 { 3107 /* 3108 Show what the sql driver replication thread is doing 3109 This is only meaningful if there is only one slave thread. 3110 */ 3111 protocol->store(mi->rli.sql_driver_thd ? 3112 mi->rli.sql_driver_thd->get_proc_info() : "", 3113 &my_charset_bin); 3114 } 3115 protocol->store(mi->io_thd ? mi->io_thd->get_proc_info() : "", &my_charset_bin); 3116 mysql_mutex_unlock(&mi->run_lock); 3117 3118 mysql_mutex_lock(&mi->data_lock); 3119 mysql_mutex_lock(&mi->rli.data_lock); 3120 /* err_lock is to protect mi->last_error() */ 3121 mysql_mutex_lock(&mi->err_lock); 3122 /* err_lock is to protect mi->rli.last_error() */ 3123 mysql_mutex_lock(&mi->rli.err_lock); 3124 protocol->store(mi->host, &my_charset_bin); 3125 protocol->store(mi->user, &my_charset_bin); 3126 protocol->store((uint32) mi->port); 3127 protocol->store((uint32) mi->connect_retry); 3128 protocol->store(mi->master_log_name, &my_charset_bin); 3129 protocol->store((ulonglong) mi->master_log_pos); 3130 protocol->store(mi->rli.group_relay_log_name + 3131 dirname_length(mi->rli.group_relay_log_name), 3132 &my_charset_bin); 3133 protocol->store((ulonglong) mi->rli.group_relay_log_pos); 3134 protocol->store(mi->rli.group_master_log_name, &my_charset_bin); 3135 protocol->store(slave_running[mi->slave_running], &my_charset_bin); 3136 protocol->store(mi->rli.slave_running ? 
"Yes":"No", &my_charset_bin); 3137 protocol->store(rpl_filter->get_do_db()); 3138 protocol->store(rpl_filter->get_ignore_db()); 3139 3140 rpl_filter->get_do_table(&tmp); 3141 protocol->store(&tmp); 3142 rpl_filter->get_ignore_table(&tmp); 3143 protocol->store(&tmp); 3144 rpl_filter->get_wild_do_table(&tmp); 3145 protocol->store(&tmp); 3146 rpl_filter->get_wild_ignore_table(&tmp); 3147 protocol->store(&tmp); 3148 3149 protocol->store(mi->rli.last_error().number); 3150 protocol->store(mi->rli.last_error().message, &my_charset_bin); 3151 protocol->store((uint32) mi->rli.slave_skip_counter); 3152 protocol->store((ulonglong) mi->rli.group_master_log_pos); 3153 protocol->store((ulonglong) mi->rli.log_space_total); 3154 3155 protocol->store( 3156 mi->rli.until_condition==Relay_log_info::UNTIL_NONE ? "None": 3157 ( mi->rli.until_condition==Relay_log_info::UNTIL_MASTER_POS? "Master": 3158 ( mi->rli.until_condition==Relay_log_info::UNTIL_RELAY_POS? "Relay": 3159 "Gtid")), &my_charset_bin); 3160 protocol->store(mi->rli.until_log_name, &my_charset_bin); 3161 protocol->store((ulonglong) mi->rli.until_log_pos); 3162 3163 #ifdef HAVE_OPENSSL 3164 protocol->store(mi->ssl? "Yes":"No", &my_charset_bin); 3165 #else 3166 protocol->store(mi->ssl? "Ignored":"No", &my_charset_bin); 3167 #endif 3168 protocol->store(mi->ssl_ca, &my_charset_bin); 3169 protocol->store(mi->ssl_capath, &my_charset_bin); 3170 protocol->store(mi->ssl_cert, &my_charset_bin); 3171 protocol->store(mi->ssl_cipher, &my_charset_bin); 3172 protocol->store(mi->ssl_key, &my_charset_bin); 3173 3174 /* 3175 Seconds_Behind_Master: if SQL thread is running and I/O thread is 3176 connected, we can compute it otherwise show NULL (i.e. unknown). 
3177 */ 3178 if ((mi->slave_running == MYSQL_SLAVE_RUN_READING) && 3179 mi->rli.slave_running) 3180 { 3181 long time_diff; 3182 bool idle; 3183 time_t stamp= mi->rli.last_master_timestamp; 3184 3185 if (!stamp) 3186 idle= true; 3187 else 3188 { 3189 idle= mi->rli.sql_thread_caught_up; 3190 if (mi->using_parallel() && idle && !mi->rli.parallel.workers_idle()) 3191 idle= false; 3192 } 3193 if (idle) 3194 time_diff= 0; 3195 else 3196 { 3197 time_diff= ((long)(time(0) - stamp) - mi->clock_diff_with_master); 3198 /* 3199 Apparently on some systems time_diff can be <0. Here are possible 3200 reasons related to MySQL: 3201 - the master is itself a slave of another master whose time is ahead. 3202 - somebody used an explicit SET TIMESTAMP on the master. 3203 Possible reason related to granularity-to-second of time functions 3204 (nothing to do with MySQL), which can explain a value of -1: 3205 assume the master's and slave's time are perfectly synchronized, and 3206 that at slave's connection time, when the master's timestamp is read, 3207 it is at the very end of second 1, and (a very short time later) when 3208 the slave's timestamp is read it is at the very beginning of second 3209 2. Then the recorded value for master is 1 and the recorded value for 3210 slave is 2. At SHOW SLAVE STATUS time, assume that the difference 3211 between timestamp of slave and rli->last_master_timestamp is 0 3212 (i.e. they are in the same second), then we get 0-(2-1)=-1 as a result. 3213 This confuses users, so we don't go below 0. 3214 3215 last_master_timestamp == 0 (an "impossible" timestamp 1970) is a 3216 special marker to say "consider we have caught up". 3217 */ 3218 if (time_diff < 0) 3219 time_diff= 0; 3220 } 3221 protocol->store((longlong)time_diff); 3222 } 3223 else 3224 { 3225 protocol->store_null(); 3226 } 3227 protocol->store(mi->ssl_verify_server_cert? 
"Yes":"No", &my_charset_bin); 3228 3229 // Last_IO_Errno 3230 protocol->store(mi->last_error().number); 3231 // Last_IO_Error 3232 protocol->store(mi->last_error().message, &my_charset_bin); 3233 // Last_SQL_Errno 3234 protocol->store(mi->rli.last_error().number); 3235 // Last_SQL_Error 3236 protocol->store(mi->rli.last_error().message, &my_charset_bin); 3237 // Replicate_Ignore_Server_Ids 3238 prot_store_ids(thd, &mi->ignore_server_ids); 3239 // Master_Server_id 3240 protocol->store((uint32) mi->master_id); 3241 // SQL_Delay 3242 // Master_Ssl_Crl 3243 protocol->store(mi->ssl_ca, &my_charset_bin); 3244 // Master_Ssl_Crlpath 3245 protocol->store(mi->ssl_capath, &my_charset_bin); 3246 // Using_Gtid 3247 protocol->store(mi->using_gtid_astext(mi->using_gtid), &my_charset_bin); 3248 // Gtid_IO_Pos 3249 { 3250 mi->gtid_current_pos.to_string(&tmp); 3251 protocol->store(tmp.ptr(), tmp.length(), &my_charset_bin); 3252 } 3253 3254 // Replicate_Do_Domain_Ids & Replicate_Ignore_Domain_Ids 3255 mi->domain_id_filter.store_ids(thd); 3256 3257 // Parallel_Mode 3258 { 3259 const char *mode_name= get_type(&slave_parallel_mode_typelib, 3260 mi->parallel_mode); 3261 protocol->store(mode_name, strlen(mode_name), &my_charset_bin); 3262 } 3263 3264 protocol->store((uint32) mi->rli.get_sql_delay()); 3265 // SQL_Remaining_Delay 3266 // THD::proc_info is not protected by any lock, so we read it once 3267 // to ensure that we use the same value throughout this function. 3268 const char *slave_sql_running_state= 3269 mi->rli.sql_driver_thd ? mi->rli.sql_driver_thd->proc_info : ""; 3270 if (slave_sql_running_state == Relay_log_info::state_delaying_string) 3271 { 3272 time_t t= my_time(0), sql_delay_end= mi->rli.get_sql_delay_end(); 3273 protocol->store((uint32)(t < sql_delay_end ? 
sql_delay_end - t : 0)); 3274 } 3275 else 3276 protocol->store_null(); 3277 // Slave_SQL_Running_State 3278 protocol->store(slave_sql_running_state, &my_charset_bin); 3279 3280 protocol->store(mi->total_ddl_groups); 3281 protocol->store(mi->total_non_trans_groups); 3282 protocol->store(mi->total_trans_groups); 3283 3284 if (full) 3285 { 3286 protocol->store((uint32) mi->rli.retried_trans); 3287 protocol->store((ulonglong) mi->rli.max_relay_log_size); 3288 protocol->store(mi->rli.executed_entries); 3289 protocol->store((uint32) mi->received_heartbeats); 3290 protocol->store((double) mi->heartbeat_period, 3, &tmp); 3291 protocol->store(gtid_pos->ptr(), gtid_pos->length(), &my_charset_bin); 3292 } 3293 3294 mysql_mutex_unlock(&mi->rli.err_lock); 3295 mysql_mutex_unlock(&mi->err_lock); 3296 mysql_mutex_unlock(&mi->rli.data_lock); 3297 mysql_mutex_unlock(&mi->data_lock); 3298 3299 if (my_net_write(&thd->net, (uchar*) thd->packet.ptr(), packet->length())) 3300 DBUG_RETURN(TRUE); 3301 } 3302 DBUG_RETURN(FALSE); 3303 } 3304 3305 3306 /* Used to sort connections by name */ 3307 3308 static int cmp_mi_by_name(const Master_info **arg1, 3309 const Master_info **arg2) 3310 { 3311 return my_strcasecmp(system_charset_info, (*arg1)->connection_name.str, 3312 (*arg2)->connection_name.str); 3313 } 3314 3315 3316 /** 3317 Execute a SHOW FULL SLAVE STATUS statement. 3318 3319 @param thd Pointer to THD object for the client thread executing the 3320 statement. 3321 3322 Elements are sorted according to the original connection_name. 3323 3324 @retval FALSE success 3325 @retval TRUE failure 3326 3327 @note 3328 master_info_index is protected by LOCK_active_mi. 
3329 */ 3330 3331 bool show_all_master_info(THD* thd) 3332 { 3333 uint i, elements; 3334 String gtid_pos; 3335 Master_info **tmp; 3336 List<Item> field_list; 3337 DBUG_ENTER("show_master_info"); 3338 mysql_mutex_assert_owner(&LOCK_active_mi); 3339 3340 gtid_pos.length(0); 3341 if (rpl_append_gtid_state(>id_pos, true)) 3342 { 3343 my_error(ER_OUT_OF_RESOURCES, MYF(0)); 3344 DBUG_RETURN(TRUE); 3345 } 3346 3347 show_master_info_get_fields(thd, &field_list, 1, gtid_pos.length()); 3348 if (thd->protocol->send_result_set_metadata(&field_list, 3349 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)) 3350 DBUG_RETURN(TRUE); 3351 3352 if (!master_info_index || 3353 !(elements= master_info_index->master_info_hash.records)) 3354 goto end; 3355 3356 /* 3357 Sort lines to get them into a predicted order 3358 (needed for test cases and to not confuse users) 3359 */ 3360 if (!(tmp= (Master_info**) thd->alloc(sizeof(Master_info*) * elements))) 3361 DBUG_RETURN(TRUE); 3362 3363 for (i= 0; i < elements; i++) 3364 { 3365 tmp[i]= (Master_info *) my_hash_element(&master_info_index-> 3366 master_info_hash, i); 3367 } 3368 my_qsort(tmp, elements, sizeof(Master_info*), (qsort_cmp) cmp_mi_by_name); 3369 3370 for (i= 0; i < elements; i++) 3371 { 3372 if (send_show_master_info_data(thd, tmp[i], 1, >id_pos)) 3373 DBUG_RETURN(TRUE); 3374 } 3375 3376 end: 3377 my_eof(thd); 3378 DBUG_RETURN(FALSE); 3379 } 3380 3381 3382 void set_slave_thread_options(THD* thd) 3383 { 3384 DBUG_ENTER("set_slave_thread_options"); 3385 /* 3386 It's nonsense to constrain the slave threads with max_join_size; if a 3387 query succeeded on master, we HAVE to execute it. So set 3388 OPTION_BIG_SELECTS. Setting max_join_size to HA_POS_ERROR is not enough 3389 (and it's not needed if we have OPTION_BIG_SELECTS) because an INSERT 3390 SELECT examining more than 4 billion rows would still fail (yes, because 3391 when max_join_size is 4G, OPTION_BIG_SELECTS is automatically set, but 3392 only for client threads. 
3393 */ 3394 ulonglong options= thd->variables.option_bits | OPTION_BIG_SELECTS; 3395 if (opt_log_slave_updates) 3396 options|= OPTION_BIN_LOG; 3397 else 3398 options&= ~OPTION_BIN_LOG; 3399 thd->variables.option_bits= options; 3400 thd->variables.completion_type= 0; 3401 3402 /* For easier test in LOGGER::log_command */ 3403 if (thd->variables.log_disabled_statements & LOG_DISABLE_SLAVE) 3404 thd->variables.option_bits|= OPTION_LOG_OFF; 3405 3406 thd->variables.sql_log_slow= !MY_TEST(thd->variables.log_slow_disabled_statements & 3407 LOG_SLOW_DISABLE_SLAVE); 3408 DBUG_VOID_RETURN; 3409 } 3410 3411 void set_slave_thread_default_charset(THD* thd, rpl_group_info *rgi) 3412 { 3413 DBUG_ENTER("set_slave_thread_default_charset"); 3414 3415 thd->variables.collation_server= 3416 global_system_variables.collation_server; 3417 thd->update_charset(global_system_variables.character_set_client, 3418 global_system_variables.collation_connection); 3419 3420 thd->system_thread_info.rpl_sql_info->cached_charset_invalidate(); 3421 DBUG_VOID_RETURN; 3422 } 3423 3424 /* 3425 init_slave_thread() 3426 */ 3427 3428 static int init_slave_thread(THD* thd, Master_info *mi, 3429 SLAVE_THD_TYPE thd_type) 3430 { 3431 DBUG_ENTER("init_slave_thread"); 3432 int simulate_error __attribute__((unused))= 0; 3433 DBUG_EXECUTE_IF("simulate_io_slave_error_on_init", 3434 simulate_error|= (1 << SLAVE_THD_IO);); 3435 DBUG_EXECUTE_IF("simulate_sql_slave_error_on_init", 3436 simulate_error|= (1 << SLAVE_THD_SQL);); 3437 3438 thd->system_thread = (thd_type == SLAVE_THD_SQL) ? 
3439 SYSTEM_THREAD_SLAVE_SQL : SYSTEM_THREAD_SLAVE_IO; 3440 3441 /* We must call store_globals() before doing my_net_init() */ 3442 if (init_thr_lock() || thd->store_globals() || 3443 my_net_init(&thd->net, 0, thd, MYF(MY_THREAD_SPECIFIC)) || 3444 IF_DBUG(simulate_error & (1<< thd_type), 0)) 3445 { 3446 thd->cleanup(); 3447 DBUG_RETURN(-1); 3448 } 3449 3450 thd->security_ctx->skip_grants(); 3451 thd->slave_thread= 1; 3452 thd->connection_name= mi->connection_name; 3453 thd->variables.sql_log_slow= !MY_TEST(thd->variables.log_slow_disabled_statements & LOG_SLOW_DISABLE_SLAVE); 3454 set_slave_thread_options(thd); 3455 3456 if (thd_type == SLAVE_THD_SQL) 3457 THD_STAGE_INFO(thd, stage_waiting_for_the_next_event_in_relay_log); 3458 else 3459 THD_STAGE_INFO(thd, stage_waiting_for_master_update); 3460 thd->set_time(); 3461 /* Do not use user-supplied timeout value for system threads. */ 3462 thd->variables.lock_wait_timeout= LONG_TIMEOUT; 3463 DBUG_RETURN(0); 3464 } 3465 3466 /* 3467 Sleep for a given amount of time or until killed. 3468 3469 @param thd Thread context of the current thread. 3470 @param seconds The number of seconds to sleep. 3471 @param func Function object to check if the thread has been killed. 3472 @param info The Rpl_info object associated with this sleep. 3473 3474 @retval True if the thread has been killed, false otherwise. 3475 */ 3476 template <typename killed_func, typename rpl_info> 3477 static bool slave_sleep(THD *thd, time_t seconds, 3478 killed_func func, rpl_info info) 3479 { 3480 3481 bool ret; 3482 struct timespec abstime; 3483 3484 mysql_mutex_t *lock= &info->sleep_lock; 3485 mysql_cond_t *cond= &info->sleep_cond; 3486 3487 /* Absolute system time at which the sleep time expires. */ 3488 set_timespec(abstime, seconds); 3489 mysql_mutex_lock(lock); 3490 thd->ENTER_COND(cond, lock, NULL, NULL); 3491 3492 while (! 
(ret= func(info))) 3493 { 3494 int error= mysql_cond_timedwait(cond, lock, &abstime); 3495 if (error == ETIMEDOUT || error == ETIME) 3496 break; 3497 } 3498 /* Implicitly unlocks the mutex. */ 3499 thd->EXIT_COND(NULL); 3500 return ret; 3501 } 3502 3503 3504 static int request_dump(THD *thd, MYSQL* mysql, Master_info* mi, 3505 bool *suppress_warnings) 3506 { 3507 uchar buf[FN_REFLEN + 10]; 3508 int len; 3509 ushort binlog_flags = 0; // for now 3510 char* logname = mi->master_log_name; 3511 DBUG_ENTER("request_dump"); 3512 3513 *suppress_warnings= FALSE; 3514 3515 if (opt_log_slave_updates && opt_replicate_annotate_row_events) 3516 binlog_flags|= BINLOG_SEND_ANNOTATE_ROWS_EVENT; 3517 3518 if (repl_semisync_slave.request_transmit(mi)) 3519 DBUG_RETURN(1); 3520 3521 // TODO if big log files: Change next to int8store() 3522 int4store(buf, (ulong) mi->master_log_pos); 3523 int2store(buf + 4, binlog_flags); 3524 int4store(buf + 6, global_system_variables.server_id); 3525 len = (uint) strlen(logname); 3526 memcpy(buf + 10, logname,len); 3527 if (simple_command(mysql, COM_BINLOG_DUMP, buf, len + 10, 1)) 3528 { 3529 /* 3530 Something went wrong, so we will just reconnect and retry later 3531 in the future, we should do a better error analysis, but for 3532 now we just fill up the error log :-) 3533 */ 3534 if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED || 3535 mysql_errno(mysql) == ER_NET_ERROR_ON_WRITE) 3536 *suppress_warnings= TRUE; // Suppress reconnect warning 3537 else 3538 sql_print_error("Error on COM_BINLOG_DUMP: %d %s, will retry in %d secs", 3539 mysql_errno(mysql), mysql_error(mysql), 3540 mi->connect_retry); 3541 DBUG_RETURN(1); 3542 } 3543 3544 DBUG_RETURN(0); 3545 } 3546 3547 3548 /* 3549 Read one event from the master 3550 3551 SYNOPSIS 3552 read_event() 3553 mysql MySQL connection 3554 mi Master connection information 3555 suppress_warnings TRUE when a normal net read timeout has caused us to 3556 try a reconnect. 
We do not want to print anything to 3557 the error log in this case because this a anormal 3558 event in an idle server. 3559 network_read_len get the real network read length in VIO, especially using compressed protocol 3560 3561 RETURN VALUES 3562 'packet_error' Error 3563 number Length of packet 3564 */ 3565 3566 static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings, 3567 ulong* network_read_len) 3568 { 3569 ulong len; 3570 DBUG_ENTER("read_event"); 3571 3572 *suppress_warnings= FALSE; 3573 /* 3574 my_real_read() will time us out 3575 We check if we were told to die, and if not, try reading again 3576 */ 3577 #ifndef DBUG_OFF 3578 if (disconnect_slave_event_count && !(mi->events_till_disconnect--)) 3579 DBUG_RETURN(packet_error); 3580 #endif 3581 3582 len = cli_safe_read_reallen(mysql, network_read_len); 3583 if (unlikely(len == packet_error || (long) len < 1)) 3584 { 3585 if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED) 3586 { 3587 /* 3588 We are trying a normal reconnect after a read timeout; 3589 we suppress prints to .err file as long as the reconnect 3590 happens without problems 3591 */ 3592 *suppress_warnings= 3593 global_system_variables.log_warnings < 2 ? TRUE : FALSE; 3594 } 3595 else 3596 { 3597 if (!mi->rli.abort_slave) 3598 { 3599 sql_print_error("Error reading packet from server: %s (server_errno=%d)", 3600 mysql_error(mysql), mysql_errno(mysql)); 3601 } 3602 } 3603 DBUG_RETURN(packet_error); 3604 } 3605 3606 /* Check if eof packet */ 3607 if (len < 8 && mysql->net.read_pos[0] == 254) 3608 { 3609 sql_print_information("Slave: received end packet from server, apparent " 3610 "master shutdown: %s", 3611 mysql_error(mysql)); 3612 DBUG_RETURN(packet_error); 3613 } 3614 3615 DBUG_PRINT("exit", ("len: %lu net->read_pos[4]: %d", 3616 len, mysql->net.read_pos[4])); 3617 DBUG_RETURN(len - 1); 3618 } 3619 3620 3621 /** 3622 Check if the current error is of temporary nature of not. 
3623 Some errors are temporary in nature, such as 3624 ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT. 3625 3626 @retval 0 if fatal error 3627 @retval 1 temporary error, do retry 3628 */ 3629 3630 int 3631 has_temporary_error(THD *thd) 3632 { 3633 uint current_errno; 3634 DBUG_ENTER("has_temporary_error"); 3635 3636 DBUG_EXECUTE_IF("all_errors_are_temporary_errors", 3637 if (thd->get_stmt_da()->is_error()) 3638 { 3639 thd->clear_error(); 3640 my_error(ER_LOCK_DEADLOCK, MYF(0)); 3641 }); 3642 3643 /* 3644 If there is no message in THD, we can't say if it's a temporary 3645 error or not. This is currently the case for Incident_log_event, 3646 which sets no message. Return FALSE. 3647 */ 3648 if (!likely(thd->is_error())) 3649 DBUG_RETURN(0); 3650 3651 current_errno= thd->get_stmt_da()->sql_errno(); 3652 for (uint i= 0; i < slave_transaction_retry_error_length; i++) 3653 { 3654 if (current_errno == slave_transaction_retry_errors[i]) 3655 DBUG_RETURN(1); 3656 } 3657 3658 DBUG_RETURN(0); 3659 } 3660 3661 3662 /** 3663 If this is a lagging slave (specified with CHANGE MASTER TO MASTER_DELAY = X), delays accordingly. Also unlocks rli->data_lock. 3664 3665 Design note: this is the place to unlock rli->data_lock. The lock 3666 must be held when reading delay info from rli, but it should not be 3667 held while sleeping. 3668 3669 @param ev Event that is about to be executed. 3670 3671 @param thd The sql thread's THD object. 3672 3673 @param rli The sql thread's Relay_log_info structure. 3674 3675 @retval 0 If the delay timed out and the event shall be executed. 3676 3677 @retval nonzero If the delay was interrupted and the event shall be skipped. 
3678 */ 3679 int 3680 sql_delay_event(Log_event *ev, THD *thd, rpl_group_info *rgi) 3681 { 3682 Relay_log_info* rli= rgi->rli; 3683 long sql_delay= rli->get_sql_delay(); 3684 3685 DBUG_ENTER("sql_delay_event"); 3686 mysql_mutex_assert_owner(&rli->data_lock); 3687 DBUG_ASSERT(!rli->belongs_to_client()); 3688 3689 int type= ev->get_type_code(); 3690 if (sql_delay && type != ROTATE_EVENT && 3691 type != FORMAT_DESCRIPTION_EVENT && type != START_EVENT_V3) 3692 { 3693 // The time when we should execute the event. 3694 time_t sql_delay_end= 3695 ev->when + rli->mi->clock_diff_with_master + sql_delay; 3696 // The current time. 3697 time_t now= my_time(0); 3698 // The time we will have to sleep before executing the event. 3699 unsigned long nap_time= 0; 3700 if (sql_delay_end > now) 3701 nap_time= (ulong)(sql_delay_end - now); 3702 3703 DBUG_PRINT("info", ("sql_delay= %lu " 3704 "ev->when= %lu " 3705 "rli->mi->clock_diff_with_master= %lu " 3706 "now= %ld " 3707 "sql_delay_end= %llu " 3708 "nap_time= %ld", 3709 sql_delay, (long)ev->when, 3710 rli->mi->clock_diff_with_master, 3711 (long)now, (ulonglong)sql_delay_end, (long)nap_time)); 3712 3713 if (sql_delay_end > now) 3714 { 3715 DBUG_PRINT("info", ("delaying replication event %lu secs", 3716 nap_time)); 3717 rli->start_sql_delay(sql_delay_end); 3718 mysql_mutex_unlock(&rli->data_lock); 3719 DBUG_RETURN(slave_sleep(thd, nap_time, sql_slave_killed, rgi)); 3720 } 3721 } 3722 3723 mysql_mutex_unlock(&rli->data_lock); 3724 3725 DBUG_RETURN(0); 3726 } 3727 3728 3729 /* 3730 First half of apply_event_and_update_pos(), see below. 3731 Setup some THD variables for applying the event. 3732 3733 Split out so that it can run with rli->data_lock held in non-parallel 3734 replication, but without the mutex held in the parallel case. 
*/
static int
apply_event_and_update_pos_setup(Log_event* ev, THD* thd, rpl_group_info *rgi)
{
  DBUG_ENTER("apply_event_and_update_pos_setup");

  DBUG_PRINT("exec_event",("%s(type_code: %d; server_id: %d)",
                           ev->get_type_str(), ev->get_type_code(),
                           ev->server_id));
  DBUG_PRINT("info", ("thd->options: '%s%s%s' rgi->last_event_start_time: %lu",
                      FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
                      FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
                      FLAGSTR(thd->variables.option_bits, OPTION_GTID_BEGIN),
                      (ulong) rgi->last_event_start_time));

  /*
    Execute the event to change the database and update the binary
    log coordinates, but first we set some data that is needed for
    the thread.

    The event will be executed unless it is supposed to be skipped.

    Queries originating from this server must be skipped.  Low-level
    events (Format_description_log_event, Rotate_log_event,
    Stop_log_event) from this server must also be skipped. But for
    those we don't want to modify 'group_master_log_pos', because
    these events did not exist on the master.
    Format_description_log_event is not completely skipped.

    Skip queries specified by the user in 'slave_skip_counter'.  We
    can't however skip events that has something to do with the log
    files themselves.

    Filtering on own server id is extremely important, to ignore
    execution of events created by the creation/rotation of the relay
    log (remember that now the relay log starts with its Format_desc,
    has a Rotate etc).
  */

  /* Use the original server id for logging. */
  thd->variables.server_id = ev->server_id;
  thd->set_time();                            // time the query
  thd->lex->current_select= 0;
  /* Mirror the event's skip-replication flag into the session options. */
  thd->variables.option_bits=
    (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) |
    (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? OPTION_SKIP_REPLICATION : 0);
  ev->thd = thd; // because up to this point, ev->thd == 0

  /* The return value is the skip reason (a Log_event::EVENT_SKIP_* value). */
  DBUG_RETURN(ev->shall_skip(rgi));
}


/*
  Second half of apply_event_and_update_pos(), see below.

  Do the actual event apply (or skip), and position update.

  reason is the skip reason computed by apply_event_and_update_pos_setup();
  the event is applied only when it is Log_event::EVENT_SKIP_NOT.

  Returns 0 on success, 1 if applying the event failed, 2 if applying
  succeeded but updating the position failed.
*/
static int
apply_event_and_update_pos_apply(Log_event* ev, THD* thd, rpl_group_info *rgi,
                                 int reason)
{
  int exec_res= 0;
  Relay_log_info* rli= rgi->rli;

  DBUG_ENTER("apply_event_and_update_pos_apply");
  DBUG_EXECUTE_IF("inject_slave_sql_before_apply_event",
    {
      DBUG_ASSERT(!debug_sync_set_action
                  (thd, STRING_WITH_LEN("now WAIT_FOR continue")));
      DBUG_SET_INITIAL("-d,inject_slave_sql_before_apply_event");
    };);
  if (reason == Log_event::EVENT_SKIP_NOT)
    exec_res= ev->apply_event(rgi);

#ifdef WITH_WSREP
  if (WSREP(thd)) {

    if (exec_res) {
      /* The wsrep transaction state is inspected under LOCK_thd_data. */
      mysql_mutex_lock(&thd->LOCK_thd_data);
      switch(thd->wsrep_trx().state()) {
      case wsrep::transaction::s_must_replay:
        /* this transaction will be replayed,
           so not raising slave error here */
        WSREP_DEBUG("SQL apply failed for MUST_REPLAY, res %d", exec_res);
        exec_res = 0;
        break;
      default:
          WSREP_DEBUG("SQL apply failed, res %d conflict state: %s",
                      exec_res, wsrep_thd_transaction_state_str(thd));
          rli->abort_slave= 1;
          rli->report(ERROR_LEVEL, ER_UNKNOWN_COM_ERROR, rgi->gtid_info(),
                      "Node has dropped from cluster");
          break;
      }
      mysql_mutex_unlock(&thd->LOCK_thd_data);
    }
  }
#endif

#ifndef DBUG_OFF
  /*
    This only prints information to the debug trace.

    TODO: Print an informational message to the error log?
  */
  /* Indexed by reason, so the order must follow the EVENT_SKIP_* values. */
  static const char *const explain[] = {
    // EVENT_SKIP_NOT,
    "not skipped",
    // EVENT_SKIP_IGNORE,
    "skipped because event should be ignored",
    // EVENT_SKIP_COUNT
    "skipped because event skip counter was non-zero"
  };
  DBUG_PRINT("info", ("OPTION_BEGIN: %d IN_STMT: %d IN_TRANSACTION: %d",
                      MY_TEST(thd->variables.option_bits & OPTION_BEGIN),
                      rli->get_flag(Relay_log_info::IN_STMT),
                      rli->get_flag(Relay_log_info::IN_TRANSACTION)));
  DBUG_PRINT("skip_event", ("%s event was %s",
                            ev->get_type_str(), explain[reason]));
#endif

  DBUG_PRINT("info", ("apply_event error = %d", exec_res));
  if (exec_res == 0)
  {
    /* Applied (or skipped) without error: advance the positions. */
    int error= ev->update_pos(rgi);
#ifndef DBUG_OFF
    DBUG_PRINT("info", ("update_pos error = %d", error));
    if (!rli->belongs_to_client())
    {
      DBUG_PRINT("info", ("group %llu %s", rli->group_relay_log_pos,
                          rli->group_relay_log_name));
      DBUG_PRINT("info", ("event %llu %s", rli->event_relay_log_pos,
                          rli->event_relay_log_name));
    }
#endif
    /*
      The update should not fail, so print an error message and
      return an error code.

      TODO: Replace this with a decent error message when merged
      with BUG#24954 (which adds several new error message).
    */
    if (unlikely(error))
    {
      rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR, rgi->gtid_info(),
                  "It was not possible to update the positions"
                  " of the relay log information: the slave may"
                  " be in an inconsistent state."
                  " Stopped in %s position %llu",
                  rli->group_relay_log_name, rli->group_relay_log_pos);
      DBUG_RETURN(2);
    }
  }
  else
  {
    /*
      Make sure we do not erroneously update gtid_slave_pos with a lingering
      GTID from this failed event group (MDEV-4906).
    */
    rgi->gtid_pending= false;
  }

  /* Normalize any non-zero apply error to 1. */
  DBUG_RETURN(exec_res ? 1 : 0);
}


/**
  Applies the given event and advances the relay log position.

  This is needed by the sql thread to execute events from the binlog,
  and by clients executing BINLOG statements.  Conceptually, this
  function does:

  @code
    ev->apply_event(rgi);
    ev->update_pos(rgi);
  @endcode

  It also does the following maintenance:

   - Initializes the thread's server_id and time; and the event's
     thread.

   - If !rli->belongs_to_client() (i.e., if it belongs to the slave
     sql thread instead of being used for executing BINLOG
     statements), it does the following things: (1) skips events if it
     is needed according to the server id or slave_skip_counter; (2)
     unlocks rli->data_lock; (3) sleeps if required by 'CHANGE MASTER
     TO MASTER_DELAY=X'; (4) maintains the running state of the sql
     thread (rli->thread_state).

   - Reports errors as needed.

  @param ev The event to apply.

  @param thd The client thread that executes the event (i.e., the
  slave sql thread if called from a replication slave, or the client
  thread if called to execute a BINLOG statement).

  @param rgi The group info whose rgi->rli is the relay log info (i.e.,
  the slave's rli if called from a replication slave, or the client's
  thd->rli_fake if called to execute a BINLOG statement).

  @retval 0 OK.

  @retval 1 Error calling ev->apply_event().

  @retval 2 No error calling ev->apply_event(), but error calling
  ev->update_pos().

  This function is only used in non-parallel replication, where it is called
  with rli->data_lock held; this lock is released during this function.
3947 */ 3948 int 3949 apply_event_and_update_pos(Log_event* ev, THD* thd, rpl_group_info *rgi) 3950 { 3951 Relay_log_info* rli= rgi->rli; 3952 mysql_mutex_assert_owner(&rli->data_lock); 3953 int reason= apply_event_and_update_pos_setup(ev, thd, rgi); 3954 if (reason == Log_event::EVENT_SKIP_COUNT) 3955 { 3956 DBUG_ASSERT(rli->slave_skip_counter > 0); 3957 rli->slave_skip_counter--; 3958 } 3959 3960 if (reason == Log_event::EVENT_SKIP_NOT) 3961 { 3962 // Sleeps if needed, and unlocks rli->data_lock. 3963 if (sql_delay_event(ev, thd, rgi)) 3964 return 0; 3965 } 3966 else 3967 mysql_mutex_unlock(&rli->data_lock); 3968 3969 return apply_event_and_update_pos_apply(ev, thd, rgi, reason); 3970 } 3971 3972 3973 /* 3974 The version of above apply_event_and_update_pos() used in parallel 3975 replication. Unlike the non-parallel case, this function is called without 3976 rli->data_lock held. 3977 */ 3978 int 3979 apply_event_and_update_pos_for_parallel(Log_event* ev, THD* thd, 3980 rpl_group_info *rgi) 3981 { 3982 mysql_mutex_assert_not_owner(&rgi->rli->data_lock); 3983 int reason= apply_event_and_update_pos_setup(ev, thd, rgi); 3984 /* 3985 In parallel replication, sql_slave_skip_counter is handled in the SQL 3986 driver thread, so 23 should never see EVENT_SKIP_COUNT here. 3987 */ 3988 DBUG_ASSERT(reason != Log_event::EVENT_SKIP_COUNT); 3989 /* 3990 Calling sql_delay_event() was handled in the SQL driver thread when 3991 doing parallel replication. 3992 */ 3993 return apply_event_and_update_pos_apply(ev, thd, rgi, reason); 3994 } 3995 3996 3997 /** 3998 Keep the relay log transaction state up to date. 3999 4000 The state reflects how things are after the given event, that has just been 4001 read from the relay log, is executed. 4002 4003 This is only needed to ensure we: 4004 - Don't abort the sql driver thread in the middle of an event group. 4005 - Don't rotate the io thread in the middle of a statement or transaction. 
4006 The mechanism is that the io thread, when it needs to rotate the relay 4007 log, will wait until the sql driver has read all the cached events 4008 and then continue reading events one by one from the master until 4009 the sql threads signals that log doesn't have an active group anymore. 4010 4011 There are two possible cases. We keep them as 2 separate flags mainly 4012 to make debugging easier. 4013 4014 - IN_STMT is set when we have read an event that should be used 4015 together with the next event. This is for example setting a 4016 variable that is used when executing the next statement. 4017 - IN_TRANSACTION is set when we are inside a BEGIN...COMMIT group 4018 4019 To test the state one should use the is_in_group() function. 4020 */ 4021 4022 inline void update_state_of_relay_log(Relay_log_info *rli, Log_event *ev) 4023 { 4024 Log_event_type typ= ev->get_type_code(); 4025 4026 /* check if we are in a multi part event */ 4027 if (ev->is_part_of_group()) 4028 rli->set_flag(Relay_log_info::IN_STMT); 4029 else if (Log_event::is_group_event(typ)) 4030 { 4031 /* 4032 If it was not a is_part_of_group() and not a group event (like 4033 rotate) then we can reset the IN_STMT flag. We have the above 4034 if only to allow us to have a rotate element anywhere. 4035 */ 4036 rli->clear_flag(Relay_log_info::IN_STMT); 4037 } 4038 4039 /* Check for an event that starts or stops a transaction */ 4040 if (LOG_EVENT_IS_QUERY(typ)) 4041 { 4042 Query_log_event *qev= (Query_log_event*) ev; 4043 /* 4044 Trivial optimization to avoid the following somewhat expensive 4045 checks. 
4046 */ 4047 if (qev->q_len <= sizeof("ROLLBACK")) 4048 { 4049 if (qev->is_begin()) 4050 rli->set_flag(Relay_log_info::IN_TRANSACTION); 4051 if (qev->is_commit() || qev->is_rollback()) 4052 rli->clear_flag(Relay_log_info::IN_TRANSACTION); 4053 } 4054 } 4055 if (typ == XID_EVENT) 4056 rli->clear_flag(Relay_log_info::IN_TRANSACTION); 4057 if (typ == GTID_EVENT && 4058 !(((Gtid_log_event*) ev)->flags2 & Gtid_log_event::FL_STANDALONE)) 4059 { 4060 /* This GTID_EVENT will generate a BEGIN event */ 4061 rli->set_flag(Relay_log_info::IN_TRANSACTION); 4062 } 4063 4064 DBUG_PRINT("info", ("event: %u IN_STMT: %d IN_TRANSACTION: %d", 4065 (uint) typ, 4066 rli->get_flag(Relay_log_info::IN_STMT), 4067 rli->get_flag(Relay_log_info::IN_TRANSACTION))); 4068 } 4069 4070 4071 /** 4072 Top-level function for executing the next event in the relay log. 4073 This is called from the SQL thread. 4074 4075 This function reads the event from the relay log, executes it, and 4076 advances the relay log position. It also handles errors, etc. 4077 4078 This function may fail to apply the event for the following reasons: 4079 4080 - The position specfied by the UNTIL condition of the START SLAVE 4081 command is reached. 4082 4083 - It was not possible to read the event from the log. 4084 4085 - The slave is killed. 4086 4087 - An error occurred when applying the event, and the event has been 4088 tried slave_trans_retries times. If the event has been retried 4089 fewer times, 0 is returned. 4090 4091 - init_master_info or init_relay_log_pos failed. (These are called 4092 if a failure occurs when applying the event.) 4093 4094 - An error occurred when updating the binlog position. 4095 4096 @retval 0 The event was applied. 4097 4098 @retval 1 The event was not applied. 
*/

static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
                                rpl_group_info *serial_rgi)
{
  ulonglong event_size;
  DBUG_ENTER("exec_relay_log_event");

  /*
    We acquire this mutex since we need it for all operations except
    event execution. But we will release it in places where we will
    wait for something for example inside of next_event().
  */
  mysql_mutex_lock(&rli->data_lock);

  Log_event *ev= next_event(serial_rgi, &event_size);

  /*
    The kill check comes before the NULL check on ev, so a killed thread
    returns 1 here without reporting a relay log read failure.
  */
  if (sql_slave_killed(serial_rgi))
  {
    mysql_mutex_unlock(&rli->data_lock);
    delete ev;
    DBUG_RETURN(1);
  }
  if (ev)
  {
#ifdef WITH_WSREP
    if (wsrep_before_statement(thd))
    {
      mysql_mutex_unlock(&rli->data_lock);
      delete ev;
      WSREP_INFO("Wsrep before statement error");
      DBUG_RETURN(1);
    }
#endif /* WITH_WSREP */
    int exec_res;
    Log_event_type typ= ev->get_type_code();

    /*
      Even if we don't execute this event, we keep the master timestamp,
      so that seconds behind master shows correct delta (there are events
      that are not replayed, so we keep falling behind).

      If it is an artificial event, or a relay log event (IO thread generated
      event) or ev->when is set to 0, we don't update the
      last_master_timestamp.

      In parallel replication, we might queue a large number of events, and
      the user might be surprised to see a claim that the slave is up to date
      long before those queued events are actually executed.
    */
    if (!rli->mi->using_parallel() &&
        !(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
    {
      rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
      DBUG_ASSERT(rli->last_master_timestamp >= 0);
    }

    /*
      This tests if the position of the beginning of the current event
      hits the UNTIL barrier.
    */
    if ((rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ||
         rli->until_condition == Relay_log_info::UNTIL_RELAY_POS) &&
        (ev->server_id != global_system_variables.server_id ||
         rli->replicate_same_server_id) &&
        rli->is_until_satisfied(ev))
    {
      /*
        Setting abort_slave flag because we do not want additional
        message about error in query execution to be printed.
      */
      rli->abort_slave= 1;
      rli->stop_for_until= true;
      mysql_mutex_unlock(&rli->data_lock);
#ifdef WITH_WSREP
      wsrep_after_statement(thd);
#endif /* WITH_WSREP */
      delete ev;
      DBUG_RETURN(1);
    }

    { /**
         The following failure injection works in cooperation with tests
         setting @@global.debug= 'd,incomplete_group_in_relay_log'.
         Xid or Commit events are not executed to force the slave sql
         read hanging if the relay log does not have any more events.
      */
      DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
                      if ((typ == XID_EVENT) ||
                          (LOG_EVENT_IS_QUERY(typ) &&
                           strcmp("COMMIT", ((Query_log_event *) ev)->query) == 0))
                      {
                        DBUG_ASSERT(thd->transaction.all.modified_non_trans_table);
                        rli->abort_slave= 1;
                        mysql_mutex_unlock(&rli->data_lock);
                        delete ev;
                        serial_rgi->inc_event_relay_log_pos();
                        DBUG_RETURN(0);
                      };);
    }

    /* Track IN_STMT / IN_TRANSACTION before the event is dispatched. */
    update_state_of_relay_log(rli, ev);

    if (rli->mi->using_parallel())
    {
      int res= rli->parallel.do_event(serial_rgi, ev, event_size);
      /*
        In parallel replication, we need to update the relay log position
        immediately so that it will be the correct position from which to
        read the next event.
      */
      if (res == 0)
        rli->event_relay_log_pos= rli->future_event_relay_log_pos;
      /* Any non-negative result is final and is returned to the caller. */
      if (res >= 0)
      {
#ifdef WITH_WSREP
        wsrep_after_statement(thd);
#endif /* WITH_WSREP */
        DBUG_RETURN(res);
      }
      /*
        Else we proceed to execute the event non-parallel.
        This is the case for pre-10.0 events without GTID, and for handling
        slave_skip_counter.
      */
      if (!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
      {
        /*
          Ignore the FD's timestamp as it does not reflect the slave
          execution state but is likely to reflect a deep past. Otherwise,
          while the first data modification event takes long to execute,
          Seconds_Behind_Master would stay at zero all that time.
        */
        if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
          rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;

        DBUG_ASSERT(rli->last_master_timestamp >= 0);
      }
    }

    if (typ == GTID_EVENT)
    {
      Gtid_log_event *gev= static_cast<Gtid_log_event *>(ev);

      /*
        For GTID, allocate a new sub_id for the given domain_id.
        The sub_id must be allocated in increasing order of binlog order.
      */
      if (event_group_new_gtid(serial_rgi, gev))
      {
        sql_print_error("Error reading relay log event: %s", "slave SQL thread "
                        "aborted because of out-of-memory error");
        mysql_mutex_unlock(&rli->data_lock);
        delete ev;
#ifdef WITH_WSREP
        wsrep_after_statement(thd);
#endif /* WITH_WSREP */
        DBUG_RETURN(1);
      }

      if (opt_gtid_ignore_duplicates &&
          rli->mi->using_gtid != Master_info::USE_GTID_NO)
      {
        int res= rpl_global_gtid_slave_state->check_duplicate_gtid
          (&serial_rgi->current_gtid, serial_rgi);
        if (res < 0)
        {
          sql_print_error("Error processing GTID event: %s", "slave SQL "
                          "thread aborted because of out-of-memory error");
          mysql_mutex_unlock(&rli->data_lock);
          delete ev;
#ifdef WITH_WSREP
          wsrep_after_statement(thd);
#endif /* WITH_WSREP */
          DBUG_RETURN(1);
        }
        /*
          If we need to skip this event group (because the GTID was already
          applied), then do it using the code for slave_skip_counter, which
          is able to handle skipping until the end of the event group.
        */
        if (!res)
          rli->slave_skip_counter= 1;
      }
    }

    /* Hand the current relay log coordinates to the serial rgi. */
    serial_rgi->future_event_relay_log_pos= rli->future_event_relay_log_pos;
    serial_rgi->event_relay_log_name= rli->event_relay_log_name;
    serial_rgi->event_relay_log_pos= rli->event_relay_log_pos;
    /* Called with rli->data_lock held; the callee releases it. */
    exec_res= apply_event_and_update_pos(ev, thd, serial_rgi);

#ifdef WITH_WSREP
    WSREP_DEBUG("apply_event_and_update_pos() result: %d", exec_res);
#endif /* WITH_WSREP */

    delete_or_keep_event_post_apply(serial_rgi, typ, ev);

    /*
      update_log_pos failed: this should not happen, so we don't
      retry.
    */
    if (unlikely(exec_res == 2))
    {
#ifdef WITH_WSREP
      wsrep_after_statement(thd);
#endif /* WITH_WSREP */
      DBUG_RETURN(1);
    }
#ifdef WITH_WSREP
    mysql_mutex_lock(&thd->LOCK_thd_data);
    enum wsrep::client_error wsrep_error= thd->wsrep_cs().current_error();
    mysql_mutex_unlock(&thd->LOCK_thd_data);
    /*
      Deliberately brace-less: with WITH_WSREP defined, this condition
      guards the whole "if (slave_trans_retries)" statement that follows.
    */
    if (wsrep_error == wsrep::e_success)
#endif /* WITH_WSREP */
    if (slave_trans_retries)
    {
      int UNINIT_VAR(temp_err);
      if (unlikely(exec_res) && (temp_err= has_temporary_error(thd)))
      {
        const char *errmsg;
        rli->clear_error();
        /*
          We were in a transaction which has been rolled back because of a
          temporary error;
          let's seek back to BEGIN log event and retry it all again.
          Note, if lock wait timeout (innodb_lock_wait_timeout exceeded)
          there is no rollback since 5.0.13 (ref: manual).
          We have to not only seek but also

          a) init_master_info(), to seek back to hot relay log's start
          for later (for when we will come back to this hot log after
          re-processing the possibly existing old logs where BEGIN is:
          check_binlog_magic() will then need the cache to be at
          position 0 (see comments at beginning of
          init_master_info()).
          b) init_relay_log_pos(), because the BEGIN may be an older relay log.
        */
        if (serial_rgi->trans_retries < slave_trans_retries)
        {
          if (init_master_info(rli->mi, 0, 0, 0, SLAVE_SQL))
            sql_print_error("Failed to initialize the master info structure");
          else if (init_relay_log_pos(rli,
                                      rli->group_relay_log_name,
                                      rli->group_relay_log_pos,
                                      1, &errmsg, 1))
            sql_print_error("Error initializing relay log position: %s",
                            errmsg);
          else
          {
            /* Repositioned successfully: report success so we get re-run. */
            exec_res= 0;
            serial_rgi->cleanup_context(thd, 1);
            /* chance for concurrent connection to get more locks */
            slave_sleep(thd, MY_MAX(MY_MIN(serial_rgi->trans_retries,
                                           MAX_SLAVE_RETRY_PAUSE),
                                    slave_trans_retry_interval),
                        sql_slave_killed, serial_rgi);
            serial_rgi->trans_retries++;
            mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS
            rli->retried_trans++;
            statistic_increment(slave_retried_transactions, LOCK_status);
            mysql_mutex_unlock(&rli->data_lock);
            DBUG_PRINT("info", ("Slave retries transaction "
                                "rgi->trans_retries: %lu",
                                serial_rgi->trans_retries));
          }
        }
        else
          sql_print_error("Slave SQL thread retried transaction %lu time(s) "
                          "in vain, giving up. Consider raising the value of "
                          "the slave_transaction_retries variable.",
                          slave_trans_retries);
      }
      else if ((exec_res && !temp_err) ||
               (opt_using_transactions &&
                rli->group_relay_log_pos == rli->event_relay_log_pos))
      {
        /*
          Only reset the retry counter if the entire group succeeded
          or failed with a non-transient error. On a successful
          event, the execution will proceed as usual; in the case of a
          non-transient error, the slave will stop with an error.
        */
        serial_rgi->trans_retries= 0; // restart from fresh
        DBUG_PRINT("info", ("Resetting retry counter, rgi->trans_retries: %lu",
                            serial_rgi->trans_retries));
      }
    }

    rli->executed_entries++;
#ifdef WITH_WSREP
    wsrep_after_statement(thd);
#endif /* WITH_WSREP */
    /*
      NOTE(review): by this point ev has been handed to
      delete_or_keep_event_post_apply() and may already have been freed;
      the debug hook below only tests the pointer value and never
      dereferences it.
    */
    DBUG_EXECUTE_IF(
        "pause_sql_thread_on_fde",
        if (ev && typ == FORMAT_DESCRIPTION_EVENT) {
          DBUG_ASSERT(!debug_sync_set_action(
              thd,
              STRING_WITH_LEN(
                  "now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue")));
        });

    DBUG_RETURN(exec_res);
  }
  /* next_event() gave us no event although the thread was not killed. */
  mysql_mutex_unlock(&rli->data_lock);
  rli->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_READ_FAILURE, NULL,
              ER_THD(thd, ER_SLAVE_RELAY_LOG_READ_FAILURE), "\
Could not parse relay log event entry. The possible reasons are: the master's \
binary log is corrupted (you can check this by running 'mysqlbinlog' on the \
binary log), the slave's relay log is corrupted (you can check this by running \
'mysqlbinlog' on the relay log), a network problem, or a bug in the master's \
or slave's MySQL code. If you want to check the master's binary log or slave's \
relay log, you will be able to know their names by issuing 'SHOW SLAVE STATUS' \
on this slave.\
");
  DBUG_RETURN(1);
}


/**
  Check whether the slave I/O thread has been killed and optionally log it.

  @param mi    Master connection whose I/O thread state is tested with
               io_slave_killed().
  @param info  Message to write to the error log when the thread is killed
               and log_warnings is non-zero; may be NULL for no message.

  @retval TRUE   The I/O thread has been killed.
  @retval FALSE  The I/O thread has not been killed.
*/
static bool check_io_slave_killed(Master_info *mi, const char *info)
{
  if (io_slave_killed(mi))
  {
    if (info && global_system_variables.log_warnings)
      sql_print_information("%s", info);
    return TRUE;
  }
  return FALSE;
}

/**
  @brief Try to reconnect slave IO thread.

  @details Terminates current connection to master, sleeps for
  @c mi->connect_retry seconds and initiates new connection with
  @c safe_reconnect(). Variable pointed by @c retry_count is increased -
  if it exceeds @c master_retry_count then connection is not re-established
  and function signals error.
  Unless @c suppress_warnings is TRUE, a warning is put in the server error log
  when reconnecting. The warning message and messages used to report errors
  are taken from @c messages array. In case @c master_retry_count is exceeded,
  no messages are added to the log.

  @param[in]     thd                 Thread context.
  @param[in]     mysql               MySQL connection.
  @param[in]     mi                  Master connection information.
  @param[in,out] retry_count         Number of attempts to reconnect.
  @param[in]     suppress_warnings   TRUE when a normal net read timeout
                                     has caused the reconnect.
  @param[in]     messages            Messages to print/log, see
                                     reconnect_messages[] array.

  @retval        0                   OK.
  @retval        1                   There was an error.
*/

static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
                            uint *retry_count, bool suppress_warnings,
                            const char *messages[SLAVE_RECON_MSG_MAX])
{
  mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
  thd->proc_info= messages[SLAVE_RECON_MSG_WAIT];
#ifdef SIGNAL_WITH_VIO_CLOSE
  thd->clear_active_vio();
#endif
  end_server(mysql);
  /*
    The first attempt (counter still 0) reconnects at once; every later
    attempt is checked against master_retry_count and preceded by a sleep.
  */
  if ((*retry_count)++)
  {
    if (*retry_count > master_retry_count)
      return 1;                             // Don't retry forever
    slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
  }
  if (check_io_slave_killed(mi, messages[SLAVE_RECON_MSG_KILLED_WAITING]))
    return 1;
  thd->proc_info = messages[SLAVE_RECON_MSG_AFTER];
  if (!suppress_warnings)
  {
    char buf[256];
    StringBuffer<100> tmp;
    /* With GTID in use, append the current GTID position to the message. */
    if (mi->using_gtid != Master_info::USE_GTID_NO)
    {
      tmp.append(STRING_WITH_LEN("; GTID position '"));
      mi->gtid_current_pos.append_to_string(&tmp);
      if (mi->events_queued_since_last_gtid == 0)
        tmp.append(STRING_WITH_LEN("'"));
      else
      {
        tmp.append(STRING_WITH_LEN("', GTID event skip "));
        tmp.append_ulonglong((ulonglong)mi->events_queued_since_last_gtid);
      }
    }
    my_snprintf(buf, sizeof(buf), messages[SLAVE_RECON_MSG_FAILED],
                IO_RPL_LOG_NAME, mi->master_log_pos,
                tmp.c_ptr_safe());
    /*
      Raise a warning when the failure happened while registering on the
      master or requesting a dump (non-empty COMMAND message); otherwise
      (failure while reading an event) only log an informational message.
    */
    if (messages[SLAVE_RECON_MSG_COMMAND][0])
    {
      mi->report(WARNING_LEVEL, ER_SLAVE_MASTER_COM_FAILURE, NULL,
                 ER_THD(thd, ER_SLAVE_MASTER_COM_FAILURE),
                 messages[SLAVE_RECON_MSG_COMMAND], buf);
    }
    else
    {
      sql_print_information("%s", buf);
    }
  }
  /*
    Both a failed safe_reconnect() and a kill detected right after it are
    reported with the KILLED_AFTER message.
  */
  if (safe_reconnect(thd, mysql, mi, 1) || io_slave_killed(mi))
  {
    if (global_system_variables.log_warnings)
      sql_print_information("%s", messages[SLAVE_RECON_MSG_KILLED_AFTER]);
    return 1;
  }
  return 0;
}


/**
  Slave IO thread entry point.

  Connects to the master, registers this slave, requests a binlog dump and
  then loops reading events and queueing them with queue_event() until the
  thread is killed or a fatal error occurs. Transient failures go through
  try_to_reconnect() and restart at the "connected" label.

  @param arg Pointer to Master_info struct that holds information for
  the IO thread.

  @return Always 0.
*/
pthread_handler_t handle_slave_io(void *arg)
{
  THD *thd; // needs to be first for thread_stack
  MYSQL *mysql;
  Master_info *mi = (Master_info*)arg;
  Relay_log_info *rli= &mi->rli;
  uint retry_count;
  bool suppress_warnings;
  int ret;
  rpl_io_thread_info io_info;
#ifndef DBUG_OFF
  mi->dbug_do_disconnect= false;
#endif
  // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
  my_thread_init();
  DBUG_ENTER("handle_slave_io");

  DBUG_ASSERT(mi->inited);
  mysql= NULL ;
  retry_count= 0;

  thd= new THD(next_thread_id()); // note that constructor of THD uses DBUG_ !

  mysql_mutex_lock(&mi->run_lock);
  /* Inform waiting threads that slave has started */
  mi->slave_run_id++;

#ifndef DBUG_OFF
  mi->events_till_disconnect = disconnect_slave_event_count;
#endif

  THD_CHECK_SENTRY(thd);
  mi->io_thd = thd;

  pthread_detach_this_thread();
  thd->thread_stack= (char*) &thd; // remember where our stack is
  mi->clear_error();
  if (init_slave_thread(thd, mi, SLAVE_THD_IO))
  {
    mysql_cond_broadcast(&mi->start_cond);
    sql_print_error("Failed during slave I/O thread initialization");
    /* run_lock is still held here, as the err_during_init path expects. */
    goto err_during_init;
  }
  thd->system_thread_info.rpl_io_info= &io_info;
  server_threads.insert(thd);
  mi->slave_running = MYSQL_SLAVE_RUN_NOT_CONNECT;
  mi->abort_slave = 0;
  mysql_mutex_unlock(&mi->run_lock);
  mysql_cond_broadcast(&mi->start_cond);
  mi->rows_event_tracker.reset();

  DBUG_PRINT("master_info",("log_file_name: '%s' position: %llu",
                            mi->master_log_name, mi->master_log_pos));

  /* This must be called before running any binlog_relay_io hooks */
  my_pthread_setspecific_ptr(RPL_MASTER_INFO, mi);

  /* Load the set of seen GTIDs, if we did not already. */
  if (rpl_load_gtid_slave_state(thd))
  {
    mi->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
               "Unable to load replication GTID slave state from mysql.%s: %s",
               rpl_gtid_slave_state_table_name.str,
               thd->get_stmt_da()->message());
    /*
      If we are using old-style replication, we can continue, even though we
      then will not be able to record the GTIDs we receive. But if using GTID,
      we must give up.
    */
    if (mi->using_gtid != Master_info::USE_GTID_NO || opt_gtid_strict_mode)
      goto err;
  }

  thd->variables.wsrep_on= 0;
  if (DBUG_EVALUATE_IF("failed_slave_start", 1, 0)
      || repl_semisync_slave.slave_start(mi))
  {
    mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
               ER_THD(thd, ER_SLAVE_FATAL_ERROR),
               "Failed to run 'thread_start' hook");
    goto err;
  }

  if (!(mi->mysql = mysql = mysql_init(NULL)))
  {
    mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
               ER_THD(thd, ER_SLAVE_FATAL_ERROR), "error in mysql_init()");
    goto err;
  }

  THD_STAGE_INFO(thd, stage_connecting_to_master);
  // we can get killed during safe_connect
  if (!safe_connect(thd, mysql, mi))
  {
    if (mi->using_gtid == Master_info::USE_GTID_NO)
      sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
                            "replication started in log '%s' at position %llu",
                            mi->user, mi->host, mi->port,
                            IO_RPL_LOG_NAME, mi->master_log_pos);
    else
    {
      StringBuffer<100> tmp;
      mi->gtid_current_pos.to_string(&tmp);
      sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
                            "replication starts at GTID position '%s'",
                            mi->user, mi->host, mi->port, tmp.c_ptr_safe());
    }
  }
  else
  {
    sql_print_information("Slave I/O thread killed while connecting to master");
    goto err;
  }

  /* Re-entry point after every successful (re)connect. */
connected:

  if (mi->using_gtid != Master_info::USE_GTID_NO)
  {
    /*
      When the IO thread (re)connects to the master using GTID, it will
      connect at the start of an event group. But the IO thread may have
      previously logged part of the following event group to the relay
      log.

      When the IO and SQL thread are started together, we erase any previous
      relay logs, but this is not possible/desirable while the SQL thread is
      running. To avoid duplicating partial event groups in the relay logs in
      this case, we remember the count of events in any partially logged event
      group before the reconnect, and then here at connect we set up a counter
      to skip the already-logged part of the group.
    */
    mi->gtid_reconnect_event_skip_count= mi->events_queued_since_last_gtid;
    mi->gtid_event_seen= false;
    /*
      Reset stale state of the rows-event group tracker at reconnect.
    */
    mi->rows_event_tracker.reset();
  }

#ifdef ENABLED_DEBUG_SYNC
  DBUG_EXECUTE_IF("dbug.before_get_running_status_yes",
                  {
                    const char act[]=
                      "now "
                      "wait_for signal.io_thread_let_running";
                    DBUG_ASSERT(debug_sync_service);
                    DBUG_ASSERT(!debug_sync_set_action(thd,
                                                       STRING_WITH_LEN(act)));
                  };);
#endif

  mysql_mutex_lock(&mi->run_lock);
  mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
  mysql_mutex_unlock(&mi->run_lock);

  thd->slave_net = &mysql->net;
  THD_STAGE_INFO(thd, stage_checking_master_version);
  /* ret: 1 is treated as fatal, 2 as transient (reconnect and retry). */
  ret= get_master_version_and_clock(mysql, mi);
  if (ret == 1)
    /* Fatal error */
    goto err;

  if (ret == 2)
  {
    if (check_io_slave_killed(mi, "Slave I/O thread killed "
                              "while calling get_master_version_and_clock(...)"))
      goto err;
    suppress_warnings= FALSE;
    /*
      Try to reconnect because the error was caused by a transient network
      problem
    */
    if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
                         reconnect_messages[SLAVE_RECON_ACT_REG]))
      goto err;
    goto connected;
  }

  if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1)
  {
    /*
      Register ourselves with the master.
    */
    THD_STAGE_INFO(thd, stage_registering_slave_on_master);
    if (register_slave_on_master(mysql, mi, &suppress_warnings))
    {
      if (!check_io_slave_killed(mi, "Slave I/O thread killed "
                                 "while registering slave on master"))
      {
        sql_print_error("Slave I/O thread couldn't register on master");
        if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
                             reconnect_messages[SLAVE_RECON_ACT_REG]))
          goto err;
      }
      else
        goto err;
      goto connected;
    }
    DBUG_EXECUTE_IF("fail_com_register_slave", goto err;);
  }

  DBUG_PRINT("info",("Starting reading binary log from master"));
  thd->set_command(COM_SLAVE_IO);
  /* Outer loop: one iteration per binlog dump request. */
  while (!io_slave_killed(mi))
  {
    THD_STAGE_INFO(thd, stage_requesting_binlog_dump);
    if (request_dump(thd, mysql, mi, &suppress_warnings))
    {
      sql_print_error("Failed on request_dump()");
      if (check_io_slave_killed(mi, NullS) ||
        try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
                         reconnect_messages[SLAVE_RECON_ACT_DUMP]))
        goto err;
      goto connected;
    }

    const char *event_buf;

    mi->slave_running= MYSQL_SLAVE_RUN_READING;
    DBUG_ASSERT(mi->last_error().number == 0);
    /*
      State of the read speed limiter: tokenamount is the current byte
      budget, lastchecktime the time of its last refill (the refill
      arithmetic below divides by 1000*1000, so presumably microseconds).
    */
    ulonglong lastchecktime = my_hrtime().val;
    ulonglong tokenamount = opt_read_binlog_speed_limit*1024;
    /* Inner loop: one iteration per event read from the master. */
    while (!io_slave_killed(mi))
    {
      ulong event_len, network_read_len = 0;
      /*
         We say "waiting" because read_event() will wait if there's nothing to
         read. But if there's something to read, it will not wait. The
         important thing is to not confuse users by saying "reading" whereas
         we're in fact receiving nothing.
      */
      THD_STAGE_INFO(thd, stage_waiting_for_master_to_send_event);
      event_len= read_event(mysql, mi, &suppress_warnings, &network_read_len);
      if (check_io_slave_killed(mi, NullS))
        goto err;

      if (unlikely(event_len == packet_error))
      {
        uint mysql_error_number= mysql_errno(mysql);
        /* The listed errors are fatal; anything else triggers a reconnect. */
        switch (mysql_error_number) {
        case CR_NET_PACKET_TOO_LARGE:
          sql_print_error("\
Log entry on master is longer than slave_max_allowed_packet (%lu) on \
slave. If the entry is correct, restart the server with a higher value of \
slave_max_allowed_packet",
                         slave_max_allowed_packet);
          mi->report(ERROR_LEVEL, ER_NET_PACKET_TOO_LARGE, NULL,
                     "%s", "Got a packet bigger than 'slave_max_allowed_packet' bytes");
          goto err;
        case ER_MASTER_FATAL_ERROR_READING_BINLOG:
          mi->report(ERROR_LEVEL, ER_MASTER_FATAL_ERROR_READING_BINLOG, NULL,
                     ER_THD(thd, ER_MASTER_FATAL_ERROR_READING_BINLOG),
                     mysql_error_number, mysql_error(mysql));
          goto err;
        case ER_OUT_OF_RESOURCES:
          sql_print_error("\
Stopping slave I/O thread due to out-of-memory error from master");
          mi->report(ERROR_LEVEL, ER_OUT_OF_RESOURCES, NULL,
                     "%s", ER_THD(thd, ER_OUT_OF_RESOURCES));
          goto err;
        }
        if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
                             reconnect_messages[SLAVE_RECON_ACT_EVENT]))
          goto err;
        goto connected;
      } // if (event_len == packet_error)

      retry_count=0;                  // ok event, reset retry counter
      THD_STAGE_INFO(thd, stage_queueing_master_event_to_the_relay_log);
      /* The event starts one byte into the packet that was read. */
      event_buf= (const char*)mysql->net.read_pos + 1;
      mi->semi_ack= 0;
      if (repl_semisync_slave.
          slave_read_sync_header((const char*)mysql->net.read_pos + 1, event_len,
                                 &(mi->semi_ack), &event_buf, &event_len))
      {
        mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
                   ER_THD(thd, ER_SLAVE_FATAL_ERROR),
                   "Failed to run 'after_read_event' hook");
        goto err;
      }

      /* Control the binlog read speed of master
         when read_binlog_speed_limit is non-zero
      */
      ulonglong speed_limit_in_bytes = opt_read_binlog_speed_limit * 1024;
      if (speed_limit_in_bytes)
      {
        /* Prevent the tokenamount from becoming a large value,
           for example when the IO thread has not worked for a long time
        */
        if (tokenamount > speed_limit_in_bytes * 2)
        {
          lastchecktime = my_hrtime().val;
          tokenamount = speed_limit_in_bytes * 2;
        }

        /*
          Refill the budget from the elapsed time and sleep until it covers
          the bytes just read from the network.
          NOTE(review): the loop condition does not re-check
          io_slave_killed(); if the thread is killed while throttled it
          looks like the loop keeps taking the short my_sleep() until the
          budget has refilled - confirm this is acceptable for STOP SLAVE.
        */
        do
        {
          ulonglong currenttime = my_hrtime().val;
          tokenamount += (currenttime - lastchecktime) * speed_limit_in_bytes / (1000*1000);
          lastchecktime = currenttime;
          if(tokenamount < network_read_len)
          {
            ulonglong duration =1000ULL*1000 * (network_read_len - tokenamount) / speed_limit_in_bytes;
            time_t second_time = (time_t)(duration / (1000 * 1000));
            uint micro_time = duration % (1000 * 1000);

            // at least sleep 1000 micro second
            my_sleep(MY_MAX(micro_time,1000));

            /*
              If it sleeps for more than one second,
              it should use slave_sleep() to avoid the STOP SLAVE hang.
            */
            if (second_time)
              slave_sleep(thd, second_time, io_slave_killed, mi);

          }
        }while(tokenamount < network_read_len);
        tokenamount -= network_read_len;
      }

      if (queue_event(mi, event_buf, event_len))
      {
        mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
                   ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
                   "could not queue event from master");
        goto err;
      }

      if (rpl_semi_sync_slave_status && (mi->semi_ack & SEMI_SYNC_NEED_ACK))
      {
        /*
          We deliberately ignore the error in slave_reply, such error should
          not cause the slave IO thread to stop, and the error messages are
          already reported.
        */
        (void)repl_semisync_slave.slave_reply(mi);
      }

      if (mi->using_gtid == Master_info::USE_GTID_NO &&
          /*
            If rpl_semi_sync_slave_delay_master is enabled, we will flush
            master info only when ack is needed. This may lead to at least one
            group transaction delay but affords better performance improvement.
          */
          (!repl_semisync_slave.get_slave_enabled() ||
           (!(mi->semi_ack & SEMI_SYNC_SLAVE_DELAY_SYNC) ||
            (mi->semi_ack & (SEMI_SYNC_NEED_ACK)))) &&
          (DBUG_EVALUATE_IF("failed_flush_master_info", 1, 0) ||
           flush_master_info(mi, TRUE, TRUE)))
      {
        sql_print_error("Failed to flush master info file");
        goto err;
      }
      /*
        See if the relay logs take too much space.
        We don't lock mi->rli.log_space_lock here; this dirty read saves time
        and does not introduce any problem:
        - if mi->rli.ignore_log_space_limit is 1 but becomes 0 just after (so
        the clean value is 0), then we are reading only one more event as we
        should, and we'll block only at the next event. No big deal.
        - if mi->rli.ignore_log_space_limit is 0 but becomes 1 just
        after (so the clean value is 1), then we are going into
        wait_for_relay_log_space() for no reason, but this function
        will do a clean read, notice the clean value and exit
        immediately.
      */
#ifndef DBUG_OFF
      {
        DBUG_PRINT("info", ("log_space_limit=%llu log_space_total=%llu "
                            "ignore_log_space_limit=%d",
                            rli->log_space_limit, uint64(rli->log_space_total),
                            (int) rli->ignore_log_space_limit));
      }
#endif

      if (rli->log_space_limit && rli->log_space_limit <
          rli->log_space_total &&
          !rli->ignore_log_space_limit)
        if (wait_for_relay_log_space(rli))
        {
          sql_print_error("Slave I/O thread aborted while waiting for relay \
log space");
          goto err;
        }
    }
  }

  // error = 0;
  /*
    Common exit path: reached by falling out of the loops once the thread
    is killed, and by every "goto err" above.
  */
err:
  // print the current replication position
  if (mi->using_gtid == Master_info::USE_GTID_NO)
  {
    sql_print_information("Slave I/O thread exiting, read up to log '%s', "
                          "position %llu", IO_RPL_LOG_NAME, mi->master_log_pos);
    sql_print_information("master was %s:%d", mi->host, mi->port);
  }
  else
  {
    StringBuffer<100> tmp;
    mi->gtid_current_pos.to_string(&tmp);
    sql_print_information("Slave I/O thread exiting, read up to log '%s', "
                          "position %llu; GTID position %s",
                          IO_RPL_LOG_NAME, mi->master_log_pos,
                          tmp.c_ptr_safe());
    sql_print_information("master was %s:%d", mi->host, mi->port);
  }
  repl_semisync_slave.slave_stop(mi);
  thd->reset_query();
  thd->reset_db(&null_clex_str);
  if (mysql)
  {
    /*
      Here we need to clear the active VIO before closing the
      connection with the master. The reason is that THD::awake()
      might be called from terminate_slave_thread() because somebody
      issued a STOP SLAVE. If that happens, the close_active_vio()
      can be called in the middle of closing the VIO associated with
      the 'mysql' object, causing a crash.
    */
#ifdef SIGNAL_WITH_VIO_CLOSE
    thd->clear_active_vio();
#endif
    mysql_close(mysql);
    mi->mysql=0;
  }
  write_ignored_events_info_to_relay_log(thd, mi);
  if (mi->using_gtid != Master_info::USE_GTID_NO)
    flush_master_info(mi, TRUE, TRUE);
  THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
  thd->add_status_to_global();
  server_threads.erase(thd);
  mysql_mutex_lock(&mi->run_lock);

  /*
    Entered with mi->run_lock held, both when falling through from err
    and when jumping here after a failed init_slave_thread().
  */
err_during_init:
  /* Forget the relay log's format */
  delete mi->rli.relay_log.description_event_for_queue;
  mi->rli.relay_log.description_event_for_queue= 0;
  // TODO: make rpl_status part of Master_info
  change_rpl_status(RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE);

  thd->assert_not_linked();
  delete thd;

  mi->abort_slave= 0;
  mi->slave_running= MYSQL_SLAVE_NOT_RUN;
  mi->io_thd= 0;
  /*
    Note: the order of the two following calls (first broadcast, then unlock)
    is important. Otherwise a killer_thread can execute between the calls and
    delete the mi structure leading to a crash! (see BUG#25306 for details)
   */
  mysql_cond_broadcast(&mi->stop_cond);       // tell the world we are done
  DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
  mysql_mutex_unlock(&mi->run_lock);

  DBUG_LEAVE;                                   // Must match DBUG_ENTER()
  my_thread_end();
  ERR_remove_state(0);
  pthread_exit(0);
  return 0;                                     // Avoid compiler warnings
}

/*
  Check the temporary directory used by commands like
  LOAD DATA INFILE.

  As the directory never changes during a mysqld run, we only
  test this once and cache the result. This also resolves a race condition
  when this can be run by multiple threads at the same time.
4996 */ 4997 4998 static bool check_temp_dir_run= 0; 4999 static int check_temp_dir_result= 0; 5000 5001 static 5002 int check_temp_dir(char* tmp_file) 5003 { 5004 File fd; 5005 int result= 1; // Assume failure 5006 MY_DIR *dirp; 5007 char tmp_dir[FN_REFLEN]; 5008 size_t tmp_dir_size; 5009 DBUG_ENTER("check_temp_dir"); 5010 5011 /* This look is safe to use as this function is only called once */ 5012 mysql_mutex_lock(&LOCK_start_thread); 5013 if (check_temp_dir_run) 5014 { 5015 if ((result= check_temp_dir_result)) 5016 my_message(result, tmp_file, MYF(0)); 5017 goto end; 5018 } 5019 check_temp_dir_run= 1; 5020 5021 /* 5022 Get the directory from the temporary file. 5023 */ 5024 dirname_part(tmp_dir, tmp_file, &tmp_dir_size); 5025 5026 /* 5027 Check if the directory exists. 5028 */ 5029 if (!(dirp=my_dir(tmp_dir,MYF(MY_WME)))) 5030 goto end; 5031 my_dirend(dirp); 5032 5033 /* 5034 Check permissions to create a file. We use O_TRUNC to ensure that 5035 things works even if we happen to have and old file laying around. 5036 */ 5037 if ((fd= mysql_file_create(key_file_misc, 5038 tmp_file, CREATE_MODE, 5039 O_WRONLY | O_BINARY | O_TRUNC | O_NOFOLLOW, 5040 MYF(MY_WME))) < 0) 5041 goto end; 5042 5043 result= 0; // Directory name ok 5044 /* 5045 Clean up. 
5046 */ 5047 mysql_file_close(fd, MYF(0)); 5048 mysql_file_delete(key_file_misc, tmp_file, MYF(0)); 5049 5050 end: 5051 mysql_mutex_unlock(&LOCK_start_thread); 5052 DBUG_RETURN(result); 5053 } 5054 5055 5056 void 5057 slave_output_error_info(rpl_group_info *rgi, THD *thd) 5058 { 5059 /* 5060 retrieve as much info as possible from the thd and, error 5061 codes and warnings and print this to the error log as to 5062 allow the user to locate the error 5063 */ 5064 Relay_log_info *rli= rgi->rli; 5065 uint32 const last_errno= rli->last_error().number; 5066 5067 if (unlikely(thd->is_error())) 5068 { 5069 char const *const errmsg= thd->get_stmt_da()->message(); 5070 5071 DBUG_PRINT("info", 5072 ("thd->get_stmt_da()->sql_errno()=%d; rli->last_error.number=%d", 5073 thd->get_stmt_da()->sql_errno(), last_errno)); 5074 if (last_errno == 0) 5075 { 5076 /* 5077 This function is reporting an error which was not reported 5078 while executing exec_relay_log_event(). 5079 */ 5080 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), 5081 rgi->gtid_info(), "%s", errmsg); 5082 } 5083 else if (last_errno != thd->get_stmt_da()->sql_errno()) 5084 { 5085 /* 5086 * An error was reported while executing exec_relay_log_event() 5087 * however the error code differs from what is in the thread. 5088 * This function prints out more information to help finding 5089 * what caused the problem. 5090 */ 5091 sql_print_error("Slave (additional info): %s Error_code: %d", 5092 errmsg, thd->get_stmt_da()->sql_errno()); 5093 } 5094 } 5095 5096 /* Print any warnings issued */ 5097 Diagnostics_area::Sql_condition_iterator it= 5098 thd->get_stmt_da()->sql_conditions(); 5099 const Sql_condition *err; 5100 /* 5101 Added controlled slave thread cancel for replication 5102 of user-defined variables. 
5103 */ 5104 bool udf_error = false; 5105 while ((err= it++)) 5106 { 5107 if (err->get_sql_errno() == ER_CANT_OPEN_LIBRARY) 5108 udf_error = true; 5109 sql_print_warning("Slave: %s Error_code: %d", err->get_message_text(), err->get_sql_errno()); 5110 } 5111 if (unlikely(udf_error)) 5112 { 5113 StringBuffer<100> tmp; 5114 if (rli->mi->using_gtid != Master_info::USE_GTID_NO) 5115 { 5116 tmp.append(STRING_WITH_LEN("; GTID position '")); 5117 rpl_append_gtid_state(&tmp, false); 5118 tmp.append(STRING_WITH_LEN("'")); 5119 } 5120 sql_print_error("Error loading user-defined library, slave SQL " 5121 "thread aborted. Install the missing library, and restart the " 5122 "slave SQL thread with \"SLAVE START\". We stopped at log '%s' " 5123 "position %llu%s", RPL_LOG_NAME, rli->group_master_log_pos, 5124 tmp.c_ptr_safe()); 5125 } 5126 else 5127 { 5128 StringBuffer<100> tmp; 5129 if (rli->mi->using_gtid != Master_info::USE_GTID_NO) 5130 { 5131 tmp.append(STRING_WITH_LEN("; GTID position '")); 5132 rpl_append_gtid_state(&tmp, false); 5133 tmp.append(STRING_WITH_LEN("'")); 5134 } 5135 sql_print_error("Error running query, slave SQL thread aborted. " 5136 "Fix the problem, and restart the slave SQL thread " 5137 "with \"SLAVE START\". We stopped at log '%s' position " 5138 "%llu%s", RPL_LOG_NAME, rli->group_master_log_pos, 5139 tmp.c_ptr_safe()); 5140 } 5141 } 5142 5143 5144 /** 5145 Slave SQL thread entry point. 5146 5147 @param arg Pointer to Relay_log_info object that holds information 5148 for the SQL thread. 5149 5150 @return Always 0. 
5151 */ 5152 pthread_handler_t handle_slave_sql(void *arg) 5153 { 5154 THD *thd; /* needs to be first for thread_stack */ 5155 char saved_log_name[FN_REFLEN]; 5156 char saved_master_log_name[FN_REFLEN]; 5157 my_off_t UNINIT_VAR(saved_log_pos); 5158 my_off_t UNINIT_VAR(saved_master_log_pos); 5159 String saved_skip_gtid_pos; 5160 my_off_t saved_skip= 0; 5161 Master_info *mi= ((Master_info*)arg); 5162 Relay_log_info* rli = &mi->rli; 5163 my_bool wsrep_node_dropped __attribute__((unused)) = FALSE; 5164 const char *errmsg; 5165 rpl_group_info *serial_rgi; 5166 rpl_sql_thread_info sql_info(mi->rpl_filter); 5167 5168 // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff 5169 my_thread_init(); 5170 DBUG_ENTER("handle_slave_sql"); 5171 5172 #ifdef WITH_WSREP 5173 wsrep_restart_point: 5174 #endif 5175 5176 serial_rgi= new rpl_group_info(rli); 5177 thd = new THD(next_thread_id()); // note that contructor of THD uses DBUG_ ! 5178 thd->thread_stack = (char*)&thd; // remember where our stack is 5179 thd->system_thread_info.rpl_sql_info= &sql_info; 5180 5181 DBUG_ASSERT(rli->inited); 5182 DBUG_ASSERT(rli->mi == mi); 5183 mysql_mutex_lock(&rli->run_lock); 5184 DBUG_ASSERT(!rli->slave_running); 5185 errmsg= 0; 5186 #ifndef DBUG_OFF 5187 rli->events_till_abort = abort_slave_event_count; 5188 #endif 5189 5190 /* 5191 THD for the sql driver thd. In parallel replication this is the thread 5192 that reads things from the relay log and calls rpl_parallel::do_event() 5193 to execute queries. 5194 5195 In single thread replication this is the THD for the thread that is 5196 executing SQL queries too. 
5197 */ 5198 serial_rgi->thd= rli->sql_driver_thd= thd; 5199 5200 /* Inform waiting threads that slave has started */ 5201 rli->slave_run_id++; 5202 rli->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT; 5203 5204 pthread_detach_this_thread(); 5205 5206 if (opt_slave_parallel_threads > 0 && 5207 rpl_parallel_activate_pool(&global_rpl_thread_pool)) 5208 { 5209 mysql_cond_broadcast(&rli->start_cond); 5210 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL, 5211 "Failed during parallel slave pool activation"); 5212 goto err_during_init; 5213 } 5214 5215 if (init_slave_thread(thd, mi, SLAVE_THD_SQL)) 5216 { 5217 /* 5218 TODO: this is currently broken - slave start and change master 5219 will be stuck if we fail here 5220 */ 5221 mysql_cond_broadcast(&rli->start_cond); 5222 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL, 5223 "Failed during slave thread initialization"); 5224 goto err_during_init; 5225 } 5226 thd->init_for_queries(); 5227 thd->rgi_slave= serial_rgi; 5228 if ((serial_rgi->deferred_events_collecting= mi->rpl_filter->is_on())) 5229 { 5230 serial_rgi->deferred_events= new Deferred_log_events(rli); 5231 } 5232 5233 /* 5234 binlog_annotate_row_events must be TRUE only after an Annotate_rows event 5235 has been received and only till the last corresponding rbr event has been 5236 applied. In all other cases it must be FALSE. 5237 */ 5238 thd->variables.binlog_annotate_row_events= 0; 5239 5240 /* Ensure that slave can exeute any alter table it gets from master */ 5241 thd->variables.alter_algorithm= (ulong) Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT; 5242 5243 server_threads.insert(thd); 5244 /* 5245 We are going to set slave_running to 1. Assuming slave I/O thread is 5246 alive and connected, this is going to make Seconds_Behind_Master be 0 5247 i.e. "caught up". Even if we're just at start of thread. 
Well it's ok, at 5248 the moment we start we can think we are caught up, and the next second we 5249 start receiving data so we realize we are not caught up and 5250 Seconds_Behind_Master grows. No big deal. 5251 */ 5252 rli->abort_slave = 0; 5253 rli->stop_for_until= false; 5254 mysql_mutex_unlock(&rli->run_lock); 5255 mysql_cond_broadcast(&rli->start_cond); 5256 5257 /* 5258 Reset errors for a clean start (otherwise, if the master is idle, the SQL 5259 thread may execute no Query_log_event, so the error will remain even 5260 though there's no problem anymore). Do not reset the master timestamp 5261 (imagine the slave has caught everything, the STOP SLAVE and START SLAVE: 5262 as we are not sure that we are going to receive a query, we want to 5263 remember the last master timestamp (to say how many seconds behind we are 5264 now. 5265 But the master timestamp is reset by RESET SLAVE & CHANGE MASTER. 5266 */ 5267 rli->clear_error(); 5268 rli->parallel.reset(); 5269 5270 //tell the I/O thread to take relay_log_space_limit into account from now on 5271 rli->ignore_log_space_limit= 0; 5272 5273 serial_rgi->gtid_sub_id= 0; 5274 serial_rgi->gtid_pending= false; 5275 if (mi->using_gtid != Master_info::USE_GTID_NO && mi->using_parallel() && 5276 rli->restart_gtid_pos.count() > 0) 5277 { 5278 /* 5279 With parallel replication in GTID mode, if we have a multi-domain GTID 5280 position, we need to start some way back in the relay log and skip any 5281 GTID that was already applied before. Since event groups can be split 5282 across multiple relay logs, this earlier starting point may be in the 5283 middle of an already applied event group, so we also need to skip any 5284 remaining part of such group. 
5285 */ 5286 rli->gtid_skip_flag = GTID_SKIP_TRANSACTION; 5287 } 5288 else 5289 rli->gtid_skip_flag = GTID_SKIP_NOT; 5290 if (init_relay_log_pos(rli, 5291 rli->group_relay_log_name, 5292 rli->group_relay_log_pos, 5293 1 /*need data lock*/, &errmsg, 5294 1 /*look for a description_event*/)) 5295 { 5296 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL, 5297 "Error initializing relay log position: %s", errmsg); 5298 goto err_before_start; 5299 } 5300 rli->reset_inuse_relaylog(); 5301 if (rli->alloc_inuse_relaylog(rli->group_relay_log_name)) 5302 goto err_before_start; 5303 5304 strcpy(rli->future_event_master_log_name, rli->group_master_log_name); 5305 THD_CHECK_SENTRY(thd); 5306 #ifndef DBUG_OFF 5307 { 5308 DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%llu " 5309 "rli->event_relay_log_pos=%llu", 5310 my_b_tell(rli->cur_log), rli->event_relay_log_pos)); 5311 DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE); 5312 /* 5313 Wonder if this is correct. I (Guilhem) wonder if my_b_tell() returns the 5314 correct position when it's called just after my_b_seek() (the questionable 5315 stuff is those "seek is done on next read" comments in the my_b_seek() 5316 source code). 5317 The crude reality is that this assertion randomly fails whereas 5318 replication seems to work fine. And there is no easy explanation why it 5319 fails (as we my_b_seek(rli->event_relay_log_pos) at the very end of 5320 init_relay_log_pos() called above). Maybe the assertion would be 5321 meaningful if we held rli->data_lock between the my_b_seek() and the 5322 DBUG_ASSERT(). 
5323 */ 5324 #ifdef SHOULD_BE_CHECKED 5325 DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos); 5326 #endif 5327 } 5328 #endif 5329 5330 DBUG_PRINT("master_info",("log_file_name: %s position: %llu", 5331 rli->group_master_log_name, 5332 rli->group_master_log_pos)); 5333 if (global_system_variables.log_warnings) 5334 { 5335 StringBuffer<100> tmp; 5336 if (mi->using_gtid != Master_info::USE_GTID_NO) 5337 { 5338 tmp.append(STRING_WITH_LEN("; GTID position '")); 5339 rpl_append_gtid_state(&tmp, 5340 mi->using_gtid==Master_info::USE_GTID_CURRENT_POS); 5341 tmp.append(STRING_WITH_LEN("'")); 5342 } 5343 sql_print_information("Slave SQL thread initialized, starting replication " 5344 "in log '%s' at position %llu, relay log '%s' " 5345 "position: %llu%s", RPL_LOG_NAME, 5346 rli->group_master_log_pos, rli->group_relay_log_name, 5347 rli->group_relay_log_pos, tmp.c_ptr_safe()); 5348 } 5349 5350 if (check_temp_dir(rli->slave_patternload_file)) 5351 { 5352 check_temp_dir_result= thd->get_stmt_da()->sql_errno(); 5353 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL, 5354 "Unable to use slave's temporary directory %s - %s", 5355 slave_load_tmpdir, thd->get_stmt_da()->message()); 5356 goto err; 5357 } 5358 else 5359 check_temp_dir_result= 0; 5360 5361 /* Load the set of seen GTIDs, if we did not already. */ 5362 if (rpl_load_gtid_slave_state(thd)) 5363 { 5364 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL, 5365 "Unable to load replication GTID slave state from mysql.%s: %s", 5366 rpl_gtid_slave_state_table_name.str, 5367 thd->get_stmt_da()->message()); 5368 /* 5369 If we are using old-style replication, we can continue, even though we 5370 then will not be able to record the GTIDs we receive. But if using GTID, 5371 we must give up. 5372 */ 5373 if (mi->using_gtid != Master_info::USE_GTID_NO || opt_gtid_strict_mode) 5374 goto err; 5375 } 5376 /* Re-load the set of mysql.gtid_slave_posXXX tables available. 
*/ 5377 if (find_gtid_slave_pos_tables(thd)) 5378 { 5379 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL, 5380 "Error processing replication GTID position tables: %s", 5381 thd->get_stmt_da()->message()); 5382 goto err; 5383 } 5384 5385 /* execute init_slave variable */ 5386 if (opt_init_slave.length) 5387 { 5388 execute_init_command(thd, &opt_init_slave, &LOCK_sys_init_slave); 5389 if (unlikely(thd->is_slave_error)) 5390 { 5391 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL, 5392 "Slave SQL thread aborted. Can't execute init_slave query"); 5393 goto err; 5394 } 5395 } 5396 5397 /* 5398 First check until condition - probably there is nothing to execute. We 5399 do not want to wait for next event in this case. 5400 */ 5401 mysql_mutex_lock(&rli->data_lock); 5402 if (rli->slave_skip_counter) 5403 { 5404 strmake_buf(saved_log_name, rli->group_relay_log_name); 5405 strmake_buf(saved_master_log_name, rli->group_master_log_name); 5406 saved_log_pos= rli->group_relay_log_pos; 5407 saved_master_log_pos= rli->group_master_log_pos; 5408 if (mi->using_gtid != Master_info::USE_GTID_NO) 5409 { 5410 saved_skip_gtid_pos.append(STRING_WITH_LEN(", GTID '")); 5411 rpl_append_gtid_state(&saved_skip_gtid_pos, false); 5412 saved_skip_gtid_pos.append(STRING_WITH_LEN("'; ")); 5413 } 5414 saved_skip= rli->slave_skip_counter; 5415 } 5416 if ((rli->until_condition == Relay_log_info::UNTIL_MASTER_POS || 5417 rli->until_condition == Relay_log_info::UNTIL_RELAY_POS) && 5418 rli->is_until_satisfied(NULL)) 5419 { 5420 sql_print_information("Slave SQL thread stopped because it reached its" 5421 " UNTIL position %llu in %s %s file", 5422 rli->until_pos(), rli->until_name(), 5423 rli->until_condition == 5424 Relay_log_info::UNTIL_MASTER_POS ? 
5425 "binlog" : "relaylog"); 5426 mysql_mutex_unlock(&rli->data_lock); 5427 goto err; 5428 } 5429 mysql_mutex_unlock(&rli->data_lock); 5430 #ifdef WITH_WSREP 5431 wsrep_open(thd); 5432 if (wsrep_before_command(thd)) 5433 { 5434 WSREP_WARN("Slave SQL wsrep_before_command() failed"); 5435 goto err; 5436 } 5437 #endif /* WITH_WSREP */ 5438 /* Read queries from the IO/THREAD until this thread is killed */ 5439 5440 thd->set_command(COM_SLAVE_SQL); 5441 while (!sql_slave_killed(serial_rgi)) 5442 { 5443 THD_STAGE_INFO(thd, stage_reading_event_from_the_relay_log); 5444 THD_CHECK_SENTRY(thd); 5445 5446 if (saved_skip && rli->slave_skip_counter == 0) 5447 { 5448 StringBuffer<100> tmp; 5449 if (mi->using_gtid != Master_info::USE_GTID_NO) 5450 { 5451 tmp.append(STRING_WITH_LEN(", GTID '")); 5452 rpl_append_gtid_state(&tmp, false); 5453 tmp.append(STRING_WITH_LEN("'; ")); 5454 } 5455 5456 sql_print_information("'SQL_SLAVE_SKIP_COUNTER=%ld' executed at " 5457 "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', " 5458 "master_log_pos='%ld'%s and new position at " 5459 "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', " 5460 "master_log_pos='%ld'%s ", 5461 (ulong) saved_skip, saved_log_name, (ulong) saved_log_pos, 5462 saved_master_log_name, (ulong) saved_master_log_pos, 5463 saved_skip_gtid_pos.c_ptr_safe(), 5464 rli->group_relay_log_name, (ulong) rli->group_relay_log_pos, 5465 rli->group_master_log_name, (ulong) rli->group_master_log_pos, 5466 tmp.c_ptr_safe()); 5467 saved_skip= 0; 5468 saved_skip_gtid_pos.free(); 5469 } 5470 5471 if (exec_relay_log_event(thd, rli, serial_rgi)) 5472 { 5473 #ifdef WITH_WSREP 5474 if (WSREP(thd)) 5475 { 5476 mysql_mutex_lock(&thd->LOCK_thd_data); 5477 5478 if (thd->wsrep_cs().current_error()) 5479 { 5480 wsrep_node_dropped = TRUE; 5481 rli->abort_slave = TRUE; 5482 } 5483 mysql_mutex_unlock(&thd->LOCK_thd_data); 5484 } 5485 #endif /* WITH_WSREP */ 5486 5487 DBUG_PRINT("info", ("exec_relay_log_event() failed")); 5488 // 
do not scare the user if SQL thread was simply killed or stopped 5489 if (!sql_slave_killed(serial_rgi)) 5490 { 5491 slave_output_error_info(serial_rgi, thd); 5492 if (WSREP(thd) && rli->last_error().number == ER_UNKNOWN_COM_ERROR) 5493 { 5494 wsrep_node_dropped= TRUE; 5495 } 5496 } 5497 goto err; 5498 } 5499 } 5500 5501 err: 5502 if (mi->using_parallel()) 5503 rli->parallel.wait_for_done(thd, rli); 5504 /* Gtid_list_log_event::do_apply_event has already reported the GTID until */ 5505 if (rli->stop_for_until && rli->until_condition != Relay_log_info::UNTIL_GTID) 5506 { 5507 if (global_system_variables.log_warnings > 2) 5508 sql_print_information("Slave SQL thread UNTIL stop was requested at position " 5509 "%llu in %s %s file", 5510 rli->until_log_pos, rli->until_log_name, 5511 rli->until_condition == 5512 Relay_log_info::UNTIL_MASTER_POS ? 5513 "binlog" : "relaylog"); 5514 sql_print_information("Slave SQL thread stopped because it reached its" 5515 " UNTIL position %llu in %s %s file", 5516 rli->until_pos(), rli->until_name(), 5517 rli->until_condition == 5518 Relay_log_info::UNTIL_MASTER_POS ? 5519 "binlog" : "relaylog"); 5520 5521 }; 5522 /* Thread stopped. Print the current replication position to the log */ 5523 { 5524 StringBuffer<100> tmp; 5525 if (mi->using_gtid != Master_info::USE_GTID_NO) 5526 { 5527 tmp.append(STRING_WITH_LEN("; GTID position '")); 5528 rpl_append_gtid_state(&tmp, false); 5529 tmp.append(STRING_WITH_LEN("'")); 5530 } 5531 sql_print_information("Slave SQL thread exiting, replication stopped in " 5532 "log '%s' at position %llu%s", RPL_LOG_NAME, 5533 rli->group_master_log_pos, tmp.c_ptr_safe()); 5534 sql_print_information("master was %s:%d", mi->host, mi->port); 5535 } 5536 #ifdef WITH_WSREP 5537 wsrep_after_command_before_result(thd); 5538 wsrep_after_command_after_result(thd); 5539 #endif /* WITH_WSREP */ 5540 5541 err_before_start: 5542 5543 /* 5544 Some events set some playgrounds, which won't be cleared because thread 5545 stops. 
Stopping of this thread may not be known to these events ("stop" 5546 request is detected only by the present function, not by events), so we 5547 must "proactively" clear playgrounds: 5548 */ 5549 thd->clear_error(); 5550 serial_rgi->cleanup_context(thd, 1); 5551 /* 5552 Some extra safety, which should not been needed (normally, event deletion 5553 should already have done these assignments (each event which sets these 5554 variables is supposed to set them to 0 before terminating)). 5555 */ 5556 thd->catalog= 0; 5557 thd->reset_query(); 5558 thd->reset_db(&null_clex_str); 5559 if (rli->mi->using_gtid != Master_info::USE_GTID_NO) 5560 { 5561 ulong domain_count; 5562 my_bool save_log_all_errors= thd->log_all_errors; 5563 5564 /* 5565 We don't need to check return value for rli->flush() 5566 as any errors should be logged to stderr 5567 */ 5568 thd->log_all_errors= 1; 5569 rli->flush(); 5570 thd->log_all_errors= save_log_all_errors; 5571 if (mi->using_parallel()) 5572 { 5573 /* 5574 In parallel replication GTID mode, we may stop with different domains 5575 at different positions in the relay log. 5576 5577 To handle this when we restart the SQL thread, mark the current 5578 per-domain position in the Relay_log_info. 5579 */ 5580 mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state); 5581 domain_count= rpl_global_gtid_slave_state->count(); 5582 mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state); 5583 if (domain_count > 1) 5584 { 5585 inuse_relaylog *ir; 5586 5587 /* 5588 Load the starting GTID position, so that we can skip already applied 5589 GTIDs when we restart the SQL thread. And set the start position in 5590 the relay log back to a known safe place to start (prior to any not 5591 yet applied transaction in any domain). 
5592 */ 5593 rli->restart_gtid_pos.load(rpl_global_gtid_slave_state, NULL, 0); 5594 if ((ir= rli->inuse_relaylog_list)) 5595 { 5596 rpl_gtid *gtid= ir->relay_log_state; 5597 uint32 count= ir->relay_log_state_count; 5598 while (count > 0) 5599 { 5600 process_gtid_for_restart_pos(rli, gtid); 5601 ++gtid; 5602 --count; 5603 } 5604 strmake_buf(rli->group_relay_log_name, ir->name); 5605 rli->group_relay_log_pos= BIN_LOG_HEADER_SIZE; 5606 rli->relay_log_state.load(ir->relay_log_state, ir->relay_log_state_count); 5607 } 5608 } 5609 } 5610 } 5611 THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit); 5612 thd->add_status_to_global(); 5613 server_threads.erase(thd); 5614 mysql_mutex_lock(&rli->run_lock); 5615 5616 err_during_init: 5617 /* We need data_lock, at least to wake up any waiting master_pos_wait() */ 5618 mysql_mutex_lock(&rli->data_lock); 5619 DBUG_ASSERT(rli->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT); // tracking buffer overrun 5620 /* When master_pos_wait() wakes up it will check this and terminate */ 5621 rli->slave_running= MYSQL_SLAVE_NOT_RUN; 5622 /* Forget the relay log's format */ 5623 delete rli->relay_log.description_event_for_exec; 5624 rli->relay_log.description_event_for_exec= 0; 5625 rli->reset_inuse_relaylog(); 5626 /* Wake up master_pos_wait() */ 5627 mysql_mutex_unlock(&rli->data_lock); 5628 DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions")); 5629 mysql_cond_broadcast(&rli->data_cond); 5630 rli->ignore_log_space_limit= 0; /* don't need any lock */ 5631 /* we die so won't remember charset - re-update them on next thread start */ 5632 thd->system_thread_info.rpl_sql_info->cached_charset_invalidate(); 5633 5634 /* 5635 TODO: see if we can do this conditionally in next_event() instead 5636 to avoid unneeded position re-init 5637 5638 We only reset THD::temporary_tables to 0 here and not free it, as this 5639 could be used by slave through Relay_log_info::save_temporary_tables. 
5640 */ 5641 thd->temporary_tables= 0; 5642 rli->sql_driver_thd= 0; 5643 thd->rgi_fake= thd->rgi_slave= NULL; 5644 5645 #ifdef WITH_WSREP 5646 /* 5647 If slave stopped due to node going non primary, we set global flag to 5648 trigger automatic restart of slave when node joins back to cluster. 5649 */ 5650 if (WSREP(thd) && wsrep_node_dropped && wsrep_restart_slave) 5651 { 5652 if (wsrep_ready_get()) 5653 { 5654 WSREP_INFO("Slave error due to node temporarily non-primary" 5655 "SQL slave will continue"); 5656 wsrep_node_dropped= FALSE; 5657 mysql_mutex_unlock(&rli->run_lock); 5658 goto wsrep_restart_point; 5659 } 5660 else 5661 { 5662 WSREP_INFO("Slave error due to node going non-primary"); 5663 WSREP_INFO("wsrep_restart_slave was set and therefore slave will be " 5664 "automatically restarted when node joins back to cluster"); 5665 wsrep_restart_slave_activated= TRUE; 5666 } 5667 } 5668 wsrep_close(thd); 5669 #endif /* WITH_WSREP */ 5670 5671 /* 5672 Note: the order of the broadcast and unlock calls below (first 5673 broadcast, then unlock) is important. Otherwise a killer_thread can 5674 execute between the calls and delete the mi structure leading to a 5675 crash! 
(see BUG#25306 for details) 5676 */ 5677 mysql_cond_broadcast(&rli->stop_cond); 5678 DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5);); 5679 mysql_mutex_unlock(&rli->run_lock); // tell the world we are done 5680 5681 rpl_parallel_resize_pool_if_no_slaves(); 5682 5683 delete serial_rgi; 5684 delete thd; 5685 5686 DBUG_LEAVE; // Must match DBUG_ENTER() 5687 my_thread_end(); 5688 ERR_remove_state(0); 5689 pthread_exit(0); 5690 return 0; // Avoid compiler warnings 5691 } 5692 5693 5694 /* 5695 process_io_create_file() 5696 */ 5697 5698 static int process_io_create_file(Master_info* mi, Create_file_log_event* cev) 5699 { 5700 int error = 1; 5701 ulong num_bytes; 5702 bool cev_not_written; 5703 THD *thd = mi->io_thd; 5704 NET *net = &mi->mysql->net; 5705 DBUG_ENTER("process_io_create_file"); 5706 5707 if (unlikely(!cev->is_valid())) 5708 DBUG_RETURN(1); 5709 5710 if (!mi->rpl_filter->db_ok(cev->db)) 5711 { 5712 skip_load_data_infile(net); 5713 DBUG_RETURN(0); 5714 } 5715 DBUG_ASSERT(cev->inited_from_old); 5716 thd->file_id = cev->file_id = mi->file_id++; 5717 thd->variables.server_id = cev->server_id; 5718 cev_not_written = 1; 5719 5720 if (unlikely(net_request_file(net,cev->fname))) 5721 { 5722 sql_print_error("Slave I/O: failed requesting download of '%s'", 5723 cev->fname); 5724 goto err; 5725 } 5726 5727 /* 5728 This dummy block is so we could instantiate Append_block_log_event 5729 once and then modify it slightly instead of doing it multiple times 5730 in the loop 5731 */ 5732 { 5733 Append_block_log_event aev(thd,0,0,0,0); 5734 5735 for (;;) 5736 { 5737 if (unlikely((num_bytes=my_net_read(net)) == packet_error)) 5738 { 5739 sql_print_error("Network read error downloading '%s' from master", 5740 cev->fname); 5741 goto err; 5742 } 5743 if (unlikely(!num_bytes)) /* eof */ 5744 { 5745 /* 3.23 master wants it */ 5746 net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0); 5747 /* 5748 If we wrote Create_file_log_event, then we need to write 
5749 Execute_load_log_event. If we did not write Create_file_log_event, 5750 then this is an empty file and we can just do as if the LOAD DATA 5751 INFILE had not existed, i.e. write nothing. 5752 */ 5753 if (unlikely(cev_not_written)) 5754 break; 5755 Execute_load_log_event xev(thd,0,0); 5756 xev.log_pos = cev->log_pos; 5757 if (unlikely(mi->rli.relay_log.append(&xev))) 5758 { 5759 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL, 5760 ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE), 5761 "error writing Exec_load event to relay log"); 5762 goto err; 5763 } 5764 mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total); 5765 break; 5766 } 5767 if (unlikely(cev_not_written)) 5768 { 5769 cev->block = net->read_pos; 5770 cev->block_len = num_bytes; 5771 if (unlikely(mi->rli.relay_log.append(cev))) 5772 { 5773 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL, 5774 ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE), 5775 "error writing Create_file event to relay log"); 5776 goto err; 5777 } 5778 cev_not_written=0; 5779 mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total); 5780 } 5781 else 5782 { 5783 aev.block = net->read_pos; 5784 aev.block_len = num_bytes; 5785 aev.log_pos = cev->log_pos; 5786 if (unlikely(mi->rli.relay_log.append(&aev))) 5787 { 5788 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL, 5789 ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE), 5790 "error writing Append_block event to relay log"); 5791 goto err; 5792 } 5793 mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total) ; 5794 } 5795 } 5796 } 5797 error=0; 5798 err: 5799 DBUG_RETURN(error); 5800 } 5801 5802 5803 /* 5804 Start using a new binary log on the master 5805 5806 SYNOPSIS 5807 process_io_rotate() 5808 mi master_info for the slave 5809 rev The rotate log event read from the binary log 5810 5811 DESCRIPTION 5812 Updates the master info with the place in the next binary 5813 log where we should start reading. 
5814 Rotate the relay log to avoid mixed-format relay logs. 5815 5816 NOTES 5817 We assume we already locked mi->data_lock 5818 5819 RETURN VALUES 5820 0 ok 5821 1 Log event is illegal 5822 5823 */ 5824 5825 static int process_io_rotate(Master_info *mi, Rotate_log_event *rev) 5826 { 5827 DBUG_ENTER("process_io_rotate"); 5828 mysql_mutex_assert_owner(&mi->data_lock); 5829 5830 if (unlikely(!rev->is_valid())) 5831 DBUG_RETURN(1); 5832 5833 /* Safe copy as 'rev' has been "sanitized" in Rotate_log_event's ctor */ 5834 memcpy(mi->master_log_name, rev->new_log_ident, rev->ident_len+1); 5835 mi->master_log_pos= rev->pos; 5836 DBUG_PRINT("info", ("master_log_pos: '%s' %lu", 5837 mi->master_log_name, (ulong) mi->master_log_pos)); 5838 #ifndef DBUG_OFF 5839 /* 5840 If we do not do this, we will be getting the first 5841 rotate event forever, so we need to not disconnect after one. 5842 */ 5843 if (disconnect_slave_event_count) 5844 mi->events_till_disconnect++; 5845 #endif 5846 5847 /* 5848 If description_event_for_queue is format <4, there is conversion in the 5849 relay log to the slave's format (4). And Rotate can mean upgrade or 5850 nothing. If upgrade, it's to 5.0 or newer, so we will get a Format_desc, so 5851 no need to reset description_event_for_queue now. And if it's nothing (same 5852 master version as before), no need (still using the slave's format). 
5853 */ 5854 if (mi->rli.relay_log.description_event_for_queue->binlog_version >= 4) 5855 { 5856 DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg == 5857 mi->rli.relay_log.relay_log_checksum_alg); 5858 5859 delete mi->rli.relay_log.description_event_for_queue; 5860 /* start from format 3 (MySQL 4.0) again */ 5861 mi->rli.relay_log.description_event_for_queue= new 5862 Format_description_log_event(3); 5863 mi->rli.relay_log.description_event_for_queue->checksum_alg= 5864 mi->rli.relay_log.relay_log_checksum_alg; 5865 } 5866 /* 5867 Rotate the relay log makes binlog format detection easier (at next slave 5868 start or mysqlbinlog) 5869 */ 5870 DBUG_RETURN(rotate_relay_log(mi) /* will take the right mutexes */); 5871 } 5872 5873 /* 5874 Reads a 3.23 event and converts it to the slave's format. This code was 5875 copied from MySQL 4.0. 5876 */ 5877 static int queue_binlog_ver_1_event(Master_info *mi, const char *buf, 5878 ulong event_len) 5879 { 5880 const char *errmsg = 0; 5881 ulong inc_pos; 5882 bool ignore_event= 0; 5883 char *tmp_buf = 0; 5884 Relay_log_info *rli= &mi->rli; 5885 DBUG_ENTER("queue_binlog_ver_1_event"); 5886 5887 /* 5888 If we get Load event, we need to pass a non-reusable buffer 5889 to read_log_event, so we do a trick 5890 */ 5891 if ((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) 5892 { 5893 if (unlikely(!(tmp_buf=(char*)my_malloc(event_len+1,MYF(MY_WME))))) 5894 { 5895 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL, 5896 ER(ER_SLAVE_FATAL_ERROR), "Memory allocation failed"); 5897 DBUG_RETURN(1); 5898 } 5899 memcpy(tmp_buf,buf,event_len); 5900 /* 5901 Create_file constructor wants a 0 as last char of buffer, this 0 will 5902 serve as the string-termination char for the file's name (which is at the 5903 end of the buffer) 5904 We must increment event_len, otherwise the event constructor will not see 5905 this end 0, which leads to segfault. 
5906 */ 5907 tmp_buf[event_len++]=0; 5908 int4store(tmp_buf+EVENT_LEN_OFFSET, event_len); 5909 buf = (const char*)tmp_buf; 5910 } 5911 /* 5912 This will transform LOAD_EVENT into CREATE_FILE_EVENT, ask the master to 5913 send the loaded file, and write it to the relay log in the form of 5914 Append_block/Exec_load (the SQL thread needs the data, as that thread is not 5915 connected to the master). 5916 */ 5917 Log_event *ev= 5918 Log_event::read_log_event(buf, event_len, &errmsg, 5919 mi->rli.relay_log.description_event_for_queue, 0); 5920 if (unlikely(!ev)) 5921 { 5922 sql_print_error("Read invalid event from master: '%s',\ 5923 master could be corrupt but a more likely cause of this is a bug", 5924 errmsg); 5925 my_free(tmp_buf); 5926 DBUG_RETURN(1); 5927 } 5928 5929 mysql_mutex_lock(&mi->data_lock); 5930 ev->log_pos= mi->master_log_pos; /* 3.23 events don't contain log_pos */ 5931 switch (ev->get_type_code()) { 5932 case STOP_EVENT: 5933 ignore_event= 1; 5934 inc_pos= event_len; 5935 break; 5936 case ROTATE_EVENT: 5937 if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev))) 5938 { 5939 delete ev; 5940 mysql_mutex_unlock(&mi->data_lock); 5941 DBUG_RETURN(1); 5942 } 5943 inc_pos= 0; 5944 break; 5945 case CREATE_FILE_EVENT: 5946 /* 5947 Yes it's possible to have CREATE_FILE_EVENT here, even if we're in 5948 queue_old_event() which is for 3.23 events which don't comprise 5949 CREATE_FILE_EVENT. This is because read_log_event() above has just 5950 transformed LOAD_EVENT into CREATE_FILE_EVENT. 
5951 */ 5952 { 5953 /* We come here when and only when tmp_buf != 0 */ 5954 DBUG_ASSERT(tmp_buf != 0); 5955 inc_pos=event_len; 5956 ev->log_pos+= inc_pos; 5957 int error = process_io_create_file(mi,(Create_file_log_event*)ev); 5958 delete ev; 5959 mi->master_log_pos += inc_pos; 5960 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos)); 5961 mysql_mutex_unlock(&mi->data_lock); 5962 my_free(tmp_buf); 5963 DBUG_RETURN(error); 5964 } 5965 default: 5966 inc_pos= event_len; 5967 break; 5968 } 5969 if (likely(!ignore_event)) 5970 { 5971 if (ev->log_pos) 5972 /* 5973 Don't do it for fake Rotate events (see comment in 5974 Log_event::Log_event(const char* buf...) in log_event.cc). 5975 */ 5976 ev->log_pos+= event_len; /* make log_pos be the pos of the end of the event */ 5977 if (unlikely(rli->relay_log.append(ev))) 5978 { 5979 delete ev; 5980 mysql_mutex_unlock(&mi->data_lock); 5981 DBUG_RETURN(1); 5982 } 5983 rli->relay_log.harvest_bytes_written(&rli->log_space_total); 5984 } 5985 delete ev; 5986 mi->master_log_pos+= inc_pos; 5987 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos)); 5988 mysql_mutex_unlock(&mi->data_lock); 5989 DBUG_RETURN(0); 5990 } 5991 5992 /* 5993 Reads a 4.0 event and converts it to the slave's format. This code was copied 5994 from queue_binlog_ver_1_event(), with some affordable simplifications. 
5995 */ 5996 static int queue_binlog_ver_3_event(Master_info *mi, const char *buf, 5997 ulong event_len) 5998 { 5999 const char *errmsg = 0; 6000 ulong inc_pos; 6001 char *tmp_buf = 0; 6002 Relay_log_info *rli= &mi->rli; 6003 DBUG_ENTER("queue_binlog_ver_3_event"); 6004 6005 /* read_log_event() will adjust log_pos to be end_log_pos */ 6006 Log_event *ev= 6007 Log_event::read_log_event(buf,event_len, &errmsg, 6008 mi->rli.relay_log.description_event_for_queue, 0); 6009 if (unlikely(!ev)) 6010 { 6011 sql_print_error("Read invalid event from master: '%s',\ 6012 master could be corrupt but a more likely cause of this is a bug", 6013 errmsg); 6014 my_free(tmp_buf); 6015 DBUG_RETURN(1); 6016 } 6017 mysql_mutex_lock(&mi->data_lock); 6018 switch (ev->get_type_code()) { 6019 case STOP_EVENT: 6020 goto err; 6021 case ROTATE_EVENT: 6022 if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev))) 6023 { 6024 delete ev; 6025 mysql_mutex_unlock(&mi->data_lock); 6026 DBUG_RETURN(1); 6027 } 6028 inc_pos= 0; 6029 break; 6030 default: 6031 inc_pos= event_len; 6032 break; 6033 } 6034 6035 if (unlikely(rli->relay_log.append(ev))) 6036 { 6037 delete ev; 6038 mysql_mutex_unlock(&mi->data_lock); 6039 DBUG_RETURN(1); 6040 } 6041 rli->relay_log.harvest_bytes_written(&rli->log_space_total); 6042 delete ev; 6043 mi->master_log_pos+= inc_pos; 6044 err: 6045 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos)); 6046 mysql_mutex_unlock(&mi->data_lock); 6047 DBUG_RETURN(0); 6048 } 6049 6050 /* 6051 queue_old_event() 6052 6053 Writes a 3.23 or 4.0 event to the relay log, after converting it to the 5.0 6054 (exactly, slave's) format. To do the conversion, we create a 5.0 event from 6055 the 3.23/4.0 bytes, then write this event to the relay log. 
6056 6057 TODO: 6058 Test this code before release - it has to be tested on a separate 6059 setup with 3.23 master or 4.0 master 6060 */ 6061 6062 static int queue_old_event(Master_info *mi, const char *buf, 6063 ulong event_len) 6064 { 6065 DBUG_ENTER("queue_old_event"); 6066 6067 switch (mi->rli.relay_log.description_event_for_queue->binlog_version) 6068 { 6069 case 1: 6070 DBUG_RETURN(queue_binlog_ver_1_event(mi,buf,event_len)); 6071 case 3: 6072 DBUG_RETURN(queue_binlog_ver_3_event(mi,buf,event_len)); 6073 default: /* unsupported format; eg version 2 */ 6074 DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()", 6075 mi->rli.relay_log.description_event_for_queue->binlog_version)); 6076 DBUG_RETURN(1); 6077 } 6078 } 6079 6080 /* 6081 queue_event() 6082 6083 If the event is 3.23/4.0, passes it to queue_old_event() which will convert 6084 it. Otherwise, writes a 5.0 (or newer) event to the relay log. Then there is 6085 no format conversion, it's pure read/write of bytes. 6086 So a 5.0.0 slave's relay log can contain events in the slave's format or in 6087 any >=5.0.0 format. 6088 */ 6089 6090 static int queue_event(Master_info* mi,const char* buf, ulong event_len) 6091 { 6092 int error= 0; 6093 StringBuffer<1024> error_msg; 6094 ulonglong inc_pos= 0; 6095 ulonglong event_pos; 6096 Relay_log_info *rli= &mi->rli; 6097 mysql_mutex_t *log_lock= rli->relay_log.get_log_lock(); 6098 ulong s_id; 6099 bool unlock_data_lock= TRUE; 6100 bool gtid_skip_enqueue= false; 6101 bool got_gtid_event= false; 6102 rpl_gtid event_gtid; 6103 static uint dbug_rows_event_count __attribute__((unused))= 0; 6104 bool is_compress_event = false; 6105 char* new_buf = NULL; 6106 char new_buf_arr[4096]; 6107 bool is_malloc = false; 6108 bool is_rows_event= false; 6109 /* 6110 FD_q must have been prepared for the first R_a event 6111 inside get_master_version_and_clock() 6112 Show-up of FD:s affects checksum_alg at once because 6113 that changes FD_queue. 
6114 */ 6115 enum enum_binlog_checksum_alg checksum_alg= 6116 mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF ? 6117 mi->checksum_alg_before_fd : mi->rli.relay_log.relay_log_checksum_alg; 6118 6119 char *save_buf= NULL; // needed for checksumming the fake Rotate event 6120 char rot_buf[LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN + FN_REFLEN]; 6121 6122 DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_OFF || 6123 checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF || 6124 checksum_alg == BINLOG_CHECKSUM_ALG_CRC32); 6125 6126 DBUG_ENTER("queue_event"); 6127 /* 6128 FD_queue checksum alg description does not apply in a case of 6129 FD itself. The one carries both parts of the checksum data. 6130 */ 6131 if (buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) 6132 { 6133 checksum_alg= get_checksum_alg(buf, event_len); 6134 } 6135 else if (buf[EVENT_TYPE_OFFSET] == START_EVENT_V3) 6136 { 6137 // checksum behaviour is similar to the pre-checksum FD handling 6138 mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF; 6139 mi->rli.relay_log.description_event_for_queue->checksum_alg= 6140 mi->rli.relay_log.relay_log_checksum_alg= checksum_alg= 6141 BINLOG_CHECKSUM_ALG_OFF; 6142 } 6143 6144 // does not hold always because of old binlog can work with NM 6145 // DBUG_ASSERT(checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF); 6146 6147 // should hold unless manipulations with RL. Tests that do that 6148 // will have to refine the clause. 
6149 DBUG_ASSERT(mi->rli.relay_log.relay_log_checksum_alg != 6150 BINLOG_CHECKSUM_ALG_UNDEF); 6151 6152 // Emulate the network corruption 6153 DBUG_EXECUTE_IF("corrupt_queue_event", 6154 if ((uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT) 6155 { 6156 char *debug_event_buf_c = (char*) buf; 6157 int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN); 6158 debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos]; 6159 DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos)); 6160 DBUG_SET("-d,corrupt_queue_event"); 6161 } 6162 ); 6163 6164 if (event_checksum_test((uchar *) buf, event_len, checksum_alg)) 6165 { 6166 error= ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE; 6167 unlock_data_lock= FALSE; 6168 goto err; 6169 } 6170 6171 if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 && 6172 (uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */) 6173 DBUG_RETURN(queue_old_event(mi,buf,event_len)); 6174 6175 #ifdef ENABLED_DEBUG_SYNC 6176 /* 6177 A (+d,dbug.rows_events_to_delay_relay_logging)-test is supposed to 6178 create a few Write_log_events and after receiving the 1st of them 6179 the IO thread signals to launch the SQL thread, and sets itself to 6180 wait for a release signal. 6181 */ 6182 DBUG_EXECUTE_IF("dbug.rows_events_to_delay_relay_logging", 6183 if ((buf[EVENT_TYPE_OFFSET] == WRITE_ROWS_EVENT_V1 || 6184 buf[EVENT_TYPE_OFFSET] == WRITE_ROWS_EVENT) && 6185 ++dbug_rows_event_count == 2) 6186 { 6187 const char act[]= 6188 "now SIGNAL start_sql_thread " 6189 "WAIT_FOR go_on_relay_logging"; 6190 DBUG_ASSERT(debug_sync_service); 6191 DBUG_ASSERT(!debug_sync_set_action(current_thd, 6192 STRING_WITH_LEN(act))); 6193 dbug_rows_event_count = 0; 6194 };); 6195 #endif 6196 mysql_mutex_lock(&mi->data_lock); 6197 6198 switch ((uchar)buf[EVENT_TYPE_OFFSET]) { 6199 case STOP_EVENT: 6200 /* 6201 We needn't write this event to the relay log. 
Indeed, it just indicates a 6202 master server shutdown. The only thing this does is cleaning. But 6203 cleaning is already done on a per-master-thread basis (as the master 6204 server is shutting down cleanly, it has written all DROP TEMPORARY TABLE 6205 prepared statements' deletion are TODO only when we binlog prep stmts). 6206 6207 We don't even increment mi->master_log_pos, because we may be just after 6208 a Rotate event. Btw, in a few milliseconds we are going to have a Start 6209 event from the next binlog (unless the master is presently running 6210 without --log-bin). 6211 */ 6212 goto err; 6213 case ROTATE_EVENT: 6214 { 6215 Rotate_log_event rev(buf, checksum_alg != BINLOG_CHECKSUM_ALG_OFF ? 6216 event_len - BINLOG_CHECKSUM_LEN : event_len, 6217 mi->rli.relay_log.description_event_for_queue); 6218 6219 if (unlikely(mi->gtid_reconnect_event_skip_count) && 6220 unlikely(!mi->gtid_event_seen) && 6221 rev.is_artificial_event() && 6222 (mi->prev_master_id != mi->master_id || 6223 strcmp(rev.new_log_ident, mi->master_log_name) != 0)) 6224 { 6225 /* 6226 Artificial Rotate_log_event is the first event we receive at the start 6227 of each master binlog file. It gives the name of the new binlog file. 6228 6229 Normally, we already have this name from the real rotate event at the 6230 end of the previous binlog file (unless we are making a new connection 6231 using GTID). But if the master server restarted/crashed, there is no 6232 rotate event at the end of the prior binlog file, so the name is new. 6233 6234 We use this fact to handle a special case of master crashing. If the 6235 master crashed while writing the binlog, it might end with a partial 6236 event group lacking the COMMIT/XID event, which must be rolled 6237 back. If the slave IO thread happens to get a disconnect in the middle 6238 of exactly this event group, it will try to reconnect at the same GTID 6239 and skip already fetched events. 
However, that GTID did not commit on 6240 the master before the crash, so it does not really exist, and the 6241 master will connect the slave at the next following GTID starting in 6242 the next binlog. This could confuse the slave and make it mix the 6243 start of one event group with the end of another. 6244 6245 But we detect this case here, by noticing the change of binlog name 6246 which detects the missing rotate event at the end of the previous 6247 binlog file. In this case, we reset the counters to make us not skip 6248 the next event group, and queue an artificial Format Description 6249 event. The previously fetched incomplete event group will then be 6250 rolled back when the Format Description event is executed by the SQL 6251 thread. 6252 6253 A similar case is if the reconnect somehow connects to a different 6254 master server (like due to a network proxy or IP address takeover). 6255 We detect this case by noticing a change of server_id and in this 6256 case likewise rollback the partially received event group. 6257 */ 6258 Format_description_log_event fdle(4); 6259 6260 if (mi->prev_master_id != mi->master_id) 6261 sql_print_warning("The server_id of master server changed in the " 6262 "middle of GTID %u-%u-%llu. Assuming a change of " 6263 "master server, so rolling back the previously " 6264 "received partial transaction. Expected: %lu, " 6265 "received: %lu", mi->last_queued_gtid.domain_id, 6266 mi->last_queued_gtid.server_id, 6267 mi->last_queued_gtid.seq_no, 6268 mi->prev_master_id, mi->master_id); 6269 else if (strcmp(rev.new_log_ident, mi->master_log_name) != 0) 6270 sql_print_warning("Unexpected change of master binlog file name in the " 6271 "middle of GTID %u-%u-%llu, assuming that master has " 6272 "crashed and rolling back the transaction. 
Expected: " 6273 "'%s', received: '%s'", 6274 mi->last_queued_gtid.domain_id, 6275 mi->last_queued_gtid.server_id, 6276 mi->last_queued_gtid.seq_no, 6277 mi->master_log_name, rev.new_log_ident); 6278 6279 mysql_mutex_lock(log_lock); 6280 if (likely(!rli->relay_log.write_event(&fdle) && 6281 !rli->relay_log.flush_and_sync(NULL))) 6282 { 6283 rli->relay_log.harvest_bytes_written(&rli->log_space_total); 6284 } 6285 else 6286 { 6287 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; 6288 mysql_mutex_unlock(log_lock); 6289 goto err; 6290 } 6291 rli->relay_log.signal_relay_log_update(); 6292 mysql_mutex_unlock(log_lock); 6293 6294 mi->gtid_reconnect_event_skip_count= 0; 6295 mi->events_queued_since_last_gtid= 0; 6296 } 6297 mi->prev_master_id= mi->master_id; 6298 6299 if (unlikely(process_io_rotate(mi, &rev))) 6300 { 6301 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; 6302 goto err; 6303 } 6304 /* 6305 Checksum special cases for the fake Rotate (R_f) event caused by the protocol 6306 of events generation and serialization in RL where Rotate of master is 6307 queued right next to FD of slave. 6308 Since it's only FD that carries the alg desc of FD_s has to apply to R_m. 6309 Two special rules apply only to the first R_f which comes in before any FD_m. 6310 The 2nd R_f should be compatible with the FD_s that must have taken over 6311 the last seen FD_m's (A). 6312 6313 RSC_1: If OM \and fake Rotate \and slave is configured to 6314 to compute checksum for its first FD event for RL 6315 the fake Rotate gets checksummed here. 
6316 */ 6317 if (uint4korr(&buf[0]) == 0 && checksum_alg == BINLOG_CHECKSUM_ALG_OFF && 6318 mi->rli.relay_log.relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_OFF) 6319 { 6320 ha_checksum rot_crc= 0; 6321 event_len += BINLOG_CHECKSUM_LEN; 6322 memcpy(rot_buf, buf, event_len - BINLOG_CHECKSUM_LEN); 6323 int4store(&rot_buf[EVENT_LEN_OFFSET], 6324 uint4korr(&rot_buf[EVENT_LEN_OFFSET]) + BINLOG_CHECKSUM_LEN); 6325 rot_crc= my_checksum(rot_crc, (const uchar *) rot_buf, 6326 event_len - BINLOG_CHECKSUM_LEN); 6327 int4store(&rot_buf[event_len - BINLOG_CHECKSUM_LEN], rot_crc); 6328 DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET])); 6329 DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg == 6330 mi->rli.relay_log.relay_log_checksum_alg); 6331 /* the first one */ 6332 DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF); 6333 save_buf= (char *) buf; 6334 buf= rot_buf; 6335 } 6336 else 6337 /* 6338 RSC_2: If NM \and fake Rotate \and slave does not compute checksum 6339 the fake Rotate's checksum is stripped off before relay-logging. 6340 */ 6341 if (uint4korr(&buf[0]) == 0 && checksum_alg != BINLOG_CHECKSUM_ALG_OFF && 6342 mi->rli.relay_log.relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_OFF) 6343 { 6344 event_len -= BINLOG_CHECKSUM_LEN; 6345 memcpy(rot_buf, buf, event_len); 6346 int4store(&rot_buf[EVENT_LEN_OFFSET], 6347 uint4korr(&rot_buf[EVENT_LEN_OFFSET]) - BINLOG_CHECKSUM_LEN); 6348 DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET])); 6349 DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg == 6350 mi->rli.relay_log.relay_log_checksum_alg); 6351 /* the first one */ 6352 DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF); 6353 save_buf= (char *) buf; 6354 buf= rot_buf; 6355 } 6356 /* 6357 Now the I/O thread has just changed its mi->master_log_name, so 6358 incrementing mi->master_log_pos is nonsense. 
6359 */ 6360 inc_pos= 0; 6361 break; 6362 } 6363 case FORMAT_DESCRIPTION_EVENT: 6364 { 6365 /* 6366 Create an event, and save it (when we rotate the relay log, we will have 6367 to write this event again). 6368 */ 6369 /* 6370 We are the only thread which reads/writes description_event_for_queue. 6371 The relay_log struct does not move (though some members of it can 6372 change), so we needn't any lock (no rli->data_lock, no log lock). 6373 */ 6374 Format_description_log_event* tmp; 6375 const char* errmsg; 6376 // mark it as undefined that is irrelevant anymore 6377 mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF; 6378 if (!(tmp= (Format_description_log_event*) 6379 Log_event::read_log_event(buf, event_len, &errmsg, 6380 mi->rli.relay_log.description_event_for_queue, 6381 1))) 6382 { 6383 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; 6384 goto err; 6385 } 6386 tmp->copy_crypto_data(mi->rli.relay_log.description_event_for_queue); 6387 delete mi->rli.relay_log.description_event_for_queue; 6388 mi->rli.relay_log.description_event_for_queue= tmp; 6389 if (tmp->checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF) 6390 tmp->checksum_alg= BINLOG_CHECKSUM_ALG_OFF; 6391 6392 /* installing new value of checksum Alg for relay log */ 6393 mi->rli.relay_log.relay_log_checksum_alg= tmp->checksum_alg; 6394 6395 /* 6396 Do not queue any format description event that we receive after a 6397 reconnect where we are skipping over a partial event group received 6398 before the reconnect. 6399 6400 (If we queued such an event, and it was the first format_description 6401 event after master restart, the slave SQL thread would think that 6402 the partial event group before it in the relay log was from a 6403 previous master crash and should be rolled back). 
6404 */ 6405 if (unlikely(mi->gtid_reconnect_event_skip_count && !mi->gtid_event_seen)) 6406 gtid_skip_enqueue= true; 6407 6408 /* 6409 Though this does some conversion to the slave's format, this will 6410 preserve the master's binlog format version, and number of event types. 6411 */ 6412 /* 6413 If the event was not requested by the slave (the slave did not ask for 6414 it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos 6415 */ 6416 inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0; 6417 DBUG_PRINT("info",("binlog format is now %d", 6418 mi->rli.relay_log.description_event_for_queue->binlog_version)); 6419 6420 } 6421 break; 6422 6423 case HEARTBEAT_LOG_EVENT: 6424 { 6425 /* 6426 HB (heartbeat) cannot come before RL (Relay) 6427 */ 6428 Heartbeat_log_event hb(buf, 6429 mi->rli.relay_log.relay_log_checksum_alg 6430 != BINLOG_CHECKSUM_ALG_OFF ? 6431 event_len - BINLOG_CHECKSUM_LEN : event_len, 6432 mi->rli.relay_log.description_event_for_queue); 6433 if (!hb.is_valid()) 6434 { 6435 error= ER_SLAVE_HEARTBEAT_FAILURE; 6436 error_msg.append(STRING_WITH_LEN("inconsistent heartbeat event content;")); 6437 error_msg.append(STRING_WITH_LEN("the event's data: log_file_name ")); 6438 error_msg.append(hb.get_log_ident(), (uint) hb.get_ident_len()); 6439 error_msg.append(STRING_WITH_LEN(" log_pos ")); 6440 error_msg.append_ulonglong(hb.log_pos); 6441 goto err; 6442 } 6443 mi->received_heartbeats++; 6444 /* 6445 compare local and event's versions of log_file, log_pos. 6446 6447 Heartbeat is sent only after an event corresponding to the corrdinates 6448 the heartbeat carries. 6449 Slave can not have a higher coordinate except in the only 6450 special case when mi->master_log_name, master_log_pos have never 6451 been updated by Rotate event i.e when slave does not have any history 6452 with the master (and thereafter mi->master_log_pos is NULL). 6453 6454 Slave can have lower coordinates, if some event from master was omitted. 
6455 6456 TODO: handling `when' for SHOW SLAVE STATUS' snds behind 6457 */ 6458 if (memcmp(mi->master_log_name, hb.get_log_ident(), hb.get_ident_len()) || 6459 mi->master_log_pos > hb.log_pos) { 6460 /* missed events of heartbeat from the past */ 6461 error= ER_SLAVE_HEARTBEAT_FAILURE; 6462 error_msg.append(STRING_WITH_LEN("heartbeat is not compatible with local info;")); 6463 error_msg.append(STRING_WITH_LEN("the event's data: log_file_name ")); 6464 error_msg.append(hb.get_log_ident(), (uint) hb.get_ident_len()); 6465 error_msg.append(STRING_WITH_LEN(" log_pos ")); 6466 error_msg.append_ulonglong(hb.log_pos); 6467 goto err; 6468 } 6469 6470 /* 6471 Heartbeat events doesn't count in the binlog size, so we don't have to 6472 increment mi->master_log_pos 6473 */ 6474 goto skip_relay_logging; 6475 } 6476 break; 6477 6478 case GTID_LIST_EVENT: 6479 { 6480 const char *errmsg; 6481 Gtid_list_log_event *glev; 6482 Log_event *tmp; 6483 uint32 flags; 6484 6485 if (!(tmp= Log_event::read_log_event(buf, event_len, &errmsg, 6486 mi->rli.relay_log.description_event_for_queue, 6487 opt_slave_sql_verify_checksum))) 6488 { 6489 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; 6490 goto err; 6491 } 6492 glev= static_cast<Gtid_list_log_event *>(tmp); 6493 event_pos= glev->log_pos; 6494 flags= glev->gl_flags; 6495 delete glev; 6496 6497 /* 6498 We use fake Gtid_list events to update the old-style position (among 6499 other things). 6500 6501 Early code created fake Gtid_list events with zero log_pos, those should 6502 not modify old-style position. 
6503 */ 6504 if (event_pos == 0 || event_pos <= mi->master_log_pos) 6505 inc_pos= 0; 6506 else 6507 inc_pos= event_pos - mi->master_log_pos; 6508 6509 if (mi->rli.until_condition == Relay_log_info::UNTIL_GTID && 6510 flags & Gtid_list_log_event::FLAG_UNTIL_REACHED) 6511 { 6512 char str_buf[128]; 6513 String str(str_buf, sizeof(str_buf), system_charset_info); 6514 mi->rli.until_gtid_pos.to_string(&str); 6515 sql_print_information("Slave I/O thread stops because it reached its" 6516 " UNTIL master_gtid_pos %s", str.c_ptr_safe()); 6517 mi->abort_slave= true; 6518 } 6519 } 6520 break; 6521 6522 case GTID_EVENT: 6523 { 6524 DBUG_EXECUTE_IF("kill_slave_io_after_2_events", 6525 { 6526 mi->dbug_do_disconnect= true; 6527 mi->dbug_event_counter= 2; 6528 };); 6529 6530 uchar gtid_flag; 6531 6532 if (Gtid_log_event::peek(buf, event_len, checksum_alg, 6533 &event_gtid.domain_id, &event_gtid.server_id, 6534 &event_gtid.seq_no, >id_flag, 6535 rli->relay_log.description_event_for_queue)) 6536 { 6537 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; 6538 goto err; 6539 } 6540 got_gtid_event= true; 6541 if (mi->using_gtid == Master_info::USE_GTID_NO) 6542 goto default_action; 6543 if (unlikely(mi->gtid_reconnect_event_skip_count)) 6544 { 6545 if (likely(!mi->gtid_event_seen)) 6546 { 6547 mi->gtid_event_seen= true; 6548 /* 6549 If we are reconnecting, and we need to skip a partial event group 6550 already queued to the relay log before the reconnect, then we check 6551 that we actually get the same event group (same GTID) as before, so 6552 we do not end up with half of one group and half another. 6553 6554 The only way we should be able to receive a different GTID than what 6555 we expect is if the binlog on the master (or more likely the whole 6556 master server) was replaced with a different one, on the same IP 6557 address, _and_ the new master happens to have domains in a different 6558 order so we get the GTID from a different domain first. 
Still, it is 6559 best to protect against this case. 6560 */ 6561 if (event_gtid.domain_id != mi->last_queued_gtid.domain_id || 6562 event_gtid.server_id != mi->last_queued_gtid.server_id || 6563 event_gtid.seq_no != mi->last_queued_gtid.seq_no) 6564 { 6565 bool first; 6566 error= ER_SLAVE_UNEXPECTED_MASTER_SWITCH; 6567 error_msg.append(STRING_WITH_LEN("Expected: ")); 6568 first= true; 6569 rpl_slave_state_tostring_helper(&error_msg, &mi->last_queued_gtid, 6570 &first); 6571 error_msg.append(STRING_WITH_LEN(", received: ")); 6572 first= true; 6573 rpl_slave_state_tostring_helper(&error_msg, &event_gtid, &first); 6574 goto err; 6575 } 6576 if (global_system_variables.log_warnings > 1) 6577 { 6578 bool first= true; 6579 StringBuffer<1024> gtid_text; 6580 rpl_slave_state_tostring_helper(>id_text, &mi->last_queued_gtid, 6581 &first); 6582 sql_print_information("Slave IO thread is reconnected to " 6583 "receive Gtid_log_event %s. It is to skip %llu " 6584 "already received events including the gtid one", 6585 gtid_text.ptr(), 6586 mi->events_queued_since_last_gtid); 6587 } 6588 goto default_action; 6589 } 6590 else 6591 { 6592 bool first; 6593 StringBuffer<1024> gtid_text; 6594 6595 gtid_text.append(STRING_WITH_LEN("Last received gtid: ")); 6596 first= true; 6597 rpl_slave_state_tostring_helper(>id_text, &mi->last_queued_gtid, 6598 &first); 6599 gtid_text.append(STRING_WITH_LEN(", currently received: ")); 6600 first= true; 6601 rpl_slave_state_tostring_helper(>id_text, &event_gtid, &first); 6602 6603 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; 6604 sql_print_error("Slave IO thread has received a new Gtid_log_event " 6605 "while skipping already logged events " 6606 "after reconnect. %s. %llu remains to be skipped. 
" 6607 "The number of originally read events was %llu", 6608 gtid_text.ptr(), 6609 mi->gtid_reconnect_event_skip_count, 6610 mi->events_queued_since_last_gtid); 6611 goto err; 6612 } 6613 } 6614 mi->gtid_event_seen= true; 6615 6616 /* 6617 We have successfully queued to relay log everything before this GTID, so 6618 in case of reconnect we can start from after any previous GTID. 6619 (Normally we would have updated gtid_current_pos earlier at the end of 6620 the previous event group, but better leave an extra check here for 6621 safety). 6622 */ 6623 if (mi->events_queued_since_last_gtid) 6624 { 6625 mi->gtid_current_pos.update(&mi->last_queued_gtid); 6626 mi->events_queued_since_last_gtid= 0; 6627 } 6628 mi->last_queued_gtid= event_gtid; 6629 mi->last_queued_gtid_standalone= 6630 (gtid_flag & Gtid_log_event::FL_STANDALONE) != 0; 6631 6632 /* Should filter all the subsequent events in the current GTID group? */ 6633 mi->domain_id_filter.do_filter(event_gtid.domain_id); 6634 6635 ++mi->events_queued_since_last_gtid; 6636 inc_pos= event_len; 6637 } 6638 break; 6639 /* 6640 Binlog compressed event should uncompress in IO thread 6641 */ 6642 case QUERY_COMPRESSED_EVENT: 6643 inc_pos= event_len; 6644 if (query_event_uncompress(rli->relay_log.description_event_for_queue, 6645 checksum_alg == BINLOG_CHECKSUM_ALG_CRC32, 6646 buf, event_len, new_buf_arr, sizeof(new_buf_arr), 6647 &is_malloc, (char **)&new_buf, &event_len)) 6648 { 6649 char llbuf[22]; 6650 error = ER_BINLOG_UNCOMPRESS_ERROR; 6651 error_msg.append(STRING_WITH_LEN("binlog uncompress error, master log_pos: ")); 6652 llstr(mi->master_log_pos, llbuf); 6653 error_msg.append(llbuf, strlen(llbuf)); 6654 goto err; 6655 } 6656 buf = new_buf; 6657 is_compress_event = true; 6658 goto default_action; 6659 6660 case WRITE_ROWS_COMPRESSED_EVENT: 6661 case UPDATE_ROWS_COMPRESSED_EVENT: 6662 case DELETE_ROWS_COMPRESSED_EVENT: 6663 case WRITE_ROWS_COMPRESSED_EVENT_V1: 6664 case UPDATE_ROWS_COMPRESSED_EVENT_V1: 6665 case 
DELETE_ROWS_COMPRESSED_EVENT_V1: 6666 inc_pos = event_len; 6667 { 6668 if (row_log_event_uncompress(rli->relay_log.description_event_for_queue, 6669 checksum_alg == BINLOG_CHECKSUM_ALG_CRC32, 6670 buf, event_len, new_buf_arr, sizeof(new_buf_arr), 6671 &is_malloc, (char **)&new_buf, &event_len)) 6672 { 6673 char llbuf[22]; 6674 error = ER_BINLOG_UNCOMPRESS_ERROR; 6675 error_msg.append(STRING_WITH_LEN("binlog uncompress error, master log_pos: ")); 6676 llstr(mi->master_log_pos, llbuf); 6677 error_msg.append(llbuf, strlen(llbuf)); 6678 goto err; 6679 } 6680 } 6681 is_compress_event = true; 6682 buf = new_buf; 6683 /* 6684 As we are uncertain about compressed V2 rows events, we don't track 6685 them 6686 */ 6687 if (LOG_EVENT_IS_ROW_V2((Log_event_type) buf[EVENT_TYPE_OFFSET])) 6688 goto default_action; 6689 /* fall through */ 6690 case WRITE_ROWS_EVENT_V1: 6691 case UPDATE_ROWS_EVENT_V1: 6692 case DELETE_ROWS_EVENT_V1: 6693 case WRITE_ROWS_EVENT: 6694 case UPDATE_ROWS_EVENT: 6695 case DELETE_ROWS_EVENT: 6696 { 6697 is_rows_event= true; 6698 mi->rows_event_tracker.update(mi->master_log_name, 6699 mi->master_log_pos, 6700 buf, 6701 mi->rli.relay_log. 6702 description_event_for_queue); 6703 6704 DBUG_EXECUTE_IF("simulate_stmt_end_rows_event_loss", 6705 { 6706 mi->rows_event_tracker.stmt_end_seen= false; 6707 }); 6708 } 6709 goto default_action; 6710 6711 #ifndef DBUG_OFF 6712 case XID_EVENT: 6713 DBUG_EXECUTE_IF("slave_discard_xid_for_gtid_0_x_1000", 6714 { 6715 /* Inject an event group that is missing its XID commit event. */ 6716 if (mi->last_queued_gtid.domain_id == 0 && 6717 mi->last_queued_gtid.seq_no == 1000) 6718 goto skip_relay_logging; 6719 }); 6720 goto default_action; 6721 #endif 6722 case START_ENCRYPTION_EVENT: 6723 if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F) 6724 { 6725 /* 6726 If the event was not requested by the slave (the slave did not ask for 6727 it), i.e. 
has end_log_pos=0, we do not increment mi->master_log_pos 6728 */ 6729 inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0; 6730 break; 6731 } 6732 /* fall through */ 6733 default: 6734 default_action: 6735 DBUG_EXECUTE_IF("kill_slave_io_after_2_events", 6736 { 6737 if (mi->dbug_do_disconnect && 6738 (LOG_EVENT_IS_QUERY((Log_event_type)(uchar)buf[EVENT_TYPE_OFFSET]) || 6739 ((uchar)buf[EVENT_TYPE_OFFSET] == TABLE_MAP_EVENT)) 6740 && (--mi->dbug_event_counter == 0)) 6741 { 6742 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; 6743 mi->dbug_do_disconnect= false; /* Safety */ 6744 goto err; 6745 } 6746 };); 6747 6748 DBUG_EXECUTE_IF("kill_slave_io_before_commit", 6749 { 6750 if ((uchar)buf[EVENT_TYPE_OFFSET] == XID_EVENT || 6751 ((uchar)buf[EVENT_TYPE_OFFSET] == QUERY_EVENT && /* QUERY_COMPRESSED_EVENT would never be commmit or rollback */ 6752 Query_log_event::peek_is_commit_rollback(buf, event_len, 6753 checksum_alg))) 6754 { 6755 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; 6756 goto err; 6757 } 6758 };); 6759 6760 if (mi->using_gtid != Master_info::USE_GTID_NO && mi->gtid_event_seen) 6761 { 6762 if (unlikely(mi->gtid_reconnect_event_skip_count)) 6763 { 6764 --mi->gtid_reconnect_event_skip_count; 6765 gtid_skip_enqueue= true; 6766 } 6767 else if (mi->events_queued_since_last_gtid) 6768 ++mi->events_queued_since_last_gtid; 6769 } 6770 6771 if (!is_compress_event) 6772 inc_pos= event_len; 6773 6774 break; 6775 } 6776 6777 /* 6778 Integrity of Rows- event group check. 6779 A sequence of Rows- events must end with STMT_END_F flagged one. 6780 Even when Heartbeat event interrupts Rows- events flow this must indicate a 6781 malfunction e.g logging on the master. 6782 */ 6783 if (((uchar) buf[EVENT_TYPE_OFFSET] != HEARTBEAT_LOG_EVENT) && 6784 !is_rows_event && 6785 mi->rows_event_tracker.check_and_report(mi->master_log_name, 6786 mi->master_log_pos)) 6787 { 6788 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; 6789 goto err; 6790 } 6791 6792 /* 6793 If we filter events master-side (eg. 
@@skip_replication), we will see holes 6794 in the event positions from the master. If we see such a hole, adjust 6795 mi->master_log_pos accordingly so we maintain the correct position (for 6796 reconnect, MASTER_POS_WAIT(), etc.) 6797 */ 6798 if (inc_pos > 0 && 6799 event_len >= LOG_POS_OFFSET+4 && 6800 (event_pos= uint4korr(buf+LOG_POS_OFFSET)) > mi->master_log_pos + inc_pos) 6801 { 6802 inc_pos= event_pos - mi->master_log_pos; 6803 DBUG_PRINT("info", ("Adjust master_log_pos %llu->%llu to account for " 6804 "master-side filtering", 6805 mi->master_log_pos + inc_pos, event_pos)); 6806 } 6807 6808 /* 6809 If this event is originating from this server, don't queue it. 6810 We don't check this for 3.23 events because it's simpler like this; 3.23 6811 will be filtered anyway by the SQL slave thread which also tests the 6812 server id (we must also keep this test in the SQL thread, in case somebody 6813 upgrades a 4.0 slave which has a not-filtered relay log). 6814 6815 ANY event coming from ourselves can be ignored: it is obvious for queries; 6816 for STOP_EVENT/ROTATE_EVENT/START_EVENT: these cannot come from ourselves 6817 (--log-slave-updates would not log that) unless this slave is also its 6818 direct master (an unsupported, useless setup!). 6819 */ 6820 6821 mysql_mutex_lock(log_lock); 6822 s_id= uint4korr(buf + SERVER_ID_OFFSET); 6823 /* 6824 Write the event to the relay log, unless we reconnected in the middle 6825 of an event group and now need to skip the initial part of the group that 6826 we already wrote before reconnecting. 6827 */ 6828 if (unlikely(gtid_skip_enqueue)) 6829 { 6830 mi->master_log_pos+= inc_pos; 6831 if ((uchar)buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT && 6832 s_id == mi->master_id) 6833 { 6834 /* 6835 If we write this master's description event in the middle of an event 6836 group due to GTID reconnect, SQL thread will think that master crashed 6837 in the middle of the group and roll back the first half, so we must not. 
6838 6839 But we still have to write an artificial copy of the masters description 6840 event, to override the initial slave-version description event so that 6841 SQL thread has the right information for parsing the events it reads. 6842 */ 6843 rli->relay_log.description_event_for_queue->created= 0; 6844 rli->relay_log.description_event_for_queue->set_artificial_event(); 6845 if (rli->relay_log.append_no_lock 6846 (rli->relay_log.description_event_for_queue)) 6847 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; 6848 else 6849 rli->relay_log.harvest_bytes_written(&rli->log_space_total); 6850 } 6851 else if (mi->gtid_reconnect_event_skip_count == 0) 6852 { 6853 /* 6854 Add a fake rotate event so that SQL thread can see the old-style 6855 position where we re-connected in the middle of a GTID event group. 6856 */ 6857 Rotate_log_event fake_rev(mi->master_log_name, 0, mi->master_log_pos, 0); 6858 fake_rev.server_id= mi->master_id; 6859 if (rli->relay_log.append_no_lock(&fake_rev)) 6860 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; 6861 else 6862 rli->relay_log.harvest_bytes_written(&rli->log_space_total); 6863 } 6864 } 6865 else 6866 if ((s_id == global_system_variables.server_id && 6867 !mi->rli.replicate_same_server_id) || 6868 event_that_should_be_ignored(buf) || 6869 /* 6870 the following conjunction deals with IGNORE_SERVER_IDS, if set 6871 If the master is on the ignore list, execution of 6872 format description log events and rotate events is necessary. 6873 */ 6874 (mi->ignore_server_ids.elements > 0 && 6875 mi->shall_ignore_server_id(s_id) && 6876 /* everything is filtered out from non-master */ 6877 (s_id != mi->master_id || 6878 /* for the master meta information is necessary */ 6879 ((uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT && 6880 (uchar)buf[EVENT_TYPE_OFFSET] != ROTATE_EVENT))) || 6881 6882 /* 6883 Check whether it needs to be filtered based on domain_id 6884 (DO_DOMAIN_IDS/IGNORE_DOMAIN_IDS). 
6885 */ 6886 (mi->domain_id_filter.is_group_filtered() && 6887 Log_event::is_group_event((Log_event_type)(uchar) 6888 buf[EVENT_TYPE_OFFSET]))) 6889 { 6890 /* 6891 Do not write it to the relay log. 6892 a) We still want to increment mi->master_log_pos, so that we won't 6893 re-read this event from the master if the slave IO thread is now 6894 stopped/restarted (more efficient if the events we are ignoring are big 6895 LOAD DATA INFILE). 6896 b) We want to record that we are skipping events, for the information of 6897 the slave SQL thread, otherwise that thread may let 6898 rli->group_relay_log_pos stay too small if the last binlog's event is 6899 ignored. 6900 But events which were generated by this slave and which do not exist in 6901 the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment 6902 mi->master_log_pos. 6903 If the event is originated remotely and is being filtered out by 6904 IGNORE_SERVER_IDS it increments mi->master_log_pos 6905 as well as rli->group_relay_log_pos. 
6906 */ 6907 if (!(s_id == global_system_variables.server_id && 6908 !mi->rli.replicate_same_server_id) || 6909 ((uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT && 6910 (uchar)buf[EVENT_TYPE_OFFSET] != ROTATE_EVENT && 6911 (uchar)buf[EVENT_TYPE_OFFSET] != STOP_EVENT)) 6912 { 6913 mi->master_log_pos+= inc_pos; 6914 memcpy(rli->ign_master_log_name_end, mi->master_log_name, FN_REFLEN); 6915 DBUG_ASSERT(rli->ign_master_log_name_end[0]); 6916 rli->ign_master_log_pos_end= mi->master_log_pos; 6917 if (got_gtid_event) 6918 rli->ign_gtids.update(&event_gtid); 6919 } 6920 // the slave SQL thread needs to re-check 6921 rli->relay_log.signal_relay_log_update(); 6922 DBUG_PRINT("info", ("master_log_pos: %lu, event originating from %u server, ignored", 6923 (ulong) mi->master_log_pos, uint4korr(buf + SERVER_ID_OFFSET))); 6924 } 6925 else 6926 { 6927 if (likely(!rli->relay_log.write_event_buffer((uchar*)buf, event_len))) 6928 { 6929 mi->master_log_pos+= inc_pos; 6930 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos)); 6931 rli->relay_log.harvest_bytes_written(&rli->log_space_total); 6932 } 6933 else 6934 { 6935 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE; 6936 } 6937 rli->ign_master_log_name_end[0]= 0; // last event is not ignored 6938 if (got_gtid_event) 6939 rli->ign_gtids.remove_if_present(&event_gtid); 6940 if (save_buf != NULL) 6941 buf= save_buf; 6942 } 6943 mysql_mutex_unlock(log_lock); 6944 6945 if (likely(!error) && 6946 mi->using_gtid != Master_info::USE_GTID_NO && 6947 mi->events_queued_since_last_gtid > 0 && 6948 ( (mi->last_queued_gtid_standalone && 6949 !Log_event::is_part_of_group((Log_event_type)(uchar) 6950 buf[EVENT_TYPE_OFFSET])) || 6951 (!mi->last_queued_gtid_standalone && 6952 ((uchar)buf[EVENT_TYPE_OFFSET] == XID_EVENT || 6953 ((uchar)buf[EVENT_TYPE_OFFSET] == QUERY_EVENT && /* QUERY_COMPRESSED_EVENT would never be commmit or rollback */ 6954 Query_log_event::peek_is_commit_rollback(buf, event_len, 6955 checksum_alg)))))) 6956 { 
6957 /* 6958 The whole of the current event group is queued. So in case of 6959 reconnect we can start from after the current GTID. 6960 */ 6961 if (mi->gtid_reconnect_event_skip_count) 6962 { 6963 bool first= true; 6964 StringBuffer<1024> gtid_text; 6965 6966 rpl_slave_state_tostring_helper(>id_text, &mi->last_queued_gtid, 6967 &first); 6968 sql_print_error("Slave IO thread received a terminal event from " 6969 "group %s whose retrieval was interrupted " 6970 "with reconnect. We still had %llu events to read. " 6971 "The number of originally read events was %llu", 6972 gtid_text.ptr(), 6973 mi->gtid_reconnect_event_skip_count, 6974 mi->events_queued_since_last_gtid); 6975 } 6976 mi->gtid_current_pos.update(&mi->last_queued_gtid); 6977 mi->events_queued_since_last_gtid= 0; 6978 6979 /* Reset the domain_id_filter flag. */ 6980 mi->domain_id_filter.reset_filter(); 6981 } 6982 6983 skip_relay_logging: 6984 6985 err: 6986 if (unlock_data_lock) 6987 mysql_mutex_unlock(&mi->data_lock); 6988 DBUG_PRINT("info", ("error: %d", error)); 6989 6990 /* 6991 Do not print ER_SLAVE_RELAY_LOG_WRITE_FAILURE error here, as the caller 6992 handle_slave_io() prints it on return. 
6993 */ 6994 if (unlikely(error) && error != ER_SLAVE_RELAY_LOG_WRITE_FAILURE) 6995 mi->report(ERROR_LEVEL, error, NULL, ER_DEFAULT(error), 6996 error_msg.ptr()); 6997 6998 if (unlikely(is_malloc)) 6999 my_free((void *)new_buf); 7000 7001 DBUG_RETURN(error); 7002 } 7003 7004 7005 void end_relay_log_info(Relay_log_info* rli) 7006 { 7007 mysql_mutex_t *log_lock; 7008 DBUG_ENTER("end_relay_log_info"); 7009 7010 rli->error_on_rli_init_info= false; 7011 if (!rli->inited) 7012 DBUG_VOID_RETURN; 7013 if (rli->info_fd >= 0) 7014 { 7015 end_io_cache(&rli->info_file); 7016 mysql_file_close(rli->info_fd, MYF(MY_WME)); 7017 rli->info_fd = -1; 7018 } 7019 if (rli->cur_log_fd >= 0) 7020 { 7021 end_io_cache(&rli->cache_buf); 7022 mysql_file_close(rli->cur_log_fd, MYF(MY_WME)); 7023 rli->cur_log_fd = -1; 7024 } 7025 rli->inited = 0; 7026 log_lock= rli->relay_log.get_log_lock(); 7027 mysql_mutex_lock(log_lock); 7028 rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT); 7029 rli->relay_log.harvest_bytes_written(&rli->log_space_total); 7030 mysql_mutex_unlock(log_lock); 7031 /* 7032 Delete the slave's temporary tables from memory. 7033 In the future there will be other actions than this, to ensure persistance 7034 of slave's temp tables after shutdown. 7035 */ 7036 rli->close_temporary_tables(); 7037 DBUG_VOID_RETURN; 7038 } 7039 7040 7041 /** 7042 Hook to detach the active VIO before closing a connection handle. 7043 7044 The client API might close the connection (and associated data) 7045 in case it encounters a unrecoverable (network) error. This hook 7046 is called from the client code before the VIO handle is deleted 7047 allows the thread to detach the active vio so it does not point 7048 to freed memory. 7049 7050 Other calls to THD::clear_active_vio throughout this module are 7051 redundant due to the hook but are left in place for illustrative 7052 purposes. 
7053 */ 7054 7055 extern "C" void slave_io_thread_detach_vio() 7056 { 7057 #ifdef SIGNAL_WITH_VIO_CLOSE 7058 THD *thd= current_thd; 7059 if (thd && thd->slave_thread) 7060 thd->clear_active_vio(); 7061 #endif 7062 } 7063 7064 7065 /* 7066 Try to connect until successful or slave killed 7067 7068 SYNPOSIS 7069 safe_connect() 7070 thd Thread handler for slave 7071 mysql MySQL connection handle 7072 mi Replication handle 7073 7074 RETURN 7075 0 ok 7076 # Error 7077 */ 7078 7079 static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi) 7080 { 7081 DBUG_ENTER("safe_connect"); 7082 7083 DBUG_RETURN(connect_to_master(thd, mysql, mi, 0, 0)); 7084 } 7085 7086 7087 /* 7088 SYNPOSIS 7089 connect_to_master() 7090 7091 IMPLEMENTATION 7092 Try to connect until successful or slave killed or we have retried 7093 master_retry_count times 7094 */ 7095 7096 static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi, 7097 bool reconnect, bool suppress_warnings) 7098 { 7099 int slave_was_killed; 7100 int last_errno= -2; // impossible error 7101 ulong err_count=0; 7102 my_bool my_true= 1; 7103 DBUG_ENTER("connect_to_master"); 7104 set_slave_max_allowed_packet(thd, mysql); 7105 #ifndef DBUG_OFF 7106 mi->events_till_disconnect = disconnect_slave_event_count; 7107 #endif 7108 ulong client_flag= CLIENT_REMEMBER_OPTIONS; 7109 if (opt_slave_compressed_protocol) 7110 client_flag|= CLIENT_COMPRESS; /* We will use compression */ 7111 7112 mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout); 7113 mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout); 7114 mysql_options(mysql, MYSQL_OPT_USE_THREAD_SPECIFIC_MEMORY, 7115 (char*) &my_true); 7116 7117 #ifdef HAVE_OPENSSL 7118 if (mi->ssl) 7119 { 7120 mysql_ssl_set(mysql, 7121 mi->ssl_key[0]?mi->ssl_key:0, 7122 mi->ssl_cert[0]?mi->ssl_cert:0, 7123 mi->ssl_ca[0]?mi->ssl_ca:0, 7124 mi->ssl_capath[0]?mi->ssl_capath:0, 7125 mi->ssl_cipher[0]?mi->ssl_cipher:0); 7126 mysql_options(mysql, 
MYSQL_OPT_SSL_VERIFY_SERVER_CERT, 7127 &mi->ssl_verify_server_cert); 7128 mysql_options(mysql, MYSQL_OPT_SSL_CRLPATH, 7129 mi->ssl_crlpath[0] ? mi->ssl_crlpath : 0); 7130 mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT, 7131 &mi->ssl_verify_server_cert); 7132 } 7133 #endif 7134 7135 /* 7136 If server's default charset is not supported (like utf16, utf32) as client 7137 charset, then set client charset to 'latin1' (default client charset). 7138 */ 7139 if (is_supported_parser_charset(default_charset_info)) 7140 mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->csname); 7141 else 7142 { 7143 sql_print_information("'%s' can not be used as client character set. " 7144 "'%s' will be used as default client character set " 7145 "while connecting to master.", 7146 default_charset_info->csname, 7147 default_client_charset_info->csname); 7148 mysql_options(mysql, MYSQL_SET_CHARSET_NAME, 7149 default_client_charset_info->csname); 7150 } 7151 7152 /* This one is not strictly needed but we have it here for completeness */ 7153 mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) charsets_dir); 7154 7155 /* Set MYSQL_PLUGIN_DIR in case master asks for an external authentication plugin */ 7156 if (opt_plugin_dir_ptr && *opt_plugin_dir_ptr) 7157 mysql_options(mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir_ptr); 7158 7159 /* we disallow empty users */ 7160 if (mi->user[0] == 0) 7161 { 7162 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL, 7163 ER_THD(thd, ER_SLAVE_FATAL_ERROR), 7164 "Invalid (empty) username when attempting to " 7165 "connect to the master server. Connection attempt " 7166 "terminated."); 7167 DBUG_RETURN(1); 7168 } 7169 while (!(slave_was_killed = io_slave_killed(mi)) && 7170 (reconnect ? 
mysql_reconnect(mysql) != 0 : 7171 mysql_real_connect(mysql, mi->host, mi->user, mi->password, 0, 7172 mi->port, 0, client_flag) == 0)) 7173 { 7174 /* Don't repeat last error */ 7175 if ((int)mysql_errno(mysql) != last_errno) 7176 { 7177 last_errno=mysql_errno(mysql); 7178 suppress_warnings= 0; 7179 mi->report(ERROR_LEVEL, last_errno, NULL, 7180 "error %s to master '%s@%s:%d'" 7181 " - retry-time: %d maximum-retries: %lu message: %s", 7182 (reconnect ? "reconnecting" : "connecting"), 7183 mi->user, mi->host, mi->port, 7184 mi->connect_retry, master_retry_count, 7185 mysql_error(mysql)); 7186 } 7187 /* 7188 By default we try forever. The reason is that failure will trigger 7189 master election, so if the user did not set master_retry_count we 7190 do not want to have election triggered on the first failure to 7191 connect 7192 */ 7193 if (++err_count == master_retry_count) 7194 { 7195 slave_was_killed=1; 7196 if (reconnect) 7197 change_rpl_status(RPL_ACTIVE_SLAVE,RPL_LOST_SOLDIER); 7198 break; 7199 } 7200 slave_sleep(thd,mi->connect_retry,io_slave_killed, mi); 7201 } 7202 7203 if (!slave_was_killed) 7204 { 7205 mi->clear_error(); // clear possible left over reconnect error 7206 if (reconnect) 7207 { 7208 if (!suppress_warnings && global_system_variables.log_warnings) 7209 sql_print_information("Slave: connected to master '%s@%s:%d'," 7210 "replication resumed in log '%s' at " 7211 "position %llu", mi->user, mi->host, mi->port, 7212 IO_RPL_LOG_NAME, mi->master_log_pos); 7213 } 7214 else 7215 { 7216 change_rpl_status(RPL_IDLE_SLAVE,RPL_ACTIVE_SLAVE); 7217 general_log_print(thd, COM_CONNECT_OUT, "%s@%s:%d", 7218 mi->user, mi->host, mi->port); 7219 } 7220 #ifdef SIGNAL_WITH_VIO_CLOSE 7221 thd->set_active_vio(mysql->net.vio); 7222 #endif 7223 } 7224 mysql->reconnect= 1; 7225 DBUG_PRINT("exit",("slave_was_killed: %d", slave_was_killed)); 7226 DBUG_RETURN(slave_was_killed); 7227 } 7228 7229 7230 /* 7231 safe_reconnect() 7232 7233 IMPLEMENTATION 7234 Try to connect until 
successful or slave killed or we have retried 7235 master_retry_count times 7236 */ 7237 7238 static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi, 7239 bool suppress_warnings) 7240 { 7241 DBUG_ENTER("safe_reconnect"); 7242 DBUG_RETURN(connect_to_master(thd, mysql, mi, 1, suppress_warnings)); 7243 } 7244 7245 7246 #ifdef NOT_USED 7247 MYSQL *rpl_connect_master(MYSQL *mysql) 7248 { 7249 Master_info *mi= my_pthread_getspecific_ptr(Master_info*, RPL_MASTER_INFO); 7250 bool allocated= false; 7251 my_bool my_true= 1; 7252 THD *thd; 7253 7254 if (!mi) 7255 { 7256 sql_print_error("'rpl_connect_master' must be called in slave I/O thread context."); 7257 return NULL; 7258 } 7259 thd= mi->io_thd; 7260 if (!mysql) 7261 { 7262 if(!(mysql= mysql_init(NULL))) 7263 { 7264 sql_print_error("rpl_connect_master: failed in mysql_init()"); 7265 return NULL; 7266 } 7267 allocated= true; 7268 } 7269 7270 /* 7271 XXX: copied from connect_to_master, this function should not 7272 change the slave status, so we cannot use connect_to_master 7273 directly 7274 7275 TODO: make this part a seperate function to eliminate duplication 7276 */ 7277 mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout); 7278 mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout); 7279 mysql_options(mysql, MYSQL_OPT_USE_THREAD_SPECIFIC_MEMORY, 7280 (char*) &my_true); 7281 7282 #ifdef HAVE_OPENSSL 7283 if (mi->ssl) 7284 { 7285 mysql_ssl_set(mysql, 7286 mi->ssl_key[0]?mi->ssl_key:0, 7287 mi->ssl_cert[0]?mi->ssl_cert:0, 7288 mi->ssl_ca[0]?mi->ssl_ca:0, 7289 mi->ssl_capath[0]?mi->ssl_capath:0, 7290 mi->ssl_cipher[0]?mi->ssl_cipher:0); 7291 mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT, 7292 &mi->ssl_verify_server_cert); 7293 } 7294 #endif 7295 7296 mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->csname); 7297 /* This one is not strictly needed but we have it here for completeness */ 7298 mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) 
charsets_dir); 7299 7300 if (mi->user == NULL 7301 || mi->user[0] == 0 7302 || io_slave_killed( mi) 7303 || !mysql_real_connect(mysql, mi->host, mi->user, mi->password, 0, 7304 mi->port, 0, 0)) 7305 { 7306 if (!io_slave_killed( mi)) 7307 sql_print_error("rpl_connect_master: error connecting to master: %s (server_error: %d)", 7308 mysql_error(mysql), mysql_errno(mysql)); 7309 7310 if (allocated) 7311 mysql_close(mysql); // this will free the object 7312 return NULL; 7313 } 7314 return mysql; 7315 } 7316 #endif 7317 7318 7319 /* 7320 Called when we notice that the current "hot" log got rotated under our feet. 7321 */ 7322 7323 static IO_CACHE *reopen_relay_log(Relay_log_info *rli, const char **errmsg) 7324 { 7325 DBUG_ENTER("reopen_relay_log"); 7326 DBUG_ASSERT(rli->cur_log != &rli->cache_buf); 7327 DBUG_ASSERT(rli->cur_log_fd == -1); 7328 7329 IO_CACHE *cur_log = rli->cur_log=&rli->cache_buf; 7330 if ((rli->cur_log_fd=open_binlog(cur_log,rli->event_relay_log_name, 7331 errmsg)) <0) 7332 DBUG_RETURN(0); 7333 /* 7334 We want to start exactly where we was before: 7335 relay_log_pos Current log pos 7336 pending Number of bytes already processed from the event 7337 */ 7338 rli->event_relay_log_pos= MY_MAX(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE); 7339 my_b_seek(cur_log,rli->event_relay_log_pos); 7340 DBUG_RETURN(cur_log); 7341 } 7342 7343 7344 /** 7345 Reads next event from the relay log. Should be called from the 7346 slave IO thread. 7347 7348 @param rli Relay_log_info structure for the slave IO thread. 7349 7350 @return The event read, or NULL on error. If an error occurs, the 7351 error is reported through the sql_print_information() or 7352 sql_print_error() functions. 7353 7354 The size of the read event (in bytes) is returned in *event_size. 
7355 */ 7356 static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size) 7357 { 7358 Log_event* ev; 7359 Relay_log_info *rli= rgi->rli; 7360 IO_CACHE* cur_log = rli->cur_log; 7361 mysql_mutex_t *log_lock = rli->relay_log.get_log_lock(); 7362 const char* errmsg=0; 7363 DBUG_ENTER("next_event"); 7364 7365 DBUG_ASSERT(rgi->thd != 0 && rgi->thd == rli->sql_driver_thd); 7366 *event_size= 0; 7367 7368 #ifndef DBUG_OFF 7369 if (abort_slave_event_count && !rli->events_till_abort--) 7370 DBUG_RETURN(0); 7371 #endif 7372 7373 /* 7374 For most operations we need to protect rli members with data_lock, 7375 so we assume calling function acquired this mutex for us and we will 7376 hold it for the most of the loop below However, we will release it 7377 whenever it is worth the hassle, and in the cases when we go into a 7378 mysql_cond_wait() with the non-data_lock mutex 7379 */ 7380 mysql_mutex_assert_owner(&rli->data_lock); 7381 7382 while (!sql_slave_killed(rgi)) 7383 { 7384 /* 7385 We can have two kinds of log reading: 7386 hot_log: 7387 rli->cur_log points at the IO_CACHE of relay_log, which 7388 is actively being updated by the I/O thread. We need to be careful 7389 in this case and make sure that we are not looking at a stale log that 7390 has already been rotated. If it has been, we reopen the log. 7391 7392 The other case is much simpler: 7393 We just have a read only log that nobody else will be updating. 
7394 */ 7395 ulonglong old_pos; 7396 bool hot_log; 7397 if ((hot_log = (cur_log != &rli->cache_buf))) 7398 { 7399 DBUG_ASSERT(rli->cur_log_fd == -1); // foreign descriptor 7400 mysql_mutex_lock(log_lock); 7401 7402 /* 7403 Reading xxx_file_id is safe because the log will only 7404 be rotated when we hold relay_log.LOCK_log 7405 */ 7406 if (rli->relay_log.get_open_count() != rli->cur_log_old_open_count) 7407 { 7408 // The master has switched to a new log file; Reopen the old log file 7409 cur_log=reopen_relay_log(rli, &errmsg); 7410 mysql_mutex_unlock(log_lock); 7411 if (!cur_log) // No more log files 7412 goto err; 7413 hot_log=0; // Using old binary log 7414 } 7415 } 7416 /* 7417 As there is no guarantee that the relay is open (for example, an I/O 7418 error during a write by the slave I/O thread may have closed it), we 7419 have to test it. 7420 */ 7421 if (!my_b_inited(cur_log)) 7422 goto err; 7423 #ifndef DBUG_OFF 7424 { 7425 /* This is an assertion which sometimes fails, let's try to track it */ 7426 DBUG_PRINT("info", ("my_b_tell(cur_log)=%llu rli->event_relay_log_pos=%llu", 7427 my_b_tell(cur_log), rli->event_relay_log_pos)); 7428 DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE); 7429 DBUG_ASSERT(rli->mi->using_parallel() || 7430 my_b_tell(cur_log) == rli->event_relay_log_pos); 7431 } 7432 #endif 7433 /* 7434 Relay log is always in new format - if the master is 3.23, the 7435 I/O thread will convert the format for us. 7436 A problem: the description event may be in a previous relay log. So if 7437 the slave has been shutdown meanwhile, we would have to look in old relay 7438 logs, which may even have been deleted. So we need to write this 7439 description event at the beginning of the relay log. 7440 When the relay log is created when the I/O thread starts, easy: the 7441 master will send the description event and we will queue it. 
7442 But if the relay log is created by new_file(): then the solution is: 7443 MYSQL_BIN_LOG::open() will write the buffered description event. 7444 */ 7445 old_pos= rli->event_relay_log_pos; 7446 if ((ev= Log_event::read_log_event(cur_log, 7447 rli->relay_log.description_event_for_exec, 7448 opt_slave_sql_verify_checksum))) 7449 7450 { 7451 /* 7452 read it while we have a lock, to avoid a mutex lock in 7453 inc_event_relay_log_pos() 7454 */ 7455 rli->future_event_relay_log_pos= my_b_tell(cur_log); 7456 *event_size= rli->future_event_relay_log_pos - old_pos; 7457 7458 if (hot_log) 7459 mysql_mutex_unlock(log_lock); 7460 rli->sql_thread_caught_up= false; 7461 DBUG_RETURN(ev); 7462 } 7463 if (opt_reckless_slave) // For mysql-test 7464 cur_log->error = 0; 7465 if (unlikely(cur_log->error < 0)) 7466 { 7467 errmsg = "slave SQL thread aborted because of I/O error"; 7468 if (hot_log) 7469 mysql_mutex_unlock(log_lock); 7470 goto err; 7471 } 7472 if (!cur_log->error) /* EOF */ 7473 { 7474 /* 7475 On a hot log, EOF means that there are no more updates to 7476 process and we must block until I/O thread adds some and 7477 signals us to continue 7478 */ 7479 if (hot_log) 7480 { 7481 /* 7482 We say in Seconds_Behind_Master that we have "caught up". Note that 7483 for example if network link is broken but I/O slave thread hasn't 7484 noticed it (slave_net_timeout not elapsed), then we'll say "caught 7485 up" whereas we're not really caught up. Fixing that would require 7486 internally cutting timeout in smaller pieces in network read, no 7487 thanks. Another example: SQL has caught up on I/O, now I/O has read 7488 a new event and is queuing it; the false "0" will exist until SQL 7489 finishes executing the new event; it will be look abnormal only if 7490 the events have old timestamps (then you get "many", 0, "many"). 
7491 7492 Transient phases like this can be fixed with implemeting 7493 Heartbeat event which provides the slave the status of the 7494 master at time the master does not have any new update to send. 7495 Seconds_Behind_Master would be zero only when master has no 7496 more updates in binlog for slave. The heartbeat can be sent 7497 in a (small) fraction of slave_net_timeout. Until it's done 7498 rli->sql_thread_caught_up is temporarely (for time of waiting for 7499 the following event) set whenever EOF is reached. 7500 */ 7501 rli->sql_thread_caught_up= true; 7502 7503 DBUG_ASSERT(rli->relay_log.get_open_count() == 7504 rli->cur_log_old_open_count); 7505 7506 if (rli->ign_master_log_name_end[0]) 7507 { 7508 /* We generate and return a Rotate, to make our positions advance */ 7509 DBUG_PRINT("info",("seeing an ignored end segment")); 7510 ev= new Rotate_log_event(rli->ign_master_log_name_end, 7511 0, rli->ign_master_log_pos_end, 7512 Rotate_log_event::DUP_NAME); 7513 rli->ign_master_log_name_end[0]= 0; 7514 mysql_mutex_unlock(log_lock); 7515 if (unlikely(!ev)) 7516 { 7517 errmsg= "Slave SQL thread failed to create a Rotate event " 7518 "(out of memory?), SHOW SLAVE STATUS may be inaccurate"; 7519 goto err; 7520 } 7521 ev->server_id= 0; // don't be ignored by slave SQL thread 7522 DBUG_RETURN(ev); 7523 } 7524 7525 if (rli->ign_gtids.count() && !rli->is_in_group()) 7526 { 7527 /* 7528 We generate and return a Gtid_list, to update gtid_slave_pos, 7529 unless being in the middle of a group. 
7530 */ 7531 DBUG_PRINT("info",("seeing ignored end gtids")); 7532 ev= new Gtid_list_log_event(&rli->ign_gtids, 7533 Gtid_list_log_event::FLAG_IGN_GTIDS); 7534 rli->ign_gtids.reset(); 7535 mysql_mutex_unlock(log_lock); 7536 if (unlikely(!ev)) 7537 { 7538 errmsg= "Slave SQL thread failed to create a Gtid_list event " 7539 "(out of memory?), gtid_slave_pos may be inaccurate"; 7540 goto err; 7541 } 7542 ev->server_id= 0; // don't be ignored by slave SQL thread 7543 ev->set_artificial_event(); // Don't mess up Exec_Master_Log_Pos 7544 DBUG_RETURN(ev); 7545 } 7546 7547 /* 7548 We have to check sql_slave_killed() here an extra time. 7549 Otherwise we may miss a wakeup, since last check was done 7550 without holding LOCK_log. 7551 */ 7552 if (sql_slave_killed(rgi)) 7553 { 7554 mysql_mutex_unlock(log_lock); 7555 break; 7556 } 7557 7558 /* 7559 We can, and should release data_lock while we are waiting for 7560 update. If we do not, show slave status will block 7561 */ 7562 mysql_mutex_unlock(&rli->data_lock); 7563 7564 /* 7565 Possible deadlock : 7566 - the I/O thread has reached log_space_limit 7567 - the SQL thread has read all relay logs, but cannot purge for some 7568 reason: 7569 * it has already purged all logs except the current one 7570 * there are other logs than the current one but they're involved in 7571 a transaction that finishes in the current one (or is not finished) 7572 Solution : 7573 Wake up the possibly waiting I/O thread, and set a boolean asking 7574 the I/O thread to temporarily ignore the log_space_limit 7575 constraint, because we do not want the I/O thread to block because of 7576 space (it's ok if it blocks for any other reason (e.g. because the 7577 master does not send anything). Then the I/O thread stops waiting 7578 and reads one more event and starts honoring log_space_limit again. 
7579 7580 If the SQL thread needs more events to be able to rotate the log (it 7581 might need to finish the current group first), then it can ask for 7582 one more at a time. Thus we don't outgrow the relay log indefinitely, 7583 but rather in a controlled manner, until the next rotate. 7584 7585 When the SQL thread starts it sets ignore_log_space_limit to false. 7586 We should also reset ignore_log_space_limit to 0 when the user does 7587 RESET SLAVE, but in fact, no need as RESET SLAVE requires that the 7588 slave be stopped, and the SQL thread sets ignore_log_space_limit 7589 to 0 when 7590 it stops. 7591 */ 7592 mysql_mutex_lock(&rli->log_space_lock); 7593 7594 /* 7595 If we have reached the limit of the relay space and we 7596 are going to sleep, waiting for more events: 7597 7598 1. If outside a group, SQL thread asks the IO thread 7599 to force a rotation so that the SQL thread purges 7600 logs next time it processes an event (thus space is 7601 freed). 7602 7603 2. If in a group, SQL thread asks the IO thread to 7604 ignore the limit and queues yet one more event 7605 so that the SQL thread finishes the group and 7606 is are able to rotate and purge sometime soon. 7607 */ 7608 if (rli->log_space_limit && 7609 rli->log_space_limit < rli->log_space_total) 7610 { 7611 /* force rotation if not in an unfinished group */ 7612 rli->sql_force_rotate_relay= !rli->is_in_group(); 7613 7614 /* ask for one more event */ 7615 rli->ignore_log_space_limit= true; 7616 } 7617 7618 mysql_cond_broadcast(&rli->log_space_cond); 7619 mysql_mutex_unlock(&rli->log_space_lock); 7620 // Note that wait_for_update_relay_log unlocks lock_log ! 7621 rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd); 7622 // re-acquire data lock since we released it earlier 7623 mysql_mutex_lock(&rli->data_lock); 7624 rli->sql_thread_caught_up= false; 7625 continue; 7626 } 7627 /* 7628 If the log was not hot, we need to move to the next log in 7629 sequence. 
The next log could be hot or cold, we deal with both 7630 cases separately after doing some common initialization 7631 */ 7632 end_io_cache(cur_log); 7633 DBUG_ASSERT(rli->cur_log_fd >= 0); 7634 mysql_file_close(rli->cur_log_fd, MYF(MY_WME)); 7635 rli->cur_log_fd = -1; 7636 rli->last_inuse_relaylog->completed= true; 7637 rli->relay_log.description_event_for_exec->reset_crypto(); 7638 7639 if (relay_log_purge) 7640 { 7641 /* 7642 purge_first_log will properly set up relay log coordinates in rli. 7643 If the group's coordinates are equal to the event's coordinates 7644 (i.e. the relay log was not rotated in the middle of a group), 7645 we can purge this relay log too. 7646 We do ulonglong and string comparisons, this may be slow but 7647 - purging the last relay log is nice (it can save 1GB of disk), so we 7648 like to detect the case where we can do it, and given this, 7649 - I see no better detection method 7650 - purge_first_log is not called that often 7651 */ 7652 if (rli->relay_log.purge_first_log 7653 (rli, 7654 rli->group_relay_log_pos == rli->event_relay_log_pos 7655 && !strcmp(rli->group_relay_log_name,rli->event_relay_log_name))) 7656 { 7657 errmsg = "Error purging processed logs"; 7658 goto err; 7659 } 7660 } 7661 else 7662 { 7663 /* 7664 If hot_log is set, then we already have a lock on 7665 LOCK_log. If not, we have to get the lock. 7666 7667 According to Sasha, the only time this code will ever be executed 7668 is if we are recovering from a bug. 7669 */ 7670 if (rli->relay_log.find_next_log(&rli->linfo, !hot_log)) 7671 { 7672 errmsg = "error switching to the next log"; 7673 goto err; 7674 } 7675 rli->event_relay_log_pos = BIN_LOG_HEADER_SIZE; 7676 strmake_buf(rli->event_relay_log_name,rli->linfo.log_file_name); 7677 if (rli->flush()) 7678 { 7679 errmsg= "error flushing relay log"; 7680 goto err; 7681 } 7682 } 7683 /* 7684 Now we want to open this next log. 
To know if it's a hot log (the one 7685 being written by the I/O thread now) or a cold log, we can use 7686 is_active(); if it is hot, we use the I/O cache; if it's cold we open 7687 the file normally. But if is_active() reports that the log is hot, this 7688 may change between the test and the consequence of the test. So we may 7689 open the I/O cache whereas the log is now cold, which is nonsense. 7690 To guard against this, we need to have LOCK_log. 7691 */ 7692 7693 DBUG_PRINT("info",("hot_log: %d",hot_log)); 7694 if (!hot_log) /* if hot_log, we already have this mutex */ 7695 mysql_mutex_lock(log_lock); 7696 if (rli->relay_log.is_active(rli->linfo.log_file_name)) 7697 { 7698 rli->cur_log= cur_log= rli->relay_log.get_log_file(); 7699 rli->cur_log_old_open_count= rli->relay_log.get_open_count(); 7700 DBUG_ASSERT(rli->cur_log_fd == -1); 7701 7702 /* 7703 When the SQL thread is [stopped and] (re)started the 7704 following may happen: 7705 7706 1. Log was hot at stop time and remains hot at restart 7707 7708 SQL thread reads again from hot_log (SQL thread was 7709 reading from the active log when it was stopped and the 7710 very same log is still active on SQL thread restart). 7711 7712 In this case, my_b_seek is performed on cur_log, while 7713 cur_log points to relay_log.get_log_file(); 7714 7715 2. Log was hot at stop time but got cold before restart 7716 7717 The log was hot when SQL thread stopped, but it is not 7718 anymore when the SQL thread restarts. 7719 7720 In this case, the SQL thread reopens the log, using 7721 cache_buf, ie, cur_log points to &cache_buf, and thence 7722 its coordinates are reset. 7723 7724 3. Log was already cold at stop time 7725 7726 The log was not hot when the SQL thread stopped, and, of 7727 course, it will not be hot when it restarts. 7728 7729 In this case, the SQL thread opens the cold log again, 7730 using cache_buf, ie, cur_log points to &cache_buf, and 7731 thence its coordinates are reset. 7732 7733 4. 
Log was hot at stop time, DBA changes to previous cold 7734 log and restarts SQL thread 7735 7736 The log was hot when the SQL thread was stopped, but the 7737 user changed the coordinates of the SQL thread to 7738 restart from a previous cold log. 7739 7740 In this case, at start time, cur_log points to a cold 7741 log, opened using &cache_buf as cache, and coordinates 7742 are reset. However, as it moves on to the next logs, it 7743 will eventually reach the hot log. If the hot log is the 7744 same at the time the SQL thread was stopped, then 7745 coordinates were not reset - the cur_log will point to 7746 relay_log.get_log_file(), and not a freshly opened 7747 IO_CACHE through cache_buf. For this reason we need to 7748 deploy a my_b_seek before calling check_binlog_magic at 7749 this point of the code (see: BUG#55263 for more 7750 details). 7751 7752 NOTES: 7753 - We must keep the LOCK_log to read the 4 first bytes, as 7754 this is a hot log (same as when we call read_log_event() 7755 above: for a hot log we take the mutex). 7756 7757 - Because of scenario #4 above, we need to have a 7758 my_b_seek here. Otherwise, we might hit the assertion 7759 inside check_binlog_magic. 7760 */ 7761 7762 my_b_seek(cur_log, (my_off_t) 0); 7763 if (check_binlog_magic(cur_log,&errmsg)) 7764 { 7765 if (!hot_log) 7766 mysql_mutex_unlock(log_lock); 7767 goto err; 7768 } 7769 if (rli->alloc_inuse_relaylog(rli->linfo.log_file_name)) 7770 { 7771 if (!hot_log) 7772 mysql_mutex_unlock(log_lock); 7773 goto err; 7774 } 7775 if (!hot_log) 7776 mysql_mutex_unlock(log_lock); 7777 continue; 7778 } 7779 if (!hot_log) 7780 mysql_mutex_unlock(log_lock); 7781 /* 7782 if we get here, the log was not hot, so we will have to open it 7783 ourselves. We are sure that the log is still not hot now (a log can get 7784 from hot to cold, but not from cold to hot). No need for LOCK_log. 
*/
      // open_binlog() will check the magic header
      if ((rli->cur_log_fd=open_binlog(cur_log,rli->linfo.log_file_name,
                                       &errmsg)) <0)
        goto err;
      if (rli->alloc_inuse_relaylog(rli->linfo.log_file_name))
        goto err;
    }
    else
    {
      /*
        Read failed with a non-EOF error.
        TODO: come up with something better to handle this error
      */
      if (hot_log)
        mysql_mutex_unlock(log_lock);
      sql_print_error("Slave SQL thread: I/O error reading \
event(errno: %d cur_log->error: %d)",
                      my_errno,cur_log->error);
      // set read position to the beginning of the event
      my_b_seek(cur_log,rli->event_relay_log_pos);
      /* otherwise, we have had a partial read */
      errmsg = "Aborting slave SQL thread because of partial event read";
      break;                                    // To end of function
    }
  }
  /*
    No error message was set, so the loop was left without a read failure;
    the message logged below attributes this to the SQL thread being killed.
  */
  if (!errmsg && global_system_variables.log_warnings)
  {
    sql_print_information("Error reading relay log event: %s",
                          "slave SQL thread was killed");
    DBUG_RETURN(0);
  }

err:
  if (errmsg)
    sql_print_error("Error reading relay log event: %s", errmsg);
  DBUG_RETURN(0);
}
#ifdef WITH_WSREP
/*
  Look ahead in the relay log and report the type of the next event that is
  neither an ANNOTATE_ROWS_EVENT nor an XID_EVENT, without consuming it.

  The whole look-ahead runs under rgi->rli->data_lock. The current event
  position of both rgi and rgi->rli is saved first and written back before
  the lock is released, and cur_log is seeked back to the value that
  rgi->future_event_relay_log_pos had on entry.

  @param rgi         Group info whose relay log is inspected.
  @param event_size  Passed through to next_event(); the value it holds after
                     each call is used to advance to the following event.
                     NOTE(review): presumably next_event() always stores the
                     size here, even when it returns NULL - confirm.

  @return Type code of the event found, or UNKNOWN_EVENT when next_event()
          returned no event.
*/
enum Log_event_type wsrep_peak_event(rpl_group_info *rgi, ulonglong* event_size)
{
  enum Log_event_type ev_type;

  mysql_mutex_lock(&rgi->rli->data_lock);

  /* Remember the positions so that they can be restored afterwards. */
  unsigned long long event_pos= rgi->event_relay_log_pos;
  unsigned long long orig_future_pos= rgi->future_event_relay_log_pos;
  unsigned long long future_pos= rgi->future_event_relay_log_pos;

  /*
    Scan the log forward, reading one event per iteration; annotate-rows
    and XID events are skipped.
  */
  do {
    my_b_seek(rgi->rli->cur_log, future_pos);
    rgi->rli->event_relay_log_pos= future_pos;
    rgi->event_relay_log_pos= future_pos;
    Log_event* ev= next_event(rgi, event_size);
    ev_type= (ev) ? ev->get_type_code() : UNKNOWN_EVENT;
    /* Only the type is needed; the event object itself is discarded. */
    delete ev;
    future_pos+= *event_size;
  } while (ev_type == ANNOTATE_ROWS_EVENT || ev_type == XID_EVENT);

  /* scan the log back and re-set the positions to original values */
  rgi->rli->event_relay_log_pos= event_pos;
  rgi->event_relay_log_pos= event_pos;
  my_b_seek(rgi->rli->cur_log, orig_future_pos);

  mysql_mutex_unlock(&rgi->rli->data_lock);

  return ev_type;
}
#endif /* WITH_WSREP */
/*
  Rotate a relay log (this is used only by FLUSH LOGS; the automatic rotation
  because of size is simpler because when we do it we already have all relevant
  locks; here we don't, so this function is mainly taking locks).

  The caller must hold mi->data_lock (asserted below before the written
  bytes are harvested).

  @param mi  Master_info whose relay log (mi->rli.relay_log) is rotated.

  @return 0 when the relay log info is not initialised or the rotation
          succeeded, otherwise the non-zero value returned by
          MYSQL_BIN_LOG::new_file().
*/

int rotate_relay_log(Master_info* mi)
{
  DBUG_ENTER("rotate_relay_log");
  Relay_log_info* rli= &mi->rli;
  int error= 0;

  DBUG_EXECUTE_IF("crash_before_rotate_relaylog", DBUG_SUICIDE(););

  /*
     We need to test inited because otherwise, new_file() will attempt to lock
     LOCK_log, which may not be inited (if we're not a slave).
  */
  if (!rli->inited)
  {
    DBUG_PRINT("info", ("rli->inited == 0"));
    goto end;
  }

  /* If the relay log is closed, new_file() will do nothing. */
  if ((error= rli->relay_log.new_file()))
    goto end;

  /*
    We harvest now, because otherwise BIN_LOG_HEADER_SIZE will not immediately
    be counted, so imagine a succession of FLUSH LOGS  and assume the slave
    threads are started:
    relay_log_space decreases by the size of the deleted relay log, but does
    not increase, so flush-after-flush we may become negative, which is wrong.
    Even if this will be corrected as soon as a query is replicated on the
    slave (because the I/O thread will then call harvest_bytes_written() which
    will harvest all these BIN_LOG_HEADER_SIZE we forgot), it may give strange
    output in SHOW SLAVE STATUS meanwhile. So we harvest now.
    If the log is closed, then this will just harvest the last writes, probably
    0 as they probably have been harvested.

    Note that it needs to be protected by mi->data_lock.
  */
  mysql_mutex_assert_owner(&mi->data_lock);
  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
end:
  DBUG_RETURN(error);
}


/**
   Detects, based on master's version (as found in the relay log), if master
   has a certain bug.
   @param rli Relay_log_info which tells the master's version
   @param bug_id Number of the bug as found in bugs.mysql.com
   @param report bool report error message, default TRUE

   @param pred Predicate function that will be called with @c param to
   check for the bug. If the function returns @c true, the bug is present,
   otherwise, it is not.

   @param param  State passed to @c pred function.

   @return TRUE if master has the bug, FALSE if it does not.
7922 */ 7923 bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report, 7924 bool (*pred)(const void *), const void *param) 7925 { 7926 struct st_version_range_for_one_bug { 7927 uint bug_id; 7928 Version introduced_in; // first version with bug 7929 Version fixed_in; // first version with fix 7930 }; 7931 static struct st_version_range_for_one_bug versions_for_all_bugs[]= 7932 { 7933 {24432, { 5, 0, 24 }, { 5, 0, 38 } }, 7934 {24432, { 5, 1, 12 }, { 5, 1, 17 } }, 7935 {33029, { 5, 0, 0 }, { 5, 0, 58 } }, 7936 {33029, { 5, 1, 0 }, { 5, 1, 12 } }, 7937 {37426, { 5, 1, 0 }, { 5, 1, 26 } }, 7938 }; 7939 const Version &master_ver= 7940 rli->relay_log.description_event_for_exec->server_version_split; 7941 7942 for (uint i= 0; 7943 i < sizeof(versions_for_all_bugs)/sizeof(*versions_for_all_bugs);i++) 7944 { 7945 const Version &introduced_in= versions_for_all_bugs[i].introduced_in; 7946 const Version &fixed_in= versions_for_all_bugs[i].fixed_in; 7947 if ((versions_for_all_bugs[i].bug_id == bug_id) && 7948 introduced_in <= master_ver && 7949 fixed_in > master_ver && 7950 (pred == NULL || (*pred)(param))) 7951 { 7952 if (!report) 7953 return TRUE; 7954 // a short message for SHOW SLAVE STATUS (message length constraints) 7955 my_printf_error(ER_UNKNOWN_ERROR, "master may suffer from" 7956 " http://bugs.mysql.com/bug.php?id=%u" 7957 " so slave stops; check error log on slave" 7958 " for more info", MYF(0), bug_id); 7959 // a verbose message for the error log 7960 rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR, NULL, 7961 "According to the master's version ('%s')," 7962 " it is probable that master suffers from this bug:" 7963 " http://bugs.mysql.com/bug.php?id=%u" 7964 " and thus replicating the current binary log event" 7965 " may make the slave's data become different from the" 7966 " master's data." 7967 " To take no risk, slave refuses to replicate" 7968 " this event and stops." 
7969 " We recommend that all updates be stopped on the" 7970 " master and slave, that the data of both be" 7971 " manually synchronized," 7972 " that master's binary logs be deleted," 7973 " that master be upgraded to a version at least" 7974 " equal to '%d.%d.%d'. Then replication can be" 7975 " restarted.", 7976 rli->relay_log.description_event_for_exec->server_version, 7977 bug_id, 7978 fixed_in[0], fixed_in[1], fixed_in[2]); 7979 return TRUE; 7980 } 7981 } 7982 return FALSE; 7983 } 7984 7985 /** 7986 BUG#33029, For all 5.0 up to 5.0.58 exclusive, and 5.1 up to 5.1.12 7987 exclusive, if one statement in a SP generated AUTO_INCREMENT value 7988 by the top statement, all statements after it would be considered 7989 generated AUTO_INCREMENT value by the top statement, and a 7990 erroneous INSERT_ID value might be associated with these statement, 7991 which could cause duplicate entry error and stop the slave. 7992 7993 Detect buggy master to work around. 7994 */ 7995 bool rpl_master_erroneous_autoinc(THD *thd) 7996 { 7997 if (thd->rgi_slave) 7998 { 7999 DBUG_EXECUTE_IF("simulate_bug33029", return TRUE;); 8000 return rpl_master_has_bug(thd->rgi_slave->rli, 33029, FALSE, NULL, NULL); 8001 } 8002 return FALSE; 8003 } 8004 8005 8006 static bool get_row_event_stmt_end(const char* buf, 8007 const Format_description_log_event *fdle) 8008 { 8009 uint8 const common_header_len= fdle->common_header_len; 8010 Log_event_type event_type= (Log_event_type)(uchar)buf[EVENT_TYPE_OFFSET]; 8011 8012 uint8 const post_header_len= fdle->post_header_len[event_type-1]; 8013 const char *flag_start= buf + common_header_len; 8014 /* 8015 The term 4 below signifies that master is of 'an intermediate source', see 8016 Rows_log_event::Rows_log_event. 8017 */ 8018 flag_start += RW_MAPID_OFFSET + ((post_header_len == 6) ? 4 : RW_FLAGS_OFFSET); 8019 8020 return (uint2korr(flag_start) & Rows_log_event::STMT_END_F) != 0; 8021 } 8022 8023 8024 /* 8025 Reset log event tracking data. 
8026 */ 8027 8028 void Rows_event_tracker::reset() 8029 { 8030 binlog_file_name[0]= 0; 8031 first_seen= last_seen= 0; 8032 stmt_end_seen= false; 8033 } 8034 8035 8036 /* 8037 Update log event tracking data. 8038 8039 The first- and last- seen event binlog position get memorized, as 8040 well as the end-of-statement status of the last one. 8041 */ 8042 8043 void Rows_event_tracker::update(const char* file_name, my_off_t pos, 8044 const char* buf, 8045 const Format_description_log_event *fdle) 8046 { 8047 if (!first_seen) 8048 { 8049 first_seen= pos; 8050 strmake(binlog_file_name, file_name, sizeof(binlog_file_name) - 1); 8051 } 8052 last_seen= pos; 8053 DBUG_ASSERT(stmt_end_seen == 0); // We can only have one 8054 stmt_end_seen= get_row_event_stmt_end(buf, fdle); 8055 }; 8056 8057 8058 /** 8059 The function is called at next event reading 8060 after a sequence of Rows- log-events. It checks the end-of-statement status 8061 of the past sequence to report on any isssue. 8062 In the positive case the tracker gets reset. 8063 8064 @return true when the Rows- event group integrity found compromised, 8065 false otherwise. 8066 */ 8067 bool Rows_event_tracker::check_and_report(const char* file_name, 8068 my_off_t pos) 8069 { 8070 if (last_seen) 8071 { 8072 // there was at least one "block" event previously 8073 if (!stmt_end_seen) 8074 { 8075 sql_print_error("Slave IO thread did not receive an expected " 8076 "Rows-log end-of-statement for event starting " 8077 "at log '%s' position %llu " 8078 "whose last block was seen at log '%s' position %llu. " 8079 "The end-of-statement should have been delivered " 8080 "before the current one at log '%s' position %llu", 8081 binlog_file_name, first_seen, 8082 binlog_file_name, last_seen, file_name, pos); 8083 return true; 8084 } 8085 reset(); 8086 } 8087 8088 return false; 8089 } 8090 8091 /** 8092 @} (end of group Replication) 8093 */ 8094 8095 #endif /* HAVE_REPLICATION */ 8096