1 /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
22
23
24 /**
25 @addtogroup Replication
26 @{
27
28 @file
29
30 @brief Code to run the io thread and the sql thread on the
31 replication slave.
32 */
33
34 #include "sql_priv.h"
35 #include "my_global.h"
36 #include "rpl_slave.h"
37 #include "sql_parse.h" // execute_init_command
38 #include "sql_table.h" // mysql_rm_table
39 #include "rpl_mi.h"
40 #include "rpl_rli.h"
41 #include "rpl_filter.h"
42 #include "rpl_info_factory.h"
43 #include "transaction.h"
44 #include <thr_alarm.h>
45 #include <my_dir.h>
46 #include <sql_common.h>
47 #include <errmsg.h>
48 #include <mysqld_error.h>
49 #include <mysys_err.h>
50 #include "rpl_handler.h"
51 #include "rpl_info_dummy.h"
52 #include <signal.h>
53 #include <mysql.h>
54 #include <myisam.h>
55
56 #include "sql_base.h" // close_thread_tables
57 #include "tztime.h" // struct Time_zone
58 #include "log_event.h" // Rotate_log_event,
59 // Create_file_log_event,
60 // Format_description_log_event
61 #include "dynamic_ids.h"
62 #include "rpl_rli_pdb.h"
63 #include "global_threads.h"
64
65 #ifdef HAVE_REPLICATION
66
67 #include "rpl_tblmap.h"
68 #include "debug_sync.h"
69
70 using std::min;
71 using std::max;
72
73 #define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
74
75 #define MAX_SLAVE_RETRY_PAUSE 5
76 /*
77 a parameter of sql_slave_killed() to defer the killed status
78 */
79 #define SLAVE_WAIT_GROUP_DONE 60
80 bool use_slave_mask = 0;
81 MY_BITMAP slave_error_mask;
82 char slave_skip_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
83
84 static unsigned long stop_wait_timeout;
85 char* slave_load_tmpdir = 0;
86 Master_info *active_mi= 0;
87 my_bool replicate_same_server_id;
88 ulonglong relay_log_space_limit = 0;
89
90 const char *relay_log_index= 0;
91 const char *relay_log_basename= 0;
92
93 /*
94 MTS load-balancing parameter.
95 Max length of one MTS Worker queue. The value also determines the size
96 of Relay_log_info::gaq (see @c slave_start_workers()).
97 It can be set to any value in [1, ULONG_MAX - 1] range.
98 */
99 const ulong mts_slave_worker_queue_len_max= 16384;
100
101 /*
102 Statistics go to the error log every # of seconds when --log-warnings > 1
103 */
104 const long mts_online_stat_period= 60 * 2;
105
106
107 /*
108 MTS load-balancing parameter.
109 Time unit in microsecs to sleep by MTS Coordinator to avoid extra thread
110 signalling in the case of Worker queues are close to be filled up.
111 */
112 const ulong mts_coordinator_basic_nap= 5;
113
114 /*
115 MTS load-balancing parameter.
116 Percent of Worker queue size at which Worker is considered to become
117 hungry.
118
119 C enqueues --+ . underrun level
120 V "
121 +----------+-+------------------+--------------+
122 | empty |.|::::::::::::::::::|xxxxxxxxxxxxxx| ---> Worker dequeues
123 +----------+-+------------------+--------------+
124
125 Like in the above diagram enqueuing to the x-d area would indicate
126 actual underrunning by Worker.
127 */
128 const ulong mts_worker_underrun_level= 10;
129
130 Slave_job_item * de_queue(Slave_jobs_queue *jobs, Slave_job_item *ret);
131 bool append_item_to_jobs(slave_job_item *job_item,
132 Slave_worker *w, Relay_log_info *rli);
133
134 /*
135 When slave thread exits, we need to remember the temporary tables so we
136 can re-use them on slave start.
137
138 TODO: move the vars below under Master_info
139 */
140
141 int disconnect_slave_event_count = 0, abort_slave_event_count = 0;
142
143 static pthread_key(Master_info*, RPL_MASTER_INFO);
144
/*
  Selects a row of reconnect_messages: the operation of the slave I/O thread
  that failed and triggered the reconnect (registration on master, binlog
  dump request, or reading an event from the master).
*/
enum enum_slave_reconnect_actions
{
  SLAVE_RECON_ACT_REG= 0,
  SLAVE_RECON_ACT_DUMP= 1,
  SLAVE_RECON_ACT_EVENT= 2,
  SLAVE_RECON_ACT_MAX
};

/*
  Selects a column of reconnect_messages: which text is wanted for the
  chosen action.
*/
enum enum_slave_reconnect_messages
{
  SLAVE_RECON_MSG_WAIT= 0,
  SLAVE_RECON_MSG_KILLED_WAITING= 1,
  SLAVE_RECON_MSG_AFTER= 2,
  SLAVE_RECON_MSG_FAILED= 3,
  SLAVE_RECON_MSG_COMMAND= 4,
  SLAVE_RECON_MSG_KILLED_AFTER= 5,
  SLAVE_RECON_MSG_MAX
};

/*
  Message table indexed as
  reconnect_messages[enum_slave_reconnect_actions][enum_slave_reconnect_messages].

  The SLAVE_RECON_MSG_FAILED entries are format strings that take two '%s'
  arguments (log name and position). Long texts are written as adjacent
  string literals, which the compiler concatenates into a single string.
*/
static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]=
{
  /* SLAVE_RECON_ACT_REG */
  {
    "Waiting to reconnect after a failed registration on master",
    "Slave I/O thread killed while waiting to reconnect after a failed "
    "registration on master",
    "Reconnecting after a failed registration on master",
    "failed registering on master, reconnecting to try again, "
    "log '%s' at position %s",
    "COM_REGISTER_SLAVE",
    "Slave I/O thread killed during or after reconnect"
  },
  /* SLAVE_RECON_ACT_DUMP */
  {
    "Waiting to reconnect after a failed binlog dump request",
    "Slave I/O thread killed while retrying master dump",
    "Reconnecting after a failed binlog dump request",
    "failed dump request, reconnecting to try again, log '%s' at position %s",
    "COM_BINLOG_DUMP",
    "Slave I/O thread killed during or after reconnect"
  },
  /* SLAVE_RECON_ACT_EVENT */
  {
    "Waiting to reconnect after a failed master event read",
    "Slave I/O thread killed while waiting to reconnect after a failed read",
    "Reconnecting after a failed master event read",
    "Slave I/O thread: Failed reading log event, reconnecting to retry, "
    "log '%s' at position %s",
    "",
    "Slave I/O thread killed during or after a reconnect done to recover from "
    "failed read"
  }
};
195
/*
  Result codes named after the step that failed: applying the event,
  updating the position, or appending the job. The enumerators rely on
  implicit numbering starting at 0, so their order must not change.
*/
enum enum_slave_apply_event_and_update_pos_retval
{
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK= 0,
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPLY_ERROR,      /* 1 */
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR, /* 2 */
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR, /* 3 */
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_MAX               /* number of codes */
};
204
205
206 static int process_io_rotate(Master_info* mi, Rotate_log_event* rev);
207 static int process_io_create_file(Master_info* mi, Create_file_log_event* cev);
208 static bool wait_for_relay_log_space(Relay_log_info* rli);
209 static inline bool io_slave_killed(THD* thd,Master_info* mi);
210 static inline bool sql_slave_killed(THD* thd,Relay_log_info* rli);
211 static inline bool is_autocommit_off_and_infotables(THD* thd);
212 static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type);
213 static void print_slave_skip_errors(void);
214 static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
215 static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
216 bool suppress_warnings);
217 static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
218 bool reconnect, bool suppress_warnings);
219 static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi);
220 static int get_master_uuid(MYSQL *mysql, Master_info *mi);
221 int io_thread_init_commands(MYSQL *mysql, Master_info *mi);
222 static Log_event* next_event(Relay_log_info* rli);
223 static int queue_event(Master_info* mi,const char* buf,ulong event_len);
224 static void set_stop_slave_wait_timeout(unsigned long wait_timeout);
225 static int terminate_slave_thread(THD *thd,
226 mysql_mutex_t *term_lock,
227 mysql_cond_t *term_cond,
228 volatile uint *slave_running,
229 bool need_lock_term);
230 static bool check_io_slave_killed(THD *thd, Master_info *mi, const char *info);
231 int slave_worker_exec_job(Slave_worker * w, Relay_log_info *rli);
232 static int mts_event_coord_cmp(LOG_POS_COORD *id1, LOG_POS_COORD *id2);
233 /*
234 Function to set the slave's max_allowed_packet based on the value
235 of slave_max_allowed_packet.
236
237 @in_param thd Thread handler for slave
238 @in_param mysql MySQL connection handle
239 */
240
set_slave_max_allowed_packet(THD * thd,MYSQL * mysql)241 static void set_slave_max_allowed_packet(THD *thd, MYSQL *mysql)
242 {
243 DBUG_ENTER("set_slave_max_allowed_packet");
244 // thd and mysql must be valid
245 DBUG_ASSERT(thd && mysql);
246
247 thd->variables.max_allowed_packet= slave_max_allowed_packet;
248 thd->net.max_packet_size= slave_max_allowed_packet;
249 /*
250 Adding MAX_LOG_EVENT_HEADER_LEN to the max_packet_size on the I/O
251 thread and the mysql->option max_allowed_packet, since a
252 replication event can become this much larger than
253 the corresponding packet (query) sent from client to master.
254 */
255 thd->net.max_packet_size+= MAX_LOG_EVENT_HEADER;
256 /*
257 Skipping the setting of mysql->net.max_packet size to slave
258 max_allowed_packet since this is done during mysql_real_connect.
259 */
260 mysql->options.max_allowed_packet=
261 slave_max_allowed_packet+MAX_LOG_EVENT_HEADER;
262 DBUG_VOID_RETURN;
263 }
264
265 /*
266 Find out which replication threads are running
267
268 SYNOPSIS
269 init_thread_mask()
270 mask Return value here
271 mi master_info for slave
272 inverse If set, returns which threads are not running
273
274 IMPLEMENTATION
275 Get a bit mask for which threads are running so that we can later restart
276 these threads.
277
278 RETURN
279 mask If inverse == 0, running threads
280 If inverse == 1, stopped threads
281 */
282
init_thread_mask(int * mask,Master_info * mi,bool inverse)283 void init_thread_mask(int* mask, Master_info* mi, bool inverse)
284 {
285 bool set_io = mi->slave_running, set_sql = mi->rli->slave_running;
286 int tmp_mask=0;
287 DBUG_ENTER("init_thread_mask");
288
289 if (set_io)
290 tmp_mask |= SLAVE_IO;
291 if (set_sql)
292 tmp_mask |= SLAVE_SQL;
293 if (inverse)
294 tmp_mask^= (SLAVE_IO | SLAVE_SQL);
295 *mask = tmp_mask;
296 DBUG_VOID_RETURN;
297 }
298
299
300 /*
301 lock_slave_threads()
302 */
303
lock_slave_threads(Master_info * mi)304 void lock_slave_threads(Master_info* mi)
305 {
306 DBUG_ENTER("lock_slave_threads");
307
308 //TODO: see if we can do this without dual mutex
309 mysql_mutex_lock(&mi->run_lock);
310 mysql_mutex_lock(&mi->rli->run_lock);
311 DBUG_VOID_RETURN;
312 }
313
314
315 /*
316 unlock_slave_threads()
317 */
318
unlock_slave_threads(Master_info * mi)319 void unlock_slave_threads(Master_info* mi)
320 {
321 DBUG_ENTER("unlock_slave_threads");
322
323 //TODO: see if we can do this without dual mutex
324 mysql_mutex_unlock(&mi->rli->run_lock);
325 mysql_mutex_unlock(&mi->run_lock);
326 DBUG_VOID_RETURN;
327 }
328
329 #ifdef HAVE_PSI_INTERFACE
330 static PSI_thread_key key_thread_slave_io, key_thread_slave_sql, key_thread_slave_worker;
331
332 static PSI_thread_info all_slave_threads[]=
333 {
334 { &key_thread_slave_io, "slave_io", PSI_FLAG_GLOBAL},
335 { &key_thread_slave_sql, "slave_sql", PSI_FLAG_GLOBAL},
336 { &key_thread_slave_worker, "slave_worker", PSI_FLAG_GLOBAL}
337 };
338
init_slave_psi_keys(void)339 static void init_slave_psi_keys(void)
340 {
341 const char* category= "sql";
342 int count;
343
344 count= array_elements(all_slave_threads);
345 mysql_thread_register(category, all_slave_threads, count);
346 }
347 #endif /* HAVE_PSI_INTERFACE */
348
349 /* Initialize slave structures */
350
init_slave()351 int init_slave()
352 {
353 DBUG_ENTER("init_slave");
354 int error= 0;
355 int thread_mask= SLAVE_SQL | SLAVE_IO;
356 Relay_log_info* rli= NULL;
357
358 #ifdef HAVE_PSI_INTERFACE
359 init_slave_psi_keys();
360 #endif
361
362 /*
363 This is called when mysqld starts. Before client connections are
364 accepted. However bootstrap may conflict with us if it does START SLAVE.
365 So it's safer to take the lock.
366 */
367 mysql_mutex_lock(&LOCK_active_mi);
368
369 if (pthread_key_create(&RPL_MASTER_INFO, NULL))
370 DBUG_RETURN(1);
371
372 if ((error= Rpl_info_factory::create_coordinators(opt_mi_repository_id, &active_mi,
373 opt_rli_repository_id, &rli)))
374 {
375 sql_print_error("Failed to create or recover replication info repository.");
376 error= 1;
377 goto err;
378 }
379
380 /*
381 This is the startup routine and as such we try to
382 configure both the SLAVE_SQL and SLAVE_IO.
383 */
384 if (global_init_info(active_mi, true, thread_mask))
385 {
386 sql_print_error("Failed to initialize the master info structure");
387 error= 1;
388 goto err;
389 }
390
391 DBUG_PRINT("info", ("init group master %s %lu group relay %s %lu event %s %lu\n",
392 rli->get_group_master_log_name(),
393 (ulong) rli->get_group_master_log_pos(),
394 rli->get_group_relay_log_name(),
395 (ulong) rli->get_group_relay_log_pos(),
396 rli->get_event_relay_log_name(),
397 (ulong) rli->get_event_relay_log_pos()));
398
399 /* If server id is not set, start_slave_thread() will say it */
400 if (active_mi->host[0] && !opt_skip_slave_start)
401 {
402 /* same as in start_slave() cache the global var values into rli's members */
403 active_mi->rli->opt_slave_parallel_workers= opt_mts_slave_parallel_workers;
404 active_mi->rli->checkpoint_group= opt_mts_checkpoint_group;
405 if (start_slave_threads(true/*need_lock_slave=true*/,
406 false/*wait_for_start=false*/,
407 active_mi,
408 thread_mask))
409 {
410 sql_print_error("Failed to create slave threads");
411 error= 1;
412 goto err;
413 }
414 }
415
416 err:
417 mysql_mutex_unlock(&LOCK_active_mi);
418 if (error)
419 sql_print_information("Check error log for additional messages. "
420 "You will not be able to start replication until "
421 "the issue is resolved and the server restarted.");
422 DBUG_RETURN(error);
423 }
424
425 /**
426 Parse the given relay log and identify the rotate event from the master.
427 Ignore the Format description event, Previous_gtid log event and ignorable
428 events within the relay log. When a rotate event is found check if it is a
429 rotate that is originated from the master or not based on the server_id. If
430 the rotate is from slave or if it is a fake rotate event ignore the event.
431 If any other events are encountered apart from the above events generate an
432 error. From the rotate event extract the master's binary log name and
433 position.
434
435 @param filename
436 Relay log name which needs to be parsed.
437
438 @param[OUT] master_log_file
439 Set the master_log_file to the log file name that is extracted from
440 rotate event. The master_log_file should contain string of len
441 FN_REFLEN.
442
443 @param[OUT] master_log_pos
444 Set the master_log_pos to the log position extracted from rotate
445 event.
446
447 @retval FOUND_ROTATE: When rotate event is found in the relay log
448 @retval NOT_FOUND_ROTATE: When rotate event is not found in the relay log
449 @retval ERROR: On error
450 */
enum enum_read_rotate_from_relay_log_status
{
  FOUND_ROTATE,      /* a rotate event was found in the relay log */
  NOT_FOUND_ROTATE,  /* no rotate event was found in the relay log */
  ERROR              /* the relay log could not be processed */
};
453
454 static enum_read_rotate_from_relay_log_status
read_rotate_from_relay_log(char * filename,char * master_log_file,my_off_t * master_log_pos)455 read_rotate_from_relay_log(char *filename, char *master_log_file,
456 my_off_t *master_log_pos)
457 {
458 DBUG_ENTER("read_rotate_from_relay_log");
459 /*
460 Create a Format_description_log_event that is used to read the
461 first event of the log.
462 */
463 Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
464 DBUG_ASSERT(fd_ev.is_valid());
465 IO_CACHE log;
466 const char *errmsg= NULL;
467 File file= open_binlog_file(&log, filename, &errmsg);
468 if (file < 0)
469 {
470 sql_print_error("Error during --relay-log-recovery: %s", errmsg);
471 DBUG_RETURN(ERROR);
472 }
473 my_b_seek(&log, BIN_LOG_HEADER_SIZE);
474 Log_event *ev= NULL;
475 bool done= false;
476 enum_read_rotate_from_relay_log_status ret= NOT_FOUND_ROTATE;
477 while (!done &&
478 (ev= Log_event::read_log_event(&log, 0, fd_ev_p, opt_slave_sql_verify_checksum)) !=
479 NULL)
480 {
481 DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
482 switch (ev->get_type_code())
483 {
484 case FORMAT_DESCRIPTION_EVENT:
485 if (fd_ev_p != &fd_ev)
486 delete fd_ev_p;
487 fd_ev_p= (Format_description_log_event *)ev;
488 break;
489 case ROTATE_EVENT:
490 /*
491 Check for rotate event from the master. Ignore the ROTATE event if it
492 is a fake rotate event with server_id=0.
493 */
494 if (ev->server_id && ev->server_id != ::server_id)
495 {
496 Rotate_log_event *rotate_ev= (Rotate_log_event *)ev;
497 DBUG_ASSERT(FN_REFLEN >= rotate_ev->ident_len + 1);
498 memcpy(master_log_file, rotate_ev->new_log_ident, rotate_ev->ident_len + 1);
499 *master_log_pos= rotate_ev->pos;
500 ret= FOUND_ROTATE;
501 done= true;
502 }
503 break;
504 case PREVIOUS_GTIDS_LOG_EVENT:
505 break;
506 case IGNORABLE_LOG_EVENT:
507 break;
508 default:
509 sql_print_error("Error during --relay-log-recovery: Could not locate "
510 "rotate event from the master.");
511 ret= ERROR;
512 done= true;
513 break;
514 }
515 if (ev != fd_ev_p)
516 delete ev;
517 }
518 if (log.error < 0)
519 {
520 sql_print_error("Error during --relay-log-recovery: Error reading events from relay log: %d",
521 log.error);
522 DBUG_RETURN(ERROR);
523 }
524
525 if (fd_ev_p != &fd_ev)
526 {
527 delete fd_ev_p;
528 fd_ev_p= &fd_ev;
529 }
530
531 if (mysql_file_close(file, MYF(MY_WME)))
532 DBUG_RETURN(ERROR);
533 if (end_io_cache(&log))
534 {
535 sql_print_error("Error during --relay-log-recovery: Error while freeing "
536 "IO_CACHE object");
537 DBUG_RETURN(ERROR);
538 }
539 DBUG_RETURN(ret);
540 }
541
542 /**
543 Reads relay logs one by one starting from the first relay log. Looks for
544 the first rotate event from the master. If rotate is not found in the relay
545 log search continues to next relay log. If rotate event from master is
546 found then the extracted master_log_file and master_log_pos are used to set
547 rli->group_master_log_name and rli->group_master_log_pos. If an error has
548 occurred the error code is returned back.
549
550 @param rli
551 Relay_log_info object to read relay log files and to set
552 group_master_log_name and group_master_log_pos.
553
554 @retval 0 On success
555 @retval 1 On failure
556 */
557 static int
find_first_relay_log_with_rotate_from_master(Relay_log_info * rli)558 find_first_relay_log_with_rotate_from_master(Relay_log_info* rli)
559 {
560 DBUG_ENTER("find_first_relay_log_with_rotate_from_master");
561 int error= 0;
562 LOG_INFO linfo;
563 bool got_rotate_from_master= false;
564 int pos;
565 char master_log_file[FN_REFLEN];
566 my_off_t master_log_pos= 0;
567
568 for (pos= rli->relay_log.find_log_pos(&linfo, NULL, true);
569 !pos;
570 pos= rli->relay_log.find_next_log(&linfo, true))
571 {
572 switch (read_rotate_from_relay_log(linfo.log_file_name, master_log_file,
573 &master_log_pos))
574 {
575 case ERROR:
576 error= 1;
577 break;
578 case FOUND_ROTATE:
579 got_rotate_from_master= true;
580 break;
581 case NOT_FOUND_ROTATE:
582 break;
583 }
584 if (error || got_rotate_from_master)
585 break;
586 }
587 if (pos== LOG_INFO_IO)
588 {
589 error= 1;
590 sql_print_error("Error during --relay-log-recovery: Could not read "
591 "relay log index file due to an IO error.");
592 goto err;
593 }
594 if (pos== LOG_INFO_EOF)
595 {
596 error= 1;
597 sql_print_error("Error during --relay-log-recovery: Could not locate "
598 "rotate event from master in relay log file.");
599 goto err;
600 }
601 if (!error && got_rotate_from_master)
602 {
603 rli->set_group_master_log_name(master_log_file);
604 rli->set_group_master_log_pos(master_log_pos);
605 }
606 err:
607 DBUG_RETURN(error);
608 }
609
610 /*
611 Updates the master info based on the information stored in the
612 relay info and ignores relay logs previously retrieved by the IO
613 thread, which thus starts fetching again based on to the
614 master_log_pos and master_log_name. Eventually, the old
615 relay logs will be purged by the normal purge mechanism.
616
617 When GTID's are enabled the "Retrieved GTID" set should be cleared
618 so that partial read events are discarded and they are
619 fetched once again
620
621 @param mi pointer to Master_info instance
622 */
recover_relay_log(Master_info * mi)623 static void recover_relay_log(Master_info *mi)
624 {
625 Relay_log_info *rli=mi->rli;
626 // Set Receiver Thread's positions as per the recovered Applier Thread.
627 mi->set_master_log_pos(max<ulonglong>(BIN_LOG_HEADER_SIZE,
628 rli->get_group_master_log_pos()));
629 mi->set_master_log_name(rli->get_group_master_log_name());
630
631 sql_print_warning("Recovery from master pos %ld and file %s. "
632 "Previous relay log pos and relay log file had "
633 "been set to %lld, %s respectively.",
634 (ulong) mi->get_master_log_pos(), mi->get_master_log_name(),
635 rli->get_group_relay_log_pos(), rli->get_group_relay_log_name());
636
637 // Start with a fresh relay log.
638 rli->set_group_relay_log_name(rli->relay_log.get_log_fname());
639 rli->set_event_relay_log_name(rli->relay_log.get_log_fname());
640 rli->set_group_relay_log_pos(BIN_LOG_HEADER_SIZE);
641 rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
642 /*
643 Clear the retrieved GTID set so that events that are written partially
644 will be fetched again.
645 */
646 if (gtid_mode == GTID_MODE_ON)
647 {
648 global_sid_lock->wrlock();
649 (const_cast<Gtid_set *>(rli->get_gtid_set()))->clear();
650 global_sid_lock->unlock();
651 }
652 }
653
654
655 /*
656 Updates the master info based on the information stored in the
657 relay info and ignores relay logs previously retrieved by the IO
658 thread, which thus starts fetching again based on to the
659 master_log_pos and master_log_name. Eventually, the old
660 relay logs will be purged by the normal purge mechanism.
661
662 There can be a special case where rli->group_master_log_name and
663 rli->group_master_log_pos are not initialized, as the sql thread was never
664 started at all. In those cases all the existing relay logs are parsed
665 starting from the first one and the initial rotate event that was received
666 from the master is identified. From the rotate event master_log_name and
667 master_log_pos are extracted and they are set to rli->group_master_log_name
668 and rli->group_master_log_pos.
669
670 In the future, we should improve this routine in order to avoid throwing
671 away logs that are safely stored in the disk. Note also that this recovery
672 routine relies on the correctness of the relay-log.info and only tolerates
673 coordinate problems in master.info.
674
675 In this function, there is no need for a mutex as the caller
676 (i.e. init_slave) already has one acquired.
677
678 Specifically, the following structures are updated:
679
680 1 - mi->master_log_pos <-- rli->group_master_log_pos
681 2 - mi->master_log_name <-- rli->group_master_log_name
682 3 - It moves the relay log to the new relay log file, by
683 rli->group_relay_log_pos <-- BIN_LOG_HEADER_SIZE;
684 rli->event_relay_log_pos <-- BIN_LOG_HEADER_SIZE;
685 rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
686 rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
687
688 If there is an error, it returns (1), otherwise returns (0).
689 */
init_recovery(Master_info * mi,const char ** errmsg)690 int init_recovery(Master_info* mi, const char** errmsg)
691 {
692 DBUG_ENTER("init_recovery");
693
694 int error= 0;
695 Relay_log_info *rli= mi->rli;
696 char *group_master_log_name= NULL;
697
698 if (rli->recovery_parallel_workers)
699 {
700 /*
701 This is not idempotent and a crash after this function and before
702 the recovery is actually done may lead the system to an inconsistent
703 state.
704
705 This may happen because the gap is not persitent stored anywhere
706 and eventually old relay log files will be removed and further
707 calculations on the gaps will be impossible.
708
709 We need to improve this. /Alfranio.
710 */
711 error= mts_recovery_groups(rli);
712 if (rli->mts_recovery_group_cnt)
713 {
714 if (gtid_mode == GTID_MODE_ON)
715 {
716 rli->recovery_parallel_workers= 0;
717 rli->clear_mts_recovery_groups();
718 }
719 else
720 DBUG_RETURN(error);
721 }
722 }
723
724 group_master_log_name= const_cast<char *>(rli->get_group_master_log_name());
725 if (!error)
726 {
727 if (!group_master_log_name[0])
728 {
729 if (rli->replicate_same_server_id)
730 {
731 error= 1;
732 sql_print_error("Error during --relay-log-recovery: "
733 "replicate_same_server_id is in use and sql thread's "
734 "positions are not initialized, hence relay log "
735 "recovery cannot happen.");
736 DBUG_RETURN(error);
737 }
738 error= find_first_relay_log_with_rotate_from_master(rli);
739 if (error)
740 DBUG_RETURN(error);
741 }
742 recover_relay_log(mi);
743 }
744 DBUG_RETURN(error);
745 }
746
747 /*
748 Relay log recovery in the case of MTS, is handled by the following function.
749 Gaps in MTS execution are filled using implicit execution of
750 START SLAVE UNTIL SQL_AFTER_MTS_GAPS call. Once slave reaches a consistent
751 gapless state receiver thread's positions are initialized to applier thread's
752 positions and the old relay logs are discarded. This completes the recovery
753 process.
754
755 @param mi pointer to Master_info instance.
756
757 @retval 0 success
758 @retval 1 error
759 */
fill_mts_gaps_and_recover(Master_info * mi)760 static inline int fill_mts_gaps_and_recover(Master_info* mi)
761 {
762 DBUG_ENTER("fill_mts_gaps_and_recover");
763 Relay_log_info *rli= mi->rli;
764 int recovery_error= 0;
765 rli->is_relay_log_recovery= FALSE;
766 rli->until_condition= Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS;
767 rli->opt_slave_parallel_workers= rli->recovery_parallel_workers;
768 sql_print_information("MTS recovery: starting coordinator thread to fill MTS "
769 "gaps.");
770 recovery_error= start_slave_thread(
771 #ifdef HAVE_PSI_INTERFACE
772 key_thread_slave_sql,
773 #endif
774 handle_slave_sql, &rli->run_lock,
775 &rli->run_lock,
776 &rli->start_cond,
777 &rli->slave_running,
778 &rli->slave_run_id,
779 mi);
780
781 if (recovery_error)
782 {
783 sql_print_warning("MTS recovery: failed to start the coordinator "
784 "thread. Check the error log for additional"
785 " details.");
786 goto err;
787 }
788 mysql_mutex_lock(&rli->run_lock);
789 mysql_cond_wait(&rli->stop_cond, &rli->run_lock);
790 mysql_mutex_unlock(&rli->run_lock);
791 if (rli->until_condition != Relay_log_info::UNTIL_DONE)
792 {
793 sql_print_warning("MTS recovery: automatic recovery failed. Either the "
794 "slave server had stopped due to an error during an "
795 "earlier session or relay logs are corrupted."
796 "Fix the cause of the slave side error and restart the "
797 "slave server or consider using RESET SLAVE.");
798 goto err;
799 }
800
801 /*
802 We need a mutex while we are changing master info parameters to
803 keep other threads from reading bogus info
804 */
805 mysql_mutex_lock(&mi->data_lock);
806 mysql_mutex_lock(&rli->data_lock);
807 recover_relay_log(mi);
808
809 const char* msg;
810 if (rli->init_relay_log_pos(rli->get_group_relay_log_name(),
811 rli->get_group_relay_log_pos(),
812 false/*need_data_lock=false*/,
813 &msg, 0))
814 {
815 char llbuf[22];
816 sql_print_error("Failed to open the relay log '%s' (relay_log_pos %s).",
817 rli->get_group_relay_log_name(),
818 llstr(rli->get_group_relay_log_pos(), llbuf));
819
820 recovery_error=1;
821 mysql_mutex_unlock(&mi->data_lock);
822 mysql_mutex_unlock(&rli->data_lock);
823 goto err;
824 }
825 if (mi->flush_info(true) || rli->flush_info(true))
826 {
827 recovery_error= 1;
828 mysql_mutex_unlock(&mi->data_lock);
829 mysql_mutex_unlock(&rli->data_lock);
830 goto err;
831 }
832 rli->inited=1;
833 rli->error_on_rli_init_info= false;
834 mysql_mutex_unlock(&mi->data_lock);
835 mysql_mutex_unlock(&rli->data_lock);
836 sql_print_information("MTS recovery: completed successfully.\n");
837 DBUG_RETURN(recovery_error);
838 err:
839 /*
840 If recovery failed means we failed to initialize rli object in the case
841 of MTS. We should not allow the START SLAVE command to work as we do in
842 the case of STS. i.e if init_recovery call fails then we set inited=0.
843 */
844 rli->end_info();
845 rli->inited=0;
846 rli->error_on_rli_init_info= true;
847 DBUG_RETURN(recovery_error);
848 }
849
global_init_info(Master_info * mi,bool ignore_if_no_info,int thread_mask)850 int global_init_info(Master_info* mi, bool ignore_if_no_info, int thread_mask)
851 {
852 DBUG_ENTER("init_info");
853 DBUG_ASSERT(mi != NULL && mi->rli != NULL);
854 int init_error= 0;
855 enum_return_check check_return= ERROR_CHECKING_REPOSITORY;
856 THD *thd= current_thd;
857 bool binlog_prot_acquired= false;
858
859 if (thd && !thd->backup_binlog_lock.is_acquired())
860 {
861 const ulong timeout= thd->variables.lock_wait_timeout;
862
863 DBUG_PRINT("debug", ("Acquiring binlog protection lock"));
864 mysql_mutex_assert_not_owner(&mi->rli->data_lock);
865
866 if (thd->backup_binlog_lock.acquire_protection(thd, MDL_EXPLICIT,
867 timeout))
868 DBUG_RETURN(1);
869
870 binlog_prot_acquired= true;
871 }
872
873 /*
874 We need a mutex while we are changing master info parameters to
875 keep other threads from reading bogus info
876 */
877 mysql_mutex_lock(&mi->data_lock);
878 mysql_mutex_lock(&mi->rli->data_lock);
879
880 /*
881 When info tables are used and autocommit= 0 we force a new
882 transaction start to avoid table access deadlocks when START SLAVE
883 is executed after RESET SLAVE.
884 */
885 if (is_autocommit_off_and_infotables(thd))
886 {
887 if (trans_begin(thd))
888 {
889 init_error= 1;
890 goto end;
891 }
892 }
893
894 /*
895 This takes care of the startup dependency between the master_info
896 and relay_info. It initializes the master info if the SLAVE_IO
897 thread is being started and the relay log info if either the
898 SLAVE_SQL thread is being started or was not initialized as it is
899 required by the SLAVE_IO thread.
900 */
901 check_return= mi->check_info();
902 if (check_return == ERROR_CHECKING_REPOSITORY)
903 {
904 init_error= 1;
905 goto end;
906 }
907
908 if (!(ignore_if_no_info && check_return == REPOSITORY_DOES_NOT_EXIST))
909 {
910 if ((thread_mask & SLAVE_IO) != 0 && mi->mi_init_info())
911 init_error= 1;
912 }
913
914 check_return= mi->rli->check_info();
915 if (check_return == ERROR_CHECKING_REPOSITORY)
916 {
917 init_error= 1;
918 goto end;
919 }
920 if (!(ignore_if_no_info && check_return == REPOSITORY_DOES_NOT_EXIST))
921 {
922 if (((thread_mask & SLAVE_SQL) != 0 || !(mi->rli->inited))
923 && mi->rli->rli_init_info())
924 init_error= 1;
925 }
926
927 DBUG_EXECUTE_IF("enable_mts_worker_failure_init",
928 {DBUG_SET("+d,mts_worker_thread_init_fails");});
929 end:
930 /*
931 When info tables are used and autocommit= 0 we force transaction
932 commit to avoid table access deadlocks when START SLAVE is executed
933 after RESET SLAVE.
934 */
935 if (is_autocommit_off_and_infotables(thd))
936 if (trans_commit(thd))
937 init_error= 1;
938
939 mysql_mutex_unlock(&mi->rli->data_lock);
940 mysql_mutex_unlock(&mi->data_lock);
941
942 /*
943 Handling MTS Relay-log recovery after successful initialization of mi and
944 rli objects.
945
946 MTS Relay-log recovery is handled by SSUG command. In order to start the
947 slave applier thread rli needs to be inited and mi->rli->data_lock should
948 be in released state. Hence we do the MTS recovery at this point of time
949 where both conditions are satisfied.
950 */
951 if (!init_error && mi->rli->is_relay_log_recovery
952 && mi->rli->mts_recovery_group_cnt)
953 init_error= fill_mts_gaps_and_recover(mi);
954
955 if (binlog_prot_acquired)
956 {
957 DBUG_PRINT("debug", ("Releasing binlog protection lock"));
958 thd->backup_binlog_lock.release_protection(thd);
959 }
960
961 DBUG_RETURN(init_error);
962 }
963
end_info(Master_info * mi)964 void end_info(Master_info* mi)
965 {
966 DBUG_ENTER("end_info");
967 DBUG_ASSERT(mi != NULL && mi->rli != NULL);
968
969 /*
970 The previous implementation was not acquiring locks. We do the same here.
971 However, this is quite strange.
972 */
973 mi->end_info();
974 mi->rli->end_info();
975
976 DBUG_VOID_RETURN;
977 }
978
remove_info(Master_info * mi)979 int remove_info(Master_info* mi)
980 {
981 int error= 1;
982 DBUG_ENTER("remove_info");
983 DBUG_ASSERT(mi != NULL && mi->rli != NULL);
984
985 /*
986 The previous implementation was not acquiring locks.
987 We do the same here. However, this is quite strange.
988 */
989 /*
990 Reset errors (the idea is that we forget about the
991 old master).
992 */
993 mi->clear_error();
994 mi->rli->clear_error();
995 mi->rli->clear_until_condition();
996 mi->rli->clear_sql_delay();
997
998 mi->end_info();
999 mi->rli->end_info();
1000
1001 if (mi->remove_info() || Rpl_info_factory::reset_workers(mi->rli) ||
1002 mi->rli->remove_info())
1003 goto err;
1004
1005 error= 0;
1006
1007 err:
1008 DBUG_RETURN(error);
1009 }
1010
flush_master_info(Master_info * mi,bool force)1011 int flush_master_info(Master_info* mi, bool force)
1012 {
1013 DBUG_ENTER("flush_master_info");
1014 DBUG_ASSERT(mi != NULL && mi->rli != NULL);
1015 /*
1016 The previous implementation was not acquiring locks.
1017 We do the same here. However, this is quite strange.
1018 */
1019 /*
1020 With the appropriate recovery process, we will not need to flush
1021 the content of the current log.
1022
1023 For now, we flush the relay log BEFORE the master.info file, because
1024 if we crash, we will get a duplicate event in the relay log at restart.
1025 If we change the order, there might be missing events.
1026
1027 If we don't do this and the slave server dies when the relay log has
1028 some parts (its last kilobytes) in memory only, with, say, from master's
1029 position 100 to 150 in memory only (not on disk), and with position 150
1030 in master.info, there will be missing information. When the slave restarts,
1031 the I/O thread will fetch binlogs from 150, so in the relay log we will
1032 have "[0, 100] U [150, infinity[" and nobody will notice it, so the SQL
1033 thread will jump from 100 to 150, and replication will silently break.
1034 */
1035 mysql_mutex_t *log_lock= mi->rli->relay_log.get_log_lock();
1036
1037 mysql_mutex_lock(log_lock);
1038
1039 int err= (mi->rli->flush_current_log() ||
1040 mi->flush_info(force));
1041
1042 mysql_mutex_unlock(log_lock);
1043
1044 DBUG_RETURN (err);
1045 }
1046
1047 /**
1048 Convert slave skip errors bitmap into a printable string.
1049 */
1050
print_slave_skip_errors(void)1051 static void print_slave_skip_errors(void)
1052 {
1053 /*
1054 To be safe, we want 10 characters of room in the buffer for a number
1055 plus terminators. Also, we need some space for constant strings.
1056 10 characters must be sufficient for a number plus {',' | '...'}
1057 plus a NUL terminator. That is a max 6 digit number.
1058 */
1059 const size_t MIN_ROOM= 10;
1060 DBUG_ENTER("print_slave_skip_errors");
1061 DBUG_ASSERT(sizeof(slave_skip_error_names) > MIN_ROOM);
1062 DBUG_ASSERT(MAX_SLAVE_ERROR <= 999999); // 6 digits
1063
1064 if (!use_slave_mask || bitmap_is_clear_all(&slave_error_mask))
1065 {
1066 /* purecov: begin tested */
1067 memcpy(slave_skip_error_names, STRING_WITH_LEN("OFF"));
1068 /* purecov: end */
1069 }
1070 else if (bitmap_is_set_all(&slave_error_mask))
1071 {
1072 /* purecov: begin tested */
1073 memcpy(slave_skip_error_names, STRING_WITH_LEN("ALL"));
1074 /* purecov: end */
1075 }
1076 else
1077 {
1078 char *buff= slave_skip_error_names;
1079 char *bend= buff + sizeof(slave_skip_error_names);
1080 int errnum;
1081
1082 for (errnum= 0; errnum < MAX_SLAVE_ERROR; errnum++)
1083 {
1084 if (bitmap_is_set(&slave_error_mask, errnum))
1085 {
1086 if (buff + MIN_ROOM >= bend)
1087 break; /* purecov: tested */
1088 buff= int10_to_str(errnum, buff, 10);
1089 *buff++= ',';
1090 }
1091 }
1092 if (buff != slave_skip_error_names)
1093 buff--; // Remove last ','
1094 if (errnum < MAX_SLAVE_ERROR)
1095 {
1096 /* Couldn't show all errors */
1097 buff= strmov(buff, "..."); /* purecov: tested */
1098 }
1099 *buff=0;
1100 }
1101 DBUG_PRINT("init", ("error_names: '%s'", slave_skip_error_names));
1102 DBUG_VOID_RETURN;
1103 }
1104
set_stop_slave_wait_timeout(unsigned long wait_timeout)1105 static void set_stop_slave_wait_timeout(unsigned long wait_timeout) {
1106 stop_wait_timeout = wait_timeout;
1107 }
1108
1109 /**
1110 Change arg to the string with the nice, human-readable skip error values.
1111 @param slave_skip_errors_ptr
1112 The pointer to be changed
1113 */
set_slave_skip_errors(char ** slave_skip_errors_ptr)1114 void set_slave_skip_errors(char** slave_skip_errors_ptr)
1115 {
1116 DBUG_ENTER("set_slave_skip_errors");
1117 print_slave_skip_errors();
1118 *slave_skip_errors_ptr= slave_skip_error_names;
1119 DBUG_VOID_RETURN;
1120 }
1121
1122 /**
1123 Init function to set up array for errors that should be skipped for slave
1124 */
init_slave_skip_errors()1125 static void init_slave_skip_errors()
1126 {
1127 DBUG_ENTER("init_slave_skip_errors");
1128 DBUG_ASSERT(!use_slave_mask); // not already initialized
1129
1130 if (bitmap_init(&slave_error_mask,0,MAX_SLAVE_ERROR,0))
1131 {
1132 fprintf(stderr, "Badly out of memory, please check your system status\n");
1133 exit(1);
1134 }
1135 use_slave_mask = 1;
1136 DBUG_VOID_RETURN;
1137 }
1138
add_slave_skip_errors(const uint * errors,uint n_errors)1139 static void add_slave_skip_errors(const uint* errors, uint n_errors)
1140 {
1141 DBUG_ENTER("add_slave_skip_errors");
1142 DBUG_ASSERT(errors);
1143 DBUG_ASSERT(use_slave_mask);
1144
1145 for (uint i = 0; i < n_errors; i++)
1146 {
1147 const uint err_code = errors[i];
1148 if (err_code < MAX_SLAVE_ERROR)
1149 bitmap_set_bit(&slave_error_mask, err_code);
1150 }
1151 DBUG_VOID_RETURN;
1152 }
1153
1154 /*
1155 Add errors that should be skipped for slave
1156
1157 SYNOPSIS
1158 add_slave_skip_errors()
1159 arg List of errors numbers to be added to skip, separated with ','
1160
1161 NOTES
1162 Called from get_options() in mysqld.cc on start-up
1163 */
1164
add_slave_skip_errors(const char * arg)1165 void add_slave_skip_errors(const char* arg)
1166 {
1167 const char *p= NULL;
1168 /*
1169 ALL is only valid when nothing else is provided.
1170 */
1171 const uchar SKIP_ALL[]= "all";
1172 size_t SIZE_SKIP_ALL= strlen((const char *) SKIP_ALL) + 1;
1173 /*
1174 IGNORE_DDL_ERRORS can be combined with other parameters
1175 but must be the first one provided.
1176 */
1177 const uchar SKIP_DDL_ERRORS[]= "ddl_exist_errors";
1178 size_t SIZE_SKIP_DDL_ERRORS= strlen((const char *) SKIP_DDL_ERRORS);
1179 DBUG_ENTER("add_slave_skip_errors");
1180
1181 // initialize mask if not done yet
1182 if (!use_slave_mask)
1183 init_slave_skip_errors();
1184
1185 for (; my_isspace(system_charset_info,*arg); ++arg)
1186 /* empty */;
1187 if (!my_strnncoll(system_charset_info, (uchar*)arg, SIZE_SKIP_ALL,
1188 SKIP_ALL, SIZE_SKIP_ALL))
1189 {
1190 bitmap_set_all(&slave_error_mask);
1191 DBUG_VOID_RETURN;
1192 }
1193 if (!my_strnncoll(system_charset_info, (uchar*)arg, SIZE_SKIP_DDL_ERRORS,
1194 SKIP_DDL_ERRORS, SIZE_SKIP_DDL_ERRORS))
1195 {
1196 // DDL errors to be skipped for relaxed 'exist' handling
1197 const uint ddl_errors[] = {
1198 // error codes with create/add <schema object>
1199 ER_DB_CREATE_EXISTS, ER_TABLE_EXISTS_ERROR, ER_DUP_KEYNAME,
1200 ER_MULTIPLE_PRI_KEY,
1201 // error codes with change/rename <schema object>
1202 ER_BAD_FIELD_ERROR, ER_NO_SUCH_TABLE, ER_DUP_FIELDNAME,
1203 // error codes with drop <schema object>
1204 ER_DB_DROP_EXISTS, ER_BAD_TABLE_ERROR, ER_CANT_DROP_FIELD_OR_KEY
1205 };
1206
1207 add_slave_skip_errors(ddl_errors,
1208 sizeof(ddl_errors)/sizeof(ddl_errors[0]));
1209 /*
1210 After processing the SKIP_DDL_ERRORS, the pointer is
1211 increased to the position after the comma.
1212 */
1213 if (strlen(arg) > SIZE_SKIP_DDL_ERRORS + 1)
1214 arg+= SIZE_SKIP_DDL_ERRORS + 1;
1215 }
1216 for (p= arg ; *p; )
1217 {
1218 long err_code;
1219 if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code)))
1220 break;
1221 if (err_code < MAX_SLAVE_ERROR)
1222 bitmap_set_bit(&slave_error_mask,(uint)err_code);
1223 while (!my_isdigit(system_charset_info,*p) && *p)
1224 p++;
1225 }
1226 DBUG_VOID_RETURN;
1227 }
1228
set_thd_in_use_temporary_tables(Relay_log_info * rli)1229 static void set_thd_in_use_temporary_tables(Relay_log_info *rli)
1230 {
1231 TABLE *table;
1232
1233 for (table= rli->save_temporary_tables ; table ; table= table->next)
1234 {
1235 table->in_use= rli->info_thd;
1236 if (table->file != NULL)
1237 {
1238 /*
1239 Since we are stealing opened temporary tables from one thread to another,
1240 we need to let the performance schema know that,
1241 for aggregates per thread to work properly.
1242 */
1243 table->file->unbind_psi();
1244 table->file->rebind_psi();
1245 }
1246 }
1247 }
1248
terminate_slave_threads(Master_info * mi,int thread_mask,bool need_lock_term)1249 int terminate_slave_threads(Master_info* mi,int thread_mask,bool need_lock_term)
1250 {
1251 DBUG_ENTER("terminate_slave_threads");
1252
1253 if (!mi->inited)
1254 DBUG_RETURN(0); /* successfully do nothing */
1255 int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
1256 mysql_mutex_t *sql_lock = &mi->rli->run_lock, *io_lock = &mi->run_lock;
1257 mysql_mutex_t *log_lock= mi->rli->relay_log.get_log_lock();
1258 set_stop_slave_wait_timeout(rpl_stop_slave_timeout);
1259
1260 if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL))
1261 {
1262 DBUG_PRINT("info",("Terminating SQL thread"));
1263 mi->rli->abort_slave= 1;
1264 if ((error=terminate_slave_thread(mi->rli->info_thd, sql_lock,
1265 &mi->rli->stop_cond,
1266 &mi->rli->slave_running,
1267 need_lock_term)) &&
1268 !force_all)
1269 {
1270 if (error == 1)
1271 {
1272 DBUG_RETURN(ER_STOP_SLAVE_SQL_THREAD_TIMEOUT);
1273 }
1274 DBUG_RETURN(error);
1275 }
1276 mysql_mutex_lock(log_lock);
1277
1278 DBUG_PRINT("info",("Flushing relay-log info file."));
1279 if (current_thd)
1280 THD_STAGE_INFO(current_thd, stage_flushing_relay_log_info_file);
1281
1282 /*
1283 Flushes the relay log info regardles of the sync_relay_log_info option.
1284 */
1285 if (mi->rli->flush_info(TRUE))
1286 {
1287 mysql_mutex_unlock(log_lock);
1288 DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
1289 }
1290
1291 mysql_mutex_unlock(log_lock);
1292 }
1293 if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
1294 {
1295 DBUG_PRINT("info",("Terminating IO thread"));
1296 mi->abort_slave=1;
1297 if ((error=terminate_slave_thread(mi->info_thd,io_lock,
1298 &mi->stop_cond,
1299 &mi->slave_running,
1300 need_lock_term)) &&
1301 !force_all)
1302 {
1303 if (error == 1)
1304 {
1305 DBUG_RETURN(ER_STOP_SLAVE_IO_THREAD_TIMEOUT);
1306 }
1307 DBUG_RETURN(error);
1308 }
1309 mysql_mutex_lock(log_lock);
1310
1311 DBUG_PRINT("info",("Flushing relay log and master info repository."));
1312 if (current_thd)
1313 THD_STAGE_INFO(current_thd, stage_flushing_relay_log_and_master_info_repository);
1314
1315 /*
1316 Flushes the master info regardles of the sync_master_info option.
1317 */
1318 if (mi->flush_info(TRUE))
1319 {
1320 mysql_mutex_unlock(log_lock);
1321 DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
1322 }
1323
1324 /*
1325 Flushes the relay log regardles of the sync_relay_log option.
1326 */
1327 if (mi->rli->relay_log.is_open() &&
1328 mi->rli->relay_log.flush_and_sync(true))
1329 {
1330 mysql_mutex_unlock(log_lock);
1331 DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
1332 }
1333
1334 mysql_mutex_unlock(log_lock);
1335 }
1336 DBUG_RETURN(0);
1337 }
1338
1339
1340 /**
1341 Wait for a slave thread to terminate.
1342
1343 This function is called after requesting the thread to terminate
1344 (by setting @c abort_slave member of @c Relay_log_info or @c
1345 Master_info structure to 1). Termination of the thread is
1346 controlled with the the predicate <code>*slave_running</code>.
1347
1348 Function will acquire @c term_lock before waiting on the condition
1349 unless @c need_lock_term is false in which case the mutex should be
1350 owned by the caller of this function and will remain acquired after
1351 return from the function.
1352
1353 @param term_lock
1354 Associated lock to use when waiting for @c term_cond
1355
1356 @param term_cond
1357 Condition that is signalled when the thread has terminated
1358
1359 @param slave_running
1360 Pointer to predicate to check for slave thread termination
1361
1362 @param need_lock_term
1363 If @c false the lock will not be acquired before waiting on
1364 the condition. In this case, it is assumed that the calling
1365 function acquires the lock before calling this function.
1366
1367 @retval 0 All OK, 1 on "STOP SLAVE" command timeout, ER_SLAVE_NOT_RUNNING otherwise.
1368
1369 @note If the executing thread has to acquire term_lock
1370 (need_lock_term is true, the negative running status does not
1371 represent any issue therefore no error is reported.
1372
1373 */
1374 static int
terminate_slave_thread(THD * thd,mysql_mutex_t * term_lock,mysql_cond_t * term_cond,volatile uint * slave_running,bool need_lock_term)1375 terminate_slave_thread(THD *thd,
1376 mysql_mutex_t *term_lock,
1377 mysql_cond_t *term_cond,
1378 volatile uint *slave_running,
1379 bool need_lock_term)
1380 {
1381 DBUG_ENTER("terminate_slave_thread");
1382 if (need_lock_term)
1383 {
1384 mysql_mutex_lock(term_lock);
1385 }
1386 else
1387 {
1388 mysql_mutex_assert_owner(term_lock);
1389 }
1390 if (!*slave_running)
1391 {
1392 if (need_lock_term)
1393 {
1394 /*
1395 if run_lock (term_lock) is acquired locally then either
1396 slave_running status is fine
1397 */
1398 mysql_mutex_unlock(term_lock);
1399 DBUG_RETURN(0);
1400 }
1401 else
1402 {
1403 DBUG_RETURN(ER_SLAVE_NOT_RUNNING);
1404 }
1405 }
1406 DBUG_ASSERT(thd != 0);
1407 THD_CHECK_SENTRY(thd);
1408
1409 /*
1410 Is is critical to test if the slave is running. Otherwise, we might
1411 be referening freed memory trying to kick it
1412 */
1413
1414 while (*slave_running) // Should always be true
1415 {
1416 int error MY_ATTRIBUTE((unused));
1417 DBUG_PRINT("loop", ("killing slave thread"));
1418
1419 mysql_mutex_lock(&thd->LOCK_thd_data);
1420 #ifndef DONT_USE_THR_ALARM
1421 /*
1422 Error codes from pthread_kill are:
1423 EINVAL: invalid signal number (can't happen)
1424 ESRCH: thread already killed (can happen, should be ignored)
1425 */
1426 int err MY_ATTRIBUTE((unused))= pthread_kill(thd->real_id, thr_client_alarm);
1427 DBUG_ASSERT(err != EINVAL);
1428 #endif
1429 thd->awake(THD::NOT_KILLED);
1430 mysql_mutex_unlock(&thd->LOCK_thd_data);
1431
1432 /*
1433 There is a small chance that slave thread might miss the first
1434 alarm. To protect againts it, resend the signal until it reacts
1435 */
1436 struct timespec abstime;
1437 set_timespec(abstime,2);
1438 error= mysql_cond_timedwait(term_cond, term_lock, &abstime);
1439 if (stop_wait_timeout >= 2)
1440 stop_wait_timeout= stop_wait_timeout - 2;
1441 else if (*slave_running)
1442 {
1443 if (need_lock_term)
1444 mysql_mutex_unlock(term_lock);
1445 DBUG_RETURN (1);
1446 }
1447 DBUG_ASSERT(error == ETIMEDOUT || error == 0);
1448 }
1449
1450 DBUG_ASSERT(*slave_running == 0);
1451
1452 if (need_lock_term)
1453 mysql_mutex_unlock(term_lock);
1454 DBUG_RETURN(0);
1455 }
1456
1457
start_slave_thread(PSI_thread_key thread_key,pthread_handler h_func,mysql_mutex_t * start_lock,mysql_mutex_t * cond_lock,mysql_cond_t * start_cond,volatile uint * slave_running,volatile ulong * slave_run_id,Master_info * mi)1458 int start_slave_thread(
1459 #ifdef HAVE_PSI_INTERFACE
1460 PSI_thread_key thread_key,
1461 #endif
1462 pthread_handler h_func, mysql_mutex_t *start_lock,
1463 mysql_mutex_t *cond_lock,
1464 mysql_cond_t *start_cond,
1465 volatile uint *slave_running,
1466 volatile ulong *slave_run_id,
1467 Master_info* mi)
1468 {
1469 pthread_t th;
1470 ulong start_id;
1471 int error;
1472 DBUG_ENTER("start_slave_thread");
1473
1474 if (start_lock)
1475 mysql_mutex_lock(start_lock);
1476 if (!server_id)
1477 {
1478 if (start_cond)
1479 mysql_cond_broadcast(start_cond);
1480 if (start_lock)
1481 mysql_mutex_unlock(start_lock);
1482 sql_print_error("Server id not set, will not start slave");
1483 DBUG_RETURN(ER_BAD_SLAVE);
1484 }
1485
1486 if (*slave_running)
1487 {
1488 if (start_cond)
1489 mysql_cond_broadcast(start_cond);
1490 if (start_lock)
1491 mysql_mutex_unlock(start_lock);
1492 DBUG_RETURN(ER_SLAVE_MUST_STOP);
1493 }
1494 start_id= *slave_run_id;
1495 DBUG_PRINT("info",("Creating new slave thread"));
1496 if ((error= mysql_thread_create(thread_key,
1497 &th, &connection_attrib, h_func, (void*)mi)))
1498 {
1499 sql_print_error("Can't create slave thread (errno= %d).", error);
1500 if (start_lock)
1501 mysql_mutex_unlock(start_lock);
1502 DBUG_RETURN(ER_SLAVE_THREAD);
1503 }
1504 if (start_cond && cond_lock) // caller has cond_lock
1505 {
1506 THD* thd = current_thd;
1507 while (start_id == *slave_run_id && thd != NULL)
1508 {
1509 DBUG_PRINT("sleep",("Waiting for slave thread to start"));
1510 PSI_stage_info saved_stage= {0, "", 0};
1511 thd->ENTER_COND(start_cond, cond_lock,
1512 & stage_waiting_for_slave_thread_to_start,
1513 & saved_stage);
1514 /*
1515 It is not sufficient to test this at loop bottom. We must test
1516 it after registering the mutex in enter_cond(). If the kill
1517 happens after testing of thd->killed and before the mutex is
1518 registered, we could otherwise go waiting though thd->killed is
1519 set.
1520 */
1521 if (!thd->killed)
1522 mysql_cond_wait(start_cond, cond_lock);
1523 thd->EXIT_COND(& saved_stage);
1524 mysql_mutex_lock(cond_lock); // re-acquire it as exit_cond() released
1525 if (thd->killed)
1526 {
1527 if (start_lock)
1528 mysql_mutex_unlock(start_lock);
1529 DBUG_RETURN(thd->killed_errno());
1530 }
1531 }
1532 }
1533 if (start_lock)
1534 mysql_mutex_unlock(start_lock);
1535 DBUG_RETURN(0);
1536 }
1537
1538
1539 /*
1540 start_slave_threads()
1541
1542 NOTES
1543 SLAVE_FORCE_ALL is not implemented here on purpose since it does not make
1544 sense to do that for starting a slave--we always care if it actually
1545 started the threads that were not previously running
1546 */
1547
start_slave_threads(bool need_lock_slave,bool wait_for_start,Master_info * mi,int thread_mask)1548 int start_slave_threads(bool need_lock_slave, bool wait_for_start,
1549 Master_info* mi, int thread_mask)
1550 {
1551 mysql_mutex_t *lock_io=0, *lock_sql=0, *lock_cond_io=0, *lock_cond_sql=0;
1552 mysql_cond_t* cond_io=0, *cond_sql=0;
1553 int error=0;
1554 DBUG_ENTER("start_slave_threads");
1555 DBUG_EXECUTE_IF("uninitialized_master-info_structure",
1556 mi->inited= FALSE;);
1557
1558 if (!mi->inited || !mi->rli->inited)
1559 {
1560 error= !mi->inited ? ER_SLAVE_MI_INIT_REPOSITORY :
1561 ER_SLAVE_RLI_INIT_REPOSITORY;
1562 Rpl_info *info= (!mi->inited ? mi : static_cast<Rpl_info *>(mi->rli));
1563 const char* prefix= current_thd ? ER(error) : ER_DEFAULT(error);
1564 info->report(ERROR_LEVEL, error, prefix, NULL);
1565
1566 DBUG_RETURN(error);
1567 }
1568
1569 if (need_lock_slave)
1570 {
1571 lock_io = &mi->run_lock;
1572 lock_sql = &mi->rli->run_lock;
1573 }
1574 if (wait_for_start)
1575 {
1576 cond_io = &mi->start_cond;
1577 cond_sql = &mi->rli->start_cond;
1578 lock_cond_io = &mi->run_lock;
1579 lock_cond_sql = &mi->rli->run_lock;
1580 }
1581
1582 if (thread_mask & SLAVE_IO)
1583 error= start_slave_thread(
1584 #ifdef HAVE_PSI_INTERFACE
1585 key_thread_slave_io,
1586 #endif
1587 handle_slave_io, lock_io, lock_cond_io,
1588 cond_io,
1589 &mi->slave_running, &mi->slave_run_id,
1590 mi);
1591 if (!error && (thread_mask & SLAVE_SQL))
1592 {
1593 /*
1594 MTS-recovery gaps gathering is placed onto common execution path
1595 for either START-SLAVE and --skip-start-slave= 0
1596 */
1597 if (mi->rli->recovery_parallel_workers != 0)
1598 error= mts_recovery_groups(mi->rli);
1599 if (!error)
1600 error= start_slave_thread(
1601 #ifdef HAVE_PSI_INTERFACE
1602 key_thread_slave_sql,
1603 #endif
1604 handle_slave_sql, lock_sql, lock_cond_sql,
1605 cond_sql,
1606 &mi->rli->slave_running, &mi->rli->slave_run_id,
1607 mi);
1608 if (error)
1609 terminate_slave_threads(mi, thread_mask & SLAVE_IO, need_lock_slave);
1610 }
1611 DBUG_RETURN(error);
1612 }
1613
1614 /*
1615 Release slave threads at time of executing shutdown.
1616
1617 SYNOPSIS
1618 end_slave()
1619 */
1620
end_slave()1621 void end_slave()
1622 {
1623 DBUG_ENTER("end_slave");
1624
1625 /*
1626 This is called when the server terminates, in close_connections().
1627 It terminates slave threads. However, some CHANGE MASTER etc may still be
1628 running presently. If a START SLAVE was in progress, the mutex lock below
1629 will make us wait until slave threads have started, and START SLAVE
1630 returns, then we terminate them here.
1631 */
1632 mysql_mutex_lock(&LOCK_active_mi);
1633 if (active_mi)
1634 {
1635 /*
1636 TODO: replace the line below with
1637 list_walk(&master_list, (list_walk_action)end_slave_on_walk,0);
1638 once multi-master code is ready.
1639 */
1640 terminate_slave_threads(active_mi,SLAVE_FORCE_ALL);
1641 }
1642 mysql_mutex_unlock(&LOCK_active_mi);
1643 DBUG_VOID_RETURN;
1644 }
1645
1646 /**
1647 Free all resources used by slave threads at time of executing shutdown.
1648 The routine must be called after all possible users of @c active_mi
1649 have left.
1650
1651 SYNOPSIS
1652 close_active_mi()
1653
1654 */
close_active_mi()1655 void close_active_mi()
1656 {
1657 mysql_mutex_lock(&LOCK_active_mi);
1658 if (active_mi)
1659 {
1660 end_info(active_mi);
1661 if (active_mi->rli)
1662 delete active_mi->rli;
1663 delete active_mi;
1664 active_mi= 0;
1665 }
1666 mysql_mutex_unlock(&LOCK_active_mi);
1667 }
1668
1669 /**
1670 Check if multi-statement transaction mode and master and slave info
1671 repositories are set to table.
1672
1673 @param THD THD object
1674
1675 @retval true Success
1676 @retval false Failure
1677 */
is_autocommit_off_and_infotables(THD * thd)1678 static bool is_autocommit_off_and_infotables(THD* thd)
1679 {
1680 DBUG_ENTER("is_autocommit_off_and_infotables");
1681 DBUG_RETURN((thd && thd->in_multi_stmt_transaction_mode() &&
1682 (opt_mi_repository_id == INFO_REPOSITORY_TABLE ||
1683 opt_rli_repository_id == INFO_REPOSITORY_TABLE))?
1684 true : false);
1685 }
1686
io_slave_killed(THD * thd,Master_info * mi)1687 static bool io_slave_killed(THD* thd, Master_info* mi)
1688 {
1689 DBUG_ENTER("io_slave_killed");
1690
1691 DBUG_ASSERT(mi->info_thd == thd);
1692 DBUG_ASSERT(mi->slave_running); // tracking buffer overrun
1693 DBUG_RETURN(mi->abort_slave || abort_loop || thd->killed);
1694 }
1695
1696 /**
1697 The function analyzes a possible killed status and makes
1698 a decision whether to accept it or not.
1699 Normally upon accepting the sql thread goes to shutdown.
1700 In the event of deferring decision @rli->last_event_start_time waiting
1701 timer is set to force the killed status be accepted upon its expiration.
1702
1703 Notice Multi-Threaded-Slave behaves similarly in that when it's being
1704 stopped and the current group of assigned events has not yet scheduled
1705 completely, Coordinator defers to accept to leave its read-distribute
1706 state. The above timeout ensures waiting won't last endlessly, and in
1707 such case an error is reported.
1708
1709 @param thd pointer to a THD instance
1710 @param rli pointer to Relay_log_info instance
1711
1712 @return TRUE the killed status is recognized, FALSE a possible killed
1713 status is deferred.
1714 */
sql_slave_killed(THD * thd,Relay_log_info * rli)1715 static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
1716 {
1717 bool is_parallel_warn= FALSE;
1718
1719 DBUG_ENTER("sql_slave_killed");
1720
1721 DBUG_ASSERT(rli->info_thd == thd);
1722 DBUG_ASSERT(rli->slave_running == 1);
1723 if (rli->sql_thread_kill_accepted)
1724 DBUG_RETURN(true);
1725 DBUG_EXECUTE_IF("stop_when_mts_in_group", rli->abort_slave = 1;
1726 DBUG_SET("-d,stop_when_mts_in_group");
1727 DBUG_SET("-d,simulate_stop_when_mts_in_group");
1728 DBUG_RETURN(false););
1729 if (abort_loop || thd->killed || rli->abort_slave)
1730 {
1731 rli->sql_thread_kill_accepted= true;
1732 is_parallel_warn= (rli->is_parallel_exec() &&
1733 (rli->is_mts_in_group() || thd->killed));
1734 /*
1735 Slave can execute stop being in one of two MTS or Single-Threaded mode.
1736 The modes define different criteria to accept the stop.
1737 In particular that relates to the concept of groupping.
1738 Killed Coordinator thread expects the worst so it warns on
1739 possible consistency issue.
1740 */
1741 if (is_parallel_warn ||
1742 (!rli->is_parallel_exec() &&
1743 thd->transaction.all.cannot_safely_rollback() && rli->is_in_group()))
1744 {
1745 char msg_stopped[]=
1746 "... Slave SQL Thread stopped with incomplete event group "
1747 "having non-transactional changes. "
1748 "If the group consists solely of row-based events, you can try "
1749 "to restart the slave with --slave-exec-mode=IDEMPOTENT, which "
1750 "ignores duplicate key, key not found, and similar errors (see "
1751 "documentation for details).";
1752 char msg_stopped_mts[]=
1753 "... The slave coordinator and worker threads are stopped, possibly "
1754 "leaving data in inconsistent state. A restart should "
1755 "restore consistency automatically, although using non-transactional "
1756 "storage for data or info tables or DDL queries could lead to problems. "
1757 "In such cases you have to examine your data (see documentation for "
1758 "details).";
1759
1760 if (rli->abort_slave)
1761 {
1762 DBUG_PRINT("info", ("Request to stop slave SQL Thread received while "
1763 "applying an MTS group or a group that "
1764 "has non-transactional "
1765 "changes; waiting for completion of the group ... "));
1766
1767 /*
1768 Slave sql thread shutdown in face of unfinished group modified
1769 Non-trans table is handled via a timer. The slave may eventually
1770 give out to complete the current group and in that case there
1771 might be issues at consequent slave restart, see the error message.
1772 WL#2975 offers a robust solution requiring to store the last exectuted
1773 event's coordinates along with the group's coordianates
1774 instead of waiting with @c last_event_start_time the timer.
1775 */
1776
1777 if (rli->last_event_start_time == 0)
1778 rli->last_event_start_time= my_time(0);
1779 rli->sql_thread_kill_accepted= difftime(my_time(0),
1780 rli->last_event_start_time) <=
1781 SLAVE_WAIT_GROUP_DONE ?
1782 FALSE : TRUE;
1783
1784 DBUG_EXECUTE_IF("stop_slave_middle_group",
1785 DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
1786 rli->sql_thread_kill_accepted= TRUE;);); // time is over
1787
1788 if (!rli->sql_thread_kill_accepted && !rli->reported_unsafe_warning)
1789 {
1790 rli->report(WARNING_LEVEL, 0,
1791 !is_parallel_warn ?
1792 "Request to stop slave SQL Thread received while "
1793 "applying a group that has non-transactional "
1794 "changes; waiting for completion of the group ... "
1795 :
1796 "Coordinator thread of multi-threaded slave is being "
1797 "stopped in the middle of assigning a group of events; "
1798 "deferring to exit until the group completion ... ");
1799 rli->reported_unsafe_warning= true;
1800 }
1801 }
1802 if (rli->sql_thread_kill_accepted)
1803 {
1804 rli->last_event_start_time= 0;
1805 if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)
1806 {
1807 rli->mts_group_status= Relay_log_info::MTS_KILLED_GROUP;
1808 }
1809 if (is_parallel_warn)
1810 rli->report(!rli->is_error() ? ERROR_LEVEL :
1811 WARNING_LEVEL, // an error was reported by Worker
1812 ER_MTS_INCONSISTENT_DATA,
1813 ER(ER_MTS_INCONSISTENT_DATA),
1814 msg_stopped_mts);
1815 else
1816 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
1817 ER(ER_SLAVE_FATAL_ERROR), msg_stopped);
1818 }
1819 }
1820 }
1821 DBUG_RETURN(rli->sql_thread_kill_accepted);
1822 }
1823
1824
1825 /*
1826 skip_load_data_infile()
1827
1828 NOTES
1829 This is used to tell a 3.23 master to break send_file()
1830 */
1831
skip_load_data_infile(NET * net)1832 void skip_load_data_infile(NET *net)
1833 {
1834 DBUG_ENTER("skip_load_data_infile");
1835
1836 (void)net_request_file(net, "/dev/null");
1837 (void)my_net_read(net); // discard response
1838 (void)net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0); // ok
1839 DBUG_VOID_RETURN;
1840 }
1841
1842
net_request_file(NET * net,const char * fname)1843 bool net_request_file(NET* net, const char* fname)
1844 {
1845 DBUG_ENTER("net_request_file");
1846 DBUG_RETURN(net_write_command(net, 251, (uchar*) fname, strlen(fname),
1847 (uchar*) "", 0));
1848 }
1849
1850 /*
1851 From other comments and tests in code, it looks like
1852 sometimes Query_log_event and Load_log_event can have db == 0
1853 (see rewrite_db() above for example)
1854 (cases where this happens are unclear; it may be when the master is 3.23).
1855 */
1856
print_slave_db_safe(const char * db)1857 const char *print_slave_db_safe(const char* db)
1858 {
1859 DBUG_ENTER("*print_slave_db_safe");
1860
1861 DBUG_RETURN((db ? db : ""));
1862 }
1863
1864 /*
1865 Check if the error is caused by network.
1866 @param[in] errorno Number of the error.
1867 RETURNS:
1868 TRUE network error
1869 FALSE not network error
1870 */
1871
is_network_error(uint errorno)1872 bool is_network_error(uint errorno)
1873 {
1874 if (errorno == CR_CONNECTION_ERROR ||
1875 errorno == CR_CONN_HOST_ERROR ||
1876 errorno == CR_SERVER_GONE_ERROR ||
1877 errorno == CR_SERVER_LOST ||
1878 errorno == ER_CON_COUNT_ERROR ||
1879 errorno == ER_SERVER_SHUTDOWN ||
1880 errorno == ER_NET_READ_INTERRUPTED)
1881 return TRUE;
1882
1883 return FALSE;
1884 }
1885
1886
1887 /**
1888 Execute an initialization query for the IO thread.
1889
1890 If there is an error, then this function calls mysql_free_result;
1891 otherwise the MYSQL object holds the result after this call. If
1892 there is an error other than allowed_error, then this function
1893 prints a message and returns -1.
1894
1895 @param mysql MYSQL object.
1896 @param query Query string.
1897 @param allowed_error Allowed error code, or 0 if no errors are allowed.
1898 @param[out] master_res If this is not NULL and there is no error, then
1899 mysql_store_result() will be called and the result stored in this pointer.
1900 @param[out] master_row If this is not NULL and there is no error, then
1901 mysql_fetch_row() will be called and the result stored in this pointer.
1902
1903 @retval COMMAND_STATUS_OK No error.
1904 @retval COMMAND_STATUS_ALLOWED_ERROR There was an error and the
1905 error code was 'allowed_error'.
1906 @retval COMMAND_STATUS_ERROR There was an error and the error code
1907 was not 'allowed_error'.
1908 */
1909 enum enum_command_status
1910 { COMMAND_STATUS_OK, COMMAND_STATUS_ERROR, COMMAND_STATUS_ALLOWED_ERROR };
1911 static enum_command_status
io_thread_init_command(Master_info * mi,const char * query,int allowed_error,MYSQL_RES ** master_res=NULL,MYSQL_ROW * master_row=NULL)1912 io_thread_init_command(Master_info *mi, const char *query, int allowed_error,
1913 MYSQL_RES **master_res= NULL,
1914 MYSQL_ROW *master_row= NULL)
1915 {
1916 DBUG_ENTER("io_thread_init_command");
1917 DBUG_PRINT("info", ("IO thread initialization command: '%s'", query));
1918 MYSQL *mysql= mi->mysql;
1919 int ret= mysql_real_query(mysql, query, strlen(query));
1920 if (io_slave_killed(mi->info_thd, mi))
1921 {
1922 sql_print_information("The slave IO thread was killed while executing "
1923 "initialization query '%s'", query);
1924 mysql_free_result(mysql_store_result(mysql));
1925 DBUG_RETURN(COMMAND_STATUS_ERROR);
1926 }
1927 if (ret != 0)
1928 {
1929 int err= mysql_errno(mysql);
1930 mysql_free_result(mysql_store_result(mysql));
1931 if (!err || err != allowed_error)
1932 {
1933 mi->report(is_network_error(err) ? WARNING_LEVEL : ERROR_LEVEL, err,
1934 "The slave IO thread stops because the initialization query "
1935 "'%s' failed with error '%s'.",
1936 query, mysql_error(mysql));
1937 DBUG_RETURN(COMMAND_STATUS_ERROR);
1938 }
1939 DBUG_RETURN(COMMAND_STATUS_ALLOWED_ERROR);
1940 }
1941 if (master_res != NULL)
1942 {
1943 if ((*master_res= mysql_store_result(mysql)) == NULL)
1944 {
1945 mi->report(WARNING_LEVEL, mysql_errno(mysql),
1946 "The slave IO thread stops because the initialization query "
1947 "'%s' did not return any result.",
1948 query);
1949 DBUG_RETURN(COMMAND_STATUS_ERROR);
1950 }
1951 if (master_row != NULL)
1952 {
1953 if ((*master_row= mysql_fetch_row(*master_res)) == NULL)
1954 {
1955 mysql_free_result(*master_res);
1956 mi->report(WARNING_LEVEL, mysql_errno(mysql),
1957 "The slave IO thread stops because the initialization query "
1958 "'%s' did not return any row.",
1959 query);
1960 DBUG_RETURN(COMMAND_STATUS_ERROR);
1961 }
1962 }
1963 }
1964 else
1965 DBUG_ASSERT(master_row == NULL);
1966 DBUG_RETURN(COMMAND_STATUS_OK);
1967 }
1968
1969
1970 /**
1971 Set user variables after connecting to the master.
1972
1973 @param mysql MYSQL to request uuid from master.
1974 @param mi Master_info to set master_uuid
1975
1976 @return 0: Success, 1: Fatal error, 2: Network error.
1977 */
io_thread_init_commands(MYSQL * mysql,Master_info * mi)1978 int io_thread_init_commands(MYSQL *mysql, Master_info *mi)
1979 {
1980 char query[256];
1981 int ret= 0;
1982 DBUG_EXECUTE_IF("fake_5_5_version_slave", return ret;);
1983
1984 sprintf(query, "SET @slave_uuid= '%s'", server_uuid);
1985 if (mysql_real_query(mysql, query, strlen(query))
1986 && !check_io_slave_killed(mi->info_thd, mi, NULL))
1987 goto err;
1988
1989 mysql_free_result(mysql_store_result(mysql));
1990 return ret;
1991
1992 err:
1993 if (mysql_errno(mysql) && is_network_error(mysql_errno(mysql)))
1994 {
1995 mi->report(WARNING_LEVEL, mysql_errno(mysql),
1996 "The initialization command '%s' failed with the following"
1997 " error: '%s'.", query, mysql_error(mysql));
1998 ret= 2;
1999 }
2000 else
2001 {
2002 char errmsg[512];
2003 const char *errmsg_fmt=
2004 "The slave I/O thread stops because a fatal error is encountered "
2005 "when it tries to send query to master(query: %s).";
2006
2007 sprintf(errmsg, errmsg_fmt, query);
2008 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR),
2009 errmsg);
2010 ret= 1;
2011 }
2012 mysql_free_result(mysql_store_result(mysql));
2013 return ret;
2014 }
2015
2016 /**
2017 Get master's uuid on connecting.
2018
2019 @param mysql MYSQL to request uuid from master.
2020 @param mi Master_info to set master_uuid
2021
2022 @return 0: Success, 1: Fatal error, 2: Network error.
2023 */
get_master_uuid(MYSQL * mysql,Master_info * mi)2024 static int get_master_uuid(MYSQL *mysql, Master_info *mi)
2025 {
2026 const char *errmsg;
2027 MYSQL_RES *master_res= NULL;
2028 MYSQL_ROW master_row= NULL;
2029 int ret= 0;
2030
2031 DBUG_EXECUTE_IF("dbug.before_get_MASTER_UUID",
2032 {
2033 const char act[]
2034 = "now signal in_get_master_version_and_clock "
2035 "wait_for signal.get_master_uuid";
2036 DBUG_ASSERT(opt_debug_sync_timeout > 0);
2037 DBUG_ASSERT(!debug_sync_set_action(current_thd,
2038 STRING_WITH_LEN(act)));
2039 };);
2040
2041 DBUG_EXECUTE_IF("dbug.simulate_busy_io",
2042 {
2043 const char act[]= "now signal Reached wait_for signal.got_stop_slave";
2044 DBUG_ASSERT(opt_debug_sync_timeout > 0);
2045 DBUG_ASSERT(!debug_sync_set_action(current_thd,
2046 STRING_WITH_LEN(act)));
2047 };);
2048 if (!mysql_real_query(mysql,
2049 STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_UUID'")) &&
2050 (master_res= mysql_store_result(mysql)) &&
2051 (master_row= mysql_fetch_row(master_res)))
2052 {
2053 if (!strcmp(::server_uuid, master_row[1]) &&
2054 !mi->rli->replicate_same_server_id)
2055 {
2056 errmsg= "The slave I/O thread stops because master and slave have equal "
2057 "MySQL server UUIDs; these UUIDs must be different for "
2058 "replication to work.";
2059 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR),
2060 errmsg);
2061 // Fatal error
2062 ret= 1;
2063 }
2064 else
2065 {
2066 if (mi->master_uuid[0] != 0 && strcmp(mi->master_uuid, master_row[1]))
2067 sql_print_warning("The master's UUID has changed, although this should"
2068 " not happen unless you have changed it manually."
2069 " The old UUID was %s.",
2070 mi->master_uuid);
2071 strncpy(mi->master_uuid, master_row[1], UUID_LENGTH);
2072 mi->master_uuid[UUID_LENGTH]= 0;
2073 }
2074 }
2075 else if (mysql_errno(mysql))
2076 {
2077 if (is_network_error(mysql_errno(mysql)))
2078 {
2079 mi->report(WARNING_LEVEL, mysql_errno(mysql),
2080 "Get master SERVER_UUID failed with error: %s",
2081 mysql_error(mysql));
2082 ret= 2;
2083 }
2084 else
2085 {
2086 /* Fatal error */
2087 errmsg= "The slave I/O thread stops because a fatal error is encountered "
2088 "when it tries to get the value of SERVER_UUID variable from master.";
2089 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR),
2090 errmsg);
2091 ret= 1;
2092 }
2093 }
2094 else if (!master_row && master_res)
2095 {
2096 mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE,
2097 "Unknown system variable 'SERVER_UUID' on master. "
2098 "A probable cause is that the variable is not supported on the "
2099 "master (version: %s), even though it is on the slave (version: %s)",
2100 mysql->server_version, server_version);
2101 }
2102
2103 if (master_res)
2104 mysql_free_result(master_res);
2105 return ret;
2106 }
2107
2108
2109 /**
2110 Determine, case-sensitively, if short_string is equal to
2111 long_string, or a true prefix of long_string, or not a prefix.
2112
2113 @retval 0 short_string is not a prefix of long_string.
2114 @retval 1 short_string is a true prefix of long_string (not equal).
2115 @retval 2 short_string is equal to long_string.
2116 */
is_str_prefix_case(const char * short_string,const char * long_string)2117 static int is_str_prefix_case(const char *short_string, const char *long_string)
2118 {
2119 int i;
2120 for (i= 0; short_string[i]; i++)
2121 if (my_toupper(system_charset_info, short_string[i]) !=
2122 my_toupper(system_charset_info, long_string[i]))
2123 return 0;
2124 return long_string[i] ? 1 : 2;
2125 }
2126
/*
  Handshake performed by the IO thread after connecting to the master.

  In order, this function:
    - installs a default Format_description_log_event in 'mi', chosen from
      the major version reported in mysql->server_version;
    - queries the master's UNIX_TIMESTAMP() and stores the difference to the
      local clock in mi->clock_diff_with_master;
    - queries the master's SERVER_ID, stores it in mi->master_id and rejects
      a master with the same id unless replicate_same_server_id is set;
    - for 4.x masters only, checks that COLLATION_SERVER and TIME_ZONE match
      the local global values;
    - sends SET @master_heartbeat_period when mi->heartbeat_period is non-zero;
    - negotiates the binlog checksum algorithm (mi->checksum_alg_before_fd);
    - queries @@GLOBAL.GTID_MODE and checks it against the local gtid_mode.

  Note that we rely on the master's version (3.23, 4.0.14 etc) instead of
  relying on the binlog's version. This is not perfect: imagine an upgrade
  of the master without waiting that all slaves are in sync with the master;
  then a slave could be fooled about the binlog's format. This is what happens
  when people upgrade a 3.23 master to 4.0 without doing RESET MASTER: 4.0
  slaves are fooled. So we do this only to distinguish between 3.23 and more
  recent masters (it's too late to change things for 3.23).

  PARAMETERS
  mysql   connection to the master
  mi      Master_info of the IO thread

  RETURNS
  0       ok
  1       error
  2       transient network problem, the caller should try to reconnect
          (also returned when the IO thread is found killed)
*/

static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi)
{
  char err_buff[MAX_SLAVE_ERRMSG];
  /* errmsg doubles as the "fatal error pending" flag tested at label err. */
  const char* errmsg= 0;
  int err_code= 0;
  int version_number=0;
  version_number= atoi(mysql->server_version);

  MYSQL_RES *master_res= 0;
  MYSQL_ROW master_row;
  DBUG_ENTER("get_master_version_and_clock");

  /*
    Free old mi_description_event (that is needed if we are in
    a reconnection).
  */
  DBUG_EXECUTE_IF("unrecognized_master_version",
                 {
                   version_number= 1;
                 };);
  mysql_mutex_lock(&mi->data_lock);
  mi->set_mi_description_event(NULL);

  if (!my_isdigit(&my_charset_bin,*mysql->server_version))
  {
    errmsg = "Master reported unrecognized MySQL version";
    err_code= ER_SLAVE_FATAL_ERROR;
    sprintf(err_buff, ER(err_code), errmsg);
  }
  else
  {
    /*
      Note the following switch will bug when we have MySQL branch 30 ;)
    */
    switch (version_number)
    {
    case 0:
    case 1:
    case 2:
      errmsg = "Master reported unrecognized MySQL version";
      err_code= ER_SLAVE_FATAL_ERROR;
      sprintf(err_buff, ER(err_code), errmsg);
      break;
    case 3:
      mi->set_mi_description_event(new
        Format_description_log_event(1, mysql->server_version));
      break;
    case 4:
      mi->set_mi_description_event(new
        Format_description_log_event(3, mysql->server_version));
      break;
    default:
      /*
        Master is MySQL >=5.0. Give a default Format_desc event, so that we can
        take the early steps (like tests for "is this a 3.23 master") which we
        have to take before we receive the real master's Format_desc which will
        override this one. Note that the Format_desc we create below is garbage
        (it has the format of the *slave*); it's only good to help know if the
        master is 3.23, 4.0, etc.
      */
      mi->set_mi_description_event(new
        Format_description_log_event(4, mysql->server_version));
      break;
    }
  }

  /*
     This does not mean that a 5.0 slave will be able to read a 5.5 master; but
     as we don't know yet, we don't want to forbid this for now. If a 5.0 slave
     can't read a 5.5 master, this will show up when the slave can't read some
     events sent by the master, and there will be error messages.
  */

  if (errmsg)
  {
    /* unlock the mutex on master info structure */
    mysql_mutex_unlock(&mi->data_lock);
    goto err;
  }

  /* as we are here, we tried to allocate the event */
  if (mi->get_mi_description_event() == NULL)
  {
    mysql_mutex_unlock(&mi->data_lock);
    errmsg= "default Format_description_log_event";
    err_code= ER_SLAVE_CREATE_EVENT_FAILURE;
    sprintf(err_buff, ER(err_code), errmsg);
    goto err;
  }

  if (mi->get_mi_description_event()->binlog_version < 4 &&
      opt_slave_sql_verify_checksum)
  {
    sql_print_warning("Found a master with MySQL server version older than "
                      "5.0. With checksums enabled on the slave, replication "
                      "might not work correctly. To ensure correct "
                      "replication, restart the slave server with "
                      "--slave_sql_verify_checksum=0.");
  }
  /*
    FD_q's (A) is set initially from RL's (A): FD_q.(A) := RL.(A).
    It's necessary to adjust FD_q.(A) at this point because in the following
    course FD_q is going to be dumped to RL.
    Generally FD_q is derived from a received FD_m (roughly FD_q := FD_m)
    in queue_event and the master's (A) is installed.
    At one step with the assignment the Relay-Log's checksum alg is set to
    a new value: RL.(A) := FD_q.(A). If the slave service is stopped
    the last time assigned RL.(A) will be passed over to the restarting
    service (to the current execution point).
    RL.A is a "codec" to verify checksum in queue_event() almost all the time
    the first fake Rotate event.
    Starting from this point IO thread will execute the following checksum
    warmup sequence of actions:

    FD_q.A := RL.A,
    A_m^0 := master.@@global.binlog_checksum,
    {queue_event(R_f): verifies(R_f, A_m^0)},
    {queue_event(FD_m): verifies(FD_m, FD_m.A), dump(FD_q), rotate(RL),
                        FD_q := FD_m, RL.A := FD_q.A)}

    See legends definition on MYSQL_BIN_LOG::relay_log_checksum_alg
    docs lines (binlog.h).
    In above A_m^0 - the value of master's
    @@binlog_checksum determined in the upcoming handshake (stored in
    mi->checksum_alg_before_fd).


    After the warm-up sequence IO gets to "normal" checksum verification mode
    to use RL.A in

    {queue_event(E_m): verifies(E_m, RL.A)}

    until it has received a new FD_m.
  */
  mi->get_mi_description_event()->checksum_alg=
    mi->rli->relay_log.relay_log_checksum_alg;

  DBUG_ASSERT(mi->get_mi_description_event()->checksum_alg !=
              BINLOG_CHECKSUM_ALG_UNDEF);
  DBUG_ASSERT(mi->rli->relay_log.relay_log_checksum_alg !=
              BINLOG_CHECKSUM_ALG_UNDEF);

  mysql_mutex_unlock(&mi->data_lock);

  /*
    Compare the master and slave's clock. Do not die if master's clock is
    unavailable (very old master not supporting UNIX_TIMESTAMP()?).
  */

  DBUG_EXECUTE_IF("dbug.before_get_UNIX_TIMESTAMP",
                  {
                    const char act[]=
                      "now signal in_get_master_version_and_clock "
                      "wait_for signal.get_unix_timestamp";
                    DBUG_ASSERT(opt_debug_sync_timeout > 0);
                    DBUG_ASSERT(!debug_sync_set_action(current_thd,
                                                       STRING_WITH_LEN(act)));
                  };);

  master_res= NULL;
  DBUG_EXECUTE_IF("get_master_version.timestamp.ER_NET_READ_INTERRUPTED",
                  {
                    DBUG_SET("+d,inject_ER_NET_READ_INTERRUPTED");
                    DBUG_SET("-d,get_master_version.timestamp."
                             "ER_NET_READ_INTERRUPTED");
                  });
  if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()")) &&
      (master_res= mysql_store_result(mysql)) &&
      (master_row= mysql_fetch_row(master_res)))
  {
    /* clock_diff_with_master is written under mi->data_lock. */
    mysql_mutex_lock(&mi->data_lock);
    mi->clock_diff_with_master=
      (long) (time((time_t*) 0) - strtoul(master_row[0], 0, 10));
    DBUG_EXECUTE_IF("dbug.mts.force_clock_diff_eq_0",
      mi->clock_diff_with_master= 0;);
    mysql_mutex_unlock(&mi->data_lock);
  }
  else if (check_io_slave_killed(mi->info_thd, mi, NULL))
    goto slave_killed_err;
  else if (is_network_error(mysql_errno(mysql)))
  {
    mi->report(WARNING_LEVEL, mysql_errno(mysql),
               "Get master clock failed with error: %s", mysql_error(mysql));
    goto network_err;
  }
  else
  {
    mysql_mutex_lock(&mi->data_lock);
    mi->clock_diff_with_master= 0; /* The "most sensible" value */
    mysql_mutex_unlock(&mi->data_lock);
    sql_print_warning("\"SELECT UNIX_TIMESTAMP()\" failed on master, "
                      "do not trust column Seconds_Behind_Master of SHOW "
                      "SLAVE STATUS. Error: %s (%d)",
                      mysql_error(mysql), mysql_errno(mysql));
  }
  /*
    master_res is reset to NULL after every free so that the cleanup code at
    the err/network_err/slave_killed_err labels never frees it twice.
  */
  if (master_res)
  {
    mysql_free_result(master_res);
    master_res= NULL;
  }

  /*
    Check that the master's server id and ours are different. Because if they
    are equal (which can result from a simple copy of master's datadir to slave,
    thus copying some my.cnf), replication will work but all events will be
    skipped.
    Do not die if SHOW VARIABLES LIKE 'SERVER_ID' fails on master (very old
    master?).
    Note: we could have put a @@SERVER_ID in the previous SELECT
    UNIX_TIMESTAMP() instead, but this would not have worked on 3.23 masters.
  */
  DBUG_EXECUTE_IF("dbug.before_get_SERVER_ID",
                  {
                    const char act[]=
                      "now signal in_get_master_version_and_clock "
                      "wait_for signal.get_server_id";
                    DBUG_ASSERT(opt_debug_sync_timeout > 0);
                    DBUG_ASSERT(!debug_sync_set_action(current_thd,
                                                       STRING_WITH_LEN(act)));
                  };);
  master_res= NULL;
  master_row= NULL;
  DBUG_EXECUTE_IF("get_master_version.server_id.ER_NET_READ_INTERRUPTED",
                  {
                    DBUG_SET("+d,inject_ER_NET_READ_INTERRUPTED");
                    DBUG_SET("-d,get_master_version.server_id."
                             "ER_NET_READ_INTERRUPTED");
                  });
  if (!mysql_real_query(mysql,
                        STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_ID'")) &&
      (master_res= mysql_store_result(mysql)) &&
      (master_row= mysql_fetch_row(master_res)))
  {
    /* Column 1 of the row holds the value; mi->master_id is set as a side effect. */
    if ((::server_id == (mi->master_id= strtoul(master_row[1], 0, 10))) &&
        !mi->rli->replicate_same_server_id)
    {
      errmsg= "The slave I/O thread stops because master and slave have equal \
MySQL server ids; these ids must be different for replication to work (or \
the --replicate-same-server-id option must be used on slave but this does \
not always make sense; please check the manual before using it).";
      err_code= ER_SLAVE_FATAL_ERROR;
      sprintf(err_buff, ER(err_code), errmsg);
      goto err;
    }
  }
  else if (mysql_errno(mysql))
  {
    if (check_io_slave_killed(mi->info_thd, mi, NULL))
      goto slave_killed_err;
    else if (is_network_error(mysql_errno(mysql)))
    {
      mi->report(WARNING_LEVEL, mysql_errno(mysql),
                 "Get master SERVER_ID failed with error: %s", mysql_error(mysql));
      goto network_err;
    }
    /* Fatal error */
    errmsg= "The slave I/O thread stops because a fatal error is encountered \
when it try to get the value of SERVER_ID variable from master.";
    err_code= mysql_errno(mysql);
    sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
    goto err;
  }
  else if (!master_row && master_res)
  {
    /* The query succeeded but returned an empty result set. */
    mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE,
               "Unknown system variable 'SERVER_ID' on master, \
maybe it is a *VERY OLD MASTER*.");
  }
  if (master_res)
  {
    mysql_free_result(master_res);
    master_res= NULL;
  }
  if (mi->master_id == 0 && mi->ignore_server_ids->dynamic_ids.elements > 0)
  {
    errmsg= "Slave configured with server id filtering could not detect the master server id.";
    err_code= ER_SLAVE_FATAL_ERROR;
    sprintf(err_buff, ER(err_code), errmsg);
    goto err;
  }

  /*
    Check that the master's global character_set_server and ours are the same.
    Not fatal if query fails (old master?).
    Note that we don't check for equality of global character_set_client and
    collation_connection (neither do we prevent their setting in
    set_var.cc). That's because from what I (Guilhem) have tested, the global
    values of these 2 are never used (new connections don't use them).
    We don't test equality of global collation_database either as it is
    going to be deprecated (made read-only) in 4.1 very soon.
    The test is only relevant if master < 5.0.3 (we'll test only if it's older
    than the 5 branch; < 5.0.3 was alpha...), as >= 5.0.3 master stores
    charset info in each binlog event.
    We don't do it for 3.23 because masters <3.23.50 hang on
    SELECT @@unknown_var (BUG#7965 - see changelog of 3.23.50). So finally we
    test only if master is 4.x.
  */

  /* redundant with rest of code but safer against later additions */
  /* errmsg is still NULL here, so jumping to err returns 0 (ok). */
  if (*mysql->server_version == '3')
    goto err;

  if (*mysql->server_version == '4')
  {
    master_res= NULL;
    if (!mysql_real_query(mysql,
                          STRING_WITH_LEN("SELECT @@GLOBAL.COLLATION_SERVER")) &&
        (master_res= mysql_store_result(mysql)) &&
        (master_row= mysql_fetch_row(master_res)))
    {
      if (strcmp(master_row[0], global_system_variables.collation_server->name))
      {
        errmsg= "The slave I/O thread stops because master and slave have \
different values for the COLLATION_SERVER global variable. The values must \
be equal for the Statement-format replication to work";
        err_code= ER_SLAVE_FATAL_ERROR;
        sprintf(err_buff, ER(err_code), errmsg);
        goto err;
      }
    }
    else if (check_io_slave_killed(mi->info_thd, mi, NULL))
      goto slave_killed_err;
    else if (is_network_error(mysql_errno(mysql)))
    {
      mi->report(WARNING_LEVEL, mysql_errno(mysql),
                 "Get master COLLATION_SERVER failed with error: %s", mysql_error(mysql));
      goto network_err;
    }
    else if (mysql_errno(mysql) != ER_UNKNOWN_SYSTEM_VARIABLE)
    {
      /* Fatal error */
      errmsg= "The slave I/O thread stops because a fatal error is encountered \
when it try to get the value of COLLATION_SERVER global variable from master.";
      err_code= mysql_errno(mysql);
      sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
      goto err;
    }
    else
      mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE,
                 "Unknown system variable 'COLLATION_SERVER' on master, \
maybe it is a *VERY OLD MASTER*. *NOTE*: slave may experience \
inconsistency if replicated data deals with collation.");

    if (master_res)
    {
      mysql_free_result(master_res);
      master_res= NULL;
    }
  }

  /*
    Perform analogous check for time zone. Theoretically we also should
    perform check here to verify that SYSTEM time zones are the same on
    slave and master, but we can't rely on value of @@system_time_zone
    variable (it is time zone abbreviation) since it determined at start
    time and so could differ for slave and master even if they are really
    in the same system time zone. So we are omitting this check and just
    relying on documentation. Also according to Monty there are many users
    who are using replication between servers in various time zones. Hence
    such a check would break everything for them. (And now everything will
    work for them because by default both their master and slave will have
    'SYSTEM' time zone).
    This check is only necessary for 4.x masters (and < 5.0.4 masters but
    those were alpha).
  */
  if (*mysql->server_version == '4')
  {
    master_res= NULL;
    if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.TIME_ZONE")) &&
        (master_res= mysql_store_result(mysql)) &&
        (master_row= mysql_fetch_row(master_res)))
    {
      if (strcmp(master_row[0],
                 global_system_variables.time_zone->get_name()->ptr()))
      {
        errmsg= "The slave I/O thread stops because master and slave have \
different values for the TIME_ZONE global variable. The values must \
be equal for the Statement-format replication to work";
        err_code= ER_SLAVE_FATAL_ERROR;
        sprintf(err_buff, ER(err_code), errmsg);
        goto err;
      }
    }
    else if (check_io_slave_killed(mi->info_thd, mi, NULL))
      goto slave_killed_err;
    else if (is_network_error(mysql_errno(mysql)))
    {
      mi->report(WARNING_LEVEL, mysql_errno(mysql),
                 "Get master TIME_ZONE failed with error: %s", mysql_error(mysql));
      goto network_err;
    }
    else
    {
      /* Fatal error */
      errmsg= "The slave I/O thread stops because a fatal error is encountered \
when it try to get the value of TIME_ZONE global variable from master.";
      err_code= mysql_errno(mysql);
      sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
      goto err;
    }
    if (master_res)
    {
      mysql_free_result(master_res);
      master_res= NULL;
    }
  }

  if (mi->heartbeat_period != 0.0)
  {
    char llbuf[22];
    const char query_format[]= "SET @master_heartbeat_period= %s";
    /* Room for the format without its "%s" plus the longest llbuf content. */
    char query[sizeof(query_format) - 2 + sizeof(llbuf)];
    /*
       the period is an ulonglong of nano-secs.
    */
    llstr((ulonglong) (mi->heartbeat_period*1000000000UL), llbuf);
    sprintf(query, query_format, llbuf);
    DBUG_EXECUTE_IF("get_master_version.heartbeat.ER_NET_READ_INTERRUPTED",
                    {
                      DBUG_SET("+d,inject_ER_NET_READ_INTERRUPTED");
                      DBUG_SET("-d,get_master_version.heartbeat."
                               "ER_NET_READ_INTERRUPTED");
                    });
    if (mysql_real_query(mysql, query, strlen(query)))
    {
      if (check_io_slave_killed(mi->info_thd, mi, NULL))
        goto slave_killed_err;

      if (is_network_error(mysql_errno(mysql)))
      {
        mi->report(WARNING_LEVEL, mysql_errno(mysql),
                   "SET @master_heartbeat_period to master failed with error: %s",
                   mysql_error(mysql));
        mysql_free_result(mysql_store_result(mysql));
        goto network_err;
      }
      else
      {
        /* Fatal error */
        errmsg= "The slave I/O thread stops because a fatal error is encountered "
          " when it tries to SET @master_heartbeat_period on master.";
        err_code= ER_SLAVE_FATAL_ERROR;
        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
        mysql_free_result(mysql_store_result(mysql));
        goto err;
      }
    }
    mysql_free_result(mysql_store_result(mysql));
  }

  /*
    Querying if master is capable to checksum and notifying it about own
    CRC-awareness. The master's side instant value of @@global.binlog_checksum
    is stored in the dump thread's uservar area as well as cached locally
    to become known in consensus by master and slave.
  */
  if (DBUG_EVALUATE_IF("simulate_slave_unaware_checksum", 0, 1))
  {
    int rc;
    const char query[]= "SET @master_binlog_checksum= @@global.binlog_checksum";
    master_res= NULL;
    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF; //initially undefined
    /*
      @c checksum_alg_before_fd is queried from master in this block.
      If master is old checksum-unaware the value stays undefined.
      Once the first FD will be received its alg descriptor will replace
      the being queried one.
    */
    rc= mysql_real_query(mysql, query, strlen(query));
    if (rc != 0)
    {
      /* Any failure of the SET leaves the algorithm at OFF. */
      mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_OFF;
      if (check_io_slave_killed(mi->info_thd, mi, NULL))
        goto slave_killed_err;

      if (mysql_errno(mysql) == ER_UNKNOWN_SYSTEM_VARIABLE)
      {
        // this is tolerable as OM -> NS is supported
        mi->report(WARNING_LEVEL, mysql_errno(mysql),
                   "Notifying master by %s failed with "
                   "error: %s", query, mysql_error(mysql));
      }
      else
      {
        if (is_network_error(mysql_errno(mysql)))
        {
          mi->report(WARNING_LEVEL, mysql_errno(mysql),
                     "Notifying master by %s failed with "
                     "error: %s", query, mysql_error(mysql));
          mysql_free_result(mysql_store_result(mysql));
          goto network_err;
        }
        else
        {
          errmsg= "The slave I/O thread stops because a fatal error is encountered "
            "when it tried to SET @master_binlog_checksum on master.";
          err_code= ER_SLAVE_FATAL_ERROR;
          sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
          mysql_free_result(mysql_store_result(mysql));
          goto err;
        }
      }
    }
    else
    {
      mysql_free_result(mysql_store_result(mysql));
      if (!mysql_real_query(mysql,
                            STRING_WITH_LEN("SELECT @master_binlog_checksum")) &&
          (master_res= mysql_store_result(mysql)) &&
          (master_row= mysql_fetch_row(master_res)) &&
          (master_row[0] != NULL))
      {
        /*
          find_type() result minus one is stored as the algorithm value;
          NOTE(review): presumably an unmatched name yields 0 and therefore
          wraps to BINLOG_CHECKSUM_ALG_UNDEF after the cast - confirm
          against find_type() and the enum definition.
        */
        mi->checksum_alg_before_fd= (uint8)
          find_type(master_row[0], &binlog_checksum_typelib, 1) - 1;

       DBUG_EXECUTE_IF("undefined_algorithm_on_slave",
        mi->checksum_alg_before_fd = BINLOG_CHECKSUM_ALG_UNDEF;);
       if(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_UNDEF)
       {
         errmsg= "The slave I/O thread was stopped because a fatal error is encountered "
                 "The checksum algorithm used by master is unknown to slave.";
         err_code= ER_SLAVE_FATAL_ERROR;
         sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
         mysql_free_result(mysql_store_result(mysql));
         goto err;
       }

        // valid outcome is either of
        DBUG_ASSERT(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_OFF ||
                    mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_CRC32);
      }
      else if (check_io_slave_killed(mi->info_thd, mi, NULL))
        goto slave_killed_err;
      else if (is_network_error(mysql_errno(mysql)))
      {
        mi->report(WARNING_LEVEL, mysql_errno(mysql),
                   "Get master BINLOG_CHECKSUM failed with error: %s", mysql_error(mysql));
        goto network_err;
      }
      else
      {
        errmsg= "The slave I/O thread stops because a fatal error is encountered "
          "when it tried to SELECT @master_binlog_checksum.";
        err_code= ER_SLAVE_FATAL_ERROR;
        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
        mysql_free_result(mysql_store_result(mysql));
        goto err;
      }
    }
    if (master_res)
    {
      mysql_free_result(master_res);
      master_res= NULL;
    }
  }
  else
    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_OFF;

  if (DBUG_EVALUATE_IF("simulate_slave_unaware_gtid", 0, 1))
  {
    switch (io_thread_init_command(mi, "SELECT @@GLOBAL.GTID_MODE",
                                   ER_UNKNOWN_SYSTEM_VARIABLE,
                                   &master_res, &master_row))
    {
    case COMMAND_STATUS_ERROR:
      /* Returns directly, bypassing the cleanup labels below. */
      DBUG_RETURN(2);
    case COMMAND_STATUS_ALLOWED_ERROR:
      // master is old and does not have @@GLOBAL.GTID_MODE
      mi->master_gtid_mode= 0;
      break;
    case COMMAND_STATUS_OK:
      const char *master_gtid_mode_string= master_row[0];
      bool found_valid_mode= false;
      DBUG_EXECUTE_IF("simulate_master_has_gtid_mode_on_permissive",
                      { master_gtid_mode_string= "on_permissive"; });
      DBUG_EXECUTE_IF("simulate_master_has_gtid_mode_off_permissive",
                      { master_gtid_mode_string= "off_permissive"; });
      DBUG_EXECUTE_IF("simulate_master_has_gtid_mode_on_something",
                      { master_gtid_mode_string= "on_something"; });
      DBUG_EXECUTE_IF("simulate_master_has_gtid_mode_off_something",
                      { master_gtid_mode_string= "off_something"; });
      DBUG_EXECUTE_IF("simulate_master_has_unknown_gtid_mode",
                      { master_gtid_mode_string= "Krakel Spektakel"; });
      /*
        Only entries 0 and 3 of gtid_mode_typelib.type_names are tried
        (the step is 3); a mode name that merely starts with one of them
        is accepted with a warning.
      */
      for (int mode= 0; mode <= 3 && !found_valid_mode; mode+= 3)
      {
        switch (is_str_prefix_case(gtid_mode_typelib.type_names[mode],
                                   master_gtid_mode_string))
        {
        case 0: // is not a prefix
          break;
        case 1: // is a true prefix, i.e. not equal
          mi->report(WARNING_LEVEL, ER_UNKNOWN_ERROR,
                     "The master uses an unknown GTID_MODE '%s'. "
                     "Treating it as '%s'.",
                     master_gtid_mode_string,
                     gtid_mode_typelib.type_names[mode]);
          // fall through
        case 2: // is equal
          found_valid_mode= true;
          mi->master_gtid_mode= mode;
          break;
        }
      }
      if (!found_valid_mode)
      {
        mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
                   "The slave IO thread stops because the master has "
                   "an unknown @@GLOBAL.GTID_MODE '%s'.",
                   master_gtid_mode_string);
        mysql_free_result(master_res);
        DBUG_RETURN(1);
      }
      mysql_free_result(master_res);
      break;
    }
    /* Master and slave modes may differ by at most one step. */
    if ((mi->master_gtid_mode > gtid_mode + 1 ||
         gtid_mode > mi->master_gtid_mode + 1) &&
        !opt_gtid_deployment_step)
    {
      mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
                 "The slave IO thread stops because the master has "
                 "@@GLOBAL.GTID_MODE %s and this server has "
                 "@@GLOBAL.GTID_MODE %s",
                 gtid_mode_names[mi->master_gtid_mode],
                 gtid_mode_names[gtid_mode]);
      DBUG_RETURN(1);
    }
    if (mi->is_auto_position() && mi->master_gtid_mode != 3)
    {
      mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
                 "The slave IO thread stops because the master has "
                 "@@GLOBAL.GTID_MODE %s and we are trying to connect "
                 "using MASTER_AUTO_POSITION.",
                 gtid_mode_names[mi->master_gtid_mode]);
      DBUG_RETURN(1);
    }
  }

  /*
    Common exit: reached by falling through on success (errmsg == NULL,
    returns 0) or by goto with errmsg/err_code/err_buff filled in (returns 1).
  */
err:
  if (errmsg)
  {
    if (master_res)
      mysql_free_result(master_res);
    DBUG_ASSERT(err_code != 0);
    mi->report(ERROR_LEVEL, err_code, "%s", err_buff);
    DBUG_RETURN(1);
  }

  DBUG_RETURN(0);

  /* Network failure: already reported by the code that jumped here. */
network_err:
  if (master_res)
    mysql_free_result(master_res);
  DBUG_RETURN(2);

  /* IO thread killed: same return code as a network failure. */
slave_killed_err:
  if (master_res)
    mysql_free_result(master_res);
  DBUG_RETURN(2);
}
2802
wait_for_relay_log_space(Relay_log_info * rli)2803 static bool wait_for_relay_log_space(Relay_log_info* rli)
2804 {
2805 bool slave_killed=0;
2806 Master_info* mi = rli->mi;
2807 PSI_stage_info old_stage;
2808 THD* thd = mi->info_thd;
2809 DBUG_ENTER("wait_for_relay_log_space");
2810
2811 mysql_mutex_lock(&rli->log_space_lock);
2812 thd->ENTER_COND(&rli->log_space_cond,
2813 &rli->log_space_lock,
2814 &stage_waiting_for_relay_log_space,
2815 &old_stage);
2816 while (rli->log_space_limit < rli->log_space_total &&
2817 !(slave_killed=io_slave_killed(thd,mi)) &&
2818 !rli->ignore_log_space_limit)
2819 mysql_cond_wait(&rli->log_space_cond, &rli->log_space_lock);
2820
2821 /*
2822 Makes the IO thread read only one event at a time
2823 until the SQL thread is able to purge the relay
2824 logs, freeing some space.
2825
2826 Therefore, once the SQL thread processes this next
2827 event, it goes to sleep (no more events in the queue),
2828 sets ignore_log_space_limit=true and wakes the IO thread.
2829 However, this event may have been enough already for
2830 the SQL thread to purge some log files, freeing
2831 rli->log_space_total .
2832
2833 This guarantees that the SQL and IO thread move
2834 forward only one event at a time (to avoid deadlocks),
2835 when the relay space limit is reached. It also
2836 guarantees that when the SQL thread is prepared to
2837 rotate (to be able to purge some logs), the IO thread
2838 will know about it and will rotate.
2839
2840 NOTE: The ignore_log_space_limit is only set when the SQL
2841 thread sleeps waiting for events.
2842
2843 */
2844 if (rli->ignore_log_space_limit)
2845 {
2846 #ifndef DBUG_OFF
2847 {
2848 char llbuf1[22], llbuf2[22];
2849 DBUG_PRINT("info", ("log_space_limit=%s "
2850 "log_space_total=%s "
2851 "ignore_log_space_limit=%d "
2852 "sql_force_rotate_relay=%d",
2853 llstr(rli->log_space_limit,llbuf1),
2854 llstr(rli->log_space_total,llbuf2),
2855 (int) rli->ignore_log_space_limit,
2856 (int) rli->sql_force_rotate_relay));
2857 }
2858 #endif
2859 if (rli->sql_force_rotate_relay)
2860 {
2861 mysql_mutex_lock(&mi->data_lock);
2862 rotate_relay_log(mi, false/*need_log_space_lock=false*/);
2863 mysql_mutex_unlock(&mi->data_lock);
2864 rli->sql_force_rotate_relay= false;
2865 }
2866
2867 rli->ignore_log_space_limit= false;
2868 }
2869
2870 thd->EXIT_COND(&old_stage);
2871 DBUG_RETURN(slave_killed);
2872 }
2873
2874
2875 /*
2876 Builds a Rotate from the ignored events' info and writes it to relay log.
2877
2878 The caller must hold mi->data_lock before invoking this function.
2879
2880 @param thd pointer to I/O Thread's Thd.
2881 @param mi point to I/O Thread metadata class.
2882
2883 @return 0 if everything went fine, 1 otherwise.
2884 */
write_ignored_events_info_to_relay_log(THD * thd,Master_info * mi)2885 static int write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi)
2886 {
2887 Relay_log_info *rli= mi->rli;
2888 mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
2889 int error= 0;
2890 DBUG_ENTER("write_ignored_events_info_to_relay_log");
2891
2892 DBUG_ASSERT(thd == mi->info_thd);
2893 mysql_mutex_assert_owner(&mi->data_lock);
2894 mysql_mutex_lock(log_lock);
2895 if (rli->ign_master_log_name_end[0])
2896 {
2897 DBUG_PRINT("info",("writing a Rotate event to track down ignored events"));
2898 Rotate_log_event *ev= new Rotate_log_event(rli->ign_master_log_name_end,
2899 0, rli->ign_master_log_pos_end,
2900 Rotate_log_event::DUP_NAME);
2901 if (mi->get_mi_description_event() != NULL)
2902 ev->checksum_alg= mi->get_mi_description_event()->checksum_alg;
2903
2904 rli->ign_master_log_name_end[0]= 0;
2905 /* can unlock before writing as slave SQL thd will soon see our Rotate */
2906 mysql_mutex_unlock(log_lock);
2907 if (likely((bool)ev))
2908 {
2909 ev->server_id= 0; // don't be ignored by slave SQL thread
2910 if (unlikely(rli->relay_log.append_event(ev, mi) != 0))
2911 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
2912 ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
2913 "failed to write a Rotate event"
2914 " to the relay log, SHOW SLAVE STATUS may be"
2915 " inaccurate");
2916 rli->relay_log.harvest_bytes_written(rli, true/*need_log_space_lock=true*/);
2917 if (flush_master_info(mi, TRUE))
2918 {
2919 error= 1;
2920 sql_print_error("Failed to flush master info file.");
2921 }
2922 delete ev;
2923 }
2924 else
2925 {
2926 error= 1;
2927 mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE,
2928 ER(ER_SLAVE_CREATE_EVENT_FAILURE),
2929 "Rotate_event (out of memory?),"
2930 " SHOW SLAVE STATUS may be inaccurate");
2931 }
2932 }
2933 else
2934 mysql_mutex_unlock(log_lock);
2935
2936 DBUG_RETURN(error);
2937 }
2938
2939
register_slave_on_master(MYSQL * mysql,Master_info * mi,bool * suppress_warnings)2940 int register_slave_on_master(MYSQL* mysql, Master_info *mi,
2941 bool *suppress_warnings)
2942 {
2943 uchar buf[1024], *pos= buf;
2944 uint report_host_len=0, report_user_len=0, report_password_len=0;
2945 DBUG_ENTER("register_slave_on_master");
2946
2947 *suppress_warnings= FALSE;
2948 if (report_host)
2949 report_host_len= strlen(report_host);
2950 if (report_host_len > HOSTNAME_LENGTH)
2951 {
2952 sql_print_warning("The length of report_host is %d. "
2953 "It is larger than the max length(%d), so this "
2954 "slave cannot be registered to the master.",
2955 report_host_len, HOSTNAME_LENGTH);
2956 DBUG_RETURN(0);
2957 }
2958
2959 if (report_user)
2960 report_user_len= strlen(report_user);
2961 if (report_user_len > USERNAME_LENGTH)
2962 {
2963 sql_print_warning("The length of report_user is %d. "
2964 "It is larger than the max length(%d), so this "
2965 "slave cannot be registered to the master.",
2966 report_user_len, USERNAME_LENGTH);
2967 DBUG_RETURN(0);
2968 }
2969
2970 if (report_password)
2971 report_password_len= strlen(report_password);
2972 if (report_password_len > MAX_PASSWORD_LENGTH)
2973 {
2974 sql_print_warning("The length of report_password is %d. "
2975 "It is larger than the max length(%d), so this "
2976 "slave cannot be registered to the master.",
2977 report_password_len, MAX_PASSWORD_LENGTH);
2978 DBUG_RETURN(0);
2979 }
2980
2981 int4store(pos, server_id); pos+= 4;
2982 pos= net_store_data(pos, (uchar*) report_host, report_host_len);
2983 pos= net_store_data(pos, (uchar*) report_user, report_user_len);
2984 pos= net_store_data(pos, (uchar*) report_password, report_password_len);
2985 int2store(pos, (uint16) report_port); pos+= 2;
2986 /*
2987 Fake rpl_recovery_rank, which was removed in BUG#13963,
2988 so that this server can register itself on old servers,
2989 see BUG#49259.
2990 */
2991 int4store(pos, /* rpl_recovery_rank */ 0); pos+= 4;
2992 /* The master will fill in master_id */
2993 int4store(pos, 0); pos+= 4;
2994
2995 if (simple_command(mysql, COM_REGISTER_SLAVE, buf, (size_t) (pos- buf), 0))
2996 {
2997 if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
2998 {
2999 *suppress_warnings= TRUE; // Suppress reconnect warning
3000 }
3001 else if (!check_io_slave_killed(mi->info_thd, mi, NULL))
3002 {
3003 char buf[256];
3004 my_snprintf(buf, sizeof(buf), "%s (Errno: %d)", mysql_error(mysql),
3005 mysql_errno(mysql));
3006 mi->report(ERROR_LEVEL, ER_SLAVE_MASTER_COM_FAILURE,
3007 ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_REGISTER_SLAVE", buf);
3008 }
3009 DBUG_RETURN(1);
3010 }
3011
3012 DBUG_EXECUTE_IF("simulate_register_slave_killed", {
3013 mi->abort_slave = 1;
3014 DBUG_RETURN(1);
3015 };);
3016
3017 DBUG_RETURN(0);
3018 }
3019
3020
/**
  Execute a SHOW SLAVE STATUS statement.

  The function first renders the executed and retrieved GTID sets to strings
  (under the global_sid_lock write lock), then sends the result set metadata
  and, when mi is non-NULL and a master host is configured, a single row
  describing the I/O and SQL threads.  The order of the protocol->store()
  calls below must match the order in which the columns are pushed onto
  field_list.

  For SQLCOM_SHOW_SLAVE_NOLOCK_STAT, mi->run_lock is not taken while reading
  the I/O thread state; all other locks are acquired as usual.

  @param thd Pointer to THD object for the client thread executing the
  statement.

  @param mi Pointer to Master_info object for the IO thread.

  @retval FALSE success
  @retval TRUE failure
*/
bool show_slave_status(THD* thd, Master_info* mi)
{
  // TODO: fix this for multi-master
  List<Item> field_list;
  Protocol *protocol= thd->protocol;
  char *slave_sql_running_state= NULL;
  /* Buffers filled by Gtid_set::to_string(); released with my_free(). */
  char *sql_gtid_set_buffer= NULL, *io_gtid_set_buffer= NULL;
  int sql_gtid_set_size= 0, io_gtid_set_size= 0;
  DBUG_ENTER("show_slave_status");

  if (mi != NULL)
  {
    global_sid_lock->wrlock();
    const Gtid_set* sql_gtid_set= gtid_state->get_logged_gtids();
    const Gtid_set* io_gtid_set= mi->rli->get_gtid_set();
    /* A negative size signals that the string conversion failed. */
    if ((sql_gtid_set_size= sql_gtid_set->to_string(&sql_gtid_set_buffer)) < 0 ||
        (io_gtid_set_size= io_gtid_set->to_string(&io_gtid_set_buffer)) < 0)
    {
      my_eof(thd);
      my_free(sql_gtid_set_buffer);
      my_free(io_gtid_set_buffer);
      global_sid_lock->unlock();
      DBUG_RETURN(true);
    }
    global_sid_lock->unlock();
  }

  /*
    Result set metadata.  Columns whose width depends on a Master_info
    member get width 0 when mi is NULL.
  */
  field_list.push_back(new Item_empty_string("Slave_IO_State",
                                             14));
  field_list.push_back(new Item_empty_string("Master_Host", mi != NULL ?
                                             sizeof(mi->host) : 0));
  field_list.push_back(new Item_empty_string("Master_User", mi != NULL ?
                                             mi->get_user_size() : 0));
  field_list.push_back(new Item_return_int("Master_Port", 7,
                                           MYSQL_TYPE_LONG));
  field_list.push_back(new Item_return_int("Connect_Retry", 10,
                                           MYSQL_TYPE_LONG));
  field_list.push_back(new Item_empty_string("Master_Log_File",
                                             FN_REFLEN));
  field_list.push_back(new Item_return_int("Read_Master_Log_Pos", 10,
                                           MYSQL_TYPE_LONGLONG));
  field_list.push_back(new Item_empty_string("Relay_Log_File",
                                             FN_REFLEN));
  field_list.push_back(new Item_return_int("Relay_Log_Pos", 10,
                                           MYSQL_TYPE_LONGLONG));
  field_list.push_back(new Item_empty_string("Relay_Master_Log_File",
                                             FN_REFLEN));
  field_list.push_back(new Item_empty_string("Slave_IO_Running", 3));
  field_list.push_back(new Item_empty_string("Slave_SQL_Running", 3));
  field_list.push_back(new Item_empty_string("Replicate_Do_DB", 20));
  field_list.push_back(new Item_empty_string("Replicate_Ignore_DB", 20));
  field_list.push_back(new Item_empty_string("Replicate_Do_Table", 20));
  field_list.push_back(new Item_empty_string("Replicate_Ignore_Table", 23));
  field_list.push_back(new Item_empty_string("Replicate_Wild_Do_Table", 24));
  field_list.push_back(new Item_empty_string("Replicate_Wild_Ignore_Table",
                                             28));
  field_list.push_back(new Item_return_int("Last_Errno", 4, MYSQL_TYPE_LONG));
  field_list.push_back(new Item_empty_string("Last_Error", 20));
  field_list.push_back(new Item_return_int("Skip_Counter", 10,
                                           MYSQL_TYPE_LONG));
  field_list.push_back(new Item_return_int("Exec_Master_Log_Pos", 10,
                                           MYSQL_TYPE_LONGLONG));
  field_list.push_back(new Item_return_int("Relay_Log_Space", 10,
                                           MYSQL_TYPE_LONGLONG));
  field_list.push_back(new Item_empty_string("Until_Condition", 6));
  field_list.push_back(new Item_empty_string("Until_Log_File", FN_REFLEN));
  field_list.push_back(new Item_return_int("Until_Log_Pos", 10,
                                           MYSQL_TYPE_LONGLONG));
  field_list.push_back(new Item_empty_string("Master_SSL_Allowed", 7));
  field_list.push_back(new Item_empty_string("Master_SSL_CA_File", mi != NULL ?
                                             sizeof(mi->ssl_ca) : 0));
  field_list.push_back(new Item_empty_string("Master_SSL_CA_Path", mi != NULL ?
                                             sizeof(mi->ssl_capath) : 0));
  field_list.push_back(new Item_empty_string("Master_SSL_Cert", mi != NULL ?
                                             sizeof(mi->ssl_cert) : 0));
  field_list.push_back(new Item_empty_string("Master_SSL_Cipher", mi != NULL ?
                                             sizeof(mi->ssl_cipher) : 0));
  field_list.push_back(new Item_empty_string("Master_SSL_Key", mi != NULL ?
                                             sizeof(mi->ssl_key) : 0));
  field_list.push_back(new Item_return_int("Seconds_Behind_Master", 10,
                                           MYSQL_TYPE_LONGLONG));
  field_list.push_back(new Item_empty_string("Master_SSL_Verify_Server_Cert",
                                             3));
  field_list.push_back(new Item_return_int("Last_IO_Errno", 4, MYSQL_TYPE_LONG));
  field_list.push_back(new Item_empty_string("Last_IO_Error", 20));
  field_list.push_back(new Item_return_int("Last_SQL_Errno", 4, MYSQL_TYPE_LONG));
  field_list.push_back(new Item_empty_string("Last_SQL_Error", 20));
  field_list.push_back(new Item_empty_string("Replicate_Ignore_Server_Ids",
                                             FN_REFLEN));
  field_list.push_back(new Item_return_int("Master_Server_Id", sizeof(ulong),
                                           MYSQL_TYPE_LONG));
  field_list.push_back(new Item_empty_string("Master_UUID", UUID_LENGTH));
  field_list.push_back(new Item_empty_string("Master_Info_File",
                                             2 * FN_REFLEN));
  field_list.push_back(new Item_return_int("SQL_Delay", 10, MYSQL_TYPE_LONG));
  field_list.push_back(new Item_return_int("SQL_Remaining_Delay", 8, MYSQL_TYPE_LONG));
  field_list.push_back(new Item_empty_string("Slave_SQL_Running_State", 20));
  field_list.push_back(new Item_return_int("Master_Retry_Count", 10,
                                           MYSQL_TYPE_LONGLONG));
  field_list.push_back(new Item_empty_string("Master_Bind", mi != NULL ?
                                             sizeof(mi->bind_addr) : 0));
  field_list.push_back(new Item_empty_string("Last_IO_Error_Timestamp", 20));
  field_list.push_back(new Item_empty_string("Last_SQL_Error_Timestamp", 20));
  field_list.push_back(new Item_empty_string("Master_SSL_Crl", mi != NULL ?
                                             sizeof(mi->ssl_crl) : 0));
  field_list.push_back(new Item_empty_string("Master_SSL_Crlpath", mi != NULL ?
                                             sizeof(mi->ssl_crlpath) : 0));
  field_list.push_back(new Item_empty_string("Retrieved_Gtid_Set",
                                             io_gtid_set_size));
  field_list.push_back(new Item_empty_string("Executed_Gtid_Set",
                                             sql_gtid_set_size));
  field_list.push_back(new Item_return_int("Auto_Position", sizeof(ulong),
                                           MYSQL_TYPE_LONG));

  if (protocol->send_result_set_metadata(&field_list,
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
  {
    my_free(sql_gtid_set_buffer);
    my_free(io_gtid_set_buffer);
    DBUG_RETURN(true);
  }

  /* A row is only produced when a master host has been configured. */
  if (mi != NULL && mi->host[0])
  {
    /* The NOLOCK variant of the statement skips mi->run_lock below. */
    bool do_lock=SQLCOM_SHOW_SLAVE_NOLOCK_STAT != thd->lex->sql_command;
    DBUG_PRINT("info",("host is set: '%s'", mi->host));
    String *packet= &thd->packet;
    protocol->prepare_for_resend();

    /*
      slave_running can be accessed without run_lock but not other
      non-volatile members like mi->info_thd, which is guarded by the mutex.
    */
    if (do_lock)
    {
      mysql_mutex_lock(&mi->run_lock);
    }
    // Slave_IO_State
    protocol->store(mi->info_thd ? mi->info_thd->get_proc_info() : "",
                    &my_charset_bin);
    if (do_lock)
    {
      mysql_mutex_unlock(&mi->run_lock);
    }

    /*
      Capture the SQL thread state now; it is stored later, in the
      Slave_SQL_Running_State column, and also compared against the
      "waiting until delay" stage name for SQL_Remaining_Delay.
    */
    mysql_mutex_lock(&mi->rli->run_lock);
    slave_sql_running_state= const_cast<char *>(mi->rli->info_thd ? mi->rli->info_thd->get_proc_info() : "");
    mysql_mutex_unlock(&mi->rli->run_lock);

    /* These four locks are released in reverse order at the end of the row. */
    mysql_mutex_lock(&mi->data_lock);
    mysql_mutex_lock(&mi->rli->data_lock);
    mysql_mutex_lock(&mi->err_lock);
    mysql_mutex_lock(&mi->rli->err_lock);

    DEBUG_SYNC(thd, "wait_after_lock_active_mi_and_rli_data_lock_is_acquired");
    // Master_Host
    protocol->store(mi->host, &my_charset_bin);
    // Master_User
    protocol->store(mi->get_user(), &my_charset_bin);
    // Master_Port
    protocol->store((uint32) mi->port);
    // Connect_Retry
    protocol->store((uint32) mi->connect_retry);
    const char * const master_log_file=
      mi->get_master_log_name();
    // Master_Log_File
    protocol->store(master_log_file, &my_charset_bin);
    // Read_Master_Log_Pos
    protocol->store((ulonglong) mi->get_master_log_pos());
    // Relay_Log_File (directory part stripped)
    protocol->store(mi->rli->get_group_relay_log_name() +
                    dirname_length(mi->rli->get_group_relay_log_name()),
                    &my_charset_bin);
    // Relay_Log_Pos
    protocol->store((ulonglong) mi->rli->get_group_relay_log_pos());
    const char * const relay_master_log_file=
      mi->rli->get_group_master_log_name();
    /* The length variables exist only in debug builds, for the assertion. */
#ifndef DBUG_OFF
    const size_t master_log_file_len= strlen(master_log_file);
    const size_t relay_master_log_file_len= strlen(relay_master_log_file);
#endif
    DBUG_ASSERT((relay_master_log_file_len == master_log_file_len)
                || !relay_master_log_file_len || !master_log_file_len);
    // Relay_Master_Log_File
    protocol->store(relay_master_log_file, &my_charset_bin);
    // Slave_IO_Running
    protocol->store(mi->slave_running == MYSQL_SLAVE_RUN_CONNECT ?
                    "Yes" : (mi->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT ?
                             "Connecting" : "No"), &my_charset_bin);
    // Slave_SQL_Running
    protocol->store(mi->rli->slave_running ? "Yes":"No", &my_charset_bin);
    // Replicate_Do_DB
    protocol->store(rpl_filter->get_do_db());
    // Replicate_Ignore_DB
    protocol->store(rpl_filter->get_ignore_db());

    /* tmp is reused for each of the four table filter columns. */
    char buf[256];
    String tmp(buf, sizeof(buf), &my_charset_bin);
    // Replicate_Do_Table
    rpl_filter->get_do_table(&tmp);
    protocol->store(&tmp);
    // Replicate_Ignore_Table
    rpl_filter->get_ignore_table(&tmp);
    protocol->store(&tmp);
    // Replicate_Wild_Do_Table
    rpl_filter->get_wild_do_table(&tmp);
    protocol->store(&tmp);
    // Replicate_Wild_Ignore_Table
    rpl_filter->get_wild_ignore_table(&tmp);
    protocol->store(&tmp);

    // Last_Errno
    protocol->store(mi->rli->last_error().number);
    // Last_Error
    protocol->store(mi->rli->last_error().message, &my_charset_bin);
    // Skip_Counter
    protocol->store((uint32) mi->rli->slave_skip_counter);
    // Exec_Master_Log_Pos
    protocol->store((ulonglong) mi->rli->get_group_master_log_pos());
    // Relay_Log_Space
    protocol->store((ulonglong) mi->rli->log_space_total);

    const char *until_type= "";

    switch (mi->rli->until_condition)
    {
    case Relay_log_info::UNTIL_NONE:
      until_type= "None";
      break;
    case Relay_log_info::UNTIL_MASTER_POS:
      until_type= "Master";
      break;
    case Relay_log_info::UNTIL_RELAY_POS:
      until_type= "Relay";
      break;
    case Relay_log_info::UNTIL_SQL_BEFORE_GTIDS:
      until_type= "SQL_BEFORE_GTIDS";
      break;
    case Relay_log_info::UNTIL_SQL_AFTER_GTIDS:
      until_type= "SQL_AFTER_GTIDS";
      break;
    case Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS:
      until_type= "SQL_AFTER_MTS_GAPS";
      break;
    case Relay_log_info::UNTIL_DONE:
      until_type= "DONE";
      break;
    default:
      DBUG_ASSERT(0);
    }
    // Until_Condition
    protocol->store(until_type, &my_charset_bin);
    // Until_Log_File
    protocol->store(mi->rli->until_log_name, &my_charset_bin);
    // Until_Log_Pos
    protocol->store((ulonglong) mi->rli->until_log_pos);

    // Master_SSL_Allowed
#ifdef HAVE_OPENSSL
    protocol->store(mi->ssl? "Yes":"No", &my_charset_bin);
#else
    protocol->store(mi->ssl? "Ignored":"No", &my_charset_bin);
#endif
    // Master_SSL_CA_File
    protocol->store(mi->ssl_ca, &my_charset_bin);
    // Master_SSL_CA_Path
    protocol->store(mi->ssl_capath, &my_charset_bin);
    // Master_SSL_Cert
    protocol->store(mi->ssl_cert, &my_charset_bin);
    // Master_SSL_Cipher
    protocol->store(mi->ssl_cipher, &my_charset_bin);
    // Master_SSL_Key
    protocol->store(mi->ssl_key, &my_charset_bin);

    /*
      The pseudo code to compute Seconds_Behind_Master:
        if (SQL thread is running)
        {
          if (SQL thread processed all the available relay log)
          {
            if (IO thread is running)
              print 0;
            else
              print NULL;
          }
          else
            compute Seconds_Behind_Master;
        }
        else
          print NULL;
    */
    if (mi->rli->slave_running)
    {
      /* Check if SQL thread is at the end of relay log
         Checking should be done using two conditions
         condition1: compare the log positions and
         condition2: compare the file names (to handle rotation case)
      */
      if ((mi->get_master_log_pos() == mi->rli->get_group_master_log_pos()) &&
          (!strcmp(mi->get_master_log_name(), mi->rli->get_group_master_log_name())))
      {
        if (mi->slave_running == MYSQL_SLAVE_RUN_CONNECT)
          protocol->store(0LL);
        else
          protocol->store_null();
      }
      else
      {
        long time_diff= ((long)(time(0) - mi->rli->last_master_timestamp)
                         - mi->clock_diff_with_master);
        /*
          Apparently on some systems time_diff can be <0. Here are possible
          reasons related to MySQL:
          - the master is itself a slave of another master whose time is ahead.
          - somebody used an explicit SET TIMESTAMP on the master.
          Possible reason related to granularity-to-second of time functions
          (nothing to do with MySQL), which can explain a value of -1:
          assume the master's and slave's time are perfectly synchronized, and
          that at slave's connection time, when the master's timestamp is read,
          it is at the very end of second 1, and (a very short time later) when
          the slave's timestamp is read it is at the very beginning of second
          2. Then the recorded value for master is 1 and the recorded value for
          slave is 2. At SHOW SLAVE STATUS time, assume that the difference
          between timestamp of slave and rli->last_master_timestamp is 0
          (i.e. they are in the same second), then we get 0-(2-1)=-1 as a result.
          This confuses users, so we don't go below 0: hence the max().

          last_master_timestamp == 0 (an "impossible" timestamp 1970) is a
          special marker to say "consider we have caught up".
        */
        protocol->store((longlong)(mi->rli->last_master_timestamp ?
                                   max(0L, time_diff) : 0));
      }
    }
    else
    {
      protocol->store_null();
    }
    // Master_SSL_Verify_Server_Cert
    protocol->store(mi->ssl_verify_server_cert? "Yes":"No", &my_charset_bin);

    // Last_IO_Errno
    protocol->store(mi->last_error().number);
    // Last_IO_Error
    protocol->store(mi->last_error().message, &my_charset_bin);
    // Last_SQL_Errno
    protocol->store(mi->rli->last_error().number);
    // Last_SQL_Error
    protocol->store(mi->rli->last_error().message, &my_charset_bin);
    // Replicate_Ignore_Server_Ids
    {
      /*
        Build a comma-separated list of the ignored server ids in buff.
        If the next id would not leave room for a trailing "..." marker,
        the list is truncated with "..." instead.
      */
      char buff[FN_REFLEN];
      ulong i, cur_len;
      for (i= 0, buff[0]= 0, cur_len= 0;
           i < mi->ignore_server_ids->dynamic_ids.elements; i++)
      {
        ulong s_id, slen;
        char sbuff[FN_REFLEN];
        get_dynamic(&(mi->ignore_server_ids->dynamic_ids), (uchar*) &s_id, i);
        slen= sprintf(sbuff, (i == 0 ? "%lu" : ", %lu"), s_id);
        if (cur_len + slen + 4 > FN_REFLEN)
        {
          /*
            break the loop whenever remained space could not fit
            ellipses on the next cycle
          */
          sprintf(buff + cur_len, "...");
          break;
        }
        cur_len += sprintf(buff + cur_len, "%s", sbuff);
      }
      protocol->store(buff, &my_charset_bin);
    }
    // Master_Server_id
    protocol->store((uint32) mi->master_id);
    // Master_UUID
    protocol->store(mi->master_uuid, &my_charset_bin);
    // Master_Info_File
    protocol->store(mi->get_description_info(), &my_charset_bin);
    // SQL_Delay
    protocol->store((uint32) mi->rli->get_sql_delay());
    // SQL_Remaining_Delay
    /* Pointer comparison against the stage name, not a string comparison. */
    if (slave_sql_running_state == stage_sql_thd_waiting_until_delay.m_name)
    {
      time_t t= my_time(0), sql_delay_end= mi->rli->get_sql_delay_end();
      protocol->store((uint32)(t < sql_delay_end ? sql_delay_end - t : 0));
    }
    else
      protocol->store_null();
    // Slave_SQL_Running_State
    protocol->store(slave_sql_running_state, &my_charset_bin);
    // Master_Retry_Count
    protocol->store((ulonglong) mi->retry_count);
    // Master_Bind
    protocol->store(mi->bind_addr, &my_charset_bin);
    // Last_IO_Error_Timestamp
    protocol->store(mi->last_error().timestamp, &my_charset_bin);
    // Last_SQL_Error_Timestamp
    protocol->store(mi->rli->last_error().timestamp, &my_charset_bin);
    // Master_Ssl_Crl
    protocol->store(mi->ssl_crl, &my_charset_bin);
    // Master_Ssl_Crlpath
    protocol->store(mi->ssl_crlpath, &my_charset_bin);
    // Retrieved_Gtid_Set
    protocol->store(io_gtid_set_buffer, &my_charset_bin);
    // Executed_Gtid_Set
    protocol->store(sql_gtid_set_buffer, &my_charset_bin);
    // Auto_Position
    protocol->store(mi->is_auto_position() ? 1 : 0);

    mysql_mutex_unlock(&mi->rli->err_lock);
    mysql_mutex_unlock(&mi->err_lock);
    mysql_mutex_unlock(&mi->rli->data_lock);
    mysql_mutex_unlock(&mi->data_lock);

    /* Send the assembled row to the client. */
    if (my_net_write(&thd->net, (uchar*) thd->packet.ptr(), packet->length()))
    {
      my_free(sql_gtid_set_buffer);
      my_free(io_gtid_set_buffer);
      DBUG_RETURN(true);
    }
  }
  my_eof(thd);
  my_free(sql_gtid_set_buffer);
  my_free(io_gtid_set_buffer);
  DBUG_RETURN(false);
}
3425
3426
set_slave_thread_options(THD * thd)3427 void set_slave_thread_options(THD* thd)
3428 {
3429 DBUG_ENTER("set_slave_thread_options");
3430 /*
3431 It's nonsense to constrain the slave threads with max_join_size; if a
3432 query succeeded on master, we HAVE to execute it. So set
3433 OPTION_BIG_SELECTS. Setting max_join_size to HA_POS_ERROR is not enough
3434 (and it's not needed if we have OPTION_BIG_SELECTS) because an INSERT
3435 SELECT examining more than 4 billion rows would still fail (yes, because
3436 when max_join_size is 4G, OPTION_BIG_SELECTS is automatically set, but
3437 only for client threads.
3438 */
3439 ulonglong options= thd->variables.option_bits | OPTION_BIG_SELECTS;
3440 if (opt_log_slave_updates)
3441 options|= OPTION_BIN_LOG;
3442 else
3443 options&= ~OPTION_BIN_LOG;
3444 thd->variables.option_bits= options;
3445 thd->variables.completion_type= 0;
3446
3447 /*
3448 Set autocommit= 1 when info tables are used and autocommit == 0 to
3449 avoid trigger asserts on mysql_execute_command(THD *thd) caused by
3450 info tables updates which do not commit, like Rotate, Stop and
3451 skipped events handling.
3452 */
3453 if (is_autocommit_off_and_infotables(thd))
3454 {
3455 thd->variables.option_bits|= OPTION_AUTOCOMMIT;
3456 thd->variables.option_bits&= ~OPTION_NOT_AUTOCOMMIT;
3457 thd->server_status|= SERVER_STATUS_AUTOCOMMIT;
3458 }
3459
3460 DBUG_VOID_RETURN;
3461 }
3462
set_slave_thread_default_charset(THD * thd,Relay_log_info const * rli)3463 void set_slave_thread_default_charset(THD* thd, Relay_log_info const *rli)
3464 {
3465 DBUG_ENTER("set_slave_thread_default_charset");
3466
3467 thd->variables.character_set_client=
3468 global_system_variables.character_set_client;
3469 thd->variables.collation_connection=
3470 global_system_variables.collation_connection;
3471 thd->variables.collation_server=
3472 global_system_variables.collation_server;
3473 thd->update_charset();
3474
3475 /*
3476 We use a const cast here since the conceptual (and externally
3477 visible) behavior of the function is to set the default charset of
3478 the thread. That the cache has to be invalidated is a secondary
3479 effect.
3480 */
3481 const_cast<Relay_log_info*>(rli)->cached_charset_invalidate();
3482 DBUG_VOID_RETURN;
3483 }
3484
3485 /*
3486 init_slave_thread()
3487 */
3488
init_slave_thread(THD * thd,SLAVE_THD_TYPE thd_type)3489 static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
3490 {
3491 DBUG_ENTER("init_slave_thread");
3492 #if !defined(DBUG_OFF)
3493 int simulate_error= 0;
3494 #endif
3495 thd->system_thread= (thd_type == SLAVE_THD_WORKER) ?
3496 SYSTEM_THREAD_SLAVE_WORKER : (thd_type == SLAVE_THD_SQL) ?
3497 SYSTEM_THREAD_SLAVE_SQL : SYSTEM_THREAD_SLAVE_IO;
3498 thd->security_ctx->skip_grants();
3499 my_net_init(&thd->net, 0);
3500 thd->slave_thread = 1;
3501 thd->enable_slow_log= TRUE;
3502 set_slave_thread_options(thd);
3503 mysql_mutex_lock(&LOCK_thread_count);
3504 thd->thread_id= thd->variables.pseudo_thread_id= thread_id++;
3505 mysql_mutex_unlock(&LOCK_thread_count);
3506
3507 DBUG_EXECUTE_IF("simulate_io_slave_error_on_init",
3508 simulate_error|= (1 << SLAVE_THD_IO););
3509 DBUG_EXECUTE_IF("simulate_sql_slave_error_on_init",
3510 simulate_error|= (1 << SLAVE_THD_SQL););
3511 #if !defined(DBUG_OFF)
3512 if (init_thr_lock() || thd->store_globals() || simulate_error & (1<< thd_type))
3513 #else
3514 if (init_thr_lock() || thd->store_globals())
3515 #endif
3516 {
3517 DBUG_RETURN(-1);
3518 }
3519
3520 if (thd_type == SLAVE_THD_SQL)
3521 {
3522 THD_STAGE_INFO(thd, stage_waiting_for_the_next_event_in_relay_log);
3523 }
3524 else
3525 {
3526 THD_STAGE_INFO(thd, stage_waiting_for_master_update);
3527 }
3528 thd->set_time();
3529 /* Do not use user-supplied timeout value for system threads. */
3530 thd->variables.lock_wait_timeout= LONG_TIMEOUT;
3531 DBUG_RETURN(0);
3532 }
3533
3534
3535 /**
3536 Sleep for a given amount of time or until killed.
3537
3538 @param thd Thread context of the current thread.
3539 @param seconds The number of seconds to sleep.
3540 @param func Function object to check if the thread has been killed.
3541 @param info The Rpl_info object associated with this sleep.
3542
3543 @retval True if the thread has been killed, false otherwise.
3544 */
3545 template <typename killed_func, typename rpl_info>
slave_sleep(THD * thd,time_t seconds,killed_func func,rpl_info info)3546 static inline bool slave_sleep(THD *thd, time_t seconds,
3547 killed_func func, rpl_info info)
3548 {
3549 bool ret;
3550 struct timespec abstime;
3551 mysql_mutex_t *lock= &info->sleep_lock;
3552 mysql_cond_t *cond= &info->sleep_cond;
3553
3554 /* Absolute system time at which the sleep time expires. */
3555 set_timespec(abstime, seconds);
3556
3557 mysql_mutex_lock(lock);
3558 thd->ENTER_COND(cond, lock, NULL, NULL);
3559
3560 while (! (ret= func(thd, info)))
3561 {
3562 int error= mysql_cond_timedwait(cond, lock, &abstime);
3563 if (error == ETIMEDOUT || error == ETIME)
3564 break;
3565 }
3566
3567 /* Implicitly unlocks the mutex. */
3568 thd->EXIT_COND(NULL);
3569
3570 return ret;
3571 }
3572
request_dump(THD * thd,MYSQL * mysql,Master_info * mi,bool * suppress_warnings)3573 static int request_dump(THD *thd, MYSQL* mysql, Master_info* mi,
3574 bool *suppress_warnings)
3575 {
3576 DBUG_ENTER("request_dump");
3577
3578 const int BINLOG_NAME_INFO_SIZE= strlen(mi->get_master_log_name());
3579 int error= 1;
3580 size_t command_size= 0;
3581 enum_server_command command= mi->is_auto_position() ?
3582 COM_BINLOG_DUMP_GTID : COM_BINLOG_DUMP;
3583 uchar* command_buffer= NULL;
3584 ushort binlog_flags= 0;
3585
3586 if (RUN_HOOK(binlog_relay_io,
3587 before_request_transmit,
3588 (thd, mi, binlog_flags)))
3589 goto err;
3590
3591 *suppress_warnings= false;
3592 if (command == COM_BINLOG_DUMP_GTID)
3593 {
3594 // get set of GTIDs
3595 Sid_map sid_map(NULL/*no lock needed*/);
3596 Gtid_set gtid_executed(&sid_map);
3597 global_sid_lock->wrlock();
3598 gtid_state->dbug_print();
3599
3600 /*
3601 We are unsure whether I/O thread retrieved the last gtid transaction
3602 completely or not (before it is going down because of a crash/normal
3603 shutdown/normal stop slave io_thread). It is possible that I/O thread
3604 would have retrieved and written only partial transaction events. So We
3605 request Master to send the last gtid event once again. We do this by
3606 removing the last I/O thread retrieved gtid event from
3607 "Retrieved_gtid_set". Possible cases: 1) I/O thread would have
3608 retrieved full transaction already in the first time itself, but
3609 retrieving them again will not cause problem because GTID number is
3610 same, Hence SQL thread will not commit it again. 2) I/O thread would
3611 have retrieved full transaction already and SQL thread would have
3612 already executed it. In that case, We are not going remove last
3613 retrieved gtid from "Retrieved_gtid_set" otherwise we will see gaps in
3614 "Retrieved set". The same case is handled in the below code. Please
3615 note there will be paritial transactions written in relay log but they
3616 will not cause any problem incase of transactional tables. But incase
3617 of non-transaction tables, partial trx will create inconsistency
3618 between master and slave. In that case, users need to check manually.
3619 */
3620
3621 Gtid_set * retrieved_set= (const_cast<Gtid_set *>(mi->rli->get_gtid_set()));
3622 Gtid *last_retrieved_gtid= mi->rli->get_last_retrieved_gtid();
3623
3624 /*
3625 Remove last_retrieved_gtid only if it is not part of
3626 executed_gtid_set
3627 */
3628 if (!last_retrieved_gtid->empty() &&
3629 !gtid_state->get_logged_gtids()->contains_gtid(*last_retrieved_gtid))
3630 {
3631 if (retrieved_set->_remove_gtid(*last_retrieved_gtid) != RETURN_STATUS_OK)
3632 {
3633 global_sid_lock->unlock();
3634 goto err;
3635 }
3636 }
3637
3638 if (gtid_executed.add_gtid_set(mi->rli->get_gtid_set()) != RETURN_STATUS_OK ||
3639 gtid_executed.add_gtid_set(gtid_state->get_logged_gtids()) !=
3640 RETURN_STATUS_OK)
3641 {
3642 global_sid_lock->unlock();
3643 goto err;
3644 }
3645 global_sid_lock->unlock();
3646
3647 // allocate buffer
3648 size_t encoded_data_size= gtid_executed.get_encoded_length();
3649 size_t allocation_size=
3650 ::BINLOG_FLAGS_INFO_SIZE + ::BINLOG_SERVER_ID_INFO_SIZE +
3651 ::BINLOG_NAME_SIZE_INFO_SIZE + BINLOG_NAME_INFO_SIZE +
3652 ::BINLOG_POS_INFO_SIZE + ::BINLOG_DATA_SIZE_INFO_SIZE +
3653 encoded_data_size + 1;
3654 if (!(command_buffer= (uchar *) my_malloc(allocation_size, MYF(MY_WME))))
3655 goto err;
3656 uchar* ptr_buffer= command_buffer;
3657
3658 DBUG_PRINT("info", ("Do I know something about the master? (binary log's name %s - auto position %d).",
3659 mi->get_master_log_name(), mi->is_auto_position()));
3660 /*
3661 Note: binlog_flags is always 0. However, in versions up to 5.6
3662 RC, the master would check the lowest bit and do something
3663 unexpected if it was set; in early versions of 5.6 it would also
3664 use the two next bits. Therefore, for backward compatibility,
3665 if we ever start to use the flags, we should leave the three
3666 lowest bits unused.
3667 */
3668 int2store(ptr_buffer, binlog_flags);
3669 ptr_buffer+= ::BINLOG_FLAGS_INFO_SIZE;
3670 int4store(ptr_buffer, server_id);
3671 ptr_buffer+= ::BINLOG_SERVER_ID_INFO_SIZE;
3672 int4store(ptr_buffer, BINLOG_NAME_INFO_SIZE);
3673 ptr_buffer+= ::BINLOG_NAME_SIZE_INFO_SIZE;
3674 memset(ptr_buffer, 0, BINLOG_NAME_INFO_SIZE);
3675 ptr_buffer+= BINLOG_NAME_INFO_SIZE;
3676 int8store(ptr_buffer, 4LL);
3677 ptr_buffer+= ::BINLOG_POS_INFO_SIZE;
3678
3679 int4store(ptr_buffer, encoded_data_size);
3680 ptr_buffer+= ::BINLOG_DATA_SIZE_INFO_SIZE;
3681 gtid_executed.encode(ptr_buffer);
3682 ptr_buffer+= encoded_data_size;
3683
3684 command_size= ptr_buffer - command_buffer;
3685 DBUG_ASSERT(command_size == (allocation_size - 1));
3686 }
3687 else
3688 {
3689 size_t allocation_size= ::BINLOG_POS_OLD_INFO_SIZE +
3690 BINLOG_NAME_INFO_SIZE + ::BINLOG_FLAGS_INFO_SIZE +
3691 ::BINLOG_SERVER_ID_INFO_SIZE + 1;
3692 if (!(command_buffer= (uchar *) my_malloc(allocation_size, MYF(MY_WME))))
3693 goto err;
3694 uchar* ptr_buffer= command_buffer;
3695
3696 int4store(ptr_buffer, mi->get_master_log_pos());
3697 ptr_buffer+= ::BINLOG_POS_OLD_INFO_SIZE;
3698 // See comment regarding binlog_flags above.
3699 int2store(ptr_buffer, binlog_flags);
3700 ptr_buffer+= ::BINLOG_FLAGS_INFO_SIZE;
3701 int4store(ptr_buffer, server_id);
3702 ptr_buffer+= ::BINLOG_SERVER_ID_INFO_SIZE;
3703 memcpy(ptr_buffer, mi->get_master_log_name(), BINLOG_NAME_INFO_SIZE);
3704 ptr_buffer+= BINLOG_NAME_INFO_SIZE;
3705
3706 command_size= ptr_buffer - command_buffer;
3707 DBUG_ASSERT(command_size == (allocation_size - 1));
3708 }
3709
3710 if (simple_command(mysql, command, command_buffer, command_size, 1))
3711 {
3712 /*
3713 Something went wrong, so we will just reconnect and retry later
3714 in the future, we should do a better error analysis, but for
3715 now we just fill up the error log :-)
3716 */
3717 if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
3718 *suppress_warnings= true; // Suppress reconnect warning
3719 else
3720 sql_print_error("Error on %s: %d %s, will retry in %d secs",
3721 command_name[command].str,
3722 mysql_errno(mysql), mysql_error(mysql),
3723 mi->connect_retry);
3724 goto err;
3725 }
3726 error= 0;
3727
3728 err:
3729 my_free(command_buffer);
3730 DBUG_RETURN(error);
3731 }
3732
3733
3734 /*
3735 Read one event from the master
3736
3737 SYNOPSIS
3738 read_event()
3739 mysql MySQL connection
3740 mi Master connection information
3741 suppress_warnings TRUE when a normal net read timeout has caused us to
3742 try a reconnect. We do not want to print anything to
3743 the error log in this case because this a anormal
3744 event in an idle server.
3745
3746 RETURN VALUES
3747 'packet_error' Error
3748 number Length of packet
3749 */
3750
read_event(MYSQL * mysql,Master_info * mi,bool * suppress_warnings)3751 static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings)
3752 {
3753 ulong len;
3754 DBUG_ENTER("read_event");
3755
3756 *suppress_warnings= FALSE;
3757 /*
3758 my_real_read() will time us out
3759 We check if we were told to die, and if not, try reading again
3760 */
3761 #ifndef DBUG_OFF
3762 if (disconnect_slave_event_count && !(mi->events_until_exit--))
3763 DBUG_RETURN(packet_error);
3764 #endif
3765
3766 len = cli_safe_read(mysql);
3767 if (len == packet_error || (long) len < 1)
3768 {
3769 if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
3770 {
3771 /*
3772 We are trying a normal reconnect after a read timeout;
3773 we suppress prints to .err file as long as the reconnect
3774 happens without problems
3775 */
3776 *suppress_warnings= TRUE;
3777 }
3778 else
3779 {
3780 if (!mi->abort_slave)
3781 {
3782 sql_print_error("Error reading packet from server: %s (server_errno=%d)",
3783 mysql_error(mysql), mysql_errno(mysql));
3784 }
3785 }
3786 DBUG_RETURN(packet_error);
3787 }
3788
3789 /* Check if eof packet */
3790 if (len < 8 && mysql->net.read_pos[0] == 254)
3791 {
3792 sql_print_information("Slave: received end packet from server due to dump "
3793 "thread being killed on master. Dump threads are "
3794 "killed for example during master shutdown, "
3795 "explicitly by a user, or when the master receives "
3796 "a binlog send request from a duplicate server "
3797 "UUID <%s> : Error %s", ::server_uuid,
3798 mysql_error(mysql));
3799 DBUG_RETURN(packet_error);
3800 }
3801
3802 DBUG_PRINT("exit", ("len: %lu net->read_pos[4]: %d",
3803 len, mysql->net.read_pos[4]));
3804 DBUG_RETURN(len - 1);
3805 }
3806
3807
3808 /**
3809 If this is a lagging slave (specified with CHANGE MASTER TO MASTER_DELAY = X), delays accordingly. Also unlocks rli->data_lock.
3810
3811 Design note: this is the place to unlock rli->data_lock. The lock
3812 must be held when reading delay info from rli, but it should not be
3813 held while sleeping.
3814
3815 @param ev Event that is about to be executed.
3816
3817 @param thd The sql thread's THD object.
3818
3819 @param rli The sql thread's Relay_log_info structure.
3820
3821 @retval 0 If the delay timed out and the event shall be executed.
3822
3823 @retval nonzero If the delay was interrupted and the event shall be skipped.
3824 */
sql_delay_event(Log_event * ev,THD * thd,Relay_log_info * rli)3825 static int sql_delay_event(Log_event *ev, THD *thd, Relay_log_info *rli)
3826 {
3827 long sql_delay= rli->get_sql_delay();
3828
3829 DBUG_ENTER("sql_delay_event");
3830 mysql_mutex_assert_owner(&rli->data_lock);
3831 DBUG_ASSERT(!rli->belongs_to_client());
3832
3833 int type= ev->get_type_code();
3834 if (sql_delay && type != ROTATE_EVENT &&
3835 type != FORMAT_DESCRIPTION_EVENT && type != START_EVENT_V3)
3836 {
3837 // The time when we should execute the event.
3838 time_t sql_delay_end=
3839 ev->when.tv_sec + rli->mi->clock_diff_with_master + sql_delay;
3840 // The current time.
3841 time_t now= my_time(0);
3842 // The time we will have to sleep before executing the event.
3843 unsigned long nap_time= 0;
3844 if (sql_delay_end > now)
3845 nap_time= sql_delay_end - now;
3846
3847 DBUG_PRINT("info", ("sql_delay= %lu "
3848 "ev->when= %lu "
3849 "rli->mi->clock_diff_with_master= %lu "
3850 "now= %ld "
3851 "sql_delay_end= %ld "
3852 "nap_time= %ld",
3853 sql_delay, (long) ev->when.tv_sec,
3854 rli->mi->clock_diff_with_master,
3855 (long)now, (long)sql_delay_end, (long)nap_time));
3856
3857 if (sql_delay_end > now)
3858 {
3859 DBUG_PRINT("info", ("delaying replication event %lu secs",
3860 nap_time));
3861 rli->start_sql_delay(sql_delay_end);
3862 mysql_mutex_unlock(&rli->data_lock);
3863 DBUG_RETURN(slave_sleep(thd, nap_time, sql_slave_killed, rli));
3864 }
3865 }
3866
3867 mysql_mutex_unlock(&rli->data_lock);
3868
3869 DBUG_RETURN(0);
3870 }
3871
/**
  Three-way comparison of two ulong values, usable as a sort_dynamic
  comparator.

  @param id1 Pointer to the left-hand value.
  @param id2 Pointer to the right-hand value.

  @return -1, 0 or 1 as specified by @c qsort_cmp: negative when
  *id1 < *id2, positive when *id1 > *id2, zero when they are equal.
*/
int ulong_cmp(ulong *id1, ulong *id2)
{
  const ulong lhs= *id1;
  const ulong rhs= *id2;

  if (lhs < rhs)
    return -1;
  if (lhs > rhs)
    return 1;
  return 0;
}
3880
/**
  Applies the given event and advances the relay log position.

  This is needed by the sql thread to execute events from the binlog,
  and by clients executing BINLOG statements. Conceptually, this
  function does:

  @code
    ev->apply_event(rli);
    ev->update_pos(rli);
  @endcode

  It also does the following maintenance:

   - Initializes the thread's server_id and time; and the event's
     thread.

   - If !rli->belongs_to_client() (i.e., if it belongs to the slave
     sql thread instead of being used for executing BINLOG
     statements), it does the following things: (1) skips events if it
     is needed according to the server id or slave_skip_counter; (2)
     unlocks rli->data_lock; (3) sleeps if required by 'CHANGE MASTER
     TO MASTER_DELAY=X'; (4) maintains the running state of the sql
     thread (rli->thread_state).

   - Reports errors as needed.

  @param ptr_ev a pointer to a reference to the event to apply.

  @param thd The client thread that executes the event (i.e., the
  slave sql thread if called from a replication slave, or the client
  thread if called to execute a BINLOG statement).

  @param rli The relay log info (i.e., the slave's rli if called from
  a replication slave, or the client's thd->rli_fake if called to
  execute a BINLOG statement).

  @note MTS can store NULL to @c ptr_ev location to indicate
  the event is taken over by a Worker.

  @retval SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK
          OK. Also returned when sql_delay_event() returns nonzero; in
          that case the event is neither applied nor is its position
          updated.

  @retval SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPLY_ERROR
          Error calling ev->apply_event().

  @retval SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR
          No error calling ev->apply_event(), but error calling
          ev->update_pos().

  @retval SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR
          append_item_to_jobs() failed, thread was killed while waiting
          for successful enqueue on worker.
*/
enum enum_slave_apply_event_and_update_pos_retval
apply_event_and_update_pos(Log_event** ptr_ev, THD* thd, Relay_log_info* rli)
{
  int exec_res= 0;
  bool skip_event= FALSE;
  Log_event *ev= *ptr_ev;
  Log_event::enum_skip_reason reason= Log_event::EVENT_SKIP_NOT;

  DBUG_ENTER("apply_event_and_update_pos");

  DBUG_PRINT("exec_event",("%s(type_code: %d; server_id: %d)",
                           ev->get_type_str(), ev->get_type_code(),
                           ev->server_id));
  DBUG_PRINT("info", ("thd->options: %s%s; rli->last_event_start_time: %lu",
                      FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
                      FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
                      (ulong) rli->last_event_start_time));

  /*
    Execute the event to change the database and update the binary
    log coordinates, but first we set some data that is needed for
    the thread.

    The event will be executed unless it is supposed to be skipped.

    Queries originating from this server must be skipped. Low-level
    events (Format_description_log_event, Rotate_log_event,
    Stop_log_event) from this server must also be skipped. But for
    those we don't want to modify 'group_master_log_pos', because
    these events did not exist on the master.
    Format_description_log_event is not completely skipped.

    Skip queries specified by the user in 'slave_skip_counter'. We
    can't however skip events that have something to do with the log
    files themselves.

    Filtering on own server id is extremely important, to ignore
    execution of events created by the creation/rotation of the relay
    log (remember that now the relay log starts with its Format_desc,
    has a Rotate etc).
  */
  /*
    Set the unmasked and actual server ids from the event
  */
  thd->server_id = ev->server_id; // use the original server id for logging
  thd->unmasked_server_id = ev->unmasked_server_id;
  thd->set_time(); // time the query
  thd->lex->current_select= 0;
  // An event without a timestamp gets the current time.
  if (!ev->when.tv_sec)
    my_micro_time_to_timeval(my_micro_time(), &ev->when);
  ev->thd = thd; // because up to this point, ev->thd == 0

  /*
    Ask the event whether it must be skipped, unless MTS recovery is in
    progress and the bit for the current recovery index is set in
    rli->recovery_groups; in that case reason stays EVENT_SKIP_NOT.
  */
  if (!(rli->is_mts_recovery() && bitmap_is_set(&rli->recovery_groups,
                                                 rli->mts_recovery_index)))
  {
    reason= ev->shall_skip(rli);
  }
#ifndef DBUG_OFF
  if (rli->is_mts_recovery())
  {
    DBUG_PRINT("mts", ("Mts is recovering %d, number of bits set %d, "
                       "bitmap is set %d, index %lu.\n",
                       rli->is_mts_recovery(),
                       bitmap_bits_set(&rli->recovery_groups),
                       bitmap_is_set(&rli->recovery_groups,
                                     rli->mts_recovery_index),
                       rli->mts_recovery_index));
  }
#endif
  if (reason == Log_event::EVENT_SKIP_COUNT)
  {
    // Consume one unit of the skip counter and mirror it in the global.
    sql_slave_skip_counter= --rli->slave_skip_counter;
    skip_event= TRUE;
  }
  if (reason == Log_event::EVENT_SKIP_NOT)
  {
    // Sleeps if needed, and unlocks rli->data_lock.
    if (sql_delay_event(ev, thd, rli))
      DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK);

    exec_res= ev->apply_event(rli);
    DBUG_EXECUTE_IF("simulate_stop_when_mts_in_group",
                    if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP
                        && rli->curr_group_seen_begin)
                      DBUG_SET("+d,stop_when_mts_in_group"););

    /*
      NOTE(review): ev->worker == rli appears to mean that apply_event()
      executed the event in this thread rather than handing it to a
      Worker -- confirm against Log_event::apply_event().
    */
    if (!exec_res && (ev->worker != rli))
    {
      if (ev->worker)
      {
        Slave_job_item item= {ev}, *job_item= &item;
        Slave_worker *w= (Slave_worker *) ev->worker;
        // specially marked group typically with OVER_MAX_DBS_IN_EVENT_MTS db:s
        bool need_sync= ev->is_mts_group_isolated();

        // all events except BEGIN-query must be marked with a non-NULL Worker
        DBUG_ASSERT(((Slave_worker*) ev->worker) == rli->last_assigned_worker);

        DBUG_PRINT("Log_event::apply_event:",
                   ("-> job item data %p to W_%lu", job_item->data, w->id));

        // Reset mts in-group state
        if (rli->mts_group_status == Relay_log_info::MTS_END_GROUP)
        {
          // CGAP cleanup: remove the elements from last to first
          for (uint i= rli->curr_group_assigned_parts.elements; i > 0; i--)
            delete_dynamic_element(&rli->
                                   curr_group_assigned_parts, i - 1);
          // reset the B-group and Gtid-group marker
          rli->curr_group_seen_begin= rli->curr_group_seen_gtid= false;
          rli->last_assigned_worker= NULL;
        }
        /*
          Storing GAQ index of the group that the event belongs to
          in the event. Deferred events are handled similarly below.
        */
        ev->mts_group_idx= rli->gaq->assigned_group_index;

        bool append_item_to_jobs_error= false;
        if (rli->curr_group_da.elements > 0)
        {
          /*
            The current event sorted out which partition the current group
            belongs to. It's time now to process the deferred array events.
          */
          for (uint i= 0; i < rli->curr_group_da.elements; i++)
          {
            Slave_job_item da_item;
            get_dynamic(&rli->curr_group_da, (uchar*) &da_item.data, i);
            DBUG_PRINT("mts", ("Assigning job %llu to worker %lu",
                               ((Log_event* )da_item.data)->log_pos, w->id));
            static_cast<Log_event*>(da_item.data)->mts_group_idx=
              rli->gaq->assigned_group_index; // similarly to above
            /*
              Once one enqueue has failed, no further deferred event is
              enqueued; the failed one and all remaining ones are deleted.
            */
            if (!append_item_to_jobs_error)
              append_item_to_jobs_error= append_item_to_jobs(&da_item, w, rli);
            if (append_item_to_jobs_error)
              delete static_cast<Log_event*>(da_item.data);
          }
          if (rli->curr_group_da.elements > rli->curr_group_da.max_element)
          {
            // reallocate to less mem
            rli->curr_group_da.elements= rli->curr_group_da.max_element;
            rli->curr_group_da.max_element= 0;
            freeze_size(&rli->curr_group_da); // restores max_element
          }
          // The deferred array is now empty.
          rli->curr_group_da.elements= 0;
        }
        if (append_item_to_jobs_error)
          DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR);

        DBUG_PRINT("mts", ("Assigning job %llu to worker %lu\n",
                           ((Log_event* )job_item->data)->log_pos, w->id));

        /* Notice `ev' instance can be destroyed after `append()' */
        if (append_item_to_jobs(job_item, w, rli))
          DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR);
        if (need_sync)
        {
          /*
            combination of over-max db:s and end of the current group
            forces to wait for the assigned groups completion by assigned
            to the event worker.
            Indeed MTS group status could be safely set to MTS_NOT_IN_GROUP
            after wait_() returns.
            No need to know a possible error out of synchronization call.
          */
          (void) wait_for_workers_to_finish(rli);
        }

      }
      *ptr_ev= NULL; // announcing the event is passed to w-worker

      /*
        Periodic MTS statistics: considered only when log_warnings > 1 and
        on every 1024th assigned event, and printed only if at least
        mts_online_stat_period seconds passed since the previous report.
      */
      if (log_warnings > 1 &&
          rli->is_parallel_exec() && rli->mts_events_assigned % 1024 == 1)
      {
        time_t my_now= my_time(0);

        if ((my_now - rli->mts_last_online_stat) >=
            mts_online_stat_period)
        {
          sql_print_information("Multi-threaded slave statistics: "
                                "seconds elapsed = %lu; "
                                "events assigned = %llu; "
                                "worker queues filled over overrun level = %lu; "
                                "waited due a Worker queue full = %lu; "
                                "waited due the total size = %lu; "
                                "slept when Workers occupied = %lu ",
                                static_cast<unsigned long>
                                (my_now - rli->mts_last_online_stat),
                                rli->mts_events_assigned,
                                rli->mts_wq_overrun_cnt,
                                rli->mts_wq_overfill_cnt,
                                rli->wq_size_waits_cnt,
                                rli->mts_wq_no_underrun_cnt);
          rli->mts_last_online_stat= my_now;
        }
      }
    }
  }
  else
    /*
      The event is skipped, so sql_delay_event() was not called and
      rli->data_lock has to be released here.
    */
    mysql_mutex_unlock(&rli->data_lock);

  DBUG_PRINT("info", ("apply_event error = %d", exec_res));
  if (exec_res == 0)
  {
    /*
      Positions are not updated here when an XID is processed. To make
      a slave crash-safe, positions must be updated while processing a
      XID event and as such do not need to be updated here again.

      However, if the event needs to be skipped, this means that it
      will not be processed and then positions need to be updated here.

      See sql/rpl_rli.h for further details.
    */
    int error= 0;
    if (*ptr_ev &&
        (ev->get_type_code() != XID_EVENT ||
         skip_event || (rli->is_mts_recovery() && !is_gtid_event(ev) &&
         (ev->ends_group() || !rli->mts_recovery_group_seen_begin) &&
          bitmap_is_set(&rli->recovery_groups, rli->mts_recovery_index))))
    {
#ifndef DBUG_OFF
      /*
        This only prints information to the debug trace.

        TODO: Print an informational message to the error log?
      */
      static const char *const explain[] = {
        // EVENT_SKIP_NOT,
        "not skipped",
        // EVENT_SKIP_IGNORE,
        "skipped because event should be ignored",
        // EVENT_SKIP_COUNT
        "skipped because event skip counter was non-zero"
      };
      DBUG_PRINT("info", ("OPTION_BEGIN: %d; IN_STMT: %d",
                          MY_TEST(thd->variables.option_bits & OPTION_BEGIN),
                          rli->get_flag(Relay_log_info::IN_STMT)));
      DBUG_PRINT("skip_event", ("%s event was %s",
                                ev->get_type_str(), explain[reason]));
#endif

      error= ev->update_pos(rli);

#ifndef DBUG_OFF
      DBUG_PRINT("info", ("update_pos error = %d", error));
      if (!rli->belongs_to_client())
      {
        char buf[22];
        DBUG_PRINT("info", ("group %s %s",
                            llstr(rli->get_group_relay_log_pos(), buf),
                            rli->get_group_relay_log_name()));
        DBUG_PRINT("info", ("event %s %s",
                            llstr(rli->get_event_relay_log_pos(), buf),
                            rli->get_event_relay_log_name()));
      }
#endif
    }
    else
    {
      /*
        update_pos() is not called on this path; only the event relay
        log position is advanced.
      */
      DBUG_ASSERT(*ptr_ev == ev || rli->is_parallel_exec() ||
                  (!ev->worker &&
                   (ev->get_type_code() == INTVAR_EVENT ||
                    ev->get_type_code() == RAND_EVENT ||
                    ev->get_type_code() == USER_VAR_EVENT)));

      rli->inc_event_relay_log_pos();
    }

    /*
      MTS recovery bookkeeping. Rotate, Format_description and
      Previous_gtids events do not take part in it.
    */
    if (!error && rli->is_mts_recovery() &&
        ev->get_type_code() != ROTATE_EVENT &&
        ev->get_type_code() != FORMAT_DESCRIPTION_EVENT &&
        ev->get_type_code() != PREVIOUS_GTIDS_LOG_EVENT)
    {
      if (ev->starts_group())
      {
        rli->mts_recovery_group_seen_begin= true;
      }
      else if ((ev->ends_group() || !rli->mts_recovery_group_seen_begin) &&
               !is_gtid_event(ev))
      {
        // One more group has been passed; recovery ends at zero groups left.
        rli->mts_recovery_index++;
        if (--rli->mts_recovery_group_cnt == 0)
        {
          rli->mts_recovery_index= 0;
          sql_print_information("Slave: MTS Recovery has completed at "
                                "relay log %s, position %llu "
                                "master log %s, position %llu.",
                                rli->get_group_relay_log_name(),
                                rli->get_group_relay_log_pos(),
                                rli->get_group_master_log_name(),
                                rli->get_group_master_log_pos());
          /*
             Few tests wait for UNTIL_SQL_AFTER_MTS_GAPS completion.
             Due to existing convention the status won't change
             prior to slave restarts.
             So marking of UNTIL_SQL_AFTER_MTS_GAPS completion is done here,
             and only in the debug build to make the test to catch the change
             despite a faulty design of UNTIL checking before execution.
          */
          if (rli->until_condition == Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS)
          {
            rli->until_condition= Relay_log_info::UNTIL_DONE;
          }
          // reset the Worker tables to remove last slave session time info
          if ((error= rli->mts_finalize_recovery()))
          {
            (void) Rpl_info_factory::reset_workers(rli);
          }
        }
        rli->mts_recovery_group_seen_begin= false;
        if (!error)
          error= rli->flush_info(true);
      }
    }

    if (error)
    {
      /*
        The update should not fail, so print an error message and
        return an error code.

        TODO: Replace this with a decent error message when merged
        with BUG#24954 (which adds several new error message).
      */
      char buf[22];
      rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR,
                  "It was not possible to update the positions"
                  " of the relay log information: the slave may"
                  " be in an inconsistent state."
                  " Stopped in %s position %s",
                  rli->get_group_relay_log_name(),
                  llstr(rli->get_group_relay_log_pos(), buf));
      DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR);
    }
  }

  DBUG_RETURN(exec_res ? SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPLY_ERROR
                       : SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK);
}
4276
4277 /**
4278 Let the worker applying the current group to rollback and gracefully
4279 finish its work before.
4280
4281 @param rli The slave's relay log info.
4282
4283 @param ev a pointer to the event on hold before applying this rollback
4284 procedure.
4285
4286 @retval false The rollback succeeded.
4287
4288 @retval true There was an error while injecting events.
4289 */
coord_handle_partial_binlogged_transaction(Relay_log_info * rli,const Log_event * ev)4290 static bool coord_handle_partial_binlogged_transaction(Relay_log_info *rli,
4291 const Log_event *ev)
4292 {
4293 DBUG_ENTER("coord_handle_partial_binlogged_transaction");
4294 /*
4295 This function is called holding the rli->data_lock.
4296 We must return it still holding this lock, except in the case of returning
4297 error.
4298 */
4299 mysql_mutex_assert_owner(&rli->data_lock);
4300 THD *thd= rli->info_thd;
4301
4302 if (!rli->curr_group_seen_begin)
4303 {
4304 DBUG_PRINT("info",("Injecting QUERY(BEGIN) to rollback worker"));
4305 Log_event *begin_event= new Query_log_event(thd,
4306 STRING_WITH_LEN("BEGIN"),
4307 true, /* using_trans */
4308 false, /* immediate */
4309 true, /* suppress_use */
4310 0, /* error */
4311 true /* ignore_command */);
4312 ((Query_log_event*) begin_event)->db= "";
4313 begin_event->data_written= 0;
4314 begin_event->server_id= ev->server_id;
4315 /*
4316 We must be careful to avoid SQL thread increasing its position
4317 farther than the event that triggered this QUERY(BEGIN).
4318 */
4319 begin_event->log_pos= ev->log_pos;
4320 begin_event->future_event_relay_log_pos= ev->future_event_relay_log_pos;
4321
4322 if (apply_event_and_update_pos(&begin_event, thd, rli) !=
4323 SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK)
4324 {
4325 delete begin_event;
4326 DBUG_RETURN(true);
4327 }
4328 mysql_mutex_lock(&rli->data_lock);
4329 }
4330
4331 DBUG_PRINT("info",("Injecting QUERY(ROLLBACK) to rollback worker"));
4332 Log_event *rollback_event= new Query_log_event(thd,
4333 STRING_WITH_LEN("ROLLBACK"),
4334 true, /* using_trans */
4335 false, /* immediate */
4336 true, /* suppress_use */
4337 0, /* error */
4338 true /* ignore_command */);
4339 ((Query_log_event*) rollback_event)->db= "";
4340 rollback_event->data_written= 0;
4341 rollback_event->server_id= ev->server_id;
4342 /*
4343 We must be careful to avoid SQL thread increasing its position
4344 farther than the event that triggered this QUERY(ROLLBACK).
4345 */
4346 rollback_event->log_pos= ev->log_pos;
4347 rollback_event->future_event_relay_log_pos= ev->future_event_relay_log_pos;
4348
4349 if (apply_event_and_update_pos(&rollback_event, thd, rli) !=
4350 SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK)
4351 {
4352 delete rollback_event;
4353 DBUG_RETURN(true);
4354 }
4355 mysql_mutex_lock(&rli->data_lock);
4356
4357 DBUG_RETURN(false);
4358 }
4359
4360 /**
4361 Top-level function for executing the next event in the relay log.
4362 This is called from the SQL thread.
4363
4364 This function reads the event from the relay log, executes it, and
4365 advances the relay log position. It also handles errors, etc.
4366
4367 This function may fail to apply the event for the following reasons:
4368
   - The position specified by the UNTIL condition of the START SLAVE
4370 command is reached.
4371
4372 - It was not possible to read the event from the log.
4373
4374 - The slave is killed.
4375
4376 - An error occurred when applying the event, and the event has been
4377 tried slave_trans_retries times. If the event has been retried
4378 fewer times, 0 is returned.
4379
4380 - init_info or init_relay_log_pos failed. (These are called
4381 if a failure occurs when applying the event.)
4382
4383 - An error occurred when updating the binlog position.
4384
4385 @retval 0 The event was applied.
4386
4387 @retval 1 The event was not applied.
4388 */
exec_relay_log_event(THD * thd,Relay_log_info * rli)4389 static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
4390 {
4391 DBUG_ENTER("exec_relay_log_event");
4392
4393 /*
4394 We acquire this mutex since we need it for all operations except
4395 event execution. But we will release it in places where we will
4396 wait for something for example inside of next_event().
4397 */
4398 mysql_mutex_lock(&rli->data_lock);
4399
4400 /*
4401 UNTIL_SQL_AFTER_GTIDS requires special handling since we have to check
4402 whether the until_condition is satisfied *before* the SQL threads goes on
4403 a wait inside next_event() for the relay log to grow. This is reuired since
4404 if we have already applied the last event in the waiting set but since he
4405 check happens only at the start of the next event we may end up waiting
4406 forever the next event is not available or is delayed.
4407 */
4408 if (rli->until_condition == Relay_log_info::UNTIL_SQL_AFTER_GTIDS &&
4409 rli->is_until_satisfied(thd, NULL))
4410 {
4411 rli->abort_slave= 1;
4412 mysql_mutex_unlock(&rli->data_lock);
4413 DBUG_RETURN(1);
4414 }
4415
4416 Log_event *ev = next_event(rli), **ptr_ev;
4417
4418 DBUG_ASSERT(rli->info_thd==thd);
4419
4420 if (sql_slave_killed(thd,rli))
4421 {
4422 mysql_mutex_unlock(&rli->data_lock);
4423 delete ev;
4424 DBUG_RETURN(1);
4425 }
4426 if (ev)
4427 {
4428 enum enum_slave_apply_event_and_update_pos_retval exec_res;
4429
4430 ptr_ev= &ev;
4431 /*
4432 Even if we don't execute this event, we keep the master timestamp,
4433 so that seconds behind master shows correct delta (there are events
4434 that are not replayed, so we keep falling behind).
4435
4436 If it is an artificial event, or a relay log event (IO thread generated
4437 event) or ev->when is set to 0, or a FD from master, or a heartbeat
4438 event with server_id '0' then we don't update the last_master_timestamp.
4439
4440 In case of parallel execution last_master_timestamp is only updated when
4441 an event is taken out of GAQ. Thus when last_master_timestamp is 0 we need
4442 to initialize it with a timestamp from the first event to be executed in
4443 parallel.
4444 */
4445 if ((!rli->is_parallel_exec() || rli->last_master_timestamp == 0) &&
4446 !(ev->is_artificial_event() || ev->is_relay_log_event() ||
4447 ev->when.tv_sec == 0 ||
4448 ev->get_type_code() == FORMAT_DESCRIPTION_EVENT ||
4449 ev->server_id == 0))
4450 {
4451 rli->last_master_timestamp= ev->when.tv_sec + (time_t) ev->exec_time;
4452 DBUG_ASSERT(rli->last_master_timestamp >= 0);
4453 }
4454
4455 /*
4456 This tests if the position of the beginning of the current event
4457 hits the UNTIL barrier.
4458 MTS: since the master and the relay-group coordinates change
4459 asynchronously logics of rli->is_until_satisfied() can't apply.
4460 A special UNTIL_SQL_AFTER_MTS_GAPS is still deployed here
4461 temporarily (see is_until_satisfied todo).
4462 */
4463 if (rli->until_condition != Relay_log_info::UNTIL_NONE &&
4464 rli->until_condition != Relay_log_info::UNTIL_SQL_AFTER_GTIDS &&
4465 rli->is_until_satisfied(thd, ev))
4466 {
4467 /*
4468 Setting abort_slave flag because we do not want additional message about
4469 error in query execution to be printed.
4470 */
4471 rli->abort_slave= 1;
4472 mysql_mutex_unlock(&rli->data_lock);
4473 delete ev;
4474 DBUG_RETURN(1);
4475 }
4476
4477 { /**
4478 The following failure injecion works in cooperation with tests
4479 setting @@global.debug= 'd,incomplete_group_in_relay_log'.
4480 Xid or Commit events are not executed to force the slave sql
4481 read hanging if the realy log does not have any more events.
4482 */
4483 DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
4484 if ((ev->get_type_code() == XID_EVENT) ||
4485 ((ev->get_type_code() == QUERY_EVENT) &&
4486 strcmp("COMMIT", ((Query_log_event *) ev)->query) == 0))
4487 {
4488 DBUG_ASSERT(thd->transaction.all.cannot_safely_rollback());
4489 rli->abort_slave= 1;
4490 mysql_mutex_unlock(&rli->data_lock);
4491 delete ev;
4492 rli->inc_event_relay_log_pos();
4493 DBUG_RETURN(0);
4494 };);
4495 }
4496
4497 /*
4498 GTID protocol will put a FORMAT_DESCRIPTION_EVENT from the master with
4499 log_pos != 0 after each (re)connection if auto positioning is enabled.
4500 This means that the SQL thread might have already started to apply the
4501 current group but, as the IO thread had to reconnect, it left this
4502 group incomplete and will start it again from the beginning.
4503 So, before applying this FORMAT_DESCRIPTION_EVENT, we must let the
4504 worker roll back the current group and gracefully finish its work,
4505 before starting to apply the new (complete) copy of the group.
4506 */
4507 if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
4508 ev->server_id != ::server_id && ev->log_pos != 0 &&
4509 rli->is_parallel_exec() && rli->curr_group_seen_gtid)
4510 {
4511 if (coord_handle_partial_binlogged_transaction(rli, ev))
4512 /*
4513 In the case of an error, coord_handle_partial_binlogged_transaction
4514 will not try to get the rli->data_lock again.
4515 */
4516 DBUG_RETURN(1);
4517 }
4518
4519 /* ptr_ev can change to NULL indicating MTS coorinator passed to a Worker */
4520 exec_res= apply_event_and_update_pos(ptr_ev, thd, rli);
4521 /*
4522 Note: the above call to apply_event_and_update_pos executes
4523 mysql_mutex_unlock(&rli->data_lock);
4524 */
4525
4526 /* For deferred events, the ptr_ev is set to NULL
4527 in Deferred_log_events::add() function.
4528 Hence deferred events wont be deleted here.
4529 They will be deleted in Deferred_log_events::rewind() funciton.
4530 */
4531 if (*ptr_ev)
4532 {
4533 DBUG_ASSERT(*ptr_ev == ev); // event remains to belong to Coordinator
4534
4535 DBUG_EXECUTE_IF("dbug.calculate_sbm_after_previous_gtid_log_event",
4536 {
4537 if (ev->get_type_code() == PREVIOUS_GTIDS_LOG_EVENT)
4538 {
4539 const char act[]= "now signal signal.reached wait_for signal.done_sbm_calculation";
4540 DBUG_ASSERT(opt_debug_sync_timeout > 0);
4541 DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act)));
4542 }
4543 };);
4544 DBUG_EXECUTE_IF("dbug.calculate_sbm_after_fake_rotate_log_event",
4545 {
4546 if (ev->get_type_code() == ROTATE_EVENT && ev->is_artificial_event())
4547 {
4548 const char act[]= "now signal signal.reached wait_for signal.done_sbm_calculation";
4549 DBUG_ASSERT(opt_debug_sync_timeout > 0);
4550 DBUG_ASSERT(!debug_sync_set_action(thd,
4551 STRING_WITH_LEN(act)));
4552 }
4553 };);
4554 /*
4555 Format_description_log_event should not be deleted because it will be
4556 used to read info about the relay log's format; it will be deleted when
4557 the SQL thread does not need it, i.e. when this thread terminates.
4558 ROWS_QUERY_LOG_EVENT is destroyed at the end of the current statement
4559 clean-up routine.
4560 */
4561 if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT &&
4562 ev->get_type_code() != ROWS_QUERY_LOG_EVENT)
4563 {
4564 DBUG_PRINT("info", ("Deleting the event after it has been executed"));
4565 delete ev;
4566 ev= NULL;
4567 }
4568 }
4569
4570 /*
4571 exec_res == SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR
4572 update_log_pos failed: this should not happen, so we
4573 don't retry.
4574 exec_res == SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR
4575 append_item_to_jobs() failed, this happened because
4576 thread was killed while waiting for enqueue on worker.
4577 */
4578 if (exec_res >= SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR)
4579 {
4580 delete ev;
4581 DBUG_RETURN(1);
4582 }
4583
4584 if (slave_trans_retries)
4585 {
4586 int UNINIT_VAR(temp_err);
4587 bool silent= false;
4588 if (exec_res && !is_mts_worker(thd) /* no reexecution in MTS mode */ &&
4589 (temp_err= rli->has_temporary_error(thd, 0, &silent)) &&
4590 !thd->transaction.all.cannot_safely_rollback())
4591 {
4592 const char *errmsg;
4593 /*
4594 We were in a transaction which has been rolled back because of a
4595 temporary error;
4596 let's seek back to BEGIN log event and retry it all again.
4597 Note, if lock wait timeout (innodb_lock_wait_timeout exceeded)
4598 there is no rollback since 5.0.13 (ref: manual).
4599 We have to not only seek but also
4600 a) init_info(), to seek back to hot relay log's start for later
4601 (for when we will come back to this hot log after re-processing the
4602 possibly existing old logs where BEGIN is: check_binlog_magic() will
4603 then need the cache to be at position 0 (see comments at beginning of
4604 init_info()).
4605 b) init_relay_log_pos(), because the BEGIN may be an older relay log.
4606 */
4607 if (rli->trans_retries < slave_trans_retries)
4608 {
        /*
          The transaction has to be rolled back before global_init_info() is
          called, because global_init_info() will start a new transaction if
          master_info_repository is TABLE.
        */
4614 rli->cleanup_context(thd, 1);
4615 /*
4616 We need to figure out if there is a test case that covers
4617 this part. \Alfranio.
4618 */
4619 if (global_init_info(rli->mi, false, SLAVE_SQL))
4620 sql_print_error("Failed to initialize the master info structure");
4621 else if (rli->init_relay_log_pos(rli->get_group_relay_log_name(),
4622 rli->get_group_relay_log_pos(),
4623 true/*need_data_lock=true*/,
4624 &errmsg, 1))
4625 sql_print_error("Error initializing relay log position: %s",
4626 errmsg);
4627 else
4628 {
4629 exec_res= SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK;
4630 /* chance for concurrent connection to get more locks */
4631 slave_sleep(thd, min<ulong>(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
4632 sql_slave_killed, rli);
4633 mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS
4634 if (!silent)
4635 rli->trans_retries++;
4636
4637 rli->retried_trans++;
4638 mysql_mutex_unlock(&rli->data_lock);
4639 DBUG_PRINT("info", ("Slave retries transaction "
4640 "rli->trans_retries: %lu", rli->trans_retries));
4641 }
4642 }
4643 else
4644 {
4645 thd->is_fatal_error= 1;
4646 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
4647 "Slave SQL thread retried transaction %lu time(s) "
4648 "in vain, giving up. Consider raising the value of "
4649 "the slave_transaction_retries variable.", rli->trans_retries);
4650 }
4651 }
4652 else if ((exec_res && !temp_err) ||
4653 (opt_using_transactions &&
4654 rli->get_group_relay_log_pos() == rli->get_event_relay_log_pos()))
4655 {
4656 /*
4657 Only reset the retry counter if the entire group succeeded
4658 or failed with a non-transient error. On a successful
4659 event, the execution will proceed as usual; in the case of a
4660 non-transient error, the slave will stop with an error.
4661 */
4662 rli->trans_retries= 0; // restart from fresh
4663 DBUG_PRINT("info", ("Resetting retry counter, rli->trans_retries: %lu",
4664 rli->trans_retries));
4665 }
4666 }
4667 if (exec_res)
4668 delete ev;
4669 DBUG_RETURN(exec_res);
4670 }
4671 mysql_mutex_unlock(&rli->data_lock);
4672 rli->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_READ_FAILURE,
4673 ER(ER_SLAVE_RELAY_LOG_READ_FAILURE), "\
4674 Could not parse relay log event entry. The possible reasons are: the master's \
4675 binary log is corrupted (you can check this by running 'mysqlbinlog' on the \
4676 binary log), the slave's relay log is corrupted (you can check this by running \
4677 'mysqlbinlog' on the relay log), a network problem, or a bug in the master's \
4678 or slave's MySQL code. If you want to check the master's binary log or slave's \
4679 relay log, you will be able to know their names by issuing 'SHOW SLAVE STATUS' \
4680 on this slave.\
4681 ");
4682 DBUG_RETURN(1);
4683 }
4684
check_io_slave_killed(THD * thd,Master_info * mi,const char * info)4685 static bool check_io_slave_killed(THD *thd, Master_info *mi, const char *info)
4686 {
4687 if (io_slave_killed(thd, mi))
4688 {
4689 if (info && log_warnings)
4690 sql_print_information("%s", info);
4691 return TRUE;
4692 }
4693 return FALSE;
4694 }
4695
4696 /**
4697 @brief Try to reconnect slave IO thread.
4698
4699 @details Terminates current connection to master, sleeps for
4700 @c mi->connect_retry msecs and initiates new connection with
4701 @c safe_reconnect(). Variable pointed by @c retry_count is increased -
4702 if it exceeds @c mi->retry_count then connection is not re-established
4703 and function signals error.
4704 Unless @c suppres_warnings is TRUE, a warning is put in the server error log
4705 when reconnecting. The warning message and messages used to report errors
4706 are taken from @c messages array. In case @c mi->retry_count is exceeded,
4707 no messages are added to the log.
4708
4709 @param[in] thd Thread context.
4710 @param[in] mysql MySQL connection.
4711 @param[in] mi Master connection information.
4712 @param[in,out] retry_count Number of attempts to reconnect.
4713 @param[in] suppress_warnings TRUE when a normal net read timeout
4714 has caused to reconnecting.
4715 @param[in] messages Messages to print/log, see
4716 reconnect_messages[] array.
4717
4718 @retval 0 OK.
4719 @retval 1 There was an error.
4720 */
4721
try_to_reconnect(THD * thd,MYSQL * mysql,Master_info * mi,uint * retry_count,bool suppress_warnings,const char * messages[SLAVE_RECON_MSG_MAX])4722 static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
4723 uint *retry_count, bool suppress_warnings,
4724 const char *messages[SLAVE_RECON_MSG_MAX])
4725 {
4726 mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
4727 thd->proc_info= messages[SLAVE_RECON_MSG_WAIT];
4728 #ifdef SIGNAL_WITH_VIO_SHUTDOWN
4729 thd->clear_active_vio();
4730 #endif
4731 end_server(mysql);
4732 DBUG_EXECUTE_IF("simulate_no_master_reconnect",
4733 {
4734 return 1;
4735 });
4736 if ((*retry_count)++)
4737 {
4738 if (*retry_count > mi->retry_count)
4739 return 1; // Don't retry forever
4740 slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
4741 }
4742 if (check_io_slave_killed(thd, mi, messages[SLAVE_RECON_MSG_KILLED_WAITING]))
4743 return 1;
4744 thd->proc_info = messages[SLAVE_RECON_MSG_AFTER];
4745 if (!suppress_warnings)
4746 {
4747 char buf[256], llbuff[22];
4748 my_snprintf(buf, sizeof(buf), messages[SLAVE_RECON_MSG_FAILED],
4749 mi->get_io_rpl_log_name(), llstr(mi->get_master_log_pos(),
4750 llbuff));
4751 /*
4752 Raise a warining during registering on master/requesting dump.
4753 Log a message reading event.
4754 */
4755 if (messages[SLAVE_RECON_MSG_COMMAND][0])
4756 {
4757 mi->report(WARNING_LEVEL, ER_SLAVE_MASTER_COM_FAILURE,
4758 ER(ER_SLAVE_MASTER_COM_FAILURE),
4759 messages[SLAVE_RECON_MSG_COMMAND], buf);
4760 }
4761 else
4762 {
4763 sql_print_information("%s", buf);
4764 }
4765 }
4766 if (safe_reconnect(thd, mysql, mi, 1) || io_slave_killed(thd, mi))
4767 {
4768 if (log_warnings)
4769 sql_print_information("%s", messages[SLAVE_RECON_MSG_KILLED_AFTER]);
4770 return 1;
4771 }
4772 return 0;
4773 }
4774
4775
4776 /**
4777 Slave IO thread entry point.
4778
4779 @param arg Pointer to Master_info struct that holds information for
4780 the IO thread.
4781
4782 @return Always 0.
4783 */
handle_slave_io(void * arg)4784 pthread_handler_t handle_slave_io(void *arg)
4785 {
4786 THD *thd= NULL; // needs to be first for thread_stack
4787 bool thd_added= false;
4788 MYSQL *mysql;
4789 Master_info *mi = (Master_info*)arg;
4790 Relay_log_info *rli= mi->rli;
4791 char llbuff[22];
4792 uint retry_count;
4793 bool suppress_warnings;
4794 int ret;
4795 int binlog_version;
4796 #ifndef DBUG_OFF
4797 uint retry_count_reg= 0, retry_count_dump= 0, retry_count_event= 0;
4798 #endif
4799 // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
4800 my_thread_init();
4801 DBUG_ENTER("handle_slave_io");
4802
4803 DBUG_ASSERT(mi->inited);
4804 mysql= NULL ;
4805 retry_count= 0;
4806
4807 mysql_mutex_lock(&mi->run_lock);
4808 /* Inform waiting threads that slave has started */
4809 mi->slave_run_id++;
4810
4811 #ifndef DBUG_OFF
4812 mi->events_until_exit = disconnect_slave_event_count;
4813 #endif
4814
4815 thd= new THD; // note that contructor of THD uses DBUG_ !
4816 THD_CHECK_SENTRY(thd);
4817 mi->info_thd = thd;
4818
4819 pthread_detach_this_thread();
4820 thd->thread_stack= (char*) &thd; // remember where our stack is
4821 mi->clear_error();
4822 mi->slave_running = 1;
4823 if (init_slave_thread(thd, SLAVE_THD_IO))
4824 {
4825 mysql_cond_broadcast(&mi->start_cond);
4826 mysql_mutex_unlock(&mi->run_lock);
4827 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
4828 ER_THD(thd, ER_SLAVE_FATAL_ERROR),
4829 "Failed during slave I/O thread initialization");
4830 goto err;
4831 }
4832
4833 mysql_mutex_lock(&LOCK_thread_count);
4834 add_global_thread(thd);
4835 thd_added= true;
4836 mysql_mutex_unlock(&LOCK_thread_count);
4837
4838 mi->abort_slave = 0;
4839 mysql_mutex_unlock(&mi->run_lock);
4840 mysql_cond_broadcast(&mi->start_cond);
4841
4842 DBUG_PRINT("master_info",("log_file_name: '%s' position: %s",
4843 mi->get_master_log_name(),
4844 llstr(mi->get_master_log_pos(), llbuff)));
4845
4846 /* This must be called before run any binlog_relay_io hooks */
4847 my_pthread_setspecific_ptr(RPL_MASTER_INFO, mi);
4848
4849 if (RUN_HOOK(binlog_relay_io, thread_start, (thd, mi)))
4850 {
4851 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
4852 ER(ER_SLAVE_FATAL_ERROR), "Failed to run 'thread_start' hook");
4853 goto err;
4854 }
4855
4856 if (!(mi->mysql = mysql = mysql_init(NULL)))
4857 {
4858 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
4859 ER(ER_SLAVE_FATAL_ERROR), "error in mysql_init()");
4860 goto err;
4861 }
4862
4863 THD_STAGE_INFO(thd, stage_connecting_to_master);
4864 // we can get killed during safe_connect
4865 if (!safe_connect(thd, mysql, mi))
4866 {
4867 sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
4868 "replication started in log '%s' at position %s",
4869 mi->get_user(), mi->host, mi->port,
4870 mi->get_io_rpl_log_name(),
4871 llstr(mi->get_master_log_pos(), llbuff));
4872 }
4873 else
4874 {
4875 sql_print_information("Slave I/O thread killed while connecting to master");
4876 goto err;
4877 }
4878
4879 connected:
4880
4881 DBUG_EXECUTE_IF("dbug.before_get_running_status_yes",
4882 {
4883 const char act[]=
4884 "now "
4885 "wait_for signal.io_thread_let_running";
4886 DBUG_ASSERT(opt_debug_sync_timeout > 0);
4887 DBUG_ASSERT(!debug_sync_set_action(thd,
4888 STRING_WITH_LEN(act)));
4889 };);
4890 DBUG_EXECUTE_IF("dbug.calculate_sbm_after_previous_gtid_log_event",
4891 {
4892 /* Fake that thread started 3 minutes ago */
4893 thd->start_time.tv_sec-=180;
4894 };);
4895 DBUG_EXECUTE_IF("dbug.calculate_sbm_after_fake_rotate_log_event",
4896 {
4897 /* Fake that thread started 3 minutes ago */
4898 thd->start_time.tv_sec-=180;
4899 };);
4900 mysql_mutex_lock(&mi->run_lock);
4901 mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
4902 mysql_mutex_unlock(&mi->run_lock);
4903
4904 thd->slave_net = &mysql->net;
4905 THD_STAGE_INFO(thd, stage_checking_master_version);
4906 ret= get_master_version_and_clock(mysql, mi);
4907 if (!ret)
4908 ret= get_master_uuid(mysql, mi);
4909 if (!ret)
4910 ret= io_thread_init_commands(mysql, mi);
4911
4912 if (ret == 1)
4913 /* Fatal error */
4914 goto err;
4915
4916 if (ret == 2)
4917 {
4918 if (check_io_slave_killed(mi->info_thd, mi, "Slave I/O thread killed"
4919 "while calling get_master_version_and_clock(...)"))
4920 goto err;
4921 suppress_warnings= FALSE;
4922 /* Try to reconnect because the error was caused by a transient network problem */
4923 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4924 reconnect_messages[SLAVE_RECON_ACT_REG]))
4925 goto err;
4926 goto connected;
4927 }
4928
4929 mysql_mutex_lock(&mi->data_lock);
4930 binlog_version= mi->get_mi_description_event()->binlog_version;
4931 mysql_mutex_unlock(&mi->data_lock);
4932
4933 if (binlog_version > 1)
4934 {
4935 /*
4936 Register ourselves with the master.
4937 */
4938 THD_STAGE_INFO(thd, stage_registering_slave_on_master);
4939 if (register_slave_on_master(mysql, mi, &suppress_warnings))
4940 {
4941 if (!check_io_slave_killed(thd, mi, "Slave I/O thread killed "
4942 "while registering slave on master"))
4943 {
4944 sql_print_error("Slave I/O thread couldn't register on master");
4945 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4946 reconnect_messages[SLAVE_RECON_ACT_REG]))
4947 goto err;
4948 }
4949 else
4950 goto err;
4951 goto connected;
4952 }
4953 DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_REG",
4954 if (!retry_count_reg)
4955 {
4956 retry_count_reg++;
4957 sql_print_information("Forcing to reconnect slave I/O thread");
4958 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4959 reconnect_messages[SLAVE_RECON_ACT_REG]))
4960 goto err;
4961 goto connected;
4962 });
4963 }
4964
4965 DBUG_PRINT("info",("Starting reading binary log from master"));
4966 while (!io_slave_killed(thd,mi))
4967 {
4968 THD_STAGE_INFO(thd, stage_requesting_binlog_dump);
4969 if (request_dump(thd, mysql, mi, &suppress_warnings))
4970 {
4971 sql_print_error("Failed on request_dump()");
4972 if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \
4973 requesting master dump") ||
4974 try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4975 reconnect_messages[SLAVE_RECON_ACT_DUMP]))
4976 goto err;
4977 goto connected;
4978 }
4979 DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_DUMP",
4980 if (!retry_count_dump)
4981 {
4982 retry_count_dump++;
4983 sql_print_information("Forcing to reconnect slave I/O thread");
4984 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4985 reconnect_messages[SLAVE_RECON_ACT_DUMP]))
4986 goto err;
4987 goto connected;
4988 });
4989 const char *event_buf;
4990
4991 DBUG_ASSERT(mi->last_error().number == 0);
4992 while (!io_slave_killed(thd,mi))
4993 {
4994 ulong event_len;
4995 /*
4996 We say "waiting" because read_event() will wait if there's nothing to
4997 read. But if there's something to read, it will not wait. The
4998 important thing is to not confuse users by saying "reading" whereas
4999 we're in fact receiving nothing.
5000 */
5001 THD_STAGE_INFO(thd, stage_waiting_for_master_to_send_event);
5002 event_len= read_event(mysql, mi, &suppress_warnings);
5003
5004 DBUG_EXECUTE_IF("relay_xid_trigger",
5005 if (event_len != packet_error)
5006 {
5007 const char* event_buf= (const char*)mysql->net.read_pos + 1;
5008 Log_event_type event_type= (Log_event_type)
5009 event_buf[EVENT_TYPE_OFFSET];
5010 if (event_type == XID_EVENT)
5011 {
5012 const char act[]= "now signal relay_xid_reached wait_for resume";
5013 DBUG_ASSERT(!debug_sync_set_action(current_thd,
5014 STRING_WITH_LEN(act)));
5015 }
5016 }
5017 );
5018
5019 if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \
5020 reading event"))
5021 goto err;
5022 DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_EVENT",
5023 if (!retry_count_event)
5024 {
5025 retry_count_event++;
5026 sql_print_information("Forcing to reconnect slave I/O thread");
5027 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
5028 reconnect_messages[SLAVE_RECON_ACT_EVENT]))
5029 goto err;
5030 goto connected;
5031 });
5032
5033 if (event_len == packet_error)
5034 {
5035 uint mysql_error_number= mysql_errno(mysql);
5036 switch (mysql_error_number) {
5037 case CR_NET_PACKET_TOO_LARGE:
5038 sql_print_error("\
5039 Log entry on master is longer than slave_max_allowed_packet (%lu) on \
5040 slave. If the entry is correct, restart the server with a higher value of \
5041 slave_max_allowed_packet",
5042 slave_max_allowed_packet);
5043 mi->report(ERROR_LEVEL, ER_NET_PACKET_TOO_LARGE,
5044 "%s", "Got a packet bigger than 'slave_max_allowed_packet' bytes");
5045 goto err;
5046 case ER_MASTER_FATAL_ERROR_READING_BINLOG:
5047 mi->report(ERROR_LEVEL, ER_MASTER_FATAL_ERROR_READING_BINLOG,
5048 ER(ER_MASTER_FATAL_ERROR_READING_BINLOG),
5049 mysql_error_number, mysql_error(mysql));
5050 goto err;
5051 case ER_OUT_OF_RESOURCES:
5052 sql_print_error("\
5053 Stopping slave I/O thread due to out-of-memory error from master");
5054 mi->report(ERROR_LEVEL, ER_OUT_OF_RESOURCES,
5055 "%s", ER(ER_OUT_OF_RESOURCES));
5056 goto err;
5057 }
5058 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
5059 reconnect_messages[SLAVE_RECON_ACT_EVENT]))
5060 goto err;
5061 goto connected;
5062 } // if (event_len == packet_error)
5063
5064 retry_count=0; // ok event, reset retry counter
5065 THD_STAGE_INFO(thd, stage_queueing_master_event_to_the_relay_log);
5066 event_buf= (const char*)mysql->net.read_pos + 1;
5067 DBUG_PRINT("info", ("IO thread received event of type %s", Log_event::get_type_str((Log_event_type)event_buf[EVENT_TYPE_OFFSET])));
5068 if (RUN_HOOK(binlog_relay_io, after_read_event,
5069 (thd, mi,(const char*)mysql->net.read_pos + 1,
5070 event_len, &event_buf, &event_len)))
5071 {
5072 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
5073 ER(ER_SLAVE_FATAL_ERROR),
5074 "Failed to run 'after_read_event' hook");
5075 goto err;
5076 }
5077
5078 /* XXX: 'synced' should be updated by queue_event to indicate
5079 whether event has been synced to disk */
5080 bool synced= 0;
5081 if (queue_event(mi, event_buf, event_len))
5082 {
5083 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
5084 ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
5085 "could not queue event from master");
5086 goto err;
5087 }
5088 if (RUN_HOOK(binlog_relay_io, after_queue_event,
5089 (thd, mi, event_buf, event_len, synced)))
5090 {
5091 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
5092 ER(ER_SLAVE_FATAL_ERROR),
5093 "Failed to run 'after_queue_event' hook");
5094 goto err;
5095 }
5096
5097 mysql_mutex_lock(&mi->data_lock);
5098 if (flush_master_info(mi, FALSE))
5099 {
5100 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
5101 ER(ER_SLAVE_FATAL_ERROR),
5102 "Failed to flush master info.");
5103 mysql_mutex_unlock(&mi->data_lock);
5104 goto err;
5105 }
5106 mysql_mutex_unlock(&mi->data_lock);
5107
5108 /*
5109 See if the relay logs take too much space.
5110 We don't lock mi->rli->log_space_lock here; this dirty read saves time
5111 and does not introduce any problem:
5112 - if mi->rli->ignore_log_space_limit is 1 but becomes 0 just after (so
5113 the clean value is 0), then we are reading only one more event as we
5114 should, and we'll block only at the next event. No big deal.
5115 - if mi->rli->ignore_log_space_limit is 0 but becomes 1 just after (so
5116 the clean value is 1), then we are going into wait_for_relay_log_space()
5117 for no reason, but this function will do a clean read, notice the clean
5118 value and exit immediately.
5119 */
5120 #ifndef DBUG_OFF
5121 {
5122 char llbuf1[22], llbuf2[22];
5123 DBUG_PRINT("info", ("log_space_limit=%s log_space_total=%s \
5124 ignore_log_space_limit=%d",
5125 llstr(rli->log_space_limit,llbuf1),
5126 llstr(rli->log_space_total,llbuf2),
5127 (int) rli->ignore_log_space_limit));
5128 }
5129 #endif
5130
5131 if (rli->log_space_limit && rli->log_space_limit <
5132 rli->log_space_total &&
5133 !rli->ignore_log_space_limit)
5134 if (wait_for_relay_log_space(rli))
5135 {
5136 sql_print_error("Slave I/O thread aborted while waiting for relay \
5137 log space");
5138 goto err;
5139 }
5140 DBUG_EXECUTE_IF("flush_after_reading_user_var_event",
5141 {
5142 if (event_buf[EVENT_TYPE_OFFSET] == USER_VAR_EVENT)
5143 {
5144 const char act[]= "now signal Reached wait_for signal.flush_complete_continue";
5145 DBUG_ASSERT(opt_debug_sync_timeout > 0);
5146 DBUG_ASSERT(!debug_sync_set_action(current_thd,
5147 STRING_WITH_LEN(act)));
5148
5149 }
5150 });
5151 DBUG_EXECUTE_IF("stop_io_after_reading_gtid_log_event",
5152 if (event_buf[EVENT_TYPE_OFFSET] == GTID_LOG_EVENT)
5153 thd->killed= THD::KILLED_NO_VALUE;
5154 );
5155 DBUG_EXECUTE_IF("stop_io_after_reading_query_log_event",
5156 if (event_buf[EVENT_TYPE_OFFSET] == QUERY_EVENT)
5157 thd->killed= THD::KILLED_NO_VALUE;
5158 );
5159 DBUG_EXECUTE_IF("stop_io_after_reading_user_var_log_event",
5160 if (event_buf[EVENT_TYPE_OFFSET] == USER_VAR_EVENT)
5161 thd->killed= THD::KILLED_NO_VALUE;
5162 );
5163 DBUG_EXECUTE_IF("stop_io_after_reading_table_map_event",
5164 if (event_buf[EVENT_TYPE_OFFSET] == TABLE_MAP_EVENT)
5165 thd->killed= THD::KILLED_NO_VALUE;
5166 );
5167 DBUG_EXECUTE_IF("stop_io_after_reading_xid_log_event",
5168 if (event_buf[EVENT_TYPE_OFFSET] == XID_EVENT)
5169 thd->killed= THD::KILLED_NO_VALUE;
5170 );
5171 DBUG_EXECUTE_IF("stop_io_after_reading_write_rows_log_event",
5172 if (event_buf[EVENT_TYPE_OFFSET] == WRITE_ROWS_EVENT)
5173 thd->killed= THD::KILLED_NO_VALUE;
5174 );
5175 /*
5176 After event is flushed to relay log file, memory used
5177 by thread's mem_root is not required any more.
5178 Hence adding free_root(thd->mem_root,...) to do the
5179 cleanup, otherwise a long running IO thread can
5180 cause OOM error.
5181 */
5182 free_root(thd->mem_root, MYF(MY_KEEP_PREALLOC));
5183 }
5184 }
5185
5186 // error = 0;
5187 err:
5188 // print the current replication position
5189 sql_print_information("Slave I/O thread exiting, read up to log '%s', position %s",
5190 mi->get_io_rpl_log_name(), llstr(mi->get_master_log_pos(), llbuff));
5191 (void) RUN_HOOK(binlog_relay_io, thread_stop, (thd, mi));
5192 thd->reset_query();
5193 thd->reset_db(NULL, 0);
5194 if (mysql)
5195 {
5196 /*
5197 Here we need to clear the active VIO before closing the
5198 connection with the master. The reason is that THD::awake()
5199 might be called from terminate_slave_thread() because somebody
5200 issued a STOP SLAVE. If that happends, the shutdown_active_vio()
5201 can be called in the middle of closing the VIO associated with
5202 the 'mysql' object, causing a crash.
5203 */
5204 #ifdef SIGNAL_WITH_VIO_SHUTDOWN
5205 thd->clear_active_vio();
5206 #endif
5207 mysql_close(mysql);
5208 mi->mysql=0;
5209 }
5210 mysql_mutex_lock(&mi->data_lock);
5211 write_ignored_events_info_to_relay_log(thd, mi);
5212 mysql_mutex_unlock(&mi->data_lock);
5213 THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
5214 mysql_mutex_lock(&mi->run_lock);
5215 /*
5216 Clean information used to start slave in order to avoid
5217 security issues.
5218 */
5219 mi->reset_start_info();
5220 /* Forget the relay log's format */
5221 mysql_mutex_lock(&mi->data_lock);
5222 mi->set_mi_description_event(NULL);
5223 mysql_mutex_unlock(&mi->data_lock);
5224
5225 DBUG_ASSERT(thd->net.buff != 0);
5226 net_end(&thd->net); // destructor will not free it, because net.vio is 0
5227
5228 thd->release_resources();
5229 THD_CHECK_SENTRY(thd);
5230 if (thd_added)
5231 remove_global_thread(thd);
5232 delete thd;
5233
5234 mi->abort_slave= 0;
5235 mi->slave_running= 0;
5236 mi->info_thd= 0;
5237 /*
5238 Note: the order of the two following calls (first broadcast, then unlock)
5239 is important. Otherwise a killer_thread can execute between the calls and
5240 delete the mi structure leading to a crash! (see BUG#25306 for details)
5241 */
5242 mysql_cond_broadcast(&mi->stop_cond); // tell the world we are done
5243 DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
5244 mysql_mutex_unlock(&mi->run_lock);
5245 DBUG_LEAVE; // Must match DBUG_ENTER()
5246 my_thread_end();
5247 #if OPENSSL_VERSION_NUMBER < 0x10100000L
5248 ERR_remove_thread_state(0);
5249 #endif /* OPENSSL_VERSION_NUMBER < 0x10100000L */
5250 pthread_exit(0);
5251 return(0); // Avoid compiler warnings
5252 }
5253
5254 /*
5255 Check the temporary directory used by commands like
5256 LOAD DATA INFILE.
5257 */
5258 static
check_temp_dir(char * tmp_file)5259 int check_temp_dir(char* tmp_file)
5260 {
5261 int fd;
5262 MY_DIR *dirp;
5263 char tmp_dir[FN_REFLEN];
5264 size_t tmp_dir_size;
5265
5266 DBUG_ENTER("check_temp_dir");
5267
5268 /*
5269 Get the directory from the temporary file.
5270 */
5271 dirname_part(tmp_dir, tmp_file, &tmp_dir_size);
5272
5273 /*
5274 Check if the directory exists.
5275 */
5276 if (!(dirp=my_dir(tmp_dir,MYF(MY_WME))))
5277 DBUG_RETURN(1);
5278 my_dirend(dirp);
5279
5280 /*
5281 Check permissions to create a file.
5282 */
5283 //append the server UUID to the temp file name.
5284 char *unique_tmp_file_name= (char*)my_malloc((FN_REFLEN+TEMP_FILE_MAX_LEN)*sizeof(char), MYF(0));
5285 sprintf(unique_tmp_file_name, "%s%s", tmp_file, server_uuid);
5286 if ((fd= mysql_file_create(key_file_misc,
5287 unique_tmp_file_name, CREATE_MODE,
5288 O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW,
5289 MYF(MY_WME))) < 0)
5290 DBUG_RETURN(1);
5291
5292 /*
5293 Clean up.
5294 */
5295 mysql_file_close(fd, MYF(0));
5296
5297 mysql_file_delete(key_file_misc, unique_tmp_file_name, MYF(0));
5298 my_free(unique_tmp_file_name);
5299 DBUG_RETURN(0);
5300 }
5301
5302 /*
5303 Worker thread for the parallel execution of the replication events.
5304 */
handle_slave_worker(void * arg)5305 pthread_handler_t handle_slave_worker(void *arg)
5306 {
5307 THD *thd; /* needs to be first for thread_stack */
5308 bool thd_added= false;
5309 int error= 0;
5310 Slave_worker *w= (Slave_worker *) arg;
5311 Relay_log_info* rli= w->c_rli;
5312 ulong purge_cnt= 0;
5313 ulonglong purge_size= 0;
5314 struct slave_job_item _item, *job_item= &_item;
5315
5316 my_thread_init();
5317 DBUG_ENTER("handle_slave_worker");
5318
5319 thd= new THD;
5320 if (!thd)
5321 {
5322 sql_print_error("Failed during slave worker initialization");
5323 goto err;
5324 }
5325 w->info_thd= thd;
5326 thd->thread_stack = (char*)&thd;
5327
5328 pthread_detach_this_thread();
5329 if (init_slave_thread(thd, SLAVE_THD_WORKER))
5330 {
5331 // todo make SQL thread killed
5332 sql_print_error("Failed during slave worker initialization");
5333 goto err;
5334 }
5335 thd->init_for_queries(w);
5336
5337 mysql_mutex_lock(&LOCK_thread_count);
5338 add_global_thread(thd);
5339 thd_added= true;
5340 mysql_mutex_unlock(&LOCK_thread_count);
5341
5342 if (w->update_is_transactional())
5343 {
5344 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
5345 "Error checking if the worker repository is transactional.");
5346 goto err;
5347 }
5348
5349 mysql_mutex_lock(&w->jobs_lock);
5350 w->running_status= Slave_worker::RUNNING;
5351 mysql_cond_signal(&w->jobs_cond);
5352
5353 mysql_mutex_unlock(&w->jobs_lock);
5354
5355 DBUG_ASSERT(thd->is_slave_error == 0);
5356
5357 while (!error)
5358 {
5359 error= slave_worker_exec_job(w, rli);
5360 }
5361
5362 /*
5363 Cleanup after an error requires clear_error() go first.
5364 Otherwise assert(!all) in binlog_rollback()
5365 */
5366 thd->clear_error();
5367 w->cleanup_context(thd, error);
5368
5369 mysql_mutex_lock(&w->jobs_lock);
5370
5371 while(de_queue(&w->jobs, job_item))
5372 {
5373 purge_cnt++;
5374 purge_size += ((Log_event*) (job_item->data))->data_written;
5375 DBUG_ASSERT(job_item->data);
5376 delete static_cast<Log_event*>(job_item->data);
5377 }
5378
5379 DBUG_ASSERT(w->jobs.len == 0);
5380
5381 mysql_mutex_unlock(&w->jobs_lock);
5382
5383 mysql_mutex_lock(&rli->pending_jobs_lock);
5384 rli->pending_jobs -= purge_cnt;
5385 rli->mts_pending_jobs_size -= purge_size;
5386 DBUG_ASSERT(rli->mts_pending_jobs_size < rli->mts_pending_jobs_size_max);
5387
5388 mysql_mutex_unlock(&rli->pending_jobs_lock);
5389
5390 /*
5391 In MTS case cleanup_after_session() has be called explicitly.
5392 TODO: to make worker thd be deleted before Slave_worker instance.
5393 */
5394 if (thd->rli_slave)
5395 {
5396 w->cleanup_after_session();
5397 thd->rli_slave= NULL;
5398 }
5399 mysql_mutex_lock(&w->jobs_lock);
5400
5401 w->running_status= Slave_worker::NOT_RUNNING;
5402 if (log_warnings > 1)
5403 sql_print_information("Worker %lu statistics: "
5404 "events processed = %lu "
5405 "hungry waits = %lu "
5406 "priv queue overfills = %llu ",
5407 w->id, w->events_done, w->wq_size_waits_cnt,
5408 w->jobs.waited_overfill);
5409 mysql_cond_signal(&w->jobs_cond); // famous last goodbye
5410
5411 mysql_mutex_unlock(&w->jobs_lock);
5412
5413 err:
5414
5415 if (thd)
5416 {
5417 /*
5418 The slave code is very bad. Notice that it is missing
5419 several clean up calls here. I've just added what was
5420 necessary to avoid valgrind errors.
5421
5422 /Alfranio
5423 */
5424 DBUG_ASSERT(thd->net.buff != 0);
5425 net_end(&thd->net);
5426
5427 /*
5428 to avoid close_temporary_tables() closing temp tables as those
5429 are Coordinator's burden.
5430 */
5431 thd->system_thread= NON_SYSTEM_THREAD;
5432 thd->release_resources();
5433 THD_CHECK_SENTRY(thd);
5434 if (thd_added)
5435 remove_global_thread(thd);
5436 delete thd;
5437 }
5438
5439 my_thread_end();
5440 #if OPENSSL_VERSION_NUMBER < 0x10100000L
5441 ERR_remove_thread_state(0);
5442 #endif /* OPENSSL_VERSION_NUMBER < 0x10100000L */
5443 pthread_exit(0);
5444 DBUG_RETURN(0);
5445 }
5446
5447 /**
5448 Orders jobs by comparing relay log information.
5449 */
5450
mts_event_coord_cmp(LOG_POS_COORD * id1,LOG_POS_COORD * id2)5451 int mts_event_coord_cmp(LOG_POS_COORD *id1, LOG_POS_COORD *id2)
5452 {
5453 longlong filecmp= strcmp(id1->file_name, id2->file_name);
5454 longlong poscmp= id1->pos - id2->pos;
5455 return (filecmp < 0 ? -1 : (filecmp > 0 ? 1 :
5456 (poscmp < 0 ? -1 : (poscmp > 0 ? 1 : 0))));
5457 }
5458
mts_recovery_groups(Relay_log_info * rli)5459 int mts_recovery_groups(Relay_log_info *rli)
5460 {
5461 Log_event *ev= NULL;
5462 const char *errmsg= NULL;
5463 bool error= FALSE;
5464 bool flag_group_seen_begin= FALSE;
5465 uint recovery_group_cnt= 0;
5466 bool not_reached_commit= true;
5467 DYNAMIC_ARRAY above_lwm_jobs;
5468 Slave_job_group job_worker;
5469 IO_CACHE log;
5470 File file;
5471 LOG_INFO linfo;
5472 my_off_t offset= 0;
5473 MY_BITMAP *groups= &rli->recovery_groups;
5474 THD *thd= current_thd;
5475
5476 DBUG_ENTER("mts_recovery_groups");
5477
5478 DBUG_ASSERT(rli->slave_parallel_workers == 0);
5479
5480 /*
5481 Although mts_recovery_groups() is reentrant it returns
5482 early if the previous invocation raised any bit in
5483 recovery_groups bitmap.
5484 */
5485 if (rli->is_mts_recovery())
5486 DBUG_RETURN(0);
5487
5488 /*
5489 Save relay log position to compare with worker's position.
5490 */
5491 LOG_POS_COORD cp=
5492 {
5493 (char *) rli->get_group_master_log_name(),
5494 rli->get_group_master_log_pos()
5495 };
5496
5497 Format_description_log_event fdle(BINLOG_VERSION), *p_fdle= &fdle;
5498
5499 if (!p_fdle->is_valid())
5500 DBUG_RETURN(TRUE);
5501
5502 /*
5503 Gathers information on valuable workers and stores it in
5504 above_lwm_jobs in asc ordered by the master binlog coordinates.
5505 */
5506 my_init_dynamic_array(&above_lwm_jobs, sizeof(Slave_job_group),
5507 rli->recovery_parallel_workers,
5508 rli->recovery_parallel_workers);
5509
5510 /*
5511 When info tables are used and autocommit= 0 we force a new
5512 transaction start to avoid table access deadlocks when START SLAVE
5513 is executed after STOP SLAVE with MTS enabled.
5514 */
5515 if (is_autocommit_off_and_infotables(thd))
5516 {
5517 if (trans_begin(thd))
5518 {
5519 error= TRUE;
5520 goto err;
5521 }
5522 }
5523
5524 for (uint id= 0; id < rli->recovery_parallel_workers; id++)
5525 {
5526 Slave_worker *worker=
5527 Rpl_info_factory::create_worker(opt_rli_repository_id, id, rli, true);
5528
5529 if (!worker)
5530 {
5531 if (is_autocommit_off_and_infotables(thd))
5532 trans_rollback(thd);
5533 error= TRUE;
5534 goto err;
5535 }
5536
5537 LOG_POS_COORD w_last= { const_cast<char*>(worker->get_group_master_log_name()),
5538 worker->get_group_master_log_pos() };
5539 if (mts_event_coord_cmp(&w_last, &cp) > 0)
5540 {
5541 /*
5542 Inserts information into a dynamic array for further processing.
5543 The jobs/workers are ordered by the last checkpoint positions
5544 workers have seen.
5545 */
5546 job_worker.worker= worker;
5547 job_worker.checkpoint_log_pos= worker->checkpoint_master_log_pos;
5548 job_worker.checkpoint_log_name= worker->checkpoint_master_log_name;
5549
5550 insert_dynamic(&above_lwm_jobs, (uchar*) &job_worker);
5551 }
5552 else
5553 {
5554 /*
5555 Deletes the worker because its jobs are included in the latest
5556 checkpoint.
5557 */
5558 delete worker;
5559 }
5560 }
5561
5562 /*
5563 When info tables are used and autocommit= 0 we force transaction
5564 commit to avoid table access deadlocks when START SLAVE is executed
5565 after STOP SLAVE with MTS enabled.
5566 */
5567 if (is_autocommit_off_and_infotables(thd))
5568 {
5569 if (trans_commit(thd))
5570 {
5571 error= TRUE;
5572 goto err;
5573 }
5574 }
5575
5576 /*
5577 In what follows, the group Recovery Bitmap is constructed.
5578
5579 seek(lwm);
5580
5581 while(w= next(above_lwm_w))
5582 do
5583 read G
5584 if G == w->last_comm
5585 w.B << group_cnt++;
5586 RB |= w.B;
5587 break;
5588 else
5589 group_cnt++;
5590 while(!eof);
5591 continue;
5592 */
5593 DBUG_ASSERT(!rli->recovery_groups_inited);
5594
5595 if (above_lwm_jobs.elements != 0)
5596 {
5597 bitmap_init(groups, NULL, MTS_MAX_BITS_IN_GROUP, FALSE);
5598 rli->recovery_groups_inited= true;
5599 bitmap_clear_all(groups);
5600 }
5601 rli->mts_recovery_group_cnt= 0;
5602 for (uint it_job= 0; it_job < above_lwm_jobs.elements; it_job++)
5603 {
5604 Slave_worker *w= ((Slave_job_group *)
5605 dynamic_array_ptr(&above_lwm_jobs, it_job))->worker;
5606 LOG_POS_COORD w_last= { const_cast<char*>(w->get_group_master_log_name()),
5607 w->get_group_master_log_pos() };
5608 bool checksum_detected= FALSE;
5609
5610 sql_print_information("Slave: MTS group recovery relay log info based on Worker-Id %lu, "
5611 "group_relay_log_name %s, group_relay_log_pos %llu "
5612 "group_master_log_name %s, group_master_log_pos %llu",
5613 w->id,
5614 w->get_group_relay_log_name(),
5615 w->get_group_relay_log_pos(),
5616 w->get_group_master_log_name(),
5617 w->get_group_master_log_pos());
5618
5619 recovery_group_cnt= 0;
5620 not_reached_commit= true;
5621 if (rli->relay_log.find_log_pos(&linfo, rli->get_group_relay_log_name(), 1))
5622 {
5623 error= TRUE;
5624 sql_print_error("Error looking for %s.", rli->get_group_relay_log_name());
5625 goto err;
5626 }
5627 offset= rli->get_group_relay_log_pos();
5628 for (int checking= 0 ; not_reached_commit; checking++)
5629 {
5630 if ((file= open_binlog_file(&log, linfo.log_file_name, &errmsg)) < 0)
5631 {
5632 error= TRUE;
5633 sql_print_error("%s", errmsg);
5634 goto err;
5635 }
5636 /*
5637 Looking for the actual relay checksum algorithm that is present in
5638 a FD at head events of the relay log.
5639 */
5640 if (!checksum_detected)
5641 {
5642 int i= 0;
5643 while (i < 4 && (ev= Log_event::read_log_event(&log,
5644 (mysql_mutex_t*) 0, p_fdle, 0)))
5645 {
5646 if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
5647 {
5648 p_fdle->checksum_alg= ev->checksum_alg;
5649 checksum_detected= TRUE;
5650 }
5651 delete ev;
5652 i++;
5653 }
5654 if (!checksum_detected)
5655 {
5656 error= TRUE;
5657 sql_print_error("%s", "malformed or very old relay log which "
5658 "does not have FormatDescriptor");
5659 goto err;
5660 }
5661 }
5662
5663 my_b_seek(&log, offset);
5664
5665 while (not_reached_commit &&
5666 (ev= Log_event::read_log_event(&log, 0, p_fdle,
5667 opt_slave_sql_verify_checksum)))
5668 {
5669 DBUG_ASSERT(ev->is_valid());
5670
5671 if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
5672 p_fdle->checksum_alg= ev->checksum_alg;
5673
5674 if (ev->get_type_code() == ROTATE_EVENT ||
5675 ev->get_type_code() == FORMAT_DESCRIPTION_EVENT ||
5676 ev->get_type_code() == PREVIOUS_GTIDS_LOG_EVENT)
5677 {
5678 delete ev;
5679 ev= NULL;
5680 continue;
5681 }
5682
5683 DBUG_PRINT("mts", ("Event Recoverying relay log info "
5684 "group_mster_log_name %s, event_master_log_pos %llu type code %u.",
5685 linfo.log_file_name, ev->log_pos, ev->get_type_code()));
5686
5687 if (ev->starts_group())
5688 {
5689 flag_group_seen_begin= true;
5690 }
5691 else if ((ev->ends_group() || !flag_group_seen_begin) &&
5692 !is_gtid_event(ev))
5693 {
5694 int ret= 0;
5695 LOG_POS_COORD ev_coord= { (char *) rli->get_group_master_log_name(),
5696 ev->log_pos };
5697 flag_group_seen_begin= false;
5698 recovery_group_cnt++;
5699
5700 sql_print_information("Slave: MTS group recovery relay log info "
5701 "group_master_log_name %s, "
5702 "event_master_log_pos %llu.",
5703 rli->get_group_master_log_name(), ev->log_pos);
5704 if ((ret= mts_event_coord_cmp(&ev_coord, &w_last)) == 0)
5705 {
5706 #ifndef DBUG_OFF
5707 for (uint i= 0; i <= w->checkpoint_seqno; i++)
5708 {
5709 if (bitmap_is_set(&w->group_executed, i))
5710 DBUG_PRINT("mts", ("Bit %u is set.", i));
5711 else
5712 DBUG_PRINT("mts", ("Bit %u is not set.", i));
5713 }
5714 #endif
5715 DBUG_PRINT("mts",
5716 ("Doing a shift ini(%lu) end(%lu).",
5717 (w->checkpoint_seqno + 1) - recovery_group_cnt,
5718 w->checkpoint_seqno));
5719
5720 for (uint i= (w->checkpoint_seqno + 1) - recovery_group_cnt,
5721 j= 0; i <= w->checkpoint_seqno; i++, j++)
5722 {
5723 if (bitmap_is_set(&w->group_executed, i))
5724 {
5725 DBUG_PRINT("mts", ("Setting bit %u.", j));
5726 bitmap_fast_test_and_set(groups, j);
5727 }
5728 }
5729 not_reached_commit= false;
5730 }
5731 else
5732 DBUG_ASSERT(ret < 0);
5733 }
5734 delete ev;
5735 ev= NULL;
5736 }
5737 end_io_cache(&log);
5738 mysql_file_close(file, MYF(MY_WME));
5739 offset= BIN_LOG_HEADER_SIZE;
5740 if (not_reached_commit && rli->relay_log.find_next_log(&linfo, 1))
5741 {
5742 error= TRUE;
5743 sql_print_error("Error looking for file after %s.", linfo.log_file_name);
5744 goto err;
5745 }
5746 }
5747
5748 rli->mts_recovery_group_cnt= (rli->mts_recovery_group_cnt < recovery_group_cnt ?
5749 recovery_group_cnt : rli->mts_recovery_group_cnt);
5750 }
5751
5752 DBUG_ASSERT(!rli->recovery_groups_inited ||
5753 rli->mts_recovery_group_cnt <= groups->n_bits);
5754
5755 err:
5756
5757 for (uint it_job= 0; it_job < above_lwm_jobs.elements; it_job++)
5758 {
5759 get_dynamic(&above_lwm_jobs, (uchar *) &job_worker, it_job);
5760 delete job_worker.worker;
5761 }
5762
5763 delete_dynamic(&above_lwm_jobs);
5764 if (rli->mts_recovery_group_cnt == 0)
5765 rli->clear_mts_recovery_groups();
5766
5767 DBUG_RETURN(error ? ER_MTS_RECOVERY_FAILURE : 0);
5768 }
5769
5770 /**
5771 Processing rli->gaq to find out the low-water-mark (lwm) coordinates
5772 which is stored into the cental recovery table.
5773
5774 @param rli pointer to Relay-log-info of Coordinator
5775 @param period period of processing GAQ, normally derived from
5776 @c mts_checkpoint_period
5777 @param force if TRUE then hang in a loop till some progress
5778 @param need_data_lock False if rli->data_lock mutex is aquired by
5779 the caller.
5780
5781 @return FALSE success, TRUE otherwise
5782 */
mts_checkpoint_routine(Relay_log_info * rli,ulonglong period,bool force,bool need_data_lock)5783 bool mts_checkpoint_routine(Relay_log_info *rli, ulonglong period,
5784 bool force, bool need_data_lock)
5785 {
5786 ulong cnt;
5787 bool error= FALSE;
5788 struct timespec curr_clock;
5789 bool binlog_prot_acquired= false;
5790 time_t ts=0;
5791
5792 DBUG_ENTER("checkpoint_routine");
5793
5794 #ifndef DBUG_OFF
5795 if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
5796 {
5797 if (!rli->gaq->count_done(rli))
5798 DBUG_RETURN(FALSE);
5799 }
5800 DBUG_EXECUTE_IF("mts_checkpoint",
5801 {
5802 const char act[]=
5803 "now signal mts_checkpoint_start";
5804 DBUG_ASSERT(!debug_sync_set_action(rli->info_thd,
5805 STRING_WITH_LEN(act)));
5806 };);
5807 #endif
5808
5809 #ifndef DBUG_OFF
5810 /*
5811 rli->checkpoint_group can have two possible values due to
5812 two possible status of the last (being scheduled) group.
5813 */
5814 const bool precondition= !rli->gaq->full() ||
5815 ((rli->checkpoint_seqno == rli->checkpoint_group -1 &&
5816 rli->mts_group_status == Relay_log_info::MTS_IN_GROUP) ||
5817 rli->checkpoint_seqno == rli->checkpoint_group);
5818 if (!precondition)
5819 {
5820 fprintf(stderr, "rli->gaq->full() = %d\n", rli->gaq->full());
5821 fprintf(stderr, "rli->checkpoint_seqno = %u\n", rli->checkpoint_seqno);
5822 fprintf(stderr, "rli->checkpoint_group = %u\n", rli->checkpoint_group);
5823 fprintf(stderr, "rli->mts_group_status = %d\n", rli->mts_group_status);
5824 DBUG_ASSERT(precondition);
5825 }
5826 #endif
5827
5828 /*
5829 Currently, the checkpoint routine is being called by the SQL Thread.
5830 For that reason, this function is called call from appropriate points
5831 in the SQL Thread's execution path and the elapsed time is calculated
5832 here to check if it is time to execute it.
5833 */
5834 set_timespec_nsec(curr_clock, 0);
5835 ulonglong diff= diff_timespec(curr_clock, rli->last_clock);
5836 if (!force && diff < period)
5837 {
5838 /*
5839 We do not need to execute the checkpoint now because
5840 the time elapsed is not enough.
5841 */
5842 DBUG_RETURN(FALSE);
5843 }
5844
5845 do
5846 {
5847 cnt= rli->gaq->move_queue_head(&rli->workers);
5848 #ifndef DBUG_OFF
5849 if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0) &&
5850 cnt != opt_mts_checkpoint_period)
5851 sql_print_error("This an error cnt != mts_checkpoint_period");
5852 #endif
5853 } while (!sql_slave_killed(rli->info_thd, rli) &&
5854 cnt == 0 && force &&
5855 !DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0) &&
5856 (my_sleep(rli->mts_coordinator_basic_nap), 1));
5857 /*
5858 This checks how many consecutive jobs where processed.
5859 If this value is different than zero the checkpoint
5860 routine can proceed. Otherwise, there is nothing to be
5861 done.
5862 */
5863 if (cnt == 0)
5864 goto end;
5865
5866
5867 /* TODO:
5868 to turn the least occupied selection in terms of jobs pieces
5869 */
5870 for (uint i= 0; i < rli->workers.elements; i++)
5871 {
5872 Slave_worker *w_i;
5873 get_dynamic(&rli->workers, (uchar *) &w_i, i);
5874 set_dynamic(&rli->least_occupied_workers, (uchar*) &w_i->jobs.len, w_i->id);
5875 };
5876 sort_dynamic(&rli->least_occupied_workers, (qsort_cmp) ulong_cmp);
5877
5878 if (need_data_lock)
5879 {
5880 THD * const info_thd= rli->info_thd;
5881 const ulong timeout= info_thd->variables.lock_wait_timeout;
5882
5883 /*
5884 Acquire protection against global BINLOG lock before rli->data_lock is
5885 locked (otherwise we would also block SHOW SLAVE STATUS).
5886 */
5887 DBUG_ASSERT(!info_thd->backup_binlog_lock.is_acquired());
5888 DBUG_PRINT("debug", ("Acquiring binlog protection lock"));
5889 mysql_mutex_assert_not_owner(&rli->data_lock);
5890 error= info_thd->backup_binlog_lock.acquire_protection(info_thd,
5891 MDL_EXPLICIT,
5892 timeout);
5893 if (error)
5894 goto end;
5895
5896 binlog_prot_acquired= true;
5897
5898 mysql_mutex_lock(&rli->data_lock);
5899 }
5900 else
5901 {
5902 mysql_mutex_assert_owner(&rli->data_lock);
5903 DBUG_ASSERT(rli->info_thd->backup_binlog_lock.is_protection_acquired());
5904 }
5905
5906 /*
5907 "Coordinator::commit_positions" {
5908
5909 rli->gaq->lwm has been updated in move_queue_head() and
5910 to contain all but rli->group_master_log_name which
5911 is altered solely by Coordinator at special checkpoints.
5912 */
5913 rli->set_group_master_log_pos(rli->gaq->lwm.group_master_log_pos);
5914 rli->set_group_relay_log_pos(rli->gaq->lwm.group_relay_log_pos);
5915 DBUG_PRINT("mts", ("New checkpoint %llu %llu %s",
5916 rli->gaq->lwm.group_master_log_pos,
5917 rli->gaq->lwm.group_relay_log_pos,
5918 rli->gaq->lwm.group_relay_log_name));
5919
5920 if (rli->gaq->lwm.group_relay_log_name[0] != 0)
5921 rli->set_group_relay_log_name(rli->gaq->lwm.group_relay_log_name);
5922
5923 /*
5924 todo: uncomment notifies when UNTIL will be supported
5925
5926 rli->notify_group_master_log_name_update();
5927 rli->notify_group_relay_log_name_update();
5928
5929 Todo: optimize with if (wait_flag) broadcast
5930 waiter: set wait_flag; waits....; drops wait_flag;
5931 */
5932
5933 error= rli->flush_info(TRUE);
5934
5935 mysql_cond_broadcast(&rli->data_cond);
5936 if (need_data_lock)
5937 mysql_mutex_unlock(&rli->data_lock);
5938
5939 /*
5940 We need to ensure that this is never called at this point when
5941 cnt is zero. This value means that the checkpoint information
5942 will be completely reset.
5943 */
5944 ts= rli->gaq->empty()
5945 ? 0
5946 : reinterpret_cast<Slave_job_group*>(rli->gaq->head_queue())->ts;
5947 rli->reset_notified_checkpoint(cnt, &ts, need_data_lock);
5948 /* end-of "Coordinator::"commit_positions" */
5949
5950 end:
5951
5952 if (binlog_prot_acquired)
5953 {
5954 DBUG_PRINT("debug", ("Releasing binlog protection lock"));
5955 rli->info_thd->backup_binlog_lock.release_protection(rli->info_thd);
5956 }
5957
5958 #ifndef DBUG_OFF
5959 if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
5960 DBUG_SUICIDE();
5961 DBUG_EXECUTE_IF("mts_checkpoint",
5962 {
5963 const char act[]=
5964 "now signal mts_checkpoint_end";
5965 DBUG_ASSERT(!debug_sync_set_action(rli->info_thd,
5966 STRING_WITH_LEN(act)));
5967 };);
5968 #endif
5969 set_timespec_nsec(rli->last_clock, 0);
5970
5971 DBUG_RETURN(error);
5972 }
5973
5974 /**
5975 Instantiation of a Slave_worker and forking out a single Worker thread.
5976
5977 @param rli Coordinator's Relay_log_info pointer
5978 @param i identifier of the Worker
5979
5980 @return 0 suppress or 1 if fails
5981 */
slave_start_single_worker(Relay_log_info * rli,ulong i)5982 int slave_start_single_worker(Relay_log_info *rli, ulong i)
5983 {
5984 int error= 0;
5985 pthread_t th;
5986 Slave_worker *w= NULL;
5987
5988 mysql_mutex_assert_owner(&rli->run_lock);
5989
5990 if (!(w=
5991 Rpl_info_factory::create_worker(opt_rli_repository_id, i, rli, false)))
5992 {
5993 sql_print_error("Failed during slave worker thread create");
5994 error= 1;
5995 goto err;
5996 }
5997
5998 if (w->init_worker(rli, i))
5999 {
6000 sql_print_error("Failed during slave worker thread create");
6001 error= 1;
6002 goto err;
6003 }
6004 set_dynamic(&rli->workers, (uchar*) &w, i);
6005
6006 if (DBUG_EVALUATE_IF("mts_worker_thread_fails", i == 1, 0) ||
6007 (error= mysql_thread_create(key_thread_slave_worker, &th,
6008 &connection_attrib,
6009 handle_slave_worker, (void*) w)))
6010 {
6011 sql_print_error("Failed during slave worker thread create (errno= %d)",
6012 error);
6013 error= 1;
6014 goto err;
6015 }
6016
6017 mysql_mutex_lock(&w->jobs_lock);
6018 if (w->running_status == Slave_worker::NOT_RUNNING)
6019 mysql_cond_wait(&w->jobs_cond, &w->jobs_lock);
6020 mysql_mutex_unlock(&w->jobs_lock);
6021 // Least occupied inited with zero
6022 insert_dynamic(&rli->least_occupied_workers, (uchar*) &w->jobs.len);
6023
6024 err:
6025 if (error && w)
6026 {
6027 delete w;
6028 /*
6029 Any failure after dynarray inserted must follow with deletion
6030 of just created item.
6031 */
6032 if (rli->workers.elements == i + 1)
6033 delete_dynamic_element(&rli->workers, i);
6034 }
6035 return error;
6036 }
6037
6038 /**
6039 Initialization of the central rli members for Coordinator's role,
6040 communication channels such as Assigned Partition Hash (APH),
6041 and starting the Worker pool.
6042
6043 @param n Number of configured Workers in the upcoming session.
6044
6045 @return 0 success
6046 non-zero as failure
6047 */
slave_start_workers(Relay_log_info * rli,ulong n,bool * mts_inited)6048 int slave_start_workers(Relay_log_info *rli, ulong n, bool *mts_inited)
6049 {
6050 uint i;
6051 int error= 0;
6052
6053 mysql_mutex_assert_owner(&rli->run_lock);
6054
6055 if (n == 0 && rli->mts_recovery_group_cnt == 0)
6056 {
6057 reset_dynamic(&rli->workers);
6058 goto end;
6059 }
6060
6061 *mts_inited= true;
6062
6063 /*
6064 The requested through argument number of Workers can be different
6065 from the previous time which ended with an error. Thereby
6066 the effective number of configured Workers is max of the two.
6067 */
6068 rli->init_workers(max(n, rli->recovery_parallel_workers));
6069
6070 // CGAP dynarray holds id:s of partitions of the Current being executed Group
6071 my_init_dynamic_array(&rli->curr_group_assigned_parts,
6072 sizeof(db_worker_hash_entry*),
6073 SLAVE_INIT_DBS_IN_GROUP, 1);
6074 rli->last_assigned_worker= NULL; // associated with curr_group_assigned
6075 my_init_dynamic_array(&rli->curr_group_da, sizeof(Log_event*), 8, 2);
6076 // Least_occupied_workers array to hold items size of Slave_jobs_queue::len
6077 my_init_dynamic_array(&rli->least_occupied_workers, sizeof(ulong), n, 0);
6078
6079 /*
6080 GAQ queue holds seqno:s of scheduled groups. C polls workers in
6081 @c opt_mts_checkpoint_period to update GAQ (see @c next_event())
6082 The length of GAQ is set to be equal to checkpoint_group.
6083 Notice, the size matters for mts_checkpoint_routine's progress loop.
6084 */
6085
6086 rli->gaq= new Slave_committed_queue(rli->get_group_master_log_name(),
6087 sizeof(Slave_job_group),
6088 rli->checkpoint_group, n);
6089 if (!rli->gaq->inited)
6090 return 1;
6091
6092 // length of WQ is actually constant though can be made configurable
6093 rli->mts_slave_worker_queue_len_max= mts_slave_worker_queue_len_max;
6094 rli->mts_pending_jobs_size= 0;
6095 rli->mts_pending_jobs_size_max= ::opt_mts_pending_jobs_size_max;
6096 rli->mts_wq_underrun_w_id= MTS_WORKER_UNDEF;
6097 rli->mts_wq_excess_cnt= 0;
6098 rli->mts_wq_overrun_cnt= 0;
6099 rli->mts_wq_oversize= FALSE;
6100 rli->mts_coordinator_basic_nap= mts_coordinator_basic_nap;
6101 rli->mts_worker_underrun_level= mts_worker_underrun_level;
6102 rli->curr_group_seen_begin= rli->curr_group_seen_gtid= false;
6103 rli->curr_group_isolated= FALSE;
6104 rli->checkpoint_seqno= 0;
6105 rli->mts_last_online_stat= my_time(0);
6106 rli->mts_group_status= Relay_log_info::MTS_NOT_IN_GROUP;
6107
6108 if (init_hash_workers(n)) // MTS: mapping_db_to_worker
6109 {
6110 sql_print_error("Failed to init partitions hash");
6111 error= 1;
6112 goto err;
6113 }
6114
6115 for (i= 0; i < n; i++)
6116 {
6117 if ((error= slave_start_single_worker(rli, i)))
6118 goto err;
6119 rli->slave_parallel_workers++;
6120 }
6121
6122 end:
6123 // Effective end of the recovery right now when there is no gaps
6124 if (!error && rli->mts_recovery_group_cnt == 0)
6125 {
6126 if ((error= rli->mts_finalize_recovery()))
6127 (void) Rpl_info_factory::reset_workers(rli);
6128 if (!error)
6129 error= rli->flush_info(TRUE);
6130 }
6131
6132 err:
6133 return error;
6134 }
6135
6136 /*
6137 Ending Worker threads.
6138
6139 Not in case Coordinator is killed itself, it first waits for
6140 Workers have finished their assignements, and then updates checkpoint.
6141 Workers are notified with setting KILLED status
6142 and waited for their acknowledgment as specified by
6143 worker's running_status.
6144 Coordinator finalizes with its MTS running status to reset few objects.
6145 */
slave_stop_workers(Relay_log_info * rli,bool * mts_inited)6146 void slave_stop_workers(Relay_log_info *rli, bool *mts_inited)
6147 {
6148 int i;
6149 THD *thd= rli->info_thd;
6150 if (!*mts_inited)
6151 return;
6152 else if (rli->slave_parallel_workers == 0)
6153 goto end;
6154
6155 /*
6156 If request for stop slave is received notify worker
6157 to stop.
6158 */
6159 // Initialize worker exit count and max_updated_index to 0 during each stop.
6160 rli->exit_counter= 0;
6161 rli->max_updated_index= (rli->until_condition !=
6162 Relay_log_info::UNTIL_NONE)?
6163 rli->mts_groups_assigned:0;
6164
6165 for (i= rli->workers.elements - 1; i >= 0; i--)
6166 {
6167 Slave_worker *w;
6168 struct slave_job_item item= {NULL}, *job_item= &item;
6169 get_dynamic((DYNAMIC_ARRAY*)&rli->workers, (uchar*) &w, i);
6170 mysql_mutex_lock(&w->jobs_lock);
6171 //Inform all workers to stop
6172 if (w->running_status != Slave_worker::RUNNING)
6173 {
6174 mysql_mutex_unlock(&w->jobs_lock);
6175 continue;
6176 }
6177
6178 w->running_status= Slave_worker::STOP;
6179 (void) set_max_updated_index_on_stop(w, job_item);
6180 mysql_cond_signal(&w->jobs_cond);
6181
6182 mysql_mutex_unlock(&w->jobs_lock);
6183
6184 if (log_warnings > 1)
6185 sql_print_information("Notifying Worker %lu to exit, thd %p", w->id,
6186 w->info_thd);
6187 }
6188
6189 thd_proc_info(thd, "Waiting for workers to exit");
6190
6191 for (i= rli->workers.elements - 1; i >= 0; i--)
6192 {
6193 Slave_worker *w= NULL;
6194 get_dynamic((DYNAMIC_ARRAY*)&rli->workers, (uchar*) &w, i);
6195
6196 mysql_mutex_lock(&w->jobs_lock);
6197 while (w->running_status != Slave_worker::NOT_RUNNING)
6198 {
6199 PSI_stage_info old_stage;
6200 DBUG_ASSERT(w->running_status == Slave_worker::ERROR_LEAVING ||
6201 w->running_status == Slave_worker::STOP ||
6202 w->running_status == Slave_worker::STOP_ACCEPTED);
6203
6204 thd->ENTER_COND(&w->jobs_cond, &w->jobs_lock,
6205 &stage_slave_waiting_workers_to_exit, &old_stage);
6206 mysql_cond_wait(&w->jobs_cond, &w->jobs_lock);
6207 thd->EXIT_COND(&old_stage);
6208 mysql_mutex_lock(&w->jobs_lock);
6209 }
6210 mysql_mutex_unlock(&w->jobs_lock);
6211 }
6212
6213 if (thd->killed == THD::NOT_KILLED)
6214 (void) mts_checkpoint_routine(rli, 0, false, true/*need_data_lock=true*/); // TODO:consider to propagate an error out of the function
6215
6216 for (i= rli->workers.elements - 1; i >= 0; i--)
6217 {
6218 Slave_worker *w= NULL;
6219 get_dynamic((DYNAMIC_ARRAY*)&rli->workers, (uchar*) &w, i);
6220 delete_dynamic_element(&rli->workers, i);
6221 delete w;
6222 }
6223 if (log_warnings > 1)
6224 sql_print_information("Total MTS session statistics: "
6225 "events processed = %llu; "
6226 "worker queues filled over overrun level = %lu; "
6227 "waited due a Worker queue full = %lu; "
6228 "waited due the total size = %lu; "
6229 "slept when Workers occupied = %lu ",
6230 rli->mts_events_assigned, rli->mts_wq_overrun_cnt,
6231 rli->mts_wq_overfill_cnt, rli->wq_size_waits_cnt,
6232 rli->mts_wq_no_underrun_cnt);
6233
6234 DBUG_ASSERT(rli->pending_jobs == 0);
6235 DBUG_ASSERT(rli->mts_pending_jobs_size == 0);
6236
6237 end:
6238 rli->mts_group_status= Relay_log_info::MTS_NOT_IN_GROUP;
6239 destroy_hash_workers(rli);
6240 delete rli->gaq;
6241 delete_dynamic(&rli->least_occupied_workers); // least occupied
6242
6243 // Destroy buffered events of the current group prior to exit.
6244 for (uint i= 0; i < rli->curr_group_da.elements; i++)
6245 delete *(Log_event**) dynamic_array_ptr(&rli->curr_group_da, i);
6246 delete_dynamic(&rli->curr_group_da); // GCDA
6247
6248 delete_dynamic(&rli->curr_group_assigned_parts); // GCAP
6249 rli->deinit_workers();
6250 rli->slave_parallel_workers= 0;
6251 *mts_inited= false;
6252 }
6253
6254
6255 /**
6256 Slave SQL thread entry point.
6257
6258 @param arg Pointer to Relay_log_info object that holds information
6259 for the SQL thread.
6260
6261 @return Always 0.
6262 */
handle_slave_sql(void * arg)6263 pthread_handler_t handle_slave_sql(void *arg)
6264 {
6265 THD *thd; /* needs to be first for thread_stack */
6266 bool thd_added= false;
6267 char llbuff[22],llbuff1[22];
6268 char saved_log_name[FN_REFLEN];
6269 char saved_master_log_name[FN_REFLEN];
6270 my_off_t saved_log_pos= 0;
6271 my_off_t saved_master_log_pos= 0;
6272 my_off_t saved_skip= 0;
6273
6274 Relay_log_info* rli = ((Master_info*)arg)->rli;
6275 const char *errmsg;
6276 const char *error_string;
6277 bool mts_inited= false;
6278
6279 // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
6280 my_thread_init();
6281 DBUG_ENTER("handle_slave_sql");
6282
6283 DBUG_ASSERT(rli->inited);
6284 mysql_mutex_lock(&rli->run_lock);
6285 DBUG_ASSERT(!rli->slave_running);
6286 errmsg= 0;
6287 error_string= 0;
6288 #ifndef DBUG_OFF
6289 rli->events_until_exit = abort_slave_event_count;
6290 #endif
6291
6292 thd = new THD; // note that contructor of THD uses DBUG_ !
6293 thd->thread_stack = (char*)&thd; // remember where our stack is
6294 rli->info_thd= thd;
6295
6296 /* Inform waiting threads that slave has started */
6297 rli->slave_run_id++;
6298 rli->slave_running = 1;
6299 rli->reported_unsafe_warning= false;
6300 rli->sql_thread_kill_accepted= false;
6301
6302 pthread_detach_this_thread();
6303 if (init_slave_thread(thd, SLAVE_THD_SQL))
6304 {
6305 /*
6306 TODO: this is currently broken - slave start and change master
6307 will be stuck if we fail here
6308 */
6309 mysql_cond_broadcast(&rli->start_cond);
6310 mysql_mutex_unlock(&rli->run_lock);
6311 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6312 "Failed during slave thread initialization");
6313 goto err;
6314 }
6315 thd->init_for_queries(rli);
6316 thd->temporary_tables = rli->save_temporary_tables; // restore temp tables
6317 set_thd_in_use_temporary_tables(rli); // (re)set sql_thd in use for saved temp tables
6318
6319 mysql_mutex_lock(&LOCK_thread_count);
6320 add_global_thread(thd);
6321 thd_added= true;
6322 mysql_mutex_unlock(&LOCK_thread_count);
6323
6324 /* MTS: starting the worker pool */
6325 if (slave_start_workers(rli, rli->opt_slave_parallel_workers, &mts_inited) != 0)
6326 {
6327 mysql_cond_broadcast(&rli->start_cond);
6328 mysql_mutex_unlock(&rli->run_lock);
6329 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6330 "Failed during slave workers initialization");
6331 goto err;
6332 }
6333 /*
6334 We are going to set slave_running to 1. Assuming slave I/O thread is
6335 alive and connected, this is going to make Seconds_Behind_Master be 0
6336 i.e. "caught up". Even if we're just at start of thread. Well it's ok, at
6337 the moment we start we can think we are caught up, and the next second we
6338 start receiving data so we realize we are not caught up and
6339 Seconds_Behind_Master grows. No big deal.
6340 */
6341 rli->abort_slave = 0;
6342
6343 /*
6344 Reset errors for a clean start (otherwise, if the master is idle, the SQL
6345 thread may execute no Query_log_event, so the error will remain even
6346 though there's no problem anymore). Do not reset the master timestamp
6347 (imagine the slave has caught everything, the STOP SLAVE and START SLAVE:
6348 as we are not sure that we are going to receive a query, we want to
6349 remember the last master timestamp (to say how many seconds behind we are
6350 now.
6351 But the master timestamp is reset by RESET SLAVE & CHANGE MASTER.
6352 */
6353 rli->clear_error();
6354
6355 if (rli->update_is_transactional())
6356 {
6357 mysql_cond_broadcast(&rli->start_cond);
6358 mysql_mutex_unlock(&rli->run_lock);
6359 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6360 "Error checking if the relay log repository is transactional.");
6361 goto err;
6362 }
6363
6364 if (!rli->is_transactional())
6365 rli->report(WARNING_LEVEL, 0,
6366 "If a crash happens this configuration does not guarantee that the relay "
6367 "log info will be consistent");
6368
6369 mysql_mutex_unlock(&rli->run_lock);
6370 mysql_cond_broadcast(&rli->start_cond);
6371
6372 DEBUG_SYNC(thd, "after_start_slave");
6373
6374 //tell the I/O thread to take relay_log_space_limit into account from now on
6375 mysql_mutex_lock(&rli->log_space_lock);
6376 rli->ignore_log_space_limit= 0;
6377 mysql_mutex_unlock(&rli->log_space_lock);
6378 rli->trans_retries= 0; // start from "no error"
6379 DBUG_PRINT("info", ("rli->trans_retries: %lu", rli->trans_retries));
6380
6381 if (rli->init_relay_log_pos(rli->get_group_relay_log_name(),
6382 rli->get_group_relay_log_pos(),
6383 true/*need_data_lock=true*/, &errmsg,
6384 1 /*look for a description_event*/))
6385 {
6386 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6387 "Error initializing relay log position: %s", errmsg);
6388 goto err;
6389 }
6390 THD_CHECK_SENTRY(thd);
6391 #ifndef DBUG_OFF
6392 {
6393 char llbuf1[22], llbuf2[22];
6394 DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
6395 llstr(my_b_tell(rli->cur_log),llbuf1),
6396 llstr(rli->get_event_relay_log_pos(),llbuf2)));
6397 DBUG_ASSERT(rli->get_event_relay_log_pos() >= BIN_LOG_HEADER_SIZE);
6398 /*
6399 Wonder if this is correct. I (Guilhem) wonder if my_b_tell() returns the
6400 correct position when it's called just after my_b_seek() (the questionable
6401 stuff is those "seek is done on next read" comments in the my_b_seek()
6402 source code).
6403 The crude reality is that this assertion randomly fails whereas
6404 replication seems to work fine. And there is no easy explanation why it
6405 fails (as we my_b_seek(rli->event_relay_log_pos) at the very end of
6406 init_relay_log_pos() called above). Maybe the assertion would be
6407 meaningful if we held rli->data_lock between the my_b_seek() and the
6408 DBUG_ASSERT().
6409 */
6410 #ifdef SHOULD_BE_CHECKED
6411 DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->get_event_relay_log_pos());
6412 #endif
6413 }
6414 #endif
6415 DBUG_ASSERT(rli->info_thd == thd);
6416
6417 #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
6418 /* engine specific hook, to be made generic */
6419 if (ndb_wait_setup_func && ndb_wait_setup_func(opt_ndb_wait_setup))
6420 {
6421 sql_print_warning("Slave SQL thread : NDB : Tables not available after %lu"
6422 " seconds. Consider increasing --ndb-wait-setup value",
6423 opt_ndb_wait_setup);
6424 }
6425 #endif
6426
6427 DBUG_PRINT("master_info",("log_file_name: %s position: %s",
6428 rli->get_group_master_log_name(),
6429 llstr(rli->get_group_master_log_pos(),llbuff)));
6430 if (log_warnings)
6431 sql_print_information("Slave SQL thread initialized, starting replication in \
6432 log '%s' at position %s, relay log '%s' position: %s", rli->get_rpl_log_name(),
6433 llstr(rli->get_group_master_log_pos(),llbuff),rli->get_group_relay_log_name(),
6434 llstr(rli->get_group_relay_log_pos(),llbuff1));
6435
6436 if (check_temp_dir(rli->slave_patternload_file))
6437 {
6438 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
6439 "Unable to use slave's temporary directory %s - %s",
6440 slave_load_tmpdir, thd->get_stmt_da()->message());
6441 goto err;
6442 }
6443
6444 /* execute init_slave variable */
6445 if (opt_init_slave.length)
6446 {
6447 execute_init_command(thd, &opt_init_slave, &LOCK_sys_init_slave);
6448 if (thd->is_slave_error)
6449 {
6450 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
6451 "Slave SQL thread aborted. Can't execute init_slave query");
6452 goto err;
6453 }
6454 }
6455
6456 /*
6457 First check until condition - probably there is nothing to execute. We
6458 do not want to wait for next event in this case.
6459 */
6460 mysql_mutex_lock(&rli->data_lock);
6461 if (rli->slave_skip_counter)
6462 {
6463 strmake(saved_log_name, rli->get_group_relay_log_name(), FN_REFLEN - 1);
6464 strmake(saved_master_log_name, rli->get_group_master_log_name(), FN_REFLEN - 1);
6465 saved_log_pos= rli->get_group_relay_log_pos();
6466 saved_master_log_pos= rli->get_group_master_log_pos();
6467 saved_skip= rli->slave_skip_counter;
6468 }
6469 if (rli->until_condition != Relay_log_info::UNTIL_NONE &&
6470 rli->is_until_satisfied(thd, NULL))
6471 {
6472 mysql_mutex_unlock(&rli->data_lock);
6473 goto err;
6474 }
6475 mysql_mutex_unlock(&rli->data_lock);
6476
6477 /* Read queries from the IO/THREAD until this thread is killed */
6478
6479 while (!sql_slave_killed(thd,rli))
6480 {
6481 THD_STAGE_INFO(thd, stage_reading_event_from_the_relay_log);
6482 DBUG_ASSERT(rli->info_thd == thd);
6483 THD_CHECK_SENTRY(thd);
6484
6485 if (saved_skip && rli->slave_skip_counter == 0)
6486 {
6487 sql_print_information("'SQL_SLAVE_SKIP_COUNTER=%ld' executed at "
6488 "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
6489 "master_log_pos='%ld' and new position at "
6490 "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
6491 "master_log_pos='%ld' ",
6492 (ulong) saved_skip, saved_log_name, (ulong) saved_log_pos,
6493 saved_master_log_name, (ulong) saved_master_log_pos,
6494 rli->get_group_relay_log_name(), (ulong) rli->get_group_relay_log_pos(),
6495 rli->get_group_master_log_name(), (ulong) rli->get_group_master_log_pos());
6496 saved_skip= 0;
6497 }
6498
6499 if (exec_relay_log_event(thd,rli))
6500 {
6501 DBUG_PRINT("info", ("exec_relay_log_event() failed"));
6502 // do not scare the user if SQL thread was simply killed or stopped
6503 if (!sql_slave_killed(thd,rli))
6504 {
6505 /*
6506 retrieve as much info as possible from the thd and, error
6507 codes and warnings and print this to the error log as to
6508 allow the user to locate the error
6509 */
6510 uint32 const last_errno= rli->last_error().number;
6511
6512 if (thd->is_error())
6513 {
6514 char const *const errmsg= thd->get_stmt_da()->message();
6515
6516 DBUG_PRINT("info",
6517 ("thd->get_stmt_da()->sql_errno()=%d; "
6518 "rli->last_error.number=%d",
6519 thd->get_stmt_da()->sql_errno(), last_errno));
6520 if (last_errno == 0)
6521 {
6522 /*
6523 This function is reporting an error which was not reported
6524 while executing exec_relay_log_event().
6525 */
6526 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
6527 "%s", errmsg);
6528 }
6529 else if (last_errno != thd->get_stmt_da()->sql_errno())
6530 {
6531 /*
6532 * An error was reported while executing exec_relay_log_event()
6533 * however the error code differs from what is in the thread.
6534 * This function prints out more information to help finding
6535 * what caused the problem.
6536 */
6537 sql_print_error("Slave (additional info): %s Error_code: %d",
6538 errmsg, thd->get_stmt_da()->sql_errno());
6539 }
6540 }
6541
6542 /* Print any warnings issued */
6543 Diagnostics_area::Sql_condition_iterator it=
6544 thd->get_stmt_da()->sql_conditions();
6545 const Sql_condition *err;
6546 /*
6547 Added controlled slave thread cancel for replication
6548 of user-defined variables.
6549 */
6550 bool udf_error = false;
6551 while ((err= it++))
6552 {
6553 if (err->get_sql_errno() == ER_CANT_OPEN_LIBRARY)
6554 udf_error = true;
6555 sql_print_warning("Slave: %s Error_code: %d", err->get_message_text(), err->get_sql_errno());
6556 }
6557 if (udf_error)
6558 error_string= "Error loading user-defined library, slave SQL "
6559 "thread aborted. Install the missing library, and restart the"
6560 " slave SQL thread with \"SLAVE START\".";
6561 else
6562 error_string= "Error running query, slave SQL thread aborted."
6563 " Fix the problem, and restart the slave SQL thread with "
6564 "\"SLAVE START\".";
6565 }
6566 goto err;
6567 }
6568 }
6569
6570 err:
6571
6572 slave_stop_workers(rli, &mts_inited); // stopping worker pool
6573 /* Thread stopped. Print the current replication position to the log */
6574 if (error_string)
6575 sql_print_error("%s We stopped at log '%s' position %s.", error_string,
6576 rli->get_rpl_log_name(),
6577 llstr(rli->get_group_master_log_pos(), llbuff));
6578 else
6579 sql_print_information("Slave SQL thread exiting, replication stopped in log"
6580 " '%s' at position %s",
6581 rli->get_rpl_log_name(),
6582 llstr(rli->get_group_master_log_pos(), llbuff));
6583 rli->clear_mts_recovery_groups();
6584
6585 /*
6586 Some events set some playgrounds, which won't be cleared because thread
6587 stops. Stopping of this thread may not be known to these events ("stop"
6588 request is detected only by the present function, not by events), so we
6589 must "proactively" clear playgrounds:
6590 */
6591 thd->clear_error();
6592 rli->cleanup_context(thd, 1);
6593 /*
6594 Some extra safety, which should not been needed (normally, event deletion
6595 should already have done these assignments (each event which sets these
6596 variables is supposed to set them to 0 before terminating)).
6597 */
6598 thd->catalog= 0;
6599 thd->reset_query();
6600 thd->reset_db(NULL, 0);
6601
6602 THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
6603 mysql_mutex_lock(&rli->run_lock);
6604 /* We need data_lock, at least to wake up any waiting master_pos_wait() */
6605 mysql_mutex_lock(&rli->data_lock);
6606 DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun
6607 /* When master_pos_wait() wakes up it will check this and terminate */
6608 rli->slave_running= 0;
6609 /* Forget the relay log's format */
6610 rli->set_rli_description_event(NULL);
6611 /* Wake up master_pos_wait() */
6612 mysql_mutex_unlock(&rli->data_lock);
6613 DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
6614 mysql_cond_broadcast(&rli->data_cond);
6615 rli->ignore_log_space_limit= 0; /* don't need any lock */
6616 /* we die so won't remember charset - re-update them on next thread start */
6617 rli->cached_charset_invalidate();
6618 rli->save_temporary_tables = thd->temporary_tables;
6619
6620 /*
6621 TODO: see if we can do this conditionally in next_event() instead
6622 to avoid unneeded position re-init
6623 */
6624 thd->temporary_tables = 0; // remove tempation from destructor to close them
6625 DBUG_ASSERT(thd->net.buff != 0);
6626 net_end(&thd->net); // destructor will not free it, because we are weird
6627 DBUG_ASSERT(rli->info_thd == thd);
6628 THD_CHECK_SENTRY(thd);
6629 rli->info_thd= 0;
6630 set_thd_in_use_temporary_tables(rli); // (re)set info_thd in use for saved temp tables
6631
6632 thd->release_resources();
6633 THD_CHECK_SENTRY(thd);
6634 if (thd_added)
6635 remove_global_thread(thd);
6636 delete thd;
6637 /*
6638 Note: the order of the broadcast and unlock calls below (first broadcast, then unlock)
6639 is important. Otherwise a killer_thread can execute between the calls and
6640 delete the mi structure leading to a crash! (see BUG#25306 for details)
6641 */
6642 mysql_cond_broadcast(&rli->stop_cond);
6643 DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
6644 mysql_mutex_unlock(&rli->run_lock); // tell the world we are done
6645
6646 DBUG_LEAVE; // Must match DBUG_ENTER()
6647 my_thread_end();
6648 #if OPENSSL_VERSION_NUMBER < 0x10100000L
6649 ERR_remove_thread_state(0);
6650 #endif /* OPENSSL_VERSION_NUMBER < 0x10100000L */
6651 pthread_exit(0);
6652 return 0; // Avoid compiler warnings
6653 }
6654
6655
6656 /*
6657 process_io_create_file()
6658 */
6659
process_io_create_file(Master_info * mi,Create_file_log_event * cev)6660 static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
6661 {
6662 int error = 1;
6663 ulong num_bytes;
6664 bool cev_not_written;
6665 THD *thd = mi->info_thd;
6666 NET *net = &mi->mysql->net;
6667 DBUG_ENTER("process_io_create_file");
6668
6669 mysql_mutex_assert_owner(&mi->data_lock);
6670
6671 if (unlikely(!cev->is_valid()))
6672 DBUG_RETURN(1);
6673
6674 if (!rpl_filter->db_ok(cev->db))
6675 {
6676 skip_load_data_infile(net);
6677 DBUG_RETURN(0);
6678 }
6679 DBUG_ASSERT(cev->inited_from_old);
6680 thd->file_id = cev->file_id = mi->file_id++;
6681 thd->server_id = cev->server_id;
6682 cev_not_written = 1;
6683
6684 if (unlikely(net_request_file(net,cev->fname)))
6685 {
6686 sql_print_error("Slave I/O: failed requesting download of '%s'",
6687 cev->fname);
6688 goto err;
6689 }
6690
6691 /*
6692 This dummy block is so we could instantiate Append_block_log_event
6693 once and then modify it slightly instead of doing it multiple times
6694 in the loop
6695 */
6696 {
6697 Append_block_log_event aev(thd,0,0,0,0);
6698
6699 for (;;)
6700 {
6701 if (unlikely((num_bytes=my_net_read(net)) == packet_error))
6702 {
6703 sql_print_error("Network read error downloading '%s' from master",
6704 cev->fname);
6705 goto err;
6706 }
6707 if (unlikely(!num_bytes)) /* eof */
6708 {
6709 /* 3.23 master wants it */
6710 net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0);
6711 /*
6712 If we wrote Create_file_log_event, then we need to write
6713 Execute_load_log_event. If we did not write Create_file_log_event,
6714 then this is an empty file and we can just do as if the LOAD DATA
6715 INFILE had not existed, i.e. write nothing.
6716 */
6717 if (unlikely(cev_not_written))
6718 break;
6719 Execute_load_log_event xev(thd,0,0);
6720 xev.log_pos = cev->log_pos;
6721 if (unlikely(mi->rli->relay_log.append_event(&xev, mi) != 0))
6722 {
6723 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
6724 ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
6725 "error writing Exec_load event to relay log");
6726 goto err;
6727 }
6728 mi->rli->relay_log.harvest_bytes_written(mi->rli, true/*need_log_space_lock=true*/);
6729 break;
6730 }
6731 if (unlikely(cev_not_written))
6732 {
6733 cev->block = net->read_pos;
6734 cev->block_len = num_bytes;
6735 if (unlikely(mi->rli->relay_log.append_event(cev, mi) != 0))
6736 {
6737 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
6738 ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
6739 "error writing Create_file event to relay log");
6740 goto err;
6741 }
6742 cev_not_written=0;
6743 mi->rli->relay_log.harvest_bytes_written(mi->rli, true/*need_log_space_lock=true*/);
6744 }
6745 else
6746 {
6747 aev.block = net->read_pos;
6748 aev.block_len = num_bytes;
6749 aev.log_pos = cev->log_pos;
6750 if (unlikely(mi->rli->relay_log.append_event(&aev, mi) != 0))
6751 {
6752 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
6753 ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
6754 "error writing Append_block event to relay log");
6755 goto err;
6756 }
6757 mi->rli->relay_log.harvest_bytes_written(mi->rli, true/*need_log_space_lock=true*/);
6758 }
6759 }
6760 }
6761 error=0;
6762 err:
6763 DBUG_RETURN(error);
6764 }
6765
6766
6767 /**
6768 Used by the slave IO thread when it receives a rotate event from the
6769 master.
6770
6771 Updates the master info with the place in the next binary log where
6772 we should start reading. Rotate the relay log to avoid mixed-format
6773 relay logs.
6774
6775 @param mi master_info for the slave
6776 @param rev The rotate log event read from the master
6777
6778 @note The caller must hold mi->data_lock before invoking this function.
6779
6780 @retval 0 ok
6781 @retval 1 error
6782 */
process_io_rotate(Master_info * mi,Rotate_log_event * rev)6783 static int process_io_rotate(Master_info *mi, Rotate_log_event *rev)
6784 {
6785 DBUG_ENTER("process_io_rotate");
6786 mysql_mutex_assert_owner(&mi->data_lock);
6787
6788 if (unlikely(!rev->is_valid()))
6789 DBUG_RETURN(1);
6790
6791 /* Safe copy as 'rev' has been "sanitized" in Rotate_log_event's ctor */
6792 memcpy(const_cast<char *>(mi->get_master_log_name()),
6793 rev->new_log_ident, rev->ident_len + 1);
6794 mi->set_master_log_pos(rev->pos);
6795 DBUG_PRINT("info", ("new (master_log_name, master_log_pos): ('%s', %lu)",
6796 mi->get_master_log_name(), (ulong) mi->get_master_log_pos()));
6797 #ifndef DBUG_OFF
6798 /*
6799 If we do not do this, we will be getting the first
6800 rotate event forever, so we need to not disconnect after one.
6801 */
6802 if (disconnect_slave_event_count)
6803 mi->events_until_exit++;
6804 #endif
6805
6806 /*
6807 If mi_description_event is format <4, there is conversion in the
6808 relay log to the slave's format (4). And Rotate can mean upgrade or
6809 nothing. If upgrade, it's to 5.0 or newer, so we will get a Format_desc, so
6810 no need to reset mi_description_event now. And if it's nothing (same
6811 master version as before), no need (still using the slave's format).
6812 */
6813 Format_description_log_event *old_fdle= mi->get_mi_description_event();
6814 if (old_fdle->binlog_version >= 4)
6815 {
6816 DBUG_ASSERT(old_fdle->checksum_alg ==
6817 mi->rli->relay_log.relay_log_checksum_alg);
6818 Format_description_log_event *new_fdle= new
6819 Format_description_log_event(3);
6820 new_fdle->checksum_alg= mi->rli->relay_log.relay_log_checksum_alg;
6821 mi->set_mi_description_event(new_fdle);
6822 }
6823 /*
6824 Rotate the relay log makes binlog format detection easier (at next slave
6825 start or mysqlbinlog)
6826 */
6827 int ret= rotate_relay_log(mi, true/*need_log_space_lock=true*/);
6828 DBUG_RETURN(ret);
6829 }
6830
6831 /**
6832 Reads a 3.23 event and converts it to the slave's format. This code was
6833 copied from MySQL 4.0.
6834
6835 @note The caller must hold mi->data_lock before invoking this function.
6836 */
queue_binlog_ver_1_event(Master_info * mi,const char * buf,ulong event_len)6837 static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
6838 ulong event_len)
6839 {
6840 const char *errmsg = 0;
6841 ulong inc_pos;
6842 bool ignore_event= 0;
6843 char *tmp_buf = 0;
6844 Relay_log_info *rli= mi->rli;
6845 DBUG_ENTER("queue_binlog_ver_1_event");
6846
6847 mysql_mutex_assert_owner(&mi->data_lock);
6848
6849 /*
6850 If we get Load event, we need to pass a non-reusable buffer
6851 to read_log_event, so we do a trick
6852 */
6853 if (buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
6854 {
6855 if (unlikely(!(tmp_buf=(char*)my_malloc(event_len+1,MYF(MY_WME)))))
6856 {
6857 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6858 ER(ER_SLAVE_FATAL_ERROR), "Memory allocation failed");
6859 DBUG_RETURN(1);
6860 }
6861 memcpy(tmp_buf,buf,event_len);
6862 /*
6863 Create_file constructor wants a 0 as last char of buffer, this 0 will
6864 serve as the string-termination char for the file's name (which is at the
6865 end of the buffer)
6866 We must increment event_len, otherwise the event constructor will not see
6867 this end 0, which leads to segfault.
6868 */
6869 tmp_buf[event_len++]=0;
6870 int4store(tmp_buf+EVENT_LEN_OFFSET, event_len);
6871 buf = (const char*)tmp_buf;
6872 }
6873 /*
6874 This will transform LOAD_EVENT into CREATE_FILE_EVENT, ask the master to
6875 send the loaded file, and write it to the relay log in the form of
6876 Append_block/Exec_load (the SQL thread needs the data, as that thread is not
6877 connected to the master).
6878 */
6879 Log_event *ev=
6880 Log_event::read_log_event(buf, event_len, &errmsg,
6881 mi->get_mi_description_event(), 0);
6882 if (unlikely(!ev))
6883 {
6884 sql_print_error("Read invalid event from master: '%s',\
6885 master could be corrupt but a more likely cause of this is a bug",
6886 errmsg);
6887 my_free((char*) tmp_buf);
6888 DBUG_RETURN(1);
6889 }
6890
6891 mi->set_master_log_pos(ev->log_pos); /* 3.23 events don't contain log_pos */
6892 switch (ev->get_type_code()) {
6893 case STOP_EVENT:
6894 ignore_event= 1;
6895 inc_pos= event_len;
6896 break;
6897 case ROTATE_EVENT:
6898 if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
6899 {
6900 delete ev;
6901 DBUG_RETURN(1);
6902 }
6903 inc_pos= 0;
6904 break;
6905 case CREATE_FILE_EVENT:
6906 /*
6907 Yes it's possible to have CREATE_FILE_EVENT here, even if we're in
6908 queue_old_event() which is for 3.23 events which don't comprise
6909 CREATE_FILE_EVENT. This is because read_log_event() above has just
6910 transformed LOAD_EVENT into CREATE_FILE_EVENT.
6911 */
6912 {
6913 /* We come here when and only when tmp_buf != 0 */
6914 DBUG_ASSERT(tmp_buf != 0);
6915 inc_pos=event_len;
6916 ev->log_pos+= inc_pos;
6917 int error = process_io_create_file(mi,(Create_file_log_event*)ev);
6918 delete ev;
6919 mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
6920 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
6921 my_free((char*)tmp_buf);
6922 DBUG_RETURN(error);
6923 }
6924 default:
6925 inc_pos= event_len;
6926 break;
6927 }
6928 if (likely(!ignore_event))
6929 {
6930 if (ev->log_pos)
6931 /*
6932 Don't do it for fake Rotate events (see comment in
6933 Log_event::Log_event(const char* buf...) in log_event.cc).
6934 */
6935 ev->log_pos+= event_len; /* make log_pos be the pos of the end of the event */
6936 if (unlikely(rli->relay_log.append_event(ev, mi) != 0))
6937 {
6938 delete ev;
6939 DBUG_RETURN(1);
6940 }
6941 rli->relay_log.harvest_bytes_written(rli, true/*need_log_space_lock=true*/);
6942 }
6943 delete ev;
6944 mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
6945 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
6946 DBUG_RETURN(0);
6947 }
6948
6949 /**
6950 Reads a 4.0 event and converts it to the slave's format. This code was copied
6951 from queue_binlog_ver_1_event(), with some affordable simplifications.
6952
6953 @note The caller must hold mi->data_lock before invoking this function.
6954 */
queue_binlog_ver_3_event(Master_info * mi,const char * buf,ulong event_len)6955 static int queue_binlog_ver_3_event(Master_info *mi, const char *buf,
6956 ulong event_len)
6957 {
6958 const char *errmsg = 0;
6959 ulong inc_pos;
6960 char *tmp_buf = 0;
6961 Relay_log_info *rli= mi->rli;
6962 DBUG_ENTER("queue_binlog_ver_3_event");
6963
6964 mysql_mutex_assert_owner(&mi->data_lock);
6965
6966 /* read_log_event() will adjust log_pos to be end_log_pos */
6967 Log_event *ev=
6968 Log_event::read_log_event(buf, event_len, &errmsg,
6969 mi->get_mi_description_event(), 0);
6970 if (unlikely(!ev))
6971 {
6972 sql_print_error("Read invalid event from master: '%s',\
6973 master could be corrupt but a more likely cause of this is a bug",
6974 errmsg);
6975 my_free((char*) tmp_buf);
6976 DBUG_RETURN(1);
6977 }
6978 switch (ev->get_type_code()) {
6979 case STOP_EVENT:
6980 goto err;
6981 case ROTATE_EVENT:
6982 if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
6983 {
6984 delete ev;
6985 DBUG_RETURN(1);
6986 }
6987 inc_pos= 0;
6988 break;
6989 default:
6990 inc_pos= event_len;
6991 break;
6992 }
6993
6994 if (unlikely(rli->relay_log.append_event(ev, mi) != 0))
6995 {
6996 delete ev;
6997 DBUG_RETURN(1);
6998 }
6999 rli->relay_log.harvest_bytes_written(rli, true/*need_log_space_lock=true*/);
7000 delete ev;
7001 mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
7002 err:
7003 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
7004 DBUG_RETURN(0);
7005 }
7006
7007 /*
7008 queue_old_event()
7009
7010 Writes a 3.23 or 4.0 event to the relay log, after converting it to the 5.0
7011 (exactly, slave's) format. To do the conversion, we create a 5.0 event from
7012 the 3.23/4.0 bytes, then write this event to the relay log.
7013
7014 TODO:
7015 Test this code before release - it has to be tested on a separate
7016 setup with 3.23 master or 4.0 master
7017 */
7018
queue_old_event(Master_info * mi,const char * buf,ulong event_len)7019 static int queue_old_event(Master_info *mi, const char *buf,
7020 ulong event_len)
7021 {
7022 DBUG_ENTER("queue_old_event");
7023
7024 mysql_mutex_assert_owner(&mi->data_lock);
7025
7026 switch (mi->get_mi_description_event()->binlog_version)
7027 {
7028 case 1:
7029 DBUG_RETURN(queue_binlog_ver_1_event(mi,buf,event_len));
7030 case 3:
7031 DBUG_RETURN(queue_binlog_ver_3_event(mi,buf,event_len));
7032 default: /* unsupported format; eg version 2 */
7033 DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()",
7034 mi->get_mi_description_event()->binlog_version));
7035 DBUG_RETURN(1);
7036 }
7037 }
7038
7039 /*
7040 queue_event()
7041
7042 If the event is 3.23/4.0, passes it to queue_old_event() which will convert
7043 it. Otherwise, writes a 5.0 (or newer) event to the relay log. Then there is
7044 no format conversion, it's pure read/write of bytes.
7045 So a 5.0.0 slave's relay log can contain events in the slave's format or in
7046 any >=5.0.0 format.
7047 */
7048
queue_event(Master_info * mi,const char * buf,ulong event_len)7049 static int queue_event(Master_info* mi,const char* buf, ulong event_len)
7050 {
7051 int error= 0;
7052 String error_msg;
7053 ulong inc_pos= 0;
7054 Relay_log_info *rli= mi->rli;
7055 mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
7056 ulong s_id;
7057 bool unlock_data_lock= TRUE;
7058 /*
7059 FD_q must have been prepared for the first R_a event
7060 inside get_master_version_and_clock()
7061 Show-up of FD:s affects checksum_alg at once because
7062 that changes FD_queue.
7063 */
7064 uint8 checksum_alg= mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF ?
7065 mi->checksum_alg_before_fd :
7066 mi->rli->relay_log.relay_log_checksum_alg;
7067
7068 char *save_buf= NULL; // needed for checksumming the fake Rotate event
7069 char rot_buf[LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN + FN_REFLEN];
7070 Gtid gtid= { 0, 0 };
7071 Gtid old_retrieved_gtid= { 0, 0 };
7072 Log_event_type event_type= (Log_event_type)buf[EVENT_TYPE_OFFSET];
7073
7074 DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
7075 checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF ||
7076 checksum_alg == BINLOG_CHECKSUM_ALG_CRC32);
7077
7078 DBUG_ENTER("queue_event");
7079 /*
7080 FD_queue checksum alg description does not apply in a case of
7081 FD itself. The one carries both parts of the checksum data.
7082 */
7083 if (event_type == FORMAT_DESCRIPTION_EVENT)
7084 {
7085 checksum_alg= get_checksum_alg(buf, event_len);
7086 }
7087 else if (event_type == START_EVENT_V3)
7088 {
7089 // checksum behaviour is similar to the pre-checksum FD handling
7090 mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
7091 mysql_mutex_lock(&mi->data_lock);
7092 mi->get_mi_description_event()->checksum_alg=
7093 mi->rli->relay_log.relay_log_checksum_alg= checksum_alg=
7094 BINLOG_CHECKSUM_ALG_OFF;
7095 mysql_mutex_unlock(&mi->data_lock);
7096 }
7097
7098 // does not hold always because of old binlog can work with NM
7099 // DBUG_ASSERT(checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
7100
7101 // should hold unless manipulations with RL. Tests that do that
7102 // will have to refine the clause.
7103 DBUG_ASSERT(mi->rli->relay_log.relay_log_checksum_alg !=
7104 BINLOG_CHECKSUM_ALG_UNDEF);
7105
7106 // Emulate the network corruption
7107 DBUG_EXECUTE_IF("corrupt_queue_event",
7108 if (event_type != FORMAT_DESCRIPTION_EVENT)
7109 {
7110 char *debug_event_buf_c = (char*) buf;
7111 int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
7112 debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
7113 DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos));
7114 DBUG_SET("");
7115 }
7116 );
7117
7118 if (event_checksum_test((uchar *) buf, event_len, checksum_alg))
7119 {
7120 error= ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE;
7121 unlock_data_lock= FALSE;
7122 goto err;
7123 }
7124
7125 mysql_mutex_lock(&mi->data_lock);
7126
7127 if (mi->get_mi_description_event()->binlog_version < 4 &&
7128 event_type != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
7129 {
7130 int ret= queue_old_event(mi,buf,event_len);
7131 mysql_mutex_unlock(&mi->data_lock);
7132 DBUG_RETURN(ret);
7133 }
7134
7135 switch (event_type) {
7136 case STOP_EVENT:
7137 /*
7138 We needn't write this event to the relay log. Indeed, it just indicates a
7139 master server shutdown. The only thing this does is cleaning. But
7140 cleaning is already done on a per-master-thread basis (as the master
7141 server is shutting down cleanly, it has written all DROP TEMPORARY TABLE
7142 prepared statements' deletion are TODO only when we binlog prep stmts).
7143
7144 We don't even increment mi->get_master_log_pos(), because we may be just after
7145 a Rotate event. Btw, in a few milliseconds we are going to have a Start
7146 event from the next binlog (unless the master is presently running
7147 without --log-bin).
7148 */
7149 goto err;
7150 case ROTATE_EVENT:
7151 {
7152 Rotate_log_event rev(buf, checksum_alg != BINLOG_CHECKSUM_ALG_OFF ?
7153 event_len - BINLOG_CHECKSUM_LEN : event_len,
7154 mi->get_mi_description_event());
7155
7156 if (unlikely(process_io_rotate(mi, &rev)))
7157 {
7158 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
7159 goto err;
7160 }
7161 /*
7162 Checksum special cases for the fake Rotate (R_f) event caused by the protocol
7163 of events generation and serialization in RL where Rotate of master is
7164 queued right next to FD of slave.
7165 Since it's only FD that carries the alg desc of FD_s has to apply to R_m.
7166 Two special rules apply only to the first R_f which comes in before any FD_m.
7167 The 2nd R_f should be compatible with the FD_s that must have taken over
7168 the last seen FD_m's (A).
7169
7170 RSC_1: If OM \and fake Rotate \and slave is configured to
7171 to compute checksum for its first FD event for RL
7172 the fake Rotate gets checksummed here.
7173 */
7174 if (uint4korr(&buf[0]) == 0 && checksum_alg == BINLOG_CHECKSUM_ALG_OFF &&
7175 mi->rli->relay_log.relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_OFF)
7176 {
7177 ha_checksum rot_crc= my_checksum(0L, NULL, 0);
7178 event_len += BINLOG_CHECKSUM_LEN;
7179 memcpy(rot_buf, buf, event_len - BINLOG_CHECKSUM_LEN);
7180 int4store(&rot_buf[EVENT_LEN_OFFSET],
7181 uint4korr(rot_buf + EVENT_LEN_OFFSET) + BINLOG_CHECKSUM_LEN);
7182 rot_crc= my_checksum(rot_crc, (const uchar *) rot_buf,
7183 event_len - BINLOG_CHECKSUM_LEN);
7184 int4store(&rot_buf[event_len - BINLOG_CHECKSUM_LEN], rot_crc);
7185 DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
7186 DBUG_ASSERT(mi->get_mi_description_event()->checksum_alg ==
7187 mi->rli->relay_log.relay_log_checksum_alg);
7188 /* the first one */
7189 DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
7190 save_buf= (char *) buf;
7191 buf= rot_buf;
7192 }
7193 else
7194 /*
7195 RSC_2: If NM \and fake Rotate \and slave does not compute checksum
7196 the fake Rotate's checksum is stripped off before relay-logging.
7197 */
7198 if (uint4korr(&buf[0]) == 0 && checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
7199 mi->rli->relay_log.relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_OFF)
7200 {
7201 event_len -= BINLOG_CHECKSUM_LEN;
7202 memcpy(rot_buf, buf, event_len);
7203 int4store(&rot_buf[EVENT_LEN_OFFSET],
7204 uint4korr(rot_buf + EVENT_LEN_OFFSET) - BINLOG_CHECKSUM_LEN);
7205 DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
7206 DBUG_ASSERT(mi->get_mi_description_event()->checksum_alg ==
7207 mi->rli->relay_log.relay_log_checksum_alg);
7208 /* the first one */
7209 DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
7210 save_buf= (char *) buf;
7211 buf= rot_buf;
7212 }
7213 /*
7214 Now the I/O thread has just changed its mi->get_master_log_name(), so
7215 incrementing mi->get_master_log_pos() is nonsense.
7216 */
7217 inc_pos= 0;
7218 break;
7219 }
7220 case FORMAT_DESCRIPTION_EVENT:
7221 {
7222 /*
7223 Create an event, and save it (when we rotate the relay log, we will have
7224 to write this event again).
7225 */
7226 /*
7227 We are the only thread which reads/writes mi_description_event.
7228 The relay_log struct does not move (though some members of it can
7229 change), so we needn't any lock (no rli->data_lock, no log lock).
7230 */
7231 const char* errmsg;
7232 // mark it as undefined that is irrelevant anymore
7233 mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
7234 Format_description_log_event *new_fdle=
7235 (Format_description_log_event*)
7236 Log_event::read_log_event(buf, event_len, &errmsg,
7237 mi->get_mi_description_event(), 1);
7238 if (new_fdle == NULL)
7239 {
7240 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
7241 goto err;
7242 }
7243 if (new_fdle->checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
7244 new_fdle->checksum_alg= BINLOG_CHECKSUM_ALG_OFF;
7245 mi->set_mi_description_event(new_fdle);
7246
7247 /* installing new value of checksum Alg for relay log */
7248 mi->rli->relay_log.relay_log_checksum_alg= new_fdle->checksum_alg;
7249
7250 /*
7251 Though this does some conversion to the slave's format, this will
7252 preserve the master's binlog format version, and number of event types.
7253 */
7254 /*
7255 If the event was not requested by the slave (the slave did not ask for
7256 it), i.e. has end_log_pos=0, we do not increment mi->get_master_log_pos()
7257 */
7258 inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
7259 DBUG_PRINT("info",("binlog format is now %d",
7260 mi->get_mi_description_event()->binlog_version));
7261
7262 }
7263 break;
7264
7265 case HEARTBEAT_LOG_EVENT:
7266 {
7267 /*
7268 HB (heartbeat) cannot come before RL (Relay)
7269 */
7270 char llbuf[22];
7271 Heartbeat_log_event hb(buf,
7272 mi->rli->relay_log.relay_log_checksum_alg
7273 != BINLOG_CHECKSUM_ALG_OFF ?
7274 event_len - BINLOG_CHECKSUM_LEN : event_len,
7275 mi->get_mi_description_event());
7276 if (!hb.is_valid())
7277 {
7278 error= ER_SLAVE_HEARTBEAT_FAILURE;
7279 error_msg.append(STRING_WITH_LEN("inconsistent heartbeat event content;"));
7280 error_msg.append(STRING_WITH_LEN("the event's data: log_file_name "));
7281 error_msg.append(hb.get_log_ident(), (uint) strlen(hb.get_log_ident()));
7282 error_msg.append(STRING_WITH_LEN(" log_pos "));
7283 llstr(hb.log_pos, llbuf);
7284 error_msg.append(llbuf, strlen(llbuf));
7285 goto err;
7286 }
7287 mi->received_heartbeats++;
7288 mi->last_heartbeat= my_time(0);
7289
7290
7291 /*
7292 During GTID protocol, if the master skips transactions,
7293 a heartbeat event is sent to the slave at the end of last
7294 skipped transaction to update coordinates.
7295
7296 I/O thread receives the heartbeat event and updates mi
7297 only if the received heartbeat position is greater than
7298 mi->get_master_log_pos(). This event is written to the
7299 relay log as an ignored Rotate event. SQL thread reads
7300 the rotate event only to update the coordinates corresponding
7301 to the last skipped transaction. Note that,
7302 we update only the positions and not the file names, as a ROTATE
7303 EVENT from the master prior to this will update the file name.
7304 */
7305 if (mi->is_auto_position() && mi->get_master_log_pos() < hb.log_pos
7306 && mi->get_master_log_name() != NULL)
7307 {
7308
7309 DBUG_ASSERT(memcmp(const_cast<char*>(mi->get_master_log_name()),
7310 hb.get_log_ident(), hb.get_ident_len()) == 0);
7311
7312 mi->set_master_log_pos(hb.log_pos);
7313
7314 /*
7315 Put this heartbeat event in the relay log as a Rotate Event.
7316 */
7317 inc_pos= 0;
7318 memcpy(rli->ign_master_log_name_end, mi->get_master_log_name(),
7319 FN_REFLEN);
7320 rli->ign_master_log_pos_end = mi->get_master_log_pos();
7321
7322 if (write_ignored_events_info_to_relay_log(mi->info_thd, mi))
7323 goto err;
7324 }
7325
7326 /*
7327 compare local and event's versions of log_file, log_pos.
7328
7329 Heartbeat is sent only after an event corresponding to the corrdinates
7330 the heartbeat carries.
7331 Slave can not have a difference in coordinates except in the only
7332 special case when mi->get_master_log_name(), mi->get_master_log_pos() have never
7333 been updated by Rotate event i.e when slave does not have any history
7334 with the master (and thereafter mi->get_master_log_pos() is NULL).
7335
7336 TODO: handling `when' for SHOW SLAVE STATUS' snds behind
7337 */
7338 if ((memcmp(const_cast<char *>(mi->get_master_log_name()),
7339 hb.get_log_ident(), hb.get_ident_len())
7340 && mi->get_master_log_name() != NULL)
7341 || ((mi->get_master_log_pos() != hb.log_pos && gtid_mode == 0) ||
7342 /*
7343 When Gtid mode is on only monotocity can be claimed.
7344 Todo: enhance HB event with the skipped events size
7345 and to convert HB.pos == MI.pos to HB.pos - HB.skip_size == MI.pos
7346 */
7347 (mi->get_master_log_pos() > hb.log_pos)))
7348 {
7349 /* missed events of heartbeat from the past */
7350 error= ER_SLAVE_HEARTBEAT_FAILURE;
7351 error_msg.append(STRING_WITH_LEN("heartbeat is not compatible with local info;"));
7352 error_msg.append(STRING_WITH_LEN("the event's data: log_file_name "));
7353 error_msg.append(hb.get_log_ident(), (uint) strlen(hb.get_log_ident()));
7354 error_msg.append(STRING_WITH_LEN(" log_pos "));
7355 llstr(hb.log_pos, llbuf);
7356 error_msg.append(llbuf, strlen(llbuf));
7357 goto err;
7358 }
7359 goto skip_relay_logging;
7360 }
7361 break;
7362
7363 case PREVIOUS_GTIDS_LOG_EVENT:
7364 {
7365 /*
7366 This event does not have any meaning for the slave and
7367 was just sent to show the slave the master is making
7368 progress and avoid possible deadlocks.
7369 So at this point, the event is replaced by a rotate
7370 event what will make the slave to update what it knows
7371 about the master's coordinates.
7372 */
7373 inc_pos= 0;
7374 mi->set_master_log_pos(mi->get_master_log_pos() + event_len);
7375 memcpy(rli->ign_master_log_name_end, mi->get_master_log_name(), FN_REFLEN);
7376 rli->ign_master_log_pos_end= mi->get_master_log_pos();
7377
7378 if (write_ignored_events_info_to_relay_log(mi->info_thd, mi))
7379 goto err;
7380
7381 goto skip_relay_logging;
7382 }
7383 break;
7384
7385 case GTID_LOG_EVENT:
7386 {
7387 if (gtid_mode == 0)
7388 {
7389 error= ER_FOUND_GTID_EVENT_WHEN_GTID_MODE_IS_OFF;
7390 goto err;
7391 }
7392 global_sid_lock->rdlock();
7393 Gtid_log_event gtid_ev(buf, checksum_alg != BINLOG_CHECKSUM_ALG_OFF ?
7394 event_len - BINLOG_CHECKSUM_LEN : event_len,
7395 mi->get_mi_description_event());
7396 gtid.sidno= gtid_ev.get_sidno(false);
7397 global_sid_lock->unlock();
7398 if (gtid.sidno < 0)
7399 goto err;
7400 gtid.gno= gtid_ev.get_gno();
7401 inc_pos= event_len;
7402 }
7403 break;
7404
7405 case ANONYMOUS_GTID_LOG_EVENT:
7406
7407 default:
7408 inc_pos= event_len;
7409 break;
7410 }
7411
7412 /*
7413 Simulate an unknown ignorable log event by rewriting the write_rows log
7414 event and previous_gtids log event before writing them in relay log.
7415 */
7416 DBUG_EXECUTE_IF("simulate_unknown_ignorable_log_event",
7417 if (event_type == WRITE_ROWS_EVENT ||
7418 event_type == PREVIOUS_GTIDS_LOG_EVENT)
7419 {
7420 char *event_buf= const_cast<char*>(buf);
7421 /* Overwrite the log event type with an unknown type. */
7422 event_buf[EVENT_TYPE_OFFSET]= ENUM_END_EVENT + 1;
7423 /* Set LOG_EVENT_IGNORABLE_F for the log event. */
7424 int2store(event_buf + FLAGS_OFFSET,
7425 uint2korr(event_buf + FLAGS_OFFSET) | LOG_EVENT_IGNORABLE_F);
7426 }
7427 );
7428
7429 /*
7430 If this event is originating from this server, don't queue it.
7431 We don't check this for 3.23 events because it's simpler like this; 3.23
7432 will be filtered anyway by the SQL slave thread which also tests the
7433 server id (we must also keep this test in the SQL thread, in case somebody
7434 upgrades a 4.0 slave which has a not-filtered relay log).
7435
7436 ANY event coming from ourselves can be ignored: it is obvious for queries;
7437 for STOP_EVENT/ROTATE_EVENT/START_EVENT: these cannot come from ourselves
7438 (--log-slave-updates would not log that) unless this slave is also its
7439 direct master (an unsupported, useless setup!).
7440 */
7441
7442 mysql_mutex_lock(log_lock);
7443 s_id= uint4korr(buf + SERVER_ID_OFFSET);
7444
7445 /*
7446 If server_id_bits option is set we need to mask out irrelevant bits
7447 when checking server_id, but we still put the full unmasked server_id
7448 into the Relay log so that it can be accessed when applying the event
7449 */
7450 s_id&= opt_server_id_mask;
7451
7452 if ((s_id == ::server_id && !mi->rli->replicate_same_server_id) ||
7453 /*
7454 the following conjunction deals with IGNORE_SERVER_IDS, if set
7455 If the master is on the ignore list, execution of
7456 format description log events and rotate events is necessary.
7457 */
7458 (mi->ignore_server_ids->dynamic_ids.elements > 0 &&
7459 mi->shall_ignore_server_id(s_id) &&
7460 /* everything is filtered out from non-master */
7461 (s_id != mi->master_id ||
7462 /* for the master meta information is necessary */
7463 (event_type != FORMAT_DESCRIPTION_EVENT &&
7464 event_type != ROTATE_EVENT))))
7465 {
7466 /*
7467 Do not write it to the relay log.
7468 a) We still want to increment mi->get_master_log_pos(), so that we won't
7469 re-read this event from the master if the slave IO thread is now
7470 stopped/restarted (more efficient if the events we are ignoring are big
7471 LOAD DATA INFILE).
7472 b) We want to record that we are skipping events, for the information of
7473 the slave SQL thread, otherwise that thread may let
7474 rli->group_relay_log_pos stay too small if the last binlog's event is
7475 ignored.
7476 But events which were generated by this slave and which do not exist in
7477 the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment
7478 mi->get_master_log_pos().
7479 If the event is originated remotely and is being filtered out by
7480 IGNORE_SERVER_IDS it increments mi->get_master_log_pos()
7481 as well as rli->group_relay_log_pos.
7482 */
7483 if (!(s_id == ::server_id && !mi->rli->replicate_same_server_id) ||
7484 (event_type != FORMAT_DESCRIPTION_EVENT &&
7485 event_type != ROTATE_EVENT &&
7486 event_type != STOP_EVENT))
7487 {
7488 mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
7489 memcpy(rli->ign_master_log_name_end, mi->get_master_log_name(), FN_REFLEN);
7490 DBUG_ASSERT(rli->ign_master_log_name_end[0]);
7491 rli->ign_master_log_pos_end= mi->get_master_log_pos();
7492 }
7493 rli->relay_log.signal_update(); // the slave SQL thread needs to re-check
7494 DBUG_PRINT("info", ("master_log_pos: %lu, event originating from %u server, ignored",
7495 (ulong) mi->get_master_log_pos(), uint4korr(buf + SERVER_ID_OFFSET)));
7496 }
7497 else
7498 {
7499 DBUG_EXECUTE_IF("flush_after_reading_gtid_event",
7500 if (event_type == GTID_LOG_EVENT && gtid.gno == 4)
7501 DBUG_SET("+d,set_max_size_zero");
7502 );
7503 DBUG_EXECUTE_IF("set_append_buffer_error",
7504 if (event_type == GTID_LOG_EVENT && gtid.gno == 4)
7505 DBUG_SET("+d,simulate_append_buffer_error");
7506 );
7507 /*
7508 Add the GTID to the retrieved set before actually appending it to relay
7509 log. This will ensure that if a rotation happens at this point of time the
7510 new GTID will be reflected as part of Previous_Gtid set and
7511 Retrieved_Gtid_Set will not have any gaps.
7512 */
7513 if (event_type == GTID_LOG_EVENT)
7514 {
7515 global_sid_lock->rdlock();
7516 old_retrieved_gtid= *(mi->rli->get_last_retrieved_gtid());
7517 int ret= rli->add_logged_gtid(gtid.sidno, gtid.gno);
7518 if (!ret)
7519 rli->set_last_retrieved_gtid(gtid);
7520 global_sid_lock->unlock();
7521 if (ret != 0)
7522 {
7523 mysql_mutex_unlock(log_lock);
7524 goto err;
7525 }
7526 }
7527 /* write the event to the relay log */
7528 if (!DBUG_EVALUATE_IF("simulate_append_buffer_error", 1, 0) &&
7529 likely(rli->relay_log.append_buffer(buf, event_len, mi) == 0))
7530 {
7531 mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
7532 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
7533 rli->relay_log.harvest_bytes_written(rli, true/*need_log_space_lock=true*/);
7534 }
7535 else
7536 {
7537 if (event_type == GTID_LOG_EVENT)
7538 {
7539 global_sid_lock->rdlock();
7540 Gtid_set * retrieved_set= (const_cast<Gtid_set *>(mi->rli->get_gtid_set()));
7541 if (retrieved_set->_remove_gtid(gtid) != RETURN_STATUS_OK)
7542 {
7543 global_sid_lock->unlock();
7544 mysql_mutex_unlock(log_lock);
7545 goto err;
7546 }
7547 if (!old_retrieved_gtid.empty())
7548 rli->set_last_retrieved_gtid(old_retrieved_gtid);
7549 global_sid_lock->unlock();
7550 }
7551 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
7552 }
7553 rli->ign_master_log_name_end[0]= 0; // last event is not ignored
7554 if (save_buf != NULL)
7555 buf= save_buf;
7556 }
7557 mysql_mutex_unlock(log_lock);
7558
7559 skip_relay_logging:
7560
7561 err:
7562 if (unlock_data_lock)
7563 mysql_mutex_unlock(&mi->data_lock);
7564 DBUG_PRINT("info", ("error: %d", error));
7565 if (error)
7566 mi->report(ERROR_LEVEL, error, ER(error),
7567 (error == ER_SLAVE_RELAY_LOG_WRITE_FAILURE)?
7568 "could not queue event from master" :
7569 error_msg.ptr());
7570 DBUG_RETURN(error);
7571 }
7572
/**
  Hook that detaches the active VIO before a connection handle is closed.

  The client API may close the connection (and its associated data) when
  it runs into an unrecoverable (network) error. The client code calls
  this hook before the VIO handle is deleted, which lets the thread drop
  its reference to the active vio so that it never points to freed memory.

  Other calls to THD::clear_active_vio throughout this module are
  redundant because of this hook, but are left in place for illustrative
  purposes.

  The body is compiled in only when SIGNAL_WITH_VIO_SHUTDOWN is defined;
  otherwise the hook is a no-op.
*/

extern "C" void slave_io_thread_detach_vio()
{
#ifdef SIGNAL_WITH_VIO_SHUTDOWN
  THD *const cur_thd= current_thd;

  /* Only a slave thread has an active vio registered by this module. */
  if (cur_thd == NULL || !cur_thd->slave_thread)
    return;

  cur_thd->clear_active_vio();
#endif
}
7595
7596
7597 /*
7598 Try to connect until successful or slave killed
7599
7600 SYNPOSIS
7601 safe_connect()
7602 thd Thread handler for slave
7603 mysql MySQL connection handle
7604 mi Replication handle
7605
7606 RETURN
7607 0 ok
7608 # Error
7609 */
7610
safe_connect(THD * thd,MYSQL * mysql,Master_info * mi)7611 static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi)
7612 {
7613 DBUG_ENTER("safe_connect");
7614
7615 DBUG_RETURN(connect_to_master(thd, mysql, mi, 0, 0));
7616 }
7617
7618
7619 /*
7620 SYNPOSIS
7621 connect_to_master()
7622
7623 IMPLEMENTATION
7624 Try to connect until successful or slave killed or we have retried
7625 mi->retry_count times
7626 */
7627
connect_to_master(THD * thd,MYSQL * mysql,Master_info * mi,bool reconnect,bool suppress_warnings)7628 static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
7629 bool reconnect, bool suppress_warnings)
7630 {
7631 int slave_was_killed= 0;
7632 int last_errno= -2; // impossible error
7633 ulong err_count=0;
7634 char llbuff[22];
7635 char password[MAX_PASSWORD_LENGTH + 1];
7636 int password_size= sizeof(password);
7637 DBUG_ENTER("connect_to_master");
7638 set_slave_max_allowed_packet(thd, mysql);
7639 #ifndef DBUG_OFF
7640 mi->events_until_exit = disconnect_slave_event_count;
7641 #endif
7642 ulong client_flag= CLIENT_REMEMBER_OPTIONS;
7643 if (opt_slave_compressed_protocol)
7644 client_flag|= CLIENT_COMPRESS; /* We will use compression */
7645
7646 mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout);
7647 mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout);
7648
7649 if (mi->bind_addr[0])
7650 {
7651 DBUG_PRINT("info",("bind_addr: %s", mi->bind_addr));
7652 mysql_options(mysql, MYSQL_OPT_BIND, mi->bind_addr);
7653 }
7654
7655 #ifdef HAVE_OPENSSL
7656 if (mi->ssl)
7657 {
7658 mysql_ssl_set(mysql,
7659 mi->ssl_key[0]?mi->ssl_key:0,
7660 mi->ssl_cert[0]?mi->ssl_cert:0,
7661 mi->ssl_ca[0]?mi->ssl_ca:0,
7662 mi->ssl_capath[0]?mi->ssl_capath:0,
7663 mi->ssl_cipher[0]?mi->ssl_cipher:0);
7664 mysql_options(mysql, MYSQL_OPT_SSL_CRL,
7665 mi->ssl_crl[0] ? mi->ssl_crl : 0);
7666 mysql_options(mysql, MYSQL_OPT_SSL_CRLPATH,
7667 mi->ssl_crlpath[0] ? mi->ssl_crlpath : 0);
7668 mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
7669 &mi->ssl_verify_server_cert);
7670 }
7671 #endif
7672
7673 /*
7674 If server's default charset is not supported (like utf16, utf32) as client
7675 charset, then set client charset to 'latin1' (default client charset).
7676 */
7677 if (is_supported_parser_charset(default_charset_info))
7678 mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->csname);
7679 else
7680 {
7681 sql_print_information("'%s' can not be used as client character set. "
7682 "'%s' will be used as default client character set "
7683 "while connecting to master.",
7684 default_charset_info->csname,
7685 default_client_charset_info->csname);
7686 mysql_options(mysql, MYSQL_SET_CHARSET_NAME,
7687 default_client_charset_info->csname);
7688 }
7689
7690
7691 /* This one is not strictly needed but we have it here for completeness */
7692 mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) charsets_dir);
7693
7694 if (mi->is_start_plugin_auth_configured())
7695 {
7696 DBUG_PRINT("info", ("Slaving is using MYSQL_DEFAULT_AUTH %s",
7697 mi->get_start_plugin_auth()));
7698 mysql_options(mysql, MYSQL_DEFAULT_AUTH, mi->get_start_plugin_auth());
7699 }
7700
7701 if (mi->is_start_plugin_dir_configured())
7702 {
7703 DBUG_PRINT("info", ("Slaving is using MYSQL_PLUGIN_DIR %s",
7704 mi->get_start_plugin_dir()));
7705 mysql_options(mysql, MYSQL_PLUGIN_DIR, mi->get_start_plugin_dir());
7706 }
7707 /* Set MYSQL_PLUGIN_DIR in case master asks for an external authentication plugin */
7708 else if (opt_plugin_dir_ptr && *opt_plugin_dir_ptr)
7709 mysql_options(mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir_ptr);
7710
7711 if (!mi->is_start_user_configured())
7712 sql_print_warning("%s", ER(ER_INSECURE_CHANGE_MASTER));
7713
7714 if (mi->get_password(password, &password_size))
7715 {
7716 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
7717 ER(ER_SLAVE_FATAL_ERROR),
7718 "Unable to configure password when attempting to "
7719 "connect to the master server. Connection attempt "
7720 "terminated.");
7721 DBUG_RETURN(1);
7722 }
7723
7724 const char* user= mi->get_user();
7725 if (user == NULL || user[0] == 0)
7726 {
7727 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
7728 ER(ER_SLAVE_FATAL_ERROR),
7729 "Invalid (empty) username when attempting to "
7730 "connect to the master server. Connection attempt "
7731 "terminated.");
7732 DBUG_RETURN(1);
7733 }
7734
7735 while (!(slave_was_killed = io_slave_killed(thd,mi))
7736 && (reconnect ? mysql_reconnect(mysql) != 0 :
7737 mysql_real_connect(mysql, mi->host, user,
7738 password, 0, mi->port, 0, client_flag) == 0))
7739 {
7740 /*
7741 SHOW SLAVE STATUS will display the number of retries which
7742 would be real retry counts instead of mi->retry_count for
7743 each connection attempt by 'Last_IO_Error' entry.
7744 */
7745 last_errno=mysql_errno(mysql);
7746 suppress_warnings= 0;
7747 mi->report(ERROR_LEVEL, last_errno,
7748 "error %s to master '%s@%s:%d'"
7749 " - retry-time: %d retries: %lu",
7750 (reconnect ? "reconnecting" : "connecting"),
7751 mi->get_user(), mi->host, mi->port,
7752 mi->connect_retry, err_count + 1);
7753 /*
7754 By default we try forever. The reason is that failure will trigger
7755 master election, so if the user did not set mi->retry_count we
7756 do not want to have election triggered on the first failure to
7757 connect
7758 */
7759 if (++err_count == mi->retry_count)
7760 {
7761 slave_was_killed=1;
7762 break;
7763 }
7764 slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
7765 }
7766
7767 if (!slave_was_killed)
7768 {
7769 mi->clear_error(); // clear possible left over reconnect error
7770 if (reconnect)
7771 {
7772 if (!suppress_warnings && log_warnings)
7773 sql_print_information("Slave: connected to master '%s@%s:%d',\
7774 replication resumed in log '%s' at position %s", mi->get_user(),
7775 mi->host, mi->port,
7776 mi->get_io_rpl_log_name(),
7777 llstr(mi->get_master_log_pos(),llbuff));
7778 }
7779 else
7780 {
7781 general_log_print(thd, COM_CONNECT_OUT, "%s@%s:%d",
7782 mi->get_user(), mi->host, mi->port);
7783 }
7784 #ifdef SIGNAL_WITH_VIO_SHUTDOWN
7785 thd->set_active_vio(mysql->net.vio);
7786 #endif
7787 }
7788 mysql->reconnect= 1;
7789 DBUG_PRINT("exit",("slave_was_killed: %d", slave_was_killed));
7790 DBUG_RETURN(slave_was_killed);
7791 }
7792
7793
7794 /*
7795 safe_reconnect()
7796
7797 IMPLEMENTATION
7798 Try to connect until successful or slave killed or we have retried
7799 mi->retry_count times
7800 */
7801
safe_reconnect(THD * thd,MYSQL * mysql,Master_info * mi,bool suppress_warnings)7802 static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
7803 bool suppress_warnings)
7804 {
7805 DBUG_ENTER("safe_reconnect");
7806 DBUG_RETURN(connect_to_master(thd, mysql, mi, 1, suppress_warnings));
7807 }
7808
7809
7810 /*
7811 Called when we notice that the current "hot" log got rotated under our feet.
7812 */
7813
reopen_relay_log(Relay_log_info * rli,const char ** errmsg)7814 static IO_CACHE *reopen_relay_log(Relay_log_info *rli, const char **errmsg)
7815 {
7816 DBUG_ENTER("reopen_relay_log");
7817 DBUG_ASSERT(rli->cur_log != &rli->cache_buf);
7818 DBUG_ASSERT(rli->cur_log_fd == -1);
7819
7820 IO_CACHE *cur_log = rli->cur_log=&rli->cache_buf;
7821 if ((rli->cur_log_fd=open_binlog_file(cur_log,rli->get_event_relay_log_name(),
7822 errmsg)) <0)
7823 DBUG_RETURN(0);
7824 /*
7825 We want to start exactly where we was before:
7826 relay_log_pos Current log pos
7827 pending Number of bytes already processed from the event
7828 */
7829 rli->set_event_relay_log_pos(max<ulonglong>(rli->get_event_relay_log_pos(),
7830 BIN_LOG_HEADER_SIZE));
7831 my_b_seek(cur_log,rli->get_event_relay_log_pos());
7832 DBUG_RETURN(cur_log);
7833 }
7834
7835
7836 /**
7837 Reads next event from the relay log. Should be called from the
7838 slave SQL thread.
7839
7840 @param rli Relay_log_info structure for the slave SQL thread.
7841
7842 @return The event read, or NULL on error. If an error occurs, the
7843 error is reported through the sql_print_information() or
7844 sql_print_error() functions.
7845 */
next_event(Relay_log_info * rli)7846 static Log_event* next_event(Relay_log_info* rli)
7847 {
7848 Log_event* ev;
7849 IO_CACHE* cur_log = rli->cur_log;
7850 mysql_mutex_t *log_lock = rli->relay_log.get_log_lock();
7851 const char* errmsg=0;
7852 THD* thd = rli->info_thd;
7853 DBUG_ENTER("next_event");
7854
7855 DBUG_ASSERT(thd != 0);
7856
7857 #ifndef DBUG_OFF
7858 if (abort_slave_event_count && !rli->events_until_exit--)
7859 DBUG_RETURN(0);
7860 #endif
7861
7862 /*
7863 For most operations we need to protect rli members with data_lock,
7864 so we assume calling function acquired this mutex for us and we will
7865 hold it for most of the loop below. However, we will release it
7866 whenever it is worth the hassle, and in the cases when we go into a
7867 mysql_cond_wait() with the non-data_lock mutex
7868 */
7869 mysql_mutex_assert_owner(&rli->data_lock);
7870
7871 while (!sql_slave_killed(thd,rli))
7872 {
7873 /*
7874 We can have two kinds of log reading:
7875 hot_log:
7876 rli->cur_log points at the IO_CACHE of relay_log, which
7877 is actively being updated by the I/O thread. We need to be careful
7878 in this case and make sure that we are not looking at a stale log that
7879 has already been rotated. If it has been, we reopen the log.
7880
7881 The other case is much simpler:
7882 We just have a read only log that nobody else will be updating.
7883 */
7884 bool hot_log;
7885 if ((hot_log = (cur_log != &rli->cache_buf)) ||
7886 DBUG_EVALUATE_IF("force_sql_thread_error", 1, 0))
7887 {
7888 DBUG_ASSERT(rli->cur_log_fd == -1); // foreign descriptor
7889 mysql_mutex_lock(log_lock);
7890
7891 /*
7892 Reading xxx_file_id is safe because the log will only
7893 be rotated when we hold relay_log.LOCK_log
7894 */
7895 if (rli->relay_log.get_open_count() != rli->cur_log_old_open_count &&
7896 DBUG_EVALUATE_IF("force_sql_thread_error", 0, 1))
7897 {
7898 // The master has switched to a new log file; Reopen the old log file
7899 cur_log=reopen_relay_log(rli, &errmsg);
7900 mysql_mutex_unlock(log_lock);
7901 if (!cur_log) // No more log files
7902 goto err;
7903 hot_log=0; // Using old binary log
7904 }
7905 }
7906 /*
7907 As there is no guarantee that the relay is open (for example, an I/O
7908 error during a write by the slave I/O thread may have closed it), we
7909 have to test it.
7910 */
7911 if (!my_b_inited(cur_log) ||
7912 DBUG_EVALUATE_IF("force_sql_thread_error", 1, 0))
7913 {
7914 if (hot_log)
7915 mysql_mutex_unlock(log_lock);
7916 goto err;
7917 }
7918 #ifndef DBUG_OFF
7919 {
7920 DBUG_PRINT("info", ("assertion skip %lu file pos %lu event relay log pos %lu file %s\n",
7921 (ulong) rli->slave_skip_counter, (ulong) my_b_tell(cur_log),
7922 (ulong) rli->get_event_relay_log_pos(),
7923 rli->get_event_relay_log_name()));
7924
7925 /* This is an assertion which sometimes fails, let's try to track it */
7926 char llbuf1[22], llbuf2[22];
7927 DBUG_PRINT("info", ("my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s",
7928 llstr(my_b_tell(cur_log),llbuf1),
7929 llstr(rli->get_event_relay_log_pos(),llbuf2)));
7930
7931 DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
7932 DBUG_ASSERT(my_b_tell(cur_log) == rli->get_event_relay_log_pos() || rli->is_parallel_exec());
7933
7934 DBUG_PRINT("info", ("next_event group master %s %lu group relay %s %lu event %s %lu\n",
7935 rli->get_group_master_log_name(),
7936 (ulong) rli->get_group_master_log_pos(),
7937 rli->get_group_relay_log_name(),
7938 (ulong) rli->get_group_relay_log_pos(),
7939 rli->get_event_relay_log_name(),
7940 (ulong) rli->get_event_relay_log_pos()));
7941 }
7942 #endif
7943 /*
7944 Relay log is always in new format - if the master is 3.23, the
7945 I/O thread will convert the format for us.
7946 A problem: the description event may be in a previous relay log. So if
7947 the slave has been shutdown meanwhile, we would have to look in old relay
7948 logs, which may even have been deleted. So we need to write this
7949 description event at the beginning of the relay log.
7950 When the relay log is created when the I/O thread starts, easy: the
7951 master will send the description event and we will queue it.
7952 But if the relay log is created by new_file(): then the solution is:
7953 MYSQL_BIN_LOG::open() will write the buffered description event.
7954 */
7955 if ((ev= Log_event::read_log_event(cur_log, 0,
7956 rli->get_rli_description_event(),
7957 opt_slave_sql_verify_checksum)))
7958 {
7959 DBUG_ASSERT(thd==rli->info_thd);
7960 /*
7961 read it while we have a lock, to avoid a mutex lock in
7962 inc_event_relay_log_pos()
7963 */
7964 rli->set_future_event_relay_log_pos(my_b_tell(cur_log));
7965 ev->future_event_relay_log_pos= rli->get_future_event_relay_log_pos();
7966
7967 if (hot_log)
7968 mysql_mutex_unlock(log_lock);
7969
7970 /*
7971 MTS checkpoint in the successful read branch
7972 */
7973 bool force= (rli->checkpoint_seqno > (rli->checkpoint_group - 1));
7974 if (rli->is_parallel_exec() && (opt_mts_checkpoint_period != 0 || force))
7975 {
7976 ulonglong period= static_cast<ulonglong>(opt_mts_checkpoint_period * 1000000ULL);
7977 mysql_mutex_unlock(&rli->data_lock);
7978 /*
7979 At this point the coordinator is delegating jobs to workers and
7980 the checkpoint routine must be periodically invoked.
7981 */
7982 (void) mts_checkpoint_routine(rli, period, force, true/*need_data_lock=true*/); // TODO: ALFRANIO ERROR
7983 DBUG_ASSERT(!force ||
7984 (force && (rli->checkpoint_seqno <= (rli->checkpoint_group - 1))) ||
7985 sql_slave_killed(thd, rli));
7986 mysql_mutex_lock(&rli->data_lock);
7987 }
7988 DBUG_RETURN(ev);
7989 }
7990 DBUG_ASSERT(thd==rli->info_thd);
7991 if (opt_reckless_slave) // For mysql-test
7992 cur_log->error = 0;
7993 if (cur_log->error < 0)
7994 {
7995 errmsg = "slave SQL thread aborted because of I/O error";
7996 if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)
7997 /*
7998 MTS group status is set to MTS_KILLED_GROUP, whenever a read event
7999 error happens and there was already a non-terminal event scheduled.
8000 */
8001 rli->mts_group_status= Relay_log_info::MTS_KILLED_GROUP;
8002 if (hot_log)
8003 mysql_mutex_unlock(log_lock);
8004 goto err;
8005 }
8006 if (!cur_log->error) /* EOF */
8007 {
8008 /*
8009 On a hot log, EOF means that there are no more updates to
8010 process and we must block until I/O thread adds some and
8011 signals us to continue
8012 */
8013 if (hot_log)
8014 {
8015 /*
8016 We say in Seconds_Behind_Master that we have "caught up". Note that
8017 for example if network link is broken but I/O slave thread hasn't
8018 noticed it (slave_net_timeout not elapsed), then we'll say "caught
8019 up" whereas we're not really caught up. Fixing that would require
8020 internally cutting timeout in smaller pieces in network read, no
8021 thanks. Another example: SQL has caught up on I/O, now I/O has read
8022 a new event and is queuing it; the false "0" will exist until SQL
8023 finishes executing the new event; it will be look abnormal only if
8024 the events have old timestamps (then you get "many", 0, "many").
8025
8026 Transient phases like this can be fixed by implementing a
8027 Heartbeat event which provides the slave the status of the
8028 master at time the master does not have any new update to send.
8029 Seconds_Behind_Master would be zero only when master has no
8030 more updates in binlog for slave. The heartbeat can be sent
8031 in a (small) fraction of slave_net_timeout. Until it's done
8032 rli->last_master_timestamp is temporarily (for the time of
8033 waiting for the following event) reset whenever EOF is
8034 reached.
8035 */
8036
8037 /* shows zero while it is sleeping (and until the next event
8038 is about to be executed). Note, in MTS case
8039 Seconds_Behind_Master resetting follows slightly different
8040 schema where reaching EOF is not enough. The status
8041 parameter is updated per some number of processed group of
8042 events. The number can't be greater than
8043 @@global.slave_checkpoint_group and anyway SBM updating
8044 rate does not exceed @@global.slave_checkpoint_period.
8045 Notice that SBM is set to a new value after processing the
8046 terminal event (e.g Commit) of a group. Coordinator resets
8047 SBM when notices no more groups left neither to read from
8048 Relay-log nor to process by Workers.
8049 */
8050 if (!rli->is_parallel_exec())
8051 rli->last_master_timestamp= 0;
8052
8053 DBUG_ASSERT(rli->relay_log.get_open_count() ==
8054 rli->cur_log_old_open_count);
8055
8056 if (rli->ign_master_log_name_end[0])
8057 {
8058 /* We generate and return a Rotate, to make our positions advance */
8059 DBUG_PRINT("info",("seeing an ignored end segment"));
8060 ev= new Rotate_log_event(rli->ign_master_log_name_end,
8061 0, rli->ign_master_log_pos_end,
8062 Rotate_log_event::DUP_NAME);
8063 rli->ign_master_log_name_end[0]= 0;
8064 mysql_mutex_unlock(log_lock);
8065 if (unlikely(!ev))
8066 {
8067 errmsg= "Slave SQL thread failed to create a Rotate event "
8068 "(out of memory?), SHOW SLAVE STATUS may be inaccurate";
8069 goto err;
8070 }
8071 ev->server_id= 0; // don't be ignored by slave SQL thread
8072 DBUG_RETURN(ev);
8073 }
8074
8075 /*
8076 We can, and should release data_lock while we are waiting for
8077 update. If we do not, show slave status will block
8078 */
8079 mysql_mutex_unlock(&rli->data_lock);
8080
8081 /*
8082 Possible deadlock :
8083 - the I/O thread has reached log_space_limit
8084 - the SQL thread has read all relay logs, but cannot purge for some
8085 reason:
8086 * it has already purged all logs except the current one
8087 * there are other logs than the current one but they're involved in
8088 a transaction that finishes in the current one (or is not finished)
8089 Solution :
8090 Wake up the possibly waiting I/O thread, and set a boolean asking
8091 the I/O thread to temporarily ignore the log_space_limit
8092 constraint, because we do not want the I/O thread to block because of
8093 space (it's ok if it blocks for any other reason (e.g. because the
8094 master does not send anything). Then the I/O thread stops waiting
8095 and reads one more event and starts honoring log_space_limit again.
8096
8097 If the SQL thread needs more events to be able to rotate the log (it
8098 might need to finish the current group first), then it can ask for one
8099 more at a time. Thus we don't outgrow the relay log indefinitely,
8100 but rather in a controlled manner, until the next rotate.
8101
8102 When the SQL thread starts it sets ignore_log_space_limit to false.
8103 We should also reset ignore_log_space_limit to 0 when the user does
8104 RESET SLAVE, but in fact, no need as RESET SLAVE requires that the slave
8105 be stopped, and the SQL thread sets ignore_log_space_limit to 0 when
8106 it stops.
8107 */
8108 mysql_mutex_lock(&rli->log_space_lock);
8109
8110 /*
8111 If we have reached the limit of the relay space and we
8112 are going to sleep, waiting for more events:
8113
8114 1. If outside a group, SQL thread asks the IO thread
8115 to force a rotation so that the SQL thread purges
8116 logs next time it processes an event (thus space is
8117 freed).
8118
8119 2. If in a group, SQL thread asks the IO thread to
8120 ignore the limit and queues yet one more event
8121 so that the SQL thread finishes the group and
8122 is able to rotate and purge sometime soon.
8123 */
8124 if (rli->log_space_limit &&
8125 rli->log_space_limit < rli->log_space_total)
8126 {
8127 /* force rotation if not in an unfinished group */
8128 if (!rli->is_parallel_exec())
8129 {
8130 rli->sql_force_rotate_relay= !rli->is_in_group();
8131 }
8132 else
8133 {
8134 rli->sql_force_rotate_relay=
8135 (rli->mts_group_status != Relay_log_info::MTS_IN_GROUP);
8136 }
8137 /* ask for one more event */
8138 rli->ignore_log_space_limit= true;
8139 }
8140
8141 /*
8142 If the I/O thread is blocked, unblock it. Ok to broadcast
8143 after unlock, because the mutex is only destroyed in
8144 ~Relay_log_info(), i.e. when rli is destroyed, and rli will
8145 not be destroyed before we exit the present function.
8146 */
8147 mysql_mutex_unlock(&rli->log_space_lock);
8148 mysql_cond_broadcast(&rli->log_space_cond);
8149 // Note that wait_for_update_relay_log unlocks lock_log !
8150
8151 if (rli->is_parallel_exec() && (opt_mts_checkpoint_period != 0 ||
8152 DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0)))
8153 {
8154 int ret= 0;
8155 struct timespec waittime;
8156 ulonglong period= static_cast<ulonglong>(opt_mts_checkpoint_period * 1000000ULL);
8157 ulong signal_cnt= rli->relay_log.signal_cnt;
8158
8159 mysql_mutex_unlock(log_lock);
8160 do
8161 {
8162 /*
8163 At this point the coordinator has no job to delegate to workers.
8164 However, workers are executing their assigned jobs and as such
8165 the checkpoint routine must be periodically invoked.
8166 */
8167 (void) mts_checkpoint_routine(rli, period, false, true/*need_data_lock=true*/); // TODO: ALFRANIO ERROR
8168 mysql_mutex_lock(log_lock);
8169
8170 if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
8171 period= 10000000ULL;
8172
8173 set_timespec_nsec(waittime, period);
8174 ret= rli->relay_log.wait_for_update_relay_log(thd, &waittime);
8175 } while ((ret == ETIMEDOUT || ret == ETIME) /* todo:remove */ &&
8176 signal_cnt == rli->relay_log.signal_cnt && !thd->killed);
8177 }
8178 else
8179 {
8180 rli->relay_log.wait_for_update_relay_log(thd, NULL);
8181 }
8182
8183 // re-acquire data lock since we released it earlier
8184 mysql_mutex_lock(&rli->data_lock);
8185 continue;
8186 }
8187 /*
8188 If the log was not hot, we need to move to the next log in
8189 sequence. The next log could be hot or cold, we deal with both
8190 cases separately after doing some common initialization
8191 */
8192 end_io_cache(cur_log);
8193 DBUG_ASSERT(rli->cur_log_fd >= 0);
8194 mysql_file_close(rli->cur_log_fd, MYF(MY_WME));
8195 rli->cur_log_fd = -1;
8196
8197 if (relay_log_purge)
8198 {
8199 /*
8200 purge_first_log will properly set up relay log coordinates in rli.
8201 If the group's coordinates are equal to the event's coordinates
8202 (i.e. the relay log was not rotated in the middle of a group),
8203 we can purge this relay log too.
8204 We do ulonglong and string comparisons, this may be slow but
8205 - purging the last relay log is nice (it can save 1GB of disk), so we
8206 like to detect the case where we can do it, and given this,
8207 - I see no better detection method
8208 - purge_first_log is not called that often
8209 */
8210 if (rli->relay_log.purge_first_log
8211 (rli,
8212 rli->get_group_relay_log_pos() == rli->get_event_relay_log_pos()
8213 && !strcmp(rli->get_group_relay_log_name(),rli->get_event_relay_log_name())))
8214 {
8215 errmsg = "Error purging processed logs";
8216 goto err;
8217 }
8218 DBUG_PRINT("info", ("next_event group master %s %lu group relay %s %lu event %s %lu\n",
8219 rli->get_group_master_log_name(),
8220 (ulong) rli->get_group_master_log_pos(),
8221 rli->get_group_relay_log_name(),
8222 (ulong) rli->get_group_relay_log_pos(),
8223 rli->get_event_relay_log_name(),
8224 (ulong) rli->get_event_relay_log_pos()));
8225 }
8226 else
8227 {
8228 /*
8229 If hot_log is set, then we already have a lock on
8230 LOCK_log. If not, we have to get the lock.
8231
8232 According to Sasha, the only time this code will ever be executed
8233 is if we are recovering from a bug.
8234 */
8235 if (rli->relay_log.find_next_log(&rli->linfo, !hot_log))
8236 {
8237 errmsg = "error switching to the next log";
8238 goto err;
8239 }
8240 rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
8241 rli->set_event_relay_log_name(rli->linfo.log_file_name);
8242 /*
8243 We may update the worker here but this is not extremely
8244 necessary. /Alfranio
8245 */
8246 rli->flush_info();
8247 }
8248
8249 /* Reset the relay-log-change-notified status of Slave Workers */
8250 if (rli->is_parallel_exec())
8251 {
8252 DBUG_PRINT("info", ("next_event: MTS group relay log changes to %s %lu\n",
8253 rli->get_group_relay_log_name(),
8254 (ulong) rli->get_group_relay_log_pos()));
8255 rli->reset_notified_relay_log_change();
8256 }
8257
8258 /*
8259 Now we want to open this next log. To know if it's a hot log (the one
8260 being written by the I/O thread now) or a cold log, we can use
8261 is_active(); if it is hot, we use the I/O cache; if it's cold we open
8262 the file normally. But if is_active() reports that the log is hot, this
8263 may change between the test and the consequence of the test. So we may
8264 open the I/O cache whereas the log is now cold, which is nonsense.
8265 To guard against this, we need to have LOCK_log.
8266 */
8267
8268 DBUG_PRINT("info",("hot_log: %d",hot_log));
8269 if (!hot_log) /* if hot_log, we already have this mutex */
8270 mysql_mutex_lock(log_lock);
8271 if (rli->relay_log.is_active(rli->linfo.log_file_name))
8272 {
8273 #ifdef EXTRA_DEBUG
8274 if (log_warnings)
8275 sql_print_information("next log '%s' is currently active",
8276 rli->linfo.log_file_name);
8277 #endif
8278 rli->cur_log= cur_log= rli->relay_log.get_log_file();
8279 rli->cur_log_old_open_count= rli->relay_log.get_open_count();
8280 DBUG_ASSERT(rli->cur_log_fd == -1);
8281
8282 /*
8283 When the SQL thread is [stopped and] (re)started the
8284 following may happen:
8285
8286 1. Log was hot at stop time and remains hot at restart
8287
8288 SQL thread reads again from hot_log (SQL thread was
8289 reading from the active log when it was stopped and the
8290 very same log is still active on SQL thread restart).
8291
8292 In this case, my_b_seek is performed on cur_log, while
8293 cur_log points to relay_log.get_log_file();
8294
8295 2. Log was hot at stop time but got cold before restart
8296
8297 The log was hot when SQL thread stopped, but it is not
8298 anymore when the SQL thread restarts.
8299
8300 In this case, the SQL thread reopens the log, using
8301 cache_buf, ie, cur_log points to &cache_buf, and thence
8302 its coordinates are reset.
8303
8304 3. Log was already cold at stop time
8305
8306 The log was not hot when the SQL thread stopped, and, of
8307 course, it will not be hot when it restarts.
8308
8309 In this case, the SQL thread opens the cold log again,
8310 using cache_buf, ie, cur_log points to &cache_buf, and
8311 thence its coordinates are reset.
8312
8313 4. Log was hot at stop time, DBA changes to previous cold
8314 log and restarts SQL thread
8315
8316 The log was hot when the SQL thread was stopped, but the
8317 user changed the coordinates of the SQL thread to
8318 restart from a previous cold log.
8319
8320 In this case, at start time, cur_log points to a cold
8321 log, opened using &cache_buf as cache, and coordinates
8322 are reset. However, as it moves on to the next logs, it
8323 will eventually reach the hot log. If the hot log is the
8324 same at the time the SQL thread was stopped, then
8325 coordinates were not reset - the cur_log will point to
8326 relay_log.get_log_file(), and not a freshly opened
8327 IO_CACHE through cache_buf. For this reason we need to
8328 deploy a my_b_seek before calling check_binlog_magic at
8329 this point of the code (see: BUG#55263 for more
8330 details).
8331
8332 NOTES:
8333 - We must keep the LOCK_log to read the 4 first bytes, as
8334 this is a hot log (same as when we call read_log_event()
8335 above: for a hot log we take the mutex).
8336
8337 - Because of scenario #4 above, we need to have a
8338 my_b_seek here. Otherwise, we might hit the assertion
8339 inside check_binlog_magic.
8340 */
8341
8342 my_b_seek(cur_log, (my_off_t) 0);
8343 if (check_binlog_magic(cur_log,&errmsg))
8344 {
8345 if (!hot_log)
8346 mysql_mutex_unlock(log_lock);
8347 goto err;
8348 }
8349 if (!hot_log)
8350 mysql_mutex_unlock(log_lock);
8351 continue;
8352 }
8353 if (!hot_log)
8354 mysql_mutex_unlock(log_lock);
8355 /*
8356 if we get here, the log was not hot, so we will have to open it
8357 ourselves. We are sure that the log is still not hot now (a log can get
8358 from hot to cold, but not from cold to hot). No need for LOCK_log.
8359 */
8360 #ifdef EXTRA_DEBUG
8361 if (log_warnings)
8362 sql_print_information("next log '%s' is not active",
8363 rli->linfo.log_file_name);
8364 #endif
8365 // open_binlog_file() will check the magic header
8366 if ((rli->cur_log_fd=open_binlog_file(cur_log,rli->linfo.log_file_name,
8367 &errmsg)) <0)
8368 goto err;
8369 }
8370 else
8371 {
8372 /*
8373 Read failed with a non-EOF error.
8374 TODO: come up with something better to handle this error
8375 */
8376 if (hot_log)
8377 mysql_mutex_unlock(log_lock);
8378 sql_print_error("Slave SQL thread: I/O error reading \
8379 event(errno: %d cur_log->error: %d)",
8380 my_errno,cur_log->error);
8381 // set read position to the beginning of the event
8382 my_b_seek(cur_log,rli->get_event_relay_log_pos());
8383 /* otherwise, we have had a partial read */
8384 errmsg = "Aborting slave SQL thread because of partial event read";
8385 break; // To end of function
8386 }
8387 }
8388 if (!errmsg && log_warnings)
8389 {
8390 sql_print_information("Error reading relay log event: %s",
8391 "slave SQL thread was killed");
8392 DBUG_RETURN(0);
8393 }
8394
8395 err:
8396 if (errmsg)
8397 sql_print_error("Error reading relay log event: %s", errmsg);
8398 DBUG_RETURN(0);
8399 }
8400
/*
  Rotate a relay log (this is used only by FLUSH LOGS; the automatic rotation
  because of size is simpler because when we do it we already have all relevant
  locks; here we don't, so this function is mainly taking locks).
  Returns 0 if the relay log info is not initialized or if the rotation
  succeeded; otherwise returns the non-zero error reported by
  MYSQL_BIN_LOG::new_file().
*/
8408
rotate_relay_log(Master_info * mi,bool need_log_space_lock)8409 int rotate_relay_log(Master_info* mi, bool need_log_space_lock)
8410 {
8411 DBUG_ENTER("rotate_relay_log");
8412
8413 mysql_mutex_assert_owner(&mi->data_lock);
8414 DBUG_EXECUTE_IF("crash_before_rotate_relaylog", DBUG_SUICIDE(););
8415
8416 Relay_log_info* rli= mi->rli;
8417 int error= 0;
8418
8419 /*
8420 We need to test inited because otherwise, new_file() will attempt to lock
8421 LOCK_log, which may not be inited (if we're not a slave).
8422 */
8423 if (!rli->inited)
8424 {
8425 DBUG_PRINT("info", ("rli->inited == 0"));
8426 goto end;
8427 }
8428
8429 /* If the relay log is closed, new_file() will do nothing. */
8430 error= rli->relay_log.new_file(mi->get_mi_description_event());
8431 if (error != 0)
8432 goto end;
8433
8434 /*
8435 We harvest now, because otherwise BIN_LOG_HEADER_SIZE will not immediately
8436 be counted, so imagine a succession of FLUSH LOGS and assume the slave
8437 threads are started:
8438 relay_log_space decreases by the size of the deleted relay log, but does
8439 not increase, so flush-after-flush we may become negative, which is wrong.
8440 Even if this will be corrected as soon as a query is replicated on the
8441 slave (because the I/O thread will then call harvest_bytes_written() which
8442 will harvest all these BIN_LOG_HEADER_SIZE we forgot), it may give strange
8443 output in SHOW SLAVE STATUS meanwhile. So we harvest now.
8444 If the log is closed, then this will just harvest the last writes, probably
8445 0 as they probably have been harvested.
8446 */
8447 rli->relay_log.harvest_bytes_written(rli, need_log_space_lock);
8448 end:
8449 DBUG_RETURN(error);
8450 }
8451
8452
8453 /**
8454 Detects, based on master's version (as found in the relay log), if master
8455 has a certain bug.
8456 @param rli Relay_log_info which tells the master's version
8457 @param bug_id Number of the bug as found in bugs.mysql.com
8458 @param report bool report error message, default TRUE
8459
8460 @param pred Predicate function that will be called with @c param to
8461 check for the bug. If the function return @c true, the bug is present,
8462 otherwise, it is not.
8463
8464 @param param State passed to @c pred function.
8465
8466 @return TRUE if master has the bug, FALSE if it does not.
8467 */
rpl_master_has_bug(const Relay_log_info * rli,uint bug_id,bool report,bool (* pred)(const void *),const void * param)8468 bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
8469 bool (*pred)(const void *), const void *param)
8470 {
8471 struct st_version_range_for_one_bug {
8472 uint bug_id;
8473 const uchar introduced_in[3]; // first version with bug
8474 const uchar fixed_in[3]; // first version with fix
8475 };
8476 static struct st_version_range_for_one_bug versions_for_all_bugs[]=
8477 {
8478 {24432, { 5, 0, 24 }, { 5, 0, 38 } },
8479 {24432, { 5, 1, 12 }, { 5, 1, 17 } },
8480 {33029, { 5, 0, 0 }, { 5, 0, 58 } },
8481 {33029, { 5, 1, 0 }, { 5, 1, 12 } },
8482 {37426, { 5, 1, 0 }, { 5, 1, 26 } },
8483 };
8484 const uchar *master_ver=
8485 rli->get_rli_description_event()->server_version_split;
8486
8487 DBUG_ASSERT(sizeof(rli->get_rli_description_event()->server_version_split) == 3);
8488
8489 for (uint i= 0;
8490 i < sizeof(versions_for_all_bugs)/sizeof(*versions_for_all_bugs);i++)
8491 {
8492 const uchar *introduced_in= versions_for_all_bugs[i].introduced_in,
8493 *fixed_in= versions_for_all_bugs[i].fixed_in;
8494 if ((versions_for_all_bugs[i].bug_id == bug_id) &&
8495 (memcmp(introduced_in, master_ver, 3) <= 0) &&
8496 (memcmp(fixed_in, master_ver, 3) > 0) &&
8497 (pred == NULL || (*pred)(param)))
8498 {
8499 enum loglevel report_level= INFORMATION_LEVEL;
8500 if (!report)
8501 return TRUE;
8502 // a short message for SHOW SLAVE STATUS (message length constraints)
8503 my_printf_error(ER_UNKNOWN_ERROR, "master may suffer from"
8504 " http://bugs.mysql.com/bug.php?id=%u"
8505 " so slave stops; check error log on slave"
8506 " for more info", MYF(0), bug_id);
8507 // a verbose message for the error log
8508 if (!ignored_error_code(ER_UNKNOWN_ERROR))
8509 {
8510 report_level= ERROR_LEVEL;
8511 current_thd->is_slave_error= 1;
8512 }
8513 /* In case of ignored errors report warnings only if log_warnings > 1. */
8514 else if (log_warnings > 1)
8515 report_level= WARNING_LEVEL;
8516
8517 if (report_level != INFORMATION_LEVEL)
8518 rli->report(report_level, ER_UNKNOWN_ERROR,
8519 "According to the master's version ('%s'),"
8520 " it is probable that master suffers from this bug:"
8521 " http://bugs.mysql.com/bug.php?id=%u"
8522 " and thus replicating the current binary log event"
8523 " may make the slave's data become different from the"
8524 " master's data."
8525 " To take no risk, slave refuses to replicate"
8526 " this event and stops."
8527 " We recommend that all updates be stopped on the"
8528 " master and slave, that the data of both be"
8529 " manually synchronized,"
8530 " that master's binary logs be deleted,"
8531 " that master be upgraded to a version at least"
8532 " equal to '%d.%d.%d'. Then replication can be"
8533 " restarted.",
8534 rli->get_rli_description_event()->server_version,
8535 bug_id,
8536 fixed_in[0], fixed_in[1], fixed_in[2]);
8537 return TRUE;
8538 }
8539 }
8540 return FALSE;
8541 }
8542
/**
  BUG#33029: for all 5.0 versions up to 5.0.58 (exclusive) and all 5.1
  versions up to 5.1.12 (exclusive), if one statement in a SP generated an
  AUTO_INCREMENT value by the top statement, all statements after it would
  also be considered to have generated an AUTO_INCREMENT value by the top
  statement, and an erroneous INSERT_ID value might be associated with these
  statements, which could cause a duplicate entry error and stop the slave.

  Detect a buggy master so that the problem can be worked around.
*/
rpl_master_erroneous_autoinc(THD * thd)8553 bool rpl_master_erroneous_autoinc(THD *thd)
8554 {
8555 if (active_mi != NULL && active_mi->rli->info_thd == thd)
8556 {
8557 Relay_log_info *rli= active_mi->rli;
8558 DBUG_EXECUTE_IF("simulate_bug33029", return TRUE;);
8559 return rpl_master_has_bug(rli, 33029, FALSE, NULL, NULL);
8560 }
8561 return FALSE;
8562 }
8563
/**
  A copy of active_mi->rli->slave_skip_counter, kept so that the value can be
  shown in SHOW VARIABLES, INFORMATION_SCHEMA.GLOBAL_VARIABLES and
  @@sql_slave_skip_counter without taking all the mutexes needed to access
  active_mi->rli->slave_skip_counter properly.
*/
uint sql_slave_skip_counter;
8571
8572 /**
8573 Execute a START SLAVE statement.
8574
8575 @param thd Pointer to THD object for the client thread executing the
8576 statement.
8577
8578 @param mi Pointer to Master_info object for the slave's IO thread.
8579
8580 @param net_report If true, saves the exit status into Diagnostics_area.
8581
8582 @retval 0 success
8583 @retval 1 error
8584 */
start_slave(THD * thd,Master_info * mi,bool net_report)8585 int start_slave(THD* thd , Master_info* mi, bool net_report)
8586 {
8587 int slave_errno= 0;
8588 int thread_mask;
8589 DBUG_ENTER("start_slave");
8590
8591 if (check_access(thd, SUPER_ACL, any_db, NULL, NULL, 0, 0))
8592 DBUG_RETURN(1);
8593
8594 if (thd->lex->slave_connection.user ||
8595 thd->lex->slave_connection.password)
8596 {
8597 #if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
8598 if (thd->vio_ok() && !thd->net.vio->ssl_arg)
8599 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
8600 ER_INSECURE_PLAIN_TEXT,
8601 ER(ER_INSECURE_PLAIN_TEXT));
8602 #endif
8603 #if !defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
8604 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
8605 ER_INSECURE_PLAIN_TEXT,
8606 ER(ER_INSECURE_PLAIN_TEXT));
8607 #endif
8608 }
8609
8610 lock_slave_threads(mi); // this allows us to cleanly read slave_running
8611 // Get a mask of _stopped_ threads
8612 init_thread_mask(&thread_mask,mi,1 /* inverse */);
8613 /*
8614 Below we will start all stopped threads. But if the user wants to
8615 start only one thread, do as if the other thread was running (as we
8616 don't wan't to touch the other thread), so set the bit to 0 for the
8617 other thread
8618 */
8619 if (thd->lex->slave_thd_opt)
8620 thread_mask&= thd->lex->slave_thd_opt;
8621 if (thread_mask) //some threads are stopped, start them
8622 {
8623 if (global_init_info(mi, false, thread_mask))
8624 slave_errno=ER_MASTER_INFO;
8625 else if (server_id_supplied && *mi->host)
8626 {
8627 /*
8628 If we will start IO thread we need to take care of possible
8629 options provided through the START SLAVE if there is any.
8630 */
8631 if (thread_mask & SLAVE_IO)
8632 {
8633 if (thd->lex->slave_connection.user)
8634 {
8635 mi->set_start_user_configured(true);
8636 mi->set_user(thd->lex->slave_connection.user);
8637 }
8638 if (thd->lex->slave_connection.password)
8639 {
8640 mi->set_start_user_configured(true);
8641 mi->set_password(thd->lex->slave_connection.password,
8642 strlen(thd->lex->slave_connection.password));
8643 }
8644 if (thd->lex->slave_connection.plugin_auth)
8645 mi->set_plugin_auth(thd->lex->slave_connection.plugin_auth);
8646 if (thd->lex->slave_connection.plugin_dir)
8647 mi->set_plugin_dir(thd->lex->slave_connection.plugin_dir);
8648 }
8649
8650 /*
8651 If we will start SQL thread we will care about UNTIL options If
8652 not and they are specified we will ignore them and warn user
8653 about this fact.
8654 */
8655 if (thread_mask & SLAVE_SQL)
8656 {
8657 /*
8658 To cache the MTS system var values and used them in the following
8659 runtime. The system var:s can change meanwhile but having no other
8660 effects.
8661 */
8662 mi->rli->opt_slave_parallel_workers= opt_mts_slave_parallel_workers;
8663 #ifndef DBUG_OFF
8664 if (!DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
8665 #endif
8666 mi->rli->checkpoint_group= opt_mts_checkpoint_group;
8667
8668 mysql_mutex_lock(&mi->rli->data_lock);
8669
8670 if (thd->lex->mi.pos)
8671 {
8672 if (thd->lex->mi.relay_log_pos)
8673 slave_errno= ER_BAD_SLAVE_UNTIL_COND;
8674 mi->rli->until_condition= Relay_log_info::UNTIL_MASTER_POS;
8675 mi->rli->until_log_pos= thd->lex->mi.pos;
8676 /*
8677 We don't check thd->lex->mi.log_file_name for NULL here
8678 since it is checked in sql_yacc.yy
8679 */
8680 strmake(mi->rli->until_log_name, thd->lex->mi.log_file_name,
8681 sizeof(mi->rli->until_log_name)-1);
8682 }
8683 else if (thd->lex->mi.relay_log_pos)
8684 {
8685 if (thd->lex->mi.pos)
8686 slave_errno= ER_BAD_SLAVE_UNTIL_COND;
8687 mi->rli->until_condition= Relay_log_info::UNTIL_RELAY_POS;
8688 mi->rli->until_log_pos= thd->lex->mi.relay_log_pos;
8689 strmake(mi->rli->until_log_name, thd->lex->mi.relay_log_name,
8690 sizeof(mi->rli->until_log_name)-1);
8691 }
8692 else if (thd->lex->mi.gtid)
8693 {
8694 global_sid_lock->wrlock();
8695 mi->rli->clear_until_condition();
8696 if (mi->rli->until_sql_gtids.add_gtid_text(thd->lex->mi.gtid)
8697 != RETURN_STATUS_OK)
8698 slave_errno= ER_BAD_SLAVE_UNTIL_COND;
8699 else {
8700 mi->rli->until_condition=
8701 LEX_MASTER_INFO::UNTIL_SQL_BEFORE_GTIDS == thd->lex->mi.gtid_until_condition
8702 ? Relay_log_info::UNTIL_SQL_BEFORE_GTIDS
8703 : Relay_log_info::UNTIL_SQL_AFTER_GTIDS;
8704 if ((mi->rli->until_condition ==
8705 Relay_log_info::UNTIL_SQL_AFTER_GTIDS) &&
8706 mi->rli->opt_slave_parallel_workers != 0)
8707 {
8708 mi->rli->opt_slave_parallel_workers= 0;
8709 push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
8710 ER_MTS_FEATURE_IS_NOT_SUPPORTED,
8711 ER(ER_MTS_FEATURE_IS_NOT_SUPPORTED),
8712 "UNTIL condtion",
8713 "Slave is started in the sequential execution mode.");
8714 }
8715 }
8716 global_sid_lock->unlock();
8717 }
8718 else if (thd->lex->mi.until_after_gaps)
8719 {
8720 mi->rli->until_condition= Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS;
8721 mi->rli->opt_slave_parallel_workers=
8722 mi->rli->recovery_parallel_workers;
8723 }
8724 else
8725 mi->rli->clear_until_condition();
8726
8727 if (mi->rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ||
8728 mi->rli->until_condition == Relay_log_info::UNTIL_RELAY_POS)
8729 {
8730 /* Preparing members for effective until condition checking */
8731 const char *p= fn_ext(mi->rli->until_log_name);
8732 char *p_end;
8733 if (*p)
8734 {
8735 //p points to '.'
8736 mi->rli->until_log_name_extension= strtoul(++p,&p_end, 10);
8737 /*
8738 p_end points to the first invalid character. If it equals
8739 to p, no digits were found, error. If it contains '\0' it
8740 means conversion went ok.
8741 */
8742 if (p_end==p || *p_end)
8743 slave_errno=ER_BAD_SLAVE_UNTIL_COND;
8744 }
8745 else
8746 slave_errno=ER_BAD_SLAVE_UNTIL_COND;
8747
8748 /* mark the cached result of the UNTIL comparison as "undefined" */
8749 mi->rli->until_log_names_cmp_result=
8750 Relay_log_info::UNTIL_LOG_NAMES_CMP_UNKNOWN;
8751
8752 /* Issuing warning then started without --skip-slave-start */
8753 if (!opt_skip_slave_start)
8754 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
8755 ER_MISSING_SKIP_SLAVE,
8756 ER(ER_MISSING_SKIP_SLAVE));
8757 if (mi->rli->opt_slave_parallel_workers != 0)
8758 {
8759 mi->rli->opt_slave_parallel_workers= 0;
8760 push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
8761 ER_MTS_FEATURE_IS_NOT_SUPPORTED,
8762 ER(ER_MTS_FEATURE_IS_NOT_SUPPORTED),
8763 "UNTIL condtion",
8764 "Slave is started in the sequential execution mode.");
8765 }
8766 }
8767
8768 mysql_mutex_unlock(&mi->rli->data_lock);
8769
8770 /* MTS technical limitation no support of trans retry */
8771 if (mi->rli->opt_slave_parallel_workers != 0 && slave_trans_retries != 0)
8772 {
8773 push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
8774 ER_MTS_FEATURE_IS_NOT_SUPPORTED,
8775 ER(ER_MTS_FEATURE_IS_NOT_SUPPORTED),
8776 "slave_transaction_retries",
8777 "In the event of a transient failure, the slave will "
8778 "not retry the transaction and will stop.");
8779 }
8780 }
8781 else if (thd->lex->mi.pos || thd->lex->mi.relay_log_pos || thd->lex->mi.gtid)
8782 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_UNTIL_COND_IGNORED,
8783 ER(ER_UNTIL_COND_IGNORED));
8784
8785 if (!slave_errno)
8786 slave_errno = start_slave_threads(false/*need_lock_slave=false*/,
8787 true/*wait_for_start=true*/,
8788 mi,
8789 thread_mask);
8790 }
8791 else
8792 slave_errno = ER_BAD_SLAVE;
8793 }
8794 else
8795 {
8796 /* no error if all threads are already started, only a warning */
8797 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_SLAVE_WAS_RUNNING,
8798 ER(ER_SLAVE_WAS_RUNNING));
8799 }
8800
8801 /*
8802 Clean up start information if there was an attempt to start
8803 the IO thread to avoid any security issue.
8804 */
8805 if (slave_errno &&
8806 (thread_mask & SLAVE_IO) == SLAVE_IO)
8807 mi->reset_start_info();
8808
8809 unlock_slave_threads(mi);
8810
8811 if (slave_errno)
8812 {
8813 if (net_report)
8814 my_message(slave_errno, ER(slave_errno), MYF(0));
8815 DBUG_RETURN(1);
8816 }
8817 else if (net_report)
8818 my_ok(thd);
8819
8820 DBUG_RETURN(0);
8821 }
8822
8823
8824 /**
8825 Execute a STOP SLAVE statement.
8826
8827 @param thd Pointer to THD object for the client thread executing the
8828 statement.
8829
8830 @param mi Pointer to Master_info object for the slave's IO thread.
8831
8832 @param net_report If true, saves the exit status into Diagnostics_area.
8833
8834 @retval 0 success
8835 @retval 1 error
8836 */
stop_slave(THD * thd,Master_info * mi,bool net_report)8837 int stop_slave(THD* thd, Master_info* mi, bool net_report )
8838 {
8839 DBUG_ENTER("stop_slave");
8840
8841 int slave_errno;
8842 if (!thd)
8843 thd = current_thd;
8844
8845 if (check_access(thd, SUPER_ACL, any_db, NULL, NULL, 0, 0))
8846 DBUG_RETURN(1);
8847 THD_STAGE_INFO(thd, stage_killing_slave);
8848 int thread_mask;
8849 lock_slave_threads(mi);
8850 // Get a mask of _running_ threads
8851 init_thread_mask(&thread_mask,mi,0 /* not inverse*/);
8852 /*
8853 Below we will stop all running threads.
8854 But if the user wants to stop only one thread, do as if the other thread
8855 was stopped (as we don't wan't to touch the other thread), so set the
8856 bit to 0 for the other thread
8857 */
8858 if (thd->lex->slave_thd_opt)
8859 thread_mask &= thd->lex->slave_thd_opt;
8860
8861 if (thread_mask)
8862 {
8863 slave_errno= terminate_slave_threads(mi,thread_mask,
8864 false/*need_lock_term=false*/);
8865 }
8866 else
8867 {
8868 //no error if both threads are already stopped, only a warning
8869 slave_errno= 0;
8870 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_SLAVE_WAS_NOT_RUNNING,
8871 ER(ER_SLAVE_WAS_NOT_RUNNING));
8872 }
8873 unlock_slave_threads(mi);
8874
8875 if (slave_errno)
8876 {
8877 if ((slave_errno == ER_STOP_SLAVE_SQL_THREAD_TIMEOUT) ||
8878 (slave_errno == ER_STOP_SLAVE_IO_THREAD_TIMEOUT))
8879 {
8880 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, slave_errno,
8881 ER(slave_errno));
8882 sql_print_warning("%s",ER(slave_errno));
8883 }
8884 if (net_report)
8885 my_message(slave_errno, ER(slave_errno), MYF(0));
8886 DBUG_RETURN(1);
8887 }
8888 else if (net_report)
8889 my_ok(thd);
8890
8891 DBUG_RETURN(0);
8892 }
8893
8894
8895 /**
8896 Execute a RESET SLAVE statement.
8897
8898 @param thd Pointer to THD object of the client thread executing the
8899 statement.
8900
8901 @param mi Pointer to Master_info object for the slave.
8902
8903 @retval 0 success
8904 @retval 1 error
8905 */
reset_slave(THD * thd,Master_info * mi)8906 int reset_slave(THD *thd, Master_info* mi)
8907 {
8908 int thread_mask= 0, error= 0;
8909 uint sql_errno=ER_UNKNOWN_ERROR;
8910 const char* errmsg= "Unknown error occured while reseting slave";
8911 DBUG_ENTER("reset_slave");
8912
8913 lock_slave_threads(mi);
8914 init_thread_mask(&thread_mask,mi,0 /* not inverse */);
8915 if (thread_mask) // We refuse if any slave thread is running
8916 {
8917 sql_errno= ER_SLAVE_MUST_STOP;
8918 error=1;
8919 goto err;
8920 }
8921
8922 ha_reset_slave(thd);
8923
8924 // delete relay logs, clear relay log coordinates
8925 if ((error= mi->rli->purge_relay_logs(thd,
8926 1 /* just reset */,
8927 &errmsg)))
8928 {
8929 sql_errno= ER_RELAY_LOG_FAIL;
8930 goto err;
8931 }
8932
8933 /* Clear master's log coordinates and associated information */
8934 DBUG_ASSERT(!mi->rli || !mi->rli->slave_running); // none writes in rli table
8935 mi->clear_in_memory_info(thd->lex->reset_slave_info.all);
8936
8937 if (remove_info(mi))
8938 {
8939 error= 1;
8940 goto err;
8941 }
8942
8943 (void) RUN_HOOK(binlog_relay_io, after_reset_slave, (thd, mi));
8944 err:
8945 unlock_slave_threads(mi);
8946 if (error)
8947 my_error(sql_errno, MYF(0), errmsg);
8948 DBUG_RETURN(error);
8949 }
8950
8951 /**
8952 Execute a CHANGE MASTER statement. MTS workers info tables data are removed
8953 in the successful branch (i.e. there are no gaps in the execution history).
8954
8955 @param thd Pointer to THD object for the client thread executing the
8956 statement.
8957
8958 @param mi Pointer to Master_info object belonging to the slave's IO
8959 thread.
8960
8961 @retval FALSE success
8962 @retval TRUE error
8963 */
change_master(THD * thd,Master_info * mi)8964 bool change_master(THD* thd, Master_info* mi)
8965 {
8966 int thread_mask;
8967 const char* errmsg= 0;
8968 bool need_relay_log_purge= 1;
8969 char *var_master_log_name= NULL, *var_group_master_log_name= NULL;
8970 bool ret= false;
8971 char saved_host[HOSTNAME_LENGTH + 1], saved_bind_addr[HOSTNAME_LENGTH + 1];
8972 uint saved_port= 0;
8973 char saved_log_name[FN_REFLEN];
8974 my_off_t saved_log_pos= 0;
8975 my_bool save_relay_log_purge= relay_log_purge;
8976 bool mts_remove_workers= false;
8977 bool binlog_prot_acquired= false;
8978
8979 DBUG_ENTER("change_master");
8980
8981 lock_slave_threads(mi);
8982 init_thread_mask(&thread_mask,mi,0 /*not inverse*/);
8983 LEX_MASTER_INFO* lex_mi= &thd->lex->mi;
8984 if (thread_mask) // We refuse if any slave thread is running
8985 {
8986 my_message(ER_SLAVE_MUST_STOP, ER(ER_SLAVE_MUST_STOP), MYF(0));
8987 ret= true;
8988 goto err;
8989 }
8990 thread_mask= SLAVE_IO | SLAVE_SQL;
8991
8992 THD_STAGE_INFO(thd, stage_changing_master);
8993 /*
8994 We need to check if there is an empty master_host. Otherwise
8995 change master succeeds, a master.info file is created containing
8996 empty master_host string and when issuing: start slave; an error
8997 is thrown stating that the server is not configured as slave.
8998 (See BUG#28796).
8999 */
9000 if(lex_mi->host && !*lex_mi->host)
9001 {
9002 my_error(ER_WRONG_ARGUMENTS, MYF(0), "MASTER_HOST");
9003 unlock_slave_threads(mi);
9004 DBUG_RETURN(TRUE);
9005 }
9006 if (global_init_info(mi, false, thread_mask))
9007 {
9008 my_message(ER_MASTER_INFO, ER(ER_MASTER_INFO), MYF(0));
9009 ret= true;
9010 goto err;
9011 }
9012 if (mi->rli->mts_recovery_group_cnt)
9013 {
9014 /*
9015 Change-Master can't be done if there is a mts group gap.
9016 That requires mts-recovery which START SLAVE provides.
9017 */
9018 DBUG_ASSERT(mi->rli->recovery_parallel_workers);
9019
9020 my_message(ER_MTS_CHANGE_MASTER_CANT_RUN_WITH_GAPS,
9021 ER(ER_MTS_CHANGE_MASTER_CANT_RUN_WITH_GAPS), MYF(0));
9022 ret= true;
9023 goto err;
9024 }
9025 else
9026 {
9027 /*
9028 Lack of mts group gaps makes Workers info stale
9029 regardless of need_relay_log_purge computation.
9030 */
9031 if (mi->rli->recovery_parallel_workers)
9032 mts_remove_workers= true;
9033 }
9034 /*
9035 We cannot specify auto position and set either the coordinates
9036 on master or slave. If we try to do so, an error message is
9037 printed out.
9038 */
9039 if (lex_mi->log_file_name != NULL || lex_mi->pos != 0 ||
9040 lex_mi->relay_log_name != NULL || lex_mi->relay_log_pos != 0)
9041 {
9042 if (lex_mi->auto_position == LEX_MASTER_INFO::LEX_MI_ENABLE ||
9043 (lex_mi->auto_position != LEX_MASTER_INFO::LEX_MI_DISABLE &&
9044 mi->is_auto_position()))
9045 {
9046 my_message(ER_BAD_SLAVE_AUTO_POSITION,
9047 ER(ER_BAD_SLAVE_AUTO_POSITION), MYF(0));
9048 ret= true;
9049 goto err;
9050 }
9051 }
9052
9053 // CHANGE MASTER TO MASTER_AUTO_POSITION = 1 requires GTID_MODE = ON
9054 if (lex_mi->auto_position == LEX_MASTER_INFO::LEX_MI_ENABLE && gtid_mode != 3)
9055 {
9056 my_message(ER_AUTO_POSITION_REQUIRES_GTID_MODE_ON,
9057 ER(ER_AUTO_POSITION_REQUIRES_GTID_MODE_ON), MYF(0));
9058 ret= true;
9059 goto err;
9060 }
9061
9062 /*
9063 Data lock not needed since we have already stopped the running threads,
9064 and we have the hold on the run locks which will keep all threads that
9065 could possibly modify the data structures from running
9066 */
9067
9068 /*
9069 Before processing the command, save the previous state.
9070 */
9071 strmake(saved_host, mi->host, HOSTNAME_LENGTH);
9072 strmake(saved_bind_addr, mi->bind_addr, HOSTNAME_LENGTH);
9073 saved_port= mi->port;
9074 strmake(saved_log_name, mi->get_master_log_name(), FN_REFLEN - 1);
9075 saved_log_pos= mi->get_master_log_pos();
9076
9077 /*
9078 If the user specified host or port without binlog or position,
9079 reset binlog's name to FIRST and position to 4.
9080 */
9081
9082 if ((lex_mi->host && strcmp(lex_mi->host, mi->host)) ||
9083 (lex_mi->port && lex_mi->port != mi->port))
9084 {
9085 /*
9086 This is necessary because the primary key, i.e. host or port, has
9087 changed.
9088
9089 The repository does not support direct changes on the primary key,
9090 so the row is dropped and re-inserted with a new primary key. If we
9091 don't do that, the master info repository we will end up with several
9092 rows.
9093 */
9094 if (mi->clean_info())
9095 {
9096 ret= true;
9097 goto err;
9098 }
9099 mi->master_uuid[0]= 0;
9100 mi->master_id= 0;
9101 }
9102
9103 if ((lex_mi->host || lex_mi->port) && !lex_mi->log_file_name && !lex_mi->pos)
9104 {
9105 var_master_log_name= const_cast<char*>(mi->get_master_log_name());
9106 var_master_log_name[0]= '\0';
9107 mi->set_master_log_pos(BIN_LOG_HEADER_SIZE);
9108 }
9109
9110 if (lex_mi->log_file_name)
9111 mi->set_master_log_name(lex_mi->log_file_name);
9112 if (lex_mi->pos)
9113 {
9114 mi->set_master_log_pos(lex_mi->pos);
9115 }
9116 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
9117
9118 if (lex_mi->user || lex_mi->password)
9119 {
9120 #if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
9121 if (thd->vio_ok() && !thd->net.vio->ssl_arg)
9122 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
9123 ER_INSECURE_PLAIN_TEXT,
9124 ER(ER_INSECURE_PLAIN_TEXT));
9125 #endif
9126 #if !defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
9127 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
9128 ER_INSECURE_PLAIN_TEXT,
9129 ER(ER_INSECURE_PLAIN_TEXT));
9130 #endif
9131 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
9132 ER_INSECURE_CHANGE_MASTER,
9133 ER(ER_INSECURE_CHANGE_MASTER));
9134 }
9135
9136 if (lex_mi->user)
9137 mi->set_user(lex_mi->user);
9138
9139 if (lex_mi->password)
9140 {
9141 if (mi->set_password(lex_mi->password, strlen(lex_mi->password)))
9142 {
9143 /*
9144 After implementing WL#5769, we should create a better error message
9145 to denote that the call may have failed due to an error while trying
9146 to encrypt/store the password in a secure key store.
9147 */
9148 my_message(ER_MASTER_INFO, ER(ER_MASTER_INFO), MYF(0));
9149 ret= false;
9150 goto err;
9151 }
9152 }
9153 if (lex_mi->host)
9154 strmake(mi->host, lex_mi->host, sizeof(mi->host)-1);
9155 if (lex_mi->bind_addr)
9156 strmake(mi->bind_addr, lex_mi->bind_addr, sizeof(mi->bind_addr)-1);
9157 if (lex_mi->port)
9158 mi->port = lex_mi->port;
9159 if (lex_mi->connect_retry)
9160 mi->connect_retry = lex_mi->connect_retry;
9161 if (lex_mi->retry_count_opt != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
9162 mi->retry_count = lex_mi->retry_count;
9163 if (lex_mi->heartbeat_opt != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
9164 mi->heartbeat_period = lex_mi->heartbeat_period;
9165 else
9166 mi->heartbeat_period= min<float>(SLAVE_MAX_HEARTBEAT_PERIOD,
9167 (slave_net_timeout/2.0));
9168 mi->received_heartbeats= LL(0); // counter lives until master is CHANGEd
9169 /*
9170 reset the last time server_id list if the current CHANGE MASTER
9171 is mentioning IGNORE_SERVER_IDS= (...)
9172 */
9173 if (lex_mi->repl_ignore_server_ids_opt == LEX_MASTER_INFO::LEX_MI_ENABLE)
9174 reset_dynamic(&(mi->ignore_server_ids->dynamic_ids));
9175 for (uint i= 0; i < lex_mi->repl_ignore_server_ids.elements; i++)
9176 {
9177 ulong s_id;
9178 get_dynamic(&lex_mi->repl_ignore_server_ids, (uchar*) &s_id, i);
9179 if (s_id == ::server_id && replicate_same_server_id)
9180 {
9181 my_error(ER_SLAVE_IGNORE_SERVER_IDS, MYF(0), static_cast<int>(s_id));
9182 ret= TRUE;
9183 goto err;
9184 }
9185 else
9186 {
9187 if (bsearch((const ulong *) &s_id,
9188 mi->ignore_server_ids->dynamic_ids.buffer,
9189 mi->ignore_server_ids->dynamic_ids.elements, sizeof(ulong),
9190 (int (*) (const void*, const void*))
9191 change_master_server_id_cmp) == NULL)
9192 insert_dynamic(&(mi->ignore_server_ids->dynamic_ids), (uchar*) &s_id);
9193 }
9194 }
9195 sort_dynamic(&(mi->ignore_server_ids->dynamic_ids), (qsort_cmp) change_master_server_id_cmp);
9196
9197 if (lex_mi->ssl != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
9198 mi->ssl= (lex_mi->ssl == LEX_MASTER_INFO::LEX_MI_ENABLE);
9199
9200 if (lex_mi->sql_delay != -1)
9201 mi->rli->set_sql_delay(lex_mi->sql_delay);
9202
9203 if (lex_mi->ssl_verify_server_cert != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
9204 mi->ssl_verify_server_cert=
9205 (lex_mi->ssl_verify_server_cert == LEX_MASTER_INFO::LEX_MI_ENABLE);
9206
9207 if (lex_mi->ssl_ca)
9208 strmake(mi->ssl_ca, lex_mi->ssl_ca, sizeof(mi->ssl_ca)-1);
9209 if (lex_mi->ssl_capath)
9210 strmake(mi->ssl_capath, lex_mi->ssl_capath, sizeof(mi->ssl_capath)-1);
9211 if (lex_mi->ssl_cert)
9212 strmake(mi->ssl_cert, lex_mi->ssl_cert, sizeof(mi->ssl_cert)-1);
9213 if (lex_mi->ssl_cipher)
9214 strmake(mi->ssl_cipher, lex_mi->ssl_cipher, sizeof(mi->ssl_cipher)-1);
9215 if (lex_mi->ssl_key)
9216 strmake(mi->ssl_key, lex_mi->ssl_key, sizeof(mi->ssl_key)-1);
9217 if (lex_mi->ssl_crl)
9218 strmake(mi->ssl_crl, lex_mi->ssl_crl, sizeof(mi->ssl_crl)-1);
9219 if (lex_mi->ssl_crlpath)
9220 strmake(mi->ssl_crlpath, lex_mi->ssl_crlpath, sizeof(mi->ssl_crlpath)-1);
9221 #ifndef HAVE_OPENSSL
9222 if (lex_mi->ssl || lex_mi->ssl_ca || lex_mi->ssl_capath ||
9223 lex_mi->ssl_cert || lex_mi->ssl_cipher || lex_mi->ssl_key ||
9224 lex_mi->ssl_verify_server_cert || lex_mi->ssl_crl || lex_mi->ssl_crlpath)
9225 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
9226 ER_SLAVE_IGNORED_SSL_PARAMS, ER(ER_SLAVE_IGNORED_SSL_PARAMS));
9227 #endif
9228
9229 if (lex_mi->relay_log_name)
9230 {
9231 need_relay_log_purge= 0;
9232 char relay_log_name[FN_REFLEN];
9233
9234 mi->rli->relay_log.make_log_name(relay_log_name, lex_mi->relay_log_name);
9235 mi->rli->set_group_relay_log_name(relay_log_name);
9236 mi->rli->set_event_relay_log_name(relay_log_name);
9237 }
9238
9239 if (lex_mi->relay_log_pos)
9240 {
9241 need_relay_log_purge= 0;
9242 mi->rli->set_group_relay_log_pos(lex_mi->relay_log_pos);
9243 mi->rli->set_event_relay_log_pos(lex_mi->relay_log_pos);
9244 }
9245
9246 /*
9247 If the user specified neither host nor port nor any log name nor any log
9248 pos, i.e. only user/password/master_connect_retry were given, they probably
9249 want replication to resume from where it left off, i.e. from the
9250 coordinates of the **SQL** thread (imagine the case where the I/O is ahead
9251 of the SQL; restarting from the coordinates of the I/O would lose some
9252 events which is probably unwanted when you are just doing minor changes
9253 like changing master_connect_retry).
9254 A side-effect is that if only the I/O thread was started, this thread may
9255 restart from ''/4 after the CHANGE MASTER. That's a minor problem (it is a
9256 much more unlikely situation than the one we are fixing here).
9257 Note: coordinates of the SQL thread must be read here, before the
9258 'if (need_relay_log_purge)' block which resets them.
9259 */
9260 if (!lex_mi->host && !lex_mi->port &&
9261 !lex_mi->log_file_name && !lex_mi->pos &&
9262 need_relay_log_purge)
9263 {
9264 /*
9265 Sometimes mi->rli->master_log_pos == 0 (it happens when the SQL thread is
9266 not initialized), so we use a max().
9267 What happens to mi->rli->master_log_pos during the initialization stages
9268 of replication is not 100% clear, so we guard against problems using
9269 max().
9270 */
9271 mi->set_master_log_pos(max<ulonglong>(BIN_LOG_HEADER_SIZE,
9272 mi->rli->get_group_master_log_pos()));
9273 mi->set_master_log_name(mi->rli->get_group_master_log_name());
9274 }
9275
9276 /*
9277 Sets if the slave should connect to the master and look for
9278 GTIDs.
9279 */
9280 if (lex_mi->auto_position != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
9281 mi->set_auto_position(
9282 (lex_mi->auto_position == LEX_MASTER_INFO::LEX_MI_ENABLE));
9283
9284 /*
9285 Relay log's IO_CACHE may not be inited, if rli->inited==0 (server was never
9286 a slave before).
9287 */
9288 if (flush_master_info(mi, true))
9289 {
9290 my_error(ER_RELAY_LOG_INIT, MYF(0), "Failed to flush master info file");
9291 ret= TRUE;
9292 goto err;
9293 }
9294 if (need_relay_log_purge)
9295 {
9296 relay_log_purge= 1;
9297 THD_STAGE_INFO(thd, stage_purging_old_relay_logs);
9298 if (mi->rli->purge_relay_logs(thd,
9299 0 /* not only reset, but also reinit */,
9300 &errmsg))
9301 {
9302 my_error(ER_RELAY_LOG_FAIL, MYF(0), errmsg);
9303 ret= TRUE;
9304 goto err;
9305 }
9306 }
9307 else
9308 {
9309 const char* msg;
9310 relay_log_purge= 0;
9311 /* Relay log is already initialized */
9312
9313 if (mi->rli->init_relay_log_pos(mi->rli->get_group_relay_log_name(),
9314 mi->rli->get_group_relay_log_pos(),
9315 true/*need_data_lock=true*/,
9316 &msg, 0))
9317 {
9318 my_error(ER_RELAY_LOG_INIT, MYF(0), msg);
9319 ret= TRUE;
9320 goto err;
9321 }
9322 }
9323 relay_log_purge= save_relay_log_purge;
9324
9325 if (!thd->backup_binlog_lock.is_acquired())
9326 {
9327 const ulong timeout= thd->variables.lock_wait_timeout;
9328
9329 DBUG_PRINT("debug", ("Acquiring binlog protection lock"));
9330 mysql_mutex_assert_not_owner(&mi->rli->data_lock);
9331 if (thd->backup_binlog_lock.acquire_protection(thd, MDL_EXPLICIT,
9332 timeout))
9333 {
9334 ret= true;
9335 goto err;
9336 }
9337
9338 binlog_prot_acquired= true;
9339 }
9340
9341 /*
9342 Coordinates in rli were spoilt by the 'if (need_relay_log_purge)' block,
9343 so restore them to good values. If we left them to ''/0, that would work;
9344 but that would fail in the case of 2 successive CHANGE MASTER (without a
9345 START SLAVE in between): because first one would set the coords in mi to
9346 the good values of those in rli, then set those in rli to ''/0, then
9347 second CHANGE MASTER would set the coords in mi to those of rli, i.e. to
9348 ''/0: we have lost all copies of the original good coordinates.
9349 That's why we always save good coords in rli.
9350 */
9351 if (need_relay_log_purge)
9352 {
9353 mi->rli->set_group_master_log_pos(mi->get_master_log_pos());
9354 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
9355 mi->rli->set_group_master_log_name(mi->get_master_log_name());
9356 }
9357 var_group_master_log_name= const_cast<char *>(mi->rli->get_group_master_log_name());
9358 if (!var_group_master_log_name[0]) // uninitialized case
9359 mi->rli->set_group_master_log_pos(0);
9360
9361 mysql_mutex_lock(&mi->rli->data_lock);
9362 mi->rli->abort_pos_wait++; /* for MASTER_POS_WAIT() to abort */
9363 /* Clear the errors, for a clean start */
9364 mi->rli->clear_error();
9365 mi->rli->clear_until_condition();
9366
9367 sql_print_information("'CHANGE MASTER TO executed'. "
9368 "Previous state master_host='%s', master_port= %u, master_log_file='%s', "
9369 "master_log_pos= %ld, master_bind='%s'. "
9370 "New state master_host='%s', master_port= %u, master_log_file='%s', "
9371 "master_log_pos= %ld, master_bind='%s'.",
9372 saved_host, saved_port, saved_log_name, (ulong) saved_log_pos,
9373 saved_bind_addr, mi->host, mi->port, mi->get_master_log_name(),
9374 (ulong) mi->get_master_log_pos(), mi->bind_addr);
9375
9376 /*
9377 If we don't write the new coordinates to disk now, the old ones will remain
9378 in relay-log.info until START SLAVE is issued; and if mysqld is shut down
9379 before START SLAVE, the old ones will still be in relay-log.info and will
9380 become the in-memory values at restart (thus causing errors, as the old
9381 relay log does not exist anymore).
9382
9383 Notice that the rli table is available exclusively as slave is not
9384 running.
9385 */
9386 DBUG_ASSERT(!mi->rli->slave_running);
9387 if ((ret= mi->rli->flush_info(true)))
9388 my_error(ER_RELAY_LOG_INIT, MYF(0), "Failed to flush relay info file.");
9389 mysql_cond_broadcast(&mi->data_cond);
9390 mysql_mutex_unlock(&mi->rli->data_lock);
9391
9392 err:
9393
9394 if (binlog_prot_acquired)
9395 {
9396 DBUG_PRINT("debug", ("Releasing binlog protection lock"));
9397 thd->backup_binlog_lock.release_protection(thd);
9398 }
9399
9400 unlock_slave_threads(mi);
9401 if (ret == FALSE)
9402 {
9403 if (!mts_remove_workers)
9404 my_ok(thd);
9405 else
9406 if (!Rpl_info_factory::reset_workers(mi->rli))
9407 my_ok(thd);
9408 else
9409 my_error(ER_MTS_RESET_WORKERS, MYF(0));
9410 }
9411 DBUG_RETURN(ret);
9412 }
9413 /**
9414 @} (end of group Replication)
9415 */
9416 #endif /* HAVE_REPLICATION */
9417