1 /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */
22
23
24 /**
25 @addtogroup Replication
26 @{
27
28 @file
29
30 @brief Code to run the io thread and the sql thread on the
31 replication slave.
32 */
33
34 #include "sql_priv.h"
35 #include "my_global.h"
36 #include "rpl_slave.h"
37 #include "sql_parse.h" // execute_init_command
38 #include "sql_table.h" // mysql_rm_table
39 #include "rpl_mi.h"
40 #include "rpl_rli.h"
41 #include "rpl_filter.h"
42 #include "rpl_info_factory.h"
43 #include "transaction.h"
44 #include <thr_alarm.h>
45 #include <my_dir.h>
46 #include <sql_common.h>
47 #include <errmsg.h>
48 #include <mysqld_error.h>
49 #include <mysys_err.h>
50 #include "rpl_handler.h"
51 #include "rpl_info_dummy.h"
52 #include <signal.h>
53 #include <mysql.h>
54 #include <myisam.h>
55
56 #include "sql_base.h" // close_thread_tables
57 #include "tztime.h" // struct Time_zone
58 #include "log_event.h" // Rotate_log_event,
59 // Create_file_log_event,
60 // Format_description_log_event
61 #include "dynamic_ids.h"
62 #include "rpl_rli_pdb.h"
63 #include "global_threads.h"
64
65 #ifdef WITH_WSREP
66 #include "wsrep_mysqld.h"
67 #include "wsrep_thd.h"
68 #endif
69 #ifdef HAVE_REPLICATION
70
71 #include "rpl_tblmap.h"
72 #include "debug_sync.h"
73
74 using std::min;
75 using std::max;
76
77 #define FLAGSTR(V,F) ((V)&(F)?#F" ":"")
78
79 #define MAX_SLAVE_RETRY_PAUSE 5
80 /*
81 a parameter of sql_slave_killed() to defer the killed status
82 */
83 #define SLAVE_WAIT_GROUP_DONE 60
84 bool use_slave_mask = 0;
85 MY_BITMAP slave_error_mask;
86 char slave_skip_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
87
88 static unsigned long stop_wait_timeout;
89 char* slave_load_tmpdir = 0;
90 Master_info *active_mi= 0;
91 my_bool replicate_same_server_id;
92 ulonglong relay_log_space_limit = 0;
93
94 const char *relay_log_index= 0;
95 const char *relay_log_basename= 0;
96
97 /*
98 MTS load-ballancing parameter.
99 Max length of one MTS Worker queue. The value also determines the size
100 of Relay_log_info::gaq (see @c slave_start_workers()).
101 It can be set to any value in [1, ULONG_MAX - 1] range.
102 */
103 const ulong mts_slave_worker_queue_len_max= 16384;
104
105 /*
106 Statistics go to the error log every # of seconds when --log-warnings > 1
107 */
108 const long mts_online_stat_period= 60 * 2;
109
110
111 /*
112 MTS load-ballancing parameter.
113 Time unit in microsecs to sleep by MTS Coordinator to avoid extra thread
114 signalling in the case of Worker queues are close to be filled up.
115 */
116 const ulong mts_coordinator_basic_nap= 5;
117
118 /*
119 MTS load-ballancing parameter.
120 Percent of Worker queue size at which Worker is considered to become
121 hungry.
122
123 C enqueues --+ . underrun level
124 V "
125 +----------+-+------------------+--------------+
126 | empty |.|::::::::::::::::::|xxxxxxxxxxxxxx| ---> Worker dequeues
127 +----------+-+------------------+--------------+
128
129 Like in the above diagram enqueuing to the x-d area would indicate
130 actual underrruning by Worker.
131 */
132 const ulong mts_worker_underrun_level= 10;
133
134 Slave_job_item * de_queue(Slave_jobs_queue *jobs, Slave_job_item *ret);
135 bool append_item_to_jobs(slave_job_item *job_item,
136 Slave_worker *w, Relay_log_info *rli);
137
138 /*
139 When slave thread exits, we need to remember the temporary tables so we
140 can re-use them on slave start.
141
142 TODO: move the vars below under Master_info
143 */
144
145 int disconnect_slave_event_count = 0, abort_slave_event_count = 0;
146
147 static pthread_key(Master_info*, RPL_MASTER_INFO);
148
/*
  Reconnect actions. The values are used as the first index of the
  reconnect_messages table; SLAVE_RECON_ACT_MAX is the number of actions.
*/
enum enum_slave_reconnect_actions
{
  SLAVE_RECON_ACT_REG= 0,
  SLAVE_RECON_ACT_DUMP= 1,
  SLAVE_RECON_ACT_EVENT= 2,
  SLAVE_RECON_ACT_MAX
};

/*
  Message kinds available for each reconnect action. The values are used as
  the second index of the reconnect_messages table; SLAVE_RECON_MSG_MAX is
  the number of messages per action.
*/
enum enum_slave_reconnect_messages
{
  SLAVE_RECON_MSG_WAIT= 0,
  SLAVE_RECON_MSG_KILLED_WAITING= 1,
  SLAVE_RECON_MSG_AFTER= 2,
  SLAVE_RECON_MSG_FAILED= 3,
  SLAVE_RECON_MSG_COMMAND= 4,
  SLAVE_RECON_MSG_KILLED_AFTER= 5,
  SLAVE_RECON_MSG_MAX
};
167
168 static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]=
169 {
170 {
171 "Waiting to reconnect after a failed registration on master",
172 "Slave I/O thread killed while waitnig to reconnect after a failed \
173 registration on master",
174 "Reconnecting after a failed registration on master",
175 "failed registering on master, reconnecting to try again, \
176 log '%s' at position %s",
177 "COM_REGISTER_SLAVE",
178 "Slave I/O thread killed during or after reconnect"
179 },
180 {
181 "Waiting to reconnect after a failed binlog dump request",
182 "Slave I/O thread killed while retrying master dump",
183 "Reconnecting after a failed binlog dump request",
184 "failed dump request, reconnecting to try again, log '%s' at position %s",
185 "COM_BINLOG_DUMP",
186 "Slave I/O thread killed during or after reconnect"
187 },
188 {
189 "Waiting to reconnect after a failed master event read",
190 "Slave I/O thread killed while waiting to reconnect after a failed read",
191 "Reconnecting after a failed master event read",
192 "Slave I/O thread: Failed reading log event, reconnecting to retry, \
193 log '%s' at position %s",
194 "",
195 "Slave I/O thread killed during or after a reconnect done to recover from \
196 failed read"
197 }
198 };
199
/*
  Result codes distinguishing which step failed: applying the event,
  updating the position, or appending the job.
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_MAX is the number of codes.
*/
enum enum_slave_apply_event_and_update_pos_retval
{
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK= 0,
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPLY_ERROR= 1,
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR= 2,
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR= 3,
  SLAVE_APPLY_EVENT_AND_UPDATE_POS_MAX
};
208
209
210 static int process_io_rotate(Master_info* mi, Rotate_log_event* rev);
211 static int process_io_create_file(Master_info* mi, Create_file_log_event* cev);
212 static bool wait_for_relay_log_space(Relay_log_info* rli);
213 static inline bool io_slave_killed(THD* thd,Master_info* mi);
214 static inline bool sql_slave_killed(THD* thd,Relay_log_info* rli);
215 static inline bool is_autocommit_off_and_infotables(THD* thd);
216 static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type);
217 static void print_slave_skip_errors(void);
218 static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
219 static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
220 bool suppress_warnings);
221 static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
222 bool reconnect, bool suppress_warnings);
223 static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi);
224 static int get_master_uuid(MYSQL *mysql, Master_info *mi);
225 int io_thread_init_commands(MYSQL *mysql, Master_info *mi);
226 static Log_event* next_event(Relay_log_info* rli);
227 static int queue_event(Master_info* mi,const char* buf,ulong event_len);
228 static void set_stop_slave_wait_timeout(unsigned long wait_timeout);
229 static int terminate_slave_thread(THD *thd,
230 mysql_mutex_t *term_lock,
231 mysql_cond_t *term_cond,
232 volatile uint *slave_running,
233 bool need_lock_term);
234 static bool check_io_slave_killed(THD *thd, Master_info *mi, const char *info);
235 int slave_worker_exec_job(Slave_worker * w, Relay_log_info *rli);
236 static int mts_event_coord_cmp(LOG_POS_COORD *id1, LOG_POS_COORD *id2);
237 /*
238 Function to set the slave's max_allowed_packet based on the value
239 of slave_max_allowed_packet.
240
241 @in_param thd Thread handler for slave
242 @in_param mysql MySQL connection handle
243 */
244
set_slave_max_allowed_packet(THD * thd,MYSQL * mysql)245 static void set_slave_max_allowed_packet(THD *thd, MYSQL *mysql)
246 {
247 DBUG_ENTER("set_slave_max_allowed_packet");
248 // thd and mysql must be valid
249 DBUG_ASSERT(thd && mysql);
250
251 thd->variables.max_allowed_packet= slave_max_allowed_packet;
252 thd->net.max_packet_size= slave_max_allowed_packet;
253 /*
254 Adding MAX_LOG_EVENT_HEADER_LEN to the max_packet_size on the I/O
255 thread and the mysql->option max_allowed_packet, since a
256 replication event can become this much larger than
257 the corresponding packet (query) sent from client to master.
258 */
259 thd->net.max_packet_size+= MAX_LOG_EVENT_HEADER;
260 /*
261 Skipping the setting of mysql->net.max_packet size to slave
262 max_allowed_packet since this is done during mysql_real_connect.
263 */
264 mysql->options.max_allowed_packet=
265 slave_max_allowed_packet+MAX_LOG_EVENT_HEADER;
266 DBUG_VOID_RETURN;
267 }
268
269 /*
270 Find out which replications threads are running
271
272 SYNOPSIS
273 init_thread_mask()
274 mask Return value here
275 mi master_info for slave
276 inverse If set, returns which threads are not running
277
278 IMPLEMENTATION
279 Get a bit mask for which threads are running so that we can later restart
280 these threads.
281
282 RETURN
283 mask If inverse == 0, running threads
284 If inverse == 1, stopped threads
285 */
286
init_thread_mask(int * mask,Master_info * mi,bool inverse)287 void init_thread_mask(int* mask, Master_info* mi, bool inverse)
288 {
289 bool set_io = mi->slave_running, set_sql = mi->rli->slave_running;
290 register int tmp_mask=0;
291 DBUG_ENTER("init_thread_mask");
292
293 if (set_io)
294 tmp_mask |= SLAVE_IO;
295 if (set_sql)
296 tmp_mask |= SLAVE_SQL;
297 if (inverse)
298 tmp_mask^= (SLAVE_IO | SLAVE_SQL);
299 *mask = tmp_mask;
300 DBUG_VOID_RETURN;
301 }
302
303
304 /*
305 lock_slave_threads()
306 */
307
lock_slave_threads(Master_info * mi)308 void lock_slave_threads(Master_info* mi)
309 {
310 DBUG_ENTER("lock_slave_threads");
311
312 //TODO: see if we can do this without dual mutex
313 mysql_mutex_lock(&mi->run_lock);
314 mysql_mutex_lock(&mi->rli->run_lock);
315 DBUG_VOID_RETURN;
316 }
317
318
319 /*
320 unlock_slave_threads()
321 */
322
unlock_slave_threads(Master_info * mi)323 void unlock_slave_threads(Master_info* mi)
324 {
325 DBUG_ENTER("unlock_slave_threads");
326
327 //TODO: see if we can do this without dual mutex
328 mysql_mutex_unlock(&mi->rli->run_lock);
329 mysql_mutex_unlock(&mi->run_lock);
330 DBUG_VOID_RETURN;
331 }
332
#ifdef HAVE_PSI_INTERFACE
/* Instrumentation keys for the slave I/O, SQL and worker threads. */
static PSI_thread_key key_thread_slave_io;
static PSI_thread_key key_thread_slave_sql;
static PSI_thread_key key_thread_slave_worker;

/* Key/name pairs handed to mysql_thread_register(). */
static PSI_thread_info all_slave_threads[]=
{
  { &key_thread_slave_io, "slave_io", PSI_FLAG_GLOBAL},
  { &key_thread_slave_sql, "slave_sql", PSI_FLAG_GLOBAL},
  { &key_thread_slave_worker, "slave_worker", PSI_FLAG_GLOBAL}
};

/* Register every entry of all_slave_threads under the "sql" category. */
static void init_slave_psi_keys(void)
{
  const char *category= "sql";
  const int count= array_elements(all_slave_threads);

  mysql_thread_register(category, all_slave_threads, count);
}
#endif /* HAVE_PSI_INTERFACE */
352
353 /* Initialize slave structures */
354
init_slave()355 int init_slave()
356 {
357 DBUG_ENTER("init_slave");
358 int error= 0;
359 int thread_mask= SLAVE_SQL | SLAVE_IO;
360 Relay_log_info* rli= NULL;
361
362 #ifdef HAVE_PSI_INTERFACE
363 init_slave_psi_keys();
364 #endif
365
366 /*
367 This is called when mysqld starts. Before client connections are
368 accepted. However bootstrap may conflict with us if it does START SLAVE.
369 So it's safer to take the lock.
370 */
371 mysql_mutex_lock(&LOCK_active_mi);
372
373 if (pthread_key_create(&RPL_MASTER_INFO, NULL))
374 DBUG_RETURN(1);
375
376 if ((error= Rpl_info_factory::create_coordinators(opt_mi_repository_id, &active_mi,
377 opt_rli_repository_id, &rli)))
378 {
379 sql_print_error("Failed to create or recover replication info repository.");
380 error= 1;
381 goto err;
382 }
383
384 /*
385 This is the startup routine and as such we try to
386 configure both the SLAVE_SQL and SLAVE_IO.
387 */
388 if (global_init_info(active_mi, true, thread_mask))
389 {
390 sql_print_error("Failed to initialize the master info structure");
391 error= 1;
392 goto err;
393 }
394
395 DBUG_PRINT("info", ("init group master %s %lu group relay %s %lu event %s %lu\n",
396 rli->get_group_master_log_name(),
397 (ulong) rli->get_group_master_log_pos(),
398 rli->get_group_relay_log_name(),
399 (ulong) rli->get_group_relay_log_pos(),
400 rli->get_event_relay_log_name(),
401 (ulong) rli->get_event_relay_log_pos()));
402
403 /* If server id is not set, start_slave_thread() will say it */
404 if (active_mi->host[0] && !opt_skip_slave_start)
405 {
406 /* same as in start_slave() cache the global var values into rli's members */
407 active_mi->rli->opt_slave_parallel_workers= opt_mts_slave_parallel_workers;
408 active_mi->rli->checkpoint_group= opt_mts_checkpoint_group;
409 if (start_slave_threads(true/*need_lock_slave=true*/,
410 false/*wait_for_start=false*/,
411 active_mi,
412 thread_mask))
413 {
414 sql_print_error("Failed to create slave threads");
415 error= 1;
416 goto err;
417 }
418 }
419
420 err:
421 mysql_mutex_unlock(&LOCK_active_mi);
422 if (error)
423 sql_print_information("Check error log for additional messages. "
424 "You will not be able to start replication until "
425 "the issue is resolved and the server restarted.");
426 DBUG_RETURN(error);
427 }
428
429 /**
430 Parse the given relay log and identify the rotate event from the master.
431 Ignore the Format description event, Previous_gtid log event and ignorable
432 events within the relay log. When a rotate event is found check if it is a
433 rotate that is originated from the master or not based on the server_id. If
434 the rotate is from slave or if it is a fake rotate event ignore the event.
435 If any other events are encountered apart from the above events generate an
436 error. From the rotate event extract the master's binary log name and
437 position.
438
439 @param filename
440 Relay log name which needs to be parsed.
441
442 @param[OUT] master_log_file
443 Set the master_log_file to the log file name that is extracted from
444 rotate event. The master_log_file should contain string of len
445 FN_REFLEN.
446
447 @param[OUT] master_log_pos
448 Set the master_log_pos to the log position extracted from rotate
449 event.
450
451 @retval FOUND_ROTATE: When rotate event is found in the relay log
452 @retval NOT_FOUND_ROTATE: When rotate event is not found in the relay log
453 @retval ERROR: On error
454 */
455 enum enum_read_rotate_from_relay_log_status
456 { FOUND_ROTATE, NOT_FOUND_ROTATE, ERROR };
457
458 static enum_read_rotate_from_relay_log_status
read_rotate_from_relay_log(char * filename,char * master_log_file,my_off_t * master_log_pos)459 read_rotate_from_relay_log(char *filename, char *master_log_file,
460 my_off_t *master_log_pos)
461 {
462 DBUG_ENTER("read_rotate_from_relay_log");
463 /*
464 Create a Format_description_log_event that is used to read the
465 first event of the log.
466 */
467 Format_description_log_event fd_ev(BINLOG_VERSION), *fd_ev_p= &fd_ev;
468 DBUG_ASSERT(fd_ev.is_valid());
469 IO_CACHE log;
470 const char *errmsg= NULL;
471 File file= open_binlog_file(&log, filename, &errmsg);
472 if (file < 0)
473 {
474 sql_print_error("Error during --relay-log-recovery: %s", errmsg);
475 DBUG_RETURN(ERROR);
476 }
477 my_b_seek(&log, BIN_LOG_HEADER_SIZE);
478 Log_event *ev= NULL;
479 bool done= false;
480 enum_read_rotate_from_relay_log_status ret= NOT_FOUND_ROTATE;
481 while (!done &&
482 (ev= Log_event::read_log_event(&log, 0, fd_ev_p, opt_slave_sql_verify_checksum)) !=
483 NULL)
484 {
485 DBUG_PRINT("info", ("Read event of type %s", ev->get_type_str()));
486 switch (ev->get_type_code())
487 {
488 case FORMAT_DESCRIPTION_EVENT:
489 if (fd_ev_p != &fd_ev)
490 delete fd_ev_p;
491 fd_ev_p= (Format_description_log_event *)ev;
492 break;
493 case ROTATE_EVENT:
494 /*
495 Check for rotate event from the master. Ignore the ROTATE event if it
496 is a fake rotate event with server_id=0.
497 */
498 if (ev->server_id && ev->server_id != ::server_id)
499 {
500 Rotate_log_event *rotate_ev= (Rotate_log_event *)ev;
501 DBUG_ASSERT(FN_REFLEN >= rotate_ev->ident_len + 1);
502 memcpy(master_log_file, rotate_ev->new_log_ident, rotate_ev->ident_len + 1);
503 *master_log_pos= rotate_ev->pos;
504 ret= FOUND_ROTATE;
505 done= true;
506 }
507 break;
508 case PREVIOUS_GTIDS_LOG_EVENT:
509 break;
510 case IGNORABLE_LOG_EVENT:
511 break;
512 default:
513 sql_print_error("Error during --relay-log-recovery: Could not locate "
514 "rotate event from the master.");
515 ret= ERROR;
516 done= true;
517 break;
518 }
519 if (ev != fd_ev_p)
520 delete ev;
521 }
522 if (log.error < 0)
523 {
524 sql_print_error("Error during --relay-log-recovery: Error reading events from relay log: %d",
525 log.error);
526 DBUG_RETURN(ERROR);
527 }
528
529 if (fd_ev_p != &fd_ev)
530 {
531 delete fd_ev_p;
532 fd_ev_p= &fd_ev;
533 }
534
535 if (mysql_file_close(file, MYF(MY_WME)))
536 DBUG_RETURN(ERROR);
537 if (end_io_cache(&log))
538 {
539 sql_print_error("Error during --relay-log-recovery: Error while freeing "
540 "IO_CACHE object");
541 DBUG_RETURN(ERROR);
542 }
543 DBUG_RETURN(ret);
544 }
545
546 /**
547 Reads relay logs one by one starting from the first relay log. Looks for
548 the first rotate event from the master. If rotate is not found in the relay
549 log search continues to next relay log. If rotate event from master is
550 found then the extracted master_log_file and master_log_pos are used to set
551 rli->group_master_log_name and rli->group_master_log_pos. If an error has
552 occurred the error code is retuned back.
553
554 @param rli
555 Relay_log_info object to read relay log files and to set
556 group_master_log_name and group_master_log_pos.
557
558 @retval 0 On success
559 @retval 1 On failure
560 */
561 static int
find_first_relay_log_with_rotate_from_master(Relay_log_info * rli)562 find_first_relay_log_with_rotate_from_master(Relay_log_info* rli)
563 {
564 DBUG_ENTER("find_first_relay_log_with_rotate_from_master");
565 int error= 0;
566 LOG_INFO linfo;
567 bool got_rotate_from_master= false;
568 int pos;
569 char master_log_file[FN_REFLEN];
570 my_off_t master_log_pos= 0;
571
572 for (pos= rli->relay_log.find_log_pos(&linfo, NULL, true);
573 !pos;
574 pos= rli->relay_log.find_next_log(&linfo, true))
575 {
576 switch (read_rotate_from_relay_log(linfo.log_file_name, master_log_file,
577 &master_log_pos))
578 {
579 case ERROR:
580 error= 1;
581 break;
582 case FOUND_ROTATE:
583 got_rotate_from_master= true;
584 break;
585 case NOT_FOUND_ROTATE:
586 break;
587 }
588 if (error || got_rotate_from_master)
589 break;
590 }
591 if (pos== LOG_INFO_IO)
592 {
593 error= 1;
594 sql_print_error("Error during --relay-log-recovery: Could not read "
595 "relay log index file due to an IO error.");
596 goto err;
597 }
598 if (pos== LOG_INFO_EOF)
599 {
600 error= 1;
601 sql_print_error("Error during --relay-log-recovery: Could not locate "
602 "rotate event from master in relay log file.");
603 goto err;
604 }
605 if (!error && got_rotate_from_master)
606 {
607 rli->set_group_master_log_name(master_log_file);
608 rli->set_group_master_log_pos(master_log_pos);
609 }
610 err:
611 DBUG_RETURN(error);
612 }
613
614 /*
615 Updates the master info based on the information stored in the
616 relay info and ignores relay logs previously retrieved by the IO
617 thread, which thus starts fetching again based on to the
618 master_log_pos and master_log_name. Eventually, the old
619 relay logs will be purged by the normal purge mechanism.
620
621 When GTID's are enabled the "Retrieved GTID" set should be cleared
622 so that partial read events are discarded and they are
623 fetched once again
624
625 @param mi pointer to Master_info instance
626 */
recover_relay_log(Master_info * mi)627 static void recover_relay_log(Master_info *mi)
628 {
629 Relay_log_info *rli=mi->rli;
630 // Set Receiver Thread's positions as per the recovered Applier Thread.
631 mi->set_master_log_pos(max<ulonglong>(BIN_LOG_HEADER_SIZE,
632 rli->get_group_master_log_pos()));
633 mi->set_master_log_name(rli->get_group_master_log_name());
634
635 sql_print_warning("Recovery from master pos %ld and file %s. "
636 "Previous relay log pos and relay log file had "
637 "been set to %lld, %s respectively.",
638 (ulong) mi->get_master_log_pos(), mi->get_master_log_name(),
639 rli->get_group_relay_log_pos(), rli->get_group_relay_log_name());
640
641 // Start with a fresh relay log.
642 rli->set_group_relay_log_name(rli->relay_log.get_log_fname());
643 rli->set_event_relay_log_name(rli->relay_log.get_log_fname());
644 rli->set_group_relay_log_pos(BIN_LOG_HEADER_SIZE);
645 rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
646 /*
647 Clear the retrieved GTID set so that events that are written partially
648 will be fetched again.
649 */
650 if (gtid_mode == GTID_MODE_ON)
651 {
652 global_sid_lock->wrlock();
653 (const_cast<Gtid_set *>(rli->get_gtid_set()))->clear();
654 global_sid_lock->unlock();
655 }
656 }
657
658
659 /*
660 Updates the master info based on the information stored in the
661 relay info and ignores relay logs previously retrieved by the IO
662 thread, which thus starts fetching again based on to the
663 master_log_pos and master_log_name. Eventually, the old
664 relay logs will be purged by the normal purge mechanism.
665
666 There can be a special case where rli->group_master_log_name and
667 rli->group_master_log_pos are not intialized, as the sql thread was never
668 started at all. In those cases all the existing relay logs are parsed
669 starting from the first one and the initial rotate event that was received
670 from the master is identified. From the rotate event master_log_name and
671 master_log_pos are extracted and they are set to rli->group_master_log_name
672 and rli->group_master_log_pos.
673
674 In the feature, we should improve this routine in order to avoid throwing
675 away logs that are safely stored in the disk. Note also that this recovery
676 routine relies on the correctness of the relay-log.info and only tolerates
677 coordinate problems in master.info.
678
679 In this function, there is no need for a mutex as the caller
680 (i.e. init_slave) already has one acquired.
681
682 Specifically, the following structures are updated:
683
684 1 - mi->master_log_pos <-- rli->group_master_log_pos
685 2 - mi->master_log_name <-- rli->group_master_log_name
686 3 - It moves the relay log to the new relay log file, by
687 rli->group_relay_log_pos <-- BIN_LOG_HEADER_SIZE;
688 rli->event_relay_log_pos <-- BIN_LOG_HEADER_SIZE;
689 rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
690 rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
691
692 If there is an error, it returns (1), otherwise returns (0).
693 */
init_recovery(Master_info * mi,const char ** errmsg)694 int init_recovery(Master_info* mi, const char** errmsg)
695 {
696 DBUG_ENTER("init_recovery");
697
698 int error= 0;
699 Relay_log_info *rli= mi->rli;
700 char *group_master_log_name= NULL;
701
702 if (rli->recovery_parallel_workers)
703 {
704 /*
705 This is not idempotent and a crash after this function and before
706 the recovery is actually done may lead the system to an inconsistent
707 state.
708
709 This may happen because the gap is not persitent stored anywhere
710 and eventually old relay log files will be removed and further
711 calculations on the gaps will be impossible.
712
713 We need to improve this. /Alfranio.
714 */
715 error= mts_recovery_groups(rli);
716 if (rli->mts_recovery_group_cnt)
717 {
718 if (gtid_mode == GTID_MODE_ON)
719 {
720 rli->recovery_parallel_workers= 0;
721 rli->clear_mts_recovery_groups();
722 }
723 else
724 DBUG_RETURN(error);
725 }
726 }
727
728 group_master_log_name= const_cast<char *>(rli->get_group_master_log_name());
729 if (!error)
730 {
731 if (!group_master_log_name[0])
732 {
733 if (rli->replicate_same_server_id)
734 {
735 error= 1;
736 sql_print_error("Error during --relay-log-recovery: "
737 "replicate_same_server_id is in use and sql thread's "
738 "positions are not initialized, hence relay log "
739 "recovery cannot happen.");
740 DBUG_RETURN(error);
741 }
742 error= find_first_relay_log_with_rotate_from_master(rli);
743 if (error)
744 DBUG_RETURN(error);
745 }
746 recover_relay_log(mi);
747 }
748 DBUG_RETURN(error);
749 }
750
751 /*
752 Relay log recovery in the case of MTS, is handled by the following function.
753 Gaps in MTS execution are filled using implicit execution of
754 START SLAVE UNTIL SQL_AFTER_MTS_GAPS call. Once slave reaches a consistent
755 gapless state receiver thread's positions are initialized to applier thread's
756 positions and the old relay logs are discarded. This completes the recovery
757 process.
758
759 @param mi pointer to Master_info instance.
760
761 @retval 0 success
762 @retval 1 error
763 */
fill_mts_gaps_and_recover(Master_info * mi)764 static inline int fill_mts_gaps_and_recover(Master_info* mi)
765 {
766 DBUG_ENTER("fill_mts_gaps_and_recover");
767 Relay_log_info *rli= mi->rli;
768 int recovery_error= 0;
769 rli->is_relay_log_recovery= FALSE;
770 rli->until_condition= Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS;
771 rli->opt_slave_parallel_workers= rli->recovery_parallel_workers;
772 sql_print_information("MTS recovery: starting coordinator thread to fill MTS "
773 "gaps.");
774 recovery_error= start_slave_thread(
775 #ifdef HAVE_PSI_INTERFACE
776 key_thread_slave_sql,
777 #endif
778 handle_slave_sql, &rli->run_lock,
779 &rli->run_lock,
780 &rli->start_cond,
781 &rli->slave_running,
782 &rli->slave_run_id,
783 mi);
784
785 if (recovery_error)
786 {
787 sql_print_warning("MTS recovery: failed to start the coordinator "
788 "thread. Check the error log for additional"
789 " details.");
790 goto err;
791 }
792 mysql_mutex_lock(&rli->run_lock);
793 mysql_cond_wait(&rli->stop_cond, &rli->run_lock);
794 mysql_mutex_unlock(&rli->run_lock);
795 if (rli->until_condition != Relay_log_info::UNTIL_DONE)
796 {
797 sql_print_warning("MTS recovery: automatic recovery failed. Either the "
798 "slave server had stopped due to an error during an "
799 "earlier session or relay logs are corrupted."
800 "Fix the cause of the slave side error and restart the "
801 "slave server or consider using RESET SLAVE.");
802 goto err;
803 }
804
805 /*
806 We need a mutex while we are changing master info parameters to
807 keep other threads from reading bogus info
808 */
809 mysql_mutex_lock(&mi->data_lock);
810 mysql_mutex_lock(&rli->data_lock);
811 recover_relay_log(mi);
812
813 const char* msg;
814 if (rli->init_relay_log_pos(rli->get_group_relay_log_name(),
815 rli->get_group_relay_log_pos(),
816 false/*need_data_lock=false*/,
817 &msg, 0))
818 {
819 char llbuf[22];
820 sql_print_error("Failed to open the relay log '%s' (relay_log_pos %s).",
821 rli->get_group_relay_log_name(),
822 llstr(rli->get_group_relay_log_pos(), llbuf));
823
824 recovery_error=1;
825 mysql_mutex_unlock(&mi->data_lock);
826 mysql_mutex_unlock(&rli->data_lock);
827 goto err;
828 }
829 if (mi->flush_info(true) || rli->flush_info(true))
830 {
831 recovery_error= 1;
832 mysql_mutex_unlock(&mi->data_lock);
833 mysql_mutex_unlock(&rli->data_lock);
834 goto err;
835 }
836 rli->inited=1;
837 rli->error_on_rli_init_info= false;
838 mysql_mutex_unlock(&mi->data_lock);
839 mysql_mutex_unlock(&rli->data_lock);
840 sql_print_information("MTS recovery: completed successfully.\n");
841 DBUG_RETURN(recovery_error);
842 err:
843 /*
844 If recovery failed means we failed to initialize rli object in the case
845 of MTS. We should not allow the START SLAVE command to work as we do in
846 the case of STS. i.e if init_recovery call fails then we set inited=0.
847 */
848 rli->end_info();
849 rli->inited=0;
850 rli->error_on_rli_init_info= true;
851 DBUG_RETURN(recovery_error);
852 }
853
global_init_info(Master_info * mi,bool ignore_if_no_info,int thread_mask)854 int global_init_info(Master_info* mi, bool ignore_if_no_info, int thread_mask)
855 {
856 DBUG_ENTER("init_info");
857 DBUG_ASSERT(mi != NULL && mi->rli != NULL);
858 int init_error= 0;
859 enum_return_check check_return= ERROR_CHECKING_REPOSITORY;
860 THD *thd= current_thd;
861
862 /*
863 We need a mutex while we are changing master info parameters to
864 keep other threads from reading bogus info
865 */
866 mysql_mutex_lock(&mi->data_lock);
867 mysql_mutex_lock(&mi->rli->data_lock);
868
869 /*
870 When info tables are used and autocommit= 0 we force a new
871 transaction start to avoid table access deadlocks when START SLAVE
872 is executed after RESET SLAVE.
873 */
874 if (is_autocommit_off_and_infotables(thd))
875 {
876 if (trans_begin(thd))
877 {
878 init_error= 1;
879 goto end;
880 }
881 }
882
883 /*
884 This takes care of the startup dependency between the master_info
885 and relay_info. It initializes the master info if the SLAVE_IO
886 thread is being started and the relay log info if either the
887 SLAVE_SQL thread is being started or was not initialized as it is
888 required by the SLAVE_IO thread.
889 */
890 check_return= mi->check_info();
891 if (check_return == ERROR_CHECKING_REPOSITORY)
892 {
893 init_error= 1;
894 goto end;
895 }
896
897 if (!(ignore_if_no_info && check_return == REPOSITORY_DOES_NOT_EXIST))
898 {
899 if ((thread_mask & SLAVE_IO) != 0 && mi->mi_init_info())
900 init_error= 1;
901 }
902
903 check_return= mi->rli->check_info();
904 if (check_return == ERROR_CHECKING_REPOSITORY)
905 {
906 init_error= 1;
907 goto end;
908 }
909 if (!(ignore_if_no_info && check_return == REPOSITORY_DOES_NOT_EXIST))
910 {
911 if (((thread_mask & SLAVE_SQL) != 0 || !(mi->rli->inited))
912 && mi->rli->rli_init_info())
913 init_error= 1;
914 }
915
916 DBUG_EXECUTE_IF("enable_mts_worker_failure_init",
917 {DBUG_SET("+d,mts_worker_thread_init_fails");});
918 end:
919 /*
920 When info tables are used and autocommit= 0 we force transaction
921 commit to avoid table access deadlocks when START SLAVE is executed
922 after RESET SLAVE.
923 */
924 if (is_autocommit_off_and_infotables(thd))
925 if (trans_commit(thd))
926 init_error= 1;
927
928 mysql_mutex_unlock(&mi->rli->data_lock);
929 mysql_mutex_unlock(&mi->data_lock);
930
931 /*
932 Handling MTS Relay-log recovery after successful initialization of mi and
933 rli objects.
934
935 MTS Relay-log recovery is handled by SSUG command. In order to start the
936 slave applier thread rli needs to be inited and mi->rli->data_lock should
937 be in released state. Hence we do the MTS recovery at this point of time
938 where both conditions are satisfied.
939 */
940 if (!init_error && mi->rli->is_relay_log_recovery
941 && mi->rli->mts_recovery_group_cnt)
942 init_error= fill_mts_gaps_and_recover(mi);
943 DBUG_RETURN(init_error);
944 }
945
end_info(Master_info * mi)946 void end_info(Master_info* mi)
947 {
948 DBUG_ENTER("end_info");
949 DBUG_ASSERT(mi != NULL && mi->rli != NULL);
950
951 /*
952 The previous implementation was not acquiring locks. We do the same here.
953 However, this is quite strange.
954 */
955 mi->end_info();
956 mi->rli->end_info();
957
958 DBUG_VOID_RETURN;
959 }
960
remove_info(Master_info * mi)961 int remove_info(Master_info* mi)
962 {
963 int error= 1;
964 DBUG_ENTER("remove_info");
965 DBUG_ASSERT(mi != NULL && mi->rli != NULL);
966
967 /*
968 The previous implementation was not acquiring locks.
969 We do the same here. However, this is quite strange.
970 */
971 /*
972 Reset errors (the idea is that we forget about the
973 old master).
974 */
975 mi->clear_error();
976 mi->rli->clear_error();
977 mi->rli->clear_until_condition();
978 mi->rli->clear_sql_delay();
979
980 mi->end_info();
981 mi->rli->end_info();
982
983 if (mi->remove_info() || Rpl_info_factory::reset_workers(mi->rli) ||
984 mi->rli->remove_info())
985 goto err;
986
987 error= 0;
988
989 err:
990 DBUG_RETURN(error);
991 }
992
flush_master_info(Master_info * mi,bool force)993 int flush_master_info(Master_info* mi, bool force)
994 {
995 DBUG_ENTER("flush_master_info");
996 DBUG_ASSERT(mi != NULL && mi->rli != NULL);
997 /*
998 The previous implementation was not acquiring locks.
999 We do the same here. However, this is quite strange.
1000 */
1001 /*
1002 With the appropriate recovery process, we will not need to flush
1003 the content of the current log.
1004
1005 For now, we flush the relay log BEFORE the master.info file, because
1006 if we crash, we will get a duplicate event in the relay log at restart.
1007 If we change the order, there might be missing events.
1008
1009 If we don't do this and the slave server dies when the relay log has
1010 some parts (its last kilobytes) in memory only, with, say, from master's
1011 position 100 to 150 in memory only (not on disk), and with position 150
1012 in master.info, there will be missing information. When the slave restarts,
1013 the I/O thread will fetch binlogs from 150, so in the relay log we will
1014 have "[0, 100] U [150, infinity[" and nobody will notice it, so the SQL
1015 thread will jump from 100 to 150, and replication will silently break.
1016 */
1017 mysql_mutex_t *log_lock= mi->rli->relay_log.get_log_lock();
1018
1019 mysql_mutex_lock(log_lock);
1020
1021 int err= (mi->rli->flush_current_log() ||
1022 mi->flush_info(force));
1023
1024 mysql_mutex_unlock(log_lock);
1025
1026 DBUG_RETURN (err);
1027 }
1028
1029 /**
1030 Convert slave skip errors bitmap into a printable string.
1031 */
1032
print_slave_skip_errors(void)1033 static void print_slave_skip_errors(void)
1034 {
1035 /*
1036 To be safe, we want 10 characters of room in the buffer for a number
1037 plus terminators. Also, we need some space for constant strings.
1038 10 characters must be sufficient for a number plus {',' | '...'}
1039 plus a NUL terminator. That is a max 6 digit number.
1040 */
1041 const size_t MIN_ROOM= 10;
1042 DBUG_ENTER("print_slave_skip_errors");
1043 DBUG_ASSERT(sizeof(slave_skip_error_names) > MIN_ROOM);
1044 DBUG_ASSERT(MAX_SLAVE_ERROR <= 999999); // 6 digits
1045
1046 if (!use_slave_mask || bitmap_is_clear_all(&slave_error_mask))
1047 {
1048 /* purecov: begin tested */
1049 memcpy(slave_skip_error_names, STRING_WITH_LEN("OFF"));
1050 /* purecov: end */
1051 }
1052 else if (bitmap_is_set_all(&slave_error_mask))
1053 {
1054 /* purecov: begin tested */
1055 memcpy(slave_skip_error_names, STRING_WITH_LEN("ALL"));
1056 /* purecov: end */
1057 }
1058 else
1059 {
1060 char *buff= slave_skip_error_names;
1061 char *bend= buff + sizeof(slave_skip_error_names);
1062 int errnum;
1063
1064 for (errnum= 0; errnum < MAX_SLAVE_ERROR; errnum++)
1065 {
1066 if (bitmap_is_set(&slave_error_mask, errnum))
1067 {
1068 if (buff + MIN_ROOM >= bend)
1069 break; /* purecov: tested */
1070 buff= int10_to_str(errnum, buff, 10);
1071 *buff++= ',';
1072 }
1073 }
1074 if (buff != slave_skip_error_names)
1075 buff--; // Remove last ','
1076 if (errnum < MAX_SLAVE_ERROR)
1077 {
1078 /* Couldn't show all errors */
1079 buff= strmov(buff, "..."); /* purecov: tested */
1080 }
1081 *buff=0;
1082 }
1083 DBUG_PRINT("init", ("error_names: '%s'", slave_skip_error_names));
1084 DBUG_VOID_RETURN;
1085 }
1086
set_stop_slave_wait_timeout(unsigned long wait_timeout)1087 static void set_stop_slave_wait_timeout(unsigned long wait_timeout) {
1088 stop_wait_timeout = wait_timeout;
1089 }
1090
/**
  Change arg to the string with the nice, human-readable skip error values.

  Calls print_slave_skip_errors() to fill the slave_skip_error_names buffer
  and then makes the supplied pointer refer to that buffer.

  @param[out] slave_skip_errors_ptr
         The pointer to be changed; on return it points to
         slave_skip_error_names.
*/
void set_slave_skip_errors(char** slave_skip_errors_ptr)
{
  DBUG_ENTER("set_slave_skip_errors");
  print_slave_skip_errors();
  *slave_skip_errors_ptr= slave_skip_error_names;
  DBUG_VOID_RETURN;
}
1103
1104 /**
1105 Init function to set up array for errors that should be skipped for slave
1106 */
init_slave_skip_errors()1107 static void init_slave_skip_errors()
1108 {
1109 DBUG_ENTER("init_slave_skip_errors");
1110 DBUG_ASSERT(!use_slave_mask); // not already initialized
1111
1112 if (bitmap_init(&slave_error_mask,0,MAX_SLAVE_ERROR,0))
1113 {
1114 fprintf(stderr, "Badly out of memory, please check your system status\n");
1115 exit(1);
1116 }
1117 use_slave_mask = 1;
1118 DBUG_VOID_RETURN;
1119 }
1120
add_slave_skip_errors(const uint * errors,uint n_errors)1121 static void add_slave_skip_errors(const uint* errors, uint n_errors)
1122 {
1123 DBUG_ENTER("add_slave_skip_errors");
1124 DBUG_ASSERT(errors);
1125 DBUG_ASSERT(use_slave_mask);
1126
1127 for (uint i = 0; i < n_errors; i++)
1128 {
1129 const uint err_code = errors[i];
1130 if (err_code < MAX_SLAVE_ERROR)
1131 bitmap_set_bit(&slave_error_mask, err_code);
1132 }
1133 DBUG_VOID_RETURN;
1134 }
1135
1136 /*
1137 Add errors that should be skipped for slave
1138
1139 SYNOPSIS
1140 add_slave_skip_errors()
1141 arg List of errors numbers to be added to skip, separated with ','
1142
1143 NOTES
1144 Called from get_options() in mysqld.cc on start-up
1145 */
1146
add_slave_skip_errors(const char * arg)1147 void add_slave_skip_errors(const char* arg)
1148 {
1149 const char *p= NULL;
1150 /*
1151 ALL is only valid when nothing else is provided.
1152 */
1153 const uchar SKIP_ALL[]= "all";
1154 size_t SIZE_SKIP_ALL= strlen((const char *) SKIP_ALL) + 1;
1155 /*
1156 IGNORE_DDL_ERRORS can be combined with other parameters
1157 but must be the first one provided.
1158 */
1159 const uchar SKIP_DDL_ERRORS[]= "ddl_exist_errors";
1160 size_t SIZE_SKIP_DDL_ERRORS= strlen((const char *) SKIP_DDL_ERRORS);
1161 DBUG_ENTER("add_slave_skip_errors");
1162
1163 // initialize mask if not done yet
1164 if (!use_slave_mask)
1165 init_slave_skip_errors();
1166
1167 for (; my_isspace(system_charset_info,*arg); ++arg)
1168 /* empty */;
1169 if (!my_strnncoll(system_charset_info, (uchar*)arg, SIZE_SKIP_ALL,
1170 SKIP_ALL, SIZE_SKIP_ALL))
1171 {
1172 bitmap_set_all(&slave_error_mask);
1173 DBUG_VOID_RETURN;
1174 }
1175 if (!my_strnncoll(system_charset_info, (uchar*)arg, SIZE_SKIP_DDL_ERRORS,
1176 SKIP_DDL_ERRORS, SIZE_SKIP_DDL_ERRORS))
1177 {
1178 // DDL errors to be skipped for relaxed 'exist' handling
1179 const uint ddl_errors[] = {
1180 // error codes with create/add <schema object>
1181 ER_DB_CREATE_EXISTS, ER_TABLE_EXISTS_ERROR, ER_DUP_KEYNAME,
1182 ER_MULTIPLE_PRI_KEY,
1183 // error codes with change/rename <schema object>
1184 ER_BAD_FIELD_ERROR, ER_NO_SUCH_TABLE, ER_DUP_FIELDNAME,
1185 // error codes with drop <schema object>
1186 ER_DB_DROP_EXISTS, ER_BAD_TABLE_ERROR, ER_CANT_DROP_FIELD_OR_KEY
1187 };
1188
1189 add_slave_skip_errors(ddl_errors,
1190 sizeof(ddl_errors)/sizeof(ddl_errors[0]));
1191 /*
1192 After processing the SKIP_DDL_ERRORS, the pointer is
1193 increased to the position after the comma.
1194 */
1195 if (strlen(arg) > SIZE_SKIP_DDL_ERRORS + 1)
1196 arg+= SIZE_SKIP_DDL_ERRORS + 1;
1197 }
1198 for (p= arg ; *p; )
1199 {
1200 long err_code;
1201 if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code)))
1202 break;
1203 if (err_code < MAX_SLAVE_ERROR)
1204 bitmap_set_bit(&slave_error_mask,(uint)err_code);
1205 while (!my_isdigit(system_charset_info,*p) && *p)
1206 p++;
1207 }
1208 DBUG_VOID_RETURN;
1209 }
1210
set_thd_in_use_temporary_tables(Relay_log_info * rli)1211 static void set_thd_in_use_temporary_tables(Relay_log_info *rli)
1212 {
1213 TABLE *table;
1214
1215 for (table= rli->save_temporary_tables ; table ; table= table->next)
1216 {
1217 table->in_use= rli->info_thd;
1218 if (table->file != NULL)
1219 {
1220 /*
1221 Since we are stealing opened temporary tables from one thread to another,
1222 we need to let the performance schema know that,
1223 for aggregates per thread to work properly.
1224 */
1225 table->file->unbind_psi();
1226 table->file->rebind_psi();
1227 }
1228 }
1229 }
1230
terminate_slave_threads(Master_info * mi,int thread_mask,bool need_lock_term)1231 int terminate_slave_threads(Master_info* mi,int thread_mask,bool need_lock_term)
1232 {
1233 DBUG_ENTER("terminate_slave_threads");
1234
1235 if (!mi->inited)
1236 DBUG_RETURN(0); /* successfully do nothing */
1237 int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
1238 mysql_mutex_t *sql_lock = &mi->rli->run_lock, *io_lock = &mi->run_lock;
1239 mysql_mutex_t *log_lock= mi->rli->relay_log.get_log_lock();
1240 set_stop_slave_wait_timeout(rpl_stop_slave_timeout);
1241
1242 if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL))
1243 {
1244 DBUG_PRINT("info",("Terminating SQL thread"));
1245 mi->rli->abort_slave= 1;
1246 if ((error=terminate_slave_thread(mi->rli->info_thd, sql_lock,
1247 &mi->rli->stop_cond,
1248 &mi->rli->slave_running,
1249 need_lock_term)) &&
1250 !force_all)
1251 {
1252 if (error == 1)
1253 {
1254 DBUG_RETURN(ER_STOP_SLAVE_SQL_THREAD_TIMEOUT);
1255 }
1256 DBUG_RETURN(error);
1257 }
1258 mysql_mutex_lock(log_lock);
1259
1260 DBUG_PRINT("info",("Flushing relay-log info file."));
1261 if (current_thd)
1262 THD_STAGE_INFO(current_thd, stage_flushing_relay_log_info_file);
1263
1264 /*
1265 Flushes the relay log info regardles of the sync_relay_log_info option.
1266 */
1267 if (mi->rli->flush_info(TRUE))
1268 {
1269 mysql_mutex_unlock(log_lock);
1270 DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
1271 }
1272
1273 mysql_mutex_unlock(log_lock);
1274 }
1275 if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
1276 {
1277 DBUG_PRINT("info",("Terminating IO thread"));
1278 mi->abort_slave=1;
1279 if ((error=terminate_slave_thread(mi->info_thd,io_lock,
1280 &mi->stop_cond,
1281 &mi->slave_running,
1282 need_lock_term)) &&
1283 !force_all)
1284 {
1285 if (error == 1)
1286 {
1287 DBUG_RETURN(ER_STOP_SLAVE_IO_THREAD_TIMEOUT);
1288 }
1289 DBUG_RETURN(error);
1290 }
1291 mysql_mutex_lock(log_lock);
1292
1293 DBUG_PRINT("info",("Flushing relay log and master info repository."));
1294 if (current_thd)
1295 THD_STAGE_INFO(current_thd, stage_flushing_relay_log_and_master_info_repository);
1296
1297 /*
1298 Flushes the master info regardles of the sync_master_info option.
1299 */
1300 if (mi->flush_info(TRUE))
1301 {
1302 mysql_mutex_unlock(log_lock);
1303 DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
1304 }
1305
1306 /*
1307 Flushes the relay log regardles of the sync_relay_log option.
1308 */
1309 if (mi->rli->relay_log.is_open() &&
1310 mi->rli->relay_log.flush_and_sync(true))
1311 {
1312 mysql_mutex_unlock(log_lock);
1313 DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
1314 }
1315
1316 mysql_mutex_unlock(log_lock);
1317 }
1318 DBUG_RETURN(0);
1319 }
1320
1321
/**
  Wait for a slave thread to terminate.

  This function is called after requesting the thread to terminate
  (by setting @c abort_slave member of @c Relay_log_info or @c
  Master_info structure to 1). Termination of the thread is
  controlled with the predicate <code>*slave_running</code>.

  Function will acquire @c term_lock before waiting on the condition
  unless @c need_lock_term is false in which case the mutex should be
  owned by the caller of this function and will remain acquired after
  return from the function.

  While the thread is still running it is signalled and awakened
  repeatedly, with a wait of up to 2 seconds on @c term_cond between
  attempts. Each attempt consumes 2 from @c stop_wait_timeout; when less
  than 2 is left and the thread is still running, the function gives up.

  @param thd
         THD of the slave thread to be terminated. Only used when the
         thread is running, in which case it must not be NULL.

  @param term_lock
         Associated lock to use when waiting for @c term_cond

  @param term_cond
         Condition that is signalled when the thread has terminated

  @param slave_running
         Pointer to predicate to check for slave thread termination

  @param need_lock_term
         If @c false the lock will not be acquired before waiting on
         the condition. In this case, it is assumed that the calling
         function acquires the lock before calling this function.

  @retval 0 All OK
  @retval 1 "STOP SLAVE" command timeout
  @retval ER_SLAVE_NOT_RUNNING The thread was not running and the caller
          owns @c term_lock (@c need_lock_term is false).

  @note  If the executing thread has to acquire term_lock
         (need_lock_term is true), the negative running status does not
         represent any issue, therefore no error is reported.

 */
static int
terminate_slave_thread(THD *thd,
                       mysql_mutex_t *term_lock,
                       mysql_cond_t *term_cond,
                       volatile uint *slave_running,
                       bool need_lock_term)
{
  DBUG_ENTER("terminate_slave_thread");
  if (need_lock_term)
  {
    mysql_mutex_lock(term_lock);
  }
  else
  {
    mysql_mutex_assert_owner(term_lock);
  }
  if (!*slave_running)
  {
    if (need_lock_term)
    {
      /*
        If run_lock (term_lock) is acquired locally then either
        slave_running status is fine: a thread that is not running is
        not an error for this caller.
      */
      mysql_mutex_unlock(term_lock);
      DBUG_RETURN(0);
    }
    else
    {
      DBUG_RETURN(ER_SLAVE_NOT_RUNNING);
    }
  }
  DBUG_ASSERT(thd != 0);
  THD_CHECK_SENTRY(thd);

  /*
    It is critical to test if the slave is running. Otherwise, we might
    be referencing freed memory trying to kick it
  */

  while (*slave_running)                        // Should always be true
  {
    int error MY_ATTRIBUTE((unused));
    DBUG_PRINT("loop", ("killing slave thread"));

    mysql_mutex_lock(&thd->LOCK_thd_data);
#ifndef DONT_USE_THR_ALARM
    /*
      Error codes from pthread_kill are:
      EINVAL: invalid signal number (can't happen)
      ESRCH: thread already killed (can happen, should be ignored)
    */
    int err MY_ATTRIBUTE((unused))= pthread_kill(thd->real_id, thr_client_alarm);
    DBUG_ASSERT(err != EINVAL);
#endif
    thd->awake(THD::NOT_KILLED);
    mysql_mutex_unlock(&thd->LOCK_thd_data);

    /*
      There is a small chance that slave thread might miss the first
      alarm. To protect against it, resend the signal until it reacts
    */
    struct timespec abstime;
    set_timespec(abstime,2);
    error= mysql_cond_timedwait(term_cond, term_lock, &abstime);
    /*
      Charge this wait against the remaining budget. Once less than 2 is
      left and the thread is still running, report a timeout (1); the
      lock is released only if it was acquired by this function.
    */
    if (stop_wait_timeout >= 2)
      stop_wait_timeout= stop_wait_timeout - 2;
    else if (*slave_running)
    {
      if (need_lock_term)
        mysql_mutex_unlock(term_lock);
      DBUG_RETURN (1);
    }
    DBUG_ASSERT(error == ETIMEDOUT || error == 0);
  }

  DBUG_ASSERT(*slave_running == 0);

  if (need_lock_term)
    mysql_mutex_unlock(term_lock);
  DBUG_RETURN(0);
}
1438
1439
/**
  Create one slave thread and optionally wait until it has started.

  @param thread_key    PSI key of the new thread (only present when built
                       with HAVE_PSI_INTERFACE).
  @param h_func        Thread entry function; it receives @c mi as its
                       argument.
  @param start_lock    If not NULL, this mutex is locked on entry and
                       unlocked before every return.
  @param cond_lock     Mutex associated with @c start_cond. When given
                       together with @c start_cond the caller is expected
                       to hold it already.
  @param start_cond    Condition to wait on for the thread to start. It is
                       also broadcast when the function bails out because
                       server_id is not set or the thread already runs.
  @param slave_running Predicate telling whether the thread is running.
  @param slave_run_id  Counter sampled before the thread is created; the
                       wait ends once its value differs from that sample.
  @param mi            Master_info passed to the new thread.

  @retval 0 success
  @retval ER_BAD_SLAVE server_id is not set
  @retval ER_SLAVE_MUST_STOP the thread is already running
  @retval ER_SLAVE_THREAD the thread could not be created
  @retval other thd->killed_errno() if the waiting session was killed

  @note Waiting only happens when both @c start_cond and @c cond_lock are
        given and there is a current THD.
*/
int start_slave_thread(
#ifdef HAVE_PSI_INTERFACE
                       PSI_thread_key thread_key,
#endif
                       pthread_handler h_func, mysql_mutex_t *start_lock,
                       mysql_mutex_t *cond_lock,
                       mysql_cond_t *start_cond,
                       volatile uint *slave_running,
                       volatile ulong *slave_run_id,
                       Master_info* mi)
{
  pthread_t th;
  ulong start_id;
  int error;
  DBUG_ENTER("start_slave_thread");

  if (start_lock)
    mysql_mutex_lock(start_lock);
  if (!server_id)
  {
    if (start_cond)
      mysql_cond_broadcast(start_cond);
    if (start_lock)
      mysql_mutex_unlock(start_lock);
    sql_print_error("Server id not set, will not start slave");
    DBUG_RETURN(ER_BAD_SLAVE);
  }

  if (*slave_running)
  {
    if (start_cond)
      mysql_cond_broadcast(start_cond);
    if (start_lock)
      mysql_mutex_unlock(start_lock);
    DBUG_RETURN(ER_SLAVE_MUST_STOP);
  }
  /* Remember the run id so that a change made by the new thread is seen. */
  start_id= *slave_run_id;
  DBUG_PRINT("info",("Creating new slave thread"));
  if ((error= mysql_thread_create(thread_key,
                          &th, &connection_attrib, h_func, (void*)mi)))
  {
    sql_print_error("Can't create slave thread (errno= %d).", error);
    if (start_lock)
      mysql_mutex_unlock(start_lock);
    DBUG_RETURN(ER_SLAVE_THREAD);
  }
  if (start_cond && cond_lock) // caller has cond_lock
  {
    THD* thd = current_thd;
    while (start_id == *slave_run_id && thd != NULL)
    {
      DBUG_PRINT("sleep",("Waiting for slave thread to start"));
      PSI_stage_info saved_stage= {0, "", 0};
      thd->ENTER_COND(start_cond, cond_lock,
                      & stage_waiting_for_slave_thread_to_start,
                      & saved_stage);
      /*
        It is not sufficient to test this at loop bottom. We must test
        it after registering the mutex in enter_cond(). If the kill
        happens after testing of thd->killed and before the mutex is
        registered, we could otherwise go waiting though thd->killed is
        set.
      */
      if (!thd->killed)
        mysql_cond_wait(start_cond, cond_lock);
      thd->EXIT_COND(& saved_stage);
      mysql_mutex_lock(cond_lock); // re-acquire it as exit_cond() released
      if (thd->killed)
      {
        if (start_lock)
          mysql_mutex_unlock(start_lock);
        DBUG_RETURN(thd->killed_errno());
      }
    }
  }
  if (start_lock)
    mysql_mutex_unlock(start_lock);
  DBUG_RETURN(0);
}
1519
1520
1521 /*
1522 start_slave_threads()
1523
1524 NOTES
1525 SLAVE_FORCE_ALL is not implemented here on purpose since it does not make
1526 sense to do that for starting a slave--we always care if it actually
1527 started the threads that were not previously running
1528 */
1529
start_slave_threads(bool need_lock_slave,bool wait_for_start,Master_info * mi,int thread_mask)1530 int start_slave_threads(bool need_lock_slave, bool wait_for_start,
1531 Master_info* mi, int thread_mask)
1532 {
1533 mysql_mutex_t *lock_io=0, *lock_sql=0, *lock_cond_io=0, *lock_cond_sql=0;
1534 mysql_cond_t* cond_io=0, *cond_sql=0;
1535 int error=0;
1536 DBUG_ENTER("start_slave_threads");
1537 DBUG_EXECUTE_IF("uninitialized_master-info_structure",
1538 mi->inited= FALSE;);
1539
1540 if (!mi->inited || !mi->rli->inited)
1541 {
1542 error= !mi->inited ? ER_SLAVE_MI_INIT_REPOSITORY :
1543 ER_SLAVE_RLI_INIT_REPOSITORY;
1544 Rpl_info *info= (!mi->inited ? mi : static_cast<Rpl_info *>(mi->rli));
1545 const char* prefix= current_thd ? ER(error) : ER_DEFAULT(error);
1546 info->report(ERROR_LEVEL, error, prefix, NULL);
1547
1548 DBUG_RETURN(error);
1549 }
1550
1551 if (need_lock_slave)
1552 {
1553 lock_io = &mi->run_lock;
1554 lock_sql = &mi->rli->run_lock;
1555 }
1556 if (wait_for_start)
1557 {
1558 cond_io = &mi->start_cond;
1559 cond_sql = &mi->rli->start_cond;
1560 lock_cond_io = &mi->run_lock;
1561 lock_cond_sql = &mi->rli->run_lock;
1562 }
1563
1564 if (thread_mask & SLAVE_IO)
1565 error= start_slave_thread(
1566 #ifdef HAVE_PSI_INTERFACE
1567 key_thread_slave_io,
1568 #endif
1569 handle_slave_io, lock_io, lock_cond_io,
1570 cond_io,
1571 &mi->slave_running, &mi->slave_run_id,
1572 mi);
1573 if (!error && (thread_mask & SLAVE_SQL))
1574 {
1575 /*
1576 MTS-recovery gaps gathering is placed onto common execution path
1577 for either START-SLAVE and --skip-start-slave= 0
1578 */
1579 if (mi->rli->recovery_parallel_workers != 0)
1580 error= mts_recovery_groups(mi->rli);
1581 if (!error)
1582 error= start_slave_thread(
1583 #ifdef HAVE_PSI_INTERFACE
1584 key_thread_slave_sql,
1585 #endif
1586 handle_slave_sql, lock_sql, lock_cond_sql,
1587 cond_sql,
1588 &mi->rli->slave_running, &mi->rli->slave_run_id,
1589 mi);
1590 if (error)
1591 terminate_slave_threads(mi, thread_mask & SLAVE_IO, need_lock_slave);
1592 }
1593 DBUG_RETURN(error);
1594 }
1595
1596 /*
1597 Release slave threads at time of executing shutdown.
1598
1599 SYNOPSIS
1600 end_slave()
1601 */
1602
end_slave()1603 void end_slave()
1604 {
1605 DBUG_ENTER("end_slave");
1606
1607 /*
1608 This is called when the server terminates, in close_connections().
1609 It terminates slave threads. However, some CHANGE MASTER etc may still be
1610 running presently. If a START SLAVE was in progress, the mutex lock below
1611 will make us wait until slave threads have started, and START SLAVE
1612 returns, then we terminate them here.
1613 */
1614 mysql_mutex_lock(&LOCK_active_mi);
1615 if (active_mi)
1616 {
1617 /*
1618 TODO: replace the line below with
1619 list_walk(&master_list, (list_walk_action)end_slave_on_walk,0);
1620 once multi-master code is ready.
1621 */
1622 terminate_slave_threads(active_mi,SLAVE_FORCE_ALL);
1623 }
1624 mysql_mutex_unlock(&LOCK_active_mi);
1625 DBUG_VOID_RETURN;
1626 }
1627
1628 /**
1629 Free all resources used by slave threads at time of executing shutdown.
1630 The routine must be called after all possible users of @c active_mi
1631 have left.
1632
1633 SYNOPSIS
1634 close_active_mi()
1635
1636 */
close_active_mi()1637 void close_active_mi()
1638 {
1639 mysql_mutex_lock(&LOCK_active_mi);
1640 if (active_mi)
1641 {
1642 end_info(active_mi);
1643 if (active_mi->rli)
1644 delete active_mi->rli;
1645 delete active_mi;
1646 active_mi= 0;
1647 }
1648 mysql_mutex_unlock(&LOCK_active_mi);
1649 }
1650
1651 /**
1652 Check if multi-statement transaction mode and master and slave info
1653 repositories are set to table.
1654
1655 @param THD THD object
1656
1657 @retval true Success
1658 @retval false Failure
1659 */
is_autocommit_off_and_infotables(THD * thd)1660 static bool is_autocommit_off_and_infotables(THD* thd)
1661 {
1662 DBUG_ENTER("is_autocommit_off_and_infotables");
1663 DBUG_RETURN((thd && thd->in_multi_stmt_transaction_mode() &&
1664 (opt_mi_repository_id == INFO_REPOSITORY_TABLE ||
1665 opt_rli_repository_id == INFO_REPOSITORY_TABLE))?
1666 true : false);
1667 }
1668
io_slave_killed(THD * thd,Master_info * mi)1669 static bool io_slave_killed(THD* thd, Master_info* mi)
1670 {
1671 DBUG_ENTER("io_slave_killed");
1672
1673 DBUG_ASSERT(mi->info_thd == thd);
1674 DBUG_ASSERT(mi->slave_running); // tracking buffer overrun
1675 DBUG_RETURN(mi->abort_slave || abort_loop || thd->killed);
1676 }
1677
/**
   The function analyzes a possible killed status and makes
   a decision whether to accept it or not.
   Normally upon accepting the sql thread goes to shutdown.
   In the event of deferring decision the @c rli->last_event_start_time
   waiting timer is set to force the killed status be accepted upon its
   expiration.

   Notice Multi-Threaded-Slave behaves similarly in that when it's being
   stopped and the current group of assigned events has not yet scheduled
   completely, Coordinator defers to accept to leave its read-distribute
   state. The above timeout ensures waiting won't last endlessly, and in
   such case an error is reported.

   Once a kill has been accepted, @c rli->sql_thread_kill_accepted stays
   set and every later call returns true immediately.

   @param thd   pointer to a THD instance
   @param rli   pointer to Relay_log_info instance

   @return TRUE the killed status is recognized, FALSE a possible killed
           status is deferred.
*/
static bool sql_slave_killed(THD* thd, Relay_log_info* rli)
{
  bool is_parallel_warn= FALSE;

  DBUG_ENTER("sql_slave_killed");

  DBUG_ASSERT(rli->info_thd == thd);
  DBUG_ASSERT(rli->slave_running == 1);
  if (rli->sql_thread_kill_accepted)
    DBUG_RETURN(true);
  /*
    Debug-only hook: raise the abort flag, switch the debug keywords off
    again and report "not killed" for this call.
  */
  DBUG_EXECUTE_IF("stop_when_mts_in_group", rli->abort_slave = 1;
                  DBUG_SET("-d,stop_when_mts_in_group");
                  DBUG_SET("-d,simulate_stop_when_mts_in_group");
                  DBUG_RETURN(false););
  if (abort_loop || thd->killed || rli->abort_slave)
  {
    /* Accept by default; the checks below may defer the decision. */
    rli->sql_thread_kill_accepted= true;
    is_parallel_warn= (rli->is_parallel_exec() &&
                       (rli->is_mts_in_group() || thd->killed));
    /*
      Slave can execute stop being in one of two MTS or Single-Threaded mode.
      The modes define different criteria to accept the stop.
      In particular that relates to the concept of grouping.
      Killed Coordinator thread expects the worst so it warns on
      possible consistency issue.
    */
    if (is_parallel_warn ||
        (!rli->is_parallel_exec() &&
         thd->transaction.all.cannot_safely_rollback() && rli->is_in_group()))
    {
      char msg_stopped[]=
        "... Slave SQL Thread stopped with incomplete event group "
        "having non-transactional changes. "
        "If the group consists solely of row-based events, you can try "
        "to restart the slave with --slave-exec-mode=IDEMPOTENT, which "
        "ignores duplicate key, key not found, and similar errors (see "
        "documentation for details).";
      char msg_stopped_mts[]=
        "... The slave coordinator and worker threads are stopped, possibly "
        "leaving data in inconsistent state. A restart should "
        "restore consistency automatically, although using non-transactional "
        "storage for data or info tables or DDL queries could lead to problems. "
        "In such cases you have to examine your data (see documentation for "
        "details).";

      if (rli->abort_slave)
      {
        DBUG_PRINT("info", ("Request to stop slave SQL Thread received while "
                            "applying an MTS group or a group that "
                            "has non-transactional "
                            "changes; waiting for completion of the group ... "));

        /*
          Slave sql thread shutdown in face of unfinished group modified
          Non-trans table is handled via a timer. The slave may eventually
          give out to complete the current group and in that case there
          might be issues at consequent slave restart, see the error message.
          WL#2975 offers a robust solution requiring to store the last executed
          event's coordinates along with the group's coordinates
          instead of waiting with @c last_event_start_time the timer.
        */

        /*
          Start the timer on the first deferred call; the kill is accepted
          only after more than SLAVE_WAIT_GROUP_DONE has elapsed since then.
        */
        if (rli->last_event_start_time == 0)
          rli->last_event_start_time= my_time(0);
        rli->sql_thread_kill_accepted= difftime(my_time(0),
                                               rli->last_event_start_time) <=
                                               SLAVE_WAIT_GROUP_DONE ?
                                               FALSE : TRUE;

        DBUG_EXECUTE_IF("stop_slave_middle_group",
                        DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
                                        rli->sql_thread_kill_accepted= TRUE;);); // time is over

        /* Warn only once per deferred stop request. */
        if (!rli->sql_thread_kill_accepted && !rli->reported_unsafe_warning)
        {
          rli->report(WARNING_LEVEL, 0,
                      !is_parallel_warn ?
                      "Request to stop slave SQL Thread received while "
                      "applying a group that has non-transactional "
                      "changes; waiting for completion of the group ... "
                      :
                      "Coordinator thread of multi-threaded slave is being "
                      "stopped in the middle of assigning a group of events; "
                      "deferring to exit until the group completion ... ");
          rli->reported_unsafe_warning= true;
        }
      }
      if (rli->sql_thread_kill_accepted)
      {
        /* Reset the timer and report the unsafe stop. */
        rli->last_event_start_time= 0;
        if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)
        {
          rli->mts_group_status= Relay_log_info::MTS_KILLED_GROUP;
        }
        if (is_parallel_warn)
          rli->report(!rli->is_error() ? ERROR_LEVEL :
                      WARNING_LEVEL,    // an error was reported by Worker
                      ER_MTS_INCONSISTENT_DATA,
                      ER(ER_MTS_INCONSISTENT_DATA),
                      msg_stopped_mts);
        else
          rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
                      ER(ER_SLAVE_FATAL_ERROR), msg_stopped);
      }
    }
  }
  DBUG_RETURN(rli->sql_thread_kill_accepted);
}
1805
1806
/*
  skip_load_data_infile()

  Requests the file "/dev/null" over the given connection, reads one
  packet and discards it, then writes an empty command-0 packet.
  The results of all three network calls are deliberately ignored.

  PARAMETERS
    net   connection to operate on

  NOTES
    This is used to tell a 3.23 master to break send_file()
*/

void skip_load_data_infile(NET *net)
{
  DBUG_ENTER("skip_load_data_infile");

  (void)net_request_file(net, "/dev/null");
  (void)my_net_read(net);                               // discard response
  (void)net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0); // ok
  DBUG_VOID_RETURN;
}
1823
1824
net_request_file(NET * net,const char * fname)1825 bool net_request_file(NET* net, const char* fname)
1826 {
1827 DBUG_ENTER("net_request_file");
1828 DBUG_RETURN(net_write_command(net, 251, (uchar*) fname, strlen(fname),
1829 (uchar*) "", 0));
1830 }
1831
1832 /*
1833 From other comments and tests in code, it looks like
1834 sometimes Query_log_event and Load_log_event can have db == 0
1835 (see rewrite_db() above for example)
1836 (cases where this happens are unclear; it may be when the master is 3.23).
1837 */
1838
print_slave_db_safe(const char * db)1839 const char *print_slave_db_safe(const char* db)
1840 {
1841 DBUG_ENTER("*print_slave_db_safe");
1842
1843 DBUG_RETURN((db ? db : ""));
1844 }
1845
1846 /*
1847 Check if the error is caused by network.
1848 @param[in] errorno Number of the error.
1849 RETURNS:
1850 TRUE network error
1851 FALSE not network error
1852 */
1853
is_network_error(uint errorno)1854 bool is_network_error(uint errorno)
1855 {
1856 if (errorno == CR_CONNECTION_ERROR ||
1857 errorno == CR_CONN_HOST_ERROR ||
1858 errorno == CR_SERVER_GONE_ERROR ||
1859 errorno == CR_SERVER_LOST ||
1860 errorno == ER_CON_COUNT_ERROR ||
1861 errorno == ER_SERVER_SHUTDOWN)
1862 return TRUE;
1863 #ifdef WITH_WSREP
1864 if (errorno == ER_UNKNOWN_COM_ERROR)
1865 return TRUE;
1866 #endif /* WITH_WSREP */
1867
1868 return FALSE;
1869 }
1870
1871
1872 /**
1873 Execute an initialization query for the IO thread.
1874
1875 If there is an error, then this function calls mysql_free_result;
1876 otherwise the MYSQL object holds the result after this call. If
1877 there is an error other than allowed_error, then this function
1878 prints a message and returns -1.
1879
1880 @param mysql MYSQL object.
1881 @param query Query string.
1882 @param allowed_error Allowed error code, or 0 if no errors are allowed.
1883 @param[out] master_res If this is not NULL and there is no error, then
1884 mysql_store_result() will be called and the result stored in this pointer.
1885 @param[out] master_row If this is not NULL and there is no error, then
1886 mysql_fetch_row() will be called and the result stored in this pointer.
1887
1888 @retval COMMAND_STATUS_OK No error.
1889 @retval COMMAND_STATUS_ALLOWED_ERROR There was an error and the
1890 error code was 'allowed_error'.
1891 @retval COMMAND_STATUS_ERROR There was an error and the error code
1892 was not 'allowed_error'.
1893 */
1894 enum enum_command_status
1895 { COMMAND_STATUS_OK, COMMAND_STATUS_ERROR, COMMAND_STATUS_ALLOWED_ERROR };
1896 static enum_command_status
io_thread_init_command(Master_info * mi,const char * query,int allowed_error,MYSQL_RES ** master_res=NULL,MYSQL_ROW * master_row=NULL)1897 io_thread_init_command(Master_info *mi, const char *query, int allowed_error,
1898 MYSQL_RES **master_res= NULL,
1899 MYSQL_ROW *master_row= NULL)
1900 {
1901 DBUG_ENTER("io_thread_init_command");
1902 DBUG_PRINT("info", ("IO thread initialization command: '%s'", query));
1903 MYSQL *mysql= mi->mysql;
1904 int ret= mysql_real_query(mysql, query, strlen(query));
1905 if (io_slave_killed(mi->info_thd, mi))
1906 {
1907 sql_print_information("The slave IO thread was killed while executing "
1908 "initialization query '%s'", query);
1909 mysql_free_result(mysql_store_result(mysql));
1910 DBUG_RETURN(COMMAND_STATUS_ERROR);
1911 }
1912 if (ret != 0)
1913 {
1914 int err= mysql_errno(mysql);
1915 mysql_free_result(mysql_store_result(mysql));
1916 if (!err || err != allowed_error)
1917 {
1918 mi->report(is_network_error(err) ? WARNING_LEVEL : ERROR_LEVEL, err,
1919 "The slave IO thread stops because the initialization query "
1920 "'%s' failed with error '%s'.",
1921 query, mysql_error(mysql));
1922 DBUG_RETURN(COMMAND_STATUS_ERROR);
1923 }
1924 DBUG_RETURN(COMMAND_STATUS_ALLOWED_ERROR);
1925 }
1926 if (master_res != NULL)
1927 {
1928 if ((*master_res= mysql_store_result(mysql)) == NULL)
1929 {
1930 mi->report(WARNING_LEVEL, mysql_errno(mysql),
1931 "The slave IO thread stops because the initialization query "
1932 "'%s' did not return any result.",
1933 query);
1934 DBUG_RETURN(COMMAND_STATUS_ERROR);
1935 }
1936 if (master_row != NULL)
1937 {
1938 if ((*master_row= mysql_fetch_row(*master_res)) == NULL)
1939 {
1940 mysql_free_result(*master_res);
1941 mi->report(WARNING_LEVEL, mysql_errno(mysql),
1942 "The slave IO thread stops because the initialization query "
1943 "'%s' did not return any row.",
1944 query);
1945 DBUG_RETURN(COMMAND_STATUS_ERROR);
1946 }
1947 }
1948 }
1949 else
1950 DBUG_ASSERT(master_row == NULL);
1951 DBUG_RETURN(COMMAND_STATUS_OK);
1952 }
1953
1954
1955 /**
1956 Set user variables after connecting to the master.
1957
1958 @param mysql MYSQL to request uuid from master.
1959 @param mi Master_info to set master_uuid
1960
1961 @return 0: Success, 1: Fatal error, 2: Network error.
1962 */
io_thread_init_commands(MYSQL * mysql,Master_info * mi)1963 int io_thread_init_commands(MYSQL *mysql, Master_info *mi)
1964 {
1965 char query[256];
1966 int ret= 0;
1967 DBUG_EXECUTE_IF("fake_5_5_version_slave", return ret;);
1968
1969 sprintf(query, "SET @slave_uuid= '%s'", server_uuid);
1970 if (mysql_real_query(mysql, query, strlen(query))
1971 && !check_io_slave_killed(mi->info_thd, mi, NULL))
1972 goto err;
1973
1974 mysql_free_result(mysql_store_result(mysql));
1975 return ret;
1976
1977 err:
1978 if (mysql_errno(mysql) && is_network_error(mysql_errno(mysql)))
1979 {
1980 mi->report(WARNING_LEVEL, mysql_errno(mysql),
1981 "The initialization command '%s' failed with the following"
1982 " error: '%s'.", query, mysql_error(mysql));
1983 ret= 2;
1984 }
1985 else
1986 {
1987 char errmsg[512];
1988 const char *errmsg_fmt=
1989 "The slave I/O thread stops because a fatal error is encountered "
1990 "when it tries to send query to master(query: %s).";
1991
1992 sprintf(errmsg, errmsg_fmt, query);
1993 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR),
1994 errmsg);
1995 ret= 1;
1996 }
1997 mysql_free_result(mysql_store_result(mysql));
1998 return ret;
1999 }
2000
2001 /**
2002 Get master's uuid on connecting.
2003
2004 @param mysql MYSQL to request uuid from master.
2005 @param mi Master_info to set master_uuid
2006
2007 @return 0: Success, 1: Fatal error, 2: Network error.
2008 */
get_master_uuid(MYSQL * mysql,Master_info * mi)2009 static int get_master_uuid(MYSQL *mysql, Master_info *mi)
2010 {
2011 const char *errmsg;
2012 MYSQL_RES *master_res= NULL;
2013 MYSQL_ROW master_row= NULL;
2014 int ret= 0;
2015
2016 DBUG_EXECUTE_IF("dbug.before_get_MASTER_UUID",
2017 {
2018 const char act[]= "now wait_for signal.get_master_uuid";
2019 DBUG_ASSERT(opt_debug_sync_timeout > 0);
2020 DBUG_ASSERT(!debug_sync_set_action(current_thd,
2021 STRING_WITH_LEN(act)));
2022 };);
2023
2024 DBUG_EXECUTE_IF("dbug.simulate_busy_io",
2025 {
2026 const char act[]= "now signal Reached wait_for signal.got_stop_slave";
2027 DBUG_ASSERT(opt_debug_sync_timeout > 0);
2028 DBUG_ASSERT(!debug_sync_set_action(current_thd,
2029 STRING_WITH_LEN(act)));
2030 };);
2031 if (!mysql_real_query(mysql,
2032 STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_UUID'")) &&
2033 (master_res= mysql_store_result(mysql)) &&
2034 (master_row= mysql_fetch_row(master_res)))
2035 {
2036 if (!strcmp(::server_uuid, master_row[1]) &&
2037 !mi->rli->replicate_same_server_id)
2038 {
2039 errmsg= "The slave I/O thread stops because master and slave have equal "
2040 "MySQL server UUIDs; these UUIDs must be different for "
2041 "replication to work.";
2042 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR),
2043 errmsg);
2044 // Fatal error
2045 ret= 1;
2046 }
2047 else
2048 {
2049 if (mi->master_uuid[0] != 0 && strcmp(mi->master_uuid, master_row[1]))
2050 sql_print_warning("The master's UUID has changed, although this should"
2051 " not happen unless you have changed it manually."
2052 " The old UUID was %s.",
2053 mi->master_uuid);
2054 strncpy(mi->master_uuid, master_row[1], UUID_LENGTH);
2055 mi->master_uuid[UUID_LENGTH]= 0;
2056 }
2057 }
2058 else if (mysql_errno(mysql))
2059 {
2060 if (is_network_error(mysql_errno(mysql)))
2061 {
2062 mi->report(WARNING_LEVEL, mysql_errno(mysql),
2063 "Get master SERVER_UUID failed with error: %s",
2064 mysql_error(mysql));
2065 ret= 2;
2066 }
2067 else
2068 {
2069 /* Fatal error */
2070 errmsg= "The slave I/O thread stops because a fatal error is encountered "
2071 "when it tries to get the value of SERVER_UUID variable from master.";
2072 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR),
2073 errmsg);
2074 ret= 1;
2075 }
2076 }
2077 else if (!master_row && master_res)
2078 {
2079 mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE,
2080 "Unknown system variable 'SERVER_UUID' on master. "
2081 "A probable cause is that the variable is not supported on the "
2082 "master (version: %s), even though it is on the slave (version: %s)",
2083 mysql->server_version, server_version);
2084 }
2085
2086 if (master_res)
2087 mysql_free_result(master_res);
2088 return ret;
2089 }
2090
2091
2092 /**
2093 Determine, case-sensitively, if short_string is equal to
2094 long_string, or a true prefix of long_string, or not a prefix.
2095
2096 @retval 0 short_string is not a prefix of long_string.
2097 @retval 1 short_string is a true prefix of long_string (not equal).
2098 @retval 2 short_string is equal to long_string.
2099 */
is_str_prefix_case(const char * short_string,const char * long_string)2100 static int is_str_prefix_case(const char *short_string, const char *long_string)
2101 {
2102 int i;
2103 for (i= 0; short_string[i]; i++)
2104 if (my_toupper(system_charset_info, short_string[i]) !=
2105 my_toupper(system_charset_info, long_string[i]))
2106 return 0;
2107 return long_string[i] ? 1 : 2;
2108 }
2109
/*
  Runs the checks and session settings the I/O thread performs on a
  master connection:

    - installs a default Format_description_log_event in 'mi', chosen
      from the major number of mysql->server_version;
    - computes mi->clock_diff_with_master from SELECT UNIX_TIMESTAMP();
    - reads the master's SERVER_ID and refuses an id equal to ours unless
      replicate_same_server_id is set;
    - for 4.x masters only, compares COLLATION_SERVER and TIME_ZONE;
    - sends @master_heartbeat_period when mi->heartbeat_period is non-zero;
    - sets and reads back @master_binlog_checksum;
    - reads @@GLOBAL.GTID_MODE and checks it against the local gtid_mode.

  Note that we rely on the master's version (3.23, 4.0.14 etc) instead of
  relying on the binlog's version. This is not perfect: imagine an upgrade
  of the master without waiting that all slaves are in sync with the master;
  then a slave could be fooled about the binlog's format. This is what happens
  when people upgrade a 3.23 master to 4.0 without doing RESET MASTER: 4.0
  slaves are fooled. So we do this only to distinguish between 3.23 and more
  recent masters (it's too late to change things for 3.23).

  RETURNS
    0       ok
    1       error
    2       transient network problem, the caller should try to reconnect
            (also returned when the I/O thread was killed during one of the
            queries, and when the GTID_MODE query reports
            COMMAND_STATUS_ERROR)
*/

static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi)
{
  char err_buff[MAX_SLAVE_ERRMSG];
  const char* errmsg= 0;
  int err_code= 0;
  int version_number=0;
  /* Major version number: atoi() stops at the first non-digit character. */
  version_number= atoi(mysql->server_version);

  MYSQL_RES *master_res= 0;
  MYSQL_ROW master_row;
  DBUG_ENTER("get_master_version_and_clock");

  /*
    Debug hook: force a version number that the switch below rejects as
    unrecognized.
  */
  DBUG_EXECUTE_IF("unrecognized_master_version",
                  {
                    version_number= 1;
                  };);
  mysql_mutex_lock(&mi->data_lock);
  /*
    Free old mi_description_event (that is needed if we are in
    a reconnection).
  */
  mi->set_mi_description_event(NULL);

  if (!my_isdigit(&my_charset_bin,*mysql->server_version))
  {
    errmsg = "Master reported unrecognized MySQL version";
    err_code= ER_SLAVE_FATAL_ERROR;
    /* ER(err_code) is used as the format string; errmsg is its argument. */
    sprintf(err_buff, ER(err_code), errmsg);
  }
  else
  {
    /*
      Note the following switch will bug when we have MySQL branch 30 ;)
    */
    switch (version_number)
    {
    case 0:
    case 1:
    case 2:
      errmsg = "Master reported unrecognized MySQL version";
      err_code= ER_SLAVE_FATAL_ERROR;
      sprintf(err_buff, ER(err_code), errmsg);
      break;
    case 3:
      mi->set_mi_description_event(new
        Format_description_log_event(1, mysql->server_version));
      break;
    case 4:
      mi->set_mi_description_event(new
        Format_description_log_event(3, mysql->server_version));
      break;
    default:
      /*
        Master is MySQL >=5.0. Give a default Format_desc event, so that we can
        take the early steps (like tests for "is this a 3.23 master") which we
        have to take before we receive the real master's Format_desc which will
        override this one. Note that the Format_desc we create below is garbage
        (it has the format of the *slave*); it's only good to help know if the
        master is 3.23, 4.0, etc.
      */
      mi->set_mi_description_event(new
        Format_description_log_event(4, mysql->server_version));
      break;
    }
  }

  /*
     This does not mean that a 5.0 slave will be able to read a 5.5 master; but
     as we don't know yet, we don't want to forbid this for now. If a 5.0 slave
     can't read a 5.5 master, this will show up when the slave can't read some
     events sent by the master, and there will be error messages.
  */

  if (errmsg)
  {
    /* unlock the mutex on master info structure */
    mysql_mutex_unlock(&mi->data_lock);
    goto err;
  }

  /* as we are here, we tried to allocate the event */
  if (mi->get_mi_description_event() == NULL)
  {
    mysql_mutex_unlock(&mi->data_lock);
    errmsg= "default Format_description_log_event";
    err_code= ER_SLAVE_CREATE_EVENT_FAILURE;
    sprintf(err_buff, ER(err_code), errmsg);
    goto err;
  }

  if (mi->get_mi_description_event()->binlog_version < 4 &&
      opt_slave_sql_verify_checksum)
  {
    sql_print_warning("Found a master with MySQL server version older than "
                      "5.0. With checksums enabled on the slave, replication "
                      "might not work correctly. To ensure correct "
                      "replication, restart the slave server with "
                      "--slave_sql_verify_checksum=0.");
  }
  /*
    FD_q's (A) is set initially from RL's (A): FD_q.(A) := RL.(A).
    It's necessary to adjust FD_q.(A) at this point because in the following
    course FD_q is going to be dumped to RL.
    Generally FD_q is derived from a received FD_m (roughly FD_q := FD_m)
    in queue_event and the master's (A) is installed.
    At one step with the assignment the Relay-Log's checksum alg is set to
    a new value: RL.(A) := FD_q.(A). If the slave service is stopped
    the last time assigned RL.(A) will be passed over to the restarting
    service (to the current execution point).
    RL.A is a "codec" to verify checksum in queue_event() almost all the time
    the first fake Rotate event.
    Starting from this point IO thread will execute the following checksum
    warmup sequence of actions:

    FD_q.A := RL.A,
    A_m^0 := master.@@global.binlog_checksum,
    {queue_event(R_f): verifies(R_f, A_m^0)},
    {queue_event(FD_m): verifies(FD_m, FD_m.A), dump(FD_q), rotate(RL),
                        FD_q := FD_m, RL.A := FD_q.A)}

    See legends definition on MYSQL_BIN_LOG::relay_log_checksum_alg
    docs lines (binlog.h).
    In above A_m^0 - the value of master's
    @@binlog_checksum determined in the upcoming handshake (stored in
    mi->checksum_alg_before_fd).


    After the warm-up sequence IO gets to "normal" checksum verification mode
    to use RL.A in

    {queue_event(E_m): verifies(E_m, RL.A)}

    until it has received a new FD_m.
  */
  mi->get_mi_description_event()->checksum_alg=
    mi->rli->relay_log.relay_log_checksum_alg;

  DBUG_ASSERT(mi->get_mi_description_event()->checksum_alg !=
              BINLOG_CHECKSUM_ALG_UNDEF);
  DBUG_ASSERT(mi->rli->relay_log.relay_log_checksum_alg !=
              BINLOG_CHECKSUM_ALG_UNDEF);

  mysql_mutex_unlock(&mi->data_lock);

  /*
    Compare the master and slave's clock. Do not die if master's clock is
    unavailable (very old master not supporting UNIX_TIMESTAMP()?).
  */

  DBUG_EXECUTE_IF("dbug.before_get_UNIX_TIMESTAMP",
                  {
                    const char act[]=
                      "now "
                      "wait_for signal.get_unix_timestamp";
                    DBUG_ASSERT(opt_debug_sync_timeout > 0);
                    DBUG_ASSERT(!debug_sync_set_action(current_thd,
                                                       STRING_WITH_LEN(act)));
                  };);

  master_res= NULL;
  if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()")) &&
      (master_res= mysql_store_result(mysql)) &&
      (master_row= mysql_fetch_row(master_res)))
  {
    /* Local time minus master time, stored under mi->data_lock. */
    mysql_mutex_lock(&mi->data_lock);
    mi->clock_diff_with_master=
      (long) (time((time_t*) 0) - strtoul(master_row[0], 0, 10));
    mysql_mutex_unlock(&mi->data_lock);
  }
  else if (check_io_slave_killed(mi->info_thd, mi, NULL))
    goto slave_killed_err;
  else if (is_network_error(mysql_errno(mysql)))
  {
    mi->report(WARNING_LEVEL, mysql_errno(mysql),
               "Get master clock failed with error: %s", mysql_error(mysql));
    goto network_err;
  }
  else
  {
    /* Any other failure is tolerated: warn and continue with a zero diff. */
    mysql_mutex_lock(&mi->data_lock);
    mi->clock_diff_with_master= 0; /* The "most sensible" value */
    mysql_mutex_unlock(&mi->data_lock);
    sql_print_warning("\"SELECT UNIX_TIMESTAMP()\" failed on master, "
                      "do not trust column Seconds_Behind_Master of SHOW "
                      "SLAVE STATUS. Error: %s (%d)",
                      mysql_error(mysql), mysql_errno(mysql));
  }
  if (master_res)
  {
    mysql_free_result(master_res);
    master_res= NULL;
  }

  /*
    Check that the master's server id and ours are different. Because if they
    are equal (which can result from a simple copy of master's datadir to slave,
    thus copying some my.cnf), replication will work but all events will be
    skipped.
    Do not die if SHOW VARIABLES LIKE 'SERVER_ID' fails on master (very old
    master?).
    Note: we could have put a @@SERVER_ID in the previous SELECT
    UNIX_TIMESTAMP() instead, but this would not have worked on 3.23 masters.
  */
  DBUG_EXECUTE_IF("dbug.before_get_SERVER_ID",
                  {
                    const char act[]=
                      "now "
                      "wait_for signal.get_server_id";
                    DBUG_ASSERT(opt_debug_sync_timeout > 0);
                    DBUG_ASSERT(!debug_sync_set_action(current_thd,
                                                       STRING_WITH_LEN(act)));
                  };);
  master_res= NULL;
  master_row= NULL;
  if (!mysql_real_query(mysql,
                        STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_ID'")) &&
      (master_res= mysql_store_result(mysql)) &&
      (master_row= mysql_fetch_row(master_res)))
  {
    /* mi->master_id is updated as a side effect of this comparison. */
    if ((::server_id == (mi->master_id= strtoul(master_row[1], 0, 10))) &&
        !mi->rli->replicate_same_server_id)
    {
      errmsg= "The slave I/O thread stops because master and slave have equal \
MySQL server ids; these ids must be different for replication to work (or \
the --replicate-same-server-id option must be used on slave but this does \
not always make sense; please check the manual before using it).";
      err_code= ER_SLAVE_FATAL_ERROR;
      sprintf(err_buff, ER(err_code), errmsg);
      goto err;
    }
  }
  else if (mysql_errno(mysql))
  {
    if (check_io_slave_killed(mi->info_thd, mi, NULL))
      goto slave_killed_err;
    else if (is_network_error(mysql_errno(mysql)))
    {
      mi->report(WARNING_LEVEL, mysql_errno(mysql),
                 "Get master SERVER_ID failed with error: %s", mysql_error(mysql));
      goto network_err;
    }
    /* Fatal error */
    errmsg= "The slave I/O thread stops because a fatal error is encountered \
when it try to get the value of SERVER_ID variable from master.";
    err_code= mysql_errno(mysql);
    sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
    goto err;
  }
  else if (!master_row && master_res)
  {
    /* The query worked but returned no row: warn only. */
    mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE,
               "Unknown system variable 'SERVER_ID' on master, \
maybe it is a *VERY OLD MASTER*.");
  }
  if (master_res)
  {
    mysql_free_result(master_res);
    master_res= NULL;
  }
  /* Server id filtering cannot work when the master's id is still 0. */
  if (mi->master_id == 0 && mi->ignore_server_ids->dynamic_ids.elements > 0)
  {
    errmsg= "Slave configured with server id filtering could not detect the master server id.";
    err_code= ER_SLAVE_FATAL_ERROR;
    sprintf(err_buff, ER(err_code), errmsg);
    goto err;
  }

  /*
    Check that the master's global character_set_server and ours are the same.
    Not fatal if query fails (old master?).
    Note that we don't check for equality of global character_set_client and
    collation_connection (neither do we prevent their setting in
    set_var.cc). That's because from what I (Guilhem) have tested, the global
    values of these 2 are never used (new connections don't use them).
    We don't test equality of global collation_database either as it is
    going to be deprecated (made read-only) in 4.1 very soon.
    The test is only relevant if master < 5.0.3 (we'll test only if it's older
    than the 5 branch; < 5.0.3 was alpha...), as >= 5.0.3 master stores
    charset info in each binlog event.
    We don't do it for 3.23 because masters <3.23.50 hang on
    SELECT @@unknown_var (BUG#7965 - see changelog of 3.23.50). So finally we
    test only if master is 4.x.
  */

  /*
    redundant with rest of code but safer against later additions.
    errmsg is still NULL at this point (every assignment above jumps to
    err), so this jump makes the function return 0.
  */
  if (*mysql->server_version == '3')
    goto err;

  if (*mysql->server_version == '4')
  {
    master_res= NULL;
    if (!mysql_real_query(mysql,
                          STRING_WITH_LEN("SELECT @@GLOBAL.COLLATION_SERVER")) &&
        (master_res= mysql_store_result(mysql)) &&
        (master_row= mysql_fetch_row(master_res)))
    {
      if (strcmp(master_row[0], global_system_variables.collation_server->name))
      {
        errmsg= "The slave I/O thread stops because master and slave have \
different values for the COLLATION_SERVER global variable. The values must \
be equal for the Statement-format replication to work";
        err_code= ER_SLAVE_FATAL_ERROR;
        sprintf(err_buff, ER(err_code), errmsg);
        goto err;
      }
    }
    else if (check_io_slave_killed(mi->info_thd, mi, NULL))
      goto slave_killed_err;
    else if (is_network_error(mysql_errno(mysql)))
    {
      mi->report(WARNING_LEVEL, mysql_errno(mysql),
                 "Get master COLLATION_SERVER failed with error: %s", mysql_error(mysql));
      goto network_err;
    }
    else if (mysql_errno(mysql) != ER_UNKNOWN_SYSTEM_VARIABLE)
    {
      /* Fatal error */
      errmsg= "The slave I/O thread stops because a fatal error is encountered \
when it try to get the value of COLLATION_SERVER global variable from master.";
      err_code= mysql_errno(mysql);
      sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
      goto err;
    }
    else
      /* ER_UNKNOWN_SYSTEM_VARIABLE is tolerated with a warning. */
      mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE,
                 "Unknown system variable 'COLLATION_SERVER' on master, \
maybe it is a *VERY OLD MASTER*. *NOTE*: slave may experience \
inconsistency if replicated data deals with collation.");

    if (master_res)
    {
      mysql_free_result(master_res);
      master_res= NULL;
    }
  }

  /*
    Perform analogous check for time zone. Theoretically we also should
    perform check here to verify that SYSTEM time zones are the same on
    slave and master, but we can't rely on value of @@system_time_zone
    variable (it is time zone abbreviation) since it is determined at start
    time and so could differ for slave and master even if they are really
    in the same system time zone. So we are omitting this check and just
    relying on documentation. Also according to Monty there are many users
    who are using replication between servers in various time zones. Hence
    such a check would break everything for them. (And now everything will
    work for them because by default both their master and slave will have
    'SYSTEM' time zone).
    This check is only necessary for 4.x masters (and < 5.0.4 masters but
    those were alpha).
  */
  if (*mysql->server_version == '4')
  {
    master_res= NULL;
    if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.TIME_ZONE")) &&
        (master_res= mysql_store_result(mysql)) &&
        (master_row= mysql_fetch_row(master_res)))
    {
      if (strcmp(master_row[0],
                 global_system_variables.time_zone->get_name()->ptr()))
      {
        errmsg= "The slave I/O thread stops because master and slave have \
different values for the TIME_ZONE global variable. The values must \
be equal for the Statement-format replication to work";
        err_code= ER_SLAVE_FATAL_ERROR;
        sprintf(err_buff, ER(err_code), errmsg);
        goto err;
      }
    }
    else if (check_io_slave_killed(mi->info_thd, mi, NULL))
      goto slave_killed_err;
    else if (is_network_error(mysql_errno(mysql)))
    {
      mi->report(WARNING_LEVEL, mysql_errno(mysql),
                 "Get master TIME_ZONE failed with error: %s", mysql_error(mysql));
      goto network_err;
    }
    else
    {
      /* Fatal error */
      errmsg= "The slave I/O thread stops because a fatal error is encountered \
when it try to get the value of TIME_ZONE global variable from master.";
      err_code= mysql_errno(mysql);
      sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
      goto err;
    }
    if (master_res)
    {
      mysql_free_result(master_res);
      master_res= NULL;
    }
  }

  /* Send the heartbeat period to the master, unless it is zero. */
  if (mi->heartbeat_period != 0.0)
  {
    char llbuf[22];
    const char query_format[]= "SET @master_heartbeat_period= %s";
    /* The "- 2" accounts for the "%s" that llbuf's text replaces. */
    char query[sizeof(query_format) - 2 + sizeof(llbuf)];
    /*
       the period is an ulonglong of nano-secs.
    */
    llstr((ulonglong) (mi->heartbeat_period*1000000000UL), llbuf);
    sprintf(query, query_format, llbuf);

    if (mysql_real_query(mysql, query, strlen(query)))
    {
      if (check_io_slave_killed(mi->info_thd, mi, NULL))
        goto slave_killed_err;

      if (is_network_error(mysql_errno(mysql)))
      {
        mi->report(WARNING_LEVEL, mysql_errno(mysql),
                   "SET @master_heartbeat_period to master failed with error: %s",
                   mysql_error(mysql));
        mysql_free_result(mysql_store_result(mysql));
        goto network_err;
      }
      else
      {
        /* Fatal error */
        errmsg= "The slave I/O thread stops because a fatal error is encountered "
          " when it tries to SET @master_heartbeat_period on master.";
        err_code= ER_SLAVE_FATAL_ERROR;
        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
        mysql_free_result(mysql_store_result(mysql));
        goto err;
      }
    }
    mysql_free_result(mysql_store_result(mysql));
  }

  /*
    Querying if master is capable to checksum and notifying it about own
    CRC-awareness. The master's side instant value of @@global.binlog_checksum
    is stored in the dump thread's uservar area as well as cached locally
    to become known in consensus by master and slave.
  */
  if (DBUG_EVALUATE_IF("simulate_slave_unaware_checksum", 0, 1))
  {
    int rc;
    const char query[]= "SET @master_binlog_checksum= @@global.binlog_checksum";
    master_res= NULL;
    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF; //initially undefined
    /*
      @c checksum_alg_before_fd is queried from master in this block.
      If master is old checksum-unaware the value stays undefined.
      Once the first FD will be received its alg descriptor will replace
      the being queried one.
    */
    rc= mysql_real_query(mysql, query, strlen(query));
    if (rc != 0)
    {
      /* The SET failed: record "no checksum" before classifying the error. */
      mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_OFF;
      if (check_io_slave_killed(mi->info_thd, mi, NULL))
        goto slave_killed_err;

      if (mysql_errno(mysql) == ER_UNKNOWN_SYSTEM_VARIABLE)
      {
        // this is tolerable as OM -> NS is supported
        mi->report(WARNING_LEVEL, mysql_errno(mysql),
                   "Notifying master by %s failed with "
                   "error: %s", query, mysql_error(mysql));
      }
      else
      {
        if (is_network_error(mysql_errno(mysql)))
        {
          mi->report(WARNING_LEVEL, mysql_errno(mysql),
                     "Notifying master by %s failed with "
                     "error: %s", query, mysql_error(mysql));
          mysql_free_result(mysql_store_result(mysql));
          goto network_err;
        }
        else
        {
          errmsg= "The slave I/O thread stops because a fatal error is encountered "
            "when it tried to SET @master_binlog_checksum on master.";
          err_code= ER_SLAVE_FATAL_ERROR;
          sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
          mysql_free_result(mysql_store_result(mysql));
          goto err;
        }
      }
    }
    else
    {
      /* The SET worked: read the value back to learn the algorithm name. */
      mysql_free_result(mysql_store_result(mysql));
      if (!mysql_real_query(mysql,
                            STRING_WITH_LEN("SELECT @master_binlog_checksum")) &&
          (master_res= mysql_store_result(mysql)) &&
          (master_row= mysql_fetch_row(master_res)) &&
          (master_row[0] != NULL))
      {
        /*
          Map the returned name through binlog_checksum_typelib.
          NOTE(review): the "- 1" presumably turns find_type()'s 1-based
          position into the enum value, with "not found" becoming
          BINLOG_CHECKSUM_ALG_UNDEF after the uint8 cast - confirm.
        */
        mi->checksum_alg_before_fd= (uint8)
          find_type(master_row[0], &binlog_checksum_typelib, 1) - 1;

       DBUG_EXECUTE_IF("undefined_algorithm_on_slave",
        mi->checksum_alg_before_fd = BINLOG_CHECKSUM_ALG_UNDEF;);
       if(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_UNDEF)
       {
         errmsg= "The slave I/O thread was stopped because a fatal error is encountered "
                 "The checksum algorithm used by master is unknown to slave.";
         err_code= ER_SLAVE_FATAL_ERROR;
         sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
         mysql_free_result(mysql_store_result(mysql));
         goto err;
       }

        // valid outcome is either of
        DBUG_ASSERT(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_OFF ||
                    mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_CRC32);
      }
      else if (check_io_slave_killed(mi->info_thd, mi, NULL))
        goto slave_killed_err;
      else if (is_network_error(mysql_errno(mysql)))
      {
        mi->report(WARNING_LEVEL, mysql_errno(mysql),
                   "Get master BINLOG_CHECKSUM failed with error: %s", mysql_error(mysql));
        goto network_err;
      }
      else
      {
        errmsg= "The slave I/O thread stops because a fatal error is encountered "
          "when it tried to SELECT @master_binlog_checksum.";
        err_code= ER_SLAVE_FATAL_ERROR;
        sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
        mysql_free_result(mysql_store_result(mysql));
        goto err;
      }
    }
    if (master_res)
    {
      mysql_free_result(master_res);
      master_res= NULL;
    }
  }
  else
    mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_OFF;

  /*
    Read the master's GTID_MODE and check it against the local gtid_mode.
    The error paths of this block return directly instead of going through
    the labels at the end of the function.
  */
  if (DBUG_EVALUATE_IF("simulate_slave_unaware_gtid", 0, 1))
  {
    switch (io_thread_init_command(mi, "SELECT @@GLOBAL.GTID_MODE",
                                   ER_UNKNOWN_SYSTEM_VARIABLE,
                                   &master_res, &master_row))
    {
    case COMMAND_STATUS_ERROR:
      DBUG_RETURN(2);
    case COMMAND_STATUS_ALLOWED_ERROR:
      // master is old and does not have @@GLOBAL.GTID_MODE
      mi->master_gtid_mode= 0;
      break;
    case COMMAND_STATUS_OK:
      const char *master_gtid_mode_string= master_row[0];
      bool found_valid_mode= false;
      DBUG_EXECUTE_IF("simulate_master_has_gtid_mode_on_permissive",
                      { master_gtid_mode_string= "on_permissive"; });
      DBUG_EXECUTE_IF("simulate_master_has_gtid_mode_off_permissive",
                      { master_gtid_mode_string= "off_permissive"; });
      DBUG_EXECUTE_IF("simulate_master_has_gtid_mode_on_something",
                      { master_gtid_mode_string= "on_something"; });
      DBUG_EXECUTE_IF("simulate_master_has_gtid_mode_off_something",
                      { master_gtid_mode_string= "off_something"; });
      DBUG_EXECUTE_IF("simulate_master_has_unknown_gtid_mode",
                      { master_gtid_mode_string= "Krakel Spektakel"; });
      /*
        Only entries 0 and 3 of gtid_mode_typelib.type_names are tried
        (the step is 3). A name that merely starts with one of them is
        accepted with a warning and treated as that entry.
      */
      for (int mode= 0; mode <= 3 && !found_valid_mode; mode+= 3)
      {
        switch (is_str_prefix_case(gtid_mode_typelib.type_names[mode],
                                   master_gtid_mode_string))
        {
        case 0: // is not a prefix
          break;
        case 1: // is a true prefix, i.e. not equal
          mi->report(WARNING_LEVEL, ER_UNKNOWN_ERROR,
                     "The master uses an unknown GTID_MODE '%s'. "
                     "Treating it as '%s'.",
                     master_gtid_mode_string,
                     gtid_mode_typelib.type_names[mode]);
          // fall through
        case 2: // is equal
          found_valid_mode= true;
          mi->master_gtid_mode= mode;
          break;
        }
      }
      if (!found_valid_mode)
      {
        mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
                   "The slave IO thread stops because the master has "
                   "an unknown @@GLOBAL.GTID_MODE '%s'.",
                   master_gtid_mode_string);
        mysql_free_result(master_res);
        DBUG_RETURN(1);
      }
      mysql_free_result(master_res);
      break;
    }
    /* The two modes may differ by at most one step in either direction. */
    if (mi->master_gtid_mode > gtid_mode + 1 ||
        gtid_mode > mi->master_gtid_mode + 1)
    {
      mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
                 "The slave IO thread stops because the master has "
                 "@@GLOBAL.GTID_MODE %s and this server has "
                 "@@GLOBAL.GTID_MODE %s",
                 gtid_mode_names[mi->master_gtid_mode],
                 gtid_mode_names[gtid_mode]);
      DBUG_RETURN(1);
    }
    /* NOTE(review): mode 3 is presumably ON - confirm against the typelib. */
    if (mi->is_auto_position() && mi->master_gtid_mode != 3)
    {
      mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
                 "The slave IO thread stops because the master has "
                 "@@GLOBAL.GTID_MODE %s and we are trying to connect "
                 "using MASTER_AUTO_POSITION.",
                 gtid_mode_names[mi->master_gtid_mode]);
      DBUG_RETURN(1);
    }
  }

  /*
    Common exit. Reached by falling through on success (errmsg is NULL,
    return 0) and by 'goto err' after errmsg, err_code and err_buff have
    been filled in (report and return 1).
  */
err:
  if (errmsg)
  {
    if (master_res)
      mysql_free_result(master_res);
    DBUG_ASSERT(err_code != 0);
    mi->report(ERROR_LEVEL, err_code, "%s", err_buff);
    DBUG_RETURN(1);
  }

  DBUG_RETURN(0);

  /* The warning was already reported at the point of the jump. */
network_err:
  if (master_res)
    mysql_free_result(master_res);
  DBUG_RETURN(2);

  /* A killed I/O thread returns the same code as a network error. */
slave_killed_err:
  if (master_res)
    mysql_free_result(master_res);
  DBUG_RETURN(2);
}
2765
wait_for_relay_log_space(Relay_log_info * rli)2766 static bool wait_for_relay_log_space(Relay_log_info* rli)
2767 {
2768 bool slave_killed=0;
2769 Master_info* mi = rli->mi;
2770 PSI_stage_info old_stage;
2771 THD* thd = mi->info_thd;
2772 DBUG_ENTER("wait_for_relay_log_space");
2773
2774 mysql_mutex_lock(&rli->log_space_lock);
2775 thd->ENTER_COND(&rli->log_space_cond,
2776 &rli->log_space_lock,
2777 &stage_waiting_for_relay_log_space,
2778 &old_stage);
2779 while (rli->log_space_limit < rli->log_space_total &&
2780 !(slave_killed=io_slave_killed(thd,mi)) &&
2781 !rli->ignore_log_space_limit)
2782 mysql_cond_wait(&rli->log_space_cond, &rli->log_space_lock);
2783
2784 /*
2785 Makes the IO thread read only one event at a time
2786 until the SQL thread is able to purge the relay
2787 logs, freeing some space.
2788
2789 Therefore, once the SQL thread processes this next
2790 event, it goes to sleep (no more events in the queue),
2791 sets ignore_log_space_limit=true and wakes the IO thread.
2792 However, this event may have been enough already for
2793 the SQL thread to purge some log files, freeing
2794 rli->log_space_total .
2795
2796 This guarantees that the SQL and IO thread move
2797 forward only one event at a time (to avoid deadlocks),
2798 when the relay space limit is reached. It also
2799 guarantees that when the SQL thread is prepared to
2800 rotate (to be able to purge some logs), the IO thread
2801 will know about it and will rotate.
2802
2803 NOTE: The ignore_log_space_limit is only set when the SQL
2804 thread sleeps waiting for events.
2805
2806 */
2807 if (rli->ignore_log_space_limit)
2808 {
2809 #ifndef DBUG_OFF
2810 {
2811 char llbuf1[22], llbuf2[22];
2812 DBUG_PRINT("info", ("log_space_limit=%s "
2813 "log_space_total=%s "
2814 "ignore_log_space_limit=%d "
2815 "sql_force_rotate_relay=%d",
2816 llstr(rli->log_space_limit,llbuf1),
2817 llstr(rli->log_space_total,llbuf2),
2818 (int) rli->ignore_log_space_limit,
2819 (int) rli->sql_force_rotate_relay));
2820 }
2821 #endif
2822 if (rli->sql_force_rotate_relay)
2823 {
2824 mysql_mutex_lock(&mi->data_lock);
2825 rotate_relay_log(mi, false/*need_log_space_lock=false*/);
2826 mysql_mutex_unlock(&mi->data_lock);
2827 rli->sql_force_rotate_relay= false;
2828 }
2829
2830 rli->ignore_log_space_limit= false;
2831 }
2832
2833 thd->EXIT_COND(&old_stage);
2834 DBUG_RETURN(slave_killed);
2835 }
2836
2837
2838 /*
2839 Builds a Rotate from the ignored events' info and writes it to relay log.
2840
2841 The caller must hold mi->data_lock before invoking this function.
2842
2843 @param thd pointer to I/O Thread's Thd.
2844 @param mi point to I/O Thread metadata class.
2845
2846 @return 0 if everything went fine, 1 otherwise.
2847 */
write_ignored_events_info_to_relay_log(THD * thd,Master_info * mi)2848 static int write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi)
2849 {
2850 Relay_log_info *rli= mi->rli;
2851 mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
2852 int error= 0;
2853 DBUG_ENTER("write_ignored_events_info_to_relay_log");
2854
2855 DBUG_ASSERT(thd == mi->info_thd);
2856 mysql_mutex_assert_owner(&mi->data_lock);
2857 mysql_mutex_lock(log_lock);
2858 if (rli->ign_master_log_name_end[0])
2859 {
2860 DBUG_PRINT("info",("writing a Rotate event to track down ignored events"));
2861 Rotate_log_event *ev= new Rotate_log_event(rli->ign_master_log_name_end,
2862 0, rli->ign_master_log_pos_end,
2863 Rotate_log_event::DUP_NAME);
2864 if (mi->get_mi_description_event() != NULL)
2865 ev->checksum_alg= mi->get_mi_description_event()->checksum_alg;
2866
2867 rli->ign_master_log_name_end[0]= 0;
2868 /* can unlock before writing as slave SQL thd will soon see our Rotate */
2869 mysql_mutex_unlock(log_lock);
2870 if (likely((bool)ev))
2871 {
2872 ev->server_id= 0; // don't be ignored by slave SQL thread
2873 if (unlikely(rli->relay_log.append_event(ev, mi) != 0))
2874 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
2875 ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
2876 "failed to write a Rotate event"
2877 " to the relay log, SHOW SLAVE STATUS may be"
2878 " inaccurate");
2879 rli->relay_log.harvest_bytes_written(rli, true/*need_log_space_lock=true*/);
2880 if (flush_master_info(mi, TRUE))
2881 {
2882 error= 1;
2883 sql_print_error("Failed to flush master info file.");
2884 }
2885 delete ev;
2886 }
2887 else
2888 {
2889 error= 1;
2890 mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE,
2891 ER(ER_SLAVE_CREATE_EVENT_FAILURE),
2892 "Rotate_event (out of memory?),"
2893 " SHOW SLAVE STATUS may be inaccurate");
2894 }
2895 }
2896 else
2897 mysql_mutex_unlock(log_lock);
2898
2899 DBUG_RETURN(error);
2900 }
2901
2902
register_slave_on_master(MYSQL * mysql,Master_info * mi,bool * suppress_warnings)2903 int register_slave_on_master(MYSQL* mysql, Master_info *mi,
2904 bool *suppress_warnings)
2905 {
2906 uchar buf[1024], *pos= buf;
2907 uint report_host_len=0, report_user_len=0, report_password_len=0;
2908 DBUG_ENTER("register_slave_on_master");
2909
2910 *suppress_warnings= FALSE;
2911 if (report_host)
2912 report_host_len= strlen(report_host);
2913 if (report_host_len > HOSTNAME_LENGTH)
2914 {
2915 sql_print_warning("The length of report_host is %d. "
2916 "It is larger than the max length(%d), so this "
2917 "slave cannot be registered to the master.",
2918 report_host_len, HOSTNAME_LENGTH);
2919 DBUG_RETURN(0);
2920 }
2921
2922 if (report_user)
2923 report_user_len= strlen(report_user);
2924 if (report_user_len > USERNAME_LENGTH)
2925 {
2926 sql_print_warning("The length of report_user is %d. "
2927 "It is larger than the max length(%d), so this "
2928 "slave cannot be registered to the master.",
2929 report_user_len, USERNAME_LENGTH);
2930 DBUG_RETURN(0);
2931 }
2932
2933 if (report_password)
2934 report_password_len= strlen(report_password);
2935 if (report_password_len > MAX_PASSWORD_LENGTH)
2936 {
2937 sql_print_warning("The length of report_password is %d. "
2938 "It is larger than the max length(%d), so this "
2939 "slave cannot be registered to the master.",
2940 report_password_len, MAX_PASSWORD_LENGTH);
2941 DBUG_RETURN(0);
2942 }
2943
2944 int4store(pos, server_id); pos+= 4;
2945 pos= net_store_data(pos, (uchar*) report_host, report_host_len);
2946 pos= net_store_data(pos, (uchar*) report_user, report_user_len);
2947 pos= net_store_data(pos, (uchar*) report_password, report_password_len);
2948 int2store(pos, (uint16) report_port); pos+= 2;
2949 /*
2950 Fake rpl_recovery_rank, which was removed in BUG#13963,
2951 so that this server can register itself on old servers,
2952 see BUG#49259.
2953 */
2954 int4store(pos, /* rpl_recovery_rank */ 0); pos+= 4;
2955 /* The master will fill in master_id */
2956 int4store(pos, 0); pos+= 4;
2957
2958 if (simple_command(mysql, COM_REGISTER_SLAVE, buf, (size_t) (pos- buf), 0))
2959 {
2960 if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
2961 {
2962 *suppress_warnings= TRUE; // Suppress reconnect warning
2963 }
2964 else if (!check_io_slave_killed(mi->info_thd, mi, NULL))
2965 {
2966 char buf[256];
2967 my_snprintf(buf, sizeof(buf), "%s (Errno: %d)", mysql_error(mysql),
2968 mysql_errno(mysql));
2969 mi->report(ERROR_LEVEL, ER_SLAVE_MASTER_COM_FAILURE,
2970 ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_REGISTER_SLAVE", buf);
2971 }
2972 DBUG_RETURN(1);
2973 }
2974
2975 DBUG_EXECUTE_IF("simulate_register_slave_killed", {
2976 mi->abort_slave = 1;
2977 DBUG_RETURN(1);
2978 };);
2979
2980 DBUG_RETURN(0);
2981 }
2982
2983
2984 /**
2985 Execute a SHOW SLAVE STATUS statement.
2986
2987 @param thd Pointer to THD object for the client thread executing the
2988 statement.
2989
2990 @param mi Pointer to Master_info object for the IO thread.
2991
2992 @retval FALSE success
2993 @retval TRUE failure
2994 */
show_slave_status(THD * thd,Master_info * mi)2995 bool show_slave_status(THD* thd, Master_info* mi)
2996 {
2997 // TODO: fix this for multi-master
2998 List<Item> field_list;
2999 Protocol *protocol= thd->protocol;
3000 char *slave_sql_running_state= NULL;
3001 char *sql_gtid_set_buffer= NULL, *io_gtid_set_buffer= NULL;
3002 int sql_gtid_set_size= 0, io_gtid_set_size= 0;
3003 DBUG_ENTER("show_slave_status");
3004
3005 if (mi != NULL)
3006 {
3007 global_sid_lock->wrlock();
3008 const Gtid_set* sql_gtid_set= gtid_state->get_logged_gtids();
3009 const Gtid_set* io_gtid_set= mi->rli->get_gtid_set();
3010 if ((sql_gtid_set_size= sql_gtid_set->to_string(&sql_gtid_set_buffer)) < 0 ||
3011 (io_gtid_set_size= io_gtid_set->to_string(&io_gtid_set_buffer)) < 0)
3012 {
3013 my_eof(thd);
3014 my_free(sql_gtid_set_buffer);
3015 my_free(io_gtid_set_buffer);
3016 global_sid_lock->unlock();
3017 DBUG_RETURN(true);
3018 }
3019 global_sid_lock->unlock();
3020 }
3021
3022 field_list.push_back(new Item_empty_string("Slave_IO_State",
3023 14));
3024 field_list.push_back(new Item_empty_string("Master_Host", mi != NULL ?
3025 sizeof(mi->host) : 0));
3026 field_list.push_back(new Item_empty_string("Master_User", mi != NULL ?
3027 mi->get_user_size() : 0));
3028 field_list.push_back(new Item_return_int("Master_Port", 7,
3029 MYSQL_TYPE_LONG));
3030 field_list.push_back(new Item_return_int("Connect_Retry", 10,
3031 MYSQL_TYPE_LONG));
3032 field_list.push_back(new Item_empty_string("Master_Log_File",
3033 FN_REFLEN));
3034 field_list.push_back(new Item_return_int("Read_Master_Log_Pos", 10,
3035 MYSQL_TYPE_LONGLONG));
3036 field_list.push_back(new Item_empty_string("Relay_Log_File",
3037 FN_REFLEN));
3038 field_list.push_back(new Item_return_int("Relay_Log_Pos", 10,
3039 MYSQL_TYPE_LONGLONG));
3040 field_list.push_back(new Item_empty_string("Relay_Master_Log_File",
3041 FN_REFLEN));
3042 field_list.push_back(new Item_empty_string("Slave_IO_Running", 3));
3043 field_list.push_back(new Item_empty_string("Slave_SQL_Running", 3));
3044 field_list.push_back(new Item_empty_string("Replicate_Do_DB", 20));
3045 field_list.push_back(new Item_empty_string("Replicate_Ignore_DB", 20));
3046 field_list.push_back(new Item_empty_string("Replicate_Do_Table", 20));
3047 field_list.push_back(new Item_empty_string("Replicate_Ignore_Table", 23));
3048 field_list.push_back(new Item_empty_string("Replicate_Wild_Do_Table", 24));
3049 field_list.push_back(new Item_empty_string("Replicate_Wild_Ignore_Table",
3050 28));
3051 field_list.push_back(new Item_return_int("Last_Errno", 4, MYSQL_TYPE_LONG));
3052 field_list.push_back(new Item_empty_string("Last_Error", 20));
3053 field_list.push_back(new Item_return_int("Skip_Counter", 10,
3054 MYSQL_TYPE_LONG));
3055 field_list.push_back(new Item_return_int("Exec_Master_Log_Pos", 10,
3056 MYSQL_TYPE_LONGLONG));
3057 field_list.push_back(new Item_return_int("Relay_Log_Space", 10,
3058 MYSQL_TYPE_LONGLONG));
3059 field_list.push_back(new Item_empty_string("Until_Condition", 6));
3060 field_list.push_back(new Item_empty_string("Until_Log_File", FN_REFLEN));
3061 field_list.push_back(new Item_return_int("Until_Log_Pos", 10,
3062 MYSQL_TYPE_LONGLONG));
3063 field_list.push_back(new Item_empty_string("Master_SSL_Allowed", 7));
3064 field_list.push_back(new Item_empty_string("Master_SSL_CA_File", mi != NULL ?
3065 sizeof(mi->ssl_ca) : 0));
3066 field_list.push_back(new Item_empty_string("Master_SSL_CA_Path", mi != NULL ?
3067 sizeof(mi->ssl_capath) : 0));
3068 field_list.push_back(new Item_empty_string("Master_SSL_Cert", mi != NULL ?
3069 sizeof(mi->ssl_cert) : 0));
3070 field_list.push_back(new Item_empty_string("Master_SSL_Cipher", mi != NULL ?
3071 sizeof(mi->ssl_cipher) : 0));
3072 field_list.push_back(new Item_empty_string("Master_SSL_Key", mi != NULL ?
3073 sizeof(mi->ssl_key) : 0));
3074 field_list.push_back(new Item_return_int("Seconds_Behind_Master", 10,
3075 MYSQL_TYPE_LONGLONG));
3076 field_list.push_back(new Item_empty_string("Master_SSL_Verify_Server_Cert",
3077 3));
3078 field_list.push_back(new Item_return_int("Last_IO_Errno", 4, MYSQL_TYPE_LONG));
3079 field_list.push_back(new Item_empty_string("Last_IO_Error", 20));
3080 field_list.push_back(new Item_return_int("Last_SQL_Errno", 4, MYSQL_TYPE_LONG));
3081 field_list.push_back(new Item_empty_string("Last_SQL_Error", 20));
3082 field_list.push_back(new Item_empty_string("Replicate_Ignore_Server_Ids",
3083 FN_REFLEN));
3084 field_list.push_back(new Item_return_int("Master_Server_Id", sizeof(ulong),
3085 MYSQL_TYPE_LONG));
3086 field_list.push_back(new Item_empty_string("Master_UUID", UUID_LENGTH));
3087 field_list.push_back(new Item_empty_string("Master_Info_File",
3088 2 * FN_REFLEN));
3089 field_list.push_back(new Item_return_int("SQL_Delay", 10, MYSQL_TYPE_LONG));
3090 field_list.push_back(new Item_return_int("SQL_Remaining_Delay", 8, MYSQL_TYPE_LONG));
3091 field_list.push_back(new Item_empty_string("Slave_SQL_Running_State", 20));
3092 field_list.push_back(new Item_return_int("Master_Retry_Count", 10,
3093 MYSQL_TYPE_LONGLONG));
3094 field_list.push_back(new Item_empty_string("Master_Bind", mi != NULL ?
3095 sizeof(mi->bind_addr) : 0));
3096 field_list.push_back(new Item_empty_string("Last_IO_Error_Timestamp", 20));
3097 field_list.push_back(new Item_empty_string("Last_SQL_Error_Timestamp", 20));
3098 field_list.push_back(new Item_empty_string("Master_SSL_Crl", mi != NULL ?
3099 sizeof(mi->ssl_crl) : 0));
3100 field_list.push_back(new Item_empty_string("Master_SSL_Crlpath", mi != NULL ?
3101 sizeof(mi->ssl_crlpath) : 0));
3102 field_list.push_back(new Item_empty_string("Retrieved_Gtid_Set",
3103 io_gtid_set_size));
3104 field_list.push_back(new Item_empty_string("Executed_Gtid_Set",
3105 sql_gtid_set_size));
3106 field_list.push_back(new Item_return_int("Auto_Position", sizeof(ulong),
3107 MYSQL_TYPE_LONG));
3108
3109 if (protocol->send_result_set_metadata(&field_list,
3110 Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
3111 {
3112 my_free(sql_gtid_set_buffer);
3113 my_free(io_gtid_set_buffer);
3114 DBUG_RETURN(true);
3115 }
3116
3117 if (mi != NULL && mi->host[0])
3118 {
3119 DBUG_PRINT("info",("host is set: '%s'", mi->host));
3120 String *packet= &thd->packet;
3121 protocol->prepare_for_resend();
3122
3123 /*
3124 slave_running can be accessed without run_lock but not other
3125 non-volotile members like mi->info_thd, which is guarded by the mutex.
3126 */
3127 mysql_mutex_lock(&mi->run_lock);
3128 protocol->store(mi->info_thd ? mi->info_thd->get_proc_info() : "", &my_charset_bin);
3129 mysql_mutex_unlock(&mi->run_lock);
3130
3131 mysql_mutex_lock(&mi->rli->run_lock);
3132 slave_sql_running_state= const_cast<char *>(mi->rli->info_thd ? mi->rli->info_thd->get_proc_info() : "");
3133 mysql_mutex_unlock(&mi->rli->run_lock);
3134
3135 mysql_mutex_lock(&mi->data_lock);
3136 mysql_mutex_lock(&mi->rli->data_lock);
3137 mysql_mutex_lock(&mi->err_lock);
3138 mysql_mutex_lock(&mi->rli->err_lock);
3139
3140 DEBUG_SYNC(thd, "wait_after_lock_active_mi_and_rli_data_lock_is_acquired");
3141 protocol->store(mi->host, &my_charset_bin);
3142 protocol->store(mi->get_user(), &my_charset_bin);
3143 protocol->store((uint32) mi->port);
3144 protocol->store((uint32) mi->connect_retry);
3145 protocol->store(mi->get_master_log_name(), &my_charset_bin);
3146 protocol->store((ulonglong) mi->get_master_log_pos());
3147 protocol->store(mi->rli->get_group_relay_log_name() +
3148 dirname_length(mi->rli->get_group_relay_log_name()),
3149 &my_charset_bin);
3150 protocol->store((ulonglong) mi->rli->get_group_relay_log_pos());
3151 protocol->store(mi->rli->get_group_master_log_name(), &my_charset_bin);
3152 protocol->store(mi->slave_running == MYSQL_SLAVE_RUN_CONNECT ?
3153 "Yes" : (mi->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT ?
3154 "Connecting" : "No"), &my_charset_bin);
3155 protocol->store(mi->rli->slave_running ? "Yes":"No", &my_charset_bin);
3156 protocol->store(rpl_filter->get_do_db());
3157 protocol->store(rpl_filter->get_ignore_db());
3158
3159 char buf[256];
3160 String tmp(buf, sizeof(buf), &my_charset_bin);
3161 rpl_filter->get_do_table(&tmp);
3162 protocol->store(&tmp);
3163 rpl_filter->get_ignore_table(&tmp);
3164 protocol->store(&tmp);
3165 rpl_filter->get_wild_do_table(&tmp);
3166 protocol->store(&tmp);
3167 rpl_filter->get_wild_ignore_table(&tmp);
3168 protocol->store(&tmp);
3169
3170 protocol->store(mi->rli->last_error().number);
3171 protocol->store(mi->rli->last_error().message, &my_charset_bin);
3172 protocol->store((uint32) mi->rli->slave_skip_counter);
3173 protocol->store((ulonglong) mi->rli->get_group_master_log_pos());
3174 protocol->store((ulonglong) mi->rli->log_space_total);
3175
3176 const char *until_type= "";
3177
3178 switch (mi->rli->until_condition)
3179 {
3180 case Relay_log_info::UNTIL_NONE:
3181 until_type= "None";
3182 break;
3183 case Relay_log_info::UNTIL_MASTER_POS:
3184 until_type= "Master";
3185 break;
3186 case Relay_log_info::UNTIL_RELAY_POS:
3187 until_type= "Relay";
3188 break;
3189 case Relay_log_info::UNTIL_SQL_BEFORE_GTIDS:
3190 until_type= "SQL_BEFORE_GTIDS";
3191 break;
3192 case Relay_log_info::UNTIL_SQL_AFTER_GTIDS:
3193 until_type= "SQL_AFTER_GTIDS";
3194 break;
3195 case Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS:
3196 until_type= "SQL_AFTER_MTS_GAPS";
3197 case Relay_log_info::UNTIL_DONE:
3198 until_type= "DONE";
3199 break;
3200 default:
3201 DBUG_ASSERT(0);
3202 }
3203 protocol->store(until_type, &my_charset_bin);
3204 protocol->store(mi->rli->until_log_name, &my_charset_bin);
3205 protocol->store((ulonglong) mi->rli->until_log_pos);
3206
3207 #ifdef HAVE_OPENSSL
3208 protocol->store(mi->ssl? "Yes":"No", &my_charset_bin);
3209 #else
3210 protocol->store(mi->ssl? "Ignored":"No", &my_charset_bin);
3211 #endif
3212 protocol->store(mi->ssl_ca, &my_charset_bin);
3213 protocol->store(mi->ssl_capath, &my_charset_bin);
3214 protocol->store(mi->ssl_cert, &my_charset_bin);
3215 protocol->store(mi->ssl_cipher, &my_charset_bin);
3216 protocol->store(mi->ssl_key, &my_charset_bin);
3217
3218 /*
3219 The pseudo code to compute Seconds_Behind_Master:
3220 if (SQL thread is running)
3221 {
3222 if (SQL thread processed all the available relay log)
3223 {
3224 if (IO thread is running)
3225 print 0;
3226 else
3227 print NULL;
3228 }
3229 else
3230 compute Seconds_Behind_Master;
3231 }
3232 else
3233 print NULL;
3234 */
3235 if (mi->rli->slave_running)
3236 {
3237 /* Check if SQL thread is at the end of relay log
3238 Checking should be done using two conditions
3239 condition1: compare the log positions and
3240 condition2: compare the file names (to handle rotation case)
3241 */
3242 if ((mi->get_master_log_pos() == mi->rli->get_group_master_log_pos()) &&
3243 (!strcmp(mi->get_master_log_name(), mi->rli->get_group_master_log_name())))
3244 {
3245 if (mi->slave_running == MYSQL_SLAVE_RUN_CONNECT)
3246 protocol->store(0LL);
3247 else
3248 protocol->store_null();
3249 }
3250 else
3251 {
3252 long time_diff= ((long)(time(0) - mi->rli->last_master_timestamp)
3253 - mi->clock_diff_with_master);
3254 /*
3255 Apparently on some systems time_diff can be <0. Here are possible
3256 reasons related to MySQL:
3257 - the master is itself a slave of another master whose time is ahead.
3258 - somebody used an explicit SET TIMESTAMP on the master.
3259 Possible reason related to granularity-to-second of time functions
3260 (nothing to do with MySQL), which can explain a value of -1:
3261 assume the master's and slave's time are perfectly synchronized, and
3262 that at slave's connection time, when the master's timestamp is read,
3263 it is at the very end of second 1, and (a very short time later) when
3264 the slave's timestamp is read it is at the very beginning of second
3265 2. Then the recorded value for master is 1 and the recorded value for
3266 slave is 2. At SHOW SLAVE STATUS time, assume that the difference
3267 between timestamp of slave and rli->last_master_timestamp is 0
3268 (i.e. they are in the same second), then we get 0-(2-1)=-1 as a result.
3269 This confuses users, so we don't go below 0: hence the max().
3270
3271 last_master_timestamp == 0 (an "impossible" timestamp 1970) is a
3272 special marker to say "consider we have caught up".
3273 */
3274 protocol->store((longlong)(mi->rli->last_master_timestamp ?
3275 max(0L, time_diff) : 0));
3276 }
3277 }
3278 else
3279 {
3280 protocol->store_null();
3281 }
3282 protocol->store(mi->ssl_verify_server_cert? "Yes":"No", &my_charset_bin);
3283
3284 // Last_IO_Errno
3285 protocol->store(mi->last_error().number);
3286 // Last_IO_Error
3287 protocol->store(mi->last_error().message, &my_charset_bin);
3288 // Last_SQL_Errno
3289 protocol->store(mi->rli->last_error().number);
3290 // Last_SQL_Error
3291 protocol->store(mi->rli->last_error().message, &my_charset_bin);
3292 // Replicate_Ignore_Server_Ids
3293 {
3294 char buff[FN_REFLEN];
3295 ulong i, cur_len;
3296 for (i= 0, buff[0]= 0, cur_len= 0;
3297 i < mi->ignore_server_ids->dynamic_ids.elements; i++)
3298 {
3299 ulong s_id, slen;
3300 char sbuff[FN_REFLEN];
3301 get_dynamic(&(mi->ignore_server_ids->dynamic_ids), (uchar*) &s_id, i);
3302 slen= sprintf(sbuff, (i == 0 ? "%lu" : ", %lu"), s_id);
3303 if (cur_len + slen + 4 > FN_REFLEN)
3304 {
3305 /*
3306 break the loop whenever remained space could not fit
3307 ellipses on the next cycle
3308 */
3309 sprintf(buff + cur_len, "...");
3310 break;
3311 }
3312 cur_len += sprintf(buff + cur_len, "%s", sbuff);
3313 }
3314 protocol->store(buff, &my_charset_bin);
3315 }
3316 // Master_Server_id
3317 protocol->store((uint32) mi->master_id);
3318 protocol->store(mi->master_uuid, &my_charset_bin);
3319 // Master_Info_File
3320 protocol->store(mi->get_description_info(), &my_charset_bin);
3321 // SQL_Delay
3322 protocol->store((uint32) mi->rli->get_sql_delay());
3323 // SQL_Remaining_Delay
3324 if (slave_sql_running_state == stage_sql_thd_waiting_until_delay.m_name)
3325 {
3326 time_t t= my_time(0), sql_delay_end= mi->rli->get_sql_delay_end();
3327 protocol->store((uint32)(t < sql_delay_end ? sql_delay_end - t : 0));
3328 }
3329 else
3330 protocol->store_null();
3331 // Slave_SQL_Running_State
3332 protocol->store(slave_sql_running_state, &my_charset_bin);
3333 // Master_Retry_Count
3334 protocol->store((ulonglong) mi->retry_count);
3335 // Master_Bind
3336 protocol->store(mi->bind_addr, &my_charset_bin);
3337 // Last_IO_Error_Timestamp
3338 protocol->store(mi->last_error().timestamp, &my_charset_bin);
3339 // Last_SQL_Error_Timestamp
3340 protocol->store(mi->rli->last_error().timestamp, &my_charset_bin);
3341 // Master_Ssl_Crl
3342 protocol->store(mi->ssl_crl, &my_charset_bin);
3343 // Master_Ssl_Crlpath
3344 protocol->store(mi->ssl_crlpath, &my_charset_bin);
3345 // Retrieved_Gtid_Set
3346 protocol->store(io_gtid_set_buffer, &my_charset_bin);
3347 // Executed_Gtid_Set
3348 protocol->store(sql_gtid_set_buffer, &my_charset_bin);
3349 // Auto_Position
3350 protocol->store(mi->is_auto_position() ? 1 : 0);
3351
3352 mysql_mutex_unlock(&mi->rli->err_lock);
3353 mysql_mutex_unlock(&mi->err_lock);
3354 mysql_mutex_unlock(&mi->rli->data_lock);
3355 mysql_mutex_unlock(&mi->data_lock);
3356
3357 if (my_net_write(&thd->net, (uchar*) thd->packet.ptr(), packet->length()))
3358 {
3359 my_free(sql_gtid_set_buffer);
3360 my_free(io_gtid_set_buffer);
3361 DBUG_RETURN(true);
3362 }
3363 }
3364 my_eof(thd);
3365 my_free(sql_gtid_set_buffer);
3366 my_free(io_gtid_set_buffer);
3367 DBUG_RETURN(false);
3368 }
3369
3370
set_slave_thread_options(THD * thd)3371 void set_slave_thread_options(THD* thd)
3372 {
3373 DBUG_ENTER("set_slave_thread_options");
3374 /*
3375 It's nonsense to constrain the slave threads with max_join_size; if a
3376 query succeeded on master, we HAVE to execute it. So set
3377 OPTION_BIG_SELECTS. Setting max_join_size to HA_POS_ERROR is not enough
3378 (and it's not needed if we have OPTION_BIG_SELECTS) because an INSERT
3379 SELECT examining more than 4 billion rows would still fail (yes, because
3380 when max_join_size is 4G, OPTION_BIG_SELECTS is automatically set, but
3381 only for client threads.
3382 */
3383 ulonglong options= thd->variables.option_bits | OPTION_BIG_SELECTS;
3384 if (opt_log_slave_updates)
3385 options|= OPTION_BIN_LOG;
3386 else
3387 options&= ~OPTION_BIN_LOG;
3388 thd->variables.option_bits= options;
3389 thd->variables.completion_type= 0;
3390
3391 /*
3392 Set autocommit= 1 when info tables are used and autocommit == 0 to
3393 avoid trigger asserts on mysql_execute_command(THD *thd) caused by
3394 info tables updates which do not commit, like Rotate, Stop and
3395 skipped events handling.
3396 */
3397 if (is_autocommit_off_and_infotables(thd))
3398 {
3399 thd->variables.option_bits|= OPTION_AUTOCOMMIT;
3400 thd->variables.option_bits&= ~OPTION_NOT_AUTOCOMMIT;
3401 thd->server_status|= SERVER_STATUS_AUTOCOMMIT;
3402 }
3403
3404 DBUG_VOID_RETURN;
3405 }
3406
set_slave_thread_default_charset(THD * thd,Relay_log_info const * rli)3407 void set_slave_thread_default_charset(THD* thd, Relay_log_info const *rli)
3408 {
3409 DBUG_ENTER("set_slave_thread_default_charset");
3410
3411 thd->variables.character_set_client=
3412 global_system_variables.character_set_client;
3413 thd->variables.collation_connection=
3414 global_system_variables.collation_connection;
3415 thd->variables.collation_server=
3416 global_system_variables.collation_server;
3417 thd->update_charset();
3418
3419 /*
3420 We use a const cast here since the conceptual (and externally
3421 visible) behavior of the function is to set the default charset of
3422 the thread. That the cache has to be invalidated is a secondary
3423 effect.
3424 */
3425 const_cast<Relay_log_info*>(rli)->cached_charset_invalidate();
3426 DBUG_VOID_RETURN;
3427 }
3428
3429 /*
3430 init_slave_thread()
3431 */
3432
init_slave_thread(THD * thd,SLAVE_THD_TYPE thd_type)3433 static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
3434 {
3435 DBUG_ENTER("init_slave_thread");
3436 #if !defined(DBUG_OFF)
3437 int simulate_error= 0;
3438 #endif
3439 thd->system_thread= (thd_type == SLAVE_THD_WORKER) ?
3440 SYSTEM_THREAD_SLAVE_WORKER : (thd_type == SLAVE_THD_SQL) ?
3441 SYSTEM_THREAD_SLAVE_SQL : SYSTEM_THREAD_SLAVE_IO;
3442 thd->security_ctx->skip_grants();
3443 my_net_init(&thd->net, 0);
3444 thd->slave_thread = 1;
3445 thd->enable_slow_log= opt_log_slow_slave_statements;
3446 set_slave_thread_options(thd);
3447 mysql_mutex_lock(&LOCK_thread_count);
3448 thd->thread_id= thd->variables.pseudo_thread_id= thread_id++;
3449 mysql_mutex_unlock(&LOCK_thread_count);
3450
3451 DBUG_EXECUTE_IF("simulate_io_slave_error_on_init",
3452 simulate_error|= (1 << SLAVE_THD_IO););
3453 DBUG_EXECUTE_IF("simulate_sql_slave_error_on_init",
3454 simulate_error|= (1 << SLAVE_THD_SQL););
3455 #if !defined(DBUG_OFF)
3456 if (init_thr_lock() || thd->store_globals() || simulate_error & (1<< thd_type))
3457 #else
3458 if (init_thr_lock() || thd->store_globals())
3459 #endif
3460 {
3461 DBUG_RETURN(-1);
3462 }
3463
3464 if (thd_type == SLAVE_THD_SQL)
3465 {
3466 THD_STAGE_INFO(thd, stage_waiting_for_the_next_event_in_relay_log);
3467 }
3468 else
3469 {
3470 THD_STAGE_INFO(thd, stage_waiting_for_master_update);
3471 }
3472 thd->set_time();
3473 /* Do not use user-supplied timeout value for system threads. */
3474 thd->variables.lock_wait_timeout= LONG_TIMEOUT;
3475 DBUG_RETURN(0);
3476 }
3477
3478
3479 /**
3480 Sleep for a given amount of time or until killed.
3481
3482 @param thd Thread context of the current thread.
3483 @param seconds The number of seconds to sleep.
3484 @param func Function object to check if the thread has been killed.
3485 @param info The Rpl_info object associated with this sleep.
3486
3487 @retval True if the thread has been killed, false otherwise.
3488 */
3489 template <typename killed_func, typename rpl_info>
slave_sleep(THD * thd,time_t seconds,killed_func func,rpl_info info)3490 static inline bool slave_sleep(THD *thd, time_t seconds,
3491 killed_func func, rpl_info info)
3492 {
3493 bool ret;
3494 struct timespec abstime;
3495 mysql_mutex_t *lock= &info->sleep_lock;
3496 mysql_cond_t *cond= &info->sleep_cond;
3497
3498 /* Absolute system time at which the sleep time expires. */
3499 set_timespec(abstime, seconds);
3500
3501 mysql_mutex_lock(lock);
3502 thd->ENTER_COND(cond, lock, NULL, NULL);
3503
3504 while (! (ret= func(thd, info)))
3505 {
3506 int error= mysql_cond_timedwait(cond, lock, &abstime);
3507 if (error == ETIMEDOUT || error == ETIME)
3508 break;
3509 }
3510
3511 /* Implicitly unlocks the mutex. */
3512 thd->EXIT_COND(NULL);
3513
3514 return ret;
3515 }
3516
request_dump(THD * thd,MYSQL * mysql,Master_info * mi,bool * suppress_warnings)3517 static int request_dump(THD *thd, MYSQL* mysql, Master_info* mi,
3518 bool *suppress_warnings)
3519 {
3520 DBUG_ENTER("request_dump");
3521
3522 const int BINLOG_NAME_INFO_SIZE= strlen(mi->get_master_log_name());
3523 int error= 1;
3524 size_t command_size= 0;
3525 enum_server_command command= mi->is_auto_position() ?
3526 COM_BINLOG_DUMP_GTID : COM_BINLOG_DUMP;
3527 uchar* command_buffer= NULL;
3528 ushort binlog_flags= 0;
3529
3530 if (RUN_HOOK(binlog_relay_io,
3531 before_request_transmit,
3532 (thd, mi, binlog_flags)))
3533 goto err;
3534
3535 *suppress_warnings= false;
3536 if (command == COM_BINLOG_DUMP_GTID)
3537 {
3538 // get set of GTIDs
3539 Sid_map sid_map(NULL/*no lock needed*/);
3540 Gtid_set gtid_executed(&sid_map);
3541 global_sid_lock->wrlock();
3542 gtid_state->dbug_print();
3543
3544 /*
3545 We are unsure whether I/O thread retrieved the last gtid transaction
3546 completely or not (before it is going down because of a crash/normal
3547 shutdown/normal stop slave io_thread). It is possible that I/O thread
3548 would have retrieved and written only partial transaction events. So We
3549 request Master to send the last gtid event once again. We do this by
3550 removing the last I/O thread retrieved gtid event from
3551 "Retrieved_gtid_set". Possible cases: 1) I/O thread would have
3552 retrieved full transaction already in the first time itself, but
3553 retrieving them again will not cause problem because GTID number is
3554 same, Hence SQL thread will not commit it again. 2) I/O thread would
3555 have retrieved full transaction already and SQL thread would have
3556 already executed it. In that case, We are not going remove last
3557 retrieved gtid from "Retrieved_gtid_set" otherwise we will see gaps in
3558 "Retrieved set". The same case is handled in the below code. Please
3559 note there will be paritial transactions written in relay log but they
3560 will not cause any problem incase of transactional tables. But incase
3561 of non-transaction tables, partial trx will create inconsistency
3562 between master and slave. In that case, users need to check manually.
3563 */
3564
3565 Gtid_set * retrieved_set= (const_cast<Gtid_set *>(mi->rli->get_gtid_set()));
3566 Gtid *last_retrieved_gtid= mi->rli->get_last_retrieved_gtid();
3567
3568 /*
3569 Remove last_retrieved_gtid only if it is not part of
3570 executed_gtid_set
3571 */
3572 if (!last_retrieved_gtid->empty() &&
3573 !gtid_state->get_logged_gtids()->contains_gtid(*last_retrieved_gtid))
3574 {
3575 if (retrieved_set->_remove_gtid(*last_retrieved_gtid) != RETURN_STATUS_OK)
3576 {
3577 global_sid_lock->unlock();
3578 goto err;
3579 }
3580 }
3581
3582 if (gtid_executed.add_gtid_set(mi->rli->get_gtid_set()) != RETURN_STATUS_OK ||
3583 gtid_executed.add_gtid_set(gtid_state->get_logged_gtids()) !=
3584 RETURN_STATUS_OK)
3585 {
3586 global_sid_lock->unlock();
3587 goto err;
3588 }
3589 global_sid_lock->unlock();
3590
3591 // allocate buffer
3592 size_t encoded_data_size= gtid_executed.get_encoded_length();
3593 size_t allocation_size=
3594 ::BINLOG_FLAGS_INFO_SIZE + ::BINLOG_SERVER_ID_INFO_SIZE +
3595 ::BINLOG_NAME_SIZE_INFO_SIZE + BINLOG_NAME_INFO_SIZE +
3596 ::BINLOG_POS_INFO_SIZE + ::BINLOG_DATA_SIZE_INFO_SIZE +
3597 encoded_data_size + 1;
3598 if (!(command_buffer= (uchar *) my_malloc(allocation_size, MYF(MY_WME))))
3599 goto err;
3600 uchar* ptr_buffer= command_buffer;
3601
3602 DBUG_PRINT("info", ("Do I know something about the master? (binary log's name %s - auto position %d).",
3603 mi->get_master_log_name(), mi->is_auto_position()));
3604 /*
3605 Note: binlog_flags is always 0. However, in versions up to 5.6
3606 RC, the master would check the lowest bit and do something
3607 unexpected if it was set; in early versions of 5.6 it would also
3608 use the two next bits. Therefore, for backward compatibility,
3609 if we ever start to use the flags, we should leave the three
3610 lowest bits unused.
3611 */
3612 int2store(ptr_buffer, binlog_flags);
3613 ptr_buffer+= ::BINLOG_FLAGS_INFO_SIZE;
3614 int4store(ptr_buffer, server_id);
3615 ptr_buffer+= ::BINLOG_SERVER_ID_INFO_SIZE;
3616 int4store(ptr_buffer, BINLOG_NAME_INFO_SIZE);
3617 ptr_buffer+= ::BINLOG_NAME_SIZE_INFO_SIZE;
3618 memset(ptr_buffer, 0, BINLOG_NAME_INFO_SIZE);
3619 ptr_buffer+= BINLOG_NAME_INFO_SIZE;
3620 int8store(ptr_buffer, 4LL);
3621 ptr_buffer+= ::BINLOG_POS_INFO_SIZE;
3622
3623 int4store(ptr_buffer, encoded_data_size);
3624 ptr_buffer+= ::BINLOG_DATA_SIZE_INFO_SIZE;
3625 gtid_executed.encode(ptr_buffer);
3626 ptr_buffer+= encoded_data_size;
3627
3628 command_size= ptr_buffer - command_buffer;
3629 DBUG_ASSERT(command_size == (allocation_size - 1));
3630 }
3631 else
3632 {
3633 size_t allocation_size= ::BINLOG_POS_OLD_INFO_SIZE +
3634 BINLOG_NAME_INFO_SIZE + ::BINLOG_FLAGS_INFO_SIZE +
3635 ::BINLOG_SERVER_ID_INFO_SIZE + 1;
3636 if (!(command_buffer= (uchar *) my_malloc(allocation_size, MYF(MY_WME))))
3637 goto err;
3638 uchar* ptr_buffer= command_buffer;
3639
3640 int4store(ptr_buffer, mi->get_master_log_pos());
3641 ptr_buffer+= ::BINLOG_POS_OLD_INFO_SIZE;
3642 // See comment regarding binlog_flags above.
3643 int2store(ptr_buffer, binlog_flags);
3644 ptr_buffer+= ::BINLOG_FLAGS_INFO_SIZE;
3645 int4store(ptr_buffer, server_id);
3646 ptr_buffer+= ::BINLOG_SERVER_ID_INFO_SIZE;
3647 memcpy(ptr_buffer, mi->get_master_log_name(), BINLOG_NAME_INFO_SIZE);
3648 ptr_buffer+= BINLOG_NAME_INFO_SIZE;
3649
3650 command_size= ptr_buffer - command_buffer;
3651 DBUG_ASSERT(command_size == (allocation_size - 1));
3652 }
3653
3654 if (simple_command(mysql, command, command_buffer, command_size, 1))
3655 {
3656 /*
3657 Something went wrong, so we will just reconnect and retry later
3658 in the future, we should do a better error analysis, but for
3659 now we just fill up the error log :-)
3660 */
3661 if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
3662 *suppress_warnings= true; // Suppress reconnect warning
3663 else
3664 sql_print_error("Error on %s: %d %s, will retry in %d secs",
3665 command_name[command].str,
3666 mysql_errno(mysql), mysql_error(mysql),
3667 mi->connect_retry);
3668 goto err;
3669 }
3670 error= 0;
3671
3672 err:
3673 my_free(command_buffer);
3674 DBUG_RETURN(error);
3675 }
3676
3677
3678 /*
3679 Read one event from the master
3680
3681 SYNOPSIS
3682 read_event()
3683 mysql MySQL connection
3684 mi Master connection information
3685 suppress_warnings TRUE when a normal net read timeout has caused us to
3686 try a reconnect. We do not want to print anything to
3687 the error log in this case because this a anormal
3688 event in an idle server.
3689
3690 RETURN VALUES
3691 'packet_error' Error
3692 number Length of packet
3693 */
3694
read_event(MYSQL * mysql,Master_info * mi,bool * suppress_warnings)3695 static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings)
3696 {
3697 ulong len;
3698 DBUG_ENTER("read_event");
3699
3700 *suppress_warnings= FALSE;
3701 /*
3702 my_real_read() will time us out
3703 We check if we were told to die, and if not, try reading again
3704 */
3705 #ifndef DBUG_OFF
3706 if (disconnect_slave_event_count && !(mi->events_until_exit--))
3707 DBUG_RETURN(packet_error);
3708 #endif
3709
3710 len = cli_safe_read(mysql);
3711 if (len == packet_error || (long) len < 1)
3712 {
3713 if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
3714 {
3715 /*
3716 We are trying a normal reconnect after a read timeout;
3717 we suppress prints to .err file as long as the reconnect
3718 happens without problems
3719 */
3720 *suppress_warnings= TRUE;
3721 }
3722 else
3723 {
3724 if (!mi->abort_slave)
3725 {
3726 sql_print_error("Error reading packet from server: %s (server_errno=%d)",
3727 mysql_error(mysql), mysql_errno(mysql));
3728 }
3729 }
3730 DBUG_RETURN(packet_error);
3731 }
3732
3733 /* Check if eof packet */
3734 if (len < 8 && mysql->net.read_pos[0] == 254)
3735 {
3736 sql_print_information("Slave: received end packet from server due to dump "
3737 "thread being killed on master. Dump threads are "
3738 "killed for example during master shutdown, "
3739 "explicitly by a user, or when the master receives "
3740 "a binlog send request from a duplicate server "
3741 "UUID <%s> : Error %s", ::server_uuid,
3742 mysql_error(mysql));
3743 DBUG_RETURN(packet_error);
3744 }
3745
3746 DBUG_PRINT("exit", ("len: %lu net->read_pos[4]: %d",
3747 len, mysql->net.read_pos[4]));
3748 DBUG_RETURN(len - 1);
3749 }
3750
3751
3752 /**
3753 If this is a lagging slave (specified with CHANGE MASTER TO MASTER_DELAY = X), delays accordingly. Also unlocks rli->data_lock.
3754
3755 Design note: this is the place to unlock rli->data_lock. The lock
3756 must be held when reading delay info from rli, but it should not be
3757 held while sleeping.
3758
3759 @param ev Event that is about to be executed.
3760
3761 @param thd The sql thread's THD object.
3762
3763 @param rli The sql thread's Relay_log_info structure.
3764
3765 @retval 0 If the delay timed out and the event shall be executed.
3766
3767 @retval nonzero If the delay was interrupted and the event shall be skipped.
3768 */
sql_delay_event(Log_event * ev,THD * thd,Relay_log_info * rli)3769 static int sql_delay_event(Log_event *ev, THD *thd, Relay_log_info *rli)
3770 {
3771 long sql_delay= rli->get_sql_delay();
3772
3773 DBUG_ENTER("sql_delay_event");
3774 mysql_mutex_assert_owner(&rli->data_lock);
3775 DBUG_ASSERT(!rli->belongs_to_client());
3776
3777 int type= ev->get_type_code();
3778 if (sql_delay && type != ROTATE_EVENT &&
3779 type != FORMAT_DESCRIPTION_EVENT && type != START_EVENT_V3)
3780 {
3781 // The time when we should execute the event.
3782 time_t sql_delay_end=
3783 ev->when.tv_sec + rli->mi->clock_diff_with_master + sql_delay;
3784 // The current time.
3785 time_t now= my_time(0);
3786 // The time we will have to sleep before executing the event.
3787 unsigned long nap_time= 0;
3788 if (sql_delay_end > now)
3789 nap_time= sql_delay_end - now;
3790
3791 DBUG_PRINT("info", ("sql_delay= %lu "
3792 "ev->when= %lu "
3793 "rli->mi->clock_diff_with_master= %lu "
3794 "now= %ld "
3795 "sql_delay_end= %ld "
3796 "nap_time= %ld",
3797 sql_delay, (long) ev->when.tv_sec,
3798 rli->mi->clock_diff_with_master,
3799 (long)now, (long)sql_delay_end, (long)nap_time));
3800
3801 if (sql_delay_end > now)
3802 {
3803 DBUG_PRINT("info", ("delaying replication event %lu secs",
3804 nap_time));
3805 rli->start_sql_delay(sql_delay_end);
3806 mysql_mutex_unlock(&rli->data_lock);
3807 DBUG_RETURN(slave_sleep(thd, nap_time, sql_slave_killed, rli));
3808 }
3809 }
3810
3811 mysql_mutex_unlock(&rli->data_lock);
3812
3813 DBUG_RETURN(0);
3814 }
3815
/**
  Three-way comparison of two ulong values, usable as a sort_dynamic
  comparison function.

  @param id1 Pointer to the left-hand value.
  @param id2 Pointer to the right-hand value.

  @return -1 if *id1 < *id2, 1 if *id1 > *id2, 0 if they are equal
          (as specified by @c qsort_cmp).
*/
int ulong_cmp(ulong *id1, ulong *id2)
{
  if (*id1 < *id2)
    return -1;
  if (*id1 > *id2)
    return 1;
  return 0;
}
3824
3825 /**
3826 Applies the given event and advances the relay log position.
3827
3828 This is needed by the sql thread to execute events from the binlog,
3829 and by clients executing BINLOG statements. Conceptually, this
3830 function does:
3831
3832 @code
3833 ev->apply_event(rli);
3834 ev->update_pos(rli);
3835 @endcode
3836
3837 It also does the following maintainance:
3838
3839 - Initializes the thread's server_id and time; and the event's
3840 thread.
3841
3842 - If !rli->belongs_to_client() (i.e., if it belongs to the slave
3843 sql thread instead of being used for executing BINLOG
3844 statements), it does the following things: (1) skips events if it
3845 is needed according to the server id or slave_skip_counter; (2)
3846 unlocks rli->data_lock; (3) sleeps if required by 'CHANGE MASTER
3847 TO MASTER_DELAY=X'; (4) maintains the running state of the sql
3848 thread (rli->thread_state).
3849
3850 - Reports errors as needed.
3851
3852 @param ptr_ev a pointer to a reference to the event to apply.
3853
3854 @param thd The client thread that executes the event (i.e., the
3855 slave sql thread if called from a replication slave, or the client
3856 thread if called to execute a BINLOG statement).
3857
3858 @param rli The relay log info (i.e., the slave's rli if called from
3859 a replication slave, or the client's thd->rli_fake if called to
3860 execute a BINLOG statement).
3861
3862 @note MTS can store NULL to @c ptr_ev location to indicate
3863 the event is taken over by a Worker.
3864
3865 @retval SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK
3866 OK.
3867
3868 @retval SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPLY_ERROR
3869 Error calling ev->apply_event().
3870
3871 @retval SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR
3872 No error calling ev->apply_event(), but error calling
3873 ev->update_pos().
3874
3875 @retval SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR
3876 append_item_to_jobs() failed, thread was killed while waiting
3877 for successful enqueue on worker.
3878 */
3879 enum enum_slave_apply_event_and_update_pos_retval
apply_event_and_update_pos(Log_event ** ptr_ev,THD * thd,Relay_log_info * rli)3880 apply_event_and_update_pos(Log_event** ptr_ev, THD* thd, Relay_log_info* rli)
3881 {
3882 int exec_res= 0;
3883 bool skip_event= FALSE;
3884 Log_event *ev= *ptr_ev;
3885 Log_event::enum_skip_reason reason= Log_event::EVENT_SKIP_NOT;
3886
3887 DBUG_ENTER("apply_event_and_update_pos");
3888
3889 DBUG_PRINT("exec_event",("%s(type_code: %d; server_id: %d)",
3890 ev->get_type_str(), ev->get_type_code(),
3891 ev->server_id));
3892 DBUG_PRINT("info", ("thd->options: %s%s; rli->last_event_start_time: %lu",
3893 FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
3894 FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
3895 (ulong) rli->last_event_start_time));
3896
3897 /*
3898 Execute the event to change the database and update the binary
3899 log coordinates, but first we set some data that is needed for
3900 the thread.
3901
3902 The event will be executed unless it is supposed to be skipped.
3903
3904 Queries originating from this server must be skipped. Low-level
3905 events (Format_description_log_event, Rotate_log_event,
3906 Stop_log_event) from this server must also be skipped. But for
3907 those we don't want to modify 'group_master_log_pos', because
3908 these events did not exist on the master.
3909 Format_description_log_event is not completely skipped.
3910
3911 Skip queries specified by the user in 'slave_skip_counter'. We
3912 can't however skip events that has something to do with the log
3913 files themselves.
3914
3915 Filtering on own server id is extremely important, to ignore
3916 execution of events created by the creation/rotation of the relay
3917 log (remember that now the relay log starts with its Format_desc,
3918 has a Rotate etc).
3919 */
3920 /*
3921 Set the unmasked and actual server ids from the event
3922 */
3923 thd->server_id = ev->server_id; // use the original server id for logging
3924 thd->unmasked_server_id = ev->unmasked_server_id;
3925 thd->set_time(); // time the query
3926 thd->lex->current_select= 0;
3927 if (!ev->when.tv_sec)
3928 my_micro_time_to_timeval(my_micro_time(), &ev->when);
3929 ev->thd = thd; // because up to this point, ev->thd == 0
3930
3931 if (!(rli->is_mts_recovery() && bitmap_is_set(&rli->recovery_groups,
3932 rli->mts_recovery_index)))
3933 {
3934 reason= ev->shall_skip(rli);
3935 }
3936 #ifndef DBUG_OFF
3937 if (rli->is_mts_recovery())
3938 {
3939 DBUG_PRINT("mts", ("Mts is recovering %d, number of bits set %d, "
3940 "bitmap is set %d, index %lu.\n",
3941 rli->is_mts_recovery(),
3942 bitmap_bits_set(&rli->recovery_groups),
3943 bitmap_is_set(&rli->recovery_groups,
3944 rli->mts_recovery_index),
3945 rli->mts_recovery_index));
3946 }
3947 #endif
3948 #ifdef WITH_WSREP
3949 if (wsrep_preordered_opt && WSREP_ON &&
3950 (ev->get_type_code() == QUERY_EVENT ||
3951 ev->get_type_code() == XID_EVENT ||
3952 ev->get_type_code() == TABLE_MAP_EVENT ||
3953 ev->get_type_code() == WRITE_ROWS_EVENT ||
3954 ev->get_type_code() == UPDATE_ROWS_EVENT ||
3955 ev->get_type_code() == DELETE_ROWS_EVENT ||
3956 ev->get_type_code() == GTID_LOG_EVENT))
3957 {
3958 if (ev->get_type_code() == GTID_LOG_EVENT)
3959 {
3960 thd->wsrep_po_sid= *((Gtid_log_event*)ev)->get_sid();
3961 }
3962 wsrep_status_t err;
3963 if (thd->wsrep_po_cnt == 0)
3964 {
3965 /* First event in write set, write format description event
3966 as a write set header so that the receiver will know how
3967 to interpret following events. */
3968 Log_event* fde= rli->get_rli_description_event();
3969 ulong len= uint4korr(fde->temp_buf + EVENT_LEN_OFFSET);
3970 wsrep_buf_t data= {fde->temp_buf, len};
3971 if ((err= wsrep->preordered_collect(
3972 wsrep, &thd->wsrep_po_handle, &data, 1, true)) != WSREP_OK)
3973 {
3974 WSREP_ERROR("wsrep preordered collect failed: %d", err);
3975 if (err == WSREP_TRX_FAIL &&
3976 wsrep->preordered_commit(wsrep, &thd->wsrep_po_handle, NULL,
3977 0, 0, false))
3978 {
3979 WSREP_WARN("failed to cancel preordered write set");
3980 }
3981 DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPLY_ERROR);
3982 }
3983 }
3984 ++thd->wsrep_po_cnt;
3985 ulong len= uint4korr(ev->temp_buf + EVENT_LEN_OFFSET);
3986 wsrep_buf_t data= {ev->temp_buf, len};
3987 if ((err= wsrep->preordered_collect(wsrep, &thd->wsrep_po_handle,
3988 &data, 1, 1)) != WSREP_OK)
3989 {
3990 WSREP_ERROR("wsrep preordered collect failed: %d", err);
3991 DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPLY_ERROR);
3992 }
3993
3994 if (ev->get_type_code() == QUERY_EVENT &&
3995 ((Query_log_event*)ev)->starts_group())
3996 {
3997 thd->wsrep_po_in_trans= TRUE;
3998 }
3999 else if (ev->get_type_code() == XID_EVENT ||
4000 (ev->get_type_code() == QUERY_EVENT &&
4001 (thd->wsrep_po_in_trans == FALSE ||
4002 ((Query_log_event*)ev)->ends_group())))
4003 {
4004 int flags= WSREP_FLAG_COMMIT | (thd->wsrep_po_in_trans == FALSE ?
4005 WSREP_FLAG_ISOLATION : 0);
4006 thd->wsrep_po_in_trans= FALSE;
4007 thd->wsrep_po_cnt= 0;
4008 wsrep_uuid_t source;
4009 memcpy(source.data, thd->wsrep_po_sid.bytes, sizeof(source.data));
4010 if ((err= wsrep->preordered_commit(wsrep, &thd->wsrep_po_handle,
4011 &source, flags, 1, true)) != WSREP_OK)
4012 {
4013 WSREP_ERROR("failed to commit preordered event: %d", err);
4014 DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPLY_ERROR);
4015 }
4016 }
4017 reason= Log_event::EVENT_SKIP_IGNORE;
4018 skip_event= TRUE;
4019 }
4020 else if (WSREP_ON && (ev->get_type_code() == XID_EVENT ||
4021 (ev->get_type_code() == QUERY_EVENT && thd->wsrep_mysql_replicated > 0 &&
4022 (!strncasecmp(((Query_log_event*)ev)->query , "BEGIN", 5) ||
4023 !strncasecmp(((Query_log_event*)ev)->query , "COMMIT", 6) ))))
4024 {
4025 if (++thd->wsrep_mysql_replicated < (int)wsrep_mysql_replication_bundle)
4026 {
4027 WSREP_DEBUG("skipping wsrep commit %d", thd->wsrep_mysql_replicated);
4028 reason = Log_event::EVENT_SKIP_IGNORE;
4029 }
4030 else
4031 {
4032 thd->wsrep_mysql_replicated = 0;
4033 }
4034 }
4035 #endif /* WITH_WSREP */
4036 if (reason == Log_event::EVENT_SKIP_COUNT)
4037 {
4038 sql_slave_skip_counter= --rli->slave_skip_counter;
4039 skip_event= TRUE;
4040 }
4041 if (reason == Log_event::EVENT_SKIP_NOT)
4042 {
4043 // Sleeps if needed, and unlocks rli->data_lock.
4044 if (sql_delay_event(ev, thd, rli))
4045 DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK);
4046
4047 exec_res= ev->apply_event(rli);
4048 DBUG_EXECUTE_IF("simulate_stop_when_mts_in_group",
4049 if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP
4050 && rli->curr_group_seen_begin)
4051 DBUG_SET("+d,stop_when_mts_in_group"););
4052 #ifdef WITH_WSREP
4053 if (WSREP_ON && exec_res)
4054 {
4055 mysql_mutex_lock(&thd->LOCK_wsrep_thd);
4056 switch(thd->wsrep_conflict_state) {
4057 case NO_CONFLICT: break;
4058 case MUST_REPLAY:
4059 /* this transaction will be replayed,
4060 so not raising slave error here */
4061 WSREP_DEBUG("SQL apply failed for MUST_REPLAY, res %d", exec_res);
4062 wsrep_replay_transaction(thd);
4063 switch (thd->wsrep_conflict_state) {
4064 case NO_CONFLICT:
4065 exec_res = 0; /* replaying succeeded, and slave may continue */
4066 break;
4067 case ABORTED:
4068 WSREP_WARN("aborted result of slave transaction replaying: %lu, %d",
4069 thd->thread_id, thd->wsrep_conflict_state);
4070 break; /* replaying has failed, trx is rolled back */
4071 default:
4072 WSREP_WARN("unexpected result of slave transaction replaying: %lu, %d",
4073 thd->thread_id, thd->wsrep_conflict_state);
4074 }
4075
4076 break;
4077 default:
4078 WSREP_DEBUG("SQL apply failed, res %d conflict state: %d",
4079 exec_res, thd->wsrep_conflict_state);
4080 rli->abort_slave= 1;
4081 rli->report(ERROR_LEVEL, ER_UNKNOWN_COM_ERROR,
4082 "Node has dropped from cluster");
4083 break;
4084 }
4085 mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
4086 }
4087 #endif
4088
4089 if (!exec_res && (ev->worker != rli))
4090 {
4091 if (ev->worker)
4092 {
4093 Slave_job_item item= {ev}, *job_item= &item;
4094 Slave_worker *w= (Slave_worker *) ev->worker;
4095 // specially marked group typically with OVER_MAX_DBS_IN_EVENT_MTS db:s
4096 bool need_sync= ev->is_mts_group_isolated();
4097
4098 // all events except BEGIN-query must be marked with a non-NULL Worker
4099 DBUG_ASSERT(((Slave_worker*) ev->worker) == rli->last_assigned_worker);
4100
4101 DBUG_PRINT("Log_event::apply_event:",
4102 ("-> job item data %p to W_%lu", job_item->data, w->id));
4103
4104 // Reset mts in-group state
4105 if (rli->mts_group_status == Relay_log_info::MTS_END_GROUP)
4106 {
4107 // CGAP cleanup
4108 for (uint i= rli->curr_group_assigned_parts.elements; i > 0; i--)
4109 delete_dynamic_element(&rli->
4110 curr_group_assigned_parts, i - 1);
4111 // reset the B-group and Gtid-group marker
4112 rli->curr_group_seen_begin= rli->curr_group_seen_gtid= false;
4113 rli->last_assigned_worker= NULL;
4114 }
4115 /*
4116 Stroring GAQ index of the group that the event belongs to
4117 in the event. Deferred events are handled similarly below.
4118 */
4119 ev->mts_group_idx= rli->gaq->assigned_group_index;
4120
4121 bool append_item_to_jobs_error= false;
4122 if (rli->curr_group_da.elements > 0)
4123 {
4124 /*
4125 the current event sorted out which partion the current group
4126 belongs to. It's time now to processed deferred array events.
4127 */
4128 for (uint i= 0; i < rli->curr_group_da.elements; i++)
4129 {
4130 Slave_job_item da_item;
4131 get_dynamic(&rli->curr_group_da, (uchar*) &da_item.data, i);
4132 DBUG_PRINT("mts", ("Assigning job %llu to worker %lu",
4133 ((Log_event* )da_item.data)->log_pos, w->id));
4134 static_cast<Log_event*>(da_item.data)->mts_group_idx=
4135 rli->gaq->assigned_group_index; // similarly to above
4136 if (!append_item_to_jobs_error)
4137 append_item_to_jobs_error= append_item_to_jobs(&da_item, w, rli);
4138 if (append_item_to_jobs_error)
4139 delete static_cast<Log_event*>(da_item.data);
4140 }
4141 if (rli->curr_group_da.elements > rli->curr_group_da.max_element)
4142 {
4143 // reallocate to less mem
4144 rli->curr_group_da.elements= rli->curr_group_da.max_element;
4145 rli->curr_group_da.max_element= 0;
4146 freeze_size(&rli->curr_group_da); // restores max_element
4147 }
4148 rli->curr_group_da.elements= 0;
4149 }
4150 if (append_item_to_jobs_error)
4151 DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR);
4152
4153 DBUG_PRINT("mts", ("Assigning job %llu to worker %lu\n",
4154 ((Log_event* )job_item->data)->log_pos, w->id));
4155
4156 /* Notice `ev' instance can be destoyed after `append()' */
4157 if (append_item_to_jobs(job_item, w, rli))
4158 DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR);
4159 if (need_sync)
4160 {
4161 /*
4162 combination of over-max db:s and end of the current group
4163 forces to wait for the assigned groups completion by assigned
4164 to the event worker.
4165 Indeed MTS group status could be safely set to MTS_NOT_IN_GROUP
4166 after wait_() returns.
4167 No need to know a possible error out of synchronization call.
4168 */
4169 (void) wait_for_workers_to_finish(rli);
4170 }
4171
4172 }
4173 *ptr_ev= NULL; // announcing the event is passed to w-worker
4174
4175 if (log_warnings > 1 &&
4176 rli->is_parallel_exec() && rli->mts_events_assigned % 1024 == 1)
4177 {
4178 time_t my_now= my_time(0);
4179
4180 if ((my_now - rli->mts_last_online_stat) >=
4181 mts_online_stat_period)
4182 {
4183 sql_print_information("Multi-threaded slave statistics: "
4184 "seconds elapsed = %lu; "
4185 "events assigned = %llu; "
4186 "worker queues filled over overrun level = %lu; "
4187 "waited due a Worker queue full = %lu; "
4188 "waited due the total size = %lu; "
4189 "slept when Workers occupied = %lu ",
4190 static_cast<unsigned long>
4191 (my_now - rli->mts_last_online_stat),
4192 rli->mts_events_assigned,
4193 rli->mts_wq_overrun_cnt,
4194 rli->mts_wq_overfill_cnt,
4195 rli->wq_size_waits_cnt,
4196 rli->mts_wq_no_underrun_cnt);
4197 rli->mts_last_online_stat= my_now;
4198 }
4199 }
4200 }
4201 }
4202 else
4203 mysql_mutex_unlock(&rli->data_lock);
4204
4205 DBUG_PRINT("info", ("apply_event error = %d", exec_res));
4206 if (exec_res == 0)
4207 {
4208 /*
4209 Positions are not updated here when an XID is processed. To make
4210 a slave crash-safe, positions must be updated while processing a
4211 XID event and as such do not need to be updated here again.
4212
4213 However, if the event needs to be skipped, this means that it
4214 will not be processed and then positions need to be updated here.
4215
4216 See sql/rpl_rli.h for further details.
4217 */
4218 int error= 0;
4219 if (*ptr_ev &&
4220 (ev->get_type_code() != XID_EVENT ||
4221 skip_event || (rli->is_mts_recovery() && !is_gtid_event(ev) &&
4222 (ev->ends_group() || !rli->mts_recovery_group_seen_begin) &&
4223 bitmap_is_set(&rli->recovery_groups, rli->mts_recovery_index))))
4224 {
4225 #ifndef DBUG_OFF
4226 /*
4227 This only prints information to the debug trace.
4228
4229 TODO: Print an informational message to the error log?
4230 */
4231 static const char *const explain[] = {
4232 // EVENT_SKIP_NOT,
4233 "not skipped",
4234 // EVENT_SKIP_IGNORE,
4235 "skipped because event should be ignored",
4236 // EVENT_SKIP_COUNT
4237 "skipped because event skip counter was non-zero"
4238 };
4239 DBUG_PRINT("info", ("OPTION_BEGIN: %d; IN_STMT: %d",
4240 MY_TEST(thd->variables.option_bits & OPTION_BEGIN),
4241 rli->get_flag(Relay_log_info::IN_STMT)));
4242 DBUG_PRINT("skip_event", ("%s event was %s",
4243 ev->get_type_str(), explain[reason]));
4244 #endif
4245
4246 error= ev->update_pos(rli);
4247
4248 #ifndef DBUG_OFF
4249 DBUG_PRINT("info", ("update_pos error = %d", error));
4250 if (!rli->belongs_to_client())
4251 {
4252 char buf[22];
4253 DBUG_PRINT("info", ("group %s %s",
4254 llstr(rli->get_group_relay_log_pos(), buf),
4255 rli->get_group_relay_log_name()));
4256 DBUG_PRINT("info", ("event %s %s",
4257 llstr(rli->get_event_relay_log_pos(), buf),
4258 rli->get_event_relay_log_name()));
4259 }
4260 #endif
4261 }
4262 else
4263 {
4264 DBUG_ASSERT(*ptr_ev == ev || rli->is_parallel_exec() ||
4265 (!ev->worker &&
4266 (ev->get_type_code() == INTVAR_EVENT ||
4267 ev->get_type_code() == RAND_EVENT ||
4268 ev->get_type_code() == USER_VAR_EVENT)));
4269
4270 rli->inc_event_relay_log_pos();
4271 }
4272
4273 if (!error && rli->is_mts_recovery() &&
4274 ev->get_type_code() != ROTATE_EVENT &&
4275 ev->get_type_code() != FORMAT_DESCRIPTION_EVENT &&
4276 ev->get_type_code() != PREVIOUS_GTIDS_LOG_EVENT)
4277 {
4278 if (ev->starts_group())
4279 {
4280 rli->mts_recovery_group_seen_begin= true;
4281 }
4282 else if ((ev->ends_group() || !rli->mts_recovery_group_seen_begin) &&
4283 !is_gtid_event(ev))
4284 {
4285 rli->mts_recovery_index++;
4286 if (--rli->mts_recovery_group_cnt == 0)
4287 {
4288 rli->mts_recovery_index= 0;
4289 sql_print_information("Slave: MTS Recovery has completed at "
4290 "relay log %s, position %llu "
4291 "master log %s, position %llu.",
4292 rli->get_group_relay_log_name(),
4293 rli->get_group_relay_log_pos(),
4294 rli->get_group_master_log_name(),
4295 rli->get_group_master_log_pos());
4296 /*
4297 Few tests wait for UNTIL_SQL_AFTER_MTS_GAPS completion.
4298 Due to exisiting convention the status won't change
4299 prior to slave restarts.
4300 So making of UNTIL_SQL_AFTER_MTS_GAPS completion isdone here,
4301 and only in the debug build to make the test to catch the change
4302 despite a faulty design of UNTIL checking before execution.
4303 */
4304 if (rli->until_condition == Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS)
4305 {
4306 rli->until_condition= Relay_log_info::UNTIL_DONE;
4307 }
4308 // reset the Worker tables to remove last slave session time info
4309 if ((error= rli->mts_finalize_recovery()))
4310 {
4311 (void) Rpl_info_factory::reset_workers(rli);
4312 }
4313 }
4314 rli->mts_recovery_group_seen_begin= false;
4315 if (!error)
4316 error= rli->flush_info(true);
4317 }
4318 }
4319
4320 if (error)
4321 {
4322 /*
4323 The update should not fail, so print an error message and
4324 return an error code.
4325
4326 TODO: Replace this with a decent error message when merged
4327 with BUG#24954 (which adds several new error message).
4328 */
4329 char buf[22];
4330 rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR,
4331 "It was not possible to update the positions"
4332 " of the relay log information: the slave may"
4333 " be in an inconsistent state."
4334 " Stopped in %s position %s",
4335 rli->get_group_relay_log_name(),
4336 llstr(rli->get_group_relay_log_pos(), buf));
4337 DBUG_RETURN(SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR);
4338 }
4339 }
4340
4341 DBUG_RETURN(exec_res ? SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPLY_ERROR
4342 : SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK);
4343 }
4344
4345 /**
4346 Let the worker applying the current group to rollback and gracefully
4347 finish its work before.
4348
4349 @param rli The slave's relay log info.
4350
4351 @param ev a pointer to the event on hold before applying this rollback
4352 procedure.
4353
4354 @retval false The rollback succeeded.
4355
4356 @retval true There was an error while injecting events.
4357 */
coord_handle_partial_binlogged_transaction(Relay_log_info * rli,const Log_event * ev)4358 static bool coord_handle_partial_binlogged_transaction(Relay_log_info *rli,
4359 const Log_event *ev)
4360 {
4361 DBUG_ENTER("coord_handle_partial_binlogged_transaction");
4362 /*
4363 This function is called holding the rli->data_lock.
4364 We must return it still holding this lock, except in the case of returning
4365 error.
4366 */
4367 mysql_mutex_assert_owner(&rli->data_lock);
4368 THD *thd= rli->info_thd;
4369
4370 if (!rli->curr_group_seen_begin)
4371 {
4372 DBUG_PRINT("info",("Injecting QUERY(BEGIN) to rollback worker"));
4373 Log_event *begin_event= new Query_log_event(thd,
4374 STRING_WITH_LEN("BEGIN"),
4375 true, /* using_trans */
4376 false, /* immediate */
4377 true, /* suppress_use */
4378 0, /* error */
4379 true /* ignore_command */);
4380 ((Query_log_event*) begin_event)->db= "";
4381 begin_event->data_written= 0;
4382 begin_event->server_id= ev->server_id;
4383 /*
4384 We must be careful to avoid SQL thread increasing its position
4385 farther than the event that triggered this QUERY(BEGIN).
4386 */
4387 begin_event->log_pos= ev->log_pos;
4388 begin_event->future_event_relay_log_pos= ev->future_event_relay_log_pos;
4389
4390 if (apply_event_and_update_pos(&begin_event, thd, rli) !=
4391 SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK)
4392 {
4393 delete begin_event;
4394 DBUG_RETURN(true);
4395 }
4396 mysql_mutex_lock(&rli->data_lock);
4397 }
4398
4399 DBUG_PRINT("info",("Injecting QUERY(ROLLBACK) to rollback worker"));
4400 Log_event *rollback_event= new Query_log_event(thd,
4401 STRING_WITH_LEN("ROLLBACK"),
4402 true, /* using_trans */
4403 false, /* immediate */
4404 true, /* suppress_use */
4405 0, /* error */
4406 true /* ignore_command */);
4407 ((Query_log_event*) rollback_event)->db= "";
4408 rollback_event->data_written= 0;
4409 rollback_event->server_id= ev->server_id;
4410 /*
4411 We must be careful to avoid SQL thread increasing its position
4412 farther than the event that triggered this QUERY(ROLLBACK).
4413 */
4414 rollback_event->log_pos= ev->log_pos;
4415 rollback_event->future_event_relay_log_pos= ev->future_event_relay_log_pos;
4416
4417 if (apply_event_and_update_pos(&rollback_event, thd, rli) !=
4418 SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK)
4419 {
4420 delete rollback_event;
4421 DBUG_RETURN(true);
4422 }
4423 mysql_mutex_lock(&rli->data_lock);
4424
4425 DBUG_RETURN(false);
4426 }
4427
4428 /**
4429 Top-level function for executing the next event in the relay log.
4430 This is called from the SQL thread.
4431
4432 This function reads the event from the relay log, executes it, and
4433 advances the relay log position. It also handles errors, etc.
4434
4435 This function may fail to apply the event for the following reasons:
4436
   - The position specified by the UNTIL condition of the START SLAVE
4438 command is reached.
4439
4440 - It was not possible to read the event from the log.
4441
4442 - The slave is killed.
4443
4444 - An error occurred when applying the event, and the event has been
4445 tried slave_trans_retries times. If the event has been retried
4446 fewer times, 0 is returned.
4447
4448 - init_info or init_relay_log_pos failed. (These are called
4449 if a failure occurs when applying the event.)
4450
4451 - An error occurred when updating the binlog position.
4452
4453 @retval 0 The event was applied.
4454
4455 @retval 1 The event was not applied.
4456 */
exec_relay_log_event(THD * thd,Relay_log_info * rli)4457 static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
4458 {
4459 DBUG_ENTER("exec_relay_log_event");
4460
4461 /*
4462 We acquire this mutex since we need it for all operations except
4463 event execution. But we will release it in places where we will
4464 wait for something for example inside of next_event().
4465 */
4466 mysql_mutex_lock(&rli->data_lock);
4467
4468 /*
4469 UNTIL_SQL_AFTER_GTIDS requires special handling since we have to check
4470 whether the until_condition is satisfied *before* the SQL threads goes on
4471 a wait inside next_event() for the relay log to grow. This is reuired since
4472 if we have already applied the last event in the waiting set but since he
4473 check happens only at the start of the next event we may end up waiting
4474 forever the next event is not available or is delayed.
4475 */
4476 if (rli->until_condition == Relay_log_info::UNTIL_SQL_AFTER_GTIDS &&
4477 rli->is_until_satisfied(thd, NULL))
4478 {
4479 rli->abort_slave= 1;
4480 mysql_mutex_unlock(&rli->data_lock);
4481 DBUG_RETURN(1);
4482 }
4483
4484 Log_event *ev = next_event(rli), **ptr_ev;
4485
4486 DBUG_ASSERT(rli->info_thd==thd);
4487
4488 if (sql_slave_killed(thd,rli))
4489 {
4490 mysql_mutex_unlock(&rli->data_lock);
4491 delete ev;
4492 DBUG_RETURN(1);
4493 }
4494 if (ev)
4495 {
4496 enum enum_slave_apply_event_and_update_pos_retval exec_res;
4497
4498 ptr_ev= &ev;
4499 /*
4500 Even if we don't execute this event, we keep the master timestamp,
4501 so that seconds behind master shows correct delta (there are events
4502 that are not replayed, so we keep falling behind).
4503
4504 If it is an artificial event, or a relay log event (IO thread generated
4505 event) or ev->when is set to 0, or a FD from master, or a heartbeat
4506 event with server_id '0' then we don't update the last_master_timestamp.
4507 */
4508 if (!(rli->is_parallel_exec() ||
4509 ev->is_artificial_event() || ev->is_relay_log_event() ||
4510 ev->when.tv_sec == 0 || ev->get_type_code() == FORMAT_DESCRIPTION_EVENT ||
4511 ev->server_id == 0))
4512 {
4513 rli->last_master_timestamp= ev->when.tv_sec + (time_t) ev->exec_time;
4514 DBUG_ASSERT(rli->last_master_timestamp >= 0);
4515 }
4516
4517 /*
4518 This tests if the position of the beginning of the current event
4519 hits the UNTIL barrier.
4520 MTS: since the master and the relay-group coordinates change
4521 asynchronously logics of rli->is_until_satisfied() can't apply.
4522 A special UNTIL_SQL_AFTER_MTS_GAPS is still deployed here
4523 temporarily (see is_until_satisfied todo).
4524 */
4525 if (rli->until_condition != Relay_log_info::UNTIL_NONE &&
4526 rli->until_condition != Relay_log_info::UNTIL_SQL_AFTER_GTIDS &&
4527 rli->is_until_satisfied(thd, ev))
4528 {
4529 /*
4530 Setting abort_slave flag because we do not want additional message about
4531 error in query execution to be printed.
4532 */
4533 rli->abort_slave= 1;
4534 mysql_mutex_unlock(&rli->data_lock);
4535 delete ev;
4536 DBUG_RETURN(1);
4537 }
4538
4539 { /**
4540 The following failure injecion works in cooperation with tests
4541 setting @@global.debug= 'd,incomplete_group_in_relay_log'.
4542 Xid or Commit events are not executed to force the slave sql
4543 read hanging if the realy log does not have any more events.
4544 */
4545 DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
4546 if ((ev->get_type_code() == XID_EVENT) ||
4547 ((ev->get_type_code() == QUERY_EVENT) &&
4548 strcmp("COMMIT", ((Query_log_event *) ev)->query) == 0))
4549 {
4550 DBUG_ASSERT(thd->transaction.all.cannot_safely_rollback());
4551 rli->abort_slave= 1;
4552 mysql_mutex_unlock(&rli->data_lock);
4553 delete ev;
4554 rli->inc_event_relay_log_pos();
4555 DBUG_RETURN(0);
4556 };);
4557 }
4558
4559 /*
4560 GTID protocol will put a FORMAT_DESCRIPTION_EVENT from the master with
4561 log_pos != 0 after each (re)connection if auto positioning is enabled.
4562 This means that the SQL thread might have already started to apply the
4563 current group but, as the IO thread had to reconnect, it left this
4564 group incomplete and will start it again from the beginning.
4565 So, before applying this FORMAT_DESCRIPTION_EVENT, we must let the
4566 worker roll back the current group and gracefully finish its work,
4567 before starting to apply the new (complete) copy of the group.
4568 */
4569 if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
4570 ev->server_id != ::server_id && ev->log_pos != 0 &&
4571 rli->is_parallel_exec() && rli->curr_group_seen_gtid)
4572 {
4573 if (coord_handle_partial_binlogged_transaction(rli, ev))
4574 /*
4575 In the case of an error, coord_handle_partial_binlogged_transaction
4576 will not try to get the rli->data_lock again.
4577 */
4578 DBUG_RETURN(1);
4579 }
4580
4581 /* *ptr_ev can change to NULL, indicating the MTS coordinator passed the event to a Worker */
4582 exec_res= apply_event_and_update_pos(ptr_ev, thd, rli);
4583 /*
4584 Note: the above call to apply_event_and_update_pos executes
4585 mysql_mutex_unlock(&rli->data_lock);
4586 */
4587
4588 /* For deferred events, the ptr_ev is set to NULL
4589 in the Deferred_log_events::add() function.
4590 Hence deferred events won't be deleted here.
4591 They will be deleted in the Deferred_log_events::rewind() function.
4592 */
4593 if (*ptr_ev)
4594 {
4595 DBUG_ASSERT(*ptr_ev == ev); // event remains to belong to Coordinator
4596
4597 DBUG_EXECUTE_IF("dbug.calculate_sbm_after_previous_gtid_log_event",
4598 {
4599 if (ev->get_type_code() == PREVIOUS_GTIDS_LOG_EVENT)
4600 {
4601 const char act[]= "now signal signal.reached wait_for signal.done_sbm_calculation";
4602 DBUG_ASSERT(opt_debug_sync_timeout > 0);
4603 DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act)));
4604 }
4605 };);
4606 DBUG_EXECUTE_IF("dbug.calculate_sbm_after_fake_rotate_log_event",
4607 {
4608 if (ev->get_type_code() == ROTATE_EVENT && ev->is_artificial_event())
4609 {
4610 const char act[]= "now signal signal.reached wait_for signal.done_sbm_calculation";
4611 DBUG_ASSERT(opt_debug_sync_timeout > 0);
4612 DBUG_ASSERT(!debug_sync_set_action(thd,
4613 STRING_WITH_LEN(act)));
4614 }
4615 };);
4616 /*
4617 Format_description_log_event should not be deleted because it will be
4618 used to read info about the relay log's format; it will be deleted when
4619 the SQL thread does not need it, i.e. when this thread terminates.
4620 ROWS_QUERY_LOG_EVENT is destroyed at the end of the current statement
4621 clean-up routine.
4622 */
4623 if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT &&
4624 ev->get_type_code() != ROWS_QUERY_LOG_EVENT)
4625 {
4626 DBUG_PRINT("info", ("Deleting the event after it has been executed"));
4627 delete ev;
4628 ev= NULL;
4629 }
4630 }
4631
4632 /*
4633 exec_res == SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR
4634 update_log_pos failed: this should not happen, so we
4635 don't retry.
4636 exec_res == SLAVE_APPLY_EVENT_AND_UPDATE_POS_APPEND_JOB_ERROR
4637 append_item_to_jobs() failed, this happened because
4638 thread was killed while waiting for enqueue on worker.
4639 */
4640 if (exec_res >= SLAVE_APPLY_EVENT_AND_UPDATE_POS_UPDATE_POS_ERROR)
4641 {
4642 delete ev;
4643 DBUG_RETURN(1);
4644 }
4645
4646 #ifdef WITH_WSREP
4647 if (WSREP_ON) mysql_mutex_lock(&thd->LOCK_wsrep_thd);
4648 if (!WSREP_ON || thd->wsrep_conflict_state == NO_CONFLICT)
4649 {
4650 if (WSREP_ON) mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
4651 #endif /* WITH_WSREP */
4652 if (slave_trans_retries)
4653 {
4654 int UNINIT_VAR(temp_err);
4655 bool silent= false;
4656 if (exec_res && !is_mts_worker(thd) /* no reexecution in MTS mode */ &&
4657 (temp_err= rli->has_temporary_error(thd, 0, &silent)) &&
4658 !thd->transaction.all.cannot_safely_rollback())
4659 {
4660 const char *errmsg;
4661 /*
4662 We were in a transaction which has been rolled back because of a
4663 temporary error;
4664 let's seek back to BEGIN log event and retry it all again.
4665 Note, if lock wait timeout (innodb_lock_wait_timeout exceeded)
4666 there is no rollback since 5.0.13 (ref: manual).
4667 We have to not only seek but also
4668 a) init_info(), to seek back to hot relay log's start for later
4669 (for when we will come back to this hot log after re-processing the
4670 possibly existing old logs where BEGIN is: check_binlog_magic() will
4671 then need the cache to be at position 0 (see comments at beginning of
4672 init_info()).
4673 b) init_relay_log_pos(), because the BEGIN may be an older relay log.
4674 */
4675 if (rli->trans_retries < slave_trans_retries)
4676 {
4677 /*
4678 The transactions has to be rolled back before global_init_info is
4679 called. Because global_init_info will starts a new transaction if
4680 master_info_repository is TABLE.
4681 */
4682 rli->cleanup_context(thd, 1);
4683 /*
4684 We need to figure out if there is a test case that covers
4685 this part. \Alfranio.
4686 */
4687 if (global_init_info(rli->mi, false, SLAVE_SQL))
4688 sql_print_error("Failed to initialize the master info structure");
4689 else if (rli->init_relay_log_pos(rli->get_group_relay_log_name(),
4690 rli->get_group_relay_log_pos(),
4691 true/*need_data_lock=true*/,
4692 &errmsg, 1))
4693 sql_print_error("Error initializing relay log position: %s",
4694 errmsg);
4695 else
4696 {
4697 exec_res= SLAVE_APPLY_EVENT_AND_UPDATE_POS_OK;
4698 /* chance for concurrent connection to get more locks */
4699 slave_sleep(thd, min<ulong>(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
4700 sql_slave_killed, rli);
4701 mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS
4702 if (!silent)
4703 rli->trans_retries++;
4704
4705 rli->retried_trans++;
4706 mysql_mutex_unlock(&rli->data_lock);
4707 DBUG_PRINT("info", ("Slave retries transaction "
4708 "rli->trans_retries: %lu", rli->trans_retries));
4709 }
4710 }
4711 else
4712 {
4713 thd->is_fatal_error= 1;
4714 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
4715 "Slave SQL thread retried transaction %lu time(s) "
4716 "in vain, giving up. Consider raising the value of "
4717 "the slave_transaction_retries variable.", rli->trans_retries);
4718 }
4719 }
4720 else if ((exec_res && !temp_err) ||
4721 (opt_using_transactions &&
4722 rli->get_group_relay_log_pos() == rli->get_event_relay_log_pos()))
4723 {
4724 /*
4725 Only reset the retry counter if the entire group succeeded
4726 or failed with a non-transient error. On a successful
4727 event, the execution will proceed as usual; in the case of a
4728 non-transient error, the slave will stop with an error.
4729 */
4730 rli->trans_retries= 0; // restart from fresh
4731 DBUG_PRINT("info", ("Resetting retry counter, rli->trans_retries: %lu",
4732 rli->trans_retries));
4733 }
4734 }
4735 #ifdef WITH_WSREP
4736 } else if (WSREP_ON) mysql_mutex_unlock(&thd->LOCK_wsrep_thd);
4737 #endif /* WITH_WSREP */
4738
4739 if (exec_res)
4740 delete ev;
4741 DBUG_RETURN(exec_res);
4742 }
4743 mysql_mutex_unlock(&rli->data_lock);
4744 rli->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_READ_FAILURE,
4745 ER(ER_SLAVE_RELAY_LOG_READ_FAILURE), "\
4746 Could not parse relay log event entry. The possible reasons are: the master's \
4747 binary log is corrupted (you can check this by running 'mysqlbinlog' on the \
4748 binary log), the slave's relay log is corrupted (you can check this by running \
4749 'mysqlbinlog' on the relay log), a network problem, or a bug in the master's \
4750 or slave's MySQL code. If you want to check the master's binary log or slave's \
4751 relay log, you will be able to know their names by issuing 'SHOW SLAVE STATUS' \
4752 on this slave.\
4753 ");
4754 DBUG_RETURN(1);
4755 }
4756
check_io_slave_killed(THD * thd,Master_info * mi,const char * info)4757 static bool check_io_slave_killed(THD *thd, Master_info *mi, const char *info)
4758 {
4759 if (io_slave_killed(thd, mi))
4760 {
4761 if (info && log_warnings)
4762 sql_print_information("%s", info);
4763 return TRUE;
4764 }
4765 return FALSE;
4766 }
4767
4768 /**
4769 @brief Try to reconnect slave IO thread.
4770
4771 @details Terminates current connection to master, sleeps for
4772 @c mi->connect_retry msecs and initiates new connection with
4773 @c safe_reconnect(). Variable pointed by @c retry_count is increased -
4774 if it exceeds @c mi->retry_count then connection is not re-established
4775 and function signals error.
4776 Unless @c suppres_warnings is TRUE, a warning is put in the server error log
4777 when reconnecting. The warning message and messages used to report errors
4778 are taken from @c messages array. In case @c mi->retry_count is exceeded,
4779 no messages are added to the log.
4780
4781 @param[in] thd Thread context.
4782 @param[in] mysql MySQL connection.
4783 @param[in] mi Master connection information.
4784 @param[in,out] retry_count Number of attempts to reconnect.
4785 @param[in] suppress_warnings TRUE when a normal net read timeout
4786 has caused to reconnecting.
4787 @param[in] messages Messages to print/log, see
4788 reconnect_messages[] array.
4789
4790 @retval 0 OK.
4791 @retval 1 There was an error.
4792 */
4793
try_to_reconnect(THD * thd,MYSQL * mysql,Master_info * mi,uint * retry_count,bool suppress_warnings,const char * messages[SLAVE_RECON_MSG_MAX])4794 static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
4795 uint *retry_count, bool suppress_warnings,
4796 const char *messages[SLAVE_RECON_MSG_MAX])
4797 {
4798 mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
4799 thd->proc_info= messages[SLAVE_RECON_MSG_WAIT];
4800 #ifdef SIGNAL_WITH_VIO_SHUTDOWN
4801 thd->clear_active_vio();
4802 #endif
4803 end_server(mysql);
4804 DBUG_EXECUTE_IF("simulate_no_master_reconnect",
4805 {
4806 return 1;
4807 });
4808 if ((*retry_count)++)
4809 {
4810 if (*retry_count > mi->retry_count)
4811 return 1; // Don't retry forever
4812 slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
4813 }
4814 if (check_io_slave_killed(thd, mi, messages[SLAVE_RECON_MSG_KILLED_WAITING]))
4815 return 1;
4816 thd->proc_info = messages[SLAVE_RECON_MSG_AFTER];
4817 if (!suppress_warnings)
4818 {
4819 char buf[256], llbuff[22];
4820 my_snprintf(buf, sizeof(buf), messages[SLAVE_RECON_MSG_FAILED],
4821 mi->get_io_rpl_log_name(), llstr(mi->get_master_log_pos(),
4822 llbuff));
4823 /*
4824 Raise a warining during registering on master/requesting dump.
4825 Log a message reading event.
4826 */
4827 if (messages[SLAVE_RECON_MSG_COMMAND][0])
4828 {
4829 mi->report(WARNING_LEVEL, ER_SLAVE_MASTER_COM_FAILURE,
4830 ER(ER_SLAVE_MASTER_COM_FAILURE),
4831 messages[SLAVE_RECON_MSG_COMMAND], buf);
4832 }
4833 else
4834 {
4835 sql_print_information("%s", buf);
4836 }
4837 }
4838 if (safe_reconnect(thd, mysql, mi, 1) || io_slave_killed(thd, mi))
4839 {
4840 if (log_warnings)
4841 sql_print_information("%s", messages[SLAVE_RECON_MSG_KILLED_AFTER]);
4842 return 1;
4843 }
4844 return 0;
4845 }
4846
4847
4848 /**
4849 Slave IO thread entry point.
4850
4851 @param arg Pointer to Master_info struct that holds information for
4852 the IO thread.
4853
4854 @return Always 0.
4855 */
handle_slave_io(void * arg)4856 pthread_handler_t handle_slave_io(void *arg)
4857 {
4858 THD *thd= NULL; // needs to be first for thread_stack
4859 bool thd_added= false;
4860 MYSQL *mysql;
4861 Master_info *mi = (Master_info*)arg;
4862 Relay_log_info *rli= mi->rli;
4863 char llbuff[22];
4864 uint retry_count;
4865 bool suppress_warnings;
4866 int ret;
4867 int binlog_version;
4868 #ifndef DBUG_OFF
4869 uint retry_count_reg= 0, retry_count_dump= 0, retry_count_event= 0;
4870 #endif
4871 // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
4872 my_thread_init();
4873 DBUG_ENTER("handle_slave_io");
4874
4875 DBUG_ASSERT(mi->inited);
4876 mysql= NULL ;
4877 retry_count= 0;
4878
4879 mysql_mutex_lock(&mi->run_lock);
4880 /* Inform waiting threads that slave has started */
4881 mi->slave_run_id++;
4882
4883 #ifndef DBUG_OFF
4884 mi->events_until_exit = disconnect_slave_event_count;
4885 #endif
4886
4887 thd= new THD; // note that contructor of THD uses DBUG_ !
4888 THD_CHECK_SENTRY(thd);
4889 mi->info_thd = thd;
4890
4891 pthread_detach_this_thread();
4892 thd->thread_stack= (char*) &thd; // remember where our stack is
4893 mi->clear_error();
4894 if (init_slave_thread(thd, SLAVE_THD_IO))
4895 {
4896 mysql_cond_broadcast(&mi->start_cond);
4897 mysql_mutex_unlock(&mi->run_lock);
4898 sql_print_error("Failed during slave I/O thread initialization");
4899 goto err;
4900 }
4901
4902 mysql_mutex_lock(&LOCK_thread_count);
4903 add_global_thread(thd);
4904 thd_added= true;
4905 mysql_mutex_unlock(&LOCK_thread_count);
4906
4907 mi->slave_running = 1;
4908 mi->abort_slave = 0;
4909 mysql_mutex_unlock(&mi->run_lock);
4910 mysql_cond_broadcast(&mi->start_cond);
4911
4912 DBUG_PRINT("master_info",("log_file_name: '%s' position: %s",
4913 mi->get_master_log_name(),
4914 llstr(mi->get_master_log_pos(), llbuff)));
4915
4916 /* This must be called before run any binlog_relay_io hooks */
4917 my_pthread_setspecific_ptr(RPL_MASTER_INFO, mi);
4918
4919 if (RUN_HOOK(binlog_relay_io, thread_start, (thd, mi)))
4920 {
4921 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
4922 ER(ER_SLAVE_FATAL_ERROR), "Failed to run 'thread_start' hook");
4923 goto err;
4924 }
4925
4926 if (!(mi->mysql = mysql = mysql_init(NULL)))
4927 {
4928 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
4929 ER(ER_SLAVE_FATAL_ERROR), "error in mysql_init()");
4930 goto err;
4931 }
4932
4933 THD_STAGE_INFO(thd, stage_connecting_to_master);
4934 // we can get killed during safe_connect
4935 if (!safe_connect(thd, mysql, mi))
4936 {
4937 sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
4938 "replication started in log '%s' at position %s",
4939 mi->get_user(), mi->host, mi->port,
4940 mi->get_io_rpl_log_name(),
4941 llstr(mi->get_master_log_pos(), llbuff));
4942 }
4943 else
4944 {
4945 sql_print_information("Slave I/O thread killed while connecting to master");
4946 goto err;
4947 }
4948
4949 connected:
4950
4951 DBUG_EXECUTE_IF("dbug.before_get_running_status_yes",
4952 {
4953 const char act[]=
4954 "now "
4955 "wait_for signal.io_thread_let_running";
4956 DBUG_ASSERT(opt_debug_sync_timeout > 0);
4957 DBUG_ASSERT(!debug_sync_set_action(thd,
4958 STRING_WITH_LEN(act)));
4959 };);
4960 DBUG_EXECUTE_IF("dbug.calculate_sbm_after_previous_gtid_log_event",
4961 {
4962 /* Fake that thread started 3 minutes ago */
4963 thd->start_time.tv_sec-=180;
4964 };);
4965 DBUG_EXECUTE_IF("dbug.calculate_sbm_after_fake_rotate_log_event",
4966 {
4967 /* Fake that thread started 3 minutes ago */
4968 thd->start_time.tv_sec-=180;
4969 };);
4970 mysql_mutex_lock(&mi->run_lock);
4971 mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
4972 mysql_mutex_unlock(&mi->run_lock);
4973
4974 thd->slave_net = &mysql->net;
4975 THD_STAGE_INFO(thd, stage_checking_master_version);
4976 ret= get_master_version_and_clock(mysql, mi);
4977 if (!ret)
4978 ret= get_master_uuid(mysql, mi);
4979 if (!ret)
4980 ret= io_thread_init_commands(mysql, mi);
4981
4982 if (ret == 1)
4983 /* Fatal error */
4984 goto err;
4985
4986 if (ret == 2)
4987 {
4988 if (check_io_slave_killed(mi->info_thd, mi, "Slave I/O thread killed"
4989 "while calling get_master_version_and_clock(...)"))
4990 goto err;
4991 suppress_warnings= FALSE;
4992 /* Try to reconnect because the error was caused by a transient network problem */
4993 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4994 reconnect_messages[SLAVE_RECON_ACT_REG]))
4995 goto err;
4996 goto connected;
4997 }
4998
4999 mysql_mutex_lock(&mi->data_lock);
5000 binlog_version= mi->get_mi_description_event()->binlog_version;
5001 mysql_mutex_unlock(&mi->data_lock);
5002
5003 if (binlog_version > 1)
5004 {
5005 /*
5006 Register ourselves with the master.
5007 */
5008 THD_STAGE_INFO(thd, stage_registering_slave_on_master);
5009 if (register_slave_on_master(mysql, mi, &suppress_warnings))
5010 {
5011 if (!check_io_slave_killed(thd, mi, "Slave I/O thread killed "
5012 "while registering slave on master"))
5013 {
5014 sql_print_error("Slave I/O thread couldn't register on master");
5015 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
5016 reconnect_messages[SLAVE_RECON_ACT_REG]))
5017 goto err;
5018 }
5019 else
5020 goto err;
5021 goto connected;
5022 }
5023 DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_REG",
5024 if (!retry_count_reg)
5025 {
5026 retry_count_reg++;
5027 sql_print_information("Forcing to reconnect slave I/O thread");
5028 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
5029 reconnect_messages[SLAVE_RECON_ACT_REG]))
5030 goto err;
5031 goto connected;
5032 });
5033 }
5034
5035 DBUG_PRINT("info",("Starting reading binary log from master"));
5036 while (!io_slave_killed(thd,mi))
5037 {
5038 THD_STAGE_INFO(thd, stage_requesting_binlog_dump);
5039 if (request_dump(thd, mysql, mi, &suppress_warnings))
5040 {
5041 sql_print_error("Failed on request_dump()");
5042 if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \
5043 requesting master dump") ||
5044 try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
5045 reconnect_messages[SLAVE_RECON_ACT_DUMP]))
5046 goto err;
5047 goto connected;
5048 }
5049 DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_DUMP",
5050 if (!retry_count_dump)
5051 {
5052 retry_count_dump++;
5053 sql_print_information("Forcing to reconnect slave I/O thread");
5054 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
5055 reconnect_messages[SLAVE_RECON_ACT_DUMP]))
5056 goto err;
5057 goto connected;
5058 });
5059 const char *event_buf;
5060
5061 DBUG_ASSERT(mi->last_error().number == 0);
5062 while (!io_slave_killed(thd,mi))
5063 {
5064 ulong event_len;
5065 /*
5066 We say "waiting" because read_event() will wait if there's nothing to
5067 read. But if there's something to read, it will not wait. The
5068 important thing is to not confuse users by saying "reading" whereas
5069 we're in fact receiving nothing.
5070 */
5071 THD_STAGE_INFO(thd, stage_waiting_for_master_to_send_event);
5072 event_len= read_event(mysql, mi, &suppress_warnings);
5073 if (check_io_slave_killed(thd, mi, "Slave I/O thread killed while \
5074 reading event"))
5075 goto err;
5076 DBUG_EXECUTE_IF("FORCE_SLAVE_TO_RECONNECT_EVENT",
5077 if (!retry_count_event)
5078 {
5079 retry_count_event++;
5080 sql_print_information("Forcing to reconnect slave I/O thread");
5081 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
5082 reconnect_messages[SLAVE_RECON_ACT_EVENT]))
5083 goto err;
5084 goto connected;
5085 });
5086
5087 if (event_len == packet_error)
5088 {
5089 uint mysql_error_number= mysql_errno(mysql);
5090 switch (mysql_error_number) {
5091 case CR_NET_PACKET_TOO_LARGE:
5092 sql_print_error("\
5093 Log entry on master is longer than slave_max_allowed_packet (%lu) on \
5094 slave. If the entry is correct, restart the server with a higher value of \
5095 slave_max_allowed_packet",
5096 slave_max_allowed_packet);
5097 mi->report(ERROR_LEVEL, ER_NET_PACKET_TOO_LARGE,
5098 "%s", "Got a packet bigger than 'slave_max_allowed_packet' bytes");
5099 goto err;
5100 case ER_MASTER_FATAL_ERROR_READING_BINLOG:
5101 mi->report(ERROR_LEVEL, ER_MASTER_FATAL_ERROR_READING_BINLOG,
5102 ER(ER_MASTER_FATAL_ERROR_READING_BINLOG),
5103 mysql_error_number, mysql_error(mysql));
5104 goto err;
5105 case ER_OUT_OF_RESOURCES:
5106 sql_print_error("\
5107 Stopping slave I/O thread due to out-of-memory error from master");
5108 mi->report(ERROR_LEVEL, ER_OUT_OF_RESOURCES,
5109 "%s", ER(ER_OUT_OF_RESOURCES));
5110 goto err;
5111 }
5112 if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
5113 reconnect_messages[SLAVE_RECON_ACT_EVENT]))
5114 goto err;
5115 goto connected;
5116 } // if (event_len == packet_error)
5117
5118 retry_count=0; // ok event, reset retry counter
5119 THD_STAGE_INFO(thd, stage_queueing_master_event_to_the_relay_log);
5120 event_buf= (const char*)mysql->net.read_pos + 1;
5121 DBUG_PRINT("info", ("IO thread received event of type %s", Log_event::get_type_str((Log_event_type)event_buf[EVENT_TYPE_OFFSET])));
5122 if (RUN_HOOK(binlog_relay_io, after_read_event,
5123 (thd, mi,(const char*)mysql->net.read_pos + 1,
5124 event_len, &event_buf, &event_len)))
5125 {
5126 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
5127 ER(ER_SLAVE_FATAL_ERROR),
5128 "Failed to run 'after_read_event' hook");
5129 goto err;
5130 }
5131
5132 /* XXX: 'synced' should be updated by queue_event to indicate
5133 whether event has been synced to disk */
5134 bool synced= 0;
5135 if (queue_event(mi, event_buf, event_len))
5136 {
5137 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
5138 ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
5139 "could not queue event from master");
5140 goto err;
5141 }
5142 if (RUN_HOOK(binlog_relay_io, after_queue_event,
5143 (thd, mi, event_buf, event_len, synced)))
5144 {
5145 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
5146 ER(ER_SLAVE_FATAL_ERROR),
5147 "Failed to run 'after_queue_event' hook");
5148 goto err;
5149 }
5150
5151 mysql_mutex_lock(&mi->data_lock);
5152 if (flush_master_info(mi, FALSE))
5153 {
5154 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
5155 ER(ER_SLAVE_FATAL_ERROR),
5156 "Failed to flush master info.");
5157 mysql_mutex_unlock(&mi->data_lock);
5158 goto err;
5159 }
5160 mysql_mutex_unlock(&mi->data_lock);
5161
5162 /*
5163 See if the relay logs take too much space.
5164 We don't lock mi->rli->log_space_lock here; this dirty read saves time
5165 and does not introduce any problem:
5166 - if mi->rli->ignore_log_space_limit is 1 but becomes 0 just after (so
5167 the clean value is 0), then we are reading only one more event as we
5168 should, and we'll block only at the next event. No big deal.
5169 - if mi->rli->ignore_log_space_limit is 0 but becomes 1 just after (so
5170 the clean value is 1), then we are going into wait_for_relay_log_space()
5171 for no reason, but this function will do a clean read, notice the clean
5172 value and exit immediately.
5173 */
5174 #ifndef DBUG_OFF
5175 {
5176 char llbuf1[22], llbuf2[22];
5177 DBUG_PRINT("info", ("log_space_limit=%s log_space_total=%s \
5178 ignore_log_space_limit=%d",
5179 llstr(rli->log_space_limit,llbuf1),
5180 llstr(rli->log_space_total,llbuf2),
5181 (int) rli->ignore_log_space_limit));
5182 }
5183 #endif
5184
5185 if (rli->log_space_limit && rli->log_space_limit <
5186 rli->log_space_total &&
5187 !rli->ignore_log_space_limit)
5188 if (wait_for_relay_log_space(rli))
5189 {
5190 sql_print_error("Slave I/O thread aborted while waiting for relay \
5191 log space");
5192 goto err;
5193 }
5194 DBUG_EXECUTE_IF("flush_after_reading_user_var_event",
5195 {
5196 if (event_buf[EVENT_TYPE_OFFSET] == USER_VAR_EVENT)
5197 {
5198 const char act[]= "now signal Reached wait_for signal.flush_complete_continue";
5199 DBUG_ASSERT(opt_debug_sync_timeout > 0);
5200 DBUG_ASSERT(!debug_sync_set_action(current_thd,
5201 STRING_WITH_LEN(act)));
5202
5203 }
5204 });
5205 DBUG_EXECUTE_IF("stop_io_after_reading_gtid_log_event",
5206 if (event_buf[EVENT_TYPE_OFFSET] == GTID_LOG_EVENT)
5207 thd->killed= THD::KILLED_NO_VALUE;
5208 );
5209 DBUG_EXECUTE_IF("stop_io_after_reading_query_log_event",
5210 if (event_buf[EVENT_TYPE_OFFSET] == QUERY_EVENT)
5211 thd->killed= THD::KILLED_NO_VALUE;
5212 );
5213 DBUG_EXECUTE_IF("stop_io_after_reading_user_var_log_event",
5214 if (event_buf[EVENT_TYPE_OFFSET] == USER_VAR_EVENT)
5215 thd->killed= THD::KILLED_NO_VALUE;
5216 );
5217 DBUG_EXECUTE_IF("stop_io_after_reading_table_map_event",
5218 if (event_buf[EVENT_TYPE_OFFSET] == TABLE_MAP_EVENT)
5219 thd->killed= THD::KILLED_NO_VALUE;
5220 );
5221 DBUG_EXECUTE_IF("stop_io_after_reading_xid_log_event",
5222 if (event_buf[EVENT_TYPE_OFFSET] == XID_EVENT)
5223 thd->killed= THD::KILLED_NO_VALUE;
5224 );
5225 DBUG_EXECUTE_IF("stop_io_after_reading_write_rows_log_event",
5226 if (event_buf[EVENT_TYPE_OFFSET] == WRITE_ROWS_EVENT)
5227 thd->killed= THD::KILLED_NO_VALUE;
5228 );
5229 /*
5230 After event is flushed to relay log file, memory used
5231 by thread's mem_root is not required any more.
5232 Hence adding free_root(thd->mem_root,...) to do the
5233 cleanup, otherwise a long running IO thread can
5234 cause OOM error.
5235 */
5236 free_root(thd->mem_root, MYF(MY_KEEP_PREALLOC));
5237 }
5238 }
5239
5240 // error = 0;
5241 err:
5242 // print the current replication position
5243 sql_print_information("Slave I/O thread exiting, read up to log '%s', position %s",
5244 mi->get_io_rpl_log_name(), llstr(mi->get_master_log_pos(), llbuff));
5245 (void) RUN_HOOK(binlog_relay_io, thread_stop, (thd, mi));
5246 thd->reset_query();
5247 thd->reset_db(NULL, 0);
5248 if (mysql)
5249 {
5250 /*
5251 Here we need to clear the active VIO before closing the
5252 connection with the master. The reason is that THD::awake()
5253 might be called from terminate_slave_thread() because somebody
5254 issued a STOP SLAVE. If that happends, the shutdown_active_vio()
5255 can be called in the middle of closing the VIO associated with
5256 the 'mysql' object, causing a crash.
5257 */
5258 #ifdef SIGNAL_WITH_VIO_SHUTDOWN
5259 thd->clear_active_vio();
5260 #endif
5261 mysql_close(mysql);
5262 mi->mysql=0;
5263 }
5264 mysql_mutex_lock(&mi->data_lock);
5265 write_ignored_events_info_to_relay_log(thd, mi);
5266 mysql_mutex_unlock(&mi->data_lock);
5267 THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
5268 mysql_mutex_lock(&mi->run_lock);
5269 /*
5270 Clean information used to start slave in order to avoid
5271 security issues.
5272 */
5273 mi->reset_start_info();
5274 /* Forget the relay log's format */
5275 mysql_mutex_lock(&mi->data_lock);
5276 mi->set_mi_description_event(NULL);
5277 mysql_mutex_unlock(&mi->data_lock);
5278
5279 DBUG_ASSERT(thd->net.buff != 0);
5280 net_end(&thd->net); // destructor will not free it, because net.vio is 0
5281
5282 thd->release_resources();
5283 THD_CHECK_SENTRY(thd);
5284 if (thd_added)
5285 remove_global_thread(thd);
5286 delete thd;
5287
5288 mi->abort_slave= 0;
5289 mi->slave_running= 0;
5290 mi->info_thd= 0;
5291 /*
5292 Note: the order of the two following calls (first broadcast, then unlock)
5293 is important. Otherwise a killer_thread can execute between the calls and
5294 delete the mi structure leading to a crash! (see BUG#25306 for details)
5295 */
5296 mysql_cond_broadcast(&mi->stop_cond); // tell the world we are done
5297 DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
5298 mysql_mutex_unlock(&mi->run_lock);
5299 DBUG_LEAVE; // Must match DBUG_ENTER()
5300 my_thread_end();
5301 #if OPENSSL_VERSION_NUMBER < 0x10100000L
5302 ERR_remove_thread_state(0);
5303 #endif /* OPENSSL_VERSION_NUMBER < 0x10100000L */
5304 pthread_exit(0);
5305 return(0); // Avoid compiler warnings
5306 }
5307
5308 /*
5309 Check the temporary directory used by commands like
5310 LOAD DATA INFILE.
5311 */
5312 static
check_temp_dir(char * tmp_file)5313 int check_temp_dir(char* tmp_file)
5314 {
5315 int fd;
5316 MY_DIR *dirp;
5317 char tmp_dir[FN_REFLEN];
5318 size_t tmp_dir_size;
5319
5320 DBUG_ENTER("check_temp_dir");
5321
5322 /*
5323 Get the directory from the temporary file.
5324 */
5325 dirname_part(tmp_dir, tmp_file, &tmp_dir_size);
5326
5327 /*
5328 Check if the directory exists.
5329 */
5330 if (!(dirp=my_dir(tmp_dir,MYF(MY_WME))))
5331 DBUG_RETURN(1);
5332 my_dirend(dirp);
5333
5334 /*
5335 Check permissions to create a file.
5336 */
5337 //append the server UUID to the temp file name.
5338 char *unique_tmp_file_name= (char*)my_malloc((FN_REFLEN+TEMP_FILE_MAX_LEN)*sizeof(char), MYF(0));
5339 sprintf(unique_tmp_file_name, "%s%s", tmp_file, server_uuid);
5340 if ((fd= mysql_file_create(key_file_misc,
5341 unique_tmp_file_name, CREATE_MODE,
5342 O_WRONLY | O_BINARY | O_EXCL | O_NOFOLLOW,
5343 MYF(MY_WME))) < 0)
5344 DBUG_RETURN(1);
5345
5346 /*
5347 Clean up.
5348 */
5349 mysql_file_close(fd, MYF(0));
5350
5351 mysql_file_delete(key_file_misc, unique_tmp_file_name, MYF(0));
5352 my_free(unique_tmp_file_name);
5353 DBUG_RETURN(0);
5354 }
5355
5356 /*
5357 Worker thread for the parallel execution of the replication events.
5358 */
handle_slave_worker(void * arg)5359 pthread_handler_t handle_slave_worker(void *arg)
5360 {
5361 THD *thd; /* needs to be first for thread_stack */
5362 bool thd_added= false;
5363 int error= 0;
5364 Slave_worker *w= (Slave_worker *) arg;
5365 Relay_log_info* rli= w->c_rli;
5366 ulong purge_cnt= 0;
5367 ulonglong purge_size= 0;
5368 struct slave_job_item _item, *job_item= &_item;
5369
5370 my_thread_init();
5371 DBUG_ENTER("handle_slave_worker");
5372
5373 thd= new THD;
5374 if (!thd)
5375 {
5376 sql_print_error("Failed during slave worker initialization");
5377 goto err;
5378 }
5379 w->info_thd= thd;
5380 thd->thread_stack = (char*)&thd;
5381
5382 pthread_detach_this_thread();
5383 if (init_slave_thread(thd, SLAVE_THD_WORKER))
5384 {
5385 // todo make SQL thread killed
5386 sql_print_error("Failed during slave worker initialization");
5387 goto err;
5388 }
5389 thd->init_for_queries(w);
5390
5391 mysql_mutex_lock(&LOCK_thread_count);
5392 add_global_thread(thd);
5393 thd_added= true;
5394 mysql_mutex_unlock(&LOCK_thread_count);
5395
5396 if (w->update_is_transactional())
5397 {
5398 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
5399 "Error checking if the worker repository is transactional.");
5400 goto err;
5401 }
5402
5403 mysql_mutex_lock(&w->jobs_lock);
5404 w->running_status= Slave_worker::RUNNING;
5405 mysql_cond_signal(&w->jobs_cond);
5406
5407 mysql_mutex_unlock(&w->jobs_lock);
5408
5409 DBUG_ASSERT(thd->is_slave_error == 0);
5410
5411 while (!error)
5412 {
5413 error= slave_worker_exec_job(w, rli);
5414 }
5415
5416 /*
5417 Cleanup after an error requires clear_error() go first.
5418 Otherwise assert(!all) in binlog_rollback()
5419 */
5420 thd->clear_error();
5421 w->cleanup_context(thd, error);
5422
5423 mysql_mutex_lock(&w->jobs_lock);
5424
5425 while(de_queue(&w->jobs, job_item))
5426 {
5427 purge_cnt++;
5428 purge_size += ((Log_event*) (job_item->data))->data_written;
5429 DBUG_ASSERT(job_item->data);
5430 delete static_cast<Log_event*>(job_item->data);
5431 }
5432
5433 DBUG_ASSERT(w->jobs.len == 0);
5434
5435 mysql_mutex_unlock(&w->jobs_lock);
5436
5437 mysql_mutex_lock(&rli->pending_jobs_lock);
5438 rli->pending_jobs -= purge_cnt;
5439 rli->mts_pending_jobs_size -= purge_size;
5440 DBUG_ASSERT(rli->mts_pending_jobs_size < rli->mts_pending_jobs_size_max);
5441
5442 mysql_mutex_unlock(&rli->pending_jobs_lock);
5443
5444 /*
5445 In MTS case cleanup_after_session() has be called explicitly.
5446 TODO: to make worker thd be deleted before Slave_worker instance.
5447 */
5448 if (thd->rli_slave)
5449 {
5450 w->cleanup_after_session();
5451 thd->rli_slave= NULL;
5452 }
5453 mysql_mutex_lock(&w->jobs_lock);
5454
5455 w->running_status= Slave_worker::NOT_RUNNING;
5456 if (log_warnings > 1)
5457 sql_print_information("Worker %lu statistics: "
5458 "events processed = %lu "
5459 "hungry waits = %lu "
5460 "priv queue overfills = %llu ",
5461 w->id, w->events_done, w->wq_size_waits_cnt,
5462 w->jobs.waited_overfill);
5463 mysql_cond_signal(&w->jobs_cond); // famous last goodbye
5464
5465 mysql_mutex_unlock(&w->jobs_lock);
5466
5467 err:
5468
5469 if (thd)
5470 {
5471 /*
5472 The slave code is very bad. Notice that it is missing
5473 several clean up calls here. I've just added what was
5474 necessary to avoid valgrind errors.
5475
5476 /Alfranio
5477 */
5478 DBUG_ASSERT(thd->net.buff != 0);
5479 net_end(&thd->net);
5480
5481 /*
5482 to avoid close_temporary_tables() closing temp tables as those
5483 are Coordinator's burden.
5484 */
5485 thd->system_thread= NON_SYSTEM_THREAD;
5486 thd->release_resources();
5487 THD_CHECK_SENTRY(thd);
5488 if (thd_added)
5489 remove_global_thread(thd);
5490 delete thd;
5491 }
5492
5493 my_thread_end();
5494 #if OPENSSL_VERSION_NUMBER < 0x10100000L
5495 ERR_remove_thread_state(0);
5496 #endif /* OPENSSL_VERSION_NUMBER < 0x10100000L */
5497 pthread_exit(0);
5498 DBUG_RETURN(0);
5499 }
5500
5501 /**
5502 Orders jobs by comparing relay log information.
5503 */
5504
mts_event_coord_cmp(LOG_POS_COORD * id1,LOG_POS_COORD * id2)5505 int mts_event_coord_cmp(LOG_POS_COORD *id1, LOG_POS_COORD *id2)
5506 {
5507 longlong filecmp= strcmp(id1->file_name, id2->file_name);
5508 longlong poscmp= id1->pos - id2->pos;
5509 return (filecmp < 0 ? -1 : (filecmp > 0 ? 1 :
5510 (poscmp < 0 ? -1 : (poscmp > 0 ? 1 : 0))));
5511 }
5512
mts_recovery_groups(Relay_log_info * rli)5513 int mts_recovery_groups(Relay_log_info *rli)
5514 {
5515 Log_event *ev= NULL;
5516 const char *errmsg= NULL;
5517 bool error= FALSE;
5518 bool flag_group_seen_begin= FALSE;
5519 uint recovery_group_cnt= 0;
5520 bool not_reached_commit= true;
5521 DYNAMIC_ARRAY above_lwm_jobs;
5522 Slave_job_group job_worker;
5523 IO_CACHE log;
5524 File file;
5525 LOG_INFO linfo;
5526 my_off_t offset= 0;
5527 MY_BITMAP *groups= &rli->recovery_groups;
5528 THD *thd= current_thd;
5529
5530 DBUG_ENTER("mts_recovery_groups");
5531
5532 DBUG_ASSERT(rli->slave_parallel_workers == 0);
5533
5534 /*
5535 Although mts_recovery_groups() is reentrant it returns
5536 early if the previous invocation raised any bit in
5537 recovery_groups bitmap.
5538 */
5539 if (rli->is_mts_recovery())
5540 DBUG_RETURN(0);
5541
5542 /*
5543 Save relay log position to compare with worker's position.
5544 */
5545 LOG_POS_COORD cp=
5546 {
5547 (char *) rli->get_group_master_log_name(),
5548 rli->get_group_master_log_pos()
5549 };
5550
5551 Format_description_log_event fdle(BINLOG_VERSION), *p_fdle= &fdle;
5552
5553 if (!p_fdle->is_valid())
5554 DBUG_RETURN(TRUE);
5555
5556 /*
5557 Gathers information on valuable workers and stores it in
5558 above_lwm_jobs in asc ordered by the master binlog coordinates.
5559 */
5560 my_init_dynamic_array(&above_lwm_jobs, sizeof(Slave_job_group),
5561 rli->recovery_parallel_workers,
5562 rli->recovery_parallel_workers);
5563
5564 /*
5565 When info tables are used and autocommit= 0 we force a new
5566 transaction start to avoid table access deadlocks when START SLAVE
5567 is executed after STOP SLAVE with MTS enabled.
5568 */
5569 if (is_autocommit_off_and_infotables(thd))
5570 {
5571 if (trans_begin(thd))
5572 {
5573 error= TRUE;
5574 goto err;
5575 }
5576 }
5577
5578 for (uint id= 0; id < rli->recovery_parallel_workers; id++)
5579 {
5580 Slave_worker *worker=
5581 Rpl_info_factory::create_worker(opt_rli_repository_id, id, rli, true);
5582
5583 if (!worker)
5584 {
5585 if (is_autocommit_off_and_infotables(thd))
5586 trans_rollback(thd);
5587 error= TRUE;
5588 goto err;
5589 }
5590
5591 LOG_POS_COORD w_last= { const_cast<char*>(worker->get_group_master_log_name()),
5592 worker->get_group_master_log_pos() };
5593 if (mts_event_coord_cmp(&w_last, &cp) > 0)
5594 {
5595 /*
5596 Inserts information into a dynamic array for further processing.
5597 The jobs/workers are ordered by the last checkpoint positions
5598 workers have seen.
5599 */
5600 job_worker.worker= worker;
5601 job_worker.checkpoint_log_pos= worker->checkpoint_master_log_pos;
5602 job_worker.checkpoint_log_name= worker->checkpoint_master_log_name;
5603
5604 insert_dynamic(&above_lwm_jobs, (uchar*) &job_worker);
5605 }
5606 else
5607 {
5608 /*
5609 Deletes the worker because its jobs are included in the latest
5610 checkpoint.
5611 */
5612 delete worker;
5613 }
5614 }
5615
5616 /*
5617 When info tables are used and autocommit= 0 we force transaction
5618 commit to avoid table access deadlocks when START SLAVE is executed
5619 after STOP SLAVE with MTS enabled.
5620 */
5621 if (is_autocommit_off_and_infotables(thd))
5622 {
5623 if (trans_commit(thd))
5624 {
5625 error= TRUE;
5626 goto err;
5627 }
5628 }
5629
5630 /*
5631 In what follows, the group Recovery Bitmap is constructed.
5632
5633 seek(lwm);
5634
5635 while(w= next(above_lwm_w))
5636 do
5637 read G
5638 if G == w->last_comm
5639 w.B << group_cnt++;
5640 RB |= w.B;
5641 break;
5642 else
5643 group_cnt++;
5644 while(!eof);
5645 continue;
5646 */
5647 DBUG_ASSERT(!rli->recovery_groups_inited);
5648
5649 if (above_lwm_jobs.elements != 0)
5650 {
5651 bitmap_init(groups, NULL, MTS_MAX_BITS_IN_GROUP, FALSE);
5652 rli->recovery_groups_inited= true;
5653 bitmap_clear_all(groups);
5654 }
5655 rli->mts_recovery_group_cnt= 0;
5656 for (uint it_job= 0; it_job < above_lwm_jobs.elements; it_job++)
5657 {
5658 Slave_worker *w= ((Slave_job_group *)
5659 dynamic_array_ptr(&above_lwm_jobs, it_job))->worker;
5660 LOG_POS_COORD w_last= { const_cast<char*>(w->get_group_master_log_name()),
5661 w->get_group_master_log_pos() };
5662 bool checksum_detected= FALSE;
5663
5664 sql_print_information("Slave: MTS group recovery relay log info based on Worker-Id %lu, "
5665 "group_relay_log_name %s, group_relay_log_pos %llu "
5666 "group_master_log_name %s, group_master_log_pos %llu",
5667 w->id,
5668 w->get_group_relay_log_name(),
5669 w->get_group_relay_log_pos(),
5670 w->get_group_master_log_name(),
5671 w->get_group_master_log_pos());
5672
5673 recovery_group_cnt= 0;
5674 not_reached_commit= true;
5675 if (rli->relay_log.find_log_pos(&linfo, rli->get_group_relay_log_name(), 1))
5676 {
5677 error= TRUE;
5678 sql_print_error("Error looking for %s.", rli->get_group_relay_log_name());
5679 goto err;
5680 }
5681 offset= rli->get_group_relay_log_pos();
5682 for (int checking= 0 ; not_reached_commit; checking++)
5683 {
5684 if ((file= open_binlog_file(&log, linfo.log_file_name, &errmsg)) < 0)
5685 {
5686 error= TRUE;
5687 sql_print_error("%s", errmsg);
5688 goto err;
5689 }
5690 /*
5691 Looking for the actual relay checksum algorithm that is present in
5692 a FD at head events of the relay log.
5693 */
5694 if (!checksum_detected)
5695 {
5696 int i= 0;
5697 while (i < 4 && (ev= Log_event::read_log_event(&log,
5698 (mysql_mutex_t*) 0, p_fdle, 0)))
5699 {
5700 if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
5701 {
5702 p_fdle->checksum_alg= ev->checksum_alg;
5703 checksum_detected= TRUE;
5704 }
5705 delete ev;
5706 i++;
5707 }
5708 if (!checksum_detected)
5709 {
5710 error= TRUE;
5711 sql_print_error("%s", "malformed or very old relay log which "
5712 "does not have FormatDescriptor");
5713 goto err;
5714 }
5715 }
5716
5717 my_b_seek(&log, offset);
5718
5719 while (not_reached_commit &&
5720 (ev= Log_event::read_log_event(&log, 0, p_fdle,
5721 opt_slave_sql_verify_checksum)))
5722 {
5723 DBUG_ASSERT(ev->is_valid());
5724
5725 if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
5726 p_fdle->checksum_alg= ev->checksum_alg;
5727
5728 if (ev->get_type_code() == ROTATE_EVENT ||
5729 ev->get_type_code() == FORMAT_DESCRIPTION_EVENT ||
5730 ev->get_type_code() == PREVIOUS_GTIDS_LOG_EVENT)
5731 {
5732 delete ev;
5733 ev= NULL;
5734 continue;
5735 }
5736
5737 DBUG_PRINT("mts", ("Event Recoverying relay log info "
5738 "group_mster_log_name %s, event_master_log_pos %llu type code %u.",
5739 linfo.log_file_name, ev->log_pos, ev->get_type_code()));
5740
5741 if (ev->starts_group())
5742 {
5743 flag_group_seen_begin= true;
5744 }
5745 else if ((ev->ends_group() || !flag_group_seen_begin) &&
5746 !is_gtid_event(ev))
5747 {
5748 int ret= 0;
5749 LOG_POS_COORD ev_coord= { (char *) rli->get_group_master_log_name(),
5750 ev->log_pos };
5751 flag_group_seen_begin= false;
5752 recovery_group_cnt++;
5753
5754 sql_print_information("Slave: MTS group recovery relay log info "
5755 "group_master_log_name %s, "
5756 "event_master_log_pos %llu.",
5757 rli->get_group_master_log_name(), ev->log_pos);
5758 if ((ret= mts_event_coord_cmp(&ev_coord, &w_last)) == 0)
5759 {
5760 #ifndef DBUG_OFF
5761 for (uint i= 0; i <= w->checkpoint_seqno; i++)
5762 {
5763 if (bitmap_is_set(&w->group_executed, i))
5764 DBUG_PRINT("mts", ("Bit %u is set.", i));
5765 else
5766 DBUG_PRINT("mts", ("Bit %u is not set.", i));
5767 }
5768 #endif
5769 DBUG_PRINT("mts",
5770 ("Doing a shift ini(%lu) end(%lu).",
5771 (w->checkpoint_seqno + 1) - recovery_group_cnt,
5772 w->checkpoint_seqno));
5773
5774 for (uint i= (w->checkpoint_seqno + 1) - recovery_group_cnt,
5775 j= 0; i <= w->checkpoint_seqno; i++, j++)
5776 {
5777 if (bitmap_is_set(&w->group_executed, i))
5778 {
5779 DBUG_PRINT("mts", ("Setting bit %u.", j));
5780 bitmap_fast_test_and_set(groups, j);
5781 }
5782 }
5783 not_reached_commit= false;
5784 }
5785 else
5786 DBUG_ASSERT(ret < 0);
5787 }
5788 delete ev;
5789 ev= NULL;
5790 }
5791 end_io_cache(&log);
5792 mysql_file_close(file, MYF(MY_WME));
5793 offset= BIN_LOG_HEADER_SIZE;
5794 if (not_reached_commit && rli->relay_log.find_next_log(&linfo, 1))
5795 {
5796 error= TRUE;
5797 sql_print_error("Error looking for file after %s.", linfo.log_file_name);
5798 goto err;
5799 }
5800 }
5801
5802 rli->mts_recovery_group_cnt= (rli->mts_recovery_group_cnt < recovery_group_cnt ?
5803 recovery_group_cnt : rli->mts_recovery_group_cnt);
5804 }
5805
5806 DBUG_ASSERT(!rli->recovery_groups_inited ||
5807 rli->mts_recovery_group_cnt <= groups->n_bits);
5808
5809 err:
5810
5811 for (uint it_job= 0; it_job < above_lwm_jobs.elements; it_job++)
5812 {
5813 get_dynamic(&above_lwm_jobs, (uchar *) &job_worker, it_job);
5814 delete job_worker.worker;
5815 }
5816
5817 delete_dynamic(&above_lwm_jobs);
5818 if (rli->mts_recovery_group_cnt == 0)
5819 rli->clear_mts_recovery_groups();
5820
5821 DBUG_RETURN(error ? ER_MTS_RECOVERY_FAILURE : 0);
5822 }
5823
5824 /**
5825 Processing rli->gaq to find out the low-water-mark (lwm) coordinates
5826 which is stored into the cental recovery table.
5827
5828 @param rli pointer to Relay-log-info of Coordinator
5829 @param period period of processing GAQ, normally derived from
5830 @c mts_checkpoint_period
5831 @param force if TRUE then hang in a loop till some progress
5832 @param need_data_lock False if rli->data_lock mutex is aquired by
5833 the caller.
5834
5835 @return FALSE success, TRUE otherwise
5836 */
mts_checkpoint_routine(Relay_log_info * rli,ulonglong period,bool force,bool need_data_lock)5837 bool mts_checkpoint_routine(Relay_log_info *rli, ulonglong period,
5838 bool force, bool need_data_lock)
5839 {
5840 ulong cnt;
5841 bool error= FALSE;
5842 struct timespec curr_clock;
5843
5844 DBUG_ENTER("checkpoint_routine");
5845
5846 #ifndef DBUG_OFF
5847 if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
5848 {
5849 if (!rli->gaq->count_done(rli))
5850 DBUG_RETURN(FALSE);
5851 }
5852 #endif
5853
5854 /*
5855 rli->checkpoint_group can have two possible values due to
5856 two possible status of the last (being scheduled) group.
5857 */
5858 DBUG_ASSERT(!rli->gaq->full() ||
5859 ((rli->checkpoint_seqno == rli->checkpoint_group -1 &&
5860 rli->mts_group_status == Relay_log_info::MTS_IN_GROUP) ||
5861 rli->checkpoint_seqno == rli->checkpoint_group));
5862
5863 /*
5864 Currently, the checkpoint routine is being called by the SQL Thread.
5865 For that reason, this function is called call from appropriate points
5866 in the SQL Thread's execution path and the elapsed time is calculated
5867 here to check if it is time to execute it.
5868 */
5869 set_timespec_nsec(curr_clock, 0);
5870 ulonglong diff= diff_timespec(curr_clock, rli->last_clock);
5871 if (!force && diff < period)
5872 {
5873 /*
5874 We do not need to execute the checkpoint now because
5875 the time elapsed is not enough.
5876 */
5877 DBUG_RETURN(FALSE);
5878 }
5879
5880 do
5881 {
5882 cnt= rli->gaq->move_queue_head(&rli->workers);
5883 #ifndef DBUG_OFF
5884 if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0) &&
5885 cnt != opt_mts_checkpoint_period)
5886 sql_print_error("This an error cnt != mts_checkpoint_period");
5887 #endif
5888 } while (!sql_slave_killed(rli->info_thd, rli) &&
5889 cnt == 0 && force &&
5890 !DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0) &&
5891 (my_sleep(rli->mts_coordinator_basic_nap), 1));
5892 /*
5893 This checks how many consecutive jobs where processed.
5894 If this value is different than zero the checkpoint
5895 routine can proceed. Otherwise, there is nothing to be
5896 done.
5897 */
5898 if (cnt == 0)
5899 goto end;
5900
5901
5902 /* TODO:
5903 to turn the least occupied selection in terms of jobs pieces
5904 */
5905 for (uint i= 0; i < rli->workers.elements; i++)
5906 {
5907 Slave_worker *w_i;
5908 get_dynamic(&rli->workers, (uchar *) &w_i, i);
5909 set_dynamic(&rli->least_occupied_workers, (uchar*) &w_i->jobs.len, w_i->id);
5910 };
5911 sort_dynamic(&rli->least_occupied_workers, (qsort_cmp) ulong_cmp);
5912
5913 if (need_data_lock)
5914 mysql_mutex_lock(&rli->data_lock);
5915 else
5916 mysql_mutex_assert_owner(&rli->data_lock);
5917
5918 /*
5919 "Coordinator::commit_positions" {
5920
5921 rli->gaq->lwm has been updated in move_queue_head() and
5922 to contain all but rli->group_master_log_name which
5923 is altered solely by Coordinator at special checkpoints.
5924 */
5925 rli->set_group_master_log_pos(rli->gaq->lwm.group_master_log_pos);
5926 rli->set_group_relay_log_pos(rli->gaq->lwm.group_relay_log_pos);
5927 DBUG_PRINT("mts", ("New checkpoint %llu %llu %s",
5928 rli->gaq->lwm.group_master_log_pos,
5929 rli->gaq->lwm.group_relay_log_pos,
5930 rli->gaq->lwm.group_relay_log_name));
5931
5932 if (rli->gaq->lwm.group_relay_log_name[0] != 0)
5933 rli->set_group_relay_log_name(rli->gaq->lwm.group_relay_log_name);
5934
5935 /*
5936 todo: uncomment notifies when UNTIL will be supported
5937
5938 rli->notify_group_master_log_name_update();
5939 rli->notify_group_relay_log_name_update();
5940
5941 Todo: optimize with if (wait_flag) broadcast
5942 waiter: set wait_flag; waits....; drops wait_flag;
5943 */
5944
5945 error= rli->flush_info(TRUE);
5946
5947 mysql_cond_broadcast(&rli->data_cond);
5948 if (need_data_lock)
5949 mysql_mutex_unlock(&rli->data_lock);
5950
5951 /*
5952 We need to ensure that this is never called at this point when
5953 cnt is zero. This value means that the checkpoint information
5954 will be completely reset.
5955 */
5956 rli->reset_notified_checkpoint(cnt, rli->gaq->lwm.ts, need_data_lock);
5957
5958 /* end-of "Coordinator::"commit_positions" */
5959
5960 end:
5961 #ifndef DBUG_OFF
5962 if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
5963 DBUG_SUICIDE();
5964 #endif
5965 set_timespec_nsec(rli->last_clock, 0);
5966
5967 DBUG_RETURN(error);
5968 }
5969
5970 /**
5971 Instantiation of a Slave_worker and forking out a single Worker thread.
5972
5973 @param rli Coordinator's Relay_log_info pointer
5974 @param i identifier of the Worker
5975
5976 @return 0 suppress or 1 if fails
5977 */
slave_start_single_worker(Relay_log_info * rli,ulong i)5978 int slave_start_single_worker(Relay_log_info *rli, ulong i)
5979 {
5980 int error= 0;
5981 pthread_t th;
5982 Slave_worker *w= NULL;
5983
5984 mysql_mutex_assert_owner(&rli->run_lock);
5985
5986 if (!(w=
5987 Rpl_info_factory::create_worker(opt_rli_repository_id, i, rli, false)))
5988 {
5989 sql_print_error("Failed during slave worker thread create");
5990 error= 1;
5991 goto err;
5992 }
5993
5994 if (w->init_worker(rli, i))
5995 {
5996 sql_print_error("Failed during slave worker thread create");
5997 error= 1;
5998 goto err;
5999 }
6000 set_dynamic(&rli->workers, (uchar*) &w, i);
6001
6002 if (DBUG_EVALUATE_IF("mts_worker_thread_fails", i == 1, 0) ||
6003 (error= mysql_thread_create(key_thread_slave_worker, &th,
6004 &connection_attrib,
6005 handle_slave_worker, (void*) w)))
6006 {
6007 sql_print_error("Failed during slave worker thread create (errno= %d)",
6008 error);
6009 error= 1;
6010 goto err;
6011 }
6012
6013 mysql_mutex_lock(&w->jobs_lock);
6014 if (w->running_status == Slave_worker::NOT_RUNNING)
6015 mysql_cond_wait(&w->jobs_cond, &w->jobs_lock);
6016 mysql_mutex_unlock(&w->jobs_lock);
6017 // Least occupied inited with zero
6018 insert_dynamic(&rli->least_occupied_workers, (uchar*) &w->jobs.len);
6019
6020 err:
6021 if (error && w)
6022 {
6023 delete w;
6024 /*
6025 Any failure after dynarray inserted must follow with deletion
6026 of just created item.
6027 */
6028 if (rli->workers.elements == i + 1)
6029 delete_dynamic_element(&rli->workers, i);
6030 }
6031 return error;
6032 }
6033
6034 /**
6035 Initialization of the central rli members for Coordinator's role,
6036 communication channels such as Assigned Partition Hash (APH),
6037 and starting the Worker pool.
6038
6039 @param n Number of configured Workers in the upcoming session.
6040
6041 @return 0 success
6042 non-zero as failure
6043 */
slave_start_workers(Relay_log_info * rli,ulong n,bool * mts_inited)6044 int slave_start_workers(Relay_log_info *rli, ulong n, bool *mts_inited)
6045 {
6046 uint i;
6047 int error= 0;
6048
6049 mysql_mutex_assert_owner(&rli->run_lock);
6050
6051 if (n == 0 && rli->mts_recovery_group_cnt == 0)
6052 {
6053 reset_dynamic(&rli->workers);
6054 goto end;
6055 }
6056
6057 *mts_inited= true;
6058
6059 /*
6060 The requested through argument number of Workers can be different
6061 from the previous time which ended with an error. Thereby
6062 the effective number of configured Workers is max of the two.
6063 */
6064 rli->init_workers(max(n, rli->recovery_parallel_workers));
6065
6066 // CGAP dynarray holds id:s of partitions of the Current being executed Group
6067 my_init_dynamic_array(&rli->curr_group_assigned_parts,
6068 sizeof(db_worker_hash_entry*),
6069 SLAVE_INIT_DBS_IN_GROUP, 1);
6070 rli->last_assigned_worker= NULL; // associated with curr_group_assigned
6071 my_init_dynamic_array(&rli->curr_group_da, sizeof(Log_event*), 8, 2);
6072 // Least_occupied_workers array to hold items size of Slave_jobs_queue::len
6073 my_init_dynamic_array(&rli->least_occupied_workers, sizeof(ulong), n, 0);
6074
6075 /*
6076 GAQ queue holds seqno:s of scheduled groups. C polls workers in
6077 @c opt_mts_checkpoint_period to update GAQ (see @c next_event())
6078 The length of GAQ is set to be equal to checkpoint_group.
6079 Notice, the size matters for mts_checkpoint_routine's progress loop.
6080 */
6081
6082 rli->gaq= new Slave_committed_queue(rli->get_group_master_log_name(),
6083 sizeof(Slave_job_group),
6084 rli->checkpoint_group, n);
6085 if (!rli->gaq->inited)
6086 return 1;
6087
6088 // length of WQ is actually constant though can be made configurable
6089 rli->mts_slave_worker_queue_len_max= mts_slave_worker_queue_len_max;
6090 rli->mts_pending_jobs_size= 0;
6091 rli->mts_pending_jobs_size_max= ::opt_mts_pending_jobs_size_max;
6092 rli->mts_wq_underrun_w_id= MTS_WORKER_UNDEF;
6093 rli->mts_wq_excess_cnt= 0;
6094 rli->mts_wq_overrun_cnt= 0;
6095 rli->mts_wq_oversize= FALSE;
6096 rli->mts_coordinator_basic_nap= mts_coordinator_basic_nap;
6097 rli->mts_worker_underrun_level= mts_worker_underrun_level;
6098 rli->curr_group_seen_begin= rli->curr_group_seen_gtid= false;
6099 rli->curr_group_isolated= FALSE;
6100 rli->checkpoint_seqno= 0;
6101 rli->mts_last_online_stat= my_time(0);
6102 rli->mts_group_status= Relay_log_info::MTS_NOT_IN_GROUP;
6103
6104 if (init_hash_workers(n)) // MTS: mapping_db_to_worker
6105 {
6106 sql_print_error("Failed to init partitions hash");
6107 error= 1;
6108 goto err;
6109 }
6110
6111 for (i= 0; i < n; i++)
6112 {
6113 if ((error= slave_start_single_worker(rli, i)))
6114 goto err;
6115 rli->slave_parallel_workers++;
6116 }
6117
6118 end:
6119 // Effective end of the recovery right now when there is no gaps
6120 if (!error && rli->mts_recovery_group_cnt == 0)
6121 {
6122 if ((error= rli->mts_finalize_recovery()))
6123 (void) Rpl_info_factory::reset_workers(rli);
6124 if (!error)
6125 error= rli->flush_info(TRUE);
6126 }
6127
6128 err:
6129 return error;
6130 }
6131
6132 /*
6133 Ending Worker threads.
6134
6135 Not in case Coordinator is killed itself, it first waits for
6136 Workers have finished their assignements, and then updates checkpoint.
6137 Workers are notified with setting KILLED status
6138 and waited for their acknowledgment as specified by
6139 worker's running_status.
6140 Coordinator finalizes with its MTS running status to reset few objects.
6141 */
slave_stop_workers(Relay_log_info * rli,bool * mts_inited)6142 void slave_stop_workers(Relay_log_info *rli, bool *mts_inited)
6143 {
6144 int i;
6145 THD *thd= rli->info_thd;
6146 if (!*mts_inited)
6147 return;
6148 else if (rli->slave_parallel_workers == 0)
6149 goto end;
6150
6151 /*
6152 If request for stop slave is received notify worker
6153 to stop.
6154 */
6155 // Initialize worker exit count and max_updated_index to 0 during each stop.
6156 rli->exit_counter= 0;
6157 rli->max_updated_index= (rli->until_condition !=
6158 Relay_log_info::UNTIL_NONE)?
6159 rli->mts_groups_assigned:0;
6160
6161 for (i= rli->workers.elements - 1; i >= 0; i--)
6162 {
6163 Slave_worker *w;
6164 struct slave_job_item item= {NULL}, *job_item= &item;
6165 get_dynamic((DYNAMIC_ARRAY*)&rli->workers, (uchar*) &w, i);
6166 mysql_mutex_lock(&w->jobs_lock);
6167 //Inform all workers to stop
6168 if (w->running_status != Slave_worker::RUNNING)
6169 {
6170 mysql_mutex_unlock(&w->jobs_lock);
6171 continue;
6172 }
6173
6174 w->running_status= Slave_worker::STOP;
6175 (void) set_max_updated_index_on_stop(w, job_item);
6176 mysql_cond_signal(&w->jobs_cond);
6177
6178 mysql_mutex_unlock(&w->jobs_lock);
6179
6180 if (log_warnings > 1)
6181 sql_print_information("Notifying Worker %lu to exit, thd %p", w->id,
6182 w->info_thd);
6183 }
6184
6185 thd_proc_info(thd, "Waiting for workers to exit");
6186
6187 for (i= rli->workers.elements - 1; i >= 0; i--)
6188 {
6189 Slave_worker *w= NULL;
6190 get_dynamic((DYNAMIC_ARRAY*)&rli->workers, (uchar*) &w, i);
6191
6192 mysql_mutex_lock(&w->jobs_lock);
6193 while (w->running_status != Slave_worker::NOT_RUNNING)
6194 {
6195 PSI_stage_info old_stage;
6196 DBUG_ASSERT(w->running_status == Slave_worker::ERROR_LEAVING ||
6197 w->running_status == Slave_worker::STOP ||
6198 w->running_status == Slave_worker::STOP_ACCEPTED);
6199
6200 thd->ENTER_COND(&w->jobs_cond, &w->jobs_lock,
6201 &stage_slave_waiting_workers_to_exit, &old_stage);
6202 mysql_cond_wait(&w->jobs_cond, &w->jobs_lock);
6203 thd->EXIT_COND(&old_stage);
6204 mysql_mutex_lock(&w->jobs_lock);
6205 }
6206 mysql_mutex_unlock(&w->jobs_lock);
6207 }
6208
6209 if (thd->killed == THD::NOT_KILLED)
6210 (void) mts_checkpoint_routine(rli, 0, false, true/*need_data_lock=true*/); // TODO:consider to propagate an error out of the function
6211
6212 for (i= rli->workers.elements - 1; i >= 0; i--)
6213 {
6214 Slave_worker *w= NULL;
6215 get_dynamic((DYNAMIC_ARRAY*)&rli->workers, (uchar*) &w, i);
6216 delete_dynamic_element(&rli->workers, i);
6217 delete w;
6218 }
6219 if (log_warnings > 1)
6220 sql_print_information("Total MTS session statistics: "
6221 "events processed = %llu; "
6222 "worker queues filled over overrun level = %lu; "
6223 "waited due a Worker queue full = %lu; "
6224 "waited due the total size = %lu; "
6225 "slept when Workers occupied = %lu ",
6226 rli->mts_events_assigned, rli->mts_wq_overrun_cnt,
6227 rli->mts_wq_overfill_cnt, rli->wq_size_waits_cnt,
6228 rli->mts_wq_no_underrun_cnt);
6229
6230 DBUG_ASSERT(rli->pending_jobs == 0);
6231 DBUG_ASSERT(rli->mts_pending_jobs_size == 0);
6232
6233 end:
6234 rli->mts_group_status= Relay_log_info::MTS_NOT_IN_GROUP;
6235 destroy_hash_workers(rli);
6236 delete rli->gaq;
6237 delete_dynamic(&rli->least_occupied_workers); // least occupied
6238
6239 // Destroy buffered events of the current group prior to exit.
6240 for (uint i= 0; i < rli->curr_group_da.elements; i++)
6241 delete *(Log_event**) dynamic_array_ptr(&rli->curr_group_da, i);
6242 delete_dynamic(&rli->curr_group_da); // GCDA
6243
6244 delete_dynamic(&rli->curr_group_assigned_parts); // GCAP
6245 rli->deinit_workers();
6246 rli->slave_parallel_workers= 0;
6247 *mts_inited= false;
6248 }
6249
6250
6251 /**
6252 Slave SQL thread entry point.
6253
6254 @param arg Pointer to Relay_log_info object that holds information
6255 for the SQL thread.
6256
6257 @return Always 0.
6258 */
handle_slave_sql(void * arg)6259 pthread_handler_t handle_slave_sql(void *arg)
6260 {
6261 THD *thd; /* needs to be first for thread_stack */
6262 bool thd_added= false;
6263 char llbuff[22],llbuff1[22];
6264 char saved_log_name[FN_REFLEN];
6265 char saved_master_log_name[FN_REFLEN];
6266 my_off_t saved_log_pos= 0;
6267 my_off_t saved_master_log_pos= 0;
6268 my_off_t saved_skip= 0;
6269 #ifdef WITH_WSREP
6270 my_bool wsrep_node_dropped= FALSE;
6271 #endif /* WITH_WSREP */
6272
6273 Relay_log_info* rli = ((Master_info*)arg)->rli;
6274 const char *errmsg;
6275 const char *error_string;
6276 bool mts_inited= false;
6277
6278 // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
6279 my_thread_init();
6280 DBUG_ENTER("handle_slave_sql");
6281 #ifdef WITH_WSREP
6282 wsrep_restart_point:
6283 #endif /* WITH_WSREP */
6284
6285 DBUG_ASSERT(rli->inited);
6286 mysql_mutex_lock(&rli->run_lock);
6287 DBUG_ASSERT(!rli->slave_running);
6288 errmsg= 0;
6289 error_string= 0;
6290 #ifndef DBUG_OFF
6291 rli->events_until_exit = abort_slave_event_count;
6292 #endif
6293
6294 thd = new THD; // note that contructor of THD uses DBUG_ !
6295 thd->thread_stack = (char*)&thd; // remember where our stack is
6296 rli->info_thd= thd;
6297
6298 /* Inform waiting threads that slave has started */
6299 rli->slave_run_id++;
6300 rli->slave_running = 1;
6301 rli->reported_unsafe_warning= false;
6302 rli->sql_thread_kill_accepted= false;
6303
6304 pthread_detach_this_thread();
6305 if (init_slave_thread(thd, SLAVE_THD_SQL))
6306 {
6307 /*
6308 TODO: this is currently broken - slave start and change master
6309 will be stuck if we fail here
6310 */
6311 mysql_cond_broadcast(&rli->start_cond);
6312 mysql_mutex_unlock(&rli->run_lock);
6313 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6314 "Failed during slave thread initialization");
6315 goto err;
6316 }
6317 thd->init_for_queries(rli);
6318 thd->temporary_tables = rli->save_temporary_tables; // restore temp tables
6319 set_thd_in_use_temporary_tables(rli); // (re)set sql_thd in use for saved temp tables
6320
6321 mysql_mutex_lock(&LOCK_thread_count);
6322 add_global_thread(thd);
6323 thd_added= true;
6324 mysql_mutex_unlock(&LOCK_thread_count);
6325
6326 /* MTS: starting the worker pool */
6327 if (slave_start_workers(rli, rli->opt_slave_parallel_workers, &mts_inited) != 0)
6328 {
6329 mysql_cond_broadcast(&rli->start_cond);
6330 mysql_mutex_unlock(&rli->run_lock);
6331 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6332 "Failed during slave workers initialization");
6333 goto err;
6334 }
6335 /*
6336 We are going to set slave_running to 1. Assuming slave I/O thread is
6337 alive and connected, this is going to make Seconds_Behind_Master be 0
6338 i.e. "caught up". Even if we're just at start of thread. Well it's ok, at
6339 the moment we start we can think we are caught up, and the next second we
6340 start receiving data so we realize we are not caught up and
6341 Seconds_Behind_Master grows. No big deal.
6342 */
6343 rli->abort_slave = 0;
6344
6345 /*
6346 Reset errors for a clean start (otherwise, if the master is idle, the SQL
6347 thread may execute no Query_log_event, so the error will remain even
6348 though there's no problem anymore). Do not reset the master timestamp
6349 (imagine the slave has caught everything, the STOP SLAVE and START SLAVE:
6350 as we are not sure that we are going to receive a query, we want to
6351 remember the last master timestamp (to say how many seconds behind we are
6352 now.
6353 But the master timestamp is reset by RESET SLAVE & CHANGE MASTER.
6354 */
6355 rli->clear_error();
6356
6357 if (rli->update_is_transactional())
6358 {
6359 mysql_cond_broadcast(&rli->start_cond);
6360 mysql_mutex_unlock(&rli->run_lock);
6361 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6362 "Error checking if the relay log repository is transactional.");
6363 goto err;
6364 }
6365
6366 if (!rli->is_transactional())
6367 rli->report(WARNING_LEVEL, 0,
6368 "If a crash happens this configuration does not guarantee that the relay "
6369 "log info will be consistent");
6370
6371 mysql_mutex_unlock(&rli->run_lock);
6372 mysql_cond_broadcast(&rli->start_cond);
6373
6374 DEBUG_SYNC(thd, "after_start_slave");
6375
6376 //tell the I/O thread to take relay_log_space_limit into account from now on
6377 mysql_mutex_lock(&rli->log_space_lock);
6378 rli->ignore_log_space_limit= 0;
6379 mysql_mutex_unlock(&rli->log_space_lock);
6380 rli->trans_retries= 0; // start from "no error"
6381 DBUG_PRINT("info", ("rli->trans_retries: %lu", rli->trans_retries));
6382
6383 if (rli->init_relay_log_pos(rli->get_group_relay_log_name(),
6384 rli->get_group_relay_log_pos(),
6385 true/*need_data_lock=true*/, &errmsg,
6386 1 /*look for a description_event*/))
6387 {
6388 rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6389 "Error initializing relay log position: %s", errmsg);
6390 goto err;
6391 }
6392 THD_CHECK_SENTRY(thd);
6393 #ifndef DBUG_OFF
6394 {
6395 char llbuf1[22], llbuf2[22];
6396 DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
6397 llstr(my_b_tell(rli->cur_log),llbuf1),
6398 llstr(rli->get_event_relay_log_pos(),llbuf2)));
6399 DBUG_ASSERT(rli->get_event_relay_log_pos() >= BIN_LOG_HEADER_SIZE);
6400 /*
6401 Wonder if this is correct. I (Guilhem) wonder if my_b_tell() returns the
6402 correct position when it's called just after my_b_seek() (the questionable
6403 stuff is those "seek is done on next read" comments in the my_b_seek()
6404 source code).
6405 The crude reality is that this assertion randomly fails whereas
6406 replication seems to work fine. And there is no easy explanation why it
6407 fails (as we my_b_seek(rli->event_relay_log_pos) at the very end of
6408 init_relay_log_pos() called above). Maybe the assertion would be
6409 meaningful if we held rli->data_lock between the my_b_seek() and the
6410 DBUG_ASSERT().
6411 */
6412 #ifdef SHOULD_BE_CHECKED
6413 DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->get_event_relay_log_pos());
6414 #endif
6415 }
6416 #endif
6417 DBUG_ASSERT(rli->info_thd == thd);
6418
6419 #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
6420 /* engine specific hook, to be made generic */
6421 if (ndb_wait_setup_func && ndb_wait_setup_func(opt_ndb_wait_setup))
6422 {
6423 sql_print_warning("Slave SQL thread : NDB : Tables not available after %lu"
6424 " seconds. Consider increasing --ndb-wait-setup value",
6425 opt_ndb_wait_setup);
6426 }
6427 #endif
6428
6429 #ifdef WITH_WSREP
6430 thd->wsrep_exec_mode= LOCAL_STATE;
6431 wsrep_thd_set_query_state(thd, QUERY_EXEC);
6432 /* synchronize with wsrep replication */
6433 if (WSREP_ON)
6434 {
6435 thd->wsrep_po_handle= WSREP_PO_INITIALIZER;
6436 thd->wsrep_po_cnt= 0;
6437 thd->wsrep_po_in_trans= FALSE;
6438 memset(&thd->wsrep_po_sid, 0, sizeof(thd->wsrep_po_sid));
6439 wsrep_ready_wait();
6440 }
6441 #endif
6442 DBUG_PRINT("master_info",("log_file_name: %s position: %s",
6443 rli->get_group_master_log_name(),
6444 llstr(rli->get_group_master_log_pos(),llbuff)));
6445 if (log_warnings)
6446 sql_print_information("Slave SQL thread initialized, starting replication in \
6447 log '%s' at position %s, relay log '%s' position: %s", rli->get_rpl_log_name(),
6448 llstr(rli->get_group_master_log_pos(),llbuff),rli->get_group_relay_log_name(),
6449 llstr(rli->get_group_relay_log_pos(),llbuff1));
6450
6451 if (check_temp_dir(rli->slave_patternload_file))
6452 {
6453 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
6454 "Unable to use slave's temporary directory %s - %s",
6455 slave_load_tmpdir, thd->get_stmt_da()->message());
6456 goto err;
6457 }
6458
6459 /* execute init_slave variable */
6460 if (opt_init_slave.length)
6461 {
6462 execute_init_command(thd, &opt_init_slave, &LOCK_sys_init_slave);
6463 if (thd->is_slave_error)
6464 {
6465 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
6466 "Slave SQL thread aborted. Can't execute init_slave query");
6467 goto err;
6468 }
6469 }
6470
6471 /*
6472 First check until condition - probably there is nothing to execute. We
6473 do not want to wait for next event in this case.
6474 */
6475 mysql_mutex_lock(&rli->data_lock);
6476 if (rli->slave_skip_counter)
6477 {
6478 strmake(saved_log_name, rli->get_group_relay_log_name(), FN_REFLEN - 1);
6479 strmake(saved_master_log_name, rli->get_group_master_log_name(), FN_REFLEN - 1);
6480 saved_log_pos= rli->get_group_relay_log_pos();
6481 saved_master_log_pos= rli->get_group_master_log_pos();
6482 saved_skip= rli->slave_skip_counter;
6483 }
6484 if (rli->until_condition != Relay_log_info::UNTIL_NONE &&
6485 rli->is_until_satisfied(thd, NULL))
6486 {
6487 mysql_mutex_unlock(&rli->data_lock);
6488 goto err;
6489 }
6490 mysql_mutex_unlock(&rli->data_lock);
6491
6492 /* Read queries from the IO/THREAD until this thread is killed */
6493
6494 while (!sql_slave_killed(thd,rli))
6495 {
6496 THD_STAGE_INFO(thd, stage_reading_event_from_the_relay_log);
6497 DBUG_ASSERT(rli->info_thd == thd);
6498 THD_CHECK_SENTRY(thd);
6499
6500 if (saved_skip && rli->slave_skip_counter == 0)
6501 {
6502 sql_print_information("'SQL_SLAVE_SKIP_COUNTER=%ld' executed at "
6503 "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
6504 "master_log_pos='%ld' and new position at "
6505 "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
6506 "master_log_pos='%ld' ",
6507 (ulong) saved_skip, saved_log_name, (ulong) saved_log_pos,
6508 saved_master_log_name, (ulong) saved_master_log_pos,
6509 rli->get_group_relay_log_name(), (ulong) rli->get_group_relay_log_pos(),
6510 rli->get_group_master_log_name(), (ulong) rli->get_group_master_log_pos());
6511 saved_skip= 0;
6512 }
6513
6514 if (exec_relay_log_event(thd,rli))
6515 {
6516 #ifdef WITH_WSREP
6517 if (thd->wsrep_conflict_state != NO_CONFLICT)
6518 {
6519 wsrep_node_dropped = 1;
6520 rli->abort_slave = 1;
6521 }
6522 #endif /* WITH_WSREP */
6523 DBUG_PRINT("info", ("exec_relay_log_event() failed"));
6524 // do not scare the user if SQL thread was simply killed or stopped
6525 if (!sql_slave_killed(thd,rli))
6526 {
6527 /*
6528 retrieve as much info as possible from the thd and, error
6529 codes and warnings and print this to the error log as to
6530 allow the user to locate the error
6531 */
6532 uint32 const last_errno= rli->last_error().number;
6533
6534 if (thd->is_error())
6535 {
6536 char const *const errmsg= thd->get_stmt_da()->message();
6537
6538 DBUG_PRINT("info",
6539 ("thd->get_stmt_da()->sql_errno()=%d; "
6540 "rli->last_error.number=%d",
6541 thd->get_stmt_da()->sql_errno(), last_errno));
6542 if (last_errno == 0)
6543 {
6544 /*
6545 This function is reporting an error which was not reported
6546 while executing exec_relay_log_event().
6547 */
6548 rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
6549 "%s", errmsg);
6550 }
6551 else if (last_errno != thd->get_stmt_da()->sql_errno())
6552 {
6553 /*
6554 * An error was reported while executing exec_relay_log_event()
6555 * however the error code differs from what is in the thread.
6556 * This function prints out more information to help finding
6557 * what caused the problem.
6558 */
6559 sql_print_error("Slave (additional info): %s Error_code: %d",
6560 errmsg, thd->get_stmt_da()->sql_errno());
6561 }
6562 }
6563
6564 /* Print any warnings issued */
6565 Diagnostics_area::Sql_condition_iterator it=
6566 thd->get_stmt_da()->sql_conditions();
6567 const Sql_condition *err;
6568 /*
6569 Added controlled slave thread cancel for replication
6570 of user-defined variables.
6571 */
6572 bool udf_error = false;
6573 while ((err= it++))
6574 {
6575 if (err->get_sql_errno() == ER_CANT_OPEN_LIBRARY)
6576 udf_error = true;
6577 sql_print_warning("Slave: %s Error_code: %d", err->get_message_text(), err->get_sql_errno());
6578 }
6579 if (udf_error)
6580 error_string= "Error loading user-defined library, slave SQL "
6581 "thread aborted. Install the missing library, and restart the"
6582 " slave SQL thread with \"SLAVE START\".";
6583 else
6584 error_string= "Error running query, slave SQL thread aborted."
6585 " Fix the problem, and restart the slave SQL thread with "
6586 "\"SLAVE START\".";
6587
6588 #ifdef WITH_WSREP
6589 if (WSREP_ON && last_errno == ER_UNKNOWN_COM_ERROR)
6590 {
6591 wsrep_node_dropped= TRUE;
6592 }
6593 #endif /* WITH_WSREP */
6594 }
6595 goto err;
6596 }
6597 }
6598
6599 err:
6600
6601 slave_stop_workers(rli, &mts_inited); // stopping worker pool
6602 /* Thread stopped. Print the current replication position to the log */
6603 if (error_string)
6604 sql_print_error("%s We stopped at log '%s' position %s.", error_string,
6605 rli->get_rpl_log_name(),
6606 llstr(rli->get_group_master_log_pos(), llbuff));
6607 else
6608 sql_print_information("Slave SQL thread exiting, replication stopped in log"
6609 " '%s' at position %s",
6610 rli->get_rpl_log_name(),
6611 llstr(rli->get_group_master_log_pos(), llbuff));
6612 rli->clear_mts_recovery_groups();
6613
6614 #ifdef WITH_WSREP
6615 if (WSREP_ON)
6616 {
6617 if (wsrep->preordered_commit(wsrep, &thd->wsrep_po_handle,
6618 NULL, 0, 0, false))
6619 {
6620 WSREP_WARN("preordered cleanup failed");
6621 }
6622 }
6623 #endif /* WITH_WSREP */
6624 /*
6625 Some events set some playgrounds, which won't be cleared because thread
6626 stops. Stopping of this thread may not be known to these events ("stop"
6627 request is detected only by the present function, not by events), so we
6628 must "proactively" clear playgrounds:
6629 */
6630 thd->clear_error();
6631 rli->cleanup_context(thd, 1);
6632 /*
6633 Some extra safety, which should not been needed (normally, event deletion
6634 should already have done these assignments (each event which sets these
6635 variables is supposed to set them to 0 before terminating)).
6636 */
6637 thd->catalog= 0;
6638 thd->reset_query();
6639 thd->reset_db(NULL, 0);
6640
6641 THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
6642 mysql_mutex_lock(&rli->run_lock);
6643 /* We need data_lock, at least to wake up any waiting master_pos_wait() */
6644 mysql_mutex_lock(&rli->data_lock);
6645 DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun
6646 /* When master_pos_wait() wakes up it will check this and terminate */
6647 rli->slave_running= 0;
6648 /* Forget the relay log's format */
6649 rli->set_rli_description_event(NULL);
6650 /* Wake up master_pos_wait() */
6651 mysql_mutex_unlock(&rli->data_lock);
6652 DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
6653 mysql_cond_broadcast(&rli->data_cond);
6654 rli->ignore_log_space_limit= 0; /* don't need any lock */
6655 /* we die so won't remember charset - re-update them on next thread start */
6656 rli->cached_charset_invalidate();
6657 rli->save_temporary_tables = thd->temporary_tables;
6658
6659 /*
6660 TODO: see if we can do this conditionally in next_event() instead
6661 to avoid unneeded position re-init
6662 */
6663 thd->temporary_tables = 0; // remove tempation from destructor to close them
6664 DBUG_ASSERT(thd->net.buff != 0);
6665 net_end(&thd->net); // destructor will not free it, because we are weird
6666 DBUG_ASSERT(rli->info_thd == thd);
6667 THD_CHECK_SENTRY(thd);
6668 rli->info_thd= 0;
6669 set_thd_in_use_temporary_tables(rli); // (re)set info_thd in use for saved temp tables
6670
6671 thd->release_resources();
6672 THD_CHECK_SENTRY(thd);
6673 if (thd_added)
6674 remove_global_thread(thd);
6675 delete thd;
6676 #ifdef WITH_WSREP
6677 /* if slave stopped due to node going non primary, we set global flag to
6678 trigger automatic restart of slave when node joins back to cluster
6679 */
6680 if (wsrep_node_dropped && wsrep_restart_slave)
6681 {
6682 if (wsrep_ready_get())
6683 {
6684 WSREP_INFO("Slave error due to node temporarily non-primary"
6685 "SQL slave will continue");
6686 wsrep_node_dropped= FALSE;
6687 mysql_mutex_unlock(&rli->run_lock);
6688 WSREP_DEBUG("wsrep_conflict_state now: %d", thd->wsrep_conflict_state);
6689 WSREP_INFO("slave restart: %d", thd->wsrep_conflict_state);
6690 thd->wsrep_conflict_state = NO_CONFLICT;
6691 goto wsrep_restart_point;
6692 } else {
6693 WSREP_INFO("Slave error due to node going non-primary");
6694 WSREP_INFO("wsrep_restart_slave was set and therefore slave will be "
6695 "automatically restarted when node joins back to cluster");
6696 wsrep_restart_slave_activated= TRUE;
6697 }
6698 }
6699 #endif /* WITH_WSREP */
6700 /*
6701 Note: the order of the broadcast and unlock calls below (first broadcast, then unlock)
6702 is important. Otherwise a killer_thread can execute between the calls and
6703 delete the mi structure leading to a crash! (see BUG#25306 for details)
6704 */
6705 mysql_cond_broadcast(&rli->stop_cond);
6706 DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
6707 mysql_mutex_unlock(&rli->run_lock); // tell the world we are done
6708
6709 DBUG_LEAVE; // Must match DBUG_ENTER()
6710 my_thread_end();
6711 #if OPENSSL_VERSION_NUMBER < 0x10100000L
6712 ERR_remove_thread_state(0);
6713 #endif /* OPENSSL_VERSION_NUMBER < 0x10100000L */
6714 pthread_exit(0);
6715 return 0; // Avoid compiler warnings
6716 }
6717
6718
6719 /*
6720 process_io_create_file()
6721 */
6722
process_io_create_file(Master_info * mi,Create_file_log_event * cev)6723 static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
6724 {
6725 int error = 1;
6726 ulong num_bytes;
6727 bool cev_not_written;
6728 THD *thd = mi->info_thd;
6729 NET *net = &mi->mysql->net;
6730 DBUG_ENTER("process_io_create_file");
6731
6732 mysql_mutex_assert_owner(&mi->data_lock);
6733
6734 if (unlikely(!cev->is_valid()))
6735 DBUG_RETURN(1);
6736
6737 if (!rpl_filter->db_ok(cev->db))
6738 {
6739 skip_load_data_infile(net);
6740 DBUG_RETURN(0);
6741 }
6742 DBUG_ASSERT(cev->inited_from_old);
6743 thd->file_id = cev->file_id = mi->file_id++;
6744 thd->server_id = cev->server_id;
6745 cev_not_written = 1;
6746
6747 if (unlikely(net_request_file(net,cev->fname)))
6748 {
6749 sql_print_error("Slave I/O: failed requesting download of '%s'",
6750 cev->fname);
6751 goto err;
6752 }
6753
6754 /*
6755 This dummy block is so we could instantiate Append_block_log_event
6756 once and then modify it slightly instead of doing it multiple times
6757 in the loop
6758 */
6759 {
6760 Append_block_log_event aev(thd,0,0,0,0);
6761
6762 for (;;)
6763 {
6764 if (unlikely((num_bytes=my_net_read(net)) == packet_error))
6765 {
6766 sql_print_error("Network read error downloading '%s' from master",
6767 cev->fname);
6768 goto err;
6769 }
6770 if (unlikely(!num_bytes)) /* eof */
6771 {
6772 /* 3.23 master wants it */
6773 net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0);
6774 /*
6775 If we wrote Create_file_log_event, then we need to write
6776 Execute_load_log_event. If we did not write Create_file_log_event,
6777 then this is an empty file and we can just do as if the LOAD DATA
6778 INFILE had not existed, i.e. write nothing.
6779 */
6780 if (unlikely(cev_not_written))
6781 break;
6782 Execute_load_log_event xev(thd,0,0);
6783 xev.log_pos = cev->log_pos;
6784 if (unlikely(mi->rli->relay_log.append_event(&xev, mi) != 0))
6785 {
6786 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
6787 ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
6788 "error writing Exec_load event to relay log");
6789 goto err;
6790 }
6791 mi->rli->relay_log.harvest_bytes_written(mi->rli, true/*need_log_space_lock=true*/);
6792 break;
6793 }
6794 if (unlikely(cev_not_written))
6795 {
6796 cev->block = net->read_pos;
6797 cev->block_len = num_bytes;
6798 if (unlikely(mi->rli->relay_log.append_event(cev, mi) != 0))
6799 {
6800 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
6801 ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
6802 "error writing Create_file event to relay log");
6803 goto err;
6804 }
6805 cev_not_written=0;
6806 mi->rli->relay_log.harvest_bytes_written(mi->rli, true/*need_log_space_lock=true*/);
6807 }
6808 else
6809 {
6810 aev.block = net->read_pos;
6811 aev.block_len = num_bytes;
6812 aev.log_pos = cev->log_pos;
6813 if (unlikely(mi->rli->relay_log.append_event(&aev, mi) != 0))
6814 {
6815 mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE,
6816 ER(ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
6817 "error writing Append_block event to relay log");
6818 goto err;
6819 }
6820 mi->rli->relay_log.harvest_bytes_written(mi->rli, true/*need_log_space_lock=true*/);
6821 }
6822 }
6823 }
6824 error=0;
6825 err:
6826 DBUG_RETURN(error);
6827 }
6828
6829
6830 /**
6831 Used by the slave IO thread when it receives a rotate event from the
6832 master.
6833
6834 Updates the master info with the place in the next binary log where
6835 we should start reading. Rotate the relay log to avoid mixed-format
6836 relay logs.
6837
6838 @param mi master_info for the slave
6839 @param rev The rotate log event read from the master
6840
6841 @note The caller must hold mi->data_lock before invoking this function.
6842
6843 @retval 0 ok
6844 @retval 1 error
6845 */
process_io_rotate(Master_info * mi,Rotate_log_event * rev)6846 static int process_io_rotate(Master_info *mi, Rotate_log_event *rev)
6847 {
6848 DBUG_ENTER("process_io_rotate");
6849 mysql_mutex_assert_owner(&mi->data_lock);
6850
6851 if (unlikely(!rev->is_valid()))
6852 DBUG_RETURN(1);
6853
6854 /* Safe copy as 'rev' has been "sanitized" in Rotate_log_event's ctor */
6855 memcpy(const_cast<char *>(mi->get_master_log_name()),
6856 rev->new_log_ident, rev->ident_len + 1);
6857 mi->set_master_log_pos(rev->pos);
6858 DBUG_PRINT("info", ("new (master_log_name, master_log_pos): ('%s', %lu)",
6859 mi->get_master_log_name(), (ulong) mi->get_master_log_pos()));
6860 #ifndef DBUG_OFF
6861 /*
6862 If we do not do this, we will be getting the first
6863 rotate event forever, so we need to not disconnect after one.
6864 */
6865 if (disconnect_slave_event_count)
6866 mi->events_until_exit++;
6867 #endif
6868
6869 /*
6870 If mi_description_event is format <4, there is conversion in the
6871 relay log to the slave's format (4). And Rotate can mean upgrade or
6872 nothing. If upgrade, it's to 5.0 or newer, so we will get a Format_desc, so
6873 no need to reset mi_description_event now. And if it's nothing (same
6874 master version as before), no need (still using the slave's format).
6875 */
6876 Format_description_log_event *old_fdle= mi->get_mi_description_event();
6877 if (old_fdle->binlog_version >= 4)
6878 {
6879 DBUG_ASSERT(old_fdle->checksum_alg ==
6880 mi->rli->relay_log.relay_log_checksum_alg);
6881 Format_description_log_event *new_fdle= new
6882 Format_description_log_event(3);
6883 new_fdle->checksum_alg= mi->rli->relay_log.relay_log_checksum_alg;
6884 mi->set_mi_description_event(new_fdle);
6885 }
6886 /*
6887 Rotate the relay log makes binlog format detection easier (at next slave
6888 start or mysqlbinlog)
6889 */
6890 int ret= rotate_relay_log(mi, true/*need_log_space_lock=true*/);
6891 DBUG_RETURN(ret);
6892 }
6893
6894 /**
6895 Reads a 3.23 event and converts it to the slave's format. This code was
6896 copied from MySQL 4.0.
6897
6898 @note The caller must hold mi->data_lock before invoking this function.
6899 */
queue_binlog_ver_1_event(Master_info * mi,const char * buf,ulong event_len)6900 static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
6901 ulong event_len)
6902 {
6903 const char *errmsg = 0;
6904 ulong inc_pos;
6905 bool ignore_event= 0;
6906 char *tmp_buf = 0;
6907 Relay_log_info *rli= mi->rli;
6908 DBUG_ENTER("queue_binlog_ver_1_event");
6909
6910 mysql_mutex_assert_owner(&mi->data_lock);
6911
6912 /*
6913 If we get Load event, we need to pass a non-reusable buffer
6914 to read_log_event, so we do a trick
6915 */
6916 if (buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
6917 {
6918 if (unlikely(!(tmp_buf=(char*)my_malloc(event_len+1,MYF(MY_WME)))))
6919 {
6920 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
6921 ER(ER_SLAVE_FATAL_ERROR), "Memory allocation failed");
6922 DBUG_RETURN(1);
6923 }
6924 memcpy(tmp_buf,buf,event_len);
6925 /*
6926 Create_file constructor wants a 0 as last char of buffer, this 0 will
6927 serve as the string-termination char for the file's name (which is at the
6928 end of the buffer)
6929 We must increment event_len, otherwise the event constructor will not see
6930 this end 0, which leads to segfault.
6931 */
6932 tmp_buf[event_len++]=0;
6933 int4store(tmp_buf+EVENT_LEN_OFFSET, event_len);
6934 buf = (const char*)tmp_buf;
6935 }
6936 /*
6937 This will transform LOAD_EVENT into CREATE_FILE_EVENT, ask the master to
6938 send the loaded file, and write it to the relay log in the form of
6939 Append_block/Exec_load (the SQL thread needs the data, as that thread is not
6940 connected to the master).
6941 */
6942 Log_event *ev=
6943 Log_event::read_log_event(buf, event_len, &errmsg,
6944 mi->get_mi_description_event(), 0);
6945 if (unlikely(!ev))
6946 {
6947 sql_print_error("Read invalid event from master: '%s',\
6948 master could be corrupt but a more likely cause of this is a bug",
6949 errmsg);
6950 my_free((char*) tmp_buf);
6951 DBUG_RETURN(1);
6952 }
6953
6954 mi->set_master_log_pos(ev->log_pos); /* 3.23 events don't contain log_pos */
6955 switch (ev->get_type_code()) {
6956 case STOP_EVENT:
6957 ignore_event= 1;
6958 inc_pos= event_len;
6959 break;
6960 case ROTATE_EVENT:
6961 if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
6962 {
6963 delete ev;
6964 DBUG_RETURN(1);
6965 }
6966 inc_pos= 0;
6967 break;
6968 case CREATE_FILE_EVENT:
6969 /*
6970 Yes it's possible to have CREATE_FILE_EVENT here, even if we're in
6971 queue_old_event() which is for 3.23 events which don't comprise
6972 CREATE_FILE_EVENT. This is because read_log_event() above has just
6973 transformed LOAD_EVENT into CREATE_FILE_EVENT.
6974 */
6975 {
6976 /* We come here when and only when tmp_buf != 0 */
6977 DBUG_ASSERT(tmp_buf != 0);
6978 inc_pos=event_len;
6979 ev->log_pos+= inc_pos;
6980 int error = process_io_create_file(mi,(Create_file_log_event*)ev);
6981 delete ev;
6982 mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
6983 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
6984 my_free((char*)tmp_buf);
6985 DBUG_RETURN(error);
6986 }
6987 default:
6988 inc_pos= event_len;
6989 break;
6990 }
6991 if (likely(!ignore_event))
6992 {
6993 if (ev->log_pos)
6994 /*
6995 Don't do it for fake Rotate events (see comment in
6996 Log_event::Log_event(const char* buf...) in log_event.cc).
6997 */
6998 ev->log_pos+= event_len; /* make log_pos be the pos of the end of the event */
6999 if (unlikely(rli->relay_log.append_event(ev, mi) != 0))
7000 {
7001 delete ev;
7002 DBUG_RETURN(1);
7003 }
7004 rli->relay_log.harvest_bytes_written(rli, true/*need_log_space_lock=true*/);
7005 }
7006 delete ev;
7007 mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
7008 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
7009 DBUG_RETURN(0);
7010 }
7011
7012 /**
7013 Reads a 4.0 event and converts it to the slave's format. This code was copied
7014 from queue_binlog_ver_1_event(), with some affordable simplifications.
7015
7016 @note The caller must hold mi->data_lock before invoking this function.
7017 */
queue_binlog_ver_3_event(Master_info * mi,const char * buf,ulong event_len)7018 static int queue_binlog_ver_3_event(Master_info *mi, const char *buf,
7019 ulong event_len)
7020 {
7021 const char *errmsg = 0;
7022 ulong inc_pos;
7023 char *tmp_buf = 0;
7024 Relay_log_info *rli= mi->rli;
7025 DBUG_ENTER("queue_binlog_ver_3_event");
7026
7027 mysql_mutex_assert_owner(&mi->data_lock);
7028
7029 /* read_log_event() will adjust log_pos to be end_log_pos */
7030 Log_event *ev=
7031 Log_event::read_log_event(buf, event_len, &errmsg,
7032 mi->get_mi_description_event(), 0);
7033 if (unlikely(!ev))
7034 {
7035 sql_print_error("Read invalid event from master: '%s',\
7036 master could be corrupt but a more likely cause of this is a bug",
7037 errmsg);
7038 my_free((char*) tmp_buf);
7039 DBUG_RETURN(1);
7040 }
7041 switch (ev->get_type_code()) {
7042 case STOP_EVENT:
7043 goto err;
7044 case ROTATE_EVENT:
7045 if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
7046 {
7047 delete ev;
7048 DBUG_RETURN(1);
7049 }
7050 inc_pos= 0;
7051 break;
7052 default:
7053 inc_pos= event_len;
7054 break;
7055 }
7056
7057 if (unlikely(rli->relay_log.append_event(ev, mi) != 0))
7058 {
7059 delete ev;
7060 DBUG_RETURN(1);
7061 }
7062 rli->relay_log.harvest_bytes_written(rli, true/*need_log_space_lock=true*/);
7063 delete ev;
7064 mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
7065 err:
7066 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
7067 DBUG_RETURN(0);
7068 }
7069
7070 /*
7071 queue_old_event()
7072
7073 Writes a 3.23 or 4.0 event to the relay log, after converting it to the 5.0
7074 (exactly, slave's) format. To do the conversion, we create a 5.0 event from
7075 the 3.23/4.0 bytes, then write this event to the relay log.
7076
7077 TODO:
7078 Test this code before release - it has to be tested on a separate
7079 setup with 3.23 master or 4.0 master
7080 */
7081
queue_old_event(Master_info * mi,const char * buf,ulong event_len)7082 static int queue_old_event(Master_info *mi, const char *buf,
7083 ulong event_len)
7084 {
7085 DBUG_ENTER("queue_old_event");
7086
7087 mysql_mutex_assert_owner(&mi->data_lock);
7088
7089 switch (mi->get_mi_description_event()->binlog_version)
7090 {
7091 case 1:
7092 DBUG_RETURN(queue_binlog_ver_1_event(mi,buf,event_len));
7093 case 3:
7094 DBUG_RETURN(queue_binlog_ver_3_event(mi,buf,event_len));
7095 default: /* unsupported format; eg version 2 */
7096 DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()",
7097 mi->get_mi_description_event()->binlog_version));
7098 DBUG_RETURN(1);
7099 }
7100 }
7101
7102 /*
7103 queue_event()
7104
7105 If the event is 3.23/4.0, passes it to queue_old_event() which will convert
7106 it. Otherwise, writes a 5.0 (or newer) event to the relay log. Then there is
7107 no format conversion, it's pure read/write of bytes.
7108 So a 5.0.0 slave's relay log can contain events in the slave's format or in
7109 any >=5.0.0 format.
7110 */
7111
queue_event(Master_info * mi,const char * buf,ulong event_len)7112 static int queue_event(Master_info* mi,const char* buf, ulong event_len)
7113 {
7114 int error= 0;
7115 String error_msg;
7116 ulong inc_pos= 0;
7117 Relay_log_info *rli= mi->rli;
7118 mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
7119 ulong s_id;
7120 bool unlock_data_lock= TRUE;
7121 /*
7122 FD_q must have been prepared for the first R_a event
7123 inside get_master_version_and_clock()
7124 Show-up of FD:s affects checksum_alg at once because
7125 that changes FD_queue.
7126 */
7127 uint8 checksum_alg= mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF ?
7128 mi->checksum_alg_before_fd :
7129 mi->rli->relay_log.relay_log_checksum_alg;
7130
7131 char *save_buf= NULL; // needed for checksumming the fake Rotate event
7132 char rot_buf[LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN + FN_REFLEN];
7133 Gtid gtid= { 0, 0 };
7134 Gtid old_retrieved_gtid= { 0, 0 };
7135 Log_event_type event_type= (Log_event_type)buf[EVENT_TYPE_OFFSET];
7136
7137 DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
7138 checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF ||
7139 checksum_alg == BINLOG_CHECKSUM_ALG_CRC32);
7140
7141 DBUG_ENTER("queue_event");
7142 /*
7143 FD_queue checksum alg description does not apply in a case of
7144 FD itself. The one carries both parts of the checksum data.
7145 */
7146 if (event_type == FORMAT_DESCRIPTION_EVENT)
7147 {
7148 checksum_alg= get_checksum_alg(buf, event_len);
7149 }
7150 else if (event_type == START_EVENT_V3)
7151 {
7152 // checksum behaviour is similar to the pre-checksum FD handling
7153 mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
7154 mysql_mutex_lock(&mi->data_lock);
7155 mi->get_mi_description_event()->checksum_alg=
7156 mi->rli->relay_log.relay_log_checksum_alg= checksum_alg=
7157 BINLOG_CHECKSUM_ALG_OFF;
7158 mysql_mutex_unlock(&mi->data_lock);
7159 }
7160
7161 // does not hold always because of old binlog can work with NM
7162 // DBUG_ASSERT(checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
7163
7164 // should hold unless manipulations with RL. Tests that do that
7165 // will have to refine the clause.
7166 DBUG_ASSERT(mi->rli->relay_log.relay_log_checksum_alg !=
7167 BINLOG_CHECKSUM_ALG_UNDEF);
7168
7169 // Emulate the network corruption
7170 DBUG_EXECUTE_IF("corrupt_queue_event",
7171 if (event_type != FORMAT_DESCRIPTION_EVENT)
7172 {
7173 char *debug_event_buf_c = (char*) buf;
7174 int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
7175 debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
7176 DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos));
7177 DBUG_SET("");
7178 }
7179 );
7180
7181 if (event_checksum_test((uchar *) buf, event_len, checksum_alg))
7182 {
7183 error= ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE;
7184 unlock_data_lock= FALSE;
7185 goto err;
7186 }
7187
7188 mysql_mutex_lock(&mi->data_lock);
7189
7190 if (mi->get_mi_description_event()->binlog_version < 4 &&
7191 event_type != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
7192 {
7193 int ret= queue_old_event(mi,buf,event_len);
7194 mysql_mutex_unlock(&mi->data_lock);
7195 DBUG_RETURN(ret);
7196 }
7197
7198 switch (event_type) {
7199 case STOP_EVENT:
7200 /*
7201 We needn't write this event to the relay log. Indeed, it just indicates a
7202 master server shutdown. The only thing this does is cleaning. But
7203 cleaning is already done on a per-master-thread basis (as the master
7204 server is shutting down cleanly, it has written all DROP TEMPORARY TABLE
7205 prepared statements' deletion are TODO only when we binlog prep stmts).
7206
7207 We don't even increment mi->get_master_log_pos(), because we may be just after
7208 a Rotate event. Btw, in a few milliseconds we are going to have a Start
7209 event from the next binlog (unless the master is presently running
7210 without --log-bin).
7211 */
7212 goto err;
7213 case ROTATE_EVENT:
7214 {
7215 Rotate_log_event rev(buf, checksum_alg != BINLOG_CHECKSUM_ALG_OFF ?
7216 event_len - BINLOG_CHECKSUM_LEN : event_len,
7217 mi->get_mi_description_event());
7218
7219 if (unlikely(process_io_rotate(mi, &rev)))
7220 {
7221 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
7222 goto err;
7223 }
7224 /*
7225 Checksum special cases for the fake Rotate (R_f) event caused by the protocol
7226 of events generation and serialization in RL where Rotate of master is
7227 queued right next to FD of slave.
7228 Since it's only FD that carries the alg desc of FD_s has to apply to R_m.
7229 Two special rules apply only to the first R_f which comes in before any FD_m.
7230 The 2nd R_f should be compatible with the FD_s that must have taken over
7231 the last seen FD_m's (A).
7232
7233 RSC_1: If OM \and fake Rotate \and slave is configured to
7234 to compute checksum for its first FD event for RL
7235 the fake Rotate gets checksummed here.
7236 */
7237 if (uint4korr(&buf[0]) == 0 && checksum_alg == BINLOG_CHECKSUM_ALG_OFF &&
7238 mi->rli->relay_log.relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_OFF)
7239 {
7240 ha_checksum rot_crc= my_checksum(0L, NULL, 0);
7241 event_len += BINLOG_CHECKSUM_LEN;
7242 memcpy(rot_buf, buf, event_len - BINLOG_CHECKSUM_LEN);
7243 int4store(&rot_buf[EVENT_LEN_OFFSET],
7244 uint4korr(rot_buf + EVENT_LEN_OFFSET) + BINLOG_CHECKSUM_LEN);
7245 rot_crc= my_checksum(rot_crc, (const uchar *) rot_buf,
7246 event_len - BINLOG_CHECKSUM_LEN);
7247 int4store(&rot_buf[event_len - BINLOG_CHECKSUM_LEN], rot_crc);
7248 DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
7249 DBUG_ASSERT(mi->get_mi_description_event()->checksum_alg ==
7250 mi->rli->relay_log.relay_log_checksum_alg);
7251 /* the first one */
7252 DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
7253 save_buf= (char *) buf;
7254 buf= rot_buf;
7255 }
7256 else
7257 /*
7258 RSC_2: If NM \and fake Rotate \and slave does not compute checksum
7259 the fake Rotate's checksum is stripped off before relay-logging.
7260 */
7261 if (uint4korr(&buf[0]) == 0 && checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
7262 mi->rli->relay_log.relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_OFF)
7263 {
7264 event_len -= BINLOG_CHECKSUM_LEN;
7265 memcpy(rot_buf, buf, event_len);
7266 int4store(&rot_buf[EVENT_LEN_OFFSET],
7267 uint4korr(rot_buf + EVENT_LEN_OFFSET) - BINLOG_CHECKSUM_LEN);
7268 DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
7269 DBUG_ASSERT(mi->get_mi_description_event()->checksum_alg ==
7270 mi->rli->relay_log.relay_log_checksum_alg);
7271 /* the first one */
7272 DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
7273 save_buf= (char *) buf;
7274 buf= rot_buf;
7275 }
7276 /*
7277 Now the I/O thread has just changed its mi->get_master_log_name(), so
7278 incrementing mi->get_master_log_pos() is nonsense.
7279 */
7280 inc_pos= 0;
7281 break;
7282 }
7283 case FORMAT_DESCRIPTION_EVENT:
7284 {
7285 /*
7286 Create an event, and save it (when we rotate the relay log, we will have
7287 to write this event again).
7288 */
7289 /*
7290 We are the only thread which reads/writes mi_description_event.
7291 The relay_log struct does not move (though some members of it can
7292 change), so we needn't any lock (no rli->data_lock, no log lock).
7293 */
7294 const char* errmsg;
7295 // mark it as undefined that is irrelevant anymore
7296 mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
7297 Format_description_log_event *new_fdle=
7298 (Format_description_log_event*)
7299 Log_event::read_log_event(buf, event_len, &errmsg,
7300 mi->get_mi_description_event(), 1);
7301 if (new_fdle == NULL)
7302 {
7303 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
7304 goto err;
7305 }
7306 if (new_fdle->checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
7307 new_fdle->checksum_alg= BINLOG_CHECKSUM_ALG_OFF;
7308 mi->set_mi_description_event(new_fdle);
7309
7310 /* installing new value of checksum Alg for relay log */
7311 mi->rli->relay_log.relay_log_checksum_alg= new_fdle->checksum_alg;
7312
7313 /*
7314 Though this does some conversion to the slave's format, this will
7315 preserve the master's binlog format version, and number of event types.
7316 */
7317 /*
7318 If the event was not requested by the slave (the slave did not ask for
7319 it), i.e. has end_log_pos=0, we do not increment mi->get_master_log_pos()
7320 */
7321 inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
7322 DBUG_PRINT("info",("binlog format is now %d",
7323 mi->get_mi_description_event()->binlog_version));
7324
7325 }
7326 break;
7327
7328 case HEARTBEAT_LOG_EVENT:
7329 {
7330 /*
7331 HB (heartbeat) cannot come before RL (Relay)
7332 */
7333 char llbuf[22];
7334 Heartbeat_log_event hb(buf,
7335 mi->rli->relay_log.relay_log_checksum_alg
7336 != BINLOG_CHECKSUM_ALG_OFF ?
7337 event_len - BINLOG_CHECKSUM_LEN : event_len,
7338 mi->get_mi_description_event());
7339 if (!hb.is_valid())
7340 {
7341 error= ER_SLAVE_HEARTBEAT_FAILURE;
7342 error_msg.append(STRING_WITH_LEN("inconsistent heartbeat event content;"));
7343 error_msg.append(STRING_WITH_LEN("the event's data: log_file_name "));
7344 error_msg.append(hb.get_log_ident(), (uint) strlen(hb.get_log_ident()));
7345 error_msg.append(STRING_WITH_LEN(" log_pos "));
7346 llstr(hb.log_pos, llbuf);
7347 error_msg.append(llbuf, strlen(llbuf));
7348 goto err;
7349 }
7350 mi->received_heartbeats++;
7351 mi->last_heartbeat= my_time(0);
7352
7353
7354 /*
7355 During GTID protocol, if the master skips transactions,
7356 a heartbeat event is sent to the slave at the end of last
7357 skipped transaction to update coordinates.
7358
7359 I/O thread receives the heartbeat event and updates mi
7360 only if the received heartbeat position is greater than
7361 mi->get_master_log_pos(). This event is written to the
7362 relay log as an ignored Rotate event. SQL thread reads
7363 the rotate event only to update the coordinates corresponding
7364 to the last skipped transaction. Note that,
7365 we update only the positions and not the file names, as a ROTATE
7366 EVENT from the master prior to this will update the file name.
7367 */
7368 if (mi->is_auto_position() && mi->get_master_log_pos() < hb.log_pos
7369 && mi->get_master_log_name() != NULL)
7370 {
7371
7372 DBUG_ASSERT(memcmp(const_cast<char*>(mi->get_master_log_name()),
7373 hb.get_log_ident(), hb.get_ident_len()) == 0);
7374
7375 mi->set_master_log_pos(hb.log_pos);
7376
7377 /*
7378 Put this heartbeat event in the relay log as a Rotate Event.
7379 */
7380 inc_pos= 0;
7381 memcpy(rli->ign_master_log_name_end, mi->get_master_log_name(),
7382 FN_REFLEN);
7383 rli->ign_master_log_pos_end = mi->get_master_log_pos();
7384
7385 if (write_ignored_events_info_to_relay_log(mi->info_thd, mi))
7386 goto err;
7387 }
7388
7389 /*
7390 compare local and event's versions of log_file, log_pos.
7391
7392 Heartbeat is sent only after an event corresponding to the corrdinates
7393 the heartbeat carries.
7394 Slave can not have a difference in coordinates except in the only
7395 special case when mi->get_master_log_name(), mi->get_master_log_pos() have never
7396 been updated by Rotate event i.e when slave does not have any history
7397 with the master (and thereafter mi->get_master_log_pos() is NULL).
7398
7399 TODO: handling `when' for SHOW SLAVE STATUS' seconds behind
7400 */
7401 if ((memcmp(const_cast<char *>(mi->get_master_log_name()),
7402 hb.get_log_ident(), hb.get_ident_len())
7403 && mi->get_master_log_name() != NULL)
7404 || ((mi->get_master_log_pos() != hb.log_pos && gtid_mode == 0) ||
7405 /*
7406 When Gtid mode is on only monotonicity can be claimed.
7407 Todo: enhance HB event with the skipped events size
7408 and to convert HB.pos == MI.pos to HB.pos - HB.skip_size == MI.pos
7409 */
7410 (mi->get_master_log_pos() > hb.log_pos)))
7411 {
7412 /* missed events of heartbeat from the past */
7413 error= ER_SLAVE_HEARTBEAT_FAILURE;
7414 error_msg.append(STRING_WITH_LEN("heartbeat is not compatible with local info;"));
7415 error_msg.append(STRING_WITH_LEN("the event's data: log_file_name "));
7416 error_msg.append(hb.get_log_ident(), (uint) strlen(hb.get_log_ident()));
7417 error_msg.append(STRING_WITH_LEN(" log_pos "));
7418 llstr(hb.log_pos, llbuf);
7419 error_msg.append(llbuf, strlen(llbuf));
7420 goto err;
7421 }
7422 goto skip_relay_logging;
7423 }
7424 break;
7425
7426 case PREVIOUS_GTIDS_LOG_EVENT:
7427 {
7428 /*
7429 This event does not have any meaning for the slave and
7430 was just sent to show the slave the master is making
7431 progress and avoid possible deadlocks.
7432 So at this point, the event is replaced by a rotate
7433 event that will make the slave update what it knows
7434 about the master's coordinates.
7435 */
7436 inc_pos= 0;
7437 mi->set_master_log_pos(mi->get_master_log_pos() + event_len);
7438 memcpy(rli->ign_master_log_name_end, mi->get_master_log_name(), FN_REFLEN);
7439 rli->ign_master_log_pos_end= mi->get_master_log_pos();
7440
7441 if (write_ignored_events_info_to_relay_log(mi->info_thd, mi))
7442 goto err;
7443
7444 goto skip_relay_logging;
7445 }
7446 break;
7447
7448 case GTID_LOG_EVENT:
7449 {
7450 if (gtid_mode == 0)
7451 {
7452 error= ER_FOUND_GTID_EVENT_WHEN_GTID_MODE_IS_OFF;
7453 goto err;
7454 }
7455 global_sid_lock->rdlock();
7456 Gtid_log_event gtid_ev(buf, checksum_alg != BINLOG_CHECKSUM_ALG_OFF ?
7457 event_len - BINLOG_CHECKSUM_LEN : event_len,
7458 mi->get_mi_description_event());
7459 gtid.sidno= gtid_ev.get_sidno(false);
7460 global_sid_lock->unlock();
7461 if (gtid.sidno < 0)
7462 goto err;
7463 gtid.gno= gtid_ev.get_gno();
7464 inc_pos= event_len;
7465 }
7466 break;
7467
7468 case ANONYMOUS_GTID_LOG_EVENT:
7469
7470 default:
7471 inc_pos= event_len;
7472 break;
7473 }
7474
7475 /*
7476 Simulate an unknown ignorable log event by rewriting the write_rows log
7477 event and previous_gtids log event before writing them in relay log.
7478 */
7479 DBUG_EXECUTE_IF("simulate_unknown_ignorable_log_event",
7480 if (event_type == WRITE_ROWS_EVENT ||
7481 event_type == PREVIOUS_GTIDS_LOG_EVENT)
7482 {
7483 char *event_buf= const_cast<char*>(buf);
7484 /* Overwrite the log event type with an unknown type. */
7485 event_buf[EVENT_TYPE_OFFSET]= ENUM_END_EVENT + 1;
7486 /* Set LOG_EVENT_IGNORABLE_F for the log event. */
7487 int2store(event_buf + FLAGS_OFFSET,
7488 uint2korr(event_buf + FLAGS_OFFSET) | LOG_EVENT_IGNORABLE_F);
7489 }
7490 );
7491
7492 /*
7493 If this event is originating from this server, don't queue it.
7494 We don't check this for 3.23 events because it's simpler like this; 3.23
7495 will be filtered anyway by the SQL slave thread which also tests the
7496 server id (we must also keep this test in the SQL thread, in case somebody
7497 upgrades a 4.0 slave which has a not-filtered relay log).
7498
7499 ANY event coming from ourselves can be ignored: it is obvious for queries;
7500 for STOP_EVENT/ROTATE_EVENT/START_EVENT: these cannot come from ourselves
7501 (--log-slave-updates would not log that) unless this slave is also its
7502 direct master (an unsupported, useless setup!).
7503 */
7504
7505 mysql_mutex_lock(log_lock);
7506 s_id= uint4korr(buf + SERVER_ID_OFFSET);
7507
7508 /*
7509 If server_id_bits option is set we need to mask out irrelevant bits
7510 when checking server_id, but we still put the full unmasked server_id
7511 into the Relay log so that it can be accessed when applying the event
7512 */
7513 s_id&= opt_server_id_mask;
7514
7515 if ((s_id == ::server_id && !mi->rli->replicate_same_server_id) ||
7516 /*
7517 the following conjunction deals with IGNORE_SERVER_IDS, if set
7518 If the master is on the ignore list, execution of
7519 format description log events and rotate events is necessary.
7520 */
7521 (mi->ignore_server_ids->dynamic_ids.elements > 0 &&
7522 mi->shall_ignore_server_id(s_id) &&
7523 /* everything is filtered out from non-master */
7524 (s_id != mi->master_id ||
7525 /* for the master meta information is necessary */
7526 (event_type != FORMAT_DESCRIPTION_EVENT &&
7527 event_type != ROTATE_EVENT))))
7528 {
7529 /*
7530 Do not write it to the relay log.
7531 a) We still want to increment mi->get_master_log_pos(), so that we won't
7532 re-read this event from the master if the slave IO thread is now
7533 stopped/restarted (more efficient if the events we are ignoring are big
7534 LOAD DATA INFILE).
7535 b) We want to record that we are skipping events, for the information of
7536 the slave SQL thread, otherwise that thread may let
7537 rli->group_relay_log_pos stay too small if the last binlog's event is
7538 ignored.
7539 But events which were generated by this slave and which do not exist in
7540 the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment
7541 mi->get_master_log_pos().
7542 If the event is originated remotely and is being filtered out by
7543 IGNORE_SERVER_IDS it increments mi->get_master_log_pos()
7544 as well as rli->group_relay_log_pos.
7545 */
7546 if (!(s_id == ::server_id && !mi->rli->replicate_same_server_id) ||
7547 (event_type != FORMAT_DESCRIPTION_EVENT &&
7548 event_type != ROTATE_EVENT &&
7549 event_type != STOP_EVENT))
7550 {
7551 mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
7552 memcpy(rli->ign_master_log_name_end, mi->get_master_log_name(), FN_REFLEN);
7553 DBUG_ASSERT(rli->ign_master_log_name_end[0]);
7554 rli->ign_master_log_pos_end= mi->get_master_log_pos();
7555 }
7556 rli->relay_log.signal_update(); // the slave SQL thread needs to re-check
7557 DBUG_PRINT("info", ("master_log_pos: %lu, event originating from %u server, ignored",
7558 (ulong) mi->get_master_log_pos(), uint4korr(buf + SERVER_ID_OFFSET)));
7559 }
7560 else
7561 {
7562 DBUG_EXECUTE_IF("flush_after_reading_gtid_event",
7563 if (event_type == GTID_LOG_EVENT && gtid.gno == 4)
7564 DBUG_SET("+d,set_max_size_zero");
7565 );
7566 DBUG_EXECUTE_IF("set_append_buffer_error",
7567 if (event_type == GTID_LOG_EVENT && gtid.gno == 4)
7568 DBUG_SET("+d,simulate_append_buffer_error");
7569 );
7570 /*
7571 Add the GTID to the retrieved set before actually appending it to relay
7572 log. This will ensure that if a rotation happens at this point of time the
7573 new GTID will be reflected as part of Previous_Gtid set and
7574 Retrieved_Gtid_Set will not have any gaps.
7575 */
7576 if (event_type == GTID_LOG_EVENT)
7577 {
7578 global_sid_lock->rdlock();
7579 old_retrieved_gtid= *(mi->rli->get_last_retrieved_gtid());
7580 int ret= rli->add_logged_gtid(gtid.sidno, gtid.gno);
7581 if (!ret)
7582 rli->set_last_retrieved_gtid(gtid);
7583 global_sid_lock->unlock();
7584 if (ret != 0)
7585 {
7586 mysql_mutex_unlock(log_lock);
7587 goto err;
7588 }
7589 }
7590 /* write the event to the relay log */
7591 if (!DBUG_EVALUATE_IF("simulate_append_buffer_error", 1, 0) &&
7592 likely(rli->relay_log.append_buffer(buf, event_len, mi) == 0))
7593 {
7594 mi->set_master_log_pos(mi->get_master_log_pos() + inc_pos);
7595 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
7596 rli->relay_log.harvest_bytes_written(rli, true/*need_log_space_lock=true*/);
7597 }
7598 else
7599 {
7600 if (event_type == GTID_LOG_EVENT)
7601 {
7602 global_sid_lock->rdlock();
7603 Gtid_set * retrieved_set= (const_cast<Gtid_set *>(mi->rli->get_gtid_set()));
7604 if (retrieved_set->_remove_gtid(gtid) != RETURN_STATUS_OK)
7605 {
7606 global_sid_lock->unlock();
7607 mysql_mutex_unlock(log_lock);
7608 goto err;
7609 }
7610 if (!old_retrieved_gtid.empty())
7611 rli->set_last_retrieved_gtid(old_retrieved_gtid);
7612 global_sid_lock->unlock();
7613 }
7614 error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
7615 }
7616 rli->ign_master_log_name_end[0]= 0; // last event is not ignored
7617 if (save_buf != NULL)
7618 buf= save_buf;
7619 }
7620 mysql_mutex_unlock(log_lock);
7621
7622 skip_relay_logging:
7623
7624 err:
7625 if (unlock_data_lock)
7626 mysql_mutex_unlock(&mi->data_lock);
7627 DBUG_PRINT("info", ("error: %d", error));
7628 if (error)
7629 mi->report(ERROR_LEVEL, error, ER(error),
7630 (error == ER_SLAVE_RELAY_LOG_WRITE_FAILURE)?
7631 "could not queue event from master" :
7632 error_msg.ptr());
7633 DBUG_RETURN(error);
7634 }
7635
/**
  Hook to detach the active VIO before closing a connection handle.

  The client API may close a connection (and free the data attached to
  it) when it runs into an unrecoverable (network) error. The client
  code calls this hook before the VIO handle is deleted, which allows
  the thread to detach its active vio so that it does not point to
  freed memory.

  Other calls to THD::clear_active_vio throughout this module are
  redundant due to the hook but are left in place for illustrative
  purposes.

  The body is compiled in only when SIGNAL_WITH_VIO_SHUTDOWN is defined,
  and it acts only when the current thread is flagged as a slave thread.
*/

extern "C" void slave_io_thread_detach_vio()
{
#ifdef SIGNAL_WITH_VIO_SHUTDOWN
  THD *const caller_thd= current_thd;

  /* Nothing to detach without a THD, or for a non-slave thread. */
  if (!caller_thd || !caller_thd->slave_thread)
    return;

  caller_thd->clear_active_vio();
#endif
}
7658
7659
7660 /*
7661 Try to connect until successful or slave killed
7662
7663 SYNPOSIS
7664 safe_connect()
7665 thd Thread handler for slave
7666 mysql MySQL connection handle
7667 mi Replication handle
7668
7669 RETURN
7670 0 ok
7671 # Error
7672 */
7673
safe_connect(THD * thd,MYSQL * mysql,Master_info * mi)7674 static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi)
7675 {
7676 DBUG_ENTER("safe_connect");
7677
7678 DBUG_RETURN(connect_to_master(thd, mysql, mi, 0, 0));
7679 }
7680
7681
7682 /*
7683 SYNPOSIS
7684 connect_to_master()
7685
7686 IMPLEMENTATION
7687 Try to connect until successful or slave killed or we have retried
7688 mi->retry_count times
7689 */
7690
connect_to_master(THD * thd,MYSQL * mysql,Master_info * mi,bool reconnect,bool suppress_warnings)7691 static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
7692 bool reconnect, bool suppress_warnings)
7693 {
7694 int slave_was_killed= 0;
7695 int last_errno= -2; // impossible error
7696 ulong err_count=0;
7697 char llbuff[22];
7698 char password[MAX_PASSWORD_LENGTH + 1];
7699 int password_size= sizeof(password);
7700 DBUG_ENTER("connect_to_master");
7701 set_slave_max_allowed_packet(thd, mysql);
7702 #ifndef DBUG_OFF
7703 mi->events_until_exit = disconnect_slave_event_count;
7704 #endif
7705 ulong client_flag= CLIENT_REMEMBER_OPTIONS;
7706 if (opt_slave_compressed_protocol)
7707 client_flag|= CLIENT_COMPRESS; /* We will use compression */
7708
7709 mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout);
7710 mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout);
7711
7712 if (mi->bind_addr[0])
7713 {
7714 DBUG_PRINT("info",("bind_addr: %s", mi->bind_addr));
7715 mysql_options(mysql, MYSQL_OPT_BIND, mi->bind_addr);
7716 }
7717
7718 #ifdef HAVE_OPENSSL
7719 if (mi->ssl)
7720 {
7721 mysql_ssl_set(mysql,
7722 mi->ssl_key[0]?mi->ssl_key:0,
7723 mi->ssl_cert[0]?mi->ssl_cert:0,
7724 mi->ssl_ca[0]?mi->ssl_ca:0,
7725 mi->ssl_capath[0]?mi->ssl_capath:0,
7726 mi->ssl_cipher[0]?mi->ssl_cipher:0);
7727 mysql_options(mysql, MYSQL_OPT_SSL_CRL,
7728 mi->ssl_crl[0] ? mi->ssl_crl : 0);
7729 mysql_options(mysql, MYSQL_OPT_SSL_CRLPATH,
7730 mi->ssl_crlpath[0] ? mi->ssl_crlpath : 0);
7731 mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
7732 &mi->ssl_verify_server_cert);
7733 }
7734 #endif
7735
7736 /*
7737 If server's default charset is not supported (like utf16, utf32) as client
7738 charset, then set client charset to 'latin1' (default client charset).
7739 */
7740 if (is_supported_parser_charset(default_charset_info))
7741 mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->csname);
7742 else
7743 {
7744 sql_print_information("'%s' can not be used as client character set. "
7745 "'%s' will be used as default client character set "
7746 "while connecting to master.",
7747 default_charset_info->csname,
7748 default_client_charset_info->csname);
7749 mysql_options(mysql, MYSQL_SET_CHARSET_NAME,
7750 default_client_charset_info->csname);
7751 }
7752
7753
7754 /* This one is not strictly needed but we have it here for completeness */
7755 mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) charsets_dir);
7756
7757 if (mi->is_start_plugin_auth_configured())
7758 {
7759 DBUG_PRINT("info", ("Slaving is using MYSQL_DEFAULT_AUTH %s",
7760 mi->get_start_plugin_auth()));
7761 mysql_options(mysql, MYSQL_DEFAULT_AUTH, mi->get_start_plugin_auth());
7762 }
7763
7764 if (mi->is_start_plugin_dir_configured())
7765 {
7766 DBUG_PRINT("info", ("Slaving is using MYSQL_PLUGIN_DIR %s",
7767 mi->get_start_plugin_dir()));
7768 mysql_options(mysql, MYSQL_PLUGIN_DIR, mi->get_start_plugin_dir());
7769 }
7770 /* Set MYSQL_PLUGIN_DIR in case master asks for an external authentication plugin */
7771 else if (opt_plugin_dir_ptr && *opt_plugin_dir_ptr)
7772 mysql_options(mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir_ptr);
7773
7774 if (!mi->is_start_user_configured())
7775 sql_print_warning("%s", ER(ER_INSECURE_CHANGE_MASTER));
7776
7777 if (mi->get_password(password, &password_size))
7778 {
7779 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
7780 ER(ER_SLAVE_FATAL_ERROR),
7781 "Unable to configure password when attempting to "
7782 "connect to the master server. Connection attempt "
7783 "terminated.");
7784 DBUG_RETURN(1);
7785 }
7786
7787 const char* user= mi->get_user();
7788 if (user == NULL || user[0] == 0)
7789 {
7790 mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
7791 ER(ER_SLAVE_FATAL_ERROR),
7792 "Invalid (empty) username when attempting to "
7793 "connect to the master server. Connection attempt "
7794 "terminated.");
7795 DBUG_RETURN(1);
7796 }
7797
7798 while (!(slave_was_killed = io_slave_killed(thd,mi))
7799 && (reconnect ? mysql_reconnect(mysql) != 0 :
7800 mysql_real_connect(mysql, mi->host, user,
7801 password, 0, mi->port, 0, client_flag) == 0))
7802 {
7803 /*
7804 SHOW SLAVE STATUS will display the number of retries which
7805 would be real retry counts instead of mi->retry_count for
7806 each connection attempt by 'Last_IO_Error' entry.
7807 */
7808 last_errno=mysql_errno(mysql);
7809 suppress_warnings= 0;
7810 mi->report(ERROR_LEVEL, last_errno,
7811 "error %s to master '%s@%s:%d'"
7812 " - retry-time: %d retries: %lu",
7813 (reconnect ? "reconnecting" : "connecting"),
7814 mi->get_user(), mi->host, mi->port,
7815 mi->connect_retry, err_count + 1);
7816 /*
7817 By default we try forever. The reason is that failure will trigger
7818 master election, so if the user did not set mi->retry_count we
7819 do not want to have election triggered on the first failure to
7820 connect
7821 */
7822 if (++err_count == mi->retry_count)
7823 {
7824 slave_was_killed=1;
7825 break;
7826 }
7827 slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
7828 }
7829
7830 if (!slave_was_killed)
7831 {
7832 mi->clear_error(); // clear possible left over reconnect error
7833 if (reconnect)
7834 {
7835 if (!suppress_warnings && log_warnings)
7836 sql_print_information("Slave: connected to master '%s@%s:%d',\
7837 replication resumed in log '%s' at position %s", mi->get_user(),
7838 mi->host, mi->port,
7839 mi->get_io_rpl_log_name(),
7840 llstr(mi->get_master_log_pos(),llbuff));
7841 }
7842 else
7843 {
7844 general_log_print(thd, COM_CONNECT_OUT, "%s@%s:%d",
7845 mi->get_user(), mi->host, mi->port);
7846 }
7847 #ifdef SIGNAL_WITH_VIO_SHUTDOWN
7848 thd->set_active_vio(mysql->net.vio);
7849 #endif
7850 }
7851 mysql->reconnect= 1;
7852 DBUG_PRINT("exit",("slave_was_killed: %d", slave_was_killed));
7853 DBUG_RETURN(slave_was_killed);
7854 }
7855
7856
7857 /*
7858 safe_reconnect()
7859
7860 IMPLEMENTATION
7861 Try to connect until successful or slave killed or we have retried
7862 mi->retry_count times
7863 */
7864
safe_reconnect(THD * thd,MYSQL * mysql,Master_info * mi,bool suppress_warnings)7865 static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
7866 bool suppress_warnings)
7867 {
7868 DBUG_ENTER("safe_reconnect");
7869 DBUG_RETURN(connect_to_master(thd, mysql, mi, 1, suppress_warnings));
7870 }
7871
7872
7873 /*
7874 Called when we notice that the current "hot" log got rotated under our feet.
7875 */
7876
reopen_relay_log(Relay_log_info * rli,const char ** errmsg)7877 static IO_CACHE *reopen_relay_log(Relay_log_info *rli, const char **errmsg)
7878 {
7879 DBUG_ENTER("reopen_relay_log");
7880 DBUG_ASSERT(rli->cur_log != &rli->cache_buf);
7881 DBUG_ASSERT(rli->cur_log_fd == -1);
7882
7883 IO_CACHE *cur_log = rli->cur_log=&rli->cache_buf;
7884 if ((rli->cur_log_fd=open_binlog_file(cur_log,rli->get_event_relay_log_name(),
7885 errmsg)) <0)
7886 DBUG_RETURN(0);
7887 /*
7888 We want to start exactly where we was before:
7889 relay_log_pos Current log pos
7890 pending Number of bytes already processed from the event
7891 */
7892 rli->set_event_relay_log_pos(max<ulonglong>(rli->get_event_relay_log_pos(),
7893 BIN_LOG_HEADER_SIZE));
7894 my_b_seek(cur_log,rli->get_event_relay_log_pos());
7895 DBUG_RETURN(cur_log);
7896 }
7897
7898
7899 /**
7900 Reads next event from the relay log. Should be called from the
7901 slave SQL thread.
7902
7903 @param rli Relay_log_info structure for the slave SQL thread.
7904
7905 @return The event read, or NULL on error. If an error occurs, the
7906 error is reported through the sql_print_information() or
7907 sql_print_error() functions.
7908 */
next_event(Relay_log_info * rli)7909 static Log_event* next_event(Relay_log_info* rli)
7910 {
7911 Log_event* ev;
7912 IO_CACHE* cur_log = rli->cur_log;
7913 mysql_mutex_t *log_lock = rli->relay_log.get_log_lock();
7914 const char* errmsg=0;
7915 THD* thd = rli->info_thd;
7916 DBUG_ENTER("next_event");
7917
7918 DBUG_ASSERT(thd != 0);
7919
7920 #ifndef DBUG_OFF
7921 if (abort_slave_event_count && !rli->events_until_exit--)
7922 DBUG_RETURN(0);
7923 #endif
7924
7925 /*
7926 For most operations we need to protect rli members with data_lock,
7927 so we assume calling function acquired this mutex for us and we will
7928 hold it for most of the loop below. However, we will release it
7929 whenever it is worth the hassle, and in the cases when we go into a
7930 mysql_cond_wait() with the non-data_lock mutex
7931 */
7932 mysql_mutex_assert_owner(&rli->data_lock);
7933
7934 while (!sql_slave_killed(thd,rli))
7935 {
7936 /*
7937 We can have two kinds of log reading:
7938 hot_log:
7939 rli->cur_log points at the IO_CACHE of relay_log, which
7940 is actively being updated by the I/O thread. We need to be careful
7941 in this case and make sure that we are not looking at a stale log that
7942 has already been rotated. If it has been, we reopen the log.
7943
7944 The other case is much simpler:
7945 We just have a read only log that nobody else will be updating.
7946 */
7947 bool hot_log;
7948 if ((hot_log = (cur_log != &rli->cache_buf)) ||
7949 DBUG_EVALUATE_IF("force_sql_thread_error", 1, 0))
7950 {
7951 DBUG_ASSERT(rli->cur_log_fd == -1); // foreign descriptor
7952 mysql_mutex_lock(log_lock);
7953
7954 /*
7955 Reading xxx_file_id is safe because the log will only
7956 be rotated when we hold relay_log.LOCK_log
7957 */
7958 if (rli->relay_log.get_open_count() != rli->cur_log_old_open_count &&
7959 DBUG_EVALUATE_IF("force_sql_thread_error", 0, 1))
7960 {
7961 // The master has switched to a new log file; Reopen the old log file
7962 cur_log=reopen_relay_log(rli, &errmsg);
7963 mysql_mutex_unlock(log_lock);
7964 if (!cur_log) // No more log files
7965 goto err;
7966 hot_log=0; // Using old binary log
7967 }
7968 }
7969 /*
7970 As there is no guarantee that the relay is open (for example, an I/O
7971 error during a write by the slave I/O thread may have closed it), we
7972 have to test it.
7973 */
7974 if (!my_b_inited(cur_log) ||
7975 DBUG_EVALUATE_IF("force_sql_thread_error", 1, 0))
7976 {
7977 if (hot_log)
7978 mysql_mutex_unlock(log_lock);
7979 goto err;
7980 }
7981 #ifndef DBUG_OFF
7982 {
7983 DBUG_PRINT("info", ("assertion skip %lu file pos %lu event relay log pos %lu file %s\n",
7984 (ulong) rli->slave_skip_counter, (ulong) my_b_tell(cur_log),
7985 (ulong) rli->get_event_relay_log_pos(),
7986 rli->get_event_relay_log_name()));
7987
7988 /* This is an assertion which sometimes fails, let's try to track it */
7989 char llbuf1[22], llbuf2[22];
7990 DBUG_PRINT("info", ("my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s",
7991 llstr(my_b_tell(cur_log),llbuf1),
7992 llstr(rli->get_event_relay_log_pos(),llbuf2)));
7993
7994 DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
7995 DBUG_ASSERT(my_b_tell(cur_log) == rli->get_event_relay_log_pos() || rli->is_parallel_exec());
7996
7997 DBUG_PRINT("info", ("next_event group master %s %lu group relay %s %lu event %s %lu\n",
7998 rli->get_group_master_log_name(),
7999 (ulong) rli->get_group_master_log_pos(),
8000 rli->get_group_relay_log_name(),
8001 (ulong) rli->get_group_relay_log_pos(),
8002 rli->get_event_relay_log_name(),
8003 (ulong) rli->get_event_relay_log_pos()));
8004 }
8005 #endif
8006 /*
8007 Relay log is always in new format - if the master is 3.23, the
8008 I/O thread will convert the format for us.
8009 A problem: the description event may be in a previous relay log. So if
8010 the slave has been shutdown meanwhile, we would have to look in old relay
8011 logs, which may even have been deleted. So we need to write this
8012 description event at the beginning of the relay log.
8013 When the relay log is created when the I/O thread starts, easy: the
8014 master will send the description event and we will queue it.
8015 But if the relay log is created by new_file(): then the solution is:
8016 MYSQL_BIN_LOG::open() will write the buffered description event.
8017 */
8018 if ((ev= Log_event::read_log_event(cur_log, 0,
8019 rli->get_rli_description_event(),
8020 opt_slave_sql_verify_checksum)))
8021 {
8022 DBUG_ASSERT(thd==rli->info_thd);
8023 /*
8024 read it while we have a lock, to avoid a mutex lock in
8025 inc_event_relay_log_pos()
8026 */
8027 rli->set_future_event_relay_log_pos(my_b_tell(cur_log));
8028 ev->future_event_relay_log_pos= rli->get_future_event_relay_log_pos();
8029
8030 if (hot_log)
8031 mysql_mutex_unlock(log_lock);
8032
8033 /*
8034 MTS checkpoint in the successful read branch
8035 */
8036 bool force= (rli->checkpoint_seqno > (rli->checkpoint_group - 1));
8037 if (rli->is_parallel_exec() && (opt_mts_checkpoint_period != 0 || force))
8038 {
8039 ulonglong period= static_cast<ulonglong>(opt_mts_checkpoint_period * 1000000ULL);
8040 mysql_mutex_unlock(&rli->data_lock);
8041 /*
8042 At this point the coordinator is delegating jobs to workers and
8043 the checkpoint routine must be periodically invoked.
8044 */
8045 (void) mts_checkpoint_routine(rli, period, force, true/*need_data_lock=true*/); // TODO: ALFRANIO ERROR
8046 DBUG_ASSERT(!force ||
8047 (force && (rli->checkpoint_seqno <= (rli->checkpoint_group - 1))) ||
8048 sql_slave_killed(thd, rli));
8049 mysql_mutex_lock(&rli->data_lock);
8050 }
8051 DBUG_RETURN(ev);
8052 }
8053 DBUG_ASSERT(thd==rli->info_thd);
8054 if (opt_reckless_slave) // For mysql-test
8055 cur_log->error = 0;
8056 if (cur_log->error < 0)
8057 {
8058 errmsg = "slave SQL thread aborted because of I/O error";
8059 if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)
8060 /*
8061 MTS group status is set to MTS_KILLED_GROUP, whenever a read event
8062 error happens and there was already a non-terminal event scheduled.
8063 */
8064 rli->mts_group_status= Relay_log_info::MTS_KILLED_GROUP;
8065 if (hot_log)
8066 mysql_mutex_unlock(log_lock);
8067 goto err;
8068 }
8069 if (!cur_log->error) /* EOF */
8070 {
8071 /*
8072 On a hot log, EOF means that there are no more updates to
8073 process and we must block until I/O thread adds some and
8074 signals us to continue
8075 */
8076 if (hot_log)
8077 {
8078 /*
8079 We say in Seconds_Behind_Master that we have "caught up". Note that
8080 for example if network link is broken but I/O slave thread hasn't
8081 noticed it (slave_net_timeout not elapsed), then we'll say "caught
8082 up" whereas we're not really caught up. Fixing that would require
8083 internally cutting timeout in smaller pieces in network read, no
8084 thanks. Another example: SQL has caught up on I/O, now I/O has read
8085 a new event and is queuing it; the false "0" will exist until SQL
8086 finishes executing the new event; it will look abnormal only if
8087 the events have old timestamps (then you get "many", 0, "many").
8088
8089 Transient phases like this can be fixed with implementing
8090 Heartbeat event which provides the slave the status of the
8091 master at time the master does not have any new update to send.
8092 Seconds_Behind_Master would be zero only when master has no
8093 more updates in binlog for slave. The heartbeat can be sent
8094 in a (small) fraction of slave_net_timeout. Until it's done
8095 rli->last_master_timestamp is temporarily (for time of
8096 waiting for the following event) reset whenever EOF is
8097 reached.
8098 */
8099
8100 /* shows zero while it is sleeping (and until the next event
8101 is about to be executed). Note, in MTS case
8102 Seconds_Behind_Master resetting follows slightly different
8103 schema where reaching EOF is not enough. The status
8104 parameter is updated per some number of processed group of
8105 events. The number can't be greater than
8106 @@global.slave_checkpoint_group and anyway SBM updating
8107 rate does not exceed @@global.slave_checkpoint_period.
8108 Notice that SBM is set to a new value after processing the
8109 terminal event (e.g Commit) of a group. Coordinator resets
8110 SBM when notices no more groups left neither to read from
8111 Relay-log nor to process by Workers.
8112 */
8113 if (!rli->is_parallel_exec())
8114 rli->last_master_timestamp= 0;
8115
8116 DBUG_ASSERT(rli->relay_log.get_open_count() ==
8117 rli->cur_log_old_open_count);
8118
8119 if (rli->ign_master_log_name_end[0])
8120 {
8121 /* We generate and return a Rotate, to make our positions advance */
8122 DBUG_PRINT("info",("seeing an ignored end segment"));
8123 ev= new Rotate_log_event(rli->ign_master_log_name_end,
8124 0, rli->ign_master_log_pos_end,
8125 Rotate_log_event::DUP_NAME);
8126 rli->ign_master_log_name_end[0]= 0;
8127 mysql_mutex_unlock(log_lock);
8128 if (unlikely(!ev))
8129 {
8130 errmsg= "Slave SQL thread failed to create a Rotate event "
8131 "(out of memory?), SHOW SLAVE STATUS may be inaccurate";
8132 goto err;
8133 }
8134 ev->server_id= 0; // don't be ignored by slave SQL thread
8135 DBUG_RETURN(ev);
8136 }
8137
8138 /*
8139 We can, and should release data_lock while we are waiting for
8140 update. If we do not, show slave status will block
8141 */
8142 mysql_mutex_unlock(&rli->data_lock);
8143
8144 /*
8145 Possible deadlock :
8146 - the I/O thread has reached log_space_limit
8147 - the SQL thread has read all relay logs, but cannot purge for some
8148 reason:
8149 * it has already purged all logs except the current one
8150 * there are other logs than the current one but they're involved in
8151 a transaction that finishes in the current one (or is not finished)
8152 Solution :
8153 Wake up the possibly waiting I/O thread, and set a boolean asking
8154 the I/O thread to temporarily ignore the log_space_limit
8155 constraint, because we do not want the I/O thread to block because of
8156 space (it's ok if it blocks for any other reason (e.g. because the
8157 master does not send anything). Then the I/O thread stops waiting
8158 and reads one more event and starts honoring log_space_limit again.
8159
8160 If the SQL thread needs more events to be able to rotate the log (it
8161 might need to finish the current group first), then it can ask for one
8162 more at a time. Thus we don't outgrow the relay log indefinitely,
8163 but rather in a controlled manner, until the next rotate.
8164
8165 When the SQL thread starts it sets ignore_log_space_limit to false.
8166 We should also reset ignore_log_space_limit to 0 when the user does
8167 RESET SLAVE, but in fact, no need as RESET SLAVE requires that the slave
8168 be stopped, and the SQL thread sets ignore_log_space_limit to 0 when
8169 it stops.
8170 */
8171 mysql_mutex_lock(&rli->log_space_lock);
8172
8173 /*
8174 If we have reached the limit of the relay space and we
8175 are going to sleep, waiting for more events:
8176
8177 1. If outside a group, SQL thread asks the IO thread
8178 to force a rotation so that the SQL thread purges
8179 logs next time it processes an event (thus space is
8180 freed).
8181
8182 2. If in a group, SQL thread asks the IO thread to
8183 ignore the limit and queues yet one more event
8184 so that the SQL thread finishes the group and
8185 is able to rotate and purge sometime soon.
8186 */
8187 if (rli->log_space_limit &&
8188 rli->log_space_limit < rli->log_space_total)
8189 {
8190 /* force rotation if not in an unfinished group */
8191 if (!rli->is_parallel_exec())
8192 {
8193 rli->sql_force_rotate_relay= !rli->is_in_group();
8194 }
8195 else
8196 {
8197 rli->sql_force_rotate_relay=
8198 (rli->mts_group_status != Relay_log_info::MTS_IN_GROUP);
8199 }
8200 /* ask for one more event */
8201 rli->ignore_log_space_limit= true;
8202 }
8203
8204 /*
8205 If the I/O thread is blocked, unblock it. Ok to broadcast
8206 after unlock, because the mutex is only destroyed in
8207 ~Relay_log_info(), i.e. when rli is destroyed, and rli will
8208 not be destroyed before we exit the present function.
8209 */
8210 mysql_mutex_unlock(&rli->log_space_lock);
8211 mysql_cond_broadcast(&rli->log_space_cond);
8212 // Note that wait_for_update_relay_log unlocks lock_log !
8213
8214 if (rli->is_parallel_exec() && (opt_mts_checkpoint_period != 0 ||
8215 DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0)))
8216 {
8217 int ret= 0;
8218 struct timespec waittime;
8219 ulonglong period= static_cast<ulonglong>(opt_mts_checkpoint_period * 1000000ULL);
8220 ulong signal_cnt= rli->relay_log.signal_cnt;
8221
8222 mysql_mutex_unlock(log_lock);
8223 do
8224 {
8225 /*
8226 At this point the coordinator has no job to delegate to workers.
8227 However, workers are executing their assigned jobs and as such
8228 the checkpoint routine must be periodically invoked.
8229 */
8230 (void) mts_checkpoint_routine(rli, period, false, true/*need_data_lock=true*/); // TODO: ALFRANIO ERROR
8231 mysql_mutex_lock(log_lock);
8232 // More to the empty relay-log all assigned events done so reset it.
8233 if (rli->gaq->empty())
8234 rli->last_master_timestamp= 0;
8235
8236 if (DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
8237 period= 10000000ULL;
8238
8239 set_timespec_nsec(waittime, period);
8240 ret= rli->relay_log.wait_for_update_relay_log(thd, &waittime);
8241 } while ((ret == ETIMEDOUT || ret == ETIME) /* todo:remove */ &&
8242 signal_cnt == rli->relay_log.signal_cnt && !thd->killed);
8243 }
8244 else
8245 {
8246 rli->relay_log.wait_for_update_relay_log(thd, NULL);
8247 }
8248
8249 // re-acquire data lock since we released it earlier
8250 mysql_mutex_lock(&rli->data_lock);
8251 continue;
8252 }
8253 /*
8254 If the log was not hot, we need to move to the next log in
8255 sequence. The next log could be hot or cold, we deal with both
8256 cases separately after doing some common initialization
8257 */
8258 end_io_cache(cur_log);
8259 DBUG_ASSERT(rli->cur_log_fd >= 0);
8260 mysql_file_close(rli->cur_log_fd, MYF(MY_WME));
8261 rli->cur_log_fd = -1;
8262
8263 if (relay_log_purge)
8264 {
8265 /*
8266 purge_first_log will properly set up relay log coordinates in rli.
8267 If the group's coordinates are equal to the event's coordinates
8268 (i.e. the relay log was not rotated in the middle of a group),
8269 we can purge this relay log too.
8270 We do ulonglong and string comparisons, this may be slow but
8271 - purging the last relay log is nice (it can save 1GB of disk), so we
8272 like to detect the case where we can do it, and given this,
8273 - I see no better detection method
8274 - purge_first_log is not called that often
8275 */
8276 if (rli->relay_log.purge_first_log
8277 (rli,
8278 rli->get_group_relay_log_pos() == rli->get_event_relay_log_pos()
8279 && !strcmp(rli->get_group_relay_log_name(),rli->get_event_relay_log_name())))
8280 {
8281 errmsg = "Error purging processed logs";
8282 goto err;
8283 }
8284 DBUG_PRINT("info", ("next_event group master %s %lu group relay %s %lu event %s %lu\n",
8285 rli->get_group_master_log_name(),
8286 (ulong) rli->get_group_master_log_pos(),
8287 rli->get_group_relay_log_name(),
8288 (ulong) rli->get_group_relay_log_pos(),
8289 rli->get_event_relay_log_name(),
8290 (ulong) rli->get_event_relay_log_pos()));
8291 }
8292 else
8293 {
8294 /*
8295 If hot_log is set, then we already have a lock on
8296 LOCK_log. If not, we have to get the lock.
8297
8298 According to Sasha, the only time this code will ever be executed
8299 is if we are recovering from a bug.
8300 */
8301 if (rli->relay_log.find_next_log(&rli->linfo, !hot_log))
8302 {
8303 errmsg = "error switching to the next log";
8304 goto err;
8305 }
8306 rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
8307 rli->set_event_relay_log_name(rli->linfo.log_file_name);
8308 /*
8309 We may update the worker here but this is not extremlly
8310 necessary. /Alfranio
8311 */
8312 rli->flush_info();
8313 }
8314
8315 /* Reset the relay-log-change-notified status of Slave Workers */
8316 if (rli->is_parallel_exec())
8317 {
8318 DBUG_PRINT("info", ("next_event: MTS group relay log changes to %s %lu\n",
8319 rli->get_group_relay_log_name(),
8320 (ulong) rli->get_group_relay_log_pos()));
8321 rli->reset_notified_relay_log_change();
8322 }
8323
8324 /*
8325 Now we want to open this next log. To know if it's a hot log (the one
8326 being written by the I/O thread now) or a cold log, we can use
8327 is_active(); if it is hot, we use the I/O cache; if it's cold we open
8328 the file normally. But if is_active() reports that the log is hot, this
8329 may change between the test and the consequence of the test. So we may
8330 open the I/O cache whereas the log is now cold, which is nonsense.
8331 To guard against this, we need to have LOCK_log.
8332 */
8333
8334 DBUG_PRINT("info",("hot_log: %d",hot_log));
8335 if (!hot_log) /* if hot_log, we already have this mutex */
8336 mysql_mutex_lock(log_lock);
8337 if (rli->relay_log.is_active(rli->linfo.log_file_name))
8338 {
8339 #ifdef EXTRA_DEBUG
8340 if (log_warnings)
8341 sql_print_information("next log '%s' is currently active",
8342 rli->linfo.log_file_name);
8343 #endif
8344 rli->cur_log= cur_log= rli->relay_log.get_log_file();
8345 rli->cur_log_old_open_count= rli->relay_log.get_open_count();
8346 DBUG_ASSERT(rli->cur_log_fd == -1);
8347
8348 /*
8349 When the SQL thread is [stopped and] (re)started the
8350 following may happen:
8351
8352 1. Log was hot at stop time and remains hot at restart
8353
8354 SQL thread reads again from hot_log (SQL thread was
8355 reading from the active log when it was stopped and the
8356 very same log is still active on SQL thread restart).
8357
8358 In this case, my_b_seek is performed on cur_log, while
8359 cur_log points to relay_log.get_log_file();
8360
8361 2. Log was hot at stop time but got cold before restart
8362
8363 The log was hot when SQL thread stopped, but it is not
8364 anymore when the SQL thread restarts.
8365
8366 In this case, the SQL thread reopens the log, using
8367 cache_buf, ie, cur_log points to &cache_buf, and thence
8368 its coordinates are reset.
8369
8370 3. Log was already cold at stop time
8371
8372 The log was not hot when the SQL thread stopped, and, of
8373 course, it will not be hot when it restarts.
8374
8375 In this case, the SQL thread opens the cold log again,
8376 using cache_buf, ie, cur_log points to &cache_buf, and
8377 thence its coordinates are reset.
8378
8379 4. Log was hot at stop time, DBA changes to previous cold
8380 log and restarts SQL thread
8381
8382 The log was hot when the SQL thread was stopped, but the
8383 user changed the coordinates of the SQL thread to
8384 restart from a previous cold log.
8385
8386 In this case, at start time, cur_log points to a cold
8387 log, opened using &cache_buf as cache, and coordinates
8388 are reset. However, as it moves on to the next logs, it
8389 will eventually reach the hot log. If the hot log is the
8390 same at the time the SQL thread was stopped, then
8391 coordinates were not reset - the cur_log will point to
8392 relay_log.get_log_file(), and not a freshly opened
8393 IO_CACHE through cache_buf. For this reason we need to
8394 deploy a my_b_seek before calling check_binlog_magic at
8395 this point of the code (see: BUG#55263 for more
8396 details).
8397
8398 NOTES:
8399 - We must keep the LOCK_log to read the 4 first bytes, as
8400 this is a hot log (same as when we call read_log_event()
8401 above: for a hot log we take the mutex).
8402
8403 - Because of scenario #4 above, we need to have a
8404 my_b_seek here. Otherwise, we might hit the assertion
8405 inside check_binlog_magic.
8406 */
8407
8408 my_b_seek(cur_log, (my_off_t) 0);
8409 if (check_binlog_magic(cur_log,&errmsg))
8410 {
8411 if (!hot_log)
8412 mysql_mutex_unlock(log_lock);
8413 goto err;
8414 }
8415 if (!hot_log)
8416 mysql_mutex_unlock(log_lock);
8417 continue;
8418 }
8419 if (!hot_log)
8420 mysql_mutex_unlock(log_lock);
8421 /*
8422 if we get here, the log was not hot, so we will have to open it
8423 ourselves. We are sure that the log is still not hot now (a log can get
8424 from hot to cold, but not from cold to hot). No need for LOCK_log.
8425 */
8426 #ifdef EXTRA_DEBUG
8427 if (log_warnings)
8428 sql_print_information("next log '%s' is not active",
8429 rli->linfo.log_file_name);
8430 #endif
8431 // open_binlog_file() will check the magic header
8432 if ((rli->cur_log_fd=open_binlog_file(cur_log,rli->linfo.log_file_name,
8433 &errmsg)) <0)
8434 goto err;
8435 }
8436 else
8437 {
8438 /*
8439 Read failed with a non-EOF error.
8440 TODO: come up with something better to handle this error
8441 */
8442 if (hot_log)
8443 mysql_mutex_unlock(log_lock);
8444 sql_print_error("Slave SQL thread: I/O error reading \
8445 event(errno: %d cur_log->error: %d)",
8446 my_errno,cur_log->error);
8447 // set read position to the beginning of the event
8448 my_b_seek(cur_log,rli->get_event_relay_log_pos());
8449 /* otherwise, we have had a partial read */
8450 errmsg = "Aborting slave SQL thread because of partial event read";
8451 break; // To end of function
8452 }
8453 }
8454 if (!errmsg && log_warnings)
8455 {
8456 sql_print_information("Error reading relay log event: %s",
8457 "slave SQL thread was killed");
8458 DBUG_RETURN(0);
8459 }
8460
8461 err:
8462 if (errmsg)
8463 sql_print_error("Error reading relay log event: %s", errmsg);
8464 DBUG_RETURN(0);
8465 }
8466
8467 /*
8468 Rotate a relay log (this is used only by FLUSH LOGS; the automatic rotation
8469 because of size is simpler because when we do it we already have all relevant
8470 locks; here we don't, so this function is mainly taking locks).
8471 Returns nothing as we cannot catch any error (MYSQL_BIN_LOG::new_file()
8472 is void).
8473 */
8474
rotate_relay_log(Master_info * mi,bool need_log_space_lock)8475 int rotate_relay_log(Master_info* mi, bool need_log_space_lock)
8476 {
8477 DBUG_ENTER("rotate_relay_log");
8478
8479 mysql_mutex_assert_owner(&mi->data_lock);
8480 DBUG_EXECUTE_IF("crash_before_rotate_relaylog", DBUG_SUICIDE(););
8481
8482 Relay_log_info* rli= mi->rli;
8483 int error= 0;
8484
8485 /*
8486 We need to test inited because otherwise, new_file() will attempt to lock
8487 LOCK_log, which may not be inited (if we're not a slave).
8488 */
8489 if (!rli->inited)
8490 {
8491 DBUG_PRINT("info", ("rli->inited == 0"));
8492 goto end;
8493 }
8494
8495 /* If the relay log is closed, new_file() will do nothing. */
8496 error= rli->relay_log.new_file(mi->get_mi_description_event());
8497 if (error != 0)
8498 goto end;
8499
8500 /*
8501 We harvest now, because otherwise BIN_LOG_HEADER_SIZE will not immediately
8502 be counted, so imagine a succession of FLUSH LOGS and assume the slave
8503 threads are started:
8504 relay_log_space decreases by the size of the deleted relay log, but does
8505 not increase, so flush-after-flush we may become negative, which is wrong.
8506 Even if this will be corrected as soon as a query is replicated on the
8507 slave (because the I/O thread will then call harvest_bytes_written() which
8508 will harvest all these BIN_LOG_HEADER_SIZE we forgot), it may give strange
8509 output in SHOW SLAVE STATUS meanwhile. So we harvest now.
8510 If the log is closed, then this will just harvest the last writes, probably
8511 0 as they probably have been harvested.
8512 */
8513 rli->relay_log.harvest_bytes_written(rli, need_log_space_lock);
8514 end:
8515 DBUG_RETURN(error);
8516 }
8517
8518
8519 /**
8520 Detects, based on master's version (as found in the relay log), if master
8521 has a certain bug.
8522 @param rli Relay_log_info which tells the master's version
8523 @param bug_id Number of the bug as found in bugs.mysql.com
8524 @param report bool report error message, default TRUE
8525
8526 @param pred Predicate function that will be called with @c param to
8527 check for the bug. If the function return @c true, the bug is present,
8528 otherwise, it is not.
8529
8530 @param param State passed to @c pred function.
8531
8532 @return TRUE if master has the bug, FALSE if it does not.
8533 */
rpl_master_has_bug(const Relay_log_info * rli,uint bug_id,bool report,bool (* pred)(const void *),const void * param)8534 bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
8535 bool (*pred)(const void *), const void *param)
8536 {
8537 struct st_version_range_for_one_bug {
8538 uint bug_id;
8539 const uchar introduced_in[3]; // first version with bug
8540 const uchar fixed_in[3]; // first version with fix
8541 };
8542 static struct st_version_range_for_one_bug versions_for_all_bugs[]=
8543 {
8544 {24432, { 5, 0, 24 }, { 5, 0, 38 } },
8545 {24432, { 5, 1, 12 }, { 5, 1, 17 } },
8546 {33029, { 5, 0, 0 }, { 5, 0, 58 } },
8547 {33029, { 5, 1, 0 }, { 5, 1, 12 } },
8548 {37426, { 5, 1, 0 }, { 5, 1, 26 } },
8549 };
8550 const uchar *master_ver=
8551 rli->get_rli_description_event()->server_version_split;
8552
8553 DBUG_ASSERT(sizeof(rli->get_rli_description_event()->server_version_split) == 3);
8554
8555 for (uint i= 0;
8556 i < sizeof(versions_for_all_bugs)/sizeof(*versions_for_all_bugs);i++)
8557 {
8558 const uchar *introduced_in= versions_for_all_bugs[i].introduced_in,
8559 *fixed_in= versions_for_all_bugs[i].fixed_in;
8560 if ((versions_for_all_bugs[i].bug_id == bug_id) &&
8561 (memcmp(introduced_in, master_ver, 3) <= 0) &&
8562 (memcmp(fixed_in, master_ver, 3) > 0) &&
8563 (pred == NULL || (*pred)(param)))
8564 {
8565 enum loglevel report_level= INFORMATION_LEVEL;
8566 if (!report)
8567 return TRUE;
8568 // a short message for SHOW SLAVE STATUS (message length constraints)
8569 my_printf_error(ER_UNKNOWN_ERROR, "master may suffer from"
8570 " http://bugs.mysql.com/bug.php?id=%u"
8571 " so slave stops; check error log on slave"
8572 " for more info", MYF(0), bug_id);
8573 // a verbose message for the error log
8574 if (!ignored_error_code(ER_UNKNOWN_ERROR))
8575 {
8576 report_level= ERROR_LEVEL;
8577 current_thd->is_slave_error= 1;
8578 }
8579 /* In case of ignored errors report warnings only if log_warnings > 1. */
8580 else if (log_warnings > 1)
8581 report_level= WARNING_LEVEL;
8582
8583 if (report_level != INFORMATION_LEVEL)
8584 rli->report(report_level, ER_UNKNOWN_ERROR,
8585 "According to the master's version ('%s'),"
8586 " it is probable that master suffers from this bug:"
8587 " http://bugs.mysql.com/bug.php?id=%u"
8588 " and thus replicating the current binary log event"
8589 " may make the slave's data become different from the"
8590 " master's data."
8591 " To take no risk, slave refuses to replicate"
8592 " this event and stops."
8593 " We recommend that all updates be stopped on the"
8594 " master and slave, that the data of both be"
8595 " manually synchronized,"
8596 " that master's binary logs be deleted,"
8597 " that master be upgraded to a version at least"
8598 " equal to '%d.%d.%d'. Then replication can be"
8599 " restarted.",
8600 rli->get_rli_description_event()->server_version,
8601 bug_id,
8602 fixed_in[0], fixed_in[1], fixed_in[2]);
8603 return TRUE;
8604 }
8605 }
8606 return FALSE;
8607 }
8608
8609 /**
8610 BUG#33029, For all 5.0 up to 5.0.58 exclusive, and 5.1 up to 5.1.12
8611 exclusive, if one statement in a SP generated AUTO_INCREMENT value
8612 by the top statement, all statements after it would be considered
8613 generated AUTO_INCREMENT value by the top statement, and a
8614 erroneous INSERT_ID value might be associated with these statement,
8615 which could cause duplicate entry error and stop the slave.
8616
8617 Detect buggy master to work around.
8618 */
rpl_master_erroneous_autoinc(THD * thd)8619 bool rpl_master_erroneous_autoinc(THD *thd)
8620 {
8621 if (active_mi != NULL && active_mi->rli->info_thd == thd)
8622 {
8623 Relay_log_info *rli= active_mi->rli;
8624 DBUG_EXECUTE_IF("simulate_bug33029", return TRUE;);
8625 return rpl_master_has_bug(rli, 33029, FALSE, NULL, NULL);
8626 }
8627 return FALSE;
8628 }
8629
/**
  A copy of active_mi->rli->slave_skip_counter, kept so that the value can
  be shown in SHOW VARIABLES, INFORMATION_SCHEMA.GLOBAL_VARIABLES and
  @@sql_slave_skip_counter without taking all the mutexes needed to access
  active_mi->rli->slave_skip_counter properly.
*/
uint sql_slave_skip_counter;
8637
8638 /**
8639 Execute a START SLAVE statement.
8640
8641 @param thd Pointer to THD object for the client thread executing the
8642 statement.
8643
8644 @param mi Pointer to Master_info object for the slave's IO thread.
8645
8646 @param net_report If true, saves the exit status into Diagnostics_area.
8647
8648 @retval 0 success
8649 @retval 1 error
8650 */
start_slave(THD * thd,Master_info * mi,bool net_report)8651 int start_slave(THD* thd , Master_info* mi, bool net_report)
8652 {
8653 int slave_errno= 0;
8654 int thread_mask;
8655 DBUG_ENTER("start_slave");
8656
8657 if (check_access(thd, SUPER_ACL, any_db, NULL, NULL, 0, 0))
8658 DBUG_RETURN(1);
8659
8660 if (thd->lex->slave_connection.user ||
8661 thd->lex->slave_connection.password)
8662 {
8663 #if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
8664 if (thd->vio_ok() && !thd->net.vio->ssl_arg)
8665 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
8666 ER_INSECURE_PLAIN_TEXT,
8667 ER(ER_INSECURE_PLAIN_TEXT));
8668 #endif
8669 #if !defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
8670 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
8671 ER_INSECURE_PLAIN_TEXT,
8672 ER(ER_INSECURE_PLAIN_TEXT));
8673 #endif
8674 }
8675
8676 lock_slave_threads(mi); // this allows us to cleanly read slave_running
8677 // Get a mask of _stopped_ threads
8678 init_thread_mask(&thread_mask,mi,1 /* inverse */);
8679 /*
8680 Below we will start all stopped threads. But if the user wants to
8681 start only one thread, do as if the other thread was running (as we
8682 don't wan't to touch the other thread), so set the bit to 0 for the
8683 other thread
8684 */
8685 if (thd->lex->slave_thd_opt)
8686 thread_mask&= thd->lex->slave_thd_opt;
8687 if (thread_mask) //some threads are stopped, start them
8688 {
8689 if (global_init_info(mi, false, thread_mask))
8690 slave_errno=ER_MASTER_INFO;
8691 else if (server_id_supplied && *mi->host)
8692 {
8693 /*
8694 If we will start IO thread we need to take care of possible
8695 options provided through the START SLAVE if there is any.
8696 */
8697 if (thread_mask & SLAVE_IO)
8698 {
8699 if (thd->lex->slave_connection.user)
8700 {
8701 mi->set_start_user_configured(true);
8702 mi->set_user(thd->lex->slave_connection.user);
8703 }
8704 if (thd->lex->slave_connection.password)
8705 {
8706 mi->set_start_user_configured(true);
8707 mi->set_password(thd->lex->slave_connection.password,
8708 strlen(thd->lex->slave_connection.password));
8709 }
8710 if (thd->lex->slave_connection.plugin_auth)
8711 mi->set_plugin_auth(thd->lex->slave_connection.plugin_auth);
8712 if (thd->lex->slave_connection.plugin_dir)
8713 mi->set_plugin_dir(thd->lex->slave_connection.plugin_dir);
8714 }
8715
8716 /*
8717 If we will start SQL thread we will care about UNTIL options If
8718 not and they are specified we will ignore them and warn user
8719 about this fact.
8720 */
8721 if (thread_mask & SLAVE_SQL)
8722 {
8723 /*
8724 To cache the MTS system var values and used them in the following
8725 runtime. The system var:s can change meanwhile but having no other
8726 effects.
8727 */
8728 mi->rli->opt_slave_parallel_workers= opt_mts_slave_parallel_workers;
8729 #ifndef DBUG_OFF
8730 if (!DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0))
8731 #endif
8732 mi->rli->checkpoint_group= opt_mts_checkpoint_group;
8733
8734 mysql_mutex_lock(&mi->rli->data_lock);
8735
8736 if (thd->lex->mi.pos)
8737 {
8738 if (thd->lex->mi.relay_log_pos)
8739 slave_errno= ER_BAD_SLAVE_UNTIL_COND;
8740 mi->rli->until_condition= Relay_log_info::UNTIL_MASTER_POS;
8741 mi->rli->until_log_pos= thd->lex->mi.pos;
8742 /*
8743 We don't check thd->lex->mi.log_file_name for NULL here
8744 since it is checked in sql_yacc.yy
8745 */
8746 strmake(mi->rli->until_log_name, thd->lex->mi.log_file_name,
8747 sizeof(mi->rli->until_log_name)-1);
8748 }
8749 else if (thd->lex->mi.relay_log_pos)
8750 {
8751 if (thd->lex->mi.pos)
8752 slave_errno= ER_BAD_SLAVE_UNTIL_COND;
8753 mi->rli->until_condition= Relay_log_info::UNTIL_RELAY_POS;
8754 mi->rli->until_log_pos= thd->lex->mi.relay_log_pos;
8755 strmake(mi->rli->until_log_name, thd->lex->mi.relay_log_name,
8756 sizeof(mi->rli->until_log_name)-1);
8757 }
8758 else if (thd->lex->mi.gtid)
8759 {
8760 global_sid_lock->wrlock();
8761 mi->rli->clear_until_condition();
8762 if (mi->rli->until_sql_gtids.add_gtid_text(thd->lex->mi.gtid)
8763 != RETURN_STATUS_OK)
8764 slave_errno= ER_BAD_SLAVE_UNTIL_COND;
8765 else {
8766 mi->rli->until_condition=
8767 LEX_MASTER_INFO::UNTIL_SQL_BEFORE_GTIDS == thd->lex->mi.gtid_until_condition
8768 ? Relay_log_info::UNTIL_SQL_BEFORE_GTIDS
8769 : Relay_log_info::UNTIL_SQL_AFTER_GTIDS;
8770 if ((mi->rli->until_condition ==
8771 Relay_log_info::UNTIL_SQL_AFTER_GTIDS) &&
8772 mi->rli->opt_slave_parallel_workers != 0)
8773 {
8774 mi->rli->opt_slave_parallel_workers= 0;
8775 push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
8776 ER_MTS_FEATURE_IS_NOT_SUPPORTED,
8777 ER(ER_MTS_FEATURE_IS_NOT_SUPPORTED),
8778 "UNTIL condtion",
8779 "Slave is started in the sequential execution mode.");
8780 }
8781 }
8782 global_sid_lock->unlock();
8783 }
8784 else if (thd->lex->mi.until_after_gaps)
8785 {
8786 mi->rli->until_condition= Relay_log_info::UNTIL_SQL_AFTER_MTS_GAPS;
8787 mi->rli->opt_slave_parallel_workers=
8788 mi->rli->recovery_parallel_workers;
8789 }
8790 else
8791 mi->rli->clear_until_condition();
8792
8793 if (mi->rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ||
8794 mi->rli->until_condition == Relay_log_info::UNTIL_RELAY_POS)
8795 {
8796 /* Preparing members for effective until condition checking */
8797 const char *p= fn_ext(mi->rli->until_log_name);
8798 char *p_end;
8799 if (*p)
8800 {
8801 //p points to '.'
8802 mi->rli->until_log_name_extension= strtoul(++p,&p_end, 10);
8803 /*
8804 p_end points to the first invalid character. If it equals
8805 to p, no digits were found, error. If it contains '\0' it
8806 means conversion went ok.
8807 */
8808 if (p_end==p || *p_end)
8809 slave_errno=ER_BAD_SLAVE_UNTIL_COND;
8810 }
8811 else
8812 slave_errno=ER_BAD_SLAVE_UNTIL_COND;
8813
8814 /* mark the cached result of the UNTIL comparison as "undefined" */
8815 mi->rli->until_log_names_cmp_result=
8816 Relay_log_info::UNTIL_LOG_NAMES_CMP_UNKNOWN;
8817
8818 /* Issuing warning then started without --skip-slave-start */
8819 if (!opt_skip_slave_start)
8820 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
8821 ER_MISSING_SKIP_SLAVE,
8822 ER(ER_MISSING_SKIP_SLAVE));
8823 if (mi->rli->opt_slave_parallel_workers != 0)
8824 {
8825 mi->rli->opt_slave_parallel_workers= 0;
8826 push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
8827 ER_MTS_FEATURE_IS_NOT_SUPPORTED,
8828 ER(ER_MTS_FEATURE_IS_NOT_SUPPORTED),
8829 "UNTIL condtion",
8830 "Slave is started in the sequential execution mode.");
8831 }
8832 }
8833
8834 mysql_mutex_unlock(&mi->rli->data_lock);
8835
8836 /* MTS technical limitation no support of trans retry */
8837 if (mi->rli->opt_slave_parallel_workers != 0 && slave_trans_retries != 0)
8838 {
8839 push_warning_printf(thd, Sql_condition::WARN_LEVEL_NOTE,
8840 ER_MTS_FEATURE_IS_NOT_SUPPORTED,
8841 ER(ER_MTS_FEATURE_IS_NOT_SUPPORTED),
8842 "slave_transaction_retries",
8843 "In the event of a transient failure, the slave will "
8844 "not retry the transaction and will stop.");
8845 }
8846 }
8847 else if (thd->lex->mi.pos || thd->lex->mi.relay_log_pos || thd->lex->mi.gtid)
8848 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_UNTIL_COND_IGNORED,
8849 ER(ER_UNTIL_COND_IGNORED));
8850
8851 if (!slave_errno)
8852 slave_errno = start_slave_threads(false/*need_lock_slave=false*/,
8853 true/*wait_for_start=true*/,
8854 mi,
8855 thread_mask);
8856 }
8857 else
8858 slave_errno = ER_BAD_SLAVE;
8859 }
8860 else
8861 {
8862 /* no error if all threads are already started, only a warning */
8863 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_SLAVE_WAS_RUNNING,
8864 ER(ER_SLAVE_WAS_RUNNING));
8865 }
8866
8867 /*
8868 Clean up start information if there was an attempt to start
8869 the IO thread to avoid any security issue.
8870 */
8871 if (slave_errno &&
8872 (thread_mask & SLAVE_IO) == SLAVE_IO)
8873 mi->reset_start_info();
8874
8875 unlock_slave_threads(mi);
8876
8877 if (slave_errno)
8878 {
8879 if (net_report)
8880 my_message(slave_errno, ER(slave_errno), MYF(0));
8881 DBUG_RETURN(1);
8882 }
8883 else if (net_report)
8884 my_ok(thd);
8885
8886 DBUG_RETURN(0);
8887 }
8888
8889
8890 /**
8891 Execute a STOP SLAVE statement.
8892
8893 @param thd Pointer to THD object for the client thread executing the
8894 statement.
8895
8896 @param mi Pointer to Master_info object for the slave's IO thread.
8897
8898 @param net_report If true, saves the exit status into Diagnostics_area.
8899
8900 @retval 0 success
8901 @retval 1 error
8902 */
stop_slave(THD * thd,Master_info * mi,bool net_report)8903 int stop_slave(THD* thd, Master_info* mi, bool net_report )
8904 {
8905 DBUG_ENTER("stop_slave");
8906
8907 int slave_errno;
8908 if (!thd)
8909 thd = current_thd;
8910
8911 if (check_access(thd, SUPER_ACL, any_db, NULL, NULL, 0, 0))
8912 DBUG_RETURN(1);
8913 THD_STAGE_INFO(thd, stage_killing_slave);
8914 int thread_mask;
8915 lock_slave_threads(mi);
8916 // Get a mask of _running_ threads
8917 init_thread_mask(&thread_mask,mi,0 /* not inverse*/);
8918 /*
8919 Below we will stop all running threads.
8920 But if the user wants to stop only one thread, do as if the other thread
8921 was stopped (as we don't wan't to touch the other thread), so set the
8922 bit to 0 for the other thread
8923 */
8924 if (thd->lex->slave_thd_opt)
8925 thread_mask &= thd->lex->slave_thd_opt;
8926
8927 if (thread_mask)
8928 {
8929 slave_errno= terminate_slave_threads(mi,thread_mask,
8930 false/*need_lock_term=false*/);
8931 }
8932 else
8933 {
8934 //no error if both threads are already stopped, only a warning
8935 slave_errno= 0;
8936 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, ER_SLAVE_WAS_NOT_RUNNING,
8937 ER(ER_SLAVE_WAS_NOT_RUNNING));
8938 }
8939 unlock_slave_threads(mi);
8940
8941 if (slave_errno)
8942 {
8943 if ((slave_errno == ER_STOP_SLAVE_SQL_THREAD_TIMEOUT) ||
8944 (slave_errno == ER_STOP_SLAVE_IO_THREAD_TIMEOUT))
8945 {
8946 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE, slave_errno,
8947 ER(slave_errno));
8948 sql_print_warning("%s",ER(slave_errno));
8949 }
8950 if (net_report)
8951 my_message(slave_errno, ER(slave_errno), MYF(0));
8952 DBUG_RETURN(1);
8953 }
8954 else if (net_report)
8955 my_ok(thd);
8956
8957 DBUG_RETURN(0);
8958 }
8959
8960
8961 /**
8962 Execute a RESET SLAVE statement.
8963
8964 @param thd Pointer to THD object of the client thread executing the
8965 statement.
8966
8967 @param mi Pointer to Master_info object for the slave.
8968
8969 @retval 0 success
8970 @retval 1 error
8971 */
reset_slave(THD * thd,Master_info * mi)8972 int reset_slave(THD *thd, Master_info* mi)
8973 {
8974 int thread_mask= 0, error= 0;
8975 uint sql_errno=ER_UNKNOWN_ERROR;
8976 const char* errmsg= "Unknown error occured while reseting slave";
8977 DBUG_ENTER("reset_slave");
8978
8979 lock_slave_threads(mi);
8980 init_thread_mask(&thread_mask,mi,0 /* not inverse */);
8981 if (thread_mask) // We refuse if any slave thread is running
8982 {
8983 sql_errno= ER_SLAVE_MUST_STOP;
8984 error=1;
8985 goto err;
8986 }
8987
8988 ha_reset_slave(thd);
8989
8990 // delete relay logs, clear relay log coordinates
8991 if ((error= mi->rli->purge_relay_logs(thd,
8992 1 /* just reset */,
8993 &errmsg)))
8994 {
8995 sql_errno= ER_RELAY_LOG_FAIL;
8996 goto err;
8997 }
8998
8999 /* Clear master's log coordinates and associated information */
9000 DBUG_ASSERT(!mi->rli || !mi->rli->slave_running); // none writes in rli table
9001 mi->clear_in_memory_info(thd->lex->reset_slave_info.all);
9002
9003 if (remove_info(mi))
9004 {
9005 error= 1;
9006 goto err;
9007 }
9008
9009 (void) RUN_HOOK(binlog_relay_io, after_reset_slave, (thd, mi));
9010 err:
9011 unlock_slave_threads(mi);
9012 if (error)
9013 my_error(sql_errno, MYF(0), errmsg);
9014 DBUG_RETURN(error);
9015 }
9016
9017 /**
9018 Execute a CHANGE MASTER statement. MTS workers info tables data are removed
9019 in the successful branch (i.e. there are no gaps in the execution history).
9020
9021 @param thd Pointer to THD object for the client thread executing the
9022 statement.
9023
9024 @param mi Pointer to Master_info object belonging to the slave's IO
9025 thread.
9026
9027 @retval FALSE success
9028 @retval TRUE error
9029 */
change_master(THD * thd,Master_info * mi)9030 bool change_master(THD* thd, Master_info* mi)
9031 {
9032 int thread_mask;
9033 const char* errmsg= 0;
9034 bool need_relay_log_purge= 1;
9035 char *var_master_log_name= NULL, *var_group_master_log_name= NULL;
9036 bool ret= false;
9037 char saved_host[HOSTNAME_LENGTH + 1], saved_bind_addr[HOSTNAME_LENGTH + 1];
9038 uint saved_port= 0;
9039 char saved_log_name[FN_REFLEN];
9040 my_off_t saved_log_pos= 0;
9041 my_bool save_relay_log_purge= relay_log_purge;
9042 bool mts_remove_workers= false;
9043
9044 DBUG_ENTER("change_master");
9045
9046 lock_slave_threads(mi);
9047 init_thread_mask(&thread_mask,mi,0 /*not inverse*/);
9048 LEX_MASTER_INFO* lex_mi= &thd->lex->mi;
9049 if (thread_mask) // We refuse if any slave thread is running
9050 {
9051 my_message(ER_SLAVE_MUST_STOP, ER(ER_SLAVE_MUST_STOP), MYF(0));
9052 ret= true;
9053 goto err;
9054 }
9055 thread_mask= SLAVE_IO | SLAVE_SQL;
9056
9057 THD_STAGE_INFO(thd, stage_changing_master);
9058 /*
9059 We need to check if there is an empty master_host. Otherwise
9060 change master succeeds, a master.info file is created containing
9061 empty master_host string and when issuing: start slave; an error
9062 is thrown stating that the server is not configured as slave.
9063 (See BUG#28796).
9064 */
9065 if(lex_mi->host && !*lex_mi->host)
9066 {
9067 my_error(ER_WRONG_ARGUMENTS, MYF(0), "MASTER_HOST");
9068 unlock_slave_threads(mi);
9069 DBUG_RETURN(TRUE);
9070 }
9071 if (global_init_info(mi, false, thread_mask))
9072 {
9073 my_message(ER_MASTER_INFO, ER(ER_MASTER_INFO), MYF(0));
9074 ret= true;
9075 goto err;
9076 }
9077 if (mi->rli->mts_recovery_group_cnt)
9078 {
9079 /*
9080 Change-Master can't be done if there is a mts group gap.
9081 That requires mts-recovery which START SLAVE provides.
9082 */
9083 DBUG_ASSERT(mi->rli->recovery_parallel_workers);
9084
9085 my_message(ER_MTS_CHANGE_MASTER_CANT_RUN_WITH_GAPS,
9086 ER(ER_MTS_CHANGE_MASTER_CANT_RUN_WITH_GAPS), MYF(0));
9087 ret= true;
9088 goto err;
9089 }
9090 else
9091 {
9092 /*
9093 Lack of mts group gaps makes Workers info stale
9094 regardless of need_relay_log_purge computation.
9095 */
9096 if (mi->rli->recovery_parallel_workers)
9097 mts_remove_workers= true;
9098 }
9099 /*
9100 We cannot specify auto position and set either the coordinates
9101 on master or slave. If we try to do so, an error message is
9102 printed out.
9103 */
9104 if (lex_mi->log_file_name != NULL || lex_mi->pos != 0 ||
9105 lex_mi->relay_log_name != NULL || lex_mi->relay_log_pos != 0)
9106 {
9107 if (lex_mi->auto_position == LEX_MASTER_INFO::LEX_MI_ENABLE ||
9108 (lex_mi->auto_position != LEX_MASTER_INFO::LEX_MI_DISABLE &&
9109 mi->is_auto_position()))
9110 {
9111 my_message(ER_BAD_SLAVE_AUTO_POSITION,
9112 ER(ER_BAD_SLAVE_AUTO_POSITION), MYF(0));
9113 ret= true;
9114 goto err;
9115 }
9116 }
9117
9118 // CHANGE MASTER TO MASTER_AUTO_POSITION = 1 requires GTID_MODE = ON
9119 if (lex_mi->auto_position == LEX_MASTER_INFO::LEX_MI_ENABLE && gtid_mode != 3)
9120 {
9121 my_message(ER_AUTO_POSITION_REQUIRES_GTID_MODE_ON,
9122 ER(ER_AUTO_POSITION_REQUIRES_GTID_MODE_ON), MYF(0));
9123 ret= true;
9124 goto err;
9125 }
9126
9127 /*
9128 Data lock not needed since we have already stopped the running threads,
9129 and we have the hold on the run locks which will keep all threads that
9130 could possibly modify the data structures from running
9131 */
9132
9133 /*
9134 Before processing the command, save the previous state.
9135 */
9136 strmake(saved_host, mi->host, HOSTNAME_LENGTH);
9137 strmake(saved_bind_addr, mi->bind_addr, HOSTNAME_LENGTH);
9138 saved_port= mi->port;
9139 strmake(saved_log_name, mi->get_master_log_name(), FN_REFLEN - 1);
9140 saved_log_pos= mi->get_master_log_pos();
9141
9142 /*
9143 If the user specified host or port without binlog or position,
9144 reset binlog's name to FIRST and position to 4.
9145 */
9146
9147 if ((lex_mi->host && strcmp(lex_mi->host, mi->host)) ||
9148 (lex_mi->port && lex_mi->port != mi->port))
9149 {
9150 /*
9151 This is necessary because the primary key, i.e. host or port, has
9152 changed.
9153
9154 The repository does not support direct changes on the primary key,
9155 so the row is dropped and re-inserted with a new primary key. If we
9156 don't do that, the master info repository we will end up with several
9157 rows.
9158 */
9159 if (mi->clean_info())
9160 {
9161 ret= true;
9162 goto err;
9163 }
9164 mi->master_uuid[0]= 0;
9165 mi->master_id= 0;
9166 }
9167
9168 if ((lex_mi->host || lex_mi->port) && !lex_mi->log_file_name && !lex_mi->pos)
9169 {
9170 var_master_log_name= const_cast<char*>(mi->get_master_log_name());
9171 var_master_log_name[0]= '\0';
9172 mi->set_master_log_pos(BIN_LOG_HEADER_SIZE);
9173 }
9174
9175 if (lex_mi->log_file_name)
9176 mi->set_master_log_name(lex_mi->log_file_name);
9177 if (lex_mi->pos)
9178 {
9179 mi->set_master_log_pos(lex_mi->pos);
9180 }
9181 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
9182
9183 if (lex_mi->user || lex_mi->password)
9184 {
9185 #if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
9186 if (thd->vio_ok() && !thd->net.vio->ssl_arg)
9187 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
9188 ER_INSECURE_PLAIN_TEXT,
9189 ER(ER_INSECURE_PLAIN_TEXT));
9190 #endif
9191 #if !defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
9192 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
9193 ER_INSECURE_PLAIN_TEXT,
9194 ER(ER_INSECURE_PLAIN_TEXT));
9195 #endif
9196 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
9197 ER_INSECURE_CHANGE_MASTER,
9198 ER(ER_INSECURE_CHANGE_MASTER));
9199 }
9200
9201 if (lex_mi->user)
9202 mi->set_user(lex_mi->user);
9203
9204 if (lex_mi->password)
9205 {
9206 if (mi->set_password(lex_mi->password, strlen(lex_mi->password)))
9207 {
9208 /*
9209 After implementing WL#5769, we should create a better error message
9210 to denote that the call may have failed due to an error while trying
9211 to encrypt/store the password in a secure key store.
9212 */
9213 my_message(ER_MASTER_INFO, ER(ER_MASTER_INFO), MYF(0));
9214 ret= false;
9215 goto err;
9216 }
9217 }
9218 if (lex_mi->host)
9219 strmake(mi->host, lex_mi->host, sizeof(mi->host)-1);
9220 if (lex_mi->bind_addr)
9221 strmake(mi->bind_addr, lex_mi->bind_addr, sizeof(mi->bind_addr)-1);
9222 if (lex_mi->port)
9223 mi->port = lex_mi->port;
9224 if (lex_mi->connect_retry)
9225 mi->connect_retry = lex_mi->connect_retry;
9226 if (lex_mi->retry_count_opt != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
9227 mi->retry_count = lex_mi->retry_count;
9228 if (lex_mi->heartbeat_opt != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
9229 mi->heartbeat_period = lex_mi->heartbeat_period;
9230 else
9231 mi->heartbeat_period= min<float>(SLAVE_MAX_HEARTBEAT_PERIOD,
9232 (slave_net_timeout/2.0));
9233 mi->received_heartbeats= LL(0); // counter lives until master is CHANGEd
9234 /*
9235 Reset the previously stored ignored server_id list if the current
9236 CHANGE MASTER specifies IGNORE_SERVER_IDS= (...)
9237 */
9238 if (lex_mi->repl_ignore_server_ids_opt == LEX_MASTER_INFO::LEX_MI_ENABLE)
9239 reset_dynamic(&(mi->ignore_server_ids->dynamic_ids));
9240 for (uint i= 0; i < lex_mi->repl_ignore_server_ids.elements; i++)
9241 {
9242 ulong s_id;
9243 get_dynamic(&lex_mi->repl_ignore_server_ids, (uchar*) &s_id, i);
9244 if (s_id == ::server_id && replicate_same_server_id)
9245 {
9246 my_error(ER_SLAVE_IGNORE_SERVER_IDS, MYF(0), static_cast<int>(s_id));
9247 ret= TRUE;
9248 goto err;
9249 }
9250 else
9251 {
9252 if (bsearch((const ulong *) &s_id,
9253 mi->ignore_server_ids->dynamic_ids.buffer,
9254 mi->ignore_server_ids->dynamic_ids.elements, sizeof(ulong),
9255 (int (*) (const void*, const void*))
9256 change_master_server_id_cmp) == NULL)
9257 insert_dynamic(&(mi->ignore_server_ids->dynamic_ids), (uchar*) &s_id);
9258 }
9259 }
9260 sort_dynamic(&(mi->ignore_server_ids->dynamic_ids), (qsort_cmp) change_master_server_id_cmp);
9261
9262 if (lex_mi->ssl != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
9263 mi->ssl= (lex_mi->ssl == LEX_MASTER_INFO::LEX_MI_ENABLE);
9264
9265 if (lex_mi->sql_delay != -1)
9266 mi->rli->set_sql_delay(lex_mi->sql_delay);
9267
9268 if (lex_mi->ssl_verify_server_cert != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
9269 mi->ssl_verify_server_cert=
9270 (lex_mi->ssl_verify_server_cert == LEX_MASTER_INFO::LEX_MI_ENABLE);
9271
9272 if (lex_mi->ssl_ca)
9273 strmake(mi->ssl_ca, lex_mi->ssl_ca, sizeof(mi->ssl_ca)-1);
9274 if (lex_mi->ssl_capath)
9275 strmake(mi->ssl_capath, lex_mi->ssl_capath, sizeof(mi->ssl_capath)-1);
9276 if (lex_mi->ssl_cert)
9277 strmake(mi->ssl_cert, lex_mi->ssl_cert, sizeof(mi->ssl_cert)-1);
9278 if (lex_mi->ssl_cipher)
9279 strmake(mi->ssl_cipher, lex_mi->ssl_cipher, sizeof(mi->ssl_cipher)-1);
9280 if (lex_mi->ssl_key)
9281 strmake(mi->ssl_key, lex_mi->ssl_key, sizeof(mi->ssl_key)-1);
9282 if (lex_mi->ssl_crl)
9283 strmake(mi->ssl_crl, lex_mi->ssl_crl, sizeof(mi->ssl_crl)-1);
9284 if (lex_mi->ssl_crlpath)
9285 strmake(mi->ssl_crlpath, lex_mi->ssl_crlpath, sizeof(mi->ssl_crlpath)-1);
9286 #ifndef HAVE_OPENSSL
9287 if (lex_mi->ssl || lex_mi->ssl_ca || lex_mi->ssl_capath ||
9288 lex_mi->ssl_cert || lex_mi->ssl_cipher || lex_mi->ssl_key ||
9289 lex_mi->ssl_verify_server_cert || lex_mi->ssl_crl || lex_mi->ssl_crlpath)
9290 push_warning(thd, Sql_condition::WARN_LEVEL_NOTE,
9291 ER_SLAVE_IGNORED_SSL_PARAMS, ER(ER_SLAVE_IGNORED_SSL_PARAMS));
9292 #endif
9293
9294 if (lex_mi->relay_log_name)
9295 {
9296 need_relay_log_purge= 0;
9297 char relay_log_name[FN_REFLEN];
9298
9299 mi->rli->relay_log.make_log_name(relay_log_name, lex_mi->relay_log_name);
9300 mi->rli->set_group_relay_log_name(relay_log_name);
9301 mi->rli->set_event_relay_log_name(relay_log_name);
9302 }
9303
9304 if (lex_mi->relay_log_pos)
9305 {
9306 need_relay_log_purge= 0;
9307 mi->rli->set_group_relay_log_pos(lex_mi->relay_log_pos);
9308 mi->rli->set_event_relay_log_pos(lex_mi->relay_log_pos);
9309 }
9310
9311 /*
9312 If the user specified neither host nor port nor any log name nor any log
9313 pos, i.e. he specified only user/password/master_connect_retry, he probably
9314 wants replication to resume from where it had left, i.e. from the
9315 coordinates of the **SQL** thread (imagine the case where the I/O is ahead
9316 of the SQL; restarting from the coordinates of the I/O would lose some
9317 events which is probably unwanted when you are just doing minor changes
9318 like changing master_connect_retry).
9319 A side-effect is that if only the I/O thread was started, this thread may
9320 restart from ''/4 after the CHANGE MASTER. That's a minor problem (it is a
9321 much more unlikely situation than the one we are fixing here).
9322 Note: coordinates of the SQL thread must be read here, before the
9323 'if (need_relay_log_purge)' block which resets them.
9324 */
9325 if (!lex_mi->host && !lex_mi->port &&
9326 !lex_mi->log_file_name && !lex_mi->pos &&
9327 need_relay_log_purge)
9328 {
9329 /*
9330 Sometimes mi->rli->master_log_pos == 0 (it happens when the SQL thread is
9331 not initialized), so we use a max().
9332 What happens to mi->rli->master_log_pos during the initialization stages
9333 of replication is not 100% clear, so we guard against problems using
9334 max().
9335 */
9336 mi->set_master_log_pos(max<ulonglong>(BIN_LOG_HEADER_SIZE,
9337 mi->rli->get_group_master_log_pos()));
9338 mi->set_master_log_name(mi->rli->get_group_master_log_name());
9339 }
9340
9341 /*
9342 Sets if the slave should connect to the master and look for
9343 GTIDs.
9344 */
9345 if (lex_mi->auto_position != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
9346 mi->set_auto_position(
9347 (lex_mi->auto_position == LEX_MASTER_INFO::LEX_MI_ENABLE));
9348
9349 /*
9350 Relay log's IO_CACHE may not be inited, if rli->inited==0 (server was never
9351 a slave before).
9352 */
9353 if (flush_master_info(mi, true))
9354 {
9355 my_error(ER_RELAY_LOG_INIT, MYF(0), "Failed to flush master info file");
9356 ret= TRUE;
9357 goto err;
9358 }
9359 if (need_relay_log_purge)
9360 {
9361 relay_log_purge= 1;
9362 THD_STAGE_INFO(thd, stage_purging_old_relay_logs);
9363 if (mi->rli->purge_relay_logs(thd,
9364 0 /* not only reset, but also reinit */,
9365 &errmsg))
9366 {
9367 my_error(ER_RELAY_LOG_FAIL, MYF(0), errmsg);
9368 ret= TRUE;
9369 goto err;
9370 }
9371 }
9372 else
9373 {
9374 const char* msg;
9375 relay_log_purge= 0;
9376 /* Relay log is already initialized */
9377
9378 if (mi->rli->init_relay_log_pos(mi->rli->get_group_relay_log_name(),
9379 mi->rli->get_group_relay_log_pos(),
9380 true/*need_data_lock=true*/,
9381 &msg, 0))
9382 {
9383 my_error(ER_RELAY_LOG_INIT, MYF(0), msg);
9384 ret= TRUE;
9385 goto err;
9386 }
9387 }
9388 relay_log_purge= save_relay_log_purge;
9389
9390 /*
9391 Coordinates in rli were spoilt by the 'if (need_relay_log_purge)' block,
9392 so restore them to good values. If we left them to ''/0, that would work;
9393 but that would fail in the case of 2 successive CHANGE MASTER (without a
9394 START SLAVE in between): because first one would set the coords in mi to
9395 the good values of those in rli, then set those in rli to ''/0, then
9396 second CHANGE MASTER would set the coords in mi to those of rli, i.e. to
9397 ''/0: we have lost all copies of the original good coordinates.
9398 That's why we always save good coords in rli.
9399 */
9400 if (need_relay_log_purge)
9401 {
9402 mi->rli->set_group_master_log_pos(mi->get_master_log_pos());
9403 DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->get_master_log_pos()));
9404 mi->rli->set_group_master_log_name(mi->get_master_log_name());
9405 }
9406 var_group_master_log_name= const_cast<char *>(mi->rli->get_group_master_log_name());
9407 if (!var_group_master_log_name[0]) // uninitialized case
9408 mi->rli->set_group_master_log_pos(0);
9409
9410 mysql_mutex_lock(&mi->rli->data_lock);
9411 mi->rli->abort_pos_wait++; /* for MASTER_POS_WAIT() to abort */
9412 /* Clear the errors, for a clean start */
9413 mi->rli->clear_error();
9414 mi->rli->clear_until_condition();
9415
9416 sql_print_information("'CHANGE MASTER TO executed'. "
9417 "Previous state master_host='%s', master_port= %u, master_log_file='%s', "
9418 "master_log_pos= %ld, master_bind='%s'. "
9419 "New state master_host='%s', master_port= %u, master_log_file='%s', "
9420 "master_log_pos= %ld, master_bind='%s'.",
9421 saved_host, saved_port, saved_log_name, (ulong) saved_log_pos,
9422 saved_bind_addr, mi->host, mi->port, mi->get_master_log_name(),
9423 (ulong) mi->get_master_log_pos(), mi->bind_addr);
9424
9425 /*
9426 If we don't write the new coordinates to disk now, the old ones will remain
9427 in relay-log.info until START SLAVE is issued; but if mysqld is shut down
9428 before START SLAVE, the old ones will remain in relay-log.info, and will be the
9429 in-memory value at restart (thus causing errors, as the old relay log does
9430 not exist anymore).
9431
9432 Notice that the rli table is available exclusively as slave is not
9433 running.
9434 */
9435 DBUG_ASSERT(!mi->rli->slave_running);
9436 if ((ret= mi->rli->flush_info(true)))
9437 my_error(ER_RELAY_LOG_INIT, MYF(0), "Failed to flush relay info file.");
9438 mysql_cond_broadcast(&mi->data_cond);
9439 mysql_mutex_unlock(&mi->rli->data_lock);
9440
9441 err:
9442 unlock_slave_threads(mi);
9443 if (ret == FALSE)
9444 {
9445 if (!mts_remove_workers)
9446 my_ok(thd);
9447 else
9448 if (!Rpl_info_factory::reset_workers(mi->rli))
9449 my_ok(thd);
9450 else
9451 my_error(ER_MTS_RESET_WORKERS, MYF(0));
9452 }
9453 DBUG_RETURN(ret);
9454 }
9455 /**
9456 @} (end of group Replication)
9457 */
9458 #endif /* HAVE_REPLICATION */
9459