1 /* Copyright (c) 2000, 2017, Oracle and/or its affiliates.
2    Copyright (c) 2009, 2020, MariaDB Corporation.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; version 2 of the License.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
16 
17 
18 /**
19   @addtogroup Replication
20   @{
21 
22   @file
23 
24   @brief Code to run the io thread and the sql thread on the
25   replication slave.
26 */
27 
28 #include "mariadb.h"
29 #include "sql_priv.h"
30 #include "slave.h"
31 #include "sql_parse.h"                         // execute_init_command
32 #include "sql_table.h"                         // mysql_rm_table
33 #include "rpl_mi.h"
34 #include "rpl_rli.h"
35 #include "sql_repl.h"
36 #include "rpl_filter.h"
37 #include "repl_failsafe.h"
38 #include "transaction.h"
39 #include <thr_alarm.h>
40 #include <my_dir.h>
41 #include <sql_common.h>
42 #include <errmsg.h>
43 #include <ssl_compat.h>
44 #include "unireg.h"
45 #include <mysys_err.h>
46 #include <signal.h>
47 #include <mysql.h>
48 #include <myisam.h>
49 
50 #include "sql_base.h"                           // close_thread_tables
51 #include "tztime.h"                             // struct Time_zone
52 #include "log_event.h"                          // Rotate_log_event,
53                                                 // Create_file_log_event,
54                                                 // Format_description_log_event
55 #include "wsrep_mysqld.h"
56 #ifdef WITH_WSREP
57 #include "wsrep_trans_observer.h"
58 #endif
59 
60 #ifdef HAVE_REPLICATION
61 
62 #include "rpl_tblmap.h"
63 #include "debug_sync.h"
64 #include "rpl_parallel.h"
65 #include "sql_show.h"
66 #include "semisync_slave.h"
67 #include "sql_manager.h"
68 
/*
  Expands to the stringified flag name F followed by a space when any bit
  of F is set in V, and to an empty string otherwise.
*/
#define FLAGSTR(V,F) ((V)&(F)?#F" ":"")

/* NOTE(review): presumably an upper bound on a retry pause; unit not visible here. */
#define MAX_SLAVE_RETRY_PAUSE 5
/*
  a parameter of sql_slave_killed() to defer the killed status
*/
#define SLAVE_WAIT_GROUP_DONE 60
/* Set to 1 by init_slave_skip_errors() once slave_error_mask is initialized. */
bool use_slave_mask = 0;
/* Bitmap with one bit per error number below MAX_SLAVE_ERROR to be skipped. */
MY_BITMAP slave_error_mask;
/* Printable form of slave_error_mask, see make_slave_skip_errors_printable(). */
char slave_skip_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
/*
  Array of error codes (and its element count) filled in by
  init_slave_transaction_retry_errors().
*/
uint *slave_transaction_retry_errors;
uint slave_transaction_retry_error_length= 0;
/*
  Printable form of slave_transaction_retry_errors, see
  make_slave_transaction_retry_errors_printable().
*/
char slave_transaction_retry_error_names[SHOW_VAR_FUNC_BUFF_SIZE];

char* slave_load_tmpdir = 0;
/* Master_info for the default (unnamed) connection; created in init_slave(). */
Master_info *active_mi= 0;
/* Container of all Master_info objects; created in init_slave(). */
Master_info_index *master_info_index;
my_bool replicate_same_server_id;
ulonglong relay_log_space_limit = 0;
ulonglong opt_read_binlog_speed_limit = 0;

const char *relay_log_index= 0;
const char *relay_log_basename= 0;

/* The empty connection name, used when creating active_mi in init_slave(). */
LEX_CSTRING default_master_connection_name= { (char*) "", 0 };
94 
/*
  When slave thread exits, we need to remember the temporary tables so we
  can re-use them on slave start.

  TODO: move the vars below under Master_info

  NOTE(review): no temporary-table state is declared directly below this
  comment; it may be stale. Confirm before relying on it.
*/

/* NOTE(review): presumably debug/test-only event counters; usage not visible here. */
int disconnect_slave_event_count = 0, abort_slave_event_count = 0;

/* Thread-specific key holding a Master_info*; created in init_slave(). */
static pthread_key(Master_info*, RPL_MASTER_INFO);
105 
/*
  Row index into reconnect_messages[]: the operation that failed and
  triggered the reconnect.
*/
enum enum_slave_reconnect_actions
{
  SLAVE_RECON_ACT_REG= 0,    /* failed registration on master */
  SLAVE_RECON_ACT_DUMP= 1,   /* failed binlog dump request */
  SLAVE_RECON_ACT_EVENT= 2,  /* failed master event read */
  SLAVE_RECON_ACT_MAX        /* number of actions (array dimension) */
};

/*
  Column index into reconnect_messages[]: which text to use for a given
  reconnect action.
*/
enum enum_slave_reconnect_messages
{
  SLAVE_RECON_MSG_WAIT= 0,            /* waiting to reconnect */
  SLAVE_RECON_MSG_KILLED_WAITING= 1,  /* killed while waiting */
  SLAVE_RECON_MSG_AFTER= 2,           /* reconnecting */
  SLAVE_RECON_MSG_FAILED= 3,          /* format string: log name, position, suffix */
  SLAVE_RECON_MSG_COMMAND= 4,         /* name of the command that failed */
  SLAVE_RECON_MSG_KILLED_AFTER= 5,    /* killed during or after reconnect */
  SLAVE_RECON_MSG_MAX                 /* number of messages (array dimension) */
};
124 
/*
  Message table indexed as
    reconnect_messages[enum_slave_reconnect_actions][enum_slave_reconnect_messages].
  The SLAVE_RECON_MSG_FAILED entries are format strings taking a log name
  (%s), a position (%llu) and a trailing string (%s).
*/
static const char *reconnect_messages[SLAVE_RECON_ACT_MAX][SLAVE_RECON_MSG_MAX]=
{
  /* SLAVE_RECON_ACT_REG */
  {
    "Waiting to reconnect after a failed registration on master",
    "Slave I/O thread killed while waiting to reconnect after a failed \
registration on master",
    "Reconnecting after a failed registration on master",
    "failed registering on master, reconnecting to try again, \
log '%s' at position %llu%s",
    "COM_REGISTER_SLAVE",
    "Slave I/O thread killed during or after reconnect"
  },
  /* SLAVE_RECON_ACT_DUMP */
  {
    "Waiting to reconnect after a failed binlog dump request",
    "Slave I/O thread killed while retrying master dump",
    "Reconnecting after a failed binlog dump request",
    "failed dump request, reconnecting to try again, log '%s' at position %llu%s",
    "COM_BINLOG_DUMP",
    "Slave I/O thread killed during or after reconnect"
  },
  /* SLAVE_RECON_ACT_EVENT (has no associated command name) */
  {
    "Waiting to reconnect after a failed master event read",
    "Slave I/O thread killed while waiting to reconnect after a failed read",
    "Reconnecting after a failed master event read",
    "Slave I/O thread: Failed reading log event, reconnecting to retry, \
log '%s' at position %llu%s",
    "",
    "Slave I/O thread killed during or after a reconnect done to recover from \
failed read"
  }
};
156 
157 
/* Kind of slave thread; passed to init_slave_thread(). */
typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL} SLAVE_THD_TYPE;

/*
  Forward declarations of file-local helpers. Their definitions are
  expected elsewhere in this translation unit.
*/
static int process_io_rotate(Master_info* mi, Rotate_log_event* rev);
static int process_io_create_file(Master_info* mi, Create_file_log_event* cev);
static bool wait_for_relay_log_space(Relay_log_info* rli);
static bool io_slave_killed(Master_info* mi);
static bool sql_slave_killed(rpl_group_info *rgi);
static int init_slave_thread(THD*, Master_info *, SLAVE_THD_TYPE);
static void make_slave_skip_errors_printable(void);
static void make_slave_transaction_retry_errors_printable(void);
static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
static int safe_reconnect(THD*, MYSQL*, Master_info*, bool);
static int connect_to_master(THD*, MYSQL*, Master_info*, bool, bool);
static Log_event* next_event(rpl_group_info* rgi, ulonglong *event_size);
static int queue_event(Master_info* mi,const char* buf,ulong event_len);
static int terminate_slave_thread(THD *, mysql_mutex_t *, mysql_cond_t *,
                                  volatile uint *, bool);
static bool check_io_slave_killed(Master_info *mi, const char *info);
static bool send_show_master_info_data(THD *, Master_info *, bool, String *);
177 /*
178   Function to set the slave's max_allowed_packet based on the value
179   of slave_max_allowed_packet.
180 
181     @in_param    thd    Thread handler for slave
182     @in_param    mysql  MySQL connection handle
183 */
184 
185 static void set_slave_max_allowed_packet(THD *thd, MYSQL *mysql)
186 {
187   DBUG_ENTER("set_slave_max_allowed_packet");
188   // thd and mysql must be valid
189   DBUG_ASSERT(thd && mysql);
190 
191   thd->variables.max_allowed_packet= slave_max_allowed_packet;
192   thd->net.max_packet_size= slave_max_allowed_packet;
193   /*
194     Adding MAX_LOG_EVENT_HEADER_LEN to the max_packet_size on the I/O
195     thread and the mysql->option max_allowed_packet, since a
196     replication event can become this much  larger than
197     the corresponding packet (query) sent from client to master.
198   */
199   thd->net.max_packet_size+= MAX_LOG_EVENT_HEADER;
200   /*
201     Skipping the setting of mysql->net.max_packet size to slave
202     max_allowed_packet since this is done during mysql_real_connect.
203   */
204   mysql->options.max_allowed_packet=
205     slave_max_allowed_packet+MAX_LOG_EVENT_HEADER;
206   DBUG_VOID_RETURN;
207 }
208 
209 /*
210   Find out which replications threads are running
211 
212   SYNOPSIS
213     init_thread_mask()
214     mask                Return value here
215     mi                  master_info for slave
216     inverse             If set, returns which threads are not running
217 
218   IMPLEMENTATION
219     Get a bit mask for which threads are running so that we can later restart
220     these threads.
221 
222   RETURN
223     mask        If inverse == 0, running threads
224                 If inverse == 1, stopped threads
225 */
226 
227 void init_thread_mask(int* mask,Master_info* mi,bool inverse)
228 {
229   bool set_io = mi->slave_running, set_sql = mi->rli.slave_running;
230   int tmp_mask=0;
231   DBUG_ENTER("init_thread_mask");
232 
233   if (set_io)
234     tmp_mask |= SLAVE_IO;
235   if (set_sql)
236     tmp_mask |= SLAVE_SQL;
237   if (inverse)
238     tmp_mask^= (SLAVE_IO | SLAVE_SQL);
239   *mask = tmp_mask;
240   DBUG_VOID_RETURN;
241 }
242 
243 
/*
  lock_slave_threads() against other threads doing STOP, START or RESET SLAVE

  Acquires start_stop_lock; release it with unlock_slave_threads().
*/

void Master_info::lock_slave_threads()
{
  DBUG_ENTER("lock_slave_threads");
  mysql_mutex_lock(&start_stop_lock);
  DBUG_VOID_RETURN;
}
255 
256 
/*
  unlock_slave_threads()

  Releases start_stop_lock, the mutex taken by lock_slave_threads().
*/

void Master_info::unlock_slave_threads()
{
  DBUG_ENTER("unlock_slave_threads");
  mysql_mutex_unlock(&start_stop_lock);
  DBUG_VOID_RETURN;
}
267 
#ifdef HAVE_PSI_INTERFACE
/* Instrumentation keys for the slave I/O and SQL threads. */
static PSI_thread_key key_thread_slave_io, key_thread_slave_sql;

/* Descriptors registered with the performance schema, one per key above. */
static PSI_thread_info all_slave_threads[]=
{
  { &key_thread_slave_io, "slave_io", PSI_FLAG_GLOBAL},
  { &key_thread_slave_sql, "slave_sql", PSI_FLAG_GLOBAL}
};

/*
  Register the slave thread keys under the "sql" category.
  Does nothing when no PSI server is installed.
*/
static void init_slave_psi_keys(void)
{
  if (PSI_server != NULL)
  {
    const int n_keys= array_elements(all_slave_threads);
    PSI_server->register_thread("sql", all_slave_threads, n_keys);
  }
}
#endif /* HAVE_PSI_INTERFACE */
289 
290 
/*
  Note: This definition needs to be kept in sync with the one in
  mysql_system_tables.sql which is used by mysql_create_db.

  The statement is split in two parts so that build_gtid_pos_create_query()
  can insert the table name between them and append the engine name after
  the trailing "ENGINE=".
*/
static const char gtid_pos_table_definition1[]=
  "CREATE TABLE ";
static const char gtid_pos_table_definition2[]=
  " (domain_id INT UNSIGNED NOT NULL, "
  "sub_id BIGINT UNSIGNED NOT NULL, "
  "server_id INT UNSIGNED NOT NULL, "
  "seq_no BIGINT UNSIGNED NOT NULL, "
  "PRIMARY KEY (domain_id, sub_id)) CHARSET=latin1 "
  "COMMENT='Replication slave GTID position' "
  "ENGINE=";
305 
306 /*
307   Build a query string
308     CREATE TABLE mysql.gtid_slave_pos_<engine> ... ENGINE=<engine>
309 */
310 static bool
311 build_gtid_pos_create_query(THD *thd, String *query,
312                             LEX_CSTRING *table_name,
313                             LEX_CSTRING *engine_name)
314 {
315   bool err= false;
316   err|= query->append(gtid_pos_table_definition1);
317   err|= append_identifier(thd, query, table_name);
318   err|= query->append(gtid_pos_table_definition2);
319   err|= append_identifier(thd, query, engine_name);
320   return err;
321 }
322 
323 
324 static int
325 gtid_pos_table_creation(THD *thd, plugin_ref engine, LEX_CSTRING *table_name)
326 {
327   int err;
328   StringBuffer<sizeof(gtid_pos_table_definition1) +
329                sizeof(gtid_pos_table_definition1) +
330                2*FN_REFLEN> query;
331 
332   if (build_gtid_pos_create_query(thd, &query, table_name, plugin_name(engine)))
333   {
334     my_error(ER_OUT_OF_RESOURCES, MYF(0));
335     return 1;
336   }
337 
338   thd->set_db(&MYSQL_SCHEMA_NAME);
339   thd->clear_error();
340   ulonglong thd_saved_option= thd->variables.option_bits;
341   /* This query shuold not be binlogged. */
342   thd->variables.option_bits&= ~(ulonglong)OPTION_BIN_LOG;
343   thd->set_query_and_id(query.c_ptr(), query.length(), thd->charset(),
344                         next_query_id());
345   Parser_state parser_state;
346   err= parser_state.init(thd, thd->query(), thd->query_length());
347   if (err)
348     goto end;
349   mysql_parse(thd, thd->query(), thd->query_length(), &parser_state,
350               FALSE, FALSE);
351   if (unlikely(thd->is_error()))
352     err= 1;
353   /* The warning is relevant to 10.3 and earlier. */
354   sql_print_warning("The automatically created table '%s' name may not be "
355                     "entirely in lowercase. The table name will be converted "
356                     "to lowercase to any future upgrade to 10.4.0 and later "
357                     "version where it will be auto-created at once "
358                     "in lowercase.",
359                     table_name->str);
360 end:
361   thd->variables.option_bits= thd_saved_option;
362   thd->reset_query();
363   return err;
364 }
365 
366 static THD *new_bg_THD()
367 {
368   THD *thd= new THD(next_thread_id());
369   thd->thread_stack= (char*) &thd;
370   thd->store_globals();
371   thd->system_thread = SYSTEM_THREAD_SLAVE_BACKGROUND;
372   thd->security_ctx->skip_grants();
373   thd->set_command(COM_DAEMON);
374   thd->variables.wsrep_on= 0;
375   return thd;
376 }
377 
378 static void bg_gtid_delete_pending(void *)
379 {
380   THD *thd= new_bg_THD();
381 
382   rpl_slave_state::list_element *list;
383   list= rpl_global_gtid_slave_state->gtid_grab_pending_delete_list();
384   rpl_global_gtid_slave_state->gtid_delete_pending(thd, &list);
385   if (list)
386     rpl_global_gtid_slave_state->put_back_list(list);
387   delete thd;
388 }
389 
/*
  Manager-thread task: auto-create the GTID position table for one engine.

  @param hton  Handlerton of the engine, as submitted by
               slave_background_gtid_pos_create_request().

  Steps:
    1. Lock the engine plugin if it is still listed in
       opt_gtid_pos_auto_plugins.
    2. Under LOCK_slave_state, find the entry for this engine in state
       GTID_POS_CREATE_REQUESTED, move it to GTID_POS_CREATE_IN_PROGRESS
       and copy its table name.
    3. Create the table with a temporary background THD.
    4. Under LOCK_slave_state, mark the entry GTID_POS_AVAILABLE.

  On a creation error the entry is left in GTID_POS_CREATE_IN_PROGRESS.
*/
static void bg_gtid_pos_auto_create(void *hton)
{
  THD *thd= NULL;
  int UNINIT_VAR(err);
  plugin_ref engine= NULL, *auto_engines;
  rpl_slave_state::gtid_pos_table *entry;
  StringBuffer<FN_REFLEN> loc_table_name;
  LEX_CSTRING table_name;

  /*
    Check that the plugin is still in @@gtid_pos_auto_engines, and lock
    it.
  */
  mysql_mutex_lock(&LOCK_global_system_variables);
  for (auto_engines= opt_gtid_pos_auto_plugins;
       auto_engines && *auto_engines;
       ++auto_engines)
  {
    if (plugin_hton(*auto_engines) == hton)
    {
      engine= my_plugin_lock(NULL, *auto_engines);
      break;
    }
  }
  mysql_mutex_unlock(&LOCK_global_system_variables);
  if (!engine)
  {
    /* The engine is gone from @@gtid_pos_auto_engines, so no action. */
    goto end;
  }

  /* Find the entry for the table to auto-create. */
  mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
  entry= rpl_global_gtid_slave_state->
         gtid_pos_tables.load(std::memory_order_relaxed);
  while (entry)
  {
    if (entry->table_hton == hton &&
        entry->state == rpl_slave_state::GTID_POS_CREATE_REQUESTED)
      break;
    entry= entry->next;
  }
  if (entry)
  {
    /*
      Claim the entry and copy its name while the lock is held, so the
      name can be used after the lock is released.
    */
    entry->state = rpl_slave_state::GTID_POS_CREATE_IN_PROGRESS;
    err= loc_table_name.append(entry->table_name.str, entry->table_name.length);
  }
  mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
  if (!entry)
    goto end;
  /* err is only assigned when an entry was found, hence checked after it. */
  if (err)
  {
    sql_print_error("Out of memory while trying to auto-create GTID position table");
    goto end;
  }
  table_name.str= loc_table_name.c_ptr_safe();
  table_name.length= loc_table_name.length();

  thd= new_bg_THD();
  err= gtid_pos_table_creation(thd, engine, &table_name);
  if (err)
  {
    sql_print_error("Error auto-creating GTID position table `mysql.%s`: %s Error_code: %d",
                    table_name.str, thd->get_stmt_da()->message(),
                    thd->get_stmt_da()->sql_errno());
    thd->clear_error();
    goto end;
  }

  /* Now enable the entry for the auto-created table. */
  mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
  entry= rpl_global_gtid_slave_state->
         gtid_pos_tables.load(std::memory_order_relaxed);
  while (entry)
  {
    if (entry->table_hton == hton &&
        entry->state == rpl_slave_state::GTID_POS_CREATE_IN_PROGRESS)
    {
      entry->state= rpl_slave_state::GTID_POS_AVAILABLE;
      break;
    }
    entry= entry->next;
  }
  mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);

end:
  /* thd is NULL if we bailed out before creating it; delete handles that. */
  delete thd;
  if (engine)
    plugin_unlock(NULL, engine);
}
480 
481 static bool slave_background_thread_gtid_loaded;
482 
483 static void bg_rpl_load_gtid_slave_state(void *)
484 {
485   THD *thd= new_bg_THD();
486   thd_proc_info(thd, "Loading slave GTID position from table");
487   if (rpl_load_gtid_slave_state(thd))
488     sql_print_warning("Failed to load slave replication state from table "
489                       "%s.%s: %u: %s", "mysql",
490                       rpl_gtid_slave_state_table_name.str,
491                       thd->get_stmt_da()->sql_errno(),
492                       thd->get_stmt_da()->message());
493 
494   // hijacking global_rpl_thread_pool cond here - it's only once on startup
495   mysql_mutex_lock(&global_rpl_thread_pool.LOCK_rpl_thread_pool);
496   slave_background_thread_gtid_loaded= true;
497   mysql_cond_signal(&global_rpl_thread_pool.COND_rpl_thread_pool);
498   mysql_mutex_unlock(&global_rpl_thread_pool.LOCK_rpl_thread_pool);
499   delete thd;
500 }
501 
502 static void bg_slave_kill(void *victim)
503 {
504   THD *to_kill= (THD *)victim;
505   to_kill->awake(KILL_CONNECTION);
506   mysql_mutex_lock(&to_kill->LOCK_wakeup_ready);
507   to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED;
508   mysql_cond_broadcast(&to_kill->COND_wakeup_ready);
509   mysql_mutex_unlock(&to_kill->LOCK_wakeup_ready);
510 }
511 
512 void slave_background_kill_request(THD *to_kill)
513 {
514   if (to_kill->rgi_slave->killed_for_retry)
515     return;                                     // Already deadlock killed.
516   to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_PENDING;
517   mysql_manager_submit(bg_slave_kill, to_kill);
518 }
519 
520 /*
521   This function must only be called from a slave SQL thread (or worker thread),
522   to ensure that the table_entry will not go away before we can lock the
523   LOCK_slave_state.
524 */
525 void slave_background_gtid_pos_create_request(
526         rpl_slave_state::gtid_pos_table *table_entry)
527 {
528   if (table_entry->state != rpl_slave_state::GTID_POS_AUTO_CREATE)
529     return;
530   mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
531   if (table_entry->state != rpl_slave_state::GTID_POS_AUTO_CREATE)
532   {
533     mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
534     return;
535   }
536   table_entry->state= rpl_slave_state::GTID_POS_CREATE_REQUESTED;
537   mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
538 
539   mysql_manager_submit(bg_gtid_pos_auto_create, table_entry->table_hton);
540 }
541 
542 
/*
  Request the manager thread to delete no longer used rows from the
  mysql.gtid_slave_pos* tables.

  Submits bg_gtid_delete_pending() to the manager thread with no argument.
*/
void slave_background_gtid_pending_delete_request(void)
{
  mysql_manager_submit(bg_gtid_delete_pending, NULL);
}
551 
552 
553 /* Initialize slave structures */
554 
555 int init_slave()
556 {
557   DBUG_ENTER("init_slave");
558   int error= 0;
559 
560 #ifdef HAVE_PSI_INTERFACE
561   init_slave_psi_keys();
562 #endif
563 
564   if (global_rpl_thread_pool.init(opt_slave_parallel_threads))
565     return 1;
566 
567   slave_background_thread_gtid_loaded= false;
568   mysql_manager_submit(bg_rpl_load_gtid_slave_state, NULL);
569 
570   // hijacking global_rpl_thread_pool cond here - it's only once on startup
571   mysql_mutex_lock(&global_rpl_thread_pool.LOCK_rpl_thread_pool);
572   while (!slave_background_thread_gtid_loaded)
573     mysql_cond_wait(&global_rpl_thread_pool.COND_rpl_thread_pool,
574                     &global_rpl_thread_pool.LOCK_rpl_thread_pool);
575   mysql_mutex_unlock(&global_rpl_thread_pool.LOCK_rpl_thread_pool);
576 
577   /*
578     This is called when mysqld starts. Before client connections are
579     accepted. However bootstrap may conflict with us if it does START SLAVE.
580     So it's safer to take the lock.
581   */
582 
583   if (pthread_key_create(&RPL_MASTER_INFO, NULL))
584     goto err;
585 
586   master_info_index= new Master_info_index;
587   if (!master_info_index || master_info_index->init_all_master_info())
588   {
589     sql_print_error("Failed to initialize multi master structures");
590     DBUG_RETURN(1);
591   }
592   if (!(active_mi= new Master_info(&default_master_connection_name,
593                                    relay_log_recovery)) ||
594       active_mi->error())
595   {
596     delete active_mi;
597     active_mi= 0;
598     sql_print_error("Failed to allocate memory for the Master Info structure");
599     goto err;
600   }
601 
602   if (master_info_index->add_master_info(active_mi, FALSE))
603   {
604     delete active_mi;
605     active_mi= 0;
606     goto err;
607   }
608 
609   /*
610     If master_host is not specified, try to read it from the master_info file.
611     If master_host is specified, create the master_info file if it doesn't
612     exists.
613   */
614 
615   if (init_master_info(active_mi,master_info_file,relay_log_info_file,
616                        1, (SLAVE_IO | SLAVE_SQL)))
617   {
618     sql_print_error("Failed to initialize the master info structure");
619     goto err;
620   }
621 
622   /* If server id is not set, start_slave_thread() will say it */
623 
624   if (active_mi->host[0] && !opt_skip_slave_start)
625   {
626     int error;
627     THD *thd= new THD(next_thread_id());
628     thd->thread_stack= (char*) &thd;
629     thd->store_globals();
630 
631     error= start_slave_threads(0, /* No active thd */
632                                1 /* need mutex */,
633                                1 /* wait for start*/,
634                                active_mi,
635                                master_info_file,
636                                relay_log_info_file,
637                                SLAVE_IO | SLAVE_SQL);
638 
639     thd->reset_globals();
640     delete thd;
641     if (unlikely(error))
642     {
643       sql_print_error("Failed to create slave threads");
644       goto err;
645     }
646   }
647 
648 end:
649   DBUG_RETURN(error);
650 
651 err:
652   error= 1;
653   goto end;
654 }
655 
656 /*
657   Updates the master info based on the information stored in the
658   relay info and ignores relay logs previously retrieved by the IO
659   thread, which thus starts fetching again based on to the
660   group_master_log_pos and group_master_log_name. Eventually, the old
661   relay logs will be purged by the normal purge mechanism.
662 
663   In the feature, we should improve this routine in order to avoid throwing
664   away logs that are safely stored in the disk. Note also that this recovery
665   routine relies on the correctness of the relay-log.info and only tolerates
666   coordinate problems in master.info.
667 
668   In this function, there is no need for a mutex as the caller
669   (i.e. init_slave) already has one acquired.
670 
671   Specifically, the following structures are updated:
672 
673   1 - mi->master_log_pos  <-- rli->group_master_log_pos
674   2 - mi->master_log_name <-- rli->group_master_log_name
675   3 - It moves the relay log to the new relay log file, by
676       rli->group_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
677       rli->event_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
678       rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
679       rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
680 
681    If there is an error, it returns (1), otherwise returns (0).
682  */
683 int init_recovery(Master_info* mi, const char** errmsg)
684 {
685   DBUG_ENTER("init_recovery");
686 
687   Relay_log_info *rli= &mi->rli;
688   if (rli->group_master_log_name[0])
689   {
690     mi->master_log_pos= MY_MAX(BIN_LOG_HEADER_SIZE,
691                              rli->group_master_log_pos);
692     strmake_buf(mi->master_log_name, rli->group_master_log_name);
693 
694     sql_print_warning("Recovery from master pos %ld and file %s.",
695                       (ulong) mi->master_log_pos, mi->master_log_name);
696 
697     strmake_buf(rli->group_relay_log_name, rli->relay_log.get_log_fname());
698     strmake_buf(rli->event_relay_log_name, rli->relay_log.get_log_fname());
699 
700     rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
701   }
702 
703   DBUG_RETURN(0);
704 }
705 
706 
707 /**
708   Convert slave skip errors bitmap into a printable string.
709 */
710 
711 static void make_slave_skip_errors_printable(void)
712 {
713   /*
714     To be safe, we want 10 characters of room in the buffer for a number
715     plus terminators. Also, we need some space for constant strings.
716     10 characters must be sufficient for a number plus {',' | '...'}
717     plus a NUL terminator. That is a max 6 digit number.
718   */
719   const size_t MIN_ROOM= 10;
720   DBUG_ENTER("make_slave_skip_errors_printable");
721   DBUG_ASSERT(sizeof(slave_skip_error_names) > MIN_ROOM);
722   DBUG_ASSERT(MAX_SLAVE_ERROR <= 999999); // 6 digits
723 
724   /* Make @@slave_skip_errors show the nice human-readable value.  */
725   opt_slave_skip_errors= slave_skip_error_names;
726 
727   if (!use_slave_mask || bitmap_is_clear_all(&slave_error_mask))
728   {
729     /* purecov: begin tested */
730     memcpy(slave_skip_error_names, STRING_WITH_LEN("OFF"));
731     /* purecov: end */
732   }
733   else if (bitmap_is_set_all(&slave_error_mask))
734   {
735     /* purecov: begin tested */
736     memcpy(slave_skip_error_names, STRING_WITH_LEN("ALL"));
737     /* purecov: end */
738   }
739   else
740   {
741     char *buff= slave_skip_error_names;
742     char *bend= buff + sizeof(slave_skip_error_names) - MIN_ROOM;
743     int  errnum;
744 
745     for (errnum= 0; errnum < MAX_SLAVE_ERROR; errnum++)
746     {
747       if (bitmap_is_set(&slave_error_mask, errnum))
748       {
749         if (buff >= bend)
750           break; /* purecov: tested */
751         buff= int10_to_str(errnum, buff, 10);
752         *buff++= ',';
753       }
754     }
755     if (buff != slave_skip_error_names)
756       buff--; // Remove last ','
757     if (errnum < MAX_SLAVE_ERROR)
758     {
759       /* Couldn't show all errors */
760       buff= strmov(buff, "..."); /* purecov: tested */
761     }
762     *buff=0;
763   }
764   DBUG_PRINT("init", ("error_names: '%s'", slave_skip_error_names));
765   DBUG_VOID_RETURN;
766 }
767 
768 /*
769   Init function to set up array for errors that should be skipped for slave
770 
771   SYNOPSIS
772     init_slave_skip_errors()
773     arg         List of errors numbers to skip, separated with ','
774 
775   NOTES
776     Called from get_options() in mysqld.cc on start-up
777 */
778 
779 bool init_slave_skip_errors(const char* arg)
780 {
781   const char *p;
782   DBUG_ENTER("init_slave_skip_errors");
783 
784   if (!arg || !*arg)                            // No errors defined
785     goto end;
786 
787   if (unlikely(my_bitmap_init(&slave_error_mask,0,MAX_SLAVE_ERROR,0)))
788     DBUG_RETURN(1);
789 
790   use_slave_mask= 1;
791   for (;my_isspace(system_charset_info,*arg);++arg)
792     /* empty */;
793   if (!my_strnncoll(system_charset_info,(uchar*)arg,4,(const uchar*)"all",4))
794   {
795     bitmap_set_all(&slave_error_mask);
796     goto end;
797   }
798   for (p= arg ; *p; )
799   {
800     long err_code;
801     if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code)))
802       break;
803     if (err_code < MAX_SLAVE_ERROR)
804        bitmap_set_bit(&slave_error_mask,(uint)err_code);
805     while (!my_isdigit(system_charset_info,*p) && *p)
806       p++;
807   }
808 
809 end:
810   make_slave_skip_errors_printable();
811   DBUG_RETURN(0);
812 }
813 
814 /**
815   Make printable version if slave_transaction_retry_errors
816   This is never empty as at least ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT
817   will be there
818 */
819 
820 static void make_slave_transaction_retry_errors_printable(void)
821 {
822   /*
823     To be safe, we want 10 characters of room in the buffer for a number
824     plus terminators. Also, we need some space for constant strings.
825     10 characters must be sufficient for a number plus {',' | '...'}
826     plus a NUL terminator. That is a max 6 digit number.
827   */
828   const size_t MIN_ROOM= 10;
829   char *buff= slave_transaction_retry_error_names;
830   char *bend= buff + sizeof(slave_transaction_retry_error_names) - MIN_ROOM;
831   uint  i;
832   DBUG_ENTER("make_slave_transaction_retry_errors_printable");
833   DBUG_ASSERT(sizeof(slave_transaction_retry_error_names) > MIN_ROOM);
834 
835   /* Make @@slave_transaction_retry_errors show a human-readable value */
836   opt_slave_transaction_retry_errors= slave_transaction_retry_error_names;
837 
838   for (i= 0; i < slave_transaction_retry_error_length && buff < bend; i++)
839   {
840     buff= int10_to_str(slave_transaction_retry_errors[i], buff, 10);
841     *buff++= ',';
842   }
843   if (buff != slave_transaction_retry_error_names)
844     buff--; // Remove last ','
845   if (i < slave_transaction_retry_error_length)
846   {
847     /* Couldn't show all errors */
848     buff= strmov(buff, "..."); /* purecov: tested */
849   }
850   *buff=0;
851   DBUG_PRINT("exit", ("error_names: '%s'",
852                       slave_transaction_retry_error_names));
853   DBUG_VOID_RETURN;
854 }
855 
856 
857 #define DEFAULT_SLAVE_RETRY_ERRORS 9
858 
859 bool init_slave_transaction_retry_errors(const char* arg)
860 {
861   const char *p;
862   long err_code;
863   uint i;
864   DBUG_ENTER("init_slave_transaction_retry_errors");
865 
866   /* Handle empty strings */
867   if (!arg)
868     arg= "";
869 
870   slave_transaction_retry_error_length= DEFAULT_SLAVE_RETRY_ERRORS;
871   for (;my_isspace(system_charset_info,*arg);++arg)
872     /* empty */;
873   for (p= arg; *p; )
874   {
875     if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code)))
876       break;
877     slave_transaction_retry_error_length++;
878     while (!my_isdigit(system_charset_info,*p) && *p)
879       p++;
880   }
881 
882   if (unlikely(!(slave_transaction_retry_errors=
883                  (uint *) my_once_alloc(sizeof(int) *
884                                         slave_transaction_retry_error_length,
885                                         MYF(MY_WME)))))
886     DBUG_RETURN(1);
887 
888   /*
889     Temporary error codes:
890     currently, InnoDB deadlock detected by InnoDB or lock
891     wait timeout (innodb_lock_wait_timeout exceeded
892   */
893   slave_transaction_retry_errors[0]= ER_NET_READ_ERROR;
894   slave_transaction_retry_errors[1]= ER_NET_READ_INTERRUPTED;
895   slave_transaction_retry_errors[2]= ER_NET_ERROR_ON_WRITE;
896   slave_transaction_retry_errors[3]= ER_NET_WRITE_INTERRUPTED;
897   slave_transaction_retry_errors[4]= ER_LOCK_WAIT_TIMEOUT;
898   slave_transaction_retry_errors[5]= ER_LOCK_DEADLOCK;
899   slave_transaction_retry_errors[6]= ER_CONNECT_TO_FOREIGN_DATA_SOURCE;
900   slave_transaction_retry_errors[7]= 2013; /* CR_SERVER_LOST */
901   slave_transaction_retry_errors[8]= 12701; /* ER_SPIDER_REMOTE_SERVER_GONE_AWAY_NUM */
902 
903   /* Add user codes after this */
904   for (p= arg, i= DEFAULT_SLAVE_RETRY_ERRORS; *p; )
905   {
906     if (!(p= str2int(p, 10, 0, LONG_MAX, &err_code)))
907       break;
908     if (err_code > 0)
909       slave_transaction_retry_errors[i++]= (uint) err_code;
910     while (!my_isdigit(system_charset_info,*p) && *p)
911       p++;
912   }
913   slave_transaction_retry_error_length= i;
914 
915   make_slave_transaction_retry_errors_printable();
916   DBUG_RETURN(0);
917 }
918 
919 
920 int terminate_slave_threads(Master_info* mi,int thread_mask,bool skip_lock)
921 {
922   DBUG_ENTER("terminate_slave_threads");
923 
924   if (!mi->inited)
925     DBUG_RETURN(0); /* successfully do nothing */
926   int error,force_all = (thread_mask & SLAVE_FORCE_ALL);
927   int retval= 0;
928   mysql_mutex_t *sql_lock = &mi->rli.run_lock, *io_lock = &mi->run_lock;
929   mysql_mutex_t *log_lock= mi->rli.relay_log.get_log_lock();
930 
931   if (thread_mask & (SLAVE_SQL|SLAVE_FORCE_ALL))
932   {
933     DBUG_PRINT("info",("Terminating SQL thread"));
934     if (mi->using_parallel() && mi->rli.abort_slave && mi->rli.stop_for_until)
935     {
936       mi->rli.stop_for_until= false;
937       mi->rli.parallel.stop_during_until();
938     }
939     else
940       mi->rli.abort_slave=1;
941     if (unlikely((error= terminate_slave_thread(mi->rli.sql_driver_thd,
942                                                 sql_lock,
943                                                 &mi->rli.stop_cond,
944                                                 &mi->rli.slave_running,
945                                                 skip_lock))) &&
946                  !force_all)
947       DBUG_RETURN(error);
948     retval= error;
949 
950     mysql_mutex_lock(log_lock);
951 
952     DBUG_PRINT("info",("Flushing relay-log info file."));
953     if (current_thd)
954       THD_STAGE_INFO(current_thd, stage_flushing_relay_log_info_file);
955     if (mi->rli.flush() || my_sync(mi->rli.info_fd, MYF(MY_WME)))
956       retval= ER_ERROR_DURING_FLUSH_LOGS;
957 
958     mysql_mutex_unlock(log_lock);
959   }
960   if (thread_mask & (SLAVE_IO|SLAVE_FORCE_ALL))
961   {
962     DBUG_PRINT("info",("Terminating IO thread"));
963     mi->abort_slave=1;
964     if (unlikely((error= terminate_slave_thread(mi->io_thd, io_lock,
965                                                 &mi->stop_cond,
966                                                 &mi->slave_running,
967                                                 skip_lock))) &&
968                  !force_all)
969       DBUG_RETURN(error);
970     if (!retval)
971       retval= error;
972 
973     mysql_mutex_lock(log_lock);
974 
975     DBUG_PRINT("info",("Flushing relay log and master info file."));
976     if (current_thd)
977       THD_STAGE_INFO(current_thd, stage_flushing_relay_log_and_master_info_repository);
978     if (likely(mi->fd >= 0))
979     {
980       if (flush_master_info(mi, TRUE, FALSE) || my_sync(mi->fd, MYF(MY_WME)))
981         retval= ER_ERROR_DURING_FLUSH_LOGS;
982     }
983     if (mi->rli.relay_log.is_open() &&
984         my_sync(mi->rli.relay_log.get_log_file()->file, MYF(MY_WME)))
985       retval= ER_ERROR_DURING_FLUSH_LOGS;
986 
987     mysql_mutex_unlock(log_lock);
988   }
989   DBUG_RETURN(retval);
990 }
991 
992 
993 /**
994    Wait for a slave thread to terminate.
995 
996    This function is called after requesting the thread to terminate
997    (by setting @c abort_slave member of @c Relay_log_info or @c
998    Master_info structure to 1). Termination of the thread is
999    controlled with the the predicate <code>*slave_running</code>.
1000 
1001    Function will acquire @c term_lock before waiting on the condition
1002    unless @c skip_lock is true in which case the mutex should be owned
1003    by the caller of this function and will remain acquired after
1004    return from the function.
1005 
1006    @param term_lock
1007           Associated lock to use when waiting for @c term_cond
1008 
1009    @param term_cond
1010           Condition that is signalled when the thread has terminated
1011 
1012    @param slave_running
1013           Pointer to predicate to check for slave thread termination
1014 
1015    @param skip_lock
1016           If @c true the lock will not be acquired before waiting on
1017           the condition. In this case, it is assumed that the calling
1018           function acquires the lock before calling this function.
1019 
1020    @retval 0 All OK ER_SLAVE_NOT_RUNNING otherwise.
1021 
1022    @note  If the executing thread has to acquire term_lock (skip_lock
1023           is false), the negative running status does not represent
1024           any issue therefore no error is reported.
1025 
1026  */
1027 static int
1028 terminate_slave_thread(THD *thd,
1029                        mysql_mutex_t *term_lock,
1030                        mysql_cond_t *term_cond,
1031                        volatile uint *slave_running,
1032                        bool skip_lock)
1033 {
1034   DBUG_ENTER("terminate_slave_thread");
1035   if (!skip_lock)
1036   {
1037     mysql_mutex_lock(term_lock);
1038   }
1039   else
1040   {
1041     mysql_mutex_assert_owner(term_lock);
1042   }
1043   if (!*slave_running)
1044   {
1045     if (!skip_lock)
1046     {
1047       /*
1048         if run_lock (term_lock) is acquired locally then either
1049         slave_running status is fine
1050       */
1051       mysql_mutex_unlock(term_lock);
1052       DBUG_RETURN(0);
1053     }
1054     else
1055     {
1056       DBUG_RETURN(ER_SLAVE_NOT_RUNNING);
1057     }
1058   }
1059   DBUG_ASSERT(thd != 0);
1060   THD_CHECK_SENTRY(thd);
1061 
1062   /*
1063     Is is critical to test if the slave is running. Otherwise, we might
1064     be referening freed memory trying to kick it
1065   */
1066 
1067   while (*slave_running)                        // Should always be true
1068   {
1069     int error __attribute__((unused));
1070     DBUG_PRINT("loop", ("killing slave thread"));
1071 
1072     mysql_mutex_lock(&thd->LOCK_thd_kill);
1073     mysql_mutex_lock(&thd->LOCK_thd_data);
1074 #ifndef DONT_USE_THR_ALARM
1075     /*
1076       Error codes from pthread_kill are:
1077       EINVAL: invalid signal number (can't happen)
1078       ESRCH: thread already killed (can happen, should be ignored)
1079     */
1080     int err __attribute__((unused))= pthread_kill(thd->real_id, thr_client_alarm);
1081     DBUG_ASSERT(err != EINVAL);
1082 #endif
1083     thd->awake_no_mutex(NOT_KILLED);
1084 
1085     mysql_mutex_unlock(&thd->LOCK_thd_kill);
1086     mysql_mutex_unlock(&thd->LOCK_thd_data);
1087 
1088     /*
1089       There is a small chance that slave thread might miss the first
1090       alarm. To protect againts it, resend the signal until it reacts
1091     */
1092     struct timespec abstime;
1093     set_timespec(abstime,2);
1094     error= mysql_cond_timedwait(term_cond, term_lock, &abstime);
1095     DBUG_ASSERT(error == ETIMEDOUT || error == 0);
1096   }
1097 
1098   DBUG_ASSERT(*slave_running == 0);
1099 
1100   if (!skip_lock)
1101     mysql_mutex_unlock(term_lock);
1102   DBUG_RETURN(0);
1103 }
1104 
1105 
/*
  Create one slave thread and optionally wait until it has started.

  thread_key     PSI instrumentation key (only with HAVE_PSI_INTERFACE)
  h_func         thread entry point; it receives mi as its argument
  start_lock     if not NULL, locked on entry and unlocked before return
  cond_lock      mutex used together with start_cond; when both are
                 given the function waits for the thread to start
  start_cond     condition waited on until *slave_run_id changes
  slave_running  non-zero if the thread is already running
  slave_run_id   counter sampled before thread creation; the wait loop
                 ends once its value differs from that sample
  mi             connection the thread belongs to; must be initialised

  RETURNS
    0                   thread created (and started, if waiting)
    ER_BAD_SLAVE        global server_id is not set
    ER_SLAVE_MUST_STOP  *slave_running is already non-zero
    ER_SLAVE_THREAD     thread creation failed
*/
int start_slave_thread(
#ifdef HAVE_PSI_INTERFACE
                       PSI_thread_key thread_key,
#endif
                       pthread_handler h_func, mysql_mutex_t *start_lock,
                       mysql_mutex_t *cond_lock,
                       mysql_cond_t *start_cond,
                       volatile uint *slave_running,
                       volatile ulong *slave_run_id,
                       Master_info* mi)
{
  pthread_t th;
  ulong start_id;
  int error;
  DBUG_ENTER("start_slave_thread");

  DBUG_ASSERT(mi->inited);

  if (start_lock)
    mysql_mutex_lock(start_lock);
  /* A slave cannot run without a server id. */
  if (!global_system_variables.server_id)
  {
    if (start_cond)
      mysql_cond_broadcast(start_cond);
    if (start_lock)
      mysql_mutex_unlock(start_lock);
    sql_print_error("Server id not set, will not start slave");
    DBUG_RETURN(ER_BAD_SLAVE);
  }

  /* Refuse to start a thread that is already running. */
  if (*slave_running)
  {
    if (start_cond)
      mysql_cond_broadcast(start_cond);
    if (start_lock)
      mysql_mutex_unlock(start_lock);
    DBUG_RETURN(ER_SLAVE_MUST_STOP);
  }
  /* Remember the run id so that a change can be detected below. */
  start_id= *slave_run_id;
  DBUG_PRINT("info",("Creating new slave thread"));
  if (unlikely((error= mysql_thread_create(thread_key,
                                           &th, &connection_attrib, h_func,
                                           (void*)mi))))
  {
    sql_print_error("Can't create slave thread (errno= %d).", error);
    if (start_lock)
      mysql_mutex_unlock(start_lock);
    DBUG_RETURN(ER_SLAVE_THREAD);
  }

  /*
    In the following loop we can't check for thd->killed as we have to
    wait until THD structures for the slave thread are created
    before we can return.
    This should be ok as there is no major work done in the slave
    threads before they signal that we can stop waiting.
  */

  if (start_cond && cond_lock) // caller has cond_lock
  {
    THD* thd = current_thd;
    while (start_id == *slave_run_id)
    {
      DBUG_PRINT("sleep",("Waiting for slave thread to start"));
      PSI_stage_info saved_stage= {0, "", 0};
      thd->ENTER_COND(start_cond, cond_lock,
                      & stage_waiting_for_slave_thread_to_start,
                      & saved_stage);
      /*
        It is not sufficient to test this at loop bottom. We must test
        it after registering the mutex in enter_cond(). If the kill
        happens after testing of thd->killed and before the mutex is
        registered, we could otherwise go waiting though thd->killed is
        set.

        NOTE(review): no test of thd->killed is present in this loop
        (see the comment before the loop); the paragraph above looks
        like a leftover from an earlier version - confirm.
      */
      mysql_cond_wait(start_cond, cond_lock);
      thd->EXIT_COND(& saved_stage);
      mysql_mutex_lock(cond_lock); // re-acquire it as exit_cond() released
    }
  }
  if (start_lock)
    mysql_mutex_unlock(start_lock);
  DBUG_RETURN(0);
}
1190 
1191 
1192 /*
1193   start_slave_threads()
1194 
1195   NOTES
1196     SLAVE_FORCE_ALL is not implemented here on purpose since it does not make
1197     sense to do that for starting a slave--we always care if it actually
1198     started the threads that were not previously running
1199 */
1200 
1201 int start_slave_threads(THD *thd,
1202                         bool need_slave_mutex, bool wait_for_start,
1203                         Master_info* mi, const char* master_info_fname,
1204                         const char* slave_info_fname, int thread_mask)
1205 {
1206   mysql_mutex_t *lock_io=0, *lock_sql=0, *lock_cond_io=0, *lock_cond_sql=0;
1207   mysql_cond_t* cond_io=0, *cond_sql=0;
1208   int error=0;
1209   const char *errmsg;
1210   DBUG_ENTER("start_slave_threads");
1211 
1212   if (need_slave_mutex)
1213   {
1214     lock_io = &mi->run_lock;
1215     lock_sql = &mi->rli.run_lock;
1216   }
1217   if (wait_for_start)
1218   {
1219     cond_io = &mi->start_cond;
1220     cond_sql = &mi->rli.start_cond;
1221     lock_cond_io = &mi->run_lock;
1222     lock_cond_sql = &mi->rli.run_lock;
1223   }
1224 
1225   /*
1226     If we are using GTID and both SQL and IO threads are stopped, then get
1227     rid of all relay logs.
1228 
1229     Relay logs are not very useful when using GTID, except as a buffer
1230     between the fetch in the IO thread and the apply in SQL thread. However
1231     while one of the threads is running, they are in use and cannot be
1232     removed.
1233   */
1234   if (mi->using_gtid != Master_info::USE_GTID_NO &&
1235       !mi->slave_running && !mi->rli.slave_running)
1236   {
1237     /*
1238       purge_relay_logs() clears the mi->rli.group_master_log_pos.
1239       So save and restore them, like we do in CHANGE MASTER.
1240       (We are not going to use them for GTID, but it might be worth to
1241       keep them in case connection with GTID fails and user wants to go
1242       back and continue with previous old-style replication coordinates).
1243     */
1244     mi->master_log_pos = MY_MAX(BIN_LOG_HEADER_SIZE,
1245                                 mi->rli.group_master_log_pos);
1246     strmake(mi->master_log_name, mi->rli.group_master_log_name,
1247             sizeof(mi->master_log_name)-1);
1248     purge_relay_logs(&mi->rli, thd, 0, &errmsg);
1249     mi->rli.group_master_log_pos= mi->master_log_pos;
1250     strmake(mi->rli.group_master_log_name, mi->master_log_name,
1251             sizeof(mi->rli.group_master_log_name)-1);
1252 
1253     error= rpl_load_gtid_state(&mi->gtid_current_pos, mi->using_gtid ==
1254                                              Master_info::USE_GTID_CURRENT_POS);
1255     mi->events_queued_since_last_gtid= 0;
1256     mi->gtid_reconnect_event_skip_count= 0;
1257 
1258     mi->rli.restart_gtid_pos.reset();
1259   }
1260 
1261   if (likely(!error) && likely((thread_mask & SLAVE_IO)))
1262     error= start_slave_thread(
1263 #ifdef HAVE_PSI_INTERFACE
1264                               key_thread_slave_io,
1265 #endif
1266                               handle_slave_io, lock_io, lock_cond_io,
1267                               cond_io,
1268                               &mi->slave_running, &mi->slave_run_id,
1269                               mi);
1270   if (likely(!error) && likely(thread_mask & SLAVE_SQL))
1271   {
1272     error= start_slave_thread(
1273 #ifdef HAVE_PSI_INTERFACE
1274                               key_thread_slave_sql,
1275 #endif
1276                               handle_slave_sql, lock_sql, lock_cond_sql,
1277                               cond_sql,
1278                               &mi->rli.slave_running, &mi->rli.slave_run_id,
1279                               mi);
1280     if (unlikely(error))
1281       terminate_slave_threads(mi, thread_mask & SLAVE_IO, !need_slave_mutex);
1282   }
1283   DBUG_RETURN(error);
1284 }
1285 
1286 
1287 /*
1288   Kill slaves preparing for shutdown
1289 */
1290 
1291 void slave_prepare_for_shutdown()
1292 {
1293   mysql_mutex_lock(&LOCK_active_mi);
1294   master_info_index->free_connections();
1295   mysql_mutex_unlock(&LOCK_active_mi);
1296   // It's safe to destruct worker pool now when
1297   // all driver threads are gone.
1298   global_rpl_thread_pool.deactivate();
1299 }
1300 
1301 /*
1302   Release slave threads at time of executing shutdown.
1303 */
1304 
1305 void end_slave()
1306 {
1307   DBUG_ENTER("end_slave");
1308 
1309   /*
1310     This is called when the server terminates, in close_connections().
1311     It terminates slave threads. However, some CHANGE MASTER etc may still be
1312     running presently. If a START SLAVE was in progress, the mutex lock below
1313     will make us wait until slave threads have started, and START SLAVE
1314     returns, then we terminate them here.
1315 
1316     We can also be called by cleanup(), which only happens if some
1317     startup parameter to the server was wrong.
1318   */
1319   mysql_mutex_lock(&LOCK_active_mi);
1320   /*
1321     master_info_index should not have any threads anymore as they where
1322     killed as part of slave_prepare_for_shutdown()
1323   */
1324   delete master_info_index;
1325   master_info_index= 0;
1326   active_mi= 0;
1327   mysql_mutex_unlock(&LOCK_active_mi);
1328 
1329   global_rpl_thread_pool.destroy();
1330   free_all_rpl_filters();
1331   DBUG_VOID_RETURN;
1332 }
1333 
/*
  Check whether the IO thread of the given connection should stop.

  Returns true when a stop was requested through mi->abort_slave or
  when the IO thread's THD has its killed flag set. Must only be called
  while the IO thread is marked as running (asserted in debug builds).
*/
static bool io_slave_killed(Master_info* mi)
{
  DBUG_ENTER("io_slave_killed");

  DBUG_ASSERT(mi->slave_running); // tracking buffer overrun
  DBUG_RETURN(mi->abort_slave || mi->io_thd->killed);
}
1341 
1342 /**
1343    The function analyzes a possible killed status and makes
1344    a decision whether to accept it or not.
1345    Normally upon accepting the sql thread goes to shutdown.
1346    In the event of deffering decision @rli->last_event_start_time waiting
1347    timer is set to force the killed status be accepted upon its expiration.
1348 
1349    @param thd   pointer to a THD instance
1350    @param rli   pointer to Relay_log_info instance
1351 
1352    @return TRUE the killed status is recognized, FALSE a possible killed
1353            status is deferred.
1354 */
1355 static bool sql_slave_killed(rpl_group_info *rgi)
1356 {
1357   bool ret= FALSE;
1358   Relay_log_info *rli= rgi->rli;
1359   THD *thd= rgi->thd;
1360   DBUG_ENTER("sql_slave_killed");
1361 
1362   DBUG_ASSERT(rli->sql_driver_thd == thd);
1363   DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun
1364   if (rli->sql_driver_thd->killed || rli->abort_slave)
1365   {
1366     /*
1367       The transaction should always be binlogged if OPTION_KEEP_LOG is
1368       set (it implies that something can not be rolled back). And such
1369       case should be regarded similarly as modifing a
1370       non-transactional table because retrying of the transaction will
1371       lead to an error or inconsistency as well.
1372 
1373       Example: OPTION_KEEP_LOG is set if a temporary table is created
1374       or dropped.
1375 
1376       Note that transaction.all.modified_non_trans_table may be 1
1377       if last statement was a single row transaction without begin/end.
1378       Testing this flag must always be done in connection with
1379       rli->is_in_group().
1380     */
1381 
1382     if ((thd->transaction.all.modified_non_trans_table ||
1383          (thd->variables.option_bits & OPTION_KEEP_LOG)) &&
1384         rli->is_in_group())
1385     {
1386       char msg_stopped[]=
1387         "... Slave SQL Thread stopped with incomplete event group "
1388         "having non-transactional changes. "
1389         "If the group consists solely of row-based events, you can try "
1390         "to restart the slave with --slave-exec-mode=IDEMPOTENT, which "
1391         "ignores duplicate key, key not found, and similar errors (see "
1392         "documentation for details).";
1393 
1394       DBUG_PRINT("info", ("modified_non_trans_table: %d  OPTION_BEGIN: %d  "
1395                           "OPTION_KEEP_LOG: %d  is_in_group: %d",
1396                           thd->transaction.all.modified_non_trans_table,
1397                           MY_TEST(thd->variables.option_bits & OPTION_BEGIN),
1398                           MY_TEST(thd->variables.option_bits & OPTION_KEEP_LOG),
1399                           rli->is_in_group()));
1400 
1401       if (rli->abort_slave)
1402       {
1403         DBUG_PRINT("info",
1404                    ("Request to stop slave SQL Thread received while "
1405                     "applying a group that has non-transactional "
1406                     "changes; waiting for completion of the group ... "));
1407 
1408         /*
1409           Slave sql thread shutdown in face of unfinished group
1410           modified Non-trans table is handled via a timer. The slave
1411           may eventually give out to complete the current group and in
1412           that case there might be issues at consequent slave restart,
1413           see the error message.  WL#2975 offers a robust solution
1414           requiring to store the last exectuted event's coordinates
1415           along with the group's coordianates instead of waiting with
1416           @c last_event_start_time the timer.
1417         */
1418 
1419         if (rgi->last_event_start_time == 0)
1420           rgi->last_event_start_time= my_time(0);
1421         ret= difftime(my_time(0), rgi->last_event_start_time) <=
1422           SLAVE_WAIT_GROUP_DONE ? FALSE : TRUE;
1423 
1424         DBUG_EXECUTE_IF("stop_slave_middle_group",
1425                         DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
1426                                         ret= TRUE;);); // time is over
1427 
1428         if (ret == 0)
1429         {
1430           rli->report(WARNING_LEVEL, 0, rgi->gtid_info(),
1431                       "Request to stop slave SQL Thread received while "
1432                       "applying a group that has non-transactional "
1433                       "changes; waiting for completion of the group ... ");
1434         }
1435         else
1436         {
1437           rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(),
1438                       ER_THD(thd, ER_SLAVE_FATAL_ERROR), msg_stopped);
1439         }
1440       }
1441       else
1442       {
1443         ret= TRUE;
1444         rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, rgi->gtid_info(),
1445                     ER_THD(thd, ER_SLAVE_FATAL_ERROR),
1446                     msg_stopped);
1447       }
1448     }
1449     else
1450     {
1451       ret= TRUE;
1452     }
1453   }
1454   if (ret)
1455     rgi->last_event_start_time= 0;
1456 
1457   DBUG_RETURN(ret);
1458 }
1459 
1460 
1461 /*
1462   skip_load_data_infile()
1463 
1464   NOTES
1465     This is used to tell a 3.23 master to break send_file()
1466 */
1467 
1468 void skip_load_data_infile(NET *net)
1469 {
1470   DBUG_ENTER("skip_load_data_infile");
1471 
1472   (void)net_request_file(net, "/dev/null");
1473   (void)my_net_read(net);                               // discard response
1474   (void)net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0); // ok
1475   DBUG_VOID_RETURN;
1476 }
1477 
1478 
/*
  Send a file request for fname over net.

  Writes a command packet with command byte 251 whose payload is the
  file name (without the terminating NUL) and returns the result of
  net_write_command().
  NOTE(review): 251 is presumably the protocol's "send me this local
  file" marker - confirm against the client/server protocol docs.
*/
bool net_request_file(NET* net, const char* fname)
{
  DBUG_ENTER("net_request_file");
  DBUG_RETURN(net_write_command(net, 251, (uchar*) fname, strlen(fname),
                                (uchar*) "", 0));
}
1485 
1486 /*
1487   From other comments and tests in code, it looks like
1488   sometimes Query_log_event and Load_log_event can have db == 0
1489   (see rewrite_db() above for example)
1490   (cases where this happens are unclear; it may be when the master is 3.23).
1491 */
1492 
1493 const char *print_slave_db_safe(const char* db)
1494 {
1495   DBUG_ENTER("*print_slave_db_safe");
1496 
1497   DBUG_RETURN((db ? db : ""));
1498 }
1499 
1500 #endif /* HAVE_REPLICATION */
1501 
1502 int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
1503                                  const char *default_val)
1504 {
1505   size_t length;
1506   DBUG_ENTER("init_strvar_from_file");
1507 
1508   if ((length=my_b_gets(f,var, max_size)))
1509   {
1510     char* last_p = var + length -1;
1511     if (*last_p == '\n')
1512       *last_p = 0; // if we stopped on newline, kill it
1513     else
1514     {
1515       /*
1516         If we truncated a line or stopped on last char, remove all chars
1517         up to and including newline.
1518       */
1519       int c;
1520       while (((c=my_b_get(f)) != '\n' && c != my_b_EOF)) ;
1521     }
1522     DBUG_RETURN(0);
1523   }
1524   else if (default_val)
1525   {
1526     strmake(var,  default_val, max_size-1);
1527     DBUG_RETURN(0);
1528   }
1529   DBUG_RETURN(1);
1530 }
1531 
1532 /*
1533   when moving these functions to mysys, don't forget to
1534   remove slave.cc from libmysqld/CMakeLists.txt
1535 */
1536 int init_intvar_from_file(int* var, IO_CACHE* f, int default_val)
1537 {
1538   char buf[32];
1539   DBUG_ENTER("init_intvar_from_file");
1540 
1541 
1542   if (my_b_gets(f, buf, sizeof(buf)))
1543   {
1544     *var = atoi(buf);
1545     DBUG_RETURN(0);
1546   }
1547   else if (default_val)
1548   {
1549     *var = default_val;
1550     DBUG_RETURN(0);
1551   }
1552   DBUG_RETURN(1);
1553 }
1554 
1555 int init_floatvar_from_file(float* var, IO_CACHE* f, float default_val)
1556 {
1557   char buf[16];
1558   DBUG_ENTER("init_floatvar_from_file");
1559 
1560 
1561   if (my_b_gets(f, buf, sizeof(buf)))
1562   {
1563     if (sscanf(buf, "%f", var) != 1)
1564       DBUG_RETURN(1);
1565     else
1566       DBUG_RETURN(0);
1567   }
1568   else if (default_val != 0.0)
1569   {
1570     *var = default_val;
1571     DBUG_RETURN(0);
1572   }
1573   DBUG_RETURN(1);
1574 }
1575 
1576 
1577 /**
1578    A master info read method
1579 
1580    This function is called from @c init_master_info() along with
1581    relatives to restore some of @c active_mi members.
1582    Particularly, this function is responsible for restoring
1583    IGNORE_SERVER_IDS list of servers whose events the slave is
1584    going to ignore (to not log them in the relay log).
1585    Items being read are supposed to be decimal output of values of a
1586    type shorter or equal of @c long and separated by the single space.
1587    It also used to restore DO_DOMAIN_IDS & IGNORE_DOMAIN_IDS lists.
1588 
1589    @param arr         @c DYNAMIC_ARRAY pointer to storage for servers id
1590    @param f           @c IO_CACHE pointer to the source file
1591 
1592    @retval 0         All OK
1593    @retval non-zero  An error
1594 */
1595 
1596 int init_dynarray_intvar_from_file(DYNAMIC_ARRAY* arr, IO_CACHE* f)
1597 {
1598   int ret= 0;
1599   char buf[16 * (sizeof(long)*4 + 1)]; // static buffer to use most of times
1600   char *buf_act= buf; // actual buffer can be dynamic if static is short
1601   char *token, *last;
1602   uint num_items;     // number of items of `arr'
1603   size_t read_size;
1604   DBUG_ENTER("init_dynarray_intvar_from_file");
1605 
1606   if ((read_size= my_b_gets(f, buf_act, sizeof(buf))) == 0)
1607   {
1608     DBUG_RETURN(0);                             // no line in master.info
1609   }
1610   if (read_size + 1 == sizeof(buf) && buf[sizeof(buf) - 2] != '\n')
1611   {
1612     /*
1613       short read happend; allocate sufficient memory and make the 2nd read
1614     */
1615     char buf_work[(sizeof(long)*3 + 1)*16];
1616     memcpy(buf_work, buf, sizeof(buf_work));
1617     num_items= atoi(strtok_r(buf_work, " ", &last));
1618     size_t snd_size;
1619     /*
1620       max size lower bound approximate estimation bases on the formula:
1621       (the items number + items themselves) *
1622           (decimal size + space) - 1 + `\n' + '\0'
1623     */
1624     size_t max_size= (1 + num_items) * (sizeof(long)*3 + 1) + 1;
1625     buf_act= (char*) my_malloc(max_size, MYF(MY_WME));
1626     memcpy(buf_act, buf, read_size);
1627     snd_size= my_b_gets(f, buf_act + read_size, max_size - read_size);
1628     if (snd_size == 0 ||
1629         ((snd_size + 1 == max_size - read_size) &&  buf_act[max_size - 2] != '\n'))
1630     {
1631       /*
1632         failure to make the 2nd read or short read again
1633       */
1634       ret= 1;
1635       goto err;
1636     }
1637   }
1638   token= strtok_r(buf_act, " ", &last);
1639   if (token == NULL)
1640   {
1641     ret= 1;
1642     goto err;
1643   }
1644   num_items= atoi(token);
1645   for (uint i=0; i < num_items; i++)
1646   {
1647     token= strtok_r(NULL, " ", &last);
1648     if (token == NULL)
1649     {
1650       ret= 1;
1651       goto err;
1652     }
1653     else
1654     {
1655       ulong val= atol(token);
1656       insert_dynamic(arr, (uchar *) &val);
1657     }
1658   }
1659 err:
1660   if (buf_act != buf)
1661     my_free(buf_act);
1662   DBUG_RETURN(ret);
1663 }
1664 
1665 #ifdef HAVE_REPLICATION
1666 
1667 /*
1668   Check if the error is caused by network.
1669   @param[in]   errorno   Number of the error.
1670   RETURNS:
1671   TRUE         network error
1672   FALSE        not network error
1673 */
1674 
1675 bool is_network_error(uint errorno)
1676 {
1677   if (errorno == CR_CONNECTION_ERROR ||
1678       errorno == CR_CONN_HOST_ERROR ||
1679       errorno == CR_SERVER_GONE_ERROR ||
1680       errorno == CR_SERVER_LOST ||
1681       errorno == ER_CON_COUNT_ERROR ||
1682       errorno == ER_CONNECTION_KILLED ||
1683       errorno == ER_NEW_ABORTING_CONNECTION ||
1684       errorno == ER_NET_READ_INTERRUPTED ||
1685       errorno == ER_SERVER_SHUTDOWN)
1686     return TRUE;
1687 #ifdef WITH_WSREP
1688   if (errorno == ER_UNKNOWN_COM_ERROR)
1689     return TRUE;
1690 #endif
1691 
1692   return FALSE;
1693 }
1694 
1695 
1696 /*
1697   Note that we rely on the master's version (3.23, 4.0.14 etc) instead of
1698   relying on the binlog's version. This is not perfect: imagine an upgrade
1699   of the master without waiting that all slaves are in sync with the master;
1700   then a slave could be fooled about the binlog's format. This is what happens
1701   when people upgrade a 3.23 master to 4.0 without doing RESET MASTER: 4.0
1702   slaves are fooled. So we do this only to distinguish between 3.23 and more
1703   recent masters (it's too late to change things for 3.23).
1704 
1705   RETURNS
1706   0       ok
1707   1       error
1708   2       transient network problem, the caller should try to reconnect
1709 */
1710 
1711 static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi)
1712 {
1713   char err_buff[MAX_SLAVE_ERRMSG], err_buff2[MAX_SLAVE_ERRMSG];
1714   const char* errmsg= 0;
1715   int err_code= 0;
1716   MYSQL_RES *master_res= 0;
1717   MYSQL_ROW master_row;
1718   uint version= mysql_get_server_version(mysql) / 10000;
1719   DBUG_ENTER("get_master_version_and_clock");
1720 
1721   /*
1722     Free old description_event_for_queue (that is needed if we are in
1723     a reconnection).
1724   */
1725   delete mi->rli.relay_log.description_event_for_queue;
1726   mi->rli.relay_log.description_event_for_queue= 0;
1727 
1728   if (!my_isdigit(&my_charset_bin,*mysql->server_version))
1729   {
1730     errmsg= err_buff2;
1731     snprintf(err_buff2, sizeof(err_buff2),
1732              "Master reported unrecognized MySQL version: %s",
1733              mysql->server_version);
1734     err_code= ER_SLAVE_FATAL_ERROR;
1735     sprintf(err_buff, ER_DEFAULT(err_code), err_buff2);
1736   }
1737   else
1738   {
1739     /*
1740       Note the following switch will bug when we have MySQL branch 30 ;)
1741     */
1742     switch (version) {
1743     case 0:
1744     case 1:
1745     case 2:
1746       errmsg= err_buff2;
1747       snprintf(err_buff2, sizeof(err_buff2),
1748                "Master reported unrecognized MySQL version: %s",
1749                mysql->server_version);
1750       err_code= ER_SLAVE_FATAL_ERROR;
1751       sprintf(err_buff, ER_DEFAULT(err_code), err_buff2);
1752       break;
1753     case 3:
1754       mi->rli.relay_log.description_event_for_queue= new
1755         Format_description_log_event(1, mysql->server_version);
1756       break;
1757     case 4:
1758       mi->rli.relay_log.description_event_for_queue= new
1759         Format_description_log_event(3, mysql->server_version);
1760       break;
1761     default:
1762       /*
1763         Master is MySQL >=5.0. Give a default Format_desc event, so that we can
1764         take the early steps (like tests for "is this a 3.23 master") which we
1765         have to take before we receive the real master's Format_desc which will
1766         override this one. Note that the Format_desc we create below is garbage
1767         (it has the format of the *slave*); it's only good to help know if the
1768         master is 3.23, 4.0, etc.
1769       */
1770       mi->rli.relay_log.description_event_for_queue= new
1771         Format_description_log_event(4, mysql->server_version);
1772       break;
1773     }
1774   }
1775 
1776   /*
1777      This does not mean that a 5.0 slave will be able to read a 6.0 master; but
1778      as we don't know yet, we don't want to forbid this for now. If a 5.0 slave
1779      can't read a 6.0 master, this will show up when the slave can't read some
1780      events sent by the master, and there will be error messages.
1781   */
1782 
1783   if (errmsg)
1784     goto err;
1785 
1786   /* as we are here, we tried to allocate the event */
1787   if (!mi->rli.relay_log.description_event_for_queue)
1788   {
1789     errmsg= "default Format_description_log_event";
1790     err_code= ER_SLAVE_CREATE_EVENT_FAILURE;
1791     sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
1792     goto err;
1793   }
1794 
1795   /*
1796     FD_q's (A) is set initially from RL's (A): FD_q.(A) := RL.(A).
1797     It's necessary to adjust FD_q.(A) at this point because in the following
1798     course FD_q is going to be dumped to RL.
1799     Generally FD_q is derived from a received FD_m (roughly FD_q := FD_m)
1800     in queue_event and the master's (A) is installed.
1801     At one step with the assignment the Relay-Log's checksum alg is set to
1802     a new value: RL.(A) := FD_q.(A). If the slave service is stopped
1803     the last time assigned RL.(A) will be passed over to the restarting
1804     service (to the current execution point).
1805     RL.A is a "codec" to verify checksum in queue_event() almost all the time,
1806     except for the first fake Rotate event.
1807     Starting from this point the IO thread will execute the following checksum
1808     warm-up sequence of actions:
1809 
1810     FD_q.A := RL.A,
1811     A_m^0 := master.@@global.binlog_checksum,
1812     {queue_event(R_f): verifies(R_f, A_m^0)},
1813     {queue_event(FD_m): verifies(FD_m, FD_m.A), dump(FD_q), rotate(RL),
1814                         FD_q := FD_m, RL.A := FD_q.A)}
1815 
1816     See the legend defined in the MYSQL_BIN_LOG::relay_log_checksum_alg
1817     documentation (binlog.h).
1818     In the above, A_m^0 is the value of the master's
1819     @@binlog_checksum determined in the upcoming handshake (stored in
1820     mi->checksum_alg_before_fd).
1821 
1822 
1823     After the warm-up sequence IO gets to "normal" checksum verification mode
1824     to use RL.A in
1825 
1826     {queue_event(E_m): verifies(E_m, RL.A)}
1827 
1828     until it has received a new FD_m.
1829   */
1830   mi->rli.relay_log.description_event_for_queue->checksum_alg=
1831     mi->rli.relay_log.relay_log_checksum_alg;
1832 
1833   DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg !=
1834               BINLOG_CHECKSUM_ALG_UNDEF);
1835   DBUG_ASSERT(mi->rli.relay_log.relay_log_checksum_alg !=
1836               BINLOG_CHECKSUM_ALG_UNDEF);
1837   /*
1838     Compare the master and slave's clock. Do not die if master's clock is
1839     unavailable (very old master not supporting UNIX_TIMESTAMP()?).
1840   */
1841 
1842 #ifdef ENABLED_DEBUG_SYNC
1843   DBUG_EXECUTE_IF("dbug.before_get_UNIX_TIMESTAMP",
1844                   {
1845                     const char act[]=
1846                       "now "
1847                       "wait_for signal.get_unix_timestamp";
1848                     DBUG_ASSERT(debug_sync_service);
1849                     DBUG_ASSERT(!debug_sync_set_action(current_thd,
1850                                                        STRING_WITH_LEN(act)));
1851                   };);
1852 #endif
1853 
1854   master_res= NULL;
1855   if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT UNIX_TIMESTAMP()")) &&
1856       (master_res= mysql_store_result(mysql)) &&
1857       (master_row= mysql_fetch_row(master_res)))
1858   {
1859     mysql_mutex_lock(&mi->data_lock);
1860     mi->clock_diff_with_master=
1861       (long) (time((time_t*) 0) - strtoul(master_row[0], 0, 10));
1862     mysql_mutex_unlock(&mi->data_lock);
1863   }
1864   else if (check_io_slave_killed(mi, NULL))
1865     goto slave_killed_err;
1866   else if (is_network_error(mysql_errno(mysql)))
1867   {
1868     mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
1869                "Get master clock failed with error: %s", mysql_error(mysql));
1870     goto network_err;
1871   }
1872   else
1873   {
1874     mysql_mutex_lock(&mi->data_lock);
1875     mi->clock_diff_with_master= 0; /* The "most sensible" value */
1876     mysql_mutex_unlock(&mi->data_lock);
1877     sql_print_warning("\"SELECT UNIX_TIMESTAMP()\" failed on master, "
1878                       "do not trust column Seconds_Behind_Master of SHOW "
1879                       "SLAVE STATUS. Error: %s (%d)",
1880                       mysql_error(mysql), mysql_errno(mysql));
1881   }
1882   if (master_res)
1883   {
1884     mysql_free_result(master_res);
1885     master_res= NULL;
1886   }
1887 
1888   /*
1889     Check that the master's server id and ours are different. Because if they
1890     are equal (which can result from a simple copy of master's datadir to slave,
1891     thus copying some my.cnf), replication will work but all events will be
1892     skipped.
1893     Do not die if SHOW VARIABLES LIKE 'SERVER_ID' fails on master (very old
1894     master?).
1895     Note: we could have put a @@SERVER_ID in the previous SELECT
1896     UNIX_TIMESTAMP() instead, but this would not have worked on 3.23 masters.
1897   */
1898 #ifdef ENABLED_DEBUG_SYNC
1899   DBUG_EXECUTE_IF("dbug.before_get_SERVER_ID",
1900                   {
1901                     const char act[]=
1902                       "now "
1903                       "wait_for signal.get_server_id";
1904                     DBUG_ASSERT(debug_sync_service);
1905                     DBUG_ASSERT(!debug_sync_set_action(current_thd,
1906                                                        STRING_WITH_LEN(act)));
1907                   };);
1908 #endif
1909   master_res= NULL;
1910   master_row= NULL;
1911   if (!mysql_real_query(mysql,
1912                         STRING_WITH_LEN("SHOW VARIABLES LIKE 'SERVER_ID'")) &&
1913       (master_res= mysql_store_result(mysql)) &&
1914       (master_row= mysql_fetch_row(master_res)))
1915   {
1916     if ((global_system_variables.server_id ==
1917              (mi->master_id= strtoul(master_row[1], 0, 10))) &&
1918         !mi->rli.replicate_same_server_id)
1919     {
1920       errmsg= "The slave I/O thread stops because master and slave have equal \
1921 MySQL server ids; these ids must be different for replication to work (or \
1922 the --replicate-same-server-id option must be used on slave but this does \
1923 not always make sense; please check the manual before using it).";
1924       err_code= ER_SLAVE_FATAL_ERROR;
1925       sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
1926       goto err;
1927     }
1928   }
1929   else if (mysql_errno(mysql))
1930   {
1931     if (check_io_slave_killed(mi, NULL))
1932       goto slave_killed_err;
1933     else if (is_network_error(mysql_errno(mysql)))
1934     {
1935       mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
1936                  "Get master SERVER_ID failed with error: %s", mysql_error(mysql));
1937       goto network_err;
1938     }
1939     /* Fatal error */
1940     errmsg= "The slave I/O thread stops because a fatal error is encountered \
1941 when it try to get the value of SERVER_ID variable from master.";
1942     err_code= mysql_errno(mysql);
1943     sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
1944     goto err;
1945   }
1946   else if (!master_row && master_res)
1947   {
1948     mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE, NULL,
1949                "Unknown system variable 'SERVER_ID' on master, \
1950 maybe it is a *VERY OLD MASTER*.");
1951   }
1952   if (master_res)
1953   {
1954     mysql_free_result(master_res);
1955     master_res= NULL;
1956   }
1957   if (mi->master_id == 0 && mi->ignore_server_ids.elements > 0)
1958   {
1959     errmsg= "Slave configured with server id filtering could not detect the master server id.";
1960     err_code= ER_SLAVE_FATAL_ERROR;
1961     sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
1962     goto err;
1963   }
1964 
1965   /*
1966     Check that the master's global character_set_server and ours are the same.
1967     Not fatal if query fails (old master?).
1968     Note that we don't check for equality of global character_set_client and
1969     collation_connection (neither do we prevent their setting in
1970     set_var.cc). That's because from what I (Guilhem) have tested, the global
1971     values of these 2 are never used (new connections don't use them).
1972     We don't test equality of global collation_database either as it is
1973     going to be deprecated (made read-only) in 4.1 very soon.
1974     The test is only relevant if master < 5.0.3 (we'll test only if it's older
1975     than the 5 branch; < 5.0.3 was alpha...), as >= 5.0.3 master stores
1976     charset info in each binlog event.
1977     We don't do it for 3.23 because masters <3.23.50 hang on
1978     SELECT @@unknown_var (BUG#7965 - see changelog of 3.23.50). So finally we
1979     test only if master is 4.x.
1980   */
1981 
1982   /* redundant with rest of code but safer against later additions */
1983   if (version == 3)
1984     goto err;
1985 
1986   if (version == 4)
1987   {
1988     master_res= NULL;
1989     if (!mysql_real_query(mysql,
1990                           STRING_WITH_LEN("SELECT @@GLOBAL.COLLATION_SERVER")) &&
1991         (master_res= mysql_store_result(mysql)) &&
1992         (master_row= mysql_fetch_row(master_res)))
1993     {
1994       if (strcmp(master_row[0], global_system_variables.collation_server->name))
1995       {
1996         errmsg= "The slave I/O thread stops because master and slave have \
1997 different values for the COLLATION_SERVER global variable. The values must \
1998 be equal for the Statement-format replication to work";
1999         err_code= ER_SLAVE_FATAL_ERROR;
2000         sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
2001         goto err;
2002       }
2003     }
2004     else if (check_io_slave_killed(mi, NULL))
2005       goto slave_killed_err;
2006     else if (is_network_error(mysql_errno(mysql)))
2007     {
2008       mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2009                  "Get master COLLATION_SERVER failed with error: %s", mysql_error(mysql));
2010       goto network_err;
2011     }
2012     else if (mysql_errno(mysql) != ER_UNKNOWN_SYSTEM_VARIABLE)
2013     {
2014       /* Fatal error */
2015       errmsg= "The slave I/O thread stops because a fatal error is encountered \
2016 when it try to get the value of COLLATION_SERVER global variable from master.";
2017       err_code= mysql_errno(mysql);
2018       sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2019       goto err;
2020     }
2021     else
2022       mi->report(WARNING_LEVEL, ER_UNKNOWN_SYSTEM_VARIABLE, NULL,
2023                  "Unknown system variable 'COLLATION_SERVER' on master, \
2024 maybe it is a *VERY OLD MASTER*. *NOTE*: slave may experience \
2025 inconsistency if replicated data deals with collation.");
2026 
2027     if (master_res)
2028     {
2029       mysql_free_result(master_res);
2030       master_res= NULL;
2031     }
2032   }
2033 
2034   /*
2035     Perform analogous check for time zone. Theoretically we also should
2036     perform check here to verify that SYSTEM time zones are the same on
2037     slave and master, but we can't rely on the value of the @@system_time_zone
2038     variable (it is a time zone abbreviation) since it is determined at start
2039     time and so could differ for slave and master even if they are really
2040     in the same system time zone. So we are omitting this check and just
2041     relying on documentation. Also according to Monty there are many users
2042     who are using replication between servers in various time zones. Hence
2043     such a check would break everything for them. (And now everything will
2044     work for them because by default both their master and slave will have
2045     'SYSTEM' time zone).
2046     This check is only necessary for 4.x masters (and < 5.0.4 masters but
2047     those were alpha).
2048   */
2049   if (version == 4)
2050   {
2051     master_res= NULL;
2052     if (!mysql_real_query(mysql, STRING_WITH_LEN("SELECT @@GLOBAL.TIME_ZONE")) &&
2053         (master_res= mysql_store_result(mysql)) &&
2054         (master_row= mysql_fetch_row(master_res)))
2055     {
2056       if (strcmp(master_row[0],
2057                  global_system_variables.time_zone->get_name()->ptr()))
2058       {
2059         errmsg= "The slave I/O thread stops because master and slave have \
2060 different values for the TIME_ZONE global variable. The values must \
2061 be equal for the Statement-format replication to work";
2062         err_code= ER_SLAVE_FATAL_ERROR;
2063         sprintf(err_buff, ER_DEFAULT(err_code), errmsg);
2064         goto err;
2065       }
2066     }
2067     else if (check_io_slave_killed(mi, NULL))
2068       goto slave_killed_err;
2069     else if (is_network_error(err_code= mysql_errno(mysql)))
2070     {
2071       mi->report(ERROR_LEVEL, err_code, NULL,
2072                  "Get master TIME_ZONE failed with error: %s",
2073                  mysql_error(mysql));
2074       goto network_err;
2075     }
2076     else if (err_code == ER_UNKNOWN_SYSTEM_VARIABLE)
2077     {
2078       /* We use ERROR_LEVEL to get the error logged to file */
2079       mi->report(ERROR_LEVEL, err_code, NULL,
2080 
2081                  "MySQL master doesn't have a TIME_ZONE variable. Note that"
2082                  "if your timezone is not same between master and slave, your "
2083                  "slave may get wrong data into timestamp columns");
2084     }
2085     else
2086     {
2087       /* Fatal error */
2088       errmsg= "The slave I/O thread stops because a fatal error is encountered \
2089 when it try to get the value of TIME_ZONE global variable from master.";
2090       sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2091       goto err;
2092     }
2093     if (master_res)
2094     {
2095       mysql_free_result(master_res);
2096       master_res= NULL;
2097     }
2098   }
2099 
2100   if (mi->heartbeat_period != 0.0)
2101   {
2102     const char query_format[]= "SET @master_heartbeat_period= %llu";
2103     char query[sizeof(query_format) + 32];
2104     /*
2105        the period is an ulonglong of nano-secs.
2106     */
2107     my_snprintf(query, sizeof(query), query_format,
2108                 (ulonglong) (mi->heartbeat_period*1000000000UL));
2109 
2110     DBUG_EXECUTE_IF("simulate_slave_heartbeat_network_error",
2111                     { static ulong dbug_count= 0;
2112                       if (++dbug_count < 3)
2113                         goto heartbeat_network_error;
2114                     });
2115     if (mysql_real_query(mysql, query, (ulong)strlen(query)))
2116     {
2117       if (check_io_slave_killed(mi, NULL))
2118         goto slave_killed_err;
2119 
2120       if (is_network_error(mysql_errno(mysql)))
2121       {
2122       IF_DBUG(heartbeat_network_error: , )
2123         mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2124                    "SET @master_heartbeat_period to master failed with error: %s",
2125                    mysql_error(mysql));
2126         mysql_free_result(mysql_store_result(mysql));
2127         goto network_err;
2128       }
2129       else
2130       {
2131         /* Fatal error */
2132         errmsg= "The slave I/O thread stops because a fatal error is encountered "
2133           "when it tries to SET @master_heartbeat_period on master.";
2134         err_code= ER_SLAVE_FATAL_ERROR;
2135         sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2136         mysql_free_result(mysql_store_result(mysql));
2137         goto err;
2138       }
2139     }
2140     mysql_free_result(mysql_store_result(mysql));
2141   }
2142 
2143   /*
2144     Querying if master is capable to checksum and notifying it about own
2145     CRC-awareness. The master's side instant value of @@global.binlog_checksum
2146     is stored in the dump thread's uservar area as well as cached locally
2147     to become known in consensus by master and slave.
2148   */
2149   DBUG_EXECUTE_IF("simulate_slave_unaware_checksum",
2150                   mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_OFF;
2151                   goto past_checksum;);
2152   {
2153     int rc;
2154     const char query[]= "SET @master_binlog_checksum= @@global.binlog_checksum";
2155     master_res= NULL;
2156     mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF; //initially undefined
2157     /*
2158       @c checksum_alg_before_fd is queried from master in this block.
2159       If the master is old and checksum-unaware, the value stays undefined.
2160       Once the first FD is received, its checksum alg descriptor replaces
2161       the value queried here.
2162     */
2163     rc= mysql_real_query(mysql, query,(ulong)strlen(query));
2164     if (rc != 0)
2165     {
2166       if (check_io_slave_killed(mi, NULL))
2167         goto slave_killed_err;
2168 
2169       if (mysql_errno(mysql) == ER_UNKNOWN_SYSTEM_VARIABLE)
2170       {
2171         /* Ignore this expected error if not a high error level */
2172         if (global_system_variables.log_warnings > 1)
2173         {
2174           // this is tolerable as OM -> NS is supported
2175           mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2176                      "Notifying master by %s failed with "
2177                      "error: %s", query, mysql_error(mysql));
2178         }
2179       }
2180       else
2181       {
2182         if (is_network_error(mysql_errno(mysql)))
2183         {
2184           mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2185                      "Notifying master by %s failed with "
2186                      "error: %s", query, mysql_error(mysql));
2187           mysql_free_result(mysql_store_result(mysql));
2188           goto network_err;
2189         }
2190         else
2191         {
2192           errmsg= "The slave I/O thread stops because a fatal error is encountered "
2193             "when it tried to SET @master_binlog_checksum on master.";
2194           err_code= ER_SLAVE_FATAL_ERROR;
2195           sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2196           mysql_free_result(mysql_store_result(mysql));
2197           goto err;
2198         }
2199       }
2200     }
2201     else
2202     {
2203       mysql_free_result(mysql_store_result(mysql));
2204       if (!mysql_real_query(mysql,
2205                             STRING_WITH_LEN("SELECT @master_binlog_checksum")) &&
2206           (master_res= mysql_store_result(mysql)) &&
2207           (master_row= mysql_fetch_row(master_res)) &&
2208           (master_row[0] != NULL))
2209       {
2210         mi->checksum_alg_before_fd= (enum_binlog_checksum_alg)
2211           (find_type(master_row[0], &binlog_checksum_typelib, 1) - 1);
2212         // valid outcome is either of
2213         DBUG_ASSERT(mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_OFF ||
2214                     mi->checksum_alg_before_fd == BINLOG_CHECKSUM_ALG_CRC32);
2215       }
2216       else if (check_io_slave_killed(mi, NULL))
2217         goto slave_killed_err;
2218       else if (is_network_error(mysql_errno(mysql)))
2219       {
2220         mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2221                    "Get master BINLOG_CHECKSUM failed with error: %s", mysql_error(mysql));
2222         goto network_err;
2223       }
2224       else
2225       {
2226         errmsg= "The slave I/O thread stops because a fatal error is encountered "
2227           "when it tried to SELECT @master_binlog_checksum.";
2228         err_code= ER_SLAVE_FATAL_ERROR;
2229         sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2230         mysql_free_result(mysql_store_result(mysql));
2231         goto err;
2232       }
2233     }
2234     if (master_res)
2235     {
2236       mysql_free_result(master_res);
2237       master_res= NULL;
2238     }
2239   }
2240 
2241 #ifndef DBUG_OFF
2242 past_checksum:
2243 #endif
2244 
2245   /*
2246     Request the master to filter away events with the @@skip_replication flag
2247     set, if we are running with
2248     --replicate-events-marked-for-skip=FILTER_ON_MASTER.
2249   */
2250   if (opt_replicate_events_marked_for_skip == RPL_SKIP_FILTER_ON_MASTER)
2251   {
2252     if (unlikely(mysql_real_query(mysql,
2253                                   STRING_WITH_LEN("SET skip_replication=1"))))
2254     {
2255       err_code= mysql_errno(mysql);
2256       if (is_network_error(err_code))
2257       {
2258         mi->report(ERROR_LEVEL, err_code, NULL,
2259                    "Setting master-side filtering of @@skip_replication failed "
2260                    "with error: %s", mysql_error(mysql));
2261         goto network_err;
2262       }
2263       else if (err_code == ER_UNKNOWN_SYSTEM_VARIABLE)
2264       {
2265         /*
2266           The master is older than the slave and does not support the
2267           @@skip_replication feature.
2268           This is not a problem, as such master will not generate events with
2269           the @@skip_replication flag set in the first place. We will still
2270           do slave-side filtering of such events though, to handle the (rare)
2271           case of downgrading a master and receiving old events generated from
2272           before the downgrade with the @@skip_replication flag set.
2273         */
2274         DBUG_PRINT("info", ("Old master does not support master-side filtering "
2275                             "of @@skip_replication events."));
2276       }
2277       else
2278       {
2279         /* Fatal error */
2280         errmsg= "The slave I/O thread stops because a fatal error is "
2281           "encountered when it tries to request filtering of events marked "
2282           "with the @@skip_replication flag.";
2283         sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2284         goto err;
2285       }
2286     }
2287   }
2288 
2289   /* Announce MariaDB slave capabilities. */
2290   DBUG_EXECUTE_IF("simulate_slave_capability_none", goto after_set_capability;);
2291   {
2292     int rc= DBUG_EVALUATE_IF("simulate_slave_capability_old_53",
2293         mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability="
2294                          STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_ANNOTATE))),
2295         mysql_real_query(mysql, STRING_WITH_LEN("SET @mariadb_slave_capability="
2296                          STRINGIFY_ARG(MARIA_SLAVE_CAPABILITY_MINE))));
2297     if (unlikely(rc))
2298     {
2299       err_code= mysql_errno(mysql);
2300       if (is_network_error(err_code))
2301       {
2302         mi->report(ERROR_LEVEL, err_code, NULL,
2303                    "Setting @mariadb_slave_capability failed with error: %s",
2304                    mysql_error(mysql));
2305         goto network_err;
2306       }
2307       else
2308       {
2309         /* Fatal error */
2310         errmsg= "The slave I/O thread stops because a fatal error is "
2311           "encountered when it tries to set @mariadb_slave_capability.";
2312         sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2313         goto err;
2314       }
2315     }
2316   }
2317 #ifndef DBUG_OFF
2318 after_set_capability:
2319 #endif
2320 
2321   if (mi->using_gtid != Master_info::USE_GTID_NO)
2322   {
2323     /* Request dump to start from slave replication GTID state. */
2324     int rc;
2325     char str_buf[256];
2326     String query_str(str_buf, sizeof(str_buf), system_charset_info);
2327     query_str.length(0);
2328 
2329     /*
2330       Read the master @@GLOBAL.gtid_domain_id variable.
2331       This is mostly to check that master is GTID aware, but we could later
2332       perhaps use it to check that different multi-source masters are correctly
2333       configured with distinct domain_id.
2334     */
2335     if (mysql_real_query(mysql,
2336                          STRING_WITH_LEN("SELECT @@GLOBAL.gtid_domain_id")) ||
2337         !(master_res= mysql_store_result(mysql)) ||
2338         !(master_row= mysql_fetch_row(master_res)))
2339     {
2340       err_code= mysql_errno(mysql);
2341       if (is_network_error(err_code))
2342       {
2343         mi->report(ERROR_LEVEL, err_code, NULL,
2344                    "Get master @@GLOBAL.gtid_domain_id failed with error: %s",
2345                    mysql_error(mysql));
2346         goto network_err;
2347       }
2348       else
2349       {
2350         errmsg= "The slave I/O thread stops because master does not support "
2351           "MariaDB global transaction id. A fatal error is encountered when "
2352           "it tries to SELECT @@GLOBAL.gtid_domain_id.";
2353         sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2354         goto err;
2355       }
2356     }
2357     mysql_free_result(master_res);
2358     master_res= NULL;
2359 
2360     query_str.append(STRING_WITH_LEN("SET @slave_connect_state='"),
2361                      system_charset_info);
2362     if (mi->gtid_current_pos.append_to_string(&query_str))
2363     {
2364       err_code= ER_OUTOFMEMORY;
2365       errmsg= "The slave I/O thread stops because a fatal out-of-memory "
2366         "error is encountered when it tries to compute @slave_connect_state.";
2367       sprintf(err_buff, "%s Error: Out of memory", errmsg);
2368       goto err;
2369     }
2370     query_str.append(STRING_WITH_LEN("'"), system_charset_info);
2371 
2372     rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
2373     if (unlikely(rc))
2374     {
2375       err_code= mysql_errno(mysql);
2376       if (is_network_error(err_code))
2377       {
2378         mi->report(ERROR_LEVEL, err_code, NULL,
2379                    "Setting @slave_connect_state failed with error: %s",
2380                    mysql_error(mysql));
2381         goto network_err;
2382       }
2383       else
2384       {
2385         /* Fatal error */
2386         errmsg= "The slave I/O thread stops because a fatal error is "
2387           "encountered when it tries to set @slave_connect_state.";
2388         sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2389         goto err;
2390       }
2391     }
2392 
2393     query_str.length(0);
2394     if (query_str.append(STRING_WITH_LEN("SET @slave_gtid_strict_mode="),
2395                          system_charset_info) ||
2396         query_str.append_ulonglong(opt_gtid_strict_mode != false))
2397     {
2398       err_code= ER_OUTOFMEMORY;
2399       errmsg= "The slave I/O thread stops because a fatal out-of-memory "
2400         "error is encountered when it tries to set @slave_gtid_strict_mode.";
2401       sprintf(err_buff, "%s Error: Out of memory", errmsg);
2402       goto err;
2403     }
2404 
2405     rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
2406     if (unlikely(rc))
2407     {
2408       err_code= mysql_errno(mysql);
2409       if (is_network_error(err_code))
2410       {
2411         mi->report(ERROR_LEVEL, err_code, NULL,
2412                    "Setting @slave_gtid_strict_mode failed with error: %s",
2413                    mysql_error(mysql));
2414         goto network_err;
2415       }
2416       else
2417       {
2418         /* Fatal error */
2419         errmsg= "The slave I/O thread stops because a fatal error is "
2420           "encountered when it tries to set @slave_gtid_strict_mode.";
2421         sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2422         goto err;
2423       }
2424     }
2425 
2426     query_str.length(0);
2427     if (query_str.append(STRING_WITH_LEN("SET @slave_gtid_ignore_duplicates="),
2428                          system_charset_info) ||
2429         query_str.append_ulonglong(opt_gtid_ignore_duplicates != false))
2430     {
2431       err_code= ER_OUTOFMEMORY;
2432       errmsg= "The slave I/O thread stops because a fatal out-of-memory error "
2433         "is encountered when it tries to set @slave_gtid_ignore_duplicates.";
2434       sprintf(err_buff, "%s Error: Out of memory", errmsg);
2435       goto err;
2436     }
2437 
2438     rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
2439     if (unlikely(rc))
2440     {
2441       err_code= mysql_errno(mysql);
2442       if (is_network_error(err_code))
2443       {
2444         mi->report(ERROR_LEVEL, err_code, NULL,
2445                    "Setting @slave_gtid_ignore_duplicates failed with "
2446                    "error: %s", mysql_error(mysql));
2447         goto network_err;
2448       }
2449       else
2450       {
2451         /* Fatal error */
2452         errmsg= "The slave I/O thread stops because a fatal error is "
2453           "encountered when it tries to set @slave_gtid_ignore_duplicates.";
2454         sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2455         goto err;
2456       }
2457     }
2458 
2459     if (mi->rli.until_condition == Relay_log_info::UNTIL_GTID)
2460     {
2461       query_str.length(0);
2462       query_str.append(STRING_WITH_LEN("SET @slave_until_gtid='"),
2463                        system_charset_info);
2464       if (mi->rli.until_gtid_pos.append_to_string(&query_str))
2465       {
2466         err_code= ER_OUTOFMEMORY;
2467         errmsg= "The slave I/O thread stops because a fatal out-of-memory "
2468           "error is encountered when it tries to compute @slave_until_gtid.";
2469         sprintf(err_buff, "%s Error: Out of memory", errmsg);
2470         goto err;
2471       }
2472       query_str.append(STRING_WITH_LEN("'"), system_charset_info);
2473 
2474       rc= mysql_real_query(mysql, query_str.ptr(), query_str.length());
2475       if (unlikely(rc))
2476       {
2477         err_code= mysql_errno(mysql);
2478         if (is_network_error(err_code))
2479         {
2480           mi->report(ERROR_LEVEL, err_code, NULL,
2481                      "Setting @slave_until_gtid failed with error: %s",
2482                      mysql_error(mysql));
2483           goto network_err;
2484         }
2485         else
2486         {
2487           /* Fatal error */
2488           errmsg= "The slave I/O thread stops because a fatal error is "
2489             "encountered when it tries to set @slave_until_gtid.";
2490           sprintf(err_buff, "%s Error: %s", errmsg, mysql_error(mysql));
2491           goto err;
2492         }
2493       }
2494     }
2495   }
2496   else
2497   {
2498     /*
2499       If we are not using GTID to connect this time, then instead request
2500       the corresponding GTID position from the master, so that the user
2501       can reconnect the next time using MASTER_GTID_POS=AUTO.
2502     */
2503     char quote_buf[2*sizeof(mi->master_log_name)+1];
2504     char str_buf[28+2*sizeof(mi->master_log_name)+10];
2505     String query(str_buf, sizeof(str_buf), system_charset_info);
2506     query.length(0);
2507 
2508     query.append("SELECT binlog_gtid_pos('");
2509     escape_quotes_for_mysql(&my_charset_bin, quote_buf, sizeof(quote_buf),
2510                             mi->master_log_name, strlen(mi->master_log_name));
2511     query.append(quote_buf);
2512     query.append("',");
2513     query.append_ulonglong(mi->master_log_pos);
2514     query.append(")");
2515 
2516     if (!mysql_real_query(mysql, query.c_ptr_safe(), query.length()) &&
2517         (master_res= mysql_store_result(mysql)) &&
2518         (master_row= mysql_fetch_row(master_res)) &&
2519         (master_row[0] != NULL))
2520     {
2521       rpl_global_gtid_slave_state->load(mi->io_thd, master_row[0],
2522                                         strlen(master_row[0]), false, false);
2523     }
2524     else if (check_io_slave_killed(mi, NULL))
2525       goto slave_killed_err;
2526     else if (is_network_error(mysql_errno(mysql)))
2527     {
2528       mi->report(WARNING_LEVEL, mysql_errno(mysql), NULL,
2529                  "Get master GTID position failed with error: %s", mysql_error(mysql));
2530       goto network_err;
2531     }
2532     else
2533     {
2534       /*
2535         ToDo: If the master does not have the binlog_gtid_pos() function, it
2536         just means that it is an old master with no GTID support, so we should
2537         do nothing.
2538 
2539         However, if binlog_gtid_pos() exists, but fails or returns NULL, then
2540         it means that the requested position is not valid. We could use this
2541         to catch attempts to replicate from within the middle of an event,
2542         avoiding strange failures or possible corruption.
2543       */
2544     }
2545     if (master_res)
2546     {
2547       mysql_free_result(master_res);
2548       master_res= NULL;
2549     }
2550   }
2551 
2552 err:
2553   if (errmsg)
2554   {
2555     if (master_res)
2556       mysql_free_result(master_res);
2557     DBUG_ASSERT(err_code != 0);
2558     mi->report(ERROR_LEVEL, err_code, NULL, "%s", err_buff);
2559     DBUG_RETURN(1);
2560   }
2561 
2562   DBUG_RETURN(0);
2563 
2564 network_err:
2565   if (master_res)
2566     mysql_free_result(master_res);
2567   DBUG_RETURN(2);
2568 
2569 slave_killed_err:
2570   if (master_res)
2571     mysql_free_result(master_res);
2572   DBUG_RETURN(2);
2573 }
2574 
2575 
2576 static bool wait_for_relay_log_space(Relay_log_info* rli)
2577 {
2578   bool slave_killed=0;
2579   bool ignore_log_space_limit;
2580   Master_info* mi = rli->mi;
2581   PSI_stage_info old_stage;
2582   THD* thd = mi->io_thd;
2583   DBUG_ENTER("wait_for_relay_log_space");
2584 
2585   mysql_mutex_lock(&rli->log_space_lock);
2586   thd->ENTER_COND(&rli->log_space_cond,
2587                   &rli->log_space_lock,
2588                   &stage_waiting_for_relay_log_space,
2589                   &old_stage);
2590   while (rli->log_space_limit < rli->log_space_total &&
2591          !(slave_killed=io_slave_killed(mi)) &&
2592          !rli->ignore_log_space_limit)
2593     mysql_cond_wait(&rli->log_space_cond, &rli->log_space_lock);
2594 
2595   ignore_log_space_limit= rli->ignore_log_space_limit;
2596   rli->ignore_log_space_limit= 0;
2597 
2598   thd->EXIT_COND(&old_stage);
2599 
2600   /*
2601     Makes the IO thread read only one event at a time
2602     until the SQL thread is able to purge the relay
2603     logs, freeing some space.
2604 
2605     Therefore, once the SQL thread processes this next
2606     event, it goes to sleep (no more events in the queue),
2607     sets ignore_log_space_limit=true and wakes the IO thread.
2608     However, this event may have been enough already for
2609     the SQL thread to purge some log files, freeing
2610     rli->log_space_total .
2611 
2612     This guarantees that the SQL and IO thread move
2613     forward only one event at a time (to avoid deadlocks),
2614     when the relay space limit is reached. It also
2615     guarantees that when the SQL thread is prepared to
2616     rotate (to be able to purge some logs), the IO thread
2617     will know about it and will rotate.
2618 
2619     NOTE: The ignore_log_space_limit is only set when the SQL
2620           thread sleeps waiting for events.
2621 
2622    */
2623 
2624   if (ignore_log_space_limit)
2625   {
2626 #ifndef DBUG_OFF
2627     {
2628       DBUG_PRINT("info", ("log_space_limit=%llu log_space_total=%llu "
2629                           "ignore_log_space_limit=%d "
2630                           "sql_force_rotate_relay=%d",
2631                         rli->log_space_limit, uint64(rli->log_space_total),
2632                         (int) rli->ignore_log_space_limit,
2633                         (int) rli->sql_force_rotate_relay));
2634     }
2635 #endif
2636     if (rli->sql_force_rotate_relay)
2637     {
2638       mysql_mutex_lock(&mi->data_lock);
2639       rotate_relay_log(rli->mi);
2640       mysql_mutex_unlock(&mi->data_lock);
2641       rli->sql_force_rotate_relay= false;
2642     }
2643   }
2644 
2645   DBUG_RETURN(slave_killed);
2646 }
2647 
2648 
2649 /*
2650   Builds a Rotate from the ignored events' info and writes it to relay log.
2651 
2652   SYNOPSIS
2653   write_ignored_events_info_to_relay_log()
2654     thd             pointer to I/O thread's thd
2655     mi
2656 
2657   DESCRIPTION
2658     Slave I/O thread, going to die, must leave a durable trace of the
2659     ignored events' end position for the use of the slave SQL thread, by
2660     calling this function. Only that thread can call it (see assertion).
2661  */
2662 static void write_ignored_events_info_to_relay_log(THD *thd, Master_info *mi)
2663 {
2664   Relay_log_info *rli= &mi->rli;
2665   mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
2666   DBUG_ENTER("write_ignored_events_info_to_relay_log");
2667 
2668   DBUG_ASSERT(thd == mi->io_thd);
2669   mysql_mutex_lock(log_lock);
2670   if (rli->ign_master_log_name_end[0] || rli->ign_gtids.count())
2671   {
2672     Rotate_log_event *rev= NULL;
2673     Gtid_list_log_event *glev= NULL;
2674     if (rli->ign_master_log_name_end[0])
2675     {
2676       rev= new Rotate_log_event(rli->ign_master_log_name_end,
2677                                 0, rli->ign_master_log_pos_end,
2678                                 Rotate_log_event::DUP_NAME);
2679       rli->ign_master_log_name_end[0]= 0;
2680       if (unlikely(!(bool)rev))
2681         mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE, NULL,
2682                    ER_THD(thd, ER_SLAVE_CREATE_EVENT_FAILURE),
2683                    "Rotate_event (out of memory?),"
2684                    " SHOW SLAVE STATUS may be inaccurate");
2685     }
2686     if (rli->ign_gtids.count())
2687     {
2688       DBUG_ASSERT(!rli->is_in_group());         // Ensure no active transaction
2689       glev= new Gtid_list_log_event(&rli->ign_gtids,
2690                                     Gtid_list_log_event::FLAG_IGN_GTIDS);
2691       rli->ign_gtids.reset();
2692       if (unlikely(!(bool)glev))
2693         mi->report(ERROR_LEVEL, ER_SLAVE_CREATE_EVENT_FAILURE, NULL,
2694                    ER_THD(thd, ER_SLAVE_CREATE_EVENT_FAILURE),
2695                    "Gtid_list_event (out of memory?),"
2696                    " gtid_slave_pos may be inaccurate");
2697     }
2698 
2699     /* Can unlock before writing as slave SQL thd will soon see our event. */
2700     mysql_mutex_unlock(log_lock);
2701     if (rev)
2702     {
2703       DBUG_PRINT("info",("writing a Rotate event to track down ignored events"));
2704       rev->server_id= 0; // don't be ignored by slave SQL thread
2705       if (unlikely(rli->relay_log.append(rev)))
2706         mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
2707                    ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
2708                    "failed to write a Rotate event"
2709                    " to the relay log, SHOW SLAVE STATUS may be"
2710                    " inaccurate");
2711       delete rev;
2712     }
2713     if (glev)
2714     {
2715       DBUG_PRINT("info",("writing a Gtid_list event to track down ignored events"));
2716       glev->server_id= 0; // don't be ignored by slave SQL thread
2717       glev->set_artificial_event(); // Don't mess up Exec_Master_Log_Pos
2718       if (unlikely(rli->relay_log.append(glev)))
2719         mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
2720                    ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
2721                    "failed to write a Gtid_list event to the relay log, "
2722                    "gtid_slave_pos may be inaccurate");
2723       delete glev;
2724     }
2725     if (likely (rev || glev))
2726     {
2727       rli->relay_log.harvest_bytes_written(&rli->log_space_total);
2728       if (flush_master_info(mi, TRUE, TRUE))
2729         sql_print_error("Failed to flush master info file");
2730     }
2731   }
2732   else
2733     mysql_mutex_unlock(log_lock);
2734   DBUG_VOID_RETURN;
2735 }
2736 
2737 
2738 int register_slave_on_master(MYSQL* mysql, Master_info *mi,
2739                              bool *suppress_warnings)
2740 {
2741   uchar buf[1024], *pos= buf;
2742   size_t report_host_len=0, report_user_len=0, report_password_len=0;
2743   DBUG_ENTER("register_slave_on_master");
2744 
2745   *suppress_warnings= FALSE;
2746   if (report_host)
2747     report_host_len= strlen(report_host);
2748   if (report_host_len > HOSTNAME_LENGTH)
2749   {
2750     sql_print_warning("The length of report_host is %zu. "
2751                       "It is larger than the max length(%d), so this "
2752                       "slave cannot be registered to the master.",
2753                       report_host_len, HOSTNAME_LENGTH);
2754     DBUG_RETURN(0);
2755   }
2756 
2757   if (report_user)
2758     report_user_len= strlen(report_user);
2759   if (report_user_len > USERNAME_LENGTH)
2760   {
2761     sql_print_warning("The length of report_user is %zu. "
2762                       "It is larger than the max length(%d), so this "
2763                       "slave cannot be registered to the master.",
2764                       report_user_len, USERNAME_LENGTH);
2765     DBUG_RETURN(0);
2766   }
2767 
2768   if (report_password)
2769     report_password_len= strlen(report_password);
2770   if (report_password_len > MAX_PASSWORD_LENGTH)
2771   {
2772     sql_print_warning("The length of report_password is %zu. "
2773                       "It is larger than the max length(%d), so this "
2774                       "slave cannot be registered to the master.",
2775                       report_password_len, MAX_PASSWORD_LENGTH);
2776     DBUG_RETURN(0);
2777   }
2778 
2779   int4store(pos, global_system_variables.server_id); pos+= 4;
2780   pos= net_store_data(pos, (uchar*) report_host, report_host_len);
2781   pos= net_store_data(pos, (uchar*) report_user, report_user_len);
2782   pos= net_store_data(pos, (uchar*) report_password, report_password_len);
2783   int2store(pos, (uint16) report_port); pos+= 2;
2784   /*
2785     Fake rpl_recovery_rank, which was removed in BUG#13963,
2786     so that this server can register itself on old servers,
2787     see BUG#49259.
2788    */
2789   int4store(pos, /* rpl_recovery_rank */ 0);    pos+= 4;
2790   /* The master will fill in master_id */
2791   int4store(pos, 0);                    pos+= 4;
2792 
2793   if (simple_command(mysql, COM_REGISTER_SLAVE, buf, (ulong) (pos- buf), 0))
2794   {
2795     if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
2796     {
2797       *suppress_warnings= TRUE;                 // Suppress reconnect warning
2798     }
2799     else if (!check_io_slave_killed(mi, NULL))
2800     {
2801       char buf[256];
2802       my_snprintf(buf, sizeof(buf), "%s (Errno: %d)", mysql_error(mysql),
2803                   mysql_errno(mysql));
2804       mi->report(ERROR_LEVEL, ER_SLAVE_MASTER_COM_FAILURE, NULL,
2805                  ER(ER_SLAVE_MASTER_COM_FAILURE), "COM_REGISTER_SLAVE", buf);
2806     }
2807     DBUG_RETURN(1);
2808   }
2809   DBUG_RETURN(0);
2810 }
2811 
2812 
2813 /**
2814   Execute a SHOW SLAVE STATUS statement.
2815 
2816   @param thd Pointer to THD object for the client thread executing the
2817   statement.
2818 
2819   @param mi Pointer to Master_info object for the IO thread.
2820 
2821   @retval FALSE success
2822   @retval TRUE failure
2823 */
2824 
2825 bool show_master_info(THD *thd, Master_info *mi, bool full)
2826 {
2827   DBUG_ENTER("show_master_info");
2828   String gtid_pos;
2829   List<Item> field_list;
2830 
2831   if (full && rpl_global_gtid_slave_state->tostring(&gtid_pos, NULL, 0))
2832     DBUG_RETURN(TRUE);
2833   show_master_info_get_fields(thd, &field_list, full, gtid_pos.length());
2834   if (thd->protocol->send_result_set_metadata(&field_list,
2835                        Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
2836     DBUG_RETURN(TRUE);
2837   if (send_show_master_info_data(thd, mi, full, &gtid_pos))
2838     DBUG_RETURN(TRUE);
2839   my_eof(thd);
2840   DBUG_RETURN(FALSE);
2841 }
2842 
2843 void show_master_info_get_fields(THD *thd, List<Item> *field_list,
2844                                  bool full, size_t gtid_pos_length)
2845 {
2846   Master_info *mi;
2847   MEM_ROOT *mem_root= thd->mem_root;
2848   DBUG_ENTER("show_master_info_get_fields");
2849 
2850   if (full)
2851   {
2852     field_list->push_back(new (mem_root)
2853                           Item_empty_string(thd, "Connection_name",
2854                                             MAX_CONNECTION_NAME),
2855                           mem_root);
2856     field_list->push_back(new (mem_root)
2857                           Item_empty_string(thd, "Slave_SQL_State", 30),
2858                           mem_root);
2859   }
2860 
2861   field_list->push_back(new (mem_root)
2862                         Item_empty_string(thd, "Slave_IO_State", 30),
2863                         mem_root);
2864   field_list->push_back(new (mem_root)
2865                         Item_empty_string(thd, "Master_Host", sizeof(mi->host)),
2866                         mem_root);
2867   field_list->push_back(new (mem_root)
2868                         Item_empty_string(thd, "Master_User", sizeof(mi->user)),
2869                         mem_root);
2870   field_list->push_back(new (mem_root)
2871                         Item_return_int(thd, "Master_Port", 7, MYSQL_TYPE_LONG),
2872                         mem_root);
2873   field_list->push_back(new (mem_root)
2874                         Item_return_int(thd, "Connect_Retry", 10,
2875                                         MYSQL_TYPE_LONG),
2876                         mem_root);
2877   field_list->push_back(new (mem_root)
2878                         Item_empty_string(thd, "Master_Log_File", FN_REFLEN),
2879                         mem_root);
2880   field_list->push_back(new (mem_root)
2881                         Item_return_int(thd, "Read_Master_Log_Pos", 10,
2882                                         MYSQL_TYPE_LONGLONG),
2883                         mem_root);
2884   field_list->push_back(new (mem_root)
2885                         Item_empty_string(thd, "Relay_Log_File", FN_REFLEN),
2886                         mem_root);
2887   field_list->push_back(new (mem_root)
2888                         Item_return_int(thd, "Relay_Log_Pos", 10,
2889                                         MYSQL_TYPE_LONGLONG),
2890                         mem_root);
2891   field_list->push_back(new (mem_root)
2892                         Item_empty_string(thd, "Relay_Master_Log_File",
2893                                           FN_REFLEN),
2894                         mem_root);
2895   field_list->push_back(new (mem_root)
2896                         Item_empty_string(thd, "Slave_IO_Running", 3),
2897                         mem_root);
2898   field_list->push_back(new (mem_root)
2899                         Item_empty_string(thd, "Slave_SQL_Running", 3),
2900                         mem_root);
2901   field_list->push_back(new (mem_root)
2902                         Item_empty_string(thd, "Replicate_Do_DB", 20),
2903                         mem_root);
2904   field_list->push_back(new (mem_root)
2905                         Item_empty_string(thd, "Replicate_Ignore_DB", 20),
2906                         mem_root);
2907   field_list->push_back(new (mem_root)
2908                         Item_empty_string(thd, "Replicate_Do_Table", 20),
2909                         mem_root);
2910   field_list->push_back(new (mem_root)
2911                         Item_empty_string(thd, "Replicate_Ignore_Table", 23),
2912                         mem_root);
2913   field_list->push_back(new (mem_root)
2914                         Item_empty_string(thd, "Replicate_Wild_Do_Table", 24),
2915                         mem_root);
2916   field_list->push_back(new (mem_root)
2917                         Item_empty_string(thd, "Replicate_Wild_Ignore_Table",
2918                                           28),
2919                         mem_root);
2920   field_list->push_back(new (mem_root)
2921                         Item_return_int(thd, "Last_Errno", 4, MYSQL_TYPE_LONG),
2922                         mem_root);
2923   field_list->push_back(new (mem_root)
2924                         Item_empty_string(thd, "Last_Error", 20),
2925                         mem_root);
2926   field_list->push_back(new (mem_root)
2927                         Item_return_int(thd, "Skip_Counter", 10,
2928                                         MYSQL_TYPE_LONG),
2929                         mem_root);
2930   field_list->push_back(new (mem_root)
2931                         Item_return_int(thd, "Exec_Master_Log_Pos", 10,
2932                                         MYSQL_TYPE_LONGLONG),
2933                         mem_root);
2934   field_list->push_back(new (mem_root)
2935                         Item_return_int(thd, "Relay_Log_Space", 10,
2936                                         MYSQL_TYPE_LONGLONG),
2937                         mem_root);
2938   field_list->push_back(new (mem_root)
2939                         Item_empty_string(thd, "Until_Condition", 6),
2940                         mem_root);
2941   field_list->push_back(new (mem_root)
2942                         Item_empty_string(thd, "Until_Log_File", FN_REFLEN),
2943                         mem_root);
2944   field_list->push_back(new (mem_root)
2945                         Item_return_int(thd, "Until_Log_Pos", 10,
2946                                         MYSQL_TYPE_LONGLONG),
2947                         mem_root);
2948   field_list->push_back(new (mem_root)
2949                         Item_empty_string(thd, "Master_SSL_Allowed", 7),
2950                         mem_root);
2951   field_list->push_back(new (mem_root)
2952                         Item_empty_string(thd, "Master_SSL_CA_File",
2953                                           sizeof(mi->ssl_ca)),
2954                         mem_root);
2955   field_list->push_back(new (mem_root)
2956                         Item_empty_string(thd, "Master_SSL_CA_Path",
2957                                           sizeof(mi->ssl_capath)),
2958                         mem_root);
2959   field_list->push_back(new (mem_root)
2960                         Item_empty_string(thd, "Master_SSL_Cert",
2961                                           sizeof(mi->ssl_cert)),
2962                         mem_root);
2963   field_list->push_back(new (mem_root)
2964                         Item_empty_string(thd, "Master_SSL_Cipher",
2965                                           sizeof(mi->ssl_cipher)),
2966                         mem_root);
2967   field_list->push_back(new (mem_root)
2968                         Item_empty_string(thd, "Master_SSL_Key",
2969                                           sizeof(mi->ssl_key)),
2970                         mem_root);
2971   field_list->push_back(new (mem_root)
2972                         Item_return_int(thd, "Seconds_Behind_Master", 10,
2973                                         MYSQL_TYPE_LONGLONG),
2974                         mem_root);
2975   field_list->push_back(new (mem_root)
2976                         Item_empty_string(thd, "Master_SSL_Verify_Server_Cert",
2977                                           3),
2978                         mem_root);
2979   field_list->push_back(new (mem_root)
2980                         Item_return_int(thd, "Last_IO_Errno", 4,
2981                                         MYSQL_TYPE_LONG),
2982                         mem_root);
2983   field_list->push_back(new (mem_root)
2984                         Item_empty_string(thd, "Last_IO_Error", 20),
2985                         mem_root);
2986   field_list->push_back(new (mem_root)
2987                         Item_return_int(thd, "Last_SQL_Errno", 4,
2988                                         MYSQL_TYPE_LONG),
2989                         mem_root);
2990   field_list->push_back(new (mem_root)
2991                         Item_empty_string(thd, "Last_SQL_Error", 20),
2992                         mem_root);
2993   field_list->push_back(new (mem_root)
2994                         Item_empty_string(thd, "Replicate_Ignore_Server_Ids",
2995                                           FN_REFLEN),
2996                         mem_root);
2997   field_list->push_back(new (mem_root)
2998                         Item_return_int(thd, "Master_Server_Id", sizeof(ulong),
2999                                             MYSQL_TYPE_LONG),
3000                         mem_root);
3001   field_list->push_back(new (mem_root)
3002                         Item_empty_string(thd, "Master_SSL_Crl",
3003                                           sizeof(mi->ssl_crl)),
3004                         mem_root);
3005   field_list->push_back(new (mem_root)
3006                         Item_empty_string(thd, "Master_SSL_Crlpath",
3007                                           sizeof(mi->ssl_crlpath)),
3008                         mem_root);
3009   field_list->push_back(new (mem_root)
3010                         Item_empty_string(thd, "Using_Gtid",
3011                                           sizeof("Current_Pos")-1),
3012                         mem_root);
3013   field_list->push_back(new (mem_root)
3014                         Item_empty_string(thd, "Gtid_IO_Pos", 30),
3015                         mem_root);
3016   field_list->push_back(new (mem_root)
3017                         Item_empty_string(thd, "Replicate_Do_Domain_Ids",
3018                                           FN_REFLEN),
3019                         mem_root);
3020   field_list->push_back(new (mem_root)
3021                         Item_empty_string(thd, "Replicate_Ignore_Domain_Ids",
3022                                           FN_REFLEN),
3023                         mem_root);
3024   field_list->push_back(new (mem_root)
3025                         Item_empty_string(thd, "Parallel_Mode",
3026                                           sizeof("conservative")-1),
3027                         mem_root);
3028   field_list->push_back(new (mem_root)
3029                         Item_return_int(thd, "SQL_Delay", 10,
3030                                         MYSQL_TYPE_LONG));
3031   field_list->push_back(new (mem_root)
3032                         Item_return_int(thd, "SQL_Remaining_Delay", 8,
3033                                         MYSQL_TYPE_LONG));
3034   field_list->push_back(new (mem_root)
3035                         Item_empty_string(thd, "Slave_SQL_Running_State",
3036                                           20));
3037   field_list->push_back(new (mem_root)
3038                        Item_return_int(thd, "Slave_DDL_Groups", 20,
3039                                        MYSQL_TYPE_LONGLONG),
3040                        mem_root);
3041   field_list->push_back(new (mem_root)
3042                        Item_return_int(thd, "Slave_Non_Transactional_Groups", 20,
3043                                        MYSQL_TYPE_LONGLONG),
3044                         mem_root);
3045   field_list->push_back(new (mem_root)
3046                        Item_return_int(thd, "Slave_Transactional_Groups", 20,
3047                                        MYSQL_TYPE_LONGLONG),
3048                         mem_root);
3049 
3050   if (full)
3051   {
3052     field_list->push_back(new (mem_root)
3053                           Item_return_int(thd, "Retried_transactions", 10,
3054                                           MYSQL_TYPE_LONG),
3055                           mem_root);
3056     field_list->push_back(new (mem_root)
3057                           Item_return_int(thd, "Max_relay_log_size", 10,
3058                                           MYSQL_TYPE_LONGLONG),
3059                           mem_root);
3060     field_list->push_back(new (mem_root)
3061                           Item_return_int(thd, "Executed_log_entries", 10,
3062                                           MYSQL_TYPE_LONG),
3063                           mem_root);
3064     field_list->push_back(new (mem_root)
3065                           Item_return_int(thd, "Slave_received_heartbeats", 10,
3066                                           MYSQL_TYPE_LONG),
3067                           mem_root);
3068     field_list->push_back(new (mem_root)
3069                           Item_float(thd, "Slave_heartbeat_period", 0.0, 3, 10),
3070                           mem_root);
3071     field_list->push_back(new (mem_root)
3072                           Item_empty_string(thd, "Gtid_Slave_Pos",
3073                                             (uint)gtid_pos_length),
3074                           mem_root);
3075   }
3076   DBUG_VOID_RETURN;
3077 }
3078 
/*
  Text shown in the Slave_IO_Running column.
  The table is indexed by the value of Master_info::slave_running.
*/
static const char *slave_running[]=
{
  "No",
  "Connecting",
  "Preparing",
  "Yes"
};
3081 
3082 static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full,
3083                                        String *gtid_pos)
3084 {
3085   DBUG_ENTER("send_show_master_info_data");
3086 
3087   if (mi->host[0])
3088   {
3089     DBUG_PRINT("info",("host is set: '%s'", mi->host));
3090     String *packet= &thd->packet;
3091     Protocol *protocol= thd->protocol;
3092     Rpl_filter *rpl_filter= mi->rpl_filter;
3093     StringBuffer<256> tmp;
3094 
3095     protocol->prepare_for_resend();
3096 
3097     /*
3098       slave_running can be accessed without run_lock but not other
3099       non-volotile members like mi->io_thd, which is guarded by the mutex.
3100     */
3101     if (full)
3102       protocol->store(mi->connection_name.str, mi->connection_name.length,
3103                       &my_charset_bin);
3104     mysql_mutex_lock(&mi->run_lock);
3105     if (full)
3106     {
3107       /*
3108         Show what the sql driver replication thread is doing
3109         This is only meaningful if there is only one slave thread.
3110       */
3111       protocol->store(mi->rli.sql_driver_thd ?
3112                       mi->rli.sql_driver_thd->get_proc_info() : "",
3113                       &my_charset_bin);
3114     }
3115     protocol->store(mi->io_thd ? mi->io_thd->get_proc_info() : "", &my_charset_bin);
3116     mysql_mutex_unlock(&mi->run_lock);
3117 
3118     mysql_mutex_lock(&mi->data_lock);
3119     mysql_mutex_lock(&mi->rli.data_lock);
3120     /* err_lock is to protect mi->last_error() */
3121     mysql_mutex_lock(&mi->err_lock);
3122     /* err_lock is to protect mi->rli.last_error() */
3123     mysql_mutex_lock(&mi->rli.err_lock);
3124     protocol->store(mi->host, &my_charset_bin);
3125     protocol->store(mi->user, &my_charset_bin);
3126     protocol->store((uint32) mi->port);
3127     protocol->store((uint32) mi->connect_retry);
3128     protocol->store(mi->master_log_name, &my_charset_bin);
3129     protocol->store((ulonglong) mi->master_log_pos);
3130     protocol->store(mi->rli.group_relay_log_name +
3131                     dirname_length(mi->rli.group_relay_log_name),
3132                     &my_charset_bin);
3133     protocol->store((ulonglong) mi->rli.group_relay_log_pos);
3134     protocol->store(mi->rli.group_master_log_name, &my_charset_bin);
3135     protocol->store(slave_running[mi->slave_running], &my_charset_bin);
3136     protocol->store(mi->rli.slave_running ? "Yes":"No", &my_charset_bin);
3137     protocol->store(rpl_filter->get_do_db());
3138     protocol->store(rpl_filter->get_ignore_db());
3139 
3140     rpl_filter->get_do_table(&tmp);
3141     protocol->store(&tmp);
3142     rpl_filter->get_ignore_table(&tmp);
3143     protocol->store(&tmp);
3144     rpl_filter->get_wild_do_table(&tmp);
3145     protocol->store(&tmp);
3146     rpl_filter->get_wild_ignore_table(&tmp);
3147     protocol->store(&tmp);
3148 
3149     protocol->store(mi->rli.last_error().number);
3150     protocol->store(mi->rli.last_error().message, &my_charset_bin);
3151     protocol->store((uint32) mi->rli.slave_skip_counter);
3152     protocol->store((ulonglong) mi->rli.group_master_log_pos);
3153     protocol->store((ulonglong) mi->rli.log_space_total);
3154 
3155     protocol->store(
3156       mi->rli.until_condition==Relay_log_info::UNTIL_NONE ? "None":
3157         ( mi->rli.until_condition==Relay_log_info::UNTIL_MASTER_POS? "Master":
3158           ( mi->rli.until_condition==Relay_log_info::UNTIL_RELAY_POS? "Relay":
3159             "Gtid")), &my_charset_bin);
3160     protocol->store(mi->rli.until_log_name, &my_charset_bin);
3161     protocol->store((ulonglong) mi->rli.until_log_pos);
3162 
3163 #ifdef HAVE_OPENSSL
3164     protocol->store(mi->ssl? "Yes":"No", &my_charset_bin);
3165 #else
3166     protocol->store(mi->ssl? "Ignored":"No", &my_charset_bin);
3167 #endif
3168     protocol->store(mi->ssl_ca, &my_charset_bin);
3169     protocol->store(mi->ssl_capath, &my_charset_bin);
3170     protocol->store(mi->ssl_cert, &my_charset_bin);
3171     protocol->store(mi->ssl_cipher, &my_charset_bin);
3172     protocol->store(mi->ssl_key, &my_charset_bin);
3173 
3174     /*
3175       Seconds_Behind_Master: if SQL thread is running and I/O thread is
3176       connected, we can compute it otherwise show NULL (i.e. unknown).
3177     */
3178     if ((mi->slave_running == MYSQL_SLAVE_RUN_READING) &&
3179         mi->rli.slave_running)
3180     {
3181       long time_diff;
3182       bool idle;
3183       time_t stamp= mi->rli.last_master_timestamp;
3184 
3185       if (!stamp)
3186         idle= true;
3187       else
3188       {
3189         idle= mi->rli.sql_thread_caught_up;
3190         if (mi->using_parallel() && idle && !mi->rli.parallel.workers_idle())
3191           idle= false;
3192       }
3193       if (idle)
3194         time_diff= 0;
3195       else
3196       {
3197         time_diff= ((long)(time(0) - stamp) - mi->clock_diff_with_master);
3198       /*
3199         Apparently on some systems time_diff can be <0. Here are possible
3200         reasons related to MySQL:
3201         - the master is itself a slave of another master whose time is ahead.
3202         - somebody used an explicit SET TIMESTAMP on the master.
3203         Possible reason related to granularity-to-second of time functions
3204         (nothing to do with MySQL), which can explain a value of -1:
3205         assume the master's and slave's time are perfectly synchronized, and
3206         that at slave's connection time, when the master's timestamp is read,
3207         it is at the very end of second 1, and (a very short time later) when
3208         the slave's timestamp is read it is at the very beginning of second
3209         2. Then the recorded value for master is 1 and the recorded value for
3210         slave is 2. At SHOW SLAVE STATUS time, assume that the difference
3211         between timestamp of slave and rli->last_master_timestamp is 0
3212         (i.e. they are in the same second), then we get 0-(2-1)=-1 as a result.
3213         This confuses users, so we don't go below 0.
3214 
3215         last_master_timestamp == 0 (an "impossible" timestamp 1970) is a
3216         special marker to say "consider we have caught up".
3217       */
3218         if (time_diff < 0)
3219           time_diff= 0;
3220       }
3221       protocol->store((longlong)time_diff);
3222     }
3223     else
3224     {
3225       protocol->store_null();
3226     }
3227     protocol->store(mi->ssl_verify_server_cert? "Yes":"No", &my_charset_bin);
3228 
3229     // Last_IO_Errno
3230     protocol->store(mi->last_error().number);
3231     // Last_IO_Error
3232     protocol->store(mi->last_error().message, &my_charset_bin);
3233     // Last_SQL_Errno
3234     protocol->store(mi->rli.last_error().number);
3235     // Last_SQL_Error
3236     protocol->store(mi->rli.last_error().message, &my_charset_bin);
3237     // Replicate_Ignore_Server_Ids
3238     prot_store_ids(thd, &mi->ignore_server_ids);
3239     // Master_Server_id
3240     protocol->store((uint32) mi->master_id);
3241     // SQL_Delay
3242     // Master_Ssl_Crl
3243     protocol->store(mi->ssl_ca, &my_charset_bin);
3244     // Master_Ssl_Crlpath
3245     protocol->store(mi->ssl_capath, &my_charset_bin);
3246     // Using_Gtid
3247     protocol->store(mi->using_gtid_astext(mi->using_gtid), &my_charset_bin);
3248     // Gtid_IO_Pos
3249     {
3250       mi->gtid_current_pos.to_string(&tmp);
3251       protocol->store(tmp.ptr(), tmp.length(), &my_charset_bin);
3252     }
3253 
3254     // Replicate_Do_Domain_Ids & Replicate_Ignore_Domain_Ids
3255     mi->domain_id_filter.store_ids(thd);
3256 
3257     // Parallel_Mode
3258     {
3259       const char *mode_name= get_type(&slave_parallel_mode_typelib,
3260                                       mi->parallel_mode);
3261       protocol->store(mode_name, strlen(mode_name), &my_charset_bin);
3262     }
3263 
3264     protocol->store((uint32) mi->rli.get_sql_delay());
3265     // SQL_Remaining_Delay
3266     // THD::proc_info is not protected by any lock, so we read it once
3267     // to ensure that we use the same value throughout this function.
3268     const char *slave_sql_running_state=
3269       mi->rli.sql_driver_thd ? mi->rli.sql_driver_thd->proc_info : "";
3270     if (slave_sql_running_state == Relay_log_info::state_delaying_string)
3271     {
3272       time_t t= my_time(0), sql_delay_end= mi->rli.get_sql_delay_end();
3273       protocol->store((uint32)(t < sql_delay_end ? sql_delay_end - t : 0));
3274     }
3275     else
3276       protocol->store_null();
3277     // Slave_SQL_Running_State
3278     protocol->store(slave_sql_running_state, &my_charset_bin);
3279 
3280     protocol->store(mi->total_ddl_groups);
3281     protocol->store(mi->total_non_trans_groups);
3282     protocol->store(mi->total_trans_groups);
3283 
3284     if (full)
3285     {
3286       protocol->store((uint32)    mi->rli.retried_trans);
3287       protocol->store((ulonglong) mi->rli.max_relay_log_size);
3288       protocol->store(mi->rli.executed_entries);
3289       protocol->store((uint32)    mi->received_heartbeats);
3290       protocol->store((double)    mi->heartbeat_period, 3, &tmp);
3291       protocol->store(gtid_pos->ptr(), gtid_pos->length(), &my_charset_bin);
3292     }
3293 
3294     mysql_mutex_unlock(&mi->rli.err_lock);
3295     mysql_mutex_unlock(&mi->err_lock);
3296     mysql_mutex_unlock(&mi->rli.data_lock);
3297     mysql_mutex_unlock(&mi->data_lock);
3298 
3299     if (my_net_write(&thd->net, (uchar*) thd->packet.ptr(), packet->length()))
3300       DBUG_RETURN(TRUE);
3301   }
3302   DBUG_RETURN(FALSE);
3303 }
3304 
3305 
3306 /* Used to sort connections by name */
3307 
3308 static int cmp_mi_by_name(const Master_info **arg1,
3309                           const Master_info **arg2)
3310 {
3311   return my_strcasecmp(system_charset_info, (*arg1)->connection_name.str,
3312                        (*arg2)->connection_name.str);
3313 }
3314 
3315 
3316 /**
3317   Execute a SHOW FULL SLAVE STATUS statement.
3318 
3319   @param thd Pointer to THD object for the client thread executing the
3320   statement.
3321 
3322   Elements are sorted according to the original connection_name.
3323 
3324   @retval FALSE success
3325   @retval TRUE failure
3326 
3327   @note
3328   master_info_index is protected by LOCK_active_mi.
3329 */
3330 
3331 bool show_all_master_info(THD* thd)
3332 {
3333   uint i, elements;
3334   String gtid_pos;
3335   Master_info **tmp;
3336   List<Item> field_list;
3337   DBUG_ENTER("show_master_info");
3338   mysql_mutex_assert_owner(&LOCK_active_mi);
3339 
3340   gtid_pos.length(0);
3341   if (rpl_append_gtid_state(&gtid_pos, true))
3342   {
3343     my_error(ER_OUT_OF_RESOURCES, MYF(0));
3344     DBUG_RETURN(TRUE);
3345   }
3346 
3347   show_master_info_get_fields(thd, &field_list, 1, gtid_pos.length());
3348   if (thd->protocol->send_result_set_metadata(&field_list,
3349                        Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
3350     DBUG_RETURN(TRUE);
3351 
3352   if (!master_info_index ||
3353       !(elements= master_info_index->master_info_hash.records))
3354     goto end;
3355 
3356   /*
3357     Sort lines to get them into a predicted order
3358     (needed for test cases and to not confuse users)
3359   */
3360   if (!(tmp= (Master_info**) thd->alloc(sizeof(Master_info*) * elements)))
3361     DBUG_RETURN(TRUE);
3362 
3363   for (i= 0; i < elements; i++)
3364   {
3365     tmp[i]= (Master_info *) my_hash_element(&master_info_index->
3366                                             master_info_hash, i);
3367   }
3368   my_qsort(tmp, elements, sizeof(Master_info*), (qsort_cmp) cmp_mi_by_name);
3369 
3370   for (i= 0; i < elements; i++)
3371   {
3372     if (send_show_master_info_data(thd, tmp[i], 1, &gtid_pos))
3373       DBUG_RETURN(TRUE);
3374   }
3375 
3376 end:
3377   my_eof(thd);
3378   DBUG_RETURN(FALSE);
3379 }
3380 
3381 
3382 void set_slave_thread_options(THD* thd)
3383 {
3384   DBUG_ENTER("set_slave_thread_options");
3385   /*
3386      It's nonsense to constrain the slave threads with max_join_size; if a
3387      query succeeded on master, we HAVE to execute it. So set
3388      OPTION_BIG_SELECTS. Setting max_join_size to HA_POS_ERROR is not enough
3389      (and it's not needed if we have OPTION_BIG_SELECTS) because an INSERT
3390      SELECT examining more than 4 billion rows would still fail (yes, because
3391      when max_join_size is 4G, OPTION_BIG_SELECTS is automatically set, but
3392      only for client threads.
3393   */
3394   ulonglong options= thd->variables.option_bits | OPTION_BIG_SELECTS;
3395   if (opt_log_slave_updates)
3396     options|= OPTION_BIN_LOG;
3397   else
3398     options&= ~OPTION_BIN_LOG;
3399   thd->variables.option_bits= options;
3400   thd->variables.completion_type= 0;
3401 
3402   /* For easier test in LOGGER::log_command */
3403   if (thd->variables.log_disabled_statements & LOG_DISABLE_SLAVE)
3404     thd->variables.option_bits|= OPTION_LOG_OFF;
3405 
3406   thd->variables.sql_log_slow= !MY_TEST(thd->variables.log_slow_disabled_statements &
3407                                         LOG_SLOW_DISABLE_SLAVE);
3408   DBUG_VOID_RETURN;
3409 }
3410 
3411 void set_slave_thread_default_charset(THD* thd, rpl_group_info *rgi)
3412 {
3413   DBUG_ENTER("set_slave_thread_default_charset");
3414 
3415   thd->variables.collation_server=
3416     global_system_variables.collation_server;
3417   thd->update_charset(global_system_variables.character_set_client,
3418                       global_system_variables.collation_connection);
3419 
3420   thd->system_thread_info.rpl_sql_info->cached_charset_invalidate();
3421   DBUG_VOID_RETURN;
3422 }
3423 
3424 /*
3425   init_slave_thread()
3426 */
3427 
3428 static int init_slave_thread(THD* thd, Master_info *mi,
3429                              SLAVE_THD_TYPE thd_type)
3430 {
3431   DBUG_ENTER("init_slave_thread");
3432   int simulate_error __attribute__((unused))= 0;
3433   DBUG_EXECUTE_IF("simulate_io_slave_error_on_init",
3434                   simulate_error|= (1 << SLAVE_THD_IO););
3435   DBUG_EXECUTE_IF("simulate_sql_slave_error_on_init",
3436                   simulate_error|= (1 << SLAVE_THD_SQL););
3437 
3438   thd->system_thread = (thd_type == SLAVE_THD_SQL) ?
3439     SYSTEM_THREAD_SLAVE_SQL : SYSTEM_THREAD_SLAVE_IO;
3440 
3441   /* We must call store_globals() before doing my_net_init() */
3442   if (init_thr_lock() || thd->store_globals() ||
3443       my_net_init(&thd->net, 0, thd, MYF(MY_THREAD_SPECIFIC)) ||
3444       IF_DBUG(simulate_error & (1<< thd_type), 0))
3445   {
3446     thd->cleanup();
3447     DBUG_RETURN(-1);
3448   }
3449 
3450   thd->security_ctx->skip_grants();
3451   thd->slave_thread= 1;
3452   thd->connection_name= mi->connection_name;
3453   thd->variables.sql_log_slow= !MY_TEST(thd->variables.log_slow_disabled_statements & LOG_SLOW_DISABLE_SLAVE);
3454   set_slave_thread_options(thd);
3455 
3456   if (thd_type == SLAVE_THD_SQL)
3457     THD_STAGE_INFO(thd, stage_waiting_for_the_next_event_in_relay_log);
3458   else
3459     THD_STAGE_INFO(thd, stage_waiting_for_master_update);
3460   thd->set_time();
3461   /* Do not use user-supplied timeout value for system threads. */
3462   thd->variables.lock_wait_timeout= LONG_TIMEOUT;
3463   DBUG_RETURN(0);
3464 }
3465 
3466 /*
3467   Sleep for a given amount of time or until killed.
3468 
3469   @param thd        Thread context of the current thread.
3470   @param seconds    The number of seconds to sleep.
3471   @param func       Function object to check if the thread has been killed.
3472   @param info       The Rpl_info object associated with this sleep.
3473 
3474   @retval True if the thread has been killed, false otherwise.
3475 */
3476 template <typename killed_func, typename rpl_info>
3477 static bool slave_sleep(THD *thd, time_t seconds,
3478                         killed_func func, rpl_info info)
3479 {
3480 
3481   bool ret;
3482   struct timespec abstime;
3483 
3484   mysql_mutex_t *lock= &info->sleep_lock;
3485   mysql_cond_t *cond= &info->sleep_cond;
3486 
3487   /* Absolute system time at which the sleep time expires. */
3488   set_timespec(abstime, seconds);
3489   mysql_mutex_lock(lock);
3490   thd->ENTER_COND(cond, lock, NULL, NULL);
3491 
3492   while (! (ret= func(info)))
3493   {
3494     int error= mysql_cond_timedwait(cond, lock, &abstime);
3495     if (error == ETIMEDOUT || error == ETIME)
3496       break;
3497   }
3498   /* Implicitly unlocks the mutex. */
3499   thd->EXIT_COND(NULL);
3500   return ret;
3501 }
3502 
3503 
3504 static int request_dump(THD *thd, MYSQL* mysql, Master_info* mi,
3505 			bool *suppress_warnings)
3506 {
3507   uchar buf[FN_REFLEN + 10];
3508   int len;
3509   ushort binlog_flags = 0; // for now
3510   char* logname = mi->master_log_name;
3511   DBUG_ENTER("request_dump");
3512 
3513   *suppress_warnings= FALSE;
3514 
3515   if (opt_log_slave_updates && opt_replicate_annotate_row_events)
3516     binlog_flags|= BINLOG_SEND_ANNOTATE_ROWS_EVENT;
3517 
3518   if (repl_semisync_slave.request_transmit(mi))
3519     DBUG_RETURN(1);
3520 
3521   // TODO if big log files: Change next to int8store()
3522   int4store(buf, (ulong) mi->master_log_pos);
3523   int2store(buf + 4, binlog_flags);
3524   int4store(buf + 6, global_system_variables.server_id);
3525   len = (uint) strlen(logname);
3526   memcpy(buf + 10, logname,len);
3527   if (simple_command(mysql, COM_BINLOG_DUMP, buf, len + 10, 1))
3528   {
3529     /*
3530       Something went wrong, so we will just reconnect and retry later
3531       in the future, we should do a better error analysis, but for
3532       now we just fill up the error log :-)
3533     */
3534     if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED ||
3535         mysql_errno(mysql) == ER_NET_ERROR_ON_WRITE)
3536       *suppress_warnings= TRUE;                 // Suppress reconnect warning
3537     else
3538       sql_print_error("Error on COM_BINLOG_DUMP: %d  %s, will retry in %d secs",
3539                       mysql_errno(mysql), mysql_error(mysql),
3540                       mi->connect_retry);
3541     DBUG_RETURN(1);
3542   }
3543 
3544   DBUG_RETURN(0);
3545 }
3546 
3547 
3548 /*
3549   Read one event from the master
3550 
3551   SYNOPSIS
3552     read_event()
3553     mysql               MySQL connection
3554     mi                  Master connection information
3555     suppress_warnings   TRUE when a normal net read timeout has caused us to
3556                         try a reconnect.  We do not want to print anything to
3557                         the error log in this case because this a anormal
3558                         event in an idle server.
3559     network_read_len    get the real network read length in VIO, especially using compressed protocol
3560 
3561     RETURN VALUES
3562     'packet_error'      Error
3563     number              Length of packet
3564 */
3565 
3566 static ulong read_event(MYSQL* mysql, Master_info *mi, bool* suppress_warnings,
3567                         ulong* network_read_len)
3568 {
3569   ulong len;
3570   DBUG_ENTER("read_event");
3571 
3572   *suppress_warnings= FALSE;
3573   /*
3574     my_real_read() will time us out
3575     We check if we were told to die, and if not, try reading again
3576   */
3577 #ifndef DBUG_OFF
3578   if (disconnect_slave_event_count && !(mi->events_till_disconnect--))
3579     DBUG_RETURN(packet_error);
3580 #endif
3581 
3582   len = cli_safe_read_reallen(mysql, network_read_len);
3583   if (unlikely(len == packet_error || (long) len < 1))
3584   {
3585     if (mysql_errno(mysql) == ER_NET_READ_INTERRUPTED)
3586     {
3587       /*
3588         We are trying a normal reconnect after a read timeout;
3589         we suppress prints to .err file as long as the reconnect
3590         happens without problems
3591       */
3592       *suppress_warnings=
3593         global_system_variables.log_warnings < 2 ? TRUE : FALSE;
3594     }
3595     else
3596     {
3597       if (!mi->rli.abort_slave)
3598       {
3599         sql_print_error("Error reading packet from server: %s (server_errno=%d)",
3600                         mysql_error(mysql), mysql_errno(mysql));
3601       }
3602     }
3603     DBUG_RETURN(packet_error);
3604   }
3605 
3606   /* Check if eof packet */
3607   if (len < 8 && mysql->net.read_pos[0] == 254)
3608   {
3609     sql_print_information("Slave: received end packet from server, apparent "
3610                           "master shutdown: %s",
3611                      mysql_error(mysql));
3612      DBUG_RETURN(packet_error);
3613   }
3614 
3615   DBUG_PRINT("exit", ("len: %lu  net->read_pos[4]: %d",
3616                       len, mysql->net.read_pos[4]));
3617   DBUG_RETURN(len - 1);
3618 }
3619 
3620 
3621 /**
3622   Check if the current error is of temporary nature of not.
3623   Some errors are temporary in nature, such as
3624   ER_LOCK_DEADLOCK and ER_LOCK_WAIT_TIMEOUT.
3625 
3626   @retval 0 if fatal error
3627   @retval 1 temporary error, do retry
3628 */
3629 
3630 int
3631 has_temporary_error(THD *thd)
3632 {
3633   uint current_errno;
3634   DBUG_ENTER("has_temporary_error");
3635 
3636   DBUG_EXECUTE_IF("all_errors_are_temporary_errors",
3637                   if (thd->get_stmt_da()->is_error())
3638                   {
3639                     thd->clear_error();
3640                     my_error(ER_LOCK_DEADLOCK, MYF(0));
3641                   });
3642 
3643   /*
3644     If there is no message in THD, we can't say if it's a temporary
3645     error or not. This is currently the case for Incident_log_event,
3646     which sets no message. Return FALSE.
3647   */
3648   if (!likely(thd->is_error()))
3649     DBUG_RETURN(0);
3650 
3651   current_errno= thd->get_stmt_da()->sql_errno();
3652   for (uint i= 0; i < slave_transaction_retry_error_length; i++)
3653   {
3654     if (current_errno == slave_transaction_retry_errors[i])
3655       DBUG_RETURN(1);
3656   }
3657 
3658   DBUG_RETURN(0);
3659 }
3660 
3661 
3662 /**
3663   If this is a lagging slave (specified with CHANGE MASTER TO MASTER_DELAY = X), delays accordingly. Also unlocks rli->data_lock.
3664 
3665   Design note: this is the place to unlock rli->data_lock. The lock
3666   must be held when reading delay info from rli, but it should not be
3667   held while sleeping.
3668 
3669   @param ev Event that is about to be executed.
3670 
3671   @param thd The sql thread's THD object.
3672 
3673   @param rli The sql thread's Relay_log_info structure.
3674 
3675   @retval 0 If the delay timed out and the event shall be executed.
3676 
3677   @retval nonzero If the delay was interrupted and the event shall be skipped.
3678 */
3679 int
3680 sql_delay_event(Log_event *ev, THD *thd, rpl_group_info *rgi)
3681 {
3682   Relay_log_info* rli= rgi->rli;
3683   long sql_delay= rli->get_sql_delay();
3684 
3685   DBUG_ENTER("sql_delay_event");
3686   mysql_mutex_assert_owner(&rli->data_lock);
3687   DBUG_ASSERT(!rli->belongs_to_client());
3688 
3689   int type= ev->get_type_code();
3690   if (sql_delay && type != ROTATE_EVENT &&
3691       type != FORMAT_DESCRIPTION_EVENT && type != START_EVENT_V3)
3692   {
3693     // The time when we should execute the event.
3694     time_t sql_delay_end=
3695       ev->when + rli->mi->clock_diff_with_master + sql_delay;
3696     // The current time.
3697     time_t now= my_time(0);
3698     // The time we will have to sleep before executing the event.
3699     unsigned long nap_time= 0;
3700     if (sql_delay_end > now)
3701       nap_time= (ulong)(sql_delay_end - now);
3702 
3703     DBUG_PRINT("info", ("sql_delay= %lu "
3704                         "ev->when= %lu "
3705                         "rli->mi->clock_diff_with_master= %lu "
3706                         "now= %ld "
3707                         "sql_delay_end= %llu "
3708                         "nap_time= %ld",
3709                         sql_delay, (long)ev->when,
3710                         rli->mi->clock_diff_with_master,
3711                         (long)now, (ulonglong)sql_delay_end, (long)nap_time));
3712 
3713     if (sql_delay_end > now)
3714     {
3715       DBUG_PRINT("info", ("delaying replication event %lu secs",
3716                           nap_time));
3717       rli->start_sql_delay(sql_delay_end);
3718       mysql_mutex_unlock(&rli->data_lock);
3719       DBUG_RETURN(slave_sleep(thd, nap_time, sql_slave_killed, rgi));
3720     }
3721   }
3722 
3723   mysql_mutex_unlock(&rli->data_lock);
3724 
3725   DBUG_RETURN(0);
3726 }
3727 
3728 
3729 /*
3730   First half of apply_event_and_update_pos(), see below.
3731   Setup some THD variables for applying the event.
3732 
3733   Split out so that it can run with rli->data_lock held in non-parallel
3734   replication, but without the mutex held in the parallel case.
3735 */
3736 static int
3737 apply_event_and_update_pos_setup(Log_event* ev, THD* thd, rpl_group_info *rgi)
3738 {
3739   DBUG_ENTER("apply_event_and_update_pos_setup");
3740 
3741   DBUG_PRINT("exec_event",("%s(type_code: %d; server_id: %d)",
3742                            ev->get_type_str(), ev->get_type_code(),
3743                            ev->server_id));
3744   DBUG_PRINT("info", ("thd->options: '%s%s%s'  rgi->last_event_start_time: %lu",
3745                       FLAGSTR(thd->variables.option_bits, OPTION_NOT_AUTOCOMMIT),
3746                       FLAGSTR(thd->variables.option_bits, OPTION_BEGIN),
3747                       FLAGSTR(thd->variables.option_bits, OPTION_GTID_BEGIN),
3748                       (ulong) rgi->last_event_start_time));
3749 
3750   /*
3751     Execute the event to change the database and update the binary
3752     log coordinates, but first we set some data that is needed for
3753     the thread.
3754 
3755     The event will be executed unless it is supposed to be skipped.
3756 
3757     Queries originating from this server must be skipped.  Low-level
3758     events (Format_description_log_event, Rotate_log_event,
3759     Stop_log_event) from this server must also be skipped. But for
3760     those we don't want to modify 'group_master_log_pos', because
3761     these events did not exist on the master.
3762     Format_description_log_event is not completely skipped.
3763 
3764     Skip queries specified by the user in 'slave_skip_counter'.  We
3765     can't however skip events that has something to do with the log
3766     files themselves.
3767 
3768     Filtering on own server id is extremely important, to ignore
3769     execution of events created by the creation/rotation of the relay
3770     log (remember that now the relay log starts with its Format_desc,
3771     has a Rotate etc).
3772   */
3773 
3774   /* Use the original server id for logging. */
3775   thd->variables.server_id = ev->server_id;
3776   thd->set_time();                            // time the query
3777   thd->lex->current_select= 0;
3778   thd->variables.option_bits=
3779     (thd->variables.option_bits & ~OPTION_SKIP_REPLICATION) |
3780     (ev->flags & LOG_EVENT_SKIP_REPLICATION_F ? OPTION_SKIP_REPLICATION : 0);
3781   ev->thd = thd; // because up to this point, ev->thd == 0
3782 
3783   DBUG_RETURN(ev->shall_skip(rgi));
3784 }
3785 
3786 
3787 /*
3788   Second half of apply_event_and_update_pos(), see below.
3789 
3790   Do the actual event apply (or skip), and position update.
3791  */
3792 static int
3793 apply_event_and_update_pos_apply(Log_event* ev, THD* thd, rpl_group_info *rgi,
3794                                  int reason)
3795 {
3796   int exec_res= 0;
3797   Relay_log_info* rli= rgi->rli;
3798 
3799   DBUG_ENTER("apply_event_and_update_pos_apply");
3800   DBUG_EXECUTE_IF("inject_slave_sql_before_apply_event",
3801     {
3802       DBUG_ASSERT(!debug_sync_set_action
3803                   (thd, STRING_WITH_LEN("now WAIT_FOR continue")));
3804       DBUG_SET_INITIAL("-d,inject_slave_sql_before_apply_event");
3805     };);
3806   if (reason == Log_event::EVENT_SKIP_NOT)
3807     exec_res= ev->apply_event(rgi);
3808 
3809 #ifdef WITH_WSREP
3810   if (WSREP(thd)) {
3811 
3812     if (exec_res) {
3813       mysql_mutex_lock(&thd->LOCK_thd_data);
3814       switch(thd->wsrep_trx().state()) {
3815       case wsrep::transaction::s_must_replay:
3816         /* this transaction will be replayed,
3817            so not raising slave error here */
3818         WSREP_DEBUG("SQL apply failed for MUST_REPLAY, res %d", exec_res);
3819 	exec_res = 0;
3820         break;
3821       default:
3822           WSREP_DEBUG("SQL apply failed, res %d conflict state: %s",
3823                       exec_res, wsrep_thd_transaction_state_str(thd));
3824           rli->abort_slave= 1;
3825           rli->report(ERROR_LEVEL, ER_UNKNOWN_COM_ERROR, rgi->gtid_info(),
3826                       "Node has dropped from cluster");
3827           break;
3828       }
3829       mysql_mutex_unlock(&thd->LOCK_thd_data);
3830     }
3831   }
3832 #endif
3833 
3834 #ifndef DBUG_OFF
3835   /*
3836     This only prints information to the debug trace.
3837 
3838     TODO: Print an informational message to the error log?
3839   */
3840   static const char *const explain[] = {
3841     // EVENT_SKIP_NOT,
3842     "not skipped",
3843     // EVENT_SKIP_IGNORE,
3844     "skipped because event should be ignored",
3845     // EVENT_SKIP_COUNT
3846     "skipped because event skip counter was non-zero"
3847   };
3848   DBUG_PRINT("info", ("OPTION_BEGIN: %d  IN_STMT: %d  IN_TRANSACTION: %d",
3849                       MY_TEST(thd->variables.option_bits & OPTION_BEGIN),
3850                       rli->get_flag(Relay_log_info::IN_STMT),
3851                       rli->get_flag(Relay_log_info::IN_TRANSACTION)));
3852   DBUG_PRINT("skip_event", ("%s event was %s",
3853                             ev->get_type_str(), explain[reason]));
3854 #endif
3855 
3856   DBUG_PRINT("info", ("apply_event error = %d", exec_res));
3857   if (exec_res == 0)
3858   {
3859     int error= ev->update_pos(rgi);
3860  #ifndef DBUG_OFF
3861     DBUG_PRINT("info", ("update_pos error = %d", error));
3862     if (!rli->belongs_to_client())
3863     {
3864       DBUG_PRINT("info", ("group %llu %s", rli->group_relay_log_pos,
3865                           rli->group_relay_log_name));
3866       DBUG_PRINT("info", ("event %llu %s", rli->event_relay_log_pos,
3867                           rli->event_relay_log_name));
3868     }
3869 #endif
3870     /*
3871       The update should not fail, so print an error message and
3872       return an error code.
3873 
3874       TODO: Replace this with a decent error message when merged
3875       with BUG#24954 (which adds several new error message).
3876     */
3877     if (unlikely(error))
3878     {
3879       rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR, rgi->gtid_info(),
3880                   "It was not possible to update the positions"
3881                   " of the relay log information: the slave may"
3882                   " be in an inconsistent state."
3883                   " Stopped in %s position %llu",
3884                   rli->group_relay_log_name, rli->group_relay_log_pos);
3885       DBUG_RETURN(2);
3886     }
3887   }
3888   else
3889   {
3890     /*
3891       Make sure we do not erroneously update gtid_slave_pos with a lingering
3892       GTID from this failed event group (MDEV-4906).
3893     */
3894     rgi->gtid_pending= false;
3895   }
3896 
3897   DBUG_RETURN(exec_res ? 1 : 0);
3898 }
3899 
3900 
3901 /**
3902   Applies the given event and advances the relay log position.
3903 
3904   This is needed by the sql thread to execute events from the binlog,
3905   and by clients executing BINLOG statements.  Conceptually, this
3906   function does:
3907 
3908   @code
3909     ev->apply_event(rli);
3910     ev->update_pos(rli);
3911   @endcode
3912 
3913   It also does the following maintainance:
3914 
3915    - Initializes the thread's server_id and time; and the event's
3916      thread.
3917 
3918    - If !rli->belongs_to_client() (i.e., if it belongs to the slave
3919      sql thread instead of being used for executing BINLOG
3920      statements), it does the following things: (1) skips events if it
3921      is needed according to the server id or slave_skip_counter; (2)
3922      unlocks rli->data_lock; (3) sleeps if required by 'CHANGE MASTER
3923      TO MASTER_DELAY=X'; (4) maintains the running state of the sql
3924      thread (rli->thread_state).
3925 
3926    - Reports errors as needed.
3927 
3928   @param ev The event to apply.
3929 
3930   @param thd The client thread that executes the event (i.e., the
3931   slave sql thread if called from a replication slave, or the client
3932   thread if called to execute a BINLOG statement).
3933 
3934   @param rli The relay log info (i.e., the slave's rli if called from
3935   a replication slave, or the client's thd->rli_fake if called to
3936   execute a BINLOG statement).
3937 
3938   @retval 0 OK.
3939 
3940   @retval 1 Error calling ev->apply_event().
3941 
3942   @retval 2 No error calling ev->apply_event(), but error calling
3943   ev->update_pos().
3944 
3945   This function is only used in non-parallel replication, where it is called
3946   with rli->data_lock held; this lock is released during this function.
3947 */
3948 int
3949 apply_event_and_update_pos(Log_event* ev, THD* thd, rpl_group_info *rgi)
3950 {
3951   Relay_log_info* rli= rgi->rli;
3952   mysql_mutex_assert_owner(&rli->data_lock);
3953   int reason= apply_event_and_update_pos_setup(ev, thd, rgi);
3954   if (reason == Log_event::EVENT_SKIP_COUNT)
3955   {
3956     DBUG_ASSERT(rli->slave_skip_counter > 0);
3957     rli->slave_skip_counter--;
3958   }
3959 
3960   if (reason == Log_event::EVENT_SKIP_NOT)
3961   {
3962     // Sleeps if needed, and unlocks rli->data_lock.
3963     if (sql_delay_event(ev, thd, rgi))
3964       return 0;
3965   }
3966   else
3967     mysql_mutex_unlock(&rli->data_lock);
3968 
3969   return apply_event_and_update_pos_apply(ev, thd, rgi, reason);
3970 }
3971 
3972 
3973 /*
3974   The version of above apply_event_and_update_pos() used in parallel
3975   replication. Unlike the non-parallel case, this function is called without
3976   rli->data_lock held.
3977 */
3978 int
3979 apply_event_and_update_pos_for_parallel(Log_event* ev, THD* thd,
3980                                         rpl_group_info *rgi)
3981 {
3982   mysql_mutex_assert_not_owner(&rgi->rli->data_lock);
3983   int reason= apply_event_and_update_pos_setup(ev, thd, rgi);
3984   /*
3985     In parallel replication, sql_slave_skip_counter is handled in the SQL
3986     driver thread, so 23 should never see EVENT_SKIP_COUNT here.
3987   */
3988   DBUG_ASSERT(reason != Log_event::EVENT_SKIP_COUNT);
3989   /*
3990     Calling sql_delay_event() was handled in the SQL driver thread when
3991     doing parallel replication.
3992   */
3993   return apply_event_and_update_pos_apply(ev, thd, rgi, reason);
3994 }
3995 
3996 
3997 /**
3998    Keep the relay log transaction state up to date.
3999 
4000    The state reflects how things are after the given event, that has just been
4001    read from the relay log, is executed.
4002 
4003    This is only needed to ensure we:
4004    - Don't abort the sql driver thread in the middle of an event group.
4005    - Don't rotate the io thread in the middle of a statement or transaction.
4006      The mechanism is that the io thread, when it needs to rotate the relay
4007      log, will wait until the sql driver has read all the cached events
4008      and then continue reading events one by one from the master until
4009      the sql threads signals that log doesn't have an active group anymore.
4010 
4011      There are two possible cases. We keep them as 2 separate flags mainly
4012      to make debugging easier.
4013 
4014      - IN_STMT is set when we have read an event that should be used
4015        together with the next event.  This is for example setting a
4016        variable that is used when executing the next statement.
4017      - IN_TRANSACTION is set when we are inside a BEGIN...COMMIT group
4018 
4019      To test the state one should use the is_in_group() function.
4020 */
4021 
4022 inline void update_state_of_relay_log(Relay_log_info *rli, Log_event *ev)
4023 {
4024   Log_event_type typ= ev->get_type_code();
4025 
4026   /* check if we are in a multi part event */
4027   if (ev->is_part_of_group())
4028     rli->set_flag(Relay_log_info::IN_STMT);
4029   else if (Log_event::is_group_event(typ))
4030   {
4031     /*
4032       If it was not a is_part_of_group() and not a group event (like
4033       rotate) then we can reset the IN_STMT flag.  We have the above
4034       if only to allow us to have a rotate element anywhere.
4035     */
4036     rli->clear_flag(Relay_log_info::IN_STMT);
4037   }
4038 
4039   /* Check for an event that starts or stops a transaction */
4040   if (LOG_EVENT_IS_QUERY(typ))
4041   {
4042     Query_log_event *qev= (Query_log_event*) ev;
4043     /*
4044       Trivial optimization to avoid the following somewhat expensive
4045       checks.
4046     */
4047     if (qev->q_len <= sizeof("ROLLBACK"))
4048     {
4049       if (qev->is_begin())
4050         rli->set_flag(Relay_log_info::IN_TRANSACTION);
4051       if (qev->is_commit() || qev->is_rollback())
4052         rli->clear_flag(Relay_log_info::IN_TRANSACTION);
4053     }
4054   }
4055   if (typ == XID_EVENT)
4056     rli->clear_flag(Relay_log_info::IN_TRANSACTION);
4057   if (typ == GTID_EVENT &&
4058       !(((Gtid_log_event*) ev)->flags2 & Gtid_log_event::FL_STANDALONE))
4059   {
4060     /* This GTID_EVENT will generate a BEGIN event */
4061     rli->set_flag(Relay_log_info::IN_TRANSACTION);
4062   }
4063 
4064   DBUG_PRINT("info", ("event: %u  IN_STMT: %d  IN_TRANSACTION: %d",
4065                       (uint) typ,
4066                       rli->get_flag(Relay_log_info::IN_STMT),
4067                       rli->get_flag(Relay_log_info::IN_TRANSACTION)));
4068 }
4069 
4070 
4071 /**
4072   Top-level function for executing the next event in the relay log.
4073   This is called from the SQL thread.
4074 
4075   This function reads the event from the relay log, executes it, and
4076   advances the relay log position.  It also handles errors, etc.
4077 
4078   This function may fail to apply the event for the following reasons:
4079 
4080    - The position specfied by the UNTIL condition of the START SLAVE
4081      command is reached.
4082 
4083    - It was not possible to read the event from the log.
4084 
4085    - The slave is killed.
4086 
4087    - An error occurred when applying the event, and the event has been
4088      tried slave_trans_retries times.  If the event has been retried
4089      fewer times, 0 is returned.
4090 
4091    - init_master_info or init_relay_log_pos failed. (These are called
4092      if a failure occurs when applying the event.)
4093 
4094    - An error occurred when updating the binlog position.
4095 
4096   @retval 0 The event was applied.
4097 
4098   @retval 1 The event was not applied.
4099 */
4100 
4101 static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
4102                                 rpl_group_info *serial_rgi)
4103 {
4104   ulonglong event_size;
4105   DBUG_ENTER("exec_relay_log_event");
4106 
4107   /*
4108     We acquire this mutex since we need it for all operations except
4109     event execution. But we will release it in places where we will
4110     wait for something for example inside of next_event().
4111   */
4112   mysql_mutex_lock(&rli->data_lock);
4113 
4114   Log_event *ev= next_event(serial_rgi, &event_size);
4115 
4116   if (sql_slave_killed(serial_rgi))
4117   {
4118     mysql_mutex_unlock(&rli->data_lock);
4119     delete ev;
4120     DBUG_RETURN(1);
4121   }
4122   if (ev)
4123   {
4124 #ifdef WITH_WSREP
4125     if (wsrep_before_statement(thd))
4126     {
4127       mysql_mutex_unlock(&rli->data_lock);
4128       delete ev;
4129       WSREP_INFO("Wsrep before statement error");
4130       DBUG_RETURN(1);
4131     }
4132 #endif /* WITH_WSREP */
4133     int exec_res;
4134     Log_event_type typ= ev->get_type_code();
4135 
4136     /*
4137       Even if we don't execute this event, we keep the master timestamp,
4138       so that seconds behind master shows correct delta (there are events
4139       that are not replayed, so we keep falling behind).
4140 
4141       If it is an artificial event, or a relay log event (IO thread generated
4142       event) or ev->when is set to 0, we don't update the
4143       last_master_timestamp.
4144 
4145       In parallel replication, we might queue a large number of events, and
4146       the user might be surprised to see a claim that the slave is up to date
4147       long before those queued events are actually executed.
4148      */
4149     if (!rli->mi->using_parallel() &&
4150         !(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
4151     {
4152       rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
4153       DBUG_ASSERT(rli->last_master_timestamp >= 0);
4154     }
4155 
4156     /*
4157       This tests if the position of the beginning of the current event
4158       hits the UNTIL barrier.
4159     */
4160     if ((rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ||
4161          rli->until_condition == Relay_log_info::UNTIL_RELAY_POS) &&
4162         (ev->server_id != global_system_variables.server_id ||
4163          rli->replicate_same_server_id) &&
4164         rli->is_until_satisfied(ev))
4165     {
4166       /*
4167         Setting abort_slave flag because we do not want additional
4168         message about error in query execution to be printed.
4169       */
4170       rli->abort_slave= 1;
4171       rli->stop_for_until= true;
4172       mysql_mutex_unlock(&rli->data_lock);
4173 #ifdef WITH_WSREP
4174       wsrep_after_statement(thd);
4175 #endif /* WITH_WSREP */
4176       delete ev;
4177       DBUG_RETURN(1);
4178     }
4179 
4180     { /**
4181          The following failure injecion works in cooperation with tests
4182          setting @@global.debug= 'd,incomplete_group_in_relay_log'.
4183          Xid or Commit events are not executed to force the slave sql
4184          read hanging if the realy log does not have any more events.
4185       */
4186       DBUG_EXECUTE_IF("incomplete_group_in_relay_log",
4187                       if ((typ == XID_EVENT) ||
4188                           (LOG_EVENT_IS_QUERY(typ) &&
4189                            strcmp("COMMIT", ((Query_log_event *) ev)->query) == 0))
4190                       {
4191                         DBUG_ASSERT(thd->transaction.all.modified_non_trans_table);
4192                         rli->abort_slave= 1;
4193                         mysql_mutex_unlock(&rli->data_lock);
4194                         delete ev;
4195                         serial_rgi->inc_event_relay_log_pos();
4196                         DBUG_RETURN(0);
4197                       };);
4198     }
4199 
4200     update_state_of_relay_log(rli, ev);
4201 
4202     if (rli->mi->using_parallel())
4203     {
4204       int res= rli->parallel.do_event(serial_rgi, ev, event_size);
4205       /*
4206         In parallel replication, we need to update the relay log position
4207         immediately so that it will be the correct position from which to
4208         read the next event.
4209       */
4210       if (res == 0)
4211         rli->event_relay_log_pos= rli->future_event_relay_log_pos;
4212       if (res >= 0)
4213       {
4214 #ifdef WITH_WSREP
4215 	wsrep_after_statement(thd);
4216 #endif /* WITH_WSREP */
4217         DBUG_RETURN(res);
4218       }
4219       /*
4220         Else we proceed to execute the event non-parallel.
4221         This is the case for pre-10.0 events without GTID, and for handling
4222         slave_skip_counter.
4223       */
4224       if (!(ev->is_artificial_event() || ev->is_relay_log_event() || (ev->when == 0)))
4225       {
4226         /*
4227           Ignore FD's timestamp as it does not reflect the slave execution
4228           state but likely to reflect a deep past. Consequently when the first
4229           data modification event execution last long all this time
4230           Seconds_Behind_Master is zero.
4231         */
4232         if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
4233           rli->last_master_timestamp= ev->when + (time_t) ev->exec_time;
4234 
4235         DBUG_ASSERT(rli->last_master_timestamp >= 0);
4236       }
4237     }
4238 
4239     if (typ == GTID_EVENT)
4240     {
4241       Gtid_log_event *gev= static_cast<Gtid_log_event *>(ev);
4242 
4243       /*
4244         For GTID, allocate a new sub_id for the given domain_id.
4245         The sub_id must be allocated in increasing order of binlog order.
4246       */
4247       if (event_group_new_gtid(serial_rgi, gev))
4248       {
4249         sql_print_error("Error reading relay log event: %s", "slave SQL thread "
4250                         "aborted because of out-of-memory error");
4251         mysql_mutex_unlock(&rli->data_lock);
4252         delete ev;
4253 #ifdef WITH_WSREP
4254 	  wsrep_after_statement(thd);
4255 #endif /* WITH_WSREP */
4256         DBUG_RETURN(1);
4257       }
4258 
4259       if (opt_gtid_ignore_duplicates &&
4260           rli->mi->using_gtid != Master_info::USE_GTID_NO)
4261       {
4262         int res= rpl_global_gtid_slave_state->check_duplicate_gtid
4263           (&serial_rgi->current_gtid, serial_rgi);
4264         if (res < 0)
4265         {
4266           sql_print_error("Error processing GTID event: %s", "slave SQL "
4267                           "thread aborted because of out-of-memory error");
4268           mysql_mutex_unlock(&rli->data_lock);
4269           delete ev;
4270 #ifdef WITH_WSREP
4271           wsrep_after_statement(thd);
4272 #endif /* WITH_WSREP */
4273           DBUG_RETURN(1);
4274         }
4275         /*
4276           If we need to skip this event group (because the GTID was already
4277           applied), then do it using the code for slave_skip_counter, which
4278           is able to handle skipping until the end of the event group.
4279         */
4280         if (!res)
4281           rli->slave_skip_counter= 1;
4282       }
4283     }
4284 
4285     serial_rgi->future_event_relay_log_pos= rli->future_event_relay_log_pos;
4286     serial_rgi->event_relay_log_name= rli->event_relay_log_name;
4287     serial_rgi->event_relay_log_pos= rli->event_relay_log_pos;
4288     exec_res= apply_event_and_update_pos(ev, thd, serial_rgi);
4289 
4290 #ifdef WITH_WSREP
4291     WSREP_DEBUG("apply_event_and_update_pos() result: %d", exec_res);
4292 #endif /* WITH_WSREP */
4293 
4294     delete_or_keep_event_post_apply(serial_rgi, typ, ev);
4295 
4296     /*
4297       update_log_pos failed: this should not happen, so we don't
4298       retry.
4299     */
4300     if (unlikely(exec_res == 2))
4301     {
4302 #ifdef WITH_WSREP
4303       wsrep_after_statement(thd);
4304 #endif /* WITH_WSREP */
4305       DBUG_RETURN(1);
4306     }
4307 #ifdef WITH_WSREP
4308     mysql_mutex_lock(&thd->LOCK_thd_data);
4309     enum wsrep::client_error wsrep_error= thd->wsrep_cs().current_error();
4310     mysql_mutex_unlock(&thd->LOCK_thd_data);
4311     if (wsrep_error == wsrep::e_success)
4312 #endif /* WITH_WSREP */
4313     if (slave_trans_retries)
4314     {
4315       int UNINIT_VAR(temp_err);
4316       if (unlikely(exec_res) && (temp_err= has_temporary_error(thd)))
4317       {
4318         const char *errmsg;
4319         rli->clear_error();
4320         /*
4321           We were in a transaction which has been rolled back because of a
4322           temporary error;
4323           let's seek back to BEGIN log event and retry it all again.
4324           Note, if lock wait timeout (innodb_lock_wait_timeout exceeded)
4325           there is no rollback since 5.0.13 (ref: manual).
4326           We have to not only seek but also
4327 
4328           a) init_master_info(), to seek back to hot relay log's start
4329           for later (for when we will come back to this hot log after
4330           re-processing the possibly existing old logs where BEGIN is:
4331           check_binlog_magic() will then need the cache to be at
4332           position 0 (see comments at beginning of
4333           init_master_info()).
4334           b) init_relay_log_pos(), because the BEGIN may be an older relay log.
4335         */
4336         if (serial_rgi->trans_retries < slave_trans_retries)
4337         {
4338           if (init_master_info(rli->mi, 0, 0, 0, SLAVE_SQL))
4339             sql_print_error("Failed to initialize the master info structure");
4340           else if (init_relay_log_pos(rli,
4341                                       rli->group_relay_log_name,
4342                                       rli->group_relay_log_pos,
4343                                       1, &errmsg, 1))
4344             sql_print_error("Error initializing relay log position: %s",
4345                             errmsg);
4346           else
4347           {
4348             exec_res= 0;
4349             serial_rgi->cleanup_context(thd, 1);
4350             /* chance for concurrent connection to get more locks */
4351             slave_sleep(thd, MY_MAX(MY_MIN(serial_rgi->trans_retries,
4352                                     MAX_SLAVE_RETRY_PAUSE),
4353                                     slave_trans_retry_interval),
4354                        sql_slave_killed, serial_rgi);
4355             serial_rgi->trans_retries++;
4356             mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS
4357             rli->retried_trans++;
4358             statistic_increment(slave_retried_transactions, LOCK_status);
4359             mysql_mutex_unlock(&rli->data_lock);
4360             DBUG_PRINT("info", ("Slave retries transaction "
4361                                 "rgi->trans_retries: %lu",
4362                                 serial_rgi->trans_retries));
4363           }
4364         }
4365         else
4366           sql_print_error("Slave SQL thread retried transaction %lu time(s) "
4367                           "in vain, giving up. Consider raising the value of "
4368                           "the slave_transaction_retries variable.",
4369                           slave_trans_retries);
4370       }
4371       else if ((exec_res && !temp_err) ||
4372                (opt_using_transactions &&
4373                 rli->group_relay_log_pos == rli->event_relay_log_pos))
4374       {
4375         /*
4376           Only reset the retry counter if the entire group succeeded
4377           or failed with a non-transient error.  On a successful
4378           event, the execution will proceed as usual; in the case of a
4379           non-transient error, the slave will stop with an error.
4380          */
4381         serial_rgi->trans_retries= 0; // restart from fresh
4382         DBUG_PRINT("info", ("Resetting retry counter, rgi->trans_retries: %lu",
4383                             serial_rgi->trans_retries));
4384       }
4385     }
4386 
4387     rli->executed_entries++;
4388 #ifdef WITH_WSREP
4389     wsrep_after_statement(thd);
4390 #endif /* WITH_WSREP */
4391     DBUG_EXECUTE_IF(
4392         "pause_sql_thread_on_fde",
4393         if (ev && typ == FORMAT_DESCRIPTION_EVENT) {
4394           DBUG_ASSERT(!debug_sync_set_action(
4395               thd,
4396               STRING_WITH_LEN(
4397                   "now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue")));
4398         });
4399 
4400     DBUG_RETURN(exec_res);
4401   }
4402   mysql_mutex_unlock(&rli->data_lock);
4403   rli->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_READ_FAILURE, NULL,
4404               ER_THD(thd, ER_SLAVE_RELAY_LOG_READ_FAILURE), "\
4405 Could not parse relay log event entry. The possible reasons are: the master's \
4406 binary log is corrupted (you can check this by running 'mysqlbinlog' on the \
4407 binary log), the slave's relay log is corrupted (you can check this by running \
4408 'mysqlbinlog' on the relay log), a network problem, or a bug in the master's \
4409 or slave's MySQL code. If you want to check the master's binary log or slave's \
4410 relay log, you will be able to know their names by issuing 'SHOW SLAVE STATUS' \
4411 on this slave.\
4412 ");
4413   DBUG_RETURN(1);
4414 }
4415 
4416 
4417 static bool check_io_slave_killed(Master_info *mi, const char *info)
4418 {
4419   if (io_slave_killed(mi))
4420   {
4421     if (info && global_system_variables.log_warnings)
4422       sql_print_information("%s", info);
4423     return TRUE;
4424   }
4425   return FALSE;
4426 }
4427 
4428 /**
4429   @brief Try to reconnect slave IO thread.
4430 
4431   @details Terminates current connection to master, sleeps for
4432   @c mi->connect_retry seconds (no sleep when @c *retry_count is 0 on
4433   entry) and initiates new connection with @c safe_reconnect(). The variable
4434   pointed to by @c retry_count is increased - if it exceeds
4435   @c master_retry_count then connection is not re-established and function
4436   signals error. Unless @c suppress_warnings is TRUE, a warning is put in the server error log
4437   when reconnecting. The warning message and messages used to report errors
4438   are taken from @c messages array. In case @c master_retry_count is exceeded,
4439   no messages are added to the log.
4440 
4441   @param[in]     thd                 Thread context.
4442   @param[in]     mysql               MySQL connection.
4443   @param[in]     mi                  Master connection information.
4444   @param[in,out] retry_count         Number of attempts to reconnect.
4445   @param[in]     suppress_warnings   TRUE when a normal net read timeout
4446                                      has caused to reconnecting.
4447   @param[in]     messages            Messages to print/log, see
4448                                      reconnect_messages[] array.
4449 
4450   @retval        0                   OK.
4451   @retval        1                   There was an error.
4452 */
4453 
4454 static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
4455                             uint *retry_count, bool suppress_warnings,
4456                             const char *messages[SLAVE_RECON_MSG_MAX])
4457 {
4458   mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
4459   thd->proc_info= messages[SLAVE_RECON_MSG_WAIT];
4460 #ifdef SIGNAL_WITH_VIO_CLOSE
4461   thd->clear_active_vio();
4462 #endif
4463   end_server(mysql);
4464   if ((*retry_count)++)
4465   {
4466     if (*retry_count > master_retry_count)
4467       return 1;                             // Don't retry forever
4468     slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
4469   }
4470   if (check_io_slave_killed(mi, messages[SLAVE_RECON_MSG_KILLED_WAITING]))
4471     return 1;
4472   thd->proc_info = messages[SLAVE_RECON_MSG_AFTER];
4473   if (!suppress_warnings)
4474   {
4475     char buf[256];
4476     StringBuffer<100> tmp;
4477     if (mi->using_gtid != Master_info::USE_GTID_NO)
4478     {
4479       tmp.append(STRING_WITH_LEN("; GTID position '"));
4480       mi->gtid_current_pos.append_to_string(&tmp);
4481       if (mi->events_queued_since_last_gtid == 0)
4482         tmp.append(STRING_WITH_LEN("'"));
4483       else
4484       {
4485         tmp.append(STRING_WITH_LEN("', GTID event skip "));
4486         tmp.append_ulonglong((ulonglong)mi->events_queued_since_last_gtid);
4487       }
4488     }
4489     my_snprintf(buf, sizeof(buf), messages[SLAVE_RECON_MSG_FAILED],
4490                 IO_RPL_LOG_NAME, mi->master_log_pos,
4491                 tmp.c_ptr_safe());
4492     /*
4493       Raise a warining during registering on master/requesting dump.
4494       Log a message reading event.
4495     */
4496     if (messages[SLAVE_RECON_MSG_COMMAND][0])
4497     {
4498       mi->report(WARNING_LEVEL, ER_SLAVE_MASTER_COM_FAILURE, NULL,
4499                  ER_THD(thd, ER_SLAVE_MASTER_COM_FAILURE),
4500                  messages[SLAVE_RECON_MSG_COMMAND], buf);
4501     }
4502     else
4503     {
4504       sql_print_information("%s", buf);
4505     }
4506   }
4507   if (safe_reconnect(thd, mysql, mi, 1) || io_slave_killed(mi))
4508   {
4509     if (global_system_variables.log_warnings)
4510       sql_print_information("%s", messages[SLAVE_RECON_MSG_KILLED_AFTER]);
4511     return 1;
4512   }
4513   return 0;
4514 }
4515 
4516 
4517 /**
4518   Slave IO thread entry point.
4519 
4520   @param arg Pointer to Master_info struct that holds information for
4521   the IO thread.
4522 
4523   @return Always 0.
4524 */
4525 pthread_handler_t handle_slave_io(void *arg)
4526 {
4527   THD *thd; // needs to be first for thread_stack
4528   MYSQL *mysql;
4529   Master_info *mi = (Master_info*)arg;
4530   Relay_log_info *rli= &mi->rli;
4531   uint retry_count;
4532   bool suppress_warnings;
4533   int ret;
4534   rpl_io_thread_info io_info;
4535 #ifndef DBUG_OFF
4536   mi->dbug_do_disconnect= false;
4537 #endif
4538   // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
4539   my_thread_init();
4540   DBUG_ENTER("handle_slave_io");
4541 
4542   DBUG_ASSERT(mi->inited);
4543   mysql= NULL ;
4544   retry_count= 0;
4545 
4546   thd= new THD(next_thread_id()); // note that contructor of THD uses DBUG_ !
4547 
4548   mysql_mutex_lock(&mi->run_lock);
4549   /* Inform waiting threads that slave has started */
4550   mi->slave_run_id++;
4551 
4552 #ifndef DBUG_OFF
4553   mi->events_till_disconnect = disconnect_slave_event_count;
4554 #endif
4555 
4556   THD_CHECK_SENTRY(thd);
4557   mi->io_thd = thd;
4558 
4559   pthread_detach_this_thread();
4560   thd->thread_stack= (char*) &thd; // remember where our stack is
4561   mi->clear_error();
4562   if (init_slave_thread(thd, mi, SLAVE_THD_IO))
4563   {
4564     mysql_cond_broadcast(&mi->start_cond);
4565     sql_print_error("Failed during slave I/O thread initialization");
4566     goto err_during_init;
4567   }
4568   thd->system_thread_info.rpl_io_info= &io_info;
4569   server_threads.insert(thd);
4570   mi->slave_running = MYSQL_SLAVE_RUN_NOT_CONNECT;
4571   mi->abort_slave = 0;
4572   mysql_mutex_unlock(&mi->run_lock);
4573   mysql_cond_broadcast(&mi->start_cond);
4574   mi->rows_event_tracker.reset();
4575 
4576   DBUG_PRINT("master_info",("log_file_name: '%s'  position: %llu",
4577                             mi->master_log_name, mi->master_log_pos));
4578 
4579   /* This must be called before run any binlog_relay_io hooks */
4580   my_pthread_setspecific_ptr(RPL_MASTER_INFO, mi);
4581 
4582   /* Load the set of seen GTIDs, if we did not already. */
4583   if (rpl_load_gtid_slave_state(thd))
4584   {
4585     mi->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
4586                 "Unable to load replication GTID slave state from mysql.%s: %s",
4587                 rpl_gtid_slave_state_table_name.str,
4588                 thd->get_stmt_da()->message());
4589     /*
4590       If we are using old-style replication, we can continue, even though we
4591       then will not be able to record the GTIDs we receive. But if using GTID,
4592       we must give up.
4593     */
4594     if (mi->using_gtid != Master_info::USE_GTID_NO || opt_gtid_strict_mode)
4595       goto err;
4596   }
4597 
4598   thd->variables.wsrep_on= 0;
4599   if (DBUG_EVALUATE_IF("failed_slave_start", 1, 0)
4600       || repl_semisync_slave.slave_start(mi))
4601   {
4602     mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
4603                ER_THD(thd, ER_SLAVE_FATAL_ERROR),
4604                "Failed to run 'thread_start' hook");
4605     goto err;
4606   }
4607 
4608   if (!(mi->mysql = mysql = mysql_init(NULL)))
4609   {
4610     mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
4611                ER_THD(thd, ER_SLAVE_FATAL_ERROR), "error in mysql_init()");
4612     goto err;
4613   }
4614 
4615   THD_STAGE_INFO(thd, stage_connecting_to_master);
4616   // we can get killed during safe_connect
4617   if (!safe_connect(thd, mysql, mi))
4618   {
4619     if (mi->using_gtid == Master_info::USE_GTID_NO)
4620       sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
4621                             "replication started in log '%s' at position %llu",
4622                             mi->user, mi->host, mi->port,
4623                             IO_RPL_LOG_NAME, mi->master_log_pos);
4624     else
4625     {
4626       StringBuffer<100> tmp;
4627       mi->gtid_current_pos.to_string(&tmp);
4628       sql_print_information("Slave I/O thread: connected to master '%s@%s:%d',"
4629                             "replication starts at GTID position '%s'",
4630                             mi->user, mi->host, mi->port, tmp.c_ptr_safe());
4631     }
4632   }
4633   else
4634   {
4635     sql_print_information("Slave I/O thread killed while connecting to master");
4636     goto err;
4637   }
4638 
4639 connected:
4640 
4641   if (mi->using_gtid != Master_info::USE_GTID_NO)
4642   {
4643     /*
4644       When the IO thread (re)connects to the master using GTID, it will
4645       connect at the start of an event group. But the IO thread may have
4646       previously logged part of the following event group to the relay
4647       log.
4648 
4649       When the IO and SQL thread are started together, we erase any previous
4650       relay logs, but this is not possible/desirable while the SQL thread is
4651       running. To avoid duplicating partial event groups in the relay logs in
4652       this case, we remember the count of events in any partially logged event
4653       group before the reconnect, and then here at connect we set up a counter
4654       to skip the already-logged part of the group.
4655     */
4656     mi->gtid_reconnect_event_skip_count= mi->events_queued_since_last_gtid;
4657     mi->gtid_event_seen= false;
4658     /*
4659       Reset stale state of the rows-event group tracker at reconnect.
4660     */
4661     mi->rows_event_tracker.reset();
4662   }
4663 
4664 #ifdef ENABLED_DEBUG_SYNC
4665     DBUG_EXECUTE_IF("dbug.before_get_running_status_yes",
4666                     {
4667                       const char act[]=
4668                         "now "
4669                         "wait_for signal.io_thread_let_running";
4670                       DBUG_ASSERT(debug_sync_service);
4671                       DBUG_ASSERT(!debug_sync_set_action(thd,
4672                                                          STRING_WITH_LEN(act)));
4673                     };);
4674 #endif
4675 
4676   mysql_mutex_lock(&mi->run_lock);
4677   mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
4678   mysql_mutex_unlock(&mi->run_lock);
4679 
4680   thd->slave_net = &mysql->net;
4681   THD_STAGE_INFO(thd, stage_checking_master_version);
4682   ret= get_master_version_and_clock(mysql, mi);
4683   if (ret == 1)
4684     /* Fatal error */
4685     goto err;
4686 
4687   if (ret == 2)
4688   {
4689     if (check_io_slave_killed(mi, "Slave I/O thread killed "
4690                               "while calling get_master_version_and_clock(...)"))
4691       goto err;
4692     suppress_warnings= FALSE;
4693     /*
4694       Try to reconnect because the error was caused by a transient network
4695       problem
4696     */
4697     if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4698                              reconnect_messages[SLAVE_RECON_ACT_REG]))
4699       goto err;
4700     goto connected;
4701   }
4702 
4703   if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1)
4704   {
4705     /*
4706       Register ourselves with the master.
4707     */
4708     THD_STAGE_INFO(thd, stage_registering_slave_on_master);
4709     if (register_slave_on_master(mysql, mi, &suppress_warnings))
4710     {
4711       if (!check_io_slave_killed(mi, "Slave I/O thread killed "
4712                                 "while registering slave on master"))
4713       {
4714         sql_print_error("Slave I/O thread couldn't register on master");
4715         if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4716                              reconnect_messages[SLAVE_RECON_ACT_REG]))
4717           goto err;
4718       }
4719       else
4720         goto err;
4721       goto connected;
4722     }
4723     DBUG_EXECUTE_IF("fail_com_register_slave", goto err;);
4724   }
4725 
4726   DBUG_PRINT("info",("Starting reading binary log from master"));
4727   thd->set_command(COM_SLAVE_IO);
4728   while (!io_slave_killed(mi))
4729   {
4730     THD_STAGE_INFO(thd, stage_requesting_binlog_dump);
4731     if (request_dump(thd, mysql, mi, &suppress_warnings))
4732     {
4733       sql_print_error("Failed on request_dump()");
4734       if (check_io_slave_killed(mi, NullS) ||
4735         try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4736                          reconnect_messages[SLAVE_RECON_ACT_DUMP]))
4737         goto err;
4738       goto connected;
4739     }
4740 
4741     const char *event_buf;
4742 
4743     mi->slave_running= MYSQL_SLAVE_RUN_READING;
4744     DBUG_ASSERT(mi->last_error().number == 0);
4745     ulonglong lastchecktime = my_hrtime().val;
4746     ulonglong tokenamount   = opt_read_binlog_speed_limit*1024;
4747     while (!io_slave_killed(mi))
4748     {
4749       ulong event_len, network_read_len = 0;
4750       /*
4751          We say "waiting" because read_event() will wait if there's nothing to
4752          read. But if there's something to read, it will not wait. The
4753          important thing is to not confuse users by saying "reading" whereas
4754          we're in fact receiving nothing.
4755       */
4756       THD_STAGE_INFO(thd, stage_waiting_for_master_to_send_event);
4757       event_len= read_event(mysql, mi, &suppress_warnings, &network_read_len);
4758       if (check_io_slave_killed(mi, NullS))
4759         goto err;
4760 
4761       if (unlikely(event_len == packet_error))
4762       {
4763         uint mysql_error_number= mysql_errno(mysql);
4764         switch (mysql_error_number) {
4765         case CR_NET_PACKET_TOO_LARGE:
4766           sql_print_error("\
4767 Log entry on master is longer than slave_max_allowed_packet (%lu) on \
4768 slave. If the entry is correct, restart the server with a higher value of \
4769 slave_max_allowed_packet",
4770                          slave_max_allowed_packet);
4771           mi->report(ERROR_LEVEL, ER_NET_PACKET_TOO_LARGE, NULL,
4772                      "%s", "Got a packet bigger than 'slave_max_allowed_packet' bytes");
4773           goto err;
4774         case ER_MASTER_FATAL_ERROR_READING_BINLOG:
4775           mi->report(ERROR_LEVEL, ER_MASTER_FATAL_ERROR_READING_BINLOG, NULL,
4776                      ER_THD(thd, ER_MASTER_FATAL_ERROR_READING_BINLOG),
4777                      mysql_error_number, mysql_error(mysql));
4778           goto err;
4779         case ER_OUT_OF_RESOURCES:
4780           sql_print_error("\
4781 Stopping slave I/O thread due to out-of-memory error from master");
4782           mi->report(ERROR_LEVEL, ER_OUT_OF_RESOURCES, NULL,
4783                      "%s", ER_THD(thd, ER_OUT_OF_RESOURCES));
4784           goto err;
4785         }
4786         if (try_to_reconnect(thd, mysql, mi, &retry_count, suppress_warnings,
4787                              reconnect_messages[SLAVE_RECON_ACT_EVENT]))
4788           goto err;
4789         goto connected;
4790       } // if (event_len == packet_error)
4791 
4792       retry_count=0;                    // ok event, reset retry counter
4793       THD_STAGE_INFO(thd, stage_queueing_master_event_to_the_relay_log);
4794       event_buf= (const char*)mysql->net.read_pos + 1;
4795       mi->semi_ack= 0;
4796       if (repl_semisync_slave.
4797           slave_read_sync_header((const char*)mysql->net.read_pos + 1, event_len,
4798                                  &(mi->semi_ack), &event_buf, &event_len))
4799       {
4800         mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
4801                    ER_THD(thd, ER_SLAVE_FATAL_ERROR),
4802                    "Failed to run 'after_read_event' hook");
4803         goto err;
4804       }
4805 
4806       /* Control the binlog read speed of master
4807          when read_binlog_speed_limit is non-zero
4808       */
4809       ulonglong speed_limit_in_bytes = opt_read_binlog_speed_limit * 1024;
4810       if (speed_limit_in_bytes)
4811       {
4812         /* Prevent the tokenamount become a large value,
4813            for example, the IO thread doesn't work for a long time
4814         */
4815         if (tokenamount > speed_limit_in_bytes * 2)
4816         {
4817           lastchecktime = my_hrtime().val;
4818           tokenamount = speed_limit_in_bytes * 2;
4819         }
4820 
4821         do
4822         {
4823           ulonglong currenttime = my_hrtime().val;
4824           tokenamount += (currenttime - lastchecktime) * speed_limit_in_bytes / (1000*1000);
4825           lastchecktime = currenttime;
4826           if(tokenamount < network_read_len)
4827           {
4828             ulonglong duration =1000ULL*1000 * (network_read_len - tokenamount) / speed_limit_in_bytes;
4829             time_t second_time = (time_t)(duration / (1000 * 1000));
4830             uint micro_time = duration % (1000 * 1000);
4831 
4832             // at least sleep 1000 micro second
4833             my_sleep(MY_MAX(micro_time,1000));
4834 
4835             /*
4836               If it sleep more than one second,
4837               it should use slave_sleep() to avoid the STOP SLAVE hang.
4838             */
4839             if (second_time)
4840               slave_sleep(thd, second_time, io_slave_killed, mi);
4841 
4842           }
4843         }while(tokenamount < network_read_len);
4844         tokenamount -= network_read_len;
4845       }
4846 
4847       if (queue_event(mi, event_buf, event_len))
4848       {
4849         mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
4850                    ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
4851                    "could not queue event from master");
4852         goto err;
4853       }
4854 
4855       if (rpl_semi_sync_slave_status && (mi->semi_ack & SEMI_SYNC_NEED_ACK))
4856       {
4857         /*
4858           We deliberately ignore the error in slave_reply, such error should
4859           not cause the slave IO thread to stop, and the error messages are
4860           already reported.
4861         */
4862         (void)repl_semisync_slave.slave_reply(mi);
4863       }
4864 
4865       if (mi->using_gtid == Master_info::USE_GTID_NO &&
4866           /*
4867             If rpl_semi_sync_slave_delay_master is enabled, we will flush
4868             master info only when ack is needed. This may lead to at least one
4869             group transaction delay but affords better performance improvement.
4870           */
4871           (!repl_semisync_slave.get_slave_enabled() ||
4872            (!(mi->semi_ack & SEMI_SYNC_SLAVE_DELAY_SYNC) ||
4873             (mi->semi_ack & (SEMI_SYNC_NEED_ACK)))) &&
4874           (DBUG_EVALUATE_IF("failed_flush_master_info", 1, 0) ||
4875            flush_master_info(mi, TRUE, TRUE)))
4876       {
4877         sql_print_error("Failed to flush master info file");
4878         goto err;
4879       }
4880       /*
4881         See if the relay logs take too much space.
4882         We don't lock mi->rli.log_space_lock here; this dirty read saves time
4883         and does not introduce any problem:
4884         - if mi->rli.ignore_log_space_limit is 1 but becomes 0 just after (so
4885         the clean value is 0), then we are reading only one more event as we
4886         should, and we'll block only at the next event. No big deal.
4887         - if mi->rli.ignore_log_space_limit is 0 but becomes 1 just
4888         after (so the clean value is 1), then we are going into
4889         wait_for_relay_log_space() for no reason, but this function
4890         will do a clean read, notice the clean value and exit
4891         immediately.
4892       */
4893 #ifndef DBUG_OFF
4894       {
4895         DBUG_PRINT("info", ("log_space_limit=%llu log_space_total=%llu "
4896                             "ignore_log_space_limit=%d",
4897                             rli->log_space_limit, uint64(rli->log_space_total),
4898                             (int) rli->ignore_log_space_limit));
4899       }
4900 #endif
4901 
4902       if (rli->log_space_limit && rli->log_space_limit <
4903           rli->log_space_total &&
4904           !rli->ignore_log_space_limit)
4905         if (wait_for_relay_log_space(rli))
4906         {
4907           sql_print_error("Slave I/O thread aborted while waiting for relay \
4908 log space");
4909           goto err;
4910         }
4911     }
4912   }
4913 
4914   // error = 0;
4915 err:
4916   // print the current replication position
4917   if (mi->using_gtid == Master_info::USE_GTID_NO)
4918   {
4919     sql_print_information("Slave I/O thread exiting, read up to log '%s', "
4920                           "position %llu", IO_RPL_LOG_NAME, mi->master_log_pos);
4921     sql_print_information("master was %s:%d", mi->host, mi->port);
4922   }
4923   else
4924   {
4925     StringBuffer<100> tmp;
4926     mi->gtid_current_pos.to_string(&tmp);
4927     sql_print_information("Slave I/O thread exiting, read up to log '%s', "
4928                           "position %llu; GTID position %s",
4929                           IO_RPL_LOG_NAME, mi->master_log_pos,
4930                           tmp.c_ptr_safe());
4931     sql_print_information("master was %s:%d", mi->host, mi->port);
4932   }
4933   repl_semisync_slave.slave_stop(mi);
4934   thd->reset_query();
4935   thd->reset_db(&null_clex_str);
4936   if (mysql)
4937   {
4938     /*
4939       Here we need to clear the active VIO before closing the
4940       connection with the master.  The reason is that THD::awake()
4941       might be called from terminate_slave_thread() because somebody
      issued a STOP SLAVE.  If that happens, the close_active_vio()
4943       can be called in the middle of closing the VIO associated with
4944       the 'mysql' object, causing a crash.
4945     */
4946 #ifdef SIGNAL_WITH_VIO_CLOSE
4947     thd->clear_active_vio();
4948 #endif
4949     mysql_close(mysql);
4950     mi->mysql=0;
4951   }
4952   write_ignored_events_info_to_relay_log(thd, mi);
4953   if (mi->using_gtid != Master_info::USE_GTID_NO)
4954     flush_master_info(mi, TRUE, TRUE);
4955   THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
4956   thd->add_status_to_global();
4957   server_threads.erase(thd);
4958   mysql_mutex_lock(&mi->run_lock);
4959 
4960 err_during_init:
4961   /* Forget the relay log's format */
4962   delete mi->rli.relay_log.description_event_for_queue;
4963   mi->rli.relay_log.description_event_for_queue= 0;
4964   // TODO: make rpl_status part of Master_info
4965   change_rpl_status(RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE);
4966 
4967   thd->assert_not_linked();
4968   delete thd;
4969 
4970   mi->abort_slave= 0;
4971   mi->slave_running= MYSQL_SLAVE_NOT_RUN;
4972   mi->io_thd= 0;
4973   /*
4974     Note: the order of the two following calls (first broadcast, then unlock)
4975     is important. Otherwise a killer_thread can execute between the calls and
4976     delete the mi structure leading to a crash! (see BUG#25306 for details)
4977    */
4978   mysql_cond_broadcast(&mi->stop_cond);       // tell the world we are done
4979   DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
4980   mysql_mutex_unlock(&mi->run_lock);
4981 
4982   DBUG_LEAVE;                                   // Must match DBUG_ENTER()
4983   my_thread_end();
4984   ERR_remove_state(0);
4985   pthread_exit(0);
4986   return 0;                                     // Avoid compiler warnings
4987 }
4988 
4989 /*
4990   Check the temporary directory used by commands like
4991   LOAD DATA INFILE.
4992 
4993   As the directory never changes during a mysqld run, we only
4994   test this once and cache the result. This also resolve a race condition
4995   when this can be run by multiple threads at the same time.
4996  */
4997 
4998 static bool check_temp_dir_run= 0;
4999 static int check_temp_dir_result= 0;
5000 
5001 static
5002 int check_temp_dir(char* tmp_file)
5003 {
5004   File fd;
5005   int result= 1;                                // Assume failure
5006   MY_DIR *dirp;
5007   char tmp_dir[FN_REFLEN];
5008   size_t tmp_dir_size;
5009   DBUG_ENTER("check_temp_dir");
5010 
5011   /* This look is safe to use as this function is only called once */
5012   mysql_mutex_lock(&LOCK_start_thread);
5013   if (check_temp_dir_run)
5014   {
5015     if ((result= check_temp_dir_result))
5016       my_message(result, tmp_file, MYF(0));
5017     goto end;
5018   }
5019   check_temp_dir_run= 1;
5020 
5021   /*
5022     Get the directory from the temporary file.
5023   */
5024   dirname_part(tmp_dir, tmp_file, &tmp_dir_size);
5025 
5026   /*
5027     Check if the directory exists.
5028    */
5029   if (!(dirp=my_dir(tmp_dir,MYF(MY_WME))))
5030     goto end;
5031   my_dirend(dirp);
5032 
5033   /*
5034     Check permissions to create a file. We use O_TRUNC to ensure that
5035     things works even if we happen to have and old file laying around.
5036    */
5037   if ((fd= mysql_file_create(key_file_misc,
5038                              tmp_file, CREATE_MODE,
5039                              O_WRONLY | O_BINARY | O_TRUNC | O_NOFOLLOW,
5040                              MYF(MY_WME))) < 0)
5041     goto end;
5042 
5043   result= 0;                                    // Directory name ok
5044   /*
5045     Clean up.
5046    */
5047   mysql_file_close(fd, MYF(0));
5048   mysql_file_delete(key_file_misc, tmp_file, MYF(0));
5049 
5050 end:
5051   mysql_mutex_unlock(&LOCK_start_thread);
5052   DBUG_RETURN(result);
5053 }
5054 
5055 
5056 void
5057 slave_output_error_info(rpl_group_info *rgi, THD *thd)
5058 {
5059   /*
5060     retrieve as much info as possible from the thd and, error
5061     codes and warnings and print this to the error log as to
5062     allow the user to locate the error
5063   */
5064   Relay_log_info *rli= rgi->rli;
5065   uint32 const last_errno= rli->last_error().number;
5066 
5067   if (unlikely(thd->is_error()))
5068   {
5069     char const *const errmsg= thd->get_stmt_da()->message();
5070 
5071     DBUG_PRINT("info",
5072                ("thd->get_stmt_da()->sql_errno()=%d; rli->last_error.number=%d",
5073                 thd->get_stmt_da()->sql_errno(), last_errno));
5074     if (last_errno == 0)
5075     {
5076       /*
5077         This function is reporting an error which was not reported
5078         while executing exec_relay_log_event().
5079       */
5080       rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(),
5081                   rgi->gtid_info(), "%s", errmsg);
5082     }
5083     else if (last_errno != thd->get_stmt_da()->sql_errno())
5084     {
5085       /*
5086        * An error was reported while executing exec_relay_log_event()
5087        * however the error code differs from what is in the thread.
5088        * This function prints out more information to help finding
5089        * what caused the problem.
5090        */
5091       sql_print_error("Slave (additional info): %s Error_code: %d",
5092                       errmsg, thd->get_stmt_da()->sql_errno());
5093     }
5094   }
5095 
5096   /* Print any warnings issued */
5097   Diagnostics_area::Sql_condition_iterator it=
5098     thd->get_stmt_da()->sql_conditions();
5099   const Sql_condition *err;
5100   /*
5101     Added controlled slave thread cancel for replication
5102     of user-defined variables.
5103   */
5104   bool udf_error = false;
5105   while ((err= it++))
5106   {
5107     if (err->get_sql_errno() == ER_CANT_OPEN_LIBRARY)
5108       udf_error = true;
5109     sql_print_warning("Slave: %s Error_code: %d", err->get_message_text(), err->get_sql_errno());
5110   }
5111   if (unlikely(udf_error))
5112   {
5113     StringBuffer<100> tmp;
5114     if (rli->mi->using_gtid != Master_info::USE_GTID_NO)
5115     {
5116       tmp.append(STRING_WITH_LEN("; GTID position '"));
5117       rpl_append_gtid_state(&tmp, false);
5118       tmp.append(STRING_WITH_LEN("'"));
5119     }
5120     sql_print_error("Error loading user-defined library, slave SQL "
5121       "thread aborted. Install the missing library, and restart the "
5122       "slave SQL thread with \"SLAVE START\". We stopped at log '%s' "
5123       "position %llu%s", RPL_LOG_NAME, rli->group_master_log_pos,
5124       tmp.c_ptr_safe());
5125   }
5126   else
5127   {
5128     StringBuffer<100> tmp;
5129     if (rli->mi->using_gtid != Master_info::USE_GTID_NO)
5130     {
5131       tmp.append(STRING_WITH_LEN("; GTID position '"));
5132       rpl_append_gtid_state(&tmp, false);
5133       tmp.append(STRING_WITH_LEN("'"));
5134     }
5135     sql_print_error("Error running query, slave SQL thread aborted. "
5136                     "Fix the problem, and restart the slave SQL thread "
5137                     "with \"SLAVE START\". We stopped at log '%s' position "
5138                     "%llu%s", RPL_LOG_NAME, rli->group_master_log_pos,
5139                     tmp.c_ptr_safe());
5140   }
5141 }
5142 
5143 
5144 /**
5145   Slave SQL thread entry point.
5146 
5147   @param arg Pointer to Relay_log_info object that holds information
5148   for the SQL thread.
5149 
5150   @return Always 0.
5151 */
5152 pthread_handler_t handle_slave_sql(void *arg)
5153 {
5154   THD *thd;                     /* needs to be first for thread_stack */
5155   char saved_log_name[FN_REFLEN];
5156   char saved_master_log_name[FN_REFLEN];
5157   my_off_t UNINIT_VAR(saved_log_pos);
5158   my_off_t UNINIT_VAR(saved_master_log_pos);
5159   String saved_skip_gtid_pos;
5160   my_off_t saved_skip= 0;
5161   Master_info *mi= ((Master_info*)arg);
5162   Relay_log_info* rli = &mi->rli;
5163   my_bool wsrep_node_dropped __attribute__((unused)) = FALSE;
5164   const char *errmsg;
5165   rpl_group_info *serial_rgi;
5166   rpl_sql_thread_info sql_info(mi->rpl_filter);
5167 
5168   // needs to call my_thread_init(), otherwise we get a coredump in DBUG_ stuff
5169   my_thread_init();
5170   DBUG_ENTER("handle_slave_sql");
5171 
5172 #ifdef WITH_WSREP
5173  wsrep_restart_point:
5174 #endif
5175 
5176   serial_rgi= new rpl_group_info(rli);
5177   thd = new THD(next_thread_id()); // note that contructor of THD uses DBUG_ !
5178   thd->thread_stack = (char*)&thd; // remember where our stack is
5179   thd->system_thread_info.rpl_sql_info= &sql_info;
5180 
5181   DBUG_ASSERT(rli->inited);
5182   DBUG_ASSERT(rli->mi == mi);
5183   mysql_mutex_lock(&rli->run_lock);
5184   DBUG_ASSERT(!rli->slave_running);
5185   errmsg= 0;
5186 #ifndef DBUG_OFF
5187   rli->events_till_abort = abort_slave_event_count;
5188 #endif
5189 
5190   /*
5191     THD for the sql driver thd. In parallel replication this is the thread
5192     that reads things from the relay log and calls rpl_parallel::do_event()
5193     to execute queries.
5194 
5195     In single thread replication this is the THD for the thread that is
5196     executing SQL queries too.
5197   */
5198   serial_rgi->thd= rli->sql_driver_thd= thd;
5199 
5200   /* Inform waiting threads that slave has started */
5201   rli->slave_run_id++;
5202   rli->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
5203 
5204   pthread_detach_this_thread();
5205 
5206   if (opt_slave_parallel_threads > 0 &&
5207       rpl_parallel_activate_pool(&global_rpl_thread_pool))
5208   {
5209     mysql_cond_broadcast(&rli->start_cond);
5210     rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
5211                 "Failed during parallel slave pool activation");
5212     goto err_during_init;
5213   }
5214 
5215   if (init_slave_thread(thd, mi, SLAVE_THD_SQL))
5216   {
5217     /*
5218       TODO: this is currently broken - slave start and change master
5219       will be stuck if we fail here
5220     */
5221     mysql_cond_broadcast(&rli->start_cond);
5222     rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
5223                 "Failed during slave thread initialization");
5224     goto err_during_init;
5225   }
5226   thd->init_for_queries();
5227   thd->rgi_slave= serial_rgi;
5228   if ((serial_rgi->deferred_events_collecting= mi->rpl_filter->is_on()))
5229   {
5230     serial_rgi->deferred_events= new Deferred_log_events(rli);
5231   }
5232 
5233   /*
5234     binlog_annotate_row_events must be TRUE only after an Annotate_rows event
5235     has been received and only till the last corresponding rbr event has been
5236     applied. In all other cases it must be FALSE.
5237   */
5238   thd->variables.binlog_annotate_row_events= 0;
5239 
5240   /* Ensure that slave can exeute any alter table it gets from master */
5241   thd->variables.alter_algorithm= (ulong) Alter_info::ALTER_TABLE_ALGORITHM_DEFAULT;
5242 
5243   server_threads.insert(thd);
5244   /*
5245     We are going to set slave_running to 1. Assuming slave I/O thread is
5246     alive and connected, this is going to make Seconds_Behind_Master be 0
5247     i.e. "caught up". Even if we're just at start of thread. Well it's ok, at
5248     the moment we start we can think we are caught up, and the next second we
5249     start receiving data so we realize we are not caught up and
5250     Seconds_Behind_Master grows. No big deal.
5251   */
5252   rli->abort_slave = 0;
5253   rli->stop_for_until= false;
5254   mysql_mutex_unlock(&rli->run_lock);
5255   mysql_cond_broadcast(&rli->start_cond);
5256 
5257   /*
5258     Reset errors for a clean start (otherwise, if the master is idle, the SQL
5259     thread may execute no Query_log_event, so the error will remain even
5260     though there's no problem anymore). Do not reset the master timestamp
5261     (imagine the slave has caught everything, the STOP SLAVE and START SLAVE:
5262     as we are not sure that we are going to receive a query, we want to
5263     remember the last master timestamp (to say how many seconds behind we are
5264     now.
5265     But the master timestamp is reset by RESET SLAVE & CHANGE MASTER.
5266   */
5267   rli->clear_error();
5268   rli->parallel.reset();
5269 
5270   //tell the I/O thread to take relay_log_space_limit into account from now on
5271   rli->ignore_log_space_limit= 0;
5272 
5273   serial_rgi->gtid_sub_id= 0;
5274   serial_rgi->gtid_pending= false;
5275   if (mi->using_gtid != Master_info::USE_GTID_NO && mi->using_parallel() &&
5276       rli->restart_gtid_pos.count() > 0)
5277   {
5278     /*
5279       With parallel replication in GTID mode, if we have a multi-domain GTID
5280       position, we need to start some way back in the relay log and skip any
5281       GTID that was already applied before. Since event groups can be split
5282       across multiple relay logs, this earlier starting point may be in the
5283       middle of an already applied event group, so we also need to skip any
5284       remaining part of such group.
5285     */
5286     rli->gtid_skip_flag = GTID_SKIP_TRANSACTION;
5287   }
5288   else
5289     rli->gtid_skip_flag = GTID_SKIP_NOT;
5290   if (init_relay_log_pos(rli,
5291                          rli->group_relay_log_name,
5292                          rli->group_relay_log_pos,
5293                          1 /*need data lock*/, &errmsg,
5294                          1 /*look for a description_event*/))
5295   {
5296     rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
5297                 "Error initializing relay log position: %s", errmsg);
5298     goto err_before_start;
5299   }
5300   rli->reset_inuse_relaylog();
5301   if (rli->alloc_inuse_relaylog(rli->group_relay_log_name))
5302     goto err_before_start;
5303 
5304   strcpy(rli->future_event_master_log_name, rli->group_master_log_name);
5305   THD_CHECK_SENTRY(thd);
5306 #ifndef DBUG_OFF
5307   {
5308     DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%llu "
5309                         "rli->event_relay_log_pos=%llu",
5310                         my_b_tell(rli->cur_log), rli->event_relay_log_pos));
5311     DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
5312     /*
5313       Wonder if this is correct. I (Guilhem) wonder if my_b_tell() returns the
5314       correct position when it's called just after my_b_seek() (the questionable
5315       stuff is those "seek is done on next read" comments in the my_b_seek()
5316       source code).
5317       The crude reality is that this assertion randomly fails whereas
5318       replication seems to work fine. And there is no easy explanation why it
5319       fails (as we my_b_seek(rli->event_relay_log_pos) at the very end of
5320       init_relay_log_pos() called above). Maybe the assertion would be
5321       meaningful if we held rli->data_lock between the my_b_seek() and the
5322       DBUG_ASSERT().
5323     */
5324 #ifdef SHOULD_BE_CHECKED
5325     DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
5326 #endif
5327   }
5328 #endif
5329 
5330   DBUG_PRINT("master_info",("log_file_name: %s  position: %llu",
5331                             rli->group_master_log_name,
5332                             rli->group_master_log_pos));
5333   if (global_system_variables.log_warnings)
5334   {
5335     StringBuffer<100> tmp;
5336     if (mi->using_gtid != Master_info::USE_GTID_NO)
5337     {
5338       tmp.append(STRING_WITH_LEN("; GTID position '"));
5339       rpl_append_gtid_state(&tmp,
5340                             mi->using_gtid==Master_info::USE_GTID_CURRENT_POS);
5341       tmp.append(STRING_WITH_LEN("'"));
5342     }
5343     sql_print_information("Slave SQL thread initialized, starting replication "
5344                           "in log '%s' at position %llu, relay log '%s' "
5345                           "position: %llu%s", RPL_LOG_NAME,
5346                     rli->group_master_log_pos, rli->group_relay_log_name,
5347                     rli->group_relay_log_pos, tmp.c_ptr_safe());
5348   }
5349 
5350   if (check_temp_dir(rli->slave_patternload_file))
5351   {
5352     check_temp_dir_result= thd->get_stmt_da()->sql_errno();
5353     rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
5354                 "Unable to use slave's temporary directory %s - %s",
5355                 slave_load_tmpdir, thd->get_stmt_da()->message());
5356     goto err;
5357   }
5358   else
5359     check_temp_dir_result= 0;
5360 
5361   /* Load the set of seen GTIDs, if we did not already. */
5362   if (rpl_load_gtid_slave_state(thd))
5363   {
5364     rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
5365                 "Unable to load replication GTID slave state from mysql.%s: %s",
5366                 rpl_gtid_slave_state_table_name.str,
5367                 thd->get_stmt_da()->message());
5368     /*
5369       If we are using old-style replication, we can continue, even though we
5370       then will not be able to record the GTIDs we receive. But if using GTID,
5371       we must give up.
5372     */
5373     if (mi->using_gtid != Master_info::USE_GTID_NO || opt_gtid_strict_mode)
5374       goto err;
5375   }
5376   /* Re-load the set of mysql.gtid_slave_posXXX tables available. */
5377   if (find_gtid_slave_pos_tables(thd))
5378   {
5379     rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
5380                 "Error processing replication GTID position tables: %s",
5381                 thd->get_stmt_da()->message());
5382     goto err;
5383   }
5384 
5385   /* execute init_slave variable */
5386   if (opt_init_slave.length)
5387   {
5388     execute_init_command(thd, &opt_init_slave, &LOCK_sys_init_slave);
5389     if (unlikely(thd->is_slave_error))
5390     {
5391       rli->report(ERROR_LEVEL, thd->get_stmt_da()->sql_errno(), NULL,
5392                   "Slave SQL thread aborted. Can't execute init_slave query");
5393       goto err;
5394     }
5395   }
5396 
5397   /*
5398     First check until condition - probably there is nothing to execute. We
5399     do not want to wait for next event in this case.
5400   */
5401   mysql_mutex_lock(&rli->data_lock);
5402   if (rli->slave_skip_counter)
5403   {
5404     strmake_buf(saved_log_name, rli->group_relay_log_name);
5405     strmake_buf(saved_master_log_name, rli->group_master_log_name);
5406     saved_log_pos= rli->group_relay_log_pos;
5407     saved_master_log_pos= rli->group_master_log_pos;
5408     if (mi->using_gtid != Master_info::USE_GTID_NO)
5409     {
5410       saved_skip_gtid_pos.append(STRING_WITH_LEN(", GTID '"));
5411       rpl_append_gtid_state(&saved_skip_gtid_pos, false);
5412       saved_skip_gtid_pos.append(STRING_WITH_LEN("'; "));
5413     }
5414     saved_skip= rli->slave_skip_counter;
5415   }
5416   if ((rli->until_condition == Relay_log_info::UNTIL_MASTER_POS ||
5417        rli->until_condition == Relay_log_info::UNTIL_RELAY_POS) &&
5418       rli->is_until_satisfied(NULL))
5419   {
5420     sql_print_information("Slave SQL thread stopped because it reached its"
5421                           " UNTIL position %llu in %s %s file",
5422                           rli->until_pos(), rli->until_name(),
5423                           rli->until_condition ==
5424                           Relay_log_info::UNTIL_MASTER_POS ?
5425                           "binlog" : "relaylog");
5426     mysql_mutex_unlock(&rli->data_lock);
5427     goto err;
5428   }
5429   mysql_mutex_unlock(&rli->data_lock);
5430 #ifdef WITH_WSREP
5431   wsrep_open(thd);
5432   if (wsrep_before_command(thd))
5433   {
5434     WSREP_WARN("Slave SQL wsrep_before_command() failed");
5435     goto err;
5436   }
5437 #endif /* WITH_WSREP */
5438   /* Read queries from the IO/THREAD until this thread is killed */
5439 
5440   thd->set_command(COM_SLAVE_SQL);
5441   while (!sql_slave_killed(serial_rgi))
5442   {
5443     THD_STAGE_INFO(thd, stage_reading_event_from_the_relay_log);
5444     THD_CHECK_SENTRY(thd);
5445 
5446     if (saved_skip && rli->slave_skip_counter == 0)
5447     {
5448       StringBuffer<100> tmp;
5449       if (mi->using_gtid != Master_info::USE_GTID_NO)
5450       {
5451         tmp.append(STRING_WITH_LEN(", GTID '"));
5452         rpl_append_gtid_state(&tmp, false);
5453         tmp.append(STRING_WITH_LEN("'; "));
5454       }
5455 
5456       sql_print_information("'SQL_SLAVE_SKIP_COUNTER=%ld' executed at "
5457         "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
5458         "master_log_pos='%ld'%s and new position at "
5459         "relay_log_file='%s', relay_log_pos='%ld', master_log_name='%s', "
5460         "master_log_pos='%ld'%s ",
5461         (ulong) saved_skip, saved_log_name, (ulong) saved_log_pos,
5462         saved_master_log_name, (ulong) saved_master_log_pos,
5463         saved_skip_gtid_pos.c_ptr_safe(),
5464         rli->group_relay_log_name, (ulong) rli->group_relay_log_pos,
5465         rli->group_master_log_name, (ulong) rli->group_master_log_pos,
5466         tmp.c_ptr_safe());
5467       saved_skip= 0;
5468       saved_skip_gtid_pos.free();
5469     }
5470 
5471     if (exec_relay_log_event(thd, rli, serial_rgi))
5472     {
5473 #ifdef WITH_WSREP
5474       if (WSREP(thd))
5475       {
5476         mysql_mutex_lock(&thd->LOCK_thd_data);
5477 
5478         if (thd->wsrep_cs().current_error())
5479         {
5480           wsrep_node_dropped = TRUE;
5481           rli->abort_slave   = TRUE;
5482         }
5483         mysql_mutex_unlock(&thd->LOCK_thd_data);
5484       }
5485 #endif /* WITH_WSREP */
5486 
5487       DBUG_PRINT("info", ("exec_relay_log_event() failed"));
5488       // do not scare the user if SQL thread was simply killed or stopped
5489       if (!sql_slave_killed(serial_rgi))
5490       {
5491         slave_output_error_info(serial_rgi, thd);
5492         if (WSREP(thd) && rli->last_error().number == ER_UNKNOWN_COM_ERROR)
5493         {
5494           wsrep_node_dropped= TRUE;
5495         }
5496       }
5497       goto err;
5498     }
5499   }
5500 
5501  err:
5502   if (mi->using_parallel())
5503     rli->parallel.wait_for_done(thd, rli);
5504   /* Gtid_list_log_event::do_apply_event has already reported the GTID until */
5505   if (rli->stop_for_until && rli->until_condition != Relay_log_info::UNTIL_GTID)
5506   {
5507     if (global_system_variables.log_warnings > 2)
5508       sql_print_information("Slave SQL thread UNTIL stop was requested at position "
5509                             "%llu in %s %s file",
5510                             rli->until_log_pos, rli->until_log_name,
5511                             rli->until_condition ==
5512                             Relay_log_info::UNTIL_MASTER_POS ?
5513                             "binlog" : "relaylog");
5514     sql_print_information("Slave SQL thread stopped because it reached its"
5515                           " UNTIL position %llu in %s %s file",
5516                           rli->until_pos(), rli->until_name(),
5517                           rli->until_condition ==
5518                           Relay_log_info::UNTIL_MASTER_POS ?
5519                           "binlog" : "relaylog");
5520 
5521   };
5522   /* Thread stopped. Print the current replication position to the log */
5523   {
5524     StringBuffer<100> tmp;
5525     if (mi->using_gtid != Master_info::USE_GTID_NO)
5526     {
5527       tmp.append(STRING_WITH_LEN("; GTID position '"));
5528       rpl_append_gtid_state(&tmp, false);
5529       tmp.append(STRING_WITH_LEN("'"));
5530     }
5531     sql_print_information("Slave SQL thread exiting, replication stopped in "
5532                           "log '%s' at position %llu%s", RPL_LOG_NAME,
5533                           rli->group_master_log_pos, tmp.c_ptr_safe());
5534     sql_print_information("master was %s:%d", mi->host, mi->port);
5535   }
5536 #ifdef WITH_WSREP
5537   wsrep_after_command_before_result(thd);
5538   wsrep_after_command_after_result(thd);
5539 #endif /* WITH_WSREP */
5540 
5541  err_before_start:
5542 
5543   /*
5544     Some events set some playgrounds, which won't be cleared because thread
5545     stops. Stopping of this thread may not be known to these events ("stop"
5546     request is detected only by the present function, not by events), so we
5547     must "proactively" clear playgrounds:
5548   */
5549   thd->clear_error();
5550   serial_rgi->cleanup_context(thd, 1);
5551   /*
5552     Some extra safety, which should not been needed (normally, event deletion
5553     should already have done these assignments (each event which sets these
5554     variables is supposed to set them to 0 before terminating)).
5555   */
5556   thd->catalog= 0;
5557   thd->reset_query();
5558   thd->reset_db(&null_clex_str);
5559   if (rli->mi->using_gtid != Master_info::USE_GTID_NO)
5560   {
5561     ulong domain_count;
5562     my_bool save_log_all_errors= thd->log_all_errors;
5563 
5564     /*
5565       We don't need to check return value for rli->flush()
5566       as any errors should be logged to stderr
5567     */
5568     thd->log_all_errors= 1;
5569     rli->flush();
5570     thd->log_all_errors= save_log_all_errors;
5571     if (mi->using_parallel())
5572     {
5573       /*
5574         In parallel replication GTID mode, we may stop with different domains
5575         at different positions in the relay log.
5576 
5577         To handle this when we restart the SQL thread, mark the current
5578         per-domain position in the Relay_log_info.
5579       */
5580       mysql_mutex_lock(&rpl_global_gtid_slave_state->LOCK_slave_state);
5581       domain_count= rpl_global_gtid_slave_state->count();
5582       mysql_mutex_unlock(&rpl_global_gtid_slave_state->LOCK_slave_state);
5583       if (domain_count > 1)
5584       {
5585         inuse_relaylog *ir;
5586 
5587         /*
5588           Load the starting GTID position, so that we can skip already applied
5589           GTIDs when we restart the SQL thread. And set the start position in
5590           the relay log back to a known safe place to start (prior to any not
5591           yet applied transaction in any domain).
5592         */
5593         rli->restart_gtid_pos.load(rpl_global_gtid_slave_state, NULL, 0);
5594         if ((ir= rli->inuse_relaylog_list))
5595         {
5596           rpl_gtid *gtid= ir->relay_log_state;
5597           uint32 count= ir->relay_log_state_count;
5598           while (count > 0)
5599           {
5600             process_gtid_for_restart_pos(rli, gtid);
5601             ++gtid;
5602             --count;
5603           }
5604           strmake_buf(rli->group_relay_log_name, ir->name);
5605           rli->group_relay_log_pos= BIN_LOG_HEADER_SIZE;
5606           rli->relay_log_state.load(ir->relay_log_state, ir->relay_log_state_count);
5607         }
5608       }
5609     }
5610   }
5611   THD_STAGE_INFO(thd, stage_waiting_for_slave_mutex_on_exit);
5612   thd->add_status_to_global();
5613   server_threads.erase(thd);
5614   mysql_mutex_lock(&rli->run_lock);
5615 
5616 err_during_init:
5617   /* We need data_lock, at least to wake up any waiting master_pos_wait() */
5618   mysql_mutex_lock(&rli->data_lock);
5619   DBUG_ASSERT(rli->slave_running == MYSQL_SLAVE_RUN_NOT_CONNECT); // tracking buffer overrun
5620   /* When master_pos_wait() wakes up it will check this and terminate */
5621   rli->slave_running= MYSQL_SLAVE_NOT_RUN;
5622   /* Forget the relay log's format */
5623   delete rli->relay_log.description_event_for_exec;
5624   rli->relay_log.description_event_for_exec= 0;
5625   rli->reset_inuse_relaylog();
5626   /* Wake up master_pos_wait() */
5627   mysql_mutex_unlock(&rli->data_lock);
5628   DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
5629   mysql_cond_broadcast(&rli->data_cond);
5630   rli->ignore_log_space_limit= 0; /* don't need any lock */
5631   /* we die so won't remember charset - re-update them on next thread start */
5632   thd->system_thread_info.rpl_sql_info->cached_charset_invalidate();
5633 
5634   /*
5635     TODO: see if we can do this conditionally in next_event() instead
5636     to avoid unneeded position re-init
5637 
5638     We only reset THD::temporary_tables to 0 here and not free it, as this
5639     could be used by slave through Relay_log_info::save_temporary_tables.
5640   */
5641   thd->temporary_tables= 0;
5642   rli->sql_driver_thd= 0;
5643   thd->rgi_fake= thd->rgi_slave= NULL;
5644 
5645 #ifdef WITH_WSREP
5646   /*
5647     If slave stopped due to node going non primary, we set global flag to
5648     trigger automatic restart of slave when node joins back to cluster.
5649   */
5650   if (WSREP(thd) && wsrep_node_dropped && wsrep_restart_slave)
5651   {
5652     if (wsrep_ready_get())
5653     {
5654       WSREP_INFO("Slave error due to node temporarily non-primary"
5655                  "SQL slave will continue");
5656       wsrep_node_dropped= FALSE;
5657       mysql_mutex_unlock(&rli->run_lock);
5658       goto wsrep_restart_point;
5659     }
5660     else
5661     {
5662       WSREP_INFO("Slave error due to node going non-primary");
5663       WSREP_INFO("wsrep_restart_slave was set and therefore slave will be "
5664                  "automatically restarted when node joins back to cluster");
5665       wsrep_restart_slave_activated= TRUE;
5666     }
5667   }
5668   wsrep_close(thd);
5669 #endif /* WITH_WSREP */
5670 
5671  /*
5672    Note: the order of the broadcast and unlock calls below (first
5673    broadcast, then unlock) is important. Otherwise a killer_thread can
5674    execute between the calls and delete the mi structure leading to a
5675    crash! (see BUG#25306 for details)
5676  */
5677   mysql_cond_broadcast(&rli->stop_cond);
5678   DBUG_EXECUTE_IF("simulate_slave_delay_at_terminate_bug38694", sleep(5););
5679   mysql_mutex_unlock(&rli->run_lock);  // tell the world we are done
5680 
5681   rpl_parallel_resize_pool_if_no_slaves();
5682 
5683   delete serial_rgi;
5684   delete thd;
5685 
5686   DBUG_LEAVE;                                   // Must match DBUG_ENTER()
5687   my_thread_end();
5688   ERR_remove_state(0);
5689   pthread_exit(0);
5690   return 0;                                     // Avoid compiler warnings
5691 }
5692 
5693 
5694 /*
5695   process_io_create_file()
5696 */
5697 
5698 static int process_io_create_file(Master_info* mi, Create_file_log_event* cev)
5699 {
5700   int error = 1;
5701   ulong num_bytes;
5702   bool cev_not_written;
5703   THD *thd = mi->io_thd;
5704   NET *net = &mi->mysql->net;
5705   DBUG_ENTER("process_io_create_file");
5706 
5707   if (unlikely(!cev->is_valid()))
5708     DBUG_RETURN(1);
5709 
5710   if (!mi->rpl_filter->db_ok(cev->db))
5711   {
5712     skip_load_data_infile(net);
5713     DBUG_RETURN(0);
5714   }
5715   DBUG_ASSERT(cev->inited_from_old);
5716   thd->file_id = cev->file_id = mi->file_id++;
5717   thd->variables.server_id = cev->server_id;
5718   cev_not_written = 1;
5719 
5720   if (unlikely(net_request_file(net,cev->fname)))
5721   {
5722     sql_print_error("Slave I/O: failed requesting download of '%s'",
5723                     cev->fname);
5724     goto err;
5725   }
5726 
5727   /*
5728     This dummy block is so we could instantiate Append_block_log_event
5729     once and then modify it slightly instead of doing it multiple times
5730     in the loop
5731   */
5732   {
5733     Append_block_log_event aev(thd,0,0,0,0);
5734 
5735     for (;;)
5736     {
5737       if (unlikely((num_bytes=my_net_read(net)) == packet_error))
5738       {
5739         sql_print_error("Network read error downloading '%s' from master",
5740                         cev->fname);
5741         goto err;
5742       }
5743       if (unlikely(!num_bytes)) /* eof */
5744       {
5745 	/* 3.23 master wants it */
5746         net_write_command(net, 0, (uchar*) "", 0, (uchar*) "", 0);
5747         /*
5748           If we wrote Create_file_log_event, then we need to write
5749           Execute_load_log_event. If we did not write Create_file_log_event,
5750           then this is an empty file and we can just do as if the LOAD DATA
5751           INFILE had not existed, i.e. write nothing.
5752         */
5753         if (unlikely(cev_not_written))
5754           break;
5755         Execute_load_log_event xev(thd,0,0);
5756         xev.log_pos = cev->log_pos;
5757         if (unlikely(mi->rli.relay_log.append(&xev)))
5758         {
5759           mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
5760                      ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
5761                      "error writing Exec_load event to relay log");
5762           goto err;
5763         }
5764         mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total);
5765         break;
5766       }
5767       if (unlikely(cev_not_written))
5768       {
5769         cev->block = net->read_pos;
5770         cev->block_len = num_bytes;
5771         if (unlikely(mi->rli.relay_log.append(cev)))
5772         {
5773           mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
5774                      ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
5775                      "error writing Create_file event to relay log");
5776           goto err;
5777         }
5778         cev_not_written=0;
5779         mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total);
5780       }
5781       else
5782       {
5783         aev.block = net->read_pos;
5784         aev.block_len = num_bytes;
5785         aev.log_pos = cev->log_pos;
5786         if (unlikely(mi->rli.relay_log.append(&aev)))
5787         {
5788           mi->report(ERROR_LEVEL, ER_SLAVE_RELAY_LOG_WRITE_FAILURE, NULL,
5789                      ER_THD(thd, ER_SLAVE_RELAY_LOG_WRITE_FAILURE),
5790                      "error writing Append_block event to relay log");
5791           goto err;
5792         }
5793         mi->rli.relay_log.harvest_bytes_written(&mi->rli.log_space_total) ;
5794       }
5795     }
5796   }
5797   error=0;
5798 err:
5799   DBUG_RETURN(error);
5800 }
5801 
5802 
5803 /*
5804   Start using a new binary log on the master
5805 
5806   SYNOPSIS
5807     process_io_rotate()
5808     mi                  master_info for the slave
5809     rev                 The rotate log event read from the binary log
5810 
5811   DESCRIPTION
5812     Updates the master info with the place in the next binary
5813     log where we should start reading.
5814     Rotate the relay log to avoid mixed-format relay logs.
5815 
5816   NOTES
5817     We assume we already locked mi->data_lock
5818 
5819   RETURN VALUES
5820     0           ok
5821     1           Log event is illegal
5822 
5823 */
5824 
5825 static int process_io_rotate(Master_info *mi, Rotate_log_event *rev)
5826 {
5827   DBUG_ENTER("process_io_rotate");
5828   mysql_mutex_assert_owner(&mi->data_lock);
5829 
5830   if (unlikely(!rev->is_valid()))
5831     DBUG_RETURN(1);
5832 
5833   /* Safe copy as 'rev' has been "sanitized" in Rotate_log_event's ctor */
5834   memcpy(mi->master_log_name, rev->new_log_ident, rev->ident_len+1);
5835   mi->master_log_pos= rev->pos;
5836   DBUG_PRINT("info", ("master_log_pos: '%s' %lu",
5837                       mi->master_log_name, (ulong) mi->master_log_pos));
5838 #ifndef DBUG_OFF
5839   /*
5840     If we do not do this, we will be getting the first
5841     rotate event forever, so we need to not disconnect after one.
5842   */
5843   if (disconnect_slave_event_count)
5844     mi->events_till_disconnect++;
5845 #endif
5846 
5847   /*
5848     If description_event_for_queue is format <4, there is conversion in the
5849     relay log to the slave's format (4). And Rotate can mean upgrade or
5850     nothing. If upgrade, it's to 5.0 or newer, so we will get a Format_desc, so
5851     no need to reset description_event_for_queue now. And if it's nothing (same
5852     master version as before), no need (still using the slave's format).
5853   */
5854   if (mi->rli.relay_log.description_event_for_queue->binlog_version >= 4)
5855   {
5856     DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
5857                 mi->rli.relay_log.relay_log_checksum_alg);
5858 
5859     delete mi->rli.relay_log.description_event_for_queue;
5860     /* start from format 3 (MySQL 4.0) again */
5861     mi->rli.relay_log.description_event_for_queue= new
5862       Format_description_log_event(3);
5863     mi->rli.relay_log.description_event_for_queue->checksum_alg=
5864       mi->rli.relay_log.relay_log_checksum_alg;
5865   }
5866   /*
5867     Rotate the relay log makes binlog format detection easier (at next slave
5868     start or mysqlbinlog)
5869   */
5870   DBUG_RETURN(rotate_relay_log(mi) /* will take the right mutexes */);
5871 }
5872 
5873 /*
5874   Reads a 3.23 event and converts it to the slave's format. This code was
5875   copied from MySQL 4.0.
5876 */
5877 static int queue_binlog_ver_1_event(Master_info *mi, const char *buf,
5878                            ulong event_len)
5879 {
5880   const char *errmsg = 0;
5881   ulong inc_pos;
5882   bool ignore_event= 0;
5883   char *tmp_buf = 0;
5884   Relay_log_info *rli= &mi->rli;
5885   DBUG_ENTER("queue_binlog_ver_1_event");
5886 
5887   /*
5888     If we get Load event, we need to pass a non-reusable buffer
5889     to read_log_event, so we do a trick
5890   */
5891   if ((uchar)buf[EVENT_TYPE_OFFSET] == LOAD_EVENT)
5892   {
5893     if (unlikely(!(tmp_buf=(char*)my_malloc(event_len+1,MYF(MY_WME)))))
5894     {
5895       mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
5896                  ER(ER_SLAVE_FATAL_ERROR), "Memory allocation failed");
5897       DBUG_RETURN(1);
5898     }
5899     memcpy(tmp_buf,buf,event_len);
5900     /*
5901       Create_file constructor wants a 0 as last char of buffer, this 0 will
5902       serve as the string-termination char for the file's name (which is at the
5903       end of the buffer)
5904       We must increment event_len, otherwise the event constructor will not see
5905       this end 0, which leads to segfault.
5906     */
5907     tmp_buf[event_len++]=0;
5908     int4store(tmp_buf+EVENT_LEN_OFFSET, event_len);
5909     buf = (const char*)tmp_buf;
5910   }
5911   /*
5912     This will transform LOAD_EVENT into CREATE_FILE_EVENT, ask the master to
5913     send the loaded file, and write it to the relay log in the form of
5914     Append_block/Exec_load (the SQL thread needs the data, as that thread is not
5915     connected to the master).
5916   */
5917   Log_event *ev=
5918     Log_event::read_log_event(buf, event_len, &errmsg,
5919                               mi->rli.relay_log.description_event_for_queue, 0);
5920   if (unlikely(!ev))
5921   {
5922     sql_print_error("Read invalid event from master: '%s',\
5923  master could be corrupt but a more likely cause of this is a bug",
5924                     errmsg);
5925     my_free(tmp_buf);
5926     DBUG_RETURN(1);
5927   }
5928 
5929   mysql_mutex_lock(&mi->data_lock);
5930   ev->log_pos= mi->master_log_pos; /* 3.23 events don't contain log_pos */
5931   switch (ev->get_type_code()) {
5932   case STOP_EVENT:
5933     ignore_event= 1;
5934     inc_pos= event_len;
5935     break;
5936   case ROTATE_EVENT:
5937     if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
5938     {
5939       delete ev;
5940       mysql_mutex_unlock(&mi->data_lock);
5941       DBUG_RETURN(1);
5942     }
5943     inc_pos= 0;
5944     break;
5945   case CREATE_FILE_EVENT:
5946     /*
5947       Yes it's possible to have CREATE_FILE_EVENT here, even if we're in
5948       queue_old_event() which is for 3.23 events which don't comprise
5949       CREATE_FILE_EVENT. This is because read_log_event() above has just
5950       transformed LOAD_EVENT into CREATE_FILE_EVENT.
5951     */
5952   {
5953     /* We come here when and only when tmp_buf != 0 */
5954     DBUG_ASSERT(tmp_buf != 0);
5955     inc_pos=event_len;
5956     ev->log_pos+= inc_pos;
5957     int error = process_io_create_file(mi,(Create_file_log_event*)ev);
5958     delete ev;
5959     mi->master_log_pos += inc_pos;
5960     DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
5961     mysql_mutex_unlock(&mi->data_lock);
5962     my_free(tmp_buf);
5963     DBUG_RETURN(error);
5964   }
5965   default:
5966     inc_pos= event_len;
5967     break;
5968   }
5969   if (likely(!ignore_event))
5970   {
5971     if (ev->log_pos)
5972       /*
5973          Don't do it for fake Rotate events (see comment in
5974       Log_event::Log_event(const char* buf...) in log_event.cc).
5975       */
5976       ev->log_pos+= event_len; /* make log_pos be the pos of the end of the event */
5977     if (unlikely(rli->relay_log.append(ev)))
5978     {
5979       delete ev;
5980       mysql_mutex_unlock(&mi->data_lock);
5981       DBUG_RETURN(1);
5982     }
5983     rli->relay_log.harvest_bytes_written(&rli->log_space_total);
5984   }
5985   delete ev;
5986   mi->master_log_pos+= inc_pos;
5987   DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
5988   mysql_mutex_unlock(&mi->data_lock);
5989   DBUG_RETURN(0);
5990 }
5991 
5992 /*
5993   Reads a 4.0 event and converts it to the slave's format. This code was copied
5994   from queue_binlog_ver_1_event(), with some affordable simplifications.
5995 */
5996 static int queue_binlog_ver_3_event(Master_info *mi, const char *buf,
5997                            ulong event_len)
5998 {
5999   const char *errmsg = 0;
6000   ulong inc_pos;
6001   char *tmp_buf = 0;
6002   Relay_log_info *rli= &mi->rli;
6003   DBUG_ENTER("queue_binlog_ver_3_event");
6004 
6005   /* read_log_event() will adjust log_pos to be end_log_pos */
6006   Log_event *ev=
6007     Log_event::read_log_event(buf,event_len, &errmsg,
6008                               mi->rli.relay_log.description_event_for_queue, 0);
6009   if (unlikely(!ev))
6010   {
6011     sql_print_error("Read invalid event from master: '%s',\
6012  master could be corrupt but a more likely cause of this is a bug",
6013                     errmsg);
6014     my_free(tmp_buf);
6015     DBUG_RETURN(1);
6016   }
6017   mysql_mutex_lock(&mi->data_lock);
6018   switch (ev->get_type_code()) {
6019   case STOP_EVENT:
6020     goto err;
6021   case ROTATE_EVENT:
6022     if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
6023     {
6024       delete ev;
6025       mysql_mutex_unlock(&mi->data_lock);
6026       DBUG_RETURN(1);
6027     }
6028     inc_pos= 0;
6029     break;
6030   default:
6031     inc_pos= event_len;
6032     break;
6033   }
6034 
6035   if (unlikely(rli->relay_log.append(ev)))
6036   {
6037     delete ev;
6038     mysql_mutex_unlock(&mi->data_lock);
6039     DBUG_RETURN(1);
6040   }
6041   rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6042   delete ev;
6043   mi->master_log_pos+= inc_pos;
6044 err:
6045   DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
6046   mysql_mutex_unlock(&mi->data_lock);
6047   DBUG_RETURN(0);
6048 }
6049 
6050 /*
6051   queue_old_event()
6052 
6053   Writes a 3.23 or 4.0 event to the relay log, after converting it to the 5.0
6054   (exactly, slave's) format. To do the conversion, we create a 5.0 event from
6055   the 3.23/4.0 bytes, then write this event to the relay log.
6056 
6057   TODO:
6058     Test this code before release - it has to be tested on a separate
6059     setup with 3.23 master or 4.0 master
6060 */
6061 
6062 static int queue_old_event(Master_info *mi, const char *buf,
6063                            ulong event_len)
6064 {
6065   DBUG_ENTER("queue_old_event");
6066 
6067   switch (mi->rli.relay_log.description_event_for_queue->binlog_version)
6068   {
6069   case 1:
6070       DBUG_RETURN(queue_binlog_ver_1_event(mi,buf,event_len));
6071   case 3:
6072       DBUG_RETURN(queue_binlog_ver_3_event(mi,buf,event_len));
6073   default: /* unsupported format; eg version 2 */
6074     DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()",
6075                        mi->rli.relay_log.description_event_for_queue->binlog_version));
6076     DBUG_RETURN(1);
6077   }
6078 }
6079 
6080 /*
6081   queue_event()
6082 
6083   If the event is 3.23/4.0, passes it to queue_old_event() which will convert
6084   it. Otherwise, writes a 5.0 (or newer) event to the relay log. Then there is
6085   no format conversion, it's pure read/write of bytes.
6086   So a 5.0.0 slave's relay log can contain events in the slave's format or in
6087   any >=5.0.0 format.
6088 */
6089 
6090 static int queue_event(Master_info* mi,const char* buf, ulong event_len)
6091 {
6092   int error= 0;
6093   StringBuffer<1024> error_msg;
6094   ulonglong inc_pos= 0;
6095   ulonglong event_pos;
6096   Relay_log_info *rli= &mi->rli;
6097   mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
6098   ulong s_id;
6099   bool unlock_data_lock= TRUE;
6100   bool gtid_skip_enqueue= false;
6101   bool got_gtid_event= false;
6102   rpl_gtid event_gtid;
6103   static uint dbug_rows_event_count __attribute__((unused))= 0;
6104   bool is_compress_event = false;
6105   char* new_buf = NULL;
6106   char new_buf_arr[4096];
6107   bool is_malloc = false;
6108   bool is_rows_event= false;
6109   /*
6110     FD_q must have been prepared for the first R_a event
6111     inside get_master_version_and_clock()
6112     Show-up of FD:s affects checksum_alg at once because
6113     that changes FD_queue.
6114   */
6115   enum enum_binlog_checksum_alg checksum_alg=
6116     mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF ?
6117     mi->checksum_alg_before_fd : mi->rli.relay_log.relay_log_checksum_alg;
6118 
6119   char *save_buf= NULL; // needed for checksumming the fake Rotate event
6120   char rot_buf[LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN + FN_REFLEN];
6121 
6122   DBUG_ASSERT(checksum_alg == BINLOG_CHECKSUM_ALG_OFF ||
6123               checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF ||
6124               checksum_alg == BINLOG_CHECKSUM_ALG_CRC32);
6125 
6126   DBUG_ENTER("queue_event");
6127   /*
6128     FD_queue checksum alg description does not apply in a case of
6129     FD itself. The one carries both parts of the checksum data.
6130   */
6131   if (buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT)
6132   {
6133     checksum_alg= get_checksum_alg(buf, event_len);
6134   }
6135   else if (buf[EVENT_TYPE_OFFSET] == START_EVENT_V3)
6136   {
6137     // checksum behaviour is similar to the pre-checksum FD handling
6138     mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
6139     mi->rli.relay_log.description_event_for_queue->checksum_alg=
6140       mi->rli.relay_log.relay_log_checksum_alg= checksum_alg=
6141       BINLOG_CHECKSUM_ALG_OFF;
6142   }
6143 
6144   // does not hold always because of old binlog can work with NM
6145   // DBUG_ASSERT(checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF);
6146 
6147   // should hold unless manipulations with RL. Tests that do that
6148   // will have to refine the clause.
6149   DBUG_ASSERT(mi->rli.relay_log.relay_log_checksum_alg !=
6150               BINLOG_CHECKSUM_ALG_UNDEF);
6151 
6152   // Emulate the network corruption
6153   DBUG_EXECUTE_IF("corrupt_queue_event",
6154     if ((uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT)
6155     {
6156       char *debug_event_buf_c = (char*) buf;
6157       int debug_cor_pos = rand() % (event_len - BINLOG_CHECKSUM_LEN);
6158       debug_event_buf_c[debug_cor_pos] =~ debug_event_buf_c[debug_cor_pos];
6159       DBUG_PRINT("info", ("Corrupt the event at queue_event: byte on position %d", debug_cor_pos));
6160       DBUG_SET("-d,corrupt_queue_event");
6161     }
6162   );
6163 
6164   if (event_checksum_test((uchar *) buf, event_len, checksum_alg))
6165   {
6166     error= ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE;
6167     unlock_data_lock= FALSE;
6168     goto err;
6169   }
6170 
6171   if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 &&
6172       (uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
6173     DBUG_RETURN(queue_old_event(mi,buf,event_len));
6174 
6175 #ifdef ENABLED_DEBUG_SYNC
6176   /*
6177     A (+d,dbug.rows_events_to_delay_relay_logging)-test is supposed to
6178     create a few Write_log_events and after receiving the 1st of them
6179     the IO thread signals to launch the SQL thread, and sets itself to
6180     wait for a release signal.
6181   */
6182   DBUG_EXECUTE_IF("dbug.rows_events_to_delay_relay_logging",
6183                   if ((buf[EVENT_TYPE_OFFSET] == WRITE_ROWS_EVENT_V1 ||
6184                        buf[EVENT_TYPE_OFFSET] == WRITE_ROWS_EVENT) &&
6185                       ++dbug_rows_event_count == 2)
6186                   {
6187                     const char act[]=
6188                       "now SIGNAL start_sql_thread "
6189                       "WAIT_FOR go_on_relay_logging";
6190                     DBUG_ASSERT(debug_sync_service);
6191                     DBUG_ASSERT(!debug_sync_set_action(current_thd,
6192                                                        STRING_WITH_LEN(act)));
6193                     dbug_rows_event_count = 0;
6194                   };);
6195 #endif
6196   mysql_mutex_lock(&mi->data_lock);
6197 
6198   switch ((uchar)buf[EVENT_TYPE_OFFSET]) {
6199   case STOP_EVENT:
6200     /*
6201       We needn't write this event to the relay log. Indeed, it just indicates a
6202       master server shutdown. The only thing this does is cleaning. But
6203       cleaning is already done on a per-master-thread basis (as the master
6204       server is shutting down cleanly, it has written all DROP TEMPORARY TABLE
6205       prepared statements' deletion are TODO only when we binlog prep stmts).
6206 
6207       We don't even increment mi->master_log_pos, because we may be just after
6208       a Rotate event. Btw, in a few milliseconds we are going to have a Start
6209       event from the next binlog (unless the master is presently running
6210       without --log-bin).
6211     */
6212     goto err;
6213   case ROTATE_EVENT:
6214   {
6215     Rotate_log_event rev(buf, checksum_alg != BINLOG_CHECKSUM_ALG_OFF ?
6216                          event_len - BINLOG_CHECKSUM_LEN : event_len,
6217                          mi->rli.relay_log.description_event_for_queue);
6218 
6219     if (unlikely(mi->gtid_reconnect_event_skip_count) &&
6220         unlikely(!mi->gtid_event_seen) &&
6221         rev.is_artificial_event() &&
6222         (mi->prev_master_id != mi->master_id ||
6223          strcmp(rev.new_log_ident, mi->master_log_name) != 0))
6224     {
6225       /*
6226         Artificial Rotate_log_event is the first event we receive at the start
6227         of each master binlog file. It gives the name of the new binlog file.
6228 
6229         Normally, we already have this name from the real rotate event at the
6230         end of the previous binlog file (unless we are making a new connection
6231         using GTID). But if the master server restarted/crashed, there is no
6232         rotate event at the end of the prior binlog file, so the name is new.
6233 
6234         We use this fact to handle a special case of master crashing. If the
6235         master crashed while writing the binlog, it might end with a partial
6236         event group lacking the COMMIT/XID event, which must be rolled
6237         back. If the slave IO thread happens to get a disconnect in the middle
6238         of exactly this event group, it will try to reconnect at the same GTID
6239         and skip already fetched events. However, that GTID did not commit on
6240         the master before the crash, so it does not really exist, and the
6241         master will connect the slave at the next following GTID starting in
6242         the next binlog. This could confuse the slave and make it mix the
6243         start of one event group with the end of another.
6244 
6245         But we detect this case here, by noticing the change of binlog name
6246         which detects the missing rotate event at the end of the previous
6247         binlog file. In this case, we reset the counters to make us not skip
6248         the next event group, and queue an artificial Format Description
6249         event. The previously fetched incomplete event group will then be
6250         rolled back when the Format Description event is executed by the SQL
6251         thread.
6252 
6253         A similar case is if the reconnect somehow connects to a different
6254         master server (like due to a network proxy or IP address takeover).
6255         We detect this case by noticing a change of server_id and in this
6256         case likewise rollback the partially received event group.
6257       */
6258       Format_description_log_event fdle(4);
6259 
6260       if (mi->prev_master_id != mi->master_id)
6261         sql_print_warning("The server_id of master server changed in the "
6262                           "middle of GTID %u-%u-%llu. Assuming a change of "
6263                           "master server, so rolling back the previously "
6264                           "received partial transaction. Expected: %lu, "
6265                           "received: %lu", mi->last_queued_gtid.domain_id,
6266                           mi->last_queued_gtid.server_id,
6267                           mi->last_queued_gtid.seq_no,
6268                           mi->prev_master_id, mi->master_id);
6269       else if (strcmp(rev.new_log_ident, mi->master_log_name) != 0)
6270         sql_print_warning("Unexpected change of master binlog file name in the "
6271                           "middle of GTID %u-%u-%llu, assuming that master has "
6272                           "crashed and rolling back the transaction. Expected: "
6273                           "'%s', received: '%s'",
6274                           mi->last_queued_gtid.domain_id,
6275                           mi->last_queued_gtid.server_id,
6276                           mi->last_queued_gtid.seq_no,
6277                           mi->master_log_name, rev.new_log_ident);
6278 
6279       mysql_mutex_lock(log_lock);
6280       if (likely(!rli->relay_log.write_event(&fdle) &&
6281                  !rli->relay_log.flush_and_sync(NULL)))
6282       {
6283         rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6284       }
6285       else
6286       {
6287         error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6288         mysql_mutex_unlock(log_lock);
6289         goto err;
6290       }
6291       rli->relay_log.signal_relay_log_update();
6292       mysql_mutex_unlock(log_lock);
6293 
6294       mi->gtid_reconnect_event_skip_count= 0;
6295       mi->events_queued_since_last_gtid= 0;
6296     }
6297     mi->prev_master_id= mi->master_id;
6298 
6299     if (unlikely(process_io_rotate(mi, &rev)))
6300     {
6301       error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6302       goto err;
6303     }
6304     /*
6305        Checksum special cases for the fake Rotate (R_f) event caused by the protocol
6306        of events generation and serialization in RL where Rotate of master is
6307        queued right next to FD of slave.
6308        Since it's only FD that carries the alg desc of FD_s has to apply to R_m.
6309        Two special rules apply only to the first R_f which comes in before any FD_m.
6310        The 2nd R_f should be compatible with the FD_s that must have taken over
6311        the last seen FD_m's (A).
6312 
6313        RSC_1: If OM \and fake Rotate \and slave is configured to
6314               to compute checksum for its first FD event for RL
6315               the fake Rotate gets checksummed here.
6316     */
6317     if (uint4korr(&buf[0]) == 0 && checksum_alg == BINLOG_CHECKSUM_ALG_OFF &&
6318         mi->rli.relay_log.relay_log_checksum_alg != BINLOG_CHECKSUM_ALG_OFF)
6319     {
6320       ha_checksum rot_crc= 0;
6321       event_len += BINLOG_CHECKSUM_LEN;
6322       memcpy(rot_buf, buf, event_len - BINLOG_CHECKSUM_LEN);
6323       int4store(&rot_buf[EVENT_LEN_OFFSET],
6324                 uint4korr(&rot_buf[EVENT_LEN_OFFSET]) + BINLOG_CHECKSUM_LEN);
6325       rot_crc= my_checksum(rot_crc, (const uchar *) rot_buf,
6326                            event_len - BINLOG_CHECKSUM_LEN);
6327       int4store(&rot_buf[event_len - BINLOG_CHECKSUM_LEN], rot_crc);
6328       DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
6329       DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
6330                   mi->rli.relay_log.relay_log_checksum_alg);
6331       /* the first one */
6332       DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
6333       save_buf= (char *) buf;
6334       buf= rot_buf;
6335     }
6336     else
6337       /*
6338         RSC_2: If NM \and fake Rotate \and slave does not compute checksum
6339         the fake Rotate's checksum is stripped off before relay-logging.
6340       */
6341       if (uint4korr(&buf[0]) == 0 && checksum_alg != BINLOG_CHECKSUM_ALG_OFF &&
6342           mi->rli.relay_log.relay_log_checksum_alg == BINLOG_CHECKSUM_ALG_OFF)
6343       {
6344         event_len -= BINLOG_CHECKSUM_LEN;
6345         memcpy(rot_buf, buf, event_len);
6346         int4store(&rot_buf[EVENT_LEN_OFFSET],
6347                   uint4korr(&rot_buf[EVENT_LEN_OFFSET]) - BINLOG_CHECKSUM_LEN);
6348         DBUG_ASSERT(event_len == uint4korr(&rot_buf[EVENT_LEN_OFFSET]));
6349         DBUG_ASSERT(mi->rli.relay_log.description_event_for_queue->checksum_alg ==
6350                     mi->rli.relay_log.relay_log_checksum_alg);
6351         /* the first one */
6352         DBUG_ASSERT(mi->checksum_alg_before_fd != BINLOG_CHECKSUM_ALG_UNDEF);
6353         save_buf= (char *) buf;
6354         buf= rot_buf;
6355       }
6356     /*
6357       Now the I/O thread has just changed its mi->master_log_name, so
6358       incrementing mi->master_log_pos is nonsense.
6359     */
6360     inc_pos= 0;
6361     break;
6362   }
6363   case FORMAT_DESCRIPTION_EVENT:
6364   {
6365     /*
6366       Create an event, and save it (when we rotate the relay log, we will have
6367       to write this event again).
6368     */
6369     /*
6370       We are the only thread which reads/writes description_event_for_queue.
6371       The relay_log struct does not move (though some members of it can
6372       change), so we needn't any lock (no rli->data_lock, no log lock).
6373     */
6374     Format_description_log_event* tmp;
6375     const char* errmsg;
6376     // mark it as undefined that is irrelevant anymore
6377     mi->checksum_alg_before_fd= BINLOG_CHECKSUM_ALG_UNDEF;
6378     if (!(tmp= (Format_description_log_event*)
6379           Log_event::read_log_event(buf, event_len, &errmsg,
6380                                     mi->rli.relay_log.description_event_for_queue,
6381                                     1)))
6382     {
6383       error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6384       goto err;
6385     }
6386     tmp->copy_crypto_data(mi->rli.relay_log.description_event_for_queue);
6387     delete mi->rli.relay_log.description_event_for_queue;
6388     mi->rli.relay_log.description_event_for_queue= tmp;
6389     if (tmp->checksum_alg == BINLOG_CHECKSUM_ALG_UNDEF)
6390       tmp->checksum_alg= BINLOG_CHECKSUM_ALG_OFF;
6391 
6392     /* installing new value of checksum Alg for relay log */
6393     mi->rli.relay_log.relay_log_checksum_alg= tmp->checksum_alg;
6394 
6395     /*
6396       Do not queue any format description event that we receive after a
6397       reconnect where we are skipping over a partial event group received
6398       before the reconnect.
6399 
6400       (If we queued such an event, and it was the first format_description
6401       event after master restart, the slave SQL thread would think that
6402       the partial event group before it in the relay log was from a
6403       previous master crash and should be rolled back).
6404     */
6405     if (unlikely(mi->gtid_reconnect_event_skip_count && !mi->gtid_event_seen))
6406         gtid_skip_enqueue= true;
6407 
6408     /*
6409        Though this does some conversion to the slave's format, this will
6410        preserve the master's binlog format version, and number of event types.
6411     */
6412     /*
6413        If the event was not requested by the slave (the slave did not ask for
6414        it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos
6415     */
6416     inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
6417     DBUG_PRINT("info",("binlog format is now %d",
6418                        mi->rli.relay_log.description_event_for_queue->binlog_version));
6419 
6420   }
6421   break;
6422 
6423   case HEARTBEAT_LOG_EVENT:
6424   {
6425     /*
6426       HB (heartbeat) cannot come before RL (Relay)
6427     */
6428     Heartbeat_log_event hb(buf,
6429                            mi->rli.relay_log.relay_log_checksum_alg
6430                            != BINLOG_CHECKSUM_ALG_OFF ?
6431                            event_len - BINLOG_CHECKSUM_LEN : event_len,
6432                            mi->rli.relay_log.description_event_for_queue);
6433     if (!hb.is_valid())
6434     {
6435       error= ER_SLAVE_HEARTBEAT_FAILURE;
6436       error_msg.append(STRING_WITH_LEN("inconsistent heartbeat event content;"));
6437       error_msg.append(STRING_WITH_LEN("the event's data: log_file_name "));
6438       error_msg.append(hb.get_log_ident(), (uint) hb.get_ident_len());
6439       error_msg.append(STRING_WITH_LEN(" log_pos "));
6440       error_msg.append_ulonglong(hb.log_pos);
6441       goto err;
6442     }
6443     mi->received_heartbeats++;
6444     /*
6445        compare local and event's versions of log_file, log_pos.
6446 
6447        Heartbeat is sent only after an event corresponding to the corrdinates
6448        the heartbeat carries.
6449        Slave can not have a higher coordinate except in the only
6450        special case when mi->master_log_name, master_log_pos have never
6451        been updated by Rotate event i.e when slave does not have any history
6452        with the master (and thereafter mi->master_log_pos is NULL).
6453 
6454        Slave can have lower coordinates, if some event from master was omitted.
6455 
6456        TODO: handling `when' for SHOW SLAVE STATUS' snds behind
6457     */
6458     if (memcmp(mi->master_log_name, hb.get_log_ident(), hb.get_ident_len()) ||
6459         mi->master_log_pos > hb.log_pos) {
6460       /* missed events of heartbeat from the past */
6461       error= ER_SLAVE_HEARTBEAT_FAILURE;
6462       error_msg.append(STRING_WITH_LEN("heartbeat is not compatible with local info;"));
6463       error_msg.append(STRING_WITH_LEN("the event's data: log_file_name "));
6464       error_msg.append(hb.get_log_ident(), (uint) hb.get_ident_len());
6465       error_msg.append(STRING_WITH_LEN(" log_pos "));
6466       error_msg.append_ulonglong(hb.log_pos);
6467       goto err;
6468     }
6469 
6470     /*
      Heartbeat events do not count in the binlog size, so we don't have to
6472       increment mi->master_log_pos
6473     */
6474     goto skip_relay_logging;
6475   }
6476   break;
6477 
6478   case GTID_LIST_EVENT:
6479   {
6480     const char *errmsg;
6481     Gtid_list_log_event *glev;
6482     Log_event *tmp;
6483     uint32 flags;
6484 
6485     if (!(tmp= Log_event::read_log_event(buf, event_len, &errmsg,
6486            mi->rli.relay_log.description_event_for_queue,
6487            opt_slave_sql_verify_checksum)))
6488     {
6489       error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6490       goto err;
6491     }
6492     glev= static_cast<Gtid_list_log_event *>(tmp);
6493     event_pos= glev->log_pos;
6494     flags= glev->gl_flags;
6495     delete glev;
6496 
6497     /*
6498       We use fake Gtid_list events to update the old-style position (among
6499       other things).
6500 
6501       Early code created fake Gtid_list events with zero log_pos, those should
6502       not modify old-style position.
6503     */
6504     if (event_pos == 0 || event_pos <= mi->master_log_pos)
6505       inc_pos= 0;
6506     else
6507       inc_pos= event_pos - mi->master_log_pos;
6508 
6509     if (mi->rli.until_condition == Relay_log_info::UNTIL_GTID &&
6510         flags & Gtid_list_log_event::FLAG_UNTIL_REACHED)
6511     {
6512       char str_buf[128];
6513       String str(str_buf, sizeof(str_buf), system_charset_info);
6514       mi->rli.until_gtid_pos.to_string(&str);
6515       sql_print_information("Slave I/O thread stops because it reached its"
6516                             " UNTIL master_gtid_pos %s", str.c_ptr_safe());
6517       mi->abort_slave= true;
6518     }
6519   }
6520   break;
6521 
6522   case GTID_EVENT:
6523   {
6524     DBUG_EXECUTE_IF("kill_slave_io_after_2_events",
6525                     {
6526                       mi->dbug_do_disconnect= true;
6527                       mi->dbug_event_counter= 2;
6528                     };);
6529 
6530     uchar gtid_flag;
6531 
6532     if (Gtid_log_event::peek(buf, event_len, checksum_alg,
6533                              &event_gtid.domain_id, &event_gtid.server_id,
6534                              &event_gtid.seq_no, &gtid_flag,
6535                              rli->relay_log.description_event_for_queue))
6536     {
6537       error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6538       goto err;
6539     }
6540     got_gtid_event= true;
6541     if (mi->using_gtid == Master_info::USE_GTID_NO)
6542       goto default_action;
6543     if (unlikely(mi->gtid_reconnect_event_skip_count))
6544     {
6545       if (likely(!mi->gtid_event_seen))
6546       {
6547         mi->gtid_event_seen= true;
6548         /*
6549           If we are reconnecting, and we need to skip a partial event group
6550           already queued to the relay log before the reconnect, then we check
6551           that we actually get the same event group (same GTID) as before, so
6552           we do not end up with half of one group and half another.
6553 
6554           The only way we should be able to receive a different GTID than what
6555           we expect is if the binlog on the master (or more likely the whole
6556           master server) was replaced with a different one, on the same IP
6557           address, _and_ the new master happens to have domains in a different
6558           order so we get the GTID from a different domain first. Still, it is
6559           best to protect against this case.
6560         */
6561         if (event_gtid.domain_id != mi->last_queued_gtid.domain_id ||
6562             event_gtid.server_id != mi->last_queued_gtid.server_id ||
6563             event_gtid.seq_no != mi->last_queued_gtid.seq_no)
6564         {
6565           bool first;
6566           error= ER_SLAVE_UNEXPECTED_MASTER_SWITCH;
6567           error_msg.append(STRING_WITH_LEN("Expected: "));
6568           first= true;
6569           rpl_slave_state_tostring_helper(&error_msg, &mi->last_queued_gtid,
6570                                           &first);
6571           error_msg.append(STRING_WITH_LEN(", received: "));
6572           first= true;
6573           rpl_slave_state_tostring_helper(&error_msg, &event_gtid, &first);
6574           goto err;
6575         }
6576         if (global_system_variables.log_warnings > 1)
6577         {
6578           bool first= true;
6579           StringBuffer<1024> gtid_text;
6580           rpl_slave_state_tostring_helper(&gtid_text, &mi->last_queued_gtid,
6581                                           &first);
6582           sql_print_information("Slave IO thread is reconnected to "
6583                                 "receive Gtid_log_event %s. It is to skip %llu "
6584                                 "already received events including the gtid one",
6585                                 gtid_text.ptr(),
6586                                 mi->events_queued_since_last_gtid);
6587         }
6588         goto default_action;
6589       }
6590       else
6591       {
6592         bool first;
6593         StringBuffer<1024> gtid_text;
6594 
6595         gtid_text.append(STRING_WITH_LEN("Last received gtid: "));
6596         first= true;
6597         rpl_slave_state_tostring_helper(&gtid_text, &mi->last_queued_gtid,
6598                                           &first);
6599         gtid_text.append(STRING_WITH_LEN(", currently received: "));
6600         first= true;
6601         rpl_slave_state_tostring_helper(&gtid_text, &event_gtid, &first);
6602 
6603         error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6604         sql_print_error("Slave IO thread has received a new Gtid_log_event "
6605                         "while skipping already logged events "
6606                         "after reconnect. %s. %llu remains to be skipped. "
6607                         "The number of originally read events was %llu",
6608                         gtid_text.ptr(),
6609                         mi->gtid_reconnect_event_skip_count,
6610                         mi->events_queued_since_last_gtid);
6611         goto err;
6612       }
6613     }
6614     mi->gtid_event_seen= true;
6615 
6616     /*
6617       We have successfully queued to relay log everything before this GTID, so
6618       in case of reconnect we can start from after any previous GTID.
6619       (Normally we would have updated gtid_current_pos earlier at the end of
6620       the previous event group, but better leave an extra check here for
6621       safety).
6622     */
6623     if (mi->events_queued_since_last_gtid)
6624     {
6625       mi->gtid_current_pos.update(&mi->last_queued_gtid);
6626       mi->events_queued_since_last_gtid= 0;
6627     }
6628     mi->last_queued_gtid= event_gtid;
6629     mi->last_queued_gtid_standalone=
6630       (gtid_flag & Gtid_log_event::FL_STANDALONE) != 0;
6631 
6632     /* Should filter all the subsequent events in the current GTID group? */
6633     mi->domain_id_filter.do_filter(event_gtid.domain_id);
6634 
6635     ++mi->events_queued_since_last_gtid;
6636     inc_pos= event_len;
6637   }
6638   break;
6639   /*
6640     Binlog compressed event should uncompress in IO thread
6641   */
6642   case QUERY_COMPRESSED_EVENT:
6643     inc_pos= event_len;
6644     if (query_event_uncompress(rli->relay_log.description_event_for_queue,
6645                                checksum_alg == BINLOG_CHECKSUM_ALG_CRC32,
6646                                buf, event_len, new_buf_arr, sizeof(new_buf_arr),
6647                                &is_malloc, (char **)&new_buf, &event_len))
6648     {
6649       char  llbuf[22];
6650       error = ER_BINLOG_UNCOMPRESS_ERROR;
6651       error_msg.append(STRING_WITH_LEN("binlog uncompress error, master log_pos: "));
6652       llstr(mi->master_log_pos, llbuf);
6653       error_msg.append(llbuf, strlen(llbuf));
6654       goto err;
6655     }
6656     buf = new_buf;
6657     is_compress_event = true;
6658     goto default_action;
6659 
6660   case WRITE_ROWS_COMPRESSED_EVENT:
6661   case UPDATE_ROWS_COMPRESSED_EVENT:
6662   case DELETE_ROWS_COMPRESSED_EVENT:
6663   case WRITE_ROWS_COMPRESSED_EVENT_V1:
6664   case UPDATE_ROWS_COMPRESSED_EVENT_V1:
6665   case DELETE_ROWS_COMPRESSED_EVENT_V1:
6666     inc_pos = event_len;
6667     {
6668       if (row_log_event_uncompress(rli->relay_log.description_event_for_queue,
6669                                    checksum_alg == BINLOG_CHECKSUM_ALG_CRC32,
6670                                    buf, event_len, new_buf_arr, sizeof(new_buf_arr),
6671                                    &is_malloc, (char **)&new_buf, &event_len))
6672       {
6673         char  llbuf[22];
6674         error = ER_BINLOG_UNCOMPRESS_ERROR;
6675         error_msg.append(STRING_WITH_LEN("binlog uncompress error, master log_pos: "));
6676         llstr(mi->master_log_pos, llbuf);
6677         error_msg.append(llbuf, strlen(llbuf));
6678         goto err;
6679       }
6680     }
6681     is_compress_event = true;
6682     buf = new_buf;
6683     /*
6684       As we are uncertain about compressed V2 rows events, we don't track
6685       them
6686     */
6687     if (LOG_EVENT_IS_ROW_V2((Log_event_type) buf[EVENT_TYPE_OFFSET]))
6688       goto default_action;
6689     /* fall through */
6690   case WRITE_ROWS_EVENT_V1:
6691   case UPDATE_ROWS_EVENT_V1:
6692   case DELETE_ROWS_EVENT_V1:
6693   case WRITE_ROWS_EVENT:
6694   case UPDATE_ROWS_EVENT:
6695   case DELETE_ROWS_EVENT:
6696     {
6697       is_rows_event= true;
6698       mi->rows_event_tracker.update(mi->master_log_name,
6699                                     mi->master_log_pos,
6700                                     buf,
6701                                     mi->rli.relay_log.
6702                                     description_event_for_queue);
6703 
6704       DBUG_EXECUTE_IF("simulate_stmt_end_rows_event_loss",
6705                       {
6706                         mi->rows_event_tracker.stmt_end_seen= false;
6707                       });
6708     }
6709     goto default_action;
6710 
6711 #ifndef DBUG_OFF
6712   case XID_EVENT:
6713     DBUG_EXECUTE_IF("slave_discard_xid_for_gtid_0_x_1000",
6714     {
6715       /* Inject an event group that is missing its XID commit event. */
6716       if (mi->last_queued_gtid.domain_id == 0 &&
6717           mi->last_queued_gtid.seq_no == 1000)
6718         goto skip_relay_logging;
6719     });
6720     goto default_action;
6721 #endif
6722   case START_ENCRYPTION_EVENT:
6723     if (uint2korr(buf + FLAGS_OFFSET) & LOG_EVENT_IGNORABLE_F)
6724     {
6725       /*
6726          If the event was not requested by the slave (the slave did not ask for
6727          it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos
6728       */
6729       inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
6730       break;
6731     }
6732     /* fall through */
6733   default:
6734   default_action:
6735     DBUG_EXECUTE_IF("kill_slave_io_after_2_events",
6736                     {
6737                       if (mi->dbug_do_disconnect &&
6738                           (LOG_EVENT_IS_QUERY((Log_event_type)(uchar)buf[EVENT_TYPE_OFFSET]) ||
6739                            ((uchar)buf[EVENT_TYPE_OFFSET] == TABLE_MAP_EVENT))
6740                           && (--mi->dbug_event_counter == 0))
6741                       {
6742                         error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6743                         mi->dbug_do_disconnect= false;  /* Safety */
6744                         goto err;
6745                       }
6746                     };);
6747 
6748     DBUG_EXECUTE_IF("kill_slave_io_before_commit",
6749                     {
6750                       if ((uchar)buf[EVENT_TYPE_OFFSET] == XID_EVENT ||
6751                           ((uchar)buf[EVENT_TYPE_OFFSET] == QUERY_EVENT &&    /* QUERY_COMPRESSED_EVENT would never be commmit or rollback */
6752                            Query_log_event::peek_is_commit_rollback(buf, event_len,
6753                                                                     checksum_alg)))
6754                       {
6755                         error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6756                         goto err;
6757                       }
6758                     };);
6759 
6760     if (mi->using_gtid != Master_info::USE_GTID_NO && mi->gtid_event_seen)
6761     {
6762       if (unlikely(mi->gtid_reconnect_event_skip_count))
6763       {
6764         --mi->gtid_reconnect_event_skip_count;
6765         gtid_skip_enqueue= true;
6766       }
6767       else if (mi->events_queued_since_last_gtid)
6768         ++mi->events_queued_since_last_gtid;
6769     }
6770 
6771     if (!is_compress_event)
6772       inc_pos= event_len;
6773 
6774     break;
6775   }
6776 
6777   /*
6778     Integrity of Rows- event group check.
6779     A sequence of Rows- events must end with STMT_END_F flagged one.
6780     Even when Heartbeat event interrupts Rows- events flow this must indicate a
6781     malfunction e.g logging on the master.
6782   */
6783   if (((uchar) buf[EVENT_TYPE_OFFSET] != HEARTBEAT_LOG_EVENT) &&
6784       !is_rows_event &&
6785       mi->rows_event_tracker.check_and_report(mi->master_log_name,
6786                                               mi->master_log_pos))
6787   {
6788     error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6789     goto err;
6790   }
6791 
6792   /*
6793     If we filter events master-side (eg. @@skip_replication), we will see holes
6794     in the event positions from the master. If we see such a hole, adjust
6795     mi->master_log_pos accordingly so we maintain the correct position (for
6796     reconnect, MASTER_POS_WAIT(), etc.)
6797   */
6798   if (inc_pos > 0 &&
6799       event_len >= LOG_POS_OFFSET+4 &&
6800       (event_pos= uint4korr(buf+LOG_POS_OFFSET)) > mi->master_log_pos + inc_pos)
6801   {
6802     inc_pos= event_pos - mi->master_log_pos;
6803     DBUG_PRINT("info", ("Adjust master_log_pos %llu->%llu to account for "
6804                         "master-side filtering",
6805                         mi->master_log_pos + inc_pos, event_pos));
6806   }
6807 
6808   /*
6809      If this event is originating from this server, don't queue it.
6810      We don't check this for 3.23 events because it's simpler like this; 3.23
6811      will be filtered anyway by the SQL slave thread which also tests the
6812      server id (we must also keep this test in the SQL thread, in case somebody
6813      upgrades a 4.0 slave which has a not-filtered relay log).
6814 
6815      ANY event coming from ourselves can be ignored: it is obvious for queries;
6816      for STOP_EVENT/ROTATE_EVENT/START_EVENT: these cannot come from ourselves
6817      (--log-slave-updates would not log that) unless this slave is also its
6818      direct master (an unsupported, useless setup!).
6819   */
6820 
6821   mysql_mutex_lock(log_lock);
6822   s_id= uint4korr(buf + SERVER_ID_OFFSET);
6823   /*
6824     Write the event to the relay log, unless we reconnected in the middle
6825     of an event group and now need to skip the initial part of the group that
6826     we already wrote before reconnecting.
6827   */
6828   if (unlikely(gtid_skip_enqueue))
6829   {
6830     mi->master_log_pos+= inc_pos;
6831     if ((uchar)buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT &&
6832         s_id == mi->master_id)
6833     {
6834       /*
6835         If we write this master's description event in the middle of an event
6836         group due to GTID reconnect, SQL thread will think that master crashed
6837         in the middle of the group and roll back the first half, so we must not.
6838 
6839         But we still have to write an artificial copy of the masters description
6840         event, to override the initial slave-version description event so that
6841         SQL thread has the right information for parsing the events it reads.
6842       */
6843       rli->relay_log.description_event_for_queue->created= 0;
6844       rli->relay_log.description_event_for_queue->set_artificial_event();
6845       if (rli->relay_log.append_no_lock
6846           (rli->relay_log.description_event_for_queue))
6847         error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6848       else
6849         rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6850     }
6851     else if (mi->gtid_reconnect_event_skip_count == 0)
6852     {
6853       /*
6854         Add a fake rotate event so that SQL thread can see the old-style
6855         position where we re-connected in the middle of a GTID event group.
6856       */
6857       Rotate_log_event fake_rev(mi->master_log_name, 0, mi->master_log_pos, 0);
6858       fake_rev.server_id= mi->master_id;
6859       if (rli->relay_log.append_no_lock(&fake_rev))
6860         error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6861       else
6862         rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6863     }
6864   }
6865   else
6866   if ((s_id == global_system_variables.server_id &&
6867        !mi->rli.replicate_same_server_id) ||
6868       event_that_should_be_ignored(buf) ||
6869       /*
6870         the following conjunction deals with IGNORE_SERVER_IDS, if set
6871         If the master is on the ignore list, execution of
6872         format description log events and rotate events is necessary.
6873       */
6874       (mi->ignore_server_ids.elements > 0 &&
6875        mi->shall_ignore_server_id(s_id) &&
6876        /* everything is filtered out from non-master */
6877        (s_id != mi->master_id ||
6878         /* for the master meta information is necessary */
6879         ((uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT &&
6880          (uchar)buf[EVENT_TYPE_OFFSET] != ROTATE_EVENT))) ||
6881 
6882       /*
6883         Check whether it needs to be filtered based on domain_id
6884         (DO_DOMAIN_IDS/IGNORE_DOMAIN_IDS).
6885       */
6886       (mi->domain_id_filter.is_group_filtered() &&
6887        Log_event::is_group_event((Log_event_type)(uchar)
6888                                  buf[EVENT_TYPE_OFFSET])))
6889   {
6890     /*
6891       Do not write it to the relay log.
6892       a) We still want to increment mi->master_log_pos, so that we won't
6893       re-read this event from the master if the slave IO thread is now
6894       stopped/restarted (more efficient if the events we are ignoring are big
6895       LOAD DATA INFILE).
6896       b) We want to record that we are skipping events, for the information of
6897       the slave SQL thread, otherwise that thread may let
6898       rli->group_relay_log_pos stay too small if the last binlog's event is
6899       ignored.
6900       But events which were generated by this slave and which do not exist in
6901       the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment
6902       mi->master_log_pos.
6903       If the event is originated remotely and is being filtered out by
6904       IGNORE_SERVER_IDS it increments mi->master_log_pos
6905       as well as rli->group_relay_log_pos.
6906     */
6907     if (!(s_id == global_system_variables.server_id &&
6908           !mi->rli.replicate_same_server_id) ||
6909         ((uchar)buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT &&
6910          (uchar)buf[EVENT_TYPE_OFFSET] != ROTATE_EVENT &&
6911          (uchar)buf[EVENT_TYPE_OFFSET] != STOP_EVENT))
6912     {
6913       mi->master_log_pos+= inc_pos;
6914       memcpy(rli->ign_master_log_name_end, mi->master_log_name, FN_REFLEN);
6915       DBUG_ASSERT(rli->ign_master_log_name_end[0]);
6916       rli->ign_master_log_pos_end= mi->master_log_pos;
6917       if (got_gtid_event)
6918         rli->ign_gtids.update(&event_gtid);
6919     }
6920     // the slave SQL thread needs to re-check
6921     rli->relay_log.signal_relay_log_update();
6922     DBUG_PRINT("info", ("master_log_pos: %lu, event originating from %u server, ignored",
6923                         (ulong) mi->master_log_pos, uint4korr(buf + SERVER_ID_OFFSET)));
6924   }
6925   else
6926   {
6927     if (likely(!rli->relay_log.write_event_buffer((uchar*)buf, event_len)))
6928     {
6929       mi->master_log_pos+= inc_pos;
6930       DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
6931       rli->relay_log.harvest_bytes_written(&rli->log_space_total);
6932     }
6933     else
6934     {
6935       error= ER_SLAVE_RELAY_LOG_WRITE_FAILURE;
6936     }
6937     rli->ign_master_log_name_end[0]= 0; // last event is not ignored
6938     if (got_gtid_event)
6939       rli->ign_gtids.remove_if_present(&event_gtid);
6940     if (save_buf != NULL)
6941       buf= save_buf;
6942   }
6943   mysql_mutex_unlock(log_lock);
6944 
6945   if (likely(!error) &&
6946       mi->using_gtid != Master_info::USE_GTID_NO &&
6947       mi->events_queued_since_last_gtid > 0 &&
6948       ( (mi->last_queued_gtid_standalone &&
6949          !Log_event::is_part_of_group((Log_event_type)(uchar)
6950                                       buf[EVENT_TYPE_OFFSET])) ||
6951         (!mi->last_queued_gtid_standalone &&
6952          ((uchar)buf[EVENT_TYPE_OFFSET] == XID_EVENT ||
6953           ((uchar)buf[EVENT_TYPE_OFFSET] == QUERY_EVENT &&    /* QUERY_COMPRESSED_EVENT would never be commmit or rollback */
6954            Query_log_event::peek_is_commit_rollback(buf, event_len,
6955                                                     checksum_alg))))))
6956     {
6957       /*
6958         The whole of the current event group is queued. So in case of
6959         reconnect we can start from after the current GTID.
6960       */
6961       if (mi->gtid_reconnect_event_skip_count)
6962       {
6963         bool first= true;
6964         StringBuffer<1024> gtid_text;
6965 
6966         rpl_slave_state_tostring_helper(&gtid_text, &mi->last_queued_gtid,
6967                                         &first);
6968         sql_print_error("Slave IO thread received a terminal event from "
6969                         "group %s whose retrieval was interrupted "
6970                         "with reconnect. We still had %llu events to read. "
6971                         "The number of originally read events was %llu",
6972                         gtid_text.ptr(),
6973                         mi->gtid_reconnect_event_skip_count,
6974                         mi->events_queued_since_last_gtid);
6975       }
6976       mi->gtid_current_pos.update(&mi->last_queued_gtid);
6977       mi->events_queued_since_last_gtid= 0;
6978 
6979       /* Reset the domain_id_filter flag. */
6980       mi->domain_id_filter.reset_filter();
6981     }
6982 
6983 skip_relay_logging:
6984 
6985 err:
6986   if (unlock_data_lock)
6987     mysql_mutex_unlock(&mi->data_lock);
6988   DBUG_PRINT("info", ("error: %d", error));
6989 
6990   /*
6991     Do not print ER_SLAVE_RELAY_LOG_WRITE_FAILURE error here, as the caller
6992     handle_slave_io() prints it on return.
6993   */
6994   if (unlikely(error) && error != ER_SLAVE_RELAY_LOG_WRITE_FAILURE)
6995     mi->report(ERROR_LEVEL, error, NULL, ER_DEFAULT(error),
6996                error_msg.ptr());
6997 
6998   if (unlikely(is_malloc))
6999     my_free((void *)new_buf);
7000 
7001   DBUG_RETURN(error);
7002 }
7003 
7004 
7005 void end_relay_log_info(Relay_log_info* rli)
7006 {
7007   mysql_mutex_t *log_lock;
7008   DBUG_ENTER("end_relay_log_info");
7009 
7010   rli->error_on_rli_init_info= false;
7011   if (!rli->inited)
7012     DBUG_VOID_RETURN;
7013   if (rli->info_fd >= 0)
7014   {
7015     end_io_cache(&rli->info_file);
7016     mysql_file_close(rli->info_fd, MYF(MY_WME));
7017     rli->info_fd = -1;
7018   }
7019   if (rli->cur_log_fd >= 0)
7020   {
7021     end_io_cache(&rli->cache_buf);
7022     mysql_file_close(rli->cur_log_fd, MYF(MY_WME));
7023     rli->cur_log_fd = -1;
7024   }
7025   rli->inited = 0;
7026   log_lock= rli->relay_log.get_log_lock();
7027   mysql_mutex_lock(log_lock);
7028   rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT);
7029   rli->relay_log.harvest_bytes_written(&rli->log_space_total);
7030   mysql_mutex_unlock(log_lock);
7031   /*
7032     Delete the slave's temporary tables from memory.
7033     In the future there will be other actions than this, to ensure persistance
7034     of slave's temp tables after shutdown.
7035   */
7036   rli->close_temporary_tables();
7037   DBUG_VOID_RETURN;
7038 }
7039 
7040 
7041 /**
7042   Hook to detach the active VIO before closing a connection handle.
7043 
7044   The client API might close the connection (and associated data)
7045   in case it encounters a unrecoverable (network) error. This hook
7046   is called from the client code before the VIO handle is deleted
7047   allows the thread to detach the active vio so it does not point
7048   to freed memory.
7049 
7050   Other calls to THD::clear_active_vio throughout this module are
7051   redundant due to the hook but are left in place for illustrative
7052   purposes.
7053 */
7054 
7055 extern "C" void slave_io_thread_detach_vio()
7056 {
7057 #ifdef SIGNAL_WITH_VIO_CLOSE
7058   THD *thd= current_thd;
7059   if (thd && thd->slave_thread)
7060     thd->clear_active_vio();
7061 #endif
7062 }
7063 
7064 
7065 /*
7066   Try to connect until successful or slave killed
7067 
7068   SYNPOSIS
7069     safe_connect()
7070     thd                 Thread handler for slave
7071     mysql               MySQL connection handle
7072     mi                  Replication handle
7073 
7074   RETURN
7075     0   ok
7076     #   Error
7077 */
7078 
7079 static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi)
7080 {
7081   DBUG_ENTER("safe_connect");
7082 
7083   DBUG_RETURN(connect_to_master(thd, mysql, mi, 0, 0));
7084 }
7085 
7086 
7087 /*
7088   SYNPOSIS
7089     connect_to_master()
7090 
7091   IMPLEMENTATION
7092     Try to connect until successful or slave killed or we have retried
7093     master_retry_count times
7094 */
7095 
7096 static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
7097                              bool reconnect, bool suppress_warnings)
7098 {
7099   int slave_was_killed;
7100   int last_errno= -2;                           // impossible error
7101   ulong err_count=0;
7102   my_bool my_true= 1;
7103   DBUG_ENTER("connect_to_master");
7104   set_slave_max_allowed_packet(thd, mysql);
7105 #ifndef DBUG_OFF
7106   mi->events_till_disconnect = disconnect_slave_event_count;
7107 #endif
7108   ulong client_flag= CLIENT_REMEMBER_OPTIONS;
7109   if (opt_slave_compressed_protocol)
7110     client_flag|= CLIENT_COMPRESS;                /* We will use compression */
7111 
7112   mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout);
7113   mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout);
7114   mysql_options(mysql, MYSQL_OPT_USE_THREAD_SPECIFIC_MEMORY,
7115                 (char*) &my_true);
7116 
7117 #ifdef HAVE_OPENSSL
7118   if (mi->ssl)
7119   {
7120     mysql_ssl_set(mysql,
7121                   mi->ssl_key[0]?mi->ssl_key:0,
7122                   mi->ssl_cert[0]?mi->ssl_cert:0,
7123                   mi->ssl_ca[0]?mi->ssl_ca:0,
7124                   mi->ssl_capath[0]?mi->ssl_capath:0,
7125                   mi->ssl_cipher[0]?mi->ssl_cipher:0);
7126     mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
7127                   &mi->ssl_verify_server_cert);
7128     mysql_options(mysql, MYSQL_OPT_SSL_CRLPATH,
7129                   mi->ssl_crlpath[0] ? mi->ssl_crlpath : 0);
7130     mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
7131                   &mi->ssl_verify_server_cert);
7132   }
7133 #endif
7134 
7135   /*
7136     If server's default charset is not supported (like utf16, utf32) as client
7137     charset, then set client charset to 'latin1' (default client charset).
7138   */
7139   if (is_supported_parser_charset(default_charset_info))
7140     mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->csname);
7141   else
7142   {
7143     sql_print_information("'%s' can not be used as client character set. "
7144                           "'%s' will be used as default client character set "
7145                           "while connecting to master.",
7146                           default_charset_info->csname,
7147                           default_client_charset_info->csname);
7148     mysql_options(mysql, MYSQL_SET_CHARSET_NAME,
7149                   default_client_charset_info->csname);
7150   }
7151 
7152   /* This one is not strictly needed but we have it here for completeness */
7153   mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) charsets_dir);
7154 
7155   /* Set MYSQL_PLUGIN_DIR in case master asks for an external authentication plugin */
7156   if (opt_plugin_dir_ptr && *opt_plugin_dir_ptr)
7157     mysql_options(mysql, MYSQL_PLUGIN_DIR, opt_plugin_dir_ptr);
7158 
7159   /* we disallow empty users */
7160   if (mi->user[0] == 0)
7161   {
7162     mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
7163                ER_THD(thd, ER_SLAVE_FATAL_ERROR),
7164                "Invalid (empty) username when attempting to "
7165                "connect to the master server. Connection attempt "
7166                "terminated.");
7167     DBUG_RETURN(1);
7168   }
7169   while (!(slave_was_killed = io_slave_killed(mi)) &&
7170          (reconnect ? mysql_reconnect(mysql) != 0 :
7171           mysql_real_connect(mysql, mi->host, mi->user, mi->password, 0,
7172                              mi->port, 0, client_flag) == 0))
7173   {
7174     /* Don't repeat last error */
7175     if ((int)mysql_errno(mysql) != last_errno)
7176     {
7177       last_errno=mysql_errno(mysql);
7178       suppress_warnings= 0;
7179       mi->report(ERROR_LEVEL, last_errno, NULL,
7180                  "error %s to master '%s@%s:%d'"
7181                  " - retry-time: %d  maximum-retries: %lu  message: %s",
7182                  (reconnect ? "reconnecting" : "connecting"),
7183                  mi->user, mi->host, mi->port,
7184                  mi->connect_retry, master_retry_count,
7185                  mysql_error(mysql));
7186     }
7187     /*
7188       By default we try forever. The reason is that failure will trigger
7189       master election, so if the user did not set master_retry_count we
7190       do not want to have election triggered on the first failure to
7191       connect
7192     */
7193     if (++err_count == master_retry_count)
7194     {
7195       slave_was_killed=1;
7196       if (reconnect)
7197         change_rpl_status(RPL_ACTIVE_SLAVE,RPL_LOST_SOLDIER);
7198       break;
7199     }
7200     slave_sleep(thd,mi->connect_retry,io_slave_killed, mi);
7201   }
7202 
7203   if (!slave_was_killed)
7204   {
7205     mi->clear_error(); // clear possible left over reconnect error
7206     if (reconnect)
7207     {
7208       if (!suppress_warnings && global_system_variables.log_warnings)
7209         sql_print_information("Slave: connected to master '%s@%s:%d',"
7210                               "replication resumed in log '%s' at "
7211                               "position %llu", mi->user, mi->host, mi->port,
7212                               IO_RPL_LOG_NAME, mi->master_log_pos);
7213     }
7214     else
7215     {
7216       change_rpl_status(RPL_IDLE_SLAVE,RPL_ACTIVE_SLAVE);
7217       general_log_print(thd, COM_CONNECT_OUT, "%s@%s:%d",
7218                         mi->user, mi->host, mi->port);
7219     }
7220 #ifdef SIGNAL_WITH_VIO_CLOSE
7221     thd->set_active_vio(mysql->net.vio);
7222 #endif
7223   }
7224   mysql->reconnect= 1;
7225   DBUG_PRINT("exit",("slave_was_killed: %d", slave_was_killed));
7226   DBUG_RETURN(slave_was_killed);
7227 }
7228 
7229 
7230 /*
7231   safe_reconnect()
7232 
7233   IMPLEMENTATION
7234     Try to connect until successful or slave killed or we have retried
7235     master_retry_count times
7236 */
7237 
7238 static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
7239                           bool suppress_warnings)
7240 {
7241   DBUG_ENTER("safe_reconnect");
7242   DBUG_RETURN(connect_to_master(thd, mysql, mi, 1, suppress_warnings));
7243 }
7244 
7245 
#ifdef NOT_USED
/*
  rpl_connect_master()

  Open a client connection to the master described by the Master_info
  stored in the calling thread's RPL_MASTER_INFO slot. This code is only
  compiled when NOT_USED is defined.

  PARAMETERS
    mysql   Connection handle to use, or NULL to have one created with
            mysql_init().

  RETURN
    The connected handle, or NULL on failure. A handle created here is
    closed on failure; a handle supplied by the caller is left untouched.
*/
MYSQL *rpl_connect_master(MYSQL *mysql)
{
  Master_info *mi= my_pthread_getspecific_ptr(Master_info*, RPL_MASTER_INFO);
  if (!mi)
  {
    sql_print_error("'rpl_connect_master' must be called in slave I/O thread context.");
    return NULL;
  }

  /* Remember whether the handle is ours, so we only free what we created */
  bool owns_handle= false;
  if (!mysql)
  {
    mysql= mysql_init(NULL);
    if (!mysql)
    {
      sql_print_error("rpl_connect_master: failed in mysql_init()");
      return NULL;
    }
    owns_handle= true;
  }

  /*
    XXX: copied from connect_to_master, this function should not
    change the slave status, so we cannot use connect_to_master
    directly

    TODO: make this part a separate function to eliminate duplication
  */
  my_bool enable_option= 1;
  mysql_options(mysql, MYSQL_OPT_CONNECT_TIMEOUT, (char *) &slave_net_timeout);
  mysql_options(mysql, MYSQL_OPT_READ_TIMEOUT, (char *) &slave_net_timeout);
  mysql_options(mysql, MYSQL_OPT_USE_THREAD_SPECIFIC_MEMORY,
                (char*) &enable_option);

#ifdef HAVE_OPENSSL
  if (mi->ssl)
  {
    /* Empty SSL settings are passed on as "not set" (0) */
    mysql_ssl_set(mysql,
                  mi->ssl_key[0] ? mi->ssl_key : 0,
                  mi->ssl_cert[0] ? mi->ssl_cert : 0,
                  mi->ssl_ca[0] ? mi->ssl_ca : 0,
                  mi->ssl_capath[0] ? mi->ssl_capath : 0,
                  mi->ssl_cipher[0] ? mi->ssl_cipher : 0);
    mysql_options(mysql, MYSQL_OPT_SSL_VERIFY_SERVER_CERT,
                  &mi->ssl_verify_server_cert);
  }
#endif

  mysql_options(mysql, MYSQL_SET_CHARSET_NAME, default_charset_info->csname);
  /* This one is not strictly needed but we have it here for completeness */
  mysql_options(mysql, MYSQL_SET_CHARSET_DIR, (char *) charsets_dir);

  /*
    Connect only if a user name is configured and the I/O thread has not
    been killed; the checks are evaluated in this order and stop at the
    first one that fails.
  */
  const bool connected= mi->user != NULL &&
                        mi->user[0] != 0 &&
                        !io_slave_killed( mi) &&
                        mysql_real_connect(mysql, mi->host, mi->user,
                                           mi->password, 0,
                                           mi->port, 0, 0);
  if (connected)
    return mysql;

  /* A kill is not an error worth logging */
  if (!io_slave_killed( mi))
    sql_print_error("rpl_connect_master: error connecting to master: %s (server_error: %d)",
                    mysql_error(mysql), mysql_errno(mysql));

  if (owns_handle)
    mysql_close(mysql);                         // this will free the object
  return NULL;
}
#endif
7317 
7318 
7319 /*
7320   Called when we notice that the current "hot" log got rotated under our feet.
7321 */
7322 
7323 static IO_CACHE *reopen_relay_log(Relay_log_info *rli, const char **errmsg)
7324 {
7325   DBUG_ENTER("reopen_relay_log");
7326   DBUG_ASSERT(rli->cur_log != &rli->cache_buf);
7327   DBUG_ASSERT(rli->cur_log_fd == -1);
7328 
7329   IO_CACHE *cur_log = rli->cur_log=&rli->cache_buf;
7330   if ((rli->cur_log_fd=open_binlog(cur_log,rli->event_relay_log_name,
7331                                    errmsg)) <0)
7332     DBUG_RETURN(0);
7333   /*
7334     We want to start exactly where we was before:
7335     relay_log_pos       Current log pos
7336     pending             Number of bytes already processed from the event
7337   */
7338   rli->event_relay_log_pos= MY_MAX(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE);
7339   my_b_seek(cur_log,rli->event_relay_log_pos);
7340   DBUG_RETURN(cur_log);
7341 }
7342 
7343 
7344 /**
7345   Reads next event from the relay log.  Should be called from the
7346   slave IO thread.
7347 
7348   @param rli Relay_log_info structure for the slave IO thread.
7349 
7350   @return The event read, or NULL on error.  If an error occurs, the
7351   error is reported through the sql_print_information() or
7352   sql_print_error() functions.
7353 
7354   The size of the read event (in bytes) is returned in *event_size.
7355 */
7356 static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
7357 {
7358   Log_event* ev;
7359   Relay_log_info *rli= rgi->rli;
7360   IO_CACHE* cur_log = rli->cur_log;
7361   mysql_mutex_t *log_lock = rli->relay_log.get_log_lock();
7362   const char* errmsg=0;
7363   DBUG_ENTER("next_event");
7364 
7365   DBUG_ASSERT(rgi->thd != 0 && rgi->thd == rli->sql_driver_thd);
7366   *event_size= 0;
7367 
7368 #ifndef DBUG_OFF
7369   if (abort_slave_event_count && !rli->events_till_abort--)
7370     DBUG_RETURN(0);
7371 #endif
7372 
7373   /*
7374     For most operations we need to protect rli members with data_lock,
7375     so we assume calling function acquired this mutex for us and we will
7376     hold it for the most of the loop below However, we will release it
7377     whenever it is worth the hassle,  and in the cases when we go into a
7378     mysql_cond_wait() with the non-data_lock mutex
7379   */
7380   mysql_mutex_assert_owner(&rli->data_lock);
7381 
7382   while (!sql_slave_killed(rgi))
7383   {
7384     /*
7385       We can have two kinds of log reading:
7386       hot_log:
7387         rli->cur_log points at the IO_CACHE of relay_log, which
7388         is actively being updated by the I/O thread. We need to be careful
7389         in this case and make sure that we are not looking at a stale log that
7390         has already been rotated. If it has been, we reopen the log.
7391 
7392       The other case is much simpler:
7393         We just have a read only log that nobody else will be updating.
7394     */
7395     ulonglong old_pos;
7396     bool hot_log;
7397     if ((hot_log = (cur_log != &rli->cache_buf)))
7398     {
7399       DBUG_ASSERT(rli->cur_log_fd == -1); // foreign descriptor
7400       mysql_mutex_lock(log_lock);
7401 
7402       /*
7403         Reading xxx_file_id is safe because the log will only
7404         be rotated when we hold relay_log.LOCK_log
7405       */
7406       if (rli->relay_log.get_open_count() != rli->cur_log_old_open_count)
7407       {
7408         // The master has switched to a new log file; Reopen the old log file
7409         cur_log=reopen_relay_log(rli, &errmsg);
7410         mysql_mutex_unlock(log_lock);
7411         if (!cur_log)                           // No more log files
7412           goto err;
7413         hot_log=0;                              // Using old binary log
7414       }
7415     }
7416     /*
7417       As there is no guarantee that the relay is open (for example, an I/O
7418       error during a write by the slave I/O thread may have closed it), we
7419       have to test it.
7420     */
7421     if (!my_b_inited(cur_log))
7422       goto err;
7423 #ifndef DBUG_OFF
7424     {
7425       /* This is an assertion which sometimes fails, let's try to track it */
7426       DBUG_PRINT("info", ("my_b_tell(cur_log)=%llu rli->event_relay_log_pos=%llu",
7427                           my_b_tell(cur_log), rli->event_relay_log_pos));
7428       DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
7429       DBUG_ASSERT(rli->mi->using_parallel() ||
7430                   my_b_tell(cur_log) == rli->event_relay_log_pos);
7431     }
7432 #endif
7433     /*
7434       Relay log is always in new format - if the master is 3.23, the
7435       I/O thread will convert the format for us.
7436       A problem: the description event may be in a previous relay log. So if
7437       the slave has been shutdown meanwhile, we would have to look in old relay
7438       logs, which may even have been deleted. So we need to write this
7439       description event at the beginning of the relay log.
7440       When the relay log is created when the I/O thread starts, easy: the
7441       master will send the description event and we will queue it.
7442       But if the relay log is created by new_file(): then the solution is:
7443       MYSQL_BIN_LOG::open() will write the buffered description event.
7444     */
7445     old_pos= rli->event_relay_log_pos;
7446     if ((ev= Log_event::read_log_event(cur_log,
7447                                        rli->relay_log.description_event_for_exec,
7448                                        opt_slave_sql_verify_checksum)))
7449 
7450     {
7451       /*
7452         read it while we have a lock, to avoid a mutex lock in
7453         inc_event_relay_log_pos()
7454       */
7455       rli->future_event_relay_log_pos= my_b_tell(cur_log);
7456       *event_size= rli->future_event_relay_log_pos - old_pos;
7457 
7458       if (hot_log)
7459         mysql_mutex_unlock(log_lock);
7460       rli->sql_thread_caught_up= false;
7461       DBUG_RETURN(ev);
7462     }
7463     if (opt_reckless_slave)                     // For mysql-test
7464       cur_log->error = 0;
7465     if (unlikely(cur_log->error < 0))
7466     {
7467       errmsg = "slave SQL thread aborted because of I/O error";
7468       if (hot_log)
7469         mysql_mutex_unlock(log_lock);
7470       goto err;
7471     }
7472     if (!cur_log->error) /* EOF */
7473     {
7474       /*
7475         On a hot log, EOF means that there are no more updates to
7476         process and we must block until I/O thread adds some and
7477         signals us to continue
7478       */
7479       if (hot_log)
7480       {
7481         /*
7482           We say in Seconds_Behind_Master that we have "caught up". Note that
7483           for example if network link is broken but I/O slave thread hasn't
7484           noticed it (slave_net_timeout not elapsed), then we'll say "caught
7485           up" whereas we're not really caught up. Fixing that would require
7486           internally cutting timeout in smaller pieces in network read, no
7487           thanks. Another example: SQL has caught up on I/O, now I/O has read
7488           a new event and is queuing it; the false "0" will exist until SQL
7489           finishes executing the new event; it will be look abnormal only if
7490           the events have old timestamps (then you get "many", 0, "many").
7491 
7492           Transient phases like this can be fixed with implemeting
7493           Heartbeat event which provides the slave the status of the
7494           master at time the master does not have any new update to send.
7495           Seconds_Behind_Master would be zero only when master has no
7496           more updates in binlog for slave. The heartbeat can be sent
7497           in a (small) fraction of slave_net_timeout. Until it's done
7498           rli->sql_thread_caught_up is temporarely (for time of waiting for
7499           the following event) set whenever EOF is reached.
7500         */
7501         rli->sql_thread_caught_up= true;
7502 
7503         DBUG_ASSERT(rli->relay_log.get_open_count() ==
7504                     rli->cur_log_old_open_count);
7505 
7506         if (rli->ign_master_log_name_end[0])
7507         {
7508           /* We generate and return a Rotate, to make our positions advance */
7509           DBUG_PRINT("info",("seeing an ignored end segment"));
7510           ev= new Rotate_log_event(rli->ign_master_log_name_end,
7511                                    0, rli->ign_master_log_pos_end,
7512                                    Rotate_log_event::DUP_NAME);
7513           rli->ign_master_log_name_end[0]= 0;
7514           mysql_mutex_unlock(log_lock);
7515           if (unlikely(!ev))
7516           {
7517             errmsg= "Slave SQL thread failed to create a Rotate event "
7518               "(out of memory?), SHOW SLAVE STATUS may be inaccurate";
7519             goto err;
7520           }
7521           ev->server_id= 0; // don't be ignored by slave SQL thread
7522           DBUG_RETURN(ev);
7523         }
7524 
7525         if (rli->ign_gtids.count() && !rli->is_in_group())
7526         {
7527           /*
7528             We generate and return a Gtid_list, to update gtid_slave_pos,
7529             unless being in the middle of a group.
7530           */
7531           DBUG_PRINT("info",("seeing ignored end gtids"));
7532           ev= new Gtid_list_log_event(&rli->ign_gtids,
7533                                       Gtid_list_log_event::FLAG_IGN_GTIDS);
7534           rli->ign_gtids.reset();
7535           mysql_mutex_unlock(log_lock);
7536           if (unlikely(!ev))
7537           {
7538             errmsg= "Slave SQL thread failed to create a Gtid_list event "
7539               "(out of memory?), gtid_slave_pos may be inaccurate";
7540             goto err;
7541           }
7542           ev->server_id= 0; // don't be ignored by slave SQL thread
7543           ev->set_artificial_event(); // Don't mess up Exec_Master_Log_Pos
7544           DBUG_RETURN(ev);
7545         }
7546 
7547         /*
7548           We have to check sql_slave_killed() here an extra time.
7549           Otherwise we may miss a wakeup, since last check was done
7550           without holding LOCK_log.
7551         */
7552         if (sql_slave_killed(rgi))
7553         {
7554           mysql_mutex_unlock(log_lock);
7555           break;
7556         }
7557 
7558         /*
7559           We can, and should release data_lock while we are waiting for
7560           update. If we do not, show slave status will block
7561         */
7562         mysql_mutex_unlock(&rli->data_lock);
7563 
7564         /*
7565           Possible deadlock :
7566           - the I/O thread has reached log_space_limit
7567           - the SQL thread has read all relay logs, but cannot purge for some
7568           reason:
7569             * it has already purged all logs except the current one
7570             * there are other logs than the current one but they're involved in
7571             a transaction that finishes in the current one (or is not finished)
7572           Solution :
7573           Wake up the possibly waiting I/O thread, and set a boolean asking
7574           the I/O thread to temporarily ignore the log_space_limit
7575           constraint, because we do not want the I/O thread to block because of
7576           space (it's ok if it blocks for any other reason (e.g. because the
7577           master does not send anything). Then the I/O thread stops waiting
7578           and reads one more event and starts honoring log_space_limit again.
7579 
7580           If the SQL thread needs more events to be able to rotate the log (it
7581           might need to finish the current group first), then it can ask for
7582           one more at a time. Thus we don't outgrow the relay log indefinitely,
7583           but rather in a controlled manner, until the next rotate.
7584 
7585           When the SQL thread starts it sets ignore_log_space_limit to false.
7586           We should also reset ignore_log_space_limit to 0 when the user does
7587           RESET SLAVE, but in fact, no need as RESET SLAVE requires that the
7588           slave be stopped, and the SQL thread sets ignore_log_space_limit
7589           to 0 when
7590           it stops.
7591         */
7592         mysql_mutex_lock(&rli->log_space_lock);
7593 
7594         /*
7595           If we have reached the limit of the relay space and we
7596           are going to sleep, waiting for more events:
7597 
7598           1. If outside a group, SQL thread asks the IO thread
7599              to force a rotation so that the SQL thread purges
7600              logs next time it processes an event (thus space is
7601              freed).
7602 
7603           2. If in a group, SQL thread asks the IO thread to
7604              ignore the limit and queues yet one more event
7605              so that the SQL thread finishes the group and
7606              is are able to rotate and purge sometime soon.
7607          */
7608         if (rli->log_space_limit &&
7609             rli->log_space_limit < rli->log_space_total)
7610         {
7611           /* force rotation if not in an unfinished group */
7612           rli->sql_force_rotate_relay= !rli->is_in_group();
7613 
7614           /* ask for one more event */
7615           rli->ignore_log_space_limit= true;
7616         }
7617 
7618         mysql_cond_broadcast(&rli->log_space_cond);
7619         mysql_mutex_unlock(&rli->log_space_lock);
7620         // Note that wait_for_update_relay_log unlocks lock_log !
7621         rli->relay_log.wait_for_update_relay_log(rli->sql_driver_thd);
7622         // re-acquire data lock since we released it earlier
7623         mysql_mutex_lock(&rli->data_lock);
7624         rli->sql_thread_caught_up= false;
7625         continue;
7626       }
7627       /*
7628         If the log was not hot, we need to move to the next log in
7629         sequence. The next log could be hot or cold, we deal with both
7630         cases separately after doing some common initialization
7631       */
7632       end_io_cache(cur_log);
7633       DBUG_ASSERT(rli->cur_log_fd >= 0);
7634       mysql_file_close(rli->cur_log_fd, MYF(MY_WME));
7635       rli->cur_log_fd = -1;
7636       rli->last_inuse_relaylog->completed= true;
7637       rli->relay_log.description_event_for_exec->reset_crypto();
7638 
7639       if (relay_log_purge)
7640       {
7641         /*
7642           purge_first_log will properly set up relay log coordinates in rli.
7643           If the group's coordinates are equal to the event's coordinates
7644           (i.e. the relay log was not rotated in the middle of a group),
7645           we can purge this relay log too.
7646           We do ulonglong and string comparisons, this may be slow but
7647           - purging the last relay log is nice (it can save 1GB of disk), so we
7648           like to detect the case where we can do it, and given this,
7649           - I see no better detection method
7650           - purge_first_log is not called that often
7651         */
7652         if (rli->relay_log.purge_first_log
7653             (rli,
7654              rli->group_relay_log_pos == rli->event_relay_log_pos
7655              && !strcmp(rli->group_relay_log_name,rli->event_relay_log_name)))
7656         {
7657           errmsg = "Error purging processed logs";
7658           goto err;
7659         }
7660       }
7661       else
7662       {
7663         /*
7664           If hot_log is set, then we already have a lock on
7665           LOCK_log.  If not, we have to get the lock.
7666 
7667           According to Sasha, the only time this code will ever be executed
7668           is if we are recovering from a bug.
7669         */
7670         if (rli->relay_log.find_next_log(&rli->linfo, !hot_log))
7671         {
7672           errmsg = "error switching to the next log";
7673           goto err;
7674         }
7675         rli->event_relay_log_pos = BIN_LOG_HEADER_SIZE;
7676         strmake_buf(rli->event_relay_log_name,rli->linfo.log_file_name);
7677         if (rli->flush())
7678         {
7679           errmsg= "error flushing relay log";
7680           goto err;
7681         }
7682       }
7683       /*
7684         Now we want to open this next log. To know if it's a hot log (the one
7685         being written by the I/O thread now) or a cold log, we can use
7686         is_active(); if it is hot, we use the I/O cache; if it's cold we open
7687         the file normally. But if is_active() reports that the log is hot, this
7688         may change between the test and the consequence of the test. So we may
7689         open the I/O cache whereas the log is now cold, which is nonsense.
7690         To guard against this, we need to have LOCK_log.
7691       */
7692 
7693       DBUG_PRINT("info",("hot_log: %d",hot_log));
7694       if (!hot_log) /* if hot_log, we already have this mutex */
7695         mysql_mutex_lock(log_lock);
7696       if (rli->relay_log.is_active(rli->linfo.log_file_name))
7697       {
7698         rli->cur_log= cur_log= rli->relay_log.get_log_file();
7699         rli->cur_log_old_open_count= rli->relay_log.get_open_count();
7700         DBUG_ASSERT(rli->cur_log_fd == -1);
7701 
7702         /*
7703            When the SQL thread is [stopped and] (re)started the
7704            following may happen:
7705 
7706            1. Log was hot at stop time and remains hot at restart
7707 
7708               SQL thread reads again from hot_log (SQL thread was
7709               reading from the active log when it was stopped and the
7710               very same log is still active on SQL thread restart).
7711 
7712               In this case, my_b_seek is performed on cur_log, while
7713               cur_log points to relay_log.get_log_file();
7714 
7715            2. Log was hot at stop time but got cold before restart
7716 
7717               The log was hot when SQL thread stopped, but it is not
7718               anymore when the SQL thread restarts.
7719 
7720               In this case, the SQL thread reopens the log, using
7721               cache_buf, ie, cur_log points to &cache_buf, and thence
7722               its coordinates are reset.
7723 
7724            3. Log was already cold at stop time
7725 
7726               The log was not hot when the SQL thread stopped, and, of
7727               course, it will not be hot when it restarts.
7728 
7729               In this case, the SQL thread opens the cold log again,
7730               using cache_buf, ie, cur_log points to &cache_buf, and
7731               thence its coordinates are reset.
7732 
7733            4. Log was hot at stop time, DBA changes to previous cold
7734               log and restarts SQL thread
7735 
7736               The log was hot when the SQL thread was stopped, but the
7737               user changed the coordinates of the SQL thread to
7738               restart from a previous cold log.
7739 
7740               In this case, at start time, cur_log points to a cold
7741               log, opened using &cache_buf as cache, and coordinates
7742               are reset. However, as it moves on to the next logs, it
7743               will eventually reach the hot log. If the hot log is the
7744               same at the time the SQL thread was stopped, then
7745               coordinates were not reset - the cur_log will point to
7746               relay_log.get_log_file(), and not a freshly opened
7747               IO_CACHE through cache_buf. For this reason we need to
7748               deploy a my_b_seek before calling check_binlog_magic at
7749               this point of the code (see: BUG#55263 for more
7750               details).
7751 
7752           NOTES:
7753             - We must keep the LOCK_log to read the 4 first bytes, as
7754               this is a hot log (same as when we call read_log_event()
7755               above: for a hot log we take the mutex).
7756 
7757             - Because of scenario #4 above, we need to have a
7758               my_b_seek here. Otherwise, we might hit the assertion
7759               inside check_binlog_magic.
7760         */
7761 
7762         my_b_seek(cur_log, (my_off_t) 0);
7763         if (check_binlog_magic(cur_log,&errmsg))
7764         {
7765           if (!hot_log)
7766             mysql_mutex_unlock(log_lock);
7767           goto err;
7768         }
7769         if (rli->alloc_inuse_relaylog(rli->linfo.log_file_name))
7770         {
7771           if (!hot_log)
7772             mysql_mutex_unlock(log_lock);
7773           goto err;
7774         }
7775         if (!hot_log)
7776           mysql_mutex_unlock(log_lock);
7777         continue;
7778       }
7779       if (!hot_log)
7780         mysql_mutex_unlock(log_lock);
7781       /*
7782         if we get here, the log was not hot, so we will have to open it
7783         ourselves. We are sure that the log is still not hot now (a log can get
7784         from hot to cold, but not from cold to hot). No need for LOCK_log.
7785       */
7786       // open_binlog() will check the magic header
7787       if ((rli->cur_log_fd=open_binlog(cur_log,rli->linfo.log_file_name,
7788                                        &errmsg)) <0)
7789         goto err;
7790       if (rli->alloc_inuse_relaylog(rli->linfo.log_file_name))
7791         goto err;
7792     }
7793     else
7794     {
7795       /*
7796         Read failed with a non-EOF error.
7797         TODO: come up with something better to handle this error
7798       */
7799       if (hot_log)
7800         mysql_mutex_unlock(log_lock);
7801       sql_print_error("Slave SQL thread: I/O error reading \
7802 event(errno: %d  cur_log->error: %d)",
7803                       my_errno,cur_log->error);
7804       // set read position to the beginning of the event
7805       my_b_seek(cur_log,rli->event_relay_log_pos);
7806       /* otherwise, we have had a partial read */
7807       errmsg = "Aborting slave SQL thread because of partial event read";
7808       break;                                    // To end of function
7809     }
7810   }
7811   if (!errmsg && global_system_variables.log_warnings)
7812   {
7813     sql_print_information("Error reading relay log event: %s",
7814                           "slave SQL thread was killed");
7815     DBUG_RETURN(0);
7816   }
7817 
7818 err:
7819   if (errmsg)
7820     sql_print_error("Error reading relay log event: %s", errmsg);
7821   DBUG_RETURN(0);
7822 }
#ifdef WITH_WSREP
/*
  Look ahead in the relay log and report the type of the next event,
  skipping over ANNOTATE_ROWS_EVENT and XID_EVENT entries.

  The scan starts at rgi->future_event_relay_log_pos and is done under
  rgi->rli->data_lock. Each event returned by next_event() is deleted
  again; afterwards the event positions in rgi and rli and the read
  position of the current log are set back to the values saved from rgi
  on entry.

  On return *event_size holds whatever the last next_event() call stored.
  Returns UNKNOWN_EVENT if next_event() returned no event.
*/
enum Log_event_type wsrep_peak_event(rpl_group_info *rgi, ulonglong* event_size)
{
  Relay_log_info *rli= rgi->rli;
  enum Log_event_type found_type;

  mysql_mutex_lock(&rli->data_lock);

  const unsigned long long saved_event_pos= rgi->event_relay_log_pos;
  const unsigned long long saved_future_pos= rgi->future_event_relay_log_pos;
  unsigned long long scan_pos= saved_future_pos;

  for (;;)
  {
    /* rli->cur_log is re-read every time: next_event() may switch logs */
    my_b_seek(rli->cur_log, scan_pos);
    rli->event_relay_log_pos= scan_pos;
    rgi->event_relay_log_pos= scan_pos;

    Log_event *peeked= next_event(rgi, event_size);
    if (peeked)
      found_type= peeked->get_type_code();
    else
      found_type= UNKNOWN_EVENT;
    delete peeked;
    scan_pos+= *event_size;

    /* Stop at the first event that is neither an annotation nor an XID */
    if (found_type != ANNOTATE_ROWS_EVENT && found_type != XID_EVENT)
      break;
  }

  /* scan the log back and re-set the positions to original values */
  rli->event_relay_log_pos= saved_event_pos;
  rgi->event_relay_log_pos= saved_event_pos;
  my_b_seek(rli->cur_log, saved_future_pos);

  mysql_mutex_unlock(&rli->data_lock);

  return found_type;
}
#endif /* WITH_WSREP */
7856 /*
7857   Rotate a relay log (this is used only by FLUSH LOGS; the automatic rotation
7858   because of size is simpler because when we do it we already have all relevant
7859   locks; here we don't, so this function is mainly taking locks).
7860   Returns nothing as we cannot catch any error (MYSQL_BIN_LOG::new_file()
7861   is void).
7862 */
7863 
7864 int rotate_relay_log(Master_info* mi)
7865 {
7866   DBUG_ENTER("rotate_relay_log");
7867   Relay_log_info* rli= &mi->rli;
7868   int error= 0;
7869 
7870   DBUG_EXECUTE_IF("crash_before_rotate_relaylog", DBUG_SUICIDE(););
7871 
7872   /*
7873      We need to test inited because otherwise, new_file() will attempt to lock
7874      LOCK_log, which may not be inited (if we're not a slave).
7875   */
7876   if (!rli->inited)
7877   {
7878     DBUG_PRINT("info", ("rli->inited == 0"));
7879     goto end;
7880   }
7881 
7882   /* If the relay log is closed, new_file() will do nothing. */
7883   if ((error= rli->relay_log.new_file()))
7884     goto end;
7885 
7886   /*
7887     We harvest now, because otherwise BIN_LOG_HEADER_SIZE will not immediately
7888     be counted, so imagine a succession of FLUSH LOGS  and assume the slave
7889     threads are started:
7890     relay_log_space decreases by the size of the deleted relay log, but does
7891     not increase, so flush-after-flush we may become negative, which is wrong.
7892     Even if this will be corrected as soon as a query is replicated on the
7893     slave (because the I/O thread will then call harvest_bytes_written() which
7894     will harvest all these BIN_LOG_HEADER_SIZE we forgot), it may give strange
7895     output in SHOW SLAVE STATUS meanwhile. So we harvest now.
7896     If the log is closed, then this will just harvest the last writes, probably
7897     0 as they probably have been harvested.
7898 
7899     Note that it needs to be protected by mi->data_lock.
7900   */
7901   mysql_mutex_assert_owner(&mi->data_lock);
7902   rli->relay_log.harvest_bytes_written(&rli->log_space_total);
7903 end:
7904   DBUG_RETURN(error);
7905 }
7906 
7907 
7908 /**
7909    Detects, based on master's version (as found in the relay log), if master
7910    has a certain bug.
7911    @param rli Relay_log_info which tells the master's version
7912    @param bug_id Number of the bug as found in bugs.mysql.com
7913    @param report bool report error message, default TRUE
7914 
7915    @param pred Predicate function that will be called with @c param to
7916    check for the bug. If the function return @c true, the bug is present,
7917    otherwise, it is not.
7918 
7919    @param param  State passed to @c pred function.
7920 
7921    @return TRUE if master has the bug, FALSE if it does not.
7922 */
7923 bool rpl_master_has_bug(const Relay_log_info *rli, uint bug_id, bool report,
7924                         bool (*pred)(const void *), const void *param)
7925 {
7926   struct st_version_range_for_one_bug {
7927     uint        bug_id;
7928     Version introduced_in; // first version with bug
7929     Version fixed_in;      // first version with fix
7930   };
7931   static struct st_version_range_for_one_bug versions_for_all_bugs[]=
7932   {
7933     {24432, { 5, 0, 24 }, { 5, 0, 38 } },
7934     {24432, { 5, 1, 12 }, { 5, 1, 17 } },
7935     {33029, { 5, 0,  0 }, { 5, 0, 58 } },
7936     {33029, { 5, 1,  0 }, { 5, 1, 12 } },
7937     {37426, { 5, 1,  0 }, { 5, 1, 26 } },
7938   };
7939   const Version &master_ver=
7940     rli->relay_log.description_event_for_exec->server_version_split;
7941 
7942   for (uint i= 0;
7943        i < sizeof(versions_for_all_bugs)/sizeof(*versions_for_all_bugs);i++)
7944   {
7945     const Version &introduced_in= versions_for_all_bugs[i].introduced_in;
7946     const Version &fixed_in= versions_for_all_bugs[i].fixed_in;
7947     if ((versions_for_all_bugs[i].bug_id == bug_id) &&
7948         introduced_in <= master_ver &&
7949         fixed_in > master_ver &&
7950         (pred == NULL || (*pred)(param)))
7951     {
7952       if (!report)
7953 	return TRUE;
7954       // a short message for SHOW SLAVE STATUS (message length constraints)
7955       my_printf_error(ER_UNKNOWN_ERROR, "master may suffer from"
7956                       " http://bugs.mysql.com/bug.php?id=%u"
7957                       " so slave stops; check error log on slave"
7958                       " for more info", MYF(0), bug_id);
7959       // a verbose message for the error log
7960       rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR, NULL,
7961                   "According to the master's version ('%s'),"
7962                   " it is probable that master suffers from this bug:"
7963                       " http://bugs.mysql.com/bug.php?id=%u"
7964                       " and thus replicating the current binary log event"
7965                       " may make the slave's data become different from the"
7966                       " master's data."
7967                       " To take no risk, slave refuses to replicate"
7968                       " this event and stops."
7969                       " We recommend that all updates be stopped on the"
7970                       " master and slave, that the data of both be"
7971                       " manually synchronized,"
7972                       " that master's binary logs be deleted,"
7973                       " that master be upgraded to a version at least"
7974                       " equal to '%d.%d.%d'. Then replication can be"
7975                       " restarted.",
7976                       rli->relay_log.description_event_for_exec->server_version,
7977                       bug_id,
7978                       fixed_in[0], fixed_in[1], fixed_in[2]);
7979       return TRUE;
7980     }
7981   }
7982   return FALSE;
7983 }
7984 
/**
   BUG#33029: for all 5.0 versions up to 5.0.58 exclusive, and all 5.1
   versions up to 5.1.12 exclusive, if one statement in a SP generated an
   AUTO_INCREMENT value by the top statement, all statements after it would
   be considered to have generated an AUTO_INCREMENT value by the top
   statement, and an erroneous INSERT_ID value might be associated with
   these statements, which could cause a duplicate entry error and stop
   the slave.

   Detect a buggy master so that this can be worked around.
 */
7995 bool rpl_master_erroneous_autoinc(THD *thd)
7996 {
7997   if (thd->rgi_slave)
7998   {
7999     DBUG_EXECUTE_IF("simulate_bug33029", return TRUE;);
8000     return rpl_master_has_bug(thd->rgi_slave->rli, 33029, FALSE, NULL, NULL);
8001   }
8002   return FALSE;
8003 }
8004 
8005 
8006 static bool get_row_event_stmt_end(const char* buf,
8007                                    const Format_description_log_event *fdle)
8008 {
8009   uint8 const common_header_len= fdle->common_header_len;
8010   Log_event_type event_type= (Log_event_type)(uchar)buf[EVENT_TYPE_OFFSET];
8011 
8012   uint8 const post_header_len= fdle->post_header_len[event_type-1];
8013   const char *flag_start= buf + common_header_len;
8014   /*
8015     The term 4 below signifies that master is of 'an intermediate source', see
8016     Rows_log_event::Rows_log_event.
8017   */
8018   flag_start += RW_MAPID_OFFSET + ((post_header_len == 6) ? 4 :  RW_FLAGS_OFFSET);
8019 
8020   return (uint2korr(flag_start) & Rows_log_event::STMT_END_F) != 0;
8021 }
8022 
8023 
8024 /*
8025   Reset log event tracking data.
8026 */
8027 
8028 void Rows_event_tracker::reset()
8029 {
8030   binlog_file_name[0]= 0;
8031   first_seen= last_seen= 0;
8032   stmt_end_seen= false;
8033 }
8034 
8035 
/*
  Update log event tracking data.

  The binlog positions of the first- and last-seen events get memorized,
  as well as the end-of-statement status of the last one.
*/
8042 
8043 void Rows_event_tracker::update(const char* file_name, my_off_t pos,
8044                                 const char* buf,
8045                                 const Format_description_log_event *fdle)
8046 {
8047   if (!first_seen)
8048   {
8049     first_seen= pos;
8050     strmake(binlog_file_name, file_name, sizeof(binlog_file_name) - 1);
8051   }
8052   last_seen= pos;
8053   DBUG_ASSERT(stmt_end_seen == 0);              // We can only have one
8054   stmt_end_seen= get_row_event_stmt_end(buf, fdle);
8055 };
8056 
8057 
/**
  The function is called when the next event is read
  after a sequence of Rows- log-events. It checks the end-of-statement status
  of the past sequence to report on any issue.
  In the positive case the tracker gets reset.

  @return true  when the Rows- event group integrity is found compromised,
                false otherwise.
*/
8067 bool Rows_event_tracker::check_and_report(const char* file_name,
8068                                           my_off_t pos)
8069 {
8070   if (last_seen)
8071   {
8072     // there was at least one "block" event previously
8073     if (!stmt_end_seen)
8074     {
8075         sql_print_error("Slave IO thread did not receive an expected "
8076                         "Rows-log end-of-statement for event starting "
8077                         "at log '%s' position %llu "
8078                         "whose last block was seen at log '%s' position %llu. "
8079                         "The end-of-statement should have been delivered "
8080                         "before the current one at log '%s' position %llu",
8081                         binlog_file_name, first_seen,
8082                         binlog_file_name, last_seen, file_name, pos);
8083         return true;
8084     }
8085     reset();
8086   }
8087 
8088   return false;
8089 }
8090 
8091 /**
8092   @} (end of group Replication)
8093 */
8094 
8095 #endif /* HAVE_REPLICATION */
8096