1 /* Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software Foundation,
21    51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22 
23 #ifndef RPL_RLI_H
24 #define RPL_RLI_H
25 
26 #include "sql_priv.h"
27 #include "rpl_info.h"
28 #include "rpl_utility.h"
29 #include "rpl_tblmap.h"
30 #include "rpl_reporting.h"
31 #include "rpl_utility.h"
32 #include "log.h"                         /* LOG_INFO */
33 #include "binlog.h"                      /* MYSQL_BIN_LOG */
34 #include "sql_class.h"                   /* THD */
35 
36 struct RPL_TABLE_LIST;
37 class Master_info;
38 extern uint sql_slave_skip_counter;
39 
40 /*******************************************************************************
41 Replication SQL Thread
42 
43 Relay_log_info contains:
44   - the current relay log
45   - the current relay log offset
46   - master log name
47   - master log sequence corresponding to the last update
48   - misc information specific to the SQL thread
49 
50 Relay_log_info is initialized from a repository, i.e. table or file, if there is
one. Otherwise, data members are initialized with defaults by calling
52 init_relay_log_info().
53 
54 The relay.info table/file shall be updated whenever: (i) the relay log file
55 is rotated, (ii) SQL Thread is stopped, (iii) while processing a Xid_log_event,
56 (iv) after a Query_log_event (i.e. commit or rollback) and (v) after processing
57 any statement written to the binary log without a transaction context.
58 
59 The Xid_log_event is a commit for transactional engines and must be handled
60 differently to provide reliability/data integrity. In this case, positions
61 are updated within the context of the current transaction. So
62 
63   . If the relay.info is stored in a transactional repository and the server
64   crashes before successfully committing the transaction the changes to the
65   position table will be rolled back along with the data.
66 
67   . If the relay.info is stored in a non-transactional repository, for instance,
68   a file or a system table created using MyIsam, and the server crashes before
69   successfully committing the transaction the changes to the position table
70   will not be rolled back but data will.
71 
In particular, when there are mixed transactions, i.e. a transaction that updates
both transactional and non-transactional engines, the Xid_log_event is still used
74 but reliability/data integrity cannot be achieved as we shall explain in what
75 follows.
76 
77 Changes to non-transactional engines, such as MyIsam, cannot be rolled back if a
78 failure happens. For that reason, there is no point in updating the positions
79 within the boundaries of any on-going transaction. This is true for both commit
80 and rollback. If a failure happens after processing the pseudo-transaction but
81 before updating the positions, the transaction will be re-executed when the
82 slave is up most likely causing an error that needs to be manually circumvented.
83 This is a well-known issue when non-transactional statements are executed.
84 
85 Specifically, if rolling back any transaction, positions are updated outside the
86 transaction boundaries. However, there may be a problem in this scenario even
87 when only transactional engines are updated. This happens because if there is a
88 rollback and such transaction is written to the binary log, a non-transactional
89 engine was updated or a temporary table was created or dropped within its
90 boundaries.
91 
92 In particular, in both STATEMENT and MIXED logging formats, this happens because
93 any temporary table is automatically dropped after a shutdown/startup.
94 See BUG#26945 for further details.
95 
96 Statements written to the binary log outside the boundaries of a transaction are
DDLs or maintenance commands which are not transactional. This means that they
98 cannot be rolled back if a failure happens. In such cases, the positions are
99 updated after processing the events. If a failure happens after processing the
100 statement but before updating the positions, the statement will be
101 re-executed when the slave is up most likely causing an error that needs to be
102 manually circumvented. This is a well-known issue when non-transactional
103 statements are executed.
104 
105 The --sync-relay-log-info does not have effect when a system table, either
106 transactional or non-transactional is used.
107 
To correctly recover from failures, one should combine transactional system
109 tables along with the --relay-log-recovery.
110 *******************************************************************************/
111 class Relay_log_info : public Rpl_info
112 {
113   friend class Rpl_info_factory;
114 
115 public:
116   /**
117      Flags for the state of the replication.
118    */
119   enum enum_state_flag {
120     /** The replication thread is inside a statement */
121     IN_STMT,
122 
123     /** Flag counter.  Should always be last */
124     STATE_FLAGS_COUNT
125   };
126 
127   /*
128     The SQL thread owns one Relay_log_info, and each client that has
129     executed a BINLOG statement owns one Relay_log_info. This function
130     returns zero for the Relay_log_info object that belongs to the SQL
131     thread and nonzero for Relay_log_info objects that belong to
132     clients.
133   */
belongs_to_client()134   inline bool belongs_to_client()
135   {
136     DBUG_ASSERT(info_thd);
137     return !info_thd->slave_thread;
138   }
139 
140   /*
141     If true, events with the same server id should be replicated. This
142     field is set on creation of a relay log info structure by copying
143     the value of ::replicate_same_server_id and can be overridden if
144     necessary. For example of when this is done, check sql_binlog.cc,
145     where the BINLOG statement can be used to execute "raw" events.
146    */
147   bool replicate_same_server_id;
148 
  /*** The following variables can only be read when protected by data lock ****/
150   /*
151     cur_log_fd - file descriptor of the current read  relay log
152   */
153   File cur_log_fd;
154   /*
155     Protected with internal locks.
156     Must get data_lock when resetting the logs.
157   */
158   MYSQL_BIN_LOG relay_log;
159   LOG_INFO linfo;
160 
161   /*
162    cur_log
163      Pointer that either points at relay_log.get_log_file() or
164      &rli->cache_buf, depending on whether the log is hot or there was
165      the need to open a cold relay_log.
166 
167    cache_buf
168      IO_CACHE used when opening cold relay logs.
169    */
170   IO_CACHE cache_buf,*cur_log;
171 
172   /*
173     Identifies when the recovery process is going on.
174     See sql/slave.cc:init_recovery for further details.
175   */
176   bool is_relay_log_recovery;
177 
178   /* The following variables are safe to read any time */
179 
180   /*
181     When we restart slave thread we need to have access to the previously
182     created temporary tables. Modified only on init/end and by the SQL
183     thread, read only by SQL thread.
184   */
185   TABLE *save_temporary_tables;
186 
187   /* parent Master_info structure */
188   Master_info *mi;
189 
190   /*
191     Needed to deal properly with cur_log getting closed and re-opened with
192     a different log under our feet
193   */
194   uint32 cur_log_old_open_count;
195 
196   /*
197     If on init_info() call error_on_rli_init_info is true that means
198     that previous call to init_info() terminated with an error, RESET
199     SLAVE must be executed and the problem fixed manually.
200    */
201   bool error_on_rli_init_info;
202 
203   /*
204     Let's call a group (of events) :
205       - a transaction
206       or
      - an autocommitting query + its associated events (INSERT_ID,
208     TIMESTAMP...)
209     We need these rli coordinates :
210     - relay log name and position of the beginning of the group we currently are
211     executing. Needed to know where we have to restart when replication has
212     stopped in the middle of a group (which has been rolled back by the slave).
213     - relay log name and position just after the event we have just
214     executed. This event is part of the current group.
215     Formerly we only had the immediately above coordinates, plus a 'pending'
216     variable, but this dealt wrong with the case of a transaction starting on a
    relay log and finishing (committing) on another relay log. Case which can
218     happen when, for example, the relay log gets rotated because of
219     max_binlog_size.
220   */
221 protected:
222   char group_relay_log_name[FN_REFLEN];
223   ulonglong group_relay_log_pos;
224   char event_relay_log_name[FN_REFLEN];
225   ulonglong event_relay_log_pos;
226   ulonglong future_event_relay_log_pos;
227 
228   /*
229      Original log name and position of the group we're currently executing
230      (whose coordinates are group_relay_log_name/pos in the relay log)
231      in the master's binlog. These concern the *group*, because in the master's
232      binlog the log_pos that comes with each event is the position of the
233      beginning of the group.
234 
235     Note: group_master_log_name, group_master_log_pos must only be
236     written from the thread owning the Relay_log_info (SQL thread if
237     !belongs_to_client(); client thread executing BINLOG statement if
238     belongs_to_client()).
239   */
240   char group_master_log_name[FN_REFLEN];
241   volatile my_off_t group_master_log_pos;
242 
243   /*
244     When it commits, InnoDB internally stores the master log position it has
245     processed so far; the position to store is the one of the end of the
246     committing event (the COMMIT query event, or the event if in autocommit
247     mode).
248   */
249 #if MYSQL_VERSION_ID < 40100
250   ulonglong future_master_log_pos;
251 #else
252   ulonglong future_group_master_log_pos;
253 #endif
254 
255 private:
256   Gtid_set gtid_set;
257   /* Last gtid retrieved by IO thread */
258   Gtid last_retrieved_gtid;
259 
260 public:
get_last_retrieved_gtid()261   Gtid *get_last_retrieved_gtid() { return &last_retrieved_gtid; }
set_last_retrieved_gtid(Gtid gtid)262   void set_last_retrieved_gtid(Gtid gtid) { last_retrieved_gtid= gtid; }
add_logged_gtid(rpl_sidno sidno,rpl_gno gno)263   int add_logged_gtid(rpl_sidno sidno, rpl_gno gno)
264   {
265     int ret= 0;
266     global_sid_lock->assert_some_lock();
267     DBUG_ASSERT(sidno <= global_sid_map->get_max_sidno());
268     gtid_set.ensure_sidno(sidno);
269     if (gtid_set._add_gtid(sidno, gno) != RETURN_STATUS_OK)
270       ret= 1;
271     return ret;
272   }
get_gtid_set()273   const Gtid_set *get_gtid_set() const { return &gtid_set; }
274 
275   int init_relay_log_pos(const char* log,
276                          ulonglong pos, bool need_data_lock,
277                          const char** errmsg,
278                          bool keep_looking_for_fd);
279 
280   /*
281     Handling of the relay_log_space_limit optional constraint.
282     ignore_log_space_limit is used to resolve a deadlock between I/O and SQL
283     threads, the SQL thread sets it to unblock the I/O thread and make it
284     temporarily forget about the constraint.
285   */
286   ulonglong log_space_limit,log_space_total;
287   bool ignore_log_space_limit;
288 
289   /*
    Used by the SQL thread to instruct the IO thread to rotate
291     the logs when the SQL thread needs to purge to release some
292     disk space.
293    */
294   bool sql_force_rotate_relay;
295 
296   time_t last_master_timestamp;
297 
298   void clear_until_condition();
299 
300   /**
301     Reset the delay.
302     This is used by RESET SLAVE to clear the delay.
303   */
clear_sql_delay()304   void clear_sql_delay()
305   {
306     sql_delay= 0;
307   }
308 
309   /*
310     Needed for problems when slave stops and we want to restart it
311     skipping one or more events in the master log that have caused
312     errors, and have been manually applied by DBA already.
313   */
314   volatile uint32 slave_skip_counter;
315   volatile ulong abort_pos_wait;	/* Incremented on change master */
316   mysql_mutex_t log_space_lock;
317   mysql_cond_t log_space_cond;
318 
319   /*
320      Condition and its parameters from START SLAVE UNTIL clause.
321 
322      UNTIL condition is tested with is_until_satisfied() method that is
323      called by exec_relay_log_event(). is_until_satisfied() caches the result
324      of the comparison of log names because log names don't change very often;
325      this cache is invalidated by parts of code which change log names with
326      notify_*_log_name_updated() methods. (They need to be called only if SQL
327      thread is running).
328    */
329   enum {UNTIL_NONE= 0, UNTIL_MASTER_POS, UNTIL_RELAY_POS,
330         UNTIL_SQL_BEFORE_GTIDS, UNTIL_SQL_AFTER_GTIDS,
331         UNTIL_SQL_AFTER_MTS_GAPS, UNTIL_DONE
332 }
333     until_condition;
334   char until_log_name[FN_REFLEN];
335   ulonglong until_log_pos;
336   /* extension extracted from log_name and converted to int */
337   ulong until_log_name_extension;
338   /**
339     The START SLAVE UNTIL SQL_*_GTIDS initializes until_sql_gtids.
340     Each time a gtid is about to be processed, we check if it is in the
341     set. Depending on until_condition, SQL thread is stopped before or
342     after applying the gtid.
343   */
344   Gtid_set until_sql_gtids;
345   /*
346     True if the current event is the first gtid event to be processed
347     after executing START SLAVE UNTIL SQL_*_GTIDS.
348   */
349   bool until_sql_gtids_first_event;
350   /*
351      Cached result of comparison of until_log_name and current log name
     -2 means uninitialised, -1,0,1 are comparison results
353   */
354   enum
355   {
356     UNTIL_LOG_NAMES_CMP_UNKNOWN= -2, UNTIL_LOG_NAMES_CMP_LESS= -1,
357     UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1
358   } until_log_names_cmp_result;
359 
360   char cached_charset[6];
361   /*
362     trans_retries varies between 0 to slave_transaction_retries and counts how
363     many times the slave has retried the present transaction; gets reset to 0
364     when the transaction finally succeeds. retried_trans is a cumulative
365     counter: how many times the slave has retried a transaction (any) since
366     slave started.
367   */
368   ulong trans_retries, retried_trans;
369 
370   /*
371     If the end of the hot relay log is made of master's events ignored by the
372     slave I/O thread, these two keep track of the coords (in the master's
373     binlog) of the last of these events seen by the slave I/O thread. If not,
374     ign_master_log_name_end[0] == 0.
375     As they are like a Rotate event read/written from/to the relay log, they
376     are both protected by rli->relay_log.LOCK_log.
377   */
378   char ign_master_log_name_end[FN_REFLEN];
379   ulonglong ign_master_log_pos_end;
380 
381   /*
    Identifies where the SQL Thread should create temporary files for the
383     LOAD DATA INFILE. This is used for security reasons.
384    */
385   char slave_patternload_file[FN_REFLEN];
386   size_t slave_patternload_file_size;
387 
388   /**
389     Identifies the last time a checkpoint routine has been executed.
390   */
391   struct timespec last_clock;
392 
393   /**
394     Invalidates cached until_log_name and group_relay_log_name comparison
    result. Should be called after any update of group_relay_log_name if
    there are chances that sql_thread is running.
397   */
notify_group_relay_log_name_update()398   inline void notify_group_relay_log_name_update()
399   {
400     if (until_condition==UNTIL_RELAY_POS)
401       until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
402   }
403 
404   /**
405     The same as @c notify_group_relay_log_name_update but for
406     @c group_master_log_name.
407   */
notify_group_master_log_name_update()408   inline void notify_group_master_log_name_update()
409   {
410     if (until_condition==UNTIL_MASTER_POS)
411       until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
412   }
413 
inc_event_relay_log_pos()414   inline void inc_event_relay_log_pos()
415   {
416     event_relay_log_pos= future_event_relay_log_pos;
417   }
418 
419   int inc_group_relay_log_pos(ulonglong log_pos,
420                               bool need_data_lock);
421 
422   int wait_for_pos(THD* thd, String* log_name, longlong log_pos,
423 		   double timeout);
424   int wait_for_gtid_set(THD* thd, String* gtid, double timeout);
425   void close_temporary_tables();
426 
427   /* Check if UNTIL condition is satisfied. See slave.cc for more. */
428   bool is_until_satisfied(THD *thd, Log_event *ev);
until_pos()429   inline ulonglong until_pos()
430   {
431     return ((until_condition == UNTIL_MASTER_POS) ? group_master_log_pos :
432 	    group_relay_log_pos);
433   }
434 
435   RPL_TABLE_LIST *tables_to_lock;           /* RBR: Tables to lock  */
436   uint tables_to_lock_count;        /* RBR: Count of tables to lock */
437   table_mapping m_table_map;      /* RBR: Mapping table-id to table */
438   /* RBR: Record Rows_query log event */
439   Rows_query_log_event* rows_query_ev;
440 
get_table_data(TABLE * table_arg,table_def ** tabledef_var,TABLE ** conv_table_var)441   bool get_table_data(TABLE *table_arg, table_def **tabledef_var, TABLE **conv_table_var) const
442   {
443     DBUG_ASSERT(tabledef_var && conv_table_var);
444     for (TABLE_LIST *ptr= tables_to_lock ; ptr != NULL ; ptr= ptr->next_global)
445       if (ptr->table == table_arg)
446       {
447         *tabledef_var= &static_cast<RPL_TABLE_LIST*>(ptr)->m_tabledef;
448         *conv_table_var= static_cast<RPL_TABLE_LIST*>(ptr)->m_conv_table;
449         DBUG_PRINT("debug", ("Fetching table data for table %s.%s:"
450                              " tabledef: %p, conv_table: %p",
451                              table_arg->s->db.str, table_arg->s->table_name.str,
452                              *tabledef_var, *conv_table_var));
453         return true;
454       }
455     return false;
456   }
457 
458   /**
459     Last charset (6 bytes) seen by slave SQL thread is cached here; it helps
460     the thread save 3 @c get_charset() per @c Query_log_event if the charset is not
461     changing from event to event (common situation).
462     When the 6 bytes are equal to 0 is used to mean "cache is invalidated".
463   */
464   void cached_charset_invalidate();
465   bool cached_charset_compare(char *charset) const;
466 
467   void cleanup_context(THD *, bool);
468   void slave_close_thread_tables(THD *);
469   void clear_tables_to_lock();
470   int purge_relay_logs(THD *thd, bool just_reset, const char** errmsg);
471 
472   /*
473     Used to defer stopping the SQL thread to give it a chance
474     to finish up the current group of events.
475     The timestamp is set and reset in @c sql_slave_killed().
476   */
477   time_t last_event_start_time;
478   /*
479     A container to hold on Intvar-, Rand-, Uservar- log-events in case
480     the slave is configured with table filtering rules.
481     The withhold events are executed when their parent Query destiny is
482     determined for execution as well.
483   */
484   Deferred_log_events *deferred_events;
485 
486   /*
487     State of the container: true stands for IRU events gathering,
488     false does for execution, either deferred or direct.
489   */
490   bool deferred_events_collecting;
491 
492   /*****************************************************************************
493     WL#5569 MTS
494 
495     legends:
496     C  - Coordinator;
497     W  - Worker;
498     WQ - Worker Queue containing event assignments
499   */
500   DYNAMIC_ARRAY workers; // number's is determined by global slave_parallel_workers
501   volatile ulong pending_jobs;
502   mysql_mutex_t pending_jobs_lock;
503   mysql_cond_t pending_jobs_cond;
504   mysql_mutex_t exit_count_lock; // mutex of worker exit count
505   ulong       mts_slave_worker_queue_len_max;
506   ulonglong   mts_pending_jobs_size;      // actual mem usage by WQ:s
507   ulonglong   mts_pending_jobs_size_max;  // max of WQ:s size forcing C to wait
508   bool    mts_wq_oversize;      // C raises flag to wait some memory's released
509   Slave_worker  *last_assigned_worker;// is set to a Worker at assigning a group
510   /*
511     master-binlog ordered queue of Slave_job_group descriptors of groups
512     that are under processing. The queue size is @c checkpoint_group.
513   */
514   Slave_committed_queue *gaq;
515   /*
516     Container for references of involved partitions for the current event group
517   */
518   DYNAMIC_ARRAY curr_group_assigned_parts;
519   DYNAMIC_ARRAY curr_group_da;  // deferred array to hold partition-info-free events
520   bool curr_group_seen_gtid;   // current group started with Gtid-event or not
521   bool curr_group_seen_begin;   // current group started with B-event or not
522   bool curr_group_isolated;     // current group requires execution in isolation
523   bool mts_end_group_sets_max_dbs; // flag indicates if partitioning info is discovered
524   volatile ulong mts_wq_underrun_w_id;  // Id of a Worker whose queue is getting empty
525   /*
526      Ongoing excessive overrun counter to correspond to number of events that
527      are being scheduled while a WQ is close to be filled up.
528      `Close' is defined as (100 - mts_worker_underrun_level) %.
529      The counter is incremented each time a WQ get filled over that level
530      and decremented when the level drops below.
531      The counter therefore describes level of saturation that Workers
532      are experiencing and is used as a parameter to compute a nap time for
533      Coordinator in order to avoid reaching WQ limits.
534   */
535   volatile long mts_wq_excess_cnt;
536   long  mts_worker_underrun_level; // % of WQ size at which W is considered hungry
537   ulong mts_coordinator_basic_nap; // C sleeps to avoid WQs overrun
538   ulong opt_slave_parallel_workers; // cache for ::opt_slave_parallel_workers
539   ulong slave_parallel_workers; // the one slave session time number of workers
540   ulong exit_counter; // Number of workers contributed to max updated group index
541   ulonglong max_updated_index;
542   ulong recovery_parallel_workers; // number of workers while recovering
543   uint checkpoint_seqno;  // counter of groups executed after the most recent CP
544   uint checkpoint_group;  // cache for ::opt_mts_checkpoint_group
545   MY_BITMAP recovery_groups;  // bitmap used during recovery
546   bool recovery_groups_inited;
547   ulong mts_recovery_group_cnt; // number of groups to execute at recovery
548   ulong mts_recovery_index;     // running index of recoverable groups
549   bool mts_recovery_group_seen_begin;
550 
551   /*
    While distributing events based on their properties MTS
    Coordinator changes its mts group status.
    Transition normally flows to follow `=>' arrows on the diagram:
555 
556             +----------------------------+
557             V                            |
558     MTS_NOT_IN_GROUP =>                  |
559         {MTS_IN_GROUP => MTS_END_GROUP --+} while (!killed) => MTS_KILLED_GROUP
560 
561     MTS_END_GROUP has `->' loop breaking link to MTS_NOT_IN_GROUP when
562     Coordinator synchronizes with Workers by demanding them to
563     complete their assignments.
564   */
565   enum
566   {
567     /*
568        no new events were scheduled after last synchronization,
569        includes Single-Threaded-Slave case.
570     */
571     MTS_NOT_IN_GROUP,
572 
573     MTS_IN_GROUP,    /* at least one not-terminal event scheduled to a Worker */
574     MTS_END_GROUP,   /* the last scheduled event is a terminal event */
575     MTS_KILLED_GROUP /* Coordinator gave up to reach MTS_END_GROUP */
576   } mts_group_status;
577 
578   /*
579     MTS statistics:
580   */
581   ulonglong mts_events_assigned; // number of events (statements) scheduled
582   ulonglong mts_groups_assigned; // number of groups (transactions) scheduled
583   volatile ulong mts_wq_overrun_cnt; // counter of all mts_wq_excess_cnt increments
584   ulong wq_size_waits_cnt;    // number of times C slept due to WQ:s oversize
585   /*
586     a counter for sleeps due to Coordinator
587     experienced waiting when Workers get hungry again
588   */
589   ulong mts_wq_no_underrun_cnt;
590   ulong mts_wq_overfill_cnt;  // counter of C waited due to a WQ queue was full
591   /*
     A sorted array of the Workers' current assignment numbers to provide
593      approximate view on Workers loading.
594      The first row of the least occupied Worker is queried at assigning
595      a new partition. Is updated at checkpoint commit to the main RLI.
596   */
597   DYNAMIC_ARRAY least_occupied_workers;
598   time_t mts_last_online_stat;
599   /* end of MTS statistics */
600 
601   /* most of allocation in the coordinator rli is there */
602   void init_workers(ulong);
603 
604   /* counterpart of the init */
605   void deinit_workers();
606 
607   /**
608      returns true if there is any gap-group of events to execute
609                   at slave starting phase.
610   */
is_mts_recovery()611   inline bool is_mts_recovery() const
612   {
613     return mts_recovery_group_cnt != 0;
614   }
615 
clear_mts_recovery_groups()616   inline void clear_mts_recovery_groups()
617   {
618     if (recovery_groups_inited)
619     {
620       bitmap_free(&recovery_groups);
621       mts_recovery_group_cnt= 0;
622       recovery_groups_inited= false;
623     }
624   }
625 
626   /**
627      returns true if events are to be executed in parallel
628   */
is_parallel_exec()629   inline bool is_parallel_exec() const
630   {
631     bool ret= (slave_parallel_workers > 0) && !is_mts_recovery();
632 
633     DBUG_ASSERT(!ret || workers.elements > 0);
634 
635     return ret;
636   }
637 
638   /**
639      returns true if Coordinator is scheduling events belonging to
640      the same group and has not reached yet its terminal event.
641   */
is_mts_in_group()642   inline bool is_mts_in_group()
643   {
644     return is_parallel_exec() &&
645       mts_group_status == MTS_IN_GROUP;
646   }
647 
648   /**
649      While a group is executed by a Worker the relay log can change.
650      Coordinator notifies Workers about this event. Worker is supposed
651      to commit to the recovery table with the new info.
652   */
653   void reset_notified_relay_log_change();
654 
655   /**
656      While a group is executed by a Worker the relay log can change.
657      Coordinator notifies Workers about this event. Coordinator and Workers
658      maintain a bitmap of executed group that is reset with a new checkpoint.
659   */
660   void reset_notified_checkpoint(ulong, time_t, bool);
661 
662   /**
663      Called when gaps execution is ended so it is crash-safe
664      to reset the last session Workers info.
665   */
666   bool mts_finalize_recovery();
667   /*
668    * End of MTS section ******************************************************/
669 
670   /* The general cleanup that slave applier may need at the end of query. */
cleanup_after_query()671   inline void cleanup_after_query()
672   {
673     if (deferred_events)
674       deferred_events->rewind();
675   };
676   /* The general cleanup that slave applier may need at the end of session. */
cleanup_after_session()677   void cleanup_after_session()
678   {
679     if (deferred_events)
680       delete deferred_events;
681   };
682 
683   /**
684     Helper function to do after statement completion.
685 
686     This function is called from an event to complete the group by
687     either stepping the group position, if the "statement" is not
688     inside a transaction; or increase the event position, if the
689     "statement" is inside a transaction.
690 
691     @param event_log_pos
692     Master log position of the event. The position is recorded in the
693     relay log info and used to produce information for <code>SHOW
694     SLAVE STATUS</code>.
695   */
696   int stmt_done(my_off_t event_log_pos);
697 
698 
699   /**
700      Set the value of a replication state flag.
701 
702      @param flag Flag to set
703    */
set_flag(enum_state_flag flag)704   void set_flag(enum_state_flag flag)
705   {
706     m_flags |= (1UL << flag);
707   }
708 
709   /**
710      Get the value of a replication state flag.
711 
712      @param flag Flag to get value of
713 
714      @return @c true if the flag was set, @c false otherwise.
715    */
get_flag(enum_state_flag flag)716   bool get_flag(enum_state_flag flag)
717   {
718     return m_flags & (1UL << flag);
719   }
720 
721   /**
722      Clear the value of a replication state flag.
723 
724      @param flag Flag to clear
725    */
clear_flag(enum_state_flag flag)726   void clear_flag(enum_state_flag flag)
727   {
728     m_flags &= ~(1UL << flag);
729   }
730 
731   /**
732      Is the replication inside a group?
733 
734      Replication is inside a group if either:
735      - The OPTION_BEGIN flag is set, meaning we're inside a transaction
     - The IN_STMT flag is set, meaning we're inside a statement
     - There is a GTID owned by the thd, meaning we've passed a SET GTID_NEXT
738 
739      @retval true Replication thread is currently inside a group
740      @retval false Replication thread is currently not inside a group
741    */
is_in_group()742   bool is_in_group() const {
743     return (info_thd->variables.option_bits & OPTION_BEGIN) ||
744       (m_flags & (1UL << IN_STMT)) ||
745       /* If a SET GTID_NEXT was issued we are inside of a group */
746       info_thd->owned_gtid.sidno;
747   }
748 
749   int count_relay_log_space();
750 
751   int rli_init_info();
752   void end_info();
753   int flush_info(bool force= FALSE);
754   int flush_current_log();
755   void set_master_info(Master_info *info);
756 
get_future_event_relay_log_pos()757   inline ulonglong get_future_event_relay_log_pos() { return future_event_relay_log_pos; }
set_future_event_relay_log_pos(ulonglong log_pos)758   inline void set_future_event_relay_log_pos(ulonglong log_pos)
759   {
760     future_event_relay_log_pos= log_pos;
761   }
762 
get_group_master_log_name()763   inline const char* get_group_master_log_name() { return group_master_log_name; }
get_group_master_log_pos()764   inline ulonglong get_group_master_log_pos() { return group_master_log_pos; }
set_group_master_log_name(const char * log_file_name)765   inline void set_group_master_log_name(const char *log_file_name)
766   {
767      strmake(group_master_log_name,log_file_name, sizeof(group_master_log_name)-1);
768   }
set_group_master_log_pos(ulonglong log_pos)769   inline void set_group_master_log_pos(ulonglong log_pos)
770   {
771     group_master_log_pos= log_pos;
772   }
773 
get_group_relay_log_name()774   inline const char* get_group_relay_log_name() { return group_relay_log_name; }
get_group_relay_log_pos()775   inline ulonglong get_group_relay_log_pos() { return group_relay_log_pos; }
set_group_relay_log_name(const char * log_file_name)776   inline void set_group_relay_log_name(const char *log_file_name)
777   {
778      strmake(group_relay_log_name,log_file_name, sizeof(group_relay_log_name)-1);
779   }
set_group_relay_log_name(const char * log_file_name,size_t len)780   inline void set_group_relay_log_name(const char *log_file_name, size_t len)
781   {
782      strmake(group_relay_log_name, log_file_name, len);
783   }
set_group_relay_log_pos(ulonglong log_pos)784   inline void set_group_relay_log_pos(ulonglong log_pos)
785   {
786     group_relay_log_pos= log_pos;
787   }
788 
get_event_relay_log_name()789   inline const char* get_event_relay_log_name() { return event_relay_log_name; }
get_event_relay_log_pos()790   inline ulonglong get_event_relay_log_pos() { return event_relay_log_pos; }
set_event_relay_log_name(const char * log_file_name)791   inline void set_event_relay_log_name(const char *log_file_name)
792   {
793      strmake(event_relay_log_name,log_file_name, sizeof(event_relay_log_name)-1);
794   }
set_event_relay_log_name(const char * log_file_name,size_t len)795   inline void set_event_relay_log_name(const char *log_file_name, size_t len)
796   {
797      strmake(event_relay_log_name,log_file_name, len);
798   }
set_event_relay_log_pos(ulonglong log_pos)799   inline void set_event_relay_log_pos(ulonglong log_pos)
800   {
801     event_relay_log_pos= log_pos;
802   }
get_rpl_log_name()803   inline const char* get_rpl_log_name()
804   {
805     return (group_master_log_name[0] ? group_master_log_name : "FIRST");
806   }
807 
808 #if MYSQL_VERSION_ID < 40100
get_future_master_log_pos()809   inline ulonglong get_future_master_log_pos() { return future_master_log_pos; }
810 #else
get_future_group_master_log_pos()811   inline ulonglong get_future_group_master_log_pos() { return future_group_master_log_pos; }
set_future_group_master_log_pos(ulonglong log_pos)812   inline void set_future_group_master_log_pos(ulonglong log_pos)
813   {
814     future_group_master_log_pos= log_pos;
815   }
816 #endif
817 
818   static size_t get_number_info_rli_fields();  // static: callable without an instance. NOTE(review): presumably the count of fields stored for this repository -- confirm
819 
820   /**
821     Indicate that a delay starts.
822 
823     This does not actually sleep; it only sets the state of this
824     Relay_log_info object to delaying so that the correct state can be
825     reported by SHOW SLAVE STATUS and SHOW PROCESSLIST.
826 
827     Requires rli->data_lock.
828 
829     @param delay_end The time when the delay shall end.
830   */
start_sql_delay(time_t delay_end)831   void start_sql_delay(time_t delay_end)
832   {
833     mysql_mutex_assert_owner(&data_lock);
834     sql_delay_end= delay_end;
835     THD_STAGE_INFO(info_thd, stage_sql_thd_waiting_until_delay);
836   }
837 
get_sql_delay()838   int32 get_sql_delay() { return sql_delay; }
set_sql_delay(time_t _sql_delay)839   void set_sql_delay(time_t _sql_delay) { sql_delay= _sql_delay; }
get_sql_delay_end()840   time_t get_sql_delay_end() { return sql_delay_end; }
841 
842   Relay_log_info(bool is_slave_recovery  /* the PSI key parameters below exist only when HAVE_PSI_INTERFACE is defined */
843 #ifdef HAVE_PSI_INTERFACE
844                  ,PSI_mutex_key *param_key_info_run_lock,
845                  PSI_mutex_key *param_key_info_data_lock,
846                  PSI_mutex_key *param_key_info_sleep_lock,
847                  PSI_mutex_key *param_key_info_data_cond,  /* NOTE(review): the *_cond keys are typed PSI_mutex_key here -- confirm this is intended */
848                  PSI_mutex_key *param_key_info_start_cond,
849                  PSI_mutex_key *param_key_info_stop_cond,
850                  PSI_mutex_key *param_key_info_sleep_cond
851 #endif
852                  , uint param_id  /* always present, regardless of HAVE_PSI_INTERFACE */
853                 );
854   virtual ~Relay_log_info();
855 
856   /*
857     Records whether a warning message about unsafe execution has
858     already been printed, to avoid cluttering the error log
859     with several warning messages.
860   */
861   bool reported_unsafe_warning;
862 
863   /*
864     sql_thread_kill_accepted is set to TRUE when the killed status is recognized.
865   */
866   bool sql_thread_kill_accepted;
867 
get_row_stmt_start_timestamp()868   time_t get_row_stmt_start_timestamp()
869   {
870     return row_stmt_start_timestamp;
871   }
872 
set_row_stmt_start_timestamp()873   time_t set_row_stmt_start_timestamp()
874   {
875     if (row_stmt_start_timestamp == 0)
876       row_stmt_start_timestamp= my_time(0);
877 
878     return row_stmt_start_timestamp;
879   }
880 
reset_row_stmt_start_timestamp()881   void reset_row_stmt_start_timestamp()
882   {
883     row_stmt_start_timestamp= 0;
884   }
885 
set_long_find_row_note_printed()886   void set_long_find_row_note_printed()
887   {
888     long_find_row_note_printed= true;
889   }
890 
unset_long_find_row_note_printed()891   void unset_long_find_row_note_printed()
892   {
893     long_find_row_note_printed= false;
894   }
895 
is_long_find_row_note_printed()896   bool is_long_find_row_note_printed()
897   {
898     return long_find_row_note_printed;
899   }
900 
901 public:
902   /**
903     Delete the existing event and set a new one.  This class is
904     responsible for freeing the event; the caller must not free it.
905   */
906   virtual void set_rli_description_event(Format_description_log_event *fdle);
907 
908   /**
909     Return the current Format_description_log_event.
910   */
get_rli_description_event()911   Format_description_log_event *get_rli_description_event() const
912   {
913     return rli_description_event;
914   }
915 
916   /**
917     Adapt the slave applier to a specific master version.
918   */
919   void adapt_to_master_version(Format_description_log_event *fdle);  // declared only; how fdle is used is not visible in this header
920   uchar slave_version_split[3]; // bytes of the slave server version
921   /*
922     The relay log info repository should be updated on relay log
923     rotate. But when a transaction is split across two relay logs,
924     updating the repository would cause unexpected results, so the update
925     is postponed until the 'commit' of the transaction is executed.
926 
927     This flag is set to 'true' when such a 'forced flush' (at the
928     time of relay log rotation) has been postponed because the transaction
929     is split across the relay logs.
930   */
931   bool force_flush_postponed_due_to_split_trans;
932 
933 protected:
934   Format_description_log_event *rli_description_event;  // pointer handed out by get_rli_description_event()
935 
936 private:
937 
938   /**
939     Delay slave SQL thread by this amount, compared to master (in
940     seconds). This is set with CHANGE MASTER TO MASTER_DELAY=X.
941 
942     Guarded by data_lock.  Initialized by the client thread executing
943     START SLAVE.  Written by client threads executing CHANGE MASTER TO
944     MASTER_DELAY=X.  Read by SQL thread and by client threads
945     executing SHOW SLAVE STATUS.  Note: must not be written while the
946     slave SQL thread is running, since the SQL thread reads it without
947     a lock when executing flush_info().
948   */
949   int sql_delay;  /* get_sql_delay() returns this as int32; set_sql_delay() narrows its time_t argument into it */
950 
951   /**
952     During a delay, specifies the point in time when the delay ends.
953 
954     This is used for the SQL_Remaining_Delay column in SHOW SLAVE STATUS.
955 
956     Guarded by data_lock. Written by the sql thread.  Read by client
957     threads executing SHOW SLAVE STATUS.
958   */
959   time_t sql_delay_end;  /* written by start_sql_delay(), which asserts that data_lock is held */
960 
961   uint32 m_flags;  /* bit flags; is_in_group() tests the IN_STMT bit as (1UL << IN_STMT) */
962 
963   /*
964     Before the MASTER_DELAY parameter was added (WL#344), relay_log.info
965     had 4 lines. Now it has 5 lines.
966   */
967   static const int LINES_IN_RELAY_LOG_INFO_WITH_DELAY= 5;
968 
969   /*
970     Before WL#5599, relay_log.info had 5 lines. Now it has 6 lines.
971   */
972   static const int LINES_IN_RELAY_LOG_INFO_WITH_WORKERS= 6;
973 
974   /*
975     Before the Id was added (BUG#2334346), relay_log.info
976     had 6 lines. Now it has 7 lines.
977   */
978   static const int LINES_IN_RELAY_LOG_INFO_WITH_ID= 7;
979 
980   bool read_info(Rpl_info_handler *from);  // private; NOTE(review): presumably loads the fields from the handler -- meaning of the bool result not visible here
981   bool write_info(Rpl_info_handler *to);  // private; NOTE(review): presumably stores the fields through the handler -- confirm
982 
983   Relay_log_info(const Relay_log_info& info);  // copy constructor declared private; NOTE(review): presumably left undefined to forbid copying -- confirm
984   Relay_log_info& operator=(const Relay_log_info& info);  // copy assignment declared private, same remark as above
985 
986   /*
987     Runtime state for printing a note when the slave is taking
988     too long while processing a row event.
989    */
990   time_t row_stmt_start_timestamp;  /* 0 means "not set": set_row_stmt_start_timestamp() only stamps when it is 0 */
991   bool long_find_row_note_printed;  /* toggled by set_/unset_long_find_row_note_printed() */
992 
993 };
994 
995 bool mysql_show_relaylog_events(THD* thd);  // free function, declared only; meaning of the bool result is not visible in this header
996 
997 /**
998    @param  thd a reference to THD
999    @return TRUE if thd belongs to a Worker thread and FALSE otherwise.
1000 */
is_mts_worker(const THD * thd)1001 inline bool is_mts_worker(const THD *thd)
1002 {
1003   return thd->system_thread == SYSTEM_THREAD_SLAVE_WORKER;
1004 }
1005 
1006 #endif /* RPL_RLI_H */
1007