1 /* Copyright (c) 2005, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software Foundation,
21    51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22 
23 #ifndef RPL_RLI_H
24 #define RPL_RLI_H
25 
26 #include "my_global.h"
27 
28 #include "binlog.h"            // MYSQL_BIN_LOG
29 #include "prealloced_array.h"  // Prealloced_array
30 #include "rpl_gtid.h"          // Gtid_set
31 #include "rpl_info.h"          // Rpl_info
32 #include "rpl_mts_submode.h"   // enum_mts_parallel_type
33 #include "rpl_tblmap.h"        // table_mapping
34 #include "rpl_utility.h"       // Deferred_log_events
35 #include "sql_class.h"         // THD
36 
37 #include <string>
38 #include <vector>
39 
40 struct RPL_TABLE_LIST;
41 class Master_info;
42 class Mts_submode;
43 class Commit_order_manager;
44 class Slave_committed_queue;
45 typedef struct st_db_worker_hash_entry db_worker_hash_entry;
46 extern uint sql_slave_skip_counter;
47 
48 typedef Prealloced_array<Slave_worker*, 4> Slave_worker_array;
49 
50 typedef struct slave_job_item
51 {
52   Log_event *data;
53   uint relay_number;
54   my_off_t relay_pos;
55 } Slave_job_item;
56 
57 /*******************************************************************************
58 Replication SQL Thread
59 
60 Relay_log_info contains:
61   - the current relay log
62   - the current relay log offset
63   - master log name
64   - master log sequence corresponding to the last update
65   - misc information specific to the SQL thread
66 
67 Relay_log_info is initialized from a repository, i.e. table or file, if there is
68 one. Otherwise, data members are initialized with defaults by calling
69 init_relay_log_info().
70 
71 The relay.info table/file shall be updated whenever: (i) the relay log file
72 is rotated, (ii) SQL Thread is stopped, (iii) while processing a Xid_log_event,
73 (iv) after a Query_log_event (i.e. commit or rollback) and (v) after processing
74 any statement written to the binary log without a transaction context.
75 
76 The Xid_log_event is a commit for transactional engines and must be handled
77 differently to provide reliability/data integrity. In this case, positions
78 are updated within the context of the current transaction. So
79 
80   . If the relay.info is stored in a transactional repository and the server
81   crashes before successfully committing the transaction the changes to the
82   position table will be rolled back along with the data.
83 
84   . If the relay.info is stored in a non-transactional repository, for instance,
85   a file or a system table created using MyIsam, and the server crashes before
86   successfully committing the transaction the changes to the position table
87   will not be rolled back but data will.
88 
89 In particular, when there are mixed transactions, i.e. a transaction that updates
90 both transactional and non-transactional engines, the Xid_log_event is still used
91 but reliability/data integrity cannot be achieved as we shall explain in what
92 follows.
93 
94 Changes to non-transactional engines, such as MyIsam, cannot be rolled back if a
95 failure happens. For that reason, there is no point in updating the positions
96 within the boundaries of any on-going transaction. This is true for both commit
97 and rollback. If a failure happens after processing the pseudo-transaction but
98 before updating the positions, the transaction will be re-executed when the
99 slave is up most likely causing an error that needs to be manually circumvented.
100 This is a well-known issue when non-transactional statements are executed.
101 
102 Specifically, if rolling back any transaction, positions are updated outside the
103 transaction boundaries. However, there may be a problem in this scenario even
104 when only transactional engines are updated. This happens because if there is a
105 rollback and such transaction is written to the binary log, a non-transactional
106 engine was updated or a temporary table was created or dropped within its
107 boundaries.
108 
109 In particular, in both STATEMENT and MIXED logging formats, this happens because
110 any temporary table is automatically dropped after a shutdown/startup.
111 See BUG#26945 for further details.
112 
113 Statements written to the binary log outside the boundaries of a transaction are
114 DDLs or maintenance commands which are not transactional. This means that they
115 cannot be rolled back if a failure happens. In such cases, the positions are
116 updated after processing the events. If a failure happens after processing the
117 statement but before updating the positions, the statement will be
118 re-executed when the slave is up most likely causing an error that needs to be
119 manually circumvented. This is a well-known issue when non-transactional
120 statements are executed.
121 
122 The --sync-relay-log-info does not have effect when a system table, either
123 transactional or non-transactional is used.
124 
125 To correctly recover from failures, one should combine transactional system
126 tables along with the --relay-log-recovery.
127 *******************************************************************************/
128 class Relay_log_info : public Rpl_info
129 {
130   friend class Rpl_info_factory;
131 
132 public:
133   /**
134      Flags for the state of the replication.
135    */
enum enum_state_flag
{
  IN_STMT,           /**< The replication thread is inside a statement */
  STATE_FLAGS_COUNT  /**< Flag counter.  Should always be last */
};
143 
144   /*
145     The SQL thread owns one Relay_log_info, and each client that has
146     executed a BINLOG statement owns one Relay_log_info. This function
147     returns zero for the Relay_log_info object that belongs to the SQL
148     thread and nonzero for Relay_log_info objects that belong to
149     clients.
150   */
belongs_to_client()151   inline bool belongs_to_client()
152   {
153     assert(info_thd);
154     return !info_thd->slave_thread;
155   }
156 /* Instrumentation key for performance schema for mts_temp_table_LOCK */
157 #ifdef HAVE_PSI_INTERFACE
158   PSI_mutex_key m_key_mts_temp_table_LOCK;
159 #endif
160   /*
161      Lock to protect race condition while transferring temporary table from
162      worker thread to coordinator thread and vice-versa
163    */
164   mysql_mutex_t mts_temp_table_LOCK;
165   /*
166      Lock to acquire by methods that concurrently update lwm of committed
167      transactions and the min waited timestamp and its index.
168   */
169   mysql_mutex_t mts_gaq_LOCK;
170   mysql_cond_t  logical_clock_cond;
171   /*
172     If true, events with the same server id should be replicated. This
173     field is set on creation of a relay log info structure by copying
174     the value of ::replicate_same_server_id and can be overridden if
175     necessary. For example of when this is done, check sql_binlog.cc,
176     where the BINLOG statement can be used to execute "raw" events.
177    */
178   bool replicate_same_server_id;
179 
180   /*
181     The gtid (or anonymous) of the currently executing transaction, or
182     of the last executing transaction if no transaction is currently
183     executing.  This is used to fill the last_seen_transaction
184     column
185     of the table
186     performance_schema.replication_applier_status_by_worker.
187   */
188   Gtid_specification currently_executing_gtid;
189 
190   /*** The following variables can only be read when protected by data lock ****/
191   /*
192     cur_log_fd - file descriptor of the current read  relay log
193   */
194   File cur_log_fd;
195   /*
196     Protected with internal locks.
197     Must get data_lock when resetting the logs.
198   */
199   MYSQL_BIN_LOG relay_log;
200   LOG_INFO linfo;
201 
202   /*
203    cur_log
204      Pointer that either points at relay_log.get_log_file() or
205      &rli->cache_buf, depending on whether the log is hot or there was
206      the need to open a cold relay_log.
207 
208    cache_buf
209      IO_CACHE used when opening cold relay logs.
210    */
211   IO_CACHE cache_buf,*cur_log;
212 
213   /*
214     Identifies when the recovery process is going on.
215     See sql/slave.cc:init_recovery for further details.
216   */
217   bool is_relay_log_recovery;
218 
219   /* The following variables are safe to read any time */
220 
221   /*
222     When we restart slave thread we need to have access to the previously
223     created temporary tables. Modified only on init/end and by the SQL
224     thread, read only by SQL thread.
225   */
226   TABLE *save_temporary_tables;
227 
228   /* parent Master_info structure */
229   Master_info *mi;
230 
231   /* number of temporary tables open in this channel */
232   Atomic_int32 channel_open_temp_tables;
233 
234   /*
235     Needed to deal properly with cur_log getting closed and re-opened with
236     a different log under our feet
237   */
238   uint32 cur_log_old_open_count;
239 
240   /*
241     If on init_info() call error_on_rli_init_info is true that means
242     that previous call to init_info() terminated with an error, RESET
243     SLAVE must be executed and the problem fixed manually.
244    */
245   bool error_on_rli_init_info;
246 
247   /*
248     Let's call a group (of events) :
249       - a transaction
250       or
251       - an autocommitting query + its associated events (INSERT_ID,
252     TIMESTAMP...)
253     We need these rli coordinates :
254     - relay log name and position of the beginning of the group we currently are
255     executing. Needed to know where we have to restart when replication has
256     stopped in the middle of a group (which has been rolled back by the slave).
257     - relay log name and position just after the event we have just
258     executed. This event is part of the current group.
259     Formerly we only had the immediately above coordinates, plus a 'pending'
260     variable, but this dealt wrong with the case of a transaction starting on a
261     relay log and finishing (committing) on another relay log. Case which can
262     happen when, for example, the relay log gets rotated because of
263     max_binlog_size.
264   */
265 
266   // overridden new and delete operators for 64 byte alignment
267   static void* operator new(size_t request);
268   static void operator delete(void * ptr);
269 
270 protected:
271   char group_relay_log_name[FN_REFLEN];
272   ulonglong group_relay_log_pos;
273   char event_relay_log_name[FN_REFLEN];
274   /* The suffix number of relay log name */
275   uint event_relay_log_number;
276   ulonglong event_relay_log_pos;
277   ulonglong future_event_relay_log_pos;
278 
279   /* current event's start position in relay log */
280   my_off_t event_start_pos;
281   /*
282      Original log name and position of the group we're currently executing
283      (whose coordinates are group_relay_log_name/pos in the relay log)
284      in the master's binlog. These concern the *group*, because in the master's
285      binlog the log_pos that comes with each event is the position of the
286      beginning of the group.
287 
288     Note: group_master_log_name, group_master_log_pos must only be
289     written from the thread owning the Relay_log_info (SQL thread if
290     !belongs_to_client(); client thread executing BINLOG statement if
291     belongs_to_client()).
292   */
293   char m_group_master_log_name[FN_REFLEN];
294   volatile my_off_t m_group_master_log_pos;
295 
296 private:
297   Gtid_set gtid_set;
298   /*
299     Identifies when this object belongs to the SQL thread and was not
300     created for a client thread or some other purpose including
301     Slave_worker instance initializations. Ends up serving the same
302     purpose as the belongs_to_client method, but its value is set
303     earlier on in the class constructor.
304   */
305   bool rli_fake;
306   /* Flag that ensures the retrieved GTID set is initialized only once. */
307   bool gtid_retrieved_initialized;
308 
309 public:
add_logged_gtid(rpl_sidno sidno,rpl_gno gno)310   void add_logged_gtid(rpl_sidno sidno, rpl_gno gno)
311   {
312     global_sid_lock->assert_some_lock();
313     assert(sidno <= global_sid_map->get_max_sidno());
314     gtid_set.ensure_sidno(sidno);
315     gtid_set._add_gtid(sidno, gno);
316   }
317 
318   /**
319     Adds a GTID set to received GTID set.
320 
321     @param gtid_set the gtid_set to add
322 
323     @return RETURN_STATUS_OK or RETURN_STATUS_REPORTED_ERROR.
324   */
325   enum_return_status add_gtid_set(const Gtid_set *gtid_set);
326 
get_gtid_set()327   const Gtid_set *get_gtid_set() const { return &gtid_set; }
328 
329   int init_relay_log_pos(const char* log,
330                          ulonglong pos, bool need_data_lock,
331                          const char** errmsg,
332                          bool keep_looking_for_fd);
333 
334   /*
335     Update the error number, message and timestamp fields. This function is
336     different from va_report() as va_report() also logs the error message in the
337     log apart from updating the error fields.
338   */
339   void fill_coord_err_buf(loglevel level, int err_code,
340                           const char *buff_coord) const;
341 
342 
343   /*
344     Flag that the group_master_log_pos is invalid. This may occur
345     (for example) after CHANGE MASTER TO RELAY_LOG_POS.  This will
346     be unset after the first event has been executed and the
347     group_master_log_pos is valid again.
348    */
349   bool is_group_master_log_pos_invalid;
350 
351   /*
352     Handling of the relay_log_space_limit optional constraint.
353     ignore_log_space_limit is used to resolve a deadlock between I/O and SQL
354     threads, the SQL thread sets it to unblock the I/O thread and make it
355     temporarily forget about the constraint.
356   */
357   ulonglong log_space_limit,log_space_total;
358   bool ignore_log_space_limit;
359 
360   /*
361     Used by the SQL thread to instruct the IO thread to rotate
362     the logs when the SQL thread needs to purge to release some
363     disk space.
364    */
365   bool sql_force_rotate_relay;
366 
367   time_t last_master_timestamp;
368 
369   void clear_until_condition();
370 
371   /**
372     Reset the delay.
373     This is used by RESET SLAVE to clear the delay.
374   */
clear_sql_delay()375   void clear_sql_delay()
376   {
377     sql_delay= 0;
378   }
379 
380   /*
381     Needed for problems when slave stops and we want to restart it
382     skipping one or more events in the master log that have caused
383     errors, and have been manually applied by DBA already.
384   */
385   volatile uint32 slave_skip_counter;
386   volatile ulong abort_pos_wait;	/* Incremented on change master */
387   mysql_mutex_t log_space_lock;
388   mysql_cond_t log_space_cond;
389 
390   /*
391      Condition and its parameters from START SLAVE UNTIL clause.
392 
393      UNTIL condition is tested with is_until_satisfied() method that is
394      called by exec_relay_log_event(). is_until_satisfied() caches the result
395      of the comparison of log names because log names don't change very often;
396      this cache is invalidated by parts of code which change log names with
397      notify_*_log_name_updated() methods. (They need to be called only if SQL
398      thread is running).
399    */
enum
{
  UNTIL_NONE= 0,
  /* until_pos() reports the group master log position for this condition. */
  UNTIL_MASTER_POS,
  /* until_pos() reports group_relay_log_pos for this condition. */
  UNTIL_RELAY_POS,
  /* The two GTID conditions are checked against until_sql_gtids. */
  UNTIL_SQL_BEFORE_GTIDS,
  UNTIL_SQL_AFTER_GTIDS,
  UNTIL_SQL_AFTER_MTS_GAPS,
  /* Waits for the view identified by until_view_id. */
  UNTIL_SQL_VIEW_ID,
  UNTIL_DONE
} until_condition;
411   char until_log_name[FN_REFLEN];
412   ulonglong until_log_pos;
413   /* extension extracted from log_name and converted to int */
414   ulong until_log_name_extension;
415   /**
416     The START SLAVE UNTIL SQL_*_GTIDS initializes until_sql_gtids.
417     Each time a gtid is about to be processed, we check if it is in the
418     set. Depending on until_condition, SQL thread is stopped before or
419     after applying the gtid.
420   */
421   Gtid_set until_sql_gtids;
422   /*
423     True if the current event is the first gtid event to be processed
424     after executing START SLAVE UNTIL SQL_*_GTIDS.
425   */
426   bool until_sql_gtids_first_event;
427   /*
428      Cached result of comparison of until_log_name and current log name
429      -2 means uninitialised, -1,0,1 are comparison results
430   */
enum
{
  /* No comparison result is cached. */
  UNTIL_LOG_NAMES_CMP_UNKNOWN= -2,
  UNTIL_LOG_NAMES_CMP_LESS= -1,
  UNTIL_LOG_NAMES_CMP_EQUAL= 0,
  UNTIL_LOG_NAMES_CMP_GREATER= 1
} until_log_names_cmp_result;
436 
437   char cached_charset[6];
438 
439   /*
440     View_id until which UNTIL_SQL_VIEW_ID condition will wait.
441   */
442   std::string until_view_id;
443   /*
444     Flag used to indicate that view_id identified by 'until_view_id'
445     was found on the current UNTIL_SQL_VIEW_ID condition.
446     It is set to false on the beginning of the UNTIL_SQL_VIEW_ID
447     condition, and set to true when view_id is found.
448   */
449   bool until_view_id_found;
450   /*
451     Flag used to indicate that commit event after view_id identified
452     by 'until_view_id' was found on the current UNTIL_SQL_VIEW_ID condition.
453     It is set to false on the beginning of the UNTIL_SQL_VIEW_ID
454     condition, and set to true when commit event after view_id is found.
455   */
456   bool until_view_id_commit_found;
457 
458   /*
459     trans_retries varies between 0 to slave_transaction_retries and counts how
460     many times the slave has retried the present transaction; gets reset to 0
461     when the transaction finally succeeds. retried_trans is a cumulative
462     counter: how many times the slave has retried a transaction (any) since
463     slave started.
464   */
465   ulong trans_retries, retried_trans;
466 
467   /*
468     If the end of the hot relay log is made of master's events ignored by the
469     slave I/O thread, these two keep track of the coords (in the master's
470     binlog) of the last of these events seen by the slave I/O thread. If not,
471     ign_master_log_name_end[0] == 0.
472     As they are like a Rotate event read/written from/to the relay log, they
473     are both protected by rli->relay_log.LOCK_log.
474   */
475   char ign_master_log_name_end[FN_REFLEN];
476   ulonglong ign_master_log_pos_end;
477 
478   /*
479     Identifies where the SQL Thread should create temporary files for the
480     LOAD DATA INFILE. This is used for security reasons.
481    */
482   char slave_patternload_file[FN_REFLEN];
483   size_t slave_patternload_file_size;
484 
485   /**
486     Identifies the last time a checkpoint routine has been executed.
487   */
488   struct timespec last_clock;
489 
490   /**
491     Invalidates cached until_log_name and group_relay_log_name comparison
492     result. Should be called after any update of group_relay_log_name if
493     there are chances that sql_thread is running.
494   */
notify_group_relay_log_name_update()495   inline void notify_group_relay_log_name_update()
496   {
497     if (until_condition==UNTIL_RELAY_POS)
498       until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
499   }
500 
501   /**
502     The same as @c notify_group_relay_log_name_update but for
503     @c group_master_log_name.
504   */
notify_group_master_log_name_update()505   inline void notify_group_master_log_name_update()
506   {
507     if (until_condition==UNTIL_MASTER_POS)
508       until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
509   }
510 
inc_event_relay_log_pos()511   inline void inc_event_relay_log_pos()
512   {
513     event_relay_log_pos= future_event_relay_log_pos;
514   }
515 
516   int inc_group_relay_log_pos(ulonglong log_pos,
517                               bool need_data_lock);
518 
519   int wait_for_pos(THD* thd, String* log_name, longlong log_pos,
520                    double timeout);
521   int wait_for_gtid_set(THD* thd, String* gtid, double timeout);
522   int wait_for_gtid_set(THD* thd, const Gtid_set* wait_gtid_set,
523                         double timeout);
524 
525   void close_temporary_tables();
526 
527   /* Check if UNTIL condition is satisfied. See slave.cc for more. */
528   bool is_until_satisfied(THD *thd, Log_event *ev);
until_pos()529   inline ulonglong until_pos()
530   {
531     return ((until_condition == UNTIL_MASTER_POS) ? get_group_master_log_pos() :
532 	    group_relay_log_pos);
533   }
534 
535   RPL_TABLE_LIST *tables_to_lock;           /* RBR: Tables to lock  */
536   uint tables_to_lock_count;        /* RBR: Count of tables to lock */
537   table_mapping m_table_map;      /* RBR: Mapping table-id to table */
538   /* RBR: Record Rows_query log event */
539   Rows_query_log_event* rows_query_ev;
540 
get_table_data(TABLE * table_arg,table_def ** tabledef_var,TABLE ** conv_table_var)541   bool get_table_data(TABLE *table_arg, table_def **tabledef_var, TABLE **conv_table_var) const
542   {
543     assert(tabledef_var && conv_table_var);
544     for (TABLE_LIST *ptr= tables_to_lock ; ptr != NULL ; ptr= ptr->next_global)
545       if (ptr->table == table_arg)
546       {
547         *tabledef_var= &static_cast<RPL_TABLE_LIST*>(ptr)->m_tabledef;
548         *conv_table_var= static_cast<RPL_TABLE_LIST*>(ptr)->m_conv_table;
549         DBUG_PRINT("debug", ("Fetching table data for table %s.%s:"
550                              " tabledef: %p, conv_table: %p",
551                              table_arg->s->db.str, table_arg->s->table_name.str,
552                              *tabledef_var, *conv_table_var));
553         return true;
554       }
555     return false;
556   }
557 
558   /**
559     Last charset (6 bytes) seen by slave SQL thread is cached here; it helps
560     the thread save 3 @c get_charset() per @c Query_log_event if the charset is not
561     changing from event to event (common situation).
562     All 6 bytes being equal to 0 means "cache is invalidated".
563   */
564   void cached_charset_invalidate();
565   bool cached_charset_compare(char *charset) const;
566 
567   void cleanup_context(THD *, bool);
568   void slave_close_thread_tables(THD *);
569   void clear_tables_to_lock();
570   int purge_relay_logs(THD *thd, bool just_reset, const char** errmsg,
571                        bool delete_only= false);
572 
573   /*
574     Used to defer stopping the SQL thread to give it a chance
575     to finish up the current group of events.
576     The timestamp is set and reset in @c sql_slave_killed().
577   */
578   time_t last_event_start_time;
579   /*
580     A container to hold on Intvar-, Rand-, Uservar- log-events in case
581     the slave is configured with table filtering rules.
582     The withheld events are executed when their parent Query destiny is
583     determined for execution as well.
584   */
585   Deferred_log_events *deferred_events;
586 
587   /*
588     State of the container: true stands for IRU events gathering,
589     false does for execution, either deferred or direct.
590   */
591   bool deferred_events_collecting;
592 
593   /*****************************************************************************
594     WL#5569 MTS
595 
596     legends:
597     C  - Coordinator;
598     W  - Worker;
599     WQ - Worker Queue containing event assignments
600   */
601   // the number is determined by global slave_parallel_workers
602   Slave_worker_array workers;
603 
604   HASH mapping_db_to_worker; // To map a database to a worker
605   bool inited_hash_workers; //  flag to check if mapping_db_to_worker is inited
606 
607   mysql_mutex_t slave_worker_hash_lock; // for mapping_db_to_worker
608   mysql_cond_t  slave_worker_hash_cond;// for mapping_db_to_worker
609 
610   /*
611     For the purpose of reporting the worker status in performance schema table,
612     we need to preserve the workers array after worker thread was killed. So, we
613     copy this array into the below vector which is used for reporting
614     until next init_workers(). Note that we only copy those attributes that
615     would be useful in reporting worker status. We only use a few attributes in
616     this object as of now but still save the whole object. The idea is
617     to be future proof. We will extend performance schema tables in future
618     and then we would use a good number of attributes from this object.
619   */
620 
621   std::vector<Slave_worker*> workers_copy_pfs;
622 
623   /*
624     This flag is turned ON when the workers array is initialized.
625     Before destroying the workers array we check this flag to make sure
626     we are not destroying an uninitialized array. For the purpose of reporting the
627     worker status in performance schema table, we need to preserve the workers
628     array after worker thread was killed. So, we copy this array into
629     workers_copy_pfs array which is used for reporting until next init_workers().
630   */
631   bool workers_array_initialized;
632 
633   volatile ulong pending_jobs;
634   mysql_mutex_t pending_jobs_lock;
635   mysql_cond_t pending_jobs_cond;
636   mysql_mutex_t exit_count_lock; // mutex of worker exit count
637   ulong       mts_slave_worker_queue_len_max;
638   ulonglong   mts_pending_jobs_size;      // actual mem usage by WQ:s
639   ulonglong   mts_pending_jobs_size_max;  // max of WQ:s size forcing C to wait
640   bool    mts_wq_oversize;      // C raises flag to wait some memory's released
641   Slave_worker  *last_assigned_worker;// is set to a Worker at assigning a group
642   /*
643     master-binlog ordered queue of Slave_job_group descriptors of groups
644     that are under processing. The queue size is @c checkpoint_group.
645   */
646   Slave_committed_queue *gaq;
647   /*
648     Container for references of involved partitions for the current event group
649   */
650   // CGAP dynarray holds id:s of partitions of the Current being executed Group
651   Prealloced_array<db_worker_hash_entry*, 4, true> curr_group_assigned_parts;
652   // deferred array to hold partition-info-free events
653   Prealloced_array<Slave_job_item, 8, true> curr_group_da;
654 
655   bool curr_group_seen_gtid;   // current group started with Gtid-event or not
656   bool curr_group_seen_begin;   // current group started with B-event or not
657   bool curr_group_isolated;     // current group requires execution in isolation
658   bool mts_end_group_sets_max_dbs; // flag indicates if partitioning info is discovered
659   volatile ulong mts_wq_underrun_w_id;  // Id of a Worker whose queue is getting empty
660   /*
661      Ongoing excessive overrun counter to correspond to number of events that
662      are being scheduled while a WQ is close to be filled up.
663      `Close' is defined as (100 - mts_worker_underrun_level) %.
664      The counter is incremented each time a WQ get filled over that level
665      and decremented when the level drops below.
666      The counter therefore describes level of saturation that Workers
667      are experiencing and is used as a parameter to compute a nap time for
668      Coordinator in order to avoid reaching WQ limits.
669   */
670   volatile long mts_wq_excess_cnt;
671   long  mts_worker_underrun_level; // % of WQ size at which W is considered hungry
672   ulong mts_coordinator_basic_nap; // C sleeps to avoid WQs overrun
673   ulong opt_slave_parallel_workers; // cache for ::opt_slave_parallel_workers
674   ulong slave_parallel_workers; // the one slave session time number of workers
675   ulong exit_counter; // Number of workers contributed to max updated group index
676   ulonglong max_updated_index;
677   ulong recovery_parallel_workers; // number of workers while recovering
678   uint checkpoint_seqno;  // counter of groups executed after the most recent CP
679   uint checkpoint_group;  // cache for ::opt_mts_checkpoint_group
680   MY_BITMAP recovery_groups;  // bitmap used during recovery
681   bool recovery_groups_inited;
682   ulong mts_recovery_group_cnt; // number of groups to execute at recovery
683   ulong mts_recovery_index;     // running index of recoverable groups
684   bool mts_recovery_group_seen_begin;
685 
686   /*
687     While distributing events based on their properties MTS
688     Coordinator changes its mts group status.
689     Transition normally flows to follow `=>' arrows on the diagram:
690 
691             +----------------------------+
692             V                            |
693     MTS_NOT_IN_GROUP =>                  |
694         {MTS_IN_GROUP => MTS_END_GROUP --+} while (!killed) => MTS_KILLED_GROUP
695 
696     MTS_END_GROUP has `->' loop breaking link to MTS_NOT_IN_GROUP when
697     Coordinator synchronizes with Workers by demanding them to
698     complete their assignments.
699   */
enum
{
  /*
    No new events were scheduled after the last synchronization;
    includes the Single-Threaded-Slave case.
  */
  MTS_NOT_IN_GROUP,
  /* At least one not-terminal event has been scheduled to a Worker. */
  MTS_IN_GROUP,
  /* The last scheduled event is a terminal event. */
  MTS_END_GROUP,
  /* Coordinator gave up to reach MTS_END_GROUP. */
  MTS_KILLED_GROUP
} mts_group_status;
712 
713   /*
714     MTS statistics:
715   */
716   ulonglong mts_events_assigned; // number of events (statements) scheduled
717   ulonglong mts_groups_assigned; // number of groups (transactions) scheduled
718   volatile ulong mts_wq_overrun_cnt; // counter of all mts_wq_excess_cnt increments
719   ulong wq_size_waits_cnt;    // number of times C slept due to WQ:s oversize
720   /*
721     Counter of how many times Coordinator saw Workers are filled up
722     "enough" with assignments. The enough definition depends on
723     the scheduler type.
724   */
725   ulong mts_wq_no_underrun_cnt;
726   longlong mts_total_wait_overlap; // Waiting time corresponding to above
727   /*
728     Stats to compute Coordinator waiting time for any Worker available,
729     applies solely to the Commit-clock scheduler.
730   */
731   ulonglong mts_total_wait_worker_avail;
732   ulong mts_wq_overfill_cnt;  // counter of C waited due to a WQ queue was full
733   /*
734     Statistics (todo: replace with WL5631) applies to either Coordinator and Worker.
735     The exec time in the Coordinator case means scheduling.
736     The read time in the Worker case means getting an event out of Worker queue
737   */
738   ulonglong stats_exec_time;
739   ulonglong stats_read_time;
740   struct timespec ts_exec[2];  // per event pre- and post- exec timestamp
741   struct timespec stats_begin; // applier's bootstrap time
742 
743   /*
744      A sorted array of the Workers' current assignment numbers to provide
745      approximate view on Workers loading.
746      The first row of the least occupied Worker is queried at assigning
747      a new partition. Is updated at checkpoint commit to the main RLI.
748   */
749   Prealloced_array<ulong, 16> least_occupied_workers;
750   time_t mts_last_online_stat;
751   /* end of MTS statistics */
752 
753   /* Returns the number of elements in workers array/vector. */
get_worker_count()754   inline size_t get_worker_count()
755   {
756     if (workers_array_initialized)
757       return workers.size();
758     else
759       return workers_copy_pfs.size();
760   }
761 
762   /*
763     Returns a pointer to the worker instance at index n in workers
764     array/vector.
765   */
get_worker(size_t n)766   Slave_worker* get_worker(size_t n)
767   {
768     if (workers_array_initialized)
769     {
770       if (n >= workers.size())
771         return NULL;
772 
773       return workers[n];
774     }
775     else if (workers_copy_pfs.size())
776     {
777       if (n >= workers_copy_pfs.size())
778         return NULL;
779 
780       return workers_copy_pfs[n];
781     }
782     else
783       return NULL;
784   }
785 
786   /*Channel defined mts submode*/
787   enum_mts_parallel_type channel_mts_submode;
788   /* MTS submode  */
789   Mts_submode* current_mts_submode;
790 
  /*
    Slave side local seq_no identifying a parent group on which the
    transaction being scheduled is considered to be dependent.
   */
795   ulonglong mts_last_known_parent_group_id;
796 
797   /* most of allocation in the coordinator rli is there */
798   void init_workers(ulong);
799 
800   /* counterpart of the init */
801   void deinit_workers();
802 
803   /**
804      returns true if there is any gap-group of events to execute
805                   at slave starting phase.
806   */
is_mts_recovery()807   inline bool is_mts_recovery() const
808   {
809     return mts_recovery_group_cnt != 0;
810   }
811 
clear_mts_recovery_groups()812   inline void clear_mts_recovery_groups()
813   {
814     if (recovery_groups_inited)
815     {
816       bitmap_free(&recovery_groups);
817       mts_recovery_group_cnt= 0;
818       recovery_groups_inited= false;
819     }
820   }
821 
822   /**
823      returns true if events are to be executed in parallel
824   */
is_parallel_exec()825   inline bool is_parallel_exec() const
826   {
827     bool ret= (slave_parallel_workers > 0) && !is_mts_recovery();
828 
829     assert(!ret || !workers.empty());
830 
831     return ret;
832   }
833 
834   /**
835      returns true if Coordinator is scheduling events belonging to
836      the same group and has not reached yet its terminal event.
837   */
is_mts_in_group()838   inline bool is_mts_in_group()
839   {
840     return is_parallel_exec() &&
841       mts_group_status == MTS_IN_GROUP;
842   }
843 
844   bool mts_workers_queue_empty() const;
845   bool cannot_safely_rollback() const;
846 
847   /**
848      While a group is executed by a Worker the relay log can change.
849      Coordinator notifies Workers about this event. Worker is supposed
850      to commit to the recovery table with the new info.
851   */
852   void reset_notified_relay_log_change();
853 
854   /**
855      While a group is executed by a Worker the relay log can change.
856      Coordinator notifies Workers about this event. Coordinator and Workers
857      maintain a bitmap of executed group that is reset with a new checkpoint.
858   */
859   void reset_notified_checkpoint(ulong count, time_t new_ts,
860                                  bool need_data_lock,
861                                  bool update_timestamp= false);
862 
863   /**
864      Called when gaps execution is ended so it is crash-safe
865      to reset the last session Workers info.
866   */
867   bool mts_finalize_recovery();
868   /*
869    * End of MTS section ******************************************************/
870 
871   /* The general cleanup that slave applier may need at the end of query. */
cleanup_after_query()872   inline void cleanup_after_query()
873   {
874     if (deferred_events)
875       deferred_events->rewind();
876   };
877   /* The general cleanup that slave applier may need at the end of session. */
cleanup_after_session()878   void cleanup_after_session()
879   {
880     if (deferred_events)
881       delete deferred_events;
882   };
883 
884   /**
885     Helper function to do after statement completion.
886 
887     This function is called from an event to complete the group by
888     either stepping the group position, if the "statement" is not
889     inside a transaction; or increase the event position, if the
890     "statement" is inside a transaction.
891 
892     @param event_log_pos
893     Master log position of the event. The position is recorded in the
894     relay log info and used to produce information for <code>SHOW
895     SLAVE STATUS</code>.
896   */
897   int stmt_done(my_off_t event_log_pos);
898 
899 
900   /**
901      Set the value of a replication state flag.
902 
903      @param flag Flag to set
904    */
set_flag(enum_state_flag flag)905   void set_flag(enum_state_flag flag)
906   {
907     m_flags |= (1UL << flag);
908   }
909 
910   /**
911      Get the value of a replication state flag.
912 
913      @param flag Flag to get value of
914 
915      @return @c true if the flag was set, @c false otherwise.
916    */
get_flag(enum_state_flag flag)917   bool get_flag(enum_state_flag flag)
918   {
919     return m_flags & (1UL << flag);
920   }
921 
922   /**
923      Clear the value of a replication state flag.
924 
925      @param flag Flag to clear
926    */
clear_flag(enum_state_flag flag)927   void clear_flag(enum_state_flag flag)
928   {
929     m_flags &= ~(1UL << flag);
930   }
931 
932 private:
933   /**
934     Auxiliary function used by is_in_group.
935 
936     The execute thread is in the middle of a statement in the
937     following cases:
938     - User_var/Intvar/Rand events have been processed, but the
939       corresponding Query_log_event has not been processed.
940     - Table_map or Row events have been processed, and the last Row
941       event did not have the STMT_END_F set.
942 
943     @retval true Replication thread is inside a statement.
944     @retval false Replication thread is not inside a statement.
945    */
is_in_stmt()946   bool is_in_stmt() const
947   {
948     bool ret= (m_flags & (1UL << IN_STMT));
949     DBUG_PRINT("info", ("is_in_stmt()=%d", ret));
950     return ret;
951   }
  /**
    Auxiliary function used by is_in_group.

    @retval true The execute thread is inside a statement or a
    transaction, i.e., either a BEGIN has been executed or we are in
    the middle of a statement.
    @retval false The execute thread is not inside a statement
    or a transaction.
  */
is_in_trx_or_stmt()961   bool is_in_trx_or_stmt() const
962   {
963     bool ret= is_in_stmt() || (info_thd->variables.option_bits & OPTION_BEGIN);
964     DBUG_PRINT("info", ("is_in_trx_or_stmt()=%d", ret));
965     return ret;
966   }
967 public:
968   /**
969     A group is defined as the entire range of events that constitute
970     a transaction or auto-committed statement. It has one of the
971     following forms:
972 
973     (Gtid)? Query(BEGIN) ... (Query(COMMIT) | Query(ROLLBACK) | Xid)
974     (Gtid)? (Rand | User_var | Int_var)* Query(DDL)
975 
976     Thus, to check if the execute thread is in a group, there are
977     two cases:
978 
979     - If the master generates Gtid events (5.7.5 or later, or 5.6 or
980       later with GTID_MODE=ON), then is_in_group is the same as
981       info_thd->owned_gtid.sidno != 0, since owned_gtid.sidno is set
982       to non-zero by the Gtid_log_event and cleared to zero at commit
983       or rollback.
984 
985     - If the master does not generate Gtid events (i.e., master is
986       pre-5.6, or pre-5.7.5 with GTID_MODE=OFF), then is_in_group is
987       the same as is_in_trx_or_stmt().
988 
989     @retval true Replication thread is inside a group.
990     @retval false Replication thread is not inside a group.
991   */
is_in_group()992   bool is_in_group() const
993   {
994     bool ret= is_in_trx_or_stmt() || info_thd->owned_gtid.sidno != 0;
995     DBUG_PRINT("info", ("is_in_group()=%d", ret));
996     return ret;
997   }
998 
999   int count_relay_log_space();
1000 
1001   int rli_init_info();
1002   void end_info();
1003   int flush_info(bool force= FALSE);
1004   int flush_current_log();
1005   void set_master_info(Master_info *info);
1006 
get_future_event_relay_log_pos()1007   inline ulonglong get_future_event_relay_log_pos() { return future_event_relay_log_pos; }
set_future_event_relay_log_pos(ulonglong log_pos)1008   inline void set_future_event_relay_log_pos(ulonglong log_pos)
1009   {
1010     future_event_relay_log_pos= log_pos;
1011   }
1012 
get_group_master_log_name()1013   inline const char* get_group_master_log_name()
1014   {
1015     return m_group_master_log_name;
1016   }
get_group_master_log_pos()1017   inline ulonglong get_group_master_log_pos() { return m_group_master_log_pos; }
set_group_master_log_name(const char * log_file_name)1018   inline void set_group_master_log_name(const char *log_file_name)
1019   {
1020     assert(!info_thd ||
1021                 info_thd->backup_binlog_lock.is_protection_acquired());
1022 
1023     strmake(m_group_master_log_name, log_file_name,
1024             sizeof(m_group_master_log_name) - 1);
1025   }
set_group_master_log_pos(ulonglong log_pos)1026   inline void set_group_master_log_pos(ulonglong log_pos)
1027   {
1028     assert(!info_thd ||
1029                 info_thd->backup_binlog_lock.is_protection_acquired());
1030     m_group_master_log_pos= log_pos;
1031   }
1032 
get_group_relay_log_name()1033   inline const char* get_group_relay_log_name() { return group_relay_log_name; }
get_group_relay_log_pos()1034   inline ulonglong get_group_relay_log_pos() { return group_relay_log_pos; }
set_group_relay_log_name(const char * log_file_name)1035   inline void set_group_relay_log_name(const char *log_file_name)
1036   {
1037      strmake(group_relay_log_name,log_file_name, sizeof(group_relay_log_name)-1);
1038   }
set_group_relay_log_name(const char * log_file_name,size_t len)1039   inline void set_group_relay_log_name(const char *log_file_name, size_t len)
1040   {
1041      strmake(group_relay_log_name, log_file_name, len);
1042   }
set_group_relay_log_pos(ulonglong log_pos)1043   inline void set_group_relay_log_pos(ulonglong log_pos)
1044   {
1045     group_relay_log_pos= log_pos;
1046   }
1047 
get_event_relay_log_name()1048   inline const char* get_event_relay_log_name() { return event_relay_log_name; }
get_event_relay_log_pos()1049   inline ulonglong get_event_relay_log_pos() { return event_relay_log_pos; }
set_event_relay_log_name(const char * log_file_name)1050   inline void set_event_relay_log_name(const char *log_file_name)
1051   {
1052     strmake(event_relay_log_name,log_file_name, sizeof(event_relay_log_name)-1);
1053     set_event_relay_log_number(relay_log_name_to_number(log_file_name));
1054   }
1055 
get_event_relay_log_number()1056   uint get_event_relay_log_number() { return event_relay_log_number; }
set_event_relay_log_number(uint number)1057   void set_event_relay_log_number(uint number)
1058   {
1059     event_relay_log_number= number;
1060   }
1061 
1062   /**
1063     Given the extension number of the relay log, gets the full
1064     relay log path. Currently used in Slave_worker::retry_transaction()
1065 
1066     @param [in]   number      extension number of relay log
1067     @param[in, out] name      The full path of the relay log (per-channel)
1068                               to be read by the slave worker.
1069   */
1070   void relay_log_number_to_name(uint number, char name[FN_REFLEN+1]);
1071   uint relay_log_name_to_number(const char *name);
1072 
set_event_start_pos(my_off_t pos)1073   void set_event_start_pos(my_off_t pos) { event_start_pos= pos; }
get_event_start_pos()1074   my_off_t get_event_start_pos() { return event_start_pos; }
1075 
set_event_relay_log_pos(ulonglong log_pos)1076   inline void set_event_relay_log_pos(ulonglong log_pos)
1077   {
1078     event_relay_log_pos= log_pos;
1079   }
get_rpl_log_name()1080   inline const char* get_rpl_log_name()
1081   {
1082     return (m_group_master_log_name[0] ? m_group_master_log_name : "FIRST");
1083   }
1084 
1085   static size_t get_number_info_rli_fields();
1086 
1087   /**
1088     Indicate that a delay starts.
1089 
1090     This does not actually sleep; it only sets the state of this
1091     Relay_log_info object to delaying so that the correct state can be
1092     reported by SHOW SLAVE STATUS and SHOW PROCESSLIST.
1093 
1094     Requires rli->data_lock.
1095 
1096     @param delay_end The time when the delay shall end.
1097   */
  void start_sql_delay(time_t delay_end)
  {
    /* The caller must already own data_lock; this is only asserted here. */
    mysql_mutex_assert_owner(&data_lock);
    /* Remember when the delay is due to end. */
    sql_delay_end= delay_end;
    /* Switch info_thd to the "waiting until delay" stage. */
    THD_STAGE_INFO(info_thd, stage_sql_thd_waiting_until_delay);
  }
1104 
1105   /* Note that this is cast to uint32 in show_slave_status(). */
get_sql_delay()1106   time_t get_sql_delay() { return sql_delay; }
set_sql_delay(time_t _sql_delay)1107   void set_sql_delay(time_t _sql_delay) { sql_delay= _sql_delay; }
get_sql_delay_end()1108   time_t get_sql_delay_end() { return sql_delay_end; }
1109 
  /*
    Constructor declaration. The eight PSI key parameters are part of the
    signature only when HAVE_PSI_INTERFACE is defined.

    NOTE(review): the four *_cond keys are declared as PSI_mutex_key*,
    the same type as the lock keys - confirm whether a condition key type
    was intended.
  */
  Relay_log_info(bool is_slave_recovery
#ifdef HAVE_PSI_INTERFACE
                 ,PSI_mutex_key *param_key_info_run_lock,
                 PSI_mutex_key *param_key_info_data_lock,
                 PSI_mutex_key *param_key_info_sleep_lock,
                 PSI_mutex_key *param_key_info_thd_lock,
                 PSI_mutex_key *param_key_info_data_cond,
                 PSI_mutex_key *param_key_info_start_cond,
                 PSI_mutex_key *param_key_info_stop_cond,
                 PSI_mutex_key *param_key_info_sleep_cond
#endif
                 , uint param_id, const char* param_channel, bool is_rli_fake
                );
1123   virtual ~Relay_log_info();
1124 
  /*
    Determines if a warning message on unsafe execution was
    already printed out, to avoid cluttering the error log
    with several warning messages.
  */
1130   bool reported_unsafe_warning;
1131 
  /*
    sql_thread_kill_accepted is set to TRUE when killed status is recognized.
  */
1135   bool sql_thread_kill_accepted;
1136 
get_row_stmt_start_timestamp()1137   time_t get_row_stmt_start_timestamp()
1138   {
1139     return row_stmt_start_timestamp;
1140   }
1141 
set_row_stmt_start_timestamp()1142   time_t set_row_stmt_start_timestamp()
1143   {
1144     if (row_stmt_start_timestamp == 0)
1145       row_stmt_start_timestamp= my_time(0);
1146 
1147     return row_stmt_start_timestamp;
1148   }
1149 
reset_row_stmt_start_timestamp()1150   void reset_row_stmt_start_timestamp()
1151   {
1152     row_stmt_start_timestamp= 0;
1153   }
1154 
set_long_find_row_note_printed()1155   void set_long_find_row_note_printed()
1156   {
1157     long_find_row_note_printed= true;
1158   }
1159 
unset_long_find_row_note_printed()1160   void unset_long_find_row_note_printed()
1161   {
1162     long_find_row_note_printed= false;
1163   }
1164 
is_long_find_row_note_printed()1165   bool is_long_find_row_note_printed()
1166   {
1167     return long_find_row_note_printed;
1168   }
1169 
1170 public:
1171   /**
1172     Delete the existing event and set a new one.  This class is
1173     responsible for freeing the event, the caller should not do that.
1174   */
1175   virtual void set_rli_description_event(Format_description_log_event *fdle);
1176 
1177   /**
1178     Return the current Format_description_log_event.
1179   */
get_rli_description_event()1180   Format_description_log_event *get_rli_description_event() const
1181   {
1182     return rli_description_event;
1183   }
1184 
1185   /**
1186     adaptation for the slave applier to specific master versions.
1187   */
1188   ulong adapt_to_master_version(Format_description_log_event *fdle);
1189   ulong adapt_to_master_version_updown(ulong master_version,
1190                                        ulong current_version);
1191   uchar slave_version_split[3]; // bytes of the slave server version
1192   /*
1193     relay log info repository should be updated on relay log
1194     rotate. But when the transaction is split across two relay logs,
1195     update the repository will cause unexpected results and should
1196     be postponed till the 'commit' of the transaction is executed.
1197 
1198     A flag that set to 'true' when this type of 'forced flush'(at the
1199     time of rotate relay log) is postponed due to transaction split
1200     across the relay logs.
1201   */
1202   bool force_flush_postponed_due_to_split_trans;
1203 
get_commit_order_manager()1204   Commit_order_manager *get_commit_order_manager()
1205   {
1206     return commit_order_mngr;
1207   }
1208 
set_commit_order_manager(Commit_order_manager * mngr)1209   void set_commit_order_manager(Commit_order_manager *mngr)
1210   {
1211     commit_order_mngr= mngr;
1212   }
1213 
1214   bool set_info_search_keys(Rpl_info_handler *to);
1215 
  /**
    Get coordinator's RLI. Especially used to get the rli from
    a slave thread, like this: thd->rli_slave->get_c_rli();
    thd could be a SQL thread or a worker thread.
  */
get_c_rli()1221   virtual Relay_log_info* get_c_rli()
1222   {
1223     return this;
1224   }
1225 
1226   virtual const char* get_for_channel_str(bool upper_case= false) const;
1227 
1228 protected:
1229   Format_description_log_event *rli_description_event;
1230 
1231 private:
  /*
    Commit order manager to order commits made by its workers. In context of
    Multi Source Replication each worker will be ordered by the corresponding
    coordinator's order manager.
   */
1237   Commit_order_manager* commit_order_mngr;
1238 
1239   /**
1240     Delay slave SQL thread by this amount, compared to master (in
1241     seconds). This is set with CHANGE MASTER TO MASTER_DELAY=X.
1242 
1243     Guarded by data_lock.  Initialized by the client thread executing
1244     START SLAVE.  Written by client threads executing CHANGE MASTER TO
1245     MASTER_DELAY=X.  Read by SQL thread and by client threads
1246     executing SHOW SLAVE STATUS.  Note: must not be written while the
1247     slave SQL thread is running, since the SQL thread reads it without
1248     a lock when executing flush_info().
1249   */
1250   time_t sql_delay;
1251 
1252   /**
1253     During a delay, specifies the point in time when the delay ends.
1254 
1255     This is used for the SQL_Remaining_Delay column in SHOW SLAVE STATUS.
1256 
1257     Guarded by data_lock. Written by the sql thread.  Read by client
1258     threads executing SHOW SLAVE STATUS.
1259   */
1260   time_t sql_delay_end;
1261 
1262   uint32 m_flags;
1263 
1264   /*
1265     Before the MASTER_DELAY parameter was added (WL#344), relay_log.info
1266     had 4 lines. Now it has 5 lines.
1267   */
1268   static const int LINES_IN_RELAY_LOG_INFO_WITH_DELAY= 5;
1269 
1270   /*
1271     Before the WL#5599, relay_log.info had 5 lines. Now it has 6 lines.
1272   */
1273   static const int LINES_IN_RELAY_LOG_INFO_WITH_WORKERS= 6;
1274 
1275   /*
1276     Before the Id was added (BUG#2334346), relay_log.info
1277     had 6 lines. Now it has 7 lines.
1278   */
1279   static const int LINES_IN_RELAY_LOG_INFO_WITH_ID= 7;
1280 
1281   /*
1282     Add a channel in the slave relay log info
1283   */
1284   static const int LINES_IN_RELAY_LOG_INFO_WITH_CHANNEL= 8;
1285 
1286   bool read_info(Rpl_info_handler *from);
1287   bool write_info(Rpl_info_handler *to);
1288 
1289   Relay_log_info(const Relay_log_info& info);
1290   Relay_log_info& operator=(const Relay_log_info& info);
1291 
1292   /*
1293     Runtime state for printing a note when slave is taking
1294     too long while processing a row event.
1295    */
1296   time_t row_stmt_start_timestamp;
1297   bool long_find_row_note_printed;
1298 
1299 
1300  /**
1301    sets the suffix required for relay log names
1302    in multisource replication.
1303    The extension is "-relay-bin-<channel_name>"
1304    @param[in, out]  buff       buffer to store the complete relay log file name
1305    @param[in]       buff_size  size of buffer buff
1306    @param[in]       base_name  the base name of the relay log file
1307  */
1308   const char* add_channel_to_relay_log_name(char *buff, uint buff_size,
1309                                             const char *base_name);
1310 
1311   /*
1312     Applier thread InnoDB priority.
1313     When two transactions conflict inside InnoDB, the one with
1314     greater priority wins.
1315     Priority must be set before applier thread start so that all
1316     executed transactions have the same priority.
1317   */
1318   int thd_tx_priority;
1319 
1320   /**
1321     If the SQL thread should or not ignore the set limit for
1322     write set collection
1323    */
1324   bool m_ignore_write_set_memory_limit;
1325 
1326   /**
1327     Even if a component says all transactions require write sets,
1328     this variable says the SQL thread transactions can drop them
1329   */
1330   bool m_allow_drop_write_set;
1331 
1332 public:
1333   /*
1334     The boolean is set to true when the binlog (rli_fake) or slave
1335     (rli_slave) applier thread detaches any engine ha_data
1336     it has dealt with at time of XA START processing.
1337     The boolean is reset to false at the end of XA PREPARE,
1338     XA COMMIT ONE PHASE for the binlog applier, and
1339     at internal rollback of the slave applier at the same time with
1340     the engine ha_data re-attachment.
1341   */
1342   bool is_engine_ha_data_detached;
1343 
set_thd_tx_priority(int priority)1344   void set_thd_tx_priority(int priority)
1345   {
1346     thd_tx_priority= priority;
1347   }
1348 
get_thd_tx_priority()1349   int get_thd_tx_priority()
1350   {
1351     return thd_tx_priority;
1352   }
1353 
set_ignore_write_set_memory_limit(bool ignore_limit)1354   void set_ignore_write_set_memory_limit(bool ignore_limit) {
1355     m_ignore_write_set_memory_limit = ignore_limit;
1356   }
1357 
get_ignore_write_set_memory_limit()1358   bool get_ignore_write_set_memory_limit() {
1359     return m_ignore_write_set_memory_limit;
1360   }
1361 
set_allow_drop_write_set(bool does_not_require_ws)1362   void set_allow_drop_write_set(bool does_not_require_ws) {
1363     m_allow_drop_write_set = does_not_require_ws;
1364   }
1365 
get_allow_drop_write_set()1366   bool get_allow_drop_write_set() { return m_allow_drop_write_set; }
1367 
  /**
    Detaches the engine ha_data from THD. The fact
    is memorized in the @c is_engine_ha_data_detached flag.

    @param  thd a reference to THD
  */
1374   void detach_engine_ha_data(THD *thd);
  /**
    Reattaches the engine ha_data to THD. The fact
    is memorized in the @c is_engine_ha_data_detached flag.

    @param  thd a reference to THD
  */
1381   void reattach_engine_ha_data(THD *thd);
1382   /**
1383     Drops the engine ha_data flag when it is up.
1384     The method is run at execution points of the engine ha_data
1385     re-attachment.
1386 
1387     @return true   when THD has detached the engine ha_data,
1388             false  otherwise
1389   */
unflag_detached_engine_ha_data()1390   bool unflag_detached_engine_ha_data()
1391   {
1392     bool rc= false;
1393 
1394     if (is_engine_ha_data_detached)
1395       rc= !(is_engine_ha_data_detached= false); // return the old value
1396 
1397     return rc;
1398   }
1399 };
1400 
1401 bool mysql_show_relaylog_events(THD* thd);
1402 
1403 
1404 /**
1405    @param  thd a reference to THD
1406    @return TRUE if thd belongs to a Worker thread and FALSE otherwise.
1407 */
is_mts_worker(const THD * thd)1408 inline bool is_mts_worker(const THD *thd)
1409 {
1410   return thd->system_thread == SYSTEM_THREAD_SLAVE_WORKER;
1411 }
1412 
1413 
1414 /**
1415  Auxiliary function to check if we have a db partitioned MTS
1416  */
1417 bool is_mts_db_partitioned(Relay_log_info * rli);
1418 #endif /* RPL_RLI_H */
1419