1 /* Copyright (c) 2005, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software Foundation,
21 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22
23 #ifndef RPL_RLI_H
24 #define RPL_RLI_H
25
26 #include "my_global.h"
27
28 #include "binlog.h" // MYSQL_BIN_LOG
29 #include "prealloced_array.h" // Prealloced_array
30 #include "rpl_gtid.h" // Gtid_set
31 #include "rpl_info.h" // Rpl_info
32 #include "rpl_mts_submode.h" // enum_mts_parallel_type
33 #include "rpl_tblmap.h" // table_mapping
34 #include "rpl_utility.h" // Deferred_log_events
35 #include "sql_class.h" // THD
36
37 #include <string>
38 #include <vector>
39
40 struct RPL_TABLE_LIST;
41 class Master_info;
42 class Mts_submode;
43 class Commit_order_manager;
44 class Slave_committed_queue;
45 typedef struct st_db_worker_hash_entry db_worker_hash_entry;
46 extern uint sql_slave_skip_counter;
47
48 typedef Prealloced_array<Slave_worker*, 4> Slave_worker_array;
49
50 typedef struct slave_job_item
51 {
52 Log_event *data;
53 uint relay_number;
54 my_off_t relay_pos;
55 } Slave_job_item;
56
57 /*******************************************************************************
58 Replication SQL Thread
59
60 Relay_log_info contains:
61 - the current relay log
62 - the current relay log offset
63 - master log name
64 - master log sequence corresponding to the last update
65 - misc information specific to the SQL thread
66
67 Relay_log_info is initialized from a repository, i.e. table or file, if there is
  one. Otherwise, data members are initialized with defaults by calling
69 init_relay_log_info().
70
71 The relay.info table/file shall be updated whenever: (i) the relay log file
72 is rotated, (ii) SQL Thread is stopped, (iii) while processing a Xid_log_event,
73 (iv) after a Query_log_event (i.e. commit or rollback) and (v) after processing
74 any statement written to the binary log without a transaction context.
75
76 The Xid_log_event is a commit for transactional engines and must be handled
77 differently to provide reliability/data integrity. In this case, positions
78 are updated within the context of the current transaction. So
79
80 . If the relay.info is stored in a transactional repository and the server
81 crashes before successfully committing the transaction the changes to the
82 position table will be rolled back along with the data.
83
84 . If the relay.info is stored in a non-transactional repository, for instance,
85 a file or a system table created using MyIsam, and the server crashes before
86 successfully committing the transaction the changes to the position table
87 will not be rolled back but data will.
88
  In particular, when there are mixed transactions, i.e. a transaction that updates
  both transactional and non-transactional engines, the Xid_log_event is still used
91 but reliability/data integrity cannot be achieved as we shall explain in what
92 follows.
93
94 Changes to non-transactional engines, such as MyIsam, cannot be rolled back if a
95 failure happens. For that reason, there is no point in updating the positions
96 within the boundaries of any on-going transaction. This is true for both commit
97 and rollback. If a failure happens after processing the pseudo-transaction but
98 before updating the positions, the transaction will be re-executed when the
99 slave is up most likely causing an error that needs to be manually circumvented.
100 This is a well-known issue when non-transactional statements are executed.
101
102 Specifically, if rolling back any transaction, positions are updated outside the
103 transaction boundaries. However, there may be a problem in this scenario even
104 when only transactional engines are updated. This happens because if there is a
105 rollback and such transaction is written to the binary log, a non-transactional
106 engine was updated or a temporary table was created or dropped within its
107 boundaries.
108
109 In particular, in both STATEMENT and MIXED logging formats, this happens because
110 any temporary table is automatically dropped after a shutdown/startup.
111 See BUG#26945 for further details.
112
113 Statements written to the binary log outside the boundaries of a transaction are
  DDLs or maintenance commands which are not transactional. This means that they
115 cannot be rolled back if a failure happens. In such cases, the positions are
116 updated after processing the events. If a failure happens after processing the
117 statement but before updating the positions, the statement will be
118 re-executed when the slave is up most likely causing an error that needs to be
119 manually circumvented. This is a well-known issue when non-transactional
120 statements are executed.
121
122 The --sync-relay-log-info does not have effect when a system table, either
123 transactional or non-transactional is used.
124
  To correctly recover from failures, one should combine transactional system
126 tables along with the --relay-log-recovery.
127 *******************************************************************************/
128 class Relay_log_info : public Rpl_info
129 {
130 friend class Rpl_info_factory;
131
132 public:
133 /**
134 Flags for the state of the replication.
135 */
136 enum enum_state_flag {
137 /** The replication thread is inside a statement */
138 IN_STMT,
139
140 /** Flag counter. Should always be last */
141 STATE_FLAGS_COUNT
142 };
143
144 /*
145 The SQL thread owns one Relay_log_info, and each client that has
146 executed a BINLOG statement owns one Relay_log_info. This function
147 returns zero for the Relay_log_info object that belongs to the SQL
148 thread and nonzero for Relay_log_info objects that belong to
149 clients.
150 */
belongs_to_client()151 inline bool belongs_to_client()
152 {
153 assert(info_thd);
154 return !info_thd->slave_thread;
155 }
156 /* Instrumentation key for performance schema for mts_temp_table_LOCK */
157 #ifdef HAVE_PSI_INTERFACE
158 PSI_mutex_key m_key_mts_temp_table_LOCK;
159 #endif
160 /*
161 Lock to protect race condition while transferring temporary table from
162 worker thread to coordinator thread and vice-versa
163 */
164 mysql_mutex_t mts_temp_table_LOCK;
165 /*
166 Lock to acquire by methods that concurrently update lwm of committed
167 transactions and the min waited timestamp and its index.
168 */
169 mysql_mutex_t mts_gaq_LOCK;
170 mysql_cond_t logical_clock_cond;
171 /*
172 If true, events with the same server id should be replicated. This
173 field is set on creation of a relay log info structure by copying
174 the value of ::replicate_same_server_id and can be overridden if
175 necessary. For example of when this is done, check sql_binlog.cc,
176 where the BINLOG statement can be used to execute "raw" events.
177 */
178 bool replicate_same_server_id;
179
180 /*
181 The gtid (or anonymous) of the currently executing transaction, or
182 of the last executing transaction if no transaction is currently
    executing. This is used to fill the last_seen_transaction column
    of the table
    performance_schema.replication_applier_status_by_worker.
187 */
188 Gtid_specification currently_executing_gtid;
189
  /*** The following variables can only be read when protected by data lock ****/
191 /*
192 cur_log_fd - file descriptor of the current read relay log
193 */
194 File cur_log_fd;
195 /*
196 Protected with internal locks.
197 Must get data_lock when resetting the logs.
198 */
199 MYSQL_BIN_LOG relay_log;
200 LOG_INFO linfo;
201
202 /*
203 cur_log
204 Pointer that either points at relay_log.get_log_file() or
205 &rli->cache_buf, depending on whether the log is hot or there was
206 the need to open a cold relay_log.
207
208 cache_buf
209 IO_CACHE used when opening cold relay logs.
210 */
211 IO_CACHE cache_buf,*cur_log;
212
213 /*
214 Identifies when the recovery process is going on.
215 See sql/slave.cc:init_recovery for further details.
216 */
217 bool is_relay_log_recovery;
218
219 /* The following variables are safe to read any time */
220
221 /*
222 When we restart slave thread we need to have access to the previously
223 created temporary tables. Modified only on init/end and by the SQL
224 thread, read only by SQL thread.
225 */
226 TABLE *save_temporary_tables;
227
228 /* parent Master_info structure */
229 Master_info *mi;
230
231 /* number of temporary tables open in this channel */
232 Atomic_int32 channel_open_temp_tables;
233
234 /*
235 Needed to deal properly with cur_log getting closed and re-opened with
236 a different log under our feet
237 */
238 uint32 cur_log_old_open_count;
239
240 /*
241 If on init_info() call error_on_rli_init_info is true that means
242 that previous call to init_info() terminated with an error, RESET
243 SLAVE must be executed and the problem fixed manually.
244 */
245 bool error_on_rli_init_info;
246
247 /*
248 Let's call a group (of events) :
249 - a transaction
250 or
    - an autocommitting query + its associated events (INSERT_ID,
252 TIMESTAMP...)
253 We need these rli coordinates :
254 - relay log name and position of the beginning of the group we currently are
255 executing. Needed to know where we have to restart when replication has
256 stopped in the middle of a group (which has been rolled back by the slave).
257 - relay log name and position just after the event we have just
258 executed. This event is part of the current group.
259 Formerly we only had the immediately above coordinates, plus a 'pending'
    variable, but this dealt incorrectly with the case of a transaction starting
    on a relay log and finishing (committing) on another relay log. Case which can
262 happen when, for example, the relay log gets rotated because of
263 max_binlog_size.
264 */
265
266 // overridden new and delete operators for 64 byte alignment
267 static void* operator new(size_t request);
268 static void operator delete(void * ptr);
269
270 protected:
271 char group_relay_log_name[FN_REFLEN];
272 ulonglong group_relay_log_pos;
273 char event_relay_log_name[FN_REFLEN];
274 /* The suffix number of relay log name */
275 uint event_relay_log_number;
276 ulonglong event_relay_log_pos;
277 ulonglong future_event_relay_log_pos;
278
279 /* current event's start position in relay log */
280 my_off_t event_start_pos;
281 /*
282 Original log name and position of the group we're currently executing
283 (whose coordinates are group_relay_log_name/pos in the relay log)
284 in the master's binlog. These concern the *group*, because in the master's
285 binlog the log_pos that comes with each event is the position of the
286 beginning of the group.
287
288 Note: group_master_log_name, group_master_log_pos must only be
289 written from the thread owning the Relay_log_info (SQL thread if
290 !belongs_to_client(); client thread executing BINLOG statement if
291 belongs_to_client()).
292 */
293 char m_group_master_log_name[FN_REFLEN];
294 volatile my_off_t m_group_master_log_pos;
295
296 private:
297 Gtid_set gtid_set;
298 /*
299 Identifies when this object belongs to the SQL thread and was not
300 created for a client thread or some other purpose including
301 Slave_worker instance initializations. Ends up serving the same
302 purpose as the belongs_to_client method, but its value is set
303 earlier on in the class constructor.
304 */
305 bool rli_fake;
306 /* Flag that ensures the retrieved GTID set is initialized only once. */
307 bool gtid_retrieved_initialized;
308
309 public:
add_logged_gtid(rpl_sidno sidno,rpl_gno gno)310 void add_logged_gtid(rpl_sidno sidno, rpl_gno gno)
311 {
312 global_sid_lock->assert_some_lock();
313 assert(sidno <= global_sid_map->get_max_sidno());
314 gtid_set.ensure_sidno(sidno);
315 gtid_set._add_gtid(sidno, gno);
316 }
317
318 /**
319 Adds a GTID set to received GTID set.
320
321 @param gtid_set the gtid_set to add
322
323 @return RETURN_STATUS_OK or RETURN_STATUS_REPORTED_ERROR.
324 */
325 enum_return_status add_gtid_set(const Gtid_set *gtid_set);
326
get_gtid_set()327 const Gtid_set *get_gtid_set() const { return >id_set; }
328
329 int init_relay_log_pos(const char* log,
330 ulonglong pos, bool need_data_lock,
331 const char** errmsg,
332 bool keep_looking_for_fd);
333
334 /*
335 Update the error number, message and timestamp fields. This function is
336 different from va_report() as va_report() also logs the error message in the
337 log apart from updating the error fields.
338 */
339 void fill_coord_err_buf(loglevel level, int err_code,
340 const char *buff_coord) const;
341
342
343 /*
344 Flag that the group_master_log_pos is invalid. This may occur
345 (for example) after CHANGE MASTER TO RELAY_LOG_POS. This will
346 be unset after the first event has been executed and the
347 group_master_log_pos is valid again.
348 */
349 bool is_group_master_log_pos_invalid;
350
351 /*
352 Handling of the relay_log_space_limit optional constraint.
353 ignore_log_space_limit is used to resolve a deadlock between I/O and SQL
354 threads, the SQL thread sets it to unblock the I/O thread and make it
355 temporarily forget about the constraint.
356 */
357 ulonglong log_space_limit,log_space_total;
358 bool ignore_log_space_limit;
359
360 /*
    Used by the SQL thread to instruct the IO thread to rotate
362 the logs when the SQL thread needs to purge to release some
363 disk space.
364 */
365 bool sql_force_rotate_relay;
366
367 time_t last_master_timestamp;
368
369 void clear_until_condition();
370
371 /**
372 Reset the delay.
373 This is used by RESET SLAVE to clear the delay.
374 */
clear_sql_delay()375 void clear_sql_delay()
376 {
377 sql_delay= 0;
378 }
379
380 /*
381 Needed for problems when slave stops and we want to restart it
382 skipping one or more events in the master log that have caused
383 errors, and have been manually applied by DBA already.
384 */
385 volatile uint32 slave_skip_counter;
386 volatile ulong abort_pos_wait; /* Incremented on change master */
387 mysql_mutex_t log_space_lock;
388 mysql_cond_t log_space_cond;
389
390 /*
391 Condition and its parameters from START SLAVE UNTIL clause.
392
393 UNTIL condition is tested with is_until_satisfied() method that is
394 called by exec_relay_log_event(). is_until_satisfied() caches the result
395 of the comparison of log names because log names don't change very often;
396 this cache is invalidated by parts of code which change log names with
397 notify_*_log_name_updated() methods. (They need to be called only if SQL
398 thread is running).
399 */
400 enum
401 {
402 UNTIL_NONE= 0,
403 UNTIL_MASTER_POS,
404 UNTIL_RELAY_POS,
405 UNTIL_SQL_BEFORE_GTIDS,
406 UNTIL_SQL_AFTER_GTIDS,
407 UNTIL_SQL_AFTER_MTS_GAPS,
408 UNTIL_SQL_VIEW_ID,
409 UNTIL_DONE
410 } until_condition;
411 char until_log_name[FN_REFLEN];
412 ulonglong until_log_pos;
413 /* extension extracted from log_name and converted to int */
414 ulong until_log_name_extension;
415 /**
416 The START SLAVE UNTIL SQL_*_GTIDS initializes until_sql_gtids.
417 Each time a gtid is about to be processed, we check if it is in the
418 set. Depending on until_condition, SQL thread is stopped before or
419 after applying the gtid.
420 */
421 Gtid_set until_sql_gtids;
422 /*
423 True if the current event is the first gtid event to be processed
424 after executing START SLAVE UNTIL SQL_*_GTIDS.
425 */
426 bool until_sql_gtids_first_event;
427 /*
428 Cached result of comparison of until_log_name and current log name
    -2 means uninitialised, -1,0,1 are comparison results
430 */
431 enum
432 {
433 UNTIL_LOG_NAMES_CMP_UNKNOWN= -2, UNTIL_LOG_NAMES_CMP_LESS= -1,
434 UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1
435 } until_log_names_cmp_result;
436
437 char cached_charset[6];
438
439 /*
440 View_id until which UNTIL_SQL_VIEW_ID condition will wait.
441 */
442 std::string until_view_id;
443 /*
444 Flag used to indicate that view_id identified by 'until_view_id'
445 was found on the current UNTIL_SQL_VIEW_ID condition.
446 It is set to false on the beginning of the UNTIL_SQL_VIEW_ID
447 condition, and set to true when view_id is found.
448 */
449 bool until_view_id_found;
450 /*
451 Flag used to indicate that commit event after view_id identified
452 by 'until_view_id' was found on the current UNTIL_SQL_VIEW_ID condition.
453 It is set to false on the beginning of the UNTIL_SQL_VIEW_ID
454 condition, and set to true when commit event after view_id is found.
455 */
456 bool until_view_id_commit_found;
457
458 /*
459 trans_retries varies between 0 to slave_transaction_retries and counts how
460 many times the slave has retried the present transaction; gets reset to 0
461 when the transaction finally succeeds. retried_trans is a cumulative
462 counter: how many times the slave has retried a transaction (any) since
463 slave started.
464 */
465 ulong trans_retries, retried_trans;
466
467 /*
468 If the end of the hot relay log is made of master's events ignored by the
469 slave I/O thread, these two keep track of the coords (in the master's
470 binlog) of the last of these events seen by the slave I/O thread. If not,
471 ign_master_log_name_end[0] == 0.
472 As they are like a Rotate event read/written from/to the relay log, they
473 are both protected by rli->relay_log.LOCK_log.
474 */
475 char ign_master_log_name_end[FN_REFLEN];
476 ulonglong ign_master_log_pos_end;
477
478 /*
    Identifies where the SQL Thread should create temporary files for the
480 LOAD DATA INFILE. This is used for security reasons.
481 */
482 char slave_patternload_file[FN_REFLEN];
483 size_t slave_patternload_file_size;
484
485 /**
486 Identifies the last time a checkpoint routine has been executed.
487 */
488 struct timespec last_clock;
489
490 /**
491 Invalidates cached until_log_name and group_relay_log_name comparison
    result. Should be called after any update of group_relay_log_name if
    there are chances that sql_thread is running.
494 */
notify_group_relay_log_name_update()495 inline void notify_group_relay_log_name_update()
496 {
497 if (until_condition==UNTIL_RELAY_POS)
498 until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
499 }
500
501 /**
502 The same as @c notify_group_relay_log_name_update but for
503 @c group_master_log_name.
504 */
notify_group_master_log_name_update()505 inline void notify_group_master_log_name_update()
506 {
507 if (until_condition==UNTIL_MASTER_POS)
508 until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
509 }
510
inc_event_relay_log_pos()511 inline void inc_event_relay_log_pos()
512 {
513 event_relay_log_pos= future_event_relay_log_pos;
514 }
515
516 int inc_group_relay_log_pos(ulonglong log_pos,
517 bool need_data_lock);
518
519 int wait_for_pos(THD* thd, String* log_name, longlong log_pos,
520 double timeout);
521 int wait_for_gtid_set(THD* thd, String* gtid, double timeout);
522 int wait_for_gtid_set(THD* thd, const Gtid_set* wait_gtid_set,
523 double timeout);
524
525 void close_temporary_tables();
526
527 /* Check if UNTIL condition is satisfied. See slave.cc for more. */
528 bool is_until_satisfied(THD *thd, Log_event *ev);
until_pos()529 inline ulonglong until_pos()
530 {
531 return ((until_condition == UNTIL_MASTER_POS) ? get_group_master_log_pos() :
532 group_relay_log_pos);
533 }
534
535 RPL_TABLE_LIST *tables_to_lock; /* RBR: Tables to lock */
536 uint tables_to_lock_count; /* RBR: Count of tables to lock */
537 table_mapping m_table_map; /* RBR: Mapping table-id to table */
538 /* RBR: Record Rows_query log event */
539 Rows_query_log_event* rows_query_ev;
540
get_table_data(TABLE * table_arg,table_def ** tabledef_var,TABLE ** conv_table_var)541 bool get_table_data(TABLE *table_arg, table_def **tabledef_var, TABLE **conv_table_var) const
542 {
543 assert(tabledef_var && conv_table_var);
544 for (TABLE_LIST *ptr= tables_to_lock ; ptr != NULL ; ptr= ptr->next_global)
545 if (ptr->table == table_arg)
546 {
547 *tabledef_var= &static_cast<RPL_TABLE_LIST*>(ptr)->m_tabledef;
548 *conv_table_var= static_cast<RPL_TABLE_LIST*>(ptr)->m_conv_table;
549 DBUG_PRINT("debug", ("Fetching table data for table %s.%s:"
550 " tabledef: %p, conv_table: %p",
551 table_arg->s->db.str, table_arg->s->table_name.str,
552 *tabledef_var, *conv_table_var));
553 return true;
554 }
555 return false;
556 }
557
558 /**
559 Last charset (6 bytes) seen by slave SQL thread is cached here; it helps
560 the thread save 3 @c get_charset() per @c Query_log_event if the charset is not
561 changing from event to event (common situation).
    All 6 bytes being equal to 0 means "cache is invalidated".
563 */
564 void cached_charset_invalidate();
565 bool cached_charset_compare(char *charset) const;
566
567 void cleanup_context(THD *, bool);
568 void slave_close_thread_tables(THD *);
569 void clear_tables_to_lock();
570 int purge_relay_logs(THD *thd, bool just_reset, const char** errmsg,
571 bool delete_only= false);
572
573 /*
574 Used to defer stopping the SQL thread to give it a chance
575 to finish up the current group of events.
576 The timestamp is set and reset in @c sql_slave_killed().
577 */
578 time_t last_event_start_time;
579 /*
580 A container to hold on Intvar-, Rand-, Uservar- log-events in case
581 the slave is configured with table filtering rules.
    The withheld events are executed when their parent Query destiny is
583 determined for execution as well.
584 */
585 Deferred_log_events *deferred_events;
586
587 /*
588 State of the container: true stands for IRU events gathering,
589 false does for execution, either deferred or direct.
590 */
591 bool deferred_events_collecting;
592
593 /*****************************************************************************
594 WL#5569 MTS
595
596 legends:
597 C - Coordinator;
598 W - Worker;
599 WQ - Worker Queue containing event assignments
600 */
  // the number of workers is determined by the global slave_parallel_workers
602 Slave_worker_array workers;
603
604 HASH mapping_db_to_worker; // To map a database to a worker
605 bool inited_hash_workers; // flag to check if mapping_db_to_worker is inited
606
607 mysql_mutex_t slave_worker_hash_lock; // for mapping_db_to_worker
608 mysql_cond_t slave_worker_hash_cond;// for mapping_db_to_worker
609
610 /*
611 For the purpose of reporting the worker status in performance schema table,
612 we need to preserve the workers array after worker thread was killed. So, we
613 copy this array into the below vector which is used for reporting
614 until next init_workers(). Note that we only copy those attributes that
615 would be useful in reporting worker status. We only use a few attributes in
616 this object as of now but still save the whole object. The idea is
617 to be future proof. We will extend performance schema tables in future
618 and then we would use a good number of attributes from this object.
619 */
620
621 std::vector<Slave_worker*> workers_copy_pfs;
622
623 /*
624 This flag is turned ON when the workers array is initialized.
625 Before destroying the workers array we check this flag to make sure
    we are not destroying an uninitialized array. For the purpose of reporting the
627 worker status in performance schema table, we need to preserve the workers
628 array after worker thread was killed. So, we copy this array into
629 workers_copy_pfs array which is used for reporting until next init_workers().
630 */
631 bool workers_array_initialized;
632
633 volatile ulong pending_jobs;
634 mysql_mutex_t pending_jobs_lock;
635 mysql_cond_t pending_jobs_cond;
636 mysql_mutex_t exit_count_lock; // mutex of worker exit count
637 ulong mts_slave_worker_queue_len_max;
638 ulonglong mts_pending_jobs_size; // actual mem usage by WQ:s
639 ulonglong mts_pending_jobs_size_max; // max of WQ:s size forcing C to wait
640 bool mts_wq_oversize; // C raises flag to wait some memory's released
641 Slave_worker *last_assigned_worker;// is set to a Worker at assigning a group
642 /*
643 master-binlog ordered queue of Slave_job_group descriptors of groups
644 that are under processing. The queue size is @c checkpoint_group.
645 */
646 Slave_committed_queue *gaq;
647 /*
648 Container for references of involved partitions for the current event group
649 */
650 // CGAP dynarray holds id:s of partitions of the Current being executed Group
651 Prealloced_array<db_worker_hash_entry*, 4, true> curr_group_assigned_parts;
652 // deferred array to hold partition-info-free events
653 Prealloced_array<Slave_job_item, 8, true> curr_group_da;
654
655 bool curr_group_seen_gtid; // current group started with Gtid-event or not
656 bool curr_group_seen_begin; // current group started with B-event or not
657 bool curr_group_isolated; // current group requires execution in isolation
658 bool mts_end_group_sets_max_dbs; // flag indicates if partitioning info is discovered
659 volatile ulong mts_wq_underrun_w_id; // Id of a Worker whose queue is getting empty
660 /*
661 Ongoing excessive overrun counter to correspond to number of events that
662 are being scheduled while a WQ is close to be filled up.
663 `Close' is defined as (100 - mts_worker_underrun_level) %.
664 The counter is incremented each time a WQ get filled over that level
665 and decremented when the level drops below.
666 The counter therefore describes level of saturation that Workers
667 are experiencing and is used as a parameter to compute a nap time for
668 Coordinator in order to avoid reaching WQ limits.
669 */
670 volatile long mts_wq_excess_cnt;
671 long mts_worker_underrun_level; // % of WQ size at which W is considered hungry
672 ulong mts_coordinator_basic_nap; // C sleeps to avoid WQs overrun
673 ulong opt_slave_parallel_workers; // cache for ::opt_slave_parallel_workers
674 ulong slave_parallel_workers; // the one slave session time number of workers
675 ulong exit_counter; // Number of workers contributed to max updated group index
676 ulonglong max_updated_index;
677 ulong recovery_parallel_workers; // number of workers while recovering
678 uint checkpoint_seqno; // counter of groups executed after the most recent CP
679 uint checkpoint_group; // cache for ::opt_mts_checkpoint_group
680 MY_BITMAP recovery_groups; // bitmap used during recovery
681 bool recovery_groups_inited;
682 ulong mts_recovery_group_cnt; // number of groups to execute at recovery
683 ulong mts_recovery_index; // running index of recoverable groups
684 bool mts_recovery_group_seen_begin;
685
686 /*
    While distributing events based on their properties, the MTS
    Coordinator changes its mts group status.
    Transitions normally flow along the `=>' arrows on the diagram:
690
691 +----------------------------+
692 V |
693 MTS_NOT_IN_GROUP => |
694 {MTS_IN_GROUP => MTS_END_GROUP --+} while (!killed) => MTS_KILLED_GROUP
695
696 MTS_END_GROUP has `->' loop breaking link to MTS_NOT_IN_GROUP when
697 Coordinator synchronizes with Workers by demanding them to
698 complete their assignments.
699 */
700 enum
701 {
702 /*
703 no new events were scheduled after last synchronization,
704 includes Single-Threaded-Slave case.
705 */
706 MTS_NOT_IN_GROUP,
707
708 MTS_IN_GROUP, /* at least one not-terminal event scheduled to a Worker */
709 MTS_END_GROUP, /* the last scheduled event is a terminal event */
710 MTS_KILLED_GROUP /* Coordinator gave up to reach MTS_END_GROUP */
711 } mts_group_status;
712
713 /*
714 MTS statistics:
715 */
716 ulonglong mts_events_assigned; // number of events (statements) scheduled
717 ulonglong mts_groups_assigned; // number of groups (transactions) scheduled
718 volatile ulong mts_wq_overrun_cnt; // counter of all mts_wq_excess_cnt increments
719 ulong wq_size_waits_cnt; // number of times C slept due to WQ:s oversize
720 /*
721 Counter of how many times Coordinator saw Workers are filled up
    "enough" with assignments. The enough definition depends on
723 the scheduler type.
724 */
725 ulong mts_wq_no_underrun_cnt;
726 longlong mts_total_wait_overlap; // Waiting time corresponding to above
727 /*
728 Stats to compute Coordinator waiting time for any Worker available,
729 applies solely to the Commit-clock scheduler.
730 */
731 ulonglong mts_total_wait_worker_avail;
732 ulong mts_wq_overfill_cnt; // counter of C waited due to a WQ queue was full
733 /*
    Statistics (todo: replace with WL5631) apply to both Coordinator and Worker.
735 The exec time in the Coordinator case means scheduling.
736 The read time in the Worker case means getting an event out of Worker queue
737 */
738 ulonglong stats_exec_time;
739 ulonglong stats_read_time;
740 struct timespec ts_exec[2]; // per event pre- and post- exec timestamp
741 struct timespec stats_begin; // applier's bootstrap time
742
743 /*
    A sorted array of the Workers' current assignment numbers to provide
745 approximate view on Workers loading.
746 The first row of the least occupied Worker is queried at assigning
747 a new partition. Is updated at checkpoint commit to the main RLI.
748 */
749 Prealloced_array<ulong, 16> least_occupied_workers;
750 time_t mts_last_online_stat;
751 /* end of MTS statistics */
752
753 /* Returns the number of elements in workers array/vector. */
get_worker_count()754 inline size_t get_worker_count()
755 {
756 if (workers_array_initialized)
757 return workers.size();
758 else
759 return workers_copy_pfs.size();
760 }
761
762 /*
763 Returns a pointer to the worker instance at index n in workers
764 array/vector.
765 */
get_worker(size_t n)766 Slave_worker* get_worker(size_t n)
767 {
768 if (workers_array_initialized)
769 {
770 if (n >= workers.size())
771 return NULL;
772
773 return workers[n];
774 }
775 else if (workers_copy_pfs.size())
776 {
777 if (n >= workers_copy_pfs.size())
778 return NULL;
779
780 return workers_copy_pfs[n];
781 }
782 else
783 return NULL;
784 }
785
786 /*Channel defined mts submode*/
787 enum_mts_parallel_type channel_mts_submode;
788 /* MTS submode */
789 Mts_submode* current_mts_submode;
790
791 /*
    Slave side local seq_no identifying a parent group on which the
    transaction being scheduled is considered to be dependent
794 */
795 ulonglong mts_last_known_parent_group_id;
796
797 /* most of allocation in the coordinator rli is there */
798 void init_workers(ulong);
799
800 /* counterpart of the init */
801 void deinit_workers();
802
803 /**
804 returns true if there is any gap-group of events to execute
805 at slave starting phase.
806 */
is_mts_recovery()807 inline bool is_mts_recovery() const
808 {
809 return mts_recovery_group_cnt != 0;
810 }
811
clear_mts_recovery_groups()812 inline void clear_mts_recovery_groups()
813 {
814 if (recovery_groups_inited)
815 {
816 bitmap_free(&recovery_groups);
817 mts_recovery_group_cnt= 0;
818 recovery_groups_inited= false;
819 }
820 }
821
822 /**
823 returns true if events are to be executed in parallel
824 */
is_parallel_exec()825 inline bool is_parallel_exec() const
826 {
827 bool ret= (slave_parallel_workers > 0) && !is_mts_recovery();
828
829 assert(!ret || !workers.empty());
830
831 return ret;
832 }
833
834 /**
835 returns true if Coordinator is scheduling events belonging to
836 the same group and has not reached yet its terminal event.
837 */
is_mts_in_group()838 inline bool is_mts_in_group()
839 {
840 return is_parallel_exec() &&
841 mts_group_status == MTS_IN_GROUP;
842 }
843
844 bool mts_workers_queue_empty() const;
845 bool cannot_safely_rollback() const;
846
847 /**
848 While a group is executed by a Worker the relay log can change.
849 Coordinator notifies Workers about this event. Worker is supposed
850 to commit to the recovery table with the new info.
851 */
852 void reset_notified_relay_log_change();
853
854 /**
855 While a group is executed by a Worker the relay log can change.
856 Coordinator notifies Workers about this event. Coordinator and Workers
857 maintain a bitmap of executed group that is reset with a new checkpoint.
858 */
859 void reset_notified_checkpoint(ulong count, time_t new_ts,
860 bool need_data_lock,
861 bool update_timestamp= false);
862
863 /**
864 Called when gaps execution is ended so it is crash-safe
865 to reset the last session Workers info.
866 */
867 bool mts_finalize_recovery();
868 /*
869 * End of MTS section ******************************************************/
870
871 /* The general cleanup that slave applier may need at the end of query. */
cleanup_after_query()872 inline void cleanup_after_query()
873 {
874 if (deferred_events)
875 deferred_events->rewind();
876 };
877 /* The general cleanup that slave applier may need at the end of session. */
cleanup_after_session()878 void cleanup_after_session()
879 {
880 if (deferred_events)
881 delete deferred_events;
882 };
883
884 /**
885 Helper function to do after statement completion.
886
887 This function is called from an event to complete the group by
888 either stepping the group position, if the "statement" is not
889 inside a transaction; or increase the event position, if the
890 "statement" is inside a transaction.
891
892 @param event_log_pos
893 Master log position of the event. The position is recorded in the
894 relay log info and used to produce information for <code>SHOW
895 SLAVE STATUS</code>.
896 */
897 int stmt_done(my_off_t event_log_pos);
898
899
900 /**
901 Set the value of a replication state flag.
902
903 @param flag Flag to set
904 */
set_flag(enum_state_flag flag)905 void set_flag(enum_state_flag flag)
906 {
907 m_flags |= (1UL << flag);
908 }
909
910 /**
911 Get the value of a replication state flag.
912
913 @param flag Flag to get value of
914
915 @return @c true if the flag was set, @c false otherwise.
916 */
get_flag(enum_state_flag flag)917 bool get_flag(enum_state_flag flag)
918 {
919 return m_flags & (1UL << flag);
920 }
921
922 /**
923 Clear the value of a replication state flag.
924
925 @param flag Flag to clear
926 */
clear_flag(enum_state_flag flag)927 void clear_flag(enum_state_flag flag)
928 {
929 m_flags &= ~(1UL << flag);
930 }
931
932 private:
933 /**
934 Auxiliary function used by is_in_group.
935
936 The execute thread is in the middle of a statement in the
937 following cases:
938 - User_var/Intvar/Rand events have been processed, but the
939 corresponding Query_log_event has not been processed.
940 - Table_map or Row events have been processed, and the last Row
941 event did not have the STMT_END_F set.
942
943 @retval true Replication thread is inside a statement.
944 @retval false Replication thread is not inside a statement.
945 */
is_in_stmt()946 bool is_in_stmt() const
947 {
948 bool ret= (m_flags & (1UL << IN_STMT));
949 DBUG_PRINT("info", ("is_in_stmt()=%d", ret));
950 return ret;
951 }
/**
  Auxiliary function used by is_in_group.

  @retval true The execute thread is inside a statement or a
  transaction, i.e., either a BEGIN has been executed or we are in
  the middle of a statement.
  @retval false The execute thread is not inside a statement
  or a transaction.
*/
is_in_trx_or_stmt()961 bool is_in_trx_or_stmt() const
962 {
963 bool ret= is_in_stmt() || (info_thd->variables.option_bits & OPTION_BEGIN);
964 DBUG_PRINT("info", ("is_in_trx_or_stmt()=%d", ret));
965 return ret;
966 }
967 public:
968 /**
969 A group is defined as the entire range of events that constitute
970 a transaction or auto-committed statement. It has one of the
971 following forms:
972
973 (Gtid)? Query(BEGIN) ... (Query(COMMIT) | Query(ROLLBACK) | Xid)
974 (Gtid)? (Rand | User_var | Int_var)* Query(DDL)
975
976 Thus, to check if the execute thread is in a group, there are
977 two cases:
978
979 - If the master generates Gtid events (5.7.5 or later, or 5.6 or
980 later with GTID_MODE=ON), then is_in_group is the same as
981 info_thd->owned_gtid.sidno != 0, since owned_gtid.sidno is set
982 to non-zero by the Gtid_log_event and cleared to zero at commit
983 or rollback.
984
985 - If the master does not generate Gtid events (i.e., master is
986 pre-5.6, or pre-5.7.5 with GTID_MODE=OFF), then is_in_group is
987 the same as is_in_trx_or_stmt().
988
989 @retval true Replication thread is inside a group.
990 @retval false Replication thread is not inside a group.
991 */
is_in_group()992 bool is_in_group() const
993 {
994 bool ret= is_in_trx_or_stmt() || info_thd->owned_gtid.sidno != 0;
995 DBUG_PRINT("info", ("is_in_group()=%d", ret));
996 return ret;
997 }
998
999 int count_relay_log_space();
1000
1001 int rli_init_info();
1002 void end_info();
1003 int flush_info(bool force= FALSE);
1004 int flush_current_log();
1005 void set_master_info(Master_info *info);
1006
get_future_event_relay_log_pos()1007 inline ulonglong get_future_event_relay_log_pos() { return future_event_relay_log_pos; }
set_future_event_relay_log_pos(ulonglong log_pos)1008 inline void set_future_event_relay_log_pos(ulonglong log_pos)
1009 {
1010 future_event_relay_log_pos= log_pos;
1011 }
1012
get_group_master_log_name()1013 inline const char* get_group_master_log_name()
1014 {
1015 return m_group_master_log_name;
1016 }
get_group_master_log_pos()1017 inline ulonglong get_group_master_log_pos() { return m_group_master_log_pos; }
set_group_master_log_name(const char * log_file_name)1018 inline void set_group_master_log_name(const char *log_file_name)
1019 {
1020 assert(!info_thd ||
1021 info_thd->backup_binlog_lock.is_protection_acquired());
1022
1023 strmake(m_group_master_log_name, log_file_name,
1024 sizeof(m_group_master_log_name) - 1);
1025 }
set_group_master_log_pos(ulonglong log_pos)1026 inline void set_group_master_log_pos(ulonglong log_pos)
1027 {
1028 assert(!info_thd ||
1029 info_thd->backup_binlog_lock.is_protection_acquired());
1030 m_group_master_log_pos= log_pos;
1031 }
1032
get_group_relay_log_name()1033 inline const char* get_group_relay_log_name() { return group_relay_log_name; }
get_group_relay_log_pos()1034 inline ulonglong get_group_relay_log_pos() { return group_relay_log_pos; }
set_group_relay_log_name(const char * log_file_name)1035 inline void set_group_relay_log_name(const char *log_file_name)
1036 {
1037 strmake(group_relay_log_name,log_file_name, sizeof(group_relay_log_name)-1);
1038 }
set_group_relay_log_name(const char * log_file_name,size_t len)1039 inline void set_group_relay_log_name(const char *log_file_name, size_t len)
1040 {
1041 strmake(group_relay_log_name, log_file_name, len);
1042 }
set_group_relay_log_pos(ulonglong log_pos)1043 inline void set_group_relay_log_pos(ulonglong log_pos)
1044 {
1045 group_relay_log_pos= log_pos;
1046 }
1047
get_event_relay_log_name()1048 inline const char* get_event_relay_log_name() { return event_relay_log_name; }
get_event_relay_log_pos()1049 inline ulonglong get_event_relay_log_pos() { return event_relay_log_pos; }
set_event_relay_log_name(const char * log_file_name)1050 inline void set_event_relay_log_name(const char *log_file_name)
1051 {
1052 strmake(event_relay_log_name,log_file_name, sizeof(event_relay_log_name)-1);
1053 set_event_relay_log_number(relay_log_name_to_number(log_file_name));
1054 }
1055
get_event_relay_log_number()1056 uint get_event_relay_log_number() { return event_relay_log_number; }
set_event_relay_log_number(uint number)1057 void set_event_relay_log_number(uint number)
1058 {
1059 event_relay_log_number= number;
1060 }
1061
1062 /**
1063 Given the extension number of the relay log, gets the full
1064 relay log path. Currently used in Slave_worker::retry_transaction()
1065
1066 @param [in] number extension number of relay log
1067 @param[in, out] name The full path of the relay log (per-channel)
1068 to be read by the slave worker.
1069 */
1070 void relay_log_number_to_name(uint number, char name[FN_REFLEN+1]);
1071 uint relay_log_name_to_number(const char *name);
1072
set_event_start_pos(my_off_t pos)1073 void set_event_start_pos(my_off_t pos) { event_start_pos= pos; }
get_event_start_pos()1074 my_off_t get_event_start_pos() { return event_start_pos; }
1075
set_event_relay_log_pos(ulonglong log_pos)1076 inline void set_event_relay_log_pos(ulonglong log_pos)
1077 {
1078 event_relay_log_pos= log_pos;
1079 }
get_rpl_log_name()1080 inline const char* get_rpl_log_name()
1081 {
1082 return (m_group_master_log_name[0] ? m_group_master_log_name : "FIRST");
1083 }
1084
1085 static size_t get_number_info_rli_fields();
1086
1087 /**
1088 Indicate that a delay starts.
1089
1090 This does not actually sleep; it only sets the state of this
1091 Relay_log_info object to delaying so that the correct state can be
1092 reported by SHOW SLAVE STATUS and SHOW PROCESSLIST.
1093
1094 Requires rli->data_lock.
1095
1096 @param delay_end The time when the delay shall end.
1097 */
start_sql_delay(time_t delay_end)1098 void start_sql_delay(time_t delay_end)
1099 {
1100 mysql_mutex_assert_owner(&data_lock);
1101 sql_delay_end= delay_end;
1102 THD_STAGE_INFO(info_thd, stage_sql_thd_waiting_until_delay);
1103 }
1104
1105 /* Note that this is cast to uint32 in show_slave_status(). */
get_sql_delay()1106 time_t get_sql_delay() { return sql_delay; }
set_sql_delay(time_t _sql_delay)1107 void set_sql_delay(time_t _sql_delay) { sql_delay= _sql_delay; }
get_sql_delay_end()1108 time_t get_sql_delay_end() { return sql_delay_end; }
1109
1110 Relay_log_info(bool is_slave_recovery
1111 #ifdef HAVE_PSI_INTERFACE
1112 ,PSI_mutex_key *param_key_info_run_lock,
1113 PSI_mutex_key *param_key_info_data_lock,
1114 PSI_mutex_key *param_key_info_sleep_lock,
1115 PSI_mutex_key *param_key_info_thd_lock,
1116 PSI_mutex_key *param_key_info_data_cond,
1117 PSI_mutex_key *param_key_info_start_cond,
1118 PSI_mutex_key *param_key_info_stop_cond,
1119 PSI_mutex_key *param_key_info_sleep_cond
1120 #endif
1121 , uint param_id, const char* param_channel, bool is_rli_fake
1122 );
1123 virtual ~Relay_log_info();
1124
/*
  Determines if a warning message on unsafe execution was
  already printed out, to avoid cluttering the error log
  with several warning messages.
*/
1130 bool reported_unsafe_warning;
1131
/*
  sql_thread_kill_accepted is set to TRUE when killed status is recognized.
*/
1135 bool sql_thread_kill_accepted;
1136
get_row_stmt_start_timestamp()1137 time_t get_row_stmt_start_timestamp()
1138 {
1139 return row_stmt_start_timestamp;
1140 }
1141
set_row_stmt_start_timestamp()1142 time_t set_row_stmt_start_timestamp()
1143 {
1144 if (row_stmt_start_timestamp == 0)
1145 row_stmt_start_timestamp= my_time(0);
1146
1147 return row_stmt_start_timestamp;
1148 }
1149
reset_row_stmt_start_timestamp()1150 void reset_row_stmt_start_timestamp()
1151 {
1152 row_stmt_start_timestamp= 0;
1153 }
1154
set_long_find_row_note_printed()1155 void set_long_find_row_note_printed()
1156 {
1157 long_find_row_note_printed= true;
1158 }
1159
unset_long_find_row_note_printed()1160 void unset_long_find_row_note_printed()
1161 {
1162 long_find_row_note_printed= false;
1163 }
1164
is_long_find_row_note_printed()1165 bool is_long_find_row_note_printed()
1166 {
1167 return long_find_row_note_printed;
1168 }
1169
1170 public:
1171 /**
1172 Delete the existing event and set a new one. This class is
1173 responsible for freeing the event, the caller should not do that.
1174 */
1175 virtual void set_rli_description_event(Format_description_log_event *fdle);
1176
1177 /**
1178 Return the current Format_description_log_event.
1179 */
get_rli_description_event()1180 Format_description_log_event *get_rli_description_event() const
1181 {
1182 return rli_description_event;
1183 }
1184
1185 /**
1186 adaptation for the slave applier to specific master versions.
1187 */
1188 ulong adapt_to_master_version(Format_description_log_event *fdle);
1189 ulong adapt_to_master_version_updown(ulong master_version,
1190 ulong current_version);
1191 uchar slave_version_split[3]; // bytes of the slave server version
/*
  The relay log info repository should be updated on relay log
  rotate. But when a transaction is split across two relay logs,
  updating the repository will cause unexpected results and should
  be postponed until the 'commit' of the transaction is executed.

  This flag is set to 'true' when this type of 'forced flush' (at the
  time of relay log rotation) is postponed because a transaction is
  split across the relay logs.
*/
1202 bool force_flush_postponed_due_to_split_trans;
1203
get_commit_order_manager()1204 Commit_order_manager *get_commit_order_manager()
1205 {
1206 return commit_order_mngr;
1207 }
1208
set_commit_order_manager(Commit_order_manager * mngr)1209 void set_commit_order_manager(Commit_order_manager *mngr)
1210 {
1211 commit_order_mngr= mngr;
1212 }
1213
1214 bool set_info_search_keys(Rpl_info_handler *to);
1215
/**
  Get coordinator's RLI. Mainly used to get the rli from
  a slave thread, like this: thd->rli_slave->get_c_rli();
  thd could be a SQL thread or a worker thread.
*/
get_c_rli()1221 virtual Relay_log_info* get_c_rli()
1222 {
1223 return this;
1224 }
1225
1226 virtual const char* get_for_channel_str(bool upper_case= false) const;
1227
1228 protected:
1229 Format_description_log_event *rli_description_event;
1230
1231 private:
/*
  Commit order manager to order commits made by its workers. In the context
  of Multi Source Replication each worker will be ordered by the
  corresponding coordinator's order manager.
*/
1237 Commit_order_manager* commit_order_mngr;
1238
1239 /**
1240 Delay slave SQL thread by this amount, compared to master (in
1241 seconds). This is set with CHANGE MASTER TO MASTER_DELAY=X.
1242
1243 Guarded by data_lock. Initialized by the client thread executing
1244 START SLAVE. Written by client threads executing CHANGE MASTER TO
1245 MASTER_DELAY=X. Read by SQL thread and by client threads
1246 executing SHOW SLAVE STATUS. Note: must not be written while the
1247 slave SQL thread is running, since the SQL thread reads it without
1248 a lock when executing flush_info().
1249 */
1250 time_t sql_delay;
1251
1252 /**
1253 During a delay, specifies the point in time when the delay ends.
1254
1255 This is used for the SQL_Remaining_Delay column in SHOW SLAVE STATUS.
1256
1257 Guarded by data_lock. Written by the sql thread. Read by client
1258 threads executing SHOW SLAVE STATUS.
1259 */
1260 time_t sql_delay_end;
1261
1262 uint32 m_flags;
1263
1264 /*
1265 Before the MASTER_DELAY parameter was added (WL#344), relay_log.info
1266 had 4 lines. Now it has 5 lines.
1267 */
1268 static const int LINES_IN_RELAY_LOG_INFO_WITH_DELAY= 5;
1269
1270 /*
1271 Before the WL#5599, relay_log.info had 5 lines. Now it has 6 lines.
1272 */
1273 static const int LINES_IN_RELAY_LOG_INFO_WITH_WORKERS= 6;
1274
1275 /*
1276 Before the Id was added (BUG#2334346), relay_log.info
1277 had 6 lines. Now it has 7 lines.
1278 */
1279 static const int LINES_IN_RELAY_LOG_INFO_WITH_ID= 7;
1280
1281 /*
1282 Add a channel in the slave relay log info
1283 */
1284 static const int LINES_IN_RELAY_LOG_INFO_WITH_CHANNEL= 8;
1285
1286 bool read_info(Rpl_info_handler *from);
1287 bool write_info(Rpl_info_handler *to);
1288
1289 Relay_log_info(const Relay_log_info& info);
1290 Relay_log_info& operator=(const Relay_log_info& info);
1291
1292 /*
1293 Runtime state for printing a note when slave is taking
1294 too long while processing a row event.
1295 */
1296 time_t row_stmt_start_timestamp;
1297 bool long_find_row_note_printed;
1298
1299
1300 /**
1301 sets the suffix required for relay log names
1302 in multisource replication.
1303 The extension is "-relay-bin-<channel_name>"
1304 @param[in, out] buff buffer to store the complete relay log file name
1305 @param[in] buff_size size of buffer buff
1306 @param[in] base_name the base name of the relay log file
1307 */
1308 const char* add_channel_to_relay_log_name(char *buff, uint buff_size,
1309 const char *base_name);
1310
1311 /*
1312 Applier thread InnoDB priority.
1313 When two transactions conflict inside InnoDB, the one with
1314 greater priority wins.
1315 Priority must be set before applier thread start so that all
1316 executed transactions have the same priority.
1317 */
1318 int thd_tx_priority;
1319
/**
  Whether or not the SQL thread should ignore the set limit for
  write set collection.
*/
1324 bool m_ignore_write_set_memory_limit;
1325
1326 /**
1327 Even if a component says all transactions require write sets,
1328 this variable says the SQL thread transactions can drop them
1329 */
1330 bool m_allow_drop_write_set;
1331
1332 public:
1333 /*
1334 The boolean is set to true when the binlog (rli_fake) or slave
1335 (rli_slave) applier thread detaches any engine ha_data
1336 it has dealt with at time of XA START processing.
1337 The boolean is reset to false at the end of XA PREPARE,
1338 XA COMMIT ONE PHASE for the binlog applier, and
1339 at internal rollback of the slave applier at the same time with
1340 the engine ha_data re-attachment.
1341 */
1342 bool is_engine_ha_data_detached;
1343
set_thd_tx_priority(int priority)1344 void set_thd_tx_priority(int priority)
1345 {
1346 thd_tx_priority= priority;
1347 }
1348
get_thd_tx_priority()1349 int get_thd_tx_priority()
1350 {
1351 return thd_tx_priority;
1352 }
1353
set_ignore_write_set_memory_limit(bool ignore_limit)1354 void set_ignore_write_set_memory_limit(bool ignore_limit) {
1355 m_ignore_write_set_memory_limit = ignore_limit;
1356 }
1357
get_ignore_write_set_memory_limit()1358 bool get_ignore_write_set_memory_limit() {
1359 return m_ignore_write_set_memory_limit;
1360 }
1361
set_allow_drop_write_set(bool does_not_require_ws)1362 void set_allow_drop_write_set(bool does_not_require_ws) {
1363 m_allow_drop_write_set = does_not_require_ws;
1364 }
1365
get_allow_drop_write_set()1366 bool get_allow_drop_write_set() { return m_allow_drop_write_set; }
1367
/**
  Detaches the engine ha_data from THD. The fact
  is memorized in the @c is_engine_ha_data_detached flag.

  @param thd a reference to THD
*/
1374 void detach_engine_ha_data(THD *thd);
/**
  Reattaches the engine ha_data to THD. The fact
  is memorized in the @c is_engine_ha_data_detached flag.

  @param thd a reference to THD
*/
1381 void reattach_engine_ha_data(THD *thd);
1382 /**
1383 Drops the engine ha_data flag when it is up.
1384 The method is run at execution points of the engine ha_data
1385 re-attachment.
1386
1387 @return true when THD has detached the engine ha_data,
1388 false otherwise
1389 */
unflag_detached_engine_ha_data()1390 bool unflag_detached_engine_ha_data()
1391 {
1392 bool rc= false;
1393
1394 if (is_engine_ha_data_detached)
1395 rc= !(is_engine_ha_data_detached= false); // return the old value
1396
1397 return rc;
1398 }
1399 };
1400
1401 bool mysql_show_relaylog_events(THD* thd);
1402
1403
1404 /**
1405 @param thd a reference to THD
1406 @return TRUE if thd belongs to a Worker thread and FALSE otherwise.
1407 */
is_mts_worker(const THD * thd)1408 inline bool is_mts_worker(const THD *thd)
1409 {
1410 return thd->system_thread == SYSTEM_THREAD_SLAVE_WORKER;
1411 }
1412
1413
1414 /**
1415 Auxiliary function to check if we have a db partitioned MTS
1416 */
1417 bool is_mts_db_partitioned(Relay_log_info * rli);
1418 #endif /* RPL_RLI_H */
1419