/*
  Copyright (c) 2006, 2020, Oracle and/or its affiliates.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License, version 2.0,
   as published by the Free Software Foundation.

   This program is also distributed with certain software (including
   but not limited to OpenSSL) that is licensed under separate terms,
   as designated in a particular file or component or in included license
   documentation.  The authors of MySQL hereby grant you an additional
   permission to link the program and your derivative works with the
   separately licensed software that they have included with MySQL.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License, version 2.0, for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
*/

#include "storage/ndb/plugin/ha_ndbcluster_binlog.h"

#include <unordered_map>

#include "my_dbug.h"
#include "my_thread.h"
#include "mysql/plugin.h"
#include "sql/auth/acl_change_notification.h"
#include "sql/binlog.h"
#include "sql/dd/types/abstract_table.h"  // dd::enum_table_type
#include "sql/dd/types/tablespace.h"      // dd::Tablespace
#include "sql/derror.h"                   // ER_THD
#include "sql/mysqld.h"                   // opt_bin_log
#include "sql/mysqld_thd_manager.h"       // Global_THD_manager
#include "sql/protocol_classic.h"
#include "sql/rpl_injector.h"
#include "sql/rpl_slave.h"
#include "sql/sql_lex.h"
#include "sql/sql_rewrite.h"
#include "sql/sql_table.h"  // build_table_filename
#include "sql/sql_thd_internal_api.h"
#include "sql/thd_raii.h"
#include "sql/transaction.h"
#include "storage/ndb/include/ndbapi/NdbDictionary.hpp"
#include "storage/ndb/include/ndbapi/ndb_cluster_connection.hpp"
#include "storage/ndb/plugin/ha_ndbcluster.h"
#include "storage/ndb/plugin/ha_ndbcluster_connection.h"
#include "storage/ndb/plugin/ndb_apply_status_table.h"
#include "storage/ndb/plugin/ndb_binlog_client.h"
#include "storage/ndb/plugin/ndb_bitmap.h"
#include "storage/ndb/plugin/ndb_dd.h"
#include "storage/ndb/plugin/ndb_dd_client.h"
#include "storage/ndb/plugin/ndb_dd_disk_data.h"
#include "storage/ndb/plugin/ndb_dd_sync.h"  // Ndb_dd_sync
#include "storage/ndb/plugin/ndb_dd_table.h"
#include "storage/ndb/plugin/ndb_global_schema_lock_guard.h"
#include "storage/ndb/plugin/ndb_local_connection.h"
#include "storage/ndb/plugin/ndb_log.h"
#include "storage/ndb/plugin/ndb_name_util.h"
#include "storage/ndb/plugin/ndb_ndbapi_util.h"
#include "storage/ndb/plugin/ndb_require.h"
#include "storage/ndb/plugin/ndb_retry.h"
#include "storage/ndb/plugin/ndb_schema_dist_table.h"
#include "storage/ndb/plugin/ndb_schema_result_table.h"
#include "storage/ndb/plugin/ndb_sleep.h"
#include "storage/ndb/plugin/ndb_stored_grants.h"
#include "storage/ndb/plugin/ndb_table_guard.h"
#include "storage/ndb/plugin/ndb_tdc.h"
#include "storage/ndb/plugin/ndb_thd.h"
#include "storage/ndb/plugin/ndb_upgrade_util.h"

typedef NdbDictionary::Event NDBEVENT;
typedef NdbDictionary::Column NDBCOL;
typedef NdbDictionary::Table NDBTAB;

extern bool opt_ndb_log_orig;
extern bool opt_ndb_log_bin;
extern bool opt_ndb_log_update_as_write;
extern bool opt_ndb_log_updated_only;
extern bool opt_ndb_log_update_minimal;
extern bool opt_ndb_log_binlog_index;
extern bool opt_ndb_log_apply_status;
extern st_ndb_slave_state g_ndb_slave_state;
extern bool opt_ndb_log_transaction_id;
extern bool log_bin_use_v1_row_events;
extern bool opt_ndb_log_empty_update;
extern bool opt_ndb_clear_apply_status;
extern bool opt_ndb_log_fail_terminate;
extern int opt_ndb_schema_dist_timeout;
extern ulong opt_ndb_schema_dist_lock_wait_timeout;

bool ndb_log_empty_epochs(void);

void ndb_index_stat_restart();

#include "storage/ndb/plugin/ndb_anyvalue.h"
#include "storage/ndb/plugin/ndb_binlog_extra_row_info.h"
#include "storage/ndb/plugin/ndb_binlog_thread.h"
#include "storage/ndb/plugin/ndb_event_data.h"
#include "storage/ndb/plugin/ndb_repl_tab.h"
#include "storage/ndb/plugin/ndb_schema_dist.h"
#include "storage/ndb/plugin/ndb_schema_object.h"

extern Ndb_cluster_connection *g_ndb_cluster_connection;

/*
  Timeout for syncing schema events between
  mysql servers, and between mysql server and the binlog
*/
static const int DEFAULT_SYNC_TIMEOUT = 120;

/* Column numbers in the ndb_binlog_index table */
enum Ndb_binlog_index_cols {
  NBICOL_START_POS = 0,
  NBICOL_START_FILE = 1,
  NBICOL_EPOCH = 2,
  NBICOL_NUM_INSERTS = 3,
  NBICOL_NUM_UPDATES = 4,
  NBICOL_NUM_DELETES = 5,
  NBICOL_NUM_SCHEMAOPS = 6
  /* Following columns in schema 'v2' */
  ,
  NBICOL_ORIG_SERVERID = 7,
  NBICOL_ORIG_EPOCH = 8,
  NBICOL_GCI = 9
  /* Following columns in schema 'v3' */
  ,
  NBICOL_NEXT_POS = 10,
  NBICOL_NEXT_FILE = 11
};
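
/*
  Illustrative note (added, derived from the column list above): each row in
  mysql.ndb_binlog_index describes one epoch written to the binlog. The 'v1'
  columns give the binlog position where the epoch starts plus row counts,
  the 'v2' columns add originating server id/epoch and GCI, and the 'v3'
  columns add the position where the following epoch starts.
*/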

class Mutex_guard {
 public:
  Mutex_guard(mysql_mutex_t &mutex) : m_mutex(mutex) {
    mysql_mutex_lock(&m_mutex);
  }
  ~Mutex_guard() { mysql_mutex_unlock(&m_mutex); }

 private:
  mysql_mutex_t &m_mutex;
};

/*
  Mutex and condition used for interacting between client sql thread
  and injector thread
   - injector_data_mutex protects global data maintained
     by the injector thread and accessed by any client thread.
   - injector_event_mutex protects the injector thread's pollEvents()
     and concurrent create and drop of events from client threads.
     It also protects injector_ndb and schema_ndb which are the Ndb
     objects used for the above create/drop/pollEvents().
  Rationale for splitting these into two separate mutexes is that
  the injector_event_mutex is held for 10ms across pollEvents().
  That could (almost) block access to the shared binlog injector data,
  like ndb_binlog_is_read_only().
*/
static mysql_mutex_t injector_event_mutex;
static mysql_mutex_t injector_data_mutex;
static mysql_cond_t injector_data_cond;
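
/*
  Illustrative sketch (added, not part of the original code) of the locking
  discipline described above. A client thread reading injector-maintained
  state takes the data mutex for a short critical section:

    Mutex_guard guard(injector_data_mutex);
    const bool ready = ndb_binlog_is_ready;

  while the injector thread holds injector_event_mutex around pollEvents()
  and while client threads create or drop event operations.
*/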

/*
  NOTE:
  Several of the ndb_binlog* variables use a 'relaxed locking' scheme.
  Such a variable is only modified by the 'injector_thd' thread,
  but could be read by any 'thd'. Thus:
    - Any update of such a variable needs a mutex lock.
    - Reading such a variable outside of the injector_thd needs the mutex.
  However, it should be safe to read the variable within the injector_thd
  without holding the mutex! (As there are no other threads updating it)
*/

/**
  ndb_binlog_running
  Changes to NDB tables should be written to the binary log. I.e. the
  ndb binlog injector thread subscribes to changes in the cluster
  and when such changes are received, they will be written to the
  binary log
*/
bool ndb_binlog_running = false;

static bool ndb_binlog_tables_inited = false;  // injector_data_mutex, relaxed
static bool ndb_binlog_is_ready = false;       // injector_data_mutex, relaxed

bool ndb_binlog_is_read_only(void) {
  /*
    Could be called from any client thread. Need a mutex to
    protect ndb_binlog_tables_inited and ndb_binlog_is_ready.
  */
  Mutex_guard injector_g(injector_data_mutex);
  if (!ndb_binlog_tables_inited) {
    /* the ndb_* system tables not setup yet */
    return true;
  }

  if (ndb_binlog_running && !ndb_binlog_is_ready) {
    /*
      The binlog thread is supposed to write to binlog
      but not ready (still initializing or has lost connection)
    */
    return true;
  }
  return false;
}

static THD *injector_thd = NULL;

/*
  Global reference to ndb injector thd object.

  Used mainly by the binlog index thread, but exposed to the client sql
  thread for one reason: to set up the event operations for a table,
  enabling the ndb injector thread to receive events.

  Must therefore always be used with a surrounding
  mysql_mutex_lock(&injector_event_mutex) when calling
  create/dropEventOperation.
*/
static Ndb *injector_ndb = NULL;  // Need injector_event_mutex
static Ndb *schema_ndb = NULL;    // Need injector_event_mutex

static int ndbcluster_binlog_inited = 0;

/* NDB Injector thread (used for binlog creation) */
static ulonglong ndb_latest_applied_binlog_epoch = 0;
static ulonglong ndb_latest_handled_binlog_epoch = 0;
static ulonglong ndb_latest_received_binlog_epoch = 0;

NDB_SHARE *ndb_apply_status_share = NULL;

extern bool opt_log_slave_updates;
static bool g_ndb_log_slave_updates;

static bool g_injector_v1_warning_emitted = false;

bool Ndb_binlog_client::create_event_data(NDB_SHARE *share,
                                          const dd::Table *table_def,
                                          Ndb_event_data **event_data) const {
  DBUG_TRACE;
  DBUG_ASSERT(table_def);
  DBUG_ASSERT(event_data);

  Ndb_event_data *new_event_data = Ndb_event_data::create_event_data(
      m_thd, share, share->db, share->table_name, share->key_string(),
      injector_thd, table_def);
  if (!new_event_data) return false;

  // Return the newly created event_data to caller
  *event_data = new_event_data;

  return true;
}

static int get_ndb_blobs_value(TABLE *table, NdbValue *value_array,
                               uchar *&buffer, uint &buffer_size,
                               ptrdiff_t ptrdiff) {
  DBUG_TRACE;

  // Field has no field number so cannot use TABLE blob_field
  // Loop twice, first only counting total buffer size
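  // (Added note) Pass 0 sums up the Uint64-aligned sizes of all non-NULL
  // blob values to determine the required buffer size; pass 1 reads each
  // blob into the (re)allocated buffer and points the corresponding
  // Field_blob at its slice via set_ptr_offset().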
  for (int loop = 0; loop <= 1; loop++) {
    uint32 offset = 0;
    for (uint i = 0; i < table->s->fields; i++) {
      Field *field = table->field[i];
      NdbValue value = value_array[i];
      if (!(field->is_flag_set(BLOB_FLAG) && field->stored_in_db)) continue;
      if (value.blob == NULL) {
        DBUG_PRINT("info", ("[%u] skipped", i));
        continue;
      }
      Field_blob *field_blob = (Field_blob *)field;
      NdbBlob *ndb_blob = value.blob;
      int isNull;
      if (ndb_blob->getNull(isNull) != 0) return -1;
      if (isNull == 0) {
        Uint64 len64 = 0;
        if (ndb_blob->getLength(len64) != 0) return -1;
        // Align to Uint64
        uint32 size = Uint32(len64);
        if (size % 8 != 0) size += 8 - size % 8;
        if (loop == 1) {
          uchar *buf = buffer + offset;
          uint32 len = buffer_size - offset;  // Size of buf
          if (ndb_blob->readData(buf, len) != 0) return -1;
          DBUG_PRINT("info",
                     ("[%u] offset: %u  buf: 0x%lx  len=%u  [ptrdiff=%d]", i,
                      offset, (long)buf, len, (int)ptrdiff));
          DBUG_ASSERT(len == len64);
          // Ugly hack assumes only ptr needs to be changed
          field_blob->set_ptr_offset(ptrdiff, len, buf);
        }
        offset += size;
      } else if (loop == 1)  // undefined or null
      {
        // have to set length even in this case
        uchar *buf = buffer + offset;  // or maybe NULL
        uint32 len = 0;
        field_blob->set_ptr_offset(ptrdiff, len, buf);
        DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull));
      }
    }
    if (loop == 0 && offset > buffer_size) {
      my_free(buffer);
      buffer_size = 0;
      DBUG_PRINT("info", ("allocate blobs buffer size %u", offset));
      buffer = (uchar *)my_malloc(PSI_INSTRUMENT_ME, offset, MYF(MY_WME));
      if (buffer == NULL) {
        ndb_log_error("get_ndb_blobs_value, my_malloc(%u) failed", offset);
        return -1;
      }
      buffer_size = offset;
    }
  }
  return 0;
}

/*
  @brief Wait until the last committed epoch from the session enters the
         binlog. Wait a maximum of 30 seconds. This wait is necessary in
         SHOW BINLOG EVENTS so that the user sees their own changes. Also
         in RESET MASTER before clearing ndbcluster's binlog index.
  @param thd Thread handle to wait for its changes to enter the binlog.
*/
static void ndbcluster_binlog_wait(THD *thd) {
  DBUG_TRACE;

  if (!ndb_binlog_running) {
    DBUG_PRINT("exit", ("Not writing binlog -> nothing to wait for"));
    return;
  }

  // Assumption is that only these commands will wait
  DBUG_ASSERT(thd_sql_command(thd) == SQLCOM_SHOW_BINLOG_EVENTS ||
              thd_sql_command(thd) == SQLCOM_FLUSH ||
              thd_sql_command(thd) == SQLCOM_RESET);

  if (thd->system_thread == SYSTEM_THREAD_NDBCLUSTER_BINLOG) {
    // Binlog Injector thread should not wait for itself
    DBUG_PRINT("exit", ("binlog injector should not wait for itself"));
    return;
  }

  Thd_ndb *thd_ndb = get_thd_ndb(thd);
  if (!thd_ndb) {
    // Thread has not used NDB before, no need for waiting
    DBUG_PRINT("exit", ("Thread has not used NDB, nothing to wait for"));
    return;
  }

  const char *save_info = thd->proc_info;
  thd->proc_info =
      "Waiting for ndbcluster binlog update to reach current position";

  // Highest epoch that a transaction against Ndb has received
  // as part of commit processing *in this thread*. This is a
  // per-session 'most recent change' indicator.
  const Uint64 session_last_committed_epoch =
      thd_ndb->m_last_commit_epoch_session;

  // Wait until the last committed epoch from the session enters Binlog.
  // Break any possible deadlock after 30s.
  int count = 30;  // seconds
  mysql_mutex_lock(&injector_data_mutex);
  const Uint64 start_handled_epoch = ndb_latest_handled_binlog_epoch;
  while (!thd->killed && count && ndb_binlog_running &&
         (ndb_latest_handled_binlog_epoch == 0 ||
          ndb_latest_handled_binlog_epoch < session_last_committed_epoch)) {
    count--;
    struct timespec abstime;
    set_timespec(&abstime, 1);
    mysql_cond_timedwait(&injector_data_cond, &injector_data_mutex, &abstime);
  }
  mysql_mutex_unlock(&injector_data_mutex);

  if (count == 0) {
    ndb_log_warning(
        "Thread id %u timed out (30s) waiting for epoch %u/%u "
        "to be handled.  Progress : %u/%u -> %u/%u.",
        thd->thread_id(),
        Uint32((session_last_committed_epoch >> 32) & 0xffffffff),
        Uint32(session_last_committed_epoch & 0xffffffff),
        Uint32((start_handled_epoch >> 32) & 0xffffffff),
        Uint32(start_handled_epoch & 0xffffffff),
        Uint32((ndb_latest_handled_binlog_epoch >> 32) & 0xffffffff),
        Uint32(ndb_latest_handled_binlog_epoch & 0xffffffff));

    // Fail on wait/deadlock timeout in debug compile
    DBUG_ASSERT(false);
  }

  thd->proc_info = save_info;
}

/*
  Setup THD object
  'Inspired' from ha_ndbcluster.cc : ndb_util_thread_func
*/
THD *ndb_create_thd(char *stackptr) {
  DBUG_TRACE;
  THD *thd = new THD; /* note that constructor of THD uses DBUG_ */
  if (thd == 0) {
    return 0;
  }
  THD_CHECK_SENTRY(thd);

  thd->thread_stack = stackptr; /* remember where our stack is */
  thd->store_globals();

  thd->init_query_mem_roots();
  thd->set_command(COM_DAEMON);
  thd->system_thread = SYSTEM_THREAD_NDBCLUSTER_BINLOG;
  thd->get_protocol_classic()->set_client_capabilities(0);
  thd->lex->start_transaction_opt = 0;
  thd->security_context()->skip_grants();

  CHARSET_INFO *charset_connection =
      get_charset_by_csname("utf8", MY_CS_PRIMARY, MYF(MY_WME));
  thd->variables.character_set_client = charset_connection;
  thd->variables.character_set_results = charset_connection;
  thd->variables.collation_connection = charset_connection;
  thd->update_charset();
  return thd;
}
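// (Added note) Callers own the returned THD: they delete it when done and
// restore their own thread context with store_globals(), as done in
// ndbcluster_binlog_index_purge_file() below.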

// Instantiate Ndb_binlog_thread component
static Ndb_binlog_thread ndb_binlog_thread;

// Forward declaration
static bool ndbcluster_binlog_index_remove_file(THD *thd, const char *filename);

/*
  @brief called when a binlog file is purged (i.e. the physical
  binlog file is removed by the MySQL Server). ndbcluster needs
  to remove any rows in its mysql.ndb_binlog_index table which
  reference the removed file.

  @param thd Thread handle
  @param filename Name of the binlog file which has been removed

  @return 0 for success
*/

static int ndbcluster_binlog_index_purge_file(THD *thd, const char *filename) {
  DBUG_TRACE;
  DBUG_PRINT("enter", ("filename: %s", filename));

  // Check if the binlog thread can handle the purge.
  // This functionality is initially only implemented for the case when the
  // "server started" state has not yet been reached, but could in the future
  // be extended to handle all purging by the binlog thread (this would most
  // likely eliminate the need to create a separate THD further down in this
  // function)
  if (ndb_binlog_thread.handle_purge(filename)) {
    return 0;  // Ok, purge handled by binlog thread
  }

  if (!ndb_binlog_running) {
    return 0;  // Nothing to do, binlog thread not running
  }

  if (thd_slave_thread(thd)) {
    return 0;  // Nothing to do, slave thread
  }

  // Create a separate temporary THD, primarily in order to isolate from any
  // active transactions in the THD passed by the caller. NOTE! This should be
  // revisited
  int stack_base = 0;
  THD *tmp_thd = ndb_create_thd((char *)&stack_base);
  if (!tmp_thd) {
    ndb_log_warning("NDB Binlog: Failed to purge: '%s' (create THD failed)",
                    filename);
    return 0;
  }

  int error = 0;
  if (ndbcluster_binlog_index_remove_file(tmp_thd, filename)) {
    // Failed to delete rows from table
    ndb_log_warning("NDB Binlog: Failed to purge: '%s'", filename);
    error = 1;  // Failed
  }
  delete tmp_thd;

  /* Relink original THD */
  thd->store_globals();

  return error;
}

/*
  ndbcluster_binlog_log_query

   - callback function installed in handlerton->binlog_log_query
   - called by MySQL Server in places where no other handlerton
     function exists which can be used to notify about changes
   - used by ndbcluster to detect when
     -- databases are created or altered
     -- privilege tables have been modified
*/

static void ndbcluster_binlog_log_query(handlerton *, THD *thd,
                                        enum_binlog_command binlog_command,
                                        const char *query, uint query_length,
                                        const char *db, const char *) {
  DBUG_TRACE;
  DBUG_PRINT("enter", ("binlog_command: %d, db: '%s', query: '%s'",
                       binlog_command, db, query));

  switch (binlog_command) {
    case LOGCOM_CREATE_DB: {
      DBUG_PRINT("info", ("New database '%s' created", db));

      Ndb_schema_dist_client schema_dist_client(thd);

      if (!schema_dist_client.prepare(db, "")) {
        // Could not prepare the schema distribution client
        // NOTE! As there is no way to return an error, this may have to be
        // revisited; the prepare should be done
        // much earlier where it can return an error for the query
        return;
      }

      // Generate the id, version
      unsigned int id = schema_dist_client.unique_id();
      unsigned int version = schema_dist_client.unique_version();

      const bool result =
          schema_dist_client.create_db(query, query_length, db, id, version);
      if (result) {
        // Update the schema with the generated id and version but skip
        // committing the change in DD. Commit will be done by the caller.
        ndb_dd_update_schema_version(thd, db, id, version,
                                     true /*skip_commit*/);
      } else {
        // NOTE! There is currently no way to report an error from this
        // function, just log an error and proceed
        ndb_log_error("Failed to distribute 'CREATE DATABASE %s'", db);
      }
    } break;

    case LOGCOM_ALTER_DB: {
      DBUG_PRINT("info", ("The database '%s' was altered", db));

      Ndb_schema_dist_client schema_dist_client(thd);

      if (!schema_dist_client.prepare(db, "")) {
        // Could not prepare the schema distribution client
        // NOTE! As there is no way to return an error, this may have to be
        // revisited; the prepare should be done
        // much earlier where it can return an error for the query
        return;
      }

      // Generate the id, version
      unsigned int id = schema_dist_client.unique_id();
      unsigned int version = schema_dist_client.unique_version();

      const bool result =
          schema_dist_client.alter_db(query, query_length, db, id, version);
      if (result) {
        // Update the schema with the generated id and version but skip
        // committing the change in DD. Commit will be done by the caller.
        ndb_dd_update_schema_version(thd, db, id, version,
                                     true /*skip_commit*/);
      } else {
        // NOTE! There is currently no way to report an error from this
        // function, just log an error and proceed
        ndb_log_error("Failed to distribute 'ALTER DATABASE %s'", db);
      }
    } break;

    case LOGCOM_CREATE_TABLE:
    case LOGCOM_ALTER_TABLE:
    case LOGCOM_RENAME_TABLE:
    case LOGCOM_DROP_TABLE:
    case LOGCOM_DROP_DB:
      DBUG_PRINT("info", ("Ignoring binlog_log_query notification "
                          "for binlog_command: %d",
                          binlog_command));
      break;
  }
}

static void ndbcluster_acl_notify(THD *thd,
                                  const Acl_change_notification *notice) {
  DBUG_TRACE;

  if (!check_ndb_in_thd(thd)) {
    ndb_log_error("Privilege distribution failed to seize thd_ndb");
    return;
  }

  /* If this is the binlog thread, the ACL change has arrived via
     schema distribution and requires no further action.
  */
  if (get_thd_ndb(thd)->check_option(Thd_ndb::NO_LOG_SCHEMA_OP)) {
    return;
  }

  /* Obtain the query in a form suitable for writing to the error log.
     The password is replaced with the string "<secret>".
  */
  std::string query;
  if (thd->rewritten_query().length())
    query.assign(thd->rewritten_query().ptr(), thd->rewritten_query().length());
  else
    query.assign(thd->query().str, thd->query().length);
  DBUG_ASSERT(query.length());
  ndb_log_verbose(9, "ACL considering: %s", query.c_str());

  std::string user_list;
  bool dist_use_db = false;   // Prepend "use [db];" to statement
  bool dist_refresh = false;  // All participants must refresh their caches
  Ndb_stored_grants::Strategy strategy =
      Ndb_stored_grants::handle_local_acl_change(thd, notice, &user_list,
                                                 &dist_use_db, &dist_refresh);

  Ndb_schema_dist_client schema_dist_client(thd);

  if (strategy == Ndb_stored_grants::Strategy::ERROR) {
    ndb_log_error("Not distributing ACL change after error.");
    return;
  }

  if (strategy == Ndb_stored_grants::Strategy::NONE) {
    ndb_log_verbose(9, "ACL change distribution: NONE");
    return;
  }

  const unsigned int &node_id = g_ndb_cluster_connection->node_id();
  if (!schema_dist_client.prepare_acl_change(node_id)) {
    ndb_log_error("Failed to distribute '%s' (Failed prepare)", query.c_str());
    return;
  }

  if (strategy == Ndb_stored_grants::Strategy::SNAPSHOT) {
    ndb_log_verbose(9, "ACL change distribution: SNAPSHOT");
    if (!schema_dist_client.acl_notify(user_list))
      ndb_log_error("Failed to distribute '%s' (SNAPSHOT)", query.c_str());
    return;
  }

  DBUG_ASSERT(strategy == Ndb_stored_grants::Strategy::STATEMENT);
  ndb_log_verbose(9, "ACL change distribution: STATEMENT");

  /* If the notice contains rewrite_params, query is an ALTER USER or SET
     PASSWORD statement and must be rewritten again, as if for the binlog,
     replacing a plaintext password with a cryptographic hash.
  */
  if (notice->get_rewrite_params()) {
    String rewritten_query;
    mysql_rewrite_acl_query(thd, rewritten_query, Consumer_type::BINLOG,
                            notice->get_rewrite_params(), false);
    query.assign(rewritten_query.c_ptr_safe(), rewritten_query.length());
    DBUG_ASSERT(query.length());
  }

  if (!schema_dist_client.acl_notify(
          dist_use_db ? notice->get_db().c_str() : nullptr, query.c_str(),
          query.length(), dist_refresh))
    ndb_log_error("Failed to distribute '%s' (STATEMENT)", query.c_str());
}

/*
  End use of the NDB Cluster binlog
   - wait for binlog thread to shutdown
*/

int ndbcluster_binlog_end() {
  DBUG_TRACE;

  if (ndbcluster_binlog_inited) {
    ndbcluster_binlog_inited = 0;

    ndb_binlog_thread.stop();
    ndb_binlog_thread.deinit();

    mysql_mutex_destroy(&injector_event_mutex);
    mysql_mutex_destroy(&injector_data_mutex);
    mysql_cond_destroy(&injector_data_cond);
  }

  return 0;
}

/*****************************************************************
  functions called from slave sql client threads
****************************************************************/
static void ndbcluster_reset_slave(THD *thd) {
  if (!ndb_binlog_running) return;

  DBUG_TRACE;

  /*
    delete all rows from mysql.ndb_apply_status table
    - if table does not exist ignore the error as it
      is a consistent behavior
  */
  if (opt_ndb_clear_apply_status) {
    Ndb_local_connection mysqld(thd);
    const bool ignore_no_such_table = true;
    if (mysqld.delete_rows(Ndb_apply_status_table::DB_NAME,
                           Ndb_apply_status_table::TABLE_NAME,
                           ignore_no_such_table, "1=1")) {
      // Failed to delete rows from table
    }
  }

  g_ndb_slave_state.atResetSlave();

  // pending fix for bug#59844 will make this function return int
}

static int ndbcluster_binlog_func(handlerton *, THD *thd, enum_binlog_func fn,
                                  void *arg) {
  DBUG_TRACE;
  int res = 0;
  switch (fn) {
    case BFN_RESET_LOGS:
      break;
    case BFN_RESET_SLAVE:
      ndbcluster_reset_slave(thd);
      break;
    case BFN_BINLOG_WAIT:
      ndbcluster_binlog_wait(thd);
      break;
    case BFN_BINLOG_END:
      res = ndbcluster_binlog_end();
      break;
    case BFN_BINLOG_PURGE_FILE:
      res = ndbcluster_binlog_index_purge_file(thd, (const char *)arg);
      break;
  }
  return res;
}

void ndbcluster_binlog_init(handlerton *h) {
  h->binlog_func = ndbcluster_binlog_func;
  h->binlog_log_query = ndbcluster_binlog_log_query;
  h->acl_notify = ndbcluster_acl_notify;
}

/*
   ndb_notify_tables_writable

   Called to notify any waiting threads that Ndb tables are
   now writable
*/
static void ndb_notify_tables_writable() {
  mysql_mutex_lock(&ndbcluster_mutex);
  ndb_setup_complete = 1;
  mysql_cond_broadcast(&ndbcluster_cond);
  mysql_mutex_unlock(&ndbcluster_mutex);
}

/**
  Utility class encapsulating the code which sets up the 'ndb binlog thread'
  to be "connected" to the cluster.
  This involves:
   - synchronizing the local mysqld data dictionary with that in NDB
   - subscribing to changes that happen in NDB, thus allowing:
    -- local Data Dictionary to be kept in synch
    -- changes in NDB to be written to binlog

*/

class Ndb_binlog_setup {
  THD *const m_thd;

  /**
     @brief Detect whether the binlog is being set up after an initial system
            start/restart or after a normal system start/restart.

     @param thd_ndb  The Thd_ndb object

     @return true if this is an initial system start/restart, false otherwise.
   */
  bool detect_initial_restart(Thd_ndb *thd_ndb) {
    DBUG_TRACE;

    // Retrieve the old schema UUID stored in DD.
    dd::String_type dd_schema_uuid;
    if (!ndb_dd_get_schema_uuid(m_thd, &dd_schema_uuid)) {
      DBUG_ASSERT(false);
      ndb_log_warning("Failed to read the schema UUID of DD");
      return false;
    }

    if (dd_schema_uuid.empty()) {
      /*
        DD didn't have any schema UUID previously. This is either an initial
        start or an upgrade from a version which does not have the schema UUID
        implemented. Such upgrades are treated as initial starts to keep this
        code simple and because the upgrade is probably being done from a 5.x
        or a non-GA 8.0.x version to an 8.0.x Cluster GA version.
      */
      ndb_log_info("Detected an initial system start");
      return true;
    }

    // Check if ndb_schema table exists in NDB
    Ndb_schema_dist_table schema_dist_table(thd_ndb);
    if (!schema_dist_table.exists()) {
      /*
        The ndb_schema table does not exist in NDB yet but the DD already has a
        schema UUID. This is an initial system restart.
      */
      ndb_log_info("Detected an initial system restart");
      return true;
    }

    // Retrieve the old schema uuid stored in NDB
    std::string ndb_schema_uuid;
    if (!schema_dist_table.open() ||
        !schema_dist_table.get_schema_uuid(&ndb_schema_uuid)) {
      DBUG_ASSERT(false);
      return false;
    }
    /*
      Since the ndb_schema table exists already, the schema UUID also cannot be
      empty as whichever mysqld created the table would also have updated the
      schema UUID in NDB.
    */
    DBUG_ASSERT(!ndb_schema_uuid.empty());

    if (ndb_schema_uuid == dd_schema_uuid.c_str()) {
      /*
        Schema UUIDs are the same. This is either a normal system restart or an
        upgrade. Any upgrade from versions having schema UUID to another newer
        version will be handled here.
      */
      ndb_log_info("Detected a normal system restart");
      return false;
    }

    /*
      Schema UUIDs don't match. This mysqld was previously connected to a
      Cluster whose schema UUID is stored in DD. It is now connecting to a new
      Cluster for the first time which already has a different schema UUID as
      this is not the first mysqld connecting to that Cluster.
      From this mysqld's perspective, this will be treated as an
      initial system restart.
    */
    ndb_log_info("Detected an initial system restart");
    return true;
  }

  Ndb_binlog_setup(const Ndb_binlog_setup &) = delete;
  Ndb_binlog_setup operator=(const Ndb_binlog_setup &) = delete;

 public:
  Ndb_binlog_setup(THD *thd) : m_thd(thd) {}

  /**
    @brief Setup this node to take part in schema distribution by creating the
    ndbcluster util tables, performing schema synchronization and creating
    references to NDB_SHARE for all tables.

    @note See special error handling required when function fails.

    @return true if setup is successful
    @return false if setup fails. The creation of ndb_schema table and setup
    of event operation registers this node in the schema distribution protocol.
    Thus this node is expected to reply to schema distribution events. Replying
    is however not possible until setup has successfully completed and the
    binlog thread has started to handle events. If setup fails the event
    operation on ndb_schema table and all other event operations must be
    removed in order to signal unsubscribe and remove this node from schema
    distribution.
  */
  bool setup(Thd_ndb *thd_ndb) {
    /* Test binlog_setup on this mysqld being slower (than other mysqld) */
    if (DBUG_EVALUATE_IF("ndb_binlog_setup_slow", true, false)) {
      ndb_log_info("'ndb_binlog_setup_slow' -> sleep");
      ndb_milli_sleep(10 * 1000);
      ndb_log_info(" <- sleep");
    }

    DBUG_ASSERT(ndb_apply_status_share == nullptr);

    // Protect the schema synchronization with GSL (Global Schema Lock)
    Ndb_global_schema_lock_guard global_schema_lock_guard(m_thd);
    if (global_schema_lock_guard.lock()) {
      return false;
    }

    /* Give additional 'binlog_setup rights' to this Thd_ndb */
    Thd_ndb::Options_guard thd_ndb_options(thd_ndb);
    thd_ndb_options.set(Thd_ndb::ALLOW_BINLOG_SETUP);

    // Check if this is an initial restart/start
    const bool initial_system_restart = detect_initial_restart(thd_ndb);

    Ndb_dd_sync dd_sync(m_thd, thd_ndb);
    if (initial_system_restart) {
      // Remove all NDB metadata from DD since this is an initial restart
      if (!dd_sync.remove_all_metadata()) {
        return false;
      }
    } else {
      /*
        Not an initial restart. Delete DD table definitions corresponding to NDB
        tables that no longer exist in NDB Dictionary. This is to ensure that
        synchronization of tables down the line doesn't run into issues related
        to table ids being reused
      */
      if (!dd_sync.remove_deleted_tables()) {
        return false;
      }
    }

    const bool ndb_schema_dist_upgrade_allowed = ndb_allow_ndb_schema_upgrade();
    Ndb_schema_dist_table schema_dist_table(thd_ndb);
    if (!schema_dist_table.create_or_upgrade(m_thd,
                                             ndb_schema_dist_upgrade_allowed))
      return false;

    if (!Ndb_schema_dist::is_ready(m_thd)) {
      ndb_log_verbose(50, "Schema distribution setup failed");
      return false;
    }

    if (DBUG_EVALUATE_IF("ndb_binlog_setup_incomplete", true, false)) {
      // Remove the dbug keyword, only fail first time and avoid infinite setup
      DBUG_SET("-d,ndb_binlog_setup_incomplete");
      // Test handling of setup failing to complete *after* created 'ndb_schema'
      ndb_log_info("Simulate 'ndb_binlog_setup_incomplete' -> return error");
      return false;
    }

    // If this is an initial start/restart, update the schema UUID in DD
    if (initial_system_restart) {
      // Retrieve the new schema UUID from NDB
      std::string ndb_schema_uuid;
      if (!schema_dist_table.get_schema_uuid(&ndb_schema_uuid)) return false;

      // Update it in DD
      if (!ndb_dd_update_schema_uuid(m_thd, ndb_schema_uuid)) {
        ndb_log_warning("Failed to update schema uuid in DD.");
        return false;
      }
    }

    Ndb_schema_result_table schema_result_table(thd_ndb);
    if (!schema_result_table.create_or_upgrade(m_thd,
                                               ndb_schema_dist_upgrade_allowed))
      return false;

    Ndb_apply_status_table apply_status_table(thd_ndb);
    if (!apply_status_table.create_or_upgrade(m_thd, true)) return false;

    if (!dd_sync.synchronize()) {
      ndb_log_verbose(9, "Failed to synchronize DD with NDB");
      return false;
    }

    // Check that references for ndb_apply_status have been created
    DBUG_ASSERT(!ndb_binlog_running || ndb_apply_status_share);

    if (!Ndb_stored_grants::initialize(m_thd, thd_ndb)) {
      ndb_log_warning("stored grants: failed to initialize");
      return false;
    }

    Mutex_guard injector_mutex_g(injector_data_mutex);
    ndb_binlog_tables_inited = true;

    // During upgrade from a non DD version, the DDLs are blocked until all
    // nodes run a version that has support for the Data Dictionary.
    Ndb_schema_dist_client::block_ddl(!ndb_all_nodes_support_mysql_dd());

    return true;  // Setup completed OK
  }
};

/*
  Defines for the expected order of columns in ndb_schema table, should
  match the accepted table definition.
*/
constexpr uint SCHEMA_DB_I = 0;
constexpr uint SCHEMA_NAME_I = 1;
constexpr uint SCHEMA_SLOCK_I = 2;
constexpr uint SCHEMA_QUERY_I = 3;
constexpr uint SCHEMA_NODE_ID_I = 4;
constexpr uint SCHEMA_EPOCH_I = 5;
constexpr uint SCHEMA_ID_I = 6;
constexpr uint SCHEMA_VERSION_I = 7;
constexpr uint SCHEMA_TYPE_I = 8;
constexpr uint SCHEMA_OP_ID_I = 9;
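// (Added note) These column indexes are used below, e.g. by
// write_schema_op_to_NDB(), when building NdbOperations against the
// ndb_schema table, so they must stay in sync with its table definition.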

static void ndb_report_waiting(const char *key, int the_time, const char *op,
                               const char *obj) {
  ulonglong ndb_latest_epoch = 0;
  const char *proc_info = "<no info>";
  mysql_mutex_lock(&injector_event_mutex);
  if (injector_ndb) ndb_latest_epoch = injector_ndb->getLatestGCI();
  if (injector_thd) proc_info = injector_thd->proc_info;
  mysql_mutex_unlock(&injector_event_mutex);
  {
    ndb_log_info(
        "%s, waiting max %u sec for %s %s."
        "  epochs: (%u/%u,%u/%u,%u/%u)"
        "  injector proc_info: %s",
        key, the_time, op, obj, (uint)(ndb_latest_handled_binlog_epoch >> 32),
        (uint)(ndb_latest_handled_binlog_epoch),
        (uint)(ndb_latest_received_binlog_epoch >> 32),
        (uint)(ndb_latest_received_binlog_epoch),
        (uint)(ndb_latest_epoch >> 32), (uint)(ndb_latest_epoch), proc_info);
  }
}

bool Ndb_schema_dist_client::write_schema_op_to_NDB(
    Ndb *ndb, const char *query, int query_length, const char *db,
    const char *name, uint32 id, uint32 version, uint32 nodeid, uint32 type,
    uint32 schema_op_id, uint32 anyvalue) {
  DBUG_TRACE;

  // Open ndb_schema table
  Ndb_schema_dist_table schema_dist_table(m_thd_ndb);
  if (!schema_dist_table.open()) {
    return false;
  }
  const NdbDictionary::Table *ndbtab = schema_dist_table.get_table();

  // Pack db and table_name
  char db_buf[FN_REFLEN];
  char name_buf[FN_REFLEN];
  ndb_pack_varchar(ndbtab, SCHEMA_DB_I, db_buf, db, strlen(db));
  ndb_pack_varchar(ndbtab, SCHEMA_NAME_I, name_buf, name, strlen(name));

  // Start the schema operation with all bits set in the slock column.
  // The expectation is that all participants will reply and those not
  // connected will be filtered away by the coordinator.
  std::vector<char> slock_data;
  slock_data.assign(schema_dist_table.get_slock_bytes(), 0xFF);

  // Function for writing row to ndb_schema
  std::function<const NdbError *(NdbTransaction *)> write_schema_op_func =
      [&](NdbTransaction *trans) -> const NdbError * {
    DBUG_TRACE;

    NdbOperation *op = trans->getNdbOperation(ndbtab);
    if (op == nullptr) return &trans->getNdbError();

    const Uint64 log_epoch = 0;
    if (op->writeTuple() != 0 || op->equal(SCHEMA_DB_I, db_buf) != 0 ||
        op->equal(SCHEMA_NAME_I, name_buf) != 0 ||
        op->setValue(SCHEMA_SLOCK_I, slock_data.data()) != 0 ||
        op->setValue(SCHEMA_NODE_ID_I, nodeid) != 0 ||
        op->setValue(SCHEMA_EPOCH_I, log_epoch) != 0 ||
        op->setValue(SCHEMA_ID_I, id) != 0 ||
        op->setValue(SCHEMA_VERSION_I, version) != 0 ||
        op->setValue(SCHEMA_TYPE_I, type) != 0 ||
        op->setAnyValue(anyvalue) != 0)
      return &op->getNdbError();

    NdbBlob *ndb_blob = op->getBlobHandle(SCHEMA_QUERY_I);
    if (ndb_blob == nullptr) return &op->getNdbError();

    if (ndb_blob->setValue(query, query_length) != 0)
      return &ndb_blob->getNdbError();

    if (schema_dist_table.have_schema_op_id_column()) {
      if (op->setValue(SCHEMA_OP_ID_I, schema_op_id) != 0)
        return &op->getNdbError();
    }

    if (trans->execute(NdbTransaction::Commit, NdbOperation::DefaultAbortOption,
                       1 /* force send */) != 0) {
      return &trans->getNdbError();
    }

    return nullptr;
  };

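  // (Added note, assumed behavior) ndb_trans_retry() starts a transaction,
  // invokes write_schema_op_func and retries on temporary NDB errors; the
  // final NdbError is returned via 'ndb_err' if all attempts fail.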
  NdbError ndb_err;
  if (!ndb_trans_retry(ndb, m_thd, ndb_err, write_schema_op_func)) {
    m_thd_ndb->push_ndb_error_warning(ndb_err);
    m_thd_ndb->push_warning("Failed to write schema operation");
    return false;
  }

  (void)ndb->getDictionary()->forceGCPWait(1);

  return true;
}

/*
  log query in ndb_schema table
*/

bool Ndb_schema_dist_client::log_schema_op_impl(
    Ndb *ndb, const char *query, int query_length, const char *db,
    const char *table_name, uint32 ndb_table_id, uint32 ndb_table_version,
    SCHEMA_OP_TYPE type, uint32 anyvalue) {
  DBUG_TRACE;
  DBUG_PRINT("enter",
             ("query: %s  db: %s  table_name: %s", query, db, table_name));

  // Create NDB_SCHEMA_OBJECT
  std::unique_ptr<NDB_SCHEMA_OBJECT, decltype(&NDB_SCHEMA_OBJECT::release)>
      ndb_schema_object(NDB_SCHEMA_OBJECT::get(db, table_name, ndb_table_id,
                                               ndb_table_version, true),
                        NDB_SCHEMA_OBJECT::release);

  if (DBUG_EVALUATE_IF("ndb_binlog_random_tableid", true, false)) {
    /**
     * Try to trigger a race between a late incoming slock ack for
     * schema operations having their coordinator on another node,
     * which we would otherwise have discarded as no matching
     * ndb_schema_object existed, and another schema op with the same 'key',
     * coordinated by this node. This causes a mixup between these,
     * and the schema distribution getting totally out of sync.
     */
    ndb_milli_sleep(50);
  }

  // Format string to use in log printouts
  const std::string op_name = db + std::string(".") + table_name + "(" +
                              std::to_string(ndb_table_id) + "/" +
                              std::to_string(ndb_table_version) + ")";

  // Use nodeid of the primary cluster connection since that is
  // the nodeid which the coordinator and participants listen to
  const uint32 own_nodeid = g_ndb_cluster_connection->node_id();

  // Write schema operation to the table
  if (DBUG_EVALUATE_IF("ndb_schema_write_fail", true, false) ||
      !write_schema_op_to_NDB(ndb, query, query_length, db, table_name,
                              ndb_table_id, ndb_table_version, own_nodeid, type,
                              ndb_schema_object->schema_op_id(), anyvalue)) {
    ndb_schema_object->fail_schema_op(Ndb_schema_dist::NDB_TRANS_FAILURE,
                                      "Failed to write schema operation");
    ndb_log_warning("Failed to write the schema op into the ndb_schema table");
    return false;
  }

  ndb_log_verbose(19, "Distribution of '%s' - started!", op_name.c_str());
  if (ndb_log_get_verbose_level() >= 19) {
    ndb_log_error_dump("Schema_op {");
    ndb_log_error_dump("type: %d", type);
    ndb_log_error_dump("query: '%s'", query);
    ndb_log_error_dump("}");
  }

  // Wait for participants to complete the schema change
  while (true) {
    const bool completed = ndb_schema_object->client_wait_completed(1);
    if (completed) {
      // Schema operation completed
      ndb_log_verbose(19, "Distribution of '%s' - completed!", op_name.c_str());
      break;
    }

    // Check if schema distribution is still ready.
    if (m_share->have_event_operation() == false) {
      // This case is unlikely, but there is a small race between the
      // client's first check for schema distribution ready and the schema op
      // being registered in the coordinator (since the message is passed
      // via NDB).
      ndb_schema_object->fail_schema_op(Ndb_schema_dist::CLIENT_ABORT,
                                        "Schema distribution is not ready");
      ndb_log_warning("Distribution of '%s' - not ready!", op_name.c_str());
      break;
    }

    if (thd_killed(m_thd) ||
        DBUG_EVALUATE_IF("ndb_schema_dist_client_killed", true, false)) {
      ndb_schema_object->fail_schema_op(Ndb_schema_dist::CLIENT_KILLED,
                                        "Client was killed");
      ndb_log_warning("Distribution of '%s' - killed!", op_name.c_str());
      break;
    }
  }

  // Inspect results in NDB_SCHEMA_OBJECT before it's released
  std::vector<NDB_SCHEMA_OBJECT::Result> participant_results;
  ndb_schema_object->client_get_schema_op_results(participant_results);
  for (auto &it : participant_results) {
    // Save result for later
    m_schema_op_results.push_back({it.nodeid, it.result, it.message});
  }

  return true;
}

/*
  ndbcluster_binlog_event_operation_teardown

  Used when a NdbEventOperation has indicated that the table has been
  dropped or the connection to the cluster has failed. The function needs
  to tear down the NdbEventOperation and its associated data structures
  owned by the binlog.

  It will also signal the "injector_data_cond" so that anyone using
  ndbcluster_binlog_wait_synch_drop_table() to wait for the binlog
  to handle the drop will be notified.

  The function may be called either by Ndb_schema_event_handler which
  listens to events only on mysql.ndb_schema or by the "injector" which
  listens to events on all the other tables.
*/

static void ndbcluster_binlog_event_operation_teardown(THD *thd, Ndb *is_ndb,
                                                       NdbEventOperation *pOp) {
  DBUG_TRACE;
  DBUG_PRINT("enter", ("pOp: %p", pOp));

  // Get Ndb_event_data associated with the NdbEventOperation
  const Ndb_event_data *event_data =
      static_cast<const Ndb_event_data *>(pOp->getCustomData());
  DBUG_ASSERT(event_data);

  // Get NDB_SHARE associated with the Ndb_event_data, the share
  // is referenced by "binlog" and will not go away until released
  // further down in this function
  NDB_SHARE *share = event_data->share;

  // Invalidate any cached NdbApi table if object version is lower
  // than what was used when setting up the NdbEventOperation
  // NOTE! This functionality needs to be explained further
  {
    Thd_ndb *thd_ndb = get_thd_ndb(thd);
    Ndb *ndb = thd_ndb->ndb;
    Ndb_table_guard ndbtab_g(ndb, share->db, share->table_name);
    const NDBTAB *ev_tab = pOp->getTable();
    const NDBTAB *cache_tab = ndbtab_g.get_table();
    if (cache_tab && cache_tab->getObjectId() == ev_tab->getObjectId() &&
        cache_tab->getObjectVersion() <= ev_tab->getObjectVersion())
      ndbtab_g.invalidate();
  }

  // Remove NdbEventOperation from the share
  mysql_mutex_lock(&share->mutex);
  DBUG_ASSERT(share->op == pOp);
  share->op = NULL;
  mysql_mutex_unlock(&share->mutex);

  /* Signal ha_ndbcluster::delete/rename_table that drop is done */
  DBUG_PRINT("info", ("signal that drop is done"));
  mysql_cond_broadcast(&injector_data_cond);

  // Close the table in MySQL Server
  ndb_tdc_close_cached_table(thd, share->db, share->table_name);

  // Release the "binlog" reference from NDB_SHARE
  NDB_SHARE::release_reference(share, "binlog");

  // Remove pointer to event_data from the EventOperation
  pOp->setCustomData(NULL);

  // Drop the NdbEventOperation from NdbApi
  DBUG_PRINT("info", ("Dropping event operation: %p", pOp));
  mysql_mutex_lock(&injector_event_mutex);
  is_ndb->dropEventOperation(pOp);
  mysql_mutex_unlock(&injector_event_mutex);

  // Finally delete the event_data and thus its mem_root, shadow_table etc.
  Ndb_event_data::destroy(event_data);
}

/*
  Data used by the Ndb_schema_event_handler which lives
  as long as the NDB Binlog thread is connected to the cluster.

  NOTE! An Ndb_schema_event_handler instance only lives for one epoch

 */
class Ndb_schema_dist_data {
  uint m_own_nodeid;
  uint m_max_subscribers{0};
  // List of active schema operations in this coordinator. Having an
  // active schema operation means it needs to be checked regularly
  // for timeout or a request to be killed
  std::unordered_set<const NDB_SCHEMA_OBJECT *> m_active_schema_ops;

  std::chrono::steady_clock::time_point m_next_check_time;

  // Keeps track of subscribers as reported by one data node
  class Node_subscribers {
    MY_BITMAP m_bitmap;

   public:
    Node_subscribers(const Node_subscribers &) = delete;
    Node_subscribers() = delete;
    Node_subscribers(uint max_subscribers) {
      // Initialize the bitmap
      bitmap_init(&m_bitmap, nullptr, max_subscribers);

      // Assume that all bits are cleared by bitmap_init()
      DBUG_ASSERT(bitmap_is_clear_all(&m_bitmap));
    }
    ~Node_subscribers() { bitmap_free(&m_bitmap); }
    void clear_all() { bitmap_clear_all(&m_bitmap); }
    void set(uint subscriber_node_id) {
      bitmap_set_bit(&m_bitmap, subscriber_node_id);
    }
    void clear(uint subscriber_node_id) {
      bitmap_clear_bit(&m_bitmap, subscriber_node_id);
    }
    std::string to_string() const {
      return ndb_bitmap_to_hex_string(&m_bitmap);
    }

    /**
       @brief Add current subscribers to list of nodes.
       @param subscriber_list List of subscribers
    */
    void get_subscriber_list(
        std::unordered_set<uint32> &subscriber_list) const {
      for (uint i = bitmap_get_first_set(&m_bitmap); i != MY_BIT_NONE;
           i = bitmap_get_next_set(&m_bitmap, i)) {
        subscriber_list.insert(i);
      }
    }
  };
  /*
    List keeping track of the subscribers to ndb_schema. It contains one
    Node_subscribers per data node, this avoids the need to know which data
    nodes are connected.
  */
  std::unordered_map<uint, Node_subscribers *> m_subscriber_bitmaps;

  /**
    @brief Find node subscribers for given data node
    @param data_node_id Nodeid of data node
    @return Pointer to node subscribers or nullptr
   */
  Node_subscribers *find_node_subscribers(uint data_node_id) const {
    const auto it = m_subscriber_bitmaps.find(data_node_id);
    if (it == m_subscriber_bitmaps.end()) {
      // Unexpected data node id received. This may be caused by a data node
      // added without restarting this MySQL Server, or a node id otherwise out
      // of range for the current configuration. Handle the situation
      // gracefully and just print an error message to the log.
      ndb_log_error("Could not find node subscribers for data node %d",
                    data_node_id);
      ndb_log_error("Restart this MySQL Server to adapt to configuration");
      return nullptr;
    }
    Node_subscribers *subscriber_bitmap = it->second;
    ndbcluster::ndbrequire(subscriber_bitmap);
    return subscriber_bitmap;
  }

  // Holds the new key for a table to be renamed
  struct NDB_SHARE_KEY *m_prepared_rename_key;

  // Holds the Ndb_event_data which is created during inplace alter table
  // prepare and used during commit
  // NOTE! this placeholder is only used for the participant in the same node
  const class Ndb_event_data *m_inplace_alter_event_data{nullptr};

 public:
  Ndb_schema_dist_data(const Ndb_schema_dist_data &);  // Not implemented
  Ndb_schema_dist_data() : m_prepared_rename_key(NULL) {}
  ~Ndb_schema_dist_data() {
    // There should be no schema operations active
    DBUG_ASSERT(m_active_schema_ops.size() == 0);
  }
1361 
init(Ndb_cluster_connection * cluster_connection)1362   void init(Ndb_cluster_connection *cluster_connection) {
1363     Uint32 max_subscribers = cluster_connection->max_api_nodeid() + 1;
1364     m_own_nodeid = cluster_connection->node_id();
1365     NDB_SCHEMA_OBJECT::init(m_own_nodeid);
1366 
1367     // Add one subscriber bitmap per data node in the current configuration
1368     unsigned node_id;
1369     Ndb_cluster_connection_node_iter node_iter;
1370     while ((node_id = cluster_connection->get_next_node(node_iter))) {
1371       m_subscriber_bitmaps.emplace(node_id,
1372                                    new Node_subscribers(max_subscribers));
1373     }
1374     // Remember max number of subscribers
1375     m_max_subscribers = max_subscribers;
1376   }
1377 
1378   void release(void) {
1379     // Release the subscriber bitmaps
1380     for (const auto it : m_subscriber_bitmaps) {
1381       Node_subscribers *subscriber_bitmap = it.second;
1382       delete subscriber_bitmap;
1383     }
1384     m_subscriber_bitmaps.clear();
1385     m_max_subscribers = 0;
1386 
1387     // Release the prepared rename key, it's very unlikely
1388     // that the key is still around here, but just in case
1389     NDB_SHARE::free_key(m_prepared_rename_key);
1390     m_prepared_rename_key = NULL;
1391 
1392     // Release the event_data saved for inplace alter, it's very
1393     // unlikely that the event_data is still around, but just in case
1394     Ndb_event_data::destroy(m_inplace_alter_event_data);
1395     m_inplace_alter_event_data = nullptr;
1396 
1397     // Release any remaining active schema operations
1398     for (const NDB_SCHEMA_OBJECT *schema_op : m_active_schema_ops) {
1399       ndb_log_info(" - releasing schema operation on '%s.%s'", schema_op->db(),
1400                    schema_op->name());
1401       schema_op->fail_schema_op(Ndb_schema_dist::COORD_ABORT,
1402                                 "Coordinator aborted");
1403       // Release coordinator reference
1404       NDB_SCHEMA_OBJECT::release(const_cast<NDB_SCHEMA_OBJECT *>(schema_op));
1405     }
1406     m_active_schema_ops.clear();
1407   }
1408 
1409   void report_data_node_failure(unsigned data_node_id) {
1410     ndb_log_verbose(1, "Data node %d failed", data_node_id);
1411 
1412     Node_subscribers *subscribers = find_node_subscribers(data_node_id);
1413     if (subscribers) {
1414       subscribers->clear_all();
1415 
1416       ndb_log_verbose(19, "Subscribers[%d]: %s", data_node_id,
1417                       subscribers->to_string().c_str());
1418     }
1419   }
1420 
1421   void report_subscribe(unsigned data_node_id, unsigned subscriber_node_id) {
1422     ndb_log_verbose(1, "Data node %d reports subscribe from node %d",
1423                     data_node_id, subscriber_node_id);
1424     ndbcluster::ndbrequire(subscriber_node_id != 0);
1425 
1426     Node_subscribers *subscribers = find_node_subscribers(data_node_id);
1427     if (subscribers) {
1428       subscribers->set(subscriber_node_id);
1429 
1430       ndb_log_verbose(19, "Subscribers[%d]: %s", data_node_id,
1431                       subscribers->to_string().c_str());
1432     }
1433   }
1434 
1435   void report_unsubscribe(unsigned data_node_id, unsigned subscriber_node_id) {
1436     ndb_log_verbose(1, "Data node %d reports unsubscribe from node %d",
1437                     data_node_id, subscriber_node_id);
1438     ndbcluster::ndbrequire(subscriber_node_id != 0);
1439 
1440     Node_subscribers *subscribers = find_node_subscribers(data_node_id);
1441     if (subscribers) {
1442       subscribers->clear(subscriber_node_id);
1443 
1444       ndb_log_verbose(19, "Subscribers[%d]: %s", data_node_id,
1445                       subscribers->to_string().c_str());
1446     }
1447   }
1448 
1449   /**
1450      @brief Get list of current subscribers
1451      @note A node counts as subscribed as soon as any data node reports it as
1452      subscribed.
1453      @param subscriber_list The list where to return subscribers
1454   */
1455   void get_subscriber_list(std::unordered_set<uint32> &subscriber_list) const {
1456     for (const auto it : m_subscriber_bitmaps) {
1457       Node_subscribers *subscribers = it.second;
1458       subscribers->get_subscriber_list(subscriber_list);
1459     }
1460     // Always add own node which is always connected
1461     subscriber_list.insert(m_own_nodeid);
1462   }
1463 
1464   void save_prepared_rename_key(NDB_SHARE_KEY *key) {
1465     m_prepared_rename_key = key;
1466   }
1467 
1468   NDB_SHARE_KEY *get_prepared_rename_key() const {
1469     return m_prepared_rename_key;
1470   }
1471 
1472   void save_inplace_alter_event_data(const Ndb_event_data *event_data) {
1473     // Should not already be set when saving a new pointer
1474     DBUG_ASSERT(event_data == nullptr || !m_inplace_alter_event_data);
1475     m_inplace_alter_event_data = event_data;
1476   }
1477   const Ndb_event_data *get_inplace_alter_event_data() const {
1478     return m_inplace_alter_event_data;
1479   }
1480 
1481   void add_active_schema_op(NDB_SCHEMA_OBJECT *schema_op) {
1482     // Current assumption is that as long as all users of schema distribution
1483     // hold the GSL, there will only ever be one active schema operation at a
1484     // time. This assumption will probably change soon, but until then it can
1485     // be verified with an assert.
1486     DBUG_ASSERT(m_active_schema_ops.size() == 0);
1487 
1488     // Get coordinator reference to NDB_SCHEMA_OBJECT. It will be kept alive
1489     // until the coordinator releases it
1490     NDB_SCHEMA_OBJECT::get(schema_op);
1491 
1492     // Insert NDB_SCHEMA_OBJECT in list of active schema ops
1493     ndbcluster::ndbrequire(m_active_schema_ops.insert(schema_op).second);
1494   }
1495 
1496   void remove_active_schema_op(NDB_SCHEMA_OBJECT *schema_op) {
1497     // Need to have active schema op for decrement
1498     ndbcluster::ndbrequire(m_active_schema_ops.size() > 0);
1499 
1500     // Remove NDB_SCHEMA_OBJECT from list of active schema ops
1501     ndbcluster::ndbrequire(m_active_schema_ops.erase(schema_op) == 1);
1502 
1503     // Release coordinator reference to NDB_SCHEMA_OBJECT
1504     NDB_SCHEMA_OBJECT::release(schema_op);
1505   }
1506 
1507   const std::unordered_set<const NDB_SCHEMA_OBJECT *> &active_schema_ops() {
1508     return m_active_schema_ops;
1509   }
1510 
1511   bool time_for_check() {
1512     std::chrono::steady_clock::time_point curr_time =
1513         std::chrono::steady_clock::now();
1514     if (m_next_check_time > curr_time) return false;
1515 
1516     // Setup time for next check in 1 second
1517     m_next_check_time = curr_time + std::chrono::seconds(1);
1518     return true;
1519   }
1520 
1521 };  // class Ndb_schema_dist_data
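// Rough lifecycle sketch (an assumption inferred from the interface above, not
// taken from the surrounding code): the binlog thread calls init() once a
// cluster connection exists, feeds report_subscribe()/report_unsubscribe()/
// report_data_node_failure() as cluster events arrive, and finally calls
// release() when schema distribution is shut down.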
1522 
1523 class Ndb_schema_event_handler {
1524   class Ndb_schema_op {
1525     /*
1526        Unpack arbitrary length varbinary field and return pointer to zero
1527        terminated string allocated in current memory root.
1528 
1529        @param field The field to unpack
1530        @return pointer to string allocated in current MEM_ROOT
1531     */
1532     static char *unpack_varbinary(Field *field) {
1533       /*
1534         The Schema_dist_client will check the schema of the ndb_schema table
1535         and will not send any commands unless the table fulfills requirements.
1536         Thus this function assumes that the field is always a varbinary
1537         (with at least 63 bytes length since that's the legacy min limit)
1538       */
1539       ndbcluster::ndbrequire(field->type() == MYSQL_TYPE_VARCHAR);
1540       ndbcluster::ndbrequire(field->field_length >= 63);
1541 
1542       // Calculate number of length bytes, this depends on the field's max length
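      // (HA_VARCHAR_PACKLENGTH is 1 for columns with a max length below 256
      // bytes and 2 otherwise)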
1543       const uint length_bytes = HA_VARCHAR_PACKLENGTH(field->field_length);
1544       ndbcluster::ndbrequire(length_bytes <= 2);
1545 
1546       // Read length of the varbinary which is stored in the field
1547       const uint varbinary_length = length_bytes == 1
1548                                         ? static_cast<uint>(*field->field_ptr())
1549                                         : uint2korr(field->field_ptr());
1550       DBUG_PRINT("info", ("varbinary length: %u", varbinary_length));
1551       // Check that varbinary length is not greater than the field's max length
1552       // (this would indicate that corrupted data has been written to the table)
1553       ndbcluster::ndbrequire(varbinary_length <= field->field_length);
1554 
1555       const char *varbinary_start =
1556           reinterpret_cast<const char *>(field->field_ptr() + length_bytes);
1557       return sql_strmake(varbinary_start, varbinary_length);
1558     }
1559 
1560     /*
1561        Unpack blob field and return pointer to zero terminated string allocated
1562        in current MEM_ROOT.
1563 
1564        This function assumes that the blob has already been fetched from NDB
1565        and is ready to be extracted from buffers allocated inside NdbApi.
1566 
1567        @param ndb_blob The blob column to unpack
1568        @return pointer to string allocated in current MEM_ROOT
1569     */
1570     static char *unpack_blob(NdbBlob *ndb_blob) {
1571       // Check if blob is NULL
1572       int blob_is_null;
1573       ndbcluster::ndbrequire(ndb_blob->getNull(blob_is_null) == 0);
1574       if (blob_is_null != 0) {
1575         // The blob column didn't contain anything, return empty string
1576         return sql_strdup("");
1577       }
1578 
1579       // Read length of blob
1580       Uint64 blob_len;
1581       ndbcluster::ndbrequire(ndb_blob->getLength(blob_len) == 0);
1582       if (blob_len == 0) {
1583         // The blob column didn't contain anything, return empty string
1584         return sql_strdup("");
1585       }
1586 
1587       // Allocate space for blob plus zero terminator in current MEM_ROOT
1588       char *str = static_cast<char *>((*THR_MALLOC)->Alloc(blob_len + 1));
1589       ndbcluster::ndbrequire(str);
1590 
1591       // Read the blob content
1592       Uint32 read_len = static_cast<Uint32>(blob_len);
1593       ndbcluster::ndbrequire(ndb_blob->readData(str, read_len) == 0);
1594       ndbcluster::ndbrequire(blob_len == read_len);  // Assume all read
1595       str[blob_len] = 0;                             // Zero terminate
1596 
1597       DBUG_PRINT("unpack_blob", ("str: '%s'", str));
1598       return str;
1599     }
1600 
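    // Unpack the 'slock' bitmap from the field. Each bit corresponds to a
    // node id; a participant clears its own bit when it has completed the
    // schema operation (see ack_schema_op() below).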
1601     void unpack_slock(const Field *field) {
1602       // Allocate bitmap buffer in current MEM_ROOT
1603       slock_buf = static_cast<my_bitmap_map *>(
1604           (*THR_MALLOC)->Alloc(field->field_length));
1605       ndbcluster::ndbrequire(slock_buf);
1606 
1607       // Initialize bitmap (always succeeds when the buffer is already allocated)
1608       (void)bitmap_init(&slock, slock_buf, field->field_length * 8);
1609 
1610       // Copy data into bitmap buffer
1611       memcpy(slock_buf, field->field_ptr(), field->field_length);
1612     }
1613 
1614     // Unpack Ndb_schema_op from event_data pointer
1615     void unpack_event(const Ndb_event_data *event_data) {
1616       TABLE *table = event_data->shadow_table;
1617       Field **field = table->field;
1618 
1619       my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->read_set);
1620 
1621       /* db, varbinary */
1622       db = unpack_varbinary(*field);
1623       field++;
1624 
1625       /* name, varbinary */
1626       name = unpack_varbinary(*field);
1627       field++;
1628 
1629       /* slock, binary */
1630       unpack_slock(*field);
1631       field++;
1632 
1633       /* query, blob */
1634       query = unpack_blob(event_data->ndb_value[0][SCHEMA_QUERY_I].blob);
1635       field++;
1636 
1637       /* node_id */
1638       node_id = (Uint32)((Field_long *)*field)->val_int();
1639       /* epoch */
1640       field++;
1641       epoch = ((Field_long *)*field)->val_int();
1642       /* id */
1643       field++;
1644       id = (Uint32)((Field_long *)*field)->val_int();
1645       /* version */
1646       field++;
1647       version = (Uint32)((Field_long *)*field)->val_int();
1648       /* type */
1649       field++;
1650       type = (Uint32)((Field_long *)*field)->val_int();
1651       /* schema_op_id */
1652       field++;
1653       if (*field) {
1654         // Optional column
1655         schema_op_id = (Uint32)((Field_long *)*field)->val_int();
1656       } else {
1657         schema_op_id = 0;
1658       }
1659 
1660       dbug_tmp_restore_column_map(table->read_set, old_map);
1661     }
1662 
1663    public:
1664     // Note! The db, name, slock_buf and query variables point to memory
1665     // allocated in the current MEM_ROOT. When the Ndb_schema_op is put in the
1666     // list to be executed after epoch, only the pointers are copied and
1667     // still point to the same memory inside the MEM_ROOT.
1668     char *db;
1669     char *name;
1670 
1671    private:
1672     // Buffer for the slock bitmap
1673     my_bitmap_map *slock_buf;
1674 
1675    public:
1676     MY_BITMAP slock;
1677     char *query;
1678     size_t query_length() const {
1679       // Return length of "query" which is always zero terminated string
1680       return strlen(query);
1681     }
1682     Uint64 epoch;
1683     uint32 node_id;
1684     uint32 id;
1685     uint32 version;
1686     uint32 type;
1687     uint32 any_value;
1688     uint32 schema_op_id;
1689 
1690     /**
1691       Create a Ndb_schema_op from event_data
1692     */
1693     static const Ndb_schema_op *create(const Ndb_event_data *event_data,
1694                                        Uint32 any_value) {
1695       DBUG_TRACE;
1696       // Allocate memory in current MEM_ROOT
1697       Ndb_schema_op *schema_op =
1698           (Ndb_schema_op *)(*THR_MALLOC)->Alloc(sizeof(Ndb_schema_op));
1699       schema_op->unpack_event(event_data);
1700       schema_op->any_value = any_value;
1701       DBUG_PRINT("exit", ("'%s.%s': query: '%s' type: %d", schema_op->db,
1702                           schema_op->name, schema_op->query, schema_op->type));
1703       return schema_op;
1704     }
1705   };
1706 
1707   class Ndb_schema_op_result {
1708     uint32 m_result{0};
1709     std::string m_message;
1710 
1711    public:
1712     void set_result(Ndb_schema_dist::Schema_op_result_code result,
1713                     const std::string message) {
1714       // Both result and message must be set
1715       DBUG_ASSERT(result && message.length());
1716       m_result = result;
1717       m_message = message;
1718     }
1719     const char *message() const { return m_message.c_str(); }
1720     uint32 result() const { return m_result; }
1721   };
1722 
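  // RAII guard which temporarily sets the session's lock_wait_timeout and
  // restores the original value when going out of scope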
1723   class Lock_wait_timeout_guard {
1724    public:
1725     Lock_wait_timeout_guard(THD *thd, ulong lock_wait_timeout)
1726         : m_thd(thd),
1727           m_save_lock_wait_timeout(thd->variables.lock_wait_timeout) {
1728       m_thd->variables.lock_wait_timeout = lock_wait_timeout;
1729     }
1730 
1731     ~Lock_wait_timeout_guard() {
1732       m_thd->variables.lock_wait_timeout = m_save_lock_wait_timeout;
1733     }
1734 
1735    private:
1736     THD *const m_thd;
1737     ulong m_save_lock_wait_timeout;
1738   };
1739 
1740   // Log error code and message returned from NDB
1741   void log_NDB_error(const NdbError &ndb_error) const {
1742     ndb_log_info("Got error '%d: %s' from NDB", ndb_error.code,
1743                  ndb_error.message);
1744   }
1745 
1746   static void write_schema_op_to_binlog(THD *thd, const Ndb_schema_op *schema) {
1747     if (!ndb_binlog_running) {
1748       // This mysqld is not writing a binlog
1749       return;
1750     }
1751 
1752     /* any_value == 0 means local cluster sourced change that
1753      * should be logged
1754      */
1755     if (ndbcluster_anyvalue_is_reserved(schema->any_value)) {
1756       /* Originating SQL node did not want this query logged */
1757       if (!ndbcluster_anyvalue_is_nologging(schema->any_value)) {
1758         ndb_log_warning(
1759             "unknown value for binlog signalling 0x%X, "
1760             "query not logged",
1761             schema->any_value);
1762       }
1763       return;
1764     }
1765 
1766     Uint32 queryServerId = ndbcluster_anyvalue_get_serverid(schema->any_value);
1767     /*
1768        Start with serverId as received AnyValue, in case it's a composite
1769        (server_id_bits < 31).
1770        This is for 'future', as currently schema ops do not have composite
1771        AnyValues.
1772        In future it may be useful to support *not* mapping composite
1773        AnyValues to/from Binlogged server-ids.
1774     */
1775     Uint32 loggedServerId = schema->any_value;
1776 
1777     if (queryServerId) {
1778       /*
1779          AnyValue has non-zero serverId, must be a query applied by a slave
1780          mysqld.
1781          TODO : Assert that we are running in the Binlog injector thread?
1782       */
1783       if (!g_ndb_log_slave_updates) {
1784         /* This MySQLD does not log slave updates */
1785         return;
1786       }
1787     } else {
1788       /* No ServerId associated with this query, mark it as ours */
1789       ndbcluster_anyvalue_set_serverid(loggedServerId, ::server_id);
1790     }
1791 
1792     /*
1793       Write the DDL query to binlog with server_id set
1794       to the server_id where the query originated.
1795     */
1796     const uint32 thd_server_id_save = thd->server_id;
1797     DBUG_ASSERT(sizeof(thd_server_id_save) == sizeof(thd->server_id));
1798     thd->server_id = loggedServerId;
1799 
1800     LEX_CSTRING thd_db_save = thd->db();
1801     LEX_CSTRING schema_db_lex_cstr = {schema->db, strlen(schema->db)};
1802     thd->reset_db(schema_db_lex_cstr);
1803 
1804     int errcode = query_error_code(thd, thd->killed == THD::NOT_KILLED);
1805     thd->binlog_query(THD::STMT_QUERY_TYPE, schema->query,
1806                       schema->query_length(),
1807                       false,  // is_trans
1808                       true,   // direct
1809                       schema->name[0] == 0 || thd->db().str[0] == 0, errcode);
1810 
1811     // Commit the binlog write
1812     (void)trans_commit_stmt(thd);
1813 
1814     /*
1815       Restore original server_id and db after commit
1816       since the server_id is being used also in the commit logic
1817     */
1818     thd->server_id = thd_server_id_save;
1819     thd->reset_db(thd_db_save);
1820   }
1821 
1822   /**
1823     @brief Inform the other nodes that schema operation has been completed by
1824     this node, this is done by updating the row in the ndb_schema table.
1825 
1826     @note The function will read the row from ndb_schema with exclusive lock,
1827     clear its own bit in the 'slock' column and then write the row back.
1828 
1829     @param schema The schema operation which has just been completed
1830 
1831     @return various return values, not documented since they are currently
1832     unused
1833 
1834   */
1835   int ack_schema_op(const Ndb_schema_op *schema) const {
1836     DBUG_TRACE;
1837     Ndb *ndb = m_thd_ndb->ndb;
1838 
1839     // Open ndb_schema table
1840     Ndb_schema_dist_table schema_dist_table(m_thd_ndb);
1841     if (!schema_dist_table.open()) {
1842       // NOTE! Legacy crash unless this was a cluster connection failure, there
1843       // is simply no other way of sending the error back to the coordinator
1844       ndbcluster::ndbrequire(ndb->getDictionary()->getNdbError().code == 4009);
1845       return 1;
1846     }
1847     const NdbDictionary::Table *ndbtab = schema_dist_table.get_table();
1848 
1849     const NdbError *ndb_error = nullptr;
1850     char tmp_buf[FN_REFLEN];
1851     NdbTransaction *trans = 0;
1852     int retries = 100;
1853     std::string before_slock;
1854 
1855     // Bitmap for the slock bits
1856     MY_BITMAP slock;
1857     const uint slock_bits = schema_dist_table.get_slock_bytes() * 8;
1858     // Make sure that own nodeid fits in slock
1859     ndbcluster::ndbrequire(own_nodeid() <= slock_bits);
1860     (void)bitmap_init(&slock, nullptr, slock_bits);
1861 
1862     while (1) {
1863       if ((trans = ndb->startTransaction()) == 0) goto err;
1864       {
1865         NdbOperation *op = 0;
1866         int r = 0;
1867 
1868         /* read row from ndb_schema with exclusive row lock */
1869         r |= (op = trans->getNdbOperation(ndbtab)) == 0;
1870         DBUG_ASSERT(r == 0);
1871         r |= op->readTupleExclusive();
1872         DBUG_ASSERT(r == 0);
1873 
1874         /* db */
1875         ndb_pack_varchar(ndbtab, SCHEMA_DB_I, tmp_buf, schema->db,
1876                          strlen(schema->db));
1877         r |= op->equal(SCHEMA_DB_I, tmp_buf);
1878         DBUG_ASSERT(r == 0);
1879         /* name */
1880         ndb_pack_varchar(ndbtab, SCHEMA_NAME_I, tmp_buf, schema->name,
1881                          strlen(schema->name));
1882         r |= op->equal(SCHEMA_NAME_I, tmp_buf);
1883         DBUG_ASSERT(r == 0);
1884         /* slock */
1885         r |= op->getValue(SCHEMA_SLOCK_I, (char *)slock.bitmap) == 0;
1886         DBUG_ASSERT(r == 0);
1887 
1888         /* Execute in NDB */
1889         if (trans->execute(NdbTransaction::NoCommit)) goto err;
1890       }
1891 
1892       if (ndb_log_get_verbose_level() > 19) {
1893         // Generate the 'before slock' string
1894         before_slock = ndb_bitmap_to_hex_string(&slock);
1895       }
1896 
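      // Clear own bit in 'slock' to indicate that this node has completed the
      // schema operation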
1897       bitmap_clear_bit(&slock, own_nodeid());
1898 
1899       if (ndb_log_get_verbose_level() > 19) {
1900         const std::string after_slock = ndb_bitmap_to_hex_string(&slock);
1901         ndb_log_info("reply to %s.%s(%u/%u) from %s to %s", schema->db,
1902                      schema->name, schema->id, schema->version,
1903                      before_slock.c_str(), after_slock.c_str());
1904       }
1905 
1906       {
1907         NdbOperation *op = 0;
1908         int r = 0;
1909 
1910         /* now update the tuple */
1911         r |= (op = trans->getNdbOperation(ndbtab)) == 0;
1912         DBUG_ASSERT(r == 0);
1913         r |= op->updateTuple();
1914         DBUG_ASSERT(r == 0);
1915 
1916         /* db */
1917         ndb_pack_varchar(ndbtab, SCHEMA_DB_I, tmp_buf, schema->db,
1918                          strlen(schema->db));
1919         r |= op->equal(SCHEMA_DB_I, tmp_buf);
1920         DBUG_ASSERT(r == 0);
1921         /* name */
1922         ndb_pack_varchar(ndbtab, SCHEMA_NAME_I, tmp_buf, schema->name,
1923                          strlen(schema->name));
1924         r |= op->equal(SCHEMA_NAME_I, tmp_buf);
1925         DBUG_ASSERT(r == 0);
1926         /* slock */
1927         r |= op->setValue(SCHEMA_SLOCK_I, (char *)slock.bitmap);
1928         DBUG_ASSERT(r == 0);
1929         /* node_id */
1930         // NOTE! Sends own nodeid here instead of the nodeid that started the schema op
1931         r |= op->setValue(SCHEMA_NODE_ID_I, own_nodeid());
1932         DBUG_ASSERT(r == 0);
1933         /* type */
1934         r |= op->setValue(SCHEMA_TYPE_I, (uint32)SOT_CLEAR_SLOCK);
1935         DBUG_ASSERT(r == 0);
1936       }
1937       if (trans->execute(NdbTransaction::Commit,
1938                          NdbOperation::DefaultAbortOption,
1939                          1 /*force send*/) == 0) {
1940         DBUG_PRINT("info", ("node %d cleared lock on '%s.%s'", own_nodeid(),
1941                             schema->db, schema->name));
1942         (void)ndb->getDictionary()->forceGCPWait(1);
1943         break;
1944       }
1945     err:
1946       const NdbError *this_error =
1947           trans ? &trans->getNdbError() : &ndb->getNdbError();
1948       if (this_error->status == NdbError::TemporaryError &&
1949           !thd_killed(m_thd)) {
1950         if (retries--) {
1951           if (trans) ndb->closeTransaction(trans);
1952           ndb_trans_retry_sleep();
1953           continue;  // retry
1954         }
1955       }
1956       ndb_error = this_error;
1957       break;
1958     }
1959 
1960     if (ndb_error) {
1961       ndb_log_warning(
1962           "Could not release slock on '%s.%s', "
1963           "Error code: %d Message: %s",
1964           schema->db, schema->name, ndb_error->code, ndb_error->message);
1965     }
1966     if (trans) ndb->closeTransaction(trans);
1967     bitmap_free(&slock);
1968     return 0;
1969   }
1970 
1971   /**
1972     @brief Inform the other nodes that schema operation has been completed by
1973     all nodes, this is done by updating the row in the ndb_schema table with
1974     all bits of the 'slock' column cleared.
1975 
1976     @note this is done to allow the coordinator to control when the schema
1977     operation has completed and also to be backwards compatible with
1978     nodes not upgraded to new protocol
1979 
1980     @param db First part of key, normally used for db name
1981     @param table_name Second part of key, normally used for table name
1982 
1983     @return zero on success
1984 
1985   */
1986   int ack_schema_op_final(const char *db, const char *table_name) const {
1987     DBUG_TRACE;
1988     Ndb *ndb = m_thd_ndb->ndb;
1989 
1990     // Open ndb_schema table
1991     Ndb_schema_dist_table schema_dist_table(m_thd_ndb);
1992     if (!schema_dist_table.open()) {
1993       // NOTE! Legacy crash unless this was a cluster connection failure, there
1994       // is simply no other way of sending the error back to the coordinator
1995       ndbcluster::ndbrequire(ndb->getDictionary()->getNdbError().code == 4009);
1996       return 1;
1997     }
1998     const NdbDictionary::Table *ndbtab = schema_dist_table.get_table();
1999 
2000     // Pack db and table_name
2001     char db_buf[FN_REFLEN];
2002     char name_buf[FN_REFLEN];
2003     ndb_pack_varchar(ndbtab, SCHEMA_DB_I, db_buf, db, strlen(db));
2004     ndb_pack_varchar(ndbtab, SCHEMA_NAME_I, name_buf, table_name,
2005                      strlen(table_name));
2006 
2007     // Buffer with zeroes for slock
2008     std::vector<char> slock_zeroes;
2009     slock_zeroes.assign(schema_dist_table.get_slock_bytes(), 0);
2010     const char *slock_buf = slock_zeroes.data();
2011 
2012     // Function for updating row in ndb_schema
2013     std::function<const NdbError *(NdbTransaction *)> ack_schema_op_final_func =
2014         [ndbtab, db_buf, name_buf,
2015          slock_buf](NdbTransaction *trans) -> const NdbError * {
2016       DBUG_TRACE;
2017 
2018       NdbOperation *op = trans->getNdbOperation(ndbtab);
2019       if (op == nullptr) return &trans->getNdbError();
2020 
2021       // Update row
2022       if (op->updateTuple() != 0 || op->equal(SCHEMA_NAME_I, name_buf) != 0 ||
2023           op->equal(SCHEMA_DB_I, db_buf) != 0 ||
2024           op->setValue(SCHEMA_SLOCK_I, slock_buf) != 0 ||
2025           op->setValue(SCHEMA_TYPE_I, (uint32)SOT_CLEAR_SLOCK) != 0)
2026         return &op->getNdbError();
2027 
2028       if (trans->execute(NdbTransaction::Commit,
2029                          NdbOperation::DefaultAbortOption,
2030                          1 /*force send*/) != 0)
2031         return &trans->getNdbError();
2032 
2033       return nullptr;
2034     };
2035 
2036     NdbError ndb_err;
2037     if (!ndb_trans_retry(ndb, m_thd, ndb_err, ack_schema_op_final_func)) {
2038       log_NDB_error(ndb_err);
2039       ndb_log_warning("Could not release slock on '%s.%s'", db, table_name);
2040       return 1;
2041     }
2042     ndb_log_verbose(19, "Cleared slock on '%s.%s'", db, table_name);
2043 
2044     (void)ndb->getDictionary()->forceGCPWait(1);
2045 
2046     return 0;
2047   }
2048 
2049   /**
2050     @brief Inform the other nodes that schema operation has been completed by
2051     this node. This is done by writing a new row to the ndb_schema_result table.
2052 
2053     @param schema The schema operation which has just been completed
2054 
2055     @return true if ack succeeds
2056     @return false if ack fails (writing to the table could not be done)
2057 
2058   */
2059   bool ack_schema_op_with_result(const Ndb_schema_op *schema) const {
2060     DBUG_TRACE;
2061 
2062     // Should only call this function if ndb_schema has a schema_op_id
2063     // column which enabled the client to send schema->schema_op_id != 0
2064     ndbcluster::ndbrequire(schema->schema_op_id);
2065 
2066     Ndb *ndb = m_thd_ndb->ndb;
2067 
2068     // Open ndb_schema_result table
2069     Ndb_schema_result_table schema_result_table(m_thd_ndb);
2070     if (!schema_result_table.open()) {
2071       // NOTE! Legacy crash unless this was a cluster connection failure, there
2072       // is simply no other way of sending the error back to the coordinator
2073       ndbcluster::ndbrequire(ndb->getDictionary()->getNdbError().code == 4009);
2074       return false;
2075     }
2076 
2077     const NdbDictionary::Table *ndbtab = schema_result_table.get_table();
2078     const uint32 nodeid = schema->node_id;
2079     const uint32 schema_op_id = schema->schema_op_id;
2080     const uint32 participant_nodeid = own_nodeid();
2081     const uint32 result = m_schema_op_result.result();
2082     char message_buf[255];
2083     schema_result_table.pack_message(m_schema_op_result.message(), message_buf);
2084 
2085     // Function for inserting row with result in ndb_schema_result
2086     std::function<const NdbError *(NdbTransaction *)>
2087         ack_schema_op_with_result_func =
2088             [ndbtab, nodeid, schema_op_id, participant_nodeid, result,
2089              message_buf](NdbTransaction *trans) -> const NdbError * {
2090       DBUG_TRACE;
2091 
2092       NdbOperation *op = trans->getNdbOperation(ndbtab);
2093       if (op == nullptr) return &trans->getNdbError();
2094 
2095       /* Write row */
2096       if (op->insertTuple() != 0 ||
2097           op->equal(Ndb_schema_result_table::COL_NODEID, nodeid) != 0 ||
2098           op->equal(Ndb_schema_result_table::COL_SCHEMA_OP_ID, schema_op_id) !=
2099               0 ||
2100           op->equal(Ndb_schema_result_table::COL_PARTICIPANT_NODEID,
2101                     participant_nodeid) != 0 ||
2102           op->setValue(Ndb_schema_result_table::COL_RESULT, result) != 0 ||
2103           op->setValue(Ndb_schema_result_table::COL_MESSAGE, message_buf) != 0)
2104         return &op->getNdbError();
2105 
2106       if (trans->execute(NdbTransaction::Commit,
2107                          NdbOperation::DefaultAbortOption,
2108                          1 /*force send*/) != 0)
2109         return &trans->getNdbError();
2110 
2111       return nullptr;
2112     };
2113 
2114     NdbError ndb_err;
2115     if (!ndb_trans_retry(ndb, m_thd, ndb_err, ack_schema_op_with_result_func)) {
2116       log_NDB_error(ndb_err);
2117       ndb_log_warning(
2118           "Failed to send result for schema operation involving '%s.%s'",
2119           schema->db, schema->name);
2120       return false;
2121     }
2122 
2123     // Success
2124     ndb_log_verbose(19,
2125                     "Replied to schema operation '%s.%s(%u/%u)', nodeid: %d, "
2126                     "schema_op_id: %d",
2127                     schema->db, schema->name, schema->id, schema->version,
2128                     schema->node_id, schema->schema_op_id);
2129 
2130     return true;
2131   }
2132 
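  // Remove all rows from ndb_schema_result belonging to the schema operation
  // identified by this node's nodeid and the given schema_op_id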
2133   void remove_schema_result_rows(uint32 schema_op_id) {
2134     Ndb *ndb = m_thd_ndb->ndb;
2135 
2136     // Open ndb_schema_result table
2137     Ndb_schema_result_table schema_result_table(m_thd_ndb);
2138     if (!schema_result_table.open()) {
2139       // NOTE! Legacy crash unless this was a cluster connection failure, there
2140       // is simply no other way of sending the error back to the coordinator
2141       ndbcluster::ndbrequire(ndb->getDictionary()->getNdbError().code == 4009);
2142       return;
2143     }
2144     const NdbDictionary::Table *ndbtab = schema_result_table.get_table();
2145     const uint nodeid = own_nodeid();
2146 
2147     // Function for deleting all rows from ndb_schema_result matching
2148     // the given nodeid and schema operation id
2149     std::function<const NdbError *(NdbTransaction *)>
2150         remove_schema_result_rows_func =
2151             [nodeid, schema_op_id,
2152              ndbtab](NdbTransaction *trans) -> const NdbError * {
2153       DBUG_TRACE;
2154       DBUG_PRINT("enter",
2155                  ("nodeid: %d, schema_op_id: %d", nodeid, schema_op_id));
2156 
2157       NdbScanOperation *scan_op = trans->getNdbScanOperation(ndbtab);
2158       if (scan_op == nullptr) return &trans->getNdbError();
2159 
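      // Request SF_KeyInfo so that rows found by the scan can be taken over
      // for deleteCurrentTuple() below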
2160       if (scan_op->readTuples(NdbOperation::LM_Read,
2161                               NdbScanOperation::SF_KeyInfo) != 0)
2162         return &scan_op->getNdbError();
2163 
2164       // Read the columns to compare
2165       uint32 read_node_id, read_schema_op_id;
2166       if (scan_op->getValue(Ndb_schema_result_table::COL_NODEID,
2167                             (char *)&read_node_id) == nullptr ||
2168           scan_op->getValue(Ndb_schema_result_table::COL_SCHEMA_OP_ID,
2169                             (char *)&read_schema_op_id) == nullptr)
2170         return &scan_op->getNdbError();
2171 
2172       // Start the scan
2173       if (trans->execute(NdbTransaction::NoCommit) != 0)
2174         return &trans->getNdbError();
2175 
2176       // Loop through all rows
2177       unsigned deleted = 0;
2178       bool fetch = true;
2179       while (true) {
2180         const int r = scan_op->nextResult(fetch);
2181         if (r < 0) {
2182           // Failed to fetch next row
2183           return &scan_op->getNdbError();
2184         }
2185         fetch = false;  // Don't fetch more until nextResult returns 2
2186 
2187         switch (r) {
2188           case 0:  // Found row
2189             DBUG_PRINT("info", ("Found row"));
2190             // Delete rows if equal to nodeid and schema_op_id
2191             if (read_schema_op_id == schema_op_id && read_node_id == nodeid) {
2192               DBUG_PRINT("info", ("Deleting row"));
2193               if (scan_op->deleteCurrentTuple() != 0) {
2194                 // Failed to delete row
2195                 return &scan_op->getNdbError();
2196               }
2197               deleted++;
2198             }
2199             continue;
2200 
2201           case 1:
2202             DBUG_PRINT("info", ("No more rows"));
2203             // No more rows, commit the transaction
2204             if (trans->execute(NdbTransaction::Commit) != 0) {
2205               // Failed to commit
2206               return &trans->getNdbError();
2207             }
2208             return nullptr;
2209 
2210           case 2:
2211             // Need to fetch more rows, first send the deletes
2212             DBUG_PRINT("info", ("Need to fetch more rows"));
2213             if (deleted > 0) {
2214               DBUG_PRINT("info", ("Sending deletes"));
2215               if (trans->execute(NdbTransaction::NoCommit) != 0) {
2216                 // Failed to send
2217                 return &trans->getNdbError();
2218               }
2219             }
2220             fetch = true;  // Fetch more rows
2221             continue;
2222         }
2223       }
2224       // Never reached
2225       ndbcluster::ndbrequire(false);
2226       return nullptr;
2227     };
2228 
2229     NdbError ndb_err;
2230     if (!ndb_trans_retry(ndb, m_thd, ndb_err, remove_schema_result_rows_func)) {
2231       log_NDB_error(ndb_err);
2232       ndb_log_error("Failed to remove rows from ndb_schema_result");
2233       return;
2234     }
2235     ndb_log_verbose(19,
2236                     "Deleted all rows from ndb_schema_result, nodeid: %d, "
2237                     "schema_op_id: %d",
2238                     nodeid, schema_op_id);
2239     return;
2240   }
2241 
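  // Check all ongoing schema operations against the current list of
  // subscribers, fail participants that are no longer subscribed and send the
  // final ack for operations that thereby complete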
2242   void check_wakeup_clients(Ndb_schema_dist::Schema_op_result_code result,
2243                             const char *message) const {
2244     // Build list of current subscribers
2245     std::unordered_set<uint32> subscribers;
2246     m_schema_dist_data.get_subscriber_list(subscribers);
2247 
2248     // Check all NDB_SCHEMA_OBJECTS for wakeup
2249     std::vector<uint32> schema_op_ids;
2250     NDB_SCHEMA_OBJECT::get_schema_op_ids(schema_op_ids);
2251     for (auto schema_op_id : schema_op_ids) {
2252       // Lookup NDB_SCHEMA_OBJECT from nodeid + schema_op_id
2253       std::unique_ptr<NDB_SCHEMA_OBJECT, decltype(&NDB_SCHEMA_OBJECT::release)>
2254           schema_object(NDB_SCHEMA_OBJECT::get(own_nodeid(), schema_op_id),
2255                         NDB_SCHEMA_OBJECT::release);
2256       if (schema_object == nullptr) {
2257         // The schema operation has already completed on this node
2258         continue;
2259       }
2260 
2261       const bool completed = schema_object->check_for_failed_subscribers(
2262           subscribers, result, message);
2263       if (completed) {
2264         // All participants have completed(or failed) -> send final ack
2265         ack_schema_op_final(schema_object->db(), schema_object->name());
2266       }
2267     }
2268   }
2269 
2270   bool check_is_ndb_schema_event(const Ndb_event_data *event_data) const {
2271     if (!event_data) {
2272       // Received event without event data pointer
2273       assert(false);
2274       return false;
2275     }
2276 
2277     NDB_SHARE *share = event_data->share;
2278     if (!share) {
2279       // Received event where the event_data is not properly initialized
2280       assert(false);
2281       return false;
2282     }
2283     assert(event_data->shadow_table);
2284     assert(event_data->ndb_value[0]);
2285     assert(event_data->ndb_value[1]);
2286     assert(Ndb_schema_dist_client::is_schema_dist_table(share->db,
2287                                                         share->table_name));
2288     return true;
2289   }
2290 
2291   void handle_after_epoch(const Ndb_schema_op *schema) {
2292     DBUG_TRACE;
2293     DBUG_PRINT("info", ("Pushing Ndb_schema_op on list to be "
2294                         "handled after epoch"));
2295     assert(!is_post_epoch());  // Only before epoch
2296     m_post_epoch_handle_list.push_back(schema, m_mem_root);
2297   }
2298 
2299   uint own_nodeid(void) const { return m_own_nodeid; }
2300 
2301   void ndbapi_invalidate_table(const char *db_name,
2302                                const char *table_name) const {
2303     DBUG_TRACE;
2304     Ndb_table_guard ndbtab_g(m_thd_ndb->ndb, db_name, table_name);
2305     ndbtab_g.invalidate();
2306   }
2307 
2308   NDB_SHARE *acquire_reference(const char *db, const char *name,
2309                                const char *reference) const {
2310     DBUG_TRACE;
2311     DBUG_PRINT("enter", ("db: '%s', name: '%s'", db, name));
2312 
2313     char key[FN_REFLEN + 1];
2314     build_table_filename(key, sizeof(key) - 1, db, name, "", 0);
2315     NDB_SHARE *share = NDB_SHARE::acquire_reference_by_key(key, reference);
2316     return share;
2317   }
2318 
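  // Check if a local table in another engine already exists in the DD and
  // thus "shadows" the NDB table with the same name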
2319   bool has_shadow_table(Ndb_dd_client &dd_client, const char *schema_name,
2320                         const char *table_name) const {
2321     dd::String_type engine;
2322     if (dd_client.get_engine(schema_name, table_name, &engine) &&
2323         engine != "ndbcluster") {
2324       ndb_log_warning(
2325           "Local table '%s.%s' in engine = '%s' shadows the NDB table",
2326           schema_name, table_name, engine.c_str());
2327       return true;
2328     }
2329     return false;
2330   }
2331 
2332   bool install_table_in_dd(Ndb_dd_client &dd_client, const char *schema_name,
2333                            const char *table_name, dd::sdi_t sdi, int table_id,
2334                            int table_version, size_t num_partitions,
2335                            const std::string &tablespace_name,
2336                            bool force_overwrite,
2337                            bool invalidate_referenced_tables) const {
2338     DBUG_TRACE;
2339 
2340     // First acquire exclusive MDL lock on schema and table
2341     if (!dd_client.mdl_locks_acquire_exclusive(schema_name, table_name)) {
2342       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
2343       ndb_log_error(
2344           "Failed to acquire exclusive metadata lock for table '%s.%s'",
2345           schema_name, table_name);
2346       return false;
2347     }
2348 
2349     // Check if there is an existing table in DD which is not an NDB table, in
2350     // that case refuse to overwrite the "shadow table"
2351     if (has_shadow_table(dd_client, schema_name, table_name)) return false;
2352 
2353     if (!tablespace_name.empty()) {
2354       // Acquire IX MDL on tablespace
2355       if (!dd_client.mdl_lock_tablespace(tablespace_name.c_str(), true)) {
2356         log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
2357         ndb_log_error("Failed to acquire lock on tablespace '%s' for '%s.%s'",
2358                       tablespace_name.c_str(), schema_name, table_name);
2359         return false;
2360       }
2361     }
2362 
2363     Ndb_referenced_tables_invalidator invalidator(m_thd, dd_client);
2364     if (!dd_client.install_table(
2365             schema_name, table_name, sdi, table_id, table_version,
2366             num_partitions, tablespace_name, force_overwrite,
2367             (invalidate_referenced_tables ? &invalidator : nullptr))) {
2368       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
2369       ndb_log_error("Failed to install table '%s.%s' in DD", schema_name,
2370                     table_name);
2371       return false;
2372     }
2373 
2374     if (invalidate_referenced_tables && !invalidator.invalidate()) {
2375       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
2376       ndb_log_error("Failed to invalidate referenced tables for '%s.%s'",
2377                     schema_name, table_name);
2378       return false;
2379     }
2380     dd_client.commit();
2381     return true;
2382   }
2383 
2384   bool create_table_from_engine(
2385       const char *schema_name, const char *table_name, bool force_overwrite,
2386       bool invalidate_referenced_tables = false) const {
2387     DBUG_TRACE;
2388     DBUG_PRINT("enter",
2389                ("schema_name: %s, table_name: %s", schema_name, table_name));
2390 
2391     Ndb *ndb = m_thd_ndb->ndb;
2392     Ndb_table_guard ndbtab_g(ndb, schema_name, table_name);
2393     const NDBTAB *ndbtab = ndbtab_g.get_table();
2394     if (!ndbtab) {
2395       // Could not open the table from NDB, very unusual
2396       log_NDB_error(ndb->getDictionary()->getNdbError());
2397       ndb_log_error("Failed to open table '%s.%s' from NDB", schema_name,
2398                     table_name);
2399       return false;
2400     }
2401 
2402     const std::string tablespace_name =
2403         ndb_table_tablespace_name(ndb->getDictionary(), ndbtab);
2404 
2405     std::string serialized_metadata;
2406     if (!ndb_table_get_serialized_metadata(ndbtab, serialized_metadata)) {
2407       ndb_log_error("Failed to get serialized metadata for table '%s.%s'",
2408                     schema_name, table_name);
2409       return false;
2410     }
2411 
2412     Ndb_dd_client dd_client(m_thd);
2413 
2414     // Deserialize the metadata from NDB. This is done here so that the table
2415     // can be set up for binlogging independently of whether installing it
2416     // into the DD succeeds.
2417     Ndb_dd_table dd_table(m_thd);
2418     const dd::sdi_t sdi = serialized_metadata.c_str();
2419     if (!dd_client.deserialize_table(sdi, dd_table.get_table_def())) {
2420       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
2421       ndb_log_error("Failed to deserialize metadata for table '%s.%s'",
2422                     schema_name, table_name);
2423       return false;
2424     }
2425 
2426     // Setup binlogging for this table. In many cases the NDB_SHARE, the
2427     // event and event subscriptions are already created/setup, but this
2428     // function is called anyway in order to create/setup any missing parts.
2429     if (ndbcluster_binlog_setup_table(m_thd, ndb, schema_name, table_name,
2430                                       dd_table.get_table_def())) {
2431       // Error information has been logged AND pushed -> clear warnings
2432       clear_thd_conditions(m_thd);
2433       ndb_log_error("Failed to setup binlogging for table '%s.%s'", schema_name,
2434                     table_name);
2435       return false;
2436     }
2437 
2438     // Install the table definition in DD
2439     // NOTE! This is done after creating/setting up the NDB_SHARE to avoid the
2440     // server trying to open the table before the NDB_SHARE has been created
2441     if (!install_table_in_dd(dd_client, schema_name, table_name, sdi,
2442                              ndbtab->getObjectId(), ndbtab->getObjectVersion(),
2443                              ndbtab->getPartitionCount(), tablespace_name,
2444                              force_overwrite, invalidate_referenced_tables)) {
2445       ndb_log_warning("Failed to update table definition in DD");
2446       return false;
2447     }
2448 
2449     return true;
2450   }
2451 
2452   void handle_clear_slock(const Ndb_schema_op *schema) {
2453     DBUG_TRACE;
2454 
2455     assert(is_post_epoch());
2456 
2457     if (DBUG_EVALUATE_IF("ndb_binlog_random_tableid", true, false)) {
2458       // Try to create a race between SLOCK acks handled after another
2459       // schema operation on same object could have been started.
2460 
2461       // Get temporary NDB_SCHEMA_OBJECT, sleep if one does not exist
2462       std::unique_ptr<NDB_SCHEMA_OBJECT, decltype(&NDB_SCHEMA_OBJECT::release)>
2463           tmp_ndb_schema_obj(
2464               NDB_SCHEMA_OBJECT::get(schema->db, schema->name, schema->id,
2465                                      schema->version),
2466               NDB_SCHEMA_OBJECT::release);
2467       if (tmp_ndb_schema_obj == nullptr) {
2468         ndb_milli_sleep(10);
2469       }
2470     }
2471 
2472     // Get NDB_SCHEMA_OBJECT
2473     std::unique_ptr<NDB_SCHEMA_OBJECT, decltype(&NDB_SCHEMA_OBJECT::release)>
2474         ndb_schema_object(NDB_SCHEMA_OBJECT::get(schema->db, schema->name,
2475                                                  schema->id, schema->version),
2476                           NDB_SCHEMA_OBJECT::release);
2477 
2478     if (!ndb_schema_object) {
2479       // NOTE! When participants ack they send their own nodeid instead of the
2480       // nodeid of node who initiated the schema operation. This makes it
2481       // impossible to do special checks for the coordinator here. Assume that
2482       // since no NDB_SCHEMA_OBJECT was found, this node is not the coordinator
2483       // and the ack can be safely ignored.
2484       return;
2485     }
2486 
2487     // Handle ack sent from a node using the old protocol, all nodes cleared
2488     // in the slock column have completed (it's not enough to use only nodeid
2489     // since events are merged)
2490     if (bitmap_bits_set(&schema->slock) > 0) {
2491       ndb_log_verbose(19, "Coordinator, handle old protocol ack from node: %d",
2492                       schema->node_id);
2493 
2494       std::unordered_set<uint32> cleared_nodes;
2495       for (uint i = 0; i < schema->slock.n_bits; i++) {
2496         if (!bitmap_is_set(&schema->slock, i)) {
2497           // Node is not set in bitmap
2498           cleared_nodes.insert(i);
2499         }
2500       }
2501       ndb_schema_object->result_received_from_nodes(cleared_nodes);
2502 
2503       if (ndb_schema_object->check_all_participants_completed()) {
2504         // All participants have completed(or failed) -> send final ack
2505         ack_schema_op_final(ndb_schema_object->db(), ndb_schema_object->name());
2506         return;
2507       }
2508 
2509       return;
2510     }
2511 
2512     // Check if all coordinator completed and wake up client
2513     const bool coordinator_completed =
2514         ndb_schema_object->check_coordinator_completed();
2515 
2516     if (coordinator_completed) {
2517       remove_schema_result_rows(ndb_schema_object->schema_op_id());
2518 
2519       // Remove active schema operation from coordinator
2520       m_schema_dist_data.remove_active_schema_op(ndb_schema_object.get());
2521     }
2522 
2523     /**
2524      * There is a possible race condition between this binlog-thread,
2525      * which has not yet released its schema_object, and the
2526      * coordinator which possibly releases its reference
2527      * to the same schema_object when signaled above.
2528      *
2529      * If the coordinator then starts yet another schema operation
2530      * on the same schema / table, it will need a schema_object with
2531      * the same key as the one already completed, and which this
2532      * thread still refers to. Thus, it will get this schema_object,
2533      * instead of creating a new one as normally expected.
2534      */
2535     if (DBUG_EVALUATE_IF("ndb_binlog_schema_object_race", true, false)) {
2536       ndb_milli_sleep(10);
2537     }
2538   }
2539 
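  // Handle commit of an offline (copying) ALTER TABLE distributed from
  // another node: drop the old event operation and NDB_SHARE, then install
  // the new table definition from NDB into the DD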
2540   void handle_offline_alter_table_commit(const Ndb_schema_op *schema) {
2541     DBUG_TRACE;
2542 
2543     assert(is_post_epoch());  // Always after epoch
2544 
2545     if (schema->node_id == own_nodeid()) return;
2546 
2547     write_schema_op_to_binlog(m_thd, schema);
2548     ndbapi_invalidate_table(schema->db, schema->name);
2549     ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);
2550 
2551     NDB_SHARE *share =
2552         acquire_reference(schema->db, schema->name,
2553                           "offline_alter_table_commit");  // Temp ref.
2554     if (share) {
2555       mysql_mutex_lock(&share->mutex);
2556       if (share->op) {
2557         const Ndb_event_data *event_data =
2558             static_cast<const Ndb_event_data *>(share->op->getCustomData());
2559         Ndb_event_data::destroy(event_data);
2560         share->op->setCustomData(NULL);
2561         {
2562           Mutex_guard injector_mutex_g(injector_event_mutex);
2563           injector_ndb->dropEventOperation(share->op);
2564         }
2565         share->op = 0;
2566         NDB_SHARE::release_reference(share, "binlog");
2567       }
2568       mysql_mutex_unlock(&share->mutex);
2569 
2570       mysql_mutex_lock(&ndbcluster_mutex);
2571       NDB_SHARE::mark_share_dropped(&share);
2572       NDB_SHARE::release_reference_have_lock(share,
2573                                              "offline_alter_table_commit");
2574       // If this was the last share ref, it is now deleted. If there are more
2575       // references, the share will remain in the list of dropped shares until
2576       // the remaining references are released.
2577       mysql_mutex_unlock(&ndbcluster_mutex);
2578     }
2579 
2580     // Install table from NDB, overwrite the existing table
2581     if (!create_table_from_engine(schema->db, schema->name,
2582                                   true /* force_overwrite */,
2583                                   true /* invalidate_referenced_tables */)) {
2584       ndb_log_error("Distribution of ALTER TABLE '%s.%s' failed", schema->db,
2585                     schema->name);
2586       m_schema_op_result.set_result(
2587           Ndb_schema_dist::SCHEMA_OP_FAILURE,
2588           "Distribution of ALTER TABLE " + std::string(1, '\'') +
2589               std::string(schema->name) + std::string(1, '\'') + " failed");
2590     }
2591   }
2592 
2593   void handle_online_alter_table_prepare(const Ndb_schema_op *schema) {
2594     assert(is_post_epoch());  // Always after epoch
2595 
2596     ndbapi_invalidate_table(schema->db, schema->name);
2597     ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);
2598 
2599     if (schema->node_id == own_nodeid()) {
2600       // Special case for schema dist participant in own node!
2601       // The schema dist client has exclusive MDL lock and thus
2602       // the schema dist participant(this code) on the same mysqld
2603       // can't open the table def from the DD, trying to acquire
2604       // another MDL lock will just block. Instead (since this is in
2605       // the same mysqld) it provides the new table def via a
2606       // pointer in the NDB_SHARE.
2607       NDB_SHARE *share =
2608           acquire_reference(schema->db, schema->name,
2609                             "online_alter_table_prepare");  // temporary ref.
2610 
2611       const dd::Table *new_table_def =
2612           static_cast<const dd::Table *>(share->inplace_alter_new_table_def);
2613       DBUG_ASSERT(new_table_def);
2614 
2615       // Create a new Ndb_event_data which will be used when creating
2616       // the new NdbEventOperation
2617       Ndb_event_data *event_data = Ndb_event_data::create_event_data(
2618           m_thd, share, share->db, share->table_name, share->key_string(),
2619           injector_thd, new_table_def);
2620       if (!event_data) {
2621         ndb_log_error("NDB Binlog: Failed to create event data for table %s.%s",
2622                       schema->db, schema->name);
2623         DBUG_ASSERT(false);
2624         // NOTE! Should abort the alter from here
2625       }
2626 
2627       // Release old prepared event_data, this is rare but will happen
2628       // when an inplace alter table fails between prepare and commit phase
2629       const Ndb_event_data *old_event_data =
2630           m_schema_dist_data.get_inplace_alter_event_data();
2631       if (old_event_data) {
2632         Ndb_event_data::destroy(old_event_data);
2633         m_schema_dist_data.save_inplace_alter_event_data(nullptr);
2634       }
2635 
2636       // Save the new event_data
2637       m_schema_dist_data.save_inplace_alter_event_data(event_data);
2638 
2639       NDB_SHARE::release_reference(share,
2640                                    "online_alter_table_prepare");  // temp ref.
2641     } else {
2642       write_schema_op_to_binlog(m_thd, schema);
2643 
2644       // Install table from NDB, overwrite the altered table.
2645       // NOTE! it will also try to setup binlogging but since the share
2646       // has an op assigned, that part will be skipped
2647       if (!create_table_from_engine(schema->db, schema->name,
2648                                     true /* force_overwrite */,
2649                                     true /* invalidate_referenced_tables */)) {
2650         ndb_log_error("Distribution of ALTER TABLE '%s.%s' failed", schema->db,
2651                       schema->name);
2652         m_schema_op_result.set_result(
2653             Ndb_schema_dist::SCHEMA_OP_FAILURE,
2654             "Distribution of ALTER TABLE " + std::string(1, '\'') +
2655                 std::string(schema->name) + std::string(1, '\'') + " failed");
2656       }
2657 
2658       // Check that no event_data has been prepared yet (that is only
2659       // done on the participant in the same node)
2660       DBUG_ASSERT(m_schema_dist_data.get_inplace_alter_event_data() == nullptr);
2661     }
2662   }
2663 
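  /*
    Create the Ndb_event_data used by a remote participant during an inplace
    alter table. The table definition is read from NDB (it might not yet
    exist in the DD on this server), its serialized metadata is deserialized
    into a temporary dd::Table and a new Ndb_event_data is created from it.
    Returns the new event data, or nullptr on any failure.
  */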
2664   const Ndb_event_data *remote_participant_inplace_alter_create_event_data(
2665       NDB_SHARE *share, const char *schema_name, const char *table_name) const {
2666     DBUG_TRACE;
2667 
2668     // Read table definition from NDB, it might not exist in DD on this Server
2669     Ndb *ndb = m_thd_ndb->ndb;
2670     Ndb_table_guard ndbtab_g(ndb, schema_name, table_name);
2671     const NDBTAB *ndbtab = ndbtab_g.get_table();
2672     if (!ndbtab) {
2673       // Could not open the table from NDB, very unusual
2674       log_NDB_error(ndb->getDictionary()->getNdbError());
2675       ndb_log_error("Failed to open table '%s.%s' from NDB", schema_name,
2676                     table_name);
2677       return nullptr;
2678     }
2679 
2680     std::string serialized_metadata;
2681     if (!ndb_table_get_serialized_metadata(ndbtab, serialized_metadata)) {
2682       ndb_log_error("Failed to get serialized metadata for table '%s.%s'",
2683                     schema_name, table_name);
2684       return nullptr;
2685     }
2686 
2687     // Deserialize the metadata from NDB
2688     Ndb_dd_client dd_client(m_thd);
2689     Ndb_dd_table dd_table(m_thd);
2690     const dd::sdi_t sdi = serialized_metadata.c_str();
2691     if (!dd_client.deserialize_table(sdi, dd_table.get_table_def())) {
2692       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
2693       ndb_log_error("Failed to deserialize metadata for table '%s.%s'",
2694                     schema_name, table_name);
2695       return nullptr;
2696     }
2697 
2698     // Create new event_data
2699     Ndb_event_data *event_data = Ndb_event_data::create_event_data(
2700         m_thd, share, schema_name, table_name, share->key_string(),
2701         injector_thd, dd_table.get_table_def());
2702     if (!event_data) {
2703       ndb_log_error("NDB Binlog: Failed to create event data for table '%s.%s'",
2704                     share->db, share->table_name);
2705       return nullptr;
2706     }
2707 
2708     return event_data;
2709   }
2710 
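  /*
    Commit phase of online (inplace) alter table. Recreate the table's
    NdbEventOperation for the new table layout, using the Ndb_event_data
    prepared earlier when this node is the schema dist client, or event data
    created from the NDB table metadata when this node is a remote
    participant. The new event operation is swapped in and the old one is
    dropped together with its event data.
  */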
2711   void handle_online_alter_table_commit(const Ndb_schema_op *schema) {
2712     assert(is_post_epoch());  // Always after epoch
2713 
2714     NDB_SHARE *share =
2715         acquire_reference(schema->db, schema->name,
2716                           "online_alter_table_commit");  // temporary ref.
2717     if (share) {
2718       ndb_log_verbose(9, "NDB Binlog: handling online alter/rename");
2719 
2720       mysql_mutex_lock(&share->mutex);
2721 
2722       const Ndb_event_data *event_data;
2723       if (schema->node_id == own_nodeid()) {
2724         // Get the event_data which has been created during prepare phase
2725         event_data = m_schema_dist_data.get_inplace_alter_event_data();
2726         if (!event_data) {
2727           ndb_log_error("Failed to get prepared event data '%s'",
2728                         share->key_string());
2729           DBUG_ASSERT(false);
2730         }
2731         // The event_data pointer has been taken over
2732         m_schema_dist_data.save_inplace_alter_event_data(nullptr);
2733       } else {
2734         // Create Ndb_event_data which will be used when creating
2735         // the new NdbEventOperation.
2736         event_data = remote_participant_inplace_alter_create_event_data(
2737             share, share->db, share->table_name);
2738         if (!event_data) {
2739           ndb_log_error("Failed to create event data for table '%s'",
2740                         share->key_string());
2741           DBUG_ASSERT(false);
2742         }
2743       }
2744       DBUG_ASSERT(event_data);
2745 
2746       NdbEventOperation *new_op = nullptr;
2747       if (share->op && event_data /* safety */) {
2748         Ndb_binlog_client binlog_client(m_thd, schema->db, schema->name);
2749         // The table has an event operation set up and during an inplace
2750         // alter table it needs to be recreated for the new table layout.
2751         // NOTE! Nothing has changed here regarding whether or not the
2752         // table should still have an event operation, i.e. if it had
2753         // one before, it should still have one after the alter. But
2754         // for consistency, check that the table should have an event op
2755         DBUG_ASSERT(binlog_client.table_should_have_event_op(share));
2756 
2757         // Save the current event operation since create_event_op()
2758         // will assign the new one to "share->op", also release the "binlog"
2759         // reference as it will be acquired again in create_event_op()
2760         // NOTE! This should probably be rewritten to not assign share->op and
2761         // acquire the reference in create_event_op()
2762         NdbEventOperation *const curr_op = share->op;
2763         share->op = nullptr;
2764         NDB_SHARE::release_reference(share, "binlog");
2765 
2766         // Get table from NDB
2767         Ndb_table_guard ndbtab_g(m_thd_ndb->ndb, schema->db, schema->name);
2768         const NDBTAB *ndbtab = ndbtab_g.get_table();
2769 
2770         DBUG_ASSERT(ndbtab != nullptr);
2771 
2772         // Create new NdbEventOperation
2773         if (binlog_client.create_event_op(share, ndbtab, event_data)) {
2774           ndb_log_error("Failed to create event operation for table '%s'",
2775                         share->key_string());
2776 
2777           // NOTE! Should fail the alter here
2778           DBUG_ASSERT(false);
2779         } else {
2780           // Get the newly created NdbEventOperation, will be swapped
2781           // into place (again) later
2782           new_op = share->op;
2783         }
2784 
2785         // Reinstall the current NdbEventOperation
2786         share->op = curr_op;
2787       } else {
2788         // New event_data was created (that's the default) but the table didn't
2789         // have any event operation and thus the event_data is unused, free it
2790         Ndb_event_data::destroy(event_data);
2791       }
2792 
2793       ndb_log_verbose(9, "NDB Binlog: handling online alter/rename done");
2794 
2795       // There should be no event_data left in m_schema_dist_data at this point
2796       DBUG_ASSERT(m_schema_dist_data.get_inplace_alter_event_data() == nullptr);
2797 
2798       // Start using the new event operation and release the old
2799       if (share->op && new_op) {
2800         // Delete old event_data
2801         const Ndb_event_data *event_data =
2802             static_cast<const Ndb_event_data *>(share->op->getCustomData());
2803         share->op->setCustomData(NULL);
2804         Ndb_event_data::destroy(event_data);
2805 
2806         // Drop old event operation
2807         {
2808           Mutex_guard injector_mutex_g(injector_event_mutex);
2809           injector_ndb->dropEventOperation(share->op);
2810         }
2811         // Install new event operation
2812         share->op = new_op;
2813       }
2814       mysql_mutex_unlock(&share->mutex);
2815 
2816       NDB_SHARE::release_reference(share,
2817                                    "online_alter_table_commit");  // temp ref.
2818     }
2819 
2820     DBUG_ASSERT(m_schema_dist_data.get_inplace_alter_event_data() == nullptr);
2821   }
2822 
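  /*
    Remove a table from the DD on this server. Acquires exclusive MDL on the
    table, refuses to touch a local "shadow table" which is not an NDB table,
    and invalidates tables referencing it. Returns true on success.
  */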
2823   bool remove_table_from_dd(const char *schema_name, const char *table_name) {
2824     DBUG_TRACE;
2825 
2826     Ndb_dd_client dd_client(m_thd);
2827     Ndb_referenced_tables_invalidator invalidator(m_thd, dd_client);
2828 
2829     if (!dd_client.mdl_locks_acquire_exclusive(schema_name, table_name)) {
2830       log_and_clear_thd_conditions(m_thd, condition_logging_level::WARNING);
2831       ndb_log_warning("Failed to acquire exclusive metadata lock on '%s.%s'",
2832                       schema_name, table_name);
2833       return false;
2834     }
2835 
2836     // Check if there is an existing table in DD which is not an NDB table, in
2837     // such a case refuse to remove the "shadow table"
2838     if (has_shadow_table(dd_client, schema_name, table_name)) return false;
2839 
2840     if (!dd_client.remove_table(schema_name, table_name, &invalidator)) {
2841       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
2842       ndb_log_error("Failed to remove table '%s.%s' from DD", schema_name,
2843                     table_name);
2844       return false;
2845     }
2846 
2847     if (!invalidator.invalidate()) {
2848       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
2849       ndb_log_error("Failed to invalidate referenced tables for '%s.%s'",
2850                     schema_name, table_name);
2851       return false;
2852     }
2853 
2854     dd_client.commit();
2855     return true;
2856   }
2857 
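  /*
    Handle DROP TABLE distributed from another MySQL Server. Writes the
    schema op to the binlog, removes the table from the DD (unless a local
    "shadow table" is detected), marks the NDB_SHARE as dropped and
    invalidates the table in NdbApi and the table definition cache.
  */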
2858   void handle_drop_table(const Ndb_schema_op *schema) {
2859     DBUG_TRACE;
2860 
2861     assert(is_post_epoch());  // Always after epoch
2862 
2863     if (schema->node_id == own_nodeid()) return;
2864 
2865     write_schema_op_to_binlog(m_thd, schema);
2866 
2867     // Participant never takes GSL
2868     assert(m_thd_ndb->check_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT));
2869 
2870     if (!remove_table_from_dd(schema->db, schema->name)) {
2871       // The table couldn't be removed, continue to invalidate the table in
2872       // NdbApi, close cached tables etc. This case may happen when a MySQL
2873       // Server drops a "shadow" table and afterwards someone also drops the
2874       // table with the same name in NDB
2875       ndb_log_warning(
2876           "Failed to remove table definition from DD, continue anyway...");
2877       m_schema_op_result.set_result(
2878           Ndb_schema_dist::SCHEMA_OP_FAILURE,
2879           "Distribution of DROP TABLE " + std::string(1, '\'') +
2880               std::string(schema->name) + std::string(1, '\'') + " failed");
2881     }
2882 
2883     NDB_SHARE *share = acquire_reference(schema->db, schema->name,
2884                                          "drop_table");  // temporary ref.
2885     if (!share || !share->op) {
2886       ndbapi_invalidate_table(schema->db, schema->name);
2887       ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);
2888     }
2889     if (share) {
2890       mysql_mutex_lock(&ndbcluster_mutex);
2891       NDB_SHARE::mark_share_dropped(&share);  // server ref.
2892       DBUG_ASSERT(share);                     // Should still be ref'ed
2893       NDB_SHARE::release_reference_have_lock(share,
2894                                              "drop_table");  // temporary ref.
2895       mysql_mutex_unlock(&ndbcluster_mutex);
2896     }
2897 
2898     ndbapi_invalidate_table(schema->db, schema->name);
2899     ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);
2900   }
2901 
2902   /*
2903     The RENAME is performed in two steps.
2904     1) PREPARE_RENAME - sends the new table key to participants
2905     2) RENAME - perform the actual rename
2906   */
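  /*
    A rough sketch of the participant side sequence (the key and table names
    below are hypothetical examples, not taken from any real trace):

      SOT_RENAME_TABLE_PREPARE, query = "./db1/t2"
        -> handle_rename_table_prepare() saves "./db1/t2" as the prepared key
      SOT_RENAME_TABLE, db = "db1", name = "t1"
        -> handle_rename_table() renames db1.t1 to db1.t2 in the DD and
           renames the NDB_SHARE using the prepared key
  */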
2907 
2908   void handle_rename_table_prepare(const Ndb_schema_op *schema) {
2909     DBUG_TRACE;
2910 
2911     assert(is_post_epoch());  // Always after epoch
2912 
2913     if (schema->node_id == own_nodeid()) return;
2914 
2915     const char *new_key_for_table = schema->query;
2916     DBUG_PRINT("info", ("new_key_for_table: '%s'", new_key_for_table));
2917 
2918     // Release potentially previously prepared new_key
2919     {
2920       NDB_SHARE_KEY *old_prepared_key =
2921           m_schema_dist_data.get_prepared_rename_key();
2922       if (old_prepared_key) NDB_SHARE::free_key(old_prepared_key);
2923     }
2924 
2925     // Create a new key and save it, then hope for the best (i.e.
2926     // that it can be found later when the RENAME arrives)
2927     NDB_SHARE_KEY *new_prepared_key = NDB_SHARE::create_key(new_key_for_table);
2928     m_schema_dist_data.save_prepared_rename_key(new_prepared_key);
2929   }
2930 
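  /*
    Rename a table in the DD on this server. The behaviour depends on
    whether "shadow tables" (local non-NDB tables with the same name) exist:
     - the source name is a shadow table: install the renamed NDB table
       into DD under the new name (unless the new name is also shadowed)
     - the new name is a shadow table: remove the source table from DD
     - otherwise: plain rename of the table in DD
    Referenced tables are invalidated and the DD transaction is committed
    before returning true.
  */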
2931   bool rename_table_in_dd(const char *schema_name, const char *table_name,
2932                           const char *new_schema_name,
2933                           const char *new_table_name,
2934                           const NdbDictionary::Table *ndbtab,
2935                           const std::string &tablespace_name) const {
2936     DBUG_TRACE;
2937 
2938     Ndb_dd_client dd_client(m_thd);
2939 
2940     // Acquire exclusive MDL lock on the table
2941     if (!dd_client.mdl_locks_acquire_exclusive(schema_name, table_name)) {
2942       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
2943       ndb_log_error("Failed to acquire exclusive metadata lock on '%s.%s'",
2944                     schema_name, table_name);
2945       return false;
2946     }
2947 
2948     // Acquire exclusive MDL lock also on the new table name
2949     if (!dd_client.mdl_locks_acquire_exclusive(new_schema_name,
2950                                                new_table_name)) {
2951       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
2952       ndb_log_error(
2953           "Failed to acquire exclusive metadata lock on new table name '%s.%s'",
2954           new_schema_name, new_table_name);
2955       return false;
2956     }
2957 
2958     if (has_shadow_table(dd_client, schema_name, table_name)) {
2959       // The renamed table was a "shadow table".
2960 
2961       if (has_shadow_table(dd_client, new_schema_name, new_table_name)) {
2962         // The new table name is also a "shadow table", nothing to do
2963         return false;
2964       }
2965 
2966       // Install the renamed table into DD
2967       std::string serialized_metadata;
2968       if (!ndb_table_get_serialized_metadata(ndbtab, serialized_metadata)) {
2969         ndb_log_error("Failed to get serialized metadata for table '%s.%s'",
2970                       new_schema_name, new_table_name);
2971         return false;
2972       }
2973 
2974       // Deserialize the metadata from NDB
2975       Ndb_dd_table dd_table(m_thd);
2976       const dd::sdi_t sdi = serialized_metadata.c_str();
2977       if (!dd_client.deserialize_table(sdi, dd_table.get_table_def())) {
2978         log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
2979         ndb_log_error("Failed to deserialize metadata for table '%s.%s'",
2980                       new_schema_name, new_table_name);
2981         return false;
2982       }
2983 
2984       if (!dd_client.install_table(
2985               new_schema_name, new_table_name, sdi, ndbtab->getObjectId(),
2986               ndbtab->getObjectVersion(), ndbtab->getPartitionCount(),
2987               tablespace_name, true, nullptr)) {
2988         log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
2989         ndb_log_error("Failed to install renamed table '%s.%s' in DD",
2990                       new_schema_name, new_table_name);
2991         return false;
2992       }
2993 
2994       dd_client.commit();
2995       return true;
2996     }
2997 
2998     Ndb_referenced_tables_invalidator invalidator(m_thd, dd_client);
2999 
3000     if (has_shadow_table(dd_client, new_schema_name, new_table_name)) {
3001       // There is a "shadow table", remove the table from DD
3002       ndb_log_warning(
3003           "Removing the renamed table '%s.%s' from DD, there is a local table",
3004           schema_name, table_name);
3005       if (!dd_client.remove_table(schema_name, table_name, &invalidator)) {
3006         log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3007         ndb_log_error("Failed to remove the renamed table '%s.%s' from DD",
3008                       schema_name, table_name);
3009         return false;
3010       }
3011     } else {
3012       // There is no "shadow table", rename the table in DD
3013       if (!dd_client.rename_table(schema_name, table_name, new_schema_name,
3014                                   new_table_name, ndbtab->getObjectId(),
3015                                   ndbtab->getObjectVersion(), &invalidator)) {
3016         log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3017         ndb_log_error("Failed to rename table '%s.%s' to '%s.%s'", schema_name,
3018                       table_name, new_schema_name, new_table_name);
3019         return false;
3020       }
3021     }
3022 
3023     if (!invalidator.invalidate()) {
3024       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3025       ndb_log_error("Failed to invalidate referenced tables for '%s.%s'",
3026                     schema_name, table_name);
3027       return false;
3028     }
3029 
3030     dd_client.commit();
3031     return true;
3032   }
3033 
3034   void handle_rename_table(const Ndb_schema_op *schema) {
3035     DBUG_TRACE;
3036 
3037     assert(is_post_epoch());  // Always after epoch
3038 
3039     if (schema->node_id == own_nodeid()) return;
3040 
3041     write_schema_op_to_binlog(m_thd, schema);
3042 
3043     // Participant never takes GSL
3044     assert(m_thd_ndb->check_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT));
3045 
3046     NDB_SHARE *share = acquire_reference(schema->db, schema->name,
3047                                          "rename_table");  // temporary ref.
3048     if (!share || !share->op) {
3049       ndbapi_invalidate_table(schema->db, schema->name);
3050       ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);
3051     }
3052     if (share)
3053       NDB_SHARE::release_reference(share, "rename_table");  // temporary ref.
3054 
3055     share = acquire_reference(schema->db, schema->name,
3056                               "rename_table");  // temporary ref.
3057     if (!share) {
3058       // The RENAME needs to find the share so it can be renamed
3059       DBUG_ASSERT(share);
3060       return;
3061     }
3062 
3063     NDB_SHARE_KEY *prepared_key = m_schema_dist_data.get_prepared_rename_key();
3064     if (!prepared_key) {
3065       // The rename needs to have new_key set
3066       // by a previous RENAME_PREPARE
3067       DBUG_ASSERT(prepared_key);
3068       return;
3069     }
3070 
3071     // Rename on the participant is always from real name to real name
3072     // (i.e. neither the old nor the new name should be a temporary name)
3073     DBUG_ASSERT(!ndb_name_is_temp(schema->name));
3074     DBUG_ASSERT(!ndb_name_is_temp(NDB_SHARE::key_get_table_name(prepared_key)));
3075 
3076     // Open the renamed table from NDB
3077     const char *new_db_name = NDB_SHARE::key_get_db_name(prepared_key);
3078     const char *new_table_name = NDB_SHARE::key_get_table_name(prepared_key);
3079     Ndb_table_guard ndbtab_g(m_thd_ndb->ndb, new_db_name, new_table_name);
3080     const NdbDictionary::Table *ndbtab = ndbtab_g.get_table();
3081     if (!ndbtab) {
3082       // Could not open the table from NDB, very unusual
3083       log_NDB_error(m_thd_ndb->ndb->getDictionary()->getNdbError());
3084       ndb_log_error("Failed to rename, could not open table '%s.%s' from NDB",
3085                     new_db_name, new_table_name);
3086       m_schema_op_result.set_result(
3087           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3088           "Distribution of RENAME TABLE " + std::string(1, '\'') +
3089               std::string(schema->name) + std::string(1, '\'') + " failed");
3090       return;
3091     }
3092 
3093     const std::string tablespace_name =
3094         ndb_table_tablespace_name(m_thd_ndb->ndb->getDictionary(), ndbtab);
3095 
3096     // Rename table in DD
3097     if (!rename_table_in_dd(schema->db, schema->name, new_db_name,
3098                             new_table_name, ndbtab, tablespace_name)) {
3099       ndb_log_warning(
3100           "Failed to rename table definition in DD, continue anyway...");
3101       m_schema_op_result.set_result(
3102           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3103           "Distribution of RENAME TABLE " + std::string(1, '\'') +
3104               std::string(schema->name) + std::string(1, '\'') + " failed");
3105     }
3106 
3107     // Rename share and release the old key
3108     NDB_SHARE_KEY *old_key = share->key;
3109     NDB_SHARE::rename_share(share, prepared_key);
3110     m_schema_dist_data.save_prepared_rename_key(NULL);
3111     NDB_SHARE::free_key(old_key);
3112 
3113     NDB_SHARE::release_reference(share, "rename_table");  // temporary ref.
3114 
3115     ndbapi_invalidate_table(schema->db, schema->name);
3116     ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);
3117   }
3118 
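  /*
    Handle DROP DATABASE distributed from another MySQL Server. Removes all
    NDB tables of the schema from the DD, then drops the database itself
    with a local DROP DATABASE query, unless the schema still contains
    local (non-NDB) tables in which case only the NDB tables are removed.
  */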
3119   void handle_drop_db(const Ndb_schema_op *schema) {
3120     DBUG_TRACE;
3121 
3122     assert(is_post_epoch());  // Always after epoch
3123 
3124     if (schema->node_id == own_nodeid()) return;
3125 
3126     write_schema_op_to_binlog(m_thd, schema);
3127 
3128     // Participant never takes GSL
3129     assert(m_thd_ndb->check_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT));
3130 
3131     Ndb_dd_client dd_client(m_thd);
3132 
3133     // Lock the schema in DD
3134     if (!dd_client.mdl_lock_schema(schema->db)) {
3135       // Failed to acquire lock, skip dropping
3136       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3137       ndb_log_error("Failed to acquire MDL for db '%s'", schema->db);
3138       m_schema_op_result.set_result(
3139           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3140           "Distribution of DROP DATABASE " + std::string(1, '\'') +
3141               std::string(schema->db) + std::string(1, '\'') + " failed");
3142       return;
3143     }
3144 
3145     bool schema_exists;
3146     if (!dd_client.schema_exists(schema->db, &schema_exists)) {
3147       // Failed to check if database exists, skip dropping
3148       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3149       ndb_log_error("Failed to determine if database '%s' exists", schema->db);
3150       m_schema_op_result.set_result(
3151           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3152           "Distribution of DROP DATABASE " + std::string(1, '\'') +
3153               std::string(schema->db) + std::string(1, '\'') + " failed");
3154       return;
3155     }
3156 
3157     if (!schema_exists) {
3158       DBUG_PRINT("info", ("Schema '%s' does not exist", schema->db));
3159       // Nothing to do
3160       return;
3161     }
3162 
3163     // Remove all NDB tables in the dropped database from DD,
3164     // this function is only called when they all have been dropped
3165     // from NDB by another MySQL Server
3166     //
3167     // NOTE! This is code which always runs "in the server" so it would be
3168     // appropriate to log error messages to the server log file describing
3169     // any problems which occur in these functions.
3170     std::unordered_set<std::string> ndb_tables_in_DD;
3171     if (!dd_client.get_ndb_table_names_in_schema(schema->db,
3172                                                  &ndb_tables_in_DD)) {
3173       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3174       ndb_log_error("Failed to get list of NDB tables in database '%s'",
3175                     schema->db);
3176       m_schema_op_result.set_result(
3177           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3178           "Distribution of DROP DATABASE " + std::string(1, '\'') +
3179               std::string(schema->db) + std::string(1, '\'') + " failed");
3180       return;
3181     }
3182 
3183     Ndb_referenced_tables_invalidator invalidator(m_thd, dd_client);
3184 
3185     for (const auto &ndb_table_name : ndb_tables_in_DD) {
3186       if (!dd_client.mdl_locks_acquire_exclusive(schema->db,
3187                                                  ndb_table_name.c_str())) {
3188         log_and_clear_thd_conditions(m_thd, condition_logging_level::WARNING);
3189         ndb_log_warning("Failed to acquire exclusive MDL on '%s.%s'",
3190                         schema->db, ndb_table_name.c_str());
3191         continue;
3192       }
3193 
3194       if (!dd_client.remove_table(schema->db, ndb_table_name.c_str(),
3195                                   &invalidator)) {
3196         // Failed to remove the table from DD, not much else to do
3197         // than try with the next
3198         log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3199         ndb_log_error("Failed to remove table '%s.%s' from DD", schema->db,
3200                       ndb_table_name.c_str());
3201         continue;
3202       }
3203 
3204       NDB_SHARE *share = acquire_reference(schema->db, ndb_table_name.c_str(),
3205                                            "drop_db");  // temporary ref.
3206       if (!share || !share->op) {
3207         ndbapi_invalidate_table(schema->db, ndb_table_name.c_str());
3208         ndb_tdc_close_cached_table(m_thd, schema->db, ndb_table_name.c_str());
3209       }
3210       if (share) {
3211         mysql_mutex_lock(&ndbcluster_mutex);
3212         NDB_SHARE::mark_share_dropped(&share);  // server ref.
3213         DBUG_ASSERT(share);                     // Should still be ref'ed
3214         NDB_SHARE::release_reference_have_lock(share,
3215                                                "drop_db");  // temporary ref.
3216         mysql_mutex_unlock(&ndbcluster_mutex);
3217       }
3218 
3219       ndbapi_invalidate_table(schema->db, ndb_table_name.c_str());
3220       ndb_tdc_close_cached_table(m_thd, schema->db, ndb_table_name.c_str());
3221     }
3222 
3223     if (!invalidator.invalidate()) {
3224       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3225       ndb_log_error("Failed to invalidate referenced tables for database '%s'",
3226                     schema->db);
3227       m_schema_op_result.set_result(
3228           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3229           "Distribution of DROP DATABASE " + std::string(1, '\'') +
3230               std::string(schema->db) + std::string(1, '\'') + " failed");
3231       return;
3232     }
3233 
3234     dd_client.commit();
3235 
3236     bool found_local_tables;
3237     if (!dd_client.have_local_tables_in_schema(schema->db,
3238                                                &found_local_tables)) {
3239       // Failed to access the DD to check if non-NDB tables existed, assume
3240       // the worst and skip dropping this database
3241       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3242       ndb_log_error("Failed to check if database '%s' contained local tables.",
3243                     schema->db);
3244       ndb_log_error("Skipping drop of non-NDB database artifacts.");
3245       m_schema_op_result.set_result(
3246           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3247           "Distribution of DROP DATABASE " + std::string(1, '\'') +
3248               std::string(schema->db) + std::string(1, '\'') + " failed");
3249       return;
3250     }
3251 
3252     DBUG_PRINT("exit", ("found_local_tables: %d", found_local_tables));
3253 
3254     if (found_local_tables) {
3255       /* Tables exist as local tables, print warning and leave them */
3256       ndb_log_warning(
3257           "NDB Binlog: Skipping drop database '%s' since "
3258           "it contained local tables, "
3259           "binlog schema event '%s' from node %d. ",
3260           schema->db, schema->query, schema->node_id);
3261       m_schema_op_result.set_result(
3262           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3263           "Distribution of DROP DATABASE " + std::string(1, '\'') +
3264               std::string(schema->db) + std::string(1, '\'') + " failed");
3265       return;
3266     }
3267 
3268     // Run the plain DROP DATABASE query in order to remove other artifacts
3269     // like the physical database directory.
3270     // Note! This is not done in the case where a "shadow" table is found
3271     // in the schema, but in such a case at least all the NDB tables have
3272     // already been removed from the DD
3273     Ndb_local_connection mysqld(m_thd);
3274     if (mysqld.drop_database(schema->db)) {
3275       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3276       ndb_log_error("Failed to execute 'DROP DATABASE' for database '%s'",
3277                     schema->db);
3278       m_schema_op_result.set_result(
3279           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3280           "Distribution of DROP DATABASE " + std::string(1, '\'') +
3281               std::string(schema->db) + std::string(1, '\'') + " failed");
3282     }
3283   }
3284 
3285   void handle_truncate_table(const Ndb_schema_op *schema) {
3286     DBUG_TRACE;
3287 
3288     assert(!is_post_epoch());  // Always directly
3289 
3290     if (schema->node_id == own_nodeid()) return;
3291 
3292     write_schema_op_to_binlog(m_thd, schema);
3293 
3294     NDB_SHARE *share =
3295         acquire_reference(schema->db, schema->name, "truncate_table");
3296     // invalidation already handled by binlog thread
3297     if (!share || !share->op) {
3298       ndbapi_invalidate_table(schema->db, schema->name);
3299       ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);
3300     }
3301     if (share) {
3302       // Reset the table's shared auto_increment counter
3303       share->reset_tuple_id_range();
3304 
3305       NDB_SHARE::release_reference(share, "truncate_table");  // temporary ref.
3306     }
3307 
3308     if (!create_table_from_engine(schema->db, schema->name,
3309                                   true /* force_overwrite */)) {
3310       ndb_log_error("Distribution of TRUNCATE TABLE '%s.%s' failed", schema->db,
3311                     schema->name);
3312       m_schema_op_result.set_result(
3313           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3314           "Distribution of TRUNCATE TABLE " + std::string(1, '\'') +
3315               std::string(schema->name) + std::string(1, '\'') + " failed");
3316     }
3317   }
3318 
3319   void handle_create_table(const Ndb_schema_op *schema) {
3320     DBUG_TRACE;
3321 
3322     assert(!is_post_epoch());  // Always directly
3323 
3324     if (schema->node_id == own_nodeid()) return;
3325 
3326     write_schema_op_to_binlog(m_thd, schema);
3327 
3328     if (!create_table_from_engine(schema->db, schema->name,
3329                                   true, /* force_overwrite */
3330                                   true /* invalidate_referenced_tables */)) {
3331       ndb_log_error("Distribution of CREATE TABLE '%s.%s' failed", schema->db,
3332                     schema->name);
3333       m_schema_op_result.set_result(
3334           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3335           "Distribution of CREATE TABLE " + std::string(1, '\'') +
3336               std::string(schema->name) + std::string(1, '\'') + " failed");
3337     }
3338   }
3339 
3340   void handle_create_db(const Ndb_schema_op *schema) {
3341     DBUG_TRACE;
3342 
3343     assert(!is_post_epoch());  // Always directly
3344 
3345     if (schema->node_id == own_nodeid()) return;
3346 
3347     write_schema_op_to_binlog(m_thd, schema);
3348 
3349     // Participant never takes GSL
3350     assert(m_thd_ndb->check_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT));
3351 
3352     Ndb_local_connection mysqld(m_thd);
3353     if (mysqld.execute_database_ddl(schema->query)) {
3354       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3355       ndb_log_error("Failed to execute 'CREATE DATABASE' for database '%s'",
3356                     schema->db);
3357       m_schema_op_result.set_result(
3358           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3359           "Distribution of CREATE DATABASE " + std::string(1, '\'') +
3360               std::string(schema->db) + std::string(1, '\'') + " failed");
3361       return;
3362     }
3363 
3364     // Update the Schema in DD with the id and version details
3365     if (!ndb_dd_update_schema_version(m_thd, schema->db, schema->id,
3366                                       schema->version)) {
3367       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3368       ndb_log_error("Failed to update schema version for database '%s'",
3369                     schema->db);
3370     }
3371   }
3372 
3373   void handle_alter_db(const Ndb_schema_op *schema) {
3374     DBUG_TRACE;
3375 
3376     assert(!is_post_epoch());  // Always directly
3377 
3378     if (schema->node_id == own_nodeid()) return;
3379 
3380     write_schema_op_to_binlog(m_thd, schema);
3381 
3382     // Participant never takes GSL
3383     assert(m_thd_ndb->check_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT));
3384 
3385     Ndb_local_connection mysqld(m_thd);
3386     if (mysqld.execute_database_ddl(schema->query)) {
3387       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3388       ndb_log_error("Failed to execute 'ALTER DATABASE' for database '%s'",
3389                     schema->db);
3390       m_schema_op_result.set_result(
3391           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3392           "Distribution of ALTER DATABASE " + std::string(1, '\'') +
3393               std::string(schema->db) + std::string(1, '\'') + " failed");
3394       return;
3395     }
3396 
3397     // Update the Schema in DD with the id and version details
3398     if (!ndb_dd_update_schema_version(m_thd, schema->db, schema->id,
3399                                       schema->version)) {
3400       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3401       ndb_log_error("Failed to update schema version for database '%s'",
3402                     schema->db);
3403     }
3404   }
3405 
3406   void handle_grant_op(const Ndb_schema_op *schema) {
3407     DBUG_TRACE;
3408     Ndb_local_connection sql_runner(m_thd);
3409 
3410     assert(!is_post_epoch());  // Always directly
3411 
3412     // Participant never takes GSL
3413     assert(
3414         get_thd_ndb(m_thd)->check_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT));
3415 
3416     if (schema->node_id == own_nodeid()) return;
3417 
3418     /* SOT_GRANT was sent by a pre-8.0 mysqld. Just ignore it. */
3419     if (schema->type == SOT_GRANT) {
3420       ndb_log_verbose(9, "Got SOT_GRANT event, disregarding.");
3421       return;
3422     }
3423 
3424     /* For SOT_ACL_SNAPSHOT, update the snapshots for the users listed.
3425      */
3426     if (schema->type == SOT_ACL_SNAPSHOT) {
3427       if (!Ndb_stored_grants::update_users_from_snapshot(m_thd,
3428                                                          schema->query)) {
3429         ndb_log_error("Failed to apply ACL snapshot for users: %s",
3430                       schema->query);
3431         m_schema_op_result.set_result(Ndb_schema_dist::SCHEMA_OP_FAILURE,
3432                                       "Distribution of ACL change failed");
3433       }
3434       return;
3435     }
3436 
3437     DBUG_ASSERT(schema->type == SOT_ACL_STATEMENT ||
3438                 schema->type == SOT_ACL_STATEMENT_REFRESH);
3439 
3440     LEX_CSTRING thd_db_save = m_thd->db();
3441 
3442     std::string use_db(schema->db);
3443     std::string query(schema->query);
3444 
3445     if (!query.compare(0, 4, "use ")) {
3446       size_t delimiter = query.find_first_of(';');
3447       use_db = query.substr(4, delimiter - 4);
3448       query = query.substr(delimiter + 1);
3449     }
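    // For example (hypothetical statement, assuming the schema dist client
    // prefixed it with its current database):
    //   "use db1;GRANT SELECT ON t1 TO 'someuser'@'localhost'"
    // gives use_db = "db1" and query = "GRANT SELECT ON t1 TO ..."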
3450 
3451     /* Execute ACL query */
3452     LEX_CSTRING set_db = {use_db.c_str(), use_db.length()};
3453     m_thd->reset_db(set_db);
3454     ndb_log_verbose(40, "Using database: %s", use_db.c_str());
3455     if (sql_runner.run_acl_statement(query)) {
3456       ndb_log_error("Failed to execute ACL query: %s", query.c_str());
3457       m_schema_op_result.set_result(Ndb_schema_dist::SCHEMA_OP_FAILURE,
3458                                     "Distribution of ACL change failed");
3459       m_thd->reset_db(thd_db_save);
3460       return;
3461     }
3462 
3463     /* Reset database */
3464     m_thd->reset_db(thd_db_save);
3465 
3466     if (schema->type == SOT_ACL_STATEMENT_REFRESH) {
3467       Ndb_stored_grants::maintain_cache(m_thd);
3468     }
3469   }
3470 
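  /*
    Install a tablespace in the DD using the data file names fetched from
    NDB. Takes exclusive MDL on the tablespace and installs it with
    force_overwrite, the caller is expected to commit the DD transaction.
  */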
3471   bool create_tablespace_from_engine(Ndb_dd_client &dd_client,
3472                                      const char *tablespace_name, uint32 id,
3473                                      uint32 version) {
3474     DBUG_TRACE;
3475     DBUG_PRINT("enter", ("tablespace_name: %s, id: %u, version: %u",
3476                          tablespace_name, id, version));
3477 
3478     Ndb *ndb = m_thd_ndb->ndb;
3479     NdbDictionary::Dictionary *dict = ndb->getDictionary();
3480     std::vector<std::string> datafile_names;
3481     if (!ndb_get_datafile_names(dict, tablespace_name, &datafile_names)) {
3482       log_NDB_error(dict->getNdbError());
3483       ndb_log_error("Failed to get data files assigned to tablespace '%s'",
3484                     tablespace_name);
3485       return false;
3486     }
3487 
3488     if (!dd_client.mdl_lock_tablespace_exclusive(tablespace_name)) {
3489       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3490       ndb_log_error("MDL lock could not be acquired for tablespace '%s'",
3491                     tablespace_name);
3492       return false;
3493     }
3494 
3495     if (!dd_client.install_tablespace(tablespace_name, datafile_names, id,
3496                                       version, true /* force_overwrite */)) {
3497       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3498       ndb_log_error("Failed to install tablespace '%s' in DD", tablespace_name);
3499       return false;
3500     }
3501 
3502     return true;
3503   }
3504 
3505   void handle_create_tablespace(const Ndb_schema_op *schema) {
3506     DBUG_TRACE;
3507 
3508     assert(!is_post_epoch());  // Always directly
3509 
3510     if (schema->node_id == own_nodeid()) {
3511       return;
3512     }
3513 
3514     write_schema_op_to_binlog(m_thd, schema);
3515 
3516     Ndb_dd_client dd_client(m_thd);
3517     if (!create_tablespace_from_engine(dd_client, schema->name, schema->id,
3518                                        schema->version)) {
3519       ndb_log_error("Distribution of CREATE TABLESPACE '%s' failed",
3520                     schema->name);
3521       m_schema_op_result.set_result(
3522           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3523           "Distribution of CREATE TABLESPACE " + std::string(1, '\'') +
3524               std::string(schema->name) + std::string(1, '\'') + " failed");
3525       return;
3526     }
3527     dd_client.commit();
3528   }
3529 
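  /*
    Collect references to all tables in the given tablespace. Returns true
    also when the tablespace does not exist in DD, in which case table_refs
    is left empty.
  */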
3530   bool get_tablespace_table_refs(
3531       const char *name,
3532       std::vector<dd::Tablespace_table_ref> &table_refs) const {
3533     Ndb_dd_client dd_client(m_thd);
3534     if (!dd_client.mdl_lock_tablespace(name, true /* intention_exclusive */)) {
3535       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3536       ndb_log_error("MDL lock could not be acquired on tablespace '%s'", name);
3537       return false;
3538     }
3539 
3540     const dd::Tablespace *existing = nullptr;
3541     if (!dd_client.get_tablespace(name, &existing)) {
3542       log_and_clear_thd_conditions(m_thd, condition_logging_level::WARNING);
3543       return false;
3544     }
3545 
3546     if (existing == nullptr) {
3547       // Tablespace doesn't exist, no need to update tables after the ALTER
3548       return true;
3549     }
3550 
3551     if (!ndb_dd_disk_data_get_table_refs(m_thd, *existing, table_refs)) {
3552       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3553       ndb_log_error("Failed to get table refs in tablespace '%s'", name);
3554       return false;
3555     }
3556     return true;
3557   }
3558 
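  /*
    Update the tablespace id of each table in table_refs to point at the
    (re)installed tablespace. Takes exclusive MDL on each table before
    setting the new tablespace id in its DD table object.
  */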
3559   bool update_tablespace_id_in_tables(
3560       Ndb_dd_client &dd_client, const char *tablespace_name,
3561       const std::vector<dd::Tablespace_table_ref> &table_refs) const {
3562     if (!dd_client.mdl_lock_tablespace(tablespace_name,
3563                                        true /* intention_exclusive */)) {
3564       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3565       ndb_log_error("MDL lock could not be acquired on tablespace '%s'",
3566                     tablespace_name);
3567       return false;
3568     }
3569 
3570     dd::Object_id tablespace_id;
3571     if (!dd_client.lookup_tablespace_id(tablespace_name, &tablespace_id)) {
3572       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3573       ndb_log_error("Failed to retrieve object id of tablespace '%s'",
3574                     tablespace_name);
3575       return false;
3576     }
3577 
3578     for (auto &table_ref : table_refs) {
3579       // Convert table_refs to correct case when necessary
3580       const std::string schema_name =
3581           ndb_dd_fs_name_case(table_ref.m_schema_name.c_str());
3582       const std::string table_name =
3583           ndb_dd_fs_name_case(table_ref.m_name.c_str());
3584       if (!dd_client.mdl_locks_acquire_exclusive(schema_name.c_str(),
3585                                                  table_name.c_str())) {
3586         log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3587         ndb_log_error("MDL lock could not be acquired on table '%s.%s'",
3588                       schema_name.c_str(), table_name.c_str());
3589         return false;
3590       }
3591 
3592       if (!dd_client.set_tablespace_id_in_table(
3593               schema_name.c_str(), table_name.c_str(), tablespace_id)) {
3594         log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3595         ndb_log_error("Could not set tablespace id in table '%s.%s'",
3596                       schema_name.c_str(), table_name.c_str());
3597         return false;
3598       }
3599     }
3600     return true;
3601   }
3602 
3603   void handle_alter_tablespace(const Ndb_schema_op *schema) {
3604     DBUG_TRACE;
3605 
3606     assert(!is_post_epoch());  // Always directly
3607 
3608     if (schema->node_id == own_nodeid()) {
3609       return;
3610     }
3611 
3612     write_schema_op_to_binlog(m_thd, schema);
3613 
3614     // Get information about tables in the tablespace being ALTERed. This is
3615     // required since after the ALTER the tablespace id of every table
3616     // should be updated
3617     std::vector<dd::Tablespace_table_ref> table_refs;
3618     if (!get_tablespace_table_refs(schema->name, table_refs)) {
3619       ndb_log_error("Distribution of ALTER TABLESPACE '%s' failed",
3620                     schema->name);
3621       m_schema_op_result.set_result(
3622           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3623           "Distribution of ALTER TABLESPACE " + std::string(1, '\'') +
3624               std::string(schema->name) + std::string(1, '\'') + " failed");
3625       return;
3626     }
3627 
3628     Ndb_dd_client dd_client(m_thd);
3629     if (!create_tablespace_from_engine(dd_client, schema->name, schema->id,
3630                                        schema->version)) {
3631       ndb_log_error("Distribution of ALTER TABLESPACE '%s' failed",
3632                     schema->name);
3633       m_schema_op_result.set_result(
3634           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3635           "Distribution of ALTER TABLESPACE " + std::string(1, '\'') +
3636               std::string(schema->name) + std::string(1, '\'') + " failed");
3637       return;
3638     }
3639 
3640     if (!table_refs.empty()) {
3641       // Update tables in the tablespace with the new tablespace id
3642       if (!update_tablespace_id_in_tables(dd_client, schema->name,
3643                                           table_refs)) {
3644         ndb_log_error(
3645             "Failed to update tables in tablespace '%s' with the "
3646             "new tablespace id",
3647             schema->name);
3648         ndb_log_error("Distribution of ALTER TABLESPACE '%s' failed",
3649                       schema->name);
3650         m_schema_op_result.set_result(
3651             Ndb_schema_dist::SCHEMA_OP_FAILURE,
3652             "Distribution of ALTER TABLESPACE " + std::string(1, '\'') +
3653                 std::string(schema->name) + std::string(1, '\'') + " failed");
3654         return;
3655       }
3656     }
3657     dd_client.commit();
3658   }
3659 
3660   void handle_drop_tablespace(const Ndb_schema_op *schema) {
3661     DBUG_TRACE;
3662 
3663     assert(is_post_epoch());  // Always after epoch
3664 
3665     if (schema->node_id == own_nodeid()) {
3666       return;
3667     }
3668 
3669     write_schema_op_to_binlog(m_thd, schema);
3670 
3671     Ndb_dd_client dd_client(m_thd);
3672     if (!dd_client.mdl_lock_tablespace_exclusive(schema->name)) {
3673       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3674       ndb_log_error("MDL lock could not be acquired for tablespace '%s'",
3675                     schema->name);
3676       ndb_log_error("Distribution of DROP TABLESPACE '%s' failed",
3677                     schema->name);
3678       m_schema_op_result.set_result(
3679           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3680           "Distribution of DROP TABLESPACE " + std::string(1, '\'') +
3681               std::string(schema->name) + std::string(1, '\'') + " failed");
3682       return;
3683     }
3684 
3685     if (!dd_client.drop_tablespace(schema->name,
3686                                    false /* fail_if_not_exists */)) {
3687       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3688       ndb_log_error("Failed to drop tablespace '%s' from DD", schema->name);
3689       ndb_log_error("Distribution of DROP TABLESPACE '%s' failed",
3690                     schema->name);
3691       m_schema_op_result.set_result(
3692           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3693           "Distribution of DROP TABLESPACE " + std::string(1, '\'') +
3694               std::string(schema->name) + std::string(1, '\'') + " failed");
3695       return;
3696     }
3697 
3698     dd_client.commit();
3699   }
3700 
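  /*
    Install a logfile group in the DD using the undo file names fetched
    from NDB. Unlike create_tablespace_from_engine() this function commits
    the DD transaction itself.
  */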
3701   bool create_logfile_group_from_engine(const char *logfile_group_name,
3702                                         uint32 id, uint32 version) {
3703     DBUG_TRACE;
3704     DBUG_PRINT("enter", ("logfile_group_name: %s, id: %u, version: %u",
3705                          logfile_group_name, id, version));
3706 
3707     Ndb *ndb = m_thd_ndb->ndb;
3708     NdbDictionary::Dictionary *dict = ndb->getDictionary();
3709     std::vector<std::string> undofile_names;
3710     if (!ndb_get_undofile_names(dict, logfile_group_name, &undofile_names)) {
3711       log_NDB_error(dict->getNdbError());
3712       ndb_log_error("Failed to get undo files assigned to logfile group '%s'",
3713                     logfile_group_name);
3714       return false;
3715     }
3716 
3717     Ndb_dd_client dd_client(m_thd);
3718     if (!dd_client.mdl_lock_logfile_group_exclusive(logfile_group_name)) {
3719       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3720       ndb_log_error("MDL lock could not be acquired for logfile group '%s'",
3721                     logfile_group_name);
3722       return false;
3723     }
3724 
3725     if (!dd_client.install_logfile_group(logfile_group_name, undofile_names, id,
3726                                          version, true /* force_overwrite */)) {
3727       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3728       ndb_log_error("Failed to install logfile group '%s' in DD",
3729                     logfile_group_name);
3730       return false;
3731     }
3732 
3733     dd_client.commit();
3734     return true;
3735   }
3736 
3737   void handle_create_logfile_group(const Ndb_schema_op *schema) {
3738     DBUG_TRACE;
3739 
3740     assert(!is_post_epoch());  // Always directly
3741 
3742     if (schema->node_id == own_nodeid()) {
3743       return;
3744     }
3745 
3746     write_schema_op_to_binlog(m_thd, schema);
3747 
3748     if (!create_logfile_group_from_engine(schema->name, schema->id,
3749                                           schema->version)) {
3750       ndb_log_error("Distribution of CREATE LOGFILE GROUP '%s' failed",
3751                     schema->name);
3752       m_schema_op_result.set_result(
3753           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3754           "Distribution of CREATE LOGFILE GROUP " + std::string(1, '\'') +
3755               std::string(schema->name) + std::string(1, '\'') + " failed");
3756     }
3757   }
3758 
3759   void handle_alter_logfile_group(const Ndb_schema_op *schema) {
3760     DBUG_TRACE;
3761 
3762     assert(!is_post_epoch());  // Always directly
3763 
3764     if (schema->node_id == own_nodeid()) {
3765       return;
3766     }
3767 
3768     write_schema_op_to_binlog(m_thd, schema);
3769 
3770     if (!create_logfile_group_from_engine(schema->name, schema->id,
3771                                           schema->version)) {
3772       ndb_log_error("Distribution of ALTER LOGFILE GROUP '%s' failed",
3773                     schema->name);
3774       m_schema_op_result.set_result(
3775           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3776           "Distribution of ALTER LOGFILE GROUP " + std::string(1, '\'') +
3777               std::string(schema->name) + std::string(1, '\'') + " failed");
3778     }
3779   }
3780 
3781   void handle_drop_logfile_group(const Ndb_schema_op *schema) {
3782     DBUG_TRACE;
3783 
3784     assert(is_post_epoch());  // Always after epoch
3785 
3786     if (schema->node_id == own_nodeid()) {
3787       return;
3788     }
3789 
3790     write_schema_op_to_binlog(m_thd, schema);
3791 
3792     Ndb_dd_client dd_client(m_thd);
3793     if (!dd_client.mdl_lock_logfile_group_exclusive(schema->name)) {
3794       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3795       ndb_log_error("MDL lock could not be acquired for logfile group '%s'",
3796                     schema->name);
3797       ndb_log_error("Distribution of DROP LOGFILE GROUP '%s' failed",
3798                     schema->name);
3799       m_schema_op_result.set_result(
3800           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3801           "Distribution of DROP LOGFILE GROUP " + std::string(1, '\'') +
3802               std::string(schema->name) + std::string(1, '\'') + " failed");
3803       return;
3804     }
3805 
3806     if (!dd_client.drop_logfile_group(schema->name,
3807                                       false /* fail_if_not_exists */)) {
3808       log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
3809       ndb_log_error("Failed to drop logfile group '%s' from DD", schema->name);
3810       ndb_log_error("Distribution of DROP LOGFILE GROUP '%s' failed",
3811                     schema->name);
3812       m_schema_op_result.set_result(
3813           Ndb_schema_dist::SCHEMA_OP_FAILURE,
3814           "Distribution of DROP LOGFILE GROUP " + std::string(1, '\'') +
3815               std::string(schema->name) + std::string(1, '\'') + " failed");
3816       return;
3817     }
3818 
3819     dd_client.commit();
3820   }
3821 
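  /*
    Main entry point for a received schema operation. Some operation types
    are deferred and handled after the epoch (see handle_after_epoch()),
    the rest are dispatched to their handler directly. When this node is
    the coordinator, the current subscribers are registered as participants
    in the NDB_SCHEMA_OBJECT before dispatching. Finally the operation is
    acknowledged using the new protocol (with result) when the schema op
    has a schema_op_id, falling back to the old protocol otherwise.
  */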
3822   int handle_schema_op(const Ndb_schema_op *schema) {
3823     DBUG_TRACE;
3824     {
3825       const SCHEMA_OP_TYPE schema_type = (SCHEMA_OP_TYPE)schema->type;
3826 
3827       ndb_log_verbose(19,
3828                       "got schema event on '%s.%s(%u/%u)' query: '%s' "
3829                       "type: %s(%d) node: %u slock: %x%08x",
3830                       schema->db, schema->name, schema->id, schema->version,
3831                       schema->query,
3832                       Ndb_schema_dist_client::type_name(
3833                           static_cast<SCHEMA_OP_TYPE>(schema->type)),
3834                       schema_type, schema->node_id, schema->slock.bitmap[1],
3835                       schema->slock.bitmap[0]);
3836 
3837       DBUG_EXECUTE_IF("ndb_schema_op_start_crash", DBUG_SUICIDE(););
3838 
3839       // Return to simulate schema operation timeout
3840       DBUG_EXECUTE_IF("ndb_schema_op_start_timeout", return 0;);
3841 
3842       if ((schema->db[0] == 0) && (schema->name[0] == 0)) {
3843         /**
3844          * This happens if there is a schema event on a table (object)
3845          *   that this mysqld does not know about.
3846          *   E.g. it had a local table shadowing an NDB table...
3847          */
3848         return 0;
3849       }
3850 
3851       if (schema_type == SOT_CLEAR_SLOCK) {
3852         // Handle the ack after epoch to ensure that schema events are inserted
3853         // in the binlog after any data events
3854         handle_after_epoch(schema);
3855         return 0;
3856       }
3857 
3858       if (schema->node_id == own_nodeid()) {
3859         // This is the Coordinator who hears about this schema operation for
3860         // the first time. Save the list of current subscribers as participants
3861         // in the NDB_SCHEMA_OBJECT, those are the nodes who need to acknowledge
3862         // (or fail) before the schema operation is completed.
3863         std::unique_ptr<NDB_SCHEMA_OBJECT,
3864                         decltype(&NDB_SCHEMA_OBJECT::release)>
3865             ndb_schema_object(
3866                 NDB_SCHEMA_OBJECT::get(schema->db, schema->name, schema->id,
3867                                        schema->version),
3868                 NDB_SCHEMA_OBJECT::release);
3869         if (!ndb_schema_object) {
3870           // There is no NDB_SCHEMA_OBJECT waiting for this schema operation
3871           // Unexpected since the client who started this schema op
3872           // is always in the same node as the coordinator
3873           ndbcluster::ndbrequire(false);
3874           return 0;
3875         }
3876         std::unordered_set<uint32> subscribers;
3877         m_schema_dist_data.get_subscriber_list(subscribers);
3878         ndb_schema_object->register_participants(subscribers);
3879         ndb_log_verbose(
3880             19, "Participants: %s",
3881             ndb_schema_object->waiting_participants_to_string().c_str());
3882 
3883         // Add active schema operation to coordinator
3884         m_schema_dist_data.add_active_schema_op(ndb_schema_object.get());
3885 
3886         // Test schema dist client killed
3887         if (DBUG_EVALUATE_IF("ndb_schema_dist_client_killed", true, false)) {
3888           // Wait until the Client has set "coordinator completed"
3889           while (!ndb_schema_object->check_coordinator_completed())
3890             ndb_milli_sleep(100);
3891         }
3892       }
3893 
3894       // Set the custom lock_wait_timeout for schema distribution
3895       Lock_wait_timeout_guard lwt_guard(m_thd,
3896                                         opt_ndb_schema_dist_lock_wait_timeout);
3897 
3898       Ndb_schema_op_result schema_op_result;
3899       switch (schema_type) {
3900         case SOT_CLEAR_SLOCK:
3901           // Already handled above, should never end up here
3902           ndbcluster::ndbrequire(schema_type != SOT_CLEAR_SLOCK);
3903           return 0;
3904 
3905         case SOT_ALTER_TABLE_COMMIT:
3906         case SOT_RENAME_TABLE_PREPARE:
3907         case SOT_ONLINE_ALTER_TABLE_PREPARE:
3908         case SOT_ONLINE_ALTER_TABLE_COMMIT:
3909         case SOT_RENAME_TABLE:
3910         case SOT_DROP_TABLE:
3911         case SOT_DROP_DB:
3912         case SOT_DROP_TABLESPACE:
3913         case SOT_DROP_LOGFILE_GROUP:
3914           handle_after_epoch(schema);
3915           return 0;
3916 
3917         case SOT_TRUNCATE_TABLE:
3918           handle_truncate_table(schema);
3919           break;
3920 
3921         case SOT_CREATE_TABLE:
3922           handle_create_table(schema);
3923           break;
3924 
3925         case SOT_CREATE_DB:
3926           handle_create_db(schema);
3927           break;
3928 
3929         case SOT_ALTER_DB:
3930           handle_alter_db(schema);
3931           break;
3932 
3933         case SOT_CREATE_USER:
3934         case SOT_DROP_USER:
3935         case SOT_RENAME_USER:
3936         case SOT_GRANT:
3937         case SOT_REVOKE:
3938         case SOT_ACL_SNAPSHOT:
3939         case SOT_ACL_STATEMENT:
3940         case SOT_ACL_STATEMENT_REFRESH:
3941           handle_grant_op(schema);
3942           break;
3943 
3944         case SOT_TABLESPACE:
3945         case SOT_LOGFILE_GROUP:
3946           if (schema->node_id == own_nodeid()) break;
3947           write_schema_op_to_binlog(m_thd, schema);
3948           break;
3949 
3950         case SOT_RENAME_TABLE_NEW:
3951           /*
3952             Only a very old MySQL Server connected to the cluster may
3953             send this schema operation; ignore it
3954           */
3955           ndb_log_error(
3956               "Skipping old schema operation"
3957               "(RENAME_TABLE_NEW) on %s.%s",
3958               schema->db, schema->name);
3959           DBUG_ASSERT(false);
3960           break;
3961 
3962         case SOT_CREATE_TABLESPACE:
3963           handle_create_tablespace(schema);
3964           break;
3965 
3966         case SOT_ALTER_TABLESPACE:
3967           handle_alter_tablespace(schema);
3968           break;
3969 
3970         case SOT_CREATE_LOGFILE_GROUP:
3971           handle_create_logfile_group(schema);
3972           break;
3973 
3974         case SOT_ALTER_LOGFILE_GROUP:
3975           handle_alter_logfile_group(schema);
3976           break;
3977       }
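           // Note: the cases above which call handle_after_epoch() are
           // deferred and handled again by handle_schema_op_post_epoch() once
           // the epoch is complete; the other cases are handled here and
           // acknowledged directly below.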
3978 
3979       if (schema->schema_op_id) {
3980         // Use new protocol
3981         if (!ack_schema_op_with_result(schema)) {
3982           // Fall back to the old protocol as a stopgap; no result will be
3983           // returned, but at least the coordinator will be informed
3984           ack_schema_op(schema);
3985         }
3986       } else {
3987         // Use old protocol
3988         ack_schema_op(schema);
3989       }
3990     }
3991 
3992     // Errors should have been reported to log and then cleared
3993     DBUG_ASSERT(!m_thd->is_error());
3994 
3995     return 0;
3996   }
3997 
3998   void handle_schema_op_post_epoch(const Ndb_schema_op *schema) {
3999     DBUG_TRACE;
4000     DBUG_PRINT("enter", ("%s.%s: query: '%s'  type: %d", schema->db,
4001                          schema->name, schema->query, schema->type));
4002 
4003     // Set the custom lock_wait_timeout for schema distribution
4004     Lock_wait_timeout_guard lwt_guard(m_thd,
4005                                       opt_ndb_schema_dist_lock_wait_timeout);
4006 
4007     {
4008       const SCHEMA_OP_TYPE schema_type = (SCHEMA_OP_TYPE)schema->type;
4009       ndb_log_verbose(9, "%s - %s.%s",
4010                       Ndb_schema_dist_client::type_name(
4011                           static_cast<SCHEMA_OP_TYPE>(schema->type)),
4012                       schema->db, schema->name);
4013 
4014       switch (schema_type) {
4015         case SOT_DROP_DB:
4016           handle_drop_db(schema);
4017           break;
4018 
4019         case SOT_DROP_TABLE:
4020           handle_drop_table(schema);
4021           break;
4022 
4023         case SOT_RENAME_TABLE_PREPARE:
4024           handle_rename_table_prepare(schema);
4025           break;
4026 
4027         case SOT_RENAME_TABLE:
4028           handle_rename_table(schema);
4029           break;
4030 
4031         case SOT_ALTER_TABLE_COMMIT:
4032           handle_offline_alter_table_commit(schema);
4033           break;
4034 
4035         case SOT_ONLINE_ALTER_TABLE_PREPARE:
4036           handle_online_alter_table_prepare(schema);
4037           break;
4038 
4039         case SOT_ONLINE_ALTER_TABLE_COMMIT:
4040           handle_online_alter_table_commit(schema);
4041           break;
4042 
4043         case SOT_DROP_TABLESPACE:
4044           handle_drop_tablespace(schema);
4045           break;
4046 
4047         case SOT_DROP_LOGFILE_GROUP:
4048           handle_drop_logfile_group(schema);
4049           break;
4050 
4051         default:
4052           DBUG_ASSERT(false);
4053       }
4054     }
4055 
4056     // Errors should have been reported to log and then cleared
4057     DBUG_ASSERT(!m_thd->is_error());
4058 
4059     // There should be no MDL locks left now
4060     DBUG_ASSERT(!m_thd->mdl_context.has_locks());
4061 
4062     return;
4063   }
4064 
4065   THD *const m_thd;
4066   Thd_ndb *const m_thd_ndb;
4067   MEM_ROOT *m_mem_root;
4068   uint m_own_nodeid;
4069   Ndb_schema_dist_data &m_schema_dist_data;
4070   Ndb_schema_op_result m_schema_op_result;
4071   bool m_post_epoch;
4072 
4073   bool is_post_epoch(void) const { return m_post_epoch; }
4074 
4075   List<const Ndb_schema_op> m_post_epoch_handle_list;
4076 
4077  public:
4078   Ndb_schema_event_handler() = delete;
4079   Ndb_schema_event_handler(const Ndb_schema_event_handler &) = delete;
4080 
4081   Ndb_schema_event_handler(THD *thd, MEM_ROOT *mem_root, uint own_nodeid,
4082                            Ndb_schema_dist_data &schema_dist_data)
4083       : m_thd(thd),
4084         m_thd_ndb(get_thd_ndb(thd)),
4085         m_mem_root(mem_root),
4086         m_own_nodeid(own_nodeid),
4087         m_schema_dist_data(schema_dist_data),
4088         m_post_epoch(false) {}
4089 
4090   ~Ndb_schema_event_handler() {
4091     // There should be no work left to do...
4092     DBUG_ASSERT(m_post_epoch_handle_list.elements == 0);
4093   }
4094 
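       /*
         Schema operation results (handled below): each participant writes a
         row into ndb_schema_result describing the outcome of a schema
         operation on that node. Only the coordinator, i.e. the node whose id
         matches the 'nodeid' of the row, handles the event; it records the
         result in the matching NDB_SCHEMA_OBJECT and, once all registered
         participants have completed or failed, sends the final ack which
         completes the schema operation.
       */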
4095   void handle_schema_result_insert(uint32 nodeid, uint32 schema_op_id,
4096                                    uint32 participant_node_id, uint32 result,
4097                                    const std::string &message) {
4098     DBUG_TRACE;
4099     if (nodeid != own_nodeid()) {
4100       // Only the coordinator handles these events
4101       return;
4102     }
4103 
4104     // Unpack the message received
4105     Ndb_schema_result_table schema_result_table(m_thd_ndb);
4106     const std::string unpacked_message =
4107         schema_result_table.unpack_message(message);
4108 
4109     ndb_log_verbose(
4110         19,
4111         "Received ndb_schema_result insert, nodeid: %d, schema_op_id: %d, "
4112         "participant_node_id: %d, result: %d, message: '%s'",
4113         nodeid, schema_op_id, participant_node_id, result,
4114         unpacked_message.c_str());
4115 
4116     // Lookup NDB_SCHEMA_OBJECT from nodeid + schema_op_id
4117     std::unique_ptr<NDB_SCHEMA_OBJECT, decltype(&NDB_SCHEMA_OBJECT::release)>
4118         ndb_schema_object(NDB_SCHEMA_OBJECT::get(nodeid, schema_op_id),
4119                           NDB_SCHEMA_OBJECT::release);
4120     if (ndb_schema_object == nullptr) {
4121       // The schema operation has already completed on this node
4122       return;
4123     }
4124 
4125     ndb_schema_object->result_received_from_node(participant_node_id, result,
4126                                                  unpacked_message);
4127 
4128     if (ndb_schema_object->check_all_participants_completed()) {
4129       // All participants have completed (or failed) -> send final ack
4130       ack_schema_op_final(ndb_schema_object->db(), ndb_schema_object->name());
4131     }
4132   }
4133 
4134   void handle_schema_result_event(Ndb *s_ndb, NdbEventOperation *pOp,
4135                                   NdbDictionary::Event::TableEvent event_type,
4136                                   const Ndb_event_data *event_data) {
4137     // Test "coordinator abort active" by simulating cluster failure
4138     if (DBUG_EVALUATE_IF("ndb_schema_dist_coord_abort_active", true, false)) {
4139       ndb_log_info("Simulating cluster failure...");
4140       event_type = NdbDictionary::Event::TE_CLUSTER_FAILURE;
4141     }
4142 
4143     switch (event_type) {
4144       case NdbDictionary::Event::TE_INSERT:
4145         handle_schema_result_insert(
4146             event_data->unpack_uint32(0), event_data->unpack_uint32(1),
4147             event_data->unpack_uint32(2), event_data->unpack_uint32(3),
4148             event_data->unpack_string(4));
4149         break;
4150 
4151       case NdbDictionary::Event::TE_CLUSTER_FAILURE:
4152         // fall through
4153       case NdbDictionary::Event::TE_DROP:
4154         // Cluster failure or ndb_schema_result table dropped
4155         if (ndb_binlog_tables_inited && ndb_binlog_running)
4156           ndb_log_verbose(1, "NDB Binlog: util tables need to reinitialize");
4157 
4158         // Indicate util tables not ready
4159         mysql_mutex_lock(&injector_data_mutex);
4160         ndb_binlog_tables_inited = false;
4161         ndb_binlog_is_ready = false;
4162         mysql_mutex_unlock(&injector_data_mutex);
4163 
4164         ndb_tdc_close_cached_tables();
4165 
4166         // Tear down the event subscription on ndb_schema_result
4167         ndbcluster_binlog_event_operation_teardown(m_thd, s_ndb, pOp);
4168         break;
4169 
4170       default:
4171         // Ignore other event types
4172         break;
4173     }
4174     return;
4175   }
4176 
4177   void handle_event(Ndb *s_ndb, NdbEventOperation *pOp) {
4178     DBUG_TRACE;
4179 
4180     const Ndb_event_data *event_data =
4181         static_cast<const Ndb_event_data *>(pOp->getCustomData());
4182     if (Ndb_schema_dist_client::is_schema_dist_result_table(
4183             event_data->share->db, event_data->share->table_name)) {
4184       // Received event on ndb_schema_result table
4185       handle_schema_result_event(s_ndb, pOp, pOp->getEventType(), event_data);
4186       return;
4187     }
4188 
4189     if (!check_is_ndb_schema_event(event_data)) return;
4190 
4191     NDBEVENT::TableEvent ev_type = pOp->getEventType();
4192 
4193     // Test "fail all schema ops" by simulating cluster failure
4194     // before the schema operation has been registered
4195     if (DBUG_EVALUATE_IF("ndb_schema_dist_coord_fail_all", true, false)) {
4196       ndb_log_info("Simulating cluster failure...");
4197       ev_type = NdbDictionary::Event::TE_CLUSTER_FAILURE;
4198     }
4199 
4200     // Test "client detect not ready" by simulating cluster failure
4201     if (DBUG_EVALUATE_IF("ndb_schema_dist_client_not_ready", true, false)) {
4202       ndb_log_info("Simulating cluster failure...");
4203       ev_type = NdbDictionary::Event::TE_CLUSTER_FAILURE;
4204       // There should be one NDB_SCHEMA_OBJECT registered
4205       ndbcluster::ndbrequire(NDB_SCHEMA_OBJECT::count_active_schema_ops() == 1);
4206     }
4207 
4208     switch (ev_type) {
4209       case NDBEVENT::TE_INSERT:
4210       case NDBEVENT::TE_UPDATE: {
4211         /* ndb_schema table, row INSERTed or UPDATEd */
4212         const Ndb_schema_op *schema_op =
4213             Ndb_schema_op::create(event_data, pOp->getAnyValue());
4214         handle_schema_op(schema_op);
4215         break;
4216       }
4217 
4218       case NDBEVENT::TE_DELETE:
4219         /* ndb_schema table, row DELETEd */
4220         break;
4221 
4222       case NDBEVENT::TE_CLUSTER_FAILURE:
4223         ndb_log_verbose(1, "cluster failure at epoch %u/%u.",
4224                         (uint)(pOp->getGCI() >> 32), (uint)(pOp->getGCI()));
4225 
4226         // fall through
4227       case NDBEVENT::TE_DROP:
4228         /* ndb_schema table DROPped */
4229         if (ndb_binlog_tables_inited && ndb_binlog_running)
4230           ndb_log_verbose(1, "NDB Binlog: util tables need to reinitialize");
4231 
4232         // Indicate util tables not ready
4233         mysql_mutex_lock(&injector_data_mutex);
4234         ndb_binlog_tables_inited = false;
4235         ndb_binlog_is_ready = false;
4236         mysql_mutex_unlock(&injector_data_mutex);
4237 
4238         ndb_tdc_close_cached_tables();
4239 
4240         ndbcluster_binlog_event_operation_teardown(m_thd, s_ndb, pOp);
4241 
4242         if (DBUG_EVALUATE_IF("ndb_schema_dist_client_not_ready", true, false)) {
4243           ndb_log_info("Wait for client to detect not ready...");
4244           while (NDB_SCHEMA_OBJECT::count_active_schema_ops() > 0)
4245             ndb_milli_sleep(100);
4246         }
4247         break;
4248 
4249       case NDBEVENT::TE_ALTER:
4250         /* ndb_schema table ALTERed */
4251         break;
4252 
4253       case NDBEVENT::TE_NODE_FAILURE: {
4254         /* Remove all subscribers for node */
4255         m_schema_dist_data.report_data_node_failure(pOp->getNdbdNodeId());
4256         check_wakeup_clients(Ndb_schema_dist::NODE_FAILURE, "Data node failed");
4257         break;
4258       }
4259 
4260       case NDBEVENT::TE_SUBSCRIBE: {
4261         /* Add node as subscriber */
4262         m_schema_dist_data.report_subscribe(pOp->getNdbdNodeId(),
4263                                             pOp->getReqNodeId());
4264         // No 'check_wakeup_clients', adding subscribers doesn't complete
4265         // anything
4266         break;
4267       }
4268 
4269       case NDBEVENT::TE_UNSUBSCRIBE: {
4270         /* Remove node as subscriber */
4271         m_schema_dist_data.report_unsubscribe(pOp->getNdbdNodeId(),
4272                                               pOp->getReqNodeId());
4273         check_wakeup_clients(Ndb_schema_dist::NODE_UNSUBSCRIBE,
4274                              "Node unsubscribed");
4275         break;
4276       }
4277 
4278       default: {
4279         ndb_log_error("unknown event %u, ignoring...", ev_type);
4280       }
4281     }
4282 
4283     return;
4284   }
4285 
4286   void check_active_schema_ops(ulonglong current_epoch) {
4287     // This function is called repeatedly as epochs pass, but the checks
4288     // should only be performed at regular intervals. Check if it's time for
4289     // one now and calculate the time for the next check if time is up
4290     if (likely(!m_schema_dist_data.time_for_check())) return;
4291 
4292     const uint active_ops = m_schema_dist_data.active_schema_ops().size();
4293     if (likely(active_ops == 0)) return;  // Nothing to do at this time
4294 
4295     ndb_log_info(
4296         "Coordinator checking active schema operations, "
4297         "epochs: (%u/%u,%u/%u,%u/%u), proc_info: '%s'",
4298         (uint)(ndb_latest_handled_binlog_epoch >> 32),
4299         (uint)(ndb_latest_handled_binlog_epoch),
4300         (uint)(ndb_latest_received_binlog_epoch >> 32),
4301         (uint)(ndb_latest_received_binlog_epoch), (uint)(current_epoch >> 32),
4302         (uint)(current_epoch), m_thd->proc_info);
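         // Note: the epoch values above are 64-bit and are logged as two
         // 32-bit halves, <hi>/<lo>, i.e. the GCI in the upper half and the
         // micro-GCI in the lower half.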
4303 
4304     for (const NDB_SCHEMA_OBJECT *schema_object :
4305          m_schema_dist_data.active_schema_ops()) {
4306       // Print info about this schema operation
4307       ndb_log_info(" - schema operation active on '%s.%s'", schema_object->db(),
4308                    schema_object->name());
4309       if (ndb_log_get_verbose_level() > 30) {
4310         ndb_log_error_dump("%s", schema_object->to_string().c_str());
4311       }
4312 
4313       // Check if schema operation has timed out
4314       const bool completed = schema_object->check_timeout(
4315           opt_ndb_schema_dist_timeout, Ndb_schema_dist::NODE_TIMEOUT,
4316           "Participant timeout");
4317       if (completed) {
4318         ndb_log_warning("Schema dist coordinator detected timeout");
4319         // Timeout occurred -> send final ack to complete the schema operation
4320         ack_schema_op_final(schema_object->db(), schema_object->name());
4321       }
4322     }
4323   }
4324 
4325   void post_epoch(ulonglong ndb_latest_epoch) {
4326     if (unlikely(m_post_epoch_handle_list.elements > 0)) {
4327       // Set the flag used to check that functions are called at the correct time
4328       m_post_epoch = true;
4329 
4330       /*
4331        process any operations that should be done after
4332        the epoch is complete
4333       */
4334       const Ndb_schema_op *schema;
4335       while ((schema = m_post_epoch_handle_list.pop())) {
4336         if (schema->type == SOT_CLEAR_SLOCK) {
4337           handle_clear_slock(schema);
4338           continue;  // Handled an ack -> don't send new ack
4339         }
4340 
4341         handle_schema_op_post_epoch(schema);
4342         if (schema->schema_op_id) {
4343           // Use new protocol
4344           if (!ack_schema_op_with_result(schema)) {
4345             // Fall back to the old protocol as a stopgap; no result will be
4346             // returned, but at least the coordinator will be informed
4347             ack_schema_op(schema);
4348           }
4349         } else {
4350           // Use old protocol
4351           ack_schema_op(schema);
4352         }
4353       }
4354     }
4355 
4356     check_active_schema_ops(ndb_latest_epoch);
4357 
4358     // There should be no work left to do...
4359     DBUG_ASSERT(m_post_epoch_handle_list.elements == 0);
4360   }
4361 };
4362 
4363 /*********************************************************************
4364   Internal helper functions for handling of the cluster replication tables
4365   - ndb_binlog_index
4366   - ndb_apply_status
4367 *********************************************************************/
4368 
4369 /*
4370   struct to hold the data to be inserted into the
4371   ndb_binlog_index table
4372 */
4373 struct ndb_binlog_index_row {
4374   ulonglong epoch;
4375   const char *start_master_log_file;
4376   ulonglong start_master_log_pos;
4377   ulong n_inserts;
4378   ulong n_updates;
4379   ulong n_deletes;
4380   ulong n_schemaops;
4381 
4382   ulong orig_server_id;
4383   ulonglong orig_epoch;
4384 
4385   ulong gci;
4386 
4387   const char *next_master_log_file;
4388   ulonglong next_master_log_pos;
4389 
4390   struct ndb_binlog_index_row *next;
4391 };
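     /*
       The struct members roughly correspond to the NBICOL_* columns written by
       Ndb_binlog_index_table_util::write_rows_impl() below:
         start_master_log_file/pos  -> NBICOL_START_FILE / NBICOL_START_POS
         epoch                      -> NBICOL_EPOCH
         n_inserts/n_updates/
         n_deletes/n_schemaops      -> NBICOL_NUM_*
         orig_server_id, orig_epoch -> NBICOL_ORIG_SERVERID / NBICOL_ORIG_EPOCH
         gci                        -> NBICOL_GCI
         next_master_log_file/pos   -> NBICOL_NEXT_FILE / NBICOL_NEXT_POS
       The 'next' pointer chains one row per orig_server_id/orig_epoch
       combination belonging to the same epoch.
     */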
4392 
4393 /**
4394   Utility class encapsulating the code which opens and writes
4395   to the mysql.ndb_binlog_index table
4396 */
4397 class Ndb_binlog_index_table_util {
4398   static constexpr const char *const DB_NAME = "mysql";
4399   static constexpr const char *const TABLE_NAME = "ndb_binlog_index";
4400   /*
4401     Open the ndb_binlog_index table for writing
4402   */
4403   static int open_binlog_index_table(THD *thd, TABLE **ndb_binlog_index) {
4404     const char *save_proc_info =
4405         thd_proc_info(thd, "Opening 'mysql.ndb_binlog_index'");
4406 
4407     TABLE_LIST tables(DB_NAME,     // db
4408                       TABLE_NAME,  // name, alias
4409                       TL_WRITE);   // for write
4410 
4411     /* Only allow real table to be opened */
4412     tables.required_type = dd::enum_table_type::BASE_TABLE;
4413 
4414     const uint flags =
4415         MYSQL_LOCK_IGNORE_TIMEOUT; /* Wait for lock "infinitely" */
4416     if (open_and_lock_tables(thd, &tables, flags)) {
4417       if (thd->killed)
4418         DBUG_PRINT("error", ("NDB Binlog: Opening ndb_binlog_index: killed"));
4419       else
4420         ndb_log_error("NDB Binlog: Opening ndb_binlog_index: %d, '%s'",
4421                       thd->get_stmt_da()->mysql_errno(),
4422                       thd->get_stmt_da()->message_text());
4423       thd_proc_info(thd, save_proc_info);
4424       return -1;
4425     }
4426     *ndb_binlog_index = tables.table;
4427     thd_proc_info(thd, save_proc_info);
4428     return 0;
4429   }
4430 
4431   /*
4432     Write rows to the ndb_binlog_index table
4433   */
4434   static int write_rows_impl(THD *thd, ndb_binlog_index_row *row) {
4435     int error = 0;
4436     ndb_binlog_index_row *first = row;
4437     TABLE *ndb_binlog_index = 0;
4438     // Save previous option settings
4439     ulonglong option_bits = thd->variables.option_bits;
4440 
4441     /*
4442       Assume this function is not called with an error set in thd
4443       (but clear it for safety in release builds)
4444      */
4445     assert(!thd->is_error());
4446     thd->clear_error();
4447 
4448     /*
4449       Turn off binlogging to prevent the table changes from being written to
4450       the binary log.
4451     */
4452     Disable_binlog_guard binlog_guard(thd);
4453 
4454     if (open_binlog_index_table(thd, &ndb_binlog_index)) {
4455       if (thd->killed)
4456         DBUG_PRINT(
4457             "error",
4458             ("NDB Binlog: Unable to lock table ndb_binlog_index, killed"));
4459       else
4460         ndb_log_error("NDB Binlog: Unable to lock table ndb_binlog_index");
4461       error = -1;
4462       goto add_ndb_binlog_index_err;
4463     }
4464 
4465     // Set all columns to be written
4466     ndb_binlog_index->use_all_columns();
4467 
4468     // Turn off autocommit to do all writes in one transaction
4469     thd->variables.option_bits |= OPTION_NOT_AUTOCOMMIT;
4470     do {
4471       ulonglong epoch = 0, orig_epoch = 0;
4472       uint orig_server_id = 0;
4473 
4474       // Initialize ndb_binlog_index->record[0]
4475       empty_record(ndb_binlog_index);
4476 
4477       ndb_binlog_index->field[NBICOL_START_POS]->store(
4478           first->start_master_log_pos, true);
4479       ndb_binlog_index->field[NBICOL_START_FILE]->store(
4480           first->start_master_log_file,
4481           (uint)strlen(first->start_master_log_file), &my_charset_bin);
4482       ndb_binlog_index->field[NBICOL_EPOCH]->store(epoch = first->epoch, true);
4483       if (ndb_binlog_index->s->fields > NBICOL_ORIG_SERVERID) {
4484         /* Table has ORIG_SERVERID / ORIG_EPOCH columns.
4485          * Write rows with different ORIG_SERVERID / ORIG_EPOCH
4486          * separately
4487          */
4488         ndb_binlog_index->field[NBICOL_NUM_INSERTS]->store(row->n_inserts,
4489                                                            true);
4490         ndb_binlog_index->field[NBICOL_NUM_UPDATES]->store(row->n_updates,
4491                                                            true);
4492         ndb_binlog_index->field[NBICOL_NUM_DELETES]->store(row->n_deletes,
4493                                                            true);
4494         ndb_binlog_index->field[NBICOL_NUM_SCHEMAOPS]->store(row->n_schemaops,
4495                                                              true);
4496         ndb_binlog_index->field[NBICOL_ORIG_SERVERID]->store(
4497             orig_server_id = row->orig_server_id, true);
4498         ndb_binlog_index->field[NBICOL_ORIG_EPOCH]->store(
4499             orig_epoch = row->orig_epoch, true);
4500         ndb_binlog_index->field[NBICOL_GCI]->store(first->gci, true);
4501 
4502         if (ndb_binlog_index->s->fields > NBICOL_NEXT_POS) {
4503           /* Table has next log pos fields, fill them in */
4504           ndb_binlog_index->field[NBICOL_NEXT_POS]->store(
4505               first->next_master_log_pos, true);
4506           ndb_binlog_index->field[NBICOL_NEXT_FILE]->store(
4507               first->next_master_log_file,
4508               (uint)strlen(first->next_master_log_file), &my_charset_bin);
4509         }
4510         row = row->next;
4511       } else {
4512         /* Old schema : Table has no separate
4513          * ORIG_SERVERID / ORIG_EPOCH columns.
4514          * Merge operation counts and write one row
4515          */
4516         while ((row = row->next)) {
4517           first->n_inserts += row->n_inserts;
4518           first->n_updates += row->n_updates;
4519           first->n_deletes += row->n_deletes;
4520           first->n_schemaops += row->n_schemaops;
4521         }
4522         ndb_binlog_index->field[NBICOL_NUM_INSERTS]->store(
4523             (ulonglong)first->n_inserts, true);
4524         ndb_binlog_index->field[NBICOL_NUM_UPDATES]->store(
4525             (ulonglong)first->n_updates, true);
4526         ndb_binlog_index->field[NBICOL_NUM_DELETES]->store(
4527             (ulonglong)first->n_deletes, true);
4528         ndb_binlog_index->field[NBICOL_NUM_SCHEMAOPS]->store(
4529             (ulonglong)first->n_schemaops, true);
4530       }
4531 
4532       error = ndb_binlog_index->file->ha_write_row(ndb_binlog_index->record[0]);
4533 
4534       /* Fault injection to test logging */
4535       if (DBUG_EVALUATE_IF("ndb_injector_binlog_index_write_fail_random", true,
4536                            false)) {
4537         if ((((uint32)rand()) % 10) == 9) {
4538           ndb_log_error("NDB Binlog: Injecting random write failure");
4539           error =
4540               ndb_binlog_index->file->ha_write_row(ndb_binlog_index->record[0]);
4541         }
4542       }
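           // When the failure is injected, writing the same record again is
           // expected to fail (typically with a duplicate key error), which
           // exercises the error reporting below.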
4543 
4544       if (error) {
4545         ndb_log_error(
4546             "NDB Binlog: Failed writing to ndb_binlog_index for "
4547             "epoch %u/%u orig_server_id %u orig_epoch %u/%u "
4548             "with error %d.",
4549             uint(epoch >> 32), uint(epoch), orig_server_id,
4550             uint(orig_epoch >> 32), uint(orig_epoch), error);
4551 
4552         bool seen_error_row = false;
4553         ndb_binlog_index_row *cursor = first;
4554         do {
4555           char tmp[128];
4556           if (ndb_binlog_index->s->fields > NBICOL_ORIG_SERVERID)
4557             snprintf(tmp, sizeof(tmp), "%u/%u,%u,%u/%u", uint(epoch >> 32),
4558                      uint(epoch), uint(cursor->orig_server_id),
4559                      uint(cursor->orig_epoch >> 32), uint(cursor->orig_epoch));
4560 
4561           else
4562             snprintf(tmp, sizeof(tmp), "%u/%u", uint(epoch >> 32), uint(epoch));
4563 
4564           bool error_row = (row == (cursor->next));
4565           ndb_log_error(
4566               "NDB Binlog: Writing row (%s) to ndb_binlog_index - %s", tmp,
4567               (error_row ? "ERROR" : (seen_error_row ? "Discarded" : "OK")));
4568           seen_error_row |= error_row;
4569 
4570         } while ((cursor = cursor->next));
4571 
4572         error = -1;
4573         goto add_ndb_binlog_index_err;
4574       }
4575     } while (row);
4576 
4577   add_ndb_binlog_index_err:
4578     /*
4579       Explicitly commit or rollback the writes.
4580       If we fail to commit we rollback.
4581       Note, trans_rollback_stmt() is defined to never fail.
4582     */
4583     thd->get_stmt_da()->set_overwrite_status(true);
4584     if (error) {
4585       // Error, rollback
4586       trans_rollback_stmt(thd);
4587     } else {
4588       assert(!thd->is_error());
4589       // Commit
4590       const bool failed = trans_commit_stmt(thd);
4591       if (failed || thd->transaction_rollback_request) {
4592         /*
4593           Transaction failed to commit or
4594           was rolled back internally by the engine
4595           print an error message in the log and return the
4596           error, which will cause replication to stop.
4597         */
4598         error = thd->get_stmt_da()->mysql_errno();
4599         ndb_log_error(
4600             "NDB Binlog: Failed committing transaction to "
4601             "ndb_binlog_index with error %d.",
4602             error);
4603         trans_rollback_stmt(thd);
4604       }
4605     }
4606 
4607     thd->get_stmt_da()->set_overwrite_status(false);
4608 
4609     // Restore previous option settings
4610     thd->variables.option_bits = option_bits;
4611 
4612     // Close the tables this thread has opened
4613     close_thread_tables(thd);
4614 
4615     // Release MDL locks on the opened table
4616     thd->mdl_context.release_transactional_locks();
4617 
4618     return error;
4619   }
4620 
4621   /*
4622     Write rows to the ndb_binlog_index table using a separate THD
4623     to avoid the write being killed
4624   */
4625   static void write_rows_with_new_thd(ndb_binlog_index_row *rows) {
4626     // Create a new THD and retry the write
4627     THD *new_thd = new THD;
4628     new_thd->set_new_thread_id();
4629     new_thd->thread_stack = (char *)&new_thd;
4630     new_thd->store_globals();
4631     new_thd->set_command(COM_DAEMON);
4632     new_thd->system_thread = SYSTEM_THREAD_NDBCLUSTER_BINLOG;
4633     new_thd->get_protocol_classic()->set_client_capabilities(0);
4634     new_thd->security_context()->skip_grants();
4635     new_thd->set_current_stmt_binlog_format_row();
4636 
4637     // Retry the write
4638     const int retry_result = write_rows_impl(new_thd, rows);
4639     if (retry_result) {
4640       ndb_log_error(
4641           "NDB Binlog: Failed writing to ndb_binlog_index table "
4642           "while retrying after kill during shutdown");
4643       DBUG_ASSERT(false);  // Crash in debug compile
4644     }
4645 
4646     new_thd->restore_globals();
4647     delete new_thd;
4648   }
4649 
4650  public:
4651   /*
4652     Write rows to the ndb_binlog_index table
4653   */
4654   static inline int write_rows(THD *thd, ndb_binlog_index_row *rows) {
4655     return write_rows_impl(thd, rows);
4656   }
4657 
4658   /*
4659     Retry write rows to the ndb_binlog_index table after the THD
4660     has been killed (which should only happen during mysqld shutdown).
4661 
4662     NOTE! The reason that the session (aka. THD) is being killed is that
4663     it's in the global list of sessions, and mysqld thus asks it to stop
4664     during shutdown by setting the "killed" flag. It's not possible to
4665     prevent the THD from being killed; instead a brand new THD, which is
4666     not in the global list of sessions, is used. Furthermore, it's a
4667     feature to have the THD in the global list of sessions since it
4668     should show up in SHOW PROCESSLIST.
4669   */
4670   static void write_rows_retry_after_kill(THD *orig_thd,
4671                                           ndb_binlog_index_row *rows) {
4672     // Should only be called when original THD has been killed
4673     DBUG_ASSERT(orig_thd->is_killed());
4674 
4675     write_rows_with_new_thd(rows);
4676 
4677     // Relink this thread with original THD
4678     orig_thd->store_globals();
4679   }
4680 
4681   /*
4682     @brief Remove all rows from mysql.ndb_binlog_index table that contain
4683     references to the given binlog filename.
4684 
4685     @note this function modifies THD state. Caller must ensure that
4686     the passed in THD is not affected by these changes. Presumably
4687     the state fixes should be moved down into Ndb_local_connection.
4688 
4689     @param thd The thread handle
4690     @param filename Name of the binlog file whose references should be removed
4691 
4692     @return true if failure to delete from the table occurs
4693   */
4694 
4695   static bool remove_rows_for_file(THD *thd, const char *filename) {
4696     Ndb_local_connection mysqld(thd);
4697 
4698     // Set isolation level to be independent from server settings
4699     thd->variables.transaction_isolation = ISO_REPEATABLE_READ;
4700 
4701     // Turn autocommit on, this will make delete_rows() commit
4702     thd->variables.option_bits &= ~OPTION_NOT_AUTOCOMMIT;
4703 
4704     // Ensure that file paths are escaped in a way that does not
4705     // interfere with path separator on Windows
4706     thd->variables.sql_mode |= MODE_NO_BACKSLASH_ESCAPES;
4707 
4708     // ignore "table does not exist" as it is a "consistent" behavior
4709     const bool ignore_no_such_table = true;
4710     std::string where;
4711     where.append("File='").append(filename).append("'");
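         // The call below effectively issues:
         //   DELETE FROM mysql.ndb_binlog_index WHERE File='<binlog filename>'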
4712     if (mysqld.delete_rows(DB_NAME, TABLE_NAME, ignore_no_such_table, where)) {
4713       // Failed
4714       return true;
4715     }
4716     return false;
4717   }
4718 };
4719 constexpr const char *const Ndb_binlog_index_table_util::DB_NAME;
4720 constexpr const char *const Ndb_binlog_index_table_util::TABLE_NAME;
4721 
4722 // Wrapper function allowing Ndb_binlog_index_table_util::remove_rows_for_file()
4723 // to be forward declared
4724 static bool ndbcluster_binlog_index_remove_file(THD *thd,
4725                                                 const char *filename) {
4726   return Ndb_binlog_index_table_util::remove_rows_for_file(thd, filename);
4727 }
4728 
4729 /*********************************************************************
4730   Functions for start, stop, wait for ndbcluster binlog thread
4731 *********************************************************************/
4732 
4733 int ndbcluster_binlog_start() {
4734   DBUG_TRACE;
4735 
4736   if (::server_id == 0) {
4737     ndb_log_warning(
4738         "server id set to zero - changes logged to "
4739         "binlog with server id zero will be logged with "
4740         "another server id by slave mysqlds");
4741   }
4742 
4743   /*
4744      Check that ServerId is not using the reserved bit or bits reserved
4745      for application use
4746   */
4747   if ((::server_id & 0x1 << 31) ||                             // Reserved bit
4748       !ndbcluster_anyvalue_is_serverid_in_range(::server_id))  // server_id_bits
4749   {
4750     ndb_log_error(
4751         "server id provided is too large to be represented in "
4752         "opt_server_id_bits or is reserved");
4753     return -1;
4754   }
4755 
4756   /*
4757      Check that v2 events are enabled if log-transaction-id is set
4758   */
4759   if (opt_ndb_log_transaction_id && log_bin_use_v1_row_events) {
4760     ndb_log_error(
4761         "--ndb-log-transaction-id requires v2 Binlog row events "
4762         "but server is using v1.");
4763     return -1;
4764   }
4765 
4766   ndb_binlog_thread.init();
4767 
4768   /**
4769    * Note that injector_event_mutex is init'ed as a 'SLOW' mutex.
4770    * This is required as a FAST mutex could starve a waiter thread
4771    * forever if the thread holding the lock holds it for long.
4772    * See my_thread_global_init() which explicit warns about this.
4773    */
4774   mysql_mutex_init(PSI_INSTRUMENT_ME, &injector_event_mutex,
4775                    MY_MUTEX_INIT_SLOW);
4776   mysql_cond_init(PSI_INSTRUMENT_ME, &injector_data_cond);
4777   mysql_mutex_init(PSI_INSTRUMENT_ME, &injector_data_mutex, MY_MUTEX_INIT_FAST);
4778 
4779   // The binlog thread globals have been initialized and should be freed
4780   ndbcluster_binlog_inited = 1;
4781 
4782   /* Start ndb binlog thread */
4783   if (ndb_binlog_thread.start()) {
4784     DBUG_PRINT("error", ("Could not start ndb binlog thread"));
4785     return -1;
4786   }
4787 
4788   return 0;
4789 }
4790 
4791 void ndbcluster_binlog_set_server_started() {
4792   ndb_binlog_thread.set_server_started();
4793 }
4794 
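     /*
       NDB_SHARE::set_binlog_flags() below translates the per-table
       Ndb_binlog_type (from mysql.ndb_replication, or the --ndb-log-* defaults
       for NBT_DEFAULT) into the NDB_SHARE flag bits, roughly:
         FLAG_BINLOG_MODE_FULL           - log all columns, not only updated ones
         FLAG_BINLOG_MODE_USE_UPDATE     - log updates as UPDATE instead of WRITE
         FLAG_BINLOG_MODE_MINIMAL_UPDATE - log minimal before/after update images
       plus FLAG_NO_BINLOG, which disables binlogging of the table entirely.
     */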
4795 void NDB_SHARE::set_binlog_flags(Ndb_binlog_type ndb_binlog_type) {
4796   DBUG_TRACE;
4797   switch (ndb_binlog_type) {
4798     case NBT_NO_LOGGING:
4799       DBUG_PRINT("info", ("NBT_NO_LOGGING"));
4800       flags |= NDB_SHARE::FLAG_NO_BINLOG;
4801       return;
4802     case NBT_DEFAULT:
4803       DBUG_PRINT("info", ("NBT_DEFAULT"));
4804       if (opt_ndb_log_updated_only) {
4805         flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4806       } else {
4807         flags |= NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4808       }
4809       if (opt_ndb_log_update_as_write) {
4810         flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4811       } else {
4812         flags |= NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4813       }
4814       if (opt_ndb_log_update_minimal) {
4815         flags |= NDB_SHARE::FLAG_BINLOG_MODE_MINIMAL_UPDATE;
4816       }
4817       break;
4818     case NBT_UPDATED_ONLY:
4819       DBUG_PRINT("info", ("NBT_UPDATED_ONLY"));
4820       flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4821       flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4822       break;
4823     case NBT_USE_UPDATE:
4824       DBUG_PRINT("info", ("NBT_USE_UPDATE"));
4825       // fall through
4826     case NBT_UPDATED_ONLY_USE_UPDATE:
4827       DBUG_PRINT("info", ("NBT_UPDATED_ONLY_USE_UPDATE"));
4828       flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4829       flags |= NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4830       break;
4831     case NBT_FULL:
4832       DBUG_PRINT("info", ("NBT_FULL"));
4833       flags |= NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4834       flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4835       break;
4836     case NBT_FULL_USE_UPDATE:
4837       DBUG_PRINT("info", ("NBT_FULL_USE_UPDATE"));
4838       flags |= NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4839       flags |= NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4840       break;
4841     case NBT_UPDATED_ONLY_MINIMAL:
4842       DBUG_PRINT("info", ("NBT_UPDATED_ONLY_MINIMAL"));
4843       flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4844       flags |= NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4845       flags |= NDB_SHARE::FLAG_BINLOG_MODE_MINIMAL_UPDATE;
4846       break;
4847     case NBT_UPDATED_FULL_MINIMAL:
4848       DBUG_PRINT("info", ("NBT_UPDATED_FULL_MINIMAL"));
4849       flags |= NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4850       flags |= NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4851       flags |= NDB_SHARE::FLAG_BINLOG_MODE_MINIMAL_UPDATE;
4852       break;
4853     default:
4854       return;
4855   }
4856   flags &= ~NDB_SHARE::FLAG_NO_BINLOG;
4857 }
4858 
4859 /*
4860   Ndb_binlog_client::read_replication_info
4861 
4862   This function retrieves the data for the given table
4863   from the ndb_replication table.
4864 
4865   If no matching row is found, or the ndb_replication table does not
4866   exist, then defaults are returned.
4867 */
4868 bool Ndb_binlog_client::read_replication_info(
4869     Ndb *ndb, const char *db, const char *table_name, uint server_id,
4870     uint32 *binlog_flags, const st_conflict_fn_def **conflict_fn,
4871     st_conflict_fn_arg *args, uint *num_args) {
4872   DBUG_TRACE;
4873 
4874   /* Override for ndb_apply_status when logging */
4875   if (opt_ndb_log_apply_status) {
4876     if (Ndb_apply_status_table::is_apply_status_table(db, table_name)) {
4877       // Ensure that all columns are received for ndb_apply_status updates
4878       // and that events are always logged as WRITEs.
4879       ndb_log_info(
4880           "ndb-log-apply-status forcing 'mysql.ndb_apply_status' to FULL "
4881           "USE_WRITE");
4882       *binlog_flags = NBT_FULL;
4883       *conflict_fn = NULL;
4884       *num_args = 0;
4885       return false;
4886     }
4887   }
4888 
4889   Ndb_rep_tab_reader rep_tab_reader;
4890 
4891   int const rc = rep_tab_reader.lookup(ndb, db, table_name, server_id);
4892 
4893   if (rc == 0) {
4894     // lookup() may return a warning although it succeeds
4895     const char *msg = rep_tab_reader.get_warning_message();
4896     if (msg != NULL) {
4897       push_warning_printf(m_thd, Sql_condition::SL_WARNING,
4898                           ER_NDB_REPLICATION_SCHEMA_ERROR,
4899                           ER_THD(m_thd, ER_NDB_REPLICATION_SCHEMA_ERROR), msg);
4900       ndb_log_warning("NDB Binlog: %s", msg);
4901     }
4902   } else {
4903     /* When rep_tab_reader.lookup() returns with a non-zero error code,
4904        it must give a warning message describing why it failed */
4905     const char *msg = rep_tab_reader.get_warning_message();
4906     DBUG_ASSERT(msg);
4907     my_error(ER_NDB_REPLICATION_SCHEMA_ERROR, MYF(0), msg);
4908     ndb_log_warning("NDB Binlog: %s", msg);
4909     return true;
4910   }
4911 
4912   *binlog_flags = rep_tab_reader.get_binlog_flags();
4913   const char *conflict_fn_spec = rep_tab_reader.get_conflict_fn_spec();
4914 
4915   if (conflict_fn_spec != NULL) {
4916     char msgbuf[FN_REFLEN];
4917     if (parse_conflict_fn_spec(conflict_fn_spec, conflict_fn, args, num_args,
4918                                msgbuf, sizeof(msgbuf)) != 0) {
4919       my_error(ER_CONFLICT_FN_PARSE_ERROR, MYF(0), msgbuf);
4920 
4921       /*
4922         Log as well, useful for contexts where the thd's stack of
4923         warnings is ignored
4924       */
4925       ndb_log_warning(
4926           "NDB Slave: Table %s.%s : Parse error on conflict fn : %s", db,
4927           table_name, msgbuf);
4928 
4929       return true;
4930     }
4931   } else {
4932     /* No conflict function specified */
4933     conflict_fn = NULL;
4934     num_args = 0;
4935   }
4936 
4937   return false;
4938 }
4939 
4940 int Ndb_binlog_client::apply_replication_info(
4941     Ndb *ndb, NDB_SHARE *share, const NdbDictionary::Table *ndbtab,
4942     const st_conflict_fn_def *conflict_fn, const st_conflict_fn_arg *args,
4943     uint num_args, uint32 binlog_flags) {
4944   DBUG_TRACE;
4945   char tmp_buf[FN_REFLEN];
4946 
4947   DBUG_PRINT("info", ("Setting binlog flags to %u", binlog_flags));
4948   share->set_binlog_flags((enum Ndb_binlog_type)binlog_flags);
4949 
4950   if (conflict_fn != NULL) {
4951     if (setup_conflict_fn(ndb, &share->m_cfn_share, share->db,
4952                           share->table_name, share->get_binlog_use_update(),
4953                           ndbtab, tmp_buf, sizeof(tmp_buf), conflict_fn, args,
4954                           num_args) == 0) {
4955       ndb_log_verbose(1, "NDB Slave: %s", tmp_buf);
4956     } else {
4957       /*
4958         Dump setup failure message to error log
4959         for cases where thd warning stack is
4960         ignored
4961       */
4962       ndb_log_warning("NDB Slave: Table %s.%s : %s", share->db,
4963                       share->table_name, tmp_buf);
4964 
4965       push_warning_printf(m_thd, Sql_condition::SL_WARNING,
4966                           ER_CONFLICT_FN_PARSE_ERROR,
4967                           ER_THD(m_thd, ER_CONFLICT_FN_PARSE_ERROR), tmp_buf);
4968 
4969       return -1;
4970     }
4971   } else {
4972     /* No conflict function specified */
4973     slave_reset_conflict_fn(share->m_cfn_share);
4974   }
4975 
4976   return 0;
4977 }
4978 
4979 int Ndb_binlog_client::read_and_apply_replication_info(
4980     Ndb *ndb, NDB_SHARE *share, const NdbDictionary::Table *ndbtab,
4981     uint server_id) {
4982   DBUG_TRACE;
4983   uint32 binlog_flags;
4984   const st_conflict_fn_def *conflict_fn = NULL;
4985   st_conflict_fn_arg args[MAX_CONFLICT_ARGS];
4986   uint num_args = MAX_CONFLICT_ARGS;
4987 
4988   if (read_replication_info(ndb, share->db, share->table_name, server_id,
4989                             &binlog_flags, &conflict_fn, args, &num_args) ||
4990       apply_replication_info(ndb, share, ndbtab, conflict_fn, args, num_args,
4991                              binlog_flags)) {
4992     return -1;
4993   }
4994 
4995   return 0;
4996 }
4997 
4998 /*
4999   Common function for setting up everything for logging a table at
5000   create/discover.
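
       In short, the setup consists of:
        1) opening the table from NDB to get the current table object
        2) reading and applying the table's settings from mysql.ndb_replication
        3) creating the table event in NDB, unless it already exists
        4) creating the event operation which subscribes to the event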
5001 */
5002 static int ndbcluster_setup_binlog_for_share(THD *thd, Ndb *ndb,
5003                                              NDB_SHARE *share,
5004                                              const dd::Table *table_def) {
5005   DBUG_TRACE;
5006 
5007   // This function should not be used to set up binlogging
5008   // of tables with temporary names.
5009   DBUG_ASSERT(!ndb_name_is_temp(share->table_name));
5010 
5011   Mutex_guard share_g(share->mutex);
5012   if (share->op != 0) {
5013     DBUG_PRINT("info", ("binlogging already setup"));
5014     return 0;
5015   }
5016 
5017   Ndb_binlog_client binlog_client(thd, share->db, share->table_name);
5018 
5019   Ndb_table_guard ndbtab_g(ndb, share->db, share->table_name);
5020   const NDBTAB *ndbtab = ndbtab_g.get_table();
5021   if (ndbtab == 0) {
5022     const NdbError ndb_error = ndb->getDictionary()->getNdbError();
5023     ndb_log_verbose(1,
5024                     "NDB Binlog: Failed to open table '%s' from NDB, "
5025                     "error: '%d - %s'",
5026                     share->key_string(), ndb_error.code, ndb_error.message);
5027     return -1;  // error
5028   }
5029 
5030   if (binlog_client.read_and_apply_replication_info(ndb, share, ndbtab,
5031                                                     ::server_id)) {
5032     ndb_log_error(
5033         "NDB Binlog: Failed to read and apply replication "
5034         "info for table '%s'",
5035         share->key_string());
5036     return -1;
5037   }
5038 
5039   if (binlog_client.table_should_have_event(share, ndbtab)) {
5040     // Check if the event already exists in NDB, otherwise create it
5041     if (!binlog_client.event_exists_for_table(ndb, share)) {
5042       // The event didn't exist, create the event in NDB
5043       if (binlog_client.create_event(ndb, ndbtab, share)) {
5044         // Failed to create event
5045         return -1;
5046       }
5047     }
5048 
5049     if (binlog_client.table_should_have_event_op(share)) {
5050       // Create the NDB event operation on the event
5051       Ndb_event_data *event_data;
5052       if (!binlog_client.create_event_data(share, table_def, &event_data) ||
5053           binlog_client.create_event_op(share, ndbtab, event_data)) {
5054         // Failed to create event data or event operation
5055         return -1;
5056       }
5057     }
5058   }
5059 
5060   return 0;
5061 }
5062 
5063 int ndbcluster_binlog_setup_table(THD *thd, Ndb *ndb, const char *db,
5064                                   const char *table_name,
5065                                   const dd::Table *table_def) {
5066   DBUG_TRACE;
5067   DBUG_PRINT("enter", ("db: '%s', table_name: '%s'", db, table_name));
5068   DBUG_ASSERT(table_def);
5069 
5070   DBUG_ASSERT(!ndb_name_is_blob_prefix(table_name));
5071 
5072   // Create key for ndbcluster_open_tables
5073   char key[FN_REFLEN + 1];
5074   {
5075     char *end = key + build_table_filename(key, sizeof(key) - 1, db, "", "", 0);
5076     end += tablename_to_filename(table_name, end,
5077                                  (uint)(sizeof(key) - (end - key)));
5078   }
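       // The key is the same filename-encoded path that is used for the table
       // within the data directory, i.e. something like "./<db>/<tablename>"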
5079 
5080   mysql_mutex_lock(&ndbcluster_mutex);
5081 
5082   // Check if NDB_SHARE for this table already exist
5083   NDB_SHARE *share =
5084       NDB_SHARE::acquire_reference_by_key_have_lock(key, "create_binlog_setup");
5085   if (share == nullptr) {
5086     // NDB_SHARE didn't exist, the normal case, try to create it
5087     share = NDB_SHARE::create_and_acquire_reference(key, "create_binlog_setup");
5088     if (share == nullptr) {
5089       // Could not create the NDB_SHARE. Unlikely, catch in debug
5090       DBUG_ASSERT(false);
5091       return -1;
5092     }
5093   }
5094   mysql_mutex_unlock(&ndbcluster_mutex);
5095 
5096   // Before 'schema_dist_is_ready', Thd_ndb::ALLOW_BINLOG_SETUP is required
5097   int ret = 0;
5098   if (Ndb_schema_dist::is_ready(thd) ||
5099       get_thd_ndb(thd)->check_option(Thd_ndb::ALLOW_BINLOG_SETUP)) {
5100     ret = ndbcluster_setup_binlog_for_share(thd, ndb, share, table_def);
5101   }
5102 
5103   NDB_SHARE::release_reference(share, "create_binlog_setup");  // temporary ref.
5104 
5105 #ifndef DBUG_OFF
5106   // Force failure of setting up binlogging of a user table
5107   if (DBUG_EVALUATE_IF("ndb_binlog_fail_setup", true, false) &&
5108       !Ndb_schema_dist_client::is_schema_dist_table(db, table_name) &&
5109       !Ndb_schema_dist_client::is_schema_dist_result_table(db, table_name) &&
5110       !Ndb_apply_status_table::is_apply_status_table(db, table_name) &&
5111       !(!strcmp("test", db) && !strcmp(table_name, "check_not_readonly"))) {
5112     ret = -1;
5113   }
5114 #endif
5115 
5116   /*
5117    * Handle failure of setting up binlogging of a table
5118    */
5119   if (ret != 0) {
5120     ndb_log_error("Failed to setup binlogging for table '%s.%s'", db,
5121                   table_name);
5122     ndbcluster_handle_incomplete_binlog_setup();
5123   }
5124 
5125   return ret;
5126 }
5127 
5128 extern void kill_mysql(void);
5129 
5130 void ndbcluster_handle_incomplete_binlog_setup() {
5131   ndb_log_error("NDB Binlog: ndbcluster_handle_incomplete_binlog_setup");
5132   if (opt_ndb_log_fail_terminate) kill_mysql();
5133 }
5134 
5135 int Ndb_binlog_client::create_event(Ndb *ndb,
5136                                     const NdbDictionary::Table *ndbtab,
5137                                     const NDB_SHARE *share) {
5138   DBUG_TRACE;
5139   DBUG_PRINT("enter", ("table: '%s', version: %d", ndbtab->getName(),
5140                        ndbtab->getObjectVersion()));
5141   DBUG_PRINT("enter", ("share->key: '%s'", share->key_string()));
5142   DBUG_ASSERT(share);
5143 
5144   // Never create event on table with temporary name
5145   DBUG_ASSERT(!ndb_name_is_temp(ndbtab->getName()));
5146 
5147   // Never create event on the blob table(s)
5148   DBUG_ASSERT(!ndb_name_is_blob_prefix(ndbtab->getName()));
5149 
5150   std::string event_name =
5151       event_name_for_table(m_dbname, m_tabname, share->get_binlog_full());
5152 
5153   ndb->setDatabaseName(share->db);
5154   NdbDictionary::Dictionary *dict = ndb->getDictionary();
5155   NDBEVENT my_event(event_name.c_str());
5156   my_event.setTable(*ndbtab);
5157   my_event.addTableEvent(NDBEVENT::TE_ALL);
5158   if (ndb_table_has_hidden_pk(ndbtab)) {
5159     /* Hidden primary key, subscribe for all attributes */
5160     my_event.setReport(
5161         (NDBEVENT::EventReport)(NDBEVENT::ER_ALL | NDBEVENT::ER_DDL));
5162     DBUG_PRINT("info", ("subscription all"));
5163   } else {
5164     if (Ndb_schema_dist_client::is_schema_dist_table(share->db,
5165                                                      share->table_name)) {
5166       /**
5167        * ER_SUBSCRIBE is only needed on schema distribution table
5168        */
5169       my_event.setReport((NDBEVENT::EventReport)(
5170           NDBEVENT::ER_ALL | NDBEVENT::ER_SUBSCRIBE | NDBEVENT::ER_DDL));
5171       DBUG_PRINT("info", ("subscription all and subscribe"));
5172     } else if (Ndb_schema_dist_client::is_schema_dist_result_table(
5173                    share->db, share->table_name)) {
5174       my_event.setReport(
5175           (NDBEVENT::EventReport)(NDBEVENT::ER_ALL | NDBEVENT::ER_DDL));
5176       DBUG_PRINT("info", ("subscription all"));
5177     } else {
5178       if (share->get_binlog_full()) {
5179         my_event.setReport(
5180             (NDBEVENT::EventReport)(NDBEVENT::ER_ALL | NDBEVENT::ER_DDL));
5181         DBUG_PRINT("info", ("subscription all"));
5182       } else {
5183         my_event.setReport(
5184             (NDBEVENT::EventReport)(NDBEVENT::ER_UPDATED | NDBEVENT::ER_DDL));
5185         DBUG_PRINT("info", ("subscription only updated"));
5186       }
5187     }
5188   }
5189   if (ndb_table_has_blobs(ndbtab)) my_event.mergeEvents(true);
5190 
5191   /* add all columns to the event */
5192   const int n_cols = ndbtab->getNoOfColumns();
5193   for (int a = 0; a < n_cols; a++) my_event.addEventColumn(a);
5194 
5195   if (dict->createEvent(my_event))  // Add event to database
5196   {
5197     if (dict->getNdbError().classification != NdbError::SchemaObjectExists) {
5198       // Failed to create event, log warning
5199       log_warning(ER_GET_ERRMSG,
5200                   "Unable to create event in database. "
5201                   "Event: %s  Error Code: %d  Message: %s",
5202                   event_name.c_str(), dict->getNdbError().code,
5203                   dict->getNdbError().message);
5204       return -1;
5205     }
5206 
5207     /*
5208       Try retrieving the event; if the table version/id matches, we will
5209       get a valid event. Otherwise we have an old event from before
5210     */
5211     const NDBEVENT *ev;
5212     if ((ev = dict->getEvent(event_name.c_str()))) {
5213       delete ev;
5214       return 0;
5215     }
5216 
5217     // Old event from before; an error, but try to correct it
5218     if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT &&
5219         dict->dropEvent(my_event.getName(), 1)) {
5220       // Failed to drop the old event, log warning
5221       log_warning(ER_GET_ERRMSG,
5222                   "Unable to create event in database. "
5223                   "Attempt to correct with drop failed. "
5224                   "Event: %s Error Code: %d Message: %s",
5225                   event_name.c_str(), dict->getNdbError().code,
5226                   dict->getNdbError().message);
5227       return -1;
5228     }
5229 
5230     // Try to add the event again
5231     if (dict->createEvent(my_event)) {
5232       // Still failed to create the event, log warning
5233       log_warning(ER_GET_ERRMSG,
5234                   "Unable to create event in database. "
5235                   "Attempt to correct with drop ok, but create failed. "
5236                   "Event: %s Error Code: %d Message: %s",
5237                   event_name.c_str(), dict->getNdbError().code,
5238                   dict->getNdbError().message);
5239       return -1;
5240     }
5241   }
5242 
5243   ndb_log_verbose(1, "Created event '%s' for table '%s.%s' in NDB",
5244                   event_name.c_str(), m_dbname, m_tabname);
5245 
5246   return 0;
5247 }
5248 
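     /*
       "NDB compatible" here means, roughly, a field whose value can be
       received directly into the Field's record buffer via getValue() /
       getPreValue() on the event operation; BLOB, BIT and zero pack-length
       types need special handling when event data is received.
     */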
5249 inline int is_ndb_compatible_type(Field *field) {
5250   return !field->is_flag_set(BLOB_FLAG) && field->type() != MYSQL_TYPE_BIT &&
5251          field->pack_length() != 0;
5252 }
5253 
5254 /*
5255   - create NdbEventOperation for receiving log events
5256   - setup ndb recattrs for reception of log event data
5257   - "start" the event operation
5258 
5259   used at create/discover of tables
5260 */
5261 int Ndb_binlog_client::create_event_op(NDB_SHARE *share,
5262                                        const NdbDictionary::Table *ndbtab,
5263                                        const Ndb_event_data *event_data) {
5264   /*
5265     we are in either create table or rename table, so the table should be
5266     locked, hence we can work with the share without locks
5267   */
5268 
5269   DBUG_TRACE;
5270   DBUG_PRINT("enter", ("table: '%s', share->key: '%s'", ndbtab->getName(),
5271                        share->key_string()));
5272   DBUG_ASSERT(share);
5273   DBUG_ASSERT(event_data);
5274 
5275   // Never create event op on table with temporary name
5276   DBUG_ASSERT(!ndb_name_is_temp(ndbtab->getName()));
5277 
5278   // Never create event op on the blob table(s)
5279   DBUG_ASSERT(!ndb_name_is_blob_prefix(ndbtab->getName()));
5280 
5281   // Schema dist tables need special processing
5282   const bool is_schema_dist_setup =
5283       Ndb_schema_dist_client::is_schema_dist_table(share->db,
5284                                                    share->table_name) ||
5285       Ndb_schema_dist_client::is_schema_dist_result_table(share->db,
5286                                                           share->table_name);
5287 
5288   // Check if this is the event operation on mysql.ndb_apply_status
5289   // as it needs special processing
5290   const bool do_ndb_apply_status_share =
5291       Ndb_apply_status_table::is_apply_status_table(share->db,
5292                                                     share->table_name);
5293 
5294   std::string event_name =
5295       event_name_for_table(m_dbname, m_tabname, share->get_binlog_full());
5296 
5297   // There should be no NdbEventOperation assigned yet
5298   DBUG_ASSERT(!share->op);
5299 
5300   TABLE *table = event_data->shadow_table;
5301 
5302   int retries = 100;
5303   int retry_sleep = 0;
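  // Retry loop: create and start the NdbEventOperation. On failure with a
  // temporary NDB error the attempt is repeated (after a short sleep) until
  // 'retries' is exhausted, other errors give up immediately.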
5304   while (1) {
5305     if (retry_sleep > 0) {
5306       ndb_retry_sleep(retry_sleep);
5307     }
5308     Mutex_guard injector_mutex_g(injector_event_mutex);
5309     Ndb *ndb = injector_ndb;
5310     if (is_schema_dist_setup) ndb = schema_ndb;
5311 
5312     if (ndb == NULL) return -1;
5313 
5314     NdbEventOperation *op;
5315     if (is_schema_dist_setup)
5316       op = ndb->createEventOperation(event_name.c_str());
5317     else {
5318       // set injector_ndb database/schema from table internal name
5319       int ret = ndb->setDatabaseAndSchemaName(ndbtab);
5320       ndbcluster::ndbrequire(ret == 0);
5321       op = ndb->createEventOperation(event_name.c_str());
5322       // reset to catch errors
5323       ndb->setDatabaseName("");
5324     }
5325     if (!op) {
5326       const NdbError &ndb_err = ndb->getNdbError();
5327       if (ndb_err.code == 4710) {
5328         // Error code 4710 is returned when table or event is not found. The
5329         // generic error message for 4710 says "Event not found" but should
5330         // be reported as "table not found"
5331         log_warning(ER_GET_ERRMSG,
5332                     "Failed to create event operation on '%s', "
5333                     "table '%s' not found",
5334                     event_name.c_str(), table->s->table_name.str);
5335         return -1;
5336       }
5337       log_warning(ER_GET_ERRMSG,
5338                   "Failed to create event operation on '%s', error: %d - %s",
5339                   event_name.c_str(), ndb_err.code, ndb_err.message);
5340       return -1;
5341     }
5342 
5343     if (ndb_table_has_blobs(ndbtab))
5344       op->mergeEvents(true);  // currently not inherited from event
5345 
5346     const uint n_columns = ndbtab->getNoOfColumns();
5347     const uint n_stored_fields = Ndb_table_map::num_stored_fields(table);
5348     const uint val_length = sizeof(NdbValue) * n_columns;
5349 
5350     /*
5351        Allocate memory globally so it can be reused after online alter table
5352     */
5353     if (my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME),
5354                         &event_data->ndb_value[0], val_length,
5355                         &event_data->ndb_value[1], val_length, NULL) == 0) {
5356       log_warning(ER_GET_ERRMSG,
5357                   "Failed to allocate records for event operation");
5358       return -1;
5359     }
5360 
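    // Bind each NDB column for event data reception: ndb_value[0][j] receives
    // the handle for the after-image and ndb_value[1][j] the handle for the
    // before-image of column j. Blob columns use blob handles instead.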
5361     Ndb_table_map map(table);
5362     for (uint j = 0; j < n_columns; j++) {
5363       const char *col_name = ndbtab->getColumn(j)->getName();
5364       NdbValue attr0, attr1;
5365       if (j < n_stored_fields) {
5366         Field *f = table->field[map.get_field_for_column(j)];
5367         if (is_ndb_compatible_type(f)) {
5368           DBUG_PRINT("info", ("%s compatible", col_name));
5369           attr0.rec = op->getValue(col_name, (char *)f->field_ptr());
5370           attr1.rec =
5371               op->getPreValue(col_name, (f->field_ptr() - table->record[0]) +
5372                                             (char *)table->record[1]);
5373         } else if (!f->is_flag_set(BLOB_FLAG)) {
5374           DBUG_PRINT("info", ("%s non compatible", col_name));
5375           attr0.rec = op->getValue(col_name);
5376           attr1.rec = op->getPreValue(col_name);
5377         } else {
5378           DBUG_PRINT("info", ("%s blob", col_name));
5379           DBUG_ASSERT(ndb_table_has_blobs(ndbtab));
5380           attr0.blob = op->getBlobHandle(col_name);
5381           attr1.blob = op->getPreBlobHandle(col_name);
5382           if (attr0.blob == NULL || attr1.blob == NULL) {
5383             log_warning(ER_GET_ERRMSG,
5384                         "Failed to create NdbEventOperation on '%s', "
5385                         "blob field %u handles failed, error: %d - %s",
5386                         event_name.c_str(), j, op->getNdbError().code,
5387                         op->getNdbError().message);
5388             ndb->dropEventOperation(op);
5389             return -1;
5390           }
5391         }
5392       } else {
5393         DBUG_PRINT("info", ("%s hidden key", col_name));
5394         attr0.rec = op->getValue(col_name);
5395         attr1.rec = op->getPreValue(col_name);
5396       }
5397       event_data->ndb_value[0][j].ptr = attr0.ptr;
5398       event_data->ndb_value[1][j].ptr = attr1.ptr;
5399       DBUG_PRINT("info",
5400                  ("&event_data->ndb_value[0][%d]: 0x%lx  "
5401                   "event_data->ndb_value[0][%d]: 0x%lx",
5402                   j, (long)&event_data->ndb_value[0][j], j, (long)attr0.ptr));
5403       DBUG_PRINT("info",
5404                  ("&event_data->ndb_value[1][%d]: 0x%lx  "
5405                   "event_data->ndb_value[1][%d]: 0x%lx",
5406                   j, (long)&event_data->ndb_value[1][j], j, (long)attr1.ptr));
5407     }
5408     op->setCustomData(
5409         const_cast<Ndb_event_data *>(event_data));  // set before execute
5410     share->op = op;                                 // assign op in NDB_SHARE
5411 
5412     /* Check if user explicitly requires monitoring of empty updates */
5413     if (opt_ndb_log_empty_update) op->setAllowEmptyUpdate(true);
5414 
5415     if (op->execute()) {
5416       // Failed to create the NdbEventOperation
5417       const NdbError &ndb_err = op->getNdbError();
5418       share->op = NULL;
5419       retries--;
5420       if (ndb_err.status != NdbError::TemporaryError && ndb_err.code != 1407) {
5421         // Don't retry after these errors
5422         retries = 0;
5423       }
5424       if (retries == 0) {
5425         log_warning(ER_GET_ERRMSG,
5426                     "Failed to activate NdbEventOperation for '%s', "
5427                     "error: %d - %s",
5428                     event_name.c_str(), ndb_err.code, ndb_err.message);
5429       }
5430       op->setCustomData(NULL);
5431       ndb->dropEventOperation(op);
5432       if (retries && !m_thd->killed) {
5433         // fairly high retry sleep, temporary error on schema operation can
5434         // take some time to resolve
5435         retry_sleep = 100;  // milliseconds
5436         continue;
5437       }
5438       // Delete the event data, caller should create new before calling
5439       // this function again
5440       Ndb_event_data::destroy(event_data);
5441       return -1;
5442     }
5443     break;
5444   }
5445 
5446   /* ndb_share reference binlog */
5447   NDB_SHARE::acquire_reference_on_existing(share, "binlog");
5448 
5449   if (do_ndb_apply_status_share) {
5450     ndb_apply_status_share = NDB_SHARE::acquire_reference_on_existing(
5451         share, "ndb_apply_status_share");
5452 
5453     DBUG_ASSERT(get_thd_ndb(m_thd)->check_option(Thd_ndb::ALLOW_BINLOG_SETUP));
5454   }
5455 
5456   ndb_log_verbose(1, "NDB Binlog: logging %s (%s,%s)", share->key_string(),
5457                   share->get_binlog_full() ? "FULL" : "UPDATED",
5458                   share->get_binlog_use_update() ? "USE_UPDATE" : "USE_WRITE");
5459   return 0;
5460 }
5461 
5462 void Ndb_binlog_client::drop_events_for_table(THD *thd, Ndb *ndb,
5463                                               const char *db,
5464                                               const char *table_name) {
5465   DBUG_TRACE;
5466   DBUG_PRINT("enter", ("db: %s, tabname: %s", db, table_name));
5467 
5468   if (DBUG_EVALUATE_IF("ndb_skip_drop_event", true, false)) {
5469     ndb_log_verbose(1, "NDB Binlog: skipping drop event on '%s.%s'", db,
5470                     table_name);
5471     return;
5472   }
5473 
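  // Try to drop both possible events for the table, the last argument of
  // event_name_for_table() selects the "full" respectively "updated" variant
  // of the event name.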
5474   for (uint i = 0; i < 2; i++) {
5475     std::string event_name = event_name_for_table(db, table_name, i);
5476 
5477     NdbDictionary::Dictionary *dict = ndb->getDictionary();
5478     if (dict->dropEvent(event_name.c_str()) == 0) {
5479       // Event dropped successfully
5480       continue;
5481     }
5482 
5483     if (dict->getNdbError().code == 4710 || dict->getNdbError().code == 1419) {
5484       // Failed to drop event but return code says it was
5485       // because the event didn't exist, ignore
5486       continue;
5487     }
5488 
5489     /* Failed to drop event, push warning and write to log */
5490     push_warning_printf(thd, Sql_condition::SL_WARNING, ER_GET_ERRMSG,
5491                         ER_THD(thd, ER_GET_ERRMSG), dict->getNdbError().code,
5492                         dict->getNdbError().message, "NDB");
5493 
5494     ndb_log_error(
5495         "NDB Binlog: Unable to drop event for '%s.%s' from NDB, "
5496         "event_name: '%s' error: '%d - %s'",
5497         db, table_name, event_name.c_str(), dict->getNdbError().code,
5498         dict->getNdbError().message);
5499   }
5500 }
5501 
5502 /*
5503   Wait for the binlog thread to drop its NdbEventOperations
5504   during a drop table
5505 
5506   Synchronized drop between client and injector thread is
5507   necessary in order to maintain ordering in the binlog,
5508   such that the drop occurs _after_ any inserts/updates/deletes.
5509 
5510   Also the injector thread needs to be given time to detect the
5511   drop and release its resources allocated in the NDB_SHARE.
5512 */
5513 
5514 int ndbcluster_binlog_wait_synch_drop_table(THD *thd, NDB_SHARE *share) {
5515   DBUG_TRACE;
5516   DBUG_ASSERT(share);
5517 
5518   const char *save_proc_info = thd->proc_info;
5519   thd->proc_info = "Syncing ndb table schema operation and binlog";
5520 
5521   int max_timeout = DEFAULT_SYNC_TIMEOUT;
5522 
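  // Wait in one second intervals until the injector thread has dropped the
  // event operation and cleared share->op, give up (with an error) after
  // DEFAULT_SYNC_TIMEOUT unsuccessful rounds.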
5523   mysql_mutex_lock(&share->mutex);
5524   while (share->op) {
5525     struct timespec abstime;
5526     set_timespec(&abstime, 1);
5527 
5528     // Unlock the share and wait for injector to signal that
5529     // something has happened. (NOTE! convoluted in order to
5530     // only use injector_data_cond with injector_data_mutex)
5531     mysql_mutex_unlock(&share->mutex);
5532     mysql_mutex_lock(&injector_data_mutex);
5533     const int ret = mysql_cond_timedwait(&injector_data_cond,
5534                                          &injector_data_mutex, &abstime);
5535     mysql_mutex_unlock(&injector_data_mutex);
5536     mysql_mutex_lock(&share->mutex);
5537 
5538     if (thd->killed || share->op == 0) break;
5539     if (ret) {
5540       max_timeout--;
5541       if (max_timeout == 0) {
5542         ndb_log_error("%s, delete table timed out. Ignoring...",
5543                       share->key_string());
5544         DBUG_ASSERT(false);
5545         break;
5546       }
5547       if (ndb_log_get_verbose_level())
5548         ndb_report_waiting("delete table", max_timeout, "delete table",
5549                            share->key_string());
5550     }
5551   }
5552   mysql_mutex_unlock(&share->mutex);
5553 
5554   thd->proc_info = save_proc_info;
5555 
5556   return 0;
5557 }
5558 
5559 void ndbcluster_binlog_validate_sync_blacklist(THD *thd) {
5560   ndb_binlog_thread.validate_sync_blacklist(thd);
5561 }
5562 
5563 void ndbcluster_binlog_validate_sync_retry_list(THD *thd) {
5564   ndb_binlog_thread.validate_sync_retry_list(thd);
5565 }
5566 
5567 bool ndbcluster_binlog_check_table_async(const std::string &db_name,
5568                                          const std::string &table_name) {
5569   if (db_name.empty()) {
5570     ndb_log_error("Database name of object to be synchronized not set");
5571     return false;
5572   }
5573 
5574   if (table_name.empty()) {
5575     ndb_log_error("Table name of object to be synchronized not set");
5576     return false;
5577   }
5578 
5579   return ndb_binlog_thread.add_table_to_check(db_name, table_name);
5580 }
5581 
5582 bool ndbcluster_binlog_check_logfile_group_async(const std::string &lfg_name) {
5583   if (lfg_name.empty()) {
5584     ndb_log_error("Name of logfile group to be synchronized not set");
5585     return false;
5586   }
5587 
5588   return ndb_binlog_thread.add_logfile_group_to_check(lfg_name);
5589 }
5590 
5591 bool ndbcluster_binlog_check_tablespace_async(
5592     const std::string &tablespace_name) {
5593   if (tablespace_name.empty()) {
5594     ndb_log_error("Name of tablespace to be synchronized not set");
5595     return false;
5596   }
5597 
5598   return ndb_binlog_thread.add_tablespace_to_check(tablespace_name);
5599 }
5600 
5601 bool ndbcluster_binlog_check_schema_async(const std::string &schema_name) {
5602   if (schema_name.empty()) {
5603     ndb_log_error("Name of schema to be synchronized not set");
5604     return false;
5605   }
5606   return ndb_binlog_thread.add_schema_to_check(schema_name);
5607 }
5608 
5609 void ndbcluster_binlog_retrieve_sync_blacklist(
5610     Ndb_sync_excluded_objects_table *excluded_table) {
5611   ndb_binlog_thread.retrieve_sync_blacklist(excluded_table);
5612 }
5613 
5614 unsigned int ndbcluster_binlog_get_sync_blacklist_count() {
5615   return ndb_binlog_thread.get_sync_blacklist_count();
5616 }
5617 
5618 void ndbcluster_binlog_retrieve_sync_pending_objects(
5619     Ndb_sync_pending_objects_table *pending_table) {
5620   ndb_binlog_thread.retrieve_sync_pending_objects(pending_table);
5621 }
5622 
5623 unsigned int ndbcluster_binlog_get_sync_pending_objects_count() {
5624   return ndb_binlog_thread.get_sync_pending_objects_count();
5625 }
5626 
5627 /********************************************************************
5628   Internal helper functions for different events from the storage nodes
5629   used by the ndb injector thread
5630 ********************************************************************/
5631 
5632 /*
5633   Unpack a record read from NDB
5634 
5635   SYNOPSIS
5636     ndb_unpack_record()
5637     buf                 Buffer to store read row
5638 
5639   NOTE
5640     The data for each row is read directly into the
5641     destination buffer. This function is primarily
5642     called in order to check if any fields should be
5643     set to null.
5644 */
5645 
5646 static void ndb_unpack_record(TABLE *table, NdbValue *value, MY_BITMAP *defined,
5647                               uchar *buf) {
5648   Field **p_field = table->field, *field = *p_field;
5649   ptrdiff_t row_offset = (ptrdiff_t)(buf - table->record[0]);
5650   my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->write_set);
5651   DBUG_TRACE;
5652 
5653   /*
5654     Set the filler bits of the null byte, since they are
5655     not touched in the code below.
5656 
5657     The filler bits are the MSBs in the last null byte
5658   */
5659   if (table->s->null_bytes > 0)
5660     buf[table->s->null_bytes - 1] |= 256U - (1U << table->s->last_null_bit_pos);
5661   /*
5662     Set null flag(s)
5663   */
5664   for (; field; p_field++, field = *p_field) {
5665     if (field->is_virtual_gcol()) {
5666       if (field->is_flag_set(BLOB_FLAG)) {
5667         /**
5668          * Valgrind shows Server binlog code uses length
5669          * of virtual blob fields for allocation decisions
5670          * even when the blob is not read
5671          */
5672         Field_blob *field_blob = (Field_blob *)field;
5673         DBUG_PRINT("info", ("[%u] is virtual blob, setting length 0",
5674                             field->field_index()));
5675         Uint32 zerolen = 0;
5676         field_blob->set_ptr((uchar *)&zerolen, NULL);
5677       }
5678 
5679       continue;
5680     }
5681 
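    // Non virtual field: assume NOT NULL, then inspect the received value.
    // isNULL() > 0 means SQL NULL, < 0 means the value was not part of the
    // event (cleared from the 'defined' bitmap) and 0 means a proper value
    // which has already been read into the row buffer.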
5682     field->set_notnull(row_offset);
5683     if ((*value).ptr) {
5684       if (!field->is_flag_set(BLOB_FLAG)) {
5685         int is_null = (*value).rec->isNULL();
5686         if (is_null) {
5687           if (is_null > 0) {
5688             DBUG_PRINT("info", ("[%u] NULL", field->field_index()));
5689             field->set_null(row_offset);
5690           } else {
5691             DBUG_PRINT("info", ("[%u] UNDEFINED", field->field_index()));
5692             bitmap_clear_bit(defined, field->field_index());
5693           }
5694         } else if (field->type() == MYSQL_TYPE_BIT) {
5695           Field_bit *field_bit = static_cast<Field_bit *>(field);
5696 
5697           /*
5698             Move internal field pointer to point to 'buf'.  Calling
5699             the correct member function directly since we know the
5700             type of the object.
5701            */
5702           field_bit->Field_bit::move_field_offset(row_offset);
5703           if (field->pack_length() < 5) {
5704             DBUG_PRINT("info",
5705                        ("bit field H'%.8X", (*value).rec->u_32_value()));
5706             field_bit->Field_bit::store((longlong)(*value).rec->u_32_value(),
5707                                         true);
5708           } else {
5709             DBUG_PRINT("info",
5710                        ("bit field H'%.8X%.8X", *(Uint32 *)(*value).rec->aRef(),
5711                         *((Uint32 *)(*value).rec->aRef() + 1)));
5712 #ifdef WORDS_BIGENDIAN
5713             /* lsw is stored first */
5714             Uint32 *buf = (Uint32 *)(*value).rec->aRef();
5715             field_bit->Field_bit::store(
5716                 (((longlong)*buf) & 0x00000000FFFFFFFFLL) |
5717                     ((((longlong) * (buf + 1)) << 32) & 0xFFFFFFFF00000000LL),
5718                 true);
5719 #else
5720             field_bit->Field_bit::store((longlong)(*value).rec->u_64_value(),
5721                                         true);
5722 #endif
5723           }
5724           /*
5725             Move back internal field pointer to point to original
5726             value (usually record[0]).
5727            */
5728           field_bit->Field_bit::move_field_offset(-row_offset);
5729           DBUG_PRINT("info",
5730                      ("[%u] SET", (*value).rec->getColumn()->getColumnNo()));
5731           DBUG_DUMP("info", field->field_ptr(), field->pack_length());
5732         } else {
5733           DBUG_ASSERT(
5734               !strcmp((*value).rec->getColumn()->getName(), field->field_name));
5735           DBUG_PRINT("info",
5736                      ("[%u] SET", (*value).rec->getColumn()->getColumnNo()));
5737           DBUG_DUMP("info", field->field_ptr(), field->pack_length());
5738         }
5739       } else {
5740         NdbBlob *ndb_blob = (*value).blob;
5741         const uint field_no = field->field_index();
5742         int isNull;
5743         ndb_blob->getDefined(isNull);
5744         if (isNull == 1) {
5745           DBUG_PRINT("info", ("[%u] NULL", field_no));
5746           field->set_null(row_offset);
5747         } else if (isNull == -1) {
5748           DBUG_PRINT("info", ("[%u] UNDEFINED", field_no));
5749           bitmap_clear_bit(defined, field_no);
5750         } else {
5751 #ifndef DBUG_OFF
5752           // pointer was set in get_ndb_blobs_value
5753           Field_blob *field_blob = (Field_blob *)field;
5754           const uchar *ptr = field_blob->get_blob_data(row_offset);
5755           uint32 len = field_blob->get_length(row_offset);
5756           DBUG_PRINT("info", ("[%u] SET ptr: 0x%lx  len: %u", field_no,
5757                               (long)ptr, len));
5758 #endif
5759         }
5760       }       // else
5761     }         // if ((*value).ptr)
5762     value++;  // this field was not virtual
5763   }           // for()
5764   dbug_tmp_restore_column_map(table->write_set, old_map);
5765 }
5766 
5767 /*
5768   Handle error states on events from the storage nodes
5769 */
5770 static int handle_error(NdbEventOperation *pOp) {
5771   Ndb_event_data *event_data = (Ndb_event_data *)pOp->getCustomData();
5772   NDB_SHARE *share = event_data->share;
5773   DBUG_TRACE;
5774 
5775   ndb_log_error("NDB Binlog: unhandled error %d for table %s", pOp->hasError(),
5776                 share->key_string());
5777   pOp->clearError();
5778   return 0;
5779 }
5780 
5781 /*
5782   Handle _non_ data events from the storage nodes
5783 */
5784 
5785 static void handle_non_data_event(THD *thd, NdbEventOperation *pOp,
5786                                   ndb_binlog_index_row &row) {
5787   const Ndb_event_data *event_data =
5788       static_cast<const Ndb_event_data *>(pOp->getCustomData());
5789   NDB_SHARE *share = event_data->share;
5790   const NDBEVENT::TableEvent type = pOp->getEventType();
5791 
5792   DBUG_TRACE;
5793   DBUG_PRINT("enter",
5794              ("pOp: %p, event_data: %p, share: %p", pOp, event_data, share));
5795   DBUG_PRINT("enter", ("type: %d", type));
5796 
5797   if (type == NDBEVENT::TE_DROP || type == NDBEVENT::TE_ALTER) {
5798     // Count schema events
5799     row.n_schemaops++;
5800   }
5801 
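  // Act on the event type: cluster failure and drop tear down the event
  // operation (releasing ndb_apply_status_share if it is affected), alter is
  // only counted as a schema op, node failure and subscription events are
  // ignored here.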
5802   switch (type) {
5803     case NDBEVENT::TE_CLUSTER_FAILURE:
5804       ndb_log_verbose(1, "NDB Binlog: cluster failure for %s at epoch %u/%u.",
5805                       share->key_string(), (uint)(pOp->getGCI() >> 32),
5806                       (uint)(pOp->getGCI()));
5807       // fallthrough
5808     case NDBEVENT::TE_DROP:
5809       if (ndb_apply_status_share == share) {
5810         if (ndb_binlog_tables_inited && ndb_binlog_running)
5811           ndb_log_verbose(1, "NDB Binlog: util tables need to reinitialize");
5812 
5813         /* release the ndb_apply_status_share */
5814         NDB_SHARE::release_reference(ndb_apply_status_share,
5815                                      "ndb_apply_status_share");
5816         ndb_apply_status_share = NULL;
5817 
5818         Mutex_guard injector_g(injector_data_mutex);
5819         ndb_binlog_tables_inited = false;
5820       }
5821 
5822       ndbcluster_binlog_event_operation_teardown(thd, injector_ndb, pOp);
5823       break;
5824 
5825     case NDBEVENT::TE_ALTER:
5826       DBUG_PRINT("info", ("TE_ALTER"));
5827       break;
5828 
5829     case NDBEVENT::TE_NODE_FAILURE:
5830     case NDBEVENT::TE_SUBSCRIBE:
5831     case NDBEVENT::TE_UNSUBSCRIBE:
5832       /* ignore */
5833       break;
5834 
5835     default:
5836       ndb_log_error(
5837           "NDB Binlog: unknown non data event %d for %s. "
5838           "Ignoring...",
5839           (unsigned)type, share->key_string());
5840       break;
5841   }
5842 }
5843 
5844 /*
5845   Handle data events from the storage nodes
5846 */
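/*
  Find (or allocate) the ndb_binlog_index row which collects statistics for
  the given originating server id. With --ndb-log-orig each originating
  server gets its own row in the list, 'flag' asks for a row whose
  orig_epoch has not yet been filled in.
*/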
5847 inline ndb_binlog_index_row *ndb_find_binlog_index_row(
5848     ndb_binlog_index_row **rows, uint orig_server_id, int flag) {
5849   ndb_binlog_index_row *row = *rows;
5850   if (opt_ndb_log_orig) {
5851     ndb_binlog_index_row *first = row, *found_id = 0;
5852     for (;;) {
5853       if (row->orig_server_id == orig_server_id) {
5854         /* */
5855         if (!flag || !row->orig_epoch) return row;
5856         if (!found_id) found_id = row;
5857       }
5858       if (row->orig_server_id == 0) break;
5859       row = row->next;
5860       if (row == NULL) {
5861         // Allocate memory in current MEM_ROOT
5862         row = (ndb_binlog_index_row *)(*THR_MALLOC)
5863                   ->Alloc(sizeof(ndb_binlog_index_row));
5864         memset(row, 0, sizeof(ndb_binlog_index_row));
5865         row->next = first;
5866         *rows = row;
5867         if (found_id) {
5868           /*
5869             If we found index_row with same server id already
5870             that row will contain the current stats.
5871             Copy stats over to new and reset old.
5872           */
5873           row->n_inserts = found_id->n_inserts;
5874           row->n_updates = found_id->n_updates;
5875           row->n_deletes = found_id->n_deletes;
5876           found_id->n_inserts = 0;
5877           found_id->n_updates = 0;
5878           found_id->n_deletes = 0;
5879         }
5880         /* keep track of schema ops only on "first" index_row */
5881         row->n_schemaops = first->n_schemaops;
5882         first->n_schemaops = 0;
5883         break;
5884       }
5885     }
5886     row->orig_server_id = orig_server_id;
5887   }
5888   return row;
5889 }
5890 
5891 static int handle_data_event(NdbEventOperation *pOp,
5892                              ndb_binlog_index_row **rows,
5893                              injector::transaction &trans,
5894                              unsigned &trans_row_count,
5895                              unsigned &trans_slave_row_count) {
5896   Ndb_event_data *event_data = (Ndb_event_data *)pOp->getCustomData();
5897   TABLE *table = event_data->shadow_table;
5898   NDB_SHARE *share = event_data->share;
5899   bool reflected_op = false;
5900   bool refresh_op = false;
5901   bool read_op = false;
5902 
5903   if (pOp != share->op) {
5904     return 0;
5905   }
5906 
5907   uint32 anyValue = pOp->getAnyValue();
5908   if (ndbcluster_anyvalue_is_reserved(anyValue)) {
5909     if (ndbcluster_anyvalue_is_nologging(anyValue)) return 0;
5910 
5911     if (ndbcluster_anyvalue_is_reflect_op(anyValue)) {
5912       DBUG_PRINT("info", ("Anyvalue -> Reflect (%u)", anyValue));
5913       reflected_op = true;
5914       anyValue = 0;
5915     } else if (ndbcluster_anyvalue_is_refresh_op(anyValue)) {
5916       DBUG_PRINT("info", ("Anyvalue -> Refresh"));
5917       refresh_op = true;
5918       anyValue = 0;
5919     } else if (ndbcluster_anyvalue_is_read_op(anyValue)) {
5920       DBUG_PRINT("info", ("Anyvalue -> Read"));
5921       read_op = true;
5922       anyValue = 0;
5923     } else {
5924       ndb_log_warning(
5925           "unknown value for binlog signalling 0x%X, "
5926           "event not logged",
5927           anyValue);
5928       return 0;
5929     }
5930   }
5931 
5932   uint32 originating_server_id = ndbcluster_anyvalue_get_serverid(anyValue);
5933   bool log_this_slave_update = g_ndb_log_slave_updates;
5934   bool count_this_event = true;
5935 
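  // Rows on mysql.ndb_apply_status get special treatment: depending on
  // --ndb-log-apply-status and --ndb-log-orig they are used to extract the
  // originating server id and epoch, re-logged with our server id, or
  // discarded without being written to the binlog.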
5936   if (share == ndb_apply_status_share) {
5937     /*
5938        Note that option values are read without synchronisation w.r.t.
5939        the thread setting the option variable, or epoch boundaries.
5940     */
5941     if (opt_ndb_log_apply_status || opt_ndb_log_orig) {
5942       Uint32 ndb_apply_status_logging_server_id = originating_server_id;
5943       Uint32 ndb_apply_status_server_id = 0;
5944       Uint64 ndb_apply_status_epoch = 0;
5945       bool event_has_data = false;
5946 
5947       switch (pOp->getEventType()) {
5948         case NDBEVENT::TE_INSERT:
5949         case NDBEVENT::TE_UPDATE:
5950           event_has_data = true;
5951           break;
5952 
5953         case NDBEVENT::TE_DELETE:
5954           break;
5955         default:
5956           /* We should REALLY never get here */
5957           abort();
5958       }
5959 
5960       if (likely(event_has_data)) {
5961         /* unpack data to fetch orig_server_id and orig_epoch */
5962         MY_BITMAP b;
5963         uint32 bitbuf[128 / (sizeof(uint32) * 8)];
5964         ndb_bitmap_init(b, bitbuf, table->s->fields);
5965         bitmap_copy(&b, &event_data->stored_columns);
5966         ndb_unpack_record(table, event_data->ndb_value[0], &b,
5967                           table->record[0]);
5968         ndb_apply_status_server_id =
5969             (uint)((Field_long *)table->field[0])->val_int();
5970         ndb_apply_status_epoch = ((Field_longlong *)table->field[1])->val_int();
5971 
5972         if (opt_ndb_log_apply_status) {
5973           /*
5974              Determine if event came from our immediate Master server
5975              Ignore locally manually sourced and reserved events
5976           */
5977           if ((ndb_apply_status_logging_server_id != 0) &&
5978               (!ndbcluster_anyvalue_is_reserved(
5979                   ndb_apply_status_logging_server_id))) {
5980             bool isFromImmediateMaster = (ndb_apply_status_server_id ==
5981                                           ndb_apply_status_logging_server_id);
5982 
5983             if (isFromImmediateMaster) {
5984               /*
5985                  We log this event with our server-id so that it
5986                  propagates back to the originating Master (our
5987                  immediate Master)
5988               */
5989               assert(ndb_apply_status_logging_server_id != ::server_id);
5990 
5991               originating_server_id =
5992                   0; /* Will be set to our ::serverid below */
5993             }
5994           }
5995         }
5996 
5997         if (opt_ndb_log_orig) {
5998           /* store */
5999           ndb_binlog_index_row *row =
6000               ndb_find_binlog_index_row(rows, ndb_apply_status_server_id, 1);
6001           row->orig_epoch = ndb_apply_status_epoch;
6002         }
6003       }
6004     }  // opt_ndb_log_apply_status || opt_ndb_log_orig)
6005 
6006     if (opt_ndb_log_apply_status) {
6007       /* We are logging ndb_apply_status changes
6008        * Don't count this event as making an epoch non-empty
6009        * Log this event in the Binlog
6010        */
6011       count_this_event = false;
6012       log_this_slave_update = true;
6013     } else {
6014       /* Not logging ndb_apply_status updates, discard this event now */
6015       return 0;
6016     }
6017   }
6018 
6019   if (originating_server_id == 0)
6020     originating_server_id = ::server_id;
6021   else {
6022     assert(!reflected_op && !refresh_op);
6023     /* Track that we received a replicated row event */
6024     if (likely(count_this_event)) trans_slave_row_count++;
6025 
6026     if (!log_this_slave_update) {
6027       /*
6028         This event comes from a slave applier since it has an originating
6029         server id set. Since option to log slave updates is not set, skip it.
6030       */
6031       return 0;
6032     }
6033   }
6034 
6035   /*
6036      Start with logged_server_id as AnyValue in case it's a composite
6037      (server_id_bits < 31).  This way any user-values are passed-through
6038      to the Binlog in the high bits of the event's Server Id.
6039      In future it may be useful to support *not* mapping composite
6040      AnyValues to/from Binlogged server-ids.
6041   */
6042   uint32 logged_server_id = anyValue;
6043   ndbcluster_anyvalue_set_serverid(logged_server_id, originating_server_id);
6044 
6045   /*
6046      Get NdbApi transaction id for this event to put into Binlog
6047   */
6048   Ndb_binlog_extra_row_info extra_row_info;
6049   const unsigned char *extra_row_info_ptr = NULL;
6050   Uint16 erif_flags = 0;
6051   if (opt_ndb_log_transaction_id) {
6052     erif_flags |= Ndb_binlog_extra_row_info::NDB_ERIF_TRANSID;
6053     extra_row_info.setTransactionId(pOp->getTransId());
6054   }
6055 
6056   /* Set conflict flags member if necessary */
6057   Uint16 event_conflict_flags = 0;
6058   assert(!(reflected_op && refresh_op));
6059   if (reflected_op) {
6060     event_conflict_flags |= NDB_ERIF_CFT_REFLECT_OP;
6061   } else if (refresh_op) {
6062     event_conflict_flags |= NDB_ERIF_CFT_REFRESH_OP;
6063   } else if (read_op) {
6064     event_conflict_flags |= NDB_ERIF_CFT_READ_OP;
6065   }
6066 
6067   if (DBUG_EVALUATE_IF("ndb_injector_set_event_conflict_flags", true, false)) {
6068     event_conflict_flags = 0xfafa;
6069   }
6070   if (event_conflict_flags != 0) {
6071     erif_flags |= Ndb_binlog_extra_row_info::NDB_ERIF_CFT_FLAGS;
6072     extra_row_info.setConflictFlags(event_conflict_flags);
6073   }
6074 
6075   if (erif_flags != 0) {
6076     extra_row_info.setFlags(erif_flags);
6077     if (likely(!log_bin_use_v1_row_events)) {
6078       extra_row_info_ptr = extra_row_info.generateBuffer();
6079     } else {
6080       /**
6081        * Can't put the metadata in a v1 event
6082        * Produce 1 warning at most
6083        */
6084       if (!g_injector_v1_warning_emitted) {
6085         ndb_log_error(
6086             "Binlog Injector discarding row event "
6087             "meta data as server is using v1 row events. "
6088             "(%u %x)",
6089             opt_ndb_log_transaction_id, event_conflict_flags);
6090 
6091         g_injector_v1_warning_emitted = true;
6092       }
6093     }
6094   }
6095 
6096   DBUG_ASSERT(trans.good());
6097   DBUG_ASSERT(table != 0);
6098 
6099 #ifndef DBUG_OFF
6100   Ndb_table_map::print_table("table", table);
6101 #endif
6102 
6103   MY_BITMAP b;
6104   my_bitmap_map
6105       bitbuf[(NDB_MAX_ATTRIBUTES_IN_TABLE + 8 * sizeof(my_bitmap_map) - 1) /
6106              (8 * sizeof(my_bitmap_map))];
6107   ndb_bitmap_init(b, bitbuf, table->s->fields);
6108   bitmap_copy(&b, &event_data->stored_columns);
6109   if (bitmap_is_clear_all(&b)) {
6110     DBUG_PRINT("info", ("Skip logging of event without stored columns"));
6111     return 0;
6112   }
6113 
6114   /*
6115    Row data is already in table->record[0] since we told the
6116    NdbEventOperation to place it there
6117    (saves moving the data about many times)
6118   */
6119 
6120   /*
6121     for now malloc/free blobs buffer each time
6122     TODO if possible share single permanent buffer with handlers
6123    */
6124   uchar *blobs_buffer[2] = {0, 0};
6125   uint blobs_buffer_size[2] = {0, 0};
6126 
6127   ndb_binlog_index_row *row =
6128       ndb_find_binlog_index_row(rows, originating_server_id, 0);
6129 
6130   switch (pOp->getEventType()) {
6131     case NDBEVENT::TE_INSERT:
6132       if (likely(count_this_event)) {
6133         row->n_inserts++;
6134         trans_row_count++;
6135       }
6136       DBUG_PRINT("info", ("INSERT INTO %s.%s", table->s->db.str,
6137                           table->s->table_name.str));
6138       {
6139         int ret;
6140         (void)ret;  // Bug27150740 HANDLE_DATA_EVENT NEED ERROR HANDLING
6141         if (event_data->have_blobs) {
6142           ptrdiff_t ptrdiff = 0;
6143           ret = get_ndb_blobs_value(table, event_data->ndb_value[0],
6144                                     blobs_buffer[0], blobs_buffer_size[0],
6145                                     ptrdiff);
6146           assert(ret == 0);
6147         }
6148         ndb_unpack_record(table, event_data->ndb_value[0], &b,
6149                           table->record[0]);
6150         ret = trans.write_row(logged_server_id,
6151                               injector::transaction::table(table, true), &b,
6152                               table->record[0], extra_row_info_ptr);
6153         assert(ret == 0);
6154       }
6155       break;
6156     case NDBEVENT::TE_DELETE:
6157       if (likely(count_this_event)) {
6158         row->n_deletes++;
6159         trans_row_count++;
6160       }
6161       DBUG_PRINT("info", ("DELETE FROM %s.%s", table->s->db.str,
6162                           table->s->table_name.str));
6163       {
6164         /*
6165           table->record[0] contains only the primary key in this case
6166           since we do not have an after image
6167         */
6168         int n;
6169         if (!share->get_binlog_full() && table->s->primary_key != MAX_KEY)
6170           n = 0; /*
6171                    use the primary key only as it saves time and space and
6172                    it is the only thing needed to log the delete
6173                  */
6174         else
6175           n = 1; /*
6176                    we use the before values since the table has no primary key
6177                    visible to the mysql server, which cannot handle the hidden
6178                    primary key
6179                  */
6180 
6181         int ret;
6182         (void)ret;  // Bug27150740 HANDLE_DATA_EVENT NEED ERROR HANDLING
6183         if (event_data->have_blobs) {
6184           ptrdiff_t ptrdiff = table->record[n] - table->record[0];
6185           ret = get_ndb_blobs_value(table, event_data->ndb_value[n],
6186                                     blobs_buffer[n], blobs_buffer_size[n],
6187                                     ptrdiff);
6188           assert(ret == 0);
6189         }
6190         ndb_unpack_record(table, event_data->ndb_value[n], &b,
6191                           table->record[n]);
6192         DBUG_EXECUTE("info",
6193                      Ndb_table_map::print_record(table, table->record[n]););
6194         ret = trans.delete_row(logged_server_id,
6195                                injector::transaction::table(table, true), &b,
6196                                table->record[n], extra_row_info_ptr);
6197         assert(ret == 0);
6198       }
6199       break;
6200     case NDBEVENT::TE_UPDATE:
6201       if (likely(count_this_event)) {
6202         row->n_updates++;
6203         trans_row_count++;
6204       }
6205       DBUG_PRINT("info",
6206                  ("UPDATE %s.%s", table->s->db.str, table->s->table_name.str));
6207       {
6208         int ret;
6209         (void)ret;  // Bug27150740 HANDLE_DATA_EVENT NEED ERROR HANDLING
6210         if (event_data->have_blobs) {
6211           ptrdiff_t ptrdiff = 0;
6212           ret = get_ndb_blobs_value(table, event_data->ndb_value[0],
6213                                     blobs_buffer[0], blobs_buffer_size[0],
6214                                     ptrdiff);
6215           assert(ret == 0);
6216         }
6217         ndb_unpack_record(table, event_data->ndb_value[0], &b,
6218                           table->record[0]);
6219         DBUG_EXECUTE("info",
6220                      Ndb_table_map::print_record(table, table->record[0]););
6221         if (table->s->primary_key != MAX_KEY &&
6222             !share->get_binlog_use_update()) {
6223           /*
6224             since table has a primary key, we can do a write
6225             using only after values
6226           */
6227           ret = trans.write_row(logged_server_id,
6228                                 injector::transaction::table(table, true), &b,
6229                                 table->record[0],  // after values
6230                                 extra_row_info_ptr);
6231           assert(ret == 0);
6232         } else {
6233           /*
6234             mysql server cannot handle the ndb hidden key and
6235             therefore needs the before image as well
6236           */
6237           if (event_data->have_blobs) {
6238             ptrdiff_t ptrdiff = table->record[1] - table->record[0];
6239             ret = get_ndb_blobs_value(table, event_data->ndb_value[1],
6240                                       blobs_buffer[1], blobs_buffer_size[1],
6241                                       ptrdiff);
6242             assert(ret == 0);
6243           }
6244           ndb_unpack_record(table, event_data->ndb_value[1], &b,
6245                             table->record[1]);
6246           DBUG_EXECUTE("info",
6247                        Ndb_table_map::print_record(table, table->record[1]););
6248 
6249           MY_BITMAP col_bitmap_before_update;
6250           my_bitmap_map bitbuf[(NDB_MAX_ATTRIBUTES_IN_TABLE +
6251                                 8 * sizeof(my_bitmap_map) - 1) /
6252                                (8 * sizeof(my_bitmap_map))];
6253           ndb_bitmap_init(col_bitmap_before_update, bitbuf, table->s->fields);
6254           if (share->get_binlog_update_minimal()) {
6255             event_data->generate_minimal_bitmap(&col_bitmap_before_update, &b);
6256           } else {
6257             bitmap_copy(&col_bitmap_before_update, &b);
6258           }
6259 
6260           ret = trans.update_row(logged_server_id,
6261                                  injector::transaction::table(table, true),
6262                                  &col_bitmap_before_update, &b,
6263                                  table->record[1],  // before values
6264                                  table->record[0],  // after values
6265                                  extra_row_info_ptr);
6266           assert(ret == 0);
6267         }
6268       }
6269       break;
6270     default:
6271       /* We should REALLY never get here. */
6272       DBUG_PRINT("info", ("default - uh oh, a brain exploded."));
6273       break;
6274   }
6275 
6276   if (event_data->have_blobs) {
6277     my_free(blobs_buffer[0]);
6278     my_free(blobs_buffer[1]);
6279   }
6280 
6281   return 0;
6282 }
6283 
6284 /****************************************************************
6285   Injector thread main loop
6286 ****************************************************************/
6287 
6288 void Ndb_binlog_thread::remove_event_operations(Ndb *ndb) const {
6289   DBUG_TRACE;
6290   NdbEventOperation *op;
6291   while ((op = ndb->getEventOperation())) {
6292     DBUG_ASSERT(
6293         !ndb_name_is_blob_prefix(op->getEvent()->getTable()->getName()));
6294     DBUG_PRINT("info",
6295                ("removing event operation on %s", op->getEvent()->getName()));
6296 
6297     Ndb_event_data *event_data = (Ndb_event_data *)op->getCustomData();
6298     DBUG_ASSERT(event_data);
6299 
6300     NDB_SHARE *share = event_data->share;
6301     DBUG_ASSERT(share != NULL);
6302     DBUG_ASSERT(share->op == op);
6303     Ndb_event_data::destroy(event_data);
6304     op->setCustomData(NULL);
6305 
6306     mysql_mutex_lock(&share->mutex);
6307     share->op = 0;
6308     mysql_mutex_unlock(&share->mutex);
6309 
6310     NDB_SHARE::release_reference(share, "binlog");
6311 
6312     ndb->dropEventOperation(op);
6313   }
6314 }
6315 
6316 void Ndb_binlog_thread::remove_all_event_operations(Ndb *s_ndb,
6317                                                     Ndb *i_ndb) const {
6318   DBUG_TRACE;
6319 
6320   if (ndb_apply_status_share) {
6321     NDB_SHARE::release_reference(ndb_apply_status_share,
6322                                  "ndb_apply_status_share");
6323     ndb_apply_status_share = NULL;
6324   }
6325 
6326   if (s_ndb) remove_event_operations(s_ndb);
6327 
6328   if (i_ndb) remove_event_operations(i_ndb);
6329 
6330   if (ndb_log_get_verbose_level() > 15) {
6331     NDB_SHARE::print_remaining_open_tables();
6332   }
6333 }
6334 
6335 static long long g_event_data_count = 0;
6336 static long long g_event_nondata_count = 0;
6337 static long long g_event_bytes_count = 0;
6338 
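// Snapshot the NdbApi event receive counters from both the schema and the
// data Ndb object, the sums are exposed as the api_event_*_count_injector
// status variables defined below.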
6339 static void update_injector_stats(Ndb *schemaNdb, Ndb *dataNdb) {
6340   // Update globals to sum of totals from each listening Ndb object
6341   g_event_data_count = schemaNdb->getClientStat(Ndb::DataEventsRecvdCount) +
6342                        dataNdb->getClientStat(Ndb::DataEventsRecvdCount);
6343   g_event_nondata_count =
6344       schemaNdb->getClientStat(Ndb::NonDataEventsRecvdCount) +
6345       dataNdb->getClientStat(Ndb::NonDataEventsRecvdCount);
6346   g_event_bytes_count = schemaNdb->getClientStat(Ndb::EventBytesRecvdCount) +
6347                         dataNdb->getClientStat(Ndb::EventBytesRecvdCount);
6348 }
6349 
6350 static SHOW_VAR ndb_status_vars_injector[] = {
6351     {"api_event_data_count_injector",
6352      reinterpret_cast<char *>(&g_event_data_count), SHOW_LONGLONG,
6353      SHOW_SCOPE_GLOBAL},
6354     {"api_event_nondata_count_injector",
6355      reinterpret_cast<char *>(&g_event_nondata_count), SHOW_LONGLONG,
6356      SHOW_SCOPE_GLOBAL},
6357     {"api_event_bytes_count_injector",
6358      reinterpret_cast<char *>(&g_event_bytes_count), SHOW_LONGLONG,
6359      SHOW_SCOPE_GLOBAL},
6360     {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}};
6361 
6362 int show_ndb_status_injector(THD *, SHOW_VAR *var, char *) {
6363   var->type = SHOW_ARRAY;
6364   var->value = reinterpret_cast<char *>(&ndb_status_vars_injector);
6365   return 0;
6366 }
6367 
6368 /**
6369    injectApplyStatusWriteRow
6370 
6371    Inject a WRITE_ROW event on the ndb_apply_status table into
6372    the Binlog.
6373    This contains our server_id and the supplied epoch number.
6374    When applied on the Slave it gives a transactional position
6375    marker
6376 */
6377 static bool injectApplyStatusWriteRow(injector::transaction &trans,
6378                                       ulonglong gci) {
6379   DBUG_TRACE;
6380   if (ndb_apply_status_share == NULL) {
6381     ndb_log_error("Could not get apply status share");
6382     DBUG_ASSERT(ndb_apply_status_share != NULL);
6383     return false;
6384   }
6385 
6386   longlong gci_to_store = (longlong)gci;
6387 
6388 #ifndef DBUG_OFF
6389   if (DBUG_EVALUATE_IF("ndb_binlog_injector_cycle_gcis", true, false)) {
6390     ulonglong gciHi = ((gci_to_store >> 32) & 0xffffffff);
6391     ulonglong gciLo = (gci_to_store & 0xffffffff);
6392     gciHi = (gciHi % 3);
6393     ndb_log_warning("Binlog injector cycling gcis (%llu -> %llu)", gci_to_store,
6394                     (gciHi << 32) + gciLo);
6395     gci_to_store = (gciHi << 32) + gciLo;
6396   }
6397   if (DBUG_EVALUATE_IF("ndb_binlog_injector_repeat_gcis", true, false)) {
6398     ulonglong gciHi = ((gci_to_store >> 32) & 0xffffffff);
6399     ulonglong gciLo = (gci_to_store & 0xffffffff);
6400     gciHi = 0xffffff00;
6401     gciLo = 0;
6402     ndb_log_warning("Binlog injector repeating gcis (%llu -> %llu)",
6403                     gci_to_store, (gciHi << 32) + gciLo);
6404     gci_to_store = (gciHi << 32) + gciLo;
6405   }
6406 #endif
6407 
6408   /* Build row buffer for generated ndb_apply_status
6409      WRITE_ROW event
6410      First get the relevant table structure.
6411   */
6412   DBUG_ASSERT(ndb_apply_status_share->op);
6413   Ndb_event_data *event_data =
6414       (Ndb_event_data *)ndb_apply_status_share->op->getCustomData();
6415   DBUG_ASSERT(event_data);
6416   DBUG_ASSERT(event_data->shadow_table);
6417   TABLE *apply_status_table = event_data->shadow_table;
6418 
6419   /*
6420     Initialize apply_status_table->record[0]
6421 
6422     When iterating past the end of the last epoch, the first event of
6423     the new epoch may be on ndb_apply_status.  Its event data saved
6424     in record[0] would be overwritten here by a subsequent event on a
6425     normal table.  So save and restore its record[0].
6426   */
6427   static const ulong sav_max = 512;  // current is 284
6428   const ulong sav_len = apply_status_table->s->reclength;
6429   DBUG_ASSERT(sav_len <= sav_max);
6430   uchar sav_buf[sav_max];
6431   memcpy(sav_buf, apply_status_table->record[0], sav_len);
6432   empty_record(apply_status_table);
6433 
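  // Fill in the ndb_apply_status columns: server_id, epoch, log_name,
  // start_pos and end_pos. Only server_id and epoch carry real values here,
  // the log name and positions are written as empty string respectively zero.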
6434   apply_status_table->field[0]->store((longlong)::server_id, true);
6435   apply_status_table->field[1]->store((longlong)gci_to_store, true);
6436   apply_status_table->field[2]->store("", 0, &my_charset_bin);
6437   apply_status_table->field[3]->store((longlong)0, true);
6438   apply_status_table->field[4]->store((longlong)0, true);
6439 #ifndef DBUG_OFF
6440   const LEX_CSTRING &name = apply_status_table->s->table_name;
6441   DBUG_PRINT("info", ("use_table: %.*s", (int)name.length, name.str));
6442 #endif
6443   injector::transaction::table tbl(apply_status_table, true);
6444   int ret = trans.use_table(::server_id, tbl);
6445   ndbcluster::ndbrequire(ret == 0);
6446 
6447   ret = trans.write_row(
6448       ::server_id, injector::transaction::table(apply_status_table, true),
6449       &apply_status_table->s->all_set, apply_status_table->record[0]);
6450 
6451   assert(ret == 0);
6452 
6453   memcpy(apply_status_table->record[0], sav_buf, sav_len);
6454   return true;
6455 }
6456 
6457 extern ulong opt_ndb_report_thresh_binlog_epoch_slip;
6458 extern ulong opt_ndb_report_thresh_binlog_mem_usage;
6459 extern ulong opt_ndb_eventbuffer_max_alloc;
6460 extern uint opt_ndb_eventbuffer_free_percent;
6461 
6462 Ndb_binlog_thread::Ndb_binlog_thread() : Ndb_component("Binlog") {}
6463 
6464 Ndb_binlog_thread::~Ndb_binlog_thread() {}
6465 
6466 void Ndb_binlog_thread::do_wakeup() {
6467   log_info("Wakeup");
6468 
6469   /*
6470     The binlog thread is normally waiting for another
6471     event from the cluster with short timeout and should
6472     soon (within 1 second) detect that stop has been requested.
6473 
6474     There is really no purpose (yet) in signalling some condition
6475     to wake the thread up should it be waiting somewhere
6476     else, since those waits are also short.
6477   */
6478 }
6479 
6480 bool Ndb_binlog_thread::check_reconnect_incident(
6481     THD *thd, injector *inj, Reconnect_type incident_id) const {
6482   log_verbose(1, "Check for incidents");
6483 
6484   if (incident_id == MYSQLD_STARTUP) {
6485     LOG_INFO log_info;
6486     mysql_bin_log.get_current_log(&log_info);
6487     log_verbose(60, " - current binlog file: %s", log_info.log_file_name);
6488 
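    // The binlog file name ends in a six digit sequence number, parse it to
    // determine whether this is still the very first binlog file, in which
    // case there can be no gap to mark with an incident.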
6489     uint log_number = 0;
6490     if ((sscanf(strend(log_info.log_file_name) - 6, "%u", &log_number) == 1) &&
6491         log_number == 1) {
6492       /*
6493         This is the first binlog file, skip writing incident since
6494         there is really no log to have a gap in
6495       */
6496       log_verbose(60, " - skipping incident for first log, log_number: %u",
6497                   log_number);
6498       return false;  // No incident written
6499     }
6500     log_verbose(60, " - current binlog file number: %u", log_number);
6501   }
6502 
6503   // Write an incident event to the binlog since it's not possible to know what
6504   // has happened in the cluster while not being connected.
6505   LEX_CSTRING msg;
6506   switch (incident_id) {
6507     case MYSQLD_STARTUP:
6508       msg = {STRING_WITH_LEN("mysqld startup")};
6509       break;
6510     case CLUSTER_DISCONNECT:
6511       msg = {STRING_WITH_LEN("cluster disconnect")};
6512       break;
6513   }
6514   log_verbose(20, "Writing incident for %s", msg.str);
6515   (void)inj->record_incident(
6516       thd, binary_log::Incident_event::INCIDENT_LOST_EVENTS, msg);
6517 
6518   return true;  // Incident written
6519 }
6520 
6521 bool Ndb_binlog_thread::handle_purge(const char *filename) {
6522   if (is_server_started()) {
6523     // The binlog thread currently only handles purge requests
6524     // that occur before "server started"
6525     return false;
6526   }
6527 
6528   // The "server started" state is not yet reached, defer the purge request of
6529   // this binlog file to later and handle it just before entering main loop
6530   log_verbose(1, "Remember purge binlog file: '%s'", filename);
6531   std::lock_guard<std::mutex> lock_pending_purges(m_purge_mutex);
6532   m_pending_purges.push_back(filename);
6533   return true;
6534 }
6535 
6536 void Ndb_binlog_thread::recall_pending_purges(THD *thd) {
6537   std::lock_guard<std::mutex> lock_pending_purges(m_purge_mutex);
6538 
6539   // Iterate list of pending purges and delete corresponding
6540   // rows from ndb_binlog_index table
6541   for (const std::string &filename : m_pending_purges) {
6542     log_verbose(1, "Purging binlog file: '%s'", filename.c_str());
6543 
6544     if (Ndb_binlog_index_table_util::remove_rows_for_file(thd,
6545                                                           filename.c_str())) {
6546       log_warning("Failed to purge binlog file: '%s'", filename.c_str());
6547     }
6548   }
6549   // All pending purges performed, clear the list
6550   m_pending_purges.clear();
6551 }
6552 
6553 /*
6554   Events are handled one epoch at a time.
6555   Handle the lowest available epoch first.
6556 */
6557 static Uint64 find_epoch_to_handle(const NdbEventOperation *s_pOp,
6558                                    const NdbEventOperation *i_pOp) {
6559   if (i_pOp != NULL) {
6560     if (s_pOp != NULL) {
6561       return std::min(i_pOp->getEpoch(), s_pOp->getEpoch());
6562     }
6563     return i_pOp->getEpoch();
6564   }
6565   if (s_pOp != NULL) {
6566     if (ndb_binlog_running) {
6567       return std::min(ndb_latest_received_binlog_epoch, s_pOp->getEpoch());
6568     }
6569     return s_pOp->getEpoch();
6570   }
6571   // 'latest_received' is '0' if not binlogging
6572   return ndb_latest_received_binlog_epoch;
6573 }
6574 
6575 void Ndb_binlog_thread::do_run() {
6576   THD *thd; /* needs to be first for thread_stack */
6577   Ndb *i_ndb = NULL;
6578   Ndb *s_ndb = NULL;
6579   Thd_ndb *thd_ndb = NULL;
6580   injector *inj = injector::instance();
6581   Global_THD_manager *thd_manager = Global_THD_manager::get_instance();
6582 
6583   enum {
6584     BCCC_starting,
6585     BCCC_running,
6586     BCCC_restart,
6587   } binlog_thread_state;
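  // BCCC_starting: initial setup not yet completed, BCCC_running: normal
  // epoch processing, BCCC_restart: tear down and restart the binlog thread
  // after the connection to the cluster has been lost.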
6588 
6589   /* Controls that only one incident is written per reconnect */
6590   bool do_reconnect_incident = true;
6591   /* Controls message of the reconnect incident */
6592   Reconnect_type reconnect_incident_id = MYSQLD_STARTUP;
6593 
6594   DBUG_TRACE;
6595 
6596   log_info("Starting...");
6597 
6598   thd = new THD; /* note that constructor of THD uses DBUG_ */
6599   THD_CHECK_SENTRY(thd);
6600 
6601   /* We need to set thd->thread_id before thd->store_globals, or it will
6602      set an invalid value for thd->variables.pseudo_thread_id.
6603   */
6604   thd->set_new_thread_id();
6605 
6606   thd->thread_stack = (char *)&thd; /* remember where our stack is */
6607   thd->store_globals();
6608 
6609   thd->set_command(COM_DAEMON);
6610   thd->system_thread = SYSTEM_THREAD_NDBCLUSTER_BINLOG;
6611   thd->get_protocol_classic()->set_client_capabilities(0);
6612   thd->security_context()->skip_grants();
6613   // Create thd->net without vio
6614   thd->get_protocol_classic()->init_net((Vio *)0);
6615 
6616   // Ndb binlog thread always use row format
6617   thd->set_current_stmt_binlog_format_row();
6618 
6619   thd->real_id = my_thread_self();
6620   thd_manager->add_thd(thd);
6621   thd->lex->start_transaction_opt = 0;
6622 
6623   log_info("Started");
6624 
6625   Ndb_binlog_setup binlog_setup(thd);
6626   Ndb_schema_dist_data schema_dist_data;
6627 
6628 restart_cluster_failure:
6629   /**
6630    * Maintain a current schema & injector eventOp to be handled.
6631    * s_pOp and s_ndb handle events from the 'ndb_schema' dist table,
6632    * while i_pOp and i_ndb is for binlogging 'everything else'.
6633    */
6634   NdbEventOperation *s_pOp = NULL;
6635   NdbEventOperation *i_pOp = NULL;
6636   binlog_thread_state = BCCC_starting;
6637 
6638   log_verbose(1, "Setting up");
6639 
6640   if (!(thd_ndb = Thd_ndb::seize(thd))) {
6641     log_error("Creating Thd_ndb object failed");
6642     goto err;
6643   }
6644   thd_ndb->set_option(Thd_ndb::NO_LOG_SCHEMA_OP);
6645 
6646   if (!(s_ndb = new (std::nothrow) Ndb(g_ndb_cluster_connection)) ||
6647       s_ndb->setNdbObjectName("schema change monitoring") || s_ndb->init()) {
6648     log_error("Creating schema Ndb object failed");
6649     goto err;
6650   }
6651   log_verbose(49, "Created schema Ndb object, reference: 0x%x, name: '%s'",
6652               s_ndb->getReference(), s_ndb->getNdbObjectName());
6653 
6654   // The injector Ndb object is created without specifying a database name
6655   if (!(i_ndb = new (std::nothrow) Ndb(g_ndb_cluster_connection)) ||
6656       i_ndb->setNdbObjectName("data change monitoring") || i_ndb->init()) {
6657     log_error("Creating injector Ndb object failed");
6658     goto err;
6659   }
6660   log_verbose(49, "Created injector Ndb object, reference: 0x%x, name: '%s'",
6661               i_ndb->getReference(), i_ndb->getNdbObjectName());
6662 
6663   /* Set the percentage of free event buffer needed before buffering resumes */
6664   if (i_ndb->set_eventbuffer_free_percent(opt_ndb_eventbuffer_free_percent)) {
6665     log_error("Setting eventbuffer free percent failed");
6666     goto err;
6667   }
6668 
6669   log_verbose(10, "Exposing global references");
6670   /*
6671     Expose global reference to our Ndb object.
6672 
6673     Used by both sql client threads and the binlog thread to interact
6674     with NDB.
6675   */
6676   mysql_mutex_lock(&injector_event_mutex);
6677   injector_thd = thd;
6678   injector_ndb = i_ndb;
6679   schema_ndb = s_ndb;
6680   DBUG_PRINT("info", ("set schema_ndb to s_ndb"));
6681   mysql_mutex_unlock(&injector_event_mutex);
6682 
6683   if (opt_bin_log && opt_ndb_log_bin) {
6684     // Binary log has been enabled for the server and changes
6685     // to NDB tables should be logged
6686     ndb_binlog_running = true;
6687   }
6688   log_verbose(1, "Setup completed");
6689 
6690   /*
6691     Wait for the MySQL Server to start (so that the binlog is started
6692     and thus can receive the first GAP event)
6693   */
6694   if (!wait_for_server_started()) {
6695     goto err;
6696   }
6697 
6698   // Defer call of THD::init_query_mem_roots until after
6699   // wait_for_server_started() to ensure that the parts of
6700   // MySQL Server it uses have been created
6701   thd->init_query_mem_roots();
6702   lex_start(thd);
6703 
6704   if (do_reconnect_incident && ndb_binlog_running) {
6705     if (check_reconnect_incident(thd, inj, reconnect_incident_id)) {
6706       // Incident written, don't report incident again unless Ndb_binlog_thread
6707       // is restarted
6708       do_reconnect_incident = false;
6709     }
6710   }
6711   reconnect_incident_id = CLUSTER_DISCONNECT;
6712 
6713   // Handle pending purge requests from before "server started" state
6714   recall_pending_purges(thd);
6715 
6716   {
6717     log_verbose(1, "Wait for cluster to start");
6718     thd->proc_info = "Waiting for ndbcluster to start";
6719     thd_set_thd_ndb(thd, thd_ndb);
6720 
6721     while (!ndbcluster_is_connected(1) || !binlog_setup.setup(thd_ndb)) {
6722       // Failed to complete binlog_setup, remove all existing event
6723       // operations from potential partial setup
6724       remove_all_event_operations(s_ndb, i_ndb);
6725 
6726       // Fail any schema operations that have been registered but
6727       // never reached the coordinator
6728       NDB_SCHEMA_OBJECT::fail_all_schema_ops(Ndb_schema_dist::COORD_ABORT,
6729                                              "Aborted after setup");
6730 
6731       if (!thd_ndb->valid_ndb()) {
6732         /*
6733           Cluster has gone away before setup was completed.
6734           Restart binlog
6735           thread to get rid of any garbage on the ndb objects
6736         */
6737         binlog_thread_state = BCCC_restart;
6738         goto err;
6739       }
6740       if (is_stop_requested()) {
6741         goto err;
6742       }
6743       if (thd->killed == THD::KILL_CONNECTION) {
6744         /*
6745           Since the ndb binlog thread adds itself to the "global thread list"
6746           it needs to look at the "killed" flag and stop the thread so that
6747           the server does not hang during shutdown while waiting for the
6748           "global thread list" to be empty.
6749         */
6750         log_info(
6751             "Server shutdown detected while "
6752             "waiting for ndbcluster to start...");
6753         goto err;
6754       }
6755       log_and_clear_thd_conditions(thd, condition_logging_level::WARNING);
6756       ndb_milli_sleep(1000);
6757     }  // while (!ndbcluster_is_connected() || !binlog_setup.setup())
6758 
6759     DBUG_ASSERT(ndbcluster_hton->slot != ~(uint)0);
6760 
6761     /*
6762       Prevent schema dist participant from (implicitly)
6763       taking GSL lock as part of taking MDL lock
6764     */
6765     thd_ndb->set_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT);
6766   }
6767 
6768   /* Apply privilege statements stored in snapshot */
6769   if (!Ndb_stored_grants::apply_stored_grants(thd)) {
6770     ndb_log_error("stored grants: failed to apply stored grants.");
6771   }
6772 
6773   schema_dist_data.init(g_ndb_cluster_connection);
6774 
6775   {
6776     log_verbose(1, "Wait for first event");
6777     // wait for the first event
6778     thd->proc_info = "Waiting for first event from ndbcluster";
6779     Uint64 schema_gci;
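    // Poll until the schema Ndb object has seen an epoch and, after a
    // restart, until it differs from the last epoch received before the
    // restart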
6780     do {
6781       DBUG_PRINT("info", ("Waiting for the first event"));
6782 
6783       if (is_stop_requested()) goto err;
6784 
6785       my_thread_yield();
6786       mysql_mutex_lock(&injector_event_mutex);
6787       (void)s_ndb->pollEvents(100, &schema_gci);
6788       mysql_mutex_unlock(&injector_event_mutex);
6789     } while (schema_gci == 0 || ndb_latest_received_binlog_epoch == schema_gci);
6790 
6791     if (ndb_binlog_running) {
6792       Uint64 gci = i_ndb->getLatestGCI();
6793       while (gci < schema_gci || gci == ndb_latest_received_binlog_epoch) {
6794         if (is_stop_requested()) goto err;
6795 
6796         my_thread_yield();
6797         mysql_mutex_lock(&injector_event_mutex);
6798         (void)i_ndb->pollEvents(10, &gci);
6799         mysql_mutex_unlock(&injector_event_mutex);
6800       }
6801       if (gci > schema_gci) {
6802         schema_gci = gci;
6803       }
6804     }
6805     // now check that we have epochs consistent with what we had before the
6806     // restart
6807     DBUG_PRINT("info", ("schema_gci: %u/%u", (uint)(schema_gci >> 32),
6808                         (uint)(schema_gci)));
6809     {
6810       i_ndb->flushIncompleteEvents(schema_gci);
6811       s_ndb->flushIncompleteEvents(schema_gci);
6812       if (schema_gci < ndb_latest_handled_binlog_epoch) {
6813         log_error(
6814             "cluster has been restarted --initial or with older filesystem. "
6815             "ndb_latest_handled_binlog_epoch: %u/%u, while current epoch: "
6816             "%u/%u. "
6817             "RESET MASTER should be issued. Resetting "
6818             "ndb_latest_handled_binlog_epoch.",
6819             (uint)(ndb_latest_handled_binlog_epoch >> 32),
6820             (uint)(ndb_latest_handled_binlog_epoch), (uint)(schema_gci >> 32),
6821             (uint)(schema_gci));
6822         ndb_set_latest_trans_gci(0);
6823         ndb_latest_handled_binlog_epoch = 0;
6824         ndb_latest_applied_binlog_epoch = 0;
6825         ndb_latest_received_binlog_epoch = 0;
6826         ndb_index_stat_restart();
6827       } else if (ndb_latest_applied_binlog_epoch > 0) {
6828         log_warning(
6829             "cluster has reconnected. "
6830             "Changes to the database that occurred while "
6831             "disconnected will not be in the binlog");
6832       }
6833       log_verbose(1, "starting log at epoch %u/%u", (uint)(schema_gci >> 32),
6834                   (uint)(schema_gci));
6835     }
6836     log_verbose(1, "Got first event");
6837   }
6838   /*
6839     binlog thread is ready to receive events
6840     - client threads may now start updating data, i.e. tables are
6841     no longer read only
6842   */
6843   mysql_mutex_lock(&injector_data_mutex);
6844   ndb_binlog_is_ready = true;
6845   mysql_mutex_unlock(&injector_data_mutex);
6846 
6847   log_verbose(1, "ndb tables writable");
6848   ndb_tdc_close_cached_tables();
6849 
6850   /*
6851      Signal any waiting thread that ndb table setup is
6852      now complete
6853   */
6854   ndb_notify_tables_writable();
6855 
6856   {
6857     static LEX_CSTRING db_lex_cstr = EMPTY_CSTR;
6858     thd->reset_db(db_lex_cstr);
6859   }
6860 
6861   log_verbose(1, "Startup and setup completed");
6862 
6863   /*
6864     Main NDB Injector loop
6865   */
6866   do_reconnect_incident = true;  // Report incident if disconnected
6867   binlog_thread_state = BCCC_running;
6868 
6869   /**
6870    * The injector loop runs until it takes itself out of the 'BCCC_running'
6871    * state, or we get a stop request from outside. In the latter case we ensure that
6872    * all ongoing transaction epochs are completed first.
6873    */
6874   while (binlog_thread_state == BCCC_running &&
6875          (!is_stop_requested() ||
6876           ndb_latest_handled_binlog_epoch < ndb_get_latest_trans_gci())) {
6877 #ifndef DBUG_OFF
6878     /**
6879      * As the Binlog thread is not a client thread, the 'set debug' commands
6880      * do not affect it. Update our thread-local debug settings from 'global'
6881      */
6882     {
6883       char buf[256];
6884       DBUG_EXPLAIN_INITIAL(buf, sizeof(buf));
6885       DBUG_SET(buf);
6886     }
6887 #endif
6888 
6889     /*
6890       now we don't want any events before next gci is complete
6891     */
6892     thd->proc_info = "Waiting for event from ndbcluster";
6893     thd->set_time();
6894 
6895     /**
6896      * The binlog-thread holds the injector_mutex when waiting for
6897      * pollEvents() - which is >99% of the elapsed time. As the
6898      * native mutex guarantees no 'fairness', there is no guarantee
6899      * that another thread waiting for the mutex will immediately
6900      * get the lock when unlocked by this thread. Thus this thread
6901      * may lock it again rather soon and starve the waiting thread.
6902      * To avoid this, my_thread_yield() is used to give any waiting
6903      * threads a chance to run and grab the injector_mutex when
6904      * it is available. The same pattern is used multiple places
6905      * in the BI-thread where there are wait-loops holding this mutex.
6906      */
6907     my_thread_yield();
6908 
6909     /* Can't hold mutex too long, so wait for events in 10ms steps */
6910     int tot_poll_wait = 10;
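    // The 10ms poll budget is shared between the injector and schema
    // pollEvents() calls below: when binlogging, the injector poll consumes
    // it and the schema poll then uses a zero timeout, and vice versa when
    // not binlogging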
6911 
6912     // If there are remaining unhandled injector eventOps we continue
6913     // handling these, else poll for more.
6914     if (i_pOp == NULL) {
6915       // Capture any dynamic changes to max_alloc
6916       i_ndb->set_eventbuf_max_alloc(opt_ndb_eventbuffer_max_alloc);
6917 
6918       mysql_mutex_lock(&injector_event_mutex);
6919       Uint64 latest_epoch = 0;
6920       const int poll_wait = (ndb_binlog_running) ? tot_poll_wait : 0;
6921       const int res = i_ndb->pollEvents(poll_wait, &latest_epoch);
6922       (void)res;  // Unused except DBUG_PRINT
6923       mysql_mutex_unlock(&injector_event_mutex);
6924       i_pOp = i_ndb->nextEvent();
6925       if (ndb_binlog_running) {
6926         ndb_latest_received_binlog_epoch = latest_epoch;
6927         tot_poll_wait = 0;
6928       }
6929       DBUG_PRINT("info", ("pollEvents res: %d", res));
6930     }
6931 
6932     // Epoch to handle from i_ndb. Use latest 'empty epoch' if no events.
6933     const Uint64 i_epoch =
6934         (i_pOp != NULL) ? i_pOp->getEpoch() : ndb_latest_received_binlog_epoch;
6935 
6936     // If there are remaining unhandled schema eventOps we continue
6937     // handling these, else poll for more.
6938     if (s_pOp == NULL) {
6939       if (DBUG_EVALUATE_IF("ndb_binlog_injector_yield_before_schema_pollEvent",
6940                            true, false)) {
6941         /**
6942          * Simulate that the binlog thread yields the CPU in between
6943          * these two pollEvents, which can result in reading a
6944          * 'schema_gci > gci'. (Likely due to mutex locking)
6945          */
6946         ndb_milli_sleep(50);
6947       }
6948 
6949       Uint64 schema_epoch = 0;
6950       mysql_mutex_lock(&injector_event_mutex);
6951       int schema_res = s_ndb->pollEvents(tot_poll_wait, &schema_epoch);
6952       mysql_mutex_unlock(&injector_event_mutex);
6953       s_pOp = s_ndb->nextEvent();
6954 
6955       /*
6956         Make sure we have seen any schema epochs up to the injector epoch,
6957         or we have an earlier schema event to handle.
6958       */
6959       while (s_pOp == NULL && i_epoch > schema_epoch && schema_res >= 0) {
6960         static char buf[64];
6961         thd->proc_info = "Waiting for schema epoch";
6962         snprintf(buf, sizeof(buf), "%s %u/%u(%u/%u)", thd->proc_info,
6963                  (uint)(schema_epoch >> 32), (uint)(schema_epoch),
6964                  (uint)(ndb_latest_received_binlog_epoch >> 32),
6965                  (uint)(ndb_latest_received_binlog_epoch));
6966         thd->proc_info = buf;
6967 
6968         my_thread_yield();
6969         mysql_mutex_lock(&injector_event_mutex);
6970         schema_res = s_ndb->pollEvents(10, &schema_epoch);
6971         mysql_mutex_unlock(&injector_event_mutex);
6972         s_pOp = s_ndb->nextEvent();
6973       }
6974     }
6975 
6976     /*
6977       We now have a (possibly empty) set of available events which the
6978       binlog injector should apply. These could span either a single,
6979       or possibly multiple epochs. In order to get the ordering between
6980       schema events and 'ordinary' events injected in a correct order
6981       relative to each other, we apply them one epoch at a time, with
6982       the schema events always applied first.
6983     */
6984 
6985     // Calculate the epoch to handle events from in this iteration.
6986     const Uint64 current_epoch = find_epoch_to_handle(s_pOp, i_pOp);
6987     DBUG_ASSERT(current_epoch != 0 || !ndb_binlog_running);
6988 
6989     // Did someone else request injector thread to stop?
6990     DBUG_ASSERT(binlog_thread_state == BCCC_running);
6991     if (is_stop_requested() &&
6992         (ndb_latest_handled_binlog_epoch >= ndb_get_latest_trans_gci() ||
6993          !ndb_binlog_running))
6994       break; /* Stopping thread */
6995 
6996     if (thd->killed == THD::KILL_CONNECTION) {
6997       /*
6998         Since the ndb binlog thread adds itself to the "global thread list"
6999         it needs to look at the "killed" flag and stop the thread so that
7000         the server does not hang during shutdown while waiting for the
7001         "global thread list" to be empty.
7002         In pre 5.6 versions the thread was also added to "global thread
7003         list" but the "global thread *count*" variable was not incremented
7004         and thus the same problem didn't exist.
7005         The only reason for adding the ndb binlog thread to "global thread
7006         list" is to be able to see the thread state using SHOW PROCESSLIST
7007         and I_S.PROCESSLIST
7008       */
7009       log_info("Server shutdown detected...");
7010       break;
7011     }
7012 
7013     MEM_ROOT **root_ptr = THR_MALLOC;
7014     MEM_ROOT *old_root = *root_ptr;
7015     MEM_ROOT mem_root;
7016     init_sql_alloc(PSI_INSTRUMENT_ME, &mem_root, 4096, 0);
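    // Use a temporary mem_root for this iteration of the loop, it is freed
    // and the previous root restored after the epoch has been handled (see
    // free_root() further below)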
7017 
7018     // The Ndb_schema_event_handler does not necessarily need
7019     // to use the same mem_root (or vice versa)
7020     Ndb_schema_event_handler schema_event_handler(
7021         thd, &mem_root, g_ndb_cluster_connection->node_id(), schema_dist_data);
7022 
7023     *root_ptr = &mem_root;
7024 
7025     if (unlikely(s_pOp != NULL && s_pOp->getEpoch() == current_epoch)) {
7026       thd->proc_info = "Processing events from schema table";
7027       g_ndb_log_slave_updates = opt_log_slave_updates;
7028       s_ndb->setReportThreshEventGCISlip(
7029           opt_ndb_report_thresh_binlog_epoch_slip);
7030       s_ndb->setReportThreshEventFreeMem(
7031           opt_ndb_report_thresh_binlog_mem_usage);
7032 
7033       // Handle all schema events, limited to 'current_epoch'
7034       while (s_pOp != NULL && s_pOp->getEpoch() == current_epoch) {
7035         if (!s_pOp->hasError()) {
7036           schema_event_handler.handle_event(s_ndb, s_pOp);
7037 
7038           if (DBUG_EVALUATE_IF("ndb_binlog_slow_failure_handling", true,
7039                                false)) {
7040             if (!ndb_binlog_is_ready) {
7041               log_info("Just lost schema connection, hanging around");
7042               ndb_milli_sleep(10 * 1000);  // seconds * 1000
7043               /* There could be a race where the client side reconnects before we
7044                * are able to detect 's_ndb->getEventOperation() == NULL'.
7045                * Thus, we never restart the binlog thread as we are supposed to.
7046                * -> 'ndb_binlog_is_ready' remains false and we get stuck in
7047                * RO-mode
7048                */
7049               log_info("...and on our way");
7050             }
7051           }
7052 
7053           DBUG_PRINT("info",
7054                      ("s_ndb first: %s", s_ndb->getEventOperation()
7055                                              ? s_ndb->getEventOperation()
7056                                                    ->getEvent()
7057                                                    ->getTable()
7058                                                    ->getName()
7059                                              : "<empty>"));
7060           DBUG_PRINT("info",
7061                      ("i_ndb first: %s", i_ndb->getEventOperation()
7062                                              ? i_ndb->getEventOperation()
7063                                                    ->getEvent()
7064                                                    ->getTable()
7065                                                    ->getName()
7066                                              : "<empty>"));
7067         } else {
7068           log_error("error %d (%s) on handling binlog schema event",
7069                     s_pOp->getNdbError().code, s_pOp->getNdbError().message);
7070         }
7071         s_pOp = s_ndb->nextEvent();
7072       }
7073       update_injector_stats(s_ndb, i_ndb);
7074     }
7075 
7076     Uint64 inconsistent_epoch = 0;
7077     if (!ndb_binlog_running) {
7078       /*
7079         Binlogging is disabled, just consume the events. Non-data events,
7080         e.g. node failure events, still need to be handled.
7081       */
7082       while (i_pOp != NULL && i_pOp->getEpoch() == current_epoch) {
7083         if ((unsigned)i_pOp->getEventType() >=
7084             (unsigned)NDBEVENT::TE_FIRST_NON_DATA_EVENT) {
7085           ndb_binlog_index_row row;
7086           handle_non_data_event(thd, i_pOp, row);
7087         }
7088         i_pOp = i_ndb->nextEvent();
7089       }
7090       update_injector_stats(s_ndb, i_ndb);
7091     }
7092 
7093     // i_pOp == NULL means an inconsistent epoch or the queue is empty
7094     else if (i_pOp == NULL && !i_ndb->isConsistent(inconsistent_epoch)) {
7095       char errmsg[72];
7096       snprintf(errmsg, sizeof(errmsg),
7097                "Detected missing data in GCI %llu, "
7098                "inserting GAP event",
7099                inconsistent_epoch);
7100       DBUG_PRINT("info", ("Detected missing data in GCI %llu, "
7101                           "inserting GAP event",
7102                           inconsistent_epoch));
7103       LEX_CSTRING const msg = {errmsg, strlen(errmsg)};
7104       inj->record_incident(
7105           thd, binary_log::Incident_event::INCIDENT_LOST_EVENTS, msg);
7106     }
7107 
7108     /* Handle all events within 'current_epoch', or possibly
7109      * log an empty epoch if ndb_log_empty_epochs is specified.
7110      */
7111     else if ((i_pOp != NULL && i_pOp->getEpoch() == current_epoch) ||
7112              (ndb_log_empty_epochs() &&
7113               current_epoch > ndb_latest_handled_binlog_epoch)) {
7114       thd->proc_info = "Processing events";
7115       ndb_binlog_index_row _row;
7116       ndb_binlog_index_row *rows = &_row;
7117       injector::transaction trans;
7118       unsigned trans_row_count = 0;
7119       unsigned trans_slave_row_count = 0;
7120 
7121       if (i_pOp == NULL || i_pOp->getEpoch() != current_epoch) {
7122         /*
7123           This must be an empty epoch, since the condition
7124           (ndb_log_empty_epochs() &&
7125            current_epoch > ndb_latest_handled_binlog_epoch)
7126           must be true. Write an empty epoch into
7127           ndb_binlog_index.
7128         */
7129         DBUG_ASSERT(ndb_log_empty_epochs());
7130         DBUG_ASSERT(current_epoch > ndb_latest_handled_binlog_epoch);
7131         DBUG_PRINT("info", ("Writing empty epoch for gci %llu", current_epoch));
7132         DBUG_PRINT("info", ("Initializing transaction"));
7133         inj->new_trans(thd, &trans);
7134         rows = &_row;
7135         memset(&_row, 0, sizeof(_row));
7136         thd->variables.character_set_client = &my_charset_latin1;
7137         goto commit_to_binlog;
7138       } else {
7139         assert(i_pOp != NULL && i_pOp->getEpoch() == current_epoch);
7140         rows = &_row;
7141 
7142         DBUG_PRINT("info",
7143                    ("Handling epoch: %u/%u", (uint)(current_epoch >> 32),
7144                     (uint)(current_epoch)));
7145         // We sometimes get TE_ALTER with an invalid table
7146         DBUG_ASSERT(
7147             i_pOp->getEventType() == NdbDictionary::Event::TE_ALTER ||
7148             !ndb_name_is_blob_prefix(i_pOp->getEvent()->getTable()->getName()));
7149         DBUG_ASSERT(current_epoch <= ndb_latest_received_binlog_epoch);
7150 
7151         /* Update our thread-local debug settings based on the global */
7152 #ifndef DBUG_OFF
7153         /* Get value of global...*/
7154         {
7155           char buf[256];
7156           DBUG_EXPLAIN_INITIAL(buf, sizeof(buf));
7157           //  fprintf(stderr, "Ndb Binlog Injector, setting debug to %s\n",
7158           //          buf);
7159           DBUG_SET(buf);
7160         }
7161 #endif
7162 
7163         /* initialize some variables for this epoch */
7164 
7165         i_ndb->set_eventbuf_max_alloc(opt_ndb_eventbuffer_max_alloc);
7166         g_ndb_log_slave_updates = opt_log_slave_updates;
7167         i_ndb->setReportThreshEventGCISlip(
7168             opt_ndb_report_thresh_binlog_epoch_slip);
7169         i_ndb->setReportThreshEventFreeMem(
7170             opt_ndb_report_thresh_binlog_mem_usage);
7171 
7172         memset(&_row, 0, sizeof(_row));
7173         thd->variables.character_set_client = &my_charset_latin1;
7174         DBUG_PRINT("info", ("Initializing transaction"));
7175         inj->new_trans(thd, &trans);
7176         trans_row_count = 0;
7177         trans_slave_row_count = 0;
7178         // pass table map before epoch
7179         {
7180           Uint32 iter = 0;
7181           const NdbEventOperation *gci_op;
7182           Uint32 event_types;
7183           Uint32 cumulative_any_value;
7184 
7185           while ((gci_op = i_ndb->getNextEventOpInEpoch3(
7186                       &iter, &event_types, &cumulative_any_value)) != NULL) {
7187             Ndb_event_data *event_data =
7188                 (Ndb_event_data *)gci_op->getCustomData();
7189             NDB_SHARE *share = (event_data) ? event_data->share : NULL;
7190             DBUG_PRINT("info",
7191                        ("per gci_op: 0x%lx  share: 0x%lx  event_types: 0x%x",
7192                         (long)gci_op, (long)share, event_types));
7193             // workaround for interface returning TE_STOP events
7194             // which are normally filtered out below in the nextEvent loop
7195             if ((event_types & ~NdbDictionary::Event::TE_STOP) == 0) {
7196               DBUG_PRINT("info", ("Skipped TE_STOP on table %s",
7197                                   gci_op->getEvent()->getTable()->getName()));
7198               continue;
7199             }
7200             // this should not happen
7201             if (share == NULL || event_data->shadow_table == NULL) {
7202               DBUG_PRINT("info", ("no share or table %s!",
7203                                   gci_op->getEvent()->getTable()->getName()));
7204               continue;
7205             }
7206             if (share == ndb_apply_status_share) {
7207               // skip this table, it is handled specially
7208               continue;
7209             }
7210             TABLE *table = event_data->shadow_table;
7211 #ifndef DBUG_OFF
7212             const LEX_CSTRING &name = table->s->table_name;
7213 #endif
7214             if ((event_types & (NdbDictionary::Event::TE_INSERT |
7215                                 NdbDictionary::Event::TE_UPDATE |
7216                                 NdbDictionary::Event::TE_DELETE)) == 0) {
7217               DBUG_PRINT("info", ("skipping non data event table: %.*s",
7218                                   (int)name.length, name.str));
7219               continue;
7220             }
7221             if (!trans.good()) {
7222               DBUG_PRINT("info",
7223                          ("Found new data event, initializing transaction"));
7224               inj->new_trans(thd, &trans);
7225             }
7226             {
7227               bool use_table = true;
7228               if (ndbcluster_anyvalue_is_reserved(cumulative_any_value)) {
7229                 /*
7230                    All events for this table in this epoch are marked as
7231                    nologging, therefore we do not include the table in the epoch
7232                    transaction.
7233                 */
7234                 if (ndbcluster_anyvalue_is_nologging(cumulative_any_value)) {
7235                   DBUG_PRINT("info", ("Skip binlogging table: %.*s",
7236                                       (int)name.length, name.str));
7237                   use_table = false;
7238                 }
7239               }
7240               if (use_table) {
7241                 DBUG_PRINT("info",
7242                            ("use_table: %.*s, cols %u", (int)name.length,
7243                             name.str, table->s->fields));
7244                 injector::transaction::table tbl(table, true);
7245                 int ret = trans.use_table(::server_id, tbl);
7246                 ndbcluster::ndbrequire(ret == 0);
7247               }
7248             }
7249           }
7250         }
7251         if (trans.good()) {
7252           /* Inject ndb_apply_status WRITE_ROW event */
7253           if (!injectApplyStatusWriteRow(trans, current_epoch)) {
7254             log_error("Failed to inject apply status write row");
7255           }
7256         }
7257 
7258         do {
7259           if (i_pOp->hasError() && handle_error(i_pOp) < 0) goto err;
7260 
7261 #ifndef DBUG_OFF
7262           {
7263             Ndb_event_data *event_data =
7264                 (Ndb_event_data *)i_pOp->getCustomData();
7265             NDB_SHARE *share = (event_data) ? event_data->share : NULL;
7266             DBUG_PRINT("info",
7267                        ("EVENT TYPE: %d  Epoch: %u/%u last applied: %u/%u  "
7268                         "share: 0x%lx (%s.%s)",
7269                         i_pOp->getEventType(), (uint)(current_epoch >> 32),
7270                         (uint)(current_epoch),
7271                         (uint)(ndb_latest_applied_binlog_epoch >> 32),
7272                         (uint)(ndb_latest_applied_binlog_epoch), (long)share,
7273                         share ? share->db : "'NULL'",
7274                         share ? share->table_name : "'NULL'"));
7275             DBUG_ASSERT(share != 0);
7276           }
7277           // assert that there is consistency between gci op list
7278           // and event list
7279           {
7280             Uint32 iter = 0;
7281             const NdbEventOperation *gci_op;
7282             Uint32 event_types;
7283             while ((gci_op = i_ndb->getGCIEventOperations(
7284                         &iter, &event_types)) != NULL) {
7285               if (gci_op == i_pOp) break;
7286             }
7287             DBUG_ASSERT(gci_op == i_pOp);
7288             DBUG_ASSERT((event_types & i_pOp->getEventType()) != 0);
7289           }
7290 #endif
7291 
7292           if ((unsigned)i_pOp->getEventType() <
7293               (unsigned)NDBEVENT::TE_FIRST_NON_DATA_EVENT)
7294             handle_data_event(i_pOp, &rows, trans, trans_row_count,
7295                               trans_slave_row_count);
7296           else {
7297             handle_non_data_event(thd, i_pOp, *rows);
7298             DBUG_PRINT("info",
7299                        ("s_ndb first: %s", s_ndb->getEventOperation()
7300                                                ? s_ndb->getEventOperation()
7301                                                      ->getEvent()
7302                                                      ->getTable()
7303                                                      ->getName()
7304                                                : "<empty>"));
7305             DBUG_PRINT("info",
7306                        ("i_ndb first: %s", i_ndb->getEventOperation()
7307                                                ? i_ndb->getEventOperation()
7308                                                      ->getEvent()
7309                                                      ->getTable()
7310                                                      ->getName()
7311                                                : "<empty>"));
7312           }
7313 
7314           // Capture any dynamic changes to max_alloc
7315           i_ndb->set_eventbuf_max_alloc(opt_ndb_eventbuffer_max_alloc);
7316 
7317           i_pOp = i_ndb->nextEvent();
7318         } while (i_pOp && i_pOp->getEpoch() == current_epoch);
7319 
7320         update_injector_stats(s_ndb, i_ndb);
7321 
7322         /*
7323           NOTE: i_pOp is now referring to an event in the next epoch
7324           or is == NULL
7325         */
7326 
7327         while (trans.good()) {
7328         commit_to_binlog:
7329           if (!ndb_log_empty_epochs()) {
7330             /*
7331               If
7332                 - We did not add any 'real' rows to the Binlog AND
7333                 - We did not apply any slave row updates, only
7334                   ndb_apply_status updates
7335               THEN
7336                 Don't write the Binlog transaction which just
7337                 contains ndb_apply_status updates.
7338                 (For circular replication with log_apply_status, ndb_apply_status
7339                 updates will propagate while some related, real update
7340                 is propagating)
7341             */
7342             if ((trans_row_count == 0) &&
7343                 (!(opt_ndb_log_apply_status && trans_slave_row_count))) {
7344               /* nothing to commit, rollback instead */
7345               if (int r = trans.rollback()) {
7346                 log_error("Error during ROLLBACK of GCI %u/%u. Error: %d",
7347                           uint(current_epoch >> 32), uint(current_epoch), r);
7348                 /* TODO: Further handling? */
7349               }
7350               break;
7351             }
7352           }
7353           thd->proc_info = "Committing events to binlog";
7354           if (int r = trans.commit()) {
7355             log_error("Error during COMMIT of GCI. Error: %d", r);
7356             /* TODO: Further handling? */
7357           }
7358           injector::transaction::binlog_pos start = trans.start_pos();
7359           injector::transaction::binlog_pos next = trans.next_pos();
7360           rows->gci = (Uint32)(current_epoch >> 32);  // Expose gci hi/lo
7361           rows->epoch = current_epoch;
7362           rows->start_master_log_file = start.file_name();
7363           rows->start_master_log_pos = start.file_pos();
7364           if ((next.file_pos() == 0) && ndb_log_empty_epochs()) {
7365             /* Empty transaction 'committed' due to log_empty_epochs
7366              * therefore no next position
7367              */
7368             rows->next_master_log_file = start.file_name();
7369             rows->next_master_log_pos = start.file_pos();
7370           } else {
7371             rows->next_master_log_file = next.file_name();
7372             rows->next_master_log_pos = next.file_pos();
7373           }
7374 
7375           DBUG_PRINT("info", ("COMMIT epoch: %lu", (ulong)current_epoch));
7376           if (opt_ndb_log_binlog_index) {
7377             if (Ndb_binlog_index_table_util::write_rows(thd, rows)) {
7378               /*
7379                  Writing to ndb_binlog_index failed, check if it's because THD
7380                  has been killed and retry in that case
7381               */
7382               if (thd->killed) {
7383                 DBUG_PRINT("error", ("Failed to write to ndb_binlog_index at "
7384                                      "shutdown, retrying"));
7385                 Ndb_binlog_index_table_util::write_rows_retry_after_kill(thd,
7386                                                                          rows);
7387               }
7388             }
7389           }
7390           ndb_latest_applied_binlog_epoch = current_epoch;
7391           break;
7392         }  // while (trans.good())
7393 
7394         /*
7395           NOTE: There may be more i_pOp available.
7396           However, these are from another epoch and should be handled
7397           in next iteration of the binlog injector loop.
7398         */
7399       }
7400     }  // end: 'handled a 'current_epoch' of i_pOp's
7401 
7402     // Notify the schema event handler about post_epoch so it may finish
7403     // any outstanding business
7404     schema_event_handler.post_epoch(current_epoch);
7405 
7406     free_root(&mem_root, MYF(0));
7407     *root_ptr = old_root;
7408 
7409     if (current_epoch > ndb_latest_handled_binlog_epoch) {
7410       Mutex_guard injector_mutex_g(injector_data_mutex);
7411       ndb_latest_handled_binlog_epoch = current_epoch;
7412       // Signal ndbcluster_binlog_wait'ers
7413       mysql_cond_broadcast(&injector_data_cond);
7414     }
7415 
7416     DBUG_ASSERT(binlog_thread_state == BCCC_running);
7417 
7418     // When a cluster failure occurs, each event operation will receive a
7419     // TE_CLUSTER_FAILURE event causing it to be torn down and removed.
7420     // When all event operations have been removed from their respective Ndb
7421     // object, the thread should restart and try to connect to NDB again.
7422     if (i_ndb->getEventOperation() == NULL &&
7423         s_ndb->getEventOperation() == NULL) {
7424       log_error("All event operations gone, restarting thread");
7425       binlog_thread_state = BCCC_restart;
7426       break;
7427     }
7428 
7429     if (!ndb_binlog_tables_inited /* relaxed read without lock */) {
7430       // One (or more) of the ndbcluster util tables have been dropped, restart
7431       // the thread in order to create or setup the util table(s) again
7432       log_error("The util tables has been lost, restarting thread");
7433       binlog_thread_state = BCCC_restart;
7434       break;
7435     }
7436 
7437     // Synchronize 1 object from the queue of objects detected for automatic
7438     // synchronization
7439     synchronize_detected_object(thd);
7440   }
7441 
7442   // Check if loop has been terminated without properly handling all events
7443   if (ndb_binlog_running &&
7444       ndb_latest_handled_binlog_epoch < ndb_get_latest_trans_gci()) {
7445     log_error(
7446         "latest transaction in epoch %u/%u not in binlog "
7447         "as latest handled epoch is %u/%u",
7448         (uint)(ndb_get_latest_trans_gci() >> 32),
7449         (uint)(ndb_get_latest_trans_gci()),
7450         (uint)(ndb_latest_handled_binlog_epoch >> 32),
7451         (uint)(ndb_latest_handled_binlog_epoch));
7452   }
7453 
7454 err:
7455   if (binlog_thread_state != BCCC_restart) {
7456     log_info("Shutting down");
7457     thd->proc_info = "Shutting down";
7458   } else {
7459     log_info("Restarting");
7460     thd->proc_info = "Restarting";
7461   }
7462 
7463   mysql_mutex_lock(&injector_event_mutex);
7464   /* don't mess with the injector_ndb anymore from other threads */
7465   injector_thd = NULL;
7466   injector_ndb = NULL;
7467   schema_ndb = NULL;
7468   mysql_mutex_unlock(&injector_event_mutex);
7469 
7470   mysql_mutex_lock(&injector_data_mutex);
7471   ndb_binlog_tables_inited = false;
7472   mysql_mutex_unlock(&injector_data_mutex);
7473 
7474   Ndb_stored_grants::shutdown(thd_ndb);
7475 
7476   thd->reset_db(NULL_CSTR);  // so as not to try to free memory
7477   remove_all_event_operations(s_ndb, i_ndb);
7478 
7479   schema_dist_data.release();
7480 
7481   // Fail any schema operations that have been registered but
7482   // never reached the coordinator
7483   NDB_SCHEMA_OBJECT::fail_all_schema_ops(Ndb_schema_dist::COORD_ABORT,
7484                                          "Aborted during shutdown");
7485 
7486   delete s_ndb;
7487   s_ndb = NULL;
7488 
7489   delete i_ndb;
7490   i_ndb = NULL;
7491 
7492   if (thd_ndb) {
7493     Thd_ndb::release(thd_ndb);
7494     thd_set_thd_ndb(thd, NULL);
7495     thd_ndb = NULL;
7496   }
7497 
7498   /**
7499    * release all extra references from tables
7500    */
7501   log_verbose(9, "Release extra share references");
7502   NDB_SHARE::release_extra_share_references();
7503 
7504   log_info("Stopping...");
7505 
7506   ndb_tdc_close_cached_tables();
7507   if (ndb_log_get_verbose_level() > 15) {
7508     NDB_SHARE::print_remaining_open_tables();
7509   }
7510 
7511   if (binlog_thread_state == BCCC_restart) {
7512     goto restart_cluster_failure;
7513   }
7514 
7515   // Release the thd->net created without vio
7516   thd->get_protocol_classic()->end_net();
7517   thd->release_resources();
7518   thd_manager->remove_thd(thd);
7519   delete thd;
7520 
7521   ndb_binlog_running = false;
7522   mysql_cond_broadcast(&injector_data_cond);
7523 
7524   log_info("Stopped");
7525 
7526   DBUG_PRINT("exit", ("ndb_binlog_thread"));
7527 }
7528 
7529 /*
7530   Return string containing current status of ndb binlog as
7531   comma separated name value pairs.
7532 
7533   Used by ndbcluster_show_status() to fill the "binlog" row
7534   in result of SHOW ENGINE NDB STATUS
7535 
7536   @param     buf       The buffer to print the status string into
7537   @param     buf_size  Size of the buffer
7538 
7539   @return    Length of the string printed to "buf" or 0 if no string
7540              is printed
7541 */
7542 
7543 size_t ndbcluster_show_status_binlog(char *buf, size_t buf_size) {
7544   DBUG_TRACE;
7545 
7546   mysql_mutex_lock(&injector_event_mutex);
7547   if (injector_ndb) {
7548     const ulonglong latest_epoch = injector_ndb->getLatestGCI();
7549     mysql_mutex_unlock(&injector_event_mutex);
7550 
7551     // Get highest trans gci seen by the cluster connections
7552     const ulonglong latest_trans_epoch = ndb_get_latest_trans_gci();
7553 
7554     const size_t buf_len = snprintf(
7555         buf, buf_size,
7556         "latest_epoch=%llu, "
7557         "latest_trans_epoch=%llu, "
7558         "latest_received_binlog_epoch=%llu, "
7559         "latest_handled_binlog_epoch=%llu, "
7560         "latest_applied_binlog_epoch=%llu",
7561         latest_epoch, latest_trans_epoch, ndb_latest_received_binlog_epoch,
7562         ndb_latest_handled_binlog_epoch, ndb_latest_applied_binlog_epoch);
7563     return buf_len;
7564   } else
7565     mysql_mutex_unlock(&injector_event_mutex);
7566   return 0;
7567 }
7568