/*
  Copyright (c) 2006, 2020, Oracle and/or its affiliates.

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License, version 2.0,
  as published by the Free Software Foundation.

  This program is also distributed with certain software (including
  but not limited to OpenSSL) that is licensed under separate terms,
  as designated in a particular file or component or in included license
  documentation. The authors of MySQL hereby grant you an additional
  permission to link the program and your derivative works with the
  separately licensed software that they have included with MySQL.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License, version 2.0, for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "storage/ndb/plugin/ha_ndbcluster_binlog.h"

#include <unordered_map>

#include "my_dbug.h"
#include "my_thread.h"
#include "mysql/plugin.h"
#include "sql/auth/acl_change_notification.h"
#include "sql/binlog.h"
#include "sql/dd/types/abstract_table.h"  // dd::enum_table_type
#include "sql/dd/types/tablespace.h"      // dd::Tablespace
#include "sql/derror.h"                   // ER_THD
#include "sql/mysqld.h"                   // opt_bin_log
#include "sql/mysqld_thd_manager.h"       // Global_THD_manager
#include "sql/protocol_classic.h"
#include "sql/rpl_injector.h"
#include "sql/rpl_slave.h"
#include "sql/sql_lex.h"
#include "sql/sql_rewrite.h"
#include "sql/sql_table.h"  // build_table_filename
#include "sql/sql_thd_internal_api.h"
#include "sql/thd_raii.h"
#include "sql/transaction.h"
#include "storage/ndb/include/ndbapi/NdbDictionary.hpp"
#include "storage/ndb/include/ndbapi/ndb_cluster_connection.hpp"
#include "storage/ndb/plugin/ha_ndbcluster.h"
#include "storage/ndb/plugin/ha_ndbcluster_connection.h"
#include "storage/ndb/plugin/ndb_apply_status_table.h"
#include "storage/ndb/plugin/ndb_binlog_client.h"
#include "storage/ndb/plugin/ndb_bitmap.h"
#include "storage/ndb/plugin/ndb_dd.h"
#include "storage/ndb/plugin/ndb_dd_client.h"
#include "storage/ndb/plugin/ndb_dd_disk_data.h"
#include "storage/ndb/plugin/ndb_dd_sync.h"  // Ndb_dd_sync
#include "storage/ndb/plugin/ndb_dd_table.h"
#include "storage/ndb/plugin/ndb_global_schema_lock_guard.h"
#include "storage/ndb/plugin/ndb_local_connection.h"
#include "storage/ndb/plugin/ndb_log.h"
#include "storage/ndb/plugin/ndb_name_util.h"
#include "storage/ndb/plugin/ndb_ndbapi_util.h"
#include "storage/ndb/plugin/ndb_require.h"
#include "storage/ndb/plugin/ndb_retry.h"
#include "storage/ndb/plugin/ndb_schema_dist_table.h"
#include "storage/ndb/plugin/ndb_schema_result_table.h"
#include "storage/ndb/plugin/ndb_sleep.h"
#include "storage/ndb/plugin/ndb_stored_grants.h"
#include "storage/ndb/plugin/ndb_table_guard.h"
#include "storage/ndb/plugin/ndb_tdc.h"
#include "storage/ndb/plugin/ndb_thd.h"
#include "storage/ndb/plugin/ndb_upgrade_util.h"

typedef NdbDictionary::Event NDBEVENT;
typedef NdbDictionary::Column NDBCOL;
typedef NdbDictionary::Table NDBTAB;

extern bool opt_ndb_log_orig;
extern bool opt_ndb_log_bin;
extern bool opt_ndb_log_update_as_write;
extern bool opt_ndb_log_updated_only;
extern bool opt_ndb_log_update_minimal;
extern bool opt_ndb_log_binlog_index;
extern bool opt_ndb_log_apply_status;
extern st_ndb_slave_state g_ndb_slave_state;
extern bool opt_ndb_log_transaction_id;
extern bool log_bin_use_v1_row_events;
extern bool opt_ndb_log_empty_update;
extern bool opt_ndb_clear_apply_status;
extern bool opt_ndb_log_fail_terminate;
extern int opt_ndb_schema_dist_timeout;
extern ulong opt_ndb_schema_dist_lock_wait_timeout;

bool ndb_log_empty_epochs(void);

void ndb_index_stat_restart();

#include "storage/ndb/plugin/ndb_anyvalue.h"
#include "storage/ndb/plugin/ndb_binlog_extra_row_info.h"
#include "storage/ndb/plugin/ndb_binlog_thread.h"
#include "storage/ndb/plugin/ndb_event_data.h"
#include "storage/ndb/plugin/ndb_repl_tab.h"
#include "storage/ndb/plugin/ndb_schema_dist.h"
#include "storage/ndb/plugin/ndb_schema_object.h"

extern Ndb_cluster_connection *g_ndb_cluster_connection;

/*
  Timeout for syncing schema events between
  mysql servers, and between mysql server and the binlog
*/
static const int DEFAULT_SYNC_TIMEOUT = 120;

/* Column numbers in the ndb_binlog_index table */
enum Ndb_binlog_index_cols {
  NBICOL_START_POS = 0,
  NBICOL_START_FILE = 1,
  NBICOL_EPOCH = 2,
  NBICOL_NUM_INSERTS = 3,
  NBICOL_NUM_UPDATES = 4,
  NBICOL_NUM_DELETES = 5,
  NBICOL_NUM_SCHEMAOPS = 6,
  /* Following columns in schema 'v2' */
  NBICOL_ORIG_SERVERID = 7,
  NBICOL_ORIG_EPOCH = 8,
  NBICOL_GCI = 9,
  /* Following columns in schema 'v3' */
  NBICOL_NEXT_POS = 10,
  NBICOL_NEXT_FILE = 11
};
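
/*
  For orientation (illustrative, not the authoritative definition): the enum
  above maps to the columns of the mysql.ndb_binlog_index table, which in its
  'v3' shape looks roughly like:

    CREATE TABLE mysql.ndb_binlog_index (
      Position BIGINT UNSIGNED NOT NULL,        -- NBICOL_START_POS
      File VARCHAR(255) NOT NULL,               -- NBICOL_START_FILE
      epoch BIGINT UNSIGNED NOT NULL,           -- NBICOL_EPOCH
      inserts INT UNSIGNED NOT NULL,            -- NBICOL_NUM_INSERTS
      updates INT UNSIGNED NOT NULL,            -- NBICOL_NUM_UPDATES
      deletes INT UNSIGNED NOT NULL,            -- NBICOL_NUM_DELETES
      schemaops INT UNSIGNED NOT NULL,          -- NBICOL_NUM_SCHEMAOPS
      orig_server_id INT UNSIGNED NOT NULL,     -- NBICOL_ORIG_SERVERID (v2)
      orig_epoch BIGINT UNSIGNED NOT NULL,      -- NBICOL_ORIG_EPOCH (v2)
      gci INT UNSIGNED NOT NULL,                -- NBICOL_GCI (v2)
      next_position BIGINT UNSIGNED NOT NULL,   -- NBICOL_NEXT_POS (v3)
      next_file VARCHAR(255) NOT NULL           -- NBICOL_NEXT_FILE (v3)
    );
*/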

class Mutex_guard {
 public:
  Mutex_guard(mysql_mutex_t &mutex) : m_mutex(mutex) {
    mysql_mutex_lock(&m_mutex);
  }
  ~Mutex_guard() { mysql_mutex_unlock(&m_mutex); }

 private:
  mysql_mutex_t &m_mutex;
};
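
/*
  Illustrative usage of Mutex_guard (a sketch, not from the original source):
  the guard locks in its constructor and unlocks in its destructor, so a
  scope is all that is needed:

    {
      Mutex_guard guard(injector_data_mutex);  // declared below
      // ... access data protected by injector_data_mutex ...
    }  // mutex automatically released here
*/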

/*
  Mutex and condition used for interacting between client sql thread
  and injector thread:
   - injector_data_mutex protects global data maintained
     by the injector thread and accessed by any client thread.
   - injector_event_mutex protects the injector thread's pollEvents()
     and the concurrent create and drop of events from client threads.
     It also protects injector_ndb and schema_ndb, the Ndb objects
     used for the above create/drop/pollEvents().
  The rationale for splitting these into two separate mutexes is that
  injector_event_mutex is held for 10ms across pollEvents().
  That could (almost) block access to the shared binlog injector data,
  like ndb_binlog_is_read_only().
*/
static mysql_mutex_t injector_event_mutex;
static mysql_mutex_t injector_data_mutex;
static mysql_cond_t injector_data_cond;

/*
  NOTE:
  Several of the ndb_binlog* variables use a 'relaxed locking' scheme.
  Such a variable is only modified by the 'injector_thd' thread,
  but could be read by any 'thd'. Thus:
   - Any update of such a variable needs a mutex lock.
   - Reading such a variable outside of the injector_thd needs the mutex.
  However, it is safe to read the variable within the injector_thd
  without holding the mutex, as no other thread updates it.
*/
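
/*
  Sketch of the 'relaxed locking' pattern described above (illustrative,
  for a variable guarded by injector_data_mutex):

    // Writer (injector thread only):
    mysql_mutex_lock(&injector_data_mutex);
    ndb_binlog_is_ready = true;
    mysql_mutex_unlock(&injector_data_mutex);

    // Readers in any other thread must hold the mutex, see
    // ndb_binlog_is_read_only() below. The injector thread itself may
    // read without locking since it is the only writer.
*/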

/**
  ndb_binlog_running
  Changes to NDB tables should be written to the binary log. I.e. the
  ndb binlog injector thread subscribes to changes in the cluster
  and when such changes are received, they will be written to the
  binary log
*/
bool ndb_binlog_running = false;

static bool ndb_binlog_tables_inited = false;  // injector_data_mutex, relaxed
static bool ndb_binlog_is_ready = false;       // injector_data_mutex, relaxed

bool ndb_binlog_is_read_only(void) {
  /*
    Could be called from any client thread. Need a mutex to
    protect ndb_binlog_tables_inited and ndb_binlog_is_ready.
  */
  Mutex_guard injector_g(injector_data_mutex);
  if (!ndb_binlog_tables_inited) {
    /* the ndb_* system tables not setup yet */
    return true;
  }

  if (ndb_binlog_running && !ndb_binlog_is_ready) {
    /*
      The binlog thread is supposed to write to binlog
      but is not ready (still initializing or has lost connection)
    */
    return true;
  }
  return false;
}

static THD *injector_thd = NULL;

/*
  Global reference to ndb injector thd object.

  Used mainly by the binlog index thread, but exposed to the client sql
  thread for one reason: to set up the event operations for a table,
  enabling the ndb injector thread to receive events.

  Must therefore always be used with a surrounding
  mysql_mutex_lock(&injector_event_mutex) when calling
  create/dropEventOperation.
*/
static Ndb *injector_ndb = NULL;  // Need injector_event_mutex
static Ndb *schema_ndb = NULL;    // Need injector_event_mutex
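
/*
  Illustrative locking pattern for event operations (a sketch; real usages
  appear further down in this file, e.g. in
  ndbcluster_binlog_event_operation_teardown()):

    mysql_mutex_lock(&injector_event_mutex);
    injector_ndb->dropEventOperation(pOp);
    mysql_mutex_unlock(&injector_event_mutex);
*/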

static int ndbcluster_binlog_inited = 0;

/* NDB Injector thread (used for binlog creation) */
static ulonglong ndb_latest_applied_binlog_epoch = 0;
static ulonglong ndb_latest_handled_binlog_epoch = 0;
static ulonglong ndb_latest_received_binlog_epoch = 0;

NDB_SHARE *ndb_apply_status_share = NULL;

extern bool opt_log_slave_updates;
static bool g_ndb_log_slave_updates;

static bool g_injector_v1_warning_emitted = false;

bool Ndb_binlog_client::create_event_data(NDB_SHARE *share,
                                          const dd::Table *table_def,
                                          Ndb_event_data **event_data) const {
  DBUG_TRACE;
  DBUG_ASSERT(table_def);
  DBUG_ASSERT(event_data);

  Ndb_event_data *new_event_data = Ndb_event_data::create_event_data(
      m_thd, share, share->db, share->table_name, share->key_string(),
      injector_thd, table_def);
  if (!new_event_data) return false;

  // Return the newly created event_data to caller
  *event_data = new_event_data;

  return true;
}
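
/*
  NOTE (added explanation): get_ndb_blobs_value() below runs two passes over
  the table's blob fields. Pass 0 only sums up the buffer space needed, with
  each blob rounded up to a multiple of 8 bytes, and reallocates the shared
  buffer if it is too small. Pass 1 reads the blob data into the buffer and
  points each Field_blob at its slice via set_ptr_offset().
*/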

static int get_ndb_blobs_value(TABLE *table, NdbValue *value_array,
                               uchar *&buffer, uint &buffer_size,
                               ptrdiff_t ptrdiff) {
  DBUG_TRACE;

  // Field has no field number so cannot use TABLE blob_field
  // Loop twice, first only counting total buffer size
  for (int loop = 0; loop <= 1; loop++) {
    uint32 offset = 0;
    for (uint i = 0; i < table->s->fields; i++) {
      Field *field = table->field[i];
      NdbValue value = value_array[i];
      if (!(field->is_flag_set(BLOB_FLAG) && field->stored_in_db)) continue;
      if (value.blob == NULL) {
        DBUG_PRINT("info", ("[%u] skipped", i));
        continue;
      }
      Field_blob *field_blob = (Field_blob *)field;
      NdbBlob *ndb_blob = value.blob;
      int isNull;
      if (ndb_blob->getNull(isNull) != 0) return -1;
      if (isNull == 0) {
        Uint64 len64 = 0;
        if (ndb_blob->getLength(len64) != 0) return -1;
        // Align to Uint64
        uint32 size = Uint32(len64);
        if (size % 8 != 0) size += 8 - size % 8;
        if (loop == 1) {
          uchar *buf = buffer + offset;
          uint32 len = buffer_size - offset;  // Size of buf
          if (ndb_blob->readData(buf, len) != 0) return -1;
          DBUG_PRINT("info",
                     ("[%u] offset: %u buf: 0x%lx len=%u [ptrdiff=%d]", i,
                      offset, (long)buf, len, (int)ptrdiff));
          DBUG_ASSERT(len == len64);
          // Ugly hack assumes only ptr needs to be changed
          field_blob->set_ptr_offset(ptrdiff, len, buf);
        }
        offset += size;
      } else if (loop == 1)  // undefined or null
      {
        // have to set length even in this case
        uchar *buf = buffer + offset;  // or maybe NULL
        uint32 len = 0;
        field_blob->set_ptr_offset(ptrdiff, len, buf);
        DBUG_PRINT("info", ("[%u] isNull=%d", i, isNull));
      }
    }
    if (loop == 0 && offset > buffer_size) {
      my_free(buffer);
      buffer_size = 0;
      DBUG_PRINT("info", ("allocate blobs buffer size %u", offset));
      buffer = (uchar *)my_malloc(PSI_INSTRUMENT_ME, offset, MYF(MY_WME));
      if (buffer == NULL) {
        ndb_log_error("get_ndb_blobs_value, my_malloc(%u) failed", offset);
        return -1;
      }
      buffer_size = offset;
    }
  }
  return 0;
}

/*
  @brief Wait until the last committed epoch from the session enters the
         binlog. Wait a maximum of 30 seconds. This wait is necessary in
         SHOW BINLOG EVENTS so that the user sees its own changes. Also
         in RESET MASTER before clearing ndbcluster's binlog index.
  @param thd Thread handle to wait for its changes to enter the binlog.
*/
static void ndbcluster_binlog_wait(THD *thd) {
  DBUG_TRACE;

  if (!ndb_binlog_running) {
    DBUG_PRINT("exit", ("Not writing binlog -> nothing to wait for"));
    return;
  }

  // Assumption is that only these commands will wait
  DBUG_ASSERT(thd_sql_command(thd) == SQLCOM_SHOW_BINLOG_EVENTS ||
              thd_sql_command(thd) == SQLCOM_FLUSH ||
              thd_sql_command(thd) == SQLCOM_RESET);

  if (thd->system_thread == SYSTEM_THREAD_NDBCLUSTER_BINLOG) {
    // Binlog Injector thread should not wait for itself
    DBUG_PRINT("exit", ("binlog injector should not wait for itself"));
    return;
  }

  Thd_ndb *thd_ndb = get_thd_ndb(thd);
  if (!thd_ndb) {
    // Thread has not used NDB before, no need for waiting
    DBUG_PRINT("exit", ("Thread has not used NDB, nothing to wait for"));
    return;
  }

  const char *save_info = thd->proc_info;
  thd->proc_info =
      "Waiting for ndbcluster binlog update to reach current position";

  // Highest epoch that a transaction against Ndb has received
  // as part of commit processing *in this thread*. This is a
  // per-session 'most recent change' indicator.
  const Uint64 session_last_committed_epoch =
      thd_ndb->m_last_commit_epoch_session;

  // Wait until the last committed epoch from the session enters the Binlog.
  // Break any possible deadlock after 30s.
  int count = 30;  // seconds
  mysql_mutex_lock(&injector_data_mutex);
  const Uint64 start_handled_epoch = ndb_latest_handled_binlog_epoch;
  while (!thd->killed && count && ndb_binlog_running &&
         (ndb_latest_handled_binlog_epoch == 0 ||
          ndb_latest_handled_binlog_epoch < session_last_committed_epoch)) {
    count--;
    struct timespec abstime;
    set_timespec(&abstime, 1);
    mysql_cond_timedwait(&injector_data_cond, &injector_data_mutex, &abstime);
  }
  mysql_mutex_unlock(&injector_data_mutex);
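
  // NOTE (added explanation): an NDB epoch is a 64-bit value printed below
  // as two 32-bit halves "hi/lo" (the GCI and micro-GCI parts), hence the
  // shift/mask pairs in the log message.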

  if (count == 0) {
    ndb_log_warning(
        "Thread id %u timed out (30s) waiting for epoch %u/%u "
        "to be handled. Progress : %u/%u -> %u/%u.",
        thd->thread_id(),
        Uint32((session_last_committed_epoch >> 32) & 0xffffffff),
        Uint32(session_last_committed_epoch & 0xffffffff),
        Uint32((start_handled_epoch >> 32) & 0xffffffff),
        Uint32(start_handled_epoch & 0xffffffff),
        Uint32((ndb_latest_handled_binlog_epoch >> 32) & 0xffffffff),
        Uint32(ndb_latest_handled_binlog_epoch & 0xffffffff));

    // Fail on wait/deadlock timeout in debug compile
    DBUG_ASSERT(false);
  }

  thd->proc_info = save_info;
}

/*
  Setup THD object
  'Inspired' from ha_ndbcluster.cc : ndb_util_thread_func
*/
THD *ndb_create_thd(char *stackptr) {
  DBUG_TRACE;
  THD *thd = new THD; /* note that constructor of THD uses DBUG_ */
  if (thd == 0) {
    return 0;
  }
  THD_CHECK_SENTRY(thd);

  thd->thread_stack = stackptr; /* remember where our stack is */
  thd->store_globals();

  thd->init_query_mem_roots();
  thd->set_command(COM_DAEMON);
  thd->system_thread = SYSTEM_THREAD_NDBCLUSTER_BINLOG;
  thd->get_protocol_classic()->set_client_capabilities(0);
  thd->lex->start_transaction_opt = 0;
  thd->security_context()->skip_grants();

  CHARSET_INFO *charset_connection =
      get_charset_by_csname("utf8", MY_CS_PRIMARY, MYF(MY_WME));
  thd->variables.character_set_client = charset_connection;
  thd->variables.character_set_results = charset_connection;
  thd->variables.collation_connection = charset_connection;
  thd->update_charset();
  return thd;
}

// Instantiate Ndb_binlog_thread component
static Ndb_binlog_thread ndb_binlog_thread;

// Forward declaration
static bool ndbcluster_binlog_index_remove_file(THD *thd, const char *filename);

/*
  @brief Called when a binlog file is purged (i.e. the physical
  binlog file is removed by the MySQL Server). ndbcluster needs
  to remove any rows in its mysql.ndb_binlog_index table which
  reference the removed file.

  @param thd Thread handle
  @param filename Name of the binlog file which has been removed

  @return 0 for success
*/

static int ndbcluster_binlog_index_purge_file(THD *thd, const char *filename) {
  DBUG_TRACE;
  DBUG_PRINT("enter", ("filename: %s", filename));

  // Check if the binlog thread can handle the purge.
  // This functionality is initially only implemented for the case when the
  // "server started" state has not yet been reached, but could in the future
  // be extended to handle all purging by the binlog thread (this would most
  // likely eliminate the need to create a separate THD further down in this
  // function)
  if (ndb_binlog_thread.handle_purge(filename)) {
    return 0;  // Ok, purge handled by binlog thread
  }

  if (!ndb_binlog_running) {
    return 0;  // Nothing to do, binlog thread not running
  }

  if (thd_slave_thread(thd)) {
    return 0;  // Nothing to do, slave thread
  }

  // Create a separate temporary THD, primarily in order to isolate from any
  // active transactions in the THD passed by caller. NOTE! This should be
  // revisited
  int stack_base = 0;
  THD *tmp_thd = ndb_create_thd((char *)&stack_base);
  if (!tmp_thd) {
    ndb_log_warning("NDB Binlog: Failed to purge: '%s' (create THD failed)",
                    filename);
    return 0;
  }

  int error = 0;
  if (ndbcluster_binlog_index_remove_file(tmp_thd, filename)) {
    // Failed to delete rows from table
    ndb_log_warning("NDB Binlog: Failed to purge: '%s'", filename);
    error = 1;  // Failed
  }
  delete tmp_thd;

  /* Relink original THD */
  thd->store_globals();

  return error;
}

/*
  ndbcluster_binlog_log_query

   - callback function installed in handlerton->binlog_log_query
   - called by MySQL Server in places where no other handlerton
     function exists which can be used to notify about changes
   - used by ndbcluster to detect when
     -- databases are created or altered
     -- privilege tables have been modified
*/

static void ndbcluster_binlog_log_query(handlerton *, THD *thd,
                                        enum_binlog_command binlog_command,
                                        const char *query, uint query_length,
                                        const char *db, const char *) {
  DBUG_TRACE;
  DBUG_PRINT("enter", ("binlog_command: %d, db: '%s', query: '%s'",
                       binlog_command, db, query));

  switch (binlog_command) {
    case LOGCOM_CREATE_DB: {
      DBUG_PRINT("info", ("New database '%s' created", db));

      Ndb_schema_dist_client schema_dist_client(thd);

      if (!schema_dist_client.prepare(db, "")) {
        // Could not prepare the schema distribution client
        // NOTE! As there is no way to return an error, this may have to be
        // revisited, the prepare should be done
        // much earlier where it can return an error for the query
        return;
      }

      // Generate the id and version
      unsigned int id = schema_dist_client.unique_id();
      unsigned int version = schema_dist_client.unique_version();

      const bool result =
          schema_dist_client.create_db(query, query_length, db, id, version);
      if (result) {
        // Update the schema with the generated id and version but skip
        // committing the change in DD. Commit will be done by the caller.
        ndb_dd_update_schema_version(thd, db, id, version,
                                     true /*skip_commit*/);
      } else {
        // NOTE! There is currently no way to report an error from this
        // function, just log an error and proceed
        ndb_log_error("Failed to distribute 'CREATE DATABASE %s'", db);
      }
    } break;

    case LOGCOM_ALTER_DB: {
      DBUG_PRINT("info", ("The database '%s' was altered", db));

      Ndb_schema_dist_client schema_dist_client(thd);

      if (!schema_dist_client.prepare(db, "")) {
        // Could not prepare the schema distribution client
        // NOTE! As there is no way to return an error, this may have to be
        // revisited, the prepare should be done
        // much earlier where it can return an error for the query
        return;
      }

      // Generate the id and version
      unsigned int id = schema_dist_client.unique_id();
      unsigned int version = schema_dist_client.unique_version();

      const bool result =
          schema_dist_client.alter_db(query, query_length, db, id, version);
      if (result) {
        // Update the schema with the generated id and version but skip
        // committing the change in DD. Commit will be done by the caller.
        ndb_dd_update_schema_version(thd, db, id, version,
                                     true /*skip_commit*/);
      } else {
        // NOTE! There is currently no way to report an error from this
        // function, just log an error and proceed
        ndb_log_error("Failed to distribute 'ALTER DATABASE %s'", db);
      }
    } break;

    case LOGCOM_CREATE_TABLE:
    case LOGCOM_ALTER_TABLE:
    case LOGCOM_RENAME_TABLE:
    case LOGCOM_DROP_TABLE:
    case LOGCOM_DROP_DB:
      DBUG_PRINT("info", ("Ignoring binlog_log_query notification "
                          "for binlog_command: %d",
                          binlog_command));
      break;
  }
}

static void ndbcluster_acl_notify(THD *thd,
                                  const Acl_change_notification *notice) {
  DBUG_TRACE;

  if (!check_ndb_in_thd(thd)) {
    ndb_log_error("Privilege distribution failed to seize thd_ndb");
    return;
  }

  /* If this is the binlog thread, the ACL change has arrived via
     schema distribution and requires no further action.
  */
  if (get_thd_ndb(thd)->check_option(Thd_ndb::NO_LOG_SCHEMA_OP)) {
    return;
  }

  /* Obtain the query in a form suitable for writing to the error log.
     The password is replaced with the string "<secret>".
  */
  std::string query;
  if (thd->rewritten_query().length())
    query.assign(thd->rewritten_query().ptr(), thd->rewritten_query().length());
  else
    query.assign(thd->query().str, thd->query().length);
  DBUG_ASSERT(query.length());
  ndb_log_verbose(9, "ACL considering: %s", query.c_str());

  std::string user_list;
  bool dist_use_db = false;   // Prepend "use [db];" to statement
  bool dist_refresh = false;  // All participants must refresh their caches
  Ndb_stored_grants::Strategy strategy =
      Ndb_stored_grants::handle_local_acl_change(thd, notice, &user_list,
                                                 &dist_use_db, &dist_refresh);

  Ndb_schema_dist_client schema_dist_client(thd);

  if (strategy == Ndb_stored_grants::Strategy::ERROR) {
    ndb_log_error("Not distributing ACL change after error.");
    return;
  }

  if (strategy == Ndb_stored_grants::Strategy::NONE) {
    ndb_log_verbose(9, "ACL change distribution: NONE");
    return;
  }

  const unsigned int &node_id = g_ndb_cluster_connection->node_id();
  if (!schema_dist_client.prepare_acl_change(node_id)) {
    ndb_log_error("Failed to distribute '%s' (Failed prepare)", query.c_str());
    return;
  }

  if (strategy == Ndb_stored_grants::Strategy::SNAPSHOT) {
    ndb_log_verbose(9, "ACL change distribution: SNAPSHOT");
    if (!schema_dist_client.acl_notify(user_list))
      ndb_log_error("Failed to distribute '%s' (SNAPSHOT)", query.c_str());
    return;
  }

  DBUG_ASSERT(strategy == Ndb_stored_grants::Strategy::STATEMENT);
  ndb_log_verbose(9, "ACL change distribution: STATEMENT");

  /* If the notice contains rewrite_params, query is an ALTER USER or SET
     PASSWORD statement and must be rewritten again, as if for the binlog,
     replacing a plaintext password with a cryptographic hash.
  */
  if (notice->get_rewrite_params()) {
    String rewritten_query;
    mysql_rewrite_acl_query(thd, rewritten_query, Consumer_type::BINLOG,
                            notice->get_rewrite_params(), false);
    query.assign(rewritten_query.c_ptr_safe(), rewritten_query.length());
    DBUG_ASSERT(query.length());
  }

  if (!schema_dist_client.acl_notify(
          dist_use_db ? notice->get_db().c_str() : nullptr, query.c_str(),
          query.length(), dist_refresh))
    ndb_log_error("Failed to distribute '%s' (STATEMENT)", query.c_str());
}

/*
  End use of the NDB Cluster binlog
   - wait for binlog thread to shutdown
*/

int ndbcluster_binlog_end() {
  DBUG_TRACE;

  if (ndbcluster_binlog_inited) {
    ndbcluster_binlog_inited = 0;

    ndb_binlog_thread.stop();
    ndb_binlog_thread.deinit();

    mysql_mutex_destroy(&injector_event_mutex);
    mysql_mutex_destroy(&injector_data_mutex);
    mysql_cond_destroy(&injector_data_cond);
  }

  return 0;
}

/*****************************************************************
  functions called from slave sql client threads
****************************************************************/
static void ndbcluster_reset_slave(THD *thd) {
  if (!ndb_binlog_running) return;

  DBUG_TRACE;

  /*
    Delete all rows from the mysql.ndb_apply_status table
     - if the table does not exist, ignore the error as that
       is a consistent behavior
  */
  if (opt_ndb_clear_apply_status) {
    Ndb_local_connection mysqld(thd);
    const bool ignore_no_such_table = true;
    if (mysqld.delete_rows(Ndb_apply_status_table::DB_NAME,
                           Ndb_apply_status_table::TABLE_NAME,
                           ignore_no_such_table, "1=1")) {
      // Failed to delete rows from table
    }
  }

  g_ndb_slave_state.atResetSlave();

  // pending fix for bug#59844 will make this function return int
}

static int ndbcluster_binlog_func(handlerton *, THD *thd, enum_binlog_func fn,
                                  void *arg) {
  DBUG_TRACE;
  int res = 0;
  switch (fn) {
    case BFN_RESET_LOGS:
      break;
    case BFN_RESET_SLAVE:
      ndbcluster_reset_slave(thd);
      break;
    case BFN_BINLOG_WAIT:
      ndbcluster_binlog_wait(thd);
      break;
    case BFN_BINLOG_END:
      res = ndbcluster_binlog_end();
      break;
    case BFN_BINLOG_PURGE_FILE:
      res = ndbcluster_binlog_index_purge_file(thd, (const char *)arg);
      break;
  }
  return res;
}

void ndbcluster_binlog_init(handlerton *h) {
  h->binlog_func = ndbcluster_binlog_func;
  h->binlog_log_query = ndbcluster_binlog_log_query;
  h->acl_notify = ndbcluster_acl_notify;
}

/*
  ndb_notify_tables_writable

  Called to notify any waiting threads that Ndb tables are
  now writable
*/
static void ndb_notify_tables_writable() {
  mysql_mutex_lock(&ndbcluster_mutex);
  ndb_setup_complete = 1;
  mysql_cond_broadcast(&ndbcluster_cond);
  mysql_mutex_unlock(&ndbcluster_mutex);
}

/**
  Utility class encapsulating the code which sets up the 'ndb binlog thread'
  to be "connected" to the cluster.
  This involves:
   - synchronizing the local mysqld data dictionary with that in NDB
   - subscribing to changes that happen in NDB, thus allowing:
     -- the local Data Dictionary to be kept in sync
     -- changes in NDB to be written to the binlog
*/

class Ndb_binlog_setup {
  THD *const m_thd;

  /**
    @brief Detect whether the binlog is being setup after an initial system
    start/restart or after a normal system start/restart.

    @param thd_ndb The Thd_ndb object

    @return true if this is an initial system start/restart, false otherwise.
  */
  bool detect_initial_restart(Thd_ndb *thd_ndb) {
    DBUG_TRACE;

    // Retrieve the old schema UUID stored in DD.
    dd::String_type dd_schema_uuid;
    if (!ndb_dd_get_schema_uuid(m_thd, &dd_schema_uuid)) {
      DBUG_ASSERT(false);
      ndb_log_warning("Failed to read the schema UUID of DD");
      return false;
    }

    if (dd_schema_uuid.empty()) {
      /*
        DD didn't have any schema UUID previously. This is either an initial
        start (or) an upgrade from a version which does not have the schema
        UUID implemented. Such upgrades are considered as initial starts to
        keep this code simple and due to the fact that the upgrade is
        probably being done from a 5.x or a non-GA 8.0.x version to an 8.0.x
        cluster GA version.
      */
      ndb_log_info("Detected an initial system start");
      return true;
    }

    // Check if ndb_schema table exists in NDB
    Ndb_schema_dist_table schema_dist_table(thd_ndb);
    if (!schema_dist_table.exists()) {
      /*
        The ndb_schema table does not exist in NDB yet but the DD already
        has a schema UUID. This is an initial system restart.
      */
      ndb_log_info("Detected an initial system restart");
      return true;
    }

    // Retrieve the old schema UUID stored in NDB
    std::string ndb_schema_uuid;
    if (!schema_dist_table.open() ||
        !schema_dist_table.get_schema_uuid(&ndb_schema_uuid)) {
      DBUG_ASSERT(false);
      return false;
    }
    /*
      Since the ndb_schema table exists already, the schema UUID also cannot
      be empty as whichever mysqld created the table would also have updated
      the schema UUID in NDB.
    */
    DBUG_ASSERT(!ndb_schema_uuid.empty());

    if (ndb_schema_uuid == dd_schema_uuid.c_str()) {
      /*
        Schema UUIDs are the same. This is either a normal system restart or
        an upgrade. Any upgrade from versions having schema UUID to another
        newer version will be handled here.
      */
      ndb_log_info("Detected a normal system restart");
      return false;
    }

    /*
      Schema UUIDs don't match. This mysqld was previously connected to a
      Cluster whose schema UUID is stored in DD. It is now connecting for the
      first time to a new Cluster which already has a different schema UUID,
      as this is not the first mysqld connecting to that Cluster.
      From this mysqld's perspective, this will be treated as an
      initial system restart.
    */
    ndb_log_info("Detected an initial system restart");
    return true;
  }

  Ndb_binlog_setup(const Ndb_binlog_setup &) = delete;
  Ndb_binlog_setup operator=(const Ndb_binlog_setup &) = delete;

 public:
  Ndb_binlog_setup(THD *thd) : m_thd(thd) {}

  /**
    @brief Setup this node to take part in schema distribution by creating
    the ndbcluster util tables, performing schema synchronization and
    creating references to NDB_SHARE for all tables.

    @note See special error handling required when function fails.

    @return true if setup is successful
    @return false if setup fails. The creation of the ndb_schema table and
    setup of event operation registers this node in the schema distribution
    protocol. Thus this node is expected to reply to schema distribution
    events. Replying is however not possible until setup has successfully
    completed and the binlog thread has started to handle events. If setup
    fails, the event operation on the ndb_schema table and all other event
    operations must be removed in order to signal unsubscribe and remove
    this node from schema distribution.
  */
  bool setup(Thd_ndb *thd_ndb) {
    /* Test binlog_setup on this mysqld being slower (than other mysqld) */
    if (DBUG_EVALUATE_IF("ndb_binlog_setup_slow", true, false)) {
      ndb_log_info("'ndb_binlog_setup_slow' -> sleep");
      ndb_milli_sleep(10 * 1000);
      ndb_log_info(" <- sleep");
    }

    DBUG_ASSERT(ndb_apply_status_share == nullptr);

    // Protect the schema synchronization with GSL (Global Schema Lock)
    Ndb_global_schema_lock_guard global_schema_lock_guard(m_thd);
    if (global_schema_lock_guard.lock()) {
      return false;
    }

    /* Give additional 'binlog_setup rights' to this Thd_ndb */
    Thd_ndb::Options_guard thd_ndb_options(thd_ndb);
    thd_ndb_options.set(Thd_ndb::ALLOW_BINLOG_SETUP);

    // Check if this is an initial restart/start
    const bool initial_system_restart = detect_initial_restart(thd_ndb);

    Ndb_dd_sync dd_sync(m_thd, thd_ndb);
    if (initial_system_restart) {
      // Remove all NDB metadata from DD since this is an initial restart
      if (!dd_sync.remove_all_metadata()) {
        return false;
      }
    } else {
      /*
        Not an initial restart. Delete DD table definitions corresponding to
        NDB tables that no longer exist in the NDB Dictionary. This is to
        ensure that synchronization of tables down the line doesn't run into
        issues related to table ids being reused.
      */
      if (!dd_sync.remove_deleted_tables()) {
        return false;
      }
    }

    const bool ndb_schema_dist_upgrade_allowed =
        ndb_allow_ndb_schema_upgrade();
    Ndb_schema_dist_table schema_dist_table(thd_ndb);
    if (!schema_dist_table.create_or_upgrade(m_thd,
                                             ndb_schema_dist_upgrade_allowed))
      return false;

    if (!Ndb_schema_dist::is_ready(m_thd)) {
      ndb_log_verbose(50, "Schema distribution setup failed");
      return false;
    }

    if (DBUG_EVALUATE_IF("ndb_binlog_setup_incomplete", true, false)) {
      // Remove the dbug keyword, only fail first time and avoid infinite
      // setup
      DBUG_SET("-d,ndb_binlog_setup_incomplete");
      // Test handling of setup failing to complete *after* 'ndb_schema' has
      // been created
      ndb_log_info("Simulate 'ndb_binlog_setup_incomplete' -> return error");
      return false;
    }

    // If this is an initial start/restart, update the schema UUID in DD
    if (initial_system_restart) {
      // Retrieve the new schema UUID from NDB
      std::string ndb_schema_uuid;
      if (!schema_dist_table.get_schema_uuid(&ndb_schema_uuid)) return false;

      // Update it in DD
      if (!ndb_dd_update_schema_uuid(m_thd, ndb_schema_uuid)) {
        ndb_log_warning("Failed to update schema uuid in DD.");
        return false;
      }
    }

    Ndb_schema_result_table schema_result_table(thd_ndb);
    if (!schema_result_table.create_or_upgrade(
            m_thd, ndb_schema_dist_upgrade_allowed))
      return false;

    Ndb_apply_status_table apply_status_table(thd_ndb);
    if (!apply_status_table.create_or_upgrade(m_thd, true)) return false;

    if (!dd_sync.synchronize()) {
      ndb_log_verbose(9, "Failed to synchronize DD with NDB");
      return false;
    }

    // Check that references for ndb_apply_status have been created
    DBUG_ASSERT(!ndb_binlog_running || ndb_apply_status_share);

    if (!Ndb_stored_grants::initialize(m_thd, thd_ndb)) {
      ndb_log_warning("stored grants: failed to initialize");
      return false;
    }

    Mutex_guard injector_mutex_g(injector_data_mutex);
    ndb_binlog_tables_inited = true;

    // During upgrade from a non-DD version, DDLs are blocked until all
    // nodes run a version that has support for the Data Dictionary.
    Ndb_schema_dist_client::block_ddl(!ndb_all_nodes_support_mysql_dd());

    return true;  // Setup completed OK
  }
};

/*
  Defines for the expected order of columns in the ndb_schema table, should
  match the accepted table definition.
*/
constexpr uint SCHEMA_DB_I = 0;
constexpr uint SCHEMA_NAME_I = 1;
constexpr uint SCHEMA_SLOCK_I = 2;
constexpr uint SCHEMA_QUERY_I = 3;
constexpr uint SCHEMA_NODE_ID_I = 4;
constexpr uint SCHEMA_EPOCH_I = 5;
constexpr uint SCHEMA_ID_I = 6;
constexpr uint SCHEMA_VERSION_I = 7;
constexpr uint SCHEMA_TYPE_I = 8;
constexpr uint SCHEMA_OP_ID_I = 9;
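
/*
  For reference (illustrative; see Ndb_schema_dist_table for the
  authoritative definition): the column order above corresponds to a
  mysql.ndb_schema table of roughly this shape:

    CREATE TABLE mysql.ndb_schema (
      db VARBINARY(63) NOT NULL,       -- SCHEMA_DB_I
      name VARBINARY(63) NOT NULL,     -- SCHEMA_NAME_I
      slock BINARY(32) NOT NULL,       -- SCHEMA_SLOCK_I
      query BLOB NOT NULL,             -- SCHEMA_QUERY_I
      node_id INT UNSIGNED NOT NULL,   -- SCHEMA_NODE_ID_I
      epoch BIGINT UNSIGNED NOT NULL,  -- SCHEMA_EPOCH_I
      id INT UNSIGNED NOT NULL,        -- SCHEMA_ID_I
      version INT UNSIGNED NOT NULL,   -- SCHEMA_VERSION_I
      type INT UNSIGNED NOT NULL,      -- SCHEMA_TYPE_I
      schema_op_id INT UNSIGNED NULL,  -- SCHEMA_OP_ID_I (newer versions)
      PRIMARY KEY USING HASH (db, name)
    ) ENGINE=NDB;
*/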

static void ndb_report_waiting(const char *key, int the_time, const char *op,
                               const char *obj) {
  ulonglong ndb_latest_epoch = 0;
  const char *proc_info = "<no info>";
  mysql_mutex_lock(&injector_event_mutex);
  if (injector_ndb) ndb_latest_epoch = injector_ndb->getLatestGCI();
  if (injector_thd) proc_info = injector_thd->proc_info;
  mysql_mutex_unlock(&injector_event_mutex);
  {
    ndb_log_info(
        "%s, waiting max %u sec for %s %s."
        " epochs: (%u/%u,%u/%u,%u/%u)"
        " injector proc_info: %s",
        key, the_time, op, obj, (uint)(ndb_latest_handled_binlog_epoch >> 32),
        (uint)(ndb_latest_handled_binlog_epoch),
        (uint)(ndb_latest_received_binlog_epoch >> 32),
        (uint)(ndb_latest_received_binlog_epoch),
        (uint)(ndb_latest_epoch >> 32), (uint)(ndb_latest_epoch), proc_info);
  }
}

bool Ndb_schema_dist_client::write_schema_op_to_NDB(
    Ndb *ndb, const char *query, int query_length, const char *db,
    const char *name, uint32 id, uint32 version, uint32 nodeid, uint32 type,
    uint32 schema_op_id, uint32 anyvalue) {
  DBUG_TRACE;

  // Open ndb_schema table
  Ndb_schema_dist_table schema_dist_table(m_thd_ndb);
  if (!schema_dist_table.open()) {
    return false;
  }
  const NdbDictionary::Table *ndbtab = schema_dist_table.get_table();

  // Pack db and table_name
  char db_buf[FN_REFLEN];
  char name_buf[FN_REFLEN];
  ndb_pack_varchar(ndbtab, SCHEMA_DB_I, db_buf, db, strlen(db));
  ndb_pack_varchar(ndbtab, SCHEMA_NAME_I, name_buf, name, strlen(name));

  // Start the schema operation with all bits set in the slock column.
  // The expectation is that all participants will reply and those not
  // connected will be filtered away by the coordinator.
  std::vector<char> slock_data;
  slock_data.assign(schema_dist_table.get_slock_bytes(), 0xFF);

  // Function for writing row to ndb_schema
  std::function<const NdbError *(NdbTransaction *)> write_schema_op_func =
      [&](NdbTransaction *trans) -> const NdbError * {
    DBUG_TRACE;

    NdbOperation *op = trans->getNdbOperation(ndbtab);
    if (op == nullptr) return &trans->getNdbError();

    const Uint64 log_epoch = 0;
    if (op->writeTuple() != 0 || op->equal(SCHEMA_DB_I, db_buf) != 0 ||
        op->equal(SCHEMA_NAME_I, name_buf) != 0 ||
        op->setValue(SCHEMA_SLOCK_I, slock_data.data()) != 0 ||
        op->setValue(SCHEMA_NODE_ID_I, nodeid) != 0 ||
        op->setValue(SCHEMA_EPOCH_I, log_epoch) != 0 ||
        op->setValue(SCHEMA_ID_I, id) != 0 ||
        op->setValue(SCHEMA_VERSION_I, version) != 0 ||
        op->setValue(SCHEMA_TYPE_I, type) != 0 ||
        op->setAnyValue(anyvalue) != 0)
      return &op->getNdbError();

    NdbBlob *ndb_blob = op->getBlobHandle(SCHEMA_QUERY_I);
    if (ndb_blob == nullptr) return &op->getNdbError();

    if (ndb_blob->setValue(query, query_length) != 0)
      return &ndb_blob->getNdbError();

    if (schema_dist_table.have_schema_op_id_column()) {
      if (op->setValue(SCHEMA_OP_ID_I, schema_op_id) != 0)
        return &op->getNdbError();
    }

    if (trans->execute(NdbTransaction::Commit,
                       NdbOperation::DefaultAbortOption,
                       1 /* force send */) != 0) {
      return &trans->getNdbError();
    }

    return nullptr;
  };

  NdbError ndb_err;
  if (!ndb_trans_retry(ndb, m_thd, ndb_err, write_schema_op_func)) {
    m_thd_ndb->push_ndb_error_warning(ndb_err);
    m_thd_ndb->push_warning("Failed to write schema operation");
    return false;
  }

  (void)ndb->getDictionary()->forceGCPWait(1);

  return true;
}

/*
  Log query in the ndb_schema table
*/

bool Ndb_schema_dist_client::log_schema_op_impl(
    Ndb *ndb, const char *query, int query_length, const char *db,
    const char *table_name, uint32 ndb_table_id, uint32 ndb_table_version,
    SCHEMA_OP_TYPE type, uint32 anyvalue) {
  DBUG_TRACE;
  DBUG_PRINT("enter",
             ("query: %s db: %s table_name: %s", query, db, table_name));

  // Create NDB_SCHEMA_OBJECT
  std::unique_ptr<NDB_SCHEMA_OBJECT, decltype(&NDB_SCHEMA_OBJECT::release)>
      ndb_schema_object(NDB_SCHEMA_OBJECT::get(db, table_name, ndb_table_id,
                                               ndb_table_version, true),
                        NDB_SCHEMA_OBJECT::release);

  if (DBUG_EVALUATE_IF("ndb_binlog_random_tableid", true, false)) {
    /**
     * Try to trigger a race between a late incoming slock ack for
     * schema operations having their coordinator on another node,
     * which we would otherwise have discarded as no matching
     * ndb_schema_object existed, and another schema op with the same
     * 'key', coordinated by this node. Thus causing a mixup between
     * these, and the schema distribution getting totally out of sync.
     */
    ndb_milli_sleep(50);
  }

  // Format string to use in log printouts
  const std::string op_name = db + std::string(".") + table_name + "(" +
                              std::to_string(ndb_table_id) + "/" +
                              std::to_string(ndb_table_version) + ")";

  // Use nodeid of the primary cluster connection since that is
  // the nodeid which the coordinator and participants listen to
  const uint32 own_nodeid = g_ndb_cluster_connection->node_id();

  // Write schema operation to the table
  if (DBUG_EVALUATE_IF("ndb_schema_write_fail", true, false) ||
      !write_schema_op_to_NDB(ndb, query, query_length, db, table_name,
                              ndb_table_id, ndb_table_version, own_nodeid,
                              type, ndb_schema_object->schema_op_id(),
                              anyvalue)) {
    ndb_schema_object->fail_schema_op(Ndb_schema_dist::NDB_TRANS_FAILURE,
                                      "Failed to write schema operation");
    ndb_log_warning("Failed to write the schema op into the ndb_schema table");
    return false;
  }

  ndb_log_verbose(19, "Distribution of '%s' - started!", op_name.c_str());
  if (ndb_log_get_verbose_level() >= 19) {
    ndb_log_error_dump("Schema_op {");
    ndb_log_error_dump("type: %d", type);
    ndb_log_error_dump("query: '%s'", query);
    ndb_log_error_dump("}");
  }

  // Wait for participants to complete the schema change
  while (true) {
    const bool completed = ndb_schema_object->client_wait_completed(1);
    if (completed) {
      // Schema operation completed
      ndb_log_verbose(19, "Distribution of '%s' - completed!",
                      op_name.c_str());
      break;
    }

    // Check if schema distribution is still ready.
    if (m_share->have_event_operation() == false) {
      // This case is unlikely, but there is a small race between the
      // client's first check for schema distribution ready and the schema
      // op being registered in the coordinator (since the message is passed
      // via NDB).
      ndb_schema_object->fail_schema_op(Ndb_schema_dist::CLIENT_ABORT,
                                        "Schema distribution is not ready");
      ndb_log_warning("Distribution of '%s' - not ready!", op_name.c_str());
      break;
    }

    if (thd_killed(m_thd) ||
        DBUG_EVALUATE_IF("ndb_schema_dist_client_killed", true, false)) {
      ndb_schema_object->fail_schema_op(Ndb_schema_dist::CLIENT_KILLED,
                                        "Client was killed");
      ndb_log_warning("Distribution of '%s' - killed!", op_name.c_str());
      break;
    }
  }

  // Inspect results in NDB_SCHEMA_OBJECT before it's released
  std::vector<NDB_SCHEMA_OBJECT::Result> participant_results;
  ndb_schema_object->client_get_schema_op_results(participant_results);
  for (auto &it : participant_results) {
    // Save result for later
    m_schema_op_results.push_back({it.nodeid, it.result, it.message});
  }

  return true;
}

/*
  ndbcluster_binlog_event_operation_teardown

  Used when a NdbEventOperation has indicated that the table has been
  dropped or the connection to the cluster has failed. The function needs
  to tear down the NdbEventOperation and its associated data structures
  owned by the binlog.

  It will also signal the "injector_data_cond" so that anyone using
  ndbcluster_binlog_wait_synch_drop_table() to wait for the binlog
  to handle the drop will be notified.

  The function may be called either by Ndb_schema_event_handler which
  listens to events only on mysql.ndb_schema or by the "injector" which
  listens to events on all the other tables.
*/

static void ndbcluster_binlog_event_operation_teardown(THD *thd, Ndb *is_ndb,
                                                       NdbEventOperation *pOp) {
  DBUG_TRACE;
  DBUG_PRINT("enter", ("pOp: %p", pOp));

  // Get Ndb_event_data associated with the NdbEventOperation
  const Ndb_event_data *event_data =
      static_cast<const Ndb_event_data *>(pOp->getCustomData());
  DBUG_ASSERT(event_data);

  // Get NDB_SHARE associated with the Ndb_event_data, the share
  // is referenced by "binlog" and will not go away until released
  // further down in this function
  NDB_SHARE *share = event_data->share;

  // Invalidate any cached NdbApi table if object version is lower
  // than what was used when setting up the NdbEventOperation
  // NOTE! This functionality needs to be explained further
  {
    Thd_ndb *thd_ndb = get_thd_ndb(thd);
    Ndb *ndb = thd_ndb->ndb;
    Ndb_table_guard ndbtab_g(ndb, share->db, share->table_name);
    const NDBTAB *ev_tab = pOp->getTable();
    const NDBTAB *cache_tab = ndbtab_g.get_table();
    if (cache_tab && cache_tab->getObjectId() == ev_tab->getObjectId() &&
        cache_tab->getObjectVersion() <= ev_tab->getObjectVersion())
      ndbtab_g.invalidate();
  }

  // Remove NdbEventOperation from the share
  mysql_mutex_lock(&share->mutex);
  DBUG_ASSERT(share->op == pOp);
  share->op = NULL;
  mysql_mutex_unlock(&share->mutex);

  /* Signal ha_ndbcluster::delete/rename_table that drop is done */
  DBUG_PRINT("info", ("signal that drop is done"));
  mysql_cond_broadcast(&injector_data_cond);

  // Close the table in MySQL Server
  ndb_tdc_close_cached_table(thd, share->db, share->table_name);

  // Release the "binlog" reference from NDB_SHARE
  NDB_SHARE::release_reference(share, "binlog");

  // Remove pointer to event_data from the EventOperation
  pOp->setCustomData(NULL);

  // Drop the NdbEventOperation from NdbApi
  DBUG_PRINT("info", ("Dropping event operation: %p", pOp));
  mysql_mutex_lock(&injector_event_mutex);
  is_ndb->dropEventOperation(pOp);
  mysql_mutex_unlock(&injector_event_mutex);

  // Finally delete the event_data and thus its mem_root, shadow_table etc.
  Ndb_event_data::destroy(event_data);
}
1262 /*
1263 Data used by the Ndb_schema_event_handler which lives
1264 as long as the NDB Binlog thread is connected to the cluster.
1265
1266 NOTE! An Ndb_schema_event_handler instance only lives for one epoch
1267
1268 */
1269 class Ndb_schema_dist_data {
1270 uint m_own_nodeid;
1271 uint m_max_subscribers{0};
1272 // List of active schema operations in this coordinator. Having an
1273 // active schema operation means it need to be checked
1274 // for timeout or request to be killed regularly
1275 std::unordered_set<const NDB_SCHEMA_OBJECT *> m_active_schema_ops;
1276
1277 std::chrono::steady_clock::time_point m_next_check_time;
1278
1279 // Keeps track of subscribers as reported by one data node
1280 class Node_subscribers {
1281 MY_BITMAP m_bitmap;
1282
1283 public:
1284 Node_subscribers(const Node_subscribers &) = delete;
1285 Node_subscribers() = delete;
Node_subscribers(uint max_subscribers)1286 Node_subscribers(uint max_subscribers) {
1287 // Initialize the bitmap
1288 bitmap_init(&m_bitmap, nullptr, max_subscribers);
1289
1290 // Assume that all bits are cleared by bitmap_init()
1291 DBUG_ASSERT(bitmap_is_clear_all(&m_bitmap));
1292 }
~Node_subscribers()1293 ~Node_subscribers() { bitmap_free(&m_bitmap); }
clear_all()1294 void clear_all() { bitmap_clear_all(&m_bitmap); }
set(uint subscriber_node_id)1295 void set(uint subscriber_node_id) {
1296 bitmap_set_bit(&m_bitmap, subscriber_node_id);
1297 }
clear(uint subscriber_node_id)1298 void clear(uint subscriber_node_id) {
1299 bitmap_clear_bit(&m_bitmap, subscriber_node_id);
1300 }
to_string() const1301 std::string to_string() const {
1302 return ndb_bitmap_to_hex_string(&m_bitmap);
1303 }
1304
1305 /**
1306 @brief Add current subscribers to list of nodes.
1307 @param subscriber_list List of subscriber
1308 */
get_subscriber_list(std::unordered_set<uint32> & subscriber_list) const1309 void get_subscriber_list(
1310 std::unordered_set<uint32> &subscriber_list) const {
1311 for (uint i = bitmap_get_first_set(&m_bitmap); i != MY_BIT_NONE;
1312 i = bitmap_get_next_set(&m_bitmap, i)) {
1313 subscriber_list.insert(i);
1314 }
1315 }
1316 };
1317 /*
1318 List keeping track of the subscribers to ndb_schema. It contains one
1319 Node_subscribers per data node, this avoids the need to know which data
1320 nodes are connected.
1321 */
1322 std::unordered_map<uint, Node_subscribers *> m_subscriber_bitmaps;
1323
1324 /**
1325 @brief Find node subscribers for given data node
1326 @param data_node_id Nodeid of data node
1327 @return Pointer to node subscribers or nullptr
1328 */
find_node_subscribers(uint data_node_id) const1329 Node_subscribers *find_node_subscribers(uint data_node_id) const {
1330 const auto it = m_subscriber_bitmaps.find(data_node_id);
1331 if (it == m_subscriber_bitmaps.end()) {
1332 // Unexpected data node id received, this may be caused by data node added
1333 // without restarting this MySQL Server or node id otherwise out of
1334 // range for current configuration. Handle the situation gracefully and
1335 // just print error message to the log.
1336 ndb_log_error("Could not find node subscribers for data node %d",
1337 data_node_id);
1338 ndb_log_error("Restart this MySQL Server to adapt to configuration");
1339 return nullptr;
1340 }
1341 Node_subscribers *subscriber_bitmap = it->second;
1342 ndbcluster::ndbrequire(subscriber_bitmap);
1343 return subscriber_bitmap;
1344 }
1345
1346 // Holds the new key for a table to be renamed
1347 struct NDB_SHARE_KEY *m_prepared_rename_key;
1348
1349 // Holds the Ndb_event_data which is created during inplace alter table
1350 // prepare and used during commit
1351 // NOTE! this place holder is only used for the participant in same node
1352 const class Ndb_event_data *m_inplace_alter_event_data{nullptr};
1353
1354 public:
1355 Ndb_schema_dist_data(const Ndb_schema_dist_data &); // Not implemented
Ndb_schema_dist_data()1356 Ndb_schema_dist_data() : m_prepared_rename_key(NULL) {}
~Ndb_schema_dist_data()1357 ~Ndb_schema_dist_data() {
1358 // There should be no schema operations active
1359 DBUG_ASSERT(m_active_schema_ops.size() == 0);
1360 }
1361
init(Ndb_cluster_connection * cluster_connection)1362 void init(Ndb_cluster_connection *cluster_connection) {
1363 Uint32 max_subscribers = cluster_connection->max_api_nodeid() + 1;
1364 m_own_nodeid = cluster_connection->node_id();
1365 NDB_SCHEMA_OBJECT::init(m_own_nodeid);
1366
1367 // Add one subscriber bitmap per data node in the current configuration
1368 unsigned node_id;
1369 Ndb_cluster_connection_node_iter node_iter;
1370 while ((node_id = cluster_connection->get_next_node(node_iter))) {
1371 m_subscriber_bitmaps.emplace(node_id,
1372 new Node_subscribers(max_subscribers));
1373 }
1374 // Remember max number of subscribers
1375 m_max_subscribers = max_subscribers;
1376 }
1377
release(void)1378 void release(void) {
1379 // Release the subscriber bitmaps
1380 for (const auto it : m_subscriber_bitmaps) {
1381 Node_subscribers *subscriber_bitmap = it.second;
1382 delete subscriber_bitmap;
1383 }
1384 m_subscriber_bitmaps.clear();
1385 m_max_subscribers = 0;
1386
1387 // Release the prepared rename key, it's very unlikely
1388 // that the key is still around here, but just in case
1389 NDB_SHARE::free_key(m_prepared_rename_key);
1390 m_prepared_rename_key = NULL;
1391
1392 // Release the event_data saved for inplace alter, it's very
1393 // unlikley that the event_data is still around, but just in case
1394 Ndb_event_data::destroy(m_inplace_alter_event_data);
1395 m_inplace_alter_event_data = nullptr;
1396
1397 // Release any remaining active schema operations
1398 for (const NDB_SCHEMA_OBJECT *schema_op : m_active_schema_ops) {
1399 ndb_log_info(" - releasing schema operation on '%s.%s'", schema_op->db(),
1400 schema_op->name());
1401 schema_op->fail_schema_op(Ndb_schema_dist::COORD_ABORT,
1402 "Coordinator aborted");
1403 // Release coordinator reference
1404 NDB_SCHEMA_OBJECT::release(const_cast<NDB_SCHEMA_OBJECT *>(schema_op));
1405 }
1406 m_active_schema_ops.clear();
1407 }
1408
report_data_node_failure(unsigned data_node_id)1409 void report_data_node_failure(unsigned data_node_id) {
1410 ndb_log_verbose(1, "Data node %d failed", data_node_id);
1411
    Node_subscribers *subscribers = find_node_subscribers(data_node_id);
    if (subscribers) {
      subscribers->clear_all();

      ndb_log_verbose(19, "Subscribers[%d]: %s", data_node_id,
                      subscribers->to_string().c_str());
    }
  }

  void report_subscribe(unsigned data_node_id, unsigned subscriber_node_id) {
    ndb_log_verbose(1, "Data node %d reports subscribe from node %d",
                    data_node_id, subscriber_node_id);
    ndbcluster::ndbrequire(subscriber_node_id != 0);

    Node_subscribers *subscribers = find_node_subscribers(data_node_id);
    if (subscribers) {
      subscribers->set(subscriber_node_id);

      ndb_log_verbose(19, "Subscribers[%d]: %s", data_node_id,
                      subscribers->to_string().c_str());
    }
  }

  void report_unsubscribe(unsigned data_node_id, unsigned subscriber_node_id) {
    ndb_log_verbose(1, "Data node %d reports unsubscribe from node %d",
                    data_node_id, subscriber_node_id);
    ndbcluster::ndbrequire(subscriber_node_id != 0);

    Node_subscribers *subscribers = find_node_subscribers(data_node_id);
    if (subscribers) {
      subscribers->clear(subscriber_node_id);

      ndb_log_verbose(19, "Subscribers[%d]: %s", data_node_id,
                      subscribers->to_string().c_str());
    }
  }

  /**
    @brief Get list of current subscribers
    @note A node counts as subscribed as soon as any data node reports it as
    subscribed.
    @param subscriber_list The list where to return subscribers
  */
  void get_subscriber_list(std::unordered_set<uint32> &subscriber_list) const {
    for (const auto it : m_subscriber_bitmaps) {
      Node_subscribers *subscribers = it.second;
      subscribers->get_subscriber_list(subscriber_list);
    }
    // Always add own node which is always connected
    subscriber_list.insert(m_own_nodeid);
  }

  void save_prepared_rename_key(NDB_SHARE_KEY *key) {
    m_prepared_rename_key = key;
  }

  NDB_SHARE_KEY *get_prepared_rename_key() const {
    return m_prepared_rename_key;
  }

  void save_inplace_alter_event_data(const Ndb_event_data *event_data) {
    // Should not already be set when saving a new pointer
    DBUG_ASSERT(event_data == nullptr || !m_inplace_alter_event_data);
    m_inplace_alter_event_data = event_data;
  }
  const Ndb_event_data *get_inplace_alter_event_data() const {
    return m_inplace_alter_event_data;
  }

  void add_active_schema_op(NDB_SCHEMA_OBJECT *schema_op) {
    // Current assumption is that as long as all users of schema distribution
    // hold the GSL, there will only ever be one active schema operation at a
    // time. This assumption will probably change soon, but until then it can
    // be verified with an assert.
    DBUG_ASSERT(m_active_schema_ops.size() == 0);

    // Get coordinator reference to NDB_SCHEMA_OBJECT. It will be kept alive
    // until the coordinator releases it
    NDB_SCHEMA_OBJECT::get(schema_op);

    // Insert NDB_SCHEMA_OBJECT in list of active schema ops
    ndbcluster::ndbrequire(m_active_schema_ops.insert(schema_op).second);
  }

  void remove_active_schema_op(NDB_SCHEMA_OBJECT *schema_op) {
    // Need to have active schema op for decrement
    ndbcluster::ndbrequire(m_active_schema_ops.size() > 0);

    // Remove NDB_SCHEMA_OBJECT from list of active schema ops
    ndbcluster::ndbrequire(m_active_schema_ops.erase(schema_op) == 1);

    // Release coordinator reference to NDB_SCHEMA_OBJECT
    NDB_SCHEMA_OBJECT::release(schema_op);
  }

  const std::unordered_set<const NDB_SCHEMA_OBJECT *> &active_schema_ops() {
    return m_active_schema_ops;
  }

  bool time_for_check() {
    std::chrono::steady_clock::time_point curr_time =
        std::chrono::steady_clock::now();
    if (m_next_check_time > curr_time) return false;

    // Set up the time for the next check in 1 second
    m_next_check_time = curr_time + std::chrono::seconds(1);
    return true;
  }
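
  /*
    Illustration only (not part of the build): time_for_check() is a simple
    rate limiter, so a caller polling in a tight loop performs the actual
    check at most once per second. A minimal usage sketch, where the loop and
    the helper are hypothetical:

      while (binlog_thread_running) {
        if (m_schema_dist_data.time_for_check()) {
          perform_periodic_checks();  // hypothetical helper
        }
        // ... handle events ...
      }
  */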

};  // class Ndb_schema_dist_data

class Ndb_schema_event_handler {
  class Ndb_schema_op {
    /*
      Unpack arbitrary length varbinary field and return pointer to zero
      terminated string allocated in current memory root.

      @param field The field to unpack
      @return pointer to string allocated in current MEM_ROOT
    */
    static char *unpack_varbinary(Field *field) {
      /*
        The Schema_dist_client will check the schema of the ndb_schema table
        and will not send any commands unless the table fulfills requirements.
        Thus this function assumes that the field is always a varbinary
        (with at least 63 bytes length since that's the legacy min limit)
      */
      ndbcluster::ndbrequire(field->type() == MYSQL_TYPE_VARCHAR);
      ndbcluster::ndbrequire(field->field_length >= 63);

      // Calculate number of length bytes, this depends on the field's max
      // length
      const uint length_bytes = HA_VARCHAR_PACKLENGTH(field->field_length);
      ndbcluster::ndbrequire(length_bytes <= 2);

      // Read length of the varbinary which is stored in the field
      const uint varbinary_length =
          length_bytes == 1 ? static_cast<uint>(*field->field_ptr())
                            : uint2korr(field->field_ptr());
      DBUG_PRINT("info", ("varbinary length: %u", varbinary_length));
      // Check that varbinary length is not greater than the field's max length
      // (this would indicate that corrupted data has been written to table)
      ndbcluster::ndbrequire(varbinary_length <= field->field_length);

      const char *varbinary_start =
          reinterpret_cast<const char *>(field->field_ptr() + length_bytes);
      return sql_strmake(varbinary_start, varbinary_length);
    }
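
    /*
      Illustration only (not part of the build): the packed VARBINARY layout
      that unpack_varbinary() decodes is one or two length bytes followed by
      the data. For example, a column with field_length >= 256 (two length
      bytes, little-endian as read by uint2korr) holding "db1":

        field_ptr():  03 00 'd' 'b' '1' ...
                      ^^^^^ length = 3  ^^^ data starts at field_ptr() + 2

      With field_length <= 255 there is a single length byte instead.
    */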

    /*
      Unpack blob field and return pointer to zero terminated string allocated
      in current MEM_ROOT.

      This function assumes that the blob has already been fetched from NDB
      and is ready to be extracted from buffers allocated inside NdbApi.

      @param ndb_blob The blob column to unpack
      @return pointer to string allocated in current MEM_ROOT
    */
    static char *unpack_blob(NdbBlob *ndb_blob) {
      // Check if blob is NULL
      int blob_is_null;
      ndbcluster::ndbrequire(ndb_blob->getNull(blob_is_null) == 0);
      if (blob_is_null != 0) {
        // The blob column didn't contain anything, return empty string
        return sql_strdup("");
      }

      // Read length of blob
      Uint64 blob_len;
      ndbcluster::ndbrequire(ndb_blob->getLength(blob_len) == 0);
      if (blob_len == 0) {
        // The blob column didn't contain anything, return empty string
        return sql_strdup("");
      }

      // Allocate space for blob plus zero terminator in current MEM_ROOT
      char *str = static_cast<char *>((*THR_MALLOC)->Alloc(blob_len + 1));
      ndbcluster::ndbrequire(str);

      // Read the blob content
      Uint32 read_len = static_cast<Uint32>(blob_len);
      ndbcluster::ndbrequire(ndb_blob->readData(str, read_len) == 0);
      ndbcluster::ndbrequire(blob_len == read_len);  // Assume all read
      str[blob_len] = 0;                             // Zero terminate

      DBUG_PRINT("unpack_blob", ("str: '%s'", str));
      return str;
    }

    void unpack_slock(const Field *field) {
      // Allocate bitmap buffer in current MEM_ROOT
      slock_buf = static_cast<my_bitmap_map *>(
          (*THR_MALLOC)->Alloc(field->field_length));
      ndbcluster::ndbrequire(slock_buf);

      // Initialize bitmap (always succeeds when buffer is already allocated)
      (void)bitmap_init(&slock, slock_buf, field->field_length * 8);

      // Copy data into bitmap buffer
      memcpy(slock_buf, field->field_ptr(), field->field_length);
    }
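
    /*
      Illustration only (not part of the build): 'slock' is a bitmap with one
      bit per node id, stored in the BINARY column of ndb_schema. With the
      legacy 32 byte column that gives 256 bits. Assuming participants with
      node ids 1 and 4 still hold the lock:

        byte 0:  0b00010010   (bit 1 = node 1, bit 4 = node 4)

      Each participant clears its own bit when done, see ack_schema_op()
      further down.
    */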

    // Unpack Ndb_schema_op from event_data pointer
    void unpack_event(const Ndb_event_data *event_data) {
      TABLE *table = event_data->shadow_table;
      Field **field = table->field;

      my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->read_set);

      /* db, varbinary */
      db = unpack_varbinary(*field);
      field++;

      /* name, varbinary */
      name = unpack_varbinary(*field);
      field++;

      /* slock, binary */
      unpack_slock(*field);
      field++;

      /* query, blob */
      query = unpack_blob(event_data->ndb_value[0][SCHEMA_QUERY_I].blob);
      field++;

      /* node_id */
      node_id = (Uint32)((Field_long *)*field)->val_int();
      /* epoch */
      field++;
      epoch = ((Field_long *)*field)->val_int();
      /* id */
      field++;
      id = (Uint32)((Field_long *)*field)->val_int();
      /* version */
      field++;
      version = (Uint32)((Field_long *)*field)->val_int();
      /* type */
      field++;
      type = (Uint32)((Field_long *)*field)->val_int();
      /* schema_op_id */
      field++;
      if (*field) {
        // Optional column
        schema_op_id = (Uint32)((Field_long *)*field)->val_int();
      } else {
        schema_op_id = 0;
      }

      dbug_tmp_restore_column_map(table->read_set, old_map);
    }
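
    /*
      Illustration only (not part of the build): unpack_event() walks the
      shadow table fields in the column order of mysql.ndb_schema, roughly:

        db VARBINARY, name VARBINARY, slock BINARY, query BLOB,
        node_id INT, epoch BIGINT, id INT, version INT, type INT,
        schema_op_id INT (optional, only present after upgrade)

      which is why the code advances 'field' strictly in that order and
      treats a missing trailing field pointer as "old table without
      schema_op_id".
    */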

   public:
    // Note! The db, name, slock_buf and query variables point to memory
    // allocated in the current MEM_ROOT. When the Ndb_schema_op is put in the
    // list to be executed after epoch, only the pointers are copied and
    // still point to the same memory inside the MEM_ROOT.
    char *db;
    char *name;

   private:
    // Buffer for the slock bitmap
    my_bitmap_map *slock_buf;

   public:
    MY_BITMAP slock;
    char *query;
    size_t query_length() const {
      // Return length of "query" which is always a zero terminated string
      return strlen(query);
    }
    Uint64 epoch;
    uint32 node_id;
    uint32 id;
    uint32 version;
    uint32 type;
    uint32 any_value;
    uint32 schema_op_id;

    /**
      Create a Ndb_schema_op from event_data
    */
    static const Ndb_schema_op *create(const Ndb_event_data *event_data,
                                       Uint32 any_value) {
      DBUG_TRACE;
      // Allocate memory in current MEM_ROOT
      Ndb_schema_op *schema_op =
          (Ndb_schema_op *)(*THR_MALLOC)->Alloc(sizeof(Ndb_schema_op));
      schema_op->unpack_event(event_data);
      schema_op->any_value = any_value;
      DBUG_PRINT("exit", ("'%s.%s': query: '%s' type: %d", schema_op->db,
                          schema_op->name, schema_op->query, schema_op->type));
      return schema_op;
    }
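
    // Note! (*THR_MALLOC)->Alloc() returns raw memory and runs no
    // constructor, so create() relies on unpack_event() plus the any_value
    // assignment to initialize every member before the Ndb_schema_op is
    // used. The object is likewise never destructed; its memory is reclaimed
    // when the MEM_ROOT is freed.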
  };

  class Ndb_schema_op_result {
    uint32 m_result{0};
    std::string m_message;

   public:
    void set_result(Ndb_schema_dist::Schema_op_result_code result,
                    const std::string message) {
      // Both result and message must be set
      DBUG_ASSERT(result && message.length());
      m_result = result;
      m_message = message;
    }
    const char *message() const { return m_message.c_str(); }
    uint32 result() const { return m_result; }
  };

  class Lock_wait_timeout_guard {
   public:
    Lock_wait_timeout_guard(THD *thd, ulong lock_wait_timeout)
        : m_thd(thd),
          m_save_lock_wait_timeout(thd->variables.lock_wait_timeout) {
      m_thd->variables.lock_wait_timeout = lock_wait_timeout;
    }

    ~Lock_wait_timeout_guard() {
      m_thd->variables.lock_wait_timeout = m_save_lock_wait_timeout;
    }

   private:
    THD *const m_thd;
    ulong m_save_lock_wait_timeout;
  };
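
  /*
    Illustration only (not part of the build): Lock_wait_timeout_guard is a
    plain RAII guard; the session variable is overridden for the scope of the
    guard and restored on every exit path. A minimal usage sketch:

      {
        // Use a short lock_wait_timeout while handling the schema event
        Lock_wait_timeout_guard lwt_guard(m_thd, 30);
        // ... work that may wait for MDL locks ...
      }  // destructor restores the saved value, also on early return
  */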

  // Log error code and message returned from NDB
  void log_NDB_error(const NdbError &ndb_error) const {
    ndb_log_info("Got error '%d: %s' from NDB", ndb_error.code,
                 ndb_error.message);
  }

  static void write_schema_op_to_binlog(THD *thd,
                                        const Ndb_schema_op *schema) {
    if (!ndb_binlog_running) {
      // This mysqld is not writing a binlog
      return;
    }

    /* any_value == 0 means local cluster sourced change that
     * should be logged
     */
    if (ndbcluster_anyvalue_is_reserved(schema->any_value)) {
      /* Originating SQL node did not want this query logged */
      if (!ndbcluster_anyvalue_is_nologging(schema->any_value)) {
        ndb_log_warning(
            "unknown value for binlog signalling 0x%X, "
            "query not logged",
            schema->any_value);
      }
      return;
    }

    Uint32 queryServerId = ndbcluster_anyvalue_get_serverid(schema->any_value);
    /*
      Start with serverId as received AnyValue, in case it's a composite
      (server_id_bits < 31).
      This is for 'future', as currently schema ops do not have composite
      AnyValues.
      In future it may be useful to support *not* mapping composite
      AnyValues to/from Binlogged server-ids.
    */
    Uint32 loggedServerId = schema->any_value;

    if (queryServerId) {
      /*
        AnyValue has non-zero serverId, must be a query applied by a slave
        mysqld.
        TODO : Assert that we are running in the Binlog injector thread?
      */
      if (!g_ndb_log_slave_updates) {
        /* This MySQLD does not log slave updates */
        return;
      }
    } else {
      /* No ServerId associated with this query, mark it as ours */
      ndbcluster_anyvalue_set_serverid(loggedServerId, ::server_id);
    }

    /*
      Write the DDL query to binlog with server_id set
      to the server_id where the query originated.
    */
    const uint32 thd_server_id_save = thd->server_id;
    DBUG_ASSERT(sizeof(thd_server_id_save) == sizeof(thd->server_id));
    thd->server_id = loggedServerId;

    LEX_CSTRING thd_db_save = thd->db();
    LEX_CSTRING schema_db_lex_cstr = {schema->db, strlen(schema->db)};
    thd->reset_db(schema_db_lex_cstr);

    int errcode = query_error_code(thd, thd->killed == THD::NOT_KILLED);
    thd->binlog_query(THD::STMT_QUERY_TYPE, schema->query,
                      schema->query_length(),
                      false,  // is_trans
                      true,   // direct
                      schema->name[0] == 0 || thd->db().str[0] == 0, errcode);

    // Commit the binlog write
    (void)trans_commit_stmt(thd);

    /*
      Restore original server_id and db after commit
      since the server_id is also used in the commit logic
    */
    thd->server_id = thd_server_id_save;
    thd->reset_db(thd_db_save);
  }
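
  /*
    Illustration only (not part of the build): the 32-bit AnyValue attached
    to each NDB operation multiplexes several things. In rough terms (see
    ndb_anyvalue.h for the authoritative layout):

      0x00000000               change originated locally, should be logged
      serverid in low bits     change applied by a replica, log with that id
      reserved (high bit set)  special markers, e.g. the "no logging" flag

    write_schema_op_to_binlog() above simply branches on these three cases
    before writing the DDL query with the originating server_id.
  */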

  /**
    @brief Inform the other nodes that schema operation has been completed by
    this node, this is done by updating the row in the ndb_schema table.

    @note The function will read the row from ndb_schema with exclusive lock,
    clear its own bit in the 'slock' column and then write the row back.

    @param schema The schema operation which has just been completed

    @return different return values are returned, but not documented since they
    are currently unused

  */
  int ack_schema_op(const Ndb_schema_op *schema) const {
    DBUG_TRACE;
    Ndb *ndb = m_thd_ndb->ndb;

    // Open ndb_schema table
    Ndb_schema_dist_table schema_dist_table(m_thd_ndb);
    if (!schema_dist_table.open()) {
      // NOTE! Legacy crash unless this was a cluster connection failure, there
      // is simply no other way of sending an error back to the coordinator
      ndbcluster::ndbrequire(ndb->getDictionary()->getNdbError().code == 4009);
      return 1;
    }
    const NdbDictionary::Table *ndbtab = schema_dist_table.get_table();

    const NdbError *ndb_error = nullptr;
    char tmp_buf[FN_REFLEN];
    NdbTransaction *trans = 0;
    int retries = 100;
    std::string before_slock;

    // Bitmap for the slock bits
    MY_BITMAP slock;
    const uint slock_bits = schema_dist_table.get_slock_bytes() * 8;
    // Make sure that own nodeid fits in slock
    ndbcluster::ndbrequire(own_nodeid() <= slock_bits);
    (void)bitmap_init(&slock, nullptr, slock_bits);

    while (1) {
      if ((trans = ndb->startTransaction()) == 0) goto err;
      {
        NdbOperation *op = 0;
        int r = 0;

        /* read row from ndb_schema with exclusive row lock */
        r |= (op = trans->getNdbOperation(ndbtab)) == 0;
        DBUG_ASSERT(r == 0);
        r |= op->readTupleExclusive();
        DBUG_ASSERT(r == 0);

        /* db */
        ndb_pack_varchar(ndbtab, SCHEMA_DB_I, tmp_buf, schema->db,
                         strlen(schema->db));
        r |= op->equal(SCHEMA_DB_I, tmp_buf);
        DBUG_ASSERT(r == 0);
        /* name */
        ndb_pack_varchar(ndbtab, SCHEMA_NAME_I, tmp_buf, schema->name,
                         strlen(schema->name));
        r |= op->equal(SCHEMA_NAME_I, tmp_buf);
        DBUG_ASSERT(r == 0);
        /* slock */
        r |= op->getValue(SCHEMA_SLOCK_I, (char *)slock.bitmap) == 0;
        DBUG_ASSERT(r == 0);

        /* Execute in NDB */
        if (trans->execute(NdbTransaction::NoCommit)) goto err;
      }

      if (ndb_log_get_verbose_level() > 19) {
        // Generate the 'before slock' string
        before_slock = ndb_bitmap_to_hex_string(&slock);
      }

      bitmap_clear_bit(&slock, own_nodeid());

      if (ndb_log_get_verbose_level() > 19) {
        const std::string after_slock = ndb_bitmap_to_hex_string(&slock);
        ndb_log_info("reply to %s.%s(%u/%u) from %s to %s", schema->db,
                     schema->name, schema->id, schema->version,
                     before_slock.c_str(), after_slock.c_str());
      }

      {
        NdbOperation *op = 0;
        int r = 0;

        /* now update the tuple */
        r |= (op = trans->getNdbOperation(ndbtab)) == 0;
        DBUG_ASSERT(r == 0);
        r |= op->updateTuple();
        DBUG_ASSERT(r == 0);

        /* db */
        ndb_pack_varchar(ndbtab, SCHEMA_DB_I, tmp_buf, schema->db,
                         strlen(schema->db));
        r |= op->equal(SCHEMA_DB_I, tmp_buf);
        DBUG_ASSERT(r == 0);
        /* name */
        ndb_pack_varchar(ndbtab, SCHEMA_NAME_I, tmp_buf, schema->name,
                         strlen(schema->name));
        r |= op->equal(SCHEMA_NAME_I, tmp_buf);
        DBUG_ASSERT(r == 0);
        /* slock */
        r |= op->setValue(SCHEMA_SLOCK_I, (char *)slock.bitmap);
        DBUG_ASSERT(r == 0);
        /* node_id */
        // NOTE! Sends own nodeid here instead of the nodeid of the node that
        // started the schema op
        r |= op->setValue(SCHEMA_NODE_ID_I, own_nodeid());
        DBUG_ASSERT(r == 0);
        /* type */
        r |= op->setValue(SCHEMA_TYPE_I, (uint32)SOT_CLEAR_SLOCK);
        DBUG_ASSERT(r == 0);
      }
      if (trans->execute(NdbTransaction::Commit,
                         NdbOperation::DefaultAbortOption,
                         1 /*force send*/) == 0) {
        DBUG_PRINT("info", ("node %d cleared lock on '%s.%s'", own_nodeid(),
                            schema->db, schema->name));
        (void)ndb->getDictionary()->forceGCPWait(1);
        break;
      }
    err:
      const NdbError *this_error =
          trans ? &trans->getNdbError() : &ndb->getNdbError();
      if (this_error->status == NdbError::TemporaryError &&
          !thd_killed(m_thd)) {
        if (retries--) {
          if (trans) ndb->closeTransaction(trans);
          ndb_trans_retry_sleep();
          continue;  // retry
        }
      }
      ndb_error = this_error;
      break;
    }

    if (ndb_error) {
      ndb_log_warning(
          "Could not release slock on '%s.%s', "
          "Error code: %d Message: %s",
          schema->db, schema->name, ndb_error->code, ndb_error->message);
    }
    if (trans) ndb->closeTransaction(trans);
    bitmap_free(&slock);
    return 0;
  }
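
  /*
    Illustration only (not part of the build): the ack is a classic
    read-modify-write under an exclusive row lock. With three participants
    (nodes 1, 2 and 3) acking one after another, the slock column of the row
    evolves like:

      initial       : 0b00001110   (bits 1,2,3 set by the client)
      node 2 acks   : 0b00001010
      node 1 acks   : 0b00001000
      node 3 acks   : 0b00000000   -> schema operation completed

    Each write also sets type = SOT_CLEAR_SLOCK, which is what the event
    handler reacts to in handle_clear_slock() further down.
  */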

  /**
    @brief Inform the other nodes that schema operation has been completed by
    all nodes, this is done by updating the row in the ndb_schema table with
    all bits of the 'slock' column cleared.

    @note this is done to allow the coordinator to control when the schema
    operation has completed and also to be backwards compatible with
    nodes not upgraded to the new protocol

    @param db First part of key, normally used for db name
    @param table_name Second part of key, normally used for table name

    @return zero on success

  */
  int ack_schema_op_final(const char *db, const char *table_name) const {
    DBUG_TRACE;
    Ndb *ndb = m_thd_ndb->ndb;

    // Open ndb_schema table
    Ndb_schema_dist_table schema_dist_table(m_thd_ndb);
    if (!schema_dist_table.open()) {
      // NOTE! Legacy crash unless this was a cluster connection failure, there
      // is simply no other way of sending an error back to the coordinator
      ndbcluster::ndbrequire(ndb->getDictionary()->getNdbError().code == 4009);
      return 1;
    }
    const NdbDictionary::Table *ndbtab = schema_dist_table.get_table();

    // Pack db and table_name
    char db_buf[FN_REFLEN];
    char name_buf[FN_REFLEN];
    ndb_pack_varchar(ndbtab, SCHEMA_DB_I, db_buf, db, strlen(db));
    ndb_pack_varchar(ndbtab, SCHEMA_NAME_I, name_buf, table_name,
                     strlen(table_name));

    // Buffer with zeroes for slock
    std::vector<char> slock_zeroes;
    slock_zeroes.assign(schema_dist_table.get_slock_bytes(), 0);
    const char *slock_buf = slock_zeroes.data();

    // Function for updating row in ndb_schema
    std::function<const NdbError *(NdbTransaction *)> ack_schema_op_final_func =
        [ndbtab, db_buf, name_buf,
         slock_buf](NdbTransaction *trans) -> const NdbError * {
      DBUG_TRACE;

      NdbOperation *op = trans->getNdbOperation(ndbtab);
      if (op == nullptr) return &trans->getNdbError();

      // Update row
      if (op->updateTuple() != 0 || op->equal(SCHEMA_NAME_I, name_buf) != 0 ||
          op->equal(SCHEMA_DB_I, db_buf) != 0 ||
          op->setValue(SCHEMA_SLOCK_I, slock_buf) != 0 ||
          op->setValue(SCHEMA_TYPE_I, (uint32)SOT_CLEAR_SLOCK) != 0)
        return &op->getNdbError();

      if (trans->execute(NdbTransaction::Commit,
                         NdbOperation::DefaultAbortOption,
                         1 /*force send*/) != 0)
        return &trans->getNdbError();

      return nullptr;
    };

    NdbError ndb_err;
    if (!ndb_trans_retry(ndb, m_thd, ndb_err, ack_schema_op_final_func)) {
      log_NDB_error(ndb_err);
      ndb_log_warning("Could not release slock on '%s.%s'", db, table_name);
      return 1;
    }
    ndb_log_verbose(19, "Cleared slock on '%s.%s'", db, table_name);

    (void)ndb->getDictionary()->forceGCPWait(1);

    return 0;
  }
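
  /*
    Illustration only (not part of the build): ndb_trans_retry() wraps the
    boilerplate of startTransaction/execute/closeTransaction and retries on
    temporary NDB errors. The shape of a caller is always:

      std::function<const NdbError *(NdbTransaction *)> func =
          [/+ captures +/](NdbTransaction *trans) -> const NdbError * {
        // define operations on 'trans'; return nullptr on success or a
        // pointer to the NdbError describing the failure
        return nullptr;
      };
      NdbError err;
      if (!ndb_trans_retry(ndb, m_thd, err, func)) {
        // permanent failure, 'err' holds the last NDB error
      }

    Compare with the hand-rolled retry loop in ack_schema_op() above, which
    this pattern replaces in the newer functions.
  */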

  /**
    @brief Inform the other nodes that schema operation has been completed by
    this node. This is done by writing a new row to the ndb_schema_result
    table.

    @param schema The schema operation which has just been completed

    @return true if ack succeeds
    @return false if ack fails (writing to the table could not be done)

  */
  bool ack_schema_op_with_result(const Ndb_schema_op *schema) const {
    DBUG_TRACE;

    // Should only call this function if ndb_schema has a schema_op_id
    // column which enabled the client to send schema->schema_op_id != 0
    ndbcluster::ndbrequire(schema->schema_op_id);

    Ndb *ndb = m_thd_ndb->ndb;

    // Open ndb_schema_result table
    Ndb_schema_result_table schema_result_table(m_thd_ndb);
    if (!schema_result_table.open()) {
      // NOTE! Legacy crash unless this was a cluster connection failure, there
      // is simply no other way of sending an error back to the coordinator
      ndbcluster::ndbrequire(ndb->getDictionary()->getNdbError().code == 4009);
      return false;
    }

    const NdbDictionary::Table *ndbtab = schema_result_table.get_table();
    const uint32 nodeid = schema->node_id;
    const uint32 schema_op_id = schema->schema_op_id;
    const uint32 participant_nodeid = own_nodeid();
    const uint32 result = m_schema_op_result.result();
    char message_buf[255];
    schema_result_table.pack_message(m_schema_op_result.message(),
                                     message_buf);

    // Function for inserting row with result in ndb_schema_result
    std::function<const NdbError *(NdbTransaction *)>
        ack_schema_op_with_result_func =
            [ndbtab, nodeid, schema_op_id, participant_nodeid, result,
             message_buf](NdbTransaction *trans) -> const NdbError * {
      DBUG_TRACE;

      NdbOperation *op = trans->getNdbOperation(ndbtab);
      if (op == nullptr) return &trans->getNdbError();

      /* Write row */
      if (op->insertTuple() != 0 ||
          op->equal(Ndb_schema_result_table::COL_NODEID, nodeid) != 0 ||
          op->equal(Ndb_schema_result_table::COL_SCHEMA_OP_ID, schema_op_id) !=
              0 ||
          op->equal(Ndb_schema_result_table::COL_PARTICIPANT_NODEID,
                    participant_nodeid) != 0 ||
          op->setValue(Ndb_schema_result_table::COL_RESULT, result) != 0 ||
          op->setValue(Ndb_schema_result_table::COL_MESSAGE, message_buf) != 0)
        return &op->getNdbError();

      if (trans->execute(NdbTransaction::Commit,
                         NdbOperation::DefaultAbortOption,
                         1 /*force send*/) != 0)
        return &trans->getNdbError();

      return nullptr;
    };

    NdbError ndb_err;
    if (!ndb_trans_retry(ndb, m_thd, ndb_err, ack_schema_op_with_result_func)) {
      log_NDB_error(ndb_err);
      ndb_log_warning(
          "Failed to send result for schema operation involving '%s.%s'",
          schema->db, schema->name);
      return false;
    }

    // Success
    ndb_log_verbose(19,
                    "Replied to schema operation '%s.%s(%u/%u)', nodeid: %d, "
                    "schema_op_id: %d",
                    schema->db, schema->name, schema->id, schema->version,
                    schema->node_id, schema->schema_op_id);

    return true;
  }

  void remove_schema_result_rows(uint32 schema_op_id) {
    Ndb *ndb = m_thd_ndb->ndb;

    // Open ndb_schema_result table
    Ndb_schema_result_table schema_result_table(m_thd_ndb);
    if (!schema_result_table.open()) {
      // NOTE! Legacy crash unless this was a cluster connection failure, there
      // is simply no other way of sending an error back to the coordinator
      ndbcluster::ndbrequire(ndb->getDictionary()->getNdbError().code == 4009);
      return;
    }
    const NdbDictionary::Table *ndbtab = schema_result_table.get_table();
    const uint nodeid = own_nodeid();

    // Function for deleting all rows from ndb_schema_result matching
    // the given nodeid and schema operation id
    std::function<const NdbError *(NdbTransaction *)>
        remove_schema_result_rows_func =
            [nodeid, schema_op_id,
             ndbtab](NdbTransaction *trans) -> const NdbError * {
      DBUG_TRACE;
      DBUG_PRINT("enter",
                 ("nodeid: %d, schema_op_id: %d", nodeid, schema_op_id));

      NdbScanOperation *scan_op = trans->getNdbScanOperation(ndbtab);
      if (scan_op == nullptr) return &trans->getNdbError();

      if (scan_op->readTuples(NdbOperation::LM_Read,
                              NdbScanOperation::SF_KeyInfo) != 0)
        return &scan_op->getNdbError();

      // Read the columns to compare
      uint32 read_node_id, read_schema_op_id;
      if (scan_op->getValue(Ndb_schema_result_table::COL_NODEID,
                            (char *)&read_node_id) == nullptr ||
          scan_op->getValue(Ndb_schema_result_table::COL_SCHEMA_OP_ID,
                            (char *)&read_schema_op_id) == nullptr)
        return &scan_op->getNdbError();

      // Start the scan
      if (trans->execute(NdbTransaction::NoCommit) != 0)
        return &trans->getNdbError();

      // Loop through all rows
      unsigned deleted = 0;
      bool fetch = true;
      while (true) {
        const int r = scan_op->nextResult(fetch);
        if (r < 0) {
          // Failed to fetch next row
          return &scan_op->getNdbError();
        }
        fetch = false;  // Don't fetch more until nextResult returns 2

        switch (r) {
          case 0:  // Found row
            DBUG_PRINT("info", ("Found row"));
            // Delete row if equal to nodeid and schema_op_id
            if (read_schema_op_id == schema_op_id && read_node_id == nodeid) {
              DBUG_PRINT("info", ("Deleting row"));
              if (scan_op->deleteCurrentTuple() != 0) {
                // Failed to delete row
                return &scan_op->getNdbError();
              }
              deleted++;
            }
            continue;

          case 1:
            DBUG_PRINT("info", ("No more rows"));
            // No more rows, commit the transaction
            if (trans->execute(NdbTransaction::Commit) != 0) {
              // Failed to commit
              return &trans->getNdbError();
            }
            return nullptr;

          case 2:
            // Need to fetch more rows, first send the deletes
            DBUG_PRINT("info", ("Need to fetch more rows"));
            if (deleted > 0) {
              DBUG_PRINT("info", ("Sending deletes"));
              if (trans->execute(NdbTransaction::NoCommit) != 0) {
                // Failed to send
                return &trans->getNdbError();
              }
            }
            fetch = true;  // Fetch more rows
            continue;
        }
      }
      // Never reached
      ndbcluster::ndbrequire(false);
      return nullptr;
    };

    NdbError ndb_err;
    if (!ndb_trans_retry(ndb, m_thd, ndb_err, remove_schema_result_rows_func)) {
      log_NDB_error(ndb_err);
      ndb_log_error("Failed to remove rows from ndb_schema_result");
      return;
    }
    ndb_log_verbose(19,
                    "Deleted all rows from ndb_schema_result, nodeid: %d, "
                    "schema_op_id: %d",
                    nodeid, schema_op_id);
    return;
  }
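
  /*
    Illustration only (not part of the build): the scan-and-delete loop above
    is driven by NdbScanOperation::nextResult(fetchAllowed), whose return
    values mean:

      <0  error, inspect getNdbError()
       0  a row is available in the row buffer
       1  scan finished, no more rows
       2  locally cached rows exhausted; flush pending operations (here: the
          buffered deletes) and call again with fetch = true

    This is why the loop only sets 'fetch = true' after the case 2 flush:
    deleteCurrentTuple() just queues the delete, and it is the NoCommit
    execute that actually sends the batch to the data nodes.
  */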

  void check_wakeup_clients(Ndb_schema_dist::Schema_op_result_code result,
                            const char *message) const {
    // Build list of current subscribers
    std::unordered_set<uint32> subscribers;
    m_schema_dist_data.get_subscriber_list(subscribers);

    // Check all NDB_SCHEMA_OBJECTs for wakeup
    std::vector<uint32> schema_op_ids;
    NDB_SCHEMA_OBJECT::get_schema_op_ids(schema_op_ids);
    for (auto schema_op_id : schema_op_ids) {
      // Lookup NDB_SCHEMA_OBJECT from nodeid + schema_op_id
      std::unique_ptr<NDB_SCHEMA_OBJECT, decltype(&NDB_SCHEMA_OBJECT::release)>
          schema_object(NDB_SCHEMA_OBJECT::get(own_nodeid(), schema_op_id),
                        NDB_SCHEMA_OBJECT::release);
      if (schema_object == nullptr) {
        // The schema operation has already completed on this node
        continue;
      }

      const bool completed = schema_object->check_for_failed_subscribers(
          subscribers, result, message);
      if (completed) {
        // All participants have completed (or failed) -> send final ack
        ack_schema_op_final(schema_object->db(), schema_object->name());
      }
    }
  }

  bool check_is_ndb_schema_event(const Ndb_event_data *event_data) const {
    if (!event_data) {
      // Received event without event data pointer
      assert(false);
      return false;
    }

    NDB_SHARE *share = event_data->share;
    if (!share) {
      // Received event where the event_data is not properly initialized
      assert(false);
      return false;
    }
    assert(event_data->shadow_table);
    assert(event_data->ndb_value[0]);
    assert(event_data->ndb_value[1]);
    assert(Ndb_schema_dist_client::is_schema_dist_table(share->db,
                                                        share->table_name));
    return true;
  }

  void handle_after_epoch(const Ndb_schema_op *schema) {
    DBUG_TRACE;
    DBUG_PRINT("info", ("Pushing Ndb_schema_op on list to be "
                        "handled after epoch"));
    assert(!is_post_epoch());  // Only before epoch
    m_post_epoch_handle_list.push_back(schema, m_mem_root);
  }

  uint own_nodeid(void) const { return m_own_nodeid; }

  void ndbapi_invalidate_table(const char *db_name,
                               const char *table_name) const {
    DBUG_TRACE;
    Ndb_table_guard ndbtab_g(m_thd_ndb->ndb, db_name, table_name);
    ndbtab_g.invalidate();
  }

  NDB_SHARE *acquire_reference(const char *db, const char *name,
                               const char *reference) const {
    DBUG_TRACE;
    DBUG_PRINT("enter", ("db: '%s', name: '%s'", db, name));

    char key[FN_REFLEN + 1];
    build_table_filename(key, sizeof(key) - 1, db, name, "", 0);
    NDB_SHARE *share = NDB_SHARE::acquire_reference_by_key(key, reference);
    return share;
  }

  bool has_shadow_table(Ndb_dd_client &dd_client, const char *schema_name,
                        const char *table_name) const {
    dd::String_type engine;
    if (dd_client.get_engine(schema_name, table_name, &engine) &&
        engine != "ndbcluster") {
      ndb_log_warning(
          "Local table '%s.%s' in engine = '%s' shadows the NDB table",
          schema_name, table_name, engine.c_str());
      return true;
    }
    return false;
  }
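
  /*
    Illustration only (not part of the build): a "shadow table" arises when
    the local data dictionary already holds a table with the same name in
    another engine. For example, if a server has locally created:

      CREATE TABLE db1.t1 (a INT) ENGINE=InnoDB;

    and schema distribution later tries to install NDB's db1.t1, the InnoDB
    definition shadows the NDB one. has_shadow_table() detects this so the
    handlers below refuse to overwrite or remove the local table.
  */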

  bool install_table_in_dd(Ndb_dd_client &dd_client, const char *schema_name,
                           const char *table_name, dd::sdi_t sdi, int table_id,
                           int table_version, size_t num_partitions,
                           const std::string &tablespace_name,
                           bool force_overwrite,
                           bool invalidate_referenced_tables) const {
    DBUG_TRACE;

    // First acquire exclusive MDL lock on schema and table
    if (!dd_client.mdl_locks_acquire_exclusive(schema_name, table_name)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error(
          "Failed to acquire exclusive metadata lock for table '%s.%s'",
          schema_name, table_name);
      return false;
    }

    // Check if there is an existing table in DD which is not an NDB table, in
    // such case refuse to overwrite the "shadow table"
    if (has_shadow_table(dd_client, schema_name, table_name)) return false;

    if (!tablespace_name.empty()) {
      // Acquire IX MDL on tablespace
      if (!dd_client.mdl_lock_tablespace(tablespace_name.c_str(), true)) {
        log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
        ndb_log_error("Failed to acquire lock on tablespace '%s' for '%s.%s'",
                      tablespace_name.c_str(), schema_name, table_name);
        return false;
      }
    }

    Ndb_referenced_tables_invalidator invalidator(m_thd, dd_client);
    if (!dd_client.install_table(
            schema_name, table_name, sdi, table_id, table_version,
            num_partitions, tablespace_name, force_overwrite,
            (invalidate_referenced_tables ? &invalidator : nullptr))) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to install table '%s.%s' in DD", schema_name,
                    table_name);
      return false;
    }

    if (invalidate_referenced_tables && !invalidator.invalidate()) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to invalidate referenced tables for '%s.%s'",
                    schema_name, table_name);
      return false;
    }
    dd_client.commit();
    return true;
  }

  bool create_table_from_engine(
      const char *schema_name, const char *table_name, bool force_overwrite,
      bool invalidate_referenced_tables = false) const {
    DBUG_TRACE;
    DBUG_PRINT("enter",
               ("schema_name: %s, table_name: %s", schema_name, table_name));

    Ndb *ndb = m_thd_ndb->ndb;
    Ndb_table_guard ndbtab_g(ndb, schema_name, table_name);
    const NDBTAB *ndbtab = ndbtab_g.get_table();
    if (!ndbtab) {
      // Could not open the table from NDB, very unusual
      log_NDB_error(ndb->getDictionary()->getNdbError());
      ndb_log_error("Failed to open table '%s.%s' from NDB", schema_name,
                    table_name);
      return false;
    }

    const std::string tablespace_name =
        ndb_table_tablespace_name(ndb->getDictionary(), ndbtab);

    std::string serialized_metadata;
    if (!ndb_table_get_serialized_metadata(ndbtab, serialized_metadata)) {
      ndb_log_error("Failed to get serialized metadata for table '%s.%s'",
                    schema_name, table_name);
      return false;
    }

    Ndb_dd_client dd_client(m_thd);

    // Deserialize the metadata from NDB, this is done like this in order to
    // allow the table to be set up for binlogging independently of whether it
    // works to install it into DD.
    Ndb_dd_table dd_table(m_thd);
    const dd::sdi_t sdi = serialized_metadata.c_str();
    if (!dd_client.deserialize_table(sdi, dd_table.get_table_def())) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to deserialize metadata for table '%s.%s'",
                    schema_name, table_name);
      return false;
    }

    // Setup binlogging for this table. In many cases the NDB_SHARE, the
    // event and event subscriptions are already created/setup, but this
    // function is called anyway in order to create/setup any missing parts.
    if (ndbcluster_binlog_setup_table(m_thd, ndb, schema_name, table_name,
                                      dd_table.get_table_def())) {
      // Error information has been logged AND pushed -> clear warnings
      clear_thd_conditions(m_thd);
      ndb_log_error("Failed to setup binlogging for table '%s.%s'",
                    schema_name, table_name);
      return false;
    }

    // Install the table definition in DD
    // NOTE! This is done after create/setup of the NDB_SHARE to avoid that
    // the server tries to open the table before the NDB_SHARE has been
    // created
    if (!install_table_in_dd(dd_client, schema_name, table_name, sdi,
                             ndbtab->getObjectId(), ndbtab->getObjectVersion(),
                             ndbtab->getPartitionCount(), tablespace_name,
                             force_overwrite, invalidate_referenced_tables)) {
      ndb_log_warning("Failed to update table definition in DD");
      return false;
    }

    return true;
  }
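
  /*
    Illustration only (not part of the build): the order of steps in
    create_table_from_engine() matters:

      1. open the table from NDB (the source of truth)
      2. fetch and deserialize its serialized dictionary metadata (sdi)
      3. set up NDB_SHARE / event subscription for binlogging
      4. only then install the definition in the local DD

    Doing step 3 before step 4 avoids the window where another thread could
    open the table through the DD while its NDB_SHARE does not yet exist,
    matching the NOTE in the function above.
  */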

  void handle_clear_slock(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(is_post_epoch());

    if (DBUG_EVALUATE_IF("ndb_binlog_random_tableid", true, false)) {
      // Try to create a race between SLOCK acks handled after another
      // schema operation on the same object could have been started.

      // Get temporary NDB_SCHEMA_OBJECT, sleep if one does not exist
      std::unique_ptr<NDB_SCHEMA_OBJECT, decltype(&NDB_SCHEMA_OBJECT::release)>
          tmp_ndb_schema_obj(
              NDB_SCHEMA_OBJECT::get(schema->db, schema->name, schema->id,
                                     schema->version),
              NDB_SCHEMA_OBJECT::release);
      if (tmp_ndb_schema_obj == nullptr) {
        ndb_milli_sleep(10);
      }
    }

    // Get NDB_SCHEMA_OBJECT
    std::unique_ptr<NDB_SCHEMA_OBJECT, decltype(&NDB_SCHEMA_OBJECT::release)>
        ndb_schema_object(NDB_SCHEMA_OBJECT::get(schema->db, schema->name,
                                                 schema->id, schema->version),
                          NDB_SCHEMA_OBJECT::release);

    if (!ndb_schema_object) {
      // NOTE! When participants ack they send their own nodeid instead of the
      // nodeid of the node who initiated the schema operation. This makes it
      // impossible to do special checks for the coordinator here. Assume that
      // since no NDB_SCHEMA_OBJECT was found, this node is not the coordinator
      // and the ack can be safely ignored.
      return;
    }

    // Handle ack sent from a node using the old protocol, all nodes cleared
    // in the slock column have completed (it's not enough to use only nodeid
    // since events are merged)
    if (bitmap_bits_set(&schema->slock) > 0) {
      ndb_log_verbose(19, "Coordinator, handle old protocol ack from node: %d",
                      schema->node_id);

      std::unordered_set<uint32> cleared_nodes;
      for (uint i = 0; i < schema->slock.n_bits; i++) {
        if (!bitmap_is_set(&schema->slock, i)) {
          // Node is not set in bitmap
          cleared_nodes.insert(i);
        }
      }
      ndb_schema_object->result_received_from_nodes(cleared_nodes);

      if (ndb_schema_object->check_all_participants_completed()) {
        // All participants have completed (or failed) -> send final ack
        ack_schema_op_final(ndb_schema_object->db(), ndb_schema_object->name());
        return;
      }

      return;
    }

    // Check if the coordinator has completed and wake up the client
    const bool coordinator_completed =
        ndb_schema_object->check_coordinator_completed();

    if (coordinator_completed) {
      remove_schema_result_rows(ndb_schema_object->schema_op_id());

      // Remove active schema operation from coordinator
      m_schema_dist_data.remove_active_schema_op(ndb_schema_object.get());
    }

    /**
     * There is a possible race condition between this binlog-thread,
     * which has not yet released its schema_object, and the
     * coordinator which possibly releases its reference
     * to the same schema_object when signaled above.
     *
     * If the coordinator then starts yet another schema operation
     * on the same schema / table, it will need a schema_object with
     * the same key as the one already completed, and which this
     * thread still refers to. Thus, it will get this schema_object,
     * instead of creating a new one as normally expected.
     */
    if (DBUG_EVALUATE_IF("ndb_binlog_schema_object_race", true, false)) {
      ndb_milli_sleep(10);
    }
  }

  void handle_offline_alter_table_commit(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(is_post_epoch());  // Always after epoch

    if (schema->node_id == own_nodeid()) return;

    write_schema_op_to_binlog(m_thd, schema);
    ndbapi_invalidate_table(schema->db, schema->name);
    ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);

    NDB_SHARE *share =
        acquire_reference(schema->db, schema->name,
                          "offline_alter_table_commit");  // Temp ref.
    if (share) {
      mysql_mutex_lock(&share->mutex);
      if (share->op) {
        const Ndb_event_data *event_data =
            static_cast<const Ndb_event_data *>(share->op->getCustomData());
        Ndb_event_data::destroy(event_data);
        share->op->setCustomData(NULL);
        {
          Mutex_guard injector_mutex_g(injector_event_mutex);
          injector_ndb->dropEventOperation(share->op);
        }
        share->op = 0;
        NDB_SHARE::release_reference(share, "binlog");
      }
      mysql_mutex_unlock(&share->mutex);

      mysql_mutex_lock(&ndbcluster_mutex);
      NDB_SHARE::mark_share_dropped(&share);
      NDB_SHARE::release_reference_have_lock(share,
                                             "offline_alter_table_commit");
      // If this was the last share ref, it is now deleted. If there are more
      // references, the share will remain in the list of dropped shares until
      // the remaining references are released.
      mysql_mutex_unlock(&ndbcluster_mutex);
    }

    // Install table from NDB, overwrite the existing table
    if (!create_table_from_engine(schema->db, schema->name,
                                  true /* force_overwrite */,
                                  true /* invalidate_referenced_tables */)) {
      ndb_log_error("Distribution of ALTER TABLE '%s.%s' failed", schema->db,
                    schema->name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of ALTER TABLE " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
    }
  }

  void handle_online_alter_table_prepare(const Ndb_schema_op *schema) {
    assert(is_post_epoch());  // Always after epoch

    ndbapi_invalidate_table(schema->db, schema->name);
    ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);

    if (schema->node_id == own_nodeid()) {
      // Special case for schema dist participant in own node!
      // The schema dist client has an exclusive MDL lock and thus
      // the schema dist participant (this code) on the same mysqld
      // can't open the table def from the DD, trying to acquire
      // another MDL lock will just block. Instead (since this is in
      // the same mysqld) it provides the new table def via a
      // pointer in the NDB_SHARE.
      NDB_SHARE *share =
          acquire_reference(schema->db, schema->name,
                            "online_alter_table_prepare");  // temporary ref.

      const dd::Table *new_table_def =
          static_cast<const dd::Table *>(share->inplace_alter_new_table_def);
      DBUG_ASSERT(new_table_def);

      // Create a new Ndb_event_data which will be used when creating
      // the new NdbEventOperation
      Ndb_event_data *event_data = Ndb_event_data::create_event_data(
          m_thd, share, share->db, share->table_name, share->key_string(),
          injector_thd, new_table_def);
      if (!event_data) {
        ndb_log_error("NDB Binlog: Failed to create event data for table %s.%s",
                      schema->db, schema->name);
        DBUG_ASSERT(false);
        // NOTE! Should abort the alter from here
      }

      // Release any previously prepared event_data, this is rare but will
      // happen when an inplace alter table fails between the prepare and
      // commit phases
      const Ndb_event_data *old_event_data =
          m_schema_dist_data.get_inplace_alter_event_data();
      if (old_event_data) {
        Ndb_event_data::destroy(old_event_data);
        m_schema_dist_data.save_inplace_alter_event_data(nullptr);
      }

      // Save the new event_data
      m_schema_dist_data.save_inplace_alter_event_data(event_data);

      NDB_SHARE::release_reference(share,
                                   "online_alter_table_prepare");  // temp ref.
    } else {
      write_schema_op_to_binlog(m_thd, schema);

      // Install table from NDB, overwrite the altered table.
      // NOTE! it will also try to setup binlogging but since the share
      // has an op assigned, that part will be skipped
      if (!create_table_from_engine(schema->db, schema->name,
                                    true /* force_overwrite */,
                                    true /* invalidate_referenced_tables */)) {
        ndb_log_error("Distribution of ALTER TABLE '%s.%s' failed", schema->db,
                      schema->name);
        m_schema_op_result.set_result(
            Ndb_schema_dist::SCHEMA_OP_FAILURE,
            "Distribution of ALTER TABLE " + std::string(1, '\'') +
                std::string(schema->name) + std::string(1, '\'') + " failed");
      }

      // Check that no event_data has been prepared yet (that is only
      // done on the participant in the same node)
      DBUG_ASSERT(m_schema_dist_data.get_inplace_alter_event_data() == nullptr);
    }
  }

  const Ndb_event_data *remote_participant_inplace_alter_create_event_data(
      NDB_SHARE *share, const char *schema_name,
      const char *table_name) const {
    DBUG_TRACE;

    // Read table definition from NDB, it might not exist in DD on this Server
    Ndb *ndb = m_thd_ndb->ndb;
    Ndb_table_guard ndbtab_g(ndb, schema_name, table_name);
    const NDBTAB *ndbtab = ndbtab_g.get_table();
    if (!ndbtab) {
      // Could not open the table from NDB, very unusual
      log_NDB_error(ndb->getDictionary()->getNdbError());
      ndb_log_error("Failed to open table '%s.%s' from NDB", schema_name,
                    table_name);
      return nullptr;
    }

    std::string serialized_metadata;
    if (!ndb_table_get_serialized_metadata(ndbtab, serialized_metadata)) {
      ndb_log_error("Failed to get serialized metadata for table '%s.%s'",
                    schema_name, table_name);
      return nullptr;
    }

    // Deserialize the metadata from NDB
    Ndb_dd_client dd_client(m_thd);
    Ndb_dd_table dd_table(m_thd);
    const dd::sdi_t sdi = serialized_metadata.c_str();
    if (!dd_client.deserialize_table(sdi, dd_table.get_table_def())) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to deserialize metadata for table '%s.%s'",
                    schema_name, table_name);
      return nullptr;
    }

    // Create new event_data
    Ndb_event_data *event_data = Ndb_event_data::create_event_data(
        m_thd, share, schema_name, table_name, share->key_string(),
        injector_thd, dd_table.get_table_def());
    if (!event_data) {
      ndb_log_error("NDB Binlog: Failed to create event data for table '%s.%s'",
                    share->db, share->table_name);
      return nullptr;
    }

    return event_data;
  }

  void handle_online_alter_table_commit(const Ndb_schema_op *schema) {
    assert(is_post_epoch());  // Always after epoch

    NDB_SHARE *share =
        acquire_reference(schema->db, schema->name,
                          "online_alter_table_commit");  // temporary ref.
    if (share) {
      ndb_log_verbose(9, "NDB Binlog: handling online alter/rename");

      mysql_mutex_lock(&share->mutex);

      const Ndb_event_data *event_data;
      if (schema->node_id == own_nodeid()) {
        // Get the event_data which has been created during the prepare phase
        event_data = m_schema_dist_data.get_inplace_alter_event_data();
        if (!event_data) {
          ndb_log_error("Failed to get prepared event data '%s'",
                        share->key_string());
          DBUG_ASSERT(false);
        }
        // The event_data pointer has been taken over
        m_schema_dist_data.save_inplace_alter_event_data(nullptr);
      } else {
        // Create Ndb_event_data which will be used when creating
        // the new NdbEventOperation.
        event_data = remote_participant_inplace_alter_create_event_data(
            share, share->db, share->table_name);
        if (!event_data) {
          ndb_log_error("Failed to create event data for table '%s'",
                        share->key_string());
          DBUG_ASSERT(false);
        }
      }
      DBUG_ASSERT(event_data);

      NdbEventOperation *new_op = nullptr;
      if (share->op && event_data /* safety */) {
        Ndb_binlog_client binlog_client(m_thd, schema->db, schema->name);
        // The table has an event operation set up, and during an inplace
        // alter table it needs to be recreated for the new table layout.
        // NOTE! Nothing has changed here regarding whether or not the
        // table should still have an event operation, i.e. if it had
        // one before, it should still have one after the alter. But
        // for consistency, check that the table should have an event op
        DBUG_ASSERT(binlog_client.table_should_have_event_op(share));

        // Save the current event operation since create_event_op()
        // will assign the new one to "share->op", also release the "binlog"
        // reference as it will be acquired again in create_event_op()
        // NOTE! This should probably be rewritten to not assign share->op and
        // acquire the reference in create_event_op()
        NdbEventOperation *const curr_op = share->op;
        share->op = nullptr;
        NDB_SHARE::release_reference(share, "binlog");

        // Get table from NDB
        Ndb_table_guard ndbtab_g(m_thd_ndb->ndb, schema->db, schema->name);
        const NDBTAB *ndbtab = ndbtab_g.get_table();

        DBUG_ASSERT(ndbtab != nullptr);

        // Create new NdbEventOperation
        if (binlog_client.create_event_op(share, ndbtab, event_data)) {
          ndb_log_error("Failed to create event operation for table '%s'",
                        share->key_string());

          // NOTE! Should fail the alter here
          DBUG_ASSERT(false);
        } else {
          // Get the newly created NdbEventOperation, will be swapped
          // into place (again) later
          new_op = share->op;
        }

        // Reinstall the current NdbEventOperation
        share->op = curr_op;
      } else {
        // New event_data was created (that's the default) but the table didn't
        // have event operations and thus the event_data is unused, free it
        Ndb_event_data::destroy(event_data);
      }

      ndb_log_verbose(9, "NDB Binlog: handling online alter/rename done");

      // There should be no event_data left in m_schema_dist_data at this point
      DBUG_ASSERT(m_schema_dist_data.get_inplace_alter_event_data() == nullptr);

      // Start using the new event operation and release the old
      if (share->op && new_op) {
        // Delete the old op's event_data
        const Ndb_event_data *old_op_event_data =
            static_cast<const Ndb_event_data *>(share->op->getCustomData());
        share->op->setCustomData(NULL);
        Ndb_event_data::destroy(old_op_event_data);

        // Drop old event operation
        {
          Mutex_guard injector_mutex_g(injector_event_mutex);
          injector_ndb->dropEventOperation(share->op);
        }
        // Install new event operation
        share->op = new_op;
      }
      mysql_mutex_unlock(&share->mutex);

      NDB_SHARE::release_reference(share,
                                   "online_alter_table_commit");  // temp ref.
    }

    DBUG_ASSERT(m_schema_dist_data.get_inplace_alter_event_data() == nullptr);
  }
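
  /*
    Illustration only (not part of the build): the commit phase above swaps
    event operations using a save/create/swap/drop sequence:

      curr_op = share->op;  share->op = nullptr;  // detach old op
      create_event_op(share, ...);                // assigns new share->op
      new_op = share->op;   share->op = curr_op;  // reattach old op
      // ...
      dropEventOperation(share->op);              // drop old op
      share->op = new_op;                         // final swap

    The old operation keeps delivering events until the new one is installed,
    and the swap happens under share->mutex, so event handling for the table
    never observes a missing operation during the alter.
  */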

  bool remove_table_from_dd(const char *schema_name, const char *table_name) {
    DBUG_TRACE;

    Ndb_dd_client dd_client(m_thd);
    Ndb_referenced_tables_invalidator invalidator(m_thd, dd_client);

    if (!dd_client.mdl_locks_acquire_exclusive(schema_name, table_name)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::WARNING);
      ndb_log_warning("Failed to acquire exclusive metadata lock on '%s.%s'",
                      schema_name, table_name);
      return false;
    }

    // Check if there is an existing table in DD which is not an NDB table, in
    // such case refuse to remove the "shadow table"
    if (has_shadow_table(dd_client, schema_name, table_name)) return false;

    if (!dd_client.remove_table(schema_name, table_name, &invalidator)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to remove table '%s.%s' from DD", schema_name,
                    table_name);
      return false;
    }

    if (!invalidator.invalidate()) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to invalidate referenced tables for '%s.%s'",
                    schema_name, table_name);
      return false;
    }

    dd_client.commit();
    return true;
  }

  void handle_drop_table(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(is_post_epoch());  // Always after epoch

    if (schema->node_id == own_nodeid()) return;

    write_schema_op_to_binlog(m_thd, schema);

    // Participant never takes GSL
    assert(m_thd_ndb->check_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT));

    if (!remove_table_from_dd(schema->db, schema->name)) {
      // The table couldn't be removed, continue to invalidate the table in
      // NdbApi, close cached tables etc. This case may happen when a MySQL
      // Server drops a "shadow" table and afterwards someone also drops the
      // table with the same name in NDB
      ndb_log_warning(
          "Failed to remove table definition from DD, continue anyway...");
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of DROP TABLE " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
    }

    NDB_SHARE *share = acquire_reference(schema->db, schema->name,
                                         "drop_table");  // temporary ref.
    if (!share || !share->op) {
      ndbapi_invalidate_table(schema->db, schema->name);
      ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);
    }
    if (share) {
      mysql_mutex_lock(&ndbcluster_mutex);
      NDB_SHARE::mark_share_dropped(&share);  // server ref.
      DBUG_ASSERT(share);                     // Should still be ref'ed
      NDB_SHARE::release_reference_have_lock(share,
                                             "drop_table");  // temporary ref.
      mysql_mutex_unlock(&ndbcluster_mutex);
    }

    ndbapi_invalidate_table(schema->db, schema->name);
    ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);
  }

  /*
    The RENAME is performed in two steps.
    1) PREPARE_RENAME - sends the new table key to participants
    2) RENAME - perform the actual rename
  */
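
  /*
    Illustration only (not part of the build): with a rename of db1.t1 to
    db1.t2, participants see the two steps as two schema ops:

      PREPARE_RENAME  query = new share key, e.g. "./db1/t2"
                      -> handle_rename_table_prepare() saves the key
      RENAME          -> handle_rename_table() looks up the saved key, opens
                         "t2" from NDB and renames/installs it in the local DD

    The prepare step exists because the rename op itself only carries the old
    name; the new name travels in the query field of the prepare op.
  */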

  void handle_rename_table_prepare(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(is_post_epoch());  // Always after epoch

    if (schema->node_id == own_nodeid()) return;

    const char *new_key_for_table = schema->query;
    DBUG_PRINT("info", ("new_key_for_table: '%s'", new_key_for_table));

    // Release a potentially previously prepared new_key
    {
      NDB_SHARE_KEY *old_prepared_key =
          m_schema_dist_data.get_prepared_rename_key();
      if (old_prepared_key) NDB_SHARE::free_key(old_prepared_key);
    }

    // Create a new key and save it, then hope for the best (i.e. that it can
    // be found later when the RENAME arrives)
    NDB_SHARE_KEY *new_prepared_key = NDB_SHARE::create_key(new_key_for_table);
    m_schema_dist_data.save_prepared_rename_key(new_prepared_key);
  }

  bool rename_table_in_dd(const char *schema_name, const char *table_name,
                          const char *new_schema_name,
                          const char *new_table_name,
                          const NdbDictionary::Table *ndbtab,
                          const std::string &tablespace_name) const {
    DBUG_TRACE;

    Ndb_dd_client dd_client(m_thd);

    // Acquire exclusive MDL lock on the table
    if (!dd_client.mdl_locks_acquire_exclusive(schema_name, table_name)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to acquire exclusive metadata lock on '%s.%s'",
                    schema_name, table_name);
      return false;
    }

    // Acquire exclusive MDL lock also on the new table name
    if (!dd_client.mdl_locks_acquire_exclusive(new_schema_name,
                                               new_table_name)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error(
          "Failed to acquire exclusive metadata lock on new table name "
          "'%s.%s'",
          new_schema_name, new_table_name);
      return false;
    }

    if (has_shadow_table(dd_client, schema_name, table_name)) {
      // The renamed table was a "shadow table".

      if (has_shadow_table(dd_client, new_schema_name, new_table_name)) {
        // The new table name is also a "shadow table", nothing to do
        return false;
      }

      // Install the renamed table into DD
      std::string serialized_metadata;
      if (!ndb_table_get_serialized_metadata(ndbtab, serialized_metadata)) {
        ndb_log_error("Failed to get serialized metadata for table '%s.%s'",
                      new_schema_name, new_table_name);
        return false;
      }

      // Deserialize the metadata from NDB
      Ndb_dd_table dd_table(m_thd);
      const dd::sdi_t sdi = serialized_metadata.c_str();
      if (!dd_client.deserialize_table(sdi, dd_table.get_table_def())) {
        log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
        ndb_log_error("Failed to deserialize metadata for table '%s.%s'",
                      new_schema_name, new_table_name);
        return false;
      }

      if (!dd_client.install_table(
              new_schema_name, new_table_name, sdi, ndbtab->getObjectId(),
              ndbtab->getObjectVersion(), ndbtab->getPartitionCount(),
              tablespace_name, true, nullptr)) {
        log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
        ndb_log_error("Failed to install renamed table '%s.%s' in DD",
                      new_schema_name, new_table_name);
        return false;
      }

      dd_client.commit();
      return true;
    }

    Ndb_referenced_tables_invalidator invalidator(m_thd, dd_client);

    if (has_shadow_table(dd_client, new_schema_name, new_table_name)) {
      // There is a "shadow table", remove the table from DD
      ndb_log_warning(
          "Removing the renamed table '%s.%s' from DD, there is a local table",
          schema_name, table_name);
      if (!dd_client.remove_table(schema_name, table_name, &invalidator)) {
        log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
        ndb_log_error("Failed to remove the renamed table '%s.%s' from DD",
                      schema_name, table_name);
        return false;
      }
    } else {
      // There is no "shadow table", rename the table in DD
      if (!dd_client.rename_table(schema_name, table_name, new_schema_name,
                                  new_table_name, ndbtab->getObjectId(),
                                  ndbtab->getObjectVersion(), &invalidator)) {
        log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
        ndb_log_error("Failed to rename table '%s.%s' to '%s.%s'", schema_name,
                      table_name, new_schema_name, new_table_name);
        return false;
      }
    }

    if (!invalidator.invalidate()) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to invalidate referenced tables for '%s.%s'",
                    schema_name, table_name);
      return false;
    }

    dd_client.commit();
    return true;
  }
3033
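  /*
    Handle a RENAME TABLE distributed from another MySQL Server. The rename
    is distributed in two phases: first RENAME_TABLE_PREPARE carries the new
    key (see handle_rename_table_prepare() above), then the actual RENAME
    applies it by renaming the table both in the DD and in the NDB_SHARE
    instance.
  */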
  void handle_rename_table(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(is_post_epoch());  // Always after epoch

    if (schema->node_id == own_nodeid()) return;

    write_schema_op_to_binlog(m_thd, schema);

    // Participant never takes GSL
    assert(m_thd_ndb->check_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT));

    NDB_SHARE *share = acquire_reference(schema->db, schema->name,
                                         "rename_table");  // temporary ref.
    if (!share || !share->op) {
      ndbapi_invalidate_table(schema->db, schema->name);
      ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);
    }
    if (share)
      NDB_SHARE::release_reference(share, "rename_table");  // temporary ref.

    share = acquire_reference(schema->db, schema->name,
                              "rename_table");  // temporary ref.
    if (!share) {
      // The RENAME needs to find the share so it can be renamed
      DBUG_ASSERT(share);
      return;
    }

    NDB_SHARE_KEY *prepared_key = m_schema_dist_data.get_prepared_rename_key();
    if (!prepared_key) {
      // The rename needs to have new_key set
      // by a previous RENAME_PREPARE
      DBUG_ASSERT(prepared_key);
      return;
    }

    // Rename on participant is always from real to real name
    // (i.e. neither the old nor the new name should be a temporary name)
    DBUG_ASSERT(!ndb_name_is_temp(schema->name));
    DBUG_ASSERT(!ndb_name_is_temp(NDB_SHARE::key_get_table_name(prepared_key)));

    // Open the renamed table from NDB
    const char *new_db_name = NDB_SHARE::key_get_db_name(prepared_key);
    const char *new_table_name = NDB_SHARE::key_get_table_name(prepared_key);
    Ndb_table_guard ndbtab_g(m_thd_ndb->ndb, new_db_name, new_table_name);
    const NdbDictionary::Table *ndbtab = ndbtab_g.get_table();
    if (!ndbtab) {
      // Could not open the table from NDB, very unusual
      log_NDB_error(m_thd_ndb->ndb->getDictionary()->getNdbError());
      ndb_log_error("Failed to rename, could not open table '%s.%s' from NDB",
                    new_db_name, new_table_name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of RENAME TABLE " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
      return;
    }

    const std::string tablespace_name =
        ndb_table_tablespace_name(m_thd_ndb->ndb->getDictionary(), ndbtab);

    // Rename table in DD
    if (!rename_table_in_dd(schema->db, schema->name, new_db_name,
                            new_table_name, ndbtab, tablespace_name)) {
      ndb_log_warning(
          "Failed to rename table definition in DD, continue anyway...");
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of RENAME TABLE " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
    }

    // Rename share and release the old key
    NDB_SHARE_KEY *old_key = share->key;
    NDB_SHARE::rename_share(share, prepared_key);
    m_schema_dist_data.save_prepared_rename_key(NULL);
    NDB_SHARE::free_key(old_key);

    NDB_SHARE::release_reference(share, "rename_table");  // temporary ref.

    ndbapi_invalidate_table(schema->db, schema->name);
    ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);
  }

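  /*
    Handle DROP DATABASE distributed from another MySQL Server. The NDB
    tables have already been dropped from NDB by that server, so the work
    here consists of removing the corresponding DD entries and then running
    a local DROP DATABASE to remove remaining artifacts (like the physical
    database directory). The drop is skipped if the database turns out to
    contain local (non-NDB) tables.
  */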
  void handle_drop_db(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(is_post_epoch());  // Always after epoch

    if (schema->node_id == own_nodeid()) return;

    write_schema_op_to_binlog(m_thd, schema);

    // Participant never takes GSL
    assert(m_thd_ndb->check_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT));

    Ndb_dd_client dd_client(m_thd);

    // Lock the schema in DD
    if (!dd_client.mdl_lock_schema(schema->db)) {
      // Failed to acquire lock, skip dropping
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to acquire MDL for db '%s'", schema->db);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of DROP DATABASE " + std::string(1, '\'') +
              std::string(schema->db) + std::string(1, '\'') + " failed");
      return;
    }

    bool schema_exists;
    if (!dd_client.schema_exists(schema->db, &schema_exists)) {
      // Failed to check if database exists, skip dropping
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to determine if database '%s' exists", schema->db);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of DROP DATABASE " + std::string(1, '\'') +
              std::string(schema->db) + std::string(1, '\'') + " failed");
      return;
    }

    if (!schema_exists) {
      DBUG_PRINT("info", ("Schema '%s' does not exist", schema->db));
      // Nothing to do
      return;
    }

    // Remove all NDB tables in the dropped database from DD,
    // this function is only called when they have all been dropped
    // from NDB by another MySQL Server
    //
    // NOTE! This is code which always runs "in the server" so it would be
    // appropriate to log error messages to the server log file describing
    // any problems which occur in these functions.
    std::unordered_set<std::string> ndb_tables_in_DD;
    if (!dd_client.get_ndb_table_names_in_schema(schema->db,
                                                 &ndb_tables_in_DD)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to get list of NDB tables in database '%s'",
                    schema->db);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of DROP DATABASE " + std::string(1, '\'') +
              std::string(schema->db) + std::string(1, '\'') + " failed");
      return;
    }

    Ndb_referenced_tables_invalidator invalidator(m_thd, dd_client);

    for (const auto &ndb_table_name : ndb_tables_in_DD) {
      if (!dd_client.mdl_locks_acquire_exclusive(schema->db,
                                                 ndb_table_name.c_str())) {
        log_and_clear_thd_conditions(m_thd, condition_logging_level::WARNING);
        ndb_log_warning("Failed to acquire exclusive MDL on '%s.%s'",
                        schema->db, ndb_table_name.c_str());
        continue;
      }

      if (!dd_client.remove_table(schema->db, ndb_table_name.c_str(),
                                  &invalidator)) {
        // Failed to remove the table from DD, not much else to do
        // than try with the next
        log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
        ndb_log_error("Failed to remove table '%s.%s' from DD", schema->db,
                      ndb_table_name.c_str());
        continue;
      }

      NDB_SHARE *share = acquire_reference(schema->db, ndb_table_name.c_str(),
                                           "drop_db");  // temporary ref.
      if (!share || !share->op) {
        ndbapi_invalidate_table(schema->db, ndb_table_name.c_str());
        ndb_tdc_close_cached_table(m_thd, schema->db, ndb_table_name.c_str());
      }
      if (share) {
        mysql_mutex_lock(&ndbcluster_mutex);
        NDB_SHARE::mark_share_dropped(&share);  // server ref.
        DBUG_ASSERT(share);                     // Should still be ref'ed
        NDB_SHARE::release_reference_have_lock(share,
                                               "drop_db");  // temporary ref.
        mysql_mutex_unlock(&ndbcluster_mutex);
      }

      ndbapi_invalidate_table(schema->db, ndb_table_name.c_str());
      ndb_tdc_close_cached_table(m_thd, schema->db, ndb_table_name.c_str());
    }

    if (!invalidator.invalidate()) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to invalidate referenced tables for database '%s'",
                    schema->db);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of DROP DATABASE " + std::string(1, '\'') +
              std::string(schema->db) + std::string(1, '\'') + " failed");
      return;
    }

    dd_client.commit();

    bool found_local_tables;
    if (!dd_client.have_local_tables_in_schema(schema->db,
                                               &found_local_tables)) {
      // Failed to access the DD to check if non-NDB tables existed, assume
      // the worst and skip dropping this database
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to check if database '%s' contained local tables.",
                    schema->db);
      ndb_log_error("Skipping drop of non NDB database artifacts.");
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of DROP DATABASE " + std::string(1, '\'') +
              std::string(schema->db) + std::string(1, '\'') + " failed");
      return;
    }

    DBUG_PRINT("exit", ("found_local_tables: %d", found_local_tables));

    if (found_local_tables) {
      /* Tables exist as local tables, print warning and leave them */
      ndb_log_warning(
          "NDB Binlog: Skipping drop database '%s' since "
          "it contained local tables, "
          "binlog schema event '%s' from node %d. ",
          schema->db, schema->query, schema->node_id);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of DROP DATABASE " + std::string(1, '\'') +
              std::string(schema->db) + std::string(1, '\'') + " failed");
      return;
    }

    // Run the plain DROP DATABASE query in order to remove other artifacts
    // like the physical database directory.
    // Note! This is not done in the case where a "shadow" table is found
    // in the schema, but at least all the NDB tables have in that case
    // already been removed from the DD
    Ndb_local_connection mysqld(m_thd);
    if (mysqld.drop_database(schema->db)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to execute 'DROP DATABASE' for database '%s'",
                    schema->db);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of DROP DATABASE " + std::string(1, '\'') +
              std::string(schema->db) + std::string(1, '\'') + " failed");
    }
  }

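  /*
    Handle TRUNCATE TABLE distributed from another MySQL Server. The
    participant resets the table's shared auto_increment counter and
    reinstalls the table definition from NDB with force_overwrite,
    presumably because TRUNCATE creates a new incarnation of the NDB table.
  */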
  void handle_truncate_table(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(!is_post_epoch());  // Always directly

    if (schema->node_id == own_nodeid()) return;

    write_schema_op_to_binlog(m_thd, schema);

    NDB_SHARE *share =
        acquire_reference(schema->db, schema->name, "truncate_table");
    // invalidation already handled by binlog thread
    if (!share || !share->op) {
      ndbapi_invalidate_table(schema->db, schema->name);
      ndb_tdc_close_cached_table(m_thd, schema->db, schema->name);
    }
    if (share) {
      // Reset the table's shared auto_increment counter
      share->reset_tuple_id_range();

      NDB_SHARE::release_reference(share, "truncate_table");  // temporary ref.
    }

    if (!create_table_from_engine(schema->db, schema->name,
                                  true /* force_overwrite */)) {
      ndb_log_error("Distribution of TRUNCATE TABLE '%s.%s' failed", schema->db,
                    schema->name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of TRUNCATE TABLE " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
    }
  }

  void handle_create_table(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(!is_post_epoch());  // Always directly

    if (schema->node_id == own_nodeid()) return;

    write_schema_op_to_binlog(m_thd, schema);

    if (!create_table_from_engine(schema->db, schema->name,
                                  true, /* force_overwrite */
                                  true /* invalidate_referenced_tables */)) {
      ndb_log_error("Distribution of CREATE TABLE '%s.%s' failed", schema->db,
                    schema->name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of CREATE TABLE " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
    }
  }

  void handle_create_db(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(!is_post_epoch());  // Always directly

    if (schema->node_id == own_nodeid()) return;

    write_schema_op_to_binlog(m_thd, schema);

    // Participant never takes GSL
    assert(m_thd_ndb->check_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT));

    Ndb_local_connection mysqld(m_thd);
    if (mysqld.execute_database_ddl(schema->query)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to execute 'CREATE DATABASE' for database '%s'",
                    schema->db);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of CREATE DATABASE " + std::string(1, '\'') +
              std::string(schema->db) + std::string(1, '\'') + " failed");
      return;
    }

    // Update the Schema in DD with the id and version details
    if (!ndb_dd_update_schema_version(m_thd, schema->db, schema->id,
                                      schema->version)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to update schema version for database '%s'",
                    schema->db);
    }
  }

  void handle_alter_db(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(!is_post_epoch());  // Always directly

    if (schema->node_id == own_nodeid()) return;

    write_schema_op_to_binlog(m_thd, schema);

    // Participant never takes GSL
    assert(m_thd_ndb->check_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT));

    Ndb_local_connection mysqld(m_thd);
    if (mysqld.execute_database_ddl(schema->query)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to execute 'ALTER DATABASE' for database '%s'",
                    schema->db);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of ALTER DATABASE " + std::string(1, '\'') +
              std::string(schema->db) + std::string(1, '\'') + " failed");
      return;
    }

    // Update the Schema in DD with the id and version details
    if (!ndb_dd_update_schema_version(m_thd, schema->db, schema->id,
                                      schema->version)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to update schema version for database '%s'",
                    schema->db);
    }
  }

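  /*
    Handle distribution of an ACL change (user or grant manipulation)
    received from another MySQL Server. The distributed statement may carry
    its database context packed into the query string itself; e.g. a
    (hypothetical) value like "use db1;GRANT SELECT ON t1 TO u1" is split
    into the database "db1" and the statement to execute, as done below.
  */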
  void handle_grant_op(const Ndb_schema_op *schema) {
    DBUG_TRACE;
    Ndb_local_connection sql_runner(m_thd);

    assert(!is_post_epoch());  // Always directly

    // Participant never takes GSL
    assert(
        get_thd_ndb(m_thd)->check_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT));

    if (schema->node_id == own_nodeid()) return;

    /* SOT_GRANT was sent by a pre-8.0 mysqld. Just ignore it. */
    if (schema->type == SOT_GRANT) {
      ndb_log_verbose(9, "Got SOT_GRANT event, disregarding.");
      return;
    }

    /* For SOT_ACL_SNAPSHOT, update the snapshots for the users listed. */
    if (schema->type == SOT_ACL_SNAPSHOT) {
      if (!Ndb_stored_grants::update_users_from_snapshot(m_thd,
                                                         schema->query)) {
        ndb_log_error("Failed to apply ACL snapshot for users: %s",
                      schema->query);
        m_schema_op_result.set_result(Ndb_schema_dist::SCHEMA_OP_FAILURE,
                                      "Distribution of ACL change failed");
      }
      return;
    }

    DBUG_ASSERT(schema->type == SOT_ACL_STATEMENT ||
                schema->type == SOT_ACL_STATEMENT_REFRESH);

    LEX_CSTRING thd_db_save = m_thd->db();

    std::string use_db(schema->db);
    std::string query(schema->query);

    if (!query.compare(0, 4, "use ")) {
      size_t delimiter = query.find_first_of(';');
      use_db = query.substr(4, delimiter - 4);
      query = query.substr(delimiter + 1);
    }

    /* Execute ACL query */
    LEX_CSTRING set_db = {use_db.c_str(), use_db.length()};
    m_thd->reset_db(set_db);
    ndb_log_verbose(40, "Using database: %s", use_db.c_str());
    if (sql_runner.run_acl_statement(query)) {
      ndb_log_error("Failed to execute ACL query: %s", query.c_str());
      m_schema_op_result.set_result(Ndb_schema_dist::SCHEMA_OP_FAILURE,
                                    "Distribution of ACL change failed");
      m_thd->reset_db(thd_db_save);
      return;
    }

    /* Reset database */
    m_thd->reset_db(thd_db_save);

    if (schema->type == SOT_ACL_STATEMENT_REFRESH) {
      Ndb_stored_grants::maintain_cache(m_thd);
    }
  }

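  /*
    Install a tablespace definition in the DD using the data file names
    fetched from the NDB dictionary. Note that the caller is responsible
    for committing the Ndb_dd_client transaction.
  */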
  bool create_tablespace_from_engine(Ndb_dd_client &dd_client,
                                     const char *tablespace_name, uint32 id,
                                     uint32 version) {
    DBUG_TRACE;
    DBUG_PRINT("enter", ("tablespace_name: %s, id: %u, version: %u",
                         tablespace_name, id, version));

    Ndb *ndb = m_thd_ndb->ndb;
    NdbDictionary::Dictionary *dict = ndb->getDictionary();
    std::vector<std::string> datafile_names;
    if (!ndb_get_datafile_names(dict, tablespace_name, &datafile_names)) {
      log_NDB_error(dict->getNdbError());
      ndb_log_error("Failed to get data files assigned to tablespace '%s'",
                    tablespace_name);
      return false;
    }

    if (!dd_client.mdl_lock_tablespace_exclusive(tablespace_name)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("MDL lock could not be acquired for tablespace '%s'",
                    tablespace_name);
      return false;
    }

    if (!dd_client.install_tablespace(tablespace_name, datafile_names, id,
                                      version, true /* force_overwrite */)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to install tablespace '%s' in DD", tablespace_name);
      return false;
    }

    return true;
  }

  void handle_create_tablespace(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(!is_post_epoch());  // Always directly

    if (schema->node_id == own_nodeid()) {
      return;
    }

    write_schema_op_to_binlog(m_thd, schema);

    Ndb_dd_client dd_client(m_thd);
    if (!create_tablespace_from_engine(dd_client, schema->name, schema->id,
                                       schema->version)) {
      ndb_log_error("Distribution of CREATE TABLESPACE '%s' failed",
                    schema->name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of CREATE TABLESPACE " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
      return;
    }
    dd_client.commit();
  }

  bool get_tablespace_table_refs(
      const char *name,
      std::vector<dd::Tablespace_table_ref> &table_refs) const {
    Ndb_dd_client dd_client(m_thd);
    if (!dd_client.mdl_lock_tablespace(name, true /* intention_exclusive */)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("MDL lock could not be acquired on tablespace '%s'", name);
      return false;
    }

    const dd::Tablespace *existing = nullptr;
    if (!dd_client.get_tablespace(name, &existing)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::WARNING);
      return false;
    }

    if (existing == nullptr) {
      // Tablespace doesn't exist, no need to update tables after the ALTER
      return true;
    }

    if (!ndb_dd_disk_data_get_table_refs(m_thd, *existing, table_refs)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to get table refs in tablespace '%s'", name);
      return false;
    }
    return true;
  }

  bool update_tablespace_id_in_tables(
      Ndb_dd_client &dd_client, const char *tablespace_name,
      const std::vector<dd::Tablespace_table_ref> &table_refs) const {
    if (!dd_client.mdl_lock_tablespace(tablespace_name,
                                       true /* intention_exclusive */)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("MDL lock could not be acquired on tablespace '%s'",
                    tablespace_name);
      return false;
    }

    dd::Object_id tablespace_id;
    if (!dd_client.lookup_tablespace_id(tablespace_name, &tablespace_id)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to retrieve object id of tablespace '%s'",
                    tablespace_name);
      return false;
    }

    for (auto &table_ref : table_refs) {
      // Convert table_refs to correct case when necessary
      const std::string schema_name =
          ndb_dd_fs_name_case(table_ref.m_schema_name.c_str());
      const std::string table_name =
          ndb_dd_fs_name_case(table_ref.m_name.c_str());
      if (!dd_client.mdl_locks_acquire_exclusive(schema_name.c_str(),
                                                 table_name.c_str())) {
        log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
        ndb_log_error("MDL lock could not be acquired on table '%s.%s'",
                      schema_name.c_str(), table_name.c_str());
        return false;
      }

      if (!dd_client.set_tablespace_id_in_table(
              schema_name.c_str(), table_name.c_str(), tablespace_id)) {
        log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
        ndb_log_error("Could not set tablespace id in table '%s.%s'",
                      schema_name.c_str(), table_name.c_str());
        return false;
      }
    }
    return true;
  }

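  /*
    Handle ALTER TABLESPACE distributed from another MySQL Server. The
    tablespace is reinstalled in the DD (giving it a new object id), so the
    tables referencing it are collected up front and afterwards updated
    with the new tablespace id.
  */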
  void handle_alter_tablespace(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(!is_post_epoch());  // Always directly

    if (schema->node_id == own_nodeid()) {
      return;
    }

    write_schema_op_to_binlog(m_thd, schema);

    // Get information about tables in the tablespace being ALTERed. This is
    // required since the tablespace id of every such table must be updated
    // after the ALTER
    std::vector<dd::Tablespace_table_ref> table_refs;
    if (!get_tablespace_table_refs(schema->name, table_refs)) {
      ndb_log_error("Distribution of ALTER TABLESPACE '%s' failed",
                    schema->name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of ALTER TABLESPACE " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
      return;
    }

    Ndb_dd_client dd_client(m_thd);
    if (!create_tablespace_from_engine(dd_client, schema->name, schema->id,
                                       schema->version)) {
      ndb_log_error("Distribution of ALTER TABLESPACE '%s' failed",
                    schema->name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of ALTER TABLESPACE " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
      return;
    }

    if (!table_refs.empty()) {
      // Update tables in the tablespace with the new tablespace id
      if (!update_tablespace_id_in_tables(dd_client, schema->name,
                                          table_refs)) {
        ndb_log_error(
            "Failed to update tables in tablespace '%s' with the "
            "new tablespace id",
            schema->name);
        ndb_log_error("Distribution of ALTER TABLESPACE '%s' failed",
                      schema->name);
        m_schema_op_result.set_result(
            Ndb_schema_dist::SCHEMA_OP_FAILURE,
            "Distribution of ALTER TABLESPACE " + std::string(1, '\'') +
                std::string(schema->name) + std::string(1, '\'') + " failed");
        return;
      }
    }
    dd_client.commit();
  }

  void handle_drop_tablespace(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(is_post_epoch());  // Always after epoch

    if (schema->node_id == own_nodeid()) {
      return;
    }

    write_schema_op_to_binlog(m_thd, schema);

    Ndb_dd_client dd_client(m_thd);
    if (!dd_client.mdl_lock_tablespace_exclusive(schema->name)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("MDL lock could not be acquired for tablespace '%s'",
                    schema->name);
      ndb_log_error("Distribution of DROP TABLESPACE '%s' failed",
                    schema->name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of DROP TABLESPACE " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
      return;
    }

    if (!dd_client.drop_tablespace(schema->name,
                                   false /* fail_if_not_exists */)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to drop tablespace '%s' from DD", schema->name);
      ndb_log_error("Distribution of DROP TABLESPACE '%s' failed",
                    schema->name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of DROP TABLESPACE " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
      return;
    }

    dd_client.commit();
  }

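  /*
    Install a logfile group definition in the DD using the undo file names
    fetched from the NDB dictionary. Unlike the tablespace variant above,
    this function commits the Ndb_dd_client transaction itself.
  */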
  bool create_logfile_group_from_engine(const char *logfile_group_name,
                                        uint32 id, uint32 version) {
    DBUG_TRACE;
    DBUG_PRINT("enter", ("logfile_group_name: %s, id: %u, version: %u",
                         logfile_group_name, id, version));

    Ndb *ndb = m_thd_ndb->ndb;
    NdbDictionary::Dictionary *dict = ndb->getDictionary();
    std::vector<std::string> undofile_names;
    if (!ndb_get_undofile_names(dict, logfile_group_name, &undofile_names)) {
      log_NDB_error(dict->getNdbError());
      ndb_log_error("Failed to get undo files assigned to logfile group '%s'",
                    logfile_group_name);
      return false;
    }

    Ndb_dd_client dd_client(m_thd);
    if (!dd_client.mdl_lock_logfile_group_exclusive(logfile_group_name)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("MDL lock could not be acquired for logfile group '%s'",
                    logfile_group_name);
      return false;
    }

    if (!dd_client.install_logfile_group(logfile_group_name, undofile_names, id,
                                         version, true /* force_overwrite */)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to install logfile group '%s' in DD",
                    logfile_group_name);
      return false;
    }

    dd_client.commit();
    return true;
  }

  void handle_create_logfile_group(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(!is_post_epoch());  // Always directly

    if (schema->node_id == own_nodeid()) {
      return;
    }

    write_schema_op_to_binlog(m_thd, schema);

    if (!create_logfile_group_from_engine(schema->name, schema->id,
                                          schema->version)) {
      ndb_log_error("Distribution of CREATE LOGFILE GROUP '%s' failed",
                    schema->name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of CREATE LOGFILE GROUP " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
    }
  }

  void handle_alter_logfile_group(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(!is_post_epoch());  // Always directly

    if (schema->node_id == own_nodeid()) {
      return;
    }

    write_schema_op_to_binlog(m_thd, schema);

    if (!create_logfile_group_from_engine(schema->name, schema->id,
                                          schema->version)) {
      ndb_log_error("Distribution of ALTER LOGFILE GROUP '%s' failed",
                    schema->name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of ALTER LOGFILE GROUP " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
    }
  }

  void handle_drop_logfile_group(const Ndb_schema_op *schema) {
    DBUG_TRACE;

    assert(is_post_epoch());  // Always after epoch

    if (schema->node_id == own_nodeid()) {
      return;
    }

    write_schema_op_to_binlog(m_thd, schema);

    Ndb_dd_client dd_client(m_thd);
    if (!dd_client.mdl_lock_logfile_group_exclusive(schema->name)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("MDL lock could not be acquired for logfile group '%s'",
                    schema->name);
      ndb_log_error("Distribution of DROP LOGFILE GROUP '%s' failed",
                    schema->name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of DROP LOGFILE GROUP " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
      return;
    }

    if (!dd_client.drop_logfile_group(schema->name,
                                      false /* fail_if_not_exists */)) {
      log_and_clear_thd_conditions(m_thd, condition_logging_level::ERROR);
      ndb_log_error("Failed to drop logfile group '%s' from DD", schema->name);
      ndb_log_error("Distribution of DROP LOGFILE GROUP '%s' failed",
                    schema->name);
      m_schema_op_result.set_result(
          Ndb_schema_dist::SCHEMA_OP_FAILURE,
          "Distribution of DROP LOGFILE GROUP " + std::string(1, '\'') +
              std::string(schema->name) + std::string(1, '\'') + " failed");
      return;
    }

    dd_client.commit();
  }

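  /*
    Handle one schema operation event received on the ndb_schema table.
    Operations that must be applied after the data changes of the same
    epoch (e.g. DROP TABLE and the SOT_CLEAR_SLOCK ack) are deferred via
    handle_after_epoch(), the rest are handled directly. When this node is
    the coordinator (i.e. the event originates from its own node id), the
    current subscribers are registered as participants which must
    acknowledge the operation before it completes. Finally the operation is
    acked, using the result-carrying protocol when the schema op has a
    schema_op_id and falling back to the old protocol otherwise.
  */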
  int handle_schema_op(const Ndb_schema_op *schema) {
    DBUG_TRACE;
    {
      const SCHEMA_OP_TYPE schema_type = (SCHEMA_OP_TYPE)schema->type;

      ndb_log_verbose(19,
                      "got schema event on '%s.%s(%u/%u)' query: '%s' "
                      "type: %s(%d) node: %u slock: %x%08x",
                      schema->db, schema->name, schema->id, schema->version,
                      schema->query,
                      Ndb_schema_dist_client::type_name(
                          static_cast<SCHEMA_OP_TYPE>(schema->type)),
                      schema_type, schema->node_id, schema->slock.bitmap[1],
                      schema->slock.bitmap[0]);

      DBUG_EXECUTE_IF("ndb_schema_op_start_crash", DBUG_SUICIDE(););

      // Return to simulate schema operation timeout
      DBUG_EXECUTE_IF("ndb_schema_op_start_timeout", return 0;);

      if ((schema->db[0] == 0) && (schema->name[0] == 0)) {
        /**
         * This happens if there is a schema event on a table (object)
         * that this mysqld does not know about.
         * E.g. it had a local table shadowing an ndb table...
         */
        return 0;
      }

      if (schema_type == SOT_CLEAR_SLOCK) {
        // Handle the ack after epoch to ensure that schema events are inserted
        // in the binlog after any data events
        handle_after_epoch(schema);
        return 0;
      }

      if (schema->node_id == own_nodeid()) {
        // This is the Coordinator who hears about this schema operation for
        // the first time. Save the list of current subscribers as participants
        // in the NDB_SCHEMA_OBJECT, those are the nodes who need to acknowledge
        // (or fail) before the schema operation is completed.
        std::unique_ptr<NDB_SCHEMA_OBJECT,
                        decltype(&NDB_SCHEMA_OBJECT::release)>
            ndb_schema_object(
                NDB_SCHEMA_OBJECT::get(schema->db, schema->name, schema->id,
                                       schema->version),
                NDB_SCHEMA_OBJECT::release);
        if (!ndb_schema_object) {
          // There is no NDB_SCHEMA_OBJECT waiting for this schema operation.
          // Unexpected since the client who started this schema op
          // is always in the same node as the coordinator
          ndbcluster::ndbrequire(false);
          return 0;
        }
        std::unordered_set<uint32> subscribers;
        m_schema_dist_data.get_subscriber_list(subscribers);
        ndb_schema_object->register_participants(subscribers);
        ndb_log_verbose(
            19, "Participants: %s",
            ndb_schema_object->waiting_participants_to_string().c_str());

        // Add active schema operation to coordinator
        m_schema_dist_data.add_active_schema_op(ndb_schema_object.get());

        // Test schema dist client killed
        if (DBUG_EVALUATE_IF("ndb_schema_dist_client_killed", true, false)) {
          // Wait until the Client has set "coordinator completed"
          while (!ndb_schema_object->check_coordinator_completed())
            ndb_milli_sleep(100);
        }
      }

      // Set the custom lock_wait_timeout for schema distribution
      Lock_wait_timeout_guard lwt_guard(m_thd,
                                        opt_ndb_schema_dist_lock_wait_timeout);

      Ndb_schema_op_result schema_op_result;
      switch (schema_type) {
        case SOT_CLEAR_SLOCK:
          // Already handled above, should never end up here
          ndbcluster::ndbrequire(schema_type != SOT_CLEAR_SLOCK);
          return 0;

        case SOT_ALTER_TABLE_COMMIT:
        case SOT_RENAME_TABLE_PREPARE:
        case SOT_ONLINE_ALTER_TABLE_PREPARE:
        case SOT_ONLINE_ALTER_TABLE_COMMIT:
        case SOT_RENAME_TABLE:
        case SOT_DROP_TABLE:
        case SOT_DROP_DB:
        case SOT_DROP_TABLESPACE:
        case SOT_DROP_LOGFILE_GROUP:
          handle_after_epoch(schema);
          return 0;

        case SOT_TRUNCATE_TABLE:
          handle_truncate_table(schema);
          break;

        case SOT_CREATE_TABLE:
          handle_create_table(schema);
          break;

        case SOT_CREATE_DB:
          handle_create_db(schema);
          break;

        case SOT_ALTER_DB:
          handle_alter_db(schema);
          break;

        case SOT_CREATE_USER:
        case SOT_DROP_USER:
        case SOT_RENAME_USER:
        case SOT_GRANT:
        case SOT_REVOKE:
        case SOT_ACL_SNAPSHOT:
        case SOT_ACL_STATEMENT:
        case SOT_ACL_STATEMENT_REFRESH:
          handle_grant_op(schema);
          break;

        case SOT_TABLESPACE:
        case SOT_LOGFILE_GROUP:
          if (schema->node_id == own_nodeid()) break;
          write_schema_op_to_binlog(m_thd, schema);
          break;

        case SOT_RENAME_TABLE_NEW:
          /*
            Only a very old MySQL Server connected to the cluster may
            send this schema operation, ignore it
          */
          ndb_log_error(
              "Skipping old schema operation "
              "(RENAME_TABLE_NEW) on %s.%s",
              schema->db, schema->name);
          DBUG_ASSERT(false);
          break;

        case SOT_CREATE_TABLESPACE:
          handle_create_tablespace(schema);
          break;

        case SOT_ALTER_TABLESPACE:
          handle_alter_tablespace(schema);
          break;

        case SOT_CREATE_LOGFILE_GROUP:
          handle_create_logfile_group(schema);
          break;

        case SOT_ALTER_LOGFILE_GROUP:
          handle_alter_logfile_group(schema);
          break;
      }

      if (schema->schema_op_id) {
        // Use new protocol
        if (!ack_schema_op_with_result(schema)) {
          // Fallback to old protocol as stop gap, no result will be returned
          // but at least the coordinator will be informed
          ack_schema_op(schema);
        }
      } else {
        // Use old protocol
        ack_schema_op(schema);
      }
    }

    // Errors should have been reported to log and then cleared
    DBUG_ASSERT(!m_thd->is_error());

    return 0;
  }

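  /*
    Handle a schema operation which was deferred until the epoch it belongs
    to is complete, so that its effects end up in the binlog after the data
    events of that epoch.
  */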
  void handle_schema_op_post_epoch(const Ndb_schema_op *schema) {
    DBUG_TRACE;
    DBUG_PRINT("enter", ("%s.%s: query: '%s' type: %d", schema->db,
                         schema->name, schema->query, schema->type));

    // Set the custom lock_wait_timeout for schema distribution
    Lock_wait_timeout_guard lwt_guard(m_thd,
                                      opt_ndb_schema_dist_lock_wait_timeout);

    {
      const SCHEMA_OP_TYPE schema_type = (SCHEMA_OP_TYPE)schema->type;
      ndb_log_verbose(9, "%s - %s.%s",
                      Ndb_schema_dist_client::type_name(
                          static_cast<SCHEMA_OP_TYPE>(schema->type)),
                      schema->db, schema->name);

      switch (schema_type) {
        case SOT_DROP_DB:
          handle_drop_db(schema);
          break;

        case SOT_DROP_TABLE:
          handle_drop_table(schema);
          break;

        case SOT_RENAME_TABLE_PREPARE:
          handle_rename_table_prepare(schema);
          break;

        case SOT_RENAME_TABLE:
          handle_rename_table(schema);
          break;

        case SOT_ALTER_TABLE_COMMIT:
          handle_offline_alter_table_commit(schema);
          break;

        case SOT_ONLINE_ALTER_TABLE_PREPARE:
          handle_online_alter_table_prepare(schema);
          break;

        case SOT_ONLINE_ALTER_TABLE_COMMIT:
          handle_online_alter_table_commit(schema);
          break;

        case SOT_DROP_TABLESPACE:
          handle_drop_tablespace(schema);
          break;

        case SOT_DROP_LOGFILE_GROUP:
          handle_drop_logfile_group(schema);
          break;

        default:
          DBUG_ASSERT(false);
      }
    }

    // Errors should have been reported to log and then cleared
    DBUG_ASSERT(!m_thd->is_error());

    // There should be no MDL locks left now
    DBUG_ASSERT(!m_thd->mdl_context.has_locks());

    return;
  }

  THD *const m_thd;
  Thd_ndb *const m_thd_ndb;
  MEM_ROOT *m_mem_root;
  uint m_own_nodeid;
  Ndb_schema_dist_data &m_schema_dist_data;
  Ndb_schema_op_result m_schema_op_result;
  bool m_post_epoch;

  bool is_post_epoch(void) const { return m_post_epoch; }

  List<const Ndb_schema_op> m_post_epoch_handle_list;

 public:
  Ndb_schema_event_handler() = delete;
  Ndb_schema_event_handler(const Ndb_schema_event_handler &) = delete;

  Ndb_schema_event_handler(THD *thd, MEM_ROOT *mem_root, uint own_nodeid,
                           Ndb_schema_dist_data &schema_dist_data)
      : m_thd(thd),
        m_thd_ndb(get_thd_ndb(thd)),
        m_mem_root(mem_root),
        m_own_nodeid(own_nodeid),
        m_schema_dist_data(schema_dist_data),
        m_post_epoch(false) {}

  ~Ndb_schema_event_handler() {
    // There should be no work left to do...
    DBUG_ASSERT(m_post_epoch_handle_list.elements == 0);
  }

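  /*
    Handle one row inserted into the ndb_schema_result table, i.e. a
    participant reporting the outcome of a schema operation back to the
    coordinator. Only the coordinator (the node which originated the schema
    operation) processes these events; when all participants have reported,
    the final ack is sent to complete the operation.
  */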
  void handle_schema_result_insert(uint32 nodeid, uint32 schema_op_id,
                                   uint32 participant_node_id, uint32 result,
                                   const std::string &message) {
    DBUG_TRACE;
    if (nodeid != own_nodeid()) {
      // Only the coordinator handles these events
      return;
    }

    // Unpack the message received
    Ndb_schema_result_table schema_result_table(m_thd_ndb);
    const std::string unpacked_message =
        schema_result_table.unpack_message(message);

    ndb_log_verbose(
        19,
        "Received ndb_schema_result insert, nodeid: %d, schema_op_id: %d, "
        "participant_node_id: %d, result: %d, message: '%s'",
        nodeid, schema_op_id, participant_node_id, result,
        unpacked_message.c_str());

    // Lookup NDB_SCHEMA_OBJECT from nodeid + schema_op_id
    std::unique_ptr<NDB_SCHEMA_OBJECT, decltype(&NDB_SCHEMA_OBJECT::release)>
        ndb_schema_object(NDB_SCHEMA_OBJECT::get(nodeid, schema_op_id),
                          NDB_SCHEMA_OBJECT::release);
    if (ndb_schema_object == nullptr) {
      // The schema operation has already completed on this node
      return;
    }

    ndb_schema_object->result_received_from_node(participant_node_id, result,
                                                 unpacked_message);

    if (ndb_schema_object->check_all_participants_completed()) {
      // All participants have completed (or failed) -> send final ack
      ack_schema_op_final(ndb_schema_object->db(), ndb_schema_object->name());
    }
  }

  void handle_schema_result_event(Ndb *s_ndb, NdbEventOperation *pOp,
                                  NdbDictionary::Event::TableEvent event_type,
                                  const Ndb_event_data *event_data) {
    // Test "coordinator abort active" by simulating cluster failure
    if (DBUG_EVALUATE_IF("ndb_schema_dist_coord_abort_active", true, false)) {
      ndb_log_info("Simulating cluster failure...");
      event_type = NdbDictionary::Event::TE_CLUSTER_FAILURE;
    }

    switch (event_type) {
      case NdbDictionary::Event::TE_INSERT:
        handle_schema_result_insert(
            event_data->unpack_uint32(0), event_data->unpack_uint32(1),
            event_data->unpack_uint32(2), event_data->unpack_uint32(3),
            event_data->unpack_string(4));
        break;

      case NdbDictionary::Event::TE_CLUSTER_FAILURE:
        // fall through
      case NdbDictionary::Event::TE_DROP:
        // Cluster failure or ndb_schema_result table dropped
        if (ndb_binlog_tables_inited && ndb_binlog_running)
          ndb_log_verbose(1, "NDB Binlog: util tables need to reinitialize");

        // Indicate util tables not ready
        mysql_mutex_lock(&injector_data_mutex);
        ndb_binlog_tables_inited = false;
        ndb_binlog_is_ready = false;
        mysql_mutex_unlock(&injector_data_mutex);

        ndb_tdc_close_cached_tables();

        // Tear down the event subscription on ndb_schema_result
        ndbcluster_binlog_event_operation_teardown(m_thd, s_ndb, pOp);
        break;

      default:
        // Ignore other event types
        break;
    }
    return;
  }

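  /*
    Dispatch an incoming event. Events on the ndb_schema_result table are
    handled separately, INSERT/UPDATE on ndb_schema carry the actual schema
    operations, TE_CLUSTER_FAILURE/TE_DROP tear down the event subscription,
    and TE_SUBSCRIBE/TE_UNSUBSCRIBE/TE_NODE_FAILURE maintain the list of
    subscribers.
  */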
  void handle_event(Ndb *s_ndb, NdbEventOperation *pOp) {
    DBUG_TRACE;

    const Ndb_event_data *event_data =
        static_cast<const Ndb_event_data *>(pOp->getCustomData());
    if (Ndb_schema_dist_client::is_schema_dist_result_table(
            event_data->share->db, event_data->share->table_name)) {
      // Received event on ndb_schema_result table
      handle_schema_result_event(s_ndb, pOp, pOp->getEventType(), event_data);
      return;
    }

    if (!check_is_ndb_schema_event(event_data)) return;

    NDBEVENT::TableEvent ev_type = pOp->getEventType();

    // Test "fail all schema ops" by simulating cluster failure
    // before the schema operation has been registered
    if (DBUG_EVALUATE_IF("ndb_schema_dist_coord_fail_all", true, false)) {
      ndb_log_info("Simulating cluster failure...");
      ev_type = NdbDictionary::Event::TE_CLUSTER_FAILURE;
    }

    // Test "client detect not ready" by simulating cluster failure
    if (DBUG_EVALUATE_IF("ndb_schema_dist_client_not_ready", true, false)) {
      ndb_log_info("Simulating cluster failure...");
      ev_type = NdbDictionary::Event::TE_CLUSTER_FAILURE;
      // There should be one NDB_SCHEMA_OBJECT registered
      ndbcluster::ndbrequire(NDB_SCHEMA_OBJECT::count_active_schema_ops() == 1);
    }

    switch (ev_type) {
      case NDBEVENT::TE_INSERT:
      case NDBEVENT::TE_UPDATE: {
        /* ndb_schema table, row INSERTed or UPDATEd */
        const Ndb_schema_op *schema_op =
            Ndb_schema_op::create(event_data, pOp->getAnyValue());
        handle_schema_op(schema_op);
        break;
      }

      case NDBEVENT::TE_DELETE:
        /* ndb_schema table, row DELETEd */
        break;

      case NDBEVENT::TE_CLUSTER_FAILURE:
        ndb_log_verbose(1, "cluster failure at epoch %u/%u.",
                        (uint)(pOp->getGCI() >> 32), (uint)(pOp->getGCI()));

        // fall through
      case NDBEVENT::TE_DROP:
        /* ndb_schema table DROPped */
        if (ndb_binlog_tables_inited && ndb_binlog_running)
          ndb_log_verbose(1, "NDB Binlog: util tables need to reinitialize");

        // Indicate util tables not ready
        mysql_mutex_lock(&injector_data_mutex);
        ndb_binlog_tables_inited = false;
        ndb_binlog_is_ready = false;
        mysql_mutex_unlock(&injector_data_mutex);

        ndb_tdc_close_cached_tables();

        ndbcluster_binlog_event_operation_teardown(m_thd, s_ndb, pOp);

        if (DBUG_EVALUATE_IF("ndb_schema_dist_client_not_ready", true, false)) {
          ndb_log_info("Wait for client to detect not ready...");
          while (NDB_SCHEMA_OBJECT::count_active_schema_ops() > 0)
            ndb_milli_sleep(100);
        }
        break;

      case NDBEVENT::TE_ALTER:
        /* ndb_schema table ALTERed */
        break;

      case NDBEVENT::TE_NODE_FAILURE: {
        /* Remove all subscribers for node */
        m_schema_dist_data.report_data_node_failure(pOp->getNdbdNodeId());
        check_wakeup_clients(Ndb_schema_dist::NODE_FAILURE, "Data node failed");
        break;
      }

      case NDBEVENT::TE_SUBSCRIBE: {
        /* Add node as subscriber */
        m_schema_dist_data.report_subscribe(pOp->getNdbdNodeId(),
                                            pOp->getReqNodeId());
        // No 'check_wakeup_clients', adding subscribers doesn't complete
        // anything
        break;
      }

      case NDBEVENT::TE_UNSUBSCRIBE: {
        /* Remove node as subscriber */
        m_schema_dist_data.report_unsubscribe(pOp->getNdbdNodeId(),
                                              pOp->getReqNodeId());
        check_wakeup_clients(Ndb_schema_dist::NODE_UNSUBSCRIBE,
                             "Node unsubscribed");
        break;
      }

      default: {
        ndb_log_error("unknown event %u, ignoring...", ev_type);
      }
    }

    return;
  }

  void check_active_schema_ops(ulonglong current_epoch) {
    // This function is called repeatedly as epochs pass but checks should only
    // be performed at regular intervals. Check if it's time for one now and
    // calculate the time for the next if time is up
    if (likely(!m_schema_dist_data.time_for_check())) return;

    const uint active_ops = m_schema_dist_data.active_schema_ops().size();
    if (likely(active_ops == 0)) return;  // Nothing to do at this time

    ndb_log_info(
        "Coordinator checking active schema operations, "
        "epochs: (%u/%u,%u/%u,%u/%u), proc_info: '%s'",
        (uint)(ndb_latest_handled_binlog_epoch >> 32),
        (uint)(ndb_latest_handled_binlog_epoch),
        (uint)(ndb_latest_received_binlog_epoch >> 32),
        (uint)(ndb_latest_received_binlog_epoch), (uint)(current_epoch >> 32),
        (uint)(current_epoch), m_thd->proc_info);

    for (const NDB_SCHEMA_OBJECT *schema_object :
         m_schema_dist_data.active_schema_ops()) {
      // Print info about this schema operation
      ndb_log_info(" - schema operation active on '%s.%s'", schema_object->db(),
                   schema_object->name());
      if (ndb_log_get_verbose_level() > 30) {
        ndb_log_error_dump("%s", schema_object->to_string().c_str());
      }

      // Check if the schema operation has timed out
      const bool completed = schema_object->check_timeout(
          opt_ndb_schema_dist_timeout, Ndb_schema_dist::NODE_TIMEOUT,
          "Participant timeout");
      if (completed) {
        ndb_log_warning("Schema dist coordinator detected timeout");
        // Timeout occurred -> send final ack to complete the schema operation
        ack_schema_op_final(schema_object->db(), schema_object->name());
      }
    }
  }

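  /*
    Called when an epoch has been completed. Processes the schema operations
    deferred to after the epoch, acks them, and then lets the coordinator
    check its active schema operations for timeout.
  */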
  void post_epoch(ulonglong ndb_latest_epoch) {
    if (unlikely(m_post_epoch_handle_list.elements > 0)) {
      // Set the flag used to check that functions are called at correct time
      m_post_epoch = true;

      /*
        process any operations that should be done after
        the epoch is complete
      */
      const Ndb_schema_op *schema;
      while ((schema = m_post_epoch_handle_list.pop())) {
        if (schema->type == SOT_CLEAR_SLOCK) {
          handle_clear_slock(schema);
          continue;  // Handled an ack -> don't send new ack
        }

        handle_schema_op_post_epoch(schema);
        if (schema->schema_op_id) {
          // Use new protocol
          if (!ack_schema_op_with_result(schema)) {
            // Fallback to old protocol as stop gap, no result will be returned
            // but at least the coordinator will be informed
            ack_schema_op(schema);
          }
        } else {
          // Use old protocol
          ack_schema_op(schema);
        }
      }
    }

    check_active_schema_ops(ndb_latest_epoch);

    // There should be no work left to do...
    DBUG_ASSERT(m_post_epoch_handle_list.elements == 0);
  }
};

/*********************************************************************
  Internal helper functions for handling of the cluster replication tables
  - ndb_binlog_index
  - ndb_apply_status
*********************************************************************/

/*
  struct to hold the data to be inserted into the
  ndb_binlog_index table
*/
struct ndb_binlog_index_row {
  ulonglong epoch;
  const char *start_master_log_file;
  ulonglong start_master_log_pos;
  ulong n_inserts;
  ulong n_updates;
  ulong n_deletes;
  ulong n_schemaops;

  ulong orig_server_id;
  ulonglong orig_epoch;

  ulong gci;

  const char *next_master_log_file;
  ulonglong next_master_log_pos;

  struct ndb_binlog_index_row *next;
};

/**
   Utility class encapsulating the code which opens and writes
   to the mysql.ndb_binlog_index table
*/
class Ndb_binlog_index_table_util {
  static constexpr const char *const DB_NAME = "mysql";
  static constexpr const char *const TABLE_NAME = "ndb_binlog_index";

  /*
    Open the ndb_binlog_index table for writing
  */
  static int open_binlog_index_table(THD *thd, TABLE **ndb_binlog_index) {
    const char *save_proc_info =
        thd_proc_info(thd, "Opening 'mysql.ndb_binlog_index'");

    TABLE_LIST tables(DB_NAME,     // db
                      TABLE_NAME,  // name, alias
                      TL_WRITE);   // for write

    /* Only allow real table to be opened */
    tables.required_type = dd::enum_table_type::BASE_TABLE;

    const uint flags =
        MYSQL_LOCK_IGNORE_TIMEOUT; /* Wait for lock "infinitely" */
    if (open_and_lock_tables(thd, &tables, flags)) {
      if (thd->killed)
        DBUG_PRINT("error", ("NDB Binlog: Opening ndb_binlog_index: killed"));
      else
        ndb_log_error("NDB Binlog: Opening ndb_binlog_index: %d, '%s'",
                      thd->get_stmt_da()->mysql_errno(),
                      thd->get_stmt_da()->message_text());
      thd_proc_info(thd, save_proc_info);
      return -1;
    }
    *ndb_binlog_index = tables.table;
    thd_proc_info(thd, save_proc_info);
    return 0;
  }

  /*
    Write rows to the ndb_binlog_index table
  */
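  //
  // The rows are written in one local, non-binlogged transaction. When the
  // table has the ORIG_SERVERID / ORIG_EPOCH columns, one row is written per
  // originating server id and epoch; with the old table schema the operation
  // counts are instead merged and written as a single row.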
write_rows_impl(THD * thd,ndb_binlog_index_row * row)4434 static int write_rows_impl(THD *thd, ndb_binlog_index_row *row) {
4435 int error = 0;
4436 ndb_binlog_index_row *first = row;
4437 TABLE *ndb_binlog_index = 0;
4438 // Save previous option settings
4439 ulonglong option_bits = thd->variables.option_bits;
4440
4441 /*
4442 Assume this function is not called with an error set in thd
4443 (but clear for safety in release version)
4444 */
4445 assert(!thd->is_error());
4446 thd->clear_error();
4447
4448 /*
4449 Turn off binlogging to prevent the table changes to be written to
4450 the binary log.
4451 */
4452 Disable_binlog_guard binlog_guard(thd);
4453
4454 if (open_binlog_index_table(thd, &ndb_binlog_index)) {
4455 if (thd->killed)
4456 DBUG_PRINT(
4457 "error",
4458 ("NDB Binlog: Unable to lock table ndb_binlog_index, killed"));
4459 else
4460 ndb_log_error("NDB Binlog: Unable to lock table ndb_binlog_index");
4461 error = -1;
4462 goto add_ndb_binlog_index_err;
4463 }
4464
4465 // Set all columns to be written
4466 ndb_binlog_index->use_all_columns();
4467
4468 // Turn off autocommit to do all writes in one transaction
4469 thd->variables.option_bits |= OPTION_NOT_AUTOCOMMIT;
4470 do {
4471 ulonglong epoch = 0, orig_epoch = 0;
4472 uint orig_server_id = 0;
4473
4474 // Intialize ndb_binlog_index->record[0]
4475 empty_record(ndb_binlog_index);
4476
4477 ndb_binlog_index->field[NBICOL_START_POS]->store(
4478 first->start_master_log_pos, true);
4479 ndb_binlog_index->field[NBICOL_START_FILE]->store(
4480 first->start_master_log_file,
4481 (uint)strlen(first->start_master_log_file), &my_charset_bin);
4482 ndb_binlog_index->field[NBICOL_EPOCH]->store(epoch = first->epoch, true);
4483 if (ndb_binlog_index->s->fields > NBICOL_ORIG_SERVERID) {
4484 /* Table has ORIG_SERVERID / ORIG_EPOCH columns.
4485 * Write rows with different ORIG_SERVERID / ORIG_EPOCH
4486 * separately
4487 */
4488 ndb_binlog_index->field[NBICOL_NUM_INSERTS]->store(row->n_inserts,
4489 true);
4490 ndb_binlog_index->field[NBICOL_NUM_UPDATES]->store(row->n_updates,
4491 true);
4492 ndb_binlog_index->field[NBICOL_NUM_DELETES]->store(row->n_deletes,
4493 true);
4494 ndb_binlog_index->field[NBICOL_NUM_SCHEMAOPS]->store(row->n_schemaops,
4495 true);
4496 ndb_binlog_index->field[NBICOL_ORIG_SERVERID]->store(
4497 orig_server_id = row->orig_server_id, true);
4498 ndb_binlog_index->field[NBICOL_ORIG_EPOCH]->store(
4499 orig_epoch = row->orig_epoch, true);
4500 ndb_binlog_index->field[NBICOL_GCI]->store(first->gci, true);
4501
4502 if (ndb_binlog_index->s->fields > NBICOL_NEXT_POS) {
4503 /* Table has next log pos fields, fill them in */
4504 ndb_binlog_index->field[NBICOL_NEXT_POS]->store(
4505 first->next_master_log_pos, true);
4506 ndb_binlog_index->field[NBICOL_NEXT_FILE]->store(
4507 first->next_master_log_file,
4508 (uint)strlen(first->next_master_log_file), &my_charset_bin);
4509 }
4510 row = row->next;
4511 } else {
4512 /* Old schema : Table has no separate
4513 * ORIG_SERVERID / ORIG_EPOCH columns.
4514 * Merge operation counts and write one row
4515 */
4516 while ((row = row->next)) {
4517 first->n_inserts += row->n_inserts;
4518 first->n_updates += row->n_updates;
4519 first->n_deletes += row->n_deletes;
4520 first->n_schemaops += row->n_schemaops;
4521 }
4522 ndb_binlog_index->field[NBICOL_NUM_INSERTS]->store(
4523 (ulonglong)first->n_inserts, true);
4524 ndb_binlog_index->field[NBICOL_NUM_UPDATES]->store(
4525 (ulonglong)first->n_updates, true);
4526 ndb_binlog_index->field[NBICOL_NUM_DELETES]->store(
4527 (ulonglong)first->n_deletes, true);
4528 ndb_binlog_index->field[NBICOL_NUM_SCHEMAOPS]->store(
4529 (ulonglong)first->n_schemaops, true);
4530 }
4531
4532 error = ndb_binlog_index->file->ha_write_row(ndb_binlog_index->record[0]);
4533
4534 /* Fault injection to test logging */
4535 if (DBUG_EVALUATE_IF("ndb_injector_binlog_index_write_fail_random", true,
4536 false)) {
4537 if ((((uint32)rand()) % 10) == 9) {
4538 ndb_log_error("NDB Binlog: Injecting random write failure");
4539 error =
4540 ndb_binlog_index->file->ha_write_row(ndb_binlog_index->record[0]);
4541 }
4542 }
4543
4544 if (error) {
4545 ndb_log_error(
4546 "NDB Binlog: Failed writing to ndb_binlog_index for "
4547 "epoch %u/%u orig_server_id %u orig_epoch %u/%u "
4548 "with error %d.",
4549 uint(epoch >> 32), uint(epoch), orig_server_id,
4550 uint(orig_epoch >> 32), uint(orig_epoch), error);
4551
4552 bool seen_error_row = false;
4553 ndb_binlog_index_row *cursor = first;
4554 do {
4555 char tmp[128];
4556 if (ndb_binlog_index->s->fields > NBICOL_ORIG_SERVERID)
4557 snprintf(tmp, sizeof(tmp), "%u/%u,%u,%u/%u", uint(epoch >> 32),
4558 uint(epoch), uint(cursor->orig_server_id),
4559 uint(cursor->orig_epoch >> 32), uint(cursor->orig_epoch));
4560
4561 else
4562 snprintf(tmp, sizeof(tmp), "%u/%u", uint(epoch >> 32), uint(epoch));
4563
4564 bool error_row = (row == (cursor->next));
4565 ndb_log_error(
4566 "NDB Binlog: Writing row (%s) to ndb_binlog_index - %s", tmp,
4567 (error_row ? "ERROR" : (seen_error_row ? "Discarded" : "OK")));
4568 seen_error_row |= error_row;
4569
4570 } while ((cursor = cursor->next));
4571
4572 error = -1;
4573 goto add_ndb_binlog_index_err;
4574 }
4575 } while (row);
4576
4577 add_ndb_binlog_index_err:
4578 /*
4579 Explicitly commit or rollback the writes.
4580 If we fail to commit we rollback.
4581 Note, trans_rollback_stmt() is defined to never fail.
4582 */
4583 thd->get_stmt_da()->set_overwrite_status(true);
4584 if (error) {
4585 // Error, rollback
4586 trans_rollback_stmt(thd);
4587 } else {
4588 assert(!thd->is_error());
4589 // Commit
4590 const bool failed = trans_commit_stmt(thd);
4591 if (failed || thd->transaction_rollback_request) {
4592 /*
4593 Transaction failed to commit or
4594 was rolled back internally by the engine.
4595 Print an error message in the log and return the
4596 error, which will cause replication to stop.
4597 */
4598 error = thd->get_stmt_da()->mysql_errno();
4599 ndb_log_error(
4600 "NDB Binlog: Failed committing transaction to "
4601 "ndb_binlog_index with error %d.",
4602 error);
4603 trans_rollback_stmt(thd);
4604 }
4605 }
4606
4607 thd->get_stmt_da()->set_overwrite_status(false);
4608
4609 // Restore previous option settings
4610 thd->variables.option_bits = option_bits;
4611
4612 // Close the tables this thread has opened
4613 close_thread_tables(thd);
4614
4615 // Release MDL locks on the opened table
4616 thd->mdl_context.release_transactional_locks();
4617
4618 return error;
4619 }
4620
4621 /*
4622 Write rows to the ndb_binlog_index table using a separate THD
4623 to avoid the write being killed
4624 */
4625 static void write_rows_with_new_thd(ndb_binlog_index_row *rows) {
4626 // Create a new THD and retry the write
4627 THD *new_thd = new THD;
4628 new_thd->set_new_thread_id();
4629 new_thd->thread_stack = (char *)&new_thd;
4630 new_thd->store_globals();
4631 new_thd->set_command(COM_DAEMON);
4632 new_thd->system_thread = SYSTEM_THREAD_NDBCLUSTER_BINLOG;
4633 new_thd->get_protocol_classic()->set_client_capabilities(0);
4634 new_thd->security_context()->skip_grants();
4635 new_thd->set_current_stmt_binlog_format_row();
4636
4637 // Retry the write
4638 const int retry_result = write_rows_impl(new_thd, rows);
4639 if (retry_result) {
4640 ndb_log_error(
4641 "NDB Binlog: Failed writing to ndb_binlog_index table "
4642 "while retrying after kill during shutdown");
4643 DBUG_ASSERT(false); // Crash in debug compile
4644 }
4645
4646 new_thd->restore_globals();
4647 delete new_thd;
4648 }
4649
4650 public:
4651 /*
4652 Write rows to the ndb_binlog_index table
4653 */
4654 static inline int write_rows(THD *thd, ndb_binlog_index_row *rows) {
4655 return write_rows_impl(thd, rows);
4656 }
4657
4658 /*
4659 Retry write rows to the ndb_binlog_index table after the THD
4660 has been killed (which should only happen during mysqld shutdown).
4661
4662 NOTE! The reason that the session (aka. THD) is killed is that
4663 it's in the global list of sessions and mysqld thus asks it to stop
4664 during shutdown by setting the "killed" flag. It's not possible to
4665 prevent the THD from being killed, so instead a brand new THD is
4666 used which is not in the global list of sessions. Furthermore it's
4667 a feature to have the THD in the global list of sessions since it
4668 should show up in SHOW PROCESSLIST.
4669 */
4670 static void write_rows_retry_after_kill(THD *orig_thd,
4671 ndb_binlog_index_row *rows) {
4672 // Should only be called when original THD has been killed
4673 DBUG_ASSERT(orig_thd->is_killed());
4674
4675 write_rows_with_new_thd(rows);
4676
4677 // Relink this thread with original THD
4678 orig_thd->store_globals();
4679 }
4680
4681 /*
4682 @brief Remove all rows from mysql.ndb_binlog_index table that contain
4683 references to the given binlog filename.
4684
4685 @note this function modifies THD state. Caller must ensure that
4686 the passed in THD is not affected by these changes. Presumably
4687 the state fixes should be moved down into Ndb_local_connection.
4688
4689 @param thd The thread handle
4690 @param filename Name of the binlog file whose references should be removed
4691
4692 @return true if failure to delete from the table occurs
4693 */
4694
4695 static bool remove_rows_for_file(THD *thd, const char *filename) {
4696 Ndb_local_connection mysqld(thd);
4697
4698 // Set isolation level to be independent from server settings
4699 thd->variables.transaction_isolation = ISO_REPEATABLE_READ;
4700
4701 // Turn autocommit on, this will make delete_rows() commit
4702 thd->variables.option_bits &= ~OPTION_NOT_AUTOCOMMIT;
4703
4704 // Ensure that file paths are escaped in a way that does not
4705 // interfere with path separator on Windows
4706 thd->variables.sql_mode |= MODE_NO_BACKSLASH_ESCAPES;
4707
4708 // Ignore "table does not exist" errors - treating a missing table as empty gives consistent behavior
4709 const bool ignore_no_such_table = true;
4710 std::string where;
4711 where.append("File='").append(filename).append("'");
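// The effective statement is roughly (a sketch, assuming DB_NAME and
// TABLE_NAME name mysql.ndb_binlog_index):
//   DELETE FROM mysql.ndb_binlog_index WHERE File='<filename>';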
4712 if (mysqld.delete_rows(DB_NAME, TABLE_NAME, ignore_no_such_table, where)) {
4713 // Failed
4714 return true;
4715 }
4716 return false;
4717 }
4718 };
4719 constexpr const char *const Ndb_binlog_index_table_util::DB_NAME;
4720 constexpr const char *const Ndb_binlog_index_table_util::TABLE_NAME;
4721
4722 // Wrapper function allowing Ndb_binlog_index_table_util::remove_rows_for_file()
4723 // to be forward declared
4724 static bool ndbcluster_binlog_index_remove_file(THD *thd,
4725 const char *filename) {
4726 return Ndb_binlog_index_table_util::remove_rows_for_file(thd, filename);
4727 }
4728
4729 /*********************************************************************
4730 Functions for start, stop, wait for ndbcluster binlog thread
4731 *********************************************************************/
4732
4733 int ndbcluster_binlog_start() {
4734 DBUG_TRACE;
4735
4736 if (::server_id == 0) {
4737 ndb_log_warning(
4738 "server id set to zero - changes logged to "
4739 "binlog with server id zero will be logged with "
4740 "another server id by slave mysqlds");
4741 }
4742
4743 /*
4744 Check that ServerId is not using the reserved bit or bits reserved
4745 for application use
4746 */
4747 if ((::server_id & 0x1 << 31) || // Reserved bit
4748 !ndbcluster_anyvalue_is_serverid_in_range(::server_id)) // server_id_bits
4749 {
4750 ndb_log_error(
4751 "server id provided is too large to be represented in "
4752 "opt_server_id_bits or is reserved");
4753 return -1;
4754 }
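/*
For reference, a sketch of the assumed 32-bit AnyValue/server-id layout
(see the ndbcluster_anyvalue_* helpers for the authoritative encoding):

bit 31       : reserved, flags special values such as nologging,
reflect, refresh and read operations
bits 30..n   : user data, when opt_server_id_bits (n) < 31
bits n-1..0  : originating server id

A server id using the reserved bit, or bits above opt_server_id_bits,
can not be represented in an AnyValue and is therefore rejected above.
*/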
4755
4756 /*
4757 Check that v2 events are enabled if log-transaction-id is set
4758 */
4759 if (opt_ndb_log_transaction_id && log_bin_use_v1_row_events) {
4760 ndb_log_error(
4761 "--ndb-log-transaction-id requires v2 Binlog row events "
4762 "but server is using v1.");
4763 return -1;
4764 }
4765
4766 ndb_binlog_thread.init();
4767
4768 /**
4769 * Note that injector_event_mutex is init'ed as a 'SLOW' mutex.
4770 * This is required as a FAST mutex could starve a waiter thread
4771 * forever if the thread holding the lock holds it for long.
4772 * See my_thread_global_init() which explicitly warns about this.
4773 */
4774 mysql_mutex_init(PSI_INSTRUMENT_ME, &injector_event_mutex,
4775 MY_MUTEX_INIT_SLOW);
4776 mysql_cond_init(PSI_INSTRUMENT_ME, &injector_data_cond);
4777 mysql_mutex_init(PSI_INSTRUMENT_ME, &injector_data_mutex, MY_MUTEX_INIT_FAST);
4778
4779 // The binlog thread globals have been initialized and should be freed
4780 ndbcluster_binlog_inited = 1;
4781
4782 /* Start ndb binlog thread */
4783 if (ndb_binlog_thread.start()) {
4784 DBUG_PRINT("error", ("Could not start ndb binlog thread"));
4785 return -1;
4786 }
4787
4788 return 0;
4789 }
4790
4791 void ndbcluster_binlog_set_server_started() {
4792 ndb_binlog_thread.set_server_started();
4793 }
4794
4795 void NDB_SHARE::set_binlog_flags(Ndb_binlog_type ndb_binlog_type) {
4796 DBUG_TRACE;
4797 switch (ndb_binlog_type) {
4798 case NBT_NO_LOGGING:
4799 DBUG_PRINT("info", ("NBT_NO_LOGGING"));
4800 flags |= NDB_SHARE::FLAG_NO_BINLOG;
4801 return;
4802 case NBT_DEFAULT:
4803 DBUG_PRINT("info", ("NBT_DEFAULT"));
4804 if (opt_ndb_log_updated_only) {
4805 flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4806 } else {
4807 flags |= NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4808 }
4809 if (opt_ndb_log_update_as_write) {
4810 flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4811 } else {
4812 flags |= NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4813 }
4814 if (opt_ndb_log_update_minimal) {
4815 flags |= NDB_SHARE::FLAG_BINLOG_MODE_MINIMAL_UPDATE;
4816 }
4817 break;
4818 case NBT_UPDATED_ONLY:
4819 DBUG_PRINT("info", ("NBT_UPDATED_ONLY"));
4820 flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4821 flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4822 break;
4823 case NBT_USE_UPDATE:
4824 DBUG_PRINT("info", ("NBT_USE_UPDATE"));
4825 // fall through
4826 case NBT_UPDATED_ONLY_USE_UPDATE:
4827 DBUG_PRINT("info", ("NBT_UPDATED_ONLY_USE_UPDATE"));
4828 flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4829 flags |= NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4830 break;
4831 case NBT_FULL:
4832 DBUG_PRINT("info", ("NBT_FULL"));
4833 flags |= NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4834 flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4835 break;
4836 case NBT_FULL_USE_UPDATE:
4837 DBUG_PRINT("info", ("NBT_FULL_USE_UPDATE"));
4838 flags |= NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4839 flags |= NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4840 break;
4841 case NBT_UPDATED_ONLY_MINIMAL:
4842 DBUG_PRINT("info", ("NBT_UPDATED_ONLY_MINIMAL"));
4843 flags &= ~NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4844 flags |= NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4845 flags |= NDB_SHARE::FLAG_BINLOG_MODE_MINIMAL_UPDATE;
4846 break;
4847 case NBT_UPDATED_FULL_MINIMAL:
4848 DBUG_PRINT("info", ("NBT_UPDATED_FULL_MINIMAL"));
4849 flags |= NDB_SHARE::FLAG_BINLOG_MODE_FULL;
4850 flags |= NDB_SHARE::FLAG_BINLOG_MODE_USE_UPDATE;
4851 flags |= NDB_SHARE::FLAG_BINLOG_MODE_MINIMAL_UPDATE;
4852 break;
4853 default:
4854 return;
4855 }
4856 flags &= ~NDB_SHARE::FLAG_NO_BINLOG;
4857 }
4858
4859 /*
4860 Ndb_binlog_client::read_replication_info
4861
4862 This function retrieves the data for the given table
4863 from the ndb_replication table.
4864
4865 If no entry for the table is found, or the ndb_replication table
4866 does not exist, then defaults are returned.
4867 */
4868 bool Ndb_binlog_client::read_replication_info(
4869 Ndb *ndb, const char *db, const char *table_name, uint server_id,
4870 uint32 *binlog_flags, const st_conflict_fn_def **conflict_fn,
4871 st_conflict_fn_arg *args, uint *num_args) {
4872 DBUG_TRACE;
4873
4874 /* Override for ndb_apply_status when logging */
4875 if (opt_ndb_log_apply_status) {
4876 if (Ndb_apply_status_table::is_apply_status_table(db, table_name)) {
4877 // Ensure that all columns are received from ndb_apply_status updates
4878 // and that events are always logged as WRITES.
4879 ndb_log_info(
4880 "ndb-log-apply-status forcing 'mysql.ndb_apply_status' to FULL "
4881 "USE_WRITE");
4882 *binlog_flags = NBT_FULL;
4883 *conflict_fn = NULL;
4884 *num_args = 0;
4885 return false;
4886 }
4887 }
4888
4889 Ndb_rep_tab_reader rep_tab_reader;
4890
4891 int const rc = rep_tab_reader.lookup(ndb, db, table_name, server_id);
4892
4893 if (rc == 0) {
4894 // lookup() may return a warning although it succeeds
4895 const char *msg = rep_tab_reader.get_warning_message();
4896 if (msg != NULL) {
4897 push_warning_printf(m_thd, Sql_condition::SL_WARNING,
4898 ER_NDB_REPLICATION_SCHEMA_ERROR,
4899 ER_THD(m_thd, ER_NDB_REPLICATION_SCHEMA_ERROR), msg);
4900 ndb_log_warning("NDB Binlog: %s", msg);
4901 }
4902 } else {
4903 /* When rep_tab_reader.lookup() returns a non-zero error code,
4904 it must provide a warning message describing why it failed. */
4905 const char *msg = rep_tab_reader.get_warning_message();
4906 DBUG_ASSERT(msg);
4907 my_error(ER_NDB_REPLICATION_SCHEMA_ERROR, MYF(0), msg);
4908 ndb_log_warning("NDB Binlog: %s", msg);
4909 return true;
4910 }
4911
4912 *binlog_flags = rep_tab_reader.get_binlog_flags();
4913 const char *conflict_fn_spec = rep_tab_reader.get_conflict_fn_spec();
4914
4915 if (conflict_fn_spec != NULL) {
4916 char msgbuf[FN_REFLEN];
4917 if (parse_conflict_fn_spec(conflict_fn_spec, conflict_fn, args, num_args,
4918 msgbuf, sizeof(msgbuf)) != 0) {
4919 my_error(ER_CONFLICT_FN_PARSE_ERROR, MYF(0), msgbuf);
4920
4921 /*
4922 Log as well, useful for contexts where the thd's stack of
4923 warnings is ignored
4924 */
4925 ndb_log_warning(
4926 "NDB Slave: Table %s.%s : Parse error on conflict fn : %s", db,
4927 table_name, msgbuf);
4928
4929 return true;
4930 }
4931 } else {
4932 /* No conflict function specified */
4933 conflict_fn = NULL;
4934 num_args = 0;
4935 }
4936
4937 return false;
4938 }
4939
4940 int Ndb_binlog_client::apply_replication_info(
4941 Ndb *ndb, NDB_SHARE *share, const NdbDictionary::Table *ndbtab,
4942 const st_conflict_fn_def *conflict_fn, const st_conflict_fn_arg *args,
4943 uint num_args, uint32 binlog_flags) {
4944 DBUG_TRACE;
4945 char tmp_buf[FN_REFLEN];
4946
4947 DBUG_PRINT("info", ("Setting binlog flags to %u", binlog_flags));
4948 share->set_binlog_flags((enum Ndb_binlog_type)binlog_flags);
4949
4950 if (conflict_fn != NULL) {
4951 if (setup_conflict_fn(ndb, &share->m_cfn_share, share->db,
4952 share->table_name, share->get_binlog_use_update(),
4953 ndbtab, tmp_buf, sizeof(tmp_buf), conflict_fn, args,
4954 num_args) == 0) {
4955 ndb_log_verbose(1, "NDB Slave: %s", tmp_buf);
4956 } else {
4957 /*
4958 Dump setup failure message to error log
4959 for cases where thd warning stack is
4960 ignored
4961 */
4962 ndb_log_warning("NDB Slave: Table %s.%s : %s", share->db,
4963 share->table_name, tmp_buf);
4964
4965 push_warning_printf(m_thd, Sql_condition::SL_WARNING,
4966 ER_CONFLICT_FN_PARSE_ERROR,
4967 ER_THD(m_thd, ER_CONFLICT_FN_PARSE_ERROR), tmp_buf);
4968
4969 return -1;
4970 }
4971 } else {
4972 /* No conflict function specified */
4973 slave_reset_conflict_fn(share->m_cfn_share);
4974 }
4975
4976 return 0;
4977 }
4978
4979 int Ndb_binlog_client::read_and_apply_replication_info(
4980 Ndb *ndb, NDB_SHARE *share, const NdbDictionary::Table *ndbtab,
4981 uint server_id) {
4982 DBUG_TRACE;
4983 uint32 binlog_flags;
4984 const st_conflict_fn_def *conflict_fn = NULL;
4985 st_conflict_fn_arg args[MAX_CONFLICT_ARGS];
4986 uint num_args = MAX_CONFLICT_ARGS;
4987
4988 if (read_replication_info(ndb, share->db, share->table_name, server_id,
4989 &binlog_flags, &conflict_fn, args, &num_args) ||
4990 apply_replication_info(ndb, share, ndbtab, conflict_fn, args, num_args,
4991 binlog_flags)) {
4992 return -1;
4993 }
4994
4995 return 0;
4996 }
4997
4998 /*
4999 Common function for setting up everything for logging a table at
5000 create/discover.
5001 */
5002 static int ndbcluster_setup_binlog_for_share(THD *thd, Ndb *ndb,
5003 NDB_SHARE *share,
5004 const dd::Table *table_def) {
5005 DBUG_TRACE;
5006
5007 // This function should not be used to set up binlogging
5008 // of tables with temporary names.
5009 DBUG_ASSERT(!ndb_name_is_temp(share->table_name));
5010
5011 Mutex_guard share_g(share->mutex);
5012 if (share->op != 0) {
5013 DBUG_PRINT("info", ("binlogging already setup"));
5014 return 0;
5015 }
5016
5017 Ndb_binlog_client binlog_client(thd, share->db, share->table_name);
5018
5019 Ndb_table_guard ndbtab_g(ndb, share->db, share->table_name);
5020 const NDBTAB *ndbtab = ndbtab_g.get_table();
5021 if (ndbtab == 0) {
5022 const NdbError ndb_error = ndb->getDictionary()->getNdbError();
5023 ndb_log_verbose(1,
5024 "NDB Binlog: Failed to open table '%s' from NDB, "
5025 "error: '%d - %s'",
5026 share->key_string(), ndb_error.code, ndb_error.message);
5027 return -1; // error
5028 }
5029
5030 if (binlog_client.read_and_apply_replication_info(ndb, share, ndbtab,
5031 ::server_id)) {
5032 ndb_log_error(
5033 "NDB Binlog: Failed to read and apply replication "
5034 "info for table '%s'",
5035 share->key_string());
5036 return -1;
5037 }
5038
5039 if (binlog_client.table_should_have_event(share, ndbtab)) {
5040 // Check if the event already exists in NDB, otherwise create it
5041 if (!binlog_client.event_exists_for_table(ndb, share)) {
5042 // The event didn't exist, create the event in NDB
5043 if (binlog_client.create_event(ndb, ndbtab, share)) {
5044 // Failed to create event
5045 return -1;
5046 }
5047 }
5048
5049 if (binlog_client.table_should_have_event_op(share)) {
5050 // Create the NDB event operation on the event
5051 Ndb_event_data *event_data;
5052 if (!binlog_client.create_event_data(share, table_def, &event_data) ||
5053 binlog_client.create_event_op(share, ndbtab, event_data)) {
5054 // Failed to create event data or event operation
5055 return -1;
5056 }
5057 }
5058 }
5059
5060 return 0;
5061 }
5062
5063 int ndbcluster_binlog_setup_table(THD *thd, Ndb *ndb, const char *db,
5064 const char *table_name,
5065 const dd::Table *table_def) {
5066 DBUG_TRACE;
5067 DBUG_PRINT("enter", ("db: '%s', table_name: '%s'", db, table_name));
5068 DBUG_ASSERT(table_def);
5069
5070 DBUG_ASSERT(!ndb_name_is_blob_prefix(table_name));
5071
5072 // Create key for ndbcluster_open_tables
5073 char key[FN_REFLEN + 1];
5074 {
5075 char *end = key + build_table_filename(key, sizeof(key) - 1, db, "", "", 0);
5076 end += tablename_to_filename(table_name, end,
5077 (uint)(sizeof(key) - (end - key)));
5078 }
5079
5080 mysql_mutex_lock(&ndbcluster_mutex);
5081
5082 // Check if NDB_SHARE for this table already exist
5083 NDB_SHARE *share =
5084 NDB_SHARE::acquire_reference_by_key_have_lock(key, "create_binlog_setup");
5085 if (share == nullptr) {
5086 // NDB_SHARE didn't exist, the normal case, try to create it
5087 share = NDB_SHARE::create_and_acquire_reference(key, "create_binlog_setup");
5088 if (share == nullptr) {
5089 // Could not create the NDB_SHARE. Unlikely, catch in debug
5090 DBUG_ASSERT(false);
5091 return -1;
5092 }
5093 }
5094 mysql_mutex_unlock(&ndbcluster_mutex);
5095
5096 // Before 'schema_dist_is_ready', Thd_ndb::ALLOW_BINLOG_SETUP is required
5097 int ret = 0;
5098 if (Ndb_schema_dist::is_ready(thd) ||
5099 get_thd_ndb(thd)->check_option(Thd_ndb::ALLOW_BINLOG_SETUP)) {
5100 ret = ndbcluster_setup_binlog_for_share(thd, ndb, share, table_def);
5101 }
5102
5103 NDB_SHARE::release_reference(share, "create_binlog_setup"); // temporary ref.
5104
5105 #ifndef DBUG_OFF
5106 // Force failure of setting up binlogging of a user table
5107 if (DBUG_EVALUATE_IF("ndb_binlog_fail_setup", true, false) &&
5108 !Ndb_schema_dist_client::is_schema_dist_table(db, table_name) &&
5109 !Ndb_schema_dist_client::is_schema_dist_result_table(db, table_name) &&
5110 !Ndb_apply_status_table::is_apply_status_table(db, table_name) &&
5111 !(!strcmp("test", db) && !strcmp(table_name, "check_not_readonly"))) {
5112 ret = -1;
5113 }
5114 #endif
5115
5116 /*
5117 * Handle failure of setting up binlogging of a table
5118 */
5119 if (ret != 0) {
5120 ndb_log_error("Failed to setup binlogging for table '%s.%s'", db,
5121 table_name);
5122 ndbcluster_handle_incomplete_binlog_setup();
5123 }
5124
5125 return ret;
5126 }
5127
5128 extern void kill_mysql(void);
5129
5130 void ndbcluster_handle_incomplete_binlog_setup() {
5131 ndb_log_error("NDB Binlog: ndbcluster_handle_incomplete_binlog_setup");
5132 if (opt_ndb_log_fail_terminate) kill_mysql();
5133 }
5134
5135 int Ndb_binlog_client::create_event(Ndb *ndb,
5136 const NdbDictionary::Table *ndbtab,
5137 const NDB_SHARE *share) {
5138 DBUG_TRACE;
5139 DBUG_PRINT("enter", ("table: '%s', version: %d", ndbtab->getName(),
5140 ndbtab->getObjectVersion()));
5141 DBUG_PRINT("enter", ("share->key: '%s'", share->key_string()));
5142 DBUG_ASSERT(share);
5143
5144 // Never create event on table with temporary name
5145 DBUG_ASSERT(!ndb_name_is_temp(ndbtab->getName()));
5146
5147 // Never create event on the blob table(s)
5148 DBUG_ASSERT(!ndb_name_is_blob_prefix(ndbtab->getName()));
5149
5150 std::string event_name =
5151 event_name_for_table(m_dbname, m_tabname, share->get_binlog_full());
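// The generated name is expected to follow the REPL$<db>/<table>
// scheme, with a REPLF$ prefix for the "full" variant.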
5152
5153 ndb->setDatabaseName(share->db);
5154 NdbDictionary::Dictionary *dict = ndb->getDictionary();
5155 NDBEVENT my_event(event_name.c_str());
5156 my_event.setTable(*ndbtab);
5157 my_event.addTableEvent(NDBEVENT::TE_ALL);
5158 if (ndb_table_has_hidden_pk(ndbtab)) {
5159 /* Hidden primary key, subscribe for all attributes */
5160 my_event.setReport(
5161 (NDBEVENT::EventReport)(NDBEVENT::ER_ALL | NDBEVENT::ER_DDL));
5162 DBUG_PRINT("info", ("subscription all"));
5163 } else {
5164 if (Ndb_schema_dist_client::is_schema_dist_table(share->db,
5165 share->table_name)) {
5166 /**
5167 * ER_SUBSCRIBE is only needed on the schema distribution table
5168 */
5169 my_event.setReport((NDBEVENT::EventReport)(
5170 NDBEVENT::ER_ALL | NDBEVENT::ER_SUBSCRIBE | NDBEVENT::ER_DDL));
5171 DBUG_PRINT("info", ("subscription all and subscribe"));
5172 } else if (Ndb_schema_dist_client::is_schema_dist_result_table(
5173 share->db, share->table_name)) {
5174 my_event.setReport(
5175 (NDBEVENT::EventReport)(NDBEVENT::ER_ALL | NDBEVENT::ER_DDL));
5176 DBUG_PRINT("info", ("subscription all"));
5177 } else {
5178 if (share->get_binlog_full()) {
5179 my_event.setReport(
5180 (NDBEVENT::EventReport)(NDBEVENT::ER_ALL | NDBEVENT::ER_DDL));
5181 DBUG_PRINT("info", ("subscription all"));
5182 } else {
5183 my_event.setReport(
5184 (NDBEVENT::EventReport)(NDBEVENT::ER_UPDATED | NDBEVENT::ER_DDL));
5185 DBUG_PRINT("info", ("subscription only updated"));
5186 }
5187 }
5188 }
5189 if (ndb_table_has_blobs(ndbtab)) my_event.mergeEvents(true);
5190
5191 /* add all columns to the event */
5192 const int n_cols = ndbtab->getNoOfColumns();
5193 for (int a = 0; a < n_cols; a++) my_event.addEventColumn(a);
5194
5195 if (dict->createEvent(my_event)) // Add event to database
5196 {
5197 if (dict->getNdbError().classification != NdbError::SchemaObjectExists) {
5198 // Failed to create event, log warning
5199 log_warning(ER_GET_ERRMSG,
5200 "Unable to create event in database. "
5201 "Event: %s Error Code: %d Message: %s",
5202 event_name.c_str(), dict->getNdbError().code,
5203 dict->getNdbError().message);
5204 return -1;
5205 }
5206
5207 /*
5208 Try retrieving the event; if the table version/id matches, we will get
5209 a valid event. Otherwise we have an old event from before.
5210 */
5211 const NDBEVENT *ev;
5212 if ((ev = dict->getEvent(event_name.c_str()))) {
5213 delete ev;
5214 return 0;
5215 }
5216
5217 // Old event from before; an error, but try to correct it
5218 if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT &&
5219 dict->dropEvent(my_event.getName(), 1)) {
5220 // Failed to drop the old event, log warning
5221 log_warning(ER_GET_ERRMSG,
5222 "Unable to create event in database. "
5223 "Attempt to correct with drop failed. "
5224 "Event: %s Error Code: %d Message: %s",
5225 event_name.c_str(), dict->getNdbError().code,
5226 dict->getNdbError().message);
5227 return -1;
5228 }
5229
5230 // Try to add the event again
5231 if (dict->createEvent(my_event)) {
5232 // Still failed to create the event, log warning
5233 log_warning(ER_GET_ERRMSG,
5234 "Unable to create event in database. "
5235 "Attempt to correct with drop ok, but create failed. "
5236 "Event: %s Error Code: %d Message: %s",
5237 event_name.c_str(), dict->getNdbError().code,
5238 dict->getNdbError().message);
5239 return -1;
5240 }
5241 }
5242
5243 ndb_log_verbose(1, "Created event '%s' for table '%s.%s' in NDB",
5244 event_name.c_str(), m_dbname, m_tabname);
5245
5246 return 0;
5247 }
5248
5249 inline int is_ndb_compatible_type(Field *field) {
5250 return !field->is_flag_set(BLOB_FLAG) && field->type() != MYSQL_TYPE_BIT &&
5251 field->pack_length() != 0;
5252 }
5253
5254 /*
5255 - create NdbEventOperation for receiving log events
5256 - setup ndb recattrs for reception of log event data
5257 - "start" the event operation
5258
5259 used at create/discover of tables
5260 */
5261 int Ndb_binlog_client::create_event_op(NDB_SHARE *share,
5262 const NdbDictionary::Table *ndbtab,
5263 const Ndb_event_data *event_data) {
5264 /*
5265 we are in either create table or rename table, so the table should be
5266 locked; hence we can work with the share without locks
5267 */
5268
5269 DBUG_TRACE;
5270 DBUG_PRINT("enter", ("table: '%s', share->key: '%s'", ndbtab->getName(),
5271 share->key_string()));
5272 DBUG_ASSERT(share);
5273 DBUG_ASSERT(event_data);
5274
5275 // Never create event op on table with temporary name
5276 DBUG_ASSERT(!ndb_name_is_temp(ndbtab->getName()));
5277
5278 // Never create event op on the blob table(s)
5279 DBUG_ASSERT(!ndb_name_is_blob_prefix(ndbtab->getName()));
5280
5281 // Schema dist tables need special processing
5282 const bool is_schema_dist_setup =
5283 Ndb_schema_dist_client::is_schema_dist_table(share->db,
5284 share->table_name) ||
5285 Ndb_schema_dist_client::is_schema_dist_result_table(share->db,
5286 share->table_name);
5287
5288 // Check if this is the event operation on mysql.ndb_apply_status
5289 // as it needs special processing
5290 const bool do_ndb_apply_status_share =
5291 Ndb_apply_status_table::is_apply_status_table(share->db,
5292 share->table_name);
5293
5294 std::string event_name =
5295 event_name_for_table(m_dbname, m_tabname, share->get_binlog_full());
5296
5297 // There should be no NdbEventOperation assigned yet
5298 DBUG_ASSERT(!share->op);
5299
5300 TABLE *table = event_data->shadow_table;
5301
5302 int retries = 100;
5303 int retry_sleep = 0;
5304 while (1) {
5305 if (retry_sleep > 0) {
5306 ndb_retry_sleep(retry_sleep);
5307 }
5308 Mutex_guard injector_mutex_g(injector_event_mutex);
5309 Ndb *ndb = injector_ndb;
5310 if (is_schema_dist_setup) ndb = schema_ndb;
5311
5312 if (ndb == NULL) return -1;
5313
5314 NdbEventOperation *op;
5315 if (is_schema_dist_setup)
5316 op = ndb->createEventOperation(event_name.c_str());
5317 else {
5318 // set injector_ndb database/schema from table internal name
5319 int ret = ndb->setDatabaseAndSchemaName(ndbtab);
5320 ndbcluster::ndbrequire(ret == 0);
5321 op = ndb->createEventOperation(event_name.c_str());
5322 // reset to catch errors
5323 ndb->setDatabaseName("");
5324 }
5325 if (!op) {
5326 const NdbError &ndb_err = ndb->getNdbError();
5327 if (ndb_err.code == 4710) {
5328 // Error code 4710 is returned when table or event is not found. The
5329 // generic error message for 4710 says "Event not found" but should
5330 // be reported as "table not found"
5331 log_warning(ER_GET_ERRMSG,
5332 "Failed to create event operation on '%s', "
5333 "table '%s' not found",
5334 event_name.c_str(), table->s->table_name.str);
5335 return -1;
5336 }
5337 log_warning(ER_GET_ERRMSG,
5338 "Failed to create event operation on '%s', error: %d - %s",
5339 event_name.c_str(), ndb_err.code, ndb_err.message);
5340 return -1;
5341 }
5342
5343 if (ndb_table_has_blobs(ndbtab))
5344 op->mergeEvents(true); // currently not inherited from event
5345
5346 const uint n_columns = ndbtab->getNoOfColumns();
5347 const uint n_stored_fields = Ndb_table_map::num_stored_fields(table);
5348 const uint val_length = sizeof(NdbValue) * n_columns;
5349
5350 /*
5351 Allocate memory globally so it can be reused after online alter table
5352 */
5353 if (my_multi_malloc(PSI_INSTRUMENT_ME, MYF(MY_WME),
5354 &event_data->ndb_value[0], val_length,
5355 &event_data->ndb_value[1], val_length, NULL) == 0) {
5356 log_warning(ER_GET_ERRMSG,
5357 "Failed to allocate records for event operation");
5358 return -1;
5359 }
5360
5361 Ndb_table_map map(table);
5362 for (uint j = 0; j < n_columns; j++) {
5363 const char *col_name = ndbtab->getColumn(j)->getName();
5364 NdbValue attr0, attr1;
5365 if (j < n_stored_fields) {
5366 Field *f = table->field[map.get_field_for_column(j)];
5367 if (is_ndb_compatible_type(f)) {
5368 DBUG_PRINT("info", ("%s compatible", col_name));
5369 attr0.rec = op->getValue(col_name, (char *)f->field_ptr());
5370 attr1.rec =
5371 op->getPreValue(col_name, (f->field_ptr() - table->record[0]) +
5372 (char *)table->record[1]);
5373 } else if (!f->is_flag_set(BLOB_FLAG)) {
5374 DBUG_PRINT("info", ("%s non compatible", col_name));
5375 attr0.rec = op->getValue(col_name);
5376 attr1.rec = op->getPreValue(col_name);
5377 } else {
5378 DBUG_PRINT("info", ("%s blob", col_name));
5379 DBUG_ASSERT(ndb_table_has_blobs(ndbtab));
5380 attr0.blob = op->getBlobHandle(col_name);
5381 attr1.blob = op->getPreBlobHandle(col_name);
5382 if (attr0.blob == NULL || attr1.blob == NULL) {
5383 log_warning(ER_GET_ERRMSG,
5384 "Failed to create NdbEventOperation on '%s', "
5385 "blob field %u handles failed, error: %d - %s",
5386 event_name.c_str(), j, op->getNdbError().code,
5387 op->getNdbError().message);
5388 ndb->dropEventOperation(op);
5389 return -1;
5390 }
5391 }
5392 } else {
5393 DBUG_PRINT("info", ("%s hidden key", col_name));
5394 attr0.rec = op->getValue(col_name);
5395 attr1.rec = op->getPreValue(col_name);
5396 }
5397 event_data->ndb_value[0][j].ptr = attr0.ptr;
5398 event_data->ndb_value[1][j].ptr = attr1.ptr;
5399 DBUG_PRINT("info",
5400 ("&event_data->ndb_value[0][%d]: 0x%lx "
5401 "event_data->ndb_value[0][%d]: 0x%lx",
5402 j, (long)&event_data->ndb_value[0][j], j, (long)attr0.ptr));
5403 DBUG_PRINT("info",
5404 ("&event_data->ndb_value[1][%d]: 0x%lx "
5405 "event_data->ndb_value[1][%d]: 0x%lx",
5406 j, (long)&event_data->ndb_value[0][j], j, (long)attr1.ptr));
5407 }
5408 op->setCustomData(
5409 const_cast<Ndb_event_data *>(event_data)); // set before execute
5410 share->op = op; // assign op in NDB_SHARE
5411
5412 /* Check if user explicitly requires monitoring of empty updates */
5413 if (opt_ndb_log_empty_update) op->setAllowEmptyUpdate(true);
5414
5415 if (op->execute()) {
5416 // Failed to create the NdbEventOperation
5417 const NdbError &ndb_err = op->getNdbError();
5418 share->op = NULL;
5419 retries--;
5420 if (ndb_err.status != NdbError::TemporaryError && ndb_err.code != 1407) {
5421 // Don't retry after these errors
5422 retries = 0;
5423 }
5424 if (retries == 0) {
5425 log_warning(ER_GET_ERRMSG,
5426 "Failed to activate NdbEventOperation for '%s', "
5427 "error: %d - %s",
5428 event_name.c_str(), ndb_err.code, ndb_err.message);
5429 }
5430 op->setCustomData(NULL);
5431 ndb->dropEventOperation(op);
5432 if (retries && !m_thd->killed) {
5433 // fairly high retry sleep, temporary error on schema operation can
5434 // take some time to resolve
5435 retry_sleep = 100; // milliseconds
5436 continue;
5437 }
5438 // Delete the event data, caller should create new before calling
5439 // this function again
5440 Ndb_event_data::destroy(event_data);
5441 return -1;
5442 }
5443 break;
5444 }
5445
5446 /* ndb_share reference binlog */
5447 NDB_SHARE::acquire_reference_on_existing(share, "binlog");
5448
5449 if (do_ndb_apply_status_share) {
5450 ndb_apply_status_share = NDB_SHARE::acquire_reference_on_existing(
5451 share, "ndb_apply_status_share");
5452
5453 DBUG_ASSERT(get_thd_ndb(m_thd)->check_option(Thd_ndb::ALLOW_BINLOG_SETUP));
5454 }
5455
5456 ndb_log_verbose(1, "NDB Binlog: logging %s (%s,%s)", share->key_string(),
5457 share->get_binlog_full() ? "FULL" : "UPDATED",
5458 share->get_binlog_use_update() ? "USE_UPDATE" : "USE_WRITE");
5459 return 0;
5460 }
5461
5462 void Ndb_binlog_client::drop_events_for_table(THD *thd, Ndb *ndb,
5463 const char *db,
5464 const char *table_name) {
5465 DBUG_TRACE;
5466 DBUG_PRINT("enter", ("db: %s, tabname: %s", db, table_name));
5467
5468 if (DBUG_EVALUATE_IF("ndb_skip_drop_event", true, false)) {
5469 ndb_log_verbose(1, "NDB Binlog: skipping drop event on '%s.%s'", db,
5470 table_name);
5471 return;
5472 }
5473
5474 for (uint i = 0; i < 2; i++) {
5475 std::string event_name = event_name_for_table(db, table_name, i);
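// The loop variable selects the event name variant: 0 for "updated
// only" and 1 for "full", so both possible events are dropped.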
5476
5477 NdbDictionary::Dictionary *dict = ndb->getDictionary();
5478 if (dict->dropEvent(event_name.c_str()) == 0) {
5479 // Event dropped successfully
5480 continue;
5481 }
5482
5483 if (dict->getNdbError().code == 4710 || dict->getNdbError().code == 1419) {
5484 // Failed to drop event but return code says it was
5485 // because the event didn't exist, ignore
5486 continue;
5487 }
5488
5489 /* Failed to drop event, push warning and write to log */
5490 push_warning_printf(thd, Sql_condition::SL_WARNING, ER_GET_ERRMSG,
5491 ER_THD(thd, ER_GET_ERRMSG), dict->getNdbError().code,
5492 dict->getNdbError().message, "NDB");
5493
5494 ndb_log_error(
5495 "NDB Binlog: Unable to drop event for '%s.%s' from NDB, "
5496 "event_name: '%s' error: '%d - %s'",
5497 db, table_name, event_name.c_str(), dict->getNdbError().code,
5498 dict->getNdbError().message);
5499 }
5500 }
5501
5502 /*
5503 Wait for the binlog thread to drop its NdbEventOperations
5504 during a drop table
5505
5506 Synchronized drop between client and injector thread is
5507 necessary in order to maintain ordering in the binlog,
5508 such that the drop occurs _after_ any inserts/updates/deletes.
5509
5510 Also the injector thread needs to be given time to detect the
5511 drop and release its resources allocated in the NDB_SHARE.
5512 */
5513
5514 int ndbcluster_binlog_wait_synch_drop_table(THD *thd, NDB_SHARE *share) {
5515 DBUG_TRACE;
5516 DBUG_ASSERT(share);
5517
5518 const char *save_proc_info = thd->proc_info;
5519 thd->proc_info = "Syncing ndb table schema operation and binlog";
5520
5521 int max_timeout = DEFAULT_SYNC_TIMEOUT;
5522
5523 mysql_mutex_lock(&share->mutex);
5524 while (share->op) {
5525 struct timespec abstime;
5526 set_timespec(&abstime, 1);
5527
5528 // Unlock the share and wait for injector to signal that
5529 // something has happened. (NOTE! convoluted in order to
5530 // only use injector_data_cond with injector_data_mutex)
5531 mysql_mutex_unlock(&share->mutex);
5532 mysql_mutex_lock(&injector_data_mutex);
5533 const int ret = mysql_cond_timedwait(&injector_data_cond,
5534 &injector_data_mutex, &abstime);
5535 mysql_mutex_unlock(&injector_data_mutex);
5536 mysql_mutex_lock(&share->mutex);
5537
5538 if (thd->killed || share->op == 0) break;
5539 if (ret) {
5540 max_timeout--;
5541 if (max_timeout == 0) {
5542 ndb_log_error("%s, delete table timed out. Ignoring...",
5543 share->key_string());
5544 DBUG_ASSERT(false);
5545 break;
5546 }
5547 if (ndb_log_get_verbose_level())
5548 ndb_report_waiting("delete table", max_timeout, "delete table",
5549 share->key_string());
5550 }
5551 }
5552 mysql_mutex_unlock(&share->mutex);
5553
5554 thd->proc_info = save_proc_info;
5555
5556 return 0;
5557 }
5558
5559 void ndbcluster_binlog_validate_sync_blacklist(THD *thd) {
5560 ndb_binlog_thread.validate_sync_blacklist(thd);
5561 }
5562
5563 void ndbcluster_binlog_validate_sync_retry_list(THD *thd) {
5564 ndb_binlog_thread.validate_sync_retry_list(thd);
5565 }
5566
5567 bool ndbcluster_binlog_check_table_async(const std::string &db_name,
5568 const std::string &table_name) {
5569 if (db_name.empty()) {
5570 ndb_log_error("Database name of object to be synchronized not set");
5571 return false;
5572 }
5573
5574 if (table_name.empty()) {
5575 ndb_log_error("Table name of object to be synchronized not set");
5576 return false;
5577 }
5578
5579 return ndb_binlog_thread.add_table_to_check(db_name, table_name);
5580 }
5581
5582 bool ndbcluster_binlog_check_logfile_group_async(const std::string &lfg_name) {
5583 if (lfg_name.empty()) {
5584 ndb_log_error("Name of logfile group to be synchronized not set");
5585 return false;
5586 }
5587
5588 return ndb_binlog_thread.add_logfile_group_to_check(lfg_name);
5589 }
5590
5591 bool ndbcluster_binlog_check_tablespace_async(
5592 const std::string &tablespace_name) {
5593 if (tablespace_name.empty()) {
5594 ndb_log_error("Name of tablespace to be synchronized not set");
5595 return false;
5596 }
5597
5598 return ndb_binlog_thread.add_tablespace_to_check(tablespace_name);
5599 }
5600
5601 bool ndbcluster_binlog_check_schema_async(const std::string &schema_name) {
5602 if (schema_name.empty()) {
5603 ndb_log_error("Name of schema to be synchronized not set");
5604 return false;
5605 }
5606 return ndb_binlog_thread.add_schema_to_check(schema_name);
5607 }
5608
5609 void ndbcluster_binlog_retrieve_sync_blacklist(
5610 Ndb_sync_excluded_objects_table *excluded_table) {
5611 ndb_binlog_thread.retrieve_sync_blacklist(excluded_table);
5612 }
5613
5614 unsigned int ndbcluster_binlog_get_sync_blacklist_count() {
5615 return ndb_binlog_thread.get_sync_blacklist_count();
5616 }
5617
5618 void ndbcluster_binlog_retrieve_sync_pending_objects(
5619 Ndb_sync_pending_objects_table *pending_table) {
5620 ndb_binlog_thread.retrieve_sync_pending_objects(pending_table);
5621 }
5622
5623 unsigned int ndbcluster_binlog_get_sync_pending_objects_count() {
5624 return ndb_binlog_thread.get_sync_pending_objects_count();
5625 }
5626
5627 /********************************************************************
5628 Internal helper functions for different events from the storage nodes
5629 used by the ndb injector thread
5630 ********************************************************************/
5631
5632 /*
5633 Unpack a record read from NDB
5634
5635 SYNOPSIS
5636 ndb_unpack_record()
5637 buf Buffer to store read row
5638
5639 NOTE
5640 The data for each row is read directly into the
5641 destination buffer. This function is primarily
5642 called in order to check if any fields should be
5643 set to null.
5644 */
5645
5646 static void ndb_unpack_record(TABLE *table, NdbValue *value, MY_BITMAP *defined,
5647 uchar *buf) {
5648 Field **p_field = table->field, *field = *p_field;
5649 ptrdiff_t row_offset = (ptrdiff_t)(buf - table->record[0]);
5650 my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->write_set);
5651 DBUG_TRACE;
5652
5653 /*
5654 Set the filler bits of the null byte, since they are
5655 not touched in the code below.
5656
5657 The filler bits are the MSBs in the last null byte
5658 */
5659 if (table->s->null_bytes > 0)
5660 buf[table->s->null_bytes - 1] |= 256U - (1U << table->s->last_null_bit_pos);
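// Example (illustrative values): with last_null_bit_pos == 3 the mask
// is 256U - (1U << 3) == 0xF8, setting the five filler MSBs while
// leaving the three real null bits untouched.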
5661 /*
5662 Set null flag(s)
5663 */
5664 for (; field; p_field++, field = *p_field) {
5665 if (field->is_virtual_gcol()) {
5666 if (field->is_flag_set(BLOB_FLAG)) {
5667 /**
5668 * Valgrind shows Server binlog code uses length
5669 * of virtual blob fields for allocation decisions
5670 * even when the blob is not read
5671 */
5672 Field_blob *field_blob = (Field_blob *)field;
5673 DBUG_PRINT("info", ("[%u] is virtual blob, setting length 0",
5674 field->field_index()));
5675 Uint32 zerolen = 0;
5676 field_blob->set_ptr((uchar *)&zerolen, NULL);
5677 }
5678
5679 continue;
5680 }
5681
5682 field->set_notnull(row_offset);
5683 if ((*value).ptr) {
5684 if (!field->is_flag_set(BLOB_FLAG)) {
5685 int is_null = (*value).rec->isNULL();
5686 if (is_null) {
5687 if (is_null > 0) {
5688 DBUG_PRINT("info", ("[%u] NULL", field->field_index()));
5689 field->set_null(row_offset);
5690 } else {
5691 DBUG_PRINT("info", ("[%u] UNDEFINED", field->field_index()));
5692 bitmap_clear_bit(defined, field->field_index());
5693 }
5694 } else if (field->type() == MYSQL_TYPE_BIT) {
5695 Field_bit *field_bit = static_cast<Field_bit *>(field);
5696
5697 /*
5698 Move internal field pointer to point to 'buf'. Calling
5699 the correct member function directly since we know the
5700 type of the object.
5701 */
5702 field_bit->Field_bit::move_field_offset(row_offset);
5703 if (field->pack_length() < 5) {
5704 DBUG_PRINT("info",
5705 ("bit field H'%.8X", (*value).rec->u_32_value()));
5706 field_bit->Field_bit::store((longlong)(*value).rec->u_32_value(),
5707 true);
5708 } else {
5709 DBUG_PRINT("info",
5710 ("bit field H'%.8X%.8X", *(Uint32 *)(*value).rec->aRef(),
5711 *((Uint32 *)(*value).rec->aRef() + 1)));
5712 #ifdef WORDS_BIGENDIAN
5713 /* lsw is stored first */
5714 Uint32 *buf = (Uint32 *)(*value).rec->aRef();
5715 field_bit->Field_bit::store(
5716 (((longlong)*buf) & 0x00000000FFFFFFFFLL) |
5717 ((((longlong) * (buf + 1)) << 32) & 0xFFFFFFFF00000000LL),
5718 true);
5719 #else
5720 field_bit->Field_bit::store((longlong)(*value).rec->u_64_value(),
5721 true);
5722 #endif
5723 }
5724 /*
5725 Move back internal field pointer to point to original
5726 value (usually record[0]).
5727 */
5728 field_bit->Field_bit::move_field_offset(-row_offset);
5729 DBUG_PRINT("info",
5730 ("[%u] SET", (*value).rec->getColumn()->getColumnNo()));
5731 DBUG_DUMP("info", field->field_ptr(), field->pack_length());
5732 } else {
5733 DBUG_ASSERT(
5734 !strcmp((*value).rec->getColumn()->getName(), field->field_name));
5735 DBUG_PRINT("info",
5736 ("[%u] SET", (*value).rec->getColumn()->getColumnNo()));
5737 DBUG_DUMP("info", field->field_ptr(), field->pack_length());
5738 }
5739 } else {
5740 NdbBlob *ndb_blob = (*value).blob;
5741 const uint field_no = field->field_index();
5742 int isNull;
5743 ndb_blob->getDefined(isNull);
5744 if (isNull == 1) {
5745 DBUG_PRINT("info", ("[%u] NULL", field_no));
5746 field->set_null(row_offset);
5747 } else if (isNull == -1) {
5748 DBUG_PRINT("info", ("[%u] UNDEFINED", field_no));
5749 bitmap_clear_bit(defined, field_no);
5750 } else {
5751 #ifndef DBUG_OFF
5752 // pointer was set in get_ndb_blobs_value
5753 Field_blob *field_blob = (Field_blob *)field;
5754 const uchar *ptr = field_blob->get_blob_data(row_offset);
5755 uint32 len = field_blob->get_length(row_offset);
5756 DBUG_PRINT("info", ("[%u] SET ptr: 0x%lx len: %u", field_no,
5757 (long)ptr, len));
5758 #endif
5759 }
5760 } // else
5761 } // if ((*value).ptr)
5762 value++; // this field was not virtual
5763 } // for()
5764 dbug_tmp_restore_column_map(table->write_set, old_map);
5765 }
5766
5767 /*
5768 Handle error states on events from the storage nodes
5769 */
5770 static int handle_error(NdbEventOperation *pOp) {
5771 Ndb_event_data *event_data = (Ndb_event_data *)pOp->getCustomData();
5772 NDB_SHARE *share = event_data->share;
5773 DBUG_TRACE;
5774
5775 ndb_log_error("NDB Binlog: unhandled error %d for table %s", pOp->hasError(),
5776 share->key_string());
5777 pOp->clearError();
5778 return 0;
5779 }
5780
5781 /*
5782 Handle _non_ data events from the storage nodes
5783 */
5784
5785 static void handle_non_data_event(THD *thd, NdbEventOperation *pOp,
5786 ndb_binlog_index_row &row) {
5787 const Ndb_event_data *event_data =
5788 static_cast<const Ndb_event_data *>(pOp->getCustomData());
5789 NDB_SHARE *share = event_data->share;
5790 const NDBEVENT::TableEvent type = pOp->getEventType();
5791
5792 DBUG_TRACE;
5793 DBUG_PRINT("enter",
5794 ("pOp: %p, event_data: %p, share: %p", pOp, event_data, share));
5795 DBUG_PRINT("enter", ("type: %d", type));
5796
5797 if (type == NDBEVENT::TE_DROP || type == NDBEVENT::TE_ALTER) {
5798 // Count schema events
5799 row.n_schemaops++;
5800 }
5801
5802 switch (type) {
5803 case NDBEVENT::TE_CLUSTER_FAILURE:
5804 ndb_log_verbose(1, "NDB Binlog: cluster failure for %s at epoch %u/%u.",
5805 share->key_string(), (uint)(pOp->getGCI() >> 32),
5806 (uint)(pOp->getGCI()));
5807 // fallthrough
5808 case NDBEVENT::TE_DROP:
5809 if (ndb_apply_status_share == share) {
5810 if (ndb_binlog_tables_inited && ndb_binlog_running)
5811 ndb_log_verbose(1, "NDB Binlog: util tables need to reinitialize");
5812
5813 /* release the ndb_apply_status_share */
5814 NDB_SHARE::release_reference(ndb_apply_status_share,
5815 "ndb_apply_status_share");
5816 ndb_apply_status_share = NULL;
5817
5818 Mutex_guard injector_g(injector_data_mutex);
5819 ndb_binlog_tables_inited = false;
5820 }
5821
5822 ndbcluster_binlog_event_operation_teardown(thd, injector_ndb, pOp);
5823 break;
5824
5825 case NDBEVENT::TE_ALTER:
5826 DBUG_PRINT("info", ("TE_ALTER"));
5827 break;
5828
5829 case NDBEVENT::TE_NODE_FAILURE:
5830 case NDBEVENT::TE_SUBSCRIBE:
5831 case NDBEVENT::TE_UNSUBSCRIBE:
5832 /* ignore */
5833 break;
5834
5835 default:
5836 ndb_log_error(
5837 "NDB Binlog: unknown non data event %d for %s. "
5838 "Ignoring...",
5839 (unsigned)type, share->key_string());
5840 break;
5841 }
5842 }
5843
5844 /*
5845 Handle data events from the storage nodes
5846 */
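/*
Find, or lazily allocate, the ndb_binlog_index row that accumulates
statistics for a given originating server id within the current epoch.
The rows form a singly linked list allocated from the current MEM_ROOT;
the "flag" argument requests a row whose orig_epoch is not yet assigned.
*/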
5847 inline ndb_binlog_index_row *ndb_find_binlog_index_row(
5848 ndb_binlog_index_row **rows, uint orig_server_id, int flag) {
5849 ndb_binlog_index_row *row = *rows;
5850 if (opt_ndb_log_orig) {
5851 ndb_binlog_index_row *first = row, *found_id = 0;
5852 for (;;) {
5853 if (row->orig_server_id == orig_server_id) {
5854 /* */
5855 if (!flag || !row->orig_epoch) return row;
5856 if (!found_id) found_id = row;
5857 }
5858 if (row->orig_server_id == 0) break;
5859 row = row->next;
5860 if (row == NULL) {
5861 // Allocate memory in current MEM_ROOT
5862 row = (ndb_binlog_index_row *)(*THR_MALLOC)
5863 ->Alloc(sizeof(ndb_binlog_index_row));
5864 memset(row, 0, sizeof(ndb_binlog_index_row));
5865 row->next = first;
5866 *rows = row;
5867 if (found_id) {
5868 /*
5869 If we already found an index_row with the same server id,
5870 that row contains the current stats.
5871 Copy the stats over to the new row and reset the old one.
5872 */
5873 row->n_inserts = found_id->n_inserts;
5874 row->n_updates = found_id->n_updates;
5875 row->n_deletes = found_id->n_deletes;
5876 found_id->n_inserts = 0;
5877 found_id->n_updates = 0;
5878 found_id->n_deletes = 0;
5879 }
5880 /* keep track of schema ops only on "first" index_row */
5881 row->n_schemaops = first->n_schemaops;
5882 first->n_schemaops = 0;
5883 break;
5884 }
5885 }
5886 row->orig_server_id = orig_server_id;
5887 }
5888 return row;
5889 }
5890
5891 static int handle_data_event(NdbEventOperation *pOp,
5892 ndb_binlog_index_row **rows,
5893 injector::transaction &trans,
5894 unsigned &trans_row_count,
5895 unsigned &trans_slave_row_count) {
5896 Ndb_event_data *event_data = (Ndb_event_data *)pOp->getCustomData();
5897 TABLE *table = event_data->shadow_table;
5898 NDB_SHARE *share = event_data->share;
5899 bool reflected_op = false;
5900 bool refresh_op = false;
5901 bool read_op = false;
5902
5903 if (pOp != share->op) {
5904 return 0;
5905 }
5906
5907 uint32 anyValue = pOp->getAnyValue();
5908 if (ndbcluster_anyvalue_is_reserved(anyValue)) {
5909 if (ndbcluster_anyvalue_is_nologging(anyValue)) return 0;
5910
5911 if (ndbcluster_anyvalue_is_reflect_op(anyValue)) {
5912 DBUG_PRINT("info", ("Anyvalue -> Reflect (%u)", anyValue));
5913 reflected_op = true;
5914 anyValue = 0;
5915 } else if (ndbcluster_anyvalue_is_refresh_op(anyValue)) {
5916 DBUG_PRINT("info", ("Anyvalue -> Refresh"));
5917 refresh_op = true;
5918 anyValue = 0;
5919 } else if (ndbcluster_anyvalue_is_read_op(anyValue)) {
5920 DBUG_PRINT("info", ("Anyvalue -> Read"));
5921 read_op = true;
5922 anyValue = 0;
5923 } else {
5924 ndb_log_warning(
5925 "unknown value for binlog signalling 0x%X, "
5926 "event not logged",
5927 anyValue);
5928 return 0;
5929 }
5930 }
5931
5932 uint32 originating_server_id = ndbcluster_anyvalue_get_serverid(anyValue);
5933 bool log_this_slave_update = g_ndb_log_slave_updates;
5934 bool count_this_event = true;
5935
5936 if (share == ndb_apply_status_share) {
5937 /*
5938 Note that option values are read without synchronisation w.r.t.
5939 thread setting option variable or epoch boundaries.
5940 */
5941 if (opt_ndb_log_apply_status || opt_ndb_log_orig) {
5942 Uint32 ndb_apply_status_logging_server_id = originating_server_id;
5943 Uint32 ndb_apply_status_server_id = 0;
5944 Uint64 ndb_apply_status_epoch = 0;
5945 bool event_has_data = false;
5946
5947 switch (pOp->getEventType()) {
5948 case NDBEVENT::TE_INSERT:
5949 case NDBEVENT::TE_UPDATE:
5950 event_has_data = true;
5951 break;
5952
5953 case NDBEVENT::TE_DELETE:
5954 break;
5955 default:
5956 /* We should REALLY never get here */
5957 abort();
5958 }
5959
5960 if (likely(event_has_data)) {
5961 /* unpack data to fetch orig_server_id and orig_epoch */
5962 MY_BITMAP b;
5963 uint32 bitbuf[128 / (sizeof(uint32) * 8)];
5964 ndb_bitmap_init(b, bitbuf, table->s->fields);
5965 bitmap_copy(&b, &event_data->stored_columns);
5966 ndb_unpack_record(table, event_data->ndb_value[0], &b,
5967 table->record[0]);
5968 ndb_apply_status_server_id =
5969 (uint)((Field_long *)table->field[0])->val_int();
5970 ndb_apply_status_epoch = ((Field_longlong *)table->field[1])->val_int();
5971
5972 if (opt_ndb_log_apply_status) {
5973 /*
5974 Determine if the event came from our immediate Master server.
5975 Ignore locally/manually sourced and reserved events.
5976 */
5977 if ((ndb_apply_status_logging_server_id != 0) &&
5978 (!ndbcluster_anyvalue_is_reserved(
5979 ndb_apply_status_logging_server_id))) {
5980 bool isFromImmediateMaster = (ndb_apply_status_server_id ==
5981 ndb_apply_status_logging_server_id);
5982
5983 if (isFromImmediateMaster) {
5984 /*
5985 We log this event with our server-id so that it
5986 propagates back to the originating Master (our
5987 immediate Master)
5988 */
5989 assert(ndb_apply_status_logging_server_id != ::server_id);
5990
5991 originating_server_id =
5992 0; /* Will be set to our ::server_id below */
5993 }
5994 }
5995 }
5996
5997 if (opt_ndb_log_orig) {
5998 /* store */
5999 ndb_binlog_index_row *row =
6000 ndb_find_binlog_index_row(rows, ndb_apply_status_server_id, 1);
6001 row->orig_epoch = ndb_apply_status_epoch;
6002 }
6003 }
6004 } // opt_ndb_log_apply_status || opt_ndb_log_orig)
6005
6006 if (opt_ndb_log_apply_status) {
6007 /* We are logging ndb_apply_status changes
6008 * Don't count this event as making an epoch non-empty
6009 * Log this event in the Binlog
6010 */
6011 count_this_event = false;
6012 log_this_slave_update = true;
6013 } else {
6014 /* Not logging ndb_apply_status updates, discard this event now */
6015 return 0;
6016 }
6017 }
6018
6019 if (originating_server_id == 0)
6020 originating_server_id = ::server_id;
6021 else {
6022 assert(!reflected_op && !refresh_op);
6023 /* Track that we received a replicated row event */
6024 if (likely(count_this_event)) trans_slave_row_count++;
6025
6026 if (!log_this_slave_update) {
6027 /*
6028 This event comes from a slave applier since it has an originating
6029 server id set. Since the option to log slave updates is not set, skip it.
6030 */
6031 return 0;
6032 }
6033 }
6034
6035 /*
6036 Start with logged_server_id as AnyValue in case it's a composite
6037 (server_id_bits < 31). This way any user-values are passed-through
6038 to the Binlog in the high bits of the event's Server Id.
6039 In future it may be useful to support *not* mapping composite
6040 AnyValues to/from Binlogged server-ids.
6041 */
6042 uint32 logged_server_id = anyValue;
6043 ndbcluster_anyvalue_set_serverid(logged_server_id, originating_server_id);
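/*
Example (hypothetical values): with server_id_bits = 8, an anyValue of
0x00000A00 and originating_server_id = 3 yield logged_server_id
0x00000A03 - user data in the high bits is preserved while the server
id occupies the low bits.
*/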
6044
6045 /*
6046 Get NdbApi transaction id for this event to put into Binlog
6047 */
6048 Ndb_binlog_extra_row_info extra_row_info;
6049 const unsigned char *extra_row_info_ptr = NULL;
6050 Uint16 erif_flags = 0;
6051 if (opt_ndb_log_transaction_id) {
6052 erif_flags |= Ndb_binlog_extra_row_info::NDB_ERIF_TRANSID;
6053 extra_row_info.setTransactionId(pOp->getTransId());
6054 }
6055
6056 /* Set conflict flags member if necessary */
6057 Uint16 event_conflict_flags = 0;
6058 assert(!(reflected_op && refresh_op));
6059 if (reflected_op) {
6060 event_conflict_flags |= NDB_ERIF_CFT_REFLECT_OP;
6061 } else if (refresh_op) {
6062 event_conflict_flags |= NDB_ERIF_CFT_REFRESH_OP;
6063 } else if (read_op) {
6064 event_conflict_flags |= NDB_ERIF_CFT_READ_OP;
6065 }
6066
6067 if (DBUG_EVALUATE_IF("ndb_injector_set_event_conflict_flags", true, false)) {
6068 event_conflict_flags = 0xfafa;
6069 }
6070 if (event_conflict_flags != 0) {
6071 erif_flags |= Ndb_binlog_extra_row_info::NDB_ERIF_CFT_FLAGS;
6072 extra_row_info.setConflictFlags(event_conflict_flags);
6073 }
6074
6075 if (erif_flags != 0) {
6076 extra_row_info.setFlags(erif_flags);
6077 if (likely(!log_bin_use_v1_row_events)) {
6078 extra_row_info_ptr = extra_row_info.generateBuffer();
6079 } else {
6080 /**
6081 * Can't put the metadata in a v1 event.
6082 * Produce 1 warning at most.
6083 */
6084 if (!g_injector_v1_warning_emitted) {
6085 ndb_log_error(
6086 "Binlog Injector discarding row event "
6087 "meta data as server is using v1 row events. "
6088 "(%u %x)",
6089 opt_ndb_log_transaction_id, event_conflict_flags);
6090
6091 g_injector_v1_warning_emitted = true;
6092 }
6093 }
6094 }
6095
6096 DBUG_ASSERT(trans.good());
6097 DBUG_ASSERT(table != 0);
6098
6099 #ifndef DBUG_OFF
6100 Ndb_table_map::print_table("table", table);
6101 #endif
6102
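// Stack-allocated bitmap buffer: one bit per possible NDB table
// attribute, rounded up to whole my_bitmap_map words.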
6103 MY_BITMAP b;
6104 my_bitmap_map
6105 bitbuf[(NDB_MAX_ATTRIBUTES_IN_TABLE + 8 * sizeof(my_bitmap_map) - 1) /
6106 (8 * sizeof(my_bitmap_map))];
6107 ndb_bitmap_init(b, bitbuf, table->s->fields);
6108 bitmap_copy(&b, &event_data->stored_columns);
6109 if (bitmap_is_clear_all(&b)) {
6110 DBUG_PRINT("info", ("Skip logging of event without stored columns"));
6111 return 0;
6112 }
6113
6114 /*
6115 row data is already in table->record[0],
6116 as we told the NdbEventOperation to put it there
6117 (this saves moving data about many times)
6118 */
6119
6120 /*
6121 for now, malloc/free the blobs buffer each time
6122 TODO: if possible, share a single permanent buffer with the handlers
6123 */
6124 uchar *blobs_buffer[2] = {0, 0};
6125 uint blobs_buffer_size[2] = {0, 0};
6126
6127 ndb_binlog_index_row *row =
6128 ndb_find_binlog_index_row(rows, originating_server_id, 0);
6129
6130 switch (pOp->getEventType()) {
6131 case NDBEVENT::TE_INSERT:
6132 if (likely(count_this_event)) {
6133 row->n_inserts++;
6134 trans_row_count++;
6135 }
6136 DBUG_PRINT("info", ("INSERT INTO %s.%s", table->s->db.str,
6137 table->s->table_name.str));
6138 {
6139 int ret;
6140 (void)ret; // Bug27150740 HANDLE_DATA_EVENT NEED ERROR HANDLING
6141 if (event_data->have_blobs) {
6142 ptrdiff_t ptrdiff = 0;
6143 ret = get_ndb_blobs_value(table, event_data->ndb_value[0],
6144 blobs_buffer[0], blobs_buffer_size[0],
6145 ptrdiff);
6146 assert(ret == 0);
6147 }
6148 ndb_unpack_record(table, event_data->ndb_value[0], &b,
6149 table->record[0]);
6150 ret = trans.write_row(logged_server_id,
6151 injector::transaction::table(table, true), &b,
6152 table->record[0], extra_row_info_ptr);
6153 assert(ret == 0);
6154 }
6155 break;
6156 case NDBEVENT::TE_DELETE:
6157 if (likely(count_this_event)) {
6158 row->n_deletes++;
6159 trans_row_count++;
6160 }
6161 DBUG_PRINT("info", ("DELETE FROM %s.%s", table->s->db.str,
6162 table->s->table_name.str));
6163 {
6164 /*
6165 table->record[0] contains only the primary key in this case
6166 since we do not have an after image
6167 */
6168 int n;
6169 if (!share->get_binlog_full() && table->s->primary_key != MAX_KEY)
6170 n = 0; /*
6171 use the primary key only as it saves time and space and
6172 it is the only thing needed to log the delete
6173 */
6174 else
6175 n = 1; /*
6176                  we use the before values, since the table has no primary
6177                  key that the mysql server can use (it does not handle the
6178                  hidden primary key)
6179 */
6180
6181 int ret;
6182 (void)ret; // Bug27150740 HANDLE_DATA_EVENT NEED ERROR HANDLING
6183 if (event_data->have_blobs) {
6184 ptrdiff_t ptrdiff = table->record[n] - table->record[0];
6185 ret = get_ndb_blobs_value(table, event_data->ndb_value[n],
6186 blobs_buffer[n], blobs_buffer_size[n],
6187 ptrdiff);
6188 assert(ret == 0);
6189 }
6190 ndb_unpack_record(table, event_data->ndb_value[n], &b,
6191 table->record[n]);
6192 DBUG_EXECUTE("info",
6193 Ndb_table_map::print_record(table, table->record[n]););
6194 ret = trans.delete_row(logged_server_id,
6195 injector::transaction::table(table, true), &b,
6196 table->record[n], extra_row_info_ptr);
6197 assert(ret == 0);
6198 }
6199 break;
6200 case NDBEVENT::TE_UPDATE:
6201 if (likely(count_this_event)) {
6202 row->n_updates++;
6203 trans_row_count++;
6204 }
6205 DBUG_PRINT("info",
6206 ("UPDATE %s.%s", table->s->db.str, table->s->table_name.str));
6207 {
6208 int ret;
6209 (void)ret; // Bug27150740 HANDLE_DATA_EVENT NEED ERROR HANDLING
6210 if (event_data->have_blobs) {
6211 ptrdiff_t ptrdiff = 0;
6212 ret = get_ndb_blobs_value(table, event_data->ndb_value[0],
6213 blobs_buffer[0], blobs_buffer_size[0],
6214 ptrdiff);
6215 assert(ret == 0);
6216 }
6217 ndb_unpack_record(table, event_data->ndb_value[0], &b,
6218 table->record[0]);
6219 DBUG_EXECUTE("info",
6220 Ndb_table_map::print_record(table, table->record[0]););
6221 if (table->s->primary_key != MAX_KEY &&
6222 !share->get_binlog_use_update()) {
6223 /*
6224 since table has a primary key, we can do a write
6225 using only after values
6226 */
6227 ret = trans.write_row(logged_server_id,
6228 injector::transaction::table(table, true), &b,
6229 table->record[0], // after values
6230 extra_row_info_ptr);
6231 assert(ret == 0);
6232 } else {
6233 /*
6234 mysql server cannot handle the ndb hidden key and
6235 therefore needs the before image as well
6236 */
6237 if (event_data->have_blobs) {
6238 ptrdiff_t ptrdiff = table->record[1] - table->record[0];
6239 ret = get_ndb_blobs_value(table, event_data->ndb_value[1],
6240 blobs_buffer[1], blobs_buffer_size[1],
6241 ptrdiff);
6242 assert(ret == 0);
6243 }
6244 ndb_unpack_record(table, event_data->ndb_value[1], &b,
6245 table->record[1]);
6246 DBUG_EXECUTE("info",
6247 Ndb_table_map::print_record(table, table->record[1]););
6248
6249 MY_BITMAP col_bitmap_before_update;
6250 my_bitmap_map bitbuf[(NDB_MAX_ATTRIBUTES_IN_TABLE +
6251 8 * sizeof(my_bitmap_map) - 1) /
6252 (8 * sizeof(my_bitmap_map))];
6253 ndb_bitmap_init(col_bitmap_before_update, bitbuf, table->s->fields);
6254 if (share->get_binlog_update_minimal()) {
6255 event_data->generate_minimal_bitmap(&col_bitmap_before_update, &b);
6256 } else {
6257 bitmap_copy(&col_bitmap_before_update, &b);
6258 }
6259
6260 ret = trans.update_row(logged_server_id,
6261 injector::transaction::table(table, true),
6262 &col_bitmap_before_update, &b,
6263 table->record[1], // before values
6264 table->record[0], // after values
6265 extra_row_info_ptr);
6266 assert(ret == 0);
6267 }
6268 }
6269 break;
6270 default:
6271 /* We should REALLY never get here. */
6272 DBUG_PRINT("info", ("default - uh oh, a brain exploded."));
6273 break;
6274 }
6275
6276 if (event_data->have_blobs) {
6277 my_free(blobs_buffer[0]);
6278 my_free(blobs_buffer[1]);
6279 }
6280
6281 return 0;
6282 }
6283
6284 /****************************************************************
6285 Injector thread main loop
6286 ****************************************************************/
6287
6288 void Ndb_binlog_thread::remove_event_operations(Ndb *ndb) const {
6289 DBUG_TRACE;
6290 NdbEventOperation *op;
6291 while ((op = ndb->getEventOperation())) {
6292 DBUG_ASSERT(
6293 !ndb_name_is_blob_prefix(op->getEvent()->getTable()->getName()));
6294 DBUG_PRINT("info",
6295 ("removing event operation on %s", op->getEvent()->getName()));
6296
6297 Ndb_event_data *event_data = (Ndb_event_data *)op->getCustomData();
6298 DBUG_ASSERT(event_data);
6299
6300 NDB_SHARE *share = event_data->share;
6301 DBUG_ASSERT(share != NULL);
6302 DBUG_ASSERT(share->op == op);
6303 Ndb_event_data::destroy(event_data);
6304 op->setCustomData(NULL);
6305
6306 mysql_mutex_lock(&share->mutex);
6307 share->op = 0;
6308 mysql_mutex_unlock(&share->mutex);
6309
6310 NDB_SHARE::release_reference(share, "binlog");
6311
6312 ndb->dropEventOperation(op);
6313 }
6314 }
6315
6316 void Ndb_binlog_thread::remove_all_event_operations(Ndb *s_ndb,
6317 Ndb *i_ndb) const {
6318 DBUG_TRACE;
6319
6320 if (ndb_apply_status_share) {
6321 NDB_SHARE::release_reference(ndb_apply_status_share,
6322 "ndb_apply_status_share");
6323 ndb_apply_status_share = NULL;
6324 }
6325
6326 if (s_ndb) remove_event_operations(s_ndb);
6327
6328 if (i_ndb) remove_event_operations(i_ndb);
6329
6330 if (ndb_log_get_verbose_level() > 15) {
6331 NDB_SHARE::print_remaining_open_tables();
6332 }
6333 }
6334
6335 static long long g_event_data_count = 0;
6336 static long long g_event_nondata_count = 0;
6337 static long long g_event_bytes_count = 0;
6338
6339 static void update_injector_stats(Ndb *schemaNdb, Ndb *dataNdb) {
6340 // Update globals to sum of totals from each listening Ndb object
6341 g_event_data_count = schemaNdb->getClientStat(Ndb::DataEventsRecvdCount) +
6342 dataNdb->getClientStat(Ndb::DataEventsRecvdCount);
6343 g_event_nondata_count =
6344 schemaNdb->getClientStat(Ndb::NonDataEventsRecvdCount) +
6345 dataNdb->getClientStat(Ndb::NonDataEventsRecvdCount);
6346 g_event_bytes_count = schemaNdb->getClientStat(Ndb::EventBytesRecvdCount) +
6347 dataNdb->getClientStat(Ndb::EventBytesRecvdCount);
6348 }
6349
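// The injector event counters above are exposed as global status
// variables (with the Ndb_ prefix) and can be inspected with e.g.:
//   SHOW GLOBAL STATUS LIKE 'Ndb_api_event%injector';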
6350 static SHOW_VAR ndb_status_vars_injector[] = {
6351 {"api_event_data_count_injector",
6352 reinterpret_cast<char *>(&g_event_data_count), SHOW_LONGLONG,
6353 SHOW_SCOPE_GLOBAL},
6354 {"api_event_nondata_count_injector",
6355 reinterpret_cast<char *>(&g_event_nondata_count), SHOW_LONGLONG,
6356 SHOW_SCOPE_GLOBAL},
6357 {"api_event_bytes_count_injector",
6358 reinterpret_cast<char *>(&g_event_bytes_count), SHOW_LONGLONG,
6359 SHOW_SCOPE_GLOBAL},
6360 {NullS, NullS, SHOW_LONG, SHOW_SCOPE_GLOBAL}};
6361
6362 int show_ndb_status_injector(THD *, SHOW_VAR *var, char *) {
6363 var->type = SHOW_ARRAY;
6364 var->value = reinterpret_cast<char *>(&ndb_status_vars_injector);
6365 return 0;
6366 }
6367
6368 /**
6369 injectApplyStatusWriteRow
6370
6371 Inject a WRITE_ROW event on the ndb_apply_status table into
6372 the Binlog.
6373 This contains our server_id and the supplied epoch number.
6374 When applied on the Slave it gives a transactional position
6375 marker
6376 */
6377 static bool injectApplyStatusWriteRow(injector::transaction &trans,
6378 ulonglong gci) {
6379 DBUG_TRACE;
6380 if (ndb_apply_status_share == NULL) {
6381 ndb_log_error("Could not get apply status share");
6382 DBUG_ASSERT(ndb_apply_status_share != NULL);
6383 return false;
6384 }
6385
6386 longlong gci_to_store = (longlong)gci;
6387
6388 #ifndef DBUG_OFF
6389 if (DBUG_EVALUATE_IF("ndb_binlog_injector_cycle_gcis", true, false)) {
6390 ulonglong gciHi = ((gci_to_store >> 32) & 0xffffffff);
6391 ulonglong gciLo = (gci_to_store & 0xffffffff);
6392 gciHi = (gciHi % 3);
6393 ndb_log_warning("Binlog injector cycling gcis (%llu -> %llu)", gci_to_store,
6394 (gciHi << 32) + gciLo);
6395 gci_to_store = (gciHi << 32) + gciLo;
6396 }
6397 if (DBUG_EVALUATE_IF("ndb_binlog_injector_repeat_gcis", true, false)) {
6398 ulonglong gciHi = ((gci_to_store >> 32) & 0xffffffff);
6399 ulonglong gciLo = (gci_to_store & 0xffffffff);
6400 gciHi = 0xffffff00;
6401 gciLo = 0;
6402 ndb_log_warning("Binlog injector repeating gcis (%llu -> %llu)",
6403 gci_to_store, (gciHi << 32) + gciLo);
6404 gci_to_store = (gciHi << 32) + gciLo;
6405 }
6406 #endif
6407
6408   /* Build the row buffer for the generated ndb_apply_status
6409      WRITE_ROW event.
6410      First get the relevant table structure.
6411 */
6412 DBUG_ASSERT(ndb_apply_status_share->op);
6413 Ndb_event_data *event_data =
6414 (Ndb_event_data *)ndb_apply_status_share->op->getCustomData();
6415 DBUG_ASSERT(event_data);
6416 DBUG_ASSERT(event_data->shadow_table);
6417 TABLE *apply_status_table = event_data->shadow_table;
6418
6419 /*
6420     Initialize apply_status_table->record[0]
6421
6422 When iterating past the end of the last epoch, the first event of
6423 the new epoch may be on ndb_apply_status. Its event data saved
6424 in record[0] would be overwritten here by a subsequent event on a
6425 normal table. So save and restore its record[0].
6426 */
6427 static const ulong sav_max = 512; // current is 284
6428 const ulong sav_len = apply_status_table->s->reclength;
6429 DBUG_ASSERT(sav_len <= sav_max);
6430 uchar sav_buf[sav_max];
6431 memcpy(sav_buf, apply_status_table->record[0], sav_len);
6432 empty_record(apply_status_table);
6433
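/*
  Fill in the ndb_apply_status columns:
    field[0] = server_id, field[1] = epoch, field[2] = log_name,
    field[3] = start_pos, field[4] = end_pos
  Only server_id and epoch carry real values here, log_name is left
  empty and the positions are zero.
*/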
6434 apply_status_table->field[0]->store((longlong)::server_id, true);
6435 apply_status_table->field[1]->store((longlong)gci_to_store, true);
6436 apply_status_table->field[2]->store("", 0, &my_charset_bin);
6437 apply_status_table->field[3]->store((longlong)0, true);
6438 apply_status_table->field[4]->store((longlong)0, true);
6439 #ifndef DBUG_OFF
6440 const LEX_CSTRING &name = apply_status_table->s->table_name;
6441 DBUG_PRINT("info", ("use_table: %.*s", (int)name.length, name.str));
6442 #endif
6443 injector::transaction::table tbl(apply_status_table, true);
6444 int ret = trans.use_table(::server_id, tbl);
6445 ndbcluster::ndbrequire(ret == 0);
6446
6447 ret = trans.write_row(
6448 ::server_id, injector::transaction::table(apply_status_table, true),
6449 &apply_status_table->s->all_set, apply_status_table->record[0]);
6450
6451 assert(ret == 0);
6452
6453 memcpy(apply_status_table->record[0], sav_buf, sav_len);
6454 return true;
6455 }
6456
6457 extern ulong opt_ndb_report_thresh_binlog_epoch_slip;
6458 extern ulong opt_ndb_report_thresh_binlog_mem_usage;
6459 extern ulong opt_ndb_eventbuffer_max_alloc;
6460 extern uint opt_ndb_eventbuffer_free_percent;
6461
6462 Ndb_binlog_thread::Ndb_binlog_thread() : Ndb_component("Binlog") {}
6463
6464 Ndb_binlog_thread::~Ndb_binlog_thread() {}
6465
6466 void Ndb_binlog_thread::do_wakeup() {
6467 log_info("Wakeup");
6468
6469 /*
6470 The binlog thread is normally waiting for another
6471     event from the cluster with a short timeout and should
6472     soon (within 1 second) detect that stop has been requested.
6473
6474     There is really no purpose (yet) in signalling some condition
6475     to wake the thread up should it be waiting somewhere
6476     else, since those waits are also short.
6477 */
6478 }
6479
6480 bool Ndb_binlog_thread::check_reconnect_incident(
6481 THD *thd, injector *inj, Reconnect_type incident_id) const {
6482 log_verbose(1, "Check for incidents");
6483
6484 if (incident_id == MYSQLD_STARTUP) {
6485 LOG_INFO log_info;
6486 mysql_bin_log.get_current_log(&log_info);
6487 log_verbose(60, " - current binlog file: %s", log_info.log_file_name);
6488
6489 uint log_number = 0;
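    // The binlog file name ends with a 6-digit sequence number,
    // e.g. "binlog.000001" parses to log_number 1.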
6490 if ((sscanf(strend(log_info.log_file_name) - 6, "%u", &log_number) == 1) &&
6491 log_number == 1) {
6492 /*
6493         This is the first binlog file, skip writing incident since
6494 there is really no log to have a gap in
6495 */
6496 log_verbose(60, " - skipping incident for first log, log_number: %u",
6497 log_number);
6498 return false; // No incident written
6499 }
6500 log_verbose(60, " - current binlog file number: %u", log_number);
6501 }
6502
6503 // Write an incident event to the binlog since it's not possible to know what
6504 // has happened in the cluster while not being connected.
6505 LEX_CSTRING msg;
6506 switch (incident_id) {
6507 case MYSQLD_STARTUP:
6508 msg = {STRING_WITH_LEN("mysqld startup")};
6509 break;
6510 case CLUSTER_DISCONNECT:
6511 msg = {STRING_WITH_LEN("cluster disconnect")};
6512 break;
6513 }
6514 log_verbose(20, "Writing incident for %s", msg.str);
6515 (void)inj->record_incident(
6516 thd, binary_log::Incident_event::INCIDENT_LOST_EVENTS, msg);
6517
6518 return true; // Incident written
6519 }
6520
6521 bool Ndb_binlog_thread::handle_purge(const char *filename) {
6522 if (is_server_started()) {
6523 // The binlog thread currently only handles purge requests
6524     // that occur before "server started"
6525 return false;
6526 }
6527
6528 // The "server started" state is not yet reached, defer the purge request of
6529 // this binlog file to later and handle it just before entering main loop
6530 log_verbose(1, "Remember purge binlog file: '%s'", filename);
6531 std::lock_guard<std::mutex> lock_pending_purges(m_purge_mutex);
6532 m_pending_purges.push_back(filename);
6533 return true;
6534 }
6535
6536 void Ndb_binlog_thread::recall_pending_purges(THD *thd) {
6537 std::lock_guard<std::mutex> lock_pending_purges(m_purge_mutex);
6538
6539 // Iterate list of pending purges and delete corresponding
6540 // rows from ndb_binlog_index table
6541 for (const std::string &filename : m_pending_purges) {
6542 log_verbose(1, "Purging binlog file: '%s'", filename.c_str());
6543
6544 if (Ndb_binlog_index_table_util::remove_rows_for_file(thd,
6545 filename.c_str())) {
6546 log_warning("Failed to purge binlog file: '%s'", filename.c_str());
6547 }
6548 }
6549 // All pending purges performed, clear the list
6550 m_pending_purges.clear();
6551 }
6552
6553 /*
6554 Events are handled one epoch at a time.
6555 Handle the lowest available epoch first.
6556 */
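/*
  For example, a schema event in epoch 11/0 and a data event in epoch
  10/3 yield 10/3, i.e. the older data epoch is handled first.
*/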
6557 static Uint64 find_epoch_to_handle(const NdbEventOperation *s_pOp,
6558 const NdbEventOperation *i_pOp) {
6559 if (i_pOp != NULL) {
6560 if (s_pOp != NULL) {
6561 return std::min(i_pOp->getEpoch(), s_pOp->getEpoch());
6562 }
6563 return i_pOp->getEpoch();
6564 }
6565 if (s_pOp != NULL) {
6566 if (ndb_binlog_running) {
6567 return std::min(ndb_latest_received_binlog_epoch, s_pOp->getEpoch());
6568 }
6569 return s_pOp->getEpoch();
6570 }
6571 // 'latest_received' is '0' if not binlogging
6572 return ndb_latest_received_binlog_epoch;
6573 }
6574
6575 void Ndb_binlog_thread::do_run() {
6576 THD *thd; /* needs to be first for thread_stack */
6577 Ndb *i_ndb = NULL;
6578 Ndb *s_ndb = NULL;
6579 Thd_ndb *thd_ndb = NULL;
6580 injector *inj = injector::instance();
6581 Global_THD_manager *thd_manager = Global_THD_manager::get_instance();
6582
6583 enum {
6584 BCCC_starting,
6585 BCCC_running,
6586 BCCC_restart,
6587 } binlog_thread_state;
6588
6589 /* Controls that only one incident is written per reconnect */
6590 bool do_reconnect_incident = true;
6591   /* Controls message of the reconnect incident */
6592 Reconnect_type reconnect_incident_id = MYSQLD_STARTUP;
6593
6594 DBUG_TRACE;
6595
6596 log_info("Starting...");
6597
6598 thd = new THD; /* note that constructor of THD uses DBUG_ */
6599 THD_CHECK_SENTRY(thd);
6600
6601 /* We need to set thd->thread_id before thd->store_globals, or it will
6602 set an invalid value for thd->variables.pseudo_thread_id.
6603 */
6604 thd->set_new_thread_id();
6605
6606 thd->thread_stack = (char *)&thd; /* remember where our stack is */
6607 thd->store_globals();
6608
6609 thd->set_command(COM_DAEMON);
6610 thd->system_thread = SYSTEM_THREAD_NDBCLUSTER_BINLOG;
6611 thd->get_protocol_classic()->set_client_capabilities(0);
6612 thd->security_context()->skip_grants();
6613   // Create thd->net without vio
6614 thd->get_protocol_classic()->init_net((Vio *)0);
6615
6616   // Ndb binlog thread always uses row format
6617 thd->set_current_stmt_binlog_format_row();
6618
6619 thd->real_id = my_thread_self();
6620 thd_manager->add_thd(thd);
6621 thd->lex->start_transaction_opt = 0;
6622
6623 log_info("Started");
6624
6625 Ndb_binlog_setup binlog_setup(thd);
6626 Ndb_schema_dist_data schema_dist_data;
6627
6628 restart_cluster_failure:
6629 /**
6630 * Maintain a current schema & injector eventOp to be handled.
6631 * s_pOp and s_ndb handle events from the 'ndb_schema' dist table,
6632    * while i_pOp and i_ndb are for binlogging 'everything else'.
6633 */
6634 NdbEventOperation *s_pOp = NULL;
6635 NdbEventOperation *i_pOp = NULL;
6636 binlog_thread_state = BCCC_starting;
6637
6638 log_verbose(1, "Setting up");
6639
6640 if (!(thd_ndb = Thd_ndb::seize(thd))) {
6641 log_error("Creating Thd_ndb object failed");
6642 goto err;
6643 }
6644 thd_ndb->set_option(Thd_ndb::NO_LOG_SCHEMA_OP);
6645
6646 if (!(s_ndb = new (std::nothrow) Ndb(g_ndb_cluster_connection)) ||
6647 s_ndb->setNdbObjectName("schema change monitoring") || s_ndb->init()) {
6648 log_error("Creating schema Ndb object failed");
6649 goto err;
6650 }
6651 log_verbose(49, "Created schema Ndb object, reference: 0x%x, name: '%s'",
6652 s_ndb->getReference(), s_ndb->getNdbObjectName());
6653
6654   // Create the injector Ndb object, not tied to any default database
6655 if (!(i_ndb = new (std::nothrow) Ndb(g_ndb_cluster_connection)) ||
6656 i_ndb->setNdbObjectName("data change monitoring") || i_ndb->init()) {
6657 log_error("Creating injector Ndb object failed");
6658 goto err;
6659 }
6660 log_verbose(49, "Created injector Ndb object, reference: 0x%x, name: '%s'",
6661 i_ndb->getReference(), i_ndb->getNdbObjectName());
6662
6663 /* Set free percent event buffer needed to resume buffering */
6664 if (i_ndb->set_eventbuffer_free_percent(opt_ndb_eventbuffer_free_percent)) {
6665 log_error("Setting eventbuffer free percent failed");
6666 goto err;
6667 }
6668
6669 log_verbose(10, "Exposing global references");
6670 /*
6671 Expose global reference to our Ndb object.
6672
6673 Used by both sql client thread and binlog thread to interact
6674 with the storage
6675 */
6676 mysql_mutex_lock(&injector_event_mutex);
6677 injector_thd = thd;
6678 injector_ndb = i_ndb;
6679 schema_ndb = s_ndb;
6680 DBUG_PRINT("info", ("set schema_ndb to s_ndb"));
6681 mysql_mutex_unlock(&injector_event_mutex);
6682
6683 if (opt_bin_log && opt_ndb_log_bin) {
6684 // Binary log has been enabled for the server and changes
6685 // to NDB tables should be logged
6686 ndb_binlog_running = true;
6687 }
6688 log_verbose(1, "Setup completed");
6689
6690 /*
6691 Wait for the MySQL Server to start (so that the binlog is started
6692 and thus can receive the first GAP event)
6693 */
6694 if (!wait_for_server_started()) {
6695 goto err;
6696 }
6697
6698 // Defer call of THD::init_query_mem_roots until after
6699 // wait_for_server_started() to ensure that the parts of
6700   // MySQL Server it uses have been created
6701 thd->init_query_mem_roots();
6702 lex_start(thd);
6703
6704 if (do_reconnect_incident && ndb_binlog_running) {
6705 if (check_reconnect_incident(thd, inj, reconnect_incident_id)) {
6706 // Incident written, don't report incident again unless Ndb_binlog_thread
6707 // is restarted
6708 do_reconnect_incident = false;
6709 }
6710 }
6711 reconnect_incident_id = CLUSTER_DISCONNECT;
6712
6713 // Handle pending purge requests from before "server started" state
6714 recall_pending_purges(thd);
6715
6716 {
6717 log_verbose(1, "Wait for cluster to start");
6718 thd->proc_info = "Waiting for ndbcluster to start";
6719 thd_set_thd_ndb(thd, thd_ndb);
6720
6721 while (!ndbcluster_is_connected(1) || !binlog_setup.setup(thd_ndb)) {
6722 // Failed to complete binlog_setup, remove all existing event
6723 // operations from potential partial setup
6724 remove_all_event_operations(s_ndb, i_ndb);
6725
6726       // Fail any schema operations that have been registered but
6727 // never reached the coordinator
6728 NDB_SCHEMA_OBJECT::fail_all_schema_ops(Ndb_schema_dist::COORD_ABORT,
6729 "Aborted after setup");
6730
6731 if (!thd_ndb->valid_ndb()) {
6732 /*
6733           Cluster has gone away before setup was completed.
6734           Restart the binlog thread to get rid of any garbage
6735           on the ndb objects
6736 */
6737 binlog_thread_state = BCCC_restart;
6738 goto err;
6739 }
6740 if (is_stop_requested()) {
6741 goto err;
6742 }
6743 if (thd->killed == THD::KILL_CONNECTION) {
6744 /*
6745 Since the ndb binlog thread adds itself to the "global thread list"
6746           it needs to look at the "killed" flag and stop the thread to avoid
6747           the server hanging during shutdown while waiting for the "global
6748           thread list" to be empty.
6749 */
6750 log_info(
6751 "Server shutdown detected while "
6752 "waiting for ndbcluster to start...");
6753 goto err;
6754 }
6755 log_and_clear_thd_conditions(thd, condition_logging_level::WARNING);
6756 ndb_milli_sleep(1000);
6757 } // while (!ndb_binlog_setup())
6758
6759 DBUG_ASSERT(ndbcluster_hton->slot != ~(uint)0);
6760
6761 /*
6762 Prevent schema dist participant from (implicitly)
6763 taking GSL lock as part of taking MDL lock
6764 */
6765 thd_ndb->set_option(Thd_ndb::IS_SCHEMA_DIST_PARTICIPANT);
6766 }
6767
6768 /* Apply privilege statements stored in snapshot */
6769 if (!Ndb_stored_grants::apply_stored_grants(thd)) {
6770 ndb_log_error("stored grants: failed to apply stored grants.");
6771 }
6772
6773 schema_dist_data.init(g_ndb_cluster_connection);
6774
6775 {
6776 log_verbose(1, "Wait for first event");
6777 // wait for the first event
6778 thd->proc_info = "Waiting for first event from ndbcluster";
6779 Uint64 schema_gci;
6780 do {
6781 DBUG_PRINT("info", ("Waiting for the first event"));
6782
6783 if (is_stop_requested()) goto err;
6784
6785 my_thread_yield();
6786 mysql_mutex_lock(&injector_event_mutex);
6787 (void)s_ndb->pollEvents(100, &schema_gci);
6788 mysql_mutex_unlock(&injector_event_mutex);
6789 } while (schema_gci == 0 || ndb_latest_received_binlog_epoch == schema_gci);
6790
6791 if (ndb_binlog_running) {
6792 Uint64 gci = i_ndb->getLatestGCI();
6793 while (gci < schema_gci || gci == ndb_latest_received_binlog_epoch) {
6794 if (is_stop_requested()) goto err;
6795
6796 my_thread_yield();
6797 mysql_mutex_lock(&injector_event_mutex);
6798 (void)i_ndb->pollEvents(10, &gci);
6799 mysql_mutex_unlock(&injector_event_mutex);
6800 }
6801 if (gci > schema_gci) {
6802 schema_gci = gci;
6803 }
6804 }
6805 // now check that we have epochs consistent with what we had before the
6806 // restart
6807 DBUG_PRINT("info", ("schema_gci: %u/%u", (uint)(schema_gci >> 32),
6808 (uint)(schema_gci)));
6809 {
6810 i_ndb->flushIncompleteEvents(schema_gci);
6811 s_ndb->flushIncompleteEvents(schema_gci);
6812 if (schema_gci < ndb_latest_handled_binlog_epoch) {
6813 log_error(
6814 "cluster has been restarted --initial or with older filesystem. "
6815 "ndb_latest_handled_binlog_epoch: %u/%u, while current epoch: "
6816 "%u/%u. "
6817 "RESET MASTER should be issued. Resetting "
6818 "ndb_latest_handled_binlog_epoch.",
6819 (uint)(ndb_latest_handled_binlog_epoch >> 32),
6820 (uint)(ndb_latest_handled_binlog_epoch), (uint)(schema_gci >> 32),
6821 (uint)(schema_gci));
6822 ndb_set_latest_trans_gci(0);
6823 ndb_latest_handled_binlog_epoch = 0;
6824 ndb_latest_applied_binlog_epoch = 0;
6825 ndb_latest_received_binlog_epoch = 0;
6826 ndb_index_stat_restart();
6827 } else if (ndb_latest_applied_binlog_epoch > 0) {
6828 log_warning(
6829 "cluster has reconnected. "
6830 "Changes to the database that occurred while "
6831 "disconnected will not be in the binlog");
6832 }
6833 log_verbose(1, "starting log at epoch %u/%u", (uint)(schema_gci >> 32),
6834 (uint)(schema_gci));
6835 }
6836 log_verbose(1, "Got first event");
6837 }
6838 /*
6839 binlog thread is ready to receive events
6840 - client threads may now start updating data, i.e. tables are
6841 no longer read only
6842 */
6843 mysql_mutex_lock(&injector_data_mutex);
6844 ndb_binlog_is_ready = true;
6845 mysql_mutex_unlock(&injector_data_mutex);
6846
6847 log_verbose(1, "ndb tables writable");
6848 ndb_tdc_close_cached_tables();
6849
6850 /*
6851 Signal any waiting thread that ndb table setup is
6852 now complete
6853 */
6854 ndb_notify_tables_writable();
6855
6856 {
6857 static LEX_CSTRING db_lex_cstr = EMPTY_CSTR;
6858 thd->reset_db(db_lex_cstr);
6859 }
6860
6861 log_verbose(1, "Startup and setup completed");
6862
6863 /*
6864 Main NDB Injector loop
6865 */
6866 do_reconnect_incident = true; // Report incident if disconnected
6867 binlog_thread_state = BCCC_running;
6868
6869 /**
6870    * The injector loop runs until it takes itself out of the 'BCCC_running'
6871    * state, or we get a stop request from outside. In the latter case we
6872    * ensure that all ongoing transaction epochs are completed first.
6873 */
6874 while (binlog_thread_state == BCCC_running &&
6875 (!is_stop_requested() ||
6876 ndb_latest_handled_binlog_epoch < ndb_get_latest_trans_gci())) {
6877 #ifndef DBUG_OFF
6878 /**
6879 * As the Binlog thread is not a client thread, the 'set debug' commands
6880      * do not affect it. Update our thread-local debug settings from 'global'
6881 */
6882 {
6883 char buf[256];
6884 DBUG_EXPLAIN_INITIAL(buf, sizeof(buf));
6885 DBUG_SET(buf);
6886 }
6887 #endif
6888
6889 /*
6890 now we don't want any events before next gci is complete
6891 */
6892 thd->proc_info = "Waiting for event from ndbcluster";
6893 thd->set_time();
6894
6895 /**
6896 * The binlog-thread holds the injector_mutex when waiting for
6897 * pollEvents() - which is >99% of the elapsed time. As the
6898 * native mutex guarantees no 'fairness', there is no guarantee
6899      * that another thread waiting for the mutex will immediately
6900 * get the lock when unlocked by this thread. Thus this thread
6901 * may lock it again rather soon and starve the waiting thread.
6902 * To avoid this, my_thread_yield() is used to give any waiting
6903 * threads a chance to run and grab the injector_mutex when
6904 * it is available. The same pattern is used multiple places
6905 * in the BI-thread where there are wait-loops holding this mutex.
6906 */
6907 my_thread_yield();
6908
6909 /* Can't hold mutex too long, so wait for events in 10ms steps */
6910 int tot_poll_wait = 10;
6911
6912     // If there are remaining unhandled injector eventOps we continue
6913     // handling these, else poll for more.
6914 if (i_pOp == NULL) {
6915 // Capture any dynamic changes to max_alloc
6916 i_ndb->set_eventbuf_max_alloc(opt_ndb_eventbuffer_max_alloc);
6917
6918 mysql_mutex_lock(&injector_event_mutex);
6919 Uint64 latest_epoch = 0;
6920 const int poll_wait = (ndb_binlog_running) ? tot_poll_wait : 0;
6921 const int res = i_ndb->pollEvents(poll_wait, &latest_epoch);
6922 (void)res; // Unused except DBUG_PRINT
6923 mysql_mutex_unlock(&injector_event_mutex);
6924 i_pOp = i_ndb->nextEvent();
6925 if (ndb_binlog_running) {
6926 ndb_latest_received_binlog_epoch = latest_epoch;
6927 tot_poll_wait = 0;
6928 }
6929 DBUG_PRINT("info", ("pollEvents res: %d", res));
6930 }
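    // When binlogging, the injector poll above has already waited for
    // events and tot_poll_wait has been set to 0, so the schema poll
    // below will not wait again.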
6931
6932 // Epoch to handle from i_ndb. Use latest 'empty epoch' if no events.
6933 const Uint64 i_epoch =
6934 (i_pOp != NULL) ? i_pOp->getEpoch() : ndb_latest_received_binlog_epoch;
6935
6936     // If there are remaining unhandled schema eventOps we continue
6937     // handling these, else poll for more.
6938 if (s_pOp == NULL) {
6939 if (DBUG_EVALUATE_IF("ndb_binlog_injector_yield_before_schema_pollEvent",
6940 true, false)) {
6941 /**
6942          * Simulate that the binlog thread yields the CPU in between
6943 * these two pollEvents, which can result in reading a
6944 * 'schema_gci > gci'. (Likely due to mutex locking)
6945 */
6946 ndb_milli_sleep(50);
6947 }
6948
6949 Uint64 schema_epoch = 0;
6950 mysql_mutex_lock(&injector_event_mutex);
6951 int schema_res = s_ndb->pollEvents(tot_poll_wait, &schema_epoch);
6952 mysql_mutex_unlock(&injector_event_mutex);
6953 s_pOp = s_ndb->nextEvent();
6954
6955 /*
6956         Make sure we have seen any schema epochs up to the injector epoch,
6957 or we have an earlier schema event to handle.
6958 */
6959 while (s_pOp == NULL && i_epoch > schema_epoch && schema_res >= 0) {
6960 static char buf[64];
6961 thd->proc_info = "Waiting for schema epoch";
6962 snprintf(buf, sizeof(buf), "%s %u/%u(%u/%u)", thd->proc_info,
6963 (uint)(schema_epoch >> 32), (uint)(schema_epoch),
6964 (uint)(ndb_latest_received_binlog_epoch >> 32),
6965 (uint)(ndb_latest_received_binlog_epoch));
6966 thd->proc_info = buf;
6967
6968 my_thread_yield();
6969 mysql_mutex_lock(&injector_event_mutex);
6970 schema_res = s_ndb->pollEvents(10, &schema_epoch);
6971 mysql_mutex_unlock(&injector_event_mutex);
6972 s_pOp = s_ndb->nextEvent();
6973 }
6974 }
6975
6976 /*
6977       We now have a (possibly empty) set of available events which the
6978       binlog injector should apply. These could span either a single,
6979 or possibly multiple epochs. In order to get the ordering between
6980 schema events and 'ordinary' events injected in a correct order
6981 relative to each other, we apply them one epoch at a time, with
6982 the schema events always applied first.
6983 */
6984
6985 // Calculate the epoch to handle events from in this iteration.
6986 const Uint64 current_epoch = find_epoch_to_handle(s_pOp, i_pOp);
6987 DBUG_ASSERT(current_epoch != 0 || !ndb_binlog_running);
6988
6989 // Did someone else request injector thread to stop?
6990 DBUG_ASSERT(binlog_thread_state == BCCC_running);
6991 if (is_stop_requested() &&
6992 (ndb_latest_handled_binlog_epoch >= ndb_get_latest_trans_gci() ||
6993 !ndb_binlog_running))
6994 break; /* Stopping thread */
6995
6996 if (thd->killed == THD::KILL_CONNECTION) {
6997 /*
6998 Since the ndb binlog thread adds itself to the "global thread list"
6999         it needs to look at the "killed" flag and stop the thread to avoid
7000         the server hanging during shutdown while waiting for the "global
7001         thread list" to be empty.
7002 In pre 5.6 versions the thread was also added to "global thread
7003 list" but the "global thread *count*" variable was not incremented
7004 and thus the same problem didn't exist.
7005 The only reason for adding the ndb binlog thread to "global thread
7006 list" is to be able to see the thread state using SHOW PROCESSLIST
7007 and I_S.PROCESSLIST
7008 */
7009 log_info("Server shutdown detected...");
7010 break;
7011 }
7012
7013 MEM_ROOT **root_ptr = THR_MALLOC;
7014 MEM_ROOT *old_root = *root_ptr;
7015 MEM_ROOT mem_root;
7016 init_sql_alloc(PSI_INSTRUMENT_ME, &mem_root, 4096, 0);
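    // Per-epoch MEM_ROOT used by the schema event handler, freed at the
    // bottom of this loop iteration after post_epoch() processing.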
7017
7018 // The Ndb_schema_event_handler does not necessarily need
7019     // to use the same memroot (or vice versa)
7020 Ndb_schema_event_handler schema_event_handler(
7021 thd, &mem_root, g_ndb_cluster_connection->node_id(), schema_dist_data);
7022
7023 *root_ptr = &mem_root;
7024
7025 if (unlikely(s_pOp != NULL && s_pOp->getEpoch() == current_epoch)) {
7026 thd->proc_info = "Processing events from schema table";
7027 g_ndb_log_slave_updates = opt_log_slave_updates;
7028 s_ndb->setReportThreshEventGCISlip(
7029 opt_ndb_report_thresh_binlog_epoch_slip);
7030 s_ndb->setReportThreshEventFreeMem(
7031 opt_ndb_report_thresh_binlog_mem_usage);
7032
7033       // Handle all schema events, limited to 'current_epoch'
7034 while (s_pOp != NULL && s_pOp->getEpoch() == current_epoch) {
7035 if (!s_pOp->hasError()) {
7036 schema_event_handler.handle_event(s_ndb, s_pOp);
7037
7038 if (DBUG_EVALUATE_IF("ndb_binlog_slow_failure_handling", true,
7039 false)) {
7040 if (!ndb_binlog_is_ready) {
7041 log_info("Just lost schema connection, hanging around");
7042 ndb_milli_sleep(10 * 1000); // seconds * 1000
7043             /* There could be a race where the client side reconnects before we
7044              * are able to detect 's_ndb->getEventOperation() == NULL'.
7045              * Thus, we never restart the binlog thread as we are supposed to.
7046 * -> 'ndb_binlog_is_ready' remains false and we get stuck in
7047 * RO-mode
7048 */
7049 log_info("...and on our way");
7050 }
7051 }
7052
7053 DBUG_PRINT("info",
7054 ("s_ndb first: %s", s_ndb->getEventOperation()
7055 ? s_ndb->getEventOperation()
7056 ->getEvent()
7057 ->getTable()
7058 ->getName()
7059 : "<empty>"));
7060 DBUG_PRINT("info",
7061 ("i_ndb first: %s", i_ndb->getEventOperation()
7062 ? i_ndb->getEventOperation()
7063 ->getEvent()
7064 ->getTable()
7065 ->getName()
7066 : "<empty>"));
7067 } else {
7068 log_error("error %d (%s) on handling binlog schema event",
7069 s_pOp->getNdbError().code, s_pOp->getNdbError().message);
7070 }
7071 s_pOp = s_ndb->nextEvent();
7072 }
7073 update_injector_stats(s_ndb, i_ndb);
7074 }
7075
7076 Uint64 inconsistent_epoch = 0;
7077 if (!ndb_binlog_running) {
7078 /*
7079         Just consume any events when not binlogging; data events are
7080         not used, but non-data events (e.g. node failure events) are handled
7081 */
7082 while (i_pOp != NULL && i_pOp->getEpoch() == current_epoch) {
7083 if ((unsigned)i_pOp->getEventType() >=
7084 (unsigned)NDBEVENT::TE_FIRST_NON_DATA_EVENT) {
7085 ndb_binlog_index_row row;
7086 handle_non_data_event(thd, i_pOp, row);
7087 }
7088 i_pOp = i_ndb->nextEvent();
7089 }
7090 update_injector_stats(s_ndb, i_ndb);
7091 }
7092
7093 // i_pOp == NULL means an inconsistent epoch or the queue is empty
7094 else if (i_pOp == NULL && !i_ndb->isConsistent(inconsistent_epoch)) {
7095 char errmsg[72];
7096 snprintf(errmsg, sizeof(errmsg),
7097 "Detected missing data in GCI %llu, "
7098 "inserting GAP event",
7099 inconsistent_epoch);
7100 DBUG_PRINT("info", ("Detected missing data in GCI %llu, "
7101 "inserting GAP event",
7102 inconsistent_epoch));
7103 LEX_CSTRING const msg = {errmsg, strlen(errmsg)};
7104 inj->record_incident(
7105 thd, binary_log::Incident_event::INCIDENT_LOST_EVENTS, msg);
7106 }
7107
7108     /* Handle all events within 'current_epoch', or possibly
7109      * log an empty epoch if log_empty_epochs is specified.
7110 */
7111 else if ((i_pOp != NULL && i_pOp->getEpoch() == current_epoch) ||
7112 (ndb_log_empty_epochs() &&
7113 current_epoch > ndb_latest_handled_binlog_epoch)) {
7114 thd->proc_info = "Processing events";
7115 ndb_binlog_index_row _row;
7116 ndb_binlog_index_row *rows = &_row;
7117 injector::transaction trans;
7118 unsigned trans_row_count = 0;
7119 unsigned trans_slave_row_count = 0;
7120
7121 if (i_pOp == NULL || i_pOp->getEpoch() != current_epoch) {
7122 /*
7123           Must be an empty epoch, since the condition
7124           (ndb_log_empty_epochs() &&
7125            current_epoch > ndb_latest_handled_binlog_epoch)
7126           must be true. Write the empty epoch into
7127           ndb_binlog_index.
7128 */
7129 DBUG_ASSERT(ndb_log_empty_epochs());
7130 DBUG_ASSERT(current_epoch > ndb_latest_handled_binlog_epoch);
7131 DBUG_PRINT("info", ("Writing empty epoch for gci %llu", current_epoch));
7132 DBUG_PRINT("info", ("Initializing transaction"));
7133 inj->new_trans(thd, &trans);
7134 rows = &_row;
7135 memset(&_row, 0, sizeof(_row));
7136 thd->variables.character_set_client = &my_charset_latin1;
7137 goto commit_to_binlog;
7138 } else {
7139 assert(i_pOp != NULL && i_pOp->getEpoch() == current_epoch);
7140 rows = &_row;
7141
7142 DBUG_PRINT("info",
7143 ("Handling epoch: %u/%u", (uint)(current_epoch >> 32),
7144 (uint)(current_epoch)));
7145 // sometimes get TE_ALTER with invalid table
7146 DBUG_ASSERT(
7147 i_pOp->getEventType() == NdbDictionary::Event::TE_ALTER ||
7148 !ndb_name_is_blob_prefix(i_pOp->getEvent()->getTable()->getName()));
7149 DBUG_ASSERT(current_epoch <= ndb_latest_received_binlog_epoch);
7150
7151 /* Update our thread-local debug settings based on the global */
7152 #ifndef DBUG_OFF
7153 /* Get value of global...*/
7154 {
7155 char buf[256];
7156 DBUG_EXPLAIN_INITIAL(buf, sizeof(buf));
7157 // fprintf(stderr, "Ndb Binlog Injector, setting debug to %s\n",
7158 // buf);
7159 DBUG_SET(buf);
7160 }
7161 #endif
7162
7163 /* initialize some variables for this epoch */
7164
7165 i_ndb->set_eventbuf_max_alloc(opt_ndb_eventbuffer_max_alloc);
7166 g_ndb_log_slave_updates = opt_log_slave_updates;
7167 i_ndb->setReportThreshEventGCISlip(
7168 opt_ndb_report_thresh_binlog_epoch_slip);
7169 i_ndb->setReportThreshEventFreeMem(
7170 opt_ndb_report_thresh_binlog_mem_usage);
7171
7172 memset(&_row, 0, sizeof(_row));
7173 thd->variables.character_set_client = &my_charset_latin1;
7174 DBUG_PRINT("info", ("Initializing transaction"));
7175 inj->new_trans(thd, &trans);
7176 trans_row_count = 0;
7177 trans_slave_row_count = 0;
7178 // pass table map before epoch
7179 {
7180 Uint32 iter = 0;
7181 const NdbEventOperation *gci_op;
7182 Uint32 event_types;
7183 Uint32 cumulative_any_value;
7184
7185 while ((gci_op = i_ndb->getNextEventOpInEpoch3(
7186 &iter, &event_types, &cumulative_any_value)) != NULL) {
7187 Ndb_event_data *event_data =
7188 (Ndb_event_data *)gci_op->getCustomData();
7189 NDB_SHARE *share = (event_data) ? event_data->share : NULL;
7190 DBUG_PRINT("info",
7191 ("per gci_op: 0x%lx share: 0x%lx event_types: 0x%x",
7192 (long)gci_op, (long)share, event_types));
7193 // workaround for interface returning TE_STOP events
7194 // which are normally filtered out below in the nextEvent loop
7195 if ((event_types & ~NdbDictionary::Event::TE_STOP) == 0) {
7196 DBUG_PRINT("info", ("Skipped TE_STOP on table %s",
7197 gci_op->getEvent()->getTable()->getName()));
7198 continue;
7199 }
7200 // this should not happen
7201 if (share == NULL || event_data->shadow_table == NULL) {
7202 DBUG_PRINT("info", ("no share or table %s!",
7203 gci_op->getEvent()->getTable()->getName()));
7204 continue;
7205 }
7206 if (share == ndb_apply_status_share) {
7207 // skip this table, it is handled specially
7208 continue;
7209 }
7210 TABLE *table = event_data->shadow_table;
7211 #ifndef DBUG_OFF
7212 const LEX_CSTRING &name = table->s->table_name;
7213 #endif
7214 if ((event_types & (NdbDictionary::Event::TE_INSERT |
7215 NdbDictionary::Event::TE_UPDATE |
7216 NdbDictionary::Event::TE_DELETE)) == 0) {
7217 DBUG_PRINT("info", ("skipping non data event table: %.*s",
7218 (int)name.length, name.str));
7219 continue;
7220 }
7221 if (!trans.good()) {
7222 DBUG_PRINT("info",
7223 ("Found new data event, initializing transaction"));
7224 inj->new_trans(thd, &trans);
7225 }
7226 {
7227 bool use_table = true;
7228 if (ndbcluster_anyvalue_is_reserved(cumulative_any_value)) {
7229 /*
7230 All events for this table in this epoch are marked as
7231 nologging, therefore we do not include the table in the epoch
7232 transaction.
7233 */
7234 if (ndbcluster_anyvalue_is_nologging(cumulative_any_value)) {
7235                   DBUG_PRINT("info", ("Skip binlogging table: %.*s",
7236 (int)name.length, name.str));
7237 use_table = false;
7238 }
7239 }
7240 if (use_table) {
7241 DBUG_PRINT("info",
7242 ("use_table: %.*s, cols %u", (int)name.length,
7243 name.str, table->s->fields));
7244 injector::transaction::table tbl(table, true);
7245 int ret = trans.use_table(::server_id, tbl);
7246 ndbcluster::ndbrequire(ret == 0);
7247 }
7248 }
7249 }
7250 }
7251 if (trans.good()) {
7252 /* Inject ndb_apply_status WRITE_ROW event */
7253 if (!injectApplyStatusWriteRow(trans, current_epoch)) {
7254 log_error("Failed to inject apply status write row");
7255 }
7256 }
7257
7258 do {
7259 if (i_pOp->hasError() && handle_error(i_pOp) < 0) goto err;
7260
7261 #ifndef DBUG_OFF
7262 {
7263 Ndb_event_data *event_data =
7264 (Ndb_event_data *)i_pOp->getCustomData();
7265 NDB_SHARE *share = (event_data) ? event_data->share : NULL;
7266 DBUG_PRINT("info",
7267 ("EVENT TYPE: %d Epoch: %u/%u last applied: %u/%u "
7268 "share: 0x%lx (%s.%s)",
7269 i_pOp->getEventType(), (uint)(current_epoch >> 32),
7270 (uint)(current_epoch),
7271 (uint)(ndb_latest_applied_binlog_epoch >> 32),
7272 (uint)(ndb_latest_applied_binlog_epoch), (long)share,
7273 share ? share->db : "'NULL'",
7274 share ? share->table_name : "'NULL'"));
7275 DBUG_ASSERT(share != 0);
7276 }
7277           // assert that there is consistency between gci op list
7278 // and event list
7279 {
7280 Uint32 iter = 0;
7281 const NdbEventOperation *gci_op;
7282 Uint32 event_types;
7283 while ((gci_op = i_ndb->getGCIEventOperations(
7284 &iter, &event_types)) != NULL) {
7285 if (gci_op == i_pOp) break;
7286 }
7287 DBUG_ASSERT(gci_op == i_pOp);
7288 DBUG_ASSERT((event_types & i_pOp->getEventType()) != 0);
7289 }
7290 #endif
7291
7292 if ((unsigned)i_pOp->getEventType() <
7293 (unsigned)NDBEVENT::TE_FIRST_NON_DATA_EVENT)
7294 handle_data_event(i_pOp, &rows, trans, trans_row_count,
7295 trans_slave_row_count);
7296 else {
7297 handle_non_data_event(thd, i_pOp, *rows);
7298 DBUG_PRINT("info",
7299 ("s_ndb first: %s", s_ndb->getEventOperation()
7300 ? s_ndb->getEventOperation()
7301 ->getEvent()
7302 ->getTable()
7303 ->getName()
7304 : "<empty>"));
7305 DBUG_PRINT("info",
7306 ("i_ndb first: %s", i_ndb->getEventOperation()
7307 ? i_ndb->getEventOperation()
7308 ->getEvent()
7309 ->getTable()
7310 ->getName()
7311 : "<empty>"));
7312 }
7313
7314 // Capture any dynamic changes to max_alloc
7315 i_ndb->set_eventbuf_max_alloc(opt_ndb_eventbuffer_max_alloc);
7316
7317 i_pOp = i_ndb->nextEvent();
7318 } while (i_pOp && i_pOp->getEpoch() == current_epoch);
7319
7320 update_injector_stats(s_ndb, i_ndb);
7321
7322 /*
7323 NOTE: i_pOp is now referring to an event in the next epoch
7324 or is == NULL
7325 */
7326
7327 while (trans.good()) {
7328 commit_to_binlog:
7329 if (!ndb_log_empty_epochs()) {
7330 /*
7331 If
7332 - We did not add any 'real' rows to the Binlog AND
7333 - We did not apply any slave row updates, only
7334 ndb_apply_status updates
7335 THEN
7336 Don't write the Binlog transaction which just
7337 contains ndb_apply_status updates.
7338               (For circular replication with log_apply_status, ndb_apply_status
7339 updates will propagate while some related, real update
7340 is propagating)
7341 */
7342 if ((trans_row_count == 0) &&
7343 (!(opt_ndb_log_apply_status && trans_slave_row_count))) {
7344 /* nothing to commit, rollback instead */
7345 if (int r = trans.rollback()) {
7346 log_error("Error during ROLLBACK of GCI %u/%u. Error: %d",
7347 uint(current_epoch >> 32), uint(current_epoch), r);
7348 /* TODO: Further handling? */
7349 }
7350 break;
7351 }
7352 }
7353 thd->proc_info = "Committing events to binlog";
7354 if (int r = trans.commit()) {
7355 log_error("Error during COMMIT of GCI. Error: %d", r);
7356 /* TODO: Further handling? */
7357 }
7358 injector::transaction::binlog_pos start = trans.start_pos();
7359 injector::transaction::binlog_pos next = trans.next_pos();
7360 rows->gci = (Uint32)(current_epoch >> 32); // Expose gci hi/lo
7361 rows->epoch = current_epoch;
7362 rows->start_master_log_file = start.file_name();
7363 rows->start_master_log_pos = start.file_pos();
7364 if ((next.file_pos() == 0) && ndb_log_empty_epochs()) {
7365 /* Empty transaction 'committed' due to log_empty_epochs
7366 * therefore no next position
7367 */
7368 rows->next_master_log_file = start.file_name();
7369 rows->next_master_log_pos = start.file_pos();
7370 } else {
7371 rows->next_master_log_file = next.file_name();
7372 rows->next_master_log_pos = next.file_pos();
7373 }
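          // These values end up as one row per epoch in the
          // mysql.ndb_binlog_index table, mapping the epoch to the
          // binlog file and position range that contains its changes.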
7374
7375 DBUG_PRINT("info", ("COMMIT epoch: %lu", (ulong)current_epoch));
7376 if (opt_ndb_log_binlog_index) {
7377 if (Ndb_binlog_index_table_util::write_rows(thd, rows)) {
7378 /*
7379 Writing to ndb_binlog_index failed, check if it's because THD
7380 have been killed and retry in such case
7381 */
7382 if (thd->killed) {
7383 DBUG_PRINT("error", ("Failed to write to ndb_binlog_index at "
7384 "shutdown, retrying"));
7385 Ndb_binlog_index_table_util::write_rows_retry_after_kill(thd,
7386 rows);
7387 }
7388 }
7389 }
7390 ndb_latest_applied_binlog_epoch = current_epoch;
7391 break;
7392 } // while (trans.good())
7393
7394 /*
7395           NOTE: There may be more i_pOp events available.
7396           However, these are from another epoch and should be handled
7397           in the next iteration of the binlog injector loop.
7398 */
7399 }
7400 } // end: 'handled a 'current_epoch' of i_pOp's
7401
7402 // Notify the schema event handler about post_epoch so it may finish
7403 // any outstanding business
7404 schema_event_handler.post_epoch(current_epoch);
7405
7406 free_root(&mem_root, MYF(0));
7407 *root_ptr = old_root;
7408
7409 if (current_epoch > ndb_latest_handled_binlog_epoch) {
7410 Mutex_guard injector_mutex_g(injector_data_mutex);
7411 ndb_latest_handled_binlog_epoch = current_epoch;
7412 // Signal ndbcluster_binlog_wait'ers
7413 mysql_cond_broadcast(&injector_data_cond);
7414 }
7415
7416 DBUG_ASSERT(binlog_thread_state == BCCC_running);
7417
7418 // When a cluster failure occurs, each event operation will receive a
7419 // TE_CLUSTER_FAILURE event causing it to be torn down and removed.
7420     // When all event operations have been removed from their respective Ndb
7421 // object, the thread should restart and try to connect to NDB again.
7422 if (i_ndb->getEventOperation() == NULL &&
7423 s_ndb->getEventOperation() == NULL) {
7424 log_error("All event operations gone, restarting thread");
7425 binlog_thread_state = BCCC_restart;
7426 break;
7427 }
7428
7429 if (!ndb_binlog_tables_inited /* relaxed read without lock */) {
7430       // One (or more) of the ndbcluster util tables have been dropped, restart
7431       // the thread in order to create or set up the util table(s) again
7432       log_error("The util tables have been lost, restarting thread");
7433 binlog_thread_state = BCCC_restart;
7434 break;
7435 }
7436
7437 // Synchronize 1 object from the queue of objects detected for automatic
7438 // synchronization
7439 synchronize_detected_object(thd);
7440 }
7441
7442 // Check if loop has been terminated without properly handling all events
7443 if (ndb_binlog_running &&
7444 ndb_latest_handled_binlog_epoch < ndb_get_latest_trans_gci()) {
7445 log_error(
7446 "latest transaction in epoch %u/%u not in binlog "
7447 "as latest handled epoch is %u/%u",
7448 (uint)(ndb_get_latest_trans_gci() >> 32),
7449 (uint)(ndb_get_latest_trans_gci()),
7450 (uint)(ndb_latest_handled_binlog_epoch >> 32),
7451 (uint)(ndb_latest_handled_binlog_epoch));
7452 }
7453
7454 err:
7455 if (binlog_thread_state != BCCC_restart) {
7456 log_info("Shutting down");
7457 thd->proc_info = "Shutting down";
7458 } else {
7459 log_info("Restarting");
7460 thd->proc_info = "Restarting";
7461 }
7462
7463 mysql_mutex_lock(&injector_event_mutex);
7464 /* don't mess with the injector_ndb anymore from other threads */
7465 injector_thd = NULL;
7466 injector_ndb = NULL;
7467 schema_ndb = NULL;
7468 mysql_mutex_unlock(&injector_event_mutex);
7469
7470 mysql_mutex_lock(&injector_data_mutex);
7471 ndb_binlog_tables_inited = false;
7472 mysql_mutex_unlock(&injector_data_mutex);
7473
7474 Ndb_stored_grants::shutdown(thd_ndb);
7475
7476 thd->reset_db(NULL_CSTR); // as not to try to free memory
7477 remove_all_event_operations(s_ndb, i_ndb);
7478
7479 schema_dist_data.release();
7480
7481 // Fail any schema operations that has been registered but
7482 // never reached the coordinator
7483 NDB_SCHEMA_OBJECT::fail_all_schema_ops(Ndb_schema_dist::COORD_ABORT,
7484 "Aborted during shutdown");
7485
7486 delete s_ndb;
7487 s_ndb = NULL;
7488
7489 delete i_ndb;
7490 i_ndb = NULL;
7491
7492 if (thd_ndb) {
7493 Thd_ndb::release(thd_ndb);
7494 thd_set_thd_ndb(thd, NULL);
7495 thd_ndb = NULL;
7496 }
7497
7498 /**
7499 * release all extra references from tables
7500 */
7501 log_verbose(9, "Release extra share references");
7502 NDB_SHARE::release_extra_share_references();
7503
7504 log_info("Stopping...");
7505
7506 ndb_tdc_close_cached_tables();
7507 if (ndb_log_get_verbose_level() > 15) {
7508 NDB_SHARE::print_remaining_open_tables();
7509 }
7510
7511 if (binlog_thread_state == BCCC_restart) {
7512 goto restart_cluster_failure;
7513 }
7514
7515 // Release the thd->net created without vio
7516 thd->get_protocol_classic()->end_net();
7517 thd->release_resources();
7518 thd_manager->remove_thd(thd);
7519 delete thd;
7520
7521 ndb_binlog_running = false;
7522 mysql_cond_broadcast(&injector_data_cond);
7523
7524 log_info("Stopped");
7525
7526 DBUG_PRINT("exit", ("ndb_binlog_thread"));
7527 }
7528
7529 /*
7530 Return string containing current status of ndb binlog as
7531   comma-separated name-value pairs.
7532
7533 Used by ndbcluster_show_status() to fill the "binlog" row
7534 in result of SHOW ENGINE NDB STATUS
7535
7536 @param buf The buffer where to print status string
7537   @param buf_size Size of the buffer
7538
7539 @return Length of the string printed to "buf" or 0 if no string
7540 is printed
7541 */
7542
7543 size_t ndbcluster_show_status_binlog(char *buf, size_t buf_size) {
7544 DBUG_TRACE;
7545
7546 mysql_mutex_lock(&injector_event_mutex);
7547 if (injector_ndb) {
7548 const ulonglong latest_epoch = injector_ndb->getLatestGCI();
7549 mysql_mutex_unlock(&injector_event_mutex);
7550
7551 // Get highest trans gci seen by the cluster connections
7552 const ulonglong latest_trans_epoch = ndb_get_latest_trans_gci();
7553
7554 const size_t buf_len = snprintf(
7555 buf, buf_size,
7556 "latest_epoch=%llu, "
7557 "latest_trans_epoch=%llu, "
7558 "latest_received_binlog_epoch=%llu, "
7559 "latest_handled_binlog_epoch=%llu, "
7560 "latest_applied_binlog_epoch=%llu",
7561 latest_epoch, latest_trans_epoch, ndb_latest_received_binlog_epoch,
7562 ndb_latest_handled_binlog_epoch, ndb_latest_applied_binlog_epoch);
7563 return buf_len;
7564 } else
7565 mysql_mutex_unlock(&injector_event_mutex);
7566 return 0;
7567 }
7568